Skip to content

Commit e1d8f49

Browse files
committed
Test fixes
1 parent 5beb535 commit e1d8f49

18 files changed

+484
-1261
lines changed

include/model/CMetricStatGatherer.h

Lines changed: 8 additions & 12 deletions
Original file line number | Diff line number | Diff line change
@@ -171,22 +171,18 @@ class CMeanTime {
171171

172172
core_t::TTime value() const {
173173
return static_cast<core_t::TTime>(
174-
std::round(maths::common::CBasicStatistics::mean(m_Time)));
174+
std::round(maths::common::CBasicStatistics::mean(m_Time)));
175175
}
176176

177177
void add(core_t::TTime time, unsigned int count) {
178178
m_Time.add(static_cast<double>(time), count);
179179
}
180180

181-
std::string toDelimited() const {
182-
return m_Time.toDelimited();
183-
}
181+
std::string toDelimited() const { return m_Time.toDelimited(); }
184182
bool fromDelimited(const std::string& value) {
185183
return m_Time.fromDelimited(value);
186184
}
187-
std::uint64_t checksum() const {
188-
return m_Time.checksum();
189-
}
185+
std::uint64_t checksum() const { return m_Time.checksum(); }
190186

191187
private:
192188
TMeanAccumulator m_Time;
@@ -199,7 +195,8 @@ class CStatGatherer {
199195
using TStat = STAT;
200196

201197
public:
202-
explicit CStatGatherer(core_t::TTime bucketLength, const STAT& initial) : m_Time{bucketLength}, m_Stat{initial} {}
198+
explicit CStatGatherer(core_t::TTime bucketLength, const STAT& initial)
199+
: m_Time{bucketLength}, m_Stat{initial} {}
203200

204201
std::size_t dimension() const {
205202
return metric_stat_shims::dimension(m_Stat);
@@ -218,8 +215,8 @@ class CStatGatherer {
218215
void add(core_t::TTime bucketTime, const TDouble1Vec& value, unsigned int count) {
219216
if (metric_stat_shims::wouldAdd(value, m_Stat)) {
220217
m_Time.add(bucketTime, count);
218+
metric_stat_shims::add(value, count, m_Stat);
221219
}
222-
metric_stat_shims::add(value, count, m_Stat);
223220
}
224221

225222
void acceptPersistInserter(core::CStatePersistInserter& inserter) const {
@@ -307,7 +304,7 @@ class CMetricStatGatherer {
307304

308305
const auto& gatherer = m_BucketStats.get(time);
309306

310-
if (gatherer.count() > 0) {
307+
if (gatherer.value().empty() == false) {
311308
TStrCRefDouble1VecDoublePrPrVecVec influenceValues(
312309
m_InfluencerBucketStats.size());
313310
for (std::size_t i = 0; i < m_InfluencerBucketStats.size(); ++i) {
@@ -332,8 +329,7 @@ class CMetricStatGatherer {
332329
gatherer.samples(time)};
333330
}
334331

335-
return {m_Classifier.isInteger(), m_Classifier.isNonNegative(),
336-
gatherer.samples(time)};
332+
return {m_Classifier.isInteger(), m_Classifier.isNonNegative(), {}};
337333
}
338334

339335
//! Update the state with a new measurement.

include/model/CModelFactory.h

Lines changed: 0 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -319,10 +319,6 @@ class MODEL_EXPORT CModelFactory {
319319
//! models.
320320
void initialDecayRateMultiplier(double multiplier);
321321

322-
//! Set the maximum number of times we'll update a person's model
323-
//! in a bucketing interval.
324-
void maximumUpdatesPerBucket(double maximumUpdatesPerBucket);
325-
326322
//! Set the prune window scale factor minimum
327323
void pruneWindowScaleMinimum(double factor);
328324

include/model/ModelTypes.h

Lines changed: 0 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -429,14 +429,6 @@ bool isSumFeature(EFeature feature);
429429
MODEL_EXPORT
430430
double varianceScale(EFeature feature, double sampleCount, double count);
431431

432-
//! Check if the feature is sampled.
433-
MODEL_EXPORT
434-
bool isSampled(EFeature feature);
435-
436-
//! Get the minimum useful sample count for a feature.
437-
MODEL_EXPORT
438-
unsigned minimumSampleCount(EFeature feature);
439-
440432
//! Offset count features so that their range starts at zero.
441433
MODEL_EXPORT
442434
double offsetCountToZero(EFeature feature, double count);

include/model/SModelParams.h

Lines changed: 0 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -133,9 +133,6 @@ struct MODEL_EXPORT SModelParams {
133133
//! The frequency at which to exclude an attribute.
134134
double s_ExcludeAttributeFrequency;
135135

136-
//! The maximum number of times we'll update a metric model in a bucket.
137-
double s_MaximumUpdatesPerBucket;
138-
139136
//! The number of buckets that are within the latency window.
140137
std::size_t s_LatencyBuckets;
141138

lib/model/CAnomalyDetectorModelConfig.cc

Lines changed: 0 additions & 13 deletions
Original file line number | Diff line number | Diff line change
@@ -736,7 +736,6 @@ namespace {
736736
const std::string ONLINE_LEARN_RATE_PROPERTY("learnrate");
737737
const std::string DECAY_RATE_PROPERTY("decayrate");
738738
const std::string INITIAL_DECAY_RATE_MULTIPLIER_PROPERTY("initialdecayratemultiplier");
739-
const std::string MAXIMUM_UPDATES_PER_BUCKET_PROPERTY("maximumupdatesperbucket");
740739
const std::string INDIVIDUAL_MODE_FRACTION_PROPERTY("individualmodefraction");
741740
const std::string POPULATION_MODE_FRACTION_PROPERTY("populationmodefraction");
742741
const std::string COMPONENT_SIZE_PROPERTY("componentsize");
@@ -796,18 +795,6 @@ bool CAnomalyDetectorModelConfig::processStanza(const boost::property_tree::ptre
796795
for (auto& factory : m_Factories) {
797796
factory.second->initialDecayRateMultiplier(multiplier);
798797
}
799-
} else if (propName == MAXIMUM_UPDATES_PER_BUCKET_PROPERTY) {
800-
double maximumUpdatesPerBucket;
801-
if (core::CStringUtils::stringToType(propValue, maximumUpdatesPerBucket) == false ||
802-
maximumUpdatesPerBucket < 0.0) {
803-
LOG_ERROR(<< "Invalid value for property " << propName << " : " << propValue);
804-
result = false;
805-
continue;
806-
}
807-
808-
for (auto& factory : m_Factories) {
809-
factory.second->maximumUpdatesPerBucket(maximumUpdatesPerBucket);
810-
}
811798
} else if (propName == INDIVIDUAL_MODE_FRACTION_PROPERTY) {
812799
double fraction;
813800
if (core::CStringUtils::stringToType(propValue, fraction) == false ||

lib/model/CMetricModel.cc

Lines changed: 5 additions & 18 deletions
Original file line number | Diff line number | Diff line change
@@ -260,14 +260,6 @@ void CMetricModel::sample(core_t::TTime startTime,
260260
continue;
261261
}
262262

263-
const TOptionalSample& bucket = data_.second.s_BucketValue;
264-
if (model_t::isSampled(feature) && bucket != std::nullopt) {
265-
values.assign(1, core::make_triple(
266-
bucket->time(), TDouble2Vec(bucket->value(dimension)),
267-
model_t::INDIVIDUAL_ANALYSIS_ATTRIBUTE_ID));
268-
model->addBucketValue(values);
269-
}
270-
271263
// For sparse data we reduce the impact of samples from empty buckets.
272264
// In effect, we smoothly transition to modeling only values from non-empty
273265
// buckets as the data becomes sparse.
@@ -276,12 +268,7 @@ void CMetricModel::sample(core_t::TTime startTime,
276268
continue;
277269
}
278270

279-
std::size_t n = samples.size();
280-
double countWeight =
281-
(this->params().s_MaximumUpdatesPerBucket > 0.0 && n > 0
282-
? this->params().s_MaximumUpdatesPerBucket / static_cast<double>(n)
283-
: 1.0) *
284-
this->learnRate(feature) * initialCountWeight;
271+
double countWeight = this->learnRate(feature) * initialCountWeight;
285272
double outlierWeightDerate = this->derate(pid, sampleTime);
286273
// Note we need to scale the amount of data we'll "age out" of the residual
287274
// model in one bucket by the empty bucket weight so the posterior doesn't
@@ -297,10 +284,10 @@ void CMetricModel::sample(core_t::TTime startTime,
297284
<< ", scaled count weight = " << scaledCountWeight
298285
<< ", scaled interval = " << scaledInterval);
299286

300-
values.resize(n);
301-
trendWeights.resize(n, maths_t::CUnitWeights::unit<TDouble2Vec>(dimension));
302-
priorWeights.resize(n, maths_t::CUnitWeights::unit<TDouble2Vec>(dimension));
303-
for (std::size_t i = 0; i < n; ++i) {
287+
values.resize(samples.size());
288+
trendWeights.resize(samples.size(), maths_t::CUnitWeights::unit<TDouble2Vec>(dimension));
289+
priorWeights.resize(samples.size(), maths_t::CUnitWeights::unit<TDouble2Vec>(dimension));
290+
for (std::size_t i = 0; i < samples.size(); ++i) {
304291
core_t::TTime ithSampleTime = samples[i].time();
305292
TDouble2Vec ithSampleValue(samples[i].value(dimension));
306293
double countVarianceScale = samples[i].varianceScale();

lib/model/CMetricPopulationModel.cc

Lines changed: 1 addition & 16 deletions
Original file line number | Diff line number | Diff line change
@@ -406,8 +406,6 @@ void CMetricPopulationModel::sample(core_t::TTime startTime,
406406
continue;
407407
}
408408

409-
const TOptionalSample& bucket =
410-
CDataGatherer::extractData(data_).s_BucketValue;
411409
const auto& samples = CDataGatherer::extractData(data_).s_Samples;
412410
bool isInteger = CDataGatherer::extractData(data_).s_IsInteger;
413411
bool isNonNegative = CDataGatherer::extractData(data_).s_IsNonNegative;
@@ -421,22 +419,9 @@ void CMetricPopulationModel::sample(core_t::TTime startTime,
421419

422420
attribute.s_IsInteger &= isInteger;
423421
attribute.s_IsNonNegative &= isNonNegative;
424-
if (model_t::isSampled(feature) && bucket) {
425-
attribute.s_BucketValues.emplace_back(
426-
bucket->time(), TDouble2Vec(bucket->value(dimension)), pid);
427-
}
428-
429-
std::size_t n = std::count_if(samples.begin(), samples.end(),
430-
[cutoff](const CSample& sample) {
431-
return sample.time() >= cutoff;
432-
});
433-
double updatesPerBucket = this->params().s_MaximumUpdatesPerBucket;
434422
double countWeight = initialCountWeight *
435423
this->sampleRateWeight(pid, cid) *
436-
this->learnRate(feature) *
437-
(updatesPerBucket > 0.0 && n > 0
438-
? updatesPerBucket / static_cast<double>(n)
439-
: 1.0);
424+
this->learnRate(feature);
440425
LOG_TRACE(<< "countWeight = " << countWeight);
441426

442427
for (const auto& sample : samples) {

lib/model/CModelFactory.cc

Lines changed: 0 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -239,10 +239,6 @@ void CModelFactory::initialDecayRateMultiplier(double multiplier) {
239239
m_ModelParams.s_InitialDecayRateMultiplier = multiplier;
240240
}
241241

242-
void CModelFactory::maximumUpdatesPerBucket(double maximumUpdatesPerBucket) {
243-
m_ModelParams.s_MaximumUpdatesPerBucket = maximumUpdatesPerBucket;
244-
}
245-
246242
void CModelFactory::pruneWindowScaleMinimum(double factor) {
247243
m_ModelParams.s_PruneWindowScaleMinimum = factor;
248244
}

lib/model/ModelTypes.cc

Lines changed: 0 additions & 120 deletions
Original file line number | Diff line number | Diff line change
@@ -422,126 +422,6 @@ double varianceScale(EFeature feature, double sampleCount, double count) {
422422
: 1.0;
423423
}
424424

425-
bool isSampled(EFeature feature) {
426-
switch (feature) {
427-
CASE_INDIVIDUAL_COUNT:
428-
return false;
429-
430-
case E_IndividualMeanByPerson:
431-
case E_IndividualLowMeanByPerson:
432-
case E_IndividualHighMeanByPerson:
433-
case E_IndividualMedianByPerson:
434-
case E_IndividualLowMedianByPerson:
435-
case E_IndividualHighMedianByPerson:
436-
case E_IndividualMinByPerson:
437-
case E_IndividualMaxByPerson:
438-
case E_IndividualVarianceByPerson:
439-
case E_IndividualLowVarianceByPerson:
440-
case E_IndividualHighVarianceByPerson:
441-
case E_IndividualMeanVelocityByPerson:
442-
case E_IndividualMinVelocityByPerson:
443-
case E_IndividualMaxVelocityByPerson:
444-
case E_IndividualMeanLatLongByPerson:
445-
return true;
446-
case E_IndividualSumByBucketAndPerson:
447-
case E_IndividualLowSumByBucketAndPerson:
448-
case E_IndividualHighSumByBucketAndPerson:
449-
case E_IndividualNonNullSumByBucketAndPerson:
450-
case E_IndividualLowNonNullSumByBucketAndPerson:
451-
case E_IndividualHighNonNullSumByBucketAndPerson:
452-
case E_IndividualSumVelocityByPerson:
453-
return false;
454-
455-
CASE_POPULATION_COUNT:
456-
return false;
457-
458-
case E_PopulationMeanByPersonAndAttribute:
459-
case E_PopulationLowMeanByPersonAndAttribute:
460-
case E_PopulationHighMeanByPersonAndAttribute:
461-
case E_PopulationMedianByPersonAndAttribute:
462-
case E_PopulationLowMedianByPersonAndAttribute:
463-
case E_PopulationHighMedianByPersonAndAttribute:
464-
case E_PopulationMinByPersonAndAttribute:
465-
case E_PopulationMaxByPersonAndAttribute:
466-
case E_PopulationVarianceByPersonAndAttribute:
467-
case E_PopulationLowVarianceByPersonAndAttribute:
468-
case E_PopulationHighVarianceByPersonAndAttribute:
469-
case E_PopulationMeanVelocityByPersonAndAttribute:
470-
case E_PopulationMinVelocityByPersonAndAttribute:
471-
case E_PopulationMaxVelocityByPersonAndAttribute:
472-
case E_PopulationMeanLatLongByPersonAndAttribute:
473-
return true;
474-
case E_PopulationSumByBucketPersonAndAttribute:
475-
case E_PopulationLowSumByBucketPersonAndAttribute:
476-
case E_PopulationHighSumByBucketPersonAndAttribute:
477-
case E_PopulationSumVelocityByPersonAndAttribute:
478-
return false;
479-
}
480-
return false;
481-
}
482-
483-
unsigned minimumSampleCount(EFeature feature) {
484-
switch (feature) {
485-
CASE_INDIVIDUAL_COUNT:
486-
return 1;
487-
488-
case E_IndividualMeanByPerson:
489-
case E_IndividualMinByPerson:
490-
case E_IndividualMaxByPerson:
491-
case E_IndividualSumByBucketAndPerson:
492-
case E_IndividualLowMeanByPerson:
493-
case E_IndividualHighMeanByPerson:
494-
case E_IndividualLowSumByBucketAndPerson:
495-
case E_IndividualHighSumByBucketAndPerson:
496-
case E_IndividualNonNullSumByBucketAndPerson:
497-
case E_IndividualLowNonNullSumByBucketAndPerson:
498-
case E_IndividualHighNonNullSumByBucketAndPerson:
499-
case E_IndividualMeanLatLongByPerson:
500-
case E_IndividualMaxVelocityByPerson:
501-
case E_IndividualMinVelocityByPerson:
502-
case E_IndividualMeanVelocityByPerson:
503-
case E_IndividualSumVelocityByPerson:
504-
case E_IndividualMedianByPerson:
505-
case E_IndividualLowMedianByPerson:
506-
case E_IndividualHighMedianByPerson:
507-
return 1;
508-
509-
// Population variance needs a minimum population size
510-
case E_IndividualVarianceByPerson:
511-
case E_IndividualLowVarianceByPerson:
512-
case E_IndividualHighVarianceByPerson:
513-
return 3;
514-
515-
CASE_POPULATION_COUNT:
516-
return 1;
517-
518-
case E_PopulationMeanByPersonAndAttribute:
519-
case E_PopulationMedianByPersonAndAttribute:
520-
case E_PopulationLowMedianByPersonAndAttribute:
521-
case E_PopulationHighMedianByPersonAndAttribute:
522-
case E_PopulationMinByPersonAndAttribute:
523-
case E_PopulationMaxByPersonAndAttribute:
524-
case E_PopulationSumByBucketPersonAndAttribute:
525-
case E_PopulationLowMeanByPersonAndAttribute:
526-
case E_PopulationHighMeanByPersonAndAttribute:
527-
case E_PopulationLowSumByBucketPersonAndAttribute:
528-
case E_PopulationHighSumByBucketPersonAndAttribute:
529-
case E_PopulationMeanLatLongByPersonAndAttribute:
530-
case E_PopulationMaxVelocityByPersonAndAttribute:
531-
case E_PopulationMinVelocityByPersonAndAttribute:
532-
case E_PopulationMeanVelocityByPersonAndAttribute:
533-
case E_PopulationSumVelocityByPersonAndAttribute:
534-
return 1;
535-
536-
// Population variance needs a minimum population size
537-
case E_PopulationVarianceByPersonAndAttribute:
538-
case E_PopulationLowVarianceByPersonAndAttribute:
539-
case E_PopulationHighVarianceByPersonAndAttribute:
540-
return 3;
541-
}
542-
return 1;
543-
}
544-
545425
double offsetCountToZero(EFeature feature, double count) {
546426
switch (feature) {
547427
case E_IndividualNonZeroCountByBucketAndPerson:

lib/model/SModelParams.cc

Lines changed: 0 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -48,7 +48,6 @@ SModelParams::SModelParams(core_t::TTime bucketLength)
4848
CAnomalyDetectorModelConfig::DEFAULT_MULTIVARIATE_COMPONENT_DELIMITER),
4949
s_ExcludeFrequent(model_t::E_XF_None), s_ExcludePersonFrequency(0.1),
5050
s_ExcludeAttributeFrequency(0.1),
51-
s_MaximumUpdatesPerBucket(CAnomalyDetectorModelConfig::DEFAULT_MAXIMUM_UPDATES_PER_BUCKET),
5251
s_LatencyBuckets(CAnomalyDetectorModelConfig::DEFAULT_LATENCY_BUCKETS),
5352
s_SampleCountFactor(CAnomalyDetectorModelConfig::DEFAULT_SAMPLE_COUNT_FACTOR_NO_LATENCY),
5453
s_SampleQueueGrowthFactor(CAnomalyDetectorModelConfig::DEFAULT_SAMPLE_QUEUE_GROWTH_FACTOR),
@@ -101,7 +100,6 @@ std::uint64_t SModelParams::checksum(std::uint64_t seed) const {
101100
seed = maths::common::CChecksum::calculate(seed, s_ExcludeFrequent);
102101
seed = maths::common::CChecksum::calculate(seed, s_ExcludePersonFrequency);
103102
seed = maths::common::CChecksum::calculate(seed, s_ExcludeAttributeFrequency);
104-
seed = maths::common::CChecksum::calculate(seed, s_MaximumUpdatesPerBucket);
105103
seed = maths::common::CChecksum::calculate(seed, s_InfluenceCutoff);
106104
seed = maths::common::CChecksum::calculate(seed, s_LatencyBuckets);
107105
seed = maths::common::CChecksum::calculate(seed, s_SampleCountFactor);

0 commit comments

Comments
 (0)