Skip to content

Commit 408a7a7

Browse files
committed
Improvements to trend modelling and periodicity testing for forecasting (#7)
This is a merge of a feature branch for issue #5.
1 parent d7ffa6e commit 408a7a7

File tree

106 files changed

+13977
-7927
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

106 files changed

+13977
-7927
lines changed

include/maths/CAdaptiveBucketing.h

Lines changed: 18 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -80,12 +80,10 @@ namespace maths
8080
class MATHS_EXPORT CAdaptiveBucketing
8181
{
8282
public:
83-
typedef std::vector<double> TDoubleVec;
84-
typedef std::vector<CFloatStorage> TFloatVec;
85-
typedef std::pair<core_t::TTime, core_t::TTime> TTimeTimePr;
86-
typedef CBasicStatistics::SSampleMeanVar<double>::TAccumulator TDoubleMeanVarAccumulator;
87-
typedef std::pair<TTimeTimePr, TDoubleMeanVarAccumulator> TTimeTimePrMeanVarPr;
88-
typedef std::vector<TTimeTimePrMeanVarPr> TTimeTimePrMeanVarPrVec;
83+
using TDoubleVec = std::vector<double>;
84+
using TFloatVec = std::vector<CFloatStorage>;
85+
using TFloatMeanAccumulator = CBasicStatistics::SSampleMean<CFloatStorage>::TAccumulator;
86+
using TFloatMeanAccumulatorVec = std::vector<TFloatMeanAccumulator>;
8987

9088
public:
9189
//! Restore by traversing a state document
@@ -116,14 +114,17 @@ class MATHS_EXPORT CAdaptiveBucketing
116114
//! \param[in] n The number of buckets.
117115
bool initialize(double a, double b, std::size_t n);
118116

119-
//! Add the function moments \f$([a_i,b_i], S_i)\f$ where
120-
//! \f$S_i\f$ are the means and variances of the function
121-
//! in the time intervals \f$([a_i,b_i])\f$.
117+
//! Add the function mean values \f$([a_i,b_i], m_i)\f$ where
118+
//! \f$m_i\f$ are the means of the function in the time intervals
119+
//! \f$([a+(i-1)l,a+il])\f$, \f$i\in[n]\f$ and \f$l=(b-a)/n\f$.
122120
//!
123-
//! \param[in] time The start of the period including \p values.
124-
//! \param[in] values Time ranges and the corresponding function
125-
//! value moments.
126-
void initialValues(core_t::TTime time, const TTimeTimePrMeanVarPrVec &values);
121+
//! \param[in] startTime The start of the period.
122+
//! \param[in] endTime The end of the period.
123+
//! \param[in] values The mean values in a regular subdivision
124+
//! of [\p startTime,\p endTime].
125+
void initialValues(core_t::TTime startTime,
126+
core_t::TTime endTime,
127+
const TFloatMeanAccumulatorVec &values);
127128

128129
//! Get the number of buckets.
129130
std::size_t size(void) const;
@@ -204,21 +205,18 @@ class MATHS_EXPORT CAdaptiveBucketing
204205
//! Get the memory used by this component
205206
std::size_t memoryUsage(void) const;
206207

207-
private:
208-
typedef CBasicStatistics::SSampleMean<CFloatStorage>::TAccumulator TFloatMeanAccumulator;
209-
210208
private:
211209
//! Compute the values corresponding to the change in end
212210
//! points from \p endpoints. The values are assigned based
213211
//! on their intersection with each bucket in the previous
214212
//! bucket configuration.
215213
virtual void refresh(const TFloatVec &endpoints) = 0;
216214

215+
//! Check if \p time is in this component's window.
216+
virtual bool inWindow(core_t::TTime time) const = 0;
217+
217218
//! Add the function value at \p time.
218-
virtual void add(std::size_t bucket,
219-
core_t::TTime time,
220-
double offset,
221-
const TDoubleMeanVarAccumulator &value) = 0;
219+
virtual void add(std::size_t bucket, core_t::TTime time, double value, double weight) = 0;
222220

223221
//! Get the offset w.r.t. the start of the bucketing of \p time.
224222
virtual double offset(core_t::TTime time) const = 0;

include/maths/CBasicStatistics.h

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -85,6 +85,23 @@ class MATHS_EXPORT CBasicStatistics
8585
//! Compute the sample median.
8686
static double median(const TDoubleVec &dataIn);
8787

88+
//! Compute the maximum of \p first, \p second and \p third.
//!
//! \note The arguments are compared using operator>= only and are
//! taken, and the result returned, by value.
89+
template<typename T>
90+
static T max(T first, T second, T third)
91+
{
92+
return first >= second ?
93+
(third >= first ? third : first) :
94+
(third >= second ? third : second);
95+
}
96+
97+
//! Compute the minimum of \p first, \p second and \p third.
//!
//! \note The arguments are compared using operator<= only and are
//! taken, and the result returned, by value.
98+
template<typename T>
99+
static T min(T first, T second, T third)
100+
{
101+
return first <= second ?
102+
(third <= first ? third : first) :
103+
(third <= second ? third : second);
104+
}
88105

89106
/////////////////////////// ACCUMULATORS ///////////////////////////
90107

@@ -1620,6 +1637,12 @@ class MATHS_EXPORT CBasicStatistics
16201637
return m_Max[0];
16211638
}
16221639

1640+
//! Get the range, i.e. the difference m_Max[0] - m_Min[0].
1641+
T range(void) const
1642+
{
1643+
return m_Max[0] - m_Min[0];
1644+
}
1645+
16231646
//! Get the margin by which all the values have the same sign.
16241647
T signMargin(void) const
16251648
{

include/maths/CCalendarComponentAdaptiveBucketing.h

Lines changed: 6 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -47,8 +47,7 @@ class CSeasonalTime;
4747
class MATHS_EXPORT CCalendarComponentAdaptiveBucketing : private CAdaptiveBucketing
4848
{
4949
public:
50-
typedef CAdaptiveBucketing::TTimeTimePrMeanVarPrVec TTimeTimePrMeanVarPrVec;
51-
typedef CBasicStatistics::SSampleMeanVar<CFloatStorage>::TAccumulator TFloatMeanVarAccumulator;
50+
using TFloatMeanVarAccumulator = CBasicStatistics::SSampleMeanVar<CFloatStorage>::TAccumulator;
5251

5352
public:
5453
CCalendarComponentAdaptiveBucketing(void);
@@ -160,7 +159,7 @@ class MATHS_EXPORT CCalendarComponentAdaptiveBucketing : private CAdaptiveBucket
160159
//@}
161160

162161
private:
163-
typedef std::vector<TFloatMeanVarAccumulator> TFloatMeanVarVec;
162+
using TFloatMeanVarVec = std::vector<TFloatMeanVarAccumulator>;
164163

165164
private:
166165
//! Restore by traversing a state document
@@ -174,11 +173,11 @@ class MATHS_EXPORT CCalendarComponentAdaptiveBucketing : private CAdaptiveBucket
174173
//! \param[in] endpoints The old end points.
175174
void refresh(const TFloatVec &endpoints);
176175

176+
//! Check if \p time is in this component's window.
177+
virtual bool inWindow(core_t::TTime time) const;
178+
177179
//! Add the function value to \p bucket.
178-
virtual void add(std::size_t bucket,
179-
core_t::TTime time,
180-
double offset,
181-
const TDoubleMeanVarAccumulator &value);
180+
virtual void add(std::size_t bucket, core_t::TTime time, double value, double weight);
182181

183182
//! Get the offset w.r.t. the start of the bucketing of \p time.
184183
virtual double offset(core_t::TTime time) const;

include/maths/CDecompositionComponent.h

Lines changed: 12 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -43,15 +43,15 @@ namespace maths
4343
class MATHS_EXPORT CDecompositionComponent
4444
{
4545
public:
46-
typedef maths_t::TDoubleDoublePr TDoubleDoublePr;
47-
typedef std::vector<double> TDoubleVec;
48-
typedef std::vector<CFloatStorage> TFloatVec;
49-
typedef CSpline<boost::reference_wrapper<const TFloatVec>,
50-
boost::reference_wrapper<const TFloatVec>,
51-
boost::reference_wrapper<const TDoubleVec> > TSplineCRef;
52-
typedef CSpline<boost::reference_wrapper<TFloatVec>,
53-
boost::reference_wrapper<TFloatVec>,
54-
boost::reference_wrapper<TDoubleVec> > TSplineRef;
46+
using TDoubleDoublePr = maths_t::TDoubleDoublePr;
47+
using TDoubleVec = std::vector<double>;
48+
using TFloatVec = std::vector<CFloatStorage>;
49+
using TSplineCRef = CSpline<boost::reference_wrapper<const TFloatVec>,
50+
boost::reference_wrapper<const TFloatVec>,
51+
boost::reference_wrapper<const TDoubleVec>>;
52+
using TSplineRef = CSpline<boost::reference_wrapper<TFloatVec>,
53+
boost::reference_wrapper<TFloatVec>,
54+
boost::reference_wrapper<TDoubleVec>>;
5555

5656
public:
5757
//! Persist state by passing information to \p inserter.
@@ -72,9 +72,9 @@ class MATHS_EXPORT CDecompositionComponent
7272
};
7373

7474
public:
75-
typedef boost::array<CSplineTypes::EType, 2> TTypeArray;
76-
typedef boost::array<TFloatVec, 2> TFloatVecArray;
77-
typedef boost::array<TDoubleVec, 2> TDoubleVecArray;
75+
using TTypeArray = boost::array<CSplineTypes::EType, 2>;
76+
using TFloatVecArray = boost::array<TFloatVec, 2>;
77+
using TDoubleVecArray = boost::array<TDoubleVec, 2>;
7878

7979
public:
8080
CPackedSplines(CSplineTypes::EType valueInterpolationType,

include/maths/CExpandingWindow.h

Lines changed: 135 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,135 @@
1+
/*
2+
* ELASTICSEARCH CONFIDENTIAL
3+
*
4+
* Copyright (c) 2018 Elasticsearch BV. All Rights Reserved.
5+
*
6+
* Notice: this software, and all information contained
7+
* therein, is the exclusive property of Elasticsearch BV
8+
* and its licensors, if any, and is protected under applicable
9+
* domestic and foreign law, and international treaties.
10+
*
11+
* Reproduction, republication or distribution without the
12+
* express written consent of Elasticsearch BV is
13+
* strictly prohibited.
14+
*/
15+
16+
#ifndef INCLUDED_ml_maths_CExpandingWindow_h
17+
#define INCLUDED_ml_maths_CExpandingWindow_h
18+
19+
#include <core/CFloatStorage.h>
20+
#include <core/CoreTypes.h>
21+
#include <core/CVectorRange.h>
22+
23+
#include <maths/CBasicStatistics.h>
24+
#include <maths/ImportExport.h>
25+
26+
#include <cstddef>
27+
#include <functional>
28+
#include <vector>
29+
30+
namespace ml
31+
{
32+
namespace core
33+
{
34+
class CStatePersistInserter;
35+
class CStateRestoreTraverser;
36+
}
37+
38+
namespace maths
39+
{
40+
41+
//! \brief Implements a fixed memory expanding time window.
42+
//!
43+
//! DESCRIPTION:\n
44+
//! As the window expands it compresses by merging adjacent values
45+
//! and maintaining means of merged values. It cycles through a
46+
//! sequence of increasing compression factors, which are determined
47+
//! by a sequence of increasing bucketing lengths supplied to the
48+
//! constructor. At the point it overflows, i.e. time since the
49+
//! beginning of the window exceeds "size" x "maximum bucket length",
50+
//! it will re-initialize the bucketing and update the start time.
51+
class MATHS_EXPORT CExpandingWindow
52+
{
53+
public:
54+
using TDoubleVec = std::vector<double>;
55+
using TTimeVec = std::vector<core_t::TTime>;
56+
using TTimeCRng = core::CVectorRange<const TTimeVec>;
57+
using TFloatMeanAccumulator = CBasicStatistics::SSampleMean<CFloatStorage>::TAccumulator;
58+
using TFloatMeanAccumulatorVec = std::vector<TFloatMeanAccumulator>;
59+
using TPredictor = std::function<double (core_t::TTime)>;
60+
61+
public:
62+
CExpandingWindow(core_t::TTime bucketLength,
63+
TTimeCRng bucketLengths,
64+
std::size_t size,
65+
double decayRate = 0.0);
66+
67+
//! Initialize by reading state from \p traverser.
68+
bool acceptRestoreTraverser(core::CStateRestoreTraverser &traverser);
69+
70+
//! Persist state by passing information to \p inserter.
71+
void acceptPersistInserter(core::CStatePersistInserter &inserter) const;
72+
73+
//! Get the start time of the window.
74+
core_t::TTime startTime() const;
75+
76+
//! Get the end time of the window.
77+
core_t::TTime endTime() const;
78+
79+
//! Get the current bucket length.
80+
core_t::TTime bucketLength() const;
81+
82+
//! Get the bucket values.
83+
const TFloatMeanAccumulatorVec &values() const;
84+
85+
//! Get the bucket values minus the values from \p predictor.
86+
TFloatMeanAccumulatorVec valuesMinusPrediction(const TPredictor &predictor) const;
87+
88+
//! Set the start time to \p time.
89+
void initialize(core_t::TTime time);
90+
91+
//! Age the bucket values to account for \p time elapsed time.
92+
void propagateForwardsByTime(double time);
93+
94+
//! Add \p value at \p time.
95+
void add(core_t::TTime time, double value, double weight = 1.0);
96+
97+
//! Check if we need to compress by increasing the bucket span.
98+
bool needToCompress(core_t::TTime time) const;
99+
100+
//! Get a checksum for this object.
101+
uint64_t checksum(uint64_t seed = 0) const;
102+
103+
//! Debug the memory used by this object.
104+
void debugMemoryUsage(core::CMemoryUsage::TMemoryUsagePtr mem) const;
105+
106+
//! Get the memory used by this object.
107+
std::size_t memoryUsage() const;
108+
109+
private:
110+
//! The rate at which the bucket values are aged.
111+
double m_DecayRate;
112+
113+
//! The data bucketing length.
114+
core_t::TTime m_BucketLength;
115+
116+
//! The bucket lengths to test.
117+
TTimeCRng m_BucketLengths;
118+
119+
//! The index in m_BucketLengths of the current bucketing interval.
120+
std::size_t m_BucketLengthIndex;
121+
122+
//! The time of the first data point.
123+
core_t::TTime m_StartTime;
124+
125+
//! The bucket values.
126+
TFloatMeanAccumulatorVec m_BucketValues;
127+
128+
//! The mean value time modulo the data bucketing length.
129+
TFloatMeanAccumulator m_MeanOffset;
130+
};
131+
132+
}
133+
}
134+
135+
#endif // INCLUDED_ml_maths_CExpandingWindow_h

include/maths/CGammaRateConjugate.h

Lines changed: 19 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -85,15 +85,19 @@ class MATHS_EXPORT CGammaRateConjugate : public CPrior
8585
//! \param[in] priorShape The shape parameter of the gamma prior.
8686
//! \param[in] priorRate The rate parameter of the gamma prior.
8787
//! \param[in] decayRate The rate at which to revert to non-informative.
88+
//! \param[in] offsetMargin The margin between the smallest value and the support
89+
//! left end.
8890
CGammaRateConjugate(maths_t::EDataType dataType,
8991
double offset,
9092
double priorShape,
9193
double priorRate,
92-
double decayRate = 0.0);
94+
double decayRate = 0.0,
95+
double offsetMargin = GAMMA_OFFSET_MARGIN);
9396

9497
//! Construct by traversing a state document.
9598
CGammaRateConjugate(const SDistributionRestoreParams &params,
96-
core::CStateRestoreTraverser &traverser);
99+
core::CStateRestoreTraverser &traverser,
100+
double offsetMargin = GAMMA_OFFSET_MARGIN);
97101

98102
// Default copy constructor and assignment operator work.
99103

@@ -103,10 +107,13 @@ class MATHS_EXPORT CGammaRateConjugate : public CPrior
103107
//! for details).
104108
//! \param[in] offset The offset to apply to the data.
105109
//! \param[in] decayRate The rate at which to revert to the non-informative prior.
110+
//! \param[in] offsetMargin The margin between the smallest value and the support
111+
//! left end.
106112
//! \return A non-informative prior.
107113
static CGammaRateConjugate nonInformativePrior(maths_t::EDataType dataType,
108114
double offset = 0.0,
109-
double decayRate = 0.0);
115+
double decayRate = 0.0,
116+
double offsetMargin = GAMMA_OFFSET_MARGIN);
110117
//@}
111118

112119
//! \name Prior Contract
@@ -123,7 +130,12 @@ class MATHS_EXPORT CGammaRateConjugate : public CPrior
123130
//! Reset the prior to non-informative.
124131
virtual void setToNonInformative(double offset = 0.0, double decayRate = 0.0);
125132

126-
//! Returns false.
133+
//! Get the margin between the smallest value and the support left
134+
//! end. Priors with non-negative support automatically adjust the
135+
//! offset if a value is seen which is smaller than offset + margin.
136+
virtual double offsetMargin(void) const;
137+
138+
//! Returns true.
127139
virtual bool needsOffset(void) const;
128140

129141
//! Reset m_Offset so the smallest sample is not within some minimum
@@ -399,6 +411,9 @@ class MATHS_EXPORT CGammaRateConjugate : public CPrior
399411
//! us to model data with negative values greater than \f$-u\f$.
400412
double m_Offset;
401413

414+
//! The margin between the smallest value and the support left end.
415+
double m_OffsetMargin;
416+
402417
//! The maximum likelihood estimate of the shape parameter.
403418
double m_LikelihoodShape;
404419

0 commit comments

Comments
 (0)