Skip to content

Commit eac04e9

Browse files
authored
[ML] First pass implementation of support functionality for change detection and modelling (#9)
This implements 1) a naive Bayes classifier, using our distribution models, which will be used for modelling the probability of a change, and 2) a change detector framework, currently supporting detecting level shifts and time shifts, which works by comparing BIC of the various possible hypotheses against one another and a null hypothesis that there is no change.
1 parent 91c0057 commit eac04e9

14 files changed

+2265
-3
lines changed

include/maths/CNaiveBayes.h

Lines changed: 228 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,228 @@
1+
/*
2+
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
3+
* or more contributor license agreements. Licensed under the Elastic License;
4+
* you may not use this file except in compliance with the Elastic License.
5+
*/
6+
7+
#ifndef INCLUDED_ml_maths_CNaiveBayes_h
8+
#define INCLUDED_ml_maths_CNaiveBayes_h
9+
10+
#include <maths/ImportExport.h>
11+
12+
#include <maths/CPrior.h>
13+
14+
#include <boost/unordered_map.hpp>
15+
16+
#include <cstddef>
17+
#include <vector>
18+
19+
namespace ml
20+
{
21+
//! Forward declarations of the persistence types. They are only used
//! by reference in the declarations visible in this header.
namespace core
22+
{
23+
class CStatePersistInserter;
24+
class CStateRestoreTraverser;
25+
}
26+
namespace maths
27+
{
28+
struct SDistributionRestoreParams;
29+
30+
//! \brief The interface expected by CNaiveBayes for implementations
31+
//! of the class conditional density functions.
//!
//! Every member function other than the destructor is pure virtual,
//! so a concrete density must implement all of them.
32+
class MATHS_EXPORT CNaiveBayesFeatureDensity
33+
{
34+
public:
35+
//! The type used to pass a value to the density.
using TDouble1Vec = core::CSmallVector<double, 1>;
36+
37+
public:
38+
//! Virtual so implementations can be destroyed via this interface.
virtual ~CNaiveBayesFeatureDensity() = default;
39+
40+
//! Create and return a clone.
41+
//!
42+
//! \note The caller owns this.
43+
virtual CNaiveBayesFeatureDensity *clone() const = 0;
44+
45+
//! Initialize by reading state from \p traverser.
//!
//! \return NOTE(review): presumably true if the state was restored
//! successfully and false otherwise; confirm against implementations.
46+
virtual bool acceptRestoreTraverser(const SDistributionRestoreParams &params,
47+
core::CStateRestoreTraverser &traverser) = 0;
48+
49+
//! Persist state by passing information to \p inserter.
50+
virtual void acceptPersistInserter(core::CStatePersistInserter &inserter) const = 0;
51+
52+
//! Add the value \p x.
53+
virtual void add(const TDouble1Vec &x) = 0;
54+
55+
//! Compute the log value of the density function at \p x.
56+
virtual double logValue(const TDouble1Vec &x) const = 0;
57+
58+
//! Age out old values from the density to account for \p time passing.
59+
virtual void propagateForwardsByTime(double time) = 0;
60+
61+
//! Debug the memory used by this object.
62+
virtual void debugMemoryUsage(core::CMemoryUsage::TMemoryUsagePtr mem) const = 0;
63+
64+
//! Get the static size of this object.
65+
virtual std::size_t staticSize() const = 0;
66+
67+
//! Get the memory used by this object.
68+
virtual std::size_t memoryUsage() const = 0;
69+
70+
//! Get a checksum for this object.
//!
//! \param[in] seed A seed value for the checksum.
71+
virtual uint64_t checksum(uint64_t seed) const = 0;
72+
};
73+
74+
//! \brief An implementation of the class conditional density function
75+
//! based on the CPrior hierarchy.
//!
//! Every member function of CNaiveBayesFeatureDensity is overridden
//! here and marked \c override, so the compiler rejects this class if
//! a signature ever drifts from the interface it implements.
76+
class MATHS_EXPORT CNaiveBayesFeatureDensityFromPrior final : public CNaiveBayesFeatureDensity
77+
{
78+
public:
79+
//! Create with a default constructed (empty) prior pointer.
//!
//! \note NOTE(review): presumably intended to be followed by a call
//! to acceptRestoreTraverser before use; confirm against callers.
CNaiveBayesFeatureDensityFromPrior() = default;
80+
//! Create from \p prior.
//!
//! \note NOTE(review): this is a converting constructor taking a
//! non-const reference. Whether \p prior is copied or modified is
//! not visible from this header; consider \c explicit and a const
//! reference if the implementation and callers allow it.
CNaiveBayesFeatureDensityFromPrior(CPrior &prior);
81+
82+
//! Create and return a clone.
83+
//!
84+
//! \note The caller owns this.
85+
CNaiveBayesFeatureDensityFromPrior *clone() const override;
86+
87+
//! Initialize by reading state from \p traverser.
//!
//! \return NOTE(review): presumably true if the state was restored
//! successfully and false otherwise; confirm against the definition.
88+
bool acceptRestoreTraverser(const SDistributionRestoreParams &params,
89+
core::CStateRestoreTraverser &traverser) override;
90+
91+
//! Persist state by passing information to \p inserter.
92+
void acceptPersistInserter(core::CStatePersistInserter &inserter) const override;
93+
94+
//! Add the value \p x.
95+
void add(const TDouble1Vec &x) override;
96+
97+
//! Compute the log value of the density function at \p x.
98+
double logValue(const TDouble1Vec &x) const override;
99+
100+
//! Age out old values from the density to account for \p time passing.
101+
void propagateForwardsByTime(double time) override;
102+
103+
//! Debug the memory used by this object.
104+
void debugMemoryUsage(core::CMemoryUsage::TMemoryUsagePtr mem) const override;
105+
106+
//! Get the static size of this object.
107+
std::size_t staticSize() const override;
108+
109+
//! Get the memory used by this object.
110+
std::size_t memoryUsage() const override;
111+
112+
//! Get a checksum for this object.
//!
//! \param[in] seed A seed value for the checksum.
113+
uint64_t checksum(uint64_t seed) const override;
114+
115+
private:
116+
using TPriorPtr = boost::shared_ptr<CPrior>;
117+
118+
private:
119+
//! The density model.
120+
TPriorPtr m_Prior;
121+
};
122+
123+
//! \brief Implements a Naive Bayes classifier.
//!
//! Classes are identified by std::size_t labels. For each class this
//! stores a count and a collection of feature conditional densities
//! (see SClass), which are created from an exemplar density.
124+
class MATHS_EXPORT CNaiveBayes
125+
{
126+
public:
127+
//! NOTE(review): presumably a (value, class label) pair, i.e. a
//! (count, label) pair for initialClassCounts and a (probability,
//! label) pair for highestClassProbabilities; confirm against the
//! implementation.
using TDoubleSizePr = std::pair<double, std::size_t>;
128+
using TDoubleSizePrVec = std::vector<TDoubleSizePr>;
129+
//! The value of one feature; empty if the feature is missing.
using TDouble1Vec = core::CSmallVector<double, 1>;
130+
//! The values of all the features of one data point.
using TDouble1VecVec = std::vector<TDouble1Vec>;
131+
132+
public:
133+
//! \param[in] exemplar NOTE(review): presumably the exemplar used
//! for creating the conditional densities; whether it is cloned is
//! not visible from this header.
//! \param[in] decayRate NOTE(review): presumably the rate at which
//! data are aged out; the default is zero.
explicit CNaiveBayes(const CNaiveBayesFeatureDensity &exemplar,
134+
double decayRate = 0.0);
135+
//! Create by reading state from \p traverser.
CNaiveBayes(const SDistributionRestoreParams &params,
136+
core::CStateRestoreTraverser &traverser);
137+
138+
//! Persist state by passing information to \p inserter.
139+
void acceptPersistInserter(core::CStatePersistInserter &inserter) const;
140+
141+
//! This can be used to optionally seed the class counts
142+
//! with \p counts. These are added on to data class counts
143+
//! to compute the class posterior probabilities.
144+
void initialClassCounts(const TDoubleSizePrVec &counts);
145+
146+
//! Add a training data point comprising the pair \f$(x,l)\f$
147+
//! for feature vector \f$x\f$ and class label \f$l\f$.
148+
//!
149+
//! \param[in] label The class label for \p x.
150+
//! \param[in] x The feature values.
151+
//! \note \p x size should be equal to the number of features.
152+
//! A missing feature is indicated by passing an empty vector
153+
//! for that feature.
154+
void addTrainingDataPoint(std::size_t label, const TDouble1VecVec &x);
155+
156+
//! Age out old values from the class conditional densities
157+
//! to account for \p time passing.
158+
void propagateForwardsByTime(double time);
159+
160+
//! Get the top \p n class probabilities for \p x.
161+
//!
162+
//! \param[in] n The number of class probabilities to estimate.
163+
//! \param[in] x The feature values.
164+
//! \note \p x size should be equal to the number of features.
165+
//! A missing feature is indicated by passing an empty vector
166+
//! for that feature.
//! \return NOTE(review): presumably (probability, class label)
//! pairs for the most probable classes; the ordering is not visible
//! from this header.
167+
TDoubleSizePrVec highestClassProbabilities(std::size_t n,
168+
const TDouble1VecVec &x) const;
169+
170+
//! Debug the memory used by this object.
171+
void debugMemoryUsage(core::CMemoryUsage::TMemoryUsagePtr mem) const;
172+
173+
//! Get the memory used by this object.
174+
std::size_t memoryUsage() const;
175+
176+
//! Get a checksum for this object.
//!
//! \param[in] seed A seed value for the checksum; defaults to zero.
177+
uint64_t checksum(uint64_t seed = 0) const;
178+
179+
private:
180+
using TFeatureDensityPtr = boost::shared_ptr<CNaiveBayesFeatureDensity>;
181+
using TFeatureDensityPtrVec = std::vector<TFeatureDensityPtr>;
182+
183+
//! \brief The data associated with a class.
184+
struct SClass
185+
{
186+
//! Initialize by reading state from \p traverser.
187+
bool acceptRestoreTraverser(const SDistributionRestoreParams &params,
188+
core::CStateRestoreTraverser &traverser);
189+
//! Persist state by passing information to \p inserter.
190+
void acceptPersistInserter(core::CStatePersistInserter &inserter) const;
191+
//! Debug the memory used by this object.
192+
void debugMemoryUsage(core::CMemoryUsage::TMemoryUsagePtr mem) const;
193+
//! Get the memory used by this object.
194+
std::size_t memoryUsage() const;
195+
//! Get a checksum for this object.
196+
uint64_t checksum(uint64_t seed = 0) const;
197+
198+
//! The number of examples in this class.
//! Held as a double rather than an integer and initialized to zero.
199+
double s_Count = 0.0;
200+
//! The feature conditional densities for this class.
201+
TFeatureDensityPtrVec s_ConditionalDensities;
202+
};
203+
204+
//! Maps a std::size_t key to the data held for a class.
//! NOTE(review): presumably the key is the class label.
using TSizeClassUMap = boost::unordered_map<std::size_t, SClass>;
205+
206+
private:
207+
//! Initialize by reading state from \p traverser.
208+
bool acceptRestoreTraverser(const SDistributionRestoreParams &params,
209+
core::CStateRestoreTraverser &traverser);
210+
211+
//! Validate \p x.
//!
//! \return NOTE(review): presumably true if \p x is valid, for
//! example if it has one entry per feature; the checks performed are
//! not visible from this header.
212+
bool validate(const TDouble1VecVec &x) const;
213+
214+
private:
215+
//! Controls the rate at which data are aged out.
//! This has no in-class initializer, so the constructors must set it.
216+
double m_DecayRate;
217+
218+
//! An exemplar for creating conditional densities.
219+
TFeatureDensityPtr m_Exemplar;
220+
221+
//! The class conditional density estimates and weights.
222+
TSizeClassUMap m_ClassConditionalDensities;
223+
};
224+
225+
}
226+
}
227+
228+
#endif // INCLUDED_ml_maths_CNaiveBayes_h

0 commit comments

Comments
 (0)