Skip to content

Commit 7b105a8

Browse files
committed
Merge branch 'master' into feature/analysis-pipeline
2 parents 454a561 + 9f15ef8 commit 7b105a8

File tree

69 files changed

+367
-2598
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

69 files changed

+367
-2598
lines changed

bin/autodetect/CCmdLineParser.cc

Lines changed: 0 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -50,7 +50,6 @@ bool CCmdLineParser::parse(int argc,
5050
bool& isPersistFileNamedPipe,
5151
size_t& maxAnomalyRecords,
5252
bool& memoryUsage,
53-
std::size_t& bucketResultsDelay,
5453
bool& multivariateByFields,
5554
TStrVec& clauseTokens) {
5655
try {
@@ -111,8 +110,6 @@ bool CCmdLineParser::parse(int argc,
111110
"The maximum number of records to be outputted for each bucket. Defaults to 100, a value 0 removes the limit.")
112111
("memoryUsage",
113112
"Log the model memory usage at the end of the job")
114-
("resultFinalizationWindow", boost::program_options::value<std::size_t>(),
115-
"The numer of half buckets to store before choosing which overlapping bucket has the biggest anomaly")
116113
("multivariateByFields",
117114
"Optional flag to enable multi-variate analysis of correlated by fields")
118115
;
@@ -222,9 +219,6 @@ bool CCmdLineParser::parse(int argc,
222219
if (vm.count("memoryUsage") > 0) {
223220
memoryUsage = true;
224221
}
225-
if (vm.count("resultFinalizationWindow") > 0) {
226-
bucketResultsDelay = vm["resultFinalizationWindow"].as<std::size_t>();
227-
}
228222
if (vm.count("multivariateByFields") > 0) {
229223
multivariateByFields = true;
230224
}

bin/autodetect/CCmdLineParser.h

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -62,7 +62,6 @@ class CCmdLineParser {
6262
bool& isPersistFileNamedPipe,
6363
size_t& maxAnomalyRecords,
6464
bool& memoryUsage,
65-
std::size_t& bucketResultsDelay,
6665
bool& multivariateByFields,
6766
TStrVec& clauseTokens);
6867

bin/autodetect/Main.cc

Lines changed: 3 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -86,7 +86,6 @@ int main(int argc, char** argv) {
8686
bool isPersistFileNamedPipe(false);
8787
size_t maxAnomalyRecords(100u);
8888
bool memoryUsage(false);
89-
std::size_t bucketResultsDelay(0);
9089
bool multivariateByFields(false);
9190
TStrVec clauseTokens;
9291
if (ml::autodetect::CCmdLineParser::parse(
@@ -96,8 +95,8 @@ int main(int argc, char** argv) {
9695
timeFormat, quantilesStateFile, deleteStateFiles, persistInterval,
9796
maxQuantileInterval, inputFileName, isInputFileNamedPipe, outputFileName,
9897
isOutputFileNamedPipe, restoreFileName, isRestoreFileNamedPipe,
99-
persistFileName, isPersistFileNamedPipe, maxAnomalyRecords, memoryUsage,
100-
bucketResultsDelay, multivariateByFields, clauseTokens) == false) {
98+
persistFileName, isPersistFileNamedPipe, maxAnomalyRecords,
99+
memoryUsage, multivariateByFields, clauseTokens) == false) {
101100
return EXIT_FAILURE;
102101
}
103102

@@ -143,8 +142,7 @@ int main(int argc, char** argv) {
143142
summaryCountFieldName.empty() ? ml::model_t::E_None : ml::model_t::E_Manual);
144143
ml::model::CAnomalyDetectorModelConfig modelConfig =
145144
ml::model::CAnomalyDetectorModelConfig::defaultConfig(
146-
bucketSpan, summaryMode, summaryCountFieldName, latency,
147-
bucketResultsDelay, multivariateByFields);
145+
bucketSpan, summaryMode, summaryCountFieldName, latency, multivariateByFields);
148146
modelConfig.detectionRules(ml::model::CAnomalyDetectorModelConfig::TIntDetectionRuleVecUMapCRef(
149147
fieldConfig.detectionRules()));
150148
modelConfig.scheduledEvents(ml::model::CAnomalyDetectorModelConfig::TStrDetectionRulePrVecCRef(

build.gradle

Lines changed: 9 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -190,16 +190,21 @@ task build(dependsOn: [check, assemble]) {
190190
description = 'Assemble and test the C++ part of Machine Learning'
191191
}
192192

193-
task wrapper(type: Wrapper) {
194-
distributionType = DistributionType.ALL
195-
193+
/*
194+
* This breaks when ml-cpp is in elasticsearch-extra
195+
* and elasticsearch is still using Gradle 4.x
196+
* TODO: Uncomment next time Gradle is upgraded
197+
*
198+
wrapper {
199+
distributionType = 'ALL'
196200
doLast {
197201
final DistributionLocator locator = new DistributionLocator()
198202
final GradleVersion version = GradleVersion.version(wrapper.gradleVersion)
199203
final URI distributionUri = locator.getDistributionFor(version, wrapper.distributionType.name().toLowerCase(Locale.ENGLISH))
200204
final URI sha256Uri = new URI(distributionUri.toString() + ".sha256")
201205
final String sha256Sum = new String(sha256Uri.toURL().bytes)
202206
wrapper.getPropertiesFile() << "distributionSha256Sum=${sha256Sum}\n"
207+
println "Added checksum to wrapper properties"
203208
}
204209
}
205-
210+
*/

docs/CHANGELOG.asciidoc

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,30 @@
2828
2929
//=== Regressions
3030
31+
== {es} version 6.6.0
32+
33+
=== Breaking Changes
34+
35+
=== Deprecations
36+
37+
=== New Features
38+
39+
=== Enhancements
40+
41+
=== Bug Fixes
42+
43+
Fix cause of "Sample out of bounds" error message (See {ml-pull}355[355].)
44+
45+
=== Regressions
46+
47+
== {es} version 6.5.3
48+
49+
=== Bug Fixes
50+
51+
Correct query times for model plot and forecast in the bucket to match the times we assign
52+
the samples we add to the model for each bucket. For long bucket lengths, this could result
53+
in an apparently shifted model plot with respect to the data and increased errors in forecasts.
54+
3155
== {es} version 6.5.0
3256
3357
//=== Breaking Changes

gradle/wrapper/gradle-wrapper.jar

1014 Bytes
Binary file not shown.
Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
distributionBase=GRADLE_USER_HOME
22
distributionPath=wrapper/dists
3-
distributionUrl=https\://services.gradle.org/distributions/gradle-4.10-all.zip
3+
distributionUrl=https\://services.gradle.org/distributions/gradle-5.0-all.zip
44
zipStoreBase=GRADLE_USER_HOME
55
zipStorePath=wrapper/dists
6-
distributionSha256Sum=fc049dcbcb245d5892bebae143bd515a78f6a5a93cec99d489b312dc0ce4aad9
6+
distributionSha256Sum=17847c8e12b2bcfce26a79f425f082c31d4ded822f99a66127eee2d96bf18216

gradlew

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -28,7 +28,7 @@ APP_NAME="Gradle"
2828
APP_BASE_NAME=`basename "$0"`
2929

3030
# Add default JVM options here. You can also use JAVA_OPTS and GRADLE_OPTS to pass JVM options to this script.
31-
DEFAULT_JVM_OPTS=""
31+
DEFAULT_JVM_OPTS='"-Xmx64m"'
3232

3333
# Use the maximum available, or set MAX_FD != -1 to use that value.
3434
MAX_FD="maximum"

gradlew.bat

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,7 @@ set APP_BASE_NAME=%~n0
1414
set APP_HOME=%DIRNAME%
1515

1616
@rem Add default JVM options here. You can also use JAVA_OPTS and GRADLE_OPTS to pass JVM options to this script.
17-
set DEFAULT_JVM_OPTS=
17+
set DEFAULT_JVM_OPTS="-Xmx64m"
1818

1919
@rem Find java.exe
2020
if defined JAVA_HOME goto findJavaFromJavaHome

include/api/CAnomalyJob.h

Lines changed: 6 additions & 38 deletions
Original file line numberDiff line numberDiff line change
@@ -12,13 +12,11 @@
1212

1313
#include <model/CAnomalyDetector.h>
1414
#include <model/CAnomalyDetectorModelConfig.h>
15-
#include <model/CBucketQueue.h>
1615
#include <model/CHierarchicalResults.h>
1716
#include <model/CHierarchicalResultsAggregator.h>
1817
#include <model/CHierarchicalResultsNormalizer.h>
1918
#include <model/CInterimBucketCorrector.h>
2019
#include <model/CResourceMonitor.h>
21-
#include <model/CResultsQueue.h>
2220
#include <model/CSearchKey.h>
2321

2422
#include <api/CDataProcessor.h>
@@ -117,26 +115,20 @@ class API_EXPORT CAnomalyJob : public CDataProcessor {
117115
std::pair<model::CSearchKey::TStrCRefKeyCRefPr, TAnomalyDetectorPtr>;
118116
using TKeyCRefAnomalyDetectorPtrPrVec = std::vector<TKeyCRefAnomalyDetectorPtrPr>;
119117
using TModelPlotDataVec = model::CAnomalyDetector::TModelPlotDataVec;
120-
using TModelPlotDataVecCItr = TModelPlotDataVec::const_iterator;
121-
using TModelPlotDataVecQueue = model::CBucketQueue<TModelPlotDataVec>;
122118

123119
struct API_EXPORT SRestoredStateDetail {
124120
ERestoreStateStatus s_RestoredStateStatus;
125121
boost::optional<std::string> s_Extra;
126122
};
127123

128124
struct SBackgroundPersistArgs {
129-
SBackgroundPersistArgs(const model::CResultsQueue& resultsQueue,
130-
const TModelPlotDataVecQueue& modelPlotQueue,
131-
core_t::TTime time,
125+
SBackgroundPersistArgs(core_t::TTime time,
132126
const model::CResourceMonitor::SResults& modelSizeStats,
133127
const model::CInterimBucketCorrector& interimBucketCorrector,
134128
const model::CHierarchicalResultsAggregator& aggregator,
135129
core_t::TTime latestRecordTime,
136130
core_t::TTime lastResultsTime);
137131

138-
model::CResultsQueue s_ResultsQueue;
139-
TModelPlotDataVecQueue s_ModelPlotQueue;
140132
core_t::TTime s_Time;
141133
model::CResourceMonitor::SResults s_ModelSizeStats;
142134
model::CInterimBucketCorrector s_InterimBucketCorrector;
@@ -224,14 +216,11 @@ class API_EXPORT CAnomalyJob : public CDataProcessor {
224216
void outputInterimResults(core_t::TTime bucketStartTime);
225217

226218
//! Helper function for outputResults.
227-
//! \p processingTimer is the processing time can be written to the bucket
228-
//! \p sumPastProcessingTime is the total time previously spent processing
229-
//! but resulted in no bucket being outputted.
219+
//! \p processingTime is the processing time of the bucket
230220
void writeOutResults(bool interim,
231221
model::CHierarchicalResults& results,
232222
core_t::TTime bucketTime,
233-
uint64_t processingTime,
234-
uint64_t sumPastProcessingTime);
223+
uint64_t processingTime);
235224

236225
//! Reset buckets in the range specified by the control message.
237226
void resetBuckets(const std::string& controlMessage);
@@ -259,8 +248,6 @@ class API_EXPORT CAnomalyJob : public CDataProcessor {
259248

260249
//! Persist the detectors to a stream.
261250
bool persistState(const std::string& descriptionPrefix,
262-
const model::CResultsQueue& resultsQueue,
263-
const TModelPlotDataVecQueue& modelPlotQueue,
264251
core_t::TTime time,
265252
const TKeyCRefAnomalyDetectorPtrPrVec& detectors,
266253
const model::CResourceMonitor::SResults& modelSizeStats,
@@ -296,16 +283,9 @@ class API_EXPORT CAnomalyJob : public CDataProcessor {
296283
//! \param[in] endTime The end of the time interval to skip sampling.
297284
void skipSampling(core_t::TTime endTime);
298285

299-
//! Outputs queued results and resets the queue to the given \p startTime
300-
void flushAndResetResultsQueue(core_t::TTime startTime);
301-
302286
//! Roll time forward to \p time
303287
void timeNow(core_t::TTime time);
304288

305-
//! Get the bucketLength, or half the bucketLength if
306-
//! out-of-phase buckets are active
307-
core_t::TTime effectiveBucketLength() const;
308-
309289
//! Update configuration
310290
void updateConfig(const std::string& config);
311291

@@ -333,15 +313,12 @@ class API_EXPORT CAnomalyJob : public CDataProcessor {
333313
//! specified time range.
334314
void generateModelPlot(core_t::TTime startTime,
335315
core_t::TTime endTime,
336-
const model::CAnomalyDetector& detector);
316+
const model::CAnomalyDetector& detector,
317+
TModelPlotDataVec& modelPlotData);
337318

338319
//! Write the pre-generated model plot to the output stream of the user's
339320
//! choosing: either file or streamed to the API
340-
void writeOutModelPlot(core_t::TTime resultsTime);
341-
342-
//! Write the pre-generated model plot to the output stream of the user's
343-
//! choosing: either file or streamed to the API
344-
void writeOutModelPlot(core_t::TTime, CModelPlotDataJsonWriter& writer);
321+
void writeOutModelPlot(const TModelPlotDataVec& modelPlotData);
345322

346323
//! Persist one detector to a stream.
347324
//! This method is static so that there is no danger of it accessing
@@ -477,15 +454,6 @@ class API_EXPORT CAnomalyJob : public CDataProcessor {
477454
//! The hierarchical results normalizer.
478455
model::CHierarchicalResultsNormalizer m_Normalizer;
479456

480-
//! Store the last N half-buckets' results in order
481-
//! to choose the best result
482-
model::CResultsQueue m_ResultsQueue;
483-
484-
//! Also store the model plot for the buckets for each
485-
//! result time - these will be output when the corresponding
486-
//! result is output
487-
TModelPlotDataVecQueue m_ModelPlotQueue;
488-
489457
friend class ::CBackgroundPersisterTest;
490458
friend class ::CAnomalyJobTest;
491459
};

0 commit comments

Comments
 (0)