Skip to content

Commit 48a3bb8

Browse files
authored
[ML] Remove old per-partition normalization code (#184)
Per-partition normalization is an old, undocumented feature that was never used by clients. It has been superseded by per-partition maximum scoring (see #32748). This PR removes the now-redundant code. Relates to elastic/elasticsearch#32816.
1 parent 548222b commit 48a3bb8

21 files changed

+35
-311
lines changed

bin/autodetect/CCmdLineParser.cc

Lines changed: 0 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -52,7 +52,6 @@ bool CCmdLineParser::parse(int argc,
5252
bool& memoryUsage,
5353
std::size_t& bucketResultsDelay,
5454
bool& multivariateByFields,
55-
bool& perPartitionNormalization,
5655
TStrVec& clauseTokens) {
5756
try {
5857
boost::program_options::options_description desc(DESCRIPTION);
@@ -116,8 +115,6 @@ bool CCmdLineParser::parse(int argc,
116115
"The numer of half buckets to store before choosing which overlapping bucket has the biggest anomaly")
117116
("multivariateByFields",
118117
"Optional flag to enable multi-variate analysis of correlated by fields")
119-
("perPartitionNormalization",
120-
"Optional flag to enable per partition normalization")
121118
;
122119
// clang-format on
123120

@@ -231,9 +228,6 @@ bool CCmdLineParser::parse(int argc,
231228
if (vm.count("multivariateByFields") > 0) {
232229
multivariateByFields = true;
233230
}
234-
if (vm.count("perPartitionNormalization") > 0) {
235-
perPartitionNormalization = true;
236-
}
237231

238232
boost::program_options::collect_unrecognized(
239233
parsed.options, boost::program_options::include_positional)

bin/autodetect/CCmdLineParser.h

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -64,7 +64,6 @@ class CCmdLineParser {
6464
bool& memoryUsage,
6565
std::size_t& bucketResultsDelay,
6666
bool& multivariateByFields,
67-
bool& perPartitionNormalization,
6867
TStrVec& clauseTokens);
6968

7069
private:

bin/autodetect/Main.cc

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -88,7 +88,6 @@ int main(int argc, char** argv) {
8888
bool memoryUsage(false);
8989
std::size_t bucketResultsDelay(0);
9090
bool multivariateByFields(false);
91-
bool perPartitionNormalization(false);
9291
TStrVec clauseTokens;
9392
if (ml::autodetect::CCmdLineParser::parse(
9493
argc, argv, limitConfigFile, modelConfigFile, fieldConfigFile,
@@ -98,7 +97,7 @@ int main(int argc, char** argv) {
9897
maxQuantileInterval, inputFileName, isInputFileNamedPipe, outputFileName,
9998
isOutputFileNamedPipe, restoreFileName, isRestoreFileNamedPipe, persistFileName,
10099
isPersistFileNamedPipe, maxAnomalyRecords, memoryUsage, bucketResultsDelay,
101-
multivariateByFields, perPartitionNormalization, clauseTokens) == false) {
100+
multivariateByFields, clauseTokens) == false) {
102101
return EXIT_FAILURE;
103102
}
104103

@@ -146,7 +145,6 @@ int main(int argc, char** argv) {
146145
ml::model::CAnomalyDetectorModelConfig::defaultConfig(
147146
bucketSpan, summaryMode, summaryCountFieldName, latency,
148147
bucketResultsDelay, multivariateByFields);
149-
modelConfig.perPartitionNormalization(perPartitionNormalization);
150148
modelConfig.detectionRules(ml::model::CAnomalyDetectorModelConfig::TIntDetectionRuleVecUMapCRef(
151149
fieldConfig.detectionRules()));
152150
modelConfig.scheduledEvents(ml::model::CAnomalyDetectorModelConfig::TStrDetectionRulePrVecCRef(

bin/normalize/CCmdLineParser.cc

Lines changed: 1 addition & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -30,8 +30,7 @@ bool CCmdLineParser::parse(int argc,
3030
bool& isOutputFileNamedPipe,
3131
std::string& quantilesState,
3232
bool& deleteStateFiles,
33-
bool& writeCsv,
34-
bool& perPartitionNormalization) {
33+
bool& writeCsv) {
3534
try {
3635
boost::program_options::options_description desc(DESCRIPTION);
3736
// clang-format off
@@ -60,8 +59,6 @@ bool CCmdLineParser::parse(int argc,
6059
"If this flag is set then delete the normalizer state files once they have been read")
6160
("writeCsv",
6261
"Write the results in CSV format (default is lineified JSON)")
63-
("perPartitionNormalization",
64-
"Optional flag to enable per partition normalization")
6562
;
6663
// clang-format on
6764

@@ -114,9 +111,6 @@ bool CCmdLineParser::parse(int argc,
114111
if (vm.count("writeCsv") > 0) {
115112
writeCsv = true;
116113
}
117-
if (vm.count("perPartitionNormalization") > 0) {
118-
perPartitionNormalization = true;
119-
}
120114
} catch (std::exception& e) {
121115
std::cerr << "Error processing command line: " << e.what() << std::endl;
122116
return false;

bin/normalize/CCmdLineParser.h

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -43,8 +43,7 @@ class CCmdLineParser {
4343
bool& isOutputFileNamedPipe,
4444
std::string& quantilesState,
4545
bool& deleteStateFiles,
46-
bool& writeCsv,
47-
bool& perPartitionNormalization);
46+
bool& writeCsv);
4847

4948
private:
5049
static const std::string DESCRIPTION;

bin/normalize/Main.cc

Lines changed: 3 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -54,12 +54,10 @@ int main(int argc, char** argv) {
5454
std::string quantilesStateFile;
5555
bool deleteStateFiles(false);
5656
bool writeCsv(false);
57-
bool perPartitionNormalization(false);
5857
if (ml::normalize::CCmdLineParser::parse(
59-
argc, argv, modelConfigFile, logProperties, logPipe, bucketSpan,
60-
lengthEncodedInput, inputFileName, isInputFileNamedPipe,
61-
outputFileName, isOutputFileNamedPipe, quantilesStateFile,
62-
deleteStateFiles, writeCsv, perPartitionNormalization) == false) {
58+
argc, argv, modelConfigFile, logProperties, logPipe, bucketSpan, lengthEncodedInput,
59+
inputFileName, isInputFileNamedPipe, outputFileName, isOutputFileNamedPipe,
60+
quantilesStateFile, deleteStateFiles, writeCsv) == false) {
6361
return EXIT_FAILURE;
6462
}
6563

@@ -93,7 +91,6 @@ int main(int argc, char** argv) {
9391
LOG_FATAL(<< "Ml model config file '" << modelConfigFile << "' could not be loaded");
9492
return EXIT_FAILURE;
9593
}
96-
modelConfig.perPartitionNormalization(perPartitionNormalization);
9794

9895
// There's a choice of input and output formats for the numbers to be normalised
9996
using TInputParserUPtr = std::unique_ptr<ml::api::CInputParser>;

include/api/CHierarchicalResultsWriter.h

Lines changed: 1 addition & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -51,12 +51,7 @@ class API_EXPORT CHierarchicalResultsWriter : public model::CHierarchicalResults
5151
using TStr1Vec = core::CSmallVector<std::string, 1>;
5252

5353
public:
54-
enum EResultType {
55-
E_SimpleCountResult,
56-
E_PopulationResult,
57-
E_PartitionResult,
58-
E_Result
59-
};
54+
enum EResultType { E_SimpleCountResult, E_PopulationResult, E_Result };
6055
//! Type which wraps up the results of anomaly detection.
6156
struct API_EXPORT SResults {
6257
//! Construct for population results
@@ -168,9 +163,6 @@ class API_EXPORT CHierarchicalResultsWriter : public model::CHierarchicalResults
168163
//! pivot.
169164
void writePivotResult(const model::CHierarchicalResults& results, const TNode& node);
170165

171-
//! Write partition result if \p node is a partition level result
172-
void writePartitionResult(const model::CHierarchicalResults& results, const TNode& node);
173-
174166
//! Write out a simple count result if \p node is simple
175167
//! count.
176168
void writeSimpleCountResult(const TNode& node);

include/api/CJsonOutputWriter.h

Lines changed: 0 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -162,9 +162,6 @@ class API_EXPORT CJsonOutputWriter : public COutputHandler {
162162
// when the number to write is limited
163163
double s_LowestBucketInfluencerScore;
164164

165-
//! Partition scores
166-
TDocumentWeakPtrVec s_PartitionScoreDocuments;
167-
168165
//! scheduled event descriptions
169166
TStr1Vec s_ScheduledEventDescriptions;
170167
};
@@ -304,10 +301,6 @@ class API_EXPORT CJsonOutputWriter : public COutputHandler {
304301
void addInfluences(const CHierarchicalResultsWriter::TStoredStringPtrStoredStringPtrPrDoublePrVec& influenceResults,
305302
TDocumentWeakPtr weakDoc);
306303

307-
//! Write partition score & probability
308-
void addPartitionScores(const CHierarchicalResultsWriter::TResults& results,
309-
TDocumentWeakPtr weakDoc);
310-
311304
private:
312305
//! The job ID
313306
std::string m_JobId;

include/api/CResultNormalizer.h

Lines changed: 0 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -93,15 +93,6 @@ class API_EXPORT CResultNormalizer {
9393
std::string& valueFieldName,
9494
double& probability);
9595

96-
bool parseDataFields(const TStrStrUMap& dataRowFields,
97-
std::string& level,
98-
std::string& partition,
99-
std::string& partitionValue,
100-
std::string& person,
101-
std::string& function,
102-
std::string& valueFieldName,
103-
double& probability);
104-
10596
template<typename T>
10697
bool parseDataField(const TStrStrUMap& dataRowFields,
10798
const std::string& fieldName,

include/model/CAnomalyDetectorModelConfig.h

Lines changed: 0 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -418,12 +418,6 @@ class MODEL_EXPORT CAnomalyDetectorModelConfig {
418418
const TDoubleDoublePrVec& normalizedScoreKnotPoints() const;
419419
//@}
420420

421-
//! Check if we should create one normalizer per partition field value.
422-
bool perPartitionNormalization() const;
423-
424-
//! Set whether we should create one normalizer per partition field value.
425-
void perPartitionNormalization(bool value);
426-
427421
//! Sets the reference to the detection rules map
428422
void detectionRules(TIntDetectionRuleVecUMapCRef detectionRules);
429423

@@ -494,9 +488,6 @@ class MODEL_EXPORT CAnomalyDetectorModelConfig {
494488
//! and the normalized anomaly score with these knot points.
495489
//! \see DEFAULT_NORMALIZED_SCORE_KNOT_POINTS for details.
496490
TDoubleDoublePrVec m_NormalizedScoreKnotPoints;
497-
498-
//! If true then create one normalizer per partition field value.
499-
bool m_PerPartitionNormalisation;
500491
//@}
501492

502493
//! A reference to the map containing detection rules per

0 commit comments

Comments
 (0)