From 5546bd2576b186671245a9f3655463da4cc28387 Mon Sep 17 00:00:00 2001 From: David Roberts Date: Thu, 12 Mar 2020 17:52:14 +0000 Subject: [PATCH 1/6] [ML] Add a model memory estimation endpoint for anomaly detection This PR completes the implementation of the model memory estimation endpoint: POST _ml/anomaly_detectors/_estimate_model_memory Closes #53219 --- .../client/MLRequestConverters.java | 12 ++ .../client/MachineLearningClient.java | 44 +++++++ .../client/ml/EstimateModelMemoryRequest.java | 114 ++++++++++++++++++ .../ml/EstimateModelMemoryResponse.java | 80 ++++++++++++ .../client/MLRequestConvertersTests.java | 21 ++++ .../client/MachineLearningIT.java | 23 ++++ .../MlClientDocumentationIT.java | 61 ++++++++++ .../ml/estimate-model-memory.asciidoc | 42 +++++++ .../high-level/supported-apis.asciidoc | 2 + .../apis/estimate-model-memory.asciidoc | 74 ++++++++++++ .../ml/anomaly-detection/apis/ml-api.asciidoc | 2 + .../TransportEstimateModelMemoryAction.java | 49 +++++--- ...ansportEstimateModelMemoryActionTests.java | 4 + 13 files changed, 514 insertions(+), 14 deletions(-) create mode 100644 client/rest-high-level/src/main/java/org/elasticsearch/client/ml/EstimateModelMemoryRequest.java create mode 100644 client/rest-high-level/src/main/java/org/elasticsearch/client/ml/EstimateModelMemoryResponse.java create mode 100644 docs/java-rest/high-level/ml/estimate-model-memory.asciidoc create mode 100644 docs/reference/ml/anomaly-detection/apis/estimate-model-memory.asciidoc diff --git a/client/rest-high-level/src/main/java/org/elasticsearch/client/MLRequestConverters.java b/client/rest-high-level/src/main/java/org/elasticsearch/client/MLRequestConverters.java index bf220d63b3c63..54dd11bf6caff 100644 --- a/client/rest-high-level/src/main/java/org/elasticsearch/client/MLRequestConverters.java +++ b/client/rest-high-level/src/main/java/org/elasticsearch/client/MLRequestConverters.java @@ -40,6 +40,7 @@ import org.elasticsearch.client.ml.DeleteJobRequest; import 
org.elasticsearch.client.ml.DeleteModelSnapshotRequest; import org.elasticsearch.client.ml.DeleteTrainedModelRequest; +import org.elasticsearch.client.ml.EstimateModelMemoryRequest; import org.elasticsearch.client.ml.EvaluateDataFrameRequest; import org.elasticsearch.client.ml.ExplainDataFrameAnalyticsRequest; import org.elasticsearch.client.ml.FindFileStructureRequest; @@ -593,6 +594,17 @@ static Request deleteCalendarEvent(DeleteCalendarEventRequest deleteCalendarEven return new Request(HttpDelete.METHOD_NAME, endpoint); } + static Request estimateModelMemory(EstimateModelMemoryRequest estimateModelMemoryRequest) throws IOException { + String endpoint = new EndpointBuilder() + .addPathPartAsIs("_ml") + .addPathPartAsIs("anomaly_detectors") + .addPathPartAsIs("_estimate_model_memory") + .build(); + Request request = new Request(HttpPost.METHOD_NAME, endpoint); + request.setEntity(createEntity(estimateModelMemoryRequest, REQUEST_BODY_CONTENT_TYPE)); + return request; + } + static Request putDataFrameAnalytics(PutDataFrameAnalyticsRequest putRequest) throws IOException { String endpoint = new EndpointBuilder() .addPathPartAsIs("_ml", "data_frame", "analytics") diff --git a/client/rest-high-level/src/main/java/org/elasticsearch/client/MachineLearningClient.java b/client/rest-high-level/src/main/java/org/elasticsearch/client/MachineLearningClient.java index 504cbc541f073..61d4b52db2d6d 100644 --- a/client/rest-high-level/src/main/java/org/elasticsearch/client/MachineLearningClient.java +++ b/client/rest-high-level/src/main/java/org/elasticsearch/client/MachineLearningClient.java @@ -23,6 +23,8 @@ import org.elasticsearch.client.ml.CloseJobRequest; import org.elasticsearch.client.ml.CloseJobResponse; import org.elasticsearch.client.ml.DeleteTrainedModelRequest; +import org.elasticsearch.client.ml.EstimateModelMemoryRequest; +import org.elasticsearch.client.ml.EstimateModelMemoryResponse; import org.elasticsearch.client.ml.ExplainDataFrameAnalyticsRequest; import 
org.elasticsearch.client.ml.ExplainDataFrameAnalyticsResponse; import org.elasticsearch.client.ml.DeleteCalendarEventRequest; @@ -1951,6 +1953,48 @@ public Cancellable setUpgradeModeAsync(SetUpgradeModeRequest request, RequestOpt Collections.emptySet()); } + /** + * Estimate the model memory an analysis config is likely to need given supplied field cardinalities + *

+ * For additional info + * see Estimate Model Memory + * + * @param request The {@link EstimateModelMemoryRequest} + * @param options Additional request options (e.g. headers), use {@link RequestOptions#DEFAULT} if nothing needs to be customized + * @return {@link EstimateModelMemoryResponse} response object + */ + public EstimateModelMemoryResponse estimateModelMemory(EstimateModelMemoryRequest request, + RequestOptions options) throws IOException { + return restHighLevelClient.performRequestAndParseEntity(request, + MLRequestConverters::estimateModelMemory, + options, + EstimateModelMemoryResponse::fromXContent, + Collections.emptySet()); + } + + /** + * Estimate the model memory an analysis config is likely to need given supplied field cardinalities and notifies listener upon + * completion + *

+ * For additional info + * see Estimate Model Memory + * + * @param request The {@link EstimateModelMemoryRequest} + * @param options Additional request options (e.g. headers), use {@link RequestOptions#DEFAULT} if nothing needs to be customized + * @param listener Listener to be notified upon request completion + * @return cancellable that may be used to cancel the request + */ + public Cancellable estimateModelMemoryAsync(EstimateModelMemoryRequest request, + RequestOptions options, + ActionListener listener) { + return restHighLevelClient.performRequestAsyncAndParseEntity(request, + MLRequestConverters::estimateModelMemory, + options, + EstimateModelMemoryResponse::fromXContent, + listener, + Collections.emptySet()); + } + /** * Creates a new Data Frame Analytics config *

diff --git a/client/rest-high-level/src/main/java/org/elasticsearch/client/ml/EstimateModelMemoryRequest.java b/client/rest-high-level/src/main/java/org/elasticsearch/client/ml/EstimateModelMemoryRequest.java new file mode 100644 index 0000000000000..d3d2c7c52a703 --- /dev/null +++ b/client/rest-high-level/src/main/java/org/elasticsearch/client/ml/EstimateModelMemoryRequest.java @@ -0,0 +1,114 @@ +/* + * Licensed to Elasticsearch under one or more contributor + * license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright + * ownership. Elasticsearch licenses this file to you under + * the Apache License, Version 2.0 (the "License"); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.elasticsearch.client.ml; + +import org.elasticsearch.client.Validatable; +import org.elasticsearch.client.ValidationException; +import org.elasticsearch.client.ml.job.config.AnalysisConfig; +import org.elasticsearch.common.xcontent.ToXContentObject; +import org.elasticsearch.common.xcontent.XContentBuilder; + +import java.io.IOException; +import java.util.Collections; +import java.util.Map; +import java.util.Objects; +import java.util.Optional; + +/** + * Request to estimate the model memory an analysis config is likely to need given supplied field cardinalities. 
+ */ +public class EstimateModelMemoryRequest implements Validatable, ToXContentObject { + + public static final String ANALYSIS_CONFIG = "analysis_config"; + public static final String OVERALL_CARDINALITY = "overall_cardinality"; + public static final String MAX_BUCKET_CARDINALITY = "max_bucket_cardinality"; + + private AnalysisConfig analysisConfig; + private Map overallCardinality = Collections.emptyMap(); + private Map maxBucketCardinality = Collections.emptyMap(); + + @Override + public Optional validate() { + return Optional.empty(); + } + + public EstimateModelMemoryRequest(AnalysisConfig analysisConfig) { + this.analysisConfig = Objects.requireNonNull(analysisConfig); + } + + public AnalysisConfig getAnalysisConfig() { + return analysisConfig; + } + + public void setAnalysisConfig(AnalysisConfig analysisConfig) { + this.analysisConfig = Objects.requireNonNull(analysisConfig); + } + + public Map getOverallCardinality() { + return overallCardinality; + } + + public void setOverallCardinality(Map overallCardinality) { + this.overallCardinality = Collections.unmodifiableMap(overallCardinality); + } + + public Map getMaxBucketCardinality() { + return maxBucketCardinality; + } + + public void setMaxBucketCardinality(Map maxBucketCardinality) { + this.maxBucketCardinality = Collections.unmodifiableMap(maxBucketCardinality); + } + + @Override + public XContentBuilder toXContent(XContentBuilder builder, Params params) throws IOException { + builder.startObject(); + builder.field(ANALYSIS_CONFIG, analysisConfig); + if (overallCardinality.isEmpty() == false) { + builder.field(OVERALL_CARDINALITY, overallCardinality); + } + if (maxBucketCardinality.isEmpty() == false) { + builder.field(MAX_BUCKET_CARDINALITY, maxBucketCardinality); + } + builder.endObject(); + return builder; + } + + @Override + public int hashCode() { + return Objects.hash(analysisConfig, overallCardinality, maxBucketCardinality); + } + + @Override + public boolean equals(Object other) { + if (this == 
other) { + return true; + } + + if (other == null || getClass() != other.getClass()) { + return false; + } + + EstimateModelMemoryRequest that = (EstimateModelMemoryRequest) other; + return Objects.equals(analysisConfig, that.analysisConfig) && + Objects.equals(overallCardinality, that.overallCardinality) && + Objects.equals(maxBucketCardinality, that.maxBucketCardinality); + } +} diff --git a/client/rest-high-level/src/main/java/org/elasticsearch/client/ml/EstimateModelMemoryResponse.java b/client/rest-high-level/src/main/java/org/elasticsearch/client/ml/EstimateModelMemoryResponse.java new file mode 100644 index 0000000000000..02b5c03d9b44a --- /dev/null +++ b/client/rest-high-level/src/main/java/org/elasticsearch/client/ml/EstimateModelMemoryResponse.java @@ -0,0 +1,80 @@ +/* + * Licensed to Elasticsearch under one or more contributor + * license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright + * ownership. Elasticsearch licenses this file to you under + * the Apache License, Version 2.0 (the "License"); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.elasticsearch.client.ml; + +import org.elasticsearch.common.ParseField; +import org.elasticsearch.common.unit.ByteSizeValue; +import org.elasticsearch.common.xcontent.ConstructingObjectParser; +import org.elasticsearch.common.xcontent.XContentParser; + +import java.util.Objects; + +import static org.elasticsearch.common.xcontent.ConstructingObjectParser.constructorArg; + +public class EstimateModelMemoryResponse { + + public static final ParseField MODEL_MEMORY_ESTIMATE = new ParseField("model_memory_estimate"); + + static final ConstructingObjectParser PARSER = + new ConstructingObjectParser<>( + "estimate_model_memory", + true, + args -> new EstimateModelMemoryResponse((String) args[0])); + + static { + PARSER.declareString(constructorArg(), MODEL_MEMORY_ESTIMATE); + } + + public static EstimateModelMemoryResponse fromXContent(final XContentParser parser) { + return PARSER.apply(parser, null); + } + + private final ByteSizeValue modelMemoryEstimate; + + public EstimateModelMemoryResponse(String modelMemoryEstimate) { + this.modelMemoryEstimate = ByteSizeValue.parseBytesSizeValue(modelMemoryEstimate, MODEL_MEMORY_ESTIMATE.getPreferredName()); + } + + /** + * @return An estimate of the model memory the supplied analysis config is likely to need given the supplied field cardinalities. 
+ */ + public ByteSizeValue getModelMemoryEstimate() { + return modelMemoryEstimate; + } + + @Override + public boolean equals(Object o) { + + if (this == o) { + return true; + } + if (o == null || getClass() != o.getClass()) { + return false; + } + + EstimateModelMemoryResponse other = (EstimateModelMemoryResponse) o; + return Objects.equals(this.modelMemoryEstimate, other.modelMemoryEstimate); + } + + @Override + public int hashCode() { + return Objects.hash(modelMemoryEstimate); + } +} diff --git a/client/rest-high-level/src/test/java/org/elasticsearch/client/MLRequestConvertersTests.java b/client/rest-high-level/src/test/java/org/elasticsearch/client/MLRequestConvertersTests.java index 6c280fba5ab01..7137a2cb58a4c 100644 --- a/client/rest-high-level/src/test/java/org/elasticsearch/client/MLRequestConvertersTests.java +++ b/client/rest-high-level/src/test/java/org/elasticsearch/client/MLRequestConvertersTests.java @@ -36,6 +36,7 @@ import org.elasticsearch.client.ml.DeleteJobRequest; import org.elasticsearch.client.ml.DeleteModelSnapshotRequest; import org.elasticsearch.client.ml.DeleteTrainedModelRequest; +import org.elasticsearch.client.ml.EstimateModelMemoryRequest; import org.elasticsearch.client.ml.EvaluateDataFrameRequest; import org.elasticsearch.client.ml.EvaluateDataFrameRequestTests; import org.elasticsearch.client.ml.ExplainDataFrameAnalyticsRequest; @@ -106,6 +107,7 @@ import org.elasticsearch.common.settings.Settings; import org.elasticsearch.common.unit.TimeValue; import org.elasticsearch.common.xcontent.NamedXContentRegistry; +import org.elasticsearch.common.xcontent.ToXContent; import org.elasticsearch.common.xcontent.XContentBuilder; import org.elasticsearch.common.xcontent.XContentParser; import org.elasticsearch.common.xcontent.XContentType; @@ -694,6 +696,25 @@ public void testDeleteCalendarEvent() { assertEquals("/_ml/calendars/" + calendarId + "/events/" + eventId, request.getEndpoint()); } + public void testEstimateModelMemory() throws 
Exception { + String byFieldName = randomAlphaOfLength(10); + String influencerFieldName = randomAlphaOfLength(10); + AnalysisConfig analysisConfig = AnalysisConfig.builder( + Collections.singletonList( + Detector.builder().setFunction("count").setByFieldName(byFieldName).build() + )).setInfluencers(Collections.singletonList(influencerFieldName)).build(); + EstimateModelMemoryRequest estimateModelMemoryRequest = new EstimateModelMemoryRequest(analysisConfig); + estimateModelMemoryRequest.setOverallCardinality(Collections.singletonMap(byFieldName, randomNonNegativeLong())); + estimateModelMemoryRequest.setMaxBucketCardinality(Collections.singletonMap(influencerFieldName, randomNonNegativeLong())); + Request request = MLRequestConverters.estimateModelMemory(estimateModelMemoryRequest); + assertEquals(HttpPost.METHOD_NAME, request.getMethod()); + assertEquals("/_ml/anomaly_detectors/_estimate_model_memory", request.getEndpoint()); + + XContentBuilder builder = JsonXContent.contentBuilder(); + builder = estimateModelMemoryRequest.toXContent(builder, ToXContent.EMPTY_PARAMS); + assertEquals(Strings.toString(builder), requestEntityToString(request)); + } + public void testPutDataFrameAnalytics() throws IOException { PutDataFrameAnalyticsRequest putRequest = new PutDataFrameAnalyticsRequest(randomDataFrameAnalyticsConfig()); Request request = MLRequestConverters.putDataFrameAnalytics(putRequest); diff --git a/client/rest-high-level/src/test/java/org/elasticsearch/client/MachineLearningIT.java b/client/rest-high-level/src/test/java/org/elasticsearch/client/MachineLearningIT.java index e13464aeddabd..fc91c71caf358 100644 --- a/client/rest-high-level/src/test/java/org/elasticsearch/client/MachineLearningIT.java +++ b/client/rest-high-level/src/test/java/org/elasticsearch/client/MachineLearningIT.java @@ -46,6 +46,8 @@ import org.elasticsearch.client.ml.DeleteJobResponse; import org.elasticsearch.client.ml.DeleteModelSnapshotRequest; import 
org.elasticsearch.client.ml.DeleteTrainedModelRequest; +import org.elasticsearch.client.ml.EstimateModelMemoryRequest; +import org.elasticsearch.client.ml.EstimateModelMemoryResponse; import org.elasticsearch.client.ml.EvaluateDataFrameRequest; import org.elasticsearch.client.ml.EvaluateDataFrameResponse; import org.elasticsearch.client.ml.ExplainDataFrameAnalyticsRequest; @@ -1244,6 +1246,27 @@ public void testDeleteCalendarEvent() throws IOException { assertThat(remainingIds, not(hasItem(deletedEvent))); } + public void testEstimateModelMemory() throws Exception { + MachineLearningClient machineLearningClient = highLevelClient().machineLearning(); + + String byFieldName = randomAlphaOfLength(10); + String influencerFieldName = randomAlphaOfLength(10); + AnalysisConfig analysisConfig = AnalysisConfig.builder( + Collections.singletonList( + Detector.builder().setFunction("count").setByFieldName(byFieldName).build() + )).setInfluencers(Collections.singletonList(influencerFieldName)).build(); + EstimateModelMemoryRequest estimateModelMemoryRequest = new EstimateModelMemoryRequest(analysisConfig); + estimateModelMemoryRequest.setOverallCardinality(Collections.singletonMap(byFieldName, randomNonNegativeLong())); + estimateModelMemoryRequest.setMaxBucketCardinality(Collections.singletonMap(influencerFieldName, randomNonNegativeLong())); + + EstimateModelMemoryResponse estimateModelMemoryResponse = execute( + estimateModelMemoryRequest, + machineLearningClient::estimateModelMemory, machineLearningClient::estimateModelMemoryAsync); + + ByteSizeValue modelMemoryEstimate = estimateModelMemoryResponse.getModelMemoryEstimate(); + assertThat(modelMemoryEstimate.getBytes(), greaterThanOrEqualTo(10000000L)); + } + public void testPutDataFrameAnalyticsConfig_GivenOutlierDetectionAnalysis() throws Exception { MachineLearningClient machineLearningClient = highLevelClient().machineLearning(); String configId = "test-put-df-analytics-outlier-detection"; diff --git 
a/client/rest-high-level/src/test/java/org/elasticsearch/client/documentation/MlClientDocumentationIT.java b/client/rest-high-level/src/test/java/org/elasticsearch/client/documentation/MlClientDocumentationIT.java index 74e34889897e3..1361e34d38f29 100644 --- a/client/rest-high-level/src/test/java/org/elasticsearch/client/documentation/MlClientDocumentationIT.java +++ b/client/rest-high-level/src/test/java/org/elasticsearch/client/documentation/MlClientDocumentationIT.java @@ -49,6 +49,8 @@ import org.elasticsearch.client.ml.DeleteJobResponse; import org.elasticsearch.client.ml.DeleteModelSnapshotRequest; import org.elasticsearch.client.ml.DeleteTrainedModelRequest; +import org.elasticsearch.client.ml.EstimateModelMemoryRequest; +import org.elasticsearch.client.ml.EstimateModelMemoryResponse; import org.elasticsearch.client.ml.EvaluateDataFrameRequest; import org.elasticsearch.client.ml.EvaluateDataFrameResponse; import org.elasticsearch.client.ml.ExplainDataFrameAnalyticsRequest; @@ -4131,6 +4133,65 @@ public void onFailure(Exception e) { } } + public void testEstimateModelMemory() throws Exception { + RestHighLevelClient client = highLevelClient(); + { + // tag::estimate-model-memory-request + Detector.Builder detectorBuilder = new Detector.Builder() + .setFunction("count") + .setPartitionFieldName("status"); + AnalysisConfig.Builder analysisConfigBuilder = + new AnalysisConfig.Builder(Collections.singletonList(detectorBuilder.build())) + .setBucketSpan(TimeValue.timeValueMinutes(10)) + .setInfluencers(Collections.singletonList("src_ip")); + EstimateModelMemoryRequest request = new EstimateModelMemoryRequest(analysisConfigBuilder.build()); // <1> + request.setOverallCardinality(Collections.singletonMap("status", 50L)); // <2> + request.setMaxBucketCardinality(Collections.singletonMap("src_ip", 30L)); // <3> + // end::estimate-model-memory-request + + // tag::estimate-model-memory-execute + EstimateModelMemoryResponse estimateModelMemoryResponse = + 
client.machineLearning().estimateModelMemory(request, RequestOptions.DEFAULT); + // end::estimate-model-memory-execute + + // tag::estimate-model-memory-response + ByteSizeValue modelMemoryEstimate = estimateModelMemoryResponse.getModelMemoryEstimate(); // <1> + long estimateInBytes = modelMemoryEstimate.getBytes(); + // end::estimate-model-memory-response + assertThat(estimateInBytes, greaterThan(10000000L)); + } + { + AnalysisConfig analysisConfig = + AnalysisConfig.builder(Collections.singletonList(Detector.builder().setFunction("count").build())).build(); + EstimateModelMemoryRequest request = new EstimateModelMemoryRequest(analysisConfig); + + // tag::estimate-model-memory-execute-listener + ActionListener listener = new ActionListener() { + @Override + public void onResponse(EstimateModelMemoryResponse estimateModelMemoryResponse) { + // <1> + } + + @Override + public void onFailure(Exception e) { + // <2> + } + }; + // end::estimate-model-memory-execute-listener + + // Replace the empty listener by a blocking listener in test + final CountDownLatch latch = new CountDownLatch(1); + listener = new LatchedActionListener<>(listener, latch); + + // tag::estimate-model-memory-execute-async + client.machineLearning() + .estimateModelMemoryAsync(request, RequestOptions.DEFAULT, listener); // <1> + // end::estimate-model-memory-execute-async + + assertTrue(latch.await(30L, TimeUnit.SECONDS)); + } + } + private String createFilter(RestHighLevelClient client) throws IOException { MlFilter.Builder filterBuilder = MlFilter.builder("my_safe_domains") .setDescription("A list of safe domains") diff --git a/docs/java-rest/high-level/ml/estimate-model-memory.asciidoc b/docs/java-rest/high-level/ml/estimate-model-memory.asciidoc new file mode 100644 index 0000000000000..1a22805880bad --- /dev/null +++ b/docs/java-rest/high-level/ml/estimate-model-memory.asciidoc @@ -0,0 +1,42 @@ +-- +:api: estimate-model-memory +:request: EstimateModelMemoryRequest +:response: 
EstimateModelMemoryResponse +-- +[role="xpack"] +[id="{upid}-{api}"] +=== Estimate {anomaly-job} Model Memory API + +Estimate the model memory an analysis config is likely to need for +given cardinality of the fields it references. + +[id="{upid}-{api}-request"] +==== Estimate {anomaly-job} Model Memory request + +A +{request}+ can be set up as follows: + +["source","java",subs="attributes,callouts,macros"] +-------------------------------------------------- +include-tagged::{doc-tests-file}[{api}-request] +-------------------------------------------------- +<1> Pass an `AnalysisConfig` to the constructor. +<2> For any `by_field_name`, `over_field_name` or + `partition_field_name` fields referenced by the + `Detector`s, supply overall cardinality estimates + in a `Map`. +<3> For any `influencers`, supply a `Map` containing + estimates of the highest cardinality expected in + any single bucket. + +include::../execution.asciidoc[] + +[id="{upid}-{api}-response"] +==== Estimate {anomaly-job} Model Memory response + +The returned +{response}+ contains the model memory estimate: + +["source","java",subs="attributes,callouts,macros"] +-------------------------------------------------- +include-tagged::{doc-tests-file}[{api}-response] +-------------------------------------------------- +<1> The model memory estimate. 
diff --git a/docs/java-rest/high-level/supported-apis.asciidoc b/docs/java-rest/high-level/supported-apis.asciidoc index efe7b6650640c..4999d0627a9eb 100644 --- a/docs/java-rest/high-level/supported-apis.asciidoc +++ b/docs/java-rest/high-level/supported-apis.asciidoc @@ -295,6 +295,7 @@ The Java High Level REST Client supports the following Machine Learning APIs: * <<{upid}-put-calendar-job>> * <<{upid}-delete-calendar-job>> * <<{upid}-delete-calendar>> +* <<{upid}-estimate-model-memory>> * <<{upid}-get-data-frame-analytics>> * <<{upid}-get-data-frame-analytics-stats>> * <<{upid}-put-data-frame-analytics>> @@ -351,6 +352,7 @@ include::ml/delete-calendar-event.asciidoc[] include::ml/put-calendar-job.asciidoc[] include::ml/delete-calendar-job.asciidoc[] include::ml/delete-calendar.asciidoc[] +include::ml/estimate-model-memory.asciidoc[] include::ml/get-data-frame-analytics.asciidoc[] include::ml/get-data-frame-analytics-stats.asciidoc[] include::ml/put-data-frame-analytics.asciidoc[] diff --git a/docs/reference/ml/anomaly-detection/apis/estimate-model-memory.asciidoc b/docs/reference/ml/anomaly-detection/apis/estimate-model-memory.asciidoc new file mode 100644 index 0000000000000..d09e89061d3a9 --- /dev/null +++ b/docs/reference/ml/anomaly-detection/apis/estimate-model-memory.asciidoc @@ -0,0 +1,74 @@ +[role="xpack"] +[testenv="platinum"] +[[ml-estimate-model-memory]] +=== Estimate {anomaly-jobs} Model Memory API +++++ +Estimate Model Memory +++++ + +Estimates the model memory an analysis config is likely to need given +cardinality estimates for the fields it references. + +[[ml-estimate-model-memory-request]] +==== {api-request-title} + +`POST _ml/anomaly_detectors/_estimate_model_memory` + +[[ml-estimate-model-memory-prereqs]] +==== {api-prereq-title} + +* If the {es} {security-features} are enabled, you must have `manage_ml` or +`manage` cluster privileges to use this API. See +<>. 
+ +[[ml-estimate-model-memory-desc]] +==== {api-description-title} + +This API enables you to estimate the model memory and {anomaly-job} +configuration will require before you create the job. + +[[ml-estimate-model-memory-request-body]] +==== {api-request-body-title} + +For a list of the properties that you can specify in the `analysis_config` +component of the body of this API, see <>. + +[[ml-estimate-model-memory-example]] +==== {api-examples-title} + +[source,console] +-------------------------------------------------- +POST _ml/anomaly_detectors/_estimate_model_memory +{ + "analysis_config": { + "bucket_span": "5m", + "detectors": [ + { + "function": "sum", + "field_name": "bytes", + "by_field_name": "status", + "partition_field_name": "app" + } + ], + "influencers": [ "source_ip", "dest_ip" ] + }, + "overall_cardinality": { + "status": 10, + "app": 50 + }, + "max_bucket_cardinality": { + "source_ip": 300, + "dest_ip": 30 + } +} +-------------------------------------------------- +// TEST[skip:needs-licence] + +The estimate returns the following result: + +[source,console-result] +---- +{ + "model_memory_estimate": "123mb" +} +---- diff --git a/docs/reference/ml/anomaly-detection/apis/ml-api.asciidoc b/docs/reference/ml/anomaly-detection/apis/ml-api.asciidoc index f02312cb0ac94..e6514dff60a00 100644 --- a/docs/reference/ml/anomaly-detection/apis/ml-api.asciidoc +++ b/docs/reference/ml/anomaly-detection/apis/ml-api.asciidoc @@ -118,6 +118,8 @@ include::delete-job.asciidoc[] include::delete-calendar-job.asciidoc[] include::delete-snapshot.asciidoc[] include::delete-expired-data.asciidoc[] +//ESTIMATE +include::estimate-model-memory.asciidoc[] //FIND include::find-file-structure.asciidoc[] //FLUSH diff --git a/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/action/TransportEstimateModelMemoryAction.java b/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/action/TransportEstimateModelMemoryAction.java index d415156450082..0a323d675d4f0 100644 
--- a/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/action/TransportEstimateModelMemoryAction.java +++ b/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/action/TransportEstimateModelMemoryAction.java @@ -47,17 +47,17 @@ protected void doExecute(Task task, Map overallCardinality = request.getOverallCardinality(); Map maxBucketCardinality = request.getMaxBucketCardinality(); - long answer = BASIC_REQUIREMENT.getBytes() - + calculateDetectorsRequirementBytes(analysisConfig, overallCardinality) - + calculateInfluencerRequirementBytes(analysisConfig, maxBucketCardinality) - + calculateCategorizationRequirementBytes(analysisConfig); + long answer = BASIC_REQUIREMENT.getBytes(); + answer = addNonNegativeLongsWithMaxValueCap(answer, calculateDetectorsRequirementBytes(analysisConfig, overallCardinality)); + answer = addNonNegativeLongsWithMaxValueCap(answer, calculateInfluencerRequirementBytes(analysisConfig, maxBucketCardinality)); + answer = addNonNegativeLongsWithMaxValueCap(answer, calculateCategorizationRequirementBytes(analysisConfig)); listener.onResponse(new EstimateModelMemoryAction.Response(roundUpToNextMb(answer))); } static long calculateDetectorsRequirementBytes(AnalysisConfig analysisConfig, Map overallCardinality) { return analysisConfig.getDetectors().stream().map(detector -> calculateDetectorRequirementBytes(detector, overallCardinality)) - .reduce(0L, Long::sum); + .reduce(0L, TransportEstimateModelMemoryAction::addNonNegativeLongsWithMaxValueCap); } static long calculateDetectorRequirementBytes(Detector detector, Map overallCardinality) { @@ -130,19 +130,28 @@ static long calculateDetectorRequirementBytes(Detector detector, Map cardinalityEstimate(AnalysisConfig.INFLUENCERS.getPreferredName(), influencer, maxBucketCardinality, false) - * BYTES_PER_INFLUENCER_VALUE) - .reduce(0L, Long::sum); + long totalInfluencerCardinality = pureInfluencers.stream() + .map(influencer -> 
cardinalityEstimate(AnalysisConfig.INFLUENCERS.getPreferredName(), influencer, maxBucketCardinality, false)) + .reduce(0L, TransportEstimateModelMemoryAction::addNonNegativeLongsWithMaxValueCap); + if (Long.MAX_VALUE / BYTES_PER_INFLUENCER_VALUE < totalInfluencerCardinality) { + return Long.MAX_VALUE; + } + return BYTES_PER_INFLUENCER_VALUE * totalInfluencerCardinality; } static long calculateCategorizationRequirementBytes(AnalysisConfig analysisConfig) { @@ -187,7 +199,16 @@ static long cardinalityEstimate(String description, String fieldName, Map= 0; - return new ByteSizeValue((BYTES_IN_MB - 1 + bytes) / BYTES_IN_MB, ByteSizeUnit.MB); + assert bytes >= 0 : "negative bytes " + bytes; + return new ByteSizeValue((BYTES_IN_MB - 1 + Math.min(Long.MAX_VALUE - BYTES_IN_MB + 1, bytes)) / BYTES_IN_MB, ByteSizeUnit.MB); + } + + private static long addNonNegativeLongsWithMaxValueCap(long a, long b) { + assert a >= 0; + assert b >= 0; + if (Long.MAX_VALUE - a - b < 0) { + return Long.MAX_VALUE; + } + return a + b; } } diff --git a/x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/action/TransportEstimateModelMemoryActionTests.java b/x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/action/TransportEstimateModelMemoryActionTests.java index ea10c9fb5f64c..c08af05234586 100644 --- a/x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/action/TransportEstimateModelMemoryActionTests.java +++ b/x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/action/TransportEstimateModelMemoryActionTests.java @@ -98,6 +98,10 @@ public void testRoundUpToNextMb() { equalTo(new ByteSizeValue(2, ByteSizeUnit.MB))); assertThat(TransportEstimateModelMemoryAction.roundUpToNextMb(2 * 1024 * 1024), equalTo(new ByteSizeValue(2, ByteSizeUnit.MB))); + // We don't round up at the extremes, to ensure that the resulting value can be represented as bytes in a long + // (At such extreme scale it won't be possible to actually run the analysis, so ease of use trumps precision) + 
assertThat(TransportEstimateModelMemoryAction.roundUpToNextMb(Long.MAX_VALUE - randomIntBetween(0, 1000000)), + equalTo(new ByteSizeValue(Long.MAX_VALUE / new ByteSizeValue(1, ByteSizeUnit.MB).getBytes() , ByteSizeUnit.MB))); } public static Detector createDetector(String function, String fieldName, String byFieldName, From 258a900ed71f0f57f3283f60c3136fcc432d503e Mon Sep 17 00:00:00 2001 From: David Roberts Date: Fri, 13 Mar 2020 13:30:05 +0000 Subject: [PATCH 2/6] Addressing docs comments --- .../apis/estimate-model-memory.asciidoc | 37 +++++++++++++------ 1 file changed, 25 insertions(+), 12 deletions(-) diff --git a/docs/reference/ml/anomaly-detection/apis/estimate-model-memory.asciidoc b/docs/reference/ml/anomaly-detection/apis/estimate-model-memory.asciidoc index d09e89061d3a9..45596312fcd4e 100644 --- a/docs/reference/ml/anomaly-detection/apis/estimate-model-memory.asciidoc +++ b/docs/reference/ml/anomaly-detection/apis/estimate-model-memory.asciidoc @@ -1,13 +1,13 @@ [role="xpack"] [testenv="platinum"] [[ml-estimate-model-memory]] -=== Estimate {anomaly-jobs} Model Memory API +=== Estimate {anomaly-jobs} model memory API ++++ Estimate Model Memory ++++ -Estimates the model memory an analysis config is likely to need given -cardinality estimates for the fields it references. +Estimates the model memory an {anomaly-job} is likely to need based on analysis +configuration details and cardinality estimates for the fields it references. [[ml-estimate-model-memory-request]] ==== {api-request-title} @@ -21,17 +21,30 @@ cardinality estimates for the fields it references. `manage` cluster privileges to use this API. See <>. -[[ml-estimate-model-memory-desc]] -==== {api-description-title} - -This API enables you to estimate the model memory and {anomaly-job} -configuration will require before you create the job. 
- [[ml-estimate-model-memory-request-body]] ==== {api-request-body-title} -For a list of the properties that you can specify in the `analysis_config` -component of the body of this API, see <>. +`analysis_config`:: +(Required, object) For a list of the properties that you can specify in the +`analysis_config` component of the body of this API, see <>. + +`max_bucket_cardinality`:: +(Optional, object) Estimates of the highest cardinality in a single bucket +that will be observed for influencer fields over the time period the job +analyzes data for. To produce a good answer values must be provided for +all influencer fields. It does not matter if values are provided for fields +that are not listed as `influencers`. If there are no `influencers` then +`max_bucket_cardinality` can be safely omitted from the request. + +`overall_cardinality`:: +(Optional, object) Estimates of the cardinality that will be observed for +fields over the whole time period the job analyzes data for. To produce +a good answer values must be provided for fields referenced in the +`by_field_name`, `over_field_name` and `partition_field_name` of any +detectors. It does not matter if values are provided for other fields. +If no detectors have a `by_field_name`, `over_field_name` or +`partition_field_name` then `overall_cardinality` can be safely omitted +from the request. 
[[ml-estimate-model-memory-example]] ==== {api-examples-title} @@ -69,6 +82,6 @@ The estimate returns the following result: [source,console-result] ---- { - "model_memory_estimate": "123mb" + "model_memory_estimate": "45mb" } ---- From ce16b385665dc3e356be193cdad5ac4daf830e44 Mon Sep 17 00:00:00 2001 From: David Roberts Date: Thu, 19 Mar 2020 15:45:28 +0000 Subject: [PATCH 3/6] Adding numbers for more functions --- .../TransportEstimateModelMemoryAction.java | 42 +++++++++---------- ...ansportEstimateModelMemoryActionTests.java | 4 +- 2 files changed, 23 insertions(+), 23 deletions(-) diff --git a/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/action/TransportEstimateModelMemoryAction.java b/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/action/TransportEstimateModelMemoryAction.java index 0a323d675d4f0..0665944c1cf8c 100644 --- a/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/action/TransportEstimateModelMemoryAction.java +++ b/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/action/TransportEstimateModelMemoryAction.java @@ -64,6 +64,7 @@ static long calculateDetectorRequirementBytes(Detector detector, Map cardinalityEstimate(AnalysisConfig.INFLUENCERS.getPreferredName(), influencer, maxBucketCardinality, false)) .reduce(0L, TransportEstimateModelMemoryAction::addNonNegativeLongsWithMaxValueCap); - if (Long.MAX_VALUE / BYTES_PER_INFLUENCER_VALUE < totalInfluencerCardinality) { - return Long.MAX_VALUE; - } - return BYTES_PER_INFLUENCER_VALUE * totalInfluencerCardinality; + return multiplyNonNegativeLongsWithMaxValueCap(BYTES_PER_INFLUENCER_VALUE, totalInfluencerCardinality); } static long calculateCategorizationRequirementBytes(AnalysisConfig analysisConfig) { @@ -200,7 +191,7 @@ static long cardinalityEstimate(String description, String fieldName, Map= 0 : "negative bytes " + bytes; - return new ByteSizeValue((BYTES_IN_MB - 1 + Math.min(Long.MAX_VALUE - BYTES_IN_MB + 1, bytes)) / BYTES_IN_MB, ByteSizeUnit.MB); + return 
new ByteSizeValue(addNonNegativeLongsWithMaxValueCap(bytes, BYTES_IN_MB - 1) / BYTES_IN_MB, ByteSizeUnit.MB); } private static long addNonNegativeLongsWithMaxValueCap(long a, long b) { @@ -211,4 +202,13 @@ private static long addNonNegativeLongsWithMaxValueCap(long a, long b) { } return a + b; } + + private static long multiplyNonNegativeLongsWithMaxValueCap(long a, long b) { + assert a >= 0; + assert b >= 0; + if (Long.MAX_VALUE / a < b) { + return Long.MAX_VALUE; + } + return a * b; + } } diff --git a/x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/action/TransportEstimateModelMemoryActionTests.java b/x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/action/TransportEstimateModelMemoryActionTests.java index c08af05234586..30118104e6510 100644 --- a/x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/action/TransportEstimateModelMemoryActionTests.java +++ b/x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/action/TransportEstimateModelMemoryActionTests.java @@ -36,7 +36,7 @@ public void testCalculateDetectorRequirementBytes() { Detector withByField = createDetector(function, "field", "buy", null, null); assertThat(TransportEstimateModelMemoryAction.calculateDetectorRequirementBytes(withByField, - overallCardinality), is(200 * 65536L)); + overallCardinality), is(134 * 65536L)); Detector withPartitionField = createDetector(function, "field", null, null, "part"); assertThat(TransportEstimateModelMemoryAction.calculateDetectorRequirementBytes(withPartitionField, @@ -44,7 +44,7 @@ public void testCalculateDetectorRequirementBytes() { Detector withByAndPartitionFields = createDetector(function, "field", "buy", null, "part"); assertThat(TransportEstimateModelMemoryAction.calculateDetectorRequirementBytes(withByAndPartitionFields, - overallCardinality), is(200 * 100 * 65536L)); + overallCardinality), is(134 * 100 * 65536L)); } public void testCalculateInfluencerRequirementBytes() { From 0caf242c5313ad39ca053c8824642fb7151cdc5b Mon Sep 17 
00:00:00 2001 From: David Roberts Date: Tue, 24 Mar 2020 12:43:59 +0000 Subject: [PATCH 4/6] Address review comments --- .../elasticsearch/client/ml/EstimateModelMemoryRequest.java | 6 +----- docs/java-rest/high-level/ml/estimate-model-memory.asciidoc | 2 +- 2 files changed, 2 insertions(+), 6 deletions(-) diff --git a/client/rest-high-level/src/main/java/org/elasticsearch/client/ml/EstimateModelMemoryRequest.java b/client/rest-high-level/src/main/java/org/elasticsearch/client/ml/EstimateModelMemoryRequest.java index d3d2c7c52a703..b0dc8bb7c294e 100644 --- a/client/rest-high-level/src/main/java/org/elasticsearch/client/ml/EstimateModelMemoryRequest.java +++ b/client/rest-high-level/src/main/java/org/elasticsearch/client/ml/EstimateModelMemoryRequest.java @@ -40,7 +40,7 @@ public class EstimateModelMemoryRequest implements Validatable, ToXContentObject public static final String OVERALL_CARDINALITY = "overall_cardinality"; public static final String MAX_BUCKET_CARDINALITY = "max_bucket_cardinality"; - private AnalysisConfig analysisConfig; + private final AnalysisConfig analysisConfig; private Map overallCardinality = Collections.emptyMap(); private Map maxBucketCardinality = Collections.emptyMap(); @@ -57,10 +57,6 @@ public AnalysisConfig getAnalysisConfig() { return analysisConfig; } - public void setAnalysisConfig(AnalysisConfig analysisConfig) { - this.analysisConfig = Objects.requireNonNull(analysisConfig); - } - public Map getOverallCardinality() { return overallCardinality; } diff --git a/docs/java-rest/high-level/ml/estimate-model-memory.asciidoc b/docs/java-rest/high-level/ml/estimate-model-memory.asciidoc index 1a22805880bad..5730f8195aed0 100644 --- a/docs/java-rest/high-level/ml/estimate-model-memory.asciidoc +++ b/docs/java-rest/high-level/ml/estimate-model-memory.asciidoc @@ -8,7 +8,7 @@ === Estimate {anomaly-job} Model Memory API Estimate the model memory an analysis config is likely to need for -given cardinality of the fields it references. 
+the given cardinality of the fields it references. [id="{upid}-{api}-request"] ==== Estimate {anomaly-job} Model Memory request From 95dede2d050ea49f2c50459878467e892f1acae3 Mon Sep 17 00:00:00 2001 From: David Roberts Date: Tue, 24 Mar 2020 16:39:10 +0000 Subject: [PATCH 5/6] Apply docs suggestions from code review Co-Authored-By: Lisa Cawley --- .../high-level/ml/estimate-model-memory.asciidoc | 8 ++++---- .../apis/estimate-model-memory.asciidoc | 14 +++++++------- 2 files changed, 11 insertions(+), 11 deletions(-) diff --git a/docs/java-rest/high-level/ml/estimate-model-memory.asciidoc b/docs/java-rest/high-level/ml/estimate-model-memory.asciidoc index 5730f8195aed0..8e8b5f1befa34 100644 --- a/docs/java-rest/high-level/ml/estimate-model-memory.asciidoc +++ b/docs/java-rest/high-level/ml/estimate-model-memory.asciidoc @@ -5,13 +5,13 @@ -- [role="xpack"] [id="{upid}-{api}"] -=== Estimate {anomaly-job} Model Memory API +=== Estimate {anomaly-job} model memory API Estimate the model memory an analysis config is likely to need for the given cardinality of the fields it references. [id="{upid}-{api}-request"] -==== Estimate {anomaly-job} Model Memory request +==== Estimate {anomaly-job} model memory request A +{request}+ can be set up as follows: @@ -22,7 +22,7 @@ include-tagged::{doc-tests-file}[{api}-request] <1> Pass an `AnalysisConfig` to the constructor. <2> For any `by_field_name`, `over_field_name` or `partition_field_name` fields referenced by the - `Detector`s, supply overall cardinality estimates + detectors, supply overall cardinality estimates in a `Map`. 
<3> For any `influencers`, supply a `Map` containing estimates of the highest cardinality expected in @@ -31,7 +31,7 @@ include-tagged::{doc-tests-file}[{api}-request] include::../execution.asciidoc[] [id="{upid}-{api}-response"] -==== Estimate {anomaly-job} Model Memory response +==== Estimate {anomaly-job} model memory response The returned +{response}+ contains the model memory estimate: diff --git a/docs/reference/ml/anomaly-detection/apis/estimate-model-memory.asciidoc b/docs/reference/ml/anomaly-detection/apis/estimate-model-memory.asciidoc index 45596312fcd4e..46a49f7cd239c 100644 --- a/docs/reference/ml/anomaly-detection/apis/estimate-model-memory.asciidoc +++ b/docs/reference/ml/anomaly-detection/apis/estimate-model-memory.asciidoc @@ -3,7 +3,7 @@ [[ml-estimate-model-memory]] === Estimate {anomaly-jobs} model memory API ++++ -Estimate Model Memory +Estimate model memory ++++ Estimates the model memory an {anomaly-job} is likely to need based on analysis @@ -30,20 +30,20 @@ configuration details and cardinality estimates for the fields it references. `max_bucket_cardinality`:: (Optional, object) Estimates of the highest cardinality in a single bucket -that will be observed for influencer fields over the time period the job -analyzes data for. To produce a good answer values must be provided for +that will be observed for influencer fields over the time period that the job +analyzes data. To produce a good answer, values must be provided for all influencer fields. It does not matter if values are provided for fields that are not listed as `influencers`. If there are no `influencers` then -`max_bucket_cardinality` can be safely omitted from the request. +`max_bucket_cardinality` can be omitted from the request. `overall_cardinality`:: (Optional, object) Estimates of the cardinality that will be observed for -fields over the whole time period the job analyzes data for. 
To produce -a good answer values must be provided for fields referenced in the +fields over the whole time period that the job analyzes data. To produce +a good answer, values must be provided for fields referenced in the `by_field_name`, `over_field_name` and `partition_field_name` of any detectors. It does not matter if values are provided for other fields. If no detectors have a `by_field_name`, `over_field_name` or -`partition_field_name` then `overall_cardinality` can be safely omitted +`partition_field_name` then `overall_cardinality` can be omitted from the request. [[ml-estimate-model-memory-example]] From d51c6f4cdf232a9ebbea831e35795e44035ac119 Mon Sep 17 00:00:00 2001 From: David Roberts Date: Tue, 24 Mar 2020 19:23:52 +0000 Subject: [PATCH 6/6] A few refinements --- .../TransportEstimateModelMemoryAction.java | 19 ++- .../test/ml/estimate_model_memory.yml | 128 +++++++++++++++++- 2 files changed, 138 insertions(+), 9 deletions(-) diff --git a/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/action/TransportEstimateModelMemoryAction.java b/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/action/TransportEstimateModelMemoryAction.java index 0665944c1cf8c..776c84031428b 100644 --- a/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/action/TransportEstimateModelMemoryAction.java +++ b/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/action/TransportEstimateModelMemoryAction.java @@ -23,6 +23,17 @@ import java.util.Map; import java.util.Set; +/** + * Calculates the estimated model memory requirement of an anomaly detection job + * from its analysis config and estimates of the cardinality of the various fields + * referenced in it. + * + * Answers are capped at Long.MAX_VALUE bytes, to avoid returning + * values with bigger units that cannot trivially be converted back to bytes. 
+ * (In reality if the memory estimate is greater than Long.MAX_VALUE + * bytes then the job will be impossible to run successfully, so this is not a + * major limitation.) + */ public class TransportEstimateModelMemoryAction extends HandledTransportAction { @@ -89,7 +100,8 @@ static long calculateDetectorRequirementBytes(Detector detector, Map + { + "analysis_config": { + "bucket_span": "1h", + "detectors": [{"function": "max", "field_name": "responsetime", "over_field_name": "airline"}] + }, + "overall_cardinality": { + "airline": 50000 + } + } + - match: { model_memory_estimate: "35mb" } + +--- +"Test over field also influencer": + - do: + ml.estimate_model_memory: + body: > + { + "analysis_config": { + "bucket_span": "1h", + "detectors": [{"function": "max", "field_name": "responsetime", "over_field_name": "airline"}], + "influencers": [ "airline" ] + }, + "overall_cardinality": { + "airline": 50000 + }, + "max_bucket_cardinality": { + "airline": 500 + } + } + - match: { model_memory_estimate: "35mb" } + +--- +"Test over field with independent influencer": + - do: + ml.estimate_model_memory: + body: > + { + "analysis_config": { + "bucket_span": "1h", + "detectors": [{"function": "max", "field_name": "responsetime", "over_field_name": "airline"}], + "influencers": [ "country" ] + }, + "overall_cardinality": { + "airline": 50000 + }, + "max_bucket_cardinality": { + "country": 500 + } + } + - match: { model_memory_estimate: "40mb" } --- "Test partition field": @@ -125,7 +181,7 @@ "country": 600 } } - - match: { model_memory_estimate: "150010mb" } + - match: { model_memory_estimate: "100060mb" } --- "Test by and partition fields also influencers": @@ -147,7 +203,7 @@ "country": 40 } } - - match: { model_memory_estimate: "150010mb" } + - match: { model_memory_estimate: "100060mb" } --- "Test by and partition fields with independent influencer": @@ -168,5 +224,65 @@ "src_ip": 500 } } - - match: { model_memory_estimate: "150015mb" } + - match: { model_memory_estimate: 
"100065mb" } + +--- +"Test over and partition field": + - do: + ml.estimate_model_memory: + body: > + { + "analysis_config": { + "bucket_span": "1h", + "detectors": [{"function": "max", "field_name": "responsetime", "over_field_name": "airline", "partition_field_name": "country"}] + }, + "overall_cardinality": { + "airline": 4000, + "country": 600 + } + } + - match: { model_memory_estimate: "1220mb" } + +--- +"Test over and partition fields also influencers": + - do: + ml.estimate_model_memory: + body: > + { + "analysis_config": { + "bucket_span": "1h", + "detectors": [{"function": "max", "field_name": "responsetime", "over_field_name": "airline", "partition_field_name": "country"}], + "influencers": [ "airline", "country" ] + }, + "overall_cardinality": { + "airline": 4000, + "country": 600 + }, + "max_bucket_cardinality": { + "airline": 60, + "country": 40 + } + } + - match: { model_memory_estimate: "1220mb" } + +--- +"Test over and partition fields with independent influencer": + - do: + ml.estimate_model_memory: + body: > + { + "analysis_config": { + "bucket_span": "1h", + "detectors": [{"function": "max", "field_name": "responsetime", "over_field_name": "airline", "partition_field_name": "country"}], + "influencers": [ "src_ip" ] + }, + "overall_cardinality": { + "airline": 4000, + "country": 600 + }, + "max_bucket_cardinality": { + "src_ip": 500 + } + } + - match: { model_memory_estimate: "1225mb" }