From 5546bd2576b186671245a9f3655463da4cc28387 Mon Sep 17 00:00:00 2001
From: David Roberts <dave.roberts@elastic.co>
Date: Thu, 12 Mar 2020 17:52:14 +0000
Subject: [PATCH 1/6] [ML] Add a model memory estimation endpoint for anomaly
 detection

This PR completes the implementation of the model
memory estimation endpoint:

POST _ml/anomaly_detectors/_estimate_model_memory

Closes #53219
---
 .../client/MLRequestConverters.java           |  12 ++
 .../client/MachineLearningClient.java         |  44 +++++++
 .../client/ml/EstimateModelMemoryRequest.java | 114 ++++++++++++++++++
 .../ml/EstimateModelMemoryResponse.java       |  80 ++++++++++++
 .../client/MLRequestConvertersTests.java      |  21 ++++
 .../client/MachineLearningIT.java             |  23 ++++
 .../MlClientDocumentationIT.java              |  61 ++++++++++
 .../ml/estimate-model-memory.asciidoc         |  42 +++++++
 .../high-level/supported-apis.asciidoc        |   2 +
 .../apis/estimate-model-memory.asciidoc       |  74 ++++++++++++
 .../ml/anomaly-detection/apis/ml-api.asciidoc |   2 +
 .../TransportEstimateModelMemoryAction.java   |  49 +++++---
 ...ansportEstimateModelMemoryActionTests.java |   4 +
 13 files changed, 514 insertions(+), 14 deletions(-)
 create mode 100644 client/rest-high-level/src/main/java/org/elasticsearch/client/ml/EstimateModelMemoryRequest.java
 create mode 100644 client/rest-high-level/src/main/java/org/elasticsearch/client/ml/EstimateModelMemoryResponse.java
 create mode 100644 docs/java-rest/high-level/ml/estimate-model-memory.asciidoc
 create mode 100644 docs/reference/ml/anomaly-detection/apis/estimate-model-memory.asciidoc

diff --git a/client/rest-high-level/src/main/java/org/elasticsearch/client/MLRequestConverters.java b/client/rest-high-level/src/main/java/org/elasticsearch/client/MLRequestConverters.java
index bf220d63b3c63..54dd11bf6caff 100644
--- a/client/rest-high-level/src/main/java/org/elasticsearch/client/MLRequestConverters.java
+++ b/client/rest-high-level/src/main/java/org/elasticsearch/client/MLRequestConverters.java
@@ -40,6 +40,7 @@
 import org.elasticsearch.client.ml.DeleteJobRequest;
 import org.elasticsearch.client.ml.DeleteModelSnapshotRequest;
 import org.elasticsearch.client.ml.DeleteTrainedModelRequest;
+import org.elasticsearch.client.ml.EstimateModelMemoryRequest;
 import org.elasticsearch.client.ml.EvaluateDataFrameRequest;
 import org.elasticsearch.client.ml.ExplainDataFrameAnalyticsRequest;
 import org.elasticsearch.client.ml.FindFileStructureRequest;
@@ -593,6 +594,17 @@ static Request deleteCalendarEvent(DeleteCalendarEventRequest deleteCalendarEven
         return new Request(HttpDelete.METHOD_NAME, endpoint);
     }
 
+    static Request estimateModelMemory(EstimateModelMemoryRequest estimateModelMemoryRequest) throws IOException {
+        String endpoint = new EndpointBuilder()
+            .addPathPartAsIs("_ml")
+            .addPathPartAsIs("anomaly_detectors")
+            .addPathPartAsIs("_estimate_model_memory")
+            .build();
+        Request request = new Request(HttpPost.METHOD_NAME, endpoint);
+        request.setEntity(createEntity(estimateModelMemoryRequest, REQUEST_BODY_CONTENT_TYPE));
+        return request;
+    }
+
     static Request putDataFrameAnalytics(PutDataFrameAnalyticsRequest putRequest) throws IOException {
         String endpoint = new EndpointBuilder()
             .addPathPartAsIs("_ml", "data_frame", "analytics")
diff --git a/client/rest-high-level/src/main/java/org/elasticsearch/client/MachineLearningClient.java b/client/rest-high-level/src/main/java/org/elasticsearch/client/MachineLearningClient.java
index 504cbc541f073..61d4b52db2d6d 100644
--- a/client/rest-high-level/src/main/java/org/elasticsearch/client/MachineLearningClient.java
+++ b/client/rest-high-level/src/main/java/org/elasticsearch/client/MachineLearningClient.java
@@ -23,6 +23,8 @@
 import org.elasticsearch.client.ml.CloseJobRequest;
 import org.elasticsearch.client.ml.CloseJobResponse;
 import org.elasticsearch.client.ml.DeleteTrainedModelRequest;
+import org.elasticsearch.client.ml.EstimateModelMemoryRequest;
+import org.elasticsearch.client.ml.EstimateModelMemoryResponse;
 import org.elasticsearch.client.ml.ExplainDataFrameAnalyticsRequest;
 import org.elasticsearch.client.ml.ExplainDataFrameAnalyticsResponse;
 import org.elasticsearch.client.ml.DeleteCalendarEventRequest;
@@ -1951,6 +1953,48 @@ public Cancellable setUpgradeModeAsync(SetUpgradeModeRequest request, RequestOpt
             Collections.emptySet());
     }
 
+    /**
+     * Estimate the model memory an analysis config is likely to need given supplied field cardinalities
+     * <p>
+     * For additional info
+     * see <a href="https://www.elastic.co/guide/en/elasticsearch/reference/current/ml-estimate-model-memory.html">Estimate Model Memory</a>
+     *
+     * @param request The {@link EstimateModelMemoryRequest}
+     * @param options Additional request options (e.g. headers), use {@link RequestOptions#DEFAULT} if nothing needs to be customized
+     * @return {@link EstimateModelMemoryResponse} response object
+     */
+    public EstimateModelMemoryResponse estimateModelMemory(EstimateModelMemoryRequest request,
+                                                           RequestOptions options) throws IOException {
+        return restHighLevelClient.performRequestAndParseEntity(request,
+            MLRequestConverters::estimateModelMemory,
+            options,
+            EstimateModelMemoryResponse::fromXContent,
+            Collections.emptySet());
+    }
+
+    /**
+     * Estimate the model memory an analysis config is likely to need given supplied field cardinalities and notifies listener upon
+     * completion
+     * <p>
+     * For additional info
+     * see <a href="https://www.elastic.co/guide/en/elasticsearch/reference/current/ml-estimate-model-memory.html">Estimate Model Memory</a>
+     *
+     * @param request The {@link EstimateModelMemoryRequest}
+     * @param options Additional request options (e.g. headers), use {@link RequestOptions#DEFAULT} if nothing needs to be customized
+     * @param listener Listener to be notified upon request completion
+     * @return cancellable that may be used to cancel the request
+     */
+    public Cancellable estimateModelMemoryAsync(EstimateModelMemoryRequest request,
+                                                RequestOptions options,
+                                                ActionListener<EstimateModelMemoryResponse> listener) {
+        return restHighLevelClient.performRequestAsyncAndParseEntity(request,
+            MLRequestConverters::estimateModelMemory,
+            options,
+            EstimateModelMemoryResponse::fromXContent,
+            listener,
+            Collections.emptySet());
+    }
+
     /**
      * Creates a new Data Frame Analytics config
      * <p>
diff --git a/client/rest-high-level/src/main/java/org/elasticsearch/client/ml/EstimateModelMemoryRequest.java b/client/rest-high-level/src/main/java/org/elasticsearch/client/ml/EstimateModelMemoryRequest.java
new file mode 100644
index 0000000000000..d3d2c7c52a703
--- /dev/null
+++ b/client/rest-high-level/src/main/java/org/elasticsearch/client/ml/EstimateModelMemoryRequest.java
@@ -0,0 +1,114 @@
+/*
+ * Licensed to Elasticsearch under one or more contributor
+ * license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright
+ * ownership. Elasticsearch licenses this file to you under
+ * the Apache License, Version 2.0 (the "License"); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.elasticsearch.client.ml;
+
+import org.elasticsearch.client.Validatable;
+import org.elasticsearch.client.ValidationException;
+import org.elasticsearch.client.ml.job.config.AnalysisConfig;
+import org.elasticsearch.common.xcontent.ToXContentObject;
+import org.elasticsearch.common.xcontent.XContentBuilder;
+
+import java.io.IOException;
+import java.util.Collections;
+import java.util.Map;
+import java.util.Objects;
+import java.util.Optional;
+
+/**
+ * Request to estimate the model memory an analysis config is likely to need given supplied field cardinalities.
+ */
+public class EstimateModelMemoryRequest implements Validatable, ToXContentObject {
+
+    public static final String ANALYSIS_CONFIG = "analysis_config";
+    public static final String OVERALL_CARDINALITY = "overall_cardinality";
+    public static final String MAX_BUCKET_CARDINALITY = "max_bucket_cardinality";
+
+    private AnalysisConfig analysisConfig;
+    private Map<String, Long> overallCardinality = Collections.emptyMap();
+    private Map<String, Long> maxBucketCardinality = Collections.emptyMap();
+
+    @Override
+    public Optional<ValidationException> validate() {
+        return Optional.empty();
+    }
+
+    public EstimateModelMemoryRequest(AnalysisConfig analysisConfig) {
+        this.analysisConfig = Objects.requireNonNull(analysisConfig);
+    }
+
+    public AnalysisConfig getAnalysisConfig() {
+        return analysisConfig;
+    }
+
+    public void setAnalysisConfig(AnalysisConfig analysisConfig) {
+        this.analysisConfig = Objects.requireNonNull(analysisConfig);
+    }
+
+    public Map<String, Long> getOverallCardinality() {
+        return overallCardinality;
+    }
+
+    public void setOverallCardinality(Map<String, Long> overallCardinality) {
+        this.overallCardinality = Collections.unmodifiableMap(overallCardinality);
+    }
+
+    public Map<String, Long> getMaxBucketCardinality() {
+        return maxBucketCardinality;
+    }
+
+    public void setMaxBucketCardinality(Map<String, Long> maxBucketCardinality) {
+        this.maxBucketCardinality = Collections.unmodifiableMap(maxBucketCardinality);
+    }
+
+    @Override
+    public XContentBuilder toXContent(XContentBuilder builder, Params params) throws IOException {
+        builder.startObject();
+        builder.field(ANALYSIS_CONFIG, analysisConfig);
+        if (overallCardinality.isEmpty() == false) {
+            builder.field(OVERALL_CARDINALITY, overallCardinality);
+        }
+        if (maxBucketCardinality.isEmpty() == false) {
+            builder.field(MAX_BUCKET_CARDINALITY, maxBucketCardinality);
+        }
+        builder.endObject();
+        return builder;
+    }
+
+    @Override
+    public int hashCode() {
+        return Objects.hash(analysisConfig, overallCardinality, maxBucketCardinality);
+    }
+
+    @Override
+    public boolean equals(Object other) {
+        if (this == other) {
+            return true;
+        }
+
+        if (other == null || getClass() != other.getClass()) {
+            return false;
+        }
+
+        EstimateModelMemoryRequest that = (EstimateModelMemoryRequest) other;
+        return Objects.equals(analysisConfig, that.analysisConfig) &&
+            Objects.equals(overallCardinality, that.overallCardinality) &&
+            Objects.equals(maxBucketCardinality, that.maxBucketCardinality);
+    }
+}
diff --git a/client/rest-high-level/src/main/java/org/elasticsearch/client/ml/EstimateModelMemoryResponse.java b/client/rest-high-level/src/main/java/org/elasticsearch/client/ml/EstimateModelMemoryResponse.java
new file mode 100644
index 0000000000000..02b5c03d9b44a
--- /dev/null
+++ b/client/rest-high-level/src/main/java/org/elasticsearch/client/ml/EstimateModelMemoryResponse.java
@@ -0,0 +1,80 @@
+/*
+ * Licensed to Elasticsearch under one or more contributor
+ * license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright
+ * ownership. Elasticsearch licenses this file to you under
+ * the Apache License, Version 2.0 (the "License"); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.elasticsearch.client.ml;
+
+import org.elasticsearch.common.ParseField;
+import org.elasticsearch.common.unit.ByteSizeValue;
+import org.elasticsearch.common.xcontent.ConstructingObjectParser;
+import org.elasticsearch.common.xcontent.XContentParser;
+
+import java.util.Objects;
+
+import static org.elasticsearch.common.xcontent.ConstructingObjectParser.constructorArg;
+
+public class EstimateModelMemoryResponse {
+
+    public static final ParseField MODEL_MEMORY_ESTIMATE = new ParseField("model_memory_estimate");
+
+    static final ConstructingObjectParser<EstimateModelMemoryResponse, Void> PARSER =
+        new ConstructingObjectParser<>(
+            "estimate_model_memory",
+            true,
+            args -> new EstimateModelMemoryResponse((String) args[0]));
+
+    static {
+        PARSER.declareString(constructorArg(), MODEL_MEMORY_ESTIMATE);
+    }
+
+    public static EstimateModelMemoryResponse fromXContent(final XContentParser parser) {
+        return PARSER.apply(parser, null);
+    }
+
+    private final ByteSizeValue modelMemoryEstimate;
+
+    public EstimateModelMemoryResponse(String modelMemoryEstimate) {
+        this.modelMemoryEstimate = ByteSizeValue.parseBytesSizeValue(modelMemoryEstimate, MODEL_MEMORY_ESTIMATE.getPreferredName());
+    }
+
+    /**
+     * @return An estimate of the model memory the supplied analysis config is likely to need given the supplied field cardinalities.
+     */
+    public ByteSizeValue getModelMemoryEstimate() {
+        return modelMemoryEstimate;
+    }
+
+    @Override
+    public boolean equals(Object o) {
+
+        if (this == o) {
+            return true;
+        }
+        if (o == null || getClass() != o.getClass()) {
+            return false;
+        }
+
+        EstimateModelMemoryResponse other = (EstimateModelMemoryResponse) o;
+        return Objects.equals(this.modelMemoryEstimate, other.modelMemoryEstimate);
+    }
+
+    @Override
+    public int hashCode() {
+        return Objects.hash(modelMemoryEstimate);
+    }
+}
diff --git a/client/rest-high-level/src/test/java/org/elasticsearch/client/MLRequestConvertersTests.java b/client/rest-high-level/src/test/java/org/elasticsearch/client/MLRequestConvertersTests.java
index 6c280fba5ab01..7137a2cb58a4c 100644
--- a/client/rest-high-level/src/test/java/org/elasticsearch/client/MLRequestConvertersTests.java
+++ b/client/rest-high-level/src/test/java/org/elasticsearch/client/MLRequestConvertersTests.java
@@ -36,6 +36,7 @@
 import org.elasticsearch.client.ml.DeleteJobRequest;
 import org.elasticsearch.client.ml.DeleteModelSnapshotRequest;
 import org.elasticsearch.client.ml.DeleteTrainedModelRequest;
+import org.elasticsearch.client.ml.EstimateModelMemoryRequest;
 import org.elasticsearch.client.ml.EvaluateDataFrameRequest;
 import org.elasticsearch.client.ml.EvaluateDataFrameRequestTests;
 import org.elasticsearch.client.ml.ExplainDataFrameAnalyticsRequest;
@@ -106,6 +107,7 @@
 import org.elasticsearch.common.settings.Settings;
 import org.elasticsearch.common.unit.TimeValue;
 import org.elasticsearch.common.xcontent.NamedXContentRegistry;
+import org.elasticsearch.common.xcontent.ToXContent;
 import org.elasticsearch.common.xcontent.XContentBuilder;
 import org.elasticsearch.common.xcontent.XContentParser;
 import org.elasticsearch.common.xcontent.XContentType;
@@ -694,6 +696,25 @@ public void testDeleteCalendarEvent() {
         assertEquals("/_ml/calendars/" + calendarId + "/events/" + eventId, request.getEndpoint());
     }
 
+    public void testEstimateModelMemory() throws Exception {
+        String byFieldName = randomAlphaOfLength(10);
+        String influencerFieldName = randomAlphaOfLength(10);
+        AnalysisConfig analysisConfig = AnalysisConfig.builder(
+            Collections.singletonList(
+                Detector.builder().setFunction("count").setByFieldName(byFieldName).build()
+            )).setInfluencers(Collections.singletonList(influencerFieldName)).build();
+        EstimateModelMemoryRequest estimateModelMemoryRequest = new EstimateModelMemoryRequest(analysisConfig);
+        estimateModelMemoryRequest.setOverallCardinality(Collections.singletonMap(byFieldName, randomNonNegativeLong()));
+        estimateModelMemoryRequest.setMaxBucketCardinality(Collections.singletonMap(influencerFieldName, randomNonNegativeLong()));
+        Request request = MLRequestConverters.estimateModelMemory(estimateModelMemoryRequest);
+        assertEquals(HttpPost.METHOD_NAME, request.getMethod());
+        assertEquals("/_ml/anomaly_detectors/_estimate_model_memory", request.getEndpoint());
+
+        XContentBuilder builder = JsonXContent.contentBuilder();
+        builder = estimateModelMemoryRequest.toXContent(builder, ToXContent.EMPTY_PARAMS);
+        assertEquals(Strings.toString(builder), requestEntityToString(request));
+    }
+
     public void testPutDataFrameAnalytics() throws IOException {
         PutDataFrameAnalyticsRequest putRequest = new PutDataFrameAnalyticsRequest(randomDataFrameAnalyticsConfig());
         Request request = MLRequestConverters.putDataFrameAnalytics(putRequest);
diff --git a/client/rest-high-level/src/test/java/org/elasticsearch/client/MachineLearningIT.java b/client/rest-high-level/src/test/java/org/elasticsearch/client/MachineLearningIT.java
index e13464aeddabd..fc91c71caf358 100644
--- a/client/rest-high-level/src/test/java/org/elasticsearch/client/MachineLearningIT.java
+++ b/client/rest-high-level/src/test/java/org/elasticsearch/client/MachineLearningIT.java
@@ -46,6 +46,8 @@
 import org.elasticsearch.client.ml.DeleteJobResponse;
 import org.elasticsearch.client.ml.DeleteModelSnapshotRequest;
 import org.elasticsearch.client.ml.DeleteTrainedModelRequest;
+import org.elasticsearch.client.ml.EstimateModelMemoryRequest;
+import org.elasticsearch.client.ml.EstimateModelMemoryResponse;
 import org.elasticsearch.client.ml.EvaluateDataFrameRequest;
 import org.elasticsearch.client.ml.EvaluateDataFrameResponse;
 import org.elasticsearch.client.ml.ExplainDataFrameAnalyticsRequest;
@@ -1244,6 +1246,27 @@ public void testDeleteCalendarEvent() throws IOException {
         assertThat(remainingIds, not(hasItem(deletedEvent)));
     }
 
+    public void testEstimateModelMemory() throws Exception {
+        MachineLearningClient machineLearningClient = highLevelClient().machineLearning();
+
+        String byFieldName = randomAlphaOfLength(10);
+        String influencerFieldName = randomAlphaOfLength(10);
+        AnalysisConfig analysisConfig = AnalysisConfig.builder(
+            Collections.singletonList(
+                Detector.builder().setFunction("count").setByFieldName(byFieldName).build()
+            )).setInfluencers(Collections.singletonList(influencerFieldName)).build();
+        EstimateModelMemoryRequest estimateModelMemoryRequest = new EstimateModelMemoryRequest(analysisConfig);
+        estimateModelMemoryRequest.setOverallCardinality(Collections.singletonMap(byFieldName, randomNonNegativeLong()));
+        estimateModelMemoryRequest.setMaxBucketCardinality(Collections.singletonMap(influencerFieldName, randomNonNegativeLong()));
+
+        EstimateModelMemoryResponse estimateModelMemoryResponse = execute(
+            estimateModelMemoryRequest,
+            machineLearningClient::estimateModelMemory, machineLearningClient::estimateModelMemoryAsync);
+
+        ByteSizeValue modelMemoryEstimate = estimateModelMemoryResponse.getModelMemoryEstimate();
+        assertThat(modelMemoryEstimate.getBytes(), greaterThanOrEqualTo(10000000L));
+    }
+
     public void testPutDataFrameAnalyticsConfig_GivenOutlierDetectionAnalysis() throws Exception {
         MachineLearningClient machineLearningClient = highLevelClient().machineLearning();
         String configId = "test-put-df-analytics-outlier-detection";
diff --git a/client/rest-high-level/src/test/java/org/elasticsearch/client/documentation/MlClientDocumentationIT.java b/client/rest-high-level/src/test/java/org/elasticsearch/client/documentation/MlClientDocumentationIT.java
index 74e34889897e3..1361e34d38f29 100644
--- a/client/rest-high-level/src/test/java/org/elasticsearch/client/documentation/MlClientDocumentationIT.java
+++ b/client/rest-high-level/src/test/java/org/elasticsearch/client/documentation/MlClientDocumentationIT.java
@@ -49,6 +49,8 @@
 import org.elasticsearch.client.ml.DeleteJobResponse;
 import org.elasticsearch.client.ml.DeleteModelSnapshotRequest;
 import org.elasticsearch.client.ml.DeleteTrainedModelRequest;
+import org.elasticsearch.client.ml.EstimateModelMemoryRequest;
+import org.elasticsearch.client.ml.EstimateModelMemoryResponse;
 import org.elasticsearch.client.ml.EvaluateDataFrameRequest;
 import org.elasticsearch.client.ml.EvaluateDataFrameResponse;
 import org.elasticsearch.client.ml.ExplainDataFrameAnalyticsRequest;
@@ -4131,6 +4133,65 @@ public void onFailure(Exception e) {
         }
     }
 
+    public void testEstimateModelMemory() throws Exception {
+        RestHighLevelClient client = highLevelClient();
+        {
+            // tag::estimate-model-memory-request
+            Detector.Builder detectorBuilder = new Detector.Builder()
+                .setFunction("count")
+                .setPartitionFieldName("status");
+            AnalysisConfig.Builder analysisConfigBuilder =
+                new AnalysisConfig.Builder(Collections.singletonList(detectorBuilder.build()))
+                .setBucketSpan(TimeValue.timeValueMinutes(10))
+                .setInfluencers(Collections.singletonList("src_ip"));
+            EstimateModelMemoryRequest request = new EstimateModelMemoryRequest(analysisConfigBuilder.build()); // <1>
+            request.setOverallCardinality(Collections.singletonMap("status", 50L));                             // <2>
+            request.setMaxBucketCardinality(Collections.singletonMap("src_ip", 30L));                           // <3>
+            // end::estimate-model-memory-request
+
+            // tag::estimate-model-memory-execute
+            EstimateModelMemoryResponse estimateModelMemoryResponse =
+                client.machineLearning().estimateModelMemory(request, RequestOptions.DEFAULT);
+            // end::estimate-model-memory-execute
+
+            // tag::estimate-model-memory-response
+            ByteSizeValue modelMemoryEstimate = estimateModelMemoryResponse.getModelMemoryEstimate(); // <1>
+            long estimateInBytes = modelMemoryEstimate.getBytes();
+            // end::estimate-model-memory-response
+            assertThat(estimateInBytes, greaterThan(10000000L));
+        }
+        {
+            AnalysisConfig analysisConfig =
+                AnalysisConfig.builder(Collections.singletonList(Detector.builder().setFunction("count").build())).build();
+            EstimateModelMemoryRequest request = new EstimateModelMemoryRequest(analysisConfig);
+
+            // tag::estimate-model-memory-execute-listener
+            ActionListener<EstimateModelMemoryResponse> listener = new ActionListener<EstimateModelMemoryResponse>() {
+                @Override
+                public void onResponse(EstimateModelMemoryResponse estimateModelMemoryResponse) {
+                    // <1>
+                }
+
+                @Override
+                public void onFailure(Exception e) {
+                    // <2>
+                }
+            };
+            // end::estimate-model-memory-execute-listener
+
+            // Replace the empty listener by a blocking listener in test
+            final CountDownLatch latch = new CountDownLatch(1);
+            listener = new LatchedActionListener<>(listener, latch);
+
+            // tag::estimate-model-memory-execute-async
+            client.machineLearning()
+                .estimateModelMemoryAsync(request, RequestOptions.DEFAULT, listener); // <1>
+            // end::estimate-model-memory-execute-async
+
+            assertTrue(latch.await(30L, TimeUnit.SECONDS));
+        }
+    }
+
     private String createFilter(RestHighLevelClient client) throws IOException {
         MlFilter.Builder filterBuilder = MlFilter.builder("my_safe_domains")
             .setDescription("A list of safe domains")
diff --git a/docs/java-rest/high-level/ml/estimate-model-memory.asciidoc b/docs/java-rest/high-level/ml/estimate-model-memory.asciidoc
new file mode 100644
index 0000000000000..1a22805880bad
--- /dev/null
+++ b/docs/java-rest/high-level/ml/estimate-model-memory.asciidoc
@@ -0,0 +1,42 @@
+--
+:api: estimate-model-memory
+:request: EstimateModelMemoryRequest
+:response: EstimateModelMemoryResponse
+--
+[role="xpack"]
+[id="{upid}-{api}"]
+=== Estimate {anomaly-job} Model Memory API
+
+Estimate the model memory an analysis config is likely to need given
+cardinality estimates for the fields it references.
+
+[id="{upid}-{api}-request"]
+==== Estimate {anomaly-job} Model Memory request
+
+A +{request}+ can be set up as follows:
+
+["source","java",subs="attributes,callouts,macros"]
+--------------------------------------------------
+include-tagged::{doc-tests-file}[{api}-request]
+--------------------------------------------------
+<1> Pass an `AnalysisConfig` to the constructor.
+<2> For any `by_field_name`, `over_field_name` or
+    `partition_field_name` fields referenced by the
+    `Detector`s, supply overall cardinality estimates
+    in a `Map`.
+<3> For any `influencers`, supply a `Map` containing
+    estimates of the highest cardinality expected in
+    any single bucket.
+
+include::../execution.asciidoc[]
+
+[id="{upid}-{api}-response"]
+==== Estimate {anomaly-job} Model Memory response
+
+The returned +{response}+ contains the model memory estimate:
+
+["source","java",subs="attributes,callouts,macros"]
+--------------------------------------------------
+include-tagged::{doc-tests-file}[{api}-response]
+--------------------------------------------------
+<1> The model memory estimate.
diff --git a/docs/java-rest/high-level/supported-apis.asciidoc b/docs/java-rest/high-level/supported-apis.asciidoc
index efe7b6650640c..4999d0627a9eb 100644
--- a/docs/java-rest/high-level/supported-apis.asciidoc
+++ b/docs/java-rest/high-level/supported-apis.asciidoc
@@ -295,6 +295,7 @@ The Java High Level REST Client supports the following Machine Learning APIs:
 * <<{upid}-put-calendar-job>>
 * <<{upid}-delete-calendar-job>>
 * <<{upid}-delete-calendar>>
+* <<{upid}-estimate-model-memory>>
 * <<{upid}-get-data-frame-analytics>>
 * <<{upid}-get-data-frame-analytics-stats>>
 * <<{upid}-put-data-frame-analytics>>
@@ -351,6 +352,7 @@ include::ml/delete-calendar-event.asciidoc[]
 include::ml/put-calendar-job.asciidoc[]
 include::ml/delete-calendar-job.asciidoc[]
 include::ml/delete-calendar.asciidoc[]
+include::ml/estimate-model-memory.asciidoc[]
 include::ml/get-data-frame-analytics.asciidoc[]
 include::ml/get-data-frame-analytics-stats.asciidoc[]
 include::ml/put-data-frame-analytics.asciidoc[]
diff --git a/docs/reference/ml/anomaly-detection/apis/estimate-model-memory.asciidoc b/docs/reference/ml/anomaly-detection/apis/estimate-model-memory.asciidoc
new file mode 100644
index 0000000000000..d09e89061d3a9
--- /dev/null
+++ b/docs/reference/ml/anomaly-detection/apis/estimate-model-memory.asciidoc
@@ -0,0 +1,74 @@
+[role="xpack"]
+[testenv="platinum"]
+[[ml-estimate-model-memory]]
+=== Estimate {anomaly-jobs} Model Memory API
+++++
+<titleabbrev>Estimate Model Memory</titleabbrev>
+++++
+
+Estimates the model memory an analysis config is likely to need given
+cardinality estimates for the fields it references.
+
+[[ml-estimate-model-memory-request]]
+==== {api-request-title}
+
+`POST _ml/anomaly_detectors/_estimate_model_memory`
+
+[[ml-estimate-model-memory-prereqs]]
+==== {api-prereq-title}
+
+* If the {es} {security-features} are enabled, you must have `manage_ml` or
+`manage` cluster privileges to use this API. See
+<<security-privileges>>.
+
+[[ml-estimate-model-memory-desc]]
+==== {api-description-title}
+
+This API enables you to estimate the model memory an {anomaly-job}
+configuration will require before you create the job.
+
+[[ml-estimate-model-memory-request-body]]
+==== {api-request-body-title}
+
+For a list of the properties that you can specify in the `analysis_config`
+component of the body of this API, see <<put-analysisconfig>>.
+
+[[ml-estimate-model-memory-example]]
+==== {api-examples-title}
+
+[source,console]
+--------------------------------------------------
+POST _ml/anomaly_detectors/_estimate_model_memory
+{
+    "analysis_config": {
+        "bucket_span": "5m",
+        "detectors": [
+          {
+            "function": "sum",
+            "field_name": "bytes",
+            "by_field_name": "status",
+            "partition_field_name": "app"
+          }
+        ],
+        "influencers": [ "source_ip", "dest_ip" ]
+    },
+    "overall_cardinality": {
+       "status": 10,
+       "app": 50
+    },
+    "max_bucket_cardinality": {
+       "source_ip": 300,
+       "dest_ip": 30
+    }
+}
+--------------------------------------------------
+// TEST[skip:needs-licence]
+
+The estimate returns the following result:
+
+[source,console-result]
+----
+{
+  "model_memory_estimate": "123mb"
+}
+----
diff --git a/docs/reference/ml/anomaly-detection/apis/ml-api.asciidoc b/docs/reference/ml/anomaly-detection/apis/ml-api.asciidoc
index f02312cb0ac94..e6514dff60a00 100644
--- a/docs/reference/ml/anomaly-detection/apis/ml-api.asciidoc
+++ b/docs/reference/ml/anomaly-detection/apis/ml-api.asciidoc
@@ -118,6 +118,8 @@ include::delete-job.asciidoc[]
 include::delete-calendar-job.asciidoc[]
 include::delete-snapshot.asciidoc[]
 include::delete-expired-data.asciidoc[]
+//ESTIMATE
+include::estimate-model-memory.asciidoc[]
 //FIND
 include::find-file-structure.asciidoc[]
 //FLUSH
diff --git a/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/action/TransportEstimateModelMemoryAction.java b/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/action/TransportEstimateModelMemoryAction.java
index d415156450082..0a323d675d4f0 100644
--- a/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/action/TransportEstimateModelMemoryAction.java
+++ b/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/action/TransportEstimateModelMemoryAction.java
@@ -47,17 +47,17 @@ protected void doExecute(Task task,
         Map<String, Long> overallCardinality = request.getOverallCardinality();
         Map<String, Long> maxBucketCardinality = request.getMaxBucketCardinality();
 
-        long answer = BASIC_REQUIREMENT.getBytes()
-            + calculateDetectorsRequirementBytes(analysisConfig, overallCardinality)
-            + calculateInfluencerRequirementBytes(analysisConfig, maxBucketCardinality)
-            + calculateCategorizationRequirementBytes(analysisConfig);
+        long answer = BASIC_REQUIREMENT.getBytes();
+        answer = addNonNegativeLongsWithMaxValueCap(answer, calculateDetectorsRequirementBytes(analysisConfig, overallCardinality));
+        answer = addNonNegativeLongsWithMaxValueCap(answer, calculateInfluencerRequirementBytes(analysisConfig, maxBucketCardinality));
+        answer = addNonNegativeLongsWithMaxValueCap(answer, calculateCategorizationRequirementBytes(analysisConfig));
 
         listener.onResponse(new EstimateModelMemoryAction.Response(roundUpToNextMb(answer)));
     }
 
     static long calculateDetectorsRequirementBytes(AnalysisConfig analysisConfig, Map<String, Long> overallCardinality) {
         return analysisConfig.getDetectors().stream().map(detector -> calculateDetectorRequirementBytes(detector, overallCardinality))
-            .reduce(0L, Long::sum);
+            .reduce(0L, TransportEstimateModelMemoryAction::addNonNegativeLongsWithMaxValueCap);
     }
 
     static long calculateDetectorRequirementBytes(Detector detector, Map<String, Long> overallCardinality) {
@@ -130,19 +130,28 @@ static long calculateDetectorRequirementBytes(Detector detector, Map<String, Lon
 
         String byFieldName = detector.getByFieldName();
         if (byFieldName != null) {
-            answer *= cardinalityEstimate(Detector.BY_FIELD_NAME_FIELD.getPreferredName(), byFieldName, overallCardinality, true);
+            long multiplier = cardinalityEstimate(Detector.BY_FIELD_NAME_FIELD.getPreferredName(), byFieldName, overallCardinality, true);
+            if (Long.MAX_VALUE / answer < multiplier) {
+                return Long.MAX_VALUE;
+            }
+            answer *= multiplier;
         }
 
         String overFieldName = detector.getOverFieldName();
         if (overFieldName != null) {
-            cardinalityEstimate(Detector.OVER_FIELD_NAME_FIELD.getPreferredName(), overFieldName, overallCardinality, true);
+            long multiplier =
+                cardinalityEstimate(Detector.OVER_FIELD_NAME_FIELD.getPreferredName(), overFieldName, overallCardinality, true);
             // TODO - how should "over" field cardinality affect estimate?
         }
 
         String partitionFieldName = detector.getPartitionFieldName();
         if (partitionFieldName != null) {
-            answer *=
+            long multiplier =
                 cardinalityEstimate(Detector.PARTITION_FIELD_NAME_FIELD.getPreferredName(), partitionFieldName, overallCardinality, true);
+            if (Long.MAX_VALUE / answer < multiplier) {
+                return Long.MAX_VALUE;
+            }
+            answer *= multiplier;
         }
 
         return answer;
@@ -156,10 +165,13 @@ static long calculateInfluencerRequirementBytes(AnalysisConfig analysisConfig, M
             pureInfluencers.removeAll(detector.extractAnalysisFields());
         }
 
-        return pureInfluencers.stream()
-            .map(influencer -> cardinalityEstimate(AnalysisConfig.INFLUENCERS.getPreferredName(), influencer, maxBucketCardinality, false)
-                * BYTES_PER_INFLUENCER_VALUE)
-            .reduce(0L, Long::sum);
+        long totalInfluencerCardinality = pureInfluencers.stream()
+            .map(influencer -> cardinalityEstimate(AnalysisConfig.INFLUENCERS.getPreferredName(), influencer, maxBucketCardinality, false))
+            .reduce(0L, TransportEstimateModelMemoryAction::addNonNegativeLongsWithMaxValueCap);
+        if (Long.MAX_VALUE / BYTES_PER_INFLUENCER_VALUE < totalInfluencerCardinality) {
+            return Long.MAX_VALUE;
+        }
+        return BYTES_PER_INFLUENCER_VALUE * totalInfluencerCardinality;
     }
 
     static long calculateCategorizationRequirementBytes(AnalysisConfig analysisConfig) {
@@ -187,7 +199,16 @@ static long cardinalityEstimate(String description, String fieldName, Map<String
     }
 
     static ByteSizeValue roundUpToNextMb(long bytes) {
-        assert bytes >= 0;
-        return new ByteSizeValue((BYTES_IN_MB - 1 + bytes) / BYTES_IN_MB, ByteSizeUnit.MB);
+        assert bytes >= 0 : "negative bytes " + bytes;
+        return new ByteSizeValue((BYTES_IN_MB - 1 + Math.min(Long.MAX_VALUE - BYTES_IN_MB + 1, bytes)) / BYTES_IN_MB, ByteSizeUnit.MB);
+    }
+
+    private static long addNonNegativeLongsWithMaxValueCap(long a, long b) {
+        assert a >= 0; // with both operands non-negative, Long.MAX_VALUE - a - b below cannot itself overflow
+        assert b >= 0;
+        if (Long.MAX_VALUE - a - b < 0) { // true exactly when a + b would exceed Long.MAX_VALUE
+            return Long.MAX_VALUE;
+        }
+        return a + b;
     }
 }
diff --git a/x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/action/TransportEstimateModelMemoryActionTests.java b/x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/action/TransportEstimateModelMemoryActionTests.java
index ea10c9fb5f64c..c08af05234586 100644
--- a/x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/action/TransportEstimateModelMemoryActionTests.java
+++ b/x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/action/TransportEstimateModelMemoryActionTests.java
@@ -98,6 +98,10 @@ public void testRoundUpToNextMb() {
             equalTo(new ByteSizeValue(2, ByteSizeUnit.MB)));
         assertThat(TransportEstimateModelMemoryAction.roundUpToNextMb(2 * 1024 * 1024),
             equalTo(new ByteSizeValue(2, ByteSizeUnit.MB)));
+        // We don't round up at the extremes, to ensure that the resulting value can be represented as bytes in a long
+        // (At such extreme scale it won't be possible to actually run the analysis, so ease of use trumps precision)
+        assertThat(TransportEstimateModelMemoryAction.roundUpToNextMb(Long.MAX_VALUE - randomIntBetween(0, 1000000)),
+            equalTo(new ByteSizeValue(Long.MAX_VALUE / new ByteSizeValue(1, ByteSizeUnit.MB).getBytes() , ByteSizeUnit.MB)));
     }
 
     public static Detector createDetector(String function, String fieldName, String byFieldName,

From 258a900ed71f0f57f3283f60c3136fcc432d503e Mon Sep 17 00:00:00 2001
From: David Roberts <dave.roberts@elastic.co>
Date: Fri, 13 Mar 2020 13:30:05 +0000
Subject: [PATCH 2/6] Addressing docs comments

---
 .../apis/estimate-model-memory.asciidoc       | 37 +++++++++++++------
 1 file changed, 25 insertions(+), 12 deletions(-)

diff --git a/docs/reference/ml/anomaly-detection/apis/estimate-model-memory.asciidoc b/docs/reference/ml/anomaly-detection/apis/estimate-model-memory.asciidoc
index d09e89061d3a9..45596312fcd4e 100644
--- a/docs/reference/ml/anomaly-detection/apis/estimate-model-memory.asciidoc
+++ b/docs/reference/ml/anomaly-detection/apis/estimate-model-memory.asciidoc
@@ -1,13 +1,13 @@
 [role="xpack"]
 [testenv="platinum"]
 [[ml-estimate-model-memory]]
-=== Estimate {anomaly-jobs} Model Memory API
+=== Estimate {anomaly-jobs} model memory API
 ++++
 <titleabbrev>Estimate Model Memory</titleabbrev>
 ++++
 
-Estimates the model memory an analysis config is likely to need given
-cardinality estimates for the fields it references.
+Estimates the model memory an {anomaly-job} is likely to need based on analysis
+configuration details and cardinality estimates for the fields it references.
 
 [[ml-estimate-model-memory-request]]
 ==== {api-request-title}
@@ -21,17 +21,30 @@ cardinality estimates for the fields it references.
 `manage` cluster privileges to use this API. See
 <<security-privileges>>.
 
-[[ml-estimate-model-memory-desc]]
-==== {api-description-title}
-
-This API enables you to estimate the model memory and {anomaly-job}
-configuration will require before you create the job.
-
 [[ml-estimate-model-memory-request-body]]
 ==== {api-request-body-title}
 
-For a list of the properties that you can specify in the `analysis_config`
-component of the body of this API, see <<put-analysisconfig>>.
+`analysis_config`::
+(Required, object) For a list of the properties that you can specify in the
+`analysis_config` component of the body of this API, see <<put-analysisconfig>>.
+
+`max_bucket_cardinality`::
+(Optional, object) Estimates of the highest cardinality in a single bucket
+that will be observed for influencer fields over the time period the job
+analyzes data for. To produce a good answer values must be provided for
+all influencer fields. It does not matter if values are provided for fields
+that are not listed as `influencers`. If there are no `influencers` then
+`max_bucket_cardinality` can be safely omitted from the request.
+
+`overall_cardinality`::
+(Optional, object) Estimates of the cardinality that will be observed for
+fields over the whole time period the job analyzes data for. To produce
+a good answer values must be provided for fields referenced in the
+`by_field_name`, `over_field_name` and `partition_field_name` of any
+detectors. It does not matter if values are provided for other fields.
+If no detectors have a `by_field_name`, `over_field_name` or
+`partition_field_name` then `overall_cardinality` can be safely omitted
+from the request.
 
 [[ml-estimate-model-memory-example]]
 ==== {api-examples-title}
@@ -69,6 +82,6 @@ The estimate returns the following result:
 [source,console-result]
 ----
 {
-  "model_memory_estimate": "123mb"
+  "model_memory_estimate": "45mb"
 }
 ----

From ce16b385665dc3e356be193cdad5ac4daf830e44 Mon Sep 17 00:00:00 2001
From: David Roberts <dave.roberts@elastic.co>
Date: Thu, 19 Mar 2020 15:45:28 +0000
Subject: [PATCH 3/6] Adding numbers for more functions

---
 .../TransportEstimateModelMemoryAction.java   | 42 +++++++++----------
 ...ansportEstimateModelMemoryActionTests.java |  4 +-
 2 files changed, 23 insertions(+), 23 deletions(-)

diff --git a/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/action/TransportEstimateModelMemoryAction.java b/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/action/TransportEstimateModelMemoryAction.java
index 0a323d675d4f0..0665944c1cf8c 100644
--- a/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/action/TransportEstimateModelMemoryAction.java
+++ b/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/action/TransportEstimateModelMemoryAction.java
@@ -64,6 +64,7 @@ static long calculateDetectorRequirementBytes(Detector detector, Map<String, Lon
 
         long answer = 0;
 
+        // These values for detectors assume splitting is via a partition field
         switch (detector.getFunction()) {
             case COUNT:
             case LOW_COUNT:
@@ -71,7 +72,7 @@ static long calculateDetectorRequirementBytes(Detector detector, Map<String, Lon
             case NON_ZERO_COUNT:
             case LOW_NON_ZERO_COUNT:
             case HIGH_NON_ZERO_COUNT:
-                answer = 1; // TODO add realistic number
+                answer = new ByteSizeValue(32, ByteSizeUnit.KB).getBytes();
                 break;
             case DISTINCT_COUNT:
             case LOW_DISTINCT_COUNT:
@@ -104,18 +105,14 @@ static long calculateDetectorRequirementBytes(Detector detector, Map<String, Lon
             case NON_NULL_SUM:
             case LOW_NON_NULL_SUM:
             case HIGH_NON_NULL_SUM:
-                // 64 comes from https://github.com/elastic/kibana/issues/18722
-                answer = new ByteSizeValue(64, ByteSizeUnit.KB).getBytes();
-                break;
             case MEDIAN:
             case LOW_MEDIAN:
             case HIGH_MEDIAN:
-                answer = 1; // TODO add realistic number
-                break;
             case VARP:
             case LOW_VARP:
             case HIGH_VARP:
-                answer = 1; // TODO add realistic number
+                // 64 comes from https://github.com/elastic/kibana/issues/18722
+                answer = new ByteSizeValue(64, ByteSizeUnit.KB).getBytes();
                 break;
             case TIME_OF_DAY:
             case TIME_OF_WEEK:
@@ -130,11 +127,11 @@ static long calculateDetectorRequirementBytes(Detector detector, Map<String, Lon
 
         String byFieldName = detector.getByFieldName();
         if (byFieldName != null) {
-            long multiplier = cardinalityEstimate(Detector.BY_FIELD_NAME_FIELD.getPreferredName(), byFieldName, overallCardinality, true);
-            if (Long.MAX_VALUE / answer < multiplier) {
-                return Long.MAX_VALUE;
-            }
-            answer *= multiplier;
+            long cardinalityEstimate =
+                cardinalityEstimate(Detector.BY_FIELD_NAME_FIELD.getPreferredName(), byFieldName, overallCardinality, true);
+            // The memory cost of a by field is about 2/3rds that of a partition field
+            long multiplier = addNonNegativeLongsWithMaxValueCap(cardinalityEstimate, 2) / 3 * 2;
+            answer = multiplyNonNegativeLongsWithMaxValueCap(answer, multiplier);
         }
 
         String overFieldName = detector.getOverFieldName();
@@ -148,10 +145,7 @@ static long calculateDetectorRequirementBytes(Detector detector, Map<String, Lon
         if (partitionFieldName != null) {
             long multiplier =
                 cardinalityEstimate(Detector.PARTITION_FIELD_NAME_FIELD.getPreferredName(), partitionFieldName, overallCardinality, true);
-            if (Long.MAX_VALUE / answer < multiplier) {
-                return Long.MAX_VALUE;
-            }
-            answer *= multiplier;
+            answer = multiplyNonNegativeLongsWithMaxValueCap(answer, multiplier);
         }
 
         return answer;
@@ -168,10 +162,7 @@ static long calculateInfluencerRequirementBytes(AnalysisConfig analysisConfig, M
         long totalInfluencerCardinality = pureInfluencers.stream()
             .map(influencer -> cardinalityEstimate(AnalysisConfig.INFLUENCERS.getPreferredName(), influencer, maxBucketCardinality, false))
             .reduce(0L, TransportEstimateModelMemoryAction::addNonNegativeLongsWithMaxValueCap);
-        if (Long.MAX_VALUE / BYTES_PER_INFLUENCER_VALUE < totalInfluencerCardinality) {
-            return Long.MAX_VALUE;
-        }
-        return BYTES_PER_INFLUENCER_VALUE * totalInfluencerCardinality;
+        return multiplyNonNegativeLongsWithMaxValueCap(BYTES_PER_INFLUENCER_VALUE, totalInfluencerCardinality);
     }
 
     static long calculateCategorizationRequirementBytes(AnalysisConfig analysisConfig) {
@@ -200,7 +191,7 @@ static long cardinalityEstimate(String description, String fieldName, Map<String
 
     static ByteSizeValue roundUpToNextMb(long bytes) {
         assert bytes >= 0 : "negative bytes " + bytes;
-        return new ByteSizeValue((BYTES_IN_MB - 1 + Math.min(Long.MAX_VALUE - BYTES_IN_MB + 1, bytes)) / BYTES_IN_MB, ByteSizeUnit.MB);
+        return new ByteSizeValue(addNonNegativeLongsWithMaxValueCap(bytes, BYTES_IN_MB - 1) / BYTES_IN_MB, ByteSizeUnit.MB);
     }
 
     private static long addNonNegativeLongsWithMaxValueCap(long a, long b) {
@@ -211,4 +202,13 @@ private static long addNonNegativeLongsWithMaxValueCap(long a, long b) {
         }
         return a + b;
     }
+
+    /** Multiplies two non-negative longs, capping at Long.MAX_VALUE rather than overflowing; a zero operand gives 0. */
+    private static long multiplyNonNegativeLongsWithMaxValueCap(long a, long b) {
+        assert a >= 0 && b >= 0 : "negative operand: a=" + a + ", b=" + b;
+        if (a != 0 && Long.MAX_VALUE / a < b) { // a == 0 must skip the division, which would throw ArithmeticException
+            return Long.MAX_VALUE;
+        }
+        return a * b;
+    }
 }
diff --git a/x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/action/TransportEstimateModelMemoryActionTests.java b/x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/action/TransportEstimateModelMemoryActionTests.java
index c08af05234586..30118104e6510 100644
--- a/x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/action/TransportEstimateModelMemoryActionTests.java
+++ b/x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/action/TransportEstimateModelMemoryActionTests.java
@@ -36,7 +36,7 @@ public void testCalculateDetectorRequirementBytes() {
 
         Detector withByField = createDetector(function, "field", "buy", null, null);
         assertThat(TransportEstimateModelMemoryAction.calculateDetectorRequirementBytes(withByField,
-            overallCardinality), is(200 * 65536L));
+            overallCardinality), is(134 * 65536L));
 
         Detector withPartitionField = createDetector(function, "field", null, null, "part");
         assertThat(TransportEstimateModelMemoryAction.calculateDetectorRequirementBytes(withPartitionField,
@@ -44,7 +44,7 @@ public void testCalculateDetectorRequirementBytes() {
 
         Detector withByAndPartitionFields = createDetector(function, "field", "buy", null, "part");
         assertThat(TransportEstimateModelMemoryAction.calculateDetectorRequirementBytes(withByAndPartitionFields,
-            overallCardinality), is(200 * 100 * 65536L));
+            overallCardinality), is(134 * 100 * 65536L));
     }
 
     public void testCalculateInfluencerRequirementBytes() {

From 0caf242c5313ad39ca053c8824642fb7151cdc5b Mon Sep 17 00:00:00 2001
From: David Roberts <dave.roberts@elastic.co>
Date: Tue, 24 Mar 2020 12:43:59 +0000
Subject: [PATCH 4/6] Address review comments

---
 .../elasticsearch/client/ml/EstimateModelMemoryRequest.java | 6 +-----
 docs/java-rest/high-level/ml/estimate-model-memory.asciidoc | 2 +-
 2 files changed, 2 insertions(+), 6 deletions(-)

diff --git a/client/rest-high-level/src/main/java/org/elasticsearch/client/ml/EstimateModelMemoryRequest.java b/client/rest-high-level/src/main/java/org/elasticsearch/client/ml/EstimateModelMemoryRequest.java
index d3d2c7c52a703..b0dc8bb7c294e 100644
--- a/client/rest-high-level/src/main/java/org/elasticsearch/client/ml/EstimateModelMemoryRequest.java
+++ b/client/rest-high-level/src/main/java/org/elasticsearch/client/ml/EstimateModelMemoryRequest.java
@@ -40,7 +40,7 @@ public class EstimateModelMemoryRequest implements Validatable, ToXContentObject
     public static final String OVERALL_CARDINALITY = "overall_cardinality";
     public static final String MAX_BUCKET_CARDINALITY = "max_bucket_cardinality";
 
-    private AnalysisConfig analysisConfig;
+    private final AnalysisConfig analysisConfig;
     private Map<String, Long> overallCardinality = Collections.emptyMap();
     private Map<String, Long> maxBucketCardinality = Collections.emptyMap();
 
@@ -57,10 +57,6 @@ public AnalysisConfig getAnalysisConfig() {
         return analysisConfig;
     }
 
-    public void setAnalysisConfig(AnalysisConfig analysisConfig) {
-        this.analysisConfig = Objects.requireNonNull(analysisConfig);
-    }
-
     public Map<String, Long> getOverallCardinality() {
         return overallCardinality;
     }
diff --git a/docs/java-rest/high-level/ml/estimate-model-memory.asciidoc b/docs/java-rest/high-level/ml/estimate-model-memory.asciidoc
index 1a22805880bad..5730f8195aed0 100644
--- a/docs/java-rest/high-level/ml/estimate-model-memory.asciidoc
+++ b/docs/java-rest/high-level/ml/estimate-model-memory.asciidoc
@@ -8,7 +8,7 @@
 === Estimate {anomaly-job} Model Memory API
 
 Estimate the model memory an analysis config is likely to need for
-given cardinality of the fields it references.
+the given cardinality of the fields it references.
 
 [id="{upid}-{api}-request"]
 ==== Estimate {anomaly-job} Model Memory request

From 95dede2d050ea49f2c50459878467e892f1acae3 Mon Sep 17 00:00:00 2001
From: David Roberts <dave.roberts@elastic.co>
Date: Tue, 24 Mar 2020 16:39:10 +0000
Subject: [PATCH 5/6] Apply docs suggestions from code review

Co-Authored-By: Lisa Cawley <lcawley@elastic.co>
---
 .../high-level/ml/estimate-model-memory.asciidoc   |  8 ++++----
 .../apis/estimate-model-memory.asciidoc            | 14 +++++++-------
 2 files changed, 11 insertions(+), 11 deletions(-)

diff --git a/docs/java-rest/high-level/ml/estimate-model-memory.asciidoc b/docs/java-rest/high-level/ml/estimate-model-memory.asciidoc
index 5730f8195aed0..8e8b5f1befa34 100644
--- a/docs/java-rest/high-level/ml/estimate-model-memory.asciidoc
+++ b/docs/java-rest/high-level/ml/estimate-model-memory.asciidoc
@@ -5,13 +5,13 @@
 --
 [role="xpack"]
 [id="{upid}-{api}"]
-=== Estimate {anomaly-job} Model Memory API
+=== Estimate {anomaly-job} model memory API
 
 Estimate the model memory an analysis config is likely to need for
 the given cardinality of the fields it references.
 
 [id="{upid}-{api}-request"]
-==== Estimate {anomaly-job} Model Memory request
+==== Estimate {anomaly-job} model memory request
 
 A +{request}+ can be set up as follows:
 
@@ -22,7 +22,7 @@ include-tagged::{doc-tests-file}[{api}-request]
 <1> Pass an `AnalysisConfig` to the constructor.
 <2> For any `by_field_name`, `over_field_name` or
     `partition_field_name` fields referenced by the
-    `Detector`s, supply overall cardinality estimates
+    detectors, supply overall cardinality estimates
     in a `Map`.
 <3> For any `influencers`, supply a `Map` containing
     estimates of the highest cardinality expected in
@@ -31,7 +31,7 @@ include-tagged::{doc-tests-file}[{api}-request]
 include::../execution.asciidoc[]
 
 [id="{upid}-{api}-response"]
-==== Estimate {anomaly-job} Model Memory response
+==== Estimate {anomaly-job} model memory response
 
 The returned +{response}+ contains the model memory estimate:
 
diff --git a/docs/reference/ml/anomaly-detection/apis/estimate-model-memory.asciidoc b/docs/reference/ml/anomaly-detection/apis/estimate-model-memory.asciidoc
index 45596312fcd4e..46a49f7cd239c 100644
--- a/docs/reference/ml/anomaly-detection/apis/estimate-model-memory.asciidoc
+++ b/docs/reference/ml/anomaly-detection/apis/estimate-model-memory.asciidoc
@@ -3,7 +3,7 @@
 [[ml-estimate-model-memory]]
 === Estimate {anomaly-jobs} model memory API
 ++++
-<titleabbrev>Estimate Model Memory</titleabbrev>
+<titleabbrev>Estimate model memory</titleabbrev>
 ++++
 
 Estimates the model memory an {anomaly-job} is likely to need based on analysis
@@ -30,20 +30,20 @@ configuration details and cardinality estimates for the fields it references.
 
 `max_bucket_cardinality`::
 (Optional, object) Estimates of the highest cardinality in a single bucket
-that will be observed for influencer fields over the time period the job
-analyzes data for. To produce a good answer values must be provided for
+that will be observed for influencer fields over the time period that the job
+analyzes data. To produce a good answer, values must be provided for
 all influencer fields. It does not matter if values are provided for fields
 that are not listed as `influencers`. If there are no `influencers` then
-`max_bucket_cardinality` can be safely omitted from the request.
+`max_bucket_cardinality` can be omitted from the request.
 
 `overall_cardinality`::
 (Optional, object) Estimates of the cardinality that will be observed for
-fields over the whole time period the job analyzes data for. To produce
-a good answer values must be provided for fields referenced in the
+fields over the whole time period that the job analyzes data. To produce
+a good answer, values must be provided for fields referenced in the
 `by_field_name`, `over_field_name` and `partition_field_name` of any
 detectors. It does not matter if values are provided for other fields.
 If no detectors have a `by_field_name`, `over_field_name` or
-`partition_field_name` then `overall_cardinality` can be safely omitted
+`partition_field_name` then `overall_cardinality` can be omitted
 from the request.
 
 [[ml-estimate-model-memory-example]]

From d51c6f4cdf232a9ebbea831e35795e44035ac119 Mon Sep 17 00:00:00 2001
From: David Roberts <dave.roberts@elastic.co>
Date: Tue, 24 Mar 2020 19:23:52 +0000
Subject: [PATCH 6/6] A few refinements

---
 .../TransportEstimateModelMemoryAction.java   |  19 ++-
 .../test/ml/estimate_model_memory.yml         | 128 +++++++++++++++++-
 2 files changed, 138 insertions(+), 9 deletions(-)

diff --git a/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/action/TransportEstimateModelMemoryAction.java b/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/action/TransportEstimateModelMemoryAction.java
index 0665944c1cf8c..776c84031428b 100644
--- a/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/action/TransportEstimateModelMemoryAction.java
+++ b/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/action/TransportEstimateModelMemoryAction.java
@@ -23,6 +23,17 @@
 import java.util.Map;
 import java.util.Set;
 
+/**
+ * Calculates the estimated model memory requirement of an anomaly detection job
+ * from its analysis config and estimates of the cardinality of the various fields
+ * referenced in it.
+ *
+ * Answers are capped at <code>Long.MAX_VALUE</code> bytes, to avoid returning
+ * values with bigger units that cannot trivially be converted back to bytes.
+ * (In reality if the memory estimate is greater than <code>Long.MAX_VALUE</code>
+ * bytes then the job will be impossible to run successfully, so this is not a
+ * major limitation.)
+ */
 public class TransportEstimateModelMemoryAction
     extends HandledTransportAction<EstimateModelMemoryAction.Request, EstimateModelMemoryAction.Response> {
 
@@ -89,7 +100,8 @@ static long calculateDetectorRequirementBytes(Detector detector, Map<String, Lon
                 answer = 1; // TODO add realistic number
                 break;
             case METRIC:
-                answer = 1; // TODO add realistic number
+                // metric analyses mean, min and max simultaneously, and uses about 2.5 times the memory of one of these
+                answer = new ByteSizeValue(160, ByteSizeUnit.KB).getBytes();
                 break;
             case MEAN:
             case LOW_MEAN:
@@ -136,9 +148,10 @@ static long calculateDetectorRequirementBytes(Detector detector, Map<String, Lon
 
         String overFieldName = detector.getOverFieldName();
         if (overFieldName != null) {
-            long multiplier =
+            long cardinalityEstimate =
                 cardinalityEstimate(Detector.OVER_FIELD_NAME_FIELD.getPreferredName(), overFieldName, overallCardinality, true);
-            // TODO - how should "over" field cardinality affect estimate?
+            // Over fields don't multiply the whole estimate, just add a small amount (estimate 512 bytes) per value
+            answer = addNonNegativeLongsWithMaxValueCap(answer, multiplyNonNegativeLongsWithMaxValueCap(cardinalityEstimate, 512));
         }
 
         String partitionFieldName = detector.getPartitionFieldName();
diff --git a/x-pack/plugin/src/test/resources/rest-api-spec/test/ml/estimate_model_memory.yml b/x-pack/plugin/src/test/resources/rest-api-spec/test/ml/estimate_model_memory.yml
index 8253f12dda70e..f0407860b6a58 100644
--- a/x-pack/plugin/src/test/resources/rest-api-spec/test/ml/estimate_model_memory.yml
+++ b/x-pack/plugin/src/test/resources/rest-api-spec/test/ml/estimate_model_memory.yml
@@ -12,7 +12,7 @@
               "airline": 50000
             }
           }
-  - match: { model_memory_estimate: "3135mb" }
+  - match: { model_memory_estimate: "2094mb" }
 
 ---
 "Test by field also influencer":
@@ -32,7 +32,7 @@
               "airline": 500
             }
           }
-  - match: { model_memory_estimate: "3135mb" }
+  - match: { model_memory_estimate: "2094mb" }
 
 ---
 "Test by field with independent influencer":
@@ -52,7 +52,63 @@
               "country": 500
             }
           }
-  - match: { model_memory_estimate: "3140mb" }
+  - match: { model_memory_estimate: "2099mb" }
+
+---
+"Test over field":
+  - do:
+      ml.estimate_model_memory:
+        body: >
+          {
+            "analysis_config": {
+              "bucket_span": "1h",
+              "detectors": [{"function": "max", "field_name": "responsetime", "over_field_name": "airline"}]
+            },
+            "overall_cardinality": {
+              "airline": 50000
+            }
+          }
+  - match: { model_memory_estimate: "35mb" }
+
+---
+"Test over field also influencer":
+  - do:
+      ml.estimate_model_memory:
+        body: >
+          {
+            "analysis_config": {
+              "bucket_span": "1h",
+              "detectors": [{"function": "max", "field_name": "responsetime", "over_field_name": "airline"}],
+              "influencers": [ "airline" ]
+            },
+            "overall_cardinality": {
+              "airline": 50000
+            },
+            "max_bucket_cardinality": {
+              "airline": 500
+            }
+          }
+  - match: { model_memory_estimate: "35mb" }
+
+---
+"Test over field with independent influencer":
+  - do:
+      ml.estimate_model_memory:
+        body: >
+          {
+            "analysis_config": {
+              "bucket_span": "1h",
+              "detectors": [{"function": "max", "field_name": "responsetime", "over_field_name": "airline"}],
+              "influencers": [ "country" ]
+            },
+            "overall_cardinality": {
+              "airline": 50000
+            },
+            "max_bucket_cardinality": {
+              "country": 500
+            }
+          }
+  - match: { model_memory_estimate: "40mb" }
 
 ---
 "Test partition field":
@@ -125,7 +181,7 @@
               "country": 600
             }
           }
-  - match: { model_memory_estimate: "150010mb" }
+  - match: { model_memory_estimate: "100060mb" }
 
 ---
 "Test by and partition fields also influencers":
@@ -147,7 +203,7 @@
               "country": 40
             }
           }
-  - match: { model_memory_estimate: "150010mb" }
+  - match: { model_memory_estimate: "100060mb" }
 
 ---
 "Test by and partition fields with independent influencer":
@@ -168,5 +224,65 @@
               "src_ip": 500
             }
           }
-  - match: { model_memory_estimate: "150015mb" }
+  - match: { model_memory_estimate: "100065mb" }
+
+---
+"Test over and partition field":
+  - do:
+      ml.estimate_model_memory:
+        body: >
+          {
+            "analysis_config": {
+              "bucket_span": "1h",
+              "detectors": [{"function": "max", "field_name": "responsetime", "over_field_name": "airline", "partition_field_name": "country"}]
+            },
+            "overall_cardinality": {
+              "airline": 4000,
+              "country": 600
+            }
+          }
+  - match: { model_memory_estimate: "1220mb" }
+
+---
+"Test over and partition fields also influencers":
+  - do:
+      ml.estimate_model_memory:
+        body: >
+          {
+            "analysis_config": {
+              "bucket_span": "1h",
+              "detectors": [{"function": "max", "field_name": "responsetime", "over_field_name": "airline", "partition_field_name": "country"}],
+              "influencers": [ "airline", "country" ]
+            },
+            "overall_cardinality": {
+              "airline": 4000,
+              "country": 600
+            },
+            "max_bucket_cardinality": {
+              "airline": 60,
+              "country": 40
+            }
+          }
+  - match: { model_memory_estimate: "1220mb" }
+
+---
+"Test over and partition fields with independent influencer":
+  - do:
+      ml.estimate_model_memory:
+        body: >
+          {
+            "analysis_config": {
+              "bucket_span": "1h",
+              "detectors": [{"function": "max", "field_name": "responsetime", "over_field_name": "airline", "partition_field_name": "country"}],
+              "influencers": [ "src_ip" ]
+            },
+            "overall_cardinality": {
+              "airline": 4000,
+              "country": 600
+            },
+            "max_bucket_cardinality": {
+              "src_ip": 500
+            }
+          }
+  - match: { model_memory_estimate: "1225mb" }