From 35c6dab58d67c21a45387dee2516fe7940dffc92 Mon Sep 17 00:00:00 2001 From: Dimitris Athanasiou Date: Fri, 22 Nov 2019 20:08:14 +0200 Subject: [PATCH] [7.x][ML] Explain data frame analytics API (#49455) This commit replaces the _estimate_memory_usage API with a new API, the _explain API. The API consolidates information that is useful before creating a data frame analytics job. It includes: - memory estimation - field selection explanation Memory estimation is moved here from what was previously calculated in the _estimate_memory_usage API. Field selection is a new feature that explains to the user whether each available field was selected to be included or not in the analysis. In the case it was not included, it also explains the reason why. Backport of #49455 --- .../client/MLRequestConverters.java | 18 +- .../client/MachineLearningClient.java | 37 ++- .../ml/ExplainDataFrameAnalyticsRequest.java | 72 ++++ .../ml/ExplainDataFrameAnalyticsResponse.java | 94 ++++++ .../ml/dataframe/explain/FieldSelection.java | 163 +++++++++ .../explain/MemoryEstimation.java} | 23 +- .../client/MLRequestConvertersTests.java | 28 +- .../client/MachineLearningIT.java | 41 ++- .../MlClientDocumentationIT.java | 63 ++-- ...ExplainDataFrameAnalyticsRequestTests.java | 44 +++ ...xplainDataFrameAnalyticsResponseTests.java | 54 +++ .../explain/FieldSelectionTests.java | 57 ++++ .../explain/MemoryEstimationTests.java} | 18 +- .../ml/estimate-memory-usage.asciidoc | 36 -- .../ml/explain-data-frame-analytics.asciidoc | 48 +++ .../high-level/supported-apis.asciidoc | 4 +- ...estimate-memory-usage-dfanalytics.asciidoc | 80 ----- .../apis/explain-dfanalytics.asciidoc | 159 +++++++++ .../ml/df-analytics/apis/index.asciidoc | 8 +- .../xpack/core/XPackClientPlugin.java | 12 +- .../ml/action/EstimateMemoryUsageAction.java | 119 ------- .../ExplainDataFrameAnalyticsAction.java | 101 ++++++ .../action/PutDataFrameAnalyticsAction.java | 9 +- .../dataframe/DataFrameAnalyticsConfig.java | 4 +- .../ml/dataframe/explain/FieldSelection.java | 184 +++++++++++ .../dataframe/explain/MemoryEstimation.java | 103 ++++++ ...stimateMemoryUsageActionResponseTests.java | 54 --- ...DataFrameAnalyticsActionResponseTests.java | 42 +++ .../DataFrameAnalyticsConfigTests.java | 12 +- .../explain/FieldSelectionTests.java | 45 +++ .../explain/MemoryEstimationTests.java | 61 ++++ .../ml/qa/ml-with-security/build.gradle | 5 +- .../xpack/ml/MachineLearning.java | 14 +- .../TransportEstimateMemoryUsageAction.java | 130 -------- ...nsportExplainDataFrameAnalyticsAction.java | 156 +++++++++ ...ransportStartDataFrameAnalyticsAction.java | 79 +++-- .../DataFrameDataExtractorFactory.java | 29 +- .../extractor/ExtractedFieldsDetector.java | 144 +++++--- .../MemoryUsageEstimationProcessManager.java | 4 +- .../RestEstimateMemoryUsageAction.java | 38 --- .../RestExplainDataFrameAnalyticsAction.java | 84 +++++ .../ExtractedFieldsDetectorTests.java | 243 ++++++++++---- .../api/ml.estimate_memory_usage.json | 21 -- .../api/ml.explain_data_frame_analytics.json | 31 ++ ...rame_analytics_memory_usage_estimation.yml | 84 ----- .../test/ml/explain_data_frame_analytics.yml | 308 ++++++++++++++++++ 46 files changed, 2312 insertions(+), 851 deletions(-) create mode 100644 client/rest-high-level/src/main/java/org/elasticsearch/client/ml/ExplainDataFrameAnalyticsRequest.java create mode 100644 client/rest-high-level/src/main/java/org/elasticsearch/client/ml/ExplainDataFrameAnalyticsResponse.java create mode 100644 
client/rest-high-level/src/main/java/org/elasticsearch/client/ml/dataframe/explain/FieldSelection.java rename client/rest-high-level/src/main/java/org/elasticsearch/client/ml/{EstimateMemoryUsageResponse.java => dataframe/explain/MemoryEstimation.java} (81%) create mode 100644 client/rest-high-level/src/test/java/org/elasticsearch/client/ml/ExplainDataFrameAnalyticsRequestTests.java create mode 100644 client/rest-high-level/src/test/java/org/elasticsearch/client/ml/ExplainDataFrameAnalyticsResponseTests.java create mode 100644 client/rest-high-level/src/test/java/org/elasticsearch/client/ml/dataframe/explain/FieldSelectionTests.java rename client/rest-high-level/src/test/java/org/elasticsearch/client/ml/{EstimateMemoryUsageResponseTests.java => dataframe/explain/MemoryEstimationTests.java} (68%) delete mode 100644 docs/java-rest/high-level/ml/estimate-memory-usage.asciidoc create mode 100644 docs/java-rest/high-level/ml/explain-data-frame-analytics.asciidoc delete mode 100644 docs/reference/ml/df-analytics/apis/estimate-memory-usage-dfanalytics.asciidoc create mode 100644 docs/reference/ml/df-analytics/apis/explain-dfanalytics.asciidoc delete mode 100644 x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/ml/action/EstimateMemoryUsageAction.java create mode 100644 x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/ml/action/ExplainDataFrameAnalyticsAction.java create mode 100644 x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/ml/dataframe/explain/FieldSelection.java create mode 100644 x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/ml/dataframe/explain/MemoryEstimation.java delete mode 100644 x-pack/plugin/core/src/test/java/org/elasticsearch/xpack/core/ml/action/EstimateMemoryUsageActionResponseTests.java create mode 100644 x-pack/plugin/core/src/test/java/org/elasticsearch/xpack/core/ml/action/ExplainDataFrameAnalyticsActionResponseTests.java create mode 100644 x-pack/plugin/core/src/test/java/org/elasticsearch/xpack/core/ml/dataframe/explain/FieldSelectionTests.java create mode 100644 x-pack/plugin/core/src/test/java/org/elasticsearch/xpack/core/ml/dataframe/explain/MemoryEstimationTests.java delete mode 100644 x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/action/TransportEstimateMemoryUsageAction.java create mode 100644 x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/action/TransportExplainDataFrameAnalyticsAction.java delete mode 100644 x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/rest/dataframe/RestEstimateMemoryUsageAction.java create mode 100644 x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/rest/dataframe/RestExplainDataFrameAnalyticsAction.java delete mode 100644 x-pack/plugin/src/test/resources/rest-api-spec/api/ml.estimate_memory_usage.json create mode 100644 x-pack/plugin/src/test/resources/rest-api-spec/api/ml.explain_data_frame_analytics.json delete mode 100644 x-pack/plugin/src/test/resources/rest-api-spec/test/ml/data_frame_analytics_memory_usage_estimation.yml create mode 100644 x-pack/plugin/src/test/resources/rest-api-spec/test/ml/explain_data_frame_analytics.yml diff --git a/client/rest-high-level/src/main/java/org/elasticsearch/client/MLRequestConverters.java b/client/rest-high-level/src/main/java/org/elasticsearch/client/MLRequestConverters.java index 2fc23acd13430..0a1a18eeb4461 100644 --- a/client/rest-high-level/src/main/java/org/elasticsearch/client/MLRequestConverters.java +++ b/client/rest-high-level/src/main/java/org/elasticsearch/client/MLRequestConverters.java @@ 
-29,6 +29,7 @@ import org.elasticsearch.client.RequestConverters.EndpointBuilder; import org.elasticsearch.client.core.PageParams; import org.elasticsearch.client.ml.CloseJobRequest; +import org.elasticsearch.client.ml.ExplainDataFrameAnalyticsRequest; import org.elasticsearch.client.ml.DeleteCalendarEventRequest; import org.elasticsearch.client.ml.DeleteCalendarJobRequest; import org.elasticsearch.client.ml.DeleteCalendarRequest; @@ -701,12 +702,17 @@ static Request evaluateDataFrame(EvaluateDataFrameRequest evaluateRequest) throw return request; } - static Request estimateMemoryUsage(PutDataFrameAnalyticsRequest estimateRequest) throws IOException { - String endpoint = new EndpointBuilder() - .addPathPartAsIs("_ml", "data_frame", "analytics", "_estimate_memory_usage") - .build(); - Request request = new Request(HttpPost.METHOD_NAME, endpoint); - request.setEntity(createEntity(estimateRequest, REQUEST_BODY_CONTENT_TYPE)); + static Request explainDataFrameAnalytics(ExplainDataFrameAnalyticsRequest explainRequest) throws IOException { + EndpointBuilder endpoint = new EndpointBuilder().addPathPartAsIs("_ml", "data_frame", "analytics"); + if (explainRequest.getId() != null) { + endpoint.addPathPart(explainRequest.getId()); + } + endpoint.addPathPartAsIs("_explain"); + + Request request = new Request(HttpPost.METHOD_NAME, endpoint.build()); + if (explainRequest.getConfig() != null) { + request.setEntity(createEntity(explainRequest.getConfig(), REQUEST_BODY_CONTENT_TYPE)); + } return request; } diff --git a/client/rest-high-level/src/main/java/org/elasticsearch/client/MachineLearningClient.java b/client/rest-high-level/src/main/java/org/elasticsearch/client/MachineLearningClient.java index 2ddc8839f9648..468cd535c01dc 100644 --- a/client/rest-high-level/src/main/java/org/elasticsearch/client/MachineLearningClient.java +++ b/client/rest-high-level/src/main/java/org/elasticsearch/client/MachineLearningClient.java @@ -22,6 +22,8 @@ import org.elasticsearch.action.support.master.AcknowledgedResponse; import org.elasticsearch.client.ml.CloseJobRequest; import org.elasticsearch.client.ml.CloseJobResponse; +import org.elasticsearch.client.ml.ExplainDataFrameAnalyticsRequest; +import org.elasticsearch.client.ml.ExplainDataFrameAnalyticsResponse; import org.elasticsearch.client.ml.DeleteCalendarEventRequest; import org.elasticsearch.client.ml.DeleteCalendarJobRequest; import org.elasticsearch.client.ml.DeleteCalendarRequest; @@ -34,7 +36,6 @@ import org.elasticsearch.client.ml.DeleteJobRequest; import org.elasticsearch.client.ml.DeleteJobResponse; import org.elasticsearch.client.ml.DeleteModelSnapshotRequest; -import org.elasticsearch.client.ml.EstimateMemoryUsageResponse; import org.elasticsearch.client.ml.EvaluateDataFrameRequest; import org.elasticsearch.client.ml.EvaluateDataFrameResponse; import org.elasticsearch.client.ml.FindFileStructureRequest; @@ -2249,46 +2250,46 @@ public Cancellable evaluateDataFrameAsync(EvaluateDataFrameRequest request, Requ } /** - * Estimates memory usage for the given Data Frame Analytics + * Explains the given Data Frame Analytics *

* For additional info - * see - * Estimate Memory Usage for Data Frame Analytics documentation + * see + * Explain Data Frame Analytics documentation * - * @param request The {@link PutDataFrameAnalyticsRequest} + * @param request The {@link ExplainDataFrameAnalyticsRequest} * @param options Additional request options (e.g. headers), use {@link RequestOptions#DEFAULT} if nothing needs to be customized - * @return {@link EstimateMemoryUsageResponse} response object + * @return {@link ExplainDataFrameAnalyticsResponse} response object * @throws IOException when there is a serialization issue sending the request or receiving the response */ - public EstimateMemoryUsageResponse estimateMemoryUsage(PutDataFrameAnalyticsRequest request, - RequestOptions options) throws IOException { + public ExplainDataFrameAnalyticsResponse explainDataFrameAnalytics(ExplainDataFrameAnalyticsRequest request, + RequestOptions options) throws IOException { return restHighLevelClient.performRequestAndParseEntity( request, - MLRequestConverters::estimateMemoryUsage, + MLRequestConverters::explainDataFrameAnalytics, options, - EstimateMemoryUsageResponse::fromXContent, + ExplainDataFrameAnalyticsResponse::fromXContent, Collections.emptySet()); } /** - * Estimates memory usage for the given Data Frame Analytics asynchronously and notifies listener upon completion + * Explains the given Data Frame Analytics asynchronously and notifies listener upon completion *

* For additional info - * see - * Estimate Memory Usage for Data Frame Analytics documentation + * see + * Explain Data Frame Analytics documentation * - * @param request The {@link PutDataFrameAnalyticsRequest} + * @param request The {@link ExplainDataFrameAnalyticsRequest} * @param options Additional request options (e.g. headers), use {@link RequestOptions#DEFAULT} if nothing needs to be customized * @param listener Listener to be notified upon request completion * @return cancellable that may be used to cancel the request */ - public Cancellable estimateMemoryUsageAsync(PutDataFrameAnalyticsRequest request, RequestOptions options, - ActionListener listener) { + public Cancellable explainDataFrameAnalyticsAsync(ExplainDataFrameAnalyticsRequest request, RequestOptions options, + ActionListener listener) { return restHighLevelClient.performRequestAsyncAndParseEntity( request, - MLRequestConverters::estimateMemoryUsage, + MLRequestConverters::explainDataFrameAnalytics, options, - EstimateMemoryUsageResponse::fromXContent, + ExplainDataFrameAnalyticsResponse::fromXContent, listener, Collections.emptySet()); } diff --git a/client/rest-high-level/src/main/java/org/elasticsearch/client/ml/ExplainDataFrameAnalyticsRequest.java b/client/rest-high-level/src/main/java/org/elasticsearch/client/ml/ExplainDataFrameAnalyticsRequest.java new file mode 100644 index 0000000000000..880e87b2eea9b --- /dev/null +++ b/client/rest-high-level/src/main/java/org/elasticsearch/client/ml/ExplainDataFrameAnalyticsRequest.java @@ -0,0 +1,72 @@ +/* + * Licensed to Elasticsearch under one or more contributor + * license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright + * ownership. Elasticsearch licenses this file to you under + * the Apache License, Version 2.0 (the "License"); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.elasticsearch.client.ml; + +import org.elasticsearch.client.Validatable; +import org.elasticsearch.client.ml.dataframe.DataFrameAnalyticsConfig; +import org.elasticsearch.common.Nullable; + +import java.util.Objects; + +/** + * Request to explain the following about a data frame analytics job: + *
+ * <ul>
+ *     <li>field selection: which fields are included or not in the analysis</li>
+ *     <li>memory estimation: how much memory is estimated to be required for the analysis</li>
+ * </ul>
+ */ +public class ExplainDataFrameAnalyticsRequest implements Validatable { + + private final String id; + private final DataFrameAnalyticsConfig config; + + public ExplainDataFrameAnalyticsRequest(String id) { + this.id = Objects.requireNonNull(id); + this.config = null; + } + + public ExplainDataFrameAnalyticsRequest(DataFrameAnalyticsConfig config) { + this.id = null; + this.config = Objects.requireNonNull(config); + } + + @Nullable + public String getId() { + return id; + } + + @Nullable + public DataFrameAnalyticsConfig getConfig() { + return config; + } + + @Override + public boolean equals(Object o) { + if (this == o) return true; + if (o == null || getClass() != o.getClass()) return false; + + ExplainDataFrameAnalyticsRequest other = (ExplainDataFrameAnalyticsRequest) o; + return Objects.equals(id, other.id) && Objects.equals(config, other.config); + } + + @Override + public int hashCode() { + return Objects.hash(id, config); + } +} diff --git a/client/rest-high-level/src/main/java/org/elasticsearch/client/ml/ExplainDataFrameAnalyticsResponse.java b/client/rest-high-level/src/main/java/org/elasticsearch/client/ml/ExplainDataFrameAnalyticsResponse.java new file mode 100644 index 0000000000000..5879ffc7154bd --- /dev/null +++ b/client/rest-high-level/src/main/java/org/elasticsearch/client/ml/ExplainDataFrameAnalyticsResponse.java @@ -0,0 +1,94 @@ +/* + * Licensed to Elasticsearch under one or more contributor + * license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright + * ownership. Elasticsearch licenses this file to you under + * the Apache License, Version 2.0 (the "License"); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ +package org.elasticsearch.client.ml; + +import org.elasticsearch.client.ml.dataframe.explain.FieldSelection; +import org.elasticsearch.client.ml.dataframe.explain.MemoryEstimation; +import org.elasticsearch.common.ParseField; +import org.elasticsearch.common.xcontent.ConstructingObjectParser; +import org.elasticsearch.common.xcontent.ToXContentObject; +import org.elasticsearch.common.xcontent.XContentBuilder; +import org.elasticsearch.common.xcontent.XContentParser; + +import java.io.IOException; +import java.util.List; +import java.util.Objects; + +public class ExplainDataFrameAnalyticsResponse implements ToXContentObject { + + public static final ParseField TYPE = new ParseField("explain_data_frame_analytics_response"); + + public static final ParseField FIELD_SELECTION = new ParseField("field_selection"); + public static final ParseField MEMORY_ESTIMATION = new ParseField("memory_estimation"); + + public static ExplainDataFrameAnalyticsResponse fromXContent(XContentParser parser) throws IOException { + return PARSER.parse(parser, null); + } + + @SuppressWarnings("unchecked") + static final ConstructingObjectParser PARSER = + new ConstructingObjectParser<>( + TYPE.getPreferredName(), true, + args -> new ExplainDataFrameAnalyticsResponse((List) args[0], (MemoryEstimation) args[1])); + + static { + PARSER.declareObjectArray(ConstructingObjectParser.constructorArg(), FieldSelection.PARSER, FIELD_SELECTION); + PARSER.declareObject(ConstructingObjectParser.constructorArg(), MemoryEstimation.PARSER, MEMORY_ESTIMATION); + } + + private final List fieldSelection; + private final MemoryEstimation memoryEstimation; + + public ExplainDataFrameAnalyticsResponse(List fieldSelection, MemoryEstimation memoryEstimation) { + this.fieldSelection = Objects.requireNonNull(fieldSelection); + this.memoryEstimation = Objects.requireNonNull(memoryEstimation); + } + + @Override + public XContentBuilder toXContent(XContentBuilder builder, Params params) throws IOException { + builder.startObject(); + builder.field(FIELD_SELECTION.getPreferredName(), fieldSelection); + builder.field(MEMORY_ESTIMATION.getPreferredName(), memoryEstimation); + builder.endObject(); + return builder; + } + + @Override + public boolean equals(Object other) { + if (this == other) return true; + if (other == null || getClass() != other.getClass()) return false; + + ExplainDataFrameAnalyticsResponse that = (ExplainDataFrameAnalyticsResponse) other; + return Objects.equals(fieldSelection, that.fieldSelection) + && Objects.equals(memoryEstimation, that.memoryEstimation); + } + + @Override + public int hashCode() { + return Objects.hash(fieldSelection, memoryEstimation); + } + + public MemoryEstimation getMemoryEstimation() { + return memoryEstimation; + } + + public List getFieldSelection() { + return fieldSelection; + } +} diff --git a/client/rest-high-level/src/main/java/org/elasticsearch/client/ml/dataframe/explain/FieldSelection.java b/client/rest-high-level/src/main/java/org/elasticsearch/client/ml/dataframe/explain/FieldSelection.java new file mode 100644 index 0000000000000..4483b6fa5e09a --- /dev/null +++ b/client/rest-high-level/src/main/java/org/elasticsearch/client/ml/dataframe/explain/FieldSelection.java @@ -0,0 +1,163 @@ +/* + * Licensed to Elasticsearch under one or more contributor + * license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright + * ownership. 
Elasticsearch licenses this file to you under + * the Apache License, Version 2.0 (the "License"); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.elasticsearch.client.ml.dataframe.explain; + +import org.elasticsearch.common.Nullable; +import org.elasticsearch.common.ParseField; +import org.elasticsearch.common.xcontent.ConstructingObjectParser; +import org.elasticsearch.common.xcontent.ObjectParser; +import org.elasticsearch.common.xcontent.ToXContentObject; +import org.elasticsearch.common.xcontent.XContentBuilder; +import org.elasticsearch.common.xcontent.XContentParser; + +import java.io.IOException; +import java.util.Collections; +import java.util.HashSet; +import java.util.List; +import java.util.Locale; +import java.util.Objects; +import java.util.Set; + +public class FieldSelection implements ToXContentObject { + + private static final ParseField NAME = new ParseField("name"); + private static final ParseField MAPPING_TYPES = new ParseField("mapping_types"); + private static final ParseField IS_INCLUDED = new ParseField("is_included"); + private static final ParseField IS_REQUIRED = new ParseField("is_required"); + private static final ParseField FEATURE_TYPE = new ParseField("feature_type"); + private static final ParseField REASON = new ParseField("reason"); + + public enum FeatureType { + CATEGORICAL, NUMERICAL; + + public static FeatureType fromString(String value) { + return FeatureType.valueOf(value.toUpperCase(Locale.ROOT)); + } + + @Override + public String toString() { + return name().toLowerCase(Locale.ROOT); + } + } + + @SuppressWarnings("unchecked") + public static ConstructingObjectParser PARSER = new ConstructingObjectParser<>("field_selection", true, + a -> new FieldSelection((String) a[0], new HashSet<>((List) a[1]), (boolean) a[2], (boolean) a[3], (FeatureType) a[4], + (String) a[5])); + + static { + PARSER.declareString(ConstructingObjectParser.constructorArg(), NAME); + PARSER.declareStringArray(ConstructingObjectParser.constructorArg(), MAPPING_TYPES); + PARSER.declareBoolean(ConstructingObjectParser.constructorArg(), IS_INCLUDED); + PARSER.declareBoolean(ConstructingObjectParser.constructorArg(), IS_REQUIRED); + PARSER.declareField(ConstructingObjectParser.optionalConstructorArg(), p -> { + if (p.currentToken() == XContentParser.Token.VALUE_STRING) { + return FeatureType.fromString(p.text()); + } + throw new IllegalArgumentException("Unsupported token [" + p.currentToken() + "]"); + }, FEATURE_TYPE, ObjectParser.ValueType.STRING); + PARSER.declareString(ConstructingObjectParser.optionalConstructorArg(), REASON); + } + + private final String name; + private final Set mappingTypes; + private final boolean isIncluded; + private final boolean isRequired; + private final FeatureType featureType; + private final String reason; + + public static FieldSelection included(String name, Set mappingTypes, boolean isRequired, FeatureType featureType) { + return new FieldSelection(name, mappingTypes, true, isRequired, featureType, null); + } + + public static FieldSelection excluded(String name, Set mappingTypes, 
String reason) { + return new FieldSelection(name, mappingTypes, false, false, null, reason); + } + + FieldSelection(String name, Set mappingTypes, boolean isIncluded, boolean isRequired, @Nullable FeatureType featureType, + @Nullable String reason) { + this.name = Objects.requireNonNull(name); + this.mappingTypes = Collections.unmodifiableSet(mappingTypes); + this.isIncluded = isIncluded; + this.isRequired = isRequired; + this.featureType = featureType; + this.reason = reason; + } + + @Override + public XContentBuilder toXContent(XContentBuilder builder, Params params) throws IOException { + builder.startObject(); + builder.field(NAME.getPreferredName(), name); + builder.field(MAPPING_TYPES.getPreferredName(), mappingTypes); + builder.field(IS_INCLUDED.getPreferredName(), isIncluded); + builder.field(IS_REQUIRED.getPreferredName(), isRequired); + if (featureType != null) { + builder.field(FEATURE_TYPE.getPreferredName(), featureType); + } + if (reason != null) { + builder.field(REASON.getPreferredName(), reason); + } + builder.endObject(); + return builder; + } + + @Override + public boolean equals(Object o) { + if (this == o) return true; + if (o == null || getClass() != o.getClass()) return false; + FieldSelection that = (FieldSelection) o; + return Objects.equals(name, that.name) + && Objects.equals(mappingTypes, that.mappingTypes) + && isIncluded == that.isIncluded + && isRequired == that.isRequired + && Objects.equals(featureType, that.featureType) + && Objects.equals(reason, that.reason); + } + + @Override + public int hashCode() { + return Objects.hash(name, mappingTypes, isIncluded, isRequired, featureType, reason); + } + + public String getName() { + return name; + } + + public Set getMappingTypes() { + return mappingTypes; + } + + public boolean isIncluded() { + return isIncluded; + } + + public boolean isRequired() { + return isRequired; + } + + @Nullable + public FeatureType getFeatureType() { + return featureType; + } + + @Nullable + public String getReason() { + return reason; + } +} diff --git a/client/rest-high-level/src/main/java/org/elasticsearch/client/ml/EstimateMemoryUsageResponse.java b/client/rest-high-level/src/main/java/org/elasticsearch/client/ml/dataframe/explain/MemoryEstimation.java similarity index 81% rename from client/rest-high-level/src/main/java/org/elasticsearch/client/ml/EstimateMemoryUsageResponse.java rename to client/rest-high-level/src/main/java/org/elasticsearch/client/ml/dataframe/explain/MemoryEstimation.java index c97cc545cdb79..9151b8ce5dd32 100644 --- a/client/rest-high-level/src/main/java/org/elasticsearch/client/ml/EstimateMemoryUsageResponse.java +++ b/client/rest-high-level/src/main/java/org/elasticsearch/client/ml/dataframe/explain/MemoryEstimation.java @@ -16,8 +16,7 @@ * specific language governing permissions and limitations * under the License. 
*/ - -package org.elasticsearch.client.ml; +package org.elasticsearch.client.ml.dataframe.explain; import org.elasticsearch.common.Nullable; import org.elasticsearch.common.ParseField; @@ -26,23 +25,19 @@ import org.elasticsearch.common.xcontent.ObjectParser; import org.elasticsearch.common.xcontent.ToXContentObject; import org.elasticsearch.common.xcontent.XContentBuilder; -import org.elasticsearch.common.xcontent.XContentParser; import java.io.IOException; import java.util.Objects; import static org.elasticsearch.common.xcontent.ConstructingObjectParser.optionalConstructorArg; -public class EstimateMemoryUsageResponse implements ToXContentObject { - +public class MemoryEstimation implements ToXContentObject { + public static final ParseField EXPECTED_MEMORY_WITHOUT_DISK = new ParseField("expected_memory_without_disk"); public static final ParseField EXPECTED_MEMORY_WITH_DISK = new ParseField("expected_memory_with_disk"); - static final ConstructingObjectParser PARSER = - new ConstructingObjectParser<>( - "estimate_memory_usage_response", - true, - args -> new EstimateMemoryUsageResponse((ByteSizeValue) args[0], (ByteSizeValue) args[1])); + public static final ConstructingObjectParser PARSER = new ConstructingObjectParser<>("memory_estimation", true, + a -> new MemoryEstimation((ByteSizeValue) a[0], (ByteSizeValue) a[1])); static { PARSER.declareField( @@ -57,14 +52,10 @@ public class EstimateMemoryUsageResponse implements ToXContentObject { ObjectParser.ValueType.VALUE); } - public static EstimateMemoryUsageResponse fromXContent(XContentParser parser) { - return PARSER.apply(parser, null); - } - private final ByteSizeValue expectedMemoryWithoutDisk; private final ByteSizeValue expectedMemoryWithDisk; - public EstimateMemoryUsageResponse(@Nullable ByteSizeValue expectedMemoryWithoutDisk, @Nullable ByteSizeValue expectedMemoryWithDisk) { + public MemoryEstimation(@Nullable ByteSizeValue expectedMemoryWithoutDisk, @Nullable ByteSizeValue expectedMemoryWithDisk) { this.expectedMemoryWithoutDisk = expectedMemoryWithoutDisk; this.expectedMemoryWithDisk = expectedMemoryWithDisk; } @@ -99,7 +90,7 @@ public boolean equals(Object other) { return false; } - EstimateMemoryUsageResponse that = (EstimateMemoryUsageResponse) other; + MemoryEstimation that = (MemoryEstimation) other; return Objects.equals(expectedMemoryWithoutDisk, that.expectedMemoryWithoutDisk) && Objects.equals(expectedMemoryWithDisk, that.expectedMemoryWithDisk); } diff --git a/client/rest-high-level/src/test/java/org/elasticsearch/client/MLRequestConvertersTests.java b/client/rest-high-level/src/test/java/org/elasticsearch/client/MLRequestConvertersTests.java index db59054cdb87b..633e5363ff165 100644 --- a/client/rest-high-level/src/test/java/org/elasticsearch/client/MLRequestConvertersTests.java +++ b/client/rest-high-level/src/test/java/org/elasticsearch/client/MLRequestConvertersTests.java @@ -25,6 +25,7 @@ import org.apache.http.client.methods.HttpPut; import org.elasticsearch.client.core.PageParams; import org.elasticsearch.client.ml.CloseJobRequest; +import org.elasticsearch.client.ml.ExplainDataFrameAnalyticsRequest; import org.elasticsearch.client.ml.DeleteCalendarEventRequest; import org.elasticsearch.client.ml.DeleteCalendarJobRequest; import org.elasticsearch.client.ml.DeleteCalendarRequest; @@ -788,14 +789,25 @@ public void testEvaluateDataFrame() throws IOException { } } - public void testEstimateMemoryUsage() throws IOException { - PutDataFrameAnalyticsRequest estimateRequest = new 
PutDataFrameAnalyticsRequest(randomDataFrameAnalyticsConfig()); - Request request = MLRequestConverters.estimateMemoryUsage(estimateRequest); - assertEquals(HttpPost.METHOD_NAME, request.getMethod()); - assertEquals("/_ml/data_frame/analytics/_estimate_memory_usage", request.getEndpoint()); - try (XContentParser parser = createParser(JsonXContent.jsonXContent, request.getEntity().getContent())) { - DataFrameAnalyticsConfig parsedConfig = DataFrameAnalyticsConfig.fromXContent(parser); - assertThat(parsedConfig, equalTo(estimateRequest.getConfig())); + public void testExplainDataFrameAnalytics() throws IOException { + // Request with config + { + ExplainDataFrameAnalyticsRequest estimateRequest = new ExplainDataFrameAnalyticsRequest(randomDataFrameAnalyticsConfig()); + Request request = MLRequestConverters.explainDataFrameAnalytics(estimateRequest); + assertEquals(HttpPost.METHOD_NAME, request.getMethod()); + assertEquals("/_ml/data_frame/analytics/_explain", request.getEndpoint()); + try (XContentParser parser = createParser(JsonXContent.jsonXContent, request.getEntity().getContent())) { + DataFrameAnalyticsConfig parsedConfig = DataFrameAnalyticsConfig.fromXContent(parser); + assertThat(parsedConfig, equalTo(estimateRequest.getConfig())); + } + } + // Request with id + { + ExplainDataFrameAnalyticsRequest estimateRequest = new ExplainDataFrameAnalyticsRequest("foo"); + Request request = MLRequestConverters.explainDataFrameAnalytics(estimateRequest); + assertEquals(HttpPost.METHOD_NAME, request.getMethod()); + assertEquals("/_ml/data_frame/analytics/foo/_explain", request.getEndpoint()); + assertNull(request.getEntity()); } } diff --git a/client/rest-high-level/src/test/java/org/elasticsearch/client/MachineLearningIT.java b/client/rest-high-level/src/test/java/org/elasticsearch/client/MachineLearningIT.java index 361b36745509c..efb62b3f52689 100644 --- a/client/rest-high-level/src/test/java/org/elasticsearch/client/MachineLearningIT.java +++ b/client/rest-high-level/src/test/java/org/elasticsearch/client/MachineLearningIT.java @@ -32,6 +32,8 @@ import org.elasticsearch.client.indices.GetIndexRequest; import org.elasticsearch.client.ml.CloseJobRequest; import org.elasticsearch.client.ml.CloseJobResponse; +import org.elasticsearch.client.ml.ExplainDataFrameAnalyticsRequest; +import org.elasticsearch.client.ml.ExplainDataFrameAnalyticsResponse; import org.elasticsearch.client.ml.DeleteCalendarEventRequest; import org.elasticsearch.client.ml.DeleteCalendarJobRequest; import org.elasticsearch.client.ml.DeleteCalendarRequest; @@ -44,7 +46,6 @@ import org.elasticsearch.client.ml.DeleteJobRequest; import org.elasticsearch.client.ml.DeleteJobResponse; import org.elasticsearch.client.ml.DeleteModelSnapshotRequest; -import org.elasticsearch.client.ml.EstimateMemoryUsageResponse; import org.elasticsearch.client.ml.EvaluateDataFrameRequest; import org.elasticsearch.client.ml.EvaluateDataFrameResponse; import org.elasticsearch.client.ml.FindFileStructureRequest; @@ -140,6 +141,8 @@ import org.elasticsearch.client.ml.dataframe.evaluation.softclassification.ConfusionMatrixMetric; import org.elasticsearch.client.ml.dataframe.evaluation.softclassification.PrecisionMetric; import org.elasticsearch.client.ml.dataframe.evaluation.softclassification.RecallMetric; +import org.elasticsearch.client.ml.dataframe.explain.FieldSelection; +import org.elasticsearch.client.ml.dataframe.explain.MemoryEstimation; import org.elasticsearch.client.ml.filestructurefinder.FileStructure; import 
org.elasticsearch.client.ml.inference.TrainedModelConfig; import org.elasticsearch.client.ml.inference.TrainedModelDefinition; @@ -1996,8 +1999,8 @@ private void createIndex(String indexName, XContentBuilder mapping) throws IOExc highLevelClient().indices().create(new CreateIndexRequest(indexName).mapping(mapping), RequestOptions.DEFAULT); } - public void testEstimateMemoryUsage() throws IOException { - String indexName = "estimate-test-index"; + public void testExplainDataFrameAnalytics() throws IOException { + String indexName = "explain-df-test-index"; createIndex(indexName, mappingForSoftClassification()); BulkRequest bulk1 = new BulkRequest() .setRefreshPolicy(WriteRequest.RefreshPolicy.IMMEDIATE); @@ -2007,8 +2010,8 @@ public void testEstimateMemoryUsage() throws IOException { highLevelClient().bulk(bulk1, RequestOptions.DEFAULT); MachineLearningClient machineLearningClient = highLevelClient().machineLearning(); - PutDataFrameAnalyticsRequest estimateMemoryUsageRequest = - new PutDataFrameAnalyticsRequest( + ExplainDataFrameAnalyticsRequest explainRequest = + new ExplainDataFrameAnalyticsRequest( DataFrameAnalyticsConfig.builder() .setSource(DataFrameAnalyticsSource.builder().setIndex(indexName).build()) .setAnalysis(OutlierDetection.createDefault()) @@ -2019,11 +2022,16 @@ public void testEstimateMemoryUsage() throws IOException { ByteSizeValue upperBound = new ByteSizeValue(1, ByteSizeUnit.GB); // Data Frame has 10 rows, expect that the returned estimates fall within (1kB, 1GB) range. - EstimateMemoryUsageResponse response1 = - execute( - estimateMemoryUsageRequest, machineLearningClient::estimateMemoryUsage, machineLearningClient::estimateMemoryUsageAsync); - assertThat(response1.getExpectedMemoryWithoutDisk(), allOf(greaterThanOrEqualTo(lowerBound), lessThan(upperBound))); - assertThat(response1.getExpectedMemoryWithDisk(), allOf(greaterThanOrEqualTo(lowerBound), lessThan(upperBound))); + ExplainDataFrameAnalyticsResponse response1 = execute(explainRequest, machineLearningClient::explainDataFrameAnalytics, + machineLearningClient::explainDataFrameAnalyticsAsync); + + MemoryEstimation memoryEstimation1 = response1.getMemoryEstimation(); + assertThat(memoryEstimation1.getExpectedMemoryWithoutDisk(), allOf(greaterThanOrEqualTo(lowerBound), lessThan(upperBound))); + assertThat(memoryEstimation1.getExpectedMemoryWithDisk(), allOf(greaterThanOrEqualTo(lowerBound), lessThan(upperBound))); + + List fieldSelection = response1.getFieldSelection(); + assertThat(fieldSelection.size(), equalTo(3)); + assertThat(fieldSelection.stream().map(FieldSelection::getName).collect(Collectors.toList()), contains("dataset", "label", "p")); BulkRequest bulk2 = new BulkRequest() .setRefreshPolicy(WriteRequest.RefreshPolicy.IMMEDIATE); @@ -2033,15 +2041,16 @@ public void testEstimateMemoryUsage() throws IOException { highLevelClient().bulk(bulk2, RequestOptions.DEFAULT); // Data Frame now has 100 rows, expect that the returned estimates will be greater than or equal to the previous ones. 
- EstimateMemoryUsageResponse response2 = + ExplainDataFrameAnalyticsResponse response2 = execute( - estimateMemoryUsageRequest, machineLearningClient::estimateMemoryUsage, machineLearningClient::estimateMemoryUsageAsync); + explainRequest, machineLearningClient::explainDataFrameAnalytics, machineLearningClient::explainDataFrameAnalyticsAsync); + MemoryEstimation memoryEstimation2 = response2.getMemoryEstimation(); assertThat( - response2.getExpectedMemoryWithoutDisk(), - allOf(greaterThanOrEqualTo(response1.getExpectedMemoryWithoutDisk()), lessThan(upperBound))); + memoryEstimation2.getExpectedMemoryWithoutDisk(), + allOf(greaterThanOrEqualTo(memoryEstimation1.getExpectedMemoryWithoutDisk()), lessThan(upperBound))); assertThat( - response2.getExpectedMemoryWithDisk(), - allOf(greaterThanOrEqualTo(response1.getExpectedMemoryWithDisk()), lessThan(upperBound))); + memoryEstimation2.getExpectedMemoryWithDisk(), + allOf(greaterThanOrEqualTo(memoryEstimation1.getExpectedMemoryWithDisk()), lessThan(upperBound))); } public void testGetTrainedModels() throws Exception { diff --git a/client/rest-high-level/src/test/java/org/elasticsearch/client/documentation/MlClientDocumentationIT.java b/client/rest-high-level/src/test/java/org/elasticsearch/client/documentation/MlClientDocumentationIT.java index da12420535f67..8a118672d95e6 100644 --- a/client/rest-high-level/src/test/java/org/elasticsearch/client/documentation/MlClientDocumentationIT.java +++ b/client/rest-high-level/src/test/java/org/elasticsearch/client/documentation/MlClientDocumentationIT.java @@ -36,6 +36,8 @@ import org.elasticsearch.client.indices.CreateIndexRequest; import org.elasticsearch.client.ml.CloseJobRequest; import org.elasticsearch.client.ml.CloseJobResponse; +import org.elasticsearch.client.ml.ExplainDataFrameAnalyticsRequest; +import org.elasticsearch.client.ml.ExplainDataFrameAnalyticsResponse; import org.elasticsearch.client.ml.DeleteCalendarEventRequest; import org.elasticsearch.client.ml.DeleteCalendarJobRequest; import org.elasticsearch.client.ml.DeleteCalendarRequest; @@ -48,7 +50,6 @@ import org.elasticsearch.client.ml.DeleteJobRequest; import org.elasticsearch.client.ml.DeleteJobResponse; import org.elasticsearch.client.ml.DeleteModelSnapshotRequest; -import org.elasticsearch.client.ml.EstimateMemoryUsageResponse; import org.elasticsearch.client.ml.EvaluateDataFrameRequest; import org.elasticsearch.client.ml.EvaluateDataFrameResponse; import org.elasticsearch.client.ml.FindFileStructureRequest; @@ -155,6 +156,8 @@ import org.elasticsearch.client.ml.dataframe.evaluation.softclassification.ConfusionMatrixMetric.ConfusionMatrix; import org.elasticsearch.client.ml.dataframe.evaluation.softclassification.PrecisionMetric; import org.elasticsearch.client.ml.dataframe.evaluation.softclassification.RecallMetric; +import org.elasticsearch.client.ml.dataframe.explain.FieldSelection; +import org.elasticsearch.client.ml.dataframe.explain.MemoryEstimation; import org.elasticsearch.client.ml.filestructurefinder.FileStructure; import org.elasticsearch.client.ml.inference.TrainedModelConfig; import org.elasticsearch.client.ml.inference.TrainedModelDefinition; @@ -213,6 +216,7 @@ import static org.hamcrest.Matchers.allOf; import static org.hamcrest.Matchers.closeTo; +import static org.hamcrest.Matchers.contains; import static org.hamcrest.Matchers.containsInAnyOrder; import static org.hamcrest.Matchers.equalTo; import static org.hamcrest.Matchers.greaterThan; @@ -3460,10 +3464,10 @@ public void testEvaluateDataFrame_Regression() throws 
Exception { } } - public void testEstimateMemoryUsage() throws Exception { - createIndex("estimate-test-source-index"); + public void testExplainDataFrameAnalytics() throws Exception { + createIndex("explain-df-test-source-index"); BulkRequest bulkRequest = - new BulkRequest("estimate-test-source-index") + new BulkRequest("explain-df-test-source-index") .setRefreshPolicy(WriteRequest.RefreshPolicy.IMMEDIATE); for (int i = 0; i < 10; ++i) { bulkRequest.add(new IndexRequest().source(XContentType.JSON, "timestamp", 123456789L, "total", 10L)); @@ -3471,22 +3475,33 @@ public void testEstimateMemoryUsage() throws Exception { RestHighLevelClient client = highLevelClient(); client.bulk(bulkRequest, RequestOptions.DEFAULT); { - // tag::estimate-memory-usage-request + // tag::explain-data-frame-analytics-id-request + ExplainDataFrameAnalyticsRequest request = new ExplainDataFrameAnalyticsRequest("existing_job_id"); // <1> + // end::explain-data-frame-analytics-id-request + + // tag::explain-data-frame-analytics-config-request DataFrameAnalyticsConfig config = DataFrameAnalyticsConfig.builder() - .setSource(DataFrameAnalyticsSource.builder().setIndex("estimate-test-source-index").build()) + .setSource(DataFrameAnalyticsSource.builder().setIndex("explain-df-test-source-index").build()) .setAnalysis(OutlierDetection.createDefault()) .build(); - PutDataFrameAnalyticsRequest request = new PutDataFrameAnalyticsRequest(config); // <1> - // end::estimate-memory-usage-request + request = new ExplainDataFrameAnalyticsRequest(config); // <1> + // end::explain-data-frame-analytics-config-request + + // tag::explain-data-frame-analytics-execute + ExplainDataFrameAnalyticsResponse response = client.machineLearning().explainDataFrameAnalytics(request, + RequestOptions.DEFAULT); + // end::explain-data-frame-analytics-execute + + // tag::explain-data-frame-analytics-response + List fieldSelection = response.getFieldSelection(); // <1> + MemoryEstimation memoryEstimation = response.getMemoryEstimation(); // <2> + // end::explain-data-frame-analytics-response - // tag::estimate-memory-usage-execute - EstimateMemoryUsageResponse response = client.machineLearning().estimateMemoryUsage(request, RequestOptions.DEFAULT); - // end::estimate-memory-usage-execute + assertThat(fieldSelection.size(), equalTo(2)); + assertThat(fieldSelection.stream().map(FieldSelection::getName).collect(Collectors.toList()), contains("timestamp", "total")); - // tag::estimate-memory-usage-response - ByteSizeValue expectedMemoryWithoutDisk = response.getExpectedMemoryWithoutDisk(); // <1> - ByteSizeValue expectedMemoryWithDisk = response.getExpectedMemoryWithDisk(); // <2> - // end::estimate-memory-usage-response + ByteSizeValue expectedMemoryWithoutDisk = memoryEstimation.getExpectedMemoryWithoutDisk(); // <1> + ByteSizeValue expectedMemoryWithDisk = memoryEstimation.getExpectedMemoryWithDisk(); // <2> // We are pretty liberal here as this test does not aim at verifying concrete numbers but rather end-to-end user workflow. 
ByteSizeValue lowerBound = new ByteSizeValue(1, ByteSizeUnit.KB); @@ -3496,14 +3511,14 @@ public void testEstimateMemoryUsage() throws Exception { } { DataFrameAnalyticsConfig config = DataFrameAnalyticsConfig.builder() - .setSource(DataFrameAnalyticsSource.builder().setIndex("estimate-test-source-index").build()) + .setSource(DataFrameAnalyticsSource.builder().setIndex("explain-df-test-source-index").build()) .setAnalysis(OutlierDetection.createDefault()) .build(); - PutDataFrameAnalyticsRequest request = new PutDataFrameAnalyticsRequest(config); - // tag::estimate-memory-usage-execute-listener - ActionListener listener = new ActionListener() { + ExplainDataFrameAnalyticsRequest request = new ExplainDataFrameAnalyticsRequest(config); + // tag::explain-data-frame-analytics-execute-listener + ActionListener listener = new ActionListener() { @Override - public void onResponse(EstimateMemoryUsageResponse response) { + public void onResponse(ExplainDataFrameAnalyticsResponse response) { // <1> } @@ -3512,15 +3527,15 @@ public void onFailure(Exception e) { // <2> } }; - // end::estimate-memory-usage-execute-listener + // end::explain-data-frame-analytics-execute-listener // Replace the empty listener by a blocking listener in test final CountDownLatch latch = new CountDownLatch(1); listener = new LatchedActionListener<>(listener, latch); - // tag::estimate-memory-usage-execute-async - client.machineLearning().estimateMemoryUsageAsync(request, RequestOptions.DEFAULT, listener); // <1> - // end::estimate-memory-usage-execute-async + // tag::explain-data-frame-analytics-execute-async + client.machineLearning().explainDataFrameAnalyticsAsync(request, RequestOptions.DEFAULT, listener); // <1> + // end::explain-data-frame-analytics-execute-async assertTrue(latch.await(30L, TimeUnit.SECONDS)); } diff --git a/client/rest-high-level/src/test/java/org/elasticsearch/client/ml/ExplainDataFrameAnalyticsRequestTests.java b/client/rest-high-level/src/test/java/org/elasticsearch/client/ml/ExplainDataFrameAnalyticsRequestTests.java new file mode 100644 index 0000000000000..7273a40e298c4 --- /dev/null +++ b/client/rest-high-level/src/test/java/org/elasticsearch/client/ml/ExplainDataFrameAnalyticsRequestTests.java @@ -0,0 +1,44 @@ +/* + * Licensed to Elasticsearch under one or more contributor + * license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright + * ownership. Elasticsearch licenses this file to you under + * the Apache License, Version 2.0 (the "License"); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ +package org.elasticsearch.client.ml; + +import org.elasticsearch.client.ml.dataframe.DataFrameAnalyticsConfig; +import org.elasticsearch.client.ml.dataframe.DataFrameAnalyticsConfigTests; +import org.elasticsearch.test.ESTestCase; + +import static org.hamcrest.Matchers.equalTo; +import static org.hamcrest.Matchers.is; +import static org.hamcrest.Matchers.nullValue; + +public class ExplainDataFrameAnalyticsRequestTests extends ESTestCase { + + public void testIdConstructor() { + ExplainDataFrameAnalyticsRequest request = new ExplainDataFrameAnalyticsRequest("foo"); + assertThat(request.getId(), equalTo("foo")); + assertThat(request.getConfig(), is(nullValue())); + } + + public void testConfigConstructor() { + DataFrameAnalyticsConfig config = DataFrameAnalyticsConfigTests.randomDataFrameAnalyticsConfig(); + + ExplainDataFrameAnalyticsRequest request = new ExplainDataFrameAnalyticsRequest(config); + assertThat(request.getId(), is(nullValue())); + assertThat(request.getConfig(), equalTo(config)); + } +} diff --git a/client/rest-high-level/src/test/java/org/elasticsearch/client/ml/ExplainDataFrameAnalyticsResponseTests.java b/client/rest-high-level/src/test/java/org/elasticsearch/client/ml/ExplainDataFrameAnalyticsResponseTests.java new file mode 100644 index 0000000000000..f4adbd09ba7f3 --- /dev/null +++ b/client/rest-high-level/src/test/java/org/elasticsearch/client/ml/ExplainDataFrameAnalyticsResponseTests.java @@ -0,0 +1,54 @@ +/* + * Licensed to Elasticsearch under one or more contributor + * license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright + * ownership. Elasticsearch licenses this file to you under + * the Apache License, Version 2.0 (the "License"); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */
+package org.elasticsearch.client.ml;
+
+import org.elasticsearch.client.ml.dataframe.explain.FieldSelection;
+import org.elasticsearch.client.ml.dataframe.explain.FieldSelectionTests;
+import org.elasticsearch.client.ml.dataframe.explain.MemoryEstimation;
+import org.elasticsearch.client.ml.dataframe.explain.MemoryEstimationTests;
+import org.elasticsearch.common.xcontent.XContentParser;
+import org.elasticsearch.test.AbstractXContentTestCase;
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.List;
+import java.util.stream.IntStream;
+
+public class ExplainDataFrameAnalyticsResponseTests extends AbstractXContentTestCase<ExplainDataFrameAnalyticsResponse> {
+
+    @Override
+    protected ExplainDataFrameAnalyticsResponse createTestInstance() {
+        int fieldSelectionCount = randomIntBetween(1, 5);
+        List<FieldSelection> fieldSelection = new ArrayList<>(fieldSelectionCount);
+        IntStream.range(0, fieldSelectionCount).forEach(i -> fieldSelection.add(FieldSelectionTests.createRandom()));
+        MemoryEstimation memoryEstimation = MemoryEstimationTests.createRandom();
+
+        return new ExplainDataFrameAnalyticsResponse(fieldSelection, memoryEstimation);
+    }
+
+    @Override
+    protected ExplainDataFrameAnalyticsResponse doParseInstance(XContentParser parser) throws IOException {
+        return ExplainDataFrameAnalyticsResponse.fromXContent(parser);
+    }
+
+    @Override
+    protected boolean supportsUnknownFields() {
+        return true;
+    }
+}
diff --git a/client/rest-high-level/src/test/java/org/elasticsearch/client/ml/dataframe/explain/FieldSelectionTests.java b/client/rest-high-level/src/test/java/org/elasticsearch/client/ml/dataframe/explain/FieldSelectionTests.java
new file mode 100644
index 0000000000000..e76f39b5b852f
--- /dev/null
+++ b/client/rest-high-level/src/test/java/org/elasticsearch/client/ml/dataframe/explain/FieldSelectionTests.java
@@ -0,0 +1,57 @@
+/*
+ * Licensed to Elasticsearch under one or more contributor
+ * license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright
+ * ownership. Elasticsearch licenses this file to you under
+ * the Apache License, Version 2.0 (the "License"); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.elasticsearch.client.ml.dataframe.explain;
+
+import org.elasticsearch.common.xcontent.XContentParser;
+import org.elasticsearch.test.AbstractXContentTestCase;
+
+import java.io.IOException;
+import java.util.Set;
+import java.util.stream.Collectors;
+
+public class FieldSelectionTests extends AbstractXContentTestCase<FieldSelection> {
+
+    public static FieldSelection createRandom() {
+        Set<String> mappingTypes = randomSubsetOf(randomIntBetween(1, 3), "int", "float", "double", "text", "keyword", "ip")
+            .stream().collect(Collectors.toSet());
+        FieldSelection.FeatureType featureType = randomBoolean() ? null : randomFrom(FieldSelection.FeatureType.values());
+        String reason = randomBoolean() ?
null : randomAlphaOfLength(20); + return new FieldSelection(randomAlphaOfLength(10), + mappingTypes, + randomBoolean(), + randomBoolean(), + featureType, + reason); + } + + @Override + protected FieldSelection createTestInstance() { + return createRandom(); + } + + @Override + protected FieldSelection doParseInstance(XContentParser parser) throws IOException { + return FieldSelection.PARSER.apply(parser, null); + } + + @Override + protected boolean supportsUnknownFields() { + return true; + } +} diff --git a/client/rest-high-level/src/test/java/org/elasticsearch/client/ml/EstimateMemoryUsageResponseTests.java b/client/rest-high-level/src/test/java/org/elasticsearch/client/ml/dataframe/explain/MemoryEstimationTests.java similarity index 68% rename from client/rest-high-level/src/test/java/org/elasticsearch/client/ml/EstimateMemoryUsageResponseTests.java rename to client/rest-high-level/src/test/java/org/elasticsearch/client/ml/dataframe/explain/MemoryEstimationTests.java index f8f2746204df5..884736e573ed5 100644 --- a/client/rest-high-level/src/test/java/org/elasticsearch/client/ml/EstimateMemoryUsageResponseTests.java +++ b/client/rest-high-level/src/test/java/org/elasticsearch/client/ml/dataframe/explain/MemoryEstimationTests.java @@ -7,7 +7,7 @@ * not use this file except in compliance with the License. * You may obtain a copy of the License at * - * http://www.apache.org/licenses/LICENSE-2.0 + * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, * software distributed under the License is distributed on an @@ -16,7 +16,7 @@ * specific language governing permissions and limitations * under the License. */ -package org.elasticsearch.client.ml; +package org.elasticsearch.client.ml.dataframe.explain; import org.elasticsearch.common.unit.ByteSizeValue; import org.elasticsearch.common.xcontent.XContentParser; @@ -24,22 +24,22 @@ import java.io.IOException; -public class EstimateMemoryUsageResponseTests extends AbstractXContentTestCase { +public class MemoryEstimationTests extends AbstractXContentTestCase { - public static EstimateMemoryUsageResponse randomResponse() { - return new EstimateMemoryUsageResponse( + public static MemoryEstimation createRandom() { + return new MemoryEstimation( randomBoolean() ? new ByteSizeValue(randomNonNegativeLong()) : null, randomBoolean() ? new ByteSizeValue(randomNonNegativeLong()) : null); } @Override - protected EstimateMemoryUsageResponse createTestInstance() { - return randomResponse(); + protected MemoryEstimation createTestInstance() { + return createRandom(); } @Override - protected EstimateMemoryUsageResponse doParseInstance(XContentParser parser) throws IOException { - return EstimateMemoryUsageResponse.fromXContent(parser); + protected MemoryEstimation doParseInstance(XContentParser parser) throws IOException { + return MemoryEstimation.PARSER.apply(parser, null); } @Override diff --git a/docs/java-rest/high-level/ml/estimate-memory-usage.asciidoc b/docs/java-rest/high-level/ml/estimate-memory-usage.asciidoc deleted file mode 100644 index 8b7ae0f55c8e1..0000000000000 --- a/docs/java-rest/high-level/ml/estimate-memory-usage.asciidoc +++ /dev/null @@ -1,36 +0,0 @@ --- -:api: estimate-memory-usage -:request: PutDataFrameAnalyticsRequest -:response: EstimateMemoryUsageResponse --- -[role="xpack"] -[id="{upid}-{api}"] -=== Estimate memory usage API - -Estimates memory usage of {dfanalytics}. 
-Estimation results can be used when deciding the appropriate value for `model_memory_limit` setting later on.
-
-The API accepts an +{request}+ object and returns an +{response}+.
-
-[id="{upid}-{api}-request"]
-==== Estimate memory usage request
-
-["source","java",subs="attributes,callouts,macros"]
---------------------------------------------------
-include-tagged::{doc-tests-file}[{api}-request]
---------------------------------------------------
-<1> Constructing a new request containing a {dataframe-analytics-config} for which memory usage estimation should be performed
-
-include::../execution.asciidoc[]
-
-[id="{upid}-{api}-response"]
-==== Response
-
-The returned +{response}+ contains the memory usage estimates.
-
-["source","java",subs="attributes,callouts,macros"]
---------------------------------------------------
-include-tagged::{doc-tests-file}[{api}-response]
---------------------------------------------------
-<1> Estimated memory usage under the assumption that the whole {dfanalytics} should happen in memory (i.e. without overflowing to disk).
-<2> Estimated memory usage under the assumption that overflowing to disk is allowed during {dfanalytics}.
\ No newline at end of file
diff --git a/docs/java-rest/high-level/ml/explain-data-frame-analytics.asciidoc b/docs/java-rest/high-level/ml/explain-data-frame-analytics.asciidoc
new file mode 100644
index 0000000000000..3c41531d22213
--- /dev/null
+++ b/docs/java-rest/high-level/ml/explain-data-frame-analytics.asciidoc
@@ -0,0 +1,48 @@
+--
+:api: explain-data-frame-analytics
+:request: ExplainDataFrameAnalyticsRequest
+:response: ExplainDataFrameAnalyticsResponse
+--
+[role="xpack"]
+[id="{upid}-{api}"]
+=== Explain {dfanalytics} API
+
+Explains the following about a {dataframe-analytics-config}:
+
+* field selection: which fields are included or not in the analysis
+* memory estimation: how much memory is estimated to be required. The estimate can be used when deciding the appropriate value for `model_memory_limit` setting later on.
+
+The API accepts an +{request}+ object and returns an +{response}+.
+
+[id="{upid}-{api}-request"]
+==== Explain {dfanalytics} request
+
+The request can be constructed with the id of an existing {dfanalytics-job}.
+
+["source","java",subs="attributes,callouts,macros"]
+--------------------------------------------------
+include-tagged::{doc-tests-file}[{api}-id-request]
+--------------------------------------------------
+<1> Constructing a new request with the id of an existing {dfanalytics-job}
+
+It can also be constructed with a {dataframe-analytics-config} in order to explain the job before it is created.
+
+["source","java",subs="attributes,callouts,macros"]
+--------------------------------------------------
+include-tagged::{doc-tests-file}[{api}-config-request]
+--------------------------------------------------
+<1> Constructing a new request containing a {dataframe-analytics-config}
+
+include::../execution.asciidoc[]
+
+[id="{upid}-{api}-response"]
+==== Response
+
+The returned +{response}+ contains the field selection and the memory usage estimation.
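+
+In addition to the tagged snippet below, here is a minimal hand-written sketch (not taken from the documented test sources; `response` stands for the +{response}+ obtained above) of how the field selection might be inspected:
+
+["source","java"]
+--------------------------------------------------
+for (FieldSelection field : response.getFieldSelection()) {
+    if (field.isIncluded() == false) {
+        // for excluded fields, the reason explains why the field was left out of the analysis
+        System.out.println(field.getName() + " was excluded: " + field.getReason());
+    }
+}
+--------------------------------------------------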
+ +["source","java",subs="attributes,callouts,macros"] +-------------------------------------------------- +include-tagged::{doc-tests-file}[{api}-response] +-------------------------------------------------- +<1> A list where each item explains whether a field was selected for analysis or not +<2> The memory estimation for the {dfanalytics-job} diff --git a/docs/java-rest/high-level/supported-apis.asciidoc b/docs/java-rest/high-level/supported-apis.asciidoc index 770866a075522..d691a3ac34b09 100644 --- a/docs/java-rest/high-level/supported-apis.asciidoc +++ b/docs/java-rest/high-level/supported-apis.asciidoc @@ -300,7 +300,7 @@ The Java High Level REST Client supports the following Machine Learning APIs: * <<{upid}-start-data-frame-analytics>> * <<{upid}-stop-data-frame-analytics>> * <<{upid}-evaluate-data-frame>> -* <<{upid}-estimate-memory-usage>> +* <<{upid}-explain-data-frame-analytics>> * <<{upid}-get-trained-models>> * <<{upid}-put-filter>> * <<{upid}-get-filters>> @@ -353,7 +353,7 @@ include::ml/delete-data-frame-analytics.asciidoc[] include::ml/start-data-frame-analytics.asciidoc[] include::ml/stop-data-frame-analytics.asciidoc[] include::ml/evaluate-data-frame.asciidoc[] -include::ml/estimate-memory-usage.asciidoc[] +include::ml/explain-data-frame-analytics.asciidoc[] include::ml/get-trained-models.asciidoc[] include::ml/put-filter.asciidoc[] include::ml/get-filters.asciidoc[] diff --git a/docs/reference/ml/df-analytics/apis/estimate-memory-usage-dfanalytics.asciidoc b/docs/reference/ml/df-analytics/apis/estimate-memory-usage-dfanalytics.asciidoc deleted file mode 100644 index 64db472dfd1e4..0000000000000 --- a/docs/reference/ml/df-analytics/apis/estimate-memory-usage-dfanalytics.asciidoc +++ /dev/null @@ -1,80 +0,0 @@ -[role="xpack"] -[testenv="platinum"] -[[estimate-memory-usage-dfanalytics]] -=== Estimate memory usage API - -[subs="attributes"] -++++ -Estimate memory usage for {dfanalytics-jobs} -++++ - -Estimates memory usage for the given {dataframe-analytics-config}. - -experimental[] - -[[ml-estimate-memory-usage-dfanalytics-request]] -==== {api-request-title} - -`POST _ml/data_frame/analytics/_estimate_memory_usage` - -[[ml-estimate-memory-usage-dfanalytics-prereq]] -==== {api-prereq-title} - -* You must have `monitor_ml` privilege to use this API. For more -information, see <> and <>. - -[[ml-estimate-memory-usage-dfanalytics-desc]] -==== {api-description-title} - -This API estimates memory usage for the given {dataframe-analytics-config} before the {dfanalytics-job} is even created. - -Serves as an advice on how to set `model_memory_limit` when creating {dfanalytics-job}. - -[[ml-estimate-memory-usage-dfanalytics-request-body]] -==== {api-request-body-title} - -`data_frame_analytics_config`:: - (Required, object) Intended configuration of {dfanalytics-job}. For more information, see - <>. - Note that `id` and `dest` don't need to be provided in the context of this API. - -[[ml-estimate-memory-usage-dfanalytics-results]] -==== {api-response-body-title} - -`expected_memory_without_disk`:: - (string) Estimated memory usage under the assumption that the whole {dfanalytics} should happen in memory - (i.e. without overflowing to disk). - -`expected_memory_with_disk`:: - (string) Estimated memory usage under the assumption that overflowing to disk is allowed during {dfanalytics}. - `expected_memory_with_disk` is usually smaller than `expected_memory_without_disk` as using disk allows to - limit the main memory needed to perform {dfanalytics}. 
-
-[[ml-estimate-memory-usage-dfanalytics-example]]
-==== {api-examples-title}
-
-[source,console]
---------------------------------------------------
-POST _ml/data_frame/analytics/_estimate_memory_usage
-{
-  "data_frame_analytics_config": {
-    "source": {
-      "index": "logdata"
-    },
-    "analysis": {
-      "outlier_detection": {}
-    }
-  }
-}
---------------------------------------------------
-// TEST[skip:TBD]
-
-The API returns the following results:
-
-[source,console-result]
-----
-{
-  "expected_memory_without_disk": "128MB",
-  "expected_memory_with_disk": "32MB"
-}
-----
diff --git a/docs/reference/ml/df-analytics/apis/explain-dfanalytics.asciidoc b/docs/reference/ml/df-analytics/apis/explain-dfanalytics.asciidoc
new file mode 100644
index 0000000000000..c9ee565e9b2c5
--- /dev/null
+++ b/docs/reference/ml/df-analytics/apis/explain-dfanalytics.asciidoc
@@ -0,0 +1,159 @@
+[role="xpack"]
+[testenv="platinum"]
+[[explain-dfanalytics]]
+=== Explain {dfanalytics} API
+
+[subs="attributes"]
+++++
+Explain {dfanalytics} API
+++++
+
+Explains a {dataframe-analytics-config}.
+
+experimental[]
+
+[[ml-explain-dfanalytics-request]]
+==== {api-request-title}
+
+`GET _ml/data_frame/analytics/_explain` +
+
+`POST _ml/data_frame/analytics/_explain` +
+
+`GET _ml/data_frame/analytics/<data_frame_analytics_id>/_explain` +
+
+`POST _ml/data_frame/analytics/<data_frame_analytics_id>/_explain`
+
+[[ml-explain-dfanalytics-prereq]]
+==== {api-prereq-title}
+
+* You must have `monitor_ml` privilege to use this API. For more
+information, see <> and <>.
+
+[[ml-explain-dfanalytics-desc]]
+==== {api-description-title}
+
+This API provides explanations for a {dataframe-analytics-config} that either exists already or one that has not been created yet.
+The following explanations are provided:
+
+* which fields are included or not in the analysis and why
+* how much memory is estimated to be required. The estimate can be used when deciding the appropriate value for the `model_memory_limit` setting later on.
+
+[[ml-explain-dfanalytics-path-params]]
+==== {api-path-parms-title}
+
+`<data_frame_analytics_id>`::
+  (Optional, string) Identifier for the existing {dfanalytics-job} to explain. This
+  identifier can contain lowercase alphanumeric characters (a-z and 0-9), hyphens,
+  and underscores. It must start and end with alphanumeric characters.
+
+[[ml-explain-dfanalytics-request-body]]
+==== {api-request-body-title}
+
+`data_frame_analytics_config`::
+  (Optional, object) Intended configuration of {dfanalytics-job}. For more information, see
+  <>.
+  Note that `id` and `dest` don't need to be provided in the context of this API.
+
+[[ml-explain-dfanalytics-results]]
+==== {api-response-body-title}
+
+The API returns a response that contains the following:
+
+`field_selection`::
+  (array) An array of objects that explain the selection for each field, sorted by the field names.
+  Each object in the array has the following properties:
+
+  `name`:::
+    (string) The field name.
+
+  `mapping_types`:::
+    (array of strings) The mapping types of the field.
+
+  `is_included`:::
+    (boolean) Whether the field is selected to be included in the analysis.
+
+  `is_required`:::
+    (boolean) Whether the field is required.
+
+  `feature_type`:::
+    (string) The feature type of this field for the analysis. May be `categorical` or `numerical`.
+
+  `reason`:::
+    (string) The reason a field is not selected to be included in the analysis.
+
+`memory_estimation`::
+  (object) An object containing the memory estimates.
+  The object has the following properties:
+
+  `expected_memory_without_disk`:::
+    (string) Estimated memory usage under the assumption that the whole {dfanalytics} should happen in memory
+    (i.e. without overflowing to disk).
+
+  `expected_memory_with_disk`:::
+    (string) Estimated memory usage under the assumption that overflowing to disk is allowed during {dfanalytics}.
+    `expected_memory_with_disk` is usually smaller than `expected_memory_without_disk` as using disk allows
+    limiting the main memory needed to perform {dfanalytics}.
+
+[[ml-explain-dfanalytics-example]]
+==== {api-examples-title}
+
+[source,console]
+--------------------------------------------------
+POST _ml/data_frame/analytics/_explain
+{
+  "data_frame_analytics_config": {
+    "source": {
+      "index": "houses_sold_last_10_yrs"
+    },
+    "analysis": {
+      "regression": {
+        "dependent_variable": "price"
+      }
+    }
+  }
+}
+--------------------------------------------------
+// TEST[skip:TBD]
+
+The API returns the following results:
+
+[source,console-result]
+----
+{
+  "field_selection": [
+    {
+      "name": "number_of_bedrooms",
+      "mapping_types": ["integer"],
+      "is_included": true,
+      "is_required": false,
+      "feature_type": "numerical"
+    },
+    {
+      "name": "postcode",
+      "mapping_types": ["text"],
+      "is_included": false,
+      "is_required": false,
+      "reason": "[postcode.keyword] is preferred because it is aggregatable"
+    },
+    {
+      "name": "postcode.keyword",
+      "mapping_types": ["keyword"],
+      "is_included": true,
+      "is_required": false,
+      "feature_type": "categorical"
+    },
+    {
+      "name": "price",
+      "mapping_types": ["float"],
+      "is_included": true,
+      "is_required": true,
+      "feature_type": "numerical"
+    }
+  ],
+  "memory_estimation": {
+    "expected_memory_without_disk": "128MB",
+    "expected_memory_with_disk": "32MB"
+  }
+}
----
diff --git a/docs/reference/ml/df-analytics/apis/index.asciidoc b/docs/reference/ml/df-analytics/apis/index.asciidoc
index 30e909f3ffad6..6bf63e7ddb8c0 100644
--- a/docs/reference/ml/df-analytics/apis/index.asciidoc
+++ b/docs/reference/ml/df-analytics/apis/index.asciidoc
@@ -5,16 +5,16 @@
 You can use the following APIs to perform {ml} {dfanalytics} activities.
 
-* <>
+* <>
 * <>
 * <>
 * <>
 * <>
 * <>
 * <>
-* <>
+* <>
 
-See also <>.
+See also <>.
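To complement the REST example above, here is a minimal sketch of the same call through the Java high-level REST client. It assumes the `explainDataFrameAnalytics` client method and the response getters mirror the server-side classes added later in this patch; the job id is made up.

["source","java"]
----
import org.elasticsearch.client.RequestOptions;
import org.elasticsearch.client.RestHighLevelClient;
import org.elasticsearch.client.ml.ExplainDataFrameAnalyticsRequest;
import org.elasticsearch.client.ml.ExplainDataFrameAnalyticsResponse;
import org.elasticsearch.client.ml.dataframe.explain.FieldSelection;

import java.io.IOException;

public class ExplainExample {

    static void explain(RestHighLevelClient client) throws IOException {
        // Explain an existing job by its id (the id is hypothetical).
        ExplainDataFrameAnalyticsRequest request = new ExplainDataFrameAnalyticsRequest("houses-regression");
        ExplainDataFrameAnalyticsResponse response =
            client.machineLearning().explainDataFrameAnalytics(request, RequestOptions.DEFAULT);

        // Report why each excluded field was left out of the analysis.
        for (FieldSelection field : response.getFieldSelection()) {
            if (field.isIncluded() == false) {
                System.out.println(field.getName() + " excluded: " + field.getReason());
            }
        }

        // The memory estimation informs the model_memory_limit setting.
        System.out.println("Estimate without disk: "
            + response.getMemoryEstimation().getExpectedMemoryWithoutDisk());
    }
}
----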
//CREATE include::put-dfanalytics.asciidoc[] @@ -23,7 +23,7 @@ include::delete-dfanalytics.asciidoc[] //EVALUATE include::evaluate-dfanalytics.asciidoc[] //ESTIMATE_MEMORY_USAGE -include::estimate-memory-usage-dfanalytics.asciidoc[] +include::explain-dfanalytics.asciidoc[] //GET include::get-dfanalytics.asciidoc[] include::get-dfanalytics-stats.asciidoc[] diff --git a/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/XPackClientPlugin.java b/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/XPackClientPlugin.java index 8caac9d6e2050..d99dd1ec23390 100644 --- a/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/XPackClientPlugin.java +++ b/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/XPackClientPlugin.java @@ -79,6 +79,7 @@ import org.elasticsearch.xpack.core.ml.MlMetadata; import org.elasticsearch.xpack.core.ml.MlTasks; import org.elasticsearch.xpack.core.ml.action.CloseJobAction; +import org.elasticsearch.xpack.core.ml.action.ExplainDataFrameAnalyticsAction; import org.elasticsearch.xpack.core.ml.action.DeleteCalendarAction; import org.elasticsearch.xpack.core.ml.action.DeleteCalendarEventAction; import org.elasticsearch.xpack.core.ml.action.DeleteDataFrameAnalyticsAction; @@ -89,7 +90,6 @@ import org.elasticsearch.xpack.core.ml.action.DeleteJobAction; import org.elasticsearch.xpack.core.ml.action.DeleteModelSnapshotAction; import org.elasticsearch.xpack.core.ml.action.DeleteTrainedModelAction; -import org.elasticsearch.xpack.core.ml.action.EstimateMemoryUsageAction; import org.elasticsearch.xpack.core.ml.action.EvaluateDataFrameAction; import org.elasticsearch.xpack.core.ml.action.FinalizeJobExecutionAction; import org.elasticsearch.xpack.core.ml.action.FindFileStructureAction; @@ -158,6 +158,10 @@ import org.elasticsearch.xpack.core.ml.dataframe.evaluation.softclassification.Recall; import org.elasticsearch.xpack.core.ml.dataframe.evaluation.softclassification.ScoreByThresholdResult; import org.elasticsearch.xpack.core.ml.dataframe.evaluation.softclassification.SoftClassificationMetric; +import org.elasticsearch.xpack.core.ml.inference.preprocessing.FrequencyEncoding; +import org.elasticsearch.xpack.core.ml.inference.preprocessing.OneHotEncoding; +import org.elasticsearch.xpack.core.ml.inference.preprocessing.PreProcessor; +import org.elasticsearch.xpack.core.ml.inference.preprocessing.TargetMeanEncoding; import org.elasticsearch.xpack.core.ml.inference.results.ClassificationInferenceResults; import org.elasticsearch.xpack.core.ml.inference.results.InferenceResults; import org.elasticsearch.xpack.core.ml.inference.results.RegressionInferenceResults; @@ -171,10 +175,6 @@ import org.elasticsearch.xpack.core.ml.inference.trainedmodel.ensemble.WeightedMode; import org.elasticsearch.xpack.core.ml.inference.trainedmodel.ensemble.WeightedSum; import org.elasticsearch.xpack.core.ml.inference.trainedmodel.tree.Tree; -import org.elasticsearch.xpack.core.ml.inference.preprocessing.FrequencyEncoding; -import org.elasticsearch.xpack.core.ml.inference.preprocessing.OneHotEncoding; -import org.elasticsearch.xpack.core.ml.inference.preprocessing.PreProcessor; -import org.elasticsearch.xpack.core.ml.inference.preprocessing.TargetMeanEncoding; import org.elasticsearch.xpack.core.ml.job.config.JobTaskState; import org.elasticsearch.xpack.core.monitoring.MonitoringFeatureSetUsage; import org.elasticsearch.xpack.core.rollup.RollupFeatureSetUsage; @@ -381,7 +381,7 @@ public List> getClientActions() { StartDataFrameAnalyticsAction.INSTANCE, 
StopDataFrameAnalyticsAction.INSTANCE, EvaluateDataFrameAction.INSTANCE, - EstimateMemoryUsageAction.INSTANCE, + ExplainDataFrameAnalyticsAction.INSTANCE, InternalInferModelAction.INSTANCE, GetTrainedModelsAction.INSTANCE, DeleteTrainedModelAction.INSTANCE, diff --git a/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/ml/action/EstimateMemoryUsageAction.java b/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/ml/action/EstimateMemoryUsageAction.java deleted file mode 100644 index 529db21cced70..0000000000000 --- a/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/ml/action/EstimateMemoryUsageAction.java +++ /dev/null @@ -1,119 +0,0 @@ -/* - * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one - * or more contributor license agreements. Licensed under the Elastic License; - * you may not use this file except in compliance with the Elastic License. - */ -package org.elasticsearch.xpack.core.ml.action; - -import org.elasticsearch.action.ActionResponse; -import org.elasticsearch.action.ActionType; -import org.elasticsearch.common.Nullable; -import org.elasticsearch.common.ParseField; -import org.elasticsearch.common.io.stream.StreamInput; -import org.elasticsearch.common.io.stream.StreamOutput; -import org.elasticsearch.common.unit.ByteSizeValue; -import org.elasticsearch.common.xcontent.ConstructingObjectParser; -import org.elasticsearch.common.xcontent.ObjectParser; -import org.elasticsearch.common.xcontent.ToXContentObject; -import org.elasticsearch.common.xcontent.XContentBuilder; - -import java.io.IOException; -import java.util.Objects; - -import static org.elasticsearch.common.xcontent.ConstructingObjectParser.optionalConstructorArg; - -public class EstimateMemoryUsageAction extends ActionType { - - public static final EstimateMemoryUsageAction INSTANCE = new EstimateMemoryUsageAction(); - public static final String NAME = "cluster:admin/xpack/ml/data_frame/analytics/estimate_memory_usage"; - - private EstimateMemoryUsageAction() { - super(NAME, EstimateMemoryUsageAction.Response::new); - } - - public static class Response extends ActionResponse implements ToXContentObject { - - public static final ParseField TYPE = new ParseField("memory_usage_estimation_result"); - - public static final ParseField EXPECTED_MEMORY_WITHOUT_DISK = new ParseField("expected_memory_without_disk"); - public static final ParseField EXPECTED_MEMORY_WITH_DISK = new ParseField("expected_memory_with_disk"); - - static final ConstructingObjectParser PARSER = - new ConstructingObjectParser<>( - TYPE.getPreferredName(), - args -> new Response((ByteSizeValue) args[0], (ByteSizeValue) args[1])); - - static { - PARSER.declareField( - optionalConstructorArg(), - (p, c) -> ByteSizeValue.parseBytesSizeValue(p.text(), EXPECTED_MEMORY_WITHOUT_DISK.getPreferredName()), - EXPECTED_MEMORY_WITHOUT_DISK, - ObjectParser.ValueType.VALUE); - PARSER.declareField( - optionalConstructorArg(), - (p, c) -> ByteSizeValue.parseBytesSizeValue(p.text(), EXPECTED_MEMORY_WITH_DISK.getPreferredName()), - EXPECTED_MEMORY_WITH_DISK, - ObjectParser.ValueType.VALUE); - } - - private final ByteSizeValue expectedMemoryWithoutDisk; - private final ByteSizeValue expectedMemoryWithDisk; - - public Response(@Nullable ByteSizeValue expectedMemoryWithoutDisk, @Nullable ByteSizeValue expectedMemoryWithDisk) { - this.expectedMemoryWithoutDisk = expectedMemoryWithoutDisk; - this.expectedMemoryWithDisk = expectedMemoryWithDisk; - } - - public Response(StreamInput in) throws IOException { - 
super(in); - this.expectedMemoryWithoutDisk = in.readOptionalWriteable(ByteSizeValue::new); - this.expectedMemoryWithDisk = in.readOptionalWriteable(ByteSizeValue::new); - } - - public ByteSizeValue getExpectedMemoryWithoutDisk() { - return expectedMemoryWithoutDisk; - } - - public ByteSizeValue getExpectedMemoryWithDisk() { - return expectedMemoryWithDisk; - } - - @Override - public void writeTo(StreamOutput out) throws IOException { - out.writeOptionalWriteable(expectedMemoryWithoutDisk); - out.writeOptionalWriteable(expectedMemoryWithDisk); - } - - @Override - public XContentBuilder toXContent(XContentBuilder builder, Params params) throws IOException { - builder.startObject(); - if (expectedMemoryWithoutDisk != null) { - builder.field(EXPECTED_MEMORY_WITHOUT_DISK.getPreferredName(), expectedMemoryWithoutDisk.getStringRep()); - } - if (expectedMemoryWithDisk != null) { - builder.field(EXPECTED_MEMORY_WITH_DISK.getPreferredName(), expectedMemoryWithDisk.getStringRep()); - } - builder.endObject(); - return builder; - } - - @Override - public boolean equals(Object other) { - if (this == other) { - return true; - } - if (other == null || getClass() != other.getClass()) { - return false; - } - - Response that = (Response) other; - return Objects.equals(expectedMemoryWithoutDisk, that.expectedMemoryWithoutDisk) - && Objects.equals(expectedMemoryWithDisk, that.expectedMemoryWithDisk); - } - - @Override - public int hashCode() { - return Objects.hash(expectedMemoryWithoutDisk, expectedMemoryWithDisk); - } - } -} diff --git a/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/ml/action/ExplainDataFrameAnalyticsAction.java b/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/ml/action/ExplainDataFrameAnalyticsAction.java new file mode 100644 index 0000000000000..46888ea27a7a9 --- /dev/null +++ b/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/ml/action/ExplainDataFrameAnalyticsAction.java @@ -0,0 +1,101 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License; + * you may not use this file except in compliance with the Elastic License. 
+ */
+package org.elasticsearch.xpack.core.ml.action;
+
+import org.elasticsearch.action.ActionResponse;
+import org.elasticsearch.action.ActionType;
+import org.elasticsearch.common.ParseField;
+import org.elasticsearch.common.io.stream.StreamInput;
+import org.elasticsearch.common.io.stream.StreamOutput;
+import org.elasticsearch.common.xcontent.ConstructingObjectParser;
+import org.elasticsearch.common.xcontent.ToXContentObject;
+import org.elasticsearch.common.xcontent.XContentBuilder;
+import org.elasticsearch.xpack.core.ml.dataframe.explain.FieldSelection;
+import org.elasticsearch.xpack.core.ml.dataframe.explain.MemoryEstimation;
+
+import java.io.IOException;
+import java.util.List;
+import java.util.Objects;
+
+public class ExplainDataFrameAnalyticsAction extends ActionType<ExplainDataFrameAnalyticsAction.Response> {
+
+    public static final ExplainDataFrameAnalyticsAction INSTANCE = new ExplainDataFrameAnalyticsAction();
+    public static final String NAME = "cluster:admin/xpack/ml/data_frame/analytics/explain";
+
+    private ExplainDataFrameAnalyticsAction() {
+        super(NAME, ExplainDataFrameAnalyticsAction.Response::new);
+    }
+
+    public static class Response extends ActionResponse implements ToXContentObject {
+
+        public static final ParseField TYPE = new ParseField("explain_data_frame_analytics_response");
+
+        public static final ParseField FIELD_SELECTION = new ParseField("field_selection");
+        public static final ParseField MEMORY_ESTIMATION = new ParseField("memory_estimation");
+
+        static final ConstructingObjectParser<Response, Void> PARSER =
+            new ConstructingObjectParser<>(
+                TYPE.getPreferredName(),
+                args -> new Response((List<FieldSelection>) args[0], (MemoryEstimation) args[1]));
+
+        static {
+            PARSER.declareObjectArray(ConstructingObjectParser.constructorArg(), FieldSelection.PARSER, FIELD_SELECTION);
+            PARSER.declareObject(ConstructingObjectParser.constructorArg(), MemoryEstimation.PARSER, MEMORY_ESTIMATION);
+        }
+
+        private final List<FieldSelection> fieldSelection;
+        private final MemoryEstimation memoryEstimation;
+
+        public Response(List<FieldSelection> fieldSelection, MemoryEstimation memoryEstimation) {
+            this.fieldSelection = Objects.requireNonNull(fieldSelection);
+            this.memoryEstimation = Objects.requireNonNull(memoryEstimation);
+        }
+
+        public Response(StreamInput in) throws IOException {
+            super(in);
+            this.fieldSelection = in.readList(FieldSelection::new);
+            this.memoryEstimation = new MemoryEstimation(in);
+        }
+
+        @Override
+        public void writeTo(StreamOutput out) throws IOException {
+            out.writeList(fieldSelection);
+            memoryEstimation.writeTo(out);
+        }
+
+        @Override
+        public XContentBuilder toXContent(XContentBuilder builder, Params params) throws IOException {
+            builder.startObject();
+            builder.field(FIELD_SELECTION.getPreferredName(), fieldSelection);
+            builder.field(MEMORY_ESTIMATION.getPreferredName(), memoryEstimation);
+            builder.endObject();
+            return builder;
+        }
+
+        @Override
+        public boolean equals(Object other) {
+            if (this == other) return true;
+            if (other == null || getClass() != other.getClass()) return false;
+
+            Response that = (Response) other;
+            return Objects.equals(fieldSelection, that.fieldSelection)
+                && Objects.equals(memoryEstimation, that.memoryEstimation);
+        }
+
+        @Override
+        public int hashCode() {
+            return Objects.hash(fieldSelection, memoryEstimation);
+        }
+
+        public MemoryEstimation getMemoryEstimation() {
+            return memoryEstimation;
+        }
+
+        public List<FieldSelection> getFieldSelection() {
+            return fieldSelection;
+        }
+    }
+}
diff --git a/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/ml/action/PutDataFrameAnalyticsAction.java
b/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/ml/action/PutDataFrameAnalyticsAction.java index 6860162d793fd..5bce41d8a4ae6 100644 --- a/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/ml/action/PutDataFrameAnalyticsAction.java +++ b/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/ml/action/PutDataFrameAnalyticsAction.java @@ -51,13 +51,14 @@ public static Request parseRequest(String id, XContentParser parser) { } /** - * Parses request for memory estimation. - * {@link Request} is reused across {@link PutDataFrameAnalyticsAction} and {@link EstimateMemoryUsageAction} but parsing differs + * Parses request for use in the explain action. + * {@link Request} is reused across {@link PutDataFrameAnalyticsAction} and + * {@link ExplainDataFrameAnalyticsAction} but parsing differs * between these two usages. */ - public static Request parseRequestForMemoryEstimation(XContentParser parser) { + public static Request parseRequestForExplain(XContentParser parser) { DataFrameAnalyticsConfig.Builder configBuilder = DataFrameAnalyticsConfig.STRICT_PARSER.apply(parser, null); - DataFrameAnalyticsConfig config = configBuilder.buildForMemoryEstimation(); + DataFrameAnalyticsConfig config = configBuilder.buildForExplain(); return new PutDataFrameAnalyticsAction.Request(config); } diff --git a/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/ml/dataframe/DataFrameAnalyticsConfig.java b/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/ml/dataframe/DataFrameAnalyticsConfig.java index 03020caef8ab1..ac1589fa56fbc 100644 --- a/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/ml/dataframe/DataFrameAnalyticsConfig.java +++ b/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/ml/dataframe/DataFrameAnalyticsConfig.java @@ -416,11 +416,11 @@ public DataFrameAnalyticsConfig build() { } /** - * Builds {@link DataFrameAnalyticsConfig} object for the purpose of performing memory estimation. + * Builds {@link DataFrameAnalyticsConfig} object for the purpose of explaining a job that has not been created yet. * Some fields (i.e. "id", "dest") may not be present, therefore we overwrite them here to make {@link DataFrameAnalyticsConfig}'s * constructor validations happy. */ - public DataFrameAnalyticsConfig buildForMemoryEstimation() { + public DataFrameAnalyticsConfig buildForExplain() { return new DataFrameAnalyticsConfig( id != null ? id : "dummy", description, diff --git a/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/ml/dataframe/explain/FieldSelection.java b/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/ml/dataframe/explain/FieldSelection.java new file mode 100644 index 0000000000000..57fae51d36643 --- /dev/null +++ b/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/ml/dataframe/explain/FieldSelection.java @@ -0,0 +1,184 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License; + * you may not use this file except in compliance with the Elastic License. 
+ */
+package org.elasticsearch.xpack.core.ml.dataframe.explain;
+
+import org.elasticsearch.common.Nullable;
+import org.elasticsearch.common.ParseField;
+import org.elasticsearch.common.io.stream.StreamInput;
+import org.elasticsearch.common.io.stream.StreamOutput;
+import org.elasticsearch.common.io.stream.Writeable;
+import org.elasticsearch.common.xcontent.ConstructingObjectParser;
+import org.elasticsearch.common.xcontent.ObjectParser;
+import org.elasticsearch.common.xcontent.ToXContentObject;
+import org.elasticsearch.common.xcontent.XContentBuilder;
+import org.elasticsearch.common.xcontent.XContentParser;
+
+import java.io.IOException;
+import java.util.Collections;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Locale;
+import java.util.Objects;
+import java.util.Set;
+
+public class FieldSelection implements ToXContentObject, Writeable {
+
+    private static final ParseField NAME = new ParseField("name");
+    private static final ParseField MAPPING_TYPES = new ParseField("mapping_types");
+    private static final ParseField IS_INCLUDED = new ParseField("is_included");
+    private static final ParseField IS_REQUIRED = new ParseField("is_required");
+    private static final ParseField FEATURE_TYPE = new ParseField("feature_type");
+    private static final ParseField REASON = new ParseField("reason");
+
+    public enum FeatureType {
+        CATEGORICAL, NUMERICAL;
+
+        public static FeatureType fromString(String value) {
+            return FeatureType.valueOf(value.toUpperCase(Locale.ROOT));
+        }
+
+        @Override
+        public String toString() {
+            return name().toLowerCase(Locale.ROOT);
+        }
+    }
+
+    public static ConstructingObjectParser<FieldSelection, Void> PARSER = new ConstructingObjectParser<>("field_selection",
+        a -> new FieldSelection((String) a[0], new HashSet<>((List<String>) a[1]), (boolean) a[2], (boolean) a[3], (FeatureType) a[4],
+            (String) a[5]));
+
+    static {
+        PARSER.declareString(ConstructingObjectParser.constructorArg(), NAME);
+        PARSER.declareStringArray(ConstructingObjectParser.constructorArg(), MAPPING_TYPES);
+        PARSER.declareBoolean(ConstructingObjectParser.constructorArg(), IS_INCLUDED);
+        PARSER.declareBoolean(ConstructingObjectParser.constructorArg(), IS_REQUIRED);
+        PARSER.declareField(ConstructingObjectParser.optionalConstructorArg(), p -> {
+            if (p.currentToken() == XContentParser.Token.VALUE_STRING) {
+                return FeatureType.fromString(p.text());
+            }
+            throw new IllegalArgumentException("Unsupported token [" + p.currentToken() + "]");
+        }, FEATURE_TYPE, ObjectParser.ValueType.STRING);
+        PARSER.declareString(ConstructingObjectParser.optionalConstructorArg(), REASON);
+    }
+
+    private final String name;
+    private final Set<String> mappingTypes;
+    private final boolean isIncluded;
+    private final boolean isRequired;
+    private final FeatureType featureType;
+    private final String reason;
+
+    public static FieldSelection included(String name, Set<String> mappingTypes, boolean isRequired, FeatureType featureType) {
+        return new FieldSelection(name, mappingTypes, true, isRequired, featureType, null);
+    }
+
+    public static FieldSelection excluded(String name, Set<String> mappingTypes, String reason) {
+        return new FieldSelection(name, mappingTypes, false, false, null, reason);
+    }
+
+    FieldSelection(String name, Set<String> mappingTypes, boolean isIncluded, boolean isRequired, @Nullable FeatureType featureType,
+                   @Nullable String reason) {
+        this.name = Objects.requireNonNull(name);
+        this.mappingTypes = Collections.unmodifiableSet(mappingTypes);
+        this.isIncluded = isIncluded;
+        this.isRequired = isRequired;
+        this.featureType = featureType;
+        this.reason = reason;
+    }
+
+    public FieldSelection(StreamInput in) throws IOException {
+        this.name = in.readString();
+        this.mappingTypes = Collections.unmodifiableSet(in.readSet(StreamInput::readString));
+        this.isIncluded = in.readBoolean();
+        this.isRequired = in.readBoolean();
+        boolean hasFeatureType = in.readBoolean();
+
+        if (hasFeatureType) {
+            this.featureType = in.readEnum(FeatureType.class);
+        } else {
+            this.featureType = null;
+        }
+
+        this.reason = in.readOptionalString();
+    }
+
+    @Override
+    public void writeTo(StreamOutput out) throws IOException {
+        out.writeString(name);
+        out.writeCollection(mappingTypes, StreamOutput::writeString);
+        out.writeBoolean(isIncluded);
+        out.writeBoolean(isRequired);
+
+        if (featureType == null) {
+            out.writeBoolean(false);
+        } else {
+            out.writeBoolean(true);
+            out.writeEnum(featureType);
+        }
+        out.writeOptionalString(reason);
+    }
+
+    @Override
+    public XContentBuilder toXContent(XContentBuilder builder, Params params) throws IOException {
+        builder.startObject();
+        builder.field(NAME.getPreferredName(), name);
+        builder.field(MAPPING_TYPES.getPreferredName(), mappingTypes);
+        builder.field(IS_INCLUDED.getPreferredName(), isIncluded);
+        builder.field(IS_REQUIRED.getPreferredName(), isRequired);
+        if (featureType != null) {
+            builder.field(FEATURE_TYPE.getPreferredName(), featureType);
+        }
+        if (reason != null) {
+            builder.field(REASON.getPreferredName(), reason);
+        }
+        builder.endObject();
+        return builder;
+    }
+
+    @Override
+    public boolean equals(Object o) {
+        if (this == o) return true;
+        if (o == null || getClass() != o.getClass()) return false;
+        FieldSelection that = (FieldSelection) o;
+        return Objects.equals(name, that.name)
+            && Objects.equals(mappingTypes, that.mappingTypes)
+            && isIncluded == that.isIncluded
+            && isRequired == that.isRequired
+            && Objects.equals(featureType, that.featureType)
+            && Objects.equals(reason, that.reason);
+    }
+
+    @Override
+    public int hashCode() {
+        return Objects.hash(name, mappingTypes, isIncluded, isRequired, featureType, reason);
+    }
+
+    public String getName() {
+        return name;
+    }
+
+    public Set<String> getMappingTypes() {
+        return mappingTypes;
+    }
+
+    public boolean isIncluded() {
+        return isIncluded;
+    }
+
+    public boolean isRequired() {
+        return isRequired;
+    }
+
+    @Nullable
+    public FeatureType getFeatureType() {
+        return featureType;
+    }
+
+    @Nullable
+    public String getReason() {
+        return reason;
+    }
+}
diff --git a/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/ml/dataframe/explain/MemoryEstimation.java b/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/ml/dataframe/explain/MemoryEstimation.java
new file mode 100644
index 0000000000000..7972c6a9ee0a2
--- /dev/null
+++ b/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/ml/dataframe/explain/MemoryEstimation.java
@@ -0,0 +1,103 @@
+/*
+ * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
+ * or more contributor license agreements. Licensed under the Elastic License;
+ * you may not use this file except in compliance with the Elastic License.
+ */ +package org.elasticsearch.xpack.core.ml.dataframe.explain; + +import org.elasticsearch.common.Nullable; +import org.elasticsearch.common.ParseField; +import org.elasticsearch.common.io.stream.StreamInput; +import org.elasticsearch.common.io.stream.StreamOutput; +import org.elasticsearch.common.io.stream.Writeable; +import org.elasticsearch.common.unit.ByteSizeValue; +import org.elasticsearch.common.xcontent.ConstructingObjectParser; +import org.elasticsearch.common.xcontent.ObjectParser; +import org.elasticsearch.common.xcontent.ToXContentObject; +import org.elasticsearch.common.xcontent.XContentBuilder; + +import java.io.IOException; +import java.util.Objects; + +import static org.elasticsearch.common.xcontent.ConstructingObjectParser.optionalConstructorArg; + +public class MemoryEstimation implements ToXContentObject, Writeable { + + public static final ParseField EXPECTED_MEMORY_WITHOUT_DISK = new ParseField("expected_memory_without_disk"); + public static final ParseField EXPECTED_MEMORY_WITH_DISK = new ParseField("expected_memory_with_disk"); + + public static final ConstructingObjectParser PARSER = new ConstructingObjectParser<>("memory_estimation", + a -> new MemoryEstimation((ByteSizeValue) a[0], (ByteSizeValue) a[1])); + + static { + PARSER.declareField( + optionalConstructorArg(), + (p, c) -> ByteSizeValue.parseBytesSizeValue(p.text(), EXPECTED_MEMORY_WITHOUT_DISK.getPreferredName()), + EXPECTED_MEMORY_WITHOUT_DISK, + ObjectParser.ValueType.VALUE); + PARSER.declareField( + optionalConstructorArg(), + (p, c) -> ByteSizeValue.parseBytesSizeValue(p.text(), EXPECTED_MEMORY_WITH_DISK.getPreferredName()), + EXPECTED_MEMORY_WITH_DISK, + ObjectParser.ValueType.VALUE); + } + + private final ByteSizeValue expectedMemoryWithoutDisk; + private final ByteSizeValue expectedMemoryWithDisk; + + public MemoryEstimation(@Nullable ByteSizeValue expectedMemoryWithoutDisk, @Nullable ByteSizeValue expectedMemoryWithDisk) { + this.expectedMemoryWithoutDisk = expectedMemoryWithoutDisk; + this.expectedMemoryWithDisk = expectedMemoryWithDisk; + } + + public MemoryEstimation(StreamInput in) throws IOException { + this.expectedMemoryWithoutDisk = in.readOptionalWriteable(ByteSizeValue::new); + this.expectedMemoryWithDisk = in.readOptionalWriteable(ByteSizeValue::new); + } + + public ByteSizeValue getExpectedMemoryWithoutDisk() { + return expectedMemoryWithoutDisk; + } + + public ByteSizeValue getExpectedMemoryWithDisk() { + return expectedMemoryWithDisk; + } + + @Override + public void writeTo(StreamOutput out) throws IOException { + out.writeOptionalWriteable(expectedMemoryWithoutDisk); + out.writeOptionalWriteable(expectedMemoryWithDisk); + } + + @Override + public XContentBuilder toXContent(XContentBuilder builder, Params params) throws IOException { + builder.startObject(); + if (expectedMemoryWithoutDisk != null) { + builder.field(EXPECTED_MEMORY_WITHOUT_DISK.getPreferredName(), expectedMemoryWithoutDisk.getStringRep()); + } + if (expectedMemoryWithDisk != null) { + builder.field(EXPECTED_MEMORY_WITH_DISK.getPreferredName(), expectedMemoryWithDisk.getStringRep()); + } + builder.endObject(); + return builder; + } + + @Override + public boolean equals(Object other) { + if (this == other) { + return true; + } + if (other == null || getClass() != other.getClass()) { + return false; + } + + MemoryEstimation that = (MemoryEstimation) other; + return Objects.equals(expectedMemoryWithoutDisk, that.expectedMemoryWithoutDisk) + && Objects.equals(expectedMemoryWithDisk, that.expectedMemoryWithDisk); + } + + 
@Override + public int hashCode() { + return Objects.hash(expectedMemoryWithoutDisk, expectedMemoryWithDisk); + } +} diff --git a/x-pack/plugin/core/src/test/java/org/elasticsearch/xpack/core/ml/action/EstimateMemoryUsageActionResponseTests.java b/x-pack/plugin/core/src/test/java/org/elasticsearch/xpack/core/ml/action/EstimateMemoryUsageActionResponseTests.java deleted file mode 100644 index 1bc8d8970eae1..0000000000000 --- a/x-pack/plugin/core/src/test/java/org/elasticsearch/xpack/core/ml/action/EstimateMemoryUsageActionResponseTests.java +++ /dev/null @@ -1,54 +0,0 @@ -/* - * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one - * or more contributor license agreements. Licensed under the Elastic License; - * you may not use this file except in compliance with the Elastic License. - */ -package org.elasticsearch.xpack.core.ml.action; - -import org.elasticsearch.common.io.stream.Writeable; -import org.elasticsearch.common.unit.ByteSizeUnit; -import org.elasticsearch.common.unit.ByteSizeValue; -import org.elasticsearch.common.xcontent.XContentParser; -import org.elasticsearch.test.AbstractSerializingTestCase; -import org.elasticsearch.xpack.core.ml.action.EstimateMemoryUsageAction.Response; - -import static org.hamcrest.Matchers.equalTo; -import static org.hamcrest.Matchers.nullValue; - -public class EstimateMemoryUsageActionResponseTests extends AbstractSerializingTestCase { - - @Override - protected Response createTestInstance() { - return new Response( - randomBoolean() ? new ByteSizeValue(randomNonNegativeLong()) : null, - randomBoolean() ? new ByteSizeValue(randomNonNegativeLong()) : null); - } - - @Override - protected Writeable.Reader instanceReader() { - return Response::new; - } - - @Override - protected Response doParseInstance(XContentParser parser) { - return Response.PARSER.apply(parser, null); - } - - public void testConstructor_NullValues() { - Response response = new Response(null, null); - assertThat(response.getExpectedMemoryWithoutDisk(), nullValue()); - assertThat(response.getExpectedMemoryWithDisk(), nullValue()); - } - - public void testConstructor_SmallValues() { - Response response = new Response(new ByteSizeValue(120, ByteSizeUnit.KB), new ByteSizeValue(30, ByteSizeUnit.KB)); - assertThat(response.getExpectedMemoryWithoutDisk(), equalTo(new ByteSizeValue(120, ByteSizeUnit.KB))); - assertThat(response.getExpectedMemoryWithDisk(), equalTo(new ByteSizeValue(30, ByteSizeUnit.KB))); - } - - public void testConstructor() { - Response response = new Response(new ByteSizeValue(20, ByteSizeUnit.MB), new ByteSizeValue(10, ByteSizeUnit.MB)); - assertThat(response.getExpectedMemoryWithoutDisk(), equalTo(new ByteSizeValue(20, ByteSizeUnit.MB))); - assertThat(response.getExpectedMemoryWithDisk(), equalTo(new ByteSizeValue(10, ByteSizeUnit.MB))); - } -} diff --git a/x-pack/plugin/core/src/test/java/org/elasticsearch/xpack/core/ml/action/ExplainDataFrameAnalyticsActionResponseTests.java b/x-pack/plugin/core/src/test/java/org/elasticsearch/xpack/core/ml/action/ExplainDataFrameAnalyticsActionResponseTests.java new file mode 100644 index 0000000000000..ea1aca3916cb9 --- /dev/null +++ b/x-pack/plugin/core/src/test/java/org/elasticsearch/xpack/core/ml/action/ExplainDataFrameAnalyticsActionResponseTests.java @@ -0,0 +1,42 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License; + * you may not use this file except in compliance with the Elastic License. 
+ */
+package org.elasticsearch.xpack.core.ml.action;
+
+import org.elasticsearch.common.io.stream.Writeable;
+import org.elasticsearch.common.xcontent.XContentParser;
+import org.elasticsearch.test.AbstractSerializingTestCase;
+import org.elasticsearch.xpack.core.ml.action.ExplainDataFrameAnalyticsAction.Response;
+import org.elasticsearch.xpack.core.ml.dataframe.explain.FieldSelection;
+import org.elasticsearch.xpack.core.ml.dataframe.explain.FieldSelectionTests;
+import org.elasticsearch.xpack.core.ml.dataframe.explain.MemoryEstimation;
+import org.elasticsearch.xpack.core.ml.dataframe.explain.MemoryEstimationTests;
+
+import java.util.ArrayList;
+import java.util.List;
+import java.util.stream.IntStream;
+
+public class ExplainDataFrameAnalyticsActionResponseTests extends AbstractSerializingTestCase<Response> {
+
+    @Override
+    protected Response createTestInstance() {
+        int fieldSelectionCount = randomIntBetween(1, 5);
+        List<FieldSelection> fieldSelection = new ArrayList<>(fieldSelectionCount);
+        // Use a range so the list really gets fieldSelectionCount entries;
+        // IntStream.of(fieldSelectionCount) would stream the single value instead.
+        IntStream.range(0, fieldSelectionCount).forEach(i -> fieldSelection.add(FieldSelectionTests.createRandom()));
+        MemoryEstimation memoryEstimation = MemoryEstimationTests.createRandom();
+
+        return new Response(fieldSelection, memoryEstimation);
+    }
+
+    @Override
+    protected Writeable.Reader<Response> instanceReader() {
+        return Response::new;
+    }
+
+    @Override
+    protected Response doParseInstance(XContentParser parser) {
+        return Response.PARSER.apply(parser, null);
+    }
+}
diff --git a/x-pack/plugin/core/src/test/java/org/elasticsearch/xpack/core/ml/dataframe/DataFrameAnalyticsConfigTests.java b/x-pack/plugin/core/src/test/java/org/elasticsearch/xpack/core/ml/dataframe/DataFrameAnalyticsConfigTests.java
index 3266f488daf4a..d8c52c839026f 100644
--- a/x-pack/plugin/core/src/test/java/org/elasticsearch/xpack/core/ml/dataframe/DataFrameAnalyticsConfigTests.java
+++ b/x-pack/plugin/core/src/test/java/org/elasticsearch/xpack/core/ml/dataframe/DataFrameAnalyticsConfigTests.java
@@ -279,32 +279,32 @@ public void testExplicitModelMemoryLimitTooHigh() {
         assertThat(e.getMessage(), containsString("must be less than the value of the xpack.ml.max_model_memory_limit setting"));
     }
 
-    public void testBuildForMemoryEstimation() {
+    public void testBuildForExplain() {
         DataFrameAnalyticsConfig.Builder builder = createRandomBuilder("foo");
 
-        DataFrameAnalyticsConfig config = builder.buildForMemoryEstimation();
+        DataFrameAnalyticsConfig config = builder.buildForExplain();
 
         assertThat(config, equalTo(builder.build()));
     }
 
-    public void testBuildForMemoryEstimation_MissingId() {
+    public void testBuildForExplain_MissingId() {
         DataFrameAnalyticsConfig.Builder builder = new DataFrameAnalyticsConfig.Builder()
             .setAnalysis(OutlierDetectionTests.createRandom())
             .setSource(DataFrameAnalyticsSourceTests.createRandom())
             .setDest(DataFrameAnalyticsDestTests.createRandom());
 
-        DataFrameAnalyticsConfig config = builder.buildForMemoryEstimation();
+        DataFrameAnalyticsConfig config = builder.buildForExplain();
 
         assertThat(config.getId(), equalTo("dummy"));
     }
 
-    public void testBuildForMemoryEstimation_MissingDest() {
+    public void testBuildForExplain_MissingDest() {
         DataFrameAnalyticsConfig.Builder builder = new DataFrameAnalyticsConfig.Builder()
             .setId("foo")
             .setAnalysis(OutlierDetectionTests.createRandom())
             .setSource(DataFrameAnalyticsSourceTests.createRandom());
 
-        DataFrameAnalyticsConfig config = builder.buildForMemoryEstimation();
+        DataFrameAnalyticsConfig config = builder.buildForExplain();
 
         assertThat(config.getDest().getIndex(), equalTo("dummy"));
     }
diff --git
a/x-pack/plugin/core/src/test/java/org/elasticsearch/xpack/core/ml/dataframe/explain/FieldSelectionTests.java b/x-pack/plugin/core/src/test/java/org/elasticsearch/xpack/core/ml/dataframe/explain/FieldSelectionTests.java new file mode 100644 index 0000000000000..2c8a8fde39ad2 --- /dev/null +++ b/x-pack/plugin/core/src/test/java/org/elasticsearch/xpack/core/ml/dataframe/explain/FieldSelectionTests.java @@ -0,0 +1,45 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License; + * you may not use this file except in compliance with the Elastic License. + */ +package org.elasticsearch.xpack.core.ml.dataframe.explain; + +import org.elasticsearch.common.io.stream.Writeable; +import org.elasticsearch.common.xcontent.XContentParser; +import org.elasticsearch.test.AbstractSerializingTestCase; + +import java.io.IOException; +import java.util.Set; +import java.util.stream.Collectors; + +public class FieldSelectionTests extends AbstractSerializingTestCase { + + public static FieldSelection createRandom() { + Set mappingTypes = randomSubsetOf(randomIntBetween(1, 3), "int", "float", "double", "text", "keyword", "ip") + .stream().collect(Collectors.toSet()); + FieldSelection.FeatureType featureType = randomBoolean() ? null : randomFrom(FieldSelection.FeatureType.values()); + String reason = randomBoolean() ? null : randomAlphaOfLength(20); + return new FieldSelection(randomAlphaOfLength(10), + mappingTypes, + randomBoolean(), + randomBoolean(), + featureType, + reason); + } + + @Override + protected FieldSelection createTestInstance() { + return createRandom(); + } + + @Override + protected FieldSelection doParseInstance(XContentParser parser) throws IOException { + return FieldSelection.PARSER.apply(parser, null); + } + + @Override + protected Writeable.Reader instanceReader() { + return FieldSelection::new; + } +} diff --git a/x-pack/plugin/core/src/test/java/org/elasticsearch/xpack/core/ml/dataframe/explain/MemoryEstimationTests.java b/x-pack/plugin/core/src/test/java/org/elasticsearch/xpack/core/ml/dataframe/explain/MemoryEstimationTests.java new file mode 100644 index 0000000000000..dc9e20bd86a8c --- /dev/null +++ b/x-pack/plugin/core/src/test/java/org/elasticsearch/xpack/core/ml/dataframe/explain/MemoryEstimationTests.java @@ -0,0 +1,61 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License; + * you may not use this file except in compliance with the Elastic License. + */ +package org.elasticsearch.xpack.core.ml.dataframe.explain; + +import org.elasticsearch.common.io.stream.Writeable; +import org.elasticsearch.common.unit.ByteSizeUnit; +import org.elasticsearch.common.unit.ByteSizeValue; +import org.elasticsearch.common.xcontent.XContentParser; +import org.elasticsearch.test.AbstractSerializingTestCase; + +import java.io.IOException; + +import static org.hamcrest.Matchers.equalTo; +import static org.hamcrest.Matchers.nullValue; + +public class MemoryEstimationTests extends AbstractSerializingTestCase { + + public static MemoryEstimation createRandom() { + return new MemoryEstimation( + randomBoolean() ? new ByteSizeValue(randomNonNegativeLong()) : null, + randomBoolean() ? 
new ByteSizeValue(randomNonNegativeLong()) : null); + } + + @Override + protected MemoryEstimation createTestInstance() { + return createRandom(); + } + + @Override + protected Writeable.Reader instanceReader() { + return MemoryEstimation::new; + } + + @Override + protected MemoryEstimation doParseInstance(XContentParser parser) throws IOException { + return MemoryEstimation.PARSER.apply(parser, null); + } + + public void testConstructor_NullValues() { + MemoryEstimation memoryEstimation = new MemoryEstimation(null, null); + assertThat(memoryEstimation.getExpectedMemoryWithoutDisk(), nullValue()); + assertThat(memoryEstimation.getExpectedMemoryWithDisk(), nullValue()); + } + + public void testConstructor_SmallValues() { + MemoryEstimation memoryEstimation = new MemoryEstimation( + new ByteSizeValue(120, ByteSizeUnit.KB), new ByteSizeValue(30, ByteSizeUnit.KB)); + assertThat(memoryEstimation.getExpectedMemoryWithoutDisk(), equalTo(new ByteSizeValue(120, ByteSizeUnit.KB))); + assertThat(memoryEstimation.getExpectedMemoryWithDisk(), equalTo(new ByteSizeValue(30, ByteSizeUnit.KB))); + } + + public void testConstructor() { + MemoryEstimation memoryEstimation = new MemoryEstimation( + new ByteSizeValue(20, ByteSizeUnit.MB), new ByteSizeValue(10, ByteSizeUnit.MB)); + assertThat(memoryEstimation.getExpectedMemoryWithoutDisk(), equalTo(new ByteSizeValue(20, ByteSizeUnit.MB))); + assertThat(memoryEstimation.getExpectedMemoryWithDisk(), equalTo(new ByteSizeValue(10, ByteSizeUnit.MB))); + } +} diff --git a/x-pack/plugin/ml/qa/ml-with-security/build.gradle b/x-pack/plugin/ml/qa/ml-with-security/build.gradle index 961dc944ea7d7..38beb1d1908c1 100644 --- a/x-pack/plugin/ml/qa/ml-with-security/build.gradle +++ b/x-pack/plugin/ml/qa/ml-with-security/build.gradle @@ -92,7 +92,6 @@ integTest.runner { 'ml/data_frame_analytics_crud/Test put classification given num_top_classes is greater than 1k', 'ml/data_frame_analytics_crud/Test put classification given training_percent is less than one', 'ml/data_frame_analytics_crud/Test put classification given training_percent is greater than hundred', - 'ml/data_frame_analytics_memory_usage_estimation/Test memory usage estimation for empty data frame', 'ml/evaluate_data_frame/Test given missing index', 'ml/evaluate_data_frame/Test given index does not exist', 'ml/evaluate_data_frame/Test given missing evaluation', @@ -113,6 +112,10 @@ integTest.runner { 'ml/evaluate_data_frame/Test regression given evaluation with empty metrics', 'ml/evaluate_data_frame/Test regression given missing actual_field', 'ml/evaluate_data_frame/Test regression given missing predicted_field', + 'ml/explain_data_frame_analytics/Test neither job id nor body', + 'ml/explain_data_frame_analytics/Test both job id and body', + 'ml/explain_data_frame_analytics/Test missing job', + 'ml/explain_data_frame_analytics/Test empty data frame given body', 'ml/delete_job_force/Test cannot force delete a non-existent job', 'ml/delete_model_snapshot/Test delete snapshot missing snapshotId', 'ml/delete_model_snapshot/Test delete snapshot missing job_id', diff --git a/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/MachineLearning.java b/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/MachineLearning.java index 964bc719cbdfa..0293a36747311 100644 --- a/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/MachineLearning.java +++ b/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/MachineLearning.java @@ -65,6 +65,7 @@ import org.elasticsearch.xpack.core.ml.MachineLearningField; 
import org.elasticsearch.xpack.core.ml.MlMetaIndex; import org.elasticsearch.xpack.core.ml.action.CloseJobAction; +import org.elasticsearch.xpack.core.ml.action.ExplainDataFrameAnalyticsAction; import org.elasticsearch.xpack.core.ml.action.DeleteCalendarAction; import org.elasticsearch.xpack.core.ml.action.DeleteCalendarEventAction; import org.elasticsearch.xpack.core.ml.action.DeleteDataFrameAnalyticsAction; @@ -75,7 +76,6 @@ import org.elasticsearch.xpack.core.ml.action.DeleteJobAction; import org.elasticsearch.xpack.core.ml.action.DeleteModelSnapshotAction; import org.elasticsearch.xpack.core.ml.action.DeleteTrainedModelAction; -import org.elasticsearch.xpack.core.ml.action.EstimateMemoryUsageAction; import org.elasticsearch.xpack.core.ml.action.EvaluateDataFrameAction; import org.elasticsearch.xpack.core.ml.action.FinalizeJobExecutionAction; import org.elasticsearch.xpack.core.ml.action.FindFileStructureAction; @@ -98,8 +98,8 @@ import org.elasticsearch.xpack.core.ml.action.GetRecordsAction; import org.elasticsearch.xpack.core.ml.action.GetTrainedModelsAction; import org.elasticsearch.xpack.core.ml.action.GetTrainedModelsStatsAction; -import org.elasticsearch.xpack.core.ml.action.IsolateDatafeedAction; import org.elasticsearch.xpack.core.ml.action.InternalInferModelAction; +import org.elasticsearch.xpack.core.ml.action.IsolateDatafeedAction; import org.elasticsearch.xpack.core.ml.action.KillProcessAction; import org.elasticsearch.xpack.core.ml.action.MlInfoAction; import org.elasticsearch.xpack.core.ml.action.OpenJobAction; @@ -136,6 +136,7 @@ import org.elasticsearch.xpack.core.ml.notifications.AuditorField; import org.elasticsearch.xpack.core.template.TemplateUtils; import org.elasticsearch.xpack.ml.action.TransportCloseJobAction; +import org.elasticsearch.xpack.ml.action.TransportExplainDataFrameAnalyticsAction; import org.elasticsearch.xpack.ml.action.TransportDeleteCalendarAction; import org.elasticsearch.xpack.ml.action.TransportDeleteCalendarEventAction; import org.elasticsearch.xpack.ml.action.TransportDeleteDataFrameAnalyticsAction; @@ -146,7 +147,6 @@ import org.elasticsearch.xpack.ml.action.TransportDeleteJobAction; import org.elasticsearch.xpack.ml.action.TransportDeleteModelSnapshotAction; import org.elasticsearch.xpack.ml.action.TransportDeleteTrainedModelAction; -import org.elasticsearch.xpack.ml.action.TransportEstimateMemoryUsageAction; import org.elasticsearch.xpack.ml.action.TransportEvaluateDataFrameAction; import org.elasticsearch.xpack.ml.action.TransportFinalizeJobExecutionAction; import org.elasticsearch.xpack.ml.action.TransportFindFileStructureAction; @@ -167,9 +167,9 @@ import org.elasticsearch.xpack.ml.action.TransportGetModelSnapshotsAction; import org.elasticsearch.xpack.ml.action.TransportGetOverallBucketsAction; import org.elasticsearch.xpack.ml.action.TransportGetRecordsAction; +import org.elasticsearch.xpack.ml.action.TransportGetTrainedModelsAction; import org.elasticsearch.xpack.ml.action.TransportGetTrainedModelsStatsAction; import org.elasticsearch.xpack.ml.action.TransportInternalInferModelAction; -import org.elasticsearch.xpack.ml.action.TransportGetTrainedModelsAction; import org.elasticsearch.xpack.ml.action.TransportIsolateDatafeedAction; import org.elasticsearch.xpack.ml.action.TransportKillProcessAction; import org.elasticsearch.xpack.ml.action.TransportMlInfoAction; @@ -258,8 +258,8 @@ import org.elasticsearch.xpack.ml.rest.datafeeds.RestStartDatafeedAction; import org.elasticsearch.xpack.ml.rest.datafeeds.RestStopDatafeedAction; import 
org.elasticsearch.xpack.ml.rest.datafeeds.RestUpdateDatafeedAction; +import org.elasticsearch.xpack.ml.rest.dataframe.RestExplainDataFrameAnalyticsAction; import org.elasticsearch.xpack.ml.rest.dataframe.RestDeleteDataFrameAnalyticsAction; -import org.elasticsearch.xpack.ml.rest.dataframe.RestEstimateMemoryUsageAction; import org.elasticsearch.xpack.ml.rest.dataframe.RestEvaluateDataFrameAction; import org.elasticsearch.xpack.ml.rest.dataframe.RestGetDataFrameAnalyticsAction; import org.elasticsearch.xpack.ml.rest.dataframe.RestGetDataFrameAnalyticsStatsAction; @@ -759,7 +759,7 @@ public List getRestHandlers(Settings settings, RestController restC new RestStartDataFrameAnalyticsAction(restController), new RestStopDataFrameAnalyticsAction(restController), new RestEvaluateDataFrameAction(restController), - new RestEstimateMemoryUsageAction(restController), + new RestExplainDataFrameAnalyticsAction(restController), new RestGetTrainedModelsAction(restController), new RestDeleteTrainedModelAction(restController), new RestGetTrainedModelsStatsAction(restController) @@ -829,7 +829,7 @@ public List getRestHandlers(Settings settings, RestController restC new ActionHandler<>(StartDataFrameAnalyticsAction.INSTANCE, TransportStartDataFrameAnalyticsAction.class), new ActionHandler<>(StopDataFrameAnalyticsAction.INSTANCE, TransportStopDataFrameAnalyticsAction.class), new ActionHandler<>(EvaluateDataFrameAction.INSTANCE, TransportEvaluateDataFrameAction.class), - new ActionHandler<>(EstimateMemoryUsageAction.INSTANCE, TransportEstimateMemoryUsageAction.class), + new ActionHandler<>(ExplainDataFrameAnalyticsAction.INSTANCE, TransportExplainDataFrameAnalyticsAction.class), new ActionHandler<>(InternalInferModelAction.INSTANCE, TransportInternalInferModelAction.class), new ActionHandler<>(GetTrainedModelsAction.INSTANCE, TransportGetTrainedModelsAction.class), new ActionHandler<>(DeleteTrainedModelAction.INSTANCE, TransportDeleteTrainedModelAction.class), diff --git a/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/action/TransportEstimateMemoryUsageAction.java b/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/action/TransportEstimateMemoryUsageAction.java deleted file mode 100644 index a82db7c4f97f0..0000000000000 --- a/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/action/TransportEstimateMemoryUsageAction.java +++ /dev/null @@ -1,130 +0,0 @@ -/* - * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one - * or more contributor license agreements. Licensed under the Elastic License; - * you may not use this file except in compliance with the Elastic License. 
- */ -package org.elasticsearch.xpack.ml.action; - -import org.elasticsearch.action.ActionListener; -import org.elasticsearch.action.ActionListenerResponseHandler; -import org.elasticsearch.action.support.ActionFilters; -import org.elasticsearch.action.support.HandledTransportAction; -import org.elasticsearch.client.node.NodeClient; -import org.elasticsearch.cluster.ClusterState; -import org.elasticsearch.cluster.node.DiscoveryNode; -import org.elasticsearch.cluster.service.ClusterService; -import org.elasticsearch.common.inject.Inject; -import org.elasticsearch.tasks.Task; -import org.elasticsearch.transport.TransportService; -import org.elasticsearch.xpack.core.ml.action.EstimateMemoryUsageAction; -import org.elasticsearch.xpack.core.ml.action.PutDataFrameAnalyticsAction; -import org.elasticsearch.xpack.core.ml.utils.ExceptionsHelper; -import org.elasticsearch.xpack.ml.MachineLearning; -import org.elasticsearch.xpack.ml.dataframe.extractor.DataFrameDataExtractorFactory; -import org.elasticsearch.xpack.ml.dataframe.process.MemoryUsageEstimationProcessManager; - -import java.util.Objects; -import java.util.Optional; - -/** - * Estimates memory usage for the given data frame analytics spec. - * Redirects to a different node if the current node is *not* an ML node. - */ -public class TransportEstimateMemoryUsageAction - extends HandledTransportAction { - - private final TransportService transportService; - private final ClusterService clusterService; - private final NodeClient client; - private final MemoryUsageEstimationProcessManager processManager; - - @Inject - public TransportEstimateMemoryUsageAction(TransportService transportService, - ActionFilters actionFilters, - ClusterService clusterService, - NodeClient client, - MemoryUsageEstimationProcessManager processManager) { - super(EstimateMemoryUsageAction.NAME, transportService, actionFilters, PutDataFrameAnalyticsAction.Request::new); - this.transportService = transportService; - this.clusterService = Objects.requireNonNull(clusterService); - this.client = Objects.requireNonNull(client); - this.processManager = Objects.requireNonNull(processManager); - } - - @Override - protected void doExecute(Task task, - PutDataFrameAnalyticsAction.Request request, - ActionListener listener) { - DiscoveryNode localNode = clusterService.localNode(); - if (MachineLearning.isMlNode(localNode)) { - doEstimateMemoryUsage(createTaskIdForMemoryEstimation(task), request, listener); - } else { - redirectToMlNode(request, listener); - } - } - - /** - * Creates unique task id for the memory estimation process. This id is useful when logging. - */ - private static String createTaskIdForMemoryEstimation(Task task) { - return "memory_usage_estimation_" + task.getId(); - } - - /** - * Performs memory usage estimation. - * Memory usage estimation spawns an ML C++ process which is only available on ML nodes. That's why this method can only be called on - * the ML node. 
- */ - private void doEstimateMemoryUsage(String taskId, - PutDataFrameAnalyticsAction.Request request, - ActionListener listener) { - DataFrameDataExtractorFactory.createForSourceIndices( - client, - taskId, - true, // We are not interested in first-time run validations here - request.getConfig(), - ActionListener.wrap( - dataExtractorFactory -> { - processManager.runJobAsync( - taskId, - request.getConfig(), - dataExtractorFactory, - ActionListener.wrap( - result -> listener.onResponse( - new EstimateMemoryUsageAction.Response( - result.getExpectedMemoryWithoutDisk(), result.getExpectedMemoryWithDisk())), - listener::onFailure - ) - ); - }, - listener::onFailure - ) - ); - } - - /** - * Finds the first available ML node in the cluster and redirects the request to this node. - */ - private void redirectToMlNode(PutDataFrameAnalyticsAction.Request request, - ActionListener listener) { - Optional node = findMlNode(clusterService.state()); - if (node.isPresent()) { - transportService.sendRequest( - node.get(), actionName, request, new ActionListenerResponseHandler<>(listener, EstimateMemoryUsageAction.Response::new)); - } else { - listener.onFailure(ExceptionsHelper.badRequestException("No ML node to run on")); - } - } - - /** - * Finds the first available ML node in the cluster state. - */ - private static Optional findMlNode(ClusterState clusterState) { - for (DiscoveryNode node : clusterState.getNodes()) { - if (MachineLearning.isMlNode(node)) { - return Optional.of(node); - } - } - return Optional.empty(); - } -} diff --git a/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/action/TransportExplainDataFrameAnalyticsAction.java b/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/action/TransportExplainDataFrameAnalyticsAction.java new file mode 100644 index 0000000000000..7f19deb8d5ba0 --- /dev/null +++ b/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/action/TransportExplainDataFrameAnalyticsAction.java @@ -0,0 +1,156 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License; + * you may not use this file except in compliance with the Elastic License. 
+ */ +package org.elasticsearch.xpack.ml.action; + +import org.elasticsearch.action.ActionListener; +import org.elasticsearch.action.ActionListenerResponseHandler; +import org.elasticsearch.action.support.ActionFilters; +import org.elasticsearch.action.support.HandledTransportAction; +import org.elasticsearch.client.node.NodeClient; +import org.elasticsearch.cluster.ClusterState; +import org.elasticsearch.cluster.node.DiscoveryNode; +import org.elasticsearch.cluster.service.ClusterService; +import org.elasticsearch.common.collect.Tuple; +import org.elasticsearch.common.inject.Inject; +import org.elasticsearch.license.LicenseUtils; +import org.elasticsearch.license.XPackLicenseState; +import org.elasticsearch.tasks.Task; +import org.elasticsearch.transport.TransportService; +import org.elasticsearch.xpack.core.XPackField; +import org.elasticsearch.xpack.core.ml.action.ExplainDataFrameAnalyticsAction; +import org.elasticsearch.xpack.core.ml.action.PutDataFrameAnalyticsAction; +import org.elasticsearch.xpack.core.ml.dataframe.explain.FieldSelection; +import org.elasticsearch.xpack.core.ml.dataframe.explain.MemoryEstimation; +import org.elasticsearch.xpack.core.ml.utils.ExceptionsHelper; +import org.elasticsearch.xpack.ml.MachineLearning; +import org.elasticsearch.xpack.ml.dataframe.extractor.DataFrameDataExtractorFactory; +import org.elasticsearch.xpack.ml.dataframe.extractor.ExtractedFieldsDetector; +import org.elasticsearch.xpack.ml.dataframe.extractor.ExtractedFieldsDetectorFactory; +import org.elasticsearch.xpack.ml.dataframe.process.MemoryUsageEstimationProcessManager; +import org.elasticsearch.xpack.ml.extractor.ExtractedFields; + +import java.util.List; +import java.util.Objects; +import java.util.Optional; + +/** + * Provides explanations on aspects of the given data frame analytics spec like memory estimation, field selection, etc. + * Redirects to a different node if the current node is *not* an ML node. 
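For reviewers: the high-level REST client half of this PR (ExplainDataFrameAnalyticsRequest, ExplainDataFrameAnalyticsResponse and the MachineLearningClient additions in the diff stat) is expected to be used roughly as follows. This is an illustrative fragment only, not re-verified against the new client code; `client` and `config` are assumed to be an existing RestHighLevelClient and DataFrameAnalyticsConfig.

    // Sketch: calling the new _explain API through the high-level REST client.
    ExplainDataFrameAnalyticsRequest request = new ExplainDataFrameAnalyticsRequest(config);
    ExplainDataFrameAnalyticsResponse response =
        client.machineLearning().explainDataFrameAnalytics(request, RequestOptions.DEFAULT);
    MemoryEstimation memoryEstimation = response.getMemoryEstimation(); // estimates with and without disk
    List<FieldSelection> fieldSelection = response.getFieldSelection(); // per-field decision plus reason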
+ */ +public class TransportExplainDataFrameAnalyticsAction + extends HandledTransportAction { + + private final XPackLicenseState licenseState; + private final TransportService transportService; + private final ClusterService clusterService; + private final NodeClient client; + private final MemoryUsageEstimationProcessManager processManager; + + @Inject + public TransportExplainDataFrameAnalyticsAction(TransportService transportService, + ActionFilters actionFilters, + ClusterService clusterService, + NodeClient client, + XPackLicenseState licenseState, + MemoryUsageEstimationProcessManager processManager) { + super(ExplainDataFrameAnalyticsAction.NAME, transportService, actionFilters, PutDataFrameAnalyticsAction.Request::new); + this.transportService = transportService; + this.clusterService = Objects.requireNonNull(clusterService); + this.client = Objects.requireNonNull(client); + this.licenseState = licenseState; + this.processManager = Objects.requireNonNull(processManager); + } + + @Override + protected void doExecute(Task task, + PutDataFrameAnalyticsAction.Request request, + ActionListener listener) { + if (licenseState.isMachineLearningAllowed() == false) { + listener.onFailure(LicenseUtils.newComplianceException(XPackField.MACHINE_LEARNING)); + return; + } + + DiscoveryNode localNode = clusterService.localNode(); + if (MachineLearning.isMlNode(localNode)) { + explain(task, request, listener); + } else { + redirectToMlNode(request, listener); + } + } + + private void explain(Task task, PutDataFrameAnalyticsAction.Request request, + ActionListener listener) { + ExtractedFieldsDetectorFactory extractedFieldsDetectorFactory = new ExtractedFieldsDetectorFactory(client); + extractedFieldsDetectorFactory.createFromSource(request.getConfig(), true, ActionListener.wrap( + extractedFieldsDetector -> { + explain(task, request, extractedFieldsDetector, listener); + }, + listener::onFailure + )); + } + + private void explain(Task task, PutDataFrameAnalyticsAction.Request request, ExtractedFieldsDetector extractedFieldsDetector, + ActionListener listener) { + Tuple> fieldExtraction = extractedFieldsDetector.detect(); + + ActionListener memoryEstimationListener = ActionListener.wrap( + memoryEstimation -> listener.onResponse(new ExplainDataFrameAnalyticsAction.Response(fieldExtraction.v2(), memoryEstimation)), + listener::onFailure + ); + + estimateMemoryUsage(task, request, fieldExtraction.v1(), memoryEstimationListener); + } + + /** + * Performs memory usage estimation. + * Memory usage estimation spawns an ML C++ process which is only available on ML nodes. That's why this method can only be called on + * the ML node. + */ + private void estimateMemoryUsage(Task task, + PutDataFrameAnalyticsAction.Request request, + ExtractedFields extractedFields, + ActionListener listener) { + final String estimateMemoryTaskId = "memory_usage_estimation_" + task.getId(); + DataFrameDataExtractorFactory extractorFactory = DataFrameDataExtractorFactory.createForSourceIndices( + client, estimateMemoryTaskId, request.getConfig(), extractedFields); + processManager.runJobAsync( + estimateMemoryTaskId, + request.getConfig(), + extractorFactory, + ActionListener.wrap( + result -> listener.onResponse( + new MemoryEstimation(result.getExpectedMemoryWithoutDisk(), result.getExpectedMemoryWithDisk())), + listener::onFailure + ) + ); + } + + /** + * Finds the first available ML node in the cluster and redirects the request to this node. 
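To condense the control flow of the two explain(...) overloads and estimateMemoryUsage(...) above: field selection is computed synchronously by the detector, and only the memory estimate needs the native process. A fragment using the names from this file:

    Tuple<ExtractedFields, List<FieldSelection>> fieldExtraction = extractedFieldsDetector.detect();
    // v1() feeds the memory estimation process; v2() is returned verbatim in the response.
    estimateMemoryUsage(task, request, fieldExtraction.v1(), ActionListener.wrap(
        memoryEstimation -> listener.onResponse(
            new ExplainDataFrameAnalyticsAction.Response(fieldExtraction.v2(), memoryEstimation)),
        listener::onFailure));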
+ */ + private void redirectToMlNode(PutDataFrameAnalyticsAction.Request request, + ActionListener listener) { + Optional node = findMlNode(clusterService.state()); + if (node.isPresent()) { + transportService.sendRequest(node.get(), actionName, request, + new ActionListenerResponseHandler<>(listener, ExplainDataFrameAnalyticsAction.Response::new)); + } else { + listener.onFailure(ExceptionsHelper.badRequestException("No ML node to run on")); + } + } + + /** + * Finds the first available ML node in the cluster state. + */ + private static Optional findMlNode(ClusterState clusterState) { + for (DiscoveryNode node : clusterState.getNodes()) { + if (MachineLearning.isMlNode(node)) { + return Optional.of(node); + } + } + return Optional.empty(); + } +} diff --git a/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/action/TransportStartDataFrameAnalyticsAction.java b/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/action/TransportStartDataFrameAnalyticsAction.java index 1740a7fb53247..af67750ee6dc5 100644 --- a/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/action/TransportStartDataFrameAnalyticsAction.java +++ b/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/action/TransportStartDataFrameAnalyticsAction.java @@ -29,6 +29,7 @@ import org.elasticsearch.common.inject.Inject; import org.elasticsearch.common.io.stream.StreamInput; import org.elasticsearch.common.settings.Settings; +import org.elasticsearch.common.unit.ByteSizeValue; import org.elasticsearch.common.unit.TimeValue; import org.elasticsearch.index.IndexNotFoundException; import org.elasticsearch.license.LicenseUtils; @@ -47,7 +48,7 @@ import org.elasticsearch.xpack.core.XPackField; import org.elasticsearch.xpack.core.ml.MlMetadata; import org.elasticsearch.xpack.core.ml.MlTasks; -import org.elasticsearch.xpack.core.ml.action.EstimateMemoryUsageAction; +import org.elasticsearch.xpack.core.ml.action.ExplainDataFrameAnalyticsAction; import org.elasticsearch.xpack.core.ml.action.GetDataFrameAnalyticsStatsAction; import org.elasticsearch.xpack.core.ml.action.PutDataFrameAnalyticsAction; import org.elasticsearch.xpack.core.ml.action.StartDataFrameAnalyticsAction; @@ -66,6 +67,7 @@ import org.elasticsearch.xpack.ml.dataframe.extractor.DataFrameDataExtractorFactory; import org.elasticsearch.xpack.ml.dataframe.extractor.ExtractedFieldsDetectorFactory; import org.elasticsearch.xpack.ml.dataframe.persistence.DataFrameAnalyticsConfigProvider; +import org.elasticsearch.xpack.ml.extractor.ExtractedFields; import org.elasticsearch.xpack.ml.job.JobNodeSelector; import org.elasticsearch.xpack.ml.notifications.DataFrameAnalyticsAuditor; import org.elasticsearch.xpack.ml.process.MlMemoryTracker; @@ -190,20 +192,18 @@ private void estimateMemoryUsageAndUpdateMemoryTracker(StartContext startContext final String jobId = startContext.config.getId(); // Tell the job tracker to refresh the memory requirement for this job and all other jobs that have persistent tasks - ActionListener estimateMemoryUsageListener = ActionListener.wrap( - estimateMemoryUsageResponse -> { - auditor.info( - jobId, - Messages.getMessage( - Messages.DATA_FRAME_ANALYTICS_AUDIT_ESTIMATED_MEMORY_USAGE, - estimateMemoryUsageResponse.getExpectedMemoryWithoutDisk())); + ActionListener explainListener = ActionListener.wrap( + explainResponse -> { + ByteSizeValue expectedMemoryWithoutDisk = explainResponse.getMemoryEstimation().getExpectedMemoryWithoutDisk(); + auditor.info(jobId, + 
Messages.getMessage(Messages.DATA_FRAME_ANALYTICS_AUDIT_ESTIMATED_MEMORY_USAGE, expectedMemoryWithoutDisk)); // Validate that model memory limit is sufficient to run the analysis if (startContext.config.getModelMemoryLimit() - .compareTo(estimateMemoryUsageResponse.getExpectedMemoryWithoutDisk()) < 0) { + .compareTo(expectedMemoryWithoutDisk) < 0) { ElasticsearchStatusException e = ExceptionsHelper.badRequestException( "Cannot start because the configured model memory limit [{}] is lower than the expected memory usage [{}]", - startContext.config.getModelMemoryLimit(), estimateMemoryUsageResponse.getExpectedMemoryWithoutDisk()); + startContext.config.getModelMemoryLimit(), expectedMemoryWithoutDisk); listener.onFailure(e); return; } @@ -215,13 +215,13 @@ private void estimateMemoryUsageAndUpdateMemoryTracker(StartContext startContext listener::onFailure ); - PutDataFrameAnalyticsAction.Request estimateMemoryUsageRequest = new PutDataFrameAnalyticsAction.Request(startContext.config); + PutDataFrameAnalyticsAction.Request explainRequest = new PutDataFrameAnalyticsAction.Request(startContext.config); ClientHelper.executeAsyncWithOrigin( client, ClientHelper.ML_ORIGIN, - EstimateMemoryUsageAction.INSTANCE, - estimateMemoryUsageRequest, - estimateMemoryUsageListener); + ExplainDataFrameAnalyticsAction.INSTANCE, + explainRequest, + explainListener); } @@ -277,7 +277,11 @@ private void getStartContext(String id, ActionListener finalListen // Validate extraction is possible boolean isTaskRestarting = startContext.startingState != DataFrameAnalyticsTask.StartingState.FIRST_TIME; new ExtractedFieldsDetectorFactory(client).createFromSource(startContext.config, isTaskRestarting, ActionListener.wrap( - extractedFieldsDetector -> toValidateDestEmptyListener.onResponse(startContext), finalListener::onFailure)); + extractedFieldsDetector -> { + startContext.extractedFields = extractedFieldsDetector.detect().v1(); + toValidateDestEmptyListener.onResponse(startContext); + }, + finalListener::onFailure)); }, finalListener::onFailure ); @@ -294,33 +298,27 @@ private void getStartContext(String id, ActionListener finalListen } private void validateSourceIndexHasRows(StartContext startContext, ActionListener listener) { - boolean isTaskRestarting = startContext.startingState != DataFrameAnalyticsTask.StartingState.FIRST_TIME; - DataFrameDataExtractorFactory.createForSourceIndices(client, + DataFrameDataExtractorFactory extractorFactory = DataFrameDataExtractorFactory.createForSourceIndices(client, "validate_source_index_has_rows-" + startContext.config.getId(), - isTaskRestarting, startContext.config, - ActionListener.wrap( - dataFrameDataExtractorFactory -> - dataFrameDataExtractorFactory - .newExtractor(false) - .collectDataSummaryAsync(ActionListener.wrap( - dataSummary -> { - if (dataSummary.rows == 0) { - listener.onFailure(ExceptionsHelper.badRequestException( - "Unable to start {} as no documents in the source indices [{}] contained all the fields " - + "selected for analysis. 
If you are relying on automatic field selection then there are " - + "currently mapped fields that do not exist in any indexed documents, and you will have " - + "to switch to explicit field selection and include only fields that exist in indexed " - + "documents.", - startContext.config.getId(), - Strings.arrayToCommaDelimitedString(startContext.config.getSource().getIndex()) - )); - } else { - listener.onResponse(startContext); - } - }, - listener::onFailure - )), + startContext.extractedFields); + extractorFactory.newExtractor(false) + .collectDataSummaryAsync(ActionListener.wrap( + dataSummary -> { + if (dataSummary.rows == 0) { + listener.onFailure(ExceptionsHelper.badRequestException( + "Unable to start {} as no documents in the source indices [{}] contained all the fields " + + "selected for analysis. If you are relying on automatic field selection then there are " + + "currently mapped fields that do not exist in any indexed documents, and you will have " + + "to switch to explicit field selection and include only fields that exist in indexed " + + "documents.", + startContext.config.getId(), + Strings.arrayToCommaDelimitedString(startContext.config.getSource().getIndex()) + )); + } else { + listener.onResponse(startContext); + } + }, listener::onFailure )); } @@ -402,6 +400,7 @@ private static class StartContext { private final DataFrameAnalyticsConfig config; private final List progressOnStart; private final DataFrameAnalyticsTask.StartingState startingState; + private volatile ExtractedFields extractedFields; private StartContext(DataFrameAnalyticsConfig config, List progressOnStart) { this.config = config; diff --git a/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/dataframe/extractor/DataFrameDataExtractorFactory.java b/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/dataframe/extractor/DataFrameDataExtractorFactory.java index ce21973ca9130..f8afd22909831 100644 --- a/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/dataframe/extractor/DataFrameDataExtractorFactory.java +++ b/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/dataframe/extractor/DataFrameDataExtractorFactory.java @@ -29,7 +29,7 @@ public class DataFrameDataExtractorFactory { private final Map headers; private final boolean includeRowsWithMissingValues; - private DataFrameDataExtractorFactory(Client client, String analyticsId, List indices, ExtractedFields extractedFields, + public DataFrameDataExtractorFactory(Client client, String analyticsId, List indices, ExtractedFields extractedFields, Map headers, boolean includeRowsWithMissingValues) { this.client = Objects.requireNonNull(client); this.analyticsId = Objects.requireNonNull(analyticsId); @@ -66,32 +66,19 @@ private QueryBuilder allExtractedFieldsExistQuery() { } /** - * Validate and create a new extractor factory + * Create a new extractor factory * * The source index must exist and contain at least 1 compatible field or validations will fail. 
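Note the shape change in the factory below: createForSourceIndices previously ran field detection itself and reported back through a listener; it is now a plain synchronous factory because detection happens once, up front, in the callers. A usage sketch (the task id string is a placeholder):

    // Detection is done by the caller; the factory only assembles the extractor.
    ExtractedFields extractedFields = extractedFieldsDetector.detect().v1();
    DataFrameDataExtractorFactory extractorFactory = DataFrameDataExtractorFactory.createForSourceIndices(
        client, "my_task_id", config, extractedFields); // "my_task_id" is illustrative
    DataFrameDataExtractor extractor = extractorFactory.newExtractor(false);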
* * @param client ES Client used to make calls against the cluster * @param taskId The task id - * @param isTaskRestarting Whether the task is restarting or it is running for the first time * @param config The config from which to create the extractor factory - * @param listener The listener to notify on creation or failure + * @param extractedFields The fields to extract */ - public static void createForSourceIndices(Client client, - String taskId, - boolean isTaskRestarting, - DataFrameAnalyticsConfig config, - ActionListener listener) { - ExtractedFieldsDetectorFactory extractedFieldsDetectorFactory = new ExtractedFieldsDetectorFactory(client); - extractedFieldsDetectorFactory.createFromSource(config, isTaskRestarting, ActionListener.wrap( - extractedFieldsDetector -> { - ExtractedFields extractedFields = extractedFieldsDetector.detect(); - DataFrameDataExtractorFactory extractorFactory = new DataFrameDataExtractorFactory(client, taskId, - Arrays.asList(config.getSource().getIndex()), extractedFields, config.getHeaders(), - config.getAnalysis().supportsMissingValues()); - listener.onResponse(extractorFactory); - }, - listener::onFailure - )); + public static DataFrameDataExtractorFactory createForSourceIndices(Client client, String taskId, DataFrameAnalyticsConfig config, + ExtractedFields extractedFields) { + return new DataFrameDataExtractorFactory(client, taskId, Arrays.asList(config.getSource().getIndex()), extractedFields, + config.getHeaders(), config.getAnalysis().supportsMissingValues()); } /** @@ -111,7 +98,7 @@ public static void createForDestinationIndex(Client client, ExtractedFieldsDetectorFactory extractedFieldsDetectorFactory = new ExtractedFieldsDetectorFactory(client); extractedFieldsDetectorFactory.createFromDest(config, isTaskRestarting, ActionListener.wrap( extractedFieldsDetector -> { - ExtractedFields extractedFields = extractedFieldsDetector.detect(); + ExtractedFields extractedFields = extractedFieldsDetector.detect().v1(); DataFrameDataExtractorFactory extractorFactory = new DataFrameDataExtractorFactory(client, config.getId(), Collections.singletonList(config.getDest().getIndex()), extractedFields, config.getHeaders(), config.getAnalysis().supportsMissingValues()); diff --git a/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/dataframe/extractor/ExtractedFieldsDetector.java b/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/dataframe/extractor/ExtractedFieldsDetector.java index 5d94b57aca584..682cc94433c60 100644 --- a/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/dataframe/extractor/ExtractedFieldsDetector.java +++ b/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/dataframe/extractor/ExtractedFieldsDetector.java @@ -11,6 +11,7 @@ import org.elasticsearch.action.fieldcaps.FieldCapabilities; import org.elasticsearch.action.fieldcaps.FieldCapabilitiesResponse; import org.elasticsearch.common.Strings; +import org.elasticsearch.common.collect.Tuple; import org.elasticsearch.common.regex.Regex; import org.elasticsearch.index.IndexSettings; import org.elasticsearch.index.mapper.BooleanFieldMapper; @@ -19,6 +20,7 @@ import org.elasticsearch.xpack.core.ml.dataframe.DataFrameAnalyticsDest; import org.elasticsearch.xpack.core.ml.dataframe.analyses.RequiredField; import org.elasticsearch.xpack.core.ml.dataframe.analyses.Types; +import org.elasticsearch.xpack.core.ml.dataframe.explain.FieldSelection; import org.elasticsearch.xpack.core.ml.job.messages.Messages; import org.elasticsearch.xpack.core.ml.utils.ExceptionsHelper; import 
org.elasticsearch.xpack.core.ml.utils.NameResolver; @@ -29,13 +31,12 @@ import java.util.ArrayList; import java.util.Arrays; import java.util.Collections; -import java.util.HashSet; +import java.util.Comparator; import java.util.Iterator; import java.util.LinkedHashMap; import java.util.List; import java.util.Map; import java.util.Objects; -import java.util.Optional; import java.util.Set; import java.util.TreeSet; import java.util.stream.Collectors; @@ -57,9 +58,8 @@ public class ExtractedFieldsDetector { private final FieldCapabilitiesResponse fieldCapabilitiesResponse; private final Map fieldCardinalities; - ExtractedFieldsDetector(String[] index, DataFrameAnalyticsConfig config, boolean isTaskRestarting, - int docValueFieldsLimit, FieldCapabilitiesResponse fieldCapabilitiesResponse, - Map fieldCardinalities) { + ExtractedFieldsDetector(String[] index, DataFrameAnalyticsConfig config, boolean isTaskRestarting, int docValueFieldsLimit, + FieldCapabilitiesResponse fieldCapabilitiesResponse, Map fieldCardinalities) { this.index = Objects.requireNonNull(index); this.config = Objects.requireNonNull(config); this.isTaskRestarting = isTaskRestarting; @@ -68,37 +68,52 @@ public class ExtractedFieldsDetector { this.fieldCardinalities = Objects.requireNonNull(fieldCardinalities); } - public ExtractedFields detect() { - Set fields = getIncludedFields(); - - if (fields.isEmpty()) { - throw ExceptionsHelper.badRequestException("No compatible fields could be detected in index {}. Supported types are {}.", - Arrays.toString(index), - getSupportedTypes()); - } - - checkNoIgnoredFields(fields); + public Tuple> detect() { + TreeSet fieldSelection = new TreeSet<>(Comparator.comparing(FieldSelection::getName)); + Set fields = getIncludedFields(fieldSelection); checkFieldsHaveCompatibleTypes(fields); checkRequiredFields(fields); checkFieldsWithCardinalityLimit(); - return detectExtractedFields(fields); + ExtractedFields extractedFields = detectExtractedFields(fields, fieldSelection); + addIncludedFields(extractedFields, fieldSelection); + + return Tuple.tuple(extractedFields, Collections.unmodifiableList(new ArrayList<>(fieldSelection))); } - private Set getIncludedFields() { - Set fields = new HashSet<>(fieldCapabilitiesResponse.get().keySet()); + private Set getIncludedFields(Set fieldSelection) { + Set fields = new TreeSet<>(fieldCapabilitiesResponse.get().keySet()); + fields.removeAll(IGNORE_FIELDS); checkResultsFieldIsNotPresent(); removeFieldsUnderResultsField(fields); FetchSourceContext analyzedFields = config.getAnalyzedFields(); // If the user has not explicitly included fields we'll include all compatible fields if (analyzedFields == null || analyzedFields.includes().length == 0) { - fields.removeAll(IGNORE_FIELDS); - removeFieldsWithIncompatibleTypes(fields); + removeFieldsWithIncompatibleTypes(fields, fieldSelection); } - includeAndExcludeFields(fields); + includeAndExcludeFields(fields, fieldSelection); + + if (fields.isEmpty()) { + throw ExceptionsHelper.badRequestException("No compatible fields could be detected in index {}. 
Supported types are {}.", + Arrays.toString(index), + getSupportedTypes()); + } + return fields; } + private void removeFieldsUnderResultsField(Set fields) { + String resultsField = config.getDest().getResultsField(); + Iterator fieldsIterator = fields.iterator(); + while (fieldsIterator.hasNext()) { + String field = fieldsIterator.next(); + if (field.startsWith(resultsField + ".")) { + fieldsIterator.remove(); + } + } + fields.removeIf(field -> field.startsWith(resultsField + ".")); + } + private void checkResultsFieldIsNotPresent() { // If the task is restarting we do not mind the index containing the results field, we will overwrite all docs if (isTaskRestarting) { @@ -117,16 +132,21 @@ private void checkResultsFieldIsNotPresent() { } } - private void removeFieldsUnderResultsField(Set fields) { - // Ignore fields under the results object - fields.removeIf(field -> field.startsWith(config.getDest().getResultsField() + ".")); + private void addExcludedField(String field, String reason, Set fieldSelection) { + fieldSelection.add(FieldSelection.excluded(field, getMappingTypes(field), reason)); + } + + private Set getMappingTypes(String field) { + Map fieldCaps = fieldCapabilitiesResponse.getField(field); + return fieldCaps == null ? Collections.emptySet() : fieldCaps.keySet(); } - private void removeFieldsWithIncompatibleTypes(Set fields) { + private void removeFieldsWithIncompatibleTypes(Set fields, Set fieldSelection) { Iterator fieldsIterator = fields.iterator(); while (fieldsIterator.hasNext()) { String field = fieldsIterator.next(); if (hasCompatibleType(field) == false) { + addExcludedField(field, "unsupported type; supported types are " + getSupportedTypes(), fieldSelection); fieldsIterator.remove(); } } @@ -163,7 +183,7 @@ private Set getSupportedTypes() { return supportedTypes; } - private void includeAndExcludeFields(Set fields) { + private void includeAndExcludeFields(Set fields, Set fieldSelection) { FetchSourceContext analyzedFields = config.getAnalyzedFields(); if (analyzedFields == null) { return; @@ -188,18 +208,30 @@ private void includeAndExcludeFields(Set fields) { Messages.getMessage(Messages.DATA_FRAME_ANALYTICS_BAD_FIELD_FILTER, ex))) .expand(excludes, true); - fields.retainAll(includedSet); - fields.removeAll(excludedSet); + applyIncludesExcludes(fields, includedSet, excludedSet, fieldSelection); } catch (ResourceNotFoundException ex) { // Re-wrap our exception so that we throw the same exception type when there are no fields. 
throw ExceptionsHelper.badRequestException(ex.getMessage()); } } - private void checkNoIgnoredFields(Set<String> fields) { - Optional<String> ignoreField = IGNORE_FIELDS.stream().filter(fields::contains).findFirst(); - if (ignoreField.isPresent()) { - throw ExceptionsHelper.badRequestException("field [{}] cannot be analyzed", ignoreField.get()); + private void applyIncludesExcludes(Set<String> fields, Set<String> includes, Set<String> excludes, + Set<FieldSelection> fieldSelection) { + Iterator<String> fieldsIterator = fields.iterator(); + while (fieldsIterator.hasNext()) { + String field = fieldsIterator.next(); + if (includes.contains(field)) { + if (IGNORE_FIELDS.contains(field)) { + throw ExceptionsHelper.badRequestException("field [{}] cannot be analyzed", field); + } + if (excludes.contains(field)) { + fieldsIterator.remove(); + addExcludedField(field, "field in excludes list", fieldSelection); + } + } else { + fieldsIterator.remove(); + addExcludedField(field, "field not in includes list", fieldSelection); + } } } @@ -247,13 +279,10 @@ private void checkFieldsWithCardinalityLimit() { } } - private ExtractedFields detectExtractedFields(Set<String> fields) { - List<String> sortedFields = new ArrayList<>(fields); - // We sort the fields to ensure the checksum for each document is deterministic - Collections.sort(sortedFields); - ExtractedFields extractedFields = ExtractedFields.build(sortedFields, Collections.emptySet(), fieldCapabilitiesResponse); + private ExtractedFields detectExtractedFields(Set<String> fields, Set<FieldSelection> fieldSelection) { + ExtractedFields extractedFields = ExtractedFields.build(fields, Collections.emptySet(), fieldCapabilitiesResponse); boolean preferSource = extractedFields.getDocValueFields().size() > docValueFieldsLimit; - extractedFields = deduplicateMultiFields(extractedFields, preferSource); + extractedFields = deduplicateMultiFields(extractedFields, preferSource, fieldSelection); if (preferSource) { extractedFields = fetchFromSourceIfSupported(extractedFields); if (extractedFields.getDocValueFields().size() > docValueFieldsLimit) { @@ -266,7 +295,8 @@ private ExtractedFields detectExtractedFields(Set<String> fields) { return extractedFields; } - private ExtractedFields deduplicateMultiFields(ExtractedFields extractedFields, boolean preferSource) { + private ExtractedFields deduplicateMultiFields(ExtractedFields extractedFields, boolean preferSource, + Set<FieldSelection> fieldSelection) { Set<String> requiredFields = config.getAnalysis().getRequiredFields().stream().map(RequiredField::getName) .collect(Collectors.toSet()); Map<String, ExtractedField> nameOrParentToField = new LinkedHashMap<>(); @@ -276,43 +306,53 @@ private ExtractedFields deduplicateMultiFields(ExtractedFields extractedFields, if (existingField != null) { ExtractedField parent = currentField.isMultiField() ? existingField : currentField; ExtractedField multiField = currentField.isMultiField() ?
currentField : existingField; - nameOrParentToField.put(nameOrParent, chooseMultiFieldOrParent(preferSource, requiredFields, parent, multiField)); + nameOrParentToField.put(nameOrParent, + chooseMultiFieldOrParent(preferSource, requiredFields, parent, multiField, fieldSelection)); } } return new ExtractedFields(new ArrayList<>(nameOrParentToField.values())); } - private ExtractedField chooseMultiFieldOrParent(boolean preferSource, Set requiredFields, - ExtractedField parent, ExtractedField multiField) { + private ExtractedField chooseMultiFieldOrParent(boolean preferSource, Set requiredFields, ExtractedField parent, + ExtractedField multiField, Set fieldSelection) { // Check requirements first if (requiredFields.contains(parent.getName())) { + addExcludedField(multiField.getName(), "[" + parent.getName() + "] is required instead", fieldSelection); return parent; } if (requiredFields.contains(multiField.getName())) { + addExcludedField(parent.getName(), "[" + multiField.getName() + "] is required instead", fieldSelection); return multiField; } // If both are multi-fields it means there are several. In this case parent is the previous multi-field // we selected. We'll just keep that. if (parent.isMultiField() && multiField.isMultiField()) { + addExcludedField(multiField.getName(), "[" + parent.getName() + "] came first", fieldSelection); return parent; } // If we prefer source only the parent may support it. If it does we pick it immediately. if (preferSource && parent.supportsFromSource()) { + addExcludedField(multiField.getName(), "[" + parent.getName() + "] is preferred because it supports fetching from source", + fieldSelection); return parent; } // If any of the two is a doc_value field let's prefer it as it'd support aggregations. // We check the parent first as it'd be a shorter field name. if (parent.getMethod() == ExtractedField.Method.DOC_VALUE) { + addExcludedField(multiField.getName(), "[" + parent.getName() + "] is preferred because it is aggregatable", fieldSelection); return parent; } if (multiField.getMethod() == ExtractedField.Method.DOC_VALUE) { + addExcludedField(parent.getName(), "[" + multiField.getName() + "] is preferred because it is aggregatable", fieldSelection); return multiField; } // None is aggregatable. Let's pick the parent for its shorter name. + addExcludedField(multiField.getName(), "[" + parent.getName() + "] is preferred because none of the multi-fields are aggregatable", + fieldSelection); return parent; } @@ -343,6 +383,26 @@ private ExtractedFields fetchBooleanFieldsAsIntegers(ExtractedFields extractedFi return new ExtractedFields(adjusted); } + private void addIncludedFields(ExtractedFields extractedFields, Set fieldSelection) { + Set requiredFields = config.getAnalysis().getRequiredFields().stream().map(RequiredField::getName) + .collect(Collectors.toSet()); + Set categoricalFields = getCategoricalFields(extractedFields); + for (ExtractedField includedField : extractedFields.getAllFields()) { + FieldSelection.FeatureType featureType = categoricalFields.contains(includedField.getName()) ? 
+ FieldSelection.FeatureType.CATEGORICAL : FieldSelection.FeatureType.NUMERICAL; + fieldSelection.add(FieldSelection.included(includedField.getName(), includedField.getTypes(), + requiredFields.contains(includedField.getName()), featureType)); + } + } + + private Set getCategoricalFields(ExtractedFields extractedFields) { + return extractedFields.getAllFields().stream() + .filter(extractedField -> config.getAnalysis().getAllowedCategoricalTypes(extractedField.getName()) + .containsAll(extractedField.getTypes())) + .map(ExtractedField::getName) + .collect(Collectors.toSet()); + } + private static boolean isBoolean(Set types) { return types.size() == 1 && types.contains(BooleanFieldMapper.CONTENT_TYPE); } diff --git a/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/dataframe/process/MemoryUsageEstimationProcessManager.java b/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/dataframe/process/MemoryUsageEstimationProcessManager.java index 2e5189eb249eb..6740f8d4d34ca 100644 --- a/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/dataframe/process/MemoryUsageEstimationProcessManager.java +++ b/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/dataframe/process/MemoryUsageEstimationProcessManager.java @@ -100,9 +100,9 @@ private MemoryUsageEstimationResult runJob(String jobId, } finally { process.consumeAndCloseOutputStream(); try { - LOGGER.info("[{}] Closing process", jobId); + LOGGER.debug("[{}] Closing process", jobId); process.close(); - LOGGER.info("[{}] Closed process", jobId); + LOGGER.debug("[{}] Closed process", jobId); } catch (Exception e) { String errorMsg = new ParameterizedMessage( diff --git a/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/rest/dataframe/RestEstimateMemoryUsageAction.java b/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/rest/dataframe/RestEstimateMemoryUsageAction.java deleted file mode 100644 index 25f2bcb4bb872..0000000000000 --- a/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/rest/dataframe/RestEstimateMemoryUsageAction.java +++ /dev/null @@ -1,38 +0,0 @@ -/* - * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one - * or more contributor license agreements. Licensed under the Elastic License; - * you may not use this file except in compliance with the Elastic License. 
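The categorical/numerical split computed by addIncludedFields and getCategoricalFields above reduces to a containsAll check over the field's mapping types. A minimal standalone illustration follows; it is an editorial simplification, and the allowed categorical types really come from the analysis via getAllowedCategoricalTypes (the set below is an assumption):

    import java.util.Set;

    public final class FeatureTypeSketch {
        // A field is CATEGORICAL only if every one of its mapping types is an
        // allowed categorical type for the analysis; otherwise it is NUMERICAL.
        static String featureType(Set<String> mappingTypes, Set<String> allowedCategoricalTypes) {
            return allowedCategoricalTypes.containsAll(mappingTypes) ? "categorical" : "numerical";
        }

        public static void main(String[] args) {
            Set<String> allowed = Set.of("text", "keyword", "ip"); // assumed allowed categorical types
            System.out.println(featureType(Set.of("keyword"), allowed)); // categorical
            System.out.println(featureType(Set.of("float"), allowed));   // numerical
        }
    }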
- */ -package org.elasticsearch.xpack.ml.rest.dataframe; - -import org.elasticsearch.client.node.NodeClient; -import org.elasticsearch.rest.BaseRestHandler; -import org.elasticsearch.rest.RestController; -import org.elasticsearch.rest.RestRequest; -import org.elasticsearch.rest.action.RestToXContentListener; -import org.elasticsearch.xpack.core.ml.action.EstimateMemoryUsageAction; -import org.elasticsearch.xpack.core.ml.action.PutDataFrameAnalyticsAction; -import org.elasticsearch.xpack.ml.MachineLearning; - -import java.io.IOException; - -public class RestEstimateMemoryUsageAction extends BaseRestHandler { - - public RestEstimateMemoryUsageAction(RestController controller) { - controller.registerHandler( - RestRequest.Method.POST, - MachineLearning.BASE_PATH + "data_frame/analytics/_estimate_memory_usage", this); - } - - @Override - public String getName() { - return "ml_estimate_memory_usage_action"; - } - - @Override - protected RestChannelConsumer prepareRequest(RestRequest restRequest, NodeClient client) throws IOException { - PutDataFrameAnalyticsAction.Request request = - PutDataFrameAnalyticsAction.Request.parseRequestForMemoryEstimation(restRequest.contentOrSourceParamParser()); - return channel -> client.execute(EstimateMemoryUsageAction.INSTANCE, request, new RestToXContentListener<>(channel)); - } -} diff --git a/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/rest/dataframe/RestExplainDataFrameAnalyticsAction.java b/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/rest/dataframe/RestExplainDataFrameAnalyticsAction.java new file mode 100644 index 0000000000000..b16bf7b3efbf1 --- /dev/null +++ b/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/rest/dataframe/RestExplainDataFrameAnalyticsAction.java @@ -0,0 +1,84 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License; + * you may not use this file except in compliance with the Elastic License. 
+ */ +package org.elasticsearch.xpack.ml.rest.dataframe; + +import org.elasticsearch.action.ActionListener; +import org.elasticsearch.client.node.NodeClient; +import org.elasticsearch.common.Strings; +import org.elasticsearch.rest.BaseRestHandler; +import org.elasticsearch.rest.RestController; +import org.elasticsearch.rest.RestRequest; +import org.elasticsearch.rest.action.RestToXContentListener; +import org.elasticsearch.xpack.core.ml.action.ExplainDataFrameAnalyticsAction; +import org.elasticsearch.xpack.core.ml.action.GetDataFrameAnalyticsAction; +import org.elasticsearch.xpack.core.ml.action.PutDataFrameAnalyticsAction; +import org.elasticsearch.xpack.core.ml.dataframe.DataFrameAnalyticsConfig; +import org.elasticsearch.xpack.core.ml.utils.ExceptionsHelper; +import org.elasticsearch.xpack.ml.MachineLearning; + +import java.io.IOException; +import java.util.List; +import java.util.stream.Collectors; + +public class RestExplainDataFrameAnalyticsAction extends BaseRestHandler { + + public RestExplainDataFrameAnalyticsAction(RestController controller) { + controller.registerHandler(RestRequest.Method.GET, MachineLearning.BASE_PATH + "data_frame/analytics/_explain", this); + controller.registerHandler(RestRequest.Method.POST, MachineLearning.BASE_PATH + "data_frame/analytics/_explain", this); + controller.registerHandler(RestRequest.Method.GET, MachineLearning.BASE_PATH + "data_frame/analytics/{" + + DataFrameAnalyticsConfig.ID.getPreferredName() + "}/_explain", this); + controller.registerHandler(RestRequest.Method.POST, MachineLearning.BASE_PATH + "data_frame/analytics/{" + + DataFrameAnalyticsConfig.ID.getPreferredName() + "}/_explain", this); + } + + @Override + public String getName() { + return "ml_explain_data_frame_analytics_action"; + } + + @Override + protected RestChannelConsumer prepareRequest(RestRequest restRequest, NodeClient client) throws IOException { + final String jobId = restRequest.param(DataFrameAnalyticsConfig.ID.getPreferredName()); + + if (Strings.isNullOrEmpty(jobId) && restRequest.hasContentOrSourceParam() == false) { + throw ExceptionsHelper.badRequestException("Please provide a job [{}] or the config object", + DataFrameAnalyticsConfig.ID.getPreferredName()); + } + + if (Strings.isNullOrEmpty(jobId) == false && restRequest.hasContentOrSourceParam()) { + throw ExceptionsHelper.badRequestException("Please provide either a job [{}] or the config object but not both", + DataFrameAnalyticsConfig.ID.getPreferredName()); + } + + // We need to consume the body before returning + PutDataFrameAnalyticsAction.Request explainRequestFromBody = Strings.isNullOrEmpty(jobId) ? 
+ PutDataFrameAnalyticsAction.Request.parseRequestForExplain(restRequest.contentOrSourceParamParser()) : null; + + return channel -> { + RestToXContentListener listener = new RestToXContentListener<>(channel); + + if (explainRequestFromBody != null) { + client.execute(ExplainDataFrameAnalyticsAction.INSTANCE, explainRequestFromBody, listener); + } else { + GetDataFrameAnalyticsAction.Request getRequest = new GetDataFrameAnalyticsAction.Request(jobId); + getRequest.setAllowNoResources(false); + client.execute(GetDataFrameAnalyticsAction.INSTANCE, getRequest, ActionListener.wrap( + getResponse -> { + List jobs = getResponse.getResources().results(); + if (jobs.size() > 1) { + listener.onFailure(ExceptionsHelper.badRequestException("expected only one config but matched {}", + jobs.stream().map(DataFrameAnalyticsConfig::getId).collect(Collectors.toList()))); + } else { + PutDataFrameAnalyticsAction.Request explainRequest = new PutDataFrameAnalyticsAction.Request(jobs.get(0)); + client.execute(ExplainDataFrameAnalyticsAction.INSTANCE, explainRequest, listener); + } + }, + listener::onFailure + )); + } + }; + } +} diff --git a/x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/dataframe/extractor/ExtractedFieldsDetectorTests.java b/x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/dataframe/extractor/ExtractedFieldsDetectorTests.java index 8f33c9bfbbfb0..5f7bd650a1cd8 100644 --- a/x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/dataframe/extractor/ExtractedFieldsDetectorTests.java +++ b/x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/dataframe/extractor/ExtractedFieldsDetectorTests.java @@ -8,6 +8,7 @@ import org.elasticsearch.ElasticsearchStatusException; import org.elasticsearch.action.fieldcaps.FieldCapabilities; import org.elasticsearch.action.fieldcaps.FieldCapabilitiesResponse; +import org.elasticsearch.common.collect.Tuple; import org.elasticsearch.search.SearchHit; import org.elasticsearch.search.fetch.subphase.FetchSourceContext; import org.elasticsearch.test.ESTestCase; @@ -17,6 +18,7 @@ import org.elasticsearch.xpack.core.ml.dataframe.analyses.Classification; import org.elasticsearch.xpack.core.ml.dataframe.analyses.OutlierDetection; import org.elasticsearch.xpack.core.ml.dataframe.analyses.Regression; +import org.elasticsearch.xpack.core.ml.dataframe.explain.FieldSelection; import org.elasticsearch.xpack.ml.extractor.ExtractedField; import org.elasticsearch.xpack.ml.extractor.ExtractedFields; import org.elasticsearch.xpack.ml.test.SearchHitBuilder; @@ -25,6 +27,7 @@ import java.util.Arrays; import java.util.Collections; import java.util.HashMap; +import java.util.HashSet; import java.util.List; import java.util.Map; import java.util.stream.Collectors; @@ -48,12 +51,15 @@ public void testDetect_GivenFloatField() { ExtractedFieldsDetector extractedFieldsDetector = new ExtractedFieldsDetector( SOURCE_INDEX, buildOutlierDetectionConfig(), false, 100, fieldCapabilities, Collections.emptyMap()); - ExtractedFields extractedFields = extractedFieldsDetector.detect(); + Tuple> fieldExtraction = extractedFieldsDetector.detect(); - List allFields = extractedFields.getAllFields(); + List allFields = fieldExtraction.v1().getAllFields(); assertThat(allFields.size(), equalTo(1)); assertThat(allFields.get(0).getName(), equalTo("some_float")); assertThat(allFields.get(0).getMethod(), equalTo(ExtractedField.Method.DOC_VALUE)); + + assertFieldSelectionContains(fieldExtraction.v2(), + FieldSelection.included("some_float", Collections.singleton("float"), false, 
FieldSelection.FeatureType.NUMERICAL)); } public void testDetect_GivenNumericFieldWithMultipleTypes() { @@ -63,12 +69,16 @@ public void testDetect_GivenNumericFieldWithMultipleTypes() { ExtractedFieldsDetector extractedFieldsDetector = new ExtractedFieldsDetector( SOURCE_INDEX, buildOutlierDetectionConfig(), false, 100, fieldCapabilities, Collections.emptyMap()); - ExtractedFields extractedFields = extractedFieldsDetector.detect(); + Tuple> fieldExtraction = extractedFieldsDetector.detect(); - List allFields = extractedFields.getAllFields(); + List allFields = fieldExtraction.v1().getAllFields(); assertThat(allFields.size(), equalTo(1)); assertThat(allFields.get(0).getName(), equalTo("some_number")); assertThat(allFields.get(0).getMethod(), equalTo(ExtractedField.Method.DOC_VALUE)); + + assertFieldSelectionContains(fieldExtraction.v2(), FieldSelection.included("some_number", + new HashSet<>(Arrays.asList("long", "integer", "short", "byte", "double", "float", "half_float", "scaled_float")), false, + FieldSelection.FeatureType.NUMERICAL)); } public void testDetect_GivenOutlierDetectionAndNonNumericField() { @@ -105,14 +115,22 @@ public void testDetect_GivenOutlierDetectionAndMultipleFields() { ExtractedFieldsDetector extractedFieldsDetector = new ExtractedFieldsDetector( SOURCE_INDEX, buildOutlierDetectionConfig(), false, 100, fieldCapabilities, Collections.emptyMap()); - ExtractedFields extractedFields = extractedFieldsDetector.detect(); + Tuple> fieldExtraction = extractedFieldsDetector.detect(); - List allFields = extractedFields.getAllFields(); + List allFields = fieldExtraction.v1().getAllFields(); assertThat(allFields.size(), equalTo(3)); assertThat(allFields.stream().map(ExtractedField::getName).collect(Collectors.toSet()), containsInAnyOrder("some_float", "some_long", "some_boolean")); assertThat(allFields.stream().map(ExtractedField::getMethod).collect(Collectors.toSet()), contains(equalTo(ExtractedField.Method.DOC_VALUE))); + + assertFieldSelectionContains(fieldExtraction.v2(), + FieldSelection.included("some_boolean", Collections.singleton("boolean"), false, FieldSelection.FeatureType.NUMERICAL), + FieldSelection.included("some_float", Collections.singleton("float"), false, FieldSelection.FeatureType.NUMERICAL), + FieldSelection.excluded("some_keyword", Collections.singleton("keyword"), "unsupported type; " + + "supported types are [boolean, byte, double, float, half_float, integer, long, scaled_float, short]"), + FieldSelection.included("some_long", Collections.singleton("long"), false, FieldSelection.FeatureType.NUMERICAL) + ); } public void testDetect_GivenRegressionAndMultipleFields() { @@ -126,14 +144,22 @@ public void testDetect_GivenRegressionAndMultipleFields() { ExtractedFieldsDetector extractedFieldsDetector = new ExtractedFieldsDetector( SOURCE_INDEX, buildRegressionConfig("foo"), false, 100, fieldCapabilities, Collections.emptyMap()); - ExtractedFields extractedFields = extractedFieldsDetector.detect(); + Tuple> fieldExtraction = extractedFieldsDetector.detect(); - List allFields = extractedFields.getAllFields(); + List allFields = fieldExtraction.v1().getAllFields(); assertThat(allFields.size(), equalTo(5)); assertThat(allFields.stream().map(ExtractedField::getName).collect(Collectors.toList()), containsInAnyOrder("foo", "some_float", "some_keyword", "some_long", "some_boolean")); assertThat(allFields.stream().map(ExtractedField::getMethod).collect(Collectors.toSet()), contains(equalTo(ExtractedField.Method.DOC_VALUE))); + + 
assertFieldSelectionContains(fieldExtraction.v2(), + FieldSelection.included("foo", Collections.singleton("double"), true, FieldSelection.FeatureType.NUMERICAL), + FieldSelection.included("some_boolean", Collections.singleton("boolean"), false, FieldSelection.FeatureType.NUMERICAL), + FieldSelection.included("some_float", Collections.singleton("float"), false, FieldSelection.FeatureType.NUMERICAL), + FieldSelection.included("some_keyword", Collections.singleton("keyword"), false, FieldSelection.FeatureType.CATEGORICAL), + FieldSelection.included("some_long", Collections.singleton("long"), false, FieldSelection.FeatureType.NUMERICAL) + ); } public void testDetect_GivenRegressionAndRequiredFieldMissing() { @@ -191,11 +217,16 @@ public void testDetect_GivenFieldIsBothIncludedAndExcluded() { ExtractedFieldsDetector extractedFieldsDetector = new ExtractedFieldsDetector( SOURCE_INDEX, buildOutlierDetectionConfig(analyzedFields), false, 100, fieldCapabilities, Collections.emptyMap()); - ExtractedFields extractedFields = extractedFieldsDetector.detect(); + Tuple> fieldExtraction = extractedFieldsDetector.detect(); - List allFields = extractedFields.getAllFields(); + List allFields = fieldExtraction.v1().getAllFields(); assertThat(allFields.size(), equalTo(1)); assertThat(allFields.stream().map(ExtractedField::getName).collect(Collectors.toList()), contains("bar")); + + assertFieldSelectionContains(fieldExtraction.v2(), + FieldSelection.included("bar", Collections.singleton("float"), false, FieldSelection.FeatureType.NUMERICAL), + FieldSelection.excluded("foo", Collections.singleton("float"), "field in excludes list") + ); } public void testDetect_GivenRegressionAndRequiredFieldHasInvalidType() { @@ -258,14 +289,15 @@ public void testDetect_GivenIgnoredField() { public void testDetect_GivenIncludedIgnoredField() { FieldCapabilitiesResponse fieldCapabilities = new MockFieldCapsResponseBuilder() - .addAggregatableField("_id", "float").build(); + .addAggregatableField("_id", "float") + .build(); FetchSourceContext analyzedFields = new FetchSourceContext(true, new String[]{"_id"}, new String[0]); ExtractedFieldsDetector extractedFieldsDetector = new ExtractedFieldsDetector( SOURCE_INDEX, buildOutlierDetectionConfig(analyzedFields), false, 100, fieldCapabilities, Collections.emptyMap()); ElasticsearchStatusException e = expectThrows(ElasticsearchStatusException.class, () -> extractedFieldsDetector.detect()); - assertThat(e.getMessage(), equalTo("field [_id] cannot be analyzed")); + assertThat(e.getMessage(), equalTo("No field [_id] could be detected")); } public void testDetect_ShouldSortFieldsAlphabetically() { @@ -285,9 +317,9 @@ public void testDetect_ShouldSortFieldsAlphabetically() { ExtractedFieldsDetector extractedFieldsDetector = new ExtractedFieldsDetector( SOURCE_INDEX, buildOutlierDetectionConfig(), false, 100, fieldCapabilities, Collections.emptyMap()); - ExtractedFields extractedFields = extractedFieldsDetector.detect(); + Tuple> fieldExtraction = extractedFieldsDetector.detect(); - List extractedFieldNames = extractedFields.getAllFields().stream().map(ExtractedField::getName) + List extractedFieldNames = fieldExtraction.v1().getAllFields().stream().map(ExtractedField::getName) .collect(Collectors.toList()); assertThat(extractedFieldNames, equalTo(sortedFields)); } @@ -333,11 +365,17 @@ public void testDetect_GivenInclusionsAndExclusions() { ExtractedFieldsDetector extractedFieldsDetector = new ExtractedFieldsDetector( SOURCE_INDEX, buildOutlierDetectionConfig(desiredFields), false, 100, 
fieldCapabilities, Collections.emptyMap()); - ExtractedFields extractedFields = extractedFieldsDetector.detect(); + Tuple> fieldExtraction = extractedFieldsDetector.detect(); - List extractedFieldNames = extractedFields.getAllFields().stream().map(ExtractedField::getName) + List extractedFieldNames = fieldExtraction.v1().getAllFields().stream().map(ExtractedField::getName) .collect(Collectors.toList()); assertThat(extractedFieldNames, equalTo(Arrays.asList("my_field1", "your_field2"))); + + assertFieldSelectionContains(fieldExtraction.v2(), + FieldSelection.included("my_field1", Collections.singleton("float"), false, FieldSelection.FeatureType.NUMERICAL), + FieldSelection.excluded("my_field1_nope", Collections.singleton("float"), "field in excludes list"), + FieldSelection.included("your_field2", Collections.singleton("float"), false, FieldSelection.FeatureType.NUMERICAL) + ); } public void testDetect_GivenIncludedFieldHasUnsupportedType() { @@ -384,11 +422,18 @@ public void testDetect_GivenIndexContainsResultsFieldAndTaskIsRestarting() { ExtractedFieldsDetector extractedFieldsDetector = new ExtractedFieldsDetector( SOURCE_INDEX, buildOutlierDetectionConfig(), true, 100, fieldCapabilities, Collections.emptyMap()); - ExtractedFields extractedFields = extractedFieldsDetector.detect(); + Tuple> fieldExtraction = extractedFieldsDetector.detect(); - List extractedFieldNames = extractedFields.getAllFields().stream().map(ExtractedField::getName) + List extractedFieldNames = fieldExtraction.v1().getAllFields().stream().map(ExtractedField::getName) .collect(Collectors.toList()); assertThat(extractedFieldNames, equalTo(Arrays.asList("my_field1", "your_field2"))); + + assertFieldSelectionContains(fieldExtraction.v2(), + FieldSelection.included("my_field1", Collections.singleton("float"), false, FieldSelection.FeatureType.NUMERICAL), + FieldSelection.included("your_field2", Collections.singleton("float"), false, FieldSelection.FeatureType.NUMERICAL), + FieldSelection.excluded("your_keyword", Collections.singleton("keyword"), "unsupported type; supported types " + + "are [boolean, byte, double, float, half_float, integer, long, scaled_float, short]") + ); } public void testDetect_GivenIncludedResultsField() { @@ -434,12 +479,12 @@ public void testDetect_GivenLessFieldsThanDocValuesLimit() { ExtractedFieldsDetector extractedFieldsDetector = new ExtractedFieldsDetector( SOURCE_INDEX, buildOutlierDetectionConfig(), true, 4, fieldCapabilities, Collections.emptyMap()); - ExtractedFields extractedFields = extractedFieldsDetector.detect(); + Tuple> fieldExtraction = extractedFieldsDetector.detect(); - List extractedFieldNames = extractedFields.getAllFields().stream().map(ExtractedField::getName) + List extractedFieldNames = fieldExtraction.v1().getAllFields().stream().map(ExtractedField::getName) .collect(Collectors.toList()); assertThat(extractedFieldNames, equalTo(Arrays.asList("field_1", "field_2", "field_3"))); - assertThat(extractedFields.getAllFields().stream().map(ExtractedField::getMethod).collect(Collectors.toSet()), + assertThat(fieldExtraction.v1().getAllFields().stream().map(ExtractedField::getMethod).collect(Collectors.toSet()), contains(equalTo(ExtractedField.Method.DOC_VALUE))); } @@ -453,12 +498,12 @@ public void testDetect_GivenEqualFieldsToDocValuesLimit() { ExtractedFieldsDetector extractedFieldsDetector = new ExtractedFieldsDetector( SOURCE_INDEX, buildOutlierDetectionConfig(), true, 3, fieldCapabilities, Collections.emptyMap()); - ExtractedFields extractedFields = 
extractedFieldsDetector.detect();
+        Tuple<ExtractedFields, List<FieldSelection>> fieldExtraction = extractedFieldsDetector.detect();

-        List<String> extractedFieldNames = extractedFields.getAllFields().stream().map(ExtractedField::getName)
+        List<String> extractedFieldNames = fieldExtraction.v1().getAllFields().stream().map(ExtractedField::getName)
             .collect(Collectors.toList());
         assertThat(extractedFieldNames, equalTo(Arrays.asList("field_1", "field_2", "field_3")));
-        assertThat(extractedFields.getAllFields().stream().map(ExtractedField::getMethod).collect(Collectors.toSet()),
+        assertThat(fieldExtraction.v1().getAllFields().stream().map(ExtractedField::getMethod).collect(Collectors.toSet()),
             contains(equalTo(ExtractedField.Method.DOC_VALUE)));
     }

@@ -472,12 +517,12 @@ public void testDetect_GivenMoreFieldsThanDocValuesLimit() {
         ExtractedFieldsDetector extractedFieldsDetector = new ExtractedFieldsDetector(
             SOURCE_INDEX, buildOutlierDetectionConfig(), true, 2, fieldCapabilities, Collections.emptyMap());
-        ExtractedFields extractedFields = extractedFieldsDetector.detect();
+        Tuple<ExtractedFields, List<FieldSelection>> fieldExtraction = extractedFieldsDetector.detect();

-        List<String> extractedFieldNames = extractedFields.getAllFields().stream().map(ExtractedField::getName)
+        List<String> extractedFieldNames = fieldExtraction.v1().getAllFields().stream().map(ExtractedField::getName)
             .collect(Collectors.toList());
         assertThat(extractedFieldNames, equalTo(Arrays.asList("field_1", "field_2", "field_3")));
-        assertThat(extractedFields.getAllFields().stream().map(ExtractedField::getMethod).collect(Collectors.toSet()),
+        assertThat(fieldExtraction.v1().getAllFields().stream().map(ExtractedField::getMethod).collect(Collectors.toSet()),
             contains(equalTo(ExtractedField.Method.SOURCE)));
     }

@@ -488,14 +533,18 @@ public void testDetect_GivenBooleanField_BooleanMappedAsInteger() {
         ExtractedFieldsDetector extractedFieldsDetector = new ExtractedFieldsDetector(
             SOURCE_INDEX, buildOutlierDetectionConfig(), false, 100, fieldCapabilities, Collections.emptyMap());
-        ExtractedFields extractedFields = extractedFieldsDetector.detect();
+        Tuple<ExtractedFields, List<FieldSelection>> fieldExtraction = extractedFieldsDetector.detect();

-        List<ExtractedField> allFields = extractedFields.getAllFields();
+        List<ExtractedField> allFields = fieldExtraction.v1().getAllFields();
         assertThat(allFields.size(), equalTo(1));
         ExtractedField booleanField = allFields.get(0);
         assertThat(booleanField.getTypes(), contains("boolean"));
         assertThat(booleanField.getMethod(), equalTo(ExtractedField.Method.DOC_VALUE));

+        assertFieldSelectionContains(fieldExtraction.v2(),
+            FieldSelection.included("some_boolean", Collections.singleton("boolean"), false, FieldSelection.FeatureType.NUMERICAL)
+        );
+
         SearchHit hit = new SearchHitBuilder(42).addField("some_boolean", true).build();
         assertThat(booleanField.value(hit), arrayContaining(1));
@@ -514,14 +563,18 @@ public void testDetect_GivenBooleanField_BooleanMappedAsString() {
         ExtractedFieldsDetector extractedFieldsDetector = new ExtractedFieldsDetector(
             SOURCE_INDEX, buildClassificationConfig("some_boolean"), false, 100, fieldCapabilities, Collections.singletonMap("some_boolean", 2L));
-        ExtractedFields extractedFields = extractedFieldsDetector.detect();
+        Tuple<ExtractedFields, List<FieldSelection>> fieldExtraction = extractedFieldsDetector.detect();

-        List<ExtractedField> allFields = extractedFields.getAllFields();
+        List<ExtractedField> allFields = fieldExtraction.v1().getAllFields();
         assertThat(allFields.size(), equalTo(1));
         ExtractedField booleanField = allFields.get(0);
         assertThat(booleanField.getTypes(), contains("boolean"));
         assertThat(booleanField.getMethod(), equalTo(ExtractedField.Method.DOC_VALUE));

+        assertFieldSelectionContains(fieldExtraction.v2(),
+            FieldSelection.included("some_boolean", Collections.singleton("boolean"), true, FieldSelection.FeatureType.CATEGORICAL)
+        );
+
         SearchHit hit = new SearchHitBuilder(42).addField("some_boolean", true).build();
         assertThat(booleanField.value(hit), arrayContaining("true"));
@@ -546,12 +599,26 @@ public void testDetect_GivenMultiFields() {
         ExtractedFieldsDetector extractedFieldsDetector = new ExtractedFieldsDetector(
             SOURCE_INDEX, buildRegressionConfig("a_float"), true, 100, fieldCapabilities, Collections.emptyMap());
-        ExtractedFields extractedFields = extractedFieldsDetector.detect();
+        Tuple<ExtractedFields, List<FieldSelection>> fieldExtraction = extractedFieldsDetector.detect();

-        assertThat(extractedFields.getAllFields().size(), equalTo(5));
-        List<String> extractedFieldNames = extractedFields.getAllFields().stream().map(ExtractedField::getName)
+        assertThat(fieldExtraction.v1().getAllFields().size(), equalTo(5));
+        List<String> extractedFieldNames = fieldExtraction.v1().getAllFields().stream().map(ExtractedField::getName)
             .collect(Collectors.toList());
         assertThat(extractedFieldNames, contains("a_float", "keyword_1", "text_1.keyword", "text_2.keyword", "text_without_keyword"));
+
+        assertFieldSelectionContains(fieldExtraction.v2(),
+            FieldSelection.included("a_float", Collections.singleton("float"), true, FieldSelection.FeatureType.NUMERICAL),
+            FieldSelection.included("keyword_1", Collections.singleton("keyword"), false, FieldSelection.FeatureType.CATEGORICAL),
+            FieldSelection.excluded("keyword_1.text", Collections.singleton("text"),
+                "[keyword_1] is preferred because it is aggregatable"),
+            FieldSelection.excluded("text_1", Collections.singleton("text"),
+                "[text_1.keyword] is preferred because it is aggregatable"),
+            FieldSelection.included("text_1.keyword", Collections.singleton("keyword"), false, FieldSelection.FeatureType.CATEGORICAL),
+            FieldSelection.excluded("text_2", Collections.singleton("text"),
+                "[text_2.keyword] is preferred because it is aggregatable"),
+            FieldSelection.included("text_2.keyword", Collections.singleton("keyword"), false, FieldSelection.FeatureType.CATEGORICAL),
+            FieldSelection.included("text_without_keyword", Collections.singleton("text"), false, FieldSelection.FeatureType.CATEGORICAL)
+        );
     }

     public void testDetect_GivenMultiFieldAndParentIsRequired() {
@@ -563,12 +630,19 @@ public void testDetect_GivenMultiFieldAndParentIsRequired() {
         ExtractedFieldsDetector extractedFieldsDetector = new ExtractedFieldsDetector(
             SOURCE_INDEX, buildClassificationConfig("field_1"), true, 100, fieldCapabilities, Collections.singletonMap("field_1", 2L));
-        ExtractedFields extractedFields = extractedFieldsDetector.detect();
+        Tuple<ExtractedFields, List<FieldSelection>> fieldExtraction = extractedFieldsDetector.detect();

-        assertThat(extractedFields.getAllFields().size(), equalTo(2));
-        List<String> extractedFieldNames = extractedFields.getAllFields().stream().map(ExtractedField::getName)
+        assertThat(fieldExtraction.v1().getAllFields().size(), equalTo(2));
+        List<String> extractedFieldNames = fieldExtraction.v1().getAllFields().stream().map(ExtractedField::getName)
             .collect(Collectors.toList());
         assertThat(extractedFieldNames, contains("field_1", "field_2"));
+
+        assertFieldSelectionContains(fieldExtraction.v2(),
+            FieldSelection.included("field_1", Collections.singleton("keyword"), true, FieldSelection.FeatureType.CATEGORICAL),
+            FieldSelection.excluded("field_1.keyword", Collections.singleton("keyword"),
+                "[field_1] is required instead"),
+            FieldSelection.included("field_2", Collections.singleton("float"), false, FieldSelection.FeatureType.NUMERICAL)
+        );
     }

     public void testDetect_GivenMultiFieldAndMultiFieldIsRequired() {
@@ -581,12 +655,19 @@ public void testDetect_GivenMultiFieldAndMultiFieldIsRequired() {
         ExtractedFieldsDetector extractedFieldsDetector = new ExtractedFieldsDetector(
             SOURCE_INDEX, buildClassificationConfig("field_1.keyword"), true, 100, fieldCapabilities, Collections.singletonMap("field_1.keyword", 2L));
-        ExtractedFields extractedFields = extractedFieldsDetector.detect();
+        Tuple<ExtractedFields, List<FieldSelection>> fieldExtraction = extractedFieldsDetector.detect();

-        assertThat(extractedFields.getAllFields().size(), equalTo(2));
-        List<String> extractedFieldNames = extractedFields.getAllFields().stream().map(ExtractedField::getName)
+        assertThat(fieldExtraction.v1().getAllFields().size(), equalTo(2));
+        List<String> extractedFieldNames = fieldExtraction.v1().getAllFields().stream().map(ExtractedField::getName)
             .collect(Collectors.toList());
         assertThat(extractedFieldNames, contains("field_1.keyword", "field_2"));
+
+        assertFieldSelectionContains(fieldExtraction.v2(),
+            FieldSelection.excluded("field_1", Collections.singleton("keyword"),
+                "[field_1.keyword] is required instead"),
+            FieldSelection.included("field_1.keyword", Collections.singleton("keyword"), true, FieldSelection.FeatureType.CATEGORICAL),
+            FieldSelection.included("field_2", Collections.singleton("float"), false, FieldSelection.FeatureType.NUMERICAL)
+        );
     }

     public void testDetect_GivenSeveralMultiFields_ShouldPickFirstSorted() {
@@ -600,12 +681,21 @@ public void testDetect_GivenSeveralMultiFields_ShouldPickFirstSorted() {
         ExtractedFieldsDetector extractedFieldsDetector = new ExtractedFieldsDetector(
             SOURCE_INDEX, buildRegressionConfig("field_2"), true, 100, fieldCapabilities, Collections.emptyMap());
-        ExtractedFields extractedFields = extractedFieldsDetector.detect();
+        Tuple<ExtractedFields, List<FieldSelection>> fieldExtraction = extractedFieldsDetector.detect();

-        assertThat(extractedFields.getAllFields().size(), equalTo(2));
-        List<String> extractedFieldNames = extractedFields.getAllFields().stream().map(ExtractedField::getName)
+        assertThat(fieldExtraction.v1().getAllFields().size(), equalTo(2));
+        List<String> extractedFieldNames = fieldExtraction.v1().getAllFields().stream().map(ExtractedField::getName)
             .collect(Collectors.toList());
         assertThat(extractedFieldNames, contains("field_1.keyword_1", "field_2"));
+
+        assertFieldSelectionContains(fieldExtraction.v2(),
+            FieldSelection.excluded("field_1", Collections.singleton("text"),
+                "[field_1.keyword_1] is preferred because it is aggregatable"),
+            FieldSelection.included("field_1.keyword_1", Collections.singleton("keyword"), false, FieldSelection.FeatureType.CATEGORICAL),
+            FieldSelection.excluded("field_1.keyword_2", Collections.singleton("keyword"), "[field_1.keyword_1] came first"),
+            FieldSelection.excluded("field_1.keyword_3", Collections.singleton("keyword"), "[field_1.keyword_1] came first"),
+            FieldSelection.included("field_2", Collections.singleton("float"), true, FieldSelection.FeatureType.NUMERICAL)
+        );
     }

     public void testDetect_GivenMultiFields_OverDocValueLimit() {
@@ -617,12 +707,19 @@ public void testDetect_GivenMultiFields_OverDocValueLimit() {
         ExtractedFieldsDetector extractedFieldsDetector = new ExtractedFieldsDetector(
             SOURCE_INDEX, buildRegressionConfig("field_2"), true, 0, fieldCapabilities, Collections.emptyMap());
-        ExtractedFields extractedFields = extractedFieldsDetector.detect();
+        Tuple<ExtractedFields, List<FieldSelection>> fieldExtraction = extractedFieldsDetector.detect();

-        assertThat(extractedFields.getAllFields().size(), equalTo(2));
-        List<String> extractedFieldNames = extractedFields.getAllFields().stream().map(ExtractedField::getName)
+        assertThat(fieldExtraction.v1().getAllFields().size(), equalTo(2));
+        List<String> extractedFieldNames = fieldExtraction.v1().getAllFields().stream().map(ExtractedField::getName)
             .collect(Collectors.toList());
         assertThat(extractedFieldNames, contains("field_1", "field_2"));
+
+        assertFieldSelectionContains(fieldExtraction.v2(),
+            FieldSelection.included("field_1", Collections.singleton("text"), false, FieldSelection.FeatureType.CATEGORICAL),
+            FieldSelection.excluded("field_1.keyword_1", Collections.singleton("keyword"),
+                "[field_1] is preferred because it supports fetching from source"),
+            FieldSelection.included("field_2", Collections.singleton("float"), true, FieldSelection.FeatureType.NUMERICAL)
+        );
     }

     public void testDetect_GivenParentAndMultiFieldBothAggregatable() {
@@ -635,12 +732,20 @@ public void testDetect_GivenParentAndMultiFieldBothAggregatable() {
         ExtractedFieldsDetector extractedFieldsDetector = new ExtractedFieldsDetector(
             SOURCE_INDEX, buildRegressionConfig("field_2.double"), true, 100, fieldCapabilities, Collections.emptyMap());
-        ExtractedFields extractedFields = extractedFieldsDetector.detect();
+        Tuple<ExtractedFields, List<FieldSelection>> fieldExtraction = extractedFieldsDetector.detect();

-        assertThat(extractedFields.getAllFields().size(), equalTo(2));
-        List<String> extractedFieldNames = extractedFields.getAllFields().stream().map(ExtractedField::getName)
+        assertThat(fieldExtraction.v1().getAllFields().size(), equalTo(2));
+        List<String> extractedFieldNames = fieldExtraction.v1().getAllFields().stream().map(ExtractedField::getName)
             .collect(Collectors.toList());
         assertThat(extractedFieldNames, contains("field_1", "field_2.double"));
+
+        assertFieldSelectionContains(fieldExtraction.v2(),
+            FieldSelection.included("field_1", Collections.singleton("keyword"), false, FieldSelection.FeatureType.CATEGORICAL),
+            FieldSelection.excluded("field_1.keyword", Collections.singleton("keyword"),
+                "[field_1] is preferred because it is aggregatable"),
+            FieldSelection.included("field_2.double", Collections.singleton("double"), true, FieldSelection.FeatureType.NUMERICAL),
+            FieldSelection.excluded("field_2.keyword", Collections.singleton("float"), "[field_2.double] is required instead")
+        );
     }

     public void testDetect_GivenParentAndMultiFieldNoneAggregatable() {
@@ -652,12 +757,19 @@ public void testDetect_GivenParentAndMultiFieldNoneAggregatable() {
         ExtractedFieldsDetector extractedFieldsDetector = new ExtractedFieldsDetector(
             SOURCE_INDEX, buildRegressionConfig("field_2"), true, 100, fieldCapabilities, Collections.emptyMap());
-        ExtractedFields extractedFields = extractedFieldsDetector.detect();
+        Tuple<ExtractedFields, List<FieldSelection>> fieldExtraction = extractedFieldsDetector.detect();

-        assertThat(extractedFields.getAllFields().size(), equalTo(2));
-        List<String> extractedFieldNames = extractedFields.getAllFields().stream().map(ExtractedField::getName)
+        assertThat(fieldExtraction.v1().getAllFields().size(), equalTo(2));
+        List<String> extractedFieldNames = fieldExtraction.v1().getAllFields().stream().map(ExtractedField::getName)
             .collect(Collectors.toList());
         assertThat(extractedFieldNames, contains("field_1", "field_2"));
+
+        assertFieldSelectionContains(fieldExtraction.v2(),
+            FieldSelection.included("field_1", Collections.singleton("text"), false, FieldSelection.FeatureType.CATEGORICAL),
+            FieldSelection.excluded("field_1.text", Collections.singleton("text"),
+                "[field_1] is preferred because none of the multi-fields are aggregatable"),
+            FieldSelection.included("field_2", Collections.singleton("float"), true, FieldSelection.FeatureType.NUMERICAL)
+        );
     }

     public void testDetect_GivenMultiFields_AndExplicitlyIncludedFields() {
@@ -670,12 +782,18 @@ public void testDetect_GivenMultiFields_AndExplicitlyIncludedFields() {
         ExtractedFieldsDetector extractedFieldsDetector = new ExtractedFieldsDetector(
             SOURCE_INDEX, buildRegressionConfig("field_2", analyzedFields), false, 100, fieldCapabilities, Collections.emptyMap());
-        ExtractedFields extractedFields = extractedFieldsDetector.detect();
+        Tuple<ExtractedFields, List<FieldSelection>> fieldExtraction = extractedFieldsDetector.detect();

-        assertThat(extractedFields.getAllFields().size(), equalTo(2));
-        List<String> extractedFieldNames = extractedFields.getAllFields().stream().map(ExtractedField::getName)
+        assertThat(fieldExtraction.v1().getAllFields().size(), equalTo(2));
+        List<String> extractedFieldNames = fieldExtraction.v1().getAllFields().stream().map(ExtractedField::getName)
             .collect(Collectors.toList());
         assertThat(extractedFieldNames, contains("field_1", "field_2"));
+
+        assertFieldSelectionContains(fieldExtraction.v2(),
+            FieldSelection.included("field_1", Collections.singleton("text"), false, FieldSelection.FeatureType.CATEGORICAL),
+            FieldSelection.excluded("field_1.keyword", Collections.singleton("keyword"), "field not in includes list"),
+            FieldSelection.included("field_2", Collections.singleton("float"), true, FieldSelection.FeatureType.NUMERICAL)
+        );
     }

     private static DataFrameAnalyticsConfig buildOutlierDetectionConfig() {
@@ -715,6 +833,21 @@ private static DataFrameAnalyticsConfig buildClassificationConfig(String depende
             .build();
     }

+    /**
+     * We assert each field individually to get useful error messages in case of failure
+     */
+    private static void assertFieldSelectionContains(List<FieldSelection> actual, FieldSelection... expected) {
+        assertThat(actual.size(), equalTo(expected.length));
+        for (int i = 0; i < expected.length; i++) {
+            assertThat("i = " + i, actual.get(i).getName(), equalTo(expected[i].getName()));
+            assertThat("i = " + i, actual.get(i).getMappingTypes(), equalTo(expected[i].getMappingTypes()));
+            assertThat("i = " + i, actual.get(i).isIncluded(), equalTo(expected[i].isIncluded()));
+            assertThat("i = " + i, actual.get(i).isRequired(), equalTo(expected[i].isRequired()));
+            assertThat("i = " + i, actual.get(i).getFeatureType(), equalTo(expected[i].getFeatureType()));
+            assertThat("i = " + i, actual.get(i).getReason(), equalTo(expected[i].getReason()));
+        }
+    }
+
     private static class MockFieldCapsResponseBuilder {

         private final Map<String, Map<String, FieldCapabilities>> fieldCaps = new HashMap<>();
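Note on the new detect() contract: it now returns both the fields to extract and a per-field explanation. Below is a minimal sketch of how calling code might consume the returned Tuple, using only the accessors exercised by the tests above; the detector setup (config, field capabilities) and the ML class imports are assumed to be in place as elsewhere in this patch.

    import java.util.List;
    import org.elasticsearch.common.collect.Tuple;
    // ExtractedField, ExtractedFields and FieldSelection imports follow the packages touched by this patch.

    Tuple<ExtractedFields, List<FieldSelection>> fieldExtraction = extractedFieldsDetector.detect();

    // v1() holds the fields that will actually be extracted for the analysis.
    for (ExtractedField field : fieldExtraction.v1().getAllFields()) {
        System.out.println(field.getName() + " extracted via " + field.getMethod());
    }

    // v2() holds one FieldSelection per mapped field; excluded entries carry a reason.
    for (FieldSelection selection : fieldExtraction.v2()) {
        String status = selection.isIncluded()
            ? "included as " + selection.getFeatureType()
            : "excluded: " + selection.getReason();
        System.out.println(selection.getName() + " -> " + status);
    }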
diff --git a/x-pack/plugin/src/test/resources/rest-api-spec/api/ml.estimate_memory_usage.json b/x-pack/plugin/src/test/resources/rest-api-spec/api/ml.estimate_memory_usage.json
deleted file mode 100644
index 99bd6527de3b1..0000000000000
--- a/x-pack/plugin/src/test/resources/rest-api-spec/api/ml.estimate_memory_usage.json
+++ /dev/null
@@ -1,21 +0,0 @@
-{
-  "ml.estimate_memory_usage": {
-    "documentation": {
-      "url": "http://www.elastic.co/guide/en/elasticsearch/reference/current/estimate-memory-usage-dfanalytics.html"
-    },
-    "stability": "experimental",
-    "url": {
-      "paths" : [
-        {
-          "path" : "/_ml/data_frame/analytics/_estimate_memory_usage",
-          "methods": [ "POST" ],
-          "parts": {}
-        }
-      ]
-    },
-    "body": {
-      "description" : "Memory usage estimation definition",
-      "required" : true
-    }
-  }
-}
diff --git a/x-pack/plugin/src/test/resources/rest-api-spec/api/ml.explain_data_frame_analytics.json b/x-pack/plugin/src/test/resources/rest-api-spec/api/ml.explain_data_frame_analytics.json
new file mode 100644
index 0000000000000..6969cf9a49f13
--- /dev/null
+++ b/x-pack/plugin/src/test/resources/rest-api-spec/api/ml.explain_data_frame_analytics.json
@@ -0,0 +1,31 @@
+{
+  "ml.explain_data_frame_analytics": {
+    "documentation": {
+      "url": "http://www.elastic.co/guide/en/elasticsearch/reference/current/explain-dfanalytics.html"
+    },
+    "stability": "experimental",
+    "url": {
+      "paths" : [
+        {
+          "path" : "/_ml/data_frame/analytics/_explain",
+          "methods": [ "GET", "POST" ],
+          "parts": {}
+        },
+        {
+          "path" : "/_ml/data_frame/analytics/{id}/_explain",
+          "methods": [ "GET", "POST" ],
+          "parts":{
+            "id":{
+              "type":"string",
+              "description":"The ID of the data frame analytics to explain"
+            }
+          }
+        }
+      ]
+    },
+    "body": {
+      "description" : "The data frame analytics config to explain",
+      "required" : false
+    }
+  }
+}
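The spec above registers GET and POST on both /_ml/data_frame/analytics/_explain and /_ml/data_frame/analytics/{id}/_explain, with the body optional. As a rough usage sketch against these paths (not taken from this patch; the client variable, index and job names are made up), the endpoint can be exercised with the low-level REST client:

    import org.apache.http.util.EntityUtils;
    import org.elasticsearch.client.Request;
    import org.elasticsearch.client.Response;
    import org.elasticsearch.client.RestClient;

    // Explain an ad-hoc config by POSTing it to _explain.
    Request request = new Request("POST", "/_ml/data_frame/analytics/_explain");
    request.setJsonEntity("{\"source\":{\"index\":\"index-source\"},\"analysis\":{\"outlier_detection\":{}}}");
    Response response = lowLevelClient.performRequest(request);
    // The response body carries memory_estimation and field_selection, as asserted in the YAML tests below.
    String explanation = EntityUtils.toString(response.getEntity());

    // Explain an existing job by id; no body needed.
    Response byId = lowLevelClient.performRequest(new Request("GET", "/_ml/data_frame/analytics/my-job/_explain"));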
diff --git a/x-pack/plugin/src/test/resources/rest-api-spec/test/ml/data_frame_analytics_memory_usage_estimation.yml b/x-pack/plugin/src/test/resources/rest-api-spec/test/ml/data_frame_analytics_memory_usage_estimation.yml
deleted file mode 100644
index 39fe8005fa8cb..0000000000000
--- a/x-pack/plugin/src/test/resources/rest-api-spec/test/ml/data_frame_analytics_memory_usage_estimation.yml
+++ /dev/null
@@ -1,84 +0,0 @@
----
-setup:
-  - do:
-      indices.create:
-        index: index-source
-        body:
-          mappings:
-            properties:
-              x:
-                type: float
-              y:
-                type: float
-
----
-"Test memory usage estimation for empty data frame":
-  - do:
-      catch: /Unable to estimate memory usage as no documents in the source indices \[index-source\] contained all the fields selected for analysis/
-      ml.estimate_memory_usage:
-        body:
-          source: { index: "index-source" }
-          analysis: { outlier_detection: {} }
-
-  - do:
-      index:
-        index: index-source
-        refresh: true
-        body: { x: 1 }
-  - match: { result: "created" }
-
-  # Note that value for "y" is missing and outlier detection analysis does not support missing values.
-  # Hence, the data frame is still considered empty.
-  - do:
-      catch: /Unable to estimate memory usage as no documents in the source indices \[index-source\] contained all the fields selected for analysis/
-      ml.estimate_memory_usage:
-        body:
-          source: { index: "index-source" }
-          analysis: { outlier_detection: {} }
-
----
-"Test memory usage estimation for non-empty data frame":
-  - do:
-      index:
-        index: index-source
-        refresh: true
-        body: { x: 1, y: 10 }
-  - match: { result: "created" }
-
-  - do:
-      ml.estimate_memory_usage:
-        body:
-          source: { index: "index-source" }
-          analysis: { outlier_detection: {} }
-  - match: { expected_memory_without_disk: "3kb" }
-  - match: { expected_memory_with_disk: "3kb" }
-
-  - do:
-      index:
-        index: index-source
-        refresh: true
-        body: { x: 2, y: 20 }
-  - match: { result: "created" }
-
-  - do:
-      ml.estimate_memory_usage:
-        body:
-          source: { index: "index-source" }
-          analysis: { outlier_detection: {} }
-  - match: { expected_memory_without_disk: "4kb" }
-  - match: { expected_memory_with_disk: "4kb" }
-
-  - do:
-      index:
-        index: index-source
-        refresh: true
-        body: { x: 3, y: 30 }
-  - match: { result: "created" }
-
-  - do:
-      ml.estimate_memory_usage:
-        body:
-          source: { index: "index-source" }
-          analysis: { outlier_detection: {} }
-  - match: { expected_memory_without_disk: "6kb" }
-  - match: { expected_memory_with_disk: "5kb" }
diff --git a/x-pack/plugin/src/test/resources/rest-api-spec/test/ml/explain_data_frame_analytics.yml b/x-pack/plugin/src/test/resources/rest-api-spec/test/ml/explain_data_frame_analytics.yml
new file mode 100644
index 0000000000000..f42964272568a
--- /dev/null
+++ b/x-pack/plugin/src/test/resources/rest-api-spec/test/ml/explain_data_frame_analytics.yml
@@ -0,0 +1,308 @@
+---
+"Test neither job id nor body":
+  - do:
+      catch: /Please provide a job \[id\] or the config object/
+      ml.explain_data_frame_analytics:
+        id: ""
+
+---
+"Test both job id and body":
+  - do:
+      catch: /Please provide either a job \[id\] or the config object but not both/
+      ml.explain_data_frame_analytics:
+        id: "foo"
+        body:
+          source: { index: "index-source" }
+          analysis: { outlier_detection: {} }
+
+---
+"Test missing job":
+  - do:
+      catch: missing
+      ml.explain_data_frame_analytics:
+        id: "no_such_job"
+
+---
+"Test id that matches multiple jobs":
+
+  - do:
+      indices.create:
+        index: index-source
+
+  - do:
+      ml.put_data_frame_analytics:
+        id: "foo-1"
+        body: >
+          {
+            "source": {
+              "index": "index-source"
+            },
+            "dest": {
+              "index": "index-dest"
+            },
+            "analysis": {"outlier_detection":{}}
+          }
+
+  - do:
+      ml.put_data_frame_analytics:
+        id: "foo-2"
+        body: >
+          {
+            "source": {
+              "index": "index-source"
+            },
+            "dest": {
+              "index": "index-dest"
+            },
+            "analysis": {"outlier_detection":{}}
+          }
+
+  - do:
+      catch: /expected only one config but matched \[foo-1, foo-2\]/
+      ml.explain_data_frame_analytics:
+        id: "foo-*"
+
+---
+"Test empty data frame given body":
+
+  - do:
+      indices.create:
+        index: index-source
+        body:
+          mappings:
+            properties:
+              x:
+                type: float
+              y:
+                type: float
+
+  - do:
+      catch: /Unable to estimate memory usage as no documents in the source indices \[index-source\] contained all the fields selected for analysis/
+      ml.explain_data_frame_analytics:
+        body:
+          source: { index: "index-source" }
+          analysis: { outlier_detection: {} }
+
+  - do:
+      index:
+        index: index-source
+        refresh: true
+        body: { x: 1 }
+  - match: { result: "created" }
+
+  # Note that value for "y" is missing and outlier detection analysis does not support missing values.
+  # Hence, the data frame is still considered empty.
+  - do:
+      catch: /Unable to estimate memory usage as no documents in the source indices \[index-source\] contained all the fields selected for analysis/
+      ml.explain_data_frame_analytics:
+        body:
+          source: { index: "index-source" }
+          analysis: { outlier_detection: {} }
+
+---
+"Test non-empty data frame given body":
+
+  - do:
+      indices.create:
+        index: index-source
+        body:
+          mappings:
+            properties:
+              x:
+                type: float
+              y:
+                type: float
+
+  - do:
+      index:
+        index: index-source
+        refresh: true
+        body: { x: 1, y: 10 }
+  - match: { result: "created" }
+
+  - do:
+      ml.explain_data_frame_analytics:
+        body:
+          source: { index: "index-source" }
+          analysis: { outlier_detection: {} }
+  - match: { memory_estimation.expected_memory_without_disk: "3kb" }
+  - match: { memory_estimation.expected_memory_with_disk: "3kb" }
+  - length: { field_selection: 2 }
+  - match: { field_selection.0.name: "x" }
+  - match: { field_selection.0.mapping_types: ["float"] }
+  - match: { field_selection.0.is_included: true }
+  - match: { field_selection.0.is_required: false }
+  - match: { field_selection.0.feature_type: "numerical" }
+  - is_false: field_selection.0.reason
+  - match: { field_selection.1.name: "y" }
+  - match: { field_selection.1.mapping_types: ["float"] }
+  - match: { field_selection.1.is_included: true }
+  - match: { field_selection.1.is_required: false }
+  - match: { field_selection.1.feature_type: "numerical" }
+  - is_false: field_selection.1.reason
+
+  - do:
+      index:
+        index: index-source
+        refresh: true
+        body: { x: 2, y: 20 }
+  - match: { result: "created" }
+
+  - do:
+      ml.explain_data_frame_analytics:
+        body:
+          source: { index: "index-source" }
+          analysis: { outlier_detection: {} }
+  - match: { memory_estimation.expected_memory_without_disk: "4kb" }
+  - match: { memory_estimation.expected_memory_with_disk: "4kb" }
+
+  - do:
+      index:
+        index: index-source
+        refresh: true
+        body: { x: 3, y: 30 }
+  - match: { result: "created" }
+
+  - do:
+      ml.explain_data_frame_analytics:
+        body:
+          source: { index: "index-source" }
+          analysis: { outlier_detection: {} }
+  - match: { memory_estimation.expected_memory_without_disk: "6kb" }
+  - match: { memory_estimation.expected_memory_with_disk: "5kb" }
+
+---
+"Test field_selection given body":
+
+  - do:
+      indices.create:
+        index: index-source
+        body:
+          mappings:
+            properties:
+              field_1:
+                type: integer
+              field_2:
+                type: double
+              field_3:
+                type: date
+
+  - do:
+      index:
+        index: index-source
+        refresh: true
+        body: { field_1: 3, field_2: 3.14, field_3: "2019-11-11T00:00:00", field_4: "blah" }
+  - match: { result: "created" }
+
+  - do:
+      ml.explain_data_frame_analytics:
+        body:
+          source: { index: "index-source" }
+          analysis: { regression: { dependent_variable: "field_1" } }
+  - is_true: memory_estimation.expected_memory_without_disk
+  - is_true: memory_estimation.expected_memory_with_disk
+  - length: { field_selection: 5 }
+  - match: { field_selection.0.name: "field_1" }
+  - match: { field_selection.0.mapping_types: ["integer"] }
+  - match: { field_selection.0.is_included: true }
+  - match: { field_selection.0.is_required: true }
+  - match: { field_selection.0.feature_type: "numerical" }
+  - is_false: field_selection.0.reason
+  - match: { field_selection.1.name: "field_2" }
+  - match: { field_selection.1.mapping_types: ["double"] }
+  - match: { field_selection.1.is_included: true }
+  - match: { field_selection.1.is_required: false }
+  - match: { field_selection.1.feature_type: "numerical" }
+  - is_false: field_selection.1.reason
+  - match: { field_selection.2.name: "field_3" }
+  - match: { field_selection.2.mapping_types: ["date"] }
+  - match: { field_selection.2.is_included: false }
+  - match: { field_selection.2.is_required: false }
+  - is_false: field_selection.2.feature_type
+  - match: { field_selection.2.reason: "unsupported type; supported types are [boolean, byte, double, float, half_float, integer, ip, keyword, long, scaled_float, short, text]" }
+  - match: { field_selection.3.name: "field_4" }
+  - match: { field_selection.3.mapping_types: ["text"] }
+  - match: { field_selection.3.is_included: false }
+  - match: { field_selection.3.is_required: false }
+  - is_false: field_selection.3.feature_type
+  - match: { field_selection.3.reason: "[field_4.keyword] is preferred because it is aggregatable" }
+  - match: { field_selection.4.name: "field_4.keyword" }
+  - match: { field_selection.4.mapping_types: ["keyword"] }
+  - match: { field_selection.4.is_included: true }
+  - match: { field_selection.4.is_required: false }
+  - match: { field_selection.4.feature_type: "categorical" }
+  - is_false: field_selection.4.reason
+
+---
+"Test field_selection given job":
+
+  - do:
+      indices.create:
+        index: index-source
+        body:
+          mappings:
+            properties:
+              field_1:
+                type: integer
+              field_2:
+                type: double
+              field_3:
+                type: date
+
+  - do:
+      index:
+        index: index-source
+        refresh: true
+        body: { field_1: 3, field_2: 3.14, field_3: "2019-11-11T00:00:00", field_4: "blah" }
+  - match: { result: "created" }
+
+  - do:
+      ml.put_data_frame_analytics:
+        id: "got-a-job-for-this-one"
+        body: >
+          {
+            "source": {
+              "index": "index-source"
+            },
+            "dest": {
+              "index": "index-dest"
+            },
+            "analysis": {"regression":{ "dependent_variable": "field_1" }}
+          }
+
+  - do:
+      ml.explain_data_frame_analytics:
+        id: "got-a-job-for-this-one"
+  - is_true: memory_estimation.expected_memory_without_disk
+  - is_true: memory_estimation.expected_memory_with_disk
+  - length: { field_selection: 5 }
+  - match: { field_selection.0.name: "field_1" }
+  - match: { field_selection.0.mapping_types: ["integer"] }
+  - match: { field_selection.0.is_included: true }
+  - match: { field_selection.0.is_required: true }
+  - match: { field_selection.0.feature_type: "numerical" }
+  - is_false: field_selection.0.reason
+  - match: { field_selection.1.name: "field_2" }
+  - match: { field_selection.1.mapping_types: ["double"] }
+  - match: { field_selection.1.is_included: true }
+  - match: { field_selection.1.is_required: false }
+  - match: { field_selection.1.feature_type: "numerical" }
+  - is_false: field_selection.1.reason
+  - match: { field_selection.2.name: "field_3" }
+  - match: { field_selection.2.mapping_types: ["date"] }
+  - match: { field_selection.2.is_included: false }
+  - match: { field_selection.2.is_required: false }
+  - is_false: field_selection.2.feature_type
+  - match: { field_selection.2.reason: "unsupported type; supported types are [boolean, byte, double, float, half_float, integer, ip, keyword, long, scaled_float, short, text]" }
+  - match: { field_selection.3.name: "field_4" }
+  - match: { field_selection.3.mapping_types: ["text"] }
+  - match: { field_selection.3.is_included: false }
+  - match: { field_selection.3.is_required: false }
+  - is_false: field_selection.3.feature_type
+  - match: { field_selection.3.reason: "[field_4.keyword] is preferred because it is aggregatable" }
+  - match: { field_selection.4.name: "field_4.keyword" }
+  - match: { field_selection.4.mapping_types: ["keyword"] }
+  - match: { field_selection.4.is_included: true }
+  - match: { field_selection.4.is_required: false }
+  - match: { field_selection.4.feature_type: "categorical" }
+  - is_false: field_selection.4.reason
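The field_selection keys asserted above (name, mapping_types, is_included, is_required, feature_type, reason) line up with the FieldSelection factory methods used in ExtractedFieldsDetectorTests earlier in this patch. A hedged sketch of that correspondence, with arguments inferred from the test usages:

    import java.util.Collections;

    // included(name, mappingTypes, isRequired, featureType) corresponds to an entry
    // with "is_included": true and no "reason", e.g. field_selection.0 above.
    FieldSelection field1 = FieldSelection.included(
        "field_1", Collections.singleton("integer"), true, FieldSelection.FeatureType.NUMERICAL);

    // excluded(name, mappingTypes, reason) corresponds to an entry with "is_included": false,
    // "is_required": false, no "feature_type", and the given "reason", e.g. field_selection.2 above.
    FieldSelection field3 = FieldSelection.excluded(
        "field_3", Collections.singleton("date"),
        "unsupported type; supported types are [boolean, byte, double, float, half_float, integer, ip, keyword, long, scaled_float, short, text]");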