From 35c6dab58d67c21a45387dee2516fe7940dffc92 Mon Sep 17 00:00:00 2001 From: Dimitris Athanasiou Date: Fri, 22 Nov 2019 20:08:14 +0200 Subject: [PATCH] [7.x][ML] Explain data frame analytics API (#49455) This commit replaces the _estimate_memory_usage API with a new API, the _explain API. The API consolidates information that is useful before creating a data frame analytics job. It includes: - memory estimation - field selection explanation Memory estimation is moved here from what was previously calculated in the _estimate_memory_usage API. Field selection is a new feature that explains to the user whether each available field was selected to be included or not in the analysis. In the case it was not included, it also explains the reason why. Backport of #49455 --- .../client/MLRequestConverters.java | 18 +- .../client/MachineLearningClient.java | 37 ++- .../ml/ExplainDataFrameAnalyticsRequest.java | 72 ++++ .../ml/ExplainDataFrameAnalyticsResponse.java | 94 ++++++ .../ml/dataframe/explain/FieldSelection.java | 163 +++++++++ .../explain/MemoryEstimation.java} | 23 +- .../client/MLRequestConvertersTests.java | 28 +- .../client/MachineLearningIT.java | 41 ++- .../MlClientDocumentationIT.java | 63 ++-- ...ExplainDataFrameAnalyticsRequestTests.java | 44 +++ ...xplainDataFrameAnalyticsResponseTests.java | 54 +++ .../explain/FieldSelectionTests.java | 57 ++++ .../explain/MemoryEstimationTests.java} | 18 +- .../ml/estimate-memory-usage.asciidoc | 36 -- .../ml/explain-data-frame-analytics.asciidoc | 48 +++ .../high-level/supported-apis.asciidoc | 4 +- ...estimate-memory-usage-dfanalytics.asciidoc | 80 ----- .../apis/explain-dfanalytics.asciidoc | 159 +++++++++ .../ml/df-analytics/apis/index.asciidoc | 8 +- .../xpack/core/XPackClientPlugin.java | 12 +- .../ml/action/EstimateMemoryUsageAction.java | 119 ------- .../ExplainDataFrameAnalyticsAction.java | 101 ++++++ .../action/PutDataFrameAnalyticsAction.java | 9 +- .../dataframe/DataFrameAnalyticsConfig.java | 4 +- .../ml/dataframe/explain/FieldSelection.java | 184 +++++++++++ .../dataframe/explain/MemoryEstimation.java | 103 ++++++ ...stimateMemoryUsageActionResponseTests.java | 54 --- ...DataFrameAnalyticsActionResponseTests.java | 42 +++ .../DataFrameAnalyticsConfigTests.java | 12 +- .../explain/FieldSelectionTests.java | 45 +++ .../explain/MemoryEstimationTests.java | 61 ++++ .../ml/qa/ml-with-security/build.gradle | 5 +- .../xpack/ml/MachineLearning.java | 14 +- .../TransportEstimateMemoryUsageAction.java | 130 -------- ...nsportExplainDataFrameAnalyticsAction.java | 156 +++++++++ ...ransportStartDataFrameAnalyticsAction.java | 79 +++-- .../DataFrameDataExtractorFactory.java | 29 +- .../extractor/ExtractedFieldsDetector.java | 144 +++++--- .../MemoryUsageEstimationProcessManager.java | 4 +- .../RestEstimateMemoryUsageAction.java | 38 --- .../RestExplainDataFrameAnalyticsAction.java | 84 +++++ .../ExtractedFieldsDetectorTests.java | 243 ++++++++++---- .../api/ml.estimate_memory_usage.json | 21 -- .../api/ml.explain_data_frame_analytics.json | 31 ++ ...rame_analytics_memory_usage_estimation.yml | 84 ----- .../test/ml/explain_data_frame_analytics.yml | 308 ++++++++++++++++++ 46 files changed, 2312 insertions(+), 851 deletions(-) create mode 100644 client/rest-high-level/src/main/java/org/elasticsearch/client/ml/ExplainDataFrameAnalyticsRequest.java create mode 100644 client/rest-high-level/src/main/java/org/elasticsearch/client/ml/ExplainDataFrameAnalyticsResponse.java create mode 100644 
client/rest-high-level/src/main/java/org/elasticsearch/client/ml/dataframe/explain/FieldSelection.java rename client/rest-high-level/src/main/java/org/elasticsearch/client/ml/{EstimateMemoryUsageResponse.java => dataframe/explain/MemoryEstimation.java} (81%) create mode 100644 client/rest-high-level/src/test/java/org/elasticsearch/client/ml/ExplainDataFrameAnalyticsRequestTests.java create mode 100644 client/rest-high-level/src/test/java/org/elasticsearch/client/ml/ExplainDataFrameAnalyticsResponseTests.java create mode 100644 client/rest-high-level/src/test/java/org/elasticsearch/client/ml/dataframe/explain/FieldSelectionTests.java rename client/rest-high-level/src/test/java/org/elasticsearch/client/ml/{EstimateMemoryUsageResponseTests.java => dataframe/explain/MemoryEstimationTests.java} (68%) delete mode 100644 docs/java-rest/high-level/ml/estimate-memory-usage.asciidoc create mode 100644 docs/java-rest/high-level/ml/explain-data-frame-analytics.asciidoc delete mode 100644 docs/reference/ml/df-analytics/apis/estimate-memory-usage-dfanalytics.asciidoc create mode 100644 docs/reference/ml/df-analytics/apis/explain-dfanalytics.asciidoc delete mode 100644 x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/ml/action/EstimateMemoryUsageAction.java create mode 100644 x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/ml/action/ExplainDataFrameAnalyticsAction.java create mode 100644 x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/ml/dataframe/explain/FieldSelection.java create mode 100644 x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/ml/dataframe/explain/MemoryEstimation.java delete mode 100644 x-pack/plugin/core/src/test/java/org/elasticsearch/xpack/core/ml/action/EstimateMemoryUsageActionResponseTests.java create mode 100644 x-pack/plugin/core/src/test/java/org/elasticsearch/xpack/core/ml/action/ExplainDataFrameAnalyticsActionResponseTests.java create mode 100644 x-pack/plugin/core/src/test/java/org/elasticsearch/xpack/core/ml/dataframe/explain/FieldSelectionTests.java create mode 100644 x-pack/plugin/core/src/test/java/org/elasticsearch/xpack/core/ml/dataframe/explain/MemoryEstimationTests.java delete mode 100644 x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/action/TransportEstimateMemoryUsageAction.java create mode 100644 x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/action/TransportExplainDataFrameAnalyticsAction.java delete mode 100644 x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/rest/dataframe/RestEstimateMemoryUsageAction.java create mode 100644 x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/rest/dataframe/RestExplainDataFrameAnalyticsAction.java delete mode 100644 x-pack/plugin/src/test/resources/rest-api-spec/api/ml.estimate_memory_usage.json create mode 100644 x-pack/plugin/src/test/resources/rest-api-spec/api/ml.explain_data_frame_analytics.json delete mode 100644 x-pack/plugin/src/test/resources/rest-api-spec/test/ml/data_frame_analytics_memory_usage_estimation.yml create mode 100644 x-pack/plugin/src/test/resources/rest-api-spec/test/ml/explain_data_frame_analytics.yml diff --git a/client/rest-high-level/src/main/java/org/elasticsearch/client/MLRequestConverters.java b/client/rest-high-level/src/main/java/org/elasticsearch/client/MLRequestConverters.java index 2fc23acd13430..0a1a18eeb4461 100644 --- a/client/rest-high-level/src/main/java/org/elasticsearch/client/MLRequestConverters.java +++ b/client/rest-high-level/src/main/java/org/elasticsearch/client/MLRequestConverters.java @@ 
-29,6 +29,7 @@ import org.elasticsearch.client.RequestConverters.EndpointBuilder; import org.elasticsearch.client.core.PageParams; import org.elasticsearch.client.ml.CloseJobRequest; +import org.elasticsearch.client.ml.ExplainDataFrameAnalyticsRequest; import org.elasticsearch.client.ml.DeleteCalendarEventRequest; import org.elasticsearch.client.ml.DeleteCalendarJobRequest; import org.elasticsearch.client.ml.DeleteCalendarRequest; @@ -701,12 +702,17 @@ static Request evaluateDataFrame(EvaluateDataFrameRequest evaluateRequest) throw return request; } - static Request estimateMemoryUsage(PutDataFrameAnalyticsRequest estimateRequest) throws IOException { - String endpoint = new EndpointBuilder() - .addPathPartAsIs("_ml", "data_frame", "analytics", "_estimate_memory_usage") - .build(); - Request request = new Request(HttpPost.METHOD_NAME, endpoint); - request.setEntity(createEntity(estimateRequest, REQUEST_BODY_CONTENT_TYPE)); + static Request explainDataFrameAnalytics(ExplainDataFrameAnalyticsRequest explainRequest) throws IOException { + EndpointBuilder endpoint = new EndpointBuilder().addPathPartAsIs("_ml", "data_frame", "analytics"); + if (explainRequest.getId() != null) { + endpoint.addPathPart(explainRequest.getId()); + } + endpoint.addPathPartAsIs("_explain"); + + Request request = new Request(HttpPost.METHOD_NAME, endpoint.build()); + if (explainRequest.getConfig() != null) { + request.setEntity(createEntity(explainRequest.getConfig(), REQUEST_BODY_CONTENT_TYPE)); + } return request; } diff --git a/client/rest-high-level/src/main/java/org/elasticsearch/client/MachineLearningClient.java b/client/rest-high-level/src/main/java/org/elasticsearch/client/MachineLearningClient.java index 2ddc8839f9648..468cd535c01dc 100644 --- a/client/rest-high-level/src/main/java/org/elasticsearch/client/MachineLearningClient.java +++ b/client/rest-high-level/src/main/java/org/elasticsearch/client/MachineLearningClient.java @@ -22,6 +22,8 @@ import org.elasticsearch.action.support.master.AcknowledgedResponse; import org.elasticsearch.client.ml.CloseJobRequest; import org.elasticsearch.client.ml.CloseJobResponse; +import org.elasticsearch.client.ml.ExplainDataFrameAnalyticsRequest; +import org.elasticsearch.client.ml.ExplainDataFrameAnalyticsResponse; import org.elasticsearch.client.ml.DeleteCalendarEventRequest; import org.elasticsearch.client.ml.DeleteCalendarJobRequest; import org.elasticsearch.client.ml.DeleteCalendarRequest; @@ -34,7 +36,6 @@ import org.elasticsearch.client.ml.DeleteJobRequest; import org.elasticsearch.client.ml.DeleteJobResponse; import org.elasticsearch.client.ml.DeleteModelSnapshotRequest; -import org.elasticsearch.client.ml.EstimateMemoryUsageResponse; import org.elasticsearch.client.ml.EvaluateDataFrameRequest; import org.elasticsearch.client.ml.EvaluateDataFrameResponse; import org.elasticsearch.client.ml.FindFileStructureRequest; @@ -2249,46 +2250,46 @@ public Cancellable evaluateDataFrameAsync(EvaluateDataFrameRequest request, Requ } /** - * Estimates memory usage for the given Data Frame Analytics + * Explains the given Data Frame Analytics *

* For additional info - * see - * Estimate Memory Usage for Data Frame Analytics documentation + * see + * Explain Data Frame Analytics documentation * - * @param request The {@link PutDataFrameAnalyticsRequest} + * @param request The {@link ExplainDataFrameAnalyticsRequest} * @param options Additional request options (e.g. headers), use {@link RequestOptions#DEFAULT} if nothing needs to be customized - * @return {@link EstimateMemoryUsageResponse} response object + * @return {@link ExplainDataFrameAnalyticsResponse} response object * @throws IOException when there is a serialization issue sending the request or receiving the response */ - public EstimateMemoryUsageResponse estimateMemoryUsage(PutDataFrameAnalyticsRequest request, - RequestOptions options) throws IOException { + public ExplainDataFrameAnalyticsResponse explainDataFrameAnalytics(ExplainDataFrameAnalyticsRequest request, + RequestOptions options) throws IOException { return restHighLevelClient.performRequestAndParseEntity( request, - MLRequestConverters::estimateMemoryUsage, + MLRequestConverters::explainDataFrameAnalytics, options, - EstimateMemoryUsageResponse::fromXContent, + ExplainDataFrameAnalyticsResponse::fromXContent, Collections.emptySet()); } /** - * Estimates memory usage for the given Data Frame Analytics asynchronously and notifies listener upon completion + * Explains the given Data Frame Analytics asynchronously and notifies listener upon completion *

* For additional info - * see - * Estimate Memory Usage for Data Frame Analytics documentation + * see + * Explain Data Frame Analytics documentation * - * @param request The {@link PutDataFrameAnalyticsRequest} + * @param request The {@link ExplainDataFrameAnalyticsRequest} * @param options Additional request options (e.g. headers), use {@link RequestOptions#DEFAULT} if nothing needs to be customized * @param listener Listener to be notified upon request completion * @return cancellable that may be used to cancel the request */ - public Cancellable estimateMemoryUsageAsync(PutDataFrameAnalyticsRequest request, RequestOptions options, - ActionListener listener) { + public Cancellable explainDataFrameAnalyticsAsync(ExplainDataFrameAnalyticsRequest request, RequestOptions options, + ActionListener listener) { return restHighLevelClient.performRequestAsyncAndParseEntity( request, - MLRequestConverters::estimateMemoryUsage, + MLRequestConverters::explainDataFrameAnalytics, options, - EstimateMemoryUsageResponse::fromXContent, + ExplainDataFrameAnalyticsResponse::fromXContent, listener, Collections.emptySet()); } diff --git a/client/rest-high-level/src/main/java/org/elasticsearch/client/ml/ExplainDataFrameAnalyticsRequest.java b/client/rest-high-level/src/main/java/org/elasticsearch/client/ml/ExplainDataFrameAnalyticsRequest.java new file mode 100644 index 0000000000000..880e87b2eea9b --- /dev/null +++ b/client/rest-high-level/src/main/java/org/elasticsearch/client/ml/ExplainDataFrameAnalyticsRequest.java @@ -0,0 +1,72 @@ +/* + * Licensed to Elasticsearch under one or more contributor + * license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright + * ownership. Elasticsearch licenses this file to you under + * the Apache License, Version 2.0 (the "License"); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.elasticsearch.client.ml; + +import org.elasticsearch.client.Validatable; +import org.elasticsearch.client.ml.dataframe.DataFrameAnalyticsConfig; +import org.elasticsearch.common.Nullable; + +import java.util.Objects; + +/** + * Request to explain the following about a data frame analytics job: + *
+ * <ul>
+ *     <li>field selection: which fields are included or not in the analysis</li>
+ *     <li>memory estimation: how much memory is estimated to be required for the analysis</li>
+ * </ul>
+ */ +public class ExplainDataFrameAnalyticsRequest implements Validatable { + + private final String id; + private final DataFrameAnalyticsConfig config; + + public ExplainDataFrameAnalyticsRequest(String id) { + this.id = Objects.requireNonNull(id); + this.config = null; + } + + public ExplainDataFrameAnalyticsRequest(DataFrameAnalyticsConfig config) { + this.id = null; + this.config = Objects.requireNonNull(config); + } + + @Nullable + public String getId() { + return id; + } + + @Nullable + public DataFrameAnalyticsConfig getConfig() { + return config; + } + + @Override + public boolean equals(Object o) { + if (this == o) return true; + if (o == null || getClass() != o.getClass()) return false; + + ExplainDataFrameAnalyticsRequest other = (ExplainDataFrameAnalyticsRequest) o; + return Objects.equals(id, other.id) && Objects.equals(config, other.config); + } + + @Override + public int hashCode() { + return Objects.hash(id, config); + } +} diff --git a/client/rest-high-level/src/main/java/org/elasticsearch/client/ml/ExplainDataFrameAnalyticsResponse.java b/client/rest-high-level/src/main/java/org/elasticsearch/client/ml/ExplainDataFrameAnalyticsResponse.java new file mode 100644 index 0000000000000..5879ffc7154bd --- /dev/null +++ b/client/rest-high-level/src/main/java/org/elasticsearch/client/ml/ExplainDataFrameAnalyticsResponse.java @@ -0,0 +1,94 @@ +/* + * Licensed to Elasticsearch under one or more contributor + * license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright + * ownership. Elasticsearch licenses this file to you under + * the Apache License, Version 2.0 (the "License"); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ +package org.elasticsearch.client.ml; + +import org.elasticsearch.client.ml.dataframe.explain.FieldSelection; +import org.elasticsearch.client.ml.dataframe.explain.MemoryEstimation; +import org.elasticsearch.common.ParseField; +import org.elasticsearch.common.xcontent.ConstructingObjectParser; +import org.elasticsearch.common.xcontent.ToXContentObject; +import org.elasticsearch.common.xcontent.XContentBuilder; +import org.elasticsearch.common.xcontent.XContentParser; + +import java.io.IOException; +import java.util.List; +import java.util.Objects; + +public class ExplainDataFrameAnalyticsResponse implements ToXContentObject { + + public static final ParseField TYPE = new ParseField("explain_data_frame_analytics_response"); + + public static final ParseField FIELD_SELECTION = new ParseField("field_selection"); + public static final ParseField MEMORY_ESTIMATION = new ParseField("memory_estimation"); + + public static ExplainDataFrameAnalyticsResponse fromXContent(XContentParser parser) throws IOException { + return PARSER.parse(parser, null); + } + + @SuppressWarnings("unchecked") + static final ConstructingObjectParser PARSER = + new ConstructingObjectParser<>( + TYPE.getPreferredName(), true, + args -> new ExplainDataFrameAnalyticsResponse((List) args[0], (MemoryEstimation) args[1])); + + static { + PARSER.declareObjectArray(ConstructingObjectParser.constructorArg(), FieldSelection.PARSER, FIELD_SELECTION); + PARSER.declareObject(ConstructingObjectParser.constructorArg(), MemoryEstimation.PARSER, MEMORY_ESTIMATION); + } + + private final List fieldSelection; + private final MemoryEstimation memoryEstimation; + + public ExplainDataFrameAnalyticsResponse(List fieldSelection, MemoryEstimation memoryEstimation) { + this.fieldSelection = Objects.requireNonNull(fieldSelection); + this.memoryEstimation = Objects.requireNonNull(memoryEstimation); + } + + @Override + public XContentBuilder toXContent(XContentBuilder builder, Params params) throws IOException { + builder.startObject(); + builder.field(FIELD_SELECTION.getPreferredName(), fieldSelection); + builder.field(MEMORY_ESTIMATION.getPreferredName(), memoryEstimation); + builder.endObject(); + return builder; + } + + @Override + public boolean equals(Object other) { + if (this == other) return true; + if (other == null || getClass() != other.getClass()) return false; + + ExplainDataFrameAnalyticsResponse that = (ExplainDataFrameAnalyticsResponse) other; + return Objects.equals(fieldSelection, that.fieldSelection) + && Objects.equals(memoryEstimation, that.memoryEstimation); + } + + @Override + public int hashCode() { + return Objects.hash(fieldSelection, memoryEstimation); + } + + public MemoryEstimation getMemoryEstimation() { + return memoryEstimation; + } + + public List getFieldSelection() { + return fieldSelection; + } +} diff --git a/client/rest-high-level/src/main/java/org/elasticsearch/client/ml/dataframe/explain/FieldSelection.java b/client/rest-high-level/src/main/java/org/elasticsearch/client/ml/dataframe/explain/FieldSelection.java new file mode 100644 index 0000000000000..4483b6fa5e09a --- /dev/null +++ b/client/rest-high-level/src/main/java/org/elasticsearch/client/ml/dataframe/explain/FieldSelection.java @@ -0,0 +1,163 @@ +/* + * Licensed to Elasticsearch under one or more contributor + * license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright + * ownership. 
Elasticsearch licenses this file to you under + * the Apache License, Version 2.0 (the "License"); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.elasticsearch.client.ml.dataframe.explain; + +import org.elasticsearch.common.Nullable; +import org.elasticsearch.common.ParseField; +import org.elasticsearch.common.xcontent.ConstructingObjectParser; +import org.elasticsearch.common.xcontent.ObjectParser; +import org.elasticsearch.common.xcontent.ToXContentObject; +import org.elasticsearch.common.xcontent.XContentBuilder; +import org.elasticsearch.common.xcontent.XContentParser; + +import java.io.IOException; +import java.util.Collections; +import java.util.HashSet; +import java.util.List; +import java.util.Locale; +import java.util.Objects; +import java.util.Set; + +public class FieldSelection implements ToXContentObject { + + private static final ParseField NAME = new ParseField("name"); + private static final ParseField MAPPING_TYPES = new ParseField("mapping_types"); + private static final ParseField IS_INCLUDED = new ParseField("is_included"); + private static final ParseField IS_REQUIRED = new ParseField("is_required"); + private static final ParseField FEATURE_TYPE = new ParseField("feature_type"); + private static final ParseField REASON = new ParseField("reason"); + + public enum FeatureType { + CATEGORICAL, NUMERICAL; + + public static FeatureType fromString(String value) { + return FeatureType.valueOf(value.toUpperCase(Locale.ROOT)); + } + + @Override + public String toString() { + return name().toLowerCase(Locale.ROOT); + } + } + + @SuppressWarnings("unchecked") + public static ConstructingObjectParser PARSER = new ConstructingObjectParser<>("field_selection", true, + a -> new FieldSelection((String) a[0], new HashSet<>((List) a[1]), (boolean) a[2], (boolean) a[3], (FeatureType) a[4], + (String) a[5])); + + static { + PARSER.declareString(ConstructingObjectParser.constructorArg(), NAME); + PARSER.declareStringArray(ConstructingObjectParser.constructorArg(), MAPPING_TYPES); + PARSER.declareBoolean(ConstructingObjectParser.constructorArg(), IS_INCLUDED); + PARSER.declareBoolean(ConstructingObjectParser.constructorArg(), IS_REQUIRED); + PARSER.declareField(ConstructingObjectParser.optionalConstructorArg(), p -> { + if (p.currentToken() == XContentParser.Token.VALUE_STRING) { + return FeatureType.fromString(p.text()); + } + throw new IllegalArgumentException("Unsupported token [" + p.currentToken() + "]"); + }, FEATURE_TYPE, ObjectParser.ValueType.STRING); + PARSER.declareString(ConstructingObjectParser.optionalConstructorArg(), REASON); + } + + private final String name; + private final Set mappingTypes; + private final boolean isIncluded; + private final boolean isRequired; + private final FeatureType featureType; + private final String reason; + + public static FieldSelection included(String name, Set mappingTypes, boolean isRequired, FeatureType featureType) { + return new FieldSelection(name, mappingTypes, true, isRequired, featureType, null); + } + + public static FieldSelection excluded(String name, Set mappingTypes, 
String reason) { + return new FieldSelection(name, mappingTypes, false, false, null, reason); + } + + FieldSelection(String name, Set mappingTypes, boolean isIncluded, boolean isRequired, @Nullable FeatureType featureType, + @Nullable String reason) { + this.name = Objects.requireNonNull(name); + this.mappingTypes = Collections.unmodifiableSet(mappingTypes); + this.isIncluded = isIncluded; + this.isRequired = isRequired; + this.featureType = featureType; + this.reason = reason; + } + + @Override + public XContentBuilder toXContent(XContentBuilder builder, Params params) throws IOException { + builder.startObject(); + builder.field(NAME.getPreferredName(), name); + builder.field(MAPPING_TYPES.getPreferredName(), mappingTypes); + builder.field(IS_INCLUDED.getPreferredName(), isIncluded); + builder.field(IS_REQUIRED.getPreferredName(), isRequired); + if (featureType != null) { + builder.field(FEATURE_TYPE.getPreferredName(), featureType); + } + if (reason != null) { + builder.field(REASON.getPreferredName(), reason); + } + builder.endObject(); + return builder; + } + + @Override + public boolean equals(Object o) { + if (this == o) return true; + if (o == null || getClass() != o.getClass()) return false; + FieldSelection that = (FieldSelection) o; + return Objects.equals(name, that.name) + && Objects.equals(mappingTypes, that.mappingTypes) + && isIncluded == that.isIncluded + && isRequired == that.isRequired + && Objects.equals(featureType, that.featureType) + && Objects.equals(reason, that.reason); + } + + @Override + public int hashCode() { + return Objects.hash(name, mappingTypes, isIncluded, isRequired, featureType, reason); + } + + public String getName() { + return name; + } + + public Set getMappingTypes() { + return mappingTypes; + } + + public boolean isIncluded() { + return isIncluded; + } + + public boolean isRequired() { + return isRequired; + } + + @Nullable + public FeatureType getFeatureType() { + return featureType; + } + + @Nullable + public String getReason() { + return reason; + } +} diff --git a/client/rest-high-level/src/main/java/org/elasticsearch/client/ml/EstimateMemoryUsageResponse.java b/client/rest-high-level/src/main/java/org/elasticsearch/client/ml/dataframe/explain/MemoryEstimation.java similarity index 81% rename from client/rest-high-level/src/main/java/org/elasticsearch/client/ml/EstimateMemoryUsageResponse.java rename to client/rest-high-level/src/main/java/org/elasticsearch/client/ml/dataframe/explain/MemoryEstimation.java index c97cc545cdb79..9151b8ce5dd32 100644 --- a/client/rest-high-level/src/main/java/org/elasticsearch/client/ml/EstimateMemoryUsageResponse.java +++ b/client/rest-high-level/src/main/java/org/elasticsearch/client/ml/dataframe/explain/MemoryEstimation.java @@ -16,8 +16,7 @@ * specific language governing permissions and limitations * under the License. 
*/ - -package org.elasticsearch.client.ml; +package org.elasticsearch.client.ml.dataframe.explain; import org.elasticsearch.common.Nullable; import org.elasticsearch.common.ParseField; @@ -26,23 +25,19 @@ import org.elasticsearch.common.xcontent.ObjectParser; import org.elasticsearch.common.xcontent.ToXContentObject; import org.elasticsearch.common.xcontent.XContentBuilder; -import org.elasticsearch.common.xcontent.XContentParser; import java.io.IOException; import java.util.Objects; import static org.elasticsearch.common.xcontent.ConstructingObjectParser.optionalConstructorArg; -public class EstimateMemoryUsageResponse implements ToXContentObject { - +public class MemoryEstimation implements ToXContentObject { + public static final ParseField EXPECTED_MEMORY_WITHOUT_DISK = new ParseField("expected_memory_without_disk"); public static final ParseField EXPECTED_MEMORY_WITH_DISK = new ParseField("expected_memory_with_disk"); - static final ConstructingObjectParser PARSER = - new ConstructingObjectParser<>( - "estimate_memory_usage_response", - true, - args -> new EstimateMemoryUsageResponse((ByteSizeValue) args[0], (ByteSizeValue) args[1])); + public static final ConstructingObjectParser PARSER = new ConstructingObjectParser<>("memory_estimation", true, + a -> new MemoryEstimation((ByteSizeValue) a[0], (ByteSizeValue) a[1])); static { PARSER.declareField( @@ -57,14 +52,10 @@ public class EstimateMemoryUsageResponse implements ToXContentObject { ObjectParser.ValueType.VALUE); } - public static EstimateMemoryUsageResponse fromXContent(XContentParser parser) { - return PARSER.apply(parser, null); - } - private final ByteSizeValue expectedMemoryWithoutDisk; private final ByteSizeValue expectedMemoryWithDisk; - public EstimateMemoryUsageResponse(@Nullable ByteSizeValue expectedMemoryWithoutDisk, @Nullable ByteSizeValue expectedMemoryWithDisk) { + public MemoryEstimation(@Nullable ByteSizeValue expectedMemoryWithoutDisk, @Nullable ByteSizeValue expectedMemoryWithDisk) { this.expectedMemoryWithoutDisk = expectedMemoryWithoutDisk; this.expectedMemoryWithDisk = expectedMemoryWithDisk; } @@ -99,7 +90,7 @@ public boolean equals(Object other) { return false; } - EstimateMemoryUsageResponse that = (EstimateMemoryUsageResponse) other; + MemoryEstimation that = (MemoryEstimation) other; return Objects.equals(expectedMemoryWithoutDisk, that.expectedMemoryWithoutDisk) && Objects.equals(expectedMemoryWithDisk, that.expectedMemoryWithDisk); } diff --git a/client/rest-high-level/src/test/java/org/elasticsearch/client/MLRequestConvertersTests.java b/client/rest-high-level/src/test/java/org/elasticsearch/client/MLRequestConvertersTests.java index db59054cdb87b..633e5363ff165 100644 --- a/client/rest-high-level/src/test/java/org/elasticsearch/client/MLRequestConvertersTests.java +++ b/client/rest-high-level/src/test/java/org/elasticsearch/client/MLRequestConvertersTests.java @@ -25,6 +25,7 @@ import org.apache.http.client.methods.HttpPut; import org.elasticsearch.client.core.PageParams; import org.elasticsearch.client.ml.CloseJobRequest; +import org.elasticsearch.client.ml.ExplainDataFrameAnalyticsRequest; import org.elasticsearch.client.ml.DeleteCalendarEventRequest; import org.elasticsearch.client.ml.DeleteCalendarJobRequest; import org.elasticsearch.client.ml.DeleteCalendarRequest; @@ -788,14 +789,25 @@ public void testEvaluateDataFrame() throws IOException { } } - public void testEstimateMemoryUsage() throws IOException { - PutDataFrameAnalyticsRequest estimateRequest = new 
PutDataFrameAnalyticsRequest(randomDataFrameAnalyticsConfig()); - Request request = MLRequestConverters.estimateMemoryUsage(estimateRequest); - assertEquals(HttpPost.METHOD_NAME, request.getMethod()); - assertEquals("/_ml/data_frame/analytics/_estimate_memory_usage", request.getEndpoint()); - try (XContentParser parser = createParser(JsonXContent.jsonXContent, request.getEntity().getContent())) { - DataFrameAnalyticsConfig parsedConfig = DataFrameAnalyticsConfig.fromXContent(parser); - assertThat(parsedConfig, equalTo(estimateRequest.getConfig())); + public void testExplainDataFrameAnalytics() throws IOException { + // Request with config + { + ExplainDataFrameAnalyticsRequest estimateRequest = new ExplainDataFrameAnalyticsRequest(randomDataFrameAnalyticsConfig()); + Request request = MLRequestConverters.explainDataFrameAnalytics(estimateRequest); + assertEquals(HttpPost.METHOD_NAME, request.getMethod()); + assertEquals("/_ml/data_frame/analytics/_explain", request.getEndpoint()); + try (XContentParser parser = createParser(JsonXContent.jsonXContent, request.getEntity().getContent())) { + DataFrameAnalyticsConfig parsedConfig = DataFrameAnalyticsConfig.fromXContent(parser); + assertThat(parsedConfig, equalTo(estimateRequest.getConfig())); + } + } + // Request with id + { + ExplainDataFrameAnalyticsRequest estimateRequest = new ExplainDataFrameAnalyticsRequest("foo"); + Request request = MLRequestConverters.explainDataFrameAnalytics(estimateRequest); + assertEquals(HttpPost.METHOD_NAME, request.getMethod()); + assertEquals("/_ml/data_frame/analytics/foo/_explain", request.getEndpoint()); + assertNull(request.getEntity()); } } diff --git a/client/rest-high-level/src/test/java/org/elasticsearch/client/MachineLearningIT.java b/client/rest-high-level/src/test/java/org/elasticsearch/client/MachineLearningIT.java index 361b36745509c..efb62b3f52689 100644 --- a/client/rest-high-level/src/test/java/org/elasticsearch/client/MachineLearningIT.java +++ b/client/rest-high-level/src/test/java/org/elasticsearch/client/MachineLearningIT.java @@ -32,6 +32,8 @@ import org.elasticsearch.client.indices.GetIndexRequest; import org.elasticsearch.client.ml.CloseJobRequest; import org.elasticsearch.client.ml.CloseJobResponse; +import org.elasticsearch.client.ml.ExplainDataFrameAnalyticsRequest; +import org.elasticsearch.client.ml.ExplainDataFrameAnalyticsResponse; import org.elasticsearch.client.ml.DeleteCalendarEventRequest; import org.elasticsearch.client.ml.DeleteCalendarJobRequest; import org.elasticsearch.client.ml.DeleteCalendarRequest; @@ -44,7 +46,6 @@ import org.elasticsearch.client.ml.DeleteJobRequest; import org.elasticsearch.client.ml.DeleteJobResponse; import org.elasticsearch.client.ml.DeleteModelSnapshotRequest; -import org.elasticsearch.client.ml.EstimateMemoryUsageResponse; import org.elasticsearch.client.ml.EvaluateDataFrameRequest; import org.elasticsearch.client.ml.EvaluateDataFrameResponse; import org.elasticsearch.client.ml.FindFileStructureRequest; @@ -140,6 +141,8 @@ import org.elasticsearch.client.ml.dataframe.evaluation.softclassification.ConfusionMatrixMetric; import org.elasticsearch.client.ml.dataframe.evaluation.softclassification.PrecisionMetric; import org.elasticsearch.client.ml.dataframe.evaluation.softclassification.RecallMetric; +import org.elasticsearch.client.ml.dataframe.explain.FieldSelection; +import org.elasticsearch.client.ml.dataframe.explain.MemoryEstimation; import org.elasticsearch.client.ml.filestructurefinder.FileStructure; import 
org.elasticsearch.client.ml.inference.TrainedModelConfig; import org.elasticsearch.client.ml.inference.TrainedModelDefinition; @@ -1996,8 +1999,8 @@ private void createIndex(String indexName, XContentBuilder mapping) throws IOExc highLevelClient().indices().create(new CreateIndexRequest(indexName).mapping(mapping), RequestOptions.DEFAULT); } - public void testEstimateMemoryUsage() throws IOException { - String indexName = "estimate-test-index"; + public void testExplainDataFrameAnalytics() throws IOException { + String indexName = "explain-df-test-index"; createIndex(indexName, mappingForSoftClassification()); BulkRequest bulk1 = new BulkRequest() .setRefreshPolicy(WriteRequest.RefreshPolicy.IMMEDIATE); @@ -2007,8 +2010,8 @@ public void testEstimateMemoryUsage() throws IOException { highLevelClient().bulk(bulk1, RequestOptions.DEFAULT); MachineLearningClient machineLearningClient = highLevelClient().machineLearning(); - PutDataFrameAnalyticsRequest estimateMemoryUsageRequest = - new PutDataFrameAnalyticsRequest( + ExplainDataFrameAnalyticsRequest explainRequest = + new ExplainDataFrameAnalyticsRequest( DataFrameAnalyticsConfig.builder() .setSource(DataFrameAnalyticsSource.builder().setIndex(indexName).build()) .setAnalysis(OutlierDetection.createDefault()) @@ -2019,11 +2022,16 @@ public void testEstimateMemoryUsage() throws IOException { ByteSizeValue upperBound = new ByteSizeValue(1, ByteSizeUnit.GB); // Data Frame has 10 rows, expect that the returned estimates fall within (1kB, 1GB) range. - EstimateMemoryUsageResponse response1 = - execute( - estimateMemoryUsageRequest, machineLearningClient::estimateMemoryUsage, machineLearningClient::estimateMemoryUsageAsync); - assertThat(response1.getExpectedMemoryWithoutDisk(), allOf(greaterThanOrEqualTo(lowerBound), lessThan(upperBound))); - assertThat(response1.getExpectedMemoryWithDisk(), allOf(greaterThanOrEqualTo(lowerBound), lessThan(upperBound))); + ExplainDataFrameAnalyticsResponse response1 = execute(explainRequest, machineLearningClient::explainDataFrameAnalytics, + machineLearningClient::explainDataFrameAnalyticsAsync); + + MemoryEstimation memoryEstimation1 = response1.getMemoryEstimation(); + assertThat(memoryEstimation1.getExpectedMemoryWithoutDisk(), allOf(greaterThanOrEqualTo(lowerBound), lessThan(upperBound))); + assertThat(memoryEstimation1.getExpectedMemoryWithDisk(), allOf(greaterThanOrEqualTo(lowerBound), lessThan(upperBound))); + + List fieldSelection = response1.getFieldSelection(); + assertThat(fieldSelection.size(), equalTo(3)); + assertThat(fieldSelection.stream().map(FieldSelection::getName).collect(Collectors.toList()), contains("dataset", "label", "p")); BulkRequest bulk2 = new BulkRequest() .setRefreshPolicy(WriteRequest.RefreshPolicy.IMMEDIATE); @@ -2033,15 +2041,16 @@ public void testEstimateMemoryUsage() throws IOException { highLevelClient().bulk(bulk2, RequestOptions.DEFAULT); // Data Frame now has 100 rows, expect that the returned estimates will be greater than or equal to the previous ones. 
- EstimateMemoryUsageResponse response2 = + ExplainDataFrameAnalyticsResponse response2 = execute( - estimateMemoryUsageRequest, machineLearningClient::estimateMemoryUsage, machineLearningClient::estimateMemoryUsageAsync); + explainRequest, machineLearningClient::explainDataFrameAnalytics, machineLearningClient::explainDataFrameAnalyticsAsync); + MemoryEstimation memoryEstimation2 = response2.getMemoryEstimation(); assertThat( - response2.getExpectedMemoryWithoutDisk(), - allOf(greaterThanOrEqualTo(response1.getExpectedMemoryWithoutDisk()), lessThan(upperBound))); + memoryEstimation2.getExpectedMemoryWithoutDisk(), + allOf(greaterThanOrEqualTo(memoryEstimation1.getExpectedMemoryWithoutDisk()), lessThan(upperBound))); assertThat( - response2.getExpectedMemoryWithDisk(), - allOf(greaterThanOrEqualTo(response1.getExpectedMemoryWithDisk()), lessThan(upperBound))); + memoryEstimation2.getExpectedMemoryWithDisk(), + allOf(greaterThanOrEqualTo(memoryEstimation1.getExpectedMemoryWithDisk()), lessThan(upperBound))); } public void testGetTrainedModels() throws Exception { diff --git a/client/rest-high-level/src/test/java/org/elasticsearch/client/documentation/MlClientDocumentationIT.java b/client/rest-high-level/src/test/java/org/elasticsearch/client/documentation/MlClientDocumentationIT.java index da12420535f67..8a118672d95e6 100644 --- a/client/rest-high-level/src/test/java/org/elasticsearch/client/documentation/MlClientDocumentationIT.java +++ b/client/rest-high-level/src/test/java/org/elasticsearch/client/documentation/MlClientDocumentationIT.java @@ -36,6 +36,8 @@ import org.elasticsearch.client.indices.CreateIndexRequest; import org.elasticsearch.client.ml.CloseJobRequest; import org.elasticsearch.client.ml.CloseJobResponse; +import org.elasticsearch.client.ml.ExplainDataFrameAnalyticsRequest; +import org.elasticsearch.client.ml.ExplainDataFrameAnalyticsResponse; import org.elasticsearch.client.ml.DeleteCalendarEventRequest; import org.elasticsearch.client.ml.DeleteCalendarJobRequest; import org.elasticsearch.client.ml.DeleteCalendarRequest; @@ -48,7 +50,6 @@ import org.elasticsearch.client.ml.DeleteJobRequest; import org.elasticsearch.client.ml.DeleteJobResponse; import org.elasticsearch.client.ml.DeleteModelSnapshotRequest; -import org.elasticsearch.client.ml.EstimateMemoryUsageResponse; import org.elasticsearch.client.ml.EvaluateDataFrameRequest; import org.elasticsearch.client.ml.EvaluateDataFrameResponse; import org.elasticsearch.client.ml.FindFileStructureRequest; @@ -155,6 +156,8 @@ import org.elasticsearch.client.ml.dataframe.evaluation.softclassification.ConfusionMatrixMetric.ConfusionMatrix; import org.elasticsearch.client.ml.dataframe.evaluation.softclassification.PrecisionMetric; import org.elasticsearch.client.ml.dataframe.evaluation.softclassification.RecallMetric; +import org.elasticsearch.client.ml.dataframe.explain.FieldSelection; +import org.elasticsearch.client.ml.dataframe.explain.MemoryEstimation; import org.elasticsearch.client.ml.filestructurefinder.FileStructure; import org.elasticsearch.client.ml.inference.TrainedModelConfig; import org.elasticsearch.client.ml.inference.TrainedModelDefinition; @@ -213,6 +216,7 @@ import static org.hamcrest.Matchers.allOf; import static org.hamcrest.Matchers.closeTo; +import static org.hamcrest.Matchers.contains; import static org.hamcrest.Matchers.containsInAnyOrder; import static org.hamcrest.Matchers.equalTo; import static org.hamcrest.Matchers.greaterThan; @@ -3460,10 +3464,10 @@ public void testEvaluateDataFrame_Regression() throws 
Exception { } } - public void testEstimateMemoryUsage() throws Exception { - createIndex("estimate-test-source-index"); + public void testExplainDataFrameAnalytics() throws Exception { + createIndex("explain-df-test-source-index"); BulkRequest bulkRequest = - new BulkRequest("estimate-test-source-index") + new BulkRequest("explain-df-test-source-index") .setRefreshPolicy(WriteRequest.RefreshPolicy.IMMEDIATE); for (int i = 0; i < 10; ++i) { bulkRequest.add(new IndexRequest().source(XContentType.JSON, "timestamp", 123456789L, "total", 10L)); @@ -3471,22 +3475,33 @@ public void testEstimateMemoryUsage() throws Exception { RestHighLevelClient client = highLevelClient(); client.bulk(bulkRequest, RequestOptions.DEFAULT); { - // tag::estimate-memory-usage-request + // tag::explain-data-frame-analytics-id-request + ExplainDataFrameAnalyticsRequest request = new ExplainDataFrameAnalyticsRequest("existing_job_id"); // <1> + // end::explain-data-frame-analytics-id-request + + // tag::explain-data-frame-analytics-config-request DataFrameAnalyticsConfig config = DataFrameAnalyticsConfig.builder() - .setSource(DataFrameAnalyticsSource.builder().setIndex("estimate-test-source-index").build()) + .setSource(DataFrameAnalyticsSource.builder().setIndex("explain-df-test-source-index").build()) .setAnalysis(OutlierDetection.createDefault()) .build(); - PutDataFrameAnalyticsRequest request = new PutDataFrameAnalyticsRequest(config); // <1> - // end::estimate-memory-usage-request + request = new ExplainDataFrameAnalyticsRequest(config); // <1> + // end::explain-data-frame-analytics-config-request + + // tag::explain-data-frame-analytics-execute + ExplainDataFrameAnalyticsResponse response = client.machineLearning().explainDataFrameAnalytics(request, + RequestOptions.DEFAULT); + // end::explain-data-frame-analytics-execute + + // tag::explain-data-frame-analytics-response + List fieldSelection = response.getFieldSelection(); // <1> + MemoryEstimation memoryEstimation = response.getMemoryEstimation(); // <2> + // end::explain-data-frame-analytics-response - // tag::estimate-memory-usage-execute - EstimateMemoryUsageResponse response = client.machineLearning().estimateMemoryUsage(request, RequestOptions.DEFAULT); - // end::estimate-memory-usage-execute + assertThat(fieldSelection.size(), equalTo(2)); + assertThat(fieldSelection.stream().map(FieldSelection::getName).collect(Collectors.toList()), contains("timestamp", "total")); - // tag::estimate-memory-usage-response - ByteSizeValue expectedMemoryWithoutDisk = response.getExpectedMemoryWithoutDisk(); // <1> - ByteSizeValue expectedMemoryWithDisk = response.getExpectedMemoryWithDisk(); // <2> - // end::estimate-memory-usage-response + ByteSizeValue expectedMemoryWithoutDisk = memoryEstimation.getExpectedMemoryWithoutDisk(); // <1> + ByteSizeValue expectedMemoryWithDisk = memoryEstimation.getExpectedMemoryWithDisk(); // <2> // We are pretty liberal here as this test does not aim at verifying concrete numbers but rather end-to-end user workflow. 
ByteSizeValue lowerBound = new ByteSizeValue(1, ByteSizeUnit.KB); @@ -3496,14 +3511,14 @@ public void testEstimateMemoryUsage() throws Exception { } { DataFrameAnalyticsConfig config = DataFrameAnalyticsConfig.builder() - .setSource(DataFrameAnalyticsSource.builder().setIndex("estimate-test-source-index").build()) + .setSource(DataFrameAnalyticsSource.builder().setIndex("explain-df-test-source-index").build()) .setAnalysis(OutlierDetection.createDefault()) .build(); - PutDataFrameAnalyticsRequest request = new PutDataFrameAnalyticsRequest(config); - // tag::estimate-memory-usage-execute-listener - ActionListener listener = new ActionListener() { + ExplainDataFrameAnalyticsRequest request = new ExplainDataFrameAnalyticsRequest(config); + // tag::explain-data-frame-analytics-execute-listener + ActionListener listener = new ActionListener() { @Override - public void onResponse(EstimateMemoryUsageResponse response) { + public void onResponse(ExplainDataFrameAnalyticsResponse response) { // <1> } @@ -3512,15 +3527,15 @@ public void onFailure(Exception e) { // <2> } }; - // end::estimate-memory-usage-execute-listener + // end::explain-data-frame-analytics-execute-listener // Replace the empty listener by a blocking listener in test final CountDownLatch latch = new CountDownLatch(1); listener = new LatchedActionListener<>(listener, latch); - // tag::estimate-memory-usage-execute-async - client.machineLearning().estimateMemoryUsageAsync(request, RequestOptions.DEFAULT, listener); // <1> - // end::estimate-memory-usage-execute-async + // tag::explain-data-frame-analytics-execute-async + client.machineLearning().explainDataFrameAnalyticsAsync(request, RequestOptions.DEFAULT, listener); // <1> + // end::explain-data-frame-analytics-execute-async assertTrue(latch.await(30L, TimeUnit.SECONDS)); } diff --git a/client/rest-high-level/src/test/java/org/elasticsearch/client/ml/ExplainDataFrameAnalyticsRequestTests.java b/client/rest-high-level/src/test/java/org/elasticsearch/client/ml/ExplainDataFrameAnalyticsRequestTests.java new file mode 100644 index 0000000000000..7273a40e298c4 --- /dev/null +++ b/client/rest-high-level/src/test/java/org/elasticsearch/client/ml/ExplainDataFrameAnalyticsRequestTests.java @@ -0,0 +1,44 @@ +/* + * Licensed to Elasticsearch under one or more contributor + * license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright + * ownership. Elasticsearch licenses this file to you under + * the Apache License, Version 2.0 (the "License"); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ +package org.elasticsearch.client.ml; + +import org.elasticsearch.client.ml.dataframe.DataFrameAnalyticsConfig; +import org.elasticsearch.client.ml.dataframe.DataFrameAnalyticsConfigTests; +import org.elasticsearch.test.ESTestCase; + +import static org.hamcrest.Matchers.equalTo; +import static org.hamcrest.Matchers.is; +import static org.hamcrest.Matchers.nullValue; + +public class ExplainDataFrameAnalyticsRequestTests extends ESTestCase { + + public void testIdConstructor() { + ExplainDataFrameAnalyticsRequest request = new ExplainDataFrameAnalyticsRequest("foo"); + assertThat(request.getId(), equalTo("foo")); + assertThat(request.getConfig(), is(nullValue())); + } + + public void testConfigConstructor() { + DataFrameAnalyticsConfig config = DataFrameAnalyticsConfigTests.randomDataFrameAnalyticsConfig(); + + ExplainDataFrameAnalyticsRequest request = new ExplainDataFrameAnalyticsRequest(config); + assertThat(request.getId(), is(nullValue())); + assertThat(request.getConfig(), equalTo(config)); + } +} diff --git a/client/rest-high-level/src/test/java/org/elasticsearch/client/ml/ExplainDataFrameAnalyticsResponseTests.java b/client/rest-high-level/src/test/java/org/elasticsearch/client/ml/ExplainDataFrameAnalyticsResponseTests.java new file mode 100644 index 0000000000000..f4adbd09ba7f3 --- /dev/null +++ b/client/rest-high-level/src/test/java/org/elasticsearch/client/ml/ExplainDataFrameAnalyticsResponseTests.java @@ -0,0 +1,54 @@ +/* + * Licensed to Elasticsearch under one or more contributor + * license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright + * ownership. Elasticsearch licenses this file to you under + * the Apache License, Version 2.0 (the "License"); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */
+package org.elasticsearch.client.ml;
+
+import org.elasticsearch.client.ml.dataframe.explain.FieldSelection;
+import org.elasticsearch.client.ml.dataframe.explain.FieldSelectionTests;
+import org.elasticsearch.client.ml.dataframe.explain.MemoryEstimation;
+import org.elasticsearch.client.ml.dataframe.explain.MemoryEstimationTests;
+import org.elasticsearch.common.xcontent.XContentParser;
+import org.elasticsearch.test.AbstractXContentTestCase;
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.List;
+import java.util.stream.IntStream;
+
+public class ExplainDataFrameAnalyticsResponseTests extends AbstractXContentTestCase<ExplainDataFrameAnalyticsResponse> {
+
+    @Override
+    protected ExplainDataFrameAnalyticsResponse createTestInstance() {
+        int fieldSelectionCount = randomIntBetween(1, 5);
+        List<FieldSelection> fieldSelection = new ArrayList<>(fieldSelectionCount);
+        IntStream.range(0, fieldSelectionCount).forEach(i -> fieldSelection.add(FieldSelectionTests.createRandom()));
+        MemoryEstimation memoryEstimation = MemoryEstimationTests.createRandom();
+
+        return new ExplainDataFrameAnalyticsResponse(fieldSelection, memoryEstimation);
+    }
+
+    @Override
+    protected ExplainDataFrameAnalyticsResponse doParseInstance(XContentParser parser) throws IOException {
+        return ExplainDataFrameAnalyticsResponse.fromXContent(parser);
+    }
+
+    @Override
+    protected boolean supportsUnknownFields() {
+        return true;
+    }
+}
diff --git a/client/rest-high-level/src/test/java/org/elasticsearch/client/ml/dataframe/explain/FieldSelectionTests.java b/client/rest-high-level/src/test/java/org/elasticsearch/client/ml/dataframe/explain/FieldSelectionTests.java
new file mode 100644
index 0000000000000..e76f39b5b852f
--- /dev/null
+++ b/client/rest-high-level/src/test/java/org/elasticsearch/client/ml/dataframe/explain/FieldSelectionTests.java
@@ -0,0 +1,57 @@
+/*
+ * Licensed to Elasticsearch under one or more contributor
+ * license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright
+ * ownership. Elasticsearch licenses this file to you under
+ * the Apache License, Version 2.0 (the "License"); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.elasticsearch.client.ml.dataframe.explain;
+
+import org.elasticsearch.common.xcontent.XContentParser;
+import org.elasticsearch.test.AbstractXContentTestCase;
+
+import java.io.IOException;
+import java.util.Set;
+import java.util.stream.Collectors;
+
+public class FieldSelectionTests extends AbstractXContentTestCase<FieldSelection> {
+
+    public static FieldSelection createRandom() {
+        Set<String> mappingTypes = randomSubsetOf(randomIntBetween(1, 3), "int", "float", "double", "text", "keyword", "ip")
+            .stream().collect(Collectors.toSet());
+        FieldSelection.FeatureType featureType = randomBoolean() ? null : randomFrom(FieldSelection.FeatureType.values());
+        String reason = randomBoolean() ?
null : randomAlphaOfLength(20); + return new FieldSelection(randomAlphaOfLength(10), + mappingTypes, + randomBoolean(), + randomBoolean(), + featureType, + reason); + } + + @Override + protected FieldSelection createTestInstance() { + return createRandom(); + } + + @Override + protected FieldSelection doParseInstance(XContentParser parser) throws IOException { + return FieldSelection.PARSER.apply(parser, null); + } + + @Override + protected boolean supportsUnknownFields() { + return true; + } +} diff --git a/client/rest-high-level/src/test/java/org/elasticsearch/client/ml/EstimateMemoryUsageResponseTests.java b/client/rest-high-level/src/test/java/org/elasticsearch/client/ml/dataframe/explain/MemoryEstimationTests.java similarity index 68% rename from client/rest-high-level/src/test/java/org/elasticsearch/client/ml/EstimateMemoryUsageResponseTests.java rename to client/rest-high-level/src/test/java/org/elasticsearch/client/ml/dataframe/explain/MemoryEstimationTests.java index f8f2746204df5..884736e573ed5 100644 --- a/client/rest-high-level/src/test/java/org/elasticsearch/client/ml/EstimateMemoryUsageResponseTests.java +++ b/client/rest-high-level/src/test/java/org/elasticsearch/client/ml/dataframe/explain/MemoryEstimationTests.java @@ -7,7 +7,7 @@ * not use this file except in compliance with the License. * You may obtain a copy of the License at * - * http://www.apache.org/licenses/LICENSE-2.0 + * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, * software distributed under the License is distributed on an @@ -16,7 +16,7 @@ * specific language governing permissions and limitations * under the License. */ -package org.elasticsearch.client.ml; +package org.elasticsearch.client.ml.dataframe.explain; import org.elasticsearch.common.unit.ByteSizeValue; import org.elasticsearch.common.xcontent.XContentParser; @@ -24,22 +24,22 @@ import java.io.IOException; -public class EstimateMemoryUsageResponseTests extends AbstractXContentTestCase { +public class MemoryEstimationTests extends AbstractXContentTestCase { - public static EstimateMemoryUsageResponse randomResponse() { - return new EstimateMemoryUsageResponse( + public static MemoryEstimation createRandom() { + return new MemoryEstimation( randomBoolean() ? new ByteSizeValue(randomNonNegativeLong()) : null, randomBoolean() ? new ByteSizeValue(randomNonNegativeLong()) : null); } @Override - protected EstimateMemoryUsageResponse createTestInstance() { - return randomResponse(); + protected MemoryEstimation createTestInstance() { + return createRandom(); } @Override - protected EstimateMemoryUsageResponse doParseInstance(XContentParser parser) throws IOException { - return EstimateMemoryUsageResponse.fromXContent(parser); + protected MemoryEstimation doParseInstance(XContentParser parser) throws IOException { + return MemoryEstimation.PARSER.apply(parser, null); } @Override diff --git a/docs/java-rest/high-level/ml/estimate-memory-usage.asciidoc b/docs/java-rest/high-level/ml/estimate-memory-usage.asciidoc deleted file mode 100644 index 8b7ae0f55c8e1..0000000000000 --- a/docs/java-rest/high-level/ml/estimate-memory-usage.asciidoc +++ /dev/null @@ -1,36 +0,0 @@ --- -:api: estimate-memory-usage -:request: PutDataFrameAnalyticsRequest -:response: EstimateMemoryUsageResponse --- -[role="xpack"] -[id="{upid}-{api}"] -=== Estimate memory usage API - -Estimates memory usage of {dfanalytics}. 
-Estimation results can be used when deciding the appropriate value for `model_memory_limit` setting later on.
-
-The API accepts an +{request}+ object and returns an +{response}+.
-
-[id="{upid}-{api}-request"]
-==== Estimate memory usage request
-
-["source","java",subs="attributes,callouts,macros"]
---------------------------------------------------
-include-tagged::{doc-tests-file}[{api}-request]
---------------------------------------------------
-<1> Constructing a new request containing a {dataframe-analytics-config} for which memory usage estimation should be performed
-
-include::../execution.asciidoc[]
-
-[id="{upid}-{api}-response"]
-==== Response
-
-The returned +{response}+ contains the memory usage estimates.
-
-["source","java",subs="attributes,callouts,macros"]
---------------------------------------------------
-include-tagged::{doc-tests-file}[{api}-response]
---------------------------------------------------
-<1> Estimated memory usage under the assumption that the whole {dfanalytics} should happen in memory (i.e. without overflowing to disk).
-<2> Estimated memory usage under the assumption that overflowing to disk is allowed during {dfanalytics}.
\ No newline at end of file
diff --git a/docs/java-rest/high-level/ml/explain-data-frame-analytics.asciidoc b/docs/java-rest/high-level/ml/explain-data-frame-analytics.asciidoc
new file mode 100644
index 0000000000000..3c41531d22213
--- /dev/null
+++ b/docs/java-rest/high-level/ml/explain-data-frame-analytics.asciidoc
@@ -0,0 +1,48 @@
+--
+:api: explain-data-frame-analytics
+:request: ExplainDataFrameAnalyticsRequest
+:response: ExplainDataFrameAnalyticsResponse
+--
+[role="xpack"]
+[id="{upid}-{api}"]
+=== Explain {dfanalytics} API
+
+Explains the following about a {dataframe-analytics-config}:
+
+* field selection: which fields are included or not in the analysis
+* memory estimation: how much memory is estimated to be required. The estimate can be used when deciding the appropriate value for `model_memory_limit` setting later on.
+
+The API accepts an +{request}+ object and returns an +{response}+.
+
+[id="{upid}-{api}-request"]
+==== Explain {dfanalytics} request
+
+The request can be constructed with the id of an existing {dfanalytics-job}.
+
+["source","java",subs="attributes,callouts,macros"]
+--------------------------------------------------
+include-tagged::{doc-tests-file}[{api}-id-request]
+--------------------------------------------------
+<1> Constructing a new request with the id of an existing {dfanalytics-job}
+
+It can also be constructed with a {dataframe-analytics-config} in order to explain the job before it is created.
+
+["source","java",subs="attributes,callouts,macros"]
+--------------------------------------------------
+include-tagged::{doc-tests-file}[{api}-config-request]
+--------------------------------------------------
+<1> Constructing a new request containing a {dataframe-analytics-config}
+
+include::../execution.asciidoc[]
+
+[id="{upid}-{api}-response"]
+==== Response
+
+The returned +{response}+ contains the field selection and the memory usage estimation.
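+
+In addition to the tagged snippet below, here is a minimal hand-written sketch (not taken from the documented test sources; `response` stands for the +{response}+ obtained above) of how the field selection might be inspected:
+
+["source","java"]
+--------------------------------------------------
+for (FieldSelection field : response.getFieldSelection()) {
+    if (field.isIncluded() == false) {
+        // for excluded fields, the reason explains why the field was left out of the analysis
+        System.out.println(field.getName() + " was excluded: " + field.getReason());
+    }
+}
+--------------------------------------------------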
+ +["source","java",subs="attributes,callouts,macros"] +-------------------------------------------------- +include-tagged::{doc-tests-file}[{api}-response] +-------------------------------------------------- +<1> A list where each item explains whether a field was selected for analysis or not +<2> The memory estimation for the {dfanalytics-job} diff --git a/docs/java-rest/high-level/supported-apis.asciidoc b/docs/java-rest/high-level/supported-apis.asciidoc index 770866a075522..d691a3ac34b09 100644 --- a/docs/java-rest/high-level/supported-apis.asciidoc +++ b/docs/java-rest/high-level/supported-apis.asciidoc @@ -300,7 +300,7 @@ The Java High Level REST Client supports the following Machine Learning APIs: * <<{upid}-start-data-frame-analytics>> * <<{upid}-stop-data-frame-analytics>> * <<{upid}-evaluate-data-frame>> -* <<{upid}-estimate-memory-usage>> +* <<{upid}-explain-data-frame-analytics>> * <<{upid}-get-trained-models>> * <<{upid}-put-filter>> * <<{upid}-get-filters>> @@ -353,7 +353,7 @@ include::ml/delete-data-frame-analytics.asciidoc[] include::ml/start-data-frame-analytics.asciidoc[] include::ml/stop-data-frame-analytics.asciidoc[] include::ml/evaluate-data-frame.asciidoc[] -include::ml/estimate-memory-usage.asciidoc[] +include::ml/explain-data-frame-analytics.asciidoc[] include::ml/get-trained-models.asciidoc[] include::ml/put-filter.asciidoc[] include::ml/get-filters.asciidoc[] diff --git a/docs/reference/ml/df-analytics/apis/estimate-memory-usage-dfanalytics.asciidoc b/docs/reference/ml/df-analytics/apis/estimate-memory-usage-dfanalytics.asciidoc deleted file mode 100644 index 64db472dfd1e4..0000000000000 --- a/docs/reference/ml/df-analytics/apis/estimate-memory-usage-dfanalytics.asciidoc +++ /dev/null @@ -1,80 +0,0 @@ -[role="xpack"] -[testenv="platinum"] -[[estimate-memory-usage-dfanalytics]] -=== Estimate memory usage API - -[subs="attributes"] -++++ -Estimate memory usage for {dfanalytics-jobs} -++++ - -Estimates memory usage for the given {dataframe-analytics-config}. - -experimental[] - -[[ml-estimate-memory-usage-dfanalytics-request]] -==== {api-request-title} - -`POST _ml/data_frame/analytics/_estimate_memory_usage` - -[[ml-estimate-memory-usage-dfanalytics-prereq]] -==== {api-prereq-title} - -* You must have `monitor_ml` privilege to use this API. For more -information, see <> and <>. - -[[ml-estimate-memory-usage-dfanalytics-desc]] -==== {api-description-title} - -This API estimates memory usage for the given {dataframe-analytics-config} before the {dfanalytics-job} is even created. - -Serves as an advice on how to set `model_memory_limit` when creating {dfanalytics-job}. - -[[ml-estimate-memory-usage-dfanalytics-request-body]] -==== {api-request-body-title} - -`data_frame_analytics_config`:: - (Required, object) Intended configuration of {dfanalytics-job}. For more information, see - <>. - Note that `id` and `dest` don't need to be provided in the context of this API. - -[[ml-estimate-memory-usage-dfanalytics-results]] -==== {api-response-body-title} - -`expected_memory_without_disk`:: - (string) Estimated memory usage under the assumption that the whole {dfanalytics} should happen in memory - (i.e. without overflowing to disk). - -`expected_memory_with_disk`:: - (string) Estimated memory usage under the assumption that overflowing to disk is allowed during {dfanalytics}. - `expected_memory_with_disk` is usually smaller than `expected_memory_without_disk` as using disk allows to - limit the main memory needed to perform {dfanalytics}. 
-
-[[ml-estimate-memory-usage-dfanalytics-example]]
-==== {api-examples-title}
-
-[source,console]
---------------------------------------------------
-POST _ml/data_frame/analytics/_estimate_memory_usage
-{
-  "data_frame_analytics_config": {
-    "source": {
-      "index": "logdata"
-    },
-    "analysis": {
-      "outlier_detection": {}
-    }
-  }
-}
---------------------------------------------------
-// TEST[skip:TBD]
-
-The API returns the following results:
-
-[source,console-result]
-----
-{
-  "expected_memory_without_disk": "128MB",
-  "expected_memory_with_disk": "32MB"
-}
-----
diff --git a/docs/reference/ml/df-analytics/apis/explain-dfanalytics.asciidoc b/docs/reference/ml/df-analytics/apis/explain-dfanalytics.asciidoc
new file mode 100644
index 0000000000000..c9ee565e9b2c5
--- /dev/null
+++ b/docs/reference/ml/df-analytics/apis/explain-dfanalytics.asciidoc
@@ -0,0 +1,159 @@
+[role="xpack"]
+[testenv="platinum"]
+[[explain-dfanalytics]]
+=== Explain {dfanalytics} API
+
+[subs="attributes"]
+++++
+Explain {dfanalytics} API
+++++
+
+Explains a {dataframe-analytics-config}.
+
+experimental[]
+
+[[ml-explain-dfanalytics-request]]
+==== {api-request-title}
+
+`GET _ml/data_frame/analytics/_explain` +
+
+`POST _ml/data_frame/analytics/_explain` +
+
+`GET _ml/data_frame/analytics/<data_frame_analytics_id>/_explain` +
+
+`POST _ml/data_frame/analytics/<data_frame_analytics_id>/_explain`
+
+[[ml-explain-dfanalytics-prereq]]
+==== {api-prereq-title}
+
+* You must have `monitor_ml` privilege to use this API. For more
+information, see <> and <>.
+
+[[ml-explain-dfanalytics-desc]]
+==== {api-description-title}
+
+This API provides explanations for a {dataframe-analytics-config} that either exists already or one that has not been created yet.
+The following explanations are provided:
+
+* which fields are included or not in the analysis and why
+* how much memory is estimated to be required. The estimate can be used when deciding the appropriate value for the `model_memory_limit` setting later on.
+
+[[ml-explain-dfanalytics-path-params]]
+==== {api-path-parms-title}
+
+`<data_frame_analytics_id>`::
+  (Optional, string) Identifier for the existing {dfanalytics-job} to explain. This
+  identifier can contain lowercase alphanumeric characters (a-z and 0-9), hyphens,
+  and underscores. It must start and end with alphanumeric characters.
+
+[[ml-explain-dfanalytics-request-body]]
+==== {api-request-body-title}
+
+`data_frame_analytics_config`::
+  (Optional, object) Intended configuration of {dfanalytics-job}. For more information, see
+  <>.
+  Note that `id` and `dest` don't need to be provided in the context of this API.
+
+[[ml-explain-dfanalytics-results]]
+==== {api-response-body-title}
+
+The API returns a response that contains the following:
+
+`field_selection`::
+  (array) An array of objects that explain the selection for each field, sorted by the field names.
+  Each object in the array has the following properties:
+
+  `name`:::
+    (string) The field name.
+
+  `mapping_types`:::
+    (array of strings) The mapping types of the field.
+
+  `is_included`:::
+    (boolean) Whether the field is selected to be included in the analysis.
+
+  `is_required`:::
+    (boolean) Whether the field is required.
+
+  `feature_type`:::
+    (string) The feature type of this field for the analysis. May be `categorical` or `numerical`.
+
+  `reason`:::
+    (string) The reason a field is not selected to be included in the analysis.
+
+`memory_estimation`::
+  (object) An object containing the memory estimates.
+  The object has the following properties:
+
+  `expected_memory_without_disk`:::
+    (string) Estimated memory usage under the assumption that the whole {dfanalytics} should happen in memory
+    (i.e. without overflowing to disk).
+
+  `expected_memory_with_disk`:::
+    (string) Estimated memory usage under the assumption that overflowing to disk is allowed during {dfanalytics}.
+    `expected_memory_with_disk` is usually smaller than `expected_memory_without_disk` as using disk allows
+    limiting the main memory needed to perform {dfanalytics}.
+
+[[ml-explain-dfanalytics-example]]
+==== {api-examples-title}
+
+[source,console]
+--------------------------------------------------
+POST _ml/data_frame/analytics/_explain
+{
+  "data_frame_analytics_config": {
+    "source": {
+      "index": "houses_sold_last_10_yrs"
+    },
+    "analysis": {
+      "regression": {
+        "dependent_variable": "price"
+      }
+    }
+  }
+}
+--------------------------------------------------
+// TEST[skip:TBD]
+
+The API returns the following results:
+
+[source,console-result]
+----
+{
+  "field_selection": [
+    {
+      "name": "number_of_bedrooms",
+      "mapping_types": ["integer"],
+      "is_included": true,
+      "is_required": false,
+      "feature_type": "numerical"
+    },
+    {
+      "name": "postcode",
+      "mapping_types": ["text"],
+      "is_included": false,
+      "is_required": false,
+      "reason": "[postcode.keyword] is preferred because it is aggregatable"
+    },
+    {
+      "name": "postcode.keyword",
+      "mapping_types": ["keyword"],
+      "is_included": true,
+      "is_required": false,
+      "feature_type": "categorical"
+    },
+    {
+      "name": "price",
+      "mapping_types": ["float"],
+      "is_included": true,
+      "is_required": true,
+      "feature_type": "numerical"
+    }
+  ],
+  "memory_estimation": {
+    "expected_memory_without_disk": "128MB",
+    "expected_memory_with_disk": "32MB"
+  }
+}
----
diff --git a/docs/reference/ml/df-analytics/apis/index.asciidoc b/docs/reference/ml/df-analytics/apis/index.asciidoc
index 30e909f3ffad6..6bf63e7ddb8c0 100644
--- a/docs/reference/ml/df-analytics/apis/index.asciidoc
+++ b/docs/reference/ml/df-analytics/apis/index.asciidoc
@@ -5,16 +5,16 @@
 You can use the following APIs to perform {ml} {dfanalytics} activities.
 
-* <>
+* <>
 * <>
 * <>
 * <>
 * <>
 * <>
 * <>
-* <>
+* <>
 
-See also <>.
+See also <>.
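To complement the REST example above, here is a minimal sketch of the same call through the Java high-level REST client. It assumes the `explainDataFrameAnalytics` client method and the response getters mirror the server-side classes added later in this patch; the job id is made up.

["source","java"]
----
import org.elasticsearch.client.RequestOptions;
import org.elasticsearch.client.RestHighLevelClient;
import org.elasticsearch.client.ml.ExplainDataFrameAnalyticsRequest;
import org.elasticsearch.client.ml.ExplainDataFrameAnalyticsResponse;
import org.elasticsearch.client.ml.dataframe.explain.FieldSelection;

import java.io.IOException;

public class ExplainExample {

    static void explain(RestHighLevelClient client) throws IOException {
        // Explain an existing job by its id (the id is hypothetical).
        ExplainDataFrameAnalyticsRequest request = new ExplainDataFrameAnalyticsRequest("houses-regression");
        ExplainDataFrameAnalyticsResponse response =
            client.machineLearning().explainDataFrameAnalytics(request, RequestOptions.DEFAULT);

        // Report why each excluded field was left out of the analysis.
        for (FieldSelection field : response.getFieldSelection()) {
            if (field.isIncluded() == false) {
                System.out.println(field.getName() + " excluded: " + field.getReason());
            }
        }

        // The memory estimation informs the model_memory_limit setting.
        System.out.println("Estimate without disk: "
            + response.getMemoryEstimation().getExpectedMemoryWithoutDisk());
    }
}
----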
//CREATE include::put-dfanalytics.asciidoc[] @@ -23,7 +23,7 @@ include::delete-dfanalytics.asciidoc[] //EVALUATE include::evaluate-dfanalytics.asciidoc[] //ESTIMATE_MEMORY_USAGE -include::estimate-memory-usage-dfanalytics.asciidoc[] +include::explain-dfanalytics.asciidoc[] //GET include::get-dfanalytics.asciidoc[] include::get-dfanalytics-stats.asciidoc[] diff --git a/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/XPackClientPlugin.java b/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/XPackClientPlugin.java index 8caac9d6e2050..d99dd1ec23390 100644 --- a/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/XPackClientPlugin.java +++ b/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/XPackClientPlugin.java @@ -79,6 +79,7 @@ import org.elasticsearch.xpack.core.ml.MlMetadata; import org.elasticsearch.xpack.core.ml.MlTasks; import org.elasticsearch.xpack.core.ml.action.CloseJobAction; +import org.elasticsearch.xpack.core.ml.action.ExplainDataFrameAnalyticsAction; import org.elasticsearch.xpack.core.ml.action.DeleteCalendarAction; import org.elasticsearch.xpack.core.ml.action.DeleteCalendarEventAction; import org.elasticsearch.xpack.core.ml.action.DeleteDataFrameAnalyticsAction; @@ -89,7 +90,6 @@ import org.elasticsearch.xpack.core.ml.action.DeleteJobAction; import org.elasticsearch.xpack.core.ml.action.DeleteModelSnapshotAction; import org.elasticsearch.xpack.core.ml.action.DeleteTrainedModelAction; -import org.elasticsearch.xpack.core.ml.action.EstimateMemoryUsageAction; import org.elasticsearch.xpack.core.ml.action.EvaluateDataFrameAction; import org.elasticsearch.xpack.core.ml.action.FinalizeJobExecutionAction; import org.elasticsearch.xpack.core.ml.action.FindFileStructureAction; @@ -158,6 +158,10 @@ import org.elasticsearch.xpack.core.ml.dataframe.evaluation.softclassification.Recall; import org.elasticsearch.xpack.core.ml.dataframe.evaluation.softclassification.ScoreByThresholdResult; import org.elasticsearch.xpack.core.ml.dataframe.evaluation.softclassification.SoftClassificationMetric; +import org.elasticsearch.xpack.core.ml.inference.preprocessing.FrequencyEncoding; +import org.elasticsearch.xpack.core.ml.inference.preprocessing.OneHotEncoding; +import org.elasticsearch.xpack.core.ml.inference.preprocessing.PreProcessor; +import org.elasticsearch.xpack.core.ml.inference.preprocessing.TargetMeanEncoding; import org.elasticsearch.xpack.core.ml.inference.results.ClassificationInferenceResults; import org.elasticsearch.xpack.core.ml.inference.results.InferenceResults; import org.elasticsearch.xpack.core.ml.inference.results.RegressionInferenceResults; @@ -171,10 +175,6 @@ import org.elasticsearch.xpack.core.ml.inference.trainedmodel.ensemble.WeightedMode; import org.elasticsearch.xpack.core.ml.inference.trainedmodel.ensemble.WeightedSum; import org.elasticsearch.xpack.core.ml.inference.trainedmodel.tree.Tree; -import org.elasticsearch.xpack.core.ml.inference.preprocessing.FrequencyEncoding; -import org.elasticsearch.xpack.core.ml.inference.preprocessing.OneHotEncoding; -import org.elasticsearch.xpack.core.ml.inference.preprocessing.PreProcessor; -import org.elasticsearch.xpack.core.ml.inference.preprocessing.TargetMeanEncoding; import org.elasticsearch.xpack.core.ml.job.config.JobTaskState; import org.elasticsearch.xpack.core.monitoring.MonitoringFeatureSetUsage; import org.elasticsearch.xpack.core.rollup.RollupFeatureSetUsage; @@ -381,7 +381,7 @@ public List> getClientActions() { StartDataFrameAnalyticsAction.INSTANCE, 
StopDataFrameAnalyticsAction.INSTANCE, EvaluateDataFrameAction.INSTANCE, - EstimateMemoryUsageAction.INSTANCE, + ExplainDataFrameAnalyticsAction.INSTANCE, InternalInferModelAction.INSTANCE, GetTrainedModelsAction.INSTANCE, DeleteTrainedModelAction.INSTANCE, diff --git a/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/ml/action/EstimateMemoryUsageAction.java b/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/ml/action/EstimateMemoryUsageAction.java deleted file mode 100644 index 529db21cced70..0000000000000 --- a/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/ml/action/EstimateMemoryUsageAction.java +++ /dev/null @@ -1,119 +0,0 @@ -/* - * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one - * or more contributor license agreements. Licensed under the Elastic License; - * you may not use this file except in compliance with the Elastic License. - */ -package org.elasticsearch.xpack.core.ml.action; - -import org.elasticsearch.action.ActionResponse; -import org.elasticsearch.action.ActionType; -import org.elasticsearch.common.Nullable; -import org.elasticsearch.common.ParseField; -import org.elasticsearch.common.io.stream.StreamInput; -import org.elasticsearch.common.io.stream.StreamOutput; -import org.elasticsearch.common.unit.ByteSizeValue; -import org.elasticsearch.common.xcontent.ConstructingObjectParser; -import org.elasticsearch.common.xcontent.ObjectParser; -import org.elasticsearch.common.xcontent.ToXContentObject; -import org.elasticsearch.common.xcontent.XContentBuilder; - -import java.io.IOException; -import java.util.Objects; - -import static org.elasticsearch.common.xcontent.ConstructingObjectParser.optionalConstructorArg; - -public class EstimateMemoryUsageAction extends ActionType { - - public static final EstimateMemoryUsageAction INSTANCE = new EstimateMemoryUsageAction(); - public static final String NAME = "cluster:admin/xpack/ml/data_frame/analytics/estimate_memory_usage"; - - private EstimateMemoryUsageAction() { - super(NAME, EstimateMemoryUsageAction.Response::new); - } - - public static class Response extends ActionResponse implements ToXContentObject { - - public static final ParseField TYPE = new ParseField("memory_usage_estimation_result"); - - public static final ParseField EXPECTED_MEMORY_WITHOUT_DISK = new ParseField("expected_memory_without_disk"); - public static final ParseField EXPECTED_MEMORY_WITH_DISK = new ParseField("expected_memory_with_disk"); - - static final ConstructingObjectParser PARSER = - new ConstructingObjectParser<>( - TYPE.getPreferredName(), - args -> new Response((ByteSizeValue) args[0], (ByteSizeValue) args[1])); - - static { - PARSER.declareField( - optionalConstructorArg(), - (p, c) -> ByteSizeValue.parseBytesSizeValue(p.text(), EXPECTED_MEMORY_WITHOUT_DISK.getPreferredName()), - EXPECTED_MEMORY_WITHOUT_DISK, - ObjectParser.ValueType.VALUE); - PARSER.declareField( - optionalConstructorArg(), - (p, c) -> ByteSizeValue.parseBytesSizeValue(p.text(), EXPECTED_MEMORY_WITH_DISK.getPreferredName()), - EXPECTED_MEMORY_WITH_DISK, - ObjectParser.ValueType.VALUE); - } - - private final ByteSizeValue expectedMemoryWithoutDisk; - private final ByteSizeValue expectedMemoryWithDisk; - - public Response(@Nullable ByteSizeValue expectedMemoryWithoutDisk, @Nullable ByteSizeValue expectedMemoryWithDisk) { - this.expectedMemoryWithoutDisk = expectedMemoryWithoutDisk; - this.expectedMemoryWithDisk = expectedMemoryWithDisk; - } - - public Response(StreamInput in) throws IOException { - 
super(in); - this.expectedMemoryWithoutDisk = in.readOptionalWriteable(ByteSizeValue::new); - this.expectedMemoryWithDisk = in.readOptionalWriteable(ByteSizeValue::new); - } - - public ByteSizeValue getExpectedMemoryWithoutDisk() { - return expectedMemoryWithoutDisk; - } - - public ByteSizeValue getExpectedMemoryWithDisk() { - return expectedMemoryWithDisk; - } - - @Override - public void writeTo(StreamOutput out) throws IOException { - out.writeOptionalWriteable(expectedMemoryWithoutDisk); - out.writeOptionalWriteable(expectedMemoryWithDisk); - } - - @Override - public XContentBuilder toXContent(XContentBuilder builder, Params params) throws IOException { - builder.startObject(); - if (expectedMemoryWithoutDisk != null) { - builder.field(EXPECTED_MEMORY_WITHOUT_DISK.getPreferredName(), expectedMemoryWithoutDisk.getStringRep()); - } - if (expectedMemoryWithDisk != null) { - builder.field(EXPECTED_MEMORY_WITH_DISK.getPreferredName(), expectedMemoryWithDisk.getStringRep()); - } - builder.endObject(); - return builder; - } - - @Override - public boolean equals(Object other) { - if (this == other) { - return true; - } - if (other == null || getClass() != other.getClass()) { - return false; - } - - Response that = (Response) other; - return Objects.equals(expectedMemoryWithoutDisk, that.expectedMemoryWithoutDisk) - && Objects.equals(expectedMemoryWithDisk, that.expectedMemoryWithDisk); - } - - @Override - public int hashCode() { - return Objects.hash(expectedMemoryWithoutDisk, expectedMemoryWithDisk); - } - } -} diff --git a/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/ml/action/ExplainDataFrameAnalyticsAction.java b/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/ml/action/ExplainDataFrameAnalyticsAction.java new file mode 100644 index 0000000000000..46888ea27a7a9 --- /dev/null +++ b/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/ml/action/ExplainDataFrameAnalyticsAction.java @@ -0,0 +1,101 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License; + * you may not use this file except in compliance with the Elastic License. 
+ */
+package org.elasticsearch.xpack.core.ml.action;
+
+import org.elasticsearch.action.ActionResponse;
+import org.elasticsearch.action.ActionType;
+import org.elasticsearch.common.ParseField;
+import org.elasticsearch.common.io.stream.StreamInput;
+import org.elasticsearch.common.io.stream.StreamOutput;
+import org.elasticsearch.common.xcontent.ConstructingObjectParser;
+import org.elasticsearch.common.xcontent.ToXContentObject;
+import org.elasticsearch.common.xcontent.XContentBuilder;
+import org.elasticsearch.xpack.core.ml.dataframe.explain.FieldSelection;
+import org.elasticsearch.xpack.core.ml.dataframe.explain.MemoryEstimation;
+
+import java.io.IOException;
+import java.util.List;
+import java.util.Objects;
+
+public class ExplainDataFrameAnalyticsAction extends ActionType<ExplainDataFrameAnalyticsAction.Response> {
+
+    public static final ExplainDataFrameAnalyticsAction INSTANCE = new ExplainDataFrameAnalyticsAction();
+    public static final String NAME = "cluster:admin/xpack/ml/data_frame/analytics/explain";
+
+    private ExplainDataFrameAnalyticsAction() {
+        super(NAME, ExplainDataFrameAnalyticsAction.Response::new);
+    }
+
+    public static class Response extends ActionResponse implements ToXContentObject {
+
+        public static final ParseField TYPE = new ParseField("explain_data_frame_analytics_response");
+
+        public static final ParseField FIELD_SELECTION = new ParseField("field_selection");
+        public static final ParseField MEMORY_ESTIMATION = new ParseField("memory_estimation");
+
+        static final ConstructingObjectParser<Response, Void> PARSER =
+            new ConstructingObjectParser<>(
+                TYPE.getPreferredName(),
+                args -> new Response((List<FieldSelection>) args[0], (MemoryEstimation) args[1]));
+
+        static {
+            PARSER.declareObjectArray(ConstructingObjectParser.constructorArg(), FieldSelection.PARSER, FIELD_SELECTION);
+            PARSER.declareObject(ConstructingObjectParser.constructorArg(), MemoryEstimation.PARSER, MEMORY_ESTIMATION);
+        }
+
+        private final List<FieldSelection> fieldSelection;
+        private final MemoryEstimation memoryEstimation;
+
+        public Response(List<FieldSelection> fieldSelection, MemoryEstimation memoryEstimation) {
+            this.fieldSelection = Objects.requireNonNull(fieldSelection);
+            this.memoryEstimation = Objects.requireNonNull(memoryEstimation);
+        }
+
+        public Response(StreamInput in) throws IOException {
+            super(in);
+            this.fieldSelection = in.readList(FieldSelection::new);
+            this.memoryEstimation = new MemoryEstimation(in);
+        }
+
+        @Override
+        public void writeTo(StreamOutput out) throws IOException {
+            out.writeList(fieldSelection);
+            memoryEstimation.writeTo(out);
+        }
+
+        @Override
+        public XContentBuilder toXContent(XContentBuilder builder, Params params) throws IOException {
+            builder.startObject();
+            builder.field(FIELD_SELECTION.getPreferredName(), fieldSelection);
+            builder.field(MEMORY_ESTIMATION.getPreferredName(), memoryEstimation);
+            builder.endObject();
+            return builder;
+        }
+
+        @Override
+        public boolean equals(Object other) {
+            if (this == other) return true;
+            if (other == null || getClass() != other.getClass()) return false;
+
+            Response that = (Response) other;
+            return Objects.equals(fieldSelection, that.fieldSelection)
+                && Objects.equals(memoryEstimation, that.memoryEstimation);
+        }
+
+        @Override
+        public int hashCode() {
+            return Objects.hash(fieldSelection, memoryEstimation);
+        }
+
+        public MemoryEstimation getMemoryEstimation() {
+            return memoryEstimation;
+        }
+
+        public List<FieldSelection> getFieldSelection() {
+            return fieldSelection;
+        }
+    }
+}
diff --git a/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/ml/action/PutDataFrameAnalyticsAction.java
b/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/ml/action/PutDataFrameAnalyticsAction.java index 6860162d793fd..5bce41d8a4ae6 100644 --- a/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/ml/action/PutDataFrameAnalyticsAction.java +++ b/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/ml/action/PutDataFrameAnalyticsAction.java @@ -51,13 +51,14 @@ public static Request parseRequest(String id, XContentParser parser) { } /** - * Parses request for memory estimation. - * {@link Request} is reused across {@link PutDataFrameAnalyticsAction} and {@link EstimateMemoryUsageAction} but parsing differs + * Parses request for use in the explain action. + * {@link Request} is reused across {@link PutDataFrameAnalyticsAction} and + * {@link ExplainDataFrameAnalyticsAction} but parsing differs * between these two usages. */ - public static Request parseRequestForMemoryEstimation(XContentParser parser) { + public static Request parseRequestForExplain(XContentParser parser) { DataFrameAnalyticsConfig.Builder configBuilder = DataFrameAnalyticsConfig.STRICT_PARSER.apply(parser, null); - DataFrameAnalyticsConfig config = configBuilder.buildForMemoryEstimation(); + DataFrameAnalyticsConfig config = configBuilder.buildForExplain(); return new PutDataFrameAnalyticsAction.Request(config); } diff --git a/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/ml/dataframe/DataFrameAnalyticsConfig.java b/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/ml/dataframe/DataFrameAnalyticsConfig.java index 03020caef8ab1..ac1589fa56fbc 100644 --- a/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/ml/dataframe/DataFrameAnalyticsConfig.java +++ b/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/ml/dataframe/DataFrameAnalyticsConfig.java @@ -416,11 +416,11 @@ public DataFrameAnalyticsConfig build() { } /** - * Builds {@link DataFrameAnalyticsConfig} object for the purpose of performing memory estimation. + * Builds {@link DataFrameAnalyticsConfig} object for the purpose of explaining a job that has not been created yet. * Some fields (i.e. "id", "dest") may not be present, therefore we overwrite them here to make {@link DataFrameAnalyticsConfig}'s * constructor validations happy. */ - public DataFrameAnalyticsConfig buildForMemoryEstimation() { + public DataFrameAnalyticsConfig buildForExplain() { return new DataFrameAnalyticsConfig( id != null ? id : "dummy", description, diff --git a/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/ml/dataframe/explain/FieldSelection.java b/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/ml/dataframe/explain/FieldSelection.java new file mode 100644 index 0000000000000..57fae51d36643 --- /dev/null +++ b/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/ml/dataframe/explain/FieldSelection.java @@ -0,0 +1,184 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License; + * you may not use this file except in compliance with the Elastic License. 
+ */
+package org.elasticsearch.xpack.core.ml.dataframe.explain;
+
+import org.elasticsearch.common.Nullable;
+import org.elasticsearch.common.ParseField;
+import org.elasticsearch.common.io.stream.StreamInput;
+import org.elasticsearch.common.io.stream.StreamOutput;
+import org.elasticsearch.common.io.stream.Writeable;
+import org.elasticsearch.common.xcontent.ConstructingObjectParser;
+import org.elasticsearch.common.xcontent.ObjectParser;
+import org.elasticsearch.common.xcontent.ToXContentObject;
+import org.elasticsearch.common.xcontent.XContentBuilder;
+import org.elasticsearch.common.xcontent.XContentParser;
+
+import java.io.IOException;
+import java.util.Collections;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Locale;
+import java.util.Objects;
+import java.util.Set;
+
+public class FieldSelection implements ToXContentObject, Writeable {
+
+    private static final ParseField NAME = new ParseField("name");
+    private static final ParseField MAPPING_TYPES = new ParseField("mapping_types");
+    private static final ParseField IS_INCLUDED = new ParseField("is_included");
+    private static final ParseField IS_REQUIRED = new ParseField("is_required");
+    private static final ParseField FEATURE_TYPE = new ParseField("feature_type");
+    private static final ParseField REASON = new ParseField("reason");
+
+    public enum FeatureType {
+        CATEGORICAL, NUMERICAL;
+
+        public static FeatureType fromString(String value) {
+            return FeatureType.valueOf(value.toUpperCase(Locale.ROOT));
+        }
+
+        @Override
+        public String toString() {
+            return name().toLowerCase(Locale.ROOT);
+        }
+    }
+
+    public static ConstructingObjectParser<FieldSelection, Void> PARSER = new ConstructingObjectParser<>("field_selection",
+        a -> new FieldSelection((String) a[0], new HashSet<>((List<String>) a[1]), (boolean) a[2], (boolean) a[3], (FeatureType) a[4],
+            (String) a[5]));
+
+    static {
+        PARSER.declareString(ConstructingObjectParser.constructorArg(), NAME);
+        PARSER.declareStringArray(ConstructingObjectParser.constructorArg(), MAPPING_TYPES);
+        PARSER.declareBoolean(ConstructingObjectParser.constructorArg(), IS_INCLUDED);
+        PARSER.declareBoolean(ConstructingObjectParser.constructorArg(), IS_REQUIRED);
+        PARSER.declareField(ConstructingObjectParser.optionalConstructorArg(), p -> {
+            if (p.currentToken() == XContentParser.Token.VALUE_STRING) {
+                return FeatureType.fromString(p.text());
+            }
+            throw new IllegalArgumentException("Unsupported token [" + p.currentToken() + "]");
+        }, FEATURE_TYPE, ObjectParser.ValueType.STRING);
+        PARSER.declareString(ConstructingObjectParser.optionalConstructorArg(), REASON);
+    }
+
+    private final String name;
+    private final Set<String> mappingTypes;
+    private final boolean isIncluded;
+    private final boolean isRequired;
+    private final FeatureType featureType;
+    private final String reason;
+
+    public static FieldSelection included(String name, Set<String> mappingTypes, boolean isRequired, FeatureType featureType) {
+        return new FieldSelection(name, mappingTypes, true, isRequired, featureType, null);
+    }
+
+    public static FieldSelection excluded(String name, Set<String> mappingTypes, String reason) {
+        return new FieldSelection(name, mappingTypes, false, false, null, reason);
+    }
+
+    FieldSelection(String name, Set<String> mappingTypes, boolean isIncluded, boolean isRequired, @Nullable FeatureType featureType,
+                   @Nullable String reason) {
+        this.name = Objects.requireNonNull(name);
+        this.mappingTypes = Collections.unmodifiableSet(mappingTypes);
+        this.isIncluded = isIncluded;
+        this.isRequired = isRequired;
+        this.featureType = featureType;
+        this.reason = reason;
+    }
+
+    public FieldSelection(StreamInput in) throws IOException {
+        this.name = in.readString();
+        this.mappingTypes = Collections.unmodifiableSet(in.readSet(StreamInput::readString));
+        this.isIncluded = in.readBoolean();
+        this.isRequired = in.readBoolean();
+        boolean hasFeatureType = in.readBoolean();
+
+        if (hasFeatureType) {
+            this.featureType = in.readEnum(FeatureType.class);
+        } else {
+            this.featureType = null;
+        }
+
+        this.reason = in.readOptionalString();
+    }
+
+    @Override
+    public void writeTo(StreamOutput out) throws IOException {
+        out.writeString(name);
+        out.writeCollection(mappingTypes, StreamOutput::writeString);
+        out.writeBoolean(isIncluded);
+        out.writeBoolean(isRequired);
+
+        if (featureType == null) {
+            out.writeBoolean(false);
+        } else {
+            out.writeBoolean(true);
+            out.writeEnum(featureType);
+        }
+        out.writeOptionalString(reason);
+    }
+
+    @Override
+    public XContentBuilder toXContent(XContentBuilder builder, Params params) throws IOException {
+        builder.startObject();
+        builder.field(NAME.getPreferredName(), name);
+        builder.field(MAPPING_TYPES.getPreferredName(), mappingTypes);
+        builder.field(IS_INCLUDED.getPreferredName(), isIncluded);
+        builder.field(IS_REQUIRED.getPreferredName(), isRequired);
+        if (featureType != null) {
+            builder.field(FEATURE_TYPE.getPreferredName(), featureType);
+        }
+        if (reason != null) {
+            builder.field(REASON.getPreferredName(), reason);
+        }
+        builder.endObject();
+        return builder;
+    }
+
+    @Override
+    public boolean equals(Object o) {
+        if (this == o) return true;
+        if (o == null || getClass() != o.getClass()) return false;
+        FieldSelection that = (FieldSelection) o;
+        return Objects.equals(name, that.name)
+            && Objects.equals(mappingTypes, that.mappingTypes)
+            && isIncluded == that.isIncluded
+            && isRequired == that.isRequired
+            && Objects.equals(featureType, that.featureType)
+            && Objects.equals(reason, that.reason);
+    }
+
+    @Override
+    public int hashCode() {
+        return Objects.hash(name, mappingTypes, isIncluded, isRequired, featureType, reason);
+    }
+
+    public String getName() {
+        return name;
+    }
+
+    public Set<String> getMappingTypes() {
+        return mappingTypes;
+    }
+
+    public boolean isIncluded() {
+        return isIncluded;
+    }
+
+    public boolean isRequired() {
+        return isRequired;
+    }
+
+    @Nullable
+    public FeatureType getFeatureType() {
+        return featureType;
+    }
+
+    @Nullable
+    public String getReason() {
+        return reason;
+    }
+}
diff --git a/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/ml/dataframe/explain/MemoryEstimation.java b/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/ml/dataframe/explain/MemoryEstimation.java
new file mode 100644
index 0000000000000..7972c6a9ee0a2
--- /dev/null
+++ b/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/ml/dataframe/explain/MemoryEstimation.java
@@ -0,0 +1,103 @@
+/*
+ * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
+ * or more contributor license agreements. Licensed under the Elastic License;
+ * you may not use this file except in compliance with the Elastic License.
+ */ +package org.elasticsearch.xpack.core.ml.dataframe.explain; + +import org.elasticsearch.common.Nullable; +import org.elasticsearch.common.ParseField; +import org.elasticsearch.common.io.stream.StreamInput; +import org.elasticsearch.common.io.stream.StreamOutput; +import org.elasticsearch.common.io.stream.Writeable; +import org.elasticsearch.common.unit.ByteSizeValue; +import org.elasticsearch.common.xcontent.ConstructingObjectParser; +import org.elasticsearch.common.xcontent.ObjectParser; +import org.elasticsearch.common.xcontent.ToXContentObject; +import org.elasticsearch.common.xcontent.XContentBuilder; + +import java.io.IOException; +import java.util.Objects; + +import static org.elasticsearch.common.xcontent.ConstructingObjectParser.optionalConstructorArg; + +public class MemoryEstimation implements ToXContentObject, Writeable { + + public static final ParseField EXPECTED_MEMORY_WITHOUT_DISK = new ParseField("expected_memory_without_disk"); + public static final ParseField EXPECTED_MEMORY_WITH_DISK = new ParseField("expected_memory_with_disk"); + + public static final ConstructingObjectParser PARSER = new ConstructingObjectParser<>("memory_estimation", + a -> new MemoryEstimation((ByteSizeValue) a[0], (ByteSizeValue) a[1])); + + static { + PARSER.declareField( + optionalConstructorArg(), + (p, c) -> ByteSizeValue.parseBytesSizeValue(p.text(), EXPECTED_MEMORY_WITHOUT_DISK.getPreferredName()), + EXPECTED_MEMORY_WITHOUT_DISK, + ObjectParser.ValueType.VALUE); + PARSER.declareField( + optionalConstructorArg(), + (p, c) -> ByteSizeValue.parseBytesSizeValue(p.text(), EXPECTED_MEMORY_WITH_DISK.getPreferredName()), + EXPECTED_MEMORY_WITH_DISK, + ObjectParser.ValueType.VALUE); + } + + private final ByteSizeValue expectedMemoryWithoutDisk; + private final ByteSizeValue expectedMemoryWithDisk; + + public MemoryEstimation(@Nullable ByteSizeValue expectedMemoryWithoutDisk, @Nullable ByteSizeValue expectedMemoryWithDisk) { + this.expectedMemoryWithoutDisk = expectedMemoryWithoutDisk; + this.expectedMemoryWithDisk = expectedMemoryWithDisk; + } + + public MemoryEstimation(StreamInput in) throws IOException { + this.expectedMemoryWithoutDisk = in.readOptionalWriteable(ByteSizeValue::new); + this.expectedMemoryWithDisk = in.readOptionalWriteable(ByteSizeValue::new); + } + + public ByteSizeValue getExpectedMemoryWithoutDisk() { + return expectedMemoryWithoutDisk; + } + + public ByteSizeValue getExpectedMemoryWithDisk() { + return expectedMemoryWithDisk; + } + + @Override + public void writeTo(StreamOutput out) throws IOException { + out.writeOptionalWriteable(expectedMemoryWithoutDisk); + out.writeOptionalWriteable(expectedMemoryWithDisk); + } + + @Override + public XContentBuilder toXContent(XContentBuilder builder, Params params) throws IOException { + builder.startObject(); + if (expectedMemoryWithoutDisk != null) { + builder.field(EXPECTED_MEMORY_WITHOUT_DISK.getPreferredName(), expectedMemoryWithoutDisk.getStringRep()); + } + if (expectedMemoryWithDisk != null) { + builder.field(EXPECTED_MEMORY_WITH_DISK.getPreferredName(), expectedMemoryWithDisk.getStringRep()); + } + builder.endObject(); + return builder; + } + + @Override + public boolean equals(Object other) { + if (this == other) { + return true; + } + if (other == null || getClass() != other.getClass()) { + return false; + } + + MemoryEstimation that = (MemoryEstimation) other; + return Objects.equals(expectedMemoryWithoutDisk, that.expectedMemoryWithoutDisk) + && Objects.equals(expectedMemoryWithDisk, that.expectedMemoryWithDisk); + } + + 
@Override + public int hashCode() { + return Objects.hash(expectedMemoryWithoutDisk, expectedMemoryWithDisk); + } +} diff --git a/x-pack/plugin/core/src/test/java/org/elasticsearch/xpack/core/ml/action/EstimateMemoryUsageActionResponseTests.java b/x-pack/plugin/core/src/test/java/org/elasticsearch/xpack/core/ml/action/EstimateMemoryUsageActionResponseTests.java deleted file mode 100644 index 1bc8d8970eae1..0000000000000 --- a/x-pack/plugin/core/src/test/java/org/elasticsearch/xpack/core/ml/action/EstimateMemoryUsageActionResponseTests.java +++ /dev/null @@ -1,54 +0,0 @@ -/* - * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one - * or more contributor license agreements. Licensed under the Elastic License; - * you may not use this file except in compliance with the Elastic License. - */ -package org.elasticsearch.xpack.core.ml.action; - -import org.elasticsearch.common.io.stream.Writeable; -import org.elasticsearch.common.unit.ByteSizeUnit; -import org.elasticsearch.common.unit.ByteSizeValue; -import org.elasticsearch.common.xcontent.XContentParser; -import org.elasticsearch.test.AbstractSerializingTestCase; -import org.elasticsearch.xpack.core.ml.action.EstimateMemoryUsageAction.Response; - -import static org.hamcrest.Matchers.equalTo; -import static org.hamcrest.Matchers.nullValue; - -public class EstimateMemoryUsageActionResponseTests extends AbstractSerializingTestCase { - - @Override - protected Response createTestInstance() { - return new Response( - randomBoolean() ? new ByteSizeValue(randomNonNegativeLong()) : null, - randomBoolean() ? new ByteSizeValue(randomNonNegativeLong()) : null); - } - - @Override - protected Writeable.Reader instanceReader() { - return Response::new; - } - - @Override - protected Response doParseInstance(XContentParser parser) { - return Response.PARSER.apply(parser, null); - } - - public void testConstructor_NullValues() { - Response response = new Response(null, null); - assertThat(response.getExpectedMemoryWithoutDisk(), nullValue()); - assertThat(response.getExpectedMemoryWithDisk(), nullValue()); - } - - public void testConstructor_SmallValues() { - Response response = new Response(new ByteSizeValue(120, ByteSizeUnit.KB), new ByteSizeValue(30, ByteSizeUnit.KB)); - assertThat(response.getExpectedMemoryWithoutDisk(), equalTo(new ByteSizeValue(120, ByteSizeUnit.KB))); - assertThat(response.getExpectedMemoryWithDisk(), equalTo(new ByteSizeValue(30, ByteSizeUnit.KB))); - } - - public void testConstructor() { - Response response = new Response(new ByteSizeValue(20, ByteSizeUnit.MB), new ByteSizeValue(10, ByteSizeUnit.MB)); - assertThat(response.getExpectedMemoryWithoutDisk(), equalTo(new ByteSizeValue(20, ByteSizeUnit.MB))); - assertThat(response.getExpectedMemoryWithDisk(), equalTo(new ByteSizeValue(10, ByteSizeUnit.MB))); - } -} diff --git a/x-pack/plugin/core/src/test/java/org/elasticsearch/xpack/core/ml/action/ExplainDataFrameAnalyticsActionResponseTests.java b/x-pack/plugin/core/src/test/java/org/elasticsearch/xpack/core/ml/action/ExplainDataFrameAnalyticsActionResponseTests.java new file mode 100644 index 0000000000000..ea1aca3916cb9 --- /dev/null +++ b/x-pack/plugin/core/src/test/java/org/elasticsearch/xpack/core/ml/action/ExplainDataFrameAnalyticsActionResponseTests.java @@ -0,0 +1,42 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License; + * you may not use this file except in compliance with the Elastic License. 
+ */
+package org.elasticsearch.xpack.core.ml.action;
+
+import org.elasticsearch.common.io.stream.Writeable;
+import org.elasticsearch.common.xcontent.XContentParser;
+import org.elasticsearch.test.AbstractSerializingTestCase;
+import org.elasticsearch.xpack.core.ml.action.ExplainDataFrameAnalyticsAction.Response;
+import org.elasticsearch.xpack.core.ml.dataframe.explain.FieldSelection;
+import org.elasticsearch.xpack.core.ml.dataframe.explain.FieldSelectionTests;
+import org.elasticsearch.xpack.core.ml.dataframe.explain.MemoryEstimation;
+import org.elasticsearch.xpack.core.ml.dataframe.explain.MemoryEstimationTests;
+
+import java.util.ArrayList;
+import java.util.List;
+import java.util.stream.IntStream;
+
+public class ExplainDataFrameAnalyticsActionResponseTests extends AbstractSerializingTestCase<Response> {
+
+    @Override
+    protected Response createTestInstance() {
+        int fieldSelectionCount = randomIntBetween(1, 5);
+        List<FieldSelection> fieldSelection = new ArrayList<>(fieldSelectionCount);
+        // Use a range so the list really gets fieldSelectionCount entries;
+        // IntStream.of(fieldSelectionCount) would stream the single value instead.
+        IntStream.range(0, fieldSelectionCount).forEach(i -> fieldSelection.add(FieldSelectionTests.createRandom()));
+        MemoryEstimation memoryEstimation = MemoryEstimationTests.createRandom();
+
+        return new Response(fieldSelection, memoryEstimation);
+    }
+
+    @Override
+    protected Writeable.Reader<Response> instanceReader() {
+        return Response::new;
+    }
+
+    @Override
+    protected Response doParseInstance(XContentParser parser) {
+        return Response.PARSER.apply(parser, null);
+    }
+}
diff --git a/x-pack/plugin/core/src/test/java/org/elasticsearch/xpack/core/ml/dataframe/DataFrameAnalyticsConfigTests.java b/x-pack/plugin/core/src/test/java/org/elasticsearch/xpack/core/ml/dataframe/DataFrameAnalyticsConfigTests.java
index 3266f488daf4a..d8c52c839026f 100644
--- a/x-pack/plugin/core/src/test/java/org/elasticsearch/xpack/core/ml/dataframe/DataFrameAnalyticsConfigTests.java
+++ b/x-pack/plugin/core/src/test/java/org/elasticsearch/xpack/core/ml/dataframe/DataFrameAnalyticsConfigTests.java
@@ -279,32 +279,32 @@ public void testExplicitModelMemoryLimitTooHigh() {
         assertThat(e.getMessage(), containsString("must be less than the value of the xpack.ml.max_model_memory_limit setting"));
     }
 
-    public void testBuildForMemoryEstimation() {
+    public void testBuildForExplain() {
         DataFrameAnalyticsConfig.Builder builder = createRandomBuilder("foo");
 
-        DataFrameAnalyticsConfig config = builder.buildForMemoryEstimation();
+        DataFrameAnalyticsConfig config = builder.buildForExplain();
 
         assertThat(config, equalTo(builder.build()));
     }
 
-    public void testBuildForMemoryEstimation_MissingId() {
+    public void testBuildForExplain_MissingId() {
         DataFrameAnalyticsConfig.Builder builder = new DataFrameAnalyticsConfig.Builder()
             .setAnalysis(OutlierDetectionTests.createRandom())
             .setSource(DataFrameAnalyticsSourceTests.createRandom())
             .setDest(DataFrameAnalyticsDestTests.createRandom());
 
-        DataFrameAnalyticsConfig config = builder.buildForMemoryEstimation();
+        DataFrameAnalyticsConfig config = builder.buildForExplain();
 
         assertThat(config.getId(), equalTo("dummy"));
     }
 
-    public void testBuildForMemoryEstimation_MissingDest() {
+    public void testBuildForExplain_MissingDest() {
         DataFrameAnalyticsConfig.Builder builder = new DataFrameAnalyticsConfig.Builder()
             .setId("foo")
             .setAnalysis(OutlierDetectionTests.createRandom())
             .setSource(DataFrameAnalyticsSourceTests.createRandom());
 
-        DataFrameAnalyticsConfig config = builder.buildForMemoryEstimation();
+        DataFrameAnalyticsConfig config = builder.buildForExplain();
 
         assertThat(config.getDest().getIndex(), equalTo("dummy"));
     }
diff --git
a/x-pack/plugin/core/src/test/java/org/elasticsearch/xpack/core/ml/dataframe/explain/FieldSelectionTests.java b/x-pack/plugin/core/src/test/java/org/elasticsearch/xpack/core/ml/dataframe/explain/FieldSelectionTests.java new file mode 100644 index 0000000000000..2c8a8fde39ad2 --- /dev/null +++ b/x-pack/plugin/core/src/test/java/org/elasticsearch/xpack/core/ml/dataframe/explain/FieldSelectionTests.java @@ -0,0 +1,45 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License; + * you may not use this file except in compliance with the Elastic License. + */ +package org.elasticsearch.xpack.core.ml.dataframe.explain; + +import org.elasticsearch.common.io.stream.Writeable; +import org.elasticsearch.common.xcontent.XContentParser; +import org.elasticsearch.test.AbstractSerializingTestCase; + +import java.io.IOException; +import java.util.Set; +import java.util.stream.Collectors; + +public class FieldSelectionTests extends AbstractSerializingTestCase { + + public static FieldSelection createRandom() { + Set mappingTypes = randomSubsetOf(randomIntBetween(1, 3), "int", "float", "double", "text", "keyword", "ip") + .stream().collect(Collectors.toSet()); + FieldSelection.FeatureType featureType = randomBoolean() ? null : randomFrom(FieldSelection.FeatureType.values()); + String reason = randomBoolean() ? null : randomAlphaOfLength(20); + return new FieldSelection(randomAlphaOfLength(10), + mappingTypes, + randomBoolean(), + randomBoolean(), + featureType, + reason); + } + + @Override + protected FieldSelection createTestInstance() { + return createRandom(); + } + + @Override + protected FieldSelection doParseInstance(XContentParser parser) throws IOException { + return FieldSelection.PARSER.apply(parser, null); + } + + @Override + protected Writeable.Reader instanceReader() { + return FieldSelection::new; + } +} diff --git a/x-pack/plugin/core/src/test/java/org/elasticsearch/xpack/core/ml/dataframe/explain/MemoryEstimationTests.java b/x-pack/plugin/core/src/test/java/org/elasticsearch/xpack/core/ml/dataframe/explain/MemoryEstimationTests.java new file mode 100644 index 0000000000000..dc9e20bd86a8c --- /dev/null +++ b/x-pack/plugin/core/src/test/java/org/elasticsearch/xpack/core/ml/dataframe/explain/MemoryEstimationTests.java @@ -0,0 +1,61 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License; + * you may not use this file except in compliance with the Elastic License. + */ +package org.elasticsearch.xpack.core.ml.dataframe.explain; + +import org.elasticsearch.common.io.stream.Writeable; +import org.elasticsearch.common.unit.ByteSizeUnit; +import org.elasticsearch.common.unit.ByteSizeValue; +import org.elasticsearch.common.xcontent.XContentParser; +import org.elasticsearch.test.AbstractSerializingTestCase; + +import java.io.IOException; + +import static org.hamcrest.Matchers.equalTo; +import static org.hamcrest.Matchers.nullValue; + +public class MemoryEstimationTests extends AbstractSerializingTestCase { + + public static MemoryEstimation createRandom() { + return new MemoryEstimation( + randomBoolean() ? new ByteSizeValue(randomNonNegativeLong()) : null, + randomBoolean() ? 
new ByteSizeValue(randomNonNegativeLong()) : null); + } + + @Override + protected MemoryEstimation createTestInstance() { + return createRandom(); + } + + @Override + protected Writeable.Reader instanceReader() { + return MemoryEstimation::new; + } + + @Override + protected MemoryEstimation doParseInstance(XContentParser parser) throws IOException { + return MemoryEstimation.PARSER.apply(parser, null); + } + + public void testConstructor_NullValues() { + MemoryEstimation memoryEstimation = new MemoryEstimation(null, null); + assertThat(memoryEstimation.getExpectedMemoryWithoutDisk(), nullValue()); + assertThat(memoryEstimation.getExpectedMemoryWithDisk(), nullValue()); + } + + public void testConstructor_SmallValues() { + MemoryEstimation memoryEstimation = new MemoryEstimation( + new ByteSizeValue(120, ByteSizeUnit.KB), new ByteSizeValue(30, ByteSizeUnit.KB)); + assertThat(memoryEstimation.getExpectedMemoryWithoutDisk(), equalTo(new ByteSizeValue(120, ByteSizeUnit.KB))); + assertThat(memoryEstimation.getExpectedMemoryWithDisk(), equalTo(new ByteSizeValue(30, ByteSizeUnit.KB))); + } + + public void testConstructor() { + MemoryEstimation memoryEstimation = new MemoryEstimation( + new ByteSizeValue(20, ByteSizeUnit.MB), new ByteSizeValue(10, ByteSizeUnit.MB)); + assertThat(memoryEstimation.getExpectedMemoryWithoutDisk(), equalTo(new ByteSizeValue(20, ByteSizeUnit.MB))); + assertThat(memoryEstimation.getExpectedMemoryWithDisk(), equalTo(new ByteSizeValue(10, ByteSizeUnit.MB))); + } +} diff --git a/x-pack/plugin/ml/qa/ml-with-security/build.gradle b/x-pack/plugin/ml/qa/ml-with-security/build.gradle index 961dc944ea7d7..38beb1d1908c1 100644 --- a/x-pack/plugin/ml/qa/ml-with-security/build.gradle +++ b/x-pack/plugin/ml/qa/ml-with-security/build.gradle @@ -92,7 +92,6 @@ integTest.runner { 'ml/data_frame_analytics_crud/Test put classification given num_top_classes is greater than 1k', 'ml/data_frame_analytics_crud/Test put classification given training_percent is less than one', 'ml/data_frame_analytics_crud/Test put classification given training_percent is greater than hundred', - 'ml/data_frame_analytics_memory_usage_estimation/Test memory usage estimation for empty data frame', 'ml/evaluate_data_frame/Test given missing index', 'ml/evaluate_data_frame/Test given index does not exist', 'ml/evaluate_data_frame/Test given missing evaluation', @@ -113,6 +112,10 @@ integTest.runner { 'ml/evaluate_data_frame/Test regression given evaluation with empty metrics', 'ml/evaluate_data_frame/Test regression given missing actual_field', 'ml/evaluate_data_frame/Test regression given missing predicted_field', + 'ml/explain_data_frame_analytics/Test neither job id nor body', + 'ml/explain_data_frame_analytics/Test both job id and body', + 'ml/explain_data_frame_analytics/Test missing job', + 'ml/explain_data_frame_analytics/Test empty data frame given body', 'ml/delete_job_force/Test cannot force delete a non-existent job', 'ml/delete_model_snapshot/Test delete snapshot missing snapshotId', 'ml/delete_model_snapshot/Test delete snapshot missing job_id', diff --git a/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/MachineLearning.java b/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/MachineLearning.java index 964bc719cbdfa..0293a36747311 100644 --- a/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/MachineLearning.java +++ b/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/MachineLearning.java @@ -65,6 +65,7 @@ import org.elasticsearch.xpack.core.ml.MachineLearningField; 
import org.elasticsearch.xpack.core.ml.MlMetaIndex; import org.elasticsearch.xpack.core.ml.action.CloseJobAction; +import org.elasticsearch.xpack.core.ml.action.ExplainDataFrameAnalyticsAction; import org.elasticsearch.xpack.core.ml.action.DeleteCalendarAction; import org.elasticsearch.xpack.core.ml.action.DeleteCalendarEventAction; import org.elasticsearch.xpack.core.ml.action.DeleteDataFrameAnalyticsAction; @@ -75,7 +76,6 @@ import org.elasticsearch.xpack.core.ml.action.DeleteJobAction; import org.elasticsearch.xpack.core.ml.action.DeleteModelSnapshotAction; import org.elasticsearch.xpack.core.ml.action.DeleteTrainedModelAction; -import org.elasticsearch.xpack.core.ml.action.EstimateMemoryUsageAction; import org.elasticsearch.xpack.core.ml.action.EvaluateDataFrameAction; import org.elasticsearch.xpack.core.ml.action.FinalizeJobExecutionAction; import org.elasticsearch.xpack.core.ml.action.FindFileStructureAction; @@ -98,8 +98,8 @@ import org.elasticsearch.xpack.core.ml.action.GetRecordsAction; import org.elasticsearch.xpack.core.ml.action.GetTrainedModelsAction; import org.elasticsearch.xpack.core.ml.action.GetTrainedModelsStatsAction; -import org.elasticsearch.xpack.core.ml.action.IsolateDatafeedAction; import org.elasticsearch.xpack.core.ml.action.InternalInferModelAction; +import org.elasticsearch.xpack.core.ml.action.IsolateDatafeedAction; import org.elasticsearch.xpack.core.ml.action.KillProcessAction; import org.elasticsearch.xpack.core.ml.action.MlInfoAction; import org.elasticsearch.xpack.core.ml.action.OpenJobAction; @@ -136,6 +136,7 @@ import org.elasticsearch.xpack.core.ml.notifications.AuditorField; import org.elasticsearch.xpack.core.template.TemplateUtils; import org.elasticsearch.xpack.ml.action.TransportCloseJobAction; +import org.elasticsearch.xpack.ml.action.TransportExplainDataFrameAnalyticsAction; import org.elasticsearch.xpack.ml.action.TransportDeleteCalendarAction; import org.elasticsearch.xpack.ml.action.TransportDeleteCalendarEventAction; import org.elasticsearch.xpack.ml.action.TransportDeleteDataFrameAnalyticsAction; @@ -146,7 +147,6 @@ import org.elasticsearch.xpack.ml.action.TransportDeleteJobAction; import org.elasticsearch.xpack.ml.action.TransportDeleteModelSnapshotAction; import org.elasticsearch.xpack.ml.action.TransportDeleteTrainedModelAction; -import org.elasticsearch.xpack.ml.action.TransportEstimateMemoryUsageAction; import org.elasticsearch.xpack.ml.action.TransportEvaluateDataFrameAction; import org.elasticsearch.xpack.ml.action.TransportFinalizeJobExecutionAction; import org.elasticsearch.xpack.ml.action.TransportFindFileStructureAction; @@ -167,9 +167,9 @@ import org.elasticsearch.xpack.ml.action.TransportGetModelSnapshotsAction; import org.elasticsearch.xpack.ml.action.TransportGetOverallBucketsAction; import org.elasticsearch.xpack.ml.action.TransportGetRecordsAction; +import org.elasticsearch.xpack.ml.action.TransportGetTrainedModelsAction; import org.elasticsearch.xpack.ml.action.TransportGetTrainedModelsStatsAction; import org.elasticsearch.xpack.ml.action.TransportInternalInferModelAction; -import org.elasticsearch.xpack.ml.action.TransportGetTrainedModelsAction; import org.elasticsearch.xpack.ml.action.TransportIsolateDatafeedAction; import org.elasticsearch.xpack.ml.action.TransportKillProcessAction; import org.elasticsearch.xpack.ml.action.TransportMlInfoAction; @@ -258,8 +258,8 @@ import org.elasticsearch.xpack.ml.rest.datafeeds.RestStartDatafeedAction; import org.elasticsearch.xpack.ml.rest.datafeeds.RestStopDatafeedAction; import 
org.elasticsearch.xpack.ml.rest.datafeeds.RestUpdateDatafeedAction; +import org.elasticsearch.xpack.ml.rest.dataframe.RestExplainDataFrameAnalyticsAction; import org.elasticsearch.xpack.ml.rest.dataframe.RestDeleteDataFrameAnalyticsAction; -import org.elasticsearch.xpack.ml.rest.dataframe.RestEstimateMemoryUsageAction; import org.elasticsearch.xpack.ml.rest.dataframe.RestEvaluateDataFrameAction; import org.elasticsearch.xpack.ml.rest.dataframe.RestGetDataFrameAnalyticsAction; import org.elasticsearch.xpack.ml.rest.dataframe.RestGetDataFrameAnalyticsStatsAction; @@ -759,7 +759,7 @@ public List getRestHandlers(Settings settings, RestController restC new RestStartDataFrameAnalyticsAction(restController), new RestStopDataFrameAnalyticsAction(restController), new RestEvaluateDataFrameAction(restController), - new RestEstimateMemoryUsageAction(restController), + new RestExplainDataFrameAnalyticsAction(restController), new RestGetTrainedModelsAction(restController), new RestDeleteTrainedModelAction(restController), new RestGetTrainedModelsStatsAction(restController) @@ -829,7 +829,7 @@ public List getRestHandlers(Settings settings, RestController restC new ActionHandler<>(StartDataFrameAnalyticsAction.INSTANCE, TransportStartDataFrameAnalyticsAction.class), new ActionHandler<>(StopDataFrameAnalyticsAction.INSTANCE, TransportStopDataFrameAnalyticsAction.class), new ActionHandler<>(EvaluateDataFrameAction.INSTANCE, TransportEvaluateDataFrameAction.class), - new ActionHandler<>(EstimateMemoryUsageAction.INSTANCE, TransportEstimateMemoryUsageAction.class), + new ActionHandler<>(ExplainDataFrameAnalyticsAction.INSTANCE, TransportExplainDataFrameAnalyticsAction.class), new ActionHandler<>(InternalInferModelAction.INSTANCE, TransportInternalInferModelAction.class), new ActionHandler<>(GetTrainedModelsAction.INSTANCE, TransportGetTrainedModelsAction.class), new ActionHandler<>(DeleteTrainedModelAction.INSTANCE, TransportDeleteTrainedModelAction.class), diff --git a/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/action/TransportEstimateMemoryUsageAction.java b/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/action/TransportEstimateMemoryUsageAction.java deleted file mode 100644 index a82db7c4f97f0..0000000000000 --- a/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/action/TransportEstimateMemoryUsageAction.java +++ /dev/null @@ -1,130 +0,0 @@ -/* - * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one - * or more contributor license agreements. Licensed under the Elastic License; - * you may not use this file except in compliance with the Elastic License. 
- */ -package org.elasticsearch.xpack.ml.action; - -import org.elasticsearch.action.ActionListener; -import org.elasticsearch.action.ActionListenerResponseHandler; -import org.elasticsearch.action.support.ActionFilters; -import org.elasticsearch.action.support.HandledTransportAction; -import org.elasticsearch.client.node.NodeClient; -import org.elasticsearch.cluster.ClusterState; -import org.elasticsearch.cluster.node.DiscoveryNode; -import org.elasticsearch.cluster.service.ClusterService; -import org.elasticsearch.common.inject.Inject; -import org.elasticsearch.tasks.Task; -import org.elasticsearch.transport.TransportService; -import org.elasticsearch.xpack.core.ml.action.EstimateMemoryUsageAction; -import org.elasticsearch.xpack.core.ml.action.PutDataFrameAnalyticsAction; -import org.elasticsearch.xpack.core.ml.utils.ExceptionsHelper; -import org.elasticsearch.xpack.ml.MachineLearning; -import org.elasticsearch.xpack.ml.dataframe.extractor.DataFrameDataExtractorFactory; -import org.elasticsearch.xpack.ml.dataframe.process.MemoryUsageEstimationProcessManager; - -import java.util.Objects; -import java.util.Optional; - -/** - * Estimates memory usage for the given data frame analytics spec. - * Redirects to a different node if the current node is *not* an ML node. - */ -public class TransportEstimateMemoryUsageAction - extends HandledTransportAction { - - private final TransportService transportService; - private final ClusterService clusterService; - private final NodeClient client; - private final MemoryUsageEstimationProcessManager processManager; - - @Inject - public TransportEstimateMemoryUsageAction(TransportService transportService, - ActionFilters actionFilters, - ClusterService clusterService, - NodeClient client, - MemoryUsageEstimationProcessManager processManager) { - super(EstimateMemoryUsageAction.NAME, transportService, actionFilters, PutDataFrameAnalyticsAction.Request::new); - this.transportService = transportService; - this.clusterService = Objects.requireNonNull(clusterService); - this.client = Objects.requireNonNull(client); - this.processManager = Objects.requireNonNull(processManager); - } - - @Override - protected void doExecute(Task task, - PutDataFrameAnalyticsAction.Request request, - ActionListener listener) { - DiscoveryNode localNode = clusterService.localNode(); - if (MachineLearning.isMlNode(localNode)) { - doEstimateMemoryUsage(createTaskIdForMemoryEstimation(task), request, listener); - } else { - redirectToMlNode(request, listener); - } - } - - /** - * Creates unique task id for the memory estimation process. This id is useful when logging. - */ - private static String createTaskIdForMemoryEstimation(Task task) { - return "memory_usage_estimation_" + task.getId(); - } - - /** - * Performs memory usage estimation. - * Memory usage estimation spawns an ML C++ process which is only available on ML nodes. That's why this method can only be called on - * the ML node. 
- */ - private void doEstimateMemoryUsage(String taskId, - PutDataFrameAnalyticsAction.Request request, - ActionListener listener) { - DataFrameDataExtractorFactory.createForSourceIndices( - client, - taskId, - true, // We are not interested in first-time run validations here - request.getConfig(), - ActionListener.wrap( - dataExtractorFactory -> { - processManager.runJobAsync( - taskId, - request.getConfig(), - dataExtractorFactory, - ActionListener.wrap( - result -> listener.onResponse( - new EstimateMemoryUsageAction.Response( - result.getExpectedMemoryWithoutDisk(), result.getExpectedMemoryWithDisk())), - listener::onFailure - ) - ); - }, - listener::onFailure - ) - ); - } - - /** - * Finds the first available ML node in the cluster and redirects the request to this node. - */ - private void redirectToMlNode(PutDataFrameAnalyticsAction.Request request, - ActionListener listener) { - Optional node = findMlNode(clusterService.state()); - if (node.isPresent()) { - transportService.sendRequest( - node.get(), actionName, request, new ActionListenerResponseHandler<>(listener, EstimateMemoryUsageAction.Response::new)); - } else { - listener.onFailure(ExceptionsHelper.badRequestException("No ML node to run on")); - } - } - - /** - * Finds the first available ML node in the cluster state. - */ - private static Optional findMlNode(ClusterState clusterState) { - for (DiscoveryNode node : clusterState.getNodes()) { - if (MachineLearning.isMlNode(node)) { - return Optional.of(node); - } - } - return Optional.empty(); - } -} diff --git a/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/action/TransportExplainDataFrameAnalyticsAction.java b/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/action/TransportExplainDataFrameAnalyticsAction.java new file mode 100644 index 0000000000000..7f19deb8d5ba0 --- /dev/null +++ b/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/action/TransportExplainDataFrameAnalyticsAction.java @@ -0,0 +1,156 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License; + * you may not use this file except in compliance with the Elastic License. 
+ */ +package org.elasticsearch.xpack.ml.action; + +import org.elasticsearch.action.ActionListener; +import org.elasticsearch.action.ActionListenerResponseHandler; +import org.elasticsearch.action.support.ActionFilters; +import org.elasticsearch.action.support.HandledTransportAction; +import org.elasticsearch.client.node.NodeClient; +import org.elasticsearch.cluster.ClusterState; +import org.elasticsearch.cluster.node.DiscoveryNode; +import org.elasticsearch.cluster.service.ClusterService; +import org.elasticsearch.common.collect.Tuple; +import org.elasticsearch.common.inject.Inject; +import org.elasticsearch.license.LicenseUtils; +import org.elasticsearch.license.XPackLicenseState; +import org.elasticsearch.tasks.Task; +import org.elasticsearch.transport.TransportService; +import org.elasticsearch.xpack.core.XPackField; +import org.elasticsearch.xpack.core.ml.action.ExplainDataFrameAnalyticsAction; +import org.elasticsearch.xpack.core.ml.action.PutDataFrameAnalyticsAction; +import org.elasticsearch.xpack.core.ml.dataframe.explain.FieldSelection; +import org.elasticsearch.xpack.core.ml.dataframe.explain.MemoryEstimation; +import org.elasticsearch.xpack.core.ml.utils.ExceptionsHelper; +import org.elasticsearch.xpack.ml.MachineLearning; +import org.elasticsearch.xpack.ml.dataframe.extractor.DataFrameDataExtractorFactory; +import org.elasticsearch.xpack.ml.dataframe.extractor.ExtractedFieldsDetector; +import org.elasticsearch.xpack.ml.dataframe.extractor.ExtractedFieldsDetectorFactory; +import org.elasticsearch.xpack.ml.dataframe.process.MemoryUsageEstimationProcessManager; +import org.elasticsearch.xpack.ml.extractor.ExtractedFields; + +import java.util.List; +import java.util.Objects; +import java.util.Optional; + +/** + * Provides explanations on aspects of the given data frame analytics spec like memory estimation, field selection, etc. + * Redirects to a different node if the current node is *not* an ML node. 
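For reviewers: the high-level REST client half of this PR (ExplainDataFrameAnalyticsRequest, ExplainDataFrameAnalyticsResponse and the MachineLearningClient additions in the diff stat) is expected to be used roughly as follows. This is an illustrative fragment only, not re-verified against the new client code; `client` and `config` are assumed to be an existing RestHighLevelClient and DataFrameAnalyticsConfig.

    // Sketch: calling the new _explain API through the high-level REST client.
    ExplainDataFrameAnalyticsRequest request = new ExplainDataFrameAnalyticsRequest(config);
    ExplainDataFrameAnalyticsResponse response =
        client.machineLearning().explainDataFrameAnalytics(request, RequestOptions.DEFAULT);
    MemoryEstimation memoryEstimation = response.getMemoryEstimation(); // estimates with and without disk
    List<FieldSelection> fieldSelection = response.getFieldSelection(); // per-field decision plus reason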
+ */ +public class TransportExplainDataFrameAnalyticsAction + extends HandledTransportAction { + + private final XPackLicenseState licenseState; + private final TransportService transportService; + private final ClusterService clusterService; + private final NodeClient client; + private final MemoryUsageEstimationProcessManager processManager; + + @Inject + public TransportExplainDataFrameAnalyticsAction(TransportService transportService, + ActionFilters actionFilters, + ClusterService clusterService, + NodeClient client, + XPackLicenseState licenseState, + MemoryUsageEstimationProcessManager processManager) { + super(ExplainDataFrameAnalyticsAction.NAME, transportService, actionFilters, PutDataFrameAnalyticsAction.Request::new); + this.transportService = transportService; + this.clusterService = Objects.requireNonNull(clusterService); + this.client = Objects.requireNonNull(client); + this.licenseState = licenseState; + this.processManager = Objects.requireNonNull(processManager); + } + + @Override + protected void doExecute(Task task, + PutDataFrameAnalyticsAction.Request request, + ActionListener listener) { + if (licenseState.isMachineLearningAllowed() == false) { + listener.onFailure(LicenseUtils.newComplianceException(XPackField.MACHINE_LEARNING)); + return; + } + + DiscoveryNode localNode = clusterService.localNode(); + if (MachineLearning.isMlNode(localNode)) { + explain(task, request, listener); + } else { + redirectToMlNode(request, listener); + } + } + + private void explain(Task task, PutDataFrameAnalyticsAction.Request request, + ActionListener listener) { + ExtractedFieldsDetectorFactory extractedFieldsDetectorFactory = new ExtractedFieldsDetectorFactory(client); + extractedFieldsDetectorFactory.createFromSource(request.getConfig(), true, ActionListener.wrap( + extractedFieldsDetector -> { + explain(task, request, extractedFieldsDetector, listener); + }, + listener::onFailure + )); + } + + private void explain(Task task, PutDataFrameAnalyticsAction.Request request, ExtractedFieldsDetector extractedFieldsDetector, + ActionListener listener) { + Tuple> fieldExtraction = extractedFieldsDetector.detect(); + + ActionListener memoryEstimationListener = ActionListener.wrap( + memoryEstimation -> listener.onResponse(new ExplainDataFrameAnalyticsAction.Response(fieldExtraction.v2(), memoryEstimation)), + listener::onFailure + ); + + estimateMemoryUsage(task, request, fieldExtraction.v1(), memoryEstimationListener); + } + + /** + * Performs memory usage estimation. + * Memory usage estimation spawns an ML C++ process which is only available on ML nodes. That's why this method can only be called on + * the ML node. + */ + private void estimateMemoryUsage(Task task, + PutDataFrameAnalyticsAction.Request request, + ExtractedFields extractedFields, + ActionListener listener) { + final String estimateMemoryTaskId = "memory_usage_estimation_" + task.getId(); + DataFrameDataExtractorFactory extractorFactory = DataFrameDataExtractorFactory.createForSourceIndices( + client, estimateMemoryTaskId, request.getConfig(), extractedFields); + processManager.runJobAsync( + estimateMemoryTaskId, + request.getConfig(), + extractorFactory, + ActionListener.wrap( + result -> listener.onResponse( + new MemoryEstimation(result.getExpectedMemoryWithoutDisk(), result.getExpectedMemoryWithDisk())), + listener::onFailure + ) + ); + } + + /** + * Finds the first available ML node in the cluster and redirects the request to this node. 
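To condense the control flow of the two explain(...) overloads and estimateMemoryUsage(...) above: field selection is computed synchronously by the detector, and only the memory estimate needs the native process. A fragment using the names from this file:

    Tuple<ExtractedFields, List<FieldSelection>> fieldExtraction = extractedFieldsDetector.detect();
    // v1() feeds the memory estimation process; v2() is returned verbatim in the response.
    estimateMemoryUsage(task, request, fieldExtraction.v1(), ActionListener.wrap(
        memoryEstimation -> listener.onResponse(
            new ExplainDataFrameAnalyticsAction.Response(fieldExtraction.v2(), memoryEstimation)),
        listener::onFailure));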
+ */ + private void redirectToMlNode(PutDataFrameAnalyticsAction.Request request, + ActionListener listener) { + Optional node = findMlNode(clusterService.state()); + if (node.isPresent()) { + transportService.sendRequest(node.get(), actionName, request, + new ActionListenerResponseHandler<>(listener, ExplainDataFrameAnalyticsAction.Response::new)); + } else { + listener.onFailure(ExceptionsHelper.badRequestException("No ML node to run on")); + } + } + + /** + * Finds the first available ML node in the cluster state. + */ + private static Optional findMlNode(ClusterState clusterState) { + for (DiscoveryNode node : clusterState.getNodes()) { + if (MachineLearning.isMlNode(node)) { + return Optional.of(node); + } + } + return Optional.empty(); + } +} diff --git a/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/action/TransportStartDataFrameAnalyticsAction.java b/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/action/TransportStartDataFrameAnalyticsAction.java index 1740a7fb53247..af67750ee6dc5 100644 --- a/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/action/TransportStartDataFrameAnalyticsAction.java +++ b/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/action/TransportStartDataFrameAnalyticsAction.java @@ -29,6 +29,7 @@ import org.elasticsearch.common.inject.Inject; import org.elasticsearch.common.io.stream.StreamInput; import org.elasticsearch.common.settings.Settings; +import org.elasticsearch.common.unit.ByteSizeValue; import org.elasticsearch.common.unit.TimeValue; import org.elasticsearch.index.IndexNotFoundException; import org.elasticsearch.license.LicenseUtils; @@ -47,7 +48,7 @@ import org.elasticsearch.xpack.core.XPackField; import org.elasticsearch.xpack.core.ml.MlMetadata; import org.elasticsearch.xpack.core.ml.MlTasks; -import org.elasticsearch.xpack.core.ml.action.EstimateMemoryUsageAction; +import org.elasticsearch.xpack.core.ml.action.ExplainDataFrameAnalyticsAction; import org.elasticsearch.xpack.core.ml.action.GetDataFrameAnalyticsStatsAction; import org.elasticsearch.xpack.core.ml.action.PutDataFrameAnalyticsAction; import org.elasticsearch.xpack.core.ml.action.StartDataFrameAnalyticsAction; @@ -66,6 +67,7 @@ import org.elasticsearch.xpack.ml.dataframe.extractor.DataFrameDataExtractorFactory; import org.elasticsearch.xpack.ml.dataframe.extractor.ExtractedFieldsDetectorFactory; import org.elasticsearch.xpack.ml.dataframe.persistence.DataFrameAnalyticsConfigProvider; +import org.elasticsearch.xpack.ml.extractor.ExtractedFields; import org.elasticsearch.xpack.ml.job.JobNodeSelector; import org.elasticsearch.xpack.ml.notifications.DataFrameAnalyticsAuditor; import org.elasticsearch.xpack.ml.process.MlMemoryTracker; @@ -190,20 +192,18 @@ private void estimateMemoryUsageAndUpdateMemoryTracker(StartContext startContext final String jobId = startContext.config.getId(); // Tell the job tracker to refresh the memory requirement for this job and all other jobs that have persistent tasks - ActionListener estimateMemoryUsageListener = ActionListener.wrap( - estimateMemoryUsageResponse -> { - auditor.info( - jobId, - Messages.getMessage( - Messages.DATA_FRAME_ANALYTICS_AUDIT_ESTIMATED_MEMORY_USAGE, - estimateMemoryUsageResponse.getExpectedMemoryWithoutDisk())); + ActionListener explainListener = ActionListener.wrap( + explainResponse -> { + ByteSizeValue expectedMemoryWithoutDisk = explainResponse.getMemoryEstimation().getExpectedMemoryWithoutDisk(); + auditor.info(jobId, + 
Messages.getMessage(Messages.DATA_FRAME_ANALYTICS_AUDIT_ESTIMATED_MEMORY_USAGE, expectedMemoryWithoutDisk)); // Validate that model memory limit is sufficient to run the analysis if (startContext.config.getModelMemoryLimit() - .compareTo(estimateMemoryUsageResponse.getExpectedMemoryWithoutDisk()) < 0) { + .compareTo(expectedMemoryWithoutDisk) < 0) { ElasticsearchStatusException e = ExceptionsHelper.badRequestException( "Cannot start because the configured model memory limit [{}] is lower than the expected memory usage [{}]", - startContext.config.getModelMemoryLimit(), estimateMemoryUsageResponse.getExpectedMemoryWithoutDisk()); + startContext.config.getModelMemoryLimit(), expectedMemoryWithoutDisk); listener.onFailure(e); return; } @@ -215,13 +215,13 @@ private void estimateMemoryUsageAndUpdateMemoryTracker(StartContext startContext listener::onFailure ); - PutDataFrameAnalyticsAction.Request estimateMemoryUsageRequest = new PutDataFrameAnalyticsAction.Request(startContext.config); + PutDataFrameAnalyticsAction.Request explainRequest = new PutDataFrameAnalyticsAction.Request(startContext.config); ClientHelper.executeAsyncWithOrigin( client, ClientHelper.ML_ORIGIN, - EstimateMemoryUsageAction.INSTANCE, - estimateMemoryUsageRequest, - estimateMemoryUsageListener); + ExplainDataFrameAnalyticsAction.INSTANCE, + explainRequest, + explainListener); } @@ -277,7 +277,11 @@ private void getStartContext(String id, ActionListener finalListen // Validate extraction is possible boolean isTaskRestarting = startContext.startingState != DataFrameAnalyticsTask.StartingState.FIRST_TIME; new ExtractedFieldsDetectorFactory(client).createFromSource(startContext.config, isTaskRestarting, ActionListener.wrap( - extractedFieldsDetector -> toValidateDestEmptyListener.onResponse(startContext), finalListener::onFailure)); + extractedFieldsDetector -> { + startContext.extractedFields = extractedFieldsDetector.detect().v1(); + toValidateDestEmptyListener.onResponse(startContext); + }, + finalListener::onFailure)); }, finalListener::onFailure ); @@ -294,33 +298,27 @@ private void getStartContext(String id, ActionListener finalListen } private void validateSourceIndexHasRows(StartContext startContext, ActionListener listener) { - boolean isTaskRestarting = startContext.startingState != DataFrameAnalyticsTask.StartingState.FIRST_TIME; - DataFrameDataExtractorFactory.createForSourceIndices(client, + DataFrameDataExtractorFactory extractorFactory = DataFrameDataExtractorFactory.createForSourceIndices(client, "validate_source_index_has_rows-" + startContext.config.getId(), - isTaskRestarting, startContext.config, - ActionListener.wrap( - dataFrameDataExtractorFactory -> - dataFrameDataExtractorFactory - .newExtractor(false) - .collectDataSummaryAsync(ActionListener.wrap( - dataSummary -> { - if (dataSummary.rows == 0) { - listener.onFailure(ExceptionsHelper.badRequestException( - "Unable to start {} as no documents in the source indices [{}] contained all the fields " - + "selected for analysis. 
If you are relying on automatic field selection then there are " - + "currently mapped fields that do not exist in any indexed documents, and you will have " - + "to switch to explicit field selection and include only fields that exist in indexed " - + "documents.", - startContext.config.getId(), - Strings.arrayToCommaDelimitedString(startContext.config.getSource().getIndex()) - )); - } else { - listener.onResponse(startContext); - } - }, - listener::onFailure - )), + startContext.extractedFields); + extractorFactory.newExtractor(false) + .collectDataSummaryAsync(ActionListener.wrap( + dataSummary -> { + if (dataSummary.rows == 0) { + listener.onFailure(ExceptionsHelper.badRequestException( + "Unable to start {} as no documents in the source indices [{}] contained all the fields " + + "selected for analysis. If you are relying on automatic field selection then there are " + + "currently mapped fields that do not exist in any indexed documents, and you will have " + + "to switch to explicit field selection and include only fields that exist in indexed " + + "documents.", + startContext.config.getId(), + Strings.arrayToCommaDelimitedString(startContext.config.getSource().getIndex()) + )); + } else { + listener.onResponse(startContext); + } + }, listener::onFailure )); } @@ -402,6 +400,7 @@ private static class StartContext { private final DataFrameAnalyticsConfig config; private final List progressOnStart; private final DataFrameAnalyticsTask.StartingState startingState; + private volatile ExtractedFields extractedFields; private StartContext(DataFrameAnalyticsConfig config, List progressOnStart) { this.config = config; diff --git a/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/dataframe/extractor/DataFrameDataExtractorFactory.java b/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/dataframe/extractor/DataFrameDataExtractorFactory.java index ce21973ca9130..f8afd22909831 100644 --- a/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/dataframe/extractor/DataFrameDataExtractorFactory.java +++ b/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/dataframe/extractor/DataFrameDataExtractorFactory.java @@ -29,7 +29,7 @@ public class DataFrameDataExtractorFactory { private final Map headers; private final boolean includeRowsWithMissingValues; - private DataFrameDataExtractorFactory(Client client, String analyticsId, List indices, ExtractedFields extractedFields, + public DataFrameDataExtractorFactory(Client client, String analyticsId, List indices, ExtractedFields extractedFields, Map headers, boolean includeRowsWithMissingValues) { this.client = Objects.requireNonNull(client); this.analyticsId = Objects.requireNonNull(analyticsId); @@ -66,32 +66,19 @@ private QueryBuilder allExtractedFieldsExistQuery() { } /** - * Validate and create a new extractor factory + * Create a new extractor factory * * The source index must exist and contain at least 1 compatible field or validations will fail. 
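Note the shape change in the factory below: createForSourceIndices previously ran field detection itself and reported back through a listener; it is now a plain synchronous factory because detection happens once, up front, in the callers. A usage sketch (the task id string is a placeholder):

    // Detection is done by the caller; the factory only assembles the extractor.
    ExtractedFields extractedFields = extractedFieldsDetector.detect().v1();
    DataFrameDataExtractorFactory extractorFactory = DataFrameDataExtractorFactory.createForSourceIndices(
        client, "my_task_id", config, extractedFields); // "my_task_id" is illustrative
    DataFrameDataExtractor extractor = extractorFactory.newExtractor(false);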
* * @param client ES Client used to make calls against the cluster * @param taskId The task id - * @param isTaskRestarting Whether the task is restarting or it is running for the first time * @param config The config from which to create the extractor factory - * @param listener The listener to notify on creation or failure + * @param extractedFields The fields to extract */ - public static void createForSourceIndices(Client client, - String taskId, - boolean isTaskRestarting, - DataFrameAnalyticsConfig config, - ActionListener listener) { - ExtractedFieldsDetectorFactory extractedFieldsDetectorFactory = new ExtractedFieldsDetectorFactory(client); - extractedFieldsDetectorFactory.createFromSource(config, isTaskRestarting, ActionListener.wrap( - extractedFieldsDetector -> { - ExtractedFields extractedFields = extractedFieldsDetector.detect(); - DataFrameDataExtractorFactory extractorFactory = new DataFrameDataExtractorFactory(client, taskId, - Arrays.asList(config.getSource().getIndex()), extractedFields, config.getHeaders(), - config.getAnalysis().supportsMissingValues()); - listener.onResponse(extractorFactory); - }, - listener::onFailure - )); + public static DataFrameDataExtractorFactory createForSourceIndices(Client client, String taskId, DataFrameAnalyticsConfig config, + ExtractedFields extractedFields) { + return new DataFrameDataExtractorFactory(client, taskId, Arrays.asList(config.getSource().getIndex()), extractedFields, + config.getHeaders(), config.getAnalysis().supportsMissingValues()); } /** @@ -111,7 +98,7 @@ public static void createForDestinationIndex(Client client, ExtractedFieldsDetectorFactory extractedFieldsDetectorFactory = new ExtractedFieldsDetectorFactory(client); extractedFieldsDetectorFactory.createFromDest(config, isTaskRestarting, ActionListener.wrap( extractedFieldsDetector -> { - ExtractedFields extractedFields = extractedFieldsDetector.detect(); + ExtractedFields extractedFields = extractedFieldsDetector.detect().v1(); DataFrameDataExtractorFactory extractorFactory = new DataFrameDataExtractorFactory(client, config.getId(), Collections.singletonList(config.getDest().getIndex()), extractedFields, config.getHeaders(), config.getAnalysis().supportsMissingValues()); diff --git a/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/dataframe/extractor/ExtractedFieldsDetector.java b/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/dataframe/extractor/ExtractedFieldsDetector.java index 5d94b57aca584..682cc94433c60 100644 --- a/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/dataframe/extractor/ExtractedFieldsDetector.java +++ b/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/dataframe/extractor/ExtractedFieldsDetector.java @@ -11,6 +11,7 @@ import org.elasticsearch.action.fieldcaps.FieldCapabilities; import org.elasticsearch.action.fieldcaps.FieldCapabilitiesResponse; import org.elasticsearch.common.Strings; +import org.elasticsearch.common.collect.Tuple; import org.elasticsearch.common.regex.Regex; import org.elasticsearch.index.IndexSettings; import org.elasticsearch.index.mapper.BooleanFieldMapper; @@ -19,6 +20,7 @@ import org.elasticsearch.xpack.core.ml.dataframe.DataFrameAnalyticsDest; import org.elasticsearch.xpack.core.ml.dataframe.analyses.RequiredField; import org.elasticsearch.xpack.core.ml.dataframe.analyses.Types; +import org.elasticsearch.xpack.core.ml.dataframe.explain.FieldSelection; import org.elasticsearch.xpack.core.ml.job.messages.Messages; import org.elasticsearch.xpack.core.ml.utils.ExceptionsHelper; import 
org.elasticsearch.xpack.core.ml.utils.NameResolver; @@ -29,13 +31,12 @@ import java.util.ArrayList; import java.util.Arrays; import java.util.Collections; -import java.util.HashSet; +import java.util.Comparator; import java.util.Iterator; import java.util.LinkedHashMap; import java.util.List; import java.util.Map; import java.util.Objects; -import java.util.Optional; import java.util.Set; import java.util.TreeSet; import java.util.stream.Collectors; @@ -57,9 +58,8 @@ public class ExtractedFieldsDetector { private final FieldCapabilitiesResponse fieldCapabilitiesResponse; private final Map fieldCardinalities; - ExtractedFieldsDetector(String[] index, DataFrameAnalyticsConfig config, boolean isTaskRestarting, - int docValueFieldsLimit, FieldCapabilitiesResponse fieldCapabilitiesResponse, - Map fieldCardinalities) { + ExtractedFieldsDetector(String[] index, DataFrameAnalyticsConfig config, boolean isTaskRestarting, int docValueFieldsLimit, + FieldCapabilitiesResponse fieldCapabilitiesResponse, Map fieldCardinalities) { this.index = Objects.requireNonNull(index); this.config = Objects.requireNonNull(config); this.isTaskRestarting = isTaskRestarting; @@ -68,37 +68,52 @@ public class ExtractedFieldsDetector { this.fieldCardinalities = Objects.requireNonNull(fieldCardinalities); } - public ExtractedFields detect() { - Set fields = getIncludedFields(); - - if (fields.isEmpty()) { - throw ExceptionsHelper.badRequestException("No compatible fields could be detected in index {}. Supported types are {}.", - Arrays.toString(index), - getSupportedTypes()); - } - - checkNoIgnoredFields(fields); + public Tuple> detect() { + TreeSet fieldSelection = new TreeSet<>(Comparator.comparing(FieldSelection::getName)); + Set fields = getIncludedFields(fieldSelection); checkFieldsHaveCompatibleTypes(fields); checkRequiredFields(fields); checkFieldsWithCardinalityLimit(); - return detectExtractedFields(fields); + ExtractedFields extractedFields = detectExtractedFields(fields, fieldSelection); + addIncludedFields(extractedFields, fieldSelection); + + return Tuple.tuple(extractedFields, Collections.unmodifiableList(new ArrayList<>(fieldSelection))); } - private Set getIncludedFields() { - Set fields = new HashSet<>(fieldCapabilitiesResponse.get().keySet()); + private Set getIncludedFields(Set fieldSelection) { + Set fields = new TreeSet<>(fieldCapabilitiesResponse.get().keySet()); + fields.removeAll(IGNORE_FIELDS); checkResultsFieldIsNotPresent(); removeFieldsUnderResultsField(fields); FetchSourceContext analyzedFields = config.getAnalyzedFields(); // If the user has not explicitly included fields we'll include all compatible fields if (analyzedFields == null || analyzedFields.includes().length == 0) { - fields.removeAll(IGNORE_FIELDS); - removeFieldsWithIncompatibleTypes(fields); + removeFieldsWithIncompatibleTypes(fields, fieldSelection); } - includeAndExcludeFields(fields); + includeAndExcludeFields(fields, fieldSelection); + + if (fields.isEmpty()) { + throw ExceptionsHelper.badRequestException("No compatible fields could be detected in index {}. 
Supported types are {}.", + Arrays.toString(index), + getSupportedTypes()); + } + return fields; } + private void removeFieldsUnderResultsField(Set fields) { + String resultsField = config.getDest().getResultsField(); + Iterator fieldsIterator = fields.iterator(); + while (fieldsIterator.hasNext()) { + String field = fieldsIterator.next(); + if (field.startsWith(resultsField + ".")) { + fieldsIterator.remove(); + } + } + fields.removeIf(field -> field.startsWith(resultsField + ".")); + } + private void checkResultsFieldIsNotPresent() { // If the task is restarting we do not mind the index containing the results field, we will overwrite all docs if (isTaskRestarting) { @@ -117,16 +132,21 @@ private void checkResultsFieldIsNotPresent() { } } - private void removeFieldsUnderResultsField(Set fields) { - // Ignore fields under the results object - fields.removeIf(field -> field.startsWith(config.getDest().getResultsField() + ".")); + private void addExcludedField(String field, String reason, Set fieldSelection) { + fieldSelection.add(FieldSelection.excluded(field, getMappingTypes(field), reason)); + } + + private Set getMappingTypes(String field) { + Map fieldCaps = fieldCapabilitiesResponse.getField(field); + return fieldCaps == null ? Collections.emptySet() : fieldCaps.keySet(); } - private void removeFieldsWithIncompatibleTypes(Set fields) { + private void removeFieldsWithIncompatibleTypes(Set fields, Set fieldSelection) { Iterator fieldsIterator = fields.iterator(); while (fieldsIterator.hasNext()) { String field = fieldsIterator.next(); if (hasCompatibleType(field) == false) { + addExcludedField(field, "unsupported type; supported types are " + getSupportedTypes(), fieldSelection); fieldsIterator.remove(); } } @@ -163,7 +183,7 @@ private Set getSupportedTypes() { return supportedTypes; } - private void includeAndExcludeFields(Set fields) { + private void includeAndExcludeFields(Set fields, Set fieldSelection) { FetchSourceContext analyzedFields = config.getAnalyzedFields(); if (analyzedFields == null) { return; @@ -188,18 +208,30 @@ private void includeAndExcludeFields(Set fields) { Messages.getMessage(Messages.DATA_FRAME_ANALYTICS_BAD_FIELD_FILTER, ex))) .expand(excludes, true); - fields.retainAll(includedSet); - fields.removeAll(excludedSet); + applyIncludesExcludes(fields, includedSet, excludedSet, fieldSelection); } catch (ResourceNotFoundException ex) { // Re-wrap our exception so that we throw the same exception type when there are no fields. 
throw ExceptionsHelper.badRequestException(ex.getMessage()); } } - private void checkNoIgnoredFields(Set<String> fields) { - Optional<String> ignoreField = IGNORE_FIELDS.stream().filter(fields::contains).findFirst(); - if (ignoreField.isPresent()) { - throw ExceptionsHelper.badRequestException("field [{}] cannot be analyzed", ignoreField.get()); + private void applyIncludesExcludes(Set<String> fields, Set<String> includes, Set<String> excludes, + Set<FieldSelection> fieldSelection) { + Iterator<String> fieldsIterator = fields.iterator(); + while (fieldsIterator.hasNext()) { + String field = fieldsIterator.next(); + if (includes.contains(field)) { + if (IGNORE_FIELDS.contains(field)) { + throw ExceptionsHelper.badRequestException("field [{}] cannot be analyzed", field); + } + if (excludes.contains(field)) { + fieldsIterator.remove(); + addExcludedField(field, "field in excludes list", fieldSelection); + } + } else { + fieldsIterator.remove(); + addExcludedField(field, "field not in includes list", fieldSelection); + } } } @@ -247,13 +279,10 @@ private void checkFieldsWithCardinalityLimit() { } } - private ExtractedFields detectExtractedFields(Set<String> fields) { - List<String> sortedFields = new ArrayList<>(fields); - // We sort the fields to ensure the checksum for each document is deterministic - Collections.sort(sortedFields); - ExtractedFields extractedFields = ExtractedFields.build(sortedFields, Collections.emptySet(), fieldCapabilitiesResponse); + private ExtractedFields detectExtractedFields(Set<String> fields, Set<FieldSelection> fieldSelection) { + ExtractedFields extractedFields = ExtractedFields.build(fields, Collections.emptySet(), fieldCapabilitiesResponse); boolean preferSource = extractedFields.getDocValueFields().size() > docValueFieldsLimit; - extractedFields = deduplicateMultiFields(extractedFields, preferSource); + extractedFields = deduplicateMultiFields(extractedFields, preferSource, fieldSelection); if (preferSource) { extractedFields = fetchFromSourceIfSupported(extractedFields); if (extractedFields.getDocValueFields().size() > docValueFieldsLimit) { @@ -266,7 +295,8 @@ private ExtractedFields detectExtractedFields(Set<String> fields) { return extractedFields; } - private ExtractedFields deduplicateMultiFields(ExtractedFields extractedFields, boolean preferSource) { + private ExtractedFields deduplicateMultiFields(ExtractedFields extractedFields, boolean preferSource, + Set<FieldSelection> fieldSelection) { Set<String> requiredFields = config.getAnalysis().getRequiredFields().stream().map(RequiredField::getName) .collect(Collectors.toSet()); Map<String, ExtractedField> nameOrParentToField = new LinkedHashMap<>(); @@ -276,43 +306,53 @@ private ExtractedFields deduplicateMultiFields(ExtractedFields extractedFields, if (existingField != null) { ExtractedField parent = currentField.isMultiField() ? existingField : currentField; ExtractedField multiField = currentField.isMultiField() ?
currentField : existingField; - nameOrParentToField.put(nameOrParent, chooseMultiFieldOrParent(preferSource, requiredFields, parent, multiField)); + nameOrParentToField.put(nameOrParent, + chooseMultiFieldOrParent(preferSource, requiredFields, parent, multiField, fieldSelection)); } } return new ExtractedFields(new ArrayList<>(nameOrParentToField.values())); } - private ExtractedField chooseMultiFieldOrParent(boolean preferSource, Set requiredFields, - ExtractedField parent, ExtractedField multiField) { + private ExtractedField chooseMultiFieldOrParent(boolean preferSource, Set requiredFields, ExtractedField parent, + ExtractedField multiField, Set fieldSelection) { // Check requirements first if (requiredFields.contains(parent.getName())) { + addExcludedField(multiField.getName(), "[" + parent.getName() + "] is required instead", fieldSelection); return parent; } if (requiredFields.contains(multiField.getName())) { + addExcludedField(parent.getName(), "[" + multiField.getName() + "] is required instead", fieldSelection); return multiField; } // If both are multi-fields it means there are several. In this case parent is the previous multi-field // we selected. We'll just keep that. if (parent.isMultiField() && multiField.isMultiField()) { + addExcludedField(multiField.getName(), "[" + parent.getName() + "] came first", fieldSelection); return parent; } // If we prefer source only the parent may support it. If it does we pick it immediately. if (preferSource && parent.supportsFromSource()) { + addExcludedField(multiField.getName(), "[" + parent.getName() + "] is preferred because it supports fetching from source", + fieldSelection); return parent; } // If any of the two is a doc_value field let's prefer it as it'd support aggregations. // We check the parent first as it'd be a shorter field name. if (parent.getMethod() == ExtractedField.Method.DOC_VALUE) { + addExcludedField(multiField.getName(), "[" + parent.getName() + "] is preferred because it is aggregatable", fieldSelection); return parent; } if (multiField.getMethod() == ExtractedField.Method.DOC_VALUE) { + addExcludedField(parent.getName(), "[" + multiField.getName() + "] is preferred because it is aggregatable", fieldSelection); return multiField; } // None is aggregatable. Let's pick the parent for its shorter name. + addExcludedField(multiField.getName(), "[" + parent.getName() + "] is preferred because none of the multi-fields are aggregatable", + fieldSelection); return parent; } @@ -343,6 +383,26 @@ private ExtractedFields fetchBooleanFieldsAsIntegers(ExtractedFields extractedFi return new ExtractedFields(adjusted); } + private void addIncludedFields(ExtractedFields extractedFields, Set fieldSelection) { + Set requiredFields = config.getAnalysis().getRequiredFields().stream().map(RequiredField::getName) + .collect(Collectors.toSet()); + Set categoricalFields = getCategoricalFields(extractedFields); + for (ExtractedField includedField : extractedFields.getAllFields()) { + FieldSelection.FeatureType featureType = categoricalFields.contains(includedField.getName()) ? 
+ FieldSelection.FeatureType.CATEGORICAL : FieldSelection.FeatureType.NUMERICAL; + fieldSelection.add(FieldSelection.included(includedField.getName(), includedField.getTypes(), + requiredFields.contains(includedField.getName()), featureType)); + } + } + + private Set getCategoricalFields(ExtractedFields extractedFields) { + return extractedFields.getAllFields().stream() + .filter(extractedField -> config.getAnalysis().getAllowedCategoricalTypes(extractedField.getName()) + .containsAll(extractedField.getTypes())) + .map(ExtractedField::getName) + .collect(Collectors.toSet()); + } + private static boolean isBoolean(Set types) { return types.size() == 1 && types.contains(BooleanFieldMapper.CONTENT_TYPE); } diff --git a/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/dataframe/process/MemoryUsageEstimationProcessManager.java b/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/dataframe/process/MemoryUsageEstimationProcessManager.java index 2e5189eb249eb..6740f8d4d34ca 100644 --- a/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/dataframe/process/MemoryUsageEstimationProcessManager.java +++ b/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/dataframe/process/MemoryUsageEstimationProcessManager.java @@ -100,9 +100,9 @@ private MemoryUsageEstimationResult runJob(String jobId, } finally { process.consumeAndCloseOutputStream(); try { - LOGGER.info("[{}] Closing process", jobId); + LOGGER.debug("[{}] Closing process", jobId); process.close(); - LOGGER.info("[{}] Closed process", jobId); + LOGGER.debug("[{}] Closed process", jobId); } catch (Exception e) { String errorMsg = new ParameterizedMessage( diff --git a/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/rest/dataframe/RestEstimateMemoryUsageAction.java b/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/rest/dataframe/RestEstimateMemoryUsageAction.java deleted file mode 100644 index 25f2bcb4bb872..0000000000000 --- a/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/rest/dataframe/RestEstimateMemoryUsageAction.java +++ /dev/null @@ -1,38 +0,0 @@ -/* - * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one - * or more contributor license agreements. Licensed under the Elastic License; - * you may not use this file except in compliance with the Elastic License. 
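The categorical/numerical split computed by addIncludedFields and getCategoricalFields above reduces to a containsAll check over the field's mapping types. A minimal standalone illustration follows; it is an editorial simplification, and the allowed categorical types really come from the analysis via getAllowedCategoricalTypes (the set below is an assumption):

    import java.util.Set;

    public final class FeatureTypeSketch {
        // A field is CATEGORICAL only if every one of its mapping types is an
        // allowed categorical type for the analysis; otherwise it is NUMERICAL.
        static String featureType(Set<String> mappingTypes, Set<String> allowedCategoricalTypes) {
            return allowedCategoricalTypes.containsAll(mappingTypes) ? "categorical" : "numerical";
        }

        public static void main(String[] args) {
            Set<String> allowed = Set.of("text", "keyword", "ip"); // assumed allowed categorical types
            System.out.println(featureType(Set.of("keyword"), allowed)); // categorical
            System.out.println(featureType(Set.of("float"), allowed));   // numerical
        }
    }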
- */ -package org.elasticsearch.xpack.ml.rest.dataframe; - -import org.elasticsearch.client.node.NodeClient; -import org.elasticsearch.rest.BaseRestHandler; -import org.elasticsearch.rest.RestController; -import org.elasticsearch.rest.RestRequest; -import org.elasticsearch.rest.action.RestToXContentListener; -import org.elasticsearch.xpack.core.ml.action.EstimateMemoryUsageAction; -import org.elasticsearch.xpack.core.ml.action.PutDataFrameAnalyticsAction; -import org.elasticsearch.xpack.ml.MachineLearning; - -import java.io.IOException; - -public class RestEstimateMemoryUsageAction extends BaseRestHandler { - - public RestEstimateMemoryUsageAction(RestController controller) { - controller.registerHandler( - RestRequest.Method.POST, - MachineLearning.BASE_PATH + "data_frame/analytics/_estimate_memory_usage", this); - } - - @Override - public String getName() { - return "ml_estimate_memory_usage_action"; - } - - @Override - protected RestChannelConsumer prepareRequest(RestRequest restRequest, NodeClient client) throws IOException { - PutDataFrameAnalyticsAction.Request request = - PutDataFrameAnalyticsAction.Request.parseRequestForMemoryEstimation(restRequest.contentOrSourceParamParser()); - return channel -> client.execute(EstimateMemoryUsageAction.INSTANCE, request, new RestToXContentListener<>(channel)); - } -} diff --git a/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/rest/dataframe/RestExplainDataFrameAnalyticsAction.java b/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/rest/dataframe/RestExplainDataFrameAnalyticsAction.java new file mode 100644 index 0000000000000..b16bf7b3efbf1 --- /dev/null +++ b/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/rest/dataframe/RestExplainDataFrameAnalyticsAction.java @@ -0,0 +1,84 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License; + * you may not use this file except in compliance with the Elastic License. 
+ */ +package org.elasticsearch.xpack.ml.rest.dataframe; + +import org.elasticsearch.action.ActionListener; +import org.elasticsearch.client.node.NodeClient; +import org.elasticsearch.common.Strings; +import org.elasticsearch.rest.BaseRestHandler; +import org.elasticsearch.rest.RestController; +import org.elasticsearch.rest.RestRequest; +import org.elasticsearch.rest.action.RestToXContentListener; +import org.elasticsearch.xpack.core.ml.action.ExplainDataFrameAnalyticsAction; +import org.elasticsearch.xpack.core.ml.action.GetDataFrameAnalyticsAction; +import org.elasticsearch.xpack.core.ml.action.PutDataFrameAnalyticsAction; +import org.elasticsearch.xpack.core.ml.dataframe.DataFrameAnalyticsConfig; +import org.elasticsearch.xpack.core.ml.utils.ExceptionsHelper; +import org.elasticsearch.xpack.ml.MachineLearning; + +import java.io.IOException; +import java.util.List; +import java.util.stream.Collectors; + +public class RestExplainDataFrameAnalyticsAction extends BaseRestHandler { + + public RestExplainDataFrameAnalyticsAction(RestController controller) { + controller.registerHandler(RestRequest.Method.GET, MachineLearning.BASE_PATH + "data_frame/analytics/_explain", this); + controller.registerHandler(RestRequest.Method.POST, MachineLearning.BASE_PATH + "data_frame/analytics/_explain", this); + controller.registerHandler(RestRequest.Method.GET, MachineLearning.BASE_PATH + "data_frame/analytics/{" + + DataFrameAnalyticsConfig.ID.getPreferredName() + "}/_explain", this); + controller.registerHandler(RestRequest.Method.POST, MachineLearning.BASE_PATH + "data_frame/analytics/{" + + DataFrameAnalyticsConfig.ID.getPreferredName() + "}/_explain", this); + } + + @Override + public String getName() { + return "ml_explain_data_frame_analytics_action"; + } + + @Override + protected RestChannelConsumer prepareRequest(RestRequest restRequest, NodeClient client) throws IOException { + final String jobId = restRequest.param(DataFrameAnalyticsConfig.ID.getPreferredName()); + + if (Strings.isNullOrEmpty(jobId) && restRequest.hasContentOrSourceParam() == false) { + throw ExceptionsHelper.badRequestException("Please provide a job [{}] or the config object", + DataFrameAnalyticsConfig.ID.getPreferredName()); + } + + if (Strings.isNullOrEmpty(jobId) == false && restRequest.hasContentOrSourceParam()) { + throw ExceptionsHelper.badRequestException("Please provide either a job [{}] or the config object but not both", + DataFrameAnalyticsConfig.ID.getPreferredName()); + } + + // We need to consume the body before returning + PutDataFrameAnalyticsAction.Request explainRequestFromBody = Strings.isNullOrEmpty(jobId) ? 
+ PutDataFrameAnalyticsAction.Request.parseRequestForExplain(restRequest.contentOrSourceParamParser()) : null; + + return channel -> { + RestToXContentListener listener = new RestToXContentListener<>(channel); + + if (explainRequestFromBody != null) { + client.execute(ExplainDataFrameAnalyticsAction.INSTANCE, explainRequestFromBody, listener); + } else { + GetDataFrameAnalyticsAction.Request getRequest = new GetDataFrameAnalyticsAction.Request(jobId); + getRequest.setAllowNoResources(false); + client.execute(GetDataFrameAnalyticsAction.INSTANCE, getRequest, ActionListener.wrap( + getResponse -> { + List jobs = getResponse.getResources().results(); + if (jobs.size() > 1) { + listener.onFailure(ExceptionsHelper.badRequestException("expected only one config but matched {}", + jobs.stream().map(DataFrameAnalyticsConfig::getId).collect(Collectors.toList()))); + } else { + PutDataFrameAnalyticsAction.Request explainRequest = new PutDataFrameAnalyticsAction.Request(jobs.get(0)); + client.execute(ExplainDataFrameAnalyticsAction.INSTANCE, explainRequest, listener); + } + }, + listener::onFailure + )); + } + }; + } +} diff --git a/x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/dataframe/extractor/ExtractedFieldsDetectorTests.java b/x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/dataframe/extractor/ExtractedFieldsDetectorTests.java index 8f33c9bfbbfb0..5f7bd650a1cd8 100644 --- a/x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/dataframe/extractor/ExtractedFieldsDetectorTests.java +++ b/x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/dataframe/extractor/ExtractedFieldsDetectorTests.java @@ -8,6 +8,7 @@ import org.elasticsearch.ElasticsearchStatusException; import org.elasticsearch.action.fieldcaps.FieldCapabilities; import org.elasticsearch.action.fieldcaps.FieldCapabilitiesResponse; +import org.elasticsearch.common.collect.Tuple; import org.elasticsearch.search.SearchHit; import org.elasticsearch.search.fetch.subphase.FetchSourceContext; import org.elasticsearch.test.ESTestCase; @@ -17,6 +18,7 @@ import org.elasticsearch.xpack.core.ml.dataframe.analyses.Classification; import org.elasticsearch.xpack.core.ml.dataframe.analyses.OutlierDetection; import org.elasticsearch.xpack.core.ml.dataframe.analyses.Regression; +import org.elasticsearch.xpack.core.ml.dataframe.explain.FieldSelection; import org.elasticsearch.xpack.ml.extractor.ExtractedField; import org.elasticsearch.xpack.ml.extractor.ExtractedFields; import org.elasticsearch.xpack.ml.test.SearchHitBuilder; @@ -25,6 +27,7 @@ import java.util.Arrays; import java.util.Collections; import java.util.HashMap; +import java.util.HashSet; import java.util.List; import java.util.Map; import java.util.stream.Collectors; @@ -48,12 +51,15 @@ public void testDetect_GivenFloatField() { ExtractedFieldsDetector extractedFieldsDetector = new ExtractedFieldsDetector( SOURCE_INDEX, buildOutlierDetectionConfig(), false, 100, fieldCapabilities, Collections.emptyMap()); - ExtractedFields extractedFields = extractedFieldsDetector.detect(); + Tuple> fieldExtraction = extractedFieldsDetector.detect(); - List allFields = extractedFields.getAllFields(); + List allFields = fieldExtraction.v1().getAllFields(); assertThat(allFields.size(), equalTo(1)); assertThat(allFields.get(0).getName(), equalTo("some_float")); assertThat(allFields.get(0).getMethod(), equalTo(ExtractedField.Method.DOC_VALUE)); + + assertFieldSelectionContains(fieldExtraction.v2(), + FieldSelection.included("some_float", Collections.singleton("float"), false, 
FieldSelection.FeatureType.NUMERICAL)); } public void testDetect_GivenNumericFieldWithMultipleTypes() { @@ -63,12 +69,16 @@ public void testDetect_GivenNumericFieldWithMultipleTypes() { ExtractedFieldsDetector extractedFieldsDetector = new ExtractedFieldsDetector( SOURCE_INDEX, buildOutlierDetectionConfig(), false, 100, fieldCapabilities, Collections.emptyMap()); - ExtractedFields extractedFields = extractedFieldsDetector.detect(); + Tuple> fieldExtraction = extractedFieldsDetector.detect(); - List allFields = extractedFields.getAllFields(); + List allFields = fieldExtraction.v1().getAllFields(); assertThat(allFields.size(), equalTo(1)); assertThat(allFields.get(0).getName(), equalTo("some_number")); assertThat(allFields.get(0).getMethod(), equalTo(ExtractedField.Method.DOC_VALUE)); + + assertFieldSelectionContains(fieldExtraction.v2(), FieldSelection.included("some_number", + new HashSet<>(Arrays.asList("long", "integer", "short", "byte", "double", "float", "half_float", "scaled_float")), false, + FieldSelection.FeatureType.NUMERICAL)); } public void testDetect_GivenOutlierDetectionAndNonNumericField() { @@ -105,14 +115,22 @@ public void testDetect_GivenOutlierDetectionAndMultipleFields() { ExtractedFieldsDetector extractedFieldsDetector = new ExtractedFieldsDetector( SOURCE_INDEX, buildOutlierDetectionConfig(), false, 100, fieldCapabilities, Collections.emptyMap()); - ExtractedFields extractedFields = extractedFieldsDetector.detect(); + Tuple> fieldExtraction = extractedFieldsDetector.detect(); - List allFields = extractedFields.getAllFields(); + List allFields = fieldExtraction.v1().getAllFields(); assertThat(allFields.size(), equalTo(3)); assertThat(allFields.stream().map(ExtractedField::getName).collect(Collectors.toSet()), containsInAnyOrder("some_float", "some_long", "some_boolean")); assertThat(allFields.stream().map(ExtractedField::getMethod).collect(Collectors.toSet()), contains(equalTo(ExtractedField.Method.DOC_VALUE))); + + assertFieldSelectionContains(fieldExtraction.v2(), + FieldSelection.included("some_boolean", Collections.singleton("boolean"), false, FieldSelection.FeatureType.NUMERICAL), + FieldSelection.included("some_float", Collections.singleton("float"), false, FieldSelection.FeatureType.NUMERICAL), + FieldSelection.excluded("some_keyword", Collections.singleton("keyword"), "unsupported type; " + + "supported types are [boolean, byte, double, float, half_float, integer, long, scaled_float, short]"), + FieldSelection.included("some_long", Collections.singleton("long"), false, FieldSelection.FeatureType.NUMERICAL) + ); } public void testDetect_GivenRegressionAndMultipleFields() { @@ -126,14 +144,22 @@ public void testDetect_GivenRegressionAndMultipleFields() { ExtractedFieldsDetector extractedFieldsDetector = new ExtractedFieldsDetector( SOURCE_INDEX, buildRegressionConfig("foo"), false, 100, fieldCapabilities, Collections.emptyMap()); - ExtractedFields extractedFields = extractedFieldsDetector.detect(); + Tuple> fieldExtraction = extractedFieldsDetector.detect(); - List allFields = extractedFields.getAllFields(); + List allFields = fieldExtraction.v1().getAllFields(); assertThat(allFields.size(), equalTo(5)); assertThat(allFields.stream().map(ExtractedField::getName).collect(Collectors.toList()), containsInAnyOrder("foo", "some_float", "some_keyword", "some_long", "some_boolean")); assertThat(allFields.stream().map(ExtractedField::getMethod).collect(Collectors.toSet()), contains(equalTo(ExtractedField.Method.DOC_VALUE))); + + 
assertFieldSelectionContains(fieldExtraction.v2(), + FieldSelection.included("foo", Collections.singleton("double"), true, FieldSelection.FeatureType.NUMERICAL), + FieldSelection.included("some_boolean", Collections.singleton("boolean"), false, FieldSelection.FeatureType.NUMERICAL), + FieldSelection.included("some_float", Collections.singleton("float"), false, FieldSelection.FeatureType.NUMERICAL), + FieldSelection.included("some_keyword", Collections.singleton("keyword"), false, FieldSelection.FeatureType.CATEGORICAL), + FieldSelection.included("some_long", Collections.singleton("long"), false, FieldSelection.FeatureType.NUMERICAL) + ); } public void testDetect_GivenRegressionAndRequiredFieldMissing() { @@ -191,11 +217,16 @@ public void testDetect_GivenFieldIsBothIncludedAndExcluded() { ExtractedFieldsDetector extractedFieldsDetector = new ExtractedFieldsDetector( SOURCE_INDEX, buildOutlierDetectionConfig(analyzedFields), false, 100, fieldCapabilities, Collections.emptyMap()); - ExtractedFields extractedFields = extractedFieldsDetector.detect(); + Tuple> fieldExtraction = extractedFieldsDetector.detect(); - List allFields = extractedFields.getAllFields(); + List allFields = fieldExtraction.v1().getAllFields(); assertThat(allFields.size(), equalTo(1)); assertThat(allFields.stream().map(ExtractedField::getName).collect(Collectors.toList()), contains("bar")); + + assertFieldSelectionContains(fieldExtraction.v2(), + FieldSelection.included("bar", Collections.singleton("float"), false, FieldSelection.FeatureType.NUMERICAL), + FieldSelection.excluded("foo", Collections.singleton("float"), "field in excludes list") + ); } public void testDetect_GivenRegressionAndRequiredFieldHasInvalidType() { @@ -258,14 +289,15 @@ public void testDetect_GivenIgnoredField() { public void testDetect_GivenIncludedIgnoredField() { FieldCapabilitiesResponse fieldCapabilities = new MockFieldCapsResponseBuilder() - .addAggregatableField("_id", "float").build(); + .addAggregatableField("_id", "float") + .build(); FetchSourceContext analyzedFields = new FetchSourceContext(true, new String[]{"_id"}, new String[0]); ExtractedFieldsDetector extractedFieldsDetector = new ExtractedFieldsDetector( SOURCE_INDEX, buildOutlierDetectionConfig(analyzedFields), false, 100, fieldCapabilities, Collections.emptyMap()); ElasticsearchStatusException e = expectThrows(ElasticsearchStatusException.class, () -> extractedFieldsDetector.detect()); - assertThat(e.getMessage(), equalTo("field [_id] cannot be analyzed")); + assertThat(e.getMessage(), equalTo("No field [_id] could be detected")); } public void testDetect_ShouldSortFieldsAlphabetically() { @@ -285,9 +317,9 @@ public void testDetect_ShouldSortFieldsAlphabetically() { ExtractedFieldsDetector extractedFieldsDetector = new ExtractedFieldsDetector( SOURCE_INDEX, buildOutlierDetectionConfig(), false, 100, fieldCapabilities, Collections.emptyMap()); - ExtractedFields extractedFields = extractedFieldsDetector.detect(); + Tuple> fieldExtraction = extractedFieldsDetector.detect(); - List extractedFieldNames = extractedFields.getAllFields().stream().map(ExtractedField::getName) + List extractedFieldNames = fieldExtraction.v1().getAllFields().stream().map(ExtractedField::getName) .collect(Collectors.toList()); assertThat(extractedFieldNames, equalTo(sortedFields)); } @@ -333,11 +365,17 @@ public void testDetect_GivenInclusionsAndExclusions() { ExtractedFieldsDetector extractedFieldsDetector = new ExtractedFieldsDetector( SOURCE_INDEX, buildOutlierDetectionConfig(desiredFields), false, 100, 
fieldCapabilities, Collections.emptyMap()); - ExtractedFields extractedFields = extractedFieldsDetector.detect(); + Tuple> fieldExtraction = extractedFieldsDetector.detect(); - List extractedFieldNames = extractedFields.getAllFields().stream().map(ExtractedField::getName) + List extractedFieldNames = fieldExtraction.v1().getAllFields().stream().map(ExtractedField::getName) .collect(Collectors.toList()); assertThat(extractedFieldNames, equalTo(Arrays.asList("my_field1", "your_field2"))); + + assertFieldSelectionContains(fieldExtraction.v2(), + FieldSelection.included("my_field1", Collections.singleton("float"), false, FieldSelection.FeatureType.NUMERICAL), + FieldSelection.excluded("my_field1_nope", Collections.singleton("float"), "field in excludes list"), + FieldSelection.included("your_field2", Collections.singleton("float"), false, FieldSelection.FeatureType.NUMERICAL) + ); } public void testDetect_GivenIncludedFieldHasUnsupportedType() { @@ -384,11 +422,18 @@ public void testDetect_GivenIndexContainsResultsFieldAndTaskIsRestarting() { ExtractedFieldsDetector extractedFieldsDetector = new ExtractedFieldsDetector( SOURCE_INDEX, buildOutlierDetectionConfig(), true, 100, fieldCapabilities, Collections.emptyMap()); - ExtractedFields extractedFields = extractedFieldsDetector.detect(); + Tuple> fieldExtraction = extractedFieldsDetector.detect(); - List extractedFieldNames = extractedFields.getAllFields().stream().map(ExtractedField::getName) + List extractedFieldNames = fieldExtraction.v1().getAllFields().stream().map(ExtractedField::getName) .collect(Collectors.toList()); assertThat(extractedFieldNames, equalTo(Arrays.asList("my_field1", "your_field2"))); + + assertFieldSelectionContains(fieldExtraction.v2(), + FieldSelection.included("my_field1", Collections.singleton("float"), false, FieldSelection.FeatureType.NUMERICAL), + FieldSelection.included("your_field2", Collections.singleton("float"), false, FieldSelection.FeatureType.NUMERICAL), + FieldSelection.excluded("your_keyword", Collections.singleton("keyword"), "unsupported type; supported types " + + "are [boolean, byte, double, float, half_float, integer, long, scaled_float, short]") + ); } public void testDetect_GivenIncludedResultsField() { @@ -434,12 +479,12 @@ public void testDetect_GivenLessFieldsThanDocValuesLimit() { ExtractedFieldsDetector extractedFieldsDetector = new ExtractedFieldsDetector( SOURCE_INDEX, buildOutlierDetectionConfig(), true, 4, fieldCapabilities, Collections.emptyMap()); - ExtractedFields extractedFields = extractedFieldsDetector.detect(); + Tuple> fieldExtraction = extractedFieldsDetector.detect(); - List extractedFieldNames = extractedFields.getAllFields().stream().map(ExtractedField::getName) + List extractedFieldNames = fieldExtraction.v1().getAllFields().stream().map(ExtractedField::getName) .collect(Collectors.toList()); assertThat(extractedFieldNames, equalTo(Arrays.asList("field_1", "field_2", "field_3"))); - assertThat(extractedFields.getAllFields().stream().map(ExtractedField::getMethod).collect(Collectors.toSet()), + assertThat(fieldExtraction.v1().getAllFields().stream().map(ExtractedField::getMethod).collect(Collectors.toSet()), contains(equalTo(ExtractedField.Method.DOC_VALUE))); } @@ -453,12 +498,12 @@ public void testDetect_GivenEqualFieldsToDocValuesLimit() { ExtractedFieldsDetector extractedFieldsDetector = new ExtractedFieldsDetector( SOURCE_INDEX, buildOutlierDetectionConfig(), true, 3, fieldCapabilities, Collections.emptyMap()); - ExtractedFields extractedFields = 
extractedFieldsDetector.detect();
+        Tuple<ExtractedFields, List<FieldSelection>> fieldExtraction = extractedFieldsDetector.detect();

-        List<String> extractedFieldNames = extractedFields.getAllFields().stream().map(ExtractedField::getName)
+        List<String> extractedFieldNames = fieldExtraction.v1().getAllFields().stream().map(ExtractedField::getName)
             .collect(Collectors.toList());
         assertThat(extractedFieldNames, equalTo(Arrays.asList("field_1", "field_2", "field_3")));
-        assertThat(extractedFields.getAllFields().stream().map(ExtractedField::getMethod).collect(Collectors.toSet()),
+        assertThat(fieldExtraction.v1().getAllFields().stream().map(ExtractedField::getMethod).collect(Collectors.toSet()),
             contains(equalTo(ExtractedField.Method.DOC_VALUE)));
     }

@@ -472,12 +517,12 @@ public void testDetect_GivenMoreFieldsThanDocValuesLimit() {
         ExtractedFieldsDetector extractedFieldsDetector = new ExtractedFieldsDetector(
             SOURCE_INDEX, buildOutlierDetectionConfig(), true, 2, fieldCapabilities, Collections.emptyMap());
-        ExtractedFields extractedFields = extractedFieldsDetector.detect();
+        Tuple<ExtractedFields, List<FieldSelection>> fieldExtraction = extractedFieldsDetector.detect();

-        List<String> extractedFieldNames = extractedFields.getAllFields().stream().map(ExtractedField::getName)
+        List<String> extractedFieldNames = fieldExtraction.v1().getAllFields().stream().map(ExtractedField::getName)
             .collect(Collectors.toList());
         assertThat(extractedFieldNames, equalTo(Arrays.asList("field_1", "field_2", "field_3")));
-        assertThat(extractedFields.getAllFields().stream().map(ExtractedField::getMethod).collect(Collectors.toSet()),
+        assertThat(fieldExtraction.v1().getAllFields().stream().map(ExtractedField::getMethod).collect(Collectors.toSet()),
             contains(equalTo(ExtractedField.Method.SOURCE)));
     }

@@ -488,14 +533,18 @@ public void testDetect_GivenBooleanField_BooleanMappedAsInteger() {
         ExtractedFieldsDetector extractedFieldsDetector = new ExtractedFieldsDetector(
             SOURCE_INDEX, buildOutlierDetectionConfig(), false, 100, fieldCapabilities, Collections.emptyMap());
-        ExtractedFields extractedFields = extractedFieldsDetector.detect();
+        Tuple<ExtractedFields, List<FieldSelection>> fieldExtraction = extractedFieldsDetector.detect();

-        List<ExtractedField> allFields = extractedFields.getAllFields();
+        List<ExtractedField> allFields = fieldExtraction.v1().getAllFields();
         assertThat(allFields.size(), equalTo(1));
         ExtractedField booleanField = allFields.get(0);
         assertThat(booleanField.getTypes(), contains("boolean"));
         assertThat(booleanField.getMethod(), equalTo(ExtractedField.Method.DOC_VALUE));

+        assertFieldSelectionContains(fieldExtraction.v2(),
+            FieldSelection.included("some_boolean", Collections.singleton("boolean"), false, FieldSelection.FeatureType.NUMERICAL)
+        );
+
         SearchHit hit = new SearchHitBuilder(42).addField("some_boolean", true).build();
         assertThat(booleanField.value(hit), arrayContaining(1));
@@ -514,14 +563,18 @@ public void testDetect_GivenBooleanField_BooleanMappedAsString() {
         ExtractedFieldsDetector extractedFieldsDetector = new ExtractedFieldsDetector(
             SOURCE_INDEX, buildClassificationConfig("some_boolean"), false, 100, fieldCapabilities, Collections.singletonMap("some_boolean", 2L));
-        ExtractedFields extractedFields = extractedFieldsDetector.detect();
+        Tuple<ExtractedFields, List<FieldSelection>> fieldExtraction = extractedFieldsDetector.detect();

-        List<ExtractedField> allFields = extractedFields.getAllFields();
+        List<ExtractedField> allFields = fieldExtraction.v1().getAllFields();
         assertThat(allFields.size(), equalTo(1));
         ExtractedField booleanField = allFields.get(0);
         assertThat(booleanField.getTypes(), contains("boolean"));
         assertThat(booleanField.getMethod(), equalTo(ExtractedField.Method.DOC_VALUE));

+        assertFieldSelectionContains(fieldExtraction.v2(),
+            FieldSelection.included("some_boolean", Collections.singleton("boolean"), true, FieldSelection.FeatureType.CATEGORICAL)
+        );
+
         SearchHit hit = new SearchHitBuilder(42).addField("some_boolean", true).build();
         assertThat(booleanField.value(hit), arrayContaining("true"));
@@ -546,12 +599,26 @@ public void testDetect_GivenMultiFields() {
         ExtractedFieldsDetector extractedFieldsDetector = new ExtractedFieldsDetector(
             SOURCE_INDEX, buildRegressionConfig("a_float"), true, 100, fieldCapabilities, Collections.emptyMap());
-        ExtractedFields extractedFields = extractedFieldsDetector.detect();
+        Tuple<ExtractedFields, List<FieldSelection>> fieldExtraction = extractedFieldsDetector.detect();

-        assertThat(extractedFields.getAllFields().size(), equalTo(5));
-        List<String> extractedFieldNames = extractedFields.getAllFields().stream().map(ExtractedField::getName)
+        assertThat(fieldExtraction.v1().getAllFields().size(), equalTo(5));
+        List<String> extractedFieldNames = fieldExtraction.v1().getAllFields().stream().map(ExtractedField::getName)
             .collect(Collectors.toList());
         assertThat(extractedFieldNames, contains("a_float", "keyword_1", "text_1.keyword", "text_2.keyword", "text_without_keyword"));
+
+        assertFieldSelectionContains(fieldExtraction.v2(),
+            FieldSelection.included("a_float", Collections.singleton("float"), true, FieldSelection.FeatureType.NUMERICAL),
+            FieldSelection.included("keyword_1", Collections.singleton("keyword"), false, FieldSelection.FeatureType.CATEGORICAL),
+            FieldSelection.excluded("keyword_1.text", Collections.singleton("text"),
+                "[keyword_1] is preferred because it is aggregatable"),
+            FieldSelection.excluded("text_1", Collections.singleton("text"),
+                "[text_1.keyword] is preferred because it is aggregatable"),
+            FieldSelection.included("text_1.keyword", Collections.singleton("keyword"), false, FieldSelection.FeatureType.CATEGORICAL),
+            FieldSelection.excluded("text_2", Collections.singleton("text"),
+                "[text_2.keyword] is preferred because it is aggregatable"),
+            FieldSelection.included("text_2.keyword", Collections.singleton("keyword"), false, FieldSelection.FeatureType.CATEGORICAL),
+            FieldSelection.included("text_without_keyword", Collections.singleton("text"), false, FieldSelection.FeatureType.CATEGORICAL)
+        );
     }

     public void testDetect_GivenMultiFieldAndParentIsRequired() {
@@ -563,12 +630,19 @@ public void testDetect_GivenMultiFieldAndParentIsRequired() {
         ExtractedFieldsDetector extractedFieldsDetector = new ExtractedFieldsDetector(
             SOURCE_INDEX, buildClassificationConfig("field_1"), true, 100, fieldCapabilities, Collections.singletonMap("field_1", 2L));
-        ExtractedFields extractedFields = extractedFieldsDetector.detect();
+        Tuple<ExtractedFields, List<FieldSelection>> fieldExtraction = extractedFieldsDetector.detect();

-        assertThat(extractedFields.getAllFields().size(), equalTo(2));
-        List<String> extractedFieldNames = extractedFields.getAllFields().stream().map(ExtractedField::getName)
+        assertThat(fieldExtraction.v1().getAllFields().size(), equalTo(2));
+        List<String> extractedFieldNames = fieldExtraction.v1().getAllFields().stream().map(ExtractedField::getName)
             .collect(Collectors.toList());
         assertThat(extractedFieldNames, contains("field_1", "field_2"));
+
+        assertFieldSelectionContains(fieldExtraction.v2(),
+            FieldSelection.included("field_1", Collections.singleton("keyword"), true, FieldSelection.FeatureType.CATEGORICAL),
+            FieldSelection.excluded("field_1.keyword", Collections.singleton("keyword"),
+                "[field_1] is required instead"),
+            FieldSelection.included("field_2", Collections.singleton("float"), false, FieldSelection.FeatureType.NUMERICAL)
+        );
     }

     public void testDetect_GivenMultiFieldAndMultiFieldIsRequired() {
@@ -581,12 +655,19 @@ public void testDetect_GivenMultiFieldAndMultiFieldIsRequired() {
         ExtractedFieldsDetector extractedFieldsDetector = new ExtractedFieldsDetector(
             SOURCE_INDEX, buildClassificationConfig("field_1.keyword"), true, 100, fieldCapabilities, Collections.singletonMap("field_1.keyword", 2L));
-        ExtractedFields extractedFields = extractedFieldsDetector.detect();
+        Tuple<ExtractedFields, List<FieldSelection>> fieldExtraction = extractedFieldsDetector.detect();

-        assertThat(extractedFields.getAllFields().size(), equalTo(2));
-        List<String> extractedFieldNames = extractedFields.getAllFields().stream().map(ExtractedField::getName)
+        assertThat(fieldExtraction.v1().getAllFields().size(), equalTo(2));
+        List<String> extractedFieldNames = fieldExtraction.v1().getAllFields().stream().map(ExtractedField::getName)
             .collect(Collectors.toList());
         assertThat(extractedFieldNames, contains("field_1.keyword", "field_2"));
+
+        assertFieldSelectionContains(fieldExtraction.v2(),
+            FieldSelection.excluded("field_1", Collections.singleton("keyword"),
+                "[field_1.keyword] is required instead"),
+            FieldSelection.included("field_1.keyword", Collections.singleton("keyword"), true, FieldSelection.FeatureType.CATEGORICAL),
+            FieldSelection.included("field_2", Collections.singleton("float"), false, FieldSelection.FeatureType.NUMERICAL)
+        );
     }

     public void testDetect_GivenSeveralMultiFields_ShouldPickFirstSorted() {
@@ -600,12 +681,21 @@ public void testDetect_GivenSeveralMultiFields_ShouldPickFirstSorted() {
         ExtractedFieldsDetector extractedFieldsDetector = new ExtractedFieldsDetector(
             SOURCE_INDEX, buildRegressionConfig("field_2"), true, 100, fieldCapabilities, Collections.emptyMap());
-        ExtractedFields extractedFields = extractedFieldsDetector.detect();
+        Tuple<ExtractedFields, List<FieldSelection>> fieldExtraction = extractedFieldsDetector.detect();

-        assertThat(extractedFields.getAllFields().size(), equalTo(2));
-        List<String> extractedFieldNames = extractedFields.getAllFields().stream().map(ExtractedField::getName)
+        assertThat(fieldExtraction.v1().getAllFields().size(), equalTo(2));
+        List<String> extractedFieldNames = fieldExtraction.v1().getAllFields().stream().map(ExtractedField::getName)
             .collect(Collectors.toList());
         assertThat(extractedFieldNames, contains("field_1.keyword_1", "field_2"));
+
+        assertFieldSelectionContains(fieldExtraction.v2(),
+            FieldSelection.excluded("field_1", Collections.singleton("text"),
+                "[field_1.keyword_1] is preferred because it is aggregatable"),
+            FieldSelection.included("field_1.keyword_1", Collections.singleton("keyword"), false, FieldSelection.FeatureType.CATEGORICAL),
+            FieldSelection.excluded("field_1.keyword_2", Collections.singleton("keyword"), "[field_1.keyword_1] came first"),
+            FieldSelection.excluded("field_1.keyword_3", Collections.singleton("keyword"), "[field_1.keyword_1] came first"),
+            FieldSelection.included("field_2", Collections.singleton("float"), true, FieldSelection.FeatureType.NUMERICAL)
+        );
     }

     public void testDetect_GivenMultiFields_OverDocValueLimit() {
@@ -617,12 +707,19 @@ public void testDetect_GivenMultiFields_OverDocValueLimit() {
         ExtractedFieldsDetector extractedFieldsDetector = new ExtractedFieldsDetector(
             SOURCE_INDEX, buildRegressionConfig("field_2"), true, 0, fieldCapabilities, Collections.emptyMap());
-        ExtractedFields extractedFields = extractedFieldsDetector.detect();
+        Tuple<ExtractedFields, List<FieldSelection>> fieldExtraction = extractedFieldsDetector.detect();

-        assertThat(extractedFields.getAllFields().size(), equalTo(2));
-        List<String> extractedFieldNames = extractedFields.getAllFields().stream().map(ExtractedField::getName)
+        assertThat(fieldExtraction.v1().getAllFields().size(), equalTo(2));
+        List<String> extractedFieldNames = fieldExtraction.v1().getAllFields().stream().map(ExtractedField::getName)
             .collect(Collectors.toList());
         assertThat(extractedFieldNames, contains("field_1", "field_2"));
+
+        assertFieldSelectionContains(fieldExtraction.v2(),
+            FieldSelection.included("field_1", Collections.singleton("text"), false, FieldSelection.FeatureType.CATEGORICAL),
+            FieldSelection.excluded("field_1.keyword_1", Collections.singleton("keyword"),
+                "[field_1] is preferred because it supports fetching from source"),
+            FieldSelection.included("field_2", Collections.singleton("float"), true, FieldSelection.FeatureType.NUMERICAL)
+        );
     }

     public void testDetect_GivenParentAndMultiFieldBothAggregatable() {
@@ -635,12 +732,20 @@ public void testDetect_GivenParentAndMultiFieldBothAggregatable() {
         ExtractedFieldsDetector extractedFieldsDetector = new ExtractedFieldsDetector(
             SOURCE_INDEX, buildRegressionConfig("field_2.double"), true, 100, fieldCapabilities, Collections.emptyMap());
-        ExtractedFields extractedFields = extractedFieldsDetector.detect();
+        Tuple<ExtractedFields, List<FieldSelection>> fieldExtraction = extractedFieldsDetector.detect();

-        assertThat(extractedFields.getAllFields().size(), equalTo(2));
-        List<String> extractedFieldNames = extractedFields.getAllFields().stream().map(ExtractedField::getName)
+        assertThat(fieldExtraction.v1().getAllFields().size(), equalTo(2));
+        List<String> extractedFieldNames = fieldExtraction.v1().getAllFields().stream().map(ExtractedField::getName)
             .collect(Collectors.toList());
         assertThat(extractedFieldNames, contains("field_1", "field_2.double"));
+
+        assertFieldSelectionContains(fieldExtraction.v2(),
+            FieldSelection.included("field_1", Collections.singleton("keyword"), false, FieldSelection.FeatureType.CATEGORICAL),
+            FieldSelection.excluded("field_1.keyword", Collections.singleton("keyword"),
+                "[field_1] is preferred because it is aggregatable"),
+            FieldSelection.included("field_2.double", Collections.singleton("double"), true, FieldSelection.FeatureType.NUMERICAL),
+            FieldSelection.excluded("field_2.keyword", Collections.singleton("float"), "[field_2.double] is required instead")
+        );
     }

     public void testDetect_GivenParentAndMultiFieldNoneAggregatable() {
@@ -652,12 +757,19 @@ public void testDetect_GivenParentAndMultiFieldNoneAggregatable() {
         ExtractedFieldsDetector extractedFieldsDetector = new ExtractedFieldsDetector(
             SOURCE_INDEX, buildRegressionConfig("field_2"), true, 100, fieldCapabilities, Collections.emptyMap());
-        ExtractedFields extractedFields = extractedFieldsDetector.detect();
+        Tuple<ExtractedFields, List<FieldSelection>> fieldExtraction = extractedFieldsDetector.detect();

-        assertThat(extractedFields.getAllFields().size(), equalTo(2));
-        List<String> extractedFieldNames = extractedFields.getAllFields().stream().map(ExtractedField::getName)
+        assertThat(fieldExtraction.v1().getAllFields().size(), equalTo(2));
+        List<String> extractedFieldNames = fieldExtraction.v1().getAllFields().stream().map(ExtractedField::getName)
             .collect(Collectors.toList());
         assertThat(extractedFieldNames, contains("field_1", "field_2"));
+
+        assertFieldSelectionContains(fieldExtraction.v2(),
+            FieldSelection.included("field_1", Collections.singleton("text"), false, FieldSelection.FeatureType.CATEGORICAL),
+            FieldSelection.excluded("field_1.text", Collections.singleton("text"),
+                "[field_1] is preferred because none of the multi-fields are aggregatable"),
+            FieldSelection.included("field_2", Collections.singleton("float"), true, FieldSelection.FeatureType.NUMERICAL)
+        );
     }

     public void testDetect_GivenMultiFields_AndExplicitlyIncludedFields() {
@@ -670,12 +782,18 @@ public void testDetect_GivenMultiFields_AndExplicitlyIncludedFields() {
         ExtractedFieldsDetector extractedFieldsDetector = new ExtractedFieldsDetector(
             SOURCE_INDEX, buildRegressionConfig("field_2", analyzedFields), false, 100, fieldCapabilities, Collections.emptyMap());
-        ExtractedFields extractedFields = extractedFieldsDetector.detect();
+        Tuple<ExtractedFields, List<FieldSelection>> fieldExtraction = extractedFieldsDetector.detect();

-        assertThat(extractedFields.getAllFields().size(), equalTo(2));
-        List<String> extractedFieldNames = extractedFields.getAllFields().stream().map(ExtractedField::getName)
+        assertThat(fieldExtraction.v1().getAllFields().size(), equalTo(2));
+        List<String> extractedFieldNames = fieldExtraction.v1().getAllFields().stream().map(ExtractedField::getName)
             .collect(Collectors.toList());
         assertThat(extractedFieldNames, contains("field_1", "field_2"));
+
+        assertFieldSelectionContains(fieldExtraction.v2(),
+            FieldSelection.included("field_1", Collections.singleton("text"), false, FieldSelection.FeatureType.CATEGORICAL),
+            FieldSelection.excluded("field_1.keyword", Collections.singleton("keyword"), "field not in includes list"),
+            FieldSelection.included("field_2", Collections.singleton("float"), true, FieldSelection.FeatureType.NUMERICAL)
+        );
     }

     private static DataFrameAnalyticsConfig buildOutlierDetectionConfig() {
@@ -715,6 +833,21 @@ private static DataFrameAnalyticsConfig buildClassificationConfig(String depende
             .build();
     }

+    /**
+     * We assert each field individually to get useful error messages in case of failure
+     */
+    private static void assertFieldSelectionContains(List<FieldSelection> actual, FieldSelection... expected) {
+        assertThat(actual.size(), equalTo(expected.length));
+        for (int i = 0; i < expected.length; i++) {
+            assertThat("i = " + i, actual.get(i).getName(), equalTo(expected[i].getName()));
+            assertThat("i = " + i, actual.get(i).getMappingTypes(), equalTo(expected[i].getMappingTypes()));
+            assertThat("i = " + i, actual.get(i).isIncluded(), equalTo(expected[i].isIncluded()));
+            assertThat("i = " + i, actual.get(i).isRequired(), equalTo(expected[i].isRequired()));
+            assertThat("i = " + i, actual.get(i).getFeatureType(), equalTo(expected[i].getFeatureType()));
+            assertThat("i = " + i, actual.get(i).getReason(), equalTo(expected[i].getReason()));
+        }
+    }
+
     private static class MockFieldCapsResponseBuilder {

         private final Map<String, Map<String, FieldCapabilities>> fieldCaps = new HashMap<>();
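Note on the new detect() contract: it now returns both the fields to extract and a per-field explanation. Below is a minimal sketch of how calling code might consume the returned Tuple, using only the accessors exercised by the tests above; the detector setup (config, field capabilities) and the ML class imports are assumed to be in place as elsewhere in this patch.

    import java.util.List;
    import org.elasticsearch.common.collect.Tuple;
    // ExtractedField, ExtractedFields and FieldSelection imports follow the packages touched by this patch.

    Tuple<ExtractedFields, List<FieldSelection>> fieldExtraction = extractedFieldsDetector.detect();

    // v1() holds the fields that will actually be extracted for the analysis.
    for (ExtractedField field : fieldExtraction.v1().getAllFields()) {
        System.out.println(field.getName() + " extracted via " + field.getMethod());
    }

    // v2() holds one FieldSelection per mapped field; excluded entries carry a reason.
    for (FieldSelection selection : fieldExtraction.v2()) {
        String status = selection.isIncluded()
            ? "included as " + selection.getFeatureType()
            : "excluded: " + selection.getReason();
        System.out.println(selection.getName() + " -> " + status);
    }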
diff --git a/x-pack/plugin/src/test/resources/rest-api-spec/api/ml.estimate_memory_usage.json b/x-pack/plugin/src/test/resources/rest-api-spec/api/ml.estimate_memory_usage.json
deleted file mode 100644
index 99bd6527de3b1..0000000000000
--- a/x-pack/plugin/src/test/resources/rest-api-spec/api/ml.estimate_memory_usage.json
+++ /dev/null
@@ -1,21 +0,0 @@
-{
-  "ml.estimate_memory_usage": {
-    "documentation": {
-      "url": "http://www.elastic.co/guide/en/elasticsearch/reference/current/estimate-memory-usage-dfanalytics.html"
-    },
-    "stability": "experimental",
-    "url": {
-      "paths" : [
-        {
-          "path" : "/_ml/data_frame/analytics/_estimate_memory_usage",
-          "methods": [ "POST" ],
-          "parts": {}
-        }
-      ]
-    },
-    "body": {
-      "description" : "Memory usage estimation definition",
-      "required" : true
-    }
-  }
-}
diff --git a/x-pack/plugin/src/test/resources/rest-api-spec/api/ml.explain_data_frame_analytics.json b/x-pack/plugin/src/test/resources/rest-api-spec/api/ml.explain_data_frame_analytics.json
new file mode 100644
index 0000000000000..6969cf9a49f13
--- /dev/null
+++ b/x-pack/plugin/src/test/resources/rest-api-spec/api/ml.explain_data_frame_analytics.json
@@ -0,0 +1,31 @@
+{
+  "ml.explain_data_frame_analytics": {
+    "documentation": {
+      "url": "http://www.elastic.co/guide/en/elasticsearch/reference/current/explain-dfanalytics.html"
+    },
+    "stability": "experimental",
+    "url": {
+      "paths" : [
+        {
+          "path" : "/_ml/data_frame/analytics/_explain",
+          "methods": [ "GET", "POST" ],
+          "parts": {}
+        },
+        {
+          "path" : "/_ml/data_frame/analytics/{id}/_explain",
+          "methods": [ "GET", "POST" ],
+          "parts":{
+            "id":{
+              "type":"string",
+              "description":"The ID of the data frame analytics to explain"
+            }
+          }
+        }
+      ]
+    },
+    "body": {
+      "description" : "The data frame analytics config to explain",
+      "required" : false
+    }
+  }
+}
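The spec above registers GET and POST on both /_ml/data_frame/analytics/_explain and /_ml/data_frame/analytics/{id}/_explain, with the body optional. As a rough usage sketch against these paths (not taken from this patch; the client variable, index and job names are made up), the endpoint can be exercised with the low-level REST client:

    import org.apache.http.util.EntityUtils;
    import org.elasticsearch.client.Request;
    import org.elasticsearch.client.Response;
    import org.elasticsearch.client.RestClient;

    // Explain an ad-hoc config by POSTing it to _explain.
    Request request = new Request("POST", "/_ml/data_frame/analytics/_explain");
    request.setJsonEntity("{\"source\":{\"index\":\"index-source\"},\"analysis\":{\"outlier_detection\":{}}}");
    Response response = lowLevelClient.performRequest(request);
    // The response body carries memory_estimation and field_selection, as asserted in the YAML tests below.
    String explanation = EntityUtils.toString(response.getEntity());

    // Explain an existing job by id; no body needed.
    Response byId = lowLevelClient.performRequest(new Request("GET", "/_ml/data_frame/analytics/my-job/_explain"));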
diff --git a/x-pack/plugin/src/test/resources/rest-api-spec/test/ml/data_frame_analytics_memory_usage_estimation.yml b/x-pack/plugin/src/test/resources/rest-api-spec/test/ml/data_frame_analytics_memory_usage_estimation.yml
deleted file mode 100644
index 39fe8005fa8cb..0000000000000
--- a/x-pack/plugin/src/test/resources/rest-api-spec/test/ml/data_frame_analytics_memory_usage_estimation.yml
+++ /dev/null
@@ -1,84 +0,0 @@
----
-setup:
-  - do:
-      indices.create:
-        index: index-source
-        body:
-          mappings:
-            properties:
-              x:
-                type: float
-              y:
-                type: float
-
----
-"Test memory usage estimation for empty data frame":
-  - do:
-      catch: /Unable to estimate memory usage as no documents in the source indices \[index-source\] contained all the fields selected for analysis/
-      ml.estimate_memory_usage:
-        body:
-          source: { index: "index-source" }
-          analysis: { outlier_detection: {} }
-
-  - do:
-      index:
-        index: index-source
-        refresh: true
-        body: { x: 1 }
-  - match: { result: "created" }
-
-  # Note that value for "y" is missing and outlier detection analysis does not support missing values.
-  # Hence, the data frame is still considered empty.
-  - do:
-      catch: /Unable to estimate memory usage as no documents in the source indices \[index-source\] contained all the fields selected for analysis/
-      ml.estimate_memory_usage:
-        body:
-          source: { index: "index-source" }
-          analysis: { outlier_detection: {} }
-
----
-"Test memory usage estimation for non-empty data frame":
-  - do:
-      index:
-        index: index-source
-        refresh: true
-        body: { x: 1, y: 10 }
-  - match: { result: "created" }
-
-  - do:
-      ml.estimate_memory_usage:
-        body:
-          source: { index: "index-source" }
-          analysis: { outlier_detection: {} }
-  - match: { expected_memory_without_disk: "3kb" }
-  - match: { expected_memory_with_disk: "3kb" }
-
-  - do:
-      index:
-        index: index-source
-        refresh: true
-        body: { x: 2, y: 20 }
-  - match: { result: "created" }
-
-  - do:
-      ml.estimate_memory_usage:
-        body:
-          source: { index: "index-source" }
-          analysis: { outlier_detection: {} }
-  - match: { expected_memory_without_disk: "4kb" }
-  - match: { expected_memory_with_disk: "4kb" }
-
-  - do:
-      index:
-        index: index-source
-        refresh: true
-        body: { x: 3, y: 30 }
-  - match: { result: "created" }
-
-  - do:
-      ml.estimate_memory_usage:
-        body:
-          source: { index: "index-source" }
-          analysis: { outlier_detection: {} }
-  - match: { expected_memory_without_disk: "6kb" }
-  - match: { expected_memory_with_disk: "5kb" }
diff --git a/x-pack/plugin/src/test/resources/rest-api-spec/test/ml/explain_data_frame_analytics.yml b/x-pack/plugin/src/test/resources/rest-api-spec/test/ml/explain_data_frame_analytics.yml
new file mode 100644
index 0000000000000..f42964272568a
--- /dev/null
+++ b/x-pack/plugin/src/test/resources/rest-api-spec/test/ml/explain_data_frame_analytics.yml
@@ -0,0 +1,308 @@
+---
+"Test neither job id nor body":
+  - do:
+      catch: /Please provide a job \[id\] or the config object/
+      ml.explain_data_frame_analytics:
+        id: ""
+
+---
+"Test both job id and body":
+  - do:
+      catch: /Please provide either a job \[id\] or the config object but not both/
+      ml.explain_data_frame_analytics:
+        id: "foo"
+        body:
+          source: { index: "index-source" }
+          analysis: { outlier_detection: {} }
+
+---
+"Test missing job":
+  - do:
+      catch: missing
+      ml.explain_data_frame_analytics:
+        id: "no_such_job"
+
+---
+"Test id that matches multiple jobs":
+
+  - do:
+      indices.create:
+        index: index-source
+
+  - do:
+      ml.put_data_frame_analytics:
+        id: "foo-1"
+        body: >
+          {
+            "source": {
+              "index": "index-source"
+            },
+            "dest": {
+              "index": "index-dest"
+            },
+            "analysis": {"outlier_detection":{}}
+          }
+
+  - do:
+      ml.put_data_frame_analytics:
+        id: "foo-2"
+        body: >
+          {
+            "source": {
+              "index": "index-source"
+            },
+            "dest": {
+              "index": "index-dest"
+            },
+            "analysis": {"outlier_detection":{}}
+          }
+
+  - do:
+      catch: /expected only one config but matched \[foo-1, foo-2\]/
+      ml.explain_data_frame_analytics:
+        id: "foo-*"
+
+---
+"Test empty data frame given body":
+
+  - do:
+      indices.create:
+        index: index-source
+        body:
+          mappings:
+            properties:
+              x:
+                type: float
+              y:
+                type: float
+
+  - do:
+      catch: /Unable to estimate memory usage as no documents in the source indices \[index-source\] contained all the fields selected for analysis/
+      ml.explain_data_frame_analytics:
+        body:
+          source: { index: "index-source" }
+          analysis: { outlier_detection: {} }
+
+  - do:
+      index:
+        index: index-source
+        refresh: true
+        body: { x: 1 }
+  - match: { result: "created" }
+
+  # Note that value for "y" is missing and outlier detection analysis does not support missing values.
+  # Hence, the data frame is still considered empty.
+  - do:
+      catch: /Unable to estimate memory usage as no documents in the source indices \[index-source\] contained all the fields selected for analysis/
+      ml.explain_data_frame_analytics:
+        body:
+          source: { index: "index-source" }
+          analysis: { outlier_detection: {} }
+
+---
+"Test non-empty data frame given body":
+
+  - do:
+      indices.create:
+        index: index-source
+        body:
+          mappings:
+            properties:
+              x:
+                type: float
+              y:
+                type: float
+
+  - do:
+      index:
+        index: index-source
+        refresh: true
+        body: { x: 1, y: 10 }
+  - match: { result: "created" }
+
+  - do:
+      ml.explain_data_frame_analytics:
+        body:
+          source: { index: "index-source" }
+          analysis: { outlier_detection: {} }
+  - match: { memory_estimation.expected_memory_without_disk: "3kb" }
+  - match: { memory_estimation.expected_memory_with_disk: "3kb" }
+  - length: { field_selection: 2 }
+  - match: { field_selection.0.name: "x" }
+  - match: { field_selection.0.mapping_types: ["float"] }
+  - match: { field_selection.0.is_included: true }
+  - match: { field_selection.0.is_required: false }
+  - match: { field_selection.0.feature_type: "numerical" }
+  - is_false: field_selection.0.reason
+  - match: { field_selection.1.name: "y" }
+  - match: { field_selection.1.mapping_types: ["float"] }
+  - match: { field_selection.1.is_included: true }
+  - match: { field_selection.1.is_required: false }
+  - match: { field_selection.1.feature_type: "numerical" }
+  - is_false: field_selection.1.reason
+
+  - do:
+      index:
+        index: index-source
+        refresh: true
+        body: { x: 2, y: 20 }
+  - match: { result: "created" }
+
+  - do:
+      ml.explain_data_frame_analytics:
+        body:
+          source: { index: "index-source" }
+          analysis: { outlier_detection: {} }
+  - match: { memory_estimation.expected_memory_without_disk: "4kb" }
+  - match: { memory_estimation.expected_memory_with_disk: "4kb" }
+
+  - do:
+      index:
+        index: index-source
+        refresh: true
+        body: { x: 3, y: 30 }
+  - match: { result: "created" }
+
+  - do:
+      ml.explain_data_frame_analytics:
+        body:
+          source: { index: "index-source" }
+          analysis: { outlier_detection: {} }
+  - match: { memory_estimation.expected_memory_without_disk: "6kb" }
+  - match: { memory_estimation.expected_memory_with_disk: "5kb" }
+
+---
+"Test field_selection given body":
+
+  - do:
+      indices.create:
+        index: index-source
+        body:
+          mappings:
+            properties:
+              field_1:
+                type: integer
+              field_2:
+                type: double
+              field_3:
+                type: date
+
+  - do:
+      index:
+        index: index-source
+        refresh: true
+        body: { field_1: 3, field_2: 3.14, field_3: "2019-11-11T00:00:00", field_4: "blah" }
+  - match: { result: "created" }
+
+  - do:
+      ml.explain_data_frame_analytics:
+        body:
+          source: { index: "index-source" }
+          analysis: { regression: { dependent_variable: "field_1" } }
+  - is_true: memory_estimation.expected_memory_without_disk
+  - is_true: memory_estimation.expected_memory_with_disk
+  - length: { field_selection: 5 }
+  - match: { field_selection.0.name: "field_1" }
+  - match: { field_selection.0.mapping_types: ["integer"] }
+  - match: { field_selection.0.is_included: true }
+  - match: { field_selection.0.is_required: true }
+  - match: { field_selection.0.feature_type: "numerical" }
+  - is_false: field_selection.0.reason
+  - match: { field_selection.1.name: "field_2" }
+  - match: { field_selection.1.mapping_types: ["double"] }
+  - match: { field_selection.1.is_included: true }
+  - match: { field_selection.1.is_required: false }
+  - match: { field_selection.1.feature_type: "numerical" }
+  - is_false: field_selection.1.reason
+  - match: { field_selection.2.name: "field_3" }
+  - match: { field_selection.2.mapping_types: ["date"] }
+  - match: { field_selection.2.is_included: false }
+  - match: { field_selection.2.is_required: false }
+  - is_false: field_selection.2.feature_type
+  - match: { field_selection.2.reason: "unsupported type; supported types are [boolean, byte, double, float, half_float, integer, ip, keyword, long, scaled_float, short, text]" }
+  - match: { field_selection.3.name: "field_4" }
+  - match: { field_selection.3.mapping_types: ["text"] }
+  - match: { field_selection.3.is_included: false }
+  - match: { field_selection.3.is_required: false }
+  - is_false: field_selection.3.feature_type
+  - match: { field_selection.3.reason: "[field_4.keyword] is preferred because it is aggregatable" }
+  - match: { field_selection.4.name: "field_4.keyword" }
+  - match: { field_selection.4.mapping_types: ["keyword"] }
+  - match: { field_selection.4.is_included: true }
+  - match: { field_selection.4.is_required: false }
+  - match: { field_selection.4.feature_type: "categorical" }
+  - is_false: field_selection.4.reason
+
+---
+"Test field_selection given job":
+
+  - do:
+      indices.create:
+        index: index-source
+        body:
+          mappings:
+            properties:
+              field_1:
+                type: integer
+              field_2:
+                type: double
+              field_3:
+                type: date
+
+  - do:
+      index:
+        index: index-source
+        refresh: true
+        body: { field_1: 3, field_2: 3.14, field_3: "2019-11-11T00:00:00", field_4: "blah" }
+  - match: { result: "created" }
+
+  - do:
+      ml.put_data_frame_analytics:
+        id: "got-a-job-for-this-one"
+        body: >
+          {
+            "source": {
+              "index": "index-source"
+            },
+            "dest": {
+              "index": "index-dest"
+            },
+            "analysis": {"regression":{ "dependent_variable": "field_1" }}
+          }
+
+  - do:
+      ml.explain_data_frame_analytics:
+        id: "got-a-job-for-this-one"
+  - is_true: memory_estimation.expected_memory_without_disk
+  - is_true: memory_estimation.expected_memory_with_disk
+  - length: { field_selection: 5 }
+  - match: { field_selection.0.name: "field_1" }
+  - match: { field_selection.0.mapping_types: ["integer"] }
+  - match: { field_selection.0.is_included: true }
+  - match: { field_selection.0.is_required: true }
+  - match: { field_selection.0.feature_type: "numerical" }
+  - is_false: field_selection.0.reason
+  - match: { field_selection.1.name: "field_2" }
+  - match: { field_selection.1.mapping_types: ["double"] }
+  - match: { field_selection.1.is_included: true }
+  - match: { field_selection.1.is_required: false }
+  - match: { field_selection.1.feature_type: "numerical" }
+  - is_false: field_selection.1.reason
+  - match: { field_selection.2.name: "field_3" }
+  - match: { field_selection.2.mapping_types: ["date"] }
+  - match: { field_selection.2.is_included: false }
+  - match: { field_selection.2.is_required: false }
+  - is_false: field_selection.2.feature_type
+  - match: { field_selection.2.reason: "unsupported type; supported types are [boolean, byte, double, float, half_float, integer, ip, keyword, long, scaled_float, short, text]" }
+  - match: { field_selection.3.name: "field_4" }
+  - match: { field_selection.3.mapping_types: ["text"] }
+  - match: { field_selection.3.is_included: false }
+  - match: { field_selection.3.is_required: false }
+  - is_false: field_selection.3.feature_type
+  - match: { field_selection.3.reason: "[field_4.keyword] is preferred because it is aggregatable" }
+  - match: { field_selection.4.name: "field_4.keyword" }
+  - match: { field_selection.4.mapping_types: ["keyword"] }
+  - match: { field_selection.4.is_included: true }
+  - match: { field_selection.4.is_required: false }
+  - match: { field_selection.4.feature_type: "categorical" }
+  - is_false: field_selection.4.reason
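The field_selection keys asserted above (name, mapping_types, is_included, is_required, feature_type, reason) line up with the FieldSelection factory methods used in ExtractedFieldsDetectorTests earlier in this patch. A hedged sketch of that correspondence, with arguments inferred from the test usages:

    import java.util.Collections;

    // included(name, mappingTypes, isRequired, featureType) corresponds to an entry
    // with "is_included": true and no "reason", e.g. field_selection.0 above.
    FieldSelection field1 = FieldSelection.included(
        "field_1", Collections.singleton("integer"), true, FieldSelection.FeatureType.NUMERICAL);

    // excluded(name, mappingTypes, reason) corresponds to an entry with "is_included": false,
    // "is_required": false, no "feature_type", and the given "reason", e.g. field_selection.2 above.
    FieldSelection field3 = FieldSelection.excluded(
        "field_3", Collections.singleton("date"),
        "unsupported type; supported types are [boolean, byte, double, float, half_float, integer, ip, keyword, long, scaled_float, short, text]");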