From 435129a06a3d219806a60a380f9d33e417fd2e0b Mon Sep 17 00:00:00 2001 From: Przemyslaw Witek Date: Fri, 15 Nov 2019 10:52:15 +0100 Subject: [PATCH 1/2] Throw an exception when memory usage estimation endpoint encounters empty data frame. --- .../MemoryUsageEstimationProcessManager.java | 8 ++++++-- ...oryUsageEstimationProcessManagerTests.java | 9 ++++++--- ...rame_analytics_memory_usage_estimation.yml | 19 +++++++++++++++++-- 3 files changed, 29 insertions(+), 7 deletions(-) diff --git a/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/dataframe/process/MemoryUsageEstimationProcessManager.java b/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/dataframe/process/MemoryUsageEstimationProcessManager.java index c41e3038725f8..f7b11f5eaab73 100644 --- a/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/dataframe/process/MemoryUsageEstimationProcessManager.java +++ b/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/dataframe/process/MemoryUsageEstimationProcessManager.java @@ -9,6 +9,7 @@ import org.apache.logging.log4j.Logger; import org.apache.logging.log4j.message.ParameterizedMessage; import org.elasticsearch.action.ActionListener; +import org.elasticsearch.common.Strings; import org.elasticsearch.common.unit.ByteSizeUnit; import org.elasticsearch.common.unit.ByteSizeValue; import org.elasticsearch.xpack.core.ml.dataframe.DataFrameAnalyticsConfig; @@ -57,10 +58,13 @@ private MemoryUsageEstimationResult runJob(String jobId, DataFrameDataExtractorFactory dataExtractorFactory) { DataFrameDataExtractor dataExtractor = dataExtractorFactory.newExtractor(false); DataFrameDataExtractor.DataSummary dataSummary = dataExtractor.collectDataSummary(); - Set categoricalFields = dataExtractor.getCategoricalFields(config.getAnalysis()); if (dataSummary.rows == 0) { - return new MemoryUsageEstimationResult(ByteSizeValue.ZERO, ByteSizeValue.ZERO); + throw ExceptionsHelper.badRequestException( + "[{}] Unable to estimate memory usage as there are no analyzable data in source indices [{}].", + jobId, + Strings.arrayToCommaDelimitedString(config.getSource().getIndex())); } + Set categoricalFields = dataExtractor.getCategoricalFields(config.getAnalysis()); AnalyticsProcessConfig processConfig = new AnalyticsProcessConfig( jobId, diff --git a/x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/dataframe/process/MemoryUsageEstimationProcessManagerTests.java b/x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/dataframe/process/MemoryUsageEstimationProcessManagerTests.java index 7cc3e64c15b30..21495c4a92ee1 100644 --- a/x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/dataframe/process/MemoryUsageEstimationProcessManagerTests.java +++ b/x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/dataframe/process/MemoryUsageEstimationProcessManagerTests.java @@ -85,9 +85,12 @@ public void testRunJob_EmptyDataFrame() { processManager.runJobAsync(TASK_ID, dataFrameAnalyticsConfig, dataExtractorFactory, listener); - verify(listener).onResponse(resultCaptor.capture()); - MemoryUsageEstimationResult result = resultCaptor.getValue(); - assertThat(result, equalTo(PROCESS_RESULT_ZERO)); + verify(listener).onFailure(exceptionCaptor.capture()); + ElasticsearchException exception = (ElasticsearchException) exceptionCaptor.getValue(); + assertThat(exception.status(), equalTo(RestStatus.BAD_REQUEST)); + assertThat(exception.getMessage(), containsString(TASK_ID)); + assertThat( + exception.getMessage(), containsString("Unable to estimate memory usage as there are no analyzable data in source indices")); verifyNoMoreInteractions(process, listener); } diff --git a/x-pack/plugin/src/test/resources/rest-api-spec/test/ml/data_frame_analytics_memory_usage_estimation.yml b/x-pack/plugin/src/test/resources/rest-api-spec/test/ml/data_frame_analytics_memory_usage_estimation.yml index a58ea36aaacf2..e45356b572c6a 100644 --- a/x-pack/plugin/src/test/resources/rest-api-spec/test/ml/data_frame_analytics_memory_usage_estimation.yml +++ b/x-pack/plugin/src/test/resources/rest-api-spec/test/ml/data_frame_analytics_memory_usage_estimation.yml @@ -14,12 +14,27 @@ setup: --- "Test memory usage estimation for empty data frame": - do: + catch: /Unable to estimate memory usage as there are no analyzable data in source indices \[index-source\]/ + ml.estimate_memory_usage: + body: + source: { index: "index-source" } + analysis: { outlier_detection: {} } + + - do: + index: + index: index-source + refresh: true + body: { x: 1 } + - match: { result: "created" } + + # Note that value for "y" is missing and outlier detection analysis does not support missing values. + # Hence, the data frame is still considered empty. + - do: + catch: /Unable to estimate memory usage as there are no analyzable data in source indices \[index-source\]/ ml.estimate_memory_usage: body: source: { index: "index-source" } analysis: { outlier_detection: {} } - - match: { expected_memory_without_disk: "0" } - - match: { expected_memory_with_disk: "0" } --- "Test memory usage estimation for non-empty data frame": From b901d274f49b91a07436789e02b2ca4d9c068ff0 Mon Sep 17 00:00:00 2001 From: Przemyslaw Witek Date: Fri, 15 Nov 2019 13:10:46 +0100 Subject: [PATCH 2/2] Refine message for both data_frame/analytics/_start and data_frame/analytics/_estimate_memory_usage actions --- x-pack/plugin/ml/qa/ml-with-security/build.gradle | 1 + .../TransportStartDataFrameAnalyticsAction.java | 12 +++++++----- .../process/MemoryUsageEstimationProcessManager.java | 5 ++++- .../MemoryUsageEstimationProcessManagerTests.java | 5 +---- .../data_frame_analytics_memory_usage_estimation.yml | 4 ++-- .../test/ml/start_data_frame_analytics.yml | 2 +- 6 files changed, 16 insertions(+), 13 deletions(-) diff --git a/x-pack/plugin/ml/qa/ml-with-security/build.gradle b/x-pack/plugin/ml/qa/ml-with-security/build.gradle index 1929b03ba4399..063ad43a9217b 100644 --- a/x-pack/plugin/ml/qa/ml-with-security/build.gradle +++ b/x-pack/plugin/ml/qa/ml-with-security/build.gradle @@ -92,6 +92,7 @@ integTest.runner { 'ml/data_frame_analytics_crud/Test put classification given num_top_classes is greater than 1k', 'ml/data_frame_analytics_crud/Test put classification given training_percent is less than one', 'ml/data_frame_analytics_crud/Test put classification given training_percent is greater than hundred', + 'ml/data_frame_analytics_memory_usage_estimation/Test memory usage estimation for empty data frame', 'ml/evaluate_data_frame/Test given missing index', 'ml/evaluate_data_frame/Test given index does not exist', 'ml/evaluate_data_frame/Test given missing evaluation', diff --git a/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/action/TransportStartDataFrameAnalyticsAction.java b/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/action/TransportStartDataFrameAnalyticsAction.java index 46bec44ea562a..849d948f671e7 100644 --- a/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/action/TransportStartDataFrameAnalyticsAction.java +++ b/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/action/TransportStartDataFrameAnalyticsAction.java @@ -239,11 +239,13 @@ private void getStartContext(String id, ActionListener finalListen .collectDataSummaryAsync(ActionListener.wrap( dataSummary -> { if (dataSummary.rows == 0) { - finalListener.onFailure(new ElasticsearchStatusException( - "Unable to start {} as there are no analyzable data in source indices [{}].", - RestStatus.BAD_REQUEST, - id, - Strings.arrayToCommaDelimitedString(startContext.config.getSource().getIndex()) + finalListener.onFailure(ExceptionsHelper.badRequestException( + "Unable to start {} as no documents in the source indices [{}] contained all the fields " + + "selected for analysis. If you are relying on automatic field selection then there are " + + "currently mapped fields that do not exist in any indexed documents, and you will have " + + "to switch to explicit field selection and include only fields that exist in indexed " + + "documents.", + id, Strings.arrayToCommaDelimitedString(startContext.config.getSource().getIndex()) )); } else { finalListener.onResponse(startContext); diff --git a/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/dataframe/process/MemoryUsageEstimationProcessManager.java b/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/dataframe/process/MemoryUsageEstimationProcessManager.java index f7b11f5eaab73..2e5189eb249eb 100644 --- a/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/dataframe/process/MemoryUsageEstimationProcessManager.java +++ b/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/dataframe/process/MemoryUsageEstimationProcessManager.java @@ -60,7 +60,10 @@ private MemoryUsageEstimationResult runJob(String jobId, DataFrameDataExtractor.DataSummary dataSummary = dataExtractor.collectDataSummary(); if (dataSummary.rows == 0) { throw ExceptionsHelper.badRequestException( - "[{}] Unable to estimate memory usage as there are no analyzable data in source indices [{}].", + "[{}] Unable to estimate memory usage as no documents in the source indices [{}] contained all the fields selected for " + + "analysis. If you are relying on automatic field selection then there are currently mapped fields that do not exist " + + "in any indexed documents, and you will have to switch to explicit field selection and include only fields that " + + "exist in indexed documents.", jobId, Strings.arrayToCommaDelimitedString(config.getSource().getIndex())); } diff --git a/x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/dataframe/process/MemoryUsageEstimationProcessManagerTests.java b/x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/dataframe/process/MemoryUsageEstimationProcessManagerTests.java index 21495c4a92ee1..5dc015d86e715 100644 --- a/x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/dataframe/process/MemoryUsageEstimationProcessManagerTests.java +++ b/x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/dataframe/process/MemoryUsageEstimationProcessManagerTests.java @@ -42,8 +42,6 @@ public class MemoryUsageEstimationProcessManagerTests extends ESTestCase { private static final String CONFIG_ID = "dummy"; private static final int NUM_ROWS = 100; private static final int NUM_COLS = 4; - private static final MemoryUsageEstimationResult PROCESS_RESULT_ZERO = - new MemoryUsageEstimationResult(ByteSizeValue.ZERO, ByteSizeValue.ZERO); private static final MemoryUsageEstimationResult PROCESS_RESULT = new MemoryUsageEstimationResult(ByteSizeValue.parseBytesSizeValue("20kB", ""), ByteSizeValue.parseBytesSizeValue("10kB", "")); @@ -89,8 +87,7 @@ public void testRunJob_EmptyDataFrame() { ElasticsearchException exception = (ElasticsearchException) exceptionCaptor.getValue(); assertThat(exception.status(), equalTo(RestStatus.BAD_REQUEST)); assertThat(exception.getMessage(), containsString(TASK_ID)); - assertThat( - exception.getMessage(), containsString("Unable to estimate memory usage as there are no analyzable data in source indices")); + assertThat(exception.getMessage(), containsString("Unable to estimate memory usage")); verifyNoMoreInteractions(process, listener); } diff --git a/x-pack/plugin/src/test/resources/rest-api-spec/test/ml/data_frame_analytics_memory_usage_estimation.yml b/x-pack/plugin/src/test/resources/rest-api-spec/test/ml/data_frame_analytics_memory_usage_estimation.yml index e45356b572c6a..39fe8005fa8cb 100644 --- a/x-pack/plugin/src/test/resources/rest-api-spec/test/ml/data_frame_analytics_memory_usage_estimation.yml +++ b/x-pack/plugin/src/test/resources/rest-api-spec/test/ml/data_frame_analytics_memory_usage_estimation.yml @@ -14,7 +14,7 @@ setup: --- "Test memory usage estimation for empty data frame": - do: - catch: /Unable to estimate memory usage as there are no analyzable data in source indices \[index-source\]/ + catch: /Unable to estimate memory usage as no documents in the source indices \[index-source\] contained all the fields selected for analysis/ ml.estimate_memory_usage: body: source: { index: "index-source" } @@ -30,7 +30,7 @@ setup: # Note that value for "y" is missing and outlier detection analysis does not support missing values. # Hence, the data frame is still considered empty. - do: - catch: /Unable to estimate memory usage as there are no analyzable data in source indices \[index-source\]/ + catch: /Unable to estimate memory usage as no documents in the source indices \[index-source\] contained all the fields selected for analysis/ ml.estimate_memory_usage: body: source: { index: "index-source" } diff --git a/x-pack/plugin/src/test/resources/rest-api-spec/test/ml/start_data_frame_analytics.yml b/x-pack/plugin/src/test/resources/rest-api-spec/test/ml/start_data_frame_analytics.yml index 9f08ed89b1fde..0172a83f58ef2 100644 --- a/x-pack/plugin/src/test/resources/rest-api-spec/test/ml/start_data_frame_analytics.yml +++ b/x-pack/plugin/src/test/resources/rest-api-spec/test/ml/start_data_frame_analytics.yml @@ -86,7 +86,7 @@ } - do: - catch: /Unable to start empty-with-compatible-fields as there are no analyzable data in source indices \[empty-index-with-compatible-fields\]/ + catch: /Unable to start empty-with-compatible-fields as no documents in the source indices \[empty-index-with-compatible-fields\] contained all the fields selected for analysis/ ml.start_data_frame_analytics: id: "empty-with-compatible-fields" ---