Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,7 @@
import org.elasticsearch.client.ml.DeleteJobRequest;
import org.elasticsearch.client.ml.DeleteJobResponse;
import org.elasticsearch.client.ml.DeleteModelSnapshotRequest;
import org.elasticsearch.client.ml.EstimateMemoryUsageResponse;
import org.elasticsearch.client.ml.EvaluateDataFrameRequest;
import org.elasticsearch.client.ml.EvaluateDataFrameResponse;
import org.elasticsearch.client.ml.FindFileStructureRequest;
Expand Down Expand Up @@ -194,11 +195,13 @@
import java.util.concurrent.TimeUnit;
import java.util.stream.Collectors;

import static org.hamcrest.Matchers.allOf;
import static org.hamcrest.Matchers.closeTo;
import static org.hamcrest.Matchers.containsInAnyOrder;
import static org.hamcrest.Matchers.equalTo;
import static org.hamcrest.Matchers.greaterThan;
import static org.hamcrest.Matchers.hasSize;
import static org.hamcrest.Matchers.lessThan;
import static org.hamcrest.core.Is.is;

public class MlClientDocumentationIT extends ESRestHighLevelClientTestCase {
Expand Down Expand Up @@ -3262,6 +3265,72 @@ public void onFailure(Exception e) {
}
}

/**
 * Documentation test for the ML "estimate memory usage" client API.
 *
 * <p>The {@code // tag::...} / {@code // end::...} markers delimit snippets that are
 * extracted verbatim into {@code docs/java-rest/high-level/ml/estimate-memory-usage.asciidoc}
 * via {@code include-tagged::}; do not rename or move them without updating the docs.
 * Numbered callouts ({@code // <1>}, {@code // <2>}) map to the callout text in that file.
 */
public void testEstimateMemoryUsage() throws Exception {
    // Seed a small source index so the estimation endpoint has real data to analyze.
    createIndex("estimate-test-source-index");
    BulkRequest bulkRequest =
        new BulkRequest("estimate-test-source-index")
            // IMMEDIATE refresh so the indexed docs are visible to the estimate call below.
            .setRefreshPolicy(WriteRequest.RefreshPolicy.IMMEDIATE);
    for (int i = 0; i < 10; ++i) {
        bulkRequest.add(new IndexRequest().source(XContentType.JSON, "timestamp", 123456789L, "total", 10L));
    }
    RestHighLevelClient client = highLevelClient();
    client.bulk(bulkRequest, RequestOptions.DEFAULT);
    {
        // Synchronous variant: build a config, estimate, and read both estimates.
        // tag::estimate-memory-usage-request
        DataFrameAnalyticsConfig config = DataFrameAnalyticsConfig.builder()
            .setSource(DataFrameAnalyticsSource.builder().setIndex("estimate-test-source-index").build())
            .setAnalysis(OutlierDetection.createDefault())
            .build();
        PutDataFrameAnalyticsRequest request = new PutDataFrameAnalyticsRequest(config); // <1>
        // end::estimate-memory-usage-request

        // tag::estimate-memory-usage-execute
        EstimateMemoryUsageResponse response = client.machineLearning().estimateMemoryUsage(request, RequestOptions.DEFAULT);
        // end::estimate-memory-usage-execute

        // tag::estimate-memory-usage-response
        ByteSizeValue expectedMemoryWithoutDisk = response.getExpectedMemoryWithoutDisk(); // <1>
        ByteSizeValue expectedMemoryWithDisk = response.getExpectedMemoryWithDisk(); // <2>
        // end::estimate-memory-usage-response

        // We are pretty liberal here as this test does not aim at verifying concrete numbers but rather end-to-end user workflow.
        ByteSizeValue lowerBound = new ByteSizeValue(1, ByteSizeUnit.KB);
        ByteSizeValue upperBound = new ByteSizeValue(1, ByteSizeUnit.GB);
        assertThat(expectedMemoryWithoutDisk, allOf(greaterThan(lowerBound), lessThan(upperBound)));
        assertThat(expectedMemoryWithDisk, allOf(greaterThan(lowerBound), lessThan(upperBound)));
    }
    {
        // Asynchronous variant: same request, executed via the listener-based API.
        DataFrameAnalyticsConfig config = DataFrameAnalyticsConfig.builder()
            .setSource(DataFrameAnalyticsSource.builder().setIndex("estimate-test-source-index").build())
            .setAnalysis(OutlierDetection.createDefault())
            .build();
        PutDataFrameAnalyticsRequest request = new PutDataFrameAnalyticsRequest(config);
        // tag::estimate-memory-usage-execute-listener
        ActionListener<EstimateMemoryUsageResponse> listener = new ActionListener<EstimateMemoryUsageResponse>() {
            @Override
            public void onResponse(EstimateMemoryUsageResponse response) {
                // <1>
            }

            @Override
            public void onFailure(Exception e) {
                // <2>
            }
        };
        // end::estimate-memory-usage-execute-listener

        // Replace the empty listener by a blocking listener in test
        final CountDownLatch latch = new CountDownLatch(1);
        listener = new LatchedActionListener<>(listener, latch);

        // tag::estimate-memory-usage-execute-async
        client.machineLearning().estimateMemoryUsageAsync(request, RequestOptions.DEFAULT, listener); // <1>
        // end::estimate-memory-usage-execute-async

        // Fail rather than hang forever if the async call never completes.
        assertTrue(latch.await(30L, TimeUnit.SECONDS));
    }
}

public void testCreateFilter() throws Exception {
RestHighLevelClient client = highLevelClient();
{
Expand Down
35 changes: 35 additions & 0 deletions docs/java-rest/high-level/ml/estimate-memory-usage.asciidoc
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
--
:api: estimate-memory-usage
:request: PutDataFrameAnalyticsRequest
:response: EstimateMemoryUsageResponse
--
[id="{upid}-{api}"]
=== Estimate memory usage API

The Estimate memory usage API is used to estimate memory usage of {dfanalytics}.
Estimation results can be used later on when deciding the appropriate value for the `model_memory_limit` setting.

The API accepts an +{request}+ object and returns an +{response}+.

[id="{upid}-{api}-request"]
==== Estimate memory usage Request

["source","java",subs="attributes,callouts,macros"]
--------------------------------------------------
include-tagged::{doc-tests-file}[{api}-request]
--------------------------------------------------
<1> Constructing a new request containing a {dataframe-analytics-config} for which memory usage estimation should be performed

include::../execution.asciidoc[]

[id="{upid}-{api}-response"]
==== Response

The returned +{response}+ contains the memory usage estimates.

["source","java",subs="attributes,callouts,macros"]
--------------------------------------------------
include-tagged::{doc-tests-file}[{api}-response]
--------------------------------------------------
<1> Estimated memory usage under the assumption that the whole {dfanalytics} should happen in memory (i.e. without overflowing to disk).
<2> Estimated memory usage under the assumption that overflowing to disk is allowed during {dfanalytics}.
2 changes: 2 additions & 0 deletions docs/java-rest/high-level/supported-apis.asciidoc
Original file line number Diff line number Diff line change
Expand Up @@ -295,6 +295,7 @@ The Java High Level REST Client supports the following Machine Learning APIs:
* <<{upid}-start-data-frame-analytics>>
* <<{upid}-stop-data-frame-analytics>>
* <<{upid}-evaluate-data-frame>>
* <<{upid}-estimate-memory-usage>>
* <<{upid}-put-filter>>
* <<{upid}-get-filters>>
* <<{upid}-update-filter>>
Expand Down Expand Up @@ -346,6 +347,7 @@ include::ml/delete-data-frame-analytics.asciidoc[]
include::ml/start-data-frame-analytics.asciidoc[]
include::ml/stop-data-frame-analytics.asciidoc[]
include::ml/evaluate-data-frame.asciidoc[]
include::ml/estimate-memory-usage.asciidoc[]
include::ml/put-filter.asciidoc[]
include::ml/get-filters.asciidoc[]
include::ml/update-filter.asciidoc[]
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -42,14 +42,14 @@ Serves as an advice on how to set `model_memory_limit` when creating {dfanalytic
[[ml-estimate-memory-usage-dfanalytics-results]]
==== {api-response-body-title}

`expected_memory_usage_with_one_partition`::
`expected_memory_without_disk`::
(string) Estimated memory usage under the assumption that the whole {dfanalytics} should happen in memory
(i.e. without overflowing to disk).

`expected_memory_usage_with_max_partitions`::
`expected_memory_with_disk`::
(string) Estimated memory usage under the assumption that overflowing to disk is allowed during {dfanalytics}.
`expected_memory_usage_with_max_partitions` is usually smaller than `expected_memory_usage_with_one_partition`
as using disk allows to limit the main memory needed to perform {dfanalytics}.
`expected_memory_with_disk` is usually smaller than `expected_memory_without_disk` because using disk makes it
possible to limit the main memory needed to perform {dfanalytics}.

[[ml-estimate-memory-usage-dfanalytics-example]]
==== {api-examples-title}
Expand All @@ -76,8 +76,8 @@ The API returns the following results:
[source,js]
----
{
"expected_memory_usage_with_one_partition": "128MB",
"expected_memory_usage_with_max_partitions": "32MB"
"expected_memory_without_disk": "128MB",
"expected_memory_with_disk": "32MB"
}
----
// TESTRESPONSE