diff --git a/client/rest-high-level/src/test/java/org/elasticsearch/client/documentation/MlClientDocumentationIT.java b/client/rest-high-level/src/test/java/org/elasticsearch/client/documentation/MlClientDocumentationIT.java index fe7d04a4e0a8d..0203a3c855d14 100644 --- a/client/rest-high-level/src/test/java/org/elasticsearch/client/documentation/MlClientDocumentationIT.java +++ b/client/rest-high-level/src/test/java/org/elasticsearch/client/documentation/MlClientDocumentationIT.java @@ -39,6 +39,7 @@ import org.elasticsearch.client.ml.DeleteCalendarEventRequest; import org.elasticsearch.client.ml.DeleteCalendarJobRequest; import org.elasticsearch.client.ml.DeleteCalendarRequest; +import org.elasticsearch.client.ml.DeleteDataFrameAnalyticsRequest; import org.elasticsearch.client.ml.DeleteDatafeedRequest; import org.elasticsearch.client.ml.DeleteExpiredDataRequest; import org.elasticsearch.client.ml.DeleteExpiredDataResponse; @@ -47,6 +48,8 @@ import org.elasticsearch.client.ml.DeleteJobRequest; import org.elasticsearch.client.ml.DeleteJobResponse; import org.elasticsearch.client.ml.DeleteModelSnapshotRequest; +import org.elasticsearch.client.ml.EvaluateDataFrameRequest; +import org.elasticsearch.client.ml.EvaluateDataFrameResponse; import org.elasticsearch.client.ml.FindFileStructureRequest; import org.elasticsearch.client.ml.FindFileStructureResponse; import org.elasticsearch.client.ml.FlushJobRequest; @@ -61,6 +64,10 @@ import org.elasticsearch.client.ml.GetCalendarsResponse; import org.elasticsearch.client.ml.GetCategoriesRequest; import org.elasticsearch.client.ml.GetCategoriesResponse; +import org.elasticsearch.client.ml.GetDataFrameAnalyticsRequest; +import org.elasticsearch.client.ml.GetDataFrameAnalyticsResponse; +import org.elasticsearch.client.ml.GetDataFrameAnalyticsStatsRequest; +import org.elasticsearch.client.ml.GetDataFrameAnalyticsStatsResponse; import org.elasticsearch.client.ml.GetModelSnapshotsRequest; import 
org.elasticsearch.client.ml.GetModelSnapshotsResponse; import org.elasticsearch.client.ml.GetDatafeedRequest; @@ -92,6 +99,8 @@ import org.elasticsearch.client.ml.PutCalendarJobRequest; import org.elasticsearch.client.ml.PutCalendarRequest; import org.elasticsearch.client.ml.PutCalendarResponse; +import org.elasticsearch.client.ml.PutDataFrameAnalyticsRequest; +import org.elasticsearch.client.ml.PutDataFrameAnalyticsResponse; import org.elasticsearch.client.ml.PutDatafeedRequest; import org.elasticsearch.client.ml.PutDatafeedResponse; import org.elasticsearch.client.ml.PutFilterRequest; @@ -101,8 +110,11 @@ import org.elasticsearch.client.ml.RevertModelSnapshotRequest; import org.elasticsearch.client.ml.RevertModelSnapshotResponse; import org.elasticsearch.client.ml.SetUpgradeModeRequest; +import org.elasticsearch.client.ml.StartDataFrameAnalyticsRequest; import org.elasticsearch.client.ml.StartDatafeedRequest; import org.elasticsearch.client.ml.StartDatafeedResponse; +import org.elasticsearch.client.ml.StopDataFrameAnalyticsRequest; +import org.elasticsearch.client.ml.StopDataFrameAnalyticsResponse; import org.elasticsearch.client.ml.StopDatafeedRequest; import org.elasticsearch.client.ml.StopDatafeedResponse; import org.elasticsearch.client.ml.UpdateDatafeedRequest; @@ -118,6 +130,21 @@ import org.elasticsearch.client.ml.datafeed.DatafeedStats; import org.elasticsearch.client.ml.datafeed.DatafeedUpdate; import org.elasticsearch.client.ml.datafeed.DelayedDataCheckConfig; +import org.elasticsearch.client.ml.dataframe.DataFrameAnalysis; +import org.elasticsearch.client.ml.dataframe.DataFrameAnalyticsConfig; +import org.elasticsearch.client.ml.dataframe.DataFrameAnalyticsDest; +import org.elasticsearch.client.ml.dataframe.DataFrameAnalyticsSource; +import org.elasticsearch.client.ml.dataframe.DataFrameAnalyticsState; +import org.elasticsearch.client.ml.dataframe.DataFrameAnalyticsStats; +import org.elasticsearch.client.ml.dataframe.OutlierDetection; +import 
org.elasticsearch.client.ml.dataframe.QueryConfig; +import org.elasticsearch.client.ml.dataframe.evaluation.EvaluationMetric; +import org.elasticsearch.client.ml.dataframe.evaluation.softclassification.AucRocMetric; +import org.elasticsearch.client.ml.dataframe.evaluation.softclassification.BinarySoftClassification; +import org.elasticsearch.client.ml.dataframe.evaluation.softclassification.ConfusionMatrixMetric; +import org.elasticsearch.client.ml.dataframe.evaluation.softclassification.ConfusionMatrixMetric.ConfusionMatrix; +import org.elasticsearch.client.ml.dataframe.evaluation.softclassification.PrecisionMetric; +import org.elasticsearch.client.ml.dataframe.evaluation.softclassification.RecallMetric; import org.elasticsearch.client.ml.filestructurefinder.FileStructure; import org.elasticsearch.client.ml.job.config.AnalysisConfig; import org.elasticsearch.client.ml.job.config.AnalysisLimits; @@ -139,13 +166,18 @@ import org.elasticsearch.client.ml.job.results.OverallBucket; import org.elasticsearch.client.ml.job.stats.JobStats; import org.elasticsearch.common.bytes.BytesReference; +import org.elasticsearch.common.unit.ByteSizeUnit; +import org.elasticsearch.common.unit.ByteSizeValue; import org.elasticsearch.common.unit.TimeValue; import org.elasticsearch.common.xcontent.XContentFactory; import org.elasticsearch.common.xcontent.XContentType; +import org.elasticsearch.index.query.MatchAllQueryBuilder; import org.elasticsearch.index.query.QueryBuilders; import org.elasticsearch.search.aggregations.AggregatorFactories; import org.elasticsearch.search.builder.SearchSourceBuilder; +import org.elasticsearch.search.fetch.subphase.FetchSourceContext; import org.elasticsearch.tasks.TaskId; +import org.hamcrest.CoreMatchers; import org.junit.After; import java.io.IOException; @@ -870,18 +902,7 @@ public void testPreviewDatafeed() throws Exception { client.machineLearning().putJob(new PutJobRequest(job), RequestOptions.DEFAULT); String datafeedId = job.getId() + "-feed"; 
String indexName = "preview_data_2"; - CreateIndexRequest createIndexRequest = new CreateIndexRequest(indexName); - createIndexRequest.mapping(XContentFactory.jsonBuilder().startObject() - .startObject("properties") - .startObject("timestamp") - .field("type", "date") - .endObject() - .startObject("total") - .field("type", "long") - .endObject() - .endObject() - .endObject()); - highLevelClient().indices().create(createIndexRequest, RequestOptions.DEFAULT); + createIndex(indexName); DatafeedConfig datafeed = DatafeedConfig.builder(datafeedId, job.getId()) .setIndices(indexName) .build(); @@ -938,18 +959,7 @@ public void testStartDatafeed() throws Exception { client.machineLearning().putJob(new PutJobRequest(job), RequestOptions.DEFAULT); String datafeedId = job.getId() + "-feed"; String indexName = "start_data_2"; - CreateIndexRequest createIndexRequest = new CreateIndexRequest(indexName); - createIndexRequest.mapping(XContentFactory.jsonBuilder().startObject() - .startObject("properties") - .startObject("timestamp") - .field("type", "date") - .endObject() - .startObject("total") - .field("type", "long") - .endObject() - .endObject() - .endObject()); - highLevelClient().indices().create(createIndexRequest, RequestOptions.DEFAULT); + createIndex(indexName); DatafeedConfig datafeed = DatafeedConfig.builder(datafeedId, job.getId()) .setIndices(indexName) .build(); @@ -1067,18 +1077,7 @@ public void testGetDatafeedStats() throws Exception { client.machineLearning().putJob(new PutJobRequest(secondJob), RequestOptions.DEFAULT); String datafeedId1 = job.getId() + "-feed"; String indexName = "datafeed_stats_data_2"; - CreateIndexRequest createIndexRequest = new CreateIndexRequest(indexName); - createIndexRequest.mapping(XContentFactory.jsonBuilder().startObject() - .startObject("properties") - .startObject("timestamp") - .field("type", "date") - .endObject() - .startObject("total") - .field("type", "long") - .endObject() - .endObject() - .endObject()); - 
highLevelClient().indices().create(createIndexRequest, RequestOptions.DEFAULT); + createIndex(indexName); DatafeedConfig datafeed = DatafeedConfig.builder(datafeedId1, job.getId()) .setIndices(indexName) .build(); @@ -2802,6 +2801,455 @@ public void onFailure(Exception e) { } } + public void testGetDataFrameAnalytics() throws Exception { + RestHighLevelClient client = highLevelClient(); + client.machineLearning().putDataFrameAnalytics(new PutDataFrameAnalyticsRequest(DF_ANALYTICS_CONFIG), RequestOptions.DEFAULT); + { + // tag::get-data-frame-analytics-request + GetDataFrameAnalyticsRequest request = new GetDataFrameAnalyticsRequest("my-analytics-config"); // <1> + // end::get-data-frame-analytics-request + + // tag::get-data-frame-analytics-execute + GetDataFrameAnalyticsResponse response = client.machineLearning().getDataFrameAnalytics(request, RequestOptions.DEFAULT); + // end::get-data-frame-analytics-execute + + // tag::get-data-frame-analytics-response + List configs = response.getAnalytics(); + // end::get-data-frame-analytics-response + + assertThat(configs.size(), equalTo(1)); + } + { + GetDataFrameAnalyticsRequest request = new GetDataFrameAnalyticsRequest("my-analytics-config"); + + // tag::get-data-frame-analytics-execute-listener + ActionListener listener = new ActionListener<>() { + @Override + public void onResponse(GetDataFrameAnalyticsResponse response) { + // <1> + } + + @Override + public void onFailure(Exception e) { + // <2> + } + }; + // end::get-data-frame-analytics-execute-listener + + // Replace the empty listener by a blocking listener in test + CountDownLatch latch = new CountDownLatch(1); + listener = new LatchedActionListener<>(listener, latch); + + // tag::get-data-frame-analytics-execute-async + client.machineLearning().getDataFrameAnalyticsAsync(request, RequestOptions.DEFAULT, listener); // <1> + // end::get-data-frame-analytics-execute-async + + assertTrue(latch.await(30L, TimeUnit.SECONDS)); + } + } + + public void 
testGetDataFrameAnalyticsStats() throws Exception { + RestHighLevelClient client = highLevelClient(); + client.machineLearning().putDataFrameAnalytics(new PutDataFrameAnalyticsRequest(DF_ANALYTICS_CONFIG), RequestOptions.DEFAULT); + { + // tag::get-data-frame-analytics-stats-request + GetDataFrameAnalyticsStatsRequest request = new GetDataFrameAnalyticsStatsRequest("my-analytics-config"); // <1> + // end::get-data-frame-analytics-stats-request + + // tag::get-data-frame-analytics-stats-execute + GetDataFrameAnalyticsStatsResponse response = + client.machineLearning().getDataFrameAnalyticsStats(request, RequestOptions.DEFAULT); + // end::get-data-frame-analytics-stats-execute + + // tag::get-data-frame-analytics-stats-response + List stats = response.getAnalyticsStats(); + // end::get-data-frame-analytics-stats-response + + assertThat(stats.size(), equalTo(1)); + } + { + GetDataFrameAnalyticsStatsRequest request = new GetDataFrameAnalyticsStatsRequest("my-analytics-config"); + + // tag::get-data-frame-analytics-stats-execute-listener + ActionListener listener = new ActionListener<>() { + @Override + public void onResponse(GetDataFrameAnalyticsStatsResponse response) { + // <1> + } + + @Override + public void onFailure(Exception e) { + // <2> + } + }; + // end::get-data-frame-analytics-stats-execute-listener + + // Replace the empty listener by a blocking listener in test + CountDownLatch latch = new CountDownLatch(1); + listener = new LatchedActionListener<>(listener, latch); + + // tag::get-data-frame-analytics-stats-execute-async + client.machineLearning().getDataFrameAnalyticsStatsAsync(request, RequestOptions.DEFAULT, listener); // <1> + // end::get-data-frame-analytics-stats-execute-async + + assertTrue(latch.await(30L, TimeUnit.SECONDS)); + } + } + + public void testPutDataFrameAnalytics() throws Exception { + RestHighLevelClient client = highLevelClient(); + { + // tag::put-data-frame-analytics-query-config + QueryConfig queryConfig = new QueryConfig(new 
MatchAllQueryBuilder()); + // end::put-data-frame-analytics-query-config + + // tag::put-data-frame-analytics-source-config + DataFrameAnalyticsSource sourceConfig = DataFrameAnalyticsSource.builder() // <1> + .setIndex("put-test-source-index") // <2> + .setQueryConfig(queryConfig) // <3> + .build(); + // end::put-data-frame-analytics-source-config + + // tag::put-data-frame-analytics-dest-config + DataFrameAnalyticsDest destConfig = DataFrameAnalyticsDest.builder() // <1> + .setIndex("put-test-dest-index") // <2> + .build(); + // end::put-data-frame-analytics-dest-config + + // tag::put-data-frame-analytics-analysis-default + DataFrameAnalysis outlierDetection = OutlierDetection.createDefault(); // <1> + // end::put-data-frame-analytics-analysis-default + + // tag::put-data-frame-analytics-analysis-customized + DataFrameAnalysis outlierDetectionCustomized = OutlierDetection.builder() // <1> + .setMethod(OutlierDetection.Method.DISTANCE_KNN) // <2> + .setNNeighbors(5) // <3> + .build(); + // end::put-data-frame-analytics-analysis-customized + + // tag::put-data-frame-analytics-analyzed-fields + FetchSourceContext analyzedFields = + new FetchSourceContext( + true, + new String[] { "included_field_1", "included_field_2" }, + new String[] { "excluded_field" }); + // end::put-data-frame-analytics-analyzed-fields + + // tag::put-data-frame-analytics-config + DataFrameAnalyticsConfig config = DataFrameAnalyticsConfig.builder("my-analytics-config") // <1> + .setSource(sourceConfig) // <2> + .setDest(destConfig) // <3> + .setAnalysis(outlierDetection) // <4> + .setAnalyzedFields(analyzedFields) // <5> + .setModelMemoryLimit(new ByteSizeValue(5, ByteSizeUnit.MB)) // <6> + .build(); + // end::put-data-frame-analytics-config + + // tag::put-data-frame-analytics-request + PutDataFrameAnalyticsRequest request = new PutDataFrameAnalyticsRequest(config); // <1> + // end::put-data-frame-analytics-request + + // tag::put-data-frame-analytics-execute + PutDataFrameAnalyticsResponse 
response = client.machineLearning().putDataFrameAnalytics(request, RequestOptions.DEFAULT); + // end::put-data-frame-analytics-execute + + // tag::put-data-frame-analytics-response + DataFrameAnalyticsConfig createdConfig = response.getConfig(); + // end::put-data-frame-analytics-response + + assertThat(createdConfig.getId(), equalTo("my-analytics-config")); + } + { + PutDataFrameAnalyticsRequest request = new PutDataFrameAnalyticsRequest(DF_ANALYTICS_CONFIG); + // tag::put-data-frame-analytics-execute-listener + ActionListener listener = new ActionListener<>() { + @Override + public void onResponse(PutDataFrameAnalyticsResponse response) { + // <1> + } + + @Override + public void onFailure(Exception e) { + // <2> + } + }; + // end::put-data-frame-analytics-execute-listener + + // Replace the empty listener by a blocking listener in test + final CountDownLatch latch = new CountDownLatch(1); + listener = new LatchedActionListener<>(listener, latch); + + // tag::put-data-frame-analytics-execute-async + client.machineLearning().putDataFrameAnalyticsAsync(request, RequestOptions.DEFAULT, listener); // <1> + // end::put-data-frame-analytics-execute-async + + assertTrue(latch.await(30L, TimeUnit.SECONDS)); + } + } + + public void testDeleteDataFrameAnalytics() throws Exception { + RestHighLevelClient client = highLevelClient(); + client.machineLearning().putDataFrameAnalytics(new PutDataFrameAnalyticsRequest(DF_ANALYTICS_CONFIG), RequestOptions.DEFAULT); + { + // tag::delete-data-frame-analytics-request + DeleteDataFrameAnalyticsRequest request = new DeleteDataFrameAnalyticsRequest("my-analytics-config"); // <1> + // end::delete-data-frame-analytics-request + + // tag::delete-data-frame-analytics-execute + AcknowledgedResponse response = client.machineLearning().deleteDataFrameAnalytics(request, RequestOptions.DEFAULT); + // end::delete-data-frame-analytics-execute + + // tag::delete-data-frame-analytics-response + boolean acknowledged = response.isAcknowledged(); + // 
end::delete-data-frame-analytics-response + + assertThat(acknowledged, is(true)); + } + client.machineLearning().putDataFrameAnalytics(new PutDataFrameAnalyticsRequest(DF_ANALYTICS_CONFIG), RequestOptions.DEFAULT); + { + DeleteDataFrameAnalyticsRequest request = new DeleteDataFrameAnalyticsRequest("my-analytics-config"); + + // tag::delete-data-frame-analytics-execute-listener + ActionListener listener = new ActionListener<>() { + @Override + public void onResponse(AcknowledgedResponse response) { + // <1> + } + + @Override + public void onFailure(Exception e) { + // <2> + } + }; + // end::delete-data-frame-analytics-execute-listener + + // Replace the empty listener by a blocking listener in test + CountDownLatch latch = new CountDownLatch(1); + listener = new LatchedActionListener<>(listener, latch); + + // tag::delete-data-frame-analytics-execute-async + client.machineLearning().deleteDataFrameAnalyticsAsync(request, RequestOptions.DEFAULT, listener); // <1> + // end::delete-data-frame-analytics-execute-async + + assertTrue(latch.await(30L, TimeUnit.SECONDS)); + } + } + + public void testStartDataFrameAnalytics() throws Exception { + createIndex(DF_ANALYTICS_CONFIG.getSource().getIndex()); + highLevelClient().index( + new IndexRequest(DF_ANALYTICS_CONFIG.getSource().getIndex()).source(XContentType.JSON, "total", 10000), RequestOptions.DEFAULT); + RestHighLevelClient client = highLevelClient(); + client.machineLearning().putDataFrameAnalytics(new PutDataFrameAnalyticsRequest(DF_ANALYTICS_CONFIG), RequestOptions.DEFAULT); + { + // tag::start-data-frame-analytics-request + StartDataFrameAnalyticsRequest request = new StartDataFrameAnalyticsRequest("my-analytics-config"); // <1> + // end::start-data-frame-analytics-request + + // tag::start-data-frame-analytics-execute + AcknowledgedResponse response = client.machineLearning().startDataFrameAnalytics(request, RequestOptions.DEFAULT); + // end::start-data-frame-analytics-execute + + // 
tag::start-data-frame-analytics-response + boolean acknowledged = response.isAcknowledged(); + // end::start-data-frame-analytics-response + + assertThat(acknowledged, is(true)); + } + assertBusy( + () -> assertThat(getAnalyticsState(DF_ANALYTICS_CONFIG.getId()), equalTo(DataFrameAnalyticsState.STOPPED)), + 30, TimeUnit.SECONDS); + { + StartDataFrameAnalyticsRequest request = new StartDataFrameAnalyticsRequest("my-analytics-config"); + + // tag::start-data-frame-analytics-execute-listener + ActionListener listener = new ActionListener<>() { + @Override + public void onResponse(AcknowledgedResponse response) { + // <1> + } + + @Override + public void onFailure(Exception e) { + // <2> + } + }; + // end::start-data-frame-analytics-execute-listener + + // Replace the empty listener by a blocking listener in test + CountDownLatch latch = new CountDownLatch(1); + listener = new LatchedActionListener<>(listener, latch); + + // tag::start-data-frame-analytics-execute-async + client.machineLearning().startDataFrameAnalyticsAsync(request, RequestOptions.DEFAULT, listener); // <1> + // end::start-data-frame-analytics-execute-async + + assertTrue(latch.await(30L, TimeUnit.SECONDS)); + } + assertBusy( + () -> assertThat(getAnalyticsState(DF_ANALYTICS_CONFIG.getId()), equalTo(DataFrameAnalyticsState.STOPPED)), + 30, TimeUnit.SECONDS); + } + + public void testStopDataFrameAnalytics() throws Exception { + createIndex(DF_ANALYTICS_CONFIG.getSource().getIndex()); + highLevelClient().index( + new IndexRequest(DF_ANALYTICS_CONFIG.getSource().getIndex()).source(XContentType.JSON, "total", 10000), RequestOptions.DEFAULT); + RestHighLevelClient client = highLevelClient(); + client.machineLearning().putDataFrameAnalytics(new PutDataFrameAnalyticsRequest(DF_ANALYTICS_CONFIG), RequestOptions.DEFAULT); + { + // tag::stop-data-frame-analytics-request + StopDataFrameAnalyticsRequest request = new StopDataFrameAnalyticsRequest("my-analytics-config"); // <1> + // 
end::stop-data-frame-analytics-request + + // tag::stop-data-frame-analytics-execute + StopDataFrameAnalyticsResponse response = client.machineLearning().stopDataFrameAnalytics(request, RequestOptions.DEFAULT); + // end::stop-data-frame-analytics-execute + + // tag::stop-data-frame-analytics-response + boolean acknowledged = response.isStopped(); + // end::stop-data-frame-analytics-response + + assertThat(acknowledged, is(true)); + } + assertBusy( + () -> assertThat(getAnalyticsState(DF_ANALYTICS_CONFIG.getId()), equalTo(DataFrameAnalyticsState.STOPPED)), + 30, TimeUnit.SECONDS); + { + StopDataFrameAnalyticsRequest request = new StopDataFrameAnalyticsRequest("my-analytics-config"); + + // tag::stop-data-frame-analytics-execute-listener + ActionListener listener = new ActionListener<>() { + @Override + public void onResponse(StopDataFrameAnalyticsResponse response) { + // <1> + } + + @Override + public void onFailure(Exception e) { + // <2> + } + }; + // end::stop-data-frame-analytics-execute-listener + + // Replace the empty listener by a blocking listener in test + CountDownLatch latch = new CountDownLatch(1); + listener = new LatchedActionListener<>(listener, latch); + + // tag::stop-data-frame-analytics-execute-async + client.machineLearning().stopDataFrameAnalyticsAsync(request, RequestOptions.DEFAULT, listener); // <1> + // end::stop-data-frame-analytics-execute-async + + assertTrue(latch.await(30L, TimeUnit.SECONDS)); + } + assertBusy( + () -> assertThat(getAnalyticsState(DF_ANALYTICS_CONFIG.getId()), equalTo(DataFrameAnalyticsState.STOPPED)), + 30, TimeUnit.SECONDS); + } + + public void testEvaluateDataFrame() throws Exception { + String indexName = "evaluate-test-index"; + CreateIndexRequest createIndexRequest = + new CreateIndexRequest(indexName) + .mapping(XContentFactory.jsonBuilder().startObject() + .startObject("properties") + .startObject("label") + .field("type", "keyword") + .endObject() + .startObject("p") + .field("type", "double") + .endObject() 
+ .endObject() + .endObject()); + BulkRequest bulkRequest = + new BulkRequest(indexName) + .setRefreshPolicy(WriteRequest.RefreshPolicy.IMMEDIATE) + .add(new IndexRequest().source(XContentType.JSON, "label", false, "p", 0.1)) // #0 + .add(new IndexRequest().source(XContentType.JSON, "label", false, "p", 0.2)) // #1 + .add(new IndexRequest().source(XContentType.JSON, "label", false, "p", 0.3)) // #2 + .add(new IndexRequest().source(XContentType.JSON, "label", false, "p", 0.4)) // #3 + .add(new IndexRequest().source(XContentType.JSON, "label", false, "p", 0.7)) // #4 + .add(new IndexRequest().source(XContentType.JSON, "label", true, "p", 0.2)) // #5 + .add(new IndexRequest().source(XContentType.JSON, "label", true, "p", 0.3)) // #6 + .add(new IndexRequest().source(XContentType.JSON, "label", true, "p", 0.4)) // #7 + .add(new IndexRequest().source(XContentType.JSON, "label", true, "p", 0.8)) // #8 + .add(new IndexRequest().source(XContentType.JSON, "label", true, "p", 0.9)); // #9 + RestHighLevelClient client = highLevelClient(); + client.indices().create(createIndexRequest, RequestOptions.DEFAULT); + client.bulk(bulkRequest, RequestOptions.DEFAULT); + { + // tag::evaluate-data-frame-request + EvaluateDataFrameRequest request = new EvaluateDataFrameRequest( // <1> + indexName, // <2> + new BinarySoftClassification( // <3> + "label", // <4> + "p", // <5> + // Evaluation metrics // <6> + PrecisionMetric.at(0.4, 0.5, 0.6), // <7> + RecallMetric.at(0.5, 0.7), // <8> + ConfusionMatrixMetric.at(0.5), // <9> + AucRocMetric.withCurve())); // <10> + // end::evaluate-data-frame-request + + // tag::evaluate-data-frame-execute + EvaluateDataFrameResponse response = client.machineLearning().evaluateDataFrame(request, RequestOptions.DEFAULT); + // end::evaluate-data-frame-execute + + // tag::evaluate-data-frame-response + List metrics = response.getMetrics(); // <1> + + PrecisionMetric.Result precisionResult = response.getMetricByName(PrecisionMetric.NAME); // <2> + double 
precision = precisionResult.getScoreByThreshold("0.4"); // <3> + + ConfusionMatrixMetric.Result confusionMatrixResult = response.getMetricByName(ConfusionMatrixMetric.NAME); // <4> + ConfusionMatrix confusionMatrix = confusionMatrixResult.getScoreByThreshold("0.5"); // <5> + // end::evaluate-data-frame-response + + assertThat( + metrics.stream().map(m -> m.getMetricName()).collect(Collectors.toList()), + containsInAnyOrder(PrecisionMetric.NAME, RecallMetric.NAME, ConfusionMatrixMetric.NAME, AucRocMetric.NAME)); + assertThat(precision, closeTo(0.6, 1e-9)); + assertThat(confusionMatrix.getTruePositives(), CoreMatchers.equalTo(2L)); // docs #8 and #9 + assertThat(confusionMatrix.getFalsePositives(), CoreMatchers.equalTo(1L)); // doc #4 + assertThat(confusionMatrix.getTrueNegatives(), CoreMatchers.equalTo(4L)); // docs #0, #1, #2 and #3 + assertThat(confusionMatrix.getFalseNegatives(), CoreMatchers.equalTo(3L)); // docs #5, #6 and #7 + } + { + EvaluateDataFrameRequest request = new EvaluateDataFrameRequest( + indexName, + new BinarySoftClassification( + "label", + "p", + PrecisionMetric.at(0.4, 0.5, 0.6), + RecallMetric.at(0.5, 0.7), + ConfusionMatrixMetric.at(0.5), + AucRocMetric.withCurve())); + + // tag::evaluate-data-frame-execute-listener + ActionListener listener = new ActionListener<>() { + @Override + public void onResponse(EvaluateDataFrameResponse response) { + // <1> + } + + @Override + public void onFailure(Exception e) { + // <2> + } + }; + // end::evaluate-data-frame-execute-listener + + // Replace the empty listener by a blocking listener in test + CountDownLatch latch = new CountDownLatch(1); + listener = new LatchedActionListener<>(listener, latch); + + // tag::evaluate-data-frame-execute-async + client.machineLearning().evaluateDataFrameAsync(request, RequestOptions.DEFAULT, listener); // <1> + // end::evaluate-data-frame-execute-async + + assertTrue(latch.await(30L, TimeUnit.SECONDS)); + } + } public void testCreateFilter() throws Exception { 
RestHighLevelClient client = highLevelClient(); @@ -3140,4 +3588,39 @@ private String createFilter(RestHighLevelClient client) throws IOException { assertThat(createdFilter.getId(), equalTo("my_safe_domains")); return createdFilter.getId(); } + + private void createIndex(String indexName) throws IOException { + CreateIndexRequest createIndexRequest = new CreateIndexRequest(indexName); + createIndexRequest.mapping(XContentFactory.jsonBuilder().startObject() + .startObject("properties") + .startObject("timestamp") + .field("type", "date") + .endObject() + .startObject("total") + .field("type", "long") + .endObject() + .endObject() + .endObject()); + highLevelClient().indices().create(createIndexRequest, RequestOptions.DEFAULT); + } + + private DataFrameAnalyticsState getAnalyticsState(String configId) throws IOException { + GetDataFrameAnalyticsStatsResponse statsResponse = + highLevelClient().machineLearning().getDataFrameAnalyticsStats( + new GetDataFrameAnalyticsStatsRequest(configId), RequestOptions.DEFAULT); + assertThat(statsResponse.getAnalyticsStats(), hasSize(1)); + DataFrameAnalyticsStats stats = statsResponse.getAnalyticsStats().get(0); + return stats.getState(); + } + + private static final DataFrameAnalyticsConfig DF_ANALYTICS_CONFIG = + DataFrameAnalyticsConfig.builder("my-analytics-config") + .setSource(DataFrameAnalyticsSource.builder() + .setIndex("put-test-source-index") + .build()) + .setDest(DataFrameAnalyticsDest.builder() + .setIndex("put-test-dest-index") + .build()) + .setAnalysis(OutlierDetection.createDefault()) + .build(); } diff --git a/docs/java-rest/high-level/ml/delete-data-frame-analytics.asciidoc b/docs/java-rest/high-level/ml/delete-data-frame-analytics.asciidoc new file mode 100644 index 0000000000000..2e5ade37107cf --- /dev/null +++ b/docs/java-rest/high-level/ml/delete-data-frame-analytics.asciidoc @@ -0,0 +1,28 @@ +-- +:api: delete-data-frame-analytics +:request: DeleteDataFrameAnalyticsRequest +:response: AcknowledgedResponse 
+-- +[id="{upid}-{api}"] +=== Delete Data Frame Analytics API + +The Delete Data Frame Analytics API is used to delete an existing {dataframe-analytics-config}. +The API accepts a +{request}+ object as a request and returns a +{response}+. + +[id="{upid}-{api}-request"] +==== Delete Data Frame Analytics Request + +A +{request}+ object requires a {dataframe-analytics-config} id. + +["source","java",subs="attributes,callouts,macros"] +--------------------------------------------------- +include-tagged::{doc-tests-file}[{api}-request] +--------------------------------------------------- +<1> Constructing a new request referencing an existing {dataframe-analytics-config} + +include::../execution.asciidoc[] + +[id="{upid}-{api}-response"] +==== Response + +The returned +{response}+ object acknowledges the {dataframe-analytics-config} deletion. diff --git a/docs/java-rest/high-level/ml/evaluate-data-frame.asciidoc b/docs/java-rest/high-level/ml/evaluate-data-frame.asciidoc new file mode 100644 index 0000000000000..660603d2e38e7 --- /dev/null +++ b/docs/java-rest/high-level/ml/evaluate-data-frame.asciidoc @@ -0,0 +1,45 @@ +-- +:api: evaluate-data-frame +:request: EvaluateDataFrameRequest +:response: EvaluateDataFrameResponse +-- +[id="{upid}-{api}"] +=== Evaluate Data Frame API + +The Evaluate Data Frame API is used to evaluate an ML algorithm that ran on a {dataframe}. +The API accepts an +{request}+ object and returns an +{response}+. + +[id="{upid}-{api}-request"] +==== Evaluate Data Frame Request + +["source","java",subs="attributes,callouts,macros"] +-------------------------------------------------- +include-tagged::{doc-tests-file}[{api}-request] +-------------------------------------------------- +<1> Constructing a new evaluation request +<2> Reference to an existing index +<3> Kind of evaluation to perform +<4> Name of the field in the index. Its value denotes the actual (i.e. ground truth) label for an example. 
Must be either true or false +<5> Name of the field in the index. Its value denotes the probability (as per some ML algorithm) of the example being classified as positive +<6> The remaining parameters are the metrics to be calculated based on the two fields described above. +<7> https://en.wikipedia.org/wiki/Precision_and_recall[Precision] calculated at thresholds: 0.4, 0.5 and 0.6 +<8> https://en.wikipedia.org/wiki/Precision_and_recall[Recall] calculated at thresholds: 0.5 and 0.7 +<9> https://en.wikipedia.org/wiki/Confusion_matrix[Confusion matrix] calculated at threshold 0.5 +<10> https://en.wikipedia.org/wiki/Receiver_operating_characteristic#Area_under_the_curve[AUC ROC] calculated and the curve points returned + +include::../execution.asciidoc[] + +[id="{upid}-{api}-response"] +==== Response + +The returned +{response}+ contains the requested evaluation metrics. + +["source","java",subs="attributes,callouts,macros"] +-------------------------------------------------- +include-tagged::{doc-tests-file}[{api}-response] +-------------------------------------------------- +<1> Fetching all the calculated metrics results +<2> Fetching precision metric by name +<3> Fetching precision at a given (0.4) threshold +<4> Fetching confusion matrix metric by name +<5> Fetching confusion matrix at a given (0.5) threshold \ No newline at end of file diff --git a/docs/java-rest/high-level/ml/get-data-frame-analytics-stats.asciidoc b/docs/java-rest/high-level/ml/get-data-frame-analytics-stats.asciidoc new file mode 100644 index 0000000000000..e1047e9b3e002 --- /dev/null +++ b/docs/java-rest/high-level/ml/get-data-frame-analytics-stats.asciidoc @@ -0,0 +1,34 @@ +-- +:api: get-data-frame-analytics-stats +:request: GetDataFrameAnalyticsStatsRequest +:response: GetDataFrameAnalyticsStatsResponse +-- +[id="{upid}-{api}"] +=== Get Data Frame Analytics Stats API + +The Get Data Frame Analytics Stats API is used to read the operational statistics of one or more 
{dataframe-analytics-config}s. +The API accepts a +{request}+ object and returns a +{response}+. + +[id="{upid}-{api}-request"] +==== Get Data Frame Analytics Stats Request + +A +{request}+ requires either a {dataframe-analytics-config} id, a comma-separated list of ids or +the special wildcard `_all` to get the statistics for all {dataframe-analytics-config}s. + +["source","java",subs="attributes,callouts,macros"] +-------------------------------------------------- +include-tagged::{doc-tests-file}[{api}-request] +-------------------------------------------------- +<1> Constructing a new GET Stats request referencing an existing {dataframe-analytics-config} + +include::../execution.asciidoc[] + +[id="{upid}-{api}-response"] +==== Response + +The returned +{response}+ contains the requested {dataframe-analytics-config} statistics. + +["source","java",subs="attributes,callouts,macros"] +-------------------------------------------------- +include-tagged::{doc-tests-file}[{api}-response] +-------------------------------------------------- \ No newline at end of file diff --git a/docs/java-rest/high-level/ml/get-data-frame-analytics.asciidoc b/docs/java-rest/high-level/ml/get-data-frame-analytics.asciidoc new file mode 100644 index 0000000000000..c6d368efbcae9 --- /dev/null +++ b/docs/java-rest/high-level/ml/get-data-frame-analytics.asciidoc @@ -0,0 +1,34 @@ +-- +:api: get-data-frame-analytics +:request: GetDataFrameAnalyticsRequest +:response: GetDataFrameAnalyticsResponse +-- +[id="{upid}-{api}"] +=== Get Data Frame Analytics API + +The Get Data Frame Analytics API is used to get one or more {dataframe-analytics-config}s. +The API accepts a +{request}+ object and returns a +{response}+. + +[id="{upid}-{api}-request"] +==== Get Data Frame Analytics Request + +A +{request}+ requires either a {dataframe-analytics-config} id, a comma-separated list of ids or +the special wildcard `_all` to get all {dataframe-analytics-config}s. 
+ +["source","java",subs="attributes,callouts,macros"] +-------------------------------------------------- +include-tagged::{doc-tests-file}[{api}-request] +-------------------------------------------------- +<1> Constructing a new GET request referencing an existing {dataframe-analytics-config} + +include::../execution.asciidoc[] + +[id="{upid}-{api}-response"] +==== Response + +The returned +{response}+ contains the requested {dataframe-analytics-config}s. + +["source","java",subs="attributes,callouts,macros"] +-------------------------------------------------- +include-tagged::{doc-tests-file}[{api}-response] +-------------------------------------------------- diff --git a/docs/java-rest/high-level/ml/put-data-frame-analytics.asciidoc b/docs/java-rest/high-level/ml/put-data-frame-analytics.asciidoc new file mode 100644 index 0000000000000..05fbd5bc3922a --- /dev/null +++ b/docs/java-rest/high-level/ml/put-data-frame-analytics.asciidoc @@ -0,0 +1,115 @@ +-- +:api: put-data-frame-analytics +:request: PutDataFrameAnalyticsRequest +:response: PutDataFrameAnalyticsResponse +-- +[id="{upid}-{api}"] +=== Put Data Frame Analytics API + +The Put Data Frame Analytics API is used to create a new {dataframe-analytics-config}. +The API accepts a +{request}+ object and returns a +{response}+. 
+ +[id="{upid}-{api}-request"] +==== Put Data Frame Analytics Request + +A +{request}+ requires the following argument: + +["source","java",subs="attributes,callouts,macros"] +-------------------------------------------------- +include-tagged::{doc-tests-file}[{api}-request] +-------------------------------------------------- +<1> The configuration of the {dataframe-job} to create + +[id="{upid}-{api}-config"] +==== Data Frame Analytics Configuration + +The `DataFrameAnalyticsConfig` object contains all the details about the {dataframe-job} +configuration and contains the following arguments: + +["source","java",subs="attributes,callouts,macros"] +-------------------------------------------------- +include-tagged::{doc-tests-file}[{api}-config] +-------------------------------------------------- +<1> The {dataframe-analytics-config} id +<2> The source index and query from which to gather data +<3> The destination index +<4> The analysis to be performed +<5> The fields to be included in / excluded from the analysis +<6> The memory limit for the model created as part of the analysis process + +[id="{upid}-{api}-query-config"] + +==== SourceConfig + +The index and the query from which to collect data. + +["source","java",subs="attributes,callouts,macros"] +-------------------------------------------------- +include-tagged::{doc-tests-file}[{api}-source-config] +-------------------------------------------------- +<1> Constructing a new DataFrameAnalyticsSource +<2> The source index +<3> The query from which to gather the data. If query is not set, a `match_all` query is used by default. + +===== QueryConfig + +The query with which to select data from the source. + +["source","java",subs="attributes,callouts,macros"] +-------------------------------------------------- +include-tagged::{doc-tests-file}[{api}-query-config] +-------------------------------------------------- + +==== DestinationConfig + +The index to which data should be written by the {dataframe-job}. 
+ +["source","java",subs="attributes,callouts,macros"] +-------------------------------------------------- +include-tagged::{doc-tests-file}[{api}-dest-config] +-------------------------------------------------- +<1> Constructing a new DataFrameAnalyticsDest +<2> The destination index + +==== Analysis + +The analysis to be performed. +Currently, only one analysis is supported: +OutlierDetection+. + ++OutlierDetection+ analysis can be created in one of two ways: + +["source","java",subs="attributes,callouts,macros"] +-------------------------------------------------- +include-tagged::{doc-tests-file}[{api}-analysis-default] +-------------------------------------------------- +<1> Constructing a new OutlierDetection object with the default strategy to determine outliers + +or +["source","java",subs="attributes,callouts,macros"] +-------------------------------------------------- +include-tagged::{doc-tests-file}[{api}-analysis-customized] +-------------------------------------------------- +<1> Constructing a new OutlierDetection object +<2> The method used to perform the analysis +<3> Number of neighbors taken into account during analysis + +==== Analyzed fields + +FetchContext object containing fields to be included in / excluded from the analysis. + +["source","java",subs="attributes,callouts,macros"] +-------------------------------------------------- +include-tagged::{doc-tests-file}[{api}-analyzed-fields] +-------------------------------------------------- + +include::../execution.asciidoc[] + +[id="{upid}-{api}-response"] +==== Response + +The returned +{response}+ contains the newly created {dataframe-analytics-config}. 
+ +["source","java",subs="attributes,callouts,macros"] +-------------------------------------------------- +include-tagged::{doc-tests-file}[{api}-response] +-------------------------------------------------- \ No newline at end of file diff --git a/docs/java-rest/high-level/ml/start-data-frame-analytics.asciidoc b/docs/java-rest/high-level/ml/start-data-frame-analytics.asciidoc new file mode 100644 index 0000000000000..610607daba1f8 --- /dev/null +++ b/docs/java-rest/high-level/ml/start-data-frame-analytics.asciidoc @@ -0,0 +1,28 @@ +-- +:api: start-data-frame-analytics +:request: StartDataFrameAnalyticsRequest +:response: AcknowledgedResponse +-- +[id="{upid}-{api}"] +=== Start Data Frame Analytics API + +The Start Data Frame Analytics API is used to start an existing {dataframe-analytics-config}. +It accepts a +{request}+ object and responds with a +{response}+ object. + +[id="{upid}-{api}-request"] +==== Start Data Frame Analytics Request + +A +{request}+ object requires a {dataframe-analytics-config} id. + +["source","java",subs="attributes,callouts,macros"] +--------------------------------------------------- +include-tagged::{doc-tests-file}[{api}-request] +--------------------------------------------------- +<1> Constructing a new start request referencing an existing {dataframe-analytics-config} + +include::../execution.asciidoc[] + +[id="{upid}-{api}-response"] +==== Response + +The returned +{response}+ object acknowledges the {dataframe-job} has started. 
\ No newline at end of file diff --git a/docs/java-rest/high-level/ml/stop-data-frame-analytics.asciidoc b/docs/java-rest/high-level/ml/stop-data-frame-analytics.asciidoc new file mode 100644 index 0000000000000..243c075e18b03 --- /dev/null +++ b/docs/java-rest/high-level/ml/stop-data-frame-analytics.asciidoc @@ -0,0 +1,28 @@ +-- +:api: stop-data-frame-analytics +:request: StopDataFrameAnalyticsRequest +:response: StopDataFrameAnalyticsResponse +-- +[id="{upid}-{api}"] +=== Stop Data Frame Analytics API + +The Stop Data Frame Analytics API is used to stop a running {dataframe-analytics-config}. +It accepts a +{request}+ object and responds with a +{response}+ object. + +[id="{upid}-{api}-request"] +==== Stop Data Frame Analytics Request + +A +{request}+ object requires a {dataframe-analytics-config} id. + +["source","java",subs="attributes,callouts,macros"] +--------------------------------------------------- +include-tagged::{doc-tests-file}[{api}-request] +--------------------------------------------------- +<1> Constructing a new stop request referencing an existing {dataframe-analytics-config} + +include::../execution.asciidoc[] + +[id="{upid}-{api}-response"] +==== Response + +The returned +{response}+ object acknowledges the {dataframe-job} has stopped. 
\ No newline at end of file diff --git a/docs/java-rest/high-level/supported-apis.asciidoc b/docs/java-rest/high-level/supported-apis.asciidoc index 4e28efc2941db..21ebdfab65155 100644 --- a/docs/java-rest/high-level/supported-apis.asciidoc +++ b/docs/java-rest/high-level/supported-apis.asciidoc @@ -285,6 +285,13 @@ The Java High Level REST Client supports the following Machine Learning APIs: * <<{upid}-put-calendar-job>> * <<{upid}-delete-calendar-job>> * <<{upid}-delete-calendar>> +* <<{upid}-get-data-frame-analytics>> +* <<{upid}-get-data-frame-analytics-stats>> +* <<{upid}-put-data-frame-analytics>> +* <<{upid}-delete-data-frame-analytics>> +* <<{upid}-start-data-frame-analytics>> +* <<{upid}-stop-data-frame-analytics>> +* <<{upid}-evaluate-data-frame>> * <<{upid}-put-filter>> * <<{upid}-get-filters>> * <<{upid}-update-filter>> @@ -329,6 +336,13 @@ include::ml/delete-calendar-event.asciidoc[] include::ml/put-calendar-job.asciidoc[] include::ml/delete-calendar-job.asciidoc[] include::ml/delete-calendar.asciidoc[] +include::ml/get-data-frame-analytics.asciidoc[] +include::ml/get-data-frame-analytics-stats.asciidoc[] +include::ml/put-data-frame-analytics.asciidoc[] +include::ml/delete-data-frame-analytics.asciidoc[] +include::ml/start-data-frame-analytics.asciidoc[] +include::ml/stop-data-frame-analytics.asciidoc[] +include::ml/evaluate-data-frame.asciidoc[] include::ml/put-filter.asciidoc[] include::ml/get-filters.asciidoc[] include::ml/update-filter.asciidoc[]