Skip to content

Commit 166d9a9

Browse files
authored
[ML] Add lazy parsing for DatafeedConfig:Aggs,Query (#36117)
* Lazily parsing aggs and query in DatafeedConfigs * Adding parser tests * Fixing exception types && unneccessary checked ex * Adding semi aggregation parser * Adding tests, fixing up semi-parser * Reverting semi-parsing * Moving agg validations * Making bad configs throw badRequestException
1 parent 6e1ff31 commit 166d9a9

File tree

20 files changed

+295
-79
lines changed

20 files changed

+295
-79
lines changed

x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/ml/datafeed/DatafeedConfig.java

Lines changed: 159 additions & 31 deletions
Large diffs are not rendered by default.

x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/ml/datafeed/DatafeedUpdate.java

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -295,10 +295,11 @@ public DatafeedConfig apply(DatafeedConfig datafeedConfig, Map<String, String> h
295295
builder.setTypes(types);
296296
}
297297
if (query != null) {
298-
builder.setQuery(query);
298+
builder.setParsedQuery(query);
299299
}
300300
if (aggregations != null) {
301-
builder.setAggregations(aggregations);
301+
DatafeedConfig.validateAggregations(aggregations);
302+
builder.setParsedAggregations(aggregations);
302303
}
303304
if (scriptFields != null) {
304305
builder.setScriptFields(scriptFields);
@@ -371,9 +372,9 @@ boolean isNoop(DatafeedConfig datafeed) {
371372
&& (queryDelay == null || Objects.equals(queryDelay, datafeed.getQueryDelay()))
372373
&& (indices == null || Objects.equals(indices, datafeed.getIndices()))
373374
&& (types == null || Objects.equals(types, datafeed.getTypes()))
374-
&& (query == null || Objects.equals(query, datafeed.getQuery()))
375+
&& (query == null || Objects.equals(query, datafeed.getParsedQuery()))
375376
&& (scrollSize == null || Objects.equals(scrollSize, datafeed.getQueryDelay()))
376-
&& (aggregations == null || Objects.equals(aggregations, datafeed.getAggregations()))
377+
&& (aggregations == null || Objects.equals(aggregations, datafeed.getParsedAggregations()))
377378
&& (scriptFields == null || Objects.equals(scriptFields, datafeed.getScriptFields()))
378379
&& (delayedDataCheckConfig == null || Objects.equals(delayedDataCheckConfig, datafeed.getDelayedDataCheckConfig()))
379380
&& (chunkingConfig == null || Objects.equals(chunkingConfig, datafeed.getChunkingConfig()));

x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/ml/job/messages/Messages.java

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,8 @@ public final class Messages {
2626
"delayed_data_check_config: check_window [{0}] must be greater than the bucket_span [{1}]";
2727
public static final String DATAFEED_CONFIG_DELAYED_DATA_CHECK_SPANS_TOO_MANY_BUCKETS =
2828
"delayed_data_check_config: check_window [{0}] must be less than 10,000x the bucket_span [{1}]";
29+
public static final String DATAFEED_CONFIG_QUERY_BAD_FORMAT = "Datafeed [{0}] query is not parsable: {1}";
30+
public static final String DATAFEED_CONFIG_AGG_BAD_FORMAT = "Datafeed [{0}] aggregations are not parsable: {1}";
2931

3032
public static final String DATAFEED_DOES_NOT_SUPPORT_JOB_WITH_LATENCY = "A job configured with datafeed cannot support latency";
3133
public static final String DATAFEED_NOT_FOUND = "No datafeed with id [{0}] exists";

x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/ml/utils/XContentObjectTransformer.java

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -61,6 +61,9 @@ public static XContentObjectTransformer<QueryBuilder> queryBuilderTransformer()
6161
}
6262

6363
public T fromMap(Map<String, Object> stringObjectMap) throws IOException {
64+
if (stringObjectMap == null) {
65+
return null;
66+
}
6467
LoggingDeprecationAccumulationHandler deprecationLogger = new LoggingDeprecationAccumulationHandler();
6568
try(XContentBuilder xContentBuilder = XContentFactory.jsonBuilder().map(stringObjectMap);
6669
XContentParser parser = XContentType.JSON
@@ -74,6 +77,9 @@ public T fromMap(Map<String, Object> stringObjectMap) throws IOException {
7477
}
7578

7679
public Map<String, Object> toMap(T object) throws IOException {
80+
if (object == null) {
81+
return null;
82+
}
7783
try(XContentBuilder xContentBuilder = XContentFactory.jsonBuilder()) {
7884
XContentBuilder content = object.toXContent(xContentBuilder, ToXContent.EMPTY_PARAMS);
7985
return XContentHelper.convertToMap(BytesReference.bytes(content), true, XContentType.JSON).v2();

x-pack/plugin/core/src/test/java/org/elasticsearch/xpack/core/ml/datafeed/DatafeedConfigTests.java

Lines changed: 92 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -67,7 +67,7 @@ public static DatafeedConfig createRandomizedDatafeedConfig(String jobId, long b
6767
builder.setIndices(randomStringList(1, 10));
6868
builder.setTypes(randomStringList(0, 10));
6969
if (randomBoolean()) {
70-
builder.setQuery(QueryBuilders.termQuery(randomAlphaOfLength(10), randomAlphaOfLength(10)));
70+
builder.setParsedQuery(QueryBuilders.termQuery(randomAlphaOfLength(10), randomAlphaOfLength(10)));
7171
}
7272
boolean addScriptFields = randomBoolean();
7373
if (addScriptFields) {
@@ -91,7 +91,7 @@ public static DatafeedConfig createRandomizedDatafeedConfig(String jobId, long b
9191
MaxAggregationBuilder maxTime = AggregationBuilders.max("time").field("time");
9292
aggs.addAggregator(AggregationBuilders.dateHistogram("buckets")
9393
.interval(aggHistogramInterval).subAggregation(maxTime).field("time"));
94-
builder.setAggregations(aggs);
94+
builder.setParsedAggregations(aggs);
9595
}
9696
if (randomBoolean()) {
9797
builder.setScrollSize(randomIntBetween(0, Integer.MAX_VALUE));
@@ -155,6 +155,43 @@ protected DatafeedConfig doParseInstance(XContentParser parser) {
155155
" \"scroll_size\": 1234\n" +
156156
"}";
157157

158+
private static final String ANACHRONISTIC_QUERY_DATAFEED = "{\n" +
159+
" \"datafeed_id\": \"farequote-datafeed\",\n" +
160+
" \"job_id\": \"farequote\",\n" +
161+
" \"frequency\": \"1h\",\n" +
162+
" \"indices\": [\"farequote1\", \"farequote2\"],\n" +
163+
//query:match:type stopped being supported in 6.x
164+
" \"query\": {\"match\" : {\"query\":\"fieldName\", \"type\": \"phrase\"}},\n" +
165+
" \"scroll_size\": 1234\n" +
166+
"}";
167+
168+
private static final String ANACHRONISTIC_AGG_DATAFEED = "{\n" +
169+
" \"datafeed_id\": \"farequote-datafeed\",\n" +
170+
" \"job_id\": \"farequote\",\n" +
171+
" \"frequency\": \"1h\",\n" +
172+
" \"indices\": [\"farequote1\", \"farequote2\"],\n" +
173+
" \"aggregations\": {\n" +
174+
" \"buckets\": {\n" +
175+
" \"date_histogram\": {\n" +
176+
" \"field\": \"time\",\n" +
177+
" \"interval\": \"360s\",\n" +
178+
" \"time_zone\": \"UTC\"\n" +
179+
" },\n" +
180+
" \"aggregations\": {\n" +
181+
" \"time\": {\n" +
182+
" \"max\": {\"field\": \"time\"}\n" +
183+
" },\n" +
184+
" \"airline\": {\n" +
185+
" \"terms\": {\n" +
186+
" \"field\": \"airline\",\n" +
187+
" \"size\": 0\n" + //size: 0 stopped being supported in 6.x
188+
" }\n" +
189+
" }\n" +
190+
" }\n" +
191+
" }\n" +
192+
" }\n" +
193+
"}";
194+
158195
public void testFutureConfigParse() throws IOException {
159196
XContentParser parser = XContentFactory.xContent(XContentType.JSON)
160197
.createParser(NamedXContentRegistry.EMPTY, DeprecationHandler.THROW_UNSUPPORTED_OPERATION, FUTURE_DATAFEED);
@@ -163,6 +200,44 @@ public void testFutureConfigParse() throws IOException {
163200
assertEquals("[6:5] [datafeed_config] unknown field [tomorrows_technology_today], parser not found", e.getMessage());
164201
}
165202

203+
public void testPastQueryConfigParse() throws IOException {
204+
try(XContentParser parser = XContentFactory.xContent(XContentType.JSON)
205+
.createParser(NamedXContentRegistry.EMPTY, DeprecationHandler.THROW_UNSUPPORTED_OPERATION, ANACHRONISTIC_QUERY_DATAFEED)) {
206+
207+
DatafeedConfig config = DatafeedConfig.LENIENT_PARSER.apply(parser, null).build();
208+
ElasticsearchException e = expectThrows(ElasticsearchException.class, () -> config.getParsedQuery());
209+
assertEquals("[match] query doesn't support multiple fields, found [query] and [type]", e.getMessage());
210+
}
211+
212+
try(XContentParser parser = XContentFactory.xContent(XContentType.JSON)
213+
.createParser(NamedXContentRegistry.EMPTY, DeprecationHandler.THROW_UNSUPPORTED_OPERATION, ANACHRONISTIC_QUERY_DATAFEED)) {
214+
215+
XContentParseException e = expectThrows(XContentParseException.class,
216+
() -> DatafeedConfig.STRICT_PARSER.apply(parser, null).build());
217+
assertEquals("[6:25] [datafeed_config] failed to parse field [query]", e.getMessage());
218+
}
219+
}
220+
221+
public void testPastAggConfigParse() throws IOException {
222+
try(XContentParser parser = XContentFactory.xContent(XContentType.JSON)
223+
.createParser(NamedXContentRegistry.EMPTY, DeprecationHandler.THROW_UNSUPPORTED_OPERATION, ANACHRONISTIC_AGG_DATAFEED)) {
224+
225+
DatafeedConfig.Builder configBuilder = DatafeedConfig.LENIENT_PARSER.apply(parser, null);
226+
ElasticsearchException e = expectThrows(ElasticsearchException.class, () -> configBuilder.build());
227+
assertEquals(
228+
"Datafeed [farequote-datafeed] aggregations are not parsable: [size] must be greater than 0. Found [0] in [airline]",
229+
e.getMessage());
230+
}
231+
232+
try(XContentParser parser = XContentFactory.xContent(XContentType.JSON)
233+
.createParser(NamedXContentRegistry.EMPTY, DeprecationHandler.THROW_UNSUPPORTED_OPERATION, ANACHRONISTIC_AGG_DATAFEED)) {
234+
235+
XContentParseException e = expectThrows(XContentParseException.class,
236+
() -> DatafeedConfig.STRICT_PARSER.apply(parser, null).build());
237+
assertEquals("[8:25] [datafeed_config] failed to parse field [aggregations]", e.getMessage());
238+
}
239+
}
240+
166241
public void testFutureMetadataParse() throws IOException {
167242
XContentParser parser = XContentFactory.xContent(XContentType.JSON)
168243
.createParser(NamedXContentRegistry.EMPTY, DeprecationHandler.THROW_UNSUPPORTED_OPERATION, FUTURE_DATAFEED);
@@ -274,7 +349,7 @@ public void testBuild_GivenScriptFieldsAndAggregations() {
274349
datafeed.setTypes(Collections.singletonList("my_type"));
275350
datafeed.setScriptFields(Collections.singletonList(new SearchSourceBuilder.ScriptField(randomAlphaOfLength(10),
276351
mockScript(randomAlphaOfLength(10)), randomBoolean())));
277-
datafeed.setAggregations(new AggregatorFactories.Builder().addAggregator(AggregationBuilders.avg("foo")));
352+
datafeed.setParsedAggregations(new AggregatorFactories.Builder().addAggregator(AggregationBuilders.avg("foo")));
278353

279354
ElasticsearchException e = expectThrows(ElasticsearchException.class, datafeed::build);
280355

@@ -295,7 +370,7 @@ public void testHasAggregations_NonEmpty() {
295370
builder.setIndices(Collections.singletonList("myIndex"));
296371
builder.setTypes(Collections.singletonList("myType"));
297372
MaxAggregationBuilder maxTime = AggregationBuilders.max("time").field("time");
298-
builder.setAggregations(new AggregatorFactories.Builder().addAggregator(
373+
builder.setParsedAggregations(new AggregatorFactories.Builder().addAggregator(
299374
AggregationBuilders.dateHistogram("time").interval(300000).subAggregation(maxTime).field("time")));
300375
DatafeedConfig datafeedConfig = builder.build();
301376

@@ -306,7 +381,7 @@ public void testBuild_GivenEmptyAggregations() {
306381
DatafeedConfig.Builder builder = new DatafeedConfig.Builder("datafeed1", "job1");
307382
builder.setIndices(Collections.singletonList("myIndex"));
308383
builder.setTypes(Collections.singletonList("myType"));
309-
builder.setAggregations(new AggregatorFactories.Builder());
384+
builder.setParsedAggregations(new AggregatorFactories.Builder());
310385

311386
ElasticsearchException e = expectThrows(ElasticsearchException.class, builder::build);
312387

@@ -318,13 +393,13 @@ public void testBuild_GivenHistogramWithDefaultInterval() {
318393
builder.setIndices(Collections.singletonList("myIndex"));
319394
builder.setTypes(Collections.singletonList("myType"));
320395
MaxAggregationBuilder maxTime = AggregationBuilders.max("time").field("time");
321-
builder.setAggregations(new AggregatorFactories.Builder().addAggregator(
396+
builder.setParsedAggregations(new AggregatorFactories.Builder().addAggregator(
322397
AggregationBuilders.histogram("time").subAggregation(maxTime).field("time"))
323398
);
324399

325400
ElasticsearchException e = expectThrows(ElasticsearchException.class, builder::build);
326401

327-
assertThat(e.getMessage(), equalTo("Aggregation interval must be greater than 0"));
402+
assertThat(e.getMessage(), containsString("[interval] must be >0 for histogram aggregation [time]"));
328403
}
329404

330405
public void testBuild_GivenDateHistogramWithInvalidTimeZone() {
@@ -341,7 +416,7 @@ public void testBuild_GivenDateHistogramWithDefaultInterval() {
341416
ElasticsearchException e = expectThrows(ElasticsearchException.class,
342417
() -> createDatafeedWithDateHistogram((String) null));
343418

344-
assertThat(e.getMessage(), equalTo("Aggregation interval must be greater than 0"));
419+
assertThat(e.getMessage(), containsString("Aggregation interval must be greater than 0"));
345420
}
346421

347422
public void testBuild_GivenValidDateHistogram() {
@@ -402,9 +477,8 @@ public void testValidateAggregations_GivenMulitpleHistogramAggs() {
402477
TermsAggregationBuilder toplevelTerms = AggregationBuilders.terms("top_level");
403478
toplevelTerms.subAggregation(dateHistogram);
404479

405-
DatafeedConfig.Builder builder = new DatafeedConfig.Builder("foo", "bar");
406-
builder.setAggregations(new AggregatorFactories.Builder().addAggregator(toplevelTerms));
407-
ElasticsearchException e = expectThrows(ElasticsearchException.class, builder::validateAggregations);
480+
ElasticsearchException e = expectThrows(ElasticsearchException.class,
481+
() -> DatafeedConfig.validateAggregations(new AggregatorFactories.Builder().addAggregator(toplevelTerms)));
408482

409483
assertEquals("Aggregations can only have 1 date_histogram or histogram aggregation", e.getMessage());
410484
}
@@ -520,7 +594,9 @@ private static DatafeedConfig createDatafeedWithDateHistogram(DateHistogramAggre
520594
DatafeedConfig.Builder builder = new DatafeedConfig.Builder("datafeed1", "job1");
521595
builder.setIndices(Collections.singletonList("myIndex"));
522596
builder.setTypes(Collections.singletonList("myType"));
523-
builder.setAggregations(new AggregatorFactories.Builder().addAggregator(dateHistogram));
597+
AggregatorFactories.Builder aggs = new AggregatorFactories.Builder().addAggregator(dateHistogram);
598+
DatafeedConfig.validateAggregations(aggs);
599+
builder.setParsedAggregations(aggs);
524600
return builder.build();
525601
}
526602

@@ -556,11 +632,11 @@ protected DatafeedConfig mutateInstance(DatafeedConfig instance) throws IOExcept
556632
break;
557633
case 6:
558634
BoolQueryBuilder query = new BoolQueryBuilder();
559-
if (instance.getQuery() != null) {
560-
query.must(instance.getQuery());
635+
if (instance.getParsedQuery() != null) {
636+
query.must(instance.getParsedQuery());
561637
}
562638
query.filter(new TermQueryBuilder(randomAlphaOfLengthBetween(1, 10), randomAlphaOfLengthBetween(1, 10)));
563-
builder.setQuery(query);
639+
builder.setParsedQuery(query);
564640
break;
565641
case 7:
566642
if (instance.hasAggregations()) {
@@ -571,7 +647,7 @@ protected DatafeedConfig mutateInstance(DatafeedConfig instance) throws IOExcept
571647
aggBuilder
572648
.addAggregator(new DateHistogramAggregationBuilder(timeField).field(timeField).interval(between(10000, 3600000))
573649
.subAggregation(new MaxAggregationBuilder(timeField).field(timeField)));
574-
builder.setAggregations(aggBuilder);
650+
builder.setParsedAggregations(aggBuilder);
575651
if (instance.getScriptFields().isEmpty() == false) {
576652
builder.setScriptFields(Collections.emptyList());
577653
}

x-pack/plugin/core/src/test/java/org/elasticsearch/xpack/core/ml/datafeed/DatafeedUpdateTests.java

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -167,7 +167,7 @@ public void testApply_givenFullUpdateNoAggregations() {
167167
assertThat(updatedDatafeed.getTypes(), equalTo(Collections.singletonList("t_2")));
168168
assertThat(updatedDatafeed.getQueryDelay(), equalTo(TimeValue.timeValueSeconds(42)));
169169
assertThat(updatedDatafeed.getFrequency(), equalTo(TimeValue.timeValueSeconds(142)));
170-
assertThat(updatedDatafeed.getQuery(), equalTo(QueryBuilders.termQuery("a", "b")));
170+
assertThat(updatedDatafeed.getParsedQuery(), equalTo(QueryBuilders.termQuery("a", "b")));
171171
assertThat(updatedDatafeed.hasAggregations(), is(false));
172172
assertThat(updatedDatafeed.getScriptFields(),
173173
equalTo(Collections.singletonList(new SearchSourceBuilder.ScriptField("a", mockScript("b"), false))));
@@ -192,7 +192,7 @@ public void testApply_givenAggregations() {
192192

193193
assertThat(updatedDatafeed.getIndices(), equalTo(Collections.singletonList("i_1")));
194194
assertThat(updatedDatafeed.getTypes(), equalTo(Collections.singletonList("t_1")));
195-
assertThat(updatedDatafeed.getAggregations(),
195+
assertThat(updatedDatafeed.getParsedAggregations(),
196196
equalTo(new AggregatorFactories.Builder().addAggregator(
197197
AggregationBuilders.histogram("a").interval(300000).field("time").subAggregation(maxTime))));
198198
}

x-pack/plugin/ml/qa/native-multi-node-tests/src/test/java/org/elasticsearch/xpack/ml/integration/DelayedDataDetectorIT.java

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -153,13 +153,13 @@ public void testMissingDataDetectionWithAggregationsAndQuery() throws Exception
153153
DatafeedConfig.Builder datafeedConfigBuilder = createDatafeedBuilder(job.getId() + "-datafeed",
154154
job.getId(),
155155
Collections.singletonList(index));
156-
datafeedConfigBuilder.setAggregations(new AggregatorFactories.Builder().addAggregator(
156+
datafeedConfigBuilder.setParsedAggregations(new AggregatorFactories.Builder().addAggregator(
157157
AggregationBuilders.histogram("time")
158158
.subAggregation(maxTime)
159159
.subAggregation(avgAggregationBuilder)
160160
.field("time")
161161
.interval(TimeValue.timeValueMinutes(5).millis())));
162-
datafeedConfigBuilder.setQuery(new RangeQueryBuilder("value").gte(numDocs/2));
162+
datafeedConfigBuilder.setParsedQuery(new RangeQueryBuilder("value").gte(numDocs/2));
163163
datafeedConfigBuilder.setFrequency(TimeValue.timeValueMinutes(5));
164164
datafeedConfigBuilder.setDelayedDataCheckConfig(DelayedDataCheckConfig.enabledDelayedDataCheckConfig(TimeValue.timeValueHours(12)));
165165

x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/action/TransportPutDatafeedAction.java

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,7 @@
3333
import org.elasticsearch.xpack.core.XPackSettings;
3434
import org.elasticsearch.xpack.core.ml.MlMetadata;
3535
import org.elasticsearch.xpack.core.ml.action.PutDatafeedAction;
36+
import org.elasticsearch.xpack.core.ml.datafeed.DatafeedConfig;
3637
import org.elasticsearch.xpack.core.rollup.action.GetRollupIndexCapsAction;
3738
import org.elasticsearch.xpack.core.rollup.action.RollupSearchAction;
3839
import org.elasticsearch.xpack.core.security.SecurityContext;
@@ -154,6 +155,7 @@ private void handlePrivsResponse(String username, PutDatafeedAction.Request requ
154155
private void putDatafeed(PutDatafeedAction.Request request, Map<String, String> headers,
155156
ActionListener<PutDatafeedAction.Response> listener) {
156157

158+
DatafeedConfig.validateAggregations(request.getDatafeed().getParsedAggregations());
157159
clusterService.submitStateUpdateTask(
158160
"put-datafeed-" + request.getDatafeed().getId(),
159161
new AckedClusterStateUpdateTask<PutDatafeedAction.Response>(request, listener) {

x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/action/TransportStartDatafeedAction.java

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -90,6 +90,7 @@ static void validate(String datafeedId, MlMetadata mlMetadata, PersistentTasksCu
9090
throw ExceptionsHelper.missingJobException(datafeed.getJobId());
9191
}
9292
DatafeedJobValidator.validate(datafeed, job);
93+
DatafeedConfig.validateAggregations(datafeed.getParsedAggregations());
9394
JobState jobState = MlTasks.getJobState(datafeed.getJobId(), tasks);
9495
if (jobState.isAnyOf(JobState.OPENING, JobState.OPENED) == false) {
9596
throw ExceptionsHelper.conflictStatusException("cannot start datafeed [" + datafeedId + "] because job [" + job.getId() +

x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/datafeed/delayeddatacheck/DelayedDataDetectorFactory.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -44,7 +44,7 @@ public static DelayedDataDetector buildDetector(Job job, DatafeedConfig datafeed
4444
window,
4545
job.getId(),
4646
job.getDataDescription().getTimeField(),
47-
datafeedConfig.getQuery(),
47+
datafeedConfig.getParsedQuery(),
4848
datafeedConfig.getIndices().toArray(new String[0]),
4949
client);
5050
} else {

0 commit comments

Comments
 (0)