Skip to content

Commit ad8cc92

Browse files
author
Hendrik Muhs
authored
[ML-Dataframe] Feature/fib multi aggs and sources (#34525)
implement support for multiple sources and aggregations
1 parent 97782f5 commit ad8cc92

File tree

3 files changed

+374
-15
lines changed

3 files changed

+374
-15
lines changed
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,62 @@
1+
/*
2+
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
3+
* or more contributor license agreements. Licensed under the Elastic License;
4+
* you may not use this file except in compliance with the Elastic License.
5+
*/
6+
7+
package org.elasticsearch.xpack.ml.featureindexbuilder.job;
8+
9+
import org.apache.log4j.Logger;
10+
import org.elasticsearch.search.aggregations.Aggregation;
11+
import org.elasticsearch.search.aggregations.AggregationBuilder;
12+
import org.elasticsearch.search.aggregations.bucket.composite.CompositeAggregation;
13+
import org.elasticsearch.search.aggregations.bucket.composite.CompositeValuesSourceBuilder;
14+
import org.elasticsearch.search.aggregations.metrics.NumericMetricsAggregation;
15+
import org.elasticsearch.search.aggregations.metrics.NumericMetricsAggregation.SingleValue;
16+
17+
import java.util.Collection;
18+
import java.util.HashMap;
19+
import java.util.List;
20+
import java.util.Map;
21+
import java.util.stream.Stream;
22+
23+
final class AggregationResultUtils {
24+
private static final Logger logger = Logger.getLogger(AggregationResultUtils.class.getName());
25+
26+
/**
27+
* Extracts aggregation results from a composite aggregation and puts it into a map.
28+
*
29+
* @param agg The aggregation result
30+
* @param sources The original sources used for querying
31+
* @param aggregationBuilders the aggregation used for querying
32+
* @return a map containing the results of the aggregation in a consumable way
33+
*/
34+
public static Stream<Map<String, Object>> extractCompositeAggregationResults(CompositeAggregation agg,
35+
List<CompositeValuesSourceBuilder<?>> sources, Collection<AggregationBuilder> aggregationBuilders) {
36+
return agg.getBuckets().stream().map(bucket -> {
37+
Map<String, Object> document = new HashMap<>();
38+
for (CompositeValuesSourceBuilder<?> source : sources) {
39+
String destinationFieldName = source.name();
40+
document.put(destinationFieldName, bucket.getKey().get(destinationFieldName));
41+
}
42+
for (AggregationBuilder aggregationBuilder : aggregationBuilders) {
43+
String aggName = aggregationBuilder.getName();
44+
45+
// TODO: support other aggregation types
46+
Aggregation aggResult = bucket.getAggregations().get(aggName);
47+
48+
if (aggResult instanceof NumericMetricsAggregation.SingleValue) {
49+
NumericMetricsAggregation.SingleValue aggResultSingleValue = (SingleValue) aggResult;
50+
document.put(aggName, aggResultSingleValue.value());
51+
} else {
52+
// Execution should never reach this point!
53+
// Creating jobs with unsupported aggregations shall not be possible
54+
logger.error("Dataframe Internal Error: unsupported aggregation ["+ aggResult.getName() +"], ignoring");
55+
assert false;
56+
}
57+
}
58+
return document;
59+
});
60+
}
61+
62+
}

x-pack/plugin/ml-feature-index-builder/src/main/java/org/elasticsearch/xpack/ml/featureindexbuilder/job/FeatureIndexBuilderIndexer.java

Lines changed: 17 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -17,22 +17,23 @@
1717
import org.elasticsearch.search.aggregations.bucket.composite.CompositeAggregation;
1818
import org.elasticsearch.search.aggregations.bucket.composite.CompositeAggregationBuilder;
1919
import org.elasticsearch.search.aggregations.bucket.composite.CompositeValuesSourceBuilder;
20-
import org.elasticsearch.search.aggregations.metrics.NumericMetricsAggregation;
2120
import org.elasticsearch.search.builder.SearchSourceBuilder;
2221
import org.elasticsearch.xpack.core.indexing.AsyncTwoPhaseIndexer;
2322
import org.elasticsearch.xpack.core.indexing.IndexerState;
2423
import org.elasticsearch.xpack.core.indexing.IterationResult;
2524

2625
import java.io.IOException;
2726
import java.io.UncheckedIOException;
27+
import java.util.Collection;
2828
import java.util.List;
2929
import java.util.Map;
3030
import java.util.concurrent.Executor;
3131
import java.util.concurrent.atomic.AtomicReference;
3232
import java.util.stream.Collectors;
33+
import java.util.stream.Stream;
3334

34-
import static org.elasticsearch.xpack.core.ml.job.persistence.ElasticsearchMappings.DOC_TYPE;
3535
import static org.elasticsearch.common.xcontent.XContentFactory.jsonBuilder;
36+
import static org.elasticsearch.xpack.core.ml.job.persistence.ElasticsearchMappings.DOC_TYPE;
3637

3738
public abstract class FeatureIndexBuilderIndexer extends AsyncTwoPhaseIndexer<Map<String, Object>, FeatureIndexBuilderJobStats> {
3839

@@ -58,36 +59,37 @@ protected void onStartJob(long now) {
5859
@Override
5960
protected IterationResult<Map<String, Object>> doProcess(SearchResponse searchResponse) {
6061
final CompositeAggregation agg = searchResponse.getAggregations().get("feature");
61-
return new IterationResult<>(processBuckets(agg), agg.afterKey(), agg.getBuckets().isEmpty());
62+
return new IterationResult<>(processBucketsToIndexRequests(agg).collect(Collectors.toList()), agg.afterKey(),
63+
agg.getBuckets().isEmpty());
6264
}
6365

6466
/*
65-
* Mocked demo case
67+
* Parses the result and creates a stream of indexable documents
6668
*
67-
* TODO: replace with proper implementation
69+
* Implementation decisions:
70+
*
71+
* Extraction uses generic maps as intermediate exchange format in order to hook in ingest pipelines/processors
72+
* in later versions, see {@link IngestDocument).
6873
*/
69-
private List<IndexRequest> processBuckets(CompositeAggregation agg) {
70-
// for now only 1 source supported
71-
String destinationFieldName = job.getConfig().getSourceConfig().getSources().get(0).name();
72-
String aggName = job.getConfig().getAggregationConfig().getAggregatorFactories().iterator().next().getName();
74+
private Stream<IndexRequest> processBucketsToIndexRequests(CompositeAggregation agg) {
75+
String indexName = job.getConfig().getDestinationIndex();
76+
List<CompositeValuesSourceBuilder<?>> sources = job.getConfig().getSourceConfig().getSources();
77+
Collection<AggregationBuilder> aggregationBuilders = job.getConfig().getAggregationConfig().getAggregatorFactories();
7378

74-
return agg.getBuckets().stream().map(b -> {
75-
NumericMetricsAggregation.SingleValue aggResult = b.getAggregations().get(aggName);
79+
return AggregationResultUtils.extractCompositeAggregationResults(agg, sources, aggregationBuilders).map(document -> {
7680
XContentBuilder builder;
7781
try {
7882
builder = jsonBuilder();
7983
builder.startObject();
80-
builder.field(destinationFieldName, b.getKey().get(destinationFieldName));
81-
builder.field(aggName, aggResult.value());
84+
builder.map(document);
8285
builder.endObject();
8386
} catch (IOException e) {
8487
throw new UncheckedIOException(e);
8588
}
8689

87-
String indexName = job.getConfig().getDestinationIndex();
8890
IndexRequest request = new IndexRequest(indexName, DOC_TYPE).source(builder);
8991
return request;
90-
}).collect(Collectors.toList());
92+
});
9193
}
9294

9395
@Override

0 commit comments

Comments
 (0)