|
7 | 7 | package org.elasticsearch.xpack.ml.featureindexbuilder.job; |
8 | 8 |
|
9 | 9 | import org.apache.log4j.Logger; |
10 | | -import org.elasticsearch.action.admin.indices.create.CreateIndexRequest; |
11 | | -import org.elasticsearch.action.admin.indices.delete.DeleteIndexRequest; |
12 | | -import org.elasticsearch.action.bulk.BulkRequest; |
13 | 10 | import org.elasticsearch.action.index.IndexRequest; |
14 | 11 | import org.elasticsearch.action.search.SearchRequest; |
15 | 12 | import org.elasticsearch.action.search.SearchResponse; |
16 | | -import org.elasticsearch.client.Client; |
17 | | -import org.elasticsearch.client.IndicesAdminClient; |
18 | | -import org.elasticsearch.common.settings.Settings; |
19 | 13 | import org.elasticsearch.common.xcontent.XContentBuilder; |
20 | | -import org.elasticsearch.common.xcontent.XContentType; |
21 | | -import org.elasticsearch.index.IndexNotFoundException; |
22 | 14 | import org.elasticsearch.index.query.MatchAllQueryBuilder; |
23 | 15 | import org.elasticsearch.index.query.QueryBuilder; |
24 | 16 | import org.elasticsearch.search.aggregations.AggregationBuilders; |
25 | 17 | import org.elasticsearch.search.aggregations.bucket.composite.CompositeAggregation; |
26 | | -import org.elasticsearch.search.aggregations.bucket.composite.CompositeAggregation.Bucket; |
27 | 18 | import org.elasticsearch.search.aggregations.bucket.composite.CompositeAggregationBuilder; |
28 | 19 | import org.elasticsearch.search.aggregations.bucket.composite.CompositeValuesSourceBuilder; |
29 | 20 | import org.elasticsearch.search.aggregations.bucket.composite.TermsValuesSourceBuilder; |
30 | 21 | import org.elasticsearch.search.aggregations.metrics.avg.InternalAvg; |
31 | 22 | import org.elasticsearch.search.builder.SearchSourceBuilder; |
| 23 | +import org.elasticsearch.xpack.core.indexing.AsyncTwoPhaseIndexer; |
| 24 | +import org.elasticsearch.xpack.core.indexing.IndexerState; |
| 25 | +import org.elasticsearch.xpack.core.indexing.IterationResult; |
32 | 26 |
|
33 | 27 | import java.io.IOException; |
| 28 | +import java.io.UncheckedIOException; |
34 | 29 | import java.util.ArrayList; |
35 | 30 | import java.util.List; |
36 | 31 | import java.util.Map; |
37 | | -import java.util.concurrent.ExecutionException; |
| 32 | +import java.util.concurrent.Executor; |
| 33 | +import java.util.concurrent.atomic.AtomicReference; |
| 34 | +import java.util.stream.Collectors; |
38 | 35 |
|
39 | | -import static org.elasticsearch.common.xcontent.XContentFactory.jsonBuilder; |
40 | 36 | import static org.elasticsearch.xpack.core.ml.job.persistence.ElasticsearchMappings.DOC_TYPE; |
| 37 | +import static org.elasticsearch.common.xcontent.XContentFactory.jsonBuilder; |
41 | 38 |
|
42 | | -public class FeatureIndexBuilderIndexer { |
| 39 | +public abstract class FeatureIndexBuilderIndexer extends AsyncTwoPhaseIndexer<Map<String, Object>, FeatureIndexBuilderJobStats> { |
43 | 40 | private static final String PIVOT_INDEX = "pivot-reviews"; |
44 | 41 | private static final String SOURCE_INDEX = "anonreviews"; |
45 | 42 |
|
46 | 43 | private static final Logger logger = Logger.getLogger(FeatureIndexBuilderIndexer.class.getName()); |
47 | 44 | private FeatureIndexBuilderJob job; |
48 | | - private Client client; |
49 | 45 |
|
50 | | - public FeatureIndexBuilderIndexer(FeatureIndexBuilderJob job, Client client) { |
| 46 | + public FeatureIndexBuilderIndexer(Executor executor, FeatureIndexBuilderJob job, AtomicReference<IndexerState> initialState, |
| 47 | + Map<String, Object> initialPosition) { |
| 48 | + super(executor, initialState, initialPosition, new FeatureIndexBuilderJobStats()); |
51 | 49 |
|
52 | 50 | this.job = job; |
53 | | - this.client = client; |
54 | | - logger.info("delete pivot-reviews"); |
55 | | - |
56 | 51 | } |
57 | 52 |
|
58 | | - public synchronized void start() { |
59 | | - deleteIndex(client); |
60 | | - |
61 | | - createIndex(client); |
62 | | - |
63 | | - int runs = 0; |
64 | | - |
65 | | - Map<String, Object> after = null; |
66 | | - logger.info("start feature indexing"); |
67 | | - SearchResponse response; |
68 | | - |
69 | | - try { |
70 | | - response = runQuery(client, after); |
71 | | - |
72 | | - CompositeAggregation compositeAggregation = response.getAggregations().get("feature"); |
73 | | - after = compositeAggregation.afterKey(); |
74 | | - |
75 | | - while (after != null) { |
76 | | - indexBuckets(compositeAggregation); |
77 | | - |
78 | | - ++runs; |
79 | | - response = runQuery(client, after); |
80 | | - |
81 | | - compositeAggregation = response.getAggregations().get("feature"); |
82 | | - after = compositeAggregation.afterKey(); |
83 | | - |
84 | | - //after = null; |
85 | | - } |
86 | | - |
87 | | - indexBuckets(compositeAggregation); |
88 | | - } catch (InterruptedException | ExecutionException e) { |
89 | | - logger.error("Failed to build feature index", e); |
90 | | - } |
91 | | - |
92 | | - logger.info("Finished feature indexing"); |
| 53 | + @Override |
| 54 | + protected String getJobId() { |
| 55 | + return job.getConfig().getId(); |
93 | 56 | } |
94 | 57 |
|
95 | | - private void indexBuckets(CompositeAggregation compositeAggregation) { |
96 | | - BulkRequest bulkIndexRequest = new BulkRequest(); |
97 | | - try { |
98 | | - for (Bucket b : compositeAggregation.getBuckets()) { |
| 58 | + @Override |
| 59 | + protected void onStartJob(long now) { |
| 60 | + } |
99 | 61 |
|
100 | | - InternalAvg avgAgg = b.getAggregations().get("avg_rating"); |
| 62 | + @Override |
| 63 | + protected IterationResult<Map<String, Object>> doProcess(SearchResponse searchResponse) { |
| 64 | + final CompositeAggregation agg = searchResponse.getAggregations().get("feature"); |
| 65 | + return new IterationResult<>(processBuckets(agg), agg.afterKey(), agg.getBuckets().isEmpty()); |
| 66 | + } |
101 | 67 |
|
102 | | - XContentBuilder builder; |
| 68 | + /* |
| 69 | + * Mocked demo case |
| 70 | + * |
| 71 | + * TODO: replace with proper implementation |
| 72 | + */ |
| 73 | + private List<IndexRequest> processBuckets(CompositeAggregation agg) { |
| 74 | + return agg.getBuckets().stream().map(b -> { |
| 75 | + InternalAvg avgAgg = b.getAggregations().get("avg_rating"); |
| 76 | + XContentBuilder builder; |
| 77 | + try { |
103 | 78 | builder = jsonBuilder(); |
| 79 | + |
104 | 80 | builder.startObject(); |
105 | 81 | builder.field("reviewerId", b.getKey().get("reviewerId")); |
106 | 82 | builder.field("avg_rating", avgAgg.getValue()); |
107 | 83 | builder.endObject(); |
108 | | - bulkIndexRequest.add(new IndexRequest(PIVOT_INDEX, DOC_TYPE).source(builder)); |
109 | | - |
| 84 | + } catch (IOException e) { |
| 85 | + throw new UncheckedIOException(e); |
110 | 86 | } |
111 | | - client.bulk(bulkIndexRequest); |
112 | | - } catch (IOException e) { |
113 | | - logger.error("Failed to index", e); |
114 | | - } |
| 87 | + |
| 88 | + String indexName = PIVOT_INDEX + "_" + job.getConfig().getId(); |
| 89 | + IndexRequest request = new IndexRequest(indexName, DOC_TYPE).source(builder); |
| 90 | + return request; |
| 91 | + }).collect(Collectors.toList()); |
| 92 | + } |
| 93 | + |
| 94 | + @Override |
| 95 | + protected SearchRequest buildSearchRequest() { |
| 96 | + |
| 97 | + final Map<String, Object> position = getPosition(); |
| 98 | + SearchRequest request = buildFeatureQuery(position); |
| 99 | + return request; |
115 | 100 | } |
116 | | - |
| 101 | + |
117 | 102 | /* |
118 | | - * Hardcoded demo case for pivoting |
| 103 | + * Mocked demo case |
| 104 | + * |
| 105 | + * TODO: everything below will be replaced with proper implementation read from job configuration |
119 | 106 | */ |
120 | | - |
121 | | - private static void deleteIndex(Client client) { |
122 | | - DeleteIndexRequest deleteIndex = new DeleteIndexRequest(PIVOT_INDEX); |
123 | | - |
124 | | - IndicesAdminClient adminClient = client.admin().indices(); |
125 | | - try { |
126 | | - adminClient.delete(deleteIndex).actionGet(); |
127 | | - } catch (IndexNotFoundException e) { |
128 | | - } |
129 | | - } |
130 | | - |
131 | | - private static void createIndex(Client client) { |
132 | | - |
133 | | - CreateIndexRequest request = new CreateIndexRequest(PIVOT_INDEX); |
134 | | - request.settings(Settings.builder() // <1> |
135 | | - .put("index.number_of_shards", 1) |
136 | | - .put("index.number_of_replicas", 0) |
137 | | - ); |
138 | | - request.mapping(DOC_TYPE, // <1> |
139 | | - "{\n" + |
140 | | - " \"" + DOC_TYPE + "\": {\n" + |
141 | | - " \"properties\": {\n" + |
142 | | - " \"reviewerId\": {\n" + |
143 | | - " \"type\": \"keyword\"\n" + |
144 | | - " },\n" + |
145 | | - " \"avg_rating\": {\n" + |
146 | | - " \"type\": \"integer\"\n" + |
147 | | - " }\n" + |
148 | | - " }\n" + |
149 | | - " }\n" + |
150 | | - "}", // <2> |
151 | | - XContentType.JSON); |
152 | | - IndicesAdminClient adminClient = client.admin().indices(); |
153 | | - adminClient.create(request).actionGet(); |
154 | | - } |
155 | | - |
156 | 107 | private static SearchRequest buildFeatureQuery(Map<String, Object> after) { |
157 | 108 | QueryBuilder queryBuilder = new MatchAllQueryBuilder(); |
158 | 109 | SearchRequest searchRequest = new SearchRequest(SOURCE_INDEX); |
159 | | - |
| 110 | + |
160 | 111 | List<CompositeValuesSourceBuilder<?>> sources = new ArrayList<>(); |
161 | 112 | sources.add(new TermsValuesSourceBuilder("reviewerId").field("reviewerId")); |
162 | | - |
| 113 | + |
163 | 114 | CompositeAggregationBuilder compositeAggregation = new CompositeAggregationBuilder("feature", sources); |
164 | 115 | compositeAggregation.size(1000); |
165 | | - |
| 116 | + |
166 | 117 | if (after != null) { |
167 | 118 | compositeAggregation.aggregateAfter(after); |
168 | 119 | } |
169 | | - |
| 120 | + |
170 | 121 | compositeAggregation.subAggregation(AggregationBuilders.avg("avg_rating").field("rating")); |
171 | 122 | compositeAggregation.subAggregation(AggregationBuilders.cardinality("dc_vendors").field("vendorId")); |
172 | 123 | SearchSourceBuilder sourceBuilder = new SearchSourceBuilder(); |
173 | 124 | sourceBuilder.aggregation(compositeAggregation); |
174 | 125 | sourceBuilder.size(0); |
175 | 126 | sourceBuilder.query(queryBuilder); |
176 | 127 | searchRequest.source(sourceBuilder); |
177 | | - |
| 128 | + |
178 | 129 | return searchRequest; |
179 | | - } |
180 | | - |
181 | | - private static SearchResponse runQuery(Client client, Map<String, Object> after) throws InterruptedException, ExecutionException { |
182 | | - |
183 | | - SearchRequest request = buildFeatureQuery(after); |
184 | | - SearchResponse response = client.search(request).get(); |
185 | | - |
186 | | - return response; |
187 | | - } |
188 | | - |
189 | | - private static void indexResult() { |
190 | | - |
191 | | - |
192 | | - |
193 | 130 | } |
194 | 131 | } |
0 commit comments