Skip to content

Commit c0ea6d4

Browse files
committed
Add a simple JSON field mapper. (#33923)
* Add a simple JSON field type. * Add support for ignore_above. * Add support for null_value. * Add support for split_queries_on_whitespace. * Prevent norms from being enabled. * Clarify the message around copy_to not being supported. * Disallow wildcard queries. * For now, disallow the field from being stored.
1 parent a21a99d commit c0ea6d4

File tree

7 files changed

+1060
-0
lines changed

7 files changed

+1060
-0
lines changed
Lines changed: 327 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,327 @@
1+
/*
2+
* Licensed to Elasticsearch under one or more contributor
3+
* license agreements. See the NOTICE file distributed with
4+
* this work for additional information regarding copyright
5+
* ownership. Elasticsearch licenses this file to you under
6+
* the Apache License, Version 2.0 (the "License"); you may
7+
* not use this file except in compliance with the License.
8+
* You may obtain a copy of the License at
9+
*
10+
* http://www.apache.org/licenses/LICENSE-2.0
11+
*
12+
* Unless required by applicable law or agreed to in writing,
13+
* software distributed under the License is distributed on an
14+
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15+
* KIND, either express or implied. See the License for the
16+
* specific language governing permissions and limitations
17+
* under the License.
18+
*/
19+
20+
package org.elasticsearch.index.mapper;
21+
22+
import org.apache.lucene.analysis.core.WhitespaceAnalyzer;
23+
import org.apache.lucene.index.IndexOptions;
24+
import org.apache.lucene.index.IndexableField;
25+
import org.apache.lucene.index.Term;
26+
import org.apache.lucene.search.MultiTermQuery;
27+
import org.apache.lucene.search.Query;
28+
import org.apache.lucene.search.TermQuery;
29+
import org.apache.lucene.util.BytesRef;
30+
import org.elasticsearch.common.lucene.Lucene;
31+
import org.elasticsearch.common.settings.Settings;
32+
import org.elasticsearch.common.unit.Fuzziness;
33+
import org.elasticsearch.common.xcontent.XContentBuilder;
34+
import org.elasticsearch.common.xcontent.XContentParser;
35+
import org.elasticsearch.common.xcontent.support.XContentMapValues;
36+
import org.elasticsearch.index.analysis.AnalyzerScope;
37+
import org.elasticsearch.index.analysis.NamedAnalyzer;
38+
import org.elasticsearch.index.query.QueryShardContext;
39+
40+
import java.io.IOException;
41+
import java.util.Iterator;
42+
import java.util.List;
43+
import java.util.Map;
44+
import java.util.Objects;
45+
46+
import static org.elasticsearch.index.mapper.TypeParsers.parseField;
47+
48+
/**
49+
* A field mapper that accepts a JSON object and flattens it into a single field. This data type
50+
* can be a useful alternative to an 'object' mapping when the object has a large, unknown set
51+
* of keys.
52+
*
53+
* Currently the mapper extracts all leaf values of the JSON object, converts them to their text
54+
* representations, and indexes each one as a keyword. As an example, given a json field called
55+
* 'json_field' and the following input
56+
*
57+
* {
58+
* "json_field: {
59+
* "key1": "some value",
60+
* "key2": {
61+
* "key3": true
62+
* }
63+
* }
64+
* }
65+
*
66+
* the mapper will produce untokenized string fields with the values "some value" and "true".
67+
*/
68+
public final class JsonFieldMapper extends FieldMapper {
69+
70+
public static final String CONTENT_TYPE = "json";
71+
public static final NamedAnalyzer WHITESPACE_ANALYZER = new NamedAnalyzer(
72+
"whitespace", AnalyzerScope.INDEX, new WhitespaceAnalyzer());
73+
74+
private static class Defaults {
75+
public static final MappedFieldType FIELD_TYPE = new JsonFieldType();
76+
77+
static {
78+
FIELD_TYPE.setTokenized(false);
79+
FIELD_TYPE.setOmitNorms(true);
80+
FIELD_TYPE.setStored(false);
81+
FIELD_TYPE.setIndexOptions(IndexOptions.DOCS);
82+
FIELD_TYPE.freeze();
83+
}
84+
85+
public static final int IGNORE_ABOVE = Integer.MAX_VALUE;
86+
}
87+
88+
public static class Builder extends FieldMapper.Builder<Builder, JsonFieldMapper> {
89+
private int ignoreAbove = Defaults.IGNORE_ABOVE;
90+
91+
public Builder(String name) {
92+
super(name, Defaults.FIELD_TYPE, Defaults.FIELD_TYPE);
93+
builder = this;
94+
}
95+
96+
@Override
97+
public JsonFieldType fieldType() {
98+
return (JsonFieldType) super.fieldType();
99+
}
100+
101+
@Override
102+
public Builder indexOptions(IndexOptions indexOptions) {
103+
if (indexOptions.compareTo(IndexOptions.DOCS_AND_FREQS) > 0) {
104+
throw new IllegalArgumentException("The [" + CONTENT_TYPE
105+
+ "] field does not support positions, got [index_options]="
106+
+ indexOptionToString(indexOptions));
107+
}
108+
return super.indexOptions(indexOptions);
109+
}
110+
111+
public Builder ignoreAbove(int ignoreAbove) {
112+
if (ignoreAbove < 0) {
113+
throw new IllegalArgumentException("[ignore_above] must be positive, got " + ignoreAbove);
114+
}
115+
this.ignoreAbove = ignoreAbove;
116+
return this;
117+
}
118+
119+
public Builder splitQueriesOnWhitespace(boolean splitQueriesOnWhitespace) {
120+
fieldType().setSplitQueriesOnWhitespace(splitQueriesOnWhitespace);
121+
return builder;
122+
}
123+
124+
@Override
125+
public Builder addMultiField(Mapper.Builder mapperBuilder) {
126+
throw new UnsupportedOperationException("[fields] is not supported for [" + CONTENT_TYPE + "] fields.");
127+
}
128+
129+
@Override
130+
public Builder copyTo(CopyTo copyTo) {
131+
throw new UnsupportedOperationException("[copy_to] is not supported for [" + CONTENT_TYPE + "] fields.");
132+
}
133+
134+
@Override
135+
public Builder store(boolean store) {
136+
throw new UnsupportedOperationException("[store] is not currently supported for [" +
137+
CONTENT_TYPE + "] fields.");
138+
}
139+
140+
@Override
141+
public JsonFieldMapper build(BuilderContext context) {
142+
setupFieldType(context);
143+
if (fieldType().splitQueriesOnWhitespace()) {
144+
fieldType().setSearchAnalyzer(WHITESPACE_ANALYZER);
145+
}
146+
return new JsonFieldMapper(name, fieldType, defaultFieldType,
147+
ignoreAbove, context.indexSettings());
148+
}
149+
}
150+
151+
public static class TypeParser implements Mapper.TypeParser {
152+
@Override
153+
public Mapper.Builder<?,?> parse(String name, Map<String, Object> node, ParserContext parserContext) throws MapperParsingException {
154+
JsonFieldMapper.Builder builder = new JsonFieldMapper.Builder(name);
155+
parseField(builder, name, node, parserContext);
156+
for (Iterator<Map.Entry<String, Object>> iterator = node.entrySet().iterator(); iterator.hasNext();) {
157+
Map.Entry<String, Object> entry = iterator.next();
158+
String propName = entry.getKey();
159+
Object propNode = entry.getValue();
160+
if (propName.equals("ignore_above")) {
161+
builder.ignoreAbove(XContentMapValues.nodeIntegerValue(propNode, -1));
162+
iterator.remove();
163+
} else if (propName.equals("null_value")) {
164+
if (propNode == null) {
165+
throw new MapperParsingException("Property [null_value] cannot be null.");
166+
}
167+
builder.nullValue(propNode.toString());
168+
iterator.remove();
169+
} else if (propName.equals("split_queries_on_whitespace")) {
170+
builder.splitQueriesOnWhitespace
171+
(XContentMapValues.nodeBooleanValue(propNode, "split_queries_on_whitespace"));
172+
iterator.remove();
173+
}
174+
}
175+
return builder;
176+
}
177+
}
178+
179+
public static final class JsonFieldType extends StringFieldType {
180+
private boolean splitQueriesOnWhitespace;
181+
182+
public JsonFieldType() {
183+
setIndexAnalyzer(Lucene.KEYWORD_ANALYZER);
184+
setSearchAnalyzer(Lucene.KEYWORD_ANALYZER);
185+
}
186+
187+
private JsonFieldType(JsonFieldType ref) {
188+
super(ref);
189+
this.splitQueriesOnWhitespace = ref.splitQueriesOnWhitespace;
190+
}
191+
192+
@Override
193+
public boolean equals(Object o) {
194+
if (this == o) return true;
195+
if (o == null || getClass() != o.getClass()) return false;
196+
if (!super.equals(o)) return false;
197+
JsonFieldType that = (JsonFieldType) o;
198+
return splitQueriesOnWhitespace == that.splitQueriesOnWhitespace;
199+
}
200+
201+
@Override
202+
public int hashCode() {
203+
return Objects.hash(super.hashCode(), splitQueriesOnWhitespace);
204+
}
205+
206+
public JsonFieldType clone() {
207+
return new JsonFieldType(this);
208+
}
209+
210+
@Override
211+
public String typeName() {
212+
return CONTENT_TYPE;
213+
}
214+
215+
public boolean splitQueriesOnWhitespace() {
216+
return splitQueriesOnWhitespace;
217+
}
218+
219+
public void setSplitQueriesOnWhitespace(boolean splitQueriesOnWhitespace) {
220+
checkIfFrozen();
221+
this.splitQueriesOnWhitespace = splitQueriesOnWhitespace;
222+
}
223+
224+
@Override
225+
public Query existsQuery(QueryShardContext context) {
226+
return new TermQuery(new Term(FieldNamesFieldMapper.NAME, name()));
227+
}
228+
229+
@Override
230+
public Query fuzzyQuery(Object value, Fuzziness fuzziness, int prefixLength, int maxExpansions,
231+
boolean transpositions) {
232+
throw new UnsupportedOperationException("[fuzzy] queries are not currently supported on [" +
233+
CONTENT_TYPE + "] fields.");
234+
}
235+
236+
@Override
237+
public Query regexpQuery(String value, int flags, int maxDeterminizedStates,
238+
MultiTermQuery.RewriteMethod method, QueryShardContext context) {
239+
throw new UnsupportedOperationException("[regexp] queries are not currently supported on [" +
240+
CONTENT_TYPE + "] fields.");
241+
}
242+
243+
@Override
244+
public Query wildcardQuery(String value,
245+
MultiTermQuery.RewriteMethod method,
246+
QueryShardContext context) {
247+
throw new UnsupportedOperationException("[wildcard] queries are not currently supported on [" +
248+
CONTENT_TYPE + "] fields.");
249+
}
250+
251+
@Override
252+
public Object valueForDisplay(Object value) {
253+
if (value == null) {
254+
return null;
255+
}
256+
BytesRef binaryValue = (BytesRef) value;
257+
return binaryValue.utf8ToString();
258+
}
259+
}
260+
261+
private final JsonFieldParser fieldParser;
262+
private int ignoreAbove;
263+
264+
private JsonFieldMapper(String simpleName,
265+
MappedFieldType fieldType,
266+
MappedFieldType defaultFieldType,
267+
int ignoreAbove,
268+
Settings indexSettings) {
269+
super(simpleName, fieldType, defaultFieldType, indexSettings, MultiFields.empty(), CopyTo.empty());
270+
assert fieldType.indexOptions().compareTo(IndexOptions.DOCS_AND_FREQS) <= 0;
271+
272+
this.ignoreAbove = ignoreAbove;
273+
this.fieldParser = new JsonFieldParser(fieldType, ignoreAbove);
274+
}
275+
276+
@Override
277+
protected String contentType() {
278+
return CONTENT_TYPE;
279+
}
280+
281+
@Override
282+
protected void doMerge(Mapper mergeWith) {
283+
super.doMerge(mergeWith);
284+
this.ignoreAbove = ((JsonFieldMapper) mergeWith).ignoreAbove;
285+
}
286+
287+
@Override
288+
protected JsonFieldMapper clone() {
289+
return (JsonFieldMapper) super.clone();
290+
}
291+
292+
@Override
293+
public JsonFieldType fieldType() {
294+
return (JsonFieldType) super.fieldType();
295+
}
296+
297+
@Override
298+
protected void parseCreateField(ParseContext context, List<IndexableField> fields) throws IOException {
299+
if (context.parser().currentToken() == XContentParser.Token.VALUE_NULL) {
300+
return;
301+
}
302+
303+
if (fieldType().indexOptions() != IndexOptions.NONE || fieldType().stored()) {
304+
fields.addAll(fieldParser.parse(context.parser()));
305+
createFieldNamesField(context, fields);
306+
} else {
307+
context.parser().skipChildren();
308+
}
309+
}
310+
311+
@Override
312+
protected void doXContentBody(XContentBuilder builder, boolean includeDefaults, Params params) throws IOException {
313+
super.doXContentBody(builder, includeDefaults, params);
314+
315+
if (includeDefaults || fieldType().nullValue() != null) {
316+
builder.field("null_value", fieldType().nullValue());
317+
}
318+
319+
if (includeDefaults || ignoreAbove != Defaults.IGNORE_ABOVE) {
320+
builder.field("ignore_above", ignoreAbove);
321+
}
322+
323+
if (includeDefaults || fieldType().splitQueriesOnWhitespace()) {
324+
builder.field("split_queries_on_whitespace", fieldType().splitQueriesOnWhitespace());
325+
}
326+
}
327+
}

0 commit comments

Comments
 (0)