Skip to content

Commit b05fbdf

Browse files
committed
When parsing JSON fields, also create tokens prefixed with the field key. (#34207)
1 parent 549ae4b commit b05fbdf

File tree

5 files changed

+327
-63
lines changed

5 files changed

+327
-63
lines changed

server/src/main/java/org/elasticsearch/index/mapper/JsonFieldMapper.java

Lines changed: 10 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -51,8 +51,10 @@
5151
* of keys.
5252
*
5353
* Currently the mapper extracts all leaf values of the JSON object, converts them to their text
54-
* representations, and indexes each one as a keyword. As an example, given a json field called
55-
* 'json_field' and the following input
54+
* representations, and indexes each one as a keyword. It creates both a 'keyed' version of the token
55+
* to allow searches on particular key-value pairs, as well as a 'root' token without the key.
56+
*
57+
* As an example, given a json field called 'json_field' and the following input
5658
*
5759
* {
5860
* "json_field": {
@@ -63,13 +65,18 @@
6365
* }
6466
* }
6567
*
66-
* the mapper will produce untokenized string fields with the values "some value" and "true".
68+
* the mapper will produce untokenized string fields called "json_field" with values "some value" and "true",
69+
* as well as string fields called "json_field._keyed" with values "key\0some value" and "key2.key3\0true".
70+
*
71+
* Note that \0 is a reserved separator character, and cannot be used in the keys of the JSON object
72+
* (see {@link JsonFieldParser#SEPARATOR}).
6773
*/
6874
public final class JsonFieldMapper extends FieldMapper {
6975

7076
public static final String CONTENT_TYPE = "json";
7177
public static final NamedAnalyzer WHITESPACE_ANALYZER = new NamedAnalyzer(
7278
"whitespace", AnalyzerScope.INDEX, new WhitespaceAnalyzer());
79+
public static final String KEYED_FIELD_SUFFIX = "._keyed";
7380

7481
private static class Defaults {
7582
public static final MappedFieldType FIELD_TYPE = new JsonFieldType();

server/src/main/java/org/elasticsearch/index/mapper/JsonFieldParser.java

Lines changed: 84 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -31,53 +31,116 @@
3131

3232
/**
3333
* A helper class for {@link JsonFieldMapper} that parses a JSON object
34-
* and produces an indexable field for each leaf value.
34+
* and produces a pair of indexable fields for each leaf value.
3535
*/
3636
public class JsonFieldParser {
37+
private static final String SEPARATOR = "\0";
38+
3739
private final MappedFieldType fieldType;
3840
private final int ignoreAbove;
3941

42+
private final String rootFieldName;
43+
private final String keyedFieldName;
44+
4045
JsonFieldParser(MappedFieldType fieldType,
4146
int ignoreAbove) {
4247
this.fieldType = fieldType;
4348
this.ignoreAbove = ignoreAbove;
49+
50+
this.rootFieldName = fieldType.name();
51+
this.keyedFieldName = fieldType.name() + JsonFieldMapper.KEYED_FIELD_SUFFIX;
4452
}
4553

4654
public List<IndexableField> parse(XContentParser parser) throws IOException {
4755
XContentParserUtils.ensureExpectedToken(XContentParser.Token.START_OBJECT,
4856
parser.currentToken(),
4957
parser::getTokenLocation);
5058

59+
ContentPath path = new ContentPath();
5160
List<IndexableField> fields = new ArrayList<>();
52-
int openObjects = 1;
5361

62+
parseObject(parser, path, fields);
63+
return fields;
64+
}
65+
66+
private void parseObject(XContentParser parser,
67+
ContentPath path,
68+
List<IndexableField> fields) throws IOException {
69+
String currentName = null;
5470
while (true) {
55-
if (openObjects == 0) {
56-
return fields;
71+
XContentParser.Token token = parser.nextToken();
72+
if (token == XContentParser.Token.END_OBJECT) {
73+
return;
74+
}
75+
76+
if (token == XContentParser.Token.FIELD_NAME) {
77+
currentName = parser.currentName();
78+
} else {
79+
assert currentName != null;
80+
parseFieldValue(token, parser, path, currentName, fields);
5781
}
82+
}
83+
}
5884

85+
private void parseArray(XContentParser parser,
86+
ContentPath path,
87+
String currentName,
88+
List<IndexableField> fields) throws IOException {
89+
while (true) {
5990
XContentParser.Token token = parser.nextToken();
60-
assert token != null;
61-
62-
if (token == XContentParser.Token.START_OBJECT) {
63-
openObjects++;
64-
} else if (token == XContentParser.Token.END_OBJECT) {
65-
openObjects--;
66-
} else if (token.isValue()) {
67-
String value = parser.text();
68-
addField(value, fields);
69-
} else if (token == XContentParser.Token.VALUE_NULL) {
70-
String value = fieldType.nullValueAsString();
71-
if (value != null) {
72-
addField(value, fields);
73-
}
91+
if (token == XContentParser.Token.END_ARRAY) {
92+
return;
93+
}
94+
parseFieldValue(token, parser, path, currentName, fields);
95+
}
96+
}
97+
98+
private void parseFieldValue(XContentParser.Token token,
99+
XContentParser parser,
100+
ContentPath path,
101+
String currentName,
102+
List<IndexableField> fields) throws IOException {
103+
if (token == XContentParser.Token.START_OBJECT) {
104+
path.add(currentName);
105+
parseObject(parser, path, fields);
106+
path.remove();
107+
} else if (token == XContentParser.Token.START_ARRAY) {
108+
parseArray(parser, path, currentName, fields);
109+
} else if (token.isValue()) {
110+
String value = parser.text();
111+
addField(path, currentName, value, fields);
112+
} else if (token == XContentParser.Token.VALUE_NULL) {
113+
String value = fieldType.nullValueAsString();
114+
if (value != null) {
115+
addField(path, currentName, value, fields);
74116
}
117+
} else {
118+
// Note that we throw an exception here just to be safe. We don't actually expect to reach
119+
// this case, since XContentParser verifies that the input is well-formed as it parses.
120+
throw new IllegalArgumentException("Encountered unexpected token [" + token.toString() + "].");
75121
}
76122
}
77123

78-
private void addField(String value, List<IndexableField> fields) {
79-
if (value.length() <= ignoreAbove) {
80-
fields.add(new Field(fieldType.name(), new BytesRef(value), fieldType));
124+
private void addField(ContentPath path,
125+
String currentName,
126+
String value,
127+
List<IndexableField> fields) {
128+
if (value.length() > ignoreAbove) {
129+
return;
81130
}
131+
132+
String key = path.pathAsText(currentName);
133+
if (key.contains(SEPARATOR)) {
134+
throw new IllegalArgumentException("Keys in [json] fields cannot contain the reserved character \\0."
135+
+ " Offending key: [" + key + "].");
136+
}
137+
String keyedValue = createKeyedValue(key, value);
138+
139+
fields.add(new Field(rootFieldName, new BytesRef(value), fieldType));
140+
fields.add(new Field(keyedFieldName, new BytesRef(keyedValue), fieldType));
141+
}
142+
143+
private static String createKeyedValue(String key, String value) {
144+
return key + SEPARATOR + value;
82145
}
83146
}

server/src/test/java/org/elasticsearch/index/mapper/JsonFieldMapperTests.java

Lines changed: 30 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -72,30 +72,31 @@ public void testDefaults() throws Exception {
7272

7373
BytesReference doc = BytesReference.bytes(XContentFactory.jsonBuilder().startObject()
7474
.startObject("field")
75-
.field("key1", "value")
76-
.field("key2", true)
75+
.field("key", "value")
7776
.endObject()
7877
.endObject());
7978

8079
ParsedDocument parsedDoc = mapper.parse(SourceToParse.source("test", "type", "1", doc, XContentType.JSON));
80+
8181
IndexableField[] fields = parsedDoc.rootDoc().getFields("field");
82-
assertEquals(2, fields.length);
82+
assertEquals(1, fields.length);
83+
84+
assertEquals("field", fields[0].name());
85+
assertEquals(new BytesRef("value"), fields[0].binaryValue());
86+
assertFalse(fields[0].fieldType().stored());
87+
assertTrue(fields[0].fieldType().omitNorms());
8388

84-
IndexableField field1 = fields[0];
85-
assertEquals("field", field1.name());
86-
assertEquals(new BytesRef("value"), field1.binaryValue());
87-
assertTrue(field1.fieldType().omitNorms());
89+
IndexableField[] keyedFields = parsedDoc.rootDoc().getFields("field._keyed");
90+
assertEquals(1, keyedFields.length);
8891

89-
IndexableField field2 = fields[1];
90-
assertEquals("field", field2.name());
91-
assertEquals(new BytesRef("true"), field2.binaryValue());
92-
assertTrue(field2.fieldType().omitNorms());
92+
assertEquals("field._keyed", keyedFields[0].name());
93+
assertEquals(new BytesRef("key\0value"), keyedFields[0].binaryValue());
94+
assertFalse(keyedFields[0].fieldType().stored());
95+
assertTrue(keyedFields[0].fieldType().omitNorms());
9396

9497
IndexableField[] fieldNamesFields = parsedDoc.rootDoc().getFields(FieldNamesFieldMapper.NAME);
9598
assertEquals(1, fieldNamesFields.length);
96-
97-
IndexableField fieldNamesField = fieldNamesFields[0];
98-
assertEquals("field", fieldNamesField.stringValue());
99+
assertEquals("field", fieldNamesFields[0].stringValue());
99100
}
100101

101102
public void testDisableIndex() throws Exception {
@@ -248,20 +249,18 @@ public void testFieldMultiplicity() throws Exception {
248249
.endObject());
249250

250251
ParsedDocument parsedDoc = mapper.parse(SourceToParse.source("test", "type", "1", doc, XContentType.JSON));
252+
251253
IndexableField[] fields = parsedDoc.rootDoc().getFields("field");
252254
assertEquals(3, fields.length);
253-
254-
IndexableField field1 = fields[0];
255-
assertEquals("field", field1.name());
256-
assertEquals(new BytesRef("value"), field1.binaryValue());
257-
258-
IndexableField field2 = fields[1];
259-
assertEquals("field", field2.name());
260-
assertEquals(new BytesRef("true"), field2.binaryValue());
261-
262-
IndexableField field3 = fields[2];
263-
assertEquals("field", field3.name());
264-
assertEquals(new BytesRef("false"), field3.binaryValue());
255+
assertEquals(new BytesRef("value"), fields[0].binaryValue());
256+
assertEquals(new BytesRef("true"), fields[1].binaryValue());
257+
assertEquals(new BytesRef("false"), fields[2].binaryValue());
258+
259+
IndexableField[] keyedFields = parsedDoc.rootDoc().getFields("field._keyed");
260+
assertEquals(3, keyedFields.length);
261+
assertEquals(new BytesRef("key1\0value"), keyedFields[0].binaryValue());
262+
assertEquals(new BytesRef("key2\0true"), keyedFields[1].binaryValue());
263+
assertEquals(new BytesRef("key3\0false"), keyedFields[2].binaryValue());
265264
}
266265

267266
public void testIgnoreAbove() throws IOException {
@@ -292,7 +291,6 @@ public void testIgnoreAbove() throws IOException {
292291
assertEquals(0, fields.length);
293292
}
294293

295-
296294
public void testNullValues() throws Exception {
297295
String mapping = Strings.toString(XContentFactory.jsonBuilder().startObject()
298296
.startObject("type")
@@ -326,8 +324,11 @@ public void testNullValues() throws Exception {
326324

327325
IndexableField[] otherFields = parsedDoc.rootDoc().getFields("other_field");
328326
assertEquals(1, otherFields.length);
329-
IndexableField field = otherFields[0];
330-
assertEquals(new BytesRef("placeholder"), field.binaryValue());
327+
assertEquals(new BytesRef("placeholder"), otherFields[0].binaryValue());
328+
329+
IndexableField[] prefixedOtherFields = parsedDoc.rootDoc().getFields("other_field._keyed");
330+
assertEquals(1, prefixedOtherFields.length);
331+
assertEquals(new BytesRef("key\0placeholder"), prefixedOtherFields[0].binaryValue());
331332
}
332333

333334
public void testSplitQueriesOnWhitespace() throws IOException {

0 commit comments

Comments
 (0)