Skip to content

Commit bdd889e

Browse files
committed
nested: In case of a single type the _id field should be added to the nested document instead of _uid field.
When `index.mapping.single_type` is `true` the `_uid` field is not used and instead `_id` field is used. Prior to this change nested documents would in this case still use the `_uid` field to mark to what root document they belong to. In case of deleting documents this could lead to only the root Lucene document to be deleted and not the nested Lucene documents. This broke the docid block ordering the block join relies on in order to work correctly and thus causing the `nested` query, `nested` aggregation, nested sorting and nested inner hits to either fail or yield incorrect results. This bug only manifests in 6.0.0-ALPHA2 release and snaphots (5.5.0-SNAPSHOT, 5.6.0-SNAPSHOT, 6.0.0-SNAPSHOT).
1 parent d188f55 commit bdd889e

File tree

4 files changed

+242
-22
lines changed

4 files changed

+242
-22
lines changed

core/src/main/java/org/elasticsearch/index/mapper/DocumentParser.java

Lines changed: 25 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -426,15 +426,33 @@ private static ParseContext nestedContext(ParseContext context, ObjectMapper map
426426
context = context.createNestedContext(mapper.fullPath());
427427
ParseContext.Document nestedDoc = context.doc();
428428
ParseContext.Document parentDoc = nestedDoc.getParent();
429-
// pre add the uid field if possible (id was already provided)
430-
IndexableField uidField = parentDoc.getField(UidFieldMapper.NAME);
431-
if (uidField != null) {
432-
// we don't need to add it as a full uid field in nested docs, since we don't need versioning
433-
// we also rely on this for UidField#loadVersion
434429

435-
// this is a deeply nested field
436-
nestedDoc.add(new Field(UidFieldMapper.NAME, uidField.stringValue(), UidFieldMapper.Defaults.NESTED_FIELD_TYPE));
430+
// We need to add the uid or id to this nested Lucene document too,
431+
// If we do not do this then when a document gets deleted only the root Lucene document gets deleted and
432+
// not the nested Lucene documents! Besides the fact that we would have zombie Lucene documents, the ordering of
433+
// documents inside the Lucene index (document blocks) will be incorrect, as nested documents of different root
434+
// documents are then aligned with other root documents. This will lead tothe nested query, sorting, aggregations
435+
// and inner hits to fail or yield incorrect results.
436+
if (context.mapperService().getIndexSettings().isSingleType()) {
437+
IndexableField idField = parentDoc.getField(IdFieldMapper.NAME);
438+
if (idField != null) {
439+
// We just need to store the id as indexed field, so that IndexWriter#deleteDocuments(term) can then
440+
// delete it when the root document is deleted too.
441+
nestedDoc.add(new Field(IdFieldMapper.NAME, idField.stringValue(), IdFieldMapper.Defaults.NESTED_FIELD_TYPE));
442+
} else {
443+
throw new IllegalStateException("The root document of a nested document should have an id field");
444+
}
445+
} else {
446+
IndexableField uidField = parentDoc.getField(UidFieldMapper.NAME);
447+
if (uidField != null) {
448+
/// We just need to store the uid as indexed field, so that IndexWriter#deleteDocuments(term) can then
449+
// delete it when the root document is deleted too.
450+
nestedDoc.add(new Field(UidFieldMapper.NAME, uidField.stringValue(), UidFieldMapper.Defaults.NESTED_FIELD_TYPE));
451+
} else {
452+
throw new IllegalStateException("The root document of a nested document should have an uid field");
453+
}
437454
}
455+
438456
// the type of the nested doc starts with __, so we can identify that its a nested one in filters
439457
// note, we don't prefix it with the type of the doc since it allows us to execute a nested query
440458
// across types (for example, with similar nested objects)

core/src/main/java/org/elasticsearch/index/mapper/IdFieldMapper.java

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -52,6 +52,7 @@ public static class Defaults {
5252
public static final String NAME = IdFieldMapper.NAME;
5353

5454
public static final MappedFieldType FIELD_TYPE = new IdFieldType();
55+
public static final MappedFieldType NESTED_FIELD_TYPE;
5556

5657
static {
5758
FIELD_TYPE.setTokenized(false);
@@ -62,6 +63,10 @@ public static class Defaults {
6263
FIELD_TYPE.setSearchAnalyzer(Lucene.KEYWORD_ANALYZER);
6364
FIELD_TYPE.setName(NAME);
6465
FIELD_TYPE.freeze();
66+
67+
NESTED_FIELD_TYPE = FIELD_TYPE.clone();
68+
NESTED_FIELD_TYPE.setStored(false);
69+
NESTED_FIELD_TYPE.freeze();
6570
}
6671
}
6772

core/src/test/java/org/elasticsearch/index/mapper/DocumentParserTests.java

Lines changed: 105 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -19,31 +19,32 @@
1919

2020
package org.elasticsearch.index.mapper;
2121

22-
import java.io.IOException;
23-
import java.nio.charset.StandardCharsets;
24-
import java.util.ArrayList;
25-
import java.util.Arrays;
26-
import java.util.Collection;
27-
import java.util.Collections;
28-
import java.util.HashSet;
29-
import java.util.List;
30-
import java.util.Set;
31-
3222
import org.apache.lucene.index.IndexableField;
33-
import org.elasticsearch.common.bytes.BytesArray;
3423
import org.elasticsearch.Version;
3524
import org.elasticsearch.cluster.metadata.IndexMetaData;
25+
import org.elasticsearch.common.bytes.BytesArray;
3626
import org.elasticsearch.common.bytes.BytesReference;
3727
import org.elasticsearch.common.compress.CompressedXContent;
38-
import org.elasticsearch.common.lucene.all.AllField;
3928
import org.elasticsearch.common.settings.Settings;
29+
import org.elasticsearch.common.xcontent.XContentBuilder;
4030
import org.elasticsearch.common.xcontent.XContentFactory;
31+
import org.elasticsearch.common.xcontent.XContentType;
4132
import org.elasticsearch.index.IndexService;
4233
import org.elasticsearch.index.mapper.ParseContext.Document;
4334
import org.elasticsearch.plugins.Plugin;
4435
import org.elasticsearch.test.ESSingleNodeTestCase;
4536
import org.elasticsearch.test.InternalSettingsPlugin;
4637

38+
import java.io.IOException;
39+
import java.nio.charset.StandardCharsets;
40+
import java.util.ArrayList;
41+
import java.util.Arrays;
42+
import java.util.Collection;
43+
import java.util.Collections;
44+
import java.util.HashSet;
45+
import java.util.List;
46+
import java.util.Set;
47+
4748
import static org.elasticsearch.common.xcontent.XContentFactory.jsonBuilder;
4849
import static org.elasticsearch.test.StreamsUtils.copyToBytesFromClasspath;
4950
import static org.elasticsearch.test.StreamsUtils.copyToStringFromClasspath;
@@ -166,6 +167,92 @@ public void testDotsWithDynamicNestedMapper() throws Exception {
166167
e.getMessage());
167168
}
168169

170+
public void testNestedHaveIdAndTypeFields() throws Exception {
171+
DocumentMapperParser mapperParser1 = createIndex("index1", Settings.builder()
172+
.put("index.mapping.single_type", false).build()
173+
).mapperService().documentMapperParser();
174+
DocumentMapperParser mapperParser2 = createIndex("index2", Settings.builder()
175+
.put("index.mapping.single_type", true).build()
176+
).mapperService().documentMapperParser();
177+
178+
XContentBuilder mapping = XContentFactory.jsonBuilder().startObject().startObject("type").startObject("properties");
179+
{
180+
mapping.startObject("foo");
181+
mapping.field("type", "nested");
182+
{
183+
mapping.startObject("properties");
184+
{
185+
186+
mapping.startObject("bar");
187+
mapping.field("type", "keyword");
188+
mapping.endObject();
189+
}
190+
mapping.endObject();
191+
}
192+
mapping.endObject();
193+
}
194+
{
195+
mapping.startObject("baz");
196+
mapping.field("type", "keyword");
197+
mapping.endObject();
198+
}
199+
mapping.endObject().endObject().endObject();
200+
DocumentMapper mapper1 = mapperParser1.parse("type", new CompressedXContent(mapping.string()));
201+
DocumentMapper mapper2 = mapperParser2.parse("type", new CompressedXContent(mapping.string()));
202+
203+
XContentBuilder doc = XContentFactory.jsonBuilder().startObject();
204+
{
205+
doc.startArray("foo");
206+
{
207+
doc.startObject();
208+
doc.field("bar", "value1");
209+
doc.endObject();
210+
}
211+
doc.endArray();
212+
doc.field("baz", "value2");
213+
}
214+
doc.endObject();
215+
216+
// Verify in the case where multiple types are allowed that the _uid field is added to nested documents:
217+
ParsedDocument result = mapper1.parse(SourceToParse.source("index1", "type", "1", doc.bytes(), XContentType.JSON));
218+
assertEquals(2, result.docs().size());
219+
// Nested document:
220+
assertNull(result.docs().get(0).getField(IdFieldMapper.NAME));
221+
assertNotNull(result.docs().get(0).getField(UidFieldMapper.NAME));
222+
assertEquals("type#1", result.docs().get(0).getField(UidFieldMapper.NAME).stringValue());
223+
assertEquals(UidFieldMapper.Defaults.NESTED_FIELD_TYPE, result.docs().get(0).getField(UidFieldMapper.NAME).fieldType());
224+
assertNotNull(result.docs().get(0).getField(TypeFieldMapper.NAME));
225+
assertEquals("__foo", result.docs().get(0).getField(TypeFieldMapper.NAME).stringValue());
226+
assertEquals("value1", result.docs().get(0).getField("foo.bar").binaryValue().utf8ToString());
227+
// Root document:
228+
assertNull(result.docs().get(1).getField(IdFieldMapper.NAME));
229+
assertNotNull(result.docs().get(1).getField(UidFieldMapper.NAME));
230+
assertEquals("type#1", result.docs().get(1).getField(UidFieldMapper.NAME).stringValue());
231+
assertEquals(UidFieldMapper.Defaults.FIELD_TYPE, result.docs().get(1).getField(UidFieldMapper.NAME).fieldType());
232+
assertNotNull(result.docs().get(1).getField(TypeFieldMapper.NAME));
233+
assertEquals("type", result.docs().get(1).getField(TypeFieldMapper.NAME).stringValue());
234+
assertEquals("value2", result.docs().get(1).getField("baz").binaryValue().utf8ToString());
235+
236+
// Verify in the case where only a single type is allowed that the _id field is added to nested documents:
237+
result = mapper2.parse(SourceToParse.source("index2", "type", "1", doc.bytes(), XContentType.JSON));
238+
assertEquals(2, result.docs().size());
239+
// Nested document:
240+
assertNull(result.docs().get(0).getField(UidFieldMapper.NAME));
241+
assertNotNull(result.docs().get(0).getField(IdFieldMapper.NAME));
242+
assertEquals("1", result.docs().get(0).getField(IdFieldMapper.NAME).stringValue());
243+
assertEquals(IdFieldMapper.Defaults.NESTED_FIELD_TYPE, result.docs().get(0).getField(IdFieldMapper.NAME).fieldType());
244+
assertNotNull(result.docs().get(0).getField(TypeFieldMapper.NAME));
245+
assertEquals("__foo", result.docs().get(0).getField(TypeFieldMapper.NAME).stringValue());
246+
assertEquals("value1", result.docs().get(0).getField("foo.bar").binaryValue().utf8ToString());
247+
// Root document:
248+
assertNull(result.docs().get(1).getField(UidFieldMapper.NAME));
249+
assertNotNull(result.docs().get(1).getField(IdFieldMapper.NAME));
250+
assertEquals("1", result.docs().get(1).getField(IdFieldMapper.NAME).stringValue());
251+
assertEquals(IdFieldMapper.Defaults.FIELD_TYPE, result.docs().get(1).getField(IdFieldMapper.NAME).fieldType());
252+
assertNull(result.docs().get(1).getField(TypeFieldMapper.NAME));
253+
assertEquals("value2", result.docs().get(1).getField("baz").binaryValue().utf8ToString());
254+
}
255+
169256
public void testPropagateDynamicWithExistingMapper() throws Exception {
170257
DocumentMapperParser mapperParser = createIndex("test").mapperService().documentMapperParser();
171258
String mapping = XContentFactory.jsonBuilder().startObject().startObject("type")
@@ -657,7 +744,8 @@ public void testDynamicDottedFieldNameLongArrayWithExistingParentWrongType() thr
657744
.value(0)
658745
.value(1)
659746
.endArray().endObject().bytes();
660-
MapperParsingException exception = expectThrows(MapperParsingException.class, () -> mapper.parse("test", "type", "1", bytes));
747+
MapperParsingException exception = expectThrows(MapperParsingException.class,
748+
() -> mapper.parse("test", "type", "1", bytes));
661749
assertEquals("Could not dynamically add mapping for field [foo.bar.baz]. "
662750
+ "Existing mapping for [foo] must be of type object but found [long].", exception.getMessage());
663751
}
@@ -775,7 +863,8 @@ public void testDynamicDottedFieldNameLongWithExistingParentWrongType() throws E
775863
BytesReference bytes = XContentFactory.jsonBuilder()
776864
.startObject().field("foo.bar.baz", 0)
777865
.endObject().bytes();
778-
MapperParsingException exception = expectThrows(MapperParsingException.class, () -> mapper.parse("test", "type", "1", bytes));
866+
MapperParsingException exception = expectThrows(MapperParsingException.class,
867+
() -> mapper.parse("test", "type", "1", bytes));
779868
assertEquals("Could not dynamically add mapping for field [foo.bar.baz]. "
780869
+ "Existing mapping for [foo] must be of type object but found [long].", exception.getMessage());
781870
}
@@ -896,7 +985,8 @@ public void testDynamicDottedFieldNameObjectWithExistingParentWrongType() throws
896985

897986
BytesReference bytes = XContentFactory.jsonBuilder().startObject().startObject("foo.bar.baz").field("a", 0).endObject().endObject()
898987
.bytes();
899-
MapperParsingException exception = expectThrows(MapperParsingException.class, () -> mapper.parse("test", "type", "1", bytes));
988+
MapperParsingException exception = expectThrows(MapperParsingException.class,
989+
() -> mapper.parse("test", "type", "1", bytes));
900990
assertEquals("Could not dynamically add mapping for field [foo.bar.baz]. "
901991
+ "Existing mapping for [foo] must be of type object but found [long].", exception.getMessage());
902992
}

core/src/test/java/org/elasticsearch/search/nested/SimpleNestedIT.java

Lines changed: 107 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -164,6 +164,113 @@ public void testSimpleNested() throws Exception {
164164
assertThat(searchResponse.getHits().totalHits(), equalTo(1L));
165165
}
166166

167+
public void testSimpleNested_singleType() throws Exception {
168+
assertAcked(prepareCreate("test")
169+
.setSettings(Settings.builder().put("index.mapping.single_type", true))
170+
.addMapping("type1", "nested1", "type=nested"));
171+
ensureGreen();
172+
173+
// check on no data, see it works
174+
SearchResponse searchResponse = client().prepareSearch("test").setQuery(termQuery("_all", "n_value1_1")).execute().actionGet();
175+
assertThat(searchResponse.getHits().totalHits(), equalTo(0L));
176+
searchResponse = client().prepareSearch("test").setQuery(termQuery("n_field1", "n_value1_1")).execute().actionGet();
177+
assertThat(searchResponse.getHits().totalHits(), equalTo(0L));
178+
179+
client().prepareIndex("test", "type1", "1").setSource(jsonBuilder().startObject()
180+
.field("field1", "value1")
181+
.startArray("nested1")
182+
.startObject()
183+
.field("n_field1", "n_value1_1")
184+
.field("n_field2", "n_value2_1")
185+
.endObject()
186+
.startObject()
187+
.field("n_field1", "n_value1_2")
188+
.field("n_field2", "n_value2_2")
189+
.endObject()
190+
.endArray()
191+
.endObject()).execute().actionGet();
192+
193+
waitForRelocation(ClusterHealthStatus.GREEN);
194+
// flush, so we fetch it from the index (as see that we filter nested docs)
195+
flush();
196+
GetResponse getResponse = client().prepareGet("test", "type1", "1").get();
197+
assertThat(getResponse.isExists(), equalTo(true));
198+
assertThat(getResponse.getSourceAsBytes(), notNullValue());
199+
200+
// check the numDocs
201+
assertDocumentCount("test", 3);
202+
203+
// check that _all is working on nested docs
204+
searchResponse = client().prepareSearch("test").setQuery(termQuery("_all", "n_value1_1")).execute().actionGet();
205+
assertThat(searchResponse.getHits().totalHits(), equalTo(1L));
206+
searchResponse = client().prepareSearch("test").setQuery(termQuery("n_field1", "n_value1_1")).execute().actionGet();
207+
assertThat(searchResponse.getHits().totalHits(), equalTo(0L));
208+
209+
// search for something that matches the nested doc, and see that we don't find the nested doc
210+
searchResponse = client().prepareSearch("test").setQuery(matchAllQuery()).get();
211+
assertThat(searchResponse.getHits().totalHits(), equalTo(1L));
212+
searchResponse = client().prepareSearch("test").setQuery(termQuery("n_field1", "n_value1_1")).get();
213+
assertThat(searchResponse.getHits().totalHits(), equalTo(0L));
214+
215+
// now, do a nested query
216+
searchResponse = client().prepareSearch("test").setQuery(nestedQuery("nested1", termQuery("nested1.n_field1", "n_value1_1"), ScoreMode.Avg)).get();
217+
assertNoFailures(searchResponse);
218+
assertThat(searchResponse.getHits().totalHits(), equalTo(1L));
219+
220+
searchResponse = client().prepareSearch("test").setQuery(nestedQuery("nested1", termQuery("nested1.n_field1", "n_value1_1"), ScoreMode.Avg)).setSearchType(SearchType.DFS_QUERY_THEN_FETCH).get();
221+
assertNoFailures(searchResponse);
222+
assertThat(searchResponse.getHits().totalHits(), equalTo(1L));
223+
224+
// add another doc, one that would match if it was not nested...
225+
226+
client().prepareIndex("test", "type1", "2").setSource(jsonBuilder().startObject()
227+
.field("field1", "value1")
228+
.startArray("nested1")
229+
.startObject()
230+
.field("n_field1", "n_value1_1")
231+
.field("n_field2", "n_value2_2")
232+
.endObject()
233+
.startObject()
234+
.field("n_field1", "n_value1_2")
235+
.field("n_field2", "n_value2_1")
236+
.endObject()
237+
.endArray()
238+
.endObject()).execute().actionGet();
239+
waitForRelocation(ClusterHealthStatus.GREEN);
240+
// flush, so we fetch it from the index (as see that we filter nested docs)
241+
flush();
242+
assertDocumentCount("test", 6);
243+
244+
searchResponse = client().prepareSearch("test").setQuery(nestedQuery("nested1",
245+
boolQuery().must(termQuery("nested1.n_field1", "n_value1_1")).must(termQuery("nested1.n_field2", "n_value2_1")), ScoreMode.Avg)).execute().actionGet();
246+
assertNoFailures(searchResponse);
247+
assertThat(searchResponse.getHits().totalHits(), equalTo(1L));
248+
249+
// filter
250+
searchResponse = client().prepareSearch("test").setQuery(boolQuery().must(matchAllQuery()).mustNot(nestedQuery("nested1",
251+
boolQuery().must(termQuery("nested1.n_field1", "n_value1_1")).must(termQuery("nested1.n_field2", "n_value2_1")), ScoreMode.Avg))).execute().actionGet();
252+
assertNoFailures(searchResponse);
253+
assertThat(searchResponse.getHits().totalHits(), equalTo(1L));
254+
255+
// check with type prefix
256+
searchResponse = client().prepareSearch("test").setQuery(nestedQuery("nested1",
257+
boolQuery().must(termQuery("nested1.n_field1", "n_value1_1")).must(termQuery("nested1.n_field2", "n_value2_1")), ScoreMode.Avg)).execute().actionGet();
258+
assertNoFailures(searchResponse);
259+
assertThat(searchResponse.getHits().totalHits(), equalTo(1L));
260+
261+
// check delete, so all is gone...
262+
DeleteResponse deleteResponse = client().prepareDelete("test", "type1", "2").execute().actionGet();
263+
assertEquals(DocWriteResponse.Result.DELETED, deleteResponse.getResult());
264+
265+
// flush, so we fetch it from the index (as see that we filter nested docs)
266+
flush();
267+
assertDocumentCount("test", 3);
268+
269+
searchResponse = client().prepareSearch("test").setQuery(nestedQuery("nested1", termQuery("nested1.n_field1", "n_value1_1"), ScoreMode.Avg)).execute().actionGet();
270+
assertNoFailures(searchResponse);
271+
assertThat(searchResponse.getHits().totalHits(), equalTo(1L));
272+
}
273+
167274
public void testMultiNested() throws Exception {
168275
assertAcked(prepareCreate("test")
169276
.addMapping("type1", jsonBuilder().startObject().startObject("type1").startObject("properties")

0 commit comments

Comments
 (0)