Skip to content

Commit 20e7a64

Browse files
committed
Preserve the order of nested documents in the Lucene index (#34225)
Today we reverse the initial order of the nested documents when we index them in order to ensure that parent documents appear after their children. This means that a query will always match nested documents in the reverse order of their offsets in the source document. Reversing all documents is not needed, so this change ensures that parent documents appear after their children without modifying the initial order within each nested level. This allows matching children in the order of their appearance in the source document, which is a requirement to efficiently implement #33587. Old indices created before this change will continue to reverse the order of nested documents to ensure backward compatibility.
1 parent 68cdb8b commit 20e7a64

File tree

7 files changed

+172
-53
lines changed

7 files changed

+172
-53
lines changed

server/src/main/java/org/elasticsearch/index/mapper/ParseContext.java

Lines changed: 31 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,7 @@
2626
import org.apache.lucene.index.IndexableField;
2727
import org.apache.lucene.util.BytesRef;
2828
import org.elasticsearch.common.lucene.all.AllEntries;
29+
import org.elasticsearch.Version;
2930
import org.elasticsearch.common.xcontent.XContentParser;
3031
import org.elasticsearch.index.IndexSettings;
3132

@@ -34,6 +35,7 @@
3435
import java.util.Collections;
3536
import java.util.HashSet;
3637
import java.util.Iterator;
38+
import java.util.LinkedList;
3739
import java.util.List;
3840
import java.util.Set;
3941

@@ -456,11 +458,38 @@ public Iterable<Document> nonRootDocuments() {
456458
}
457459

458460
void postParse() {
459-
// reverse the order of docs for nested docs support, parent should be last
460461
if (documents.size() > 1) {
461462
docsReversed = true;
462-
Collections.reverse(documents);
463+
if (indexSettings.getIndexVersionCreated().onOrAfter(Version.V_6_5_0)) {
464+
/**
465+
* For indices created on or after {@link Version#V_6_5_0} we preserve the order
466+
* of the children while ensuring that parents appear after them.
467+
*/
468+
List<Document> newDocs = reorderParent(documents);
469+
documents.clear();
470+
documents.addAll(newDocs);
471+
} else {
472+
// reverse the order of docs for nested docs support, parent should be last
473+
Collections.reverse(documents);
474+
}
475+
}
476+
}
477+
478+
/**
479+
* Returns a copy of the provided {@link List} where parent documents appear
480+
* after their children.
481+
*/
482+
private List<Document> reorderParent(List<Document> docs) {
483+
List<Document> newDocs = new ArrayList<>(docs.size());
484+
LinkedList<Document> parents = new LinkedList<>();
485+
for (Document doc : docs) {
486+
while (parents.peek() != doc.getParent()){
487+
newDocs.add(parents.poll());
488+
}
489+
parents.add(0, doc);
463490
}
491+
newDocs.addAll(parents);
492+
return newDocs;
464493
}
465494

466495
@Override

server/src/main/java/org/elasticsearch/search/fetch/FetchPhase.java

Lines changed: 28 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,7 @@
2727
import org.apache.lucene.search.Weight;
2828
import org.apache.lucene.util.BitSet;
2929
import org.elasticsearch.ExceptionsHelper;
30+
import org.elasticsearch.Version;
3031
import org.elasticsearch.common.bytes.BytesReference;
3132
import org.elasticsearch.common.collect.Tuple;
3233
import org.elasticsearch.common.document.DocumentField;
@@ -35,6 +36,7 @@
3536
import org.elasticsearch.common.xcontent.XContentHelper;
3637
import org.elasticsearch.common.xcontent.XContentType;
3738
import org.elasticsearch.common.xcontent.support.XContentMapValues;
39+
import org.elasticsearch.index.IndexSettings;
3840
import org.elasticsearch.index.fieldvisitor.CustomFieldsVisitor;
3941
import org.elasticsearch.index.fieldvisitor.FieldsVisitor;
4042
import org.elasticsearch.index.mapper.DocumentMapper;
@@ -344,6 +346,7 @@ private SearchHit.NestedIdentity getInternalNestedIdentity(SearchContext context
344346
ObjectMapper current = nestedObjectMapper;
345347
String originalName = nestedObjectMapper.name();
346348
SearchHit.NestedIdentity nestedIdentity = null;
349+
final IndexSettings indexSettings = context.getQueryShardContext().getIndexSettings();
347350
do {
348351
Query parentFilter;
349352
nestedParentObjectMapper = current.getParentObjectMapper(mapperService);
@@ -373,12 +376,32 @@ private SearchHit.NestedIdentity getInternalNestedIdentity(SearchContext context
373376
BitSet parentBits = context.bitsetFilterCache().getBitSetProducer(parentFilter).getBitSet(subReaderContext);
374377

375378
int offset = 0;
376-
int nextParent = parentBits.nextSetBit(currentParent);
377-
for (int docId = childIter.advance(currentParent + 1); docId < nextParent && docId != DocIdSetIterator.NO_MORE_DOCS;
378-
docId = childIter.nextDoc()) {
379-
offset++;
379+
if (indexSettings.getIndexVersionCreated().onOrAfter(Version.V_6_5_0)) {
380+
/**
381+
* Starts from the previous parent and finds the offset of the
382+
* <code>nestedSubDocID</code> within the nested children. Nested documents
383+
* are indexed in the same order as in the source array so the offset
384+
* of the nested child is the number of nested documents with the same parent
385+
* that appear before it.
386+
*/
387+
int previousParent = parentBits.prevSetBit(currentParent);
388+
for (int docId = childIter.advance(previousParent + 1); docId < nestedSubDocId && docId != DocIdSetIterator.NO_MORE_DOCS;
389+
docId = childIter.nextDoc()) {
390+
offset++;
391+
}
392+
currentParent = nestedSubDocId;
393+
} else {
394+
/**
395+
* Nested documents are in reverse order in this version so we start from the current nested document
396+
* and find the number of documents with the same parent that appear after it.
397+
*/
398+
int nextParent = parentBits.nextSetBit(currentParent);
399+
for (int docId = childIter.advance(currentParent + 1); docId < nextParent && docId != DocIdSetIterator.NO_MORE_DOCS;
400+
docId = childIter.nextDoc()) {
401+
offset++;
402+
}
403+
currentParent = nextParent;
380404
}
381-
currentParent = nextParent;
382405
current = nestedObjectMapper = nestedParentObjectMapper;
383406
int currentPrefix = current == null ? 0 : current.name().length() + 1;
384407
nestedIdentity = new SearchHit.NestedIdentity(originalName.substring(currentPrefix), offset, nestedIdentity);

server/src/test/java/org/elasticsearch/index/mapper/CopyToMapperTests.java

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -389,28 +389,28 @@ public void testCopyToNestedField() throws Exception {
389389
assertEquals(6, doc.docs().size());
390390

391391
Document nested = doc.docs().get(0);
392-
assertFieldValue(nested, "n1.n2.target", 7L);
392+
assertFieldValue(nested, "n1.n2.target", 3L);
393393
assertFieldValue(nested, "n1.target");
394394
assertFieldValue(nested, "target");
395395

396-
nested = doc.docs().get(2);
396+
nested = doc.docs().get(1);
397397
assertFieldValue(nested, "n1.n2.target", 5L);
398398
assertFieldValue(nested, "n1.target");
399399
assertFieldValue(nested, "target");
400400

401401
nested = doc.docs().get(3);
402-
assertFieldValue(nested, "n1.n2.target", 3L);
402+
assertFieldValue(nested, "n1.n2.target", 7L);
403403
assertFieldValue(nested, "n1.target");
404404
assertFieldValue(nested, "target");
405405

406-
Document parent = doc.docs().get(1);
406+
Document parent = doc.docs().get(2);
407407
assertFieldValue(parent, "target");
408-
assertFieldValue(parent, "n1.target", 7L);
408+
assertFieldValue(parent, "n1.target", 3L, 5L);
409409
assertFieldValue(parent, "n1.n2.target");
410410

411411
parent = doc.docs().get(4);
412412
assertFieldValue(parent, "target");
413-
assertFieldValue(parent, "n1.target", 3L, 5L);
413+
assertFieldValue(parent, "n1.target", 7L);
414414
assertFieldValue(parent, "n1.n2.target");
415415

416416
Document root = doc.docs().get(5);

0 commit comments

Comments
 (0)