Skip to content

Commit e43a18f

Browse files
committed
Use delCount of SegmentInfos to calculate numDocs (#36323)
Today, we iterate over the hardLiveDocs bitset to calculate the number of live docs. This calculation can be expensive when soft-deletes is enabled (the default) on old indices that previously had soft-deletes disabled and contain hard-deletes. Once soft-deletes is enabled, we no longer hard-update or hard-delete documents directly. Hard-deletes then exist in only two scenarios: (1) in old segments written while soft-deletes was disabled, and (2) when IndexWriter (IW) hits non-aborted exceptions. In both cases, IW flushes SegmentInfos before exposing the hard-deletes; thus we can use the hard-delete count of SegmentInfos instead of iterating the bitset.
1 parent b813a4a commit e43a18f

File tree

2 files changed: +24 −9 lines changed (24 additions, 9 deletions).

server/src/main/java/org/elasticsearch/common/lucene/Lucene.java

Lines changed: 19 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -907,18 +907,17 @@ public CacheHelper getReaderCacheHelper() {
907907
super(in, new SubReaderWrapper() {
908908
@Override
909909
public LeafReader wrap(LeafReader leaf) {
910-
SegmentReader segmentReader = segmentReader(leaf);
911-
Bits hardLiveDocs = segmentReader.getHardLiveDocs();
910+
final SegmentReader segmentReader = segmentReader(leaf);
911+
final Bits hardLiveDocs = segmentReader.getHardLiveDocs();
912912
if (hardLiveDocs == null) {
913913
return new LeafReaderWithLiveDocs(leaf, null, leaf.maxDoc());
914914
}
915-
// TODO: Can we avoid calculate numDocs by using SegmentReader#getSegmentInfo with LUCENE-8458?
916-
int numDocs = 0;
917-
for (int i = 0; i < hardLiveDocs.length(); i++) {
918-
if (hardLiveDocs.get(i)) {
919-
numDocs++;
920-
}
921-
}
915+
// Once soft-deletes is enabled, we no longer hard-update or hard-delete documents directly.
916+
// Two scenarios that we have hard-deletes: (1) from old segments where soft-deletes was disabled,
917+
// (2) when IndexWriter hits non-aborted exceptions. These two cases, IW flushes SegmentInfos
918+
// before exposing the hard-deletes, thus we can use the hard-delete count of SegmentInfos.
919+
final int numDocs = segmentReader.maxDoc() - segmentReader.getSegmentInfo().getDelCount();
920+
assert numDocs == popCount(hardLiveDocs) : numDocs + " != " + popCount(hardLiveDocs);
922921
return new LeafReaderWithLiveDocs(segmentReader, hardLiveDocs, numDocs);
923922
}
924923
});
@@ -935,6 +934,17 @@ public CacheHelper getReaderCacheHelper() {
935934
}
936935
}
937936

937+
private static int popCount(Bits bits) {
938+
assert bits != null;
939+
int onBits = 0;
940+
for (int i = 0; i < bits.length(); i++) {
941+
if (bits.get(i)) {
942+
onBits++;
943+
}
944+
}
945+
return onBits;
946+
}
947+
938948
/**
939949
* Returns a numeric docvalues which can be used to soft-delete documents.
940950
*/

server/src/main/java/org/elasticsearch/index/engine/InternalEngine.java

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2666,6 +2666,11 @@ public long softUpdateDocuments(Term term, Iterable<? extends Iterable<? extends
26662666
assert softDeleteEnabled : "Call #softUpdateDocuments but soft-deletes is disabled";
26672667
return super.softUpdateDocuments(term, docs, softDeletes);
26682668
}
2669+
@Override
2670+
public long tryDeleteDocument(IndexReader readerIn, int docID) {
2671+
assert false : "#tryDeleteDocument is not supported. See Lucene#DirectoryReaderWithAllLiveDocs";
2672+
throw new UnsupportedOperationException();
2673+
}
26692674
}
26702675

26712676
/**

0 commit comments

Comments
 (0)