diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt index fd5002f1f22f..f656545e79b8 100644 --- a/lucene/CHANGES.txt +++ b/lucene/CHANGES.txt @@ -84,6 +84,8 @@ Optimizations * GITHUB#14333: Introduce a specialized trie for block tree index, instead of FST. (Guo Feng) +* GITHUB#14447: Compute the doc range more efficiently when flushing a doc block. (Pan Guixin) + Bug Fixes --------------------- (No changes) diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene103/Lucene103PostingsWriter.java b/lucene/core/src/java/org/apache/lucene/codecs/lucene103/Lucene103PostingsWriter.java index fc43c9053de2..b5d5047e6233 100644 --- a/lucene/core/src/java/org/apache/lucene/codecs/lucene103/Lucene103PostingsWriter.java +++ b/lucene/core/src/java/org/apache/lucene/codecs/lucene103/Lucene103PostingsWriter.java @@ -429,12 +429,13 @@ private void flushDocBlock(boolean finishTerm) throws IOException { // storage-efficient than the next number of bits per value (which effectively slightly biases // towards the bit set approach). int bitsPerValue = forDeltaUtil.bitsRequired(docDeltaBuffer); - int sum = Math.toIntExact(Arrays.stream(docDeltaBuffer).sum()); - int numBitSetLongs = FixedBitSet.bits2words(sum); + int docRange = lastDocID - level0LastDocID; + assert docRange == Arrays.stream(docDeltaBuffer).sum(); + int numBitSetLongs = FixedBitSet.bits2words(docRange); int numBitsNextBitsPerValue = Math.min(Integer.SIZE, bitsPerValue + 1) * BLOCK_SIZE; - if (sum == BLOCK_SIZE) { + if (docRange == BLOCK_SIZE) { level0Output.writeByte((byte) 0); - } else if (numBitsNextBitsPerValue <= sum) { + } else if (numBitsNextBitsPerValue <= docRange) { level0Output.writeByte((byte) bitsPerValue); forDeltaUtil.encodeDeltas(bitsPerValue, docDeltaBuffer, level0Output); } else {