Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -67,4 +67,10 @@ public Matches matches(LeafReaderContext context, int doc) throws IOException {
public ScorerSupplier scorerSupplier(LeafReaderContext context) throws IOException {
// Pure pass-through: the supplier is obtained from {@code in} for the same leaf context.
return in.scorerSupplier(context);
}

/**
* Forwards the partition-scoped request to {@code in}, so that a partition-aware implementation
* on {@code in} is used instead of the context-only overload.
*
* @param partition the leaf partition handed through unchanged to {@code in}
* @return whatever {@code in.scorerSupplier(partition)} returns
* @throws IOException if {@code in} throws while building the supplier
*/
@Override
public ScorerSupplier scorerSupplier(IndexSearcher.LeafReaderContextPartition partition)
throws IOException {
return in.scorerSupplier(partition);
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -828,7 +828,14 @@ protected void searchLeaf(
// continue with the following leaf
return;
}
ScorerSupplier scorerSupplier = weight.scorerSupplier(ctx);
ScorerSupplier scorerSupplier;
if (minDocId == 0 && maxDocId == DocIdSetIterator.NO_MORE_DOCS) {
scorerSupplier = weight.scorerSupplier(ctx);
} else {
LeafReaderContextPartition partition =
LeafReaderContextPartition.createFromAndTo(ctx, minDocId, maxDocId);
scorerSupplier = weight.scorerSupplier(partition);
}
if (scorerSupplier != null) {
scorerSupplier.setTopLevelScoringClause();
BulkScorer scorer = scorerSupplier.bulkScorer();
Expand Down
15 changes: 12 additions & 3 deletions lucene/core/src/java/org/apache/lucene/search/PointRangeQuery.java
Original file line number Diff line number Diff line change
Expand Up @@ -249,8 +249,9 @@ public Relation compare(byte[] minPackedValue, byte[] maxPackedValue) {
}

@Override
public ScorerSupplier scorerSupplier(LeafReaderContext context) throws IOException {
LeafReader reader = context.reader();
public ScorerSupplier scorerSupplier(IndexSearcher.LeafReaderContextPartition partition)
throws IOException {
LeafReader reader = partition.ctx.reader();

PointValues values = reader.getPointValues(field);
if (checkValidPointValues(values) == false) {
Expand Down Expand Up @@ -298,7 +299,9 @@ public ScorerSupplier scorerSupplier(LeafReaderContext context) throws IOExcepti
} else {
return new ConstantScoreScorerSupplier(score(), scoreMode, reader.maxDoc()) {

final DocIdSetBuilder result = new DocIdSetBuilder(reader.maxDoc(), values);
final DocIdSetBuilder result =
new DocIdSetBuilder(
reader.maxDoc(), values, partition.minDocId, partition.maxDocId);
final IntersectVisitor visitor = getIntersectVisitor(result);
long cost = -1;

Expand Down Expand Up @@ -336,6 +339,12 @@ public long cost() {
}
}

/**
* Context-only entry point. Wraps {@code context} in the partition produced by {@code
* LeafReaderContextPartition.createForEntireSegment} and reuses the partition-based overload, so
* both entry points share one implementation.
*/
@Override
public ScorerSupplier scorerSupplier(LeafReaderContext context) throws IOException {
return scorerSupplier(
IndexSearcher.LeafReaderContextPartition.createForEntireSegment(context));
}

@Override
public int count(LeafReaderContext context) throws IOException {
LeafReader reader = context.reader();
Expand Down
25 changes: 25 additions & 0 deletions lucene/core/src/java/org/apache/lucene/search/Weight.java
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@
import org.apache.lucene.index.IndexReaderContext;
import org.apache.lucene.index.LeafReader;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.search.IndexSearcher.LeafReaderContextPartition;
import org.apache.lucene.util.Bits;

/**
Expand Down Expand Up @@ -149,6 +150,30 @@ public final Scorer scorer(LeafReaderContext context) throws IOException {
*/
public abstract ScorerSupplier scorerSupplier(LeafReaderContext context) throws IOException;

/**
* Returns a {@link ScorerSupplier}, which can then be used to get a {@link Scorer} for a
* partition of a leaf reader context.
*
* <p>This method allows queries to optimize for intra-segment concurrency by knowing the specific
* doc ID range being searched within the segment. The default implementation delegates to {@link
* #scorerSupplier(LeafReaderContext)}, passing only {@code partition.ctx} and ignoring the
* partition bounds. Queries that can benefit from partition awareness (e.g., by creating smaller
* data structures scoped to the partition) should override this method.
*
* <p>A scorer supplier for the same {@link LeafReaderContext} instance may be requested multiple
* times as part of a single search call, potentially from different threads searching different
* doc ID ranges concurrently.
*
* @param partition the leaf reader context partition containing the context and doc ID range
* @return a {@link ScorerSupplier} providing the scorer, or {@code null} when no scorer is
* available for this leaf (callers are expected to null-check the result)
* @throws IOException if an IOException occurs
* @see LeafReaderContextPartition
* @since 10.1
*/
public ScorerSupplier scorerSupplier(LeafReaderContextPartition partition) throws IOException {
// Default behaviour: only the underlying leaf context is used; the doc ID bounds carried by the
// partition are not consulted here.
return scorerSupplier(partition.ctx);
}

/**
* Helper method that delegates to {@link #scorerSupplier(LeafReaderContext)}. It is implemented
* as
Expand Down
114 changes: 95 additions & 19 deletions lucene/core/src/java/org/apache/lucene/util/DocIdSetBuilder.java
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,8 @@ public final class DocIdSetBuilder {
*
* @see DocIdSetBuilder#grow
*/
public sealed interface BulkAdder permits FixedBitSetAdder, BufferAdder {
public sealed interface BulkAdder
permits PartitionAwareFixedBitSetAdder, PartitionAwareBufferAdder {
void add(int doc);

void add(IntsRef docs);
Expand All @@ -51,32 +52,52 @@ public sealed interface BulkAdder permits FixedBitSetAdder, BufferAdder {
void add(IntsRef docs, int docLowerBoundInclusive);
}

private record FixedBitSetAdder(FixedBitSet bitSet) implements BulkAdder {
/**
* Partition-aware FixedBitSetAdder that filters docs to only include those within the specified
* range. Stores docs using partition-relative indices (doc - offset) to save memory.
*
* @param bitSet the partition-sized bitset to store relative doc indices
* @param minDocId minimum doc ID (inclusive) to accept
* @param maxDocId maximum doc ID (exclusive) to accept
* @param offset the value to subtract from absolute doc IDs (typically minDocId)
*/
private record PartitionAwareFixedBitSetAdder(
FixedBitSet bitSet, int minDocId, int maxDocId, int offset) implements BulkAdder {

@Override
public void add(int doc) {
bitSet.set(doc);
if (doc >= minDocId && doc < maxDocId) {
bitSet.set(doc - offset);
}
}

@Override
public void add(IntsRef docs) {
for (int i = docs.offset, to = docs.offset + docs.length; i < to; i++) {
bitSet.set(docs.ints[i]);
int doc = docs.ints[i];
if (doc >= minDocId && doc < maxDocId) {
bitSet.set(doc - offset);
}
}
}

@Override
public void add(DocIdSetIterator iterator) throws IOException {
iterator.nextDoc();
iterator.intoBitSet(DocIdSetIterator.NO_MORE_DOCS, bitSet, 0);
int doc = iterator.nextDoc();
if (doc < minDocId) {
doc = iterator.advance(minDocId);
}
if (doc < maxDocId) {
iterator.intoBitSet(maxDocId, bitSet, offset);
}
}

@Override
public void add(IntsRef docs, int docLowerBoundInclusive) {
for (int i = docs.offset, to = docs.offset + docs.length; i < to; i++) {
int doc = docs.ints[i];
if (doc >= docLowerBoundInclusive) {
bitSet.set(doc);
if (doc >= Math.max(docLowerBoundInclusive, minDocId) && doc < maxDocId) {
bitSet.set(doc - offset);
}
}
}
Expand All @@ -97,17 +118,29 @@ private static class Buffer {
}
}

private record BufferAdder(Buffer buffer) implements BulkAdder {
/**
* Partition-aware BufferAdder that filters docs to only include those within the specified range.
*/
private record PartitionAwareBufferAdder(Buffer buffer, int minDocId, int maxDocId)
implements BulkAdder {

@Override
public void add(int doc) {
buffer.array[buffer.length++] = doc;
if (doc >= minDocId && doc < maxDocId) {
buffer.array[buffer.length++] = doc;
}
}

@Override
public void add(IntsRef docs) {
System.arraycopy(docs.ints, docs.offset, buffer.array, buffer.length, docs.length);
buffer.length += docs.length;
int index = buffer.length;
for (int i = docs.offset, to = docs.offset + docs.length; i < to; i++) {
int doc = docs.ints[i];
if (doc >= minDocId && doc < maxDocId) {
buffer.array[index++] = doc;
}
}
buffer.length = index;
}

@Override
Expand All @@ -123,7 +156,7 @@ public void add(IntsRef docs, int docLowerBoundInclusive) {
int index = buffer.length;
for (int i = docs.offset, to = docs.offset + docs.length; i < to; i++) {
int doc = docs.ints[i];
if (doc >= docLowerBoundInclusive) {
if (doc >= Math.max(docLowerBoundInclusive, minDocId) && doc < maxDocId) {
buffer.array[index++] = doc;
}
}
Expand All @@ -137,6 +170,9 @@ public void add(IntsRef docs, int docLowerBoundInclusive) {
final boolean multivalued;
final double numValuesPerDoc;

private final int minDocId;
private final int maxDocId;

private List<Buffer> buffers = new ArrayList<>();
private int totalAllocated; // accumulated size of the allocated buffers

Expand Down Expand Up @@ -166,8 +202,29 @@ public DocIdSetBuilder(int maxDoc, PointValues values) throws IOException {
this(maxDoc, values.getDocCount(), values.size());
}

/**
* Create a partition-aware {@link DocIdSetBuilder} for {@link PointValues} that only accepts doc
* IDs within the specified range.
*
* <p>The doc count and value count forwarded to the internal constructor are read from {@code
* values.getDocCount()} and {@code values.size()}.
*
* @param maxDoc the segment's maxDoc (the caller in this change passes {@code reader.maxDoc()})
* @param values the point values
* @param minDocId the minimum doc ID (inclusive) to accept
* @param maxDocId the maximum doc ID (exclusive) to accept
* @throws IOException if thrown while reading the doc count or size from {@code values}
*/
public DocIdSetBuilder(int maxDoc, PointValues values, int minDocId, int maxDocId)
throws IOException {
// NOTE(review): maxDocId is forwarded as-is and is not clamped to maxDoc. The partition-sized
// bitset and threshold are derived from (maxDocId - minDocId), so a caller passing
// DocIdSetIterator.NO_MORE_DOCS together with minDocId > 0 would size them far beyond the
// segment -- confirm that callers never produce such a range.
this(maxDoc, values.getDocCount(), values.size(), minDocId, maxDocId);
}

DocIdSetBuilder(int maxDoc, int docCount, long valueCount) {
// Non-partitioned variant: the accepted doc ID range is [0, maxDoc), which the range-aware
// constructor does not treat as a partition.
this(maxDoc, docCount, valueCount, 0, maxDoc);
}

private DocIdSetBuilder(int maxDoc, int docCount, long valueCount, int minDocId, int maxDocId) {
this.maxDoc = maxDoc;
this.minDocId = minDocId;
this.maxDocId = maxDocId;

this.multivalued = docCount < 0 || docCount != valueCount;
if (docCount <= 0 || valueCount < 0) {
// assume one value per doc, this means the cost will be overestimated
Expand All @@ -184,7 +241,11 @@ public DocIdSetBuilder(int maxDoc, PointValues values) throws IOException {
// maxDoc >>> 7 is a good value if you want to save memory, lower values
// such as maxDoc >>> 11 should provide faster building but at the expense
// of using a full bitset even for quite sparse data
this.threshold = maxDoc >>> 7;
// When filtering to a partition (minDocId > 0 or maxDocId < maxDoc), use the partition size
// for threshold calculation to ensure the threshold scales correctly with the partition size
boolean isPartition = (minDocId > 0 || maxDocId < maxDoc);
int effectiveMaxDoc = isPartition ? (maxDocId - minDocId) : maxDoc;
this.threshold = effectiveMaxDoc >>> 7;

this.bitSet = null;
}
Expand Down Expand Up @@ -267,7 +328,7 @@ private int additionalCapacity(int numDocs) {
private Buffer addBuffer(int len) {
Buffer buffer = new Buffer(len);
buffers.add(buffer);
adder = new BufferAdder(buffer);
adder = new PartitionAwareBufferAdder(buffer, minDocId, maxDocId);
totalAllocated += buffer.array.length;
return buffer;
}
Expand All @@ -279,20 +340,29 @@ private void growBuffer(Buffer buffer, int additionalCapacity) {

private void upgradeToBitSet() {
assert bitSet == null;
FixedBitSet bitSet = new FixedBitSet(maxDoc);

// For partitions, create a smaller bitset sized to the partition range only
// This saves memory by not allocating bits outside [minDocId, maxDocId)
boolean isPartition = (minDocId > 0 || maxDocId < maxDoc);
int bitSetSize = isPartition ? (maxDocId - minDocId) : maxDoc;

FixedBitSet bitSet = new FixedBitSet(bitSetSize);
long counter = 0;
for (Buffer buffer : buffers) {
int[] array = buffer.array;
int length = buffer.length;
counter += length;
for (int i = 0; i < length; ++i) {
bitSet.set(array[i]);
int docId = array[i];
int bitIndex = isPartition ? (docId - minDocId) : docId;
bitSet.set(bitIndex);
}
}
this.bitSet = bitSet;
this.counter = counter;
this.buffers = null;
this.adder = new FixedBitSetAdder(bitSet);
int offset = isPartition ? minDocId : 0;
this.adder = new PartitionAwareFixedBitSetAdder(bitSet, minDocId, maxDocId, offset);
}

/** Build a {@link DocIdSet} from the accumulated doc IDs. */
Expand All @@ -301,7 +371,13 @@ public DocIdSet build() {
if (bitSet != null) {
assert counter >= 0;
final long cost = Math.round(counter / numValuesPerDoc);
return new BitDocIdSet(bitSet, cost);
// For partition-relative bitsets, wrap with offset to return absolute doc IDs
boolean isPartition = (minDocId > 0 || maxDocId < maxDoc);
if (isPartition) {
return new OffsetBitDocIdSet(bitSet, cost, minDocId);
} else {
return new BitDocIdSet(bitSet, cost);
}
} else {
Buffer concatenated = concat(buffers);
LSBRadixSorter sorter = new LSBRadixSorter();
Expand Down
62 changes: 62 additions & 0 deletions lucene/core/src/java/org/apache/lucene/util/OffsetBitDocIdSet.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,62 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.lucene.util;

import org.apache.lucene.search.DocIdSet;
import org.apache.lucene.search.DocIdSetIterator;

/**
 * A {@link DocIdSet} backed by a partition-relative {@link FixedBitSet} whose iterators report
 * absolute doc IDs.
 *
 * <p>Partition-aware queries record matches in a small bitset indexed from 0 to partitionSize-1.
 * Consumers, however, expect absolute doc IDs (minDocId to maxDocId-1), so every iterator handed
 * out by this class is wrapped in an {@code OffsetDocIdSetIterator} carrying the configured
 * offset.
 *
 * @lucene.internal
 */
final class OffsetBitDocIdSet extends DocIdSet {
  private final BitDocIdSet delegate;
  private final int offset;

  /**
   * Builds the wrapper around a freshly created {@link BitDocIdSet}.
   *
   * @param bitSet the partition-relative bitset
   * @param cost the cost estimate passed to the underlying {@link BitDocIdSet}
   * @param offset the value to add to convert relative indices to absolute doc IDs (typically
   *     minDocId)
   */
  OffsetBitDocIdSet(FixedBitSet bitSet, long cost, int offset) {
    this.offset = offset;
    this.delegate = new BitDocIdSet(bitSet, cost);
  }

  /**
   * Returns an iterator over the underlying set, wrapped so the offset is applied, or {@code null}
   * when the underlying set yields no iterator.
   */
  @Override
  public DocIdSetIterator iterator() {
    final DocIdSetIterator relative = delegate.iterator();
    return relative == null ? null : new OffsetDocIdSetIterator(relative, offset);
  }

  /** Reports the memory usage of the wrapped {@link BitDocIdSet}. */
  @Override
  public long ramBytesUsed() {
    return delegate.ramBytesUsed();
  }
}
Loading
Loading