|
17 | 17 | package org.apache.lucene.util.bkd; |
18 | 18 |
|
19 | 19 | import java.io.IOException; |
| 20 | +import java.util.ArrayList; |
20 | 21 | import java.util.Arrays; |
| 22 | +import java.util.List; |
21 | 23 | import org.apache.lucene.codecs.CodecUtil; |
22 | 24 | import org.apache.lucene.index.CorruptIndexException; |
23 | 25 | import org.apache.lucene.index.PointValues; |
@@ -589,6 +591,65 @@ public void visitDocIDs(PointValues.IntersectVisitor visitor) throws IOException |
589 | 591 | addAll(visitor, false); |
590 | 592 | } |
591 | 593 |
|
| 594 | + /** prefetch DocIds below current node */ |
| 595 | + public void prefetchDocIDs(TwoPhaseIntersectVisitor visitor) throws IOException { |
| 596 | + resetNodeDataPosition(); |
| 597 | + prefetchAll(visitor, false); |
| 598 | + } |
| 599 | + |
| 600 | + /** visit Doc Ids for a leafNode at provided input position */ |
| 601 | + public void visitDocIDs(long position, IntersectVisitor visitor) throws IOException { |
| 602 | + visitDocIDs(position, visitor, false); |
| 603 | + } |
| 604 | + |
| 605 | + private void visitDocIDs(long position, IntersectVisitor visitor, boolean grown) |
| 606 | + throws IOException { |
| 607 | + leafNodes.seek(position); |
| 608 | + int count = leafNodes.readVInt(); |
| 609 | + if (!grown) { |
| 610 | + visitor.grow(count); |
| 611 | + } |
| 612 | + docIdsWriter.readInts(leafNodes, count, visitor, scratchIterator.docIDs); |
| 613 | + } |
| 614 | + |
| 615 | + private int getLeafNodeOrdinal() { |
| 616 | + assert isLeafNode() : "nodeID=" + nodeID + " is not a leaf"; |
| 617 | + return nodeID - leafNodeOffset; |
| 618 | + } |
| 619 | + |
| 620 | + public void prefetchAll(TwoPhaseIntersectVisitor visitor, boolean grown) throws IOException { |
| 621 | + if (grown == false) { |
| 622 | + final long size = size(); |
| 623 | + if (size <= Integer.MAX_VALUE) { |
| 624 | + visitor.grow((int) size); |
| 625 | + grown = true; |
| 626 | + } |
| 627 | + } |
| 628 | + if (isLeafNode()) { |
| 629 | + // int count = isLastLeaf() ? config.maxPointsInLeafNode() : lastLeafNodePointCount; |
| 630 | + long leafFp = getLeafBlockFP(); |
| 631 | + int leafNodeOrdinal = getLeafNodeOrdinal(); |
| 632 | + // Only call prefetch is this is the first leaf node ordinal or the first match in |
| 633 | + // contigiuous sequence of matches for leaf nodes |
| 634 | + // boolean prefetched = false; |
| 635 | + if (visitor.lastDeferredBlockOrdinal() == -1 |
| 636 | + || visitor.lastDeferredBlockOrdinal() + 1 < leafNodeOrdinal) { |
| 637 | + // System.out.println("Prefetched called on " + leafNodeOrdinal); |
| 638 | + leafNodes.prefetch(leafFp, 1); |
| 639 | + // prefetched = true; |
| 640 | + } |
| 641 | + visitor.setLastDeferredBlockOrdinal(leafNodeOrdinal); |
| 642 | + visitor.deferBlock(leafFp); |
| 643 | + } else { |
| 644 | + pushLeft(); |
| 645 | + prefetchAll(visitor, grown); |
| 646 | + pop(); |
| 647 | + pushRight(); |
| 648 | + prefetchAll(visitor, grown); |
| 649 | + pop(); |
| 650 | + } |
| 651 | + } |
| 652 | + |
592 | 653 | public void addAll(PointValues.IntersectVisitor visitor, boolean grown) throws IOException { |
593 | 654 | if (grown == false) { |
594 | 655 | final long size = size(); |
@@ -1076,4 +1137,123 @@ public long cost() { |
1076 | 1137 | return length; |
1077 | 1138 | } |
1078 | 1139 | } |
| 1140 | + |
| 1141 | + /** |
| 1142 | + * We can recurse the {@link BKDPointTree} using {@link TwoPhaseIntersectVisitor}. This visitor |
| 1143 | + * travere {@link BKDPointTree} in two phases. In the first phase, it recurses over the {@link |
| 1144 | + * BKDPointTree} optionally triggering IO for some of the blocks and caching them. In the second |
| 1145 | + * phase, once the recursion is over it visits the cached blocks one by one. |
| 1146 | + * |
| 1147 | + * @lucene.experimental |
| 1148 | + */ |
| 1149 | + public interface TwoPhaseIntersectVisitor extends IntersectVisitor { |
| 1150 | + /** return the last deferred block ordinal during recursion. */ |
| 1151 | + public int lastDeferredBlockOrdinal(); |
| 1152 | + |
| 1153 | + /** set last deferred block ordinal */ |
| 1154 | + public void setLastDeferredBlockOrdinal(int leafNodeOrdinal); |
| 1155 | + |
| 1156 | + /** Defer this block for processing in the second phase. */ |
| 1157 | + public void deferBlock(long leafFp); |
| 1158 | + |
| 1159 | + /** Returns a snapshot of the currently deferred blocks. */ |
| 1160 | + public List<Long> deferredBlocks(); |
| 1161 | + |
| 1162 | + /** Mark the given block as processed and remove it from the deferred set. */ |
| 1163 | + public void onProcessingDeferredBlock(long leafFp); |
| 1164 | + } |
| 1165 | + |
| 1166 | + /** |
| 1167 | + * Base implementation of {@link TwoPhaseIntersectVisitor} that maintains a list of deferred |
| 1168 | + * blocks from first phase of traversal and visits them in the second phase. |
| 1169 | + * |
| 1170 | + * @lucene.experimental |
| 1171 | + */ |
| 1172 | + public abstract static class BaseTwoPhaseIntersectVisitor implements TwoPhaseIntersectVisitor { |
| 1173 | + |
| 1174 | + int lastDeferredBlockOrdinal = -1; |
| 1175 | + List<Long> deferredBlocks = new ArrayList<>(); |
| 1176 | + |
| 1177 | + /** |
| 1178 | + * return the last deferred block ordinal - this is used to avoid prefetching call for |
| 1179 | + * contiguous ordinals assuming contiguous ordinals prefetching can be taken care by readaheads. |
| 1180 | + */ |
| 1181 | + @Override |
| 1182 | + public int lastDeferredBlockOrdinal() { |
| 1183 | + return lastDeferredBlockOrdinal; |
| 1184 | + } |
| 1185 | + |
| 1186 | + /** set last deferred block ordinal * */ |
| 1187 | + @Override |
| 1188 | + public void setLastDeferredBlockOrdinal(int leafNodeOrdinal) { |
| 1189 | + lastDeferredBlockOrdinal = leafNodeOrdinal; |
| 1190 | + } |
| 1191 | + |
| 1192 | + /** Defer this block for processing in the second phase. */ |
| 1193 | + @Override |
| 1194 | + public void deferBlock(long leafFp) { |
| 1195 | + deferredBlocks.add(leafFp); |
| 1196 | + } |
| 1197 | + |
| 1198 | + /** Returns a snapshot of the currently deferred blocks. */ |
| 1199 | + @Override |
| 1200 | + public List<Long> deferredBlocks() { |
| 1201 | + return new ArrayList<>(deferredBlocks); |
| 1202 | + } |
| 1203 | + |
| 1204 | + /** Mark the given block as processed and remove it from the deferred set. */ |
| 1205 | + @Override |
| 1206 | + public void onProcessingDeferredBlock(long leafFp) { |
| 1207 | + deferredBlocks.remove(leafFp); |
| 1208 | + } |
| 1209 | + } |
| 1210 | + |
| 1211 | + /** |
| 1212 | + * Finds all documents and points matching the provided visitor. This method does not enforce live |
| 1213 | + * documents, so it's up to the caller to test whether each document is deleted, if necessary. |
| 1214 | + */ |
| 1215 | + @Override |
| 1216 | + public final void intersect(IntersectVisitor visitor) throws IOException { |
| 1217 | + final BKDPointTree pointTree = (BKDPointTree) getPointTree(); |
| 1218 | + if (visitor instanceof TwoPhaseIntersectVisitor twoPhaseIntersectVisitor) { |
| 1219 | + intersect(twoPhaseIntersectVisitor, pointTree); |
| 1220 | + List<Long> fps = twoPhaseIntersectVisitor.deferredBlocks(); |
| 1221 | + for (int i = 0; i < fps.size(); ++i) { |
| 1222 | + long fp = fps.get(i); |
| 1223 | + pointTree.visitDocIDs(fp, visitor); |
| 1224 | + twoPhaseIntersectVisitor.onProcessingDeferredBlock(fp); |
| 1225 | + } |
| 1226 | + } else { |
| 1227 | + intersect(visitor, pointTree); |
| 1228 | + } |
| 1229 | + assert pointTree.moveToParent() == false; |
| 1230 | + } |
| 1231 | + |
| 1232 | + private static void intersect(TwoPhaseIntersectVisitor visitor, BKDPointTree pointTree) |
| 1233 | + throws IOException { |
| 1234 | + while (true) { |
| 1235 | + Relation compare = |
| 1236 | + visitor.compare(pointTree.getMinPackedValue(), pointTree.getMaxPackedValue()); |
| 1237 | + if (compare == Relation.CELL_INSIDE_QUERY) { |
| 1238 | + // This cell is fully inside the query shape: recursively prefetch all points in this cell |
| 1239 | + // without filtering |
| 1240 | + pointTree.prefetchDocIDs(visitor); |
| 1241 | + } else if (compare == Relation.CELL_CROSSES_QUERY) { |
| 1242 | + // The cell crosses the shape boundary, or the cell fully contains the query, so we fall |
| 1243 | + // through and do full filtering: |
| 1244 | + if (pointTree.moveToChild()) { |
| 1245 | + continue; |
| 1246 | + } |
| 1247 | + // TODO: we can assert that the first value here in fact matches what the pointTree |
| 1248 | + // claimed? |
| 1249 | + // Leaf node; scan and filter all points in this block: |
| 1250 | + pointTree.visitDocValues(visitor); |
| 1251 | + } |
| 1252 | + while (pointTree.moveToSibling() == false) { |
| 1253 | + if (pointTree.moveToParent() == false) { |
| 1254 | + return; |
| 1255 | + } |
| 1256 | + } |
| 1257 | + } |
| 1258 | + } |
1079 | 1259 | } |
0 commit comments