2525import org .apache .lucene .index .NumericDocValues ;
2626import org .apache .lucene .index .ReaderUtil ;
2727import org .apache .lucene .index .Term ;
28- import org .apache .lucene .search .BooleanClause ;
29- import org .apache .lucene .search .BooleanQuery ;
3028import org .apache .lucene .search .DocIdSetIterator ;
31- import org .apache .lucene .search .DocValuesFieldExistsQuery ;
3229import org .apache .lucene .search .IndexSearcher ;
3330import org .apache .lucene .search .Query ;
31+ import org .apache .lucene .search .ScoreDoc ;
3432import org .apache .lucene .search .Sort ;
3533import org .apache .lucene .search .SortField ;
3634import org .apache .lucene .search .SortedNumericSortField ;
5553 * A {@link Translog.Snapshot} from changes in a Lucene index
5654 */
5755final class LuceneChangesSnapshot implements Translog .Snapshot {
56+ static final int DEFAULT_BATCH_SIZE = 1024 ;
57+
58+ private final int searchBatchSize ;
5859 private final long fromSeqNo , toSeqNo ;
5960 private long lastSeenSeqNo ;
6061 private int skippedOperations ;
@@ -63,7 +64,8 @@ final class LuceneChangesSnapshot implements Translog.Snapshot {
6364 private final IndexSearcher indexSearcher ;
6465 private final MapperService mapperService ;
6566 private int docIndex = 0 ;
66- private final TopDocs topDocs ;
67+ private final int totalHits ;
68+ private ScoreDoc [] scoreDocs ;
6769
6870 private final Closeable onClose ;
6971 private final CombinedDocValues [] docValues ; // Cache of DocValues
@@ -73,23 +75,30 @@ final class LuceneChangesSnapshot implements Translog.Snapshot {
7375 *
7476 * @param engineSearcher the internal engine searcher which will be taken over if the snapshot is opened successfully
7577 * @param mapperService the mapper service which will be mainly used to resolve the document's type and uid
78+ * @param searchBatchSize the number of documents that should be returned by each search
7679 * @param fromSeqNo the min requesting seq# - inclusive
7780 * @param toSeqNo the maximum requesting seq# - inclusive
7881 * @param requiredFullRange if true, the snapshot will strictly check for the existence of operations between fromSeqNo and toSeqNo
7982 */
80- LuceneChangesSnapshot (Engine .Searcher engineSearcher , MapperService mapperService ,
83+ LuceneChangesSnapshot (Engine .Searcher engineSearcher , MapperService mapperService , int searchBatchSize ,
8184 long fromSeqNo , long toSeqNo , boolean requiredFullRange ) throws IOException {
8285 if (fromSeqNo < 0 || toSeqNo < 0 || fromSeqNo > toSeqNo ) {
8386 throw new IllegalArgumentException ("Invalid range; from_seqno [" + fromSeqNo + "], to_seqno [" + toSeqNo + "]" );
8487 }
88+ if (searchBatchSize < 0 ) {
89+ throw new IllegalArgumentException ("Search_batch_size must not be negative [" + searchBatchSize + "]" );
90+ }
8591 this .mapperService = mapperService ;
92+ this .searchBatchSize = searchBatchSize ;
8693 this .fromSeqNo = fromSeqNo ;
8794 this .toSeqNo = toSeqNo ;
8895 this .lastSeenSeqNo = fromSeqNo - 1 ;
8996 this .requiredFullRange = requiredFullRange ;
9097 this .indexSearcher = new IndexSearcher (Lucene .wrapAllDocsLive (engineSearcher .getDirectoryReader ()));
9198 this .indexSearcher .setQueryCache (null );
92- this .topDocs = searchOperations (indexSearcher );
99+ final TopDocs topDocs = searchOperations (null );
100+ this .totalHits = Math .toIntExact (topDocs .totalHits );
101+ this .scoreDocs = topDocs .scoreDocs ;
93102 final List <LeafReaderContext > leaves = indexSearcher .getIndexReader ().leaves ();
94103 this .docValues = new CombinedDocValues [leaves .size ()];
95104 for (LeafReaderContext leaf : leaves ) {
@@ -105,7 +114,7 @@ public void close() throws IOException {
105114
106115 @ Override
107116 public int totalOperations () {
108- return Math . toIntExact ( topDocs . totalHits ) ;
117+ return totalHits ;
109118 }
110119
111120 @ Override
@@ -146,28 +155,28 @@ private void rangeCheck(Translog.Operation op) {
146155 }
147156 }
148157
149- private int nextDocId () {
150- if (docIndex < topDocs .scoreDocs .length ) {
151- final int docId = topDocs .scoreDocs [docIndex ].doc ;
158+ private int nextDocId () throws IOException {
159+ // we have processed all docs in the current search - fetch the next batch
160+ if (docIndex == scoreDocs .length && docIndex > 0 ) {
161+ final ScoreDoc prev = scoreDocs [scoreDocs .length - 1 ];
162+ scoreDocs = searchOperations (prev ).scoreDocs ;
163+ docIndex = 0 ;
164+ }
165+ if (docIndex < scoreDocs .length ) {
166+ int docId = scoreDocs [docIndex ].doc ;
152167 docIndex ++;
153168 return docId ;
154- } else {
155- return DocIdSetIterator .NO_MORE_DOCS ;
156169 }
170+ return DocIdSetIterator .NO_MORE_DOCS ;
157171 }
158172
159- private TopDocs searchOperations (IndexSearcher searcher ) throws IOException {
160- final Query rangeQuery = new BooleanQuery .Builder ()
161- .add (new DocValuesFieldExistsQuery (SeqNoFieldMapper .PRIMARY_TERM_NAME ), BooleanClause .Occur .FILTER )
162- .add (LongPoint .newRangeQuery (SeqNoFieldMapper .NAME , fromSeqNo , toSeqNo ), BooleanClause .Occur .FILTER )
163- .build ();
173+ private TopDocs searchOperations (ScoreDoc after ) throws IOException {
174+ final Query rangeQuery = LongPoint .newRangeQuery (SeqNoFieldMapper .NAME , fromSeqNo , toSeqNo );
164175 final Sort sortedBySeqNoThenByTerm = new Sort (
165176 new SortedNumericSortField (SeqNoFieldMapper .NAME , SortField .Type .LONG ),
166177 new SortedNumericSortField (SeqNoFieldMapper .PRIMARY_TERM_NAME , SortField .Type .LONG , true )
167178 );
168- // norelease - limits the number of hits
169- final long numHits = Math .min ((toSeqNo + 1 - fromSeqNo ) * 2 , Integer .MAX_VALUE - 1 );
170- return searcher .search (rangeQuery , Math .toIntExact (numHits ), sortedBySeqNoThenByTerm );
179+ return indexSearcher .searchAfter (after , rangeQuery , searchBatchSize , sortedBySeqNoThenByTerm );
171180 }
172181
173182 private Translog .Operation readDocAsOp (int docID ) throws IOException {
0 commit comments