diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene90/Lucene90DocValuesProducer.java b/lucene/core/src/java/org/apache/lucene/codecs/lucene90/Lucene90DocValuesProducer.java index eb58502e103e..2561bbb1a412 100644 --- a/lucene/core/src/java/org/apache/lucene/codecs/lucene90/Lucene90DocValuesProducer.java +++ b/lucene/core/src/java/org/apache/lucene/codecs/lucene90/Lucene90DocValuesProducer.java @@ -1374,9 +1374,15 @@ public SortedSetDocValues getSortedSet(FieldInfo field) throws IOException { int i = 0; int count = 0; + boolean set = false; /* false until nextOrd() has reloaded i and count after the last advanceExact/nextDoc/advance */ @Override public long nextOrd() throws IOException { + if (set == false) { /* first nextOrd() call since the iterator last moved: reset the cursor and read the value count now */ + set = true; + i = 0; + count = ords.docValueCount(); + } if (i++ == count) { return NO_MORE_ORDS; } @@ -1385,13 +1391,8 @@ public long nextOrd() throws IOException { @Override public boolean advanceExact(int target) throws IOException { - if (ords.advanceExact(target)) { - count = ords.docValueCount(); - i = 0; - return true; - } else { - return false; - } + set = false; /* only invalidate here; docValueCount() is read later, inside nextOrd() */ + return ords.advanceExact(target); } @Override @@ -1401,18 +1402,14 @@ public int docID() { @Override public int nextDoc() throws IOException { - int doc = ords.nextDoc(); - count = ords.docValueCount(); - i = 0; - return doc; + set = false; + return ords.nextDoc(); } @Override public int advance(int target) throws IOException { - int doc = ords.advance(target); - count = ords.docValueCount(); - i = 0; - return doc; + set = false; + return ords.advance(target); } @Override diff --git a/lucene/test-framework/src/java/org/apache/lucene/index/BaseDocValuesFormatTestCase.java b/lucene/test-framework/src/java/org/apache/lucene/index/BaseDocValuesFormatTestCase.java index 32ba8de6fe80..f9697303e7e4 100644 --- a/lucene/test-framework/src/java/org/apache/lucene/index/BaseDocValuesFormatTestCase.java +++ b/lucene/test-framework/src/java/org/apache/lucene/index/BaseDocValuesFormatTestCase.java @@ -3499,6 +3499,39 @@ public DocIdSetIterator iterator(IndexReader r) throws IOException 
{ }); } + /** + * Tests where a DVField uses a high number of packed bits to store its ords. See: + * https://issues.apache.org/jira/browse/LUCENE-10159 + */ + @Nightly + public void testHighOrdsSortedSetDV() throws Exception { + Directory dir = newDirectory(); + IndexWriterConfig iwc = new IndexWriterConfig(); + iwc.setRAMBufferSizeMB(8 + random().nextInt(64)); + IndexWriter writer = new IndexWriter(dir, iwc); + // many docs, a small fraction of which are given 1000 or more values each + int numDocs = 20_000 + random().nextInt(10_000); + for (int i = 1; i < numDocs; i++) { + final int numOrds; + if (random().nextInt(100) <= 5) { + numOrds = 1000 + random().nextInt(500); + } else { + numOrds = random().nextInt(10); + } + Document doc = new Document(); + for (int ord = 0; ord < numOrds; ord++) { + doc.add( + new SortedSetDocValuesField("sorted_set_dv", TestUtil.randomBinaryTerm(random(), 2))); + } + writer.addDocument(doc); + } + writer.forceMerge(1, true); + try (DirectoryReader reader = DirectoryReader.open(writer)) { + TestUtil.checkReader(reader); + } + IOUtils.close(writer, dir); + } + private interface FieldCreator { public Field next(); diff --git a/lucene/test-framework/src/java/org/apache/lucene/util/TestUtil.java b/lucene/test-framework/src/java/org/apache/lucene/util/TestUtil.java index c2ff1afd0332..fb8a845fd03f 100644 --- a/lucene/test-framework/src/java/org/apache/lucene/util/TestUtil.java +++ b/lucene/test-framework/src/java/org/apache/lucene/util/TestUtil.java @@ -1175,7 +1175,11 @@ public static String randomFixedByteLengthUnicodeString(Random r, int length) { /** Returns a random binary term. */ public static BytesRef randomBinaryTerm(Random r) { - int length = r.nextInt(15); + return randomBinaryTerm(r, r.nextInt(15)); + } + + /** Returns a random binary term with the given length. */ + public static BytesRef randomBinaryTerm(Random r, int length) { BytesRef b = new BytesRef(length); r.nextBytes(b.bytes); b.length = length;