Skip to content

Commit 8b68bf6

Browse files
authored
LUCENE-10159: Fix invalid access in sorted set dv (#389)
We introduced invalid accesses for sorted set doc values in LUCENE-9613. However, the issue went unnoticed because the ordinals in the doc values tests aren't complex enough to require a high number of packed bits, and the 3 padding bytes make these invalid accesses harmless. To reproduce the issue, the ordinals need to use at least 20 bits per value.
1 parent 6c21862 commit 8b68bf6

File tree

3 files changed

+50
-16
lines changed

3 files changed

+50
-16
lines changed

lucene/core/src/java/org/apache/lucene/codecs/lucene90/Lucene90DocValuesProducer.java

Lines changed: 12 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -1374,9 +1374,15 @@ public SortedSetDocValues getSortedSet(FieldInfo field) throws IOException {
13741374

13751375
int i = 0;
13761376
int count = 0;
1377+
boolean set = false;
13771378

13781379
@Override
13791380
public long nextOrd() throws IOException {
1381+
if (set == false) {
1382+
set = true;
1383+
i = 0;
1384+
count = ords.docValueCount();
1385+
}
13801386
if (i++ == count) {
13811387
return NO_MORE_ORDS;
13821388
}
@@ -1385,13 +1391,8 @@ public long nextOrd() throws IOException {
13851391

13861392
@Override
13871393
public boolean advanceExact(int target) throws IOException {
1388-
if (ords.advanceExact(target)) {
1389-
count = ords.docValueCount();
1390-
i = 0;
1391-
return true;
1392-
} else {
1393-
return false;
1394-
}
1394+
set = false;
1395+
return ords.advanceExact(target);
13951396
}
13961397

13971398
@Override
@@ -1401,18 +1402,14 @@ public int docID() {
14011402

14021403
@Override
14031404
public int nextDoc() throws IOException {
1404-
int doc = ords.nextDoc();
1405-
count = ords.docValueCount();
1406-
i = 0;
1407-
return doc;
1405+
set = false;
1406+
return ords.nextDoc();
14081407
}
14091408

14101409
@Override
14111410
public int advance(int target) throws IOException {
1412-
int doc = ords.advance(target);
1413-
count = ords.docValueCount();
1414-
i = 0;
1415-
return doc;
1411+
set = false;
1412+
return ords.advance(target);
14161413
}
14171414

14181415
@Override

lucene/test-framework/src/java/org/apache/lucene/index/BaseDocValuesFormatTestCase.java

Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3499,6 +3499,39 @@ public DocIdSetIterator iterator(IndexReader r) throws IOException {
34993499
});
35003500
}
35013501

3502+
/**
3503+
* Tests where a DVField uses a high number of packed bits to store its ords. See:
3504+
* https://issues.apache.org/jira/browse/LUCENE-10159
3505+
*/
3506+
@Nightly
3507+
public void testHighOrdsSortedSetDV() throws Exception {
3508+
Directory dir = newDirectory();
3509+
IndexWriterConfig iwc = new IndexWriterConfig();
3510+
iwc.setRAMBufferSizeMB(8 + random().nextInt(64));
3511+
IndexWriter writer = new IndexWriter(dir, iwc);
3512+
// many docs, some of which have a very high number of ords
3513+
int numDocs = 20_000 + random().nextInt(10_000);
3514+
for (int i = 1; i < numDocs; i++) {
3515+
final int numOrds;
3516+
if (random().nextInt(100) <= 5) {
3517+
numOrds = 1000 + random().nextInt(500);
3518+
} else {
3519+
numOrds = random().nextInt(10);
3520+
}
3521+
Document doc = new Document();
3522+
for (int ord = 0; ord < numOrds; ord++) {
3523+
doc.add(
3524+
new SortedSetDocValuesField("sorted_set_dv", TestUtil.randomBinaryTerm(random(), 2)));
3525+
}
3526+
writer.addDocument(doc);
3527+
}
3528+
writer.forceMerge(1, true);
3529+
try (DirectoryReader reader = DirectoryReader.open(writer)) {
3530+
TestUtil.checkReader(reader);
3531+
}
3532+
IOUtils.close(writer, dir);
3533+
}
3534+
35023535
private interface FieldCreator {
35033536
public Field next();
35043537

lucene/test-framework/src/java/org/apache/lucene/util/TestUtil.java

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1175,7 +1175,11 @@ public static String randomFixedByteLengthUnicodeString(Random r, int length) {
11751175

11761176
/** Returns a random binary term. */
11771177
public static BytesRef randomBinaryTerm(Random r) {
1178-
int length = r.nextInt(15);
1178+
return randomBinaryTerm(r, r.nextInt(15));
1179+
}
1180+
1181+
/** Returns a random binary term with the given length. */
1182+
public static BytesRef randomBinaryTerm(Random r, int length) {
11791183
BytesRef b = new BytesRef(length);
11801184
r.nextBytes(b.bytes);
11811185
b.length = length;

0 commit comments

Comments
 (0)