Skip to content

Commit 48348ae

Browse files
author
Christoph Büscher
authored
LUCENE-9578: TermRangeQuery empty string lower bound edge case (#1976)
Currently a TermRangeQuery with the empty String ("") as lower bound and includeLower=false internally constructs an Automaton that doesn't match anything. This is unexpected, especially for open upper bounds, where any string should be considered to be "higher" than the empty string. This PR changes "Automata#makeBinaryInterval" so that for an empty string lower bound and an open upper bound, any non-empty String matches the query regardless of the includeLower flag; the empty string itself matches only when includeLower is true.
1 parent 6990109 commit 48348ae

File tree

2 files changed

+59
-4
lines changed

2 files changed

+59
-4
lines changed

lucene/core/src/java/org/apache/lucene/util/automaton/Automata.java

Lines changed: 23 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -85,7 +85,22 @@ public static Automaton makeAnyBinary() {
8585
a.finishState();
8686
return a;
8787
}
88-
88+
89+
/**
90+
* Returns a new (deterministic) automaton that accepts all binary terms except
91+
* the empty string.
92+
*/
93+
public static Automaton makeNonEmptyBinary() {
94+
Automaton a = new Automaton();
95+
int s1 = a.createState();
96+
int s2 = a.createState();
97+
a.setAccept(s2, true);
98+
a.addTransition(s1, s2, 0, 255);
99+
a.addTransition(s2, s2, 0, 255);
100+
a.finishState();
101+
return a;
102+
}
103+
89104
/**
90105
* Returns a new (deterministic) automaton that accepts any single codepoint.
91106
*/
@@ -254,8 +269,12 @@ public static Automaton makeBinaryInterval(BytesRef min, boolean minInclusive, B
254269
cmp = min.compareTo(max);
255270
} else {
256271
cmp = -1;
257-
if (min.length == 0 && minInclusive) {
258-
return makeAnyBinary();
272+
if (min.length == 0) {
273+
if (minInclusive) {
274+
return makeAnyBinary();
275+
} else {
276+
return makeNonEmptyBinary();
277+
}
259278
}
260279
}
261280

@@ -266,7 +285,7 @@ public static Automaton makeBinaryInterval(BytesRef min, boolean minInclusive, B
266285
return makeBinary(min);
267286
}
268287
} else if (cmp > 0) {
269-
// max > min
288+
// max < min
270289
return makeEmpty();
271290
}
272291

lucene/core/src/test/org/apache/lucene/util/automaton/TestAutomaton.java

Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1331,6 +1331,23 @@ public void testMakeBinaryIntervalBasic() throws Exception {
13311331
assertFalse(Operations.run(a, intsRef("baq")));
13321332
assertTrue(Operations.run(a, intsRef("bara")));
13331333
}
1334+
1335+
public void testMakeBinaryIntervalLowerBoundEmptyString() throws Exception {
1336+
Automaton a = Automata.makeBinaryInterval(new BytesRef(""), true, new BytesRef("bar"), true);
1337+
assertTrue(Operations.run(a, intsRef("")));
1338+
assertTrue(Operations.run(a, intsRef("a")));
1339+
assertTrue(Operations.run(a, intsRef("bar")));
1340+
assertFalse(Operations.run(a, intsRef("bara")));
1341+
assertFalse(Operations.run(a, intsRef("baz")));
1342+
1343+
1344+
a = Automata.makeBinaryInterval(new BytesRef(""), false, new BytesRef("bar"), true);
1345+
assertFalse(Operations.run(a, intsRef("")));
1346+
assertTrue(Operations.run(a, intsRef("a")));
1347+
assertTrue(Operations.run(a, intsRef("bar")));
1348+
assertFalse(Operations.run(a, intsRef("bara")));
1349+
assertFalse(Operations.run(a, intsRef("baz")));
1350+
}
13341351

13351352
public void testMakeBinaryIntervalEqual() throws Exception {
13361353
Automaton a = Automata.makeBinaryInterval(new BytesRef("bar"), true, new BytesRef("bar"), true);
@@ -1352,6 +1369,12 @@ public void testMakeBinaryIntervalCommonPrefix() throws Exception {
13521369
assertFalse(Operations.run(a, intsRef("barfoop")));
13531370
}
13541371

1372+
public void testMakeBinaryExceptEmpty() throws Exception {
1373+
Automaton a = Automata.makeNonEmptyBinary();
1374+
assertFalse(Operations.run(a, intsRef("")));
1375+
assertTrue(Operations.run(a, intsRef(TestUtil.randomRealisticUnicodeString(random(), 1, 10))));
1376+
}
1377+
13551378
public void testMakeBinaryIntervalOpenMax() throws Exception {
13561379
Automaton a = Automata.makeBinaryInterval(new BytesRef("bar"), true, null, true);
13571380
assertFalse(Operations.run(a, intsRef("bam")));
@@ -1366,6 +1389,19 @@ public void testMakeBinaryIntervalOpenMax() throws Exception {
13661389
assertTrue(Operations.run(a, intsRef("zzz")));
13671390
}
13681391

1392+
public void testMakeBinaryIntervalOpenMaxZeroLengthMin() throws Exception {
1393+
// when including min, automaton should accept "a"
1394+
Automaton a = Automata.makeBinaryInterval(new BytesRef(""), true, null, true);
1395+
assertTrue(Operations.run(a, intsRef("")));
1396+
assertTrue(Operations.run(a, intsRef("a")));
1397+
assertTrue(Operations.run(a, intsRef("aaaaaa")));
1398+
// excluding min should still accept "a"
1399+
a = Automata.makeBinaryInterval(new BytesRef(""), false, null, true);
1400+
assertFalse(Operations.run(a, intsRef("")));
1401+
assertTrue(Operations.run(a, intsRef("a")));
1402+
assertTrue(Operations.run(a, intsRef("aaaaaa")));
1403+
}
1404+
13691405
public void testMakeBinaryIntervalOpenMin() throws Exception {
13701406
Automaton a = Automata.makeBinaryInterval(null, true, new BytesRef("foo"), true);
13711407
assertFalse(Operations.run(a, intsRef("foz")));

0 commit comments

Comments
 (0)