Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions lucene/CHANGES.txt
Original file line number Diff line number Diff line change
Expand Up @@ -61,8 +61,14 @@ API Changes
* LUCENE-9462: Fields without positions should still return MatchIterator.
(Alan Woodward, Dawid Weiss)

* LUCENE-9445: QueryParserBase.getRegexpQuery and newRegexpQuery now take a
caseSensitive flag to support the new case-insensitive matching option. (Mark Harwood)

Improvements

* LUCENE-9445: QueryParser syntax for regular expressions extended to support a
trailing "i" for case-insensitive matching, e.g. /.*Foo/i (Mark Harwood)

* LUCENE-9463: Query match region retrieval component, passage scoring and formatting
for building custom highlighters. (Alan Woodward, Dawid Weiss)

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -261,16 +261,16 @@ protected Query getRangeQuery(String field, String part1, String part2, boolean


/**
 * Builds the query for a regular-expression term.
 *
 * When {@code field} is null, one regexp query is built per entry of
 * {@code fields} and the results are combined by {@code getMultiFieldQuery};
 * otherwise the call is delegated to the superclass for the single field.
 *
 * NOTE(review): this is a diff hunk whose +/- markers were lost. Lines that
 * appear twice are the removed two-argument form followed by the added form
 * that threads {@code caseSensitive} through; only one of each pair exists
 * in the real file.
 *
 * @param field field to search, or null to search every configured field
 * @param termStr the regular expression text
 * @param caseSensitive passed through unchanged to the per-field and superclass calls
 * @return the query for the single field, or the combined multi-field query
 * @throws ParseException declared by the signature; nothing in this hunk throws it directly
 */
@Override
protected Query getRegexpQuery(String field, String termStr)
protected Query getRegexpQuery(String field, String termStr, boolean caseSensitive)
throws ParseException {
// A null field means "no explicit field": fan out over all configured fields.
if (field == null) {
List<Query> clauses = new ArrayList<>();
for (int i = 0; i < fields.length; i++) {
// Recursive call with a non-null field, so each one takes the superclass path below.
clauses.add(getRegexpQuery(fields[i], termStr));
clauses.add(getRegexpQuery(fields[i], termStr, caseSensitive));
}
return getMultiFieldQuery(clauses);
}
// Explicit field: defer to the base parser's implementation.
return super.getRegexpQuery(field, termStr);
return super.getRegexpQuery(field, termStr, caseSensitive);
}

/** Creates a multifield query */
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -209,7 +209,7 @@ PARSER_END(QueryParser)
| <FUZZY_SLOP: "~" ((<_NUM_CHAR>)+ (( "." (<_NUM_CHAR>)+ )? (<_TERM_CHAR>)*) | (<_TERM_CHAR>)*) >
| <PREFIXTERM: ("*") | ( <_TERM_START_CHAR> (<_TERM_CHAR>)* "*" ) >
| <WILDTERM: (<_TERM_START_CHAR> | [ "*", "?" ]) (<_TERM_CHAR> | ( [ "*", "?" ] ))* >
| <REGEXPTERM: "/" (~[ "/" ] | "\\/" )* "/" >
| <REGEXPTERM: "/" (~[ "/" ] | "\\/" )* ("/" | "/i") >
| <RANGEIN_START: "[" > : Range
| <RANGEEX_START: "{" > : Range
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -572,10 +572,12 @@ protected Query newPrefixQuery(Term prefix){
/**
* Builds a new RegexpQuery instance
* @param regexp Regexp term
* @param caseSensitive if the term matching should be case sensitive
* @return new RegexpQuery instance
*/
protected Query newRegexpQuery(Term regexp) {
RegexpQuery query = new RegexpQuery(regexp, RegExp.ALL,
protected Query newRegexpQuery(Term regexp, boolean caseSensitive) {
int matchFlags = caseSensitive ? 0 : RegExp.ASCII_CASE_INSENSITIVE;
RegexpQuery query = new RegexpQuery(regexp, RegExp.ALL, matchFlags,
maxDeterminizedStates);
query.setRewriteMethod(multiTermRewriteMethod);
return query;
Expand Down Expand Up @@ -746,18 +748,19 @@ private BytesRef analyzeWildcard(String field, String termStr) {
*
* @param field Name of the field query will use.
* @param termStr Term token that contains a regular expression
* @param caseSensitive if token matching should be case sensitive
*
* @return Resulting {@link org.apache.lucene.search.Query} built for the term
* @exception org.apache.lucene.queryparser.classic.ParseException may be thrown by an overriding method to disallow regexp queries
*/
// NOTE(review): diff hunk with lost +/- markers. The two-argument signature and
// the one-argument newRegexpQuery call are the removed lines; the variants that
// carry caseSensitive are the added ones.
protected Query getRegexpQuery(String field, String termStr) throws ParseException
protected Query getRegexpQuery(String field, String termStr, boolean caseSensitive) throws ParseException
{
// We need to pass the whole string to #normalize, which will not work with
// custom attribute factories for the binary term impl, and may not work
// with some analyzers
BytesRef term = getAnalyzer().normalize(field, termStr);
// Wrap the normalized bytes in a Term for the requested field.
Term t = new Term(field, term);
// Query construction is delegated to newRegexpQuery; caseSensitive is forwarded as-is.
return newRegexpQuery(t);
return newRegexpQuery(t, caseSensitive);
}

/**
Expand Down Expand Up @@ -823,7 +826,9 @@ Query handleBareTokenQuery(String qfield, Token term, Token fuzzySlop, boolean p
discardEscapeChar(term.image.substring
(0, term.image.length()-1)));
} else if (regexp) {
q = getRegexpQuery(qfield, term.image.substring(1, term.image.length()-1));
boolean caseSensitive = !term.image.endsWith("i");
int lastSlash = term.image.lastIndexOf("/");
q = getRegexpQuery(qfield, term.image.substring(1, lastSlash), caseSensitive);
} else if (fuzzy) {
q = handleBareFuzzy(qfield, fuzzySlop, termImage);
} else {
Expand Down
Loading