From 0c0446ba8cdcb8b87912f3d8c4c2a27b3a63e773 Mon Sep 17 00:00:00 2001
From: markharwood <markharwood@gmail.com>
Date: Fri, 31 Jul 2020 15:01:19 +0100
Subject: [PATCH 1/2] LUCENE-9445 Add support for case-insensitive regex
 searches in QueryParser using the standard /.../i regex syntax

---
 .../classic/MultiFieldQueryParser.java        |   6 +-
 .../lucene/queryparser/classic/QueryParser.jj |   2 +-
 .../queryparser/classic/QueryParserBase.java  |  15 +-
 .../classic/QueryParserTokenManager.java      | 197 +++++++++---------
 .../builders/RegexpQueryNodeBuilder.java      |   5 +-
 .../standard/nodes/RegexpQueryNode.java       |  13 +-
 .../standard/parser/StandardSyntaxParser.java |  40 ++--
 .../standard/parser/StandardSyntaxParser.jj   |   8 +-
 .../StandardSyntaxParserTokenManager.java     |  56 ++---
 .../queryparser/util/QueryParserTestBase.java |  10 +
 10 files changed, 200 insertions(+), 152 deletions(-)
diff --git a/lucene/queryparser/src/java/org/apache/lucene/queryparser/classic/MultiFieldQueryParser.java b/lucene/queryparser/src/java/org/apache/lucene/queryparser/classic/MultiFieldQueryParser.java
index 3ee9c6ced0c4..2866646c9322 100644
--- a/lucene/queryparser/src/java/org/apache/lucene/queryparser/classic/MultiFieldQueryParser.java
+++ b/lucene/queryparser/src/java/org/apache/lucene/queryparser/classic/MultiFieldQueryParser.java
@@ -261,16 +261,16 @@ protected Query getRangeQuery(String field, String part1, String part2, boolean
   
 
   @Override
-  protected Query getRegexpQuery(String field, String termStr)
+  protected Query getRegexpQuery(String field, String termStr, boolean caseSensitive)
       throws ParseException {
     if (field == null) {
       List<Query> clauses = new ArrayList<>();
       for (int i = 0; i < fields.length; i++) {
-        clauses.add(getRegexpQuery(fields[i], termStr));
+        clauses.add(getRegexpQuery(fields[i], termStr, caseSensitive));
       }
       return getMultiFieldQuery(clauses);
     }
-    return super.getRegexpQuery(field, termStr);
+    return super.getRegexpQuery(field, termStr, caseSensitive);
   }
   
   /** Creates a multifield query */
diff --git a/lucene/queryparser/src/java/org/apache/lucene/queryparser/classic/QueryParser.jj b/lucene/queryparser/src/java/org/apache/lucene/queryparser/classic/QueryParser.jj
index fdc0cd0ed5eb..8a149720df65 100644
--- a/lucene/queryparser/src/java/org/apache/lucene/queryparser/classic/QueryParser.jj
+++ b/lucene/queryparser/src/java/org/apache/lucene/queryparser/classic/QueryParser.jj
@@ -209,7 +209,7 @@ PARSER_END(QueryParser)
 | <FUZZY_SLOP:    "~" ((<_NUM_CHAR>)+ (( "." (<_NUM_CHAR>)+ )? (<_TERM_CHAR>)*) | (<_TERM_CHAR>)*) >
 | <PREFIXTERM:    ("*") | ( <_TERM_START_CHAR> (<_TERM_CHAR>)* "*" ) >
 | <WILDTERM:      (<_TERM_START_CHAR> | [ "*", "?" ]) (<_TERM_CHAR> | ( [ "*", "?" ] ))* >
-| <REGEXPTERM:    "/" (~[ "/" ] | "\\/" )* "/" >
+| <REGEXPTERM:    "/" (~[ "/" ] | "\\/" )* ("/" | "/i") >
 | <RANGEIN_START: "[" > : Range
 | <RANGEEX_START: "{" > : Range
 }
diff --git a/lucene/queryparser/src/java/org/apache/lucene/queryparser/classic/QueryParserBase.java b/lucene/queryparser/src/java/org/apache/lucene/queryparser/classic/QueryParserBase.java
index a1f8fa582ada..7b08839ad8db 100644
--- a/lucene/queryparser/src/java/org/apache/lucene/queryparser/classic/QueryParserBase.java
+++ b/lucene/queryparser/src/java/org/apache/lucene/queryparser/classic/QueryParserBase.java
@@ -572,10 +572,12 @@ protected Query newPrefixQuery(Term prefix){
   /**
    * Builds a new RegexpQuery instance
    * @param regexp Regexp term
+   * @param caseSensitive {@code true} for case-sensitive matching; {@code false} matches with {@code RegExp.ASCII_CASE_INSENSITIVE}
    * @return new RegexpQuery instance
    */
-  protected Query newRegexpQuery(Term regexp) {
-    RegexpQuery query = new RegexpQuery(regexp, RegExp.ALL,
+  protected Query newRegexpQuery(Term regexp, boolean caseSensitive) {
+    int matchFlags = caseSensitive ? 0 : RegExp.ASCII_CASE_INSENSITIVE;
+    RegexpQuery query = new RegexpQuery(regexp, RegExp.ALL, matchFlags,
       maxDeterminizedStates);
     query.setRewriteMethod(multiTermRewriteMethod);
     return query;
@@ -746,18 +748,19 @@ private BytesRef analyzeWildcard(String field, String termStr) {
    *
    * @param field Name of the field query will use.
    * @param termStr Term token that contains a regular expression
+   * @param caseSensitive {@code true} if term matching should be case sensitive; passed on to {@code newRegexpQuery}
    *
    * @return Resulting {@link org.apache.lucene.search.Query} built for the term
    * @exception org.apache.lucene.queryparser.classic.ParseException throw in overridden method to disallow
    */
-  protected Query getRegexpQuery(String field, String termStr) throws ParseException
+  protected Query getRegexpQuery(String field, String termStr, boolean caseSensitive) throws ParseException
   {
     // We need to pass the whole string to #normalize, which will not work with
     // custom attribute factories for the binary term impl, and may not work
     // with some analyzers
     BytesRef term = getAnalyzer().normalize(field, termStr);
     Term t = new Term(field, term);
-    return newRegexpQuery(t);
+    return newRegexpQuery(t, caseSensitive);
   }
 
   /**
@@ -823,7 +826,9 @@ Query handleBareTokenQuery(String qfield, Token term, Token fuzzySlop, boolean p
           discardEscapeChar(term.image.substring
               (0, term.image.length()-1)));
     } else if (regexp) {
-      q = getRegexpQuery(qfield, term.image.substring(1, term.image.length()-1));
+      boolean caseSensitive = !term.image.endsWith("i");
+      int lastSlash = term.image.lastIndexOf("/");
+      q = getRegexpQuery(qfield, term.image.substring(1, lastSlash), caseSensitive);
     } else if (fuzzy) {
       q = handleBareFuzzy(qfield, fuzzySlop, termImage);
     } else {
diff --git a/lucene/queryparser/src/java/org/apache/lucene/queryparser/classic/QueryParserTokenManager.java b/lucene/queryparser/src/java/org/apache/lucene/queryparser/classic/QueryParserTokenManager.java
index 39cac0232f28..7300cf7f4202 100644
--- a/lucene/queryparser/src/java/org/apache/lucene/queryparser/classic/QueryParserTokenManager.java
+++ b/lucene/queryparser/src/java/org/apache/lucene/queryparser/classic/QueryParserTokenManager.java
@@ -32,7 +32,7 @@ private int jjMoveStringLiteralDfa0_2()
       case 41:
          return jjStopAtPos(0, 15);
       case 42:
-         return jjStartNfaWithStates_2(0, 17, 49);
+         return jjStartNfaWithStates_2(0, 17, 51);
       case 43:
          return jjStartNfaWithStates_2(0, 11, 15);
       case 45:
@@ -72,7 +72,7 @@ private int jjStartNfaWithStates_2(int pos, int kind, int state)
 private int jjMoveNfa_2(int startState, int curPos)
 {
    int startsAt = 0;
-   jjnewStateCnt = 49;
+   jjnewStateCnt = 51;
    int i = 1;
    jjstateSet[0] = startState;
    int kind = 0x7fffffff;
@@ -87,14 +87,6 @@ private int jjMoveNfa_2(int startState, int curPos)
          {
             switch(jjstateSet[--i])
             {
-               case 49:
-               case 33:
-                  if ((0xfbff7cf8ffffd9ffL & l) == 0L)
-                     break;
-                  if (kind > 23)
-                     kind = 23;
-                  jjCheckNAddTwoStates(33, 34);
-                  break;
                case 0:
                   if ((0xfbff54f8ffffd9ffL & l) != 0L)
                   {
@@ -110,14 +102,14 @@ else if ((0x100002600L & l) != 0L)
                   else if ((0x280200000000L & l) != 0L)
                      jjstateSet[jjnewStateCnt++] = 15;
                   else if (curChar == 47)
-                     jjCheckNAddStates(0, 2);
+                     jjCheckNAddStates(0, 3);
                   else if (curChar == 34)
-                     jjCheckNAddStates(3, 5);
+                     jjCheckNAddStates(4, 6);
                   if ((0x7bff50f8ffffd9ffL & l) != 0L)
                   {
                      if (kind > 20)
                         kind = 20;
-                     jjCheckNAddStates(6, 10);
+                     jjCheckNAddStates(7, 11);
                   }
                   else if (curChar == 42)
                   {
@@ -132,6 +124,14 @@ else if (curChar == 33)
                   if (curChar == 38)
                      jjstateSet[jjnewStateCnt++] = 4;
                   break;
+               case 51:
+               case 33:
+                  if ((0xfbff7cf8ffffd9ffL & l) == 0L)
+                     break;
+                  if (kind > 23)
+                     kind = 23;
+                  jjCheckNAddTwoStates(33, 34);
+                  break;
                case 4:
                   if (curChar == 38 && kind > 8)
                      kind = 8;
@@ -154,14 +154,14 @@ else if (curChar == 33)
                   break;
                case 16:
                   if (curChar == 34)
-                     jjCheckNAddStates(3, 5);
+                     jjCheckNAddStates(4, 6);
                   break;
                case 17:
                   if ((0xfffffffbffffffffL & l) != 0L)
-                     jjCheckNAddStates(3, 5);
+                     jjCheckNAddStates(4, 6);
                   break;
                case 19:
-                  jjCheckNAddStates(3, 5);
+                  jjCheckNAddStates(4, 6);
                   break;
                case 20:
                   if (curChar == 34 && kind > 19)
@@ -172,7 +172,7 @@ else if (curChar == 33)
                      break;
                   if (kind > 21)
                      kind = 21;
-                  jjCheckNAddStates(11, 14);
+                  jjCheckNAddStates(12, 15);
                   break;
                case 23:
                   if (curChar == 46)
@@ -183,7 +183,7 @@ else if (curChar == 33)
                      break;
                   if (kind > 21)
                      kind = 21;
-                  jjCheckNAddStates(15, 17);
+                  jjCheckNAddStates(16, 18);
                   break;
                case 25:
                   if ((0x7bff78f8ffffd9ffL & l) == 0L)
@@ -228,41 +228,45 @@ else if (curChar == 33)
                case 36:
                case 38:
                   if (curChar == 47)
-                     jjCheckNAddStates(0, 2);
+                     jjCheckNAddStates(0, 3);
                   break;
                case 37:
                   if ((0xffff7fffffffffffL & l) != 0L)
-                     jjCheckNAddStates(0, 2);
+                     jjCheckNAddStates(0, 3);
                   break;
                case 40:
                   if (curChar == 47 && kind > 24)
                      kind = 24;
                   break;
-               case 41:
+               case 42:
+                  if (curChar == 47)
+                     jjstateSet[jjnewStateCnt++] = 41;
+                  break;
+               case 43:
                   if ((0x7bff50f8ffffd9ffL & l) == 0L)
                      break;
                   if (kind > 20)
                      kind = 20;
-                  jjCheckNAddStates(6, 10);
+                  jjCheckNAddStates(7, 11);
                   break;
-               case 42:
+               case 44:
                   if ((0x7bff78f8ffffd9ffL & l) == 0L)
                      break;
                   if (kind > 20)
                      kind = 20;
-                  jjCheckNAddTwoStates(42, 43);
+                  jjCheckNAddTwoStates(44, 45);
                   break;
-               case 44:
+               case 46:
                   if (kind > 20)
                      kind = 20;
-                  jjCheckNAddTwoStates(42, 43);
+                  jjCheckNAddTwoStates(44, 45);
                   break;
-               case 45:
+               case 47:
                   if ((0x7bff78f8ffffd9ffL & l) != 0L)
-                     jjCheckNAddStates(18, 20);
+                     jjCheckNAddStates(19, 21);
                   break;
-               case 47:
-                  jjCheckNAddStates(18, 20);
+               case 49:
+                  jjCheckNAddStates(19, 21);
                   break;
                default : break;
             }
@@ -275,30 +279,20 @@ else if (curChar < 128)
          {
             switch(jjstateSet[--i])
             {
-               case 49:
-                  if ((0x97ffffff87ffffffL & l) != 0L)
-                  {
-                     if (kind > 23)
-                        kind = 23;
-                     jjCheckNAddTwoStates(33, 34);
-                  }
-                  else if (curChar == 92)
-                     jjCheckNAdd(35);
-                  break;
                case 0:
                   if ((0x97ffffff87ffffffL & l) != 0L)
                   {
                      if (kind > 20)
                         kind = 20;
-                     jjCheckNAddStates(6, 10);
+                     jjCheckNAddStates(7, 11);
                   }
                   else if (curChar == 92)
-                     jjCheckNAddStates(21, 23);
+                     jjCheckNAddStates(22, 24);
                   else if (curChar == 126)
                   {
                      if (kind > 21)
                         kind = 21;
-                     jjCheckNAddStates(24, 26);
+                     jjCheckNAddStates(25, 27);
                   }
                   if ((0x97ffffff87ffffffL & l) != 0L)
                   {
@@ -315,6 +309,16 @@ else if (curChar == 79)
                   else if (curChar == 65)
                      jjstateSet[jjnewStateCnt++] = 2;
                   break;
+               case 51:
+                  if ((0x97ffffff87ffffffL & l) != 0L)
+                  {
+                     if (kind > 23)
+                        kind = 23;
+                     jjCheckNAddTwoStates(33, 34);
+                  }
+                  else if (curChar == 92)
+                     jjCheckNAdd(35);
+                  break;
                case 1:
                   if (curChar == 68 && kind > 8)
                      kind = 8;
@@ -357,21 +361,21 @@ else if (curChar == 65)
                   break;
                case 17:
                   if ((0xffffffffefffffffL & l) != 0L)
-                     jjCheckNAddStates(3, 5);
+                     jjCheckNAddStates(4, 6);
                   break;
                case 18:
                   if (curChar == 92)
                      jjstateSet[jjnewStateCnt++] = 19;
                   break;
                case 19:
-                  jjCheckNAddStates(3, 5);
+                  jjCheckNAddStates(4, 6);
                   break;
                case 21:
                   if (curChar != 126)
                      break;
                   if (kind > 21)
                      kind = 21;
-                  jjCheckNAddStates(24, 26);
+                  jjCheckNAddStates(25, 27);
                   break;
                case 25:
                   if ((0x97ffffff87ffffffL & l) == 0L)
@@ -429,49 +433,53 @@ else if (curChar == 65)
                   jjCheckNAddTwoStates(33, 34);
                   break;
                case 37:
-                  jjAddStates(0, 2);
+                  jjAddStates(0, 3);
                   break;
                case 39:
                   if (curChar == 92)
                      jjstateSet[jjnewStateCnt++] = 38;
                   break;
                case 41:
+                  if (curChar == 105 && kind > 24)
+                     kind = 24;
+                  break;
+               case 43:
                   if ((0x97ffffff87ffffffL & l) == 0L)
                      break;
                   if (kind > 20)
                      kind = 20;
-                  jjCheckNAddStates(6, 10);
+                  jjCheckNAddStates(7, 11);
                   break;
-               case 42:
+               case 44:
                   if ((0x97ffffff87ffffffL & l) == 0L)
                      break;
                   if (kind > 20)
                      kind = 20;
-                  jjCheckNAddTwoStates(42, 43);
+                  jjCheckNAddTwoStates(44, 45);
                   break;
-               case 43:
+               case 45:
                   if (curChar == 92)
-                     jjCheckNAdd(44);
+                     jjCheckNAdd(46);
                   break;
-               case 44:
+               case 46:
                   if (kind > 20)
                      kind = 20;
-                  jjCheckNAddTwoStates(42, 43);
+                  jjCheckNAddTwoStates(44, 45);
                   break;
-               case 45:
+               case 47:
                   if ((0x97ffffff87ffffffL & l) != 0L)
-                     jjCheckNAddStates(18, 20);
+                     jjCheckNAddStates(19, 21);
                   break;
-               case 46:
+               case 48:
                   if (curChar == 92)
-                     jjCheckNAdd(47);
+                     jjCheckNAdd(49);
                   break;
-               case 47:
-                  jjCheckNAddStates(18, 20);
+               case 49:
+                  jjCheckNAddStates(19, 21);
                   break;
-               case 48:
+               case 50:
                   if (curChar == 92)
-                     jjCheckNAddStates(21, 23);
+                     jjCheckNAddStates(22, 24);
                   break;
                default : break;
             }
@@ -488,14 +496,6 @@ else if (curChar == 65)
          {
             switch(jjstateSet[--i])
             {
-               case 49:
-               case 33:
-                  if (!jjCanMove_2(hiByte, i1, i2, l1, l2))
-                     break;
-                  if (kind > 23)
-                     kind = 23;
-                  jjCheckNAddTwoStates(33, 34);
-                  break;
                case 0:
                   if (jjCanMove_0(hiByte, i1, i2, l1, l2))
                   {
@@ -512,9 +512,17 @@ else if (curChar == 65)
                   {
                      if (kind > 20)
                         kind = 20;
-                     jjCheckNAddStates(6, 10);
+                     jjCheckNAddStates(7, 11);
                   }
                   break;
+               case 51:
+               case 33:
+                  if (!jjCanMove_2(hiByte, i1, i2, l1, l2))
+                     break;
+                  if (kind > 23)
+                     kind = 23;
+                  jjCheckNAddTwoStates(33, 34);
+                  break;
                case 15:
                   if (jjCanMove_0(hiByte, i1, i2, l1, l2) && kind > 13)
                      kind = 13;
@@ -522,7 +530,7 @@ else if (curChar == 65)
                case 17:
                case 19:
                   if (jjCanMove_1(hiByte, i1, i2, l1, l2))
-                     jjCheckNAddStates(3, 5);
+                     jjCheckNAddStates(4, 6);
                   break;
                case 25:
                   if (!jjCanMove_2(hiByte, i1, i2, l1, l2))
@@ -568,36 +576,36 @@ else if (curChar == 65)
                   break;
                case 37:
                   if (jjCanMove_1(hiByte, i1, i2, l1, l2))
-                     jjAddStates(0, 2);
+                     jjAddStates(0, 3);
                   break;
-               case 41:
+               case 43:
                   if (!jjCanMove_2(hiByte, i1, i2, l1, l2))
                      break;
                   if (kind > 20)
                      kind = 20;
-                  jjCheckNAddStates(6, 10);
+                  jjCheckNAddStates(7, 11);
                   break;
-               case 42:
+               case 44:
                   if (!jjCanMove_2(hiByte, i1, i2, l1, l2))
                      break;
                   if (kind > 20)
                      kind = 20;
-                  jjCheckNAddTwoStates(42, 43);
+                  jjCheckNAddTwoStates(44, 45);
                   break;
-               case 44:
+               case 46:
                   if (!jjCanMove_1(hiByte, i1, i2, l1, l2))
                      break;
                   if (kind > 20)
                      kind = 20;
-                  jjCheckNAddTwoStates(42, 43);
+                  jjCheckNAddTwoStates(44, 45);
                   break;
-               case 45:
+               case 47:
                   if (jjCanMove_2(hiByte, i1, i2, l1, l2))
-                     jjCheckNAddStates(18, 20);
+                     jjCheckNAddStates(19, 21);
                   break;
-               case 47:
+               case 49:
                   if (jjCanMove_1(hiByte, i1, i2, l1, l2))
-                     jjCheckNAddStates(18, 20);
+                     jjCheckNAddStates(19, 21);
                   break;
                default : break;
             }
@@ -610,7 +618,7 @@ else if (curChar == 65)
          kind = 0x7fffffff;
       }
       ++curPos;
-      if ((i = jjnewStateCnt) == (startsAt = 49 - (jjnewStateCnt = startsAt)))
+      if ((i = jjnewStateCnt) == (startsAt = 51 - (jjnewStateCnt = startsAt)))
          return curPos;
       try { curChar = input_stream.readChar(); }
       catch(java.io.IOException e) { return curPos; }
@@ -643,7 +651,7 @@ private int jjMoveNfa_0(int startState, int curPos)
                      break;
                   if (kind > 27)
                      kind = 27;
-                  jjAddStates(27, 28);
+                  jjAddStates(28, 29);
                   break;
                case 1:
                   if (curChar == 46)
@@ -797,11 +805,11 @@ else if (curChar == 34)
                   break;
                case 2:
                   if ((0xfffffffbffffffffL & l) != 0L)
-                     jjCheckNAddStates(29, 31);
+                     jjCheckNAddStates(30, 32);
                   break;
                case 3:
                   if (curChar == 34)
-                     jjCheckNAddStates(29, 31);
+                     jjCheckNAddStates(30, 32);
                   break;
                case 5:
                   if (curChar == 34 && kind > 31)
@@ -834,7 +842,7 @@ else if (curChar < 128)
                   jjCheckNAdd(6);
                   break;
                case 2:
-                  jjAddStates(29, 31);
+                  jjAddStates(30, 32);
                   break;
                case 4:
                   if (curChar == 92)
@@ -870,7 +878,7 @@ else if (curChar < 128)
                   break;
                case 2:
                   if (jjCanMove_1(hiByte, i1, i2, l1, l2))
-                     jjAddStates(29, 31);
+                     jjAddStates(30, 32);
                   break;
                case 6:
                   if (!jjCanMove_1(hiByte, i1, i2, l1, l2))
@@ -897,8 +905,9 @@ else if (curChar < 128)
    }
 }
 static final int[] jjnextStates = {
-   37, 39, 40, 17, 18, 20, 42, 43, 45, 46, 31, 22, 23, 25, 26, 24, 
-   25, 26, 45, 46, 31, 44, 47, 35, 22, 28, 29, 0, 1, 2, 4, 5, 
+   37, 39, 40, 42, 17, 18, 20, 44, 45, 47, 48, 31, 22, 23, 25, 26, 
+   24, 25, 26, 47, 48, 31, 46, 49, 35, 22, 28, 29, 0, 1, 2, 4, 
+   5, 
 };
 private static final boolean jjCanMove_0(int hiByte, int i1, int i2, long l1, long l2)
 {
@@ -962,8 +971,8 @@ private static final boolean jjCanMove_2(int hiByte, int i1, int i2, long l1, lo
    0x80L, 
 };
 protected CharStream input_stream;
-private final int[] jjrounds = new int[49];
-private final int[] jjstateSet = new int[98];
+private final int[] jjrounds = new int[51];
+private final int[] jjstateSet = new int[102];
 protected char curChar;
 /** Constructor. */
 public QueryParserTokenManager(CharStream stream){
@@ -988,7 +997,7 @@ private void ReInitRounds()
 {
    int i;
    jjround = 0x80000001;
-   for (i = 49; i-- > 0;)
+   for (i = 51; i-- > 0;)
       jjrounds[i] = 0x80000000;
 }
 
diff --git a/lucene/queryparser/src/java/org/apache/lucene/queryparser/flexible/standard/builders/RegexpQueryNodeBuilder.java b/lucene/queryparser/src/java/org/apache/lucene/queryparser/flexible/standard/builders/RegexpQueryNodeBuilder.java
index b2198b41fdab..d06759a3cd20 100644
--- a/lucene/queryparser/src/java/org/apache/lucene/queryparser/flexible/standard/builders/RegexpQueryNodeBuilder.java
+++ b/lucene/queryparser/src/java/org/apache/lucene/queryparser/flexible/standard/builders/RegexpQueryNodeBuilder.java
@@ -23,6 +23,8 @@
 import org.apache.lucene.queryparser.flexible.standard.processors.MultiTermRewriteMethodProcessor;
 import org.apache.lucene.search.MultiTermQuery;
 import org.apache.lucene.search.RegexpQuery;
+import org.apache.lucene.util.automaton.Operations;
+import org.apache.lucene.util.automaton.RegExp;
 
 /**
  * Builds a {@link RegexpQuery} object from a {@link RegexpQueryNode} object.
@@ -36,10 +38,11 @@ public RegexpQueryNodeBuilder() {
   @Override
   public RegexpQuery build(QueryNode queryNode) throws QueryNodeException {
     RegexpQueryNode regexpNode = (RegexpQueryNode) queryNode;
+    int matchFlags = regexpNode.getCaseSensitive() ? 0 : RegExp.ASCII_CASE_INSENSITIVE;
 
     // TODO: make the maxStates configurable w/ a reasonable default (QueryParserBase uses 10000)
     RegexpQuery q = new RegexpQuery(new Term(regexpNode.getFieldAsString(),
-        regexpNode.textToBytesRef()));
+        regexpNode.textToBytesRef()),  RegExp.ALL, matchFlags, Operations.DEFAULT_MAX_DETERMINIZED_STATES);
 
     MultiTermQuery.RewriteMethod method = (MultiTermQuery.RewriteMethod) queryNode
         .getTag(MultiTermRewriteMethodProcessor.TAG_ID);
diff --git a/lucene/queryparser/src/java/org/apache/lucene/queryparser/flexible/standard/nodes/RegexpQueryNode.java b/lucene/queryparser/src/java/org/apache/lucene/queryparser/flexible/standard/nodes/RegexpQueryNode.java
index cba2612cb612..202e7fcb0cae 100644
--- a/lucene/queryparser/src/java/org/apache/lucene/queryparser/flexible/standard/nodes/RegexpQueryNode.java
+++ b/lucene/queryparser/src/java/org/apache/lucene/queryparser/flexible/standard/nodes/RegexpQueryNode.java
@@ -30,6 +30,7 @@ public class RegexpQueryNode extends QueryNodeImpl  implements TextableQueryNode
 FieldableNode {
   private CharSequence text;
   private CharSequence field;
+  private boolean caseSensitive;
   /**
    * @param field
    *          - field name
@@ -39,11 +40,14 @@ public class RegexpQueryNode extends QueryNodeImpl  implements TextableQueryNode
    *          - position in the query string
    * @param end
    *          - position in the query string
+   * @param caseSensitive
+   *          - true if the text matching should be case sensitive
    */
   public RegexpQueryNode(CharSequence field, CharSequence text, int begin,
-      int end) {
+      int end, boolean caseSensitive) {
     this.field = field;
     this.text = text.subSequence(begin, end);
+    this.caseSensitive = caseSensitive;
   }
 
   public BytesRef textToBytesRef() {
@@ -52,7 +56,7 @@ public BytesRef textToBytesRef() {
 
   @Override
   public String toString() {
-    return "<regexp field='" + this.field + "' term='" + this.text + "'/>";
+    return "<regexp field='" + this.field + "' term='" + this.text + "' caseSensitive=" + this.caseSensitive + " />";
   }
 
   @Override
@@ -60,6 +64,7 @@ public RegexpQueryNode cloneTree() throws CloneNotSupportedException {
     RegexpQueryNode clone = (RegexpQueryNode) super.cloneTree();
     clone.field = this.field;
     clone.text = this.text;
+    clone.caseSensitive = this.caseSensitive;
     return clone;
   }
 
@@ -67,6 +72,10 @@ public RegexpQueryNode cloneTree() throws CloneNotSupportedException {
   public CharSequence getText() {
     return text;
   }
+  
+  public boolean getCaseSensitive() {
+    return caseSensitive;
+  }  
 
   @Override
   public void setText(CharSequence text) {
diff --git a/lucene/queryparser/src/java/org/apache/lucene/queryparser/flexible/standard/parser/StandardSyntaxParser.java b/lucene/queryparser/src/java/org/apache/lucene/queryparser/flexible/standard/parser/StandardSyntaxParser.java
index 57d723159702..aefd3c60009e 100644
--- a/lucene/queryparser/src/java/org/apache/lucene/queryparser/flexible/standard/parser/StandardSyntaxParser.java
+++ b/lucene/queryparser/src/java/org/apache/lucene/queryparser/flexible/standard/parser/StandardSyntaxParser.java
@@ -551,8 +551,10 @@ final public QueryNode Term(CharSequence field) throws ParseException {
           }
           q = new FuzzyQueryNode(field, EscapeQuerySyntaxImpl.discardEscapeChar(term.image), fms, term.beginColumn, term.endColumn);
        } else if (regexp) {
-         String re = term.image.substring(1, term.image.length()-1);
-         q = new RegexpQueryNode(field, re, 0, re.length());
+         boolean caseSensitive = !term.image.endsWith("i");
+         int lastSlash = term.image.lastIndexOf("/");
+         String re = term.image.substring(1, lastSlash);
+         q = new RegexpQueryNode(field, re, 0, re.length(), caseSensitive);
        }
       break;
     case RANGEIN_START:
@@ -707,33 +709,28 @@ private boolean jj_2_2(int xla) {
     finally { jj_save(1, xla); }
   }
 
-  private boolean jj_3R_12() {
-    if (jj_scan_token(RANGEIN_START)) return true;
-    return false;
-  }
-
   private boolean jj_3R_11() {
     if (jj_scan_token(REGEXPTERM)) return true;
     return false;
   }
 
-  private boolean jj_3_1() {
-    if (jj_scan_token(TERM)) return true;
+  private boolean jj_3R_8() {
     Token xsp;
     xsp = jj_scanpos;
-    if (jj_scan_token(15)) {
+    if (jj_3R_12()) {
     jj_scanpos = xsp;
-    if (jj_scan_token(16)) return true;
+    if (jj_scan_token(27)) return true;
     }
     return false;
   }
 
-  private boolean jj_3R_8() {
+  private boolean jj_3_1() {
+    if (jj_scan_token(TERM)) return true;
     Token xsp;
     xsp = jj_scanpos;
-    if (jj_3R_12()) {
+    if (jj_scan_token(15)) {
     jj_scanpos = xsp;
-    if (jj_scan_token(27)) return true;
+    if (jj_scan_token(16)) return true;
     }
     return false;
   }
@@ -743,6 +740,11 @@ private boolean jj_3R_10() {
     return false;
   }
 
+  private boolean jj_3R_9() {
+    if (jj_scan_token(QUOTED)) return true;
+    return false;
+  }
+
   private boolean jj_3R_7() {
     Token xsp;
     xsp = jj_scanpos;
@@ -756,11 +758,6 @@ private boolean jj_3R_7() {
     return false;
   }
 
-  private boolean jj_3R_9() {
-    if (jj_scan_token(QUOTED)) return true;
-    return false;
-  }
-
   private boolean jj_3R_5() {
     Token xsp;
     xsp = jj_scanpos;
@@ -820,6 +817,11 @@ private boolean jj_3_2() {
     return false;
   }
 
+  private boolean jj_3R_12() {
+    if (jj_scan_token(RANGEIN_START)) return true;
+    return false;
+  }
+
   /** Generated Token Manager. */
   public StandardSyntaxParserTokenManager token_source;
   /** Current token. */
diff --git a/lucene/queryparser/src/java/org/apache/lucene/queryparser/flexible/standard/parser/StandardSyntaxParser.jj b/lucene/queryparser/src/java/org/apache/lucene/queryparser/flexible/standard/parser/StandardSyntaxParser.jj
index 0a60490b6958..50a77f244b3c 100644
--- a/lucene/queryparser/src/java/org/apache/lucene/queryparser/flexible/standard/parser/StandardSyntaxParser.jj
+++ b/lucene/queryparser/src/java/org/apache/lucene/queryparser/flexible/standard/parser/StandardSyntaxParser.jj
@@ -125,7 +125,7 @@ PARSER_END(StandardSyntaxParser)
 | <QUOTED:     "\"" (<_QUOTED_CHAR>)* "\"">
 | <TERM:      <_TERM_START_CHAR> (<_TERM_CHAR>)*  >
 | <FUZZY_SLOP:     "~" ( (<_NUM_CHAR>)+ ( "." (<_NUM_CHAR>)+ )? )? >
-| <REGEXPTERM: "/" (~[ "/" ] | "\\/" )* "/" >
+| <REGEXPTERM: "/" (~[ "/" ] | "\\/" )* ("/" | "/i") >
 | <RANGEIN_START: "[" > : Range
 | <RANGEEX_START: "{" > : Range
 }
@@ -440,8 +440,10 @@ QueryNode Term(CharSequence field) : {
           }
           q = new FuzzyQueryNode(field, EscapeQuerySyntaxImpl.discardEscapeChar(term.image), fms, term.beginColumn, term.endColumn);
        } else if (regexp) {
-         String re = term.image.substring(1, term.image.length()-1);
-         q = new RegexpQueryNode(field, re, 0, re.length());
+         boolean caseSensitive = !term.image.endsWith("i");
+         int lastSlash = term.image.lastIndexOf("/");
+         String re = term.image.substring(1, lastSlash);
+         q = new RegexpQueryNode(field, re, 0, re.length(), caseSensitive);
        }
      }
      | ( ( <RANGEIN_START> {startInc=true;} | <RANGEEX_START> )
diff --git a/lucene/queryparser/src/java/org/apache/lucene/queryparser/flexible/standard/parser/StandardSyntaxParserTokenManager.java b/lucene/queryparser/src/java/org/apache/lucene/queryparser/flexible/standard/parser/StandardSyntaxParserTokenManager.java
index 1fdaa480af0d..9ce4771b539f 100644
--- a/lucene/queryparser/src/java/org/apache/lucene/queryparser/flexible/standard/parser/StandardSyntaxParserTokenManager.java
+++ b/lucene/queryparser/src/java/org/apache/lucene/queryparser/flexible/standard/parser/StandardSyntaxParserTokenManager.java
@@ -107,7 +107,7 @@ else if ((active0 & 0x100000L) != 0L)
 private int jjMoveNfa_2(int startState, int curPos)
 {
    int startsAt = 0;
-   jjnewStateCnt = 33;
+   jjnewStateCnt = 35;
    int i = 1;
    jjstateSet[0] = startState;
    int kind = 0x7fffffff;
@@ -135,9 +135,9 @@ else if ((0x100002600L & l) != 0L)
                         kind = 7;
                   }
                   else if (curChar == 47)
-                     jjCheckNAddStates(0, 2);
+                     jjCheckNAddStates(0, 3);
                   else if (curChar == 34)
-                     jjCheckNAddStates(3, 5);
+                     jjCheckNAddStates(4, 6);
                   else if (curChar == 33)
                   {
                      if (kind > 10)
@@ -160,14 +160,14 @@ else if (curChar == 33)
                   break;
                case 14:
                   if (curChar == 34)
-                     jjCheckNAddStates(3, 5);
+                     jjCheckNAddStates(4, 6);
                   break;
                case 15:
                   if ((0xfffffffbffffffffL & l) != 0L)
-                     jjCheckNAddStates(3, 5);
+                     jjCheckNAddStates(4, 6);
                   break;
                case 17:
-                  jjCheckNAddStates(3, 5);
+                  jjCheckNAddStates(4, 6);
                   break;
                case 18:
                   if (curChar == 34 && kind > 22)
@@ -197,7 +197,7 @@ else if (curChar == 33)
                      break;
                   if (kind > 24)
                      kind = 24;
-                  jjAddStates(6, 7);
+                  jjAddStates(7, 8);
                   break;
                case 26:
                   if (curChar == 46)
@@ -213,16 +213,20 @@ else if (curChar == 33)
                case 28:
                case 30:
                   if (curChar == 47)
-                     jjCheckNAddStates(0, 2);
+                     jjCheckNAddStates(0, 3);
                   break;
                case 29:
                   if ((0xffff7fffffffffffL & l) != 0L)
-                     jjCheckNAddStates(0, 2);
+                     jjCheckNAddStates(0, 3);
                   break;
                case 32:
                   if (curChar == 47 && kind > 25)
                      kind = 25;
                   break;
+               case 34:
+                  if (curChar == 47)
+                     jjstateSet[jjnewStateCnt++] = 33;
+                  break;
                default : break;
             }
          } while(i != startsAt);
@@ -300,14 +304,14 @@ else if (curChar == 65)
                   break;
                case 15:
                   if ((0xffffffffefffffffL & l) != 0L)
-                     jjCheckNAddStates(3, 5);
+                     jjCheckNAddStates(4, 6);
                   break;
                case 16:
                   if (curChar == 92)
                      jjstateSet[jjnewStateCnt++] = 17;
                   break;
                case 17:
-                  jjCheckNAddStates(3, 5);
+                  jjCheckNAddStates(4, 6);
                   break;
                case 19:
                case 20:
@@ -338,12 +342,16 @@ else if (curChar == 65)
                   jjstateSet[jjnewStateCnt++] = 25;
                   break;
                case 29:
-                  jjAddStates(0, 2);
+                  jjAddStates(0, 3);
                   break;
                case 31:
                   if (curChar == 92)
                      jjstateSet[jjnewStateCnt++] = 30;
                   break;
+               case 33:
+                  if (curChar == 105 && kind > 25)
+                     kind = 25;
+                  break;
                default : break;
             }
          } while(i != startsAt);
@@ -375,7 +383,7 @@ else if (curChar == 65)
                case 15:
                case 17:
                   if (jjCanMove_1(hiByte, i1, i2, l1, l2))
-                     jjCheckNAddStates(3, 5);
+                     jjCheckNAddStates(4, 6);
                   break;
                case 19:
                case 20:
@@ -394,7 +402,7 @@ else if (curChar == 65)
                   break;
                case 29:
                   if (jjCanMove_1(hiByte, i1, i2, l1, l2))
-                     jjAddStates(0, 2);
+                     jjAddStates(0, 3);
                   break;
                default : break;
             }
@@ -407,7 +415,7 @@ else if (curChar == 65)
          kind = 0x7fffffff;
       }
       ++curPos;
-      if ((i = jjnewStateCnt) == (startsAt = 33 - (jjnewStateCnt = startsAt)))
+      if ((i = jjnewStateCnt) == (startsAt = 35 - (jjnewStateCnt = startsAt)))
          return curPos;
       try { curChar = input_stream.readChar(); }
       catch(java.io.IOException e) { return curPos; }
@@ -440,7 +448,7 @@ private int jjMoveNfa_0(int startState, int curPos)
                      break;
                   if (kind > 28)
                      kind = 28;
-                  jjAddStates(8, 9);
+                  jjAddStates(9, 10);
                   break;
                case 1:
                   if (curChar == 46)
@@ -594,11 +602,11 @@ else if (curChar == 34)
                   break;
                case 2:
                   if ((0xfffffffbffffffffL & l) != 0L)
-                     jjCheckNAddStates(10, 12);
+                     jjCheckNAddStates(11, 13);
                   break;
                case 3:
                   if (curChar == 34)
-                     jjCheckNAddStates(10, 12);
+                     jjCheckNAddStates(11, 13);
                   break;
                case 5:
                   if (curChar == 34 && kind > 32)
@@ -631,7 +639,7 @@ else if (curChar < 128)
                   jjCheckNAdd(6);
                   break;
                case 2:
-                  jjAddStates(10, 12);
+                  jjAddStates(11, 13);
                   break;
                case 4:
                   if (curChar == 92)
@@ -667,7 +675,7 @@ else if (curChar < 128)
                   break;
                case 2:
                   if (jjCanMove_1(hiByte, i1, i2, l1, l2))
-                     jjAddStates(10, 12);
+                     jjAddStates(11, 13);
                   break;
                case 6:
                   if (!jjCanMove_1(hiByte, i1, i2, l1, l2))
@@ -694,7 +702,7 @@ else if (curChar < 128)
    }
 }
 static final int[] jjnextStates = {
-   29, 31, 32, 15, 16, 18, 25, 26, 0, 1, 2, 4, 5, 
+   29, 31, 32, 34, 15, 16, 18, 25, 26, 0, 1, 2, 4, 5, 
 };
 private static final boolean jjCanMove_0(int hiByte, int i1, int i2, long l1, long l2)
 {
@@ -758,8 +766,8 @@ private static final boolean jjCanMove_2(int hiByte, int i1, int i2, long l1, lo
    0x80L, 
 };
 protected CharStream input_stream;
-private final int[] jjrounds = new int[33];
-private final int[] jjstateSet = new int[66];
+private final int[] jjrounds = new int[35];
+private final int[] jjstateSet = new int[70];
 protected char curChar;
 /** Constructor. */
 public StandardSyntaxParserTokenManager(CharStream stream){
@@ -784,7 +792,7 @@ private void ReInitRounds()
 {
    int i;
    jjround = 0x80000001;
-   for (i = 33; i-- > 0;)
+   for (i = 35; i-- > 0;)
       jjrounds[i] = 0x80000000;
 }
 
diff --git a/lucene/queryparser/src/test/org/apache/lucene/queryparser/util/QueryParserTestBase.java b/lucene/queryparser/src/test/org/apache/lucene/queryparser/util/QueryParserTestBase.java
index b4451a6cd524..8a4f07c3483e 100644
--- a/lucene/queryparser/src/test/org/apache/lucene/queryparser/util/QueryParserTestBase.java
+++ b/lucene/queryparser/src/test/org/apache/lucene/queryparser/util/QueryParserTestBase.java
@@ -1051,6 +1051,16 @@ public void testRegexps() throws Exception {
     assertEquals(re, getQuery("field:/http.*/",qp));
     assertEquals(re, getQuery("/http.*/",qp));
     
+    // Confirm the automaton comparison identifies differences in case-matching choices
+    re = new RegexpQuery(new Term("field", "http.*"), RegExp.NONE);
+    assertNotEquals(re, getQuery("field:/http.*/i",qp));
+    assertNotEquals(re, getQuery("/http.*/i",qp));
+
+    // Now check the case insensitivity syntax
+    re = new RegexpQuery(new Term("field", "http.*"), RegExp.NONE, RegExp.ASCII_CASE_INSENSITIVE, 1000);
+    assertEquals(re, getQuery("field:/http.*/i",qp));
+    assertEquals(re, getQuery("/http.*/i",qp));
+    
     re = new RegexpQuery(new Term("field", "http~0.5"));
     assertEquals(re, getQuery("field:/http~0.5/",qp));
     assertEquals(re, getQuery("/http~0.5/",qp));

From b10b65f4fc82b2c4f6692606b5aec005de00db6f Mon Sep 17 00:00:00 2001
From: markharwood <markharwood@gmail.com>
Date: Mon, 3 Aug 2020 10:26:49 +0100
Subject: [PATCH 2/2] Docs changes - package docs and CHANGES.txt

---
 lucene/CHANGES.txt                                          | 6 ++++++
 .../org/apache/lucene/queryparser/classic/package-info.java | 3 +++
 2 files changed, 9 insertions(+)

diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt
index e2c646c61215..cba41cfdd5e0 100644
--- a/lucene/CHANGES.txt
+++ b/lucene/CHANGES.txt
@@ -61,8 +61,14 @@ API Changes
 * LUCENE-9462: Fields without positions should still return MatchIterator.
   (Alan Woodward, Dawid Weiss)
 
+* LUCENE-9445: QueryParserBase.getRegexpQuery and newRegexpQuery now take a
+  caseSensitive flag to enable the new case-insensitive matching option. (Mark Harwood)
+
 Improvements
 
+* LUCENE-9445: QueryParser syntax for regular expressions extended to support a
+  trailing "i" for case-insensitive matching, e.g. /.*Foo/i (Mark Harwood)
+
 * LUCENE-9463: Query match region retrieval component, passage scoring and formatting
   for building custom highlighters. (Alan Woodward, Dawid Weiss)
 
diff --git a/lucene/queryparser/src/java/org/apache/lucene/queryparser/classic/package-info.java b/lucene/queryparser/src/java/org/apache/lucene/queryparser/classic/package-info.java
index 9f77eb9b7e13..39f694116a82 100644
--- a/lucene/queryparser/src/java/org/apache/lucene/queryparser/classic/package-info.java
+++ b/lucene/queryparser/src/java/org/apache/lucene/queryparser/classic/package-info.java
@@ -192,6 +192,9 @@
  * syntax is documented in the {@link org.apache.lucene.util.automaton.RegExp RegExp} class. For example to find documents containing "moat" or "boat":
  * 
  * <pre class="code">/[mb]oat/</pre>
+ * <p>Searches against indexed terms can be made case insensitive by appending an <code>i</code> immediately after the closing slash of the regex, as in this example:
+ * 
+ * <pre class="code">/.*MiXeDcAsE/i</pre>
  * <a id="N1009B"></a><a id="Fuzzy_Searches"></a>
  * <h3 class="boxed">Fuzzy Searches</h3>
  * <p>Lucene supports fuzzy searches based on Damerau-Levenshtein Distance. To do a fuzzy search use the tilde, "~", symbol at the end of a Single word Term. For example to search for a term similar in spelling to "roam" use the fuzzy search: