-
Notifications
You must be signed in to change notification settings - Fork 25.6k
Fix painless's regex lexer and error messages #23634
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -68,8 +68,9 @@ void analyze(Locals locals) { | |
|
|
||
| try { | ||
| Pattern.compile(pattern, flags); | ||
| } catch (PatternSyntaxException exception) { | ||
| throw createError(exception); | ||
| } catch (PatternSyntaxException e) { | ||
| throw new Location(location.getSourceName(), location.getOffset() + 1 + e.getIndex()).createError( | ||
|
Member
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I found this while debugging this issue and it bothered me more than it should have. I'm quite happy PatternSyntaxException seems designed for you to extract this information. The
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Is it not possible to fix this value where the Location object is originally generated?
Member
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I need the offset from the exception to get the location right. The |
||
| new IllegalArgumentException("Error compiling regex: " + e.getDescription())); | ||
| } | ||
|
|
||
| constant = new Constant(location, Definition.PATTERN_TYPE.type, "regexAt$" + location.getOffset(), this::initializeConstant); | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -20,6 +20,7 @@ | |
| package org.elasticsearch.painless; | ||
|
|
||
| import org.elasticsearch.common.settings.Settings; | ||
| import org.elasticsearch.script.ScriptException; | ||
|
|
||
| import java.nio.CharBuffer; | ||
| import java.util.Arrays; | ||
|
|
@@ -44,8 +45,17 @@ public void testPatternAfterReturn() { | |
| assertEquals(false, exec("return 'bar' ==~ /foo/")); | ||
| } | ||
|
|
||
| public void testSlashesEscapePattern() { | ||
| assertEquals(true, exec("return '//' ==~ /\\/\\//")); | ||
| public void testBackslashEscapesForwardSlash() { | ||
| assertEquals(true, exec("'//' ==~ /\\/\\//")); | ||
|
Member
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This one blows up if you don't reorder the lexer rules after you switch it to non-greedy. |
||
| } | ||
|
|
||
| public void testBackslashEscapeBackslash() { | ||
| // Both of these are single backslashes but java escaping + Painless escaping.... | ||
| assertEquals(true, exec("'\\\\' ==~ /\\\\/")); | ||
| } | ||
|
|
||
| public void testRegexIsNonGreedy() { | ||
| assertEquals(true, exec("def s = /\\\\/.split('.\\\\.'); return s[1] ==~ /\\./")); | ||
|
Member
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This blew up mightily without the lexer change. |
||
| } | ||
|
|
||
| public void testPatternAfterAssignment() { | ||
|
|
@@ -248,11 +258,14 @@ public void testCantUsePatternCompile() { | |
| } | ||
|
|
||
| public void testBadRegexPattern() { | ||
| PatternSyntaxException e = expectScriptThrows(PatternSyntaxException.class, () -> { | ||
| ScriptException e = expectThrows(ScriptException.class, () -> { | ||
| exec("/\\ujjjj/"); // Invalid unicode | ||
| }); | ||
| assertThat(e.getMessage(), containsString("Illegal Unicode escape sequence near index 2")); | ||
| assertThat(e.getMessage(), containsString("\\ujjjj")); | ||
| assertEquals("Error compiling regex: Illegal Unicode escape sequence", e.getCause().getMessage()); | ||
|
|
||
| // And make sure the location of the error points to the offset inside the pattern | ||
| assertEquals("/\\ujjjj/", e.getScriptStack().get(0)); | ||
| assertEquals(" ^---- HERE", e.getScriptStack().get(1)); | ||
| } | ||
|
|
||
| public void testRegexAgainstNumber() { | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This non-greedy pattern works well for strings so I figured I should use it for regexes as well. I had to reorder the escape characters to the front so they wouldn't be ignored because order is important in non-greedy rules.