|
5 | 5 | */ |
6 | 6 | package org.elasticsearch.xpack.sql.parser; |
7 | 7 |
|
8 | | -import com.carrotsearch.hppc.ObjectShortHashMap; |
9 | 8 | import org.antlr.v4.runtime.BaseErrorListener; |
10 | 9 | import org.antlr.v4.runtime.CharStream; |
11 | 10 | import org.antlr.v4.runtime.CommonToken; |
|
26 | 25 | import org.apache.logging.log4j.LogManager; |
27 | 26 | import org.apache.logging.log4j.Logger; |
28 | 27 | import org.elasticsearch.xpack.sql.expression.Expression; |
29 | | -import org.elasticsearch.xpack.sql.parser.SqlBaseParser.BackQuotedIdentifierContext; |
30 | | -import org.elasticsearch.xpack.sql.parser.SqlBaseParser.BooleanDefaultContext; |
31 | | -import org.elasticsearch.xpack.sql.parser.SqlBaseParser.BooleanExpressionContext; |
32 | | -import org.elasticsearch.xpack.sql.parser.SqlBaseParser.PrimaryExpressionContext; |
33 | | -import org.elasticsearch.xpack.sql.parser.SqlBaseParser.QueryPrimaryDefaultContext; |
34 | | -import org.elasticsearch.xpack.sql.parser.SqlBaseParser.QueryTermContext; |
35 | | -import org.elasticsearch.xpack.sql.parser.SqlBaseParser.QuoteIdentifierContext; |
36 | | -import org.elasticsearch.xpack.sql.parser.SqlBaseParser.StatementContext; |
37 | | -import org.elasticsearch.xpack.sql.parser.SqlBaseParser.StatementDefaultContext; |
38 | | -import org.elasticsearch.xpack.sql.parser.SqlBaseParser.UnquoteIdentifierContext; |
39 | 28 | import org.elasticsearch.xpack.sql.plan.logical.LogicalPlan; |
40 | 29 | import org.elasticsearch.xpack.sql.proto.SqlTypedParamValue; |
41 | 30 |
|
|
50 | 39 | import java.util.function.Function; |
51 | 40 |
|
52 | 41 | import static java.lang.String.format; |
53 | | -import static org.elasticsearch.xpack.sql.parser.AbstractBuilder.source; |
54 | 42 |
|
55 | 43 | public class SqlParser { |
56 | 44 |
|
@@ -100,45 +88,49 @@ private <T> T invokeParser(String sql, |
100 | 88 | List<SqlTypedParamValue> params, Function<SqlBaseParser, |
101 | 89 | ParserRuleContext> parseFunction, |
102 | 90 | BiFunction<AstBuilder, ParserRuleContext, T> visitor) { |
103 | | - SqlBaseLexer lexer = new SqlBaseLexer(new CaseInsensitiveStream(sql)); |
| 91 | + try { |
| 92 | + SqlBaseLexer lexer = new SqlBaseLexer(new CaseInsensitiveStream(sql)); |
104 | 93 |
|
105 | | - lexer.removeErrorListeners(); |
106 | | - lexer.addErrorListener(ERROR_LISTENER); |
| 94 | + lexer.removeErrorListeners(); |
| 95 | + lexer.addErrorListener(ERROR_LISTENER); |
107 | 96 |
|
108 | | - Map<Token, SqlTypedParamValue> paramTokens = new HashMap<>(); |
109 | | - TokenSource tokenSource = new ParametrizedTokenSource(lexer, paramTokens, params); |
| 97 | + Map<Token, SqlTypedParamValue> paramTokens = new HashMap<>(); |
| 98 | + TokenSource tokenSource = new ParametrizedTokenSource(lexer, paramTokens, params); |
110 | 99 |
|
111 | | - CommonTokenStream tokenStream = new CommonTokenStream(tokenSource); |
112 | | - SqlBaseParser parser = new SqlBaseParser(tokenStream); |
| 100 | + CommonTokenStream tokenStream = new CommonTokenStream(tokenSource); |
| 101 | + SqlBaseParser parser = new SqlBaseParser(tokenStream); |
113 | 102 |
|
114 | | - parser.addParseListener(new CircuitBreakerListener()); |
115 | | - parser.addParseListener(new PostProcessor(Arrays.asList(parser.getRuleNames()))); |
| 103 | + parser.addParseListener(new PostProcessor(Arrays.asList(parser.getRuleNames()))); |
116 | 104 |
|
117 | | - parser.removeErrorListeners(); |
118 | | - parser.addErrorListener(ERROR_LISTENER); |
| 105 | + parser.removeErrorListeners(); |
| 106 | + parser.addErrorListener(ERROR_LISTENER); |
119 | 107 |
|
120 | | - parser.getInterpreter().setPredictionMode(PredictionMode.SLL); |
| 108 | + parser.getInterpreter().setPredictionMode(PredictionMode.SLL); |
121 | 109 |
|
122 | | - if (DEBUG) { |
123 | | - debug(parser); |
124 | | - tokenStream.fill(); |
| 110 | + if (DEBUG) { |
| 111 | + debug(parser); |
| 112 | + tokenStream.fill(); |
125 | 113 |
|
126 | | - for (Token t : tokenStream.getTokens()) { |
127 | | - String symbolicName = SqlBaseLexer.VOCABULARY.getSymbolicName(t.getType()); |
128 | | - String literalName = SqlBaseLexer.VOCABULARY.getLiteralName(t.getType()); |
129 | | - log.info(format(Locale.ROOT, " %-15s '%s'", |
| 114 | + for (Token t : tokenStream.getTokens()) { |
| 115 | + String symbolicName = SqlBaseLexer.VOCABULARY.getSymbolicName(t.getType()); |
| 116 | + String literalName = SqlBaseLexer.VOCABULARY.getLiteralName(t.getType()); |
| 117 | + log.info(format(Locale.ROOT, " %-15s '%s'", |
130 | 118 | symbolicName == null ? literalName : symbolicName, |
131 | 119 | t.getText())); |
| 120 | + } |
132 | 121 | } |
133 | | - } |
134 | 122 |
|
135 | | - ParserRuleContext tree = parseFunction.apply(parser); |
| 123 | + ParserRuleContext tree = parseFunction.apply(parser); |
136 | 124 |
|
137 | | - if (DEBUG) { |
138 | | - log.info("Parse tree {} " + tree.toStringTree()); |
139 | | - } |
| 125 | + if (DEBUG) { |
| 126 | + log.info("Parse tree {} " + tree.toStringTree()); |
| 127 | + } |
140 | 128 |
|
141 | | - return visitor.apply(new AstBuilder(paramTokens), tree); |
| 129 | + return visitor.apply(new AstBuilder(paramTokens), tree); |
| 130 | + } catch (StackOverflowError e) { |
| 131 | + throw new ParsingException("SQL statement is too large, " + |
| 132 | + "causing stack overflow when generating the parsing tree: [{}]", sql); |
| 133 | + } |
142 | 134 | } |
143 | 135 |
|
144 | 136 | private static void debug(SqlBaseParser parser) { |
@@ -221,93 +213,6 @@ public void exitNonReserved(SqlBaseParser.NonReservedContext context) { |
221 | 213 | } |
222 | 214 | } |
223 | 215 |
|
224 | | - /** |
225 | | - * Used to catch large expressions that can lead to stack overflows |
226 | | - */ |
227 | | - static class CircuitBreakerListener extends SqlBaseBaseListener { |
228 | | - |
229 | | - private static final short MAX_RULE_DEPTH = 200; |
230 | | - |
231 | | - /** |
232 | | - * Due to the structure of the grammar and our custom handling in {@link ExpressionBuilder} |
233 | | - * some expressions can exit with a different class than they entered: |
234 | | - * e.g.: ValueExpressionContext can exit as ValueExpressionDefaultContext |
235 | | - */ |
236 | | - private static final Map<String, String> ENTER_EXIT_RULE_MAPPING = new HashMap<>(); |
237 | | - |
238 | | - static { |
239 | | - ENTER_EXIT_RULE_MAPPING.put(StatementDefaultContext.class.getSimpleName(), StatementContext.class.getSimpleName()); |
240 | | - ENTER_EXIT_RULE_MAPPING.put(QueryPrimaryDefaultContext.class.getSimpleName(), QueryTermContext.class.getSimpleName()); |
241 | | - ENTER_EXIT_RULE_MAPPING.put(BooleanDefaultContext.class.getSimpleName(), BooleanExpressionContext.class.getSimpleName()); |
242 | | - } |
243 | | - |
244 | | - private boolean insideIn = false; |
245 | | - |
246 | | - // Keep current depth for every rule visited. |
247 | | - // The totalDepth alone cannot be used as expressions like: e1 OR e2 OR e3 OR ... |
248 | | - // are processed as e1 OR (e2 OR (e3 OR (... and this results in the totalDepth not growing |
249 | | - // while the stack call depth is, leading to a StackOverflowError. |
250 | | - private ObjectShortHashMap<String> depthCounts = new ObjectShortHashMap<>(); |
251 | | - |
252 | | - @Override |
253 | | - public void enterEveryRule(ParserRuleContext ctx) { |
254 | | - if (inDetected(ctx)) { |
255 | | - insideIn = true; |
256 | | - } |
257 | | - |
258 | | - // Skip PrimaryExpressionContext for IN as it's not visited on exit due to |
259 | | - // the grammar's peculiarity rule with "predicated" and "predicate". |
260 | | - // Also skip the Identifiers as they are "cheap". |
261 | | - if (ctx.getClass() != UnquoteIdentifierContext.class && |
262 | | - ctx.getClass() != QuoteIdentifierContext.class && |
263 | | - ctx.getClass() != BackQuotedIdentifierContext.class && |
264 | | - ctx.getClass() != SqlBaseParser.ConstantContext.class && |
265 | | - ctx.getClass() != SqlBaseParser.NumberContext.class && |
266 | | - ctx.getClass() != SqlBaseParser.ValueExpressionContext.class && |
267 | | - (insideIn == false || ctx.getClass() != PrimaryExpressionContext.class)) { |
268 | | - |
269 | | - int currentDepth = depthCounts.putOrAdd(ctx.getClass().getSimpleName(), (short) 1, (short) 1); |
270 | | - if (currentDepth > MAX_RULE_DEPTH) { |
271 | | - throw new ParsingException(source(ctx), "SQL statement too large; " + |
272 | | - "halt parsing to prevent memory errors (stopped at depth {})", MAX_RULE_DEPTH); |
273 | | - } |
274 | | - } |
275 | | - super.enterEveryRule(ctx); |
276 | | - } |
277 | | - |
278 | | - @Override |
279 | | - public void exitEveryRule(ParserRuleContext ctx) { |
280 | | - if (inDetected(ctx)) { |
281 | | - insideIn = false; |
282 | | - } |
283 | | - |
284 | | - decrementCounter(ctx); |
285 | | - super.exitEveryRule(ctx); |
286 | | - } |
287 | | - |
288 | | - ObjectShortHashMap<String> depthCounts() { |
289 | | - return depthCounts; |
290 | | - } |
291 | | - |
292 | | - private void decrementCounter(ParserRuleContext ctx) { |
293 | | - String className = ctx.getClass().getSimpleName(); |
294 | | - String classNameToDecrement = ENTER_EXIT_RULE_MAPPING.getOrDefault(className, className); |
295 | | - |
296 | | - // Avoid having negative numbers |
297 | | - if (depthCounts.containsKey(classNameToDecrement)) { |
298 | | - depthCounts.putOrAdd(classNameToDecrement, (short) 0, (short) -1); |
299 | | - } |
300 | | - } |
301 | | - |
302 | | - private boolean inDetected(ParserRuleContext ctx) { |
303 | | - if (ctx.getParent() != null && ctx.getParent().getClass() == SqlBaseParser.PredicateContext.class) { |
304 | | - SqlBaseParser.PredicateContext pc = (SqlBaseParser.PredicateContext) ctx.getParent(); |
305 | | - return pc.kind != null && pc.kind.getType() == SqlBaseParser.IN; |
306 | | - } |
307 | | - return false; |
308 | | - } |
309 | | - } |
310 | | - |
311 | 216 | private static final BaseErrorListener ERROR_LISTENER = new BaseErrorListener() { |
312 | 217 | @Override |
313 | 218 | public void syntaxError(Recognizer<?, ?> recognizer, Object offendingSymbol, int line, |
|
0 commit comments