diff --git a/docs/reference/sql/limitations.asciidoc b/docs/reference/sql/limitations.asciidoc index 8f7868c892e52..b9c59e31b3d6f 100644 --- a/docs/reference/sql/limitations.asciidoc +++ b/docs/reference/sql/limitations.asciidoc @@ -3,6 +3,14 @@ [[sql-limitations]] == SQL Limitations +[float] +[[large-parsing-trees]] +=== Large queries may throw `ParsingExpection` + +Extremely large queries can consume too much memory during the parsing phase, in which case the {es-sql} engine will +abort parsing and throw an error. In such cases, consider reducing the query to a smaller size by potentially +simplifying it or splitting it into smaller queries. + [float] [[sys-columns-describe-table-nested-fields]] === Nested fields in `SYS COLUMNS` and `DESCRIBE TABLE` diff --git a/x-pack/plugin/sql/src/main/java/org/elasticsearch/xpack/sql/parser/SqlParser.java b/x-pack/plugin/sql/src/main/java/org/elasticsearch/xpack/sql/parser/SqlParser.java index 9920293794ea0..6166d87703ead 100644 --- a/x-pack/plugin/sql/src/main/java/org/elasticsearch/xpack/sql/parser/SqlParser.java +++ b/x-pack/plugin/sql/src/main/java/org/elasticsearch/xpack/sql/parser/SqlParser.java @@ -5,7 +5,6 @@ */ package org.elasticsearch.xpack.sql.parser; -import com.carrotsearch.hppc.ObjectShortHashMap; import org.antlr.v4.runtime.BaseErrorListener; import org.antlr.v4.runtime.CharStream; import org.antlr.v4.runtime.CommonToken; @@ -26,16 +25,6 @@ import org.apache.logging.log4j.LogManager; import org.apache.logging.log4j.Logger; import org.elasticsearch.xpack.sql.expression.Expression; -import org.elasticsearch.xpack.sql.parser.SqlBaseParser.BackQuotedIdentifierContext; -import org.elasticsearch.xpack.sql.parser.SqlBaseParser.BooleanDefaultContext; -import org.elasticsearch.xpack.sql.parser.SqlBaseParser.BooleanExpressionContext; -import org.elasticsearch.xpack.sql.parser.SqlBaseParser.PrimaryExpressionContext; -import org.elasticsearch.xpack.sql.parser.SqlBaseParser.QueryPrimaryDefaultContext; -import org.elasticsearch.xpack.sql.parser.SqlBaseParser.QueryTermContext; -import org.elasticsearch.xpack.sql.parser.SqlBaseParser.QuoteIdentifierContext; -import org.elasticsearch.xpack.sql.parser.SqlBaseParser.StatementContext; -import org.elasticsearch.xpack.sql.parser.SqlBaseParser.StatementDefaultContext; -import org.elasticsearch.xpack.sql.parser.SqlBaseParser.UnquoteIdentifierContext; import org.elasticsearch.xpack.sql.plan.logical.LogicalPlan; import org.elasticsearch.xpack.sql.proto.SqlTypedParamValue; @@ -50,7 +39,6 @@ import java.util.function.Function; import static java.lang.String.format; -import static org.elasticsearch.xpack.sql.parser.AbstractBuilder.source; public class SqlParser { @@ -100,45 +88,49 @@ private T invokeParser(String sql, List params, Function parseFunction, BiFunction visitor) { - SqlBaseLexer lexer = new SqlBaseLexer(new CaseInsensitiveStream(sql)); + try { + SqlBaseLexer lexer = new SqlBaseLexer(new CaseInsensitiveStream(sql)); - lexer.removeErrorListeners(); - lexer.addErrorListener(ERROR_LISTENER); + lexer.removeErrorListeners(); + lexer.addErrorListener(ERROR_LISTENER); - Map paramTokens = new HashMap<>(); - TokenSource tokenSource = new ParametrizedTokenSource(lexer, paramTokens, params); + Map paramTokens = new HashMap<>(); + TokenSource tokenSource = new ParametrizedTokenSource(lexer, paramTokens, params); - CommonTokenStream tokenStream = new CommonTokenStream(tokenSource); - SqlBaseParser parser = new SqlBaseParser(tokenStream); + CommonTokenStream tokenStream = new CommonTokenStream(tokenSource); + SqlBaseParser parser = new SqlBaseParser(tokenStream); - parser.addParseListener(new CircuitBreakerListener()); - parser.addParseListener(new PostProcessor(Arrays.asList(parser.getRuleNames()))); + parser.addParseListener(new PostProcessor(Arrays.asList(parser.getRuleNames()))); - parser.removeErrorListeners(); - parser.addErrorListener(ERROR_LISTENER); + parser.removeErrorListeners(); + parser.addErrorListener(ERROR_LISTENER); - parser.getInterpreter().setPredictionMode(PredictionMode.SLL); + parser.getInterpreter().setPredictionMode(PredictionMode.SLL); - if (DEBUG) { - debug(parser); - tokenStream.fill(); + if (DEBUG) { + debug(parser); + tokenStream.fill(); - for (Token t : tokenStream.getTokens()) { - String symbolicName = SqlBaseLexer.VOCABULARY.getSymbolicName(t.getType()); - String literalName = SqlBaseLexer.VOCABULARY.getLiteralName(t.getType()); - log.info(format(Locale.ROOT, " %-15s '%s'", + for (Token t : tokenStream.getTokens()) { + String symbolicName = SqlBaseLexer.VOCABULARY.getSymbolicName(t.getType()); + String literalName = SqlBaseLexer.VOCABULARY.getLiteralName(t.getType()); + log.info(format(Locale.ROOT, " %-15s '%s'", symbolicName == null ? literalName : symbolicName, t.getText())); + } } - } - ParserRuleContext tree = parseFunction.apply(parser); + ParserRuleContext tree = parseFunction.apply(parser); - if (DEBUG) { - log.info("Parse tree {} " + tree.toStringTree()); - } + if (DEBUG) { + log.info("Parse tree {} " + tree.toStringTree()); + } - return visitor.apply(new AstBuilder(paramTokens), tree); + return visitor.apply(new AstBuilder(paramTokens), tree); + } catch (StackOverflowError e) { + throw new ParsingException("SQL statement is too large, " + + "causing stack overflow when generating the parsing tree: [{}]", sql); + } } private static void debug(SqlBaseParser parser) { @@ -221,93 +213,6 @@ public void exitNonReserved(SqlBaseParser.NonReservedContext context) { } } - /** - * Used to catch large expressions that can lead to stack overflows - */ - static class CircuitBreakerListener extends SqlBaseBaseListener { - - private static final short MAX_RULE_DEPTH = 200; - - /** - * Due to the structure of the grammar and our custom handling in {@link ExpressionBuilder} - * some expressions can exit with a different class than they entered: - * e.g.: ValueExpressionContext can exit as ValueExpressionDefaultContext - */ - private static final Map ENTER_EXIT_RULE_MAPPING = new HashMap<>(); - - static { - ENTER_EXIT_RULE_MAPPING.put(StatementDefaultContext.class.getSimpleName(), StatementContext.class.getSimpleName()); - ENTER_EXIT_RULE_MAPPING.put(QueryPrimaryDefaultContext.class.getSimpleName(), QueryTermContext.class.getSimpleName()); - ENTER_EXIT_RULE_MAPPING.put(BooleanDefaultContext.class.getSimpleName(), BooleanExpressionContext.class.getSimpleName()); - } - - private boolean insideIn = false; - - // Keep current depth for every rule visited. - // The totalDepth alone cannot be used as expressions like: e1 OR e2 OR e3 OR ... - // are processed as e1 OR (e2 OR (e3 OR (... and this results in the totalDepth not growing - // while the stack call depth is, leading to a StackOverflowError. - private ObjectShortHashMap depthCounts = new ObjectShortHashMap<>(); - - @Override - public void enterEveryRule(ParserRuleContext ctx) { - if (inDetected(ctx)) { - insideIn = true; - } - - // Skip PrimaryExpressionContext for IN as it's not visited on exit due to - // the grammar's peculiarity rule with "predicated" and "predicate". - // Also skip the Identifiers as they are "cheap". - if (ctx.getClass() != UnquoteIdentifierContext.class && - ctx.getClass() != QuoteIdentifierContext.class && - ctx.getClass() != BackQuotedIdentifierContext.class && - ctx.getClass() != SqlBaseParser.ConstantContext.class && - ctx.getClass() != SqlBaseParser.NumberContext.class && - ctx.getClass() != SqlBaseParser.ValueExpressionContext.class && - (insideIn == false || ctx.getClass() != PrimaryExpressionContext.class)) { - - int currentDepth = depthCounts.putOrAdd(ctx.getClass().getSimpleName(), (short) 1, (short) 1); - if (currentDepth > MAX_RULE_DEPTH) { - throw new ParsingException(source(ctx), "SQL statement too large; " + - "halt parsing to prevent memory errors (stopped at depth {})", MAX_RULE_DEPTH); - } - } - super.enterEveryRule(ctx); - } - - @Override - public void exitEveryRule(ParserRuleContext ctx) { - if (inDetected(ctx)) { - insideIn = false; - } - - decrementCounter(ctx); - super.exitEveryRule(ctx); - } - - ObjectShortHashMap depthCounts() { - return depthCounts; - } - - private void decrementCounter(ParserRuleContext ctx) { - String className = ctx.getClass().getSimpleName(); - String classNameToDecrement = ENTER_EXIT_RULE_MAPPING.getOrDefault(className, className); - - // Avoid having negative numbers - if (depthCounts.containsKey(classNameToDecrement)) { - depthCounts.putOrAdd(classNameToDecrement, (short) 0, (short) -1); - } - } - - private boolean inDetected(ParserRuleContext ctx) { - if (ctx.getParent() != null && ctx.getParent().getClass() == SqlBaseParser.PredicateContext.class) { - SqlBaseParser.PredicateContext pc = (SqlBaseParser.PredicateContext) ctx.getParent(); - return pc.kind != null && pc.kind.getType() == SqlBaseParser.IN; - } - return false; - } - } - private static final BaseErrorListener ERROR_LISTENER = new BaseErrorListener() { @Override public void syntaxError(Recognizer recognizer, Object offendingSymbol, int line, diff --git a/x-pack/plugin/sql/src/test/java/org/elasticsearch/xpack/sql/parser/SqlParserTests.java b/x-pack/plugin/sql/src/test/java/org/elasticsearch/xpack/sql/parser/SqlParserTests.java index d1e05b6ec532c..f9b0fc18bca52 100644 --- a/x-pack/plugin/sql/src/test/java/org/elasticsearch/xpack/sql/parser/SqlParserTests.java +++ b/x-pack/plugin/sql/src/test/java/org/elasticsearch/xpack/sql/parser/SqlParserTests.java @@ -6,7 +6,6 @@ package org.elasticsearch.xpack.sql.parser; import com.google.common.base.Joiner; - import org.elasticsearch.test.ESTestCase; import org.elasticsearch.xpack.sql.expression.NamedExpression; import org.elasticsearch.xpack.sql.expression.Order; @@ -18,19 +17,10 @@ import org.elasticsearch.xpack.sql.expression.predicate.fulltext.MultiMatchQueryPredicate; import org.elasticsearch.xpack.sql.expression.predicate.fulltext.StringQueryPredicate; import org.elasticsearch.xpack.sql.expression.predicate.operator.arithmetic.Add; -import org.elasticsearch.xpack.sql.expression.predicate.operator.comparison.In; -import org.elasticsearch.xpack.sql.parser.SqlBaseParser.BooleanExpressionContext; -import org.elasticsearch.xpack.sql.parser.SqlBaseParser.QueryPrimaryDefaultContext; -import org.elasticsearch.xpack.sql.parser.SqlBaseParser.QueryTermContext; -import org.elasticsearch.xpack.sql.parser.SqlBaseParser.StatementContext; -import org.elasticsearch.xpack.sql.parser.SqlBaseParser.StatementDefaultContext; -import org.elasticsearch.xpack.sql.parser.SqlBaseParser.ValueExpressionContext; -import org.elasticsearch.xpack.sql.parser.SqlBaseParser.ValueExpressionDefaultContext; import org.elasticsearch.xpack.sql.plan.logical.Filter; import org.elasticsearch.xpack.sql.plan.logical.LogicalPlan; import org.elasticsearch.xpack.sql.plan.logical.OrderBy; import org.elasticsearch.xpack.sql.plan.logical.Project; -import org.elasticsearch.xpack.sql.plan.logical.With; import java.util.ArrayList; import java.util.List; @@ -198,86 +188,44 @@ public void testMultiMatchQuery() { assertThat(mmqp.optionMap(), hasEntry("fuzzy_rewrite", "scoring_boolean")); } - public void testLimitToPreventStackOverflowFromLongListOfQuotedIdentifiers() { - // Create expression in the form of "t"."field","t"."field", ... - - // 200 elements is ok - new SqlParser().createStatement("SELECT " + - Joiner.on(",").join(nCopies(200, "\"t\".\"field\"")) + " FROM t"); - - // 201 elements parser's "circuit breaker" is triggered - ParsingException e = expectThrows(ParsingException.class, () -> new SqlParser().createStatement("SELECT " + - Joiner.on(",").join(nCopies(201, "\"t\".\"field\"")) + " FROM t")); - assertEquals("line 1:2409: SQL statement too large; halt parsing to prevent memory errors (stopped at depth 200)", - e.getMessage()); - } - - public void testLimitToPreventStackOverflowFromLongListOfUnQuotedIdentifiers() { - // Create expression in the form of t.field,t.field, ... - - // 250 elements is ok - new SqlParser().createStatement("SELECT " + - Joiner.on(",").join(nCopies(200, "t.field")) + " FROM t"); - - // 251 elements parser's "circuit breaker" is triggered - ParsingException e = expectThrows(ParsingException.class, () -> new SqlParser().createStatement("SELECT " + - Joiner.on(",").join(nCopies(201, "t.field")) + " FROM t")); - assertEquals("line 1:1609: SQL statement too large; halt parsing to prevent memory errors (stopped at depth 200)", - e.getMessage()); - } - - public void testLimitToPreventStackOverflowFromLargeUnaryBooleanExpression() { - // Create expression in the form of NOT(NOT(NOT ... (b) ...) - - // 99 elements is ok - new SqlParser().createExpression( - Joiner.on("").join(nCopies(99, "NOT(")).concat("b").concat(Joiner.on("").join(nCopies(99, ")")))); - - // 100 elements parser's "circuit breaker" is triggered - ParsingException e = expectThrows(ParsingException.class, () -> new SqlParser().createExpression( - Joiner.on("").join(nCopies(100, "NOT(")).concat("b").concat(Joiner.on("").join(nCopies(100, ")"))))); - assertEquals("line 1:402: SQL statement too large; halt parsing to prevent memory errors (stopped at depth 200)", - e.getMessage()); - } - public void testLimitToPreventStackOverflowFromLargeBinaryBooleanExpression() { // Create expression in the form of a = b OR a = b OR ... a = b - // 100 elements is ok - new SqlParser().createExpression(Joiner.on(" OR ").join(nCopies(100, "a = b"))); + // 1000 elements is ok + new SqlParser().createExpression(Joiner.on(" OR ").join(nCopies(1000, "a = b"))); - // 101 elements parser's "circuit breaker" is triggered + // 5000 elements cause stack overflow ParsingException e = expectThrows(ParsingException.class, () -> - new SqlParser().createExpression(Joiner.on(" OR ").join(nCopies(101, "a = b")))); - assertEquals("line 1:902: SQL statement too large; halt parsing to prevent memory errors (stopped at depth 200)", - e.getMessage()); + new SqlParser().createExpression(Joiner.on(" OR ").join(nCopies(5000, "a = b")))); + assertThat(e.getMessage(), + startsWith("line -1:0: SQL statement is too large, causing stack overflow when generating the parsing tree: [")); } public void testLimitToPreventStackOverflowFromLargeUnaryArithmeticExpression() { // Create expression in the form of abs(abs(abs ... (i) ...) - // 199 elements is ok + // 200 elements is ok new SqlParser().createExpression( - Joiner.on("").join(nCopies(199, "abs(")).concat("i").concat(Joiner.on("").join(nCopies(199, ")")))); + Joiner.on("").join(nCopies(200, "abs(")).concat("i").concat(Joiner.on("").join(nCopies(200, ")")))); - // 200 elements parser's "circuit breaker" is triggered + // 5000 elements cause stack overflow ParsingException e = expectThrows(ParsingException.class, () -> new SqlParser().createExpression( - Joiner.on("").join(nCopies(200, "abs(")).concat("i").concat(Joiner.on("").join(nCopies(200, ")"))))); - assertEquals("line 1:802: SQL statement too large; halt parsing to prevent memory errors (stopped at depth 200)", - e.getMessage()); + Joiner.on("").join(nCopies(1000, "abs(")).concat("i").concat(Joiner.on("").join(nCopies(1000, ")"))))); + assertThat(e.getMessage(), + startsWith("line -1:0: SQL statement is too large, causing stack overflow when generating the parsing tree: [")); } public void testLimitToPreventStackOverflowFromLargeBinaryArithmeticExpression() { // Create expression in the form of a + a + a + ... + a - // 200 elements is ok - new SqlParser().createExpression(Joiner.on(" + ").join(nCopies(200, "a"))); + // 1000 elements is ok + new SqlParser().createExpression(Joiner.on(" + ").join(nCopies(1000, "a"))); - // 201 elements parser's "circuit breaker" is triggered + // 5000 elements cause stack overflow ParsingException e = expectThrows(ParsingException.class, () -> - new SqlParser().createExpression(Joiner.on(" + ").join(nCopies(201, "a")))); - assertEquals("line 1:802: SQL statement too large; halt parsing to prevent memory errors (stopped at depth 200)", - e.getMessage()); + new SqlParser().createExpression(Joiner.on(" + ").join(nCopies(5000, "a")))); + assertThat(e.getMessage(), + startsWith("line -1:0: SQL statement is too large, causing stack overflow when generating the parsing tree: [")); } public void testLimitToPreventStackOverflowFromLargeSubselectTree() { @@ -289,92 +237,13 @@ public void testLimitToPreventStackOverflowFromLargeSubselectTree() { .concat("t") .concat(Joiner.on("").join(nCopies(199, ")")))); - // 201 elements parser's "circuit breaker" is triggered + // 500 elements cause stack overflow ParsingException e = expectThrows(ParsingException.class, () -> new SqlParser().createStatement( - Joiner.on(" (").join(nCopies(201, "SELECT * FROM")) + Joiner.on(" (").join(nCopies(500, "SELECT * FROM")) .concat("t") - .concat(Joiner.on("").join(nCopies(200, ")"))))); - assertEquals("line 1:3002: SQL statement too large; halt parsing to prevent memory errors (stopped at depth 200)", - e.getMessage()); - } - - public void testLimitToPreventStackOverflowFromLargeComplexSubselectTree() { - // Test with queries in the form of `SELECT true OR true OR .. FROM (SELECT true OR true OR... FROM (... t) ...) - - new SqlParser().createStatement( - Joiner.on(" (").join(nCopies(20, "SELECT ")). - concat(Joiner.on(" OR ").join(nCopies(180, "true"))).concat(" FROM") - .concat("t").concat(Joiner.on("").join(nCopies(19, ")")))); - - ParsingException e = expectThrows(ParsingException.class, () -> new SqlParser().createStatement( - Joiner.on(" (").join(nCopies(20, "SELECT ")). - concat(Joiner.on(" OR ").join(nCopies(190, "true"))).concat(" FROM") - .concat("t").concat(Joiner.on("").join(nCopies(19, ")"))))); - assertEquals("line 1:1628: SQL statement too large; halt parsing to prevent memory errors (stopped at depth 200)", - e.getMessage()); - } - - public void testLimitStackOverflowForInAndLiteralsIsNotApplied() { - int noChildren = 10_000; - LogicalPlan plan = parseStatement("SELECT * FROM t WHERE a IN(" + - Joiner.on(",").join(nCopies(noChildren, "a + 10")) + "," + - Joiner.on(",").join(nCopies(noChildren, "-(-a - 10)")) + "," + - Joiner.on(",").join(nCopies(noChildren, "20")) + "," + - Joiner.on(",").join(nCopies(noChildren, "-20")) + "," + - Joiner.on(",").join(nCopies(noChildren, "20.1234")) + "," + - Joiner.on(",").join(nCopies(noChildren, "-20.4321")) + "," + - Joiner.on(",").join(nCopies(noChildren, "1.1234E56")) + "," + - Joiner.on(",").join(nCopies(noChildren, "-1.4321E-65")) + "," + - Joiner.on(",").join(nCopies(noChildren, "'foo'")) + "," + - Joiner.on(",").join(nCopies(noChildren, "'bar'")) + ")"); - - assertEquals(With.class, plan.getClass()); - assertEquals(Project.class, ((With) plan).child().getClass()); - assertEquals(Filter.class, ((Project) ((With) plan).child()).child().getClass()); - Filter filter = (Filter) ((Project) ((With) plan).child()).child(); - assertEquals(In.class, filter.condition().getClass()); - In in = (In) filter.condition(); - assertEquals("?a", in.value().toString()); - assertEquals(noChildren * 2 + 8, in.list().size()); - assertThat(in.list().get(0).toString(), startsWith("Add[?a,10]#")); - assertThat(in.list().get(noChildren).toString(), startsWith("Neg[Sub[Neg[?a]#")); - assertEquals("20", in.list().get(noChildren * 2).toString()); - assertEquals("-20", in.list().get(noChildren * 2 + 1).toString()); - assertEquals("20.1234", in.list().get(noChildren * 2 + 2).toString()); - assertEquals("-20.4321", in.list().get(noChildren * 2 + 3).toString()); - assertEquals("1.1234E56", in.list().get(noChildren * 2 + 4).toString()); - assertEquals("-1.4321E-65", in.list().get(noChildren * 2 + 5).toString()); - assertEquals("'foo'=foo", in.list().get(noChildren * 2 + 6).toString()); - assertEquals("'bar'=bar", in.list().get(noChildren * 2 + 7).toString()); - } - - public void testDecrementOfDepthCounter() { - SqlParser.CircuitBreakerListener cbl = new SqlParser.CircuitBreakerListener(); - StatementContext sc = new StatementContext(); - QueryTermContext qtc = new QueryTermContext(); - ValueExpressionContext vec = new ValueExpressionContext(); - BooleanExpressionContext bec = new BooleanExpressionContext(); - - cbl.enterEveryRule(sc); - cbl.enterEveryRule(sc); - cbl.enterEveryRule(qtc); - cbl.enterEveryRule(qtc); - cbl.enterEveryRule(qtc); - cbl.enterEveryRule(vec); - cbl.enterEveryRule(bec); - cbl.enterEveryRule(bec); - - cbl.exitEveryRule(new StatementDefaultContext(sc)); - cbl.exitEveryRule(new StatementDefaultContext(sc)); - cbl.exitEveryRule(new QueryPrimaryDefaultContext(qtc)); - cbl.exitEveryRule(new QueryPrimaryDefaultContext(qtc)); - cbl.exitEveryRule(new ValueExpressionDefaultContext(vec)); - cbl.exitEveryRule(new SqlBaseParser.BooleanDefaultContext(bec)); - - assertEquals(0, cbl.depthCounts().get(SqlBaseParser.StatementContext.class.getSimpleName())); - assertEquals(1, cbl.depthCounts().get(SqlBaseParser.QueryTermContext.class.getSimpleName())); - assertEquals(0, cbl.depthCounts().get(SqlBaseParser.ValueExpressionContext.class.getSimpleName())); - assertEquals(1, cbl.depthCounts().get(SqlBaseParser.BooleanExpressionContext.class.getSimpleName())); + .concat(Joiner.on("").join(nCopies(499, ")"))))); + assertThat(e.getMessage(), + startsWith("line -1:0: SQL statement is too large, causing stack overflow when generating the parsing tree: [")); } private LogicalPlan parseStatement(String sql) {