From 8d820f39c04ab975b7720b83c309ff8e4aebdd64 Mon Sep 17 00:00:00 2001 From: aokolnychyi Date: Mon, 19 Jun 2017 20:17:18 +0200 Subject: [PATCH 1/2] [SPARK-21102][SQL] Make refresh resource command less aggressive in parsing --- .../spark/sql/catalyst/parser/SqlBase.g4 | 2 +- .../spark/sql/execution/SparkSqlParser.scala | 19 +++++++++++--- .../sql/execution/SparkSqlParserSuite.scala | 25 ++++++++++++++++++- 3 files changed, 41 insertions(+), 5 deletions(-) diff --git a/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBase.g4 b/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBase.g4 index 7ffa150096333..29f554451ed4a 100644 --- a/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBase.g4 +++ b/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBase.g4 @@ -149,7 +149,7 @@ statement | (DESC | DESCRIBE) TABLE? option=(EXTENDED | FORMATTED)? tableIdentifier partitionSpec? describeColName? #describeTable | REFRESH TABLE tableIdentifier #refreshTable - | REFRESH .*? #refreshResource + | REFRESH (STRING | .*?) #refreshResource | CACHE LAZY? TABLE tableIdentifier (AS? query)? #cacheTable | UNCACHE TABLE (IF EXISTS)? tableIdentifier #uncacheTable | CLEAR CACHE #clearCache diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkSqlParser.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkSqlParser.scala index 3c58c6e1b6780..1bef199deb08f 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkSqlParser.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkSqlParser.scala @@ -230,11 +230,24 @@ class SparkSqlAstBuilder(conf: SQLConf) extends AstBuilder(conf) { } /** - * Create a [[RefreshTable]] logical plan. + * Create a [[RefreshResource]] logical plan. */ override def visitRefreshResource(ctx: RefreshResourceContext): LogicalPlan = withOrigin(ctx) { - val resourcePath = remainder(ctx.REFRESH.getSymbol).trim - RefreshResource(resourcePath) + val path = if (ctx.STRING != null) string(ctx.STRING) else extractUnquotedResourcePath(ctx) + RefreshResource(path) + } + + private def extractUnquotedResourcePath(ctx: RefreshResourceContext): String = withOrigin(ctx) { + val unquotedPath = remainder(ctx.REFRESH.getSymbol).trim + validate( + unquotedPath != null && !unquotedPath.isEmpty, + "Resource paths cannot be empty in REFRESH statements. Use / to match everything", + ctx) + validate( + !unquotedPath.contains(' '), + "It is not allowed to use spaces inside unquoted resource paths in REFRESH statements", + ctx) + unquotedPath } /** diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/SparkSqlParserSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/SparkSqlParserSuite.scala index bd9c2ebd6fab9..d340102f63152 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/SparkSqlParserSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/SparkSqlParserSuite.scala @@ -25,7 +25,7 @@ import org.apache.spark.sql.catalyst.expressions.{Ascending, Concat, SortOrder} import org.apache.spark.sql.catalyst.parser.ParseException import org.apache.spark.sql.catalyst.plans.logical.{LogicalPlan, Project, RepartitionByExpression, Sort} import org.apache.spark.sql.execution.command._ -import org.apache.spark.sql.execution.datasources.CreateTable +import org.apache.spark.sql.execution.datasources.{CreateTable, RefreshResource} import org.apache.spark.sql.internal.{HiveSerDe, SQLConf} import org.apache.spark.sql.types.{IntegerType, LongType, StringType, StructType} @@ -66,6 +66,29 @@ class SparkSqlParserSuite extends AnalysisTest { } } + test("refresh resource") { + assertEqual("REFRESH prefix_path", RefreshResource("prefix_path")) + assertEqual("REFRESH /", RefreshResource("/")) + assertEqual("REFRESH /path///a", RefreshResource("/path///a")) + assertEqual("REFRESH pat1h/112/_1a", RefreshResource("pat1h/112/_1a")) + assertEqual("REFRESH pat1h/112/_1a/a-1", RefreshResource("pat1h/112/_1a/a-1")) + assertEqual("REFRESH path-with-dash", RefreshResource("path-with-dash")) + assertEqual("REFRESH \'path with space\'", RefreshResource("path with space")) + assertEqual("REFRESH \"path with space 2\"", RefreshResource("path with space 2")) + intercept( + "REFRESH a b", + "It is not allowed to use spaces inside unquoted resource paths in REFRESH statements") + intercept( + "REFRESH @ $a$", + "It is not allowed to use spaces inside unquoted resource paths in REFRESH statements") + intercept( + "REFRESH ", + "Resource paths cannot be empty in REFRESH statements. Use / to match everything") + intercept( + "REFRESH", + "Resource paths cannot be empty in REFRESH statements. Use / to match everything") + } + test("show functions") { assertEqual("show functions", ShowFunctionsCommand(None, None, true, true)) assertEqual("show all functions", ShowFunctionsCommand(None, None, true, true)) From 6ab9191cf2dd539a7f6f8c539392645bcca33fcc Mon Sep 17 00:00:00 2001 From: aokolnychyi Date: Sun, 2 Jul 2017 23:33:51 +0200 Subject: [PATCH 2/2] [SPARK-21102][SQL] Handle additional symbols --- .../spark/sql/execution/SparkSqlParser.scala | 5 +++-- .../sql/execution/SparkSqlParserSuite.scala | 20 ++++++++----------- 2 files changed, 11 insertions(+), 14 deletions(-) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkSqlParser.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkSqlParser.scala index 1bef199deb08f..2b79eb5eac0f1 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkSqlParser.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkSqlParser.scala @@ -243,9 +243,10 @@ class SparkSqlAstBuilder(conf: SQLConf) extends AstBuilder(conf) { unquotedPath != null && !unquotedPath.isEmpty, "Resource paths cannot be empty in REFRESH statements. Use / to match everything", ctx) + val forbiddenSymbols = Seq(" ", "\n", "\r", "\t") validate( - !unquotedPath.contains(' '), - "It is not allowed to use spaces inside unquoted resource paths in REFRESH statements", + !forbiddenSymbols.exists(unquotedPath.contains(_)), + "REFRESH statements cannot contain ' ', '\\n', '\\r', '\\t' inside unquoted resource paths", ctx) unquotedPath } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/SparkSqlParserSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/SparkSqlParserSuite.scala index d340102f63152..d238c76fbeeff 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/SparkSqlParserSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/SparkSqlParserSuite.scala @@ -75,18 +75,14 @@ class SparkSqlParserSuite extends AnalysisTest { assertEqual("REFRESH path-with-dash", RefreshResource("path-with-dash")) assertEqual("REFRESH \'path with space\'", RefreshResource("path with space")) assertEqual("REFRESH \"path with space 2\"", RefreshResource("path with space 2")) - intercept( - "REFRESH a b", - "It is not allowed to use spaces inside unquoted resource paths in REFRESH statements") - intercept( - "REFRESH @ $a$", - "It is not allowed to use spaces inside unquoted resource paths in REFRESH statements") - intercept( - "REFRESH ", - "Resource paths cannot be empty in REFRESH statements. Use / to match everything") - intercept( - "REFRESH", - "Resource paths cannot be empty in REFRESH statements. Use / to match everything") + intercept("REFRESH a b", "REFRESH statements cannot contain") + intercept("REFRESH a\tb", "REFRESH statements cannot contain") + intercept("REFRESH a\nb", "REFRESH statements cannot contain") + intercept("REFRESH a\rb", "REFRESH statements cannot contain") + intercept("REFRESH a\r\nb", "REFRESH statements cannot contain") + intercept("REFRESH @ $a$", "REFRESH statements cannot contain") + intercept("REFRESH ", "Resource paths cannot be empty in REFRESH statements") + intercept("REFRESH", "Resource paths cannot be empty in REFRESH statements") } test("show functions") {