Skip to content

Commit 0e6d92d

Browse files
viiryarxin
authored andcommitted
[SPARK-12689][SQL] Migrate DDL parsing to the newly absorbed parser
JIRA: https://issues.apache.org/jira/browse/SPARK-12689 DDLParser processes three commands: createTable, describeTable and refreshTable. This patch migrates the three commands to newly absorbed parser. Author: Liang-Chi Hsieh <[email protected]> Author: Liang-Chi Hsieh <[email protected]> Closes #10723 from viirya/migrate-ddl-describe.
1 parent a1303de commit 0e6d92d

File tree

10 files changed

+208
-229
lines changed

10 files changed

+208
-229
lines changed

project/MimaExcludes.scala

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -200,6 +200,11 @@ object MimaExcludes {
200200
ProblemFilters.exclude[MissingMethodProblem]("org.apache.spark.streaming.flume.sink.Logging.org$apache$spark$streaming$flume$sink$Logging$$_log_="),
201201
ProblemFilters.exclude[MissingMethodProblem]("org.apache.spark.streaming.flume.sink.TransactionProcessor.org$apache$spark$streaming$flume$sink$Logging$$log_"),
202202
ProblemFilters.exclude[MissingMethodProblem]("org.apache.spark.streaming.flume.sink.TransactionProcessor.org$apache$spark$streaming$flume$sink$Logging$$log__=")
203+
) ++ Seq(
204+
// SPARK-12689 Migrate DDL parsing to the newly absorbed parser
205+
ProblemFilters.exclude[MissingClassProblem]("org.apache.spark.sql.execution.datasources.DDLParser"),
206+
ProblemFilters.exclude[MissingClassProblem]("org.apache.spark.sql.execution.datasources.DDLException"),
207+
ProblemFilters.exclude[MissingMethodProblem]("org.apache.spark.sql.SQLContext.ddlParser")
203208
) ++ Seq(
204209
// SPARK-7799 Add "streaming-akka" project
205210
ProblemFilters.exclude[MissingMethodProblem]("org.apache.spark.streaming.zeromq.ZeroMQUtils.createStream"),

sql/catalyst/src/main/antlr3/org/apache/spark/sql/catalyst/parser/ExpressionParser.g

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -493,6 +493,16 @@ descFuncNames
493493
| functionIdentifier
494494
;
495495
496+
//We are allowed to use From and To in CreateTableUsing command's options (actually seems we can use any string as the option key). But we can't simply add them into nonReserved because by doing that we mess other existing rules. So we create a looseIdentifier and looseNonReserved here.
497+
looseIdentifier
498+
:
499+
Identifier
500+
| looseNonReserved -> Identifier[$looseNonReserved.text]
501+
// If it decides to support SQL11 reserved keywords, i.e., useSQL11ReservedKeywordsForIdentifier()=false,
502+
// the sql11keywords in existing q tests will NOT be added back.
503+
| {useSQL11ReservedKeywordsForIdentifier()}? sql11ReservedKeywordsUsedAsIdentifier -> Identifier[$sql11ReservedKeywordsUsedAsIdentifier.text]
504+
;
505+
496506
identifier
497507
:
498508
Identifier
@@ -516,6 +526,10 @@ principalIdentifier
516526
| QuotedIdentifier
517527
;
518528
529+
looseNonReserved
530+
: nonReserved | KW_FROM | KW_TO
531+
;
532+
519533
//The new version of nonReserved + sql11ReservedKeywordsUsedAsIdentifier = old version of nonReserved
520534
//Non reserved keywords are basically the keywords that can be used as identifiers.
521535
//All the KW_* are automatically not only keywords, but also reserved keywords.

sql/catalyst/src/main/antlr3/org/apache/spark/sql/catalyst/parser/SparkSqlLexer.g

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -324,6 +324,8 @@ KW_ISOLATION: 'ISOLATION';
324324
KW_LEVEL: 'LEVEL';
325325
KW_SNAPSHOT: 'SNAPSHOT';
326326
KW_AUTOCOMMIT: 'AUTOCOMMIT';
327+
KW_REFRESH: 'REFRESH';
328+
KW_OPTIONS: 'OPTIONS';
327329
KW_WEEK: 'WEEK'|'WEEKS';
328330
KW_MILLISECOND: 'MILLISECOND'|'MILLISECONDS';
329331
KW_MICROSECOND: 'MICROSECOND'|'MICROSECONDS';
@@ -470,7 +472,7 @@ Identifier
470472
fragment
471473
QuotedIdentifier
472474
:
473-
'`' ( '``' | ~('`') )* '`' { setText(getText().substring(1, getText().length() -1 ).replaceAll("``", "`")); }
475+
'`' ( '``' | ~('`') )* '`' { setText(getText().replaceAll("``", "`")); }
474476
;
475477

476478
WS : (' '|'\r'|'\t'|'\n') {$channel=HIDDEN;}

sql/catalyst/src/main/antlr3/org/apache/spark/sql/catalyst/parser/SparkSqlParser.g

Lines changed: 75 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -142,6 +142,7 @@ TOK_UNIONTYPE;
142142
TOK_COLTYPELIST;
143143
TOK_CREATEDATABASE;
144144
TOK_CREATETABLE;
145+
TOK_CREATETABLEUSING;
145146
TOK_TRUNCATETABLE;
146147
TOK_CREATEINDEX;
147148
TOK_CREATEINDEX_INDEXTBLNAME;
@@ -371,6 +372,10 @@ TOK_TXN_READ_WRITE;
371372
TOK_COMMIT;
372373
TOK_ROLLBACK;
373374
TOK_SET_AUTOCOMMIT;
375+
TOK_REFRESHTABLE;
376+
TOK_TABLEPROVIDER;
377+
TOK_TABLEOPTIONS;
378+
TOK_TABLEOPTION;
374379
TOK_CACHETABLE;
375380
TOK_UNCACHETABLE;
376381
TOK_CLEARCACHE;
@@ -660,6 +665,12 @@ import java.util.HashMap;
660665
}
661666
private char [] excludedCharForColumnName = {'.', ':'};
662667
private boolean containExcludedCharForCreateTableColumnName(String input) {
668+
if (input.length() > 0) {
669+
if (input.charAt(0) == '`' && input.charAt(input.length() - 1) == '`') {
670+
// When column name is backquoted, we don't care about excluded chars.
671+
return false;
672+
}
673+
}
663674
for(char c : excludedCharForColumnName) {
664675
if(input.indexOf(c)>-1) {
665676
return true;
@@ -781,6 +792,7 @@ ddlStatement
781792
| truncateTableStatement
782793
| alterStatement
783794
| descStatement
795+
| refreshStatement
784796
| showStatement
785797
| metastoreCheck
786798
| createViewStatement
@@ -907,12 +919,31 @@ createTableStatement
907919
@init { pushMsg("create table statement", state); }
908920
@after { popMsg(state); }
909921
: KW_CREATE (temp=KW_TEMPORARY)? (ext=KW_EXTERNAL)? KW_TABLE ifNotExists? name=tableName
910-
( like=KW_LIKE likeName=tableName
922+
(
923+
like=KW_LIKE likeName=tableName
911924
tableRowFormat?
912925
tableFileFormat?
913926
tableLocation?
914927
tablePropertiesPrefixed?
928+
-> ^(TOK_CREATETABLE $name $temp? $ext? ifNotExists?
929+
^(TOK_LIKETABLE $likeName?)
930+
tableRowFormat?
931+
tableFileFormat?
932+
tableLocation?
933+
tablePropertiesPrefixed?
934+
)
935+
|
936+
tableProvider
937+
tableOpts?
938+
(KW_AS selectStatementWithCTE)?
939+
-> ^(TOK_CREATETABLEUSING $name $temp? ifNotExists?
940+
tableProvider
941+
tableOpts?
942+
selectStatementWithCTE?
943+
)
915944
| (LPAREN columnNameTypeList RPAREN)?
945+
(p=tableProvider?)
946+
tableOpts?
916947
tableComment?
917948
tablePartition?
918949
tableBuckets?
@@ -922,8 +953,15 @@ createTableStatement
922953
tableLocation?
923954
tablePropertiesPrefixed?
924955
(KW_AS selectStatementWithCTE)?
925-
)
926-
-> ^(TOK_CREATETABLE $name $temp? $ext? ifNotExists?
956+
-> {p != null}?
957+
^(TOK_CREATETABLEUSING $name $temp? ifNotExists?
958+
columnNameTypeList?
959+
$p
960+
tableOpts?
961+
selectStatementWithCTE?
962+
)
963+
->
964+
^(TOK_CREATETABLE $name $temp? $ext? ifNotExists?
927965
^(TOK_LIKETABLE $likeName?)
928966
columnNameTypeList?
929967
tableComment?
@@ -935,7 +973,8 @@ createTableStatement
935973
tableLocation?
936974
tablePropertiesPrefixed?
937975
selectStatementWithCTE?
938-
)
976+
)
977+
)
939978
;
940979
941980
truncateTableStatement
@@ -1379,6 +1418,13 @@ tabPartColTypeExpr
13791418
: tableName partitionSpec? extColumnName? -> ^(TOK_TABTYPE tableName partitionSpec? extColumnName?)
13801419
;
13811420
1421+
refreshStatement
1422+
@init { pushMsg("refresh statement", state); }
1423+
@after { popMsg(state); }
1424+
:
1425+
KW_REFRESH KW_TABLE tableName -> ^(TOK_REFRESHTABLE tableName)
1426+
;
1427+
13821428
descStatement
13831429
@init { pushMsg("describe statement", state); }
13841430
@after { popMsg(state); }
@@ -1774,6 +1820,30 @@ showStmtIdentifier
17741820
| StringLiteral
17751821
;
17761822
1823+
tableProvider
1824+
@init { pushMsg("table's provider", state); }
1825+
@after { popMsg(state); }
1826+
:
1827+
KW_USING Identifier (DOT Identifier)*
1828+
-> ^(TOK_TABLEPROVIDER Identifier+)
1829+
;
1830+
1831+
optionKeyValue
1832+
@init { pushMsg("table's option specification", state); }
1833+
@after { popMsg(state); }
1834+
:
1835+
(looseIdentifier (DOT looseIdentifier)*) StringLiteral
1836+
-> ^(TOK_TABLEOPTION looseIdentifier+ StringLiteral)
1837+
;
1838+
1839+
tableOpts
1840+
@init { pushMsg("table's options", state); }
1841+
@after { popMsg(state); }
1842+
:
1843+
KW_OPTIONS LPAREN optionKeyValue (COMMA optionKeyValue)* RPAREN
1844+
-> ^(TOK_TABLEOPTIONS optionKeyValue+)
1845+
;
1846+
17771847
tableComment
17781848
@init { pushMsg("table's comment", state); }
17791849
@after { popMsg(state); }
@@ -2132,7 +2202,7 @@ structType
21322202
mapType
21332203
@init { pushMsg("map type", state); }
21342204
@after { popMsg(state); }
2135-
: KW_MAP LESSTHAN left=primitiveType COMMA right=type GREATERTHAN
2205+
: KW_MAP LESSTHAN left=type COMMA right=type GREATERTHAN
21362206
-> ^(TOK_MAP $left $right)
21372207
;
21382208

sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/CatalystQl.scala

Lines changed: 13 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -140,6 +140,7 @@ private[sql] class CatalystQl(val conf: ParserConf = SimpleParserConf()) extends
140140
case Token("TOK_BOOLEAN", Nil) => BooleanType
141141
case Token("TOK_STRING", Nil) => StringType
142142
case Token("TOK_VARCHAR", Token(_, Nil) :: Nil) => StringType
143+
case Token("TOK_CHAR", Token(_, Nil) :: Nil) => StringType
143144
case Token("TOK_FLOAT", Nil) => FloatType
144145
case Token("TOK_DOUBLE", Nil) => DoubleType
145146
case Token("TOK_DATE", Nil) => DateType
@@ -156,9 +157,10 @@ private[sql] class CatalystQl(val conf: ParserConf = SimpleParserConf()) extends
156157

157158
protected def nodeToStructField(node: ASTNode): StructField = node match {
158159
case Token("TOK_TABCOL", Token(fieldName, Nil) :: dataType :: Nil) =>
159-
StructField(fieldName, nodeToDataType(dataType), nullable = true)
160-
case Token("TOK_TABCOL", Token(fieldName, Nil) :: dataType :: _ /* comment */:: Nil) =>
161-
StructField(fieldName, nodeToDataType(dataType), nullable = true)
160+
StructField(cleanIdentifier(fieldName), nodeToDataType(dataType), nullable = true)
161+
case Token("TOK_TABCOL", Token(fieldName, Nil) :: dataType :: comment :: Nil) =>
162+
val meta = new MetadataBuilder().putString("comment", unquoteString(comment.text)).build()
163+
StructField(cleanIdentifier(fieldName), nodeToDataType(dataType), nullable = true, meta)
162164
case _ =>
163165
noParseRule("StructField", node)
164166
}
@@ -222,15 +224,16 @@ https://cwiki.apache.org/confluence/display/Hive/Enhanced+Aggregation%2C+Cube%2C
222224
case Nil =>
223225
ShowFunctions(None, None)
224226
case Token(name, Nil) :: Nil =>
225-
ShowFunctions(None, Some(unquoteString(name)))
227+
ShowFunctions(None, Some(unquoteString(cleanIdentifier(name))))
226228
case Token(db, Nil) :: Token(name, Nil) :: Nil =>
227-
ShowFunctions(Some(unquoteString(db)), Some(unquoteString(name)))
229+
ShowFunctions(Some(unquoteString(cleanIdentifier(db))),
230+
Some(unquoteString(cleanIdentifier(name))))
228231
case _ =>
229232
noParseRule("SHOW FUNCTIONS", node)
230233
}
231234

232235
case Token("TOK_DESCFUNCTION", Token(functionName, Nil) :: isExtended) =>
233-
DescribeFunction(functionName, isExtended.nonEmpty)
236+
DescribeFunction(cleanIdentifier(functionName), isExtended.nonEmpty)
234237

235238
case Token("TOK_QUERY", queryArgs @ Token("TOK_CTE" | "TOK_FROM" | "TOK_INSERT", _) :: _) =>
236239
val (fromClause: Option[ASTNode], insertClauses, cteRelations) =
@@ -611,7 +614,7 @@ https://cwiki.apache.org/confluence/display/Hive/Enhanced+Aggregation%2C+Cube%2C
611614
noParseRule("Select", node)
612615
}
613616

614-
protected val escapedIdentifier = "`([^`]+)`".r
617+
protected val escapedIdentifier = "`(.+)`".r
615618
protected val doubleQuotedString = "\"([^\"]+)\"".r
616619
protected val singleQuotedString = "'([^']+)'".r
617620

@@ -655,15 +658,15 @@ https://cwiki.apache.org/confluence/display/Hive/Enhanced+Aggregation%2C+Cube%2C
655658
nodeToExpr(qualifier) match {
656659
case UnresolvedAttribute(nameParts) =>
657660
UnresolvedAttribute(nameParts :+ cleanIdentifier(attr))
658-
case other => UnresolvedExtractValue(other, Literal(attr))
661+
case other => UnresolvedExtractValue(other, Literal(cleanIdentifier(attr)))
659662
}
660663

661664
/* Stars (*) */
662665
case Token("TOK_ALLCOLREF", Nil) => UnresolvedStar(None)
663666
// The format of dbName.tableName.* cannot be parsed by HiveParser. TOK_TABNAME will only
664667
// has a single child which is tableName.
665668
case Token("TOK_ALLCOLREF", Token("TOK_TABNAME", target) :: Nil) if target.nonEmpty =>
666-
UnresolvedStar(Some(target.map(_.text)))
669+
UnresolvedStar(Some(target.map(x => cleanIdentifier(x.text))))
667670

668671
/* Aggregate Functions */
669672
case Token("TOK_FUNCTIONDI", Token(COUNT(), Nil) :: args) =>
@@ -971,7 +974,7 @@ https://cwiki.apache.org/confluence/display/Hive/Enhanced+Aggregation%2C+Cube%2C
971974
protected def nodeToGenerate(node: ASTNode, outer: Boolean, child: LogicalPlan): Generate = {
972975
val Token("TOK_SELECT", Token("TOK_SELEXPR", clauses) :: Nil) = node
973976

974-
val alias = getClause("TOK_TABALIAS", clauses).children.head.text
977+
val alias = cleanIdentifier(getClause("TOK_TABALIAS", clauses).children.head.text)
975978

976979
val generator = clauses.head match {
977980
case Token("TOK_FUNCTION", Token(explode(), Nil) :: childNode :: Nil) =>

sql/core/src/main/scala/org/apache/spark/sql/SQLContext.scala

Lines changed: 1 addition & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -206,10 +206,7 @@ class SQLContext private[sql](
206206
@transient
207207
protected[sql] val sqlParser: ParserInterface = new SparkQl(conf)
208208

209-
@transient
210-
protected[sql] val ddlParser: DDLParser = new DDLParser(sqlParser)
211-
212-
protected[sql] def parseSql(sql: String): LogicalPlan = ddlParser.parse(sql, false)
209+
protected[sql] def parseSql(sql: String): LogicalPlan = sqlParser.parsePlan(sql)
213210

214211
protected[sql] def executeSql(sql: String):
215212
org.apache.spark.sql.execution.QueryExecution = executePlan(parseSql(sql))

0 commit comments

Comments
 (0)