Skip to content

Commit 01e14bf

Browse files
jiangxb1987rxin
authored andcommitted
[SPARK-17910][SQL] Allow users to update the comment of a column
## What changes were proposed in this pull request? Right now, once a user set the comment of a column with create table command, he/she cannot update the comment. It will be useful to provide a public interface (e.g. SQL) to do that. This PR implements the following SQL statement: ``` ALTER TABLE table [PARTITION partition_spec] CHANGE [COLUMN] column_old_name column_new_name column_dataType [COMMENT column_comment] [FIRST | AFTER column_name]; ``` For further expansion, we could support alter `name`/`dataType`/`index` of a column too. ## How was this patch tested? Add new test cases in `ExternalCatalogSuite` and `SessionCatalogSuite`. Add sql file test for `ALTER TABLE CHANGE COLUMN` statement. Author: jiangxingbo <[email protected]> Closes #15717 from jiangxb1987/change-column.
1 parent 93cdb8a commit 01e14bf

File tree

7 files changed

+529
-18
lines changed

7 files changed

+529
-18
lines changed

sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBase.g4

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -89,6 +89,8 @@ statement
8989
SET TBLPROPERTIES tablePropertyList #setTableProperties
9090
| ALTER (TABLE | VIEW) tableIdentifier
9191
UNSET TBLPROPERTIES (IF EXISTS)? tablePropertyList #unsetTableProperties
92+
| ALTER TABLE tableIdentifier partitionSpec?
93+
CHANGE COLUMN? identifier colType colPosition? #changeColumn
9294
| ALTER TABLE tableIdentifier (partitionSpec)?
9395
SET SERDE STRING (WITH SERDEPROPERTIES tablePropertyList)? #setTableSerDe
9496
| ALTER TABLE tableIdentifier (partitionSpec)?
@@ -194,7 +196,6 @@ unsupportedHiveNativeCommands
194196
| kw1=ALTER kw2=TABLE tableIdentifier partitionSpec? kw3=CONCATENATE
195197
| kw1=ALTER kw2=TABLE tableIdentifier partitionSpec? kw3=SET kw4=FILEFORMAT
196198
| kw1=ALTER kw2=TABLE tableIdentifier partitionSpec? kw3=ADD kw4=COLUMNS
197-
| kw1=ALTER kw2=TABLE tableIdentifier partitionSpec? kw3=CHANGE kw4=COLUMN?
198199
| kw1=ALTER kw2=TABLE tableIdentifier partitionSpec? kw3=REPLACE kw4=COLUMNS
199200
| kw1=START kw2=TRANSACTION
200201
| kw1=COMMIT
@@ -580,6 +581,10 @@ intervalValue
580581
| STRING
581582
;
582583

584+
colPosition
585+
: FIRST | AFTER identifier
586+
;
587+
583588
dataType
584589
: complex=ARRAY '<' dataType '>' #complexDataType
585590
| complex=MAP '<' dataType ',' dataType '>' #complexDataType
@@ -671,7 +676,7 @@ number
671676
nonReserved
672677
: SHOW | TABLES | COLUMNS | COLUMN | PARTITIONS | FUNCTIONS | DATABASES
673678
| ADD
674-
| OVER | PARTITION | RANGE | ROWS | PRECEDING | FOLLOWING | CURRENT | ROW | LAST | FIRST
679+
| OVER | PARTITION | RANGE | ROWS | PRECEDING | FOLLOWING | CURRENT | ROW | LAST | FIRST | AFTER
675680
| MAP | ARRAY | STRUCT
676681
| LATERAL | WINDOW | REDUCE | TRANSFORM | USING | SERDE | SERDEPROPERTIES | RECORDREADER
677682
| DELIMITED | FIELDS | TERMINATED | COLLECTION | ITEMS | KEYS | ESCAPED | LINES | SEPARATED
@@ -761,6 +766,7 @@ PRECEDING: 'PRECEDING';
761766
FOLLOWING: 'FOLLOWING';
762767
CURRENT: 'CURRENT';
763768
FIRST: 'FIRST';
769+
AFTER: 'AFTER';
764770
LAST: 'LAST';
765771
ROW: 'ROW';
766772
WITH: 'WITH';

sql/core/src/main/scala/org/apache/spark/sql/execution/SparkSqlParser.scala

Lines changed: 28 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -31,7 +31,7 @@ import org.apache.spark.sql.catalyst.plans.logical.{LogicalPlan, OneRowRelation,
3131
import org.apache.spark.sql.execution.command._
3232
import org.apache.spark.sql.execution.datasources.{CreateTable, _}
3333
import org.apache.spark.sql.internal.{HiveSerDe, SQLConf, VariableSubstitution}
34-
import org.apache.spark.sql.types.StructType
34+
import org.apache.spark.sql.types.{StructField, StructType}
3535

3636
/**
3737
* Concrete parser for Spark SQL statements.
@@ -886,6 +886,33 @@ class SparkSqlAstBuilder(conf: SQLConf) extends AstBuilder {
886886
visitLocationSpec(ctx.locationSpec))
887887
}
888888

889+
/**
890+
* Create a [[AlterTableChangeColumnCommand]] command.
891+
*
892+
* For example:
893+
* {{{
894+
* ALTER TABLE table [PARTITION partition_spec]
895+
* CHANGE [COLUMN] column_old_name column_new_name column_dataType [COMMENT column_comment]
896+
* [FIRST | AFTER column_name];
897+
* }}}
898+
*/
899+
override def visitChangeColumn(ctx: ChangeColumnContext): LogicalPlan = withOrigin(ctx) {
900+
if (ctx.partitionSpec != null) {
901+
operationNotAllowed("ALTER TABLE table PARTITION partition_spec CHANGE COLUMN", ctx)
902+
}
903+
904+
if (ctx.colPosition != null) {
905+
operationNotAllowed(
906+
"ALTER TABLE table [PARTITION partition_spec] CHANGE COLUMN ... FIRST | AFTER otherCol",
907+
ctx)
908+
}
909+
910+
AlterTableChangeColumnCommand(
911+
tableName = visitTableIdentifier(ctx.tableIdentifier),
912+
columnName = ctx.identifier.getText,
913+
newColumn = visitColType(ctx.colType))
914+
}
915+
889916
/**
890917
* Create location string.
891918
*/

sql/core/src/main/scala/org/apache/spark/sql/execution/command/ddl.scala

Lines changed: 71 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -274,6 +274,77 @@ case class AlterTableUnsetPropertiesCommand(
274274

275275
}
276276

277+
278+
/**
279+
* A command to change the column for a table, only support changing the comment of a non-partition
280+
* column for now.
281+
*
282+
* The syntax of using this command in SQL is:
283+
* {{{
284+
* ALTER TABLE table_identifier
285+
* CHANGE [COLUMN] column_old_name column_new_name column_dataType [COMMENT column_comment]
286+
* [FIRST | AFTER column_name];
287+
* }}}
288+
*/
289+
case class AlterTableChangeColumnCommand(
290+
tableName: TableIdentifier,
291+
columnName: String,
292+
newColumn: StructField) extends RunnableCommand {
293+
294+
// TODO: support change column name/dataType/metadata/position.
295+
override def run(sparkSession: SparkSession): Seq[Row] = {
296+
val catalog = sparkSession.sessionState.catalog
297+
val table = catalog.getTableMetadata(tableName)
298+
val resolver = sparkSession.sessionState.conf.resolver
299+
DDLUtils.verifyAlterTableType(catalog, table, isView = false)
300+
301+
// Find the origin column from schema by column name.
302+
val originColumn = findColumnByName(table.schema, columnName, resolver)
303+
// Throw an AnalysisException if the column name/dataType is changed.
304+
if (!columnEqual(originColumn, newColumn, resolver)) {
305+
throw new AnalysisException(
306+
"ALTER TABLE CHANGE COLUMN is not supported for changing column " +
307+
s"'${originColumn.name}' with type '${originColumn.dataType}' to " +
308+
s"'${newColumn.name}' with type '${newColumn.dataType}'")
309+
}
310+
311+
val newSchema = table.schema.fields.map { field =>
312+
if (field.name == originColumn.name) {
313+
// Create a new column from the origin column with the new comment.
314+
addComment(field, newColumn.getComment)
315+
} else {
316+
field
317+
}
318+
}
319+
val newTable = table.copy(schema = StructType(newSchema))
320+
catalog.alterTable(newTable)
321+
322+
Seq.empty[Row]
323+
}
324+
325+
// Find the origin column from schema by column name, throw an AnalysisException if the column
326+
// reference is invalid.
327+
private def findColumnByName(
328+
schema: StructType, name: String, resolver: Resolver): StructField = {
329+
schema.fields.collectFirst {
330+
case field if resolver(field.name, name) => field
331+
}.getOrElse(throw new AnalysisException(
332+
s"Invalid column reference '$name', table schema is '${schema}'"))
333+
}
334+
335+
// Add the comment to a column, if comment is empty, return the original column.
336+
private def addComment(column: StructField, comment: Option[String]): StructField = {
337+
comment.map(column.withComment(_)).getOrElse(column)
338+
}
339+
340+
// Compare a [[StructField]] to another, return true if they have the same column
341+
// name(by resolver) and dataType.
342+
private def columnEqual(
343+
field: StructField, other: StructField, resolver: Resolver): Boolean = {
344+
resolver(field.name, other.name) && field.dataType == other.dataType
345+
}
346+
}
347+
277348
/**
278349
* A command that sets the serde class and/or serde properties of a table/view.
279350
*
Lines changed: 55 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,55 @@
1+
-- Create the origin table
2+
CREATE TABLE test_change(a INT, b STRING, c INT);
3+
DESC test_change;
4+
5+
-- Change column name (not supported yet)
6+
ALTER TABLE test_change CHANGE a a1 INT;
7+
DESC test_change;
8+
9+
-- Change column dataType (not supported yet)
10+
ALTER TABLE test_change CHANGE a a STRING;
11+
DESC test_change;
12+
13+
-- Change column position (not supported yet)
14+
ALTER TABLE test_change CHANGE a a INT AFTER b;
15+
ALTER TABLE test_change CHANGE b b STRING FIRST;
16+
DESC test_change;
17+
18+
-- Change column comment
19+
ALTER TABLE test_change CHANGE a a INT COMMENT 'this is column a';
20+
ALTER TABLE test_change CHANGE b b STRING COMMENT '#*02?`';
21+
ALTER TABLE test_change CHANGE c c INT COMMENT '';
22+
DESC test_change;
23+
24+
-- Don't change anything.
25+
ALTER TABLE test_change CHANGE a a INT COMMENT 'this is column a';
26+
DESC test_change;
27+
28+
-- Change a invalid column
29+
ALTER TABLE test_change CHANGE invalid_col invalid_col INT;
30+
DESC test_change;
31+
32+
-- Change column name/dataType/position/comment together (not supported yet)
33+
ALTER TABLE test_change CHANGE a a1 STRING COMMENT 'this is column a1' AFTER b;
34+
DESC test_change;
35+
36+
-- Check the behavior with different values of CASE_SENSITIVE
37+
SET spark.sql.caseSensitive=false;
38+
ALTER TABLE test_change CHANGE a A INT COMMENT 'this is column A';
39+
SET spark.sql.caseSensitive=true;
40+
ALTER TABLE test_change CHANGE a A INT COMMENT 'this is column A1';
41+
DESC test_change;
42+
43+
-- Change column can't apply to a temporary/global_temporary view
44+
CREATE TEMPORARY VIEW temp_view(a, b) AS SELECT 1, "one";
45+
ALTER TABLE temp_view CHANGE a a INT COMMENT 'this is column a';
46+
CREATE GLOBAL TEMPORARY VIEW global_temp_view(a, b) AS SELECT 1, "one";
47+
ALTER TABLE global_temp.global_temp_view CHANGE a a INT COMMENT 'this is column a';
48+
49+
-- Change column in partition spec (not supported yet)
50+
CREATE TABLE partition_table(a INT, b STRING) PARTITIONED BY (c INT, d STRING);
51+
ALTER TABLE partition_table PARTITION (c = 1) CHANGE COLUMN a new_a INT;
52+
53+
-- DROP TEST TABLE
54+
DROP TABLE test_change;
55+
DROP TABLE partition_table;

0 commit comments

Comments
 (0)