
Commit 33c6eb5

clockfly authored and liancheng committed
[SPARK-15171][SQL] Deprecate registerTempTable and add dataset.createTempView
## What changes were proposed in this pull request?

Deprecates registerTempTable and adds dataset.createTempView and dataset.createOrReplaceTempView.

## How was this patch tested?

Unit tests.

Author: Sean Zhong <[email protected]>

Closes #12945 from clockfly/spark-15171.
1 parent 5207a00 commit 33c6eb5
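
For reference, the two methods this commit adds differ only in how they treat an existing view of the same name. A minimal sketch in Scala (the SparkSession settings and the view name "people" are illustrative, not part of the patch):

import org.apache.spark.sql.SparkSession

val spark = SparkSession.builder.appName("ViewsSketch").master("local[*]").getOrCreate()
val df = spark.range(10).toDF("id")

// New in this commit: fails with an AnalysisException if a temporary view
// named "people" already exists.
df.createTempView("people")

// Also new: silently replaces any existing temporary view of the same name.
df.createOrReplaceTempView("people")

// Deprecated by this commit and kept as an alias for createOrReplaceTempView.
df.registerTempTable("people")

spark.sql("SELECT count(*) FROM people").show()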

45 files changed (+197, -120 lines)

dev/audit-release/sbt_app_sql/src/main/scala/SqlApp.scala

Lines changed: 2 additions & 2 deletions

@@ -41,7 +41,7 @@ object SparkSqlExample {
     import sqlContext._

     val people = sc.makeRDD(1 to 100, 10).map(x => Person(s"Name$x", x)).toDF()
-    people.registerTempTable("people")
+    people.createOrReplaceTempView("people")
     val teenagers = sql("SELECT name FROM people WHERE age >= 13 AND age <= 19")
     val teenagerNames = teenagers.map(t => "Name: " + t(0)).collect()
     teenagerNames.foreach(println)
@@ -52,7 +52,7 @@ object SparkSqlExample {
         System.exit(-1)
       }
     }
-
+
     test(teenagerNames.size == 7, "Unexpected number of selected elements: " + teenagerNames)
     println("Test succeeded")
     sc.stop()

examples/src/main/java/org/apache/spark/examples/sql/JavaSparkSQL.java

Lines changed: 4 additions & 4 deletions

@@ -75,7 +75,7 @@ public Person call(String line) {

     // Apply a schema to an RDD of Java Beans and register it as a table.
     Dataset<Row> schemaPeople = spark.createDataFrame(people, Person.class);
-    schemaPeople.registerTempTable("people");
+    schemaPeople.createOrReplaceTempView("people");

     // SQL can be run over RDDs that have been registered as tables.
     Dataset<Row> teenagers = spark.sql("SELECT name FROM people WHERE age >= 13 AND age <= 19");
@@ -102,7 +102,7 @@ public String call(Row row) {
     Dataset<Row> parquetFile = spark.read().parquet("people.parquet");

     //Parquet files can also be registered as tables and then used in SQL statements.
-    parquetFile.registerTempTable("parquetFile");
+    parquetFile.createOrReplaceTempView("parquetFile");
     Dataset<Row> teenagers2 =
       spark.sql("SELECT name FROM parquetFile WHERE age >= 13 AND age <= 19");
     teenagerNames = teenagers2.toJavaRDD().map(new Function<Row, String>() {
@@ -131,7 +131,7 @@ public String call(Row row) {
     // |-- name: StringType

     // Register this DataFrame as a table.
-    peopleFromJsonFile.registerTempTable("people");
+    peopleFromJsonFile.createOrReplaceTempView("people");

     // SQL statements can be run by using the sql methods provided by `spark`
     Dataset<Row> teenagers3 = spark.sql("SELECT name FROM people WHERE age >= 13 AND age <= 19");
@@ -163,7 +163,7 @@ public String call(Row row) {
     // |  |-- state: StringType
     // |-- name: StringType

-    peopleFromJsonRDD.registerTempTable("people2");
+    peopleFromJsonRDD.createOrReplaceTempView("people2");

     Dataset<Row> peopleWithCity = spark.sql("SELECT name, address.city FROM people2");
     List<String> nameAndCity = peopleWithCity.toJavaRDD().map(new Function<Row, String>() {

examples/src/main/java/org/apache/spark/examples/streaming/JavaSqlNetworkWordCount.java

Lines changed: 1 addition & 1 deletion

@@ -95,7 +95,7 @@ public JavaRecord call(String word) {
     Dataset<Row> wordsDataFrame = spark.createDataFrame(rowRDD, JavaRecord.class);

     // Register as table
-    wordsDataFrame.registerTempTable("words");
+    wordsDataFrame.createOrReplaceTempView("words");

     // Do word count on table using SQL and print it
     Dataset<Row> wordCountsDataFrame =

examples/src/main/python/sql.py

Lines changed: 1 addition & 1 deletion

@@ -67,7 +67,7 @@
 # |-- name: string (nullable = true)

 # Register this DataFrame as a temporary table.
-people.registerTempTable("people")
+people.createOrReplaceTempView("people")

 # SQL statements can be run by using the sql methods provided by `spark`
 teenagers = spark.sql("SELECT name FROM people WHERE age >= 13 AND age <= 19")

examples/src/main/python/streaming/sql_network_wordcount.py

Lines changed: 1 addition & 1 deletion

@@ -71,7 +71,7 @@ def process(time, rdd):
        wordsDataFrame = spark.createDataFrame(rowRdd)

        # Register as table
-       wordsDataFrame.registerTempTable("words")
+       wordsDataFrame.createOrReplaceTempView("words")

        # Do word count on table using SQL and print it
        wordCountsDataFrame = \

examples/src/main/scala/org/apache/spark/examples/sql/RDDRelation.scala

Lines changed: 2 additions & 2 deletions

@@ -37,7 +37,7 @@ object RDDRelation {
     val df = spark.createDataFrame((1 to 100).map(i => Record(i, s"val_$i")))
     // Any RDD containing case classes can be registered as a table. The schema of the table is
     // automatically inferred using scala reflection.
-    df.registerTempTable("records")
+    df.createOrReplaceTempView("records")

     // Once tables have been registered, you can run SQL queries over them.
     println("Result of SELECT *:")
@@ -67,7 +67,7 @@ object RDDRelation {
     parquetFile.where($"key" === 1).select($"value".as("a")).collect().foreach(println)

     // These files can also be registered as tables.
-    parquetFile.registerTempTable("parquetFile")
+    parquetFile.createOrReplaceTempView("parquetFile")
     spark.sql("SELECT * FROM parquetFile").collect().foreach(println)

     spark.stop()

examples/src/main/scala/org/apache/spark/examples/streaming/SqlNetworkWordCount.scala

Lines changed: 1 addition & 1 deletion

@@ -67,7 +67,7 @@ object SqlNetworkWordCount {
       val wordsDataFrame = rdd.map(w => Record(w)).toDF()

       // Register as table
-      wordsDataFrame.registerTempTable("words")
+      wordsDataFrame.createOrReplaceTempView("words")

       // Do word count on table using SQL and print it
       val wordCountsDataFrame =
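
createOrReplaceTempView is a natural fit for the streaming examples: the view is re-registered on every micro-batch, so replace-on-conflict is exactly the semantics the loop needs. A trimmed sketch of that per-batch pattern, assuming a DStream[String] `words` and the example's `case class Record(word: String)` (the aggregation query is illustrative):

import org.apache.spark.sql.SparkSession

words.foreachRDD { rdd =>
  val spark = SparkSession.builder.config(rdd.sparkContext.getConf).getOrCreate()
  import spark.implicits._

  // Re-registered each batch; createTempView would fail here from the
  // second batch onward, so the replacing variant is the right choice.
  rdd.map(w => Record(w)).toDF().createOrReplaceTempView("words")

  spark.sql("SELECT word, count(*) AS total FROM words GROUP BY word").show()
}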

mllib/src/main/scala/org/apache/spark/ml/feature/SQLTransformer.scala

Lines changed: 3 additions & 2 deletions

@@ -48,6 +48,7 @@ class SQLTransformer @Since("1.6.0") (override val uid: String) extends Transfor

   /**
    * SQL statement parameter. The statement is provided in string form.
+   *
    * @group param
    */
   @Since("1.6.0")
@@ -66,7 +67,7 @@ class SQLTransformer @Since("1.6.0") (override val uid: String) extends Transfor
   @Since("2.0.0")
   override def transform(dataset: Dataset[_]): DataFrame = {
     val tableName = Identifiable.randomUID(uid)
-    dataset.registerTempTable(tableName)
+    dataset.createOrReplaceTempView(tableName)
     val realStatement = $(statement).replace(tableIdentifier, tableName)
     dataset.sparkSession.sql(realStatement)
   }
@@ -79,7 +80,7 @@ class SQLTransformer @Since("1.6.0") (override val uid: String) extends Transfor
     val dummyDF = sqlContext.createDataFrame(dummyRDD, schema)
     val tableName = Identifiable.randomUID(uid)
     val realStatement = $(statement).replace(tableIdentifier, tableName)
-    dummyDF.registerTempTable(tableName)
+    dummyDF.createOrReplaceTempView(tableName)
     val outputSchema = sqlContext.sql(realStatement).schema
     sqlContext.dropTempTable(tableName)
     outputSchema
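
The transform() change above preserves SQLTransformer's pattern: register the input under a fresh, collision-free name, splice that name into the user's statement in place of the `__THIS__` placeholder (the class's `tableIdentifier`), and run the result. A simplified standalone sketch of that pattern, not the class itself, assuming a SparkSession `spark` and an input DataFrame `df`:

import java.util.UUID

val statement = "SELECT *, v * v AS v2 FROM __THIS__"  // user-supplied SQL (illustrative)
val tableName = "sql_trans_" + UUID.randomUUID().toString.replace("-", "")

// Register the input under the generated name, as the patched transform()
// does with Identifiable.randomUID(uid).
df.createOrReplaceTempView(tableName)

// Substitute the real view name for the placeholder and execute.
val result = spark.sql(statement.replace("__THIS__", tableName))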

python/pyspark/sql/catalog.py

Lines changed: 6 additions & 20 deletions

@@ -166,34 +166,20 @@ def createExternalTable(self, tableName, path=None, source=None, schema=None, **
         return DataFrame(df, self._sparkSession._wrapped)

     @since(2.0)
-    def dropTempTable(self, tableName):
-        """Drops the temporary table with the given table name in the catalog.
-        If the table has been cached before, then it will also be uncached.
+    def dropTempView(self, viewName):
+        """Drops the temporary view with the given view name in the catalog.
+        If the view has been cached before, then it will also be uncached.

-        >>> spark.createDataFrame([(1, 1)]).registerTempTable("my_table")
+        >>> spark.createDataFrame([(1, 1)]).createTempView("my_table")
         >>> spark.table("my_table").collect()
         [Row(_1=1, _2=1)]
-        >>> spark.catalog.dropTempTable("my_table")
+        >>> spark.catalog.dropTempView("my_table")
         >>> spark.table("my_table") # doctest: +IGNORE_EXCEPTION_DETAIL
         Traceback (most recent call last):
             ...
         AnalysisException: ...
         """
-        self._jcatalog.dropTempTable(tableName)
-
-    @since(2.0)
-    def registerTable(self, df, tableName):
-        """Registers the given :class:`DataFrame` as a temporary table in the catalog.
-
-        >>> df = spark.createDataFrame([(2, 1), (3, 1)])
-        >>> spark.catalog.registerTable(df, "my_cool_table")
-        >>> spark.table("my_cool_table").collect()
-        [Row(_1=2, _2=1), Row(_1=3, _2=1)]
-        """
-        if isinstance(df, DataFrame):
-            self._jsparkSession.registerTable(df._jdf, tableName)
-        else:
-            raise ValueError("Can only register DataFrame as table")
+        self._jcatalog.dropTempView(viewName)

     @ignore_unicode_prefix
     @since(2.0)
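
The renamed dropTempView keeps the documented cache behaviour: dropping a cached view also uncaches it. A Scala analogue of the doctest above, assuming a SparkSession `spark` (the view name is illustrative):

val df = spark.createDataFrame(Seq((1, 1)))
df.createTempView("my_table")

spark.table("my_table").cache()          // cache data through the view
spark.catalog.dropTempView("my_table")   // drops the view and uncaches its data

// A subsequent spark.table("my_table") fails with an AnalysisException.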

python/pyspark/sql/context.py

Lines changed: 2 additions & 2 deletions

@@ -302,7 +302,7 @@ def registerDataFrameAsTable(self, df, tableName):

         >>> sqlContext.registerDataFrameAsTable(df, "table1")
         """
-        self.sparkSession.catalog.registerTable(df, tableName)
+        df.createOrReplaceTempView(tableName)

     @since(1.6)
     def dropTempTable(self, tableName):
@@ -311,7 +311,7 @@ def dropTempTable(self, tableName):
         >>> sqlContext.registerDataFrameAsTable(df, "table1")
         >>> sqlContext.dropTempTable("table1")
         """
-        self.sparkSession.catalog.dropTempTable(tableName)
+        self.sparkSession.catalog.dropTempView(tableName)

     @since(1.3)
     def createExternalTable(self, tableName, path=None, source=None, schema=None, **options):
