
Commit 18ee55d

[SPARK-19148][SQL] do not expose the external table concept in Catalog
## What changes were proposed in this pull request?

In apache#16296 we reached a consensus that we should hide the external/managed table concept from users and only expose a custom table path. This PR renames `Catalog.createExternalTable` to `createTable` (the old versions are kept for backward compatibility), and only sets the table type to EXTERNAL if `path` is specified in the options.

## How was this patch tested?

New tests in `CatalogSuite`.

Author: Wenchen Fan <[email protected]>

Closes apache#16528 from cloud-fan/create-table.
1 parent f8db894 commit 18ee55d
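For orientation, here is a minimal sketch of the user-facing effect (the session setup, table names, and paths below are illustrative, not taken from the patch): whether a table ends up external is decided solely by the presence of a `path` option, and the old `createExternalTable` entry points simply forward to `createTable`.

```scala
import org.apache.spark.sql.SparkSession
import org.apache.spark.sql.types.{LongType, StringType, StructType}

val spark = SparkSession.builder().appName("catalog-createTable-sketch").getOrCreate()
val schema = new StructType().add("id", LongType).add("name", StringType)

// No "path" option: the catalog creates a managed table.
spark.catalog.createTable("managed_tbl", "parquet", schema, Map.empty[String, String])

// A "path" option is the only way the caller expresses "external"; the table type
// becomes EXTERNAL internally, but the API itself never mentions the concept.
spark.catalog.createTable("external_tbl", "parquet", schema, Map("path" -> "/tmp/external_tbl"))

// The old name still compiles, but is deprecated and forwards to createTable.
spark.catalog.createExternalTable("legacy_tbl", "/tmp/external_tbl")
```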

6 files changed: +211 -103 lines


project/MimaExcludes.scala

Lines changed: 4 additions & 1 deletion
@@ -43,7 +43,10 @@ object MimaExcludes {
       ProblemFilters.exclude[ReversedMissingMethodProblem]("org.apache.spark.sql.catalog.Catalog.recoverPartitions"),

       // [SPARK-18537] Add a REST api to spark streaming
-      ProblemFilters.exclude[ReversedMissingMethodProblem]("org.apache.spark.streaming.scheduler.StreamingListener.onStreamingStarted")
+      ProblemFilters.exclude[ReversedMissingMethodProblem]("org.apache.spark.streaming.scheduler.StreamingListener.onStreamingStarted"),
+
+      // [SPARK-19148][SQL] do not expose the external table concept in Catalog
+      ProblemFilters.exclude[ReversedMissingMethodProblem]("org.apache.spark.sql.catalog.Catalog.createTable")
     )

     // Exclude rules for 2.1.x
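For readers unfamiliar with MiMa: `ReversedMissingMethodProblem` is reported when a method is added to a public class or trait that downstream code may implement, since implementations compiled against the previous release would not provide it. A hedged, simplified sketch of how such an exclusion is registered (the object name and `Seq` layout below are illustrative, not the real structure of `MimaExcludes.scala`):

```scala
// Illustrative only: a trimmed-down excludes list in the style of MimaExcludes.scala.
import com.typesafe.tools.mima.core._

object ExampleMimaExcludes {
  // Source/binary breaks that are accepted on purpose for the 2.2.x line.
  lazy val v22excludes = Seq(
    // [SPARK-19148][SQL] Catalog gained new createTable overloads, so a Catalog
    // implementation compiled against 2.1 would be missing them: an intentional break.
    ProblemFilters.exclude[ReversedMissingMethodProblem]("org.apache.spark.sql.catalog.Catalog.createTable")
  )
}
```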

python/pyspark/sql/catalog.py

Lines changed: 24 additions & 3 deletions
@@ -15,6 +15,7 @@
 # limitations under the License.
 #

+import warnings
 from collections import namedtuple

 from pyspark import since
@@ -138,7 +139,27 @@ def listColumns(self, tableName, dbName=None):

     @since(2.0)
     def createExternalTable(self, tableName, path=None, source=None, schema=None, **options):
-        """Creates an external table based on the dataset in a data source.
+        """Creates a table based on the dataset in a data source.
+
+        It returns the DataFrame associated with the external table.
+
+        The data source is specified by the ``source`` and a set of ``options``.
+        If ``source`` is not specified, the default data source configured by
+        ``spark.sql.sources.default`` will be used.
+
+        Optionally, a schema can be provided as the schema of the returned :class:`DataFrame` and
+        created external table.
+
+        :return: :class:`DataFrame`
+        """
+        warnings.warn(
+            "createExternalTable is deprecated since Spark 2.2, please use createTable instead.",
+            DeprecationWarning)
+        return self.createTable(tableName, path, source, schema, **options)
+
+    @since(2.2)
+    def createTable(self, tableName, path=None, source=None, schema=None, **options):
+        """Creates a table based on the dataset in a data source.

         It returns the DataFrame associated with the external table.

@@ -157,12 +178,12 @@ def createExternalTable(self, tableName, path=None, source=None, schema=None, **
         source = self._sparkSession.conf.get(
             "spark.sql.sources.default", "org.apache.spark.sql.parquet")
         if schema is None:
-            df = self._jcatalog.createExternalTable(tableName, source, options)
+            df = self._jcatalog.createTable(tableName, source, options)
         else:
             if not isinstance(schema, StructType):
                 raise TypeError("schema should be StructType")
             scala_datatype = self._jsparkSession.parseDataType(schema.json())
-            df = self._jcatalog.createExternalTable(tableName, source, scala_datatype, options)
+            df = self._jcatalog.createTable(tableName, source, scala_datatype, options)
         return DataFrame(df, self._sparkSession._wrapped)

     @since(2.0)
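On the JVM side, the `_jcatalog.createTable(...)` calls above land on the new `Catalog.createTable` overloads introduced in `Catalog.scala` below; the wrapper picks the overload with or without a schema depending on what the Python caller passed. A rough Scala sketch of those two call shapes (table names, source, and options are illustrative):

```scala
import org.apache.spark.sql.SparkSession
import org.apache.spark.sql.types.{LongType, StructType}

val spark = SparkSession.builder().appName("py-wrapper-sketch").getOrCreate()

// Shape taken when the Python caller supplies no schema:
//   df = self._jcatalog.createTable(tableName, source, options)
val dfNoSchema = spark.catalog.createTable("events", "parquet", Map("path" -> "/tmp/events"))

// Shape taken when a schema is supplied: PySpark first parses the schema JSON into a
// JVM StructType, then calls the overload that also takes the schema.
val schema = new StructType().add("id", LongType)
val dfWithSchema =
  spark.catalog.createTable("events_typed", "parquet", schema, Map("path" -> "/tmp/events"))
```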

sql/core/src/main/scala/org/apache/spark/sql/catalog/Catalog.scala

Lines changed: 109 additions & 20 deletions
@@ -17,6 +17,8 @@

 package org.apache.spark.sql.catalog

+import scala.collection.JavaConverters._
+
 import org.apache.spark.annotation.{Experimental, InterfaceStability}
 import org.apache.spark.sql.{AnalysisException, DataFrame, Dataset}
 import org.apache.spark.sql.types.StructType
@@ -187,82 +189,169 @@ abstract class Catalog {
   def functionExists(dbName: String, functionName: String): Boolean

   /**
-   * :: Experimental ::
-   * Creates an external table from the given path and returns the corresponding DataFrame.
+   * Creates a table from the given path and returns the corresponding DataFrame.
    * It will use the default data source configured by spark.sql.sources.default.
    *
    * @since 2.0.0
    */
+  @deprecated("use createTable instead.", "2.2.0")
+  def createExternalTable(tableName: String, path: String): DataFrame = {
+    createTable(tableName, path)
+  }
+
+  /**
+   * :: Experimental ::
+   * Creates a table from the given path and returns the corresponding DataFrame.
+   * It will use the default data source configured by spark.sql.sources.default.
+   *
+   * @since 2.2.0
+   */
   @Experimental
   @InterfaceStability.Evolving
-  def createExternalTable(tableName: String, path: String): DataFrame
+  def createTable(tableName: String, path: String): DataFrame

   /**
-   * :: Experimental ::
-   * Creates an external table from the given path based on a data source
-   * and returns the corresponding DataFrame.
+   * Creates a table from the given path based on a data source and returns the corresponding
+   * DataFrame.
    *
    * @since 2.0.0
    */
+  @deprecated("use createTable instead.", "2.2.0")
+  def createExternalTable(tableName: String, path: String, source: String): DataFrame = {
+    createTable(tableName, path, source)
+  }
+
+  /**
+   * :: Experimental ::
+   * Creates a table from the given path based on a data source and returns the corresponding
+   * DataFrame.
+   *
+   * @since 2.2.0
+   */
   @Experimental
   @InterfaceStability.Evolving
-  def createExternalTable(tableName: String, path: String, source: String): DataFrame
+  def createTable(tableName: String, path: String, source: String): DataFrame

   /**
-   * :: Experimental ::
-   * Creates an external table from the given path based on a data source and a set of options.
+   * Creates a table from the given path based on a data source and a set of options.
    * Then, returns the corresponding DataFrame.
    *
    * @since 2.0.0
    */
+  @deprecated("use createTable instead.", "2.2.0")
+  def createExternalTable(
+      tableName: String,
+      source: String,
+      options: java.util.Map[String, String]): DataFrame = {
+    createTable(tableName, source, options)
+  }
+
+  /**
+   * :: Experimental ::
+   * Creates a table from the given path based on a data source and a set of options.
+   * Then, returns the corresponding DataFrame.
+   *
+   * @since 2.2.0
+   */
   @Experimental
   @InterfaceStability.Evolving
+  def createTable(
+      tableName: String,
+      source: String,
+      options: java.util.Map[String, String]): DataFrame = {
+    createTable(tableName, source, options.asScala.toMap)
+  }
+
+  /**
+   * (Scala-specific)
+   * Creates a table from the given path based on a data source and a set of options.
+   * Then, returns the corresponding DataFrame.
+   *
+   * @since 2.0.0
+   */
+  @deprecated("use createTable instead.", "2.2.0")
   def createExternalTable(
       tableName: String,
       source: String,
-      options: java.util.Map[String, String]): DataFrame
+      options: Map[String, String]): DataFrame = {
+    createTable(tableName, source, options)
+  }

   /**
    * :: Experimental ::
    * (Scala-specific)
-   * Creates an external table from the given path based on a data source and a set of options.
+   * Creates a table from the given path based on a data source and a set of options.
    * Then, returns the corresponding DataFrame.
    *
-   * @since 2.0.0
+   * @since 2.2.0
    */
   @Experimental
   @InterfaceStability.Evolving
-  def createExternalTable(
+  def createTable(
       tableName: String,
       source: String,
       options: Map[String, String]): DataFrame

   /**
    * :: Experimental ::
-   * Create an external table from the given path based on a data source, a schema and
-   * a set of options. Then, returns the corresponding DataFrame.
+   * Create a table from the given path based on a data source, a schema and a set of options.
+   * Then, returns the corresponding DataFrame.
    *
    * @since 2.0.0
    */
+  @deprecated("use createTable instead.", "2.2.0")
+  def createExternalTable(
+      tableName: String,
+      source: String,
+      schema: StructType,
+      options: java.util.Map[String, String]): DataFrame = {
+    createTable(tableName, source, schema, options)
+  }
+
+  /**
+   * :: Experimental ::
+   * Create a table from the given path based on a data source, a schema and a set of options.
+   * Then, returns the corresponding DataFrame.
+   *
+   * @since 2.2.0
+   */
   @Experimental
   @InterfaceStability.Evolving
+  def createTable(
+      tableName: String,
+      source: String,
+      schema: StructType,
+      options: java.util.Map[String, String]): DataFrame = {
+    createTable(tableName, source, schema, options.asScala.toMap)
+  }
+
+  /**
+   * (Scala-specific)
+   * Create a table from the given path based on a data source, a schema and a set of options.
+   * Then, returns the corresponding DataFrame.
+   *
+   * @since 2.0.0
+   */
+  @deprecated("use createTable instead.", "2.2.0")
   def createExternalTable(
       tableName: String,
       source: String,
       schema: StructType,
-      options: java.util.Map[String, String]): DataFrame
+      options: Map[String, String]): DataFrame = {
+    createTable(tableName, source, schema, options)
+  }

   /**
    * :: Experimental ::
    * (Scala-specific)
-   * Create an external table from the given path based on a data source, a schema and
-   * a set of options. Then, returns the corresponding DataFrame.
+   * Create a table from the given path based on a data source, a schema and a set of options.
+   * Then, returns the corresponding DataFrame.
    *
-   * @since 2.0.0
+   * @since 2.2.0
    */
   @Experimental
   @InterfaceStability.Evolving
-  def createExternalTable(
+  def createTable(
       tableName: String,
       source: String,
       schema: StructType,
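A point worth noting in the diff above: only the Scala-specific `createTable` variants stay abstract; the `java.util.Map` overloads are now concrete and just convert their options via `asScala.toMap`, while the deprecated `createExternalTable` methods forward to `createTable`. A hedged usage sketch of those two entry points (the `HashMap` contents are illustrative):

```scala
import java.util.{HashMap => JHashMap}
import org.apache.spark.sql.SparkSession

val spark = SparkSession.builder().appName("catalog-overload-sketch").getOrCreate()

val javaOptions = new JHashMap[String, String]()
javaOptions.put("path", "/tmp/people")

// Java-friendly overload: concrete, forwards to createTable(tableName, source, options.asScala.toMap).
spark.catalog.createTable("people", "json", javaOptions)

// Deprecated entry point: still works, emits a deprecation warning at compile time,
// and forwards to the matching createTable overload.
spark.catalog.createExternalTable("people_legacy", "json", javaOptions)
```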

sql/core/src/main/scala/org/apache/spark/sql/execution/command/createDataSourceTables.scala

Lines changed: 0 additions & 9 deletions
@@ -71,15 +71,6 @@ case class CreateDataSourceTableCommand(table: CatalogTable, ignoreIfExists: Boo
       options = table.storage.properties ++ pathOption,
       catalogTable = Some(tableWithDefaultOptions)).resolveRelation()

-    dataSource match {
-      case fs: HadoopFsRelation =>
-        if (table.tableType == CatalogTableType.EXTERNAL && fs.location.rootPaths.isEmpty) {
-          throw new AnalysisException(
-            "Cannot create a file-based external data source table without path")
-        }
-      case _ =>
-    }
-
     val partitionColumnNames = if (table.schema.nonEmpty) {
       table.partitionColumnNames
     } else {
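The guard removed above protected against a file-based EXTERNAL table arriving without a path. With this patch, the Catalog API only marks a table EXTERNAL when a `path` option is present, so that situation can no longer be produced from this code path. A hedged sketch of the decision the commit message describes (the helper name is made up; this is not the literal patch code):

```scala
import org.apache.spark.sql.catalyst.catalog.CatalogTableType

// Table type is derived purely from whether the caller supplied a "path" option.
def tableTypeFor(options: Map[String, String]): CatalogTableType =
  if (options.contains("path")) CatalogTableType.EXTERNAL else CatalogTableType.MANAGED

assert(tableTypeFor(Map("path" -> "/tmp/t")) == CatalogTableType.EXTERNAL)
assert(tableTypeFor(Map.empty) == CatalogTableType.MANAGED)
```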
