1 change: 0 additions & 1 deletion R/pkg/NAMESPACE
@@ -485,7 +485,6 @@ export("as.DataFrame",
"cacheTable",
"clearCache",
"createDataFrame",
"createExternalTable",
Member:
Hmm .. I wouldn't remove it for now. We have a bunch of deprecated APIs, we don't have an explicit plan for how to handle them, and we're not even sure we're actually going for 4.0.0.

Member:
For example, if we apply the same argument, we should also remove SQLContext, etc., which would be a large breaking change that I believe could be controversial.

Member:
Certainly, there may be an argument for not removing some methods. But these can only be removed at major versions, and removing one does not imply all deprecated methods must be removed either. What do you mean by 'going for 4.0.0', and how does this relate to SQLContext? There is indeed no single plan for all deprecated methods, but what is the argument against this particular one?

Member @HyukjinKwon (Aug 7, 2023):
Oh, I apologize - I forgot that we already discussed that the next Spark version would likely be 4.0.0 - my bad.

My concern is mainly about "Considerations when breaking APIs" at https://spark.apache.org/versioning-policy.html. We discussed removing some of the deprecated APIs, including SQLContext, for Spark 3.0.0, but in the end we did not remove them because the change might be too breaking. At that time, we had to revert some of those PRs (see also #22843 and #27815) - this included createExternalTable.

I personally would like to have a bit of discussion on the mailing list (probably after the 3.5.0 release) before we remove any, to find out whether some of them would be too breaking.

Member:
Yeah, I think it's fair to hold off for a more joined-up discussion of what should be removed.

"createTable",
"currentCatalog",
"currentDatabase",
29 changes: 0 additions & 29 deletions R/pkg/R/catalog.R
@@ -77,35 +77,6 @@ listCatalogs <- function() {
dataFrame(callJMethod(callJMethod(catalog, "listCatalogs"), "toDF"))
}

#' (Deprecated) Create an external table
#'
#' Creates an external table based on the dataset in a data source and
#' returns a SparkDataFrame associated with the external table.
#'
#' The data source is specified by the \code{source} and a set of options(...).
#' If \code{source} is not specified, the default data source configured by
#' "spark.sql.sources.default" will be used.
#'
#' @param tableName a name of the table.
#' @param path the path of files to load.
#' @param source the name of external data source.
#' @param schema the schema of the data required for some data sources.
#' @param ... additional argument(s) passed to the method.
#' @return A SparkDataFrame.
#' @rdname createExternalTable-deprecated
#' @seealso \link{createTable}
#' @examples
#'\dontrun{
#' sparkR.session()
#' df <- createExternalTable("myjson", path="path/to/json", source="json", schema)
#' }
#' @name createExternalTable
#' @note createExternalTable since 1.4.0
createExternalTable <- function(tableName, path = NULL, source = NULL, schema = NULL, ...) {
.Deprecated("createTable", old = "createExternalTable")
createTable(tableName, path, source, schema, ...)
}

#' Creates a table based on the dataset in a data source
#'
#' Creates a table based on the dataset in a data source. Returns a SparkDataFrame associated with
1 change: 0 additions & 1 deletion R/pkg/pkgdown/_pkgdown_template.yml
@@ -53,7 +53,6 @@ reference:
- colnames
- count
- createDataFrame
- createExternalTable
- createOrReplaceTempView
- createTable
- crossJoin
@@ -265,20 +265,6 @@ abstract class Catalog {
*/
def functionExists(dbName: String, functionName: String): Boolean

/**
* Creates a table from the given path and returns the corresponding DataFrame. It will use the
* default data source configured by spark.sql.sources.default.
*
* @param tableName
* is either a qualified or unqualified name that designates a table. If no database
* identifier is provided, it refers to a table in the current database.
* @since 3.5.0
*/
@deprecated("use createTable instead.", "2.2.0")
def createExternalTable(tableName: String, path: String): DataFrame = {
createTable(tableName, path)
}

/**
* Creates a table from the given path and returns the corresponding DataFrame. It will use the
* default data source configured by spark.sql.sources.default.
@@ -290,20 +276,6 @@ abstract class Catalog {
*/
def createTable(tableName: String, path: String): DataFrame

/**
* Creates a table from the given path based on a data source and returns the corresponding
* DataFrame.
*
* @param tableName
* is either a qualified or unqualified name that designates a table. If no database
* identifier is provided, it refers to a table in the current database.
* @since 3.5.0
*/
@deprecated("use createTable instead.", "2.2.0")
def createExternalTable(tableName: String, path: String, source: String): DataFrame = {
createTable(tableName, path, source)
}

/**
* Creates a table from the given path based on a data source and returns the corresponding
* DataFrame.
@@ -315,23 +287,6 @@ abstract class Catalog {
*/
def createTable(tableName: String, path: String, source: String): DataFrame

/**
* Creates a table from the given path based on a data source and a set of options. Then,
* returns the corresponding DataFrame.
*
* @param tableName
* is either a qualified or unqualified name that designates a table. If no database
* identifier is provided, it refers to a table in the current database.
* @since 3.5.0
*/
@deprecated("use createTable instead.", "2.2.0")
def createExternalTable(
tableName: String,
source: String,
options: java.util.Map[String, String]): DataFrame = {
createTable(tableName, source, options)
}

/**
* Creates a table based on the dataset in a data source and a set of options. Then, returns the
* corresponding DataFrame.
@@ -348,23 +303,6 @@ abstract class Catalog {
createTable(tableName, source, options.asScala.toMap)
}

/**
* (Scala-specific) Creates a table from the given path based on a data source and a set of
* options. Then, returns the corresponding DataFrame.
*
* @param tableName
* is either a qualified or unqualified name that designates a table. If no database
* identifier is provided, it refers to a table in the current database.
* @since 3.5.0
*/
@deprecated("use createTable instead.", "2.2.0")
def createExternalTable(
tableName: String,
source: String,
options: Map[String, String]): DataFrame = {
createTable(tableName, source, options)
}

/**
* (Scala-specific) Creates a table based on the dataset in a data source and a set of options.
* Then, returns the corresponding DataFrame.
@@ -376,24 +314,6 @@ abstract class Catalog {
*/
def createTable(tableName: String, source: String, options: Map[String, String]): DataFrame

/**
* Create a table from the given path based on a data source, a schema and a set of options.
* Then, returns the corresponding DataFrame.
*
* @param tableName
* is either a qualified or unqualified name that designates a table. If no database
* identifier is provided, it refers to a table in the current database.
* @since 3.5.0
*/
@deprecated("use createTable instead.", "2.2.0")
def createExternalTable(
tableName: String,
source: String,
schema: StructType,
options: java.util.Map[String, String]): DataFrame = {
createTable(tableName, source, schema, options)
}

/**
* Creates a table based on the dataset in a data source and a set of options. Then, returns the
* corresponding DataFrame.
@@ -447,24 +367,6 @@
createTable(tableName, source, schema, options.asScala.toMap)
}

/**
* (Scala-specific) Create a table from the given path based on a data source, a schema and a
* set of options. Then, returns the corresponding DataFrame.
*
* @param tableName
* is either a qualified or unqualified name that designates a table. If no database
* identifier is provided, it refers to a table in the current database.
* @since 3.5.0
*/
@deprecated("use createTable instead.", "2.2.0")
def createExternalTable(
tableName: String,
source: String,
schema: StructType,
options: Map[String, String]): DataFrame = {
createTable(tableName, source, schema, options)
}

/**
* (Scala-specific) Create a table based on the dataset in a data source, a schema and a set of
* options. Then, returns the corresponding DataFrame.
4 changes: 4 additions & 0 deletions docs/sparkr-migration-guide.md
@@ -26,6 +26,10 @@ Note that this migration guide describes the items specific to SparkR.
Many items of SQL migration can be applied when migrating SparkR to higher versions.
Please refer to [Migration Guide: SQL, Datasets and DataFrame](sql-migration-guide.html).

## Upgrading from SparkR 3.5 to 4.0

- The deprecated method `createExternalTable` has been removed. Use `createTable` instead.
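
A minimal sketch of the migration, reusing the `"myjson"` table name and the `path/to/json` placeholder from the removed documentation example; the arguments of `createExternalTable` carry over to `createTable` unchanged:

```r
library(SparkR)
sparkR.session()

# Before (deprecated since Spark 2.2.0, removed in 4.0):
# df <- createExternalTable("myjson", path = "path/to/json", source = "json")

# After: same arguments, new name
df <- createTable("myjson", path = "path/to/json", source = "json")
head(df)
```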

## Upgrading from SparkR 3.1 to 3.2

- Previously, SparkR automatically downloaded and installed the Spark distribution in the user's cache directory to complete the SparkR installation when SparkR was run in a plain R shell or Rscript and the Spark distribution could not be found. Now, it asks whether users want to download and install it or not. To restore the previous behavior, set the `SPARKR_ASK_INSTALLATION` environment variable to `FALSE`.
4 changes: 4 additions & 0 deletions docs/sql-migration-guide.md
@@ -22,6 +22,10 @@ license: |
* Table of contents
{:toc}

## Upgrading from Spark SQL 3.5 to 4.0

- The deprecated `createExternalTable` methods on `Catalog` and `SQLContext` have been removed. Use `createTable` instead.
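
A minimal Scala sketch of the same change, assuming a hypothetical JSON dataset at `path/to/json`; each removed `createExternalTable` overload has a `createTable` counterpart with an identical signature:

```scala
import org.apache.spark.sql.SparkSession

val spark = SparkSession.builder().appName("create-table-migration").getOrCreate()

// Before (deprecated since Spark 2.2.0, removed in 4.0):
// spark.catalog.createExternalTable("myjson", "path/to/json", "json")

// After: the equivalent createTable overload takes the same arguments
val df = spark.catalog.createTable("myjson", "path/to/json", "json")
df.show()
```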

## Upgrading from Spark SQL 3.4 to 3.5

- Since Spark 3.5, the JDBC options related to DS V2 pushdown are `true` by default. These options include: `pushDownAggregate`, `pushDownLimit`, `pushDownOffset` and `pushDownTableSample`. To restore the legacy behavior, please set them to `false`. e.g. set `spark.sql.catalog.your_catalog_name.pushDownAggregate` to `false`.
3 changes: 3 additions & 0 deletions project/MimaExcludes.scala
@@ -36,6 +36,9 @@ object MimaExcludes {

// Exclude rules for 4.0.x
lazy val v40excludes = v35excludes ++ Seq(
// [SPARK-44685][SQL] Remove deprecated Catalog#createExternalTable
ProblemFilters.exclude[DirectMissingMethodProblem]("org.apache.spark.sql.SQLContext.createExternalTable"),
ProblemFilters.exclude[DirectMissingMethodProblem]("org.apache.spark.sql.catalog.Catalog.createExternalTable")
)

// Exclude rules for 3.5.x from 3.4.0
92 changes: 0 additions & 92 deletions sql/core/src/main/scala/org/apache/spark/sql/SQLContext.scala
@@ -488,98 +488,6 @@ class SQLContext private[sql](val sparkSession: SparkSession)
*/
def readStream: DataStreamReader = sparkSession.readStream


/**
* Creates an external table from the given path and returns the corresponding DataFrame.
* It will use the default data source configured by spark.sql.sources.default.
*
* @group ddl_ops
* @since 1.3.0
*/
@deprecated("use sparkSession.catalog.createTable instead.", "2.2.0")
def createExternalTable(tableName: String, path: String): DataFrame = {
sparkSession.catalog.createTable(tableName, path)
}

/**
* Creates an external table from the given path based on a data source
* and returns the corresponding DataFrame.
*
* @group ddl_ops
* @since 1.3.0
*/
@deprecated("use sparkSession.catalog.createTable instead.", "2.2.0")
def createExternalTable(
tableName: String,
path: String,
source: String): DataFrame = {
sparkSession.catalog.createTable(tableName, path, source)
}

/**
* Creates an external table from the given path based on a data source and a set of options.
* Then, returns the corresponding DataFrame.
*
* @group ddl_ops
* @since 1.3.0
*/
@deprecated("use sparkSession.catalog.createTable instead.", "2.2.0")
def createExternalTable(
tableName: String,
source: String,
options: java.util.Map[String, String]): DataFrame = {
sparkSession.catalog.createTable(tableName, source, options)
}

/**
* (Scala-specific)
* Creates an external table from the given path based on a data source and a set of options.
* Then, returns the corresponding DataFrame.
*
* @group ddl_ops
* @since 1.3.0
*/
@deprecated("use sparkSession.catalog.createTable instead.", "2.2.0")
def createExternalTable(
tableName: String,
source: String,
options: Map[String, String]): DataFrame = {
sparkSession.catalog.createTable(tableName, source, options)
}

/**
* Create an external table from the given path based on a data source, a schema and
* a set of options. Then, returns the corresponding DataFrame.
*
* @group ddl_ops
* @since 1.3.0
*/
@deprecated("use sparkSession.catalog.createTable instead.", "2.2.0")
def createExternalTable(
tableName: String,
source: String,
schema: StructType,
options: java.util.Map[String, String]): DataFrame = {
sparkSession.catalog.createTable(tableName, source, schema, options)
}

/**
* (Scala-specific)
* Create an external table from the given path based on a data source, a schema and
* a set of options. Then, returns the corresponding DataFrame.
*
* @group ddl_ops
* @since 1.3.0
*/
@deprecated("use sparkSession.catalog.createTable instead.", "2.2.0")
def createExternalTable(
tableName: String,
source: String,
schema: StructType,
options: Map[String, String]): DataFrame = {
sparkSession.catalog.createTable(tableName, source, schema, options)
}

/**
* Registers the given `DataFrame` as a temporary table in the catalog. Temporary tables exist
* only during the lifetime of this instance of SQLContext.