2 changes: 2 additions & 0 deletions docs/sql-migration-guide-upgrade.md
@@ -29,6 +29,8 @@ displayTitle: Spark SQL Upgrading Guide

- In Spark version 2.4 and earlier, users can create a map with duplicated keys via built-in functions like `CreateMap`, `StringToMap`, etc. The behavior of a map with duplicated keys is undefined, e.g. map lookup respects the duplicated key that appears first, `Dataset.collect` only keeps the duplicated key that appears last, `MapKeys` returns duplicated keys, etc. Since Spark 3.0, these built-in functions remove duplicated map keys with a last-wins policy (see the first sketch after this list). Users may still read map values with duplicated keys from data sources which do not enforce it (e.g. Parquet); in that case the behavior is undefined.

- In Spark version 2.4 and earlier, the `SET` command works without any warning even if the specified key is for a `SparkConf` entry; it has no effect because the command does not update `SparkConf`, which might confuse users. Since Spark 3.0, the command fails if a `SparkConf` key is used (see the second sketch after this list). You can disable such a check by setting `spark.sql.legacy.execution.setCommandRejectsSparkConfs` to `false`.
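
A minimal sketch of the map-key deduplication described above (assumes a hypothetical `spark-shell` session bound to `spark`; the commented result follows from the last-wins policy, not from output in the patch):

```scala
// Build a map with the duplicated key 1 via the built-in map constructor.
val row = spark.sql("SELECT map(1, 'a', 1, 'b') AS m").collect().head

// Spark 2.4 and earlier: the value kept for key 1 is undefined.
// Spark 3.0: duplicated keys are removed with a last-wins policy, so this prints Map(1 -> b).
println(row.getMap[Int, String](0))
```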
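
And a sketch of the new `SET` behavior (again hypothetical; `spark.task.cpus` is just one example of a key registered only as a `SparkConf` entry):

```scala
import org.apache.spark.sql.AnalysisException

// Spark 3.0 default: spark.sql.legacy.execution.setCommandRejectsSparkConfs = true.
// Setting a SparkConf-only key fails with "Cannot modify the value of a Spark config: ...".
try spark.sql("SET spark.task.cpus = 4")
catch { case e: AnalysisException => println(e.getMessage) }

// Restoring the 2.4 behavior: the command is accepted again, but it still has no effect
// because SET does not update SparkConf.
spark.conf.set("spark.sql.legacy.execution.setCommandRejectsSparkConfs", "false")
spark.sql("SET spark.task.cpus = 4")
```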

## Upgrading From Spark SQL 2.3 to 2.4

- In Spark version 2.3 and earlier, the second parameter to the `array_contains` function is implicitly promoted to the element type of the first, array-type parameter. This type promotion can be lossy and may cause `array_contains` to return a wrong result. This problem has been addressed in 2.4 by employing a safer type promotion mechanism. This can cause some changes in behavior, which are illustrated in the table below.
@@ -45,7 +45,7 @@ import org.apache.spark.util.Utils

object SQLConf {

private val sqlConfEntries = java.util.Collections.synchronizedMap(
private[sql] val sqlConfEntries = java.util.Collections.synchronizedMap(
new java.util.HashMap[String, ConfigEntry[_]]())

val staticConfKeys: java.util.Set[String] =
@@ -1610,6 +1610,14 @@ object SQLConf {
""" "... N more fields" placeholder.""")
.intConf
.createWithDefault(25)

val SET_COMMAND_REJECTS_SPARK_CONFS =
buildConf("spark.sql.legacy.execution.setCommandRejectsSparkConfs")
.internal()
.doc("If it is set to true, the SET command will fail when the key is registered as " +
"a SparkConf entry.")
.booleanConf
.createWithDefault(true)
}

/**
@@ -2030,6 +2038,8 @@ class SQLConf extends Serializable with Logging {

def maxToStringFields: Int = getConf(SQLConf.MAX_TO_STRING_FIELDS)

def setCommandRejectsSparkConfs: Boolean = getConf(SQLConf.SET_COMMAND_REJECTS_SPARK_CONFS)

/** ********************** SQLConf functionality methods ************ */

/** Set Spark SQL configuration properties. */
@@ -153,5 +153,9 @@ class RuntimeConfig private[sql](sqlConf: SQLConf = new SQLConf) {
if (SQLConf.staticConfKeys.contains(key)) {
throw new AnalysisException(s"Cannot modify the value of a static config: $key")
}
if (sqlConf.setCommandRejectsSparkConfs &&
ConfigEntry.findEntry(key) != null && !SQLConf.sqlConfEntries.containsKey(key)) {

Contributor:
Actually, thinking of it, isn't ConfigEntry.findEntry(key) != null redundant with the other check?

Removing that would also cover other settings that don't have constants defined.


Member Author:
I'm sorry for the delay.
As per the comment #22887 (comment), I'd leave it as is for now.


Contributor (@cloud-fan), Nov 28, 2018:
We should only reject configs that are registered as SparkConf. For configs that are neither a SparkConf nor a SQLConf, we shouldn't reject them.

throw new AnalysisException(s"Cannot modify the value of a Spark config: $key")
}
}
}
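
As a standalone illustration of the rule discussed in the thread above (a model only, not Spark's actual API): a key is rejected only when it is a registered config entry that is not a SQL conf, so keys that are neither SparkConf nor SQLConf entries pass through.

```scala
// Minimal model of the check above. The three booleans stand in for
// ConfigEntry.findEntry(key) != null, SQLConf.sqlConfEntries.containsKey(key),
// and the setCommandRejectsSparkConfs flag.
object SetCommandCheckModel {
  def rejects(flagEnabled: Boolean, isRegisteredEntry: Boolean, isSqlConf: Boolean): Boolean =
    flagEnabled && isRegisteredEntry && !isSqlConf

  def main(args: Array[String]): Unit = {
    assert(rejects(flagEnabled = true, isRegisteredEntry = true, isSqlConf = false))    // e.g. spark.task.cpus
    assert(!rejects(flagEnabled = true, isRegisteredEntry = true, isSqlConf = true))    // a registered SQL conf
    assert(!rejects(flagEnabled = true, isRegisteredEntry = false, isSqlConf = false))  // an unregistered custom key
    assert(!rejects(flagEnabled = false, isRegisteredEntry = true, isSqlConf = false))  // legacy flag disabled
  }
}
```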
@@ -18,6 +18,7 @@
package org.apache.spark.sql

import org.apache.spark.SparkFunSuite
import org.apache.spark.internal.config

class RuntimeConfigSuite extends SparkFunSuite {

@@ -68,4 +69,13 @@ class RuntimeConfigSuite extends SparkFunSuite {
assert(!conf.isModifiable(""))
assert(!conf.isModifiable("invalid config parameter"))
}

test("reject SparkConf entries") {
val conf = newConf()

val ex = intercept[AnalysisException] {
conf.set(config.CPUS_PER_TASK.key, 4)
}
assert(ex.getMessage.contains("Spark config"))
}
}
@@ -24,6 +24,7 @@ import java.util.Locale
import org.apache.hadoop.fs.Path
import org.scalatest.BeforeAndAfterEach

import org.apache.spark.internal.config
import org.apache.spark.sql.{AnalysisException, QueryTest, Row, SaveMode}
import org.apache.spark.sql.catalyst.TableIdentifier
import org.apache.spark.sql.catalyst.analysis.{FunctionRegistry, NoSuchPartitionException, NoSuchTableException, TempTableAlreadyExistsException}
@@ -2715,4 +2716,11 @@ abstract class DDLSuite extends QueryTest with SQLTestUtils {
}
}
}

test("set command rejects SparkConf entries") {
val ex = intercept[AnalysisException] {
sql(s"SET ${config.CPUS_PER_TASK.key} = 4")
}
assert(ex.getMessage.contains("Spark config"))
}
}