[SPARK-25425] Extra options should override session options

DataFrameReader.scala

@@ -202,7 +202,7 @@ class DataFrameReader private[sql](sparkSession: SparkSession) extends Logging {
        DataSourceOptions.PATHS_KEY -> objectMapper.writeValueAsString(paths.toArray)
      }
      Dataset.ofRows(sparkSession, DataSourceV2Relation.create(
-       ds, extraOptions.toMap ++ sessionOptions + pathsOption,
+       ds, sessionOptions ++ extraOptions.toMap + pathsOption,

Member (dongjoon-hyun):
Also, cc @rdblue since this was introduced in aadf953#diff-f70bda59304588cc3abfa3a9840653f4R197.

Member Author (MaxGekk):
@dongjoon-hyun The commit didn't actually change the semantics. Before, it was:

(extraOptions ++
        DataSourceV2Utils.extractSessionConfigs(
          ds = ds.asInstanceOf[DataSourceV2],
          conf = sparkSession.sessionState.conf))

Member (dongjoon-hyun):
Oh, it has more history. Thanks, @MaxGekk. Could you track down when it started? We need to mark the affected version correctly in order to identify the backport candidates.

Member (dongjoon-hyun):
Also, please add a test case for this. If this has a longer history than SPARK-23203 (fixed in 2.4.0), we need to verify this during backporting.

Member Author (MaxGekk):
The changes were added in #19861.

Member (dongjoon-hyun):
Thank you. Then, it was 2.3.0.

        userSpecifiedSchema = userSpecifiedSchema))
    } else {
      loadV1Source(paths: _*)
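
For context on why the operand order matters: `++` on Scala's immutable Map is right-biased, so on duplicate keys the value from the right-hand operand wins. Moving extraOptions to the right is what lets user-specified options override session options. A minimal self-contained sketch (option names and values are illustrative):

    // Right-biased merge: on key collisions, the right-hand map wins.
    object MergeOrderSketch extends App {
      val sessionOptions = Map("optionA" -> "true", "other" -> "x")
      val extraOptions = Map("optionA" -> "false")

      // Old order: session options were merged last and silently won.
      assert((extraOptions ++ sessionOptions)("optionA") == "true")

      // New order: user-specified extra options are merged last and win.
      val merged = sessionOptions ++ extraOptions
      assert(merged("optionA") == "false")
      assert(merged("other") == "x") // non-conflicting session options survive
    }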

DataFrameWriter.scala

@@ -241,10 +241,12 @@ final class DataFrameWriter[T] private[sql](ds: Dataset[T]) {
    val source = cls.newInstance().asInstanceOf[DataSourceV2]
    source match {
      case provider: BatchWriteSupportProvider =>
-       val options = extraOptions ++
-         DataSourceV2Utils.extractSessionConfigs(source, df.sparkSession.sessionState.conf)
+       val sessionOptions = DataSourceV2Utils.extractSessionConfigs(
+         source,
+         df.sparkSession.sessionState.conf)
+       val options = sessionOptions ++ extraOptions

-       val relation = DataSourceV2Relation.create(source, options.toMap)
+       val relation = DataSourceV2Relation.create(source, options)

Member:
Please add both read-side and write-side tests.

Member:
+1 for tests.

Member Author (MaxGekk):
I wrote a test for the read path, since I was able to grab the options propagated to the DataSource, but so far I have no idea how to test the write path, except perhaps with mocking. Does it make sense to do that?

Member:
For the write path, I think we can use the traditional way instead of introducing mocking. Let me try.

        if (mode == SaveMode.Append) {
          runCommand(df.sparkSession, "save") {
            AppendData.byName(relation, df.logicalPlan)
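
For readers unfamiliar with the session-config mechanism: when a source implements SessionConfigSupport, DataSourceV2Utils.extractSessionConfigs gathers session confs of the form spark.datasource.<keyPrefix>.<option> and strips the prefix, yielding plain option names. A simplified sketch of that extraction, not the actual Spark implementation:

    // Simplified sketch: pull per-source options out of session confs by prefix.
    def extractSessionConfigsSketch(
        keyPrefix: String,
        sessionConfs: Map[String, String]): Map[String, String] = {
      val prefix = s"spark.datasource.$keyPrefix."
      sessionConfs.collect {
        case (key, value) if key.startsWith(prefix) =>
          key.stripPrefix(prefix) -> value
      }
    }

    // e.g. spark.datasource.simpleWritableDataSource.path=/tmp/x  yields  "path" -> "/tmp/x"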

DataSourceV2Suite.scala

@@ -17,6 +17,8 @@

package org.apache.spark.sql.sources.v2

+import java.io.File
+
import test.org.apache.spark.sql.sources.v2._

import org.apache.spark.SparkException

@@ -317,6 +319,38 @@ class DataSourceV2Suite extends QueryTest with SharedSQLContext {
    checkCanonicalizedOutput(df, 2, 2)
    checkCanonicalizedOutput(df.select('i), 2, 1)
  }

test("SPARK-25425: extra options should override sessions options during reading") {
val prefix = "spark.datasource.userDefinedDataSource."
val optionName = "optionA"
withSQLConf(prefix + optionName -> "true") {
val df = spark
.read
.option(optionName, false)
.format(classOf[DataSourceV2WithSessionConfig].getName).load()
val options = df.queryExecution.optimizedPlan.collectFirst {
case d: DataSourceV2Relation => d.options
}
assert(options.get.get(optionName) == Some("false"))
}
}

test("SPARK-25425: extra options should override sessions options during writing") {
withTempPath { path =>
val sessionPath = path.getCanonicalPath
withSQLConf("spark.datasource.simpleWritableDataSource.path" -> sessionPath) {
withTempPath { file =>
val optionPath = file.getCanonicalPath
val format = classOf[SimpleWritableDataSource].getName

val df = Seq((1L, 2L)).toDF("i", "j")
df.write.format(format).option("path", optionPath).save()
assert(!new File(sessionPath).exists)
checkAnswer(spark.read.format(format).option("path", optionPath).load(), df)
}
}
}
}
}


@@ -385,7 +419,6 @@ class SimpleDataSourceV2 extends DataSourceV2 with BatchReadSupportProvider {
  }
}

-
class AdvancedDataSourceV2 extends DataSourceV2 with BatchReadSupportProvider {

  class ReadSupport extends SimpleReadSupport {

SimpleWritableDataSource.scala

@@ -39,10 +39,14 @@ import org.apache.spark.util.SerializableConfiguration
 * Each job moves files from `target/_temporary/queryId/` to `target`.
 */
class SimpleWritableDataSource extends DataSourceV2
-  with BatchReadSupportProvider with BatchWriteSupportProvider {
+  with BatchReadSupportProvider
+  with BatchWriteSupportProvider
+  with SessionConfigSupport {

  private val schema = new StructType().add("i", "long").add("j", "long")

+  override def keyPrefix: String = "simpleWritableDataSource"
+
  class ReadSupport(path: String, conf: Configuration) extends SimpleReadSupport {

    override def fullSchema(): StructType = schema
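
With keyPrefix = "simpleWritableDataSource", the write-side test above can route a default path through the session conf and then check that an explicit option wins. A usage sketch of the interaction, assuming a SparkSession with the suite's implicits in scope and illustrative paths:

    // The session conf supplies a default "path" option for this source ...
    spark.conf.set("spark.datasource.simpleWritableDataSource.path", "/tmp/session-path")

    // ... but an explicit .option("path", ...) now takes precedence over it.
    Seq((1L, 2L)).toDF("i", "j")
      .write
      .format(classOf[SimpleWritableDataSource].getName)
      .option("path", "/tmp/explicit-path")
      .save() // data lands under /tmp/explicit-path, not the session path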