Skip to content

Commit cf96785

Browse files
committed
convert filePath to URI in binary file data source
1 parent b71abd6 commit cf96785

File tree

3 files changed

+13
-2
lines changed

3 files changed

+13
-2
lines changed

sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/FileScanRDD.scala

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -38,7 +38,7 @@ import org.apache.spark.util.NextIterator
  * that need to be prepended to each row.
  *
  * @param partitionValues value of partition columns to be prepended to each row.
- * @param filePath path of the file to read
+ * @param filePath URI of the file to read
  * @param start the beginning offset (in bytes) of the block.
  * @param length number of bytes to read.
  * @param locations locality information (list of nodes that have the data).

sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/binaryfile/BinaryFileFormat.scala

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@
 
 package org.apache.spark.sql.execution.datasources.binaryfile
 
+import java.net.URI
 import java.sql.Timestamp
 
 import com.google.common.io.{ByteStreams, Closeables}
@@ -100,7 +101,7 @@ class BinaryFileFormat extends FileFormat with DataSourceRegister {
     val maxLength = sparkSession.conf.get(SOURCES_BINARY_FILE_MAX_LENGTH)
 
     file: PartitionedFile => {
-      val path = new Path(file.filePath)
+      val path = new Path(new URI(file.filePath))
       val fs = path.getFileSystem(broadcastedHadoopConf.value.value)
       val status = fs.getFileStatus(path)
       if (filterFuncs.forall(_.apply(status))) {

sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/binaryfile/BinaryFileFormatSuite.scala

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -368,4 +368,14 @@ class BinaryFileFormatSuite extends QueryTest with SharedSQLContext with SQLTest
       assert(caught.getMessage.contains("exceeds the max length allowed"))
     }
   }
+
+  test("SPARK-28030: support chars in file names that require URL encoding") {
+    withTempDir { dir =>
+      val file = new File(dir, "test space.txt")
+      val content = "123".getBytes
+      Files.write(file.toPath, content, StandardOpenOption.CREATE, StandardOpenOption.WRITE)
+      val df = spark.read.format(BINARY_FILE).load(dir.getPath)
+      df.collect()
+    }
+  }
 }

0 commit comments

Comments (0)