6 changes: 6 additions & 0 deletions core/src/test/scala/org/apache/spark/util/UtilsSuite.scala
@@ -1308,6 +1308,12 @@ class UtilsSuite extends SparkFunSuite with ResetSystemProperties with Logging {
assert(Utils.buildLocationMetadata(paths, 10) == "[path0, path1]")
assert(Utils.buildLocationMetadata(paths, 15) == "[path0, path1, path2]")
assert(Utils.buildLocationMetadata(paths, 25) == "[path0, path1, path2, path3]")

// edge cases: non-path chars such as '[' and ", " also count toward the threshold
// 1. second path is not added due to the addition of '['
assert(Utils.buildLocationMetadata(paths, 6) == "[path0]")
// 2. third path is not added due to the addition of ", "
assert(Utils.buildLocationMetadata(paths, 13) == "[path0, path1]")
}

test("checkHost supports both IPV4 and IPV6") {
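A minimal sketch, consistent with the assertions above, of how a threshold-bounded metadata builder can account for the wrapping '[' and the ", " separators. This is a hypothetical simplification (plain strings instead of Hadoop Path objects) and not necessarily Spark's exact Utils.buildLocationMetadata:

// Counts the leading '[' and every ", " separator toward the threshold;
// once the accumulated length reaches the threshold, no further paths are
// appended. The closing ']' is added unconditionally.
def buildLocationMetadataSketch(paths: Seq[String], stopAppendingThreshold: Int): String = {
  val metadata = new StringBuilder("[")
  var index = 0
  while (index < paths.length && metadata.length < stopAppendingThreshold) {
    if (index > 0) {
      metadata.append(", ")
    }
    metadata.append(paths(index))
    index += 1
  }
  metadata.append("]")
  metadata.toString
}

// Reproduces the edge cases from the hunk above: with threshold 6 the '['
// already brings "[path0" to length 6, and with threshold 13 the ", "
// brings "[path0, path1" to length 13, so appending stops there.
val samplePaths = Seq("path0", "path1", "path2", "path3")
assert(buildLocationMetadataSketch(samplePaths, 6) == "[path0]")
assert(buildLocationMetadataSketch(samplePaths, 13) == "[path0, path1]")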
sql/core/src/test/scala/org/apache/spark/sql/execution/DataSourceScanExecRedactionSuite.scala
@@ -19,6 +19,7 @@ package org.apache.spark.sql.execution
import java.io.File

import scala.collection.mutable
import scala.util.Random

import org.apache.hadoop.fs.Path

@@ -122,24 +123,39 @@ class DataSourceScanExecRedactionSuite extends DataSourceScanRedactionTest {
test("SPARK-31793: FileSourceScanExec metadata should contain limited file paths") {
withTempPath { path =>
val dir = path.getCanonicalPath

// create a sub-directory with a long name so that each root path will always exceed the limit
Review comment (Contributor Author): Let's just ensure we test on path truncation; other cases should be tested against UtilsSuite.
// this ensures we always test the path-truncation case
val dataDirName = Random.alphanumeric.take(100).toList.mkString
val dataDir = new File(path, dataDirName)
dataDir.mkdir()

val partitionCol = "partitionCol"
spark.range(10)
.select("id", "id")
.toDF("value", partitionCol)
.write
.partitionBy(partitionCol)
.orc(dir)
val paths = (0 to 9).map(i => new File(dir, s"$partitionCol=$i").getCanonicalPath)
.orc(dataDir.getCanonicalPath)
val paths = (0 to 9).map(i => new File(dataDir, s"$partitionCol=$i").getCanonicalPath)
val plan = spark.read.orc(paths: _*).queryExecution.executedPlan
val location = plan collectFirst {
case f: FileSourceScanExec => f.metadata("Location")
}
assert(location.isDefined)
// The location metadata should contain at least one path
assert(location.get.contains(paths.head))
// If the temp path is longer than 100 characters, the metadata length should not exceed
// twice the path length; otherwise, the metadata length should stay within 200.
assert(location.get.length < Math.max(paths.head.length, 100) * 2)
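// (illustration: a 120-character path gives a 240-character bound,
// while an 80-character path would fall back to the 200-character bound)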

// The location metadata should wrap the paths in brackets
assert(location.get.indexOf('[') > -1)
assert(location.get.indexOf(']') > -1)

// extract the paths from the location metadata (dropping the class name, brackets, and separators)
val pathsInLocation = location.get.substring(
location.get.indexOf('[') + 1, location.get.indexOf(']')).split(", ").toSeq

// only one path should be present, since each path alone exceeds the threshold
assert(pathsInLocation.size == 1)
}
}
}
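Why the 100-character directory name guarantees truncation to a single path: each root path is longer than the metadata threshold on its own, so the builder stops appending after the first path. A quick check against the sketch above, with hypothetical values (the threshold of 100 stands in for Spark's default spark.sql.maxMetadataStringLength, and "/tmp" for the temp directory):

import scala.util.Random

val longDirName = Random.alphanumeric.take(100).mkString
val rootPaths = (0 to 9).map(i => s"/tmp/$longDirName/partitionCol=$i")
// appending the first ~120-character path already exceeds the threshold,
// so the remaining nine paths are never added
assert(buildLocationMetadataSketch(rootPaths, 100) == s"[${rootPaths.head}]")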