Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 7 additions & 1 deletion core/src/main/scala/org/apache/spark/util/Utils.scala
Original file line number Diff line number Diff line change
Expand Up @@ -2980,7 +2980,7 @@ private[spark] object Utils extends Logging {
* exceeds `stopAppendingThreshold`, stop appending paths for saving memory.
*/
def buildLocationMetadata(paths: Seq[Path], stopAppendingThreshold: Int): String = {
val metadata = new StringBuilder("[")
val metadata = new StringBuilder(s"(${paths.length} paths)[")
var index: Int = 0
while (index < paths.length && metadata.length < stopAppendingThreshold) {
if (index > 0) {
Expand All @@ -2989,6 +2989,12 @@ private[spark] object Utils extends Logging {
metadata.append(paths(index).toString)
index += 1
}
if (paths.length > index) {
if (index > 0) {
metadata.append(", ")
}
metadata.append("...")
}
metadata.append("]")
metadata.toString
}
Expand Down
15 changes: 5 additions & 10 deletions core/src/test/scala/org/apache/spark/util/UtilsSuite.scala
Original file line number Diff line number Diff line change
Expand Up @@ -1304,16 +1304,11 @@ class UtilsSuite extends SparkFunSuite with ResetSystemProperties with Logging {

test("pathsToMetadata") {
val paths = (0 to 4).map(i => new Path(s"path$i"))
assert(Utils.buildLocationMetadata(paths, 5) == "[path0]")
assert(Utils.buildLocationMetadata(paths, 10) == "[path0, path1]")
assert(Utils.buildLocationMetadata(paths, 15) == "[path0, path1, path2]")
assert(Utils.buildLocationMetadata(paths, 25) == "[path0, path1, path2, path3]")

// edge-case: we should consider the fact non-path chars including '[' and ", " are accounted
// 1. second path is not added due to the addition of '['
assert(Utils.buildLocationMetadata(paths, 6) == "[path0]")
// 2. third path is not added due to the addition of ", "
assert(Utils.buildLocationMetadata(paths, 13) == "[path0, path1]")
assert(Utils.buildLocationMetadata(paths, 10) == "(5 paths)[...]")
// 11 is the minimum threshold to print at least one path
assert(Utils.buildLocationMetadata(paths, 11) == "(5 paths)[path0, ...]")
// "(5 paths)[path0" is 15 chars, so any threshold above 15 (e.g. 18) prints two paths
assert(Utils.buildLocationMetadata(paths, 18) == "(5 paths)[path0, path1, ...]")
}

test("checkHost supports both IPV4 and IPV6") {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2132,7 +2132,7 @@ class AvroV2Suite extends AvroSuite with ExplainSuiteHelper {
|Output \\[2\\]: \\[value#xL, id#x\\]
|DataFilters: \\[isnotnull\\(value#xL\\), \\(value#xL > 2\\)\\]
|Format: avro
|Location: InMemoryFileIndex\\[.*\\]
|Location: InMemoryFileIndex\\([0-9]+ paths\\)\\[.*\\]
|PartitionFilters: \\[isnotnull\\(id#x\\), \\(id#x > 1\\)\\]
|PushedFilers: \\[IsNotNull\\(value\\), GreaterThan\\(value,2\\)\\]
|ReadSchema: struct\\<value:bigint\\>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -411,7 +411,7 @@ class ExplainSuite extends ExplainSuiteHelper with DisableAdaptiveExecutionSuite
|Output \\[2\\]: \\[value#x, id#x\\]
|DataFilters: \\[isnotnull\\(value#x\\), \\(value#x > 2\\)\\]
|Format: $fmt
|Location: InMemoryFileIndex\\[.*\\]
|Location: InMemoryFileIndex\\([0-9]+ paths\\)\\[.*\\]
|PartitionFilters: \\[isnotnull\\(id#x\\), \\(id#x > 1\\)\\]
${pushFilterMaps.get(fmt).get}
|ReadSchema: struct\\<value:int\\>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -122,8 +122,6 @@ class DataSourceScanExecRedactionSuite extends DataSourceScanRedactionTest {

test("SPARK-31793: FileSourceScanExec metadata should contain limited file paths") {
withTempPath { path =>
val dir = path.getCanonicalPath

// create a sub-directory with long name so that each root path will always exceed the limit
// this is to ensure we always test the case for the path truncation
val dataDirName = Random.alphanumeric.take(100).toList.mkString
Expand All @@ -146,6 +144,9 @@ class DataSourceScanExecRedactionSuite extends DataSourceScanRedactionTest {
// The location metadata should at least contain one path
assert(location.get.contains(paths.head))

// The location metadata should have the number of root paths
assert(location.get.contains("(10 paths)"))

// The location metadata should have bracket wrapping paths
assert(location.get.indexOf('[') > -1)
assert(location.get.indexOf(']') > -1)
Expand All @@ -155,7 +156,9 @@ class DataSourceScanExecRedactionSuite extends DataSourceScanRedactionTest {
location.get.indexOf('[') + 1, location.get.indexOf(']')).split(", ").toSeq

// only one path, followed by the truncation indicator ("..."), should be available
assert(pathsInLocation.size == 1)
assert(pathsInLocation.size == 2)
// indicator ("...") should be available
assert(pathsInLocation.exists(_.contains("...")))
}
}
}
Expand Down