diff --git a/core/src/main/scala/org/apache/spark/util/Utils.scala b/core/src/main/scala/org/apache/spark/util/Utils.scala
index 13fcfe4aed023..c97216d241249 100644
--- a/core/src/main/scala/org/apache/spark/util/Utils.scala
+++ b/core/src/main/scala/org/apache/spark/util/Utils.scala
@@ -2980,7 +2980,7 @@ private[spark] object Utils extends Logging {
    * exceeds `stopAppendingThreshold`, stop appending paths for saving memory.
    */
   def buildLocationMetadata(paths: Seq[Path], stopAppendingThreshold: Int): String = {
-    val metadata = new StringBuilder("[")
+    val metadata = new StringBuilder(s"(${paths.length} paths)[")
     var index: Int = 0
     while (index < paths.length && metadata.length < stopAppendingThreshold) {
       if (index > 0) {
@@ -2989,6 +2989,12 @@ private[spark] object Utils extends Logging {
       metadata.append(paths(index).toString)
       index += 1
     }
+    if (paths.length > index) {
+      if (index > 0) {
+        metadata.append(", ")
+      }
+      metadata.append("...")
+    }
     metadata.append("]")
     metadata.toString
   }
diff --git a/core/src/test/scala/org/apache/spark/util/UtilsSuite.scala b/core/src/test/scala/org/apache/spark/util/UtilsSuite.scala
index 18ff96021153f..e8c4349476036 100644
--- a/core/src/test/scala/org/apache/spark/util/UtilsSuite.scala
+++ b/core/src/test/scala/org/apache/spark/util/UtilsSuite.scala
@@ -1304,16 +1304,11 @@ class UtilsSuite extends SparkFunSuite with ResetSystemProperties with Logging {
 
   test("pathsToMetadata") {
     val paths = (0 to 4).map(i => new Path(s"path$i"))
-    assert(Utils.buildLocationMetadata(paths, 5) == "[path0]")
-    assert(Utils.buildLocationMetadata(paths, 10) == "[path0, path1]")
-    assert(Utils.buildLocationMetadata(paths, 15) == "[path0, path1, path2]")
-    assert(Utils.buildLocationMetadata(paths, 25) == "[path0, path1, path2, path3]")
-
-    // edge-case: we should consider the fact non-path chars including '[' and ", " are accounted
-    // 1. second path is not added due to the addition of '['
-    assert(Utils.buildLocationMetadata(paths, 6) == "[path0]")
-    // 2. third path is not added due to the addition of ", "
-    assert(Utils.buildLocationMetadata(paths, 13) == "[path0, path1]")
+    assert(Utils.buildLocationMetadata(paths, 10) == "(5 paths)[...]")
+    // 11 is the minimum threshold to print at least one path (the "(5 paths)[" prefix is 10 chars)
+    assert(Utils.buildLocationMetadata(paths, 11) == "(5 paths)[path0, ...]")
+    // any threshold above 15 (the length once "path0" is appended) prints a second path
+    assert(Utils.buildLocationMetadata(paths, 18) == "(5 paths)[path0, path1, ...]")
   }
 
   test("checkHost supports both IPV4 and IPV6") {
diff --git a/external/avro/src/test/scala/org/apache/spark/sql/avro/AvroSuite.scala b/external/avro/src/test/scala/org/apache/spark/sql/avro/AvroSuite.scala
index 287b43045f450..e08b4fec7e96f 100644
--- a/external/avro/src/test/scala/org/apache/spark/sql/avro/AvroSuite.scala
+++ b/external/avro/src/test/scala/org/apache/spark/sql/avro/AvroSuite.scala
@@ -2132,7 +2132,7 @@ class AvroV2Suite extends AvroSuite with ExplainSuiteHelper {
          |Output \\[2\\]: \\[value#xL, id#x\\]
          |DataFilters: \\[isnotnull\\(value#xL\\), \\(value#xL > 2\\)\\]
          |Format: avro
-         |Location: InMemoryFileIndex\\[.*\\]
+         |Location: InMemoryFileIndex\\([0-9]+ paths\\)\\[.*\\]
          |PartitionFilters: \\[isnotnull\\(id#x\\), \\(id#x > 1\\)\\]
          |PushedFilers: \\[IsNotNull\\(value\\), GreaterThan\\(value,2\\)\\]
          |ReadSchema: struct\\<value:bigint\\>
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/ExplainSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/ExplainSuite.scala
index 916e1331c7750..2a6c644a3b1d8 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/ExplainSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/ExplainSuite.scala
@@ -411,7 +411,7 @@ class ExplainSuite extends ExplainSuiteHelper with DisableAdaptiveExecutionSuite
          |Output \\[2\\]: \\[value#x, id#x\\]
          |DataFilters: \\[isnotnull\\(value#x\\), \\(value#x > 2\\)\\]
          |Format: $fmt
-         |Location: InMemoryFileIndex\\[.*\\]
+         |Location: InMemoryFileIndex\\([0-9]+ paths\\)\\[.*\\]
          |PartitionFilters: \\[isnotnull\\(id#x\\), \\(id#x > 1\\)\\]
          ${pushFilterMaps.get(fmt).get}
          |ReadSchema: struct\\<value:int\\>
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/DataSourceScanExecRedactionSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/DataSourceScanExecRedactionSuite.scala
index ccac5a00fdb05..612cd6f0d891b 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/execution/DataSourceScanExecRedactionSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/DataSourceScanExecRedactionSuite.scala
@@ -122,8 +122,6 @@ class DataSourceScanExecRedactionSuite extends DataSourceScanRedactionTest {
 
   test("SPARK-31793: FileSourceScanExec metadata should contain limited file paths") {
     withTempPath { path =>
-      val dir = path.getCanonicalPath
-
       // create a sub-directory with long name so that each root path will always exceed the limit
       // this is to ensure we always test the case for the path truncation
       val dataDirName = Random.alphanumeric.take(100).toList.mkString
@@ -146,6 +144,9 @@ class DataSourceScanExecRedactionSuite extends DataSourceScanRedactionTest {
       // The location metadata should at least contain one path
       assert(location.get.contains(paths.head))
 
+      // The location metadata should have the number of root paths
+      assert(location.get.contains("(10 paths)"))
+
       // The location metadata should have bracket wrapping paths
       assert(location.get.indexOf('[') > -1)
       assert(location.get.indexOf(']') > -1)
@@ -155,7 +156,9 @@ class DataSourceScanExecRedactionSuite extends DataSourceScanRedactionTest {
         location.get.indexOf('[') + 1, location.get.indexOf(']')).split(", ").toSeq
 
-      // the only one path should be available
-      assert(pathsInLocation.size == 1)
+      // two entries should be available: the first path and the indicator
+      assert(pathsInLocation.size == 2)
+      // indicator ("...") should be available
+      assert(pathsInLocation.exists(_.contains("...")))
     }
   }
 }
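
Reviewer note (illustration only, not part of the patch): a minimal sketch of the new
location-metadata format produced by the patched `Utils.buildLocationMetadata`. The
`paths` values and the thresholds 10 and 11 come from the UtilsSuite test above; 100 is
an assumed "large enough" value. Since `Utils` is `private[spark]`, this only compiles
from inside the org.apache.spark package (e.g. from a suite like UtilsSuite).

    import org.apache.hadoop.fs.Path
    import org.apache.spark.util.Utils

    val paths = (0 to 4).map(i => new Path(s"path$i"))

    // "(5 paths)[" is 10 chars and the length check runs before each append,
    // so a threshold of 10 appends no paths, only the "..." indicator:
    Utils.buildLocationMetadata(paths, 10)   // "(5 paths)[...]"

    // threshold 11 exceeds the prefix length, so the first path is appended
    // and the remaining four collapse into "...":
    Utils.buildLocationMetadata(paths, 11)   // "(5 paths)[path0, ...]"

    // a threshold larger than the full listing prints every path, no indicator:
    Utils.buildLocationMetadata(paths, 100)  // "(5 paths)[path0, path1, path2, path3, path4]"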