2 changes: 1 addition & 1 deletion core/src/test/java/test/org/apache/spark/JavaAPISuite.java
@@ -960,7 +960,7 @@ public void textFiles() throws IOException {
rdd.saveAsTextFile(outputDir);
// Read the plain text file and check it's OK
File outputFile = new File(outputDir, "part-00000");
-String content = Files.asCharSource(outputFile, StandardCharsets.UTF_8).read();
+String content = java.nio.file.Files.readString(outputFile.toPath());
Member Author:
The fully qualified java.nio.file.Files.readString will be shortened to Files.readString once the migration in this file is complete.
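
As an aside, a minimal Scala sketch of why the fully qualified name is needed mid-migration (the object name, file path, and values are hypothetical, not from this PR):

```scala
import java.nio.charset.StandardCharsets

// While this Guava import remains, a bare `Files` resolves to
// com.google.common.io.Files, so the java.nio call must be fully qualified.
import com.google.common.io.Files

object MigrationExample {
  def main(args: Array[String]): Unit = {
    val file = new java.io.File("/tmp/example.txt") // hypothetical path
    // Old Guava API, bound to the Guava import above:
    val viaGuava = Files.asCharSource(file, StandardCharsets.UTF_8).read()
    // New API, fully qualified to avoid the name clash:
    val viaNio = java.nio.file.Files.readString(file.toPath)
    assert(viaGuava == viaNio) // identical results for UTF-8 text files
  }
}
```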

Member:

Are they the same in effect? What migration is this?

Member Author:

  • For the read side, the performance is the same.
  • For the write side, the Java API is faster than the Google library. My very next PR will provide a simple performance comparison (a rough sketch follows below).

This is a subtask of the following migration toward modern and faster Java APIs:

  • SPARK-53047 Modernize Spark to use the latest Java features
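
A rough micro-comparison sketch of the read side (illustrative only; the object name, input file, and iteration count are assumptions, not the follow-up PR's actual benchmark, which would use proper harnessing such as JMH):

```scala
import java.io.File
import java.nio.charset.StandardCharsets

object ReadPerfSketch {
  // Naive wall-clock timing; enough to show the two reads are comparable.
  def time(label: String)(body: => Unit): Unit = {
    val start = System.nanoTime()
    (1 to 1000).foreach(_ => body)
    println(s"$label: ${(System.nanoTime() - start) / 1e6} ms")
  }

  def main(args: Array[String]): Unit = {
    val file = new File(args(0)) // any UTF-8 text file
    time("Guava Files.asCharSource") {
      com.google.common.io.Files.asCharSource(file, StandardCharsets.UTF_8).read()
    }
    time("java.nio Files.readString") {
      java.nio.file.Files.readString(file.toPath)
    }
  }
}
```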

Member:

Thank you @dongjoon-hyun

assertEquals("1\n2\n3\n4\n", content);
// Also try reading it in as a text file RDD
List<String> expected = Arrays.asList("1", "2", "3", "4");
@@ -19,10 +19,10 @@ package org.apache.spark.deploy.history

import java.io.{ByteArrayInputStream, ByteArrayOutputStream, File}
import java.net.URI
-import java.nio.charset.StandardCharsets
+import java.nio.file.Files
import java.util.zip.{ZipInputStream, ZipOutputStream}

-import com.google.common.io.{ByteStreams, Files}
+import com.google.common.io.ByteStreams
import org.apache.hadoop.conf.Configuration
import org.apache.hadoop.fs.Path
import org.scalatest.BeforeAndAfter
@@ -220,8 +220,8 @@ class SingleFileEventLogFileReaderSuite extends EventLogFileReadersSuite {

val entry = is.getNextEntry
assert(entry != null)
-val actual = new String(ByteStreams.toByteArray(is), StandardCharsets.UTF_8)
Member Author (@dongjoon-hyun, Aug 6, 2025):

Technically, these test suites exercise compressed event logs (LZ4, Snappy, ZSTD), whose bytes are not valid UTF-8, so decoding them as UTF-8 was lossy. This is a kind of bug fix.
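
A small sketch of why the byte-level comparison matters here (the payload bytes below are made up): round-tripping arbitrary binary data through a UTF-8 String replaces invalid sequences with U+FFFD, so a decoded "expected" value can silently diverge from the file's real contents.

```scala
import java.nio.charset.StandardCharsets

object Utf8Lossiness {
  def main(args: Array[String]): Unit = {
    // Hypothetical compressed payload; 0xFF never occurs in valid UTF-8.
    val compressed = Array[Byte](0xFF.toByte, 0x00, 0x42)

    // Decoding maps 0xFF to U+FFFD, so the round trip loses information.
    val roundTripped =
      new String(compressed, StandardCharsets.UTF_8).getBytes(StandardCharsets.UTF_8)
    assert(!roundTripped.sameElements(compressed))

    // Comparing raw byte arrays, as the fixed test now does, is exact.
    assert(compressed.sameElements(compressed.clone()))
  }
}
```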

-val expected = Files.asCharSource(new File(logPath.toString), StandardCharsets.UTF_8).read()
+val actual = ByteStreams.toByteArray(is)
+val expected = Files.readAllBytes(new File(logPath.toString).toPath)
assert(actual === expected)
assert(is.getNextEntry === null)
}
@@ -367,9 +367,8 @@ class RollingEventLogFilesReaderSuite extends EventLogFileReadersSuite {
val fileName = entry.getName.stripPrefix(logPath.getName + "/")
assert(allFileNames.contains(fileName))

-val actual = new String(ByteStreams.toByteArray(is), StandardCharsets.UTF_8)
Member Author:

Ditto (same byte-level fix as above).

-val expected = Files.asCharSource(
-  new File(logPath.toString, fileName), StandardCharsets.UTF_8).read()
+val actual = ByteStreams.toByteArray(is)
+val expected = Files.readAllBytes(new File(logPath.toString, fileName).toPath)
assert(actual === expected)
}
}
@@ -708,8 +708,7 @@ abstract class FsHistoryProviderSuite extends SparkFunSuite with Matchers with P
while (entry != null) {
val actual = new String(ByteStreams.toByteArray(inputStream), StandardCharsets.UTF_8)
val expected =
-  Files.asCharSource(logs.find(_.getName == entry.getName).get, StandardCharsets.UTF_8)
-    .read()
+  java.nio.file.Files.readString(logs.find(_.getName == entry.getName).get.toPath)
actual should be (expected)
totalEntries += 1
entry = inputStream.getNextEntry
@@ -19,12 +19,13 @@ package org.apache.spark.deploy.history
import java.io.{File, FileInputStream, FileWriter, InputStream, IOException}
import java.net.{HttpURLConnection, URI, URL}
import java.nio.charset.StandardCharsets
+import java.nio.file.Files
import java.util.zip.ZipInputStream

import scala.concurrent.duration._
import scala.jdk.CollectionConverters._

-import com.google.common.io.{ByteStreams, Files}
+import com.google.common.io.ByteStreams
import jakarta.servlet._
import jakarta.servlet.http.{HttpServletRequest, HttpServletRequestWrapper, HttpServletResponse}
import org.apache.hadoop.fs.{FileStatus, FileSystem, Path}
@@ -308,7 +309,7 @@ abstract class HistoryServerSuite extends SparkFunSuite with BeforeAndAfter with
val expectedFile = {
new File(logDir, entry.getName)
}
-val expected = Files.asCharSource(expectedFile, StandardCharsets.UTF_8).read()
+val expected = Files.readString(expectedFile.toPath)
val actual = new String(ByteStreams.toByteArray(zipStream), StandardCharsets.UTF_8)
actual should be (expected)
filesCompared += 1
@@ -58,7 +58,7 @@ class FileAppenderSuite extends SparkFunSuite with BeforeAndAfter {
val appender = new FileAppender(inputStream, testFile)
inputStream.close()
appender.awaitTermination()
-assert(Files.asCharSource(testFile, StandardCharsets.UTF_8).read() === header + testString)
+assert(java.nio.file.Files.readString(testFile.toPath) === header + testString)
}

test("SPARK-35027: basic file appender - close stream") {
@@ -392,7 +392,7 @@ class FileAppenderSuite extends SparkFunSuite with BeforeAndAfter {
Utils.closeQuietly(inputStream)
}
} else {
-Files.asCharSource(file, StandardCharsets.UTF_8).read()
+java.nio.file.Files.readString(file.toPath)
}
}.mkString("")
assert(allText === expectedText)
@@ -17,10 +17,9 @@
package org.apache.spark.deploy.k8s

import java.io.File
-import java.nio.charset.StandardCharsets
+import java.nio.file.Files

import com.fasterxml.jackson.databind.ObjectMapper
-import com.google.common.io.Files
import io.fabric8.kubernetes.client.{ConfigBuilder, KubernetesClient, KubernetesClientBuilder}
import io.fabric8.kubernetes.client.Config.KUBERNETES_REQUEST_RETRY_BACKOFFLIMIT_SYSTEM_PROPERTY
import io.fabric8.kubernetes.client.Config.autoConfigure
@@ -98,8 +97,7 @@ object SparkKubernetesClientFactory extends Logging {
.withOption(oauthTokenValue) {
(token, configBuilder) => configBuilder.withOauthToken(token)
}.withOption(oauthTokenFile) {
-(file, configBuilder) =>
-  configBuilder.withOauthToken(Files.asCharSource(file, StandardCharsets.UTF_8).read())
+(file, configBuilder) => configBuilder.withOauthToken(Files.readString(file.toPath))
}.withOption(caCertFile) {
(file, configBuilder) => configBuilder.withCaCertFile(file)
}.withOption(clientKeyFile) {
@@ -17,11 +17,10 @@
package org.apache.spark.deploy.k8s.features

import java.io.File
-import java.nio.charset.StandardCharsets
+import java.nio.file.Files

import scala.jdk.CollectionConverters._

-import com.google.common.io.Files
import io.fabric8.kubernetes.api.model._

import org.apache.spark.deploy.k8s.{KubernetesConf, KubernetesUtils, SparkPod}
@@ -116,7 +115,7 @@ private[spark] class HadoopConfDriverFeatureStep(conf: KubernetesConf)
override def getAdditionalKubernetesResources(): Seq[HasMetadata] = {
if (confDir.isDefined) {
val fileMap = confFiles.map { file =>
-(file.getName(), Files.asCharSource(file, StandardCharsets.UTF_8).read())
+(file.getName(), Files.readString(file.toPath))
}.toMap.asJava

Seq(new ConfigMapBuilder()
@@ -17,7 +17,6 @@
package org.apache.spark.deploy.k8s.features

import java.io.File
-import java.nio.charset.StandardCharsets

import scala.jdk.CollectionConverters._
import scala.util.control.NonFatal
@@ -236,7 +235,7 @@ private[spark] class KerberosConfDriverFeatureStep(kubernetesConf: KubernetesDri
.endMetadata()
.withImmutable(true)
.addToData(
-  Map(file.getName() -> Files.asCharSource(file, StandardCharsets.UTF_8).read()).asJava)
+  Map(file.getName() -> java.nio.file.Files.readString(file.toPath)).asJava)
.build()
}
} ++ {
@@ -17,9 +17,8 @@
package org.apache.spark.deploy.k8s.features

import java.io.File
-import java.nio.charset.StandardCharsets
+import java.nio.file.Files

-import com.google.common.io.Files
import io.fabric8.kubernetes.api.model.{ConfigMapBuilder, ContainerBuilder, HasMetadata, PodBuilder}

import org.apache.spark.deploy.SparkHadoopUtil
@@ -81,7 +80,7 @@ private[spark] class PodTemplateConfigMapStep(conf: KubernetesConf)
val hadoopConf = SparkHadoopUtil.get.newConfiguration(conf.sparkConf)
val uri = downloadFile(podTemplateFile, Utils.createTempDir(), conf.sparkConf, hadoopConf)
val file = new java.net.URI(uri).getPath
-val podTemplateString = Files.asCharSource(new File(file), StandardCharsets.UTF_8).read()
+val podTemplateString = Files.readString(new File(file).toPath)
Seq(new ConfigMapBuilder()
.withNewMetadata()
.withName(configmapName)
@@ -17,13 +17,11 @@
package org.apache.spark.deploy.k8s.integrationtest

import java.io.File
-import java.nio.charset.StandardCharsets
-import java.nio.file.{Path, Paths}
+import java.nio.file.{Files, Path, Paths}
import java.util.UUID

import scala.jdk.CollectionConverters._

-import com.google.common.io.Files
import io.fabric8.kubernetes.api.model.Pod
import io.fabric8.kubernetes.client.{Watcher, WatcherException}
import io.fabric8.kubernetes.client.KubernetesClientException
@@ -129,7 +127,7 @@ class KubernetesSuite extends SparkFunSuite
val tagFile = new File(path)
require(tagFile.isFile,
s"No file found for image tag at ${tagFile.getAbsolutePath}.")
-Files.asCharSource(tagFile, StandardCharsets.UTF_8).read().trim
+Files.readString(tagFile.toPath).trim
}
.orElse(sys.props.get(CONFIG_KEY_IMAGE_TAG))
.getOrElse {
@@ -236,11 +236,11 @@ abstract class BaseYarnClusterSuite extends SparkFunSuite with Matchers {
// an error message
val output = new Object() {
override def toString: String = outFile
-.map(Files.asCharSource(_, StandardCharsets.UTF_8).read())
+.map((f: File) => java.nio.file.Files.readString(f.toPath))
.getOrElse("(stdout/stderr was not captured)")
}
assert(finalState === SparkAppHandle.State.FINISHED, output)
-val resultString = Files.asCharSource(result, StandardCharsets.UTF_8).read()
+val resultString = java.nio.file.Files.readString(result.toPath)
assert(resultString === expected, output)
}

@@ -377,7 +377,7 @@ class YarnClusterSuite extends BaseYarnClusterSuite {
extraEnv = Map("SPARK_CONF_DIR" -> confDir.getAbsolutePath),
extraConf = Map(CLIENT_INCLUDE_DRIVER_LOGS_LINK.key -> true.toString))
checkResult(finalState, result)
-val logOutput = Files.asCharSource(logOutFile, StandardCharsets.UTF_8).read()
+val logOutput = java.nio.file.Files.readString(logOutFile.toPath)
val logFilePattern = raw"""(?s).+\sDriver Logs \(<NAME>\): https?://.+/<NAME>(\?\S+)?\s.+"""
logOutput should fullyMatch regex logFilePattern.replace("<NAME>", "stdout")
logOutput should fullyMatch regex logFilePattern.replace("<NAME>", "stderr")
5 changes: 5 additions & 0 deletions scalastyle-config.xml
@@ -297,6 +297,11 @@ This file is divided into 3 sections:
<customMessage>Use Files.readString instead.</customMessage>
</check>

+<check customId="asCharSource" level="error" class="org.scalastyle.file.RegexChecker" enabled="true">
+  <parameters><parameter name="regex">\bFiles\.asCharSource\b</parameter></parameters>
+  <customMessage>Use Files.readString instead.</customMessage>
+</check>

<check customId="write" level="error" class="org.scalastyle.file.RegexChecker" enabled="true">
<parameters><parameter name="regex">\bFileUtils\.write\b</parameter></parameters>
<customMessage>Use Files.writeString instead.</customMessage>
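
For reference, a hypothetical snippet showing what the new asCharSource rule rejects and what its customMessage asks for instead:

```scala
import java.io.File
import java.nio.file.Files

object StyleExample {
  // Before (Guava): now fails scalastyle via the regex \bFiles\.asCharSource\b
  //   val text = Files.asCharSource(file, StandardCharsets.UTF_8).read()
  // After, per the check's customMessage:
  def readConf(file: File): String = Files.readString(file.toPath)
}
```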