@@ -18,9 +18,10 @@
 package org.apache.spark.storage

 import java.nio.{ByteBuffer, MappedByteBuffer}
+import java.nio.file.Files
 import java.util.{Arrays, Random}

-import com.google.common.io.{ByteStreams, Files}
+import com.google.common.io.ByteStreams
 import io.netty.channel.FileRegion

 import org.apache.spark.{SecurityManager, SparkConf, SparkFunSuite}
@@ -150,7 +151,7 @@ class DiskStoreSuite extends SparkFunSuite {

     assert(diskStore.getSize(blockId) === testData.length)

-    val diskData = Files.toByteArray(diskBlockManager.getFile(blockId.name))
+    val diskData = Files.readAllBytes(diskBlockManager.getFile(blockId.name).toPath)
Contributor:
Which one is better, Files.readAllBytes or com.google.common.io.ByteStreams#toByteArray(java.io.InputStream)?

dongjoon-hyun (Member, Author), Aug 7, 2025:
I checked the performance. It's the same for the reading part, @LuciferYang. The main performance difference usually shows up in the writer API part.

dongjoon-hyun (Member, Author):
So, I mentioned "The built-in Java method is as good as 3rd party library." in this PR description.

dongjoon-hyun (Member, Author), Aug 7, 2025:
BTW, FYI, I'm digging into this area as part of the Java 25 preparation and CI runtime improvement. If we stick to the old libraries, we cannot get the benefit of the new Java versions' improvements. The stale Scala 2.13 and 3rd-party libraries look very suspicious to me.

Contributor:
Thanks for explaining!

     assert(!Arrays.equals(testData, diskData))

     val blockData = diskStore.getBytes(blockId)
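As a side note on the thread above, here is a minimal, self-contained sketch of the comparison being discussed: all three calls read a whole file into a byte array and return the same bytes. The temp-file setup and the object name are made up for illustration and are not part of this PR.

```scala
import java.io.FileInputStream
import java.nio.file.{Files => NioFiles, Path}
import java.util.Arrays

import com.google.common.io.{ByteStreams, Files => GuavaFiles}

object ReadAllBytesSketch {
  def main(args: Array[String]): Unit = {
    // Hypothetical temp file, used only for this illustration.
    val tmp: Path = NioFiles.createTempFile("read-bytes-demo", ".bin")
    NioFiles.write(tmp, Array[Byte](1, 2, 3, 4))

    // Before this PR: Guava helpers.
    val viaGuavaFiles = GuavaFiles.toByteArray(tmp.toFile)
    val in = new FileInputStream(tmp.toFile)
    val viaByteStreams =
      try ByteStreams.toByteArray(in) finally in.close()

    // After this PR: the JDK built-in.
    val viaNio = NioFiles.readAllBytes(tmp)

    // All three read paths produce identical results.
    assert(Arrays.equals(viaGuavaFiles, viaNio))
    assert(Arrays.equals(viaByteStreams, viaNio))
    NioFiles.delete(tmp)
  }
}
```

Per the author's comment, the read side behaves the same across these APIs; the write side is where measurable differences tend to appear.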
@@ -18,10 +18,11 @@ package org.apache.spark.deploy.k8s.features

 import java.io.File
 import java.nio.charset.StandardCharsets
+import java.nio.file.Files

 import scala.jdk.CollectionConverters._

-import com.google.common.io.{BaseEncoding, Files}
+import com.google.common.io.BaseEncoding
 import io.fabric8.kubernetes.api.model.{ContainerBuilder, HasMetadata, PodBuilder, Secret, SecretBuilder}

 import org.apache.spark.deploy.k8s.{KubernetesConf, SparkPod}
@@ -153,7 +154,7 @@ private[spark] class DriverKubernetesCredentialsFeatureStep(kubernetesConf: Kube
       .map { file =>
         require(file.isFile, String.format("%s provided at %s does not exist or is not a file.",
           fileType, file.getAbsolutePath))
-        BaseEncoding.base64().encode(Files.toByteArray(file))
+        BaseEncoding.base64().encode(Files.readAllBytes(file.toPath))
       }
   }

@@ -17,11 +17,11 @@
 package org.apache.spark.deploy.k8s.features

 import java.io.File
+import java.nio.file.Files

 import scala.jdk.CollectionConverters._
 import scala.util.control.NonFatal

-import com.google.common.io.Files
 import io.fabric8.kubernetes.api.model._
 import org.apache.commons.codec.binary.Base64
 import org.apache.hadoop.security.UserGroupInformation
@@ -235,7 +235,7 @@ private[spark] class KerberosConfDriverFeatureStep(kubernetesConf: KubernetesDri
           .endMetadata()
           .withImmutable(true)
           .addToData(
-            Map(file.getName() -> java.nio.file.Files.readString(file.toPath)).asJava)
+            Map(file.getName() -> Files.readString(file.toPath)).asJava)
           .build()
       }
     } ++ {
} ++ {
@@ -247,7 +247,7 @@ private[spark] class KerberosConfDriverFeatureStep(kubernetesConf: KubernetesDri
             .withName(ktSecretName)
             .endMetadata()
             .withImmutable(true)
-            .addToData(kt.getName(), Base64.encodeBase64String(Files.toByteArray(kt)))
+            .addToData(kt.getName(), Base64.encodeBase64String(Files.readAllBytes(kt.toPath)))
             .build())
         } else {
           Nil
scalastyle-config.xml (5 additions, 0 deletions)

@@ -282,6 +282,11 @@ This file is divided into 3 sections:
     scala.jdk.CollectionConverters._ and use .asScala / .asJava methods</customMessage>
   </check>

+  <check customId="toByteArray" level="error" class="org.scalastyle.file.RegexChecker" enabled="true">
+    <parameters><parameter name="regex">\bFiles\.toByteArray\b</parameter></parameters>
+    <customMessage>Use java.nio.file.Files.readAllBytes instead.</customMessage>
+  </check>
+
   <check customId="getTempDirectory" level="error" class="org.scalastyle.file.RegexChecker" enabled="true">
     <parameters><parameter name="regex">\bFileUtils\.getTempDirectory\b</parameter></parameters>
     <customMessage>Use System.getProperty instead.</customMessage>
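For context, here is a minimal before/after of the pattern the new toByteArray rule is meant to catch. The object and method names below are hypothetical and not taken from the PR; only the flagged call and its replacement come from the rule itself.

```scala
import java.io.File
import java.nio.file.Files

object ToByteArrayRuleSketch {
  // Would be flagged by the new check (regex \bFiles\.toByteArray\b):
  //   com.google.common.io.Files.toByteArray(file)
  // Preferred form, as the custom message suggests:
  def readAll(file: File): Array[Byte] = Files.readAllBytes(file.toPath)
}
```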
@@ -18,12 +18,12 @@
 package org.apache.spark.streaming

 import java.io._
+import java.nio.file.Files
 import java.util.concurrent.ConcurrentLinkedQueue

 import scala.jdk.CollectionConverters._
 import scala.reflect.ClassTag

-import com.google.common.io.Files
 import org.apache.hadoop.conf.Configuration
 import org.apache.hadoop.fs.{FileSystem, Path}
 import org.apache.hadoop.io.{IntWritable, Text}
@@ -648,7 +648,7 @@ class CheckpointSuite extends TestSuiteBase with LocalStreamingContext with DStr
    */
   def writeFile(i: Int, clock: Clock): Unit = {
     val file = new File(testDir, i.toString)
-    java.nio.file.Files.writeString(file.toPath, s"$i\n")
+    Files.writeString(file.toPath, s"$i\n")
     assert(file.setLastModified(clock.getTimeMillis()))
     // Check that the file's modification date is actually the value we wrote, since rounding or
     // truncation will break the test:
@@ -878,8 +878,8 @@ class CheckpointSuite extends TestSuiteBase with LocalStreamingContext with DStr
     assert(checkpointFiles.size === 2)
     // Although bytes2 was written with an old time, it contains the latest status, so we should
     // try to read from it at first.
-    assert(Files.toByteArray(checkpointFiles(0)) === bytes2)
-    assert(Files.toByteArray(checkpointFiles(1)) === bytes1)
+    assert(Files.readAllBytes(checkpointFiles(0).toPath) === bytes2)
+    assert(Files.readAllBytes(checkpointFiles(1).toPath) === bytes1)
     checkpointWriter.stop()
   }