
Commit b444ff7 (2 parents: 6bbe31c + 4ebff5b)

Merge branch 'master' of https://github.com/apache/spark into SPARK-27463-poc-arrow-stream

# Conflicts:
#	core/src/main/scala/org/apache/spark/api/python/PythonRunner.scala
#	python/pyspark/rdd.py
#	python/pyspark/worker.py
#	sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/pythonLogicalOperators.scala
#	sql/core/src/main/scala/org/apache/spark/sql/execution/SparkStrategies.scala

158 files changed: +5350 −1402 lines changed


R/pkg/R/DataFrame.R

Lines changed: 8 additions & 2 deletions
@@ -1203,7 +1203,8 @@ setMethod("collect",
           requireNamespace1 <- requireNamespace
           if (requireNamespace1("arrow", quietly = TRUE)) {
             read_arrow <- get("read_arrow", envir = asNamespace("arrow"), inherits = FALSE)
-            as_tibble <- get("as_tibble", envir = asNamespace("arrow"))
+            # Arrow drops `as_tibble` since 0.14.0, see ARROW-5190.
+            useAsTibble <- exists("as_tibble", envir = asNamespace("arrow"))

             portAuth <- callJMethod(x@sdf, "collectAsArrowToR")
             port <- portAuth[[1]]
@@ -1213,7 +1214,12 @@ setMethod("collect",
             output <- tryCatch({
               doServerAuth(conn, authSecret)
               arrowTable <- read_arrow(readRaw(conn))
-              as.data.frame(as_tibble(arrowTable), stringsAsFactors = stringsAsFactors)
+              if (useAsTibble) {
+                as_tibble <- get("as_tibble", envir = asNamespace("arrow"))
+                as.data.frame(as_tibble(arrowTable), stringsAsFactors = stringsAsFactors)
+              } else {
+                as.data.frame(arrowTable, stringsAsFactors = stringsAsFactors)
+              }
             }, finally = {
               close(conn)
             })

R/pkg/R/deserialize.R

Lines changed: 10 additions & 3 deletions
@@ -237,7 +237,9 @@ readDeserializeInArrow <- function(inputCon) {
   if (requireNamespace1("arrow", quietly = TRUE)) {
     RecordBatchStreamReader <- get(
       "RecordBatchStreamReader", envir = asNamespace("arrow"), inherits = FALSE)
-    as_tibble <- get("as_tibble", envir = asNamespace("arrow"))
+    # Arrow drops `as_tibble` since 0.14.0, see ARROW-5190.
+    useAsTibble <- exists("as_tibble", envir = asNamespace("arrow"))
+

     # Currently, there looks no way to read batch by batch by socket connection in R side,
     # See ARROW-4512. Therefore, it reads the whole Arrow streaming-formatted binary at once
@@ -246,8 +248,13 @@ readDeserializeInArrow <- function(inputCon) {
     arrowData <- readBin(inputCon, raw(), as.integer(dataLen), endian = "big")
     batches <- RecordBatchStreamReader(arrowData)$batches()

-    # Read all groupped batches. Tibble -> data.frame is cheap.
-    lapply(batches, function(batch) as.data.frame(as_tibble(batch)))
+    if (useAsTibble) {
+      as_tibble <- get("as_tibble", envir = asNamespace("arrow"))
+      # Read all groupped batches. Tibble -> data.frame is cheap.
+      lapply(batches, function(batch) as.data.frame(as_tibble(batch)))
+    } else {
+      lapply(batches, function(batch) as.data.frame(batch))
+    }
   } else {
     stop("'arrow' package should be installed.")
   }

common/network-common/src/main/java/org/apache/spark/network/client/TransportClient.java

Lines changed: 10 additions & 5 deletions
@@ -237,11 +237,16 @@ public ByteBuffer sendRpcSync(ByteBuffer message, long timeoutMs) {
     sendRpc(message, new RpcResponseCallback() {
       @Override
       public void onSuccess(ByteBuffer response) {
-        ByteBuffer copy = ByteBuffer.allocate(response.remaining());
-        copy.put(response);
-        // flip "copy" to make it readable
-        copy.flip();
-        result.set(copy);
+        try {
+          ByteBuffer copy = ByteBuffer.allocate(response.remaining());
+          copy.put(response);
+          // flip "copy" to make it readable
+          copy.flip();
+          result.set(copy);
+        } catch (Throwable t) {
+          logger.warn("Error in responding PRC callback", t);
+          result.setException(t);
+        }
       }

       @Override
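
The rewritten onSuccess above follows a defensive pattern for asynchronous RPC callbacks: any failure while copying the response must complete the pending result exceptionally rather than escape, otherwise the synchronous caller of sendRpcSync would block until its timeout. Below is a minimal, self-contained sketch of that pattern; the ResponseCallback interface and the use of java.util.concurrent.CompletableFuture are stand-ins chosen for the example, not Spark's RpcResponseCallback or the settable future in TransportClient.

import java.nio.ByteBuffer;
import java.util.concurrent.CompletableFuture;
import java.util.concurrent.TimeUnit;

// Sketch only: a response callback that never lets an exception escape, so the
// future waited on by the synchronous caller is always completed (normally or
// exceptionally) and cannot hang until the timeout.
public class CallbackSketch {
  interface ResponseCallback {
    void onSuccess(ByteBuffer response);
    void onFailure(Throwable cause);
  }

  public static void main(String[] args) throws Exception {
    CompletableFuture<ByteBuffer> result = new CompletableFuture<>();

    ResponseCallback callback = new ResponseCallback() {
      @Override
      public void onSuccess(ByteBuffer response) {
        try {
          ByteBuffer copy = ByteBuffer.allocate(response.remaining());
          copy.put(response);
          copy.flip(); // make the copy readable
          result.complete(copy);
        } catch (Throwable t) {
          // Without this catch, the error would be lost and result never completed.
          result.completeExceptionally(t);
        }
      }

      @Override
      public void onFailure(Throwable cause) {
        result.completeExceptionally(cause);
      }
    };

    callback.onSuccess(ByteBuffer.wrap(new byte[] {1, 2, 3}));
    System.out.println(result.get(1, TimeUnit.SECONDS).remaining()); // prints 3
  }
}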
common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/ExecutorDiskUtils.java

Lines changed: 66 additions & 0 deletions (new file)

@@ -0,0 +1,66 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.network.shuffle;
+
+import java.io.File;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
+
+import com.google.common.annotations.VisibleForTesting;
+
+import org.apache.spark.network.util.JavaUtils;
+
+public class ExecutorDiskUtils {
+
+  private static final Pattern MULTIPLE_SEPARATORS = Pattern.compile(File.separator + "{2,}");
+
+  /**
+   * Hashes a filename into the corresponding local directory, in a manner consistent with
+   * Spark's DiskBlockManager.getFile().
+   */
+  public static File getFile(String[] localDirs, int subDirsPerLocalDir, String filename) {
+    int hash = JavaUtils.nonNegativeHash(filename);
+    String localDir = localDirs[hash % localDirs.length];
+    int subDirId = (hash / localDirs.length) % subDirsPerLocalDir;
+    return new File(createNormalizedInternedPathname(
+        localDir, String.format("%02x", subDirId), filename));
+  }
+
+  /**
+   * This method is needed to avoid the situation when multiple File instances for the
+   * same pathname "foo/bar" are created, each with a separate copy of the "foo/bar" String.
+   * According to measurements, in some scenarios such duplicate strings may waste a lot
+   * of memory (~ 10% of the heap). To avoid that, we intern the pathname, and before that
+   * we make sure that it's in a normalized form (contains no "//", "///" etc.) Otherwise,
+   * the internal code in java.io.File would normalize it later, creating a new "foo/bar"
+   * String copy. Unfortunately, we cannot just reuse the normalization code that java.io.File
+   * uses, since it is in the package-private class java.io.FileSystem.
+   */
+  @VisibleForTesting
+  static String createNormalizedInternedPathname(String dir1, String dir2, String fname) {
+    String pathname = dir1 + File.separator + dir2 + File.separator + fname;
+    Matcher m = MULTIPLE_SEPARATORS.matcher(pathname);
+    pathname = m.replaceAll("/");
+    // A single trailing slash needs to be taken care of separately
+    if (pathname.length() > 1 && pathname.endsWith("/")) {
+      pathname = pathname.substring(0, pathname.length() - 1);
+    }
+    return pathname.intern();
+  }
+
+}
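
For illustration, the following standalone sketch shows how getFile() spreads a block file across the configured local directories and the "%02x" subdirectories. The hash helper and directory names here are made up for the example; the helper is only a stand-in for JavaUtils.nonNegativeHash() and is not the real implementation.

import java.io.File;

// Illustrative sketch only: mirrors the layout logic of ExecutorDiskUtils.getFile():
// the hash picks one of the local root dirs, and the quotient picks a two-hex-digit
// subdirectory inside it.
public class DiskLayoutSketch {
  // Stand-in for JavaUtils.nonNegativeHash(), an assumption for this example.
  static int nonNegativeHash(String s) {
    int h = s.hashCode();
    return h != Integer.MIN_VALUE ? Math.abs(h) : 0;
  }

  public static void main(String[] args) {
    String[] localDirs = {"/tmp/spark-local-1", "/tmp/spark-local-2"};
    int subDirsPerLocalDir = 64;
    String filename = "shuffle_0_5_0.index";

    int hash = nonNegativeHash(filename);
    String localDir = localDirs[hash % localDirs.length];          // pick a root dir
    int subDirId = (hash / localDirs.length) % subDirsPerLocalDir; // pick a sub dir
    File f = new File(localDir, String.format("%02x", subDirId) + File.separator + filename);
    System.out.println(f); // e.g. <localDir>/<2-hex-digit subdir>/shuffle_0_5_0.index
  }
}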

common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/ExternalShuffleBlockResolver.java

Lines changed: 5 additions & 40 deletions
@@ -24,7 +24,6 @@
 import java.util.concurrent.ExecutionException;
 import java.util.concurrent.Executor;
 import java.util.concurrent.Executors;
-import java.util.regex.Matcher;
 import java.util.regex.Pattern;

 import com.fasterxml.jackson.annotation.JsonCreator;
@@ -298,15 +297,15 @@ private void deleteNonShuffleServiceServedFiles(String[] dirs) {
   */
  private ManagedBuffer getSortBasedShuffleBlockData(
      ExecutorShuffleInfo executor, int shuffleId, int mapId, int reduceId) {
-    File indexFile = getFile(executor.localDirs, executor.subDirsPerLocalDir,
+    File indexFile = ExecutorDiskUtils.getFile(executor.localDirs, executor.subDirsPerLocalDir,
      "shuffle_" + shuffleId + "_" + mapId + "_0.index");

    try {
      ShuffleIndexInformation shuffleIndexInformation = shuffleIndexCache.get(indexFile);
      ShuffleIndexRecord shuffleIndexRecord = shuffleIndexInformation.getIndex(reduceId);
      return new FileSegmentManagedBuffer(
        conf,
-        getFile(executor.localDirs, executor.subDirsPerLocalDir,
+        ExecutorDiskUtils.getFile(executor.localDirs, executor.subDirsPerLocalDir,
          "shuffle_" + shuffleId + "_" + mapId + "_0.data"),
        shuffleIndexRecord.getOffset(),
        shuffleIndexRecord.getLength());
@@ -317,7 +316,7 @@ private ManagedBuffer getSortBasedShuffleBlockData(

  public ManagedBuffer getDiskPersistedRddBlockData(
      ExecutorShuffleInfo executor, int rddId, int splitIndex) {
-    File file = getFile(executor.localDirs, executor.subDirsPerLocalDir,
+    File file = ExecutorDiskUtils.getFile(executor.localDirs, executor.subDirsPerLocalDir,
      "rdd_" + rddId + "_" + splitIndex);
    long fileLength = file.length();
    ManagedBuffer res = null;
@@ -327,19 +326,6 @@ public ManagedBuffer getDiskPersistedRddBlockData(
    return res;
  }

-  /**
-   * Hashes a filename into the corresponding local directory, in a manner consistent with
-   * Spark's DiskBlockManager.getFile().
-   */
-  @VisibleForTesting
-  static File getFile(String[] localDirs, int subDirsPerLocalDir, String filename) {
-    int hash = JavaUtils.nonNegativeHash(filename);
-    String localDir = localDirs[hash % localDirs.length];
-    int subDirId = (hash / localDirs.length) % subDirsPerLocalDir;
-    return new File(createNormalizedInternedPathname(
-        localDir, String.format("%02x", subDirId), filename));
-  }
-
  void close() {
    if (db != null) {
      try {
@@ -350,28 +336,6 @@ void close() {
    }
  }

-  /**
-   * This method is needed to avoid the situation when multiple File instances for the
-   * same pathname "foo/bar" are created, each with a separate copy of the "foo/bar" String.
-   * According to measurements, in some scenarios such duplicate strings may waste a lot
-   * of memory (~ 10% of the heap). To avoid that, we intern the pathname, and before that
-   * we make sure that it's in a normalized form (contains no "//", "///" etc.) Otherwise,
-   * the internal code in java.io.File would normalize it later, creating a new "foo/bar"
-   * String copy. Unfortunately, we cannot just reuse the normalization code that java.io.File
-   * uses, since it is in the package-private class java.io.FileSystem.
-   */
-  @VisibleForTesting
-  static String createNormalizedInternedPathname(String dir1, String dir2, String fname) {
-    String pathname = dir1 + File.separator + dir2 + File.separator + fname;
-    Matcher m = MULTIPLE_SEPARATORS.matcher(pathname);
-    pathname = m.replaceAll("/");
-    // A single trailing slash needs to be taken care of separately
-    if (pathname.length() > 1 && pathname.endsWith("/")) {
-      pathname = pathname.substring(0, pathname.length() - 1);
-    }
-    return pathname.intern();
-  }
-
  public int removeBlocks(String appId, String execId, String[] blockIds) {
    ExecutorShuffleInfo executor = executors.get(new AppExecId(appId, execId));
    if (executor == null) {
@@ -380,7 +344,8 @@ public int removeBlocks(String appId, String execId, String[] blockIds) {
    }
    int numRemovedBlocks = 0;
    for (String blockId : blockIds) {
-      File file = getFile(executor.localDirs, executor.subDirsPerLocalDir, blockId);
+      File file =
+          ExecutorDiskUtils.getFile(executor.localDirs, executor.subDirsPerLocalDir, blockId);
      if (file.delete()) {
        numRemovedBlocks++;
      } else {
common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/ExternalShuffleClient.java

Lines changed: 10 additions & 3 deletions
@@ -163,9 +163,16 @@ public Future<Integer> removeBlocks(
     client.sendRpc(removeBlocksMessage, new RpcResponseCallback() {
       @Override
       public void onSuccess(ByteBuffer response) {
-        BlockTransferMessage msgObj = BlockTransferMessage.Decoder.fromByteBuffer(response);
-        numRemovedBlocksFuture.complete(((BlocksRemoved)msgObj).numRemovedBlocks);
-        client.close();
+        try {
+          BlockTransferMessage msgObj = BlockTransferMessage.Decoder.fromByteBuffer(response);
+          numRemovedBlocksFuture.complete(((BlocksRemoved) msgObj).numRemovedBlocks);
+        } catch (Throwable t) {
+          logger.warn("Error trying to remove RDD blocks " + Arrays.toString(blockIds) +
+            " via external shuffle service from executor: " + execId, t);
+          numRemovedBlocksFuture.complete(0);
+        } finally {
+          client.close();
+        }
       }

       @Override

common/network-shuffle/src/test/java/org/apache/spark/network/shuffle/ExternalShuffleBlockResolverSuite.java

Lines changed: 1 addition & 1 deletion
@@ -149,7 +149,7 @@ public void testNormalizeAndInternPathname() {

   private void assertPathsMatch(String p1, String p2, String p3, String expectedPathname) {
     String normPathname =
-        ExternalShuffleBlockResolver.createNormalizedInternedPathname(p1, p2, p3);
+        ExecutorDiskUtils.createNormalizedInternedPathname(p1, p2, p3);
     assertEquals(expectedPathname, normPathname);
     File file = new File(normPathname);
     String returnedPath = file.getPath();

common/network-shuffle/src/test/java/org/apache/spark/network/shuffle/TestShuffleDataContext.java

Lines changed: 5 additions & 4 deletions
@@ -76,9 +76,9 @@ public void insertSortShuffleData(int shuffleId, int mapId, byte[][] blocks) thr

     try {
       dataStream = new FileOutputStream(
-        ExternalShuffleBlockResolver.getFile(localDirs, subDirsPerLocalDir, blockId + ".data"));
+        ExecutorDiskUtils.getFile(localDirs, subDirsPerLocalDir, blockId + ".data"));
       indexStream = new DataOutputStream(new FileOutputStream(
-        ExternalShuffleBlockResolver.getFile(localDirs, subDirsPerLocalDir, blockId + ".index")));
+        ExecutorDiskUtils.getFile(localDirs, subDirsPerLocalDir, blockId + ".index")));

       long offset = 0;
       indexStream.writeLong(offset);
@@ -121,10 +121,11 @@ private void insertFile(String filename) throws IOException {

   private void insertFile(String filename, byte[] block) throws IOException {
     OutputStream dataStream = null;
-    File file = ExternalShuffleBlockResolver.getFile(localDirs, subDirsPerLocalDir, filename);
+    File file = ExecutorDiskUtils.getFile(localDirs, subDirsPerLocalDir, filename);
     assert(!file.exists()) : "this test file has been already generated";
     try {
-      dataStream = new FileOutputStream(file);
+      dataStream = new FileOutputStream(
+        ExecutorDiskUtils.getFile(localDirs, subDirsPerLocalDir, filename));
       dataStream.write(block);
     } finally {
       Closeables.close(dataStream, false);

common/unsafe/src/main/java/org/apache/spark/unsafe/Platform.java

Lines changed: 1 addition & 1 deletion
@@ -304,7 +304,7 @@ public static void throwException(Throwable t) {
   static {
     boolean _unaligned;
     String arch = System.getProperty("os.arch", "");
-    if (arch.equals("ppc64le") || arch.equals("ppc64")) {
+    if (arch.equals("ppc64le") || arch.equals("ppc64") || arch.equals("s390x")) {
       // Since java.nio.Bits.unaligned() doesn't return true on ppc (See JDK-8165231), but
       // ppc64 and ppc64le support it
       _unaligned = true;
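
For context, this is a minimal sketch (not the exact Platform.java logic) of how unaligned-access support can be detected: an allowlist covers architectures such as ppc64, ppc64le and now s390x, where java.nio.Bits.unaligned() is known to under-report (see JDK-8165231 for ppc), and a reflective query of the JDK is the general fallback.

import java.lang.reflect.Method;
import java.util.Arrays;

// Sketch only: allowlist known-good architectures, otherwise ask the JDK via
// reflection and default to false if its internals are not accessible.
public class UnalignedCheckSketch {
  static boolean unalignedSupported() {
    String arch = System.getProperty("os.arch", "");
    if (Arrays.asList("ppc64le", "ppc64", "s390x").contains(arch)) {
      return true;
    }
    try {
      Class<?> bits = Class.forName("java.nio.Bits", false, ClassLoader.getSystemClassLoader());
      Method unaligned = bits.getDeclaredMethod("unaligned");
      unaligned.setAccessible(true); // may require --add-opens java.base/java.nio on newer JDKs
      return Boolean.TRUE.equals(unaligned.invoke(null));
    } catch (Throwable t) {
      return false; // be conservative if the check cannot be performed
    }
  }

  public static void main(String[] args) {
    System.out.println(System.getProperty("os.arch") + " unaligned=" + unalignedSupported());
  }
}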
