
Commit dbb61fb

Merge branch 'master' of https://github.com/apache/spark into gpu-sched-executor-clean

2 parents: 4165c60 + bcd3b61
204 files changed: 4142 additions, 1153 deletions

LICENSE

Lines changed: 2 additions & 2 deletions
@@ -222,7 +222,7 @@ Python Software Foundation License
 ----------------------------------
 
 pyspark/heapq3.py
-
+python/docs/_static/copybutton.js
 
 BSD 3-Clause
 ------------
@@ -258,4 +258,4 @@ data/mllib/images/kittens/29.5.a_b_EGDP022204.jpg
 data/mllib/images/kittens/54893.jpg
 data/mllib/images/kittens/DP153539.jpg
 data/mllib/images/kittens/DP802813.jpg
-data/mllib/images/multi-channel/chr30.4.184.jpg
+data/mllib/images/multi-channel/chr30.4.184.jpg

LICENSE-binary

Lines changed: 0 additions & 1 deletion
@@ -489,7 +489,6 @@ Eclipse Distribution License (EDL) 1.0
 org.glassfish.jaxb:jaxb-runtime
 jakarta.xml.bind:jakarta.xml.bind-api
 com.sun.istack:istack-commons-runtime
-jakarta.activation:jakarta.activation-api
 
 
 Mozilla Public License (MPL) 1.1

README.md

Lines changed: 10 additions & 10 deletions
@@ -1,18 +1,18 @@
 # Apache Spark
 
-[![Jenkins Build](https://amplab.cs.berkeley.edu/jenkins/job/spark-master-test-sbt-hadoop-2.7/badge/icon)](https://amplab.cs.berkeley.edu/jenkins/job/spark-master-test-sbt-hadoop-2.7)
-[![AppVeyor Build](https://img.shields.io/appveyor/ci/ApacheSoftwareFoundation/spark/master.svg?style=plastic&logo=appveyor)](https://ci.appveyor.com/project/ApacheSoftwareFoundation/spark)
-[![PySpark Coverage](https://img.shields.io/badge/dynamic/xml.svg?label=pyspark%20coverage&url=https%3A%2F%2Fspark-test.github.io%2Fpyspark-coverage-site&query=%2Fhtml%2Fbody%2Fdiv%5B1%5D%2Fdiv%2Fh1%2Fspan&colorB=brightgreen&style=plastic)](https://spark-test.github.io/pyspark-coverage-site)
-
-Spark is a fast and general cluster computing system for Big Data. It provides
+Spark is a unified analytics engine for large-scale data processing. It provides
 high-level APIs in Scala, Java, Python, and R, and an optimized engine that
 supports general computation graphs for data analysis. It also supports a
 rich set of higher-level tools including Spark SQL for SQL and DataFrames,
 MLlib for machine learning, GraphX for graph processing,
-and Spark Streaming for stream processing.
+and Structured Streaming for stream processing.
 
 <http://spark.apache.org/>
 
+[![Jenkins Build](https://amplab.cs.berkeley.edu/jenkins/job/spark-master-test-sbt-hadoop-2.7/badge/icon)](https://amplab.cs.berkeley.edu/jenkins/job/spark-master-test-sbt-hadoop-2.7)
+[![AppVeyor Build](https://img.shields.io/appveyor/ci/ApacheSoftwareFoundation/spark/master.svg?style=plastic&logo=appveyor)](https://ci.appveyor.com/project/ApacheSoftwareFoundation/spark)
+[![PySpark Coverage](https://img.shields.io/badge/dynamic/xml.svg?label=pyspark%20coverage&url=https%3A%2F%2Fspark-test.github.io%2Fpyspark-coverage-site&query=%2Fhtml%2Fbody%2Fdiv%5B1%5D%2Fdiv%2Fh1%2Fspan&colorB=brightgreen&style=plastic)](https://spark-test.github.io/pyspark-coverage-site)
+
 
 ## Online Documentation
 
@@ -41,19 +41,19 @@ The easiest way to start using Spark is through the Scala shell:
 
     ./bin/spark-shell
 
-Try the following command, which should return 1000:
+Try the following command, which should return 1,000,000,000:
 
-    scala> sc.parallelize(1 to 1000).count()
+    scala> spark.range(1000 * 1000 * 1000).count()
 
 ## Interactive Python Shell
 
 Alternatively, if you prefer Python, you can use the Python shell:
 
    ./bin/pyspark
 
-And run the following command, which should also return 1000:
+And run the following command, which should also return 1,000,000,000:
 
-    >>> sc.parallelize(range(1000)).count()
+    >>> spark.range(1000 * 1000 * 1000).count()
 
 ## Example Programs

bin/docker-image-tool.sh

Lines changed: 1 addition & 1 deletion
@@ -282,7 +282,7 @@ do
     if ! minikube status 1>/dev/null; then
       error "Cannot contact minikube. Make sure it's running."
     fi
-    eval $(minikube docker-env)
+    eval $(minikube docker-env --shell bash)
     ;;
   u) SPARK_UID=${OPTARG};;
   esac

common/network-yarn/pom.xml

Lines changed: 45 additions & 1 deletion
@@ -35,7 +35,7 @@
     <!-- Make sure all Hadoop dependencies are provided to avoid repackaging. -->
     <hadoop.deps.scope>provided</hadoop.deps.scope>
     <shuffle.jar>${project.build.directory}/scala-${scala.binary.version}/spark-${project.version}-yarn-shuffle.jar</shuffle.jar>
-    <shade>org/spark_project/</shade>
+    <shade>org/sparkproject/</shade>
   </properties>
 
   <dependencies>
@@ -128,6 +128,50 @@
         </execution>
       </executions>
     </plugin>
+    <!-- shade the native netty libs as well -->
+    <plugin>
+      <groupId>org.codehaus.mojo</groupId>
+      <artifactId>build-helper-maven-plugin</artifactId>
+      <executions>
+        <execution>
+          <id>regex-property</id>
+          <goals>
+            <goal>regex-property</goal>
+          </goals>
+          <configuration>
+            <name>spark.shade.native.packageName</name>
+            <value>${spark.shade.packageName}</value>
+            <regex>\.</regex>
+            <replacement>_</replacement>
+            <failIfNoMatch>true</failIfNoMatch>
+          </configuration>
+        </execution>
+      </executions>
+    </plugin>
+    <plugin>
+      <groupId>org.apache.maven.plugins</groupId>
+      <artifactId>maven-antrun-plugin</artifactId>
+      <executions>
+        <execution>
+          <id>unpack</id>
+          <phase>package</phase>
+          <configuration>
+            <target>
+              <echo message="Shade netty native libraries to ${spark.shade.native.packageName}" />
+              <unzip src="${shuffle.jar}" dest="${project.build.directory}/exploded/" />
+              <move file="${project.build.directory}/exploded/META-INF/native/libnetty_transport_native_epoll_x86_64.so"
+                    tofile="${project.build.directory}/exploded/META-INF/native/lib${spark.shade.native.packageName}_netty_transport_native_epoll_x86_64.so" />
+              <move file="${project.build.directory}/exploded/META-INF/native/libnetty_transport_native_kqueue_x86_64.jnilib"
+                    tofile="${project.build.directory}/exploded/META-INF/native/lib${spark.shade.native.packageName}_netty_transport_native_kqueue_x86_64.jnilib" />
+              <jar destfile="${shuffle.jar}" basedir="${project.build.directory}/exploded" />
+            </target>
+          </configuration>
+          <goals>
+            <goal>run</goal>
+          </goals>
+        </execution>
+      </executions>
+    </plugin>
 
     <!-- probes to validate that those dependencies which must be shaded are -->
     <plugin>
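
For reference, the plugin chain added above first derives spark.shade.native.packageName from spark.shade.packageName by replacing dots with underscores, then renames the unpacked netty native libraries with that prefix before re-jarring. Below is a minimal Scala sketch of that transformation, assuming spark.shade.packageName resolves to org.sparkproject (the shaded package used elsewhere in this commit); it is illustrative only, not part of the build.

// Sketch only: mirrors the regex-property and antrun <move> steps above.
// Assumption: ${spark.shade.packageName} resolves to "org.sparkproject".
object NettyShadeRenameSketch extends App {
  val shadePackageName = "org.sparkproject"
  // regex-property: <regex>\.</regex> replaced with <replacement>_</replacement>
  val nativePackageName = shadePackageName.replaceAll("\\.", "_") // "org_sparkproject"

  // antrun <move>: prefix the native library file names with the converted package name
  val original = "libnetty_transport_native_epoll_x86_64.so"
  val renamed = s"lib${nativePackageName}_netty_transport_native_epoll_x86_64.so"
  println(renamed) // prints liborg_sparkproject_netty_transport_native_epoll_x86_64.so
}

Netty looks up shaded native transports by this naming convention, which is why the library prefix has to match the shaded Java package with dots mapped to underscores.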

common/unsafe/src/main/java/org/apache/spark/unsafe/types/CalendarInterval.java

Lines changed: 2 additions & 0 deletions
@@ -319,6 +319,8 @@ public String toString() {
       appendUnit(sb, rest / MICROS_PER_MILLI, "millisecond");
       rest %= MICROS_PER_MILLI;
       appendUnit(sb, rest, "microsecond");
+    } else if (months == 0) {
+      sb.append(" 0 microseconds");
     }
 
     return sb.toString();

common/unsafe/src/test/java/org/apache/spark/unsafe/types/CalendarIntervalSuite.java

Lines changed: 3 additions & 0 deletions
@@ -41,6 +41,9 @@ public void equalsTest() {
   public void toStringTest() {
     CalendarInterval i;
 
+    i = new CalendarInterval(0, 0);
+    assertEquals("interval 0 microseconds", i.toString());
+
     i = new CalendarInterval(34, 0);
     assertEquals("interval 2 years 10 months", i.toString());

conf/log4j.properties.template

Lines changed: 2 additions & 2 deletions
@@ -28,8 +28,8 @@ log4j.appender.console.layout.ConversionPattern=%d{yy/MM/dd HH:mm:ss} %p %c{1}:
 log4j.logger.org.apache.spark.repl.Main=WARN
 
 # Settings to quiet third party logs that are too verbose
-log4j.logger.org.spark_project.jetty=WARN
-log4j.logger.org.spark_project.jetty.util.component.AbstractLifeCycle=ERROR
+log4j.logger.org.sparkproject.jetty=WARN
+log4j.logger.org.sparkproject.jetty.util.component.AbstractLifeCycle=ERROR
 log4j.logger.org.apache.spark.repl.SparkIMain$exprTyper=INFO
 log4j.logger.org.apache.spark.repl.SparkILoop$SparkILoopInterpreter=INFO
 log4j.logger.org.apache.parquet=ERROR

core/src/main/resources/org/apache/spark/log4j-defaults.properties

Lines changed: 2 additions & 2 deletions
@@ -28,8 +28,8 @@ log4j.appender.console.layout.ConversionPattern=%d{yy/MM/dd HH:mm:ss} %p %c{1}:
 log4j.logger.org.apache.spark.repl.Main=WARN
 
 # Settings to quiet third party logs that are too verbose
-log4j.logger.org.spark_project.jetty=WARN
-log4j.logger.org.spark_project.jetty.util.component.AbstractLifeCycle=ERROR
+log4j.logger.org.sparkproject.jetty=WARN
+log4j.logger.org.sparkproject.jetty.util.component.AbstractLifeCycle=ERROR
 log4j.logger.org.apache.spark.repl.SparkIMain$exprTyper=INFO
 log4j.logger.org.apache.spark.repl.SparkILoop$SparkILoopInterpreter=INFO

core/src/main/scala/org/apache/spark/api/python/PythonRDD.scala

Lines changed: 57 additions & 2 deletions
@@ -86,7 +86,7 @@ private[spark] case class PythonFunction(
 private[spark] case class ChainedPythonFunctions(funcs: Seq[PythonFunction])
 
 /** Thrown for exceptions in user Python code. */
-private[spark] class PythonException(msg: String, cause: Exception)
+private[spark] class PythonException(msg: String, cause: Throwable)
   extends RuntimeException(msg, cause)
 
 /**
@@ -163,8 +163,63 @@ private[spark] object PythonRDD extends Logging {
     serveIterator(rdd.collect().iterator, s"serve RDD ${rdd.id}")
   }
 
+  /**
+   * A helper function to create a local RDD iterator and serve it via socket. Partitions are
+   * collected as separate jobs, by order of index. Partition data is first requested by a
+   * non-zero integer to start a collection job. The response is prefaced by an integer with 1
+   * meaning partition data will be served, 0 meaning the local iterator has been consumed,
+   * and -1 meaning an error occurred during collection. This function is used by
+   * pyspark.rdd._local_iterator_from_socket().
+   *
+   * @return 2-tuple (as a Java array) with the port number of a local socket which serves the
+   *         data collected from these jobs, and the secret for authentication.
+   */
   def toLocalIteratorAndServe[T](rdd: RDD[T]): Array[Any] = {
-    serveIterator(rdd.toLocalIterator, s"serve toLocalIterator")
+    val (port, secret) = SocketAuthServer.setupOneConnectionServer(
+      authHelper, "serve toLocalIterator") { s =>
+      val out = new DataOutputStream(s.getOutputStream)
+      val in = new DataInputStream(s.getInputStream)
+      Utils.tryWithSafeFinally {
+
+        // Collects a partition on each iteration
+        val collectPartitionIter = rdd.partitions.indices.iterator.map { i =>
+          rdd.sparkContext.runJob(rdd, (iter: Iterator[Any]) => iter.toArray, Seq(i)).head
+        }
+
+        // Read request for data and send next partition if nonzero
+        var complete = false
+        while (!complete && in.readInt() != 0) {
+          if (collectPartitionIter.hasNext) {
+            try {
+              // Attempt to collect the next partition
+              val partitionArray = collectPartitionIter.next()
+
+              // Send response that there is a partition to read
+              out.writeInt(1)
+
+              // Write the next object and signal end of data for this iteration
+              writeIteratorToStream(partitionArray.toIterator, out)
+              out.writeInt(SpecialLengths.END_OF_DATA_SECTION)
+              out.flush()
+            } catch {
+              case e: SparkException =>
+                // Send response that an error occurred, followed by the error message
+                out.writeInt(-1)
+                writeUTF(e.getMessage, out)
+                complete = true
+            }
+          } else {
+            // Send response that there are no more partitions to read, and close
+            out.writeInt(0)
+            complete = true
+          }
+        }
+      } {
+        out.close()
+        in.close()
+      }
+    }
+    Array(port, secret)
   }
 
   def readRDDFromFile(
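
The scaladoc added above describes a small request/response protocol between this JVM-side server and the Python client. Below is a minimal Scala sketch of a client that follows that protocol. It is not the real consumer (that is pyspark.rdd._local_iterator_from_socket()), and it assumes that END_OF_DATA_SECTION is -1 and is the only sentinel terminating a partition's payload, that each element arrives as a length-prefixed byte blob (the common case for pickled RDD data), and that authentication with the returned secret has already been handled.

import java.io.{DataInputStream, DataOutputStream}
import java.net.Socket
import java.nio.charset.StandardCharsets

// Hypothetical client-side sketch of the toLocalIteratorAndServe protocol above.
object LocalIteratorClientSketch {
  val EndOfDataSection = -1  // assumed value of SpecialLengths.END_OF_DATA_SECTION

  def consume(port: Int): Unit = {
    val socket = new Socket("localhost", port)
    val out = new DataOutputStream(socket.getOutputStream)
    val in = new DataInputStream(socket.getInputStream)
    try {
      var done = false
      while (!done) {
        out.writeInt(1)  // any non-zero value asks the server to collect the next partition
        out.flush()
        in.readInt() match {
          case 1 =>
            // Partition data follows as length-prefixed blobs, terminated by END_OF_DATA_SECTION
            var len = in.readInt()
            while (len != EndOfDataSection) {
              val buf = new Array[Byte](len)
              in.readFully(buf)  // a real client would deserialize these bytes
              len = in.readInt()
            }
          case 0 =>
            // The server has exhausted the local iterator
            done = true
          case -1 =>
            // A collection job failed; an error message (length-prefixed UTF-8 bytes) follows
            val msgLen = in.readInt()
            val msgBytes = new Array[Byte](msgLen)
            in.readFully(msgBytes)
            sys.error(new String(msgBytes, StandardCharsets.UTF_8))
          case other =>
            sys.error(s"Unexpected response code: $other")
        }
      }
    } finally {
      out.close()
      in.close()
      socket.close()
    }
  }
}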
