Commit 01e5ce3

Merge branch 'master' into SPARK-31830
2 parents 0680855 + e289140

180 files changed, 3918 insertions(+), 3422 deletions(-)

bin/docker-image-tool.sh

Lines changed: 27 additions & 1 deletion
@@ -172,13 +172,19 @@ function build {
   local BASEDOCKERFILE=${BASEDOCKERFILE:-"kubernetes/dockerfiles/spark/Dockerfile"}
   local PYDOCKERFILE=${PYDOCKERFILE:-false}
   local RDOCKERFILE=${RDOCKERFILE:-false}
+  local ARCHS=${ARCHS:-"--platform linux/amd64,linux/arm64"}
 
   (cd $(img_ctx_dir base) && docker build $NOCACHEARG "${BUILD_ARGS[@]}" \
     -t $(image_ref spark) \
     -f "$BASEDOCKERFILE" .)
   if [ $? -ne 0 ]; then
     error "Failed to build Spark JVM Docker image, please refer to Docker build output for details."
   fi
+  if [ "${CROSS_BUILD}" != "false" ]; then
+    (cd $(img_ctx_dir base) && docker buildx build $ARCHS $NOCACHEARG "${BUILD_ARGS[@]}" \
+      -t $(image_ref spark) \
+      -f "$BASEDOCKERFILE" .)
+  fi
 
   if [ "${PYDOCKERFILE}" != "false" ]; then
     (cd $(img_ctx_dir pyspark) && docker build $NOCACHEARG "${BINDING_BUILD_ARGS[@]}" \
@@ -187,6 +193,11 @@ function build {
     if [ $? -ne 0 ]; then
       error "Failed to build PySpark Docker image, please refer to Docker build output for details."
     fi
+    if [ "${CROSS_BUILD}" != "false" ]; then
+      (cd $(img_ctx_dir pyspark) && docker buildx build $ARCHS $NOCACHEARG "${BINDING_BUILD_ARGS[@]}" \
+        -t $(image_ref spark-py) \
+        -f "$PYDOCKERFILE" .)
+    fi
   fi
 
   if [ "${RDOCKERFILE}" != "false" ]; then
@@ -196,6 +207,11 @@ function build {
     if [ $? -ne 0 ]; then
       error "Failed to build SparkR Docker image, please refer to Docker build output for details."
     fi
+    if [ "${CROSS_BUILD}" != "false" ]; then
+      (cd $(img_ctx_dir sparkr) && docker buildx build $ARCHS $NOCACHEARG "${BINDING_BUILD_ARGS[@]}" \
+        -t $(image_ref spark-r) \
+        -f "$RDOCKERFILE" .)
+    fi
   fi
 }
 
@@ -227,6 +243,8 @@ Options:
   -n          Build docker image with --no-cache
   -u uid      UID to use in the USER directive to set the user the main Spark process runs as inside the
               resulting container
+  -X          Use docker buildx to cross build. Automatically pushes.
+              See https://docs.docker.com/buildx/working-with-buildx/ for steps to setup buildx.
   -b arg      Build arg to build or push the image. For multiple build args, this option needs to
               be used separately for each build arg.
 
@@ -252,6 +270,12 @@ Examples:
   - Build and push JDK11-based image with tag "v3.0.0" to docker.io/myrepo
     $0 -r docker.io/myrepo -t v3.0.0 -b java_image_tag=11-jre-slim build
     $0 -r docker.io/myrepo -t v3.0.0 push
+
+  - Build and push JDK11-based image for multiple archs to docker.io/myrepo
+    $0 -r docker.io/myrepo -t v3.0.0 -X -b java_image_tag=11-jre-slim build
+    # Note: buildx, which does cross building, needs to do the push during build
+    # So there is no seperate push step with -X
+
 EOF
 }
 
@@ -268,7 +292,8 @@ RDOCKERFILE=
 NOCACHEARG=
 BUILD_PARAMS=
 SPARK_UID=
-while getopts f:p:R:mr:t:nb:u: option
+CROSS_BUILD="false"
+while getopts f:p:R:mr:t:Xnb:u: option
 do
 case "${option}"
 in
@@ -279,6 +304,7 @@ do
 t) TAG=${OPTARG};;
 n) NOCACHEARG="--no-cache";;
 b) BUILD_PARAMS=${BUILD_PARAMS}" --build-arg "${OPTARG};;
+X) CROSS_BUILD=1;;
 m)
   if ! which minikube 1>/dev/null; then
     error "Cannot find minikube."

Lines changed: 7 additions & 14 deletions
@@ -15,20 +15,13 @@
  * limitations under the License.
  */
 
-package org.apache.spark.sql.hive.thriftserver
+package org.apache.spark.tags;
 
-import org.apache.spark.sql.catalyst.catalog.CatalogTableType
-import org.apache.spark.sql.catalyst.catalog.CatalogTableType.{EXTERNAL, MANAGED, VIEW}
+import java.lang.annotation.*;
 
-/**
- * Utils for metadata operations.
- */
-private[hive] trait SparkMetadataOperationUtils {
+import org.scalatest.TagAnnotation;
 
-  def tableTypeString(tableType: CatalogTableType): String = tableType match {
-    case EXTERNAL | MANAGED => "TABLE"
-    case VIEW => "VIEW"
-    case t =>
-      throw new IllegalArgumentException(s"Unknown table type is found: $t")
-  }
-}
+@TagAnnotation
+@Retention(RetentionPolicy.RUNTIME)
+@Target({ElementType.METHOD, ElementType.TYPE})
+public @interface ChromeUITest { }
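
Because ChromeUITest is meta-annotated with ScalaTest's @TagAnnotation and retained at runtime, placing it on a test class tags every test in that class with "org.apache.spark.tags.ChromeUITest", so Chrome-driven UI suites can be included or excluded by tag filter. A minimal sketch of how a suite might use it; the suite name and test body are hypothetical, and it assumes ScalaTest 3.1+ for AnyFunSuite:

import org.apache.spark.tags.ChromeUITest
import org.scalatest.funsuite.AnyFunSuite

// Hypothetical suite: the class-level Java annotation tags every test in it,
// so a build can skip Chrome-dependent tests with ScalaTest's tag filters
// (e.g. exclude with -l org.apache.spark.tags.ChromeUITest).
@ChromeUITest
class ExampleChromeUISuite extends AnyFunSuite {
  test("renders a page in Chrome") {
    // A real suite would drive a Chrome/Chromium WebDriver here.
    assert(true)
  }
}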

core/src/main/resources/org/apache/spark/ui/static/spark-dag-viz.js

Lines changed: 7 additions & 8 deletions
@@ -173,9 +173,11 @@ function renderDagViz(forJob) {
   });
 
   metadataContainer().selectAll(".barrier-rdd").each(function() {
-    var rddId = d3.select(this).text().trim();
-    var clusterId = VizConstants.clusterPrefix + rddId;
-    svg.selectAll("g." + clusterId).classed("barrier", true)
+    var opId = d3.select(this).text().trim();
+    var opClusterId = VizConstants.clusterPrefix + opId;
+    var stageId = $(this).parents(".stage-metadata").attr("stage-id");
+    var stageClusterId = VizConstants.graphPrefix + stageId;
+    svg.selectAll("g[id=" + stageClusterId + "] g." + opClusterId).classed("barrier", true)
   });
 
   resizeSvg(svg);
@@ -216,7 +218,7 @@ function renderDagVizForJob(svgContainer) {
   var dot = metadata.select(".dot-file").text();
   var stageId = metadata.attr("stage-id");
   var containerId = VizConstants.graphPrefix + stageId;
-  var isSkipped = metadata.attr("skipped") == "true";
+  var isSkipped = metadata.attr("skipped") === "true";
   var container;
   if (isSkipped) {
     container = svgContainer
@@ -225,11 +227,8 @@ function renderDagVizForJob(svgContainer) {
       .attr("skipped", "true");
   } else {
     // Link each graph to the corresponding stage page (TODO: handle stage attempts)
-    // Use the link from the stage table so it also works for the history server
     var attemptId = 0;
-    var stageLink = d3.select("#stage-" + stageId + "-" + attemptId)
-      .select("a.name-link")
-      .attr("href");
+    var stageLink = uiRoot + appBasePath + "/stages/stage/?id=" + stageId + "&attempt=" + attemptId;
     container = svgContainer
       .append("a")
       .attr("xlink:href", stageLink)

core/src/main/resources/org/apache/spark/ui/static/webui.js

Lines changed: 6 additions & 1 deletion
@@ -16,11 +16,16 @@
  */
 
 var uiRoot = "";
+var appBasePath = "";
 
 function setUIRoot(val) {
   uiRoot = val;
 }
 
+function setAppBasePath(path) {
+  appBasePath = path;
+}
+
 function collapseTablePageLoad(name, table){
   if (window.localStorage.getItem(name) == "true") {
     // Set it to false so that the click function can revert it
@@ -33,7 +38,7 @@ function collapseTable(thisName, table){
   var status = window.localStorage.getItem(thisName) == "true";
   status = !status;
 
-  var thisClass = '.' + thisName
+  var thisClass = '.' + thisName;
 
   // Expand the list of additional metrics.
   var tableDiv = $(thisClass).parent().find('.' + table);

core/src/main/scala/org/apache/spark/rdd/RDD.scala

Lines changed: 4 additions & 6 deletions
@@ -1780,10 +1780,9 @@ abstract class RDD[T: ClassTag](
    * It will result in new executors with the resources specified being acquired to
    * calculate the RDD.
    */
-  // PRIVATE for now, added for testing purposes, will be made public with SPARK-29150
   @Experimental
-  @Since("3.0.0")
-  private[spark] def withResources(rp: ResourceProfile): this.type = {
+  @Since("3.1.0")
+  def withResources(rp: ResourceProfile): this.type = {
     resourceProfile = Option(rp)
     sc.resourceProfileManager.addResourceProfile(resourceProfile.get)
     this
@@ -1794,10 +1793,9 @@ abstract class RDD[T: ClassTag](
    * @return the user specified ResourceProfile or null (for Java compatibility) if
    *         none was specified
    */
-  // PRIVATE for now, added for testing purposes, will be made public with SPARK-29150
   @Experimental
-  @Since("3.0.0")
-  private[spark] def getResourceProfile(): ResourceProfile = resourceProfile.getOrElse(null)
+  @Since("3.1.0")
+  def getResourceProfile(): ResourceProfile = resourceProfile.getOrElse(null)
 
   // =======================================================================
   // Other internal methods and fields
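
With withResources and getResourceProfile now public and targeted at 3.1.0, a stage-level resource profile can be attached directly to an RDD. A rough sketch of the intended flow, assuming an active SparkContext `sc` and a cluster manager with dynamic allocation that supports stage-level scheduling; the builder and request method names (require, cores, memory, cpus, resource, build) come from the companion resource classes below rather than from this hunk, and the amounts are illustrative only:

import org.apache.spark.resource.{ExecutorResourceRequests, ResourceProfileBuilder, TaskResourceRequests}

// Executor- and task-side requirements for the stages that compute this RDD.
val execReqs = new ExecutorResourceRequests().cores(4).memory("6g").resource("gpu", 1)
val taskReqs = new TaskResourceRequests().cpus(1).resource("gpu", 1)
val profile = new ResourceProfileBuilder().require(execReqs).require(taskReqs).build

val rdd = sc.parallelize(1 to 1000000, 16)
  .map(_ * 2)
  .withResources(profile)              // registers the profile with the ResourceProfileManager

// Returns the profile attached to this RDD, or null if none was set.
assert(rdd.getResourceProfile() == profile)
rdd.count()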

core/src/main/scala/org/apache/spark/resource/ExecutorResourceRequest.scala

Lines changed: 5 additions & 4 deletions
@@ -17,6 +17,8 @@
 
 package org.apache.spark.resource
 
+import org.apache.spark.annotation.{Evolving, Since}
+
 /**
  * An Executor resource request. This is used in conjunction with the ResourceProfile to
  * programmatically specify the resources needed for an RDD that will be applied at the
@@ -46,11 +48,10 @@ package org.apache.spark.resource
  *                        allocated. The script runs on Executors startup to discover the addresses
  *                        of the resources available.
  * @param vendor Optional vendor, required for some cluster managers
- *
- * This api is currently private until the rest of the pieces are in place and then it
- * will become public.
  */
-private[spark] class ExecutorResourceRequest(
+@Evolving
+@Since("3.1.0")
+class ExecutorResourceRequest(
     val resourceName: String,
     val amount: Long,
     val discoveryScript: String = "",

core/src/main/scala/org/apache/spark/resource/ExecutorResourceRequests.scala

Lines changed: 4 additions & 4 deletions
@@ -22,18 +22,18 @@ import java.util.concurrent.ConcurrentHashMap
 
 import scala.collection.JavaConverters._
 
+import org.apache.spark.annotation.{Evolving, Since}
 import org.apache.spark.network.util.JavaUtils
 import org.apache.spark.resource.ResourceProfile._
 
 /**
  * A set of Executor resource requests. This is used in conjunction with the ResourceProfile to
  * programmatically specify the resources needed for an RDD that will be applied at the
  * stage level.
- *
- * This api is currently private until the rest of the pieces are in place and then it
- * will become public.
  */
-private[spark] class ExecutorResourceRequests() extends Serializable {
+@Evolving
+@Since("3.1.0")
+class ExecutorResourceRequests() extends Serializable {
 
   private val _executorResources = new ConcurrentHashMap[String, ExecutorResourceRequest]()
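
ExecutorResourceRequests is the mutable, fluent container backed by the ConcurrentHashMap above. A sketch of how requests might be chained; the method names (cores, memory, memoryOverhead, resource, requests) are assumed from the surrounding convenience API rather than shown in this hunk, and the amounts and script path are illustrative:

import org.apache.spark.resource.ExecutorResourceRequests

val execReqs = new ExecutorResourceRequests()
  .cores(4)                                    // executor cores
  .memory("8g")                                // executor heap
  .memoryOverhead("2g")                        // non-heap overhead
  .resource("gpu", 2, "/opt/spark/getGpus.sh") // custom resource, placeholder discovery script

// Each call stores one ExecutorResourceRequest keyed by its resource name.
println(execReqs.requests.keys.mkString(", "))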

core/src/main/scala/org/apache/spark/resource/ResourceProfile.scala

Lines changed: 2 additions & 1 deletion
@@ -25,7 +25,7 @@ import scala.collection.JavaConverters._
 import scala.collection.mutable
 
 import org.apache.spark.{SparkConf, SparkException}
-import org.apache.spark.annotation.Evolving
+import org.apache.spark.annotation.{Evolving, Since}
 import org.apache.spark.internal.Logging
 import org.apache.spark.internal.config._
 import org.apache.spark.internal.config.Python.PYSPARK_EXECUTOR_MEMORY
@@ -37,6 +37,7 @@ import org.apache.spark.internal.config.Python.PYSPARK_EXECUTOR_MEMORY
  * This is meant to be immutable so user can't change it after building.
  */
 @Evolving
+@Since("3.1.0")
 class ResourceProfile(
     val executorResources: Map[String, ExecutorResourceRequest],
     val taskResources: Map[String, TaskResourceRequest]) extends Serializable with Logging {

core/src/main/scala/org/apache/spark/resource/ResourceProfileBuilder.scala

Lines changed: 5 additions & 2 deletions
@@ -22,16 +22,19 @@ import java.util.concurrent.ConcurrentHashMap
 
 import scala.collection.JavaConverters._
 
-import org.apache.spark.annotation.Evolving
+import org.apache.spark.annotation.{Evolving, Since}
+
 
 /**
  * Resource profile builder to build a Resource profile to associate with an RDD.
  * A ResourceProfile allows the user to specify executor and task requirements for an RDD
  * that will get applied during a stage. This allows the user to change the resource
  * requirements between stages.
+ *
  */
 @Evolving
-private[spark] class ResourceProfileBuilder() {
+@Since("3.1.0")
+class ResourceProfileBuilder() {
 
   private val _taskResources = new ConcurrentHashMap[String, TaskResourceRequest]()
   private val _executorResources = new ConcurrentHashMap[String, ExecutorResourceRequest]()
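
The builder collects task- and executor-side requests into those two maps and then snapshots them into an immutable ResourceProfile. A sketch of the expected flow, assuming require and build as the builder's public methods (they are not part of this hunk):

import org.apache.spark.resource.{ExecutorResourceRequests, ResourceProfileBuilder, TaskResourceRequests}

val builder = new ResourceProfileBuilder()
builder.require(new ExecutorResourceRequests().cores(2).memory("4g"))
builder.require(new TaskResourceRequests().cpus(1))

// build captures the current requests into an immutable ResourceProfile.
val profile = builder.build
println(profile.executorResources.keys)   // e.g. cores, memory
println(profile.taskResources.keys)       // e.g. cpus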

core/src/main/scala/org/apache/spark/resource/TaskResourceRequest.scala

Lines changed: 5 additions & 4 deletions
@@ -17,17 +17,18 @@
 
 package org.apache.spark.resource
 
+import org.apache.spark.annotation.{Evolving, Since}
+
 /**
  * A task resource request. This is used in conjuntion with the ResourceProfile to
  * programmatically specify the resources needed for an RDD that will be applied at the
  * stage level.
  *
  * Use TaskResourceRequests class as a convenience API.
- *
- * This api is currently private until the rest of the pieces are in place and then it
- * will become public.
  */
-private[spark] class TaskResourceRequest(val resourceName: String, val amount: Double)
+@Evolving
+@Since("3.1.0")
+class TaskResourceRequest(val resourceName: String, val amount: Double)
   extends Serializable {
 
   assert(amount <= 0.5 || amount % 1 == 0,
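
The assertion above allows either whole-number amounts or fractions of at most 0.5, so several tasks can share one address of a resource. A short sketch using only the constructor shown in this file (the scaladoc recommends TaskResourceRequests as the day-to-day API):

import org.apache.spark.resource.TaskResourceRequest

// Whole amounts: each task needs two gpu addresses.
val twoGpus = new TaskResourceRequest("gpu", 2.0)

// Fractional amounts up to 0.5: here four tasks share a single gpu address.
val quarterGpu = new TaskResourceRequest("gpu", 0.25)

// Amounts above 0.5 that are not whole numbers trip the assert above:
// new TaskResourceRequest("gpu", 0.75)   // AssertionError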
