Skip to content

Commit 0310e13

Browse files
committed
Merge remote-tracking branch 'upstream/master' into SPARK-28461
# Conflicts: # docs/sql-migration-guide.md # sql/core/src/main/scala/org/apache/spark/sql/execution/HiveResult.scala # sql/core/src/test/resources/sql-tests/results/decimalArithmeticOperations.sql.out # sql/core/src/test/resources/sql-tests/results/literals.sql.out # sql/core/src/test/resources/sql-tests/results/postgreSQL/numeric.sql.out
2 parents 91bd74a + a60da23 commit 0310e13

File tree

1,237 files changed

+44622
-11602
lines changed

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

1,237 files changed

+44622
-11602
lines changed

.github/workflows/master.yml

Lines changed: 57 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -16,13 +16,43 @@ jobs:
1616
matrix:
1717
java: [ '1.8', '11' ]
1818
hadoop: [ 'hadoop-2.7', 'hadoop-3.2' ]
19+
hive: [ 'hive-1.2', 'hive-2.3' ]
1920
exclude:
2021
- java: '11'
2122
hadoop: 'hadoop-2.7'
22-
name: Build Spark with JDK ${{ matrix.java }} and ${{ matrix.hadoop }}
23+
- java: '11'
24+
hive: 'hive-1.2'
25+
- hadoop: 'hadoop-3.2'
26+
hive: 'hive-1.2'
27+
name: Build Spark - JDK${{ matrix.java }}/${{ matrix.hadoop }}/${{ matrix.hive }}
2328

2429
steps:
2530
- uses: actions/checkout@master
31+
# We split caches because GitHub Action Cache has a 400MB-size limit.
32+
- uses: actions/cache@v1
33+
with:
34+
path: ~/.m2/repository/com
35+
key: ${{ matrix.java }}-${{ matrix.hadoop }}-maven-com-${{ hashFiles('**/pom.xml') }}
36+
restore-keys: |
37+
${{ matrix.java }}-${{ matrix.hadoop }}-maven-com-
38+
- uses: actions/cache@v1
39+
with:
40+
path: ~/.m2/repository/org
41+
key: ${{ matrix.java }}-${{ matrix.hadoop }}-maven-org-${{ hashFiles('**/pom.xml') }}
42+
restore-keys: |
43+
${{ matrix.java }}-${{ matrix.hadoop }}-maven-org-
44+
- uses: actions/cache@v1
45+
with:
46+
path: ~/.m2/repository/net
47+
key: ${{ matrix.java }}-${{ matrix.hadoop }}-maven-net-${{ hashFiles('**/pom.xml') }}
48+
restore-keys: |
49+
${{ matrix.java }}-${{ matrix.hadoop }}-maven-net-
50+
- uses: actions/cache@v1
51+
with:
52+
path: ~/.m2/repository/io
53+
key: ${{ matrix.java }}-${{ matrix.hadoop }}-maven-io-${{ hashFiles('**/pom.xml') }}
54+
restore-keys: |
55+
${{ matrix.java }}-${{ matrix.hadoop }}-maven-io-
2656
- name: Set up JDK ${{ matrix.java }}
2757
uses: actions/setup-java@v1
2858
with:
@@ -31,12 +61,13 @@ jobs:
3161
run: |
3262
export MAVEN_OPTS="-Xmx2g -XX:ReservedCodeCacheSize=1g -Dorg.slf4j.simpleLogger.defaultLogLevel=WARN"
3363
export MAVEN_CLI_OPTS="--no-transfer-progress"
34-
./build/mvn $MAVEN_CLI_OPTS -DskipTests -Pyarn -Pmesos -Pkubernetes -Phive -Phive-thriftserver -P${{ matrix.hadoop }} -Phadoop-cloud -Djava.version=${{ matrix.java }} package
64+
./build/mvn $MAVEN_CLI_OPTS -DskipTests -Pyarn -Pmesos -Pkubernetes -Phive -P${{ matrix.hive }} -Phive-thriftserver -P${{ matrix.hadoop }} -Phadoop-cloud -Djava.version=${{ matrix.java }} install
65+
rm -rf ~/.m2/repository/org/apache/spark
3566
3667
3768
lint:
3869
runs-on: ubuntu-latest
39-
name: Linters
70+
name: Linters (Java/Scala/Python), licenses, dependencies
4071
steps:
4172
- uses: actions/checkout@master
4273
- uses: actions/setup-java@v1
@@ -58,3 +89,26 @@ jobs:
5889
run: ./dev/check-license
5990
- name: Dependencies
6091
run: ./dev/test-dependencies.sh
92+
93+
lintr:
94+
runs-on: ubuntu-latest
95+
name: Linter (R)
96+
steps:
97+
- uses: actions/checkout@master
98+
- uses: actions/setup-java@v1
99+
with:
100+
java-version: '11'
101+
- name: install R
102+
run: |
103+
echo 'deb https://cloud.r-project.org/bin/linux/ubuntu bionic-cran35/' | sudo tee -a /etc/apt/sources.list
104+
curl -sL "https://keyserver.ubuntu.com/pks/lookup?op=get&search=0xE298A3A825C0D65DFD57CBB651716619E084DAB9" | sudo apt-key add
105+
sudo apt-get update
106+
sudo apt-get install -y r-base r-base-dev libcurl4-openssl-dev
107+
- name: install R packages
108+
run: |
109+
sudo Rscript -e "install.packages(c('curl', 'xml2', 'httr', 'devtools', 'testthat', 'knitr', 'rmarkdown', 'roxygen2', 'e1071', 'survival'), repos='https://cloud.r-project.org/')"
110+
sudo Rscript -e "devtools::install_github('jimhester/lintr@v2.0.0')"
111+
- name: package and install SparkR
112+
run: ./R/install-dev.sh
113+
- name: lint-r
114+
run: ./dev/lint-r

LICENSE

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -216,6 +216,7 @@ core/src/main/resources/org/apache/spark/ui/static/bootstrap*
216216
core/src/main/resources/org/apache/spark/ui/static/jsonFormatter*
217217
core/src/main/resources/org/apache/spark/ui/static/vis*
218218
docs/js/vendor/bootstrap.js
219+
external/spark-ganglia-lgpl/src/main/java/com/codahale/metrics/ganglia/GangliaReporter.java
219220

220221

221222
Python Software Foundation License

LICENSE-binary

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -243,10 +243,10 @@ com.vlkan:flatbuffers
243243
com.ning:compress-lzf
244244
io.airlift:aircompressor
245245
io.dropwizard.metrics:metrics-core
246-
io.dropwizard.metrics:metrics-ganglia
247246
io.dropwizard.metrics:metrics-graphite
248247
io.dropwizard.metrics:metrics-json
249248
io.dropwizard.metrics:metrics-jvm
249+
io.dropwizard.metrics:metrics-jmx
250250
org.iq80.snappy:snappy
251251
com.clearspring.analytics:stream
252252
com.jamesmurty.utils:java-xmlbuilder
@@ -502,7 +502,7 @@ com.github.scopt:scopt_2.12
502502
core/src/main/resources/org/apache/spark/ui/static/dagre-d3.min.js
503503
core/src/main/resources/org/apache/spark/ui/static/*dataTables*
504504
core/src/main/resources/org/apache/spark/ui/static/graphlib-dot.min.js
505-
ore/src/main/resources/org/apache/spark/ui/static/jquery*
505+
core/src/main/resources/org/apache/spark/ui/static/jquery*
506506
core/src/main/resources/org/apache/spark/ui/static/sorttable.js
507507
docs/js/vendor/anchor.min.js
508508
docs/js/vendor/jquery*
@@ -544,6 +544,7 @@ Eclipse Distribution License (EDL) 1.0
544544
--------------------------------------
545545

546546
org.glassfish.jaxb:jaxb-runtime
547+
jakarta.activation:jakarta.activation-api
547548
jakarta.xml.bind:jakarta.xml.bind-api
548549
com.sun.istack:istack-commons-runtime
549550

NOTICE

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -26,3 +26,16 @@ The following provides more details on the included cryptographic software:
2626
This software uses Apache Commons Crypto (https://commons.apache.org/proper/commons-crypto/) to
2727
support authentication, and encryption and decryption of data sent across the network between
2828
services.
29+
30+
31+
Metrics
32+
Copyright 2010-2013 Coda Hale and Yammer, Inc.
33+
34+
This product includes software developed by Coda Hale and Yammer, Inc.
35+
36+
This product includes code derived from the JSR-166 project (ThreadLocalRandom, Striped64,
37+
LongAdder), which was released with the following comments:
38+
39+
Written by Doug Lea with assistance from members of JCP JSR-166
40+
Expert Group and released to the public domain, as explained at
41+
http://creativecommons.org/publicdomain/zero/1.0/

NOTICE-binary

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1515,3 +1515,16 @@ Copyright 2014-2017 The Apache Software Foundation
15151515

15161516
This product includes software developed at
15171517
The Apache Software Foundation (http://www.apache.org/).
1518+
1519+
1520+
Metrics
1521+
Copyright 2010-2013 Coda Hale and Yammer, Inc.
1522+
1523+
This product includes software developed by Coda Hale and Yammer, Inc.
1524+
1525+
This product includes code derived from the JSR-166 project (ThreadLocalRandom, Striped64,
1526+
LongAdder), which was released with the following comments:
1527+
1528+
Written by Doug Lea with assistance from members of JCP JSR-166
1529+
Expert Group and released to the public domain, as explained at
1530+
http://creativecommons.org/publicdomain/zero/1.0/

R/pkg/.lintr

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,2 +1,2 @@
1-
linters: with_defaults(line_length_linter(100), multiple_dots_linter = NULL, object_name_linter = NULL, camel_case_linter = NULL, open_curly_linter(allow_single_line = TRUE), closed_curly_linter(allow_single_line = TRUE))
1+
linters: with_defaults(line_length_linter(100), multiple_dots_linter = NULL, object_name_linter = NULL, camel_case_linter = NULL, open_curly_linter(allow_single_line = TRUE), closed_curly_linter(allow_single_line = TRUE), object_usage_linter = NULL, cyclocomp_linter = NULL)
22
exclusions: list("inst/profile/general.R" = 1, "inst/profile/shell.R")

R/pkg/R/DataFrame.R

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -2252,7 +2252,7 @@ setMethod("mutate",
22522252

22532253
# The last column of the same name in the specific columns takes effect
22542254
deDupCols <- list()
2255-
for (i in 1:length(cols)) {
2255+
for (i in seq_len(length(cols))) {
22562256
deDupCols[[ns[[i]]]] <- alias(cols[[i]], ns[[i]])
22572257
}
22582258

@@ -2416,7 +2416,7 @@ setMethod("arrange",
24162416
# builds a list of columns of type Column
24172417
# example: [[1]] Column Species ASC
24182418
# [[2]] Column Petal_Length DESC
2419-
jcols <- lapply(seq_len(length(decreasing)), function(i){
2419+
jcols <- lapply(seq_len(length(decreasing)), function(i) {
24202420
if (decreasing[[i]]) {
24212421
desc(getColumn(x, by[[i]]))
24222422
} else {
@@ -2749,7 +2749,7 @@ genAliasesForIntersectedCols <- function(x, intersectedColNames, suffix) {
27492749
col <- getColumn(x, colName)
27502750
if (colName %in% intersectedColNames) {
27512751
newJoin <- paste(colName, suffix, sep = "")
2752-
if (newJoin %in% allColNames){
2752+
if (newJoin %in% allColNames) {
27532753
stop("The following column name: ", newJoin, " occurs more than once in the 'DataFrame'.",
27542754
"Please use different suffixes for the intersected columns.")
27552755
}
@@ -3475,7 +3475,7 @@ setMethod("str",
34753475
cat(paste0("'", class(object), "': ", length(names), " variables:\n"))
34763476

34773477
if (nrow(localDF) > 0) {
3478-
for (i in 1 : ncol(localDF)) {
3478+
for (i in seq_len(ncol(localDF))) {
34793479
# Get the first elements for each column
34803480

34813481
firstElements <- if (types[i] == "character") {

R/pkg/R/SQLContext.R

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -166,9 +166,9 @@ writeToFileInArrow <- function(fileName, rdf, numPartitions) {
166166
for (rdf_slice in rdf_slices) {
167167
batch <- arrow::record_batch(rdf_slice)
168168
if (is.null(stream_writer)) {
169-
stream <- arrow::FileOutputStream(fileName)
169+
stream <- arrow::FileOutputStream$create(fileName)
170170
schema <- batch$schema
171-
stream_writer <- arrow::RecordBatchStreamWriter(stream, schema)
171+
stream_writer <- arrow::RecordBatchStreamWriter$create(stream, schema)
172172
}
173173

174174
stream_writer$write_batch(batch)
@@ -197,7 +197,7 @@ getSchema <- function(schema, firstRow = NULL, rdd = NULL) {
197197
as.list(schema)
198198
}
199199
if (is.null(names)) {
200-
names <- lapply(1:length(firstRow), function(x) {
200+
names <- lapply(seq_len(length(firstRow)), function(x) {
201201
paste0("_", as.character(x))
202202
})
203203
}
@@ -213,7 +213,7 @@ getSchema <- function(schema, firstRow = NULL, rdd = NULL) {
213213
})
214214

215215
types <- lapply(firstRow, infer_type)
216-
fields <- lapply(1:length(firstRow), function(i) {
216+
fields <- lapply(seq_len(length(firstRow)), function(i) {
217217
structField(names[[i]], types[[i]], TRUE)
218218
})
219219
schema <- do.call(structType, fields)

R/pkg/R/context.R

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -416,7 +416,7 @@ spark.getSparkFiles <- function(fileName) {
416416
#' @examples
417417
#'\dontrun{
418418
#' sparkR.session()
419-
#' doubled <- spark.lapply(1:10, function(x){2 * x})
419+
#' doubled <- spark.lapply(1:10, function(x) {2 * x})
420420
#'}
421421
#' @note spark.lapply since 2.0.0
422422
spark.lapply <- function(list, func) {

R/pkg/R/deserialize.R

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -242,7 +242,7 @@ readDeserializeInArrow <- function(inputCon) {
242242
# for now.
243243
dataLen <- readInt(inputCon)
244244
arrowData <- readBin(inputCon, raw(), as.integer(dataLen), endian = "big")
245-
batches <- arrow::RecordBatchStreamReader(arrowData)$batches()
245+
batches <- arrow::RecordBatchStreamReader$create(arrowData)$batches()
246246

247247
if (useAsTibble) {
248248
as_tibble <- get("as_tibble", envir = asNamespace("arrow"))

0 commit comments

Comments
 (0)