Commit 2e7f31c

Merge remote-tracking branch 'upstream/master' into SPARK-23710-hadoop3

2 parents: 78825a7 + 1d95dea

287 files changed: +4291 −1577 lines

LICENSE-binary

Lines changed: 25 additions & 25 deletions
@@ -209,34 +209,34 @@ org.apache.zookeeper:zookeeper
 oro:oro
 commons-configuration:commons-configuration
 commons-digester:commons-digester
-com.chuusai:shapeless_2.11
+com.chuusai:shapeless_2.12
 com.googlecode.javaewah:JavaEWAH
 com.twitter:chill-java
-com.twitter:chill_2.11
+com.twitter:chill_2.12
 com.univocity:univocity-parsers
 javax.jdo:jdo-api
 joda-time:joda-time
 net.sf.opencsv:opencsv
 org.apache.derby:derby
 org.objenesis:objenesis
 org.roaringbitmap:RoaringBitmap
-org.scalanlp:breeze-macros_2.11
-org.scalanlp:breeze_2.11
-org.typelevel:macro-compat_2.11
+org.scalanlp:breeze-macros_2.12
+org.scalanlp:breeze_2.12
+org.typelevel:macro-compat_2.12
 org.yaml:snakeyaml
 org.apache.xbean:xbean-asm5-shaded
 com.squareup.okhttp3:logging-interceptor
 com.squareup.okhttp3:okhttp
 com.squareup.okio:okio
-org.apache.spark:spark-catalyst_2.11
-org.apache.spark:spark-kvstore_2.11
-org.apache.spark:spark-launcher_2.11
-org.apache.spark:spark-mllib-local_2.11
-org.apache.spark:spark-network-common_2.11
-org.apache.spark:spark-network-shuffle_2.11
-org.apache.spark:spark-sketch_2.11
-org.apache.spark:spark-tags_2.11
-org.apache.spark:spark-unsafe_2.11
+org.apache.spark:spark-catalyst_2.12
+org.apache.spark:spark-kvstore_2.12
+org.apache.spark:spark-launcher_2.12
+org.apache.spark:spark-mllib-local_2.12
+org.apache.spark:spark-network-common_2.12
+org.apache.spark:spark-network-shuffle_2.12
+org.apache.spark:spark-sketch_2.12
+org.apache.spark:spark-tags_2.12
+org.apache.spark:spark-unsafe_2.12
 commons-httpclient:commons-httpclient
 com.vlkan:flatbuffers
 com.ning:compress-lzf
@@ -284,18 +284,18 @@ org.apache.orc:orc-mapreduce
 org.mortbay.jetty:jetty
 org.mortbay.jetty:jetty-util
 com.jolbox:bonecp
-org.json4s:json4s-ast_2.11
-org.json4s:json4s-core_2.11
-org.json4s:json4s-jackson_2.11
-org.json4s:json4s-scalap_2.11
+org.json4s:json4s-ast_2.12
+org.json4s:json4s-core_2.12
+org.json4s:json4s-jackson_2.12
+org.json4s:json4s-scalap_2.12
 com.carrotsearch:hppc
 com.fasterxml.jackson.core:jackson-annotations
 com.fasterxml.jackson.core:jackson-core
 com.fasterxml.jackson.core:jackson-databind
 com.fasterxml.jackson.dataformat:jackson-dataformat-yaml
 com.fasterxml.jackson.module:jackson-module-jaxb-annotations
 com.fasterxml.jackson.module:jackson-module-paranamer
-com.fasterxml.jackson.module:jackson-module-scala_2.11
+com.fasterxml.jackson.module:jackson-module-scala_2.12
 com.github.mifmif:generex
 com.google.code.findbugs:jsr305
 com.google.code.gson:gson
@@ -412,8 +412,8 @@ com.thoughtworks.paranamer:paranamer
 org.scala-lang:scala-compiler
 org.scala-lang:scala-library
 org.scala-lang:scala-reflect
-org.scala-lang.modules:scala-parser-combinators_2.11
-org.scala-lang.modules:scala-xml_2.11
+org.scala-lang.modules:scala-parser-combinators_2.12
+org.scala-lang.modules:scala-xml_2.12
 org.fusesource.leveldbjni:leveldbjni-all
 net.sourceforge.f2j:arpack_combined_all
 xmlenc:xmlenc
@@ -434,15 +434,15 @@ is distributed under the 3-Clause BSD license.
 MIT License
 -----------
 
-org.spire-math:spire-macros_2.11
-org.spire-math:spire_2.11
-org.typelevel:machinist_2.11
+org.spire-math:spire-macros_2.12
+org.spire-math:spire_2.12
+org.typelevel:machinist_2.12
 net.razorvine:pyrolite
 org.slf4j:jcl-over-slf4j
 org.slf4j:jul-to-slf4j
 org.slf4j:slf4j-api
 org.slf4j:slf4j-log4j12
-com.github.scopt:scopt_2.11
+com.github.scopt:scopt_2.12
 
 core/src/main/resources/org/apache/spark/ui/static/dagre-d3.min.js
 core/src/main/resources/org/apache/spark/ui/static/*dataTables*
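Every change in this file is the same rename: `_2.11` artifact suffixes become `_2.12`. Scala libraries are not binary compatible across minor Scala versions, so published Maven artifact IDs embed the Scala binary version, and build tools append it automatically. A minimal sbt sketch of where the suffix comes from (the versions shown are illustrative, not Spark's actual pins):

```scala
// build.sbt (sketch): %% appends the Scala binary version to the artifact ID,
// so the same dependency line resolves to json4s-jackson_2.11 under
// scalaVersion 2.11.x and to json4s-jackson_2.12 under 2.12.x.
scalaVersion := "2.12.8"

libraryDependencies ++= Seq(
  "org.json4s"       %% "json4s-jackson" % "3.6.6", // -> json4s-jackson_2.12
  "com.github.scopt" %% "scopt"          % "3.7.1"  // -> scopt_2.12
)
```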

R/CRAN_RELEASE.md

Lines changed: 18 additions & 0 deletions
@@ -1,3 +1,21 @@
+---
+license: |
+  Licensed to the Apache Software Foundation (ASF) under one or more
+  contributor license agreements. See the NOTICE file distributed with
+  this work for additional information regarding copyright ownership.
+  The ASF licenses this file to You under the Apache License, Version 2.0
+  (the "License"); you may not use this file except in compliance with
+  the License. You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+  Unless required by applicable law or agreed to in writing, software
+  distributed under the License is distributed on an "AS IS" BASIS,
+  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+  See the License for the specific language governing permissions and
+  limitations under the License.
+---
+
 # SparkR CRAN Release
 
 To release SparkR as a package to CRAN, we would use the `devtools` package. Please work with the

R/DOCUMENTATION.md

Lines changed: 18 additions & 0 deletions
@@ -1,3 +1,21 @@
+---
+license: |
+  Licensed to the Apache Software Foundation (ASF) under one or more
+  contributor license agreements. See the NOTICE file distributed with
+  this work for additional information regarding copyright ownership.
+  The ASF licenses this file to You under the Apache License, Version 2.0
+  (the "License"); you may not use this file except in compliance with
+  the License. You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+  Unless required by applicable law or agreed to in writing, software
+  distributed under the License is distributed on an "AS IS" BASIS,
+  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+  See the License for the specific language governing permissions and
+  limitations under the License.
+---
+
 # SparkR Documentation
 
 SparkR documentation is generated by using in-source comments and annotated by using

R/WINDOWS.md

Lines changed: 18 additions & 0 deletions
@@ -1,3 +1,21 @@
+---
+license: |
+  Licensed to the Apache Software Foundation (ASF) under one or more
+  contributor license agreements. See the NOTICE file distributed with
+  this work for additional information regarding copyright ownership.
+  The ASF licenses this file to You under the Apache License, Version 2.0
+  (the "License"); you may not use this file except in compliance with
+  the License. You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+  Unless required by applicable law or agreed to in writing, software
+  distributed under the License is distributed on an "AS IS" BASIS,
+  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+  See the License for the specific language governing permissions and
+  limitations under the License.
+---
+
 ## Building SparkR on Windows
 
 To build SparkR on Windows, the following steps are required

R/pkg/DESCRIPTION

Lines changed: 1 addition & 1 deletion
@@ -1,7 +1,7 @@
 Package: SparkR
 Type: Package
 Version: 3.0.0
-Title: R Front end for 'Apache Spark'
+Title: R Front End for 'Apache Spark'
 Description: Provides an R Front end for 'Apache Spark' <https://spark.apache.org>.
 Authors@R: c(person("Shivaram", "Venkataraman", role = c("aut", "cre"),
              email = "[email protected]"),

R/pkg/R/DataFrame.R

Lines changed: 5 additions & 0 deletions
@@ -2143,6 +2143,11 @@ setMethod("selectExpr",
 #' Return a new SparkDataFrame by adding a column or replacing the existing column
 #' that has the same name.
 #'
+#' Note: This method introduces a projection internally. Therefore, calling it multiple times,
+#' for instance, via loops in order to add multiple columns can generate big plans which
+#' can cause performance issues and even \code{StackOverflowException}. To avoid this,
+#' use \code{select} with the multiple columns at once.
+#'
 #' @param x a SparkDataFrame.
 #' @param colName a column name.
 #' @param col a Column expression (which must refer only to this SparkDataFrame), or an atomic
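The caveat in this new doc note applies across Spark's language APIs, not just SparkR. A minimal Scala sketch of the anti-pattern and the `select`-based alternative (column names and counts here are illustrative):

```scala
import org.apache.spark.sql.SparkSession
import org.apache.spark.sql.functions.col

val spark = SparkSession.builder().master("local[*]").appName("withColumnDemo").getOrCreate()
val df = spark.range(10).toDF("id")

// Anti-pattern: every withColumn call adds another projection, so a loop
// builds a deeply nested plan that is slow to analyze and can overflow the stack.
val looped = (1 to 100).foldLeft(df)((d, i) => d.withColumn(s"c$i", col("id") + i))

// Preferred: a single select introduces one projection for all the new columns at once.
val selected = df.select(col("id") +: (1 to 100).map(i => (col("id") + i).as(s"c$i")): _*)
```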

R/pkg/R/functions.R

Lines changed: 2 additions & 0 deletions
@@ -2459,6 +2459,7 @@ setMethod("schema_of_csv", signature(x = "characterOrColumn"),
 #' @note from_utc_timestamp since 1.5.0
 setMethod("from_utc_timestamp", signature(y = "Column", x = "character"),
           function(y, x) {
+            .Deprecated(msg = "from_utc_timestamp is deprecated. See SPARK-25496.")
             jc <- callJStatic("org.apache.spark.sql.functions", "from_utc_timestamp", y@jc, x)
             column(jc)
           })
@@ -2517,6 +2518,7 @@ setMethod("next_day", signature(y = "Column", x = "character"),
 #' @note to_utc_timestamp since 1.5.0
 setMethod("to_utc_timestamp", signature(y = "Column", x = "character"),
           function(y, x) {
+            .Deprecated(msg = "to_utc_timestamp is deprecated. See SPARK-25496.")
             jc <- callJStatic("org.apache.spark.sql.functions", "to_utc_timestamp", y@jc, x)
             column(jc)
           })
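SPARK-25496 deprecates these two functions across Spark's APIs, not only in R; at this point in development their use is additionally gated behind the legacy flag exercised by the test change below. A sketch against the Scala API (assuming a local session; the flag name comes from this commit's own test code):

```scala
import org.apache.spark.sql.SparkSession
import org.apache.spark.sql.functions.{col, current_timestamp, from_utc_timestamp, to_utc_timestamp}

val spark = SparkSession.builder().master("local[*]").appName("utcDemo").getOrCreate()

// Without this legacy flag, the deprecated functions are rejected outright.
spark.conf.set("spark.sql.legacy.utcTimestampFunc.enabled", "true")

val df = spark.range(1).select(current_timestamp().as("ts"))
df.select(from_utc_timestamp(col("ts"), "JST"), to_utc_timestamp(col("ts"), "JST")).show()
```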

R/pkg/tests/fulltests/test_sparkSQL.R

Lines changed: 14 additions & 4 deletions
@@ -1905,10 +1905,20 @@ test_that("date functions on a DataFrame", {
   df2 <- createDataFrame(l2)
   expect_equal(collect(select(df2, minute(df2$b)))[, 1], c(34, 24))
   expect_equal(collect(select(df2, second(df2$b)))[, 1], c(0, 34))
-  expect_equal(collect(select(df2, from_utc_timestamp(df2$b, "JST")))[, 1],
-               c(as.POSIXct("2012-12-13 21:34:00 UTC"), as.POSIXct("2014-12-15 10:24:34 UTC")))
-  expect_equal(collect(select(df2, to_utc_timestamp(df2$b, "JST")))[, 1],
-               c(as.POSIXct("2012-12-13 03:34:00 UTC"), as.POSIXct("2014-12-14 16:24:34 UTC")))
+  conf <- callJMethod(sparkSession, "conf")
+  isUtcTimestampFuncEnabled <- callJMethod(conf, "get", "spark.sql.legacy.utcTimestampFunc.enabled")
+  callJMethod(conf, "set", "spark.sql.legacy.utcTimestampFunc.enabled", "true")
+  tryCatch({
+    # Both from_utc_timestamp and to_utc_timestamp are deprecated as of SPARK-25496
+    expect_equal(suppressWarnings(collect(select(df2, from_utc_timestamp(df2$b, "JST"))))[, 1],
+                 c(as.POSIXct("2012-12-13 21:34:00 UTC"), as.POSIXct("2014-12-15 10:24:34 UTC")))
+    expect_equal(suppressWarnings(collect(select(df2, to_utc_timestamp(df2$b, "JST"))))[, 1],
+                 c(as.POSIXct("2012-12-13 03:34:00 UTC"), as.POSIXct("2014-12-14 16:24:34 UTC")))
+  },
+  finally = {
+    # Reverting the conf back
+    callJMethod(conf, "set", "spark.sql.legacy.utcTimestampFunc.enabled", isUtcTimestampFuncEnabled)
+  })
   expect_gt(collect(select(df2, unix_timestamp()))[1, 1], 0)
   expect_gt(collect(select(df2, unix_timestamp(df2$b)))[1, 1], 0)
   expect_gt(collect(select(df2, unix_timestamp(lit("2015-01-01"), "yyyy-MM-dd")))[1, 1], 0)
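The get/set/tryCatch-finally dance above is the standard way to flip a SQL conf inside a test without leaking the change into later tests. A Scala sketch of the same shape (Spark's Scala test utilities provide a similar `withSQLConf` helper; this standalone version is illustrative):

```scala
import org.apache.spark.sql.SparkSession

// Temporarily override one SQL conf, restoring the previous value (or
// unsetting it) even if the body throws, mirroring tryCatch/finally above.
def withSQLConf[T](spark: SparkSession)(key: String, value: String)(body: => T): T = {
  val original = spark.conf.getOption(key)
  spark.conf.set(key, value)
  try body
  finally original match {
    case Some(v) => spark.conf.set(key, v)
    case None    => spark.conf.unset(key)
  }
}
```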

core/src/main/java/org/apache/spark/util/collection/unsafe/sort/UnsafeExternalSorter.java

Lines changed: 23 additions & 11 deletions
@@ -573,19 +573,31 @@ public boolean hasNext() {
 
     @Override
     public void loadNext() throws IOException {
-      synchronized (this) {
-        loaded = true;
-        if (nextUpstream != null) {
-          // Just consumed the last record from in memory iterator
-          if (lastPage != null) {
-            freePage(lastPage);
-            lastPage = null;
+      MemoryBlock pageToFree = null;
+      try {
+        synchronized (this) {
+          loaded = true;
+          if (nextUpstream != null) {
+            // Just consumed the last record from in memory iterator
+            if (lastPage != null) {
+              // Do not free the page here, while we are locking `SpillableIterator`. The `freePage`
+              // method locks the `TaskMemoryManager`, and it's a bad idea to lock 2 objects in
+              // sequence. We may hit dead lock if another thread locks `TaskMemoryManager` and
+              // `SpillableIterator` in sequence, which may happen in
+              // `TaskMemoryManager.acquireExecutionMemory`.
+              pageToFree = lastPage;
+              lastPage = null;
+            }
+            upstream = nextUpstream;
+            nextUpstream = null;
           }
-          upstream = nextUpstream;
-          nextUpstream = null;
+          numRecords--;
+          upstream.loadNext();
+        }
+      } finally {
+        if (pageToFree != null) {
+          freePage(pageToFree);
         }
-        numRecords--;
-        upstream.loadNext();
       }
     }
 
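The restructuring here is a classic lock-ordering fix: `freePage` acquires the `TaskMemoryManager` lock, so calling it while holding the iterator's monitor creates an iterator-then-manager ordering that can deadlock against a thread locking manager-then-iterator. The change records the page under the lock and frees it afterwards. A self-contained Scala sketch of the two shapes (types and names are illustrative, not Spark's):

```scala
// Stand-ins for Spark's MemoryBlock and TaskMemoryManager.
final class Page
final class Manager {
  def freePage(p: Page): Unit = synchronized { /* release the page */ }
}

final class SpillableIter(manager: Manager) {
  private var lastPage: Page = new Page

  // Deadlock-prone: freePage locks `manager` while we hold `this`, so a
  // thread taking the locks in the opposite order can deadlock against us.
  def loadNextUnsafe(): Unit = synchronized {
    if (lastPage != null) { manager.freePage(lastPage); lastPage = null }
  }

  // Fixed shape, as in the diff: only record the page while holding `this`,
  // then free it after the monitor is released; the two locks never nest.
  def loadNextSafe(): Unit = {
    var pageToFree: Page = null
    try {
      synchronized {
        if (lastPage != null) { pageToFree = lastPage; lastPage = null }
      }
    } finally {
      if (pageToFree != null) manager.freePage(pageToFree)
    }
  }
}
```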

core/src/main/scala/org/apache/spark/BarrierCoordinator.scala

Lines changed: 3 additions & 5 deletions
@@ -19,7 +19,7 @@ package org.apache.spark
 
 import java.util.{Timer, TimerTask}
 import java.util.concurrent.ConcurrentHashMap
-import java.util.function.{Consumer, Function}
+import java.util.function.Consumer
 
 import scala.collection.mutable.ArrayBuffer
 
@@ -202,10 +202,8 @@ private[spark] class BarrierCoordinator(
     case request @ RequestToSync(numTasks, stageId, stageAttemptId, _, _) =>
       // Get or init the ContextBarrierState correspond to the stage attempt.
       val barrierId = ContextBarrierId(stageId, stageAttemptId)
-      states.computeIfAbsent(barrierId, new Function[ContextBarrierId, ContextBarrierState] {
-        override def apply(key: ContextBarrierId): ContextBarrierState =
-          new ContextBarrierState(key, numTasks)
-      })
+      states.computeIfAbsent(barrierId,
+        (key: ContextBarrierId) => new ContextBarrierState(key, numTasks))
       val barrierState = states.get(barrierId)
 
       barrierState.handleRequest(context, request)
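The anonymous `Function` wrapper can go because Scala 2.12 performs SAM conversion: a Scala lambda can implement any Java interface with a single abstract method, including the `java.util.function.Function` that `ConcurrentHashMap.computeIfAbsent` expects. A minimal sketch:

```scala
import java.util.concurrent.ConcurrentHashMap

val states = new ConcurrentHashMap[String, Int]()

// Scala 2.12+: the lambda is SAM-converted to java.util.function.Function,
// so no explicit anonymous class (and no Function import) is needed.
states.computeIfAbsent("stage-0", (key: String) => key.length)
```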
