Commit 361bf02

Merge branch 'master' into SPARK-25827

2 parents: ca3efd8 + 3ed91c9
134 files changed: +1129 −1184 lines

R/pkg/NAMESPACE

Lines changed: 1 addition & 0 deletions
@@ -380,6 +380,7 @@ exportMethods("%<=>%",
               "tanh",
               "toDegrees",
               "toRadians",
+              "to_csv",
               "to_date",
               "to_json",
               "to_timestamp",

R/pkg/R/functions.R

Lines changed: 26 additions & 5 deletions
@@ -187,6 +187,7 @@ NULL
 #' \itemize{
 #' \item \code{to_json}: it is the column containing the struct, array of the structs,
 #' the map or array of maps.
+#' \item \code{to_csv}: it is the column containing the struct.
 #' \item \code{from_json}: it is the column containing the JSON string.
 #' \item \code{from_csv}: it is the column containing the CSV string.
 #' }
@@ -204,11 +205,11 @@ NULL
 #' also supported for the schema.
 #' \item \code{from_csv}: a DDL-formatted string
 #' }
-#' @param ... additional argument(s). In \code{to_json} and \code{from_json}, this contains
-#' additional named properties to control how it is converted, accepts the same
-#' options as the JSON data source. Additionally \code{to_json} supports the "pretty"
-#' option which enables pretty JSON generation. In \code{arrays_zip}, this contains
-#' additional Columns of arrays to be merged.
+#' @param ... additional argument(s). In \code{to_json}, \code{to_csv} and \code{from_json},
+#' this contains additional named properties to control how it is converted, accepts
+#' the same options as the JSON/CSV data source. Additionally \code{to_json} supports
+#' the "pretty" option which enables pretty JSON generation. In \code{arrays_zip},
+#' this contains additional Columns of arrays to be merged.
 #' @name column_collection_functions
 #' @rdname column_collection_functions
 #' @family collection functions
@@ -1740,6 +1741,26 @@ setMethod("to_json", signature(x = "Column"),
             column(jc)
           })

+#' @details
+#' \code{to_csv}: Converts a column containing a \code{structType} into a Column of CSV string.
+#' Resolving the Column can fail if an unsupported type is encountered.
+#'
+#' @rdname column_collection_functions
+#' @aliases to_csv to_csv,Column-method
+#' @examples
+#'
+#' \dontrun{
+#' # Converts a struct into a CSV string
+#' df2 <- sql("SELECT named_struct('date', cast('2000-01-01' as date)) as d")
+#' select(df2, to_csv(df2$d, dateFormat = 'dd/MM/yyyy'))}
+#' @note to_csv since 3.0.0
+setMethod("to_csv", signature(x = "Column"),
+          function(x, ...) {
+            options <- varargsToStrEnv(...)
+            jc <- callJStatic("org.apache.spark.sql.functions", "to_csv", x@jc, options)
+            column(jc)
+          })
+
 #' @details
 #' \code{to_timestamp}: Converts the column into a TimestampType. You may optionally specify
 #' a format according to the rules in:
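
For context, the R method added above is a thin wrapper that forwards to org.apache.spark.sql.functions.to_csv through callJStatic. A rough Scala equivalent is sketched below, assuming the to_csv overload that, like to_json, accepts a java.util.Map of options; the session setup and column names are illustrative, not part of this commit.

// Minimal sketch: the same struct-to-CSV conversion done directly against the
// Scala function the R wrapper delegates to.
import scala.collection.JavaConverters._
import org.apache.spark.sql.SparkSession
import org.apache.spark.sql.functions.to_csv

object ToCsvSketch {
  def main(args: Array[String]): Unit = {
    val spark = SparkSession.builder().appName("to_csv-sketch").master("local[*]").getOrCreate()
    // Same shape as the roxygen example: a struct column rendered as a CSV string,
    // with a data-source option (dateFormat) passed through.
    val df = spark.sql("SELECT named_struct('date', cast('2000-01-01' as date)) AS d")
    df.select(to_csv(df("d"), Map("dateFormat" -> "dd/MM/yyyy").asJava)).show(false)
    spark.stop()
  }
}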

R/pkg/R/generics.R

Lines changed: 4 additions & 0 deletions
@@ -1303,6 +1303,10 @@ setGeneric("to_date", function(x, format) { standardGeneric("to_date") })
 #' @name NULL
 setGeneric("to_json", function(x, ...) { standardGeneric("to_json") })

+#' @rdname column_collection_functions
+#' @name NULL
+setGeneric("to_csv", function(x, ...) { standardGeneric("to_csv") })
+
 #' @rdname column_datetime_functions
 #' @name NULL
 setGeneric("to_timestamp", function(x, format) { standardGeneric("to_timestamp") })

R/pkg/tests/fulltests/test_sparkSQL.R

Lines changed: 5 additions & 0 deletions
@@ -1689,6 +1689,11 @@ test_that("column functions", {
     expect_equal(arr$arrcol[[1]][[2]]$name, "Alice")
   }

+  # Test to_csv()
+  df <- sql("SELECT named_struct('name', 'Bob') as people")
+  j <- collect(select(df, alias(to_csv(df$people), "csv")))
+  expect_equal(j[order(j$csv), ][1], "Bob")
+
   # Test create_array() and create_map()
   df <- as.DataFrame(data.frame(
     x = c(1.0, 2.0), y = c(-1.0, 3.0), z = c(-2.0, 5.0)

bin/spark-shell

Lines changed: 4 additions & 1 deletion
@@ -32,7 +32,10 @@ if [ -z "${SPARK_HOME}" ]; then
   source "$(dirname "$0")"/find-spark-home
 fi

-export _SPARK_CMD_USAGE="Usage: ./bin/spark-shell [options]"
+export _SPARK_CMD_USAGE="Usage: ./bin/spark-shell [options]
+
+Scala REPL options:
+  -I <file>                   preload <file>, enforcing line-by-line interpretation"

 # SPARK-4161: scala does not assume use of the java classpath,
 # so we need to add the "-Dscala.usejavacp=true" flag manually. We
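
The -I flag advertised in the new usage string is handed to the Scala REPL, which interprets the given file line by line before the interactive prompt appears. A small illustrative preload script (the file name and its contents are hypothetical) could look like this:

// init.scala -- hypothetical preload file; launch with:
//   ./bin/spark-shell -I init.scala
// Each statement is interpreted in order, as if typed at the prompt,
// before the interactive session starts.
import org.apache.spark.sql.functions._

val defaultDateFormat = "dd/MM/yyyy"
println(s"preloaded helpers; default date format = $defaultDateFormat")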

bin/spark-shell2.cmd

Lines changed: 7 additions & 1 deletion
@@ -20,7 +20,13 @@ rem
 rem Figure out where the Spark framework is installed
 call "%~dp0find-spark-home.cmd"

-set _SPARK_CMD_USAGE=Usage: .\bin\spark-shell.cmd [options]
+set LF=^
+
+
+rem two empty lines are required
+set _SPARK_CMD_USAGE=Usage: .\bin\spark-shell.cmd [options]^%LF%%LF%^%LF%%LF%^
+Scala REPL options:^%LF%%LF%^
+  -I ^<file^>                  preload ^<file^>, enforcing line-by-line interpretation

 rem SPARK-4161: scala does not assume use of the java classpath,
 rem so we need to add the "-Dscala.usejavacp=true" flag manually. We

core/pom.xml

Lines changed: 1 addition & 1 deletion
@@ -56,7 +56,7 @@
     </dependency>
     <dependency>
       <groupId>org.apache.xbean</groupId>
-      <artifactId>xbean-asm6-shaded</artifactId>
+      <artifactId>xbean-asm7-shaded</artifactId>
     </dependency>
     <dependency>
       <groupId>org.apache.hadoop</groupId>

core/src/main/java/org/apache/spark/ExecutorPlugin.java

Lines changed: 3 additions & 3 deletions
@@ -20,18 +20,18 @@
 import org.apache.spark.annotation.DeveloperApi;

 /**
- * A plugin which can be automaticaly instantiated within each Spark executor. Users can specify
+ * A plugin which can be automatically instantiated within each Spark executor. Users can specify
  * plugins which should be created with the "spark.executor.plugins" configuration. An instance
  * of each plugin will be created for every executor, including those created by dynamic allocation,
  * before the executor starts running any tasks.
  *
  * The specific api exposed to the end users still considered to be very unstable. We will
- * hopefully be able to keep compatability by providing default implementations for any methods
+ * hopefully be able to keep compatibility by providing default implementations for any methods
  * added, but make no guarantees this will always be possible across all Spark releases.
  *
  * Spark does nothing to verify the plugin is doing legitimate things, or to manage the resources
  * it uses. A plugin acquires the same privileges as the user running the task. A bad plugin
- * could also intefere with task execution and make the executor fail in unexpected ways.
+ * could also interfere with task execution and make the executor fail in unexpected ways.
  */
 @DeveloperApi
 public interface ExecutorPlugin {
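
As the corrected Javadoc describes, each class listed under spark.executor.plugins is instantiated on every executor before any task runs. A minimal Scala sketch follows; it assumes the interface's no-arg init and shutdown hooks with default implementations, so verify the exact method set against the interface in your Spark version.

// Hypothetical plugin; package and class names are illustrative.
package com.example

import org.apache.spark.ExecutorPlugin

class LoggingExecutorPlugin extends ExecutorPlugin {
  // Assumed hook: runs once per executor, before any task is scheduled.
  override def init(): Unit = {
    // Keep this cheap and defensive: the plugin shares the executor's
    // privileges, and a misbehaving plugin can interfere with task execution.
    println("LoggingExecutorPlugin initialized")
  }

  // Assumed hook: runs when the executor shuts down.
  override def shutdown(): Unit = {
    println("LoggingExecutorPlugin shutting down")
  }
}
// Enable with: --conf spark.executor.plugins=com.example.LoggingExecutorPlugin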

core/src/main/java/org/apache/spark/util/collection/unsafe/sort/UnsafeSorterSpillWriter.java

Lines changed: 4 additions & 1 deletion
@@ -42,7 +42,10 @@ public final class UnsafeSorterSpillWriter {

   private final SparkConf conf = new SparkConf();

-  /** The buffer size to use when writing the sorted records to an on-disk file */
+  /**
+   * The buffer size to use when writing the sorted records to an on-disk file, and
+   * this space used by prefix + len + recordLength must be greater than 4 + 8 bytes.
+   */
   private final int diskWriteBufferSize =
       (int) (long) conf.get(package$.MODULE$.SHUFFLE_DISK_WRITE_BUFFER_SIZE());

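
The expanded comment documents a lower bound on the buffer behind SHUFFLE_DISK_WRITE_BUFFER_SIZE: it must leave room for the 4-byte record length and 8-byte prefix written ahead of each record. A hedged tuning sketch is below; spark.shuffle.spill.diskWriteBufferSize is the key usually associated with that constant, but confirm it for your Spark version.

// Illustrative only: shows where the constraint applies, not a recommended value.
import org.apache.spark.SparkConf

val conf = new SparkConf()
  // Assumed key behind SHUFFLE_DISK_WRITE_BUFFER_SIZE; any value must comfortably
  // exceed the 4 + 8 bytes of per-record framing described in the comment above.
  .set("spark.shuffle.spill.diskWriteBufferSize", "1m")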

core/src/main/scala/org/apache/spark/BarrierTaskContext.scala

Lines changed: 5 additions & 2 deletions
@@ -41,14 +41,14 @@ import org.apache.spark.util._
 class BarrierTaskContext private[spark] (
     taskContext: TaskContext) extends TaskContext with Logging {

+  import BarrierTaskContext._
+
   // Find the driver side RPCEndpointRef of the coordinator that handles all the barrier() calls.
   private val barrierCoordinator: RpcEndpointRef = {
     val env = SparkEnv.get
     RpcUtils.makeDriverRef("barrierSync", env.conf, env.rpcEnv)
   }

-  private val timer = new Timer("Barrier task timer for barrier() calls.")
-
   // Local barrierEpoch that identify a barrier() call from current task, it shall be identical
   // with the driver side epoch.
   private var barrierEpoch = 0
@@ -234,4 +234,7 @@ object BarrierTaskContext {
   @Experimental
   @Since("2.4.0")
   def get(): BarrierTaskContext = TaskContext.get().asInstanceOf[BarrierTaskContext]
+
+  private val timer = new Timer("Barrier task timer for barrier() calls.")
+
 }
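
For context on what the relocated timer supports: it backs the progress logging for barrier() calls, which are issued from inside a barrier stage. A sketch of such a stage is below; only BarrierTaskContext.get() and barrier() appear in this diff, and the surrounding job setup is assumed boilerplate.

// Illustrative barrier stage; RDD contents and cluster settings are made up.
import org.apache.spark.{BarrierTaskContext, SparkConf, SparkContext}

object BarrierSketch {
  def main(args: Array[String]): Unit = {
    val sc = new SparkContext(new SparkConf().setAppName("barrier-sketch").setMaster("local[4]"))
    val result = sc.parallelize(1 to 8, numSlices = 4)
      .barrier()
      .mapPartitions { iter =>
        val ctx = BarrierTaskContext.get()
        // Every task in the stage blocks here until all tasks arrive; the shared
        // timer in the companion object drives the periodic progress log messages.
        ctx.barrier()
        iter
      }
      .collect()
    println(result.mkString(","))
    sc.stop()
  }
}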
