Skip to content

Commit 43cbef6

Browse files
committed
Merge remote-tracking branch 'origin/master' into modularize-jdbc-internals
2 parents fca548a + 2f9c273 commit 43cbef6

File tree

318 files changed

+6674
-2526
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

318 files changed

+6674
-2526
lines changed

.gitignore

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,7 @@
2323
/lib/
2424
R-unit-tests.log
2525
R/unit-tests.out
26+
R/cran-check.out
2627
build/*.jar
2728
build/apache-maven*
2829
build/scala*

.travis.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -44,7 +44,7 @@ notifications:
4444
# 5. Run maven install before running lint-java.
4545
install:
4646
- export MAVEN_SKIP_RC=1
47-
- build/mvn -T 4 -q -DskipTests -Pyarn -Phadoop-2.3 -Pkinesis-asl -Phive -Phive-thriftserver install
47+
- build/mvn -T 4 -q -DskipTests -Pmesos -Pyarn -Phadoop-2.3 -Pkinesis-asl -Phive -Phive-thriftserver install
4848

4949
# 6. Run lint-java.
5050
script:

LICENSE

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -263,7 +263,7 @@ The text of each license is also included at licenses/LICENSE-[project].txt.
263263
(New BSD license) Protocol Buffer Java API (org.spark-project.protobuf:protobuf-java:2.4.1-shaded - http://code.google.com/p/protobuf)
264264
(The BSD License) Fortran to Java ARPACK (net.sourceforge.f2j:arpack_combined_all:0.1 - http://f2j.sourceforge.net)
265265
(The BSD License) xmlenc Library (xmlenc:xmlenc:0.52 - http://xmlenc.sourceforge.net)
266-
(The New BSD License) Py4J (net.sf.py4j:py4j:0.10.1 - http://py4j.sourceforge.net/)
266+
(The New BSD License) Py4J (net.sf.py4j:py4j:0.10.3 - http://py4j.sourceforge.net/)
267267
(Two-clause BSD-style license) JUnit-Interface (com.novocode:junit-interface:0.10 - http://github.com/szeiger/junit-interface/)
268268
(BSD licence) sbt and sbt-launch-lib.bash
269269
(BSD 3 Clause) d3.min.js (https://github.com/mbostock/d3/blob/master/LICENSE)

R/WINDOWS.md

Lines changed: 11 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,13 +4,23 @@ To build SparkR on Windows, the following steps are required
44

55
1. Install R (>= 3.1) and [Rtools](http://cran.r-project.org/bin/windows/Rtools/). Make sure to
66
include Rtools and R in `PATH`.
7+
78
2. Install
89
[JDK7](http://www.oracle.com/technetwork/java/javase/downloads/jdk7-downloads-1880260.html) and set
910
`JAVA_HOME` in the system environment variables.
11+
1012
3. Download and install [Maven](http://maven.apache.org/download.html). Also include the `bin`
1113
directory in Maven in `PATH`.
14+
1215
4. Set `MAVEN_OPTS` as described in [Building Spark](http://spark.apache.org/docs/latest/building-spark.html).
13-
5. Open a command shell (`cmd`) in the Spark directory and run `mvn -DskipTests -Psparkr package`
16+
17+
5. Open a command shell (`cmd`) in the Spark directory and build Spark with [Maven](http://spark.apache.org/docs/latest/building-spark.html#building-with-buildmvn), including the `-Psparkr` profile to build the R package. For example, to use the default Hadoop versions you can run:
18+
19+
```bash
20+
mvn.cmd -DskipTests -Psparkr package
21+
```
22+
23+
`.\build\mvn` is a shell script so `mvn.cmd` should be used directly on Windows.
1424

1525
## Unit tests
1626

R/pkg/DESCRIPTION

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@ Package: SparkR
22
Type: Package
33
Title: R Frontend for Apache Spark
44
Version: 2.0.0
5-
Date: 2016-07-07
5+
Date: 2016-08-27
66
Authors@R: c(person("Shivaram", "Venkataraman", role = c("aut", "cre"),
77
email = "[email protected]"),
88
person("Xiangrui", "Meng", role = "aut",
@@ -11,7 +11,7 @@ Authors@R: c(person("Shivaram", "Venkataraman", role = c("aut", "cre"),
1111
email = "[email protected]"),
1212
person(family = "The Apache Software Foundation", role = c("aut", "cph")))
1313
URL: http://www.apache.org/ http://spark.apache.org/
14-
BugReports: https://issues.apache.org/jira/secure/CreateIssueDetails!init.jspa?pid=12315420&components=12325400&issuetype=4
14+
BugReports: https://cwiki.apache.org/confluence/display/SPARK/Contributing+to+Spark#ContributingtoSpark-ContributingBugReports
1515
Depends:
1616
R (>= 3.0),
1717
methods
@@ -39,6 +39,7 @@ Collate:
3939
'deserialize.R'
4040
'functions.R'
4141
'install.R'
42+
'jvm.R'
4243
'mllib.R'
4344
'serialize.R'
4445
'sparkR.R'

R/pkg/NAMESPACE

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,7 @@ exportMethods("glm",
2727
"summary",
2828
"spark.kmeans",
2929
"fitted",
30+
"spark.mlp",
3031
"spark.naiveBayes",
3132
"spark.survreg",
3233
"spark.lda",
@@ -363,4 +364,8 @@ S3method(structField, jobj)
363364
S3method(structType, jobj)
364365
S3method(structType, structField)
365366

367+
export("sparkR.newJObject")
368+
export("sparkR.callJMethod")
369+
export("sparkR.callJStatic")
370+
366371
export("install.spark")

R/pkg/R/DataFrame.R

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -212,9 +212,9 @@ setMethod("showDF",
212212

213213
#' show
214214
#'
215-
#' Print the SparkDataFrame column names and types
215+
#' Print class and type information of a Spark object.
216216
#'
217-
#' @param object a SparkDataFrame.
217+
#' @param object a Spark object. Can be a SparkDataFrame, Column, GroupedData, WindowSpec.
218218
#'
219219
#' @family SparkDataFrame functions
220220
#' @rdname show

R/pkg/R/WindowSpec.R

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -203,6 +203,18 @@ setMethod("rangeBetween",
203203
#' @aliases over,Column,WindowSpec-method
204204
#' @family colum_func
205205
#' @export
206+
#' @examples \dontrun{
207+
#' df <- createDataFrame(mtcars)
208+
#'
209+
#' # Partition by am (transmission) and order by hp (horsepower)
210+
#' ws <- orderBy(windowPartitionBy("am"), "hp")
211+
#'
212+
#' # Rank on hp within each partition
213+
#' out <- select(df, over(rank(), ws), df$hp, df$am)
214+
#'
215+
#' # Lead mpg values by 1 row on the partition-and-ordered table
216+
#' out <- select(df, over(lead(df$mpg), ws), df$mpg, df$hp, df$am)
217+
#' }
206218
#' @note over since 2.0.0
207219
setMethod("over",
208220
signature(x = "Column", window = "WindowSpec"),

R/pkg/R/functions.R

Lines changed: 60 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -3121,9 +3121,9 @@ setMethod("ifelse",
31213121
#' @aliases cume_dist,missing-method
31223122
#' @export
31233123
#' @examples \dontrun{
3124-
#' df <- createDataFrame(iris)
3125-
#' ws <- orderBy(windowPartitionBy("Species"), "Sepal_Length")
3126-
#' out <- select(df, over(cume_dist(), ws), df$Sepal_Length, df$Species)
3124+
#' df <- createDataFrame(mtcars)
3125+
#' ws <- orderBy(windowPartitionBy("am"), "hp")
3126+
#' out <- select(df, over(cume_dist(), ws), df$hp, df$am)
31273127
#' }
31283128
#' @note cume_dist since 1.6.0
31293129
setMethod("cume_dist",
@@ -3148,7 +3148,11 @@ setMethod("cume_dist",
31483148
#' @family window_funcs
31493149
#' @aliases dense_rank,missing-method
31503150
#' @export
3151-
#' @examples \dontrun{dense_rank()}
3151+
#' @examples \dontrun{
3152+
#' df <- createDataFrame(mtcars)
3153+
#' ws <- orderBy(windowPartitionBy("am"), "hp")
3154+
#' out <- select(df, over(dense_rank(), ws), df$hp, df$am)
3155+
#' }
31523156
#' @note dense_rank since 1.6.0
31533157
setMethod("dense_rank",
31543158
signature("missing"),
@@ -3168,18 +3172,26 @@ setMethod("dense_rank",
31683172
#' @param x the column as a character string or a Column to compute on.
31693173
#' @param offset the number of rows back from the current row from which to obtain a value.
31703174
#' If not specified, the default is 1.
3171-
#' @param defaultValue default to use when the offset row does not exist.
3175+
#' @param defaultValue (optional) default to use when the offset row does not exist.
31723176
#' @param ... further arguments to be passed to or from other methods.
31733177
#' @rdname lag
31743178
#' @name lag
31753179
#' @aliases lag,characterOrColumn-method
31763180
#' @family window_funcs
31773181
#' @export
3178-
#' @examples \dontrun{lag(df$c)}
3182+
#' @examples \dontrun{
3183+
#' df <- createDataFrame(mtcars)
3184+
#'
3185+
#' # Partition by am (transmission) and order by hp (horsepower)
3186+
#' ws <- orderBy(windowPartitionBy("am"), "hp")
3187+
#'
3188+
#' # Lag mpg values by 1 row on the partition-and-ordered table
3189+
#' out <- select(df, over(lag(df$mpg), ws), df$mpg, df$hp, df$am)
3190+
#' }
31793191
#' @note lag since 1.6.0
31803192
setMethod("lag",
31813193
signature(x = "characterOrColumn"),
3182-
function(x, offset, defaultValue = NULL) {
3194+
function(x, offset = 1, defaultValue = NULL) {
31833195
col <- if (class(x) == "Column") {
31843196
x@jc
31853197
} else {
@@ -3194,25 +3206,35 @@ setMethod("lag",
31943206
#' lead
31953207
#'
31963208
#' Window function: returns the value that is \code{offset} rows after the current row, and
3197-
#' NULL if there is less than \code{offset} rows after the current row. For example,
3198-
#' an \code{offset} of one will return the next row at any given point in the window partition.
3209+
#' \code{defaultValue} if there are fewer than \code{offset} rows after the current row.
3210+
#' For example, an \code{offset} of one will return the next row at any given point
3211+
#' in the window partition.
31993212
#'
32003213
#' This is equivalent to the \code{LEAD} function in SQL.
32013214
#'
3202-
#' @param x Column to compute on
3203-
#' @param offset Number of rows to offset
3204-
#' @param defaultValue (Optional) default value to use
3215+
#' @param x the column as a character string or a Column to compute on.
3216+
#' @param offset the number of rows after the current row from which to obtain a value.
3217+
#' If not specified, the default is 1.
3218+
#' @param defaultValue (optional) default to use when the offset row does not exist.
32053219
#'
32063220
#' @rdname lead
32073221
#' @name lead
32083222
#' @family window_funcs
32093223
#' @aliases lead,characterOrColumn,numeric-method
32103224
#' @export
3211-
#' @examples \dontrun{lead(df$c)}
3225+
#' @examples \dontrun{
3226+
#' df <- createDataFrame(mtcars)
3227+
#'
3228+
#' # Partition by am (transmission) and order by hp (horsepower)
3229+
#' ws <- orderBy(windowPartitionBy("am"), "hp")
3230+
#'
3231+
#' # Lead mpg values by 1 row on the partition-and-ordered table
3232+
#' out <- select(df, over(lead(df$mpg), ws), df$mpg, df$hp, df$am)
3233+
#' }
32123234
#' @note lead since 1.6.0
32133235
setMethod("lead",
32143236
signature(x = "characterOrColumn", offset = "numeric", defaultValue = "ANY"),
3215-
function(x, offset, defaultValue = NULL) {
3237+
function(x, offset = 1, defaultValue = NULL) {
32163238
col <- if (class(x) == "Column") {
32173239
x@jc
32183240
} else {
@@ -3239,7 +3261,15 @@ setMethod("lead",
32393261
#' @aliases ntile,numeric-method
32403262
#' @family window_funcs
32413263
#' @export
3242-
#' @examples \dontrun{ntile(1)}
3264+
#' @examples \dontrun{
3265+
#' df <- createDataFrame(mtcars)
3266+
#'
3267+
#' # Partition by am (transmission) and order by hp (horsepower)
3268+
#' ws <- orderBy(windowPartitionBy("am"), "hp")
3269+
#'
3270+
#' # Get ntile group id (1-4) for hp
3271+
#' out <- select(df, over(ntile(4), ws), df$hp, df$am)
3272+
#' }
32433273
#' @note ntile since 1.6.0
32443274
setMethod("ntile",
32453275
signature(x = "numeric"),
@@ -3263,7 +3293,11 @@ setMethod("ntile",
32633293
#' @family window_funcs
32643294
#' @aliases percent_rank,missing-method
32653295
#' @export
3266-
#' @examples \dontrun{percent_rank()}
3296+
#' @examples \dontrun{
3297+
#' df <- createDataFrame(mtcars)
3298+
#' ws <- orderBy(windowPartitionBy("am"), "hp")
3299+
#' out <- select(df, over(percent_rank(), ws), df$hp, df$am)
3300+
#' }
32673301
#' @note percent_rank since 1.6.0
32683302
setMethod("percent_rank",
32693303
signature("missing"),
@@ -3288,7 +3322,11 @@ setMethod("percent_rank",
32883322
#' @family window_funcs
32893323
#' @aliases rank,missing-method
32903324
#' @export
3291-
#' @examples \dontrun{rank()}
3325+
#' @examples \dontrun{
3326+
#' df <- createDataFrame(mtcars)
3327+
#' ws <- orderBy(windowPartitionBy("am"), "hp")
3328+
#' out <- select(df, over(rank(), ws), df$hp, df$am)
3329+
#' }
32923330
#' @note rank since 1.6.0
32933331
setMethod("rank",
32943332
signature(x = "missing"),
@@ -3321,7 +3359,11 @@ setMethod("rank",
33213359
#' @aliases row_number,missing-method
33223360
#' @family window_funcs
33233361
#' @export
3324-
#' @examples \dontrun{row_number()}
3362+
#' @examples \dontrun{
3363+
#' df <- createDataFrame(mtcars)
3364+
#' ws <- orderBy(windowPartitionBy("am"), "hp")
3365+
#' out <- select(df, over(row_number(), ws), df$hp, df$am)
3366+
#' }
33253367
#' @note row_number since 1.6.0
33263368
setMethod("row_number",
33273369
signature("missing"),

R/pkg/R/generics.R

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1330,6 +1330,10 @@ setGeneric("spark.kmeans", function(data, formula, ...) { standardGeneric("spark
13301330
#' @export
13311331
setGeneric("fitted")
13321332

1333+
#' @rdname spark.mlp
1334+
#' @export
1335+
setGeneric("spark.mlp", function(data, ...) { standardGeneric("spark.mlp") })
1336+
13331337
#' @rdname spark.naiveBayes
13341338
#' @export
13351339
setGeneric("spark.naiveBayes", function(data, formula, ...) { standardGeneric("spark.naiveBayes") })

0 commit comments

Comments
 (0)