Skip to content

Commit 857ba4d

Browse files
committed
Resolve conflicts
2 parents b595b7f + 3323d0f commit 857ba4d

File tree

552 files changed

+12514
-6064
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

552 files changed

+12514
-6064
lines changed

.gitignore

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -72,6 +72,7 @@ metastore/
7272
metastore_db/
7373
sql/hive-thriftserver/test_warehouses
7474
warehouse/
75+
spark-warehouse/
7576

7677
# For R session data
7778
.RData

NOTICE

Lines changed: 4 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,9 @@ Common Development and Distribution License 1.0
1212
The following components are provided under the Common Development and Distribution License 1.0. See project link for details.
1313

1414
(CDDL 1.0) Glassfish Jasper (org.mortbay.jetty:jsp-2.1:6.1.14 - http://jetty.mortbay.org/project/modules/jsp-2.1)
15+
(CDDL 1.0) JAX-RS (https://jax-rs-spec.java.net/)
1516
(CDDL 1.0) Servlet Specification 2.5 API (org.mortbay.jetty:servlet-api-2.5:6.1.14 - http://jetty.mortbay.org/project/modules/servlet-api-2.5)
17+
(CDDL 1.0) (GPL2 w/ CPE) javax.annotation API (https://glassfish.java.net/nonav/public/CDDL+GPL.html)
1618
(COMMON DEVELOPMENT AND DISTRIBUTION LICENSE (CDDL) Version 1.0) (GNU General Public Library) Streaming API for XML (javax.xml.stream:stax-api:1.0-2 - no url defined)
1719
(Common Development and Distribution License (CDDL) v1.0) JavaBeans Activation Framework (JAF) (javax.activation:activation:1.1 - http://java.sun.com/products/javabeans/jaf/index.jsp)
1820

@@ -22,15 +24,10 @@ Common Development and Distribution License 1.1
2224

2325
The following components are provided under the Common Development and Distribution License 1.1. See project link for details.
2426

27+
(CDDL 1.1) (GPL2 w/ CPE) org.glassfish.hk2 (https://hk2.java.net)
2528
(CDDL 1.1) (GPL2 w/ CPE) JAXB API bundle for GlassFish V3 (javax.xml.bind:jaxb-api:2.2.2 - https://jaxb.dev.java.net/)
2629
(CDDL 1.1) (GPL2 w/ CPE) JAXB RI (com.sun.xml.bind:jaxb-impl:2.2.3-1 - http://jaxb.java.net/)
27-
(CDDL 1.1) (GPL2 w/ CPE) jersey-core (com.sun.jersey:jersey-core:1.8 - https://jersey.dev.java.net/jersey-core/)
28-
(CDDL 1.1) (GPL2 w/ CPE) jersey-core (com.sun.jersey:jersey-core:1.9 - https://jersey.java.net/jersey-core/)
29-
(CDDL 1.1) (GPL2 w/ CPE) jersey-guice (com.sun.jersey.contribs:jersey-guice:1.9 - https://jersey.java.net/jersey-contribs/jersey-guice/)
30-
(CDDL 1.1) (GPL2 w/ CPE) jersey-json (com.sun.jersey:jersey-json:1.8 - https://jersey.dev.java.net/jersey-json/)
31-
(CDDL 1.1) (GPL2 w/ CPE) jersey-json (com.sun.jersey:jersey-json:1.9 - https://jersey.java.net/jersey-json/)
32-
(CDDL 1.1) (GPL2 w/ CPE) jersey-server (com.sun.jersey:jersey-server:1.8 - https://jersey.dev.java.net/jersey-server/)
33-
(CDDL 1.1) (GPL2 w/ CPE) jersey-server (com.sun.jersey:jersey-server:1.9 - https://jersey.java.net/jersey-server/)
30+
(CDDL 1.1) (GPL2 w/ CPE) Jersey 2 (https://jersey.java.net)
3431

3532
========================================================================
3633
Common Public License 1.0

R/pkg/DESCRIPTION

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,7 @@ Collate:
2626
'pairRDD.R'
2727
'DataFrame.R'
2828
'SQLContext.R'
29+
'WindowSpec.R'
2930
'backend.R'
3031
'broadcast.R'
3132
'client.R'
@@ -38,4 +39,5 @@ Collate:
3839
'stats.R'
3940
'types.R'
4041
'utils.R'
42+
'window.R'
4143
RoxygenNote: 5.0.1

R/pkg/NAMESPACE

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -216,6 +216,7 @@ exportMethods("%in%",
216216
"next_day",
217217
"ntile",
218218
"otherwise",
219+
"over",
219220
"percent_rank",
220221
"pmod",
221222
"quarter",
@@ -315,3 +316,12 @@ export("structField",
315316
"structType.jobj",
316317
"structType.structField",
317318
"print.structType")
319+
320+
exportClasses("WindowSpec")
321+
322+
export("partitionBy",
323+
"rowsBetween",
324+
"rangeBetween")
325+
326+
export("window.partitionBy",
327+
"window.orderBy")

R/pkg/R/DataFrame.R

Lines changed: 34 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -570,10 +570,17 @@ setMethod("unpersist",
570570

571571
#' Repartition
572572
#'
573-
#' Return a new SparkDataFrame that has exactly numPartitions partitions.
574-
#'
573+
#' The following options for repartition are possible:
574+
#' \itemize{
575+
#' \item{"Option 1"} {Return a new SparkDataFrame partitioned by
576+
#' the given columns into `numPartitions`.}
577+
#' \item{"Option 2"} {Return a new SparkDataFrame that has exactly `numPartitions`.}
578+
#' \item{"Option 3"} {Return a new SparkDataFrame partitioned by the given column(s),
579+
#' using `spark.sql.shuffle.partitions` as number of partitions.}
580+
#'}
575581
#' @param x A SparkDataFrame
576582
#' @param numPartitions The number of partitions to use.
583+
#' @param col The column by which the partitioning will be performed.
577584
#'
578585
#' @family SparkDataFrame functions
579586
#' @rdname repartition
@@ -586,11 +593,31 @@ setMethod("unpersist",
586593
#' path <- "path/to/file.json"
587594
#' df <- read.json(sqlContext, path)
588595
#' newDF <- repartition(df, 2L)
596+
#' newDF <- repartition(df, numPartitions = 2L)
597+
#' newDF <- repartition(df, col = df$"col1", df$"col2")
598+
#' newDF <- repartition(df, 3L, col = df$"col1", df$"col2")
589599
#'}
590600
setMethod("repartition",
591-
signature(x = "SparkDataFrame", numPartitions = "numeric"),
592-
function(x, numPartitions) {
593-
sdf <- callJMethod(x@sdf, "repartition", numToInt(numPartitions))
601+
signature(x = "SparkDataFrame"),
602+
function(x, numPartitions = NULL, col = NULL, ...) {
603+
if (!is.null(numPartitions) && is.numeric(numPartitions)) {
604+
# number of partitions and columns both are specified
605+
if (!is.null(col) && class(col) == "Column") {
606+
cols <- list(col, ...)
607+
jcol <- lapply(cols, function(c) { c@jc })
608+
sdf <- callJMethod(x@sdf, "repartition", numToInt(numPartitions), jcol)
609+
} else {
610+
# only number of partitions is specified
611+
sdf <- callJMethod(x@sdf, "repartition", numToInt(numPartitions))
612+
}
613+
} else if (!is.null(col) && class(col) == "Column") {
614+
# only columns are specified
615+
cols <- list(col, ...)
616+
jcol <- lapply(cols, function(c) { c@jc })
617+
sdf <- callJMethod(x@sdf, "repartition", jcol)
618+
} else {
619+
stop("Please, specify the number of partitions and/or a column(s)")
620+
}
594621
dataFrame(sdf)
595622
})
596623

@@ -1722,8 +1749,8 @@ setMethod("arrange",
17221749
#' @export
17231750
setMethod("orderBy",
17241751
signature(x = "SparkDataFrame", col = "characterOrColumn"),
1725-
function(x, col) {
1726-
arrange(x, col)
1752+
function(x, col, ...) {
1753+
arrange(x, col, ...)
17271754
})
17281755

17291756
#' Filter

R/pkg/R/RDD.R

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1023,9 +1023,13 @@ setMethod("keyBy",
10231023
#' @aliases repartition,RDD
10241024
#' @noRd
10251025
setMethod("repartition",
1026-
signature(x = "RDD", numPartitions = "numeric"),
1026+
signature(x = "RDD"),
10271027
function(x, numPartitions) {
1028-
coalesce(x, numPartitions, TRUE)
1028+
if (!is.null(numPartitions) && is.numeric(numPartitions)) {
1029+
coalesce(x, numPartitions, TRUE)
1030+
} else {
1031+
stop("Please, specify the number of partitions")
1032+
}
10291033
})
10301034

10311035
#' Return a new RDD that is reduced into numPartitions partitions.

R/pkg/R/WindowSpec.R

Lines changed: 188 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,188 @@
1+
#
2+
# Licensed to the Apache Software Foundation (ASF) under one or more
3+
# contributor license agreements. See the NOTICE file distributed with
4+
# this work for additional information regarding copyright ownership.
5+
# The ASF licenses this file to You under the Apache License, Version 2.0
6+
# (the "License"); you may not use this file except in compliance with
7+
# the License. You may obtain a copy of the License at
8+
#
9+
# http://www.apache.org/licenses/LICENSE-2.0
10+
#
11+
# Unless required by applicable law or agreed to in writing, software
12+
# distributed under the License is distributed on an "AS IS" BASIS,
13+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14+
# See the License for the specific language governing permissions and
15+
# limitations under the License.
16+
#
17+
18+
# WindowSpec.R - WindowSpec class and methods implemented in S4 OO classes
19+
20+
#' @include generics.R jobj.R column.R
21+
NULL
22+
23+
#' @title S4 class that represents a WindowSpec
24+
#' @description WindowSpec can be created by using window.partitionBy()
25+
#' or window.orderBy()
26+
#' @rdname WindowSpec
27+
#' @seealso \link{window.partitionBy}, \link{window.orderBy}
28+
#'
29+
#' @param sws A Java object reference to the backing Scala WindowSpec
30+
#' @export
31+
setClass("WindowSpec",
32+
slots = list(sws = "jobj"))
33+
34+
setMethod("initialize", "WindowSpec", function(.Object, sws) {
35+
.Object@sws <- sws
36+
.Object
37+
})
38+
39+
windowSpec <- function(sws) {
40+
stopifnot(class(sws) == "jobj")
41+
new("WindowSpec", sws)
42+
}
43+
44+
#' @rdname show
45+
setMethod("show", "WindowSpec",
46+
function(object) {
47+
cat("WindowSpec", callJMethod(object@sws, "toString"), "\n")
48+
})
49+
50+
#' partitionBy
51+
#'
52+
#' Defines the partitioning columns in a WindowSpec.
53+
#'
54+
#' @param x a WindowSpec
55+
#' @return a WindowSpec
56+
#' @rdname partitionBy
57+
#' @name partitionBy
58+
#' @family windowspec_method
59+
#' @export
60+
#' @examples
61+
#' \dontrun{
62+
#' partitionBy(ws, "col1", "col2")
63+
#' partitionBy(ws, df$col1, df$col2)
64+
#' }
65+
setMethod("partitionBy",
66+
signature(x = "WindowSpec"),
67+
function(x, col, ...) {
68+
stopifnot (class(col) %in% c("character", "Column"))
69+
70+
if (class(col) == "character") {
71+
windowSpec(callJMethod(x@sws, "partitionBy", col, list(...)))
72+
} else {
73+
jcols <- lapply(list(col, ...), function(c) {
74+
c@jc
75+
})
76+
windowSpec(callJMethod(x@sws, "partitionBy", jcols))
77+
}
78+
})
79+
80+
#' orderBy
81+
#'
82+
#' Defines the ordering columns in a WindowSpec.
83+
#'
84+
#' @param x a WindowSpec
85+
#' @return a WindowSpec
86+
#' @rdname arrange
87+
#' @name orderBy
88+
#' @family windowspec_method
89+
#' @export
90+
#' @examples
91+
#' \dontrun{
92+
#' orderBy(ws, "col1", "col2")
93+
#' orderBy(ws, df$col1, df$col2)
94+
#' }
95+
setMethod("orderBy",
96+
signature(x = "WindowSpec", col = "character"),
97+
function(x, col, ...) {
98+
windowSpec(callJMethod(x@sws, "orderBy", col, list(...)))
99+
})
100+
101+
#' @rdname arrange
102+
#' @name orderBy
103+
#' @export
104+
setMethod("orderBy",
105+
signature(x = "WindowSpec", col = "Column"),
106+
function(x, col, ...) {
107+
jcols <- lapply(list(col, ...), function(c) {
108+
c@jc
109+
})
110+
windowSpec(callJMethod(x@sws, "orderBy", jcols))
111+
})
112+
113+
#' rowsBetween
114+
#'
115+
#' Defines the frame boundaries, from `start` (inclusive) to `end` (inclusive).
116+
#'
117+
#' Both `start` and `end` are relative positions from the current row. For example, "0" means
118+
#' "current row", while "-1" means the row before the current row, and "5" means the fifth row
119+
#' after the current row.
120+
#'
121+
#' @param x a WindowSpec
122+
#' @param start boundary start, inclusive.
123+
#' The frame is unbounded if this is the minimum long value.
124+
#' @param end boundary end, inclusive.
125+
#' The frame is unbounded if this is the maximum long value.
126+
#' @return a WindowSpec
127+
#' @rdname rowsBetween
128+
#' @name rowsBetween
129+
#' @family windowspec_method
130+
#' @export
131+
#' @examples
132+
#' \dontrun{
133+
#' rowsBetween(ws, 0, 3)
134+
#' }
135+
setMethod("rowsBetween",
136+
signature(x = "WindowSpec", start = "numeric", end = "numeric"),
137+
function(x, start, end) {
138+
# "start" and "end" should be long, due to serde limitation,
139+
# limit "start" and "end" as integer now
140+
windowSpec(callJMethod(x@sws, "rowsBetween", as.integer(start), as.integer(end)))
141+
})
142+
143+
#' rangeBetween
144+
#'
145+
#' Defines the frame boundaries, from `start` (inclusive) to `end` (inclusive).
146+
#'
147+
#' Both `start` and `end` are relative from the current row. For example, "0" means "current row",
148+
#' while "-1" means one off before the current row, and "5" means the five off after the
149+
#' current row.
150+
151+
#' @param x a WindowSpec
152+
#' @param start boundary start, inclusive.
153+
#' The frame is unbounded if this is the minimum long value.
154+
#' @param end boundary end, inclusive.
155+
#' The frame is unbounded if this is the maximum long value.
156+
#' @return a WindowSpec
157+
#' @rdname rangeBetween
158+
#' @name rangeBetween
159+
#' @family windowspec_method
160+
#' @export
161+
#' @examples
162+
#' \dontrun{
163+
#' rangeBetween(ws, 0, 3)
164+
#' }
165+
setMethod("rangeBetween",
166+
signature(x = "WindowSpec", start = "numeric", end = "numeric"),
167+
function(x, start, end) {
168+
# "start" and "end" should be long, due to serde limitation,
169+
# limit "start" and "end" as integer now
170+
windowSpec(callJMethod(x@sws, "rangeBetween", as.integer(start), as.integer(end)))
171+
})
172+
173+
# Note that over is a method of Column class, but it is placed here to
174+
# avoid Roxygen circular-dependency between class Column and WindowSpec.
175+
176+
#' over
177+
#'
178+
#' Define a windowing column.
179+
#'
180+
#' @rdname over
181+
#' @name over
182+
#' @family colum_func
183+
#' @export
184+
setMethod("over",
185+
signature(x = "Column", window = "WindowSpec"),
186+
function(x, window) {
187+
column(callJMethod(x@jc, "over", window@sws))
188+
})

0 commit comments

Comments
 (0)