Skip to content

Commit 66cc11d

Browse files
committed
updating to current branch and addressing @yu-iskw comments
2 parents 0909197 + 7ee7d5a commit 66cc11d

File tree

1,284 files changed

+58474
-24214
lines changed

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

1,284 files changed

+58474
-24214
lines changed

.gitignore

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -50,6 +50,7 @@ spark-tests.log
5050
streaming-tests.log
5151
dependency-reduced-pom.xml
5252
.ensime
53+
.ensime_cache/
5354
.ensime_lucene
5455
checkpoint
5556
derby.log
@@ -74,3 +75,7 @@ metastore/
7475
warehouse/
7576
TempStatsStore/
7677
sql/hive-thriftserver/test_warehouses
78+
79+
# For R session data
80+
.RHistory
81+
.RData

LICENSE

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -250,11 +250,11 @@ The text of each license is also included at licenses/LICENSE-[project].txt.
250250
(Interpreter classes (all .scala files in repl/src/main/scala
251251
except for Main.scala, SparkHelper.scala and ExecutorClassLoader.scala),
252252
and for SerializableMapWrapper in JavaUtils.scala)
253-
(BSD-like) Scala Actors library (org.scala-lang:scala-actors:2.10.4 - http://www.scala-lang.org/)
254-
(BSD-like) Scala Compiler (org.scala-lang:scala-compiler:2.10.4 - http://www.scala-lang.org/)
255-
(BSD-like) Scala Compiler (org.scala-lang:scala-reflect:2.10.4 - http://www.scala-lang.org/)
256-
(BSD-like) Scala Library (org.scala-lang:scala-library:2.10.4 - http://www.scala-lang.org/)
257-
(BSD-like) Scalap (org.scala-lang:scalap:2.10.4 - http://www.scala-lang.org/)
253+
(BSD-like) Scala Actors library (org.scala-lang:scala-actors:2.10.5 - http://www.scala-lang.org/)
254+
(BSD-like) Scala Compiler (org.scala-lang:scala-compiler:2.10.5 - http://www.scala-lang.org/)
255+
(BSD-like) Scala Compiler (org.scala-lang:scala-reflect:2.10.5 - http://www.scala-lang.org/)
256+
(BSD-like) Scala Library (org.scala-lang:scala-library:2.10.5 - http://www.scala-lang.org/)
257+
(BSD-like) Scalap (org.scala-lang:scalap:2.10.5 - http://www.scala-lang.org/)
258258
(BSD-style) scalacheck (org.scalacheck:scalacheck_2.10:1.10.0 - http://www.scalacheck.org)
259259
(BSD-style) spire (org.spire-math:spire_2.10:0.7.1 - http://spire-math.org)
260260
(BSD-style) spire-macros (org.spire-math:spire-macros_2.10:0.7.1 - http://spire-math.org)
@@ -265,7 +265,7 @@ The text of each license is also included at licenses/LICENSE-[project].txt.
265265
(New BSD license) Protocol Buffer Java API (org.spark-project.protobuf:protobuf-java:2.4.1-shaded - http://code.google.com/p/protobuf)
266266
(The BSD License) Fortran to Java ARPACK (net.sourceforge.f2j:arpack_combined_all:0.1 - http://f2j.sourceforge.net)
267267
(The BSD License) xmlenc Library (xmlenc:xmlenc:0.52 - http://xmlenc.sourceforge.net)
268-
(The New BSD License) Py4J (net.sf.py4j:py4j:0.8.2.1 - http://py4j.sourceforge.net/)
268+
(The New BSD License) Py4J (net.sf.py4j:py4j:0.9 - http://py4j.sourceforge.net/)
269269
(Two-clause BSD-style license) JUnit-Interface (com.novocode:junit-interface:0.10 - http://github.com/szeiger/junit-interface/)
270270
(BSD licence) sbt and sbt-launch-lib.bash
271271
(BSD 3 Clause) d3.min.js (https://github.com/mbostock/d3/blob/master/LICENSE)

R/install-dev.bat

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -25,3 +25,9 @@ set SPARK_HOME=%~dp0..
2525
MKDIR %SPARK_HOME%\R\lib
2626

2727
R.exe CMD INSTALL --library="%SPARK_HOME%\R\lib" %SPARK_HOME%\R\pkg\
28+
29+
rem Zip the SparkR package so that it can be distributed to worker nodes on YARN
30+
pushd %SPARK_HOME%\R\lib
31+
%JAVA_HOME%\bin\jar.exe cfM "%SPARK_HOME%\R\lib\sparkr.zip" SparkR
32+
popd
33+

R/install-dev.sh

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -42,4 +42,8 @@ Rscript -e ' if("devtools" %in% rownames(installed.packages())) { library(devtoo
4242
# Install SparkR to $LIB_DIR
4343
R CMD INSTALL --library=$LIB_DIR $FWDIR/pkg/
4444

45+
# Zip the SparkR package so that it can be distributed to worker nodes on YARN
46+
cd $LIB_DIR
47+
jar cfM "$LIB_DIR/sparkr.zip" SparkR
48+
4549
popd > /dev/null

R/pkg/DESCRIPTION

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -34,4 +34,5 @@ Collate:
3434
'serialize.R'
3535
'sparkR.R'
3636
'stats.R'
37+
'types.R'
3738
'utils.R'

R/pkg/NAMESPACE

Lines changed: 28 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -23,8 +23,11 @@ export("setJobGroup",
2323
exportClasses("DataFrame")
2424

2525
exportMethods("arrange",
26+
"as.data.frame",
27+
"attach",
2628
"cache",
2729
"collect",
30+
"coltypes",
2831
"columns",
2932
"count",
3033
"cov",
@@ -64,6 +67,7 @@ exportMethods("arrange",
6467
"repartition",
6568
"sample",
6669
"sample_frac",
70+
"sampleBy",
6771
"saveAsParquetFile",
6872
"saveAsTable",
6973
"saveDF",
@@ -81,6 +85,7 @@ exportMethods("arrange",
8185
"unique",
8286
"unpersist",
8387
"where",
88+
"with",
8489
"withColumn",
8590
"withColumnRenamed",
8691
"write.df")
@@ -93,6 +98,7 @@ exportMethods("%in%",
9398
"add_months",
9499
"alias",
95100
"approxCountDistinct",
101+
"array_contains",
96102
"asc",
97103
"ascii",
98104
"asin",
@@ -117,12 +123,14 @@ exportMethods("%in%",
117123
"count",
118124
"countDistinct",
119125
"crc32",
126+
"cumeDist",
120127
"date_add",
121128
"date_format",
122129
"date_sub",
123130
"datediff",
124131
"dayofmonth",
125132
"dayofyear",
133+
"denseRank",
126134
"desc",
127135
"endsWith",
128136
"exp",
@@ -148,8 +156,11 @@ exportMethods("%in%",
148156
"isNaN",
149157
"isNotNull",
150158
"isNull",
159+
"kurtosis",
160+
"lag",
151161
"last",
152162
"last_day",
163+
"lead",
153164
"least",
154165
"length",
155166
"levenshtein",
@@ -175,17 +186,21 @@ exportMethods("%in%",
175186
"nanvl",
176187
"negate",
177188
"next_day",
189+
"ntile",
178190
"otherwise",
191+
"percentRank",
179192
"pmod",
180193
"quarter",
181194
"rand",
182195
"randn",
196+
"rank",
183197
"regexp_extract",
184198
"regexp_replace",
185199
"reverse",
186200
"rint",
187201
"rlike",
188202
"round",
203+
"rowNumber",
189204
"rpad",
190205
"rtrim",
191206
"second",
@@ -194,12 +209,18 @@ exportMethods("%in%",
194209
"shiftLeft",
195210
"shiftRight",
196211
"shiftRightUnsigned",
212+
"sd",
197213
"sign",
198214
"signum",
199215
"sin",
200216
"sinh",
201217
"size",
218+
"skewness",
219+
"sort_array",
202220
"soundex",
221+
"stddev",
222+
"stddev_pop",
223+
"stddev_samp",
203224
"sqrt",
204225
"startsWith",
205226
"substr",
@@ -218,6 +239,10 @@ exportMethods("%in%",
218239
"unhex",
219240
"unix_timestamp",
220241
"upper",
242+
"var",
243+
"variance",
244+
"var_pop",
245+
"var_samp",
221246
"weekofyear",
222247
"when",
223248
"year")
@@ -228,7 +253,8 @@ exportMethods("agg")
228253
export("sparkRSQL.init",
229254
"sparkRHive.init")
230255

231-
export("cacheTable",
256+
export("as.DataFrame",
257+
"cacheTable",
232258
"clearCache",
233259
"createDataFrame",
234260
"createExternalTable",
@@ -250,6 +276,4 @@ export("structField",
250276
"structType",
251277
"structType.jobj",
252278
"structType.structField",
253-
"print.structType")
254-
255-
export("as.data.frame")
279+
"print.structType")

0 commit comments

Comments
 (0)