Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
254 commits
Select commit Hold shift + click to select a range
00b655c
[SPARK-9755] [MLLIB] Add docs to MultivariateOnlineSummarizer methods
Aug 10, 2015
d285212
Fixed AtmoicReference<> Example
lababidi Aug 10, 2015
0fe6674
[SPARK-9784] [SQL] Exchange.isUnsafe should check whether codegen and…
JoshRosen Aug 10, 2015
40ed2af
[SPARK-9763][SQL] Minimize exposure of internal SQL classes.
rxin Aug 10, 2015
fe2fb7f
[SPARK-9620] [SQL] generated UnsafeProjection should support many col…
Aug 10, 2015
c4fd2a2
[SPARK-9759] [SQL] improve decimal.times() and cast(int, decimalType)
Aug 10, 2015
853809e
[SPARK-5155] [PYSPARK] [STREAMING] Mqtt streaming support in Python
prabeesh Aug 10, 2015
3c9802d
[SPARK-9801] [STREAMING] Check if file exists before deleting tempora…
viadea Aug 11, 2015
071bbad
[SPARK-9340] [SQL] Fixes converting unannotated Parquet lists
dguy Aug 11, 2015
91e9389
[SPARK-9729] [SPARK-9363] [SQL] Use sort merge join for left and righ…
JoshRosen Aug 11, 2015
0f90d60
[SPARK-9640] [STREAMING] [TEST] Do not run Python Kinesis tests when …
tdas Aug 11, 2015
55752d8
[SPARK-9810] [BUILD] Remove individual commit messages from the squas…
rxin Aug 11, 2015
600031e
[SPARK-9727] [STREAMING] [BUILD] Updated streaming kinesis SBT projec…
tdas Aug 11, 2015
d378396
[SPARK-9815] Rename PlatformDependent.UNSAFE -> Platform.
rxin Aug 11, 2015
dfe347d
[SPARK-9785] [SQL] HashPartitioning compatibility should consider exp…
JoshRosen Aug 11, 2015
bce7279
Fix comment error
zjffdu Aug 11, 2015
8cad854
[SPARK-8345] [ML] Add an SQL node as a feature transformer
yanboliang Aug 11, 2015
dbd778d
[SPARK-8764] [ML] string indexer should take option to handle unseen …
holdenk Aug 11, 2015
5b8bb1b
[SPARK-9572] [STREAMING] [PYSPARK] Added StreamingContext.getActiveOr…
tdas Aug 11, 2015
5831294
[SPARK-9646] [SQL] Add metrics for all join and aggregate operators
zsxwing Aug 11, 2015
520ad44
[SPARK-9750] [MLLIB] Improve equals on SparseMatrix and DenseMatrix
Aug 11, 2015
2a3be4d
[SPARK-7726] Add import so Scaladoc doesn't fail.
pwendell Aug 11, 2015
00c0272
[SPARK-9814] [SQL] EqualNotNull not passing to data sources
HyukjinKwon Aug 11, 2015
f16bc68
[SPARK-9824] [CORE] Fix the issue that InternalAccumulator leaks Weak…
zsxwing Aug 11, 2015
423cdfd
Closes #1290
mengxr Aug 11, 2015
be3e271
[SPARK-9788] [MLLIB] Fix LDA Binary Compatibility
Aug 11, 2015
017b5de
[SPARK-8925] [MLLIB] Add @since tags to mllib.util
sthota2014 Aug 11, 2015
736af95
[HOTFIX] Fix style error caused by 017b5de
Aug 11, 2015
5a5bbc2
[SPARK-9074] [LAUNCHER] Allow arbitrary Spark args to be set.
Aug 11, 2015
afa757c
[SPARK-9849] [SQL] DirectParquetOutputCommitter qualified name should…
rxin Aug 12, 2015
b8fdd5c
Added section for preferred yarn and kept the one with deploy-mode fo…
Aug 12, 2015
8c65676
Moved the Standalone examples together
Aug 12, 2015
ca8f70e
[SPARK-9649] Fix flaky test MasterSuite again - disable REST
Aug 12, 2015
3ef0f32
[SPARK-1517] Refactor release scripts to facilitate nightly publishing
pwendell Aug 12, 2015
74a293f
[SPARK-9713] [ML] Document SparkR MLlib glm() integration in Spark 1.5
ericl Aug 12, 2015
c3e9a12
[SPARK-9831] [SQL] fix serialization with empty broadcast
Aug 12, 2015
b1581ac
[SPARK-9854] [SQL] RuleExecutor.timeMap should be thread-safe
JoshRosen Aug 12, 2015
b85f9a2
[SPARK-8366] maxNumExecutorsNeeded should properly handle failed tasks
XuTingjun Aug 12, 2015
a807fcb
[SPARK-9806] [WEB UI] Don't share ReplayListenerBus between multiple …
Aug 12, 2015
4e3f4b9
[SPARK-9829] [WEBUI] Display the update value for peak execution memory
zsxwing Aug 12, 2015
bab8923
[SPARK-9426] [WEBUI] Job page DAG visualization is not shown
carsonwang Aug 12, 2015
5c99d8b
[SPARK-8798] [MESOS] Allow additional uris to be fetched with mesos
tnachen Aug 12, 2015
741a29f
[SPARK-9575] [MESOS] Add docuemntation around Mesos shuffle service.
tnachen Aug 12, 2015
9d08224
[SPARK-9182] [SQL] Filters are not passed through to jdbc source
yjshen Aug 12, 2015
3ecb379
[SPARK-9407] [SQL] Relaxes Parquet ValidTypeMap to allow ENUM predica…
liancheng Aug 12, 2015
2e68066
[SPARK-8625] [CORE] Propagate user exceptions in tasks back to driver
tomwhite Aug 12, 2015
be5d191
[SPARK-9795] Dynamic allocation: avoid double counting when killing s…
Aug 12, 2015
66d87c1
[SPARK-7583] [MLLIB] User guide update for RegexTokenizer
hhbyyh Aug 12, 2015
e011079
[SPARK-9747] [SQL] Avoid starving an unsafe operator in aggregation
Aug 12, 2015
57ec27d
[SPARK-9804] [HIVE] Use correct value for isSrcLocal parameter.
Aug 12, 2015
70fe558
[SPARK-9847] [ML] Modified copyValues to distinguish between default,…
jkbradley Aug 12, 2015
60103ec
[SPARK-9726] [PYTHON] PySpark DF join no longer accepts on=None
btashton Aug 12, 2015
762bacc
[SPARK-9766] [ML] [PySpark] check and add miss docs for PySpark ML
yanboliang Aug 12, 2015
8a331d0
Moved Master URLs
Aug 12, 2015
551def5
[SPARK-9789] [ML] Added logreg threshold param back
jkbradley Aug 12, 2015
6f60298
[SPARK-8967] [DOC] add Since annotation
mengxr Aug 12, 2015
a17384f
[SPARK-9907] [SQL] Python crc32 is mistakenly calling md5
rxin Aug 12, 2015
738f353
[SPARK-9092] Fixed incompatibility when both num-executors and dynami…
Aug 12, 2015
ab7e721
[SPARK-9826] [CORE] Fix cannot use custom classes in log4j.properties
michellemay Aug 12, 2015
7035d88
[SPARK-9894] [SQL] Json writer should handle MapData.
yhuai Aug 12, 2015
caa14d9
[SPARK-9913] [MLLIB] LDAUtils should be private
mengxr Aug 12, 2015
6e409bc
[SPARK-9909] [ML] [TRIVIAL] move weightCol to shared params
holdenk Aug 12, 2015
e6aef55
[SPARK-9912] [MLLIB] QRDecomposition should use QType and RType for t…
mengxr Aug 13, 2015
fc1c7fd
[SPARK-9915] [ML] stopWords should use StringArrayParam
mengxr Aug 13, 2015
660e6dc
[SPARK-9449] [SQL] Include MetastoreRelation's inputFiles
marmbrus Aug 13, 2015
8ce6096
[SPARK-9780] [STREAMING] [KAFKA] prevent NPE if KafkaRDD instantiation …
koeninger Aug 13, 2015
0d1d146
[SPARK-9724] [WEB UI] Avoid unnecessary redirects in the Spark Web UI.
Aug 13, 2015
f4bc01f
[SPARK-9855] [SPARKR] Add expression functions into SparkR whose para…
yu-iskw Aug 13, 2015
7b13ed2
[SPARK-9870] Disable driver UI and Master REST server in SparkSubmitS…
JoshRosen Aug 13, 2015
7c35746
[SPARK-9827] [SQL] fix fd leak in UnsafeRowSerializer
Aug 13, 2015
4413d08
[SPARK-9908] [SQL] When spark.sql.tungsten.enabled is false, broadcas…
yhuai Aug 13, 2015
d2d5e7f
[SPARK-9704] [ML] Made ProbabilisticClassifier, Identifiable, VectorU…
jkbradley Aug 13, 2015
d7053be
[SPARK-9903] [MLLIB] skip local processing in PrefixSpan if there are…
mengxr Aug 13, 2015
2fb4901
[SPARK-9916] [BUILD] [SPARKR] removed left-over sparkr.zip copy/creat…
brkyvz Aug 13, 2015
2278219
[SPARK-9920] [SQL] The simpleString of TungstenAggregate does not sho…
yhuai Aug 13, 2015
a8ab263
[SPARK-9832] [SQL] add a thread-safe lookup for BytesToBytseMap
Aug 13, 2015
5fc058a
[SPARK-9917] [ML] add getMin/getMax and doc for originalMin/origianlM…
mengxr Aug 13, 2015
df54389
[SPARK-8922] [DOCUMENTATION, MLLIB] Add @since tags to mllib.evaluation
mosessky Aug 13, 2015
d7eb371
[SPARK-9914] [ML] define setters explicitly for Java and use setParam…
mengxr Aug 13, 2015
d0b1891
[SPARK-9927] [SQL] Revert 8049 since it's pushing wrong filter down
yjshen Aug 13, 2015
68f9957
[SPARK-9918] [MLLIB] remove runs from k-means and rename epsilon to tol
mengxr Aug 13, 2015
84a2791
[SPARK-9885] [SQL] Also pass barrierPrefixes and sharedPrefixes to Is…
yhuai Aug 13, 2015
6993031
[SPARK-9757] [SQL] Fixes persistence of Parquet relation with decimal…
liancheng Aug 13, 2015
2932e25
[SPARK-9073] [ML] spark.ml Models copy() should call setParent when t…
Lewuathe Aug 13, 2015
7a539ef
[SPARK-8965] [DOCS] Add ml-guide Python Example: Estimator, Transform…
Rosstin Aug 13, 2015
4b70798
[MINOR] [ML] change MultilayerPerceptronClassifierModel to Multilayer…
yanboliang Aug 13, 2015
65fec79
[MINOR] [DOC] fix mllib pydoc warnings
mengxr Aug 13, 2015
8815ba2
[SPARK-9649] Fix MasterSuite, third time's a charm
Aug 13, 2015
864de8e
[SPARK-9661] [MLLIB] [ML] Java compatibility
MechCoder Aug 13, 2015
a8d2f4c
[SPARK-9942] [PYSPARK] [SQL] ignore exceptions while try to import pa…
Aug 13, 2015
c2520f5
[SPARK-9935] [SQL] EqualNotNull not processed in ORC
HyukjinKwon Aug 13, 2015
0fed23b
Added deploy-mode section to YARN submission
Aug 13, 2015
6c5858b
[SPARK-9922] [ML] rename StringIndexerReverse to IndexToString
mengxr Aug 13, 2015
693949b
[SPARK-8976] [PYSPARK] fix open mode in python3
Aug 14, 2015
c50f97d
[SPARK-9943] [SQL] deserialized UnsafeHashedRelation should be serial…
Aug 14, 2015
8187b3a
[SPARK-9580] [SQL] Replace singletons in SQL tests
Aug 14, 2015
bd35385
[SPARK-9945] [SQL] pageSize should be calculated from executor.memory
Aug 14, 2015
7c7c752
[MINOR] [SQL] Remove canEqual in Row
viirya Aug 14, 2015
c8677d7
[SPARK-9958] [SQL] Make HiveThriftServer2Listener thread-safe and upd…
zsxwing Aug 14, 2015
a0e1abb
[SPARK-9661] [MLLIB] minor clean-up of SPARK-9661
mengxr Aug 14, 2015
7ecf0c4
[SPARK-9956] [ML] Make trees work with one-category features
jkbradley Aug 14, 2015
a7317cc
[SPARK-8744] [ML] Add a public constructor to StringIndexer
holdenk Aug 14, 2015
34d610b
[SPARK-9929] [SQL] support metadata in withColumn
cloud-fan Aug 14, 2015
57c2d08
[SPARK-9923] [CORE] ShuffleMapStage.numAvailableOutputs should be an …
Aug 14, 2015
3bc5528
[SPARK-9946] [SPARK-9589] [SQL] fix NPE and thread-safety in TaskMemo…
Aug 14, 2015
ece0056
[SPARK-9561] Re-enable BroadcastJoinSuite
Aug 14, 2015
ffa05c8
[SPARK-9828] [PYSPARK] Mutable values should not be default arguments
MechCoder Aug 14, 2015
33bae58
[SPARK-9809] Task crashes because the internal accumulators are not p…
carsonwang Aug 14, 2015
6518ef6
[SPARK-9948] Fix flaky AccumulatorSuite - internal accumulators
Aug 14, 2015
9407baa
[SPARK-9877] [CORE] Fix StandaloneRestServer NPE when submitting appl…
jerryshao Aug 14, 2015
11ed2b1
[SPARK-9978] [PYSPARK] [SQL] fix Window.orderBy and doc of ntile()
Aug 14, 2015
2a6590e
[SPARK-9981] [ML] Made labels public for StringIndexerModel
jkbradley Aug 14, 2015
1150a19
[SPARK-8670] [SQL] Nested columns can't be referenced in pyspark
cloud-fan Aug 14, 2015
f3bfb71
[SPARK-9966] [STREAMING] Handle couple of corner cases in PIDRateEsti…
tdas Aug 14, 2015
18a761e
[SPARK-9968] [STREAMING] Reduced time spent within synchronized block…
tdas Aug 14, 2015
932b24f
[SPARK-9949] [SQL] Fix TakeOrderedAndProject's output.
yhuai Aug 15, 2015
e5fd604
[SPARK-9934] Deprecate NIO ConnectionManager.
rxin Aug 15, 2015
37586e5
[HOTFIX] fix duplicated braces
Aug 15, 2015
ec29f20
[SPARK-9634] [SPARK-9323] [SQL] cleanup unnecessary Aliases in Logica…
cloud-fan Aug 15, 2015
6c4fdbe
[SPARK-8887] [SQL] Explicit define which data types can be used as dy…
yjshen Aug 15, 2015
609ce3c
[SPARK-9984] [SQL] Create local physical operator interface.
rxin Aug 15, 2015
71a3af8
[SPARK-9960] [GRAPHX] sendMessage type fix in LabelPropagation.scala
blindFS Aug 15, 2015
7c1e568
[SPARK-9725] [SQL] fix serialization of UTF8String across different JVM
Aug 15, 2015
a85fb6c
[SPARK-9980] [BUILD] Fix SBT publishLocal error due to invalid charac…
hvanhovell Aug 15, 2015
5705672
[SPARK-9955] [SQL] correct error message for aggregate
cloud-fan Aug 15, 2015
1db7179
[SPARK-9805] [MLLIB] [PYTHON] [STREAMING] Added _eventually for ml st…
jkbradley Aug 16, 2015
182f9b7
[SPARK-9973] [SQL] Correct in-memory columnar buffer size
viper-kun Aug 16, 2015
5f9ce73
[SPARK-8844] [SPARKR] head/collect is broken in SparkR.
Aug 16, 2015
cf01607
[SPARK-10008] Ensure shuffle locality doesn't take precedence over na…
mateiz Aug 16, 2015
ae2370e
[SPARK-10005] [SQL] Fixes schema merging for nested structs
liancheng Aug 16, 2015
26e7605
[SPARK-9871] [SPARKR] Add expression functions into SparkR which have…
yu-iskw Aug 17, 2015
3ff81ad
[SPARK-9199] [CORE] Upgrade Tachyon version from 0.7.0 -> 0.7.1.
calvinjia Aug 17, 2015
f7efda3
[SPARK-9959] [MLLIB] Association Rules Java Compatibility
Aug 17, 2015
76c155d
[SPARK-7837] [SQL] Avoids double closing output writers when commitTa…
liancheng Aug 17, 2015
ed092a0
[SPARK-9924] [WEB UI] Don't schedule checkForLogs while some of them …
Aug 17, 2015
f68d024
[SPARK-7736] [CORE] [YARN] Make pyspark fail YARN app on failure.
Aug 17, 2015
a4acdab
[SPARK-9950] [SQL] Wrong Analysis Error for grouping/aggregating on s…
cloud-fan Aug 17, 2015
f10660f
[SPARK-10036] [SQL] Load JDBC driver in DataFrameReader.jdbc and Data…
zsxwing Aug 17, 2015
b265e28
[SPARK-9526] [SQL] Utilize randomized tests to reveal potential bugs …
yjshen Aug 17, 2015
772e7c1
[SPARK-9592] [SQL] Fix Last function implemented based on AggregateEx…
yhuai Aug 17, 2015
fdaf17f
[SPARK-10068] [MLLIB] Adds links to MLlib types, algos, utilities lis…
Aug 17, 2015
088b11e
[SPARK-8920] [MLLIB] Add @since tags to mllib.linalg
Aug 17, 2015
52ae952
[SPARK-9974] [BUILD] [SQL] Makes sure com.twitter:parquet-hadoop-bund…
liancheng Aug 18, 2015
0076e82
[SPARK-9768] [PYSPARK] [ML] Add Python API and user guide for ml.feat…
yanboliang Aug 18, 2015
18523c1
SPARK-8916 [Documentation, MLlib] Add @since tags to mllib.regression
prayagchandran Aug 18, 2015
0b6b017
[SPARK-9898] [MLLIB] Prefix Span user guide
Aug 18, 2015
f9d1a92
[SPARK-7707] User guide and example code for KernelDensity
sryza Aug 18, 2015
c90c605
[SPARK-9902] [MLLIB] Add Java and Python examples to user guide for 1…
Aug 18, 2015
ee093c8
[SPARK-10059] [YARN] Explicitly add JSP dependencies for tests.
Aug 18, 2015
e290029
[SPARK-7808] [ML] add package doc for ml.feature
mengxr Aug 18, 2015
a091031
[MINOR] Format the comment of `translate` at `functions.scala`
yu-iskw Aug 18, 2015
5af3838
[SPARK-10038] [SQL] fix bug in generated unsafe projection when there…
Aug 18, 2015
dd0614f
[SPARK-10076] [ML] make MultilayerPerceptronClassifier layers and wei…
yanboliang Aug 18, 2015
c34e9ff
[MINOR] fix the comments in IndexShuffleBlockResolver
CodingCat Aug 18, 2015
5723d26
[SPARK-8118] [SQL] Redirects Parquet JUL logger via SLF4J
liancheng Aug 18, 2015
1968276
[SPARK-10007] [SPARKR] Update `NAMESPACE` file in SparkR for simple p…
yu-iskw Aug 18, 2015
354f458
[SPARK-9028] [ML] Add CountVectorizer as an estimator to generate Cou…
hhbyyh Aug 18, 2015
c1840a8
[SPARK-7736] [CORE] Fix a race introduced in PythonRunner.
Aug 18, 2015
f5ea391
[SPARK-9900] [MLLIB] User guide for Association Rules
Aug 18, 2015
f4fa61e
[SPARK-10029] [MLLIB] [DOC] Add Python examples for mllib IsotonicReg…
yanboliang Aug 18, 2015
747c2ba
[SPARK-10032] [PYSPARK] [DOC] Add Python example for mllib LDAModel u…
yanboliang Aug 18, 2015
8bae901
[SPARK-10085] [MLLIB] [DOCS] removed unnecessary numpy array import
stared Aug 18, 2015
bf1d661
[SPARK-9574] [STREAMING] Remove unnecessary contents of spark-streami…
zsxwing Aug 18, 2015
80cb25b
[SPARK-10080] [SQL] Fix binary incompatibility for $ column interpola…
marmbrus Aug 18, 2015
9b731fa
[SPARK-9782] [YARN] Support YARN application tags via SparkConf
dennishuo Aug 18, 2015
fa41e02
[SPARK-10089] [SQL] Add missing golden files.
Aug 18, 2015
492ac1f
[SPARK-10088] [SQL] Add support for "stored as avro" in HiveQL parser.
Aug 18, 2015
1dbffba
[SPARK-8924] [MLLIB, DOCUMENTATION] Added @since tags to mllib.tree
BryanCutler Aug 18, 2015
c635a16
[SPARK-10012] [ML] Missing test case for Params#arrayLengthGt
Lewuathe Aug 18, 2015
9108eff
[SPARK-10098] [STREAMING] [TEST] Cleanup active context after test in…
tdas Aug 19, 2015
badf7fa
[SPARK-8473] [SPARK-9889] [ML] User guide and example code for DCT
Aug 19, 2015
04e0fea
Bump SparkR version string to 1.5.0
falaki Aug 19, 2015
1f89029
[SPARK-9969] [YARN] Remove old MR classpath API support
jerryshao Aug 19, 2015
b4b35f1
[SPARKR] [MINOR] Get rid of a long line warning
yu-iskw Aug 19, 2015
1aeae05
[SPARK-10072] [STREAMING] BlockGenerator can deadlock when the queue …
tdas Aug 19, 2015
90273ef
[SPARK-10102] [STREAMING] Fix a race condition that startReceiver may…
zsxwing Aug 19, 2015
a5b5b93
[SPARK-9939] [SQL] Resorts to Java process API in CliSuite, HiveSpark…
liancheng Aug 19, 2015
bf32c1f
[SPARK-10075] [SPARKR] Add `when` expressino function in SparkR
yu-iskw Aug 19, 2015
270ee67
[SPARK-10095] [SQL] use public API of BigInteger
Aug 19, 2015
1ff0580
[SPARK-10093] [SPARK-10096] [SQL] Avoid transformation on executors &…
rxin Aug 19, 2015
de32238
[SPARK-9705] [DOC] fix docs about Python version
Aug 19, 2015
1c843e2
[SPARK-9508] GraphX Pregel docs update with new Pregel code
avulanov Aug 19, 2015
010b03e
[SPARK-9952] Fix N^2 loop when DAGScheduler.getPreferredLocsInternal …
JoshRosen Aug 19, 2015
bc9a0e0
[SPARK-9967] [SPARK-10099] [STREAMING] Renamed conf spark.streaming.b…
tdas Aug 19, 2015
b23c4d3
Fix Broken Link
bllchmbrs Aug 19, 2015
f141efe
[SPARK-10070] [DOCS] Remove Guava dependencies in user guides
srowen Aug 19, 2015
865a3df
[DOCS] [SQL] [PYSPARK] Fix typo in ntile function
moutai Aug 19, 2015
ba2a07e
[SPARK-9977] [DOCS] Update documentation for StringIndexer
Lewuathe Aug 19, 2015
3d16a54
[SPARK-8949] Print warnings when using preferred locations feature
darkjh Aug 19, 2015
39e4ebd
[SPARK-10060] [ML] [DOC] spark.ml DecisionTree user guide
jkbradley Aug 19, 2015
802b5b8
[SPARK-10084] [MLLIB] [DOC] Add Python example for mllib FP-growth us…
yanboliang Aug 19, 2015
f3e1779
[SPARK-5754] [YARN] Spark/Yarn/Windows driver/executor escaping Fix
cbvoxel Aug 19, 2015
2fcb9cb
[SPARK-9856] [SPARKR] Add expression functions into SparkR whose para…
yu-iskw Aug 19, 2015
5fd53c6
[SPARK-9833] [YARN] Add options to disable delegation token retrieval.
Aug 19, 2015
28a9846
[SPARK-10097] Adds `shouldMaximize` flag to `ml.evaluation.Evaluator`
Aug 19, 2015
d898c33
[SPARK-10106] [SPARKR] Add `ifelse` Column function to SparkR
yu-iskw Aug 19, 2015
5b62bef
[SPARK-8918] [MLLIB] [DOC] Add @since tags to mllib.clustering
mengxr Aug 19, 2015
f3391ff
[SPARK-8889] [CORE] Fix for OOM for graph creation
rekhajoshm Aug 19, 2015
e05da5c
[SPARK-10107] [SQL] fix NPE in format_number
Aug 19, 2015
0888736
[SPARK-10073] [SQL] Python withColumn should replace the old column
Aug 19, 2015
21bdbe9
[SPARK-9627] [SQL] Stops using Scala runtime reflection in Dictionary…
liancheng Aug 19, 2015
1f4c4fe
[SPARK-10090] [SQL] fix decimal scale of division
Aug 19, 2015
f3ff4c4
[SPARK-9899] [SQL] Disables customized output committer when speculat…
liancheng Aug 19, 2015
373a376
[SPARK-10083] [SQL] CaseWhen should support type coercion of DecimalT…
adrian-wang Aug 19, 2015
e0dd130
[SPARK-10119] [CORE] Fix isDynamicAllocationEnabled when config is ex…
Aug 19, 2015
b0dbaec
[SPARK-6489] [SQL] add column pruning for Generate
cloud-fan Aug 19, 2015
8e0a072
[SPARK-9895] User Guide for RFormula Feature Transformer
ericl Aug 19, 2015
ba5f7e1
[SPARK-10035] [SQL] Parquet filters does not process EqualNullSafe fi…
HyukjinKwon Aug 20, 2015
2f2686a
[SPARK-9242] [SQL] Audit UDAF interface.
rxin Aug 20, 2015
1f29d50
[SPARK-9812] [STREAMING] Fix Python 3 compatibility issue in PySpark …
zsxwing Aug 20, 2015
affc8a8
[SPARK-10125] [STREAMING] Fix a potential deadlock in JobGenerator.stop
zsxwing Aug 20, 2015
73431d8
[SPARK-10124] [MESOS] Fix removing queued driver in mesos cluster mode.
tnachen Aug 20, 2015
b762f99
[SPARK-10128] [STREAMING] Used correct classloader to deserialize WAL…
tdas Aug 20, 2015
43e0135
[SPARK-10092] [SQL] Multi-DB support follow up.
yhuai Aug 20, 2015
b4f4e91
[SPARK-10100] [SQL] Eliminate hash table lookup if there is no groupi…
rxin Aug 20, 2015
52c6053
[MINOR] [SQL] Fix sphinx warnings in PySpark SQL
MechCoder Aug 20, 2015
39e91fe
[SPARK-9982] [SPARKR] SparkR DataFrame fail to return data of Decimal…
ashkurenko Aug 20, 2015
85f9a61
[SPARK-10136] [SQL] Fixes Parquet support for Avro array of primitive…
liancheng Aug 20, 2015
12de348
[SPARK-10126] [PROJECT INFRA] Fix typo in release-build.sh which brok…
JoshRosen Aug 20, 2015
907df2f
[SQL] [MINOR] remove unnecessary class
cloud-fan Aug 20, 2015
2a3d98a
[SPARK-10138] [ML] move setters to MultilayerPerceptronClassifier and…
mengxr Aug 20, 2015
7cfc075
[SPARK-10108] Add since tags to mllib.feature
MechCoder Aug 20, 2015
eaafe13
[SPARK-9245] [MLLIB] LDA topic assignments
jkbradley Aug 20, 2015
afe9f03
[SPARK-9400] [SQL] codegen for StringLocate
tarekbecker Aug 20, 2015
cdd9a2b
[SPARK-10140] [DOC] add target fields to @Since
mengxr Aug 21, 2015
dcfe0c5
[SPARK-9846] [DOCS] User guide for Multilayer Perceptron Classifier
avulanov Aug 21, 2015
bb220f6
[SPARK-10040] [SQL] Use batch insert for JDBC writing
viirya Aug 21, 2015
708036c
[SPARK-9439] [YARN] External shuffle service robust to NM restarts us…
squito Aug 21, 2015
3c462f5
[SPARK-10130] [SQL] type coercion for IF should have children resolve…
adrian-wang Aug 21, 2015
d89cc38
[SPARK-10122] [PYSPARK] [STREAMING] Fix getOffsetRanges bug in PySpar…
jerryshao Aug 21, 2015
f5b028e
[SPARK-9864] [DOC] [MLlib] [SQL] Replace since in scaladoc to Since a…
MechCoder Aug 21, 2015
e335509
[SPARK-10143] [SQL] Use parquet's block size (row group size) setting…
yhuai Aug 21, 2015
f01c422
[SPARK-10163] [ML] Allow single-category features for GBT models
jkbradley Aug 21, 2015
630a994
[SPARK-9893] User guide with Java test suite for VectorSlicer
yinxusen Aug 21, 2015
46fcb9e
Update programming-guide.md
yosssi Aug 22, 2015
90cb9f0
[SPARK-9401] [SQL] Fully implement code generation for ConcatWs
yjshen Aug 22, 2015
623c675
Update streaming-programming-guide.md
yosssi Aug 23, 2015
670d251
Added yarn-deploy-mode alternative
Aug 9, 2015
40d3b80
Moved Master URLs closer above before the examples
Aug 9, 2015
89d15bf
Removed the addition section
Aug 10, 2015
d2c212a
Added a section for alternative submission. Distinguished from the sh…
Aug 10, 2015
3f25500
Added section for preferred yarn and kept the one with deploy-mode fo…
Aug 12, 2015
0766da6
Moved the Standalone examples together
Aug 12, 2015
46a24d5
Moved Master URLs
Aug 12, 2015
9175807
Added deploy-mode section to YARN submission
Aug 13, 2015
3052c74
Merge branch 'SPARK-9570' of https://github.com/nssalian/spark into S…
Aug 23, 2015
c91073e
Modified Running on YARN doc
Aug 23, 2015
3dc79e2
Modified submitting applications
Aug 23, 2015
67a4255
Removed extra YARN section, there is already a running without --depl…
Aug 23, 2015
a8b67ef
Added --deploy-mode flags to the yarn submission sections
Aug 24, 2015
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
The table of contents is too big for display.
Diff view
Diff view
  •  
  •  
  •  
1 change: 1 addition & 0 deletions .rat-excludes
Original file line number Diff line number Diff line change
Expand Up @@ -94,3 +94,4 @@ INDEX
gen-java.*
.*avpr
org.apache.spark.sql.sources.DataSourceRegister
.*parquet
2 changes: 2 additions & 0 deletions R/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -63,5 +63,7 @@ You can also run the unit-tests for SparkR by running (you need to install the [
The `./bin/spark-submit` and `./bin/sparkR` scripts can also be used to submit jobs to YARN clusters. You will need to set the `YARN_CONF_DIR` environment variable before doing so. For example, on CDH you can run:
```
export YARN_CONF_DIR=/etc/hadoop/conf
./bin/spark-submit --master yarn --deploy-mode cluster (or client) examples/src/main/r/dataframe.R
OR
./bin/spark-submit --master yarn examples/src/main/r/dataframe.R
```
5 changes: 0 additions & 5 deletions R/install-dev.bat
Original file line number Diff line number Diff line change
Expand Up @@ -25,8 +25,3 @@ set SPARK_HOME=%~dp0..
MKDIR %SPARK_HOME%\R\lib

R.exe CMD INSTALL --library="%SPARK_HOME%\R\lib" %SPARK_HOME%\R\pkg\

rem Zip the SparkR package so that it can be distributed to worker nodes on YARN
pushd %SPARK_HOME%\R\lib
%JAVA_HOME%\bin\jar.exe cfM "%SPARK_HOME%\R\lib\sparkr.zip" SparkR
popd
3 changes: 2 additions & 1 deletion R/pkg/DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
Package: SparkR
Type: Package
Title: R frontend for Spark
Version: 1.4.0
Version: 1.5.0
Date: 2013-09-09
Author: The Apache Software Foundation
Maintainer: Shivaram Venkataraman <[email protected]>
Expand Down Expand Up @@ -29,6 +29,7 @@ Collate:
'client.R'
'context.R'
'deserialize.R'
'functions.R'
'mllib.R'
'serialize.R'
'sparkR.R'
Expand Down
81 changes: 80 additions & 1 deletion R/pkg/NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -84,57 +84,136 @@ exportClasses("Column")

# Column methods exported by the package, kept in alphabetical order.
# The list is a single exportMethods() call: only the final entry may carry
# the closing parenthesis, otherwise every name after it is left outside
# the directive.
exportMethods("abs",
              "acos",
              "add_months",
              "alias",
              "approxCountDistinct",
              "asc",
              "ascii",
              "asin",
              "atan",
              "atan2",
              "avg",
              "base64",
              "between",
              "bin",
              "bitwiseNOT",
              "cast",
              "cbrt",
              "ceil",
              "ceiling",
              "concat",
              "concat_ws",
              "contains",
              "conv",
              "cos",
              "cosh",
              "count",
              "countDistinct",
              "crc32",
              "date_add",
              "date_format",
              "date_sub",
              "datediff",
              "dayofmonth",
              "dayofyear",
              "desc",
              "endsWith",
              "exp",
              "explode",
              "expm1",
              "expr",
              "factorial",
              "first",
              "floor",
              "format_number",
              "format_string",
              "from_unixtime",
              "from_utc_timestamp",
              "getField",
              "getItem",
              "greatest",
              "hex",
              "hour",
              "hypot",
              "ifelse",
              "initcap",
              "instr",
              "isNaN",
              "isNotNull",
              "isNull",
              "last",
              "last_day",
              "least",
              "length",
              "levenshtein",
              "like",
              "lit",
              "locate",
              "log",
              "log10",
              "log1p",
              "log2",
              "lower",
              "lpad",
              "ltrim",
              "max",
              "md5",
              "mean",
              "min",
              "minute",
              "month",
              "months_between",
              "n",
              "n_distinct",
              "nanvl",
              "negate",
              "next_day",
              "otherwise",
              "pmod",
              "quarter",
              "rand",
              "randn",
              "regexp_extract",
              "regexp_replace",
              "reverse",
              "rint",
              "rlike",
              "round",
              "rpad",
              "rtrim",
              "second",
              "sha1",
              "sha2",
              "shiftLeft",
              "shiftRight",
              "shiftRightUnsigned",
              "sign",
              "signum",
              "sin",
              "sinh",
              "size",
              "soundex",
              "sqrt",
              "startsWith",
              "substr",
              "substring_index",
              "sum",
              "sumDistinct",
              "tan",
              "tanh",
              "toDegrees",
              "toRadians",
              "to_date",
              "to_utc_timestamp",
              "translate",
              "trim",
              "unbase64",
              "unhex",
              "unix_timestamp",
              "upper",
              "weekofyear",
              "when",
              "year")

exportClasses("GroupedData")
exportMethods("agg")
Expand Down
83 changes: 8 additions & 75 deletions R/pkg/R/column.R
Original file line number Diff line number Diff line change
Expand Up @@ -60,12 +60,6 @@ operators <- list(
)
# Name tables that drive the generated Column methods.

# NOTE(review): presumably registered through a createColumnFunction1 helper;
# that loop is not visible in this excerpt -- confirm.
column_functions1 <- c(
  "asc", "desc",
  "isNull", "isNotNull"
)

# Each name is passed to createColumnFunction2() by createMethods().
column_functions2 <- c(
  "like", "rlike",
  "startsWith", "endsWith",
  "getField", "getItem",
  "contains"
)

# Each name is passed to createStaticFunction() by createMethods().
functions <- c(
  "min", "max", "sum", "avg", "mean", "count", "abs", "sqrt",
  "first", "last", "lower", "upper", "sumDistinct",
  "acos", "asin", "atan", "cbrt", "ceiling", "cos", "cosh", "exp",
  "expm1", "floor", "log", "log10", "log1p", "rint", "sign",
  "sin", "sinh", "tan", "tanh", "toDegrees", "toRadians"
)

# Each name is passed to createBinaryMathfunctions() by createMethods().
binary_mathfunctions <- c(
  "atan2",
  "hypot"
)

createOperator <- function(op) {
setMethod(op,
Expand Down Expand Up @@ -111,33 +105,6 @@ createColumnFunction2 <- function(name) {
})
}

# Register `name` as a one-argument method on Column that forwards to the
# static method of the same name on org.apache.spark.sql.functions.
# Two R names differ from their JVM counterparts and are translated first:
# "ceiling" -> "ceil" and "sign" -> "signum".
createStaticFunction <- function(name) {
  setMethod(name,
            signature(x = "Column"),
            function(x) {
              # R-side name -> JVM-side name, for the names that differ
              jvmAliases <- c(ceiling = "ceil", sign = "signum")
              jvmName <- if (name %in% names(jvmAliases)) jvmAliases[[name]] else name
              column(callJStatic("org.apache.spark.sql.functions", jvmName, x@jc))
            })
}

# Register `name` as a two-argument method dispatched on a Column `y`.
# The method forwards to the static method of the same name on
# org.apache.spark.sql.functions, passing y's JVM handle and `x`.
#
# `x` may be a Column (its JVM handle is passed) or any other value, which is
# forwarded unchanged.
createBinaryMathfunctions <- function(name) {
  setMethod(name,
            signature(y = "Column"),
            function(y, x) {
              # inherits() always yields a single TRUE/FALSE. Comparing class(x)
              # with == produces one value per class, which breaks `if` for
              # operands that carry more than one class.
              if (inherits(x, "Column")) {
                x <- x@jc
              }
              jc <- callJStatic("org.apache.spark.sql.functions", name, y@jc, x)
              column(jc)
            })
}

createMethods <- function() {
for (op in names(operators)) {
createOperator(op)
Expand All @@ -148,12 +115,6 @@ createMethods <- function() {
for (name in column_functions2) {
createColumnFunction2(name)
}
for (x in functions) {
createStaticFunction(x)
}
for (name in binary_mathfunctions) {
createBinaryMathfunctions(name)
}
}

createMethods()
Expand Down Expand Up @@ -243,44 +204,16 @@ setMethod("%in%",
return(column(jc))
})

#' Approx Count Distinct
#' otherwise
#'
#' @rdname column
#' @return the approximate number of distinct items in a group.
setMethod("approxCountDistinct",
          signature(x = "Column"),
          function(x, rsd = 0.95) {
            # Forward the column's JVM handle and `rsd` to the static
            # approxCountDistinct on the JVM side, then wrap the returned
            # handle as a Column.
            column(callJStatic("org.apache.spark.sql.functions",
                               "approxCountDistinct",
                               x@jc,
                               rsd))
          })

#' Count Distinct
#' If values in the specified column are null, returns the value.
#' Can be used in conjunction with `when` to specify a default value for expressions.
#'
#' @rdname column
#' @return the number of distinct items in a group.
setMethod("countDistinct",
signature(x = "Column"),
function(x, ...) {
jcol <- lapply(list(...), function (x) {
x@jc
})
jc <- callJStatic("org.apache.spark.sql.functions", "countDistinct", x@jc,
listToSeq(jcol))
setMethod("otherwise",
          signature(x = "Column", value = "ANY"),
          function(x, value) {
            # x:     Column whose JVM handle receives the "otherwise" call.
            # value: a Column (its JVM handle is passed) or any other value,
            #        which is forwarded unchanged.
            # Returns the result of the JVM call wrapped as a Column.
            #
            # A plain `if` is used instead of ifelse(): ifelse() shapes its
            # result after the length-one test, so it keeps only the first
            # element of `value` and drops its attributes (e.g. a Date class).
            # NOTE(review): if the jc handle is not an atomic vector, ifelse()
            # would also fail to replicate it -- confirm against the handle type.
            if (inherits(value, "Column")) {
              value <- value@jc
            }
            jc <- callJMethod(x@jc, "otherwise", value)
            column(jc)
          })

#' @rdname column
#' @aliases countDistinct
setMethod("n_distinct",
signature(x = "Column"),
function(x, ...) {
# Thin alias: forwards the column and any extra arguments unchanged to countDistinct().
countDistinct(x, ...)
})

#' @rdname column
#' @aliases count
setMethod("n",
signature(x = "Column"),
function(x) {
# Thin alias: returns whatever count() returns for the same column.
count(x)
})
16 changes: 10 additions & 6 deletions R/pkg/R/deserialize.R
Original file line number Diff line number Diff line change
Expand Up @@ -176,10 +176,14 @@ readRow <- function(inputCon) {

# Take a single column as Array[Byte] and deserialize it into an atomic vector.
#
# inputCon: connection passed to readObject() once per value.
# numRows:  number of values to read from the connection.
#
# Returns the values combined with c(), with NULL entries replaced by NA, or
# an empty vector() when numRows is not positive. The connection is read
# exactly numRows times, so nothing is consumed for an empty column.
readCol <- function(inputCon, numRows) {
  if (numRows > 0) {
    # sapply can not work with POSIXlt, so collect with lapply and combine with c().
    # seq_len() rather than 1:numRows: 1:0 would iterate over c(1, 0).
    values <- lapply(seq_len(numRows), function(i) {
      value <- readObject(inputCon)
      # Replace NULL with NA so we can coerce to vectors
      if (is.null(value)) NA else value
    })
    do.call(c, values)
  } else {
    vector()
  }
}
Loading