Skip to content

Commit 8c6bb70

Browse files
committed
upstream changes
2 parents 53be837 + 5f686cc commit 8c6bb70

File tree

281 files changed

+11071
-3043
lines changed

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

281 files changed

+11071
-3043
lines changed

.rat-excludes

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,16 @@ graphlib-dot.min.js
2525
sorttable.js
2626
vis.min.js
2727
vis.min.css
28+
dataTables.bootstrap.css
29+
dataTables.bootstrap.min.js
30+
dataTables.rowsGroup.js
31+
jquery.blockUI.min.js
32+
jquery.cookies.2.2.0.min.js
33+
jquery.dataTables.1.10.4.min.css
34+
jquery.dataTables.1.10.4.min.js
35+
jquery.mustache.js
36+
jsonFormatter.min.css
37+
jsonFormatter.min.js
2838
.*avsc
2939
.*txt
3040
.*json

LICENSE

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -291,3 +291,9 @@ The text of each license is also included at licenses/LICENSE-[project].txt.
291291
(MIT License) dagre-d3 (https://github.com/cpettitt/dagre-d3)
292292
(MIT License) sorttable (https://github.com/stuartlangridge/sorttable)
293293
(MIT License) boto (https://github.com/boto/boto/blob/develop/LICENSE)
294+
(MIT License) datatables (http://datatables.net/license)
295+
(MIT License) mustache (https://github.com/mustache/mustache/blob/master/LICENSE)
296+
(MIT License) cookies (http://code.google.com/p/cookies/wiki/License)
297+
(MIT License) blockUI (http://jquery.malsup.com/block/)
298+
(MIT License) RowsGroup (http://datatables.net/license/mit)
299+
(MIT License) jsonFormatter (http://www.jqueryscript.net/other/jQuery-Plugin-For-Pretty-JSON-Formatting-jsonFormatter.html)

NOTICE

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -650,3 +650,19 @@ For CSV functionality:
650650
*/
651651

652652

653+
===============================================================================
654+
For dev/sparktestsupport/toposort.py:
655+
656+
Copyright 2014 True Blade Systems, Inc.
657+
658+
Licensed under the Apache License, Version 2.0 (the "License");
659+
you may not use this file except in compliance with the License.
660+
You may obtain a copy of the License at
661+
662+
http://www.apache.org/licenses/LICENSE-2.0
663+
664+
Unless required by applicable law or agreed to in writing, software
665+
distributed under the License is distributed on an "AS IS" BASIS,
666+
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
667+
See the License for the specific language governing permissions and
668+
limitations under the License.

R/pkg/NAMESPACE

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,8 @@ exportMethods("arrange",
3535
"count",
3636
"cov",
3737
"corr",
38+
"covar_samp",
39+
"covar_pop",
3840
"crosstab",
3941
"describe",
4042
"dim",

R/pkg/R/functions.R

Lines changed: 58 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -275,6 +275,64 @@ setMethod("corr", signature(x = "Column"),
275275
column(jc)
276276
})
277277

278+
#' cov
#'
#' Compute the sample covariance between two expressions.
#'
#' @rdname cov
#' @name cov
#' @family math_funcs
#' @export
#' @examples
#' \dontrun{
#' cov(df$c, df$d)
#' cov("c", "d")
#' covar_samp(df$c, df$d)
#' covar_samp("c", "d")
#' }
setMethod("cov", signature(x = "characterOrColumn"),
          function(x, col2) {
            # Check the argument itself. class(col2) is always a character vector, so
            # testing is(class(col2), ...) against a union that admits character
            # (as the name "characterOrColumn" suggests -- confirm) accepts anything.
            stopifnot(is(col2, "characterOrColumn"))
            covar_samp(x, col2)
          })
298+
299+
#' @rdname cov
#' @name covar_samp
setMethod("covar_samp", signature(col1 = "characterOrColumn", col2 = "characterOrColumn"),
          function(col1, col2) {
            # Both arguments must be of the same kind: two names or two Columns.
            stopifnot(class(col1) == class(col2))
            # Column objects are unwrapped to their JVM handles; names pass through as-is.
            unwrap <- class(col1) == "Column"
            lhs <- if (unwrap) col1@jc else col1
            rhs <- if (unwrap) col2@jc else col2
            column(callJStatic("org.apache.spark.sql.functions", "covar_samp", lhs, rhs))
          })
311+
312+
#' covar_pop
#'
#' Compute the population covariance between two expressions.
#'
#' @rdname covar_pop
#' @name covar_pop
#' @family math_funcs
#' @export
#' @examples
#' \dontrun{
#' covar_pop(df$c, df$d)
#' covar_pop("c", "d")
#' }
setMethod("covar_pop", signature(col1 = "characterOrColumn", col2 = "characterOrColumn"),
          function(col1, col2) {
            # Both arguments must be of the same kind: two names or two Columns.
            stopifnot(class(col1) == class(col2))
            # Column objects are unwrapped to their JVM handles; names pass through as-is.
            unwrap <- class(col1) == "Column"
            lhs <- if (unwrap) col1@jc else col1
            rhs <- if (unwrap) col2@jc else col2
            column(callJStatic("org.apache.spark.sql.functions", "covar_pop", lhs, rhs))
          })
335+
278336
#' cos
279337
#'
280338
#' Computes the cosine of the given value.

R/pkg/R/generics.R

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -418,12 +418,20 @@ setGeneric("columns", function(x) {standardGeneric("columns") })
418418

419419
#' @rdname statfunctions
420420
#' @export
421-
setGeneric("cov", function(x, col1, col2) {standardGeneric("cov") })
421+
setGeneric("cov", function(x, ...) {standardGeneric("cov") })
422422

423423
#' @rdname statfunctions
424424
#' @export
425425
setGeneric("corr", function(x, ...) {standardGeneric("corr") })
426426

427+
#' @rdname statfunctions
428+
#' @export
429+
setGeneric("covar_samp", function(col1, col2) {standardGeneric("covar_samp") })
430+
431+
#' @rdname statfunctions
432+
#' @export
433+
setGeneric("covar_pop", function(col1, col2) {standardGeneric("covar_pop") })
434+
427435
#' @rdname summary
428436
#' @export
429437
setGeneric("describe", function(x, col, ...) { standardGeneric("describe") })

R/pkg/R/stats.R

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -66,8 +66,9 @@ setMethod("crosstab",
6666
#' cov <- cov(df, "title", "gender")
6767
#' }
6868
setMethod("cov",
69-
signature(x = "DataFrame", col1 = "character", col2 = "character"),
69+
signature(x = "DataFrame"),
7070
function(x, col1, col2) {
71+
stopifnot(class(col1) == "character" && class(col2) == "character")
7172
statFunctions <- callJMethod(x@sdf, "stat")
7273
callJMethod(statFunctions, "cov", col1, col2)
7374
})

R/pkg/inst/tests/testthat/test_sparkSQL.R

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -996,6 +996,8 @@ test_that("column functions", {
996996
c14 <- cume_dist() + ntile(1) + corr(c, c1)
997997
c15 <- dense_rank() + percent_rank() + rank() + row_number()
998998
c16 <- is.nan(c) + isnan(c) + isNaN(c)
999+
c17 <- cov(c, c1) + cov("c", "c1") + covar_samp(c, c1) + covar_samp("c", "c1")
1000+
c18 <- covar_pop(c, c1) + covar_pop("c", "c1")
9991001

10001002
# Test if base::is.nan() is exposed
10011003
expect_equal(is.nan(c("a", "b")), c(FALSE, FALSE))

common/sketch/pom.xml

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,13 @@
3535
<sbt.project.name>sketch</sbt.project.name>
3636
</properties>
3737

38+
<dependencies>
39+
<dependency>
40+
<groupId>org.apache.spark</groupId>
41+
<artifactId>spark-test-tags_${scala.binary.version}</artifactId>
42+
</dependency>
43+
</dependencies>
44+
3845
<build>
3946
<outputDirectory>target/scala-${scala.binary.version}/classes</outputDirectory>
4047
<testOutputDirectory>target/scala-${scala.binary.version}/test-classes</testOutputDirectory>
Lines changed: 116 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,116 @@
1+
/*
2+
* Licensed to the Apache Software Foundation (ASF) under one or more
3+
* contributor license agreements. See the NOTICE file distributed with
4+
* this work for additional information regarding copyright ownership.
5+
* The ASF licenses this file to You under the Apache License, Version 2.0
6+
* (the "License"); you may not use this file except in compliance with
7+
* the License. You may obtain a copy of the License at
8+
*
9+
* http://www.apache.org/licenses/LICENSE-2.0
10+
*
11+
* Unless required by applicable law or agreed to in writing, software
12+
* distributed under the License is distributed on an "AS IS" BASIS,
13+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14+
* See the License for the specific language governing permissions and
15+
* limitations under the License.
16+
*/
17+
18+
package org.apache.spark.util.sketch;
19+
20+
import java.io.DataInputStream;
21+
import java.io.DataOutputStream;
22+
import java.io.IOException;
23+
import java.util.Arrays;
24+
25+
/**
 * A fixed-size array of bits backed by 64-bit words, which also tracks how many bits are set.
 *
 * <p>The capacity is always a whole number of words, so {@link #bitSize()} is the requested
 * number of bits rounded up to a multiple of 64.
 */
public final class BitArray {
  /** Backing words; bit {@code i} lives in word {@code i >>> 6}. */
  private final long[] data;
  /** Number of bits currently set to 1, kept in sync by {@code set} and {@code putAll}. */
  private long bitCount;

  /**
   * Returns the number of 64-bit words needed to hold {@code numBits} bits.
   *
   * @param numBits requested number of bits
   * @return the word count, {@code ceil(numBits / 64)}
   * @throws IllegalArgumentException if {@code numBits} is not positive, or if the word count
   *         would exceed {@link Integer#MAX_VALUE}
   */
  static int numWords(long numBits) {
    if (numBits <= 0) {
      throw new IllegalArgumentException("numBits must be positive, but got " + numBits);
    }
    // Exact integer ceiling division; numBits >= 1 here, so numBits - 1 cannot underflow.
    long numWords = ((numBits - 1) >>> 6) + 1;
    if (numWords > Integer.MAX_VALUE) {
      throw new IllegalArgumentException("Can't allocate enough space for " + numBits + " bits");
    }
    return (int) numWords;
  }

  /**
   * Creates an all-zero bit array with room for at least {@code numBits} bits.
   *
   * @throws IllegalArgumentException under the same conditions as {@link #numWords(long)}
   */
  BitArray(long numBits) {
    this(new long[numWords(numBits)]);
  }

  /** Wraps {@code data} without copying and counts the bits already set in it. */
  private BitArray(long[] data) {
    this.data = data;
    long bitCount = 0;
    for (long word : data) {
      bitCount += Long.bitCount(word);
    }
    this.bitCount = bitCount;
  }

  /**
   * Sets the bit at {@code index} to 1.
   *
   * @return true if the bit changed value, false if it was already set
   */
  boolean set(long index) {
    if (!get(index)) {
      // A long shift uses only the low 6 bits of the distance, i.e. index % 64.
      data[(int) (index >>> 6)] |= (1L << index);
      bitCount++;
      return true;
    }
    return false;
  }

  /**
   * Returns whether the bit at {@code index} is set. No range check is performed beyond the
   * array access itself.
   */
  boolean get(long index) {
    return (data[(int) (index >>> 6)] & (1L << index)) != 0;
  }

  /** Number of bits */
  long bitSize() {
    return (long) data.length * Long.SIZE;
  }

  /** Number of set bits (1s) */
  long cardinality() {
    return bitCount;
  }

  /**
   * Combines the two BitArrays using bitwise OR, storing the result in this array.
   *
   * @throws IllegalArgumentException if the two arrays do not have the same number of words
   */
  void putAll(BitArray array) {
    // Checked unconditionally: an assert would be skipped when assertions are disabled, and a
    // longer argument would then be merged only partially without any error.
    if (data.length != array.data.length) {
      throw new IllegalArgumentException("BitArrays must be of equal length when merging");
    }
    long bitCount = 0;
    for (int i = 0; i < data.length; i++) {
      data[i] |= array.data[i];
      bitCount += Long.bitCount(data[i]);
    }
    this.bitCount = bitCount;
  }

  /** Writes the word count as an int followed by every word as a long. */
  void writeTo(DataOutputStream out) throws IOException {
    out.writeInt(data.length);
    for (long datum : data) {
      out.writeLong(datum);
    }
  }

  /**
   * Reads a BitArray in the layout produced by {@link #writeTo(DataOutputStream)}.
   *
   * @throws IOException if the stream fails or declares a negative word count
   */
  static BitArray readFrom(DataInputStream in) throws IOException {
    int numWords = in.readInt();
    if (numWords < 0) {
      // Report corrupt input as an I/O problem rather than a NegativeArraySizeException.
      throw new IOException("Invalid BitArray word count: " + numWords);
    }
    long[] data = new long[numWords];
    for (int i = 0; i < numWords; i++) {
      data[i] = in.readLong();
    }
    return new BitArray(data);
  }

  /** Two BitArrays are equal when their backing words are equal. */
  @Override
  public boolean equals(Object other) {
    if (this == other) {
      return true;
    }
    // instanceof is false for null, so no separate null check is needed.
    if (!(other instanceof BitArray)) {
      return false;
    }
    return Arrays.equals(data, ((BitArray) other).data);
  }

  @Override
  public int hashCode() {
    return Arrays.hashCode(data);
  }
}

0 commit comments

Comments
 (0)