Skip to content

Commit 574000d

Browse files
author
Thomas Graves
committed
Merge remote-tracking branch 'upstream/master' into SPARK-11701
Conflicts: core/src/test/scala/org/apache/spark/scheduler/DAGSchedulerSuite.scala
2 parents 2ab1c90 + 87abcf7 commit 574000d

File tree

206 files changed

+7707
-2136
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

206 files changed

+7707
-2136
lines changed

NOTICE

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -650,3 +650,19 @@ For CSV functionality:
650650
*/
651651

652652

653+
===============================================================================
654+
For dev/sparktestsupport/toposort.py:
655+
656+
Copyright 2014 True Blade Systems, Inc.
657+
658+
Licensed under the Apache License, Version 2.0 (the "License");
659+
you may not use this file except in compliance with the License.
660+
You may obtain a copy of the License at
661+
662+
http://www.apache.org/licenses/LICENSE-2.0
663+
664+
Unless required by applicable law or agreed to in writing, software
665+
distributed under the License is distributed on an "AS IS" BASIS,
666+
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
667+
See the License for the specific language governing permissions and
668+
limitations under the License.

R/pkg/NAMESPACE

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,8 @@ exportMethods("arrange",
3535
"count",
3636
"cov",
3737
"corr",
38+
"covar_samp",
39+
"covar_pop",
3840
"crosstab",
3941
"describe",
4042
"dim",

R/pkg/R/functions.R

Lines changed: 58 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -275,6 +275,64 @@ setMethod("corr", signature(x = "Column"),
275275
column(jc)
276276
})
277277

278+
#' cov
#'
#' Compute the sample covariance between two expressions.
#'
#' @param x a Column object or a character column name.
#' @param col2 a Column object or a character column name; passed together
#'             with \code{x} to \code{covar_samp}.
#' @rdname cov
#' @name cov
#' @family math_funcs
#' @export
#' @examples
#' \dontrun{
#' cov(df$c, df$d)
#' cov("c", "d")
#' covar_samp(df$c, df$d)
#' covar_samp("c", "d")
#' }
setMethod("cov", signature(x = "characterOrColumn"),
          function(x, col2) {
            # Check the value itself. class(col2) is always a character vector,
            # so testing is(class(col2), ...) accepted every possible argument.
            stopifnot(is(col2, "characterOrColumn"))
            covar_samp(x, col2)
          })
298+
299+
#' @rdname cov
#' @name covar_samp
setMethod("covar_samp", signature(col1 = "characterOrColumn", col2 = "characterOrColumn"),
          function(col1, col2) {
            # Both arguments must be given in the same form (two names or two Columns).
            stopifnot(class(col1) == class(col2))
            # Column objects are unwrapped to their JVM references before the call;
            # character column names are passed through unchanged.
            unwrap <- class(col1) == "Column"
            lhs <- if (unwrap) col1@jc else col1
            rhs <- if (unwrap) col2@jc else col2
            column(callJStatic("org.apache.spark.sql.functions", "covar_samp", lhs, rhs))
          })
311+
312+
#' covar_pop
#'
#' Compute the population covariance between two expressions.
#'
#' @rdname covar_pop
#' @name covar_pop
#' @family math_funcs
#' @export
#' @examples
#' \dontrun{
#' covar_pop(df$c, df$d)
#' covar_pop("c", "d")
#' }
setMethod("covar_pop", signature(col1 = "characterOrColumn", col2 = "characterOrColumn"),
          function(col1, col2) {
            # Both arguments must be given in the same form (two names or two Columns).
            stopifnot(class(col1) == class(col2))
            # Column objects are unwrapped to their JVM references before the call;
            # character column names are passed through unchanged.
            unwrap <- class(col1) == "Column"
            lhs <- if (unwrap) col1@jc else col1
            rhs <- if (unwrap) col2@jc else col2
            column(callJStatic("org.apache.spark.sql.functions", "covar_pop", lhs, rhs))
          })
335+
278336
#' cos
279337
#'
280338
#' Computes the cosine of the given value.

R/pkg/R/generics.R

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -418,12 +418,20 @@ setGeneric("columns", function(x) {standardGeneric("columns") })
418418

419419
#' @rdname statfunctions
420420
#' @export
421-
setGeneric("cov", function(x, col1, col2) {standardGeneric("cov") })
421+
setGeneric("cov", function(x, ...) {standardGeneric("cov") })
422422

423423
#' @rdname statfunctions
424424
#' @export
425425
setGeneric("corr", function(x, ...) {standardGeneric("corr") })
426426

427+
#' @rdname statfunctions
428+
#' @export
429+
setGeneric("covar_samp", function(col1, col2) {standardGeneric("covar_samp") })
430+
431+
#' @rdname statfunctions
432+
#' @export
433+
setGeneric("covar_pop", function(col1, col2) {standardGeneric("covar_pop") })
434+
427435
#' @rdname summary
428436
#' @export
429437
setGeneric("describe", function(x, col, ...) { standardGeneric("describe") })

R/pkg/R/stats.R

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -66,8 +66,9 @@ setMethod("crosstab",
6666
#' cov <- cov(df, "title", "gender")
6767
#' }
6868
setMethod("cov",
69-
signature(x = "DataFrame", col1 = "character", col2 = "character"),
69+
signature(x = "DataFrame"),
7070
function(x, col1, col2) {
71+
stopifnot(class(col1) == "character" && class(col2) == "character")
7172
statFunctions <- callJMethod(x@sdf, "stat")
7273
callJMethod(statFunctions, "cov", col1, col2)
7374
})

R/pkg/inst/tests/testthat/test_sparkSQL.R

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -996,6 +996,8 @@ test_that("column functions", {
996996
c14 <- cume_dist() + ntile(1) + corr(c, c1)
997997
c15 <- dense_rank() + percent_rank() + rank() + row_number()
998998
c16 <- is.nan(c) + isnan(c) + isNaN(c)
999+
c17 <- cov(c, c1) + cov("c", "c1") + covar_samp(c, c1) + covar_samp("c", "c1")
1000+
c18 <- covar_pop(c, c1) + covar_pop("c", "c1")
9991001

10001002
# Test if base::is.nan() is exposed
10011003
expect_equal(is.nan(c("a", "b")), c(FALSE, FALSE))
Lines changed: 116 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,116 @@
1+
/*
2+
* Licensed to the Apache Software Foundation (ASF) under one or more
3+
* contributor license agreements. See the NOTICE file distributed with
4+
* this work for additional information regarding copyright ownership.
5+
* The ASF licenses this file to You under the Apache License, Version 2.0
6+
* (the "License"); you may not use this file except in compliance with
7+
* the License. You may obtain a copy of the License at
8+
*
9+
* http://www.apache.org/licenses/LICENSE-2.0
10+
*
11+
* Unless required by applicable law or agreed to in writing, software
12+
* distributed under the License is distributed on an "AS IS" BASIS,
13+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14+
* See the License for the specific language governing permissions and
15+
* limitations under the License.
16+
*/
17+
18+
package org.apache.spark.util.sketch;
19+
20+
import java.io.DataInputStream;
21+
import java.io.DataOutputStream;
22+
import java.io.IOException;
23+
import java.util.Arrays;
24+
25+
/**
 * A fixed-size array of bits backed by a {@code long[]}, which also keeps a running count of
 * the bits that are set. The size is always a multiple of 64 bits.
 */
public final class BitArray {
  private final long[] data;
  private long bitCount;

  /**
   * Returns the number of 64-bit words needed to hold {@code numBits} bits.
   *
   * @param numBits requested capacity in bits
   * @return the word count, rounded up
   * @throws IllegalArgumentException if {@code numBits} is not positive, or if the required
   *         number of words does not fit in an {@code int}
   */
  static int numWords(long numBits) {
    if (numBits <= 0) {
      throw new IllegalArgumentException("numBits must be positive, but got " + numBits);
    }
    // Integer ceiling division. numBits >= 1 here, so (numBits - 1) cannot underflow.
    long numWords = ((numBits - 1) >>> 6) + 1;
    if (numWords > Integer.MAX_VALUE) {
      throw new IllegalArgumentException("Can't allocate enough space for " + numBits + " bits");
    }
    return (int) numWords;
  }

  /**
   * Creates an all-zero bit array able to hold at least {@code numBits} bits.
   *
   * @throws IllegalArgumentException see {@link #numWords(long)}
   */
  BitArray(long numBits) {
    this(new long[numWords(numBits)]);
  }

  /** Wraps {@code data} without copying and counts the bits already set in it. */
  private BitArray(long[] data) {
    this.data = data;
    long bitCount = 0;
    for (long word : data) {
      bitCount += Long.bitCount(word);
    }
    this.bitCount = bitCount;
  }

  /**
   * Validates {@code index} and returns the position of the word that holds it.
   *
   * Without this check the {@code (int)} cast of {@code index >>> 6} silently truncates, so an
   * index such as {@code 1L << 38} would address word 0 instead of failing.
   *
   * @throws ArrayIndexOutOfBoundsException if {@code index} is outside {@code [0, bitSize())}
   */
  private int wordIndex(long index) {
    if (index < 0 || index >= bitSize()) {
      throw new ArrayIndexOutOfBoundsException(
        "Bit index " + index + " is out of range [0, " + bitSize() + ")");
    }
    return (int) (index >>> 6);
  }

  /**
   * Sets the bit at {@code index}.
   *
   * @return true if the bit changed value
   * @throws ArrayIndexOutOfBoundsException if {@code index} is outside {@code [0, bitSize())}
   */
  boolean set(long index) {
    int word = wordIndex(index);
    // A long shift only uses the low 6 bits of the distance, i.e. index % 64.
    long mask = 1L << index;
    if ((data[word] & mask) != 0) {
      return false;
    }
    data[word] |= mask;
    bitCount++;
    return true;
  }

  /**
   * Returns whether the bit at {@code index} is set.
   *
   * @throws ArrayIndexOutOfBoundsException if {@code index} is outside {@code [0, bitSize())}
   */
  boolean get(long index) {
    return (data[wordIndex(index)] & (1L << index)) != 0;
  }

  /** Number of bits */
  long bitSize() {
    return (long) data.length * Long.SIZE;
  }

  /** Number of set bits (1s) */
  long cardinality() {
    return bitCount;
  }

  /**
   * Combines the two BitArrays using bitwise OR, storing the result in this array.
   *
   * @throws IllegalArgumentException if the two arrays differ in length; the check runs before
   *         any word is modified, so this array is left untouched on failure
   */
  void putAll(BitArray array) {
    if (data.length != array.data.length) {
      throw new IllegalArgumentException("BitArrays must be of equal length when merging");
    }
    long bitCount = 0;
    for (int i = 0; i < data.length; i++) {
      data[i] |= array.data[i];
      bitCount += Long.bitCount(data[i]);
    }
    this.bitCount = bitCount;
  }

  /** Writes the word count as an int, followed by each word as a long. */
  void writeTo(DataOutputStream out) throws IOException {
    out.writeInt(data.length);
    for (long datum : data) {
      out.writeLong(datum);
    }
  }

  /**
   * Reads a BitArray in the format produced by {@link #writeTo(DataOutputStream)}.
   *
   * @throws IOException if the stream fails, ends early, or declares a non-positive word count
   *         (which {@code writeTo} never produces, since every BitArray has at least one word)
   */
  static BitArray readFrom(DataInputStream in) throws IOException {
    int numWords = in.readInt();
    if (numWords <= 0) {
      throw new IOException("Invalid BitArray word count: " + numWords);
    }
    long[] data = new long[numWords];
    for (int i = 0; i < numWords; i++) {
      data[i] = in.readLong();
    }
    return new BitArray(data);
  }

  @Override
  public boolean equals(Object other) {
    if (this == other) {
      return true;
    }
    // instanceof is already false for null, so no separate null check is needed.
    if (!(other instanceof BitArray)) {
      return false;
    }
    BitArray that = (BitArray) other;
    return Arrays.equals(data, that.data);
  }

  @Override
  public int hashCode() {
    return Arrays.hashCode(data);
  }
}

0 commit comments

Comments
 (0)