Skip to content
This repository was archived by the owner on Oct 23, 2024. It is now read-only.
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
131 commits
Select commit Hold shift + click to select a range
dd2325d
[SPARK-11965][ML][DOC] Update user guide for RFormula feature interac…
yanboliang Jan 25, 2016
ef8fb36
Closes #10879
mengxr Jan 25, 2016
c037d25
[SPARK-12149][WEB UI] Executor UI improvement suggestions - Color UI
ajbozarth Jan 25, 2016
7d877c3
[SPARK-12902] [SQL] visualization for generated operators
Jan 25, 2016
00026fa
[SPARK-12901][SQL][HOT-FIX] Fix scala 2.11 compilation.
yhuai Jan 25, 2016
9348431
[SPARK-12975][SQL] Throwing Exception when Bucketing Columns are part…
gatorsmile Jan 25, 2016
dcae355
[SPARK-12905][ML][PYSPARK] PCAModel return eigenvalues for PySpark
yanboliang Jan 25, 2016
6f0f1d9
[SPARK-12934][SQL] Count-min sketch serialization
liancheng Jan 25, 2016
be375fc
[SPARK-12879] [SQL] improve the unsafe row writing framework
cloud-fan Jan 26, 2016
109061f
[SPARK-12936][SQL] Initial bloom filter implementation
cloud-fan Jan 26, 2016
fdcc351
[SPARK-12934] use try-with-resources for streams
tedyu Jan 26, 2016
b66afde
[SPARK-11922][PYSPARK][ML] Python api for ml.feature.quantile discret…
holdenk Jan 26, 2016
ae47ba7
[SPARK-12834] Change ser/de of JavaArray and JavaList
yinxusen Jan 26, 2016
27c910f
[SPARK-10086][MLLIB][STREAMING][PYSPARK] ignore StreamingKMeans test …
mengxr Jan 26, 2016
d54cfed
[SQL][MINOR] A few minor tweaks to CSV reader.
rxin Jan 26, 2016
6743de3
[SPARK-12937][SQL] bloom filter serialization
cloud-fan Jan 26, 2016
5936bf9
[SPARK-12961][CORE] Prevent snappy-java memory leak
viirya Jan 26, 2016
649e9d0
[SPARK-3369][CORE][STREAMING] Java mapPartitions Iterator->Iterable i…
srowen Jan 26, 2016
ae0309a
[SPARK-10911] Executors should System.exit on clean shutdown.
Jan 26, 2016
08c781c
[SPARK-12682][SQL] Add support for (optionally) not storing tables in…
sameeragarwal Jan 26, 2016
cbd507d
[SPARK-7799][STREAMING][DOCUMENT] Add the linking and deploying instr…
zsxwing Jan 26, 2016
8beab68
[SPARK-11923][ML] Python API for ml.feature.ChiSqSelector
yinxusen Jan 26, 2016
fbf7623
[SPARK-12952] EMLDAOptimizer initialize() should return EMLDAOptimize…
yinxusen Jan 26, 2016
ee74498
[SPARK-8725][PROJECT-INFRA] Test modules in topologically-sorted orde…
JoshRosen Jan 26, 2016
83507fe
[SQL] Minor Scaladoc format fix
liancheng Jan 26, 2016
19fdb21
[SPARK-12993][PYSPARK] Remove usage of ADD_FILES in pyspark
zjffdu Jan 26, 2016
eb91729
[SPARK-10509][PYSPARK] Reduce excessive param boiler plate code
holdenk Jan 26, 2016
22662b2
[SPARK-12614][CORE] Don't throw non fatal exception from ask
zsxwing Jan 27, 2016
1dac964
[SPARK-11622][MLLIB] Make LibSVMRelation extends HadoopFsRelation and…
zjffdu Jan 27, 2016
5551273
[SPARK-12854][SQL] Implement complex types support in ColumnarBatch
nongli Jan 27, 2016
b72611f
[SPARK-7780][MLLIB] intercept in logisticregressionwith lbfgs should …
holdenk Jan 27, 2016
e7f9199
[SPARK-12903][SPARKR] Add covar_samp and covar_pop for SparkR
yanboliang Jan 27, 2016
ce38a35
[SPARK-12935][SQL] DataFrame API for Count-Min Sketch
liancheng Jan 27, 2016
58f5d8c
[SPARK-12728][SQL] Integrates SQL generation with native view
liancheng Jan 27, 2016
bae3c9a
[SPARK-12967][NETTY] Avoid NettyRpc error message during sparkContext…
nishkamravi2 Jan 27, 2016
4db255c
[SPARK-12780] Inconsistency returning value of ML python models' prop…
yinxusen Jan 27, 2016
90b0e56
[SPARK-12983][CORE][DOC] Correct metrics.properties.template
BenFradet Jan 27, 2016
093291c
[SPARK-1680][DOCS] Explain environment variables for running on YARN …
weineran Jan 27, 2016
41f0c85
[SPARK-13023][PROJECT INFRA] Fix handling of root module in modules_t…
JoshRosen Jan 27, 2016
edd4737
[SPARK-10847][SQL][PYSPARK] Pyspark - DataFrame - Optional Metadata w…
jasoncl Jan 27, 2016
87abcf7
[SPARK-12895][SPARK-12896] Migrate TaskMetrics to accumulators
Jan 27, 2016
32f7411
[SPARK-13021][CORE] Fail fast when custom RDDs violate RDD.partition'…
JoshRosen Jan 27, 2016
680afab
[SPARK-12938][SQL] DataFrame API for Bloom filter
cloud-fan Jan 27, 2016
ef96cd3
[SPARK-12865][SPARK-12866][SQL] Migrate SparkSQLParser/ExtendedHiveQl…
hvanhovell Jan 27, 2016
d702f0c
[HOTFIX] Fix Scala 2.11 compilation
Jan 27, 2016
4a09123
[SPARK-13045] [SQL] Remove ColumnVector.Struct in favor of ColumnarBa…
nongli Jan 27, 2016
c220443
Provide same info as in spark-submit --help
jimlohse Jan 28, 2016
415d0a8
[SPARK-12818][SQL] Specialized integral and string types for Count-mi…
liancheng Jan 28, 2016
6768039
[SPARK-12926][SQL] SQLContext to display warning message when non-sql…
tejasapatil Jan 28, 2016
cc18a71
[SPARK-13031] [SQL] cleanup codegen and improve test coverage
Jan 28, 2016
df78a93
[SPARK-9835][ML] Implement IterativelyReweightedLeastSquares solver
yanboliang Jan 28, 2016
abae889
[SPARK-12401][SQL] Add integration tests for postgres enum types
maropu Jan 28, 2016
3a40c0e
[SPARK-12749][SQL] add json option to parse floating-point types as D…
blbradley Jan 28, 2016
4637fc0
[SPARK-11955][SQL] Mark optional fields in merging schema for safely …
viirya Jan 29, 2016
b9dfdcc
Revert "[SPARK-13031] [SQL] cleanup codegen and improve test coverage"
davies Jan 29, 2016
66449b8
[SPARK-12968][SQL] Implement command to set current database
viirya Jan 29, 2016
721ced2
[SPARK-13067] [SQL] workaround for a weird scala reflection problem
cloud-fan Jan 29, 2016
8d3cc3d
[SPARK-13050][BUILD] Scalatest tags fail build with the addition of t…
ajbozarth Jan 29, 2016
55561e7
[SPARK-13031][SQL] cleanup codegen and improve test coverage
Jan 29, 2016
e51b6ea
[SPARK-13032][ML][PYSPARK] PySpark support model export/import and ta…
yanboliang Jan 29, 2016
e4c1162
[SPARK-10873] Support column sort and search for History Server.
Jan 29, 2016
c5f745e
[SPARK-13072] [SQL] simplify and improve murmur3 hash expression codegen
cloud-fan Jan 29, 2016
5f686cc
[SPARK-12656] [SQL] Implement Intersect with Left-semi Join
gatorsmile Jan 29, 2016
2b027e9
[SPARK-12818] Polishes spark-sketch module
liancheng Jan 29, 2016
e38b0ba
[SPARK-13055] SQLHistoryListener throws ClassCastException
Jan 29, 2016
2cbc412
[SPARK-13076][SQL] Rename ClientInterface -> HiveClient
rxin Jan 30, 2016
e6ceac4
[SPARK-13096][TEST] Fix flaky verifyPeakExecutionMemorySet
Jan 30, 2016
70e69fc
[SPARK-13088] Fix DAG viz in latest version of chrome
Jan 30, 2016
12252d1
[SPARK-13071] Coalescing HadoopRDD overwrites existing input metrics
Jan 30, 2016
e6a02c6
[SPARK-12914] [SQL] generate aggregation with grouping keys
Jan 30, 2016
dab246f
[SPARK-13098] [SQL] remove GenericInternalRowWithSchema
cloud-fan Jan 30, 2016
289373b
[SPARK-6363][BUILD] Make Scala 2.11 the default Scala version
JoshRosen Jan 30, 2016
de28371
[SPARK-13100][SQL] improving the performance of stringToDate method i…
Jan 30, 2016
a1303de
[SPARK-13070][SQL] Better error message when Parquet schema merging f…
liancheng Jan 31, 2016
0e6d92d
[SPARK-12689][SQL] Migrate DDL parsing to the newly absorbed parser
viirya Jan 31, 2016
5a8b978
[SPARK-13049] Add First/last with ignore nulls to functions.scala
hvanhovell Jan 31, 2016
c1da4d4
[SPARK-13093] [SQL] improve null check in nullSafeCodeGen for unary, …
cloud-fan Feb 1, 2016
6075573
[SPARK-6847][CORE][STREAMING] Fix stack overflow issue when updateSta…
zsxwing Feb 1, 2016
33c8a49
[SPARK-12989][SQL] Delaying Alias Cleanup after ExtractWindowExpressions
gatorsmile Feb 1, 2016
8f26eb5
[SPARK-12705][SPARK-10777][SQL] Analyzer Rule ResolveSortReferences
gatorsmile Feb 1, 2016
da9146c
[DOCS] Fix the jar location of datanucleus in sql-programming-guid.md
maropu Feb 1, 2016
711ce04
[ML][MINOR] Invalid MulticlassClassification reference in ml-guide
Lewuathe Feb 1, 2016
51b03b7
[SPARK-12463][SPARK-12464][SPARK-12465][SPARK-10647][MESOS] Fix zooke…
tnachen Feb 1, 2016
a41b68b
[SPARK-12265][MESOS] Spark calls System.exit inside driver instead of…
nraychaudhuri Feb 1, 2016
c9b89a0
[SPARK-12979][MESOS] Don’t resolve paths on the local file system in …
dragos Feb 1, 2016
064b029
[SPARK-13043][SQL] Implement remaining catalyst types in ColumnarBatch.
nongli Feb 1, 2016
a2973fe
Fix for [SPARK-12854][SQL] Implement complex types support in Columna…
jaceklaskowski Feb 1, 2016
be7a2fc
[SPARK-13078][SQL] API and test cases for internal catalog
rxin Feb 1, 2016
715a19d
[SPARK-12637][CORE] Print stage info of finished stages properly
srowen Feb 2, 2016
0df3cfb
[SPARK-12790][CORE] Remove HistoryServer old multiple files format
felixcheung Feb 2, 2016
0fff5c6
[SPARK-13130][SQL] Make codegen variable names easier to read
rxin Feb 2, 2016
b8666fd
Closes #10662. Closes #10661
rxin Feb 2, 2016
22ba213
[SPARK-13087][SQL] Fix group by function for sort based aggregation
marmbrus Feb 2, 2016
12a20c1
[SPARK-10820][SQL] Support for the continuous execution of structured…
marmbrus Feb 2, 2016
29d9218
[SPARK-13094][SQL] Add encoders for seq/array of primitives
marmbrus Feb 2, 2016
b938301
[SPARK-13114][SQL] Add a test for tokens more than the fields in schema
HyukjinKwon Feb 2, 2016
cba1d6b
[SPARK-12631][PYSPARK][DOC] PySpark clustering parameter desc to cons…
BryanCutler Feb 2, 2016
358300c
[SPARK-13056][SQL] map column would throw NPE if value is null
adrian-wang Feb 2, 2016
b1835d7
[SPARK-12711][ML] ML StopWordsRemover does not protect itself from co…
grzegorz-chilkiewicz Feb 2, 2016
7f6e3ec
[SPARK-13138][SQL] Add "logical" package prefix for ddl.scala
rxin Feb 2, 2016
be5dd88
[SPARK-12913] [SQL] Improve performance of stat functions
Feb 2, 2016
d0df2ca
[SPARK-13121][STREAMING] java mapWithState mishandles scala Option
sparkyengine Feb 2, 2016
b377b03
[DOCS] Update StructType.scala
swkimme Feb 2, 2016
6de6a97
[SPARK-13150] [SQL] disable two flaky tests
Feb 3, 2016
672032d
[SPARK-13020][SQL][TEST] fix random generator for map type
cloud-fan Feb 3, 2016
21112e8
[SPARK-12992] [SQL] Update parquet reader to support more types when …
nongli Feb 3, 2016
ff71261
[SPARK-13122] Fix race condition in MemoryStore.unrollSafely()
Feb 3, 2016
99a6e3c
[SPARK-12951] [SQL] support spilling in generated aggregate
Feb 3, 2016
0557146
[SPARK-12732][ML] bug fix in linear regression train
iyounus Feb 3, 2016
335f10e
[SPARK-7997][CORE] Add rpcEnv.awaitTermination() back to SparkEnv
zsxwing Feb 3, 2016
e86f8f6
[SPARK-13147] [SQL] improve readability of generated code
Feb 3, 2016
138c300
[SPARK-12957][SQL] Initial support for constraint propagation in Spar…
sameeragarwal Feb 3, 2016
e9eb248
[SPARK-12739][STREAMING] Details of batch in Streaming tab uses two D…
mariobriggs Feb 3, 2016
c4feec2
[SPARK-12798] [SQL] generated BroadcastHashJoin
Feb 3, 2016
9dd2741
[SPARK-13157] [SQL] Support any kind of input for SQL commands.
hvanhovell Feb 3, 2016
3221edd
[SPARK-3611][WEB UI] Show number of cores for each executor in applic…
ajbozarth Feb 3, 2016
915a753
[SPARK-13166][SQL] Remove DataStreamReader/Writer
rxin Feb 4, 2016
de09145
[SPARK-13131] [SQL] Use best and average time in benchmark
Feb 4, 2016
a8e2ba7
[SPARK-13152][CORE] Fix task metrics deprecation warning
holdenk Feb 4, 2016
a648311
[SPARK-13079][SQL] Extend and implement InMemoryCatalog
Feb 4, 2016
0f81318
[SPARK-12828][SQL] add natural join support
adrian-wang Feb 4, 2016
c2c956b
[ML][DOC] fix wrong api link in ml onevsrest
hhbyyh Feb 4, 2016
d390871
[SPARK-13113] [CORE] Remove unnecessary bit operation when decoding p…
viirya Feb 4, 2016
dee801a
[SPARK-12828][SQL] Natural join follow-up
rxin Feb 4, 2016
2eaeafe
[SPARK-12330][MESOS] Fix mesos coarse mode cleanup
drcrallen Feb 4, 2016
62a7c28
[SPARK-13164][CORE] Replace deprecated synchronized buffer in core
holdenk Feb 4, 2016
4120bcb
[SPARK-13162] Standalone mode does not respect initial executors
Feb 4, 2016
15205da
[SPARK-13053][TEST] Unignore tests in InternalAccumulatorSuite
Feb 4, 2016
085f510
MAINTENANCE: Automated closing of pull requests.
Feb 4, 2016
33212cb
[SPARK-13168][SQL] Collapse adjacent repartition operators
JoshRosen Feb 4, 2016
ecad77a
Support multiple executors per node on Mesos.
Jan 19, 2016
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
The table of contents is too big for display.
Diff view
Diff view
  •  
  •  
  •  
22 changes: 16 additions & 6 deletions .rat-excludes
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,16 @@ graphlib-dot.min.js
sorttable.js
vis.min.js
vis.min.css
dataTables.bootstrap.css
dataTables.bootstrap.min.js
dataTables.rowsGroup.js
jquery.blockUI.min.js
jquery.cookies.2.2.0.min.js
jquery.dataTables.1.10.4.min.css
jquery.dataTables.1.10.4.min.js
jquery.mustache.js
jsonFormatter.min.css
jsonFormatter.min.js
.*avsc
.*txt
.*json
Expand Down Expand Up @@ -63,12 +73,12 @@ logs
.*dependency-reduced-pom.xml
known_translations
json_expectation
local-1422981759269/*
local-1422981780767/*
local-1425081759269/*
local-1426533911241/*
local-1426633911242/*
local-1430917381534/*
local-1422981759269
local-1422981780767
local-1425081759269
local-1426533911241
local-1426633911242
local-1430917381534
local-1430917381535_1
local-1430917381535_2
DESCRIPTION
Expand Down
6 changes: 6 additions & 0 deletions LICENSE
Original file line number Diff line number Diff line change
Expand Up @@ -291,3 +291,9 @@ The text of each license is also included at licenses/LICENSE-[project].txt.
(MIT License) dagre-d3 (https://github.com/cpettitt/dagre-d3)
(MIT License) sorttable (https://github.com/stuartlangridge/sorttable)
(MIT License) boto (https://github.com/boto/boto/blob/develop/LICENSE)
(MIT License) datatables (http://datatables.net/license)
(MIT License) mustache (https://github.com/mustache/mustache/blob/master/LICENSE)
(MIT License) cookies (http://code.google.com/p/cookies/wiki/License)
(MIT License) blockUI (http://jquery.malsup.com/block/)
(MIT License) RowsGroup (http://datatables.net/license/mit)
(MIT License) jsonFormatter (http://www.jqueryscript.net/other/jQuery-Plugin-For-Pretty-JSON-Formatting-jsonFormatter.html)
16 changes: 16 additions & 0 deletions NOTICE
Original file line number Diff line number Diff line change
Expand Up @@ -650,3 +650,19 @@ For CSV functionality:
*/


===============================================================================
For dev/sparktestsupport/toposort.py:

Copyright 2014 True Blade Systems, Inc.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
2 changes: 2 additions & 0 deletions R/pkg/NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,8 @@ exportMethods("arrange",
"count",
"cov",
"corr",
"covar_samp",
"covar_pop",
"crosstab",
"describe",
"dim",
Expand Down
58 changes: 58 additions & 0 deletions R/pkg/R/functions.R
Original file line number Diff line number Diff line change
Expand Up @@ -275,6 +275,64 @@ setMethod("corr", signature(x = "Column"),
column(jc)
})

#' cov
#'
#' Compute the sample covariance between two expressions.
#'
#' @rdname cov
#' @name cov
#' @family math_funcs
#' @export
#' @examples
#' \dontrun{
#' cov(df$c, df$d)
#' cov("c", "d")
#' covar_samp(df$c, df$d)
#' covar_samp("c", "d")
#' }
setMethod("cov", signature(x = "characterOrColumn"),
          function(x, col2) {
            # Validate the argument object itself, not its class name.
            # The previous check, is(class(col2), "characterOrColumn"), passed the
            # *string* returned by class() to is(); a character vector is always a
            # member of the characterOrColumn union, so the check could never fail.
            stopifnot(is(col2, "characterOrColumn"))
            # cov() on two expressions is defined as the sample covariance.
            covar_samp(x, col2)
          })

#' @rdname cov
#' @name covar_samp
# Sample covariance of two columns. Both arguments may be Column objects or
# column-name strings, but must be the same kind; delegates the computation to
# the JVM-side org.apache.spark.sql.functions.covar_samp.
setMethod("covar_samp", signature(col1 = "characterOrColumn", col2 = "characterOrColumn"),
function(col1, col2) {
# Reject mixing a Column object with a character column name.
stopifnot(class(col1) == class(col2))
if (class(col1) == "Column") {
# Unwrap the underlying Java column references before the R/JVM call.
col1 <- col1@jc
col2 <- col2@jc
}
jc <- callJStatic("org.apache.spark.sql.functions", "covar_samp", col1, col2)
# Wrap the returned Java column back into an R Column.
column(jc)
})

#' covar_pop
#'
#' Compute the population covariance between two expressions.
#'
#' @rdname covar_pop
#' @name covar_pop
#' @family math_funcs
#' @export
#' @examples
#' \dontrun{
#' covar_pop(df$c, df$d)
#' covar_pop("c", "d")
#' }
# Population covariance of two columns. Both arguments may be Column objects or
# column-name strings, but must be the same kind; delegates the computation to
# the JVM-side org.apache.spark.sql.functions.covar_pop.
setMethod("covar_pop", signature(col1 = "characterOrColumn", col2 = "characterOrColumn"),
function(col1, col2) {
# Reject mixing a Column object with a character column name.
stopifnot(class(col1) == class(col2))
if (class(col1) == "Column") {
# Unwrap the underlying Java column references before the R/JVM call.
col1 <- col1@jc
col2 <- col2@jc
}
jc <- callJStatic("org.apache.spark.sql.functions", "covar_pop", col1, col2)
# Wrap the returned Java column back into an R Column.
column(jc)
})

#' cos
#'
#' Computes the cosine of the given value.
Expand Down
10 changes: 9 additions & 1 deletion R/pkg/R/generics.R
Original file line number Diff line number Diff line change
Expand Up @@ -418,12 +418,20 @@ setGeneric("columns", function(x) {standardGeneric("columns") })

#' @rdname statfunctions
#' @export
setGeneric("cov", function(x, col1, col2) {standardGeneric("cov") })
setGeneric("cov", function(x, ...) {standardGeneric("cov") })

#' @rdname statfunctions
#' @export
setGeneric("corr", function(x, ...) {standardGeneric("corr") })

#' @rdname statfunctions
#' @export
setGeneric("covar_samp", function(col1, col2) {standardGeneric("covar_samp") })

#' @rdname statfunctions
#' @export
setGeneric("covar_pop", function(col1, col2) {standardGeneric("covar_pop") })

#' @rdname summary
#' @export
setGeneric("describe", function(x, col, ...) { standardGeneric("describe") })
Expand Down
3 changes: 2 additions & 1 deletion R/pkg/R/stats.R
Original file line number Diff line number Diff line change
Expand Up @@ -66,8 +66,9 @@ setMethod("crosstab",
#' cov <- cov(df, "title", "gender")
#' }
setMethod("cov",
signature(x = "DataFrame", col1 = "character", col2 = "character"),
signature(x = "DataFrame"),
function(x, col1, col2) {
stopifnot(class(col1) == "character" && class(col2) == "character")
statFunctions <- callJMethod(x@sdf, "stat")
callJMethod(statFunctions, "cov", col1, col2)
})
Expand Down
2 changes: 2 additions & 0 deletions R/pkg/inst/tests/testthat/test_sparkSQL.R
Original file line number Diff line number Diff line change
Expand Up @@ -996,6 +996,8 @@ test_that("column functions", {
c14 <- cume_dist() + ntile(1) + corr(c, c1)
c15 <- dense_rank() + percent_rank() + rank() + row_number()
c16 <- is.nan(c) + isnan(c) + isNaN(c)
c17 <- cov(c, c1) + cov("c", "c1") + covar_samp(c, c1) + covar_samp("c", "c1")
c18 <- covar_pop(c, c1) + covar_pop("c", "c1")

# Test if base::is.nan() is exposed
expect_equal(is.nan(c("a", "b")), c(FALSE, FALSE))
Expand Down
4 changes: 2 additions & 2 deletions assembly/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -20,13 +20,13 @@
<modelVersion>4.0.0</modelVersion>
<parent>
<groupId>org.apache.spark</groupId>
<artifactId>spark-parent_2.10</artifactId>
<artifactId>spark-parent_2.11</artifactId>
<version>2.0.0-SNAPSHOT</version>
<relativePath>../pom.xml</relativePath>
</parent>

<groupId>org.apache.spark</groupId>
<artifactId>spark-assembly_2.10</artifactId>
<artifactId>spark-assembly_2.11</artifactId>
<name>Spark Project Assembly</name>
<url>http://spark.apache.org/</url>
<packaging>pom</packaging>
Expand Down
11 changes: 9 additions & 2 deletions common/sketch/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -21,20 +21,27 @@
<modelVersion>4.0.0</modelVersion>
<parent>
<groupId>org.apache.spark</groupId>
<artifactId>spark-parent_2.10</artifactId>
<artifactId>spark-parent_2.11</artifactId>
<version>2.0.0-SNAPSHOT</version>
<relativePath>../../pom.xml</relativePath>
</parent>

<groupId>org.apache.spark</groupId>
<artifactId>spark-sketch_2.10</artifactId>
<artifactId>spark-sketch_2.11</artifactId>
<packaging>jar</packaging>
<name>Spark Project Sketch</name>
<url>http://spark.apache.org/</url>
<properties>
<sbt.project.name>sketch</sbt.project.name>
</properties>

<dependencies>
<dependency>
<groupId>org.apache.spark</groupId>
<artifactId>spark-test-tags_${scala.binary.version}</artifactId>
</dependency>
</dependencies>

<build>
<outputDirectory>target/scala-${scala.binary.version}/classes</outputDirectory>
<testOutputDirectory>target/scala-${scala.binary.version}/test-classes</testOutputDirectory>
Expand Down
116 changes: 116 additions & 0 deletions common/sketch/src/main/java/org/apache/spark/util/sketch/BitArray.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,116 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package org.apache.spark.util.sketch;

import java.io.DataInputStream;
import java.io.DataOutputStream;
import java.io.IOException;
import java.util.Arrays;

final class BitArray {
private final long[] data;
private long bitCount;

static int numWords(long numBits) {
if (numBits <= 0) {
throw new IllegalArgumentException("numBits must be positive, but got " + numBits);
}
long numWords = (long) Math.ceil(numBits / 64.0);
if (numWords > Integer.MAX_VALUE) {
throw new IllegalArgumentException("Can't allocate enough space for " + numBits + " bits");
}
return (int) numWords;
}

BitArray(long numBits) {
this(new long[numWords(numBits)]);
}

private BitArray(long[] data) {
this.data = data;
long bitCount = 0;
for (long word : data) {
bitCount += Long.bitCount(word);
}
this.bitCount = bitCount;
}

/** Returns true if the bit changed value. */
boolean set(long index) {
if (!get(index)) {
data[(int) (index >>> 6)] |= (1L << index);
bitCount++;
return true;
}
return false;
}

boolean get(long index) {
return (data[(int) (index >>> 6)] & (1L << index)) != 0;
}

/** Number of bits */
long bitSize() {
return (long) data.length * Long.SIZE;
}

/** Number of set bits (1s) */
long cardinality() {
return bitCount;
}

/** Combines the two BitArrays using bitwise OR. */
void putAll(BitArray array) {
assert data.length == array.data.length : "BitArrays must be of equal length when merging";
long bitCount = 0;
for (int i = 0; i < data.length; i++) {
data[i] |= array.data[i];
bitCount += Long.bitCount(data[i]);
}
this.bitCount = bitCount;
}

void writeTo(DataOutputStream out) throws IOException {
out.writeInt(data.length);
for (long datum : data) {
out.writeLong(datum);
}
}

static BitArray readFrom(DataInputStream in) throws IOException {
int numWords = in.readInt();
long[] data = new long[numWords];
for (int i = 0; i < numWords; i++) {
data[i] = in.readLong();
}
return new BitArray(data);
}

@Override
public boolean equals(Object other) {
if (this == other) return true;
if (other == null || !(other instanceof BitArray)) return false;
BitArray that = (BitArray) other;
return Arrays.equals(data, that.data);
}

@Override
public int hashCode() {
return Arrays.hashCode(data);
}
}
Loading