Skip to content
This repository was archived by the owner on Oct 23, 2024. It is now read-only.
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
131 commits
Select commit Hold shift + click to select a range
dd2325d
[SPARK-11965][ML][DOC] Update user guide for RFormula feature interac…
yanboliang Jan 25, 2016
ef8fb36
Closes #10879
mengxr Jan 25, 2016
c037d25
[SPARK-12149][WEB UI] Executor UI improvement suggestions - Color UI
ajbozarth Jan 25, 2016
7d877c3
[SPARK-12902] [SQL] visualization for generated operators
Jan 25, 2016
00026fa
[SPARK-12901][SQL][HOT-FIX] Fix scala 2.11 compilation.
yhuai Jan 25, 2016
9348431
[SPARK-12975][SQL] Throwing Exception when Bucketing Columns are part…
gatorsmile Jan 25, 2016
dcae355
[SPARK-12905][ML][PYSPARK] PCAModel return eigenvalues for PySpark
yanboliang Jan 25, 2016
6f0f1d9
[SPARK-12934][SQL] Count-min sketch serialization
liancheng Jan 25, 2016
be375fc
[SPARK-12879] [SQL] improve the unsafe row writing framework
cloud-fan Jan 26, 2016
109061f
[SPARK-12936][SQL] Initial bloom filter implementation
cloud-fan Jan 26, 2016
fdcc351
[SPARK-12934] use try-with-resources for streams
tedyu Jan 26, 2016
b66afde
[SPARK-11922][PYSPARK][ML] Python api for ml.feature.quantile discret…
holdenk Jan 26, 2016
ae47ba7
[SPARK-12834] Change ser/de of JavaArray and JavaList
yinxusen Jan 26, 2016
27c910f
[SPARK-10086][MLLIB][STREAMING][PYSPARK] ignore StreamingKMeans test …
mengxr Jan 26, 2016
d54cfed
[SQL][MINOR] A few minor tweaks to CSV reader.
rxin Jan 26, 2016
6743de3
[SPARK-12937][SQL] bloom filter serialization
cloud-fan Jan 26, 2016
5936bf9
[SPARK-12961][CORE] Prevent snappy-java memory leak
viirya Jan 26, 2016
649e9d0
[SPARK-3369][CORE][STREAMING] Java mapPartitions Iterator->Iterable i…
srowen Jan 26, 2016
ae0309a
[SPARK-10911] Executors should System.exit on clean shutdown.
Jan 26, 2016
08c781c
[SPARK-12682][SQL] Add support for (optionally) not storing tables in…
sameeragarwal Jan 26, 2016
cbd507d
[SPARK-7799][STREAMING][DOCUMENT] Add the linking and deploying instr…
zsxwing Jan 26, 2016
8beab68
[SPARK-11923][ML] Python API for ml.feature.ChiSqSelector
yinxusen Jan 26, 2016
fbf7623
[SPARK-12952] EMLDAOptimizer initialize() should return EMLDAOptimize…
yinxusen Jan 26, 2016
ee74498
[SPARK-8725][PROJECT-INFRA] Test modules in topologically-sorted orde…
JoshRosen Jan 26, 2016
83507fe
[SQL] Minor Scaladoc format fix
liancheng Jan 26, 2016
19fdb21
[SPARK-12993][PYSPARK] Remove usage of ADD_FILES in pyspark
zjffdu Jan 26, 2016
eb91729
[SPARK-10509][PYSPARK] Reduce excessive param boiler plate code
holdenk Jan 26, 2016
22662b2
[SPARK-12614][CORE] Don't throw non fatal exception from ask
zsxwing Jan 27, 2016
1dac964
[SPARK-11622][MLLIB] Make LibSVMRelation extends HadoopFsRelation and…
zjffdu Jan 27, 2016
5551273
[SPARK-12854][SQL] Implement complex types support in ColumnarBatch
nongli Jan 27, 2016
b72611f
[SPARK-7780][MLLIB] intercept in logisticregressionwith lbfgs should …
holdenk Jan 27, 2016
e7f9199
[SPARK-12903][SPARKR] Add covar_samp and covar_pop for SparkR
yanboliang Jan 27, 2016
ce38a35
[SPARK-12935][SQL] DataFrame API for Count-Min Sketch
liancheng Jan 27, 2016
58f5d8c
[SPARK-12728][SQL] Integrates SQL generation with native view
liancheng Jan 27, 2016
bae3c9a
[SPARK-12967][NETTY] Avoid NettyRpc error message during sparkContext…
nishkamravi2 Jan 27, 2016
4db255c
[SPARK-12780] Inconsistency returning value of ML python models' prop…
yinxusen Jan 27, 2016
90b0e56
[SPARK-12983][CORE][DOC] Correct metrics.properties.template
BenFradet Jan 27, 2016
093291c
[SPARK-1680][DOCS] Explain environment variables for running on YARN …
weineran Jan 27, 2016
41f0c85
[SPARK-13023][PROJECT INFRA] Fix handling of root module in modules_t…
JoshRosen Jan 27, 2016
edd4737
[SPARK-10847][SQL][PYSPARK] Pyspark - DataFrame - Optional Metadata w…
jasoncl Jan 27, 2016
87abcf7
[SPARK-12895][SPARK-12896] Migrate TaskMetrics to accumulators
Jan 27, 2016
32f7411
[SPARK-13021][CORE] Fail fast when custom RDDs violate RDD.partition'…
JoshRosen Jan 27, 2016
680afab
[SPARK-12938][SQL] DataFrame API for Bloom filter
cloud-fan Jan 27, 2016
ef96cd3
[SPARK-12865][SPARK-12866][SQL] Migrate SparkSQLParser/ExtendedHiveQl…
hvanhovell Jan 27, 2016
d702f0c
[HOTFIX] Fix Scala 2.11 compilation
Jan 27, 2016
4a09123
[SPARK-13045] [SQL] Remove ColumnVector.Struct in favor of ColumnarBa…
nongli Jan 27, 2016
c220443
Provide same info as in spark-submit --help
jimlohse Jan 28, 2016
415d0a8
[SPARK-12818][SQL] Specialized integral and string types for Count-mi…
liancheng Jan 28, 2016
6768039
[SPARK-12926][SQL] SQLContext to display warning message when non-sql…
tejasapatil Jan 28, 2016
cc18a71
[SPARK-13031] [SQL] cleanup codegen and improve test coverage
Jan 28, 2016
df78a93
[SPARK-9835][ML] Implement IterativelyReweightedLeastSquares solver
yanboliang Jan 28, 2016
abae889
[SPARK-12401][SQL] Add integration tests for postgres enum types
maropu Jan 28, 2016
3a40c0e
[SPARK-12749][SQL] add json option to parse floating-point types as D…
blbradley Jan 28, 2016
4637fc0
[SPARK-11955][SQL] Mark optional fields in merging schema for safely …
viirya Jan 29, 2016
b9dfdcc
Revert "[SPARK-13031] [SQL] cleanup codegen and improve test coverage"
davies Jan 29, 2016
66449b8
[SPARK-12968][SQL] Implement command to set current database
viirya Jan 29, 2016
721ced2
[SPARK-13067] [SQL] workaround for a weird scala reflection problem
cloud-fan Jan 29, 2016
8d3cc3d
[SPARK-13050][BUILD] Scalatest tags fail build with the addition of t…
ajbozarth Jan 29, 2016
55561e7
[SPARK-13031][SQL] cleanup codegen and improve test coverage
Jan 29, 2016
e51b6ea
[SPARK-13032][ML][PYSPARK] PySpark support model export/import and ta…
yanboliang Jan 29, 2016
e4c1162
[SPARK-10873] Support column sort and search for History Server.
Jan 29, 2016
c5f745e
[SPARK-13072] [SQL] simplify and improve murmur3 hash expression codegen
cloud-fan Jan 29, 2016
5f686cc
[SPARK-12656] [SQL] Implement Intersect with Left-semi Join
gatorsmile Jan 29, 2016
2b027e9
[SPARK-12818] Polishes spark-sketch module
liancheng Jan 29, 2016
e38b0ba
[SPARK-13055] SQLHistoryListener throws ClassCastException
Jan 29, 2016
2cbc412
[SPARK-13076][SQL] Rename ClientInterface -> HiveClient
rxin Jan 30, 2016
e6ceac4
[SPARK-13096][TEST] Fix flaky verifyPeakExecutionMemorySet
Jan 30, 2016
70e69fc
[SPARK-13088] Fix DAG viz in latest version of chrome
Jan 30, 2016
12252d1
[SPARK-13071] Coalescing HadoopRDD overwrites existing input metrics
Jan 30, 2016
e6a02c6
[SPARK-12914] [SQL] generate aggregation with grouping keys
Jan 30, 2016
dab246f
[SPARK-13098] [SQL] remove GenericInternalRowWithSchema
cloud-fan Jan 30, 2016
289373b
[SPARK-6363][BUILD] Make Scala 2.11 the default Scala version
JoshRosen Jan 30, 2016
de28371
[SPARK-13100][SQL] improving the performance of stringToDate method i…
Jan 30, 2016
a1303de
[SPARK-13070][SQL] Better error message when Parquet schema merging f…
liancheng Jan 31, 2016
0e6d92d
[SPARK-12689][SQL] Migrate DDL parsing to the newly absorbed parser
viirya Jan 31, 2016
5a8b978
[SPARK-13049] Add First/last with ignore nulls to functions.scala
hvanhovell Jan 31, 2016
c1da4d4
[SPARK-13093] [SQL] improve null check in nullSafeCodeGen for unary, …
cloud-fan Feb 1, 2016
6075573
[SPARK-6847][CORE][STREAMING] Fix stack overflow issue when updateSta…
zsxwing Feb 1, 2016
33c8a49
[SPARK-12989][SQL] Delaying Alias Cleanup after ExtractWindowExpressions
gatorsmile Feb 1, 2016
8f26eb5
[SPARK-12705][SPARK-10777][SQL] Analyzer Rule ResolveSortReferences
gatorsmile Feb 1, 2016
da9146c
[DOCS] Fix the jar location of datanucleus in sql-programming-guid.md
maropu Feb 1, 2016
711ce04
[ML][MINOR] Invalid MulticlassClassification reference in ml-guide
Lewuathe Feb 1, 2016
51b03b7
[SPARK-12463][SPARK-12464][SPARK-12465][SPARK-10647][MESOS] Fix zooke…
tnachen Feb 1, 2016
a41b68b
[SPARK-12265][MESOS] Spark calls System.exit inside driver instead of…
nraychaudhuri Feb 1, 2016
c9b89a0
[SPARK-12979][MESOS] Don’t resolve paths on the local file system in …
dragos Feb 1, 2016
064b029
[SPARK-13043][SQL] Implement remaining catalyst types in ColumnarBatch.
nongli Feb 1, 2016
a2973fe
Fix for [SPARK-12854][SQL] Implement complex types support in Columna…
jaceklaskowski Feb 1, 2016
be7a2fc
[SPARK-13078][SQL] API and test cases for internal catalog
rxin Feb 1, 2016
715a19d
[SPARK-12637][CORE] Print stage info of finished stages properly
srowen Feb 2, 2016
0df3cfb
[SPARK-12790][CORE] Remove HistoryServer old multiple files format
felixcheung Feb 2, 2016
0fff5c6
[SPARK-13130][SQL] Make codegen variable names easier to read
rxin Feb 2, 2016
b8666fd
Closes #10662. Closes #10661
rxin Feb 2, 2016
22ba213
[SPARK-13087][SQL] Fix group by function for sort based aggregation
marmbrus Feb 2, 2016
12a20c1
[SPARK-10820][SQL] Support for the continuous execution of structured…
marmbrus Feb 2, 2016
29d9218
[SPARK-13094][SQL] Add encoders for seq/array of primitives
marmbrus Feb 2, 2016
b938301
[SPARK-13114][SQL] Add a test for tokens more than the fields in schema
HyukjinKwon Feb 2, 2016
cba1d6b
[SPARK-12631][PYSPARK][DOC] PySpark clustering parameter desc to cons…
BryanCutler Feb 2, 2016
358300c
[SPARK-13056][SQL] map column would throw NPE if value is null
adrian-wang Feb 2, 2016
b1835d7
[SPARK-12711][ML] ML StopWordsRemover does not protect itself from co…
grzegorz-chilkiewicz Feb 2, 2016
7f6e3ec
[SPARK-13138][SQL] Add "logical" package prefix for ddl.scala
rxin Feb 2, 2016
be5dd88
[SPARK-12913] [SQL] Improve performance of stat functions
Feb 2, 2016
d0df2ca
[SPARK-13121][STREAMING] java mapWithState mishandles scala Option
sparkyengine Feb 2, 2016
b377b03
[DOCS] Update StructType.scala
swkimme Feb 2, 2016
6de6a97
[SPARK-13150] [SQL] disable two flaky tests
Feb 3, 2016
672032d
[SPARK-13020][SQL][TEST] fix random generator for map type
cloud-fan Feb 3, 2016
21112e8
[SPARK-12992] [SQL] Update parquet reader to support more types when …
nongli Feb 3, 2016
ff71261
[SPARK-13122] Fix race condition in MemoryStore.unrollSafely()
Feb 3, 2016
99a6e3c
[SPARK-12951] [SQL] support spilling in generated aggregate
Feb 3, 2016
0557146
[SPARK-12732][ML] bug fix in linear regression train
iyounus Feb 3, 2016
335f10e
[SPARK-7997][CORE] Add rpcEnv.awaitTermination() back to SparkEnv
zsxwing Feb 3, 2016
e86f8f6
[SPARK-13147] [SQL] improve readability of generated code
Feb 3, 2016
138c300
[SPARK-12957][SQL] Initial support for constraint propagation in Spar…
sameeragarwal Feb 3, 2016
e9eb248
[SPARK-12739][STREAMING] Details of batch in Streaming tab uses two D…
mariobriggs Feb 3, 2016
c4feec2
[SPARK-12798] [SQL] generated BroadcastHashJoin
Feb 3, 2016
9dd2741
[SPARK-13157] [SQL] Support any kind of input for SQL commands.
hvanhovell Feb 3, 2016
3221edd
[SPARK-3611][WEB UI] Show number of cores for each executor in applic…
ajbozarth Feb 3, 2016
915a753
[SPARK-13166][SQL] Remove DataStreamReader/Writer
rxin Feb 4, 2016
de09145
[SPARK-13131] [SQL] Use best and average time in benchmark
Feb 4, 2016
a8e2ba7
[SPARK-13152][CORE] Fix task metrics deprecation warning
holdenk Feb 4, 2016
a648311
[SPARK-13079][SQL] Extend and implement InMemoryCatalog
Feb 4, 2016
0f81318
[SPARK-12828][SQL] add natural join support
adrian-wang Feb 4, 2016
c2c956b
[ML][DOC] fix wrong api link in ml onevsrest
hhbyyh Feb 4, 2016
d390871
[SPARK-13113] [CORE] Remove unnecessary bit operation when decoding p…
viirya Feb 4, 2016
dee801a
[SPARK-12828][SQL] Natural join follow-up
rxin Feb 4, 2016
2eaeafe
[SPARK-12330][MESOS] Fix mesos coarse mode cleanup
drcrallen Feb 4, 2016
62a7c28
[SPARK-13164][CORE] Replace deprecated synchronized buffer in core
holdenk Feb 4, 2016
4120bcb
[SPARK-13162] Standalone mode does not respect initial executors
Feb 4, 2016
15205da
[SPARK-13053][TEST] Unignore tests in InternalAccumulatorSuite
Feb 4, 2016
085f510
MAINTENANCE: Automated closing of pull requests.
Feb 4, 2016
33212cb
[SPARK-13168][SQL] Collapse adjacent repartition operators
JoshRosen Feb 4, 2016
ecad77a
Support multiple executors per node on Mesos.
Jan 19, 2016
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
The table of contents is too big for display.
Diff view
Diff view
  •  
  •  
  •  
22 changes: 16 additions & 6 deletions .rat-excludes
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,16 @@ graphlib-dot.min.js
sorttable.js
vis.min.js
vis.min.css
dataTables.bootstrap.css
dataTables.bootstrap.min.js
dataTables.rowsGroup.js
jquery.blockUI.min.js
jquery.cookies.2.2.0.min.js
jquery.dataTables.1.10.4.min.css
jquery.dataTables.1.10.4.min.js
jquery.mustache.js
jsonFormatter.min.css
jsonFormatter.min.js
.*avsc
.*txt
.*json
Expand Down Expand Up @@ -63,12 +73,12 @@ logs
.*dependency-reduced-pom.xml
known_translations
json_expectation
local-1422981759269/*
local-1422981780767/*
local-1425081759269/*
local-1426533911241/*
local-1426633911242/*
local-1430917381534/*
local-1422981759269
local-1422981780767
local-1425081759269
local-1426533911241
local-1426633911242
local-1430917381534
local-1430917381535_1
local-1430917381535_2
DESCRIPTION
Expand Down
6 changes: 6 additions & 0 deletions LICENSE
Original file line number Diff line number Diff line change
Expand Up @@ -291,3 +291,9 @@ The text of each license is also included at licenses/LICENSE-[project].txt.
(MIT License) dagre-d3 (https://github.com/cpettitt/dagre-d3)
(MIT License) sorttable (https://github.com/stuartlangridge/sorttable)
(MIT License) boto (https://github.com/boto/boto/blob/develop/LICENSE)
(MIT License) datatables (http://datatables.net/license)
(MIT License) mustache (https://github.com/mustache/mustache/blob/master/LICENSE)
(MIT License) cookies (http://code.google.com/p/cookies/wiki/License)
(MIT License) blockUI (http://jquery.malsup.com/block/)
(MIT License) RowsGroup (http://datatables.net/license/mit)
(MIT License) jsonFormatter (http://www.jqueryscript.net/other/jQuery-Plugin-For-Pretty-JSON-Formatting-jsonFormatter.html)
16 changes: 16 additions & 0 deletions NOTICE
Original file line number Diff line number Diff line change
Expand Up @@ -650,3 +650,19 @@ For CSV functionality:
*/


===============================================================================
For dev/sparktestsupport/toposort.py:

Copyright 2014 True Blade Systems, Inc.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
2 changes: 2 additions & 0 deletions R/pkg/NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,8 @@ exportMethods("arrange",
"count",
"cov",
"corr",
"covar_samp",
"covar_pop",
"crosstab",
"describe",
"dim",
Expand Down
58 changes: 58 additions & 0 deletions R/pkg/R/functions.R
Original file line number Diff line number Diff line change
Expand Up @@ -275,6 +275,64 @@ setMethod("corr", signature(x = "Column"),
column(jc)
})

#' cov
#'
#' Compute the sample covariance between two expressions.
#'
#' @rdname cov
#' @name cov
#' @family math_funcs
#' @export
#' @examples
#' \dontrun{
#' cov(df$c, df$d)
#' cov("c", "d")
#' covar_samp(df$c, df$d)
#' covar_samp("c", "d")
#' }
setMethod("cov", signature(x = "characterOrColumn"),
          function(x, col2) {
            # Validate the argument object itself, not its class name.
            # The previous check, is(class(col2), "characterOrColumn"), passed the
            # *string* returned by class() to is(); a character vector is always a
            # member of the characterOrColumn union, so the check could never fail.
            stopifnot(is(col2, "characterOrColumn"))
            # cov() on two expressions is defined as the sample covariance.
            covar_samp(x, col2)
          })

#' @rdname cov
#' @name covar_samp
# Sample covariance of two columns. Both arguments may be Column objects or
# column-name strings, but must be the same kind; delegates the computation to
# the JVM-side org.apache.spark.sql.functions.covar_samp.
setMethod("covar_samp", signature(col1 = "characterOrColumn", col2 = "characterOrColumn"),
function(col1, col2) {
# Reject mixing a Column object with a character column name.
stopifnot(class(col1) == class(col2))
if (class(col1) == "Column") {
# Unwrap the underlying Java column references before the R/JVM call.
col1 <- col1@jc
col2 <- col2@jc
}
jc <- callJStatic("org.apache.spark.sql.functions", "covar_samp", col1, col2)
# Wrap the returned Java column back into an R Column.
column(jc)
})

#' covar_pop
#'
#' Compute the population covariance between two expressions.
#'
#' @rdname covar_pop
#' @name covar_pop
#' @family math_funcs
#' @export
#' @examples
#' \dontrun{
#' covar_pop(df$c, df$d)
#' covar_pop("c", "d")
#' }
# Population covariance of two columns. Both arguments may be Column objects or
# column-name strings, but must be the same kind; delegates the computation to
# the JVM-side org.apache.spark.sql.functions.covar_pop.
setMethod("covar_pop", signature(col1 = "characterOrColumn", col2 = "characterOrColumn"),
function(col1, col2) {
# Reject mixing a Column object with a character column name.
stopifnot(class(col1) == class(col2))
if (class(col1) == "Column") {
# Unwrap the underlying Java column references before the R/JVM call.
col1 <- col1@jc
col2 <- col2@jc
}
jc <- callJStatic("org.apache.spark.sql.functions", "covar_pop", col1, col2)
# Wrap the returned Java column back into an R Column.
column(jc)
})

#' cos
#'
#' Computes the cosine of the given value.
Expand Down
10 changes: 9 additions & 1 deletion R/pkg/R/generics.R
Original file line number Diff line number Diff line change
Expand Up @@ -418,12 +418,20 @@ setGeneric("columns", function(x) {standardGeneric("columns") })

#' @rdname statfunctions
#' @export
setGeneric("cov", function(x, col1, col2) {standardGeneric("cov") })
setGeneric("cov", function(x, ...) {standardGeneric("cov") })

#' @rdname statfunctions
#' @export
setGeneric("corr", function(x, ...) {standardGeneric("corr") })

#' @rdname statfunctions
#' @export
setGeneric("covar_samp", function(col1, col2) {standardGeneric("covar_samp") })

#' @rdname statfunctions
#' @export
setGeneric("covar_pop", function(col1, col2) {standardGeneric("covar_pop") })

#' @rdname summary
#' @export
setGeneric("describe", function(x, col, ...) { standardGeneric("describe") })
Expand Down
3 changes: 2 additions & 1 deletion R/pkg/R/stats.R
Original file line number Diff line number Diff line change
Expand Up @@ -66,8 +66,9 @@ setMethod("crosstab",
#' cov <- cov(df, "title", "gender")
#' }
setMethod("cov",
signature(x = "DataFrame", col1 = "character", col2 = "character"),
signature(x = "DataFrame"),
function(x, col1, col2) {
stopifnot(class(col1) == "character" && class(col2) == "character")
statFunctions <- callJMethod(x@sdf, "stat")
callJMethod(statFunctions, "cov", col1, col2)
})
Expand Down
2 changes: 2 additions & 0 deletions R/pkg/inst/tests/testthat/test_sparkSQL.R
Original file line number Diff line number Diff line change
Expand Up @@ -996,6 +996,8 @@ test_that("column functions", {
c14 <- cume_dist() + ntile(1) + corr(c, c1)
c15 <- dense_rank() + percent_rank() + rank() + row_number()
c16 <- is.nan(c) + isnan(c) + isNaN(c)
c17 <- cov(c, c1) + cov("c", "c1") + covar_samp(c, c1) + covar_samp("c", "c1")
c18 <- covar_pop(c, c1) + covar_pop("c", "c1")

# Test if base::is.nan() is exposed
expect_equal(is.nan(c("a", "b")), c(FALSE, FALSE))
Expand Down
4 changes: 2 additions & 2 deletions assembly/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -20,13 +20,13 @@
<modelVersion>4.0.0</modelVersion>
<parent>
<groupId>org.apache.spark</groupId>
<artifactId>spark-parent_2.10</artifactId>
<artifactId>spark-parent_2.11</artifactId>
<version>2.0.0-SNAPSHOT</version>
<relativePath>../pom.xml</relativePath>
</parent>

<groupId>org.apache.spark</groupId>
<artifactId>spark-assembly_2.10</artifactId>
<artifactId>spark-assembly_2.11</artifactId>
<name>Spark Project Assembly</name>
<url>http://spark.apache.org/</url>
<packaging>pom</packaging>
Expand Down
11 changes: 9 additions & 2 deletions common/sketch/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -21,20 +21,27 @@
<modelVersion>4.0.0</modelVersion>
<parent>
<groupId>org.apache.spark</groupId>
<artifactId>spark-parent_2.10</artifactId>
<artifactId>spark-parent_2.11</artifactId>
<version>2.0.0-SNAPSHOT</version>
<relativePath>../../pom.xml</relativePath>
</parent>

<groupId>org.apache.spark</groupId>
<artifactId>spark-sketch_2.10</artifactId>
<artifactId>spark-sketch_2.11</artifactId>
<packaging>jar</packaging>
<name>Spark Project Sketch</name>
<url>http://spark.apache.org/</url>
<properties>
<sbt.project.name>sketch</sbt.project.name>
</properties>

<dependencies>
<dependency>
<groupId>org.apache.spark</groupId>
<artifactId>spark-test-tags_${scala.binary.version}</artifactId>
</dependency>
</dependencies>

<build>
<outputDirectory>target/scala-${scala.binary.version}/classes</outputDirectory>
<testOutputDirectory>target/scala-${scala.binary.version}/test-classes</testOutputDirectory>
Expand Down
116 changes: 116 additions & 0 deletions common/sketch/src/main/java/org/apache/spark/util/sketch/BitArray.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,116 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package org.apache.spark.util.sketch;

import java.io.DataInputStream;
import java.io.DataOutputStream;
import java.io.IOException;
import java.util.Arrays;

final class BitArray {
private final long[] data;
private long bitCount;

static int numWords(long numBits) {
if (numBits <= 0) {
throw new IllegalArgumentException("numBits must be positive, but got " + numBits);
}
long numWords = (long) Math.ceil(numBits / 64.0);
if (numWords > Integer.MAX_VALUE) {
throw new IllegalArgumentException("Can't allocate enough space for " + numBits + " bits");
}
return (int) numWords;
}

BitArray(long numBits) {
this(new long[numWords(numBits)]);
}

private BitArray(long[] data) {
this.data = data;
long bitCount = 0;
for (long word : data) {
bitCount += Long.bitCount(word);
}
this.bitCount = bitCount;
}

/** Returns true if the bit changed value. */
boolean set(long index) {
if (!get(index)) {
data[(int) (index >>> 6)] |= (1L << index);
bitCount++;
return true;
}
return false;
}

boolean get(long index) {
return (data[(int) (index >>> 6)] & (1L << index)) != 0;
}

/** Number of bits */
long bitSize() {
return (long) data.length * Long.SIZE;
}

/** Number of set bits (1s) */
long cardinality() {
return bitCount;
}

/** Combines the two BitArrays using bitwise OR. */
void putAll(BitArray array) {
assert data.length == array.data.length : "BitArrays must be of equal length when merging";
long bitCount = 0;
for (int i = 0; i < data.length; i++) {
data[i] |= array.data[i];
bitCount += Long.bitCount(data[i]);
}
this.bitCount = bitCount;
}

void writeTo(DataOutputStream out) throws IOException {
out.writeInt(data.length);
for (long datum : data) {
out.writeLong(datum);
}
}

static BitArray readFrom(DataInputStream in) throws IOException {
int numWords = in.readInt();
long[] data = new long[numWords];
for (int i = 0; i < numWords; i++) {
data[i] = in.readLong();
}
return new BitArray(data);
}

@Override
public boolean equals(Object other) {
if (this == other) return true;
if (other == null || !(other instanceof BitArray)) return false;
BitArray that = (BitArray) other;
return Arrays.equals(data, that.data);
}

@Override
public int hashCode() {
return Arrays.hashCode(data);
}
}
Loading