Skip to content

Commit f71634d

Browse files
committed
Roll back ScalaCheck usage
1 parent e0d7d49 commit f71634d

File tree

4 files changed

+108
-45
lines changed

4 files changed

+108
-45
lines changed

LICENSE

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -922,7 +922,7 @@ The following components are provided under a BSD-style license. See project lin
922922
(BSD-like) Scala Compiler (org.scala-lang:scala-reflect:2.10.4 - http://www.scala-lang.org/)
923923
(BSD-like) Scala Library (org.scala-lang:scala-library:2.10.4 - http://www.scala-lang.org/)
924924
(BSD-like) Scalap (org.scala-lang:scalap:2.10.4 - http://www.scala-lang.org/)
925-
(BSD-style) scalacheck (org.scalacheck:scalacheck_2.10:1.12.4 - http://www.scalacheck.org)
925+
(BSD-style) scalacheck (org.scalacheck:scalacheck_2.10:1.10.0 - http://www.scalacheck.org)
926926
(BSD-style) spire (org.spire-math:spire_2.10:0.7.1 - http://spire-math.org)
927927
(BSD-style) spire-macros (org.spire-math:spire-macros_2.10:0.7.1 - http://spire-math.org)
928928
(New BSD License) Kryo (com.esotericsoftware.kryo:kryo:2.21 - http://code.google.com/p/kryo/)

pom.xml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -689,7 +689,7 @@
689689
<dependency>
690690
<groupId>org.scalacheck</groupId>
691691
<artifactId>scalacheck_${scala.binary.version}</artifactId>
692-
<version>1.12.4</version>
692+
<version>1.11.3</version>
693693
<scope>test</scope>
694694
</dependency>
695695
<dependency>

sql/catalyst/src/test/scala/org/apache/spark/sql/RandomDataGenerator.scala

Lines changed: 99 additions & 35 deletions
Original file line numberDiff line numberDiff line change
@@ -17,17 +17,54 @@
1717

1818
package org.apache.spark.sql
1919

20-
import java.sql.Timestamp
20+
import java.lang.Double.longBitsToDouble
21+
import java.lang.Float.intBitsToFloat
22+
import java.math.MathContext
2123

22-
import org.scalacheck.{Arbitrary, Gen}
24+
import scala.util.Random
2325

2426
import org.apache.spark.sql.types._
2527

2628
/**
27-
* ScalaCheck random data generators for Spark SQL DataTypes.
29+
* Random data generators for Spark SQL DataTypes. These generators do not generate uniformly random
30+
* values; instead, they're biased to return "interesting" values (such as maximum / minimum values)
31+
* with higher probability.
2832
*/
2933
object RandomDataGenerator {
3034

35+
/**
36+
* The conditional probability of a non-null value being drawn from a set of "interesting" values
37+
* instead of being chosen uniformly at random.
38+
*/
39+
private val PROBABILITY_OF_INTERESTING_VALUE: Float = 0.5f
40+
41+
/**
42+
* The probability of a generated value being null, when null values are allowed.
43+
*/
44+
private val PROBABILITY_OF_NULL: Float = 0.1f
45+
46+
private val MAX_STR_LEN: Int = 1024
47+
private val MAX_ARR_SIZE: Int = 128
48+
private val MAX_MAP_SIZE: Int = 128
49+
50+
/**
51+
* Helper function for constructing a biased random number generator which returns "interesting"
52+
* values with a higher probability.
53+
*/
54+
private def randomNumeric[T](
55+
rand: Random,
56+
uniformRand: Random => T,
57+
interestingValues: Seq[T]): Some[() => T] = {
58+
val f = () => {
59+
if (rand.nextFloat() <= PROBABILITY_OF_INTERESTING_VALUE) {
60+
interestingValues(rand.nextInt(interestingValues.length))
61+
} else {
62+
uniformRand(rand)
63+
}
64+
}
65+
Some(f)
66+
}
67+
3168
/**
3269
* Returns a function which generates random values for the given [[DataType]], or `None` if no
3370
* random data generator is defined for that data type. The generated values will use an external
@@ -37,58 +74,85 @@ object RandomDataGenerator {
3774
*
3875
* @param dataType the type to generate values for
3976
* @param nullable whether null values should be generated
40-
* @return a ScalaCheck [[Gen]] which can be used to produce random values.
77+
* @param seed an optional seed for the random number generator
78+
* @return a function which can be called to generate random values.
4179
*/
4280
def forType(
4381
dataType: DataType,
44-
nullable: Boolean = true): Option[Gen[Any]] = {
45-
val valueGenerator: Option[Gen[Any]] = dataType match {
46-
case StringType => Some(Arbitrary.arbitrary[String])
47-
case BinaryType => Some(Gen.listOf(Arbitrary.arbitrary[Byte]).map(_.toArray))
48-
case BooleanType => Some(Arbitrary.arbitrary[Boolean])
49-
case DateType => Some(Arbitrary.arbitrary[Int].suchThat(_ >= 0).map(new java.sql.Date(_)))
50-
case DoubleType => Some(Arbitrary.arbitrary[Double])
51-
case FloatType => Some(Arbitrary.arbitrary[Float])
52-
case ByteType => Some(Arbitrary.arbitrary[Byte])
53-
case IntegerType => Some(Arbitrary.arbitrary[Int])
54-
case LongType => Some(Arbitrary.arbitrary[Long])
55-
case ShortType => Some(Arbitrary.arbitrary[Short])
56-
case NullType => Some(Gen.const[Any](null))
57-
case TimestampType => Some(Arbitrary.arbitrary[Long].suchThat(_ >= 0).map(new Timestamp(_)))
58-
case DecimalType.Unlimited => Some(Arbitrary.arbitrary[BigDecimal])
82+
nullable: Boolean = true,
83+
seed: Option[Long] = None): Option[() => Any] = {
84+
val rand = new Random()
85+
seed.foreach(rand.setSeed)
86+
87+
val valueGenerator: Option[() => Any] = dataType match {
88+
case StringType => Some(() => rand.nextString(rand.nextInt(MAX_STR_LEN)))
89+
case BinaryType => Some(() => {
90+
val arr = new Array[Byte](rand.nextInt(MAX_STR_LEN))
91+
rand.nextBytes(arr)
92+
arr
93+
})
94+
case BooleanType => Some(() => rand.nextBoolean())
95+
case DateType => Some(() => new java.sql.Date(rand.nextInt()))
96+
case TimestampType => Some(() => new java.sql.Timestamp(rand.nextLong()))
97+
case DecimalType.Unlimited => Some(
98+
() => BigDecimal.apply(rand.nextLong, rand.nextInt, MathContext.UNLIMITED))
99+
case DoubleType => randomNumeric[Double](
100+
rand, r => longBitsToDouble(r.nextLong()), Seq(Double.MinValue, Double.MinPositiveValue,
101+
Double.MaxValue, Double.PositiveInfinity, Double.NegativeInfinity, Double.NaN, 0.0))
102+
case FloatType => randomNumeric[Float](
103+
rand, r => intBitsToFloat(r.nextInt()), Seq(Float.MinValue, Float.MinPositiveValue,
104+
Float.MaxValue, Float.PositiveInfinity, Float.NegativeInfinity, Float.NaN, 0.0f))
105+
case ByteType => randomNumeric[Byte](
106+
rand, _.nextInt().toByte, Seq(Byte.MinValue, Byte.MaxValue, 0.toByte))
107+
case IntegerType => randomNumeric[Int](
108+
rand, _.nextInt(), Seq(Int.MinValue, Int.MaxValue, 0))
109+
case LongType => randomNumeric[Long](
110+
rand, _.nextLong(), Seq(Long.MinValue, Long.MaxValue, 0L))
111+
case ShortType => randomNumeric[Short](
112+
rand, _.nextInt().toShort, Seq(Short.MinValue, Short.MaxValue, 0.toShort))
113+
case NullType => Some(() => null)
59114
case ArrayType(elementType, containsNull) => {
60-
forType(elementType, nullable = containsNull).map { elementGen =>
61-
Gen.listOf(elementGen).map(_.toArray)
115+
forType(elementType, nullable = containsNull, seed = Some(rand.nextLong())).map {
116+
elementGenerator => () => Array.fill(rand.nextInt(MAX_ARR_SIZE))(elementGenerator())
62117
}
63118
}
64119
case MapType(keyType, valueType, valueContainsNull) => {
65120
for (
66-
keyGenerator <- forType(keyType, nullable = false);
67-
valueGenerator <- forType(valueType, nullable = valueContainsNull)
68-
// Scala's BigDecimal.hashCode can lead to OutOfMemoryError on Scala 2.10 (see SI-6173)
69-
// and Spark can hit NumberFormatException errors converting certain BigDecimals
70-
// (SPARK-8802). For these reasons, we don't support generation of maps with decimal keys.
71-
if !keyType.isInstanceOf[DecimalType]
121+
keyGenerator <- forType(keyType, nullable = false, seed = Some(rand.nextLong()));
122+
valueGenerator <-
123+
forType(valueType, nullable = valueContainsNull, seed = Some(rand.nextLong()))
72124
) yield {
73-
Gen.listOf(Gen.zip(keyGenerator, valueGenerator)).map(_.toMap)
125+
() => {
126+
Seq.fill(rand.nextInt(MAX_MAP_SIZE))((keyGenerator(), valueGenerator())).toMap
127+
}
74128
}
75129
}
76130
case StructType(fields) => {
77-
val maybeFieldGenerators: Seq[Option[Gen[Any]]] = fields.map { field =>
78-
forType(field.dataType, nullable = field.nullable)
131+
val maybeFieldGenerators: Seq[Option[() => Any]] = fields.map { field =>
132+
forType(field.dataType, nullable = field.nullable, seed = Some(rand.nextLong()))
79133
}
80134
if (maybeFieldGenerators.forall(_.isDefined)) {
81-
Some(Gen.sequence[Seq[Any], Any](maybeFieldGenerators.flatten).map(vs => Row.fromSeq(vs)))
135+
val fieldGenerators: Seq[() => Any] = maybeFieldGenerators.map(_.get)
136+
Some(() => Row.fromSeq(fieldGenerators.map(_.apply())))
82137
} else {
83138
None
84139
}
85140
}
86141
case unsupportedType => None
87142
}
88-
if (nullable) {
89-
valueGenerator.map(Gen.oneOf(_, Gen.const[Any](null)))
90-
} else {
91-
valueGenerator
143+
// Handle nullability by wrapping the non-null value generator:
144+
valueGenerator.map { valueGenerator =>
145+
if (nullable) {
146+
() => {
147+
if (rand.nextFloat() <= PROBABILITY_OF_NULL) {
148+
null
149+
} else {
150+
valueGenerator()
151+
}
152+
}
153+
} else {
154+
valueGenerator
155+
}
92156
}
93157
}
94158
}

sql/catalyst/src/test/scala/org/apache/spark/sql/RandomDataGeneratorSuite.scala

Lines changed: 7 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -17,17 +17,14 @@
1717

1818
package org.apache.spark.sql
1919

20-
import org.scalacheck.Prop.{exists, forAll, secure}
21-
import org.scalatest.prop.Checkers
22-
2320
import org.apache.spark.SparkFunSuite
2421
import org.apache.spark.sql.catalyst.CatalystTypeConverters
2522
import org.apache.spark.sql.types._
2623

2724
/**
2825
* Tests of [[RandomDataGenerator]].
2926
*/
30-
class RandomDataGeneratorSuite extends SparkFunSuite with Checkers {
27+
class RandomDataGeneratorSuite extends SparkFunSuite {
3128

3229
/**
3330
* Tests random data generation for the given type by using it to generate random values then
@@ -39,12 +36,14 @@ class RandomDataGeneratorSuite extends SparkFunSuite with Checkers {
3936
fail(s"Random data generator was not defined for $dataType")
4037
}
4138
if (nullable) {
42-
check(exists(generator) { _ == null })
39+
assert(Iterator.fill(100)(generator()).contains(null))
40+
} else {
41+
assert(Iterator.fill(100)(generator()).forall(_ != null))
4342
}
44-
if (!nullable) {
45-
check(forAll(generator) { _ != null })
43+
for (_ <- 1 to 10) {
44+
val generatedValue = generator()
45+
toCatalyst(generatedValue)
4646
}
47-
check(secure(forAll(generator) { v => { toCatalyst(v); true } }))
4847
}
4948

5049
// Basic types:

0 commit comments

Comments
 (0)