From d2b4a4a9a2139b1a6c2be5d1f1aa3d98a6c9ed99 Mon Sep 17 00:00:00 2001
From: Josh Rosen <joshrosen@databricks.com>
Date: Wed, 1 Jul 2015 20:18:05 -0700
Subject: [PATCH 1/8] Add random data generator test utilities to Spark SQL.

---
 .../spark/sql/test/DataTypeTestUtils.scala    |  59 +++++++
 .../spark/sql/test/RandomDataGenerator.scala  | 151 ++++++++++++++++++
 .../sql/test/RandomDataGeneratorSuite.scala   |  77 +++++++++
 3 files changed, 287 insertions(+)
 create mode 100644 sql/core/src/test/scala/org/apache/spark/sql/test/DataTypeTestUtils.scala
 create mode 100644 sql/core/src/test/scala/org/apache/spark/sql/test/RandomDataGenerator.scala
 create mode 100644 sql/core/src/test/scala/org/apache/spark/sql/test/RandomDataGeneratorSuite.scala

diff --git a/sql/core/src/test/scala/org/apache/spark/sql/test/DataTypeTestUtils.scala b/sql/core/src/test/scala/org/apache/spark/sql/test/DataTypeTestUtils.scala
new file mode 100644
index 000000000000..d862eb7293d6
--- /dev/null
+++ b/sql/core/src/test/scala/org/apache/spark/sql/test/DataTypeTestUtils.scala
@@ -0,0 +1,59 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.sql.test
+
+import org.apache.spark.sql.types._
+
+/**
+ * Utility functions for working with DataTypes in tests.
+ */
+object DataTypeTestUtils {
+
+  /**
+   * Instances of all [[IntegralType]]s.
+   */
+  val integralType: Set[IntegralType] = Set(
+    ByteType, ShortType, IntegerType, LongType
+  )
+
+  /**
+   * Instances of all [[FractionalType]]s, including both fixed- and unlimited-precision
+   * decimal types.
+   */
+  val fractionalTypes: Set[FractionalType] = Set(
+    DecimalType(precisionInfo = None),
+    DecimalType(2, 1),
+    DoubleType,
+    FloatType
+  )
+
+  /**
+   * Instances of all [[NumericType]]s.
+   */
+  val numericTypes: Set[NumericType] = integralType ++ fractionalTypes
+
+  /**
+   * Instances of all [[AtomicType]]s.
+   */
+  val atomicTypes: Set[DataType] = Set(BinaryType, StringType, TimestampType) ++ numericTypes
+
+  /**
+   * Instances of [[ArrayType]] for all [[AtomicType]]s. Arrays of these types may contain null.
+   */
+  val atomicArrayTypes: Set[ArrayType] = atomicTypes.map(ArrayType(_, containsNull = true))
+}
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/test/RandomDataGenerator.scala b/sql/core/src/test/scala/org/apache/spark/sql/test/RandomDataGenerator.scala
new file mode 100644
index 000000000000..6ac2ba155655
--- /dev/null
+++ b/sql/core/src/test/scala/org/apache/spark/sql/test/RandomDataGenerator.scala
@@ -0,0 +1,151 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.sql.test
+
+import org.apache.spark.sql.Row
+
+import scala.util.Random
+
+import org.apache.spark.sql.types._
+
+/**
+ * Random data generators for Spark SQL DataTypes. These generators do not generate uniformly random
+ * values; instead, they're biased to return "interesting" values (such as maximum / minimum values)
+ * with higher probability.
+ */
+object RandomDataGenerator {
+
+  /**
+   * The conditional probability of a non-null value being drawn from a set of "interesting" values
+   * instead of being chosen uniformly at random.
+   */
+  private val PROBABILITY_OF_INTERESTING_VALUE: Float = 0.25f
+
+  /**
+   * The probability of the generated value being null
+   */
+  private val PROBABILITY_OF_NULL: Float = 0.1f
+
+  private val MAX_STR_LEN: Int = 1024
+  private val MAX_ARR_SIZE: Int = 128
+  private val MAX_MAP_SIZE: Int = 128
+
+  /**
+   * Helper function for constructing a biased random number generator which returns "interesting"
+   * values with a higher probability.
+   */
+  private def randomNumeric[T](
+      rand: Random,
+      uniformRand: Random => T,
+      interestingValues: Seq[T]): Some[() => T] = {
+    val f = () => {
+      if (rand.nextFloat() <= PROBABILITY_OF_INTERESTING_VALUE) {
+        interestingValues(rand.nextInt(interestingValues.length))
+      } else {
+        uniformRand(rand)
+      }
+    }
+    Some(f)
+  }
+
+  /**
+   * Returns a function which generates random values for the given [[DataType]], or `None` if no
+   * random data generator is defined for that data type. The generated values will use an external
+   * representation of the data type; for example, the random generator for [[DateType]] will return
+   * instances of [[java.sql.Date]] and the generator for [[StructType]] will return a
+   * [[org.apache.spark.sql.Row]].
+   *
+   * @param dataType the type to generate values for
+   * @param nullable whether null values should be generated
+   * @param seed an optional seed for the random number generator
+   * @return a function which can be called to generate random values.
+   */
+  def forType(
+      dataType: DataType,
+      nullable: Boolean = true,
+      seed: Option[Long] = None): Option[() => Any] = {
+    val rand = new Random()
+    seed.foreach(rand.setSeed)
+
+    val valueGenerator: Option[() => Any] = dataType match {
+      case StringType => Some(() => rand.nextString(rand.nextInt(MAX_STR_LEN)))
+      case BinaryType => Some(() => {
+        val arr = new Array[Byte](rand.nextInt(MAX_STR_LEN))
+        rand.nextBytes(arr)
+        arr
+      })
+      case BooleanType => Some(() => rand.nextBoolean())
+      case DateType => Some(() => new java.sql.Date(rand.nextInt(Int.MaxValue)))
+      case DoubleType => randomNumeric[Double](
+        rand, _.nextDouble(), Seq(Double.MinValue, Double.MinPositiveValue, Double.MaxValue, 0.0))
+      case FloatType => randomNumeric[Float](
+        rand, _.nextFloat(), Seq(Float.MinValue, Float.MinPositiveValue, Float.MaxValue, 0.0f))
+      case ByteType => randomNumeric[Byte](
+        rand, _.nextInt().toByte, Seq(Byte.MinValue, Byte.MaxValue, 0.toByte))
+      case IntegerType => randomNumeric[Int](
+        rand, _.nextInt(), Seq(Int.MinValue, Int.MaxValue, 0))
+      case LongType => randomNumeric[Long](
+        rand, _.nextLong(), Seq(Long.MinValue, Long.MaxValue, 0L))
+      case ShortType => randomNumeric[Short](
+        rand, _.nextInt().toShort, Seq(Short.MinValue, Short.MaxValue, 0.toShort))
+      case NullType => Some(() => null)
+      case ArrayType(elementType, containsNull) => {
+        forType(elementType, nullable = containsNull, seed = Some(rand.nextLong())).map {
+          elementGenerator => () => Array.fill(rand.nextInt(MAX_ARR_SIZE))(elementGenerator())
+        }
+      }
+      case MapType(keyType, valueType, valueContainsNull) => {
+        for (
+          keyGenerator <- forType(keyType, nullable = false, seed = Some(rand.nextLong()));
+          valueGenerator <-
+            forType(valueType, nullable = valueContainsNull, seed = Some(rand.nextLong()))
+        ) yield {
+          () => {
+            Seq.fill(rand.nextInt(MAX_MAP_SIZE))((keyGenerator(), valueGenerator())).toMap
+          }
+        }
+      }
+      case StructType(fields) => {
+        val maybeFieldGenerators: Seq[Option[() => Any]] = fields.map { field =>
+          forType(field.dataType, nullable = field.nullable, seed = Some(rand.nextLong()))
+        }
+        if (maybeFieldGenerators.forall(_.isDefined)) {
+          val fieldGenerators: Seq[() => Any] = maybeFieldGenerators.map(_.get)
+          Some(() => Row.fromSeq(fieldGenerators.map(_.apply())))
+        } else {
+          None
+        }
+      }
+      case unsupportedType => None
+    }
+    // Handle nullability by wrapping the non-null value generator:
+    valueGenerator.map { valueGenerator =>
+      if (nullable) {
+        () => {
+          if (rand.nextFloat() <= PROBABILITY_OF_NULL) {
+            null
+          } else {
+            valueGenerator()
+          }
+        }
+      } else {
+        valueGenerator
+      }
+    }
+  }
+}
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/test/RandomDataGeneratorSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/test/RandomDataGeneratorSuite.scala
new file mode 100644
index 000000000000..fb4ed9028c2c
--- /dev/null
+++ b/sql/core/src/test/scala/org/apache/spark/sql/test/RandomDataGeneratorSuite.scala
@@ -0,0 +1,77 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.sql.test
+
+import org.apache.spark.SparkFunSuite
+import org.apache.spark.sql.catalyst.CatalystTypeConverters
+import org.apache.spark.sql.types.{StructField, StructType, MapType, DataType}
+
+/**
+ * Tests of [[RandomDataGenerator]].
+ */
+class RandomDataGeneratorSuite extends SparkFunSuite {
+
+  /**
+   * Tests random data generation for the given type by using it to generate random values then
+   * converting those values into their Catalyst equivalents using CatalystTypeConverters.
+   */
+  def testRandomDataGeneration(dataType: DataType, nullable: Boolean = true): Unit = {
+    val toCatalyst = CatalystTypeConverters.createToCatalystConverter(dataType)
+    RandomDataGenerator.forType(dataType, nullable, Some(42L)).foreach { generator =>
+      for (_ <- 1 to 10) {
+        val generatedValue = generator()
+        val convertedValue = toCatalyst(generatedValue)
+        if (!nullable) {
+          assert(convertedValue !== null)
+        }
+      }
+    }
+
+  }
+
+  // Basic types:
+
+  (DataTypeTestUtils.atomicTypes ++ DataTypeTestUtils.atomicArrayTypes).foreach { dataType =>
+    test(s"$dataType") {
+      testRandomDataGeneration(dataType)
+    }
+  }
+
+  // Complex types:
+
+  for (
+    keyType <- DataTypeTestUtils.atomicTypes;
+    valueType <- DataTypeTestUtils.atomicTypes
+  ) {
+    val mapType = MapType(keyType, valueType)
+    test(s"$mapType") {
+      testRandomDataGeneration(mapType)
+    }
+  }
+
+  for (
+    colOneType <- DataTypeTestUtils.atomicTypes;
+    colTwoType <- DataTypeTestUtils.atomicTypes
+  ) {
+    val structType = StructType(StructField("a", colOneType) :: StructField("b", colTwoType) :: Nil)
+    test(s"$structType") {
+      testRandomDataGeneration(structType)
+    }
+  }
+
+}

From ab76cbd89bf800d590b7833f5a25c62df4ec2a95 Mon Sep 17 00:00:00 2001
From: Josh Rosen <joshrosen@databricks.com>
Date: Wed, 1 Jul 2015 21:37:38 -0700
Subject: [PATCH 2/8] Move code to Catalyst package.

---
 .../scala/org/apache/spark/sql}/RandomDataGenerator.scala   | 6 ++----
 .../org/apache/spark/sql}/RandomDataGeneratorSuite.scala    | 4 ++--
 .../org/apache/spark/sql/types}/DataTypeTestUtils.scala     | 4 +---
 3 files changed, 5 insertions(+), 9 deletions(-)
 rename sql/{core/src/test/scala/org/apache/spark/sql/test => catalyst/src/test/scala/org/apache/spark/sql}/RandomDataGenerator.scala (98%)
 rename sql/{core/src/test/scala/org/apache/spark/sql/test => catalyst/src/test/scala/org/apache/spark/sql}/RandomDataGeneratorSuite.scala (95%)
 rename sql/{core/src/test/scala/org/apache/spark/sql/test => catalyst/src/test/scala/org/apache/spark/sql/types}/DataTypeTestUtils.scala (96%)

diff --git a/sql/core/src/test/scala/org/apache/spark/sql/test/RandomDataGenerator.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/RandomDataGenerator.scala
similarity index 98%
rename from sql/core/src/test/scala/org/apache/spark/sql/test/RandomDataGenerator.scala
rename to sql/catalyst/src/test/scala/org/apache/spark/sql/RandomDataGenerator.scala
index 6ac2ba155655..f167557be818 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/test/RandomDataGenerator.scala
+++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/RandomDataGenerator.scala
@@ -15,14 +15,12 @@
  * limitations under the License.
  */
 
-package org.apache.spark.sql.test
+package org.apache.spark.sql
 
-import org.apache.spark.sql.Row
+import org.apache.spark.sql.types._
 
 import scala.util.Random
 
-import org.apache.spark.sql.types._
-
 /**
  * Random data generators for Spark SQL DataTypes. These generators do not generate uniformly random
  * values; instead, they're biased to return "interesting" values (such as maximum / minimum values)
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/test/RandomDataGeneratorSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/RandomDataGeneratorSuite.scala
similarity index 95%
rename from sql/core/src/test/scala/org/apache/spark/sql/test/RandomDataGeneratorSuite.scala
rename to sql/catalyst/src/test/scala/org/apache/spark/sql/RandomDataGeneratorSuite.scala
index fb4ed9028c2c..ea70fe03eb91 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/test/RandomDataGeneratorSuite.scala
+++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/RandomDataGeneratorSuite.scala
@@ -15,11 +15,11 @@
  * limitations under the License.
  */
 
-package org.apache.spark.sql.test
+package org.apache.spark.sql
 
 import org.apache.spark.SparkFunSuite
 import org.apache.spark.sql.catalyst.CatalystTypeConverters
-import org.apache.spark.sql.types.{StructField, StructType, MapType, DataType}
+import org.apache.spark.sql.types._
 
 /**
  * Tests of [[RandomDataGenerator]].
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/test/DataTypeTestUtils.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/types/DataTypeTestUtils.scala
similarity index 96%
rename from sql/core/src/test/scala/org/apache/spark/sql/test/DataTypeTestUtils.scala
rename to sql/catalyst/src/test/scala/org/apache/spark/sql/types/DataTypeTestUtils.scala
index d862eb7293d6..0b7ed54c681e 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/test/DataTypeTestUtils.scala
+++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/types/DataTypeTestUtils.scala
@@ -15,9 +15,7 @@
  * limitations under the License.
  */
 
-package org.apache.spark.sql.test
-
-import org.apache.spark.sql.types._
+package org.apache.spark.sql.types
 
 /**
  * Utility functions for working with DataTypes in tests.

From 5acdd5ccf36487ba49815e8e0429f4c99558d427 Mon Sep 17 00:00:00 2001
From: Josh Rosen <joshrosen@databricks.com>
Date: Wed, 1 Jul 2015 22:15:13 -0700
Subject: [PATCH 3/8] Infinity and NaN are interesting.

---
 .../scala/org/apache/spark/sql/RandomDataGenerator.scala  | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/RandomDataGenerator.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/RandomDataGenerator.scala
index f167557be818..cd4ffdfd4517 100644
--- a/sql/catalyst/src/test/scala/org/apache/spark/sql/RandomDataGenerator.scala
+++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/RandomDataGenerator.scala
@@ -32,7 +32,7 @@ object RandomDataGenerator {
    * The conditional probability of a non-null value being drawn from a set of "interesting" values
    * instead of being chosen uniformly at random.
    */
-  private val PROBABILITY_OF_INTERESTING_VALUE: Float = 0.25f
+  private val PROBABILITY_OF_INTERESTING_VALUE: Float = 0.5f
 
   /**
    * The probability of the generated value being null
@@ -90,9 +90,11 @@ object RandomDataGenerator {
       case BooleanType => Some(() => rand.nextBoolean())
       case DateType => Some(() => new java.sql.Date(rand.nextInt(Int.MaxValue)))
       case DoubleType => randomNumeric[Double](
-        rand, _.nextDouble(), Seq(Double.MinValue, Double.MinPositiveValue, Double.MaxValue, 0.0))
+        rand, _.nextDouble(), Seq(Double.MinValue, Double.MinPositiveValue, Double.MaxValue,
+          Double.PositiveInfinity, Double.NegativeInfinity, Double.NaN, 0.0))
       case FloatType => randomNumeric[Float](
-        rand, _.nextFloat(), Seq(Float.MinValue, Float.MinPositiveValue, Float.MaxValue, 0.0f))
+        rand, _.nextFloat(), Seq(Float.MinValue, Float.MinPositiveValue, Float.MaxValue,
+          Float.PositiveInfinity, Float.NegativeInfinity, Float.NaN, 0.0f))
       case ByteType => randomNumeric[Byte](
         rand, _.nextInt().toByte, Seq(Byte.MinValue, Byte.MaxValue, 0.toByte))
       case IntegerType => randomNumeric[Int](

From b55875a05e4805cfdf2c3468a6cd50eec6a30578 Mon Sep 17 00:00:00 2001
From: Josh Rosen <joshrosen@databricks.com>
Date: Wed, 1 Jul 2015 22:23:55 -0700
Subject: [PATCH 4/8] Generate doubles and floats over entire possible range.

---
 .../org/apache/spark/sql/RandomDataGenerator.scala  | 13 ++++++++-----
 1 file changed, 8 insertions(+), 5 deletions(-)

diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/RandomDataGenerator.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/RandomDataGenerator.scala
index cd4ffdfd4517..26437c45eb41 100644
--- a/sql/catalyst/src/test/scala/org/apache/spark/sql/RandomDataGenerator.scala
+++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/RandomDataGenerator.scala
@@ -17,10 +17,13 @@
 
 package org.apache.spark.sql
 
-import org.apache.spark.sql.types._
+import java.lang.Double.longBitsToDouble
+import java.lang.Float.intBitsToFloat
 
 import scala.util.Random
 
+import org.apache.spark.sql.types._
+
 /**
  * Random data generators for Spark SQL DataTypes. These generators do not generate uniformly random
  * values; instead, they're biased to return "interesting" values (such as maximum / minimum values)
@@ -90,11 +93,11 @@ object RandomDataGenerator {
       case BooleanType => Some(() => rand.nextBoolean())
       case DateType => Some(() => new java.sql.Date(rand.nextInt(Int.MaxValue)))
       case DoubleType => randomNumeric[Double](
-        rand, _.nextDouble(), Seq(Double.MinValue, Double.MinPositiveValue, Double.MaxValue,
-          Double.PositiveInfinity, Double.NegativeInfinity, Double.NaN, 0.0))
+        rand, r => longBitsToDouble(r.nextLong()), Seq(Double.MinValue, Double.MinPositiveValue,
+          Double.MaxValue, Double.PositiveInfinity, Double.NegativeInfinity, Double.NaN, 0.0))
       case FloatType => randomNumeric[Float](
-        rand, _.nextFloat(), Seq(Float.MinValue, Float.MinPositiveValue, Float.MaxValue,
-          Float.PositiveInfinity, Float.NegativeInfinity, Float.NaN, 0.0f))
+        rand, r => intBitsToFloat(r.nextInt()), Seq(Float.MinValue, Float.MinPositiveValue,
+          Float.MaxValue, Float.PositiveInfinity, Float.NegativeInfinity, Float.NaN, 0.0f))
       case ByteType => randomNumeric[Byte](
         rand, _.nextInt().toByte, Seq(Byte.MinValue, Byte.MaxValue, 0.toByte))
       case IntegerType => randomNumeric[Int](

From 0c209051777620b62a6cab4b18673dd145ca91c8 Mon Sep 17 00:00:00 2001
From: Josh Rosen <joshrosen@databricks.com>
Date: Thu, 2 Jul 2015 14:58:30 -0700
Subject: [PATCH 5/8] Initial attempt at using ScalaCheck.

---
 .../spark/sql/RandomDataGenerator.scala       | 127 +++++-------------
 .../spark/sql/RandomDataGeneratorSuite.scala  |  58 +++++---
 2 files changed, 74 insertions(+), 111 deletions(-)

diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/RandomDataGenerator.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/RandomDataGenerator.scala
index 26437c45eb41..9479c9b1a078 100644
--- a/sql/catalyst/src/test/scala/org/apache/spark/sql/RandomDataGenerator.scala
+++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/RandomDataGenerator.scala
@@ -17,12 +17,10 @@
 
 package org.apache.spark.sql
 
-import java.lang.Double.longBitsToDouble
-import java.lang.Float.intBitsToFloat
-
-import scala.util.Random
+import java.sql.Timestamp
 
 import org.apache.spark.sql.types._
+import org.scalacheck.{Arbitrary, Gen}
 
 /**
  * Random data generators for Spark SQL DataTypes. These generators do not generate uniformly random
@@ -31,39 +29,6 @@ import org.apache.spark.sql.types._
  */
 object RandomDataGenerator {
 
-  /**
-   * The conditional probability of a non-null value being drawn from a set of "interesting" values
-   * instead of being chosen uniformly at random.
-   */
-  private val PROBABILITY_OF_INTERESTING_VALUE: Float = 0.5f
-
-  /**
-   * The probability of the generated value being null
-   */
-  private val PROBABILITY_OF_NULL: Float = 0.1f
-
-  private val MAX_STR_LEN: Int = 1024
-  private val MAX_ARR_SIZE: Int = 128
-  private val MAX_MAP_SIZE: Int = 128
-
-  /**
-   * Helper function for constructing a biased random number generator which returns "interesting"
-   * values with a higher probability.
-   */
-  private def randomNumeric[T](
-      rand: Random,
-      uniformRand: Random => T,
-      interestingValues: Seq[T]): Some[() => T] = {
-    val f = () => {
-      if (rand.nextFloat() <= PROBABILITY_OF_INTERESTING_VALUE) {
-        interestingValues(rand.nextInt(interestingValues.length))
-      } else {
-        uniformRand(rand)
-      }
-    }
-    Some(f)
-  }
-
   /**
    * Returns a function which generates random values for the given [[DataType]], or `None` if no
    * random data generator is defined for that data type. The generated values will use an external
@@ -73,82 +38,58 @@ object RandomDataGenerator {
    *
    * @param dataType the type to generate values for
    * @param nullable whether null values should be generated
-   * @param seed an optional seed for the random number generator
-   * @return a function which can be called to generate random values.
+   * @return a ScalaCheck [[Gen]] which can be used to produce random values.
    */
   def forType(
       dataType: DataType,
-      nullable: Boolean = true,
-      seed: Option[Long] = None): Option[() => Any] = {
-    val rand = new Random()
-    seed.foreach(rand.setSeed)
-
-    val valueGenerator: Option[() => Any] = dataType match {
-      case StringType => Some(() => rand.nextString(rand.nextInt(MAX_STR_LEN)))
-      case BinaryType => Some(() => {
-        val arr = new Array[Byte](rand.nextInt(MAX_STR_LEN))
-        rand.nextBytes(arr)
-        arr
-      })
-      case BooleanType => Some(() => rand.nextBoolean())
-      case DateType => Some(() => new java.sql.Date(rand.nextInt(Int.MaxValue)))
-      case DoubleType => randomNumeric[Double](
-        rand, r => longBitsToDouble(r.nextLong()), Seq(Double.MinValue, Double.MinPositiveValue,
-          Double.MaxValue, Double.PositiveInfinity, Double.NegativeInfinity, Double.NaN, 0.0))
-      case FloatType => randomNumeric[Float](
-        rand, r => intBitsToFloat(r.nextInt()), Seq(Float.MinValue, Float.MinPositiveValue,
-          Float.MaxValue, Float.PositiveInfinity, Float.NegativeInfinity, Float.NaN, 0.0f))
-      case ByteType => randomNumeric[Byte](
-        rand, _.nextInt().toByte, Seq(Byte.MinValue, Byte.MaxValue, 0.toByte))
-      case IntegerType => randomNumeric[Int](
-        rand, _.nextInt(), Seq(Int.MinValue, Int.MaxValue, 0))
-      case LongType => randomNumeric[Long](
-        rand, _.nextLong(), Seq(Long.MinValue, Long.MaxValue, 0L))
-      case ShortType => randomNumeric[Short](
-        rand, _.nextInt().toShort, Seq(Short.MinValue, Short.MaxValue, 0.toShort))
-      case NullType => Some(() => null)
+      nullable: Boolean = true): Option[Gen[Any]] = {
+    val valueGenerator: Option[Gen[Any]] = dataType match {
+      case StringType => Some(Arbitrary.arbitrary[String])
+      case BinaryType => Some(Gen.listOf(Arbitrary.arbitrary[Byte]).map(_.toArray))
+      case BooleanType => Some(Arbitrary.arbitrary[Boolean])
+      case DateType => Some(Arbitrary.arbitrary[Int].suchThat(_ >= 0).map(new java.sql.Date(_)))
+      case DoubleType => Some(Arbitrary.arbitrary[Double])
+      case FloatType => Some(Arbitrary.arbitrary[Float])
+      case ByteType => Some(Arbitrary.arbitrary[Byte])
+      case IntegerType => Some(Arbitrary.arbitrary[Int])
+      case LongType => Some(Arbitrary.arbitrary[Long])
+      case ShortType => Some(Arbitrary.arbitrary[Short])
+      case NullType => Some(Gen.const[Any](null))
+      case TimestampType => Some(Arbitrary.arbitrary[Long].suchThat(_ >= 0).map(new Timestamp(_)))
+      case DecimalType.Unlimited => Some(Arbitrary.arbitrary[BigDecimal])
       case ArrayType(elementType, containsNull) => {
-        forType(elementType, nullable = containsNull, seed = Some(rand.nextLong())).map {
-          elementGenerator => () => Array.fill(rand.nextInt(MAX_ARR_SIZE))(elementGenerator())
+        forType(elementType, nullable = containsNull).map { elementGen =>
+          Gen.listOf(elementGen).map(_.toArray)
         }
       }
       case MapType(keyType, valueType, valueContainsNull) => {
         for (
-          keyGenerator <- forType(keyType, nullable = false, seed = Some(rand.nextLong()));
-          valueGenerator <-
-            forType(valueType, nullable = valueContainsNull, seed = Some(rand.nextLong()))
+          keyGenerator <- forType(keyType, nullable = false);
+          valueGenerator <- forType(valueType, nullable = valueContainsNull)
+          // Scala's BigDecimal.hashCode can lead to OutOfMemoryError on Scala 2.10 (see SI-6173)
+          // and Spark can hit NumberFormatException errors converting certain BigDecimals
+          // (SPARK-8802). For these reasons, we don't support generation of maps with decimal keys.
+          if !keyType.isInstanceOf[DecimalType]
         ) yield {
-          () => {
-            Seq.fill(rand.nextInt(MAX_MAP_SIZE))((keyGenerator(), valueGenerator())).toMap
-          }
+          Gen.listOf(Gen.zip(keyGenerator, valueGenerator)).map(_.toMap)
         }
       }
       case StructType(fields) => {
-        val maybeFieldGenerators: Seq[Option[() => Any]] = fields.map { field =>
-          forType(field.dataType, nullable = field.nullable, seed = Some(rand.nextLong()))
+        val maybeFieldGenerators: Seq[Option[Gen[Any]]] = fields.map { field =>
+          forType(field.dataType, nullable = field.nullable)
         }
         if (maybeFieldGenerators.forall(_.isDefined)) {
-          val fieldGenerators: Seq[() => Any] = maybeFieldGenerators.map(_.get)
-          Some(() => Row.fromSeq(fieldGenerators.map(_.apply())))
+          Some(Gen.sequence[Seq[Any], Any](maybeFieldGenerators.flatten).map(vs => Row.fromSeq(vs)))
         } else {
           None
         }
       }
       case unsupportedType => None
     }
-    // Handle nullability by wrapping the non-null value generator:
-    valueGenerator.map { valueGenerator =>
-      if (nullable) {
-        () => {
-          if (rand.nextFloat() <= PROBABILITY_OF_NULL) {
-            null
-          } else {
-            valueGenerator()
-          }
-        }
-      } else {
-        valueGenerator
-      }
+    if (nullable) {
+      valueGenerator.map(Gen.oneOf(_, Gen.const[Any](null)))
+    } else {
+      valueGenerator
     }
   }
 }
diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/RandomDataGeneratorSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/RandomDataGeneratorSuite.scala
index ea70fe03eb91..dc07a732cdb1 100644
--- a/sql/catalyst/src/test/scala/org/apache/spark/sql/RandomDataGeneratorSuite.scala
+++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/RandomDataGeneratorSuite.scala
@@ -17,6 +17,9 @@
 
 package org.apache.spark.sql
 
+import org.scalacheck.Prop.{exists, forAll, secure}
+import org.scalatest.prop.Checkers
+
 import org.apache.spark.SparkFunSuite
 import org.apache.spark.sql.catalyst.CatalystTypeConverters
 import org.apache.spark.sql.types._
@@ -24,7 +27,7 @@ import org.apache.spark.sql.types._
 /**
  * Tests of [[RandomDataGenerator]].
  */
-class RandomDataGeneratorSuite extends SparkFunSuite {
+class RandomDataGeneratorSuite extends SparkFunSuite with Checkers {
 
   /**
    * Tests random data generation for the given type by using it to generate random values then
@@ -32,31 +35,50 @@ class RandomDataGeneratorSuite extends SparkFunSuite {
    */
   def testRandomDataGeneration(dataType: DataType, nullable: Boolean = true): Unit = {
     val toCatalyst = CatalystTypeConverters.createToCatalystConverter(dataType)
-    RandomDataGenerator.forType(dataType, nullable, Some(42L)).foreach { generator =>
-      for (_ <- 1 to 10) {
-        val generatedValue = generator()
-        val convertedValue = toCatalyst(generatedValue)
-        if (!nullable) {
-          assert(convertedValue !== null)
-        }
-      }
+    val generator = RandomDataGenerator.forType(dataType, nullable).getOrElse {
+      fail(s"Random data generator was not defined for $dataType")
     }
-
+    if (nullable) {
+      check(exists(generator) { _ == null })
+    }
+    if (!nullable) {
+      check(forAll(generator) { _ != null })
+    }
+    check(secure(forAll(generator) { v => { toCatalyst(v); true } }))
   }
 
   // Basic types:
-
-  (DataTypeTestUtils.atomicTypes ++ DataTypeTestUtils.atomicArrayTypes).foreach { dataType =>
-    test(s"$dataType") {
+  for (
+    dataType <- DataTypeTestUtils.atomicTypes;
+    nullable <- Seq(true, false)
+    if !dataType.isInstanceOf[DecimalType] ||
+      dataType.asInstanceOf[DecimalType].precisionInfo.isEmpty
+  ) {
+    test(s"$dataType (nullable=$nullable)") {
+      testRandomDataGeneration(dataType, nullable) // run both nullable cases
+    }
+  }
 
-  // Complex types:
+  for (
+    arrayType <- DataTypeTestUtils.atomicArrayTypes
+    if RandomDataGenerator.forType(arrayType.elementType, arrayType.containsNull).isDefined
+  ) {
+    test(s"$arrayType") {
+      testRandomDataGeneration(arrayType)
+    }
+  }
+
+  val atomicTypesWithDataGenerators =
+    DataTypeTestUtils.atomicTypes.filter(RandomDataGenerator.forType(_).isDefined)
 
+  // Complex types:
   for (
-    keyType <- DataTypeTestUtils.atomicTypes;
-    valueType <- DataTypeTestUtils.atomicTypes
+    keyType <- atomicTypesWithDataGenerators;
+    valueType <- atomicTypesWithDataGenerators
+    // Scala's BigDecimal.hashCode can lead to OutOfMemoryError on Scala 2.10 (see SI-6173) and
+    // Spark can hit NumberFormatException errors when converting certain BigDecimals (SPARK-8802).
+    // For these reasons, we don't support generation of maps with decimal keys.
+    if !keyType.isInstanceOf[DecimalType]
   ) {
     val mapType = MapType(keyType, valueType)
     test(s"$mapType") {
@@ -65,8 +87,8 @@ class RandomDataGeneratorSuite extends SparkFunSuite {
   }
 
   for (
-    colOneType <- DataTypeTestUtils.atomicTypes;
-    colTwoType <- DataTypeTestUtils.atomicTypes
+    colOneType <- atomicTypesWithDataGenerators;
+    colTwoType <- atomicTypesWithDataGenerators
   ) {
     val structType = StructType(StructField("a", colOneType) :: StructField("b", colTwoType) :: Nil)
     test(s"$structType") {

From 89d86b1f65f5db7044280d916b07d1622b4662a0 Mon Sep 17 00:00:00 2001
From: Josh Rosen <joshrosen@databricks.com>
Date: Thu, 2 Jul 2015 17:09:15 -0700
Subject: [PATCH 6/8] Bump ScalaCheck version.

---
 pom.xml                                                   | 2 +-
 .../scala/org/apache/spark/sql/RandomDataGenerator.scala  | 7 +++----
 .../org/apache/spark/sql/types/DataTypeTestUtils.scala    | 8 +++++++-
 3 files changed, 11 insertions(+), 6 deletions(-)

diff --git a/pom.xml b/pom.xml
index 211da9ee74a3..6a41aa4f6510 100644
--- a/pom.xml
+++ b/pom.xml
@@ -689,7 +689,7 @@
       <dependency>
         <groupId>org.scalacheck</groupId>
         <artifactId>scalacheck_${scala.binary.version}</artifactId>
-        <version>1.11.3</version>
+        <version>1.12.4</version>
         <scope>test</scope>
       </dependency>
       <dependency>
diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/RandomDataGenerator.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/RandomDataGenerator.scala
index 9479c9b1a078..ccf171543f8e 100644
--- a/sql/catalyst/src/test/scala/org/apache/spark/sql/RandomDataGenerator.scala
+++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/RandomDataGenerator.scala
@@ -19,13 +19,12 @@ package org.apache.spark.sql
 
 import java.sql.Timestamp
 
-import org.apache.spark.sql.types._
 import org.scalacheck.{Arbitrary, Gen}
 
+import org.apache.spark.sql.types._
+
 /**
- * Random data generators for Spark SQL DataTypes. These generators do not generate uniformly random
- * values; instead, they're biased to return "interesting" values (such as maximum / minimum values)
- * with higher probability.
+ * ScalaCheck random data generators for Spark SQL DataTypes.
  */
 object RandomDataGenerator {
 
diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/types/DataTypeTestUtils.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/types/DataTypeTestUtils.scala
index 0b7ed54c681e..32632b5d6e34 100644
--- a/sql/catalyst/src/test/scala/org/apache/spark/sql/types/DataTypeTestUtils.scala
+++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/types/DataTypeTestUtils.scala
@@ -48,7 +48,13 @@ object DataTypeTestUtils {
   /**
    * Instances of all [[AtomicType]]s.
    */
-  val atomicTypes: Set[DataType] = Set(BinaryType, StringType, TimestampType) ++ numericTypes
+  val atomicTypes: Set[DataType] = numericTypes ++ Set(
+    BinaryType,
+    BooleanType,
+    DateType,
+    StringType,
+    TimestampType
+  )
 
   /**
    * Instances of [[ArrayType]] for all [[AtomicType]]s. Arrays of these types may contain null.

From e0d7d49023f7cd63d963f6147ce5db6b6bd94f99 Mon Sep 17 00:00:00 2001
From: Josh Rosen <joshrosen@databricks.com>
Date: Thu, 2 Jul 2015 17:10:17 -0700
Subject: [PATCH 7/8] Bump ScalaCheck version in LICENSE

---
 LICENSE | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/LICENSE b/LICENSE
index f9e412cade34..2a41ea294e46 100644
--- a/LICENSE
+++ b/LICENSE
@@ -922,7 +922,7 @@ The following components are provided under a BSD-style license. See project lin
      (BSD-like) Scala Compiler (org.scala-lang:scala-reflect:2.10.4 - http://www.scala-lang.org/)
      (BSD-like) Scala Library (org.scala-lang:scala-library:2.10.4 - http://www.scala-lang.org/)
      (BSD-like) Scalap (org.scala-lang:scalap:2.10.4 - http://www.scala-lang.org/)
-     (BSD-style) scalacheck (org.scalacheck:scalacheck_2.10:1.10.0 - http://www.scalacheck.org)
+     (BSD-style) scalacheck (org.scalacheck:scalacheck_2.10:1.12.4 - http://www.scalacheck.org)
      (BSD-style) spire (org.spire-math:spire_2.10:0.7.1 - http://spire-math.org)
      (BSD-style) spire-macros (org.spire-math:spire-macros_2.10:0.7.1 - http://spire-math.org)
      (New BSD License) Kryo (com.esotericsoftware.kryo:kryo:2.21 - http://code.google.com/p/kryo/)

From f71634d73470189cfe45a89d2a69ea9c5ffa9e29 Mon Sep 17 00:00:00 2001
From: Josh Rosen <joshrosen@databricks.com>
Date: Thu, 2 Jul 2015 22:14:33 -0700
Subject: [PATCH 8/8] Roll back ScalaCheck usage

---
 LICENSE                                       |   2 +-
 pom.xml                                       |   2 +-
 .../spark/sql/RandomDataGenerator.scala       | 134 +++++++++++++-----
 .../spark/sql/RandomDataGeneratorSuite.scala  |  15 +-
 4 files changed, 108 insertions(+), 45 deletions(-)

diff --git a/LICENSE b/LICENSE
index 2a41ea294e46..f9e412cade34 100644
--- a/LICENSE
+++ b/LICENSE
@@ -922,7 +922,7 @@ The following components are provided under a BSD-style license. See project lin
      (BSD-like) Scala Compiler (org.scala-lang:scala-reflect:2.10.4 - http://www.scala-lang.org/)
      (BSD-like) Scala Library (org.scala-lang:scala-library:2.10.4 - http://www.scala-lang.org/)
      (BSD-like) Scalap (org.scala-lang:scalap:2.10.4 - http://www.scala-lang.org/)
-     (BSD-style) scalacheck (org.scalacheck:scalacheck_2.10:1.12.4 - http://www.scalacheck.org)
+     (BSD-style) scalacheck (org.scalacheck:scalacheck_2.10:1.10.0 - http://www.scalacheck.org)
      (BSD-style) spire (org.spire-math:spire_2.10:0.7.1 - http://spire-math.org)
      (BSD-style) spire-macros (org.spire-math:spire-macros_2.10:0.7.1 - http://spire-math.org)
      (New BSD License) Kryo (com.esotericsoftware.kryo:kryo:2.21 - http://code.google.com/p/kryo/)
diff --git a/pom.xml b/pom.xml
index 6a41aa4f6510..211da9ee74a3 100644
--- a/pom.xml
+++ b/pom.xml
@@ -689,7 +689,7 @@
       <dependency>
         <groupId>org.scalacheck</groupId>
         <artifactId>scalacheck_${scala.binary.version}</artifactId>
-        <version>1.12.4</version>
+        <version>1.11.3</version>
         <scope>test</scope>
       </dependency>
       <dependency>
diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/RandomDataGenerator.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/RandomDataGenerator.scala
index ccf171543f8e..13aad467fa57 100644
--- a/sql/catalyst/src/test/scala/org/apache/spark/sql/RandomDataGenerator.scala
+++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/RandomDataGenerator.scala
@@ -17,17 +17,54 @@
 
 package org.apache.spark.sql
 
-import java.sql.Timestamp
+import java.lang.Double.longBitsToDouble
+import java.lang.Float.intBitsToFloat
+import java.math.MathContext
 
-import org.scalacheck.{Arbitrary, Gen}
+import scala.util.Random
 
 import org.apache.spark.sql.types._
 
 /**
- * ScalaCheck random data generators for Spark SQL DataTypes.
+ * Random data generators for Spark SQL DataTypes. These generators do not generate uniformly random
+ * values; instead, they're biased to return "interesting" values (such as maximum / minimum values)
+ * with higher probability.
  */
 object RandomDataGenerator {
 
+  /**
+   * The conditional probability of a non-null value being drawn from a set of "interesting" values
+   * instead of being chosen uniformly at random.
+   */
+  private val PROBABILITY_OF_INTERESTING_VALUE: Float = 0.5f
+
+  /**
+   * The probability that a value returned by a nullable generator is null.
+   */
+  private val PROBABILITY_OF_NULL: Float = 0.1f
+
+  private val MAX_STR_LEN: Int = 1024
+  private val MAX_ARR_SIZE: Int = 128
+  private val MAX_MAP_SIZE: Int = 128
+
+  /**
+   * Helper function for constructing a biased random number generator which returns "interesting"
+   * values with a higher probability.
+   */
+  private def randomNumeric[T](
+      rand: Random,
+      uniformRand: Random => T,
+      interestingValues: Seq[T]): Some[() => T] = {
+    val f = () => {
+      if (rand.nextFloat() <= PROBABILITY_OF_INTERESTING_VALUE) {
+        interestingValues(rand.nextInt(interestingValues.length))
+      } else {
+        uniformRand(rand)
+      }
+    }
+    Some(f)
+  }
+
   /**
    * Returns a function which generates random values for the given [[DataType]], or `None` if no
    * random data generator is defined for that data type. The generated values will use an external
@@ -37,58 +74,85 @@ object RandomDataGenerator {
    *
    * @param dataType the type to generate values for
    * @param nullable whether null values should be generated
-   * @return a ScalaCheck [[Gen]] which can be used to produce random values.
+   * @param seed an optional seed for the random number generator
+   * @return a function which can be called to generate random values.
    */
   def forType(
       dataType: DataType,
-      nullable: Boolean = true): Option[Gen[Any]] = {
-    val valueGenerator: Option[Gen[Any]] = dataType match {
-      case StringType => Some(Arbitrary.arbitrary[String])
-      case BinaryType => Some(Gen.listOf(Arbitrary.arbitrary[Byte]).map(_.toArray))
-      case BooleanType => Some(Arbitrary.arbitrary[Boolean])
-      case DateType => Some(Arbitrary.arbitrary[Int].suchThat(_ >= 0).map(new java.sql.Date(_)))
-      case DoubleType => Some(Arbitrary.arbitrary[Double])
-      case FloatType => Some(Arbitrary.arbitrary[Float])
-      case ByteType => Some(Arbitrary.arbitrary[Byte])
-      case IntegerType => Some(Arbitrary.arbitrary[Int])
-      case LongType => Some(Arbitrary.arbitrary[Long])
-      case ShortType => Some(Arbitrary.arbitrary[Short])
-      case NullType => Some(Gen.const[Any](null))
-      case TimestampType => Some(Arbitrary.arbitrary[Long].suchThat(_ >= 0).map(new Timestamp(_)))
-      case DecimalType.Unlimited => Some(Arbitrary.arbitrary[BigDecimal])
+      nullable: Boolean = true,
+      seed: Option[Long] = None): Option[() => Any] = {
+    val rand = new Random()
+    seed.foreach(rand.setSeed)
+
+    val valueGenerator: Option[() => Any] = dataType match {
+      case StringType => Some(() => rand.nextString(rand.nextInt(MAX_STR_LEN)))
+      case BinaryType => Some(() => {
+        val arr = new Array[Byte](rand.nextInt(MAX_STR_LEN))
+        rand.nextBytes(arr)
+        arr
+      })
+      case BooleanType => Some(() => rand.nextBoolean())
+      case DateType => Some(() => new java.sql.Date(rand.nextInt()))
+      case TimestampType => Some(() => new java.sql.Timestamp(rand.nextLong()))
+      case DecimalType.Unlimited => Some(
+        () => BigDecimal.apply(rand.nextLong, rand.nextInt, MathContext.UNLIMITED))
+      case DoubleType => randomNumeric[Double](
+        rand, r => longBitsToDouble(r.nextLong()), Seq(Double.MinValue, Double.MinPositiveValue,
+          Double.MaxValue, Double.PositiveInfinity, Double.NegativeInfinity, Double.NaN, 0.0))
+      case FloatType => randomNumeric[Float](
+        rand, r => intBitsToFloat(r.nextInt()), Seq(Float.MinValue, Float.MinPositiveValue,
+          Float.MaxValue, Float.PositiveInfinity, Float.NegativeInfinity, Float.NaN, 0.0f))
+      case ByteType => randomNumeric[Byte](
+        rand, _.nextInt().toByte, Seq(Byte.MinValue, Byte.MaxValue, 0.toByte))
+      case IntegerType => randomNumeric[Int](
+        rand, _.nextInt(), Seq(Int.MinValue, Int.MaxValue, 0))
+      case LongType => randomNumeric[Long](
+        rand, _.nextLong(), Seq(Long.MinValue, Long.MaxValue, 0L))
+      case ShortType => randomNumeric[Short](
+        rand, _.nextInt().toShort, Seq(Short.MinValue, Short.MaxValue, 0.toShort))
+      case NullType => Some(() => null)
       case ArrayType(elementType, containsNull) => {
-        forType(elementType, nullable = containsNull).map { elementGen =>
-          Gen.listOf(elementGen).map(_.toArray)
+        forType(elementType, nullable = containsNull, seed = Some(rand.nextLong())).map {
+          elementGenerator => () => Array.fill(rand.nextInt(MAX_ARR_SIZE))(elementGenerator())
         }
       }
       case MapType(keyType, valueType, valueContainsNull) => {
         for (
-          keyGenerator <- forType(keyType, nullable = false);
-          valueGenerator <- forType(valueType, nullable = valueContainsNull)
-          // Scala's BigDecimal.hashCode can lead to OutOfMemoryError on Scala 2.10 (see SI-6173)
-          // and Spark can hit NumberFormatException errors converting certain BigDecimals
-          // (SPARK-8802). For these reasons, we don't support generation of maps with decimal keys.
-          if !keyType.isInstanceOf[DecimalType]
+          keyGenerator <- forType(keyType, nullable = false, seed = Some(rand.nextLong()));
+          valueGenerator <-
+            forType(valueType, nullable = valueContainsNull, seed = Some(rand.nextLong()))
         ) yield {
-          Gen.listOf(Gen.zip(keyGenerator, valueGenerator)).map(_.toMap)
+          () => {
+            Seq.fill(rand.nextInt(MAX_MAP_SIZE))((keyGenerator(), valueGenerator())).toMap
+          }
         }
       }
       case StructType(fields) => {
-        val maybeFieldGenerators: Seq[Option[Gen[Any]]] = fields.map { field =>
-          forType(field.dataType, nullable = field.nullable)
+        val maybeFieldGenerators: Seq[Option[() => Any]] = fields.map { field =>
+          forType(field.dataType, nullable = field.nullable, seed = Some(rand.nextLong()))
         }
         if (maybeFieldGenerators.forall(_.isDefined)) {
-          Some(Gen.sequence[Seq[Any], Any](maybeFieldGenerators.flatten).map(vs => Row.fromSeq(vs)))
+          val fieldGenerators: Seq[() => Any] = maybeFieldGenerators.map(_.get)
+          Some(() => Row.fromSeq(fieldGenerators.map(_.apply())))
         } else {
           None
         }
       }
       case unsupportedType => None
     }
-    if (nullable) {
-      valueGenerator.map(Gen.oneOf(_, Gen.const[Any](null)))
-    } else {
-      valueGenerator
+    // Handle nullability by wrapping the non-null value generator:
+    valueGenerator.map { valueGenerator =>
+      if (nullable) {
+        () => {
+          if (rand.nextFloat() <= PROBABILITY_OF_NULL) {
+            null
+          } else {
+            valueGenerator()
+          }
+        }
+      } else {
+        valueGenerator
+      }
     }
   }
 }
diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/RandomDataGeneratorSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/RandomDataGeneratorSuite.scala
index dc07a732cdb1..dbba93dba668 100644
--- a/sql/catalyst/src/test/scala/org/apache/spark/sql/RandomDataGeneratorSuite.scala
+++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/RandomDataGeneratorSuite.scala
@@ -17,9 +17,6 @@
 
 package org.apache.spark.sql
 
-import org.scalacheck.Prop.{exists, forAll, secure}
-import org.scalatest.prop.Checkers
-
 import org.apache.spark.SparkFunSuite
 import org.apache.spark.sql.catalyst.CatalystTypeConverters
 import org.apache.spark.sql.types._
@@ -27,7 +24,7 @@ import org.apache.spark.sql.types._
 /**
  * Tests of [[RandomDataGenerator]].
  */
-class RandomDataGeneratorSuite extends SparkFunSuite with Checkers {
+class RandomDataGeneratorSuite extends SparkFunSuite {
 
   /**
    * Tests random data generation for the given type by using it to generate random values then
@@ -39,12 +36,14 @@ class RandomDataGeneratorSuite extends SparkFunSuite with Checkers {
       fail(s"Random data generator was not defined for $dataType")
     }
     if (nullable) {
-      check(exists(generator) { _ == null })
+      assert(Iterator.fill(100)(generator()).contains(null))
+    } else {
+      assert(Iterator.fill(100)(generator()).forall(_ != null))
     }
-    if (!nullable) {
-      check(forAll(generator) { _ != null })
+    for (_ <- 1 to 10) {
+      val generatedValue = generator()
+      toCatalyst(generatedValue)
     }
-    check(secure(forAll(generator) { v => { toCatalyst(v); true } }))
   }
 
   // Basic types: