
Commit 1b2891d (parent: 504f27e)

[SPARK-2010] minor doc change and adding a TODO

File tree

2 files changed (+4, -3 lines)


python/pyspark/sql.py

Lines changed: 3 additions & 3 deletions
@@ -76,15 +76,15 @@ def inferSchema(self, rdd):
         """Infer and apply a schema to an RDD of L{dict}s.
 
         We peek at the first row of the RDD to determine the fields names
-        and types, and then use that to extract all the dictionaries.
+        and types, and then use that to extract all the dictionaries. Nested
+        collections are supported, which include array, dict, list, set, and
+        tuple.
 
         >>> srdd = sqlCtx.inferSchema(rdd)
         >>> srdd.collect() == [{"field1" : 1, "field2" : "row1"}, {"field1" : 2, "field2": "row2"},
         ... {"field1" : 3, "field2": "row3"}]
         True
 
-        Nested collections are supported, which include array, dict, list, set, and tuple.
-
         >>> from array import array
         >>> srdd = sqlCtx.inferSchema(nestedRdd1)
         >>> srdd.collect() == [{"f1" : array('i', [1, 2]), "f2" : {"row1" : 1.0}},
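The behavior the docstring describes — peeking at the first row to determine field names and types, including nested collections — can be sketched in plain Python. This is a hypothetical toy for illustration only, not PySpark's actual implementation; `infer_schema`, `_type_of`, and the returned type names are all made up here:

```python
from array import array

def infer_schema(rows):
    # Peek at the first row only, as the docstring describes,
    # and map each field name to an inferred type label.
    first = rows[0]
    return {name: _type_of(value) for name, value in first.items()}

def _type_of(value):
    # Handle the nested collections the docstring lists:
    # array, dict, list, set, and tuple.
    if isinstance(value, dict):
        return "map"
    if isinstance(value, (list, set, tuple, array)):
        return "seq"
    return type(value).__name__

rows = [{"f1": array('i', [1, 2]), "f2": {"row1": 1.0}},
        {"f1": array('i', [3, 4]), "f2": {"row2": 2.0}}]
print(infer_schema(rows))  # {'f1': 'seq', 'f2': 'map'}
```

Note that, like the real method, this sketch trusts the first row to be representative: a field that is None or missing in row one would be inferred wrongly, which is one motivation for the type-system consolidation flagged in the TODO below.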

sql/core/src/main/scala/org/apache/spark/sql/SQLContext.scala

Lines changed: 1 addition & 0 deletions
@@ -298,6 +298,7 @@ class SQLContext(@transient val sparkContext: SparkContext)
 
   /**
    * Peek at the first row of the RDD and infer its schema.
+   * TODO: consolidate this with the type system developed in SPARK-2060.
    */
   private[sql] def inferSchema(rdd: RDD[Map[String, _]]): SchemaRDD = {
     import scala.collection.JavaConversions._

0 commit comments
