@@ -367,11 +367,18 @@ val people = sc.textFile("examples/src/main/resources/people.txt").map(_.split("
 people.registerTempTable("people")
 
 // SQL statements can be run by using the sql methods provided by sqlContext.
-val teenagers = sqlContext.sql("SELECT name FROM people WHERE age >= 13 AND age <= 19")
+val teenagers = sqlContext.sql("SELECT name, age FROM people WHERE age >= 13 AND age <= 19")
 
 // The results of SQL queries are DataFrames and support all the normal RDD operations.
-// The columns of a row in the result can be accessed by ordinal.
+// The columns of a row in the result can be accessed by field index:
 teenagers.map(t => "Name: " + t(0)).collect().foreach(println)
+
+// or by field name:
+teenagers.map(t => "Name: " + t.getAs[String]("name")).collect().foreach(println)
+
+// row.getValuesMap[T] retrieves multiple columns at once into a Map[String, T]
+teenagers.map(_.getValuesMap[Any](List("name", "age"))).collect().foreach(println)
+// Map("name" -> "Justin", "age" -> 19)
 {% endhighlight %}
 
 </div>
@@ -470,7 +477,8 @@ parts = lines.map(lambda l: l.split(","))
 people = parts.map(lambda p: Row(name=p[0], age=int(p[1])))
 
 # Infer the schema, and register the DataFrame as a table.
-schemaPeople = sqlContext.inferSchema(people)
+schemaPeople = sqlContext.createDataFrame(people)
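+# (inferSchema is deprecated as of 1.3; createDataFrame accepts the same RDD of Rows.)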
 schemaPeople.registerTempTable("people")
 
 # SQL can be run over DataFrames that have been registered as a table.
@@ -538,7 +545,10 @@ peopleDataFrame.registerTempTable("people")
 val results = sqlContext.sql("SELECT name FROM people")
 
 // The results of SQL queries are DataFrames and support all the normal RDD operations.
-// The columns of a row in the result can be accessed by ordinal.
+// The columns of a row in the result can be accessed by field index or by field name.
 results.map(t => "Name: " + t(0)).collect().foreach(println)
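+
+// or by field name, via Row.getAs as in the earlier example:
+results.map(t => "Name: " + t.getAs[String]("name")).collect().foreach(println)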
 {% endhighlight %}
 
@@ -1594,6 +1601,66 @@ options.
 
 # Migration Guide
 
+## Upgrading from Spark SQL 1.3 to 1.4
+
+Based on user feedback, we changed the default behavior of `DataFrame.groupBy().agg()` to retain the grouping columns in the resulting `DataFrame`. To keep the 1.3 behavior, set `spark.sql.retainGroupColumns` to `false`.
+
+<div class="codetabs">
+<div data-lang="scala" markdown="1">
+{% highlight scala %}
+
+// In 1.3.x, in order for the grouping column "department" to show up,
+// it must be included explicitly as part of the agg function call.
+df.groupBy("department").agg($"department", max("age"), sum("expense"))
+
+// In 1.4+, grouping column "department" is included automatically.
+df.groupBy("department").agg(max("age"), sum("expense"))
+
+// Revert to 1.3 behavior (not retaining grouping columns) by:
+sqlContext.setConf("spark.sql.retainGroupColumns", "false")
+
+{% endhighlight %}
+</div>
+
+<div data-lang="java" markdown="1">
+{% highlight java %}
+
+// In 1.3.x, in order for the grouping column "department" to show up,
+// it must be included explicitly as part of the agg function call.
+df.groupBy("department").agg(col("department"), max("age"), sum("expense"));
+
+// In 1.4+, grouping column "department" is included automatically.
+df.groupBy("department").agg(max("age"), sum("expense"));
+
+// Revert to 1.3 behavior (not retaining grouping columns) by:
+sqlContext.setConf("spark.sql.retainGroupColumns", "false");
+
+{% endhighlight %}
+</div>
+
+<div data-lang="python" markdown="1">
+{% highlight python %}
+
+import pyspark.sql.functions as func
+
+# In 1.3.x, in order for the grouping column "department" to show up,
+# it must be included explicitly as part of the agg function call.
+df.groupBy("department").agg(df["department"], func.max("age"), func.sum("expense"))
+
+# In 1.4+, grouping column "department" is included automatically.
+df.groupBy("department").agg(func.max("age"), func.sum("expense"))
+
+# Revert to 1.3.x behavior (not retaining grouping columns) by:
+sqlContext.setConf("spark.sql.retainGroupColumns", "false")
+
+{% endhighlight %}
+</div>
+
+</div>
+
+
 ## Upgrading from Spark SQL 1.0-1.2 to 1.3
 
 In Spark 1.3 we removed the "Alpha" label from Spark SQL and as part of this did a cleanup of the
@@ -1651,7 +1716,9 @@ moved into the udf object in `SQLContext`.
 
 <div class="codetabs">
 <div data-lang="scala" markdown="1">
-{% highlight java %}
+{% highlight scala %}
 
 sqlContext.udf.register("strLen", (s: String) => s.length())
 
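+// Usage sketch (assumes a registered temp table "people" with a string column "name"):
+sqlContext.sql("SELECT name, strLen(name) FROM people").collect()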