@@ -819,16 +819,16 @@ saveDF(select(df, "name", "age"), "namesAndAges.parquet")
 
 You can also manually specify the data source that will be used along with any extra options
 that you would like to pass to the data source. Data sources are specified by their fully qualified
-name (i.e., `org.apache.spark.sql.parquet`), but for built-in sources you can also use the shorted
-name (`json`, `parquet`, `jdbc`). DataFrames of any type can be converted into other types
+name (i.e., `org.apache.spark.sql.parquet`), but for built-in sources you can also use their short
+names (`json`, `parquet`, `jdbc`). DataFrames of any type can be converted into other types
 using this syntax.
 
 <div class="codetabs">
 <div data-lang="scala" markdown="1">
 
 {% highlight scala %}
 val df = sqlContext.read.format("json").load("examples/src/main/resources/people.json")
-df.select("name", "age").write.format("json").save("namesAndAges.parquet")
+df.select("name", "age").write.format("json").save("namesAndAges.json")
 {% endhighlight %}
 
 </div>
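
As an illustrative aside (not part of the patch itself), the same syntax covers the conversion case the paragraph describes; a minimal Scala sketch, reusing `sqlContext` and the sample `people.json` file from the rest of the guide:

{% highlight scala %}
// Read using a built-in short name...
val people = sqlContext.read.format("json").load("examples/src/main/resources/people.json")

// ...then write the same data out through the fully qualified source name,
// converting the JSON-backed DataFrame into a Parquet file.
people.select("name", "age")
  .write
  .format("org.apache.spark.sql.parquet")
  .save("namesAndAges.parquet")
{% endhighlight %}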
@@ -975,7 +975,7 @@ schemaPeople.write().parquet("people.parquet");
 // The result of loading a parquet file is also a DataFrame.
 DataFrame parquetFile = sqlContext.read().parquet("people.parquet");
 
-//Parquet files can also be registered as tables and then used in SQL statements.
+// Parquet files can also be registered as tables and then used in SQL statements.
 parquetFile.registerTempTable("parquetFile");
 DataFrame teenagers = sqlContext.sql("SELECT name FROM parquetFile WHERE age >= 13 AND age <= 19");
 List<String> teenagerNames = teenagers.javaRDD().map(new Function<Row, String>() {
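
For comparison, a hedged Scala sketch of the same pattern as the Java snippet in this hunk, reusing the `people.parquet` output from earlier in the guide:

{% highlight scala %}
// The result of loading a Parquet file is also a DataFrame.
val parquetFile = sqlContext.read.parquet("people.parquet")

// Parquet files can also be registered as tables and then used in SQL statements.
parquetFile.registerTempTable("parquetFile")
val teenagers = sqlContext.sql("SELECT name FROM parquetFile WHERE age >= 13 AND age <= 19")
teenagers.map(t => "Name: " + t(0)).collect().foreach(println)
{% endhighlight %}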
@@ -1059,7 +1059,7 @@ SELECT * FROM parquetTable
 Table partitioning is a common optimization approach used in systems like Hive. In a partitioned
 table, data are usually stored in different directories, with partitioning column values encoded in
 the path of each partition directory. The Parquet data source is now able to discover and infer
-partitioning information automatically. For exmaple, we can store all our previously used
+partitioning information automatically. For example, we can store all our previously used
 population data into a partitioned table using the following directory structure, with two extra
 columns, `gender` and `country` as partitioning columns:
 
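
Once data is laid out this way, pointing the reader at the table root is enough for the partition columns to appear in the schema; a minimal Scala sketch (the root path is illustrative):

{% highlight scala %}
// Reading the table root discovers the partition directories automatically;
// gender and country show up as ordinary columns alongside the data columns.
val people = sqlContext.read.parquet("path/to/table")
people.printSchema()
people.filter("gender = 'male' AND country = 'US'").show()
{% endhighlight %}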
@@ -1125,20 +1125,20 @@ source is now able to automatically detect this case and merge schemas of all th
 import sqlContext.implicits._
 
 // Create a simple DataFrame, stored into a partition directory
-val df1 = sparkContext.makeRDD(1 to 5).map(i => (i, i * 2)).toDF("single", "double")
+val df1 = sc.makeRDD(1 to 5).map(i => (i, i * 2)).toDF("single", "double")
 df1.write.parquet("data/test_table/key=1")
 
 // Create another DataFrame in a new partition directory,
 // adding a new column and dropping an existing column
-val df2 = sparkContext.makeRDD(6 to 10).map(i => (i, i * 3)).toDF("single", "triple")
+val df2 = sc.makeRDD(6 to 10).map(i => (i, i * 3)).toDF("single", "triple")
 df2.write.parquet("data/test_table/key=2")
 
 // Read the partitioned table
 val df3 = sqlContext.read.parquet("data/test_table")
 df3.printSchema()
 
 // The final schema consists of all 3 columns in the Parquet files together
-// with the partiioning column appeared in the partition directory paths.
+// with the partitioning column appeared in the partition directory paths.
 // root
 // |-- single: int (nullable = true)
 // |-- double: int (nullable = true)
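
Because `key` is discovered from the directory names, it behaves like any other column after the merge; a small follow-on sketch to the example above:

{% highlight scala %}
// key comes from the partition paths (key=1, key=2), so it can be used
// in expressions just like the columns stored inside the Parquet files.
df3.filter("key = 2").select("single", "triple").show()
{% endhighlight %}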
@@ -1169,7 +1169,7 @@ df3 = sqlContext.load("data/test_table", "parquet")
 df3.printSchema()
 
 # The final schema consists of all 3 columns in the Parquet files together
-# with the partiioning column appeared in the partition directory paths.
+# with the partitioning column appeared in the partition directory paths.
 # root
 # |-- single: int (nullable = true)
 # |-- double: int (nullable = true)
@@ -1196,7 +1196,7 @@ df3 <- loadDF(sqlContext, "data/test_table", "parquet")
 printSchema(df3)
 
 # The final schema consists of all 3 columns in the Parquet files together
-# with the partiioning column appeared in the partition directory paths.
+# with the partitioning column appeared in the partition directory paths.
 # root
 # |-- single: int (nullable = true)
 # |-- double: int (nullable = true)
@@ -1253,7 +1253,7 @@ Configuration of Parquet can be done using the `setConf` method on `SQLContext`
   <td>false</td>
   <td>
     Turn on Parquet filter pushdown optimization. This feature is turned off by default because of a known
-    bug in Paruet 1.6.0rc3 (<a href="https://issues.apache.org/jira/browse/PARQUET-136">PARQUET-136</a>).
+    bug in Parquet 1.6.0rc3 (<a href="https://issues.apache.org/jira/browse/PARQUET-136">PARQUET-136</a>).
     However, if your table doesn't contain any nullable string or binary columns, it's still safe to turn
     this feature on.
   </td>
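
The flag described in this row is `spark.sql.parquet.filterPushdown`; a minimal sketch of enabling it through the `setConf` method mentioned in the surrounding section:

{% highlight scala %}
// Only enable pushdown if the table has no nullable string or binary columns,
// per the PARQUET-136 caveat above.
sqlContext.setConf("spark.sql.parquet.filterPushdown", "true")
{% endhighlight %}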
@@ -1402,7 +1402,7 @@ sqlContext <- sparkRSQL.init(sc)
 # The path can be either a single text file or a directory storing text files.
 path <- "examples/src/main/resources/people.json"
 # Create a DataFrame from the file(s) pointed to by path
-people <- jsonFile(sqlContex,t path)
+people <- jsonFile(sqlContext, path)
 
 # The inferred schema can be visualized using the printSchema() method.
 printSchema(people)
@@ -1474,7 +1474,7 @@ sqlContext.sql("FROM src SELECT key, value").collect().foreach(println)
 
 When working with Hive one must construct a `HiveContext`, which inherits from `SQLContext`, and
 adds support for finding tables in the MetaStore and writing queries using HiveQL. In addition to
-the `sql` method a `HiveContext` also provides an `hql` methods, which allows queries to be
+the `sql` method a `HiveContext` also provides an `hql` method, which allows queries to be
 expressed in HiveQL.
 
 {% highlight java %}
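
For a self-contained reference, a minimal Scala sketch of the same pattern, assuming an existing `SparkContext` named `sc`:

{% highlight scala %}
// HiveContext inherits from SQLContext and adds HiveQL support.
val sqlContext = new org.apache.spark.sql.hive.HiveContext(sc)

sqlContext.sql("CREATE TABLE IF NOT EXISTS src (key INT, value STRING)")

// Queries are expressed in HiveQL.
sqlContext.sql("FROM src SELECT key, value").collect().foreach(println)
{% endhighlight %}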
@@ -2770,7 +2770,7 @@ from pyspark.sql.types import *
 </tr>
 <tr>
   <td> <b>MapType</b> </td>
-  <td> enviroment </td>
+  <td> environment </td>
   <td>
     list(type="map", keyType=<i>keyType</i>, valueType=<i>valueType</i>, valueContainsNull=[<i>valueContainsNull</i>])<br />
     <b>Note:</b> The default value of <i>valueContainsNull</i> is <i>True</i>.