File tree Expand file tree Collapse file tree 1 file changed +4
-2
lines changed Expand file tree Collapse file tree 1 file changed +4
-2
lines changed Original file line number Diff line number Diff line change @@ -216,6 +216,8 @@ parts = lines.map(lambda l: l.split(","))
216216people = parts.map(lambda p: {"name": p[0], "age": int(p[1])})
217217
218218# Infer the schema, and register the SchemaRDD as a table.
219+ # In future versions of PySpark we would like to add support for registering RDDs with other
220+ # datatypes as tables
219221peopleTable = sqlCtx.inferSchema(people)
220222peopleTable.registerAsTable("people")
221223
@@ -293,11 +295,11 @@ JavaSchemaRDD teenagers = sqlCtx.sql("SELECT name FROM parquetFile WHERE age >=
293295
294296peopleTable # The SchemaRDD from the previous example.
295297
296- # JavaSchemaRDDs can be saved as parquet files, maintaining the schema information.
298+ # SchemaRDDs can be saved as parquet files, maintaining the schema information.
297299peopleTable.saveAsParquetFile("people.parquet")
298300
299301# Read in the parquet file created above. Parquet files are self-describing so the schema is preserved.
300- # The result of loading a parquet file is also a JavaSchemaRDD.
302+ # The result of loading a parquet file is also a SchemaRDD.
301303parquetFile = sqlCtx.parquetFile("people.parquet")
302304
303305# Parquet files can also be registered as tables and then used in SQL statements.
You can’t perform that action at this time.
0 commit comments