|
26 | 26 | sqlContext = SQLContext(sc) |
27 | 27 |
|
28 | 28 | # RDD is created from a list of rows |
29 | | - some_rdd = sc.parallelize([Row(name="John", age=19), Row(name="Smith", |
30 | | - age=23), Row(name="Sarah", age=18)]) |
| 29 | + some_rdd = sc.parallelize([Row(name="John", age=19), |
| 30 | + Row(name="Smith", age=23), |
| 31 | + Row(name="Sarah", age=18)]) |
31 | 32 | # Infer schema from the first row, create a SchemaRDD and print the schema |
32 | 33 | some_schemardd = sqlContext.inferSchema(some_rdd) |
33 | 34 | some_schemardd.printSchema() |
34 | 35 |
|
35 | 36 | # Another RDD is created from a list of tuples |
36 | 37 | another_rdd = sc.parallelize([("John", 19), ("Smith", 23), ("Sarah", 18)]) |
37 | 38 | # Schema with two fields - person_name and person_age |
38 | | - schema = StructType([StructField("person_name", StringType(), False), |
39 | | - StructField("person_age", IntegerType(), False)]) |
| 39 | + schema = StructType([StructField("person_name", StringType(), False), |
| 40 | + StructField("person_age", IntegerType(), False)]) |
40 | 41 | # Create a SchemaRDD by applying the schema to the RDD and print the schema |
41 | 42 | another_schemardd = sqlContext.applySchema(another_rdd, schema) |
42 | 43 | another_schemardd.printSchema() |
|
0 commit comments