|
26 | 26 | sqlContext = SQLContext(sc) |
27 | 27 |
|
28 | 28 | # RDD is created from a list of rows |
29 | | - some_rdd = sc.parallelize([Row(name="John", age=19), Row(name="Smith", age=23), Row(name="Sarah", age=18)]) |
| 29 | + some_rdd = sc.parallelize([Row(name="John", age=19), Row(name="Smith", |
| 30 | + age=23), Row(name="Sarah", age=18)]) |
30 | 31 | # Infer schema from the first row, create a SchemaRDD and print the schema |
31 | 32 | some_schemardd = sqlContext.inferSchema(some_rdd) |
32 | 33 | some_schemardd.printSchema() |
33 | 34 |
|
34 | 35 | # Another RDD is created from a list of tuples |
35 | 36 | another_rdd = sc.parallelize([("John", 19), ("Smith", 23), ("Sarah", 18)]) |
36 | 37 | # Schema with two fields - person_name and person_age |
37 | | - schema = StructType([StructField("person_name", StringType(), False), StructField("person_age", IntegerType(), False)]) |
| 38 | + schema = StructType([StructField("person_name", StringType(), False), |
| 39 | + StructField("person_age", IntegerType(), False)]) |
38 | 40 | # Create a SchemaRDD by applying the schema to the RDD and print the schema |
39 | 41 | another_schemardd = sqlContext.applySchema(another_rdd, schema) |
40 | 42 | another_schemardd.printSchema() |
|
0 commit comments