
Commit c74b07f

[SPARK-5166][SPARK-5247][SPARK-5258][SQL] API Cleanup / Documentation
Author: Michael Armbrust <[email protected]>

Closes #4642 from marmbrus/docs and squashes the following commits:

d291c34 [Michael Armbrust] python tests
9be66e3 [Michael Armbrust] comments
d56afc2 [Michael Armbrust] fix style
f004747 [Michael Armbrust] fix build
c4a907b [Michael Armbrust] fix tests
42e2b73 [Michael Armbrust] [SQL] Documentation / API Clean-up.
1 parent c76da36 commit c74b07f

File tree

30 files changed: +483 -405 lines

project/SparkBuild.scala

Lines changed: 10 additions & 2 deletions
@@ -361,9 +361,16 @@ object Unidoc {
     publish := {},
 
     unidocProjectFilter in(ScalaUnidoc, unidoc) :=
-      inAnyProject -- inProjects(OldDeps.project, repl, examples, tools, catalyst, streamingFlumeSink, yarn),
+      inAnyProject -- inProjects(OldDeps.project, repl, examples, tools, streamingFlumeSink, yarn),
     unidocProjectFilter in(JavaUnidoc, unidoc) :=
-      inAnyProject -- inProjects(OldDeps.project, repl, bagel, examples, tools, catalyst, streamingFlumeSink, yarn),
+      inAnyProject -- inProjects(OldDeps.project, repl, bagel, examples, tools, streamingFlumeSink, yarn),
+
+    // Skip actual catalyst, but include the subproject.
+    // Catalyst is not public API and contains quasiquotes which break scaladoc.
+    unidocAllSources in (ScalaUnidoc, unidoc) := {
+      (unidocAllSources in (ScalaUnidoc, unidoc)).value
+        .map(_.filterNot(_.getCanonicalPath.contains("sql/catalyst")))
+    },
 
     // Skip class names containing $ and some internal packages in Javadocs
     unidocAllSources in (JavaUnidoc, unidoc) := {
@@ -376,6 +383,7 @@ object Unidoc {
       .map(_.filterNot(_.getCanonicalPath.contains("executor")))
       .map(_.filterNot(_.getCanonicalPath.contains("python")))
       .map(_.filterNot(_.getCanonicalPath.contains("collection")))
+      .map(_.filterNot(_.getCanonicalPath.contains("sql/catalyst")))
     },
 
     // Javadoc options: create a window title, and group key packages on index page
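
The new setting above works by path exclusion: before unidoc generates the Scala and Java docs, any source whose canonical path contains "sql/catalyst" is dropped from the source lists, so the catalyst subproject still compiles but its internals never appear in the published API docs. As a rough illustration of that filtering idea only (not the sbt code; the helper name and file paths below are hypothetical):

import os

def drop_catalyst_sources(source_files):
    # Mirror the filterNot in SparkBuild.scala: keep only sources whose
    # resolved path does not contain "sql/catalyst".
    return [f for f in source_files if "sql/catalyst" not in os.path.realpath(f)]

# Hypothetical usage: the catalyst file is excluded, the sql/core file is kept.
sources = [
    "sql/core/src/main/scala/org/apache/spark/sql/SQLContext.scala",
    "sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/trees/TreeNode.scala",
]
print(drop_catalyst_sources(sources))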

python/pyspark/sql/context.py

Lines changed: 14 additions & 14 deletions
@@ -252,7 +252,7 @@ def applySchema(self, rdd, schema):
         >>> schema = StructType([StructField("field1", IntegerType(), False),
         ...     StructField("field2", StringType(), False)])
         >>> df = sqlCtx.applySchema(rdd2, schema)
-        >>> sqlCtx.registerRDDAsTable(df, "table1")
+        >>> sqlCtx.registerDataFrameAsTable(df, "table1")
         >>> df2 = sqlCtx.sql("SELECT * from table1")
         >>> df2.collect()
         [Row(field1=1, field2=u'row1'),..., Row(field1=3, field2=u'row3')]
@@ -405,17 +405,17 @@ def createDataFrame(self, data, schema=None, samplingRatio=None):
 
         return self.applySchema(data, schema)
 
-    def registerRDDAsTable(self, rdd, tableName):
+    def registerDataFrameAsTable(self, rdd, tableName):
         """Registers the given RDD as a temporary table in the catalog.
 
         Temporary tables exist only during the lifetime of this instance of
         SQLContext.
 
-        >>> sqlCtx.registerRDDAsTable(df, "table1")
+        >>> sqlCtx.registerDataFrameAsTable(df, "table1")
         """
         if (rdd.__class__ is DataFrame):
             df = rdd._jdf
-            self._ssql_ctx.registerRDDAsTable(df, tableName)
+            self._ssql_ctx.registerDataFrameAsTable(df, tableName)
         else:
             raise ValueError("Can only register DataFrame as table")
 
@@ -456,7 +456,7 @@ def jsonFile(self, path, schema=None, samplingRatio=1.0):
         ...     print>>ofn, json
         >>> ofn.close()
         >>> df1 = sqlCtx.jsonFile(jsonFile)
-        >>> sqlCtx.registerRDDAsTable(df1, "table1")
+        >>> sqlCtx.registerDataFrameAsTable(df1, "table1")
         >>> df2 = sqlCtx.sql(
         ...   "SELECT field1 AS f1, field2 as f2, field3 as f3, "
         ...   "field6 as f4 from table1")
@@ -467,7 +467,7 @@ def jsonFile(self, path, schema=None, samplingRatio=1.0):
         Row(f1=None, f2=u'row3', f3=Row(field4=33, field5=[]), f4=None)
 
         >>> df3 = sqlCtx.jsonFile(jsonFile, df1.schema)
-        >>> sqlCtx.registerRDDAsTable(df3, "table2")
+        >>> sqlCtx.registerDataFrameAsTable(df3, "table2")
         >>> df4 = sqlCtx.sql(
         ...   "SELECT field1 AS f1, field2 as f2, field3 as f3, "
         ...   "field6 as f4 from table2")
@@ -485,7 +485,7 @@ def jsonFile(self, path, schema=None, samplingRatio=1.0):
         ...     StructField("field5",
         ...         ArrayType(IntegerType(), False), True)]), False)])
         >>> df5 = sqlCtx.jsonFile(jsonFile, schema)
-        >>> sqlCtx.registerRDDAsTable(df5, "table3")
+        >>> sqlCtx.registerDataFrameAsTable(df5, "table3")
         >>> df6 = sqlCtx.sql(
         ...   "SELECT field2 AS f1, field3.field5 as f2, "
         ...   "field3.field5[0] as f3 from table3")
@@ -509,7 +509,7 @@ def jsonRDD(self, rdd, schema=None, samplingRatio=1.0):
         determine the schema.
 
         >>> df1 = sqlCtx.jsonRDD(json)
-        >>> sqlCtx.registerRDDAsTable(df1, "table1")
+        >>> sqlCtx.registerDataFrameAsTable(df1, "table1")
         >>> df2 = sqlCtx.sql(
         ...   "SELECT field1 AS f1, field2 as f2, field3 as f3, "
         ...   "field6 as f4 from table1")
@@ -520,7 +520,7 @@ def jsonRDD(self, rdd, schema=None, samplingRatio=1.0):
         Row(f1=None, f2=u'row3', f3=Row(field4=33, field5=[]), f4=None)
 
         >>> df3 = sqlCtx.jsonRDD(json, df1.schema)
-        >>> sqlCtx.registerRDDAsTable(df3, "table2")
+        >>> sqlCtx.registerDataFrameAsTable(df3, "table2")
         >>> df4 = sqlCtx.sql(
         ...   "SELECT field1 AS f1, field2 as f2, field3 as f3, "
         ...   "field6 as f4 from table2")
@@ -538,7 +538,7 @@ def jsonRDD(self, rdd, schema=None, samplingRatio=1.0):
         ...     StructField("field5",
         ...         ArrayType(IntegerType(), False), True)]), False)])
         >>> df5 = sqlCtx.jsonRDD(json, schema)
-        >>> sqlCtx.registerRDDAsTable(df5, "table3")
+        >>> sqlCtx.registerDataFrameAsTable(df5, "table3")
        >>> df6 = sqlCtx.sql(
         ...   "SELECT field2 AS f1, field3.field5 as f2, "
         ...   "field3.field5[0] as f3 from table3")
@@ -628,7 +628,7 @@ def createExternalTable(self, tableName, path=None, source=None,
     def sql(self, sqlQuery):
         """Return a L{DataFrame} representing the result of the given query.
 
-        >>> sqlCtx.registerRDDAsTable(df, "table1")
+        >>> sqlCtx.registerDataFrameAsTable(df, "table1")
         >>> df2 = sqlCtx.sql("SELECT field1 AS f1, field2 as f2 from table1")
         >>> df2.collect()
         [Row(f1=1, f2=u'row1'), Row(f1=2, f2=u'row2'), Row(f1=3, f2=u'row3')]
@@ -638,7 +638,7 @@ def sql(self, sqlQuery):
     def table(self, tableName):
         """Returns the specified table as a L{DataFrame}.
 
-        >>> sqlCtx.registerRDDAsTable(df, "table1")
+        >>> sqlCtx.registerDataFrameAsTable(df, "table1")
         >>> df2 = sqlCtx.table("table1")
         >>> sorted(df.collect()) == sorted(df2.collect())
         True
@@ -653,7 +653,7 @@ def tables(self, dbName=None):
         The returned DataFrame has two columns, tableName and isTemporary
         (a column with BooleanType indicating if a table is a temporary one or not).
 
-        >>> sqlCtx.registerRDDAsTable(df, "table1")
+        >>> sqlCtx.registerDataFrameAsTable(df, "table1")
         >>> df2 = sqlCtx.tables()
         >>> df2.filter("tableName = 'table1'").first()
         Row(tableName=u'table1', isTemporary=True)
@@ -668,7 +668,7 @@ def tableNames(self, dbName=None):
 
         If `dbName` is not specified, the current database will be used.
 
-        >>> sqlCtx.registerRDDAsTable(df, "table1")
+        >>> sqlCtx.registerDataFrameAsTable(df, "table1")
         >>> "table1" in sqlCtx.tableNames()
         True
         >>> "table1" in sqlCtx.tableNames("db")

sql/core/src/main/java/org/apache/spark/sql/jdbc/JDBCUtils.java

Lines changed: 0 additions & 59 deletions
This file was deleted.
