 from pyspark.sql.udf import UDFRegistration
 from pyspark.sql.utils import install_exception_handler
 
-__all__ = ["SQLContext"]
+__all__ = ["SQLContext", "HiveContext"]
 
 
 class SQLContext(object):
@@ -340,6 +340,24 @@ def dropTempTable(self, tableName): |
         """
         self.sparkSession.catalog.dropTempView(tableName)
 
+    @since(1.3)
+    def createExternalTable(self, tableName, path=None, source=None, schema=None, **options):
+        """Creates an external table based on the dataset in a data source.
+
+        It returns the DataFrame associated with the external table.
+
+        The data source is specified by the ``source`` and a set of ``options``.
+        If ``source`` is not specified, the default data source configured by
+        ``spark.sql.sources.default`` will be used.
+
+        Optionally, a schema can be provided as the schema of the returned :class:`DataFrame` and
+        created external table.
+
+        :return: :class:`DataFrame`
+        """
+        return self.sparkSession.catalog.createExternalTable(
+            tableName, path, source, schema, **options)
+
     @ignore_unicode_prefix
     @since(1.0)
     def sql(self, sqlQuery):
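For context, a minimal usage sketch of the re-added method: it registers an external table over existing data and returns the corresponding DataFrame. The SparkContext setup, table name, and Parquet path below are illustrative placeholders, not part of the patch.

```python
from pyspark import SparkContext
from pyspark.sql import SQLContext

sc = SparkContext("local", "example")  # hypothetical local context
sqlContext = SQLContext(sc)

# Register an external table over an existing Parquet directory. With no
# ``source`` given, the default from ``spark.sql.sources.default`` (parquet)
# is used. Path and table name are made up for illustration.
df = sqlContext.createExternalTable("people", path="/tmp/people.parquet")

# The returned DataFrame and the registered table are both queryable.
sqlContext.sql("SELECT * FROM people").show()
```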
@@ -463,6 +481,53 @@ def streams(self): |
         return StreamingQueryManager(self._ssql_ctx.streams())
 
 
+class HiveContext(SQLContext):
+    """A variant of Spark SQL that integrates with data stored in Hive.
+
+    Configuration for Hive is read from ``hive-site.xml`` on the classpath.
+    It supports running both SQL and HiveQL commands.
+
+    :param sparkContext: The SparkContext to wrap.
+    :param jhiveContext: An optional JVM Scala HiveContext. If set, we do not instantiate a new
+        :class:`HiveContext` in the JVM; instead we make all calls to this object.
+
+    .. note:: Deprecated in 2.0.0. Use SparkSession.builder.enableHiveSupport().getOrCreate().
+    """
+
+    def __init__(self, sparkContext, jhiveContext=None):
+        warnings.warn(
+            "HiveContext is deprecated in Spark 2.0.0. Please use " +
+            "SparkSession.builder.enableHiveSupport().getOrCreate() instead.",
+            DeprecationWarning)
+        if jhiveContext is None:
+            sparkContext._conf.set("spark.sql.catalogImplementation", "hive")
+            sparkSession = SparkSession.builder._sparkContext(sparkContext).getOrCreate()
+        else:
+            sparkSession = SparkSession(sparkContext, jhiveContext.sparkSession())
+        SQLContext.__init__(self, sparkContext, sparkSession, jhiveContext)
+
+    @classmethod
+    def _createForTesting(cls, sparkContext):
+        """(Internal use only) Create a new HiveContext for testing.
+
+        All test code that touches HiveContext *must* go through this method. Otherwise,
+        you may end up launching multiple Derby instances and encountering incredibly
+        confusing error messages.
+        """
+        jsc = sparkContext._jsc.sc()
+        jtestHive = sparkContext._jvm.org.apache.spark.sql.hive.test.TestHiveContext(jsc, False)
+        return cls(sparkContext, jtestHive)
+
+    def refreshTable(self, tableName):
+        """Invalidate and refresh all the cached metadata of the given
+        table. For performance reasons, Spark SQL or the external data source
+        library it uses might cache certain metadata about a table, such as the
+        location of blocks. When those change outside of Spark SQL, users should
+        call this function to invalidate the cache.
+        """
+        self._ssql_ctx.refreshTable(tableName)
+
+
 def _test():
     import os
     import doctest