|
32 | 32 | from pyspark.sql.udf import UDFRegistration |
33 | 33 | from pyspark.sql.utils import install_exception_handler |
34 | 34 |
|
35 | | -__all__ = ["SQLContext"] |
| 35 | +__all__ = ["SQLContext", "HiveContext"] |
36 | 36 |
|
37 | 37 |
|
38 | 38 | class SQLContext(object): |
@@ -338,6 +338,24 @@ def dropTempTable(self, tableName): |
338 | 338 | """ |
339 | 339 | self.sparkSession.catalog.dropTempView(tableName) |
340 | 340 |
|
| 341 | + @since(1.3) |
| 342 | + def createExternalTable(self, tableName, path=None, source=None, schema=None, **options): |
| 343 | + """Creates an external table based on the dataset in a data source. |
| 344 | +
|
| 345 | + It returns the DataFrame associated with the external table. |
| 346 | +
|
| 347 | + The data source is specified by the ``source`` and a set of ``options``. |
| 348 | + If ``source`` is not specified, the default data source configured by |
| 349 | + ``spark.sql.sources.default`` will be used. |
| 350 | +
|
| 351 | + Optionally, a schema can be provided as the schema of the returned :class:`DataFrame` and |
| 352 | + created external table. |
| 353 | +
|
| 354 | + :return: :class:`DataFrame` |
| 355 | + """ |
| 356 | + return self.sparkSession.catalog.createExternalTable( |
| 357 | + tableName, path, source, schema, **options) |
| 358 | + |
341 | 359 | @ignore_unicode_prefix |
342 | 360 | @since(1.0) |
343 | 361 | def sql(self, sqlQuery): |
@@ -461,6 +479,53 @@ def streams(self): |
461 | 479 | return StreamingQueryManager(self._ssql_ctx.streams()) |
462 | 480 |
|
463 | 481 |
|
class HiveContext(SQLContext):
    """A variant of Spark SQL that integrates with data stored in Hive.

    Configuration for Hive is read from ``hive-site.xml`` on the classpath.
    It supports running both SQL and HiveQL commands.

    :param sparkContext: The SparkContext to wrap.
    :param jhiveContext: An optional JVM Scala HiveContext. If set, we do not instantiate a new
        :class:`HiveContext` in the JVM, instead we make all calls to this object.

    .. note:: Deprecated in 2.0.0. Use SparkSession.builder.enableHiveSupport().getOrCreate().
    """

    def __init__(self, sparkContext, jhiveContext=None):
        # This class exists only for backwards compatibility; warn on every
        # construction so users migrate to SparkSession.
        warnings.warn(
            "HiveContext is deprecated in Spark 2.0.0. Please use " +
            "SparkSession.builder.enableHiveSupport().getOrCreate() instead.",
            DeprecationWarning)
        if jhiveContext is not None:
            # Wrap the session of the JVM HiveContext we were handed instead of
            # materializing a new one.
            session = SparkSession(sparkContext, jhiveContext.sparkSession())
        else:
            # Switch the catalog implementation to Hive before the session is
            # created so the new session picks it up.
            sparkContext._conf.set("spark.sql.catalogImplementation", "hive")
            session = SparkSession.builder._sparkContext(sparkContext).getOrCreate()
        SQLContext.__init__(self, sparkContext, session, jhiveContext)

    @classmethod
    def _createForTesting(cls, sparkContext):
        """(Internal use only) Create a new HiveContext for testing.

        All test code that touches HiveContext *must* go through this method. Otherwise,
        you may end up launching multiple derby instances and encounter with incredibly
        confusing error messages.
        """
        jvm = sparkContext._jvm
        jtestHive = jvm.org.apache.spark.sql.hive.test.TestHiveContext(
            sparkContext._jsc.sc(), False)
        return cls(sparkContext, jtestHive)

    def refreshTable(self, tableName):
        """Invalidate and refresh all the cached metadata of the given table.

        For performance reasons, Spark SQL or the external data source library it
        uses might cache certain metadata about a table, such as the location of
        blocks. When those change outside of Spark SQL, users should call this
        function to invalidate the cache.
        """
        self._ssql_ctx.refreshTable(tableName)
| 528 | + |
464 | 529 | def _test(): |
465 | 530 | import os |
466 | 531 | import doctest |
|
0 commit comments