
Commit 0dba382

Davies Liu authored and marmbrus committed
[SPARK-5872] [SQL] create a sqlCtx in pyspark shell
The sqlCtx will be a HiveContext if Hive is built into the assembly jar, or a SQLContext if not. It also skips the Hive tests in pyspark.sql.tests if Hive is not available.

Author: Davies Liu <[email protected]>

Closes #4659 from davies/sqlctx and squashes the following commits:

0e6629a [Davies Liu] sqlCtx in pyspark

(cherry picked from commit 4d4cc76)
Signed-off-by: Michael Armbrust <[email protected]>
1 parent cb06160 commit 0dba382
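
A minimal standalone sketch of the fallback described above, assuming an active SparkContext named sc and the py4j shipped with Spark (the helper name _create_sql_context is illustrative, not part of the patch, which inlines this logic in shell.py):

import py4j

from pyspark.sql import SQLContext, HiveContext


def _create_sql_context(sc):
    """Illustrative helper: return a HiveContext when Hive classes are on
    the classpath, otherwise fall back to a plain SQLContext."""
    try:
        # Instantiating HiveConf via the JVM gateway raises Py4JError
        # when Hive is not built into the assembly jar.
        sc._jvm.org.apache.hadoop.hive.conf.HiveConf()
        return HiveContext(sc)
    except py4j.protocol.Py4JError:
        return SQLContext(sc)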

File tree

2 files changed, +22 -3 lines changed

python/pyspark/shell.py

Lines changed: 12 additions & 1 deletion

@@ -31,8 +31,12 @@
 import atexit
 import os
 import platform
+
+import py4j
+
 import pyspark
 from pyspark.context import SparkContext
+from pyspark.sql import SQLContext, HiveContext
 from pyspark.storagelevel import StorageLevel
 
 # this is the deprecated equivalent of ADD_JARS
@@ -46,6 +50,13 @@
 sc = SparkContext(appName="PySparkShell", pyFiles=add_files)
 atexit.register(lambda: sc.stop())
 
+try:
+    # Try to access HiveConf, it will raise exception if Hive is not added
+    sc._jvm.org.apache.hadoop.hive.conf.HiveConf()
+    sqlCtx = HiveContext(sc)
+except py4j.protocol.Py4JError:
+    sqlCtx = SQLContext(sc)
+
 print("""Welcome to
       ____              __
      / __/__  ___ _____/ /__
@@ -57,7 +68,7 @@
     platform.python_version(),
     platform.python_build()[0],
     platform.python_build()[1]))
-print("SparkContext available as sc.")
+print("SparkContext available as sc, %s available as sqlCtx." % sqlCtx.__class__.__name__)
 
 if add_files is not None:
     print("Warning: ADD_FILES environment variable is deprecated, use --py-files argument instead")

python/pyspark/sql/tests.py

Lines changed: 10 additions & 2 deletions

@@ -25,6 +25,8 @@
 import shutil
 import tempfile
 
+import py4j
+
 if sys.version_info[:2] <= (2, 6):
     try:
         import unittest2 as unittest
@@ -329,9 +331,12 @@ class HiveContextSQLTests(ReusedPySparkTestCase):
     def setUpClass(cls):
         ReusedPySparkTestCase.setUpClass()
         cls.tempdir = tempfile.NamedTemporaryFile(delete=False)
+        try:
+            cls.sc._jvm.org.apache.hadoop.hive.conf.HiveConf()
+        except py4j.protocol.Py4JError:
+            cls.sqlCtx = None
+            return
         os.unlink(cls.tempdir.name)
-        print "type", type(cls.sc)
-        print "type", type(cls.sc._jsc)
         _scala_HiveContext =\
             cls.sc._jvm.org.apache.spark.sql.hive.test.TestHiveContext(cls.sc._jsc.sc())
         cls.sqlCtx = HiveContext(cls.sc, _scala_HiveContext)
@@ -344,6 +349,9 @@ def tearDownClass(cls):
         shutil.rmtree(cls.tempdir.name, ignore_errors=True)
 
     def test_save_and_load_table(self):
+        if self.sqlCtx is None:
+            return  # no hive available, skipped
+
         df = self.df
         tmpPath = tempfile.mkdtemp()
         shutil.rmtree(tmpPath)
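
The guard added to test_save_and_load_table skips by returning early, which the runner counts as a pass. An equivalent that reports the skip explicitly (a sketch using unittest's skipTest, not what the patch does) would be:

def test_save_and_load_table(self):
    if self.sqlCtx is None:
        self.skipTest("Hive is not available")  # counted as skipped, not passed
    # ... rest of the test body unchanged ...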
