13 changes: 12 additions & 1 deletion python/pyspark/shell.py
@@ -31,8 +31,12 @@
import atexit
import os
import platform

import py4j

import pyspark
from pyspark.context import SparkContext
from pyspark.sql import SQLContext, HiveContext
from pyspark.storagelevel import StorageLevel

# this is the deprecated equivalent of ADD_JARS
@@ -46,6 +50,13 @@
sc = SparkContext(appName="PySparkShell", pyFiles=add_files)
atexit.register(lambda: sc.stop())

try:
    # Try to access HiveConf; it raises an exception if Hive is not on the classpath
    sc._jvm.org.apache.hadoop.hive.conf.HiveConf()
    sqlCtx = HiveContext(sc)
except py4j.protocol.Py4JError:
    sqlCtx = SQLContext(sc)

print("""Welcome to
____ __
/ __/__ ___ _____/ /__
@@ -57,7 +68,7 @@
    platform.python_version(),
    platform.python_build()[0],
    platform.python_build()[1]))
print("SparkContext available as sc.")
print("SparkContext available as sc, %s available as sqlCtx." % sqlCtx.__class__.__name__)

if add_files is not None:
    print("Warning: ADD_FILES environment variable is deprecated, use --py-files argument instead")
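The shell.py change probes for Hive by instantiating HiveConf through the Py4J gateway; when Spark was built without Hive the class is absent, the call raises Py4JError, and the shell falls back to a plain SQLContext. A minimal standalone sketch of the same pattern, assuming a live SparkContext named sc; the helper name make_sql_context is hypothetical and not part of this patch:

import py4j

from pyspark.sql import SQLContext, HiveContext

def make_sql_context(sc):
    """Return a HiveContext when Hive classes are reachable, else a plain SQLContext."""
    try:
        # HiveConf is only on the classpath when Spark was built with Hive;
        # instantiating it raises Py4JError otherwise.
        sc._jvm.org.apache.hadoop.hive.conf.HiveConf()
        return HiveContext(sc)
    except py4j.protocol.Py4JError:
        return SQLContext(sc)

sqlCtx = make_sql_context(sc)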
12 changes: 10 additions & 2 deletions python/pyspark/sql/tests.py
@@ -25,6 +25,8 @@
import shutil
import tempfile

import py4j

if sys.version_info[:2] <= (2, 6):
    try:
        import unittest2 as unittest
@@ -329,9 +331,12 @@ class HiveContextSQLTests(ReusedPySparkTestCase):
    def setUpClass(cls):
        ReusedPySparkTestCase.setUpClass()
        cls.tempdir = tempfile.NamedTemporaryFile(delete=False)
        try:
            # Check whether Hive classes are reachable before building a HiveContext
            cls.sc._jvm.org.apache.hadoop.hive.conf.HiveConf()
        except py4j.protocol.Py4JError:
            cls.sqlCtx = None
            return
        os.unlink(cls.tempdir.name)
        _scala_HiveContext = \
            cls.sc._jvm.org.apache.spark.sql.hive.test.TestHiveContext(cls.sc._jsc.sc())
        cls.sqlCtx = HiveContext(cls.sc, _scala_HiveContext)
@@ -344,6 +349,9 @@ def tearDownClass(cls):
        shutil.rmtree(cls.tempdir.name, ignore_errors=True)

    def test_save_and_load_table(self):
        if self.sqlCtx is None:
            return  # Hive is not available; skip this test

        df = self.df
        tmpPath = tempfile.mkdtemp()
        shutil.rmtree(tmpPath)
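Returning early makes the test pass silently when Hive is unavailable, so the skip never shows up in the test report. A possible variant, not part of this patch, would use TestCase.skipTest, which is in the standard unittest on Python 2.7 and backported by unittest2 (the module this file already falls back to on 2.6):

    def test_save_and_load_table(self):
        if self.sqlCtx is None:
            self.skipTest("Hive is not available")
        df = self.df
        tmpPath = tempfile.mkdtemp()
        shutil.rmtree(tmpPath)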