Skip to content

Commit 25d3c9d

Browse files
committed
Remove unnecessary imports
1 parent 7ec04db commit 25d3c9d

File tree

2 files changed

+47
-8
lines changed

2 files changed

+47
-8
lines changed

python/pyspark/mllib/tests.py

Lines changed: 43 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -49,6 +49,7 @@
4949
from pyspark.mllib.feature import IDF
5050
from pyspark.mllib.feature import StandardScaler
5151
from pyspark.mllib.feature import ElementwiseProduct
52+
from pyspark.mllib.util import MLUtils
5253
from pyspark.serializers import PickleSerializer
5354
from pyspark.streaming import StreamingContext
5455
from pyspark.sql import SQLContext
@@ -1010,6 +1011,48 @@ def collect(rdd):
10101011
self.assertEqual(predict_results, [[0, 1, 1], [1, 0, 1]])
10111012

10121013

1014+
class MLUtilsTests(MLlibTestCase):
    """Tests for :class:`pyspark.mllib.util.MLUtils` helper methods."""

    def test_append_bias(self):
        # A plain Python list is converted to a DenseVector with a
        # trailing 1.0 bias term appended.
        data = [2.0, 2.0, 2.0]
        ret = MLUtils.appendBias(data)
        self.assertEqual(ret[3], 1.0)
        self.assertEqual(type(ret), DenseVector)

    def test_append_bias_with_vector(self):
        # A DenseVector input stays dense and gains the bias term.
        data = Vectors.dense([2.0, 2.0, 2.0])
        ret = MLUtils.appendBias(data)
        self.assertEqual(ret[3], 1.0)
        self.assertEqual(type(ret), DenseVector)

    def test_append_bias_with_sp_vector(self):
        data = Vectors.sparse(3, {0: 2.0, 2: 2.0})
        expected = Vectors.sparse(4, {0: 2.0, 2: 2.0, 3: 1.0})
        # Returned value must be SparseVector
        ret = MLUtils.appendBias(data)
        self.assertEqual(ret, expected)
        self.assertEqual(type(ret), SparseVector)

    def test_load_vectors(self):
        import shutil
        data = [
            [1.0, 2.0, 3.0],
            [1.0, 2.0, 3.0]
        ]
        temp_dir = tempfile.mkdtemp()
        load_vectors_path = os.path.join(temp_dir, "test_load_vectors")
        try:
            # Round-trip: write the vectors as text, then load them back.
            self.sc.parallelize(data).saveAsTextFile(load_vectors_path)
            ret_rdd = MLUtils.loadVectors(self.sc, load_vectors_path)
            ret = ret_rdd.collect()
            self.assertEqual(len(ret), 2)
            self.assertEqual(ret[0], DenseVector([1.0, 2.0, 3.0]))
            self.assertEqual(ret[1], DenseVector([1.0, 2.0, 3.0]))
        finally:
            # Remove the whole mkdtemp() directory, not just the subpath,
            # so no temp directory is leaked.  No bare `except: self.fail()`
            # wrapper: letting the real exception propagate preserves the
            # traceback, and assertion failures already fail the test.
            shutil.rmtree(temp_dir)
10131056
if __name__ == "__main__":
10141057
if not _have_scipy:
10151058
print("NOTE: Skipping SciPy tests as it does not seem to be installed")

python/pyspark/mllib/util.py

Lines changed: 4 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -29,7 +29,7 @@
2929
xrange = range
3030

3131
from pyspark.mllib.common import callMLlibFunc, inherit_doc
32-
from pyspark.mllib.linalg import Vector, Vectors, DenseVector, SparseVector, _convert_to_vector
32+
from pyspark.mllib.linalg import Vector, Vectors, SparseVector, _convert_to_vector
3333

3434

3535
class MLUtils(object):
@@ -183,15 +183,11 @@ def appendBias(data):
183183
"""
184184
vec = _convert_to_vector(data)
185185
if isinstance(vec, SparseVector):
186-
if _have_scipy:
187-
l = scipy.sparse.csc_matrix(np.append(vec.toArray(), 1.0))
188-
return _convert_to_vector(l.T)
189-
else:
190-
raise TypeError("Cannot append bias %s into sparce "
191-
"vector because of lack of scipy" % type(vec))
186+
l = scipy.sparse.csc_matrix(np.append(vec.toArray(), 1.0))
187+
return _convert_to_vector(l.T)
192188
elif isinstance(vec, Vector):
193189
vec = vec.toArray()
194-
return np.append(vec, 1.0).tolist()
190+
return _convert_to_vector(np.append(vec, 1.0).tolist())
195191

196192
@staticmethod
197193
def loadVectors(sc, path):

0 commit comments

Comments
 (0)