Skip to content

Commit a353354

Browse files
committed
Remove unnecessary appendBias implementation
1 parent 44295c2 commit a353354

File tree

3 files changed

+25
-12
lines changed

3 files changed

+25
-12
lines changed

mllib/src/main/scala/org/apache/spark/mllib/api/python/PythonMLLibAPI.scala

Lines changed: 9 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -71,13 +71,15 @@ private[python] class PythonMLLibAPI extends Serializable {
7171
minPartitions: Int): JavaRDD[LabeledPoint] =
7272
MLUtils.loadLabeledPoints(jsc.sc, path, minPartitions)
7373

74-
def appendBias(data: org.apache.spark.mllib.linalg.Vector)
75-
: org.apache.spark.mllib.linalg.Vector
76-
= MLUtils.appendBias(data)
77-
78-
def loadVectors(jsc: JavaSparkContext, path: String)
79-
: RDD[org.apache.spark.mllib.linalg.Vector]
80-
= MLUtils.loadVectors(jsc.sc, path)
74+
/**
75+
* Loads and serializes vectors saved with `RDD#saveAsTextFile`.
76+
* @param jsc Java SparkContext
77+
* @param path file or directory path in any Hadoop-supported file system URI
78+
* @return serialized vectors in an RDD
79+
*/
80+
def loadVectors(jsc: JavaSparkContext,
81+
path: String): RDD[Vector] =
82+
MLUtils.loadVectors(jsc.sc, path)
8183

8284
private def trainRegressionModel(
8385
learner: GeneralizedLinearAlgorithm[_ <: GeneralizedLinearModel],

python/pyspark/mllib/tests.py

Lines changed: 7 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -821,7 +821,7 @@ def test_model_transform(self):
821821

822822
class MLUtilsTests(MLlibTestCase):
823823
def test_append_bias(self):
824-
data = [1.0, 2.0, 3.0]
824+
data = [2.0, 2.0, 2.0]
825825
ret = MLUtils.appendBias(data)
826826
self.assertEqual(ret[3], 1.0)
827827

@@ -832,14 +832,17 @@ def test_load_vectors(self):
832832
[1.0, 2.0, 3.0]
833833
]
834834
try:
835-
self.sc.parallelize(data).saveAsTextFile("test_load_vectors")
836-
ret_rdd = MLUtils.loadVectors(self.sc, "test_load_vectors")
835+
temp_dir = tempfile.mkdtemp()
836+
load_vectors_path = os.path.join(temp_dir, "test_load_vectors")
837+
self.sc.parallelize(data).saveAsTextFile(load_vectors_path)
838+
ret_rdd = MLUtils.loadVectors(self.sc, load_vectors_path)
837839
ret = ret_rdd.collect()
840+
ret.sort()
838841
self.assertEqual(len(ret), 2)
839842
self.assertEqual(ret[0], DenseVector([1.0, 2.0, 3.0]))
840843
self.assertEqual(ret[1], DenseVector([1.0, 2.0, 3.0]))
841844
finally:
842-
shutil.rmtree("test_load_vectors")
845+
shutil.rmtree(load_vectors_path)
843846

844847

845848
if __name__ == "__main__":

python/pyspark/mllib/util.py

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -171,10 +171,18 @@ def loadLabeledPoints(sc, path, minPartitions=None):
171171

172172
@staticmethod
173173
def appendBias(data):
174-
return callMLlibFunc("appendBias", _convert_to_vector(data))
174+
"""
175+
Returns a new vector with `1.0` (bias) appended to the input vector.
176+
"""
177+
vec = _convert_to_vector(data)
178+
return np.append(vec, 1.0)
175179

176180
@staticmethod
177181
def loadVectors(sc, path):
182+
"""
183+
Loads vectors saved using `RDD[Vector].saveAsTextFile`
184+
with the default number of partitions.
185+
"""
178186
return callMLlibFunc("loadVectors", sc, path)
179187

180188

0 commit comments

Comments
 (0)