1717
1818package org .apache .spark .examples .mllib
1919
20+ import scopt .OptionParser
21+
2022import org .apache .spark .SparkContext ._
2123import org .apache .spark .mllib .linalg .Vectors
2224import org .apache .spark .mllib .linalg .distributed .{MatrixEntry , RowMatrix }
2325import org .apache .spark .{SparkConf , SparkContext }
24- import scopt .OptionParser
2526
2627/**
2728 * Compute the similar columns of a matrix, using cosine similarity.
@@ -37,8 +38,8 @@ import scopt.OptionParser
3738 *
3839 * Example invocation:
3940 *
40- * bin/run-example org.apache.spark.examples. mllib.CosineSimilarity \
41- * --inputFile data/mllib/sample_svm_data.txt --threshold 0.1
41+ * bin/run-example mllib.CosineSimilarity \
42+ * --threshold 0.1 data/mllib/sample_svm_data.txt
4243 */
4344object CosineSimilarity {
4445 case class Params (inputFile : String = null , threshold : Double = 0.1 )
@@ -48,14 +49,14 @@ object CosineSimilarity {
4849
4950 val parser = new OptionParser [Params ](" CosineSimilarity" ) {
5051 head(" CosineSimilarity: an example app." )
51- opt[String ](" inputFile" )
52- .required()
53- .text(s " input file, one row per line, space-separated " )
54- .action((x, c) => c.copy(inputFile = x))
5552 opt[Double ](" threshold" )
5653 .required()
5754 .text(s " threshold similarity: to tradeoff computation vs quality estimate " )
5855 .action((x, c) => c.copy(threshold = x))
56+ arg[String ](" <inputFile>" )
57+ .required()
58+ .text(s " input file, one row per line, space-separated " )
59+ .action((x, c) => c.copy(inputFile = x))
5960 note(
6061 """
6162 |For example, the following command runs this app on a dataset:
@@ -90,12 +91,9 @@ object CosineSimilarity {
9091 // Compute similar columns with estimation using DIMSUM
9192 val approx = mat.columnSimilarities(params.threshold)
9293
93- val MAE = exact.entries.map { case MatrixEntry (i, j, u) =>
94- ((i, j), u)
95- }.leftOuterJoin(
96- approx.entries.map { case MatrixEntry (i, j, v) =>
97- ((i, j), v)
98- }).values.map {
94+ val exactEntries = exact.entries.map { case MatrixEntry (i, j, u) => ((i, j), u) }
95+ val approxEntries = approx.entries.map { case MatrixEntry (i, j, v) => ((i, j), v) }
96+ val MAE = exactEntries.leftOuterJoin(approxEntries).values.map {
9997 case (u, Some (v)) =>
10098 math.abs(u - v)
10199 case (u, None ) =>
0 commit comments