Skip to content

Commit c1885c1

Browse files
committed
add headers and minor changes
1 parent b0c50cb commit c1885c1

File tree

2 files changed

+58
-13
lines changed

2 files changed

+58
-13
lines changed

mllib/src/main/scala/org/apache/spark/mllib/util/NumericParser.scala

Lines changed: 24 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,25 @@
1+
/*
2+
* Licensed to the Apache Software Foundation (ASF) under one or more
3+
* contributor license agreements. See the NOTICE file distributed with
4+
* this work for additional information regarding copyright ownership.
5+
* The ASF licenses this file to You under the Apache License, Version 2.0
6+
* (the "License"); you may not use this file except in compliance with
7+
* the License. You may obtain a copy of the License at
8+
*
9+
* http://www.apache.org/licenses/LICENSE-2.0
10+
*
11+
* Unless required by applicable law or agreed to in writing, software
12+
* distributed under the License is distributed on an "AS IS" BASIS,
13+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14+
* See the License for the specific language governing permissions and
15+
* limitations under the License.
16+
*/
17+
118
package org.apache.spark.mllib.util
219

320
import scala.collection.mutable.{ArrayBuffer, ListBuffer}
421

5-
object NumericTokenizer {
22+
private[mllib] object NumericTokenizer {
623
val NUMBER = -1
724
val END = -2
825
}
@@ -16,7 +33,7 @@ import NumericTokenizer._
1633
* - array: an array of numbers stored as `[v0,v1,...,vn]`
1734
* - tuple: a list of numbers, arrays, or tuples stored as `(...)`
1835
*
19-
@param s input string
36+
* @param s input string
2037
* @param start start index
2138
* @param end end index
2239
*/
@@ -45,12 +62,6 @@ private[mllib] class NumericTokenizer(s: String, start: Int, end: Int) {
4562
def next(): Int = {
4663
if (cur < end) {
4764
val c = s(cur)
48-
if (c == ',' && allowComma) {
49-
cur += 1
50-
allowComma = false
51-
return next()
52-
}
53-
5465
c match {
5566
case '(' | '[' =>
5667
allowComma = false
@@ -90,6 +101,9 @@ private[mllib] class NumericTokenizer(s: String, start: Int, end: Int) {
90101
}
91102
}
92103

104+
/**
105+
* Simple parser for tokens from [[org.apache.spark.mllib.util.NumericTokenizer]].
106+
*/
93107
private[mllib] object NumericParser {
94108

95109
/** Parses a string into a Double, an Array[Double], or a Seq[Any]. */
@@ -119,7 +133,7 @@ private[mllib] object NumericParser {
119133
values.toArray
120134
}
121135

122-
private def parseTuple(tokenizer: NumericTokenizer): List[_] = {
136+
private def parseTuple(tokenizer: NumericTokenizer): Seq[_] = {
123137
val items = ListBuffer.empty[Any]
124138
var token = tokenizer.next()
125139
while (token != ')' && token != END) {
@@ -134,6 +148,6 @@ private[mllib] object NumericParser {
134148
token = tokenizer.next()
135149
}
136150
require(token == ')')
137-
items.toList
151+
items.toSeq
138152
}
139153
}

mllib/src/test/scala/org/apache/spark/mllib/util/NumericParserSuite.scala

Lines changed: 34 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,30 @@
1+
/*
2+
* Licensed to the Apache Software Foundation (ASF) under one or more
3+
* contributor license agreements. See the NOTICE file distributed with
4+
* this work for additional information regarding copyright ownership.
5+
* The ASF licenses this file to You under the Apache License, Version 2.0
6+
* (the "License"); you may not use this file except in compliance with
7+
* the License. You may obtain a copy of the License at
8+
*
9+
* http://www.apache.org/licenses/LICENSE-2.0
10+
*
11+
* Unless required by applicable law or agreed to in writing, software
12+
* distributed under the License is distributed on an "AS IS" BASIS,
13+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14+
* See the License for the specific language governing permissions and
15+
* limitations under the License.
16+
*/
17+
118
package org.apache.spark.mllib.util
219

3-
import org.scalatest.FunSuite
420
import scala.collection.mutable.ListBuffer
521

22+
import org.scalatest.FunSuite
23+
624
class NumericParserSuite extends FunSuite {
725

826
test("tokenizer") {
9-
val s = "((1,2),4,[5,6],8)"
27+
val s = "((1.0,2e3),-4,[5e-6,7.0E8],+9)"
1028
val tokenizer = new NumericTokenizer(s)
1129
var token = tokenizer.next()
1230
val tokens = ListBuffer.empty[Any]
@@ -19,10 +37,23 @@ class NumericParserSuite extends FunSuite {
1937
}
2038
token = tokenizer.next()
2139
}
22-
val expected = Seq('(', '(', 1.0, 2.0, ')', 4.0, '[', 5.0, 6.0, ']', 8.0, ')')
40+
val expected = Seq('(', '(', 1.0, 2e3, ')', -4.0, '[', 5e-6, 7e8, ']', 9.0, ')')
2341
assert(expected === tokens)
2442
}
2543

44+
test("tokenizer on malformatted strings") {
45+
val malformatted = Seq("a", "[1,,]", "0.123.4", "1 2", "3+4")
46+
malformatted.foreach { s =>
47+
intercept[RuntimeException] {
48+
val tokenizer = new NumericTokenizer(s)
49+
while (tokenizer.next() != NumericTokenizer.END) {
50+
// do nothing
51+
}
52+
println(s"Didn't detect malformatted string $s.")
53+
}
54+
}
55+
}
56+
2657
test("parser") {
2758
val s = "((1,2),4,[5,6],8)"
2859
val parsed = NumericParser.parse(s).asInstanceOf[Seq[_]]

0 commit comments

Comments
 (0)