Skip to content

Commit 2c46076

Browse files
committed
safe unicode conversions
1 parent f8294e8 commit 2c46076

File tree

1 file changed

+6
-5
lines changed

1 file changed

+6
-5
lines changed

python/pyspark/ml/param/__init__.py

Lines changed: 6 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -104,9 +104,10 @@ def _can_convert_to_list(value):
104104
return vtype in [list, np.ndarray, tuple, xrange, array.array] or isinstance(value, Vector)
105105

106106
@staticmethod
107-
def _is_string(value):
107+
def _can_convert_to_string(value):
108108
vtype = type(value)
109-
return isinstance(value, basestring) or vtype in [np.unicode_, np.string_, np.str_]
109+
is_string = isinstance(value, basestring) or vtype in [np.unicode_, np.string_, np.str_]
110+
return is_string and all(ord(c) < 128 for c in value) # safe unicode to str
110111

111112
@staticmethod
112113
def identity(value):
@@ -158,7 +159,7 @@ def toListString(value):
158159
"""
159160
if TypeConverters._can_convert_to_list(value):
160161
value = TypeConverters.toList(value)
161-
if all(map(lambda v: TypeConverters._is_string(v), value)):
162+
if all(map(lambda v: TypeConverters._can_convert_to_string(v), value)):
162163
return [str(v) for v in value]
163164
raise TypeError("Could not convert %s to list of strings" % value)
164165

@@ -200,10 +201,10 @@ def toString(value):
200201
"""
201202
Convert a value to a string, if possible.
202203
"""
203-
if TypeConverters._is_string(value):
204+
if TypeConverters._can_convert_to_string(value):
204205
return str(value)
205206
else:
206-
raise TypeError("Could not convert %s to string" % value)
207+
raise TypeError("Could not convert value of type %s to string" % type(value).__name__)
207208

208209
@staticmethod
209210
def toBoolean(value):

0 commit comments

Comments
 (0)