From 6604e9fdaa710cd894b4799390144e404667402e Mon Sep 17 00:00:00 2001 From: Shashwat Anand Date: Sun, 4 Feb 2018 15:57:31 +0530 Subject: [PATCH 1/2] Fix __repr__ behaviour for Rows. Row's __repr__ assumes that all values are strings when column names are missing. Examples: >>> Row ("Alice", "11") <Row(Alice, 11)> >>> Row (name="Alice", age=11) Row(age=11, name='Alice') >>> Row ("Alice", 11) TypeError: sequence item 1: expected string, int found This is because Row(), when called without column names, assumes that every value is a string. --- python/pyspark/sql/tests.py | 4 ++++ python/pyspark/sql/types.py | 2 +- 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/python/pyspark/sql/tests.py b/python/pyspark/sql/tests.py index b27363023ae7..37a691b34f7d 100644 --- a/python/pyspark/sql/tests.py +++ b/python/pyspark/sql/tests.py @@ -234,6 +234,10 @@ def test_empty_row(self): row = Row() self.assertEqual(len(row), 0) + def test_row_without_column_name(self): + row = Row("Alice", 11) + self.assertEqual(row.__repr__(), "<Row(Alice, 11)>") + def test_struct_field_type_name(self): struct_field = StructField("a", IntegerType()) self.assertRaises(TypeError, struct_field.typeName) diff --git a/python/pyspark/sql/types.py b/python/pyspark/sql/types.py index 0dc5823f72a3..87ff0bd03b24 100644 --- a/python/pyspark/sql/types.py +++ b/python/pyspark/sql/types.py @@ -1581,7 +1581,7 @@ def __repr__(self): return "Row(%s)" % ", ".join("%s=%r" % (k, v) for k, v in zip(self.__fields__, tuple(self))) else: - return "<Row(%s)>" % ", ".join(self) + return "<Row(%s)>" % ", ".join(str(field) for field in self) class DateConverter(object): From 890aa6514196b3c672c4581120506632dd49b4a6 Mon Sep 17 00:00:00 2001 From: Shashwat Anand Date: Mon, 5 Feb 2018 23:12:14 +0530 Subject: [PATCH 2/2] Unicode fix. 
--- python/pyspark/sql/types.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/pyspark/sql/types.py b/python/pyspark/sql/types.py index 87ff0bd03b24..6c6b26690312 100644 --- a/python/pyspark/sql/types.py +++ b/python/pyspark/sql/types.py @@ -1581,7 +1581,7 @@ def __repr__(self): return "Row(%s)" % ", ".join("%s=%r" % (k, v) for k, v in zip(self.__fields__, tuple(self))) else: - return "<Row(%s)>" % ", ".join(str(field) for field in self) + return "<Row(%s)>" % ", ".join("%s" % (fields) for fields in self) class DateConverter(object):