From 4e5c365e8d29d363750c4ca0718b9f907bb36f0b Mon Sep 17 00:00:00 2001 From: Nicholas Chammas Date: Fri, 21 Aug 2020 13:47:17 -0400 Subject: [PATCH 1/3] un-deprecate inferring schema from list of dict --- python/pyspark/sql/session.py | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/python/pyspark/sql/session.py b/python/pyspark/sql/session.py index c86078c1b27a9..7b358c834d102 100644 --- a/python/pyspark/sql/session.py +++ b/python/pyspark/sql/session.py @@ -359,18 +359,14 @@ def range(self, start, end=None, step=1, numPartitions=None): def _inferSchemaFromList(self, data, names=None): """ - Infer schema from list of Row or tuple. + Infer schema from list of Row, dict, or tuple. - :param data: list of Row or tuple + :param data: list of Row, dict, or tuple :param names: list of column names :return: :class:`pyspark.sql.types.StructType` """ if not data: raise ValueError("can not infer schema from empty dataset") - first = data[0] - if type(first) is dict: - warnings.warn("inferring schema from dict is deprecated," - "please use pyspark.sql.Row instead") schema = reduce(_merge_type, (_infer_schema(row, names) for row in data)) if _has_nulltype(schema): raise ValueError("Some of types cannot be determined after inferring") From 37ceb56fb93a1993503790c70e09ddcc9c815b87 Mon Sep 17 00:00:00 2001 From: Nicholas Chammas Date: Mon, 24 Aug 2020 11:15:57 -0400 Subject: [PATCH 2/3] inferring from RDD of dict is OK, too --- python/pyspark/sql/session.py | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/python/pyspark/sql/session.py b/python/pyspark/sql/session.py index 7b358c834d102..bba51791ec9a4 100644 --- a/python/pyspark/sql/session.py +++ b/python/pyspark/sql/session.py @@ -374,9 +374,9 @@ def _inferSchemaFromList(self, data, names=None): def _inferSchema(self, rdd, samplingRatio=None, names=None): """ - Infer schema from an RDD of Row or tuple. + Infer schema from an RDD of Row, dict, or tuple. - :param rdd: an RDD of Row or tuple + :param rdd: an RDD of Row, dict, or tuple :param samplingRatio: sampling ratio, or no sampling (default) :return: :class:`pyspark.sql.types.StructType` """ @@ -384,9 +384,6 @@ def _inferSchema(self, rdd, samplingRatio=None, names=None): if not first: raise ValueError("The first row in RDD is empty, " "can not infer schema") - if type(first) is dict: - warnings.warn("Using RDD of dict to inferSchema is deprecated. " - "Use pyspark.sql.Row instead") if samplingRatio is None: schema = _infer_schema(first, names=names) From 5136829a12ef4762f70feaf8c22561fed36660d5 Mon Sep 17 00:00:00 2001 From: Nicholas Chammas Date: Mon, 24 Aug 2020 13:55:21 -0400 Subject: [PATCH 3/3] trigger CI re-build