diff --git a/python/pyspark/sql/dataframe.py b/python/pyspark/sql/dataframe.py index 09647ff6d074..da742d7ce7d1 100644 --- a/python/pyspark/sql/dataframe.py +++ b/python/pyspark/sql/dataframe.py @@ -722,8 +722,6 @@ def __getitem__(self, item): [Row(age=5, name=u'Bob')] """ if isinstance(item, basestring): - if item not in self.columns: - raise IndexError("no such column: %s" % item) jc = self._jdf.apply(item) return Column(jc) elif isinstance(item, Column): diff --git a/python/pyspark/sql/tests.py b/python/pyspark/sql/tests.py index 38c83c427a74..6a09b8e29748 100644 --- a/python/pyspark/sql/tests.py +++ b/python/pyspark/sql/tests.py @@ -770,7 +770,7 @@ def test_access_column(self): self.assertTrue(isinstance(df['key'], Column)) self.assertTrue(isinstance(df[0], Column)) self.assertRaises(IndexError, lambda: df[2]) - self.assertRaises(IndexError, lambda: df["bad_key"]) + self.assertRaises(AnalysisException, lambda: df["bad_key"]) self.assertRaises(TypeError, lambda: df[{}]) def test_column_name_with_non_ascii(self): @@ -794,7 +794,9 @@ def test_field_accessor(self): df = self.sc.parallelize([Row(l=[1], r=Row(a=1, b="b"), d={"k": "v"})]).toDF() self.assertEqual(1, df.select(df.l[0]).first()[0]) self.assertEqual(1, df.select(df.r["a"]).first()[0]) + self.assertEqual(1, df.select(df["r.a"]).first()[0]) self.assertEqual("b", df.select(df.r["b"]).first()[0]) + self.assertEqual("b", df.select(df["r.b"]).first()[0]) self.assertEqual("v", df.select(df.d["k"]).first()[0]) def test_infer_long_type(self): diff --git a/sql/core/src/main/scala/org/apache/spark/sql/DataFrame.scala b/sql/core/src/main/scala/org/apache/spark/sql/DataFrame.scala index c466d9e6cb34..ec5084af1460 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/DataFrame.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/DataFrame.scala @@ -634,6 +634,7 @@ class DataFrame private[sql]( /** * Selects column based on the column name and return it as a [[Column]]. + * Note that the column name can also reference to a nested column like `a.b`. * @group dfops * @since 1.3.0 */ @@ -641,6 +642,7 @@ class DataFrame private[sql]( /** * Selects column based on the column name and return it as a [[Column]]. + * Note that the column name can also reference to a nested column like `a.b`. * @group dfops * @since 1.3.0 */