2 changes: 0 additions & 2 deletions python/pyspark/sql/dataframe.py
@@ -722,8 +722,6 @@ def __getitem__(self, item):
        [Row(age=5, name=u'Bob')]
        """
        if isinstance(item, basestring):
-           if item not in self.columns:
-               raise IndexError("no such column: %s" % item)
            jc = self._jdf.apply(item)
Contributor:
Could we still have this check if there is no '.' in item?

Contributor:
Or maybe, instead of doing any checks here, is it possible to catch the specific errors thrown by the analyzer and just reformat them nicely?

Contributor:
(I assume that was the whole point of doing a check here?)

Contributor:
We already capture the AnalysisException. If that's good enough, we can just remove this check.

            return Column(jc)
        elif isinstance(item, Column):
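With the Python-side check removed, `df[name]` forwards the string straight to `self._jdf.apply(name)`: the Scala analyzer resolves plain and nested names alike (e.g. `r.a`), and an unresolvable name comes back as an AnalysisException, which the existing error capture re-raises on the Python side in place of the old IndexError. A minimal sketch of handling that from user code, assuming `pyspark.sql.utils.AnalysisException` as used in the updated tests (the helper name is illustrative, not part of this diff):

from pyspark.sql.utils import AnalysisException

def get_column_or_none(df, name):
    """Return df[name], or None if the analyzer cannot resolve the name."""
    try:
        return df[name]        # works for plain names and nested ones like "r.a"
    except AnalysisException:  # raised instead of IndexError after this change
        return None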
4 changes: 3 additions & 1 deletion python/pyspark/sql/tests.py
@@ -770,7 +770,7 @@ def test_access_column(self):
        self.assertTrue(isinstance(df['key'], Column))
        self.assertTrue(isinstance(df[0], Column))
        self.assertRaises(IndexError, lambda: df[2])
-       self.assertRaises(IndexError, lambda: df["bad_key"])
+       self.assertRaises(AnalysisException, lambda: df["bad_key"])
        self.assertRaises(TypeError, lambda: df[{}])

    def test_column_name_with_non_ascii(self):
@@ -794,7 +794,9 @@ def test_field_accessor(self):
        df = self.sc.parallelize([Row(l=[1], r=Row(a=1, b="b"), d={"k": "v"})]).toDF()
        self.assertEqual(1, df.select(df.l[0]).first()[0])
        self.assertEqual(1, df.select(df.r["a"]).first()[0])
+       self.assertEqual(1, df.select(df["r.a"]).first()[0])
        self.assertEqual("b", df.select(df.r["b"]).first()[0])
+       self.assertEqual("b", df.select(df["r.b"]).first()[0])
        self.assertEqual("v", df.select(df.d["k"]).first()[0])

    def test_infer_long_type(self):
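As a standalone illustration of what the new assertions cover (a sketch only; the real tests reuse the shared SparkContext and SQLContext from the test fixtures instead of creating their own, and the master/app name below are illustrative):

from pyspark import SparkContext
from pyspark.sql import SQLContext, Row

sc = SparkContext("local[2]", "nested-field-demo")
sqlContext = SQLContext(sc)  # creating the SQLContext makes RDD.toDF() available

df = sc.parallelize([Row(l=[1], r=Row(a=1, b="b"), d={"k": "v"})]).toDF()

# The dotted string form and the attribute/key form select the same struct field.
assert df.select(df["r.a"]).first()[0] == df.select(df.r["a"]).first()[0] == 1
assert df.select(df["r.b"]).first()[0] == "b"

sc.stop()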
2 changes: 2 additions & 0 deletions sql/core/src/main/scala/org/apache/spark/sql/DataFrame.scala
@@ -634,13 +634,15 @@ class DataFrame private[sql](

  /**
   * Selects column based on the column name and return it as a [[Column]].
+  * Note that the column name can also reference a nested column like `a.b`.
   * @group dfops
   * @since 1.3.0
   */
  def apply(colName: String): Column = col(colName)

  /**
   * Selects column based on the column name and return it as a [[Column]].
+  * Note that the column name can also reference a nested column like `a.b`.
   * @group dfops
   * @since 1.3.0
   */