From af6066d81f4a837855678cab05efa3c81922c610 Mon Sep 17 00:00:00 2001 From: Ruifeng Zheng Date: Fri, 7 Jun 2024 10:48:04 +0800 Subject: [PATCH 1/4] fix --- python/pyspark/pandas/plot/core.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/python/pyspark/pandas/plot/core.py b/python/pyspark/pandas/plot/core.py index 5bd2a67ed39bb..99c52f577fab5 100644 --- a/python/pyspark/pandas/plot/core.py +++ b/python/pyspark/pandas/plot/core.py @@ -23,6 +23,7 @@ from pandas.core.dtypes.inference import is_integer from pyspark.sql import functions as F +from pyspark.sql.utils import is_remote from pyspark.pandas.missing import unsupported_function from pyspark.pandas.config import get_option from pyspark.pandas.utils import name_like_string @@ -948,6 +949,9 @@ def hist(self, bins=10, **kwds): >>> df = ps.from_pandas(df) >>> df.plot.hist(bins=12, alpha=0.5) # doctest: +SKIP """ + if is_remote(): + return unsupported_function(class_name="pd.DataFrame", method_name="hist")() + return self(kind="hist", bins=bins, **kwds) def kde(self, bw_method=None, ind=None, **kwargs): @@ -1023,6 +1027,9 @@ def kde(self, bw_method=None, ind=None, **kwargs): ... }) >>> df.plot.kde(ind=[1, 2, 3, 4, 5, 6], bw_method=0.3) # doctest: +SKIP """ + if is_remote(): + return unsupported_function(class_name="pd.DataFrame", method_name="kde")() + return self(kind="kde", bw_method=bw_method, ind=ind, **kwargs) density = kde From f6de6b95a27f7e89f281edf794325154b9ebb965 Mon Sep 17 00:00:00 2001 From: Ruifeng Zheng Date: Fri, 7 Jun 2024 11:10:34 +0800 Subject: [PATCH 2/4] fix --- python/pyspark/pandas/plot/core.py | 6 ++++- .../plot/test_parity_frame_plot_matplotlib.py | 11 +++++--- .../plot/test_parity_frame_plot_plotly.py | 16 +++++++----- .../test_parity_series_plot_matplotlib.py | 26 +++++++++++++------ .../plot/test_parity_series_plot_plotly.py | 11 +++++--- 5 files changed, 47 insertions(+), 23 deletions(-) diff --git a/python/pyspark/pandas/plot/core.py b/python/pyspark/pandas/plot/core.py index 99c52f577fab5..819ac02a51266 100644 --- a/python/pyspark/pandas/plot/core.py +++ b/python/pyspark/pandas/plot/core.py @@ -572,10 +572,14 @@ def _get_plot_backend(backend=None): return module def __call__(self, kind="line", backend=None, **kwargs): + kind = {"density": "kde"}.get(kind, kind) + + if is_remote() and kind in ["hist", "kde"]: + return unsupported_function(class_name="pd.DataFrame", method_name=kind)() + plot_backend = PandasOnSparkPlotAccessor._get_plot_backend(backend) plot_data = self.data - kind = {"density": "kde"}.get(kind, kind) if hasattr(plot_backend, "plot_pandas_on_spark"): # use if there's pandas-on-Spark specific method. return plot_backend.plot_pandas_on_spark(plot_data, kind=kind, **kwargs) diff --git a/python/pyspark/pandas/tests/connect/plot/test_parity_frame_plot_matplotlib.py b/python/pyspark/pandas/tests/connect/plot/test_parity_frame_plot_matplotlib.py index 9fec1c57c02d5..f75283ad68ad2 100644 --- a/python/pyspark/pandas/tests/connect/plot/test_parity_frame_plot_matplotlib.py +++ b/python/pyspark/pandas/tests/connect/plot/test_parity_frame_plot_matplotlib.py @@ -18,19 +18,22 @@ from pyspark.pandas.tests.plot.test_frame_plot_matplotlib import DataFramePlotMatplotlibTestsMixin from pyspark.testing.connectutils import ReusedConnectTestCase +from pyspark.pandas.exceptions import PandasNotImplementedError from pyspark.testing.pandasutils import PandasOnSparkTestUtils, TestUtils class DataFramePlotMatplotlibParityTests( DataFramePlotMatplotlibTestsMixin, PandasOnSparkTestUtils, TestUtils, ReusedConnectTestCase ): - @unittest.skip("Test depends on Spark ML which is not supported from Spark Connect.") def test_hist_plot(self): - super().test_hist_plot() + # "Test depends on Spark ML which is not supported from Spark Connect." + with self.assertRaises(PandasNotImplementedError) as pe: + super().test_hist_plot() - @unittest.skip("Test depends on Spark ML which is not supported from Spark Connect.") def test_kde_plot(self): - super().test_kde_plot() + # "Test depends on Spark ML which is not supported from Spark Connect." + with self.assertRaises(PandasNotImplementedError) as pe: + super().test_kde_plot() if __name__ == "__main__": diff --git a/python/pyspark/pandas/tests/connect/plot/test_parity_frame_plot_plotly.py b/python/pyspark/pandas/tests/connect/plot/test_parity_frame_plot_plotly.py index 452962d813521..81d7222fda357 100644 --- a/python/pyspark/pandas/tests/connect/plot/test_parity_frame_plot_plotly.py +++ b/python/pyspark/pandas/tests/connect/plot/test_parity_frame_plot_plotly.py @@ -18,23 +18,27 @@ from pyspark.pandas.tests.plot.test_frame_plot_plotly import DataFramePlotPlotlyTestsMixin from pyspark.testing.connectutils import ReusedConnectTestCase +from pyspark.pandas.exceptions import PandasNotImplementedError from pyspark.testing.pandasutils import PandasOnSparkTestUtils, TestUtils class DataFramePlotPlotlyParityTests( DataFramePlotPlotlyTestsMixin, PandasOnSparkTestUtils, TestUtils, ReusedConnectTestCase ): - @unittest.skip("Test depends on Spark ML which is not supported from Spark Connect.") def test_hist_layout_kwargs(self): - super().test_hist_layout_kwargs() + # "Test depends on Spark ML which is not supported from Spark Connect." + with self.assertRaises(PandasNotImplementedError) as pe: + super().test_hist_layout_kwargs() - @unittest.skip("Test depends on Spark ML which is not supported from Spark Connect.") def test_hist_plot(self): - super().test_hist_plot() + # "Test depends on Spark ML which is not supported from Spark Connect." + with self.assertRaises(PandasNotImplementedError) as pe: + super().test_hist_plot() - @unittest.skip("Test depends on Spark ML which is not supported from Spark Connect.") def test_kde_plot(self): - super().test_kde_plot() + # "Test depends on Spark ML which is not supported from Spark Connect." + with self.assertRaises(PandasNotImplementedError) as pe: + super().test_kde_plot() if __name__ == "__main__": diff --git a/python/pyspark/pandas/tests/connect/plot/test_parity_series_plot_matplotlib.py b/python/pyspark/pandas/tests/connect/plot/test_parity_series_plot_matplotlib.py index f093f48b16e9c..bea67e3c51b44 100644 --- a/python/pyspark/pandas/tests/connect/plot/test_parity_series_plot_matplotlib.py +++ b/python/pyspark/pandas/tests/connect/plot/test_parity_series_plot_matplotlib.py @@ -18,27 +18,37 @@ from pyspark.pandas.tests.plot.test_series_plot_matplotlib import SeriesPlotMatplotlibTestsMixin from pyspark.testing.connectutils import ReusedConnectTestCase +from pyspark.pandas.exceptions import PandasNotImplementedError from pyspark.testing.pandasutils import PandasOnSparkTestUtils, TestUtils class SeriesPlotMatplotlibParityTests( SeriesPlotMatplotlibTestsMixin, PandasOnSparkTestUtils, TestUtils, ReusedConnectTestCase ): - @unittest.skip("Test depends on Spark ML which is not supported from Spark Connect.") + def test_empty_hist(self): + # "Test depends on Spark ML which is not supported from Spark Connect." + with self.assertRaises(PandasNotImplementedError) as pe: + super().test_empty_hist() + def test_hist(self): - super().test_hist() + # "Test depends on Spark ML which is not supported from Spark Connect." + with self.assertRaises(PandasNotImplementedError) as pe: + super().test_hist() - @unittest.skip("Test depends on Spark ML which is not supported from Spark Connect.") def test_hist_plot(self): - super().test_hist_plot() + # "Test depends on Spark ML which is not supported from Spark Connect." + with self.assertRaises(PandasNotImplementedError) as pe: + super().test_hist_plot() - @unittest.skip("Test depends on Spark ML which is not supported from Spark Connect.") def test_kde_plot(self): - super().test_kde_plot() + # "Test depends on Spark ML which is not supported from Spark Connect." + with self.assertRaises(PandasNotImplementedError) as pe: + super().test_kde_plot() - @unittest.skip("Test depends on Spark ML which is not supported from Spark Connect.") def test_single_value_hist(self): - super().test_single_value_hist() + # "Test depends on Spark ML which is not supported from Spark Connect." + with self.assertRaises(PandasNotImplementedError) as pe: + super().test_single_value_hist() if __name__ == "__main__": diff --git a/python/pyspark/pandas/tests/connect/plot/test_parity_series_plot_plotly.py b/python/pyspark/pandas/tests/connect/plot/test_parity_series_plot_plotly.py index 795732950b8a0..a513f50f282e9 100644 --- a/python/pyspark/pandas/tests/connect/plot/test_parity_series_plot_plotly.py +++ b/python/pyspark/pandas/tests/connect/plot/test_parity_series_plot_plotly.py @@ -18,19 +18,22 @@ from pyspark.pandas.tests.plot.test_series_plot_plotly import SeriesPlotPlotlyTestsMixin from pyspark.testing.connectutils import ReusedConnectTestCase +from pyspark.pandas.exceptions import PandasNotImplementedError from pyspark.testing.pandasutils import PandasOnSparkTestUtils, TestUtils class SeriesPlotPlotlyParityTests( SeriesPlotPlotlyTestsMixin, PandasOnSparkTestUtils, TestUtils, ReusedConnectTestCase ): - @unittest.skip("Test depends on Spark ML which is not supported from Spark Connect.") def test_hist_plot(self): - super().test_hist_plot() + # "Test depends on Spark ML which is not supported from Spark Connect." + with self.assertRaises(PandasNotImplementedError) as pe: + super().test_hist_plot() - @unittest.skip("Test depends on Spark ML which is not supported from Spark Connect.") def test_kde_plot(self): - super().test_kde_plot() + # "Test depends on Spark ML which is not supported from Spark Connect." + with self.assertRaises(PandasNotImplementedError) as pe: + super().test_kde_plot() if __name__ == "__main__": From 898826882ea36c3ef4ec382c92080404fdd6b2f4 Mon Sep 17 00:00:00 2001 From: Ruifeng Zheng Date: Fri, 7 Jun 2024 12:43:57 +0800 Subject: [PATCH 3/4] fix lint --- .../connect/plot/test_parity_frame_plot_matplotlib.py | 4 ++-- .../connect/plot/test_parity_frame_plot_plotly.py | 6 +++--- .../connect/plot/test_parity_series_plot_matplotlib.py | 10 +++++----- .../connect/plot/test_parity_series_plot_plotly.py | 4 ++-- 4 files changed, 12 insertions(+), 12 deletions(-) diff --git a/python/pyspark/pandas/tests/connect/plot/test_parity_frame_plot_matplotlib.py b/python/pyspark/pandas/tests/connect/plot/test_parity_frame_plot_matplotlib.py index f75283ad68ad2..eb6e18138cbf3 100644 --- a/python/pyspark/pandas/tests/connect/plot/test_parity_frame_plot_matplotlib.py +++ b/python/pyspark/pandas/tests/connect/plot/test_parity_frame_plot_matplotlib.py @@ -27,12 +27,12 @@ class DataFramePlotMatplotlibParityTests( ): def test_hist_plot(self): # "Test depends on Spark ML which is not supported from Spark Connect." - with self.assertRaises(PandasNotImplementedError) as pe: + with self.assertRaises(PandasNotImplementedError): super().test_hist_plot() def test_kde_plot(self): # "Test depends on Spark ML which is not supported from Spark Connect." - with self.assertRaises(PandasNotImplementedError) as pe: + with self.assertRaises(PandasNotImplementedError): super().test_kde_plot() diff --git a/python/pyspark/pandas/tests/connect/plot/test_parity_frame_plot_plotly.py b/python/pyspark/pandas/tests/connect/plot/test_parity_frame_plot_plotly.py index 81d7222fda357..a892cf58023d5 100644 --- a/python/pyspark/pandas/tests/connect/plot/test_parity_frame_plot_plotly.py +++ b/python/pyspark/pandas/tests/connect/plot/test_parity_frame_plot_plotly.py @@ -27,17 +27,17 @@ class DataFramePlotPlotlyParityTests( ): def test_hist_layout_kwargs(self): # "Test depends on Spark ML which is not supported from Spark Connect." - with self.assertRaises(PandasNotImplementedError) as pe: + with self.assertRaises(PandasNotImplementedError): super().test_hist_layout_kwargs() def test_hist_plot(self): # "Test depends on Spark ML which is not supported from Spark Connect." - with self.assertRaises(PandasNotImplementedError) as pe: + with self.assertRaises(PandasNotImplementedError): super().test_hist_plot() def test_kde_plot(self): # "Test depends on Spark ML which is not supported from Spark Connect." - with self.assertRaises(PandasNotImplementedError) as pe: + with self.assertRaises(PandasNotImplementedError): super().test_kde_plot() diff --git a/python/pyspark/pandas/tests/connect/plot/test_parity_series_plot_matplotlib.py b/python/pyspark/pandas/tests/connect/plot/test_parity_series_plot_matplotlib.py index bea67e3c51b44..e8bb92e0b0e15 100644 --- a/python/pyspark/pandas/tests/connect/plot/test_parity_series_plot_matplotlib.py +++ b/python/pyspark/pandas/tests/connect/plot/test_parity_series_plot_matplotlib.py @@ -27,27 +27,27 @@ class SeriesPlotMatplotlibParityTests( ): def test_empty_hist(self): # "Test depends on Spark ML which is not supported from Spark Connect." - with self.assertRaises(PandasNotImplementedError) as pe: + with self.assertRaises(PandasNotImplementedError): super().test_empty_hist() def test_hist(self): # "Test depends on Spark ML which is not supported from Spark Connect." - with self.assertRaises(PandasNotImplementedError) as pe: + with self.assertRaises(PandasNotImplementedError): super().test_hist() def test_hist_plot(self): # "Test depends on Spark ML which is not supported from Spark Connect." - with self.assertRaises(PandasNotImplementedError) as pe: + with self.assertRaises(PandasNotImplementedError): super().test_hist_plot() def test_kde_plot(self): # "Test depends on Spark ML which is not supported from Spark Connect." - with self.assertRaises(PandasNotImplementedError) as pe: + with self.assertRaises(PandasNotImplementedError): super().test_kde_plot() def test_single_value_hist(self): # "Test depends on Spark ML which is not supported from Spark Connect." - with self.assertRaises(PandasNotImplementedError) as pe: + with self.assertRaises(PandasNotImplementedError): super().test_single_value_hist() diff --git a/python/pyspark/pandas/tests/connect/plot/test_parity_series_plot_plotly.py b/python/pyspark/pandas/tests/connect/plot/test_parity_series_plot_plotly.py index a513f50f282e9..4c2566eb13aa7 100644 --- a/python/pyspark/pandas/tests/connect/plot/test_parity_series_plot_plotly.py +++ b/python/pyspark/pandas/tests/connect/plot/test_parity_series_plot_plotly.py @@ -27,12 +27,12 @@ class SeriesPlotPlotlyParityTests( ): def test_hist_plot(self): # "Test depends on Spark ML which is not supported from Spark Connect." - with self.assertRaises(PandasNotImplementedError) as pe: + with self.assertRaises(PandasNotImplementedError): super().test_hist_plot() def test_kde_plot(self): # "Test depends on Spark ML which is not supported from Spark Connect." - with self.assertRaises(PandasNotImplementedError) as pe: + with self.assertRaises(PandasNotImplementedError): super().test_kde_plot() From 13da681377eca812c82b3decdf4b3475a934f40e Mon Sep 17 00:00:00 2001 From: Ruifeng Zheng Date: Fri, 7 Jun 2024 13:42:08 +0800 Subject: [PATCH 4/4] fix lint --- dev/sparktestsupport/modules.py | 2 + .../plot/test_parity_frame_plot_matplotlib.py | 11 +- .../plot/test_parity_frame_plot_plotly.py | 16 +-- .../test_parity_series_plot_matplotlib.py | 26 ++-- .../plot/test_parity_series_plot_plotly.py | 11 +- .../tests/connect/test_connect_plotting.py | 124 ++++++++++++++++++ 6 files changed, 150 insertions(+), 40 deletions(-) create mode 100644 python/pyspark/pandas/tests/connect/test_connect_plotting.py diff --git a/dev/sparktestsupport/modules.py b/dev/sparktestsupport/modules.py index e182d0c33f16c..b97ec34b53824 100644 --- a/dev/sparktestsupport/modules.py +++ b/dev/sparktestsupport/modules.py @@ -1102,6 +1102,8 @@ def __hash__(self): "python/pyspark/pandas", ], python_test_goals=[ + # unittests dedicated for Spark Connect + "pyspark.pandas.tests.connect.test_connect_plotting", # pandas-on-Spark unittests "pyspark.pandas.tests.connect.test_parity_categorical", "pyspark.pandas.tests.connect.test_parity_config", diff --git a/python/pyspark/pandas/tests/connect/plot/test_parity_frame_plot_matplotlib.py b/python/pyspark/pandas/tests/connect/plot/test_parity_frame_plot_matplotlib.py index eb6e18138cbf3..9fec1c57c02d5 100644 --- a/python/pyspark/pandas/tests/connect/plot/test_parity_frame_plot_matplotlib.py +++ b/python/pyspark/pandas/tests/connect/plot/test_parity_frame_plot_matplotlib.py @@ -18,22 +18,19 @@ from pyspark.pandas.tests.plot.test_frame_plot_matplotlib import DataFramePlotMatplotlibTestsMixin from pyspark.testing.connectutils import ReusedConnectTestCase -from pyspark.pandas.exceptions import PandasNotImplementedError from pyspark.testing.pandasutils import PandasOnSparkTestUtils, TestUtils class DataFramePlotMatplotlibParityTests( DataFramePlotMatplotlibTestsMixin, PandasOnSparkTestUtils, TestUtils, ReusedConnectTestCase ): + @unittest.skip("Test depends on Spark ML which is not supported from Spark Connect.") def test_hist_plot(self): - # "Test depends on Spark ML which is not supported from Spark Connect." - with self.assertRaises(PandasNotImplementedError): - super().test_hist_plot() + super().test_hist_plot() + @unittest.skip("Test depends on Spark ML which is not supported from Spark Connect.") def test_kde_plot(self): - # "Test depends on Spark ML which is not supported from Spark Connect." - with self.assertRaises(PandasNotImplementedError): - super().test_kde_plot() + super().test_kde_plot() if __name__ == "__main__": diff --git a/python/pyspark/pandas/tests/connect/plot/test_parity_frame_plot_plotly.py b/python/pyspark/pandas/tests/connect/plot/test_parity_frame_plot_plotly.py index a892cf58023d5..452962d813521 100644 --- a/python/pyspark/pandas/tests/connect/plot/test_parity_frame_plot_plotly.py +++ b/python/pyspark/pandas/tests/connect/plot/test_parity_frame_plot_plotly.py @@ -18,27 +18,23 @@ from pyspark.pandas.tests.plot.test_frame_plot_plotly import DataFramePlotPlotlyTestsMixin from pyspark.testing.connectutils import ReusedConnectTestCase -from pyspark.pandas.exceptions import PandasNotImplementedError from pyspark.testing.pandasutils import PandasOnSparkTestUtils, TestUtils class DataFramePlotPlotlyParityTests( DataFramePlotPlotlyTestsMixin, PandasOnSparkTestUtils, TestUtils, ReusedConnectTestCase ): + @unittest.skip("Test depends on Spark ML which is not supported from Spark Connect.") def test_hist_layout_kwargs(self): - # "Test depends on Spark ML which is not supported from Spark Connect." - with self.assertRaises(PandasNotImplementedError): - super().test_hist_layout_kwargs() + super().test_hist_layout_kwargs() + @unittest.skip("Test depends on Spark ML which is not supported from Spark Connect.") def test_hist_plot(self): - # "Test depends on Spark ML which is not supported from Spark Connect." - with self.assertRaises(PandasNotImplementedError): - super().test_hist_plot() + super().test_hist_plot() + @unittest.skip("Test depends on Spark ML which is not supported from Spark Connect.") def test_kde_plot(self): - # "Test depends on Spark ML which is not supported from Spark Connect." - with self.assertRaises(PandasNotImplementedError): - super().test_kde_plot() + super().test_kde_plot() if __name__ == "__main__": diff --git a/python/pyspark/pandas/tests/connect/plot/test_parity_series_plot_matplotlib.py b/python/pyspark/pandas/tests/connect/plot/test_parity_series_plot_matplotlib.py index e8bb92e0b0e15..abb18d473bf8d 100644 --- a/python/pyspark/pandas/tests/connect/plot/test_parity_series_plot_matplotlib.py +++ b/python/pyspark/pandas/tests/connect/plot/test_parity_series_plot_matplotlib.py @@ -18,37 +18,31 @@ from pyspark.pandas.tests.plot.test_series_plot_matplotlib import SeriesPlotMatplotlibTestsMixin from pyspark.testing.connectutils import ReusedConnectTestCase -from pyspark.pandas.exceptions import PandasNotImplementedError from pyspark.testing.pandasutils import PandasOnSparkTestUtils, TestUtils class SeriesPlotMatplotlibParityTests( SeriesPlotMatplotlibTestsMixin, PandasOnSparkTestUtils, TestUtils, ReusedConnectTestCase ): + @unittest.skip("Test depends on Spark ML which is not supported from Spark Connect.") def test_empty_hist(self): - # "Test depends on Spark ML which is not supported from Spark Connect." - with self.assertRaises(PandasNotImplementedError): - super().test_empty_hist() + super().test_empty_hist() + @unittest.skip("Test depends on Spark ML which is not supported from Spark Connect.") def test_hist(self): - # "Test depends on Spark ML which is not supported from Spark Connect." - with self.assertRaises(PandasNotImplementedError): - super().test_hist() + super().test_hist() + @unittest.skip("Test depends on Spark ML which is not supported from Spark Connect.") def test_hist_plot(self): - # "Test depends on Spark ML which is not supported from Spark Connect." - with self.assertRaises(PandasNotImplementedError): - super().test_hist_plot() + super().test_hist_plot() + @unittest.skip("Test depends on Spark ML which is not supported from Spark Connect.") def test_kde_plot(self): - # "Test depends on Spark ML which is not supported from Spark Connect." - with self.assertRaises(PandasNotImplementedError): - super().test_kde_plot() + super().test_kde_plot() + @unittest.skip("Test depends on Spark ML which is not supported from Spark Connect.") def test_single_value_hist(self): - # "Test depends on Spark ML which is not supported from Spark Connect." - with self.assertRaises(PandasNotImplementedError): - super().test_single_value_hist() + super().test_single_value_hist() if __name__ == "__main__": diff --git a/python/pyspark/pandas/tests/connect/plot/test_parity_series_plot_plotly.py b/python/pyspark/pandas/tests/connect/plot/test_parity_series_plot_plotly.py index 4c2566eb13aa7..795732950b8a0 100644 --- a/python/pyspark/pandas/tests/connect/plot/test_parity_series_plot_plotly.py +++ b/python/pyspark/pandas/tests/connect/plot/test_parity_series_plot_plotly.py @@ -18,22 +18,19 @@ from pyspark.pandas.tests.plot.test_series_plot_plotly import SeriesPlotPlotlyTestsMixin from pyspark.testing.connectutils import ReusedConnectTestCase -from pyspark.pandas.exceptions import PandasNotImplementedError from pyspark.testing.pandasutils import PandasOnSparkTestUtils, TestUtils class SeriesPlotPlotlyParityTests( SeriesPlotPlotlyTestsMixin, PandasOnSparkTestUtils, TestUtils, ReusedConnectTestCase ): + @unittest.skip("Test depends on Spark ML which is not supported from Spark Connect.") def test_hist_plot(self): - # "Test depends on Spark ML which is not supported from Spark Connect." - with self.assertRaises(PandasNotImplementedError): - super().test_hist_plot() + super().test_hist_plot() + @unittest.skip("Test depends on Spark ML which is not supported from Spark Connect.") def test_kde_plot(self): - # "Test depends on Spark ML which is not supported from Spark Connect." - with self.assertRaises(PandasNotImplementedError): - super().test_kde_plot() + super().test_kde_plot() if __name__ == "__main__": diff --git a/python/pyspark/pandas/tests/connect/test_connect_plotting.py b/python/pyspark/pandas/tests/connect/test_connect_plotting.py new file mode 100644 index 0000000000000..9b7cfebfcd552 --- /dev/null +++ b/python/pyspark/pandas/tests/connect/test_connect_plotting.py @@ -0,0 +1,124 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +import unittest + +import pandas as pd + +from pyspark import pandas as ps +from pyspark.pandas.exceptions import PandasNotImplementedError +from pyspark.testing.connectutils import ReusedConnectTestCase +from pyspark.testing.pandasutils import PandasOnSparkTestUtils, TestUtils + + +class ConnectPlottingTests(PandasOnSparkTestUtils, TestUtils, ReusedConnectTestCase): + @property + def pdf1(self): + return pd.DataFrame( + [[1, 2], [4, 5], [7, 8]], + index=["cobra", "viper", None], + columns=["max_speed", "shield"], + ) + + @property + def psdf1(self): + return ps.from_pandas(self.pdf1) + + def test_unsupported_functions(self): + with self.assertRaises(PandasNotImplementedError): + self.psdf1.plot.hist() + + with self.assertRaises(PandasNotImplementedError): + self.psdf1.plot.hist(bins=3) + + with self.assertRaises(PandasNotImplementedError): + self.psdf1.plot.kde() + + with self.assertRaises(PandasNotImplementedError): + self.psdf1.plot.kde(bw_method=3) + + with self.assertRaises(PandasNotImplementedError): + self.psdf1.plot.density() + + with self.assertRaises(PandasNotImplementedError): + self.psdf1.plot.density(bw_method=3) + + with self.assertRaises(PandasNotImplementedError): + self.psdf1.shield.plot.hist() + + with self.assertRaises(PandasNotImplementedError): + self.psdf1.shield.plot.hist(bins=3) + + with self.assertRaises(PandasNotImplementedError): + self.psdf1.shield.plot.kde() + + with self.assertRaises(PandasNotImplementedError): + self.psdf1.shield.plot.kde(bw_method=3) + + with self.assertRaises(PandasNotImplementedError): + self.psdf1.shield.plot.density() + + with self.assertRaises(PandasNotImplementedError): + self.psdf1.shield.plot.density(bw_method=3) + + def test_unsupported_kinds(self): + with self.assertRaises(PandasNotImplementedError): + self.psdf1.plot(kind="hist") + + with self.assertRaises(PandasNotImplementedError): + self.psdf1.plot(kind="hist", bins=3) + + with self.assertRaises(PandasNotImplementedError): + self.psdf1.plot(kind="kde") + + with self.assertRaises(PandasNotImplementedError): + self.psdf1.plot(kind="kde", bw_method=3) + + with self.assertRaises(PandasNotImplementedError): + self.psdf1.plot(kind="density") + + with self.assertRaises(PandasNotImplementedError): + self.psdf1.plot(kind="density", bw_method=3) + + with self.assertRaises(PandasNotImplementedError): + self.psdf1.shield.plot(kind="hist") + + with self.assertRaises(PandasNotImplementedError): + self.psdf1.shield.plot(kind="hist", bins=3) + + with self.assertRaises(PandasNotImplementedError): + self.psdf1.shield.plot(kind="kde") + + with self.assertRaises(PandasNotImplementedError): + self.psdf1.shield.plot(kind="kde", bw_method=3) + + with self.assertRaises(PandasNotImplementedError): + self.psdf1.shield.plot(kind="density") + + with self.assertRaises(PandasNotImplementedError): + self.psdf1.shield.plot(kind="density", bw_method=3) + + +if __name__ == "__main__": + from pyspark.pandas.tests.connect.test_connect_plotting import * # noqa: F401 + + try: + import xmlrunner # type: ignore[import] + + testRunner = xmlrunner.XMLTestRunner(output="target/test-reports", verbosity=2) + except ImportError: + testRunner = None + unittest.main(testRunner=testRunner, verbosity=2)