From 9e3ebf28e925894c6b349f731d11a35d49c9fe02 Mon Sep 17 00:00:00 2001 From: Keewis Date: Tue, 26 Jan 2021 22:54:05 +0100 Subject: [PATCH 1/6] print the repr of a multiindex using only a subset of the coordinate values --- xarray/core/formatting.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/xarray/core/formatting.py b/xarray/core/formatting.py index 282620e3569..2ed4f0c01d7 100644 --- a/xarray/core/formatting.py +++ b/xarray/core/formatting.py @@ -300,11 +300,14 @@ def _summarize_coord_multiindex(coord, col_width, marker): def _summarize_coord_levels(coord, col_width, marker="-"): + n_values = col_width // 4 + indices = list(range(0, n_values)) + list(range(-n_values, 0)) + subset = coord[indices] return "\n".join( summarize_variable( - lname, coord.get_level_variable(lname), col_width, marker=marker + lname, subset.get_level_variable(lname), col_width, marker=marker ) - for lname in coord.level_names + for lname in subset.level_names ) From 1138487f2cdf41da48c9a5dc8ba64ec6f5e604b8 Mon Sep 17 00:00:00 2001 From: Keewis Date: Tue, 26 Jan 2021 23:41:32 +0100 Subject: [PATCH 2/6] don't index if we have less items than available width --- xarray/core/formatting.py | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/xarray/core/formatting.py b/xarray/core/formatting.py index 2ed4f0c01d7..39508ac79f6 100644 --- a/xarray/core/formatting.py +++ b/xarray/core/formatting.py @@ -300,9 +300,13 @@ def _summarize_coord_multiindex(coord, col_width, marker): def _summarize_coord_levels(coord, col_width, marker="-"): - n_values = col_width // 4 - indices = list(range(0, n_values)) + list(range(-n_values, 0)) - subset = coord[indices] + if col_width < len(coord): + n_values = col_width // 4 + indices = list(range(0, n_values)) + list(range(-n_values, 0)) + subset = coord[indices] + else: + subset = coord + return "\n".join( summarize_variable( lname, subset.get_level_variable(lname), col_width, marker=marker From 
5e719b1b004eceaa4a730896b14a028b1385fd16 Mon Sep 17 00:00:00 2001 From: Keewis Date: Wed, 27 Jan 2021 00:13:08 +0100 Subject: [PATCH 3/6] don't try to shorten arrays which are way too short --- xarray/core/formatting.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/xarray/core/formatting.py b/xarray/core/formatting.py index 39508ac79f6..713cff3c112 100644 --- a/xarray/core/formatting.py +++ b/xarray/core/formatting.py @@ -300,7 +300,7 @@ def _summarize_coord_multiindex(coord, col_width, marker): def _summarize_coord_levels(coord, col_width, marker="-"): - if col_width < len(coord): + if len(coord) > 100 and col_width < len(coord): n_values = col_width // 4 indices = list(range(0, n_values)) + list(range(-n_values, 0)) subset = coord[indices] From f061dc80e9cc1cf87a09f64d0e0f414fe374af96 Mon Sep 17 00:00:00 2001 From: Keewis Date: Wed, 27 Jan 2021 00:13:37 +0100 Subject: [PATCH 4/6] col_width seems to be the maximum number of elements, not characters --- xarray/core/formatting.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/xarray/core/formatting.py b/xarray/core/formatting.py index 713cff3c112..0c1be1cc175 100644 --- a/xarray/core/formatting.py +++ b/xarray/core/formatting.py @@ -301,7 +301,7 @@ def _summarize_coord_multiindex(coord, col_width, marker): def _summarize_coord_levels(coord, col_width, marker="-"): if len(coord) > 100 and col_width < len(coord): - n_values = col_width // 4 + n_values = col_width indices = list(range(0, n_values)) + list(range(-n_values, 0)) subset = coord[indices] else: From e04152f9ae129bd98c09338246ffd5f8df7e0d6e Mon Sep 17 00:00:00 2001 From: Keewis Date: Wed, 27 Jan 2021 00:19:42 +0100 Subject: [PATCH 5/6] add an asv benchmark --- asv_bench/benchmarks/repr.py | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) create mode 100644 asv_bench/benchmarks/repr.py diff --git a/asv_bench/benchmarks/repr.py b/asv_bench/benchmarks/repr.py new file mode 100644 index 00000000000..795f5bd77b6 --- 
/dev/null +++ b/asv_bench/benchmarks/repr.py @@ -0,0 +1,18 @@ +import pandas as pd + +import xarray as xr + + +class ReprMultiIndex: + def setup(self, key): + index = pd.MultiIndex.from_product( + [range(10000), range(10000)], names=("level_0", "level_1") + ) + series = pd.Series(range(100000000), index=index) + self.da = xr.DataArray(series) + + def repr(self): + repr(self.da) + + def repr_html(self): + self.da._repr_html_() From 8f20f1feca36513f166d611a0b068a73cc98d0cc Mon Sep 17 00:00:00 2001 From: keewis Date: Fri, 29 Jan 2021 01:16:10 +0100 Subject: [PATCH 6/6] Apply suggestions from code review Co-authored-by: Maximilian Roos <5635139+max-sixty@users.noreply.github.com> --- asv_bench/benchmarks/repr.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/asv_bench/benchmarks/repr.py b/asv_bench/benchmarks/repr.py index 795f5bd77b6..b218c0be870 100644 --- a/asv_bench/benchmarks/repr.py +++ b/asv_bench/benchmarks/repr.py @@ -11,8 +11,8 @@ def setup(self, key): series = pd.Series(range(100000000), index=index) self.da = xr.DataArray(series) - def repr(self): + def time_repr(self): repr(self.da) - def repr_html(self): + def time_repr_html(self): self.da._repr_html_()