2828 Series ,
2929 Timedelta ,
3030 TimedeltaIndex ,
31- Timestamp ,
3231)
3332import pandas ._testing as tm
3433
@@ -39,6 +38,23 @@ def allow_na_ops(obj: Any) -> bool:
3938 return not is_bool_index and obj ._can_hold_na
4039
4140
def multiply_values(obj):
    """
    Repeat the values of ``obj`` an increasing number of times.

    The element at position ``i`` is repeated ``i + 1`` times, so each value
    occurs more often than the values that precede it.

    Parameters
    ----------
    obj : pd.Index or pd.Series
        Object whose values should be repeated.

    Returns
    -------
    pd.Index or pd.Series
        Repeated object produced by ``obj.repeat``; for a Series the index
        labels are repeated alongside the values, and the name and dtype of
        ``obj`` are carried over.

    Raises
    ------
    TypeError
        If ``obj`` is neither an Index nor a Series.
    """
    if not isinstance(obj, (pd.Index, pd.Series)):
        raise TypeError(f"Unexpected type: {type(obj)}")

    # ``repeat`` is implemented by both Index and Series.  Going through it
    # (instead of rebuilding a Series from ``obj.values``) keeps the name and
    # any extension dtype, e.g. tz-aware datetimes, intact.
    return obj.repeat(np.arange(1, len(obj) + 1))
56+
57+
4258class Ops :
4359 def setup_method (self , method ):
4460 self .bool_index = tm .makeBoolIndex (10 , name = "a" )
@@ -205,7 +221,31 @@ def test_ndarray_compat_properties(self, index_or_series_obj):
205221 assert Index ([1 ]).item () == 1
206222 assert Series ([1 ]).item () == 1
207223
208- def test_value_counts_unique_nunique (self , index_or_series_obj ):
def test_unique(self, index_or_series_obj):
    """
    Check ``unique()`` against an order-preserving de-duplication.

    The fixture object is first expanded with ``multiply_values`` so that it
    contains duplicates; the reference result is built from its raw values.
    """
    repeated = multiply_values(index_or_series_obj)
    actual = repeated.unique()

    # Dict keys keep first-seen insertion order, which yields the reference
    # list of distinct values.
    distinct = list(dict.fromkeys(repeated.values))

    if isinstance(repeated, pd.MultiIndex):
        wanted = pd.MultiIndex.from_tuples(distinct)
        wanted.names = repeated.names
        tm.assert_index_equal(actual, wanted)
        return

    if not isinstance(repeated, pd.Index):
        # Non-Index objects are compared as a plain ndarray.
        wanted = np.array(distinct)
        tm.assert_numpy_array_equal(actual, wanted)
        return

    wanted = pd.Index(distinct, dtype=repeated.dtype)
    if is_datetime64tz_dtype(repeated):
        wanted = wanted.normalize()
    tm.assert_index_equal(actual, wanted)
242+
def test_nunique(self, index_or_series_obj):
    """
    Check that ``nunique(dropna=False)`` equals the length of ``unique()``.

    The fixture object is expanded with ``multiply_values`` beforehand so it
    holds repeated values.
    """
    repeated = multiply_values(index_or_series_obj)
    observed = repeated.nunique(dropna=False)
    expected_count = len(repeated.unique())
    assert observed == expected_count
247+
248+ def test_value_counts (self , index_or_series_obj ):
209249 orig = index_or_series_obj
210250 obj = orig .copy ()
211251 klass = type (obj )
@@ -242,27 +282,6 @@ def test_value_counts_unique_nunique(self, index_or_series_obj):
242282 tm .assert_series_equal (result , expected_s )
243283 assert result .index .name is None
244284
245- result = obj .unique ()
246- if isinstance (obj , Index ):
247- assert isinstance (result , type (obj ))
248- tm .assert_index_equal (result , orig )
249- assert result .dtype == orig .dtype
250- elif is_datetime64tz_dtype (obj ):
251- # datetimetz Series returns array of Timestamp
252- assert result [0 ] == orig [0 ]
253- for r in result :
254- assert isinstance (r , Timestamp )
255-
256- tm .assert_numpy_array_equal (
257- result .astype (object ), orig ._values .astype (object )
258- )
259- else :
260- tm .assert_numpy_array_equal (result , orig .values )
261- assert result .dtype == orig .dtype
262-
263- # dropna=True would break for MultiIndex
264- assert obj .nunique (dropna = False ) == len (np .unique (obj .values ))
265-
266285 @pytest .mark .parametrize ("null_obj" , [np .nan , None ])
267286 def test_value_counts_unique_nunique_null (self , null_obj , index_or_series_obj ):
268287 orig = index_or_series_obj
0 commit comments