@@ -797,6 +797,149 @@ def test_set_categories_inplace(self):
797797 cat .set_categories (['a' , 'b' , 'c' , 'd' ], inplace = True )
798798 tm .assert_index_equal (cat .categories , pd .Index (['a' , 'b' , 'c' , 'd' ]))
799799
@pytest.mark.parametrize(
    "input1, input2, cat_array",
    [
        # The same three value sets under each plain NumPy scalar dtype.
        # The scalar types are referenced directly (np.float64, np.str_)
        # instead of via the legacy 'float_' / 'unicode_' alias strings,
        # which newer NumPy releases no longer accept.
        (
            np.array([1, 2, 3, 3], dtype=dtype),
            np.array([1, 2, 3, 5, 3, 2, 4], dtype=dtype),
            np.array([1, 2, 3, 4, 5], dtype=dtype),
        )
        for dtype in (np.int_, np.uint, np.float64, np.str_)
    ] + [
        (
            np.array(
                [
                    '2017-01-01 10:00:00', '2017-02-01 10:00:00',
                    '2017-03-01 10:00:00', '2017-03-01 10:00:00',
                ],
                dtype='datetime64'
            ),
            np.array(
                [
                    '2017-01-01 10:00:00', '2017-02-01 10:00:00',
                    '2017-03-01 10:00:00', '2017-05-01 10:00:00',
                    '2017-03-01 10:00:00', '2017-02-01 10:00:00',
                    '2017-04-01 10:00:00',
                ],
                dtype='datetime64'
            ),
            np.array(
                [
                    '2017-01-01 10:00:00', '2017-02-01 10:00:00',
                    '2017-03-01 10:00:00', '2017-04-01 10:00:00',
                    '2017-05-01 10:00:00',
                ],
                dtype='datetime64'
            ),
        ),
        (
            # No ``unit`` argument: every string already carries its own
            # unit, and pandas rejects ``unit`` for string input.
            pd.to_timedelta(['1 days', '2 days', '3 days', '3 days']),
            pd.to_timedelta(['1 days', '2 days', '3 days', '5 days',
                             '3 days', '2 days', '4 days']),
            pd.timedelta_range("1 days", periods=5, freq="D"),
        ),
    ]
)
@pytest.mark.parametrize("is_ordered", [True, False])
def test_drop_duplicates_non_bool(self, input1, input2,
                                  cat_array, is_ordered):
    """Check duplicated/drop_duplicates on non-bool categorical Series.

    ``input1`` and ``input2`` are each wrapped in a ``Categorical`` with
    categories ``cat_array`` and ``ordered=is_ordered``.  For the default
    ``keep``, ``keep='last'`` and ``keep=False`` the test asserts that:

    * ``duplicated`` returns the expected boolean mask,
    * ``drop_duplicates`` returns the rows where the mask is False,
    * ``drop_duplicates(inplace=True)`` on a copy leaves the same rows.
    """
    # One entry per input: (values, [(keyword arguments, expected mask)]).
    # An empty kwargs dict exercises the default ``keep``.
    cases = [
        (input1, [
            ({}, [False, False, False, True]),
            ({'keep': 'last'}, [False, False, True, False]),
            ({'keep': False}, [False, False, True, True]),
        ]),
        (input2, [
            ({}, [False, False, False, False, True, True, False]),
            ({'keep': 'last'},
             [False, True, True, False, False, False, False]),
            ({'keep': False},
             [False, True, True, False, True, True, False]),
        ]),
    ]

    for values, scenarios in cases:
        ser = Series(Categorical(values, categories=cat_array,
                                 ordered=is_ordered))
        for kwargs, flags in scenarios:
            expected = Series(flags)
            tm.assert_series_equal(ser.duplicated(**kwargs), expected)
            tm.assert_series_equal(ser.drop_duplicates(**kwargs),
                                   ser[~expected])
            # The in-place variant must leave the same rows behind.
            sc = ser.copy()
            sc.drop_duplicates(inplace=True, **kwargs)
            tm.assert_series_equal(sc, ser[~expected])
916+
@pytest.mark.parametrize("is_ordered", [True, False])
def test_drop_duplicates_bool(self, is_ordered):
    """Check duplicated/drop_duplicates on a boolean categorical Series.

    The Series holds ``[True, False, True, False]`` with categories
    ``[True, False]`` and ``ordered=is_ordered``.  For the default
    ``keep``, ``keep='last'`` and ``keep=False`` the test asserts the
    mask returned by ``duplicated`` and that ``drop_duplicates`` (both
    the returning and the in-place form) keeps the unmasked rows.
    """
    ser = Series(Categorical([True, False, True, False],
                             categories=[True, False],
                             ordered=is_ordered))

    # (keyword arguments, expected ``duplicated`` mask); the empty dict
    # exercises the default ``keep``.
    scenarios = (
        ({}, [False, False, True, True]),
        ({'keep': 'last'}, [True, True, False, False]),
        ({'keep': False}, [True, True, True, True]),
    )

    for kwargs, flags in scenarios:
        mask = Series(flags)
        tm.assert_series_equal(ser.duplicated(**kwargs), mask)
        tm.assert_series_equal(ser.drop_duplicates(**kwargs), ser[~mask])
        # Same check for the in-place form, run on a copy.
        in_place = ser.copy()
        in_place.drop_duplicates(inplace=True, **kwargs)
        tm.assert_series_equal(in_place, ser[~mask])
942+
800943 def test_describe (self ):
801944 # string type
802945 desc = self .factor .describe ()
0 commit comments