@@ -1493,3 +1493,83 @@ def test_replace_period_ignore_float(self):
14931493 result = df .replace (1.0 , 0.0 )
14941494 expected = pd .DataFrame ({"Per" : [pd .Period ("2020-01" )] * 3 })
14951495 tm .assert_frame_equal (expected , result )
1496+
1497+ def test_replace_value_category_type (self ):
1498+ """
1499+ Test for #23305: to ensure category dtypes are maintained
1500+ after replace with direct values
1501+ """
1502+
1503+ # create input data
1504+ input_dict = {
1505+ "col1" : [1 , 2 , 3 , 4 ],
1506+ "col2" : ["a" , "b" , "c" , "d" ],
1507+ "col3" : [1.5 , 2.5 , 3.5 , 4.5 ],
1508+ "col4" : ["cat1" , "cat2" , "cat3" , "cat4" ],
1509+ "col5" : ["obj1" , "obj2" , "obj3" , "obj4" ],
1510+ }
1511+ # explicitly cast columns as category and order them
1512+ input_df = pd .DataFrame (data = input_dict ).astype (
1513+ {"col2" : "category" , "col4" : "category" }
1514+ )
1515+ input_df ["col2" ] = input_df ["col2" ].cat .reorder_categories (
1516+ ["a" , "b" , "c" , "d" ], ordered = True
1517+ )
1518+ input_df ["col4" ] = input_df ["col4" ].cat .reorder_categories (
1519+ ["cat1" , "cat2" , "cat3" , "cat4" ], ordered = True
1520+ )
1521+
1522+ # create expected dataframe
1523+ expected_dict = {
1524+ "col1" : [1 , 2 , 3 , 4 ],
1525+ "col2" : ["a" , "b" , "c" , "z" ],
1526+ "col3" : [1.5 , 2.5 , 3.5 , 4.5 ],
1527+ "col4" : ["cat1" , "catX" , "cat3" , "cat4" ],
1528+ "col5" : ["obj9" , "obj2" , "obj3" , "obj4" ],
1529+ }
1530+ # explicitly cast columns as category and order them
1531+ expected = pd .DataFrame (data = expected_dict ).astype (
1532+ {"col2" : "category" , "col4" : "category" }
1533+ )
1534+ expected ["col2" ] = expected ["col2" ].cat .reorder_categories (
1535+ ["a" , "b" , "c" , "z" ], ordered = True
1536+ )
1537+ expected ["col4" ] = expected ["col4" ].cat .reorder_categories (
1538+ ["cat1" , "catX" , "cat3" , "cat4" ], ordered = True
1539+ )
1540+
1541+ # replace values in input dataframe
1542+ input_df = input_df .replace ("d" , "z" )
1543+ input_df = input_df .replace ("obj1" , "obj9" )
1544+ result = input_df .replace ("cat2" , "catX" )
1545+
1546+ tm .assert_frame_equal (result , expected )
1547+
@pytest.mark.xfail(
    reason="category dtype gets changed to object type after replace, see #35268",
    strict=True,
)
def test_replace_dict_category_type(
    self, input_category_df=None, expected_category_df=None
):
    """
    Test to ensure category dtypes are maintained
    after replace with dict values

    Parameters
    ----------
    input_category_df, expected_category_df : optional
        Never read by this test; the frames are built inline below.
        They are kept, with a ``None`` default, only so the signature
        stays compatible. Without a default pytest treats each name as
        a fixture request, and a failed fixture lookup would itself be
        reported as the expected failure, so ``replace`` would never be
        exercised. NOTE(review): no fixtures with these names are
        visible in this part of the file - confirm none are relied on.
    """
    # every column is categorical in both frames
    all_category = {"col1": "category", "col2": "category", "col3": "category"}

    # create input dataframe and explicitly cast columns as category
    input_dict = {"col1": ["a"], "col2": ["obj1"], "col3": ["cat1"]}
    input_df = pd.DataFrame(data=input_dict).astype(all_category)

    # create expected dataframe and explicitly cast columns as category
    expected_dict = {"col1": ["z"], "col2": ["obj9"], "col3": ["catX"]}
    expected = pd.DataFrame(data=expected_dict).astype(all_category)

    # replace values in input dataframe using a dict
    result = input_df.replace({"a": "z", "obj1": "obj9", "cat1": "catX"})

    tm.assert_frame_equal(result, expected)
0 commit comments