1+ import re
2+
13import numpy as np
24import pytest
35
@@ -254,41 +256,45 @@ def test_slicing_doc_examples(self):
254256 )
255257 tm .assert_frame_equal (result , expected )
256258
257- def test_loc_listlike (self ):
258-
def test_loc_getitem_listlike_labels(self):
    # Label-based selection with a list of labels is compared against the
    # equivalent positional selection on the same frame.
    labels = ["c", "a"]
    positions = [4, 0, 1, 5]

    expected = self.df.iloc[positions]
    result = self.df.loc[labels]

    tm.assert_frame_equal(result, expected, check_index_type=True)
263264
264- result = self .df2 .loc [["a" , "b" , "e" ]]
265- exp_index = CategoricalIndex (list ("aaabbe" ), categories = list ("cabe" ), name = "B" )
266- expected = DataFrame ({"A" : [0 , 1 , 5 , 2 , 3 , np .nan ]}, index = exp_index )
267- tm .assert_frame_equal (result , expected , check_index_type = True )
def test_loc_getitem_listlike_unused_category(self):
    # GH#37901 list-like key containing a label ("e") that is in
    # index.categories but does not occur among the index values
    missing_repr = (
        "CategoricalIndex(['e'], categories=['c', 'a', 'b', 'e'], "
        "ordered=False, name='B', dtype='category')"
    )
    # The message contains regex metacharacters, so match it literally.
    pattern = re.escape("The following labels were missing: " + missing_repr)

    with pytest.raises(KeyError, match=pattern):
        self.df2.loc[["a", "b", "e"]]
268275
def test_loc_getitem_label_unused_category(self):
    # Scalar lookup of a label that is in the categories but not in the
    # values: the KeyError text must be exactly the quoted label.
    only_the_label = r"^'e'$"
    with pytest.raises(KeyError, match=only_the_label):
        self.df2.loc["e"]
272280
273- # assign is ok
def test_loc_getitem_non_category(self):
    # The key list includes a label ("d") that is not in the categories;
    # the KeyError is expected to report it as a missing label.
    missing_repr = "Index(['d'], dtype='object', name='B')"
    # Escape the message so brackets and parentheses are matched literally.
    pattern = re.escape(f"The following labels were missing: {missing_repr}")

    with pytest.raises(KeyError, match=pattern):
        self.df2.loc[["a", "d"]]
288+
def test_loc_setitem_expansion_label_unused_category(self):
    # Assigning through .loc with a label that is in the categories but not
    # in the index; afterwards that label can be selected in a list-like key.
    expected = DataFrame(
        {"A": [0, 1, 5, 2, 3, 20]},
        index=CategoricalIndex(
            list("aaabbe"), categories=list("cabe"), name="B"
        ),
    )

    # Work on a copy so the shared fixture frame is not mutated.
    frame = self.df2.copy()
    frame.loc["e"] = 20

    result = frame.loc[["a", "b", "e"]]
    tm.assert_frame_equal(result, expected)
280297
281- df = self .df2 .copy ()
282- result = df .loc [["a" , "b" , "e" ]]
283- exp_index = CategoricalIndex (list ("aaabbe" ), categories = list ("cabe" ), name = "B" )
284- expected = DataFrame ({"A" : [0 , 1 , 5 , 2 , 3 , np .nan ]}, index = exp_index )
285- tm .assert_frame_equal (result , expected , check_index_type = True )
286-
287- # not all labels in the categories
288- msg = "a list-indexer must only include values that are in the categories"
289- with pytest .raises (KeyError , match = msg ):
290- self .df2 .loc [["a" , "d" ]]
291-
292298 def test_loc_listlike_dtypes (self ):
293299 # GH 11586
294300
@@ -309,8 +315,8 @@ def test_loc_listlike_dtypes(self):
309315 exp = DataFrame ({"A" : [1 , 1 , 2 ], "B" : [4 , 4 , 5 ]}, index = exp_index )
310316 tm .assert_frame_equal (res , exp , check_index_type = True )
311317
312- msg = "a list-indexer must only include values that are in the categories "
313- with pytest .raises (KeyError , match = msg ):
318+ msg = "The following labels were missing: Index(['x'], dtype='object') "
319+ with pytest .raises (KeyError , match = re . escape ( msg ) ):
314320 df .loc [["a" , "x" ]]
315321
316322 # duplicated categories and codes
@@ -332,8 +338,7 @@ def test_loc_listlike_dtypes(self):
332338 )
333339 tm .assert_frame_equal (res , exp , check_index_type = True )
334340
335- msg = "a list-indexer must only include values that are in the categories"
336- with pytest .raises (KeyError , match = msg ):
341+ with pytest .raises (KeyError , match = re .escape (msg )):
337342 df .loc [["a" , "x" ]]
338343
339344 # contains unused category
@@ -347,13 +352,6 @@ def test_loc_listlike_dtypes(self):
347352 )
348353 tm .assert_frame_equal (res , exp , check_index_type = True )
349354
350- res = df .loc [["a" , "e" ]]
351- exp = DataFrame (
352- {"A" : [1 , 3 , np .nan ], "B" : [5 , 7 , np .nan ]},
353- index = CategoricalIndex (["a" , "a" , "e" ], categories = list ("abcde" )),
354- )
355- tm .assert_frame_equal (res , exp , check_index_type = True )
356-
357355 # duplicated slice
358356 res = df .loc [["a" , "a" , "b" ]]
359357 exp = DataFrame (
@@ -362,10 +360,27 @@ def test_loc_listlike_dtypes(self):
362360 )
363361 tm .assert_frame_equal (res , exp , check_index_type = True )
364362
365- msg = "a list-indexer must only include values that are in the categories"
366- with pytest .raises (KeyError , match = msg ):
363+ with pytest .raises (KeyError , match = re .escape (msg )):
367364 df .loc [["a" , "x" ]]
368365
def test_loc_getitem_listlike_unused_category_raises_keyerro(self):
    # A key that is an *unused* category (listed in the index's categories
    # but absent from its values) raises KeyError for both scalar and
    # list-like .loc lookups.
    # NOTE(review): the method name looks truncated ("keyerro"); it is kept
    # unchanged so selecting this test by name keeps working.
    index = CategoricalIndex(["a", "b", "a", "c"], categories=list("abcde"))
    df = DataFrame({"A": [1, 2, 3, 4], "B": [5, 6, 7, 8]}, index=index)

    # For comparison, check the scalar behavior.  The pattern is anchored
    # because pytest applies ``match`` with re.search, so a bare "e" would
    # accept any message that merely contains that letter.
    with pytest.raises(KeyError, match=r"^'e'$"):
        df.loc["e"]

    msg = (
        "Passing list-likes to .loc or [] with any missing labels is no "
        "longer supported. The following labels were missing: "
        "CategoricalIndex(['e'], categories=['a', 'b', 'c', 'd', 'e'], "
        "ordered=False, dtype='category'). See https"
    )
    # re.escape keeps the brackets, parentheses and dots literal.
    with pytest.raises(KeyError, match=re.escape(msg)):
        df.loc[["a", "e"]]
383+
369384 def test_ix_categorical_index (self ):
370385 # GH 12531
371386 df = DataFrame (np .random .randn (3 , 3 ), index = list ("ABC" ), columns = list ("XYZ" ))
0 commit comments