11from io import StringIO
22import re
3+ from string import ascii_uppercase as uppercase
34import sys
45import textwrap
56
89
910from pandas .compat import PYPY
1011
11- import pandas as pd
12+ from pandas import (
13+ CategoricalIndex ,
14+ DataFrame ,
15+ MultiIndex ,
16+ Series ,
17+ date_range ,
18+ option_context ,
19+ reset_option ,
20+ set_option ,
21+ )
22+ import pandas ._testing as tm
23+
24+
@pytest.fixture
def datetime_frame():
    """
    Fixture for DataFrame of floats with DatetimeIndex

    The frame is built from whatever ``tm.getTimeSeriesData()`` returns; the
    table below is the sample layout carried in this docstring, not a set of
    values the fixture pins.

    Columns are ['A', 'B', 'C', 'D']

                       A         B         C         D
    2000-01-03 -1.122153  0.468535  0.122226  1.693711
    2000-01-04  0.189378  0.486100  0.007864 -1.216052
    2000-01-05  0.041401 -0.835752 -0.035279 -0.414357
    2000-01-06  0.430050  0.894352  0.090719  0.036939
    2000-01-07 -0.620982 -0.668211 -0.706153  1.466335
    2000-01-10 -0.752633  0.328434 -0.815325  0.699674
    2000-01-11 -2.236969  0.615737 -0.829076 -1.196106
    ...              ...       ...       ...       ...
    2000-02-03  1.642618 -0.579288  0.046005  1.385249
    2000-02-04 -0.544873 -1.160962 -0.284071 -1.418351
    2000-02-07 -2.656149 -0.601387  1.410148  0.444150
    2000-02-08 -1.201881 -1.289040  0.772992 -1.445300
    2000-02-09  1.377373  0.398619  1.008453 -0.928207
    2000-02-10  0.473194 -0.636677  0.984058  0.511519
    2000-02-11 -0.965556  0.408313 -1.312844 -0.381948

    [30 rows x 4 columns]
    """
    return DataFrame(tm.getTimeSeriesData())
1252
1353
1454def test_info_categorical_column ():
1555
1656 # make sure it works
1757 n = 2500
18- df = pd . DataFrame ({"int64" : np .random .randint (100 , size = n )})
19- df ["category" ] = pd . Series (
58+ df = DataFrame ({"int64" : np .random .randint (100 , size = n )})
59+ df ["category" ] = Series (
2060 np .array (list ("abcdefghij" )).take (np .random .randint (0 , 10 , size = n ))
2161 ).astype ("category" )
2262 df .isna ()
@@ -33,7 +73,7 @@ def test_info(float_frame, datetime_frame):
3373 float_frame .info (buf = io )
3474 datetime_frame .info (buf = io )
3575
36- frame = pd . DataFrame (np .random .randn (5 , 3 ))
76+ frame = DataFrame (np .random .randn (5 , 3 ))
3777
3878 frame .info ()
3979 frame .info (verbose = False )
@@ -43,7 +83,7 @@ def test_info_verbose():
4383 buf = StringIO ()
4484 size = 1001
4585 start = 5
46- frame = pd . DataFrame (np .random .randn (3 , size ))
86+ frame = DataFrame (np .random .randn (3 , size ))
4787 frame .info (verbose = True , buf = buf )
4888
4989 res = buf .getvalue ()
@@ -63,7 +103,7 @@ def test_info_verbose():
63103
64104def test_info_memory ():
65105 # https://github.com/pandas-dev/pandas/issues/21056
66- df = pd . DataFrame ({"a" : pd . Series ([1 , 2 ], dtype = "i8" )})
106+ df = DataFrame ({"a" : Series ([1 , 2 ], dtype = "i8" )})
67107 buf = StringIO ()
68108 df .info (buf = buf )
69109 result = buf .getvalue ()
@@ -84,10 +124,8 @@ def test_info_memory():
84124
85125
86126def test_info_wide ():
87- from pandas import set_option , reset_option
88-
89127 io = StringIO ()
90- df = pd . DataFrame (np .random .randn (5 , 101 ))
128+ df = DataFrame (np .random .randn (5 , 101 ))
91129 df .info (buf = io )
92130
93131 io = StringIO ()
@@ -107,15 +145,15 @@ def test_info_duplicate_columns():
107145 io = StringIO ()
108146
109147 # it works!
110- frame = pd . DataFrame (np .random .randn (1500 , 4 ), columns = ["a" , "a" , "b" , "b" ])
148+ frame = DataFrame (np .random .randn (1500 , 4 ), columns = ["a" , "a" , "b" , "b" ])
111149 frame .info (buf = io )
112150
113151
114152def test_info_duplicate_columns_shows_correct_dtypes ():
115153 # GH11761
116154 io = StringIO ()
117155
118- frame = pd . DataFrame ([[1 , 2.0 ]], columns = ["a" , "a" ])
156+ frame = DataFrame ([[1 , 2.0 ]], columns = ["a" , "a" ])
119157 frame .info (buf = io )
120158 io .seek (0 )
121159 lines = io .readlines ()
@@ -137,7 +175,7 @@ def test_info_shows_column_dtypes():
137175 n = 10
138176 for i , dtype in enumerate (dtypes ):
139177 data [i ] = np .random .randint (2 , size = n ).astype (dtype )
140- df = pd . DataFrame (data )
178+ df = DataFrame (data )
141179 buf = StringIO ()
142180 df .info (buf = buf )
143181 res = buf .getvalue ()
@@ -152,10 +190,10 @@ def test_info_shows_column_dtypes():
152190
153191
154192def test_info_max_cols ():
155- df = pd . DataFrame (np .random .randn (10 , 5 ))
193+ df = DataFrame (np .random .randn (10 , 5 ))
156194 for len_ , verbose in [(5 , None ), (5 , False ), (12 , True )]:
157195 # For verbose always ^ setting ^ summarize ^ full output
158- with pd . option_context ("max_info_columns" , 4 ):
196+ with option_context ("max_info_columns" , 4 ):
159197 buf = StringIO ()
160198 df .info (buf = buf , verbose = verbose )
161199 res = buf .getvalue ()
@@ -164,22 +202,22 @@ def test_info_max_cols():
164202 for len_ , verbose in [(12 , None ), (5 , False ), (12 , True )]:
165203
166204 # max_cols not exceeded
167- with pd . option_context ("max_info_columns" , 5 ):
205+ with option_context ("max_info_columns" , 5 ):
168206 buf = StringIO ()
169207 df .info (buf = buf , verbose = verbose )
170208 res = buf .getvalue ()
171209 assert len (res .strip ().split ("\n " )) == len_
172210
173211 for len_ , max_cols in [(12 , 5 ), (5 , 4 )]:
174212 # setting truncates
175- with pd . option_context ("max_info_columns" , 4 ):
213+ with option_context ("max_info_columns" , 4 ):
176214 buf = StringIO ()
177215 df .info (buf = buf , max_cols = max_cols )
178216 res = buf .getvalue ()
179217 assert len (res .strip ().split ("\n " )) == len_
180218
181219 # setting wouldn't truncate
182- with pd . option_context ("max_info_columns" , 5 ):
220+ with option_context ("max_info_columns" , 5 ):
183221 buf = StringIO ()
184222 df .info (buf = buf , max_cols = max_cols )
185223 res = buf .getvalue ()
@@ -201,7 +239,7 @@ def test_info_memory_usage():
201239 n = 10
202240 for i , dtype in enumerate (dtypes ):
203241 data [i ] = np .random .randint (2 , size = n ).astype (dtype )
204- df = pd . DataFrame (data )
242+ df = DataFrame (data )
205243 buf = StringIO ()
206244
207245 # display memory usage case
@@ -232,10 +270,10 @@ def test_info_memory_usage():
232270 n = 100
233271 for i , dtype in enumerate (dtypes ):
234272 data [i ] = np .random .randint (2 , size = n ).astype (dtype )
235- df = pd . DataFrame (data )
273+ df = DataFrame (data )
236274 df .columns = dtypes
237275
238- df_with_object_index = pd . DataFrame ({"a" : [1 ]}, index = ["foo" ])
276+ df_with_object_index = DataFrame ({"a" : [1 ]}, index = ["foo" ])
239277 df_with_object_index .info (buf = buf , memory_usage = True )
240278 res = buf .getvalue ().splitlines ()
241279 assert re .match (r"memory usage: [^+]+\+" , res [- 1 ])
@@ -258,10 +296,10 @@ def test_info_memory_usage():
258296 assert df .memory_usage ().sum () == df .memory_usage (deep = True ).sum ()
259297
260298 # test for validity
261- pd . DataFrame (1 , index = ["a" ], columns = ["A" ]).memory_usage (index = True )
262- pd . DataFrame (1 , index = ["a" ], columns = ["A" ]).index .nbytes
263- df = pd . DataFrame (
264- data = 1 , index = pd . MultiIndex .from_product ([["a" ], range (1000 )]), columns = ["A" ],
299+ DataFrame (1 , index = ["a" ], columns = ["A" ]).memory_usage (index = True )
300+ DataFrame (1 , index = ["a" ], columns = ["A" ]).index .nbytes
301+ df = DataFrame (
302+ data = 1 , index = MultiIndex .from_product ([["a" ], range (1000 )]), columns = ["A" ],
265303 )
266304 df .index .nbytes
267305 df .memory_usage (index = True )
@@ -273,32 +311,32 @@ def test_info_memory_usage():
273311
@pytest.mark.skipif(PYPY, reason="on PyPy deep=True doesn't change result")
def test_info_memory_usage_deep_not_pypy():
    """
    Check that ``memory_usage(deep=True)`` reports more than the shallow call.

    Two frames are checked: one with string labels in the index and one with
    a string value in a column. The test is skipped when ``PYPY`` is true.
    """
    # String labels in the index; the index is included in both totals.
    df_with_object_index = DataFrame({"a": [1]}, index=["foo"])
    assert (
        df_with_object_index.memory_usage(index=True, deep=True).sum()
        > df_with_object_index.memory_usage(index=True).sum()
    )

    # String value in a column.
    df_object = DataFrame({"a": ["a"]})
    assert df_object.memory_usage(deep=True).sum() > df_object.memory_usage().sum()
284322
285323
@pytest.mark.skipif(not PYPY, reason="on PyPy deep=True does not change result")
def test_info_memory_usage_deep_pypy():
    """
    Check that ``memory_usage(deep=True)`` equals the shallow call on PyPy.

    Two frames are checked: one with string labels in the index and one with
    a string value in a column. The test is skipped unless ``PYPY`` is true.
    """
    # String labels in the index; the index is included in both totals.
    df_with_object_index = DataFrame({"a": [1]}, index=["foo"])
    assert (
        df_with_object_index.memory_usage(index=True, deep=True).sum()
        == df_with_object_index.memory_usage(index=True).sum()
    )

    # String value in a column.
    df_object = DataFrame({"a": ["a"]})
    assert df_object.memory_usage(deep=True).sum() == df_object.memory_usage().sum()
296334
297335
298336@pytest .mark .skipif (PYPY , reason = "PyPy getsizeof() fails by design" )
299337def test_usage_via_getsizeof ():
300- df = pd . DataFrame (
301- data = 1 , index = pd . MultiIndex .from_product ([["a" ], range (1000 )]), columns = ["A" ],
338+ df = DataFrame (
339+ data = 1 , index = MultiIndex .from_product ([["a" ], range (1000 )]), columns = ["A" ],
302340 )
303341 mem = df .memory_usage (deep = True ).sum ()
304342 # sys.getsizeof will call the .memory_usage with
@@ -310,27 +348,27 @@ def test_usage_via_getsizeof():
def test_info_memory_usage_qualified():
    """
    Check when the text written by ``DataFrame.info`` contains a ``"+"``.

    Frames whose index (or every MultiIndex level) is built from integers
    must produce no ``"+"`` anywhere in the output; frames whose index (or
    one MultiIndex level) is built from strings must produce one.

    NOTE(review): presumably the ``"+"`` is the qualifier on the
    "memory usage" line -- confirm against the ``DataFrame.info`` docs.
    """
    # Flat index of integers.
    buf = StringIO()
    df = DataFrame(1, columns=list("ab"), index=[1, 2, 3])
    df.info(buf=buf)
    assert "+" not in buf.getvalue()

    # Flat index of strings.
    buf = StringIO()
    df = DataFrame(1, columns=list("ab"), index=list("ABC"))
    df.info(buf=buf)
    assert "+" in buf.getvalue()

    # MultiIndex whose levels are both integer ranges.
    buf = StringIO()
    df = DataFrame(
        1,
        columns=list("ab"),
        index=MultiIndex.from_product([range(3), range(3)]),
    )
    df.info(buf=buf)
    assert "+" not in buf.getvalue()

    # MultiIndex with one level of strings.
    buf = StringIO()
    df = DataFrame(
        1,
        columns=list("ab"),
        index=MultiIndex.from_product([range(3), ["foo", "bar"]]),
    )
    df.info(buf=buf)
    assert "+" in buf.getvalue()
@@ -340,17 +378,15 @@ def test_info_memory_usage_bug_on_multiindex():
340378 # GH 14308
341379 # memory usage introspection should not materialize .values
342380
343- from string import ascii_uppercase as uppercase
344-
345381 def memory_usage (f ):
346382 return f .memory_usage (deep = True ).sum ()
347383
348384 N = 100
349385 M = len (uppercase )
350- index = pd . MultiIndex .from_product (
351- [list (uppercase ), pd . date_range ("20160101" , periods = N )], names = ["id" , "date" ],
386+ index = MultiIndex .from_product (
387+ [list (uppercase ), date_range ("20160101" , periods = N )], names = ["id" , "date" ],
352388 )
353- df = pd . DataFrame ({"value" : np .random .randn (N * M )}, index = index )
389+ df = DataFrame ({"value" : np .random .randn (N * M )}, index = index )
354390
355391 unstacked = df .unstack ("id" )
356392 assert df .values .nbytes == unstacked .values .nbytes
@@ -362,8 +398,8 @@ def memory_usage(f):
362398
def test_info_categorical():
    """
    Smoke test: ``info`` runs on a frame with categorical index and columns.

    Nothing is asserted about the output; the test passes as long as the
    call does not raise.
    """
    # GH14298
    idx = CategoricalIndex(["a", "b"])
    df = DataFrame(np.zeros((2, 2)), index=idx, columns=idx)

    buf = StringIO()
    df.info(buf=buf)
0 commit comments