11# pylint: disable-msg=E1101
22# pylint: disable-msg=E1103
3- # pylint: disable-msg=W0212,W0703
3+ # pylint: disable-msg=W0212,W0703,W0622
44
5+ from cStringIO import StringIO
56import operator
7+ import sys
68
79from numpy import NaN
810import numpy as np
@@ -260,11 +262,14 @@ def __repr__(self):
260262 """
261263 Return a string representation for a particular DataFrame
262264 """
263- if len (self .index ) < 1000 and len (self ._series ) < 10 :
264- return self .toString (to_stdout = False )
265+ buf = StringIO ()
266+ if len (self .index ) < 500 and len (self ._series ) < 10 :
267+ self .toString (buffer = buf )
265268 else :
266- output = str (self .__class__ ) + '\n '
267- return output + self .info (to_stdout = False )
269+ buf .write (str (self .__class__ ) + '\n ' )
270+ self .info (buffer = buf )
271+
272+ return buf .getvalue ()
268273
269274 def __getitem__ (self , item ):
270275 """
@@ -313,7 +318,7 @@ def __delitem__(self, key):
313318 """
314319 Delete column from DataFrame (only deletes the reference)
315320 """
316- r = self ._series .pop (key , None )
321+ self ._series .pop (key , None )
317322
318323 def pop (self , item ):
319324 """
@@ -408,7 +413,6 @@ def _combineFrame(self, other, func):
408413
409414 for col , series in other .iteritems ():
410415 if col not in self :
411- cls = series .__class__
412416 newColumns [col ] = series .fromValue (np .NaN , index = newIndex )
413417
414418 return DataFrame (data = newColumns , index = newIndex )
@@ -514,54 +518,60 @@ def toDataMatrix(self):
514518
515519 return DataMatrix (self ._series , index = self .index )
516520
def toString(self, buffer=sys.stdout, verbose=False,
             colSpace=15, nanRep=None, formatters=None,
             float_format=None):
    """
    Write a text table of this DataFrame to ``buffer``.

    One header line with the sorted column names is written, followed by
    one line per index entry.  Every cell is passed through ``_pfixed``
    together with a width (NOTE(review): presumably ``_pfixed`` pads the
    value to that width -- confirm against its definition).

    Parameters
    ----------
    buffer : object with a ``write`` method, default sys.stdout
        Destination for the text.  The default is evaluated once, when the
        method is defined, so later rebinding of ``sys.stdout`` is not
        picked up unless ``buffer`` is passed explicitly.
    verbose : boolean, default False
        If True, ``colSpace`` is replaced by the length of the longest
        name in ``self.columns`` plus 4.
    colSpace : int, default 15
        Width handed to ``_pfixed`` for every data column.
    nanRep : object, optional
        Forwarded unchanged to ``_pfixed`` for data cells.
    formatters : dict, optional
        Maps a column name to a one-argument callable applied to each
        value of that column before it is handed to ``_pfixed``.  Columns
        without an entry use ``str``.  The dict passed in is not modified.
    float_format : callable, optional
        Accepted for API compatibility; not used by this implementation.

    Returns
    -------
    None
    """
    series = self._series
    columns = sorted(series.keys())

    # Work on a private copy: filling in the per-column defaults must not
    # leak extra keys into a dict owned by the caller.
    formatters = dict(formatters or {})
    for c in columns:
        formatters.setdefault(c, str)

    # len() rather than truthiness: the index may be an array-like whose
    # truth value is ambiguous.
    if len(columns) == 0 or len(self.index) == 0:
        buffer.write('Empty DataFrame\n')
        buffer.write(repr(self.index) + '\n')
        return

    idxSpace = max([len(str(idx)) for idx in self.index]) + 4
    if verbose:
        colSpace = max([len(c) for c in self.columns]) + 4

    head = _pfixed('', idxSpace)
    for h in columns:
        head += _pfixed(h, colSpace)
    buffer.write(head + '\n')

    for idx in self.index:
        ot = _pfixed(idx, idxSpace)
        for k in columns:
            # NOTE(review): the value is formatted before it reaches
            # _pfixed, so with the default ``str`` formatter _pfixed only
            # ever sees strings -- confirm that nanRep still takes effect.
            ot += _pfixed(formatters[k](series[k][idx]),
                          colSpace, nanRep=nanRep)
        buffer.write(ot + '\n')
541555
def info(self, buffer=sys.stdout):
    """
    Concise summary of a DataFrame, used in __repr__ when very large.

    Parameters
    ----------
    buffer : object with a ``write`` method, default sys.stdout
        Destination for the summary text.  The default is evaluated once,
        when the method is defined.

    Returns
    -------
    None

    Notes
    -----
    For a frame without columns only the 'DataFrame is empty!' line and the
    repr of the index are written.  Otherwise the output is an index
    summary line, a 'Data columns:' line, and one line per column (sorted)
    giving its count of non-null values.
    """
    if len(self._series) == 0:
        buffer.write('DataFrame is empty!\n')
        buffer.write(repr(self.index) + '\n')
        # Stop here: with no columns the summary below has nothing to
        # report and max() over an empty list would raise ValueError.
        return

    buffer.write('Index: %s entries, %s to %s\n' % (len(self.index),
                                                    min(self.index),
                                                    max(self.index)))
    buffer.write('Data columns:\n')

    series = self._series
    columns = sorted(self.cols())
    # Width of the name column: longest column label plus 4.
    space = max([len(str(k)) for k in columns]) + 4
    for k in columns:
        N = notnull(series[k]).sum()
        buffer.write(_pfixed(k, space) + '%d non-null values\n' % N)
566576 def rows (self ):
567577 """Alias for the frame's index"""
@@ -586,7 +596,7 @@ def append(self, otherFrame):
586596 """
587597 newIndex = np .concatenate ((self .index , otherFrame .index ))
588598 newValues = {}
589-
599+
590600 for column , series in self .iteritems ():
591601 if column in otherFrame :
592602 newValues [column ] = series .append (otherFrame [column ])
@@ -793,7 +803,7 @@ def getTS(self, colName=None, fromDate=None, toDate=None, nPeriods=None):
793803 else :
794804 return self .reindex (dateRange )
795805
796- def truncate (self , before = None , after = None , periods = None ):
806+ def truncate (self , before = None , after = None ):
797807 """Function truncate a sorted DataFrame before and/or after
798808 some particular dates.
799809
@@ -803,13 +813,13 @@ def truncate(self, before=None, after=None, periods=None):
803813 Truncate before date
804814 after : date
805815 Truncate after date
806-
816+
807817 Returns
808818 -------
809819 DataFrame
810820 """
811821 beg_slice , end_slice = self ._getIndices (before , after )
812-
822+
813823 return self [beg_slice :end_slice ]
814824
815825 def _getIndices (self , before , after ):
@@ -833,8 +843,8 @@ def _getIndices(self, before, after):
833843 end_slice = self .index .indexMap [after ] + 1
834844
835845 return beg_slice , end_slice
836-
837- def getXS (self , key , subset = None , asOf = False ):
846+
847+ def getXS (self , key , subset = None ):
838848 """
839849 Returns a row from the DataFrame as a Series object.
840850
@@ -843,9 +853,6 @@ def getXS(self, key, subset=None, asOf=False):
843853 key : some index contained in the index
844854 subset : iterable (list, array, set, etc.), optional
845855 columns to be included
846- asOf : boolean, optional
847- Whether to use asOf values for TimeSeries objects
848- Won't do anything for Series objects.
849856
850857 Note
851858 ----
@@ -1050,7 +1057,7 @@ def applymap(self, func):
10501057 """
10511058 results = {}
10521059 for col , series in self .iteritems ():
1053- results [col ] = map ( func , series )
1060+ results [col ] = [ func ( v ) for v in series ]
10541061 return DataFrame (data = results , index = self .index )
10551062
10561063 def tgroupby (self , keyfunc , applyfunc ):
0 commit comments