@@ -375,6 +375,18 @@ def __init__(self, encoding):
375375 'd' : np .float64 (struct .unpack ('<d' , b'\x00 \x00 \x00 \x00 \x00 \x00 \xe0 \x7f ' )[0 ])
376376 }
377377
378+ # Reserved words cannot be used as variable names
379+ self .RESERVED_WORDS = ('aggregate' , 'array' , 'boolean' , 'break' ,
380+ 'byte' , 'case' , 'catch' , 'class' , 'colvector' ,
381+ 'complex' , 'const' , 'continue' , 'default' ,
382+ 'delegate' , 'delete' , 'do' , 'double' , 'else' ,
383+ 'eltypedef' , 'end' , 'enum' , 'explicit' ,
384+ 'export' , 'external' , 'float' , 'for' , 'friend' ,
385+ 'function' , 'global' , 'goto' , 'if' , 'inline' ,
386+ 'int' , 'local' , 'long' , 'NULL' , 'pragma' ,
387+ 'protected' , 'quad' , 'rowvector' , 'short' ,
388+ 'typedef' , 'typename' , 'virtual' )
389+
378390 def _decode_bytes (self , str , errors = None ):
379391 if compat .PY3 or self ._encoding is not None :
380392 return str .decode (self ._encoding , errors )
@@ -449,10 +461,10 @@ def _read_header(self):
449461 self .path_or_buf .read (4 ))[0 ]
450462 self .path_or_buf .read (11 ) # </N><label>
451463 strlen = struct .unpack ('b' , self .path_or_buf .read (1 ))[0 ]
452- self .data_label = self .path_or_buf .read (strlen )
464+ self .data_label = self ._null_terminate ( self . path_or_buf .read (strlen ) )
453465 self .path_or_buf .read (19 ) # </label><timestamp>
454466 strlen = struct .unpack ('b' , self .path_or_buf .read (1 ))[0 ]
455- self .time_stamp = self .path_or_buf .read (strlen )
467+ self .time_stamp = self ._null_terminate ( self . path_or_buf .read (strlen ) )
456468 self .path_or_buf .read (26 ) # </timestamp></header><map>
457469 self .path_or_buf .read (8 ) # 0x0000000000000000
458470 self .path_or_buf .read (8 ) # position of <map>
@@ -543,11 +555,11 @@ def _read_header(self):
543555 self .nobs = struct .unpack (self .byteorder + 'I' ,
544556 self .path_or_buf .read (4 ))[0 ]
545557 if self .format_version > 105 :
546- self .data_label = self .path_or_buf .read (81 )
558+ self .data_label = self ._null_terminate ( self . path_or_buf .read (81 ) )
547559 else :
548- self .data_label = self .path_or_buf .read (32 )
560+ self .data_label = self ._null_terminate ( self . path_or_buf .read (32 ) )
549561 if self .format_version > 104 :
550- self .time_stamp = self .path_or_buf .read (18 )
562+ self .time_stamp = self ._null_terminate ( self . path_or_buf .read (18 ) )
551563
552564 # descriptors
553565 if self .format_version > 108 :
@@ -1029,6 +1041,11 @@ class StataWriter(StataParser):
10291041 byteorder : str
10301042 Can be ">", "<", "little", or "big". The default is None which uses
10311043 `sys.byteorder`
1044+ time_stamp : datetime
1045+ A date time to use when writing the file. Can be None, in which
1046+ case the current time is used.
1047+ data_label : str
1048+ A label for the data set. Should be 80 characters or fewer.
10321049
10331050 Returns
10341051 -------
@@ -1047,10 +1064,13 @@ class StataWriter(StataParser):
10471064 >>> writer.write_file()
10481065 """
10491066 def __init__ (self , fname , data , convert_dates = None , write_index = True ,
1050- encoding = "latin-1" , byteorder = None ):
1067+ encoding = "latin-1" , byteorder = None , time_stamp = None ,
1068+ data_label = None ):
10511069 super (StataWriter , self ).__init__ (encoding )
10521070 self ._convert_dates = convert_dates
10531071 self ._write_index = write_index
1072+ self ._time_stamp = time_stamp
1073+ self ._data_label = data_label
10541074 # attach nobs, nvars, data, varlist, typlist
10551075 self ._prepare_pandas (data )
10561076
@@ -1086,7 +1106,7 @@ def __iter__(self):
10861106
10871107 if self ._write_index :
10881108 data = data .reset_index ()
1089- # Check columns for compatbaility with stata
1109+ # Check columns for compatibility with stata
10901110 data = _cast_to_stata_types (data )
10911111 self .datarows = DataFrameRowIter (data )
10921112 self .nobs , self .nvar = data .shape
@@ -1110,7 +1130,8 @@ def __iter__(self):
11101130 self .fmtlist [key ] = self ._convert_dates [key ]
11111131
11121132 def write_file (self ):
1113- self ._write_header ()
1133+ self ._write_header (time_stamp = self ._time_stamp ,
1134+ data_label = self ._data_label )
11141135 self ._write_descriptors ()
11151136 self ._write_variable_labels ()
11161137 # write 5 zeros for expansion fields
@@ -1147,7 +1168,7 @@ def _write_header(self, data_label=None, time_stamp=None):
11471168 # format dd Mon yyyy hh:mm
11481169 if time_stamp is None :
11491170 time_stamp = datetime .datetime .now ()
1150- elif not isinstance (time_stamp , datetime ):
1171+ elif not isinstance (time_stamp , datetime . datetime ):
11511172 raise ValueError ("time_stamp should be datetime type" )
11521173 self ._file .write (
11531174 self ._null_terminate (time_stamp .strftime ("%d %b %Y %H:%M" ))
@@ -1169,7 +1190,9 @@ def _write_descriptors(self, typlist=None, varlist=None, srtlist=None,
11691190 for c in name :
11701191 if (c < 'A' or c > 'Z' ) and (c < 'a' or c > 'z' ) and (c < '0' or c > '9' ) and c != '_' :
11711192 name = name .replace (c , '_' )
1172-
1193+ # Variable name must not be a reserved word
1194+ if name in self .RESERVED_WORDS :
1195+ name = '_' + name
11731196 # Variable name may not start with a number
11741197 if name [0 ] > '0' and name [0 ] < '9' :
11751198 name = '_' + name
0 commit comments