@@ -138,7 +138,7 @@ cdef extern from "parser/tokenizer.h":
138138
139139 # Store words in (potentially ragged) matrix for now, hmm
140140 char ** words
141- int64_t * word_starts # where we are in the stream
141+ int64_t * word_starts # where we are in the stream
142142 int64_t words_len
143143 int64_t words_cap
144144
@@ -400,7 +400,7 @@ cdef class TextReader:
400400 raise ValueError (' only length-1 separators excluded right now' )
401401 self .parser.delimiter = ord (delimiter)
402402
403- # ----------------------------------------
403+ # ----------------------------------------
404404 # parser options
405405
406406 self .parser.doublequote = doublequote
@@ -519,7 +519,7 @@ cdef class TextReader:
519519
520520 self .index_col = index_col
521521
522- # ----------------------------------------
522+ # ----------------------------------------
523523 # header stuff
524524
525525 self .allow_leading_cols = allow_leading_cols
@@ -810,7 +810,7 @@ cdef class TextReader:
810810 if hr == self .header[- 1 ]:
811811 lc = len (this_header)
812812 ic = (len (self .index_col) if self .index_col
813- is not None else 0 )
813+ is not None else 0 )
814814 if lc != unnamed_count and lc - ic > unnamed_count:
815815 hr -= 1
816816 self .parser_start -= 1
@@ -848,7 +848,7 @@ cdef class TextReader:
848848 # Corner case, not enough lines in the file
849849 if self .parser.lines < data_line + 1 :
850850 field_count = len (header[0 ])
851- else : # not self.has_usecols:
851+ else : # not self.has_usecols:
852852
853853 field_count = self .parser.line_fields[data_line]
854854
@@ -1374,6 +1374,7 @@ def _ensure_encoded(list lst):
13741374 result.append(x)
13751375 return result
13761376
1377+
13771378cdef asbytes(object o):
13781379 if PY3:
13791380 return str (o).encode(' utf-8' )
@@ -1417,11 +1418,13 @@ def _maybe_upcast(arr):
14171418
14181419 return arr
14191420
1421+
14201422cdef enum StringPath:
14211423 CSTRING
14221424 UTF8
14231425 ENCODED
14241426
1427+
14251428# factored out logic to pick string converter
14261429cdef inline StringPath _string_path(char * encoding):
14271430 if encoding != NULL and encoding != b" utf-8" :
@@ -1430,9 +1433,12 @@ cdef inline StringPath _string_path(char *encoding):
14301433 return UTF8
14311434 else :
14321435 return CSTRING
1436+
1437+
14331438# ----------------------------------------------------------------------
14341439# Type conversions / inference support code
14351440
1441+
14361442cdef _string_box_factorize(parser_t * parser, int64_t col,
14371443 int64_t line_start, int64_t line_end,
14381444 bint na_filter, kh_str_t * na_hashset):
@@ -1782,7 +1788,7 @@ cdef inline int _try_double_nogil(parser_t *parser,
17821788 parser.sci, parser.thousands, 1 )
17831789 if errno != 0 or p_end[0 ] or p_end == word:
17841790 if (strcasecmp(word, cinf) == 0 or
1785- strcasecmp(word, cposinf) == 0 ):
1791+ strcasecmp(word, cposinf) == 0 ):
17861792 data[0 ] = INF
17871793 elif strcasecmp(word, cneginf) == 0 :
17881794 data[0 ] = NEGINF
@@ -1803,7 +1809,7 @@ cdef inline int _try_double_nogil(parser_t *parser,
18031809 parser.sci, parser.thousands, 1 )
18041810 if errno != 0 or p_end[0 ] or p_end == word:
18051811 if (strcasecmp(word, cinf) == 0 or
1806- strcasecmp(word, cposinf) == 0 ):
1812+ strcasecmp(word, cposinf) == 0 ):
18071813 data[0 ] = INF
18081814 elif strcasecmp(word, cneginf) == 0 :
18091815 data[0 ] = NEGINF
@@ -2263,6 +2269,7 @@ def _compute_na_values():
22632269 }
22642270 return na_values
22652271
2272+
22662273na_values = _compute_na_values()
22672274
22682275for k in list (na_values):
@@ -2362,6 +2369,7 @@ def _to_structured_array(dict columns, object names, object usecols):
23622369
23632370 return recs
23642371
2372+
23652373cdef _fill_structured_column(char * dst, char * src, int64_t elsize,
23662374 int64_t stride, int64_t length, bint incref):
23672375 cdef:
0 commit comments