@@ -374,6 +374,17 @@ cdef class TextReader:
374374 float_precision = None ,
375375 skip_blank_lines = True ):
376376
377+ # encoding
378+ if encoding is not None :
379+ if not isinstance (encoding, bytes):
380+ encoding = encoding.encode(' utf-8' )
381+ encoding = encoding.lower()
382+ self .c_encoding = < char * > encoding
383+ else :
384+ self .c_encoding = NULL
385+
386+ self .encoding = encoding
387+
377388 self .parser = parser_new()
378389 self .parser.chunksize = tokenize_chunksize
379390
@@ -495,17 +506,6 @@ cdef class TextReader:
495506 self .parser.double_converter_nogil = NULL
496507 self .parser.double_converter_withgil = round_trip
497508
498- # encoding
499- if encoding is not None :
500- if not isinstance (encoding, bytes):
501- encoding = encoding.encode(' utf-8' )
502- encoding = encoding.lower()
503- self .c_encoding = < char * > encoding
504- else :
505- self .c_encoding = NULL
506-
507- self .encoding = encoding
508-
509509 if isinstance (dtype, dict ):
510510 dtype = {k: pandas_dtype(dtype[k])
511511 for k in dtype}
@@ -684,6 +684,12 @@ cdef class TextReader:
684684 else :
685685 raise ValueError (' Unrecognized compression type: %s ' %
686686 self .compression)
687+
688+ if b' utf-16' in (self .encoding or b' ' ):
689+ source = com.UTF8Recoder(source, self .encoding.decode(' utf-8' ))
690+ self .encoding = b' utf-8'
691+ self .c_encoding = < char * > self .encoding
692+
687693 self .handle = source
688694
689695 if isinstance (source, basestring ):
0 commit comments