1010import mmap
1111import os
1212import pathlib
13+ from typing import IO , AnyStr , BinaryIO , Optional , TextIO , Type
1314from urllib .error import URLError # noqa
1415from urllib .parse import ( # noqa
1516 urlencode ,
3233
3334from pandas .core .dtypes .common import is_file_like
3435
36+ from pandas ._typing import FilePathOrBuffer
37+
3538# gh-12665: Alias for now and remove later.
3639CParserError = ParserError
3740
@@ -68,14 +71,14 @@ class BaseIterator:
6871 Useful only when the object being iterated is non-reusable (e.g. OK for a
6972 parser, not for an in-memory table, yes for its iterator)."""
7073
71- def __iter__ (self ):
74+ def __iter__ (self ) -> "BaseIterator" :
7275 return self
7376
7477 def __next__ (self ):
7578 raise AbstractMethodError (self )
7679
7780
78- def _is_url (url ):
81+ def _is_url (url ) -> bool :
7982 """Check to see if a URL has a valid protocol.
8083
8184 Parameters
@@ -93,7 +96,9 @@ def _is_url(url):
9396 return False
9497
9598
96- def _expand_user (filepath_or_buffer ):
99+ def _expand_user (
100+ filepath_or_buffer : FilePathOrBuffer [AnyStr ]
101+ ) -> FilePathOrBuffer [AnyStr ]:
97102 """Return the argument with an initial component of ~ or ~user
98103 replaced by that user's home directory.
99104
@@ -111,7 +116,7 @@ def _expand_user(filepath_or_buffer):
111116 return filepath_or_buffer
112117
113118
114- def _validate_header_arg (header ):
119+ def _validate_header_arg (header ) -> None :
115120 if isinstance (header , bool ):
116121 raise TypeError (
117122 "Passing a bool to header is invalid. "
@@ -121,7 +126,9 @@ def _validate_header_arg(header):
121126 )
122127
123128
124- def _stringify_path (filepath_or_buffer ):
129+ def _stringify_path (
130+ filepath_or_buffer : FilePathOrBuffer [AnyStr ]
131+ ) -> FilePathOrBuffer [AnyStr ]:
125132 """Attempt to convert a path-like object to a string.
126133
127134 Parameters
@@ -144,21 +151,22 @@ def _stringify_path(filepath_or_buffer):
144151 strings, buffers, or anything else that's not even path-like.
145152 """
146153 if hasattr (filepath_or_buffer , "__fspath__" ):
147- return filepath_or_buffer .__fspath__ ()
154+ # https://github.com/python/mypy/issues/1424
155+ return filepath_or_buffer .__fspath__ () # type: ignore
148156 elif isinstance (filepath_or_buffer , pathlib .Path ):
149157 return str (filepath_or_buffer )
150158 return _expand_user (filepath_or_buffer )
151159
152160
def is_s3_url(url) -> bool:
    """Check for an s3, s3n, or s3a url.

    Returns True when the parsed scheme of ``url`` is one of those three.
    Any exception raised while parsing or comparing is swallowed and
    reported as False.
    """
    try:
        scheme = parse_url(url).scheme
        return scheme in ["s3", "s3n", "s3a"]
    except Exception:
        return False
159167
160168
161- def is_gcs_url (url ):
169+ def is_gcs_url (url ) -> bool :
162170 """Check for a gcs url"""
163171 try :
164172 return parse_url (url ).scheme in ["gcs" , "gs" ]
@@ -167,7 +175,10 @@ def is_gcs_url(url):
167175
168176
169177def get_filepath_or_buffer (
170- filepath_or_buffer , encoding = None , compression = None , mode = None
178+ filepath_or_buffer : FilePathOrBuffer ,
179+ encoding : Optional [str ] = None ,
180+ compression : Optional [str ] = None ,
181+ mode : Optional [str ] = None ,
171182):
172183 """
173184 If the filepath_or_buffer is a url, translate and return the buffer.
@@ -190,7 +201,7 @@ def get_filepath_or_buffer(
190201 """
191202 filepath_or_buffer = _stringify_path (filepath_or_buffer )
192203
193- if _is_url (filepath_or_buffer ):
204+ if isinstance ( filepath_or_buffer , str ) and _is_url (filepath_or_buffer ):
194205 req = urlopen (filepath_or_buffer )
195206 content_encoding = req .headers .get ("Content-Encoding" , None )
196207 if content_encoding == "gzip" :
@@ -224,7 +235,7 @@ def get_filepath_or_buffer(
224235 return filepath_or_buffer , None , compression , False
225236
226237
227- def file_path_to_url (path ) :
238+ def file_path_to_url (path : str ) -> str :
228239 """
229240 converts an absolute native path to a FILE URL.
230241
@@ -242,7 +253,9 @@ def file_path_to_url(path):
242253_compression_to_extension = {"gzip" : ".gz" , "bz2" : ".bz2" , "zip" : ".zip" , "xz" : ".xz" }
243254
244255
245- def _infer_compression (filepath_or_buffer , compression ):
256+ def _infer_compression (
257+ filepath_or_buffer : FilePathOrBuffer , compression : Optional [str ]
258+ ) -> Optional [str ]:
246259 """
247260 Get the compression method for filepath_or_buffer. If compression='infer',
248261 the inferred compression method is returned. Otherwise, the input
@@ -435,7 +448,13 @@ class BytesZipFile(zipfile.ZipFile, BytesIO): # type: ignore
435448 """
436449
437450 # GH 17778
438- def __init__ (self , file , mode , compression = zipfile .ZIP_DEFLATED , ** kwargs ):
451+ def __init__ (
452+ self ,
453+ file : FilePathOrBuffer ,
454+ mode : str ,
455+ compression : int = zipfile .ZIP_DEFLATED ,
456+ ** kwargs
457+ ):
439458 if mode in ["wb" , "rb" ]:
440459 mode = mode .replace ("b" , "" )
441460 super ().__init__ (file , mode , compression , ** kwargs )
@@ -461,16 +480,16 @@ class MMapWrapper(BaseIterator):
461480
462481 """
463482
464- def __init__ (self , f ):
483+ def __init__ (self , f : IO ):
465484 self .mmap = mmap .mmap (f .fileno (), 0 , access = mmap .ACCESS_READ )
466485
467- def __getattr__ (self , name ):
486+ def __getattr__ (self , name : str ):
468487 return getattr (self .mmap , name )
469488
470- def __iter__ (self ):
489+ def __iter__ (self ) -> "MMapWrapper" :
471490 return self
472491
473- def __next__ (self ):
492+ def __next__ (self ) -> str :
474493 newline = self .mmap .readline ()
475494
476495 # readline returns bytes, not str, but Python's CSV reader
@@ -491,16 +510,16 @@ class UTF8Recoder(BaseIterator):
491510 Iterator that reads an encoded stream and re-encodes the input to UTF-8
492511 """
493512
494- def __init__ (self , f , encoding ):
513+ def __init__ (self , f : BinaryIO , encoding : str ):
495514 self .reader = codecs .getreader (encoding )(f )
496515
497- def read (self , bytes = - 1 ):
516+ def read (self , bytes : int = - 1 ) -> bytes :
498517 return self .reader .read (bytes ).encode ("utf-8" )
499518
500- def readline (self ):
519+ def readline (self ) -> bytes :
501520 return self .reader .readline ().encode ("utf-8" )
502521
503- def next (self ):
522+ def next (self ) -> bytes :
504523 return next (self .reader ).encode ("utf-8" )
505524
506525
@@ -511,5 +530,7 @@ def UnicodeReader(f, dialect=csv.excel, encoding="utf-8", **kwds):
511530 return csv .reader (f , dialect = dialect , ** kwds )
512531
513532
def UnicodeWriter(
    f: TextIO, dialect: Type[csv.Dialect] = csv.excel, encoding: str = "utf-8", **kwds
):
    """Return a ``csv.writer`` that writes to ``f``.

    ``dialect`` and any extra keyword arguments are forwarded to
    ``csv.writer``. ``encoding`` is accepted but not used by this function.
    """
    writer_options = dict(kwds, dialect=dialect)
    return csv.writer(f, **writer_options)
0 commit comments