88import tarfile
99import zipfile
1010
11- import requests
1211import six
1312import torch
1413import torchaudio
@@ -66,44 +65,6 @@ def makedir_exist_ok(dirpath):
6665 raise
6766
6867
69- def download_url_resume (url , download_folder , resume_byte_pos = None ):
70- """Download url to disk with possible resumption.
71-
72- Args:
73- url (str): Url.
74- download_folder (str): Folder to download file.
75- resume_byte_pos (int): Position of byte from where to resume the download.
76- """
77- # Get size of file
78- r = requests .head (url )
79- file_size = int (r .headers .get ("content-length" , 0 ))
80-
81- # Append information to resume download at specific byte position to header
82- resume_header = (
83- {"Range" : "bytes={}-" .format (resume_byte_pos )} if resume_byte_pos else None
84- )
85-
86- # Establish connection
87- r = requests .get (url , stream = True , headers = resume_header )
88-
89- # Set configuration
90- n_block = 32
91- block_size = 1024
92- initial_pos = resume_byte_pos if resume_byte_pos else 0
93- mode = "ab" if resume_byte_pos else "wb"
94-
95- filename = os .path .basename (url )
96- filepath = os .path .join (download_folder , os .path .basename (url ))
97-
98- with open (filepath , mode ) as f :
99- with tqdm (
100- unit = "B" , unit_scale = True , unit_divisor = 1024 , total = file_size
101- ) as pbar :
102- for chunk in r .iter_content (n_block * block_size ):
103- f .write (chunk )
104- pbar .update (len (chunk ))
105-
106-
def _raise_on_hash_mismatch(filepath, hash_value, hash_type):
    """Raise ``RuntimeError`` if a hash was supplied and the file does not match it."""
    if hash_value and not validate_download_url(filepath, hash_value, hash_type):
        raise RuntimeError(
            "The hash of {} does not match. Delete the file manually and retry.".format(
                filepath
            )
        )


def download_url(url, download_folder, hash_value=None, hash_type="sha256"):
    """Download ``url`` into ``download_folder``, resuming a partial file if possible.

    The local file name is the basename of ``url``. If a file of the same
    size as the remote one already exists, nothing is downloaded. If a
    smaller file exists, only the missing bytes are requested; when the
    server does not honour the range request the file is rewritten from
    scratch. Whenever ``hash_value`` is given, the resulting file is
    validated, both when the download is skipped and after new bytes
    have been written.

    Args:
        url (str): Url.
        download_folder (str): Folder to download file.
        hash_value (str): Hash for url.
        hash_type (str): Hash type.

    Raises:
        RuntimeError: If ``hash_value`` is given and the file does not match it.
    """
    filepath = os.path.join(download_folder, os.path.basename(url))
    file_exists = os.path.exists(filepath)
    local_size = os.path.getsize(filepath) if file_exists else 0

    # Probe the remote size. The response is closed immediately so that the
    # connection opened for the probe is not leaked.
    with urllib.request.urlopen(url) as response:
        url_size = int(response.info().get("Content-Length", -1))

    # If we already have the whole file, there is no need to download it again
    if file_exists and url_size == local_size:
        _raise_on_hash_mismatch(filepath, hash_value, hash_type)
        return

    req = urllib.request.Request(url)
    if local_size > 0:
        # If the file exists, then ask only for the remainder
        req.add_header("Range", "bytes={}-".format(local_size))

    # Connect before touching the local file, so that a failed request does
    # not truncate or create it.
    with urllib.request.urlopen(req) as upointer:
        # A server that ignores the Range header answers with the complete
        # body and no Content-Range header; appending that to the partial
        # file would corrupt it, so start over in that case.
        resumed = local_size > 0 and upointer.info().get("Content-Range") is not None
        mode = "ab" if resumed else "wb"

        with open(filepath, mode) as fpointer, tqdm(
            unit="B",
            unit_scale=True,
            unit_divisor=1024,
            # tqdm expects None, not a negative number, for an unknown total
            total=url_size if url_size >= 0 else None,
            initial=local_size if resumed else 0,
        ) as pbar:
            block_size = 32 * 1024
            while True:
                chunk = upointer.read(block_size)
                if not chunk:
                    break
                fpointer.write(chunk)
                pbar.update(len(chunk))

    # Validate what was just written, not only files that were already complete
    _raise_on_hash_mismatch(filepath, hash_value, hash_type)
120+
121+
122+ def validate_download_url (filepath , hash_value , hash_type = "sha256" ):
149123 """Validate a given file with its hash.
150- The downloaded file is hashed and compared to a pre-registered
151- has value to validate the download procedure.
152124
153125 Args:
154126 url (str): Url.
155127 download_folder (str): Folder to download file.
156128 hash_value (str): Hash for url.
157129 hash_type (str): Hash type.
158130 """
159- filepath = os .path .join (download_folder , os .path .basename (url ))
160131
161132 if hash_type == "sha256" :
162133 sha = hashlib .sha256 ()
0 commit comments