Skip to content

Commit 4b2272d

Browse files
authored
file_packager.py: Round two of cleanups and refactorings. NFC (#16049)
The biggest change here is the use of a DataFile class over a raw dict. This simplifies field access and makes things more explicit. I tried to use `namedtuple` here, but we need to write to at least one of the fields.
1 parent bcc2cf4 commit 4b2272d

File tree

1 file changed

+117
-109
lines changed

1 file changed

+117
-109
lines changed

tools/file_packager.py

Lines changed: 117 additions & 109 deletions
Original file line numberDiff line numberDiff line change
@@ -58,32 +58,26 @@
5858
"""
5959

6060
import base64
61+
import ctypes
62+
import fnmatch
63+
import json
6164
import os
62-
import sys
63-
import shutil
65+
import posixpath
6466
import random
67+
import shutil
68+
import sys
6569
import uuid
66-
import ctypes
70+
from subprocess import PIPE
6771

6872
__scriptdir__ = os.path.dirname(os.path.abspath(__file__))
6973
__rootdir__ = os.path.dirname(__scriptdir__)
7074
sys.path.append(__rootdir__)
7175

72-
import posixpath
7376
from tools import shared, utils, js_manipulation
74-
from subprocess import PIPE
75-
import fnmatch
76-
import json
7777

78-
if len(sys.argv) == 1:
79-
print('''Usage: file_packager TARGET [--preload A [B..]] [--embed C [D..]] [--exclude E [F..]]] [--js-output=OUTPUT.js] [--no-force] [--use-preload-cache] [--indexedDB-name=EM_PRELOAD_CACHE] [--separate-metadata] [--lz4] [--use-preload-plugins]
80-
See the source for more details.''')
81-
sys.exit(0)
8278

8379
DEBUG = os.environ.get('EMCC_DEBUG')
8480

85-
data_target = sys.argv[1]
86-
8781
IMAGE_SUFFIXES = ('.jpg', '.png', '.bmp')
8882
AUDIO_SUFFIXES = ('.ogg', '.wav', '.mp3')
8983
AUDIO_MIMETYPES = {'ogg': 'audio/ogg', 'wav': 'audio/wav', 'mp3': 'audio/mpeg'}
@@ -98,6 +92,43 @@
9892
new_data_files = []
9993

10094

95+
class Options:
  """Packager settings, initialised to their defaults.

  A single module-level instance is created and then mutated while the
  command line is parsed.
  """

  def __init__(self):
    # Output / invocation settings.
    self.export_name = 'Module'
    self.jsoutput = None
    self.from_emcc = False
    self.force = True
    self.support_node = True

    # Whether any input file uses the 'preload' mode.
    self.has_preloaded = False
    self.use_preload_plugins = False
    self.lz4 = False

    # When True, IndexedDB (IDBFS in library_idbfs.js) locally caches the
    # VFS XHR, so later page loads can read the data from the offline cache
    # instead.
    self.use_preload_cache = False
    self.indexeddb_name = 'EM_PRELOAD_CACHE'

    # When True, the package metadata is stored separately from the
    # js-output file, which keeps the js-output file immutable when the
    # package content changes. When False, the metadata lives inside the
    # js-output file, so that file changes on every invocation of this
    # packager tool.
    self.separate_metadata = False
115+
116+
117+
class DataFile:
  """A single file (or directory) queued for packaging.

  This is a plain mutable record rather than a namedtuple because `dstpath`
  is rewritten while paths are normalised.

  Attributes:
    srcpath: path of the file on the host filesystem.
    dstpath: path the file will have inside the virtual filesystem.
    mode: packaging mode; the packager checks for 'preload' and 'embed'.
    explicit_dst_path: True when the destination was given explicitly
      (src@dst notation) instead of being inferred from the source path.
    data_start: offset of the file's first byte inside the data bundle, or
      None until the bundle has been written.
    data_end: offset one past the file's last byte inside the data bundle,
      or None until the bundle has been written.
  """

  def __init__(self, srcpath, dstpath, mode, explicit_dst_path):
    self.srcpath = srcpath
    self.dstpath = dstpath
    self.mode = mode
    self.explicit_dst_path = explicit_dst_path
    # Filled in later when the data bundle is generated. Declared here so
    # every instance has the same set of attributes and reading them early
    # yields None instead of raising AttributeError.
    self.data_start = None
    self.data_end = None

  def __repr__(self):
    return ('DataFile(srcpath=%r, dstpath=%r, mode=%r, explicit_dst_path=%r)'
            % (self.srcpath, self.dstpath, self.mode, self.explicit_dst_path))
123+
124+
125+
options = Options()
126+
127+
128+
def err(*args):
  """Print `args` to standard error exactly as print() would format them."""
  stream = sys.stderr
  print(*args, file=stream)
130+
131+
101132
def base64_encode(b):
102133
b64 = base64.b64encode(b)
103134
return b64.decode('ascii')
@@ -146,56 +177,36 @@ def add(mode, rootpathsrc, rootpathdst):
146177
if not should_ignore(fullname):
147178
new_dirnames.append(name)
148179
elif DEBUG:
149-
print('Skipping directory "%s" from inclusion in the emscripten '
150-
'virtual file system.' % fullname, file=sys.stderr)
180+
err('Skipping directory "%s" from inclusion in the emscripten '
181+
'virtual file system.' % fullname)
151182
for name in filenames:
152183
fullname = os.path.join(dirpath, name)
153184
if not should_ignore(fullname):
154185
# Convert source filename relative to root directory of target FS.
155186
dstpath = os.path.join(rootpathdst,
156187
os.path.relpath(fullname, rootpathsrc))
157-
new_data_files.append({'srcpath': fullname, 'dstpath': dstpath,
158-
'mode': mode, 'explicit_dst_path': True})
188+
new_data_files.append(DataFile(srcpath=fullname, dstpath=dstpath,
189+
mode=mode, explicit_dst_path=True))
159190
elif DEBUG:
160-
print('Skipping file "%s" from inclusion in the emscripten '
161-
'virtual file system.' % fullname, file=sys.stderr)
191+
err('Skipping file "%s" from inclusion in the emscripten '
192+
'virtual file system.' % fullname)
162193
dirnames.clear()
163194
dirnames.extend(new_dirnames)
164195

165196

166-
class Options:
167-
def __init__(self):
168-
self.export_name = 'Module'
169-
self.has_preloaded = False
170-
self.jsoutput = None
171-
self.from_emcc = False
172-
self.force = True
173-
# If set to True, IndexedDB (IDBFS in library_idbfs.js) is used to locally
174-
# cache VFS XHR so that subsequent page loads can read the data from the
175-
# offline cache instead.
176-
self.use_preload_cache = False
177-
self.indexeddb_name = 'EM_PRELOAD_CACHE'
178-
# If set to True, the package metadata is stored separately from js-output
179-
# file which makes js-output file immutable to the package content changes.
180-
# If set to False, the package metadata is stored inside the js-output file
181-
# which makes js-output file to mutate on each invocation of this packager tool.
182-
self.separate_metadata = False
183-
self.lz4 = False
184-
self.use_preload_plugins = False
185-
self.support_node = True
186-
187-
188-
options = Options()
189-
190-
191197
def main():
198+
if len(sys.argv) == 1:
199+
err('''Usage: file_packager TARGET [--preload A [B..]] [--embed C [D..]] [--exclude E [F..]]] [--js-output=OUTPUT.js] [--no-force] [--use-preload-cache] [--indexedDB-name=EM_PRELOAD_CACHE] [--separate-metadata] [--lz4] [--use-preload-plugins]
200+
See the source for more details.''')
201+
return 1
202+
203+
data_target = sys.argv[1]
192204
data_files = []
193205
plugins = []
194206
leading = ''
195207

196208
for arg in sys.argv[2:]:
197209
if arg == '--preload':
198-
options.has_preloaded = True
199210
leading = 'preload'
200211
elif arg == '--embed':
201212
leading = 'embed'
@@ -211,7 +222,7 @@ def main():
211222
options.indexeddb_name = arg.split('=', 1)[1] if '=' in arg else None
212223
leading = ''
213224
elif arg == '--no-heap-copy':
214-
print('ignoring legacy flag --no-heap-copy (that is the only mode supported now)')
225+
err('ignoring legacy flag --no-heap-copy (that is the only mode supported now)')
215226
leading = ''
216227
elif arg == '--separate-metadata':
217228
options.separate_metadata = True
@@ -256,100 +267,97 @@ def main():
256267
# Use source path as destination path.
257268
srcpath = dstpath = arg.replace('@@', '@')
258269
if os.path.isfile(srcpath) or os.path.isdir(srcpath):
259-
data_files.append({'srcpath': srcpath, 'dstpath': dstpath, 'mode': mode,
260-
'explicit_dst_path': uses_at_notation})
270+
data_files.append(DataFile(srcpath=srcpath, dstpath=dstpath, mode=mode,
271+
explicit_dst_path=uses_at_notation))
261272
else:
262-
print('error: ' + arg + ' does not exist', file=sys.stderr)
273+
err('error: ' + arg + ' does not exist')
263274
return 1
264275
elif leading == 'exclude':
265276
excluded_patterns.append(arg)
266277
else:
267-
print('Unknown parameter:', arg, file=sys.stderr)
278+
err('Unknown parameter:', arg)
268279
return 1
269280

270-
if (not options.force) and not data_files:
271-
options.has_preloaded = False
272-
if not options.has_preloaded or options.jsoutput is None:
273-
assert not options.separate_metadata, (
274-
'cannot separate-metadata without both --preloaded files '
275-
'and a specified --js-output')
281+
options.has_preloaded = any(f.mode == 'preload' for f in data_files)
282+
283+
if options.separate_metadata:
284+
if not options.has_preloaded or not options.jsoutput:
285+
err('cannot separate-metadata without both --preloaded files '
286+
'and a specified --js-output')
287+
return 1
276288

277289
if not options.from_emcc:
278-
print('Remember to build the main file with -s FORCE_FILESYSTEM=1 '
279-
'so that it includes support for loading this file package',
280-
file=sys.stderr)
290+
err('Remember to build the main file with -s FORCE_FILESYSTEM=1 '
291+
'so that it includes support for loading this file package')
281292

282293
if options.jsoutput and os.path.abspath(options.jsoutput) == os.path.abspath(data_target):
283-
print('error: TARGET should not be the same value of --js-output',
284-
file=sys.stderr)
294+
err('error: TARGET should not be the same value of --js-output')
285295
return 1
286296

287297
for file_ in data_files:
288-
if not should_ignore(file_['srcpath']):
289-
if os.path.isdir(file_['srcpath']):
290-
add(file_['mode'], file_['srcpath'], file_['dstpath'])
298+
if not should_ignore(file_.srcpath):
299+
if os.path.isdir(file_.srcpath):
300+
add(file_.mode, file_.srcpath, file_.dstpath)
291301
else:
292302
new_data_files.append(file_)
293303
data_files = [file_ for file_ in new_data_files
294-
if not os.path.isdir(file_['srcpath'])]
304+
if not os.path.isdir(file_.srcpath)]
295305
if len(data_files) == 0:
296-
print('Nothing to do!', file=sys.stderr)
306+
err('Nothing to do!')
297307
sys.exit(1)
298308

299309
# Absolutize paths, and check that they make sense
300310
# os.getcwd() always returns the hard path with any symbolic links resolved,
301311
# even if we cd'd into a symbolic link.
302312
curr_abspath = os.path.abspath(os.getcwd())
303313

304-
for file_ in data_files:
305-
if not file_['explicit_dst_path']:
314+
for file_ in data_files:
315+
if not file_.explicit_dst_path:
306316
# This file was not defined with src@dst, so we inferred the destination
307317
# from the source. In that case, we require that the destination not be
308318
# under the current location
309-
path = file_['dstpath']
319+
path = file_.dstpath
310320
# Use os.path.realpath to resolve any symbolic links to hard paths,
311321
# to match the structure in curr_abspath.
312322
abspath = os.path.realpath(os.path.abspath(path))
313323
if DEBUG:
314-
print(path, abspath, curr_abspath, file=sys.stderr)
324+
err(path, abspath, curr_abspath)
315325
if not abspath.startswith(curr_abspath):
316-
print('Error: Embedding "%s" which is below the current directory '
317-
'"%s". This is invalid since the current directory becomes the '
318-
'root that the generated code will see' % (path, curr_abspath),
319-
file=sys.stderr)
326+
err('Error: Embedding "%s" which is below the current directory '
327+
'"%s". This is invalid since the current directory becomes the '
328+
'root that the generated code will see' % (path, curr_abspath))
320329
sys.exit(1)
321-
file_['dstpath'] = abspath[len(curr_abspath) + 1:]
330+
file_.dstpath = abspath[len(curr_abspath) + 1:]
322331
if os.path.isabs(path):
323-
print('Warning: Embedding an absolute file/directory name "%s" to the '
324-
'virtual filesystem. The file will be made available in the '
325-
'relative path "%s". You can use the explicit syntax '
326-
'--preload-file srcpath@dstpath to explicitly specify the target '
327-
'location the absolute source path should be directed to.'
328-
% (path, file_['dstpath']), file=sys.stderr)
332+
err('Warning: Embedding an absolute file/directory name "%s" to the '
333+
'virtual filesystem. The file will be made available in the '
334+
'relative path "%s". You can use the explicit syntax '
335+
'--preload-file srcpath@dstpath to explicitly specify the target '
336+
'location the absolute source path should be directed to.'
337+
% (path, file_.dstpath))
329338

330339
for file_ in data_files:
331340
# name in the filesystem, native and emulated
332-
file_['dstpath'] = file_['dstpath'].replace(os.path.sep, '/')
341+
file_.dstpath = file_.dstpath.replace(os.path.sep, '/')
333342
# If user has submitted a directory name as the destination but omitted
334343
# the destination filename, use the filename from source file
335-
if file_['dstpath'].endswith('/'):
336-
file_['dstpath'] = file_['dstpath'] + os.path.basename(file_['srcpath'])
344+
if file_.dstpath.endswith('/'):
345+
file_.dstpath = file_.dstpath + os.path.basename(file_.srcpath)
337346
# make destination path always relative to the root
338-
file_['dstpath'] = posixpath.normpath(os.path.join('/', file_['dstpath']))
347+
file_.dstpath = posixpath.normpath(os.path.join('/', file_.dstpath))
339348
if DEBUG:
340-
print('Packaging file "%s" to VFS in path "%s".'
341-
% (file_['srcpath'], file_['dstpath']), file=sys.stderr)
349+
err('Packaging file "%s" to VFS in path "%s".' % (file_.srcpath, file_.dstpath))
342350

343351
# Remove duplicates (can occur naively, for example preload dir/, preload dir/subdir/)
344-
seen = {}
352+
seen = set()
345353

346354
def was_seen(name):
347-
if seen.get(name):
348-
return True
349-
seen[name] = 1
355+
if name in seen:
356+
return True
357+
seen.add(name)
350358
return False
351359

352-
data_files = [file_ for file_ in data_files if not was_seen(file_['dstpath'])]
360+
data_files = [file_ for file_ in data_files if not was_seen(file_.dstpath)]
353361

354362
if AV_WORKAROUND:
355363
random.shuffle(data_files)
@@ -361,7 +369,7 @@ def was_seen(name):
361369

362370
metadata = {'files': []}
363371

364-
ret = generate_js(data_files, metadata)
372+
ret = generate_js(data_target, data_files, metadata)
365373

366374
if options.force or len(data_files):
367375
if options.jsoutput is None:
@@ -386,7 +394,7 @@ def was_seen(name):
386394
return 0
387395

388396

389-
def generate_js(data_files, metadata):
397+
def generate_js(data_target, data_files, metadata):
390398
# emcc will add this to the output itself, so it is only needed for
391399
# standalone calls
392400
if options.from_emcc:
@@ -415,7 +423,7 @@ def generate_js(data_files, metadata):
415423
# Set up folders
416424
partial_dirs = []
417425
for file_ in data_files:
418-
dirname = os.path.dirname(file_['dstpath'])
426+
dirname = os.path.dirname(file_.dstpath)
419427
dirname = dirname.lstrip('/') # absolute paths start with '/', remove that
420428
if dirname != '':
421429
parts = dirname.split('/')
@@ -432,21 +440,21 @@ def generate_js(data_files, metadata):
432440
start = 0
433441
with open(data_target, 'wb') as data:
434442
for file_ in data_files:
435-
file_['data_start'] = start
436-
with open(file_['srcpath'], 'rb') as f:
443+
file_.data_start = start
444+
with open(file_.srcpath, 'rb') as f:
437445
curr = f.read()
438-
file_['data_end'] = start + len(curr)
446+
file_.data_end = start + len(curr)
439447
if AV_WORKAROUND:
440448
curr += '\x00'
441449
start += len(curr)
442450
data.write(curr)
443451

444452
# TODO: sha256sum on data_target
445453
if start > 256 * 1024 * 1024:
446-
print('warning: file packager is creating an asset bundle of %d MB. '
447-
'this is very large, and browsers might have trouble loading it. '
448-
'see https://hacks.mozilla.org/2015/02/synchronous-execution-and-filesystem-access-in-emscripten/'
449-
% (start / (1024 * 1024)), file=sys.stderr)
454+
err('warning: file packager is creating an asset bundle of %d MB. '
455+
'this is very large, and browsers might have trouble loading it. '
456+
'see https://hacks.mozilla.org/2015/02/synchronous-execution-and-filesystem-access-in-emscripten/'
457+
% (start / (1024 * 1024)))
450458

451459
create_preloaded = '''
452460
Module['FS_createPreloadedFile'](this.name, null, byteArray, true, true, function() {
@@ -497,22 +505,22 @@ def generate_js(data_files, metadata):
497505
}\n''' % (create_preloaded if options.use_preload_plugins else create_data)
498506

499507
for (counter, file_) in enumerate(data_files):
500-
filename = file_['dstpath']
508+
filename = file_.dstpath
501509
dirname = os.path.dirname(filename)
502510
basename = os.path.basename(filename)
503-
if file_['mode'] == 'embed':
511+
if file_.mode == 'embed':
504512
# Embed
505-
data = base64_encode(utils.read_binary(file_['srcpath']))
513+
data = base64_encode(utils.read_binary(file_.srcpath))
506514
code += " var fileData%d = '%s';\n" % (counter, data)
507515
# canOwn this data in the filesystem (i.e. there is no need to create a copy in the FS layer).
508516
code += (" Module['FS_createDataFile']('%s', '%s', decodeBase64(fileData%d), true, true, true);\n"
509517
% (dirname, basename, counter))
510-
elif file_['mode'] == 'preload':
518+
elif file_.mode == 'preload':
511519
# Preload
512520
metadata_el = {
513-
'filename': file_['dstpath'],
514-
'start': file_['data_start'],
515-
'end': file_['data_end'],
521+
'filename': file_.dstpath,
522+
'start': file_.data_start,
523+
'end': file_.data_end,
516524
}
517525
if filename[-4:] in AUDIO_SUFFIXES:
518526
metadata_el['audio'] = 1

0 commit comments

Comments
 (0)