From e3202a02b8c58c4031e4c461d8e684b73f441781 Mon Sep 17 00:00:00 2001 From: Sam Clegg Date: Wed, 20 Aug 2025 17:01:42 -0700 Subject: [PATCH] [file-packager] Drop support for --embed with JS output The object file output is more efficient way of embedding and has been recommended via a warning since #16050 (early 2022). With this simplification the file packager now has essentially two output modes: 1. Preloading via JS 2. Embedding via and object file --- ChangeLog.md | 3 + test/test_other.py | 4 +- tools/file_packager.py | 155 ++++++++++++++++++----------------------- 3 files changed, 71 insertions(+), 91 deletions(-) diff --git a/ChangeLog.md b/ChangeLog.md index 4d2b8ef7c989e..4e9a20cd43fca 100644 --- a/ChangeLog.md +++ b/ChangeLog.md @@ -20,6 +20,9 @@ See docs/process.md for more on how version tagging works. 4.0.20 (in development) ----------------------- +- The standalone `file_packager.py` script no longer supports `--embed` with JS + output (use `--obj-output` is now required for embedding data). This usage + has been producing a warning since #16050 which is now an error. (#25049) - Embind now requires C++17 or newer. See #24850. 4.0.19 - 11/04/25 diff --git a/test/test_other.py b/test/test_other.py index a6b5a61f22606..8b7cf3ba59cc7 100644 --- a/test/test_other.py +++ b/test/test_other.py @@ -3953,8 +3953,8 @@ def test_file_packager_embed(self): create_file('data.txt', 'hello data') # Without --obj-output we issue a warning - err = self.run_process([FILE_PACKAGER, 'test.data', '--embed', 'data.txt', '--js-output=data.js'], stderr=PIPE).stderr - self.assertContained('--obj-output is recommended when using --embed', err) + err = self.expect_fail([FILE_PACKAGER, 'test.data', '--embed', 'data.txt', '--js-output=data.js']) + self.assertContained('error: --obj-output is required when using --embed', err) self.run_process([FILE_PACKAGER, 'test.data', '--embed', 'data.txt', '--obj-output=data.o']) diff --git a/tools/file_packager.py b/tools/file_packager.py index 5d5611f16dee5..087f8270f8137 100755 --- a/tools/file_packager.py +++ b/tools/file_packager.py @@ -67,7 +67,6 @@ subdir\file, in JS it will be subdir/file. For simplicity we treat the web platform as a *NIX. """ -import base64 import ctypes import fnmatch import hashlib @@ -138,11 +137,6 @@ def err(*args): print(*args, file=sys.stderr) -def base64_encode(b): - b64 = base64.b64encode(b) - return b64.decode('ascii') - - def has_hidden_attribute(filepath): """Win32 code to test whether the given file has the hidden property set.""" @@ -455,9 +449,10 @@ def main(): # noqa: C901, PLR0912, PLR0915 options.has_embedded = any(f.mode == 'embed' for f in data_files) if options.has_preloaded and options.has_embedded: - diagnostics.warn('support for using --preload and --embed in the same command is scheduled ' - 'for deprecation. If you need this feature please comment at ' - 'https://github.com/emscripten-core/emscripten/issues/24803') + diagnostics.error('--preload and --embed are mutually exclusive (See https://github.com/emscripten-core/emscripten/issues/24803') + + if options.has_embedded and not options.obj_output: + diagnostics.error('--obj-output is required when using --embed. This outputs an object file for linking directly into your application and is more efficient than the old JS encoding') if options.separate_metadata and (not options.has_preloaded or not options.jsoutput): diagnostics.error('cannot separate-metadata without both --preloaded files and a specified --js-output') @@ -543,8 +538,6 @@ def was_seen(name): data_files = sorted(data_files, key=lambda file_: file_.dstpath) data_files = [file_ for file_ in data_files if not was_seen(file_.dstpath)] - metadata = {'files': []} - if options.depfile: targets = [] if options.obj_output: @@ -566,26 +559,26 @@ def was_seen(name): if not options.has_embedded: diagnostics.error('--obj-output is only applicable when embedding files') generate_object_file(data_files) - if not options.has_preloaded: - return 0 + else: + metadata = {'files': []} - ret = generate_js(data_target, data_files, metadata) + ret = generate_preload_js(data_target, data_files, metadata) - if options.force or data_files: - if options.jsoutput is None: - print(ret) - else: - # Overwrite the old jsoutput file (if exists) only when its content - # differs from the current generated one, otherwise leave the file - # untouched preserving its old timestamp - if os.path.isfile(options.jsoutput): - old = utils.read_file(options.jsoutput) - if old != ret: - utils.write_file(options.jsoutput, ret) + if options.force or data_files: + if options.jsoutput is None: + print(ret) else: - utils.write_file(options.jsoutput, ret) - if options.separate_metadata: - utils.write_file(options.jsoutput + '.metadata', json.dumps(metadata, separators=(',', ':'))) + # Overwrite the old jsoutput file (if exists) only when its content + # differs from the current generated one, otherwise leave the file + # untouched preserving its old timestamp + if os.path.isfile(options.jsoutput): + old = utils.read_file(options.jsoutput) + if old != ret: + utils.write_file(options.jsoutput, ret) + else: + utils.write_file(options.jsoutput, ret) + if options.separate_metadata: + utils.write_file(options.jsoutput + '.metadata', json.dumps(metadata, separators=(',', ':'))) return 0 @@ -598,7 +591,7 @@ def escape_for_makefile(fpath): return fpath.replace('$', '$$').replace('#', '\\#').replace(' ', '\\ ') -def generate_js(data_target, data_files, metadata): +def generate_preload_js(data_target, data_files, metadata): # emcc will add this to the output itself, so it is only needed for # standalone calls if options.from_emcc: @@ -646,6 +639,7 @@ def generate_js(data_target, data_files, metadata): # Set up folders partial_dirs = [] for file_ in data_files: + assert file_.mode == 'preload' dirname = os.path.dirname(file_.dstpath) dirname = dirname.lstrip('/') # absolute paths start with '/', remove that if dirname != '': @@ -657,49 +651,45 @@ def generate_js(data_target, data_files, metadata): % (json.dumps('/' + '/'.join(parts[:i])), json.dumps(parts[i]))) partial_dirs.append(partial) - if options.has_preloaded: - # Bundle all datafiles into one archive. Avoids doing lots of simultaneous - # XHRs which has overhead. - start = 0 - with open(data_target, 'wb') as data: - for file_ in data_files: - file_.data_start = start - curr = utils.read_binary(file_.srcpath) - file_.data_end = start + len(curr) - start += len(curr) - data.write(curr) - - if start > 256 * 1024 * 1024: - diagnostics.warn('file packager is creating an asset bundle of %d MB. ' - 'this is very large, and browsers might have trouble loading it. ' - 'see https://hacks.mozilla.org/2015/02/synchronous-execution-and-filesystem-access-in-emscripten/' - % (start / (1024 * 1024))) - - create_preloaded = ''' - try { - // canOwn this data in the filesystem, it is a slice into the heap that will never change - await Module['FS_preloadFile'](name, null, data, true, true, false, true); - Module['removeRunDependency'](`fp ${name}`); - } catch (e) { - err(`Preloading file ${name} failed`, e); - }\n''' - create_data = '''// canOwn this data in the filesystem, it is a slice into the heap that will never change - Module['FS_createDataFile'](name, null, data, true, true, true); - Module['removeRunDependency'](`fp ${name}`);''' - - finish_handler = create_preloaded if options.use_preload_plugins else create_data + # Bundle all datafiles into one archive. Avoids doing lots of simultaneous + # XHRs which has overhead. + start = 0 + with open(data_target, 'wb') as data: + for file_ in data_files: + file_.data_start = start + curr = utils.read_binary(file_.srcpath) + file_.data_end = start + len(curr) + start += len(curr) + data.write(curr) + + if start > 256 * 1024 * 1024: + diagnostics.warn('file packager is creating an asset bundle of %d MB. ' + 'this is very large, and browsers might have trouble loading it. ' + 'see https://hacks.mozilla.org/2015/02/synchronous-execution-and-filesystem-access-in-emscripten/' + % (start / (1024 * 1024))) + + create_preloaded = ''' + try { + // canOwn this data in the filesystem, it is a slice into the heap that will never change + await Module['FS_preloadFile'](name, null, data, true, true, false, true); + Module['removeRunDependency'](`fp ${name}`); + } catch (e) { + err(`Preloading file ${name} failed`, e); + }\n''' + create_data = '''// canOwn this data in the filesystem, it is a slice into the heap that will never change + Module['FS_createDataFile'](name, null, data, true, true, true); + Module['removeRunDependency'](`fp ${name}`);''' - if not options.lz4: - # Data requests - for getting a block of data out of the big archive - have - # a similar API to XHRs - code += ''' - for (var file of metadata['files']) { - var name = file['filename'] - Module['addRunDependency'](`fp ${name}`); - }\n''' + finish_handler = create_preloaded if options.use_preload_plugins else create_data - if options.has_embedded and not options.obj_output: - diagnostics.warn('--obj-output is recommended when using --embed. This outputs an object file for linking directly into your application is more efficient than JS encoding') + if not options.lz4: + # Data requests - for getting a block of data out of the big archive - have + # a similar API to XHRs + code += ''' + for (var file of metadata['files']) { + var name = file['filename'] + Module['addRunDependency'](`fp ${name}`); + }\n''' catch_handler = '' if options.export_es6: @@ -708,27 +698,14 @@ def generate_js(data_target, data_files, metadata): loadDataReject(error); })''' - for counter, file_ in enumerate(data_files): + for file_ in data_files: filename = file_.dstpath dirname = os.path.dirname(filename) - basename = os.path.basename(filename) - if file_.mode == 'embed': - if not options.obj_output: - # Embed (only needed when not generating object file output) - data = base64_encode(utils.read_binary(file_.srcpath)) - code += " var fileData%d = '%s';\n" % (counter, data) - # canOwn this data in the filesystem (i.e. there is no need to create a copy in the FS layer). - code += (" Module['FS_createDataFile']('%s', '%s', atob(fileData%d), true, true, true);\n" - % (dirname, basename, counter)) - elif file_.mode == 'preload': - # Preload - metadata['files'].append({ - 'filename': file_.dstpath, - 'start': file_.data_start, - 'end': file_.data_end, - }) - else: - assert 0 + metadata['files'].append({ + 'filename': file_.dstpath, + 'start': file_.data_start, + 'end': file_.data_end, + }) if options.has_preloaded: if not options.lz4: