From e3202a02b8c58c4031e4c461d8e684b73f441781 Mon Sep 17 00:00:00 2001
From: Sam Clegg <sbc@chromium.org>
Date: Wed, 20 Aug 2025 17:01:42 -0700
Subject: [PATCH] [file-packager] Drop support for --embed with JS output

The object file output is more efficient way of embedding and has been
recommended via a warning since #16050 (early 2022).

With this simplification the file packager now has essentially two
output modes:

1. Preloading via JS
2. Embedding via and object file
---
 ChangeLog.md           |   3 +
 test/test_other.py     |   4 +-
 tools/file_packager.py | 155 ++++++++++++++++++-----------------------
 3 files changed, 71 insertions(+), 91 deletions(-)

diff --git a/ChangeLog.md b/ChangeLog.md
index 4d2b8ef7c989e..4e9a20cd43fca 100644
--- a/ChangeLog.md
+++ b/ChangeLog.md
@@ -20,6 +20,9 @@ See docs/process.md for more on how version tagging works.
 
 4.0.20 (in development)
 -----------------------
+- The standalone `file_packager.py` script no longer supports `--embed` with JS
+  output (use `--obj-output` is now required for embedding data).  This usage
+  has been producing a warning since #16050 which is now an error.  (#25049)
 - Embind now requires C++17 or newer. See #24850.
 
 4.0.19 - 11/04/25
diff --git a/test/test_other.py b/test/test_other.py
index a6b5a61f22606..8b7cf3ba59cc7 100644
--- a/test/test_other.py
+++ b/test/test_other.py
@@ -3953,8 +3953,8 @@ def test_file_packager_embed(self):
     create_file('data.txt', 'hello data')
 
     # Without --obj-output we issue a warning
-    err = self.run_process([FILE_PACKAGER, 'test.data', '--embed', 'data.txt', '--js-output=data.js'], stderr=PIPE).stderr
-    self.assertContained('--obj-output is recommended when using --embed', err)
+    err = self.expect_fail([FILE_PACKAGER, 'test.data', '--embed', 'data.txt', '--js-output=data.js'])
+    self.assertContained('error: --obj-output is required when using --embed', err)
 
     self.run_process([FILE_PACKAGER, 'test.data', '--embed', 'data.txt', '--obj-output=data.o'])
 
diff --git a/tools/file_packager.py b/tools/file_packager.py
index 5d5611f16dee5..087f8270f8137 100755
--- a/tools/file_packager.py
+++ b/tools/file_packager.py
@@ -67,7 +67,6 @@
     subdir\file, in JS it will be subdir/file. For simplicity we treat the web platform as a *NIX.
 """
 
-import base64
 import ctypes
 import fnmatch
 import hashlib
@@ -138,11 +137,6 @@ def err(*args):
   print(*args, file=sys.stderr)
 
 
-def base64_encode(b):
-  b64 = base64.b64encode(b)
-  return b64.decode('ascii')
-
-
 def has_hidden_attribute(filepath):
   """Win32 code to test whether the given file has the hidden property set."""
 
@@ -455,9 +449,10 @@ def main():  # noqa: C901, PLR0912, PLR0915
   options.has_embedded = any(f.mode == 'embed' for f in data_files)
 
   if options.has_preloaded and options.has_embedded:
-    diagnostics.warn('support for using --preload and --embed in the same command is scheduled '
-        'for deprecation.  If you need this feature please comment at '
-        'https://github.com/emscripten-core/emscripten/issues/24803')
+    diagnostics.error('--preload and --embed are mutually exclusive (See https://github.com/emscripten-core/emscripten/issues/24803')
+
+  if options.has_embedded and not options.obj_output:
+    diagnostics.error('--obj-output is required when using --embed.  This outputs an object file for linking directly into your application and is more efficient than the old JS encoding')
 
   if options.separate_metadata and (not options.has_preloaded or not options.jsoutput):
     diagnostics.error('cannot separate-metadata without both --preloaded files and a specified --js-output')
@@ -543,8 +538,6 @@ def was_seen(name):
   data_files = sorted(data_files, key=lambda file_: file_.dstpath)
   data_files = [file_ for file_ in data_files if not was_seen(file_.dstpath)]
 
-  metadata = {'files': []}
-
   if options.depfile:
     targets = []
     if options.obj_output:
@@ -566,26 +559,26 @@ def was_seen(name):
     if not options.has_embedded:
       diagnostics.error('--obj-output is only applicable when embedding files')
     generate_object_file(data_files)
-    if not options.has_preloaded:
-      return 0
+  else:
+    metadata = {'files': []}
 
-  ret = generate_js(data_target, data_files, metadata)
+    ret = generate_preload_js(data_target, data_files, metadata)
 
-  if options.force or data_files:
-    if options.jsoutput is None:
-      print(ret)
-    else:
-      # Overwrite the old jsoutput file (if exists) only when its content
-      # differs from the current generated one, otherwise leave the file
-      # untouched preserving its old timestamp
-      if os.path.isfile(options.jsoutput):
-        old = utils.read_file(options.jsoutput)
-        if old != ret:
-          utils.write_file(options.jsoutput, ret)
+    if options.force or data_files:
+      if options.jsoutput is None:
+        print(ret)
       else:
-        utils.write_file(options.jsoutput, ret)
-      if options.separate_metadata:
-        utils.write_file(options.jsoutput + '.metadata', json.dumps(metadata, separators=(',', ':')))
+        # Overwrite the old jsoutput file (if exists) only when its content
+        # differs from the current generated one, otherwise leave the file
+        # untouched preserving its old timestamp
+        if os.path.isfile(options.jsoutput):
+          old = utils.read_file(options.jsoutput)
+          if old != ret:
+            utils.write_file(options.jsoutput, ret)
+        else:
+          utils.write_file(options.jsoutput, ret)
+        if options.separate_metadata:
+          utils.write_file(options.jsoutput + '.metadata', json.dumps(metadata, separators=(',', ':')))
 
   return 0
 
@@ -598,7 +591,7 @@ def escape_for_makefile(fpath):
   return fpath.replace('$', '$$').replace('#', '\\#').replace(' ', '\\ ')
 
 
-def generate_js(data_target, data_files, metadata):
+def generate_preload_js(data_target, data_files, metadata):
   # emcc will add this to the output itself, so it is only needed for
   # standalone calls
   if options.from_emcc:
@@ -646,6 +639,7 @@ def generate_js(data_target, data_files, metadata):
   # Set up folders
   partial_dirs = []
   for file_ in data_files:
+    assert file_.mode == 'preload'
     dirname = os.path.dirname(file_.dstpath)
     dirname = dirname.lstrip('/') # absolute paths start with '/', remove that
     if dirname != '':
@@ -657,49 +651,45 @@ def generate_js(data_target, data_files, metadata):
                    % (json.dumps('/' + '/'.join(parts[:i])), json.dumps(parts[i])))
           partial_dirs.append(partial)
 
-  if options.has_preloaded:
-    # Bundle all datafiles into one archive. Avoids doing lots of simultaneous
-    # XHRs which has overhead.
-    start = 0
-    with open(data_target, 'wb') as data:
-      for file_ in data_files:
-        file_.data_start = start
-        curr = utils.read_binary(file_.srcpath)
-        file_.data_end = start + len(curr)
-        start += len(curr)
-        data.write(curr)
-
-    if start > 256 * 1024 * 1024:
-      diagnostics.warn('file packager is creating an asset bundle of %d MB. '
-          'this is very large, and browsers might have trouble loading it. '
-          'see https://hacks.mozilla.org/2015/02/synchronous-execution-and-filesystem-access-in-emscripten/'
-          % (start / (1024 * 1024)))
-
-    create_preloaded = '''
-          try {
-            // canOwn this data in the filesystem, it is a slice into the heap that will never change
-            await Module['FS_preloadFile'](name, null, data, true, true, false, true);
-            Module['removeRunDependency'](`fp ${name}`);
-          } catch (e) {
-            err(`Preloading file ${name} failed`, e);
-          }\n'''
-    create_data = '''// canOwn this data in the filesystem, it is a slice into the heap that will never change
-          Module['FS_createDataFile'](name, null, data, true, true, true);
-          Module['removeRunDependency'](`fp ${name}`);'''
-
-    finish_handler = create_preloaded if options.use_preload_plugins else create_data
+  # Bundle all datafiles into one archive. Avoids doing lots of simultaneous
+  # XHRs which has overhead.
+  start = 0
+  with open(data_target, 'wb') as data:
+    for file_ in data_files:
+      file_.data_start = start
+      curr = utils.read_binary(file_.srcpath)
+      file_.data_end = start + len(curr)
+      start += len(curr)
+      data.write(curr)
+
+  if start > 256 * 1024 * 1024:
+    diagnostics.warn('file packager is creating an asset bundle of %d MB. '
+        'this is very large, and browsers might have trouble loading it. '
+        'see https://hacks.mozilla.org/2015/02/synchronous-execution-and-filesystem-access-in-emscripten/'
+        % (start / (1024 * 1024)))
+
+  create_preloaded = '''
+        try {
+          // canOwn this data in the filesystem, it is a slice into the heap that will never change
+          await Module['FS_preloadFile'](name, null, data, true, true, false, true);
+          Module['removeRunDependency'](`fp ${name}`);
+        } catch (e) {
+          err(`Preloading file ${name} failed`, e);
+        }\n'''
+  create_data = '''// canOwn this data in the filesystem, it is a slice into the heap that will never change
+        Module['FS_createDataFile'](name, null, data, true, true, true);
+        Module['removeRunDependency'](`fp ${name}`);'''
 
-    if not options.lz4:
-      # Data requests - for getting a block of data out of the big archive - have
-      # a similar API to XHRs
-      code += '''
-      for (var file of metadata['files']) {
-        var name = file['filename']
-        Module['addRunDependency'](`fp ${name}`);
-      }\n'''
+  finish_handler = create_preloaded if options.use_preload_plugins else create_data
 
-  if options.has_embedded and not options.obj_output:
-    diagnostics.warn('--obj-output is recommended when using --embed.  This outputs an object file for linking directly into your application is more efficient than JS encoding')
+  if not options.lz4:
+    # Data requests - for getting a block of data out of the big archive - have
+    # a similar API to XHRs
+    code += '''
+    for (var file of metadata['files']) {
+      var name = file['filename']
+      Module['addRunDependency'](`fp ${name}`);
+    }\n'''
 
   catch_handler = ''
   if options.export_es6:
@@ -708,27 +698,14 @@ def generate_js(data_target, data_files, metadata):
           loadDataReject(error);
         })'''
 
-  for counter, file_ in enumerate(data_files):
+  for file_ in data_files:
     filename = file_.dstpath
     dirname = os.path.dirname(filename)
-    basename = os.path.basename(filename)
-    if file_.mode == 'embed':
-      if not options.obj_output:
-        # Embed (only needed when not generating object file output)
-        data = base64_encode(utils.read_binary(file_.srcpath))
-        code += "      var fileData%d = '%s';\n" % (counter, data)
-        # canOwn this data in the filesystem (i.e. there is no need to create a copy in the FS layer).
-        code += ("      Module['FS_createDataFile']('%s', '%s', atob(fileData%d), true, true, true);\n"
-                 % (dirname, basename, counter))
-    elif file_.mode == 'preload':
-      # Preload
-      metadata['files'].append({
-        'filename': file_.dstpath,
-        'start': file_.data_start,
-        'end': file_.data_end,
-      })
-    else:
-      assert 0
+    metadata['files'].append({
+      'filename': file_.dstpath,
+      'start': file_.data_start,
+      'end': file_.data_end,
+    })
 
   if options.has_preloaded:
     if not options.lz4: