diff --git a/emcc.py b/emcc.py index 08f355c2ca975..1039ee30bb118 100755 --- a/emcc.py +++ b/emcc.py @@ -1135,7 +1135,7 @@ def run(args): logger.debug('stopping after linking to object file') return 0 - phase_calculate_system_libraries(state, linker_arguments, linker_inputs, newargs) + phase_calculate_system_libraries(state, linker_arguments, linker_inputs, newargs, options) phase_link(linker_arguments, wasm_target) @@ -2620,7 +2620,7 @@ def compile_source_file(i, input_file): @ToolchainProfiler.profile_block('calculate system libraries') -def phase_calculate_system_libraries(state, linker_arguments, linker_inputs, newargs): +def phase_calculate_system_libraries(state, linker_arguments, linker_inputs, newargs, options): extra_files_to_link = [] # link in ports and system libraries, if necessary if not settings.SIDE_MODULE: @@ -2630,6 +2630,19 @@ def phase_calculate_system_libraries(state, linker_arguments, linker_inputs, new extra_files_to_link += system_libs.calculate(all_linker_inputs, newargs, forced=state.forced_stdlibs) linker_arguments.extend(extra_files_to_link) + if settings.WASMFS and options.embed_files: + # wasmfs file embedding is done via emitting C code that contains the data + # and code to set them up. 
we add that as another input, like a system + # library, that we compile on the fly here + temp_files = shared.configuration.get_temp_files() + temp_c = temp_files.get(suffix='.c').name + temp_o = unsuffixed(temp_c) + '.o' + temp_files.note(temp_o) + with open(temp_c, 'w') as f: + f.write(package_files(options)) + shared.check_call([shared.CLANG_CC, temp_c, '-o', temp_o, '-c'] + get_cflags([])) + linker_arguments.append(temp_o) + @ToolchainProfiler.profile_block('link') def phase_link(linker_arguments, wasm_target): @@ -2707,33 +2720,43 @@ def phase_emscript(options, in_wasm, wasm_target, memfile): save_intermediate('original') +def package_files(options, target='default'): + logger.debug('setting up files') + file_args = ['--from-emcc', '--export-name=' + settings.EXPORT_NAME] + if options.preload_files: + file_args.append('--preload') + file_args += options.preload_files + if options.embed_files: + file_args.append('--embed') + file_args += options.embed_files + if options.exclude_files: + file_args.append('--exclude') + file_args += options.exclude_files + if options.use_preload_cache: + file_args.append('--use-preload-cache') + if settings.LZ4: + file_args.append('--lz4') + if options.use_preload_plugins: + file_args.append('--use-preload-plugins') + if not settings.ENVIRONMENT_MAY_BE_NODE: + file_args.append('--no-node') + wasmfs_c = settings.WASMFS and options.embed_files + if wasmfs_c: + file_args += ['--wasmfs-c'] + file_code = shared.check_call([shared.FILE_PACKAGER, shared.replace_suffix(target, '.data')] + file_args, stdout=PIPE).stdout + if wasmfs_c: + return file_code + else: + options.pre_js = js_manipulation.add_files_pre_js(options.pre_js, file_code) + + @ToolchainProfiler.profile_block('source transforms') def phase_source_transforms(options, target): global final_js # Embed and preload files if len(options.preload_files) or len(options.embed_files): - logger.debug('setting up files') - file_args = ['--from-emcc', '--export-name=' + 
settings.EXPORT_NAME] - if len(options.preload_files): - file_args.append('--preload') - file_args += options.preload_files - if len(options.embed_files): - file_args.append('--embed') - file_args += options.embed_files - if len(options.exclude_files): - file_args.append('--exclude') - file_args += options.exclude_files - if options.use_preload_cache: - file_args.append('--use-preload-cache') - if settings.LZ4: - file_args.append('--lz4') - if options.use_preload_plugins: - file_args.append('--use-preload-plugins') - if not settings.ENVIRONMENT_MAY_BE_NODE: - file_args.append('--no-node') - file_code = shared.check_call([shared.FILE_PACKAGER, shared.replace_suffix(target, '.data')] + file_args, stdout=PIPE).stdout - options.pre_js = js_manipulation.add_files_pre_js(options.pre_js, file_code) + package_files(options, target) # Apply pre and postjs files if final_js and (options.pre_js or options.post_js): diff --git a/system/lib/wasmfs/file.h b/system/lib/wasmfs/file.h index e218e5c544db3..0bd40fc6bf8cd 100644 --- a/system/lib/wasmfs/file.h +++ b/system/lib/wasmfs/file.h @@ -130,6 +130,8 @@ class File : public std::enable_shared_from_this { std::weak_ptr parent; // This specifies which backend a file is associated with. + // TODO: Should this be a shared_ptr? Or do we assume backends are never + // deallocated? backend_t backend; }; diff --git a/system/lib/wasmfs/wasmfs.cpp b/system/lib/wasmfs/wasmfs.cpp index 75ce1bc29180e..fd2c227e399c9 100644 --- a/system/lib/wasmfs/wasmfs.cpp +++ b/system/lib/wasmfs/wasmfs.cpp @@ -53,12 +53,13 @@ std::shared_ptr WasmFS::initRootDirectory() { return rootDirectory; } -// Initialize files specified by the --preload-file option. -// Set up directories and files in wasmFS$preloadedDirs and -// wasmFS$preloadedFiles from JS. This function will be called before any file -// operation to ensure any preloaded files are eagerly available for use. 
-void WasmFS::preloadFiles() { - // Debug builds only: add check to ensure preloadFiles() is called once. +// If files are embedded in the program, then this symbol is defined. We will +// call it and it will set those files up. +__attribute__((__weak__)) +extern "C" void __wasmfs_load_embedded(); + +void WasmFS::loadInitialFiles() { + // Debug builds only: add check to ensure loadInitialFiles() is called once. #ifndef NDEBUG static std::atomic timesCalled; timesCalled++; @@ -71,6 +72,15 @@ void WasmFS::preloadFiles() { // Ensure that files are preloaded from the main thread. assert(emscripten_is_main_runtime_thread()); + // First, handle embedded files, if there are any. + if (__wasmfs_load_embedded) { + __wasmfs_load_embedded(); + } + + // Handle preloaded files. + // Set up directories and files in wasmFS$preloadedDirs and + // wasmFS$preloadedFiles from JS. This function will be called before any file + // operation to ensure any preloaded files are eagerly available for use. auto numFiles = _wasmfs_get_num_preloaded_files(); auto numDirs = _wasmfs_get_num_preloaded_dirs(); diff --git a/system/lib/wasmfs/wasmfs.h b/system/lib/wasmfs/wasmfs.h index cd9d2cd6bc985..c9e97a9cfa95b 100644 --- a/system/lib/wasmfs/wasmfs.h +++ b/system/lib/wasmfs/wasmfs.h @@ -33,15 +33,15 @@ class WasmFS { // dev/stderr. Refers to the same std streams in the open file table. std::shared_ptr initRootDirectory(); - // Initialize files specified by --preload-file option. - void preloadFiles(); + // Initialize files specified by --preload-file and --embed-file options. + void loadInitialFiles(); public: // Files will be preloaded in this constructor. // This global constructor has init_priority 100. Please see wasmfs.cpp. // The current working directory is initialized to the root directory. WasmFS() : rootDirectory(initRootDirectory()), cwd(rootDirectory) { - preloadFiles(); + loadInitialFiles(); } // This get method returns a locked file table. 
diff --git a/tests/test_core.py b/tests/test_core.py index cbf925899ad57..9aa985991758e 100644 --- a/tests/test_core.py +++ b/tests/test_core.py @@ -528,6 +528,7 @@ def test_wasm32_unknown_emscripten(self): # No other configuration is supported, so always run this. self.do_runf(test_file('wasm32-unknown-emscripten.c'), '') + @also_with_wasmfs # tests file embedding def test_cube2md5(self): self.emcc_args += ['--embed-file', 'cube2md5.txt'] shutil.copyfile(test_file('cube2md5.txt'), 'cube2md5.txt') @@ -8953,7 +8954,7 @@ def setUp(self): simd2 = make_run('simd2', emcc_args=['-O2', '-msimd128']) bulkmem2 = make_run('bulkmem2', emcc_args=['-O2', '-mbulk-memory']) -wasmfs = make_run('wasmfs', emcc_args=['-s', 'WASMFS']) +wasmfs = make_run('wasmfs', emcc_args=['-s', 'WASMFS', '--profiling']) # SAFE_HEAP/STACK_OVERFLOW_CHECK core2s = make_run('core2s', emcc_args=['-O2'], settings={'SAFE_HEAP': 1}) diff --git a/tools/file_packager.py b/tools/file_packager.py index da56bede9adfa..1a7663d37b74e 100755 --- a/tools/file_packager.py +++ b/tools/file_packager.py @@ -21,7 +21,7 @@ Usage: - file_packager TARGET [--preload A [B..]] [--embed C [D..]] [--exclude E [F..]]] [--js-output=OUTPUT.js] [--no-force] [--use-preload-cache] [--indexedDB-name=EM_PRELOAD_CACHE] [--separate-metadata] [--lz4] [--use-preload-plugins] [--no-node] + file_packager TARGET [--preload A [B..]] [--embed C [D..]] [--exclude E [F..]]] [--js-output=OUTPUT.js] [--no-force] [--use-preload-cache] [--indexedDB-name=EM_PRELOAD_CACHE] [--separate-metadata] [--lz4] [--use-preload-plugins] [--no-node] [--wasmfs-c] --preload , --embed See emcc --help for more details on those options. @@ -51,6 +51,11 @@ --no-node Whether to support Node.js. By default we do, which emits some extra code. + --wasmfs-c Whether to emit C code for wasmfs. This only supports embedding + (as it literally embeds the data in the C). If you prefer + preloading, you can use that normally and wasmfs will interact + with the JS normally. 
+ Notes: * The file packager generates unix-style file paths. So if you are on windows and a file is accessed at @@ -185,6 +190,7 @@ def main(): lz4 = False use_preload_plugins = False support_node = True + wasmfs_c = False for arg in sys.argv[2:]: if arg == '--preload': @@ -218,6 +224,9 @@ def main(): elif arg == '--no-node': support_node = False leading = '' + elif arg == '--wasmfs-c': + wasmfs_c = True + leading = '' elif arg.startswith('--js-output'): jsoutput = arg.split('=', 1)[1] if '=' in arg else None leading = '' @@ -380,6 +389,8 @@ def was_seen(name): metadata = {'files': []} + c_output = '' + # Set up folders partial_dirs = [] for file_ in data_files: @@ -392,6 +403,7 @@ def was_seen(name): if partial not in partial_dirs: code += ('''Module['FS_createPath'](%s, %s, true, true);\n''' % (json.dumps('/' + '/'.join(parts[:i])), json.dumps(parts[i]))) + c_output += f'mkdir("{partial}", 0700);\n' partial_dirs.append(partial) if has_preloaded: @@ -475,13 +487,35 @@ def was_seen(name): basename = os.path.basename(filename) if file_['mode'] == 'embed': # Embed - data = base64_encode(utils.read_binary(file_['srcpath'])) - code += '''var fileData%d = '%s';\n''' % (counter, data) - code += ('''Module['FS_createDataFile']('%s', '%s', decodeBase64(fileData%d), true, true, false);\n''' - % (dirname, basename, counter)) + binary = utils.read_binary(file_['srcpath']) + if not wasmfs_c: + # JS output + data = base64_encode(binary) + code += '''var fileData%d = '%s';\n''' % (counter, data) + code += ('''Module['FS_createDataFile']('%s', '%s', decodeBase64(fileData%d), true, true, false);\n''' + % (dirname, basename, counter)) + else: + # C output. + # convert the binary data into a C escaped string, \xAB for hex code AB + def escape_for_c(char): + if char < 16: + # add a 0 to keep the output in two bytes + return '\\x0' + hex(char)[-1:] + return '\\x' + hex(char)[-2:] + data = ''.join([escape_for_c(char) for char in binary]) + # directories... 
:( make them + c_output += f'''static const char fileData{counter}[] = "{data}";\n''' + c_output += f''' +FILE* file{counter} = fopen("{dirname}" "/" "{basename}", "wb"); +fwrite(fileData{counter}, 1, {len(binary)}, file{counter}); +fclose(file{counter}); +''' + counter += 1 elif file_['mode'] == 'preload': # Preload + assert not wasmfs_c, 'wasmfs-c mode only supports embedding' + counter += 1 metadata_el = { @@ -920,6 +954,18 @@ def was_seen(name): })(); ''' % _metadata_template + if wasmfs_c: + ret = r''' +#include <stdio.h> +#include <sys/stat.h> + +void __wasmfs_load_embedded() { + +%s + +} +''' % c_output + if force or len(data_files): if jsoutput is None: print(ret)