From 4529332e2296298cbe400fdd555d276f86cec0dc Mon Sep 17 00:00:00 2001
From: Xuan Son Nguyen
Date: Sat, 6 Sep 2025 06:20:52 +0700
Subject: [PATCH 1/7] webgpu : fix build on emscripten

---
 .gitignore | 3 +++
 common/common.cpp | 2 ++
 ggml/src/ggml-webgpu/CMakeLists.txt | 13 +++++++++++--
 ggml/src/ggml-webgpu/ggml-webgpu.cpp | 12 ++++++++++--
 4 files changed, 26 insertions(+), 4 deletions(-)

diff --git a/.gitignore b/.gitignore
index 595831accb05d..ed034e40e2795 100644
--- a/.gitignore
+++ b/.gitignore
@@ -148,3 +148,6 @@ poetry.toml
 /run-vim.sh
 /run-chat.sh
 .ccache/
+
+# emscripten
+a.out.*
diff --git a/common/common.cpp b/common/common.cpp
index 0c92d4d57ddbf..a95cca8a2b174 100644
--- a/common/common.cpp
+++ b/common/common.cpp
@@ -871,6 +871,8 @@ std::string fs_get_cache_directory() {
         cache_directory = std::getenv("HOME") + std::string("/Library/Caches/");
 #elif defined(_WIN32)
         cache_directory = std::getenv("LOCALAPPDATA");
+#elif defined(__EMSCRIPTEN__)
+        GGML_ABORT("not implemented on this platform");
 #else
 # error Unknown architecture
 #endif
diff --git a/ggml/src/ggml-webgpu/CMakeLists.txt b/ggml/src/ggml-webgpu/CMakeLists.txt
index 78a985a4d167a..dce7e14ca83c9 100644
--- a/ggml/src/ggml-webgpu/CMakeLists.txt
+++ b/ggml/src/ggml-webgpu/CMakeLists.txt
@@ -39,8 +39,17 @@ add_dependencies(ggml-webgpu generate_shaders)

 if(EMSCRIPTEN)
     set(EMDAWNWEBGPU_DIR "" CACHE PATH "Path to emdawnwebgpu_pkg")
-    target_compile_options(ggml-webgpu PRIVATE "--use-port=${EMDAWNWEBGPU_DIR}/emdawnwebgpu.port.py")
-    target_link_options(ggml-webgpu PRIVATE "--use-port=${EMDAWNWEBGPU_DIR}/emdawnwebgpu.port.py")
+    if(NOT EMDAWNWEBGPU_DIR)
+        # default built-in port
+        target_compile_options(ggml-webgpu PRIVATE "--use-port=emdawnwebgpu")
+        target_link_options(ggml-webgpu INTERFACE "--use-port=emdawnwebgpu")
+    else()
+        # custom port
+        target_compile_options(ggml-webgpu PRIVATE "--use-port=${EMDAWNWEBGPU_DIR}/emdawnwebgpu.port.py")
+        target_link_options(ggml-webgpu INTERFACE "--use-port=${EMDAWNWEBGPU_DIR}/emdawnwebgpu.port.py")
+    endif()
+
+    set(DawnWebGPU_TARGET webgpu_cpp)
 else()
     find_package(Dawn REQUIRED)
     set(DawnWebGPU_TARGET dawn::webgpu_dawn)
diff --git a/ggml/src/ggml-webgpu/ggml-webgpu.cpp b/ggml/src/ggml-webgpu/ggml-webgpu.cpp
index e5df883c1367e..acd43a8a94b97 100644
--- a/ggml/src/ggml-webgpu/ggml-webgpu.cpp
+++ b/ggml/src/ggml-webgpu/ggml-webgpu.cpp
@@ -9,6 +9,10 @@
 #include "ggml-impl.h"
 #include "ggml-wgsl-shaders.hpp"

+#ifdef __EMSCRIPTEN__
+#include
+#endif
+
 #include
 #include

@@ -1173,8 +1177,12 @@ static ggml_backend_dev_t ggml_backend_webgpu_reg_get_device(ggml_backend_reg_t
     ctx->adapter.GetInfo(&info);

     // Initialize device
-    std::vector required_features = { wgpu::FeatureName::ShaderF16,
-                                      wgpu::FeatureName::ImplicitDeviceSynchronization };
+    std::vector required_features = {
+        wgpu::FeatureName::ShaderF16,
+#ifndef __EMSCRIPTEN__
+        wgpu::FeatureName::ImplicitDeviceSynchronization,
+#endif
+    };
     wgpu::DeviceDescriptor dev_desc;
     dev_desc.requiredLimits = &ctx->limits;
     dev_desc.requiredFeatures = required_features.data();

From 990a98ae642cec417623081dd273b1ff5b1aea02 Mon Sep 17 00:00:00 2001
From: Xuan Son Nguyen
Date: Sat, 6 Sep 2025 08:36:16 +0700
Subject: [PATCH 2/7] more debugging stuff

---
 CMakeLists.txt | 6 +++++-
 ggml/src/ggml-webgpu/CMakeLists.txt | 8 ++++++--
 ggml/src/ggml-webgpu/ggml-webgpu.cpp | 11 +++++++++++
 3 files changed, 22 insertions(+), 3 deletions(-)

diff --git a/CMakeLists.txt b/CMakeLists.txt
index 36a2078e4c9fa..9e0a042edb2fc 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -36,7 +36,11 @@ option(LLAMA_USE_SYSTEM_GGML "Use system libggml" OFF)
 if (EMSCRIPTEN)
     set(BUILD_SHARED_LIBS_DEFAULT OFF)

-    option(LLAMA_WASM_SINGLE_FILE "llama: embed WASM inside the generated llama.js" ON)
+    option(LLAMA_WASM_SINGLE_FILE "llama: embed WASM inside the generated llama.js" OFF)
+    option(LLAMA_BUILD_HTML "llama: build HTML file" ON)
+    if (LLAMA_BUILD_HTML)
+        set(CMAKE_EXECUTABLE_SUFFIX ".html")
+    endif()
 else()
     if (MINGW)
         set(BUILD_SHARED_LIBS_DEFAULT OFF)
diff --git a/ggml/src/ggml-webgpu/CMakeLists.txt b/ggml/src/ggml-webgpu/CMakeLists.txt
index dce7e14ca83c9..73eb66fb874c3 100644
--- a/ggml/src/ggml-webgpu/CMakeLists.txt
+++ b/ggml/src/ggml-webgpu/CMakeLists.txt
@@ -42,11 +42,11 @@ if(EMSCRIPTEN)
     if(NOT EMDAWNWEBGPU_DIR)
         # default built-in port
         target_compile_options(ggml-webgpu PRIVATE "--use-port=emdawnwebgpu")
-        target_link_options(ggml-webgpu INTERFACE "--use-port=emdawnwebgpu")
+        target_link_options(ggml-webgpu INTERFACE "--use-port=emdawnwebgpu" "-sASYNCIFY=1")
     else()
         # custom port
         target_compile_options(ggml-webgpu PRIVATE "--use-port=${EMDAWNWEBGPU_DIR}/emdawnwebgpu.port.py")
-        target_link_options(ggml-webgpu INTERFACE "--use-port=${EMDAWNWEBGPU_DIR}/emdawnwebgpu.port.py")
+        target_link_options(ggml-webgpu INTERFACE "--use-port=${EMDAWNWEBGPU_DIR}/emdawnwebgpu.port.py" "-sASYNCIFY=1")
     endif()

     set(DawnWebGPU_TARGET webgpu_cpp)
@@ -57,6 +57,10 @@ endif()

 if (GGML_WEBGPU_DEBUG)
     target_compile_definitions(ggml-webgpu PRIVATE GGML_WEBGPU_DEBUG=1)
+    if(EMSCRIPTEN)
+        target_compile_options(ggml-webgpu PRIVATE "-fexceptions")
+        target_link_options(ggml-webgpu INTERFACE "-sASSERTIONS=2" "-fexceptions")
+    endif()
 endif()

 target_include_directories(ggml-webgpu PRIVATE ${SHADER_OUTPUT_DIR})
diff --git a/ggml/src/ggml-webgpu/ggml-webgpu.cpp b/ggml/src/ggml-webgpu/ggml-webgpu.cpp
index acd43a8a94b97..53cd8df50b836 100644
--- a/ggml/src/ggml-webgpu/ggml-webgpu.cpp
+++ b/ggml/src/ggml-webgpu/ggml-webgpu.cpp
@@ -1295,6 +1295,17 @@ ggml_backend_reg_t ggml_backend_webgpu_reg() {
     instance_descriptor.requiredFeatures = instance_features.data();
     instance_descriptor.requiredFeatureCount = instance_features.size();
     webgpu_ctx->instance = wgpu::CreateInstance(&instance_descriptor);
+
+#ifdef __EMSCRIPTEN__
+#ifndef __EMSCRIPTEN_PTHREADS__
+    GGML_LOG_WARN("ggml_webgpu: pthread is disabled. This may cause bugs\n");
+#endif
+
+    if (webgpu_ctx->instance == nullptr) {
+        GGML_LOG_ERROR("ggml_webgpu: Failed to create WebGPU instance. Make sure -sASYNCIFY is set\n");
+        return nullptr;
+    }
+#endif
     GGML_ASSERT(webgpu_ctx->instance != nullptr);

     static ggml_backend_reg reg = {

From 5616b9c246d6d50c1419b6a14703d62818adf579 Mon Sep 17 00:00:00 2001
From: Xuan Son Nguyen
Date: Sat, 6 Sep 2025 19:26:22 +0700
Subject: [PATCH 3/7] test-backend-ops: force single thread on wasm

---
 tests/test-backend-ops.cpp | 12 +++++++++---
 1 file changed, 9 insertions(+), 3 deletions(-)

diff --git a/tests/test-backend-ops.cpp b/tests/test-backend-ops.cpp
index d638a96ee9be8..af8add41b76ba 100644
--- a/tests/test-backend-ops.cpp
+++ b/tests/test-backend-ops.cpp
@@ -40,12 +40,18 @@
 #include
 #include

+#ifdef __EMSCRIPTEN__
+# define N_THREADS 1
+#else
+# define N_THREADS std::thread::hardware_concurrency()
+#endif
+
 static void init_tensor_uniform(ggml_tensor * tensor, float min = -1.0f, float max = 1.0f) {
     size_t nels = ggml_nelements(tensor);
     std::vector data(nels);
     {
         // parallel initialization
-        static const size_t n_threads = std::thread::hardware_concurrency();
+        static const size_t n_threads = N_THREADS;
         // static RNG initialization (revisit if n_threads stops being constant)
         static std::vector generators = []() {
             std::random_device rd;
@@ -104,7 +110,7 @@ static void init_tensor_uniform(ggml_tensor * tensor, float min = -1.0f, float m
     };

     const size_t min_blocks_per_thread = 1;
-    const size_t n_threads = std::min(std::thread::hardware_concurrency()/2,
+    const size_t n_threads = std::min(N_THREADS/2,
                                       std::max(1, n_blocks / min_blocks_per_thread));
     std::vector> tasks;
     tasks.reserve(n_threads);
@@ -6934,7 +6940,7 @@ int main(int argc, char ** argv) {
         auto ggml_backend_set_n_threads_fn = (ggml_backend_set_n_threads_t) ggml_backend_reg_get_proc_address(reg, "ggml_backend_set_n_threads");
         if (ggml_backend_set_n_threads_fn) {
             // TODO: better value for n_threads
-            ggml_backend_set_n_threads_fn(backend, std::thread::hardware_concurrency());
+            ggml_backend_set_n_threads_fn(backend, N_THREADS);
         }

         size_t free, total; // NOLINT

From 56d02f6f424a75c7fc085effe4c8bc078680058e Mon Sep 17 00:00:00 2001
From: Xuan Son Nguyen
Date: Sat, 6 Sep 2025 19:34:27 +0700
Subject: [PATCH 4/7] fix single-thread case for init_tensor_uniform

---
 tests/test-backend-ops.cpp | 22 +++++++++++++---------
 1 file changed, 13 insertions(+), 9 deletions(-)

diff --git a/tests/test-backend-ops.cpp b/tests/test-backend-ops.cpp
index af8add41b76ba..86f8087c0fd25 100644
--- a/tests/test-backend-ops.cpp
+++ b/tests/test-backend-ops.cpp
@@ -70,15 +70,19 @@ static void init_tensor_uniform(ggml_tensor * tensor, float min = -1.0f, float m
         }
     };

-    std::vector> tasks;
-    tasks.reserve(n_threads);
-    for (size_t i = 0; i < n_threads; i++) {
-        size_t start = i*nels/n_threads;
-        size_t end = (i+1)*nels/n_threads;
-        tasks.push_back(std::async(std::launch::async, init_thread, i, start, end));
-    }
-    for (auto & t : tasks) {
-        t.get();
+    if (n_threads == 1) {
+        init_thread(0, 0, nels);
+    } else {
+        std::vector> tasks;
+        tasks.reserve(n_threads);
+        for (size_t i = 0; i < n_threads; i++) {
+            size_t start = i*nels/n_threads;
+            size_t end = (i+1)*nels/n_threads;
+            tasks.push_back(std::async(std::launch::async, init_thread, i, start, end));
+        }
+        for (auto & t : tasks) {
+            t.get();
+        }
     }
 }

From 1cd87e07df867dd25a90d52761bce7732f97c864 Mon Sep 17 00:00:00 2001
From: Xuan Son Nguyen
Date: Sun, 7 Sep 2025 00:04:32 +0700
Subject: [PATCH 5/7] use jspi

---
 ggml/CMakeLists.txt | 1 +
 ggml/src/ggml-webgpu/CMakeLists.txt | 15 +++++++++++----
 ggml/src/ggml-webgpu/ggml-webgpu.cpp | 6 +-----
 3 files changed, 13 insertions(+), 9 deletions(-)

diff --git a/ggml/CMakeLists.txt b/ggml/CMakeLists.txt
index 9ef88c6fd0a85..a67e3421d0554 100644
--- a/ggml/CMakeLists.txt
+++ b/ggml/CMakeLists.txt
@@ -189,6 +189,7 @@ option(GGML_VULKAN_VALIDATE "ggml: enable Vulkan validation"
 option(GGML_VULKAN_RUN_TESTS "ggml: run Vulkan tests" OFF)
 option(GGML_WEBGPU "ggml: use WebGPU" OFF)
 option(GGML_WEBGPU_DEBUG "ggml: enable WebGPU debug output" OFF)
+option(GGML_WEBGPU_JSPI "ggml: use JSPI for WebGPU" ON)
 option(GGML_ZDNN "ggml: use zDNN" OFF)
 option(GGML_METAL "ggml: use Metal" ${GGML_METAL_DEFAULT})
 option(GGML_METAL_USE_BF16 "ggml: use bfloat if available" OFF)
diff --git a/ggml/src/ggml-webgpu/CMakeLists.txt b/ggml/src/ggml-webgpu/CMakeLists.txt
index 73eb66fb874c3..7ab450564c3b3 100644
--- a/ggml/src/ggml-webgpu/CMakeLists.txt
+++ b/ggml/src/ggml-webgpu/CMakeLists.txt
@@ -42,11 +42,19 @@ if(EMSCRIPTEN)
     if(NOT EMDAWNWEBGPU_DIR)
         # default built-in port
         target_compile_options(ggml-webgpu PRIVATE "--use-port=emdawnwebgpu")
-        target_link_options(ggml-webgpu INTERFACE "--use-port=emdawnwebgpu" "-sASYNCIFY=1")
+        target_link_options(ggml-webgpu INTERFACE "--use-port=emdawnwebgpu")
     else()
         # custom port
         target_compile_options(ggml-webgpu PRIVATE "--use-port=${EMDAWNWEBGPU_DIR}/emdawnwebgpu.port.py")
-        target_link_options(ggml-webgpu INTERFACE "--use-port=${EMDAWNWEBGPU_DIR}/emdawnwebgpu.port.py" "-sASYNCIFY=1")
+        target_link_options(ggml-webgpu INTERFACE "--use-port=${EMDAWNWEBGPU_DIR}/emdawnwebgpu.port.py")
+    endif()
+
+    if (GGML_WEBGPU_JSPI)
+        target_compile_options(ggml-webgpu PRIVATE "-fwasm-exceptions")
+        target_link_options(ggml-webgpu INTERFACE "-sJSPI" "-fwasm-exceptions")
+    else()
+        target_compile_options(ggml-webgpu PRIVATE "-fexceptions")
+        target_link_options(ggml-webgpu INTERFACE "-sASYNCIFY" "-exceptions")
+    endif()

     set(DawnWebGPU_TARGET webgpu_cpp)
@@ -58,8 +66,7 @@ endif()

 if (GGML_WEBGPU_DEBUG)
     target_compile_definitions(ggml-webgpu PRIVATE GGML_WEBGPU_DEBUG=1)
     if(EMSCRIPTEN)
-        target_compile_options(ggml-webgpu PRIVATE "-fexceptions")
-        target_link_options(ggml-webgpu INTERFACE "-sASSERTIONS=2" "-fexceptions")
+        target_link_options(ggml-webgpu INTERFACE "-sASSERTIONS=2")
     endif()
 endif()
diff --git a/ggml/src/ggml-webgpu/ggml-webgpu.cpp b/ggml/src/ggml-webgpu/ggml-webgpu.cpp
index 53cd8df50b836..35503093b4bb1 100644
--- a/ggml/src/ggml-webgpu/ggml-webgpu.cpp
+++ b/ggml/src/ggml-webgpu/ggml-webgpu.cpp
@@ -1297,12 +1297,8 @@ ggml_backend_reg_t ggml_backend_webgpu_reg() {
     webgpu_ctx->instance = wgpu::CreateInstance(&instance_descriptor);

 #ifdef __EMSCRIPTEN__
-#ifndef __EMSCRIPTEN_PTHREADS__
-    GGML_LOG_WARN("ggml_webgpu: pthread is disabled. This may cause bugs\n");
-#endif
-
     if (webgpu_ctx->instance == nullptr) {
-        GGML_LOG_ERROR("ggml_webgpu: Failed to create WebGPU instance. Make sure -sASYNCIFY is set\n");
+        GGML_LOG_ERROR("ggml_webgpu: Failed to create WebGPU instance. Make sure either -sASYNCIFY or -sJSPI is set\n");
         return nullptr;
     }
 #endif
     GGML_ASSERT(webgpu_ctx->instance != nullptr);

     static ggml_backend_reg reg = {

From 8549245c71eef0c1b269f0a56c88e8e9d52d454f Mon Sep 17 00:00:00 2001
From: Xuan Son Nguyen
Date: Mon, 8 Sep 2025 04:17:01 +0700
Subject: [PATCH 6/7] add pthread

---
 CMakeLists.txt | 5 ++
 scripts/serve-static.js | 110 ++++++++++++++++++++++++++++++++++++++++
 2 files changed, 115 insertions(+)
 create mode 100644 scripts/serve-static.js

diff --git a/CMakeLists.txt b/CMakeLists.txt
index 9e0a042edb2fc..8099cc3be03c8 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -161,6 +161,11 @@ endif()
 #
 # 3rd-party
 #
+if (EMSCRIPTEN)
+    add_compile_options(-pthread)
+    link_libraries (-pthread)
+endif()
+
 if (LLAMA_USE_SYSTEM_GGML)
     message(STATUS "Using system-provided libggml, skipping ggml build")
     find_package(ggml REQUIRED)
diff --git a/scripts/serve-static.js b/scripts/serve-static.js
new file mode 100644
index 0000000000000..df6cf534055f1
--- /dev/null
+++ b/scripts/serve-static.js
@@ -0,0 +1,110 @@
+const http = require('http');
+const fs = require('fs').promises;
+const path = require('path');
+
+// This file is used for testing wasm build from emscripten
+// Example build command:
+// emcmake cmake -B build-wasm -DGGML_WEBGPU=ON -DLLAMA_CURL=OFF
+// cmake --build build-wasm --target test-backend-ops -j
+
+const PORT = 8080;
+const STATIC_DIR = path.join(__dirname, '../build-wasm/bin');
+console.log(`Serving static files from: ${STATIC_DIR}`);
+
+const mimeTypes = {
+    '.html': 'text/html',
+    '.js': 'text/javascript',
+    '.css': 'text/css',
+    '.png': 'image/png',
+    '.jpg': 'image/jpeg',
+    '.gif': 'image/gif',
+    '.svg': 'image/svg+xml',
+    '.json': 'application/json',
+    '.woff': 'font/woff',
+    '.woff2': 'font/woff2',
+};
+
+async function generateDirListing(dirPath, reqUrl) {
+    const files = await fs.readdir(dirPath);
+    let html = `
+        <!DOCTYPE html>
+        <html>
+        <head>
+            <title>Directory Listing</title>
+        </head>
+        <body>
+            <h1>Directory: ${reqUrl}</h1>
+            <ul>
+    `;
+
+    if (reqUrl !== '/') {
+        html += `
+            <li><a href="../">../ (Parent Directory)</a></li>
+        `;
+    }
+
+    for (const file of files) {
+        const filePath = path.join(dirPath, file);
+        const stats = await fs.stat(filePath);
+        const link = encodeURIComponent(file) + (stats.isDirectory() ? '/' : '');
+        html += `
+            <li><a href="${link}">${file}${stats.isDirectory() ? '/' : ''}</a></li>
+        `;
+    }
+
+    html += `
+            </ul>
+        </body>
+        </html>
+    `;
+    return html;
+}
+
+const server = http.createServer(async (req, res) => {
+    try {
+        // Set COOP and COEP headers
+        res.setHeader('Cross-Origin-Opener-Policy', 'same-origin');
+        res.setHeader('Cross-Origin-Embedder-Policy', 'require-corp');
+        res.setHeader('Cache-Control', 'no-store, no-cache, must-revalidate, proxy-revalidate');
+        res.setHeader('Pragma', 'no-cache');
+        res.setHeader('Expires', '0');
+
+        const filePath = path.join(STATIC_DIR, decodeURIComponent(req.url));
+        const stats = await fs.stat(filePath);
+
+        if (stats.isDirectory()) {
+            const indexPath = path.join(filePath, 'index.html');
+            try {
+                const indexData = await fs.readFile(indexPath);
+                res.writeHeader(200, { 'Content-Type': 'text/html' });
+                res.end(indexData);
+            } catch {
+                // No index.html, generate directory listing
+                const dirListing = await generateDirListing(filePath, req.url);
+                res.writeHeader(200, { 'Content-Type': 'text/html' });
+                res.end(dirListing);
+            }
+        } else {
+            const ext = path.extname(filePath).toLowerCase();
+            const contentType = mimeTypes[ext] || 'application/octet-stream';
+            const data = await fs.readFile(filePath);
+            res.writeHeader(200, { 'Content-Type': contentType });
+            res.end(data);
+        }
+    } catch (err) {
+        if (err.code === 'ENOENT') {
+            res.writeHeader(404, { 'Content-Type': 'text/plain' });
+            res.end('404 Not Found');
+        } else {
+            res.writeHeader(500, { 'Content-Type': 'text/plain' });
+            res.end('500 Internal Server Error');
+        }
+    }
+});
+
+server.listen(PORT, () => {
+    console.log(`Server running at http://localhost:${PORT}/`);
+});

From bf9d14cd01b1ac166308b77d2f710b374acce932 Mon Sep 17 00:00:00 2001
From: Xuan Son Nguyen
Date: Mon, 8 Sep 2025 17:12:22 +0700
Subject: [PATCH 7/7] test: remember to set n_thread for cpu backend

---
 CMakeLists.txt | 5 -----
 tests/test-backend-ops.cpp | 4 ++++
 2 files changed, 4 insertions(+), 5 deletions(-)

diff --git a/CMakeLists.txt b/CMakeLists.txt
index 8099cc3be03c8..9e0a042edb2fc 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -161,11 +161,6 @@ endif()
 #
 # 3rd-party
 #
-if (EMSCRIPTEN)
-    add_compile_options(-pthread)
-    link_libraries (-pthread)
-endif()
-
 if (LLAMA_USE_SYSTEM_GGML)
     message(STATUS "Using system-provided libggml, skipping ggml build")
     find_package(ggml REQUIRED)
diff --git a/tests/test-backend-ops.cpp b/tests/test-backend-ops.cpp
index 86f8087c0fd25..676af05d6dfb4 100644
--- a/tests/test-backend-ops.cpp
+++ b/tests/test-backend-ops.cpp
@@ -19,6 +19,7 @@
 #include
 #include
 #include
+#include
 #include
 #include

@@ -6704,6 +6705,9 @@ static bool test_backend(ggml_backend_t backend, test_mode mode, const char * op
         return false;
     }

+    // TODO: find a better way to set the number of threads for the CPU backend
+    ggml_backend_cpu_set_n_threads(backend_cpu, N_THREADS);
+
     size_t n_ok = 0;
     for (auto & test : test_cases) {
         if (test->eval(backend, backend_cpu, op_names_filter, output_printer)) {