Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
44 commits
Select commit Hold shift + click to select a range
4529332
webgpu : fix build on emscripten
ngxson Sep 5, 2025
990a98a
more debugging stuff
ngxson Sep 6, 2025
5616b9c
test-backend-ops: force single thread on wasm
ngxson Sep 6, 2025
56d02f6
fix single-thread case for init_tensor_uniform
ngxson Sep 6, 2025
1cd87e0
use jspi
ngxson Sep 6, 2025
8549245
add pthread
ngxson Sep 7, 2025
bf9d14c
test: remember to set n_thread for cpu backend
ngxson Sep 8, 2025
b566811
Add buffer label and enable dawn-specific toggles to turn off some ch…
reeselevine Oct 15, 2025
2560412
Intermediate state
reeselevine Oct 17, 2025
833c4a8
Fast working f16/f32 vec4
reeselevine Oct 19, 2025
bd38091
Working float fast mul mat
reeselevine Oct 20, 2025
f808c48
Clean up naming of mul_mat to match logical model, start work on q mu…
reeselevine Oct 22, 2025
a3b2f67
Setup for subgroup matrix mat mul
reeselevine Oct 25, 2025
0bdd9f4
Basic working subgroup matrix
reeselevine Oct 25, 2025
a80e2bb
Working subgroup matrix tiling
reeselevine Oct 26, 2025
e4fd0b5
Handle weirder sg matrix sizes (but still % sg matrix size)
reeselevine Oct 26, 2025
b524249
Working start to gemv
reeselevine Oct 26, 2025
749a791
working f16 accumulation with shared memory staging
reeselevine Oct 26, 2025
abaf12e
Print out available subgroup matrix configurations
reeselevine Oct 26, 2025
54c31c1
Vectorize dst stores for sg matrix shader
reeselevine Oct 26, 2025
0f6e38d
Gemv working scalar
reeselevine Oct 27, 2025
f2e187c
Minor set_rows optimization (#4)
neha-ha Oct 27, 2025
2aa05c6
Merge remote-tracking branch 'upstream/master'
reeselevine Oct 27, 2025
51aae63
Comment on dawn toggles
reeselevine Oct 27, 2025
9edfcc9
Working subgroup matrix code for (semi)generic sizes
reeselevine Oct 27, 2025
f0cfae4
Remove some comments
reeselevine Oct 27, 2025
904dbe3
Merge remote-tracking branch 'origin/master' into mul_mat_opt
reeselevine Oct 27, 2025
cf0c536
Cleanup code
reeselevine Oct 28, 2025
71c7a4a
Update dawn version and move to portable subgroup size
reeselevine Oct 28, 2025
c73893e
Try to fix new dawn release
reeselevine Oct 28, 2025
f538ca3
Update subgroup size comment
reeselevine Oct 28, 2025
f5001d8
Only check for subgroup matrix configs if they are supported
reeselevine Oct 28, 2025
844ba40
Add toggles for subgroup matrix/f16 support on nvidia+vulkan
reeselevine Oct 29, 2025
d426436
Make row/col naming consistent
reeselevine Oct 29, 2025
a46d093
Refactor shared memory loading
reeselevine Oct 29, 2025
eb7150a
Move sg matrix stores to correct file
reeselevine Oct 29, 2025
4ec09e4
Working q4_0
reeselevine Oct 30, 2025
9726640
Formatting
reeselevine Oct 31, 2025
92d5eb8
Merge remote-tracking branch 'ngxson/xsn/emscripten_webgpu' into wasm
reeselevine Nov 2, 2025
b51edae
Work with emscripten builds
reeselevine Nov 3, 2025
fd6d56b
Fix test-backend-ops emscripten for f16/quantized types
reeselevine Nov 4, 2025
427f1f7
Use emscripten memory64 to support get_memory
reeselevine Nov 4, 2025
edb30b5
Merge remote-tracking branch 'origin/master' into wasm
reeselevine Nov 5, 2025
cbc8309
Add build flags and try ci
reeselevine Nov 11, 2025
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
40 changes: 40 additions & 0 deletions .github/workflows/build.yml
Original file line number Diff line number Diff line change
Expand Up @@ -547,6 +547,46 @@ jobs:
# This is using llvmpipe and runs slower than other backends
ctest -L main --verbose --timeout 3600

# CI job: cross-compile the WebGPU backend to WebAssembly with Emscripten and
# build the test-backend-ops target. The job only builds; it does not run tests.
ubuntu-24-wasm-webgpu:
  runs-on: ubuntu-24.04

  steps:
    - name: Clone
      id: checkout
      uses: actions/checkout@v4

    - name: ccache
      # NOTE(review): the version tag was mangled by e-mail obfuscation in the
      # scraped page; v1.2.16 is assumed to match the other jobs - confirm.
      uses: ggml-org/ccache-action@v1.2.16
      with:
        key: ubuntu-latest-wasm-webgpu
        evict-old-files: 1d

    - name: Install Emscripten
      # Installs and activates the latest emsdk release inside the workspace.
      run: |
        git clone https://github.com/emscripten-core/emsdk.git
        cd emsdk
        ./emsdk install latest
        ./emsdk activate latest

    - name: Fetch emdawnwebgpu
      # Downloads a pinned Dawn release of the emdawnwebgpu package.
      # Presumably the archive unpacks to ./emdawnwebgpu_pkg, which is the
      # directory passed to CMake in the next step - verify against the release.
      run: |
        DAWN_TAG="v20251027.212519"
        EMDAWN_PKG="emdawnwebgpu_pkg-${DAWN_TAG}.zip"
        echo "Downloading ${EMDAWN_PKG}"
        curl -L -o emdawn.zip \
          "https://github.com/google/dawn/releases/download/${DAWN_TAG}/${EMDAWN_PKG}"
        unzip emdawn.zip

    - name: Build WASM WebGPU
      # emsdk_env.sh puts the Emscripten toolchain on PATH for this step only.
      run: |
        source emsdk/emsdk_env.sh
        emcmake cmake -B build-wasm \
          -DGGML_WEBGPU=ON \
          -DLLAMA_CURL=OFF \
          -DEMDAWNWEBGPU_DIR=emdawnwebgpu_pkg

        cmake --build build-wasm --target test-backend-ops -j $(nproc)

ubuntu-22-cmake-hip:
runs-on: ubuntu-22.04
container: rocm/dev-ubuntu-22.04:6.1.2
Expand Down
2 changes: 2 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -152,3 +152,5 @@ poetry.toml
# IDE
*.code-workspace
.windsurf/
# emscripten
a.out.*
12 changes: 11 additions & 1 deletion CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,17 @@ option(LLAMA_USE_SYSTEM_GGML "Use system libggml" OFF)
if (EMSCRIPTEN)
set(BUILD_SHARED_LIBS_DEFAULT OFF)

option(LLAMA_WASM_SINGLE_FILE "llama: embed WASM inside the generated llama.js" ON)
# Use 64-bit memory to support backend_get_memory queries
# TODO: analyze performance impact, see https://spidermonkey.dev/blog/2025/01/15/is-memory64-actually-worth-using
add_compile_options("-sMEMORY64=1")
add_link_options("-sMEMORY64=1")
add_link_options("-sALLOW_MEMORY_GROWTH=1")

option(LLAMA_WASM_SINGLE_FILE "llama: embed WASM inside the generated llama.js" OFF)
option(LLAMA_BUILD_HTML "llama: build HTML file" ON)
if (LLAMA_BUILD_HTML)
set(CMAKE_EXECUTABLE_SUFFIX ".html")
endif()
else()
if (MINGW)
set(BUILD_SHARED_LIBS_DEFAULT OFF)
Expand Down
4 changes: 4 additions & 0 deletions common/arg.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,7 @@
#include "http.h"
#endif

#ifndef __EMSCRIPTEN__
#ifdef __linux__
#include <linux/limits.h>
#elif defined(_WIN32)
Expand All @@ -50,8 +51,11 @@
#else
#include <sys/syslimits.h>
#endif
#endif

#define LLAMA_MAX_URL_LENGTH 2084 // Maximum URL Length in Chrome: 2083


// isatty
#if defined(_WIN32)
#include <io.h>
Expand Down
2 changes: 2 additions & 0 deletions common/common.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -889,6 +889,8 @@ std::string fs_get_cache_directory() {
cache_directory = std::getenv("HOME") + std::string("/Library/Caches/");
#elif defined(_WIN32)
cache_directory = std::getenv("LOCALAPPDATA");
#elif defined(__EMSCRIPTEN__)
GGML_ABORT("not implemented on this platform");
#else
# error Unknown architecture
#endif
Expand Down
2 changes: 1 addition & 1 deletion ggml/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -224,7 +224,7 @@ option(GGML_WEBGPU "ggml: use WebGPU"
option(GGML_WEBGPU_DEBUG "ggml: enable WebGPU debug output" OFF)
option(GGML_WEBGPU_CPU_PROFILE "ggml: enable WebGPU profiling (CPU)" OFF)
option(GGML_WEBGPU_GPU_PROFILE "ggml: enable WebGPU profiling (GPU)" OFF)

# Selects JSPI for the WebGPU backend; enabled by default.
# NOTE(review): appears to matter only for Emscripten builds - confirm against
# ggml/src/ggml-webgpu/CMakeLists.txt.
option(GGML_WEBGPU_JSPI "ggml: use JSPI for WebGPU" ON)
option(GGML_ZDNN "ggml: use zDNN" OFF)
option(GGML_METAL "ggml: use Metal" ${GGML_METAL_DEFAULT})
option(GGML_METAL_NDEBUG "ggml: disable Metal debugging" OFF)
Expand Down
22 changes: 20 additions & 2 deletions ggml/src/ggml-webgpu/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -39,15 +39,33 @@ add_dependencies(ggml-webgpu generate_shaders)
if(EMSCRIPTEN)
    # Optional path to an unpacked emdawnwebgpu_pkg directory. When left empty,
    # the emdawnwebgpu port is requested from Emscripten by name instead.
    set(EMDAWNWEBGPU_DIR "" CACHE PATH "Path to emdawnwebgpu_pkg")

    # Link options are INTERFACE so that they are applied to the targets that
    # link against ggml-webgpu (the final executables), not to the library itself.
    if(NOT EMDAWNWEBGPU_DIR)
        # default built-in port
        target_compile_options(ggml-webgpu PRIVATE "--use-port=emdawnwebgpu")
        target_link_options(ggml-webgpu INTERFACE "--use-port=emdawnwebgpu")
    else()
        # custom port
        target_compile_options(ggml-webgpu PRIVATE "--use-port=${EMDAWNWEBGPU_DIR}/emdawnwebgpu.port.py")
        target_link_options(ggml-webgpu INTERFACE "--use-port=${EMDAWNWEBGPU_DIR}/emdawnwebgpu.port.py")
    endif()

    # The exception flag must be the same at compile time and at link time.
    if(GGML_WEBGPU_JSPI)
        target_compile_options(ggml-webgpu PRIVATE "-fwasm-exceptions")
        target_link_options(ggml-webgpu INTERFACE "-sJSPI" "-fwasm-exceptions")
    else()
        target_compile_options(ggml-webgpu PRIVATE "-fexceptions")
        target_link_options(ggml-webgpu INTERFACE "-sASYNCIFY" "-fexceptions")
    endif()
else()
    # Native build: use the Dawn package's imported WebGPU target.
    find_package(Dawn REQUIRED)
    set(DawnWebGPU_TARGET dawn::webgpu_dawn)
endif()

# Debug output: expose GGML_WEBGPU_DEBUG=1 to the backend's own sources.
if(GGML_WEBGPU_DEBUG)
    target_compile_definitions(ggml-webgpu PRIVATE GGML_WEBGPU_DEBUG=1)
endif()

# Debug output on Emscripten: also pass -sASSERTIONS=2 to the link step of
# every target that links ggml-webgpu.
if(GGML_WEBGPU_DEBUG AND EMSCRIPTEN)
    target_link_options(ggml-webgpu INTERFACE "-sASSERTIONS=2")
endif()

if (GGML_WEBGPU_CPU_PROFILE)
Expand Down
Loading
Loading