Skip to content

Commit 6f3cfe3

Browse files
authored
Add implementation of emscripten_memcpy_big based on bulk memory. (#19128)
These new functions live in `libbulkmemory`, which only gets included if bulk memory is enabled (either directly via `-mbulk-memory` or indirectly via `-pthread`).

Benchmark results for benchmark.test_memcpy_1mb:

```
v8: mean: 1.666
v8-bulkmemory: mean: 1.598
v8-standalone-bulkmemory: mean: 1.576
v8-standalone: mean: 3.197
```

Here we can see that when bulk memory is enabled it's at least as fast as, if not faster than, the JS version. v8-standalone doesn't have emscripten_memcpy_big at all and is much slower, as expected. By adding `-mbulk-memory` the standalone version becomes just as fast as the non-standalone.
1 parent 38eedc6 commit 6f3cfe3

16 files changed

+133
-16
lines changed

embuilder.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,7 @@
2828

2929
# Minimal subset of targets used by CI systems to build enough to be useful
3030
MINIMAL_TASKS = [
31+
'libbulkmemory',
3132
'libcompiler_rt',
3233
'libcompiler_rt-wasm-sjlj',
3334
'libc',

emcc.py

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1599,6 +1599,9 @@ def phase_setup(options, state, newargs):
15991599
if '-mbulk-memory' not in newargs:
16001600
newargs += ['-mbulk-memory']
16011601

1602+
if settings.SHARED_MEMORY:
1603+
settings.BULK_MEMORY = 1
1604+
16021605
if 'DISABLE_EXCEPTION_CATCHING' in user_settings and 'EXCEPTION_CATCHING_ALLOWED' in user_settings:
16031606
# If we get here then the user specified both DISABLE_EXCEPTION_CATCHING and EXCEPTION_CATCHING_ALLOWED
16041607
# on the command line. This is no longer valid so report either an error or a warning (for
@@ -2434,6 +2437,8 @@ def phase_linker_setup(options, state, newargs):
24342437
settings.JS_LIBRARIES.append((0, shared.path_from_root('src', 'library_wasm_worker.js')))
24352438

24362439
settings.SUPPORTS_GLOBALTHIS = feature_matrix.caniuse(feature_matrix.Feature.GLOBALTHIS)
2440+
if not settings.BULK_MEMORY:
2441+
settings.BULK_MEMORY = feature_matrix.caniuse(feature_matrix.Feature.BULK_MEMORY)
24372442

24382443
if settings.AUDIO_WORKLET:
24392444
if not settings.SUPPORTS_GLOBALTHIS:
@@ -3565,6 +3570,10 @@ def consume_arg_file():
35653570
settings.DISABLE_EXCEPTION_CATCHING = 1
35663571
settings.DISABLE_EXCEPTION_THROWING = 1
35673572
settings.WASM_EXCEPTIONS = 0
3573+
elif arg == '-mbulk-memory':
3574+
settings.BULK_MEMORY = 1
3575+
elif arg == '-mno-bulk-memory':
3576+
settings.BULK_MEMORY = 0
35683577
elif arg == '-fexceptions':
35693578
# TODO Currently -fexceptions only means Emscripten EH. Switch to wasm
35703579
# exception handling by default when -fexceptions is given when wasm

src/library.js

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -389,9 +389,11 @@ mergeInto(LibraryManager.library, {
389389
// variant, so we should never emit emscripten_memcpy_big() in the build.
390390
// In STANDALONE_WASM we avoid the emscripten_memcpy_big dependency so keep
391391
// the wasm file standalone.
392+
// In BULK_MEMORY mode we include native versions of these functions based
393+
// on memory.fill and memory.copy.
392394
// In MAIN_MODULE=1 or EMCC_FORCE_STDLIBS mode all of libc is force included
393395
// so we cannot override parts of it, and therefore cannot use libc_optz.
394-
#if (SHRINK_LEVEL < 2 || LINKABLE || process.env.EMCC_FORCE_STDLIBS) && !STANDALONE_WASM
396+
#if (SHRINK_LEVEL < 2 || LINKABLE || process.env.EMCC_FORCE_STDLIBS) && !STANDALONE_WASM && !BULK_MEMORY
395397

396398
#if MIN_CHROME_VERSION < 45 || MIN_EDGE_VERSION < 14 || MIN_FIREFOX_VERSION < 34 || MIN_IE_VERSION != TARGET_NOT_SUPPORTED || MIN_SAFARI_VERSION < 100101
397399
// https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/TypedArray/copyWithin lists browsers that support TypedArray.prototype.copyWithin, but it

src/settings_internal.js

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -256,3 +256,5 @@ var POST_JS_FILES = [];
256256

257257
// Set when -pthread / -sPTHREADS is passed
258258
var PTHREADS = false;
259+
260+
var BULK_MEMORY = false;

system/lib/libc/emscripten_internal.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,7 @@ extern "C" {
3030
void emscripten_memcpy_big(void* __restrict__ dest,
3131
const void* __restrict__ src,
3232
size_t n) EM_IMPORT(emscripten_memcpy_big);
33+
void emscripten_memset_big(void* ptr, char value, size_t n);
3334

3435
void emscripten_notify_memory_growth(size_t memory_index);
3536

system/lib/libc/emscripten_memcpy.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -29,7 +29,7 @@ static void *__memcpy(void *restrict dest, const void *restrict src, size_t n) {
2929
unsigned char *block_aligned_d_end;
3030
unsigned char *d_end;
3131

32-
#ifndef EMSCRIPTEN_STANDALONE_WASM
32+
#if !defined(EMSCRIPTEN_STANDALONE_WASM) || defined(__wasm_bulk_memory__)
3333
if (n >= 512) {
3434
emscripten_memcpy_big(dest, src, n);
3535
return dest;
Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,14 @@
1+
#ifdef __wasm64__
2+
#define PTR i64
3+
#else
4+
#define PTR i32
5+
#endif
6+
7+
.globl emscripten_memcpy_big
8+
emscripten_memcpy_big:
9+
.functype emscripten_memcpy_big (PTR, PTR, PTR) -> ()
10+
local.get 0
11+
local.get 1
12+
local.get 2
13+
memory.copy 0, 0
14+
end_function
Lines changed: 29 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1,23 +1,44 @@
1-
// XXX EMSCRIPTEN ASAN: build an uninstrumented version of memset
2-
#if defined(__EMSCRIPTEN__) && defined(__has_feature)
3-
#if __has_feature(address_sanitizer)
4-
#define memset __attribute__((no_sanitize("address"))) emscripten_builtin_memset
5-
#endif
1+
#include "emscripten_internal.h" // for emscripten_memset_big
2+
3+
#if defined(__has_feature) && __has_feature(address_sanitizer)
4+
// build an uninstrumented version of memset
5+
__attribute__((no_sanitize("address"))) void *__musl_memset(void *str, int c, size_t n);
6+
__attribute__((no_sanitize("address"))) void *__memset(void *str, int c, size_t n);
67
#endif
78

8-
#ifdef EMSCRIPTEN_OPTIMIZE_FOR_OZ
9+
__attribute__((__weak__)) void *__musl_memset(void *str, int c, size_t n);
10+
__attribute__((__weak__)) void *__memset(void *str, int c, size_t n);
911

10-
#include <stddef.h>
12+
#ifdef EMSCRIPTEN_OPTIMIZE_FOR_OZ
1113

12-
void *memset(void *str, int c, size_t n) {
14+
void *__memset(void *str, int c, size_t n) {
1315
unsigned char *s = (unsigned char *)str;
1416
#pragma clang loop unroll(disable)
1517
while(n--) *s++ = c;
1618
return str;
1719
}
1820

21+
#elif defined(__wasm_bulk_memory__)
22+
23+
#define memset __musl_memset
24+
#include "musl/src/string/memset.c"
25+
#undef memset
26+
27+
void *__memset(void *str, int c, size_t n) {
28+
if (n >= 512) {
29+
emscripten_memset_big(str, c, n);
30+
return str;
31+
}
32+
return __musl_memset(str, c, n);
33+
}
34+
1935
#else
2036

37+
#define memset __memset
2138
#include "musl/src/string/memset.c"
39+
#undef memset
2240

2341
#endif
42+
43+
weak_alias(__memset, emscripten_builtin_memset);
44+
weak_alias(__memset, memset);
Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,14 @@
1+
#ifdef __wasm64__
2+
#define PTR i64
3+
#else
4+
#define PTR i32
5+
#endif
6+
7+
.globl emscripten_memset_big
8+
emscripten_memset_big:
9+
.functype emscripten_memset_big (PTR, i32, PTR) -> ()
10+
local.get 0
11+
local.get 1
12+
local.get 2
13+
memory.fill 0
14+
end_function

system/lib/standalone/standalone.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -152,7 +152,7 @@ int emscripten_resize_heap(size_t size) {
152152
}
153153

154154
double emscripten_get_now(void) {
155-
return (1000 * clock()) / (double)CLOCKS_PER_SEC;
155+
return (1000ll * clock()) / (double)CLOCKS_PER_SEC;
156156
}
157157

158158
// C++ ABI

0 commit comments

Comments
 (0)