Skip to content

Commit b1ae429

Browse files
committed
Fix deadloack in dlopen. NFC
When within dlopen itself we hold an exclusive lock so we need to disable the automatic code synchronization during that time. Split out from #18376
1 parent 8fc4ea5 commit b1ae429

File tree

4 files changed

+188
-58
lines changed

4 files changed

+188
-58
lines changed

system/lib/libc/dynlink.c

Lines changed: 92 additions & 58 deletions
Original file line numberDiff line numberDiff line change
@@ -47,9 +47,82 @@ void _emscripten_dlopen_js(struct dso* handle,
4747
void __dl_vseterr(const char*, va_list);
4848

4949
static struct dso * _Atomic head, * _Atomic tail;
50+
51+
#ifdef _REENTRANT
5052
static thread_local struct dso* thread_local_tail;
5153
static pthread_rwlock_t lock;
5254

55+
static void dlsync_locked() {
56+
if (!thread_local_tail) {
57+
thread_local_tail = head;
58+
}
59+
while (thread_local_tail->next) {
60+
struct dso* p = thread_local_tail->next;
61+
dbg("dlsync_locked: %s mem_addr=%p "
62+
"mem_size=%zu table_addr=%p table_size=%zu",
63+
p->name,
64+
p->mem_addr,
65+
p->mem_size,
66+
p->table_addr,
67+
p->table_size);
68+
void* success = _dlopen_js(p);
69+
if (!success) {
70+
// If any on the libraries fails to load here then we give up.
71+
// TODO(sbc): Ideally this would never happen and we could/should
72+
// abort, but on the main thread (where we don't have sync xhr) its
73+
// often not possible to syncronously load side module.
74+
_emscripten_errf("_dlopen_js failed: %s", dlerror());
75+
break;
76+
}
77+
thread_local_tail = p;
78+
}
79+
}
80+
81+
// This function is called from emscripten_yield which itself is called whenever
82+
// we block on a futex. We need to check to avoid infinite recursion when
83+
// taking the lock below.
84+
static thread_local bool skip_dlsync = false;
85+
86+
static void ensure_init();
87+
88+
void _emscripten_thread_sync_code() {
89+
if (skip_dlsync) {
90+
return;
91+
}
92+
skip_dlsync = true;
93+
ensure_init();
94+
if (thread_local_tail != tail) {
95+
dbg("emscripten_thread_sync_code: catching up %p %p", thread_local_tail, tail);
96+
pthread_rwlock_rdlock(&lock);
97+
dlsync_locked();
98+
pthread_rwlock_unlock(&lock);
99+
dbg("emscripten_thread_sync_code: done");
100+
}
101+
skip_dlsync = false;
102+
}
103+
104+
static void do_read_lock() {
105+
skip_dlsync = true;
106+
pthread_rwlock_rdlock(&lock);
107+
}
108+
109+
static void do_write_lock() {
110+
// Once we have the lock we want to avoid automatic code sync as that would
111+
// result in a deadlock.
112+
skip_dlsync = true;
113+
pthread_rwlock_wrlock(&lock);
114+
}
115+
116+
static void do_unlock() {
117+
pthread_rwlock_unlock(&lock);
118+
skip_dlsync = false;
119+
}
120+
#else
121+
#define do_unlock()
122+
#define do_read_lock()
123+
#define do_write_lock()
124+
#endif
125+
53126
static void error(const char* fmt, ...) {
54127
va_list ap;
55128
va_start(ap, fmt);
@@ -80,13 +153,16 @@ static void load_library_done(struct dso* p) {
80153
p->table_addr,
81154
p->table_size);
82155

156+
#ifdef _REENTRANT
157+
thread_local_tail = p;
158+
#endif
159+
83160
// insert into linked list
84161
p->prev = tail;
85162
if (tail) {
86163
tail->next = p;
87164
}
88165
tail = p;
89-
thread_local_tail = p;
90166

91167
if (!head) {
92168
head = p;
@@ -114,14 +190,14 @@ static void dlopen_js_onsuccess(struct dso* dso, struct async_data* data) {
114190
dso->mem_addr,
115191
dso->mem_size);
116192
load_library_done(dso);
117-
pthread_rwlock_unlock(&lock);
193+
do_unlock();
118194
data->onsuccess(data->user_data, dso);
119195
free(data);
120196
}
121197

122198
static void dlopen_js_onerror(struct dso* dso, struct async_data* data) {
123199
dbg("dlopen_js_onerror: dso=%p", dso);
124-
pthread_rwlock_unlock(&lock);
200+
do_unlock();
125201
data->onerror(data->user_data);
126202
free(dso);
127203
free(data);
@@ -134,7 +210,7 @@ static void ensure_init() {
134210
return;
135211
}
136212
// Initialize the dso list. This happens on first run.
137-
pthread_rwlock_wrlock(&lock);
213+
do_write_lock();
138214
if (!head) {
139215
// Flags are not important since the main module is already loaded.
140216
struct dso* p = load_library_start("__main__", RTLD_NOW|RTLD_GLOBAL);
@@ -143,7 +219,7 @@ static void ensure_init() {
143219
load_library_done(p);
144220
assert(head);
145221
}
146-
pthread_rwlock_unlock(&lock);
222+
do_unlock();
147223
}
148224

149225
void* dlopen(const char* file, int flags) {
@@ -156,7 +232,11 @@ void* dlopen(const char* file, int flags) {
156232
struct dso* p;
157233
int cs;
158234
pthread_setcancelstate(PTHREAD_CANCEL_DISABLE, &cs);
159-
pthread_rwlock_wrlock(&lock);
235+
do_write_lock();
236+
#ifdef _REENTRANT
237+
// Make sure we are in sync before loading any new DSOs.
238+
dlsync_locked();
239+
#endif
160240

161241
/* Search for the name to see if it's already loaded */
162242
for (p = head; p; p = p->next) {
@@ -180,7 +260,7 @@ void* dlopen(const char* file, int flags) {
180260
dbg("dlopen_js: success: %p", p);
181261
load_library_done(p);
182262
end:
183-
pthread_rwlock_unlock(&lock);
263+
do_unlock();
184264
pthread_setcancelstate(cs, 0);
185265
return p;
186266
}
@@ -192,10 +272,10 @@ void emscripten_dlopen(const char* filename, int flags, void* user_data,
192272
onsuccess(user_data, head);
193273
return;
194274
}
195-
pthread_rwlock_wrlock(&lock);
275+
do_write_lock();
196276
struct dso* p = load_library_start(filename, flags);
197277
if (!p) {
198-
pthread_rwlock_unlock(&lock);
278+
do_unlock();
199279
onerror(user_data);
200280
return;
201281
}
@@ -217,9 +297,10 @@ void* __dlsym(void* restrict p, const char* restrict s, void* restrict ra) {
217297
return 0;
218298
}
219299
void* res;
220-
pthread_rwlock_rdlock(&lock);
300+
do_read_lock();
221301
res = _dlsym_js(p, s);
222-
pthread_rwlock_unlock(&lock);
302+
do_unlock();
303+
dbg("__dlsym done dso:%p res:%p", p, res);
223304
return res;
224305
}
225306

@@ -232,50 +313,3 @@ int dladdr(const void* addr, Dl_info* info) {
232313
info->dli_saddr = NULL;
233314
return 1;
234315
}
235-
236-
#ifdef _REENTRANT
237-
void _emscripten_thread_sync_code() {
238-
// This function is called from emscripten_yeild which itself is called
239-
// whenever we block on a futex. We need to check to avoid infinite
240-
// recursion when taking the lock below.
241-
static thread_local bool syncing = false;
242-
if (syncing) {
243-
return;
244-
}
245-
syncing = true;
246-
ensure_init();
247-
if (thread_local_tail == tail) {
248-
dbg("emscripten_thread_sync_code: already in sync");
249-
goto done;
250-
}
251-
pthread_rwlock_rdlock(&lock);
252-
if (!thread_local_tail) {
253-
thread_local_tail = head;
254-
}
255-
while (thread_local_tail->next) {
256-
struct dso* p = thread_local_tail->next;
257-
dbg("emscripten_thread_sync_code: %s mem_addr=%p "
258-
"mem_size=%zu table_addr=%p table_size=%zu",
259-
p->name,
260-
p->mem_addr,
261-
p->mem_size,
262-
p->table_addr,
263-
p->table_size);
264-
void* success = _dlopen_js(p);
265-
if (!success) {
266-
// If any on the libraries fails to load here then we give up.
267-
// TODO(sbc): Ideally this would never happen and we could/should
268-
// abort, but on the main thread (where we don't have sync xhr) its
269-
// often not possible to syncronously load side module.
270-
_emscripten_errf("emscripten_thread_sync_code failed: %s", dlerror());
271-
break;
272-
}
273-
thread_local_tail = p;
274-
}
275-
pthread_rwlock_unlock(&lock);
276-
dbg("emscripten_thread_sync_code done");
277-
278-
done:
279-
syncing = false;
280-
}
281-
#endif
Lines changed: 72 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,72 @@
1+
#include <assert.h>
2+
#include <dlfcn.h>
3+
#include <pthread.h>
4+
#include <stdio.h>
5+
6+
#ifndef NUM_THREADS
7+
#define NUM_THREADS 2
8+
#endif
9+
10+
typedef int* (*sidey_data_type)();
11+
typedef int (*func_t)();
12+
typedef func_t (*sidey_func_type)();
13+
14+
pthread_mutex_t mutex = PTHREAD_MUTEX_INITIALIZER;
15+
16+
_Atomic int thread_count = 0;
17+
18+
void* thread_main(void* arg) {
19+
int num = (intptr_t)arg;
20+
printf("thread_main %d %p\n", num, pthread_self());
21+
thread_count++;
22+
23+
// busy wait until all threads are running
24+
while (thread_count != NUM_THREADS) {}
25+
26+
char filename[255];
27+
sprintf(filename, "liblib%d.so", num);
28+
printf("loading %s\n", filename);
29+
void* handle = dlopen(filename, RTLD_NOW|RTLD_GLOBAL);
30+
printf("done loading %s\n", filename);
31+
if (!handle) {
32+
printf("dlerror: %s\n", dlerror());
33+
}
34+
assert(handle);
35+
/*
36+
* TODO(sbc): We have a bug that new functions added to the table via dlsym
37+
* are not yet correctly synchronized between threads.
38+
* Uncommenting the code below will cause a "table out of sync" error.
39+
*/
40+
/*
41+
sidey_data_type p_side_data_address;
42+
sidey_func_type p_side_func_address;
43+
p_side_data_address = dlsym(handle, "side_data_address");
44+
printf("p_side_data_address=%p\n", p_side_data_address);
45+
p_side_func_address = dlsym(handle, "side_func_address");
46+
printf("p_side_func_address=%p\n", p_side_func_address);
47+
*/
48+
49+
printf("done thread_main %d\n", num);
50+
return NULL;
51+
}
52+
53+
int main() {
54+
printf("in main: %p\n", pthread_self());
55+
pthread_mutex_lock(&mutex);
56+
57+
// start a bunch of threads while holding the lock
58+
pthread_t threads[NUM_THREADS];
59+
for (int i = 0; i < NUM_THREADS; i++) {
60+
pthread_create(&threads[i], NULL, thread_main, (void*)i);
61+
}
62+
63+
// busy wait until all threads are running
64+
while (thread_count != NUM_THREADS) {}
65+
66+
for (int i = 0; i < NUM_THREADS; i++) {
67+
pthread_join(threads[i], NULL);
68+
}
69+
70+
printf("main done\n");
71+
return 0;
72+
}

test/core/pthread/test_pthread_dlopen_side.c

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,17 @@
11
#include <stdio.h>
2+
#include <stdlib.h>
23

34
typedef int (*myfunc_type)();
45

56
static int mydata[10] = { 44 };
67

8+
static void dtor() {
9+
puts("side module atexit ..");
10+
}
11+
712
__attribute__((constructor)) static void ctor() {
813
puts("side module ctor");
14+
atexit(dtor);
915
}
1016

1117
static int myfunc() {

test/test_core.py

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9356,6 +9356,24 @@ def test_pthread_dlopen(self, do_yield):
93569356
'invalid index into function table',
93579357
assert_returncode=NON_ZERO)
93589358

9359+
@needs_dylink
9360+
@node_pthreads
9361+
def test_pthread_dlopen_many(self):
9362+
nthreads = 10
9363+
self.set_setting('USE_PTHREADS')
9364+
self.emcc_args.append('-Wno-experimental')
9365+
self.build_dlfcn_lib(test_file('core/pthread/test_pthread_dlopen_side.c'))
9366+
for i in range(nthreads):
9367+
shutil.copyfile('liblib.so', f'liblib{i}.so')
9368+
9369+
self.prep_dlfcn_main()
9370+
self.set_setting('EXIT_RUNTIME')
9371+
self.set_setting('PROXY_TO_PTHREAD')
9372+
self.do_runf(test_file('core/pthread/test_pthread_dlopen_many.c'),
9373+
['side module ctor', 'main done', 'side module atexit'],
9374+
emcc_args=[f'-DNUM_THREADS={nthreads}'],
9375+
assert_all=True)
9376+
93599377
@needs_dylink
93609378
@node_pthreads
93619379
def test_pthread_dlsym(self):

0 commit comments

Comments
 (0)