Skip to content

Commit 5003169

Browse files
committed
Fix sse tests with -mnontrapping-fptoint
Turns out we were relying on the explicit llvm bounds check here to give use the defined SSE behavior, but normal C semantics don't give us that so we need explicit bounds checks here.
1 parent ffeb761 commit 5003169

File tree

3 files changed

+50
-26
lines changed

3 files changed

+50
-26
lines changed

system/include/compat/emmintrin.h

Lines changed: 19 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -431,9 +431,12 @@ _mm_cvttpd_epi32(__m128d __a)
431431
int m[2];
432432
for(int i = 0; i < 2; ++i)
433433
{
434+
float elem = __a[i];
434435
int x = lrint(__a[i]);
435-
if (x != 0 || fabs(__a[i]) < 2.0)
436-
m[i] = (int)__a[i];
436+
if ((x != 0 || fabs(elem) < 2.0) && !isnanf(elem) && elem <= INT_MAX && elem >= INT_MIN)
437+
// Use the trapping instruction here since we have explicit bounds checks
438+
// above.
439+
m[i] = __builtin_wasm_trunc_s_i32_f32(elem);
437440
else
438441
m[i] = (int)0x80000000;
439442
}
@@ -444,9 +447,12 @@ static __inline__ int __attribute__((__always_inline__, __nodebug__))
444447
_mm_cvttsd_si32(__m128d __a)
445448
{
446449
// TODO: OPTIMIZE!
447-
int x = lrint(__a[0]);
448-
if (x != 0 || fabs(__a[0]) < 2.0)
449-
return (int)__a[0];
450+
float elem = __a[0];
451+
if (isnan(elem) || elem > INT_MAX || elem < INT_MIN) return (int)0x80000000;
452+
if (lrint(elem) != 0 || fabs(elem) < 2.0)
453+
// Use the trapping instruction here since we have explicit bounds checks
454+
// above.
455+
return __builtin_wasm_trunc_s_i32_f32(elem);
450456
else
451457
return (int)0x80000000;
452458
}
@@ -1013,9 +1019,10 @@ static __inline__ long long __attribute__((__always_inline__, __nodebug__))
10131019
_mm_cvttsd_si64(__m128d __a)
10141020
{
10151021
// TODO: optimize
1016-
if (isnan(__a[0]) || isinf(__a[0])) return 0x8000000000000000LL;
1017-
long long x = llrint(__a[0]);
1018-
if (x != 0xFFFFFFFF00000000ULL && (x != 0 || fabsf(__a[0]) < 2.f))
1022+
float e = __a[0];
1023+
if (isnan(e) || isinf(e) || e > LLONG_MAX || e < LLONG_MIN) return 0x8000000000000000LL;
1024+
long long x = llrint(e);
1025+
if (x != 0xFFFFFFFF00000000ULL && (x != 0 || fabsf(e) < 2.f))
10191026
return (long long)__a[0];
10201027
else
10211028
return 0x8000000000000000LL;
@@ -1056,9 +1063,10 @@ _mm_cvttps_epi32(__m128 __a)
10561063
} u;
10571064
for(int i = 0; i < 4; ++i)
10581065
{
1059-
int x = lrint(__a[i]);
1060-
if (x != 0 || fabs(__a[i]) < 2.0)
1061-
u.x[i] = (int)__a[i];
1066+
float e = __a[i];
1067+
int x = lrint(e);
1068+
if ((x != 0 || fabs(e) < 2.0) && !isnanf(e) && e <= INT_MAX && e >= INT_MIN)
1069+
u.x[i] = (int)e;
10621070
else
10631071
u.x[i] = (int)0x80000000;
10641072
}

system/include/compat/xmmintrin.h

Lines changed: 11 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@
99

1010
#include <wasm_simd128.h>
1111

12+
#include <limits.h>
1213
#include <math.h>
1314
#include <string.h>
1415

@@ -605,9 +606,11 @@ static __inline__ int __attribute__((__always_inline__, __nodebug__, DIAGNOSE_SL
605606

606607
static __inline__ int __attribute__((__always_inline__, __nodebug__, DIAGNOSE_SLOW)) _mm_cvttss_si32(__m128 __a)
607608
{
608-
int x = lrint(((__f32x4)__a)[0]);
609-
if (x != 0 || fabsf(((__f32x4)__a)[0]) < 2.f)
610-
return (int)((__f32x4)__a)[0];
609+
float e = ((__f32x4)__a)[0];
610+
if (isnanf(e) || e > INT_MAX || e < INT_MIN) return (int)0x80000000;
611+
int x = lrint(e);
612+
if ((x != 0 || fabsf(e) < 2.f))
613+
return (int)e;
611614
else
612615
return (int)0x80000000;
613616
}
@@ -635,10 +638,11 @@ _mm_cvtss_si64(__m128 __a)
635638
static __inline__ long long __attribute__((__always_inline__, __nodebug__, DIAGNOSE_SLOW))
636639
_mm_cvttss_si64(__m128 __a)
637640
{
638-
if (isnan(((__f32x4)__a)[0]) || isinf(((__f32x4)__a)[0])) return 0x8000000000000000LL;
639-
long long x = llrint(((__f32x4)__a)[0]);
640-
if (x != 0xFFFFFFFF00000000ULL && (x != 0 || fabsf(((__f32x4)__a)[0]) < 2.f))
641-
return (long long)((__f32x4)__a)[0];
641+
float e = ((__f32x4)__a)[0];
642+
if (isnan(e) || isinf(e) || e > LLONG_MAX || e < LLONG_MIN) return 0x8000000000000000LL;
643+
long long x = llrint(e);
644+
if (x != 0xFFFFFFFF00000000ULL && (x != 0 || fabsf(e) < 2.f))
645+
return (long long)e;
642646
else
643647
return 0x8000000000000000LL;
644648
}

test/test_core.py

Lines changed: 20 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -6511,12 +6511,16 @@ def test_neon_wasm_simd(self):
65116511
@requires_native_clang
65126512
@no_safe_heap('has unaligned 64-bit operations in wasm')
65136513
@no_ubsan('test contains UB')
6514-
def test_sse1(self):
6514+
@parameterized({
6515+
'': ([],),
6516+
'nontrapping': (['-mnontrapping-fptoint'],)
6517+
})
6518+
def test_sse1(self, args):
65156519
src = test_file('sse/test_sse1.cpp')
65166520
self.run_process([shared.CLANG_CXX, src, '-msse', '-o', 'test_sse1', '-D_CRT_SECURE_NO_WARNINGS=1'] + clang_native.get_clang_native_args(), stdout=PIPE)
65176521
native_result = self.run_process('./test_sse1', stdout=PIPE).stdout
65186522

6519-
self.emcc_args += ['-I' + test_file('sse'), '-msse']
6523+
self.emcc_args += ['-I' + test_file('sse'), '-msse'] + args
65206524
self.maybe_closure()
65216525

65226526
self.do_runf(src, native_result)
@@ -6528,14 +6532,18 @@ def test_sse1(self):
65286532
@is_slow_test
65296533
@no_ubsan('https://github.com/emscripten-core/emscripten/issues/19688')
65306534
@no_asan('local count too large')
6531-
def test_sse2(self):
6535+
@parameterized({
6536+
'': ([],),
6537+
'nontrapping': (['-mnontrapping-fptoint'],)
6538+
})
6539+
def test_sse2(self, args):
65326540
if self.is_wasm64():
65336541
self.require_node_canary()
65346542
src = test_file('sse/test_sse2.cpp')
65356543
self.run_process([shared.CLANG_CXX, src, '-msse2', '-Wno-argument-outside-range', '-o', 'test_sse2', '-D_CRT_SECURE_NO_WARNINGS=1'] + clang_native.get_clang_native_args(), stdout=PIPE)
65366544
native_result = self.run_process('./test_sse2', stdout=PIPE).stdout
65376545

6538-
self.emcc_args += ['-I' + test_file('sse'), '-msse2', '-Wno-argument-outside-range', '-sSTACK_SIZE=1MB']
6546+
self.emcc_args += ['-I' + test_file('sse'), '-msse2', '-Wno-argument-outside-range', '-sSTACK_SIZE=1MB'] + args
65396547
self.maybe_closure()
65406548
self.do_runf(src, native_result)
65416549

@@ -6587,8 +6595,8 @@ def test_sse4_1(self):
65876595
@wasm_simd
65886596
@requires_native_clang
65896597
@parameterized({
6590-
'': (False,),
6591-
'2': (True,)
6598+
'': (False,),
6599+
'2': (True,)
65926600
})
65936601
def test_sse4(self, use_4_2):
65946602
msse4 = '-msse4.2' if use_4_2 else '-msse4'
@@ -6606,12 +6614,16 @@ def test_sse4(self, use_4_2):
66066614
@is_slow_test
66076615
@no_asan('local count too large')
66086616
@no_ubsan('local count too large')
6609-
def test_avx(self):
6617+
@parameterized({
6618+
'': ([],),
6619+
'nontrapping': (['-mnontrapping-fptoint'],)
6620+
})
6621+
def test_avx(self, args):
66106622
src = test_file('sse/test_avx.cpp')
66116623
self.run_process([shared.CLANG_CXX, src, '-mavx', '-Wno-argument-outside-range', '-Wpedantic', '-o', 'test_avx', '-D_CRT_SECURE_NO_WARNINGS=1'] + clang_native.get_clang_native_args(), stdout=PIPE)
66126624
native_result = self.run_process('./test_avx', stdout=PIPE).stdout
66136625

6614-
self.emcc_args += ['-I' + test_file('sse'), '-mavx', '-Wno-argument-outside-range', '-sSTACK_SIZE=1MB']
6626+
self.emcc_args += ['-I' + test_file('sse'), '-mavx', '-Wno-argument-outside-range', '-sSTACK_SIZE=1MB'] + args
66156627
self.maybe_closure()
66166628
self.do_runf(src, native_result)
66176629

0 commit comments

Comments
 (0)