-
Notifications
You must be signed in to change notification settings - Fork 15.2k
Closed
Description
https://godbolt.org/z/vc4Y1r6Mq
C++ code:
_Float16 foo(_Float16 x) {
return static_cast<_Float16>(__builtin_ceilf(x));
}

GCC output with -O3 -march=raptorlake -fno-omit-frame-pointer (takes 1.33-1.64 ns on i7-13700H):
foo(_Float16):
vpxor xmm1, xmm1, xmm1
vpblendw xmm0, xmm1, xmm0, 1
vcvtph2ps xmm0, xmm0
vroundss xmm0, xmm0, xmm0, 10
vinsertps xmm0, xmm0, xmm0, 0xe
vcvtps2ph xmm0, xmm0, 4
ret

Clang output with -O3 -march=raptorlake -fno-omit-frame-pointer (takes ~9.12 ns on i7-13700H):
foo(_Float16): # @foo(_Float16)
push rbp
mov rbp, rsp
vpextrw eax, xmm0, 0
vmovd xmm0, eax
vcvtph2ps xmm0, xmm0
vroundss xmm0, xmm0, xmm0, 10
vcvtps2ph xmm0, xmm0, 4
vmovd eax, xmm0
vpinsrw xmm0, xmm0, eax, 0
pop rbp
ret