-
Notifications
You must be signed in to change notification settings - Fork 14.6k
Open
Description
#include <string.h>
/*
 * Test object for the copy functions: a 176-byte buffer whose only member
 * is aligned to 32 bytes, so every S object (and therefore every `data`
 * array) starts on a 32-byte boundary.
 *
 * `_Alignas` is used instead of `alignas` so the reproducer compiles as
 * C11/C17 without <stdalign.h>; it is also accepted in C23.
 */
struct S
{
_Alignas(32) char data[176];
};
typedef struct S S;
/*
 * Copy the first byte of src->data into dest1->data and into dest2->data.
 *
 * The pointers are not restrict-qualified, so the compiler is not told
 * that they refer to distinct objects.
 */
void copy1(S* dest1, S* dest2, const S* src)
{
memcpy(dest1->data, src->data, 1);
memcpy(dest2->data, src->data, 1);
}
/*
 * Copy the first 8 bytes of src->data into dest1->data and into
 * dest2->data.
 *
 * The pointers are not restrict-qualified, so the compiler is not told
 * that they refer to distinct objects.
 */
void copy8(S* dest1, S* dest2, const S* src)
{
memcpy(dest1->data, src->data, 8);
memcpy(dest2->data, src->data, 8);
}
/*
 * Copy the first 16 bytes of src->data into dest1->data and into
 * dest2->data.
 *
 * The pointers are not restrict-qualified. The copy size (16) is smaller
 * than the 32-byte alignment declared on S's `data` member.
 * This is one of the two cases for which the report's "Actual" listing
 * shows src being loaded twice.
 */
void copy16(S* dest1, S* dest2, const S* src)
{
memcpy(dest1->data, src->data, 16);
memcpy(dest2->data, src->data, 16);
}
/*
 * Same 16-byte double copy as copy16, but with all three pointers
 * restrict-qualified.
 *
 * The report's "Actual" listing still shows src being loaded twice for
 * this variant.
 */
void copy16b(S* restrict dest1, S* restrict dest2, const S* restrict src)
{
memcpy(dest1->data, src->data, 16);
memcpy(dest2->data, src->data, 16);
}
-O2
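Because the `data` member is aligned to 32 bytes, which is greater than the size of every copy here, the source and destination regions can never partially overlap: they are either exactly the same region or completely disjoint (and in `copy16b` the pointers are additionally qualified `restrict`). A single appropriately-sized read of `src->data` is therefore sufficient to feed both stores.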
Expected:
# Expected code for copy1 (1-byte copy): one load, two stores.
# rdi/rsi/rdx hold dest1/dest2/src, consistent with the System V AMD64
# integer-argument order and the C parameter order.
copy1:
# Load src->data[0] once; the same byte (al) feeds both stores.
movzx eax, byte ptr [rdx]
mov byte ptr [rdi], al
mov byte ptr [rsi], al
ret
# Expected code for copy8 (8-byte copy): one load, two stores.
# rdi/rsi/rdx hold dest1/dest2/src, consistent with the System V AMD64
# integer-argument order and the C parameter order.
copy8:
# Load 8 bytes of src->data once; rax feeds both stores.
mov rax, qword ptr [rdx]
mov qword ptr [rdi], rax
mov qword ptr [rsi], rax
ret
# Expected code for copy16 (16-byte copy): one load, two stores.
# rdi/rsi/rdx hold dest1/dest2/src, consistent with the System V AMD64
# integer-argument order and the C parameter order.
copy16:
# Single 16-byte load of src->data; xmm0 is reused for both stores.
# movaps is an aligned access; the C source declares `data` alignas(32).
movaps xmm0, xmmword ptr [rdx]
movaps xmmword ptr [rdi], xmm0
movaps xmmword ptr [rsi], xmm0
ret
# Expected code for copy16b (16-byte copy, restrict-qualified pointers):
# one load, two stores -- identical to the expected copy16 body.
copy16b:
# Single 16-byte load of src->data; xmm0 is reused for both stores.
movaps xmm0, xmmword ptr [rdx]
movaps xmmword ptr [rdi], xmm0
movaps xmmword ptr [rsi], xmm0
ret
Actual:
# Actual output for copy1: matches the expected listing
# (one byte load from [rdx], stored to [rdi] and [rsi]).
copy1:
movzx eax, byte ptr [rdx]
mov byte ptr [rdi], al
mov byte ptr [rsi], al
ret
# Actual output for copy8: matches the expected listing
# (one 8-byte load from [rdx], stored to [rdi] and [rsi]).
copy8:
mov rax, qword ptr [rdx]
mov qword ptr [rdi], rax
mov qword ptr [rsi], rax
ret
# Actual output for copy16: differs from the expected listing by one
# extra instruction -- src is loaded twice.
copy16:
movaps xmm0, xmmword ptr [rdx]
movaps xmmword ptr [rdi], xmm0
# Reload of the same 16 bytes from [rdx] after the store to [rdi];
# this second load is the redundancy the report is about.
movaps xmm0, xmmword ptr [rdx]
movaps xmmword ptr [rsi], xmm0
ret
# Actual output for copy16b: same code as the actual copy16, i.e. src is
# still loaded twice even though the C pointers are restrict-qualified.
copy16b:
movaps xmm0, xmmword ptr [rdx]
movaps xmmword ptr [rdi], xmm0
# Reload of the same 16 bytes from [rdx] after the store to [rdi];
# this second load is the redundancy the report is about.
movaps xmm0, xmmword ptr [rdx]
movaps xmmword ptr [rsi], xmm0
ret