Skip to content

Missed optimization when copying large object to multiple targets #149155

@Alcaro

Description

@Alcaro
#include <string.h>

/* Buffer type for the reproducer: 176 bytes of char storage whose
   alignment is raised to 32 bytes. */
struct S
{
    /* _Alignas is the keyword spelling introduced in C11. Unlike the
       `alignas` macro it needs no <stdalign.h> in C11/C17, and it is
       still accepted in C23, so the snippet compiles with only
       <string.h> included. Layout is identical to `alignas(32)`. */
    _Alignas(32) char data[176];
};
typedef struct S S;

/* Copies the first byte of src->data into dest1->data and then into
   dest2->data, using one memcpy call per destination. */
void copy1(S* dest1, S* dest2, const S* src)
{
    memcpy(dest1->data, src->data, 1);
    memcpy(dest2->data, src->data, 1);
}
/* Copies the first 8 bytes of src->data into dest1->data and then into
   dest2->data, using one memcpy call per destination. */
void copy8(S* dest1, S* dest2, const S* src)
{
    memcpy(dest1->data, src->data, 8);
    memcpy(dest2->data, src->data, 8);
}
/* Copies the first 16 bytes of src->data into dest1->data and then into
   dest2->data, using one memcpy call per destination. The pointers are
   not restrict-qualified here. */
void copy16(S* dest1, S* dest2, const S* src)
{
    memcpy(dest1->data, src->data, 16);
    memcpy(dest2->data, src->data, 16);
}
/* Copies the first 16 bytes of src->data into dest1->data and then into
   dest2->data, using one memcpy call per destination. All three
   pointers are restrict-qualified. */
void copy16b(S* restrict dest1, S* restrict dest2, const S* restrict src)
{
    memcpy(dest1->data, src->data, 16);
    memcpy(dest2->data, src->data, 16);
}

-O2

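The copied ranges cannot partially overlap: data is aligned to 32 bytes, which is larger than every copy size used here, so any two ranges are either exactly the same or completely disjoint (and in copy16b the pointers are additionally tagged restrict). In both cases the store to dest1->data cannot change the value read from src->data, so a single appropriately-sized read of src->data is sufficient.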

Expected:

# Expected code for copy1: one byte is loaded from [rdx] and stored to
# [rdi] and [rsi]. Assuming SysV AMD64 argument order, rdi = dest1,
# rsi = dest2, rdx = src.
copy1:
        movzx   eax, byte ptr [rdx]
        mov     byte ptr [rdi], al
        mov     byte ptr [rsi], al
        ret

# Expected code for copy8: one 8-byte load from [rdx], then the same
# register is stored to [rdi] and [rsi]. Assuming SysV AMD64 argument
# order, rdi = dest1, rsi = dest2, rdx = src.
copy8:
        mov     rax, qword ptr [rdx]
        mov     qword ptr [rdi], rax
        mov     qword ptr [rsi], rax
        ret

# Expected code for copy16: a single 16-byte load from [rdx] feeds both
# stores. Assuming SysV AMD64 argument order, rdi = dest1, rsi = dest2,
# rdx = src.
copy16:
        movaps  xmm0, xmmword ptr [rdx]
        movaps  xmmword ptr [rdi], xmm0
        movaps  xmmword ptr [rsi], xmm0
        ret

# Expected code for copy16b (restrict-qualified variant): a single
# 16-byte load from [rdx] feeds both stores. Assuming SysV AMD64
# argument order, rdi = dest1, rsi = dest2, rdx = src.
copy16b:
        movaps  xmm0, xmmword ptr [rdx]
        movaps  xmmword ptr [rdi], xmm0
        movaps  xmmword ptr [rsi], xmm0
        ret

Actual:

# Actual code for copy1: one load from [rdx] and two stores, which is
# the same instruction sequence as the expected listing.
copy1:
        movzx   eax, byte ptr [rdx]
        mov     byte ptr [rdi], al
        mov     byte ptr [rsi], al
        ret

# Actual code for copy8: one load from [rdx] and two stores, which is
# the same instruction sequence as the expected listing.
copy8:
        mov     rax, qword ptr [rdx]
        mov     qword ptr [rdi], rax
        mov     qword ptr [rsi], rax
        ret

# Actual code for copy16: [rdx] is loaded twice, once before each store.
# This differs from the expected listing, which loads it only once.
copy16:
        movaps  xmm0, xmmword ptr [rdx]
        movaps  xmmword ptr [rdi], xmm0
        movaps  xmm0, xmmword ptr [rdx]         # second load of the same 16 bytes
        movaps  xmmword ptr [rsi], xmm0
        ret

# Actual code for copy16b (restrict-qualified variant): [rdx] is still
# loaded twice, once before each store, unlike the expected listing.
copy16b:
        movaps  xmm0, xmmword ptr [rdx]
        movaps  xmmword ptr [rdi], xmm0
        movaps  xmm0, xmmword ptr [rdx]         # second load of the same 16 bytes
        movaps  xmmword ptr [rsi], xmm0
        ret

https://godbolt.org/z/fKr8E16zd

Metadata

Metadata

Assignees

No one assigned

    Type

    No type

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions