Open
Description
In the example below, Clang 18 generates good code, while Clang 19 and later (up to and including trunk) fail to do so.
#include <coroutine>
// Minimal coroutine type for the reproducer: `coro` is both the coroutine's
// return type (via the nested promise_type) and an awaitable (via the
// await_* members), so one coroutine returning `coro` can `co_await` another.
struct coro
{
// Promise for coroutines returning `coro`. It carries no state.
struct promise_type
{
// The returned object is an empty `coro`; it holds no handle to the coroutine.
constexpr auto get_return_object() { return coro{}; }
// Neither the initial nor the final suspend point suspends.
constexpr auto initial_suspend() noexcept { return std::suspend_never{}; }
constexpr auto final_suspend() noexcept { return std::suspend_never{}; }
// Exceptions escaping the coroutine body are ignored (empty handler).
constexpr auto unhandled_exception() {}
constexpr auto return_void() {}
};
// Awaiter interface. await_ready() is always false, so every `co_await` on a
// `coro` takes the suspend path and calls await_suspend().
constexpr auto await_ready() { return false; }
// Destroys the awaiting coroutine through its handle instead of arranging for
// it to be resumed later. This destroy() call is the one the report expects
// the optimizer to resolve to a direct call.
constexpr auto await_suspend(auto handle) { handle.destroy(); }
// Produces no value.
constexpr auto await_resume() {}
};
// Declared only: no definition is visible in this example, so the compiler
// has to treat the call to f1() as an opaque external call.
coro f1() noexcept;
// Coroutine under test: calls f1() and awaits the returned `coro`.
// coro::await_ready() returns false, so the await always goes through
// coro::await_suspend(), which destroys this coroutine via its handle.
coro f2() noexcept
{
co_await f1();
}
Clang 18:
f2():
jmp f1()@PLT
Clang 19 and above:
f2():
push rbx
mov edi, 24
call operator new(unsigned long)@PLT
mov rbx, rax
lea rax, [rip + f2() (.resume)]
mov qword ptr [rbx], rax
lea rax, [rip + f2() (.destroy)]
mov qword ptr [rbx + 8], rax
call f1()@PLT
mov rdi, rbx
mov byte ptr [rbx + 17], 0
pop rbx
jmp qword ptr [rdi + 8]
f2() (.resume):
mov esi, 24
jmp operator delete(void*, unsigned long)@PLT
f2() (.destroy):
mov esi, 24
jmp operator delete(void*, unsigned long)@PLT
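This seems to be caused by the compiler not realizing that the indirect jump `jmp qword ptr [rdi + 8]`
can be devirtualized into a direct call to the coroutine's destroy function (coro.destroy, i.e. `f2() (.destroy)`, whose address was just stored at offset 8 of the frame). As a result, it fails to inline that call and optimize the code.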
Please see the godbolt link:
https://godbolt.org/z/TbjGPvPPT