From e713939a87239ea57c15849dc0f9a1fcc49e73fb Mon Sep 17 00:00:00 2001 From: Amir Ayupov Date: Fri, 20 Sep 2024 15:04:50 -0700 Subject: [PATCH 01/10] =?UTF-8?q?[=F0=9D=98=80=F0=9D=97=BD=F0=9D=97=BF]=20?= =?UTF-8?q?initial=20version?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Created using spr 1.3.4 --- bolt/test/X86/Inputs/callcont-fallthru.preagg | 21 + bolt/test/X86/Inputs/callcont-fallthru.yaml | 889 ++++++++++++++++++ bolt/test/X86/callcont-fallthru.test | 9 + 3 files changed, 919 insertions(+) create mode 100644 bolt/test/X86/Inputs/callcont-fallthru.preagg create mode 100644 bolt/test/X86/Inputs/callcont-fallthru.yaml create mode 100644 bolt/test/X86/callcont-fallthru.test diff --git a/bolt/test/X86/Inputs/callcont-fallthru.preagg b/bolt/test/X86/Inputs/callcont-fallthru.preagg new file mode 100644 index 0000000000000..0b5f344540573 --- /dev/null +++ b/bolt/test/X86/Inputs/callcont-fallthru.preagg @@ -0,0 +1,21 @@ +B ffffffff81e01006 401194 8 0 +B 401180 401199 98482 96 +B 401199 401166 99542 0 +B 401177 401130 102776 0 +B 401135 40117c 103204 0 +B 401186 40118b 1022983 0 +B 401194 40117c 1021645 1 +F 40117c 401135 1161 +F 40117c 401180 92267 +F 40118b 401194 991002 +F 40117c 401186 968072 +F 40118b 401186 11468 +F 401130 401135 100015 +F 401166 401177 96992 +F 401199 401199 96168 +F 40117c ffffffff81e01006 7 +F 401199 401180 1140 +F 401194 ffffffff81e01006 1 +F 40117c 401194 11522 +F 401166 401199 1151 +F 401130 401177 1154 diff --git a/bolt/test/X86/Inputs/callcont-fallthru.yaml b/bolt/test/X86/Inputs/callcont-fallthru.yaml new file mode 100644 index 0000000000000..a1f8417d1e217 --- /dev/null +++ b/bolt/test/X86/Inputs/callcont-fallthru.yaml @@ -0,0 +1,889 @@ +--- !ELF +FileHeader: + Class: ELFCLASS64 + Data: ELFDATA2LSB + Type: ET_EXEC + Machine: EM_X86_64 + Entry: 0x401040 +ProgramHeaders: + - Type: PT_PHDR + Flags: [ PF_R ] + VAddr: 0x400040 + Align: 0x8 + - Type: PT_INTERP + Flags: [ PF_R ] + FirstSec: 
.interp + LastSec: .interp + VAddr: 0x400318 + - Type: PT_LOAD + Flags: [ PF_R ] + FirstSec: .interp + LastSec: .rela.plt + VAddr: 0x400000 + Align: 0x1000 + - Type: PT_LOAD + Flags: [ PF_X, PF_R ] + FirstSec: .init + LastSec: .fini + VAddr: 0x401000 + Align: 0x1000 + - Type: PT_LOAD + Flags: [ PF_R ] + FirstSec: .rodata + LastSec: .eh_frame + VAddr: 0x402000 + Align: 0x1000 + - Type: PT_LOAD + Flags: [ PF_W, PF_R ] + FirstSec: .init_array + LastSec: .bss + VAddr: 0x403DE8 + Align: 0x1000 + - Type: PT_DYNAMIC + Flags: [ PF_W, PF_R ] + FirstSec: .dynamic + LastSec: .dynamic + VAddr: 0x403DF8 + Align: 0x8 + - Type: PT_NOTE + Flags: [ PF_R ] + FirstSec: .note.gnu.property + LastSec: .note.gnu.property + VAddr: 0x400338 + Align: 0x8 + - Type: PT_NOTE + Flags: [ PF_R ] + FirstSec: .note.gnu.build-id + LastSec: .note.ABI-tag + VAddr: 0x400358 + Align: 0x4 + - Type: PT_GNU_PROPERTY + Flags: [ PF_R ] + FirstSec: .note.gnu.property + LastSec: .note.gnu.property + VAddr: 0x400338 + Align: 0x8 + - Type: PT_GNU_EH_FRAME + Flags: [ PF_R ] + FirstSec: .eh_frame_hdr + LastSec: .eh_frame_hdr + VAddr: 0x402010 + Align: 0x4 + - Type: PT_GNU_STACK + Flags: [ PF_W, PF_R ] + Align: 0x10 + - Type: PT_GNU_RELRO + Flags: [ PF_R ] + FirstSec: .init_array + LastSec: .got + VAddr: 0x403DE8 +Sections: + - Name: .interp + Type: SHT_PROGBITS + Flags: [ SHF_ALLOC ] + Address: 0x400318 + AddressAlign: 0x1 + Content: 2F6C696236342F6C642D6C696E75782D7838362D36342E736F2E3200 + - Name: .note.gnu.property + Type: SHT_NOTE + Flags: [ SHF_ALLOC ] + Address: 0x400338 + AddressAlign: 0x8 + Notes: + - Name: GNU + Desc: 028000C0040000000300000000000000 + Type: NT_GNU_PROPERTY_TYPE_0 + - Name: .note.gnu.build-id + Type: SHT_NOTE + Flags: [ SHF_ALLOC ] + Address: 0x400358 + AddressAlign: 0x4 + Notes: + - Name: GNU + Desc: A77EA471B9AAA21E180E5FD02A0A0B2E4AB643E9 + Type: NT_PRPSINFO + - Name: .note.ABI-tag + Type: SHT_NOTE + Flags: [ SHF_ALLOC ] + Address: 0x40037C + AddressAlign: 0x4 + Notes: + - Name: GNU + 
Desc: '00000000030000000200000000000000' + Type: NT_VERSION + - Name: .gnu.hash + Type: SHT_GNU_HASH + Flags: [ SHF_ALLOC ] + Address: 0x4003A0 + Link: .dynsym + AddressAlign: 0x8 + Header: + SymNdx: 0x1 + Shift2: 0x0 + BloomFilter: [ 0x0 ] + HashBuckets: [ 0x0 ] + HashValues: [ ] + - Name: .dynsym + Type: SHT_DYNSYM + Flags: [ SHF_ALLOC ] + Address: 0x4003C0 + Link: .dynstr + AddressAlign: 0x8 + - Name: .dynstr + Type: SHT_STRTAB + Flags: [ SHF_ALLOC ] + Address: 0x400450 + AddressAlign: 0x1 + - Name: .gnu.version + Type: SHT_GNU_versym + Flags: [ SHF_ALLOC ] + Address: 0x4004CE + Link: .dynsym + AddressAlign: 0x2 + Entries: [ 0, 2, 1, 1, 3, 1 ] + - Name: .gnu.version_r + Type: SHT_GNU_verneed + Flags: [ SHF_ALLOC ] + Address: 0x4004E0 + Link: .dynstr + AddressAlign: 0x8 + Dependencies: + - Version: 1 + File: libc.so.6 + Entries: + - Name: GLIBC_2.2.5 + Hash: 157882997 + Flags: 0 + Other: 3 + - Name: GLIBC_2.34 + Hash: 110530996 + Flags: 0 + Other: 2 + - Name: .rela.dyn + Type: SHT_RELA + Flags: [ SHF_ALLOC ] + Address: 0x400510 + Link: .dynsym + AddressAlign: 0x8 + Relocations: + - Offset: 0x403FC8 + Symbol: __libc_start_main + Type: R_X86_64_GLOB_DAT + - Offset: 0x403FD0 + Symbol: _ITM_deregisterTMCloneTable + Type: R_X86_64_GLOB_DAT + - Offset: 0x403FD8 + Symbol: __gmon_start__ + Type: R_X86_64_GLOB_DAT + - Offset: 0x403FE0 + Symbol: _ITM_registerTMCloneTable + Type: R_X86_64_GLOB_DAT + - Name: .rela.plt + Type: SHT_RELA + Flags: [ SHF_ALLOC, SHF_INFO_LINK ] + Address: 0x400570 + Link: .dynsym + AddressAlign: 0x8 + Info: .got.plt + Relocations: + - Offset: 0x404000 + Symbol: atoi + Type: R_X86_64_JUMP_SLOT + - Name: .init + Type: SHT_PROGBITS + Flags: [ SHF_ALLOC, SHF_EXECINSTR ] + Address: 0x401000 + AddressAlign: 0x4 + Offset: 0x1000 + Content: F30F1EFA4883EC08488B05C92F00004885C07402FFD04883C408C3 + - Name: .plt + Type: SHT_PROGBITS + Flags: [ SHF_ALLOC, SHF_EXECINSTR ] + Address: 0x401020 + AddressAlign: 0x10 + EntSize: 0x10 + Content: 
FF35CA2F0000FF25CC2F00000F1F4000FF25CA2F00006800000000E9E0FFFFFF + - Name: .text + Type: SHT_PROGBITS + Flags: [ SHF_ALLOC, SHF_EXECINSTR ] + Address: 0x401040 + AddressAlign: 0x10 + Contentame: .fini + Type: SHT_PROGBITS + Flags: [ SHF_ALLOC, SHF_EXECINSTR ] + Address: 0x4011A8 + AddressAlign: 0x4 + Content: F30F1EFA4883EC084883C408C3 + - Name: .rodata + Type: SHT_PROGBITS + Flags: [ SHF_ALLOC ] + Address: 0x402000 + AddressAlign: 0x8 + Offset: 0x2000 + Content: '01000200000000000000000000000000' + - Name: .eh_frame_hdr + Type: SHT_PROGBITS + Flags: [ SHF_ALLOC ] + Address: 0x402010 + AddressAlign: 0x4 + Content: 011B033B340000000500000010F0FFFF7800000030F0FFFF5000000060F0FFFF6400000020F1FFFFA000000030F1FFFFC0000000 + - Name: .eh_frame + Type: SHT_PROGBITS + Flags: [ SHF_ALLOC ] + Address: 0x402048 + AddressAlign: 0x8 + Content: 1400000000000000017A5200017810011B0C070890010000100000001C000000D8EFFFFF26000000004407101000000030000000F4EFFFFF0500000000000000240000004400000090EFFFFF20000000000E10460E184A0F0B770880003F1A3B2A332422000000001C0000006C00000078F0FFFF0600000000410E108602430D06410C07080000001C0000008C00000068F0FFFF6600000000410E108602430D0602610C0708000000000000 + - Name: .init_array + Type: SHT_INIT_ARRAY + Flags: [ SHF_WRITE, SHF_ALLOC ] + Address: 0x403DE8 + AddressAlign: 0x8 + EntSize: 0x8 + Offset: 0x2DE8 + Content: '2011400000000000' + - Name: .fini_array + Type: SHT_FINI_ARRAY + Flags: [ SHF_WRITE, SHF_ALLOC ] + Address: 0x403DF0 + AddressAlign: 0x8 + EntSize: 0x8 + Content: F010400000000000 + - Name: .dynamic + Type: SHT_DYNAMIC + Flags: [ SHF_WRITE, SHF_ALLOC ] + Address: 0x403DF8 + Link: .dynstr + AddressAlign: 0x8 + Entries: + - Tag: DT_NEEDED + Value: 0x18 + - Tag: DT_INIT + Value: 0x401000 + - Tag: DT_FINI + Value: 0x4011A8 + - Tag: DT_INIT_ARRAY + Value: 0x403DE8 + - Tag: DT_INIT_ARRAYSZ + Value: 0x8 + - Tag: DT_FINI_ARRAY + Value: 0x403DF0 + - Tag: DT_FINI_ARRAYSZ + Value: 0x8 + - Tag: DT_GNU_HASH + Value: 0x4003A0 + - Tag: DT_STRTAB + Value: 
0x400450 + - Tag: DT_SYMTAB + Value: 0x4003C0 + - Tag: DT_STRSZ + Value: 0x7E + - Tag: DT_SYMENT + Value: 0x18 + - Tag: DT_DEBUG + Value: 0x0 + - Tag: DT_PLTGOT + Value: 0x403FE8 + - Tag: DT_PLTRELSZ + Value: 0x18 + - Tag: DT_PLTREL + Value: 0x7 + - Tag: DT_JMPREL + Value: 0x400570 + - Tag: DT_RELA + Value: 0x400510 + - Tag: DT_RELASZ + Value: 0x60 + - Tag: DT_RELAENT + Value: 0x18 + - Tag: DT_VERNEED + Value: 0x4004E0 + - Tag: DT_VERNEEDNUM + Value: 0x1 + - Tag: DT_VERSYM + Value: 0x4004CE + - Tag: DT_NULL + Value: 0x0 + - Tag: DT_NULL + Value: 0x0 + - Tag: DT_NULL + Value: 0x0 + - Tag: DT_NULL + Value: 0x0 + - Tag: DT_NULL + Value: 0x0 + - Tag: DT_NULL + Value: 0x0 + - Name: .got + Type: SHT_PROGBITS + Flags: [ SHF_WRITE, SHF_ALLOC ] + Address: 0x403FC8 + AddressAlign: 0x8 + EntSize: 0x8 + Content: '0000000000000000000000000000000000000000000000000000000000000000' + - Name: .got.plt + Type: SHT_PROGBITS + Flags: [ SHF_WRITE, SHF_ALLOC ] + Address: 0x403FE8 + AddressAlign: 0x8 + EntSize: 0x8 + Content: F83D400000000000000000000000000000000000000000003610400000000000 + - Name: .data + Type: SHT_PROGBITS + Flags: [ SHF_WRITE, SHF_ALLOC ] + Address: 0x404008 + AddressAlign: 0x1 + Content: '00000000' + - Name: .tm_clone_table + Type: SHT_PROGBITS + Flags: [ SHF_WRITE, SHF_ALLOC ] + Address: 0x404010 + AddressAlign: 0x8 + - Name: .bss + Type: SHT_NOBITS + Flags: [ SHF_WRITE, SHF_ALLOC ] + Address: 0x404010 + AddressAlign: 0x1 + Size: 0x8 + - Name: .comment + Type: SHT_PROGBITS + Flags: [ SHF_MERGE, SHF_STRINGS ] + AddressAlign: 0x1 + EntSize: 0x1 + Content: 4743433A2028474E55292031312E352E302032303234303731392028526564204861742031312E352E302D3229004743433A2028474E55292031332E332E312032303234303631312028526564204861742031332E332E312D322900636C616E672076657273696F6E2031382E312E38202843656E744F532031382E312E382D332E656C392900 + - Name: .gnu.build.attributes + Type: SHT_NOTE + Address: 0x406018 + AddressAlign: 0x4 + Notes: + - Name: "GA$\x013a1" + Desc: 
'40104000000000006610400000000000' + Type: NT_GNU_BUILD_ATTRIBUTE_OPEN + - Name: "GA$\x013a1" + Desc: '75104000000000007510400000000000' + Type: NT_GNU_BUILD_ATTRIBUTE_OPEN + - Name: "GA$\x013a1" + Desc: '00104000000000001610400000000000' + Type: NT_GNU_BUILD_ATTRIBUTE_OPEN + - Name: "GA$\x013a1" + Desc: A811400000000000B011400000000000 + Type: NT_GNU_BUILD_ATTRIBUTE_OPEN + - Name: "GA$\x013a1" + Desc: '80104000000000002611400000000000' + Type: NT_GNU_BUILD_ATTRIBUTE_OPEN + - Name: "GA$\x013a1" + Desc: A611400000000000A611400000000000 + Type: NT_GNU_BUILD_ATTRIBUTE_OPEN + - Name: "GA$\x013a1" + Desc: A611400000000000A611400000000000 + Type: NT_GNU_BUILD_ATTRIBUTE_OPEN + - Name: "GA$\x013a1" + Desc: 16104000000000001B10400000000000 + Type: NT_GNU_BUILD_ATTRIBUTE_OPEN + - Name: "GA$\x013a1" + Desc: B011400000000000B511400000000000 + Type: NT_GNU_BUILD_ATTRIBUTE_OPEN + - Name: .rela.init + Type: SHT_RELA + Flags: [ SHF_INFO_LINK ] + Link: .symtab + AddressAlign: 0x8 + Info: .init + Relocations: + - Offset: 0x40100B + Symbol: __gmon_start__ + Type: R_X86_64_REX_GOTPCRELX + Addend: -4 + - Name: .rela.text + Type: SHT_RELA + Flags: [ SHF_INFO_LINK ] + Link: .symtab + AddressAlign: 0x8 + Info: .text + Relocations: + - Offset: 0x40105B + Symbol: main + Type: R_X86_64_32S + - Offset: 0x401061 + Symbol: '__libc_start_main@GLIBC_2.34' + Type: R_X86_64_GOTPCRELX + Addend: -4 + - Offset: 0x401083 + Symbol: .tm_clone_table + Type: R_X86_64_PC32 + Addend: -4 + - Offset: 0x40108A + Symbol: __TMC_END__ + Type: R_X86_64_PC32 + Addend: -4 + - Offset: 0x401096 + Symbol: _ITM_deregisterTMCloneTable + Type: R_X86_64_REX_GOTPCRELX + Addend: -4 + - Offset: 0x4010B3 + Symbol: .tm_clone_table + Type: R_X86_64_PC32 + Addend: -4 + - Offset: 0x4010BA + Symbol: __TMC_END__ + Type: R_X86_64_PC32 + Addend: -4 + - Offset: 0x4010D7 + Symbol: _ITM_registerTMCloneTable + Type: R_X86_64_REX_GOTPCRELX + Addend: -4 + - Offset: 0x4010F6 + Symbol: .bss + Type: R_X86_64_PC32 + Addend: -5 + - Offset: 
0x401108 + Symbol: .bss + Type: R_X86_64_PC32 + Addend: -5 + - Offset: 0x40115F + Symbol: 'atoi@GLIBC_2.2.5' + Type: R_X86_64_PLT32 + Addend: -4 + - Offset: 0x401178 + Symbol: foo + Type: R_X86_64_PLT32 + Addend: -4 + - Name: .rela.eh_frame + Type: SHT_RELA + Flags: [ SHF_INFO_LINK ] + Link: .symtab + AddressAlign: 0x8 + Info: .eh_frame + Relocations: + - Offset: 0x402068 + Symbol: .text + Type: R_X86_64_PC32 + - Offset: 0x40207C + Symbol: .text + Type: R_X86_64_PC32 + Addend: 48 + - Offset: 0x4020B8 + Symbol: .text + Type: R_X86_64_PC32 + Addend: 240 + - Offset: 0x4020D8 + Symbol: .text + Type: R_X86_64_PC32 + Addend: 256 + - Name: .rela.init_array + Type: SHT_RELA + Flags: [ SHF_INFO_LINK ] + Link: .symtab + AddressAlign: 0x8 + Info: .init_array + Relocations: + - Offset: 0x403DE8 + Symbol: .text + Type: R_X86_64_64 + Addend: 224 + - Name: .rela.fini_array + Type: SHT_RELA + Flags: [ SHF_INFO_LINK ] + Link: .symtab + AddressAlign: 0x8 + Info: .fini_array + Relocations: + - Offset: 0x403DF0 + Symbol: .text + Type: R_X86_64_64 + Addend: 176 + - Name: .rela.gnu.build.attributes + Type: SHT_RELA + Flags: [ SHF_INFO_LINK ] + Link: .symtab + AddressAlign: 0x8 + Info: .gnu.build.attributes + Relocations: + - Offset: 0x40602C + Symbol: .text + Type: R_X86_64_64 + - Offset: 0x406034 + Symbol: .text + Type: R_X86_64_64 + Addend: 38 + - Offset: 0x406050 + Symbol: .text + Type: R_X86_64_64 + Addend: 53 + - Offset: 0x406058 + Symbol: .text + Type: R_X86_64_64 + Addend: 53 + - Offset: 0x406074 + Symbol: .init + Type: R_X86_64_64 + - Offset: 0x40607C + Symbol: .init + Type: R_X86_64_64 + Addend: 22 + - Offset: 0x406098 + Symbol: .fini + Type: R_X86_64_64 + - Offset: 0x4060A0 + Symbol: .fini + Type: R_X86_64_64 + Addend: 8 + - Offset: 0x4060BC + Symbol: .text + Type: R_X86_64_64 + Addend: 64 + - Offset: 0x4060C4 + Symbol: .text + Type: R_X86_64_64 + Addend: 230 + - Offset: 0x4060E0 + Symbol: .text + Type: R_X86_64_64 + Addend: 358 + - Offset: 0x4060E8 + Symbol: .text + Type: 
R_X86_64_64 + Addend: 358 + - Offset: 0x406104 + Symbol: .text + Type: R_X86_64_64 + Addend: 358 + - Offset: 0x40610C + Symbol: .text + Type: R_X86_64_64 + Addend: 358 + - Offset: 0x406128 + Symbol: .init + Type: R_X86_64_64 + Addend: 22 + - Offset: 0x406130 + Symbol: .init + Type: R_X86_64_64 + Addend: 27 + - Offset: 0x40614C + Symbol: .fini + Type: R_X86_64_64 + Addend: 8 + - Offset: 0x406154 + Symbol: .fini + Type: R_X86_64_64 + Addend: 13 + - Type: SectionHeaderTable + Sections: + - Name: .interp + - Name: .note.gnu.property + - Name: .note.gnu.build-id + - Name: .note.ABI-tag + - Name: .gnu.hash + - Name: .dynsym + - Name: .dynstr + - Name: .gnu.version + - Name: .gnu.version_r + - Name: .rela.dyn + - Name: .rela.plt + - Name: .init + - Name: .rela.init + - Name: .plt + - Name: .text + - Name: .rela.text + - Name: .fini + - Name: .rodata + - Name: .eh_frame_hdr + - Name: .eh_frame + - Name: .rela.eh_frame + - Name: .init_array + - Name: .rela.init_array + - Name: .fini_array + - Name: .rela.fini_array + - Name: .dynamic + - Name: .got + - Name: .got.plt + - Name: .data + - Name: .tm_clone_table + - Name: .bss + - Name: .comment + - Name: .gnu.build.attributes + - Name: .rela.gnu.build.attributes + - Name: .symtab + - Name: .strtab + - Name: .shstrtab +Symbols: + - Name: .interp + Type: STT_SECTION + Section: .interp + Value: 0x400318 + - Name: .note.gnu.property + Type: STT_SECTION + Section: .note.gnu.property + Value: 0x400338 + - Name: .note.gnu.build-id + Type: STT_SECTION + Section: .note.gnu.build-id + Value: 0x400358 + - Name: .note.ABI-tag + Type: STT_SECTION + Section: .note.ABI-tag + Value: 0x40037C + - Name: .gnu.hash + Type: STT_SECTION + Section: .gnu.hash + Value: 0x4003A0 + - Name: .dynsym + Type: STT_SECTION + Section: .dynsym + Value: 0x4003C0 + - Name: .dynstr + Type: STT_SECTION + Section: .dynstr + Value: 0x400450 + - Name: .gnu.version + Type: STT_SECTION + Section: .gnu.version + Value: 0x4004CE + - Name: .gnu.version_r + Type: 
STT_SECTION + Section: .gnu.version_r + Value: 0x4004E0 + - Name: .rela.dyn + Type: STT_SECTION + Section: .rela.dyn + Value: 0x400510 + - Name: .rela.plt + Type: STT_SECTION + Section: .rela.plt + Value: 0x400570 + - Name: .init + Type: STT_SECTION + Section: .init + Value: 0x401000 + - Name: .plt + Type: STT_SECTION + Section: .plt + Value: 0x401020 + - Name: .text + Type: STT_SECTION + Section: .text + Value: 0x401040 + - Name: .fini + Type: STT_SECTION + Section: .fini + Value: 0x4011A8 + - Name: .rodata + Type: STT_SECTION + Section: .rodata + Value: 0x402000 + - Name: .eh_frame_hdr + Type: STT_SECTION + Section: .eh_frame_hdr + Value: 0x402010 + - Name: .eh_frame + Type: STT_SECTION + Section: .eh_frame + Value: 0x402048 + - Name: .init_array + Type: STT_SECTION + Section: .init_array + Value: 0x403DE8 + - Name: .fini_array + Type: STT_SECTION + Section: .fini_array + Value: 0x403DF0 + - Name: .dynamic + Type: STT_SECTION + Section: .dynamic + Value: 0x403DF8 + - Name: .got + Type: STT_SECTION + Section: .got + Value: 0x403FC8 + - Name: .got.plt + Type: STT_SECTION + Section: .got.plt + Value: 0x403FE8 + - Name: .data + Type: STT_SECTION + Section: .data + Value: 0x404008 + - Name: .tm_clone_table + Type: STT_SECTION + Section: .tm_clone_table + Value: 0x404010 + - Name: .bss + Type: STT_SECTION + Section: .bss + Value: 0x404010 + - Name: .comment + Type: STT_SECTION + Section: .comment + - Name: .gnu.build.attributes + Type: STT_SECTION + Section: .gnu.build.attributes + Value: 0x406018 + - Name: crt1.o + Type: STT_FILE + Index: SHN_ABS + - Name: __abi_tag + Type: STT_OBJECT + Section: .note.ABI-tag + Value: 0x40037C + Size: 0x20 + - Name: crtstuff.c + Type: STT_FILE + Index: SHN_ABS + - Name: __TMC_LIST__ + Type: STT_OBJECT + Section: .tm_clone_table + Value: 0x404010 + - Name: deregister_tm_clones + Type: STT_FUNC + Section: .text + Value: 0x401080 + - Name: register_tm_clones + Type: STT_FUNC + Section: .text + Value: 0x4010B0 + - Name: 
__do_global_dtors_aux + Type: STT_FUNC + Section: .text + Value: 0x4010F0 + - Name: completed.0 + Type: STT_OBJECT + Section: .bss + Value: 0x404010 + Size: 0x1 + - Name: __do_global_dtors_aux_fini_array_entry + Type: STT_OBJECT + Section: .fini_array + Value: 0x403DF0 + - Name: frame_dummy + Type: STT_FUNC + Section: .text + Value: 0x401120 + - Name: __frame_dummy_init_array_entry + Type: STT_OBJECT + Section: .init_array + Value: 0x403DE8 + - Name: callcont-fallthru.c + Type: STT_FILE + Index: SHN_ABS + - Name: 'crtstuff.c (1)' + Type: STT_FILE + Index: SHN_ABS + - Name: __FRAME_END__ + Type: STT_OBJECT + Section: .eh_frame + Value: 0x4020F0 + - Type: STT_FILE + Index: SHN_ABS + - Name: _DYNAMIC + Type: STT_OBJECT + Section: .dynamic + Value: 0x403DF8 + - Name: __GNU_EH_FRAME_HDR + Section: .eh_frame_hdr + Value: 0x402010 + - Name: _GLOBAL_OFFSET_TABLE_ + Type: STT_OBJECT + Section: .got.plt + Value: 0x403FE8 + - Name: '__libc_start_main@GLIBC_2.34' + Type: STT_FUNC + Binding: STB_GLOBAL + - Name: _ITM_deregisterTMCloneTable + Binding: STB_WEAK + - Name: data_start + Section: .data + Binding: STB_WEAK + Value: 0x404008 + - Name: _edata + Section: .tm_clone_table + Binding: STB_GLOBAL + Value: 0x404010 + - Name: _fini + Type: STT_FUNC + Section: .fini + Binding: STB_GLOBAL + Value: 0x4011A8 + Other: [ STV_HIDDEN ] + - Name: __data_start + Section: .data + Binding: STB_GLOBAL + Value: 0x404008 + - Name: __gmon_start__ + Binding: STB_WEAK + - Name: __dso_handle + Type: STT_OBJECT + Section: .rodata + Binding: STB_GLOBAL + Value: 0x402008 + Other: [ STV_HIDDEN ] + - Name: _IO_stdin_used + Type: STT_OBJECT + Section: .rodata + Binding: STB_GLOBAL + Value: 0x402000 + Size: 0x4 + - Name: foo + Type: STT_FUNC + Section: .text + Binding: STB_GLOBAL + Value: 0x401130 + Size: 0x6 + - Name: _end + Section: .bss + Binding: STB_GLOBAL + Value: 0x404018 + - Name: _dl_relocate_static_pie + Type: STT_FUNC + Section: .text + Binding: STB_GLOBAL + Value: 0x401070 + Size: 0x5 + 
Other: [ STV_HIDDEN ] + - Name: _start + Type: STT_FUNC + Section: .text + Binding: STB_GLOBAL + Value: 0x401040 + Size: 0x26 + - Name: __bss_start + Section: .bss + Binding: STB_GLOBAL + Value: 0x404010 + - Name: main + Type: STT_FUNC + Section: .text + Binding: STB_GLOBAL + Value: 0x401140 + Size: 0x66 + - Name: 'atoi@GLIBC_2.2.5' + Type: STT_FUNC + Binding: STB_GLOBAL + - Name: __TMC_END__ + Type: STT_OBJECT + Section: .tm_clone_table + Binding: STB_GLOBAL + Value: 0x404010 + Other: [ STV_HIDDEN ] + - Name: _ITM_registerTMCloneTable + Binding: STB_WEAK + - Name: _init + Type: STT_FUNC + Section: .init + Binding: STB_GLOBAL + Value: 0x401000 + Other: [ STV_HIDDEN ] +DynamicSymbols: + - Name: __libc_start_main + Type: STT_FUNC + Binding: STB_GLOBAL + - Name: _ITM_deregisterTMCloneTable + Binding: STB_WEAK + - Name: __gmon_start__ + Binding: STB_WEAK + - Name: atoi + Type: STT_FUNC + Binding: STB_GLOBAL + - Name: _ITM_registerTMCloneTable + Binding: STB_WEAK +... diff --git a/bolt/test/X86/callcont-fallthru.test b/bolt/test/X86/callcont-fallthru.test new file mode 100644 index 0000000000000..8e43589e8f542 --- /dev/null +++ b/bolt/test/X86/callcont-fallthru.test @@ -0,0 +1,9 @@ +## Reproduces missing call continuation fallthrough count when using +## pre-aggregated perf data + +# RUN: yaml2obj %p/Inputs/callcont-fallthru.yaml > %t.exe +# RUN: llvm-bolt %t.exe --pa -p %p/Inputs/callcont-fallthru.preagg -o %t.out \ +# RUN: --print-cfg --print-only=main | FileCheck %s + +# CHECK: callq foo +# CHECK-NEXT: count: 0 From 5f48b9253844f970245a7e46c85bb5343b0efc4c Mon Sep 17 00:00:00 2001 From: Amir Ayupov Date: Sat, 21 Sep 2024 10:04:42 -0700 Subject: [PATCH 02/10] repurpose for the fix of call cont discontinuity Created using spr 1.3.4 --- bolt/include/bolt/Profile/DataAggregator.h | 9 +- bolt/lib/Profile/DataAggregator.cpp | 105 ++++++++------------- bolt/test/X86/callcont-fallthru.test | 2 +- 3 files changed, 46 insertions(+), 70 deletions(-) diff --git 
a/bolt/include/bolt/Profile/DataAggregator.h b/bolt/include/bolt/Profile/DataAggregator.h index 6453b3070ceb8..1e7695baab62c 100644 --- a/bolt/include/bolt/Profile/DataAggregator.h +++ b/bolt/include/bolt/Profile/DataAggregator.h @@ -202,8 +202,8 @@ class DataAggregator : public DataReader { /// Return a vector of offsets corresponding to a trace in a function /// if the trace is valid, std::nullopt otherwise. std::optional, 16>> - getFallthroughsInTrace(BinaryFunction &BF, const LBREntry &First, - const LBREntry &Second, uint64_t Count = 1) const; + getFallthroughsInTrace(BinaryFunction &BF, uint64_t From, uint64_t To, + uint64_t Count = 1) const; /// Record external entry into the function \p BF. /// @@ -268,9 +268,8 @@ class DataAggregator : public DataReader { /// Register a \p Branch. bool doBranch(uint64_t From, uint64_t To, uint64_t Count, uint64_t Mispreds); - /// Register a trace between two LBR entries supplied in execution order. - bool doTrace(const LBREntry &First, const LBREntry &Second, - uint64_t Count = 1); + /// Register a trace between two addresses. + bool doTrace(const uint64_t From, const uint64_t To, uint64_t Count = 1); /// Parser helpers /// Return false if we exhausted our parser buffer and finished parsing diff --git a/bolt/lib/Profile/DataAggregator.cpp b/bolt/lib/Profile/DataAggregator.cpp index fcde6f5f4642c..f73c966ec053a 100644 --- a/bolt/lib/Profile/DataAggregator.cpp +++ b/bolt/lib/Profile/DataAggregator.cpp @@ -804,9 +804,10 @@ bool DataAggregator::doBranch(uint64_t From, uint64_t To, uint64_t Count, }; BinaryFunction *FromFunc = handleAddress(From, /*IsFrom=*/true); - // Ignore returns. + // Record returns as call->call continuation fall-through. 
if (IsReturn) - return true; + return doTrace(To - 1, To, Count); + BinaryFunction *ToFunc = handleAddress(To, /*IsFrom=*/false); if (!FromFunc && !ToFunc) return false; @@ -820,16 +821,24 @@ bool DataAggregator::doBranch(uint64_t From, uint64_t To, uint64_t Count, return doInterBranch(FromFunc, ToFunc, From, To, Count, Mispreds); } -bool DataAggregator::doTrace(const LBREntry &First, const LBREntry &Second, +bool DataAggregator::doTrace(const uint64_t From, const uint64_t To, uint64_t Count) { - BinaryFunction *FromFunc = getBinaryFunctionContainingAddress(First.To); - BinaryFunction *ToFunc = getBinaryFunctionContainingAddress(Second.From); + BinaryFunction *FromFunc = getBinaryFunctionContainingAddress(From); + BinaryFunction *ToFunc = getBinaryFunctionContainingAddress(To); if (!FromFunc || !ToFunc) { LLVM_DEBUG({ - dbgs() << "Out of range trace starting in " << FromFunc->getPrintName() - << formatv(" @ {0:x}", First.To - FromFunc->getAddress()) - << " and ending in " << ToFunc->getPrintName() - << formatv(" @ {0:x}\n", Second.From - ToFunc->getAddress()); + dbgs() << "Out of range trace starting in "; + if (FromFunc) + dbgs() << formatv("{0} @ {1:x}", *FromFunc, + From - FromFunc->getAddress()); + else + dbgs() << Twine::utohexstr(From); + dbgs() << " and ending in "; + if (ToFunc) + dbgs() << formatv("{0} @ {1:x}", *ToFunc, To - ToFunc->getAddress()); + else + dbgs() << Twine::utohexstr(To); + dbgs() << '\n'; }); NumLongRangeTraces += Count; return false; @@ -838,32 +847,30 @@ bool DataAggregator::doTrace(const LBREntry &First, const LBREntry &Second, NumInvalidTraces += Count; LLVM_DEBUG({ dbgs() << "Invalid trace starting in " << FromFunc->getPrintName() - << formatv(" @ {0:x}", First.To - FromFunc->getAddress()) + << formatv(" @ {0:x}", From - FromFunc->getAddress()) << " and ending in " << ToFunc->getPrintName() - << formatv(" @ {0:x}\n", Second.From - ToFunc->getAddress()); + << formatv(" @ {0:x}\n", To - ToFunc->getAddress()); }); return false; } 
std::optional FTs = - BAT ? BAT->getFallthroughsInTrace(FromFunc->getAddress(), First.To, - Second.From) - : getFallthroughsInTrace(*FromFunc, First, Second, Count); + BAT ? BAT->getFallthroughsInTrace(FromFunc->getAddress(), From, To) + : getFallthroughsInTrace(*FromFunc, From, To, Count); if (!FTs) { LLVM_DEBUG( dbgs() << "Invalid trace starting in " << FromFunc->getPrintName() - << " @ " << Twine::utohexstr(First.To - FromFunc->getAddress()) + << " @ " << Twine::utohexstr(From - FromFunc->getAddress()) << " and ending in " << ToFunc->getPrintName() << " @ " << ToFunc->getPrintName() << " @ " - << Twine::utohexstr(Second.From - ToFunc->getAddress()) << '\n'); + << Twine::utohexstr(To - ToFunc->getAddress()) << '\n'); NumInvalidTraces += Count; return false; } LLVM_DEBUG(dbgs() << "Processing " << FTs->size() << " fallthroughs for " - << FromFunc->getPrintName() << ":" - << Twine::utohexstr(First.To) << " to " - << Twine::utohexstr(Second.From) << ".\n"); + << FromFunc->getPrintName() << ":" << Twine::utohexstr(From) + << " to " << Twine::utohexstr(To) << ".\n"); BinaryFunction *ParentFunc = getBATParentFunction(*FromFunc); for (auto [From, To] : *FTs) { if (BAT) { @@ -877,10 +884,8 @@ bool DataAggregator::doTrace(const LBREntry &First, const LBREntry &Second, } std::optional, 16>> -DataAggregator::getFallthroughsInTrace(BinaryFunction &BF, - const LBREntry &FirstLBR, - const LBREntry &SecondLBR, - uint64_t Count) const { +DataAggregator::getFallthroughsInTrace(BinaryFunction &BF, uint64_t From, + uint64_t To, uint64_t Count) const { SmallVector, 16> Branches; BinaryContext &BC = BF.getBinaryContext(); @@ -891,8 +896,8 @@ DataAggregator::getFallthroughsInTrace(BinaryFunction &BF, assert(BF.hasCFG() && "can only record traces in CFG state"); // Offsets of the trace within this function. 
- const uint64_t From = FirstLBR.To - BF.getAddress(); - const uint64_t To = SecondLBR.From - BF.getAddress(); + From = From - BF.getAddress(); + To = To - BF.getAddress(); if (From > To) return std::nullopt; @@ -903,24 +908,6 @@ DataAggregator::getFallthroughsInTrace(BinaryFunction &BF, if (!FromBB || !ToBB) return std::nullopt; - // Adjust FromBB if the first LBR is a return from the last instruction in - // the previous block (that instruction should be a call). - if (From == FromBB->getOffset() && !BF.containsAddress(FirstLBR.From) && - !FromBB->isEntryPoint() && !FromBB->isLandingPad()) { - const BinaryBasicBlock *PrevBB = - BF.getLayout().getBlock(FromBB->getIndex() - 1); - if (PrevBB->getSuccessor(FromBB->getLabel())) { - const MCInst *Instr = PrevBB->getLastNonPseudoInstr(); - if (Instr && BC.MIB->isCall(*Instr)) - FromBB = PrevBB; - else - LLVM_DEBUG(dbgs() << "invalid incoming LBR (no call): " << FirstLBR - << '\n'); - } else { - LLVM_DEBUG(dbgs() << "invalid incoming LBR: " << FirstLBR << '\n'); - } - } - // Fill out information for fall-through edges. The From and To could be // within the same basic block, e.g. when two call instructions are in the // same block. In this case we skip the processing. @@ -937,8 +924,8 @@ DataAggregator::getFallthroughsInTrace(BinaryFunction &BF, // Check for bad LBRs. 
if (!BB->getSuccessor(NextBB->getLabel())) { LLVM_DEBUG(dbgs() << "no fall-through for the trace:\n" - << " " << FirstLBR << '\n' - << " " << SecondLBR << '\n'); + << " " << From << '\n' + << " " << To << '\n'); return std::nullopt; } @@ -1595,16 +1582,11 @@ void DataAggregator::processBranchEvents() { NamedRegionTimer T("processBranch", "Processing branch events", TimerGroupName, TimerGroupDesc, opts::TimeAggregator); - for (const auto &AggrLBR : FallthroughLBRs) { - const Trace &Loc = AggrLBR.first; - const FTInfo &Info = AggrLBR.second; - LBREntry First{Loc.From, Loc.From, false}; - LBREntry Second{Loc.To, Loc.To, false}; + for (const auto &[Loc, Info]: FallthroughLBRs) { if (Info.InternCount) - doTrace(First, Second, Info.InternCount); + doTrace(Loc.From, Loc.To, Info.InternCount); if (Info.ExternCount) { - First.From = 0; - doTrace(First, Second, Info.ExternCount); + doTrace(0, Loc.To, Info.ExternCount); } } @@ -1768,21 +1750,16 @@ void DataAggregator::processPreAggregated() { TimerGroupName, TimerGroupDesc, opts::TimeAggregator); uint64_t NumTraces = 0; - for (const AggregatedLBREntry &AggrEntry : AggregatedLBRs) { - switch (AggrEntry.EntryType) { + for (const auto &[From, To, Count, Mispreds, Type]: AggregatedLBRs) { + bool IsExternalOrigin = Type == AggregatedLBREntry::FT_EXTERNAL_ORIGIN; + switch (Type) { case AggregatedLBREntry::BRANCH: - doBranch(AggrEntry.From.Offset, AggrEntry.To.Offset, AggrEntry.Count, - AggrEntry.Mispreds); + doBranch(From.Offset, To.Offset, Count, Mispreds); break; case AggregatedLBREntry::FT: case AggregatedLBREntry::FT_EXTERNAL_ORIGIN: { - LBREntry First{AggrEntry.EntryType == AggregatedLBREntry::FT - ? AggrEntry.From.Offset - : 0, - AggrEntry.From.Offset, false}; - LBREntry Second{AggrEntry.To.Offset, AggrEntry.To.Offset, false}; - doTrace(First, Second, AggrEntry.Count); - NumTraces += AggrEntry.Count; + doTrace(IsExternalOrigin ? 
0 : From.Offset, To.Offset, Count); + NumTraces += Count; break; } } diff --git a/bolt/test/X86/callcont-fallthru.test b/bolt/test/X86/callcont-fallthru.test index 8e43589e8f542..e0a5c5a6852d4 100644 --- a/bolt/test/X86/callcont-fallthru.test +++ b/bolt/test/X86/callcont-fallthru.test @@ -6,4 +6,4 @@ # RUN: --print-cfg --print-only=main | FileCheck %s # CHECK: callq foo -# CHECK-NEXT: count: 0 +# CHECK-NEXT: count: 103204 From 97412974e7e470c00b232aed69f139d28ce97e52 Mon Sep 17 00:00:00 2001 From: Amir Ayupov Date: Sat, 21 Sep 2024 10:09:26 -0700 Subject: [PATCH 03/10] clang-format Created using spr 1.3.4 --- bolt/lib/Profile/DataAggregator.cpp | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/bolt/lib/Profile/DataAggregator.cpp b/bolt/lib/Profile/DataAggregator.cpp index f73c966ec053a..dbd0ed07c7c1d 100644 --- a/bolt/lib/Profile/DataAggregator.cpp +++ b/bolt/lib/Profile/DataAggregator.cpp @@ -858,12 +858,12 @@ bool DataAggregator::doTrace(const uint64_t From, const uint64_t To, BAT ? 
BAT->getFallthroughsInTrace(FromFunc->getAddress(), From, To) : getFallthroughsInTrace(*FromFunc, From, To, Count); if (!FTs) { - LLVM_DEBUG( - dbgs() << "Invalid trace starting in " << FromFunc->getPrintName() - << " @ " << Twine::utohexstr(From - FromFunc->getAddress()) - << " and ending in " << ToFunc->getPrintName() << " @ " - << ToFunc->getPrintName() << " @ " - << Twine::utohexstr(To - ToFunc->getAddress()) << '\n'); + LLVM_DEBUG(dbgs() << "Invalid trace starting in " + << FromFunc->getPrintName() << " @ " + << Twine::utohexstr(From - FromFunc->getAddress()) + << " and ending in " << ToFunc->getPrintName() << " @ " + << ToFunc->getPrintName() << " @ " + << Twine::utohexstr(To - ToFunc->getAddress()) << '\n'); NumInvalidTraces += Count; return false; } @@ -1582,7 +1582,7 @@ void DataAggregator::processBranchEvents() { NamedRegionTimer T("processBranch", "Processing branch events", TimerGroupName, TimerGroupDesc, opts::TimeAggregator); - for (const auto &[Loc, Info]: FallthroughLBRs) { + for (const auto &[Loc, Info] : FallthroughLBRs) { if (Info.InternCount) doTrace(Loc.From, Loc.To, Info.InternCount); if (Info.ExternCount) { @@ -1750,7 +1750,7 @@ void DataAggregator::processPreAggregated() { TimerGroupName, TimerGroupDesc, opts::TimeAggregator); uint64_t NumTraces = 0; - for (const auto &[From, To, Count, Mispreds, Type]: AggregatedLBRs) { + for (const auto &[From, To, Count, Mispreds, Type] : AggregatedLBRs) { bool IsExternalOrigin = Type == AggregatedLBREntry::FT_EXTERNAL_ORIGIN; switch (Type) { case AggregatedLBREntry::BRANCH: From 9c4effa15a5bfa7a8c03aad25421a462bb56ffaf Mon Sep 17 00:00:00 2001 From: Amir Ayupov Date: Sat, 21 Sep 2024 19:08:41 -0700 Subject: [PATCH 04/10] Drop changes in doTrace/getFallthroughsInTrace Created using spr 1.3.4 --- bolt/include/bolt/Profile/DataAggregator.h | 9 +- bolt/lib/Profile/DataAggregator.cpp | 108 ++++++++++++++------- 2 files changed, 77 insertions(+), 40 deletions(-) diff --git 
a/bolt/include/bolt/Profile/DataAggregator.h b/bolt/include/bolt/Profile/DataAggregator.h index 1e7695baab62c..6453b3070ceb8 100644 --- a/bolt/include/bolt/Profile/DataAggregator.h +++ b/bolt/include/bolt/Profile/DataAggregator.h @@ -202,8 +202,8 @@ class DataAggregator : public DataReader { /// Return a vector of offsets corresponding to a trace in a function /// if the trace is valid, std::nullopt otherwise. std::optional, 16>> - getFallthroughsInTrace(BinaryFunction &BF, uint64_t From, uint64_t To, - uint64_t Count = 1) const; + getFallthroughsInTrace(BinaryFunction &BF, const LBREntry &First, + const LBREntry &Second, uint64_t Count = 1) const; /// Record external entry into the function \p BF. /// @@ -268,8 +268,9 @@ class DataAggregator : public DataReader { /// Register a \p Branch. bool doBranch(uint64_t From, uint64_t To, uint64_t Count, uint64_t Mispreds); - /// Register a trace between two addresses. - bool doTrace(const uint64_t From, const uint64_t To, uint64_t Count = 1); + /// Register a trace between two LBR entries supplied in execution order. + bool doTrace(const LBREntry &First, const LBREntry &Second, + uint64_t Count = 1); /// Parser helpers /// Return false if we exhausted our parser buffer and finished parsing diff --git a/bolt/lib/Profile/DataAggregator.cpp b/bolt/lib/Profile/DataAggregator.cpp index dbd0ed07c7c1d..72905d0ecf6a0 100644 --- a/bolt/lib/Profile/DataAggregator.cpp +++ b/bolt/lib/Profile/DataAggregator.cpp @@ -805,8 +805,11 @@ bool DataAggregator::doBranch(uint64_t From, uint64_t To, uint64_t Count, BinaryFunction *FromFunc = handleAddress(From, /*IsFrom=*/true); // Record returns as call->call continuation fall-through. 
- if (IsReturn) - return doTrace(To - 1, To, Count); + if (IsReturn) { + LBREntry First{To - 1, To - 1, false}; + LBREntry Second{To, To, false}; + return doTrace(First, Second, Count); + } BinaryFunction *ToFunc = handleAddress(To, /*IsFrom=*/false); if (!FromFunc && !ToFunc) @@ -821,23 +824,24 @@ bool DataAggregator::doBranch(uint64_t From, uint64_t To, uint64_t Count, return doInterBranch(FromFunc, ToFunc, From, To, Count, Mispreds); } -bool DataAggregator::doTrace(const uint64_t From, const uint64_t To, +bool DataAggregator::doTrace(const LBREntry &First, const LBREntry &Second, uint64_t Count) { - BinaryFunction *FromFunc = getBinaryFunctionContainingAddress(From); - BinaryFunction *ToFunc = getBinaryFunctionContainingAddress(To); + BinaryFunction *FromFunc = getBinaryFunctionContainingAddress(First.To); + BinaryFunction *ToFunc = getBinaryFunctionContainingAddress(Second.From); if (!FromFunc || !ToFunc) { LLVM_DEBUG({ dbgs() << "Out of range trace starting in "; if (FromFunc) dbgs() << formatv("{0} @ {1:x}", *FromFunc, - From - FromFunc->getAddress()); + First.To - FromFunc->getAddress()); else - dbgs() << Twine::utohexstr(From); + dbgs() << Twine::utohexstr(First.To); dbgs() << " and ending in "; if (ToFunc) - dbgs() << formatv("{0} @ {1:x}", *ToFunc, To - ToFunc->getAddress()); + dbgs() << formatv("{0} @ {1:x}", *ToFunc, + Second.From - ToFunc->getAddress()); else - dbgs() << Twine::utohexstr(To); + dbgs() << Twine::utohexstr(Second.From); dbgs() << '\n'; }); NumLongRangeTraces += Count; @@ -847,30 +851,32 @@ bool DataAggregator::doTrace(const uint64_t From, const uint64_t To, NumInvalidTraces += Count; LLVM_DEBUG({ dbgs() << "Invalid trace starting in " << FromFunc->getPrintName() - << formatv(" @ {0:x}", From - FromFunc->getAddress()) + << formatv(" @ {0:x}", First.To - FromFunc->getAddress()) << " and ending in " << ToFunc->getPrintName() - << formatv(" @ {0:x}\n", To - ToFunc->getAddress()); + << formatv(" @ {0:x}\n", Second.From - 
ToFunc->getAddress()); }); return false; } std::optional FTs = - BAT ? BAT->getFallthroughsInTrace(FromFunc->getAddress(), From, To) - : getFallthroughsInTrace(*FromFunc, From, To, Count); + BAT ? BAT->getFallthroughsInTrace(FromFunc->getAddress(), First.To, + Second.From) + : getFallthroughsInTrace(*FromFunc, First, Second, Count); if (!FTs) { - LLVM_DEBUG(dbgs() << "Invalid trace starting in " - << FromFunc->getPrintName() << " @ " - << Twine::utohexstr(From - FromFunc->getAddress()) - << " and ending in " << ToFunc->getPrintName() << " @ " - << ToFunc->getPrintName() << " @ " - << Twine::utohexstr(To - ToFunc->getAddress()) << '\n'); + LLVM_DEBUG( + dbgs() << "Invalid trace starting in " << FromFunc->getPrintName() + << " @ " << Twine::utohexstr(First.To - FromFunc->getAddress()) + << " and ending in " << ToFunc->getPrintName() << " @ " + << ToFunc->getPrintName() << " @ " + << Twine::utohexstr(Second.From - ToFunc->getAddress()) << '\n'); NumInvalidTraces += Count; return false; } LLVM_DEBUG(dbgs() << "Processing " << FTs->size() << " fallthroughs for " - << FromFunc->getPrintName() << ":" << Twine::utohexstr(From) - << " to " << Twine::utohexstr(To) << ".\n"); + << FromFunc->getPrintName() << ":" + << Twine::utohexstr(First.To) << " to " + << Twine::utohexstr(Second.From) << ".\n"); BinaryFunction *ParentFunc = getBATParentFunction(*FromFunc); for (auto [From, To] : *FTs) { if (BAT) { @@ -884,8 +890,10 @@ bool DataAggregator::doTrace(const uint64_t From, const uint64_t To, } std::optional, 16>> -DataAggregator::getFallthroughsInTrace(BinaryFunction &BF, uint64_t From, - uint64_t To, uint64_t Count) const { +DataAggregator::getFallthroughsInTrace(BinaryFunction &BF, + const LBREntry &FirstLBR, + const LBREntry &SecondLBR, + uint64_t Count) const { SmallVector, 16> Branches; BinaryContext &BC = BF.getBinaryContext(); @@ -896,8 +904,8 @@ DataAggregator::getFallthroughsInTrace(BinaryFunction &BF, uint64_t From, assert(BF.hasCFG() && "can only record traces in CFG 
state"); // Offsets of the trace within this function. - From = From - BF.getAddress(); - To = To - BF.getAddress(); + const uint64_t From = FirstLBR.To - BF.getAddress(); + const uint64_t To = SecondLBR.From - BF.getAddress(); if (From > To) return std::nullopt; @@ -908,6 +916,24 @@ DataAggregator::getFallthroughsInTrace(BinaryFunction &BF, uint64_t From, if (!FromBB || !ToBB) return std::nullopt; + // Adjust FromBB if the first LBR is a return from the last instruction in + // the previous block (that instruction should be a call). + if (From == FromBB->getOffset() && !BF.containsAddress(FirstLBR.From) && + !FromBB->isEntryPoint() && !FromBB->isLandingPad()) { + const BinaryBasicBlock *PrevBB = + BF.getLayout().getBlock(FromBB->getIndex() - 1); + if (PrevBB->getSuccessor(FromBB->getLabel())) { + const MCInst *Instr = PrevBB->getLastNonPseudoInstr(); + if (Instr && BC.MIB->isCall(*Instr)) + FromBB = PrevBB; + else + LLVM_DEBUG(dbgs() << "invalid incoming LBR (no call): " << FirstLBR + << '\n'); + } else { + LLVM_DEBUG(dbgs() << "invalid incoming LBR: " << FirstLBR << '\n'); + } + } + // Fill out information for fall-through edges. The From and To could be // within the same basic block, e.g. when two call instructions are in the // same block. In this case we skip the processing. @@ -924,8 +950,8 @@ DataAggregator::getFallthroughsInTrace(BinaryFunction &BF, uint64_t From, // Check for bad LBRs. 
if (!BB->getSuccessor(NextBB->getLabel())) { LLVM_DEBUG(dbgs() << "no fall-through for the trace:\n" - << " " << From << '\n' - << " " << To << '\n'); + << " " << FirstLBR << '\n' + << " " << SecondLBR << '\n'); return std::nullopt; } @@ -1582,11 +1608,16 @@ void DataAggregator::processBranchEvents() { NamedRegionTimer T("processBranch", "Processing branch events", TimerGroupName, TimerGroupDesc, opts::TimeAggregator); - for (const auto &[Loc, Info] : FallthroughLBRs) { + for (const auto &AggrLBR : FallthroughLBRs) { + const Trace &Loc = AggrLBR.first; + const FTInfo &Info = AggrLBR.second; + LBREntry First{Loc.From, Loc.From, false}; + LBREntry Second{Loc.To, Loc.To, false}; if (Info.InternCount) - doTrace(Loc.From, Loc.To, Info.InternCount); + doTrace(First, Second, Info.InternCount); if (Info.ExternCount) { - doTrace(0, Loc.To, Info.ExternCount); + First.From = 0; + doTrace(First, Second, Info.ExternCount); } } @@ -1750,16 +1781,21 @@ void DataAggregator::processPreAggregated() { TimerGroupName, TimerGroupDesc, opts::TimeAggregator); uint64_t NumTraces = 0; - for (const auto &[From, To, Count, Mispreds, Type] : AggregatedLBRs) { - bool IsExternalOrigin = Type == AggregatedLBREntry::FT_EXTERNAL_ORIGIN; - switch (Type) { + for (const AggregatedLBREntry &AggrEntry : AggregatedLBRs) { + switch (AggrEntry.EntryType) { case AggregatedLBREntry::BRANCH: - doBranch(From.Offset, To.Offset, Count, Mispreds); + doBranch(AggrEntry.From.Offset, AggrEntry.To.Offset, AggrEntry.Count, + AggrEntry.Mispreds); break; case AggregatedLBREntry::FT: case AggregatedLBREntry::FT_EXTERNAL_ORIGIN: { - doTrace(IsExternalOrigin ? 0 : From.Offset, To.Offset, Count); - NumTraces += Count; + LBREntry First{AggrEntry.EntryType == AggregatedLBREntry::FT + ? 
AggrEntry.From.Offset + : 0, + AggrEntry.From.Offset, false}; + LBREntry Second{AggrEntry.To.Offset, AggrEntry.To.Offset, false}; + doTrace(First, Second, AggrEntry.Count); + NumTraces += AggrEntry.Count; break; } } From 9e4dd66c4669fd1a374d35b7522fde864e9b2efb Mon Sep 17 00:00:00 2001 From: Amir Ayupov Date: Tue, 1 Oct 2024 17:19:07 -0700 Subject: [PATCH 05/10] Handle external origin LBR (non-BAT mode) Created using spr 1.3.4 --- bolt/lib/Profile/DataAggregator.cpp | 122 ++- bolt/test/X86/Inputs/callcont-fallthru.preagg | 21 - bolt/test/X86/Inputs/callcont-fallthru.yaml | 889 ------------------ bolt/test/X86/callcont-fallthru.s | 60 ++ bolt/test/X86/callcont-fallthru.test | 9 - 5 files changed, 132 insertions(+), 969 deletions(-) delete mode 100644 bolt/test/X86/Inputs/callcont-fallthru.preagg delete mode 100644 bolt/test/X86/Inputs/callcont-fallthru.yaml create mode 100644 bolt/test/X86/callcont-fallthru.s delete mode 100644 bolt/test/X86/callcont-fallthru.test diff --git a/bolt/lib/Profile/DataAggregator.cpp b/bolt/lib/Profile/DataAggregator.cpp index 72905d0ecf6a0..fe371ef3ca105 100644 --- a/bolt/lib/Profile/DataAggregator.cpp +++ b/bolt/lib/Profile/DataAggregator.cpp @@ -775,46 +775,86 @@ bool DataAggregator::doInterBranch(BinaryFunction *FromFunc, bool DataAggregator::doBranch(uint64_t From, uint64_t To, uint64_t Count, uint64_t Mispreds) { - bool IsReturn = false; - auto handleAddress = [&](uint64_t &Addr, bool IsFrom) -> BinaryFunction * { - if (BinaryFunction *Func = getBinaryFunctionContainingAddress(Addr)) { - Addr -= Func->getAddress(); - if (IsFrom) { - auto checkReturn = [&](auto MaybeInst) { - IsReturn = MaybeInst && BC->MIB->isReturn(*MaybeInst); - }; - if (Func->hasInstructions()) - checkReturn(Func->getInstructionAtOffset(Addr)); - else - checkReturn(Func->disassembleInstructionAtOffset(Addr)); - } + // Returns whether \p Offset in \p Func contains a return instruction. 
+ auto checkReturn = [&](const BinaryFunction &Func, const uint64_t Offset) { + auto isReturn = [&](auto MI) { return MI && BC->MIB->isReturn(*MI); }; + return Func.hasInstructions() + ? isReturn(Func.getInstructionAtOffset(Offset)) + : isReturn(Func.disassembleInstructionAtOffset(Offset)); + }; - if (BAT) - Addr = BAT->translate(Func->getAddress(), Addr, IsFrom); + // Returns whether \p Offset in \p Func corresponds to a call continuation + // fallthrough block. + auto checkCallCont = [&](BinaryFunction &Func, const uint64_t Offset) { + // Note the use of MCInstrAnalysis: no call continuation for a tail call. + auto isCall = [&](auto MI) { return MI && BC->MIA->isCall(*MI); }; + + // No call continuation at a function start. + if (!Offset) + return false; + + // FIXME: support BAT case where the function might be in empty state + // (split fragments declared non-simple). + if (!Func.hasCFG()) + return false; + + // The offset should not be an entry point or a landing pad. + const BinaryBasicBlock *ContBB = Func.getBasicBlockAtOffset(Offset); + if (!ContBB || ContBB->isEntryPoint() || ContBB->isLandingPad()) + return false; + + // Check that preceding instruction is a call. + const BinaryBasicBlock *CallBB = + Func.getBasicBlockContainingOffset(Offset - 1); + if (!CallBB || CallBB == ContBB) + return false; + return isCall(CallBB->getLastNonPseudoInstr()); + }; - if (BinaryFunction *ParentFunc = getBATParentFunction(*Func)) { - Func = ParentFunc; - if (IsFrom) - NumColdSamples += Count; - } + // Mutates \p Addr to an offset into the containing function, performing BAT + // offset translation and parent lookup. + // + // Returns the containing function (or BAT parent) and whether the address + // corresponds to a return (if \p IsFrom) or a call continuation (otherwise). 
+ auto handleAddress = [&](uint64_t &Addr, bool IsFrom) { + BinaryFunction *Func = getBinaryFunctionContainingAddress(Addr); + if (!Func) + return std::pair{Func, false}; - return Func; - } - return nullptr; - }; + Addr -= Func->getAddress(); - BinaryFunction *FromFunc = handleAddress(From, /*IsFrom=*/true); - // Record returns as call->call continuation fall-through. - if (IsReturn) { - LBREntry First{To - 1, To - 1, false}; - LBREntry Second{To, To, false}; - return doTrace(First, Second, Count); - } + bool IsRetOrCallCont = + IsFrom ? checkReturn(*Func, Addr) : checkCallCont(*Func, Addr); + + if (BAT) + Addr = BAT->translate(Func->getAddress(), Addr, IsFrom); + + BinaryFunction *ParentFunc = getBATParentFunction(*Func); + if (!ParentFunc) + return std::pair{Func, IsRetOrCallCont}; - BinaryFunction *ToFunc = handleAddress(To, /*IsFrom=*/false); + if (IsFrom) + NumColdSamples += Count; + + return std::pair{ParentFunc, IsRetOrCallCont}; + }; + + uint64_t ToOrig = To; + auto [FromFunc, IsReturn] = handleAddress(From, /*IsFrom=*/true); + auto [ToFunc, IsCallCont] = handleAddress(To, /*IsFrom=*/false); if (!FromFunc && !ToFunc) return false; + // Record call to continuation trace. + if (IsCallCont && FromFunc != ToFunc) { + LBREntry First{ToOrig - 1, ToOrig - 1, false}; + LBREntry Second{ToOrig, ToOrig, false}; + return doTrace(First, Second, Count); + } + // Ignore returns. + if (IsReturn) + return true; + // Treat recursive control transfers as inter-branches. if (FromFunc == ToFunc && To != 0) { recordBranch(*FromFunc, From, To, Count, Mispreds); @@ -916,24 +956,6 @@ DataAggregator::getFallthroughsInTrace(BinaryFunction &BF, if (!FromBB || !ToBB) return std::nullopt; - // Adjust FromBB if the first LBR is a return from the last instruction in - // the previous block (that instruction should be a call). 
- if (From == FromBB->getOffset() && !BF.containsAddress(FirstLBR.From) && - !FromBB->isEntryPoint() && !FromBB->isLandingPad()) { - const BinaryBasicBlock *PrevBB = - BF.getLayout().getBlock(FromBB->getIndex() - 1); - if (PrevBB->getSuccessor(FromBB->getLabel())) { - const MCInst *Instr = PrevBB->getLastNonPseudoInstr(); - if (Instr && BC.MIB->isCall(*Instr)) - FromBB = PrevBB; - else - LLVM_DEBUG(dbgs() << "invalid incoming LBR (no call): " << FirstLBR - << '\n'); - } else { - LLVM_DEBUG(dbgs() << "invalid incoming LBR: " << FirstLBR << '\n'); - } - } - // Fill out information for fall-through edges. The From and To could be // within the same basic block, e.g. when two call instructions are in the // same block. In this case we skip the processing. diff --git a/bolt/test/X86/Inputs/callcont-fallthru.preagg b/bolt/test/X86/Inputs/callcont-fallthru.preagg deleted file mode 100644 index 0b5f344540573..0000000000000 --- a/bolt/test/X86/Inputs/callcont-fallthru.preagg +++ /dev/null @@ -1,21 +0,0 @@ -B ffffffff81e01006 401194 8 0 -B 401180 401199 98482 96 -B 401199 401166 99542 0 -B 401177 401130 102776 0 -B 401135 40117c 103204 0 -B 401186 40118b 1022983 0 -B 401194 40117c 1021645 1 -F 40117c 401135 1161 -F 40117c 401180 92267 -F 40118b 401194 991002 -F 40117c 401186 968072 -F 40118b 401186 11468 -F 401130 401135 100015 -F 401166 401177 96992 -F 401199 401199 96168 -F 40117c ffffffff81e01006 7 -F 401199 401180 1140 -F 401194 ffffffff81e01006 1 -F 40117c 401194 11522 -F 401166 401199 1151 -F 401130 401177 1154 diff --git a/bolt/test/X86/Inputs/callcont-fallthru.yaml b/bolt/test/X86/Inputs/callcont-fallthru.yaml deleted file mode 100644 index a1f8417d1e217..0000000000000 --- a/bolt/test/X86/Inputs/callcont-fallthru.yaml +++ /dev/null @@ -1,889 +0,0 @@ ---- !ELF -FileHeader: - Class: ELFCLASS64 - Data: ELFDATA2LSB - Type: ET_EXEC - Machine: EM_X86_64 - Entry: 0x401040 -ProgramHeaders: - - Type: PT_PHDR - Flags: [ PF_R ] - VAddr: 0x400040 - Align: 0x8 - - Type: PT_INTERP 
- Flags: [ PF_R ] - FirstSec: .interp - LastSec: .interp - VAddr: 0x400318 - - Type: PT_LOAD - Flags: [ PF_R ] - FirstSec: .interp - LastSec: .rela.plt - VAddr: 0x400000 - Align: 0x1000 - - Type: PT_LOAD - Flags: [ PF_X, PF_R ] - FirstSec: .init - LastSec: .fini - VAddr: 0x401000 - Align: 0x1000 - - Type: PT_LOAD - Flags: [ PF_R ] - FirstSec: .rodata - LastSec: .eh_frame - VAddr: 0x402000 - Align: 0x1000 - - Type: PT_LOAD - Flags: [ PF_W, PF_R ] - FirstSec: .init_array - LastSec: .bss - VAddr: 0x403DE8 - Align: 0x1000 - - Type: PT_DYNAMIC - Flags: [ PF_W, PF_R ] - FirstSec: .dynamic - LastSec: .dynamic - VAddr: 0x403DF8 - Align: 0x8 - - Type: PT_NOTE - Flags: [ PF_R ] - FirstSec: .note.gnu.property - LastSec: .note.gnu.property - VAddr: 0x400338 - Align: 0x8 - - Type: PT_NOTE - Flags: [ PF_R ] - FirstSec: .note.gnu.build-id - LastSec: .note.ABI-tag - VAddr: 0x400358 - Align: 0x4 - - Type: PT_GNU_PROPERTY - Flags: [ PF_R ] - FirstSec: .note.gnu.property - LastSec: .note.gnu.property - VAddr: 0x400338 - Align: 0x8 - - Type: PT_GNU_EH_FRAME - Flags: [ PF_R ] - FirstSec: .eh_frame_hdr - LastSec: .eh_frame_hdr - VAddr: 0x402010 - Align: 0x4 - - Type: PT_GNU_STACK - Flags: [ PF_W, PF_R ] - Align: 0x10 - - Type: PT_GNU_RELRO - Flags: [ PF_R ] - FirstSec: .init_array - LastSec: .got - VAddr: 0x403DE8 -Sections: - - Name: .interp - Type: SHT_PROGBITS - Flags: [ SHF_ALLOC ] - Address: 0x400318 - AddressAlign: 0x1 - Content: 2F6C696236342F6C642D6C696E75782D7838362D36342E736F2E3200 - - Name: .note.gnu.property - Type: SHT_NOTE - Flags: [ SHF_ALLOC ] - Address: 0x400338 - AddressAlign: 0x8 - Notes: - - Name: GNU - Desc: 028000C0040000000300000000000000 - Type: NT_GNU_PROPERTY_TYPE_0 - - Name: .note.gnu.build-id - Type: SHT_NOTE - Flags: [ SHF_ALLOC ] - Address: 0x400358 - AddressAlign: 0x4 - Notes: - - Name: GNU - Desc: A77EA471B9AAA21E180E5FD02A0A0B2E4AB643E9 - Type: NT_PRPSINFO - - Name: .note.ABI-tag - Type: SHT_NOTE - Flags: [ SHF_ALLOC ] - Address: 0x40037C - AddressAlign: 
0x4 - Notes: - - Name: GNU - Desc: '00000000030000000200000000000000' - Type: NT_VERSION - - Name: .gnu.hash - Type: SHT_GNU_HASH - Flags: [ SHF_ALLOC ] - Address: 0x4003A0 - Link: .dynsym - AddressAlign: 0x8 - Header: - SymNdx: 0x1 - Shift2: 0x0 - BloomFilter: [ 0x0 ] - HashBuckets: [ 0x0 ] - HashValues: [ ] - - Name: .dynsym - Type: SHT_DYNSYM - Flags: [ SHF_ALLOC ] - Address: 0x4003C0 - Link: .dynstr - AddressAlign: 0x8 - - Name: .dynstr - Type: SHT_STRTAB - Flags: [ SHF_ALLOC ] - Address: 0x400450 - AddressAlign: 0x1 - - Name: .gnu.version - Type: SHT_GNU_versym - Flags: [ SHF_ALLOC ] - Address: 0x4004CE - Link: .dynsym - AddressAlign: 0x2 - Entries: [ 0, 2, 1, 1, 3, 1 ] - - Name: .gnu.version_r - Type: SHT_GNU_verneed - Flags: [ SHF_ALLOC ] - Address: 0x4004E0 - Link: .dynstr - AddressAlign: 0x8 - Dependencies: - - Version: 1 - File: libc.so.6 - Entries: - - Name: GLIBC_2.2.5 - Hash: 157882997 - Flags: 0 - Other: 3 - - Name: GLIBC_2.34 - Hash: 110530996 - Flags: 0 - Other: 2 - - Name: .rela.dyn - Type: SHT_RELA - Flags: [ SHF_ALLOC ] - Address: 0x400510 - Link: .dynsym - AddressAlign: 0x8 - Relocations: - - Offset: 0x403FC8 - Symbol: __libc_start_main - Type: R_X86_64_GLOB_DAT - - Offset: 0x403FD0 - Symbol: _ITM_deregisterTMCloneTable - Type: R_X86_64_GLOB_DAT - - Offset: 0x403FD8 - Symbol: __gmon_start__ - Type: R_X86_64_GLOB_DAT - - Offset: 0x403FE0 - Symbol: _ITM_registerTMCloneTable - Type: R_X86_64_GLOB_DAT - - Name: .rela.plt - Type: SHT_RELA - Flags: [ SHF_ALLOC, SHF_INFO_LINK ] - Address: 0x400570 - Link: .dynsym - AddressAlign: 0x8 - Info: .got.plt - Relocations: - - Offset: 0x404000 - Symbol: atoi - Type: R_X86_64_JUMP_SLOT - - Name: .init - Type: SHT_PROGBITS - Flags: [ SHF_ALLOC, SHF_EXECINSTR ] - Address: 0x401000 - AddressAlign: 0x4 - Offset: 0x1000 - Content: F30F1EFA4883EC08488B05C92F00004885C07402FFD04883C408C3 - - Name: .plt - Type: SHT_PROGBITS - Flags: [ SHF_ALLOC, SHF_EXECINSTR ] - Address: 0x401020 - AddressAlign: 0x10 - EntSize: 0x10 - 
Content: FF35CA2F0000FF25CC2F00000F1F4000FF25CA2F00006800000000E9E0FFFFFF - - Name: .text - Type: SHT_PROGBITS - Flags: [ SHF_ALLOC, SHF_EXECINSTR ] - Address: 0x401040 - AddressAlign: 0x10 - Contentame: .fini - Type: SHT_PROGBITS - Flags: [ SHF_ALLOC, SHF_EXECINSTR ] - Address: 0x4011A8 - AddressAlign: 0x4 - Content: F30F1EFA4883EC084883C408C3 - - Name: .rodata - Type: SHT_PROGBITS - Flags: [ SHF_ALLOC ] - Address: 0x402000 - AddressAlign: 0x8 - Offset: 0x2000 - Content: '01000200000000000000000000000000' - - Name: .eh_frame_hdr - Type: SHT_PROGBITS - Flags: [ SHF_ALLOC ] - Address: 0x402010 - AddressAlign: 0x4 - Content: 011B033B340000000500000010F0FFFF7800000030F0FFFF5000000060F0FFFF6400000020F1FFFFA000000030F1FFFFC0000000 - - Name: .eh_frame - Type: SHT_PROGBITS - Flags: [ SHF_ALLOC ] - Address: 0x402048 - AddressAlign: 0x8 - Content: 1400000000000000017A5200017810011B0C070890010000100000001C000000D8EFFFFF26000000004407101000000030000000F4EFFFFF0500000000000000240000004400000090EFFFFF20000000000E10460E184A0F0B770880003F1A3B2A332422000000001C0000006C00000078F0FFFF0600000000410E108602430D06410C07080000001C0000008C00000068F0FFFF6600000000410E108602430D0602610C0708000000000000 - - Name: .init_array - Type: SHT_INIT_ARRAY - Flags: [ SHF_WRITE, SHF_ALLOC ] - Address: 0x403DE8 - AddressAlign: 0x8 - EntSize: 0x8 - Offset: 0x2DE8 - Content: '2011400000000000' - - Name: .fini_array - Type: SHT_FINI_ARRAY - Flags: [ SHF_WRITE, SHF_ALLOC ] - Address: 0x403DF0 - AddressAlign: 0x8 - EntSize: 0x8 - Content: F010400000000000 - - Name: .dynamic - Type: SHT_DYNAMIC - Flags: [ SHF_WRITE, SHF_ALLOC ] - Address: 0x403DF8 - Link: .dynstr - AddressAlign: 0x8 - Entries: - - Tag: DT_NEEDED - Value: 0x18 - - Tag: DT_INIT - Value: 0x401000 - - Tag: DT_FINI - Value: 0x4011A8 - - Tag: DT_INIT_ARRAY - Value: 0x403DE8 - - Tag: DT_INIT_ARRAYSZ - Value: 0x8 - - Tag: DT_FINI_ARRAY - Value: 0x403DF0 - - Tag: DT_FINI_ARRAYSZ - Value: 0x8 - - Tag: DT_GNU_HASH - Value: 0x4003A0 - - Tag: DT_STRTAB - 
Value: 0x400450 - - Tag: DT_SYMTAB - Value: 0x4003C0 - - Tag: DT_STRSZ - Value: 0x7E - - Tag: DT_SYMENT - Value: 0x18 - - Tag: DT_DEBUG - Value: 0x0 - - Tag: DT_PLTGOT - Value: 0x403FE8 - - Tag: DT_PLTRELSZ - Value: 0x18 - - Tag: DT_PLTREL - Value: 0x7 - - Tag: DT_JMPREL - Value: 0x400570 - - Tag: DT_RELA - Value: 0x400510 - - Tag: DT_RELASZ - Value: 0x60 - - Tag: DT_RELAENT - Value: 0x18 - - Tag: DT_VERNEED - Value: 0x4004E0 - - Tag: DT_VERNEEDNUM - Value: 0x1 - - Tag: DT_VERSYM - Value: 0x4004CE - - Tag: DT_NULL - Value: 0x0 - - Tag: DT_NULL - Value: 0x0 - - Tag: DT_NULL - Value: 0x0 - - Tag: DT_NULL - Value: 0x0 - - Tag: DT_NULL - Value: 0x0 - - Tag: DT_NULL - Value: 0x0 - - Name: .got - Type: SHT_PROGBITS - Flags: [ SHF_WRITE, SHF_ALLOC ] - Address: 0x403FC8 - AddressAlign: 0x8 - EntSize: 0x8 - Content: '0000000000000000000000000000000000000000000000000000000000000000' - - Name: .got.plt - Type: SHT_PROGBITS - Flags: [ SHF_WRITE, SHF_ALLOC ] - Address: 0x403FE8 - AddressAlign: 0x8 - EntSize: 0x8 - Content: F83D400000000000000000000000000000000000000000003610400000000000 - - Name: .data - Type: SHT_PROGBITS - Flags: [ SHF_WRITE, SHF_ALLOC ] - Address: 0x404008 - AddressAlign: 0x1 - Content: '00000000' - - Name: .tm_clone_table - Type: SHT_PROGBITS - Flags: [ SHF_WRITE, SHF_ALLOC ] - Address: 0x404010 - AddressAlign: 0x8 - - Name: .bss - Type: SHT_NOBITS - Flags: [ SHF_WRITE, SHF_ALLOC ] - Address: 0x404010 - AddressAlign: 0x1 - Size: 0x8 - - Name: .comment - Type: SHT_PROGBITS - Flags: [ SHF_MERGE, SHF_STRINGS ] - AddressAlign: 0x1 - EntSize: 0x1 - Content: 4743433A2028474E55292031312E352E302032303234303731392028526564204861742031312E352E302D3229004743433A2028474E55292031332E332E312032303234303631312028526564204861742031332E332E312D322900636C616E672076657273696F6E2031382E312E38202843656E744F532031382E312E382D332E656C392900 - - Name: .gnu.build.attributes - Type: SHT_NOTE - Address: 0x406018 - AddressAlign: 0x4 - Notes: - - Name: "GA$\x013a1" - Desc: 
'40104000000000006610400000000000' - Type: NT_GNU_BUILD_ATTRIBUTE_OPEN - - Name: "GA$\x013a1" - Desc: '75104000000000007510400000000000' - Type: NT_GNU_BUILD_ATTRIBUTE_OPEN - - Name: "GA$\x013a1" - Desc: '00104000000000001610400000000000' - Type: NT_GNU_BUILD_ATTRIBUTE_OPEN - - Name: "GA$\x013a1" - Desc: A811400000000000B011400000000000 - Type: NT_GNU_BUILD_ATTRIBUTE_OPEN - - Name: "GA$\x013a1" - Desc: '80104000000000002611400000000000' - Type: NT_GNU_BUILD_ATTRIBUTE_OPEN - - Name: "GA$\x013a1" - Desc: A611400000000000A611400000000000 - Type: NT_GNU_BUILD_ATTRIBUTE_OPEN - - Name: "GA$\x013a1" - Desc: A611400000000000A611400000000000 - Type: NT_GNU_BUILD_ATTRIBUTE_OPEN - - Name: "GA$\x013a1" - Desc: 16104000000000001B10400000000000 - Type: NT_GNU_BUILD_ATTRIBUTE_OPEN - - Name: "GA$\x013a1" - Desc: B011400000000000B511400000000000 - Type: NT_GNU_BUILD_ATTRIBUTE_OPEN - - Name: .rela.init - Type: SHT_RELA - Flags: [ SHF_INFO_LINK ] - Link: .symtab - AddressAlign: 0x8 - Info: .init - Relocations: - - Offset: 0x40100B - Symbol: __gmon_start__ - Type: R_X86_64_REX_GOTPCRELX - Addend: -4 - - Name: .rela.text - Type: SHT_RELA - Flags: [ SHF_INFO_LINK ] - Link: .symtab - AddressAlign: 0x8 - Info: .text - Relocations: - - Offset: 0x40105B - Symbol: main - Type: R_X86_64_32S - - Offset: 0x401061 - Symbol: '__libc_start_main@GLIBC_2.34' - Type: R_X86_64_GOTPCRELX - Addend: -4 - - Offset: 0x401083 - Symbol: .tm_clone_table - Type: R_X86_64_PC32 - Addend: -4 - - Offset: 0x40108A - Symbol: __TMC_END__ - Type: R_X86_64_PC32 - Addend: -4 - - Offset: 0x401096 - Symbol: _ITM_deregisterTMCloneTable - Type: R_X86_64_REX_GOTPCRELX - Addend: -4 - - Offset: 0x4010B3 - Symbol: .tm_clone_table - Type: R_X86_64_PC32 - Addend: -4 - - Offset: 0x4010BA - Symbol: __TMC_END__ - Type: R_X86_64_PC32 - Addend: -4 - - Offset: 0x4010D7 - Symbol: _ITM_registerTMCloneTable - Type: R_X86_64_REX_GOTPCRELX - Addend: -4 - - Offset: 0x4010F6 - Symbol: .bss - Type: R_X86_64_PC32 - Addend: -5 - - Offset: 
0x401108 - Symbol: .bss - Type: R_X86_64_PC32 - Addend: -5 - - Offset: 0x40115F - Symbol: 'atoi@GLIBC_2.2.5' - Type: R_X86_64_PLT32 - Addend: -4 - - Offset: 0x401178 - Symbol: foo - Type: R_X86_64_PLT32 - Addend: -4 - - Name: .rela.eh_frame - Type: SHT_RELA - Flags: [ SHF_INFO_LINK ] - Link: .symtab - AddressAlign: 0x8 - Info: .eh_frame - Relocations: - - Offset: 0x402068 - Symbol: .text - Type: R_X86_64_PC32 - - Offset: 0x40207C - Symbol: .text - Type: R_X86_64_PC32 - Addend: 48 - - Offset: 0x4020B8 - Symbol: .text - Type: R_X86_64_PC32 - Addend: 240 - - Offset: 0x4020D8 - Symbol: .text - Type: R_X86_64_PC32 - Addend: 256 - - Name: .rela.init_array - Type: SHT_RELA - Flags: [ SHF_INFO_LINK ] - Link: .symtab - AddressAlign: 0x8 - Info: .init_array - Relocations: - - Offset: 0x403DE8 - Symbol: .text - Type: R_X86_64_64 - Addend: 224 - - Name: .rela.fini_array - Type: SHT_RELA - Flags: [ SHF_INFO_LINK ] - Link: .symtab - AddressAlign: 0x8 - Info: .fini_array - Relocations: - - Offset: 0x403DF0 - Symbol: .text - Type: R_X86_64_64 - Addend: 176 - - Name: .rela.gnu.build.attributes - Type: SHT_RELA - Flags: [ SHF_INFO_LINK ] - Link: .symtab - AddressAlign: 0x8 - Info: .gnu.build.attributes - Relocations: - - Offset: 0x40602C - Symbol: .text - Type: R_X86_64_64 - - Offset: 0x406034 - Symbol: .text - Type: R_X86_64_64 - Addend: 38 - - Offset: 0x406050 - Symbol: .text - Type: R_X86_64_64 - Addend: 53 - - Offset: 0x406058 - Symbol: .text - Type: R_X86_64_64 - Addend: 53 - - Offset: 0x406074 - Symbol: .init - Type: R_X86_64_64 - - Offset: 0x40607C - Symbol: .init - Type: R_X86_64_64 - Addend: 22 - - Offset: 0x406098 - Symbol: .fini - Type: R_X86_64_64 - - Offset: 0x4060A0 - Symbol: .fini - Type: R_X86_64_64 - Addend: 8 - - Offset: 0x4060BC - Symbol: .text - Type: R_X86_64_64 - Addend: 64 - - Offset: 0x4060C4 - Symbol: .text - Type: R_X86_64_64 - Addend: 230 - - Offset: 0x4060E0 - Symbol: .text - Type: R_X86_64_64 - Addend: 358 - - Offset: 0x4060E8 - Symbol: .text - Type: 
R_X86_64_64 - Addend: 358 - - Offset: 0x406104 - Symbol: .text - Type: R_X86_64_64 - Addend: 358 - - Offset: 0x40610C - Symbol: .text - Type: R_X86_64_64 - Addend: 358 - - Offset: 0x406128 - Symbol: .init - Type: R_X86_64_64 - Addend: 22 - - Offset: 0x406130 - Symbol: .init - Type: R_X86_64_64 - Addend: 27 - - Offset: 0x40614C - Symbol: .fini - Type: R_X86_64_64 - Addend: 8 - - Offset: 0x406154 - Symbol: .fini - Type: R_X86_64_64 - Addend: 13 - - Type: SectionHeaderTable - Sections: - - Name: .interp - - Name: .note.gnu.property - - Name: .note.gnu.build-id - - Name: .note.ABI-tag - - Name: .gnu.hash - - Name: .dynsym - - Name: .dynstr - - Name: .gnu.version - - Name: .gnu.version_r - - Name: .rela.dyn - - Name: .rela.plt - - Name: .init - - Name: .rela.init - - Name: .plt - - Name: .text - - Name: .rela.text - - Name: .fini - - Name: .rodata - - Name: .eh_frame_hdr - - Name: .eh_frame - - Name: .rela.eh_frame - - Name: .init_array - - Name: .rela.init_array - - Name: .fini_array - - Name: .rela.fini_array - - Name: .dynamic - - Name: .got - - Name: .got.plt - - Name: .data - - Name: .tm_clone_table - - Name: .bss - - Name: .comment - - Name: .gnu.build.attributes - - Name: .rela.gnu.build.attributes - - Name: .symtab - - Name: .strtab - - Name: .shstrtab -Symbols: - - Name: .interp - Type: STT_SECTION - Section: .interp - Value: 0x400318 - - Name: .note.gnu.property - Type: STT_SECTION - Section: .note.gnu.property - Value: 0x400338 - - Name: .note.gnu.build-id - Type: STT_SECTION - Section: .note.gnu.build-id - Value: 0x400358 - - Name: .note.ABI-tag - Type: STT_SECTION - Section: .note.ABI-tag - Value: 0x40037C - - Name: .gnu.hash - Type: STT_SECTION - Section: .gnu.hash - Value: 0x4003A0 - - Name: .dynsym - Type: STT_SECTION - Section: .dynsym - Value: 0x4003C0 - - Name: .dynstr - Type: STT_SECTION - Section: .dynstr - Value: 0x400450 - - Name: .gnu.version - Type: STT_SECTION - Section: .gnu.version - Value: 0x4004CE - - Name: .gnu.version_r - Type: 
STT_SECTION - Section: .gnu.version_r - Value: 0x4004E0 - - Name: .rela.dyn - Type: STT_SECTION - Section: .rela.dyn - Value: 0x400510 - - Name: .rela.plt - Type: STT_SECTION - Section: .rela.plt - Value: 0x400570 - - Name: .init - Type: STT_SECTION - Section: .init - Value: 0x401000 - - Name: .plt - Type: STT_SECTION - Section: .plt - Value: 0x401020 - - Name: .text - Type: STT_SECTION - Section: .text - Value: 0x401040 - - Name: .fini - Type: STT_SECTION - Section: .fini - Value: 0x4011A8 - - Name: .rodata - Type: STT_SECTION - Section: .rodata - Value: 0x402000 - - Name: .eh_frame_hdr - Type: STT_SECTION - Section: .eh_frame_hdr - Value: 0x402010 - - Name: .eh_frame - Type: STT_SECTION - Section: .eh_frame - Value: 0x402048 - - Name: .init_array - Type: STT_SECTION - Section: .init_array - Value: 0x403DE8 - - Name: .fini_array - Type: STT_SECTION - Section: .fini_array - Value: 0x403DF0 - - Name: .dynamic - Type: STT_SECTION - Section: .dynamic - Value: 0x403DF8 - - Name: .got - Type: STT_SECTION - Section: .got - Value: 0x403FC8 - - Name: .got.plt - Type: STT_SECTION - Section: .got.plt - Value: 0x403FE8 - - Name: .data - Type: STT_SECTION - Section: .data - Value: 0x404008 - - Name: .tm_clone_table - Type: STT_SECTION - Section: .tm_clone_table - Value: 0x404010 - - Name: .bss - Type: STT_SECTION - Section: .bss - Value: 0x404010 - - Name: .comment - Type: STT_SECTION - Section: .comment - - Name: .gnu.build.attributes - Type: STT_SECTION - Section: .gnu.build.attributes - Value: 0x406018 - - Name: crt1.o - Type: STT_FILE - Index: SHN_ABS - - Name: __abi_tag - Type: STT_OBJECT - Section: .note.ABI-tag - Value: 0x40037C - Size: 0x20 - - Name: crtstuff.c - Type: STT_FILE - Index: SHN_ABS - - Name: __TMC_LIST__ - Type: STT_OBJECT - Section: .tm_clone_table - Value: 0x404010 - - Name: deregister_tm_clones - Type: STT_FUNC - Section: .text - Value: 0x401080 - - Name: register_tm_clones - Type: STT_FUNC - Section: .text - Value: 0x4010B0 - - Name: 
__do_global_dtors_aux - Type: STT_FUNC - Section: .text - Value: 0x4010F0 - - Name: completed.0 - Type: STT_OBJECT - Section: .bss - Value: 0x404010 - Size: 0x1 - - Name: __do_global_dtors_aux_fini_array_entry - Type: STT_OBJECT - Section: .fini_array - Value: 0x403DF0 - - Name: frame_dummy - Type: STT_FUNC - Section: .text - Value: 0x401120 - - Name: __frame_dummy_init_array_entry - Type: STT_OBJECT - Section: .init_array - Value: 0x403DE8 - - Name: callcont-fallthru.c - Type: STT_FILE - Index: SHN_ABS - - Name: 'crtstuff.c (1)' - Type: STT_FILE - Index: SHN_ABS - - Name: __FRAME_END__ - Type: STT_OBJECT - Section: .eh_frame - Value: 0x4020F0 - - Type: STT_FILE - Index: SHN_ABS - - Name: _DYNAMIC - Type: STT_OBJECT - Section: .dynamic - Value: 0x403DF8 - - Name: __GNU_EH_FRAME_HDR - Section: .eh_frame_hdr - Value: 0x402010 - - Name: _GLOBAL_OFFSET_TABLE_ - Type: STT_OBJECT - Section: .got.plt - Value: 0x403FE8 - - Name: '__libc_start_main@GLIBC_2.34' - Type: STT_FUNC - Binding: STB_GLOBAL - - Name: _ITM_deregisterTMCloneTable - Binding: STB_WEAK - - Name: data_start - Section: .data - Binding: STB_WEAK - Value: 0x404008 - - Name: _edata - Section: .tm_clone_table - Binding: STB_GLOBAL - Value: 0x404010 - - Name: _fini - Type: STT_FUNC - Section: .fini - Binding: STB_GLOBAL - Value: 0x4011A8 - Other: [ STV_HIDDEN ] - - Name: __data_start - Section: .data - Binding: STB_GLOBAL - Value: 0x404008 - - Name: __gmon_start__ - Binding: STB_WEAK - - Name: __dso_handle - Type: STT_OBJECT - Section: .rodata - Binding: STB_GLOBAL - Value: 0x402008 - Other: [ STV_HIDDEN ] - - Name: _IO_stdin_used - Type: STT_OBJECT - Section: .rodata - Binding: STB_GLOBAL - Value: 0x402000 - Size: 0x4 - - Name: foo - Type: STT_FUNC - Section: .text - Binding: STB_GLOBAL - Value: 0x401130 - Size: 0x6 - - Name: _end - Section: .bss - Binding: STB_GLOBAL - Value: 0x404018 - - Name: _dl_relocate_static_pie - Type: STT_FUNC - Section: .text - Binding: STB_GLOBAL - Value: 0x401070 - Size: 0x5 - 
Other: [ STV_HIDDEN ] - - Name: _start - Type: STT_FUNC - Section: .text - Binding: STB_GLOBAL - Value: 0x401040 - Size: 0x26 - - Name: __bss_start - Section: .bss - Binding: STB_GLOBAL - Value: 0x404010 - - Name: main - Type: STT_FUNC - Section: .text - Binding: STB_GLOBAL - Value: 0x401140 - Size: 0x66 - - Name: 'atoi@GLIBC_2.2.5' - Type: STT_FUNC - Binding: STB_GLOBAL - - Name: __TMC_END__ - Type: STT_OBJECT - Section: .tm_clone_table - Binding: STB_GLOBAL - Value: 0x404010 - Other: [ STV_HIDDEN ] - - Name: _ITM_registerTMCloneTable - Binding: STB_WEAK - - Name: _init - Type: STT_FUNC - Section: .init - Binding: STB_GLOBAL - Value: 0x401000 - Other: [ STV_HIDDEN ] -DynamicSymbols: - - Name: __libc_start_main - Type: STT_FUNC - Binding: STB_GLOBAL - - Name: _ITM_deregisterTMCloneTable - Binding: STB_WEAK - - Name: __gmon_start__ - Binding: STB_WEAK - - Name: atoi - Type: STT_FUNC - Binding: STB_GLOBAL - - Name: _ITM_registerTMCloneTable - Binding: STB_WEAK -... diff --git a/bolt/test/X86/callcont-fallthru.s b/bolt/test/X86/callcont-fallthru.s new file mode 100644 index 0000000000000..0a59d799f1ef3 --- /dev/null +++ b/bolt/test/X86/callcont-fallthru.s @@ -0,0 +1,60 @@ +## Ensures that a call continuation fallthrough count is set when using +## pre-aggregated perf data. 
+ +# RUN: llvm-mc -filetype=obj -triple x86_64-unknown-unknown %s -o %t.o +# RUN: %clang %cflags %t.o -o %t.exe -Wl,-q -nostdlib +# RUN: link_fdata %s %t.exe %t.pa PREAGG +# RUN: llvm-strip --strip-unneeded %t.exe +# RUN: llvm-bolt %t.exe --pa -p %t.pa -o %t.out \ +# RUN: --print-cfg --print-only=main | FileCheck %s + + .globl foo + .type foo, %function +foo: + pushq %rbp + movq %rsp, %rbp + popq %rbp +Lfoo_ret: + retq +.size foo, .-foo + + .globl main + .type main, %function +main: + pushq %rbp + movq %rsp, %rbp + subq $0x20, %rsp + movl $0x0, -0x4(%rbp) + movl %edi, -0x8(%rbp) + movq %rsi, -0x10(%rbp) + movq -0x10(%rbp), %rax + movq 0x8(%rax), %rdi + movl %eax, -0x14(%rbp) + +Ltmp4: + cmpl $0x0, -0x14(%rbp) + je Ltmp0 + + movl $0xa, -0x18(%rbp) + callq foo +# PREAGG: B #Lfoo_ret# #Ltmp3# 1 0 +# CHECK: callq foo +# CHECK-NEXT: count: 1 + +Ltmp3: + cmpl $0x0, -0x18(%rbp) + jmp Ltmp2 + +Ltmp2: + movl -0x18(%rbp), %eax + addl $-0x1, %eax + movl %eax, -0x18(%rbp) + jmp Ltmp3 + jmp Ltmp4 + +Ltmp0: + xorl %eax, %eax + addq $0x20, %rsp + popq %rbp + retq +.size main, .-main diff --git a/bolt/test/X86/callcont-fallthru.test b/bolt/test/X86/callcont-fallthru.test deleted file mode 100644 index e0a5c5a6852d4..0000000000000 --- a/bolt/test/X86/callcont-fallthru.test +++ /dev/null @@ -1,9 +0,0 @@ -## Reproduces missing call continuation fallthrough count when using -## pre-aggregated perf data - -# RUN: yaml2obj %p/Inputs/callcont-fallthru.yaml > %t.exe -# RUN: llvm-bolt %t.exe --pa -p %p/Inputs/callcont-fallthru.preagg -o %t.out \ -# RUN: --print-cfg --print-only=main | FileCheck %s - -# CHECK: callq foo -# CHECK-NEXT: count: 103204 From 06fe34d8817695c6769f199f5ac386ac2e872af4 Mon Sep 17 00:00:00 2001 From: Amir Ayupov Date: Mon, 21 Oct 2024 12:58:49 -0700 Subject: [PATCH 06/10] Added plt call (return from external location) test case Created using spr 1.3.4 --- bolt/test/X86/callcont-fallthru.s | 7 +++++++ 1 file changed, 7 insertions(+) diff --git 
a/bolt/test/X86/callcont-fallthru.s b/bolt/test/X86/callcont-fallthru.s index 0a59d799f1ef3..d10373720a660 100644 --- a/bolt/test/X86/callcont-fallthru.s +++ b/bolt/test/X86/callcont-fallthru.s @@ -27,6 +27,12 @@ main: movl $0x0, -0x4(%rbp) movl %edi, -0x8(%rbp) movq %rsi, -0x10(%rbp) + callq puts@PLT +# PREAGG: B X:0 #Ltmp1# 2 0 +# CHECK: callq puts@PLT +# CHECK-NEXT: count: 2 + +Ltmp1: movq -0x10(%rbp), %rax movq 0x8(%rax), %rdi movl %eax, -0x14(%rbp) @@ -51,6 +57,7 @@ Ltmp2: movl %eax, -0x18(%rbp) jmp Ltmp3 jmp Ltmp4 + jmp Ltmp1 Ltmp0: xorl %eax, %eax From e8ec9c936c31ae0ec0a56a224860045c0a2885a1 Mon Sep 17 00:00:00 2001 From: Amir Ayupov Date: Mon, 21 Oct 2024 14:32:11 -0700 Subject: [PATCH 07/10] Add test for getFallthroughsInTrace Created using spr 1.3.4 --- bolt/test/X86/callcont-fallthru.s | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/bolt/test/X86/callcont-fallthru.s b/bolt/test/X86/callcont-fallthru.s index d10373720a660..1feb283edb221 100644 --- a/bolt/test/X86/callcont-fallthru.s +++ b/bolt/test/X86/callcont-fallthru.s @@ -4,10 +4,16 @@ # RUN: llvm-mc -filetype=obj -triple x86_64-unknown-unknown %s -o %t.o # RUN: %clang %cflags %t.o -o %t.exe -Wl,-q -nostdlib # RUN: link_fdata %s %t.exe %t.pa PREAGG +# RUN: link_fdata %s %t.exe %t.pa2 PREAGG2 # RUN: llvm-strip --strip-unneeded %t.exe # RUN: llvm-bolt %t.exe --pa -p %t.pa -o %t.out \ # RUN: --print-cfg --print-only=main | FileCheck %s +## Check that getFallthroughsInTrace correctly handles a trace starting at plt +## call continuation +# RUN: llvm-bolt %t.exe --pa -p %t.pa2 -o %t.out2 \ +# RUN: --print-cfg --print-only=main | FileCheck %s --check-prefix=CHECK2 + .globl foo .type foo, %function foo: @@ -32,6 +38,9 @@ main: # CHECK: callq puts@PLT # CHECK-NEXT: count: 2 +# CHECK2: callq puts@PLT +# CHECK2-NEXT: count: 0 + Ltmp1: movq -0x10(%rbp), %rax movq 0x8(%rax), %rdi @@ -40,6 +49,8 @@ Ltmp1: Ltmp4: cmpl $0x0, -0x14(%rbp) je Ltmp0 +# CHECK2: je .Ltmp0 +# CHECK2-NEXT: count: 3 movl 
$0xa, -0x18(%rbp) callq foo @@ -47,8 +58,13 @@ Ltmp4: # CHECK: callq foo # CHECK-NEXT: count: 1 +# PREAGG2: F #Ltmp1# #Ltmp3_br# 3 +# CHECK2: callq foo +# CHECK2-NEXT: count: 3 + Ltmp3: cmpl $0x0, -0x18(%rbp) +Ltmp3_br: jmp Ltmp2 Ltmp2: From 9ac54dd2a2fd0e9b942b3a6399133d0428652b18 Mon Sep 17 00:00:00 2001 From: Amir Ayupov Date: Sun, 27 Oct 2024 11:43:47 -0700 Subject: [PATCH 08/10] Use return profile conversion for pre-aggregated profile only Created using spr 1.3.4 --- bolt/include/bolt/Profile/DataAggregator.h | 3 ++- bolt/lib/Profile/DataAggregator.cpp | 27 ++++++++++++++++++---- 2 files changed, 25 insertions(+), 5 deletions(-) diff --git a/bolt/include/bolt/Profile/DataAggregator.h b/bolt/include/bolt/Profile/DataAggregator.h index 6453b3070ceb8..2880bfd03be78 100644 --- a/bolt/include/bolt/Profile/DataAggregator.h +++ b/bolt/include/bolt/Profile/DataAggregator.h @@ -266,7 +266,8 @@ class DataAggregator : public DataReader { uint64_t Mispreds); /// Register a \p Branch. - bool doBranch(uint64_t From, uint64_t To, uint64_t Count, uint64_t Mispreds); + bool doBranch(uint64_t From, uint64_t To, uint64_t Count, uint64_t Mispreds, + bool IsPreagg); /// Register a trace between two LBR entries supplied in execution order. bool doTrace(const LBREntry &First, const LBREntry &Second, diff --git a/bolt/lib/Profile/DataAggregator.cpp b/bolt/lib/Profile/DataAggregator.cpp index fe371ef3ca105..b1cd9db9fc481 100644 --- a/bolt/lib/Profile/DataAggregator.cpp +++ b/bolt/lib/Profile/DataAggregator.cpp @@ -774,7 +774,7 @@ bool DataAggregator::doInterBranch(BinaryFunction *FromFunc, } bool DataAggregator::doBranch(uint64_t From, uint64_t To, uint64_t Count, - uint64_t Mispreds) { + uint64_t Mispreds, bool IsPreagg) { // Returns whether \p Offset in \p Func contains a return instruction. 
auto checkReturn = [&](const BinaryFunction &Func, const uint64_t Offset) { auto isReturn = [&](auto MI) { return MI && BC->MIB->isReturn(*MI); }; @@ -846,7 +846,7 @@ bool DataAggregator::doBranch(uint64_t From, uint64_t To, uint64_t Count, return false; // Record call to continuation trace. - if (IsCallCont && FromFunc != ToFunc) { + if (IsPreagg && IsCallCont && FromFunc != ToFunc) { LBREntry First{ToOrig - 1, ToOrig - 1, false}; LBREntry Second{ToOrig, ToOrig, false}; return doTrace(First, Second, Count); @@ -956,6 +956,24 @@ DataAggregator::getFallthroughsInTrace(BinaryFunction &BF, if (!FromBB || !ToBB) return std::nullopt; + // Adjust FromBB if the first LBR is a return from the last instruction in + // the previous block (that instruction should be a call). + if (From == FromBB->getOffset() && !BF.containsAddress(FirstLBR.From) && + !FromBB->isEntryPoint() && !FromBB->isLandingPad()) { + const BinaryBasicBlock *PrevBB = + BF.getLayout().getBlock(FromBB->getIndex() - 1); + if (PrevBB->getSuccessor(FromBB->getLabel())) { + const MCInst *Instr = PrevBB->getLastNonPseudoInstr(); + if (Instr && BC.MIB->isCall(*Instr)) + FromBB = PrevBB; + else + LLVM_DEBUG(dbgs() << "invalid incoming LBR (no call): " << FirstLBR + << '\n'); + } else { + LLVM_DEBUG(dbgs() << "invalid incoming LBR: " << FirstLBR << '\n'); + } + } + // Fill out information for fall-through edges. The From and To could be // within the same basic block, e.g. when two call instructions are in the // same block. In this case we skip the processing. 
@@ -1646,7 +1664,8 @@ void DataAggregator::processBranchEvents() { for (const auto &AggrLBR : BranchLBRs) { const Trace &Loc = AggrLBR.first; const TakenBranchInfo &Info = AggrLBR.second; - doBranch(Loc.From, Loc.To, Info.TakenCount, Info.MispredCount); + doBranch(Loc.From, Loc.To, Info.TakenCount, Info.MispredCount, + /*IsPreagg=*/false); } } @@ -1807,7 +1826,7 @@ void DataAggregator::processPreAggregated() { switch (AggrEntry.EntryType) { case AggregatedLBREntry::BRANCH: doBranch(AggrEntry.From.Offset, AggrEntry.To.Offset, AggrEntry.Count, - AggrEntry.Mispreds); + AggrEntry.Mispreds, /*IsPreagg=*/true); break; case AggregatedLBREntry::FT: case AggregatedLBREntry::FT_EXTERNAL_ORIGIN: { From a8e1c5457a2f382b2e545b03e0093271e27bdb59 Mon Sep 17 00:00:00 2001 From: Amir Ayupov Date: Wed, 30 Oct 2024 16:06:03 -0700 Subject: [PATCH 09/10] address comments Created using spr 1.3.4 --- bolt/include/bolt/Core/BinaryFunction.h | 4 ++ bolt/lib/Profile/DataAggregator.cpp | 9 ++-- bolt/test/X86/callcont-fallthru.s | 67 +++++++++++++++++++++---- 3 files changed, 67 insertions(+), 13 deletions(-) diff --git a/bolt/include/bolt/Core/BinaryFunction.h b/bolt/include/bolt/Core/BinaryFunction.h index 6ebbaf94754e8..3ce3be0a70806 100644 --- a/bolt/include/bolt/Core/BinaryFunction.h +++ b/bolt/include/bolt/Core/BinaryFunction.h @@ -906,6 +906,10 @@ class BinaryFunction { return BB && BB->getOffset() == Offset ? BB : nullptr; } + const BinaryBasicBlock *getBasicBlockAtOffset(uint64_t Offset) const { + return const_cast<BinaryFunction *>(this)->getBasicBlockAtOffset(Offset); + } + /// Retrieve the landing pad BB associated with invoke instruction \p Invoke /// that is in \p BB. 
Return nullptr if none exists BinaryBasicBlock *getLandingPadBBFor(const BinaryBasicBlock &BB, diff --git a/bolt/lib/Profile/DataAggregator.cpp b/bolt/lib/Profile/DataAggregator.cpp index b1cd9db9fc481..c25795aab1746 100644 --- a/bolt/lib/Profile/DataAggregator.cpp +++ b/bolt/lib/Profile/DataAggregator.cpp @@ -784,8 +784,9 @@ bool DataAggregator::doBranch(uint64_t From, uint64_t To, uint64_t Count, }; // Returns whether \p Offset in \p Func corresponds to a call continuation - // fallthrough block. - auto checkCallCont = [&](BinaryFunction &Func, const uint64_t Offset) { + // fallthrough block excluding externally-reachable entry points (secondary + // entries and landing pads). + auto checkCallCont = [&](const BinaryFunction &Func, const uint64_t Offset) { // Note the use of MCInstrAnalysis: no call continuation for a tail call. auto isCall = [&](auto MI) { return MI && BC->MIA->isCall(*MI); }; @@ -846,7 +847,7 @@ bool DataAggregator::doBranch(uint64_t From, uint64_t To, uint64_t Count, return false; // Record call to continuation trace. - if (IsPreagg && IsCallCont && FromFunc != ToFunc) { + if (IsPreagg && FromFunc != ToFunc && (IsReturn || IsCallCont)) { LBREntry First{ToOrig - 1, ToOrig - 1, false}; LBREntry Second{ToOrig, ToOrig, false}; return doTrace(First, Second, Count); @@ -1665,7 +1666,7 @@ void DataAggregator::processBranchEvents() { const Trace &Loc = AggrLBR.first; const TakenBranchInfo &Info = AggrLBR.second; doBranch(Loc.From, Loc.To, Info.TakenCount, Info.MispredCount, - /*IsPreagg=*/false); + /*IsPreagg*/ false); } } diff --git a/bolt/test/X86/callcont-fallthru.s b/bolt/test/X86/callcont-fallthru.s index 1feb283edb221..31a7910d7fa3f 100644 --- a/bolt/test/X86/callcont-fallthru.s +++ b/bolt/test/X86/callcont-fallthru.s @@ -1,12 +1,15 @@ ## Ensures that a call continuation fallthrough count is set when using ## pre-aggregated perf data. 
-# RUN: llvm-mc -filetype=obj -triple x86_64-unknown-unknown %s -o %t.o -# RUN: %clang %cflags %t.o -o %t.exe -Wl,-q -nostdlib -# RUN: link_fdata %s %t.exe %t.pa PREAGG -# RUN: link_fdata %s %t.exe %t.pa2 PREAGG2 -# RUN: llvm-strip --strip-unneeded %t.exe -# RUN: llvm-bolt %t.exe --pa -p %t.pa -o %t.out \ +# RUN: %clangxx %cxxflags %s -o %t -Wl,-q -nostdlib +# RUN: link_fdata %s %t %t.pa1 PREAGG +# RUN: link_fdata %s %t %t.pa2 PREAGG2 +# RUN: link_fdata %s %t %t.pa3 PREAGG3 +# RUN: link_fdata %s %t %t.pa4 PREAGG4 + +## Check normal case: fallthrough is not LP or secondary entry. +# RUN: llvm-strip --strip-unneeded %t -o %t.exe +# RUN: llvm-bolt %t.exe --pa -p %t.pa1 -o %t.out \ # RUN: --print-cfg --print-only=main | FileCheck %s ## Check that getFallthroughsInTrace correctly handles a trace starting at plt @@ -14,6 +17,14 @@ # RUN: llvm-bolt %t.exe --pa -p %t.pa2 -o %t.out2 \ # RUN: --print-cfg --print-only=main | FileCheck %s --check-prefix=CHECK2 +## Check that we don't treat secondary entry points as call continuation sites. +# RUN: llvm-bolt %t --pa -p %t.pa3 -o %t.out \ +# RUN: --print-cfg --print-only=main | FileCheck %s --check-prefix=CHECK3 + +## Check fallthrough to a landing pad case. 
+# RUN: llvm-bolt %t.exe --pa -p %t.pa4 -o %t.out \ +# RUN: --print-cfg --print-only=main | FileCheck %s --check-prefix=CHECK4 + .globl foo .type foo, %function foo: @@ -27,6 +38,10 @@ Lfoo_ret: .globl main .type main, %function main: +.Lfunc_begin0: + .cfi_startproc + .cfi_personality 155, DW.ref.__gxx_personality_v0 + .cfi_lsda 27, .Lexception0 pushq %rbp movq %rsp, %rbp subq $0x20, %rsp @@ -34,13 +49,11 @@ main: movl %edi, -0x8(%rbp) movq %rsi, -0x10(%rbp) callq puts@PLT +## Target is a call continuation # PREAGG: B X:0 #Ltmp1# 2 0 # CHECK: callq puts@PLT # CHECK-NEXT: count: 2 -# CHECK2: callq puts@PLT -# CHECK2-NEXT: count: 0 - Ltmp1: movq -0x10(%rbp), %rax movq 0x8(%rax), %rdi @@ -54,14 +67,26 @@ Ltmp4: movl $0xa, -0x18(%rbp) callq foo +## Target is a call continuation # PREAGG: B #Lfoo_ret# #Ltmp3# 1 0 # CHECK: callq foo # CHECK-NEXT: count: 1 +## PLT call continuation fallthrough spanning the call # PREAGG2: F #Ltmp1# #Ltmp3_br# 3 # CHECK2: callq foo # CHECK2-NEXT: count: 3 +## Target is a secondary entry point +# PREAGG3: B X:0 #Ltmp3# 2 0 +# CHECK3: callq foo +# CHECK3-NEXT: count: 0 + +## Target is a landing pad +# PREAGG4: B X:0 #Ltmp3# 2 0 +# CHECK4: callq puts@PLT +# CHECK4-NEXT: count: 0 + Ltmp3: cmpl $0x0, -0x18(%rbp) Ltmp3_br: @@ -80,4 +105,28 @@ Ltmp0: addq $0x20, %rsp popq %rbp retq +.Lfunc_end0: + .cfi_endproc .size main, .-main + + .section .gcc_except_table,"a",@progbits + .p2align 2, 0x0 +GCC_except_table0: +.Lexception0: + .byte 255 # @LPStart Encoding = omit + .byte 255 # @TType Encoding = omit + .byte 1 # Call site Encoding = uleb128 + .uleb128 .Lcst_end0-.Lcst_begin0 +.Lcst_begin0: + .uleb128 .Lfunc_begin0-.Lfunc_begin0 # >> Call Site 1 << + .uleb128 .Lfunc_end0-.Lfunc_begin0 # Call between .Lfunc_begin0 and .Lfunc_end0 + .uleb128 Ltmp3-.Lfunc_begin0 # jumps to Ltmp3 + .byte 0 # has no landing pad + .byte 0 # On action: cleanup +.Lcst_end0: + .p2align 2, 0x0 + .hidden DW.ref.__gxx_personality_v0 + .weak DW.ref.__gxx_personality_v0 + 
.section .data.DW.ref.__gxx_personality_v0,"awG",@progbits,DW.ref.__gxx_personality_v0,comdat + .p2align 3, 0x0 + .type DW.ref.__gxx_personality_v0,@object From b23bdb2bcb0471fb315dc351e7a1d2936ef2f3b4 Mon Sep 17 00:00:00 2001 From: Amir Ayupov Date: Thu, 7 Nov 2024 16:19:39 -0800 Subject: [PATCH 10/10] clang-format Created using spr 1.3.4 --- bolt/lib/Profile/DataAggregator.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/bolt/lib/Profile/DataAggregator.cpp b/bolt/lib/Profile/DataAggregator.cpp index 967499063b95f..697cac9fbcaa0 100644 --- a/bolt/lib/Profile/DataAggregator.cpp +++ b/bolt/lib/Profile/DataAggregator.cpp @@ -833,8 +833,8 @@ bool DataAggregator::doBranch(uint64_t From, uint64_t To, uint64_t Count, }; uint64_t ToOrig = To; - auto [FromFunc, IsReturn] = handleAddress(From, /*IsFrom*/true); - auto [ToFunc, IsCallCont] = handleAddress(To, /*IsFrom*/false); + auto [FromFunc, IsReturn] = handleAddress(From, /*IsFrom*/ true); + auto [ToFunc, IsCallCont] = handleAddress(To, /*IsFrom*/ false); if (!FromFunc && !ToFunc) return false; @@ -1824,7 +1824,7 @@ void DataAggregator::processPreAggregated() { switch (AggrEntry.EntryType) { case AggregatedLBREntry::BRANCH: doBranch(AggrEntry.From.Offset, AggrEntry.To.Offset, AggrEntry.Count, - AggrEntry.Mispreds, /*IsPreagg*/true); + AggrEntry.Mispreds, /*IsPreagg*/ true); break; case AggregatedLBREntry::FT: case AggregatedLBREntry::FT_EXTERNAL_ORIGIN: {