9797 * /tmp/jitted-PID-0.so: [headers][.text][unwind_info][padding]
9898 * /tmp/jitted-PID-1.so: [headers][.text][unwind_info][padding]
9999 *
100- * The padding size (0x100) is chosen to accommodate typical unwind info sizes
101- * while maintaining 16-byte alignment requirements.
100+ * The padding size is calculated automatically during initialization as
101+ * sizeof(EhFrameHeader) plus the generated EH frame size, rounded up to 16 bytes.
102102 */
103- #define PERF_JIT_CODE_PADDING 0x100
104103
105104/* Convenient access to the global trampoline API state */
106105#define trampoline_api _PyRuntime.ceval.perf.trampoline_api
@@ -646,6 +645,8 @@ static void elfctx_append_uleb128(ELFObjectContext* ctx, uint32_t v) {
646645// DWARF EH FRAME GENERATION
647646// =============================================================================
648647
648+ static void elf_init_ehframe (ELFObjectContext * ctx );
649+
649650/*
650651 * Initialize DWARF .eh_frame section for a code region
651652 *
@@ -660,6 +661,23 @@ static void elfctx_append_uleb128(ELFObjectContext* ctx, uint32_t v) {
660661 * Args:
661662 * ctx: ELF object context containing code size and buffer pointers
662663 */
664+ static size_t calculate_eh_frame_size (void ) {
665+ /* Calculate the EH frame size for the trampoline function */
666+ extern void * _Py_trampoline_func_start ;
667+ extern void * _Py_trampoline_func_end ;
668+
669+ size_t code_size = (char * )& _Py_trampoline_func_end - (char * )& _Py_trampoline_func_start ;
670+
671+ ELFObjectContext ctx ;
672+ char buffer [1024 ]; // Buffer for DWARF data (1KB should be sufficient)
673+ ctx .code_size = code_size ;
674+ ctx .startp = ctx .p = (uint8_t * )buffer ;
675+ ctx .fde_p = NULL ;
676+
677+ elf_init_ehframe (& ctx );
678+ return ctx .p - ctx .startp ;
679+ }
680+
663681static void elf_init_ehframe (ELFObjectContext * ctx ) {
664682 uint8_t * p = ctx -> p ;
665683 uint8_t * framep = p ; // Remember start of frame data
@@ -856,7 +874,7 @@ static void elf_init_ehframe(ELFObjectContext* ctx) {
856874 *
857875 * The FDE describes unwinding information specific to this function.
858876 * It references the CIE and provides function-specific CFI instructions.
859- *
877+ *
860878 * The PC-relative offset is calculated after the entire EH frame is built
861879 * to ensure accurate positioning relative to the synthesized DSO layout.
862880 */
@@ -881,16 +899,16 @@ static void elf_init_ehframe(ELFObjectContext* ctx) {
881899# endif
882900 DWRF_U8 (DWRF_CFA_advance_loc | 1 ); // Advance past push %rbp (1 byte)
883901 DWRF_U8 (DWRF_CFA_def_cfa_offset ); // def_cfa_offset 16
884- DWRF_UV (16 );
902+ DWRF_UV (16 ); // New offset: SP + 16
885903 DWRF_U8 (DWRF_CFA_offset | DWRF_REG_BP ); // offset r6 at cfa-16
886- DWRF_UV (2 );
904+ DWRF_UV (2 ); // Offset factor: 2 * 8 = 16 bytes
887905 DWRF_U8 (DWRF_CFA_advance_loc | 3 ); // Advance past mov %rsp,%rbp (3 bytes)
888906 DWRF_U8 (DWRF_CFA_def_cfa_register ); // def_cfa_register r6
889- DWRF_UV (DWRF_REG_BP );
907+ DWRF_UV (DWRF_REG_BP ); // Use base pointer register
890908 DWRF_U8 (DWRF_CFA_advance_loc | 3 ); // Advance past call *%rcx (2 bytes) + pop %rbp (1 byte) = 3
891909 DWRF_U8 (DWRF_CFA_def_cfa ); // def_cfa r7 ofs 8
892- DWRF_UV (DWRF_REG_SP );
893- DWRF_UV (8 );
910+ DWRF_UV (DWRF_REG_SP ); // Use stack pointer register
911+ DWRF_UV (8 ); // New offset: SP + 8
894912#elif defined(__aarch64__ ) && defined(__AARCH64EL__ ) && !defined(__ILP32__ )
895913 /* AArch64 calling convention unwinding rules */
896914 DWRF_U8 (DWRF_CFA_advance_loc | 1 ); // Advance location by 1 instruction (stp x29, x30)
@@ -914,11 +932,11 @@ static void elf_init_ehframe(ELFObjectContext* ctx) {
914932 )
915933
916934 ctx -> p = p ; // Update context pointer to end of generated data
917-
935+
918936 /* Calculate and update the PC-relative offset in the FDE
919- *
937+ *
920938 * When perf processes the jitdump, it creates a synthesized DSO with this layout:
921- *
939+ *
922940 * Synthesized DSO Memory Layout:
923941 * ┌─────────────────────────────────────────────────────────────┐ < code_start
924942 * │ Code Section │
@@ -936,33 +954,33 @@ static void elf_init_ehframe(ELFObjectContext* ctx) {
936954 * │ │ CFI Instructions... │ │
937955 * │ └─────────────────────────────────────────────────────┘ │
938956 * ├─────────────────────────────────────────────────────────────┤ < reference_point
939- * │ EhFrameHeader │
957+ * │ EhFrameHeader │
940958 * │ (navigation metadata) │
941959 * └─────────────────────────────────────────────────────────────┘
942- *
960+ *
943961 * The PC offset field in the FDE must contain the distance from itself to code_start:
944- *
962+ *
945963 * distance = code_start - fde_pc_field
946- *
964+ *
947965 * Where:
948966 * fde_pc_field_location = reference_point - eh_frame_size + fde_offset_in_frame
949967 * code_start_location = reference_point - eh_frame_size - round_up(code_size, 8)
950- *
968+ *
951969 * Therefore:
952970 * distance = code_start_location - fde_pc_field_location
953971 * = (ref - eh_frame_size - rounded_code_size) - (ref - eh_frame_size + fde_offset_in_frame)
954972 * = -rounded_code_size - fde_offset_in_frame
955973 * = -(round_up(code_size, 8) + fde_offset_in_frame)
956974 *
957975 * Note: fde_offset_in_frame is the offset from EH frame start to the PC offset field,
957975 * i.e. (ctx->fde_p - ctx->startp).
958- *
976+ *
959977 */
960978 if (ctx -> fde_p != NULL ) {
961979 int32_t fde_offset_in_frame = (ctx -> fde_p - ctx -> startp );
962980 int32_t rounded_code_size = round_up (ctx -> code_size , 8 );
963981 int32_t pc_relative_offset = - (rounded_code_size + fde_offset_in_frame );
964-
965-
982+
983+
966984 // Update the PC-relative offset in the FDE
967985 * (int32_t * )ctx -> fde_p = pc_relative_offset ;
968986 }
@@ -1066,8 +1084,10 @@ static void* perf_map_jit_init(void) {
10661084 /* Initialize code ID counter */
10671085 perf_jit_map_state .code_id = 0 ;
10681086
1069- /* Configure trampoline API with padding information */
1070- trampoline_api .code_padding = PERF_JIT_CODE_PADDING ;
1087+ /* Calculate padding size based on actual unwind info requirements */
1088+ size_t eh_frame_size = calculate_eh_frame_size ();
1089+ size_t unwind_data_size = sizeof (EhFrameHeader ) + eh_frame_size ;
1090+ trampoline_api .code_padding = round_up (unwind_data_size , 16 );
10711091
10721092 return & perf_jit_map_state ;
10731093}
@@ -1175,7 +1195,7 @@ static void perf_map_jit_write_entry(void *state, const void *code_addr,
11751195 ev2 .unwind_data_size = sizeof (EhFrameHeader ) + eh_frame_size ;
11761196
11771197 /* Verify we don't exceed our padding budget */
1178- assert (ev2 .unwind_data_size <= PERF_JIT_CODE_PADDING );
1198+ assert (ev2 .unwind_data_size <= ( uint64_t ) trampoline_api . code_padding );
11791199
11801200 ev2 .eh_frame_hdr_size = sizeof (EhFrameHeader );
11811201 ev2 .mapped_size = round_up (ev2 .unwind_data_size , 16 ); // 16-byte alignment
0 commit comments