@@ -125,9 +125,10 @@ class X86AsmBackend : public MCAsmBackend {
125125 unsigned TargetPrefixMax = 0 ;
126126
127127 MCInst PrevInst;
128+ unsigned PrevInstOpcode = 0 ;
128129 MCBoundaryAlignFragment *PendingBA = nullptr ;
129130 std::pair<MCFragment *, size_t > PrevInstPosition;
130- bool CanPadInst = false ;
131+ bool IsRightAfterData = false ;
131132
132133 uint8_t determinePaddingPrefix (const MCInst &Inst) const ;
133134 bool isMacroFused (const MCInst &Cmp, const MCInst &Jcc) const ;
@@ -267,8 +268,8 @@ static bool isRIPRelative(const MCInst &MI, const MCInstrInfo &MCII) {
267268}
268269
269270// / Check if the instruction is a prefix.
270- static bool isPrefix (const MCInst &MI , const MCInstrInfo &MCII) {
271- return X86II::isPrefix (MCII.get (MI. getOpcode () ).TSFlags );
271+ static bool isPrefix (unsigned Opcode , const MCInstrInfo &MCII) {
272+ return X86II::isPrefix (MCII.get (Opcode ).TSFlags );
272273}
273274
274275// / Check if the instruction is valid as the first instruction in macro fusion.
@@ -382,9 +383,9 @@ bool X86AsmBackend::allowEnhancedRelaxation() const {
382383
383384// / X86 has certain instructions which enable interrupts exactly one
384385// / instruction *after* the instruction which stores to SS. Return true if the
385- // / given instruction has such an interrupt delay slot.
386- static bool hasInterruptDelaySlot ( const MCInst &Inst ) {
387- switch (Inst. getOpcode () ) {
386+ // / given instruction may have such an interrupt delay slot.
387+ static bool mayHaveInterruptDelaySlot ( unsigned InstOpcode ) {
388+ switch (InstOpcode ) {
388389 case X86::POPSS16:
389390 case X86::POPSS32:
390391 case X86::STI:
@@ -394,9 +395,9 @@ static bool hasInterruptDelaySlot(const MCInst &Inst) {
394395 case X86::MOV32sr:
395396 case X86::MOV64sr:
396397 case X86::MOV16sm:
397- if (Inst. getOperand ( 0 ). getReg () == X86::SS)
398- return true ;
399- break ;
398+ // In fact, this is only the case if the first operand is SS. However, as
399+ // segment moves occur extremely rarely, this is just a minor pessimization.
400+ return true ;
400401 }
401402 return false ;
402403}
@@ -450,22 +451,22 @@ bool X86AsmBackend::canPadInst(const MCInst &Inst, MCObjectStreamer &OS) const {
450451 // TLSCALL).
451452 return false ;
452453
453- if (hasInterruptDelaySlot (PrevInst ))
454+ if (mayHaveInterruptDelaySlot (PrevInstOpcode ))
454455 // If this instruction follows an interrupt enabling instruction with a one
455456 // instruction delay, inserting a nop would change behavior.
456457 return false ;
457458
458- if (isPrefix (PrevInst , *MCII))
459+ if (isPrefix (PrevInstOpcode , *MCII))
459460 // If this instruction follows a prefix, inserting a nop/prefix would change
460461 // semantic.
461462 return false ;
462463
463- if (isPrefix (Inst, *MCII))
464+ if (isPrefix (Inst. getOpcode () , *MCII))
464465 // If this instruction is a prefix, inserting a prefix would change
465466 // semantic.
466467 return false ;
467468
468- if (isRightAfterData (OS. getCurrentFragment (), PrevInstPosition) )
469+ if (IsRightAfterData )
469470 // If this instruction follows any data, there is no clear
470471 // instruction boundary, inserting a nop/prefix would change semantic.
471472 return false ;
@@ -509,16 +510,24 @@ bool X86AsmBackend::needAlign(const MCInst &Inst) const {
509510// / Insert BoundaryAlignFragment before instructions to align branches.
510511void X86AsmBackend::emitInstructionBegin (MCObjectStreamer &OS,
511512 const MCInst &Inst, const MCSubtargetInfo &STI) {
512- CanPadInst = canPadInst (Inst, OS);
513+ // Used by canPadInst. Done here, because in emitInstructionEnd, the current
514+ // fragment will have changed.
515+ IsRightAfterData =
516+ isRightAfterData (OS.getCurrentFragment (), PrevInstPosition);
513517
514518 if (!canPadBranches (OS))
515519 return ;
516520
521+ // NB: PrevInst only valid if canPadBranches is true.
517522 if (!isMacroFused (PrevInst, Inst))
518523 // Macro fusion doesn't happen indeed, clear the pending.
519524 PendingBA = nullptr ;
520525
521- if (!CanPadInst)
526+ // When branch padding is enabled (basically the skx102 erratum => unlikely),
527+ // we call canPadInst (not cheap) twice. However, in the common case, we can
528+ // avoid unnecessary calls to that, as this is otherwise only used for
529+ // relaxable fragments.
530+ if (!canPadInst (Inst, OS))
522531 return ;
523532
524533 if (PendingBA && PendingBA->getNextNode () == OS.getCurrentFragment ()) {
@@ -552,16 +561,22 @@ void X86AsmBackend::emitInstructionBegin(MCObjectStreamer &OS,
552561}
553562
554563// / Set the last fragment to be aligned for the BoundaryAlignFragment.
555- void X86AsmBackend::emitInstructionEnd (MCObjectStreamer &OS, const MCInst &Inst) {
556- PrevInst = Inst;
564+ void X86AsmBackend::emitInstructionEnd (MCObjectStreamer &OS,
565+ const MCInst & Inst) {
557566 MCFragment *CF = OS.getCurrentFragment ();
558- PrevInstPosition = std::make_pair (CF, getSizeForInstFragment (CF));
559567 if (auto *F = dyn_cast_or_null<MCRelaxableFragment>(CF))
560- F->setAllowAutoPadding (CanPadInst);
568+ F->setAllowAutoPadding (canPadInst (Inst, OS));
569+
570+ // Update PrevInstOpcode here, canPadInst() reads that.
571+ PrevInstOpcode = Inst.getOpcode ();
572+ PrevInstPosition = std::make_pair (CF, getSizeForInstFragment (CF));
561573
562574 if (!canPadBranches (OS))
563575 return ;
564576
577+ // PrevInst is only needed if canPadBranches. Copying an MCInst isn't cheap.
578+ PrevInst = Inst;
579+
565580 if (!needAlign (Inst) || !PendingBA)
566581 return ;
567582
0 commit comments