@@ -206,6 +206,10 @@ static bool isREX(struct InternalInstruction *insn, uint8_t prefix) {
206206 return insn->mode == MODE_64BIT && prefix >= 0x40 && prefix <= 0x4f ;
207207}
208208
// Returns true when `prefix` is the byte 0xd5 and the instruction is being
// decoded in 64-bit mode; readPrefixes() treats such a byte as the first byte
// of a two-byte REX2 prefix and stores it in rex2ExtensionPrefix[0]. In any
// other mode the byte is never classified as REX2 by this helper.
209+ static bool isREX2 (struct InternalInstruction *insn, uint8_t prefix) {
210+ return insn->mode == MODE_64BIT && prefix == 0xd5 ;
211+ }
212+
209213// Consumes all of an instruction's prefix bytes, and marks the
210214// instruction as having them. Also sets the instruction's default operand,
211215// address, and other relevant data sizes to report operands correctly.
@@ -337,8 +341,7 @@ static int readPrefixes(struct InternalInstruction *insn) {
337341 return -1 ;
338342 }
339343
340- if ((insn->mode == MODE_64BIT || (byte1 & 0xc0 ) == 0xc0 ) &&
341- ((~byte1 & 0x8 ) == 0x8 ) && ((byte2 & 0x4 ) == 0x4 )) {
344+ if ((insn->mode == MODE_64BIT || (byte1 & 0xc0 ) == 0xc0 )) {
342345 insn->vectorExtensionType = TYPE_EVEX;
343346 } else {
344347 --insn->readerCursor ; // unconsume byte1
@@ -357,13 +360,19 @@ static int readPrefixes(struct InternalInstruction *insn) {
357360 return -1 ;
358361 }
359362
360- // We simulate the REX prefix for simplicity's sake
361363 if (insn->mode == MODE_64BIT) {
364+ // We simulate the REX prefix for simplicity's sake
362365 insn->rexPrefix = 0x40 |
363366 (wFromEVEX3of4 (insn->vectorExtensionPrefix [2 ]) << 3 ) |
364367 (rFromEVEX2of4 (insn->vectorExtensionPrefix [1 ]) << 2 ) |
365368 (xFromEVEX2of4 (insn->vectorExtensionPrefix [1 ]) << 1 ) |
366369 (bFromEVEX2of4 (insn->vectorExtensionPrefix [1 ]) << 0 );
370+
371+ // We simulate the REX2 prefix for simplicity's sake
372+ insn->rex2ExtensionPrefix [1 ] =
373+ (r2FromEVEX2of4 (insn->vectorExtensionPrefix [1 ]) << 6 ) |
374+ (x2FromEVEX3of4 (insn->vectorExtensionPrefix [2 ]) << 5 ) |
375+ (b2FromEVEX2of4 (insn->vectorExtensionPrefix [1 ]) << 4 );
367376 }
368377
369378 LLVM_DEBUG (
@@ -474,6 +483,23 @@ static int readPrefixes(struct InternalInstruction *insn) {
474483 insn->vectorExtensionPrefix [1 ],
475484 insn->vectorExtensionPrefix [2 ]));
476485 }
486+ } else if (isREX2 (insn, byte)) {
487+ uint8_t byte1;
488+ if (peek (insn, byte1)) {
489+ LLVM_DEBUG (dbgs () << " Couldn't read second byte of REX2" );
490+ return -1 ;
491+ }
492+ insn->rex2ExtensionPrefix [0 ] = byte;
493+ consume (insn, insn->rex2ExtensionPrefix [1 ]);
494+
495+ // We simulate the REX prefix for simplicity's sake
496+ insn->rexPrefix = 0x40 | (wFromREX2 (insn->rex2ExtensionPrefix [1 ]) << 3 ) |
497+ (rFromREX2 (insn->rex2ExtensionPrefix [1 ]) << 2 ) |
498+ (xFromREX2 (insn->rex2ExtensionPrefix [1 ]) << 1 ) |
499+ (bFromREX2 (insn->rex2ExtensionPrefix [1 ]) << 0 );
500+ LLVM_DEBUG (dbgs () << format (" Found REX2 prefix 0x%hhx 0x%hhx" ,
501+ insn->rex2ExtensionPrefix [0 ],
502+ insn->rex2ExtensionPrefix [1 ]));
477503 } else if (isREX (insn, byte)) {
478504 if (peek (insn, nextByte))
479505 return -1 ;
@@ -532,7 +558,8 @@ static int readSIB(struct InternalInstruction *insn) {
532558 if (consume (insn, insn->sib ))
533559 return -1 ;
534560
535- index = indexFromSIB (insn->sib ) | (xFromREX (insn->rexPrefix ) << 3 );
561+ index = indexFromSIB (insn->sib ) | (xFromREX (insn->rexPrefix ) << 3 ) |
562+ (x2FromREX2 (insn->rex2ExtensionPrefix [1 ]) << 4 );
536563
537564 if (index == 0x4 ) {
538565 insn->sibIndex = SIB_INDEX_NONE;
@@ -542,7 +569,8 @@ static int readSIB(struct InternalInstruction *insn) {
542569
543570 insn->sibScale = 1 << scaleFromSIB (insn->sib );
544571
545- base = baseFromSIB (insn->sib ) | (bFromREX (insn->rexPrefix ) << 3 );
572+ base = baseFromSIB (insn->sib ) | (bFromREX (insn->rexPrefix ) << 3 ) |
573+ (b2FromREX2 (insn->rex2ExtensionPrefix [1 ]) << 4 );
546574
547575 switch (base) {
548576 case 0x5 :
@@ -604,7 +632,7 @@ static int readDisplacement(struct InternalInstruction *insn) {
604632
605633// Consumes all addressing information (ModR/M byte, SIB byte, and displacement).
606634static int readModRM (struct InternalInstruction *insn) {
607- uint8_t mod, rm, reg, evexrm ;
635+ uint8_t mod, rm, reg;
608636 LLVM_DEBUG (dbgs () << " readModRM()" );
609637
610638 if (insn->consumedModRM )
@@ -636,14 +664,13 @@ static int readModRM(struct InternalInstruction *insn) {
636664 break ;
637665 }
638666
639- reg |= rFromREX (insn->rexPrefix ) << 3 ;
640- rm |= bFromREX (insn->rexPrefix ) << 3 ;
667+ reg |= (rFromREX (insn->rexPrefix ) << 3 ) |
668+ (r2FromREX2 (insn->rex2ExtensionPrefix [1 ]) << 4 );
669+ rm |= (bFromREX (insn->rexPrefix ) << 3 ) |
670+ (b2FromREX2 (insn->rex2ExtensionPrefix [1 ]) << 4 );
641671
642- evexrm = 0 ;
643- if (insn->vectorExtensionType == TYPE_EVEX && insn->mode == MODE_64BIT) {
672+ if (insn->vectorExtensionType == TYPE_EVEX && insn->mode == MODE_64BIT)
644673 reg |= r2FromEVEX2of4 (insn->vectorExtensionPrefix [1 ]) << 4 ;
645- evexrm = xFromEVEX2of4 (insn->vectorExtensionPrefix [1 ]) << 4 ;
646- }
647674
648675 insn->reg = (Reg)(insn->regBase + reg);
649676
@@ -731,7 +758,7 @@ static int readModRM(struct InternalInstruction *insn) {
731758 break ;
732759 case 0x3 :
733760 insn->eaDisplacement = EA_DISP_NONE;
734- insn->eaBase = (EABase)(insn->eaRegBase + rm + evexrm );
761+ insn->eaBase = (EABase)(insn->eaRegBase + rm);
735762 break ;
736763 }
737764 break ;
@@ -741,7 +768,7 @@ static int readModRM(struct InternalInstruction *insn) {
741768 return 0 ;
742769}
743770
744- #define GENERIC_FIXUP_FUNC (name, base, prefix, mask ) \
771+ #define GENERIC_FIXUP_FUNC (name, base, prefix ) \
745772 static uint16_t name (struct InternalInstruction *insn, OperandType type, \
746773 uint8_t index, uint8_t *valid) { \
747774 *valid = 1 ; \
@@ -753,28 +780,15 @@ static int readModRM(struct InternalInstruction *insn) {
753780 case TYPE_Rv: \
754781 return base + index; \
755782 case TYPE_R8: \
756- index &= mask; \
757- if (index > 0xf ) \
758- *valid = 0 ; \
759- if (insn->rexPrefix && index >= 4 && index <= 7 ) { \
783+ if (insn->rexPrefix && index >= 4 && index <= 7 ) \
760784 return prefix##_SPL + (index - 4 ); \
761- } else { \
785+ else \
762786 return prefix##_AL + index; \
763- } \
764787 case TYPE_R16: \
765- index &= mask; \
766- if (index > 0xf ) \
767- *valid = 0 ; \
768788 return prefix##_AX + index; \
769789 case TYPE_R32: \
770- index &= mask; \
771- if (index > 0xf ) \
772- *valid = 0 ; \
773790 return prefix##_EAX + index; \
774791 case TYPE_R64: \
775- index &= mask; \
776- if (index > 0xf ) \
777- *valid = 0 ; \
778792 return prefix##_RAX + index; \
779793 case TYPE_ZMM: \
780794 return prefix##_ZMM0 + index; \
@@ -824,8 +838,8 @@ static int readModRM(struct InternalInstruction *insn) {
824838// @param valid - The address of a uint8_t. The target is set to 1 if the
825839// field is valid for the register class; 0 if not.
826840// @return - The proper value.
827- GENERIC_FIXUP_FUNC (fixupRegValue, insn->regBase, MODRM_REG, 0x1f )
828- GENERIC_FIXUP_FUNC(fixupRMValue, insn->eaRegBase, EA_REG, 0xf )
841+ GENERIC_FIXUP_FUNC (fixupRegValue, insn->regBase, MODRM_REG)
842+ GENERIC_FIXUP_FUNC(fixupRMValue, insn->eaRegBase, EA_REG)
829843
830844// Consult an operand specifier to determine which of the fixup*Value functions
831845// to use in correcting readModRM()'s interpretation.
@@ -855,8 +869,31 @@ static int fixupReg(struct InternalInstruction *insn,
855869 if (!valid)
856870 return -1 ;
857871 break ;
858- case ENCODING_SIB:
859872 CASE_ENCODING_RM:
873+ if (insn->vectorExtensionType == TYPE_EVEX && insn->mode == MODE_64BIT &&
874+ modFromModRM (insn->modRM ) == 3 ) {
875+ // EVEX_X can extend the register id to 32 for a non-GPR register that is
876+ // encoded in RM.
877+ // mode : MODE_64_BIT
878+ // Only 8 vector registers are available in 32 bit mode
879+ // mod : 3
880+ // RM encodes a register
881+ switch (op->type ) {
882+ case TYPE_Rv:
883+ case TYPE_R8:
884+ case TYPE_R16:
885+ case TYPE_R32:
886+ case TYPE_R64:
887+ break ;
888+ default :
889+ insn->eaBase =
890+ (EABase)(insn->eaBase +
891+ (xFromEVEX2of4 (insn->vectorExtensionPrefix [1 ]) << 4 ));
892+ break ;
893+ }
894+ }
895+ [[fallthrough]];
896+ case ENCODING_SIB:
860897 if (insn->eaBase >= insn->eaRegBase ) {
861898 insn->eaBase = (EABase)fixupRMValue (
862899 insn, (OperandType)op->type , insn->eaBase - insn->eaRegBase , &valid);
@@ -945,6 +982,10 @@ static bool readOpcode(struct InternalInstruction *insn) {
945982 insn->opcodeType = XOPA_MAP;
946983 return consume (insn, insn->opcode );
947984 }
985+ } else if (mFromREX2 (insn->rex2ExtensionPrefix [1 ])) {
986+ // m bit indicates opcode map 1
987+ insn->opcodeType = TWOBYTE;
988+ return consume (insn, insn->opcode );
948989 }
949990
950991 if (consume (insn, current))
@@ -1388,10 +1429,16 @@ static int readOpcodeRegister(struct InternalInstruction *insn, uint8_t size) {
13881429 if (size == 0 )
13891430 size = insn->registerSize ;
13901431
1432+ auto setOpcodeRegister = [&](unsigned base) {
1433+ insn->opcodeRegister =
1434+ (Reg)(base + ((bFromREX (insn->rexPrefix ) << 3 ) |
1435+ (b2FromREX2 (insn->rex2ExtensionPrefix [1 ]) << 4 ) |
1436+ (insn->opcode & 7 )));
1437+ };
1438+
13911439 switch (size) {
13921440 case 1 :
1393- insn->opcodeRegister = (Reg)(
1394- MODRM_REG_AL + ((bFromREX (insn->rexPrefix ) << 3 ) | (insn->opcode & 7 )));
1441+ setOpcodeRegister (MODRM_REG_AL);
13951442 if (insn->rexPrefix && insn->opcodeRegister >= MODRM_REG_AL + 0x4 &&
13961443 insn->opcodeRegister < MODRM_REG_AL + 0x8 ) {
13971444 insn->opcodeRegister =
@@ -1400,18 +1447,13 @@ static int readOpcodeRegister(struct InternalInstruction *insn, uint8_t size) {
14001447
14011448 break ;
14021449 case 2 :
1403- insn->opcodeRegister = (Reg)(
1404- MODRM_REG_AX + ((bFromREX (insn->rexPrefix ) << 3 ) | (insn->opcode & 7 )));
1450+ setOpcodeRegister (MODRM_REG_AX);
14051451 break ;
14061452 case 4 :
1407- insn->opcodeRegister =
1408- (Reg)(MODRM_REG_EAX +
1409- ((bFromREX (insn->rexPrefix ) << 3 ) | (insn->opcode & 7 )));
1453+ setOpcodeRegister (MODRM_REG_EAX);
14101454 break ;
14111455 case 8 :
1412- insn->opcodeRegister =
1413- (Reg)(MODRM_REG_RAX +
1414- ((bFromREX (insn->rexPrefix ) << 3 ) | (insn->opcode & 7 )));
1456+ setOpcodeRegister (MODRM_REG_RAX);
14151457 break ;
14161458 }
14171459
0 commit comments