diff --git a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVBaseInfo.h b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVBaseInfo.h index 80ff18d914dca..135aec0c8135c 100644 --- a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVBaseInfo.h +++ b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVBaseInfo.h @@ -432,7 +432,44 @@ enum RoundingMode { RNE = 1, RDN = 2, ROD = 3, + Invalid }; + +inline static StringRef roundingModeToString(RoundingMode RndMode) { + switch (RndMode) { + default: + llvm_unreachable("Unknown vector fixed-point rounding mode"); + case RISCVVXRndMode::RNU: + return "rnu"; + case RISCVVXRndMode::RNE: + return "rne"; + case RISCVVXRndMode::RDN: + return "rdn"; + case RISCVVXRndMode::ROD: + return "rod"; + } +} + +inline static RoundingMode stringToRoundingMode(StringRef Str) { + return StringSwitch<RoundingMode>(Str) + .Case("rnu", RISCVVXRndMode::RNU) + .Case("rne", RISCVVXRndMode::RNE) + .Case("rdn", RISCVVXRndMode::RDN) + .Case("rod", RISCVVXRndMode::ROD) + .Default(RISCVVXRndMode::Invalid); +} + +inline static bool isValidRoundingMode(unsigned Mode) { + switch (Mode) { + default: + return false; + case RISCVVXRndMode::RNU: + case RISCVVXRndMode::RNE: + case RISCVVXRndMode::RDN: + case RISCVVXRndMode::ROD: + return true; + } +} } // namespace RISCVVXRndMode //===----------------------------------------------------------------------===// diff --git a/llvm/test/tools/llvm-exegesis/RISCV/rvv/eligible-inst.test b/llvm/test/tools/llvm-exegesis/RISCV/rvv/eligible-inst.test new file mode 100644 index 0000000000000..d5fdbc17192aa --- /dev/null +++ b/llvm/test/tools/llvm-exegesis/RISCV/rvv/eligible-inst.test @@ -0,0 +1,59 @@ +# RUN: llvm-exegesis -mtriple=riscv64 -mcpu=sifive-x280 -benchmark-phase=assemble-measured-code --mode=latency \ +# RUN: --opcode-name=PseudoVCOMPRESS_VM_M2_E8,PseudoVCPOP_M_B32 | \ +# RUN: FileCheck %s --allow-empty --check-prefix=LATENCY +# RUN: llvm-exegesis -mtriple=riscv64 -mcpu=sifive-x280 -benchmark-phase=assemble-measured-code --mode=inverse_throughput \ +# RUN:
--opcode-name=PseudoVCOMPRESS_VM_M2_E8,PseudoVCPOP_M_B32 \ +# RUN: --min-instructions=100 | \ +# RUN: FileCheck %s --check-prefix=RTHROUGHPUT1 + +# RUN: llvm-exegesis -mtriple=riscv64 -mcpu=sifive-x280 -benchmark-phase=assemble-measured-code --mode=latency \ +# RUN: --opcode-name=PseudoVRGATHEREI16_VV_M2_E32_M1,PseudoVRGATHER_VI_M2,PseudoVRGATHER_VV_M8_E32,PseudoVRGATHER_VX_M4 | \ +# RUN: FileCheck %s --allow-empty --check-prefix=LATENCY +# RUN: llvm-exegesis -mtriple=riscv64 -mcpu=sifive-x280 -benchmark-phase=assemble-measured-code --mode=inverse_throughput \ +# RUN: --opcode-name=PseudoVRGATHEREI16_VV_M2_E32_M1,PseudoVRGATHER_VI_M2,PseudoVRGATHER_VV_M8_E32,PseudoVRGATHER_VX_M4 \ +# RUN: --min-instructions=100 | \ +# RUN: FileCheck %s --check-prefix=RTHROUGHPUT2 + +# RUN: llvm-exegesis -mtriple=riscv64 -mcpu=sifive-x280 -benchmark-phase=assemble-measured-code --mode=latency \ +# RUN: --opcode-name=PseudoVSLIDE1UP_VX_M1,PseudoVSLIDEUP_VI_M2,PseudoVSLIDEUP_VX_M2 | \ +# RUN: FileCheck %s --allow-empty --check-prefix=LATENCY +# RUN: llvm-exegesis -mtriple=riscv64 -mcpu=sifive-x280 -benchmark-phase=assemble-measured-code --mode=inverse_throughput \ +# RUN: --opcode-name=PseudoVSLIDE1UP_VX_M1,PseudoVSLIDEUP_VI_M2,PseudoVSLIDEUP_VX_M2 \ +# RUN: --min-instructions=100 | \ +# RUN: FileCheck %s --check-prefix=RTHROUGHPUT3 + +# RUN: llvm-exegesis -mtriple=riscv64 -mcpu=sifive-x280 -benchmark-phase=assemble-measured-code --mode=latency \ +# RUN: --opcode-name=PseudoVNCLIPU_WI_M2,PseudoVNSRA_WI_M2,PseudoVNSRL_WI_M2 | \ +# RUN: FileCheck %s --allow-empty --check-prefix=LATENCY +# RUN: llvm-exegesis -mtriple=riscv64 -mcpu=sifive-x280 -benchmark-phase=assemble-measured-code --mode=inverse_throughput \ +# RUN: --opcode-name=PseudoVNCLIPU_WI_M2,PseudoVNSRA_WI_M2,PseudoVNSRL_WI_M2 \ +# RUN: --min-instructions=100 | \ +# RUN: FileCheck %s --check-prefix=RTHROUGHPUT4 + +# These instructions are only eligible under the inverse throughput mode. 
+ +# LATENCY-NOT: PseudoVCOMPRESS_VM_M2_E8 +# LATENCY-NOT: PseudoVCPOP_M_B32 +# LATENCY-NOT: PseudoVRGATHEREI16_VV_M2_E32_M1 +# LATENCY-NOT: PseudoVRGATHER_VI_M2 +# LATENCY-NOT: PseudoVRGATHER_VV_M8_E32 +# LATENCY-NOT: PseudoVRGATHER_VX_M4 +# LATENCY-NOT: PseudoVSLIDE1UP_VX_M1 +# LATENCY-NOT: PseudoVSLIDEUP_VI_M2 +# LATENCY-NOT: PseudoVSLIDEUP_VX_M2 +# LATENCY-NOT: PseudoVNCLIPU_WI_M2 +# LATENCY-NOT: PseudoVNSRA_WI_M2 +# LATENCY-NOT: PseudoVNSRL_WI_M2 + +# RTHROUGHPUT1: PseudoVCOMPRESS_VM_M2_E8 +# RTHROUGHPUT1: PseudoVCPOP_M_B32 +# RTHROUGHPUT2: PseudoVRGATHEREI16_VV_M2_E32_M1 +# RTHROUGHPUT2: PseudoVRGATHER_VI_M2 +# RTHROUGHPUT2: PseudoVRGATHER_VV_M8_E32 +# RTHROUGHPUT2: PseudoVRGATHER_VX_M4 +# RTHROUGHPUT3: PseudoVSLIDE1UP_VX_M1 +# RTHROUGHPUT3: PseudoVSLIDEUP_VI_M2 +# RTHROUGHPUT3: PseudoVSLIDEUP_VX_M2 +# RTHROUGHPUT4: PseudoVNCLIPU_WI_M2 +# RTHROUGHPUT4: PseudoVNSRA_WI_M2 +# RTHROUGHPUT4: PseudoVNSRL_WI_M2 diff --git a/llvm/test/tools/llvm-exegesis/RISCV/rvv/explicit-sew.test b/llvm/test/tools/llvm-exegesis/RISCV/rvv/explicit-sew.test new file mode 100644 index 0000000000000..476cf35818d6f --- /dev/null +++ b/llvm/test/tools/llvm-exegesis/RISCV/rvv/explicit-sew.test @@ -0,0 +1,7 @@ +# RUN: llvm-exegesis -mtriple=riscv64 -mcpu=sifive-x280 -benchmark-phase=assemble-measured-code --mode=latency --opcode-name=PseudoVFWREDUSUM_VS_M1_E32 \ +# RUN: --max-configs-per-opcode=1000 --min-instructions=100 | FileCheck %s + +# Make sure none of the config has SEW other than e32 +# CHECK: PseudoVFWREDUSUM_VS_M1_E32 +# CHECK: SEW: e32 +# CHECK-NOT: SEW: e{{(8|16|64)}} diff --git a/llvm/test/tools/llvm-exegesis/RISCV/rvv/filter.test b/llvm/test/tools/llvm-exegesis/RISCV/rvv/filter.test new file mode 100644 index 0000000000000..e3a4336fdf670 --- /dev/null +++ b/llvm/test/tools/llvm-exegesis/RISCV/rvv/filter.test @@ -0,0 +1,6 @@ +# RUN: llvm-exegesis -mtriple=riscv64 -mcpu=sifive-x280 -benchmark-phase=assemble-measured-code --mode=inverse_throughput 
--opcode-name=PseudoVNCLIPU_WX_M1_MASK \ +# RUN: --riscv-filter-config='vtype = {VXRM: rod, AVL: VLMAX, SEW: e(8|16), Policy: ta/mu}' --max-configs-per-opcode=1000 --min-instructions=100 | FileCheck %s + +# CHECK: config: 'vtype = {VXRM: rod, AVL: VLMAX, SEW: e8, Policy: ta/mu}' +# CHECK: config: 'vtype = {VXRM: rod, AVL: VLMAX, SEW: e16, Policy: ta/mu}' +# CHECK-NOT: config: 'vtype = {VXRM: rod, AVL: VLMAX, SEW: e(32|64), Policy: ta/mu}' diff --git a/llvm/test/tools/llvm-exegesis/RISCV/rvv/reduction.test b/llvm/test/tools/llvm-exegesis/RISCV/rvv/reduction.test new file mode 100644 index 0000000000000..a637fa24af16b --- /dev/null +++ b/llvm/test/tools/llvm-exegesis/RISCV/rvv/reduction.test @@ -0,0 +1,7 @@ +# RUN: llvm-exegesis -mtriple=riscv64 -mcpu=sifive-p670 -benchmark-phase=assemble-measured-code --mode=latency --opcode-name=PseudoVWREDSUMU_VS_M8_E32 --min-instructions=100 | \ +# RUN: FileCheck %s + +# Make sure reduction ops don't have alias between vd and vs1 +# CHECK: instructions: +# CHECK-NEXT: PseudoVWREDSUMU_VS_M8_E32 +# CHECK-NOT: V[[REG:[0-9]+]] V[[REG]] V{{[0-9]+}}M8 V[[REG]] diff --git a/llvm/test/tools/llvm-exegesis/RISCV/rvv/self-aliasing.test b/llvm/test/tools/llvm-exegesis/RISCV/rvv/self-aliasing.test new file mode 100644 index 0000000000000..c950341716238 --- /dev/null +++ b/llvm/test/tools/llvm-exegesis/RISCV/rvv/self-aliasing.test @@ -0,0 +1,6 @@ +# RUN: llvm-exegesis -mtriple=riscv64 -mcpu=sifive-x280 -benchmark-phase=assemble-measured-code --mode=latency --opcode-name=PseudoVXOR_VX_M4 --min-instructions=100 | \ +# RUN: FileCheck %s + +# Make sure all def / use operands are the same in latency mode. 
+# CHECK: instructions: +# CHECK-NEXT: PseudoVXOR_VX_M4 V[[REG:[0-9]+]]M4 V[[REG]]M4 V[[REG]]M4 X{{.*}} diff --git a/llvm/test/tools/llvm-exegesis/RISCV/rvv/skip-rm.test b/llvm/test/tools/llvm-exegesis/RISCV/rvv/skip-rm.test new file mode 100644 index 0000000000000..00cdfffacf0b3 --- /dev/null +++ b/llvm/test/tools/llvm-exegesis/RISCV/rvv/skip-rm.test @@ -0,0 +1,12 @@ +# RUN: llvm-exegesis -mtriple=riscv64 -mcpu=sifive-x280 -benchmark-phase=assemble-measured-code --mode=latency --opcode-name=PseudoVAADDU_VV_M1 \ +# RUN: --riscv-enumerate-rounding-modes=false --max-configs-per-opcode=1000 --min-instructions=100 | FileCheck %s --check-prefix=VX +# RUN: llvm-exegesis -mtriple=riscv64 -mcpu=sifive-x280 -benchmark-phase=assemble-measured-code --mode=latency --opcode-name=PseudoVFADD_VFPR16_M1_E16 \ +# RUN: --riscv-enumerate-rounding-modes=false --max-configs-per-opcode=1000 --min-instructions=100 | FileCheck %s --check-prefix=FP + +# VX: PseudoVAADDU_VV_M1 +# VX: VXRM: rnu +# VX-NOT: VXRM: {{(rne|rdn|rod)}} + +# FP: PseudoVFADD_VFPR16_M1_E16 +# FP: FRM: dyn +# FP-NOT: FRM: {{(rtz|rdn|rup|rmm|rne)}} diff --git a/llvm/test/tools/llvm-exegesis/RISCV/rvv/valid-sew-zvk.test b/llvm/test/tools/llvm-exegesis/RISCV/rvv/valid-sew-zvk.test new file mode 100644 index 0000000000000..515d3397b57be --- /dev/null +++ b/llvm/test/tools/llvm-exegesis/RISCV/rvv/valid-sew-zvk.test @@ -0,0 +1,33 @@ +# RUN: llvm-exegesis -mtriple=riscv64 -mcpu=sifive-p670 -benchmark-phase=assemble-measured-code --mode=inverse_throughput \ +# RUN: --opcode-name=PseudoVAESDF_VS_M1_M1 --max-configs-per-opcode=1000 --min-instructions=100 | \ +# RUN: FileCheck %s --check-prefix=ZVK +# RUN: llvm-exegesis -mtriple=riscv64 -mcpu=sifive-p670 -benchmark-phase=assemble-measured-code --mode=inverse_throughput \ +# RUN: --opcode-name=PseudoVGHSH_VV_M1 --max-configs-per-opcode=1000 --min-instructions=100 | \ +# RUN: FileCheck %s --check-prefix=ZVK +# RUN: llvm-exegesis -mtriple=riscv64 -mcpu=sifive-p670 
-benchmark-phase=assemble-measured-code --mode=inverse_throughput \ +# RUN: --opcode-name=PseudoVSM4K_VI_M1 --max-configs-per-opcode=1000 --min-instructions=100 | \ +# RUN: FileCheck %s --check-prefix=ZVK +# RUN: llvm-exegesis -mtriple=riscv64 -mcpu=sifive-p670 -benchmark-phase=assemble-measured-code --mode=inverse_throughput \ +# RUN: --opcode-name=PseudoVSM3C_VI_M2 --max-configs-per-opcode=1000 --min-instructions=100 | \ +# RUN: FileCheck %s --check-prefix=ZVK +# RUN: llvm-exegesis -mtriple=riscv64 -mcpu=sifive-p670 -benchmark-phase=assemble-measured-code --mode=inverse_throughput \ +# RUN: --opcode-name=PseudoVSHA2MS_VV_M1_E32 --max-configs-per-opcode=1000 --min-instructions=100 | \ +# RUN: FileCheck %s --allow-empty --check-prefix=ZVKNH +# RUN: llvm-exegesis -mtriple=riscv64 -mcpu=sifive-p670 -benchmark-phase=assemble-measured-code --mode=inverse_throughput \ +# RUN: --opcode-name=PseudoVSHA2MS_VV_M2_E64 --max-configs-per-opcode=1000 --min-instructions=100 | \ +# RUN: FileCheck %s --allow-empty --check-prefix=ZVKNH +# RUN: llvm-exegesis -mtriple=riscv64 -mcpu=sifive-p670 -benchmark-phase=assemble-measured-code --mode=inverse_throughput \ +# RUN: --opcode-name=PseudoVSM3C_VI_M1 --max-configs-per-opcode=1000 --min-instructions=100 | \ +# RUN: FileCheck %s --allow-empty --check-prefix=EMPTY + +# Most vector crypto only supports SEW=32, except Zvknhb which also supports SEW=64 +# ZVK-NOT: SEW: e{{(8|16)}} +# ZVK: SEW: e32 +# ZVK-NOT: SEW: e64 + +# ZVKNH(A|B) can either have SEW=32 (EGW=128) or SEW=64 (EGW=256) + +# ZVKNH-NOT: SEW: e{{(8|16)}} +# ZVKNH: SEW: e{{(32|64)}} + +# EMPTY-NOT: SEW: e{{(8|16|32|64)}} diff --git a/llvm/test/tools/llvm-exegesis/RISCV/rvv/valid-sew.test b/llvm/test/tools/llvm-exegesis/RISCV/rvv/valid-sew.test new file mode 100644 index 0000000000000..b678300564529 --- /dev/null +++ b/llvm/test/tools/llvm-exegesis/RISCV/rvv/valid-sew.test @@ -0,0 +1,41 @@ +# RUN: llvm-exegesis -mtriple=riscv64 -mcpu=sifive-x280 
-benchmark-phase=assemble-measured-code --mode=latency --opcode-name=PseudoVMUL_VV_MF4_MASK \ +# RUN: --max-configs-per-opcode=1000 --min-instructions=100 | FileCheck %s --check-prefix=FRAC-LMUL +# RUN: llvm-exegesis -mtriple=riscv64 -mcpu=sifive-x280 -benchmark-phase=assemble-measured-code --mode=latency \ +# RUN: --opcode-name=PseudoVFADD_VFPR16_M1_E16,PseudoVFADD_VV_M2_E16,PseudoVFCLASS_V_MF2 --max-configs-per-opcode=1000 --min-instructions=100 | \ +# RUN: FileCheck %s --check-prefix=FP +# RUN: llvm-exegesis -mtriple=riscv64 -mcpu=sifive-x280 -benchmark-phase=assemble-measured-code --mode=inverse_throughput \ +# RUN: --opcode-name=PseudoVSEXT_VF8_M2,PseudoVZEXT_VF8_M2 --max-configs-per-opcode=1000 --min-instructions=100 | \ +# RUN: FileCheck %s --check-prefix=VEXT +# RUN: llvm-exegesis -mtriple=riscv64 -mcpu=sifive-p470 -benchmark-phase=assemble-measured-code --mode=latency \ +# RUN: --opcode-name=PseudoVFREDUSUM_VS_M1_E16 --max-configs-per-opcode=1000 --min-instructions=100 | \ +# RUN: FileCheck %s --check-prefix=VFRED --allow-empty + +# Make sure only the supported SEWs are generated for fractional LMUL. +# FRAC-LMUL: PseudoVMUL_VV_MF4_MASK +# FRAC-LMUL: SEW: e8 +# FRAC-LMUL: SEW: e16 +# FRAC-LMUL-NOT: SEW: e{{(32|64)}} + +# Make sure only SEWs that are equal to the supported FLEN are generated +# FP: PseudoVFADD_VFPR16_M1_E16 +# FP-NOT: SEW: e8 +# FP: PseudoVFADD_VV_M2_E16 +# FP-NOT: SEW: e8 +# FP: PseudoVFCLASS_V_MF2 +# FP-NOT: SEW: e8 + +# VS/ZEXT can only operate on SEW that will not lead to invalid EEW on the +# source operand. 
+# VEXT: PseudoVSEXT_VF8_M2 +# VEXT-NOT: SEW: e8 +# VEXT-NOT: SEW: e16 +# VEXT-NOT: SEW: e32 +# VEXT: SEW: e64 +# VEXT: PseudoVZEXT_VF8_M2 +# VEXT-NOT: SEW: e8 +# VEXT-NOT: SEW: e16 +# VEXT-NOT: SEW: e32 +# VEXT: SEW: e64 + +# P470 doesn't have Zvfh so 16-bit vfredusum shouldn't exist +# VFRED-NOT: PseudoVFREDUSUM_VS_M1_E16 diff --git a/llvm/test/tools/llvm-exegesis/RISCV/rvv/vlmax-only.test b/llvm/test/tools/llvm-exegesis/RISCV/rvv/vlmax-only.test new file mode 100644 index 0000000000000..30897b6e13735 --- /dev/null +++ b/llvm/test/tools/llvm-exegesis/RISCV/rvv/vlmax-only.test @@ -0,0 +1,7 @@ +# RUN: llvm-exegesis -mtriple=riscv64 -mcpu=sifive-x280 -benchmark-phase=assemble-measured-code --mode=latency --opcode-name=PseudoVFWREDUSUM_VS_M1_E32 \ +# RUN: --riscv-vlmax-for-vl --max-configs-per-opcode=1000 --min-instructions=100 | FileCheck %s + +# Only allow VLMAX for AVL when -riscv-vlmax-for-vl is present +# CHECK: PseudoVFWREDUSUM_VS_M1_E32 +# CHECK: AVL: VLMAX +# CHECK-NOT: AVL: {{(simm5|)}} diff --git a/llvm/test/tools/llvm-exegesis/RISCV/rvv/vtype-rm-setup.test b/llvm/test/tools/llvm-exegesis/RISCV/rvv/vtype-rm-setup.test new file mode 100644 index 0000000000000..c41b357c13821 --- /dev/null +++ b/llvm/test/tools/llvm-exegesis/RISCV/rvv/vtype-rm-setup.test @@ -0,0 +1,13 @@ +# RUN: llvm-exegesis -mtriple=riscv64 -mcpu=sifive-x280 -benchmark-phase=assemble-measured-code --mode=latency --opcode-name=PseudoVFWREDUSUM_VS_M1_E32 \ +# RUN: --max-configs-per-opcode=1 --min-instructions=100 --dump-object-to-disk=%t.o > %t.txt +# RUN: llvm-objdump --triple=riscv64 -d %t.o | FileCheck %s --check-prefix=VFWREDUSUM +# RUN: llvm-exegesis -mtriple=riscv64 -mcpu=sifive-x280 -benchmark-phase=assemble-measured-code --mode=latency --opcode-name=PseudoVSSRL_VX_MF4 \ +# RUN: --max-configs-per-opcode=1 --min-instructions=100 --dump-object-to-disk=%t.o > %t.txt +# RUN: llvm-objdump --triple=riscv64 -d %t.o | FileCheck %s --check-prefix=VSSRL + +# Make sure the correct VSETVL / VXRM 
write / FRM write instructions are generated +# VFWREDUSUM: vsetvli {{.*}}, zero, e32, m1, tu, ma +# VFWREDUSUM: fsrmi {{.*}}, 0x0 + +# VSSRL: vsetvli {{.*}}, zero, e8, mf4, tu, ma +# VSSRL: csrwi vxrm, 0x0 diff --git a/llvm/tools/llvm-exegesis/lib/MCInstrDescView.cpp b/llvm/tools/llvm-exegesis/lib/MCInstrDescView.cpp index c002f68b427f7..e0e796cee8040 100644 --- a/llvm/tools/llvm-exegesis/lib/MCInstrDescView.cpp +++ b/llvm/tools/llvm-exegesis/lib/MCInstrDescView.cpp @@ -50,6 +50,8 @@ bool Operand::isTied() const { return TiedToIndex.has_value(); } bool Operand::isVariable() const { return VariableIndex.has_value(); } +bool Operand::isEarlyClobber() const { return IsEarlyClobber; } + bool Operand::isMemory() const { return isExplicit() && getExplicitOperandInfo().OperandType == MCOI::OPERAND_MEMORY; @@ -115,6 +117,8 @@ Instruction::create(const MCInstrInfo &InstrInfo, Operand Operand; Operand.Index = OpIndex; Operand.IsDef = (OpIndex < Description->getNumDefs()); + Operand.IsEarlyClobber = + (Description->getOperandConstraint(OpIndex, MCOI::EARLY_CLOBBER) != -1); // TODO(gchatelet): Handle isLookupPtrRegClass. if (OpInfo.RegClass >= 0) Operand.Tracker = &RATC.getRegisterClass(OpInfo.RegClass); diff --git a/llvm/tools/llvm-exegesis/lib/MCInstrDescView.h b/llvm/tools/llvm-exegesis/lib/MCInstrDescView.h index c1af10fa460a3..0a62967897c79 100644 --- a/llvm/tools/llvm-exegesis/lib/MCInstrDescView.h +++ b/llvm/tools/llvm-exegesis/lib/MCInstrDescView.h @@ -72,6 +72,7 @@ struct Operand { bool isVariable() const; bool isMemory() const; bool isImmediate() const; + bool isEarlyClobber() const; unsigned getIndex() const; unsigned getTiedToIndex() const; unsigned getVariableIndex() const; @@ -82,6 +83,7 @@ struct Operand { // Please use the accessors above and not the following fields. std::optional Index; bool IsDef = false; + bool IsEarlyClobber = false; const RegisterAliasingTracker *Tracker = nullptr; // Set for Register Op. 
const MCOperandInfo *Info = nullptr; // Set for Explicit Op. std::optional TiedToIndex; // Set for Reg&Explicit Op. @@ -115,6 +117,8 @@ struct Instruction { Instruction &operator=(const Instruction &) = delete; Instruction &operator=(Instruction &&) = delete; + unsigned getOpcode() const { return Description.getOpcode(); } + // Returns the Operand linked to this Variable. // In case the Variable is tied, the primary (i.e. Def) Operand is returned. const Operand &getPrimaryOperand(const Variable &Var) const; diff --git a/llvm/tools/llvm-exegesis/lib/RISCV/CMakeLists.txt b/llvm/tools/llvm-exegesis/lib/RISCV/CMakeLists.txt index 489ac6d6e34b3..d379874fa1d0e 100644 --- a/llvm/tools/llvm-exegesis/lib/RISCV/CMakeLists.txt +++ b/llvm/tools/llvm-exegesis/lib/RISCV/CMakeLists.txt @@ -14,6 +14,8 @@ set(LLVM_LINK_COMPONENTS add_llvm_library(LLVMExegesisRISCV DISABLE_LLVM_LINK_LLVM_DYLIB STATIC + RISCVExegesisPreprocessing.cpp + RISCVExegesisPostprocessing.cpp Target.cpp DEPENDS diff --git a/llvm/tools/llvm-exegesis/lib/RISCV/RISCVExegesisPasses.h b/llvm/tools/llvm-exegesis/lib/RISCV/RISCVExegesisPasses.h new file mode 100644 index 0000000000000..f206966331756 --- /dev/null +++ b/llvm/tools/llvm-exegesis/lib/RISCV/RISCVExegesisPasses.h @@ -0,0 +1,19 @@ +//===- RISCVExegesisPasses.h - RISC-V specific Exegesis Passes --*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_TOOLS_EXEGESIS_LIB_RISCV_RISCVEXEGESISPASSES_H +#define LLVM_TOOLS_EXEGESIS_LIB_RISCV_RISCVEXEGESISPASSES_H +namespace llvm { +class FunctionPass; + +namespace exegesis { +FunctionPass *createRISCVPreprocessingPass(); +FunctionPass *createRISCVPostprocessingPass(); +} // namespace exegesis +} // namespace llvm +#endif diff --git a/llvm/tools/llvm-exegesis/lib/RISCV/RISCVExegesisPostprocessing.cpp b/llvm/tools/llvm-exegesis/lib/RISCV/RISCVExegesisPostprocessing.cpp new file mode 100644 index 0000000000000..13317c6684396 --- /dev/null +++ b/llvm/tools/llvm-exegesis/lib/RISCV/RISCVExegesisPostprocessing.cpp @@ -0,0 +1,129 @@ +//===- RISCVExegesisPostprocessing.cpp - Post processing MI for exegesis---===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// \file +// This Pass converts some of the virtual register operands in VSETVLI and FRM +// pseudos into physical registers. 
+// +//===----------------------------------------------------------------------===// + +#include "RISCV.h" +#include "RISCVExegesisPasses.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/Support/Debug.h" + +using namespace llvm; + +#define DEBUG_TYPE "riscv-exegesis-post-processing" + +namespace { +struct RISCVExegesisPostprocessing : public MachineFunctionPass { + static char ID; + + RISCVExegesisPostprocessing() : MachineFunctionPass(ID) {} + + bool runOnMachineFunction(MachineFunction &MF) override; + + void getAnalysisUsage(AnalysisUsage &AU) const override { + AU.setPreservesCFG(); + MachineFunctionPass::getAnalysisUsage(AU); + } + +private: + // Extremely simple register allocator that picks a register that hasn't + // been defined or used in this function. + Register allocateGPRRegister(const MachineFunction &MF, + const MachineRegisterInfo &MRI); + + bool processVSETVL(MachineInstr &MI, MachineRegisterInfo &MRI); + bool processWriteFRM(MachineInstr &MI, MachineRegisterInfo &MRI); +}; +} // anonymous namespace + +char RISCVExegesisPostprocessing::ID = 0; + +bool RISCVExegesisPostprocessing::runOnMachineFunction(MachineFunction &MF) { + bool Changed = false; + for (auto &MBB : MF) + for (auto &MI : MBB) { + unsigned Opcode = MI.getOpcode(); + switch (Opcode) { + case RISCV::VSETVLI: + case RISCV::VSETVL: + case RISCV::PseudoVSETVLI: + case RISCV::PseudoVSETVLIX0: + Changed |= processVSETVL(MI, MF.getRegInfo()); + break; + case RISCV::SwapFRMImm: + case RISCV::WriteFRM: + Changed |= processWriteFRM(MI, MF.getRegInfo()); + break; + default: + break; + } + } + + if (Changed) + MF.getRegInfo().clearVirtRegs(); + + LLVM_DEBUG(MF.print(dbgs() << "===After RISCVExegesisPostprocessing===\n"); + dbgs() << "\n"); + + return Changed; +} + +Register RISCVExegesisPostprocessing::allocateGPRRegister( + const MachineFunction &MF, const MachineRegisterInfo &MRI) { + const auto &TRI = 
*MRI.getTargetRegisterInfo(); + + // We hope to avoid allocating callee-saved registers. And GPRTC + // happens to account for nearly all caller-saved registers. + const TargetRegisterClass *GPRClass = TRI.getRegClass(RISCV::GPRTCRegClassID); + BitVector Candidates = TRI.getAllocatableSet(MF, GPRClass); + + for (unsigned SetIdx : Candidates.set_bits()) { + if (MRI.reg_empty(Register(SetIdx))) + return Register(SetIdx); + } + + // All bets are off, assign a fixed one. + return RISCV::X5; +} + +bool RISCVExegesisPostprocessing::processVSETVL(MachineInstr &MI, + MachineRegisterInfo &MRI) { + bool Changed = false; + // Replace both AVL and VL (i.e. the result) operands with physical + // registers. + for (unsigned Idx = 0U; Idx < 2; ++Idx) + if (MI.getOperand(Idx).isReg()) { + Register RegOp = MI.getOperand(Idx).getReg(); + if (RegOp.isVirtual()) { + MRI.replaceRegWith(RegOp, allocateGPRRegister(*MI.getMF(), MRI)); + Changed = true; + } + } + + return Changed; +} + +bool RISCVExegesisPostprocessing::processWriteFRM(MachineInstr &MI, + MachineRegisterInfo &MRI) { + // The virtual register will be the first operand in both SwapFRMImm and + // WriteFRM. + Register DestReg = MI.getOperand(0).getReg(); + if (DestReg.isVirtual()) { + MRI.replaceRegWith(DestReg, allocateGPRRegister(*MI.getMF(), MRI)); + return true; + } + return false; +} + +FunctionPass *llvm::exegesis::createRISCVPostprocessingPass() { + return new RISCVExegesisPostprocessing(); +} diff --git a/llvm/tools/llvm-exegesis/lib/RISCV/RISCVExegesisPreprocessing.cpp b/llvm/tools/llvm-exegesis/lib/RISCV/RISCVExegesisPreprocessing.cpp new file mode 100644 index 0000000000000..7f1cfd9ea52df --- /dev/null +++ b/llvm/tools/llvm-exegesis/lib/RISCV/RISCVExegesisPreprocessing.cpp @@ -0,0 +1,85 @@ +//===- RISCVExegesisPreprocessing.cpp -------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// \file +// +//===----------------------------------------------------------------------===// + +#include "RISCV.h" +#include "RISCVExegesisPasses.h" +#include "RISCVRegisterInfo.h" +#include "RISCVSubtarget.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" + +using namespace llvm; + +#define DEBUG_TYPE "riscv-exegesis-preprocessing" + +namespace { +struct RISCVExegesisPreprocessing : public MachineFunctionPass { + static char ID; + + RISCVExegesisPreprocessing() : MachineFunctionPass(ID) {} + + bool runOnMachineFunction(MachineFunction &MF) override; + + void getAnalysisUsage(AnalysisUsage &AU) const override { + AU.setPreservesCFG(); + MachineFunctionPass::getAnalysisUsage(AU); + } +}; +} // anonymous namespace + +char RISCVExegesisPreprocessing::ID = 0; + +static bool processAVLOperand(MachineInstr &MI, MachineRegisterInfo &MRI, + const TargetInstrInfo &TII) { + const MCInstrDesc &Desc = TII.get(MI.getOpcode()); + uint64_t TSFlags = Desc.TSFlags; + if (!RISCVII::hasVLOp(TSFlags)) + return false; + + const MachineOperand &VLOp = MI.getOperand(RISCVII::getVLOpNum(Desc)); + if (VLOp.isReg()) { + Register VLReg = VLOp.getReg(); + if (VLReg.isVirtual()) + return false; + assert(RISCV::GPRRegClass.contains(VLReg)); + // Replace all uses of the original physical register with a new virtual + // register. The only reason we can do such replacement here is because it's + // almost certain that VLReg only has a single definition. 
+ Register NewVLReg = MRI.createVirtualRegister(&RISCV::GPRRegClass); + MRI.replaceRegWith(VLReg, NewVLReg); + return true; + } + + return false; +} + +bool RISCVExegesisPreprocessing::runOnMachineFunction(MachineFunction &MF) { + MachineRegisterInfo &MRI = MF.getRegInfo(); + const auto &STI = MF.getSubtarget<RISCVSubtarget>(); + if (!STI.hasVInstructions()) + return false; + const TargetInstrInfo &TII = *STI.getInstrInfo(); + + LLVM_DEBUG(MF.print(dbgs() << "===Before RISCVExegesisPreprocessing===\n"); + dbgs() << "\n"); + + bool Changed = false; + for (auto &MBB : MF) + for (auto &MI : MBB) { + Changed |= processAVLOperand(MI, MRI, TII); + } + + return Changed; +} + +FunctionPass *llvm::exegesis::createRISCVPreprocessingPass() { + return new RISCVExegesisPreprocessing(); +} diff --git a/llvm/tools/llvm-exegesis/lib/RISCV/Target.cpp b/llvm/tools/llvm-exegesis/lib/RISCV/Target.cpp index d70f609c5e080..6d97a7ecfffb8 100644 --- a/llvm/tools/llvm-exegesis/lib/RISCV/Target.cpp +++ b/llvm/tools/llvm-exegesis/lib/RISCV/Target.cpp @@ -7,11 +7,19 @@ //===----------------------------------------------------------------------===// #include "../Target.h" +#include "../ParallelSnippetGenerator.h" +#include "../SerialSnippetGenerator.h" +#include "../SnippetGenerator.h" #include "MCTargetDesc/RISCVBaseInfo.h" #include "MCTargetDesc/RISCVMCTargetDesc.h" #include "MCTargetDesc/RISCVMatInt.h" +#include "RISCV.h" +#include "RISCVExegesisPasses.h" #include "RISCVInstrInfo.h" +#include "RISCVRegisterInfo.h" +#include "llvm/Support/Regex.h" +#include "llvm/Support/raw_ostream.h" // include computeAvailableFeatures and computeRequiredFeatures.
#define GET_AVAILABLE_OPCODE_CHECKER @@ -24,10 +32,580 @@ namespace llvm { namespace exegesis { +static cl::opt + OnlyUsesVLMAXForVL("riscv-vlmax-for-vl", + cl::desc("Only enumerate VLMAX for VL operand"), + cl::init(false), cl::Hidden); + +static cl::opt + EnumerateRoundingModes("riscv-enumerate-rounding-modes", + cl::desc("Enumerate different FRM and VXRM"), + cl::init(true), cl::Hidden); + +static cl::opt + FilterConfig("riscv-filter-config", + cl::desc("Show only the configs matching this regex"), + cl::init(""), cl::Hidden); + #include "RISCVGenExegesis.inc" namespace { +template class RISCVSnippetGenerator : public BaseT { + static void printRoundingMode(raw_ostream &OS, unsigned Val, bool UsesVXRM) { + if (UsesVXRM) { + assert(RISCVVXRndMode::isValidRoundingMode(Val)); + OS << RISCVVXRndMode::roundingModeToString( + static_cast(Val)); + } else { + assert(RISCVFPRndMode::isValidRoundingMode(Val)); + OS << RISCVFPRndMode::roundingModeToString( + static_cast(Val)); + } + } + + static constexpr unsigned MinSEW = 8; + // ELEN is basically SEW_max. + unsigned ELEN = 64; + + // We can't know the real min/max VLEN w/o a Function, so we're + // using the VLen from Zvl. + unsigned ZvlVLen = 32; + + /// Mask for registers that are NOT standalone registers like X0 and V0 + BitVector AggregateRegisters; + + // Returns true when opcode is available in any of the FBs. + static bool + isOpcodeAvailableIn(unsigned Opcode, + ArrayRef FBs) { + FeatureBitset RequiredFeatures = RISCV_MC::computeRequiredFeatures(Opcode); + for (uint8_t FB : FBs) { + if (RequiredFeatures[FB]) + return true; + } + return false; + } + + static bool isRVVFloatingPointOp(unsigned Opcode) { + return isOpcodeAvailableIn(Opcode, + {RISCV_MC::Feature_HasVInstructionsAnyFBit}); + } + + // Get the element group width of each vector cryptor extension. 
+ static unsigned getZvkEGWSize(unsigned Opcode, unsigned SEW) { + using namespace RISCV_MC; + if (isOpcodeAvailableIn(Opcode, {Feature_HasStdExtZvkgBit, + Feature_HasStdExtZvknedBit, + Feature_HasStdExtZvksedBit})) + return 128U; + if (isOpcodeAvailableIn(Opcode, {Feature_HasStdExtZvkshBit})) + return 256U; + if (isOpcodeAvailableIn(Opcode, {Feature_HasStdExtZvknhaOrZvknhbBit})) + // In Zvknh[ab], when SEW=64 is used (i.e. Zvknhb), EGW is 256. + // Otherwise it's 128. + return SEW == 64 ? 256U : 128U; + + llvm_unreachable("Unsupported opcode"); + } + + // A handy utility to multiply or divide an integer by LMUL. + template static T multiplyLMul(T Val, RISCVVType::VLMUL VLMul) { + auto [LMul, IsFractional] = RISCVVType::decodeVLMUL(VLMul); + return IsFractional ? Val / LMul : Val * LMul; + } + + /// Return the denominator of the fractional (i.e. the `x` in .vfx suffix) or + /// nullopt if BaseOpcode is not a vector sext/zext. + static std::optional isRVVSignZeroExtend(unsigned BaseOpcode) { + switch (BaseOpcode) { + case RISCV::VSEXT_VF2: + case RISCV::VZEXT_VF2: + return 2; + case RISCV::VSEXT_VF4: + case RISCV::VZEXT_VF4: + return 4; + case RISCV::VSEXT_VF8: + case RISCV::VZEXT_VF8: + return 8; + default: + return std::nullopt; + } + } + + void annotateWithVType(const CodeTemplate &CT, const Instruction &Instr, + unsigned BaseOpcode, + const BitVector &ForbiddenRegisters, + std::vector &Result) const; + +public: + RISCVSnippetGenerator(const LLVMState &State, + const SnippetGenerator::Options &Opts) + : BaseT(State, Opts), + AggregateRegisters(State.getRegInfo().getNumRegs(), /*initVal=*/true) { + // Initialize standalone registers mask. 
+ const MCRegisterInfo &RegInfo = State.getRegInfo(); + const unsigned StandaloneRegClasses[] = { + RISCV::GPRRegClassID, RISCV::FPR16RegClassID, RISCV::VRRegClassID}; + + for (unsigned RegClassID : StandaloneRegClasses) + for (unsigned Reg : RegInfo.getRegClass(RegClassID)) + AggregateRegisters.reset(Reg); + + // Initialize ELEN and VLEN. + // FIXME: We could have obtained these two constants from RISCVSubtarget + // but in order to get that from TargetMachine, we need a Function. + const MCSubtargetInfo &STI = State.getSubtargetInfo(); + ELEN = STI.checkFeatures("+zve64x") ? 64 : 32; + + std::string ZvlQuery; + for (unsigned Size = 32; Size <= 65536; Size *= 2) { + ZvlQuery = "+zvl"; + raw_string_ostream SS(ZvlQuery); + SS << Size << "b"; + if (STI.checkFeatures(SS.str()) && ZvlVLen < Size) + ZvlVLen = Size; + } + } + + Expected> + generateCodeTemplates(InstructionTemplate Variant, + const BitVector &ForbiddenRegisters) const override; +}; + +static bool isMaskedSibling(unsigned MaskedOp, unsigned UnmaskedOp) { + const auto *RVVMasked = RISCV::getMaskedPseudoInfo(MaskedOp); + return RVVMasked && RVVMasked->UnmaskedPseudo == UnmaskedOp; +} + +// There are primarily two kinds of opcodes that are not eligible +// in a serial snippet: +// (1) Has a use operand that can not overlap with the def operand +// (i.e. early clobber). +// (2) The register file of the only use operand is different from +// that of the def operand. For instance, use operand is vector and +// the result is a scalar. +static bool isIneligibleOfSerialSnippets(unsigned BaseOpcode, + const Instruction &I) { + if (llvm::any_of(I.Operands, + [](const Operand &Op) { return Op.isEarlyClobber(); })) + return true; + + switch (BaseOpcode) { + case RISCV::VCOMPRESS_VM: + case RISCV::VCPOP_M: + case RISCV::VCPOP_V: + // The permutation instructions listed below cannot have destination + // overlapping with the source. 
+ case RISCV::VRGATHEREI16_VV: + case RISCV::VRGATHER_VI: + case RISCV::VRGATHER_VV: + case RISCV::VRGATHER_VX: + case RISCV::VSLIDE1UP_VX: + case RISCV::VSLIDEUP_VI: + case RISCV::VSLIDEUP_VX: + return true; + default: + return false; + } +} + +static bool isZvfhminZvfbfminOpcodes(unsigned BaseOpcode) { + switch (BaseOpcode) { + case RISCV::VFNCVT_F_F_W: + case RISCV::VFWCVT_F_F_V: + case RISCV::VFNCVTBF16_F_F_W: + case RISCV::VFWCVTBF16_F_F_V: + return true; + default: + return false; + } +} + +static bool isVectorReduction(unsigned BaseOpcode) { + switch (BaseOpcode) { + case RISCV::VREDAND_VS: + case RISCV::VREDMAXU_VS: + case RISCV::VREDMAX_VS: + case RISCV::VREDMINU_VS: + case RISCV::VREDMIN_VS: + case RISCV::VREDOR_VS: + case RISCV::VREDSUM_VS: + case RISCV::VREDXOR_VS: + case RISCV::VWREDSUMU_VS: + case RISCV::VWREDSUM_VS: + case RISCV::VFREDMAX_VS: + case RISCV::VFREDMIN_VS: + case RISCV::VFREDOSUM_VS: + case RISCV::VFREDUSUM_VS: + return true; + default: + return false; + } +} + +template +void RISCVSnippetGenerator::annotateWithVType( + const CodeTemplate &OrigCT, const Instruction &Instr, unsigned BaseOpcode, + const BitVector &ForbiddenRegisters, + std::vector &Result) const { + const MCSubtargetInfo &STI = SnippetGenerator::State.getSubtargetInfo(); + unsigned VPseudoOpcode = Instr.getOpcode(); + + bool IsSerial = std::is_same_v; + + const MCInstrDesc &MIDesc = Instr.Description; + const uint64_t TSFlags = MIDesc.TSFlags; + + RISCVVType::VLMUL VLMul = RISCVII::getLMul(TSFlags); + + const size_t StartingResultSize = Result.size(); + + SmallPtrSet VTypeOperands; + std::optional SelfAliasing; + // Exegesis see instructions with tied operands being inherently serial. + // But for RVV instructions, those tied operands are passthru rather + // than real read operands. So we manually put dependency between + // destination (i.e. def) and any of the non-tied/SEW/policy/AVL/RM + // operands. 
+ auto assignSerialRVVOperands = [&, this](InstructionTemplate &IT) { + // Initialize SelfAliasing on first use. + if (!SelfAliasing.has_value()) { + BitVector ExcludeRegs = ForbiddenRegisters; + ExcludeRegs |= AggregateRegisters; + SelfAliasing = AliasingConfigurations(Instr, Instr, ExcludeRegs); + bool EmptyUses = false; + for (auto &ARO : SelfAliasing->Configurations) { + auto &Uses = ARO.Uses; + for (auto ROA = Uses.begin(); ROA != Uses.end();) { + const Operand *Op = ROA->Op; + // Exclude tied operand(s). + if (Op->isTied()) { + ROA = Uses.erase(ROA); + continue; + } + + // Special handling for reduction operations: for a given reduction + // `vredop vd, vs2, vs1`, we don't want vd to be aliased with vs1 + // since we're only reading `vs1[0]` and many implementations + // optimize for this case (e.g. chaining). Instead, we're forcing + // it to create alias between vd and vs2. + if (isVectorReduction(BaseOpcode) && + // vs1's operand index is always 3. + Op->getIndex() == 3) { + ROA = Uses.erase(ROA); + continue; + } + + // Exclude any special operands like SEW and VL -- we've already + // assigned values to them. + if (VTypeOperands.count(Op)) { + ROA = Uses.erase(ROA); + continue; + } + ++ROA; + } + + // If any of the use operand candidate lists is empty, there is + // no point to assign self aliasing registers. + if (Uses.empty()) { + EmptyUses = true; + break; + } + } + if (EmptyUses) + SelfAliasing->Configurations.clear(); + } + + // This is a self aliasing instruction so defs and uses are from the same + // instance, hence twice IT in the following call. + if (!SelfAliasing->empty() && !SelfAliasing->hasImplicitAliasing()) + setRandomAliasing(*SelfAliasing, IT, IT); + }; + + // We are going to create a CodeTemplate (configuration) for each supported + // SEW, policy, and VL. + // FIXME: Account for EEW and EMUL. 
+ SmallVector, 4> Log2SEWs; + SmallVector, 4> Policies; + SmallVector, 3> AVLs; + SmallVector, 8> RoundingModes; + + bool HasSEWOp = RISCVII::hasSEWOp(TSFlags); + bool HasPolicyOp = RISCVII::hasVecPolicyOp(TSFlags); + bool HasVLOp = RISCVII::hasVLOp(TSFlags); + bool HasRMOp = RISCVII::hasRoundModeOp(TSFlags); + bool UsesVXRM = RISCVII::usesVXRM(TSFlags); + + if (HasSEWOp) { + const Operand &SEWOp = Instr.Operands[RISCVII::getSEWOpNum(MIDesc)]; + VTypeOperands.insert(&SEWOp); + + if (SEWOp.Info->OperandType == RISCVOp::OPERAND_SEW_MASK) { + // If it's a mask-producing instruction, the SEW operand is always zero. + Log2SEWs.push_back(0); + } else { + SmallVector SEWCandidates; + + // (RVV spec 3.4.2) For fractional LMUL, the supported SEW are between + // [SEW_min, LMUL * ELEN]. + unsigned SEWUpperBound = + VLMul >= RISCVVType::LMUL_F8 ? multiplyLMul(ELEN, VLMul) : ELEN; + for (unsigned SEW = MinSEW; SEW <= SEWUpperBound; SEW <<= 1) { + SEWCandidates.push_back(SEW); + + // Some scheduling classes already integrate SEW; only put + // their corresponding SEW values at the SEW operands. + // NOTE: It is imperative to put this condition in the front, otherwise + // it is tricky and difficult to know if there is an integrated + // SEW after other rules are applied to filter the candidates. + const auto *RVVBase = + RISCVVInversePseudosTable::getBaseInfo(BaseOpcode, VLMul, SEW); + if (RVVBase && (RVVBase->Pseudo == VPseudoOpcode || + isMaskedSibling(VPseudoOpcode, RVVBase->Pseudo) || + isMaskedSibling(RVVBase->Pseudo, VPseudoOpcode))) { + // There is an integrated SEW, remove all but the SEW pushed last. + SEWCandidates.erase(SEWCandidates.begin(), SEWCandidates.end() - 1); + break; + } + } + + // Filter out some candidates. + for (auto SEW = SEWCandidates.begin(); SEW != SEWCandidates.end();) { + // For floating point operations, only select SEW of the supported FLEN. 
+ if (isRVVFloatingPointOp(VPseudoOpcode)) { + bool Supported = false; + Supported |= isZvfhminZvfbfminOpcodes(BaseOpcode) && *SEW == 16; + Supported |= STI.hasFeature(RISCV::FeatureStdExtZvfh) && *SEW == 16; + Supported |= STI.hasFeature(RISCV::FeatureStdExtF) && *SEW == 32; + Supported |= STI.hasFeature(RISCV::FeatureStdExtD) && *SEW == 64; + if (!Supported) { + SEW = SEWCandidates.erase(SEW); + continue; + } + } + + // The EEW for source operand in VSEXT and VZEXT is a fraction + // of the SEW, hence only SEWs that will lead to valid EEW are allowed. + if (auto Frac = isRVVSignZeroExtend(BaseOpcode)) + if (*SEW / *Frac < MinSEW) { + SEW = SEWCandidates.erase(SEW); + continue; + } + + // Most vector crypto 1.0 instructions only work on SEW=32. + using namespace RISCV_MC; + if (isOpcodeAvailableIn(BaseOpcode, {Feature_HasStdExtZvkgBit, + Feature_HasStdExtZvknedBit, + Feature_HasStdExtZvknhaOrZvknhbBit, + Feature_HasStdExtZvksedBit, + Feature_HasStdExtZvkshBit})) { + if (*SEW != 32) + // Zvknhb supports SEW=64 as well. + if (*SEW != 64 || !STI.hasFeature(RISCV::FeatureStdExtZvknhb) || + !isOpcodeAvailableIn(BaseOpcode, + {Feature_HasStdExtZvknhaOrZvknhbBit})) { + SEW = SEWCandidates.erase(SEW); + continue; + } + + // We're also enforcing the requirement of `LMUL * VLEN >= EGW` here, + // because some of the extensions have SEW-dependant EGW. + unsigned EGW = getZvkEGWSize(BaseOpcode, *SEW); + if (multiplyLMul(ZvlVLen, VLMul) < EGW) { + SEW = SEWCandidates.erase(SEW); + continue; + } + } + + ++SEW; + } + + // We're not going to produce any result with zero SEW candidate. 
+ if (SEWCandidates.empty()) + return; + + for (unsigned SEW : SEWCandidates) + Log2SEWs.push_back(Log2_32(SEW)); + } + } else { + Log2SEWs.push_back(std::nullopt); + } + + if (HasPolicyOp) { + VTypeOperands.insert(&Instr.Operands[RISCVII::getVecPolicyOpNum(MIDesc)]); + + Policies = {0, RISCVVType::TAIL_AGNOSTIC, RISCVVType::MASK_AGNOSTIC, + (RISCVVType::TAIL_AGNOSTIC | RISCVVType::MASK_AGNOSTIC)}; + } else { + Policies.push_back(std::nullopt); + } + + if (HasVLOp) { + VTypeOperands.insert(&Instr.Operands[RISCVII::getVLOpNum(MIDesc)]); + + if (OnlyUsesVLMAXForVL) + AVLs.push_back(-1); + else + AVLs = {// 5-bit immediate value + 1, + // VLMAX + -1, + // Non-X0 register + 0}; + } else { + AVLs.push_back(std::nullopt); + } + + if (HasRMOp) { + VTypeOperands.insert(&Instr.Operands[RISCVII::getVLOpNum(MIDesc) - 1]); + + if (UsesVXRM) { + // Use RNU as the default VXRM. + RoundingModes = {RISCVVXRndMode::RNU}; + if (EnumerateRoundingModes) + RoundingModes.append( + {RISCVVXRndMode::RNE, RISCVVXRndMode::RDN, RISCVVXRndMode::ROD}); + } else { + if (EnumerateRoundingModes) + RoundingModes = {RISCVFPRndMode::RNE, RISCVFPRndMode::RTZ, + RISCVFPRndMode::RDN, RISCVFPRndMode::RUP, + RISCVFPRndMode::RMM}; + else + // If we're not enumerating FRM, use DYN to instruct + // RISCVInsertReadWriteCSRPass to insert nothing. 
+ RoundingModes = {RISCVFPRndMode::DYN}; + } + } else { + RoundingModes = {std::nullopt}; + } + + std::set, std::optional, + std::optional, std::optional>> + Combinations; + for (auto AVL : AVLs) { + for (auto Log2SEW : Log2SEWs) + for (auto Policy : Policies) { + for (auto RM : RoundingModes) + Combinations.insert(std::make_tuple(RM, AVL, Log2SEW, Policy)); + } + } + + std::string ConfigStr; + SmallVector, 4> ValueAssignments; + for (const auto &[RM, AVL, Log2SEW, Policy] : Combinations) { + InstructionTemplate IT(&Instr); + + ListSeparator LS; + ConfigStr = "vtype = {"; + raw_string_ostream SS(ConfigStr); + + ValueAssignments.clear(); + + if (RM) { + const Operand &Op = Instr.Operands[RISCVII::getVLOpNum(MIDesc) - 1]; + ValueAssignments.push_back({&Op, MCOperand::createImm(*RM)}); + printRoundingMode(SS << LS << (UsesVXRM ? "VXRM" : "FRM") << ": ", *RM, + UsesVXRM); + } + + if (AVL) { + MCOperand OpVal; + if (*AVL < 0) { + // VLMAX + OpVal = MCOperand::createImm(-1); + SS << LS << "AVL: VLMAX"; + } else if (*AVL == 0) { + // A register holding AVL. + // TODO: Generate a random register. + OpVal = MCOperand::createReg(RISCV::X5); + OpVal.print(SS << LS << "AVL: "); + } else { + // A 5-bit immediate. + // The actual value assignment is deferred to + // RISCVExegesisTarget::randomizeTargetMCOperand. + SS << LS << "AVL: simm5"; + } + if (OpVal.isValid()) { + const Operand &Op = Instr.Operands[RISCVII::getVLOpNum(MIDesc)]; + ValueAssignments.push_back({&Op, OpVal}); + } + } + + if (Log2SEW) { + const Operand &Op = Instr.Operands[RISCVII::getSEWOpNum(MIDesc)]; + ValueAssignments.push_back({&Op, MCOperand::createImm(*Log2SEW)}); + SS << LS << "SEW: e" << (*Log2SEW ? 1 << *Log2SEW : 8); + } + + if (Policy) { + const Operand &Op = Instr.Operands[RISCVII::getVecPolicyOpNum(MIDesc)]; + ValueAssignments.push_back({&Op, MCOperand::createImm(*Policy)}); + SS << LS + << "Policy: " << (*Policy & RISCVVType::TAIL_AGNOSTIC ? 
"ta" : "tu") + << "/" << (*Policy & RISCVVType::MASK_AGNOSTIC ? "ma" : "mu"); + } + + SS << "}"; + + // Filter out some configurations, if needed. + if (!FilterConfig.empty()) { + if (!Regex(FilterConfig).match(ConfigStr)) + continue; + } + + CodeTemplate CT = OrigCT.clone(); + CT.Config = std::move(ConfigStr); + for (InstructionTemplate &IT : CT.Instructions) { + if (IsSerial) { + // Reset this template's value assignments and do it + // ourselves. + IT = InstructionTemplate(&Instr); + assignSerialRVVOperands(IT); + } + + for (const auto &[Op, OpVal] : ValueAssignments) + IT.getValueFor(*Op) = OpVal; + } + Result.push_back(std::move(CT)); + if (Result.size() - StartingResultSize >= + SnippetGenerator::Opts.MaxConfigsPerOpcode) + return; + } +} + +template +Expected> +RISCVSnippetGenerator::generateCodeTemplates( + InstructionTemplate Variant, const BitVector &ForbiddenRegisters) const { + const Instruction &Instr = Variant.getInstr(); + + bool IsSerial = std::is_same_v; + + unsigned BaseOpcode = RISCV::getRVVMCOpcode(Instr.getOpcode()); + + // Bail out ineligible opcodes before generating base code templates since + // the latter is quite expensive. + if (IsSerial && BaseOpcode && isIneligibleOfSerialSnippets(BaseOpcode, Instr)) + return std::vector{}; + + auto BaseCodeTemplates = + BaseT::generateCodeTemplates(Variant, ForbiddenRegisters); + if (!BaseCodeTemplates) + return BaseCodeTemplates.takeError(); + + if (!BaseOpcode) + return BaseCodeTemplates; + + // Specialize for RVV pseudo. + std::vector ExpandedTemplates; + for (const auto &BaseCT : *BaseCodeTemplates) + annotateWithVType(BaseCT, Instr, BaseOpcode, ForbiddenRegisters, + ExpandedTemplates); + + return ExpandedTemplates; +} + // Stores constant value to a general-purpose (integer) register. 
static std::vector loadIntReg(const MCSubtargetInfo &STI, MCRegister Reg, const APInt &Value) { @@ -75,32 +653,12 @@ static std::vector loadFP64RegBits32(const MCSubtargetInfo &STI, return Instrs; } -static MCInst nop() { - // ADDI X0, X0, 0 - return MCInstBuilder(RISCV::ADDI) - .addReg(RISCV::X0) - .addReg(RISCV::X0) - .addImm(0); -} - -static bool isVectorRegList(MCRegister Reg) { - return RISCV::VRM2RegClass.contains(Reg) || - RISCV::VRM4RegClass.contains(Reg) || - RISCV::VRM8RegClass.contains(Reg) || - RISCV::VRN2M1RegClass.contains(Reg) || - RISCV::VRN2M2RegClass.contains(Reg) || - RISCV::VRN2M4RegClass.contains(Reg) || - RISCV::VRN3M1RegClass.contains(Reg) || - RISCV::VRN3M2RegClass.contains(Reg) || - RISCV::VRN4M1RegClass.contains(Reg) || - RISCV::VRN4M2RegClass.contains(Reg) || - RISCV::VRN5M1RegClass.contains(Reg) || - RISCV::VRN6M1RegClass.contains(Reg) || - RISCV::VRN7M1RegClass.contains(Reg) || - RISCV::VRN8M1RegClass.contains(Reg); -} - class ExegesisRISCVTarget : public ExegesisTarget { + // NOTE: Alternatively, we can use BitVector here, but the number of RVV MC + // opcodes is just a small portion of the entire opcode space, so I thought it + // would be a waste of space to use BitVector. + mutable SmallSet RVVMCOpcodesWithPseudos; + public: ExegesisRISCVTarget(); @@ -109,6 +667,30 @@ class ExegesisRISCVTarget : public ExegesisTarget { std::vector setRegTo(const MCSubtargetInfo &STI, MCRegister Reg, const APInt &Value) const override; + const char *getIgnoredOpcodeReasonOrNull(const LLVMState &State, + unsigned Opcode) const override { + // We don't want to support RVV instructions that depend on VTYPE, because + // those instructions by themselves don't carry any additional information + // for us to setup the proper VTYPE environment via VSETVL instructions. 
+ // FIXME: Ideally, we should use RISCVVInversePseudosTable, but it requires + // LMUL and SEW and I don't think enumerating those combinations is any + // better than the ugly trick here that memorizes the corresponding MC + // opcodes of the RVV pseudo we have processed previously. This works most + // of the time because RVV pseudo opcodes are placed before any other RVV + // opcodes. Of course this doesn't work if we're asked to benchmark only a + // certain subset of opcodes. + if (RVVMCOpcodesWithPseudos.count(Opcode)) + return "The MC opcode of RVV instructions are ignored"; + + // We want to support all RVV pseudos. + if (unsigned MCOpcode = RISCV::getRVVMCOpcode(Opcode)) { + RVVMCOpcodesWithPseudos.insert(MCOpcode); + return nullptr; + } + + return ExegesisTarget::getIgnoredOpcodeReasonOrNull(State, Opcode); + } + MCRegister getDefaultLoopCounterRegister(const Triple &) const override; void decrementLoopCounterAndJump(MachineBasicBlock &MBB, @@ -131,9 +713,38 @@ class ExegesisRISCVTarget : public ExegesisTarget { MCOperand &AssignedValue, const BitVector &ForbiddenRegs) const override; + std::unique_ptr createSerialSnippetGenerator( + const LLVMState &State, + const SnippetGenerator::Options &Opts) const override { + return std::make_unique>( + State, Opts); + } + + std::unique_ptr createParallelSnippetGenerator( + const LLVMState &State, + const SnippetGenerator::Options &Opts) const override { + return std::make_unique>( + State, Opts); + } + std::vector generateInstructionVariants(const Instruction &Instr, unsigned MaxConfigsPerOpcode) const override; + + void addTargetSpecificPasses(PassManagerBase &PM) const override { + // Turn AVL operand of physical registers into virtual registers. + PM.add(exegesis::createRISCVPreprocessingPass()); + PM.add(createRISCVInsertVSETVLIPass()); + // Setting up the correct FRM. 
+ PM.add(createRISCVInsertReadWriteCSRPass()); + PM.add(createRISCVInsertWriteVXRMPass()); + // This will assign physical register to the result of VSETVLI instructions + // that produce VLMAX. + PM.add(exegesis::createRISCVPostprocessingPass()); + // PseudoRET will be expanded by RISCVAsmPrinter; we have to expand + // PseudoMovImm with RISCVPostRAExpandPseudoPass though. + PM.add(createRISCVPostRAExpandPseudoPass()); + } }; ExegesisRISCVTarget::ExegesisRISCVTarget() @@ -157,20 +768,7 @@ std::vector ExegesisRISCVTarget::setRegTo(const MCSubtargetInfo &STI, return loadFPRegBits(STI, Reg, Value, RISCV::FMV_D_X); return loadFP64RegBits32(STI, Reg, Value); } - if (Reg == RISCV::FRM || Reg == RISCV::VL || Reg == RISCV::VLENB || - Reg == RISCV::VTYPE || RISCV::GPRPairRegClass.contains(Reg) || - RISCV::VRRegClass.contains(Reg) || isVectorRegList(Reg)) { - // Don't initialize: - // - FRM - // - VL, VLENB, VTYPE - // - vector registers (and vector register lists) - // - Zfinx registers - // Generate 'NOP' so that exegesis treats such registers as initialized - // (it tries to initialize them with '0' anyway). - return {nop()}; - } - errs() << "setRegTo is not implemented for Reg " << Reg - << ", results will be unreliable\n"; + // TODO: Emit proper code to initialize other kinds of registers. return {}; } @@ -243,6 +841,15 @@ Error ExegesisRISCVTarget::randomizeTargetMCOperand( case RISCVOp::OPERAND_UIMMLOG2XLEN_NONZERO: AssignedValue = MCOperand::createImm(1); break; + case RISCVOp::OPERAND_SIMM5: + // 5-bit signed immediate value. + AssignedValue = MCOperand::createImm(randomIndex(31) - 16); + break; + case RISCVOp::OPERAND_AVL: + case RISCVOp::OPERAND_UIMM5: + // 5-bit unsigned immediate value. 
+ AssignedValue = MCOperand::createImm(randomIndex(31)); + break; default: if (OperandType >= RISCVOp::OPERAND_FIRST_RISCV_IMM && OperandType <= RISCVOp::OPERAND_LAST_RISCV_IMM) diff --git a/llvm/tools/llvm-exegesis/lib/SerialSnippetGenerator.cpp b/llvm/tools/llvm-exegesis/lib/SerialSnippetGenerator.cpp index 25cdf1ce66d44..f233ea4288a7e 100644 --- a/llvm/tools/llvm-exegesis/lib/SerialSnippetGenerator.cpp +++ b/llvm/tools/llvm-exegesis/lib/SerialSnippetGenerator.cpp @@ -53,6 +53,8 @@ computeAliasingInstructions(const LLVMState &State, const Instruction *Instr, if (OtherOpcode == Instr->Description.getOpcode()) continue; const Instruction &OtherInstr = State.getIC().getInstr(OtherOpcode); + if (ET.getIgnoredOpcodeReasonOrNull(State, OtherInstr.getOpcode())) + continue; if (OtherInstr.hasMemoryOperands()) continue; if (!ET.allowAsBackToBack(OtherInstr)) diff --git a/llvm/tools/llvm-exegesis/lib/Target.cpp b/llvm/tools/llvm-exegesis/lib/Target.cpp index 5ea5b4c2c002f..68d19514bedb2 100644 --- a/llvm/tools/llvm-exegesis/lib/Target.cpp +++ b/llvm/tools/llvm-exegesis/lib/Target.cpp @@ -35,6 +35,19 @@ const ExegesisTarget *ExegesisTarget::lookup(Triple TT) { return nullptr; } +const char * +ExegesisTarget::getIgnoredOpcodeReasonOrNull(const LLVMState &State, + unsigned Opcode) const { + const MCInstrDesc &InstrDesc = State.getIC().getInstr(Opcode).Description; + if (InstrDesc.isPseudo() || InstrDesc.usesCustomInsertionHook()) + return "Unsupported opcode: isPseudo/usesCustomInserter"; + if (InstrDesc.isBranch() || InstrDesc.isIndirectBranch()) + return "Unsupported opcode: isBranch/isIndirectBranch"; + if (InstrDesc.isCall() || InstrDesc.isReturn()) + return "Unsupported opcode: isCall/isReturn"; + return nullptr; +} + Expected> ExegesisTarget::createCounter(StringRef CounterName, const LLVMState &, ArrayRef ValidationCounters, diff --git a/llvm/tools/llvm-exegesis/lib/Target.h b/llvm/tools/llvm-exegesis/lib/Target.h index f3fbe3780616f..77fbaa6e95412 100644 --- 
a/llvm/tools/llvm-exegesis/lib/Target.h +++ b/llvm/tools/llvm-exegesis/lib/Target.h @@ -154,6 +154,9 @@ class ExegesisTarget { return IsOpcodeAvailable(Opcode, Features); } + virtual const char *getIgnoredOpcodeReasonOrNull(const LLVMState &State, + unsigned Opcode) const; + // Sets the stack register to the auxiliary memory so that operations // requiring the stack can be formed (e.g., setting large registers). The code // generated by this function may clobber registers. diff --git a/llvm/tools/llvm-exegesis/llvm-exegesis.cpp b/llvm/tools/llvm-exegesis/llvm-exegesis.cpp index b9938a92855a4..babcffeb9666a 100644 --- a/llvm/tools/llvm-exegesis/llvm-exegesis.cpp +++ b/llvm/tools/llvm-exegesis/llvm-exegesis.cpp @@ -300,18 +300,6 @@ T ExitOnFileError(const Twine &FileName, Expected &&E) { return std::move(*E); } -static const char *getIgnoredOpcodeReasonOrNull(const LLVMState &State, - unsigned Opcode) { - const MCInstrDesc &InstrDesc = State.getIC().getInstr(Opcode).Description; - if (InstrDesc.isPseudo() || InstrDesc.usesCustomInsertionHook()) - return "Unsupported opcode: isPseudo/usesCustomInserter"; - if (InstrDesc.isBranch() || InstrDesc.isIndirectBranch()) - return "Unsupported opcode: isBranch/isIndirectBranch"; - if (InstrDesc.isCall() || InstrDesc.isReturn()) - return "Unsupported opcode: isCall/isReturn"; - return nullptr; -} - // Checks that only one of OpcodeNames, OpcodeIndex or SnippetsFile is provided, // and returns the opcode indices or {} if snippets should be read from // `SnippetsFile`. @@ -370,7 +358,8 @@ static Expected> generateSnippets(const LLVMState &State, unsigned Opcode, const BitVector &ForbiddenRegs) { // Ignore instructions that we cannot run. - if (const char *Reason = getIgnoredOpcodeReasonOrNull(State, Opcode)) + if (const char *Reason = + State.getExegesisTarget().getIgnoredOpcodeReasonOrNull(State, Opcode)) return make_error(Reason); const Instruction &Instr = State.getIC().getInstr(Opcode);