Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
25 changes: 15 additions & 10 deletions llvm/lib/Target/AArch64/AArch64InstrFormats.td
Original file line number Diff line number Diff line change
Expand Up @@ -5301,28 +5301,29 @@ multiclass FPToIntegerUnscaled<bits<2> rmode, bits<3> opcode, string asm,
}
}

multiclass FPToIntegerSIMDScalar<bits<2> rmode, bits<3> opcode, string asm> {
multiclass FPToIntegerSIMDScalar<bits<2> rmode, bits<3> opcode, string asm,
SDPatternOperator OpN = null_frag> {
// double-precision to 32-bit SIMD/FPR
def SDr : BaseFPToIntegerUnscaled<0b01, rmode, opcode, FPR64, FPR32, asm,
[]> {
[(set FPR32:$Rd, (i32 (OpN (f64 FPR64:$Rn))))]> {
let Inst{31} = 0; // 32-bit FPR flag
}

// half-precision to 32-bit SIMD/FPR
def SHr : BaseFPToIntegerUnscaled<0b11, rmode, opcode, FPR16, FPR32, asm,
[]> {
[(set FPR32:$Rd, (i32 (OpN (f16 FPR16:$Rn))))]> {
let Inst{31} = 0; // 32-bit FPR flag
}

// half-precision to 64-bit SIMD/FPR
def DHr : BaseFPToIntegerUnscaled<0b11, rmode, opcode, FPR16, FPR64, asm,
[]> {
[(set FPR64:$Rd, (i64 (OpN (f16 FPR16:$Rn))))]> {
let Inst{31} = 1; // 64-bit FPR flag
}

// single-precision to 64-bit SIMD/FPR
def DSr : BaseFPToIntegerUnscaled<0b00, rmode, opcode, FPR32, FPR64, asm,
[]> {
[(set FPR64:$Rd, (i64 (OpN (f32 FPR32:$Rn))))]> {
let Inst{31} = 1; // 64-bit FPR flag
}
}
Expand Down Expand Up @@ -7940,14 +7941,18 @@ multiclass SIMDTwoScalarD<bit U, bits<5> opc, string asm,
}
}

let mayRaiseFPException = 1, Uses = [FPCR] in
multiclass SIMDFPTwoScalar<bit U, bit S, bits<5> opc, string asm> {
let mayRaiseFPException = 1, Uses = [FPCR], FastISelShouldIgnore = 1 in
multiclass SIMDFPTwoScalar<bit U, bit S, bits<5> opc, string asm,
SDPatternOperator OpN = null_frag> {
let Predicates = [HasNEONandIsStreamingSafe] in {
def v1i64 : BaseSIMDTwoScalar<U, {S,1}, 0b00, opc, FPR64, FPR64, asm,[]>;
def v1i32 : BaseSIMDTwoScalar<U, {S,0}, 0b00, opc, FPR32, FPR32, asm,[]>;
def v1i64 : BaseSIMDTwoScalar<U, {S,1}, 0b00, opc, FPR64, FPR64, asm,
[(set (i64 FPR64:$Rd), (OpN (f64 FPR64:$Rn)))]>;
def v1i32 : BaseSIMDTwoScalar<U, {S,0}, 0b00, opc, FPR32, FPR32, asm,
[(set FPR32:$Rd, (i32 (OpN (f32 FPR32:$Rn))))]>;
}
let Predicates = [HasNEONandIsStreamingSafe, HasFullFP16] in {
def v1f16 : BaseSIMDTwoScalar<U, {S,1}, 0b11, opc, FPR16, FPR16, asm,[]>;
def v1f16 : BaseSIMDTwoScalar<U, {S,1}, 0b11, opc, FPR16, FPR16, asm,
[(set FPR16:$Rd, (i16 (OpN (f16 FPR16:$Rn))))]>;
}
}

Expand Down
142 changes: 97 additions & 45 deletions llvm/lib/Target/AArch64/AArch64InstrInfo.td
Original file line number Diff line number Diff line change
Expand Up @@ -5231,18 +5231,19 @@ defm FCVTZS : FPToIntegerScaled<0b11, 0b000, "fcvtzs", any_fp_to_sint>;
defm FCVTZU : FPToIntegerScaled<0b11, 0b001, "fcvtzu", any_fp_to_uint>;

let Predicates = [HasNEON, HasFPRCVT] in{
defm FCVTAS : FPToIntegerSIMDScalar<0b11, 0b010, "fcvtas">;
defm FCVTAU : FPToIntegerSIMDScalar<0b11, 0b011, "fcvtau">;
defm FCVTMS : FPToIntegerSIMDScalar<0b10, 0b100, "fcvtms">;
defm FCVTMU : FPToIntegerSIMDScalar<0b10, 0b101, "fcvtmu">;
defm FCVTNS : FPToIntegerSIMDScalar<0b01, 0b010, "fcvtns">;
defm FCVTNU : FPToIntegerSIMDScalar<0b01, 0b011, "fcvtnu">;
defm FCVTPS : FPToIntegerSIMDScalar<0b10, 0b010, "fcvtps">;
defm FCVTPU : FPToIntegerSIMDScalar<0b10, 0b011, "fcvtpu">;
defm FCVTAS : FPToIntegerSIMDScalar<0b11, 0b010, "fcvtas", int_aarch64_neon_fcvtas>;
defm FCVTAU : FPToIntegerSIMDScalar<0b11, 0b011, "fcvtau", int_aarch64_neon_fcvtau>;
defm FCVTMS : FPToIntegerSIMDScalar<0b10, 0b100, "fcvtms", int_aarch64_neon_fcvtms>;
defm FCVTMU : FPToIntegerSIMDScalar<0b10, 0b101, "fcvtmu", int_aarch64_neon_fcvtmu>;
defm FCVTNS : FPToIntegerSIMDScalar<0b01, 0b010, "fcvtns", int_aarch64_neon_fcvtns>;
defm FCVTNU : FPToIntegerSIMDScalar<0b01, 0b011, "fcvtnu", int_aarch64_neon_fcvtnu>;
defm FCVTPS : FPToIntegerSIMDScalar<0b10, 0b010, "fcvtps", int_aarch64_neon_fcvtps>;
defm FCVTPU : FPToIntegerSIMDScalar<0b10, 0b011, "fcvtpu", int_aarch64_neon_fcvtpu>;
defm FCVTZS : FPToIntegerSIMDScalar<0b10, 0b110, "fcvtzs">;
defm FCVTZU : FPToIntegerSIMDScalar<0b10, 0b111, "fcvtzu">;
}


// AArch64's FCVT instructions saturate when out of range.
multiclass FPToIntegerSatPats<SDNode to_int_sat, SDNode to_int_sat_gi, string INST> {
let Predicates = [HasFullFP16] in {
Expand Down Expand Up @@ -5309,35 +5310,6 @@ multiclass FPToIntegerSatPats<SDNode to_int_sat, SDNode to_int_sat_gi, string IN
defm : FPToIntegerSatPats<fp_to_sint_sat, fp_to_sint_sat_gi, "FCVTZS">;
defm : FPToIntegerSatPats<fp_to_uint_sat, fp_to_uint_sat_gi, "FCVTZU">;

multiclass FPToIntegerIntPats<Intrinsic round, string INST> {
let Predicates = [HasFullFP16] in {
def : Pat<(i32 (round f16:$Rn)), (!cast<Instruction>(INST # UWHr) $Rn)>;
def : Pat<(i64 (round f16:$Rn)), (!cast<Instruction>(INST # UXHr) $Rn)>;
}
def : Pat<(i32 (round f32:$Rn)), (!cast<Instruction>(INST # UWSr) $Rn)>;
def : Pat<(i64 (round f32:$Rn)), (!cast<Instruction>(INST # UXSr) $Rn)>;
def : Pat<(i32 (round f64:$Rn)), (!cast<Instruction>(INST # UWDr) $Rn)>;
def : Pat<(i64 (round f64:$Rn)), (!cast<Instruction>(INST # UXDr) $Rn)>;

let Predicates = [HasFullFP16] in {
def : Pat<(i32 (round (fmul f16:$Rn, fixedpoint_f16_i32:$scale))),
(!cast<Instruction>(INST # SWHri) $Rn, $scale)>;
def : Pat<(i64 (round (fmul f16:$Rn, fixedpoint_f16_i64:$scale))),
(!cast<Instruction>(INST # SXHri) $Rn, $scale)>;
}
def : Pat<(i32 (round (fmul f32:$Rn, fixedpoint_f32_i32:$scale))),
(!cast<Instruction>(INST # SWSri) $Rn, $scale)>;
def : Pat<(i64 (round (fmul f32:$Rn, fixedpoint_f32_i64:$scale))),
(!cast<Instruction>(INST # SXSri) $Rn, $scale)>;
def : Pat<(i32 (round (fmul f64:$Rn, fixedpoint_f64_i32:$scale))),
(!cast<Instruction>(INST # SWDri) $Rn, $scale)>;
def : Pat<(i64 (round (fmul f64:$Rn, fixedpoint_f64_i64:$scale))),
(!cast<Instruction>(INST # SXDri) $Rn, $scale)>;
}

defm : FPToIntegerIntPats<int_aarch64_neon_fcvtzs, "FCVTZS">;
defm : FPToIntegerIntPats<int_aarch64_neon_fcvtzu, "FCVTZU">;

multiclass FPToIntegerPats<SDNode to_int, SDNode to_int_sat, SDNode round, string INST> {
def : Pat<(i32 (to_int (round f32:$Rn))),
(!cast<Instruction>(INST # UWSr) f32:$Rn)>;
Expand Down Expand Up @@ -6572,14 +6544,14 @@ defm FCMGE : SIMDFPCmpTwoScalar<1, 1, 0b01100, "fcmge", AArch64fcmgez>;
defm FCMGT : SIMDFPCmpTwoScalar<0, 1, 0b01100, "fcmgt", AArch64fcmgtz>;
defm FCMLE : SIMDFPCmpTwoScalar<1, 1, 0b01101, "fcmle", AArch64fcmlez>;
defm FCMLT : SIMDFPCmpTwoScalar<0, 1, 0b01110, "fcmlt", AArch64fcmltz>;
defm FCVTAS : SIMDFPTwoScalar< 0, 0, 0b11100, "fcvtas">;
defm FCVTAU : SIMDFPTwoScalar< 1, 0, 0b11100, "fcvtau">;
defm FCVTMS : SIMDFPTwoScalar< 0, 0, 0b11011, "fcvtms">;
defm FCVTMU : SIMDFPTwoScalar< 1, 0, 0b11011, "fcvtmu">;
defm FCVTNS : SIMDFPTwoScalar< 0, 0, 0b11010, "fcvtns">;
defm FCVTNU : SIMDFPTwoScalar< 1, 0, 0b11010, "fcvtnu">;
defm FCVTPS : SIMDFPTwoScalar< 0, 1, 0b11010, "fcvtps">;
defm FCVTPU : SIMDFPTwoScalar< 1, 1, 0b11010, "fcvtpu">;
defm FCVTAS : SIMDFPTwoScalar< 0, 0, 0b11100, "fcvtas", int_aarch64_neon_fcvtas>;
defm FCVTAU : SIMDFPTwoScalar< 1, 0, 0b11100, "fcvtau", int_aarch64_neon_fcvtau>;
defm FCVTMS : SIMDFPTwoScalar< 0, 0, 0b11011, "fcvtms", int_aarch64_neon_fcvtms>;
defm FCVTMU : SIMDFPTwoScalar< 1, 0, 0b11011, "fcvtmu", int_aarch64_neon_fcvtmu>;
defm FCVTNS : SIMDFPTwoScalar< 0, 0, 0b11010, "fcvtns", int_aarch64_neon_fcvtns>;
defm FCVTNU : SIMDFPTwoScalar< 1, 0, 0b11010, "fcvtnu", int_aarch64_neon_fcvtnu>;
defm FCVTPS : SIMDFPTwoScalar< 0, 1, 0b11010, "fcvtps", int_aarch64_neon_fcvtps>;
defm FCVTPU : SIMDFPTwoScalar< 1, 1, 0b11010, "fcvtpu", int_aarch64_neon_fcvtpu>;
def FCVTXNv1i64 : SIMDInexactCvtTwoScalar<0b10110, "fcvtxn">;
defm FCVTZS : SIMDFPTwoScalar< 0, 1, 0b11011, "fcvtzs">;
defm FCVTZU : SIMDFPTwoScalar< 1, 1, 0b11011, "fcvtzu">;
Expand All @@ -6600,6 +6572,86 @@ defm UQXTN : SIMDTwoScalarMixedBHS<1, 0b10100, "uqxtn", int_aarch64_neon_scalar
defm USQADD : SIMDTwoScalarBHSDTied< 1, 0b00011, "usqadd",
int_aarch64_neon_usqadd>;

// Floating-point conversion patterns.
multiclass FPToIntegerSIMDScalarPatterns<SDPatternOperator OpN, string INST> {
def : Pat<(f32 (bitconvert (i32 (OpN (f64 FPR64:$Rn))))),
(!cast<Instruction>(INST # SDr) FPR64:$Rn)>;
def : Pat<(f32 (bitconvert (i32 (OpN (f16 FPR16:$Rn))))),
(!cast<Instruction>(INST # SHr) FPR16:$Rn)>;
def : Pat<(f64 (bitconvert (i64 (OpN (f16 FPR16:$Rn))))),
(!cast<Instruction>(INST # DHr) FPR16:$Rn)>;
def : Pat<(f64 (bitconvert (i64 (OpN (f32 FPR32:$Rn))))),
(!cast<Instruction>(INST # DSr) FPR32:$Rn)>;
def : Pat<(f32 (bitconvert (i32 (OpN (f32 FPR32:$Rn))))),
(!cast<Instruction>(INST # v1i32) FPR32:$Rn)>;
def : Pat<(f64 (bitconvert (i64 (OpN (f64 FPR64:$Rn))))),
(!cast<Instruction>(INST # v1i64) FPR64:$Rn)>;

}
defm: FPToIntegerSIMDScalarPatterns<int_aarch64_neon_fcvtas, "FCVTAS">;
defm: FPToIntegerSIMDScalarPatterns<int_aarch64_neon_fcvtau, "FCVTAU">;
defm: FPToIntegerSIMDScalarPatterns<int_aarch64_neon_fcvtms, "FCVTMS">;
defm: FPToIntegerSIMDScalarPatterns<int_aarch64_neon_fcvtmu, "FCVTMU">;
defm: FPToIntegerSIMDScalarPatterns<int_aarch64_neon_fcvtns, "FCVTNS">;
defm: FPToIntegerSIMDScalarPatterns<int_aarch64_neon_fcvtnu, "FCVTNU">;
defm: FPToIntegerSIMDScalarPatterns<int_aarch64_neon_fcvtps, "FCVTPS">;
defm: FPToIntegerSIMDScalarPatterns<int_aarch64_neon_fcvtpu, "FCVTPU">;

multiclass FPToIntegerIntPats<Intrinsic round, string INST> {
let Predicates = [HasFullFP16] in {
def : Pat<(i32 (round f16:$Rn)), (!cast<Instruction>(INST # UWHr) $Rn)>;
def : Pat<(i64 (round f16:$Rn)), (!cast<Instruction>(INST # UXHr) $Rn)>;
}
def : Pat<(i32 (round f32:$Rn)), (!cast<Instruction>(INST # UWSr) $Rn)>;
def : Pat<(i64 (round f32:$Rn)), (!cast<Instruction>(INST # UXSr) $Rn)>;
def : Pat<(i32 (round f64:$Rn)), (!cast<Instruction>(INST # UWDr) $Rn)>;
def : Pat<(i64 (round f64:$Rn)), (!cast<Instruction>(INST # UXDr) $Rn)>;

// For global-isel we can use register classes to determine
// which FCVT instruction to use.
let Predicates = [HasFPRCVT] in {
def : Pat<(i32 (round f16:$Rn)), (!cast<Instruction>(INST # SHr) $Rn)>;
def : Pat<(i64 (round f16:$Rn)), (!cast<Instruction>(INST # DHr) $Rn)>;
def : Pat<(i64 (round f32:$Rn)), (!cast<Instruction>(INST # DSr) $Rn)>;
def : Pat<(i32 (round f64:$Rn)), (!cast<Instruction>(INST # SDr) $Rn)>;
}
def : Pat<(i32 (round f32:$Rn)), (!cast<Instruction>(INST # v1i32) $Rn)>;
def : Pat<(i64 (round f64:$Rn)), (!cast<Instruction>(INST # v1i64) $Rn)>;

let Predicates = [HasFPRCVT] in {
def : Pat<(f32 (bitconvert (i32 (round f16:$Rn)))),
(!cast<Instruction>(INST # SHr) $Rn)>;
def : Pat<(f64 (bitconvert (i64 (round f16:$Rn)))),
(!cast<Instruction>(INST # DHr) $Rn)>;
def : Pat<(f64 (bitconvert (i64 (round f32:$Rn)))),
(!cast<Instruction>(INST # DSr) $Rn)>;
def : Pat<(f32 (bitconvert (i32 (round f64:$Rn)))),
(!cast<Instruction>(INST # SDr) $Rn)>;
}
def : Pat<(f32 (bitconvert (i32 (round f32:$Rn)))),
(!cast<Instruction>(INST # v1i32) $Rn)>;
def : Pat<(f64 (bitconvert (i64 (round f64:$Rn)))),
(!cast<Instruction>(INST # v1i64) $Rn)>;

let Predicates = [HasFullFP16] in {
def : Pat<(i32 (round (fmul f16:$Rn, fixedpoint_f16_i32:$scale))),
(!cast<Instruction>(INST # SWHri) $Rn, $scale)>;
def : Pat<(i64 (round (fmul f16:$Rn, fixedpoint_f16_i64:$scale))),
(!cast<Instruction>(INST # SXHri) $Rn, $scale)>;
}
def : Pat<(i32 (round (fmul f32:$Rn, fixedpoint_f32_i32:$scale))),
(!cast<Instruction>(INST # SWSri) $Rn, $scale)>;
def : Pat<(i64 (round (fmul f32:$Rn, fixedpoint_f32_i64:$scale))),
(!cast<Instruction>(INST # SXSri) $Rn, $scale)>;
def : Pat<(i32 (round (fmul f64:$Rn, fixedpoint_f64_i32:$scale))),
(!cast<Instruction>(INST # SWDri) $Rn, $scale)>;
def : Pat<(i64 (round (fmul f64:$Rn, fixedpoint_f64_i64:$scale))),
(!cast<Instruction>(INST # SXDri) $Rn, $scale)>;
}

defm : FPToIntegerIntPats<int_aarch64_neon_fcvtzs, "FCVTZS">;
defm : FPToIntegerIntPats<int_aarch64_neon_fcvtzu, "FCVTZU">;

// f16 -> s16 conversions
let Predicates = [HasFullFP16] in {
def : Pat<(i16(fp_to_sint_sat_gi f16:$Rn)), (FCVTZSv1f16 f16:$Rn)>;
Expand Down
32 changes: 29 additions & 3 deletions llvm/lib/Target/AArch64/GISel/AArch64RegisterBankInfo.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -573,9 +573,7 @@ bool AArch64RegisterBankInfo::onlyUsesFP(const MachineInstr &MI,
case Intrinsic::aarch64_neon_fcvtnu:
case Intrinsic::aarch64_neon_fcvtps:
case Intrinsic::aarch64_neon_fcvtpu:
// Force FPR register bank for half types, as those types otherwise
// don't get legalized correctly resulting in fp16 <-> gpr32 COPY's.
return MRI.getType(MI.getOperand(2).getReg()) == LLT::float16();
return true;
default:
break;
}
Expand Down Expand Up @@ -1148,6 +1146,34 @@ AArch64RegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
case TargetOpcode::G_INTRINSIC:
case TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS: {
switch (cast<GIntrinsic>(MI).getIntrinsicID()) {
case Intrinsic::aarch64_neon_fcvtas:
case Intrinsic::aarch64_neon_fcvtau:
case Intrinsic::aarch64_neon_fcvtzs:
case Intrinsic::aarch64_neon_fcvtzu:
case Intrinsic::aarch64_neon_fcvtms:
case Intrinsic::aarch64_neon_fcvtmu:
case Intrinsic::aarch64_neon_fcvtns:
case Intrinsic::aarch64_neon_fcvtnu:
case Intrinsic::aarch64_neon_fcvtps:
case Intrinsic::aarch64_neon_fcvtpu: {
OpRegBankIdx[2] = PMI_FirstFPR;
if (MRI.getType(MI.getOperand(0).getReg()).isVector()) {
OpRegBankIdx[0] = PMI_FirstFPR;
break;
}
TypeSize DstSize = getSizeInBits(MI.getOperand(0).getReg(), MRI, TRI);
TypeSize SrcSize = getSizeInBits(MI.getOperand(2).getReg(), MRI, TRI);
if (((DstSize == SrcSize) || STI.hasFeature(AArch64::FeatureFPRCVT)) &&
all_of(MRI.use_nodbg_instructions(MI.getOperand(0).getReg()),
[&](const MachineInstr &UseMI) {
return onlyUsesFP(UseMI, MRI, TRI) ||
prefersFPUse(UseMI, MRI, TRI);
}))
OpRegBankIdx[0] = PMI_FirstFPR;
else
OpRegBankIdx[0] = PMI_FirstGPR;
break;
}
case Intrinsic::aarch64_neon_vcvtfxs2fp:
case Intrinsic::aarch64_neon_vcvtfxu2fp:
case Intrinsic::aarch64_neon_vcvtfp2fxs:
Expand Down
Loading