Skip to content

Commit 2a4fbe9

Browse files
committed
SVE2 FP APIs
Contains implementations for: AddRotateComplex, AddSaturateRotateComplex, ConvertToDoubleOdd, ConvertToSingleEvenRoundToOdd, Log2, MultiplyAddRotateComplex, MultiplyAddRotateComplexBySelectedScalar, MultiplyAddRoundedDoublingSaturateHighRotateComplex*, ReciprocalEstimate, ReciprocalSqrtEstimate.
1 parent 35ffd43 commit 2a4fbe9

File tree

12 files changed

+1245
-71
lines changed

12 files changed

+1245
-71
lines changed

src/coreclr/jit/emitarm64sve.cpp

Lines changed: 16 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -2645,9 +2645,9 @@ void emitter::emitInsSve_R_R_I(instruction ins,
26452645
assert(isVectorRegister(reg1)); // ddddd
26462646
assert(isVectorRegister(reg2)); // nnnnn
26472647
assert(isValidVectorElemsize(optGetSveElemsize(opt))); // xx
2648+
assert(isValidRot(emitDecodeRotationImm90_or_270(imm)));
26482649

26492650
// Convert rot to bitwise representation: 0 if 90, 1 if 270
2650-
imm = emitEncodeRotationImm90_or_270(imm); // r
26512651
fmt = IF_SVE_FV_2A;
26522652
break;
26532653

@@ -4574,14 +4574,13 @@ void emitter::emitInsSve_R_R_R_I(instruction ins,
45744574
case INS_sve_sqrdcmlah:
45754575
assert(insScalableOptsNone(sopt));
45764576
assert(insOptsScalableStandard(opt));
4577-
assert(isVectorRegister(reg1)); // ddddd
4578-
assert(isVectorRegister(reg2)); // nnnnn
4579-
assert(isVectorRegister(reg3)); // mmmmm
4580-
assert(isValidRot(imm)); // rr
4581-
assert(isValidVectorElemsize(optGetSveElemsize(opt))); // xx
4577+
assert(isVectorRegister(reg1)); // ddddd
4578+
assert(isVectorRegister(reg2)); // nnnnn
4579+
assert(isVectorRegister(reg3)); // mmmmm
4580+
assert(isValidRot(emitDecodeRotationImm0_to_270(imm))); // rr
4581+
assert(isValidVectorElemsize(optGetSveElemsize(opt))); // xx
45824582

45834583
// Convert rot to bitwise representation
4584-
imm = emitEncodeRotationImm0_to_270(imm);
45854584
fmt = IF_SVE_EK_3A;
45864585
break;
45874586

@@ -5785,12 +5784,12 @@ void emitter::emitInsSve_R_R_R_I_I(instruction ins,
57855784
break;
57865785

57875786
case INS_sve_cmla:
5788-
assert(isVectorRegister(reg1)); // ddddd
5789-
assert(isVectorRegister(reg2)); // nnnnn
5790-
assert(isLowVectorRegister(reg3)); // mmmm
5791-
assert(isValidRot(imm2)); // rr
5787+
assert(isVectorRegister(reg1)); // ddddd
5788+
assert(isVectorRegister(reg2)); // nnnnn
5789+
assert(isLowVectorRegister(reg3)); // mmmm
5790+
assert(isValidRot(emitDecodeRotationImm0_to_270(imm2))); // rr
57925791
// Convert imm2 from rotation value (0-270) to bitwise representation (0-3)
5793-
imm = (imm1 << 2) | emitEncodeRotationImm0_to_270(imm2);
5792+
imm = (imm1 << 2) | imm2;
57945793

57955794
if (opt == INS_OPTS_SCALABLE_H)
57965795
{
@@ -5807,13 +5806,12 @@ void emitter::emitInsSve_R_R_R_I_I(instruction ins,
58075806
break;
58085807

58095808
case INS_sve_sqrdcmlah:
5810-
assert(isVectorRegister(reg1)); // ddddd
5811-
assert(isVectorRegister(reg2)); // nnnnn
5812-
assert(isLowVectorRegister(reg3)); // mmmm
5813-
assert(isValidRot(imm2)); // rr
5814-
// Convert imm2 from rotation value (0-270) to bitwise representation (0-3)
5815-
imm = (imm1 << 2) | emitEncodeRotationImm0_to_270(imm2);
5809+
assert(isVectorRegister(reg1)); // ddddd
5810+
assert(isVectorRegister(reg2)); // nnnnn
5811+
assert(isLowVectorRegister(reg3)); // mmmm
5812+
assert(isValidRot(emitDecodeRotationImm0_to_270(imm2))); // rr
58165813

5814+
imm = (imm1 << 2) | imm2;
58175815
if (opt == INS_OPTS_SCALABLE_H)
58185816
{
58195817
assert(isValidUimm<2>(imm1)); // ii

src/coreclr/jit/hwintrinsic.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1275,6 +1275,8 @@ struct HWIntrinsicInfo
12751275
}
12761276

12771277
case NI_Sve_MultiplyAddRotateComplexBySelectedScalar:
1278+
case NI_Sve2_MultiplyAddRotateComplexBySelectedScalar:
1279+
case NI_Sve2_MultiplyAddRoundedDoublingSaturateHighRotateComplexBySelectedScalar:
12781280
case NI_Sve2_DotProductRotateComplexBySelectedIndex:
12791281
{
12801282
assert(sig->numArgs == 5);

src/coreclr/jit/hwintrinsicarm64.cpp

Lines changed: 41 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -445,11 +445,15 @@ void HWIntrinsicInfo::lookupImmBounds(
445445
break;
446446

447447
case NI_Sve_AddRotateComplex:
448+
case NI_Sve2_AddRotateComplex:
449+
case NI_Sve2_AddSaturateRotateComplex:
448450
immLowerBound = 0;
449451
immUpperBound = 1;
450452
break;
451453

452454
case NI_Sve_MultiplyAddRotateComplex:
455+
case NI_Sve2_MultiplyAddRotateComplex:
456+
case NI_Sve2_MultiplyAddRoundedDoublingSaturateHighRotateComplex:
453457
case NI_Sve2_DotProductRotateComplex:
454458
immLowerBound = 0;
455459
immUpperBound = 3;
@@ -493,6 +497,41 @@ void HWIntrinsicInfo::lookupImmBounds(
493497
}
494498
break;
495499

500+
case NI_Sve2_MultiplyAddRotateComplexBySelectedScalar:
501+
if (immNumber == 1)
502+
{
503+
// Bounds for rotation
504+
immLowerBound = 0;
505+
immUpperBound = 3;
506+
}
507+
else
508+
{
509+
// Bounds for index
510+
assert(immNumber == 2);
511+
assert(baseType == TYP_USHORT || baseType == TYP_SHORT || baseType == TYP_INT ||
512+
baseType == TYP_UINT);
513+
immLowerBound = 0;
514+
immUpperBound = (baseType == TYP_USHORT || baseType == TYP_SHORT) ? 3 : 1;
515+
}
516+
break;
517+
518+
case NI_Sve2_MultiplyAddRoundedDoublingSaturateHighRotateComplexBySelectedScalar:
519+
if (immNumber == 1)
520+
{
521+
// Bounds for rotation
522+
immLowerBound = 0;
523+
immUpperBound = 3;
524+
}
525+
else
526+
{
527+
// Bounds for index
528+
assert(immNumber == 2);
529+
assert(baseType == TYP_INT || baseType == TYP_SHORT);
530+
immLowerBound = 0;
531+
immUpperBound = (baseType == TYP_SHORT) ? 3 : 1;
532+
}
533+
break;
534+
496535
case NI_Sve_TrigonometricMultiplyAddCoefficient:
497536
immLowerBound = 0;
498537
immUpperBound = 7;
@@ -3180,6 +3219,8 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic,
31803219
}
31813220

31823221
case NI_Sve_MultiplyAddRotateComplexBySelectedScalar:
3222+
case NI_Sve2_MultiplyAddRotateComplexBySelectedScalar:
3223+
case NI_Sve2_MultiplyAddRoundedDoublingSaturateHighRotateComplexBySelectedScalar:
31833224
case NI_Sve2_DotProductRotateComplexBySelectedIndex:
31843225
{
31853226
assert(sig->numArgs == 5);

src/coreclr/jit/hwintrinsiccodegenarm64.cpp

Lines changed: 59 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -582,6 +582,7 @@ void CodeGen::genHWIntrinsic(GenTreeHWIntrinsic* node)
582582
case NI_Sve_ConvertToInt32:
583583
case NI_Sve_ConvertToUInt32:
584584
case NI_Sve_ConvertToSingle:
585+
case NI_Sve2_ConvertToSingleEvenRoundToOdd:
585586
{
586587
embOpt = emitTypeSize(intrinEmbMask.baseType) == EA_8BYTE ? INS_OPTS_D_TO_S
587588
: INS_OPTS_SCALABLE_S;
@@ -591,6 +592,7 @@ void CodeGen::genHWIntrinsic(GenTreeHWIntrinsic* node)
591592
case NI_Sve_ConvertToInt64:
592593
case NI_Sve_ConvertToUInt64:
593594
case NI_Sve_ConvertToDouble:
595+
case NI_Sve2_ConvertToDoubleOdd:
594596
{
595597
embOpt = emitTypeSize(intrinEmbMask.baseType) == EA_4BYTE ? INS_OPTS_S_TO_D
596598
: INS_OPTS_SCALABLE_D;
@@ -2598,6 +2600,8 @@ void CodeGen::genHWIntrinsic(GenTreeHWIntrinsic* node)
25982600
}
25992601

26002602
case NI_Sve_TrigonometricMultiplyAddCoefficient:
2603+
case NI_Sve2_AddRotateComplex:
2604+
case NI_Sve2_AddSaturateRotateComplex:
26012605
{
26022606
assert(isRMW);
26032607

@@ -2618,6 +2622,8 @@ void CodeGen::genHWIntrinsic(GenTreeHWIntrinsic* node)
26182622
}
26192623

26202624
case NI_Sve_MultiplyAddRotateComplexBySelectedScalar:
2625+
case NI_Sve2_MultiplyAddRotateComplexBySelectedScalar:
2626+
case NI_Sve2_MultiplyAddRoundedDoublingSaturateHighRotateComplexBySelectedScalar:
26212627
{
26222628
assert(isRMW);
26232629
assert(hasImmediateOperand);
@@ -2640,39 +2646,67 @@ void CodeGen::genHWIntrinsic(GenTreeHWIntrinsic* node)
26402646
else
26412647
{
26422648
// Use the helper to generate a table. The table can only use a single lookup value, therefore
2643-
// the two immediates index (0 to 1, in op4Reg) and rotation (0 to 3, in op5Reg) must be
2644-
// combined to a single value (0 to 7)
2649+
// the two immediates index and rotation must be combined to a single value
26452650
assert(!intrin.op4->isContainedIntOrIImmed() && !intrin.op5->isContainedIntOrIImmed());
26462651
emitAttr scalarSize = emitActualTypeSize(node->GetSimdBaseType());
26472652

2648-
// Combine the two immediates into op4Reg
2649-
// Shift rotation left to be out of range of index
2650-
GetEmitter()->emitIns_R_R_I(INS_lsl, scalarSize, op5Reg, op5Reg, 1);
2651-
// Combine the two values by ORing
2652-
GetEmitter()->emitIns_R_R_R(INS_orr, scalarSize, op4Reg, op4Reg, op5Reg);
2653+
var_types baseType = node->GetSimdBaseType();
26532654

2654-
// Generate the table using the combined immediate
2655-
HWIntrinsicImmOpHelper helper(this, op4Reg, 0, 7, node, (targetReg != op1Reg) ? 2 : 1);
2656-
for (helper.EmitBegin(); !helper.Done(); helper.EmitCaseEnd())
2655+
if (baseType == TYP_SHORT || baseType == TYP_USHORT)
26572656
{
2658-
if (targetReg != op1Reg)
2657+
GetEmitter()->emitIns_R_R_I(INS_lsl, scalarSize, op5Reg, op5Reg, 2);
2658+
GetEmitter()->emitIns_R_R_R(INS_orr, scalarSize, op4Reg, op4Reg, op5Reg);
2659+
2660+
// index and rotation both take values 0 to 3 so must be
2661+
// combined to a single value (0 to 15)
2662+
HWIntrinsicImmOpHelper helper(this, op4Reg, 0, 15, node, (targetReg != op1Reg) ? 2 : 1);
2663+
for (helper.EmitBegin(); !helper.Done(); helper.EmitCaseEnd())
26592664
{
2660-
assert(targetReg != op2Reg);
2661-
assert(targetReg != op3Reg);
2662-
GetEmitter()->emitInsSve_R_R(INS_sve_movprfx, EA_SCALABLE, targetReg, op1Reg);
2665+
if (targetReg != op1Reg)
2666+
{
2667+
assert(targetReg != op2Reg);
2668+
assert(targetReg != op3Reg);
2669+
GetEmitter()->emitInsSve_R_R(INS_sve_movprfx, EA_SCALABLE, targetReg, op1Reg);
2670+
}
2671+
2672+
const int value = helper.ImmValue();
2673+
const ssize_t index = value & 3;
2674+
const ssize_t rotation = (value >> 2) & 3;
2675+
GetEmitter()->emitInsSve_R_R_R_I_I(ins, emitSize, targetReg, op2Reg, op3Reg, index,
2676+
rotation, opt);
26632677
}
26642678

2665-
// Extract index and rotation from the immediate
2666-
const int value = helper.ImmValue();
2667-
const ssize_t index = value & 1;
2668-
const ssize_t rotation = value >> 1;
2669-
GetEmitter()->emitInsSve_R_R_R_I_I(ins, emitSize, targetReg, op2Reg, op3Reg, index, rotation,
2670-
opt);
2679+
GetEmitter()->emitIns_R_R_I(INS_and, scalarSize, op4Reg, op4Reg, 3);
2680+
GetEmitter()->emitIns_R_R_I(INS_lsr, scalarSize, op5Reg, op5Reg, 2);
26712681
}
2682+
else
2683+
{
2684+
assert(baseType == TYP_INT || baseType == TYP_UINT || baseType == TYP_FLOAT);
2685+
GetEmitter()->emitIns_R_R_I(INS_lsl, scalarSize, op5Reg, op5Reg, 1);
2686+
GetEmitter()->emitIns_R_R_R(INS_orr, scalarSize, op4Reg, op4Reg, op5Reg);
26722687

2673-
// Restore the original values in op4Reg and op5Reg
2674-
GetEmitter()->emitIns_R_R_I(INS_and, scalarSize, op4Reg, op4Reg, 1);
2675-
GetEmitter()->emitIns_R_R_I(INS_lsr, scalarSize, op5Reg, op5Reg, 1);
2688+
// index (0 to 1, in op4Reg) and rotation (0 to 3, in op5Reg) must be
2689+
// combined to a single value (0 to 7)
2690+
HWIntrinsicImmOpHelper helper(this, op4Reg, 0, 7, node, (targetReg != op1Reg) ? 2 : 1);
2691+
for (helper.EmitBegin(); !helper.Done(); helper.EmitCaseEnd())
2692+
{
2693+
if (targetReg != op1Reg)
2694+
{
2695+
assert(targetReg != op2Reg);
2696+
assert(targetReg != op3Reg);
2697+
GetEmitter()->emitInsSve_R_R(INS_sve_movprfx, EA_SCALABLE, targetReg, op1Reg);
2698+
}
2699+
2700+
const int value = helper.ImmValue();
2701+
const ssize_t index = value & 1;
2702+
const ssize_t rotation = value >> 1;
2703+
GetEmitter()->emitInsSve_R_R_R_I_I(ins, emitSize, targetReg, op2Reg, op3Reg, index,
2704+
rotation, opt);
2705+
}
2706+
2707+
GetEmitter()->emitIns_R_R_I(INS_and, scalarSize, op4Reg, op4Reg, 1);
2708+
GetEmitter()->emitIns_R_R_I(INS_lsr, scalarSize, op5Reg, op5Reg, 1);
2709+
}
26762710
}
26772711

26782712
break;
@@ -2737,6 +2771,8 @@ void CodeGen::genHWIntrinsic(GenTreeHWIntrinsic* node)
27372771
GetEmitter()->emitInsSve_R_R_R(ins, emitSize, targetReg, op3Reg, op1Reg, INS_OPTS_SCALABLE_D);
27382772
break;
27392773

2774+
case NI_Sve2_MultiplyAddRotateComplex:
2775+
case NI_Sve2_MultiplyAddRoundedDoublingSaturateHighRotateComplex:
27402776
case NI_Sve2_DotProductRotateComplex:
27412777
{
27422778
assert(isRMW);

0 commit comments

Comments
 (0)