@@ -317,7 +317,6 @@ multiclass SiFive7WriteResBase<int VLEN,
317317 ProcResourceKind VL, ProcResourceKind VS,
318318 ProcResourceKind VCQ,
319319 SiFive7FPLatencies fpLatencies,
320- bit isFP64Throttled = false,
321320 bit hasFastGather = false> {
322321
323322 // Branching
@@ -832,29 +831,56 @@ multiclass SiFive7WriteResBase<int VLEN,
832831 // 13. Vector Floating-Point Instructions
// NOTE(review): this hunk replaces the compile-time isFP64Throttled selection (the
// removed lines below) with per-write definitions made through
// LMULSEWWriteResMXSEWVariant and SingleElementVecFP64SchedPred for SEW=64. Both of
// those names are defined outside this hunk.
833832 foreach mx = SchedMxListF in {
834833 foreach sew = SchedSEWSet<mx, isF=1>.val in {
835- defvar Cycles = !if(!and(isFP64Throttled, !eq(sew, 64)),
836- SiFive7GetCyclesOnePerElement<mx, sew, VLEN>.c,
837- SiFive7GetCyclesDefault<mx>.c);
838- defvar Lat8 = !if(!and(isFP64Throttled, !eq(sew, 64)), Cycles, 8);
839- defvar VA = !if(!and(isFP64Throttled, !eq(sew, 64)), VA1, VA1OrVA2);
840834 defvar IsWorstCase = SiFive7IsWorstCaseMXSEW<mx, sew, SchedMxListF, isF=1>.c;
841- let Latency = Lat8, AcquireAtCycles = [0, 1], ReleaseAtCycles = [1, !add(1, Cycles)] in {
842- defm : LMULSEWWriteResMXSEW<"WriteVFALUV", [VCQ, VA], mx, sew, IsWorstCase>;
843- defm : LMULSEWWriteResMXSEW<"WriteVFALUF", [VCQ, VA], mx, sew, IsWorstCase>;
844- defm : LMULSEWWriteResMXSEW<"WriteVFMulV", [VCQ, VA], mx, sew, IsWorstCase>;
845- defm : LMULSEWWriteResMXSEW<"WriteVFMulF", [VCQ, VA], mx, sew, IsWorstCase>;
846- defm : LMULSEWWriteResMXSEW<"WriteVFMulAddV", [VCQ, VA], mx, sew, IsWorstCase>;
847- defm : LMULSEWWriteResMXSEW<"WriteVFMulAddF", [VCQ, VA], mx, sew, IsWorstCase>;
848- defm : LMULSEWWriteResMXSEW<"WriteVFRecpV", [VCQ, VA1], mx, sew, IsWorstCase>;
849- defm : LMULSEWWriteResMXSEW<"WriteVFCvtIToFV", [VCQ, VA1], mx, sew, IsWorstCase>;
850- }
851- defvar Lat4 = !if(!and(isFP64Throttled, !eq(sew, 64)), Cycles, 4);
852- let Latency = Lat4, AcquireAtCycles = [0, 1], ReleaseAtCycles = [1, !add(1, Cycles)] in {
853- defm : LMULSEWWriteResMXSEW<"WriteVFSgnjV", [VCQ, VA], mx, sew, IsWorstCase>;
854- defm : LMULSEWWriteResMXSEW<"WriteVFSgnjF", [VCQ, VA], mx, sew, IsWorstCase>;
855- // min max require merge
856- defm : LMULSEWWriteResMXSEW<"WriteVFMinMaxV", [VCQ, VA1], mx, sew, IsWorstCase>;
857- defm : LMULSEWWriteResMXSEW<"WriteVFMinMaxF", [VCQ, VA1], mx, sew, IsWorstCase>;
// SEW=64: every write gets two resource descriptions, labelled "Predicated" and
// "Not Predicated". Each argument group appears to be (resources, latency,
// AcquireAtCycles, ReleaseAtCycles), mirroring the `let` fields used in the else
// branch -- TODO confirm against the LMULSEWWriteResMXSEWVariant definition.
// NOTE(review): the removed code set Latency to Cycles for throttled FP64, whereas the
// predicated arms below use SingleElementCycles + 7 (or + 3); confirm this change in
// latency is intended.
// NOTE(review): SiFive7GetCyclesDefault<mx>.c is repeated inline here, while the other
// loops in this change bind it to a DefaultCycles defvar.
835+ if !eq(sew, 64) then {
836+ defvar SingleElementCycles = SiFive7GetCyclesOnePerElement<mx, sew, VLEN>.c;
837+ foreach SchedWriteName = ["WriteVFALUV", "WriteVFALUF", "WriteVFMulV", "WriteVFMulF",
838+ "WriteVFMulAddV", "WriteVFMulAddF"] in
839+ defm : LMULSEWWriteResMXSEWVariant<SchedWriteName, SingleElementVecFP64SchedPred,
840+ // Predicated
841+ [VCQ, VA1], !add(SingleElementCycles, 7), [0, 1], [1, !add(1, SingleElementCycles)],
842+ // Not Predicated
843+ [VCQ, VA1OrVA2], 8, [0, 1], [1, !add(1, SiFive7GetCyclesDefault<mx>.c)],
844+ mx, sew, IsWorstCase>;
845+ foreach SchedWriteName = ["WriteVFRecpV", "WriteVFCvtIToFV"] in
846+ defm : LMULSEWWriteResMXSEWVariant<SchedWriteName, SingleElementVecFP64SchedPred,
847+ // Predicated
848+ [VCQ, VA1], !add(SingleElementCycles, 7), [0, 1], [1, !add(1, SingleElementCycles)],
849+ // Not Predicated
850+ [VCQ, VA1], 8, [0, 1], [1, !add(1, SiFive7GetCyclesDefault<mx>.c)],
851+ mx, sew, IsWorstCase>;
852+ foreach SchedWriteName = ["WriteVFSgnjV", "WriteVFSgnjF"] in
853+ defm : LMULSEWWriteResMXSEWVariant<SchedWriteName, SingleElementVecFP64SchedPred,
854+ // Predicated
855+ [VCQ, VA1], !add(SingleElementCycles, 3), [0, 1], [1, !add(1, SingleElementCycles)],
856+ // Not Predicated
857+ [VCQ, VA1OrVA2], 4, [0, 1], [1, !add(1, SiFive7GetCyclesDefault<mx>.c)],
858+ mx, sew, IsWorstCase>;
859+ foreach SchedWriteName = ["WriteVFMinMaxV", "WriteVFMinMaxF"] in
860+ defm : LMULSEWWriteResMXSEWVariant<SchedWriteName, SingleElementVecFP64SchedPred,
861+ // Predicated
862+ [VCQ, VA1], !add(SingleElementCycles, 3), [0, 1], [1, !add(1, SingleElementCycles)],
863+ // Not Predicated
864+ [VCQ, VA1], 4, [0, 1], [1, !add(1, SiFive7GetCyclesDefault<mx>.c)],
865+ mx, sew, IsWorstCase>;
866+ } else {
// Other SEWs: unconditional definitions using the default cycle count. The resources
// and latencies (8 and 4) match what the removed code produced when the throttling
// condition was false.
867+ let Latency = 8, AcquireAtCycles = [0, 1], ReleaseAtCycles = [1, !add(1, SiFive7GetCyclesDefault<mx>.c)] in {
868+ defm : LMULSEWWriteResMXSEW<"WriteVFALUV", [VCQ, VA1OrVA2], mx, sew, IsWorstCase>;
869+ defm : LMULSEWWriteResMXSEW<"WriteVFALUF", [VCQ, VA1OrVA2], mx, sew, IsWorstCase>;
870+ defm : LMULSEWWriteResMXSEW<"WriteVFMulV", [VCQ, VA1OrVA2], mx, sew, IsWorstCase>;
871+ defm : LMULSEWWriteResMXSEW<"WriteVFMulF", [VCQ, VA1OrVA2], mx, sew, IsWorstCase>;
872+ defm : LMULSEWWriteResMXSEW<"WriteVFMulAddV", [VCQ, VA1OrVA2], mx, sew, IsWorstCase>;
873+ defm : LMULSEWWriteResMXSEW<"WriteVFMulAddF", [VCQ, VA1OrVA2], mx, sew, IsWorstCase>;
874+ defm : LMULSEWWriteResMXSEW<"WriteVFRecpV", [VCQ, VA1], mx, sew, IsWorstCase>;
875+ defm : LMULSEWWriteResMXSEW<"WriteVFCvtIToFV", [VCQ, VA1], mx, sew, IsWorstCase>;
876+ }
877+ let Latency = 4, AcquireAtCycles = [0, 1], ReleaseAtCycles = [1, !add(1, SiFive7GetCyclesDefault<mx>.c)] in {
878+ defm : LMULSEWWriteResMXSEW<"WriteVFSgnjV", [VCQ, VA1OrVA2], mx, sew, IsWorstCase>;
879+ defm : LMULSEWWriteResMXSEW<"WriteVFSgnjF", [VCQ, VA1OrVA2], mx, sew, IsWorstCase>;
880+ // min max require merge
881+ defm : LMULSEWWriteResMXSEW<"WriteVFMinMaxV", [VCQ, VA1], mx, sew, IsWorstCase>;
882+ defm : LMULSEWWriteResMXSEW<"WriteVFMinMaxF", [VCQ, VA1], mx, sew, IsWorstCase>;
883+ }
858884 }
859885 }
860886 }
@@ -892,31 +918,48 @@ multiclass SiFive7WriteResBase<int VLEN,
892918 // Widening
// Defines WriteVFWCvtIToFV for every (mx, sew) pair drawn from SchedMxListW and the
// widening SEW set.
893919 foreach mx = SchedMxListW in {
894920 foreach sew = SchedSEWSet<mx, isF=0, isWidening=1>.val in {
895- defvar Cycles = !if(!and(isFP64Throttled, !eq(sew, 32)),
896- SiFive7GetCyclesOnePerElement<mx, sew, VLEN>.c,
897- SiFive7GetCyclesDefault<mx>.c);
898921 defvar IsWorstCase = SiFive7IsWorstCaseMXSEW<mx, sew, SchedMxListW>.c;
899- let Latency = 8, AcquireAtCycles = [0, 1], ReleaseAtCycles = [1, !add(1, Cycles)] in
900- defm : LMULSEWWriteResMXSEW<"WriteVFWCvtIToFV", [VCQ, VA1], mx, sew, IsWorstCase>;
922+ defvar DefaultCycles = SiFive7GetCyclesDefault<mx>.c;
// SEW=32: both arms keep [VCQ, VA1] and the value 8; only the second ReleaseAtCycles
// entry differs (SingleElementCycles in the arm labelled "Predicated", DefaultCycles
// in the other). SingleElementVecFP64SchedPred is defined outside this hunk.
923+ if !eq(sew, 32) then {
924+ defvar SingleElementCycles = SiFive7GetCyclesOnePerElement<mx, sew, VLEN>.c;
925+ defm : LMULSEWWriteResMXSEWVariant<"WriteVFWCvtIToFV", SingleElementVecFP64SchedPred,
926+ // Predicated
927+ [VCQ, VA1], 8, [0, 1], [1, !add(1, SingleElementCycles)],
928+ // Not Predicated
929+ [VCQ, VA1], 8, [0, 1], [1, !add(1, DefaultCycles)],
930+ mx, sew, IsWorstCase>;
931+ } else {
// Other SEWs: a single unconditional definition using DefaultCycles.
932+ let Latency = 8,
933+ AcquireAtCycles = [0, 1], ReleaseAtCycles = [1, !add(1, DefaultCycles)] in
934+ defm : LMULSEWWriteResMXSEW<"WriteVFWCvtIToFV", [VCQ, VA1], mx, sew, IsWorstCase>;
935+ }
901936 }
902937 }
903938 foreach mx = SchedMxListFW in {
// Per-SEW widening FP writes: the ALU/Mul/MulAdd writes are unconditional, and
// WriteVFWCvtFToFV is split on SEW=32 below.
904939 foreach sew = SchedSEWSet<mx, isF=1, isWidening=1>.val in {
905- defvar Cycles = SiFive7GetCyclesDefault<mx>.c;
940+ defvar DefaultCycles = SiFive7GetCyclesDefault<mx>.c;
906941 defvar IsWorstCase = SiFive7IsWorstCaseMXSEW<mx, sew, SchedMxListFW, isF=1>.c;
907- let Latency = 8, AcquireAtCycles = [0, 1], ReleaseAtCycles = [1, !add(1, Cycles )] in {
942+ let Latency = 8, AcquireAtCycles = [0, 1], ReleaseAtCycles = [1, !add(1, DefaultCycles )] in {
908943 defm : LMULSEWWriteResMXSEW<"WriteVFWALUV", [VCQ, VA1OrVA2], mx, sew, IsWorstCase>;
909944 defm : LMULSEWWriteResMXSEW<"WriteVFWALUF", [VCQ, VA1OrVA2], mx, sew, IsWorstCase>;
910945 defm : LMULSEWWriteResMXSEW<"WriteVFWMulV", [VCQ, VA1OrVA2], mx, sew, IsWorstCase>;
911946 defm : LMULSEWWriteResMXSEW<"WriteVFWMulF", [VCQ, VA1OrVA2], mx, sew, IsWorstCase>;
912947 defm : LMULSEWWriteResMXSEW<"WriteVFWMulAddV", [VCQ, VA1OrVA2], mx, sew, IsWorstCase>;
913948 defm : LMULSEWWriteResMXSEW<"WriteVFWMulAddF", [VCQ, VA1OrVA2], mx, sew, IsWorstCase>;
914949 }
915- defvar CvtCycles = !if(!and(isFP64Throttled, !eq(sew, 32)),
916- SiFive7GetCyclesOnePerElement<mx, sew, VLEN>.c,
917- SiFive7GetCyclesDefault<mx>.c);
918- let Latency = 8, AcquireAtCycles = [0, 1], ReleaseAtCycles = [1, !add(1, CvtCycles)] in
919- defm "" : LMULSEWWriteResMXSEW<"WriteVFWCvtFToFV", [VCQ, VA1], mx, sew, IsWorstCase>;
// SEW=32: WriteVFWCvtFToFV gets two arms that differ only in the second
// ReleaseAtCycles entry (SingleElementCycles vs. DefaultCycles); resources and the
// value 8 are the same in both. SingleElementVecFP64SchedPred is defined outside
// this hunk.
950+ if !eq(sew, 32) then {
951+ defvar SingleElementCycles = SiFive7GetCyclesOnePerElement<mx, sew, VLEN>.c;
952+ defm : LMULSEWWriteResMXSEWVariant<"WriteVFWCvtFToFV", SingleElementVecFP64SchedPred,
953+ // Predicated
954+ [VCQ, VA1], 8, [0, 1], [1, !add(1, SingleElementCycles)],
955+ // Not Predicated
956+ [VCQ, VA1], 8, [0, 1], [1, !add(1, DefaultCycles)],
957+ mx, sew, IsWorstCase>;
958+ } else {
// Other SEWs: a single unconditional definition using DefaultCycles.
959+ let Latency = 8,
960+ AcquireAtCycles = [0, 1], ReleaseAtCycles = [1, !add(1, DefaultCycles)] in
961+ defm : LMULSEWWriteResMXSEW<"WriteVFWCvtFToFV", [VCQ, VA1], mx, sew, IsWorstCase>;
962+ }
920963 }
921964 defvar Cycles = SiFive7GetCyclesDefault<mx>.c;
922965 defvar IsWorstCase = SiFive7IsWorstCaseMX<mx, SchedMxListFW>.c;
@@ -933,13 +976,23 @@ multiclass SiFive7WriteResBase<int VLEN,
933976 }
// Defines WriteVFNCvtIToFV and WriteVFNCvtFToFV for every (mx, sew) pair drawn from
// SchedMxListFW and the widening FP SEW set.
934977 foreach mx = SchedMxListFW in {
935978 foreach sew = SchedSEWSet<mx, isF=1, isWidening=1>.val in {
936- defvar Cycles = !if(!and(isFP64Throttled, !eq(sew, 32)),
937- SiFive7GetCyclesOnePerElement<mx, sew, VLEN>.c,
938- SiFive7GetCyclesNarrowing<mx>.c);
939979 defvar IsWorstCase = SiFive7IsWorstCaseMXSEW<mx, sew, SchedMxListFW, isF=1>.c;
// NOTE(review): in this loop DefaultCycles is bound to SiFive7GetCyclesNarrowing, not
// SiFive7GetCyclesDefault as in the other loops of this change; the name is easy to
// misread.
940980+ defvar DefaultCycles = SiFive7GetCyclesNarrowing<mx>.c;
// SEW=32: both arms keep [VCQ, VA1] and the value 8; only the second ReleaseAtCycles
// entry differs (SingleElementCycles vs. DefaultCycles). SingleElementVecFP64SchedPred
// is defined outside this hunk.
981+ if !eq(sew, 32) then {
982+ defvar SingleElementCycles = SiFive7GetCyclesOnePerElement<mx, sew, VLEN>.c;
983+ foreach SchedWriteName = ["WriteVFNCvtIToFV", "WriteVFNCvtFToFV"] in
984+ defm : LMULSEWWriteResMXSEWVariant<SchedWriteName, SingleElementVecFP64SchedPred,
985+ // Predicated
986+ [VCQ, VA1], 8, [0, 1], [1, !add(1, SingleElementCycles)],
987+ // Not Predicated
988+ [VCQ, VA1], 8, [0, 1], [1, !add(1, DefaultCycles)],
989+ mx, sew, IsWorstCase>;
990+ } else {
// Other SEWs: unconditional definitions using the narrowing cycle count.
991+ let Latency = 8,
992+ AcquireAtCycles = [0, 1], ReleaseAtCycles = [1, !add(1, DefaultCycles)] in {
993+ defm : LMULSEWWriteResMXSEW<"WriteVFNCvtIToFV", [VCQ, VA1], mx, sew, IsWorstCase>;
994+ defm : LMULSEWWriteResMXSEW<"WriteVFNCvtFToFV", [VCQ, VA1], mx, sew, IsWorstCase>;
995+ }
943996 }
944997 }
945998 }
@@ -1499,7 +1552,6 @@ multiclass SiFive7ReadAdvance {
14991552/// eventually be supplied by different SchedMachineModels.
15001553multiclass SiFive7SchedResources<int vlen, bit extraVALU,
15011554 SiFive7FPLatencies fpLatencies,
1502- bit isFP64Throttled,
15031555 bit hasFastGather> {
15041556 defm SiFive7 : SiFive7ProcResources<extraVALU>;
15051557
@@ -1527,8 +1579,7 @@ multiclass SiFive7SchedResources<int vlen, bit extraVALU,
15271579 : SiFive7WriteResBase<vlen, SiFive7PipeA, SiFive7PipeB, SiFive7PipeAB,
15281580 SiFive7IDiv, SiFive7FDiv, SiFive7VA1,
15291581 SiFive7VA1OrVA2, SiFive7VL, SiFive7VS,
1530- SiFive7VCQ, fpLatencies, isFP64Throttled,
1531- hasFastGather>;
1582+ SiFive7VCQ, fpLatencies, hasFastGather>;
15321583
15331584 //===----------------------------------------------------------------------===//
15341585 // Bypass and advance
@@ -1560,7 +1611,6 @@ class SiFive7SchedMachineModel<int vlen> : SchedMachineModel {
15601611 bit HasExtraVALU = false;
15611612
15621613 SiFive7FPLatencies FPLatencies;
1563- bit IsFP64Throttled = false;
15641614 bit HasFastGather = false;
15651615
15661616 string Name = !subst("Model", "", !subst("SiFive7", "", NAME));
@@ -1587,7 +1637,6 @@ def SiFive7VLEN512Model : SiFive7SchedMachineModel<512> {
// Model derived from SiFive7SchedMachineModel<1024>: turns on HasExtraVALU and
// HasFastGather and selects SiFive7LowFPLatencies. The IsFP64Throttled override is
// removed by this change.
15871637def SiFive7VLEN1024X300Model : SiFive7SchedMachineModel<1024> {
15881638 let HasExtraVALU = true;
15891639 let FPLatencies = SiFive7LowFPLatencies;
1590- let IsFP64Throttled = true;
15911640 let HasFastGather = true;
15921641}
15931642
@@ -1596,7 +1645,6 @@ foreach model = [SiFive7VLEN512Model, SiFive7VLEN1024X300Model] in {
// Instantiates SiFive7SchedResources for `model` with SchedModel bound to it, passing
// the model's VLEN, HasExtraVALU, FPLatencies and HasFastGather fields. The
// IsFP64Throttled argument is dropped by this change.
15961645 let SchedModel = model in
15971646 defm model.Name : SiFive7SchedResources<model.VLEN, model.HasExtraVALU,
15981647 model.FPLatencies,
1599- model.IsFP64Throttled,
16001648 model.HasFastGather>;
16011649}
16021650
0 commit comments