Skip to content

Commit c3ebe82

Browse files
jgu222igcbot
authored andcommitted
Fixed the alignment for indirect float/half dst
Float/half operations require all operands to have the same offsets. If the dst is indirect, we have to assume the operands are not aligned, so that raw mov insts are generated. Raw mov instructions can be changed to int type, as int indirect addressing is more flexible. This change makes sure that fixDstAlignment() will be invoked for an indirect float/half dst.
1 parent 34985c0 commit c3ebe82

File tree

2 files changed

+30
-2
lines changed

2 files changed

+30
-2
lines changed

visa/HWConformity.cpp

Lines changed: 25 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1746,6 +1746,29 @@ bool HWConformity::fixDstAlignment(INST_LIST_ITER i, G4_BB* bb, G4_Type extype,
17461746
return false;
17471747
}
17481748

1749+
auto allScalarSrcs = [](G4_INST* I)
1750+
{
1751+
// Return true if all of I's srcs are scalar; false otherwise.
1752+
bool allScalarSrc = true;
1753+
for (int i = 0, nsrc = I->getNumSrc(); i < nsrc; i++)
1754+
{
1755+
G4_Operand* src = I->getSrc(i);
1756+
if (src && !src->isNullReg() && src->isSrcRegRegion() &&
1757+
!(src->asSrcRegRegion()->getRegion()->isScalar()))
1758+
{
1759+
allScalarSrc = false;
1760+
break;
1761+
}
1762+
}
1763+
return allScalarSrc;
1764+
};
1765+
1766+
// A float operation requires the operands of every channel to start at the same offset,
1767+
// except for scalar operands.
1768+
// If dst is indirect and not all srcs are scalar, assume it is not aligned.
1769+
bool FPDstUnaligned_restriction9_7 =
1770+
((extype == Type_F || extype == Type_HF) && dst->getRegAccess() != Direct && !allScalarSrcs(inst));
1771+
17491772
bool dstHFMixModeInst = inst->getDst()->getType() == builder.getMixModeType() && extype == Type_F;
17501773
bool dstNotAlignedToExecType = exec_size > 1 && (dst_elsize * h_stride) < extypesize &&
17511774
!(builder.hasMixMode() && dstHFMixModeInst);
@@ -1764,7 +1787,8 @@ bool HWConformity::fixDstAlignment(INST_LIST_ITER i, G4_BB* bb, G4_Type extype,
17641787
dst->getRegAccess() != Direct &&
17651788
!(byteDst && extypesize == 2 && exec_size == 1)
17661789
) ||
1767-
dstNotAlignedToExecType)
1790+
dstNotAlignedToExecType ||
1791+
FPDstUnaligned_restriction9_7)
17681792
{
17691793
/*
17701794
* 10.3

visa/ReduceExecSize.cpp

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -164,7 +164,11 @@ bool HWConformity::fixInstOpndTypeAlign(INST_LIST_ITER i, G4_BB* bb)
164164
if (extypesize < (int)kernel.numEltPerGRF<Type_UB>()/2)
165165
{
166166
uint32_t dst_elsize = inst->getDst()->getTypeSize();
167-
if (dst_elsize < (unsigned int)extypesize)
167+
if (dst_elsize < (unsigned int)extypesize ||
168+
// indirect float type needs to be handled as well.
169+
// See fixDstAlignment for detail
170+
((extype == Type_F || extype == Type_HF) &&
171+
inst->getDst()->getRegAccess() != Direct))
168172
{
169173
if (fixDstAlignment(i, bb, extype, dst_elsize))
170174
{

0 commit comments

Comments
 (0)