Skip to content

Commit d8e59a1

Browse files
aratajewigcbot
authored and committed
Report error if `intel_reqd_sub_group_size` cannot be satisfied
Without this change, IGC silently switches to SIMD16 for kernels with nested stackcalls even though the kernel has the `intel_reqd_sub_group_size(32)` attribute attached.
1 parent 6c9722c commit d8e59a1

File tree

3 files changed

+85
-35
lines changed

3 files changed

+85
-35
lines changed

IGC/Compiler/CISACodeGen/OpenCLKernelCodeGen.cpp

Lines changed: 55 additions & 35 deletions
Original file line numberDiff line numberDiff line change
@@ -643,6 +643,39 @@ namespace IGC
643643
return result;
644644
}
645645

646+
// Returns the SIMD size specified by the intel_reqd_sub_group_size attribute
// that applies to F.
//
// F             - function whose required sub-group size is queried.
// MDUtils       - metadata accessor used to read the per-function info.
//
// The value is first read from F's own function-info metadata. When function
// group analysis (m_FGA) is available and F belongs to a group, the value
// stored on the group's head function (the kernel) is returned instead.
uint32_t COpenCLKernel::getReqdSubGroupSize(llvm::Function& F, MetaDataUtils* MDUtils) const
{
    FunctionInfoMetaDataHandle funcInfoMD = MDUtils->getFunctionsInfoItem(&F);
    int simd_size = funcInfoMD->getSubGroupSize()->getSIMDSize();

    // Find the kernel and take the group SIMD size from the kernel.
    if (m_FGA)
    {
        // Other code in this file null-checks the result of getGroup(), so do
        // not dereference it blindly; keep F's own value when there is no group.
        if (auto FG = m_FGA->getGroup(&F))
        {
            llvm::Function* Kernel = FG->getHead();
            funcInfoMD = MDUtils->getFunctionsInfoItem(Kernel);
            simd_size = funcInfoMD->getSubGroupSize()->getSIMDSize();
        }
    }
    return static_cast<uint32_t>(simd_size);
}
662+
663+
// Returns the maximum register pressure recorded in the function-info
// metadata that applies to F.
//
// F             - function whose max register pressure is queried.
// MDUtils       - metadata accessor used to read the per-function info.
//
// The value is first read from F's own function-info metadata. When function
// group analysis (m_FGA) is available and F belongs to a group, the value
// stored on the group's head function (the kernel) is returned instead.
uint32_t COpenCLKernel::getMaxPressure(llvm::Function& F, MetaDataUtils* MDUtils) const
{
    FunctionInfoMetaDataHandle funcInfoMD = MDUtils->getFunctionsInfoItem(&F);
    unsigned int maxPressure = funcInfoMD->getMaxRegPressure()->getMaxPressure();

    if (m_FGA)
    {
        // Other code in this file null-checks the result of getGroup(), so do
        // not dereference it blindly; keep F's own value when there is no group.
        if (auto FG = m_FGA->getGroup(&F))
        {
            llvm::Function* Kernel = FG->getHead();
            funcInfoMD = MDUtils->getFunctionsInfoItem(Kernel);
            maxPressure = funcInfoMD->getMaxRegPressure()->getMaxPressure();
        }
    }
    return maxPressure;
}
678+
646679
void COpenCLKernel::CreateKernelArgInfo()
647680
{
648681
auto funcMDIt = m_Context->getModuleMetaData()->FuncMD.find(entry);
@@ -3659,7 +3692,25 @@ namespace IGC
36593692

36603693
// Func and Perf checks pass, compile this SIMD
36613694
if (simdStatus == SIMDStatus::SIMD_PASS)
3695+
{
36623696
return true;
3697+
}
3698+
// Report an error if intel_reqd_sub_group_size cannot be satisfied
3699+
else
3700+
{
3701+
MetaDataUtils* pMdUtils = EP.getAnalysis<MetaDataUtilsWrapper>().getMetaDataUtils();
3702+
CodeGenContext* ctx = GetContext();
3703+
auto reqdSubGroupSize = getReqdSubGroupSize(F, pMdUtils);
3704+
if (reqdSubGroupSize == numLanes(simdMode))
3705+
{
3706+
ctx->EmitError(
3707+
(std::string("Cannot compile a kernel in the SIMD mode specified by intel_reqd_sub_group_size(") +
3708+
std::to_string(reqdSubGroupSize) +
3709+
std::string(")")).c_str(),
3710+
&F);
3711+
return false;
3712+
}
3713+
}
36633714

36643715
// Functional failure, skip compiling this SIMD
36653716
if (simdStatus == SIMDStatus::SIMD_FUNC_FAIL)
@@ -3682,28 +3733,16 @@ namespace IGC
36823733
CodeGenContext* pCtx = GetContext();
36833734
MetaDataUtils* pMdUtils = EP.getAnalysis<MetaDataUtilsWrapper>().getMetaDataUtils();
36843735
FunctionInfoMetaDataHandle funcInfoMD = pMdUtils->getFunctionsInfoItem(&F);
3685-
int simd_size = funcInfoMD->getSubGroupSize()->getSIMDSize();
3736+
uint32_t simd_size = getReqdSubGroupSize(F, pMdUtils);
36863737
bool hasSubGroupForce = hasSubGroupIntrinsicPVC(F);
3687-
unsigned int maxPressure = funcInfoMD->getMaxRegPressure()->getMaxPressure();
3688-
3689-
// Finds the kernel and get the group simd size from the kernel
3690-
if (m_FGA)
3691-
{
3692-
llvm::Function* Kernel = &F;
3693-
auto FG = m_FGA->getGroup(&F);
3694-
Kernel = FG->getHead();
3695-
funcInfoMD = pMdUtils->getFunctionsInfoItem(Kernel);
3696-
simd_size = funcInfoMD->getSubGroupSize()->getSIMDSize();
3697-
maxPressure = funcInfoMD->getMaxRegPressure()->getMaxPressure();
3698-
}
3738+
uint32_t maxPressure = getMaxPressure(F, pMdUtils);
36993739

37003740
auto FG = m_FGA ? m_FGA->getGroup(&F) : nullptr;
37013741
bool hasStackCall = FG && FG->hasStackCall();
37023742
bool isIndirectGroup = FG && m_FGA->isIndirectCallGroup(FG);
37033743
bool hasSubroutine = FG && !FG->isSingleIgnoringStackOverflowDetection() && !hasStackCall && !isIndirectGroup;
37043744
bool forceLowestSIMDForStackCalls = IGC_IS_FLAG_ENABLED(ForceLowestSIMDForStackCalls) && (hasStackCall || isIndirectGroup);
37053745

3706-
37073746
if (simd_size == 0)
37083747
{
37093748
if (maxPressure >= IGC_GET_FLAG_VALUE(ForceSIMDRPELimit) &&
@@ -3849,19 +3888,8 @@ namespace IGC
38493888
MetaDataUtils* pMdUtils = EP.getAnalysis<MetaDataUtilsWrapper>().getMetaDataUtils();
38503889
ModuleMetaData* modMD = pCtx->getModuleMetaData();
38513890
FunctionInfoMetaDataHandle funcInfoMD = pMdUtils->getFunctionsInfoItem(&F);
3852-
int simd_size = funcInfoMD->getSubGroupSize()->getSIMDSize();
3853-
unsigned int maxPressure = funcInfoMD->getMaxRegPressure()->getMaxPressure();
3854-
3855-
// Finds the kernel and get the group simd size from the kernel
3856-
if (m_FGA)
3857-
{
3858-
llvm::Function* Kernel = &F;
3859-
auto FG = m_FGA->getGroup(&F);
3860-
Kernel = FG->getHead();
3861-
funcInfoMD = pMdUtils->getFunctionsInfoItem(Kernel);
3862-
simd_size = funcInfoMD->getSubGroupSize()->getSIMDSize();
3863-
maxPressure = funcInfoMD->getMaxRegPressure()->getMaxPressure();
3864-
}
3891+
uint32_t simd_size = getReqdSubGroupSize(F, pMdUtils);
3892+
uint32_t maxPressure = getMaxPressure(F, pMdUtils);
38653893

38663894
// For simd variant functions, detect which SIMD sizes are needed
38673895
if (compileFunctionVariants && F.hasFnAttribute("variant-function-def"))
@@ -3896,14 +3924,6 @@ namespace IGC
38963924
bool hasIndirectCall = FG && FG->hasIndirectCall();
38973925
if (hasNestedCall || hasIndirectCall || isIndirectGroup)
38983926
{
3899-
// If sub_group_size is set to 32, resize it to 16 so SIMD16 compilation will still succeed
3900-
if (simd_size == 32)
3901-
{
3902-
pCtx->EmitWarning("Detected 'reqd_sub_group_size=32', but compiling to SIMD16 due to enabling CallWA, which does not support SIMD32 when nested/indirect calls are present.");
3903-
llvm::Function* Kernel = FG->getHead();
3904-
funcInfoMD = pMdUtils->getFunctionsInfoItem(Kernel);
3905-
funcInfoMD->getSubGroupSize()->setSIMDSize(16);
3906-
}
39073927
pCtx->SetSIMDInfo(SIMD_SKIP_HW, simdMode, ShaderDispatchMode::NOT_APPLICABLE);
39083928
return SIMDStatus::SIMD_FUNC_FAIL;
39093929
}

IGC/Compiler/CISACodeGen/OpenCLKernelCodeGen.hpp

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -263,6 +263,9 @@ namespace IGC
263263
std::string getKernelArgTypeQualifier(const FunctionMetaData& funcMD, uint argIndex) const;
264264
std::string getKernelArgAddressQualifier(const FunctionMetaData& funcMD, uint argIndex) const;
265265
std::string getKernelArgAccessQualifier(const FunctionMetaData& funcMD, uint argIndex) const;
266+
// Helper function to get SIMD size specified in intel_reqd_sub_group_size attribute
267+
uint32_t getReqdSubGroupSize(llvm::Function& F, IGC::IGCMD::MetaDataUtils* MDUtils) const;
268+
uint32_t getMaxPressure(llvm::Function& F, IGC::IGCMD::MetaDataUtils* MDUtils) const;
266269
};
267270

268271
void CodeGen(OpenCLProgramContext* ctx);
Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,27 @@
1+
/*========================== begin_copyright_notice ============================
2+
3+
Copyright (C) 2024 Intel Corporation
4+
5+
SPDX-License-Identifier: MIT
6+
7+
============================= end_copyright_notice ===========================*/
8+
9+
// REQUIRES: dg2-supported
10+
// RUN: not ocloc compile -file %s -device dg2 | FileCheck %s
11+
12+
// IGC enables EUFusion CallWA for DG2 when nested stackcalls or indirect calls
13+
// are present in a module. The workaround is not supported in SIMD32. This test
14+
// verifies if a proper error message is printed if CallWA is required and
15+
// intel_reqd_sub_group_size kernel attribute is set to 32.
16+
17+
// CHECK: error: Cannot compile a kernel in the SIMD mode specified by intel_reqd_sub_group_size(32)
18+
// CHECK-NEXT: in kernel: 'test_simple'
19+
20+
// Recursive factorial helper: returns 1 for n < 2, otherwise n * fact(n - 1).
// NOTE(review): the self-recursive call is presumably what produces the nested
// stackcall this test relies on -- confirm before rewriting it iteratively.
int fact(int n) {
21+
return n < 2 ? 1 : n * fact(n - 1);
22+
}
23+
24+
// Kernel under test: requests a sub-group size of 32 and stores fact(n) in out[0].
// The RUN line uses `not ocloc`, and the CHECK lines expect an error that names
// 'test_simple', so the kernel name and the attribute value must stay as they are.
__attribute__((intel_reqd_sub_group_size(32)))
25+
kernel void test_simple(global int* out, int n) {
26+
out[0] = fact(n);
27+
}
}

0 commit comments

Comments
 (0)