Skip to content

Commit 582b5ab

Browse files
committed
Synchronisation commit to: [IGC Backout][IGC][DNB]: Backout CL734782 due to build break
Change-Id: I9b4ea3d35b4f494c512ffd38c712794428d95321
1 parent 0d7bc01 commit 582b5ab

File tree

10 files changed

+390
-72
lines changed

10 files changed

+390
-72
lines changed

IGC/AdaptorOCL/DriverInfoOCL.hpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -58,7 +58,7 @@ namespace TC
5858
bool NeedExtraPassesAfterAlwaysInlinerPass() const { return true; }
5959
bool enableVISAPreRAScheduler() const override { return true; }
6060

61-
61+
bool NeedWAToTransformA32MessagesToA64() const override{ return true; }
6262
bool WALoadStorePatternMatch() const override { return true; }
6363
bool WADisableCustomPass() const override { return true; }
6464
bool WAEnableMemOpt2ForOCL() const override { return true; }

IGC/AdaptorOCL/dllInterfaceCompute.cpp

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -579,6 +579,9 @@ bool TranslateBuild(
579579
llvm::Module* pKernelModule = nullptr;
580580
LLVMContextWrapper* llvmContext = new LLVMContextWrapper;
581581
RegisterComputeErrHandlers(*llvmContext);
582+
583+
ShaderHash inputShHash = ShaderHashOCL((const UINT*)pInputArgs->pInput, pInputArgs->InputSize / 4);
584+
582585
if (!ParseInput(pKernelModule, pInputArgs, pOutputArgs, *llvmContext, inputDataFormatTemp))
583586
{
584587
return false;
@@ -615,7 +618,7 @@ bool TranslateBuild(
615618
deserialize(*oclContext.getModuleMetaData(), pKernelModule);
616619
}
617620

618-
oclContext.hash = ShaderHashOCL((const UINT*)pInputArgs->pInput, pInputArgs->InputSize / 4);
621+
oclContext.hash = inputShHash;
619622
oclContext.annotater = nullptr;
620623

621624
// Set default denorm.

IGC/CMakeLists.txt

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2117,9 +2117,13 @@ igc_arch_get_cpu(_cpuSuffix "${IGC_OPTION__ARCHITECTURE_TARGET}")
21172117
if(NOT DEFINED IGC_OPTION__OUTPUT_DIR)
21182118
set(IGC_OPTION__OUTPUT_DIR "${IGC_SOURCE_DIR}/../../dump${_cpuSuffix}/igc" CACHE PATH "Output directory path where the final libraries will be stored.")
21192119
endif()
2120+
get_filename_component(IGC_OPTION__OUTPUT_DIR ${IGC_OPTION__OUTPUT_DIR} ABSOLUTE)
21202121
set(USC_OPTION__OUTPUT_DIR "${IGC_SOURCE_DIR}/../../dump${_cpuSuffix}/usc" CACHE PATH "Output directory path where the final USC libraries will be stored.")
21212122
unset(_cpuSuffix)
21222123

2124+
if(NOT DEFINED IGC_OPTION__INCLUDE_IGC_COMPILER_TOOLS)
2125+
set(IGC_OPTION__INCLUDE_IGC_COMPILER_TOOLS OFF)
2126+
endif()
21232127

21242128
set(IGC_OPTION__COMPILE_LINK_ALLOW_UNSAFE_SIZE_OPT ON CACHE BOOL "Compile/link: Allow unsafe size optimization like --gc-sections.")
21252129
mark_as_advanced(IGC_OPTION__COMPILE_LINK_ALLOW_UNSAFE_SIZE_OPT)

IGC/Compiler/CISACodeGen/CheckInstrTypes.cpp

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -255,8 +255,7 @@ void CheckInstrTypes::visitStoreInst(StoreInst &I)
255255
g_InstrTypes->numInsts++;
256256
g_InstrTypes->hasLoadStore = true;
257257
uint as = I.getPointerAddressSpace();
258-
BufferType bufType = GetBufferType(as);
259-
if (bufType != BUFFER_TYPE_UNKNOWN)
258+
if(as != ADDRESS_SPACE_PRIVATE)
260259
{
261260
g_InstrTypes->psHasSideEffect = true;
262261
}

IGC/Compiler/Optimizer/OpenCLPasses/DeviceEnqueueFuncs/TransformBlocks.cpp

Lines changed: 16 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -965,6 +965,21 @@ namespace //Anonymous
965965
}
966966
};
967967

968+
class KernelMaxWorkGroupSizeCall : public CallHandler
969+
{
970+
public:
971+
explicit KernelMaxWorkGroupSizeCall(DeviceExecCallArgs* call) : CallHandler(call)
972+
{}
973+
974+
virtual llvm::Value* getNewValue(const Dispatcher* dispatcher) override
975+
{
976+
const auto newName = "__builtin_IB_get_max_workgroup_size";
977+
auto calledFunction = _deviceExecCall->getCalledFunction();
978+
if (calledFunction == nullptr) report_fatal_error("indirect calls are not supported");
979+
return CreateNewCall(newName, calledFunction->getReturnType(), {});
980+
}
981+
};
982+
968983
//////////////////////////////////////////////////////////////////////////
969984
/// Handle get_kernel_sub_group_count_for_ndrange() call
970985
//////////////////////////////////////////////////////////////////////////
@@ -1651,7 +1666,7 @@ namespace //Anonymous
16511666
},
16521667
{
16531668
FNAME_WORK_GROUP_SIZE_IMPL,
1654-
[](llvm::CallInst& call, DataContext& dm) { return new KernelSubGroupSizeCall(new ObjCBlockCallArgs(call, dm)); }
1669+
[](llvm::CallInst& call, DataContext& dm) { return new KernelMaxWorkGroupSizeCall(new ObjCBlockCallArgs(call, dm)); }
16551670
},
16561671
{
16571672
FNAME_SUB_GROUP_COUNT_FOR_NDRANGE,

IGC/Compiler/Optimizer/OpenCLPasses/StatelessToStatefull/StatelessToStatefull.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -366,7 +366,7 @@ bool StatelessToStatefull::pointerIsPositiveOffsetFromKernelArgument(
366366
updateArgInfo(arg, gepProducesPositivePointer);
367367
}
368368
}
369-
if (gepProducesPositivePointer &&
369+
if ((gepProducesPositivePointer || m_hasBufferOffsetArg) &&
370370
getOffsetFromGEP(F, GEPs, argNumber, arg->isImplicitArg(), offset))
371371
{
372372
return true;

IGC/OCLFE/CMakeLists.txt

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,8 @@ set(IGC_BUILD__SRC__FCL
2323
"${CMAKE_CURRENT_SOURCE_DIR}/igd_fcl_mcl/source/clang_tb.cpp"
2424
"${CMAKE_CURRENT_SOURCE_DIR}/igd_fcl_mcl/source/clang_debug.cpp"
2525
"${CMAKE_CURRENT_SOURCE_DIR}/igd_fcl_mcl/source/LoadBuffer.cpp"
26+
"${CMAKE_CURRENT_SOURCE_DIR}/../common/igc_regkeys.cpp"
27+
"${CMAKE_CURRENT_SOURCE_DIR}/../AdaptorCommon/customApi.cpp"
2628
"${IGC_BUILD__SRC__IGC_Common_CLElfLib}"
2729
)
2830

IGC/OCLFE/igd_fcl_mcl/source/clang_tb.cpp

Lines changed: 78 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -41,6 +41,40 @@ SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
4141
#include <sstream>
4242
#include <stdlib.h>
4343
#include <string>
44+
#include <iomanip>
45+
46+
#if defined( _DEBUG ) || defined( _INTERNAL )
47+
#define IGC_DEBUG_VARIABLES
48+
#endif
49+
50+
51+
#if defined(IGC_DEBUG_VARIABLES)
52+
#include "common/Types.hpp"
53+
#include "common/igc_regkeys.hpp"
54+
#include "AdaptorCommon/customApi.hpp"
55+
#include "3d/common/iStdLib/utility.h"
56+
#include <mutex>
57+
58+
namespace IGC
59+
{
60+
namespace Debug
61+
{
62+
63+
static std::mutex stream_mutex;
64+
65+
void DumpLock()
66+
{
67+
stream_mutex.lock();
68+
}
69+
70+
void DumpUnlock()
71+
{
72+
stream_mutex.unlock();
73+
}
74+
75+
}
76+
}
77+
#endif
4478

4579
#ifndef WIN32
4680
#include <dlfcn.h>
@@ -250,6 +284,13 @@ namespace TC
250284
#endif
251285
}
252286

287+
#if defined(IGC_DEBUG_VARIABLES)
288+
if (success)
289+
{
290+
LoadRegistryKeys();
291+
}
292+
#endif
293+
253294
if (!success)
254295
{
255296
CClangTranslationBlock::Delete(pTranslationBlock);
@@ -1007,6 +1048,43 @@ namespace TC
10071048
&args,
10081049
exceptString);
10091050
bool successTC = TranslateClang(&args, pOutputArgs, exceptString, pInputArgs->pInternalOptions);
1051+
1052+
#if defined(IGC_DEBUG_VARIABLES)
1053+
if (IGC_IS_FLAG_ENABLED(ShaderDumpEnable))
1054+
{
1055+
1056+
// Works for all OSes. Creates dir if necessary.
1057+
const char *pOutputFolder = IGC::Debug::GetShaderOutputFolder();
1058+
stringstream ss;
1059+
char* pBuffer = (char *)pInputArgs->pInput;
1060+
UINT bufferSize = pInputArgs->InputSize;
1061+
1062+
// Create hash based on cclang binary output (currently llvm binary; later also spirv).
1063+
// Hash computed in fcl needs to be same as the one computed in igc.
1064+
// This is to ensure easy matching .cl files dumped in fcl with .ll/.dat/.asm/... files dumoed in igc.
1065+
QWORD hash = iSTD::Hash(reinterpret_cast<const DWORD *>(pOutputArgs->pOutput), int_cast<DWORD>(pOutputArgs->OutputSize) / 4);
1066+
1067+
ss << pOutputFolder;
1068+
ss << "OCL_"
1069+
<< "asm"
1070+
<< std::hex
1071+
<< std::setfill('0')
1072+
<< std::setw(sizeof(hash) * CHAR_BIT / 4)
1073+
<< hash
1074+
<< std::dec
1075+
<< std::setfill(' ')
1076+
<< ".cl";
1077+
1078+
FILE* pFile = NULL;
1079+
fopen_s(&pFile, ss.str().c_str(), "wb");
1080+
if (pFile)
1081+
{
1082+
fwrite(pBuffer, 1, bufferSize - 1, pFile);
1083+
fclose(pFile);
1084+
}
1085+
}
1086+
#endif
1087+
10101088
if (exceptString.empty())
10111089
{
10121090
return successTC;

visa/Optimizer.cpp

Lines changed: 50 additions & 41 deletions
Original file line numberDiff line numberDiff line change
@@ -2927,8 +2927,17 @@ void Optimizer::reassociateConst()
29272927
// can't sink source if def overwrites it
29282928
return false;
29292929
}
2930+
// additionally check for the use inst that dst type size is >= src type size
2931+
// otherwise the first add may truncate upper bits due to overflow,
2932+
// which makes reassociation unsafe
2933+
if (getTypeSize(useSrc->getType()) < getTypeSize(use->getDst()->getType()))
2934+
{
2935+
return false;
2936+
}
2937+
29302938
return true;
29312939
};
2940+
29322941
if (isGoodSrc0Def(src0Def, inst) && !chkFwdOutputHazard(src0Def, iter))
29332942
{
29342943
//std::cout << "reassociate: \n";
@@ -2977,79 +2986,79 @@ G4_Imm* Optimizer::foldConstVal(G4_Imm* const1, G4_Imm* const2, G4_opcode op)
29772986
G4_Type src0T = const1->getType(), src1T = const2->getType(), resultType = src0T;
29782987

29792988
if (op == G4_add || op == G4_mul || op == G4_and)
2989+
{
2990+
resultType = findConstFoldCommonType(src0T, src1T);
2991+
if (resultType == Type_UNDEF)
29802992
{
2981-
resultType = findConstFoldCommonType( src0T, src1T );
2982-
if (resultType == Type_UNDEF)
2983-
{
29842993
return nullptr;
2985-
}
2994+
}
29862995

2987-
int64_t res;
2996+
int64_t res;
29882997
switch (op)
2989-
{
2990-
case G4_add:
2998+
{
2999+
case G4_add:
29913000
res = (int64_t)(const1->getInt()) + (int64_t)(const2->getInt());
2992-
break;
3001+
break;
29933002

2994-
case G4_mul:
3003+
case G4_mul:
29953004
res = (int64_t)(const1->getInt()) * (int64_t)(const2->getInt());
2996-
break;
3005+
break;
29973006

2998-
case G4_and:
3007+
case G4_and:
29993008
res = (int64_t)(const1->getInt()) & (int64_t)(const2->getInt());
3000-
break;
3009+
break;
30013010

3002-
default:
3011+
default:
30033012
return nullptr;
3004-
}
3013+
}
30053014

3006-
// result type is either D or UD
3007-
// don't fold if the value overflows D/UD
3008-
if (!G4_Imm::isInTypeRange(res, resultType))
3009-
{
3015+
// result type is either D or UD
3016+
// don't fold if the value overflows D/UD
3017+
if (!G4_Imm::isInTypeRange(res, resultType))
3018+
{
30103019
return nullptr;
3011-
}
3012-
return builder.createImmWithLowerType(res, resultType);
30133020
}
3014-
else
3015-
{
3021+
return builder.createImmWithLowerType(res, resultType);
3022+
}
3023+
else
3024+
{
30163025
uint32_t shift = const2->getInt() & 0x1f;
30173026

30183027
if (op == G4_shl || op == G4_shr)
3019-
{
3028+
{
30203029
uint32_t value = (uint32_t)const1->getInt();
3021-
// set result type to D/UD as it may overflow W. If the value fits the type will be lowered later
3022-
// source type matters here since it affects sign extension
3023-
resultType = IS_SIGNED_INT(resultType) ? Type_D : Type_UD;
3030+
// set result type to D/UD as it may overflow W. If the value fits the type will be lowered later
3031+
// source type matters here since it affects sign extension
3032+
resultType = IS_SIGNED_INT(resultType) ? Type_D : Type_UD;
30243033
int64_t res = op == G4_shl ?
3025-
((int64_t) value) << shift :
3026-
value >> shift;
3027-
if (!G4_Imm::isInTypeRange(res, resultType))
3028-
{
3034+
((int64_t)value) << shift :
3035+
value >> shift;
3036+
if (!G4_Imm::isInTypeRange(res, resultType))
3037+
{
30293038
return nullptr;
3030-
}
3039+
}
30313040

30323041
return builder.createImmWithLowerType(res, resultType);
3033-
}
3042+
}
30343043

30353044
if (op == G4_asr)
3045+
{
3046+
if (IS_SIGNED_INT(resultType))
30363047
{
3037-
if( IS_SIGNED_INT(resultType) )
3038-
{
30393048
int64_t value = const1->getInt();
3040-
int64_t res = value >> shift;
3049+
int64_t res = value >> shift;
30413050
return builder.createImmWithLowerType(res, resultType);
3042-
}
3043-
else
3044-
{
3051+
}
3052+
else
3053+
{
30453054
uint64_t value = const1->getInt();
3046-
uint64_t res = value >> shift;
3055+
uint64_t res = value >> shift;
30473056
return builder.createImmWithLowerType(res, resultType);
3048-
}
30493057
}
30503058
}
3051-
return nullptr;
30523059
}
3060+
return nullptr;
3061+
}
30533062

30543063

30553064

0 commit comments

Comments
 (0)