Skip to content
This repository was archived by the owner on Jan 23, 2023. It is now read-only.

Commit 38df707

Browse files
committed
ARM64: Enable End-To-End ReadyToRun (R2R) Crossgen
Fixes https://github.com/dotnet/coreclr/issues/4649. The immediate issue was an NYI in `genEmitHelperCall`. The initial implementation of the missing part was enough to crossgen System.dll, but running tests revealed various issues in the crossgened (R2R) binaries. Most common user/helper calls in R2R are represented as indirect calls, similar to an interface call using a virtual stub dispatch cell -- the thunk/helper needs the indirection cell address in order to update the final target address at that data location. `IsDelayLoadHelper` and `IsLazyHelper` belong to this case. Instead of passing such a parameter, x64/x86 uses an encoding trick -- it assumes the call is dispatched like `call [addr]`, so the runtime can extract the indirection cell address from the return address. Unfortunately this is not an option for arm64 (nor for arm, but I haven't fixed that in this change), where an indirect call through memory is not encodable. So, I made the following changes: 1. For calls that need to pass the indirection cell address, I tagged the call tree via `setR2RRelativeIndir`. I tried to be comprehensive, but I may have missed something. Currently, this includes regular calls and various helpers for (virtual) load function pointer, static data access, etc. Hopefully we can change the JIT/EE interface so that it gives us this information explicitly. 2. Use x11 to record the indirection cell address for such call trees in lowering, similar to VSD. 3. Fixed the instruction encodings in `ZapIndirectHelperThunk`. In particular, the immediate value/offset for `ldr` must be divided by 4, since the hardware scales it by 4. 4. Implemented `genEmitHelperCall` for the indirect case. This is not the case that requires the indirection cell address; it is the case where we inline the indirect helper thunk for speed. For a size-optimized helper call, I'm seeing that we instead emit a direct call to such a thunk, which actually uses x12 to dispatch to the final target.
Likewise, I used x12 for this expansion, since it appears to be a trash register that does not overlap with the arguments of JIT helpers such as write barriers. With this change, I've tested various cases/scenarios locally. I've also verified that all tests pass against mscorlib.ni.dll and System.ni.dll.
1 parent 4716d27 commit 38df707

File tree

9 files changed

+85
-48
lines changed

9 files changed

+85
-48
lines changed

src/jit/codegenarm64.cpp

Lines changed: 20 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -6730,39 +6730,31 @@ void CodeGen::genEmitHelperCall(unsigned helper,
67306730

67316731
if (addr == nullptr)
67326732
{
6733-
NYI("genEmitHelperCall indirect");
6734-
#if 0
6735-
assert(pAddr != nullptr);
6736-
if (genAddrCanBeEncodedAsPCRelOffset((size_t)pAddr))
6733+
// This is call to a runtime helper.
6734+
// adrp x, [reloc:rel page addr]
6735+
// add x, x, [reloc:page offset]
6736+
// ldr x, [x]
6737+
// br x
6738+
6739+
if (callTargetReg == REG_NA)
67376740
{
6738-
// generate call whose target is specified by PC-relative 32-bit offset.
6739-
callType = emitter::EC_FUNC_TOKEN_INDIR;
6740-
addr = pAddr;
6741+
// If a callTargetReg has not been explicitly provided, we will use REG_DEFAULT_HELPER_CALL_TARGET, but
6742+
// this is only a valid assumption if the helper call is known to kill REG_DEFAULT_HELPER_CALL_TARGET.
6743+
callTargetReg = REG_DEFAULT_HELPER_CALL_TARGET;
67416744
}
6742-
else
6743-
{
6744-
// If this address cannot be encoded as PC-relative 32-bit offset, load it into REG_HELPER_CALL_TARGET
6745-
// and use register indirect addressing mode to make the call.
6746-
// mov reg, addr
6747-
// call [reg]
6748-
if (callTargetReg == REG_NA)
6749-
{
6750-
// If a callTargetReg has not been explicitly provided, we will use REG_DEFAULT_HELPER_CALL_TARGET, but
6751-
// this is only a valid assumption if the helper call is known to kill REG_DEFAULT_HELPER_CALL_TARGET.
6752-
callTargetReg = REG_DEFAULT_HELPER_CALL_TARGET;
6753-
}
67546745

6755-
regMaskTP callTargetMask = genRegMask(callTargetReg);
6756-
regMaskTP callKillSet = compiler->compHelperCallKillSet((CorInfoHelpFunc)helper);
6746+
regMaskTP callTargetMask = genRegMask(callTargetReg);
6747+
regMaskTP callKillSet = compiler->compHelperCallKillSet((CorInfoHelpFunc)helper);
67576748

6758-
// assert that all registers in callTargetMask are in the callKillSet
6759-
noway_assert((callTargetMask & callKillSet) == callTargetMask);
6749+
// assert that all registers in callTargetMask are in the callKillSet
6750+
noway_assert((callTargetMask & callKillSet) == callTargetMask);
67606751

6761-
callTarget = callTargetReg;
6762-
CodeGen::genSetRegToIcon(callTarget, (ssize_t) pAddr, TYP_I_IMPL);
6763-
callType = emitter::EC_INDIR_ARD;
6764-
}
6765-
#endif // 0
6752+
callTarget = callTargetReg;
6753+
6754+
// adrp + add with relocations will be emitted
6755+
getEmitter()->emitIns_R_AI(INS_adrp, EA_PTR_DSP_RELOC, callTarget, (ssize_t)pAddr);
6756+
getEmitter()->emitIns_R_R(INS_ldr, EA_PTRSIZE, callTarget, callTarget);
6757+
callType = emitter::EC_INDIR_R;
67666758
}
67676759

67686760
getEmitter()->emitIns_Call(callType,

src/jit/ee_il_dll.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -255,13 +255,13 @@ void JitTls::SetCompiler(Compiler* compiler)
255255
// The main JIT function for the 32 bit JIT. See code:ICorJitCompiler#EEToJitInterface for more on the EE-JIT
256256
// interface. Things really don't get going inside the JIT until the code:Compiler::compCompile#Phases
257257
// method. Usually that is where you want to go.
258-
259258
CorJitResult CILJit::compileMethod (
260259
ICorJitInfo* compHnd,
261260
CORINFO_METHOD_INFO* methodInfo,
262261
unsigned flags,
263262
BYTE ** entryAddress,
264263
ULONG * nativeSizeOfCode)
264+
265265
{
266266
if (g_realJitCompiler != nullptr)
267267
{

src/jit/flowgraph.cpp

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7047,6 +7047,8 @@ GenTreePtr Compiler::fgOptimizeDelegateConstructor(GenTreePtr call, CORINFO_C
70477047
info.compCompHnd->getReadyToRunHelper(targetMethod->gtFptrVal.gtLdftnResolvedToken,
70487048
CORINFO_HELP_READYTORUN_DELEGATE_CTOR, &call->gtCall.gtEntryPoint);
70497049
#endif
7050+
// This is the case from GetDynamicHelperCell.
7051+
call->gtCall.setR2RRelativeIndir();
70507052
}
70517053
}
70527054
else

src/jit/gentree.cpp

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4464,6 +4464,15 @@ unsigned Compiler::gtSetEvalOrder(GenTree * tree)
44644464
ftreg |= RBM_VIRTUAL_STUB_PARAM;
44654465
}
44664466

4467+
#ifdef FEATURE_READYTORUN_COMPILER
4468+
#ifdef _TARGET_ARM64_
4469+
if (tree->gtCall.IsR2RRelativeIndir())
4470+
{
4471+
ftreg |= RBM_R2R_INDIRECT_PARAM;
4472+
}
4473+
#endif
4474+
#endif
4475+
44674476
// Normally function calls don't preserve caller save registers
44684477
// and thus are much more expensive.
44694478
// However a few function calls do preserve these registers
@@ -7403,6 +7412,8 @@ Compiler::gtDispNodeName(GenTree *tree)
74037412
gtfType = " ind";
74047413
else if (tree->gtFlags & GTF_CALL_VIRT_STUB)
74057414
gtfType = " stub";
7415+
else if (tree->gtCall.IsR2RRelativeIndir())
7416+
gtfType = " r2r_ind";
74067417
else if (tree->gtFlags & GTF_CALL_UNMANAGED)
74077418
{
74087419
char * gtfTypeBufWalk = gtfTypeBuf;

src/jit/gentree.h

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2762,6 +2762,8 @@ struct GenTreeCall final : public GenTree
27622762
// a Pinvoke but not as an unmanaged call. See impCheckForPInvokeCall() to
27632763
// know when these flags are set.
27642764

2765+
#define GTF_CALL_M_R2R_REL_INDIRECT 0x2000 // GT_CALL -- ready to run call is indirected through a relative address
2766+
27652767
bool IsUnmanaged() { return (gtFlags & GTF_CALL_UNMANAGED) != 0; }
27662768
bool NeedsNullCheck() { return (gtFlags & GTF_CALL_NULLCHECK) != 0; }
27672769
bool CallerPop() { return (gtFlags & GTF_CALL_POP_ARGS) != 0; }
@@ -2870,6 +2872,13 @@ struct GenTreeCall final : public GenTree
28702872
bool IsSameThis() { return (gtCallMoreFlags & GTF_CALL_M_NONVIRT_SAME_THIS) != 0; }
28712873
bool IsDelegateInvoke(){ return (gtCallMoreFlags & GTF_CALL_M_DELEGATE_INV) != 0; }
28722874
bool IsVirtualStubRelativeIndir() { return (gtCallMoreFlags & GTF_CALL_M_VIRTSTUB_REL_INDIRECT) != 0; }
2875+
bool IsR2RRelativeIndir() { return (gtCallMoreFlags & GTF_CALL_M_R2R_REL_INDIRECT) != 0; }
2876+
void setR2RRelativeIndir() {
2877+
if (gtEntryPoint.accessType == IAT_PVALUE)
2878+
{
2879+
gtCallMoreFlags |= GTF_CALL_M_R2R_REL_INDIRECT;
2880+
}
2881+
}
28732882
bool IsVarargs() { return (gtCallMoreFlags & GTF_CALL_M_VARARGS) != 0; }
28742883

28752884
unsigned short gtCallMoreFlags; // in addition to gtFlags

src/jit/importer.cpp

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1647,6 +1647,9 @@ GenTreePtr Compiler::impReadyToRunHelperToTree(
16471647

16481648
op1->gtCall.gtEntryPoint = lookup;
16491649

1650+
// This is the case from GetDynamicHelperCell.
1651+
op1->gtCall.setR2RRelativeIndir();
1652+
16501653
return op1;
16511654
}
16521655
#endif
@@ -4519,6 +4522,8 @@ GenTreePtr Compiler::impImportLdvirtftn (GenTreePtr thisPtr,
45194522

45204523
call->gtEntryPoint = pCallInfo->codePointerLookup.constLookup;
45214524

4525+
// This is the case from GetDynamicHelperCell.
4526+
call->setR2RRelativeIndir();
45224527
return call;
45234528
}
45244529
#endif
@@ -5192,6 +5197,9 @@ GenTreePtr Compiler::impImportStaticFieldAccess(CORINFO_RESOLVED_TOKEN * pResolv
51925197
op1 = gtNewHelperCallNode(CORINFO_HELP_READYTORUN_STATIC_BASE, TYP_BYREF, callFlags);
51935198

51945199
op1->gtCall.gtEntryPoint = pFieldInfo->fieldLookup;
5200+
5201+
// This is the case from GetDynamicHelperCell.
5202+
op1->gtCall.setR2RRelativeIndir();
51955203
}
51965204
else
51975205
#endif
@@ -6028,6 +6036,9 @@ var_types Compiler::impImportCall (OPCODE opcode,
60286036
if (opts.IsReadyToRun())
60296037
{
60306038
call->gtCall.gtEntryPoint = callInfo->codePointerLookup.constLookup;
6039+
6040+
// This is the case from GetExternalMethodCell.
6041+
call->gtCall.setR2RRelativeIndir();
60316042
}
60326043
#endif
60336044
break;

src/jit/lower.cpp

Lines changed: 17 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2399,10 +2399,26 @@ GenTree* Lowering::LowerDirectCall(GenTreeCall* call)
23992399
break;
24002400

24012401
case IAT_PVALUE:
2402+
{
24022403
// Non-virtual direct calls to addresses accessed by
24032404
// a single indirection.
2404-
result = Ind(AddrGen(addr));
2405+
GenTree* cellAddr = AddrGen(addr);
2406+
GenTree* indir = Ind(cellAddr);
2407+
2408+
#ifdef FEATURE_READYTORUN_COMPILER
2409+
#ifdef _TARGET_ARM64_
2410+
// For arm64, we dispatch code same as VSD using X11 for indirection cell address,
2411+
// which ZapIndirectHelperThunk expects.
2412+
if (call->IsR2RRelativeIndir())
2413+
{
2414+
cellAddr->gtRegNum = REG_R2R_INDIRECT_PARAM;
2415+
indir->gtRegNum = REG_JUMP_THUNK_PARAM;
2416+
}
2417+
#endif
2418+
#endif
2419+
result = indir;
24052420
break;
2421+
}
24062422

24072423
case IAT_PPVALUE:
24082424
// Non-virtual direct calls to addresses accessed by

src/jit/target.h

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1514,6 +1514,7 @@ typedef unsigned short regPairNoSmall; // arm: need 12 bits
15141514
#define RBM_CALLEE_SAVED (RBM_INT_CALLEE_SAVED | RBM_FLT_CALLEE_SAVED)
15151515
#define RBM_CALLEE_TRASH (RBM_INT_CALLEE_TRASH | RBM_FLT_CALLEE_TRASH)
15161516
#define RBM_CALLEE_TRASH_NOGC (RBM_R12|RBM_R13|RBM_R14|RBM_R15)
1517+
#define REG_DEFAULT_HELPER_CALL_TARGET REG_R12
15171518

15181519
#define RBM_ALLINT (RBM_INT_CALLEE_SAVED | RBM_INT_CALLEE_TRASH)
15191520
#define RBM_ALLFLOAT (RBM_FLT_CALLEE_SAVED | RBM_FLT_CALLEE_TRASH)
@@ -1606,6 +1607,11 @@ typedef unsigned short regPairNoSmall; // arm: need 12 bits
16061607
#define RBM_VIRTUAL_STUB_PARAM RBM_R11
16071608
#define PREDICT_REG_VIRTUAL_STUB_PARAM PREDICT_REG_R11
16081609

1610+
// R2R indirect call. Use the same registers as VSD
1611+
#define REG_R2R_INDIRECT_PARAM REG_R11
1612+
#define RBM_R2R_INDIRECT_PARAM RBM_R11
1613+
#define PREDICT_REG_RER_INDIRECT_PARAM PREDICT_REG_R11
1614+
16091615
// Registers used by PInvoke frame setup
16101616
#define REG_PINVOKE_FRAME REG_R8
16111617
#define RBM_PINVOKE_FRAME RBM_R8

src/zap/zapimport.cpp

Lines changed: 8 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -2146,17 +2146,8 @@ DWORD ZapIndirectHelperThunk::SaveWorker(ZapWriter * pZapWriter)
21462146
#elif defined(_TARGET_ARM64_)
21472147
if (IsDelayLoadHelper())
21482148
{
2149-
if (IsVSD())
2150-
{
2151-
// x11 contains indirection cell
2152-
// Do nothing x11 contains our first param
2153-
}
2154-
else
2155-
{
2156-
// mov x11, x12
2157-
*(DWORD*)p = 0xaa0c03eb;
2158-
p += 4;
2159-
}
2149+
// x11 contains indirection cell
2150+
// Do nothing x11 contains our first param
21602151

21612152
// movz x8, #index
21622153
DWORD index = GetSectionIndex();
@@ -2166,9 +2157,9 @@ DWORD ZapIndirectHelperThunk::SaveWorker(ZapWriter * pZapWriter)
21662157

21672158
// move Module* -> x9
21682159
// ldr x9, [PC+0x14]
2169-
*(DWORD*)p = 0x58000289;
2160+
*(DWORD*)p = 0x580000A9;
21702161
p += 4;
2171-
2162+
21722163
//ldr x9, [x9]
21732164
*(DWORD*)p = 0xf9400129;
21742165
p += 4;
@@ -2178,7 +2169,7 @@ DWORD ZapIndirectHelperThunk::SaveWorker(ZapWriter * pZapWriter)
21782169
{
21792170
// Move Module* -> x1
21802171
// ldr x1, [PC+0x14]
2181-
*(DWORD*)p = 0x58000289;
2172+
*(DWORD*)p = 0x580000A1;
21822173
p += 4;
21832174

21842175
// ldr x1, [x1]
@@ -2187,10 +2178,8 @@ DWORD ZapIndirectHelperThunk::SaveWorker(ZapWriter * pZapWriter)
21872178
}
21882179

21892180
// branch to helper
2190-
2191-
// mov x12, [helper]
21922181
// ldr x12, [PC+0x14]
2193-
*(DWORD*)p = 0x58000289;
2182+
*(DWORD*)p = 0x580000AC;
21942183
p += 4;
21952184

21962185
// ldr x12, [x12]
@@ -2199,12 +2188,13 @@ DWORD ZapIndirectHelperThunk::SaveWorker(ZapWriter * pZapWriter)
21992188

22002189
// br x12
22012190
*(DWORD *)p = 0xd61f0180;
2202-
p += 4;
2191+
p += 4;
22032192

22042193
// [Module*]
22052194
if (pImage != NULL)
22062195
pImage->WriteReloc(buffer, (int)(p - buffer), pImage->GetImportTable()->GetHelperImport(READYTORUN_HELPER_Module), 0, IMAGE_REL_BASED_PTR);
22072196
p += 8;
2197+
22082198
// [helper]
22092199
if (pImage != NULL)
22102200
pImage->WriteReloc(buffer, (int)(p - buffer), pImage->GetImportTable()->GetHelperImport(GetReadyToRunHelper()), 0, IMAGE_REL_BASED_PTR);

0 commit comments

Comments
 (0)