@@ -421,7 +421,7 @@ PhaseStatus Compiler::fgExpandThreadLocalAccess()
421421{
422422 PhaseStatus result = PhaseStatus::MODIFIED_NOTHING;
423423
424- if (!doesMethodHasTlsFieldAccess ())
424+ if (!methodHasTlsFieldAccess ())
425425 {
426426 // TP: nothing to expand in the current method
427427 JITDUMP (" Nothing to expand.\n " )
@@ -478,36 +478,50 @@ bool Compiler::fgExpandThreadLocalAccessForCall(BasicBlock** pBlock, Statement*
478478 return false ;
479479 }
480480
481+ assert (!opts.IsReadyToRun ());
482+
483+ if (TargetOS::IsUnix)
484+ {
485+ #if defined(TARGET_ARM) || !defined(TARGET_64BIT)
486+ // On Arm, Thread execution blocks are accessed using co-processor registers and instructions such
487+ // as MRC and MCR are used to access them. We do not support them and so should never optimize the
488+ // field access using TLS.
489+ noway_assert (!" Unsupported scenario of optimizing TLS access on Linux Arm32/x86" );
490+ #endif
491+ }
492+ else
493+ {
481494#ifdef TARGET_ARM
482- // On Arm, Thread execution blocks are accessed using co-processor registers and instructions such
483- // as MRC and MCR are used to access them. We do not support them and so should never optimize the
484- // field access using TLS.
485- assert (!" Unsupported scenario of optimizing TLS access on Arm32" );
495+ // On Arm, Thread execution blocks are accessed using co-processor registers and instructions such
496+ // as MRC and MCR are used to access them. We do not support them and so should never optimize the
497+ // field access using TLS.
498+ noway_assert (!" Unsupported scenario of optimizing TLS access on Windows Arm32" );
486499#endif
500+ }
487501
488502 JITDUMP (" Expanding thread static local access for [%06d] in " FMT_BB " :\n " , dspTreeID (call), block->bbNum );
489503 DISPTREE (call);
490504 JITDUMP (" \n " );
505+
491506 bool isGCThreadStatic =
492507 eeGetHelperNum (call->gtCallMethHnd ) == CORINFO_HELP_GETSHARED_GCTHREADSTATIC_BASE_NOCTOR_OPTIMIZED;
493508
494509 CORINFO_THREAD_STATIC_BLOCKS_INFO threadStaticBlocksInfo;
495- info. compCompHnd -> getThreadLocalStaticBlocksInfo (&threadStaticBlocksInfo, isGCThreadStatic );
510+ memset (&threadStaticBlocksInfo, 0 , sizeof (CORINFO_THREAD_STATIC_BLOCKS_INFO) );
496511
497- uint32_t offsetOfMaxThreadStaticBlocksVal = 0 ;
498- uint32_t offsetOfThreadStaticBlocksVal = 0 ;
512+ info.compCompHnd ->getThreadLocalStaticBlocksInfo (&threadStaticBlocksInfo, isGCThreadStatic);
499513
500514 JITDUMP (" getThreadLocalStaticBlocksInfo (%s)\n :" , isGCThreadStatic ? " GC" : " Non-GC" );
501- offsetOfMaxThreadStaticBlocksVal = threadStaticBlocksInfo.offsetOfMaxThreadStaticBlocks ;
502- offsetOfThreadStaticBlocksVal = threadStaticBlocksInfo.offsetOfThreadStaticBlocks ;
503-
504- JITDUMP (" tlsIndex= %u\n " , (ssize_t )threadStaticBlocksInfo.tlsIndex .addr );
505- JITDUMP (" offsetOfThreadLocalStoragePointer= %u\n " , threadStaticBlocksInfo.offsetOfThreadLocalStoragePointer );
506- JITDUMP (" offsetOfMaxThreadStaticBlocks= %u\n " , offsetOfMaxThreadStaticBlocksVal);
507- JITDUMP (" offsetOfThreadStaticBlocks= %u\n " , offsetOfThreadStaticBlocksVal);
508- JITDUMP (" offsetOfGCDataPointer= %u\n " , threadStaticBlocksInfo.offsetOfGCDataPointer );
515+ JITDUMP (" tlsIndex= %p\n " , dspPtr (threadStaticBlocksInfo.tlsIndex .addr ));
516+ JITDUMP (" tlsGetAddrFtnPtr= %p\n " , dspPtr (threadStaticBlocksInfo.tlsGetAddrFtnPtr ));
517+ JITDUMP (" tlsIndexObject= %p\n " , dspPtr (threadStaticBlocksInfo.tlsIndexObject ));
518+ JITDUMP (" threadVarsSection= %p\n " , dspPtr (threadStaticBlocksInfo.threadVarsSection ));
519+ JITDUMP (" offsetOfThreadLocalStoragePointer= %u\n " ,
520+ dspOffset (threadStaticBlocksInfo.offsetOfThreadLocalStoragePointer ));
521+ JITDUMP (" offsetOfMaxThreadStaticBlocks= %u\n " , dspOffset (threadStaticBlocksInfo.offsetOfMaxThreadStaticBlocks ));
522+ JITDUMP (" offsetOfThreadStaticBlocks= %u\n " , dspOffset (threadStaticBlocksInfo.offsetOfThreadStaticBlocks ));
523+ JITDUMP (" offsetOfGCDataPointer= %u\n " , dspOffset (threadStaticBlocksInfo.offsetOfGCDataPointer ));
509524
510- assert (threadStaticBlocksInfo.tlsIndex .accessType == IAT_VALUE);
511525 assert ((eeGetHelperNum (call->gtCallMethHnd ) == CORINFO_HELP_GETSHARED_NONGCTHREADSTATIC_BASE_NOCTOR_OPTIMIZED) ||
512526 (eeGetHelperNum (call->gtCallMethHnd ) == CORINFO_HELP_GETSHARED_GCTHREADSTATIC_BASE_NOCTOR_OPTIMIZED));
513527
@@ -546,56 +560,131 @@ bool Compiler::fgExpandThreadLocalAccessForCall(BasicBlock** pBlock, Statement*
546560 gtUpdateStmtSideEffects (stmt);
547561
548562 GenTree* typeThreadStaticBlockIndexValue = call->gtArgs .GetArgByIndex (0 )->GetNode ();
563+ GenTree* tlsValue = nullptr ;
564+ unsigned tlsLclNum = lvaGrabTemp (true DEBUGARG (" TLS access" ));
565+ lvaTable[tlsLclNum].lvType = TYP_I_IMPL;
566+ GenTree* maxThreadStaticBlocksValue = nullptr ;
567+ GenTree* threadStaticBlocksValue = nullptr ;
568+ GenTree* tlsValueDef = nullptr ;
569+
570+ if (TargetOS::IsWindows)
571+ {
572+ size_t tlsIndexValue = (size_t )threadStaticBlocksInfo.tlsIndex .addr ;
573+ GenTree* dllRef = nullptr ;
549574
550- void ** pIdAddr = nullptr ;
575+ if (tlsIndexValue != 0 )
576+ {
577+ dllRef = gtNewIconHandleNode (tlsIndexValue * TARGET_POINTER_SIZE, GTF_ICON_TLS_HDL);
578+ }
551579
552- size_t tlsIndexValue = (size_t )threadStaticBlocksInfo.tlsIndex .addr ;
553- GenTree* dllRef = nullptr ;
580+ // Mark this ICON as a TLS_HDL, codegen will use FS:[cns] or GS:[cns]
581+ tlsValue = gtNewIconHandleNode (threadStaticBlocksInfo.offsetOfThreadLocalStoragePointer , GTF_ICON_TLS_HDL);
582+ tlsValue = gtNewIndir (TYP_I_IMPL, tlsValue, GTF_IND_NONFAULTING | GTF_IND_INVARIANT);
554583
555- if (tlsIndexValue != 0 )
556- {
557- dllRef = gtNewIconHandleNode (tlsIndexValue * TARGET_POINTER_SIZE, GTF_ICON_TLS_HDL);
584+ if (dllRef != nullptr )
585+ {
586+ // Add the dllRef to produce thread local storage reference for coreclr
587+ tlsValue = gtNewOperNode (GT_ADD, TYP_I_IMPL, tlsValue, dllRef);
588+ }
589+
590+ // Base of coreclr's thread local storage
591+ tlsValue = gtNewIndir (TYP_I_IMPL, tlsValue, GTF_IND_NONFAULTING | GTF_IND_INVARIANT);
558592 }
593+ else if (TargetOS::IsMacOS)
594+ {
595+ // For OSX x64/arm64, we need to get the address of relevant __thread_vars section of
596+ // the thread local variable `t_ThreadStatics`. Address of `tlv_get_address` is stored
597+ // in this entry, which we dereference and invoke it, passing the __thread_vars address
598+ // present in `threadVarsSection`.
599+ //
600+ // Code sequence to access thread local variable on osx/x64:
601+ //
602+ // mov rdi, threadVarsSection
603+ // call [rdi]
604+ //
605+ // Code sequence to access thread local variable on osx/arm64:
606+ //
607+ // mov x0, threadVarsSection
608+ // mov x1, [x0]
609+ // blr x1
610+ //
611+ size_t threadVarsSectionVal = (size_t )threadStaticBlocksInfo.threadVarsSection ;
612+ GenTree* tls_get_addr_val = gtNewIconHandleNode (threadVarsSectionVal, GTF_ICON_FTN_ADDR);
613+
614+ tls_get_addr_val = gtNewIndir (TYP_I_IMPL, tls_get_addr_val, GTF_IND_NONFAULTING | GTF_IND_INVARIANT);
559615
560- // Mark this ICON as a TLS_HDL, codegen will use FS:[cns] or GS:[cns]
561- GenTree* tlsRef = gtNewIconHandleNode (threadStaticBlocksInfo. offsetOfThreadLocalStoragePointer , GTF_ICON_TLS_HDL );
616+ tlsValue = gtNewIndCallNode (tls_get_addr_val, TYP_I_IMPL);
617+ GenTreeCall* tlsRefCall = tlsValue-> AsCall ( );
562618
563- tlsRef = gtNewIndir (TYP_I_IMPL, tlsRef, GTF_IND_NONFAULTING | GTF_IND_INVARIANT);
619+ // This is a call which takes an argument.
620+ // Populate and set the ABI appropriately.
621+ assert (opts.altJit || threadVarsSectionVal != 0 );
622+ GenTree* tlsArg = gtNewIconNode (threadVarsSectionVal, TYP_I_IMPL);
623+ tlsRefCall->gtArgs .PushBack (this , NewCallArg::Primitive (tlsArg));
564624
565- if (dllRef != nullptr )
625+ fgMorphArgs (tlsRefCall);
626+
627+ tlsRefCall->gtFlags |= GTF_EXCEPT | (tls_get_addr_val->gtFlags & GTF_GLOB_EFFECT);
628+ }
629+ else if (TargetOS::IsUnix)
566630 {
567- // Add the dllRef to produce thread local storage reference for coreclr
568- tlsRef = gtNewOperNode (GT_ADD, TYP_I_IMPL, tlsRef, dllRef);
631+ #if defined(TARGET_AMD64)
632+ // Code sequence to access thread local variable on linux/x64:
633+ //
634+ // mov rdi, 0x7FE5C418CD28 ; tlsIndexObject
635+ // mov rax, 0x7FE5C47AFDB0 ; __tls_get_addr
636+ // call rax
637+ //
638+ GenTree* tls_get_addr_val =
639+ gtNewIconHandleNode ((size_t )threadStaticBlocksInfo.tlsGetAddrFtnPtr , GTF_ICON_FTN_ADDR);
640+ tlsValue = gtNewIndCallNode (tls_get_addr_val, TYP_I_IMPL);
641+ GenTreeCall* tlsRefCall = tlsValue->AsCall ();
642+
643+ // This is an indirect call which takes an argument.
644+ // Populate and set the ABI appropriately.
645+ assert (opts.altJit || threadStaticBlocksInfo.tlsIndexObject != 0 );
646+ GenTree* tlsArg = gtNewIconNode ((size_t )threadStaticBlocksInfo.tlsIndexObject , TYP_I_IMPL);
647+ tlsRefCall->gtArgs .PushBack (this , NewCallArg::Primitive (tlsArg));
648+
649+ fgMorphArgs (tlsRefCall);
650+
651+ tlsRefCall->gtFlags |= GTF_EXCEPT | (tls_get_addr_val->gtFlags & GTF_GLOB_EFFECT);
652+ #ifdef UNIX_X86_ABI
653+ tlsRefCall->gtFlags &= ~GTF_CALL_POP_ARGS;
654+ #endif // UNIX_X86_ABI
655+ #elif defined(TARGET_ARM64)
656+ // Code sequence to access thread local variable on linux/arm64:
657+ //
658+ // mrs xt, tpidr_el0
659+ // mov xd, [xt+cns]
660+ tlsValue = gtNewIconHandleNode (0 , GTF_ICON_TLS_HDL);
661+ #else
662+ assert (!" Unsupported scenario of optimizing TLS access on Linux Arm32/x86" );
663+ #endif
569664 }
570665
571- // Base of coreclr's thread local storage
572- GenTree* tlsValue = gtNewIndir (TYP_I_IMPL, tlsRef, GTF_IND_NONFAULTING | GTF_IND_INVARIANT);
573-
574666 // Cache the tls value
575- unsigned tlsLclNum = lvaGrabTemp (true DEBUGARG (" TLS access" ));
576- lvaTable[tlsLclNum].lvType = TYP_I_IMPL;
577- GenTree* tlsValueDef = gtNewStoreLclVarNode (tlsLclNum, tlsValue);
578- GenTree* tlsLclValueUse = gtNewLclVarNode (tlsLclNum);
667+ tlsValueDef = gtNewStoreLclVarNode (tlsLclNum, tlsValue);
668+ GenTree* tlsLclValueUse = gtNewLclVarNode (tlsLclNum);
669+
670+ size_t offsetOfThreadStaticBlocksVal = threadStaticBlocksInfo.offsetOfThreadStaticBlocks ;
671+ size_t offsetOfMaxThreadStaticBlocksVal = threadStaticBlocksInfo.offsetOfMaxThreadStaticBlocks ;
579672
580673 // Create tree for "maxThreadStaticBlocks = tls[offsetOfMaxThreadStaticBlocks]"
581674 GenTree* offsetOfMaxThreadStaticBlocks = gtNewIconNode (offsetOfMaxThreadStaticBlocksVal, TYP_I_IMPL);
582675 GenTree* maxThreadStaticBlocksRef =
583676 gtNewOperNode (GT_ADD, TYP_I_IMPL, gtCloneExpr (tlsLclValueUse), offsetOfMaxThreadStaticBlocks);
584- GenTree* maxThreadStaticBlocksValue =
585- gtNewIndir (TYP_INT, maxThreadStaticBlocksRef, GTF_IND_NONFAULTING | GTF_IND_INVARIANT);
677+ maxThreadStaticBlocksValue = gtNewIndir (TYP_INT, maxThreadStaticBlocksRef, GTF_IND_NONFAULTING | GTF_IND_INVARIANT);
678+
679+ GenTree* threadStaticBlocksRef = gtNewOperNode (GT_ADD, TYP_I_IMPL, gtCloneExpr (tlsLclValueUse),
680+ gtNewIconNode (offsetOfThreadStaticBlocksVal, TYP_I_IMPL));
681+ threadStaticBlocksValue = gtNewIndir (TYP_I_IMPL, threadStaticBlocksRef, GTF_IND_NONFAULTING | GTF_IND_INVARIANT);
586682
587683 // Create tree for "if (maxThreadStaticBlocks < typeIndex)"
588684 GenTree* maxThreadStaticBlocksCond =
589685 gtNewOperNode (GT_LT, TYP_INT, maxThreadStaticBlocksValue, gtCloneExpr (typeThreadStaticBlockIndexValue));
590686 maxThreadStaticBlocksCond = gtNewOperNode (GT_JTRUE, TYP_VOID, maxThreadStaticBlocksCond);
591687
592- // Create tree for "threadStaticBlockBase = tls[offsetOfThreadStaticBlocks]"
593- GenTree* offsetOfThreadStaticBlocks = gtNewIconNode (offsetOfThreadStaticBlocksVal, TYP_I_IMPL);
594- GenTree* threadStaticBlocksRef =
595- gtNewOperNode (GT_ADD, TYP_I_IMPL, gtCloneExpr (tlsLclValueUse), offsetOfThreadStaticBlocks);
596- GenTree* threadStaticBlocksValue =
597- gtNewIndir (TYP_I_IMPL, threadStaticBlocksRef, GTF_IND_NONFAULTING | GTF_IND_INVARIANT);
598-
599688 // Create tree to "threadStaticBlockValue = threadStaticBlockBase[typeIndex]"
600689 typeThreadStaticBlockIndexValue = gtNewOperNode (GT_MUL, TYP_INT, gtCloneExpr (typeThreadStaticBlockIndexValue),
601690 gtNewIconNode (TARGET_POINTER_SIZE, TYP_INT));
0 commit comments