From bba5951b6f9cd77047cafc554b20144b33602298 Mon Sep 17 00:00:00 2001 From: Shivam Gupta Date: Tue, 18 Jun 2024 01:09:30 +0530 Subject: [PATCH 01/25] [MLIR] Fix an assert that contains a mistake in conditional operator (#95668) This is described in (N2) https://pvs-studio.com/en/blog/posts/cpp/1126/ so caught by the PVS Studio analyzer. Warning message - V502 Perhaps the '?:' operator works in a different way than it was expected. The '?:' operator has a lower priority than the '+' operator. LoopEmitter.cpp 983 V502 Perhaps the '?:' operator works in a different way than it was expected. The '?:' operator has a lower priority than the '+' operator. LoopEmitter.cpp 1039 The assert should be assert(bArgs.size() == reduc.size() + (needsUniv ? 1 : 0)); since + has higher precedence and ? has lower. This further can be reduce to assert(aArgs.size() == reduc.size() + needsUniv); because needUniv is a bool value which is implicitly converted to 0 or --- .../lib/Dialect/SparseTensor/Transforms/Utils/LoopEmitter.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/mlir/lib/Dialect/SparseTensor/Transforms/Utils/LoopEmitter.cpp b/mlir/lib/Dialect/SparseTensor/Transforms/Utils/LoopEmitter.cpp index 05883f1cefdf3..fe0e515a2d180 100644 --- a/mlir/lib/Dialect/SparseTensor/Transforms/Utils/LoopEmitter.cpp +++ b/mlir/lib/Dialect/SparseTensor/Transforms/Utils/LoopEmitter.cpp @@ -542,7 +542,7 @@ std::pair LoopEmitter::emitWhileLoopOverTensorsAtLvls( } // The remaining block arguments are user-provided reduction values and an // optional universal index. Make sure their sizes match. - assert(bArgs.size() == reduc.size() + needsUniv ? 1 : 0); + assert(bArgs.size() == reduc.size() + needsUniv); builder.create(loc, whileCond, before->getArguments()); // Generates loop body. @@ -560,7 +560,7 @@ std::pair LoopEmitter::emitWhileLoopOverTensorsAtLvls( } // In-place update on reduction variable. - assert(aArgs.size() == reduc.size() + needsUniv ? 1 : 0); + assert(aArgs.size() == reduc.size() + needsUniv); for (unsigned i = 0, e = reduc.size(); i < e; i++) reduc[i] = aArgs[i]; From 3b997294d6117241477ab36be0595040f8278707 Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Mon, 17 Jun 2024 21:44:52 +0200 Subject: [PATCH 02/25] AMDGPU: Remove .v2bf16 buffer atomic fadd intrinsics (#95783) These are redundant with the unsuffixed versions, and have a name collision with surprising behavior when the base intrinsic is used with v2bf16. The global and flat variants should be removed too, but those are complicated due to using v2i16 in place of the natural v2bf16. Those cases can soon be completely deleted in favor of atomicrmw. The GlobalISel codegen change is broken and substitutes handling as bf16 for handling as f16, but it's a bug that this passed the IRTranslator in the first place. --- llvm/include/llvm/IR/IntrinsicsAMDGPU.td | 44 +------------------ llvm/lib/Target/AMDGPU/AMDGPUGISel.td | 1 - llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp | 1 - llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h | 1 - .../lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp | 9 ---- .../Target/AMDGPU/AMDGPURegisterBankInfo.cpp | 2 - .../Target/AMDGPU/AMDGPUSearchableTables.td | 4 -- llvm/lib/Target/AMDGPU/BUFInstructions.td | 2 +- llvm/lib/Target/AMDGPU/SIISelLowering.cpp | 9 ---- llvm/lib/Target/AMDGPU/SIInstrInfo.td | 1 - llvm/lib/Target/AMDGPU/SIInstructions.td | 1 - .../test/CodeGen/AMDGPU/fp-atomics-gfx1200.ll | 12 ++--- ...mdgcn.ptr.buffer.atomic.fadd_rtn_errors.ll | 10 +++-- 13 files changed, 15 insertions(+), 82 deletions(-) diff --git a/llvm/include/llvm/IR/IntrinsicsAMDGPU.td b/llvm/include/llvm/IR/IntrinsicsAMDGPU.td index e6b69b39911a9..45f1092094572 100644 --- a/llvm/include/llvm/IR/IntrinsicsAMDGPU.td +++ b/llvm/include/llvm/IR/IntrinsicsAMDGPU.td @@ -1337,27 +1337,9 @@ def int_amdgcn_raw_ptr_buffer_atomic_cmpswap : Intrinsic< // gfx908 intrinsic def int_amdgcn_raw_buffer_atomic_fadd : AMDGPURawBufferAtomic; + +// Supports float and <2 x half> on gfx908. Supports v2bf16 on gfx90a, gfx940, gfx12+. def int_amdgcn_raw_ptr_buffer_atomic_fadd : AMDGPURawPtrBufferAtomic; -// gfx12+ intrinsic -def int_amdgcn_raw_buffer_atomic_fadd_v2bf16 : Intrinsic < - [llvm_v2bf16_ty], - [llvm_v2bf16_ty, - llvm_v4i32_ty, - llvm_i32_ty, - llvm_i32_ty, - llvm_i32_ty], - [ImmArg>, IntrWillReturn, IntrNoCallback, IntrNoFree], "", [SDNPMemOperand]>, - AMDGPURsrcIntrinsic<1, 0>; -def int_amdgcn_raw_ptr_buffer_atomic_fadd_v2bf16 : Intrinsic < - [llvm_v2bf16_ty], - [llvm_v2bf16_ty, - AMDGPUBufferRsrcTy, - llvm_i32_ty, - llvm_i32_ty, - llvm_i32_ty], - [IntrArgMemOnly, NoCapture>, - ImmArg>, IntrWillReturn, IntrNoCallback, IntrNoFree], "", [SDNPMemOperand]>, - AMDGPURsrcIntrinsic<1, 0>; class AMDGPUStructBufferAtomic : Intrinsic < [data_ty], @@ -1434,28 +1416,6 @@ def int_amdgcn_struct_ptr_buffer_atomic_cmpswap : Intrinsic< // gfx908 intrinsic def int_amdgcn_struct_buffer_atomic_fadd : AMDGPUStructBufferAtomic; def int_amdgcn_struct_ptr_buffer_atomic_fadd : AMDGPUStructPtrBufferAtomic; -// gfx12 intrinsic -def int_amdgcn_struct_buffer_atomic_fadd_v2bf16 : Intrinsic < - [llvm_v2bf16_ty], - [llvm_v2bf16_ty, - llvm_v4i32_ty, - llvm_i32_ty, - llvm_i32_ty, - llvm_i32_ty, - llvm_i32_ty], - [ImmArg>, IntrWillReturn, IntrNoCallback, IntrNoFree], "", [SDNPMemOperand]>, - AMDGPURsrcIntrinsic<1, 0>; -def int_amdgcn_struct_ptr_buffer_atomic_fadd_v2bf16 : Intrinsic < - [llvm_v2bf16_ty], - [llvm_v2bf16_ty, - AMDGPUBufferRsrcTy, - llvm_i32_ty, - llvm_i32_ty, - llvm_i32_ty, - llvm_i32_ty], - [IntrArgMemOnly, NoCapture>, - ImmArg>, IntrWillReturn, IntrNoCallback, IntrNoFree], "", [SDNPMemOperand]>, - AMDGPURsrcIntrinsic<1, 0>; // gfx90a intrinsics def int_amdgcn_struct_buffer_atomic_fmin : AMDGPUStructBufferAtomic; diff --git a/llvm/lib/Target/AMDGPU/AMDGPUGISel.td b/llvm/lib/Target/AMDGPU/AMDGPUGISel.td index 231db188e65dc..d81c18875eebd 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUGISel.td +++ b/llvm/lib/Target/AMDGPU/AMDGPUGISel.td @@ -290,7 +290,6 @@ def : GINodeEquiv; def : GINodeEquiv; def : GINodeEquiv; def : GINodeEquiv; -def : GINodeEquiv; def : GINodeEquiv; def : GINodeEquiv; def : GINodeEquiv; diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp index 18193d8807597..519e623306eb1 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp @@ -5564,7 +5564,6 @@ const char* AMDGPUTargetLowering::getTargetNodeName(unsigned Opcode) const { NODE_NAME_CASE(BUFFER_ATOMIC_CMPSWAP) NODE_NAME_CASE(BUFFER_ATOMIC_CSUB) NODE_NAME_CASE(BUFFER_ATOMIC_FADD) - NODE_NAME_CASE(BUFFER_ATOMIC_FADD_BF16) NODE_NAME_CASE(BUFFER_ATOMIC_FMIN) NODE_NAME_CASE(BUFFER_ATOMIC_FMAX) NODE_NAME_CASE(BUFFER_ATOMIC_COND_SUB_U32) diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h index 71c4334029b43..206bb46b6c863 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h +++ b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h @@ -615,7 +615,6 @@ enum NodeType : unsigned { BUFFER_ATOMIC_CMPSWAP, BUFFER_ATOMIC_CSUB, BUFFER_ATOMIC_FADD, - BUFFER_ATOMIC_FADD_BF16, BUFFER_ATOMIC_FMIN, BUFFER_ATOMIC_FMAX, BUFFER_ATOMIC_COND_SUB_U32, diff --git a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp index 973b6b8cce177..0c7b1968e551c 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp @@ -6018,11 +6018,6 @@ static unsigned getBufferAtomicPseudo(Intrinsic::ID IntrID) { case Intrinsic::amdgcn_struct_buffer_atomic_fadd: case Intrinsic::amdgcn_struct_ptr_buffer_atomic_fadd: return AMDGPU::G_AMDGPU_BUFFER_ATOMIC_FADD; - case Intrinsic::amdgcn_raw_buffer_atomic_fadd_v2bf16: - case Intrinsic::amdgcn_struct_buffer_atomic_fadd_v2bf16: - case Intrinsic::amdgcn_raw_ptr_buffer_atomic_fadd_v2bf16: - case Intrinsic::amdgcn_struct_ptr_buffer_atomic_fadd_v2bf16: - return AMDGPU::G_AMDGPU_BUFFER_ATOMIC_FADD_BF16; case Intrinsic::amdgcn_raw_buffer_atomic_fmin: case Intrinsic::amdgcn_raw_ptr_buffer_atomic_fmin: case Intrinsic::amdgcn_struct_buffer_atomic_fmin: @@ -7330,10 +7325,6 @@ bool AMDGPULegalizerInfo::legalizeIntrinsic(LegalizerHelper &Helper, case Intrinsic::amdgcn_raw_ptr_buffer_atomic_fadd: case Intrinsic::amdgcn_struct_buffer_atomic_fadd: case Intrinsic::amdgcn_struct_ptr_buffer_atomic_fadd: - case Intrinsic::amdgcn_raw_buffer_atomic_fadd_v2bf16: - case Intrinsic::amdgcn_raw_ptr_buffer_atomic_fadd_v2bf16: - case Intrinsic::amdgcn_struct_buffer_atomic_fadd_v2bf16: - case Intrinsic::amdgcn_struct_ptr_buffer_atomic_fadd_v2bf16: return legalizeBufferAtomic(MI, B, IntrID); case Intrinsic::amdgcn_rsq_clamp: return legalizeRsqClampIntrinsic(MI, MRI, B); diff --git a/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp index 7ebd674757fbc..313d53a1524d2 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp @@ -3079,7 +3079,6 @@ void AMDGPURegisterBankInfo::applyMappingImpl( return; } case AMDGPU::G_AMDGPU_BUFFER_ATOMIC_FADD: - case AMDGPU::G_AMDGPU_BUFFER_ATOMIC_FADD_BF16: case AMDGPU::G_AMDGPU_BUFFER_ATOMIC_FMIN: case AMDGPU::G_AMDGPU_BUFFER_ATOMIC_FMAX: { applyDefaultMapping(OpdMapper); @@ -4376,7 +4375,6 @@ AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const { case AMDGPU::G_AMDGPU_BUFFER_ATOMIC_INC: case AMDGPU::G_AMDGPU_BUFFER_ATOMIC_DEC: case AMDGPU::G_AMDGPU_BUFFER_ATOMIC_FADD: - case AMDGPU::G_AMDGPU_BUFFER_ATOMIC_FADD_BF16: case AMDGPU::G_AMDGPU_BUFFER_ATOMIC_FMIN: case AMDGPU::G_AMDGPU_BUFFER_ATOMIC_FMAX: { // vdata_out diff --git a/llvm/lib/Target/AMDGPU/AMDGPUSearchableTables.td b/llvm/lib/Target/AMDGPU/AMDGPUSearchableTables.td index e84d39a2895c8..7b29d573b6101 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUSearchableTables.td +++ b/llvm/lib/Target/AMDGPU/AMDGPUSearchableTables.td @@ -269,7 +269,6 @@ def : SourceOfDivergence; def : SourceOfDivergence; def : SourceOfDivergence; def : SourceOfDivergence; -def : SourceOfDivergence; def : SourceOfDivergence; def : SourceOfDivergence; def : SourceOfDivergence; @@ -287,7 +286,6 @@ def : SourceOfDivergence; def : SourceOfDivergence; def : SourceOfDivergence; def : SourceOfDivergence; -def : SourceOfDivergence; def : SourceOfDivergence; def : SourceOfDivergence; def : SourceOfDivergence; @@ -305,7 +303,6 @@ def : SourceOfDivergence; def : SourceOfDivergence; def : SourceOfDivergence; def : SourceOfDivergence; -def : SourceOfDivergence; def : SourceOfDivergence; def : SourceOfDivergence; def : SourceOfDivergence; @@ -323,7 +320,6 @@ def : SourceOfDivergence; def : SourceOfDivergence; def : SourceOfDivergence; def : SourceOfDivergence; -def : SourceOfDivergence; def : SourceOfDivergence; def : SourceOfDivergence; def : SourceOfDivergence; diff --git a/llvm/lib/Target/AMDGPU/BUFInstructions.td b/llvm/lib/Target/AMDGPU/BUFInstructions.td index 43e5434ea2700..f5b6de15e19e7 100644 --- a/llvm/lib/Target/AMDGPU/BUFInstructions.td +++ b/llvm/lib/Target/AMDGPU/BUFInstructions.td @@ -1751,7 +1751,7 @@ let OtherPredicates = [HasAtomicCSubNoRtnInsts] in defm : SIBufferAtomicPat<"SIbuffer_atomic_csub", i32, "BUFFER_ATOMIC_CSUB", ["noret"]>; let SubtargetPredicate = isGFX12Plus in { - defm : SIBufferAtomicPat_Common<"SIbuffer_atomic_fadd_bf16", v2bf16, "BUFFER_ATOMIC_PK_ADD_BF16_VBUFFER">; + defm : SIBufferAtomicPat_Common<"SIbuffer_atomic_fadd", v2bf16, "BUFFER_ATOMIC_PK_ADD_BF16_VBUFFER">; defm : SIBufferAtomicPat_Common<"SIbuffer_atomic_cond_sub_u32", i32, "BUFFER_ATOMIC_COND_SUB_U32_VBUFFER", ["ret"]>; let OtherPredicates = [HasAtomicCSubNoRtnInsts] in diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp index d9a163ded6bab..c436e03806dc8 100644 --- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp +++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp @@ -8833,17 +8833,9 @@ SDValue SITargetLowering::LowerINTRINSIC_W_CHAIN(SDValue Op, case Intrinsic::amdgcn_raw_buffer_atomic_fadd: case Intrinsic::amdgcn_raw_ptr_buffer_atomic_fadd: return lowerRawBufferAtomicIntrin(Op, DAG, AMDGPUISD::BUFFER_ATOMIC_FADD); - case Intrinsic::amdgcn_raw_ptr_buffer_atomic_fadd_v2bf16: - case Intrinsic::amdgcn_raw_buffer_atomic_fadd_v2bf16: - return lowerRawBufferAtomicIntrin(Op, DAG, - AMDGPUISD::BUFFER_ATOMIC_FADD_BF16); case Intrinsic::amdgcn_struct_buffer_atomic_fadd: case Intrinsic::amdgcn_struct_ptr_buffer_atomic_fadd: return lowerStructBufferAtomicIntrin(Op, DAG, AMDGPUISD::BUFFER_ATOMIC_FADD); - case Intrinsic::amdgcn_struct_buffer_atomic_fadd_v2bf16: - case Intrinsic::amdgcn_struct_ptr_buffer_atomic_fadd_v2bf16: - return lowerStructBufferAtomicIntrin(Op, DAG, - AMDGPUISD::BUFFER_ATOMIC_FADD_BF16); case Intrinsic::amdgcn_raw_buffer_atomic_fmin: case Intrinsic::amdgcn_raw_ptr_buffer_atomic_fmin: return lowerRawBufferAtomicIntrin(Op, DAG, AMDGPUISD::BUFFER_ATOMIC_FMIN); @@ -15841,7 +15833,6 @@ bool SITargetLowering::isSDNodeSourceOfDivergence(const SDNode *N, case AMDGPUISD::BUFFER_ATOMIC_CMPSWAP: case AMDGPUISD::BUFFER_ATOMIC_CSUB: case AMDGPUISD::BUFFER_ATOMIC_FADD: - case AMDGPUISD::BUFFER_ATOMIC_FADD_BF16: case AMDGPUISD::BUFFER_ATOMIC_FMIN: case AMDGPUISD::BUFFER_ATOMIC_FMAX: // Target-specific read-modify-write atomics are sources of divergence. diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.td b/llvm/lib/Target/AMDGPU/SIInstrInfo.td index 6682763210411..9b9ff4a5d6996 100644 --- a/llvm/lib/Target/AMDGPU/SIInstrInfo.td +++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.td @@ -222,7 +222,6 @@ defm SIbuffer_atomic_inc : SDBufferAtomic <"AMDGPUISD::BUFFER_ATOMIC_INC">; defm SIbuffer_atomic_dec : SDBufferAtomic <"AMDGPUISD::BUFFER_ATOMIC_DEC">; defm SIbuffer_atomic_csub : SDBufferAtomic <"AMDGPUISD::BUFFER_ATOMIC_CSUB">; defm SIbuffer_atomic_fadd : SDBufferAtomic <"AMDGPUISD::BUFFER_ATOMIC_FADD">; -defm SIbuffer_atomic_fadd_bf16 : SDBufferAtomic <"AMDGPUISD::BUFFER_ATOMIC_FADD_BF16">; defm SIbuffer_atomic_fmin : SDBufferAtomic <"AMDGPUISD::BUFFER_ATOMIC_FMIN">; defm SIbuffer_atomic_fmax : SDBufferAtomic <"AMDGPUISD::BUFFER_ATOMIC_FMAX">; defm SIbuffer_atomic_cond_sub_u32 : SDBufferAtomic <"AMDGPUISD::BUFFER_ATOMIC_COND_SUB_U32">; diff --git a/llvm/lib/Target/AMDGPU/SIInstructions.td b/llvm/lib/Target/AMDGPU/SIInstructions.td index ba31027da92e8..e32bb8fec1f54 100644 --- a/llvm/lib/Target/AMDGPU/SIInstructions.td +++ b/llvm/lib/Target/AMDGPU/SIInstructions.td @@ -3892,7 +3892,6 @@ def G_AMDGPU_BUFFER_ATOMIC_XOR : BufferAtomicGenericInstruction; def G_AMDGPU_BUFFER_ATOMIC_INC : BufferAtomicGenericInstruction; def G_AMDGPU_BUFFER_ATOMIC_DEC : BufferAtomicGenericInstruction; def G_AMDGPU_BUFFER_ATOMIC_FADD : BufferAtomicGenericInstruction; -def G_AMDGPU_BUFFER_ATOMIC_FADD_BF16 : BufferAtomicGenericInstruction; def G_AMDGPU_BUFFER_ATOMIC_FMIN : BufferAtomicGenericInstruction; def G_AMDGPU_BUFFER_ATOMIC_FMAX : BufferAtomicGenericInstruction; diff --git a/llvm/test/CodeGen/AMDGPU/fp-atomics-gfx1200.ll b/llvm/test/CodeGen/AMDGPU/fp-atomics-gfx1200.ll index 2f29a1a9aa768..9f339af0f5580 100644 --- a/llvm/test/CodeGen/AMDGPU/fp-atomics-gfx1200.ll +++ b/llvm/test/CodeGen/AMDGPU/fp-atomics-gfx1200.ll @@ -321,7 +321,7 @@ define amdgpu_ps void @raw_buffer_atomic_add_v2f16_noret_offset(<2 x half> %val, ; ; GFX12-GISEL-LABEL: raw_buffer_atomic_add_v2f16_noret_offset: ; GFX12-GISEL: ; %bb.0: -; GFX12-GISEL-NEXT: buffer_atomic_pk_add_f16 v0, off, s[0:3], s4 offset:92 +; GFX12-GISEL-NEXT: buffer_atomic_pk_add_bf16 v0, off, s[0:3], s4 offset:92 ; GFX12-GISEL-NEXT: s_nop 0 ; GFX12-GISEL-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX12-GISEL-NEXT: s_endpgm @@ -339,7 +339,7 @@ define amdgpu_ps void @raw_buffer_atomic_add_v2f16_noret(<2 x half> %val, <4 x i ; ; GFX12-GISEL-LABEL: raw_buffer_atomic_add_v2f16_noret: ; GFX12-GISEL: ; %bb.0: -; GFX12-GISEL-NEXT: buffer_atomic_pk_add_f16 v0, v1, s[0:3], s4 offen +; GFX12-GISEL-NEXT: buffer_atomic_pk_add_bf16 v0, v1, s[0:3], s4 offen ; GFX12-GISEL-NEXT: s_nop 0 ; GFX12-GISEL-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX12-GISEL-NEXT: s_endpgm @@ -356,7 +356,7 @@ define amdgpu_ps <2 x half> @raw_buffer_atomic_add_v2f16_ret_offset(<2 x half> % ; ; GFX12-GISEL-LABEL: raw_buffer_atomic_add_v2f16_ret_offset: ; GFX12-GISEL: ; %bb.0: -; GFX12-GISEL-NEXT: buffer_atomic_pk_add_f16 v0, off, s[0:3], s4 offset:92 th:TH_ATOMIC_RETURN +; GFX12-GISEL-NEXT: buffer_atomic_pk_add_bf16 v0, off, s[0:3], s4 offset:92 th:TH_ATOMIC_RETURN ; GFX12-GISEL-NEXT: s_wait_loadcnt 0x0 ; GFX12-GISEL-NEXT: ; return to shader part epilog %ret = call <2 x half> @llvm.amdgcn.raw.buffer.atomic.fadd.v2f16(<2 x half> %val, <4 x i32> %rsrc, i32 92, i32 %soffset, i32 0) @@ -372,7 +372,7 @@ define amdgpu_ps <2 x half> @raw_buffer_atomic_add_v2f16_ret(<2 x half> %val, <4 ; ; GFX12-GISEL-LABEL: raw_buffer_atomic_add_v2f16_ret: ; GFX12-GISEL: ; %bb.0: -; GFX12-GISEL-NEXT: buffer_atomic_pk_add_f16 v0, v1, s[0:3], s4 offen th:TH_ATOMIC_RETURN +; GFX12-GISEL-NEXT: buffer_atomic_pk_add_bf16 v0, v1, s[0:3], s4 offen th:TH_ATOMIC_RETURN ; GFX12-GISEL-NEXT: s_wait_loadcnt 0x0 ; GFX12-GISEL-NEXT: ; return to shader part epilog %ret = call <2 x half> @llvm.amdgcn.raw.buffer.atomic.fadd.v2f16(<2 x half> %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 0) @@ -388,7 +388,7 @@ define amdgpu_ps float @struct_buffer_atomic_add_v2f16_ret(<2 x half> %val, <4 x ; ; GFX12-GISEL-LABEL: struct_buffer_atomic_add_v2f16_ret: ; GFX12-GISEL: ; %bb.0: -; GFX12-GISEL-NEXT: buffer_atomic_pk_add_f16 v0, v[1:2], s[0:3], s4 idxen offen th:TH_ATOMIC_RETURN +; GFX12-GISEL-NEXT: buffer_atomic_pk_add_bf16 v0, v[1:2], s[0:3], s4 idxen offen th:TH_ATOMIC_RETURN ; GFX12-GISEL-NEXT: s_wait_loadcnt 0x0 ; GFX12-GISEL-NEXT: ; return to shader part epilog %orig = call <2 x half> @llvm.amdgcn.struct.buffer.atomic.fadd.v2f16(<2 x half> %val, <4 x i32> %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 0) @@ -406,7 +406,7 @@ define amdgpu_ps void @struct_buffer_atomic_add_v2f16_noret(<2 x half> %val, <4 ; ; GFX12-GISEL-LABEL: struct_buffer_atomic_add_v2f16_noret: ; GFX12-GISEL: ; %bb.0: -; GFX12-GISEL-NEXT: buffer_atomic_pk_add_f16 v0, v[1:2], s[0:3], s4 idxen offen +; GFX12-GISEL-NEXT: buffer_atomic_pk_add_bf16 v0, v[1:2], s[0:3], s4 idxen offen ; GFX12-GISEL-NEXT: s_nop 0 ; GFX12-GISEL-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX12-GISEL-NEXT: s_endpgm diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ptr.buffer.atomic.fadd_rtn_errors.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ptr.buffer.atomic.fadd_rtn_errors.ll index a3b83c346c1bb..f8caf84d5c51a 100644 --- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ptr.buffer.atomic.fadd_rtn_errors.ll +++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ptr.buffer.atomic.fadd_rtn_errors.ll @@ -17,10 +17,12 @@ ; RUN: not --crash llc -global-isel=1 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx908 -filetype=null %t/struct-ret-v2f16-error.ll 2>&1 | FileCheck -check-prefix=ERR-STRUCT-V2F16-GISEL %s ; RUN: not --crash llc -global-isel=1 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx908 -filetype=null %t/raw-ret-v2bf16-error.ll 2>&1 | FileCheck -check-prefix=ERR-RAW-V2BF16-GISEL %s ; RUN: not --crash llc -global-isel=1 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx908 -filetype=null %t/struct-ret-v2bf16-error.ll 2>&1 | FileCheck -check-prefix=ERR-STRUCT-V2BF16-GISEL %s -; RUN: not --crash llc -global-isel=1 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx90a -filetype=null %t/raw-ret-v2bf16-error.ll 2>&1 | FileCheck -check-prefix=ERR-RAW-V2BF16-GISEL %s -; RUN: not --crash llc -global-isel=1 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx90a -filetype=null %t/struct-ret-v2bf16-error.ll 2>&1 | FileCheck -check-prefix=ERR-STRUCT-V2BF16-GISEL %s -; RUN: not --crash llc -global-isel=1 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx940 -filetype=null %t/raw-ret-v2bf16-error.ll 2>&1 | FileCheck -check-prefix=ERR-RAW-V2BF16-GISEL %s -; RUN: not --crash llc -global-isel=1 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx940 -filetype=null %t/struct-ret-v2bf16-error.ll 2>&1 | FileCheck -check-prefix=ERR-STRUCT-V2BF16-GISEL %s + +; FIXME: These should fail when bfloat support is handled correctly +; xUN: not --crash llc -global-isel=1 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx90a -filetype=null %t/raw-ret-v2bf16-error.ll 2>&1 | FileCheck -check-prefix=ERR-RAW-V2BF16-GISEL %s +; xUN: not --crash llc -global-isel=1 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx90a -filetype=null %t/struct-ret-v2bf16-error.ll 2>&1 | FileCheck -check-prefix=ERR-STRUCT-V2BF16-GISEL %s +; xUN: not --crash llc -global-isel=1 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx940 -filetype=null %t/raw-ret-v2bf16-error.ll 2>&1 | FileCheck -check-prefix=ERR-RAW-V2BF16-GISEL %s +; xUN: not --crash llc -global-isel=1 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx940 -filetype=null %t/struct-ret-v2bf16-error.ll 2>&1 | FileCheck -check-prefix=ERR-STRUCT-V2BF16-GISEL %s ; Make sure buffer fadd atomics with return values are not selected ; for gfx908 where they do not work. From 8930ac1bbe0bc50402da53b22501e17045a537ca Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Mon, 17 Jun 2024 21:51:25 +0200 Subject: [PATCH 03/25] AMDGPU: Cleanup selection patterns for buffer loads (#95378) We should just support these for all register types. --- llvm/lib/Target/AMDGPU/BUFInstructions.td | 72 ++++++++++------------- llvm/lib/Target/AMDGPU/SIRegisterInfo.td | 16 ++--- 2 files changed, 39 insertions(+), 49 deletions(-) diff --git a/llvm/lib/Target/AMDGPU/BUFInstructions.td b/llvm/lib/Target/AMDGPU/BUFInstructions.td index f5b6de15e19e7..dff19b6a93286 100644 --- a/llvm/lib/Target/AMDGPU/BUFInstructions.td +++ b/llvm/lib/Target/AMDGPU/BUFInstructions.td @@ -1419,27 +1419,21 @@ let OtherPredicates = [HasPackedD16VMem] in { defm : MUBUF_LoadIntrinsicPat; } // End HasPackedD16VMem. -defm : MUBUF_LoadIntrinsicPat; -defm : MUBUF_LoadIntrinsicPat; -defm : MUBUF_LoadIntrinsicPat; -defm : MUBUF_LoadIntrinsicPat; -defm : MUBUF_LoadIntrinsicPat; -defm : MUBUF_LoadIntrinsicPat; -defm : MUBUF_LoadIntrinsicPat; -defm : MUBUF_LoadIntrinsicPat; -defm : MUBUF_LoadIntrinsicPat; -defm : MUBUF_LoadIntrinsicPat; -defm : MUBUF_LoadIntrinsicPat; -defm : MUBUF_LoadIntrinsicPat; -defm : MUBUF_LoadIntrinsicPat; -defm : MUBUF_LoadIntrinsicPat; -defm : MUBUF_LoadIntrinsicPat; -defm : MUBUF_LoadIntrinsicPat; -defm : MUBUF_LoadIntrinsicPat; -defm : MUBUF_LoadIntrinsicPat; -defm : MUBUF_LoadIntrinsicPat; -defm : MUBUF_LoadIntrinsicPat; -defm : MUBUF_LoadIntrinsicPat; +foreach vt = Reg32Types.types in { +defm : MUBUF_LoadIntrinsicPat; +} + +foreach vt = Reg64Types.types in { +defm : MUBUF_LoadIntrinsicPat; +} + +foreach vt = Reg96Types.types in { +defm : MUBUF_LoadIntrinsicPat; +} + +foreach vt = Reg128Types.types in { +defm : MUBUF_LoadIntrinsicPat; +} defm : MUBUF_LoadIntrinsicPat; defm : MUBUF_LoadIntrinsicPat; @@ -1530,27 +1524,21 @@ let OtherPredicates = [HasPackedD16VMem] in { defm : MUBUF_StoreIntrinsicPat; } // End HasPackedD16VMem. -defm : MUBUF_StoreIntrinsicPat; -defm : MUBUF_StoreIntrinsicPat; -defm : MUBUF_StoreIntrinsicPat; -defm : MUBUF_StoreIntrinsicPat; -defm : MUBUF_StoreIntrinsicPat; -defm : MUBUF_StoreIntrinsicPat; -defm : MUBUF_StoreIntrinsicPat; -defm : MUBUF_StoreIntrinsicPat; -defm : MUBUF_StoreIntrinsicPat; -defm : MUBUF_StoreIntrinsicPat; -defm : MUBUF_StoreIntrinsicPat; -defm : MUBUF_StoreIntrinsicPat; -defm : MUBUF_StoreIntrinsicPat; -defm : MUBUF_StoreIntrinsicPat; -defm : MUBUF_StoreIntrinsicPat; -defm : MUBUF_StoreIntrinsicPat; -defm : MUBUF_StoreIntrinsicPat; -defm : MUBUF_StoreIntrinsicPat; -defm : MUBUF_StoreIntrinsicPat; -defm : MUBUF_StoreIntrinsicPat; -defm : MUBUF_StoreIntrinsicPat; +foreach vt = Reg32Types.types in { +defm : MUBUF_StoreIntrinsicPat; +} + +foreach vt = Reg64Types.types in { +defm : MUBUF_StoreIntrinsicPat; +} + +foreach vt = Reg96Types.types in { +defm : MUBUF_StoreIntrinsicPat; +} + +foreach vt = Reg128Types.types in { +defm : MUBUF_StoreIntrinsicPat; +} defm : MUBUF_StoreIntrinsicPat; defm : MUBUF_StoreIntrinsicPat; diff --git a/llvm/lib/Target/AMDGPU/SIRegisterInfo.td b/llvm/lib/Target/AMDGPU/SIRegisterInfo.td index 3666976cf82f8..a8efe2b2ba35e 100644 --- a/llvm/lib/Target/AMDGPU/SIRegisterInfo.td +++ b/llvm/lib/Target/AMDGPU/SIRegisterInfo.td @@ -586,7 +586,9 @@ class RegisterTypes reg_types> { def Reg16Types : RegisterTypes<[i16, f16, bf16]>; def Reg32Types : RegisterTypes<[i32, f32, v2i16, v2f16, v2bf16, p2, p3, p5, p6]>; -def Reg64Types : RegisterTypes<[i64, f64, v2i32, v2f32, v4i16, v4f16, v4bf16, p0]>; +def Reg64Types : RegisterTypes<[i64, f64, v2i32, v2f32, p0, v4i16, v4f16, v4bf16]>; +def Reg96Types : RegisterTypes<[v3i32, v3f32]>; +def Reg128Types : RegisterTypes<[v4i32, v4f32, v2i64, v2f64, v8i16, v8f16, v8bf16]>; let HasVGPR = 1 in { // VOP3 and VINTERP can access 256 lo and 256 hi registers. @@ -744,7 +746,7 @@ def Pseudo_SReg_32 : SIRegisterClass<"AMDGPU", [i32, f32, i16, f16, bf16, v2i16, let BaseClassOrder = 10000; } -def Pseudo_SReg_128 : SIRegisterClass<"AMDGPU", [v4i32, v2i64, v2f64, v8i16, v8f16, v8bf16], 32, +def Pseudo_SReg_128 : SIRegisterClass<"AMDGPU", Reg128Types.types, 32, (add PRIVATE_RSRC_REG)> { let isAllocatable = 0; let CopyCost = -1; @@ -815,7 +817,7 @@ def SRegOrLds_32 : SIRegisterClass<"AMDGPU", [i32, f32, i16, f16, bf16, v2i16, v let HasSGPR = 1; } -def SGPR_64 : SIRegisterClass<"AMDGPU", [v2i32, i64, v2f32, f64, v4i16, v4f16, v4bf16], 32, +def SGPR_64 : SIRegisterClass<"AMDGPU", Reg64Types.types, 32, (add SGPR_64Regs)> { let CopyCost = 1; let AllocationPriority = 1; @@ -905,8 +907,8 @@ multiclass SRegClass; -defm "" : SRegClass<4, [v4i32, v4f32, v2i64, v2f64, v8i16, v8f16, v8bf16], SGPR_128Regs, TTMP_128Regs>; +defm "" : SRegClass<3, Reg96Types.types, SGPR_96Regs, TTMP_96Regs>; +defm "" : SRegClass<4, Reg128Types.types, SGPR_128Regs, TTMP_128Regs>; defm "" : SRegClass<5, [v5i32, v5f32], SGPR_160Regs, TTMP_160Regs>; defm "" : SRegClass<6, [v6i32, v6f32, v3i64, v3f64], SGPR_192Regs, TTMP_192Regs>; defm "" : SRegClass<7, [v7i32, v7f32], SGPR_224Regs, TTMP_224Regs>; @@ -958,8 +960,8 @@ multiclass VRegClass regTypes, dag regList> { defm VReg_64 : VRegClass<2, [i64, f64, v2i32, v2f32, v4f16, v4bf16, v4i16, p0, p1, p4], (add VGPR_64)>; -defm VReg_96 : VRegClass<3, [v3i32, v3f32], (add VGPR_96)>; -defm VReg_128 : VRegClass<4, [v4i32, v4f32, v2i64, v2f64, v8i16, v8f16, v8bf16], (add VGPR_128)>; +defm VReg_96 : VRegClass<3, Reg96Types.types, (add VGPR_96)>; +defm VReg_128 : VRegClass<4, Reg128Types.types, (add VGPR_128)>; defm VReg_160 : VRegClass<5, [v5i32, v5f32], (add VGPR_160)>; defm VReg_192 : VRegClass<6, [v6i32, v6f32, v3i64, v3f64], (add VGPR_192)>; From 804335638078f5a7c2bd31847b6080763cb22159 Mon Sep 17 00:00:00 2001 From: Joseph Huber Date: Mon, 17 Jun 2024 14:52:50 -0500 Subject: [PATCH 04/25] [Offload] Change HSA header search order (#95769) Summary: The HSA headers existed previously in `include/hsa.h` and were moved to `include/hsa/hsa.h` in a later ROCm version. The include headers here were originally designed to favor a newer one. However, this unintentionally prevented the dyanmic HSA's `hsa.h` from being used if both were present. This patch changes the order so it will be found first. Related to https://github.com/llvm/llvm-project/pull/95484. --- offload/plugins-nextgen/amdgpu/src/rtl.cpp | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/offload/plugins-nextgen/amdgpu/src/rtl.cpp b/offload/plugins-nextgen/amdgpu/src/rtl.cpp index e678213df18ce..e6643d3260eb4 100644 --- a/offload/plugins-nextgen/amdgpu/src/rtl.cpp +++ b/offload/plugins-nextgen/amdgpu/src/rtl.cpp @@ -58,12 +58,12 @@ #endif #if defined(__has_include) -#if __has_include("hsa/hsa.h") -#include "hsa/hsa.h" -#include "hsa/hsa_ext_amd.h" -#elif __has_include("hsa.h") +#if __has_include("hsa.h") #include "hsa.h" #include "hsa_ext_amd.h" +#elif __has_include("hsa/hsa.h") +#include "hsa/hsa.h" +#include "hsa/hsa_ext_amd.h" #endif #else #include "hsa/hsa.h" From 77d8cfb3c50e3341d65af1f9e442004bbd77af9b Mon Sep 17 00:00:00 2001 From: Alexander Shaposhnikov <6532716+alexander-shaposhnikov@users.noreply.github.com> Date: Mon, 17 Jun 2024 12:59:04 -0700 Subject: [PATCH 05/25] [Flang] Switch to common::visit more call sites (#90018) Switch to common::visit more call sites. Test plan: ninja check-all --- flang/include/flang/Evaluate/tools.h | 4 +- flang/include/flang/Lower/DumpEvaluateExpr.h | 2 +- flang/include/flang/Lower/PFTBuilder.h | 7 +- flang/include/flang/Lower/Support/Utils.h | 40 +++-- .../include/flang/Optimizer/Support/Matcher.h | 5 +- flang/lib/Evaluate/intrinsics.cpp | 2 +- flang/lib/Lower/Allocatable.cpp | 23 +-- flang/lib/Lower/Bridge.cpp | 98 +++++------ flang/lib/Lower/CallInterface.cpp | 6 +- flang/lib/Lower/ComponentPath.cpp | 2 +- flang/lib/Lower/ConvertArrayConstructor.cpp | 37 +++-- flang/lib/Lower/ConvertCall.cpp | 3 +- flang/lib/Lower/ConvertConstant.cpp | 8 +- flang/lib/Lower/ConvertExpr.cpp | 155 ++++++++++-------- flang/lib/Lower/ConvertExprToHLFIR.cpp | 23 +-- flang/lib/Lower/ConvertType.cpp | 2 +- flang/lib/Lower/DirectivesCommon.h | 8 +- flang/lib/Lower/IO.cpp | 16 +- flang/lib/Lower/IterationSpace.cpp | 39 ++--- flang/lib/Lower/Mangler.cpp | 2 +- flang/lib/Lower/OpenACC.cpp | 41 ++--- flang/lib/Lower/OpenMP/Clauses.cpp | 24 +-- flang/lib/Lower/OpenMP/OpenMP.cpp | 27 +-- flang/lib/Lower/OpenMP/Utils.cpp | 2 +- flang/lib/Lower/PFTBuilder.cpp | 51 +++--- flang/lib/Lower/VectorSubscripts.cpp | 95 +++++------ flang/lib/Optimizer/Builder/IntrinsicCall.cpp | 6 +- flang/lib/Semantics/check-acc-structure.cpp | 14 +- flang/lib/Semantics/check-coarray.cpp | 2 +- 29 files changed, 391 insertions(+), 353 deletions(-) diff --git a/flang/include/flang/Evaluate/tools.h b/flang/include/flang/Evaluate/tools.h index 378a5fca03264..ea56a20633f0c 100644 --- a/flang/include/flang/Evaluate/tools.h +++ b/flang/include/flang/Evaluate/tools.h @@ -450,12 +450,12 @@ struct ExtractSubstringHelper { template static std::optional visit(const Designator &e) { - return std::visit([](auto &&s) { return visit(s); }, e.u); + return common::visit([](auto &&s) { return visit(s); }, e.u); } template static std::optional visit(const Expr &e) { - return std::visit([](auto &&s) { return visit(s); }, e.u); + return common::visit([](auto &&s) { return visit(s); }, e.u); } }; diff --git a/flang/include/flang/Lower/DumpEvaluateExpr.h b/flang/include/flang/Lower/DumpEvaluateExpr.h index c67df245359e3..88f53e96a81c2 100644 --- a/flang/include/flang/Lower/DumpEvaluateExpr.h +++ b/flang/include/flang/Lower/DumpEvaluateExpr.h @@ -68,7 +68,7 @@ class DumpEvaluateExpr { } template void show(const std::variant &u) { - std::visit([&](const auto &v) { show(v); }, u); + Fortran::common::visit([&](const auto &v) { show(v); }, u); } template void show(const std::vector &x) { diff --git a/flang/include/flang/Lower/PFTBuilder.h b/flang/include/flang/Lower/PFTBuilder.h index c2b600c6b5d9b..7f1b93c564b4c 100644 --- a/flang/include/flang/Lower/PFTBuilder.h +++ b/flang/include/flang/Lower/PFTBuilder.h @@ -76,7 +76,7 @@ class ReferenceVariantBase { } template constexpr auto visit(VISITOR &&visitor) const { - return std::visit( + return Fortran::common::visit( common::visitors{[&visitor](auto ref) { return visitor(ref.get()); }}, u); } @@ -494,7 +494,8 @@ struct Variable { /// Is this variable a global? bool isGlobal() const { - return std::visit([](const auto &x) { return x.isGlobal(); }, var); + return Fortran::common::visit([](const auto &x) { return x.isGlobal(); }, + var); } /// Is this a module or submodule variable? @@ -504,7 +505,7 @@ struct Variable { } const Fortran::semantics::Scope *getOwningScope() const { - return std::visit( + return Fortran::common::visit( common::visitors{ [](const Nominal &x) { return &x.symbol->GetUltimate().owner(); }, [](const AggregateStore &agg) { return &agg.getOwningScope(); }}, diff --git a/flang/include/flang/Lower/Support/Utils.h b/flang/include/flang/Lower/Support/Utils.h index e791f3dbb221a..1cc74521e22d8 100644 --- a/flang/include/flang/Lower/Support/Utils.h +++ b/flang/include/flang/Lower/Support/Utils.h @@ -69,7 +69,8 @@ static Fortran::lower::SomeExpr ignoreEvConvert(const A &x) { inline Fortran::lower::SomeExpr ignoreEvConvert(const Fortran::evaluate::Expr> &x) { - return std::visit([](const auto &v) { return ignoreEvConvert(v); }, x.u); + return Fortran::common::visit( + [](const auto &v) { return ignoreEvConvert(v); }, x.u); } /// Zip two containers of the same size together and flatten the pairs. `flatZip @@ -119,7 +120,8 @@ class HashEvaluateExpr { return 0u; } static unsigned getHashValue(const Fortran::evaluate::Subscript &x) { - return std::visit([&](const auto &v) { return getHashValue(v); }, x.u); + return Fortran::common::visit( + [&](const auto &v) { return getHashValue(v); }, x.u); } static unsigned getHashValue(const Fortran::evaluate::Triplet &x) { return getHashValue(x.lower()) - getHashValue(x.upper()) * 5u - @@ -154,7 +156,8 @@ class HashEvaluateExpr { return getHashValue(x.GetComponent()) * 13u; } static unsigned getHashValue(const Fortran::evaluate::DataRef &x) { - return std::visit([&](const auto &v) { return getHashValue(v); }, x.u); + return Fortran::common::visit( + [&](const auto &v) { return getHashValue(v); }, x.u); } static unsigned getHashValue(const Fortran::evaluate::ComplexPart &x) { return getHashValue(x.complex()) - static_cast(x.part()); @@ -247,8 +250,9 @@ class HashEvaluateExpr { return getHashValue(sym.get()); } static unsigned getHashValue(const Fortran::evaluate::Substring &x) { - return 61u * std::visit([&](const auto &p) { return getHashValue(p); }, - x.parent()) - + return 61u * + Fortran::common::visit( + [&](const auto &p) { return getHashValue(p); }, x.parent()) - getHashValue(x.lower()) - (getHashValue(x.lower()) + 1u); } static unsigned @@ -270,7 +274,8 @@ class HashEvaluateExpr { } static unsigned getHashValue(const Fortran::evaluate::ProcedureDesignator &x) { - return std::visit([&](const auto &v) { return getHashValue(v); }, x.u); + return Fortran::common::visit( + [&](const auto &v) { return getHashValue(v); }, x.u); } static unsigned getHashValue(const Fortran::evaluate::ProcedureRef &x) { unsigned args = 13u; @@ -321,15 +326,18 @@ class HashEvaluateExpr { } template static unsigned getHashValue(const Fortran::evaluate::Expr &x) { - return std::visit([&](const auto &v) { return getHashValue(v); }, x.u); + return Fortran::common::visit( + [&](const auto &v) { return getHashValue(v); }, x.u); } static unsigned getHashValue( const Fortran::evaluate::Relational &x) { - return std::visit([&](const auto &v) { return getHashValue(v); }, x.u); + return Fortran::common::visit( + [&](const auto &v) { return getHashValue(v); }, x.u); } template static unsigned getHashValue(const Fortran::evaluate::Designator &x) { - return std::visit([&](const auto &v) { return getHashValue(v); }, x.u); + return Fortran::common::visit( + [&](const auto &v) { return getHashValue(v); }, x.u); } template static unsigned @@ -378,7 +386,7 @@ class IsEqualEvaluateExpr { } static bool isEqual(const Fortran::evaluate::Subscript &x, const Fortran::evaluate::Subscript &y) { - return std::visit( + return Fortran::common::visit( [&](const auto &v, const auto &w) { return isEqual(v, w); }, x.u, y.u); } static bool isEqual(const Fortran::evaluate::Triplet &x, @@ -411,7 +419,7 @@ class IsEqualEvaluateExpr { } static bool isEqual(const Fortran::evaluate::DataRef &x, const Fortran::evaluate::DataRef &y) { - return std::visit( + return Fortran::common::visit( [&](const auto &v, const auto &w) { return isEqual(v, w); }, x.u, y.u); } static bool isEqual(const Fortran::evaluate::ComplexPart &x, @@ -499,7 +507,7 @@ class IsEqualEvaluateExpr { } static bool isEqual(const Fortran::evaluate::Substring &x, const Fortran::evaluate::Substring &y) { - return std::visit( + return Fortran::common::visit( [&](const auto &p, const auto &q) { return isEqual(p, q); }, x.parent(), y.parent()) && isEqual(x.lower(), y.lower()) && isEqual(x.upper(), y.upper()); @@ -529,7 +537,7 @@ class IsEqualEvaluateExpr { } static bool isEqual(const Fortran::evaluate::ProcedureDesignator &x, const Fortran::evaluate::ProcedureDesignator &y) { - return std::visit( + return Fortran::common::visit( [&](const auto &v, const auto &w) { return isEqual(v, w); }, x.u, y.u); } static bool isEqual(const Fortran::evaluate::ProcedureRef &x, @@ -591,19 +599,19 @@ class IsEqualEvaluateExpr { template static bool isEqual(const Fortran::evaluate::Expr &x, const Fortran::evaluate::Expr &y) { - return std::visit( + return Fortran::common::visit( [&](const auto &v, const auto &w) { return isEqual(v, w); }, x.u, y.u); } static bool isEqual(const Fortran::evaluate::Relational &x, const Fortran::evaluate::Relational &y) { - return std::visit( + return Fortran::common::visit( [&](const auto &v, const auto &w) { return isEqual(v, w); }, x.u, y.u); } template static bool isEqual(const Fortran::evaluate::Designator &x, const Fortran::evaluate::Designator &y) { - return std::visit( + return Fortran::common::visit( [&](const auto &v, const auto &w) { return isEqual(v, w); }, x.u, y.u); } template diff --git a/flang/include/flang/Optimizer/Support/Matcher.h b/flang/include/flang/Optimizer/Support/Matcher.h index da1d7c21f42c4..44672d3c0a072 100644 --- a/flang/include/flang/Optimizer/Support/Matcher.h +++ b/flang/include/flang/Optimizer/Support/Matcher.h @@ -13,6 +13,7 @@ #ifndef FORTRAN_OPTIMIZER_SUPPORT_MATCHER_H #define FORTRAN_OPTIMIZER_SUPPORT_MATCHER_H +#include "flang/Common/idioms.h" #include // Boilerplate CRTP class for a simplified type-casing syntactic sugar. This @@ -23,10 +24,10 @@ template struct matches : Ts... { using Ts::operator()...; }; template matches(Ts...) -> matches; template struct matcher { template auto match(Ts... ts) { - return std::visit(matches{ts...}, static_cast(this)->matchee()); + return Fortran::common::visit(matches{ts...}, static_cast(this)->matchee()); } template auto match(Ts... ts) const { - return std::visit(matches{ts...}, static_cast(this)->matchee()); + return Fortran::common::visit(matches{ts...}, static_cast(this)->matchee()); } }; // clang-format on diff --git a/flang/lib/Evaluate/intrinsics.cpp b/flang/lib/Evaluate/intrinsics.cpp index ace316174a892..1bba541e8e14a 100644 --- a/flang/lib/Evaluate/intrinsics.cpp +++ b/flang/lib/Evaluate/intrinsics.cpp @@ -2936,7 +2936,7 @@ static bool CheckForNonPositiveValues(FoldingContext &context, if (arg.Rank() > 0) { if (const Expr *expr{arg.UnwrapExpr()}) { if (const auto *intExpr{std::get_if>(&expr->u)}) { - std::visit( + Fortran::common::visit( [&](const auto &kindExpr) { using IntType = typename std::decay_t::Result; if (const auto *constArray{ diff --git a/flang/lib/Lower/Allocatable.cpp b/flang/lib/Lower/Allocatable.cpp index 068f5d25967c9..77e02898ac9fb 100644 --- a/flang/lib/Lower/Allocatable.cpp +++ b/flang/lib/Lower/Allocatable.cpp @@ -350,10 +350,10 @@ class AllocateStmtHelper { void visitAllocateOptions() { for (const auto &allocOption : std::get>(stmt.t)) - std::visit( + Fortran::common::visit( Fortran::common::visitors{ [&](const Fortran::parser::StatOrErrmsg &statOrErr) { - std::visit( + Fortran::common::visit( Fortran::common::visitors{ [&](const Fortran::parser::StatVariable &statVar) { statExpr = Fortran::semantics::GetExpr(statVar); @@ -898,15 +898,16 @@ void Fortran::lower::genDeallocateStmt( const Fortran::lower::SomeExpr *errMsgExpr = nullptr; for (const Fortran::parser::StatOrErrmsg &statOrErr : std::get>(stmt.t)) - std::visit(Fortran::common::visitors{ - [&](const Fortran::parser::StatVariable &statVar) { - statExpr = Fortran::semantics::GetExpr(statVar); - }, - [&](const Fortran::parser::MsgVariable &errMsgVar) { - errMsgExpr = Fortran::semantics::GetExpr(errMsgVar); - }, - }, - statOrErr.u); + Fortran::common::visit( + Fortran::common::visitors{ + [&](const Fortran::parser::StatVariable &statVar) { + statExpr = Fortran::semantics::GetExpr(statVar); + }, + [&](const Fortran::parser::MsgVariable &errMsgVar) { + errMsgExpr = Fortran::semantics::GetExpr(errMsgVar); + }, + }, + statOrErr.u); ErrorManager errorManager; errorManager.init(converter, loc, statExpr, errMsgExpr); fir::FirOpBuilder &builder = converter.getFirOpBuilder(); diff --git a/flang/lib/Lower/Bridge.cpp b/flang/lib/Lower/Bridge.cpp index 24a57812ba104..423c418889a00 100644 --- a/flang/lib/Lower/Bridge.cpp +++ b/flang/lib/Lower/Bridge.cpp @@ -302,7 +302,7 @@ class FirConverter : public Fortran::lower::AbstractConverter { bool hasMainProgram = false; const Fortran::semantics::Symbol *globalOmpRequiresSymbol = nullptr; for (Fortran::lower::pft::Program::Units &u : pft.getUnits()) { - std::visit( + Fortran::common::visit( Fortran::common::visitors{ [&](Fortran::lower::pft::FunctionLikeUnit &f) { if (f.isMainProgram()) @@ -336,7 +336,7 @@ class FirConverter : public Fortran::lower::AbstractConverter { // Primary translation pass. for (Fortran::lower::pft::Program::Units &u : pft.getUnits()) { - std::visit( + Fortran::common::visit( Fortran::common::visitors{ [&](Fortran::lower::pft::FunctionLikeUnit &f) { lowerFunc(f); }, [&](Fortran::lower::pft::ModuleLikeUnit &m) { lowerMod(m); }, @@ -2062,7 +2062,7 @@ class FirConverter : public Fortran::lower::AbstractConverter { handleLocalitySpecs(info); for (const auto *dir : dirs) { - std::visit( + Fortran::common::visit( Fortran::common::visitors{ [&](const Fortran::parser::CompilerDirective::VectorAlways &d) { addLoopAnnotationAttr(info); }, @@ -2433,7 +2433,7 @@ class FirConverter : public Fortran::lower::AbstractConverter { } void genFIR(const Fortran::parser::ForallAssignmentStmt &stmt) { - std::visit([&](const auto &x) { genFIR(x); }, stmt.u); + Fortran::common::visit([&](const auto &x) { genFIR(x); }, stmt.u); } void genFIR(const Fortran::parser::EndForallStmt &) { @@ -2494,7 +2494,7 @@ class FirConverter : public Fortran::lower::AbstractConverter { forall.t)); for (const Fortran::parser::ForallBodyConstruct &s : std::get>(forall.t)) { - std::visit( + Fortran::common::visit( Fortran::common::visitors{ [&](const Fortran::parser::WhereConstruct &b) { genFIR(b); }, [&](const Fortran::common::Indirection< @@ -2617,7 +2617,7 @@ class FirConverter : public Fortran::lower::AbstractConverter { void genFIR(const Fortran::parser::CompilerDirective &dir) { Fortran::lower::pft::Evaluation &eval = getEval(); - std::visit( + Fortran::common::visit( Fortran::common::visitors{ [&](const Fortran::parser::CompilerDirective::VectorAlways &) { attachDirectiveToLoop(dir, &eval); @@ -3198,7 +3198,7 @@ class FirConverter : public Fortran::lower::AbstractConverter { const auto &rank = std::get( rankCaseStmt->t); assert(e->block && "missing SelectRankCaseStmt block"); - std::visit( + Fortran::common::visit( Fortran::common::visitors{ [&](const Fortran::parser::ScalarIntConstantExpr &rankExpr) { blockList.emplace_back(e->block); @@ -3229,9 +3229,9 @@ class FirConverter : public Fortran::lower::AbstractConverter { "selector should not yet be set"); Fortran::lower::StatementContext &stmtCtx = activeConstructStack.back().stmtCtx; - const Fortran::lower::SomeExpr *selectorExpr = - std::visit([](const auto &x) { return Fortran::semantics::GetExpr(x); }, - std::get(selectRankStmt.t).u); + const Fortran::lower::SomeExpr *selectorExpr = Fortran::common::visit( + [](const auto &x) { return Fortran::semantics::GetExpr(x); }, + std::get(selectRankStmt.t).u); assert(selectorExpr && "failed to retrieve selector expr"); hlfir::Entity selector = Fortran::lower::convertExprToHLFIR( loc, *this, *selectorExpr, localSymbols, stmtCtx); @@ -3663,7 +3663,7 @@ class FirConverter : public Fortran::lower::AbstractConverter { Fortran::parser::Label errLabel{}; bool hasIostat{}; for (const auto &spec : specList) { - std::visit( + Fortran::common::visit( Fortran::common::visitors{ [&](const Fortran::parser::EndLabel &label) { endLabel = label.v; @@ -4373,7 +4373,7 @@ class FirConverter : public Fortran::lower::AbstractConverter { void genAssignment(const Fortran::evaluate::Assignment &assign) { mlir::Location loc = toLocation(); if (lowerToHighLevelFIR()) { - std::visit( + Fortran::common::visit( Fortran::common::visitors{ [&](const Fortran::evaluate::Assignment::Intrinsic &) { genDataAssignment(assign, /*userDefinedAssignment=*/nullptr); @@ -4401,7 +4401,7 @@ class FirConverter : public Fortran::lower::AbstractConverter { explicitIterSpace.genLoopNest(); } Fortran::lower::StatementContext stmtCtx; - std::visit( + Fortran::common::visit( Fortran::common::visitors{ // [1] Plain old assignment. [&](const Fortran::evaluate::Assignment::Intrinsic &) { @@ -4670,7 +4670,7 @@ class FirConverter : public Fortran::lower::AbstractConverter { } } void genFIR(const Fortran::parser::WhereBodyConstruct &body) { - std::visit( + Fortran::common::visit( Fortran::common::visitors{ [&](const Fortran::parser::Statement< Fortran::parser::AssignmentStmt> &stmt) { @@ -5386,18 +5386,19 @@ class FirConverter : public Fortran::lower::AbstractConverter { // The intrinsic module scope, if present, is the first scope. const Fortran::semantics::Scope *intrinsicModuleScope = nullptr; for (Fortran::lower::pft::Program::Units &u : pft.getUnits()) { - std::visit(Fortran::common::visitors{ - [&](Fortran::lower::pft::FunctionLikeUnit &f) { - intrinsicModuleScope = &f.getScope().parent(); - }, - [&](Fortran::lower::pft::ModuleLikeUnit &m) { - intrinsicModuleScope = &m.getScope().parent(); - }, - [&](Fortran::lower::pft::BlockDataUnit &b) {}, - [&](Fortran::lower::pft::CompilerDirectiveUnit &d) {}, - [&](Fortran::lower::pft::OpenACCDirectiveUnit &d) {}, - }, - u); + Fortran::common::visit( + Fortran::common::visitors{ + [&](Fortran::lower::pft::FunctionLikeUnit &f) { + intrinsicModuleScope = &f.getScope().parent(); + }, + [&](Fortran::lower::pft::ModuleLikeUnit &m) { + intrinsicModuleScope = &m.getScope().parent(); + }, + [&](Fortran::lower::pft::BlockDataUnit &b) {}, + [&](Fortran::lower::pft::CompilerDirectiveUnit &d) {}, + [&](Fortran::lower::pft::OpenACCDirectiveUnit &d) {}, + }, + u); if (intrinsicModuleScope) { while (!intrinsicModuleScope->IsGlobal()) intrinsicModuleScope = &intrinsicModuleScope->parent(); @@ -5531,7 +5532,7 @@ class FirConverter : public Fortran::lower::AbstractConverter { analyzeExplicitSpace(lhs); analyzeExplicitSpace(rhs); }; - std::visit( + Fortran::common::visit( Fortran::common::visitors{ [&](const Fortran::evaluate::ProcedureRef &procRef) { // Ensure the procRef expressions are the one being visited. @@ -5549,7 +5550,8 @@ class FirConverter : public Fortran::lower::AbstractConverter { explicitIterSpace.endAssign(); } void analyzeExplicitSpace(const Fortran::parser::ForallAssignmentStmt &stmt) { - std::visit([&](const auto &s) { analyzeExplicitSpace(s); }, stmt.u); + Fortran::common::visit([&](const auto &s) { analyzeExplicitSpace(s); }, + stmt.u); } void analyzeExplicitSpace(const Fortran::parser::AssignmentStmt &s) { analyzeExplicitSpace(s.typedAssignment->v.operator->()); @@ -5594,13 +5596,14 @@ class FirConverter : public Fortran::lower::AbstractConverter { analyzeExplicitSpace(e); } void analyzeExplicitSpace(const Fortran::parser::WhereBodyConstruct &body) { - std::visit(Fortran::common::visitors{ - [&](const Fortran::common::Indirection< - Fortran::parser::WhereConstruct> &wc) { - analyzeExplicitSpace(wc.value()); - }, - [&](const auto &s) { analyzeExplicitSpace(s.statement); }}, - body.u); + Fortran::common::visit( + Fortran::common::visitors{ + [&](const Fortran::common::Indirection< + Fortran::parser::WhereConstruct> &wc) { + analyzeExplicitSpace(wc.value()); + }, + [&](const auto &s) { analyzeExplicitSpace(s.statement); }}, + body.u); } void analyzeExplicitSpace(const Fortran::parser::MaskedElsewhereStmt &stmt) { const Fortran::lower::SomeExpr *exp = Fortran::semantics::GetExpr( @@ -5651,16 +5654,17 @@ class FirConverter : public Fortran::lower::AbstractConverter { .statement); for (const Fortran::parser::ForallBodyConstruct &s : std::get>(forall.t)) { - std::visit(Fortran::common::visitors{ - [&](const Fortran::common::Indirection< - Fortran::parser::ForallConstruct> &b) { - analyzeExplicitSpace(b.value()); - }, - [&](const Fortran::parser::WhereConstruct &w) { - analyzeExplicitSpace(w); - }, - [&](const auto &b) { analyzeExplicitSpace(b.statement); }}, - s.u); + Fortran::common::visit( + Fortran::common::visitors{ + [&](const Fortran::common::Indirection< + Fortran::parser::ForallConstruct> &b) { + analyzeExplicitSpace(b.value()); + }, + [&](const Fortran::parser::WhereConstruct &w) { + analyzeExplicitSpace(w); + }, + [&](const auto &b) { analyzeExplicitSpace(b.statement); }}, + s.u); } analyzeExplicitSpacePop(); } @@ -5715,7 +5719,7 @@ class FirConverter : public Fortran::lower::AbstractConverter { std::string getConstantExprManglePrefix(mlir::Location loc, const Fortran::lower::SomeExpr &expr, mlir::Type eleTy) { - return std::visit( + return Fortran::common::visit( [&](const auto &x) -> std::string { using T = std::decay_t; if constexpr (Fortran::common::HasMember< @@ -5730,7 +5734,7 @@ class FirConverter : public Fortran::lower::AbstractConverter { fir::emitFatalError(loc, "non a constant derived type expression"); } else { - return std::visit( + return Fortran::common::visit( [&](const auto &someKind) -> std::string { using T = std::decay_t; using TK = Fortran::evaluate::Type(pair); - std::visit( + Fortran::common::visit( Fortran::common::visitors{ [&](const auto &dummy) { const auto &entity = getDataObjectEntity(std::get<1>(pair)); @@ -877,7 +877,7 @@ class Fortran::lower::CallInterfaceImpl { for (auto pair : llvm::zip(procedure.dummyArguments, argumentEntities)) { const Fortran::evaluate::characteristics::DummyArgument &argCharacteristics = std::get<0>(pair); - std::visit( + Fortran::common::visit( Fortran::common::visitors{ [&](const Fortran::evaluate::characteristics::DummyDataObject &dummy) { diff --git a/flang/lib/Lower/ComponentPath.cpp b/flang/lib/Lower/ComponentPath.cpp index d20ea23153102..5bdbca6062e6d 100644 --- a/flang/lib/Lower/ComponentPath.cpp +++ b/flang/lib/Lower/ComponentPath.cpp @@ -36,7 +36,7 @@ void Fortran::lower::ComponentPath::clear() { bool Fortran::lower::isRankedArrayAccess(const Fortran::evaluate::ArrayRef &x) { for (const Fortran::evaluate::Subscript &sub : x.subscript()) { - if (std::visit( + if (Fortran::common::visit( Fortran::common::visitors{ [&](const Fortran::evaluate::Triplet &) { return true; }, [&](const Fortran::evaluate::IndirectSubscriptIntegerExpr &e) { diff --git a/flang/lib/Lower/ConvertArrayConstructor.cpp b/flang/lib/Lower/ConvertArrayConstructor.cpp index 341fad9a5e43c..3c43cd20eb080 100644 --- a/flang/lib/Lower/ConvertArrayConstructor.cpp +++ b/flang/lib/Lower/ConvertArrayConstructor.cpp @@ -438,7 +438,7 @@ class ArrayCtorLoweringStrategy { void pushValue(mlir::Location loc, fir::FirOpBuilder &builder, hlfir::Entity value) { - return std::visit( + return Fortran::common::visit( [&](auto &impl) { return impl.pushValue(loc, builder, value); }, implVariant); } @@ -446,7 +446,7 @@ class ArrayCtorLoweringStrategy { mlir::Value startImpliedDo(mlir::Location loc, fir::FirOpBuilder &builder, mlir::Value lower, mlir::Value upper, mlir::Value stride) { - return std::visit( + return Fortran::common::visit( [&](auto &impl) { return impl.startImpliedDo(loc, builder, lower, upper, stride); }, @@ -455,13 +455,13 @@ class ArrayCtorLoweringStrategy { hlfir::Entity finishArrayCtorLowering(mlir::Location loc, fir::FirOpBuilder &builder) { - return std::visit( + return Fortran::common::visit( [&](auto &impl) { return impl.finishArrayCtorLowering(loc, builder); }, implVariant); } void startImpliedDoScope(llvm::StringRef doName, mlir::Value indexValue) { - std::visit( + Fortran::common::visit( [&](auto &impl) { return impl.startImpliedDoScope(doName, indexValue); }, @@ -469,8 +469,8 @@ class ArrayCtorLoweringStrategy { } void endImpliedDoScope() { - std::visit([&](auto &impl) { return impl.endImpliedDoScope(); }, - implVariant); + Fortran::common::visit([&](auto &impl) { return impl.endImpliedDoScope(); }, + implVariant); } private: @@ -612,16 +612,17 @@ ArrayCtorAnalysis::ArrayCtorAnalysis( arrayValueListStack.pop_back_val(); for (const Fortran::evaluate::ArrayConstructorValue &acValue : *currentArrayValueList) - std::visit(Fortran::common::visitors{ - [&](const Fortran::evaluate::ImpliedDo &impledDo) { - arrayValueListStack.push_back(&impledDo.values()); - localNumberOfImpliedDo++; - }, - [&](const Fortran::evaluate::Expr &expr) { - localNumberOfExpr++; - anyArrayExpr = anyArrayExpr || expr.Rank() > 0; - }}, - acValue.u); + Fortran::common::visit( + Fortran::common::visitors{ + [&](const Fortran::evaluate::ImpliedDo &impledDo) { + arrayValueListStack.push_back(&impledDo.values()); + localNumberOfImpliedDo++; + }, + [&](const Fortran::evaluate::Expr &expr) { + localNumberOfExpr++; + anyArrayExpr = anyArrayExpr || expr.Rank() > 0; + }}, + acValue.u); anyImpliedDo = anyImpliedDo || localNumberOfImpliedDo > 0; if (localNumberOfImpliedDo == 0) { @@ -765,7 +766,7 @@ static void genAcValue(mlir::Location loc, impliedDoIndexValue); for (const auto &acValue : impledDo.values()) - std::visit( + Fortran::common::visit( [&](const auto &x) { genAcValue(loc, converter, x, symMap, stmtCtx, arrayBuilder); }, @@ -787,7 +788,7 @@ hlfir::EntityWithAttributes Fortran::lower::ArrayConstructorBuilder::gen( loc, converter, arrayCtorExpr, symMap, stmtCtx); // Run the array lowering strategy through the ac-values. for (const auto &acValue : arrayCtorExpr) - std::visit( + Fortran::common::visit( [&](const auto &x) { genAcValue(loc, converter, x, symMap, stmtCtx, arrayBuilder); }, diff --git a/flang/lib/Lower/ConvertCall.cpp b/flang/lib/Lower/ConvertCall.cpp index b1dc41f3ca838..65a2ffbea5dd1 100644 --- a/flang/lib/Lower/ConvertCall.cpp +++ b/flang/lib/Lower/ConvertCall.cpp @@ -935,7 +935,8 @@ struct CallCleanUp { mlir::Value mustFree; }; void genCleanUp(mlir::Location loc, fir::FirOpBuilder &builder) { - std::visit([&](auto &c) { c.genCleanUp(loc, builder); }, cleanUp); + Fortran::common::visit([&](auto &c) { c.genCleanUp(loc, builder); }, + cleanUp); } std::variant cleanUp; }; diff --git a/flang/lib/Lower/ConvertConstant.cpp b/flang/lib/Lower/ConvertConstant.cpp index a4ace40a3a1c4..3361817ee27ee 100644 --- a/flang/lib/Lower/ConvertConstant.cpp +++ b/flang/lib/Lower/ConvertConstant.cpp @@ -105,7 +105,7 @@ class DenseGlobalBuilder { const Fortran::lower::SomeExpr &initExpr, cuf::DataAttributeAttr dataAttr) { DenseGlobalBuilder globalBuilder; - std::visit( + Fortran::common::visit( Fortran::common::visitors{ [&](const Fortran::evaluate::Expr & x) { globalBuilder.tryConvertingToAttributes(builder, x); }, @@ -164,7 +164,7 @@ class DenseGlobalBuilder { template void tryConvertingToAttributes(fir::FirOpBuilder &builder, const Fortran::evaluate::Expr &expr) { - std::visit( + Fortran::common::visit( [&](const auto &x) { using TR = Fortran::evaluate::ResultType; if (const auto *constant = @@ -796,7 +796,7 @@ static fir::ExtendedValue genConstantValue(Fortran::lower::AbstractConverter &converter, mlir::Location loc, const Fortran::lower::SomeExpr &constantExpr) { - return std::visit( + return Fortran::common::visit( [&](const auto &x) -> fir::ExtendedValue { using T = std::decay_t; if constexpr (Fortran::common::HasMember< @@ -805,7 +805,7 @@ genConstantValue(Fortran::lower::AbstractConverter &converter, Fortran::common::TypeCategory::Derived) { return genConstantValue(converter, loc, x); } else { - return std::visit( + return Fortran::common::visit( [&](const auto &preciseKind) { return genConstantValue(converter, loc, preciseKind); }, diff --git a/flang/lib/Lower/ConvertExpr.cpp b/flang/lib/Lower/ConvertExpr.cpp index 9567685aa3d2e..9937e9d159886 100644 --- a/flang/lib/Lower/ConvertExpr.cpp +++ b/flang/lib/Lower/ConvertExpr.cpp @@ -398,8 +398,8 @@ static bool isParenthesizedVariable(const Fortran::evaluate::Expr &expr) { return Fortran::evaluate::IsVariable(parentheses->left()); return false; } else { - return std::visit([&](const auto &x) { return isParenthesizedVariable(x); }, - expr.u); + return Fortran::common::visit( + [&](const auto &x) { return isParenthesizedVariable(x); }, expr.u); } } @@ -646,7 +646,7 @@ isOptimizableTranspose(Fortran::evaluate::Expr expr, if (!isTransposeOptEnabled(converter)) return false; - return std::visit( + return Fortran::common::visit( [&](const auto &e) { return isOptimizableTranspose(e, converter); }, expr.u); } @@ -696,7 +696,7 @@ class ScalarExprLowering { // - result of NULL() or NULL(MOLD) intrinsic. // NULL() requires some context to be lowered, so it is not handled // here and must be lowered according to the context where it appears. - ExtValue exv = std::visit( + ExtValue exv = Fortran::common::visit( [&](const auto &x) { return genMutableBoxValueImpl(x); }, expr.u); const fir::MutableBoxValue *mutableBox = exv.getBoxOf(); @@ -737,7 +737,7 @@ class ScalarExprLowering { template ExtValue genMutableBoxValueImpl(const Fortran::evaluate::Designator &designator) { - return std::visit( + return Fortran::common::visit( Fortran::common::visitors{ [&](const Fortran::evaluate::SymbolRef &sym) -> ExtValue { return converter.getSymbolExtendedValue(*sym, &symMap); @@ -754,8 +754,8 @@ class ScalarExprLowering { template ExtValue genMutableBoxValueImpl(const Fortran::evaluate::Expr &expr) { - return std::visit([&](const auto &x) { return genMutableBoxValueImpl(x); }, - expr.u); + return Fortran::common::visit( + [&](const auto &x) { return genMutableBoxValueImpl(x); }, expr.u); } mlir::Location getLoc() { return location; } @@ -1222,7 +1222,8 @@ class ScalarExprLowering { ExtValue genval(const Fortran::evaluate::Relational &op) { - return std::visit([&](const auto &x) { return genval(x); }, op.u); + return Fortran::common::visit([&](const auto &x) { return genval(x); }, + op.u); } template &list) { if (!getLastSym(cmpt).test(Fortran::semantics::Symbol::Flag::ParentComp)) list.push_front(&cmpt); - return std::visit( + return Fortran::common::visit( Fortran::common::visitors{ [&](const Fortran::evaluate::Component &x) { if (Fortran::semantics::IsAllocatableOrPointer(getLastSym(x))) @@ -1713,11 +1716,12 @@ class ScalarExprLowering { template ExtValue gen(const Fortran::evaluate::Designator &des) { - return std::visit([&](const auto &x) { return gen(x); }, des.u); + return Fortran::common::visit([&](const auto &x) { return gen(x); }, des.u); } template ExtValue genval(const Fortran::evaluate::Designator &des) { - return std::visit([&](const auto &x) { return genval(x); }, des.u); + return Fortran::common::visit([&](const auto &x) { return genval(x); }, + des.u); } mlir::Type genType(const Fortran::evaluate::DynamicType &dt) { @@ -2900,8 +2904,8 @@ class ScalarExprLowering { } template bool isTransformationalRef(Fortran::evaluate::Expr expr) { - return std::visit([&](const auto &e) { return isTransformationalRef(e); }, - expr.u); + return Fortran::common::visit( + [&](const auto &e) { return isTransformationalRef(e); }, expr.u); } template @@ -2914,11 +2918,13 @@ class ScalarExprLowering { /// value, so it may be possible to avoid making a temporary. template ExtValue asArrayArg(const Fortran::evaluate::Expr &x) { - return std::visit([&](const auto &e) { return asArrayArg(e, x); }, x.u); + return Fortran::common::visit( + [&](const auto &e) { return asArrayArg(e, x); }, x.u); } template ExtValue asArrayArg(const Fortran::evaluate::Expr &x, const B &y) { - return std::visit([&](const auto &e) { return asArrayArg(e, y); }, x.u); + return Fortran::common::visit( + [&](const auto &e) { return asArrayArg(e, y); }, x.u); } template ExtValue asArrayArg(const Fortran::evaluate::Designator &, const B &x) { @@ -2956,7 +2962,8 @@ class ScalarExprLowering { if (isScalar(x) || Fortran::evaluate::UnwrapWholeSymbolOrComponentDataRef(x) || (isTransformationalRef(x) && !isOptimizableTranspose(x, converter))) - return std::visit([&](const auto &e) { return genref(e); }, x.u); + return Fortran::common::visit([&](const auto &e) { return genref(e); }, + x.u); if (useBoxArg) return asArrayArg(x); return asArray(x); @@ -2967,7 +2974,8 @@ class ScalarExprLowering { return val; if (isScalar(x) || Fortran::evaluate::UnwrapWholeSymbolDataRef(x) || inInitializer) - return std::visit([&](const auto &e) { return genval(e); }, x.u); + return Fortran::common::visit([&](const auto &e) { return genval(e); }, + x.u); return asArray(x); } @@ -2976,7 +2984,8 @@ class ScalarExprLowering { Fortran::common::TypeCategory::Logical, KIND>> &exp) { if (mlir::Value val = getIfOverridenExpr(exp)) return val; - return std::visit([&](const auto &e) { return genval(e); }, exp.u); + return Fortran::common::visit([&](const auto &e) { return genval(e); }, + exp.u); } using RefSet = @@ -3462,7 +3471,7 @@ class ArrayExprLowering { ExtValue lowerBoxedArrayExpr(const Fortran::lower::SomeExpr &exp) { PushSemantics(ConstituentSemantics::BoxValue); - return std::visit( + return Fortran::common::visit( [&](const auto &e) { auto f = genarr(e); ExtValue exv = f(IterationSpace{}); @@ -3824,28 +3833,29 @@ class ArrayExprLowering { fir::factory::getExtents(loc, builder, exv); mlir::Value one = builder.createIntegerConstant(loc, idxTy, 1); for (auto ss : llvm::enumerate(x.subscript())) { - std::visit(Fortran::common::visitors{ - [&](const Fortran::evaluate::Triplet &trip) { - // For a subscript of triple notation, we compute the - // range of this dimension of the iteration space. - auto lo = [&]() { - if (auto optLo = trip.lower()) - return fir::getBase(asScalar(*optLo)); - return getLBound(exv, ss.index(), one); - }(); - auto hi = [&]() { - if (auto optHi = trip.upper()) - return fir::getBase(asScalar(*optHi)); - return getUBound(exv, ss.index(), one); - }(); - auto step = builder.createConvert( - loc, idxTy, fir::getBase(asScalar(trip.stride()))); - auto extent = builder.genExtentFromTriplet(loc, lo, hi, - step, idxTy); - destShape.push_back(extent); - }, - [&](auto) {}}, - ss.value().u); + Fortran::common::visit( + Fortran::common::visitors{ + [&](const Fortran::evaluate::Triplet &trip) { + // For a subscript of triple notation, we compute the + // range of this dimension of the iteration space. + auto lo = [&]() { + if (auto optLo = trip.lower()) + return fir::getBase(asScalar(*optLo)); + return getLBound(exv, ss.index(), one); + }(); + auto hi = [&]() { + if (auto optHi = trip.upper()) + return fir::getBase(asScalar(*optHi)); + return getUBound(exv, ss.index(), one); + }(); + auto step = builder.createConvert( + loc, idxTy, fir::getBase(asScalar(trip.stride()))); + auto extent = + builder.genExtentFromTriplet(loc, lo, hi, step, idxTy); + destShape.push_back(extent); + }, + [&](auto) {}}, + ss.value().u); } return true; } @@ -3855,8 +3865,8 @@ class ArrayExprLowering { return genShapeFromDataRef(x.GetComponent()); } bool genShapeFromDataRef(const Fortran::evaluate::DataRef &x) { - return std::visit([&](const auto &v) { return genShapeFromDataRef(v); }, - x.u); + return Fortran::common::visit( + [&](const auto &v) { return genShapeFromDataRef(v); }, x.u); } /// When in an explicit space, the ranked component must be evaluated to @@ -3890,7 +3900,7 @@ class ArrayExprLowering { TODO(getLoc(), "polymorphic array expression lowering with vector subscript"); - return std::visit( + return Fortran::common::visit( [&](const auto &e) { return lowerArrayExpression(genarr(e), resTy); }, exp.u); } @@ -5012,10 +5022,12 @@ class ArrayExprLowering { LLVM_DEBUG(Fortran::lower::DumpEvaluateExpr::dump(llvm::dbgs(), x)); if (isArray(x) || (explicitSpaceIsActive() && isLeftHandSide()) || isElementalProcWithArrayArgs(x)) - return std::visit([&](const auto &e) { return genarr(e); }, x.u); + return Fortran::common::visit([&](const auto &e) { return genarr(e); }, + x.u); if (explicitSpaceIsActive()) { assert(!isArray(x) && !isLeftHandSide()); - auto cc = std::visit([&](const auto &e) { return genarr(e); }, x.u); + auto cc = + Fortran::common::visit([&](const auto &e) { return genarr(e); }, x.u); auto result = cc(IterationSpace{}); return [=](IterSpace) { return result; }; } @@ -5289,7 +5301,8 @@ class ArrayExprLowering { static Fortran::lower::SomeExpr ignoreEvConvert(const Fortran::evaluate::Expr> &x) { - return std::visit([&](const auto &v) { return ignoreEvConvert(v); }, x.u); + return Fortran::common::visit( + [&](const auto &v) { return ignoreEvConvert(v); }, x.u); } template static Fortran::lower::SomeExpr ignoreEvConvert( @@ -5310,8 +5323,8 @@ class ArrayExprLowering { template static const Fortran::semantics::Symbol * extractSubscriptSymbol(const Fortran::evaluate::Expr &x) { - return std::visit([&](const auto &v) { return extractSubscriptSymbol(v); }, - x.u); + return Fortran::common::visit( + [&](const auto &v) { return extractSubscriptSymbol(v); }, x.u); } template static const Fortran::semantics::Symbol * @@ -5420,7 +5433,7 @@ class ArrayExprLowering { std::size_t shapeIndex = 0; for (auto sub : llvm::enumerate(x.subscript())) { const std::size_t subsIndex = sub.index(); - std::visit( + Fortran::common::visit( Fortran::common::visitors{ [&](const Fortran::evaluate::Triplet &t) { mlir::Value lowerBound; @@ -6034,8 +6047,8 @@ class ArrayExprLowering { /// Substrings (see 9.4.1) CC genarr(const Fortran::evaluate::Substring &x, ComponentPath &components) { components.substring = &x; - return std::visit([&](const auto &v) { return genarr(v, components); }, - x.parent()); + return Fortran::common::visit( + [&](const auto &v) { return genarr(v, components); }, x.parent()); } template @@ -6333,7 +6346,7 @@ class ArrayExprLowering { stmtCtx.pushScope(); std::optional charLen; for (const Fortran::evaluate::ArrayConstructorValue &acv : x.values()) { - auto [exv, copyNeeded] = std::visit( + auto [exv, copyNeeded] = Fortran::common::visit( [&](const auto &v) { return genArrayCtorInitializer(v, resTy, mem, buffPos, buffSize, stmtCtx); @@ -6417,7 +6430,7 @@ class ArrayExprLowering { // Populate the buffer with the elements, growing as necessary. std::optional charLen; for (const auto &expr : x) { - auto [exv, copyNeeded] = std::visit( + auto [exv, copyNeeded] = Fortran::common::visit( [&](const auto &e) { return genArrayCtorInitializer(e, resTy, mem, buffPos, buffSize, stmtCtx); @@ -6582,22 +6595,24 @@ class ArrayExprLowering { } CC genarr( const Fortran::evaluate::Relational &r) { - return std::visit([&](const auto &x) { return genarr(x); }, r.u); + return Fortran::common::visit([&](const auto &x) { return genarr(x); }, + r.u); } template CC genarr(const Fortran::evaluate::Designator &des) { ComponentPath components(des.Rank() > 0); - return std::visit([&](const auto &x) { return genarr(x, components); }, - des.u); + return Fortran::common::visit( + [&](const auto &x) { return genarr(x, components); }, des.u); } /// Is the path component rank > 0? static bool ranked(const PathComponent &x) { - return std::visit(Fortran::common::visitors{ - [](const ImplicitSubscripts &) { return false; }, - [](const auto *v) { return v->Rank() > 0; }}, - x); + return Fortran::common::visit( + Fortran::common::visitors{ + [](const ImplicitSubscripts &) { return false; }, + [](const auto *v) { return v->Rank() > 0; }}, + x); } void extendComponent(Fortran::lower::ComponentPath &component, @@ -6653,7 +6668,7 @@ class ArrayExprLowering { : nextPathSemantics()); unsigned index = 0; for (const auto &v : llvm::reverse(revPath)) { - std::visit( + Fortran::common::visit( Fortran::common::visitors{ [&](const ImplicitSubscripts &) { prefix = false; @@ -6678,7 +6693,7 @@ class ArrayExprLowering { unsigned ssIndex = 0u; llvm::SmallVector componentsToAdd; for (const auto &ss : x->subscript()) { - std::visit( + Fortran::common::visit( Fortran::common::visitors{ [&](const Fortran::evaluate:: IndirectSubscriptIntegerExpr &ie) { @@ -7099,8 +7114,8 @@ class ArrayExprLowering { } CC genarr(const Fortran::evaluate::DataRef &x, ComponentPath &components) { - return std::visit([&](const auto &v) { return genarr(v, components); }, - x.u); + return Fortran::common::visit( + [&](const auto &v) { return genarr(v, components); }, x.u); } bool pathIsEmpty(const ComponentPath &components) { @@ -7575,13 +7590,13 @@ void Fortran::lower::createArrayLoads( }; if (esp.lhsBases[counter]) { auto &base = *esp.lhsBases[counter]; - auto load = std::visit(genLoad, base); + auto load = Fortran::common::visit(genLoad, base); esp.initialArgs.push_back(load); esp.resetInnerArgs(); esp.bindLoad(base, load); } for (const auto &base : esp.rhsBases[counter]) - esp.bindLoad(base, std::visit(genLoad, base)); + esp.bindLoad(base, Fortran::common::visit(genLoad, base)); } void Fortran::lower::createArrayMergeStores( diff --git a/flang/lib/Lower/ConvertExprToHLFIR.cpp b/flang/lib/Lower/ConvertExprToHLFIR.cpp index 9035856eabfe7..1933f38f735b5 100644 --- a/flang/lib/Lower/ConvertExprToHLFIR.cpp +++ b/flang/lib/Lower/ConvertExprToHLFIR.cpp @@ -75,7 +75,7 @@ class HlfirDesignatorBuilder { hlfir::EntityWithAttributes gen(const CharacterDesignators &designatorVariant, bool vectorSubscriptDesignatorToValue = true) { - return std::visit( + return Fortran::common::visit( [&](const auto &x) -> hlfir::EntityWithAttributes { return genLeafPartRef(x, vectorSubscriptDesignatorToValue); }, @@ -88,7 +88,7 @@ class HlfirDesignatorBuilder { hlfir::EntityWithAttributes gen(const RealDesignators &designatorVariant, bool vectorSubscriptDesignatorToValue = true) { - return std::visit( + return Fortran::common::visit( [&](const auto &x) -> hlfir::EntityWithAttributes { return genLeafPartRef(x, vectorSubscriptDesignatorToValue); }, @@ -101,7 +101,7 @@ class HlfirDesignatorBuilder { hlfir::EntityWithAttributes gen(const OtherDesignators &designatorVariant, bool vectorSubscriptDesignatorToValue = true) { - return std::visit( + return Fortran::common::visit( [&](const auto &x) -> hlfir::EntityWithAttributes { return genLeafPartRef(x, vectorSubscriptDesignatorToValue); }, @@ -169,7 +169,7 @@ class HlfirDesignatorBuilder { fir::FortranVariableOpInterface gen(const Fortran::evaluate::DataRef &dataRef) { - return std::visit( + return Fortran::common::visit( Fortran::common::visitors{[&](const auto &x) { return gen(x); }}, dataRef.u); } @@ -364,7 +364,7 @@ class HlfirDesignatorBuilder { fir::FortranVariableOpInterface gen(const Fortran::evaluate::Substring &substring) { PartInfo partInfo; - mlir::Type baseStringType = std::visit( + mlir::Type baseStringType = Fortran::common::visit( [&](const auto &x) { return visit(x, partInfo); }, substring.parent()); assert(partInfo.typeParams.size() == 1 && "expect base string length"); // Compute the substring lower and upper bound. @@ -436,8 +436,8 @@ class HlfirDesignatorBuilder { mlir::Type visit(const Fortran::evaluate::DataRef &dataRef, PartInfo &partInfo) { - return std::visit([&](const auto &x) { return visit(x, partInfo); }, - dataRef.u); + return Fortran::common::visit( + [&](const auto &x) { return visit(x, partInfo); }, dataRef.u); } mlir::Type @@ -892,7 +892,7 @@ hlfir::EntityWithAttributes HlfirDesignatorBuilder::genDesignatorExpr( bool vectorSubscriptDesignatorToValue) { // Expr plumbing to unwrap Designator and call // gen(Designator.u). - return std::visit( + return Fortran::common::visit( [&](const auto &x) -> hlfir::EntityWithAttributes { using T = std::decay_t; if constexpr (Fortran::common::HasMember< @@ -904,7 +904,7 @@ hlfir::EntityWithAttributes HlfirDesignatorBuilder::genDesignatorExpr( .u, vectorSubscriptDesignatorToValue); } else { - return std::visit( + return Fortran::common::visit( [&](const auto &preciseKind) { using TK = typename std::decay_t::Result; @@ -1426,7 +1426,8 @@ class HlfirBuilder { return hlfir::EntityWithAttributes{match->second}; } } - return std::visit([&](const auto &x) { return gen(x); }, expr.u); + return Fortran::common::visit([&](const auto &x) { return gen(x); }, + expr.u); } private: @@ -1594,7 +1595,7 @@ class HlfirBuilder { hlfir::EntityWithAttributes gen(const Fortran::evaluate::Relational &op) { - return std::visit([&](const auto &x) { return gen(x); }, op.u); + return Fortran::common::visit([&](const auto &x) { return gen(x); }, op.u); } hlfir::EntityWithAttributes gen(const Fortran::evaluate::TypeParamInquiry &) { diff --git a/flang/lib/Lower/ConvertType.cpp b/flang/lib/Lower/ConvertType.cpp index e6557d7f0b767..f64f6c93541a3 100644 --- a/flang/lib/Lower/ConvertType.cpp +++ b/flang/lib/Lower/ConvertType.cpp @@ -212,7 +212,7 @@ struct TypeBuilderImpl { } mlir::Type genTypelessExprType(const Fortran::lower::SomeExpr &expr) { - return std::visit( + return Fortran::common::visit( Fortran::common::visitors{ [&](const Fortran::evaluate::BOZLiteralConstant &) -> mlir::Type { return mlir::NoneType::get(context); diff --git a/flang/lib/Lower/DirectivesCommon.h b/flang/lib/Lower/DirectivesCommon.h index 48b090f6d2dbe..f0af5f982c14f 100644 --- a/flang/lib/Lower/DirectivesCommon.h +++ b/flang/lib/Lower/DirectivesCommon.h @@ -836,7 +836,7 @@ struct PeelConvert { static Fortran::semantics::MaybeExpr visit_with_category( const Fortran::evaluate::Expr> &expr) { - return std::visit( + return Fortran::common::visit( [](auto &&s) { return visit_with_category(s); }, expr.u); } @@ -859,12 +859,12 @@ struct PeelConvert { static Fortran::semantics::MaybeExpr visit(const Fortran::evaluate::Expr> &expr) { - return std::visit([](auto &&s) { return visit_with_category(s); }, - expr.u); + return Fortran::common::visit( + [](auto &&s) { return visit_with_category(s); }, expr.u); } static Fortran::semantics::MaybeExpr visit(const Fortran::evaluate::Expr &expr) { - return std::visit([](auto &&s) { return visit(s); }, expr.u); + return Fortran::common::visit([](auto &&s) { return visit(s); }, expr.u); } template // static Fortran::semantics::MaybeExpr visit(const T &) { diff --git a/flang/lib/Lower/IO.cpp b/flang/lib/Lower/IO.cpp index 97ef991cb3990..9e98b230b676f 100644 --- a/flang/lib/Lower/IO.cpp +++ b/flang/lib/Lower/IO.cpp @@ -1388,7 +1388,7 @@ static void threadSpecs(Fortran::lower::AbstractConverter &converter, fir::FirOpBuilder &builder = converter.getFirOpBuilder(); for (const auto &spec : specList) { makeNextConditionalOn(builder, loc, checkResult, ok); - ok = std::visit( + ok = Fortran::common::visit( Fortran::common::visitors{ [&](const Fortran::parser::IoControlSpec::Size &x) -> mlir::Value { // Size must be queried after the related READ runtime calls, not @@ -1425,7 +1425,7 @@ ConditionSpecInfo lowerErrorSpec(Fortran::lower::AbstractConverter &converter, ConditionSpecInfo csi; const Fortran::lower::SomeExpr *ioMsgExpr = nullptr; for (const auto &spec : specList) { - std::visit( + Fortran::common::visit( Fortran::common::visitors{ [&](const Fortran::parser::StatVariable &var) { csi.ioStatExpr = Fortran::semantics::GetExpr(var); @@ -2397,7 +2397,7 @@ lowerIdExpr(Fortran::lower::AbstractConverter &converter, mlir::Location loc, const std::list &ispecs, Fortran::lower::StatementContext &stmtCtx) { for (const Fortran::parser::InquireSpec &spec : ispecs) - if (mlir::Value v = std::visit( + if (mlir::Value v = Fortran::common::visit( Fortran::common::visitors{ [&](const Fortran::parser::IdExpr &idExpr) { return fir::getBase(converter.genExprValue( @@ -2419,11 +2419,11 @@ static void threadInquire(Fortran::lower::AbstractConverter &converter, mlir::Value idExpr = lowerIdExpr(converter, loc, ispecs, stmtCtx); for (const Fortran::parser::InquireSpec &spec : ispecs) { makeNextConditionalOn(builder, loc, checkResult, ok); - ok = std::visit(Fortran::common::visitors{[&](const auto &x) { - return genInquireSpec(converter, loc, cookie, idExpr, x, - stmtCtx); - }}, - spec.u); + ok = Fortran::common::visit(Fortran::common::visitors{[&](const auto &x) { + return genInquireSpec(converter, loc, cookie, + idExpr, x, stmtCtx); + }}, + spec.u); } } diff --git a/flang/lib/Lower/IterationSpace.cpp b/flang/lib/Lower/IterationSpace.cpp index 6bf310b5cfb76..9303536403837 100644 --- a/flang/lib/Lower/IterationSpace.cpp +++ b/flang/lib/Lower/IterationSpace.cpp @@ -21,14 +21,14 @@ unsigned Fortran::lower::getHashValue( const Fortran::lower::ExplicitIterSpace::ArrayBases &x) { - return std::visit( + return Fortran::common::visit( [&](const auto *p) { return HashEvaluateExpr::getHashValue(*p); }, x); } bool Fortran::lower::isEqual( const Fortran::lower::ExplicitIterSpace::ArrayBases &x, const Fortran::lower::ExplicitIterSpace::ArrayBases &y) { - return std::visit( + return Fortran::common::visit( Fortran::common::visitors{ // Fortran::semantics::Symbol * are the exception here. These pointers // have identity; if two Symbol * values are the same (different) then @@ -169,7 +169,7 @@ class ArrayBaseFinder { } template RT find(const std::variant &u) { - return std::visit([&](const auto &v) { return find(v); }, u); + return Fortran::common::visit([&](const auto &v) { return find(v); }, u); } template RT find(const std::vector &x) { @@ -361,22 +361,23 @@ llvm::raw_ostream & Fortran::lower::operator<<(llvm::raw_ostream &s, const Fortran::lower::ExplicitIterSpace &e) { auto dump = [&](const auto &u) { - std::visit(Fortran::common::visitors{ - [&](const Fortran::semantics::Symbol *y) { - s << " " << *y << '\n'; - }, - [&](const Fortran::evaluate::ArrayRef *y) { - s << " "; - if (y->base().IsSymbol()) - s << y->base().GetFirstSymbol(); - else - s << y->base().GetComponent().GetLastSymbol(); - s << '\n'; - }, - [&](const Fortran::evaluate::Component *y) { - s << " " << y->GetLastSymbol() << '\n'; - }}, - u); + Fortran::common::visit( + Fortran::common::visitors{ + [&](const Fortran::semantics::Symbol *y) { + s << " " << *y << '\n'; + }, + [&](const Fortran::evaluate::ArrayRef *y) { + s << " "; + if (y->base().IsSymbol()) + s << y->base().GetFirstSymbol(); + else + s << y->base().GetComponent().GetLastSymbol(); + s << '\n'; + }, + [&](const Fortran::evaluate::Component *y) { + s << " " << y->GetLastSymbol() << '\n'; + }}, + u); }; s << "LHS bases:\n"; for (const std::optional &u : diff --git a/flang/lib/Lower/Mangler.cpp b/flang/lib/Lower/Mangler.cpp index 9a33be318a27d..878ba6dea49b6 100644 --- a/flang/lib/Lower/Mangler.cpp +++ b/flang/lib/Lower/Mangler.cpp @@ -110,7 +110,7 @@ std::string Fortran::lower::mangle::mangleName( return fir::NameUniquer::doVariable(modules, procs, blockId, symbolName); }; - return std::visit( + return Fortran::common::visit( Fortran::common::visitors{ [&](const Fortran::semantics::MainProgramDetails &) { return fir::NameUniquer::doProgramEntry().str(); diff --git a/flang/lib/Lower/OpenACC.cpp b/flang/lib/Lower/OpenACC.cpp index 4f5da8fb70eba..166fa686cd883 100644 --- a/flang/lib/Lower/OpenACC.cpp +++ b/flang/lib/Lower/OpenACC.cpp @@ -46,14 +46,15 @@ static mlir::Location genOperandLocation(Fortran::lower::AbstractConverter &converter, const Fortran::parser::AccObject &accObject) { mlir::Location loc = converter.genUnknownLocation(); - std::visit(Fortran::common::visitors{ - [&](const Fortran::parser::Designator &designator) { - loc = converter.genLocation(designator.source); - }, - [&](const Fortran::parser::Name &name) { - loc = converter.genLocation(name.source); - }}, - accObject.u); + Fortran::common::visit( + Fortran::common::visitors{ + [&](const Fortran::parser::Designator &designator) { + loc = converter.genLocation(designator.source); + }, + [&](const Fortran::parser::Name &name) { + loc = converter.genLocation(name.source); + }}, + accObject.u); return loc; } @@ -297,8 +298,8 @@ genDataOperandOperations(const Fortran::parser::AccObjectList &objectList, std::stringstream asFortran; mlir::Location operandLocation = genOperandLocation(converter, accObject); Fortran::semantics::Symbol &symbol = getSymbolFromAccObject(accObject); - Fortran::semantics::MaybeExpr designator = - std::visit([&](auto &&s) { return ea.Analyze(s); }, accObject.u); + Fortran::semantics::MaybeExpr designator = Fortran::common::visit( + [&](auto &&s) { return ea.Analyze(s); }, accObject.u); Fortran::lower::AddrAndBoundsInfo info = Fortran::lower::gatherDataOperandAddrAndBounds< mlir::acc::DataBoundsOp, mlir::acc::DataBoundsType>( @@ -335,8 +336,8 @@ static void genDeclareDataOperandOperations( std::stringstream asFortran; mlir::Location operandLocation = genOperandLocation(converter, accObject); Fortran::semantics::Symbol &symbol = getSymbolFromAccObject(accObject); - Fortran::semantics::MaybeExpr designator = - std::visit([&](auto &&s) { return ea.Analyze(s); }, accObject.u); + Fortran::semantics::MaybeExpr designator = Fortran::common::visit( + [&](auto &&s) { return ea.Analyze(s); }, accObject.u); Fortran::lower::AddrAndBoundsInfo info = Fortran::lower::gatherDataOperandAddrAndBounds< mlir::acc::DataBoundsOp, mlir::acc::DataBoundsType>( @@ -790,8 +791,8 @@ genPrivatizations(const Fortran::parser::AccObjectList &objectList, std::stringstream asFortran; mlir::Location operandLocation = genOperandLocation(converter, accObject); Fortran::semantics::Symbol &symbol = getSymbolFromAccObject(accObject); - Fortran::semantics::MaybeExpr designator = - std::visit([&](auto &&s) { return ea.Analyze(s); }, accObject.u); + Fortran::semantics::MaybeExpr designator = Fortran::common::visit( + [&](auto &&s) { return ea.Analyze(s); }, accObject.u); Fortran::lower::AddrAndBoundsInfo info = Fortran::lower::gatherDataOperandAddrAndBounds< mlir::acc::DataBoundsOp, mlir::acc::DataBoundsType>( @@ -1364,8 +1365,8 @@ genReductions(const Fortran::parser::AccObjectListWithReduction &objectList, std::stringstream asFortran; mlir::Location operandLocation = genOperandLocation(converter, accObject); Fortran::semantics::Symbol &symbol = getSymbolFromAccObject(accObject); - Fortran::semantics::MaybeExpr designator = - std::visit([&](auto &&s) { return ea.Analyze(s); }, accObject.u); + Fortran::semantics::MaybeExpr designator = Fortran::common::visit( + [&](auto &&s) { return ea.Analyze(s); }, accObject.u); Fortran::lower::AddrAndBoundsInfo info = Fortran::lower::gatherDataOperandAddrAndBounds< mlir::acc::DataBoundsOp, mlir::acc::DataBoundsType>( @@ -3414,7 +3415,7 @@ static void genGlobalCtors(Fortran::lower::AbstractConverter &converter, fir::FirOpBuilder &builder = converter.getFirOpBuilder(); for (const auto &accObject : accObjectList.v) { mlir::Location operandLocation = genOperandLocation(converter, accObject); - std::visit( + Fortran::common::visit( Fortran::common::visitors{ [&](const Fortran::parser::Designator &designator) { if (const auto *name = @@ -3993,7 +3994,7 @@ genACC(Fortran::lower::AbstractConverter &converter, const Fortran::parser::OpenACCAtomicConstruct &atomicConstruct) { mlir::Location loc = converter.genLocation(atomicConstruct.source); - std::visit( + Fortran::common::visit( Fortran::common::visitors{ [&](const Fortran::parser::AccAtomicRead &atomicRead) { Fortran::lower::genOmpAccAtomicRead static SymbolWithDesignator visit(const evaluate::Expr &e) { - return std::visit([](auto &&s) { return visit(s); }, e.u); + return Fortran::common::visit([](auto &&s) { return visit(s); }, e.u); } static void verify(const SymbolWithDesignator &sd) { @@ -112,7 +112,7 @@ struct SymbolAndDesignatorExtractor { SymbolWithDesignator getSymbolAndDesignator(const MaybeExpr &expr) { if (!expr) return SymbolWithDesignator{}; - return std::visit( + return Fortran::common::visit( [](auto &&s) { return SymbolAndDesignatorExtractor::visit(s); }, expr->u); } @@ -278,7 +278,7 @@ DefinedOperator makeDefinedOperator(const parser::DefinedOperator &inp, // clang-format on ); - return std::visit( + return Fortran::common::visit( common::visitors{ [&](const parser::DefinedOpName &s) { return DefinedOperator{ @@ -294,7 +294,7 @@ DefinedOperator makeDefinedOperator(const parser::DefinedOperator &inp, ProcedureDesignator makeProcedureDesignator(const parser::ProcedureDesignator &inp, semantics::SemanticsContext &semaCtx) { - return ProcedureDesignator{std::visit( + return ProcedureDesignator{Fortran::common::visit( common::visitors{ [&](const parser::Name &t) { return makeObject(t, semaCtx); }, [&](const parser::ProcComponentRef &t) { @@ -306,7 +306,7 @@ makeProcedureDesignator(const parser::ProcedureDesignator &inp, ReductionOperator makeReductionOperator(const parser::OmpReductionOperator &inp, semantics::SemanticsContext &semaCtx) { - return std::visit( + return Fortran::common::visit( common::visitors{ [&](const parser::DefinedOperator &s) { return ReductionOperator{makeDefinedOperator(s, semaCtx)}; @@ -366,7 +366,7 @@ Allocate make(const parser::OmpClause::Allocate &inp, using Tuple = decltype(Allocate::t); - return Allocate{std::visit( + return Allocate{Fortran::common::visit( common::visitors{ // simple-modifier [&](const wrapped::AllocateModifier::Allocator &v) -> Tuple { @@ -531,7 +531,7 @@ Depend make(const parser::OmpClause::Depend &inp, // clang-format on ); - return Depend{std::visit( // + return Depend{Fortran::common::visit( // common::visitors{ // Doacross [&](const wrapped::Source &s) -> Variant { @@ -793,7 +793,7 @@ Linear make(const parser::OmpClause::Linear &inp, using Tuple = decltype(Linear::t); - return Linear{std::visit( + return Linear{Fortran::common::visit( common::visitors{ [&](const wrapped::WithModifier &s) -> Tuple { return { @@ -949,7 +949,7 @@ Order make(const parser::OmpClause::Order &inp, auto &t1 = std::get(inp.v.t); auto convert3 = [&](const parser::OmpOrderModifier &s) { - return std::visit( + return Fortran::common::visit( [&](parser::OmpOrderModifier::Kind k) { return convert1(k); }, s.u); }; return Order{ @@ -1212,7 +1212,7 @@ UsesAllocators make(const parser::OmpClause::UsesAllocators &inp, Clause makeClause(const parser::OmpClause &cls, semantics::SemanticsContext &semaCtx) { - return std::visit( + return Fortran::common::visit( [&](auto &&s) { return makeClause(getClauseId(cls), clause::make(s, semaCtx), cls.source); diff --git a/flang/lib/Lower/OpenMP/OpenMP.cpp b/flang/lib/Lower/OpenMP/OpenMP.cpp index aac22f0faad37..7d75e6f67dc1b 100644 --- a/flang/lib/Lower/OpenMP/OpenMP.cpp +++ b/flang/lib/Lower/OpenMP/OpenMP.cpp @@ -2199,7 +2199,7 @@ static void genOMP(lower::AbstractConverter &converter, lower::SymMap &symTable, semantics::SemanticsContext &semaCtx, lower::pft::Evaluation &eval, const parser::OpenMPDeclarativeConstruct &ompDeclConstruct) { - std::visit( + Fortran::common::visit( [&](auto &&s) { return genOMP(converter, symTable, semaCtx, eval, s); }, ompDeclConstruct.u); } @@ -2276,7 +2276,7 @@ static void genOMP(lower::AbstractConverter &converter, lower::SymMap &symTable, semantics::SemanticsContext &semaCtx, lower::pft::Evaluation &eval, const parser::OpenMPStandaloneConstruct &standaloneConstruct) { - std::visit( + Fortran::common::visit( [&](auto &&s) { return genOMP(converter, symTable, semaCtx, eval, s); }, standaloneConstruct.u); } @@ -2296,7 +2296,7 @@ static void genOMP(lower::AbstractConverter &converter, lower::SymMap &symTable, semantics::SemanticsContext &semaCtx, lower::pft::Evaluation &eval, const parser::OpenMPAtomicConstruct &atomicConstruct) { - std::visit( + Fortran::common::visit( common::visitors{ [&](const parser::OmpAtomicRead &atomicRead) { mlir::Location loc = converter.genLocation(atomicRead.source); @@ -2487,7 +2487,7 @@ static void genOMP(lower::AbstractConverter &converter, lower::SymMap &symTable, semantics::SemanticsContext &semaCtx, lower::pft::Evaluation &eval, const parser::OpenMPConstruct &ompConstruct) { - std::visit( + Fortran::common::visit( [&](auto &&s) { return genOMP(converter, symTable, semaCtx, eval, s); }, ompConstruct.u); } @@ -2649,21 +2649,22 @@ void Fortran::lower::gatherOpenMPDeferredDeclareTargets( const parser::OpenMPDeclarativeConstruct &ompDecl, llvm::SmallVectorImpl &deferredDeclareTarget) { - std::visit(common::visitors{ - [&](const parser::OpenMPDeclareTargetConstruct &ompReq) { - collectDeferredDeclareTargets(converter, semaCtx, eval, - ompReq, deferredDeclareTarget); - }, - [&](const auto &) {}, - }, - ompDecl.u); + Fortran::common::visit( + common::visitors{ + [&](const parser::OpenMPDeclareTargetConstruct &ompReq) { + collectDeferredDeclareTargets(converter, semaCtx, eval, ompReq, + deferredDeclareTarget); + }, + [&](const auto &) {}, + }, + ompDecl.u); } bool Fortran::lower::isOpenMPDeviceDeclareTarget( lower::AbstractConverter &converter, semantics::SemanticsContext &semaCtx, lower::pft::Evaluation &eval, const parser::OpenMPDeclarativeConstruct &ompDecl) { - return std::visit( + return Fortran::common::visit( common::visitors{ [&](const parser::OpenMPDeclareTargetConstruct &ompReq) { mlir::omp::DeclareTargetDeviceType targetType = diff --git a/flang/lib/Lower/OpenMP/Utils.cpp b/flang/lib/Lower/OpenMP/Utils.cpp index 36d96f37ff36a..8aeef175ad2d2 100644 --- a/flang/lib/Lower/OpenMP/Utils.cpp +++ b/flang/lib/Lower/OpenMP/Utils.cpp @@ -325,7 +325,7 @@ void insertChildMapInfoIntoParent( semantics::Symbol *getOmpObjectSymbol(const parser::OmpObject &ompObject) { semantics::Symbol *sym = nullptr; - std::visit( + Fortran::common::visit( common::visitors{ [&](const parser::Designator &designator) { if (auto *arrayEle = diff --git a/flang/lib/Lower/PFTBuilder.cpp b/flang/lib/Lower/PFTBuilder.cpp index fc34895d1ec75..5b3d5471925bf 100644 --- a/flang/lib/Lower/PFTBuilder.cpp +++ b/flang/lib/Lower/PFTBuilder.cpp @@ -103,7 +103,7 @@ class PFTBuilder { stmt.unwrapped, pftParentStack.back(), stmt.position, stmt.label}); return false; } else if constexpr (std::is_same_v) { - return std::visit( + return Fortran::common::visit( common::visitors{ [&](const common::Indirection &x) { addEvaluation(lower::pft::Evaluation{ @@ -239,7 +239,7 @@ class PFTBuilder { // Get rid of production wrapper bool Pre(const parser::Statement &statement) { - addEvaluation(std::visit( + addEvaluation(Fortran::common::visit( [&](const auto &x) { return lower::pft::Evaluation{x, pftParentStack.back(), statement.source, statement.label}; @@ -248,7 +248,7 @@ class PFTBuilder { return false; } bool Pre(const parser::WhereBodyConstruct &whereBody) { - return std::visit( + return Fortran::common::visit( common::visitors{ [&](const parser::Statement &stmt) { // Not caught as other AssignmentStmt because it is not @@ -469,7 +469,7 @@ class PFTBuilder { makeEvaluationAction(const parser::ActionStmt &statement, parser::CharBlock position, std::optional label) { - return std::visit( + return Fortran::common::visit( common::visitors{ [&](const auto &x) { return lower::pft::Evaluation{ @@ -664,7 +664,7 @@ class PFTBuilder { }; auto analyzeSpecs{[&](const auto &specList) { for (const auto &spec : specList) { - std::visit( + Fortran::common::visit( Fortran::common::visitors{ [&](const Fortran::parser::Format &format) { analyzeFormatSpec(format); @@ -1172,26 +1172,27 @@ class PFTDumper { void dumpPFT(llvm::raw_ostream &outputStream, const lower::pft::Program &pft) { for (auto &unit : pft.getUnits()) { - std::visit(common::visitors{ - [&](const lower::pft::BlockDataUnit &unit) { - outputStream << getNodeIndex(unit) << " "; - outputStream << "BlockData: "; - outputStream << "\nEnd BlockData\n\n"; - }, - [&](const lower::pft::FunctionLikeUnit &func) { - dumpFunctionLikeUnit(outputStream, func); - }, - [&](const lower::pft::ModuleLikeUnit &unit) { - dumpModuleLikeUnit(outputStream, unit); - }, - [&](const lower::pft::CompilerDirectiveUnit &unit) { - dumpCompilerDirectiveUnit(outputStream, unit); - }, - [&](const lower::pft::OpenACCDirectiveUnit &unit) { - dumpOpenACCDirectiveUnit(outputStream, unit); - }, - }, - unit); + Fortran::common::visit( + common::visitors{ + [&](const lower::pft::BlockDataUnit &unit) { + outputStream << getNodeIndex(unit) << " "; + outputStream << "BlockData: "; + outputStream << "\nEnd BlockData\n\n"; + }, + [&](const lower::pft::FunctionLikeUnit &func) { + dumpFunctionLikeUnit(outputStream, func); + }, + [&](const lower::pft::ModuleLikeUnit &unit) { + dumpModuleLikeUnit(outputStream, unit); + }, + [&](const lower::pft::CompilerDirectiveUnit &unit) { + dumpCompilerDirectiveUnit(outputStream, unit); + }, + [&](const lower::pft::OpenACCDirectiveUnit &unit) { + dumpOpenACCDirectiveUnit(outputStream, unit); + }, + }, + unit); } } diff --git a/flang/lib/Lower/VectorSubscripts.cpp b/flang/lib/Lower/VectorSubscripts.cpp index d7a311d32d59d..389a89ddcf102 100644 --- a/flang/lib/Lower/VectorSubscripts.cpp +++ b/flang/lib/Lower/VectorSubscripts.cpp @@ -55,10 +55,11 @@ class VectorSubscriptBoxBuilder { using Designator = Fortran::evaluate::Designator; if constexpr (Fortran::common::HasMember) { const auto &designator = std::get(expr.u); - return std::visit([&](const auto &x) { return gen(x); }, designator.u); + return Fortran::common::visit([&](const auto &x) { return gen(x); }, + designator.u); } else { - return std::visit([&](const auto &x) { return genDesignator(x); }, - expr.u); + return Fortran::common::visit( + [&](const auto &x) { return genDesignator(x); }, expr.u); } } @@ -66,8 +67,8 @@ class VectorSubscriptBoxBuilder { // type of X elements. mlir::Type gen(const Fortran::evaluate::DataRef &dataRef) { - return std::visit([&](const auto &ref) -> mlir::Type { return gen(ref); }, - dataRef.u); + return Fortran::common::visit( + [&](const auto &ref) -> mlir::Type { return gen(ref); }, dataRef.u); } mlir::Type gen(const Fortran::evaluate::SymbolRef &symRef) { @@ -128,7 +129,7 @@ class VectorSubscriptBoxBuilder { mlir::Type gen(const Fortran::evaluate::ArrayRef &arrayRef) { auto isTripletOrVector = [](const Fortran::evaluate::Subscript &subscript) -> bool { - return std::visit( + return Fortran::common::visit( Fortran::common::visitors{ [](const Fortran::evaluate::IndirectSubscriptIntegerExpr &expr) { return expr.value().Rank() != 0; @@ -165,7 +166,7 @@ class VectorSubscriptBoxBuilder { mlir::Type idxTy = builder.getIndexType(); mlir::Value one = builder.createIntegerConstant(loc, idxTy, 1); for (const auto &subscript : llvm::enumerate(arrayRef.subscript())) { - std::visit( + Fortran::common::visit( Fortran::common::visitors{ [&](const Fortran::evaluate::IndirectSubscriptIntegerExpr &expr) { if (expr.value().Rank() == 0) { @@ -327,24 +328,24 @@ Fortran::lower::VectorSubscriptBox::createSlice(fir::FirOpBuilder &builder, mlir::Value one = builder.createIntegerConstant(loc, idxTy, 1); auto undef = builder.create(loc, idxTy); for (const LoweredSubscript &subscript : loweredSubscripts) - std::visit(Fortran::common::visitors{ - [&](const LoweredTriplet &triplet) { - triples.emplace_back(triplet.lb); - triples.emplace_back(triplet.ub); - triples.emplace_back(triplet.stride); - }, - [&](const LoweredVectorSubscript &vector) { - triples.emplace_back(one); - triples.emplace_back(vector.size); - triples.emplace_back(one); - }, - [&](const mlir::Value &i) { - triples.emplace_back(i); - triples.emplace_back(undef); - triples.emplace_back(undef); - }, - }, - subscript); + Fortran::common::visit(Fortran::common::visitors{ + [&](const LoweredTriplet &triplet) { + triples.emplace_back(triplet.lb); + triples.emplace_back(triplet.ub); + triples.emplace_back(triplet.stride); + }, + [&](const LoweredVectorSubscript &vector) { + triples.emplace_back(one); + triples.emplace_back(vector.size); + triples.emplace_back(one); + }, + [&](const mlir::Value &i) { + triples.emplace_back(i); + triples.emplace_back(undef); + triples.emplace_back(undef); + }, + }, + subscript); return builder.create(loc, triples, componentPath); } @@ -390,28 +391,28 @@ fir::ExtendedValue Fortran::lower::VectorSubscriptBox::getElementAt( llvm::SmallVector indexes; size_t inductionIdx = inductionVariables.size() - 1; for (const LoweredSubscript &subscript : loweredSubscripts) - std::visit(Fortran::common::visitors{ - [&](const LoweredTriplet &triplet) { - indexes.emplace_back(inductionVariables[inductionIdx--]); - }, - [&](const LoweredVectorSubscript &vector) { - mlir::Value vecIndex = inductionVariables[inductionIdx--]; - mlir::Value vecBase = fir::getBase(vector.vector); - mlir::Type vecEleTy = fir::unwrapSequenceType( - fir::unwrapPassByRefType(vecBase.getType())); - mlir::Type refTy = builder.getRefType(vecEleTy); - auto vecEltRef = builder.create( - loc, refTy, vecBase, vecIndex); - auto vecElt = - builder.create(loc, vecEleTy, vecEltRef); - indexes.emplace_back( - builder.createConvert(loc, idxTy, vecElt)); - }, - [&](const mlir::Value &i) { - indexes.emplace_back(builder.createConvert(loc, idxTy, i)); - }, - }, - subscript); + Fortran::common::visit( + Fortran::common::visitors{ + [&](const LoweredTriplet &triplet) { + indexes.emplace_back(inductionVariables[inductionIdx--]); + }, + [&](const LoweredVectorSubscript &vector) { + mlir::Value vecIndex = inductionVariables[inductionIdx--]; + mlir::Value vecBase = fir::getBase(vector.vector); + mlir::Type vecEleTy = fir::unwrapSequenceType( + fir::unwrapPassByRefType(vecBase.getType())); + mlir::Type refTy = builder.getRefType(vecEleTy); + auto vecEltRef = builder.create( + loc, refTy, vecBase, vecIndex); + auto vecElt = + builder.create(loc, vecEleTy, vecEltRef); + indexes.emplace_back(builder.createConvert(loc, idxTy, vecElt)); + }, + [&](const mlir::Value &i) { + indexes.emplace_back(builder.createConvert(loc, idxTy, i)); + }, + }, + subscript); mlir::Type refTy = builder.getRefType(getElementType()); auto elementAddr = builder.create( loc, refTy, fir::getBase(loweredBase), shape, slice, indexes, diff --git a/flang/lib/Optimizer/Builder/IntrinsicCall.cpp b/flang/lib/Optimizer/Builder/IntrinsicCall.cpp index ab106f62aecfb..c929d05038462 100644 --- a/flang/lib/Optimizer/Builder/IntrinsicCall.cpp +++ b/flang/lib/Optimizer/Builder/IntrinsicCall.cpp @@ -1788,7 +1788,7 @@ IntrinsicLibrary::genIntrinsicCall(llvm::StringRef specificName, llvm::StringRef name = genericName(specificName); if (const IntrinsicHandler *handler = findIntrinsicHandler(name)) { bool outline = handler->outline || outlineAllIntrinsics; - return {std::visit( + return {Fortran::common::visit( [&](auto &generator) -> fir::ExtendedValue { return invokeHandler(generator, *handler, resultType, args, outline, *this); @@ -1802,7 +1802,7 @@ IntrinsicLibrary::genIntrinsicCall(llvm::StringRef specificName, if (fir::getTargetTriple(mod).isPPC()) { if (const IntrinsicHandler *ppcHandler = findPPCIntrinsicHandler(name)) { bool outline = ppcHandler->outline || outlineAllIntrinsics; - return {std::visit( + return {Fortran::common::visit( [&](auto &generator) -> fir::ExtendedValue { return invokeHandler(generator, *ppcHandler, resultType, args, outline, *this); @@ -2136,7 +2136,7 @@ mlir::SymbolRefAttr IntrinsicLibrary::getUnrestrictedIntrinsicSymbolRefAttr( bool loadRefArguments = true; mlir::func::FuncOp funcOp; if (const IntrinsicHandler *handler = findIntrinsicHandler(name)) - funcOp = std::visit( + funcOp = Fortran::common::visit( [&](auto generator) { return getWrapper(generator, name, signature, loadRefArguments); }, diff --git a/flang/lib/Semantics/check-acc-structure.cpp b/flang/lib/Semantics/check-acc-structure.cpp index 69b9fe17e6a88..25140a0473749 100644 --- a/flang/lib/Semantics/check-acc-structure.cpp +++ b/flang/lib/Semantics/check-acc-structure.cpp @@ -403,9 +403,9 @@ void AccStructureChecker::CheckMultipleOccurrenceInDeclare( if (GetContext().directive != llvm::acc::Directive::ACCD_declare) return; for (const auto &object : list.v) { - std::visit( - Fortran::common::visitors{ - [&](const Fortran::parser::Designator &designator) { + common::visit( + common::visitors{ + [&](const parser::Designator &designator) { if (const auto *name = getDesignatorNameIfDataRef(designator)) { if (declareSymbols.contains(&name->symbol->GetUltimate())) { if (declareSymbols[&name->symbol->GetUltimate()] == clause) { @@ -435,7 +435,7 @@ void AccStructureChecker::CheckMultipleOccurrenceInDeclare( declareSymbols.insert({&name->symbol->GetUltimate(), clause}); } }, - [&](const Fortran::parser::Name &name) { + [&](const parser::Name &name) { // TODO: check common block }}, object.u); @@ -674,9 +674,9 @@ void AccStructureChecker::Enter(const parser::AccClause::Reduction &reduction) { const auto &objects{std::get(list.t)}; for (const auto &object : objects.v) { - std::visit( - Fortran::common::visitors{ - [&](const Fortran::parser::Designator &designator) { + common::visit( + common::visitors{ + [&](const parser::Designator &designator) { if (const auto *name = getDesignatorNameIfDataRef(designator)) { const auto *type{name->symbol->GetType()}; if (type->IsNumeric(TypeCategory::Integer) && diff --git a/flang/lib/Semantics/check-coarray.cpp b/flang/lib/Semantics/check-coarray.cpp index 106af7960fa94..6cf61a6b923db 100644 --- a/flang/lib/Semantics/check-coarray.cpp +++ b/flang/lib/Semantics/check-coarray.cpp @@ -93,7 +93,7 @@ static void CheckCoindexedStatOrErrmsg(SemanticsContext &context, } } }}; - std::visit(CoindexedCheck, statOrErrmsg.u); + Fortran::common::visit(CoindexedCheck, statOrErrmsg.u); } static void CheckSyncStatList( From 7ddff3a586baaf6f4403183ba51121951ce0602e Mon Sep 17 00:00:00 2001 From: Abid Qadeer Date: Mon, 17 Jun 2024 21:00:09 +0100 Subject: [PATCH 06/25] [flang] Fix comments and formatting. (NFC) (#95786) As mentioned in [here](https://github.com/llvm/llvm-project/pull/95462#discussion_r1640126721), the formatting of the comments have been fixed. Also added comments before literal arguments. --- .../Transforms/DebugTypeGenerator.cpp | 24 +++++++++---------- 1 file changed, 11 insertions(+), 13 deletions(-) diff --git a/flang/lib/Optimizer/Transforms/DebugTypeGenerator.cpp b/flang/lib/Optimizer/Transforms/DebugTypeGenerator.cpp index 70ff8b386ac3d..407ecc8e327b4 100644 --- a/flang/lib/Optimizer/Transforms/DebugTypeGenerator.cpp +++ b/flang/lib/Optimizer/Transforms/DebugTypeGenerator.cpp @@ -75,8 +75,8 @@ static mlir::LLVM::DITypeAttr genBasicType(mlir::MLIRContext *context, } static mlir::LLVM::DITypeAttr genPlaceholderType(mlir::MLIRContext *context) { - return genBasicType(context, mlir::StringAttr::get(context, "integer"), 32, - llvm::dwarf::DW_ATE_signed); + return genBasicType(context, mlir::StringAttr::get(context, "integer"), + /*bitSize=*/32, llvm::dwarf::DW_ATE_signed); } mlir::LLVM::DITypeAttr DebugTypeGenerator::convertBoxedSequenceType( @@ -145,11 +145,10 @@ mlir::LLVM::DITypeAttr DebugTypeGenerator::convertBoxedSequenceType( elements.push_back(subrangeTy); } return mlir::LLVM::DICompositeTypeAttr::get( - context, llvm::dwarf::DW_TAG_array_type, /*recursive id*/ {}, - /* name */ nullptr, /* file */ nullptr, /* line */ 0, - /* scope */ nullptr, elemTy, mlir::LLVM::DIFlags::Zero, - /* sizeInBits */ 0, /*alignInBits*/ 0, elements, dataLocation, - /* rank */ nullptr, allocated, associated); + context, llvm::dwarf::DW_TAG_array_type, /*recursive_id=*/{}, + /*name=*/nullptr, /*file=*/nullptr, /*line=*/0, /*scope=*/nullptr, elemTy, + mlir::LLVM::DIFlags::Zero, /*sizeInBits=*/0, /*alignInBits=*/0, elements, + dataLocation, /*rank=*/nullptr, allocated, associated); } mlir::LLVM::DITypeAttr DebugTypeGenerator::convertSequenceType( @@ -184,12 +183,11 @@ mlir::LLVM::DITypeAttr DebugTypeGenerator::convertSequenceType( // have been set to some valid default values. return mlir::LLVM::DICompositeTypeAttr::get( - context, llvm::dwarf::DW_TAG_array_type, /*recursive id*/ {}, - /* name */ nullptr, /* file */ nullptr, /* line */ 0, /* scope */ nullptr, - elemTy, mlir::LLVM::DIFlags::Zero, /* sizeInBits */ 0, - /*alignInBits*/ 0, elements, /* dataLocation */ nullptr, - /* rank */ nullptr, /* allocated */ nullptr, - /* associated */ nullptr); + context, llvm::dwarf::DW_TAG_array_type, /*recursive_id=*/{}, + /*name=*/nullptr, /*file=*/nullptr, /*line=*/0, /*scope=*/nullptr, elemTy, + mlir::LLVM::DIFlags::Zero, /*sizeInBits=*/0, /*alignInBits=*/0, elements, + /*dataLocation=*/nullptr, /*rank=*/nullptr, /*allocated=*/nullptr, + /*associated=*/nullptr); } mlir::LLVM::DITypeAttr DebugTypeGenerator::convertCharacterType( From 7620fe0d2d1e0257611c0ab0d96f3bf1bf7a1079 Mon Sep 17 00:00:00 2001 From: Louis Dionne Date: Mon, 17 Jun 2024 16:20:58 -0400 Subject: [PATCH 07/25] [CI][format] Explicitly pass extensions to git-clang-format (#95794) This ensures that the CI script controls which file extensions are considered instead of letting git-clang-format apply its own filtering rules. In particular, this properly handles libc++ extension-less headers which were passed to git-clang-format, but then dropped by that tool as having an unrecognized extension. --- llvm/utils/git/code-format-helper.py | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/llvm/utils/git/code-format-helper.py b/llvm/utils/git/code-format-helper.py index f1207026704e8..d60d4131bc94b 100755 --- a/llvm/utils/git/code-format-helper.py +++ b/llvm/utils/git/code-format-helper.py @@ -216,6 +216,17 @@ def format_run(self, changed_files: List[str], args: FormatArgs) -> Optional[str cf_cmd.append(args.start_rev) cf_cmd.append(args.end_rev) + # Gather the extension of all modified files and pass them explicitly to git-clang-format. + # This prevents git-clang-format from applying its own filtering rules on top of ours. + extensions = set() + for file in cpp_files: + _, ext = os.path.splitext(file) + extensions.add( + ext.strip(".") + ) # Exclude periods since git-clang-format takes extensions without them + cf_cmd.append("--extensions") + cf_cmd.append("'{}'".format(",".join(extensions))) + cf_cmd.append("--") cf_cmd += cpp_files From 5b04b6fe3fabba8f76d730da3c0d528e1dd0c184 Mon Sep 17 00:00:00 2001 From: Florian Mayer Date: Mon, 17 Jun 2024 13:21:34 -0700 Subject: [PATCH 08/25] [HWASan] [compiler-rt] support non-4k pages on Android (#95069) --- compiler-rt/lib/asan/asan_linux.cpp | 3 ++- compiler-rt/lib/asan/asan_mac.cpp | 3 ++- compiler-rt/lib/asan/asan_premap_shadow.cpp | 3 ++- .../lib/hwasan/hwasan_dynamic_shadow.cpp | 17 +++++++++++------ compiler-rt/lib/memprof/memprof_linux.cpp | 3 ++- .../lib/sanitizer_common/sanitizer_common.h | 17 ++++++++--------- .../lib/sanitizer_common/sanitizer_linux.cpp | 4 ++-- .../sanitizer_linux_libcdep.cpp | 5 ++--- .../lib/sanitizer_common/sanitizer_mac.cpp | 4 ++-- .../lib/sanitizer_common/sanitizer_win.cpp | 5 ++--- 10 files changed, 35 insertions(+), 29 deletions(-) diff --git a/compiler-rt/lib/asan/asan_linux.cpp b/compiler-rt/lib/asan/asan_linux.cpp index a517de5af00dc..0b470db86748f 100644 --- a/compiler-rt/lib/asan/asan_linux.cpp +++ b/compiler-rt/lib/asan/asan_linux.cpp @@ -93,7 +93,8 @@ uptr FindDynamicShadowStart() { # endif return MapDynamicShadow(shadow_size_bytes, ASAN_SHADOW_SCALE, - /*min_shadow_base_alignment*/ 0, kHighMemEnd); + /*min_shadow_base_alignment*/ 0, kHighMemEnd, + GetMmapGranularity()); } void AsanApplyToGlobals(globals_op_fptr op, const void *needle) { diff --git a/compiler-rt/lib/asan/asan_mac.cpp b/compiler-rt/lib/asan/asan_mac.cpp index b250f796e165f..bfc349223258b 100644 --- a/compiler-rt/lib/asan/asan_mac.cpp +++ b/compiler-rt/lib/asan/asan_mac.cpp @@ -51,7 +51,8 @@ bool IsSystemHeapAddress (uptr addr) { return false; } uptr FindDynamicShadowStart() { return MapDynamicShadow(MemToShadowSize(kHighMemEnd), ASAN_SHADOW_SCALE, - /*min_shadow_base_alignment*/ 0, kHighMemEnd); + /*min_shadow_base_alignment*/ 0, kHighMemEnd, + GetMmapGranularity()); } // No-op. Mac does not support static linkage anyway. diff --git a/compiler-rt/lib/asan/asan_premap_shadow.cpp b/compiler-rt/lib/asan/asan_premap_shadow.cpp index bed2f62a22511..6e08b8f966507 100644 --- a/compiler-rt/lib/asan/asan_premap_shadow.cpp +++ b/compiler-rt/lib/asan/asan_premap_shadow.cpp @@ -33,7 +33,8 @@ uptr PremapShadowSize() { // PremapShadowSize() bytes on the right of it are mapped r/o. uptr PremapShadow() { return MapDynamicShadow(PremapShadowSize(), /*mmap_alignment_scale*/ 3, - /*min_shadow_base_alignment*/ 0, kHighMemEnd); + /*min_shadow_base_alignment*/ 0, kHighMemEnd, + GetMmapGranularity()); } bool PremapShadowFailed() { diff --git a/compiler-rt/lib/hwasan/hwasan_dynamic_shadow.cpp b/compiler-rt/lib/hwasan/hwasan_dynamic_shadow.cpp index 7642ba6c0bf08..48bc3b631ac07 100644 --- a/compiler-rt/lib/hwasan/hwasan_dynamic_shadow.cpp +++ b/compiler-rt/lib/hwasan/hwasan_dynamic_shadow.cpp @@ -36,15 +36,20 @@ decltype(__hwasan_shadow)* __hwasan_premap_shadow(); namespace __hwasan { +// We cannot call anything in libc here (see comment above), so we need to +// assume the biggest allowed page size. +// Android max page size is defined as 16k here: +// https://android.googlesource.com/platform/bionic/+/main/libc/platform/bionic/page.h#41 +static constexpr uptr kMaxGranularity = 16384; + // Conservative upper limit. static uptr PremapShadowSize() { - return RoundUpTo(GetMaxVirtualAddress() >> kShadowScale, - GetMmapGranularity()); + return RoundUpTo(GetMaxVirtualAddress() >> kShadowScale, kMaxGranularity); } static uptr PremapShadow() { return MapDynamicShadow(PremapShadowSize(), kShadowScale, - kShadowBaseAlignment, kHighMemEnd); + kShadowBaseAlignment, kHighMemEnd, kMaxGranularity); } static bool IsPremapShadowAvailable() { @@ -56,7 +61,7 @@ static bool IsPremapShadowAvailable() { } static uptr FindPremappedShadowStart(uptr shadow_size_bytes) { - const uptr granularity = GetMmapGranularity(); + const uptr granularity = kMaxGranularity; const uptr shadow_start = reinterpret_cast(&__hwasan_shadow); const uptr premap_shadow_size = PremapShadowSize(); const uptr shadow_size = RoundUpTo(shadow_size_bytes, granularity); @@ -109,7 +114,7 @@ uptr FindDynamicShadowStart(uptr shadow_size_bytes) { if (IsPremapShadowAvailable()) return FindPremappedShadowStart(shadow_size_bytes); return MapDynamicShadow(shadow_size_bytes, kShadowScale, kShadowBaseAlignment, - kHighMemEnd); + kHighMemEnd, kMaxGranularity); } } // namespace __hwasan @@ -135,7 +140,7 @@ uptr FindDynamicShadowStart(uptr shadow_size_bytes) { RingBufferSize()); # endif return MapDynamicShadow(shadow_size_bytes, kShadowScale, kShadowBaseAlignment, - kHighMemEnd); + kHighMemEnd, GetMmapGranularity()); } } // namespace __hwasan diff --git a/compiler-rt/lib/memprof/memprof_linux.cpp b/compiler-rt/lib/memprof/memprof_linux.cpp index 26a2b456b874e..fbe5d250f840b 100644 --- a/compiler-rt/lib/memprof/memprof_linux.cpp +++ b/compiler-rt/lib/memprof/memprof_linux.cpp @@ -58,7 +58,8 @@ void InitializePlatformExceptionHandlers() {} uptr FindDynamicShadowStart() { uptr shadow_size_bytes = MemToShadowSize(kHighMemEnd); return MapDynamicShadow(shadow_size_bytes, SHADOW_SCALE, - /*min_shadow_base_alignment*/ 0, kHighMemEnd); + /*min_shadow_base_alignment*/ 0, kHighMemEnd, + GetMmapGranularity()); } void *MemprofDlSymNext(const char *sym) { return dlsym(RTLD_NEXT, sym); } diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_common.h b/compiler-rt/lib/sanitizer_common/sanitizer_common.h index c451fc962c529..2d1059140c303 100644 --- a/compiler-rt/lib/sanitizer_common/sanitizer_common.h +++ b/compiler-rt/lib/sanitizer_common/sanitizer_common.h @@ -60,14 +60,10 @@ inline int Verbosity() { return atomic_load(¤t_verbosity, memory_order_relaxed); } -#if SANITIZER_ANDROID -inline uptr GetPageSize() { -// Android post-M sysconf(_SC_PAGESIZE) crashes if called from .preinit_array. - return 4096; -} -inline uptr GetPageSizeCached() { - return 4096; -} +#if SANITIZER_ANDROID && !defined(__aarch64__) +// 32-bit Android only has 4k pages. +inline uptr GetPageSize() { return 4096; } +inline uptr GetPageSizeCached() { return 4096; } #else uptr GetPageSize(); extern uptr PageSizeCached; @@ -77,6 +73,7 @@ inline uptr GetPageSizeCached() { return PageSizeCached; } #endif + uptr GetMmapGranularity(); uptr GetMaxVirtualAddress(); uptr GetMaxUserVirtualAddress(); @@ -91,6 +88,7 @@ void GetThreadStackAndTls(bool main, uptr *stk_addr, uptr *stk_size, // Memory management void *MmapOrDie(uptr size, const char *mem_type, bool raw_report = false); + inline void *MmapOrDieQuietly(uptr size, const char *mem_type) { return MmapOrDie(size, mem_type, /*raw_report*/ true); } @@ -139,7 +137,8 @@ void UnmapFromTo(uptr from, uptr to); // shadow_size_bytes bytes on the right, which on linux is mapped no access. // The high_mem_end may be updated if the original shadow size doesn't fit. uptr MapDynamicShadow(uptr shadow_size_bytes, uptr shadow_scale, - uptr min_shadow_base_alignment, uptr &high_mem_end); + uptr min_shadow_base_alignment, uptr &high_mem_end, + uptr granularity); // Let S = max(shadow_size, num_aliases * alias_size, ring_buffer_size). // Reserves 2*S bytes of address space to the right of the returned address and diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_linux.cpp b/compiler-rt/lib/sanitizer_common/sanitizer_linux.cpp index 5d2dd3a7a658f..d15caa76efb06 100644 --- a/compiler-rt/lib/sanitizer_common/sanitizer_linux.cpp +++ b/compiler-rt/lib/sanitizer_common/sanitizer_linux.cpp @@ -1136,7 +1136,7 @@ uptr GetMaxUserVirtualAddress() { return addr; } -# if !SANITIZER_ANDROID +# if !SANITIZER_ANDROID || defined(__aarch64__) uptr GetPageSize() { # if SANITIZER_LINUX && (defined(__x86_64__) || defined(__i386__)) && \ defined(EXEC_PAGESIZE) @@ -1155,7 +1155,7 @@ uptr GetPageSize() { return sysconf(_SC_PAGESIZE); // EXEC_PAGESIZE may not be trustworthy. # endif } -# endif // !SANITIZER_ANDROID +# endif uptr ReadBinaryName(/*out*/ char *buf, uptr buf_len) { # if SANITIZER_SOLARIS diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_linux_libcdep.cpp b/compiler-rt/lib/sanitizer_common/sanitizer_linux_libcdep.cpp index 6d05411222d9e..175362183fd78 100644 --- a/compiler-rt/lib/sanitizer_common/sanitizer_linux_libcdep.cpp +++ b/compiler-rt/lib/sanitizer_common/sanitizer_linux_libcdep.cpp @@ -995,9 +995,8 @@ void UnmapFromTo(uptr from, uptr to) { } uptr MapDynamicShadow(uptr shadow_size_bytes, uptr shadow_scale, - uptr min_shadow_base_alignment, - UNUSED uptr &high_mem_end) { - const uptr granularity = GetMmapGranularity(); + uptr min_shadow_base_alignment, UNUSED uptr &high_mem_end, + uptr granularity) { const uptr alignment = Max(granularity << shadow_scale, 1ULL << min_shadow_base_alignment); const uptr left_padding = diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_mac.cpp b/compiler-rt/lib/sanitizer_common/sanitizer_mac.cpp index 24e3d1112520e..cbdf3e95925bf 100644 --- a/compiler-rt/lib/sanitizer_common/sanitizer_mac.cpp +++ b/compiler-rt/lib/sanitizer_common/sanitizer_mac.cpp @@ -1188,8 +1188,8 @@ uptr GetMaxVirtualAddress() { } uptr MapDynamicShadow(uptr shadow_size_bytes, uptr shadow_scale, - uptr min_shadow_base_alignment, uptr &high_mem_end) { - const uptr granularity = GetMmapGranularity(); + uptr min_shadow_base_alignment, uptr &high_mem_end, + uptr granularity) { const uptr alignment = Max(granularity << shadow_scale, 1ULL << min_shadow_base_alignment); const uptr left_padding = diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_win.cpp b/compiler-rt/lib/sanitizer_common/sanitizer_win.cpp index 4e5ad8e4693b4..0b198890fc798 100644 --- a/compiler-rt/lib/sanitizer_common/sanitizer_win.cpp +++ b/compiler-rt/lib/sanitizer_common/sanitizer_win.cpp @@ -384,9 +384,8 @@ bool DontDumpShadowMemory(uptr addr, uptr length) { } uptr MapDynamicShadow(uptr shadow_size_bytes, uptr shadow_scale, - uptr min_shadow_base_alignment, - UNUSED uptr &high_mem_end) { - const uptr granularity = GetMmapGranularity(); + uptr min_shadow_base_alignment, UNUSED uptr &high_mem_end, + uptr granularity) { const uptr alignment = Max(granularity << shadow_scale, 1ULL << min_shadow_base_alignment); const uptr left_padding = From d6cc35f7f67575f2d3534ea385c2f36f48f49aea Mon Sep 17 00:00:00 2001 From: Peiming Liu Date: Mon, 17 Jun 2024 13:29:53 -0700 Subject: [PATCH 09/25] Reapply "[mlir][sparse] implement lowering rules for IterateOp." (#95836) --- .../Transforms/SparseIterationToScf.cpp | 121 +++++++++++++++++- .../Transforms/Utils/SparseTensorIterator.cpp | 40 ++++++ .../Transforms/Utils/SparseTensorIterator.h | 26 +++- .../SparseTensor/sparse_iteration_to_scf.mlir | 54 ++++++-- 4 files changed, 224 insertions(+), 17 deletions(-) diff --git a/mlir/lib/Dialect/SparseTensor/Transforms/SparseIterationToScf.cpp b/mlir/lib/Dialect/SparseTensor/Transforms/SparseIterationToScf.cpp index 62887c75c872b..4224925147c84 100644 --- a/mlir/lib/Dialect/SparseTensor/Transforms/SparseIterationToScf.cpp +++ b/mlir/lib/Dialect/SparseTensor/Transforms/SparseIterationToScf.cpp @@ -34,6 +34,20 @@ convertIterSpaceType(IterSpaceType itSp, SmallVectorImpl &fields) { return success(); } +static std::optional +convertIteratorType(IteratorType itTp, SmallVectorImpl &fields) { + // The actually Iterator Values (that are updated every iteration). + auto idxTp = IndexType::get(itTp.getContext()); + // TODO: handle batch dimension. + assert(itTp.getEncoding().getBatchLvlRank() == 0); + if (!itTp.isUnique()) { + // Segment high for non-unique iterator. + fields.push_back(idxTp); + } + fields.push_back(idxTp); + return success(); +} + namespace { /// Sparse codegen rule for number of entries operator. @@ -57,10 +71,114 @@ class ExtractIterSpaceConverter } }; +class SparseIterateOpConverter : public OneToNOpConversionPattern { +public: + using OneToNOpConversionPattern::OneToNOpConversionPattern; + LogicalResult + matchAndRewrite(IterateOp op, OpAdaptor adaptor, + OneToNPatternRewriter &rewriter) const override { + if (!op.getCrdUsedLvls().empty()) + return rewriter.notifyMatchFailure( + op, "non-empty coordinates list not implemented."); + + Location loc = op.getLoc(); + + auto iterSpace = SparseIterationSpace::fromValues( + op.getIterSpace().getType(), adaptor.getIterSpace(), 0); + + std::unique_ptr it = + iterSpace.extractIterator(rewriter, loc); + + if (it->iteratableByFor()) { + auto [lo, hi] = it->genForCond(rewriter, loc); + Value step = constantIndex(rewriter, loc, 1); + SmallVector ivs; + for (ValueRange inits : adaptor.getInitArgs()) + llvm::append_range(ivs, inits); + scf::ForOp forOp = rewriter.create(loc, lo, hi, step, ivs); + + Block *loopBody = op.getBody(); + OneToNTypeMapping bodyTypeMapping(loopBody->getArgumentTypes()); + if (failed(typeConverter->convertSignatureArgs( + loopBody->getArgumentTypes(), bodyTypeMapping))) + return failure(); + rewriter.applySignatureConversion(loopBody, bodyTypeMapping); + + rewriter.eraseBlock(forOp.getBody()); + Region &dstRegion = forOp.getRegion(); + rewriter.inlineRegionBefore(op.getRegion(), dstRegion, dstRegion.end()); + + auto yieldOp = + llvm::cast(forOp.getBody()->getTerminator()); + + rewriter.setInsertionPointToEnd(forOp.getBody()); + // replace sparse_tensor.yield with scf.yield. + rewriter.create(loc, yieldOp.getResults()); + rewriter.eraseOp(yieldOp); + + const OneToNTypeMapping &resultMapping = adaptor.getResultMapping(); + rewriter.replaceOp(op, forOp.getResults(), resultMapping); + } else { + SmallVector ivs; + llvm::append_range(ivs, it->getCursor()); + for (ValueRange inits : adaptor.getInitArgs()) + llvm::append_range(ivs, inits); + + assert(llvm::all_of(ivs, [](Value v) { return v != nullptr; })); + + TypeRange types = ValueRange(ivs).getTypes(); + auto whileOp = rewriter.create(loc, types, ivs); + SmallVector l(types.size(), op.getIterator().getLoc()); + + // Generates loop conditions. + Block *before = rewriter.createBlock(&whileOp.getBefore(), {}, types, l); + rewriter.setInsertionPointToStart(before); + ValueRange bArgs = before->getArguments(); + auto [whileCond, remArgs] = it->genWhileCond(rewriter, loc, bArgs); + assert(remArgs.size() == adaptor.getInitArgs().size()); + rewriter.create(loc, whileCond, before->getArguments()); + + // Generates loop body. + Block *loopBody = op.getBody(); + OneToNTypeMapping bodyTypeMapping(loopBody->getArgumentTypes()); + if (failed(typeConverter->convertSignatureArgs( + loopBody->getArgumentTypes(), bodyTypeMapping))) + return failure(); + rewriter.applySignatureConversion(loopBody, bodyTypeMapping); + + Region &dstRegion = whileOp.getAfter(); + // TODO: handle uses of coordinate! + rewriter.inlineRegionBefore(op.getRegion(), dstRegion, dstRegion.end()); + ValueRange aArgs = whileOp.getAfterArguments(); + auto yieldOp = llvm::cast( + whileOp.getAfterBody()->getTerminator()); + + rewriter.setInsertionPointToEnd(whileOp.getAfterBody()); + + aArgs = it->linkNewScope(aArgs); + ValueRange nx = it->forward(rewriter, loc); + SmallVector yields; + llvm::append_range(yields, nx); + llvm::append_range(yields, yieldOp.getResults()); + + // replace sparse_tensor.yield with scf.yield. + rewriter.eraseOp(yieldOp); + rewriter.create(loc, yields); + + const OneToNTypeMapping &resultMapping = adaptor.getResultMapping(); + rewriter.replaceOp( + op, whileOp.getResults().drop_front(it->getCursor().size()), + resultMapping); + } + return success(); + } +}; + } // namespace mlir::SparseIterationTypeConverter::SparseIterationTypeConverter() { addConversion([](Type type) { return type; }); + addConversion(convertIteratorType); addConversion(convertIterSpaceType); addSourceMaterialization([](OpBuilder &builder, IterSpaceType spTp, @@ -74,5 +192,6 @@ mlir::SparseIterationTypeConverter::SparseIterationTypeConverter() { void mlir::populateLowerSparseIterationToSCFPatterns( TypeConverter &converter, RewritePatternSet &patterns) { - patterns.add(converter, patterns.getContext()); + patterns.add( + converter, patterns.getContext()); } diff --git a/mlir/lib/Dialect/SparseTensor/Transforms/Utils/SparseTensorIterator.cpp b/mlir/lib/Dialect/SparseTensor/Transforms/Utils/SparseTensorIterator.cpp index be8e15d6ae6f4..ef95fcc84bd90 100644 --- a/mlir/lib/Dialect/SparseTensor/Transforms/Utils/SparseTensorIterator.cpp +++ b/mlir/lib/Dialect/SparseTensor/Transforms/Utils/SparseTensorIterator.cpp @@ -331,6 +331,13 @@ class TrivialIterator : public ConcreteIterator { TrivialIterator(const SparseTensorLevel &stl) : ConcreteIterator(stl, IterKind::kTrivial, /*itValCnt=*/1) {} + TrivialIterator(OpBuilder &b, Location l, const SparseTensorLevel &stl, + Value posLo, Value posHi) + : ConcreteIterator(stl, IterKind::kTrivial, /*itValCnt=*/1), posLo(posLo), + posHi(posHi) { + seek(posLo); + } + std::string getDebugInterfacePrefix() const override { return std::string("trivial<") + stl.toString() + ">"; } @@ -420,6 +427,14 @@ class DedupIterator : public ConcreteIterator { : ConcreteIterator(stl, IterKind::kDedup, /*itValCnt=*/2) { assert(!stl.isUnique()); } + + DedupIterator(OpBuilder &b, Location l, const SparseTensorLevel &stl, + Value posLo, Value posHi) + : ConcreteIterator(stl, IterKind::kDedup, /*itValCnt=*/2), posHi(posHi) { + assert(!stl.isUnique()); + seek({posLo, genSegmentHigh(b, l, posLo)}); + } + // For LLVM-style RTTI. static bool classof(const SparseIterator *from) { return from->kind == IterKind::kDedup; @@ -1532,6 +1547,11 @@ SparseIterationSpace mlir::sparse_tensor::SparseIterationSpace::fromValues( return space; } +std::unique_ptr +SparseIterationSpace::extractIterator(OpBuilder &b, Location l) const { + return makeSimpleIterator(b, l, *this); +} + //===----------------------------------------------------------------------===// // SparseIterator factory functions. //===----------------------------------------------------------------------===// @@ -1590,6 +1610,26 @@ sparse_tensor::makeSynLevelAndIterator(Value sz, unsigned tid, unsigned lvl, return std::make_pair(std::move(stl), std::move(it)); } +std::unique_ptr +sparse_tensor::makeSimpleIterator(OpBuilder &b, Location l, + const SparseIterationSpace &iterSpace) { + // assert(iterSpace.getSpaceDim() == 1); + std::unique_ptr ret; + if (!iterSpace.isUnique()) { + // We always dedupliate the non-unique level, but we should optimize it away + // if possible. + ret = std::make_unique(b, l, iterSpace.getLastLvl(), + iterSpace.getBoundLo(), + iterSpace.getBoundHi()); + } else { + ret = std::make_unique(b, l, iterSpace.getLastLvl(), + iterSpace.getBoundLo(), + iterSpace.getBoundHi()); + } + ret->setSparseEmitStrategy(SparseEmitStrategy::kFunctional); + return ret; +} + std::unique_ptr sparse_tensor::makeSimpleIterator(const SparseTensorLevel &stl, SparseEmitStrategy strategy) { diff --git a/mlir/lib/Dialect/SparseTensor/Transforms/Utils/SparseTensorIterator.h b/mlir/lib/Dialect/SparseTensor/Transforms/Utils/SparseTensorIterator.h index 17636af2b2f9d..91f363db93f1d 100644 --- a/mlir/lib/Dialect/SparseTensor/Transforms/Utils/SparseTensorIterator.h +++ b/mlir/lib/Dialect/SparseTensor/Transforms/Utils/SparseTensorIterator.h @@ -132,6 +132,10 @@ class SparseIterationSpace { Value getBoundLo() const { return bound.first; } Value getBoundHi() const { return bound.second; } + // Extract an iterator to iterate over the sparse iteration space. + std::unique_ptr extractIterator(OpBuilder &b, + Location l) const; + private: SmallVector> lvls; std::pair bound; @@ -192,6 +196,13 @@ class SparseIterator { crd = nullptr; } + // Reconstructs a iteration space directly from the provided ValueRange. + static std::unique_ptr + fromValues(IteratorType dstTp, ValueRange values, unsigned tid); + + // The inverse operation of `fromValues`. + SmallVector toValues() const { llvm_unreachable("Not implemented"); } + // // Iterator properties. // @@ -345,12 +356,21 @@ std::unique_ptr makeSparseTensorLevel(OpBuilder &b, unsigned tid, Level lvl); -/// Helper function to create a TensorLevel object from given `tensor`. +/// Helper function to create a TensorLevel object from given ValueRange. std::unique_ptr makeSparseTensorLevel(LevelType lt, Value sz, ValueRange buffers, unsigned tid, Level l); -/// Helper function to create a simple SparseIterator object that iterates -/// over the SparseTensorLevel. + +/// Helper function to create a simple SparseIterator object that iterate +/// over the entire iteration space. +std::unique_ptr +makeSimpleIterator(OpBuilder &b, Location l, + const SparseIterationSpace &iterSpace); + +/// Helper function to create a simple SparseIterator object that iterate +/// over the sparse tensor level. +/// TODO: switch to `SparseIterationSpace` (which support N-D iterator) when +/// feature complete. std::unique_ptr makeSimpleIterator( const SparseTensorLevel &stl, SparseEmitStrategy strategy = SparseEmitStrategy::kFunctional); diff --git a/mlir/test/Dialect/SparseTensor/sparse_iteration_to_scf.mlir b/mlir/test/Dialect/SparseTensor/sparse_iteration_to_scf.mlir index 5fcd661bb69b2..77a0e89dc7c81 100644 --- a/mlir/test/Dialect/SparseTensor/sparse_iteration_to_scf.mlir +++ b/mlir/test/Dialect/SparseTensor/sparse_iteration_to_scf.mlir @@ -1,4 +1,5 @@ // RUN: mlir-opt %s --lower-sparse-iteration-to-scf | FileCheck %s +// RUN: mlir-opt %s --sparse-space-collapse --lower-sparse-iteration-to-scf | FileCheck %s --check-prefix COLLAPSED #COO = #sparse_tensor.encoding<{ map = (i, j) -> ( @@ -7,17 +8,44 @@ ) }> -// CHECK-LABEL: func.func @sparse_1D_space( -// CHECK-SAME: %[[VAL_0:.*]]: tensor) -> !sparse_tensor.iter_space<#sparse{{[0-9]*}}, lvls = 0> { -// CHECK-DAG: %[[C0:.*]] = arith.constant 0 : index -// CHECK-DAG: %[[C1:.*]] = arith.constant 1 : index -// CHECK-DAG: %[[LVL_SIZE:.*]] = sparse_tensor.lvl %[[VAL_0]], %[[C0]] : tensor -// CHECK: %[[POS_MEM:.*]] = sparse_tensor.positions %[[VAL_0]] {level = 0 : index} : tensor to memref -// CHECK: %[[CRD_MEM:.*]] = sparse_tensor.coordinates %[[VAL_0]] {level = 0 : index} : tensor to memref -// CHECK: %[[POS_LO:.*]] = memref.load %[[POS_MEM]]{{\[}}%[[C0]]] : memref -// CHECK: %[[POS_HI:.*]] = memref.load %[[POS_MEM]]{{\[}}%[[C1]]] : memref -// CHECK: %[[ITER_SPACE:.*]] = builtin.unrealized_conversion_cast %[[POS_MEM]], %[[CRD_MEM]], %[[LVL_SIZE]], %[[POS_LO]], %[[POS_HI]] -func.func @sparse_1D_space(%sp : tensor) -> !sparse_tensor.iter_space<#COO, lvls = 0> { - %l1 = sparse_tensor.extract_iteration_space %sp lvls = 0 : tensor -> !sparse_tensor.iter_space<#COO, lvls = 0> - return %l1 : !sparse_tensor.iter_space<#COO, lvls = 0> +// CHECK-LABEL: @sparse_iteration_to_scf +// // deduplication +// CHECK: scf.while {{.*}} { +// CHECK: } do { +// CHECK: } +// CHECK: scf.while {{.*}} { +// CHECK: } do { +// // actual computation +// CHECK: scf.for {{.*}} { +// CHECK: arith.addi +// CHECK: } +// // deduplication +// CHECK: scf.while {{.*}} { +// CHECK: } do { +// CHECK: } +// CHECK: scf.yield +// CHECK: } +// CHECK: return + +// COLLAPSED-LABEL: @sparse_iteration_to_scf +// COLLAPSED: %[[RET:.*]] = scf.for {{.*}} { +// COLLAPSED: %[[VAL:.*]] = arith.addi +// COLLAPSED: scf.yield %[[VAL]] : index +// COLLAPSED: } +// COLLAPSED: return %[[RET]] : index +func.func @sparse_iteration_to_scf(%sp : tensor<4x8xf32, #COO>) -> index { + %i = arith.constant 0 : index + %c1 = arith.constant 1 : index + %l1 = sparse_tensor.extract_iteration_space %sp lvls = 0 + : tensor<4x8xf32, #COO> -> !sparse_tensor.iter_space<#COO, lvls = 0> + %r1 = sparse_tensor.iterate %it1 in %l1 iter_args(%outer = %i): !sparse_tensor.iter_space<#COO, lvls = 0 to 1> -> index { + %l2 = sparse_tensor.extract_iteration_space %sp at %it1 lvls = 1 + : tensor<4x8xf32, #COO>, !sparse_tensor.iterator<#COO, lvls = 0 to 1> -> !sparse_tensor.iter_space<#COO, lvls = 1> + %r2 = sparse_tensor.iterate %it2 in %l2 iter_args(%inner = %outer): !sparse_tensor.iter_space<#COO, lvls = 1 to 2> -> index { + %k = arith.addi %inner, %c1 : index + sparse_tensor.yield %k : index + } + sparse_tensor.yield %r2 : index + } + return %r1 : index } From dcb6c0d71c8dbb6bb17391c968c3716cfafd3765 Mon Sep 17 00:00:00 2001 From: Fabian Mora Date: Mon, 17 Jun 2024 15:44:35 -0500 Subject: [PATCH 10/25] Reland [mlir][Target] Improve ROCDL gpu serialization API (#95813) Reland: https://github.com/llvm/llvm-project/pull/95456 This patch improves the ROCDL gpu serialization API by: - Introducing the enum `AMDGCNLibraries` for specifying the AMD GCN device code libraries to use during linking. - Removing `getCommonBitcodeLibs` in favor of `AMDGCNLibraries`. Previously `getCommonBitcodeLibs` would try to load all AMD GCN bitcode librariesm now it will only load the requested libraries. - Exposing the `compileToBinary` method and making it virtual, allowing downstream users to re-use this method. - Exposing `moduleToObjectImpl`, this method provides a prototype flow for compiling to binary, allowing downstream users to re-use this method. - It also avoids constructing the control variables if no device libraries are being used. - Changes the style of the error messages to be composable, ie no full stops. - Adds an error message for when the ROCm toolkit can't be found but it was required. --- mlir/include/mlir/Target/LLVM/ROCDL/Utils.h | 41 ++- mlir/lib/Dialect/GPU/CMakeLists.txt | 2 +- mlir/lib/Target/LLVM/CMakeLists.txt | 13 +- mlir/lib/Target/LLVM/ROCDL/Target.cpp | 306 +++++++++++--------- 4 files changed, 213 insertions(+), 149 deletions(-) diff --git a/mlir/include/mlir/Target/LLVM/ROCDL/Utils.h b/mlir/include/mlir/Target/LLVM/ROCDL/Utils.h index 374fa65bd02e3..44c9ded317fa5 100644 --- a/mlir/include/mlir/Target/LLVM/ROCDL/Utils.h +++ b/mlir/include/mlir/Target/LLVM/ROCDL/Utils.h @@ -27,6 +27,19 @@ namespace ROCDL { /// 5. Returns an empty string. StringRef getROCMPath(); +/// Helper enum for specifying the AMD GCN device libraries required for +/// compilation. +enum class AMDGCNLibraries : uint32_t { + None = 0, + Ockl = 1, + Ocml = 2, + OpenCL = 4, + Hip = 8, + LastLib = Hip, + LLVM_MARK_AS_BITMASK_ENUM(LastLib), + All = (LastLib << 1) - 1 +}; + /// Base class for all ROCDL serializations from GPU modules into binary /// strings. By default this class serializes into LLVM bitcode. class SerializeGPUModuleBase : public LLVM::ModuleToObject { @@ -49,8 +62,8 @@ class SerializeGPUModuleBase : public LLVM::ModuleToObject { /// Returns the bitcode files to be loaded. ArrayRef getFileList() const; - /// Appends standard ROCm device libraries like `ocml.bc`, `ockl.bc`, etc. - LogicalResult appendStandardLibs(); + /// Appends standard ROCm device libraries to `fileList`. + LogicalResult appendStandardLibs(AMDGCNLibraries libs); /// Loads the bitcode files in `fileList`. virtual std::optional>> @@ -63,15 +76,20 @@ class SerializeGPUModuleBase : public LLVM::ModuleToObject { LogicalResult handleBitcodeFile(llvm::Module &module) override; protected: - /// Appends the paths of common ROCm device libraries to `libs`. - LogicalResult getCommonBitcodeLibs(llvm::SmallVector &libs, - SmallVector &libPath, - StringRef isaVersion); - /// Adds `oclc` control variables to the LLVM module. - void addControlVariables(llvm::Module &module, bool wave64, bool daz, - bool finiteOnly, bool unsafeMath, bool fastMath, - bool correctSqrt, StringRef abiVer); + void addControlVariables(llvm::Module &module, AMDGCNLibraries libs, + bool wave64, bool daz, bool finiteOnly, + bool unsafeMath, bool fastMath, bool correctSqrt, + StringRef abiVer); + + /// Compiles assembly to a binary. + virtual std::optional> + compileToBinary(const std::string &serializedISA); + + /// Default implementation of `ModuleToObject::moduleToObject`. + std::optional> + moduleToObjectImpl(const gpu::TargetOptions &targetOptions, + llvm::Module &llvmModule); /// Returns the assembled ISA. std::optional> assembleIsa(StringRef isa); @@ -84,6 +102,9 @@ class SerializeGPUModuleBase : public LLVM::ModuleToObject { /// List of LLVM bitcode files to link to. SmallVector fileList; + + /// AMD GCN libraries to use when linking, the default is using none. + AMDGCNLibraries deviceLibs = AMDGCNLibraries::None; }; } // namespace ROCDL } // namespace mlir diff --git a/mlir/lib/Dialect/GPU/CMakeLists.txt b/mlir/lib/Dialect/GPU/CMakeLists.txt index 61ab298ebfb98..08c8aea36fac9 100644 --- a/mlir/lib/Dialect/GPU/CMakeLists.txt +++ b/mlir/lib/Dialect/GPU/CMakeLists.txt @@ -106,7 +106,7 @@ if(MLIR_ENABLE_ROCM_CONVERSIONS) "Building mlir with ROCm support requires the AMDGPU backend") endif() - set(DEFAULT_ROCM_PATH "/opt/rocm" CACHE PATH "Fallback path to search for ROCm installs") + set(DEFAULT_ROCM_PATH "" CACHE PATH "Fallback path to search for ROCm installs") target_compile_definitions(obj.MLIRGPUTransforms PRIVATE __DEFAULT_ROCM_PATH__="${DEFAULT_ROCM_PATH}" diff --git a/mlir/lib/Target/LLVM/CMakeLists.txt b/mlir/lib/Target/LLVM/CMakeLists.txt index 5a3fa160850b4..6e146710d67af 100644 --- a/mlir/lib/Target/LLVM/CMakeLists.txt +++ b/mlir/lib/Target/LLVM/CMakeLists.txt @@ -123,17 +123,18 @@ add_mlir_dialect_library(MLIRROCDLTarget ) if(MLIR_ENABLE_ROCM_CONVERSIONS) - if (NOT ("AMDGPU" IN_LIST LLVM_TARGETS_TO_BUILD)) - message(SEND_ERROR - "Building mlir with ROCm support requires the AMDGPU backend") - endif() - if (DEFINED ROCM_PATH) set(DEFAULT_ROCM_PATH "${ROCM_PATH}" CACHE PATH "Fallback path to search for ROCm installs") elseif(DEFINED ENV{ROCM_PATH}) set(DEFAULT_ROCM_PATH "$ENV{ROCM_PATH}" CACHE PATH "Fallback path to search for ROCm installs") else() - set(DEFAULT_ROCM_PATH "/opt/rocm" CACHE PATH "Fallback path to search for ROCm installs") + IF (WIN32) + # Avoid setting an UNIX path for Windows. + # TODO: Eventually migrate to FindHIP once it becomes a part of CMake. + set(DEFAULT_ROCM_PATH "" CACHE PATH "Fallback path to search for ROCm installs") + else() + set(DEFAULT_ROCM_PATH "/opt/rocm" CACHE PATH "Fallback path to search for ROCm installs") + endif() endif() message(VERBOSE "MLIR Default ROCM toolkit path: ${DEFAULT_ROCM_PATH}") diff --git a/mlir/lib/Target/LLVM/ROCDL/Target.cpp b/mlir/lib/Target/LLVM/ROCDL/Target.cpp index cc13e5b7436ea..6784f3668bde3 100644 --- a/mlir/lib/Target/LLVM/ROCDL/Target.cpp +++ b/mlir/lib/Target/LLVM/ROCDL/Target.cpp @@ -17,9 +17,6 @@ #include "mlir/Dialect/LLVMIR/ROCDLDialect.h" #include "mlir/Support/FileUtilities.h" #include "mlir/Target/LLVM/ROCDL/Utils.h" -#include "mlir/Target/LLVMIR/Dialect/GPU/GPUToLLVMIRTranslation.h" -#include "mlir/Target/LLVMIR/Dialect/LLVMIR/LLVMToLLVMIRTranslation.h" -#include "mlir/Target/LLVMIR/Dialect/ROCDL/ROCDLToLLVMIRTranslation.h" #include "mlir/Target/LLVMIR/Export.h" #include "llvm/IR/Constants.h" @@ -112,8 +109,9 @@ SerializeGPUModuleBase::SerializeGPUModuleBase( if (auto file = dyn_cast(attr)) fileList.push_back(file.str()); - // Append standard ROCm device bitcode libraries to the files to be loaded. - (void)appendStandardLibs(); + // By default add all libraries if the toolkit path is not empty. + if (!getToolkitPath().empty()) + deviceLibs = AMDGCNLibraries::All; } void SerializeGPUModuleBase::init() { @@ -138,29 +136,67 @@ ArrayRef SerializeGPUModuleBase::getFileList() const { return fileList; } -LogicalResult SerializeGPUModuleBase::appendStandardLibs() { +LogicalResult SerializeGPUModuleBase::appendStandardLibs(AMDGCNLibraries libs) { + if (libs == AMDGCNLibraries::None) + return success(); StringRef pathRef = getToolkitPath(); - if (!pathRef.empty()) { - SmallVector path; - path.insert(path.begin(), pathRef.begin(), pathRef.end()); - llvm::sys::path::append(path, "amdgcn", "bitcode"); - pathRef = StringRef(path.data(), path.size()); - if (!llvm::sys::fs::is_directory(pathRef)) { - getOperation().emitRemark() << "ROCm amdgcn bitcode path: " << pathRef - << " does not exist or is not a directory."; - return failure(); - } - StringRef isaVersion = - llvm::AMDGPU::getArchNameAMDGCN(llvm::AMDGPU::parseArchAMDGCN(chip)); - isaVersion.consume_front("gfx"); - return getCommonBitcodeLibs(fileList, path, isaVersion); + // Fail if the toolkit is empty. + if (pathRef.empty()) + return failure(); + + // Get the path for the device libraries + SmallString<256> path; + path.insert(path.begin(), pathRef.begin(), pathRef.end()); + llvm::sys::path::append(path, "amdgcn", "bitcode"); + pathRef = StringRef(path.data(), path.size()); + + // Fail if the path is invalid. + if (!llvm::sys::fs::is_directory(pathRef)) { + getOperation().emitRemark() << "ROCm amdgcn bitcode path: " << pathRef + << " does not exist or is not a directory"; + return failure(); } + + // Get the ISA version. + StringRef isaVersion = + llvm::AMDGPU::getArchNameAMDGCN(llvm::AMDGPU::parseArchAMDGCN(chip)); + isaVersion.consume_front("gfx"); + + // Helper function for adding a library. + auto addLib = [&](const Twine &lib) -> bool { + auto baseSize = path.size(); + llvm::sys::path::append(path, lib); + StringRef pathRef(path.data(), path.size()); + if (!llvm::sys::fs::is_regular_file(pathRef)) { + getOperation().emitRemark() << "bitcode library path: " << pathRef + << " does not exist or is not a file"; + return true; + } + fileList.push_back(pathRef.str()); + path.truncate(baseSize); + return false; + }; + + // Add ROCm device libraries. Fail if any of the libraries is not found, ie. + // if any of the `addLib` failed. + if ((any(libs & AMDGCNLibraries::Ocml) && addLib("ocml.bc")) || + (any(libs & AMDGCNLibraries::Ockl) && addLib("ockl.bc")) || + (any(libs & AMDGCNLibraries::Hip) && addLib("hip.bc")) || + (any(libs & AMDGCNLibraries::OpenCL) && addLib("opencl.bc")) || + (any(libs & (AMDGCNLibraries::Ocml | AMDGCNLibraries::Ockl)) && + addLib("oclc_isa_version_" + isaVersion + ".bc"))) + return failure(); return success(); } std::optional>> SerializeGPUModuleBase::loadBitcodeFiles(llvm::Module &module) { SmallVector> bcFiles; + // Return if there are no libs to load. + if (deviceLibs == AMDGCNLibraries::None && fileList.empty()) + return bcFiles; + if (failed(appendStandardLibs(deviceLibs))) + return std::nullopt; if (failed(loadBitcodeFilesFromList(module.getContext(), fileList, bcFiles, true))) return std::nullopt; @@ -174,80 +210,76 @@ LogicalResult SerializeGPUModuleBase::handleBitcodeFile(llvm::Module &module) { // Stop spamming us with clang version numbers if (auto *ident = module.getNamedMetadata("llvm.ident")) module.eraseNamedMetadata(ident); + // Override the libModules datalayout and target triple with the compiler's + // data layout should there be a discrepency. + setDataLayoutAndTriple(module); return success(); } void SerializeGPUModuleBase::handleModulePreLink(llvm::Module &module) { - [[maybe_unused]] std::optional targetMachine = - getOrCreateTargetMachine(); - assert(targetMachine && "expect a TargetMachine"); - addControlVariables(module, target.hasWave64(), target.hasDaz(), + // If all libraries are not set, traverse the module to determine which + // libraries are required. + if (deviceLibs != AMDGCNLibraries::All) { + for (llvm::Function &f : module.functions()) { + if (f.hasExternalLinkage() && f.hasName() && !f.hasExactDefinition()) { + StringRef funcName = f.getName(); + if ("printf" == funcName) + deviceLibs |= AMDGCNLibraries::OpenCL | AMDGCNLibraries::Ockl | + AMDGCNLibraries::Ocml; + if (funcName.starts_with("__ockl_")) + deviceLibs |= AMDGCNLibraries::Ockl; + if (funcName.starts_with("__ocml_")) + deviceLibs |= AMDGCNLibraries::Ocml; + } + } + } + addControlVariables(module, deviceLibs, target.hasWave64(), target.hasDaz(), target.hasFiniteOnly(), target.hasUnsafeMath(), target.hasFastMath(), target.hasCorrectSqrt(), target.getAbi()); } -// Get the paths of ROCm device libraries. -LogicalResult SerializeGPUModuleBase::getCommonBitcodeLibs( - llvm::SmallVector &libs, SmallVector &libPath, - StringRef isaVersion) { - auto addLib = [&](StringRef path) -> bool { - if (!llvm::sys::fs::is_regular_file(path)) { - getOperation().emitRemark() << "Bitcode library path: " << path - << " does not exist or is not a file.\n"; - return true; - } - libs.push_back(path.str()); - return false; - }; - auto getLibPath = [&libPath](Twine lib) { - auto baseSize = libPath.size(); - llvm::sys::path::append(libPath, lib + ".bc"); - std::string path(StringRef(libPath.data(), libPath.size()).str()); - libPath.truncate(baseSize); - return path; - }; - - // Add ROCm device libraries. Fail if any of the libraries is not found. - if (addLib(getLibPath("ocml")) || addLib(getLibPath("ockl")) || - addLib(getLibPath("hip")) || addLib(getLibPath("opencl")) || - addLib(getLibPath("oclc_isa_version_" + isaVersion))) - return failure(); - return success(); -} - void SerializeGPUModuleBase::addControlVariables( - llvm::Module &module, bool wave64, bool daz, bool finiteOnly, - bool unsafeMath, bool fastMath, bool correctSqrt, StringRef abiVer) { - llvm::Type *i8Ty = llvm::Type::getInt8Ty(module.getContext()); - auto addControlVariable = [i8Ty, &module](StringRef name, bool enable) { + llvm::Module &module, AMDGCNLibraries libs, bool wave64, bool daz, + bool finiteOnly, bool unsafeMath, bool fastMath, bool correctSqrt, + StringRef abiVer) { + // Return if no device libraries are required. + if (libs == AMDGCNLibraries::None) + return; + // Helper function for adding control variables. + auto addControlVariable = [&module](StringRef name, uint32_t value, + uint32_t bitwidth) { + if (module.getNamedGlobal(name)) { + return; + } + llvm::IntegerType *type = + llvm::IntegerType::getIntNTy(module.getContext(), bitwidth); llvm::GlobalVariable *controlVariable = new llvm::GlobalVariable( - module, i8Ty, true, llvm::GlobalValue::LinkageTypes::LinkOnceODRLinkage, - llvm::ConstantInt::get(i8Ty, enable), name, nullptr, - llvm::GlobalValue::ThreadLocalMode::NotThreadLocal, 4); + module, /*isConstant=*/type, true, + llvm::GlobalValue::LinkageTypes::LinkOnceODRLinkage, + llvm::ConstantInt::get(type, value), name, /*before=*/nullptr, + /*threadLocalMode=*/llvm::GlobalValue::ThreadLocalMode::NotThreadLocal, + /*addressSpace=*/4); controlVariable->setVisibility( llvm::GlobalValue::VisibilityTypes::ProtectedVisibility); - controlVariable->setAlignment(llvm::MaybeAlign(1)); + controlVariable->setAlignment(llvm::MaybeAlign(bitwidth / 8)); controlVariable->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Local); }; - addControlVariable("__oclc_finite_only_opt", finiteOnly || fastMath); - addControlVariable("__oclc_unsafe_math_opt", unsafeMath || fastMath); - addControlVariable("__oclc_daz_opt", daz || fastMath); - addControlVariable("__oclc_correctly_rounded_sqrt32", - correctSqrt && !fastMath); - addControlVariable("__oclc_wavefrontsize64", wave64); - - llvm::Type *i32Ty = llvm::Type::getInt32Ty(module.getContext()); - int abi = 500; - abiVer.getAsInteger(0, abi); - llvm::GlobalVariable *abiVersion = new llvm::GlobalVariable( - module, i32Ty, true, llvm::GlobalValue::LinkageTypes::LinkOnceODRLinkage, - llvm::ConstantInt::get(i32Ty, abi), "__oclc_ABI_version", nullptr, - llvm::GlobalValue::ThreadLocalMode::NotThreadLocal, 4); - abiVersion->setVisibility( - llvm::GlobalValue::VisibilityTypes::ProtectedVisibility); - abiVersion->setAlignment(llvm::MaybeAlign(4)); - abiVersion->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Local); + // Add ocml related control variables. + if (any(libs & AMDGCNLibraries::Ocml)) { + addControlVariable("__oclc_finite_only_opt", finiteOnly || fastMath, 8); + addControlVariable("__oclc_daz_opt", daz || fastMath, 8); + addControlVariable("__oclc_correctly_rounded_sqrt32", + correctSqrt && !fastMath, 8); + addControlVariable("__oclc_unsafe_math_opt", unsafeMath || fastMath, 8); + } + // Add ocml or ockl related control variables. + if (any(libs & (AMDGCNLibraries::Ocml | AMDGCNLibraries::Ockl))) { + addControlVariable("__oclc_wavefrontsize64", wave64, 8); + int abi = 500; + abiVer.getAsInteger(0, abi); + addControlVariable("__oclc_ABI_version", abi, 32); + } } std::optional> @@ -312,48 +344,16 @@ SerializeGPUModuleBase::assembleIsa(StringRef isa) { parser->setTargetParser(*tap); parser->Run(false); - return result; } -#if MLIR_ENABLE_ROCM_CONVERSIONS -namespace { -class AMDGPUSerializer : public SerializeGPUModuleBase { -public: - AMDGPUSerializer(Operation &module, ROCDLTargetAttr target, - const gpu::TargetOptions &targetOptions); - - gpu::GPUModuleOp getOperation(); - - // Compile to HSA. - std::optional> - compileToBinary(const std::string &serializedISA); - - std::optional> - moduleToObject(llvm::Module &llvmModule) override; - -private: - // Target options. - gpu::TargetOptions targetOptions; -}; -} // namespace - -AMDGPUSerializer::AMDGPUSerializer(Operation &module, ROCDLTargetAttr target, - const gpu::TargetOptions &targetOptions) - : SerializeGPUModuleBase(module, target, targetOptions), - targetOptions(targetOptions) {} - -gpu::GPUModuleOp AMDGPUSerializer::getOperation() { - return dyn_cast(&SerializeGPUModuleBase::getOperation()); -} - std::optional> -AMDGPUSerializer::compileToBinary(const std::string &serializedISA) { +SerializeGPUModuleBase::compileToBinary(const std::string &serializedISA) { // Assemble the ISA. std::optional> isaBinary = assembleIsa(serializedISA); if (!isaBinary) { - getOperation().emitError() << "Failed during ISA assembling."; + getOperation().emitError() << "failed during ISA assembling"; return std::nullopt; } @@ -363,7 +363,7 @@ AMDGPUSerializer::compileToBinary(const std::string &serializedISA) { if (llvm::sys::fs::createTemporaryFile("kernel%%", "o", tempIsaBinaryFd, tempIsaBinaryFilename)) { getOperation().emitError() - << "Failed to create a temporary file for dumping the ISA binary."; + << "failed to create a temporary file for dumping the ISA binary"; return std::nullopt; } llvm::FileRemover cleanupIsaBinary(tempIsaBinaryFilename); @@ -378,7 +378,7 @@ AMDGPUSerializer::compileToBinary(const std::string &serializedISA) { if (llvm::sys::fs::createTemporaryFile("kernel", "hsaco", tempHsacoFilename)) { getOperation().emitError() - << "Failed to create a temporary file for the HSA code object."; + << "failed to create a temporary file for the HSA code object"; return std::nullopt; } llvm::FileRemover cleanupHsaco(tempHsacoFilename); @@ -389,7 +389,7 @@ AMDGPUSerializer::compileToBinary(const std::string &serializedISA) { lldPath, {"ld.lld", "-shared", tempIsaBinaryFilename, "-o", tempHsacoFilename}); if (lldResult != 0) { - getOperation().emitError() << "lld invocation failed."; + getOperation().emitError() << "lld invocation failed"; return std::nullopt; } @@ -398,7 +398,7 @@ AMDGPUSerializer::compileToBinary(const std::string &serializedISA) { llvm::MemoryBuffer::getFile(tempHsacoFilename, /*IsText=*/false); if (!hsacoFile) { getOperation().emitError() - << "Failed to read the HSA code object from the temp file."; + << "failed to read the HSA code object from the temp file"; return std::nullopt; } @@ -407,13 +407,13 @@ AMDGPUSerializer::compileToBinary(const std::string &serializedISA) { return SmallVector(buffer.begin(), buffer.end()); } -std::optional> -AMDGPUSerializer::moduleToObject(llvm::Module &llvmModule) { +std::optional> SerializeGPUModuleBase::moduleToObjectImpl( + const gpu::TargetOptions &targetOptions, llvm::Module &llvmModule) { // Return LLVM IR if the compilation target is offload. #define DEBUG_TYPE "serialize-to-llvm" LLVM_DEBUG({ - llvm::dbgs() << "LLVM IR for module: " << getOperation().getNameAttr() - << "\n" + llvm::dbgs() << "LLVM IR for module: " + << cast(getOperation()).getNameAttr() << "\n" << llvmModule << "\n"; }); #undef DEBUG_TYPE @@ -423,8 +423,8 @@ AMDGPUSerializer::moduleToObject(llvm::Module &llvmModule) { std::optional targetMachine = getOrCreateTargetMachine(); if (!targetMachine) { - getOperation().emitError() << "Target Machine unavailable for triple " - << triple << ", can't compile with LLVM\n"; + getOperation().emitError() << "target Machine unavailable for triple " + << triple << ", can't compile with LLVM"; return std::nullopt; } @@ -432,12 +432,13 @@ AMDGPUSerializer::moduleToObject(llvm::Module &llvmModule) { std::optional serializedISA = translateToISA(llvmModule, **targetMachine); if (!serializedISA) { - getOperation().emitError() << "Failed translating the module to ISA."; + getOperation().emitError() << "failed translating the module to ISA"; return std::nullopt; } #define DEBUG_TYPE "serialize-to-isa" LLVM_DEBUG({ - llvm::dbgs() << "ISA for module: " << getOperation().getNameAttr() << "\n" + llvm::dbgs() << "ISA for module: " + << cast(getOperation()).getNameAttr() << "\n" << *serializedISA << "\n"; }); #undef DEBUG_TYPE @@ -445,9 +446,45 @@ AMDGPUSerializer::moduleToObject(llvm::Module &llvmModule) { if (targetOptions.getCompilationTarget() == gpu::CompilationTarget::Assembly) return SmallVector(serializedISA->begin(), serializedISA->end()); + // Compiling to binary requires a valid ROCm path, fail if it's not found. + if (getToolkitPath().empty()) + getOperation().emitError() << "invalid ROCm path, please set a valid path"; + // Compile to binary. return compileToBinary(*serializedISA); } + +#if MLIR_ENABLE_ROCM_CONVERSIONS +namespace { +class AMDGPUSerializer : public SerializeGPUModuleBase { +public: + AMDGPUSerializer(Operation &module, ROCDLTargetAttr target, + const gpu::TargetOptions &targetOptions); + + gpu::GPUModuleOp getOperation(); + + std::optional> + moduleToObject(llvm::Module &llvmModule) override; + +private: + // Target options. + gpu::TargetOptions targetOptions; +}; +} // namespace + +AMDGPUSerializer::AMDGPUSerializer(Operation &module, ROCDLTargetAttr target, + const gpu::TargetOptions &targetOptions) + : SerializeGPUModuleBase(module, target, targetOptions), + targetOptions(targetOptions) {} + +gpu::GPUModuleOp AMDGPUSerializer::getOperation() { + return dyn_cast(&SerializeGPUModuleBase::getOperation()); +} + +std::optional> +AMDGPUSerializer::moduleToObject(llvm::Module &llvmModule) { + return moduleToObjectImpl(targetOptions, llvmModule); +} #endif // MLIR_ENABLE_ROCM_CONVERSIONS std::optional> ROCDLTargetAttrImpl::serializeToObject( @@ -457,7 +494,7 @@ std::optional> ROCDLTargetAttrImpl::serializeToObject( if (!module) return std::nullopt; if (!mlir::isa(module)) { - module->emitError("Module must be a GPU module."); + module->emitError("module must be a GPU module"); return std::nullopt; } #if MLIR_ENABLE_ROCM_CONVERSIONS @@ -466,8 +503,8 @@ std::optional> ROCDLTargetAttrImpl::serializeToObject( serializer.init(); return serializer.run(); #else - module->emitError("The `AMDGPU` target was not built. Please enable it when " - "building LLVM."); + module->emitError("the `AMDGPU` target was not built. Please enable it when " + "building LLVM"); return std::nullopt; #endif // MLIR_ENABLE_ROCM_CONVERSIONS } @@ -477,10 +514,15 @@ ROCDLTargetAttrImpl::createObject(Attribute attribute, const SmallVector &object, const gpu::TargetOptions &options) const { gpu::CompilationTarget format = options.getCompilationTarget(); + // If format is `fatbin` transform it to binary as `fatbin` is not yet + // supported. + if (format > gpu::CompilationTarget::Binary) + format = gpu::CompilationTarget::Binary; + + DictionaryAttr properties{}; Builder builder(attribute.getContext()); return builder.getAttr( - attribute, - format > gpu::CompilationTarget::Binary ? gpu::CompilationTarget::Binary - : format, - builder.getStringAttr(StringRef(object.data(), object.size())), nullptr); + attribute, format, + builder.getStringAttr(StringRef(object.data(), object.size())), + properties); } From 44ca65661e2cdd5636f592f573a2837e6ae948ba Mon Sep 17 00:00:00 2001 From: Joseph Huber Date: Mon, 17 Jun 2024 15:51:26 -0500 Subject: [PATCH 11/25] [libc] Only include getauxval on AARCH64 targets (#95844) Summary: Not all platforms support this function or header, but it was being included by every test. Move it inside of the `ifdef` for the only user, which is aarch64. --- libc/test/IntegrationTest/test.cpp | 3 ++- libc/test/UnitTest/HermeticTestUtils.cpp | 3 ++- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/libc/test/IntegrationTest/test.cpp b/libc/test/IntegrationTest/test.cpp index a8b2f2911fd8e..0c961dbafb840 100644 --- a/libc/test/IntegrationTest/test.cpp +++ b/libc/test/IntegrationTest/test.cpp @@ -7,7 +7,6 @@ //===----------------------------------------------------------------------===// #include "src/__support/common.h" -#include "src/sys/auxv/getauxval.h" #include #include @@ -83,6 +82,8 @@ void *realloc(void *ptr, size_t s) { void *__dso_handle = nullptr; #ifdef LIBC_TARGET_ARCH_IS_AARCH64 +#include "src/sys/auxv/getauxval.h" + // Due to historical reasons, libgcc on aarch64 may expect __getauxval to be // defined. See also https://gcc.gnu.org/pipermail/gcc-cvs/2020-June/300635.html unsigned long __getauxval(unsigned long id) { diff --git a/libc/test/UnitTest/HermeticTestUtils.cpp b/libc/test/UnitTest/HermeticTestUtils.cpp index 6e815e6c8aab0..191e54b7344a6 100644 --- a/libc/test/UnitTest/HermeticTestUtils.cpp +++ b/libc/test/UnitTest/HermeticTestUtils.cpp @@ -7,7 +7,6 @@ //===----------------------------------------------------------------------===// #include "src/__support/common.h" -#include "src/sys/auxv/getauxval.h" #include #include @@ -111,6 +110,8 @@ void __cxa_pure_virtual() { void *__dso_handle = nullptr; #ifdef LIBC_TARGET_ARCH_IS_AARCH64 +#include "src/sys/auxv/getauxval.h" + // Due to historical reasons, libgcc on aarch64 may expect __getauxval to be // defined. See also https://gcc.gnu.org/pipermail/gcc-cvs/2020-June/300635.html unsigned long __getauxval(unsigned long id) { From a50bcc03cbaecf6473c6bf41f4497758a7876f3d Mon Sep 17 00:00:00 2001 From: lntue <35648136+lntue@users.noreply.github.com> Date: Mon, 17 Jun 2024 16:56:19 -0400 Subject: [PATCH 12/25] [libc][stdlib] Only add internal malloc in full build mode. Use the system malloc in overlay mode. (#95845) This causes an issue in overlay mode: https://github.com/llvm/llvm-project/pull/95736#issuecomment-2172739705 --- libc/src/stdlib/CMakeLists.txt | 26 +++++++++++++++++--------- 1 file changed, 17 insertions(+), 9 deletions(-) diff --git a/libc/src/stdlib/CMakeLists.txt b/libc/src/stdlib/CMakeLists.txt index e26c19f03f5ab..fdbf7b75e72f4 100644 --- a/libc/src/stdlib/CMakeLists.txt +++ b/libc/src/stdlib/CMakeLists.txt @@ -418,15 +418,23 @@ else() libc.src.string.memory_utils.inline_memcpy libc.src.string.memory_utils.inline_memset ) - add_entrypoint_object( - malloc - SRCS - freelist_malloc.cpp - HDRS - malloc.h - DEPENDS - .freelist_heap - ) + # Only add malloc in full build mode. Use the system malloc in overlay mode. + if(LLVM_LIBC_FULL_BUILD) + add_entrypoint_object( + malloc + SRCS + freelist_malloc.cpp + HDRS + malloc.h + DEPENDS + .freelist_heap + ) + else() + add_entrypoint_external( + malloc + ) + endif() + add_entrypoint_external( free ) From 16aa39ad94350670f4d72dace0a4866fbe10d716 Mon Sep 17 00:00:00 2001 From: Joshua Baehring <98630690+JoshuaMBa@users.noreply.github.com> Date: Mon, 17 Jun 2024 14:06:31 -0700 Subject: [PATCH 13/25] [scudo] Update error handling for seondary cache entry count (#95595) Initially, the scudo allocator would return an error if the user attempted to set the cache capacity (i.e. the number of possible entries in the cache) above the maximum cache capacity. Now the allocator will resort to using the maximum cache capacity in this event. An error will still be returned if the user attempts to set the number of entries to a negative value. --- compiler-rt/lib/scudo/standalone/secondary.h | 7 ++++--- compiler-rt/lib/scudo/standalone/tests/secondary_test.cpp | 6 +++--- 2 files changed, 7 insertions(+), 6 deletions(-) diff --git a/compiler-rt/lib/scudo/standalone/secondary.h b/compiler-rt/lib/scudo/standalone/secondary.h index d8c9f5bcfcaf6..9a8e53be388b7 100644 --- a/compiler-rt/lib/scudo/standalone/secondary.h +++ b/compiler-rt/lib/scudo/standalone/secondary.h @@ -391,10 +391,11 @@ template class MapAllocatorCache { return true; } if (O == Option::MaxCacheEntriesCount) { - const u32 MaxCount = static_cast(Value); - if (MaxCount > Config::getEntriesArraySize()) + if (Value < 0) return false; - atomic_store_relaxed(&MaxEntriesCount, MaxCount); + atomic_store_relaxed( + &MaxEntriesCount, + Min(static_cast(Value), Config::getEntriesArraySize())); return true; } if (O == Option::MaxCacheEntrySize) { diff --git a/compiler-rt/lib/scudo/standalone/tests/secondary_test.cpp b/compiler-rt/lib/scudo/standalone/tests/secondary_test.cpp index 8f0250e88ebf3..af69313214ea6 100644 --- a/compiler-rt/lib/scudo/standalone/tests/secondary_test.cpp +++ b/compiler-rt/lib/scudo/standalone/tests/secondary_test.cpp @@ -192,9 +192,9 @@ TEST_F(MapAllocatorTest, SecondaryIterate) { TEST_F(MapAllocatorTest, SecondaryOptions) { // Attempt to set a maximum number of entries higher than the array size. - EXPECT_FALSE( - Allocator->setOption(scudo::Option::MaxCacheEntriesCount, 4096U)); - // A negative number will be cast to a scudo::u32, and fail. + EXPECT_TRUE(Allocator->setOption(scudo::Option::MaxCacheEntriesCount, 4096U)); + + // Attempt to set an invalid (negative) number of entries EXPECT_FALSE(Allocator->setOption(scudo::Option::MaxCacheEntriesCount, -1)); if (Allocator->canCache(0U)) { // Various valid combinations. From 6037a698b919e0c8dbf39673d68835c49dc5130a Mon Sep 17 00:00:00 2001 From: Mircea Trofin Date: Mon, 17 Jun 2024 14:18:55 -0700 Subject: [PATCH 14/25] [mlgo] inline for size: add bypass mechanism for perserving performance (#95616) This allows shrinking for size the cold part of the code, without sacrificing performance. --- llvm/include/llvm/Analysis/MLInlineAdvisor.h | 2 + llvm/lib/Analysis/MLInlineAdvisor.cpp | 18 ++++- .../models/gen-inline-oz-test-model.py | 14 +++- llvm/test/Transforms/Inline/ML/bypass.ll | 78 +++++++++++++++++++ 4 files changed, 107 insertions(+), 5 deletions(-) create mode 100644 llvm/test/Transforms/Inline/ML/bypass.ll diff --git a/llvm/include/llvm/Analysis/MLInlineAdvisor.h b/llvm/include/llvm/Analysis/MLInlineAdvisor.h index f58862e533529..2aa077fe0e035 100644 --- a/llvm/include/llvm/Analysis/MLInlineAdvisor.h +++ b/llvm/include/llvm/Analysis/MLInlineAdvisor.h @@ -13,6 +13,7 @@ #include "llvm/Analysis/InlineAdvisor.h" #include "llvm/Analysis/LazyCallGraph.h" #include "llvm/Analysis/MLModelRunner.h" +#include "llvm/Analysis/ProfileSummaryInfo.h" #include "llvm/IR/PassManager.h" #include @@ -89,6 +90,7 @@ class MLInlineAdvisor : public InlineAdvisor { llvm::SmallPtrSet NodesInLastSCC; DenseSet AllNodes; bool ForceStop = false; + ProfileSummaryInfo &PSI; }; /// InlineAdvice that tracks changes post inlining. For that reason, it only diff --git a/llvm/lib/Analysis/MLInlineAdvisor.cpp b/llvm/lib/Analysis/MLInlineAdvisor.cpp index 75eb8ece2e447..21946572339b9 100644 --- a/llvm/lib/Analysis/MLInlineAdvisor.cpp +++ b/llvm/lib/Analysis/MLInlineAdvisor.cpp @@ -14,6 +14,7 @@ #include "llvm/Analysis/MLInlineAdvisor.h" #include "llvm/ADT/SCCIterator.h" #include "llvm/Analysis/AssumptionCache.h" +#include "llvm/Analysis/BlockFrequencyInfo.h" #include "llvm/Analysis/CallGraph.h" #include "llvm/Analysis/FunctionPropertiesAnalysis.h" #include "llvm/Analysis/InlineCost.h" @@ -23,6 +24,7 @@ #include "llvm/Analysis/LoopInfo.h" #include "llvm/Analysis/MLModelRunner.h" #include "llvm/Analysis/OptimizationRemarkEmitter.h" +#include "llvm/Analysis/ProfileSummaryInfo.h" #include "llvm/Analysis/ReleaseModeModelRunner.h" #include "llvm/Analysis/TargetTransformInfo.h" #include "llvm/IR/Dominators.h" @@ -46,6 +48,14 @@ static cl::opt InteractiveIncludeDefault("inliner-interactive-include-default", cl::Hidden, cl::desc(InclDefaultMsg)); +enum class SkipMLPolicyCriteria { Never, IfCallerIsNotCold }; + +static cl::opt SkipPolicy( + "ml-inliner-skip-policy", cl::Hidden, cl::init(SkipMLPolicyCriteria::Never), + cl::values(clEnumValN(SkipMLPolicyCriteria::Never, "never", "never"), + clEnumValN(SkipMLPolicyCriteria::IfCallerIsNotCold, + "if-caller-not-cold", "if the caller is not cold"))); + #if defined(LLVM_HAVE_TF_AOT_INLINERSIZEMODEL) // codegen-ed file #include "InlinerSizeModel.h" // NOLINT @@ -129,7 +139,8 @@ MLInlineAdvisor::MLInlineAdvisor( M, MAM.getResult(M).getManager()), ModelRunner(std::move(Runner)), GetDefaultAdvice(GetDefaultAdvice), CG(MAM.getResult(M)), - InitialIRSize(getModuleIRSize()), CurrentIRSize(InitialIRSize) { + InitialIRSize(getModuleIRSize()), CurrentIRSize(InitialIRSize), + PSI(MAM.getResult(M)) { assert(ModelRunner); ModelRunner->switchContext(""); // Extract the 'call site height' feature - the position of a call site @@ -334,6 +345,11 @@ std::unique_ptr MLInlineAdvisor::getAdviceImpl(CallBase &CB) { auto &TIR = FAM.getResult(Callee); auto &ORE = FAM.getResult(Caller); + if (SkipPolicy == SkipMLPolicyCriteria::IfCallerIsNotCold) { + if (!PSI.isFunctionEntryCold(&Caller)) + return std::make_unique(this, CB, ORE, + GetDefaultAdvice(CB)); + } auto MandatoryKind = InlineAdvisor::getMandatoryKind(CB, FAM, ORE); // If this is a "never inline" case, there won't be any changes to internal // state we need to track, so we can just return the base InlineAdvice, which diff --git a/llvm/lib/Analysis/models/gen-inline-oz-test-model.py b/llvm/lib/Analysis/models/gen-inline-oz-test-model.py index 4898509ea544f..83055890283e8 100644 --- a/llvm/lib/Analysis/models/gen-inline-oz-test-model.py +++ b/llvm/lib/Analysis/models/gen-inline-oz-test-model.py @@ -102,12 +102,12 @@ def get_output_spec_path(path): return os.path.join(path, "output_spec.json") -def build_mock_model(path, signature): +def build_mock_model(path, signature, advice): """Build and save the mock model with the given signature""" module = tf.Module() def action(*inputs): - return {signature["output"]: tf.constant(value=1, dtype=tf.int64)} + return {signature["output"]: tf.constant(value=advice, dtype=tf.int64)} module.action = tf.function()(action) action = {"action": module.action.get_concrete_function(signature["inputs"])} @@ -128,12 +128,18 @@ def get_signature(): def main(argv): - assert len(argv) == 2 + assert len(argv) == 2 or (len(argv) == 3 and argv[2] == "never") model_path = argv[1] print(f"Output model to: [{argv[1]}]") + + constant_advice = 1 + if len(argv) == 3: + constant_advice = 0 + print(f"The model will always return: {constant_advice}") + signature = get_signature() - build_mock_model(model_path, signature) + build_mock_model(model_path, signature, constant_advice) if __name__ == "__main__": diff --git a/llvm/test/Transforms/Inline/ML/bypass.ll b/llvm/test/Transforms/Inline/ML/bypass.ll new file mode 100644 index 0000000000000..ccdefdcc93bfe --- /dev/null +++ b/llvm/test/Transforms/Inline/ML/bypass.ll @@ -0,0 +1,78 @@ +; REQUIRES: have_tflite +; RUN: rm -rf %t.runfiles %t.tflite %t.model_out +; RUN: mkdir %t.runfiles +; RUN: cp %S/../../../../lib/Analysis/models/gen-inline-oz-test-model.py %t.runfiles +; RUN: cp %S/../../../../lib/Analysis/models/saved-model-to-tflite.py %t.runfiles +; RUN: %python %t.runfiles/gen-inline-oz-test-model.py %t.model_out never +; RUN: %python %t.runfiles/saved-model-to-tflite.py %t.model_out %t.tflite + +; When running O2, we expect both callers to inline callee. +; RUN: opt < %s -passes='default' -inline-threshold=0 -hot-callsite-threshold=100 -S | FileCheck %s --check-prefixes=O2-HOT,O2-COLD + +; The ML model we use always blocks inlining (by construction) +; RUN: opt < %s -passes='default' -inline-threshold=0 -hot-callsite-threshold=100 \ +; RUN: -enable-ml-inliner=development -ml-inliner-model-under-training=%t.tflite \ +; RUN: -S | FileCheck %s --check-prefixes=ML-HOT,ML-COLD + +; When bypassing ML for non-cold callers, the hot caller will have its callee inlined, but the cold one won't +; RUN: opt < %s -passes='default' -inline-threshold=0 -hot-callsite-threshold=100 \ +; RUN: -enable-ml-inliner=development -ml-inliner-model-under-training=%t.tflite \ +; RUN: -ml-inliner-skip-policy=if-caller-not-cold -S | FileCheck %s --check-prefixes=O2-HOT,ML-COLD + +declare void @extern() + +define i32 @callee(i32 %x) { + %x1 = add i32 %x, 1 + %x2 = add i32 %x1, 1 + %x3 = add i32 %x2, 1 + call void @extern() + call void @extern() + ret i32 %x3 +} + +define i32 @hot_caller(i32 %y1) !prof !15 { + %y = call i32 @callee(i32 %y1), !prof !16 + ret i32 %y +} + +define i32 @cold_caller(i32 %y1) !prof !17 { + %y = call i32 @callee(i32 %y1), !prof !16 + ret i32 %y +} + + +!llvm.module.flags = !{!1} +!15 = !{!"function_entry_count", i64 300} +!16 = !{!"branch_weights", i64 300} +!17 = !{!"function_entry_count", i64 1} + +!1 = !{i32 1, !"ProfileSummary", !2} +!2 = !{!3, !4, !5, !6, !7, !8, !9, !10} +!3 = !{!"ProfileFormat", !"SampleProfile"} +!4 = !{!"TotalCount", i64 10000} +!5 = !{!"MaxCount", i64 1000} +!6 = !{!"MaxInternalCount", i64 1} +!7 = !{!"MaxFunctionCount", i64 1000} +!8 = !{!"NumCounts", i64 3} +!9 = !{!"NumFunctions", i64 3} +!10 = !{!"DetailedSummary", !11} +!11 = !{!12, !13, !14} +!12 = !{i32 10000, i64 100, i32 1} +!13 = !{i32 999000, i64 100, i32 1} +!14 = !{i32 999999, i64 1, i32 2} + +; O2-HOT-LABEL: @hot_caller +; O2-HOT-NOT: call i32 @callee +; O2-HOT: call void @extern +; O2-HOT-NEXT: call void @extern +; O2-HOT-NEXT: ret +; O2-COLD-LABEL: @cold_caller +; O2-COLD-NOT: call i32 @callee +; O2-COLD: call void @extern +; O2-COLD-NEXT: call void @extern +; O2-COLD-NEXT: ret + +; ML-HOT-LABEL: @hot_caller +; ML-HOT-NEXT: call i32 @callee +; ML-COLD-LABEL: @cold_caller +; ML-COLD-NEXT: call i32 @callee \ No newline at end of file From 3a2f7d8a9f84db380af5122418098cb28a57443f Mon Sep 17 00:00:00 2001 From: Fabian Mora Date: Mon, 17 Jun 2024 16:19:21 -0500 Subject: [PATCH 15/25] Revert "Reland [mlir][Target] Improve ROCDL gpu serialization API" (#95847) Reverts llvm/llvm-project#95813 --- mlir/include/mlir/Target/LLVM/ROCDL/Utils.h | 41 +-- mlir/lib/Dialect/GPU/CMakeLists.txt | 2 +- mlir/lib/Target/LLVM/CMakeLists.txt | 13 +- mlir/lib/Target/LLVM/ROCDL/Target.cpp | 306 +++++++++----------- 4 files changed, 149 insertions(+), 213 deletions(-) diff --git a/mlir/include/mlir/Target/LLVM/ROCDL/Utils.h b/mlir/include/mlir/Target/LLVM/ROCDL/Utils.h index 44c9ded317fa5..374fa65bd02e3 100644 --- a/mlir/include/mlir/Target/LLVM/ROCDL/Utils.h +++ b/mlir/include/mlir/Target/LLVM/ROCDL/Utils.h @@ -27,19 +27,6 @@ namespace ROCDL { /// 5. Returns an empty string. StringRef getROCMPath(); -/// Helper enum for specifying the AMD GCN device libraries required for -/// compilation. -enum class AMDGCNLibraries : uint32_t { - None = 0, - Ockl = 1, - Ocml = 2, - OpenCL = 4, - Hip = 8, - LastLib = Hip, - LLVM_MARK_AS_BITMASK_ENUM(LastLib), - All = (LastLib << 1) - 1 -}; - /// Base class for all ROCDL serializations from GPU modules into binary /// strings. By default this class serializes into LLVM bitcode. class SerializeGPUModuleBase : public LLVM::ModuleToObject { @@ -62,8 +49,8 @@ class SerializeGPUModuleBase : public LLVM::ModuleToObject { /// Returns the bitcode files to be loaded. ArrayRef getFileList() const; - /// Appends standard ROCm device libraries to `fileList`. - LogicalResult appendStandardLibs(AMDGCNLibraries libs); + /// Appends standard ROCm device libraries like `ocml.bc`, `ockl.bc`, etc. + LogicalResult appendStandardLibs(); /// Loads the bitcode files in `fileList`. virtual std::optional>> @@ -76,20 +63,15 @@ class SerializeGPUModuleBase : public LLVM::ModuleToObject { LogicalResult handleBitcodeFile(llvm::Module &module) override; protected: - /// Adds `oclc` control variables to the LLVM module. - void addControlVariables(llvm::Module &module, AMDGCNLibraries libs, - bool wave64, bool daz, bool finiteOnly, - bool unsafeMath, bool fastMath, bool correctSqrt, - StringRef abiVer); + /// Appends the paths of common ROCm device libraries to `libs`. + LogicalResult getCommonBitcodeLibs(llvm::SmallVector &libs, + SmallVector &libPath, + StringRef isaVersion); - /// Compiles assembly to a binary. - virtual std::optional> - compileToBinary(const std::string &serializedISA); - - /// Default implementation of `ModuleToObject::moduleToObject`. - std::optional> - moduleToObjectImpl(const gpu::TargetOptions &targetOptions, - llvm::Module &llvmModule); + /// Adds `oclc` control variables to the LLVM module. + void addControlVariables(llvm::Module &module, bool wave64, bool daz, + bool finiteOnly, bool unsafeMath, bool fastMath, + bool correctSqrt, StringRef abiVer); /// Returns the assembled ISA. std::optional> assembleIsa(StringRef isa); @@ -102,9 +84,6 @@ class SerializeGPUModuleBase : public LLVM::ModuleToObject { /// List of LLVM bitcode files to link to. SmallVector fileList; - - /// AMD GCN libraries to use when linking, the default is using none. - AMDGCNLibraries deviceLibs = AMDGCNLibraries::None; }; } // namespace ROCDL } // namespace mlir diff --git a/mlir/lib/Dialect/GPU/CMakeLists.txt b/mlir/lib/Dialect/GPU/CMakeLists.txt index 08c8aea36fac9..61ab298ebfb98 100644 --- a/mlir/lib/Dialect/GPU/CMakeLists.txt +++ b/mlir/lib/Dialect/GPU/CMakeLists.txt @@ -106,7 +106,7 @@ if(MLIR_ENABLE_ROCM_CONVERSIONS) "Building mlir with ROCm support requires the AMDGPU backend") endif() - set(DEFAULT_ROCM_PATH "" CACHE PATH "Fallback path to search for ROCm installs") + set(DEFAULT_ROCM_PATH "/opt/rocm" CACHE PATH "Fallback path to search for ROCm installs") target_compile_definitions(obj.MLIRGPUTransforms PRIVATE __DEFAULT_ROCM_PATH__="${DEFAULT_ROCM_PATH}" diff --git a/mlir/lib/Target/LLVM/CMakeLists.txt b/mlir/lib/Target/LLVM/CMakeLists.txt index 6e146710d67af..5a3fa160850b4 100644 --- a/mlir/lib/Target/LLVM/CMakeLists.txt +++ b/mlir/lib/Target/LLVM/CMakeLists.txt @@ -123,18 +123,17 @@ add_mlir_dialect_library(MLIRROCDLTarget ) if(MLIR_ENABLE_ROCM_CONVERSIONS) + if (NOT ("AMDGPU" IN_LIST LLVM_TARGETS_TO_BUILD)) + message(SEND_ERROR + "Building mlir with ROCm support requires the AMDGPU backend") + endif() + if (DEFINED ROCM_PATH) set(DEFAULT_ROCM_PATH "${ROCM_PATH}" CACHE PATH "Fallback path to search for ROCm installs") elseif(DEFINED ENV{ROCM_PATH}) set(DEFAULT_ROCM_PATH "$ENV{ROCM_PATH}" CACHE PATH "Fallback path to search for ROCm installs") else() - IF (WIN32) - # Avoid setting an UNIX path for Windows. - # TODO: Eventually migrate to FindHIP once it becomes a part of CMake. - set(DEFAULT_ROCM_PATH "" CACHE PATH "Fallback path to search for ROCm installs") - else() - set(DEFAULT_ROCM_PATH "/opt/rocm" CACHE PATH "Fallback path to search for ROCm installs") - endif() + set(DEFAULT_ROCM_PATH "/opt/rocm" CACHE PATH "Fallback path to search for ROCm installs") endif() message(VERBOSE "MLIR Default ROCM toolkit path: ${DEFAULT_ROCM_PATH}") diff --git a/mlir/lib/Target/LLVM/ROCDL/Target.cpp b/mlir/lib/Target/LLVM/ROCDL/Target.cpp index 6784f3668bde3..cc13e5b7436ea 100644 --- a/mlir/lib/Target/LLVM/ROCDL/Target.cpp +++ b/mlir/lib/Target/LLVM/ROCDL/Target.cpp @@ -17,6 +17,9 @@ #include "mlir/Dialect/LLVMIR/ROCDLDialect.h" #include "mlir/Support/FileUtilities.h" #include "mlir/Target/LLVM/ROCDL/Utils.h" +#include "mlir/Target/LLVMIR/Dialect/GPU/GPUToLLVMIRTranslation.h" +#include "mlir/Target/LLVMIR/Dialect/LLVMIR/LLVMToLLVMIRTranslation.h" +#include "mlir/Target/LLVMIR/Dialect/ROCDL/ROCDLToLLVMIRTranslation.h" #include "mlir/Target/LLVMIR/Export.h" #include "llvm/IR/Constants.h" @@ -109,9 +112,8 @@ SerializeGPUModuleBase::SerializeGPUModuleBase( if (auto file = dyn_cast(attr)) fileList.push_back(file.str()); - // By default add all libraries if the toolkit path is not empty. - if (!getToolkitPath().empty()) - deviceLibs = AMDGCNLibraries::All; + // Append standard ROCm device bitcode libraries to the files to be loaded. + (void)appendStandardLibs(); } void SerializeGPUModuleBase::init() { @@ -136,67 +138,29 @@ ArrayRef SerializeGPUModuleBase::getFileList() const { return fileList; } -LogicalResult SerializeGPUModuleBase::appendStandardLibs(AMDGCNLibraries libs) { - if (libs == AMDGCNLibraries::None) - return success(); +LogicalResult SerializeGPUModuleBase::appendStandardLibs() { StringRef pathRef = getToolkitPath(); - // Fail if the toolkit is empty. - if (pathRef.empty()) - return failure(); - - // Get the path for the device libraries - SmallString<256> path; - path.insert(path.begin(), pathRef.begin(), pathRef.end()); - llvm::sys::path::append(path, "amdgcn", "bitcode"); - pathRef = StringRef(path.data(), path.size()); - - // Fail if the path is invalid. - if (!llvm::sys::fs::is_directory(pathRef)) { - getOperation().emitRemark() << "ROCm amdgcn bitcode path: " << pathRef - << " does not exist or is not a directory"; - return failure(); - } - - // Get the ISA version. - StringRef isaVersion = - llvm::AMDGPU::getArchNameAMDGCN(llvm::AMDGPU::parseArchAMDGCN(chip)); - isaVersion.consume_front("gfx"); - - // Helper function for adding a library. - auto addLib = [&](const Twine &lib) -> bool { - auto baseSize = path.size(); - llvm::sys::path::append(path, lib); - StringRef pathRef(path.data(), path.size()); - if (!llvm::sys::fs::is_regular_file(pathRef)) { - getOperation().emitRemark() << "bitcode library path: " << pathRef - << " does not exist or is not a file"; - return true; + if (!pathRef.empty()) { + SmallVector path; + path.insert(path.begin(), pathRef.begin(), pathRef.end()); + llvm::sys::path::append(path, "amdgcn", "bitcode"); + pathRef = StringRef(path.data(), path.size()); + if (!llvm::sys::fs::is_directory(pathRef)) { + getOperation().emitRemark() << "ROCm amdgcn bitcode path: " << pathRef + << " does not exist or is not a directory."; + return failure(); } - fileList.push_back(pathRef.str()); - path.truncate(baseSize); - return false; - }; - - // Add ROCm device libraries. Fail if any of the libraries is not found, ie. - // if any of the `addLib` failed. - if ((any(libs & AMDGCNLibraries::Ocml) && addLib("ocml.bc")) || - (any(libs & AMDGCNLibraries::Ockl) && addLib("ockl.bc")) || - (any(libs & AMDGCNLibraries::Hip) && addLib("hip.bc")) || - (any(libs & AMDGCNLibraries::OpenCL) && addLib("opencl.bc")) || - (any(libs & (AMDGCNLibraries::Ocml | AMDGCNLibraries::Ockl)) && - addLib("oclc_isa_version_" + isaVersion + ".bc"))) - return failure(); + StringRef isaVersion = + llvm::AMDGPU::getArchNameAMDGCN(llvm::AMDGPU::parseArchAMDGCN(chip)); + isaVersion.consume_front("gfx"); + return getCommonBitcodeLibs(fileList, path, isaVersion); + } return success(); } std::optional>> SerializeGPUModuleBase::loadBitcodeFiles(llvm::Module &module) { SmallVector> bcFiles; - // Return if there are no libs to load. - if (deviceLibs == AMDGCNLibraries::None && fileList.empty()) - return bcFiles; - if (failed(appendStandardLibs(deviceLibs))) - return std::nullopt; if (failed(loadBitcodeFilesFromList(module.getContext(), fileList, bcFiles, true))) return std::nullopt; @@ -210,76 +174,80 @@ LogicalResult SerializeGPUModuleBase::handleBitcodeFile(llvm::Module &module) { // Stop spamming us with clang version numbers if (auto *ident = module.getNamedMetadata("llvm.ident")) module.eraseNamedMetadata(ident); - // Override the libModules datalayout and target triple with the compiler's - // data layout should there be a discrepency. - setDataLayoutAndTriple(module); return success(); } void SerializeGPUModuleBase::handleModulePreLink(llvm::Module &module) { - // If all libraries are not set, traverse the module to determine which - // libraries are required. - if (deviceLibs != AMDGCNLibraries::All) { - for (llvm::Function &f : module.functions()) { - if (f.hasExternalLinkage() && f.hasName() && !f.hasExactDefinition()) { - StringRef funcName = f.getName(); - if ("printf" == funcName) - deviceLibs |= AMDGCNLibraries::OpenCL | AMDGCNLibraries::Ockl | - AMDGCNLibraries::Ocml; - if (funcName.starts_with("__ockl_")) - deviceLibs |= AMDGCNLibraries::Ockl; - if (funcName.starts_with("__ocml_")) - deviceLibs |= AMDGCNLibraries::Ocml; - } - } - } - addControlVariables(module, deviceLibs, target.hasWave64(), target.hasDaz(), + [[maybe_unused]] std::optional targetMachine = + getOrCreateTargetMachine(); + assert(targetMachine && "expect a TargetMachine"); + addControlVariables(module, target.hasWave64(), target.hasDaz(), target.hasFiniteOnly(), target.hasUnsafeMath(), target.hasFastMath(), target.hasCorrectSqrt(), target.getAbi()); } -void SerializeGPUModuleBase::addControlVariables( - llvm::Module &module, AMDGCNLibraries libs, bool wave64, bool daz, - bool finiteOnly, bool unsafeMath, bool fastMath, bool correctSqrt, - StringRef abiVer) { - // Return if no device libraries are required. - if (libs == AMDGCNLibraries::None) - return; - // Helper function for adding control variables. - auto addControlVariable = [&module](StringRef name, uint32_t value, - uint32_t bitwidth) { - if (module.getNamedGlobal(name)) { - return; +// Get the paths of ROCm device libraries. +LogicalResult SerializeGPUModuleBase::getCommonBitcodeLibs( + llvm::SmallVector &libs, SmallVector &libPath, + StringRef isaVersion) { + auto addLib = [&](StringRef path) -> bool { + if (!llvm::sys::fs::is_regular_file(path)) { + getOperation().emitRemark() << "Bitcode library path: " << path + << " does not exist or is not a file.\n"; + return true; } - llvm::IntegerType *type = - llvm::IntegerType::getIntNTy(module.getContext(), bitwidth); + libs.push_back(path.str()); + return false; + }; + auto getLibPath = [&libPath](Twine lib) { + auto baseSize = libPath.size(); + llvm::sys::path::append(libPath, lib + ".bc"); + std::string path(StringRef(libPath.data(), libPath.size()).str()); + libPath.truncate(baseSize); + return path; + }; + + // Add ROCm device libraries. Fail if any of the libraries is not found. + if (addLib(getLibPath("ocml")) || addLib(getLibPath("ockl")) || + addLib(getLibPath("hip")) || addLib(getLibPath("opencl")) || + addLib(getLibPath("oclc_isa_version_" + isaVersion))) + return failure(); + return success(); +} + +void SerializeGPUModuleBase::addControlVariables( + llvm::Module &module, bool wave64, bool daz, bool finiteOnly, + bool unsafeMath, bool fastMath, bool correctSqrt, StringRef abiVer) { + llvm::Type *i8Ty = llvm::Type::getInt8Ty(module.getContext()); + auto addControlVariable = [i8Ty, &module](StringRef name, bool enable) { llvm::GlobalVariable *controlVariable = new llvm::GlobalVariable( - module, /*isConstant=*/type, true, - llvm::GlobalValue::LinkageTypes::LinkOnceODRLinkage, - llvm::ConstantInt::get(type, value), name, /*before=*/nullptr, - /*threadLocalMode=*/llvm::GlobalValue::ThreadLocalMode::NotThreadLocal, - /*addressSpace=*/4); + module, i8Ty, true, llvm::GlobalValue::LinkageTypes::LinkOnceODRLinkage, + llvm::ConstantInt::get(i8Ty, enable), name, nullptr, + llvm::GlobalValue::ThreadLocalMode::NotThreadLocal, 4); controlVariable->setVisibility( llvm::GlobalValue::VisibilityTypes::ProtectedVisibility); - controlVariable->setAlignment(llvm::MaybeAlign(bitwidth / 8)); + controlVariable->setAlignment(llvm::MaybeAlign(1)); controlVariable->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Local); }; - // Add ocml related control variables. - if (any(libs & AMDGCNLibraries::Ocml)) { - addControlVariable("__oclc_finite_only_opt", finiteOnly || fastMath, 8); - addControlVariable("__oclc_daz_opt", daz || fastMath, 8); - addControlVariable("__oclc_correctly_rounded_sqrt32", - correctSqrt && !fastMath, 8); - addControlVariable("__oclc_unsafe_math_opt", unsafeMath || fastMath, 8); - } - // Add ocml or ockl related control variables. - if (any(libs & (AMDGCNLibraries::Ocml | AMDGCNLibraries::Ockl))) { - addControlVariable("__oclc_wavefrontsize64", wave64, 8); - int abi = 500; - abiVer.getAsInteger(0, abi); - addControlVariable("__oclc_ABI_version", abi, 32); - } + addControlVariable("__oclc_finite_only_opt", finiteOnly || fastMath); + addControlVariable("__oclc_unsafe_math_opt", unsafeMath || fastMath); + addControlVariable("__oclc_daz_opt", daz || fastMath); + addControlVariable("__oclc_correctly_rounded_sqrt32", + correctSqrt && !fastMath); + addControlVariable("__oclc_wavefrontsize64", wave64); + + llvm::Type *i32Ty = llvm::Type::getInt32Ty(module.getContext()); + int abi = 500; + abiVer.getAsInteger(0, abi); + llvm::GlobalVariable *abiVersion = new llvm::GlobalVariable( + module, i32Ty, true, llvm::GlobalValue::LinkageTypes::LinkOnceODRLinkage, + llvm::ConstantInt::get(i32Ty, abi), "__oclc_ABI_version", nullptr, + llvm::GlobalValue::ThreadLocalMode::NotThreadLocal, 4); + abiVersion->setVisibility( + llvm::GlobalValue::VisibilityTypes::ProtectedVisibility); + abiVersion->setAlignment(llvm::MaybeAlign(4)); + abiVersion->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Local); } std::optional> @@ -344,16 +312,48 @@ SerializeGPUModuleBase::assembleIsa(StringRef isa) { parser->setTargetParser(*tap); parser->Run(false); + return result; } +#if MLIR_ENABLE_ROCM_CONVERSIONS +namespace { +class AMDGPUSerializer : public SerializeGPUModuleBase { +public: + AMDGPUSerializer(Operation &module, ROCDLTargetAttr target, + const gpu::TargetOptions &targetOptions); + + gpu::GPUModuleOp getOperation(); + + // Compile to HSA. + std::optional> + compileToBinary(const std::string &serializedISA); + + std::optional> + moduleToObject(llvm::Module &llvmModule) override; + +private: + // Target options. + gpu::TargetOptions targetOptions; +}; +} // namespace + +AMDGPUSerializer::AMDGPUSerializer(Operation &module, ROCDLTargetAttr target, + const gpu::TargetOptions &targetOptions) + : SerializeGPUModuleBase(module, target, targetOptions), + targetOptions(targetOptions) {} + +gpu::GPUModuleOp AMDGPUSerializer::getOperation() { + return dyn_cast(&SerializeGPUModuleBase::getOperation()); +} + std::optional> -SerializeGPUModuleBase::compileToBinary(const std::string &serializedISA) { +AMDGPUSerializer::compileToBinary(const std::string &serializedISA) { // Assemble the ISA. std::optional> isaBinary = assembleIsa(serializedISA); if (!isaBinary) { - getOperation().emitError() << "failed during ISA assembling"; + getOperation().emitError() << "Failed during ISA assembling."; return std::nullopt; } @@ -363,7 +363,7 @@ SerializeGPUModuleBase::compileToBinary(const std::string &serializedISA) { if (llvm::sys::fs::createTemporaryFile("kernel%%", "o", tempIsaBinaryFd, tempIsaBinaryFilename)) { getOperation().emitError() - << "failed to create a temporary file for dumping the ISA binary"; + << "Failed to create a temporary file for dumping the ISA binary."; return std::nullopt; } llvm::FileRemover cleanupIsaBinary(tempIsaBinaryFilename); @@ -378,7 +378,7 @@ SerializeGPUModuleBase::compileToBinary(const std::string &serializedISA) { if (llvm::sys::fs::createTemporaryFile("kernel", "hsaco", tempHsacoFilename)) { getOperation().emitError() - << "failed to create a temporary file for the HSA code object"; + << "Failed to create a temporary file for the HSA code object."; return std::nullopt; } llvm::FileRemover cleanupHsaco(tempHsacoFilename); @@ -389,7 +389,7 @@ SerializeGPUModuleBase::compileToBinary(const std::string &serializedISA) { lldPath, {"ld.lld", "-shared", tempIsaBinaryFilename, "-o", tempHsacoFilename}); if (lldResult != 0) { - getOperation().emitError() << "lld invocation failed"; + getOperation().emitError() << "lld invocation failed."; return std::nullopt; } @@ -398,7 +398,7 @@ SerializeGPUModuleBase::compileToBinary(const std::string &serializedISA) { llvm::MemoryBuffer::getFile(tempHsacoFilename, /*IsText=*/false); if (!hsacoFile) { getOperation().emitError() - << "failed to read the HSA code object from the temp file"; + << "Failed to read the HSA code object from the temp file."; return std::nullopt; } @@ -407,13 +407,13 @@ SerializeGPUModuleBase::compileToBinary(const std::string &serializedISA) { return SmallVector(buffer.begin(), buffer.end()); } -std::optional> SerializeGPUModuleBase::moduleToObjectImpl( - const gpu::TargetOptions &targetOptions, llvm::Module &llvmModule) { +std::optional> +AMDGPUSerializer::moduleToObject(llvm::Module &llvmModule) { // Return LLVM IR if the compilation target is offload. #define DEBUG_TYPE "serialize-to-llvm" LLVM_DEBUG({ - llvm::dbgs() << "LLVM IR for module: " - << cast(getOperation()).getNameAttr() << "\n" + llvm::dbgs() << "LLVM IR for module: " << getOperation().getNameAttr() + << "\n" << llvmModule << "\n"; }); #undef DEBUG_TYPE @@ -423,8 +423,8 @@ std::optional> SerializeGPUModuleBase::moduleToObjectImpl( std::optional targetMachine = getOrCreateTargetMachine(); if (!targetMachine) { - getOperation().emitError() << "target Machine unavailable for triple " - << triple << ", can't compile with LLVM"; + getOperation().emitError() << "Target Machine unavailable for triple " + << triple << ", can't compile with LLVM\n"; return std::nullopt; } @@ -432,13 +432,12 @@ std::optional> SerializeGPUModuleBase::moduleToObjectImpl( std::optional serializedISA = translateToISA(llvmModule, **targetMachine); if (!serializedISA) { - getOperation().emitError() << "failed translating the module to ISA"; + getOperation().emitError() << "Failed translating the module to ISA."; return std::nullopt; } #define DEBUG_TYPE "serialize-to-isa" LLVM_DEBUG({ - llvm::dbgs() << "ISA for module: " - << cast(getOperation()).getNameAttr() << "\n" + llvm::dbgs() << "ISA for module: " << getOperation().getNameAttr() << "\n" << *serializedISA << "\n"; }); #undef DEBUG_TYPE @@ -446,45 +445,9 @@ std::optional> SerializeGPUModuleBase::moduleToObjectImpl( if (targetOptions.getCompilationTarget() == gpu::CompilationTarget::Assembly) return SmallVector(serializedISA->begin(), serializedISA->end()); - // Compiling to binary requires a valid ROCm path, fail if it's not found. - if (getToolkitPath().empty()) - getOperation().emitError() << "invalid ROCm path, please set a valid path"; - // Compile to binary. return compileToBinary(*serializedISA); } - -#if MLIR_ENABLE_ROCM_CONVERSIONS -namespace { -class AMDGPUSerializer : public SerializeGPUModuleBase { -public: - AMDGPUSerializer(Operation &module, ROCDLTargetAttr target, - const gpu::TargetOptions &targetOptions); - - gpu::GPUModuleOp getOperation(); - - std::optional> - moduleToObject(llvm::Module &llvmModule) override; - -private: - // Target options. - gpu::TargetOptions targetOptions; -}; -} // namespace - -AMDGPUSerializer::AMDGPUSerializer(Operation &module, ROCDLTargetAttr target, - const gpu::TargetOptions &targetOptions) - : SerializeGPUModuleBase(module, target, targetOptions), - targetOptions(targetOptions) {} - -gpu::GPUModuleOp AMDGPUSerializer::getOperation() { - return dyn_cast(&SerializeGPUModuleBase::getOperation()); -} - -std::optional> -AMDGPUSerializer::moduleToObject(llvm::Module &llvmModule) { - return moduleToObjectImpl(targetOptions, llvmModule); -} #endif // MLIR_ENABLE_ROCM_CONVERSIONS std::optional> ROCDLTargetAttrImpl::serializeToObject( @@ -494,7 +457,7 @@ std::optional> ROCDLTargetAttrImpl::serializeToObject( if (!module) return std::nullopt; if (!mlir::isa(module)) { - module->emitError("module must be a GPU module"); + module->emitError("Module must be a GPU module."); return std::nullopt; } #if MLIR_ENABLE_ROCM_CONVERSIONS @@ -503,8 +466,8 @@ std::optional> ROCDLTargetAttrImpl::serializeToObject( serializer.init(); return serializer.run(); #else - module->emitError("the `AMDGPU` target was not built. Please enable it when " - "building LLVM"); + module->emitError("The `AMDGPU` target was not built. Please enable it when " + "building LLVM."); return std::nullopt; #endif // MLIR_ENABLE_ROCM_CONVERSIONS } @@ -514,15 +477,10 @@ ROCDLTargetAttrImpl::createObject(Attribute attribute, const SmallVector &object, const gpu::TargetOptions &options) const { gpu::CompilationTarget format = options.getCompilationTarget(); - // If format is `fatbin` transform it to binary as `fatbin` is not yet - // supported. - if (format > gpu::CompilationTarget::Binary) - format = gpu::CompilationTarget::Binary; - - DictionaryAttr properties{}; Builder builder(attribute.getContext()); return builder.getAttr( - attribute, format, - builder.getStringAttr(StringRef(object.data(), object.size())), - properties); + attribute, + format > gpu::CompilationTarget::Binary ? gpu::CompilationTarget::Binary + : format, + builder.getStringAttr(StringRef(object.data(), object.size())), nullptr); } From 1adf0fae0503fccd1216865dd0276c3ec97f62a7 Mon Sep 17 00:00:00 2001 From: Florian Mayer Date: Mon, 17 Jun 2024 14:38:26 -0700 Subject: [PATCH 16/25] Revert "[HWASan] [compiler-rt] support non-4k pages on Android" (#95853) Reverts llvm/llvm-project#95069 Broke windows bot --- compiler-rt/lib/asan/asan_linux.cpp | 3 +-- compiler-rt/lib/asan/asan_mac.cpp | 3 +-- compiler-rt/lib/asan/asan_premap_shadow.cpp | 3 +-- .../lib/hwasan/hwasan_dynamic_shadow.cpp | 17 ++++++----------- compiler-rt/lib/memprof/memprof_linux.cpp | 3 +-- .../lib/sanitizer_common/sanitizer_common.h | 17 +++++++++-------- .../lib/sanitizer_common/sanitizer_linux.cpp | 4 ++-- .../sanitizer_linux_libcdep.cpp | 5 +++-- .../lib/sanitizer_common/sanitizer_mac.cpp | 4 ++-- .../lib/sanitizer_common/sanitizer_win.cpp | 5 +++-- 10 files changed, 29 insertions(+), 35 deletions(-) diff --git a/compiler-rt/lib/asan/asan_linux.cpp b/compiler-rt/lib/asan/asan_linux.cpp index 0b470db86748f..a517de5af00dc 100644 --- a/compiler-rt/lib/asan/asan_linux.cpp +++ b/compiler-rt/lib/asan/asan_linux.cpp @@ -93,8 +93,7 @@ uptr FindDynamicShadowStart() { # endif return MapDynamicShadow(shadow_size_bytes, ASAN_SHADOW_SCALE, - /*min_shadow_base_alignment*/ 0, kHighMemEnd, - GetMmapGranularity()); + /*min_shadow_base_alignment*/ 0, kHighMemEnd); } void AsanApplyToGlobals(globals_op_fptr op, const void *needle) { diff --git a/compiler-rt/lib/asan/asan_mac.cpp b/compiler-rt/lib/asan/asan_mac.cpp index bfc349223258b..b250f796e165f 100644 --- a/compiler-rt/lib/asan/asan_mac.cpp +++ b/compiler-rt/lib/asan/asan_mac.cpp @@ -51,8 +51,7 @@ bool IsSystemHeapAddress (uptr addr) { return false; } uptr FindDynamicShadowStart() { return MapDynamicShadow(MemToShadowSize(kHighMemEnd), ASAN_SHADOW_SCALE, - /*min_shadow_base_alignment*/ 0, kHighMemEnd, - GetMmapGranularity()); + /*min_shadow_base_alignment*/ 0, kHighMemEnd); } // No-op. Mac does not support static linkage anyway. diff --git a/compiler-rt/lib/asan/asan_premap_shadow.cpp b/compiler-rt/lib/asan/asan_premap_shadow.cpp index 6e08b8f966507..bed2f62a22511 100644 --- a/compiler-rt/lib/asan/asan_premap_shadow.cpp +++ b/compiler-rt/lib/asan/asan_premap_shadow.cpp @@ -33,8 +33,7 @@ uptr PremapShadowSize() { // PremapShadowSize() bytes on the right of it are mapped r/o. uptr PremapShadow() { return MapDynamicShadow(PremapShadowSize(), /*mmap_alignment_scale*/ 3, - /*min_shadow_base_alignment*/ 0, kHighMemEnd, - GetMmapGranularity()); + /*min_shadow_base_alignment*/ 0, kHighMemEnd); } bool PremapShadowFailed() { diff --git a/compiler-rt/lib/hwasan/hwasan_dynamic_shadow.cpp b/compiler-rt/lib/hwasan/hwasan_dynamic_shadow.cpp index 48bc3b631ac07..7642ba6c0bf08 100644 --- a/compiler-rt/lib/hwasan/hwasan_dynamic_shadow.cpp +++ b/compiler-rt/lib/hwasan/hwasan_dynamic_shadow.cpp @@ -36,20 +36,15 @@ decltype(__hwasan_shadow)* __hwasan_premap_shadow(); namespace __hwasan { -// We cannot call anything in libc here (see comment above), so we need to -// assume the biggest allowed page size. -// Android max page size is defined as 16k here: -// https://android.googlesource.com/platform/bionic/+/main/libc/platform/bionic/page.h#41 -static constexpr uptr kMaxGranularity = 16384; - // Conservative upper limit. static uptr PremapShadowSize() { - return RoundUpTo(GetMaxVirtualAddress() >> kShadowScale, kMaxGranularity); + return RoundUpTo(GetMaxVirtualAddress() >> kShadowScale, + GetMmapGranularity()); } static uptr PremapShadow() { return MapDynamicShadow(PremapShadowSize(), kShadowScale, - kShadowBaseAlignment, kHighMemEnd, kMaxGranularity); + kShadowBaseAlignment, kHighMemEnd); } static bool IsPremapShadowAvailable() { @@ -61,7 +56,7 @@ static bool IsPremapShadowAvailable() { } static uptr FindPremappedShadowStart(uptr shadow_size_bytes) { - const uptr granularity = kMaxGranularity; + const uptr granularity = GetMmapGranularity(); const uptr shadow_start = reinterpret_cast(&__hwasan_shadow); const uptr premap_shadow_size = PremapShadowSize(); const uptr shadow_size = RoundUpTo(shadow_size_bytes, granularity); @@ -114,7 +109,7 @@ uptr FindDynamicShadowStart(uptr shadow_size_bytes) { if (IsPremapShadowAvailable()) return FindPremappedShadowStart(shadow_size_bytes); return MapDynamicShadow(shadow_size_bytes, kShadowScale, kShadowBaseAlignment, - kHighMemEnd, kMaxGranularity); + kHighMemEnd); } } // namespace __hwasan @@ -140,7 +135,7 @@ uptr FindDynamicShadowStart(uptr shadow_size_bytes) { RingBufferSize()); # endif return MapDynamicShadow(shadow_size_bytes, kShadowScale, kShadowBaseAlignment, - kHighMemEnd, GetMmapGranularity()); + kHighMemEnd); } } // namespace __hwasan diff --git a/compiler-rt/lib/memprof/memprof_linux.cpp b/compiler-rt/lib/memprof/memprof_linux.cpp index fbe5d250f840b..26a2b456b874e 100644 --- a/compiler-rt/lib/memprof/memprof_linux.cpp +++ b/compiler-rt/lib/memprof/memprof_linux.cpp @@ -58,8 +58,7 @@ void InitializePlatformExceptionHandlers() {} uptr FindDynamicShadowStart() { uptr shadow_size_bytes = MemToShadowSize(kHighMemEnd); return MapDynamicShadow(shadow_size_bytes, SHADOW_SCALE, - /*min_shadow_base_alignment*/ 0, kHighMemEnd, - GetMmapGranularity()); + /*min_shadow_base_alignment*/ 0, kHighMemEnd); } void *MemprofDlSymNext(const char *sym) { return dlsym(RTLD_NEXT, sym); } diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_common.h b/compiler-rt/lib/sanitizer_common/sanitizer_common.h index 2d1059140c303..c451fc962c529 100644 --- a/compiler-rt/lib/sanitizer_common/sanitizer_common.h +++ b/compiler-rt/lib/sanitizer_common/sanitizer_common.h @@ -60,10 +60,14 @@ inline int Verbosity() { return atomic_load(¤t_verbosity, memory_order_relaxed); } -#if SANITIZER_ANDROID && !defined(__aarch64__) -// 32-bit Android only has 4k pages. -inline uptr GetPageSize() { return 4096; } -inline uptr GetPageSizeCached() { return 4096; } +#if SANITIZER_ANDROID +inline uptr GetPageSize() { +// Android post-M sysconf(_SC_PAGESIZE) crashes if called from .preinit_array. + return 4096; +} +inline uptr GetPageSizeCached() { + return 4096; +} #else uptr GetPageSize(); extern uptr PageSizeCached; @@ -73,7 +77,6 @@ inline uptr GetPageSizeCached() { return PageSizeCached; } #endif - uptr GetMmapGranularity(); uptr GetMaxVirtualAddress(); uptr GetMaxUserVirtualAddress(); @@ -88,7 +91,6 @@ void GetThreadStackAndTls(bool main, uptr *stk_addr, uptr *stk_size, // Memory management void *MmapOrDie(uptr size, const char *mem_type, bool raw_report = false); - inline void *MmapOrDieQuietly(uptr size, const char *mem_type) { return MmapOrDie(size, mem_type, /*raw_report*/ true); } @@ -137,8 +139,7 @@ void UnmapFromTo(uptr from, uptr to); // shadow_size_bytes bytes on the right, which on linux is mapped no access. // The high_mem_end may be updated if the original shadow size doesn't fit. uptr MapDynamicShadow(uptr shadow_size_bytes, uptr shadow_scale, - uptr min_shadow_base_alignment, uptr &high_mem_end, - uptr granularity); + uptr min_shadow_base_alignment, uptr &high_mem_end); // Let S = max(shadow_size, num_aliases * alias_size, ring_buffer_size). // Reserves 2*S bytes of address space to the right of the returned address and diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_linux.cpp b/compiler-rt/lib/sanitizer_common/sanitizer_linux.cpp index d15caa76efb06..5d2dd3a7a658f 100644 --- a/compiler-rt/lib/sanitizer_common/sanitizer_linux.cpp +++ b/compiler-rt/lib/sanitizer_common/sanitizer_linux.cpp @@ -1136,7 +1136,7 @@ uptr GetMaxUserVirtualAddress() { return addr; } -# if !SANITIZER_ANDROID || defined(__aarch64__) +# if !SANITIZER_ANDROID uptr GetPageSize() { # if SANITIZER_LINUX && (defined(__x86_64__) || defined(__i386__)) && \ defined(EXEC_PAGESIZE) @@ -1155,7 +1155,7 @@ uptr GetPageSize() { return sysconf(_SC_PAGESIZE); // EXEC_PAGESIZE may not be trustworthy. # endif } -# endif +# endif // !SANITIZER_ANDROID uptr ReadBinaryName(/*out*/ char *buf, uptr buf_len) { # if SANITIZER_SOLARIS diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_linux_libcdep.cpp b/compiler-rt/lib/sanitizer_common/sanitizer_linux_libcdep.cpp index 175362183fd78..6d05411222d9e 100644 --- a/compiler-rt/lib/sanitizer_common/sanitizer_linux_libcdep.cpp +++ b/compiler-rt/lib/sanitizer_common/sanitizer_linux_libcdep.cpp @@ -995,8 +995,9 @@ void UnmapFromTo(uptr from, uptr to) { } uptr MapDynamicShadow(uptr shadow_size_bytes, uptr shadow_scale, - uptr min_shadow_base_alignment, UNUSED uptr &high_mem_end, - uptr granularity) { + uptr min_shadow_base_alignment, + UNUSED uptr &high_mem_end) { + const uptr granularity = GetMmapGranularity(); const uptr alignment = Max(granularity << shadow_scale, 1ULL << min_shadow_base_alignment); const uptr left_padding = diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_mac.cpp b/compiler-rt/lib/sanitizer_common/sanitizer_mac.cpp index cbdf3e95925bf..24e3d1112520e 100644 --- a/compiler-rt/lib/sanitizer_common/sanitizer_mac.cpp +++ b/compiler-rt/lib/sanitizer_common/sanitizer_mac.cpp @@ -1188,8 +1188,8 @@ uptr GetMaxVirtualAddress() { } uptr MapDynamicShadow(uptr shadow_size_bytes, uptr shadow_scale, - uptr min_shadow_base_alignment, uptr &high_mem_end, - uptr granularity) { + uptr min_shadow_base_alignment, uptr &high_mem_end) { + const uptr granularity = GetMmapGranularity(); const uptr alignment = Max(granularity << shadow_scale, 1ULL << min_shadow_base_alignment); const uptr left_padding = diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_win.cpp b/compiler-rt/lib/sanitizer_common/sanitizer_win.cpp index 0b198890fc798..4e5ad8e4693b4 100644 --- a/compiler-rt/lib/sanitizer_common/sanitizer_win.cpp +++ b/compiler-rt/lib/sanitizer_common/sanitizer_win.cpp @@ -384,8 +384,9 @@ bool DontDumpShadowMemory(uptr addr, uptr length) { } uptr MapDynamicShadow(uptr shadow_size_bytes, uptr shadow_scale, - uptr min_shadow_base_alignment, UNUSED uptr &high_mem_end, - uptr granularity) { + uptr min_shadow_base_alignment, + UNUSED uptr &high_mem_end) { + const uptr granularity = GetMmapGranularity(); const uptr alignment = Max(granularity << shadow_scale, 1ULL << min_shadow_base_alignment); const uptr left_padding = From 6b54ecca885f58cd9449535a8678d93144c62e4d Mon Sep 17 00:00:00 2001 From: Mircea Trofin Date: Mon, 17 Jun 2024 14:37:24 -0700 Subject: [PATCH 17/25] [mlgo] remove `inlining_default` - unused feature The feature was only exposed for training and was immediately dropped on the training side. It was bulk-copied into the test model generator, where it had no effect (the generator always returns a constant). In the AOT + test model case, since the test model returns a constant, all input features are pruned by the AOT compiler, so its presence/absence doesn't matter. --- llvm/lib/Analysis/DevelopmentModeInlineAdvisor.cpp | 1 - llvm/lib/Analysis/models/gen-inline-oz-test-model.py | 1 - 2 files changed, 2 deletions(-) diff --git a/llvm/lib/Analysis/DevelopmentModeInlineAdvisor.cpp b/llvm/lib/Analysis/DevelopmentModeInlineAdvisor.cpp index 7d51302bcc1ad..3ab0af8d57da1 100644 --- a/llvm/lib/Analysis/DevelopmentModeInlineAdvisor.cpp +++ b/llvm/lib/Analysis/DevelopmentModeInlineAdvisor.cpp @@ -253,7 +253,6 @@ class LoggingMLInlineAdvice : public MLInlineAdvice { }; static const std::vector TrainingOnlyFeatures{ - TensorSpec::createSpec(TFFeedPrefix + "inlining_default", {1}), TensorSpec::createSpec(TFFeedPrefix + "discount", {1}), TensorSpec::createSpec(TFFeedPrefix + "reward", {1}), TensorSpec::createSpec(TFFeedPrefix + "step_type", {1})}; diff --git a/llvm/lib/Analysis/models/gen-inline-oz-test-model.py b/llvm/lib/Analysis/models/gen-inline-oz-test-model.py index 83055890283e8..3846e7a3cee76 100644 --- a/llvm/lib/Analysis/models/gen-inline-oz-test-model.py +++ b/llvm/lib/Analysis/models/gen-inline-oz-test-model.py @@ -47,7 +47,6 @@ def get_input_signature(): "edge_count", "callsite_height", "cost_estimate", - "inlining_default", "sroa_savings", "sroa_losses", "load_elimination", From ca22469a101e73fd2b2057d98d3f880f064764e5 Mon Sep 17 00:00:00 2001 From: lntue <35648136+lntue@users.noreply.github.com> Date: Mon, 17 Jun 2024 17:56:45 -0400 Subject: [PATCH 18/25] [libc][stdlib] Run freelist_heap_test only in full build mode. (#95850) --- libc/test/src/stdlib/CMakeLists.txt | 30 +++++++++++++++-------------- 1 file changed, 16 insertions(+), 14 deletions(-) diff --git a/libc/test/src/stdlib/CMakeLists.txt b/libc/test/src/stdlib/CMakeLists.txt index 648404afb5730..0ded674ee0e12 100644 --- a/libc/test/src/stdlib/CMakeLists.txt +++ b/libc/test/src/stdlib/CMakeLists.txt @@ -79,20 +79,22 @@ add_libc_test( libc.src.__support.CPP.span ) -add_libc_test( - freelist_heap_test - SUITE - libc-stdlib-tests - SRCS - freelist_heap_test.cpp - freelist_malloc_test.cpp - DEPENDS - libc.src.__support.CPP.span - libc.src.stdlib.freelist_heap - libc.src.stdlib.malloc - libc.src.string.memcmp - libc.src.string.memcpy -) +if(LLVM_LIBC_FULL_BUILD) + add_libc_test( + freelist_heap_test + SUITE + libc-stdlib-tests + SRCS + freelist_heap_test.cpp + freelist_malloc_test.cpp + DEPENDS + libc.src.__support.CPP.span + libc.src.stdlib.freelist_heap + libc.src.stdlib.malloc + libc.src.string.memcmp + libc.src.string.memcpy + ) +endif() add_fp_unittest( strtod_test From ca1a96364a568be6a2e65e6f12328b23dd35bafc Mon Sep 17 00:00:00 2001 From: Jacques Pienaar Date: Mon, 17 Jun 2024 15:06:15 -0700 Subject: [PATCH 19/25] [mlir][drr] Fix variadic destination emission (#95855) Its possible for handleResultPattern to emit helpers, these helpers cannot be interleaved with pushing into the array. Emit into a separate string to enable helpers to be emitted before the population of vector. Signed-off-by: Jacques Pienaar --- mlir/test/mlir-tblgen/rewriter-static-matcher.td | 15 +++++++++++++++ mlir/tools/mlir-tblgen/RewriterGen.cpp | 15 +++++++++------ 2 files changed, 24 insertions(+), 6 deletions(-) diff --git a/mlir/test/mlir-tblgen/rewriter-static-matcher.td b/mlir/test/mlir-tblgen/rewriter-static-matcher.td index 2907923cb28e4..7a84dfd5706b1 100644 --- a/mlir/test/mlir-tblgen/rewriter-static-matcher.td +++ b/mlir/test/mlir-tblgen/rewriter-static-matcher.td @@ -35,6 +35,16 @@ def COp : NS_Op<"c_op", []> { let results = (outs AnyInteger); } +def DOp : NS_Op<"d_op", []> { + let arguments = (ins + Variadic:$any_integer + ); + + let results = (outs AnyInteger); +} + +def Foo : NativeCodeCall<"foo($_builder, $0)">; + // Test static matcher for duplicate DagNode // --- @@ -53,3 +63,8 @@ def : Pat<(AOp (BOp I32Attr:$attr, I32:$int)), // CHECK: if(::mlir::failed([[$DAG_MATCHER]](rewriter, op1, tblgen_ops def : Pat<(COp $_, (BOp I32Attr:$attr, I32:$int)), (COp $attr, $int)>; + +// CHECK: auto [[$VAR:.*]] = foo( +// CHECK: ::llvm::SmallVector<::mlir::Value, 4> [[$ARR:tblgen_variadic_values_.*]]; +// CHECK: [[$ARR]].push_back([[$VAR]]); +def : Pat<(AOp $x), (DOp (variadic (Foo $x)))>; diff --git a/mlir/tools/mlir-tblgen/RewriterGen.cpp b/mlir/tools/mlir-tblgen/RewriterGen.cpp index d8e16d98fd756..b8a3d5b49938e 100644 --- a/mlir/tools/mlir-tblgen/RewriterGen.cpp +++ b/mlir/tools/mlir-tblgen/RewriterGen.cpp @@ -1261,20 +1261,23 @@ std::string PatternEmitter::handleResultPattern(DagNode resultTree, std::string PatternEmitter::handleVariadic(DagNode tree, int depth) { assert(tree.isVariadic()); + std::string output; + llvm::raw_string_ostream oss(output); auto name = std::string(formatv("tblgen_variadic_values_{0}", nextValueId++)); symbolInfoMap.bindValue(name); - os << "::llvm::SmallVector<::mlir::Value, 4> " << name << ";\n"; + oss << "::llvm::SmallVector<::mlir::Value, 4> " << name << ";\n"; for (int i = 0, e = tree.getNumArgs(); i != e; ++i) { if (auto child = tree.getArgAsNestedDag(i)) { - os << name << ".push_back(" << handleResultPattern(child, i, depth + 1) - << ");\n"; + oss << name << ".push_back(" << handleResultPattern(child, i, depth + 1) + << ");\n"; } else { - os << name << ".push_back(" - << handleOpArgument(tree.getArgAsLeaf(i), tree.getArgName(i)) - << ");\n"; + oss << name << ".push_back(" + << handleOpArgument(tree.getArgAsLeaf(i), tree.getArgName(i)) + << ");\n"; } } + os << oss.str(); return name; } From 6c17f1cabdee3399feceb478921a8369bde18b16 Mon Sep 17 00:00:00 2001 From: Adrian Prantl Date: Mon, 17 Jun 2024 10:54:46 -0700 Subject: [PATCH 20/25] Fix comments in ValueObjectPrinter.h (NFC) --- .../lldb/DataFormatters/ValueObjectPrinter.h | 21 +++++++++---------- 1 file changed, 10 insertions(+), 11 deletions(-) diff --git a/lldb/include/lldb/DataFormatters/ValueObjectPrinter.h b/lldb/include/lldb/DataFormatters/ValueObjectPrinter.h index 32b101a2f9843..fb5d60ba30d77 100644 --- a/lldb/include/lldb/DataFormatters/ValueObjectPrinter.h +++ b/lldb/include/lldb/DataFormatters/ValueObjectPrinter.h @@ -1,5 +1,4 @@ -//===-- ValueObjectPrinter.h ---------------------------------------*- C++ -//-*-===// +//===-- ValueObjectPrinter.h ------------------------------------*- C++ -*-===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. @@ -25,7 +24,7 @@ class ValueObjectPrinter { /// does not retain the ValueObject it is printing, that is the job of /// its caller. It also doesn't attempt to track changes in the /// ValueObject, e.g. changing synthetic child providers or changing - /// dynamic vrs. static vrs. synthetic settings. + /// dynamic versus static versus synthetic settings. public: ValueObjectPrinter(ValueObject &valobj, Stream *s); @@ -42,16 +41,16 @@ class ValueObjectPrinter { InstancePointersSetSP m_printed_instance_pointers; - // only this class (and subclasses, if any) should ever be concerned with the - // depth mechanism + /// Only this class (and subclasses, if any) should ever be + /// concerned with the depth mechanism. ValueObjectPrinter(ValueObject &valobj, Stream *s, const DumpValueObjectOptions &options, const DumpValueObjectOptions::PointerDepth &ptr_depth, uint32_t curr_depth, InstancePointersSetSP printed_instance_pointers); - // we should actually be using delegating constructors here but some versions - // of GCC still have trouble with those + /// Ee should actually be using delegating constructors here but + /// some versions of GCC still have trouble with those. void Init(ValueObject &valobj, Stream *s, const DumpValueObjectOptions &options, const DumpValueObjectOptions::PointerDepth &ptr_depth, @@ -67,7 +66,7 @@ class ValueObjectPrinter { /// use dynamic and use synthetic settings of the ValueObject being printed, /// so changes made to these settings won't affect already made /// ValueObjectPrinters. SetupMostSpecializedValue(); - + /// /// Access the cached "most specialized value" - that is the one to use for /// printing the value object's value. However, be sure to use /// GetValueForChildGeneration when you are generating the children of this @@ -143,9 +142,9 @@ class ValueObjectPrinter { bool ShouldShowName() const; ValueObject &m_orig_valobj; - ValueObject *m_cached_valobj; /// Cache the current "most specialized" value. - /// Don't use this directly, use - /// GetMostSpecializedValue. + /// Cache the current "most specialized" value. Don't use this + /// directly, use GetMostSpecializedValue. + ValueObject *m_cached_valobj; Stream *m_stream; DumpValueObjectOptions m_options; Flags m_type_flags; From 68fc8dffe4669fed1f7eb88e6770509505f9960a Mon Sep 17 00:00:00 2001 From: shaw young <58664393+shawbyoung@users.noreply.github.com> Date: Mon, 17 Jun 2024 13:27:56 -0700 Subject: [PATCH 21/25] [BOLT] Drop high discrepancy profiles in matching (#95156) Summary: Functions with high discrepancy (measured by matched function blocks) can be ignored with an added command line argument for better performance. Test Plan: Added stale-matching-min-matched-block.test --------- Co-authored-by: Amir Ayupov --- bolt/docs/CommandLineArgumentReference.md | 7 ++- bolt/lib/Profile/StaleProfileMatching.cpp | 30 +++++++--- ...arge_profile_stale_low_matched_blocks.yaml | 57 +++++++++++++++++++ .../X86/stale-matching-min-matched-block.test | 10 ++++ 4 files changed, 96 insertions(+), 8 deletions(-) create mode 100644 bolt/test/X86/Inputs/blarge_profile_stale_low_matched_blocks.yaml create mode 100644 bolt/test/X86/stale-matching-min-matched-block.test diff --git a/bolt/docs/CommandLineArgumentReference.md b/bolt/docs/CommandLineArgumentReference.md index 8887d1f5d5bd4..49e226513028f 100644 --- a/bolt/docs/CommandLineArgumentReference.md +++ b/bolt/docs/CommandLineArgumentReference.md @@ -802,6 +802,11 @@ The maximum size of a function to consider for inference. +- `--stale-matching-min-matched-block=` + + Minimum percent of exact match block for a function to be considered for + profile inference. + - `--stale-threshold=` Maximum percentage of stale functions to tolerate (default: 100) @@ -1161,4 +1166,4 @@ - `--print-options` - Print non-default options after command line parsing \ No newline at end of file + Print non-default options after command line parsing diff --git a/bolt/lib/Profile/StaleProfileMatching.cpp b/bolt/lib/Profile/StaleProfileMatching.cpp index 365bc5389266d..c9bcd04281e72 100644 --- a/bolt/lib/Profile/StaleProfileMatching.cpp +++ b/bolt/lib/Profile/StaleProfileMatching.cpp @@ -51,6 +51,12 @@ cl::opt cl::desc("Infer counts from stale profile data."), cl::init(false), cl::Hidden, cl::cat(BoltOptCategory)); +cl::opt StaleMatchingMinMatchedBlock( + "stale-matching-min-matched-block", + cl::desc("Percentage threshold of matched basic blocks at which stale " + "profile inference is executed."), + cl::init(0), cl::Hidden, cl::cat(BoltOptCategory)); + cl::opt StaleMatchingMaxFuncSize( "stale-matching-max-func-size", cl::desc("The maximum size of a function to consider for inference."), @@ -391,10 +397,9 @@ createFlowFunction(const BinaryFunction::BasicBlockOrderType &BlockOrder) { /// of the basic blocks in the binary, the count is "matched" to the block. /// Similarly, if both the source and the target of a count in the profile are /// matched to a jump in the binary, the count is recorded in CFG. -void matchWeightsByHashes(BinaryContext &BC, - const BinaryFunction::BasicBlockOrderType &BlockOrder, - const yaml::bolt::BinaryFunctionProfile &YamlBF, - FlowFunction &Func) { +size_t matchWeightsByHashes( + BinaryContext &BC, const BinaryFunction::BasicBlockOrderType &BlockOrder, + const yaml::bolt::BinaryFunctionProfile &YamlBF, FlowFunction &Func) { assert(Func.Blocks.size() == BlockOrder.size() + 1); std::vector Blocks; @@ -500,6 +505,8 @@ void matchWeightsByHashes(BinaryContext &BC, Block.HasUnknownWeight = false; Block.Weight = std::max(OutWeight[Block.Index], InWeight[Block.Index]); } + + return MatchedBlocks.size(); } /// The function finds all blocks that are (i) reachable from the Entry block @@ -575,10 +582,16 @@ void preprocessUnreachableBlocks(FlowFunction &Func) { /// Decide if stale profile matching can be applied for a given function. /// Currently we skip inference for (very) large instances and for instances /// having "unexpected" control flow (e.g., having no sink basic blocks). -bool canApplyInference(const FlowFunction &Func) { +bool canApplyInference(const FlowFunction &Func, + const yaml::bolt::BinaryFunctionProfile &YamlBF, + const uint64_t &MatchedBlocks) { if (Func.Blocks.size() > opts::StaleMatchingMaxFuncSize) return false; + if (MatchedBlocks * 100 < + opts::StaleMatchingMinMatchedBlock * YamlBF.Blocks.size()) + return false; + bool HasExitBlocks = llvm::any_of( Func.Blocks, [&](const FlowBlock &Block) { return Block.isExit(); }); if (!HasExitBlocks) @@ -725,18 +738,21 @@ bool YAMLProfileReader::inferStaleProfile( const BinaryFunction::BasicBlockOrderType BlockOrder( BF.getLayout().block_begin(), BF.getLayout().block_end()); + // Tracks the number of matched blocks. + // Create a wrapper flow function to use with the profile inference algorithm. FlowFunction Func = createFlowFunction(BlockOrder); // Match as many block/jump counts from the stale profile as possible - matchWeightsByHashes(BF.getBinaryContext(), BlockOrder, YamlBF, Func); + size_t MatchedBlocks = + matchWeightsByHashes(BF.getBinaryContext(), BlockOrder, YamlBF, Func); // Adjust the flow function by marking unreachable blocks Unlikely so that // they don't get any counts assigned. preprocessUnreachableBlocks(Func); // Check if profile inference can be applied for the instance. - if (!canApplyInference(Func)) + if (!canApplyInference(Func, YamlBF, MatchedBlocks)) return false; // Apply the profile inference algorithm. diff --git a/bolt/test/X86/Inputs/blarge_profile_stale_low_matched_blocks.yaml b/bolt/test/X86/Inputs/blarge_profile_stale_low_matched_blocks.yaml new file mode 100644 index 0000000000000..785e23922ce49 --- /dev/null +++ b/bolt/test/X86/Inputs/blarge_profile_stale_low_matched_blocks.yaml @@ -0,0 +1,57 @@ +--- +header: + profile-version: 1 + binary-name: 'reader-yaml.test.tmp.exe' + binary-build-id: '' + profile-flags: [ lbr ] + profile-origin: branch profile reader + profile-events: '' + dfs-order: false + hash-func: xxh3 +functions: + - name: SolveCubic + fid: 6 + hash: 0x0000000000000000 + exec: 151 + nblocks: 18 + blocks: + - bid: 0 + insns: 43 + hash: 0x4600940a609c0000 + exec: 151 + succ: [ { bid: 1, cnt: 151, mis: 2 }, { bid: 7, cnt: 0 } ] + - bid: 1 + insns: 7 + hash: 0x167a1f084f130088 + succ: [ { bid: 13, cnt: 151 }, { bid: 2, cnt: 0 } ] + - bid: 13 + insns: 26 + hash: 0xa8d50000f81902a7 + succ: [ { bid: 3, cnt: 89 }, { bid: 2, cnt: 10 } ] + - bid: 3 + insns: 9 + hash: 0xc516000073dc00a0 + succ: [ { bid: 5, cnt: 151 } ] + - bid: 5 + insns: 9 + hash: 0x6446e1ea500111 + - name: usqrt + fid: 7 + hash: 0x0000000000000000 + exec: 20 + nblocks: 6 + blocks: + - bid: 0 + insns: 4 + hash: 0x0000000000000001 + exec: 20 + succ: [ { bid: 1, cnt: 0 } ] + - bid: 1 + insns: 9 + hash: 0x0000000000000001 + succ: [ { bid: 3, cnt: 320, mis: 171 }, { bid: 2, cnt: 0 } ] + - bid: 3 + insns: 2 + hash: 0x0000000000000001 + succ: [ { bid: 1, cnt: 300, mis: 33 }, { bid: 4, cnt: 20 } ] +... diff --git a/bolt/test/X86/stale-matching-min-matched-block.test b/bolt/test/X86/stale-matching-min-matched-block.test new file mode 100644 index 0000000000000..383d4d7dcb9d2 --- /dev/null +++ b/bolt/test/X86/stale-matching-min-matched-block.test @@ -0,0 +1,10 @@ +## This script checks the stale-matching-min-matched-block flag. + +RUN: yaml2obj %p/Inputs/blarge.yaml &> %t.exe + +## Testing "usqrt" +RUN: llvm-bolt %t.exe -o %t.null --b %p/Inputs/blarge_profile_stale_low_matched_blocks.yaml \ +RUN: --infer-stale-profile=1 --stale-matching-min-matched-block=75 \ +RUN: --profile-ignore-hash=1 --debug-only=bolt-prof 2>&1 | FileCheck %s + +CHECK: BOLT-INFO: inferred profile for 1 (50.00% of profiled, 50.00% of stale) functions responsible for 46.31% samples (552 out of 1192) From 0041582b6ca137ad04e26985a87a1ae45fb0f0bb Mon Sep 17 00:00:00 2001 From: Jonas Devlieghere Date: Mon, 17 Jun 2024 14:52:40 -0700 Subject: [PATCH 22/25] [lldb] Fix Python interpreter workaround (attempt #2) On macOS, to make DYLD_INSERT_LIBRARIES and the Python shim work together, we have a workaroud that copies the "real" Python interpreter into the build directory. This doesn't work when running in a virtual environment, as the copied interpreter cannot find the packages installed in the virtual environment relative to itself. Address this issue by copying the Python interpreter into the virtual environment's `bin` folder, rather than the build folder, when the test suite detects that it's being run inside a virtual environment. I'm not thrilled about this solution because it puts a file outside the build directory. However, given virtual environments are considered disposable, this seems reasonable. --- lldb/test/API/lit.cfg.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/lldb/test/API/lit.cfg.py b/lldb/test/API/lit.cfg.py index 48c5f49e78e01..6d45508ccb916 100644 --- a/lldb/test/API/lit.cfg.py +++ b/lldb/test/API/lit.cfg.py @@ -63,13 +63,14 @@ def find_python_interpreter(): if "DYLD_INSERT_LIBRARIES" not in config.environment: return None - # If we're running in a virtual environment, we already have a copy of the - # Python executable. + # If we're running in a virtual environment, we have to copy Python into + # the virtual environment for it to work. if sys.prefix != sys.base_prefix: - return None + copied_python = os.path.join(sys.prefix, "bin", "copied-python") + else: + copied_python = os.path.join(config.lldb_build_directory, "copied-python") # Avoid doing any work if we already copied the binary. - copied_python = os.path.join(config.lldb_build_directory, "copied-python") if os.path.isfile(copied_python): return copied_python From cd48335a8a5e97c4a4942fb4f94a511da3eb60f4 Mon Sep 17 00:00:00 2001 From: Ahmed Bougacha Date: Mon, 17 Jun 2024 15:19:24 -0700 Subject: [PATCH 23/25] [docs][Security Group] Update Apple representation. (#95491) Remove Kate; Apple is now represented by Oliver (and myself). --- llvm/docs/Security.rst | 1 - 1 file changed, 1 deletion(-) diff --git a/llvm/docs/Security.rst b/llvm/docs/Security.rst index 91b762719138d..9bd2b1d435fd0 100644 --- a/llvm/docs/Security.rst +++ b/llvm/docs/Security.rst @@ -44,7 +44,6 @@ username for an individual isn't available, the brackets will be empty. * Ed Maste (individual; FreeBSD) [@emaste] * George Burgess IV (Google) [@gburgessiv] * Josh Stone (Red Hat; Rust) [@cuviper] -* Kate McInnes (Apple) [] * Kristof Beyls (ARM) [@kbeyls] * Matthew Riley (Google) [@mmdriley] * Nikhil Gupta (Nvidia) [] From c6049e67efaaca34ca8ad93b007397b118574b81 Mon Sep 17 00:00:00 2001 From: Florian Mayer Date: Mon, 17 Jun 2024 15:04:57 -0700 Subject: [PATCH 24/25] Reapply "[HWASan] [compiler-rt] support non-4k pages on Android" (#95853) Updated MapDynamicShadow callsite in asan_win. --- compiler-rt/lib/asan/asan_linux.cpp | 3 ++- compiler-rt/lib/asan/asan_mac.cpp | 3 ++- compiler-rt/lib/asan/asan_premap_shadow.cpp | 3 ++- compiler-rt/lib/asan/asan_win.cpp | 3 ++- .../lib/hwasan/hwasan_dynamic_shadow.cpp | 17 +++++++++++------ compiler-rt/lib/memprof/memprof_linux.cpp | 3 ++- .../lib/sanitizer_common/sanitizer_common.h | 17 ++++++++--------- .../lib/sanitizer_common/sanitizer_linux.cpp | 4 ++-- .../sanitizer_linux_libcdep.cpp | 5 ++--- .../lib/sanitizer_common/sanitizer_mac.cpp | 4 ++-- .../lib/sanitizer_common/sanitizer_win.cpp | 5 ++--- 11 files changed, 37 insertions(+), 30 deletions(-) diff --git a/compiler-rt/lib/asan/asan_linux.cpp b/compiler-rt/lib/asan/asan_linux.cpp index a517de5af00dc..0b470db86748f 100644 --- a/compiler-rt/lib/asan/asan_linux.cpp +++ b/compiler-rt/lib/asan/asan_linux.cpp @@ -93,7 +93,8 @@ uptr FindDynamicShadowStart() { # endif return MapDynamicShadow(shadow_size_bytes, ASAN_SHADOW_SCALE, - /*min_shadow_base_alignment*/ 0, kHighMemEnd); + /*min_shadow_base_alignment*/ 0, kHighMemEnd, + GetMmapGranularity()); } void AsanApplyToGlobals(globals_op_fptr op, const void *needle) { diff --git a/compiler-rt/lib/asan/asan_mac.cpp b/compiler-rt/lib/asan/asan_mac.cpp index b250f796e165f..bfc349223258b 100644 --- a/compiler-rt/lib/asan/asan_mac.cpp +++ b/compiler-rt/lib/asan/asan_mac.cpp @@ -51,7 +51,8 @@ bool IsSystemHeapAddress (uptr addr) { return false; } uptr FindDynamicShadowStart() { return MapDynamicShadow(MemToShadowSize(kHighMemEnd), ASAN_SHADOW_SCALE, - /*min_shadow_base_alignment*/ 0, kHighMemEnd); + /*min_shadow_base_alignment*/ 0, kHighMemEnd, + GetMmapGranularity()); } // No-op. Mac does not support static linkage anyway. diff --git a/compiler-rt/lib/asan/asan_premap_shadow.cpp b/compiler-rt/lib/asan/asan_premap_shadow.cpp index bed2f62a22511..6e08b8f966507 100644 --- a/compiler-rt/lib/asan/asan_premap_shadow.cpp +++ b/compiler-rt/lib/asan/asan_premap_shadow.cpp @@ -33,7 +33,8 @@ uptr PremapShadowSize() { // PremapShadowSize() bytes on the right of it are mapped r/o. uptr PremapShadow() { return MapDynamicShadow(PremapShadowSize(), /*mmap_alignment_scale*/ 3, - /*min_shadow_base_alignment*/ 0, kHighMemEnd); + /*min_shadow_base_alignment*/ 0, kHighMemEnd, + GetMmapGranularity()); } bool PremapShadowFailed() { diff --git a/compiler-rt/lib/asan/asan_win.cpp b/compiler-rt/lib/asan/asan_win.cpp index cda1f7a91e140..09a13b11cff1f 100644 --- a/compiler-rt/lib/asan/asan_win.cpp +++ b/compiler-rt/lib/asan/asan_win.cpp @@ -268,7 +268,8 @@ void PlatformTSDDtor(void *tsd) { AsanThread::TSDDtor(tsd); } // ---------------------- Various stuff ---------------- {{{ uptr FindDynamicShadowStart() { return MapDynamicShadow(MemToShadowSize(kHighMemEnd), ASAN_SHADOW_SCALE, - /*min_shadow_base_alignment*/ 0, kHighMemEnd); + /*min_shadow_base_alignment*/ 0, kHighMemEnd, + GetMmapGranularity()); } void AsanCheckDynamicRTPrereqs() {} diff --git a/compiler-rt/lib/hwasan/hwasan_dynamic_shadow.cpp b/compiler-rt/lib/hwasan/hwasan_dynamic_shadow.cpp index 7642ba6c0bf08..48bc3b631ac07 100644 --- a/compiler-rt/lib/hwasan/hwasan_dynamic_shadow.cpp +++ b/compiler-rt/lib/hwasan/hwasan_dynamic_shadow.cpp @@ -36,15 +36,20 @@ decltype(__hwasan_shadow)* __hwasan_premap_shadow(); namespace __hwasan { +// We cannot call anything in libc here (see comment above), so we need to +// assume the biggest allowed page size. +// Android max page size is defined as 16k here: +// https://android.googlesource.com/platform/bionic/+/main/libc/platform/bionic/page.h#41 +static constexpr uptr kMaxGranularity = 16384; + // Conservative upper limit. static uptr PremapShadowSize() { - return RoundUpTo(GetMaxVirtualAddress() >> kShadowScale, - GetMmapGranularity()); + return RoundUpTo(GetMaxVirtualAddress() >> kShadowScale, kMaxGranularity); } static uptr PremapShadow() { return MapDynamicShadow(PremapShadowSize(), kShadowScale, - kShadowBaseAlignment, kHighMemEnd); + kShadowBaseAlignment, kHighMemEnd, kMaxGranularity); } static bool IsPremapShadowAvailable() { @@ -56,7 +61,7 @@ static bool IsPremapShadowAvailable() { } static uptr FindPremappedShadowStart(uptr shadow_size_bytes) { - const uptr granularity = GetMmapGranularity(); + const uptr granularity = kMaxGranularity; const uptr shadow_start = reinterpret_cast(&__hwasan_shadow); const uptr premap_shadow_size = PremapShadowSize(); const uptr shadow_size = RoundUpTo(shadow_size_bytes, granularity); @@ -109,7 +114,7 @@ uptr FindDynamicShadowStart(uptr shadow_size_bytes) { if (IsPremapShadowAvailable()) return FindPremappedShadowStart(shadow_size_bytes); return MapDynamicShadow(shadow_size_bytes, kShadowScale, kShadowBaseAlignment, - kHighMemEnd); + kHighMemEnd, kMaxGranularity); } } // namespace __hwasan @@ -135,7 +140,7 @@ uptr FindDynamicShadowStart(uptr shadow_size_bytes) { RingBufferSize()); # endif return MapDynamicShadow(shadow_size_bytes, kShadowScale, kShadowBaseAlignment, - kHighMemEnd); + kHighMemEnd, GetMmapGranularity()); } } // namespace __hwasan diff --git a/compiler-rt/lib/memprof/memprof_linux.cpp b/compiler-rt/lib/memprof/memprof_linux.cpp index 26a2b456b874e..fbe5d250f840b 100644 --- a/compiler-rt/lib/memprof/memprof_linux.cpp +++ b/compiler-rt/lib/memprof/memprof_linux.cpp @@ -58,7 +58,8 @@ void InitializePlatformExceptionHandlers() {} uptr FindDynamicShadowStart() { uptr shadow_size_bytes = MemToShadowSize(kHighMemEnd); return MapDynamicShadow(shadow_size_bytes, SHADOW_SCALE, - /*min_shadow_base_alignment*/ 0, kHighMemEnd); + /*min_shadow_base_alignment*/ 0, kHighMemEnd, + GetMmapGranularity()); } void *MemprofDlSymNext(const char *sym) { return dlsym(RTLD_NEXT, sym); } diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_common.h b/compiler-rt/lib/sanitizer_common/sanitizer_common.h index c451fc962c529..2d1059140c303 100644 --- a/compiler-rt/lib/sanitizer_common/sanitizer_common.h +++ b/compiler-rt/lib/sanitizer_common/sanitizer_common.h @@ -60,14 +60,10 @@ inline int Verbosity() { return atomic_load(¤t_verbosity, memory_order_relaxed); } -#if SANITIZER_ANDROID -inline uptr GetPageSize() { -// Android post-M sysconf(_SC_PAGESIZE) crashes if called from .preinit_array. - return 4096; -} -inline uptr GetPageSizeCached() { - return 4096; -} +#if SANITIZER_ANDROID && !defined(__aarch64__) +// 32-bit Android only has 4k pages. +inline uptr GetPageSize() { return 4096; } +inline uptr GetPageSizeCached() { return 4096; } #else uptr GetPageSize(); extern uptr PageSizeCached; @@ -77,6 +73,7 @@ inline uptr GetPageSizeCached() { return PageSizeCached; } #endif + uptr GetMmapGranularity(); uptr GetMaxVirtualAddress(); uptr GetMaxUserVirtualAddress(); @@ -91,6 +88,7 @@ void GetThreadStackAndTls(bool main, uptr *stk_addr, uptr *stk_size, // Memory management void *MmapOrDie(uptr size, const char *mem_type, bool raw_report = false); + inline void *MmapOrDieQuietly(uptr size, const char *mem_type) { return MmapOrDie(size, mem_type, /*raw_report*/ true); } @@ -139,7 +137,8 @@ void UnmapFromTo(uptr from, uptr to); // shadow_size_bytes bytes on the right, which on linux is mapped no access. // The high_mem_end may be updated if the original shadow size doesn't fit. uptr MapDynamicShadow(uptr shadow_size_bytes, uptr shadow_scale, - uptr min_shadow_base_alignment, uptr &high_mem_end); + uptr min_shadow_base_alignment, uptr &high_mem_end, + uptr granularity); // Let S = max(shadow_size, num_aliases * alias_size, ring_buffer_size). // Reserves 2*S bytes of address space to the right of the returned address and diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_linux.cpp b/compiler-rt/lib/sanitizer_common/sanitizer_linux.cpp index 5d2dd3a7a658f..d15caa76efb06 100644 --- a/compiler-rt/lib/sanitizer_common/sanitizer_linux.cpp +++ b/compiler-rt/lib/sanitizer_common/sanitizer_linux.cpp @@ -1136,7 +1136,7 @@ uptr GetMaxUserVirtualAddress() { return addr; } -# if !SANITIZER_ANDROID +# if !SANITIZER_ANDROID || defined(__aarch64__) uptr GetPageSize() { # if SANITIZER_LINUX && (defined(__x86_64__) || defined(__i386__)) && \ defined(EXEC_PAGESIZE) @@ -1155,7 +1155,7 @@ uptr GetPageSize() { return sysconf(_SC_PAGESIZE); // EXEC_PAGESIZE may not be trustworthy. # endif } -# endif // !SANITIZER_ANDROID +# endif uptr ReadBinaryName(/*out*/ char *buf, uptr buf_len) { # if SANITIZER_SOLARIS diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_linux_libcdep.cpp b/compiler-rt/lib/sanitizer_common/sanitizer_linux_libcdep.cpp index 6d05411222d9e..175362183fd78 100644 --- a/compiler-rt/lib/sanitizer_common/sanitizer_linux_libcdep.cpp +++ b/compiler-rt/lib/sanitizer_common/sanitizer_linux_libcdep.cpp @@ -995,9 +995,8 @@ void UnmapFromTo(uptr from, uptr to) { } uptr MapDynamicShadow(uptr shadow_size_bytes, uptr shadow_scale, - uptr min_shadow_base_alignment, - UNUSED uptr &high_mem_end) { - const uptr granularity = GetMmapGranularity(); + uptr min_shadow_base_alignment, UNUSED uptr &high_mem_end, + uptr granularity) { const uptr alignment = Max(granularity << shadow_scale, 1ULL << min_shadow_base_alignment); const uptr left_padding = diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_mac.cpp b/compiler-rt/lib/sanitizer_common/sanitizer_mac.cpp index 24e3d1112520e..cbdf3e95925bf 100644 --- a/compiler-rt/lib/sanitizer_common/sanitizer_mac.cpp +++ b/compiler-rt/lib/sanitizer_common/sanitizer_mac.cpp @@ -1188,8 +1188,8 @@ uptr GetMaxVirtualAddress() { } uptr MapDynamicShadow(uptr shadow_size_bytes, uptr shadow_scale, - uptr min_shadow_base_alignment, uptr &high_mem_end) { - const uptr granularity = GetMmapGranularity(); + uptr min_shadow_base_alignment, uptr &high_mem_end, + uptr granularity) { const uptr alignment = Max(granularity << shadow_scale, 1ULL << min_shadow_base_alignment); const uptr left_padding = diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_win.cpp b/compiler-rt/lib/sanitizer_common/sanitizer_win.cpp index 4e5ad8e4693b4..0b198890fc798 100644 --- a/compiler-rt/lib/sanitizer_common/sanitizer_win.cpp +++ b/compiler-rt/lib/sanitizer_common/sanitizer_win.cpp @@ -384,9 +384,8 @@ bool DontDumpShadowMemory(uptr addr, uptr length) { } uptr MapDynamicShadow(uptr shadow_size_bytes, uptr shadow_scale, - uptr min_shadow_base_alignment, - UNUSED uptr &high_mem_end) { - const uptr granularity = GetMmapGranularity(); + uptr min_shadow_base_alignment, UNUSED uptr &high_mem_end, + uptr granularity) { const uptr alignment = Max(granularity << shadow_scale, 1ULL << min_shadow_base_alignment); const uptr left_padding = From 013a40d474e3acaa7a090d5e279f2d8a2f18fbd8 Mon Sep 17 00:00:00 2001 From: Shilei Tian Date: Mon, 17 Jun 2024 18:48:33 -0400 Subject: [PATCH 25/25] [Clang][AMDGPU] Add a new builtin type for buffer rsrc --- clang/include/clang/AST/ASTContext.h | 2 + clang/include/clang/AST/Type.h | 3 + clang/include/clang/AST/TypeProperties.td | 4 + clang/include/clang/Basic/AMDGPUTypes.def | 21 +++++ .../include/clang/Serialization/ASTBitCodes.h | 5 +- clang/lib/AST/ASTContext.cpp | 16 ++++ clang/lib/AST/ASTImporter.cpp | 4 + clang/lib/AST/ExprConstant.cpp | 2 + clang/lib/AST/ItaniumMangle.cpp | 6 ++ clang/lib/AST/MicrosoftMangle.cpp | 2 + clang/lib/AST/NSAPI.cpp | 2 + clang/lib/AST/PrintfFormatString.cpp | 2 + clang/lib/AST/Type.cpp | 6 ++ clang/lib/AST/TypeLoc.cpp | 2 + clang/lib/CodeGen/CGDebugInfo.cpp | 11 ++- clang/lib/CodeGen/CGDebugInfo.h | 2 + clang/lib/CodeGen/CodeGenTypes.cpp | 5 ++ clang/lib/CodeGen/ItaniumCXXABI.cpp | 2 + clang/lib/Index/USRGeneration.cpp | 5 ++ clang/lib/Sema/Sema.cpp | 8 ++ clang/lib/Sema/SemaExpr.cpp | 4 + clang/lib/Serialization/ASTCommon.cpp | 5 ++ clang/lib/Serialization/ASTReader.cpp | 5 ++ clang/test/AST/ast-dump-amdgpu-types.c | 10 +++ .../amdgpu-buffer-rsrc-type-debug-info.c | 8 ++ .../amdgpu-buffer-rsrc-typeinfo.cpp | 9 ++ .../CodeGenOpenCL/amdgcn-buffer-rsrc-type.cl | 82 +++++++++++++++++++ clang/test/SemaCXX/amdgpu-buffer-rsrc.cpp | 17 ++++ clang/test/SemaHIP/amdgpu-buffer-rsrc.hip | 20 +++++ clang/test/SemaOpenCL/amdgpu-buffer-rsrc.cl | 12 +++ clang/test/SemaOpenMP/amdgpu-buffer-rsrc.cpp | 17 ++++ clang/tools/libclang/CIndex.cpp | 2 + 32 files changed, 299 insertions(+), 2 deletions(-) create mode 100644 clang/include/clang/Basic/AMDGPUTypes.def create mode 100644 clang/test/AST/ast-dump-amdgpu-types.c create mode 100644 clang/test/CodeGen/amdgpu-buffer-rsrc-type-debug-info.c create mode 100644 clang/test/CodeGenCXX/amdgpu-buffer-rsrc-typeinfo.cpp create mode 100644 clang/test/CodeGenOpenCL/amdgcn-buffer-rsrc-type.cl create mode 100644 clang/test/SemaCXX/amdgpu-buffer-rsrc.cpp create mode 100644 clang/test/SemaHIP/amdgpu-buffer-rsrc.hip create mode 100644 clang/test/SemaOpenCL/amdgpu-buffer-rsrc.cl create mode 100644 clang/test/SemaOpenMP/amdgpu-buffer-rsrc.cpp diff --git a/clang/include/clang/AST/ASTContext.h b/clang/include/clang/AST/ASTContext.h index 53ece996769a8..4d1f440506e09 100644 --- a/clang/include/clang/AST/ASTContext.h +++ b/clang/include/clang/AST/ASTContext.h @@ -1147,6 +1147,8 @@ class ASTContext : public RefCountedBase { #include "clang/Basic/RISCVVTypes.def" #define WASM_TYPE(Name, Id, SingletonId) CanQualType SingletonId; #include "clang/Basic/WebAssemblyReferenceTypes.def" +#define AMDGPU_TYPE(Name, Id, SingletonId) CanQualType SingletonId; +#include "clang/Basic/AMDGPUTypes.def" // Types for deductions in C++0x [stmt.ranged]'s desugaring. Built on demand. mutable QualType AutoDeductTy; // Deduction against 'auto'. diff --git a/clang/include/clang/AST/Type.h b/clang/include/clang/AST/Type.h index fab233b62d8d1..61246479188e9 100644 --- a/clang/include/clang/AST/Type.h +++ b/clang/include/clang/AST/Type.h @@ -3015,6 +3015,9 @@ class BuiltinType : public Type { // WebAssembly reference types #define WASM_TYPE(Name, Id, SingletonId) Id, #include "clang/Basic/WebAssemblyReferenceTypes.def" +// AMDGPU types +#define AMDGPU_TYPE(Name, Id, SingletonId) Id, +#include "clang/Basic/AMDGPUTypes.def" // All other builtin types #define BUILTIN_TYPE(Id, SingletonId) Id, #define LAST_BUILTIN_TYPE(Id) LastKind = Id diff --git a/clang/include/clang/AST/TypeProperties.td b/clang/include/clang/AST/TypeProperties.td index 40dd16f080e2e..aba14b222a03a 100644 --- a/clang/include/clang/AST/TypeProperties.td +++ b/clang/include/clang/AST/TypeProperties.td @@ -861,6 +861,10 @@ let Class = BuiltinType in { case BuiltinType::ID: return ctx.SINGLETON_ID; #include "clang/Basic/WebAssemblyReferenceTypes.def" +#define AMDGPU_TYPE(NAME, ID, SINGLETON_ID) \ + case BuiltinType::ID: return ctx.SINGLETON_ID; +#include "clang/Basic/AMDGPUTypes.def" + #define BUILTIN_TYPE(ID, SINGLETON_ID) \ case BuiltinType::ID: return ctx.SINGLETON_ID; #include "clang/AST/BuiltinTypes.def" diff --git a/clang/include/clang/Basic/AMDGPUTypes.def b/clang/include/clang/Basic/AMDGPUTypes.def new file mode 100644 index 0000000000000..e0d7be470a325 --- /dev/null +++ b/clang/include/clang/Basic/AMDGPUTypes.def @@ -0,0 +1,21 @@ +//===-- AMDGPUTypes.def - Metadata about AMDGPU types -----------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file defines various AMDGPU builtin types. +// +//===----------------------------------------------------------------------===// + +#ifndef AMDGPU_OPAQUE_PTR_TYPE +#define AMDGPU_OPAQUE_PTR_TYPE(Name, MangledName, AS, Width, Align, Id, SingletonId) \ + AMDGPU_TYPE(Name, Id, SingletonId) +#endif + +AMDGPU_OPAQUE_PTR_TYPE("__amdgpu_buffer_rsrc_t", "__amdgpu_buffer_rsrc_t", 8, 128, 128, AMDGPUBufferRsrc, AMDGPUBufferRsrcTy) + +#undef AMDGPU_TYPE +#undef AMDGPU_OPAQUE_PTR_TYPE diff --git a/clang/include/clang/Serialization/ASTBitCodes.h b/clang/include/clang/Serialization/ASTBitCodes.h index a4728b1c06b3f..24e616f76b9af 100644 --- a/clang/include/clang/Serialization/ASTBitCodes.h +++ b/clang/include/clang/Serialization/ASTBitCodes.h @@ -1097,6 +1097,9 @@ enum PredefinedTypeIDs { // \brief WebAssembly reference types with auto numeration #define WASM_TYPE(Name, Id, SingletonId) PREDEF_TYPE_##Id##_ID, #include "clang/Basic/WebAssemblyReferenceTypes.def" +// \brief AMDGPU types with auto numeration +#define AMDGPU_TYPE(Name, Id, SingletonId) PREDEF_TYPE_##Id##_ID, +#include "clang/Basic/AMDGPUTypes.def" /// The placeholder type for unresolved templates. PREDEF_TYPE_UNRESOLVED_TEMPLATE, @@ -1109,7 +1112,7 @@ enum PredefinedTypeIDs { /// /// Type IDs for non-predefined types will start at /// NUM_PREDEF_TYPE_IDs. -const unsigned NUM_PREDEF_TYPE_IDS = 503; +const unsigned NUM_PREDEF_TYPE_IDS = 504; // Ensure we do not overrun the predefined types we reserved // in the enum PredefinedTypeIDs above. diff --git a/clang/lib/AST/ASTContext.cpp b/clang/lib/AST/ASTContext.cpp index 34aa399fda2f8..d389ef12468ee 100644 --- a/clang/lib/AST/ASTContext.cpp +++ b/clang/lib/AST/ASTContext.cpp @@ -1384,6 +1384,13 @@ void ASTContext::InitBuiltinTypes(const TargetInfo &Target, #include "clang/Basic/WebAssemblyReferenceTypes.def" } + if (Target.getTriple().isAMDGPU() || + (AuxTarget && AuxTarget->getTriple().isAMDGPU())) { +#define AMDGPU_TYPE(Name, Id, SingletonId) \ + InitBuiltinType(SingletonId, BuiltinType::Id); +#include "clang/Basic/AMDGPUTypes.def" + } + // Builtin type for __objc_yes and __objc_no ObjCBuiltinBoolTy = (Target.useSignedCharForObjCBool() ? SignedCharTy : BoolTy); @@ -2200,6 +2207,13 @@ TypeInfo ASTContext::getTypeInfoImpl(const Type *T) const { Align = 8; \ break; #include "clang/Basic/WebAssemblyReferenceTypes.def" +#define AMDGPU_OPAQUE_PTR_TYPE(NAME, MANGLEDNAME, AS, WIDTH, ALIGN, ID, \ + SINGLETONID) \ + case BuiltinType::ID: \ + Width = WIDTH; \ + Align = ALIGN; \ + break; +#include "clang/Basic/AMDGPUTypes.def" } break; case Type::ObjCObjectPointer: @@ -8168,6 +8182,8 @@ static char getObjCEncodingForPrimitiveType(const ASTContext *C, #include "clang/Basic/RISCVVTypes.def" #define WASM_TYPE(Name, Id, SingletonId) case BuiltinType::Id: #include "clang/Basic/WebAssemblyReferenceTypes.def" +#define AMDGPU_TYPE(Name, Id, SingletonId) case BuiltinType::Id: +#include "clang/Basic/AMDGPUTypes.def" { DiagnosticsEngine &Diags = C->getDiagnostics(); unsigned DiagID = Diags.getCustomDiagID(DiagnosticsEngine::Error, diff --git a/clang/lib/AST/ASTImporter.cpp b/clang/lib/AST/ASTImporter.cpp index 02cd4ed9a6cac..1b67feaae8874 100644 --- a/clang/lib/AST/ASTImporter.cpp +++ b/clang/lib/AST/ASTImporter.cpp @@ -1099,6 +1099,10 @@ ExpectedType ASTNodeImporter::VisitBuiltinType(const BuiltinType *T) { case BuiltinType::Id: \ return Importer.getToContext().SingletonId; #include "clang/Basic/WebAssemblyReferenceTypes.def" +#define AMDGPU_TYPE(Name, Id, SingletonId) \ + case BuiltinType::Id: \ + return Importer.getToContext().SingletonId; +#include "clang/Basic/AMDGPUTypes.def" #define SHARED_SINGLETON_TYPE(Expansion) #define BUILTIN_TYPE(Id, SingletonId) \ case BuiltinType::Id: return Importer.getToContext().SingletonId; diff --git a/clang/lib/AST/ExprConstant.cpp b/clang/lib/AST/ExprConstant.cpp index 3a6c8b4f82cca..dd355a7125c5a 100644 --- a/clang/lib/AST/ExprConstant.cpp +++ b/clang/lib/AST/ExprConstant.cpp @@ -11814,6 +11814,8 @@ GCCTypeClass EvaluateBuiltinClassifyType(QualType T, #include "clang/Basic/RISCVVTypes.def" #define WASM_TYPE(Name, Id, SingletonId) case BuiltinType::Id: #include "clang/Basic/WebAssemblyReferenceTypes.def" +#define AMDGPU_TYPE(Name, Id, SingletonId) case BuiltinType::Id: +#include "clang/Basic/AMDGPUTypes.def" return GCCTypeClass::None; case BuiltinType::Dependent: diff --git a/clang/lib/AST/ItaniumMangle.cpp b/clang/lib/AST/ItaniumMangle.cpp index ed9e6eeb36c75..203db72c43733 100644 --- a/clang/lib/AST/ItaniumMangle.cpp +++ b/clang/lib/AST/ItaniumMangle.cpp @@ -3423,6 +3423,12 @@ void CXXNameMangler::mangleType(const BuiltinType *T) { Out << 'u' << type_name.size() << type_name; \ break; #include "clang/Basic/WebAssemblyReferenceTypes.def" +#define AMDGPU_TYPE(Name, Id, SingletonId) \ + case BuiltinType::Id: \ + type_name = Name; \ + Out << 'u' << type_name.size() << type_name; \ + break; +#include "clang/Basic/AMDGPUTypes.def" } } diff --git a/clang/lib/AST/MicrosoftMangle.cpp b/clang/lib/AST/MicrosoftMangle.cpp index a863ec7a529b9..d87be5f2043a9 100644 --- a/clang/lib/AST/MicrosoftMangle.cpp +++ b/clang/lib/AST/MicrosoftMangle.cpp @@ -2612,6 +2612,8 @@ void MicrosoftCXXNameMangler::mangleType(const BuiltinType *T, Qualifiers, #include "clang/Basic/PPCTypes.def" #define RVV_TYPE(Name, Id, SingletonId) case BuiltinType::Id: #include "clang/Basic/RISCVVTypes.def" +#define AMDGPU_TYPE(Name, Id, SingletonId) case BuiltinType::Id: +#include "clang/Basic/AMDGPUTypes.def" case BuiltinType::ShortAccum: case BuiltinType::Accum: case BuiltinType::LongAccum: diff --git a/clang/lib/AST/NSAPI.cpp b/clang/lib/AST/NSAPI.cpp index 2d16237f5325a..48d1763125e6c 100644 --- a/clang/lib/AST/NSAPI.cpp +++ b/clang/lib/AST/NSAPI.cpp @@ -453,6 +453,8 @@ NSAPI::getNSNumberFactoryMethodKind(QualType T) const { #include "clang/Basic/RISCVVTypes.def" #define WASM_TYPE(Name, Id, SingletonId) case BuiltinType::Id: #include "clang/Basic/WebAssemblyReferenceTypes.def" +#define AMDGPU_TYPE(Name, Id, SingletonId) case BuiltinType::Id: +#include "clang/Basic/AMDGPUTypes.def" case BuiltinType::BoundMember: case BuiltinType::UnresolvedTemplate: case BuiltinType::Dependent: diff --git a/clang/lib/AST/PrintfFormatString.cpp b/clang/lib/AST/PrintfFormatString.cpp index dd3b38fabb550..3031d76abbd75 100644 --- a/clang/lib/AST/PrintfFormatString.cpp +++ b/clang/lib/AST/PrintfFormatString.cpp @@ -865,6 +865,8 @@ bool PrintfSpecifier::fixType(QualType QT, const LangOptions &LangOpt, #include "clang/Basic/RISCVVTypes.def" #define WASM_TYPE(Name, Id, SingletonId) case BuiltinType::Id: #include "clang/Basic/WebAssemblyReferenceTypes.def" +#define AMDGPU_TYPE(Name, Id, SingletonId) case BuiltinType::Id: +#include "clang/Basic/AMDGPUTypes.def" #define SIGNED_TYPE(Id, SingletonId) #define UNSIGNED_TYPE(Id, SingletonId) #define FLOATING_TYPE(Id, SingletonId) diff --git a/clang/lib/AST/Type.cpp b/clang/lib/AST/Type.cpp index 33acae2cbafac..656b733a13b0e 100644 --- a/clang/lib/AST/Type.cpp +++ b/clang/lib/AST/Type.cpp @@ -3509,6 +3509,10 @@ StringRef BuiltinType::getName(const PrintingPolicy &Policy) const { case Id: \ return Name; #include "clang/Basic/WebAssemblyReferenceTypes.def" +#define AMDGPU_TYPE(Name, Id, SingletonId) \ + case Id: \ + return Name; +#include "clang/Basic/AMDGPUTypes.def" } llvm_unreachable("Invalid builtin type."); @@ -4778,6 +4782,8 @@ bool Type::canHaveNullability(bool ResultIfUnknown) const { #include "clang/Basic/RISCVVTypes.def" #define WASM_TYPE(Name, Id, SingletonId) case BuiltinType::Id: #include "clang/Basic/WebAssemblyReferenceTypes.def" +#define AMDGPU_TYPE(Name, Id, SingletonId) case BuiltinType::Id: +#include "clang/Basic/AMDGPUTypes.def" case BuiltinType::BuiltinFn: case BuiltinType::NullPtr: case BuiltinType::IncompleteMatrixIdx: diff --git a/clang/lib/AST/TypeLoc.cpp b/clang/lib/AST/TypeLoc.cpp index 9dd90d9bf4e54..33e6ccbadc12d 100644 --- a/clang/lib/AST/TypeLoc.cpp +++ b/clang/lib/AST/TypeLoc.cpp @@ -428,6 +428,8 @@ TypeSpecifierType BuiltinTypeLoc::getWrittenTypeSpec() const { #include "clang/Basic/RISCVVTypes.def" #define WASM_TYPE(Name, Id, SingletonId) case BuiltinType::Id: #include "clang/Basic/WebAssemblyReferenceTypes.def" +#define AMDGPU_TYPE(Name, Id, SingletonId) case BuiltinType::Id: +#include "clang/Basic/AMDGPUTypes.def" case BuiltinType::BuiltinFn: case BuiltinType::IncompleteMatrixIdx: case BuiltinType::ArraySection: diff --git a/clang/lib/CodeGen/CGDebugInfo.cpp b/clang/lib/CodeGen/CGDebugInfo.cpp index 572ba84d22ef5..a072475ba7705 100644 --- a/clang/lib/CodeGen/CGDebugInfo.cpp +++ b/clang/lib/CodeGen/CGDebugInfo.cpp @@ -865,7 +865,16 @@ llvm::DIType *CGDebugInfo::CreateType(const BuiltinType *BT) { return SingletonId; \ } #include "clang/Basic/WebAssemblyReferenceTypes.def" - +#define AMDGPU_OPAQUE_PTR_TYPE(Name, MangledName, AS, Width, Align, Id, \ + SingletonId) \ + case BuiltinType::Id: { \ + if (!SingletonId) \ + SingletonId = \ + DBuilder.createForwardDecl(llvm::dwarf::DW_TAG_structure_type, \ + MangledName, TheCU, TheCU->getFile(), 0); \ + return SingletonId; \ + } +#include "clang/Basic/AMDGPUTypes.def" case BuiltinType::UChar: case BuiltinType::Char_U: Encoding = llvm::dwarf::DW_ATE_unsigned_char; diff --git a/clang/lib/CodeGen/CGDebugInfo.h b/clang/lib/CodeGen/CGDebugInfo.h index 272c8d6e75965..2731c627d9dc3 100644 --- a/clang/lib/CodeGen/CGDebugInfo.h +++ b/clang/lib/CodeGen/CGDebugInfo.h @@ -83,6 +83,8 @@ class CGDebugInfo { #include "clang/Basic/OpenCLExtensionTypes.def" #define WASM_TYPE(Name, Id, SingletonId) llvm::DIType *SingletonId = nullptr; #include "clang/Basic/WebAssemblyReferenceTypes.def" +#define AMDGPU_TYPE(Name, Id, SingletonId) llvm::DIType *SingletonId = nullptr; +#include "clang/Basic/AMDGPUTypes.def" /// Cache of previously constructed Types. llvm::DenseMap TypeCache; diff --git a/clang/lib/CodeGen/CodeGenTypes.cpp b/clang/lib/CodeGen/CodeGenTypes.cpp index 0a926e4ac27fe..d823c336e39bf 100644 --- a/clang/lib/CodeGen/CodeGenTypes.cpp +++ b/clang/lib/CodeGen/CodeGenTypes.cpp @@ -533,6 +533,11 @@ llvm::Type *CodeGenTypes::ConvertType(QualType T) { llvm_unreachable("Unexpected wasm reference builtin type!"); \ } break; #include "clang/Basic/WebAssemblyReferenceTypes.def" +#define AMDGPU_OPAQUE_PTR_TYPE(Name, MangledName, AS, Width, Align, Id, \ + SingletonId) \ + case BuiltinType::Id: \ + return llvm::PointerType::get(getLLVMContext(), AS); +#include "clang/Basic/AMDGPUTypes.def" case BuiltinType::Dependent: #define BUILTIN_TYPE(Id, SingletonId) #define PLACEHOLDER_TYPE(Id, SingletonId) \ diff --git a/clang/lib/CodeGen/ItaniumCXXABI.cpp b/clang/lib/CodeGen/ItaniumCXXABI.cpp index 5a3e83de625c9..01a735c1437e1 100644 --- a/clang/lib/CodeGen/ItaniumCXXABI.cpp +++ b/clang/lib/CodeGen/ItaniumCXXABI.cpp @@ -3362,6 +3362,8 @@ static bool TypeInfoIsInStandardLibrary(const BuiltinType *Ty) { #include "clang/Basic/RISCVVTypes.def" #define WASM_TYPE(Name, Id, SingletonId) case BuiltinType::Id: #include "clang/Basic/WebAssemblyReferenceTypes.def" +#define AMDGPU_TYPE(Name, Id, SingletonId) case BuiltinType::Id: +#include "clang/Basic/AMDGPUTypes.def" case BuiltinType::ShortAccum: case BuiltinType::Accum: case BuiltinType::LongAccum: diff --git a/clang/lib/Index/USRGeneration.cpp b/clang/lib/Index/USRGeneration.cpp index 31c4a3345c09d..5036ddee35fd1 100644 --- a/clang/lib/Index/USRGeneration.cpp +++ b/clang/lib/Index/USRGeneration.cpp @@ -772,6 +772,11 @@ void USRGenerator::VisitType(QualType T) { #include "clang/Basic/RISCVVTypes.def" #define WASM_TYPE(Name, Id, SingletonId) case BuiltinType::Id: #include "clang/Basic/WebAssemblyReferenceTypes.def" +#define AMDGPU_TYPE(Name, Id, SingletonId) \ + case BuiltinType::Id: \ + Out << "@BT@" << #Name; \ + break; +#include "clang/Basic/AMDGPUTypes.def" case BuiltinType::ShortAccum: Out << "@BT@ShortAccum"; break; case BuiltinType::Accum: diff --git a/clang/lib/Sema/Sema.cpp b/clang/lib/Sema/Sema.cpp index 907a05a5d1b49..069978c1b4023 100644 --- a/clang/lib/Sema/Sema.cpp +++ b/clang/lib/Sema/Sema.cpp @@ -497,6 +497,14 @@ void Sema::Initialize() { #include "clang/Basic/WebAssemblyReferenceTypes.def" } + if (Context.getTargetInfo().getTriple().isAMDGPU() || + (Context.getAuxTargetInfo() && + Context.getAuxTargetInfo()->getTriple().isAMDGPU())) { +#define AMDGPU_TYPE(Name, Id, SingletonId) \ + addImplicitTypedef(Name, Context.SingletonId); +#include "clang/Basic/AMDGPUTypes.def" + } + if (Context.getTargetInfo().hasBuiltinMSVaList()) { DeclarationName MSVaList = &Context.Idents.get("__builtin_ms_va_list"); if (IdResolver.begin(MSVaList) == IdResolver.end()) diff --git a/clang/lib/Sema/SemaExpr.cpp b/clang/lib/Sema/SemaExpr.cpp index 4db8b4130c3c7..a31cefc540cbf 100644 --- a/clang/lib/Sema/SemaExpr.cpp +++ b/clang/lib/Sema/SemaExpr.cpp @@ -6169,6 +6169,8 @@ static bool isPlaceholderToRemoveAsArg(QualType type) { #include "clang/Basic/RISCVVTypes.def" #define WASM_TYPE(Name, Id, SingletonId) case BuiltinType::Id: #include "clang/Basic/WebAssemblyReferenceTypes.def" +#define AMDGPU_TYPE(Name, Id, SingletonId) case BuiltinType::Id: +#include "clang/Basic/AMDGPUTypes.def" #define PLACEHOLDER_TYPE(ID, SINGLETON_ID) #define BUILTIN_TYPE(ID, SINGLETON_ID) case BuiltinType::ID: #include "clang/AST/BuiltinTypes.def" @@ -21004,6 +21006,8 @@ ExprResult Sema::CheckPlaceholderExpr(Expr *E) { #include "clang/Basic/RISCVVTypes.def" #define WASM_TYPE(Name, Id, SingletonId) case BuiltinType::Id: #include "clang/Basic/WebAssemblyReferenceTypes.def" +#define AMDGPU_TYPE(Name, Id, SingletonId) case BuiltinType::Id: +#include "clang/Basic/AMDGPUTypes.def" #define BUILTIN_TYPE(Id, SingletonId) case BuiltinType::Id: #define PLACEHOLDER_TYPE(Id, SingletonId) #include "clang/AST/BuiltinTypes.def" diff --git a/clang/lib/Serialization/ASTCommon.cpp b/clang/lib/Serialization/ASTCommon.cpp index bc662a87a7bf3..3385cb8aad7e4 100644 --- a/clang/lib/Serialization/ASTCommon.cpp +++ b/clang/lib/Serialization/ASTCommon.cpp @@ -258,6 +258,11 @@ serialization::TypeIdxFromBuiltin(const BuiltinType *BT) { ID = PREDEF_TYPE_##Id##_ID; \ break; #include "clang/Basic/WebAssemblyReferenceTypes.def" +#define AMDGPU_TYPE(Name, Id, SingletonId) \ + case BuiltinType::Id: \ + ID = PREDEF_TYPE_##Id##_ID; \ + break; +#include "clang/Basic/AMDGPUTypes.def" case BuiltinType::BuiltinFn: ID = PREDEF_TYPE_BUILTIN_FN; break; diff --git a/clang/lib/Serialization/ASTReader.cpp b/clang/lib/Serialization/ASTReader.cpp index a2c322087fd1e..0810d720bb4e0 100644 --- a/clang/lib/Serialization/ASTReader.cpp +++ b/clang/lib/Serialization/ASTReader.cpp @@ -7401,6 +7401,11 @@ QualType ASTReader::GetType(TypeID ID) { T = Context.SingletonId; \ break; #include "clang/Basic/WebAssemblyReferenceTypes.def" +#define AMDGPU_TYPE(Name, Id, SingletonId) \ + case PREDEF_TYPE_##Id##_ID: \ + T = Context.SingletonId; \ + break; +#include "clang/Basic/AMDGPUTypes.def" } assert(!T.isNull() && "Unknown predefined type"); diff --git a/clang/test/AST/ast-dump-amdgpu-types.c b/clang/test/AST/ast-dump-amdgpu-types.c new file mode 100644 index 0000000000000..e032d678f1a09 --- /dev/null +++ b/clang/test/AST/ast-dump-amdgpu-types.c @@ -0,0 +1,10 @@ +// REQUIRES: amdgpu-registered-target +// Test without serialization: +// RUN: %clang_cc1 -triple amdgcn -ast-dump -ast-dump-filter __amdgpu_buffer_rsrc_t %s | FileCheck %s +// +// Test with serialization: +// RUN: %clang_cc1 -triple amdgcn -emit-pch -o %t %s +// RUN: %clang_cc1 -x c -triple amdgcn -include-pch %t -ast-dump-all -ast-dump-filter __amdgpu_buffer_rsrc_t /dev/null | sed -e "s/ //" -e "s/ imported//" | FileCheck %s + +// CHECK: TypedefDecl {{.*}} implicit __amdgpu_buffer_rsrc_t +// CHECK-NEXT: -BuiltinType {{.*}} '__amdgpu_buffer_rsrc_t' diff --git a/clang/test/CodeGen/amdgpu-buffer-rsrc-type-debug-info.c b/clang/test/CodeGen/amdgpu-buffer-rsrc-type-debug-info.c new file mode 100644 index 0000000000000..c266fa83e4b62 --- /dev/null +++ b/clang/test/CodeGen/amdgpu-buffer-rsrc-type-debug-info.c @@ -0,0 +1,8 @@ +// REQUIRES: amdgpu-registered-target +// RUN: %clang_cc1 -triple amdgcn -emit-llvm -o - %s -debug-info-kind=limited 2>&1 | FileCheck %s + +// CHECK: name: "__amdgpu_buffer_rsrc_t",{{.*}}baseType: ![[BT:[0-9]+]] +// CHECK: [[BT]] = !DICompositeType(tag: DW_TAG_structure_type, name: "__amdgpu_buffer_rsrc_t", {{.*}} flags: DIFlagFwdDecl) +void test_locals(void) { + __amdgpu_buffer_rsrc_t k; +} diff --git a/clang/test/CodeGenCXX/amdgpu-buffer-rsrc-typeinfo.cpp b/clang/test/CodeGenCXX/amdgpu-buffer-rsrc-typeinfo.cpp new file mode 100644 index 0000000000000..a44e7dc5efe6a --- /dev/null +++ b/clang/test/CodeGenCXX/amdgpu-buffer-rsrc-typeinfo.cpp @@ -0,0 +1,9 @@ +// REQUIRES: amdgpu-registered-target +// RUN: %clang_cc1 -triple amdgcn %s -emit-llvm -o - | FileCheck %s + +namespace std { class type_info; }; + +auto &b = typeid(__amdgpu_buffer_rsrc_t); + +// CHECK-DAG: @_ZTSu22__amdgpu_buffer_rsrc_t = {{.*}} c"u22__amdgpu_buffer_rsrc_t\00" +// CHECK-DAG: @_ZTIu22__amdgpu_buffer_rsrc_t = {{.*}} @_ZTVN10__cxxabiv123__fundamental_type_infoE, {{.*}} @_ZTSu22__amdgpu_buffer_rsrc_t diff --git a/clang/test/CodeGenOpenCL/amdgcn-buffer-rsrc-type.cl b/clang/test/CodeGenOpenCL/amdgcn-buffer-rsrc-type.cl new file mode 100644 index 0000000000000..69dabda08fba6 --- /dev/null +++ b/clang/test/CodeGenOpenCL/amdgcn-buffer-rsrc-type.cl @@ -0,0 +1,82 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --function-signature + // REQUIRES: amdgpu-registered-target + // RUN: %clang_cc1 -triple amdgcn-unknown-unknown -target-cpu verde -emit-llvm -o - %s | FileCheck %s + +typedef struct AA_ty { + int x; + __amdgpu_buffer_rsrc_t r; +} AA; + +AA getAA(void *p); +__amdgpu_buffer_rsrc_t getBufferImpl(void *p); +void consumeBuffer(__amdgpu_buffer_rsrc_t); + +// CHECK-LABEL: define {{[^@]+}}@getBuffer +// CHECK-SAME: (ptr addrspace(5) noundef [[P:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] { +// CHECK-NEXT: entry: +// CHECK-NEXT: [[CALL:%.*]] = tail call ptr addrspace(8) @getBufferImpl(ptr addrspace(5) noundef [[P]]) #[[ATTR2:[0-9]+]] +// CHECK-NEXT: ret ptr addrspace(8) [[CALL]] +// +__amdgpu_buffer_rsrc_t getBuffer(void *p) { + return getBufferImpl(p); +} + +// CHECK-LABEL: define {{[^@]+}}@consumeBufferPtr +// CHECK-SAME: (ptr addrspace(5) noundef readonly [[P:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TOBOOL_NOT:%.*]] = icmp eq ptr addrspace(5) [[P]], addrspacecast (ptr null to ptr addrspace(5)) +// CHECK-NEXT: br i1 [[TOBOOL_NOT]], label [[IF_END:%.*]], label [[IF_THEN:%.*]] +// CHECK: if.then: +// CHECK-NEXT: [[TMP0:%.*]] = load ptr addrspace(8), ptr addrspace(5) [[P]], align 16, !tbaa [[TBAA4:![0-9]+]] +// CHECK-NEXT: tail call void @consumeBuffer(ptr addrspace(8) [[TMP0]]) #[[ATTR2]] +// CHECK-NEXT: br label [[IF_END]] +// CHECK: if.end: +// CHECK-NEXT: ret void +// +void consumeBufferPtr(__amdgpu_buffer_rsrc_t *p) { + if (p) + consumeBuffer(*p); +} + +// CHECK-LABEL: define {{[^@]+}}@test +// CHECK-SAME: (ptr addrspace(5) noundef readonly [[A:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr addrspace(5) [[A]], align 16, !tbaa [[TBAA8:![0-9]+]] +// CHECK-NEXT: [[TOBOOL_NOT:%.*]] = icmp eq i32 [[TMP0]], 0 +// CHECK-NEXT: [[TOBOOL_NOT_I:%.*]] = icmp eq ptr addrspace(5) [[A]], addrspacecast (ptr null to ptr addrspace(5)) +// CHECK-NEXT: [[OR_COND:%.*]] = or i1 [[TOBOOL_NOT_I]], [[TOBOOL_NOT]] +// CHECK-NEXT: br i1 [[OR_COND]], label [[IF_END:%.*]], label [[IF_THEN_I:%.*]] +// CHECK: if.then.i: +// CHECK-NEXT: [[R:%.*]] = getelementptr inbounds i8, ptr addrspace(5) [[A]], i32 16 +// CHECK-NEXT: [[TMP1:%.*]] = load ptr addrspace(8), ptr addrspace(5) [[R]], align 16, !tbaa [[TBAA4]] +// CHECK-NEXT: tail call void @consumeBuffer(ptr addrspace(8) [[TMP1]]) #[[ATTR2]] +// CHECK-NEXT: br label [[IF_END]] +// CHECK: if.end: +// CHECK-NEXT: ret void +// +void test(AA *a) { + if (a->x) + consumeBufferPtr(&(a->r)); +} + +// CHECK-LABEL: define {{[^@]+}}@bar +// CHECK-SAME: (ptr addrspace(5) noundef [[P:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: entry: +// CHECK-NEXT: [[CALL:%.*]] = tail call [[STRUCT_AA_TY:%.*]] @[[GETAA:[a-zA-Z0-9_$\"\\.-]*[a-zA-Z_$\"\\.-][a-zA-Z0-9_$\"\\.-]*]](ptr addrspace(5) noundef [[P]]) #[[ATTR2]] +// CHECK-NEXT: [[TMP0:%.*]] = extractvalue [[STRUCT_AA_TY]] [[CALL]], 0 +// CHECK-NEXT: [[CALL_I:%.*]] = tail call ptr addrspace(8) @getBufferImpl(ptr addrspace(5) noundef [[P]]) #[[ATTR2]] +// CHECK-NEXT: [[TOBOOL_NOT_I:%.*]] = icmp eq i32 [[TMP0]], 0 +// CHECK-NEXT: br i1 [[TOBOOL_NOT_I]], label [[TEST_EXIT:%.*]], label [[IF_THEN_I_I:%.*]] +// CHECK: if.then.i.i: +// CHECK-NEXT: tail call void @consumeBuffer(ptr addrspace(8) [[CALL_I]]) #[[ATTR2]] +// CHECK-NEXT: br label [[TEST_EXIT]] +// CHECK: test.exit: +// CHECK-NEXT: [[DOTFCA_1_INSERT:%.*]] = insertvalue [[STRUCT_AA_TY]] [[CALL]], ptr addrspace(8) [[CALL_I]], 1 +// CHECK-NEXT: ret [[STRUCT_AA_TY]] [[DOTFCA_1_INSERT]] +// +AA bar(void *p) { + AA a = getAA(p); + a.r = getBuffer(p); + test(&a); + return a; +} diff --git a/clang/test/SemaCXX/amdgpu-buffer-rsrc.cpp b/clang/test/SemaCXX/amdgpu-buffer-rsrc.cpp new file mode 100644 index 0000000000000..80c4c519c4e6b --- /dev/null +++ b/clang/test/SemaCXX/amdgpu-buffer-rsrc.cpp @@ -0,0 +1,17 @@ +// REQUIRES: amdgpu-registered-target +// RUN: %clang_cc1 -fsyntax-only -verify -std=gnu++11 -triple amdgcn -Wno-unused-value %s + +void foo() { + int n = 100; + __amdgpu_buffer_rsrc_t v = 0; // expected-error {{cannot initialize a variable of type '__amdgpu_buffer_rsrc_t' with an rvalue of type 'int'}} + static_cast<__amdgpu_buffer_rsrc_t>(n); // expected-error {{static_cast from 'int' to '__amdgpu_buffer_rsrc_t' is not allowed}} + dynamic_cast<__amdgpu_buffer_rsrc_t>(n); // expected-error {{invalid target type '__amdgpu_buffer_rsrc_t' for dynamic_cast; target type must be a reference or pointer type to a defined class}} + reinterpret_cast<__amdgpu_buffer_rsrc_t>(n); // expected-error {{reinterpret_cast from 'int' to '__amdgpu_buffer_rsrc_t' is not allowed}} + int c(v); // expected-error {{cannot initialize a variable of type 'int' with an lvalue of type '__amdgpu_buffer_rsrc_t'}} + __amdgpu_buffer_rsrc_t k; + int *ip = (int *)k; // expected-error {{cannot cast from type '__amdgpu_buffer_rsrc_t' to pointer type 'int *'}} + void *vp = (void *)k; // expected-error {{cannot cast from type '__amdgpu_buffer_rsrc_t' to pointer type 'void *'}} +} + +static_assert(sizeof(__amdgpu_buffer_rsrc_t) == 16, "wrong size"); +static_assert(alignof(__amdgpu_buffer_rsrc_t) == 16, "wrong aignment"); diff --git a/clang/test/SemaHIP/amdgpu-buffer-rsrc.hip b/clang/test/SemaHIP/amdgpu-buffer-rsrc.hip new file mode 100644 index 0000000000000..3e5b22dc8963d --- /dev/null +++ b/clang/test/SemaHIP/amdgpu-buffer-rsrc.hip @@ -0,0 +1,20 @@ +// REQUIRES: amdgpu-registered-target +// RUN: %clang_cc1 -fsyntax-only -verify -triple amdgcn -Wno-unused-value %s +// RUN: %clang_cc1 -fsyntax-only -verify -triple x86_64 -aux-triple amdgcn -Wno-unused-value %s + +#define __device__ __attribute__((device)) + +__device__ void foo() { + int n = 100; + __amdgpu_buffer_rsrc_t v = 0; // expected-error {{cannot initialize a variable of type '__amdgpu_buffer_rsrc_t' with an rvalue of type 'int'}} + static_cast<__amdgpu_buffer_rsrc_t>(n); // expected-error {{static_cast from 'int' to '__amdgpu_buffer_rsrc_t' is not allowed}} + dynamic_cast<__amdgpu_buffer_rsrc_t>(n); // expected-error {{invalid target type '__amdgpu_buffer_rsrc_t' for dynamic_cast; target type must be a reference or pointer type to a defined class}} + reinterpret_cast<__amdgpu_buffer_rsrc_t>(n); // expected-error {{reinterpret_cast from 'int' to '__amdgpu_buffer_rsrc_t' is not allowed}} + int c(v); // expected-error {{cannot initialize a variable of type 'int' with an lvalue of type '__amdgpu_buffer_rsrc_t'}} + __amdgpu_buffer_rsrc_t k; + int *ip = (int *)k; // expected-error {{cannot cast from type '__amdgpu_buffer_rsrc_t' to pointer type 'int *'}} + void *vp = (void *)k; // expected-error {{cannot cast from type '__amdgpu_buffer_rsrc_t' to pointer type 'void *'}} +} + +static_assert(sizeof(__amdgpu_buffer_rsrc_t) == 16, "wrong size"); +static_assert(alignof(__amdgpu_buffer_rsrc_t) == 16, "wrong aignment"); diff --git a/clang/test/SemaOpenCL/amdgpu-buffer-rsrc.cl b/clang/test/SemaOpenCL/amdgpu-buffer-rsrc.cl new file mode 100644 index 0000000000000..2d74835699c6d --- /dev/null +++ b/clang/test/SemaOpenCL/amdgpu-buffer-rsrc.cl @@ -0,0 +1,12 @@ +// REQUIRES: amdgpu-registered-target +// RUN: %clang_cc1 -verify -cl-std=CL1.2 -triple amdgcn-amd-amdhsa -Wno-unused-value %s +// RUN: %clang_cc1 -verify -cl-std=CL2.0 -triple amdgcn-amd-amdhsa -Wno-unused-value %s + +void foo() { + int n = 100; + __amdgpu_buffer_rsrc_t v = 0; // expected-error {{initializing '__private __amdgpu_buffer_rsrc_t' with an expression of incompatible type 'int'}} + int c = v; // expected-error {{initializing '__private int' with an expression of incompatible type '__private __amdgpu_buffer_rsrc_t'}} + __amdgpu_buffer_rsrc_t k; + int *ip = (int *)k; // expected-error {{operand of type '__amdgpu_buffer_rsrc_t' where arithmetic or pointer type is required}} + void *vp = (void *)k; // expected-error {{operand of type '__amdgpu_buffer_rsrc_t' where arithmetic or pointer type is required}} + } diff --git a/clang/test/SemaOpenMP/amdgpu-buffer-rsrc.cpp b/clang/test/SemaOpenMP/amdgpu-buffer-rsrc.cpp new file mode 100644 index 0000000000000..eb6ded229a75c --- /dev/null +++ b/clang/test/SemaOpenMP/amdgpu-buffer-rsrc.cpp @@ -0,0 +1,17 @@ +// REQUIRES: amdgpu-registered-target +// RUN: %clang_cc1 -verify -fopenmp -fopenmp-targets=amdgcn-amd-amdhsa -triple amdgcn-amd-amdhsa -fopenmp-is-target-device -Wno-unused-value %s + +void foo() { +#pragma omp target + { + int n = 100; + __amdgpu_buffer_rsrc_t v = 0; // expected-error {{cannot initialize a variable of type '__amdgpu_buffer_rsrc_t' with an rvalue of type 'int'}} + static_cast<__amdgpu_buffer_rsrc_t>(n); // expected-error {{static_cast from 'int' to '__amdgpu_buffer_rsrc_t' is not allowed}} + dynamic_cast<__amdgpu_buffer_rsrc_t>(n); // expected-error {{invalid target type '__amdgpu_buffer_rsrc_t' for dynamic_cast; target type must be a reference or pointer type to a defined class}} + reinterpret_cast<__amdgpu_buffer_rsrc_t>(n); // expected-error {{reinterpret_cast from 'int' to '__amdgpu_buffer_rsrc_t' is not allowed}} + int c(v); // expected-error {{cannot initialize a variable of type 'int' with an lvalue of type '__amdgpu_buffer_rsrc_t'}} + __amdgpu_buffer_rsrc_t k; + int *ip = (int *)k; // expected-error {{cannot cast from type '__amdgpu_buffer_rsrc_t' to pointer type 'int *'}} + void *vp = (void *)k; // expected-error {{cannot cast from type '__amdgpu_buffer_rsrc_t' to pointer type 'void *'}} + } + } diff --git a/clang/tools/libclang/CIndex.cpp b/clang/tools/libclang/CIndex.cpp index dcd9555e1bfcc..35312e3d2ae70 100644 --- a/clang/tools/libclang/CIndex.cpp +++ b/clang/tools/libclang/CIndex.cpp @@ -1643,6 +1643,8 @@ bool CursorVisitor::VisitBuiltinTypeLoc(BuiltinTypeLoc TL) { #include "clang/Basic/RISCVVTypes.def" #define WASM_TYPE(Name, Id, SingletonId) case BuiltinType::Id: #include "clang/Basic/WebAssemblyReferenceTypes.def" +#define AMDGPU_TYPE(Name, Id, SingletonId) case BuiltinType::Id: +#include "clang/Basic/AMDGPUTypes.def" #define BUILTIN_TYPE(Id, SingletonId) #define SIGNED_TYPE(Id, SingletonId) case BuiltinType::Id: #define UNSIGNED_TYPE(Id, SingletonId) case BuiltinType::Id: