diff --git a/flang/lib/Optimizer/Transforms/CUFAddConstructor.cpp b/flang/lib/Optimizer/Transforms/CUFAddConstructor.cpp
index dd204126be5db..73a46843f0320 100644
--- a/flang/lib/Optimizer/Transforms/CUFAddConstructor.cpp
+++ b/flang/lib/Optimizer/Transforms/CUFAddConstructor.cpp
@@ -106,8 +106,7 @@ struct CUFAddConstructor
 
       mlir::func::FuncOp func;
       switch (attr.getValue()) {
-      case cuf::DataAttribute::Device:
-      case cuf::DataAttribute::Constant: {
+      case cuf::DataAttribute::Device: {
        func = fir::runtime::getRuntimeFunc<mkRTKey(CUFRegisterVariable)>(
            loc, builder);
        auto fTy = func.getFunctionType();
@@ -145,8 +144,6 @@ struct CUFAddConstructor
       default:
         break;
       }
-      if (!func)
-        continue;
     }
   }
   builder.create<mlir::LLVM::ReturnOp>(loc, mlir::ValueRange{});
diff --git a/flang/test/Fir/CUDA/cuda-constructor-2.f90 b/flang/test/Fir/CUDA/cuda-constructor-2.f90
index 901497e2cde55..29efdb083878a 100644
--- a/flang/test/Fir/CUDA/cuda-constructor-2.f90
+++ b/flang/test/Fir/CUDA/cuda-constructor-2.f90
@@ -30,3 +30,33 @@ module attributes {dlti.dl_spec = #dlti.dl_spec<#dlti.dl_entry
 // CHECK-DAG: %[[BOXREF:.*]] = fir.convert %[[BOX]] : (!fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>) -> !fir.ref<i8>
 // CHECK-DAG: fir.call @_FortranACUFRegisterVariable(%[[MODULE:.*]], %[[BOXREF]], %{{.*}}, %{{.*}})
 //
+
+// -----
+
+// Checking that constant global variables are not registered
+
+// CHECK: @_FortranACUFRegisterAllocator
+// CHECK-NOT: fir.call @_FortranACUFRegisterVariable
+
+module attributes {dlti.dl_spec = #dlti.dl_spec<i8 = dense<8> : vector<2xi64>, i16 = dense<16> : vector<2xi64>, i1 = dense<8> : vector<2xi64>, !llvm.ptr = dense<64> : vector<4xi64>, f80 = dense<128> : vector<2xi64>, i128 = dense<128> : vector<2xi64>, i64 = dense<64> : vector<2xi64>, !llvm.ptr<271> = dense<32> : vector<4xi64>, !llvm.ptr<272> = dense<64> : vector<4xi64>, f128 = dense<128> : vector<2xi64>, !llvm.ptr<270> = dense<32> : vector<4xi64>, f16 = dense<16> : vector<2xi64>, f64 = dense<64> : vector<2xi64>, i32 = dense<32> : vector<2xi64>, "dlti.stack_alignment" = 128 : i64, "dlti.endianness" = "little">, fir.defaultkind = "a1c4d8i4l4r4", fir.kindmap = "",
+gpu.container_module, llvm.data_layout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128", llvm.ident = "flang version 20.0.0 (https://github.com/llvm/llvm-project.git 3372303188df0f7f8ac26e7ab610cf8b0f716d42)", llvm.target_triple = "x86_64-unknown-linux-gnu"} {
+  fir.global @_QMiso_c_bindingECc_int {data_attr = #cuf.cuda<constant>} constant : i32
+
+
+  fir.type_info @_QM__fortran_builtinsT__builtin_c_ptr noinit nodestroy nofinal : !fir.type<_QM__fortran_builtinsT__builtin_c_ptr{__address:i64}>
+  gpu.module @cuda_device_mod {
+    fir.global @_QMiso_c_bindingECc_int {data_attr = #cuf.cuda<constant>} constant : i32
+    gpu.func @_QMdevmodPdevsub(%arg0: !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>) -> i32 {
+      %c0 = arith.constant 0 : index
+      %c4_i32 = arith.constant 4 : i32
+      %0 = fir.alloca i32 {bindc_name = "devsub", uniq_name = "_QMdevmodFdevsubEdevsub"}
+      %1 = fir.alloca i32 {bindc_name = "__builtin_warpsize", uniq_name = "_QM__fortran_builtinsEC__builtin_warpsize"}
+      %2 = fir.load %arg0 : !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>
+      %3:3 = fir.box_dims %2, %c0 : (!fir.box<!fir.heap<!fir.array<?xi32>>>, index) -> (index, index, index)
+      %4 = fir.convert %3#1 : (index) -> i32
+      %5 = arith.muli %4, %c4_i32 : i32
+      fir.store %5 to %0 : !fir.ref<i32>
+      %6 = fir.load %0 : !fir.ref<i32>
+      gpu.return %6 : i32
+    }
+  }
+}
\ No newline at end of file