Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions flang/include/flang/Optimizer/Builder/IntrinsicCall.h
Original file line number Diff line number Diff line change
Expand Up @@ -190,6 +190,8 @@ struct IntrinsicLibrary {
mlir::Value genAtomicAdd(mlir::Type, llvm::ArrayRef<mlir::Value>);
fir::ExtendedValue genAtomicAddR2(mlir::Type,
llvm::ArrayRef<fir::ExtendedValue>);
/// Lowering for the two-element vector atomic add intrinsics
/// (`atomicadd_r2x2` and `atomicadd_r4x2` in the handler table). Both
/// arguments are received as addresses; the result is a two-element array.
fir::ExtendedValue genAtomicAddVector(mlir::Type,
llvm::ArrayRef<fir::ExtendedValue>);
mlir::Value genAtomicAnd(mlir::Type, llvm::ArrayRef<mlir::Value>);
fir::ExtendedValue genAtomicCas(mlir::Type,
llvm::ArrayRef<fir::ExtendedValue>);
Expand Down
49 changes: 49 additions & 0 deletions flang/lib/Optimizer/Builder/IntrinsicCall.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -290,6 +290,14 @@ static constexpr IntrinsicHandler handlers[]{
{"atan2pi", &I::genAtanpi},
{"atand", &I::genAtand},
{"atanpi", &I::genAtanpi},
{"atomicadd_r2x2",
&I::genAtomicAddVector,
{{{"a", asAddr}, {"v", asAddr}}},
false},
{"atomicadd_r4x2",
&I::genAtomicAddVector,
{{{"a", asAddr}, {"v", asAddr}}},
false},
{"atomicaddd", &I::genAtomicAdd, {{{"a", asAddr}, {"v", asValue}}}, false},
{"atomicaddf", &I::genAtomicAdd, {{{"a", asAddr}, {"v", asValue}}}, false},
{"atomicaddi", &I::genAtomicAdd, {{{"a", asAddr}, {"v", asValue}}}, false},
Expand Down Expand Up @@ -3168,6 +3176,47 @@ IntrinsicLibrary::genAtomicAddR2(mlir::Type resultType,
mlir::ArrayRef<int64_t>{0});
}

/// Lower the two-element vector atomic add intrinsics (`atomicadd_r2x2`,
/// `atomicadd_r4x2`).
///
/// \p args[0] is the destination address (unwrapped with fir.box_addr when it
/// arrives as a box) and \p args[1] is the address of the two-element addend.
/// The two addend elements are loaded and packed into a
/// `vector<2 x resultType>`, combined into the destination with one atomic
/// `fadd`, and the vector yielded by that operation is unpacked into a freshly
/// allocated two-element temporary.
///
/// \returns the temporary as an array value of extent 2.
fir::ExtendedValue
IntrinsicLibrary::genAtomicAddVector(mlir::Type resultType,
                                     llvm::ArrayRef<fir::ExtendedValue> args) {
  assert(args.size() == 2);

  // Storage that will hold the unpacked result of the atomic operation.
  mlir::Value resultStorage = fir::AllocaOp::create(
      builder, loc, fir::SequenceType::get({2}, resultType));

  // The atomic operation needs a raw address, not a descriptor.
  mlir::Value target = fir::getBase(args[0]);
  if (mlir::isa<fir::BaseBoxType>(target.getType()))
    target = fir::BoxAddrOp::create(builder, loc, target);

  mlir::VectorType pairTy = mlir::VectorType::get({2}, resultType);
  fir::ReferenceType elemRefTy = fir::ReferenceType::get(resultType);
  mlir::Type laneTy = builder.getI32Type();
  mlir::Type indexTy = builder.getIndexType();

  // Vector lane positions are materialized right where they are consumed so
  // the sequence of generated operations is kept as is.
  auto lane = [&](int position) -> mlir::Value {
    return builder.createIntegerConstant(loc, laneTy, position);
  };

  mlir::Value idx0 = builder.createIntegerConstant(loc, indexTy, 0);
  mlir::Value idx1 = builder.createIntegerConstant(loc, indexTy, 1);

  // Read both elements of the addend.
  mlir::Value addendBase = fir::getBase(args[1]);
  mlir::Value addendRef0 =
      fir::CoordinateOp::create(builder, loc, elemRefTy, addendBase, idx0);
  mlir::Value addendRef1 =
      fir::CoordinateOp::create(builder, loc, elemRefTy, addendBase, idx1);
  mlir::Value addend0 = fir::LoadOp::create(builder, loc, addendRef0);
  mlir::Value addend1 = fir::LoadOp::create(builder, loc, addendRef1);

  // Pack them into a two-lane vector.
  mlir::Value packed = mlir::LLVM::UndefOp::create(builder, loc, pairTy);
  packed = mlir::LLVM::InsertElementOp::create(builder, loc, packed, addend0,
                                               lane(0));
  packed = mlir::LLVM::InsertElementOp::create(builder, loc, packed, addend1,
                                               lane(1));

  mlir::Value atomicResult = genAtomBinOp(
      builder, loc, mlir::LLVM::AtomicBinOp::fadd, target, packed);

  // Unpack the vector yielded by the atomic operation into the temporary.
  mlir::Value result0 = mlir::LLVM::ExtractElementOp::create(
      builder, loc, atomicResult, lane(0));
  mlir::Value result1 = mlir::LLVM::ExtractElementOp::create(
      builder, loc, atomicResult, lane(1));
  mlir::Value resultRef0 =
      fir::CoordinateOp::create(builder, loc, elemRefTy, resultStorage, idx0);
  mlir::Value resultRef1 =
      fir::CoordinateOp::create(builder, loc, elemRefTy, resultStorage, idx1);
  fir::StoreOp::create(builder, loc, result0, resultRef0);
  fir::StoreOp::create(builder, loc, result1, resultRef1);

  mlir::Value extent = builder.createIntegerConstant(loc, indexTy, 2);
  return fir::ArrayBoxValue(resultStorage, {extent});
}

mlir::Value IntrinsicLibrary::genAtomicSub(mlir::Type resultType,
llvm::ArrayRef<mlir::Value> args) {
assert(args.size() == 2);
Expand Down
16 changes: 16 additions & 0 deletions flang/module/cudadevice.f90
Original file line number Diff line number Diff line change
Expand Up @@ -1178,6 +1178,22 @@ attributes(device) pure integer(4) function atomicaddr2(address, val)
end function
end interface

! Generic interface for the two-element vector form of atomic add.
! Each specific takes a two-element destination (address) and a two-element
! addend (val) of the same real kind and returns a two-element result (z).
! NOTE(review): the ignore_tkr letters below are presumably r = rank and
! d = CUDA device attribute, as described in flang's directive
! documentation; confirm there.
interface atomicaddvector
! real(2) specific.
attributes(device) pure function atomicadd_r2x2(address, val) result(z)
!dir$ ignore_tkr (rd) address, (d) val
real(2), dimension(2), intent(inout) :: address
real(2), dimension(2), intent(in) :: val
real(2), dimension(2) :: z
end function

! real(4) specific.
attributes(device) pure function atomicadd_r4x2(address, val) result(z)
!dir$ ignore_tkr (rd) address, (d) val
real(4), dimension(2), intent(inout) :: address
real(4), dimension(2), intent(in) :: val
real(4), dimension(2) :: z
end function
end interface

interface atomicsub
attributes(device) pure integer function atomicsubi(address, val)
!dir$ ignore_tkr (d) address, (d) val
Expand Down
19 changes: 19 additions & 0 deletions flang/test/Lower/CUDA/cuda-atomicadd.cuf
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
! RUN: bbc -emit-hlfir -fcuda %s -o - | FileCheck %s

! Test CUDA Fortran atomicadd functions available in the cudadevice module

! Calls the generic atomicAddVector on two-element real(2) device arrays;
! the expected lowering is a vector<2xf16> atomic fadd.
attributes(global) subroutine atomicaddvector_r2()
real(2), device :: a(2), tmp1(2), tmp2(2)
tmp1 = atomicAddVector(a, tmp2)
end subroutine

! CHECK-LABEL: func.func @_QPatomicaddvector_r2() attributes {cuf.proc_attr = #cuf.cuda_proc<global>}
! CHECK: llvm.atomicrmw fadd %{{.*}}, %{{.*}} seq_cst : !llvm.ptr, vector<2xf16>

! Calls the generic atomicAddVector on two-element real(4) device arrays;
! the expected lowering is a vector<2xf32> atomic fadd.
attributes(global) subroutine atomicaddvector_r4()
real(4), device :: a(2), tmp1(2), tmp2(2)
tmp1 = atomicAddVector(a, tmp2)
end subroutine

! CHECK-LABEL: func.func @_QPatomicaddvector_r4() attributes {cuf.proc_attr = #cuf.cuda_proc<global>}
! CHECK: llvm.atomicrmw fadd %{{.*}}, %{{.*}} seq_cst : !llvm.ptr, vector<2xf32>
Loading