Skip to content

Commit a781521

Browse files
committed
[OPENMP50]Codegen support for order(concurrent) clause.
Emit llvm parallel access metadata for the loops if they are marked as order(concurrent).
1 parent bed4d9c commit a781521

File tree

3 files changed

+69
-10
lines changed

3 files changed

+69
-10
lines changed

clang/lib/CodeGen/CGStmtOpenMP.cpp

Lines changed: 14 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1931,6 +1931,9 @@ void CodeGenFunction::EmitOMPSimdInit(const OMPLoopDirective &D,
19311931
LoopStack.setParallel(!IsMonotonic);
19321932
LoopStack.setVectorizeEnable();
19331933
emitSimdlenSafelenClause(*this, D, IsMonotonic);
1934+
if (const auto *C = D.getSingleClause<OMPOrderClause>())
1935+
if (C->getKind() == OMPC_ORDER_concurrent)
1936+
LoopStack.setParallel(/*Enable=*/true);
19341937
}
19351938

19361939
void CodeGenFunction::EmitOMPSimdFinal(
@@ -2202,10 +2205,14 @@ void CodeGenFunction::EmitOMPOuterLoop(
22022205
[&S, IsMonotonic](CodeGenFunction &CGF, PrePostActionTy &) {
22032206
// Generate !llvm.loop.parallel metadata for loads and stores for loops
22042207
// with dynamic/guided scheduling and without ordered clause.
2205-
if (!isOpenMPSimdDirective(S.getDirectiveKind()))
2208+
if (!isOpenMPSimdDirective(S.getDirectiveKind())) {
22062209
CGF.LoopStack.setParallel(!IsMonotonic);
2207-
else
2210+
if (const auto *C = S.getSingleClause<OMPOrderClause>())
2211+
if (C->getKind() == OMPC_ORDER_concurrent)
2212+
CGF.LoopStack.setParallel(/*Enable=*/true);
2213+
} else {
22082214
CGF.EmitOMPSimdInit(S, IsMonotonic);
2215+
}
22092216
},
22102217
[&S, &LoopArgs, LoopExit, &CodeGenLoop, IVSize, IVSigned, &CodeGenOrdered,
22112218
&LoopScope](CodeGenFunction &CGF, PrePostActionTy &) {
@@ -2720,8 +2727,12 @@ bool CodeGenFunction::EmitOMPWorksharingLoop(
27202727
emitCommonSimdLoop(
27212728
*this, S,
27222729
[&S](CodeGenFunction &CGF, PrePostActionTy &) {
2723-
if (isOpenMPSimdDirective(S.getDirectiveKind()))
2730+
if (isOpenMPSimdDirective(S.getDirectiveKind())) {
27242731
CGF.EmitOMPSimdInit(S, /*IsMonotonic=*/true);
2732+
} else if (const auto *C = S.getSingleClause<OMPOrderClause>()) {
2733+
if (C->getKind() == OMPC_ORDER_concurrent)
2734+
CGF.LoopStack.setParallel(/*Enable=*/true);
2735+
}
27252736
},
27262737
[IVSize, IVSigned, Ordered, IL, LB, UB, ST, StaticChunkedOne, Chunk,
27272738
&S, ScheduleKind, LoopExit,

clang/test/OpenMP/for_codegen.cpp

Lines changed: 11 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -743,28 +743,28 @@ void body_f();
743743
// OMP5-LABEL: imperfectly_nested_loop
744744
void imperfectly_nested_loop() {
745745
// OMP5: call void @__kmpc_for_static_init_4(
746-
#pragma omp for collapse(3)
746+
#pragma omp for collapse(3) order(concurrent)
747747
for (int i = 0; i < 10; ++i) {
748748
{
749749
int a, d;
750750
// OMP5: invoke void @{{.+}}first{{.+}}()
751751
first();
752-
// OMP5: load i32
753-
// OMP5: store i32
752+
// OMP5: load i32{{.*}}!llvm.access.group ![[AG:[0-9]+]]
753+
// OMP5: store i32{{.*}}!llvm.access.group ![[AG]]
754754
a = d;
755755
for (int j = 0; j < 10; ++j) {
756756
int a, d;
757757
// OMP5: invoke void @{{.+}}inner_f{{.+}}()
758758
inner_f();
759-
// OMP5: load i32
760-
// OMP5: store i32
759+
// OMP5: load i32{{.*}}!llvm.access.group ![[AG]]
760+
// OMP5: store i32{{.*}}!llvm.access.group ![[AG]]
761761
a = d;
762762
for (int k = 0; k < 10; ++k) {
763763
int a, d;
764764
// OMP5: invoke void @{{.+}}body_f{{.+}}()
765765
body_f();
766-
// OMP5: load i32
767-
// OMP5: store i32
766+
// OMP5: load i32{{.*}}!llvm.access.group ![[AG]]
767+
// OMP5: store i32{{.*}}!llvm.access.group ![[AG]]
768768
a = d;
769769
}
770770
// OMP5: invoke void @{{.+}}inner_l{{.+}}()
@@ -776,6 +776,10 @@ void imperfectly_nested_loop() {
776776
}
777777
// OMP5: call void @__kmpc_for_static_fini(
778778
}
779+
780+
// OMP5: ![[AG]] = distinct !{}
781+
// OMP5: !{!"llvm.loop.parallel_accesses", ![[AG]]}
782+
779783
#endif
780784

781785
#endif // HEADER
Lines changed: 44 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,44 @@
1+
// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=50 -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -triple powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck %s
2+
// RUN: %clang_cc1 -fopenmp -fopenmp-version=50 -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -std=c++11 -triple powerpc64le-ibm-linux-gnu -emit-pch -o %t %s
3+
// RUN: %clang_cc1 -fopenmp -fopenmp-version=50 -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -triple powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s
4+
5+
// RUN: %clang_cc1 -verify -fopenmp-simd -fopenmp-version=50 -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -triple powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix SIMD-ONLY
6+
// RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=50 -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -std=c++11 -triple powerpc64le-ibm-linux-gnu -emit-pch -o %t %s
7+
// RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=50 -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -triple powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix SIMD-ONLY
8+
// SIMD-ONLY-NOT: {{__kmpc|__tgt}}
9+
// REQUIRES: powerpc-registered-target
10+
11+
// expected-no-diagnostics
12+
#ifndef HEADER
13+
#define HEADER
14+
15+
// CHECK-LABEL: define {{.*}}void @{{.+}}gtid_test
16+
void gtid_test() {
17+
// CHECK: call void @__kmpc_push_target_tripcount(i64 -1, i64 100)
18+
// CHECK: %0 = call i32 @__tgt_target_teams(i64 -1, i8* @{{.+}}, i32 0, i8** null, i8** null, i64* null, i64* null, i32 0, i32 0)
19+
// CHECK: call void [[TARGET_OUTLINE:@.+]]()
20+
// CHECK: ret void
21+
#pragma omp target teams distribute parallel for order(concurrent)
22+
for(int i = 0 ; i < 100; i++) {}
23+
}
24+
25+
// CHECK: define internal void [[TARGET_OUTLINE]]()
26+
// CHECK: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @{{.+}}, i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* [[TEAMS_OUTLINE:@.+]] to void (i32*, i32*, ...)*))
27+
// CHECK: ret void
28+
29+
// CHECK: define internal void [[TEAMS_OUTLINE]](i32* {{.+}}, i32* {{.+}})
30+
// CHECK: call void @__kmpc_for_static_init_4(
31+
// CHECK-NOT: {{store|load}}{{.+}}!llvm.access.group !
32+
// CHECK: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @{{.+}}, i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* [[PARALLEL_OUTLINE:@.+]] to void (i32*, i32*, ...)*), i64 %{{.+}}, i64 %{{.+}})
33+
// CHECK-NOT: {{store|load}}{{.+}}!llvm.access.group !
34+
// CHECK: call void @__kmpc_for_static_fini(
35+
36+
// CHECK: define internal void [[PARALLEL_OUTLINE]](i32* {{.+}}, i32* {{.+}}, i64 {{.+}}, i64 {{.+}})
37+
// CHECK: call void @__kmpc_for_static_init_4(
38+
// CHECK: {{store|load}}{{.+}}!llvm.access.group ![[AG:[0-9]+]]
39+
// CHECK: call void @__kmpc_for_static_fini(
40+
// CHECK: ret void
41+
42+
// CHECK: ![[AG]] = distinct !{}
43+
// CHECK: !{!"llvm.loop.parallel_accesses", ![[AG]]}
44+
#endif

0 commit comments

Comments
 (0)