This repository was archived by the owner on Mar 28, 2023. It is now read-only.

Commit 069169a

[SYCL] Add tests for atomic_ref memory orders (#900)
Adds tests for various memory orders. These tests are not ideal, as they do not actually fail if run with a more relaxed memory order. Closes #793.
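The tests below follow the classic message-passing litmus pattern: one work-item repeatedly increments a data counter and then a flag, and every other work-item checks that the flag is never observed to be ahead of the data it publishes. A minimal host-side analogue of that check using std::atomic (an illustrative sketch only, not part of this commit) looks like this:

// Host-side sketch of the message-passing check used by the tests below;
// illustrative only, not part of this commit.
#include <atomic>
#include <cassert>
#include <thread>

std::atomic<int> data{0};
std::atomic<int> flag{0};

int main() {
  std::thread writer([] {
    for (int i = 0; i < 1000; i++) {
      data.fetch_add(1, std::memory_order_relaxed);
      flag.fetch_add(1, std::memory_order_release); // publish the data
    }
  });
  std::thread reader([] {
    for (int i = 0; i < 1000; i++) {
      int f = flag.load(std::memory_order_acquire);
      int d = data.load(std::memory_order_relaxed);
      // The acquire load synchronizes with the release increment that
      // produced f, so all f data increments must already be visible.
      assert(d >= f);
    }
  });
  writer.join();
  reader.join();
}

With a sufficiently relaxed ordering this assertion could fire, but in practice it often will not, which is why the commit message calls the tests "not ideal".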
1 parent 8a28d78

File tree: 2 files changed (+379, -12 lines)
Lines changed: 218 additions & 12 deletions

@@ -1,33 +1,239 @@
-// RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple %s -o %t.out
+// RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple %s -O3 -o %t.out -Xsycl-target-backend=nvptx64-nvidia-cuda --cuda-gpu-arch=sm_70
 // RUN: %HOST_RUN_PLACEHOLDER %t.out
 // RUN: %CPU_RUN_PLACEHOLDER %t.out
 // RUN: %GPU_RUN_PLACEHOLDER %t.out
 // RUN: %ACC_RUN_PLACEHOLDER %t.out
 // L0, OpenCL, and HIP backends don't currently support
 // info::device::atomic_memory_order_capabilities
-// UNSUPPORTED: level_zero || opencl || hip
+// UNSUPPORTED: level_zero, opencl, hip
 
-// NOTE: Tests load and store for acquire-release memory ordering.
+// host does not support barrier
+// XFAIL: host
+
+// NOTE: Tests fetch_add for acquire and release memory ordering.
 
 #include "atomic_memory_order.h"
 #include <iostream>
+#include <numeric>
 using namespace sycl;
 
-int main() {
+template <memory_order order> void test_acquire_global() {
+  const size_t N_items = 1024;
+  const size_t N_iters = 1000;
+
+  int error = 0;
+  int val[] = {0, 0};
+
+  queue q;
+  {
+    buffer<int> error_buf(&error, 1);
+    buffer<int> val_buf(val, 1);
+
+    q.submit([&](handler &cgh) {
+       auto error =
+           error_buf.template get_access<access::mode::read_write>(cgh);
+       auto val = val_buf.template get_access<access::mode::read_write>(cgh);
+       cgh.parallel_for(range<1>(N_items), [=](item<1> it) {
+         volatile int *val_p = val.get_pointer();
+         auto atm0 =
+             atomic_ref<int, memory_order::relaxed, memory_scope::device,
+                        access::address_space::global_space>(val[0]);
+         auto atm1 =
+             atomic_ref<int, memory_order::relaxed, memory_scope::device,
+                        access::address_space::global_space>(val[1]);
+         for (int i = 0; i < N_iters; i++) {
+           if (it.get_id(0) == 0) {
+             atm0.fetch_add(1, order);
+             val_p[1]++;
+           } else {
+             int tmp1 = atm1.load(memory_order::acquire);
+             int tmp0 = atm0.load(memory_order::relaxed);
+             if (tmp0 < tmp1) {
+               error[0] = 1;
+             }
+           }
+         }
+       });
+     }).wait_and_throw();
+  }
+  assert(error == 0);
+}
+
+template <memory_order order> void test_acquire_local() {
+  const size_t local_size = 1024;
+  const size_t N_wgs = 16;
+  const size_t global_size = local_size * N_wgs;
+  const size_t N_iters = 1000;
+
+  int error = 0;
+  int val[] = {0, 0};
+
+  queue q;
+  {
+    buffer<int> error_buf(&error, 1);
+    buffer<int> val_buf(val, 1);
+
+    q.submit([&](handler &cgh) {
+       auto error =
+           error_buf.template get_access<access::mode::read_write>(cgh);
+       accessor<int, 1, access::mode::read_write, access::target::local> val(
+           2, cgh);
+       cgh.parallel_for(
+           nd_range<1>(global_size, local_size), [=](nd_item<1> it) {
+             size_t lid = it.get_local_id(0);
+             val[0] = 0;
+             val[1] = 0;
+             it.barrier(access::fence_space::local_space);
+             volatile int *val_p = val.get_pointer();
+             auto atm0 =
+                 atomic_ref<int, memory_order::relaxed, memory_scope::device,
+                            access::address_space::local_space>(val[0]);
+             auto atm1 =
+                 atomic_ref<int, memory_order::relaxed, memory_scope::device,
+                            access::address_space::local_space>(val[1]);
+             for (int i = 0; i < N_iters; i++) {
+               if (it.get_local_id(0) == 0) {
+                 atm0.fetch_add(1, order);
+                 val_p[1]++;
+               } else {
+                 int tmp1 = atm1.load(memory_order::acquire);
+                 int tmp0 = atm0.load(memory_order::relaxed);
+                 if (tmp0 < tmp1) {
+                   error[0] = 1;
+                 }
+               }
+             }
+           });
+     }).wait_and_throw();
+  }
+  assert(error == 0);
+}
+
+template <memory_order order> void test_release_global() {
+  const size_t N_items = 1024;
+  const size_t N_iters = 1000;
+
+  int error = 0;
+  int val[] = {0, 0};
+
   queue q;
+  {
+    buffer<int> error_buf(&error, 1);
+    buffer<int> val_buf(val, 1);
+
+    q.submit([&](handler &cgh) {
+       auto error =
+           error_buf.template get_access<access::mode::read_write>(cgh);
+       auto val = val_buf.template get_access<access::mode::read_write>(cgh);
+       cgh.parallel_for(range<1>(N_items), [=](item<1> it) {
+         volatile int *val_p = val.get_pointer();
+         auto atm0 =
+             atomic_ref<int, memory_order::relaxed, memory_scope::device,
+                        access::address_space::global_space>(val[0]);
+         auto atm1 =
+             atomic_ref<int, memory_order::relaxed, memory_scope::device,
+                        access::address_space::global_space>(val[1]);
+         for (int i = 0; i < N_iters; i++) {
+           if (it.get_id(0) == 0) {
+             val_p[0]++;
+             atm1.fetch_add(1, order);
+           } else {
+             int tmp1 = atm1.load(memory_order::acquire);
+             int tmp0 = atm0.load(memory_order::relaxed);
+             if (tmp0 < tmp1) {
+               error[0] = 1;
+             }
+           }
+         }
+       });
+     }).wait_and_throw();
+  }
+  assert(error == 0);
+}
+
+template <memory_order order> void test_release_local() {
+  const size_t local_size = 1024;
+  const size_t N_wgs = 16;
+  const size_t global_size = local_size * N_wgs;
+  const size_t N_iters = 1000;
 
+  int error = 0;
+  int val[] = {0, 0};
+
+  queue q;
+  {
+    buffer<int> error_buf(&error, 1);
+    buffer<int> val_buf(val, 1);
+
+    q.submit([&](handler &cgh) {
+       auto error =
+           error_buf.template get_access<access::mode::read_write>(cgh);
+       accessor<int, 1, access::mode::read_write, access::target::local> val(
+           2, cgh);
+       cgh.parallel_for(
+           nd_range<1>(global_size, local_size), [=](nd_item<1> it) {
+             size_t lid = it.get_local_id(0);
+             val[0] = 0;
+             val[1] = 0;
+             it.barrier(access::fence_space::local_space);
+             volatile int *val_p = val.get_pointer();
+             auto atm0 =
+                 atomic_ref<int, memory_order::relaxed, memory_scope::device,
+                            access::address_space::local_space>(val[0]);
+             auto atm1 =
+                 atomic_ref<int, memory_order::relaxed, memory_scope::device,
+                            access::address_space::local_space>(val[1]);
+             for (int i = 0; i < N_iters; i++) {
+               if (it.get_local_id(0) == 0) {
+                 val_p[0]++;
+                 atm1.fetch_add(1, order);
+               } else {
+                 int tmp1 = atm1.load(memory_order::acquire);
+                 int tmp0 = atm0.load(memory_order::relaxed);
+                 if (tmp0 < tmp1) {
+                   error[0] = 1;
+                 }
+               }
+             }
+           });
+     }).wait_and_throw();
+  }
+  assert(error == 0);
+}
+
+int main() {
+  queue q;
   std::vector<memory_order> supported_memory_orders =
       q.get_device().get_info<info::device::atomic_memory_order_capabilities>();
 
-  if (!is_supported(supported_memory_orders, memory_order::acq_rel)) {
-    std::cout << "Skipping test\n";
-    return 0;
+  if (is_supported(supported_memory_orders, memory_order::acquire)) {
+    std::cout << "Testing acquire" << std::endl;
+    test_acquire_global<memory_order::acquire>();
+    test_acquire_local<memory_order::acquire>();
+  }
+  if (is_supported(supported_memory_orders, memory_order::release)) {
+    std::cout << "Testing release" << std::endl;
+    test_release_global<memory_order::release>();
+    test_release_local<memory_order::release>();
+  }
+  if (is_supported(supported_memory_orders, memory_order::acq_rel)) {
+    std::cout << "Testing acq_rel" << std::endl;
+    // Acquire-release memory order must also support both acquire and release
+    // orderings.
+    assert(is_supported(supported_memory_orders, memory_order::acquire) &&
+           is_supported(supported_memory_orders, memory_order::release));
+    test_acquire_global<memory_order::acq_rel>();
+    test_acquire_local<memory_order::acq_rel>();
+    test_release_global<memory_order::acq_rel>();
+    test_release_local<memory_order::acq_rel>();
+  }
+  if (is_supported(supported_memory_orders, memory_order::seq_cst)) {
+    std::cout << "Testing seq_cst" << std::endl;
+    test_acquire_global<memory_order::seq_cst>();
+    test_acquire_local<memory_order::seq_cst>();
+    test_release_global<memory_order::seq_cst>();
+    test_release_local<memory_order::seq_cst>();
   }
-
-  // Acquire-release memory order must also support both acquire and release
-  // orderings.
-  assert(is_supported(supported_memory_orders, memory_order::acquire) &&
-         is_supported(supported_memory_orders, memory_order::release));
 
   std::cout << "Test passed." << std::endl;
 }
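main gates each group of tests on the device's reported capabilities via the is_supported helper from atomic_memory_order.h. That header is not part of this diff; a plausible sketch of the helper (an assumption, not the actual implementation) is:

// Hypothetical sketch of is_supported from atomic_memory_order.h;
// the real header is not shown in this diff.
#include <algorithm>
#include <vector>
#include <sycl/sycl.hpp>

bool is_supported(const std::vector<sycl::memory_order> &capabilities,
                  sycl::memory_order mem_order) {
  return std::find(capabilities.begin(), capabilities.end(), mem_order) !=
         capabilities.end();
}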
