Skip to content
This repository was archived by the owner on Mar 28, 2023. It is now read-only.

Commit 3c35bbd

Browse files
author
Artem Gindinson
committed
[SYCL] Add E2E tests for device code instrumentation
Add a couple of tests with execute-only kernels to ensure that generating ITT annotations doesn't break compilation or execution. This aims to test intel/llvm#4615. Signed-off-by: Artem Gindinson <[email protected]>
1 parent 9c1b5e2 commit 3c35bbd

File tree

2 files changed

+90
-0
lines changed

2 files changed

+90
-0
lines changed
Lines changed: 46 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,46 @@
1+
// RUN: %clangxx -fsycl -fsycl-instrument-device-code %s -o %t.out
2+
// RUN: %HOST_RUN_PLACEHOLDER %t.out
3+
// RUN: %CPU_RUN_PLACEHOLDER %t.out
4+
// RUN: %GPU_RUN_PLACEHOLDER %t.out
5+
// RUN: %ACC_RUN_PLACEHOLDER %t.out
6+
7+
// RUN: %clangxx -fsycl -fsycl-instrument-device-code %s -o %t.cpu.out \
8+
// RUN: -fsycl-targets=spir64_x86_64-unknown-unknown
9+
// RUN: %CPU_RUN_PLACEHOLDER %t.cpu.out
10+
11+
#include "CL/sycl.hpp"
12+
13+
using namespace sycl;
14+
15+
int main() {
16+
queue q{};
17+
18+
int source = 42;
19+
int target = 0;
20+
{
21+
buffer<int> source_buf(&source, 1);
22+
buffer<int> target_buf(&target, 1);
23+
24+
// Ensure that a simple kernel gets run when instrumented with
25+
// ITT start/finish annotations and ITT atomic start/finish annotations.
26+
q.submit([&](handler &cgh) {
27+
auto source_acc =
28+
source_buf.template get_access<access::mode::read_write>(cgh);
29+
auto target_acc =
30+
target_buf.template get_access<access::mode::discard_write>(cgh);
31+
cgh.single_task<class simple_atomic_kernel>([=]() {
32+
auto source_atomic =
33+
ext::oneapi::atomic_ref<int, memory_order::relaxed,
34+
memory_scope::device,
35+
access::address_space::global_space>(
36+
source_acc[0]);
37+
// Store source value into target
38+
target_acc[0] = source_atomic.load();
39+
// Nullify source
40+
source_atomic.store(0);
41+
});
42+
});
43+
}
44+
45+
return 0;
46+
}
Lines changed: 44 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,44 @@
1+
// RUN: %clangxx -fsycl -fsycl-instrument-device-code %s -o %t.out
2+
// RUN: %HOST_RUN_PLACEHOLDER %t.out
3+
// RUN: %CPU_RUN_PLACEHOLDER %t.out
4+
// RUN: %GPU_RUN_PLACEHOLDER %t.out
5+
// RUN: %ACC_RUN_PLACEHOLDER %t.out
6+
7+
// RUN: %clangxx -fsycl -fsycl-instrument-device-code %s -o %t.cpu.out \
8+
// RUN: -fsycl-targets=spir64_x86_64-unknown-unknown
9+
// RUN: %CPU_RUN_PLACEHOLDER %t.cpu.out
10+
11+
#include "CL/sycl.hpp"
12+
#include <vector>
13+
14+
using namespace sycl;
15+
16+
int main() {
17+
queue q{};
18+
19+
std::vector<int> data_vec(/*size*/ 10, /*value*/ 0);
20+
{
21+
range<1> num_items(data_vec.size());
22+
buffer<int> buf(data_vec.data(), num_items);
23+
range<1> local_range(2);
24+
25+
// Ensure that a simple kernel gets run when instrumented with
26+
// ITT start/finish annotations and ITT wg_barrier/wi_resume annotations.
27+
q.submit([&](handler &cgh) {
28+
auto acc = buf.get_access<access::mode::read_write>(cgh);
29+
accessor<int, 1, access::mode::read_write, access::target::local>
30+
local_acc(local_range, cgh);
31+
cgh.parallel_for<class simple_barrier_kernel>(
32+
nd_range<1>(num_items, local_range), [=](nd_item<1> item) {
33+
size_t idx = item.get_global_linear_id();
34+
int pos = idx & 1;
35+
int opp = pos ^ 1;
36+
local_acc[pos] = acc[idx];
37+
item.barrier(access::fence_space::local_space);
38+
acc[idx] = local_acc[opp];
39+
});
40+
});
41+
}
42+
43+
return 0;
44+
}

0 commit comments

Comments
 (0)