Skip to content
This repository was archived by the owner on Mar 28, 2023. It is now read-only.

Add 'feature' tests to InvokeSimd #1443

Merged
merged 6 commits into from
Jan 30, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
// TODO: enable on Windows once driver is ready
// REQUIRES: gpu && linux
// UNSUPPORTED: cuda || hip
//
// TODO: enable when Jira ticket resolved
// XFAIL: gpu
//
// RUN: %clangxx -DIMPL_SUBGROUP -fsycl -fno-sycl-device-code-split-esimd -Xclang -fsycl-allow-func-ptr %S/../SPMD_invoke_ESIMD_external.cpp -o %t.out
// RUN: env IGC_VCSaveStackCallLinkage=1 IGC_VCDirectCallsOnly=1 %GPU_RUN_PLACEHOLDER %t.out
//
// VISALTO enable run
// RUN: env IGC_VCSaveStackCallLinkage=1 IGC_VCDirectCallsOnly=1 %GPU_RUN_PLACEHOLDER %t.out

/*
* This tests is the same as InvokeSimd/feature/SPMD_invoke_ESIMD_external.cpp,
* but compiles without optional subgroup attribute specified and intended to
* check that compiler is able to choose subgroup size correctly.
*/
16 changes: 16 additions & 0 deletions SYCL/InvokeSimd/feature/IMPL_SUBGROUP/invoke_simd_struct.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
// TODO: enable on Windows once driver is ready
// REQUIRES: gpu && linux
// UNSUPPORTED: cuda || hip
//
// Check that full compilation works:
// RUN: %clangxx -DIMPL_SUBGROUP -fsycl -fno-sycl-device-code-split-esimd -Xclang -fsycl-allow-func-ptr %S/../invoke_simd_struct.cpp -o %t.out
// RUN: env IGC_VCSaveStackCallLinkage=1 IGC_VCDirectCallsOnly=1 %GPU_RUN_PLACEHOLDER %t.out
//
// VISALTO enable run
// RUN: env IGC_VISALTO=63 IGC_VCSaveStackCallLinkage=1 IGC_VCDirectCallsOnly=1 %GPU_RUN_PLACEHOLDER %t.out

/*
* This tests is the same as InvokeSimd/feature/invoke_simd_struct.cpp, but
* compiles without optional subgroup attribute specified and intended to check
* that compiler is able to choose subgroup size correctly.
*/
19 changes: 19 additions & 0 deletions SYCL/InvokeSimd/feature/IMPL_SUBGROUP/popcnt.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
// TODO: enable on Windows once driver is ready
// REQUIRES: gpu && linux
// UNSUPPORTED: cuda || hip
//
// TODO: enable after simd_mask supported
// XFAIL: gpu
//
// Check that full compilation works:
// RUN: %clangxx -DIMPL_SUBGROUP -fsycl -fno-sycl-device-code-split-esimd -Xclang -fsycl-allow-func-ptr %S/../popcnt.cpp -o %t.out
// RUN: env IGC_VCSaveStackCallLinkage=1 IGC_VCDirectCallsOnly=1 %GPU_RUN_PLACEHOLDER %t.out
//
// VISALTO enable run
// RUN: env IGC_VISALTO=63 IGC_VCSaveStackCallLinkage=1 IGC_VCDirectCallsOnly=1 %GPU_RUN_PLACEHOLDER %t.out

/*
* This tests is the same as InvokeSimd/feature/popcnt.cpp, but compiles without
* optional subgroup attribute specified and intended to check that compiler is
* able to choose subgroup size correctly.
*/
Original file line number Diff line number Diff line change
@@ -1,15 +1,16 @@
// TODO: enable on Windows once driver is ready
// REQUIRES: gpu && linux
// UNSUPPORTED: cuda || hip

// RUN: %clangxx -DIMPL_SUBGROUP -fsycl -fno-sycl-device-code-split-esimd -Xclang -fsycl-allow-func-ptr %S/../../invoke_simd_conv.cpp -o %t.out
//
// Check that full compilation works:
// RUN: %clangxx -DIMPL_SUBGROUP -fsycl -fno-sycl-device-code-split-esimd -Xclang -fsycl-allow-func-ptr %S/../popcnt_emu.cpp -o %t.out
// RUN: env IGC_VCSaveStackCallLinkage=1 IGC_VCDirectCallsOnly=1 %GPU_RUN_PLACEHOLDER %t.out
//
// VISALTO enable run
// RUN: env IGC_VISALTO=63 IGC_VCSaveStackCallLinkage=1 IGC_VCDirectCallsOnly=1 %GPU_RUN_PLACEHOLDER %t.out

/*
* This tests is the same as InvokeSimd/spec/invoke_simd_conv.cpp, but compiles
* This tests is the same as InvokeSimd/feature/popcnt_emu.cpp, but compiles
* without optional subgroup attribute specified and intended to check that
* compiler is able to choose subgroup size correctly.
*/
16 changes: 16 additions & 0 deletions SYCL/InvokeSimd/feature/IMPL_SUBGROUP/scale.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
// TODO: enable on Windows once driver is ready
// REQUIRES: gpu && linux
// UNSUPPORTED: cuda || hip
//
// Check that full compilation works:
// RUN: %clangxx -DIMPL_SUBGROUP -fsycl -fno-sycl-device-code-split-esimd -Xclang -fsycl-allow-func-ptr %S/../scale.cpp -o %t.out
// RUN: env IGC_VCSaveStackCallLinkage=1 IGC_VCDirectCallsOnly=1 %GPU_RUN_PLACEHOLDER %t.out
//
// VISALTO enable run
// RUN: env IGC_VISALTO=63 IGC_VCSaveStackCallLinkage=1 IGC_VCDirectCallsOnly=1 %GPU_RUN_PLACEHOLDER %t.out

/*
* This tests is the same as InvokeSimd/feature/scale.cpp, but compiles without
* optional subgroup attribute specified and intended to check that compiler is
* able to choose subgroup size correctly.
*/
16 changes: 16 additions & 0 deletions SYCL/InvokeSimd/feature/IMPL_SUBGROUP/void_retval.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
// TODO: enable on Windows once driver is ready
// REQUIRES: gpu && linux
// UNSUPPORTED: cuda || hip
//
// Check that full compilation works:
// RUN: %clangxx -DIMPL_SUBGROUP -fsycl -fno-sycl-device-code-split-esimd -Xclang -fsycl-allow-func-ptr %S/../void_retval.cpp -o %t.out
// RUN: env IGC_VCSaveStackCallLinkage=1 IGC_VCDirectCallsOnly=1 %GPU_RUN_PLACEHOLDER %t.out
//
// VISALTO enable run
// RUN: env IGC_VISALTO=63 IGC_VCSaveStackCallLinkage=1 IGC_VCDirectCallsOnly=1 %GPU_RUN_PLACEHOLDER %t.out

/*
* This tests is the same as InvokeSimd/feature/void_retval.cpp, but compiles
* without optional subgroup attribute specified and intended to check that
* compiler is able to choose subgroup size correctly.
*/
150 changes: 150 additions & 0 deletions SYCL/InvokeSimd/feature/SPMD_invoke_ESIMD_external.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,150 @@
// TODO: enable on Windows once driver is ready
// REQUIRES: gpu && linux
// UNSUPPORTED: cuda || hip
//
// TODO: enable when Jira ticket resolved
// XFAIL: gpu
//
// RUN: %clangxx -fsycl -fno-sycl-device-code-split-esimd -Xclang -fsycl-allow-func-ptr %s -o %t.out
// RUN: env IGC_VCSaveStackCallLinkage=1 IGC_VCDirectCallsOnly=1 %GPU_RUN_PLACEHOLDER %t.out
//
// VISALTO enable run
// RUN: env IGC_VCSaveStackCallLinkage=1 IGC_VCDirectCallsOnly=1 %GPU_RUN_PLACEHOLDER %t.out

/*
* This test checks the case of calling the same external function from the SPMD
* and ESIMD kernels.
*/

#include <sycl/ext/intel/esimd.hpp>
#include <sycl/ext/oneapi/experimental/invoke_simd.hpp>
#include <sycl/ext/oneapi/experimental/uniform.hpp>
#include <sycl/sycl.hpp>

#include <functional>
#include <iostream>
#include <type_traits>

/* Subgroup size attribute is optional
* In case it is absent compiler decides what subgroup size to use
*/
#ifdef IMPL_SUBGROUP
#define SUBGROUP_ATTR
#else
#define SUBGROUP_ATTR [[intel::reqd_sub_group_size(VL)]]
#endif

using namespace sycl::ext::oneapi::experimental;
namespace esimd = sycl::ext::intel::esimd;
constexpr int VL = 16;

esimd::simd<float, VL> ESIMD_CALLEE(float *A, int i) SYCL_ESIMD_FUNCTION {
esimd::simd<float, VL> res;
res.copy_from(A + i);
return res;
}

[[intel::device_indirectly_callable]] SYCL_EXTERNAL
simd<float, VL> __regcall SIMD_CALLEE(float *A, int i) SYCL_ESIMD_FUNCTION {
esimd::simd<float, VL> res = ESIMD_CALLEE(A, i);
return res;
}

using namespace sycl;

int main() {
constexpr unsigned Size = 1024;
constexpr unsigned GroupSize = 4 * VL;

auto q = queue{gpu_selector_v};
auto dev = q.get_device();
std::cout << "Running on " << dev.get_info<sycl::info::device::name>()
<< "\n";

float *A = static_cast<float *>(malloc_shared(Size * sizeof(float), q));
float *B = static_cast<float *>(malloc_shared(Size * sizeof(float), q));
float *C = static_cast<float *>(malloc_shared(Size * sizeof(float), q));

for (unsigned i = 0; i < Size; ++i) {
A[i] = i;
B[i] = C[i] = -1;
}

try {
sycl::range<1> GlobalRange{Size};
// Number of workitems in each workgroup.
sycl::range<1> LocalRange{GroupSize};
sycl::nd_range<1> Range(GlobalRange, LocalRange);

auto e = q.submit([&](handler &cgh) {
cgh.parallel_for<class TestInvokeSimd>(
Range, [=](nd_item<1> ndi) SUBGROUP_ATTR {
sub_group sg = ndi.get_sub_group();
group<1> g = ndi.get_group();
uint32_t i = sg.get_group_linear_id() * VL +
g.get_group_linear_id() * GroupSize;
uint32_t wi_id = i + sg.get_local_id();

float res = invoke_simd(sg, SIMD_CALLEE, uniform{A}, uniform{i});
B[wi_id] = res;
});
});
e.wait();
} catch (sycl::exception const &e) {
sycl::free(A, q);
sycl::free(B, q);
sycl::free(C, q);

std::cout << "SYCL exception caught: " << e.what() << '\n';
return e.code().value();
}

try {
sycl::range<1> GlobalRange{Size};
// Number of workitems in each workgroup.
sycl::range<1> LocalRange{VL};
sycl::nd_range<1> Range(GlobalRange, LocalRange);

auto e = q.submit([&](handler &cgh) {
cgh.parallel_for<class TestExternalCall>(
Range, [=](nd_item<1> ndi) SYCL_ESIMD_KERNEL {
uint32_t i = ndi.get_group(0) * VL;

esimd::simd<float, VL> res(SIMD_CALLEE(B, i));
res.copy_to(C + i);
});
});
e.wait();
} catch (sycl::exception const &e) {
sycl::free(A, q);
sycl::free(B, q);
sycl::free(C, q);

std::cout << "SYCL exception caught: " << e.what() << '\n';
return e.code().value();
}

int err_cnt = 0;

for (unsigned i = 0; i < Size; ++i) {
if (A[i] != B[i] || B[i] != C[i]) {
if (++err_cnt < 10) {
std::cout << "failed at index " << i << ", " << A[i] << " != " << B[i]
<< " != " << C[i] << "\n";
}
}
}

sycl::free(A, q);
sycl::free(B, q);
sycl::free(C, q);

if (err_cnt > 0) {
std::cout << " pass rate: "
<< ((float)(Size - err_cnt) / (float)Size) * 100.0f << "% ("
<< (Size - err_cnt) << "/" << Size << ")\n";
}

std::cout << (err_cnt > 0 ? "FAILED\n" : "Passed\n");
return err_cnt;
}
Loading