Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 5 additions & 2 deletions .github/workflows/build_and_run.yml
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@ jobs:
python: ["3.9", "3.10", "3.11"]
sycl: ["sycl","no-sycl"]
install: ["pip", "setup.py"]
precision: ["single", "double"]
exclude:
# setuptools<64 + scikit-build produce 'UNKNOWN' package name for
# python 3.11. Could not find exact reference for that issue.
Expand Down Expand Up @@ -174,10 +175,12 @@ jobs:
echo "OCL_ICD_FILENAMES=$env:CONDA_PREFIX\Library\lib\intelocl64.dll" >> $env:GITHUB_ENV

- name: Run benchmarks
run: dpbench -i ${{env.WORKLOADS}} run -r2 --no-print-results || exit 1
run: dpbench -i ${{env.WORKLOADS}} run -r2 --no-print-results --precision=${{matrix.precision}} || exit 1

- name: Run rodinia benchmarks
run: dpbench -i ${{env.WORKLOADS}} --last-run run -r2 --no-print-results --rodinia --no-dpbench|| exit 1
# TODO: fix rodinia benchmark: https://github.com/IntelPython/dpbench/issues/316
if: matrix.precision != 'single'
run: dpbench -i ${{env.WORKLOADS}} --last-run run -r2 --no-print-results --rodinia --no-dpbench --precision=${{matrix.precision}} || exit 1

- name: Generate report
run: dpbench -i ${{env.WORKLOADS}} report || exit 1
7 changes: 5 additions & 2 deletions .github/workflows/conda-package.yml
Original file line number Diff line number Diff line change
Expand Up @@ -112,6 +112,7 @@ jobs:
matrix:
python: ['3.9', '3.10', '3.11']
os: [ubuntu-20.04, ubuntu-latest, windows-latest]
precision: ["single", "double"]
experimental: [false]

continue-on-error: ${{ matrix.experimental }}
Expand Down Expand Up @@ -193,11 +194,13 @@ jobs:
# we want to make sure that configuration files are getting populated
- name: Run npbench benchmark
run: |
dpbench -i numpy -b azimint_hist run --npbench
dpbench -i numpy -b azimint_hist run --npbench --precision=${{matrix.precision}}

- name: Run rodinia benchmark
# TODO: fix rodinia benchmark: https://github.com/IntelPython/dpbench/issues/316
if: matrix.precision != 'single'
run: |
dpbench run --rodinia --no-dpbench --no-validate -r 1
dpbench run --rodinia --no-dpbench --no-validate -r 1 --precision=${{matrix.precision}}

upload_anaconda:
name: Upload dppy/label/dev ['${{ matrix.os }}', python='${{ matrix.python }}']
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -6,30 +6,10 @@
#include <stdlib.h>
#include <type_traits>

#ifdef __DO_FLOAT__
#define EXP(x) expf(x)
#define LOG(x) logf(x)
#define SQRT(x) sqrtf(x)
#define ERF(x) erff(x)
#define INVSQRT(x) 1.0f / sqrtf(x)

#define QUARTER 0.25f
#define HALF 0.5f
#define TWO 2.0f
#else
#define EXP(x) sycl::exp(x)
#define LOG(x) sycl::log(x)
#define SQRT(x) sycl::sqrt(x)
#define ERF(x) sycl::erf(x)
#define INVSQRT(x) 1.0 / sycl::sqrt(x)

#define QUARTER 0.25
#define HALF 0.5
#define TWO 2.0
#endif

using namespace sycl;

template <typename FpTy> class BlackScholesKernel;

template <typename FpTy>
void black_scholes_impl(queue Queue,
size_t nopt,
Expand All @@ -41,27 +21,30 @@ void black_scholes_impl(queue Queue,
FpTy *call,
FpTy *put)
{
constexpr FpTy _0_25 = 0.25;
constexpr FpTy _0_5 = 0.5;

auto e = Queue.submit([&](handler &h) {
h.parallel_for<class BlackScholesKernel>(
h.parallel_for<BlackScholesKernel<FpTy>>(
range<1>{nopt}, [=](id<1> myID) {
FpTy mr = -rate;
FpTy sig_sig_two = volatility * volatility * TWO;
FpTy sig_sig_two = volatility * volatility * 2;
int i = myID[0];
FpTy a, b, c, y, z, e;
FpTy d1, d2, w1, w2;

a = LOG(price[i] / strike[i]);
a = sycl::log(price[i] / strike[i]);
b = t[i] * mr;
z = t[i] * sig_sig_two;
c = QUARTER * z;
y = INVSQRT(z);
c = _0_25 * z;
y = sycl::rsqrt(z);
w1 = (a - b + c) * y;
w2 = (a - b - c) * y;
d1 = ERF(w1);
d2 = ERF(w2);
d1 = HALF + HALF * d1;
d2 = HALF + HALF * d2;
e = EXP(b);
d1 = sycl::erf(w1);
d2 = sycl::erf(w2);
d1 = _0_5 + _0_5 * d1;
d2 = _0_5 + _0_5 * d2;
e = sycl::exp(b);
call[i] = price[i] * d1 - strike[i] * e * d2;
put[i] = call[i] - price[i] + strike[i] * e;
});
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -64,14 +64,22 @@ void black_scholes_sync(size_t /**/,
if (!ensure_compatibility(price, strike, t, call, put))
throw std::runtime_error("Input arrays are not acceptable.");

if (typenum != UAR_DOUBLE) {
throw std::runtime_error("Expected a double precision FP array.");
if (typenum == UAR_FLOAT) {
black_scholes_impl<float>(Queue, nopt, price.get_data<float>(),
strike.get_data<float>(), t.get_data<float>(),
rate, volatility, call.get_data<float>(),
put.get_data<float>());
}
else if (typenum == UAR_DOUBLE) {
black_scholes_impl<double>(
Queue, nopt, price.get_data<double>(), strike.get_data<double>(),
t.get_data<double>(), rate, volatility, call.get_data<double>(),
put.get_data<double>());
}
else {
throw std::runtime_error(
"Expected a double or single precision FP array.");
Comment on lines +67 to +81
Copy link
Contributor

@adarshyoga adarshyoga Nov 1, 2023

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It should be possible to directly bind functions with template parameters. This might be a neater solution. @oleksandr-pavlyk your thoughts?

}

black_scholes_impl(Queue, nopt, price.get_data<double>(),
strike.get_data<double>(), t.get_data<double>(), rate,
volatility, call.get_data<double>(),
put.get_data<double>());
}

PYBIND11_MODULE(_black_scholes_sycl, m)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -106,6 +106,8 @@ void getNeighborhood(size_t n,
}
}

template <typename FpTy> class DBScanKernel;

template <typename FpTy>
size_t dbscan_impl(queue q,
size_t n_samples,
Expand All @@ -126,14 +128,13 @@ size_t dbscan_impl(queue q,
q.wait();

auto e = q.submit([&](handler &h) {
h.parallel_for<class DBScanKernel>(
h.parallel_for<DBScanKernel<FpTy>>(
range<1>{n_samples}, [=](id<1> myID) {
size_t i1 = myID[0];
size_t i2 = (i1 + 1 == n_samples ? n_samples : i1 + 1);
getNeighborhood<double>(n_samples, n_features, data, i2 - i1,
data + i1 * n_features, eps,
d_indices + i1 * n_samples,
d_sizes + i1);
getNeighborhood<FpTy>(n_samples, n_features, data, i2 - i1,
data + i1 * n_features, eps,
d_indices + i1 * n_samples, d_sizes + i1);
});
});

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -38,16 +38,21 @@ size_t dbscan_sync(size_t n_samples,
size_t min_pts)
{
auto queue = data.get_queue();
auto typenum = data.get_typenum();

if (!ensure_compatibility(data))
throw std::runtime_error("Input arrays are not acceptable.");

if (data.get_typenum() != UAR_DOUBLE) {
throw std::runtime_error("Expected a double precision FP array.");
if (typenum == UAR_FLOAT) {
return dbscan_impl<float>(queue, n_samples, n_features,
data.get_data<float>(), eps, min_pts);
}
else if (typenum == UAR_DOUBLE) {
return dbscan_impl<double>(queue, n_samples, n_features,
data.get_data<double>(), eps, min_pts);
}

return dbscan_impl<double>(queue, n_samples, n_features,
data.get_data<double>(), eps, min_pts);
throw std::runtime_error("Expected a double or single precision FP array.");
}

PYBIND11_MODULE(_dbscan_sycl, m)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,8 @@

using namespace sycl;

template <typename FpTy> class theKernel;

template <typename FpTy>
void l2_norm_impl(queue Queue,
size_t npoints,
Expand All @@ -18,7 +20,7 @@ void l2_norm_impl(queue Queue,
{
Queue
.submit([&](handler &h) {
h.parallel_for<class theKernel>(range<1>{npoints}, [=](id<1> myID) {
h.parallel_for<theKernel<FpTy>>(range<1>{npoints}, [=](id<1> myID) {
size_t i = myID[0];
for (size_t k = 0; k < dims; k++) {
d[i] += a[i * dims + k] * a[i * dims + k];
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -19,13 +19,20 @@ void l2_norm_sync(dpctl::tensor::usm_ndarray a, dpctl::tensor::usm_ndarray d)

auto dims = 3;
auto npoints = a.get_size() / dims;
auto typenum = a.get_typenum();

if (a.get_typenum() != UAR_DOUBLE) {
throw std::runtime_error("Expected a double precision FP array.");
if (typenum == UAR_FLOAT) {
l2_norm_impl(Queue, npoints, dims, a.get_data<float>(),
d.get_data<float>());
}
else if (typenum == UAR_DOUBLE) {
l2_norm_impl(Queue, npoints, dims, a.get_data<double>(),
d.get_data<double>());
}
else {
throw std::runtime_error(
"Expected a double or single precision FP array.");
}

l2_norm_impl(Queue, npoints, dims, a.get_data<double>(),
d.get_data<double>());
}

PYBIND11_MODULE(_l2_norm_sycl, m)
Expand Down
2 changes: 1 addition & 1 deletion dpbench/benchmarks/pca/pca_numba_n.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@ def pca(data, dims_rescaled_data=2):
evectors = evectors[:, :dims_rescaled_data]

# carry out the transformation on the data using eigenvectors
tdata = np.dot(evectors.T, data.T).T
tdata = np.dot(evectors.T.astype(data.dtype), data.T).T

# return the transformed data, eigenvalues, and eigenvectors
return tdata, evalues, evectors
Original file line number Diff line number Diff line change
Expand Up @@ -10,13 +10,10 @@
#include <stdlib.h>
#include <type_traits>

#define SIN(x) sycl::sin(x)
#define COS(x) sycl::cos(x)
#define SQRT(x) sycl::sqrt(x)
#define LOG(x) sycl::log(x)

using namespace sycl;

template <typename FpTy> class RamboKernel;

template <typename FpTy>
event rambo_impl(queue Queue,
size_t nevts,
Expand All @@ -26,20 +23,21 @@ event rambo_impl(queue Queue,
const FpTy *usmQ1,
FpTy *usmOutput)
{
constexpr FpTy pi_v = M_PI;
return Queue.submit([&](handler &h) {
h.parallel_for<class RamboKernel>(range<1>{nevts}, [=](id<1> myID) {
h.parallel_for<RamboKernel<FpTy>>(range<1>{nevts}, [=](id<1> myID) {
for (size_t j = 0; j < nout; j++) {
int i = myID[0];
size_t idx = i * nout + j;

FpTy C = 2.0 * usmC1[idx] - 1.0;
FpTy S = SQRT(1 - C * C);
FpTy F = 2.0 * M_PI * usmF1[idx];
FpTy Q = -LOG(usmQ1[idx]);
FpTy C = 2 * usmC1[idx] - 1;
FpTy S = sycl::sqrt(1 - C * C);
FpTy F = 2 * pi_v * usmF1[idx];
FpTy Q = -sycl::log(usmQ1[idx]);

usmOutput[idx * 4] = Q;
usmOutput[idx * 4 + 1] = Q * S * SIN(F);
usmOutput[idx * 4 + 2] = Q * S * COS(F);
usmOutput[idx * 4 + 1] = Q * S * sycl::sin(F);
usmOutput[idx * 4 + 2] = Q * S * sycl::cos(F);
usmOutput[idx * 4 + 3] = Q * C;
}
});
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -55,16 +55,27 @@ void rambo_sync(size_t nevts,
if (!ensure_compatibility(C1, F1, Q1))
throw std::runtime_error("Input arrays are not acceptable.");

if (C1.get_typenum() != UAR_DOUBLE || F1.get_typenum() != UAR_DOUBLE ||
Q1.get_typenum() != UAR_DOUBLE || output.get_typenum() != UAR_DOUBLE)
{
throw std::runtime_error("Expected a double precision FP array.");
}
if (output.get_typenum() != C1.get_typenum())
throw std::runtime_error("Input arrays are not acceptable.");

auto typenum = C1.get_typenum();

auto e = rambo_impl(Queue, nevts, nout, C1.get_data<double>(),
F1.get_data<double>(), Q1.get_data<double>(),
output.get_data<double>());
e.wait();
if (typenum == UAR_FLOAT) {
auto e = rambo_impl(Queue, nevts, nout, C1.get_data<float>(),
F1.get_data<float>(), Q1.get_data<float>(),
output.get_data<float>());
e.wait();
}
else if (typenum == UAR_DOUBLE) {
auto e = rambo_impl(Queue, nevts, nout, C1.get_data<double>(),
F1.get_data<double>(), Q1.get_data<double>(),
output.get_data<double>());
e.wait();
}
else {
throw std::runtime_error(
"Expected a double or single precision FP array.");
}
}

PYBIND11_MODULE(_rambo_sycl, m)
Expand Down