Merge pull request #261 from IntelPython/feature/add_igpu_support_for_numba_dpex_k

ZzEeKkAa · web-flow · commit 7d809cdc1f37 · 2023-05-12T12:31:38.000-04:00
Add fp32 support for numba_dpex_k
diff --git a/dpbench/benchmarks/black_scholes/black_scholes_initialize.py b/dpbench/benchmarks/black_scholes/black_scholes_initialize.py
@@ -7,15 +7,15 @@ def initialize(nopt, seed, types_dict):
     import numpy as np
     import numpy.random as default_rng
 
-    dtype = types_dict["float"]
-    S0L = 10.0
-    S0H = 50.0
-    XL = 10.0
-    XH = 50.0
-    TL = 1.0
-    TH = 2.0
-    RISK_FREE = 0.1
-    VOLATILITY = 0.2
+    dtype: np.dtype = types_dict["float"]
+    S0L = dtype.type(10.0)
+    S0H = dtype.type(50.0)
+    XL = dtype.type(10.0)
+    XH = dtype.type(50.0)
+    TL = dtype.type(1.0)
+    TH = dtype.type(2.0)
+    RISK_FREE = dtype.type(0.1)
+    VOLATILITY = dtype.type(0.2)
 
     default_rng.seed(seed)
     price = default_rng.uniform(S0L, S0H, nopt).astype(dtype)
diff --git a/dpbench/benchmarks/black_scholes/black_scholes_numba_dpex_k.py b/dpbench/benchmarks/black_scholes/black_scholes_numba_dpex_k.py
@@ -4,13 +4,15 @@
 
 from math import erf, exp, log, sqrt
 
+import dpnp as np
 import numba_dpex as dpex
 
 
 @dpex.kernel
 def _black_scholes_kernel(nopt, price, strike, t, rate, volatility, call, put):
+    dtype = price.dtype
     mr = -rate
-    sig_sig_two = volatility * volatility * 2
+    sig_sig_two = volatility * volatility * dtype.type(2)
 
     i = dpex.get_global_id(0)
 
@@ -22,14 +24,14 @@ def _black_scholes_kernel(nopt, price, strike, t, rate, volatility, call, put):
     b = T * mr
 
     z = T * sig_sig_two
-    c = 0.25 * z
-    y = 1.0 / sqrt(z)
+    c = dtype.type(0.25) * z
+    y = dtype.type(1.0) / sqrt(z)
 
     w1 = (a - b + c) * y
     w2 = (a - b - c) * y
 
-    d1 = 0.5 + 0.5 * erf(w1)
-    d2 = 0.5 + 0.5 * erf(w2)
+    d1 = dtype.type(0.5) + dtype.type(0.5) * erf(w1)
+    d2 = dtype.type(0.5) + dtype.type(0.5) * erf(w2)
 
     Se = exp(b) * S
 
diff --git a/dpbench/benchmarks/dbscan/dbscan_initialize.py b/dpbench/benchmarks/dbscan/dbscan_initialize.py
@@ -64,8 +64,10 @@ class Params(NamedTuple):
         data_size, Params(eps=DEFAULT_EPS, minpts=DEFAULT_MINPTS)
     )
 
+    dtype: np.dtype = types_dict["float"]
+
     return (
-        X.flatten().astype(types_dict["float"]),
-        params.eps,
+        X.flatten().astype(dtype),
+        dtype.type(params.eps),
         params.minpts,
     )
diff --git a/dpbench/benchmarks/dbscan/dbscan_numba_dpex_k.py b/dpbench/benchmarks/dbscan/dbscan_numba_dpex_k.py
@@ -63,7 +63,7 @@ def get_neighborhood(n, dim, data, eps, ind_lst, sz_lst, block_size, nblocks):
         i2 = n if ii + 1 == nblocks1 else i1 + block_size1
         for j in range(start, stop):
             for k in range(i1, i2):
-                dist = 0.0
+                dist = data.dtype.type(0.0)
                 for m in range(dim):
                     diff = data[k * dim + m] - data[j * dim + m]
                     dist += diff * diff
diff --git a/dpbench/benchmarks/gpairs/gpairs_numba_dpex_k.py b/dpbench/benchmarks/gpairs/gpairs_numba_dpex_k.py
@@ -2,10 +2,7 @@
 #
 # SPDX-License-Identifier: Apache-2.0
 
-import math
-
 import numba_dpex as dpex
-import numpy as np
 
 # This implementation is numba dpex kernel version with atomics.
 
@@ -27,6 +24,7 @@ def count_weighted_pairs_3d_intel_no_slm_ker(
     rbins_squared,
     result,
 ):
+    dtype = x0.dtype
     lid0 = dpex.get_local_id(0)
     gr0 = dpex.get_group_id(0)
 
@@ -38,9 +36,9 @@ def count_weighted_pairs_3d_intel_no_slm_ker(
 
     n_wi = 20
 
-    dsq_mat = dpex.private.array(shape=(20 * 20), dtype=np.float32)
-    w0_vec = dpex.private.array(shape=(20), dtype=np.float32)
-    w1_vec = dpex.private.array(shape=(20), dtype=np.float32)
+    dsq_mat = dpex.private.array(shape=(20 * 20), dtype=dtype)
+    w0_vec = dpex.private.array(shape=(20), dtype=dtype)
+    w1_vec = dpex.private.array(shape=(20), dtype=dtype)
 
     offset0 = gr0 * n_wi * lws0 + lid0
     offset1 = gr1 * n_wi * lws1 + lid1
@@ -80,7 +78,7 @@ def count_weighted_pairs_3d_intel_no_slm_ker(
 
     # update slm_hist. Use work-item private buffer of 16 tfloat elements
     for k in range(0, slm_hist_size, private_hist_size):
-        private_hist = dpex.private.array(shape=(16), dtype=np.float32)
+        private_hist = dpex.private.array(shape=(16), dtype=dtype)
         for p in range(private_hist_size):
             private_hist[p] = 0.0
 
@@ -95,7 +93,9 @@ def count_weighted_pairs_3d_intel_no_slm_ker(
                 pk = k
                 for p in range(private_hist_size):
                     private_hist[p] += (
-                        pw if (pk < nbins and dsq <= rbins_squared[pk]) else 0.0
+                        pw
+                        if (pk < nbins and dsq <= rbins_squared[pk])
+                        else dtype.type(0.0)
                     )
                     pk += 1
 
diff --git a/dpbench/benchmarks/kmeans/kmeans_initialize.py b/dpbench/benchmarks/kmeans/kmeans_initialize.py
@@ -7,10 +7,10 @@ def initialize(npoints, niters, seed, ndims, ncentroids, types_dict):
     import numpy as np
     import numpy.random as default_rng
 
-    f_dtype = types_dict["float"]
-    i_dtype = types_dict["int"]
-    XL = 1.0
-    XH = 5.0
+    f_dtype: np.dtype = types_dict["float"]
+    i_dtype: np.dtype = types_dict["int"]
+    XL = f_dtype.type(1.0)
+    XH = f_dtype.type(5.0)
 
     default_rng.seed(seed)
 
diff --git a/dpbench/benchmarks/kmeans/kmeans_numba_dpex_k.py b/dpbench/benchmarks/kmeans/kmeans_numba_dpex_k.py
@@ -11,7 +11,8 @@
 def groupByCluster(arrayP, arrayPcluster, arrayC, num_points, num_centroids):
     idx = dpex.get_global_id(0)
     # if idx < num_points: # why it was removed??
-    minor_distance = -1
+    dtype = arrayC.dtype
+    minor_distance = dtype.type(-1)
     for i in range(num_centroids):
         dx = arrayP[idx, 0] - arrayC[i, 0]
         dy = arrayP[idx, 1] - arrayC[i, 1]
@@ -41,8 +42,9 @@ def calCentroidsSum2(arrayP, arrayPcluster, arrayCsum, arrayCnumpoint):
 @dpex.kernel
 def updateCentroids(arrayC, arrayCsum, arrayCnumpoint, num_centroids):
     i = dpex.get_global_id(0)
-    arrayC[i, 0] = arrayCsum[i, 0] / arrayCnumpoint[i]
-    arrayC[i, 1] = arrayCsum[i, 1] / arrayCnumpoint[i]
+    dtype = arrayC.dtype
+    arrayC[i, 0] = arrayCsum[i, 0] / dtype.type(arrayCnumpoint[i])
+    arrayC[i, 1] = arrayCsum[i, 1] / dtype.type(arrayCnumpoint[i])
 
 
 @dpex.kernel
diff --git a/dpbench/benchmarks/knn/knn_initialize.py b/dpbench/benchmarks/knn/knn_initialize.py
@@ -41,6 +41,6 @@ def _gen_test_data(test_size, data_dim, seed_test, dtype):
     )
     x_test = _gen_test_data(test_size, data_dim, seed_test, dtype)
     predictions = np.empty(test_size, types_dict["int"])
-    votes_to_classes = np.zeros((test_size, classes_num))
+    votes_to_classes = np.zeros((test_size, classes_num), dtype)
 
     return (x_train, y_train, x_test, predictions, votes_to_classes)
diff --git a/dpbench/benchmarks/knn/knn_numba_dpex_k.py b/dpbench/benchmarks/knn/knn_numba_dpex_k.py
@@ -20,15 +20,16 @@ def _knn_kernel(  # noqa: C901: TODO: can we simplify logic?
     votes_to_classes_lst,
     data_dim,
 ):
+    dtype = train.dtype
     i = dpex.get_global_id(0)
     # here k has to be 5 in order to match with numpy
-    queue_neighbors = dpex.private.array(shape=(5, 2), dtype=np.float64)
+    queue_neighbors = dpex.private.array(shape=(5, 2), dtype=dtype)
 
     for j in range(k):
         x1 = train[j]
         x2 = test[i]
 
-        distance = 0.0
+        distance = dtype.type(0.0)
         for jj in range(data_dim):
             diff = x1[jj] - x2[jj]
             distance += diff * diff
@@ -55,7 +56,7 @@ def _knn_kernel(  # noqa: C901: TODO: can we simplify logic?
         x1 = train[j]
         x2 = test[i]
 
-        distance = 0.0
+        distance = dtype.type(0.0)
         for jj in range(data_dim):
             diff = x1[jj] - x2[jj]
             distance += diff * diff
@@ -83,7 +84,7 @@ def _knn_kernel(  # noqa: C901: TODO: can we simplify logic?
         votes_to_classes[int(queue_neighbors[j, 1])] += 1
 
     max_ind = 0
-    max_value = 0
+    max_value = dtype.type(0)
 
     for j in range(classes_num):
         if votes_to_classes[j] > max_value:
diff --git a/dpbench/benchmarks/pairwise_distance/pairwise_distance_numba_dpex_k.py b/dpbench/benchmarks/pairwise_distance/pairwise_distance_numba_dpex_k.py
@@ -13,7 +13,7 @@ def _pairwise_distance_kernel(X1, X2, D):
     X2_rows = X2.shape[0]
     X1_cols = X1.shape[1]
     for j in range(X2_rows):
-        d = 0.0
+        d = X1.dtype.type(0.0)
         for k in range(X1_cols):
             tmp = X1[i, k] - X2[j, k]
             d += tmp * tmp
diff --git a/dpbench/benchmarks/rambo/rambo_initialize.py b/dpbench/benchmarks/rambo/rambo_initialize.py
@@ -19,4 +19,4 @@ def initialize(nevts, nout, types_dict):
             F1[i, j] = np.random.rand()
             Q1[i, j] = np.random.rand() * np.random.rand()
 
-    return (C1, F1, Q1, np.empty((nevts, nout, 4)))
+    return (C1, F1, Q1, np.empty((nevts, nout, 4), dtype))
diff --git a/dpbench/benchmarks/rambo/rambo_numba_dpex_k.py b/dpbench/benchmarks/rambo/rambo_numba_dpex_k.py
@@ -9,11 +9,12 @@
 
 @dpex.kernel
 def _rambo(C1, F1, Q1, nout, output):
+    dtype = C1.dtype
     i = dpex.get_global_id(0)
     for j in range(nout):
-        C = 2.0 * C1[i, j] - 1.0
-        S = sqrt(1 - C * C)
-        F = 2.0 * pi * F1[i, j]
+        C = dtype.type(2.0) * C1[i, j] - dtype.type(1.0)
+        S = sqrt(dtype.type(1) - C * C)
+        F = dtype.type(2.0 * pi) * F1[i, j]
         Q = -log(Q1[i, j])
 
         output[i, j, 0] = Q

Original file line number	Diff line number	Diff line change
`@@ -64,8 +64,10 @@ class Params(NamedTuple):`
`64`	`64`	`data_size, Params(eps=DEFAULT_EPS, minpts=DEFAULT_MINPTS)`
`65`	`65`	`)`
`66`	`66`
	`67`	`+ dtype: np.dtype = types_dict["float"]`
	`68`	`+`
`67`	`69`	`return (`
`68`		`- X.flatten().astype(types_dict["float"]),`
`69`		`- params.eps,`
	`70`	`+ X.flatten().astype(dtype),`
	`71`	`+ dtype.type(params.eps),`
`70`	`72`	`params.minpts,`
`71`	`73`	`)`
Original file line number	Diff line number	Diff line change
`@@ -41,6 +41,6 @@ def _gen_test_data(test_size, data_dim, seed_test, dtype):`
`41`	`41`	`)`
`42`	`42`	`x_test = _gen_test_data(test_size, data_dim, seed_test, dtype)`
`43`	`43`	`predictions = np.empty(test_size, types_dict["int"])`
`44`		`- votes_to_classes = np.zeros((test_size, classes_num))`
	`44`	`+ votes_to_classes = np.zeros((test_size, classes_num), dtype)`
`45`	`45`
`46`	`46`	`return (x_train, y_train, x_test, predictions, votes_to_classes)`