diff --git a/npbench/benchmarks/azimint_hist/azimint_hist.py b/npbench/benchmarks/azimint_hist/azimint_hist.py index aa7491f3..b58cbb1b 100644 --- a/npbench/benchmarks/azimint_hist/azimint_hist.py +++ b/npbench/benchmarks/azimint_hist/azimint_hist.py @@ -1,8 +1,9 @@ # Copyright 2021 ETH Zurich and the NPBench authors. All rights reserved. +import numpy as np -def initialize(N): +def initialize(N, datatype=np.float32): from numpy.random import default_rng rng = default_rng(42) - data, radius = rng.random((N, )), rng.random((N, )) + data, radius = rng.random((N, ), dtype=datatype), rng.random((N, ), dtype=datatype) return data, radius diff --git a/npbench/benchmarks/azimint_hist/azimint_hist_dace.py b/npbench/benchmarks/azimint_hist/azimint_hist_dace.py index ebdcfdf2..5db0eed9 100644 --- a/npbench/benchmarks/azimint_hist/azimint_hist_dace.py +++ b/npbench/benchmarks/azimint_hist/azimint_hist_dace.py @@ -36,12 +36,13 @@ import numpy as np import dace as dc +from npbench.infrastructure.dace_framework import dc_float N, bins, npt = (dc.symbol(s, dtype=dc.int64) for s in ('N', 'bins', 'npt')) @dc.program -def get_bin_edges(a: dc.float64[N], bin_edges: dc.float64[bins + 1]): +def get_bin_edges(a: dc_float[N], bin_edges: dc_float[bins + 1]): a_min = np.amin(a) a_max = np.amax(a) delta = (a_max - a_min) / bins @@ -52,7 +53,7 @@ def get_bin_edges(a: dc.float64[N], bin_edges: dc.float64[bins + 1]): @dc.program -def compute_bin(x: dc.float64, bin_edges: dc.float64[bins + 1]): +def compute_bin(x: dc_float, bin_edges: dc_float[bins + 1]): # assuming uniform bins for now a_min = bin_edges[0] a_max = bin_edges[bins] @@ -60,7 +61,7 @@ def compute_bin(x: dc.float64, bin_edges: dc.float64[bins + 1]): @dc.program -def histogram(a: dc.float64[N], bin_edges: dc.float64[bins + 1]): +def histogram(a: dc_float[N], bin_edges: dc_float[bins + 1]): hist = np.ndarray((bins, ), dtype=np.int64) hist[:] = 0 get_bin_edges(a, bin_edges) @@ -73,8 +74,8 @@ def histogram(a: dc.float64[N], bin_edges: dc.float64[bins + 1]): @dc.program -def histogram_weights(a: dc.float64[N], bin_edges: dc.float64[bins + 1], - weights: dc.float64[N]): +def histogram_weights(a: dc_float[N], bin_edges: dc_float[bins + 1], + weights: dc_float[N]): hist = np.ndarray((bins, ), dtype=weights.dtype) hist[:] = 0 get_bin_edges(a, bin_edges) @@ -87,11 +88,11 @@ def histogram_weights(a: dc.float64[N], bin_edges: dc.float64[bins + 1], @dc.program -def azimint_hist(data: dc.float64[N], radius: dc.float64[N]): +def azimint_hist(data: dc_float[N], radius: dc_float[N]): # histu = np.histogram(radius, npt)[0] - bin_edges_u = np.ndarray((npt + 1, ), dtype=np.float64) + bin_edges_u = np.ndarray((npt + 1, ), dtype=dc_float) histu = histogram(radius, bin_edges_u) # histw = np.histogram(radius, npt, weights=data)[0] - bin_edges_w = np.ndarray((npt + 1, ), dtype=np.float64) + bin_edges_w = np.ndarray((npt + 1, ), dtype=dc_float) histw = histogram_weights(radius, bin_edges_w, data) return histw / histu diff --git a/npbench/benchmarks/azimint_naive/azimint_naive.py b/npbench/benchmarks/azimint_naive/azimint_naive.py index aa7491f3..db701658 100644 --- a/npbench/benchmarks/azimint_naive/azimint_naive.py +++ b/npbench/benchmarks/azimint_naive/azimint_naive.py @@ -1,8 +1,8 @@ # Copyright 2021 ETH Zurich and the NPBench authors. All rights reserved. -def initialize(N): +def initialize(N, datatype): from numpy.random import default_rng rng = default_rng(42) - data, radius = rng.random((N, )), rng.random((N, )) + data, radius = rng.random((N, ), dtype=datatype), rng.random((N, ), dtype=datatype) return data, radius diff --git a/npbench/benchmarks/azimint_naive/azimint_naive_dace.py b/npbench/benchmarks/azimint_naive/azimint_naive_dace.py index 0066d612..8f9d32c9 100644 --- a/npbench/benchmarks/azimint_naive/azimint_naive_dace.py +++ b/npbench/benchmarks/azimint_naive/azimint_naive_dace.py @@ -10,15 +10,16 @@ import numpy as np import dace as dc +from npbench.infrastructure.dace_framework import dc_float N, npt = (dc.symbol(s, dtype=dc.int64) for s in ('N', 'npt')) @dc.program -def azimint_naive(data: dc.float64[N], radius: dc.float64[N]): +def azimint_naive(data: dc_float[N], radius: dc_float[N]): # rmax = radius.max() rmax = np.amax(radius) - res = np.zeros((npt, ), dtype=np.float64) # Fix in np.full + res = np.zeros((npt, ), dtype=dc_float) # Fix in np.full for i in range(npt): # for i in dc.map[0:npt]: # Optimization r1 = rmax * i / npt @@ -27,7 +28,7 @@ def azimint_naive(data: dc.float64[N], radius: dc.float64[N]): # values_r12 = data[mask_r12] # res[i] = np.mean(values_r12) on_values = 0 - tmp = np.float64(0) + tmp = dc_float(0) for j in dc.map[0:N]: if mask_r12[j]: tmp += data[j] diff --git a/npbench/benchmarks/cavity_flow/cavity_flow.py b/npbench/benchmarks/cavity_flow/cavity_flow.py index ea29e68b..5d4aa926 100644 --- a/npbench/benchmarks/cavity_flow/cavity_flow.py +++ b/npbench/benchmarks/cavity_flow/cavity_flow.py @@ -3,10 +3,10 @@ import numpy as np -def initialize(ny, nx): - u = np.zeros((ny, nx), dtype=np.float64) - v = np.zeros((ny, nx), dtype=np.float64) - p = np.zeros((ny, nx), dtype=np.float64) +def initialize(ny, nx, datatype=np.float32): + u = np.zeros((ny, nx), dtype=datatype) + v = np.zeros((ny, nx), dtype=datatype) + p = np.zeros((ny, nx), dtype=datatype) dx = 2 / (nx - 1) dy = 2 / (ny - 1) dt = .1 / ((nx - 1) * (ny - 1)) diff --git a/npbench/benchmarks/cavity_flow/cavity_flow_dace.py b/npbench/benchmarks/cavity_flow/cavity_flow_dace.py index 6ff03beb..6be6f438 100644 --- a/npbench/benchmarks/cavity_flow/cavity_flow_dace.py +++ b/npbench/benchmarks/cavity_flow/cavity_flow_dace.py @@ -9,14 +9,15 @@ import numpy as np import dace as dc +from npbench.infrastructure.dace_framework import dc_float nx, ny, nit = (dc.symbol(s, dc.int64) for s in ('nx', 'ny', 'nit')) @dc.program -def build_up_b(b: dc.float64[ny, nx], rho: dc.float64, dt: dc.float64, - u: dc.float64[ny, nx], v: dc.float64[ny, nx], dx: dc.float64, - dy: dc.float64): +def build_up_b(b: dc_float[ny, nx], rho: dc_float, dt: dc_float, + u: dc_float[ny, nx], v: dc_float[ny, nx], dx: dc_float, + dy: dc_float): b[1:-1, 1:-1] = (rho * (1 / dt * ((u[1:-1, 2:] - u[1:-1, 0:-2]) / (2 * dx) + @@ -28,8 +29,8 @@ def build_up_b(b: dc.float64[ny, nx], rho: dc.float64, dt: dc.float64, @dc.program -def pressure_poisson(p: dc.float64[ny, nx], dx: dc.float64, dy: dc.float64, - b: dc.float64[ny, nx]): +def pressure_poisson(p: dc_float[ny, nx], dx: dc_float, dy: dc_float, + b: dc_float[ny, nx]): pn = np.empty_like(p) pn[:] = p.copy() @@ -47,10 +48,10 @@ def pressure_poisson(p: dc.float64[ny, nx], dx: dc.float64, dy: dc.float64, @dc.program -def cavity_flow(nt: dc.int64, nit: dc.int64, u: dc.float64[ny, nx], - v: dc.float64[ny, nx], dt: dc.float64, dx: dc.float64, - dy: dc.float64, p: dc.float64[ny, nx], rho: dc.float64, - nu: dc.float64): +def cavity_flow(nt: dc.int64, nit: dc.int64, u: dc_float[ny, nx], + v: dc_float[ny, nx], dt: dc_float, dx: dc_float, + dy: dc_float, p: dc_float[ny, nx], rho: dc_float, + nu: dc_float): un = np.empty_like(u) vn = np.empty_like(v) b = np.zeros((ny, nx)) diff --git a/npbench/benchmarks/channel_flow/channel_flow.py b/npbench/benchmarks/channel_flow/channel_flow.py index 18524a29..b0071d66 100644 --- a/npbench/benchmarks/channel_flow/channel_flow.py +++ b/npbench/benchmarks/channel_flow/channel_flow.py @@ -3,11 +3,11 @@ import numpy as np -def initialize(ny, nx): - u = np.zeros((ny, nx), dtype=np.float64) - v = np.zeros((ny, nx), dtype=np.float64) - p = np.ones((ny, nx), dtype=np.float64) - dx = 2 / (nx - 1) - dy = 2 / (ny - 1) - dt = .1 / ((nx - 1) * (ny - 1)) +def initialize(ny, nx, datatype=np.float32): + u = np.zeros((ny, nx), dtype=datatype) + v = np.zeros((ny, nx), dtype=datatype) + p = np.ones((ny, nx), dtype=datatype) + dx = datatype(2 / (nx - 1)) + dy = datatype(2 / (ny - 1)) + dt = datatype(.1 / ((nx - 1) * (ny - 1))) return u, v, p, dx, dy, dt diff --git a/npbench/benchmarks/channel_flow/channel_flow_dace.py b/npbench/benchmarks/channel_flow/channel_flow_dace.py index 7654d7c0..dd679071 100644 --- a/npbench/benchmarks/channel_flow/channel_flow_dace.py +++ b/npbench/benchmarks/channel_flow/channel_flow_dace.py @@ -9,13 +9,14 @@ import numpy as np import dace as dc +from npbench.infrastructure.dace_framework import dc_float nx, ny, nit = (dc.symbol(s, dc.int64) for s in ('nx', 'ny', 'nit')) @dc.program -def build_up_b(rho: dc.float64, dt: dc.float64, dx: dc.float64, dy: dc.float64, - u: dc.float64[ny, nx], v: dc.float64[ny, nx]): +def build_up_b(rho: dc_float, dt: dc_float, dx: dc_float, dy: dc_float, + u: dc_float[ny, nx], v: dc_float[ny, nx]): b = np.zeros_like(u) b[1:-1, 1:-1] = (rho * (1 / dt * ((u[1:-1, 2:] - u[1:-1, 0:-2]) / (2 * dx) + @@ -45,8 +46,8 @@ def build_up_b(rho: dc.float64, dt: dc.float64, dx: dc.float64, dy: dc.float64, @dc.program -def pressure_poisson_periodic(p: dc.float64[ny, nx], dx: dc.float64, - dy: dc.float64, b: dc.float64[ny, nx]): +def pressure_poisson_periodic(p: dc_float[ny, nx], dx: dc_float, + dy: dc_float, b: dc_float[ny, nx]): pn = np.empty_like(p) for q in range(nit): @@ -74,10 +75,10 @@ def pressure_poisson_periodic(p: dc.float64[ny, nx], dx: dc.float64, @dc.program -def channel_flow(nit: dc.int64, u: dc.float64[ny, nx], v: dc.float64[ny, nx], - dt: dc.float64, dx: dc.float64, dy: dc.float64, - p: dc.float64[ny, nx], rho: dc.float64, nu: dc.float64, - F: dc.float64): +def channel_flow(nit: dc.int64, u: dc_float[ny, nx], v: dc_float[ny, nx], + dt: dc_float, dx: dc_float, dy: dc_float, + p: dc_float[ny, nx], rho: dc_float, nu: dc_float, + F: dc_float): udiff = 1.0 stepcount = 0 diff --git a/npbench/benchmarks/compute/compute.py b/npbench/benchmarks/compute/compute.py index 242ffbe0..0f146f1c 100644 --- a/npbench/benchmarks/compute/compute.py +++ b/npbench/benchmarks/compute/compute.py @@ -3,9 +3,10 @@ import numpy as np -def initialize(M, N): +def initialize(M, N, datatype): from numpy.random import default_rng rng = default_rng(42) + # we ignore the datatype and always use int64 array_1 = rng.uniform(0, 1000, size=(M, N)).astype(np.int64) array_2 = rng.uniform(0, 1000, size=(M, N)).astype(np.int64) a = np.int64(4) diff --git a/npbench/benchmarks/contour_integral/contour_integral.py b/npbench/benchmarks/contour_integral/contour_integral.py index f5dd4983..137fb0bd 100644 --- a/npbench/benchmarks/contour_integral/contour_integral.py +++ b/npbench/benchmarks/contour_integral/contour_integral.py @@ -3,14 +3,14 @@ import numpy as np -def rng_complex(shape, rng): - return (rng.random(shape) + rng.random(shape) * 1j) +def rng_complex(shape, rng, datatype): + return (rng.random(shape, dtype=datatype) + rng.random(shape, dtype=datatype) * 1j) -def initialize(NR, NM, slab_per_bc, num_int_pts): +def initialize(NR, NM, slab_per_bc, num_int_pts, datatype=np.float32): from numpy.random import default_rng rng = default_rng(42) - Ham = rng_complex((slab_per_bc + 1, NR, NR), rng) - int_pts = rng_complex((num_int_pts, ), rng) - Y = rng_complex((NR, NM), rng) + Ham = rng_complex((slab_per_bc + 1, NR, NR), rng, datatype) + int_pts = rng_complex((num_int_pts, ), rng, datatype) + Y = rng_complex((NR, NM), rng, datatype) return Ham, int_pts, Y diff --git a/npbench/benchmarks/contour_integral/contour_integral_dace.py b/npbench/benchmarks/contour_integral/contour_integral_dace.py index bb9ec377..e5ad6a1a 100644 --- a/npbench/benchmarks/contour_integral/contour_integral_dace.py +++ b/npbench/benchmarks/contour_integral/contour_integral_dace.py @@ -3,18 +3,20 @@ import numpy as np import dace as dc +from npbench.infrastructure.dace_framework import dc_complex_float + NR, NM, slab_per_bc = (dc.symbol(s, dtype=dc.int64) for s in ('NR', 'NM', 'slab_per_bc')) @dc.program -def contour_integral(Ham: dc.complex128[slab_per_bc + 1, NR, NR], - int_pts: dc.complex128[32], Y: dc.complex128[NR, NM]): - P0 = np.zeros((NR, NM), dtype=np.complex128) - P1 = np.zeros((NR, NM), dtype=np.complex128) +def contour_integral(Ham: dc_complex_float[slab_per_bc + 1, NR, NR], + int_pts: dc_complex_float[32], Y: dc_complex_float[NR, NM]): + P0 = np.zeros((NR, NM), dtype=dc_complex_float) + P1 = np.zeros((NR, NM), dtype=dc_complex_float) for idx in range(32): z = int_pts[idx] - Tz = np.zeros((NR, NR), dtype=np.complex128) + Tz = np.zeros((NR, NR), dtype=dc_complex_float) for n in range(slab_per_bc + 1): zz = np.power(z, slab_per_bc / 2 - n) Tz += zz * Ham[n] diff --git a/npbench/benchmarks/crc16/crc16.py b/npbench/benchmarks/crc16/crc16.py index 3e8f33c6..3c044479 100644 --- a/npbench/benchmarks/crc16/crc16.py +++ b/npbench/benchmarks/crc16/crc16.py @@ -3,7 +3,7 @@ import numpy as np -def initialize(N): +def initialize(N, datatype): from numpy.random import default_rng rng = default_rng(42) data = rng.integers(0, 256, size=(N, ), dtype=np.uint8) diff --git a/npbench/benchmarks/deep_learning/conv2d_bias/conv2d.py b/npbench/benchmarks/deep_learning/conv2d_bias/conv2d.py index 2e7e685e..49cb6bbd 100644 --- a/npbench/benchmarks/deep_learning/conv2d_bias/conv2d.py +++ b/npbench/benchmarks/deep_learning/conv2d_bias/conv2d.py @@ -3,12 +3,12 @@ import numpy as np -def initialize(C_in, C_out, H, K, N, W): +def initialize(C_in, C_out, H, K, N, W, datatype=np.float32): from numpy.random import default_rng rng = default_rng(42) # NHWC data layout - input = rng.random((N, H, W, C_in), dtype=np.float32) + input = rng.random((N, H, W, C_in), dtype=datatype) # Weights - weights = rng.random((K, K, C_in, C_out), dtype=np.float32) - bias = rng.random((C_out, ), dtype=np.float32) + weights = rng.random((K, K, C_in, C_out), dtype=datatype) + bias = rng.random((C_out, ), dtype=datatype) return input, weights, bias diff --git a/npbench/benchmarks/deep_learning/conv2d_bias/conv2d_dace.py b/npbench/benchmarks/deep_learning/conv2d_bias/conv2d_dace.py index 0edb5642..c34ede51 100644 --- a/npbench/benchmarks/deep_learning/conv2d_bias/conv2d_dace.py +++ b/npbench/benchmarks/deep_learning/conv2d_bias/conv2d_dace.py @@ -1,5 +1,6 @@ import numpy as np import dace as dc +from npbench.infrastructure.dace_framework import dc_float C_in, C_out, H, K, N, W = (dc.symbol(s, dc.int64) for s in ('C_in', 'C_out', 'H', 'K', 'N', 'W')) @@ -7,7 +8,7 @@ # Deep learning convolutional operator (stride = 1) @dc.program -def conv2d(input: dc.float32[N, H, W, C_in], weights: dc.float32[K, K, C_in, +def conv2d(input: dc_float[N, H, W, C_in], weights: dc_float[K, K, C_in, C_out]): # K = weights.shape[0] # Assuming square kernel # N = input.shape[0] @@ -15,7 +16,7 @@ def conv2d(input: dc.float32[N, H, W, C_in], weights: dc.float32[K, K, C_in, # W_out = input.shape[2] - K + 1 # C_out = weights.shape[3] # output = np.empty((N, H_out, W_out, C_out), dtype=np.float32) - output = np.ndarray((N, H - K + 1, W - K + 1, C_out), dtype=np.float32) + output = np.ndarray((N, H - K + 1, W - K + 1, C_out), dtype=dc_float) # Loop structure adapted from https://github.com/SkalskiP/ILearnDeepLearning.py/blob/ba0b5ba589d4e656141995e8d1a06d44db6ce58d/01_mysteries_of_neural_networks/06_numpy_convolutional_neural_net/src/layers/convolutional.py#L88 # for i, j in dc.map[0:H-K+1, 0:W-K+1]: @@ -31,7 +32,7 @@ def conv2d(input: dc.float32[N, H, W, C_in], weights: dc.float32[K, K, C_in, @dc.program -def conv2d_bias(input: dc.float32[N, H, W, C_in], - weights: dc.float32[K, K, C_in, - C_out], bias: dc.float32[C_out]): +def conv2d_bias(input: dc_float[N, H, W, C_in], + weights: dc_float[K, K, C_in, + C_out], bias: dc_float[C_out]): return conv2d(input, weights) + bias diff --git a/npbench/benchmarks/deep_learning/lenet/lenet.py b/npbench/benchmarks/deep_learning/lenet/lenet.py index f051f3b0..fc7a7924 100644 --- a/npbench/benchmarks/deep_learning/lenet/lenet.py +++ b/npbench/benchmarks/deep_learning/lenet/lenet.py @@ -3,7 +3,7 @@ import numpy as np -def initialize(N, H, W): +def initialize(N, H, W, datatype=np.float32): from numpy.random import default_rng rng = default_rng(42) @@ -18,18 +18,18 @@ def initialize(N, H, W): C_before_fc1 = 16 * H_pool2 * W_pool2 # NHWC data layout - input = rng.random((N, H, W, 1), dtype=np.float32) + input = rng.random((N, H, W, 1), dtype=datatype) # Weights - conv1 = rng.random((5, 5, 1, 6), dtype=np.float32) - conv1bias = rng.random((6, ), dtype=np.float32) - conv2 = rng.random((5, 5, 6, 16), dtype=np.float32) - conv2bias = rng.random((16, ), dtype=np.float32) - fc1w = rng.random((C_before_fc1, 120), dtype=np.float32) - fc1b = rng.random((120, ), dtype=np.float32) - fc2w = rng.random((120, 84), dtype=np.float32) - fc2b = rng.random((84, ), dtype=np.float32) - fc3w = rng.random((84, 10), dtype=np.float32) - fc3b = rng.random((10, ), dtype=np.float32) + conv1 = rng.random((5, 5, 1, 6), dtype=datatype) + conv1bias = rng.random((6, ), dtype=datatype) + conv2 = rng.random((5, 5, 6, 16), dtype=datatype) + conv2bias = rng.random((16, ), dtype=datatype) + fc1w = rng.random((C_before_fc1, 120), dtype=datatype) + fc1b = rng.random((120, ), dtype=datatype) + fc2w = rng.random((120, 84), dtype=datatype) + fc2b = rng.random((84, ), dtype=datatype) + fc3w = rng.random((84, 10), dtype=datatype) + fc3b = rng.random((10, ), dtype=datatype) return (input, conv1, conv1bias, conv2, conv2bias, fc1w, fc1b, fc2w, fc2b, fc3w, fc3b, C_before_fc1) diff --git a/npbench/benchmarks/deep_learning/lenet/lenet_dace.py b/npbench/benchmarks/deep_learning/lenet/lenet_dace.py index d95752b1..fa195e92 100644 --- a/npbench/benchmarks/deep_learning/lenet/lenet_dace.py +++ b/npbench/benchmarks/deep_learning/lenet/lenet_dace.py @@ -1,5 +1,6 @@ import numpy as np import dace as dc +from npbench.infrastructure.dace_framework import dc_float N, H, W, C_before_fc1, S0, S1, S2, S3, S4, S5 = (dc.symbol( s, dtype=dc.int64) for s in ('N', 'H', 'W', 'C_before_fc1', 'S0', 'S1', @@ -7,18 +8,18 @@ @dc.program -def relu2(x: dc.float32[S0, S1]): +def relu2(x: dc_float[S0, S1]): return np.maximum(x, 0) @dc.program -def relu4(x: dc.float32[S0, S1, S2, S3]): +def relu4(x: dc_float[S0, S1, S2, S3]): return np.maximum(x, 0) # Deep learning convolutional operator (stride = 1) @dc.program -def conv2d(input: dc.float32[S0, S1, S2, S3], weights: dc.float32[S4, S4, S3, +def conv2d(input: dc_float[S0, S1, S2, S3], weights: dc_float[S4, S4, S3, S5]): # K = weights.shape[0] # Assuming square kernel # N = input.shape[0] @@ -49,7 +50,7 @@ def conv2d(input: dc.float32[S0, S1, S2, S3], weights: dc.float32[S4, S4, S3, # 2x2 maxpool operator, as used in LeNet-5 @dc.program -def maxpool2d(x: dc.float32[S0, S1, S2, S3]): +def maxpool2d(x: dc_float[S0, S1, S2, S3]): # output = np.empty( # [x.shape[0], x.shape[1] // 2, x.shape[2] // 2, x.shape[3]], # dtype=x.dtype) @@ -66,12 +67,12 @@ def maxpool2d(x: dc.float32[S0, S1, S2, S3]): # LeNet-5 Convolutional Neural Network (inference mode) @dc.program -def lenet5(input: dc.float32[N, H, W, 1], conv1: dc.float32[5, 5, 1, 6], - conv1bias: dc.float32[6], conv2: dc.float32[5, 5, 6, 16], - conv2bias: dc.float32[16], fc1w: dc.float32[C_before_fc1, 120], - fc1b: dc.float32[120], fc2w: dc.float32[120, 84], - fc2b: dc.float32[84], fc3w: dc.float32[84, - 10], fc3b: dc.float32[10]): +def lenet5(input: dc_float[N, H, W, 1], conv1: dc_float[5, 5, 1, 6], + conv1bias: dc_float[6], conv2: dc_float[5, 5, 6, 16], + conv2bias: dc_float[16], fc1w: dc_float[C_before_fc1, 120], + fc1b: dc_float[120], fc2w: dc_float[120, 84], + fc2b: dc_float[84], fc3w: dc_float[84, + 10], fc3b: dc_float[10]): # x = relu(conv2d(input, conv1) + conv1bias) # x = maxpool2d(x) # x = relu(conv2d(x, conv2) + conv2bias) diff --git a/npbench/benchmarks/deep_learning/mlp/mlp.py b/npbench/benchmarks/deep_learning/mlp/mlp.py index 27966a0a..930b530c 100644 --- a/npbench/benchmarks/deep_learning/mlp/mlp.py +++ b/npbench/benchmarks/deep_learning/mlp/mlp.py @@ -3,19 +3,19 @@ import numpy as np -def initialize(C_in, N, S0, S1, S2): +def initialize(C_in, N, S0, S1, S2, datatype=np.float32): from numpy.random import default_rng rng = default_rng(42) mlp_sizes = [S0, S1, S2] # [300, 100, 10] # Inputs - input = np.random.rand(N, C_in).astype(np.float32) + input = np.random.rand(N, C_in).astype(datatype) # Weights - w1 = rng.random((C_in, mlp_sizes[0]), dtype=np.float32) - b1 = rng.random((mlp_sizes[0], ), dtype=np.float32) - w2 = rng.random((mlp_sizes[0], mlp_sizes[1]), dtype=np.float32) - b2 = rng.random((mlp_sizes[1], ), dtype=np.float32) - w3 = rng.random((mlp_sizes[1], mlp_sizes[2]), dtype=np.float32) - b3 = rng.random((mlp_sizes[2], ), dtype=np.float32) + w1 = rng.random((C_in, mlp_sizes[0]), dtype=datatype) + b1 = rng.random((mlp_sizes[0], ), dtype=datatype) + w2 = rng.random((mlp_sizes[0], mlp_sizes[1]), dtype=datatype) + b2 = rng.random((mlp_sizes[1], ), dtype=datatype) + w3 = rng.random((mlp_sizes[1], mlp_sizes[2]), dtype=datatype) + b3 = rng.random((mlp_sizes[2], ), dtype=datatype) return input, w1, b1, w2, b2, w3, b3 diff --git a/npbench/benchmarks/deep_learning/mlp/mlp_dace.py b/npbench/benchmarks/deep_learning/mlp/mlp_dace.py index 2ffe217c..f233bad5 100644 --- a/npbench/benchmarks/deep_learning/mlp/mlp_dace.py +++ b/npbench/benchmarks/deep_learning/mlp/mlp_dace.py @@ -1,5 +1,6 @@ import numpy as np import dace as dc +from npbench.infrastructure.dace_framework import dc_float C_in, N, S0, S1, S2, N1, N2 = (dc.symbol(s, dtype=dc.int64) for s in ('C_in', 'N', 'S0', 'S1', 'S2', 'N1', @@ -7,13 +8,13 @@ @dc.program -def relu(x: dc.float32[N1, N2]): +def relu(x: dc_float[N1, N2]): return np.maximum(x, 0) # Numerically-stable version of softmax @dc.program -def softmax(x: dc.float32[N1, N2]): +def softmax(x: dc_float[N1, N2]): # tmp_max = np.max(x, axis=-1, keepdims=True) tmp_max = np.maximum.reduce(x, axis=-1, keepdims=True) tmp_out = np.exp(x - tmp_max) @@ -24,9 +25,9 @@ def softmax(x: dc.float32[N1, N2]): # 3-layer MLP @dc.program -def mlp(input: dc.float32[N, C_in], w1: dc.float32[C_in, S0], - b1: dc.float32[S0], w2: dc.float32[S0, S1], b2: dc.float32[S1], - w3: dc.float32[S1, S2], b3: dc.float32[S2]): +def mlp(input: dc_float[N, C_in], w1: dc_float[C_in, S0], + b1: dc_float[S0], w2: dc_float[S0, S1], b2: dc_float[S1], + w3: dc_float[S1, S2], b3: dc_float[S2]): x1 = relu(input @ w1 + b1) x2 = relu(x1 @ w2 + b2) x3 = softmax(x2 @ w3 + b3) # Softmax call can be omitted if necessary diff --git a/npbench/benchmarks/deep_learning/resnet/resnet.py b/npbench/benchmarks/deep_learning/resnet/resnet.py index 3770a779..202d5cc2 100644 --- a/npbench/benchmarks/deep_learning/resnet/resnet.py +++ b/npbench/benchmarks/deep_learning/resnet/resnet.py @@ -3,14 +3,14 @@ import numpy as np -def initialize(N, W, H, C1, C2): +def initialize(N, W, H, C1, C2, datatype=np.float32): from numpy.random import default_rng rng = default_rng(42) # Input - input = rng.random((N, H, W, C1), dtype=np.float32) + input = rng.random((N, H, W, C1), dtype=datatype) # Weights - conv1 = rng.random((1, 1, C1, C2), dtype=np.float32) - conv2 = rng.random((3, 3, C2, C2), dtype=np.float32) - conv3 = rng.random((1, 1, C2, C1), dtype=np.float32) + conv1 = rng.random((1, 1, C1, C2), dtype=datatype) + conv2 = rng.random((3, 3, C2, C2), dtype=datatype) + conv3 = rng.random((1, 1, C2, C1), dtype=datatype) return (input, conv1, conv2, conv3) diff --git a/npbench/benchmarks/deep_learning/softmax/softmax.py b/npbench/benchmarks/deep_learning/softmax/softmax.py index b18c3c74..5b5102c5 100644 --- a/npbench/benchmarks/deep_learning/softmax/softmax.py +++ b/npbench/benchmarks/deep_learning/softmax/softmax.py @@ -3,8 +3,8 @@ import numpy as np -def initialize(N, H, SM): +def initialize(N, H, SM, datatype=np.float32): from numpy.random import default_rng rng = default_rng(42) - x = rng.random((N, H, SM, SM), dtype=np.float32) + x = rng.random((N, H, SM, SM), dtype=datatype) return x diff --git a/npbench/benchmarks/deep_learning/softmax/softmax_dace.py b/npbench/benchmarks/deep_learning/softmax/softmax_dace.py index 658705f9..72d7b4a9 100644 --- a/npbench/benchmarks/deep_learning/softmax/softmax_dace.py +++ b/npbench/benchmarks/deep_learning/softmax/softmax_dace.py @@ -1,12 +1,13 @@ import numpy as np import dace as dc +from npbench.infrastructure.dace_framework import dc_float N, H, SM = (dc.symbol(s, dc.int64) for s in ('N', 'H', 'SM')) # Numerically-stable version of softmax @dc.program -def softmax(x: dc.float32[N, H, SM, SM]): +def softmax(x: dc_float[N, H, SM, SM]): # tmp_max = np.max(x, axis=-1, keepdims=True) tmp_max = np.maximum.reduce(x, axis=-1, keepdims=True, initial=-9999) tmp_out = np.exp(x - tmp_max) @@ -17,7 +18,7 @@ def softmax(x: dc.float32[N, H, SM, SM]): # Numerically-stable version of softmax @dc.program -def softmax_gpu(x: dc.float32[N, H, SM, SM], out: dc.float32[N, H, SM, SM]): +def softmax_gpu(x: dc_float[N, H, SM, SM], out: dc_float[N, H, SM, SM]): # tmp_max = np.max(x, axis=-1, keepdims=True) tmp_max = np.maximum.reduce(x, axis=-1, keepdims=True, initial=-9999) tmp_out = np.exp(x - tmp_max) diff --git a/npbench/benchmarks/go_fast/go_fast.py b/npbench/benchmarks/go_fast/go_fast.py index c96b8dd7..d38c5cb1 100644 --- a/npbench/benchmarks/go_fast/go_fast.py +++ b/npbench/benchmarks/go_fast/go_fast.py @@ -3,8 +3,8 @@ import numpy as np -def initialize(N): +def initialize(N, datatype=np.float32): from numpy.random import default_rng rng = default_rng(42) - x = rng.random((N, N), dtype=np.float64) + x = rng.random((N, N), dtype=datatype) return x diff --git a/npbench/benchmarks/go_fast/go_fast_dace.py b/npbench/benchmarks/go_fast/go_fast_dace.py index f0e93973..b29c910b 100644 --- a/npbench/benchmarks/go_fast/go_fast_dace.py +++ b/npbench/benchmarks/go_fast/go_fast_dace.py @@ -2,12 +2,13 @@ import numpy as np import dace as dc +from npbench.infrastructure.dace_framework import dc_float N = dc.symbol('N', dtype=dc.int64) @dc.program -def go_fast(a: dc.float64[N, N]): +def go_fast(a: dc_float[N, N]): trace = 0.0 for i in range(N): trace += np.tanh(a[i, i]) diff --git a/npbench/benchmarks/mandelbrot1/mandelbrot1_numpy.py b/npbench/benchmarks/mandelbrot1/mandelbrot1_numpy.py index ccaa6891..e2178471 100644 --- a/npbench/benchmarks/mandelbrot1/mandelbrot1_numpy.py +++ b/npbench/benchmarks/mandelbrot1/mandelbrot1_numpy.py @@ -5,19 +5,21 @@ # ----------------------------------------------------------------------------- import numpy as np +from npbench.infrastructure.framework import np_float, np_complex def mandelbrot(xmin, xmax, ymin, ymax, xn, yn, maxiter, horizon=2.0): # Adapted from https://www.ibm.com/developerworks/community/blogs/jfp/... # .../entry/How_To_Compute_Mandelbrodt_Set_Quickly?lang=en - X = np.linspace(xmin, xmax, xn, dtype=np.float64) - Y = np.linspace(ymin, ymax, yn, dtype=np.float64) + X = np.linspace(xmin, xmax, xn, dtype=np_float) + Y = np.linspace(ymin, ymax, yn, dtype=np_float) C = X + Y[:, None] * 1j N = np.zeros(C.shape, dtype=np.int64) - Z = np.zeros(C.shape, dtype=np.complex128) + Z = np.zeros(C.shape, dtype=np_complex) for n in range(maxiter): I = np.less(abs(Z), horizon) N[I] = n Z[I] = Z[I]**2 + C[I] N[N == maxiter - 1] = 0 return Z, N + diff --git a/npbench/benchmarks/nbody/nbody.py b/npbench/benchmarks/nbody/nbody.py index 1c3fdd50..1774cb47 100644 --- a/npbench/benchmarks/nbody/nbody.py +++ b/npbench/benchmarks/nbody/nbody.py @@ -3,11 +3,11 @@ import numpy as np -def initialize(N, tEnd, dt): +def initialize(N, tEnd, dt, datatype=np.float32): from numpy.random import default_rng rng = default_rng(42) - mass = 20.0 * np.ones((N, 1)) / N # total mass of particles is 20 - pos = rng.random((N, 3)) # randomly selected positions and velocities - vel = rng.random((N, 3)) + mass = 20.0 * np.ones((N, 1), dtype=datatype) / N # total mass of particles is 20 + pos = rng.random((N, 3), dtype=datatype) # randomly selected positions and velocities + vel = rng.random((N, 3), dtype=datatype) Nt = int(np.ceil(tEnd / dt)) return mass, pos, vel, Nt diff --git a/npbench/benchmarks/nbody/nbody_dace.py b/npbench/benchmarks/nbody/nbody_dace.py index 5db81c5e..80851e2f 100644 --- a/npbench/benchmarks/nbody/nbody_dace.py +++ b/npbench/benchmarks/nbody/nbody_dace.py @@ -3,6 +3,7 @@ import numpy as np import dace as dc +from npbench.infrastructure.dace_framework import dc_float """ Create Your Own N-body Simulation (With Python) Philip Mocz (2020) Princeton Univeristy, @PMocz @@ -21,8 +22,8 @@ @dc.program -def getAcc(pos: dc.float64[N, 3], mass: dc.float64[N], G: dc.float64, - softening: dc.float64): +def getAcc(pos: dc_float[N, 3], mass: dc_float[N], G: dc_float, + softening: dc_float): """ Calculate the acceleration on each particle due to Newton's Law pos is an N x 3 matrix of positions @@ -59,7 +60,7 @@ def getAcc(pos: dc.float64[N, 3], mass: dc.float64[N], G: dc.float64, # pack together the acceleration components # a = np.hstack((ax,ay,az)) - a = np.ndarray((N, 3), dtype=np.float64) + a = np.ndarray((N, 3), dtype=dc_float) # hstack(a, ax, ay, az) a[:, 0] = ax a[:, 1] = ay @@ -69,8 +70,8 @@ def getAcc(pos: dc.float64[N, 3], mass: dc.float64[N], G: dc.float64, @dc.program -def getEnergy(pos: dc.float64[N, 3], vel: dc.float64[N, 3], - mass: dc.float64[N], G: dc.float64): +def getEnergy(pos: dc_float[N, 3], vel: dc_float[N, 3], + mass: dc_float[N], G: dc_float): """ Get kinetic energy (KE) and potential energy (PE) of simulation pos is N x 3 matrix of positions @@ -123,8 +124,8 @@ def getEnergy(pos: dc.float64[N, 3], vel: dc.float64[N, 3], @dc.program -def nbody(mass: dc.float64[N], pos: dc.float64[N, 3], vel: dc.float64[N, 3], - dt: dc.float64, G: dc.float64, softening: dc.float64): +def nbody(mass: dc_float[N], pos: dc_float[N, 3], vel: dc_float[N, 3], + dt: dc_float, G: dc_float, softening: dc_float): # Convert to Center-of-Mass frame # vel -= np.mean(mass * vel, axis=0) / np.mean(mass) @@ -139,8 +140,8 @@ def nbody(mass: dc.float64[N], pos: dc.float64[N, 3], vel: dc.float64[N, 3], acc = getAcc(pos, mass, G, softening) # calculate initial energy of system - KE = np.ndarray(Nt + 1, dtype=np.float64) - PE = np.ndarray(Nt + 1, dtype=np.float64) + KE = np.ndarray(Nt + 1, dtype=dc_float) + PE = np.ndarray(Nt + 1, dtype=dc_float) KE[0], PE[0] = getEnergy(pos, vel, mass, G) t = 0.0 diff --git a/npbench/benchmarks/nbody/nbody_numpy.py b/npbench/benchmarks/nbody/nbody_numpy.py index 98088bf3..3e2f1a8b 100644 --- a/npbench/benchmarks/nbody/nbody_numpy.py +++ b/npbench/benchmarks/nbody/nbody_numpy.py @@ -89,8 +89,8 @@ def nbody(mass, pos, vel, N, Nt, dt, G, softening): acc = getAcc(pos, mass, G, softening) # calculate initial energy of system - KE = np.ndarray(Nt + 1, dtype=np.float64) - PE = np.ndarray(Nt + 1, dtype=np.float64) + KE = np.ndarray(Nt + 1, dtype=mass.dtype) + PE = np.ndarray(Nt + 1, dtype=mass.dtype) KE[0], PE[0] = getEnergy(pos, vel, mass, G) t = 0.0 diff --git a/npbench/benchmarks/polybench/adi/adi.py b/npbench/benchmarks/polybench/adi/adi.py index 40c68e98..f5915b0f 100644 --- a/npbench/benchmarks/polybench/adi/adi.py +++ b/npbench/benchmarks/polybench/adi/adi.py @@ -3,7 +3,7 @@ import numpy as np -def initialize(N, datatype=np.float64): +def initialize(N, datatype=np.float32): u = np.fromfunction(lambda i, j: (i + N - j) / N, (N, N), dtype=datatype) return u diff --git a/npbench/benchmarks/polybench/adi/adi_dace.py b/npbench/benchmarks/polybench/adi/adi_dace.py index 18c87a76..7b1a696c 100644 --- a/npbench/benchmarks/polybench/adi/adi_dace.py +++ b/npbench/benchmarks/polybench/adi/adi_dace.py @@ -2,12 +2,13 @@ import numpy as np import dace as dc +from npbench.infrastructure.dace_framework import dc_float N = dc.symbol('N', dtype=dc.int64) @dc.program -def kernel(TSTEPS: dc.int64, u: dc.float64[N, N]): +def kernel(TSTEPS: dc.int64, u: dc_float[N, N]): v = np.empty(u.shape, dtype=u.dtype) p = np.empty(u.shape, dtype=u.dtype) @@ -54,3 +55,4 @@ def kernel(TSTEPS: dc.int64, u: dc.float64[N, N]): u[1:N - 1, N - 1] = 1.0 for j in range(N - 2, 0, -1): u[1:N - 1, j] = p[1:N - 1, j] * u[1:N - 1, j + 1] + q[1:N - 1, j] + return u diff --git a/npbench/benchmarks/polybench/atax/atax.py b/npbench/benchmarks/polybench/atax/atax.py index 986b19bb..d5b0741d 100644 --- a/npbench/benchmarks/polybench/atax/atax.py +++ b/npbench/benchmarks/polybench/atax/atax.py @@ -3,7 +3,7 @@ import numpy as np -def initialize(M, N, datatype=np.float64): +def initialize(M, N, datatype=np.float32): fn = datatype(N) x = np.fromfunction(lambda i: 1 + (i / fn), (N, ), dtype=datatype) A = np.fromfunction(lambda i, j: ((i + j) % N) / (5 * M), (M, N), diff --git a/npbench/benchmarks/polybench/atax/atax_dace.py b/npbench/benchmarks/polybench/atax/atax_dace.py index cebed263..433c8a92 100644 --- a/npbench/benchmarks/polybench/atax/atax_dace.py +++ b/npbench/benchmarks/polybench/atax/atax_dace.py @@ -1,10 +1,11 @@ import numpy as np import dace as dc +from npbench.infrastructure.dace_framework import dc_float M, N = (dc.symbol(s, dtype=dc.int64) for s in ('M', 'N')) @dc.program -def kernel(A: dc.float64[M, N], x: dc.float64[N]): +def kernel(A: dc_float[M, N], x: dc_float[N]): return (A @ x) @ A diff --git a/npbench/benchmarks/polybench/bicg/bicg.py b/npbench/benchmarks/polybench/bicg/bicg.py index 5e4b77f6..29da3f93 100644 --- a/npbench/benchmarks/polybench/bicg/bicg.py +++ b/npbench/benchmarks/polybench/bicg/bicg.py @@ -3,7 +3,7 @@ import numpy as np -def initialize(M, N, datatype=np.float64): +def initialize(M, N, datatype=np.float32): A = np.fromfunction(lambda i, j: (i * (j + 1) % N) / N, (N, M), dtype=datatype) p = np.fromfunction(lambda i: (i % M) / M, (M, ), dtype=datatype) diff --git a/npbench/benchmarks/polybench/bicg/bicg_dace.py b/npbench/benchmarks/polybench/bicg/bicg_dace.py index cf1f5547..1d928f3a 100644 --- a/npbench/benchmarks/polybench/bicg/bicg_dace.py +++ b/npbench/benchmarks/polybench/bicg/bicg_dace.py @@ -1,10 +1,11 @@ import numpy as np import dace as dc +from npbench.infrastructure.dace_framework import dc_float M, N = (dc.symbol(s, dtype=dc.int64) for s in ('M', 'N')) @dc.program -def kernel(A: dc.float64[N, M], p: dc.float64[M], r: dc.float64[N]): +def kernel(A: dc_float[N, M], p: dc_float[M], r: dc_float[N]): return r @ A, A @ p diff --git a/npbench/benchmarks/polybench/cholesky/cholesky.py b/npbench/benchmarks/polybench/cholesky/cholesky.py index 1fe67bd7..c4ce9153 100644 --- a/npbench/benchmarks/polybench/cholesky/cholesky.py +++ b/npbench/benchmarks/polybench/cholesky/cholesky.py @@ -3,7 +3,7 @@ import numpy as np -def initialize(N, datatype=np.float64): +def initialize(N, datatype=np.float32): A = np.empty((N, N), dtype=datatype) for i in range(N): A[i, :i + 1] = np.fromfunction(lambda j: (-j % N) / N + 1, (i + 1, ), diff --git a/npbench/benchmarks/polybench/cholesky/cholesky_dace.py b/npbench/benchmarks/polybench/cholesky/cholesky_dace.py index 08b0f701..659b70b1 100644 --- a/npbench/benchmarks/polybench/cholesky/cholesky_dace.py +++ b/npbench/benchmarks/polybench/cholesky/cholesky_dace.py @@ -1,11 +1,12 @@ import numpy as np import dace as dc +from npbench.infrastructure.dace_framework import dc_float M, N = (dc.symbol(s, dtype=dc.int64) for s in ('M', 'N')) @dc.program -def kernel(A: dc.float64[N, N]): +def kernel(A: dc_float[N, N]): A[0, 0] = np.sqrt(A[0, 0]) for i in range(1, N): diff --git a/npbench/benchmarks/polybench/cholesky2/cholesky2.py b/npbench/benchmarks/polybench/cholesky2/cholesky2.py index 1fe67bd7..c4ce9153 100644 --- a/npbench/benchmarks/polybench/cholesky2/cholesky2.py +++ b/npbench/benchmarks/polybench/cholesky2/cholesky2.py @@ -3,7 +3,7 @@ import numpy as np -def initialize(N, datatype=np.float64): +def initialize(N, datatype=np.float32): A = np.empty((N, N), dtype=datatype) for i in range(N): A[i, :i + 1] = np.fromfunction(lambda j: (-j % N) / N + 1, (i + 1, ), diff --git a/npbench/benchmarks/polybench/cholesky2/cholesky2_dace.py b/npbench/benchmarks/polybench/cholesky2/cholesky2_dace.py index 3e0d5b68..ad28fffc 100644 --- a/npbench/benchmarks/polybench/cholesky2/cholesky2_dace.py +++ b/npbench/benchmarks/polybench/cholesky2/cholesky2_dace.py @@ -1,12 +1,13 @@ import numpy as np import dace as dc +from npbench.infrastructure.dace_framework import dc_float N = dc.symbol('N', dtype=dc.int64) k = dc.symbol('k', dtype=dc.int64) @dc.program -def triu(A: dc.float64[N, N]): +def triu(A: dc_float[N, N], k: dc.int64): B = np.zeros_like(A) for i in dc.map[0:N]: for j in dc.map[i + k:N]: @@ -15,5 +16,5 @@ def triu(A: dc.float64[N, N]): @dc.program -def kernel(A: dc.float64[N, N]): +def kernel(A: dc_float[N, N]): A[:] = np.linalg.cholesky(A) + triu(A, k=1) diff --git a/npbench/benchmarks/polybench/correlation/correlation.py b/npbench/benchmarks/polybench/correlation/correlation.py index 861947fe..3c4857fa 100644 --- a/npbench/benchmarks/polybench/correlation/correlation.py +++ b/npbench/benchmarks/polybench/correlation/correlation.py @@ -3,9 +3,8 @@ import numpy as np -def initialize(M, N, datatype=np.float64): +def initialize(M, N, datatype=np.float32): float_n = datatype(N) - data = np.fromfunction(lambda i, j: (i * j) / M + i, (N, M), - dtype=datatype) + data = np.fromfunction(lambda i, j: (i * j) / M + i, (N, M), dtype=datatype) return float_n, data diff --git a/npbench/benchmarks/polybench/correlation/correlation_dace.py b/npbench/benchmarks/polybench/correlation/correlation_dace.py index 00761bd4..03d2e5d4 100644 --- a/npbench/benchmarks/polybench/correlation/correlation_dace.py +++ b/npbench/benchmarks/polybench/correlation/correlation_dace.py @@ -1,11 +1,12 @@ import numpy as np import dace as dc +from npbench.infrastructure.dace_framework import dc_float M, N = (dc.symbol(s, dtype=dc.int64) for s in ('M', 'N')) @dc.program -def kernel(float_n: dc.float64, data: dc.float64[N, M]): +def kernel(float_n: dc_float, data: dc_float[N, M]): mean = np.mean(data, axis=0) # stddev = np.std(data, axis=0) diff --git a/npbench/benchmarks/polybench/covariance/covariance.py b/npbench/benchmarks/polybench/covariance/covariance.py index f7f98f25..e0d92543 100644 --- a/npbench/benchmarks/polybench/covariance/covariance.py +++ b/npbench/benchmarks/polybench/covariance/covariance.py @@ -3,7 +3,7 @@ import numpy as np -def initialize(M, N, datatype=np.float64): +def initialize(M, N, datatype=np.float32): float_n = datatype(N) data = np.fromfunction(lambda i, j: (i * j) / M, (N, M), dtype=datatype) diff --git a/npbench/benchmarks/polybench/covariance/covariance_dace.py b/npbench/benchmarks/polybench/covariance/covariance_dace.py index a35ecd21..b753a539 100644 --- a/npbench/benchmarks/polybench/covariance/covariance_dace.py +++ b/npbench/benchmarks/polybench/covariance/covariance_dace.py @@ -1,11 +1,12 @@ import numpy as np import dace as dc +from npbench.infrastructure.dace_framework import dc_float M, N = (dc.symbol(s, dtype=dc.int64) for s in ('M', 'N')) @dc.program -def kernel(float_n: dc.float64, data: dc.float64[N, M]): +def kernel(float_n: dc_float, data: dc_float[N, M]): mean = np.mean(data, axis=0) # data -= mean diff --git a/npbench/benchmarks/polybench/covariance2/covariance2.py b/npbench/benchmarks/polybench/covariance2/covariance2.py index f7f98f25..e0d92543 100644 --- a/npbench/benchmarks/polybench/covariance2/covariance2.py +++ b/npbench/benchmarks/polybench/covariance2/covariance2.py @@ -3,7 +3,7 @@ import numpy as np -def initialize(M, N, datatype=np.float64): +def initialize(M, N, datatype=np.float32): float_n = datatype(N) data = np.fromfunction(lambda i, j: (i * j) / M, (N, M), dtype=datatype) diff --git a/npbench/benchmarks/polybench/deriche/deriche.py b/npbench/benchmarks/polybench/deriche/deriche.py index b99dae2c..843d62d1 100644 --- a/npbench/benchmarks/polybench/deriche/deriche.py +++ b/npbench/benchmarks/polybench/deriche/deriche.py @@ -3,10 +3,10 @@ import numpy as np -def initialize(W, H, datatype=np.float64): +def initialize(W, H, datatype=np.float32): alpha = datatype(0.25) imgIn = np.fromfunction(lambda i, j: ((313 * i + 991 * j) % 65536) / 65535.0, (W, H), - dtype=datatype) + dtype=datatype).astype(datatype) return alpha, imgIn diff --git a/npbench/benchmarks/polybench/deriche/deriche_dace.py b/npbench/benchmarks/polybench/deriche/deriche_dace.py index 2272eca4..1e0abd0d 100644 --- a/npbench/benchmarks/polybench/deriche/deriche_dace.py +++ b/npbench/benchmarks/polybench/deriche/deriche_dace.py @@ -1,11 +1,12 @@ import numpy as np import dace as dc +from npbench.infrastructure.dace_framework import dc_float W, H = (dc.symbol(s, dtype=dc.int64) for s in ('W', 'H')) @dc.program -def kernel(alpha: dc.float64, imgIn: dc.float64[W, H]): +def kernel(alpha: dc_float, imgIn: dc_float[W, H]): k = (1.0 - np.exp(-alpha)) * (1.0 - np.exp(-alpha)) / ( 1.0 + alpha * np.exp(-alpha) - np.exp(2.0 * alpha)) diff --git a/npbench/benchmarks/polybench/doitgen/doitgen.py b/npbench/benchmarks/polybench/doitgen/doitgen.py index 9608318a..62e61fb3 100644 --- a/npbench/benchmarks/polybench/doitgen/doitgen.py +++ b/npbench/benchmarks/polybench/doitgen/doitgen.py @@ -3,7 +3,7 @@ import numpy as np -def initialize(NR, NQ, NP, datatype=np.float64): +def initialize(NR, NQ, NP, datatype=np.float32): A = np.fromfunction(lambda i, j, k: ((i * j + k) % NP) / NP, (NR, NQ, NP), dtype=datatype) C4 = np.fromfunction(lambda i, j: (i * j % NP) / NP, (NP, NP), diff --git a/npbench/benchmarks/polybench/doitgen/doitgen_dace.py b/npbench/benchmarks/polybench/doitgen/doitgen_dace.py index a5a017a1..3ef93fc1 100644 --- a/npbench/benchmarks/polybench/doitgen/doitgen_dace.py +++ b/npbench/benchmarks/polybench/doitgen/doitgen_dace.py @@ -1,12 +1,12 @@ import numpy as np import dace as dc +from npbench.infrastructure.dace_framework import dc_float NR, NQ, NP = (dc.symbol(s, dtype=dc.int64) for s in ('NR', 'NQ', 'NP')) @dc.program -def kernel(A: dc.float64[NR, NQ, NP], C4: dc.float64[NP, NP]): - +def kernel(A: dc_float[NR, NQ, NP], C4: dc_float[NP, NP]): # Ideal - not working becayse Matmul with dim > 3 unsupported # A[:] = np.reshape(np.reshape(A, (NR, NQ, 1, NP)) @ C4, (NR, NQ, NP)) for r in range(NR): diff --git a/npbench/benchmarks/polybench/durbin/durbin.py b/npbench/benchmarks/polybench/durbin/durbin.py index 144707d2..2d5f2c1e 100644 --- a/npbench/benchmarks/polybench/durbin/durbin.py +++ b/npbench/benchmarks/polybench/durbin/durbin.py @@ -3,6 +3,6 @@ import numpy as np -def initialize(N, datatype=np.float64): +def initialize(N, datatype=np.float32): r = np.fromfunction(lambda i: N + 1 - i, (N, ), dtype=datatype) return r diff --git a/npbench/benchmarks/polybench/durbin/durbin_dace.py b/npbench/benchmarks/polybench/durbin/durbin_dace.py index 213723b3..e0d6e4d0 100644 --- a/npbench/benchmarks/polybench/durbin/durbin_dace.py +++ b/npbench/benchmarks/polybench/durbin/durbin_dace.py @@ -1,19 +1,19 @@ import numpy as np import dace as dc +from npbench.infrastructure.dace_framework import dc_float M, N = (dc.symbol(s, dtype=dc.int64) for s in ('M', 'N')) @dc.program -def flip(A: dc.float64[M]): - B = np.ndarray((M, ), dtype=np.float64) +def flip(A: dc_float[M]): + B = np.ndarray((M, ), dtype=dc_float) for i in dc.map[0:M]: B[i] = A[M - 1 - i] return B - @dc.program -def kernel(r: dc.float64[N]): +def kernel(r: dc_float[N]): y = np.empty_like(r) alpha = -r[0] diff --git a/npbench/benchmarks/polybench/fdtd_2d/fdtd_2d.py b/npbench/benchmarks/polybench/fdtd_2d/fdtd_2d.py index a8699f4c..88af4b9f 100644 --- a/npbench/benchmarks/polybench/fdtd_2d/fdtd_2d.py +++ b/npbench/benchmarks/polybench/fdtd_2d/fdtd_2d.py @@ -3,7 +3,7 @@ import numpy as np -def initialize(TMAX, NX, NY, datatype=np.float64): +def initialize(TMAX, NX, NY, datatype=np.float32): ex = np.fromfunction(lambda i, j: (i * (j + 1)) / NX, (NX, NY), dtype=datatype) ey = np.fromfunction(lambda i, j: (i * (j + 2)) / NY, (NX, NY), diff --git a/npbench/benchmarks/polybench/fdtd_2d/fdtd_2d_dace.py b/npbench/benchmarks/polybench/fdtd_2d/fdtd_2d_dace.py index 1eb028c2..67f3cd3b 100644 --- a/npbench/benchmarks/polybench/fdtd_2d/fdtd_2d_dace.py +++ b/npbench/benchmarks/polybench/fdtd_2d/fdtd_2d_dace.py @@ -1,12 +1,13 @@ import numpy as np import dace as dc +from npbench.infrastructure.dace_framework import dc_float TMAX, NX, NY = (dc.symbol(s, dtype=dc.int64) for s in ('TMAX', 'NX', 'NY')) @dc.program -def kernel(ex: dc.float64[NX, NY], ey: dc.float64[NX, NY], - hz: dc.float64[NX, NY], _fict_: dc.float64[TMAX]): +def kernel(ex: dc_float[NX, NY], ey: dc_float[NX, NY], + hz: dc_float[NX, NY], _fict_: dc_float[TMAX]): for t in range(TMAX): ey[0, :] = _fict_[t] diff --git a/npbench/benchmarks/polybench/floyd_warshall/floyd_warshall_dace.py b/npbench/benchmarks/polybench/floyd_warshall/floyd_warshall_dace.py index d157ebbf..d7e6b38e 100644 --- a/npbench/benchmarks/polybench/floyd_warshall/floyd_warshall_dace.py +++ b/npbench/benchmarks/polybench/floyd_warshall/floyd_warshall_dace.py @@ -1,11 +1,12 @@ import numpy as np import dace as dc +from npbench.infrastructure.dace_framework import dc_float N = dc.symbol('N', dtype=dc.int64) @dc.program -def kernel(path: dc.int32[N, N]): +def kernel(path: dc_float[N, N]): # def kernel(path: dc.float64[N, N]): for k in range(N): diff --git a/npbench/benchmarks/polybench/gemm/gemm.py b/npbench/benchmarks/polybench/gemm/gemm.py index 72f39d48..bccb4597 100644 --- a/npbench/benchmarks/polybench/gemm/gemm.py +++ b/npbench/benchmarks/polybench/gemm/gemm.py @@ -3,7 +3,7 @@ import numpy as np -def initialize(NI, NJ, NK, datatype=np.float64): +def initialize(NI, NJ, NK, datatype=np.float32): alpha = datatype(1.5) beta = datatype(1.2) C = np.fromfunction(lambda i, j: ((i * j + 1) % NI) / NI, (NI, NJ), diff --git a/npbench/benchmarks/polybench/gemm/gemm_dace.py b/npbench/benchmarks/polybench/gemm/gemm_dace.py index 60a2a8fa..098cab88 100644 --- a/npbench/benchmarks/polybench/gemm/gemm_dace.py +++ b/npbench/benchmarks/polybench/gemm/gemm_dace.py @@ -1,11 +1,12 @@ import numpy as np import dace as dc +from npbench.infrastructure.dace_framework import dc_float NI, NJ, NK = (dc.symbol(s, dtype=dc.int64) for s in ('NI', 'NJ', 'NK')) @dc.program -def kernel(alpha: dc.float64, beta: dc.float64, C: dc.float64[NI, NJ], - A: dc.float64[NI, NK], B: dc.float64[NK, NJ]): +def kernel(alpha: dc_float, beta: dc_float, C: dc_float[NI, NJ], + A: dc_float[NI, NK], B: dc_float[NK, NJ]): C[:] = alpha * A @ B + beta * C diff --git a/npbench/benchmarks/polybench/gemver/gemver.py b/npbench/benchmarks/polybench/gemver/gemver.py index 46797806..31001aea 100644 --- a/npbench/benchmarks/polybench/gemver/gemver.py +++ b/npbench/benchmarks/polybench/gemver/gemver.py @@ -3,7 +3,7 @@ import numpy as np -def initialize(N, datatype=np.float64): +def initialize(N, datatype=np.float32): alpha = datatype(1.5) beta = datatype(1.2) fn = datatype(N) diff --git a/npbench/benchmarks/polybench/gemver/gemver_dace.py b/npbench/benchmarks/polybench/gemver/gemver_dace.py index e0a777db..32ca30a3 100644 --- a/npbench/benchmarks/polybench/gemver/gemver_dace.py +++ b/npbench/benchmarks/polybench/gemver/gemver_dace.py @@ -1,14 +1,15 @@ import numpy as np import dace as dc +from npbench.infrastructure.dace_framework import dc_float M, N = (dc.symbol(s, dtype=dc.int64) for s in ('M', 'N')) @dc.program -def kernel(alpha: dc.float64, beta: dc.float64, A: dc.float64[N, N], - u1: dc.float64[N], v1: dc.float64[N], u2: dc.float64[N], - v2: dc.float64[N], w: dc.float64[N], x: dc.float64[N], - y: dc.float64[N], z: dc.float64[N]): +def kernel(alpha: dc_float, beta: dc_float, A: dc_float[N, N], + u1: dc_float[N], v1: dc_float[N], u2: dc_float[N], + v2: dc_float[N], w: dc_float[N], x: dc_float[N], + y: dc_float[N], z: dc_float[N]): A += np.multiply.outer(u1, v1) + np.multiply.outer(u2, v2) x += beta * y @ A + z diff --git a/npbench/benchmarks/polybench/gesummv/gesummv.py b/npbench/benchmarks/polybench/gesummv/gesummv.py index 3848053f..275d186b 100644 --- a/npbench/benchmarks/polybench/gesummv/gesummv.py +++ b/npbench/benchmarks/polybench/gesummv/gesummv.py @@ -3,7 +3,7 @@ import numpy as np -def initialize(N, datatype=np.float64): +def initialize(N, datatype=np.float32): alpha = datatype(1.5) beta = datatype(1.2) A = np.fromfunction(lambda i, j: ((i * j + 1) % N) / N, (N, N), diff --git a/npbench/benchmarks/polybench/gesummv/gesummv_dace.py b/npbench/benchmarks/polybench/gesummv/gesummv_dace.py index 79e3c06b..075596b0 100644 --- a/npbench/benchmarks/polybench/gesummv/gesummv_dace.py +++ b/npbench/benchmarks/polybench/gesummv/gesummv_dace.py @@ -1,11 +1,12 @@ import numpy as np import dace as dc +from npbench.infrastructure.dace_framework import dc_float N = dc.symbol('N', dtype=dc.int64) @dc.program -def kernel(alpha: dc.float64, beta: dc.float64, A: dc.float64[N, N], - B: dc.float64[N, N], x: dc.float64[N]): +def kernel(alpha: dc.float64, beta: dc.float64, A: dc_float[N, N], + B: dc_float[N, N], x: dc_float[N]): return alpha * A @ x + beta * B @ x diff --git a/npbench/benchmarks/polybench/gramschmidt/gramschmidt.py b/npbench/benchmarks/polybench/gramschmidt/gramschmidt.py index c7423ca6..ecf4d6a7 100644 --- a/npbench/benchmarks/polybench/gramschmidt/gramschmidt.py +++ b/npbench/benchmarks/polybench/gramschmidt/gramschmidt.py @@ -3,7 +3,7 @@ import numpy as np -def initialize(M, N, datatype=np.float64): +def initialize(M, N, datatype=np.float32): from numpy.random import default_rng rng = default_rng(42) diff --git a/npbench/benchmarks/polybench/gramschmidt/gramschmidt_dace.py b/npbench/benchmarks/polybench/gramschmidt/gramschmidt_dace.py index aa2cb085..d67bccdd 100644 --- a/npbench/benchmarks/polybench/gramschmidt/gramschmidt_dace.py +++ b/npbench/benchmarks/polybench/gramschmidt/gramschmidt_dace.py @@ -1,11 +1,12 @@ import numpy as np import dace as dc +from npbench.infrastructure.dace_framework import dc_float M, N, S = (dc.symbol(s, dtype=dc.int64) for s in ('M', 'N', 'S')) @dc.program -def kernel(A: dc.float64[M, N]): +def kernel(A: dc_float[M, N]): Q = np.zeros_like(A) R = np.zeros((N, N), dtype=A.dtype) diff --git a/npbench/benchmarks/polybench/heat_3d/heat_3d.py b/npbench/benchmarks/polybench/heat_3d/heat_3d.py index 44d2a4d0..03845837 100644 --- a/npbench/benchmarks/polybench/heat_3d/heat_3d.py +++ b/npbench/benchmarks/polybench/heat_3d/heat_3d.py @@ -3,7 +3,7 @@ import numpy as np -def initialize(N, datatype=np.float64): +def initialize(N, datatype=np.float32): A = np.fromfunction(lambda i, j, k: (i + j + (N - k)) * 10 / N, (N, N, N), dtype=datatype) B = np.copy(A) diff --git a/npbench/benchmarks/polybench/heat_3d/heat_3d_dace.py b/npbench/benchmarks/polybench/heat_3d/heat_3d_dace.py index 53c93e49..a905c398 100644 --- a/npbench/benchmarks/polybench/heat_3d/heat_3d_dace.py +++ b/npbench/benchmarks/polybench/heat_3d/heat_3d_dace.py @@ -1,11 +1,12 @@ import numpy as np import dace as dc +from npbench.infrastructure.dace_framework import dc_float N = dc.symbol('N', dtype=dc.int64) @dc.program -def kernel(TSTEPS: dc.int64, A: dc.float64[N, N, N], B: dc.float64[N, N, N]): +def kernel(TSTEPS: dc.int64, A: dc_float[N, N, N], B: dc_float[N, N, N]): for t in range(1, TSTEPS): B[1:-1, 1:-1, diff --git a/npbench/benchmarks/polybench/jacobi_1d/jacobi_1d.py b/npbench/benchmarks/polybench/jacobi_1d/jacobi_1d.py index 64930970..53396da2 100644 --- a/npbench/benchmarks/polybench/jacobi_1d/jacobi_1d.py +++ b/npbench/benchmarks/polybench/jacobi_1d/jacobi_1d.py @@ -3,7 +3,7 @@ import numpy as np -def initialize(N, datatype=np.float64): +def initialize(N, datatype=np.float32): A = np.fromfunction(lambda i: (i + 2) / N, (N, ), dtype=datatype) B = np.fromfunction(lambda i: (i + 3) / N, (N, ), dtype=datatype) diff --git a/npbench/benchmarks/polybench/jacobi_1d/jacobi_1d_dace.py b/npbench/benchmarks/polybench/jacobi_1d/jacobi_1d_dace.py index a57b3abf..26d26152 100644 --- a/npbench/benchmarks/polybench/jacobi_1d/jacobi_1d_dace.py +++ b/npbench/benchmarks/polybench/jacobi_1d/jacobi_1d_dace.py @@ -1,11 +1,11 @@ import numpy as np import dace as dc +from npbench.infrastructure.dace_framework import dc_float N = dc.symbol('N', dtype=dc.int64) - @dc.program -def kernel(TSTEPS: dc.int64, A: dc.float64[N], B: dc.float64[N]): +def kernel(TSTEPS: dc.int64, A: dc_float[N], B: dc_float[N]): for t in range(1, TSTEPS): B[1:-1] = 0.33333 * (A[:-2] + A[1:-1] + A[2:]) diff --git a/npbench/benchmarks/polybench/jacobi_2d/jacobi_2d.py b/npbench/benchmarks/polybench/jacobi_2d/jacobi_2d.py index 9b1c43a2..e166ad14 100644 --- a/npbench/benchmarks/polybench/jacobi_2d/jacobi_2d.py +++ b/npbench/benchmarks/polybench/jacobi_2d/jacobi_2d.py @@ -3,7 +3,7 @@ import numpy as np -def initialize(N, datatype=np.float64): +def initialize(N, datatype=np.float32): A = np.fromfunction(lambda i, j: i * (j + 2) / N, (N, N), dtype=datatype) B = np.fromfunction(lambda i, j: i * (j + 3) / N, (N, N), dtype=datatype) diff --git a/npbench/benchmarks/polybench/jacobi_2d/jacobi_2d_dace.py b/npbench/benchmarks/polybench/jacobi_2d/jacobi_2d_dace.py index 2aac48a3..2eb34cf1 100644 --- a/npbench/benchmarks/polybench/jacobi_2d/jacobi_2d_dace.py +++ b/npbench/benchmarks/polybench/jacobi_2d/jacobi_2d_dace.py @@ -1,11 +1,12 @@ import numpy as np import dace as dc +from npbench.infrastructure.dace_framework import dc_float N = dc.symbol('N', dtype=dc.int64) @dc.program -def kernel(TSTEPS: dc.int64, A: dc.float64[N, N], B: dc.float64[N, N]): +def kernel(TSTEPS: dc.int64, A: dc_float[N, N], B: dc_float[N, N]): for t in range(1, TSTEPS): B[1:-1, 1:-1] = 0.2 * (A[1:-1, 1:-1] + A[1:-1, :-2] + A[1:-1, 2:] + diff --git a/npbench/benchmarks/polybench/k2mm/k2mm.py b/npbench/benchmarks/polybench/k2mm/k2mm.py index 3c072087..36f5e8cc 100644 --- a/npbench/benchmarks/polybench/k2mm/k2mm.py +++ b/npbench/benchmarks/polybench/k2mm/k2mm.py @@ -3,7 +3,7 @@ import numpy as np -def initialize(NI, NJ, NK, NL, datatype=np.float64): +def initialize(NI, NJ, NK, NL, datatype=np.float32): alpha = datatype(1.5) beta = datatype(1.2) A = np.fromfunction(lambda i, j: ((i * j + 1) % NI) / NI, (NI, NK), diff --git a/npbench/benchmarks/polybench/k2mm/k2mm_dace.py b/npbench/benchmarks/polybench/k2mm/k2mm_dace.py index 3db8c490..c183a228 100644 --- a/npbench/benchmarks/polybench/k2mm/k2mm_dace.py +++ b/npbench/benchmarks/polybench/k2mm/k2mm_dace.py @@ -1,13 +1,13 @@ import numpy as np import dace as dc +from npbench.infrastructure.dace_framework import dc_float NI, NJ, NK, NL = (dc.symbol(s, dtype=dc.int64) for s in ('NI', 'NJ', 'NK', 'NL')) @dc.program -def kernel(alpha: dc.float64, beta: dc.float64, A: dc.float64[NI, NK], - B: dc.float64[NK, NJ], C: dc.float64[NJ, NL], D: dc.float64[NI, - NL]): +def kernel(alpha: dc_float, beta: dc_float, A: dc_float[NI, NK], + B: dc_float[NK, NJ], C: dc_float[NJ, NL], D: dc_float[NI, NL]): D[:] = alpha * A @ B @ C + beta * D diff --git a/npbench/benchmarks/polybench/k3mm/k3mm.py b/npbench/benchmarks/polybench/k3mm/k3mm.py index 70af0bf5..9d432bc9 100644 --- a/npbench/benchmarks/polybench/k3mm/k3mm.py +++ b/npbench/benchmarks/polybench/k3mm/k3mm.py @@ -3,7 +3,7 @@ import numpy as np -def initialize(NI, NJ, NK, NL, NM, datatype=np.float64): +def initialize(NI, NJ, NK, NL, NM, datatype=np.float32): A = np.fromfunction(lambda i, j: ((i * j + 1) % NI) / (5 * NI), (NI, NK), dtype=datatype) B = np.fromfunction(lambda i, j: ((i * (j + 1) + 2) % NJ) / (5 * NJ), diff --git a/npbench/benchmarks/polybench/k3mm/k3mm_dace.py b/npbench/benchmarks/polybench/k3mm/k3mm_dace.py index 0be34668..149d07dd 100644 --- a/npbench/benchmarks/polybench/k3mm/k3mm_dace.py +++ b/npbench/benchmarks/polybench/k3mm/k3mm_dace.py @@ -1,12 +1,13 @@ import numpy as np import dace as dc +from npbench.infrastructure.dace_framework import dc_float NI, NJ, NK, NL, NM = (dc.symbol(s, dtype=dc.int64) for s in ('NI', 'NJ', 'NK', 'NL', 'NM')) @dc.program -def kernel(A: dc.float64[NI, NK], B: dc.float64[NK, NJ], C: dc.float64[NJ, NM], - D: dc.float64[NM, NL]): +def kernel(A: dc_float[NI, NK], B: dc_float[NK, NJ], C: dc_float[NJ, NM], + D: dc_float[NM, NL]): return A @ B @ C @ D diff --git a/npbench/benchmarks/polybench/lu/lu.py b/npbench/benchmarks/polybench/lu/lu.py index 1fe67bd7..c4ce9153 100644 --- a/npbench/benchmarks/polybench/lu/lu.py +++ b/npbench/benchmarks/polybench/lu/lu.py @@ -3,7 +3,7 @@ import numpy as np -def initialize(N, datatype=np.float64): +def initialize(N, datatype=np.float32): A = np.empty((N, N), dtype=datatype) for i in range(N): A[i, :i + 1] = np.fromfunction(lambda j: (-j % N) / N + 1, (i + 1, ), diff --git a/npbench/benchmarks/polybench/lu/lu_dace.py b/npbench/benchmarks/polybench/lu/lu_dace.py index 7623aa02..47a442ee 100644 --- a/npbench/benchmarks/polybench/lu/lu_dace.py +++ b/npbench/benchmarks/polybench/lu/lu_dace.py @@ -1,11 +1,12 @@ import numpy as np import dace as dc +from npbench.infrastructure.dace_framework import dc_float N = dc.symbol('N', dtype=dc.int64) @dc.program -def kernel(A: dc.float64[N, N]): +def kernel(A: dc_float[N, N]): for i in range(N): for j in range(i): diff --git a/npbench/benchmarks/polybench/ludcmp/ludcmp.py b/npbench/benchmarks/polybench/ludcmp/ludcmp.py index ef9b9679..1d440728 100644 --- a/npbench/benchmarks/polybench/ludcmp/ludcmp.py +++ b/npbench/benchmarks/polybench/ludcmp/ludcmp.py @@ -3,7 +3,7 @@ import numpy as np -def initialize(N, datatype=np.float64): +def initialize(N, datatype=np.float32): A = np.empty((N, N), dtype=datatype) for i in range(N): A[i, :i + 1] = np.fromfunction(lambda j: (-j % N) / N + 1, (i + 1, ), diff --git a/npbench/benchmarks/polybench/ludcmp/ludcmp_dace.py b/npbench/benchmarks/polybench/ludcmp/ludcmp_dace.py index b5020063..95ff97e8 100644 --- a/npbench/benchmarks/polybench/ludcmp/ludcmp_dace.py +++ b/npbench/benchmarks/polybench/ludcmp/ludcmp_dace.py @@ -1,11 +1,12 @@ import numpy as np import dace as dc +from npbench.infrastructure.dace_framework import dc_float N = dc.symbol('N', dtype=dc.int64) @dc.program -def kernel(A: dc.float64[N, N], b: dc.float64[N]): +def kernel(A: dc_float[N, N], b: dc_float[N]): x = np.zeros_like(b) y = np.zeros_like(b) diff --git a/npbench/benchmarks/polybench/mvt/mvt.py b/npbench/benchmarks/polybench/mvt/mvt.py index 044ab8fc..c6bcb29f 100644 --- a/npbench/benchmarks/polybench/mvt/mvt.py +++ b/npbench/benchmarks/polybench/mvt/mvt.py @@ -3,7 +3,7 @@ import numpy as np -def initialize(N, datatype=np.float64): +def initialize(N, datatype=np.float32): x1 = np.fromfunction(lambda i: (i % N) / N, (N, ), dtype=datatype) x2 = np.fromfunction(lambda i: ((i + 1) % N) / N, (N, ), dtype=datatype) y_1 = np.fromfunction(lambda i: ((i + 3) % N) / N, (N, ), dtype=datatype) diff --git a/npbench/benchmarks/polybench/mvt/mvt_dace.py b/npbench/benchmarks/polybench/mvt/mvt_dace.py index 74740be6..b2059338 100644 --- a/npbench/benchmarks/polybench/mvt/mvt_dace.py +++ b/npbench/benchmarks/polybench/mvt/mvt_dace.py @@ -1,12 +1,13 @@ import numpy as np import dace as dc +from npbench.infrastructure.dace_framework import dc_float N = dc.symbol('N', dtype=dc.int64) @dc.program -def kernel(x1: dc.float64[N], x2: dc.float64[N], y_1: dc.float64[N], - y_2: dc.float64[N], A: dc.float64[N, N]): +def kernel(x1: dc_float[N], x2: dc_float[N], y_1: dc_float[N], + y_2: dc_float[N], A: dc_float[N, N]): x1 += A @ y_1 x2 += y_2 @ A diff --git a/npbench/benchmarks/polybench/nussinov/nussinov.py b/npbench/benchmarks/polybench/nussinov/nussinov.py index a4770556..d24a7a41 100644 --- a/npbench/benchmarks/polybench/nussinov/nussinov.py +++ b/npbench/benchmarks/polybench/nussinov/nussinov.py @@ -4,6 +4,6 @@ def initialize(N, datatype=np.int32): - seq = np.fromfunction(lambda i: (i + 1) % 4, (N, ), dtype=datatype) + seq = np.fromfunction(lambda i: (i + 1) % 4, (N, ), dtype=np.int32) return seq diff --git a/npbench/benchmarks/polybench/seidel_2d/seidel_2d.py b/npbench/benchmarks/polybench/seidel_2d/seidel_2d.py index e111c704..32b471c6 100644 --- a/npbench/benchmarks/polybench/seidel_2d/seidel_2d.py +++ b/npbench/benchmarks/polybench/seidel_2d/seidel_2d.py @@ -3,7 +3,7 @@ import numpy as np -def initialize(N, datatype=np.float64): +def initialize(N, datatype=np.float32): A = np.fromfunction(lambda i, j: (i * (j + 2) + 2) / N, (N, N), dtype=datatype) diff --git a/npbench/benchmarks/polybench/seidel_2d/seidel_2d_dace.py b/npbench/benchmarks/polybench/seidel_2d/seidel_2d_dace.py index 1811d5cb..edcd7f6f 100644 --- a/npbench/benchmarks/polybench/seidel_2d/seidel_2d_dace.py +++ b/npbench/benchmarks/polybench/seidel_2d/seidel_2d_dace.py @@ -1,11 +1,12 @@ import numpy as np import dace as dc +from npbench.infrastructure.dace_framework import dc_float N = dc.symbol('N', dtype=dc.int64) @dc.program -def kernel(TSTEPS: dc.int64, A: dc.float64[N, N]): +def kernel(TSTEPS: dc.int64, A: dc_float[N, N]): for t in range(0, TSTEPS - 1): for i in range(1, N - 1): diff --git a/npbench/benchmarks/polybench/symm/symm.py b/npbench/benchmarks/polybench/symm/symm.py index a2de7055..8ec1a5cb 100644 --- a/npbench/benchmarks/polybench/symm/symm.py +++ b/npbench/benchmarks/polybench/symm/symm.py @@ -3,7 +3,7 @@ import numpy as np -def initialize(M, N, datatype=np.float64): +def initialize(M, N, datatype=np.float32): alpha = datatype(1.5) beta = datatype(1.2) C = np.fromfunction(lambda i, j: ((i + j) % 100) / M, (M, N), diff --git a/npbench/benchmarks/polybench/symm/symm_dace.py b/npbench/benchmarks/polybench/symm/symm_dace.py index fa970f9d..4eb76aa4 100644 --- a/npbench/benchmarks/polybench/symm/symm_dace.py +++ b/npbench/benchmarks/polybench/symm/symm_dace.py @@ -1,12 +1,13 @@ import numpy as np import dace as dc +from npbench.infrastructure.dace_framework import dc_float M, N = (dc.symbol(s, dtype=dc.int64) for s in ('M', 'N')) @dc.program -def kernel(alpha: dc.float64, beta: dc.float64, C: dc.float64[M, N], - A: dc.float64[M, M], B: dc.float64[M, N]): +def kernel(alpha: dc_float, beta: dc_float, C: dc_float[M, N], + A: dc_float[M, M], B: dc_float[M, N]): temp2 = np.empty((N, ), dtype=C.dtype) C *= beta diff --git a/npbench/benchmarks/polybench/syr2k/syr2k.py b/npbench/benchmarks/polybench/syr2k/syr2k.py index 839e39c3..38d22cbe 100644 --- a/npbench/benchmarks/polybench/syr2k/syr2k.py +++ b/npbench/benchmarks/polybench/syr2k/syr2k.py @@ -3,7 +3,7 @@ import numpy as np -def initialize(M, N, datatype=np.float64): +def initialize(M, N, datatype=np.float32): alpha = datatype(1.5) beta = datatype(1.2) C = np.fromfunction(lambda i, j: ((i * j + 3) % N) / M, (N, N), diff --git a/npbench/benchmarks/polybench/syr2k/syr2k_dace.py b/npbench/benchmarks/polybench/syr2k/syr2k_dace.py index b62842fa..897396e0 100644 --- a/npbench/benchmarks/polybench/syr2k/syr2k_dace.py +++ b/npbench/benchmarks/polybench/syr2k/syr2k_dace.py @@ -1,12 +1,13 @@ import numpy as np import dace as dc +from npbench.infrastructure.dace_framework import dc_float M, N = (dc.symbol(s, dtype=dc.int64) for s in ('M', 'N')) @dc.program -def kernel(alpha: dc.float64, beta: dc.float64, C: dc.float64[N, N], - A: dc.float64[N, M], B: dc.float64[N, M]): +def kernel(alpha: dc_float, beta: dc_float, C: dc_float[N, N], + A: dc_float[N, M], B: dc_float[N, M]): for i in range(N): C[i, :i + 1] *= beta diff --git a/npbench/benchmarks/polybench/syrk/syrk.py b/npbench/benchmarks/polybench/syrk/syrk.py index bbac91ec..cb6e93d6 100644 --- a/npbench/benchmarks/polybench/syrk/syrk.py +++ b/npbench/benchmarks/polybench/syrk/syrk.py @@ -3,7 +3,7 @@ import numpy as np -def initialize(M, N, datatype=np.float64): +def initialize(M, N, datatype=np.float32): alpha = datatype(1.5) beta = datatype(1.2) C = np.fromfunction(lambda i, j: ((i * j + 2) % N) / M, (N, N), diff --git a/npbench/benchmarks/polybench/syrk/syrk_dace.py b/npbench/benchmarks/polybench/syrk/syrk_dace.py index 18da4b0f..c169ad9a 100644 --- a/npbench/benchmarks/polybench/syrk/syrk_dace.py +++ b/npbench/benchmarks/polybench/syrk/syrk_dace.py @@ -1,12 +1,13 @@ import numpy as np import dace as dc +from npbench.infrastructure.dace_framework import dc_float M, N = (dc.symbol(s, dtype=dc.int64) for s in ('M', 'N')) @dc.program -def kernel(alpha: dc.float64, beta: dc.float64, C: dc.float64[N, N], - A: dc.float64[N, M]): +def kernel(alpha: dc_float, beta: dc_float, C: dc_float[N, N], + A: dc_float[N, M]): for i in range(N): C[i, :i + 1] *= beta diff --git a/npbench/benchmarks/polybench/trisolv/trisolv.py b/npbench/benchmarks/polybench/trisolv/trisolv.py index 9532d40e..b5ea5489 100644 --- a/npbench/benchmarks/polybench/trisolv/trisolv.py +++ b/npbench/benchmarks/polybench/trisolv/trisolv.py @@ -3,7 +3,7 @@ import numpy as np -def initialize(N, datatype=np.float64): +def initialize(N, datatype=np.float32): L = np.fromfunction(lambda i, j: (i + N - j + 1) * 2 / N, (N, N), dtype=datatype) x = np.full((N, ), -999, dtype=datatype) diff --git a/npbench/benchmarks/polybench/trisolv/trisolv_dace.py b/npbench/benchmarks/polybench/trisolv/trisolv_dace.py index 1670fdb9..61ef7bd9 100644 --- a/npbench/benchmarks/polybench/trisolv/trisolv_dace.py +++ b/npbench/benchmarks/polybench/trisolv/trisolv_dace.py @@ -1,11 +1,12 @@ import numpy as np import dace as dc +from npbench.infrastructure.dace_framework import dc_float N = dc.symbol('N', dtype=dc.int64) @dc.program -def kernel(L: dc.float64[N, N], x: dc.float64[N], b: dc.float64[N]): +def kernel(L: dc_float[N, N], x: dc_float[N], b: dc_float[N]): for i in range(N): x[i] = (b[i] - L[i, :i] @ x[:i]) / L[i, i] diff --git a/npbench/benchmarks/polybench/trmm/trmm.py b/npbench/benchmarks/polybench/trmm/trmm.py index a9817236..8af9377b 100644 --- a/npbench/benchmarks/polybench/trmm/trmm.py +++ b/npbench/benchmarks/polybench/trmm/trmm.py @@ -3,7 +3,7 @@ import numpy as np -def initialize(M, N, datatype=np.float64): +def initialize(M, N, datatype=np.float32): alpha = datatype(1.5) A = np.fromfunction(lambda i, j: ((i * j) % M) / M, (M, M), dtype=datatype) for i in range(M): diff --git a/npbench/benchmarks/polybench/trmm/trmm_dace.py b/npbench/benchmarks/polybench/trmm/trmm_dace.py index bd542a96..483f12be 100644 --- a/npbench/benchmarks/polybench/trmm/trmm_dace.py +++ b/npbench/benchmarks/polybench/trmm/trmm_dace.py @@ -1,5 +1,6 @@ import numpy as np import dace as dc +from npbench.infrastructure.dace_framework import dc_float M, N, S = (dc.symbol(s, dtype=dc.int64) for s in ('M', 'N', 'S')) @@ -9,7 +10,7 @@ @dc.program -def kernel(alpha: dc.float64, A: dc.float64[M, M], B: dc.float64[M, N]): +def kernel(alpha: dc_float, A: dc_float[M, M], B: dc_float[M, N]): for i in range(M): for j in range(N): diff --git a/npbench/benchmarks/pythran/arc_distance/arc_distance.py b/npbench/benchmarks/pythran/arc_distance/arc_distance.py index 210a808a..55bac8db 100644 --- a/npbench/benchmarks/pythran/arc_distance/arc_distance.py +++ b/npbench/benchmarks/pythran/arc_distance/arc_distance.py @@ -1,9 +1,8 @@ # Copyright 2021 ETH Zurich and the NPBench authors. All rights reserved. +import numpy as np - -def initialize(N): - from numpy.random import default_rng - rng = default_rng(42) +def initialize(N, datatype=np.float32): + rng = np.random.default_rng(42) t0, p0, t1, p1 = rng.random((N, )), rng.random((N, )), rng.random( (N, )), rng.random((N, )) - return t0, p0, t1, p1 + return t0.astype(datatype), p0.astype(datatype), t1.astype(datatype), p1.astype(datatype) diff --git a/npbench/benchmarks/pythran/arc_distance/arc_distance_dace.py b/npbench/benchmarks/pythran/arc_distance/arc_distance_dace.py index 864e3881..792f6f78 100644 --- a/npbench/benchmarks/pythran/arc_distance/arc_distance_dace.py +++ b/npbench/benchmarks/pythran/arc_distance/arc_distance_dace.py @@ -28,13 +28,14 @@ import numpy as np import dace as dc +from npbench.infrastructure.dace_framework import dc_float N = dc.symbol('N', dtype=dc.int64) @dc.program -def arc_distance(theta_1: dc.float64[N], phi_1: dc.float64[N], - theta_2: dc.float64[N], phi_2: dc.float64[N]): +def arc_distance(theta_1: dc_float[N], phi_1: dc_float[N], + theta_2: dc_float[N], phi_2: dc_float[N]): """ Calculates the pairwise arc distance between all points in vector a and b. """ diff --git a/npbench/benchmarks/scattering_self_energies/scattering_self_energies.py b/npbench/benchmarks/scattering_self_energies/scattering_self_energies.py index 14bdd885..f5c61aac 100644 --- a/npbench/benchmarks/scattering_self_energies/scattering_self_energies.py +++ b/npbench/benchmarks/scattering_self_energies/scattering_self_energies.py @@ -3,20 +3,20 @@ import numpy as np -def rng_complex(shape, rng): - return (rng.random(shape) + rng.random(shape) * 1j) +def rng_complex(shape, rng, datatype): + return (rng.random(shape, dtype=datatype) + rng.random(shape, dtype=datatype) * 1j) -def initialize(Nkz, NE, Nqz, Nw, N3D, NA, NB, Norb): +def initialize(Nkz, NE, Nqz, Nw, N3D, NA, NB, Norb, datatype=np.float32): from numpy.random import default_rng rng = default_rng(42) neigh_idx = np.ndarray([NA, NB], dtype=np.int32) for i in range(NA): neigh_idx[i] = np.positive(np.arange(i - NB / 2, i + NB / 2) % NA) - dH = rng_complex([NA, NB, N3D, Norb, Norb], rng) - G = rng_complex([Nkz, NE, NA, Norb, Norb], rng) - D = rng_complex([Nqz, Nw, NA, NB, N3D, N3D], rng) - Sigma = np.zeros([Nkz, NE, NA, Norb, Norb], dtype=np.complex128) + dH = rng_complex([NA, NB, N3D, Norb, Norb], rng, datatype) + G = rng_complex([Nkz, NE, NA, Norb, Norb], rng, datatype) + D = rng_complex([Nqz, Nw, NA, NB, N3D, N3D], rng, datatype) + Sigma = np.zeros([Nkz, NE, NA, Norb, Norb], dtype=D.dtype) return neigh_idx, dH, G, D, Sigma diff --git a/npbench/benchmarks/spmv/spmv.py b/npbench/benchmarks/spmv/spmv.py index e1971754..ef1e1787 100644 --- a/npbench/benchmarks/spmv/spmv.py +++ b/npbench/benchmarks/spmv/spmv.py @@ -3,11 +3,11 @@ import numpy as np -def initialize(M, N, nnz): +def initialize(M, N, nnz, datatype=np.float64): from numpy.random import default_rng rng = default_rng(42) - x = rng.random((N, )) + x = rng.random((N, ), dtype=datatype) from scipy.sparse import random @@ -15,7 +15,7 @@ def initialize(M, N, nnz): N, density=nnz / (M * N), format='csr', - dtype=np.float64, + dtype=datatype, random_state=rng) rows = np.uint32(matrix.indptr) cols = np.uint32(matrix.indices) diff --git a/npbench/benchmarks/spmv/spmv_dace.py b/npbench/benchmarks/spmv/spmv_dace.py index 53f8e033..2070b2c8 100644 --- a/npbench/benchmarks/spmv/spmv_dace.py +++ b/npbench/benchmarks/spmv/spmv_dace.py @@ -1,6 +1,7 @@ # Sparse Matrix-Vector Multiplication (SpMV) import numpy as np import dace as dc +from npbench.infrastructure.dace_framework import dc_float M, N, nnz = (dc.symbol(s, dtype=dc.int64) for s in ('M', 'N', 'nnz')) @@ -9,7 +10,7 @@ # (CSR) format @dc.program def spmv(A_row: dc.uint32[M + 1], A_col: dc.uint32[nnz], - A_val: dc.float64[nnz], x: dc.float64[N]): + A_val: dc_float[nnz], x: dc_float[N]): # y = np.empty(A_row.size - 1, A_val.dtype) y = np.empty(M, A_val.dtype) diff --git a/npbench/benchmarks/stockham_fft/stockham_fft.py b/npbench/benchmarks/stockham_fft/stockham_fft.py index 5d2c2233..7818acf3 100644 --- a/npbench/benchmarks/stockham_fft/stockham_fft.py +++ b/npbench/benchmarks/stockham_fft/stockham_fft.py @@ -3,16 +3,16 @@ import numpy as np -def rng_complex(shape, rng): - return (rng.random(shape) + rng.random(shape) * 1j) +def rng_complex(shape, rng, datatype): + return (rng.random(shape, dtype=datatype) + rng.random(shape, dtype=datatype) * 1j) -def initialize(R, K): +def initialize(R, K, datatype=np.float32): from numpy.random import default_rng rng = default_rng(42) N = R**K - X = rng_complex((N, ), rng) - Y = np.zeros_like(X, dtype=np.complex128) + X = rng_complex((N,), rng, datatype) + Y = np.zeros_like(X, dtype=X.dtype) return N, X, Y diff --git a/npbench/benchmarks/weather_stencils/hdiff/hdiff.py b/npbench/benchmarks/weather_stencils/hdiff/hdiff.py index 3b191fda..3a32de4c 100644 --- a/npbench/benchmarks/weather_stencils/hdiff/hdiff.py +++ b/npbench/benchmarks/weather_stencils/hdiff/hdiff.py @@ -3,13 +3,13 @@ import numpy as np -def initialize(I, J, K): +def initialize(I, J, K, datatype=np.float32): from numpy.random import default_rng rng = default_rng(42) # Define arrays - in_field = rng.random((I + 4, J + 4, K)) - out_field = rng.random((I, J, K)) - coeff = rng.random((I, J, K)) + in_field = rng.random((I + 4, J + 4, K), dtype=datatype) + out_field = rng.random((I, J, K), dtype=datatype) + coeff = rng.random((I, J, K), dtype=datatype) return in_field, out_field, coeff diff --git a/npbench/benchmarks/weather_stencils/hdiff/hdiff_dace.py b/npbench/benchmarks/weather_stencils/hdiff/hdiff_dace.py index e8571873..6d4826e1 100644 --- a/npbench/benchmarks/weather_stencils/hdiff/hdiff_dace.py +++ b/npbench/benchmarks/weather_stencils/hdiff/hdiff_dace.py @@ -1,13 +1,14 @@ import numpy as np import dace as dc +from npbench.infrastructure.dace_framework import dc_float I, J, K = (dc.symbol(s, dtype=dc.int64) for s in ('I', 'J', 'K')) # Adapted from https://github.com/GridTools/gt4py/blob/1caca893034a18d5df1522ed251486659f846589/tests/test_integration/stencil_definitions.py#L194 @dc.program -def hdiff(in_field: dc.float64[I + 4, J + 4, K], - out_field: dc.float64[I, J, K], coeff: dc.float64[I, J, K]): +def hdiff(in_field: dc_float[I + 4, J + 4, K], + out_field: dc_float[I, J, K], coeff: dc_float[I, J, K]): # I, J, K = out_field.shape[0], out_field.shape[1], out_field.shape[2] lap_field = 4.0 * in_field[1:I + 3, 1:J + 3, :] - ( in_field[2:I + 4, 1:J + 3, :] + in_field[0:I + 2, 1:J + 3, :] + diff --git a/npbench/benchmarks/weather_stencils/vadv/vadv.py b/npbench/benchmarks/weather_stencils/vadv/vadv.py index 94a89b3f..b2bbc5fa 100644 --- a/npbench/benchmarks/weather_stencils/vadv/vadv.py +++ b/npbench/benchmarks/weather_stencils/vadv/vadv.py @@ -3,17 +3,17 @@ import numpy as np -def initialize(I, J, K): +def initialize(I, J, K, datatype=np.float32): from numpy.random import default_rng rng = default_rng(42) dtr_stage = 3. / 20. # Define arrays - utens_stage = rng.random((I, J, K)) - u_stage = rng.random((I, J, K)) - wcon = rng.random((I + 1, J, K)) - u_pos = rng.random((I, J, K)) - utens = rng.random((I, J, K)) + utens_stage = rng.random((I, J, K), dtype=datatype) + u_stage = rng.random((I, J, K), dtype=datatype) + wcon = rng.random((I + 1, J, K), dtype=datatype) + u_pos = rng.random((I, J, K), dtype=datatype) + utens = rng.random((I, J, K), dtype=datatype) return dtr_stage, utens_stage, u_stage, wcon, u_pos, utens diff --git a/npbench/benchmarks/weather_stencils/vadv/vadv_dace.py b/npbench/benchmarks/weather_stencils/vadv/vadv_dace.py index 9cf596a5..86354fbc 100644 --- a/npbench/benchmarks/weather_stencils/vadv/vadv_dace.py +++ b/npbench/benchmarks/weather_stencils/vadv/vadv_dace.py @@ -1,5 +1,6 @@ import numpy as np import dace as dc +from npbench.infrastructure.dace_framework import dc_float # Sample constants BET_M = 0.5 @@ -10,9 +11,9 @@ # Adapted from https://github.com/GridTools/gt4py/blob/1caca893034a18d5df1522ed251486659f846589/tests/test_integration/stencil_definitions.py#L111 @dc.program -def vadv(utens_stage: dc.float64[I, J, K], u_stage: dc.float64[I, J, K], - wcon: dc.float64[I + 1, J, K], u_pos: dc.float64[I, J, K], - utens: dc.float64[I, J, K], dtr_stage: dc.float64): +def vadv(utens_stage: dc_float[I, J, K], u_stage: dc_float[I, J, K], + wcon: dc_float[I + 1, J, K], u_pos: dc_float[I, J, K], + utens: dc_float[I, J, K], dtr_stage: dc_float): ccol = np.ndarray((I, J, K), dtype=utens_stage.dtype) dcol = np.ndarray((I, J, K), dtype=utens_stage.dtype) data_col = np.ndarray((I, J), dtype=utens_stage.dtype) diff --git a/npbench/infrastructure/benchmark.py b/npbench/infrastructure/benchmark.py index 97d063d9..4842a8d0 100644 --- a/npbench/infrastructure/benchmark.py +++ b/npbench/infrastructure/benchmark.py @@ -1,8 +1,9 @@ # Copyright 2021 ETH Zurich and the NPBench authors. All rights reserved. import json import pathlib +import numpy as np -from typing import Any, Dict +from typing import Any, Dict, Optional class Benchmark(object): @@ -28,7 +29,7 @@ def __init__(self, bname: str): print("Benchmark JSON file {b} could not be opened.".format(b=bench_filename)) raise (e) - def get_data(self, preset: str = 'L') -> Dict[str, Any]: + def get_data(self, preset: str = 'L', datatype: Optional[str] = None) -> Dict[str, Any]: """ Initializes the benchmark data. :param preset: The data-size preset (S, M, L, paper). """ @@ -44,6 +45,11 @@ def get_data(self, preset: str = 'L') -> Dict[str, Any]: parameters = self.info["parameters"][preset] for k, v in parameters.items(): data[k] = v + if datatype is not None: + all_datatypes = {"float32": np.float32, "float64": np.float64} + if datatype not in all_datatypes: + raise NotImplementedError("Datatype {} is not supported.".format(datatype)) + data["datatype"] = all_datatypes[datatype] # 3. Import initialization function if "init" in self.info.keys() and self.info["init"]: module_filename = "{m}.py".format(m=self.info["module_name"]) @@ -56,9 +62,10 @@ def get_data(self, preset: str = 'L') -> Dict[str, Any]: print("Module Python file {m} could not be opened.".format(m=module_filename)) raise (e) # 4. Execute initialization + maybe_datatype = ["datatype"] if datatype is not None else [] init_str = "{oargs} = {i}({iargs})".format(oargs=",".join(self.info["init"]["output_args"]), i=self.info["init"]["func_name"], - iargs=",".join(self.info["init"]["input_args"])) + iargs=",".join(self.info["init"]["input_args"] + maybe_datatype)) exec(init_str, data) del data[self.info["init"]["func_name"]] diff --git a/npbench/infrastructure/dace_framework.py b/npbench/infrastructure/dace_framework.py index 2a5afec0..2033ce82 100644 --- a/npbench/infrastructure/dace_framework.py +++ b/npbench/infrastructure/dace_framework.py @@ -4,8 +4,10 @@ import traceback from npbench.infrastructure import Benchmark, Framework, utilities as util -from typing import Callable, Sequence, Tuple +from typing import Callable, Literal, Sequence, Tuple, Union +dc_float = None +dc_complex_float = None class DaceFramework(Framework): """ A class for reading and processing framework information. """ @@ -314,3 +316,12 @@ def param_str(self, bench: Benchmark, impl: Callable = None): input_params = self.params(bench, impl) return ", ".join(["{p}={p}".format(p=p) for p in input_params]) + + def set_datatype(self, datatype: Union[Literal['float32'], Literal['float64'], None]): + # We might get None here if no datatype is specified. This is sad since we cannot know the exact datatype here + # and we are relying on the fact that frameworks have their default datatypes set to float32. + super().set_datatype(datatype) + global dc_float, dc_complex_float + from dace import float32, float64, complex64, complex128 + dc_float = float64 if datatype == 'float64' else float32 + dc_complex_float = complex128 if datatype == 'float64' else complex64 diff --git a/npbench/infrastructure/framework.py b/npbench/infrastructure/framework.py index 2d630dee..1de46aef 100644 --- a/npbench/infrastructure/framework.py +++ b/npbench/infrastructure/framework.py @@ -5,8 +5,10 @@ import pkg_resources from npbench.infrastructure import Benchmark -from typing import Any, Callable, Dict, Sequence, Tuple +from typing import Any, Callable, Dict, Sequence, Tuple, Union, Literal +np_float = None +np_complex = None class Framework(object): """ A class for reading and processing framework information. """ @@ -158,7 +160,18 @@ def exec_str(self, bench: Benchmark, impl: Callable = None): arg_str = self.arg_str(bench, impl) # param_str = self.param_str(bench, impl) return "__npb_result = __npb_impl({a})".format(a=arg_str) - + + def set_datatype(self, datatype: Union[Literal["float32"], Literal["float64"]]): + """ Sets the datatype for the framework. + :param datatype: The datatype to set (float32, float64). + """ + global np_float, np_complex + if datatype == 'float32': + np_float = np.float32 + np_complex = np.complex64 + else: + np_float = np.float64 + np_complex = np.complex128 def generate_framework(fname: str, save_strict: bool = False, load_strict: bool = False) -> Framework: """ Generates a framework object with the correct class. diff --git a/npbench/infrastructure/test.py b/npbench/infrastructure/test.py index 24c093e5..b303d733 100644 --- a/npbench/infrastructure/test.py +++ b/npbench/infrastructure/test.py @@ -1,8 +1,10 @@ # Copyright 2021 ETH Zurich and the NPBench authors. All rights reserved. import time +import traceback +import numpy as np from npbench.infrastructure import (Benchmark, Framework, timeout_decorator as tout, utilities as util) -from typing import Any, Callable, Dict, Sequence, Tuple +from typing import Any, Callable, Dict, Sequence, Tuple, Optional class Test(object): @@ -32,7 +34,7 @@ def _execute(self, frmwrk: Framework, impl: Callable, impl_name: str, mode: str, '__npb_result') except Exception as e: print("Failed to execute the {} implementation.".format(report_str)) - print(e) + traceback.print_exception(e) if not ignore_errors: raise return None, None @@ -50,17 +52,37 @@ def _execute(self, frmwrk: Framework, impl: Callable, impl_name: str, mode: str, assert len(out) == num_return_args + num_output_args, "Number of output arguments does not match." return out, timelist - def run(self, preset: str, validate: bool, repeat: int, timeout: float = 200.0, ignore_errors: bool = True): + def run(self, preset: str, validate: bool, repeat: int, timeout: float = 200.0, ignore_errors: bool = True, datatype: Optional[str] = None): """ Tests the framework against the benchmark. :param preset: The preset to use for testing (S, M, L, paper). :param validate: If true, it validates the output against NumPy. :param repeat: The number of repeatitions. """ - print("***** Testing {f} with {b} on the {p} dataset *****".format(b=self.bench.bname, + print("***** Testing {f} with {b} on the {p} dataset, datatype {d} *****".format(b=self.bench.bname, f=self.frmwrk.info["full_name"], - p=preset)) - - bdata = self.bench.get_data(preset) + p=preset, + d=datatype if datatype is not None else "default")) + + self.frmwrk.set_datatype(datatype) + bdata = self.bench.get_data(preset, datatype) + + # Some of the input data is taken from float constants defined in the benchmark JSON file. + # These constants are stored as Python floats. + # However, frameworks like DaCe generally expect scalars to be in a specific datatype (e.g., np.float32 or np.float64). + # Since we don't have any information about the expected datatype of these constants in the JSON file, + # we try to detect the expected datatype from the input data we got from the benchmark. + # Ideally, we would store the expected datatype information in the benchmark JSON file directly so we don't have to guess here. + dtypes = set( + type(v) for v in bdata.values() if type(v) in [np.float32, np.float64] + ) + dtypes |= set(type(v.dtype.type()) for v in bdata.values() if type(v) is np.ndarray and v.dtype in [np.float32, np.float64]) + if len(dtypes) > 1: + raise ValueError("Inconsistent datatypes detected in benchmark data: mixture of float32 and float64 values.") + if len(dtypes) == 1: + detected_dtype = dtypes.pop() + for k, v in bdata.items(): + if type(v) is float: + bdata[k] = detected_dtype(v) # Run NumPy for validation if validate and self.frmwrk.fname != "numpy" and self.numpy: @@ -120,8 +142,9 @@ def first_execution(impl, impl_name): print("{} - {} - validation: SUCCESS".format(frmwrk_name, impl_name)) elif not ignore_errors: raise ValueError("{} did not validate!".format(frmwrk_name)) - except Exception: + except Exception as e: print("Failed to run {} validation.".format(self.frmwrk.info["full_name"])) + traceback.print_exception(e) if not ignore_errors: raise # Main execution @@ -161,4 +184,3 @@ def first_execution(impl, impl_name): result = tuple(new_d.values()) # print(result) util.create_result(conn, util.sql_insert_into_results_table, result) - diff --git a/run_benchmark.py b/run_benchmark.py index ac644a1e..f566a93b 100644 --- a/run_benchmark.py +++ b/run_benchmark.py @@ -42,6 +42,12 @@ type=util.str2bool, nargs="?", default=False) + parser.add_argument("-d", + "--datatype", + type=str, + help="datatype to use", + choices=["float32", "float64"], + required=False) args = vars(parser.parse_args()) # print(args) @@ -54,4 +60,4 @@ lcount = LineCount(bench, frmwrk, numpy) lcount.count() test = Test(bench, frmwrk, numpy) - test.run(args["preset"], args["validate"], args["repeat"], args["timeout"]) + test.run(args["preset"], args["validate"], args["repeat"], args["timeout"], datatype=args["datatype"]) diff --git a/run_framework.py b/run_framework.py index 411b739d..f8d8b00b 100644 --- a/run_framework.py +++ b/run_framework.py @@ -9,14 +9,15 @@ def run_benchmark(benchname, fname, preset, validate, repeat, timeout, - ignore_errors, save_strict, load_strict): - frmwrk = generate_framework(fname, save_strict, load_strict) - numpy = generate_framework("numpy") - bench = Benchmark(benchname) - lcount = LineCount(bench, frmwrk, numpy) - lcount.count() - test = Test(bench, frmwrk, numpy) - test.run(preset, validate, repeat, timeout, ignore_errors) + ignore_errors, save_strict, load_strict, datatype): + for f in fname: + frmwrk = generate_framework(f, save_strict, load_strict) + numpy = generate_framework("numpy") + bench = Benchmark(benchname) + lcount = LineCount(bench, frmwrk, numpy) + lcount.count() + test = Test(bench, frmwrk, numpy) + test.run(preset, validate, repeat, timeout, ignore_errors, datatype) if __name__ == "__main__": @@ -57,6 +58,12 @@ def run_benchmark(benchname, fname, preset, validate, repeat, timeout, type=util.str2bool, nargs="?", default=False) + parser.add_argument("-d", + "--datatype", + type=str, + help="datatype to use", + choices=["float32", "float64"], + required=False) args = vars(parser.parse_args()) parent_folder = pathlib.Path(__file__).parent.absolute() @@ -70,7 +77,7 @@ def run_benchmark(benchname, fname, preset, validate, repeat, timeout, args=(benchname, args["framework"], args["preset"], args["validate"], args["repeat"], args["timeout"], args["ignore_errors"], args["save_strict_sdfg"], - args["load_strict_sdfg"])) + args["load_strict_sdfg"], args["datatype"])) p.start() p.join() exit_code = p.exitcode