Skip to content

Commit

Permalink
Updated bench to modern API
Browse files Browse the repository at this point in the history
  • Loading branch information
FrancescAlted committed Nov 26, 2024
1 parent bb871b8 commit dd1de28
Show file tree
Hide file tree
Showing 9 changed files with 2,841 additions and 13,248 deletions.
3 changes: 2 additions & 1 deletion ANNOUNCE.rst
Original file line number Diff line number Diff line change
Expand Up @@ -39,9 +39,10 @@ or numexpr, you can find:
* High-performance compression codecs, for integer, floating point, complex,
boolean, string and structured data.
* Can perform many kinds of math expressions, including reductions, indexing,
filters, User Defined Functions and more.
filters and more.
* Support for the NumPy ufunc mechanism, allowing you to mix and match NumPy and
Blosc2 computations.
* Excellent integration with Numba and Cython via User Defined Functions.
* Support for broadcasting operations. This is a powerful feature that
allows performing operations on arrays of different shapes.
* Much better adherence to the NumPy casting rules than numexpr.
Expand Down
4 changes: 2 additions & 2 deletions bench/ndarray/broadcast_expr.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
# LICENSE file in the root directory of this source tree)
#######################################################################

# Small benchmark for evaluating outer products using the broadcast feature
# Small benchmark for computing outer products using the broadcast feature

from time import time

Expand Down Expand Up @@ -38,7 +38,7 @@
t0 = time()
# d = c.compute(cparams=dict(codec=codec, clevel=5), chunks=(chunks, chunks), blocks=(blocks, blocks))
d = c.compute(cparams={"codec": codec, "clevel": 5})
print(f"Elapsed time (eval): {time() - t0:.6f} s")
print(f"Elapsed time (compute): {time() - t0:.2f}s")
# print(d[:])
print(f"cratio: {d.schunk.cratio:.2f}x")
# print(d.info)
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
# LICENSE file in the root directory of this source tree)
#######################################################################

# Benchmark to evaluate expressions with numba and NDArray instances as operands.
# Benchmark to compute expressions with numba and NDArray instances as operands.
# As numba takes a while to compile the first time, we use cached functions, so
# make sure to run the script at least a couple of times.

Expand All @@ -18,19 +18,20 @@

import blosc2


shape = (5000, 10_000)
chunks = [500, 10_000]
blocks = [4, 10_000]
# Comment out the next line to force chunks and blocks above
# Comment out the next line to enforce chunks and blocks above
chunks, blocks = None, None
# Check with fast compression
cparams = {"clevel": 1, "codec": blosc2.Codec.BLOSCLZ}
cparams = blosc2.CParams(clevel=1, codec=blosc2.Codec.BLOSCLZ)

dtype = np.float32
rtol = 1e-6 if dtype == np.float32 else 1e-17
atol = 1e-6 if dtype == np.float32 else 1e-17

# Expression to evaluate
# Expression to compute
exprs = ("x + 1",
"x**2 + y**2 + 2 * x * y + 1",
"sin(x)**3 + cos(y)**2 + cos(x) * sin(y) + z",
Expand All @@ -50,7 +51,7 @@
print(f"shape: {x.shape}, chunks: {x.chunks}, blocks: {x.blocks}, cratio: {x.schunk.cratio:.2f}")


# Define the functions to evaluate the expressions
# Define the functions to compute the expressions
# First the pure numba+numpy version
@nb.jit(parallel=True, cache=True)
def func_numba(x, y, z, n):
Expand Down Expand Up @@ -93,9 +94,9 @@ def udf_numba(inputs, output, offset):


for n, expr in enumerate(exprs):
print(f"*** Evaluating expression: {expr} ...")
print(f"*** Computing expression: {expr} ...")

# Evaluate the expression with NumPy/numexpr
# Compute the expression with NumPy/numexpr
npexpr = expr.replace("sin", "np.sin").replace("cos", "np.cos")
t0 = time()
npres = eval(npexpr, vardict)
Expand All @@ -106,14 +107,14 @@ def udf_numba(inputs, output, offset):
ne.evaluate(expr, vardict, out=np.empty_like(npx))
print("NumExpr took %.3f s" % (time() - t0))

# Evaluate the expression with Blosc2+numexpr
# Compute the expression with Blosc2
blosc2.cparams_dflts["codec"] = blosc2.Codec.LZ4
blosc2.cparams_dflts["clevel"] = 5
b2expr = expr.replace("sin", "blosc2.sin").replace("cos", "blosc2.cos")
c = eval(b2expr, b2vardict)
t0 = time()
d = c.compute()
print("LazyExpr+eval took %.3f s" % (time() - t0))
print("LazyExpr+compute took %.3f s" % (time() - t0))
# Check
np.testing.assert_allclose(d[:], npres, rtol=rtol, atol=atol)
t0 = time()
Expand All @@ -134,28 +135,14 @@ def udf_numba(inputs, output, offset):
elif n == 2:
inputs = (x, y, z)

expr_ = blosc2.lazyudf(udf_numba, inputs, npx.dtype, chunked_eval=False,
chunks=chunks, blocks=blocks, cparams=cparams)
# actual benchmark
# eval() uses the udf function as a prefilter
t0 = time()
res = expr_.compute()
print("LazyUDF+eval took %.3f s" % (time() - t0))
np.testing.assert_allclose(res[...], npres, rtol=rtol, atol=atol)
# getitem uses the same compiled function but as a postfilter
t0 = time()
res = expr_[:]
print("LazyUDF+getitem took %.3f s" % (time() - t0))
np.testing.assert_allclose(res[...], npres, rtol=rtol, atol=atol)

expr_ = blosc2.lazyudf(udf_numba, inputs, npx.dtype, chunked_eval=True,
expr_ = blosc2.lazyudf(udf_numba, inputs, npx.dtype,
chunks=chunks, blocks=blocks, cparams=cparams)
# getitem but using chunked evaluation
t0 = time()
res = expr_.compute()
print("LazyUDF+chunked_eval took %.3f s" % (time() - t0))
print("LazyUDF+compute took %.3f s" % (time() - t0))
np.testing.assert_allclose(res[...], npres, rtol=rtol, atol=atol)
t0 = time()
res = expr_[:]
print("LazyUDF+getitem+chunked_eval took %.3f s" % (time() - t0))
print("LazyUDF+getitem took %.3f s" % (time() - t0))
np.testing.assert_allclose(res[...], npres, rtol=rtol, atol=atol)
Loading

0 comments on commit dd1de28

Please sign in to comment.