Expression Evaluation (In-Memory)

In this bench we will evaluate some expressions in memory for ironArray, NumPy, Zarr, Numba, HDF5 and TileDB. To do that, we will use the same data as in the Expression Evaluation (In-Memory) tutorial. It is important to stress that we are using the defaults for every library, which are typically fine-tuned for general performance; this is a reasonably fair way to compare them without spending a long time optimizing each one.

[1]:
%load_ext memprofiler
%matplotlib inline
import iarray as ia
import numpy as np
import zarr
import dask.array as da
import h5py
import hdf5plugin as h5plugin
import os
from numcodecs import Blosc
import tiledb
[2]:
!vmtouch -e "../tutorials/precip1.iarr" "../tutorials/precip2.iarr" "../tutorials/precip3.iarr"
           Files: 3
     Directories: 0
   Evicted Pages: 215221 (840M)
         Elapsed: 0.007635 seconds
[3]:
%%mprof_run -i 0.1 load
precip1 = ia.load("../tutorials/precip1.iarr")
precip2 = ia.load("../tutorials/precip2.iarr")
precip3 = ia.load("../tutorials/precip3.iarr")
print(precip1.info)
type   : IArray
shape  : (720, 721, 1440)
chunks : (128, 128, 256)
blocks : (16, 32, 64)
cratio : 12.45

memprofiler: used 842.07 MiB RAM (peak of 842.07 MiB) in 0.5752 s, total RAM usage 1076.28 MiB

ironArray

Mean evaluation

[4]:
shape = precip1.shape
[5]:
precip_mean_expr = ia.expr_from_string("(p1 + p2 + p3) / 3", {'p1': precip1, 'p2': precip2, 'p3': precip3})
[6]:
%%mprof_run 1.iarray::mean
precip_mean = precip_mean_expr.eval()
precip_mean
[6]:
<IArray (720, 721, 1440) np.float32>
memprofiler: used 859.54 MiB RAM (peak of 859.54 MiB) in 0.3553 s, total RAM usage 1953.76 MiB

Now we will favor speed first and then compression ratio, to see the performance obtained with each ia.Favor setting:

[7]:
precip_mean_expr_speed = ia.expr_from_string("(p1 + p2 + p3) / 3", {'p1': precip1, 'p2': precip2, 'p3': precip3}, favor=ia.Favor.SPEED)
[8]:
%%mprof_run 1.iarray::mean_speed
precip_mean_sp = precip_mean_expr_speed.eval()
precip_mean_sp
[8]:
<IArray (720, 721, 1440) np.float32>
memprofiler: used 835.46 MiB RAM (peak of 835.46 MiB) in 0.3252 s, total RAM usage 2789.81 MiB
[9]:
precip_mean_expr_cratio = ia.expr_from_string("(p1 + p2 + p3) / 3", {'p1': precip1, 'p2': precip2, 'p3': precip3}, favor=ia.Favor.CRATIO)
[10]:
%%mprof_run 1.iarray::mean_cratio
precip_mean_cr = precip_mean_expr_cratio.eval()
precip_mean_cr
[10]:
<IArray (720, 721, 1440) np.float32>
memprofiler: used 546.83 MiB RAM (peak of 546.83 MiB) in 1.3235 s, total RAM usage 3337.20 MiB
[11]:
del precip_mean_sp
del precip_mean_cr

Expressions with transcendental functions

As in the mean evaluation, we will use the different ia.Favor available and compare the performance.

[12]:
precip_trans_expr = ia.expr_from_string("(tan(p1) * (sin(p1) * sin(p2) + cos(p2)) + sqrt(p3) * 2)", {'p1': precip1, 'p2': precip2, 'p3': precip3})
[13]:
%%mprof_run 1.iarray::trans
precip_trans = precip_trans_expr.eval()
precip_trans
[13]:
<IArray (720, 721, 1440) np.float32>
memprofiler: used 772.68 MiB RAM (peak of 772.68 MiB) in 0.6020 s, total RAM usage 4110.60 MiB
[14]:
precip_trans_expr_speed = ia.expr_from_string("(tan(p1) * (sin(p1) * sin(p2) + cos(p2)) + sqrt(p3) * 2)", {'p1': precip1, 'p2': precip2, 'p3': precip3}, favor=ia.Favor.SPEED)
[15]:
%%mprof_run 1.iarray::trans_speed
precip_trans_speed = precip_trans_expr_speed.eval()
precip_trans_speed
[15]:
<IArray (720, 721, 1440) np.float32>
memprofiler: used 751.26 MiB RAM (peak of 751.26 MiB) in 0.5916 s, total RAM usage 4862.46 MiB
[16]:
precip_trans_expr_cratio = ia.expr_from_string("(tan(p1) * (sin(p1) * sin(p2) + cos(p2)) + sqrt(p3) * 2)", {'p1': precip1, 'p2': precip2, 'p3': precip3}, favor=ia.Favor.CRATIO)
[17]:
%%mprof_run 1.iarray::trans_cratio
precip_trans_cratio = precip_trans_expr_cratio.eval()
precip_trans_cratio
[17]:
<IArray (720, 721, 1440) np.float32>
memprofiler: used 559.39 MiB RAM (peak of 559.39 MiB) in 1.3969 s, total RAM usage 5422.46 MiB
[18]:
del precip_trans_speed
del precip_trans_cratio

Zarr

Mean evaluation

[19]:
%%time
zarr_precip1 = zarr.array(precip1.data)
zarr_precip2 = zarr.array(precip2.data)
zarr_precip3 = zarr.array(precip3.data)
CPU times: user 24.5 s, sys: 3.6 s, total: 28.1 s
Wall time: 8.8 s
[20]:
zdask1 = da.from_zarr(zarr_precip1)
zdask2 = da.from_zarr(zarr_precip2)
zdask3 = da.from_zarr(zarr_precip3)
[21]:
%%mprof_run 2.zarr::mean

zdask_mean = (zdask1 + zdask2 + zdask3) / 3

zarr_mean = zarr.create(shape=precip_mean.shape, dtype=precip_mean.dtype)
da.to_zarr(zdask_mean, zarr_mean)
memprofiler: used 1642.80 MiB RAM (peak of 1644.06 MiB) in 2.0697 s, total RAM usage 8057.52 MiB
[22]:
del zarr_mean
del zdask_mean

Expressions with transcendental functions

[23]:
%%mprof_run 2.zarr::trans
zdask_trans = (da.tan(zdask1) * (da.sin(zdask1) * da.sin(zdask2) + da.cos(zdask2)) + da.sqrt(zdask3) * 2)
zarr_trans = zarr.create(shape=zarr_precip1.shape, dtype=precip_mean.dtype)
da.to_zarr(zdask_trans, zarr_trans)
memprofiler: used 775.35 MiB RAM (peak of 776.43 MiB) in 4.9666 s, total RAM usage 8367.94 MiB
[24]:
del zarr_precip1
del zarr_precip2
del zarr_precip3

del zdask1
del zdask2
del zdask3

del zdask_trans
del zarr_trans

HDF5

Mean evaluation

[25]:
# Persist the three precipitation datasets as Blosc-compressed HDF5 files
# (only on the first run), then reopen each one read-only with the in-memory
# 'core' driver and wrap its dataset in a dask array for lazy evaluation.
h5_urlpath1 = "precip1.hdf5"
h5_urlpath2 = "precip2.hdf5"
h5_urlpath3 = "precip3.hdf5"

# Create precip1.hdf5 once; subsequent runs reuse the existing file.
if not os.path.exists(h5_urlpath1):
    with h5py.File(h5_urlpath1, "w") as f:
        h5_precip1 = f.create_dataset("h5_precip1", precip1.shape, dtype=precip1.dtype, **h5plugin.Blosc())
        precip1.copyto(h5_precip1)
# driver='core' loads the whole file into RAM; backing_store=False means
# nothing is written back to disk (the file is opened read-only anyway).
h5_file1 = h5py.File(h5_urlpath1, "r", driver='core', backing_store=False)
precip_h5_1 = h5_file1['h5_precip1']
precip1_h5dask = da.from_array(precip_h5_1)

# Same as above for the second dataset.
if not os.path.exists(h5_urlpath2):
    with h5py.File(h5_urlpath2, "w") as f:
        h5_precip2 = f.create_dataset("h5_precip2", precip2.shape, dtype=precip2.dtype, **h5plugin.Blosc())
        precip2.copyto(h5_precip2)
h5_file2 = h5py.File(h5_urlpath2, "r", driver='core', backing_store=False)
precip_h5_2 = h5_file2['h5_precip2']
precip2_h5dask = da.from_array(precip_h5_2)

# Same as above for the third dataset.
if not os.path.exists(h5_urlpath3):
    with h5py.File(h5_urlpath3, "w") as f:
        h5_precip3 = f.create_dataset("h5_precip3", precip3.shape, dtype=precip3.dtype, **h5plugin.Blosc())
        precip3.copyto(h5_precip3)
h5_file3 = h5py.File(h5_urlpath3, "r", driver='core', backing_store=False)
precip_h5_3 = h5_file3['h5_precip3']
precip3_h5dask = da.from_array(precip_h5_3)

# Make sure the output path for the mean benchmark is free before timing.
h5_mean_urlpath = "mean.hdf5"
ia.remove_urlpath(h5_mean_urlpath)
[26]:
%%mprof_run 3.hdf5::mean

h5dask_mean = (precip1_h5dask + precip2_h5dask + precip3_h5dask) / 3

f = h5py.File(h5_mean_urlpath, "w", driver='core', backing_store=False)
h5_mean = f.create_dataset(name=h5_mean_urlpath, shape=precip_mean.shape, dtype=precip_mean.dtype, **h5plugin.Blosc())
da.to_hdf5(h5_mean_urlpath, '/x', h5dask_mean)
memprofiler: used 775.62 MiB RAM (peak of 3777.41 MiB) in 31.2644 s, total RAM usage 8984.12 MiB
[27]:
del h5dask_mean
del h5_mean

Expressions with transcendental functions

[28]:
p1_h5dask = precip1_h5dask
p2_h5dask = precip2_h5dask
p3_h5dask = precip3_h5dask

h5_trans_urlpath = "trans.hdf5"
ia.remove_urlpath(h5_trans_urlpath)
[29]:
%%mprof_run 3.hdf5::trans

h5dask_trans = (da.tan(p1_h5dask) * (da.sin(p1_h5dask) * da.sin(p2_h5dask) + da.cos(p2_h5dask)) + da.sqrt(p3_h5dask) * 2)

f = h5py.File(h5_trans_urlpath, "w", driver='core', backing_store=False)
h5_trans = f.create_dataset(name=h5_trans_urlpath, shape=precip_trans.shape, dtype=precip_trans.dtype,
                            **h5plugin.Blosc())
da.to_hdf5(h5_trans_urlpath, '/x', h5dask_trans)
memprofiler: used 519.29 MiB RAM (peak of 7718.27 MiB) in 32.1732 s, total RAM usage 9503.42 MiB
[30]:
del precip1_h5dask
del precip2_h5dask
del precip3_h5dask
del p1_h5dask
del p2_h5dask
del p3_h5dask

del h5dask_trans
del h5_trans

TileDB

[31]:
# Build three identical dense TileDB arrays (one per precipitation dataset),
# tiled like the ironArray chunks and stored in memory via "mem://" URIs.
shape = precip1.shape
chunks = precip1.chunks
adom = tiledb.Domain(
        tiledb.Dim(name="rows", domain=(0, shape[0] - 1), dtype=np.int32, tile=chunks[0]),
        tiledb.Dim(name="cols", domain=(0, shape[1] - 1), dtype=np.int32, tile=chunks[1]),
        tiledb.Dim(name="else", domain=(0, shape[2] - 1), dtype=np.int32, tile=chunks[2]),
    )

# Byte-shuffle + LZ4 (level 5) compression for the single attribute.
filters = tiledb.FilterList([tiledb.ByteShuffleFilter(), tiledb.LZ4Filter(5)])

# Dense 3-D array with a single float32 attribute "a" (one value per
# (rows, cols, else) cell).
aschema = tiledb.ArraySchema(
    domain=adom,  sparse=False, attrs=[tiledb.Attr(name="a", dtype=np.float32, filters=filters)]
)

# Create the (empty) array in memory (the "mem://" URI is not a filesystem
# path).  NOTE(review): os.path.exists() on a "mem://" URI always returns
# False, so this guard never skips creation — confirm whether that is intended.
array_name = "mem://precip_1_tiledb"

if not os.path.exists(array_name):
    tiledb.DenseArray.create(array_name, aschema)
    with tiledb.DenseArray(array_name, mode="w") as A:
        A[:] = precip1.data


# NOTE(review): tiling2 is unused below; the domain reuses `chunks` from precip1.
tiling2 = precip2.chunks
bdom = tiledb.Domain(
        tiledb.Dim(name="rows", domain=(0, shape[0] - 1), dtype=np.int32, tile=chunks[0]),
        tiledb.Dim(name="cols", domain=(0, shape[1] - 1), dtype=np.int32, tile=chunks[1]),
        tiledb.Dim(name="else", domain=(0, shape[2] - 1), dtype=np.int32, tile=chunks[2]),
    )

# Same dense schema as above, single float32 attribute "a".
bschema = tiledb.ArraySchema(
    domain=bdom,  sparse=False, attrs=[tiledb.Attr(name="a", dtype=np.float32, filters=filters)]
)

# Create the (empty) in-memory array for the second dataset.
array_nameb = "mem://precip_2_tiledb"
if not os.path.exists(array_nameb):
    tiledb.DenseArray.create(array_nameb, bschema)
    with tiledb.DenseArray(array_nameb, mode="w") as B:
        B[:] = precip2.data


# NOTE(review): tiling3 is unused below; the domain reuses `chunks` from precip1.
tiling3 = precip3.chunks
cdom = tiledb.Domain(
        tiledb.Dim(name="rows", domain=(0, shape[0] - 1), dtype=np.int32, tile=chunks[0]),
        tiledb.Dim(name="cols", domain=(0, shape[1] - 1), dtype=np.int32, tile=chunks[1]),
        tiledb.Dim(name="else", domain=(0, shape[2] - 1), dtype=np.int32, tile=chunks[2]),
    )

# Same dense schema as above, single float32 attribute "a".
cschema = tiledb.ArraySchema(
    domain=cdom,  sparse=False, attrs=[tiledb.Attr(name="a", dtype=np.float32, filters=filters)]
)

# Create the (empty) in-memory array for the third dataset.
array_namec = "mem://precip_3_tiledb"

if not os.path.exists(array_namec):
    tiledb.DenseArray.create(array_namec, cschema)
    with tiledb.DenseArray(array_namec, mode="w") as C:
        C[:] = precip3.data
[32]:
tilea = da.from_tiledb(array_name, attribute='a')
tileb = da.from_tiledb(array_nameb, attribute='a')
tilec = da.from_tiledb(array_namec, attribute='a')

Mean

[33]:
%%mprof_run 4.tiledb::mean

res_tdask = (tilea + tileb + tilec) / 3
res_tdask.to_tiledb("mem://res_tiledb")
memprofiler: used 3182.25 MiB RAM (peak of 3739.12 MiB) in 10.2894 s, total RAM usage 11278.86 MiB
[34]:
del res_tdask
tiledb.remove("mem://res_tiledb")

Expressions with transcendental functions

[35]:
%%mprof_run 4.tiledb::trans

res_tdask_trans = da.tan(tilea) * (da.sin(tilea) * da.sin(tileb) + da.cos(tileb)) + da.sqrt(tilec) * 2

res_tdask_trans.to_tiledb("mem://res_tiledb_trans")
memprofiler: used 906.37 MiB RAM (peak of 1927.88 MiB) in 13.9893 s, total RAM usage 11401.68 MiB
[36]:
del tilea
del tileb
del tilec

del res_tdask_trans
tiledb.remove("mem://res_tiledb_trans")
tiledb.remove(array_name)
tiledb.remove(array_nameb)
tiledb.remove(array_namec)

NumPy

Mean evaluation

[37]:
%%mprof_run
np_precip1 = precip1.data
np_precip2 = precip2.data
np_precip3 = precip3.data
memprofiler: used 8554.84 MiB RAM (peak of 8554.84 MiB) in 5.7420 s, total RAM usage 18157.45 MiB
[38]:
%%mprof_run 5.numpy::mean
np_mean = (np_precip1 + np_precip2 + np_precip3) / 3
memprofiler: used 2851.61 MiB RAM (peak of 2851.61 MiB) in 1.0326 s, total RAM usage 21009.08 MiB

Expressions with transcendental functions

[39]:
%%mprof_run 5.numpy::trans
p1_ = np_precip1
p2_ = np_precip2
p3_ = np_precip3
np_trans = (np.tan(p1_) * (np.sin(p1_) * np.sin(p2_) + np.cos(p2_)) + np.sqrt(p3_) * 2)
memprofiler: used 2851.62 MiB RAM (peak of 8554.75 MiB) in 11.2373 s, total RAM usage 23860.70 MiB
[40]:
del np_mean
del p1_
del p2_
del p3_
del np_trans

Numba

Numba is a well-known library for performing efficient computations on NumPy arrays:

Mean evaluation

[41]:
import numba as nb
@nb.jit(nopython=True, parallel=True)
def mean_numba(x, y, z):
    """Return the element-wise mean of three 3-D arrays: (x + y + z) / 3.

    The result has the same shape and dtype as ``x``; all three inputs are
    assumed to share that shape.
    """
    out = np.empty(x.shape, x.dtype)
    # Numba only parallelizes the outermost prange; nested prange loops are
    # executed as ordinary sequential loops, so plain range is used inside
    # to make the actual parallelization explicit.
    for i in nb.prange(x.shape[0]):
        for j in range(x.shape[1]):
            for k in range(x.shape[2]):
                out[i, j, k] = (x[i, j, k] + y[i, j, k] + z[i, j, k]) / 3
    return out

In this example we are forcing Numba to execute outside the Python interpreter (nopython=True) so as to leverage Numba's parallelism support (parallel=True). We will compute the mean twice because the first call includes the one-time JIT compilation overhead.

[42]:
%%mprof_run
nb_mean = mean_numba(np_precip1, np_precip2, np_precip3)
memprofiler: used 2872.29 MiB RAM (peak of 2872.29 MiB) in 0.9132 s, total RAM usage 21055.63 MiB
[43]:
# Avoid tracking memory consumption of this object
del nb_mean
[44]:
%%mprof_run 6.numba::mean
nb_mean = mean_numba(np_precip1, np_precip2, np_precip3)
memprofiler: used 2849.25 MiB RAM (peak of 2849.25 MiB) in 0.5464 s, total RAM usage 21056.66 MiB

Expressions with transcendental functions

[45]:
import numba as nb
@nb.jit(nopython=True, parallel=True)
def trans_numba(x, y, z):
    """Evaluate tan(x) * (sin(x) * sin(y) + cos(y)) + sqrt(z) * 2 element-wise.

    The result has the same shape and dtype as ``x``; all three inputs are
    assumed to share that shape.
    """
    out = np.empty(x.shape, x.dtype)
    # Only the outermost prange is parallelized by Numba; inner prange loops
    # run sequentially anyway, so plain range is used for clarity.
    for i in nb.prange(x.shape[0]):
        for j in range(x.shape[1]):
            for k in range(x.shape[2]):
                # Hoist the repeated element loads into locals: x and y are
                # each read twice per element in the expression.
                xv = x[i, j, k]
                yv = y[i, j, k]
                out[i, j, k] = (np.tan(xv) * (np.sin(xv) * np.sin(yv) + np.cos(yv)) + np.sqrt(z[i, j, k]) * 2)
    return out
[46]:
%%mprof_run
nb_trans = trans_numba(np_precip1, np_precip2, np_precip3)
memprofiler: used 2851.35 MiB RAM (peak of 2851.35 MiB) in 0.9022 s, total RAM usage 23910.56 MiB
[47]:
# Avoid tracking memory consumption of this object
del nb_trans
[48]:
%%mprof_run 6.numba::trans
nb_trans = trans_numba(np_precip1, np_precip2, np_precip3)
memprofiler: used 2849.12 MiB RAM (peak of 2849.12 MiB) in 0.5433 s, total RAM usage 23910.67 MiB
[49]:
del np_precip1
del np_precip2
del np_precip3

del nb_mean
del nb_trans

Resource consumption

[50]:
%mprof_plot .*::mean -t "Mean computation"