Expression Evaluation (On-Disk)

In the Expression Evaluation (On-Disk) we evaluated some expression for different iarrays configurations. In this benchmark we will compare the performance of ironArray with the optimized iarray configuration with the performance from Zarr, HDF5 and TileDB. It is important to stress that we are using defaults for every library, which are typically fine tuned for general performance; this is a pretty fair way to compare them without spending long time optimizing for every one.

To avoid freeing memory while measuring memory consumption and therefore measuring negative values for memory consumption, we will compute the mean expression and the transcendental expression for each library and then delete the arrays created. At the end we will compare the different libraries for each expression evaluation.

[1]:
%load_ext memprofiler
%matplotlib inline
import numpy as np
import iarray as ia
import zarr
import dask
import dask.array as da
import os
import tiledb
import h5py
import hdf5plugin as h5plugin
from numcodecs import blosc, Blosc

ironArray

As in latter benchmarks, we are going to evict the files to better assess an out of core evaluation:

[2]:
!vmtouch -e ../tutorials/precip1.iarr ../tutorials/precip2.iarr ../tutorials/precip3.iarr
           Files: 651
     Directories: 3
   Evicted Pages: 212414 (829M)
         Elapsed: 0.046294 seconds
[3]:
%%mprof_run
precip1 = ia.open("../tutorials/precip1.iarr")
precip2 = ia.open("../tutorials/precip2.iarr")
precip3 = ia.open("../tutorials/precip3.iarr")
shape = precip1.shape
print(precip1.info)
type   : IArray
shape  : (720, 721, 1440)
chunks : (128, 128, 256)
blocks : (16, 32, 64)
cratio : 12.68

memprofiler: used 0.55 MiB RAM (peak of 0.55 MiB) in 0.0044 s, total RAM usage 246.36 MiB

Mean

[4]:
precip_mean = (precip1 + precip2 + precip3) / 3
[5]:
%%mprof_run 1.iarray::mean
res_precip_mean = precip_mean.eval(urlpath="mean-3m.iarr", mode="w")
res_shape = res_precip_mean.shape
res_dtype = res_precip_mean.dtype
del res_precip_mean
memprofiler: used 60.48 MiB RAM (peak of 74.00 MiB) in 2.3689 s, total RAM usage 306.86 MiB

Now we will favor the speed and then the compression ratio to see the performance from each ia.Favor:

[6]:
%%mprof_run 1.iarray::mean_speed
with ia.config(favor=ia.Favor.SPEED):
    precip_mean_speed = precip_mean.eval(urlpath="mean_sp-3m.iarr", mode="w")
del precip_mean_speed
memprofiler: used -7.62 MiB RAM (peak of 31.66 MiB) in 2.3951 s, total RAM usage 299.27 MiB
[7]:
%%mprof_run 1.iarray::mean_cratio
with ia.config(favor=ia.Favor.CRATIO):
    precip_mean_cratio = precip_mean.eval(urlpath="mean_cr-3m.iarr", mode="w")
del precip_mean_cratio
memprofiler: used 64.63 MiB RAM (peak of 82.93 MiB) in 3.3780 s, total RAM usage 363.90 MiB

Transcendental expressions

[8]:
precip_trans_expr = ia.tan(precip1) * (ia.sin(precip1) * ia.sin(precip2) + ia.cos(precip2)) + ia.sqrt(precip3) * 2
[9]:
%%mprof_run 1.iarray::trans

precip_trans = precip_trans_expr.eval(urlpath="trans-3m.iarr", mode="w")
del precip_trans
memprofiler: used 2.38 MiB RAM (peak of 33.84 MiB) in 2.9887 s, total RAM usage 366.37 MiB
[10]:
!vmtouch -e ../tutorials/precip1.iarr ../tutorials/precip2.iarr ../tutorials/precip3.iarr
           Files: 651
     Directories: 3
   Evicted Pages: 212414 (829M)
         Elapsed: 0.047194 seconds
[11]:
%%mprof_run 1.iarray::trans_speed
with ia.config(favor=ia.Favor.SPEED):
    precip_trans_speed = precip_trans_expr.eval(urlpath="trans_sp-3m.iarr", mode="w")
del precip_trans_speed
memprofiler: used 0.36 MiB RAM (peak of 31.06 MiB) in 2.6237 s, total RAM usage 366.75 MiB
[12]:
!vmtouch -e ../tutorials/precip1.iarr ../tutorials/precip2.iarr ../tutorials/precip3.iarr
           Files: 651
     Directories: 3
   Evicted Pages: 212414 (829M)
         Elapsed: 0.046111 seconds
[13]:
%%mprof_run 1.iarray::trans_cratio
with ia.config(favor=ia.Favor.CRATIO):
    precip_trans_cratio = precip_trans_expr.eval(urlpath="trans_cr-3m.iarr", mode="w")
del precip_trans_cratio
memprofiler: used 0.45 MiB RAM (peak of 27.63 MiB) in 4.2515 s, total RAM usage 367.20 MiB

Zarr

[14]:
zarr_urlpath1 = "precip1-op.zarr"
zarr_urlpath2 = "precip2-op.zarr"
zarr_urlpath3 = "precip3-op.zarr"

if not os.path.exists(zarr_urlpath1):
    zarr_precip1 = zarr.empty(precip1.shape, dtype=precip1.dtype, store=zarr_urlpath1)
    precip1.copyto(zarr_precip1)
zarr_precip1 = zarr.open(zarr_urlpath1)
print(zarr_precip1.info)

if not os.path.exists(zarr_urlpath2):
    zarr_precip2 = zarr.empty(precip2.shape, dtype=precip2.dtype, store=zarr_urlpath2)
    precip2.copyto(zarr_precip2)
zarr_precip2 = zarr.open(zarr_urlpath2)

if not os.path.exists(zarr_urlpath3):
    zarr_precip3 = zarr.empty(precip3.shape, dtype=precip3.dtype, store=zarr_urlpath3)
    precip3.copyto(zarr_precip3)
zarr_precip3 = zarr.open(zarr_urlpath3)
Type               : zarr.core.Array
Data type          : float32
Shape              : (720, 721, 1440)
Chunk shape        : (45, 91, 180)
Order              : C
Read-only          : False
Compressor         : Blosc(cname='lz4', clevel=5, shuffle=SHUFFLE, blocksize=0)
Store type         : zarr.storage.DirectoryStore
No. bytes          : 2990131200 (2.8G)
No. bytes stored   : 345247729 (329.3M)
Storage ratio      : 8.7
Chunks initialized : 1024/1024

[15]:
%%mprof_run
da_precip1 = da.from_zarr(zarr_precip1)
da_precip2 = da.from_zarr(zarr_precip2)
da_precip3 = da.from_zarr(zarr_precip3)
shape = zarr_precip1.shape
dtype = zarr_precip1.dtype
memprofiler: used 0.67 MiB RAM (peak of 0.67 MiB) in 0.0030 s, total RAM usage 368.00 MiB

Mean

[16]:
!vmtouch -e $zarr_urlpath1 $zarr_urlpath2 $zarr_urlpath3
           Files: 3075
     Directories: 3
   Evicted Pages: 251047 (980M)
         Elapsed: 0.055216 seconds
[17]:
%%mprof_run 2.zarr::mean

res_zarr = (da_precip1 + da_precip2 + da_precip3) / 3

zarr_precip_mean_disk = zarr.open("mean-3m.zarr", "w", shape=shape, dtype=dtype)
with dask.config.set(scheduler="threads"):
    da.to_zarr(res_zarr, zarr_precip_mean_disk)
memprofiler: used 641.35 MiB RAM (peak of 642.99 MiB) in 2.4998 s, total RAM usage 1009.38 MiB
[18]:
del res_zarr
del zarr_precip_mean_disk

Transcendental expressions

[19]:
!vmtouch -e $zarr_urlpath1 $zarr_urlpath2 $zarr_urlpath3
           Files: 3075
     Directories: 3
   Evicted Pages: 251047 (980M)
         Elapsed: 0.05133 seconds
[20]:
%%mprof_run 2.zarr::trans

res_zarr = np.tan(da_precip1) * (np.sin(da_precip1) * np.sin(da_precip2) + np.cos(da_precip2)) + np.sqrt(da_precip3) * 2
zarr_precip_trans = zarr.open("trans-3m.zarr", "w", shape=shape, dtype=dtype)
da.to_zarr(res_zarr, zarr_precip_trans)
memprofiler: used 638.39 MiB RAM (peak of 644.01 MiB) in 5.3157 s, total RAM usage 1647.77 MiB
[21]:
del zarr_precip1
del zarr_precip2
del zarr_precip3

del da_precip1
del da_precip2
del da_precip3

del res_zarr
del zarr_precip_trans

HDF5

[22]:
h5_urlpath1 = "precip1.hdf5"
h5_urlpath2 = "precip2.hdf5"
h5_urlpath3 = "precip3.hdf5"

if not os.path.exists(h5_urlpath1):
    with h5py.File(h5_urlpath1, "w") as f:
        h5_precip1 = f.create_dataset("h5_precip1", precip1.shape, dtype=precip1.dtype, **h5plugin.Blosc())
        precip1.copyto(h5_precip1)
h5_file1 = h5py.File(h5_urlpath1, "r")
precip_h5_1 = h5_file1['h5_precip1']
precip1_h5dask = da.from_array(precip_h5_1)

if not os.path.exists(h5_urlpath2):
    with h5py.File(h5_urlpath2, "w") as f:
        h5_precip2 = f.create_dataset("h5_precip2", precip2.shape, dtype=precip2.dtype, **h5plugin.Blosc())
        precip2.copyto(h5_precip2)
h5_file2 = h5py.File(h5_urlpath2, "r", driver='core', backing_store=False)
precip_h5_2 = h5_file2['h5_precip2']
precip2_h5dask = da.from_array(precip_h5_2)

if not os.path.exists(h5_urlpath3):
    with h5py.File(h5_urlpath3, "w") as f:
        h5_precip3 = f.create_dataset("h5_precip3", precip3.shape, dtype=precip3.dtype, **h5plugin.Blosc())
        precip3.copyto(h5_precip3)
h5_file3 = h5py.File(h5_urlpath3, "r", driver='core', backing_store=False)
precip_h5_3 = h5_file3['h5_precip3']
precip3_h5dask = da.from_array(precip_h5_3)

h5_mean_urlpath = "mean.hdf5"
ia.remove_urlpath(h5_mean_urlpath)

h5_trans_urlpath = "trans.hdf5"
ia.remove_urlpath(h5_trans_urlpath)

Mean

[23]:
!vmtouch -e $h5_urlpath1 $h5_urlpath2 $h5_urlpath3
           Files: 3
     Directories: 0
   Evicted Pages: 277335 (1G)
         Elapsed: 0.044729 seconds
[24]:
%%mprof_run 3.hdf5::mean

h5dask_mean = (precip1_h5dask + precip2_h5dask + precip3_h5dask) / 3

f = h5py.File(h5_mean_urlpath, "w")
h5_mean = f.create_dataset(name=h5_mean_urlpath, shape=res_shape, dtype=res_dtype, **h5plugin.Blosc())
h5dask_mean.to_hdf5(h5_mean_urlpath, 'x')
memprofiler: used 2.88 MiB RAM (peak of 7872.27 MiB) in 7.6562 s, total RAM usage 2368.70 MiB

Transcendental expressions

[25]:
!vmtouch -e $h5_urlpath1 $h5_urlpath2 $h5_urlpath3
           Files: 3
     Directories: 0
   Evicted Pages: 277335 (1G)
         Elapsed: 0.017109 seconds
[26]:
%%mprof_run 3.hdf5::trans

h5dask_trans = np.tan(precip1_h5dask) * (np.sin(precip1_h5dask) * np.sin(precip2_h5dask) + np.cos(precip2_h5dask)) + np.sqrt(precip3_h5dask) * 2

f = h5py.File(h5_trans_urlpath, "w")
h5_trans = f.create_dataset(name=h5_trans_urlpath, shape=res_shape, dtype=res_dtype, **h5plugin.Blosc())
h5dask_trans.to_hdf5(h5_trans_urlpath, 'x')
memprofiler: used 0.06 MiB RAM (peak of 12328.99 MiB) in 10.0241 s, total RAM usage 2368.76 MiB
[27]:
del precip_h5_1
del precip_h5_2
del precip_h5_3

del precip1_h5dask
del precip2_h5dask
del precip3_h5dask

del h5dask_mean
del h5_mean
del h5_trans
del h5dask_trans

TileDB

[28]:
shape = precip1.shape
chunks = precip1.chunks
adom = tiledb.Domain(
        tiledb.Dim(name="rows", domain=(0, shape[0] - 1), dtype=np.int32, tile=chunks[0]),
        tiledb.Dim(name="cols", domain=(0, shape[1] - 1), dtype=np.int32, tile=chunks[1]),
        tiledb.Dim(name="else", domain=(0, shape[2] - 1), dtype=np.int32, tile=chunks[2]),
    )

# The array will be dense with a single attribute "a" so each (i,j) cell can store an integer.
filters = tiledb.FilterList([tiledb.ByteShuffleFilter(), tiledb.LZ4Filter(5)])

aschema = tiledb.ArraySchema(
    domain=adom,  sparse=False, attrs=[tiledb.Attr(name="a", dtype=np.float32, filters=filters)]
)

# Create the (empty) array on disk.
array_name = "precip_1_tiledb"

if not os.path.exists(array_name):
    tiledb.DenseArray.create(array_name, aschema)
    with tiledb.DenseArray(array_name, mode="w") as A:
        A[:] = precip1.data

bdom = tiledb.Domain(
        tiledb.Dim(name="rows", domain=(0, shape[0] - 1), dtype=np.int32, tile=chunks[0]),
        tiledb.Dim(name="cols", domain=(0, shape[1] - 1), dtype=np.int32, tile=chunks[1]),
        tiledb.Dim(name="else", domain=(0, shape[2] - 1), dtype=np.int32, tile=chunks[2]),
    )

# The array will be dense with a single attribute "a" so each (i,j) cell can store an integer.
bschema = tiledb.ArraySchema(
    domain=bdom,  sparse=False, attrs=[tiledb.Attr(name="a", dtype=np.float32, filters=filters)]
)

# Create the (empty) array on disk.
array_nameb = "precip_2_tiledb"
if not os.path.exists(array_nameb):
    tiledb.DenseArray.create(array_nameb, bschema)
    with tiledb.DenseArray(array_nameb, mode="w") as B:
        B[:] = precip2.data


cdom = tiledb.Domain(
        tiledb.Dim(name="rows", domain=(0, shape[0] - 1), dtype=np.int32, tile=chunks[0]),
        tiledb.Dim(name="cols", domain=(0, shape[1] - 1), dtype=np.int32, tile=chunks[1]),
        tiledb.Dim(name="else", domain=(0, shape[2] - 1), dtype=np.int32, tile=chunks[2]),
    )

# The array will be dense with a single attribute "a" so each (i,j) cell can store an integer.
cschema = tiledb.ArraySchema(
    domain=cdom,  sparse=False, attrs=[tiledb.Attr(name="a", dtype=np.float32, filters=filters)]
)

# Create the (empty) array on disk.
array_namec = "precip_3_tiledb"

if not os.path.exists(array_namec):
    tiledb.DenseArray.create(array_namec, cschema)
    with tiledb.DenseArray(array_namec, mode="w") as C:
        C[:] = precip3.data
[29]:
del precip1
del precip2
del precip3
[30]:
tilea = da.from_tiledb(array_name, attribute='a')
tileb = da.from_tiledb(array_nameb, attribute='a')
tilec = da.from_tiledb(array_namec, attribute='a')

Mean

[31]:
!vmtouch -e $array_name $array_nameb $array_namec
           Files: 15
     Directories: 12
   Evicted Pages: 275896 (1G)
         Elapsed: 0.023686 seconds
[32]:
# Run this once first because HDF5 uses too much memory and the OS is releasing it afterwards
res_dask_t = (tilea + tileb + tilec) / 3
res_dask_t.to_tiledb("res_tiledb")
del res_dask_t
tiledb.remove("res_tiledb")
[33]:
!vmtouch -e $array_name $array_nameb $array_namec
           Files: 15
     Directories: 12
   Evicted Pages: 275896 (1G)
         Elapsed: 0.049919 seconds
[34]:
%%mprof_run 4.tiledb::mean

res_dask_t = (tilea + tileb + tilec) / 3
res_dask_t.to_tiledb("res_tiledb")
memprofiler: used -27.82 MiB RAM (peak of 1393.72 MiB) in 8.9136 s, total RAM usage 1538.54 MiB
[35]:
del res_dask_t
tiledb.remove("res_tiledb")

Transcendental expressions

[36]:
!vmtouch -e $array_name $array_nameb $array_namec
           Files: 15
     Directories: 12
   Evicted Pages: 275896 (1G)
         Elapsed: 0.051312 seconds
[37]:
%%mprof_run 4.tiledb::trans

res_tiledb_trans = da.tan(tilea) * (da.sin(tilea) * da.sin(tileb) + da.cos(tileb)) + da.sqrt(tilec) * 2
res_tiledb_trans.to_tiledb("res_tiledb_trans")
memprofiler: used -38.10 MiB RAM (peak of 1959.82 MiB) in 11.9561 s, total RAM usage 1500.47 MiB
[38]:
del tilea
del tileb
del tilec

del res_tiledb_trans
tiledb.remove("res_tiledb_trans")

Resource consumption

As a summary, let’s do a plot on the speed and memory consumption for the different kind of computations. First for a regular mean:

[39]:
%mprof_plot .*::mean -t "Mean evaluation (disk)"