Reductions in memory

In this benchmark we will compare ironArray’s performance with Zarr, HDF5, NumPy and TileDB. We will work with the data from the Reductions tutorial, use the default chunk shape for every library, and compute the same reduction in every case with in-memory operands. It is important to stress that we are using the defaults for every library; these are typically fine-tuned for good general performance, so this is a reasonably fair way to compare the libraries without spending a long time optimizing for each one.
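
For reference, the reduction we compute in every case is a mean over three of the four axes. A minimal NumPy sketch of the operation, using a hypothetical small array of the same rank:

import numpy as np

# Hypothetical small array with the same rank as the real dataset below
a = np.arange(3 * 4 * 5 * 6, dtype=np.float32).reshape(3, 4, 5, 6)
# Reduce over axes 3, 2 and 0; only axis 1 survives
reduc = a.mean(axis=(3, 2, 0))
print(reduc.shape)  # -> (4,)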

Let’s go:

[1]:
%load_ext memprofiler

import numpy as np
import iarray as ia
import os
import zarr
import dask.array as da
import h5py
import hdf5plugin as h5plugin
from numcodecs import Blosc
import tiledb
[2]:
!du -sh ../tutorials/precip-3m.iarr
809M    ../tutorials/precip-3m.iarr

ironArray

[3]:
%%time

ia_precip = ia.load("../tutorials/precip-3m.iarr")
print(ia_precip.shape)
print("cratio: ", round(ia_precip.cratio, 2))
chunks = ia_precip.chunks
print(ia_precip.info)
(3, 720, 721, 1440)
cratio:  12.82
type   : IArray
shape  : (3, 720, 721, 1440)
chunks : (1, 128, 128, 256)
blocks : (1, 16, 32, 64)
cratio : 12.82

CPU times: user 3.17 ms, sys: 230 ms, total: 233 ms
Wall time: 232 ms
[4]:
%%mprof_run 1.iarray::mean_memory
ia_reduc = ia.mean(ia_precip, axis=(3, 2, 0)).data
memprofiler: used 35.37 MiB RAM (peak of 35.37 MiB) in 0.6422 s, total RAM usage 1067.45 MiB
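
ia.mean returns a compressed IArray; accessing its .data attribute decompresses the result into the NumPy array (ia_reduc) that we will use below to verify the other libraries’ results.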

Zarr

[5]:
precip_zarr = zarr.array(ia_precip.data)
print(precip_zarr.info)
precip_zdask = da.from_zarr(precip_zarr)
Type               : zarr.core.Array
Data type          : float32
Shape              : (3, 720, 721, 1440)
Chunk shape        : (1, 90, 91, 180)
Order              : C
Read-only          : False
Compressor         : Blosc(cname='lz4', clevel=5, shuffle=SHUFFLE, blocksize=0)
Store type         : builtins.dict
No. bytes          : 8970393600 (8.4G)
No. bytes stored   : 1021932468 (974.6M)
Storage ratio      : 8.8
Chunks initialized : 1536/1536

[6]:
%%mprof_run 2.zarr::mean_memory
zdask_reduc = da.mean(precip_zdask, axis=(3, 2, 0))
zarr_reduc = zarr.create(shape=ia_reduc.shape, dtype=ia_reduc.dtype)
da.to_zarr(zdask_reduc, zarr_reduc)
memprofiler: used 896.61 MiB RAM (peak of 897.04 MiB) in 1.2559 s, total RAM usage 2949.57 MiB
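
Note that da.mean only builds a lazy task graph; the reduction actually runs when the result is stored with da.to_zarr. A minimal sketch of this pattern, with a hypothetical small in-memory array:

import numpy as np
import dask.array as da

# Hypothetical small array, chunked along every axis
x = da.from_array(np.ones((3, 8, 8, 16), dtype=np.float32), chunks=(1, 4, 4, 8))
lazy = da.mean(x, axis=(3, 2, 0))  # builds the task graph only
result = lazy.compute()            # triggers the actual computation
print(result.shape)  # -> (8,)
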
[7]:
np.testing.assert_almost_equal(zarr_reduc, ia_reduc)
del precip_zdask
del zdask_reduc
del zarr_reduc

HDF5

[8]:
h5_urlpath = "precip-3m.hdf5"

if not os.path.exists(h5_urlpath):
    with h5py.File(h5_urlpath, "w") as f:
        h5_precip = f.create_dataset("h5_precip", ia_precip.shape, dtype=ia_precip.dtype, **h5plugin.Blosc())
        ia_precip.copyto(h5_precip)


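# The 'core' driver with backing_store=False keeps the whole HDF5 file in memory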
h5_file = h5py.File(h5_urlpath, "r", driver='core', backing_store=False)
precip_h5 = h5_file['h5_precip']


precip_h5dask = da.from_array(precip_h5)

h5_reduc_urlpath = "reduc.hdf5"
ia.remove_urlpath(h5_reduc_urlpath)
[9]:
%%mprof_run 3.hdf5::mean_memory

h5dask_reduc = da.mean(precip_h5dask, axis=(3, 2, 0))

f = h5py.File(h5_reduc_urlpath, "w", driver='core', backing_store=False)
h5_reduc = f.create_dataset(name="reduc", shape=ia_reduc.shape, dtype=ia_reduc.dtype, **h5plugin.Blosc())
# Store the dask result into the in-memory compressed dataset
da.store(h5dask_reduc, h5_reduc)
memprofiler: used 272.57 MiB RAM (peak of 2993.96 MiB) in 3.6537 s, total RAM usage 4306.12 MiB
[10]:
del h5dask_reduc
del precip_h5dask
del h5_reduc

TileDB

[11]:
# TileDB does not come with a default chunk shape (tiles), so we reuse ironArray's
shape = ia_precip.shape
adom = tiledb.Domain(
        tiledb.Dim(name="rows", domain=(0, shape[0] - 1), dtype=np.int32, tile=chunks[0]),
        tiledb.Dim(name="cols", domain=(0, shape[1] - 1), dtype=np.int32, tile=chunks[1]),
        tiledb.Dim(name="else", domain=(0, shape[2] - 1), dtype=np.int32, tile=chunks[2]),
        tiledb.Dim(name="else2", domain=(0, shape[3] - 1), dtype=np.int32, tile=chunks[3]),
    )

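# Byte-shuffle + LZ4 (level 5), roughly matching the Blosc(cname='lz4', clevel=5, shuffle=SHUFFLE) compressor reported by Zarr above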
filters = tiledb.FilterList([tiledb.ByteShuffleFilter(), tiledb.LZ4Filter(5)])
aschema = tiledb.ArraySchema(
    domain=adom,  sparse=False, attrs=[tiledb.Attr(name="a", dtype=np.float32, filters=filters)]
)

# Create the (empty) array in memory (the mem:// URI uses TileDB's in-memory filesystem).
tiledb_name = "mem://precip-3m-optimal.tiledb"
if not os.path.exists(tiledb_name):
    tiledb.DenseArray.create(tiledb_name, aschema)
    with tiledb.DenseArray(tiledb_name, mode="w") as A_opt:
        A_opt[:] = ia_precip.data

dask_tiledb = da.from_tiledb(tiledb_name, attribute='a')
[12]:
%%mprof_run 4.tiledb::mean_memory
res_dask_opt = da.mean(dask_tiledb, axis=(3,2,0))
res_dask_opt.to_tiledb("mem://res_tiledb")
memprofiler: used 507.21 MiB RAM (peak of 519.75 MiB) in 5.7167 s, total RAM usage 5545.39 MiB
[13]:
del res_dask_opt
del dask_tiledb
tiledb.remove(tiledb_name)
tiledb.remove("mem://res_tiledb")

NumPy

Let’s do the same computation with NumPy. First we will get a NumPy array from the ironArray one (this decompresses the full dataset, about 8.4 GB, into memory):

[14]:
%%time
precip = ia_precip.data
CPU times: user 19.4 s, sys: 3.28 s, total: 22.7 s
Wall time: 6.69 s

and now the actual reduction (the result is just a small (720,) float32 array, which is why this step allocates almost no additional memory):

[15]:
%%mprof_run 5.numpy::mean_memory
np_reduc = np.mean(precip, axis=(3, 2, 0))
memprofiler: used 0.01 MiB RAM (peak of 0.01 MiB) in 0.4715 s, total RAM usage 13037.74 MiB
[16]:
np.testing.assert_almost_equal(ia_reduc, np_reduc)
del precip
del np_reduc

Results

Finally, let’s plot the memory consumption and execution time for each library.

[17]:
%mprof_plot .*::mean