Slicing Datasets and Creating Views

In this benchmark we will use the data from the Slicing Datasets and Creating Views tutorial to measure ironArray’s performance and compare it against Zarr, HDF5 and TileDB, other chunked and compressed array formats. In the in-memory scenario we will also compare against NumPy.

Opening the Dataset

We will first open the dataset and use the optimized chunk and block shapes obtained in the corresponding tutorial.

[1]:
import zarr
import h5py
import hdf5plugin as h5plugin
import numpy as np
import iarray as ia
from numcodecs import blosc, Blosc
import random
import os
import tiledb

# Disable automatic tuning and fix LZ4 + shuffle as ironArray's codec and filter
ia.set_config_defaults(btune=False, codec=ia.Codec.LZ4, filters=[ia.Filter.SHUFFLE])
# Let Blosc (used by Zarr below) run with the same number of threads as ironArray
blosc.set_nthreads(ia.get_config_defaults().nthreads)

%load_ext memprofiler
[2]:
%%time
ia_urlpath = "../tutorials/precip_slicing.iarr"
precip_ia = ia.open(ia_urlpath)
print(precip_ia.info)
type   : IArray
shape  : (720, 721, 1440)
chunks : (128, 128, 256)
blocks : (16, 32, 64)
cratio : 8.49

CPU times: user 143 µs, sys: 196 µs, total: 339 µs
Wall time: 544 µs
[3]:
chunks = (precip_ia.shape[0], 8, precip_ia.shape[2])
blocks = (64, 4, 64)

precip_ia_op = precip_ia.copy(chunks=chunks, blocks=blocks)
print(precip_ia_op.info)
type   : IArray
shape  : (720, 721, 1440)
chunks : (720, 8, 1440)
blocks : (64, 4, 64)
cratio : 8.79
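These chunks span dimensions 0 and 2 entirely, so a slice with a fixed index along dimension 1 only has to visit a single chunk. A quick back-of-the-envelope count of the chunks touched by each kind of slice makes the trade-off explicit (plain Python, using the shapes above; for single-level chunked formats such as Zarr, HDF5 and TileDB this count roughly tracks the decompression work, whereas ironArray’s second-level blocks can soften the penalty along the other dimensions):

import math

shape = (720, 721, 1440)

def chunks_touched(chunk_shape, fixed_dim):
    """Chunks that a slice with one fixed index has to visit for a given chunking."""
    n = 1
    for d, (s, c) in enumerate(zip(shape, chunk_shape)):
        n *= 1 if d == fixed_dim else math.ceil(s / c)
    return n

for label, c in [("original ", (128, 128, 256)), ("optimized", (720, 8, 1440))]:
    print(label, [chunks_touched(c, d) for d in range(3)])

With the optimized layout a dimension-1 slice visits one chunk instead of 36, while slices along dimensions 0 and 2 now visit 91 chunks each, which is where the small (64, 4, 64) blocks matter.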

We will take slices at random indices along each dimension of the array:

[4]:
# 10 distinct random indices, valid along every dimension (bounded by the smallest extent)
ind = random.sample(range(min(precip_ia.shape)), 10)

In-memory performance

Let’s start by measuring performance when the data is in memory.

ironArray

[5]:
%%mprof_run 1.iarray::dim0

for i in ind:
    ia_0 = precip_ia_op[i, :, :].data
memprofiler: used -3.79 MiB RAM (peak of 1.84 MiB) in 0.1944 s, total RAM usage 625.98 MiB
[6]:
%%mprof_run 1.iarray::dim1

for i in ind:
    ia_1 = precip_ia_op[:, i, :].data
memprofiler: used 0.11 MiB RAM (peak of 17.84 MiB) in 0.0351 s, total RAM usage 626.09 MiB
[7]:
%%mprof_run 1.iarray::dim2

for i in ind:
    ia_2 = precip_ia_op[:, :, i].data
memprofiler: used -0.90 MiB RAM (peak of 3.51 MiB) in 0.2000 s, total RAM usage 625.40 MiB
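
Note that the timings above include materializing each slice as a NumPy array through .data, so the results are comparable with the other libraries, which return NumPy arrays as well. A minimal illustration of the two steps (the exact type names are ironArray implementation details and may vary between versions):

view = precip_ia_op[ind[0], :, :]   # slicing yields an ironArray object (see the views tutorial)
arr = view.data                     # .data decompresses it into a NumPy ndarray
print(type(view), type(arr), arr.shape, arr.dtype)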

Zarr

[8]:
# In-memory Zarr array with the same chunking and an equivalent Blosc LZ4 + shuffle compressor
compressor = Blosc(cname='lz4', clevel=5, shuffle=Blosc.SHUFFLE)
precip = precip_ia.data   # materialize the whole dataset as a NumPy array first
precip_zarr = zarr.array(precip, chunks=chunks, compressor=compressor)
[9]:
%%mprof_run 2.zarr::dim0

for i in ind:
    zarr_0 = precip_zarr[i, :, :]
memprofiler: used 4.09 MiB RAM (peak of 4.09 MiB) in 0.6405 s, total RAM usage 3843.06 MiB
[10]:
%%mprof_run 2.zarr::dim1

for i in ind:
    zarr_1 = precip_zarr[:, i, :]
memprofiler: used 3.23 MiB RAM (peak of 3.23 MiB) in 0.0288 s, total RAM usage 3846.29 MiB
[11]:
%%mprof_run 2.zarr::dim2

for i in ind:
    zarr_2 = precip_zarr[:, :, i]
memprofiler: used 1.99 MiB RAM (peak of 1.99 MiB) in 0.6719 s, total RAM usage 3848.29 MiB

HDF5

[12]:
h5_urlpath = "precip_slicing.hdf5"


if not os.path.exists(h5_urlpath):
    with h5py.File(h5_urlpath, "w") as f:
        h5_precip = f.create_dataset("h5_precip", precip_ia.shape, dtype=precip_ia.dtype,
                                     chunks=chunks, **h5plugin.Blosc())
        precip_ia.copyto(h5_precip)


# The 'core' driver with backing_store=False keeps the whole file image in memory,
# so HDF5 reads are also served from RAM
h5_file = h5py.File(h5_urlpath, "r", driver='core', backing_store=False)
precip_h5 = h5_file['h5_precip']
[13]:
%%mprof_run 3.hdf5::dim0
for i in ind:
    h5_0 = precip_h5[i, :, :]
memprofiler: used 31.87 MiB RAM (peak of 31.87 MiB) in 6.1577 s, total RAM usage 4270.38 MiB
[14]:
%%mprof_run 3.hdf5::dim1
for i in ind:
    h5_1 = precip_h5[:, i, :]
memprofiler: used 0.00 MiB RAM (peak of 0.00 MiB) in 0.0940 s, total RAM usage 4270.39 MiB
[15]:
%%mprof_run 3.hdf5::dim2
for i in ind:
    h5_2 = precip_h5[:, :, i]
memprofiler: used 0.00 MiB RAM (peak of 0.00 MiB) in 6.3382 s, total RAM usage 4270.40 MiB

TileDB

[16]:
%%time
shape = precip_ia_op.shape
adom = tiledb.Domain(
    tiledb.Dim(name="rows", domain=(0, shape[0] - 1), dtype=np.int32, tile=chunks[0]),
    tiledb.Dim(name="cols", domain=(0, shape[1] - 1), dtype=np.int32, tile=chunks[1]),
    tiledb.Dim(name="else", domain=(0, shape[2] - 1), dtype=np.int32, tile=chunks[2]),
)

filters = tiledb.FilterList([tiledb.ByteShuffleFilter(), tiledb.LZ4Filter(5)])
schema = tiledb.ArraySchema(
    domain=adom, sparse=False, attrs=[tiledb.Attr(name="a", dtype=np.float32, filters=filters)]
)

# Create the (empty) array in TileDB's in-memory filesystem (mem:// VFS).
mem_tiledb_urlpath = "mem://mem_precip_tiledb"

tiledb.DenseArray.create(mem_tiledb_urlpath, schema)
with tiledb.DenseArray(mem_tiledb_urlpath, mode="w") as precip_tile:
    precip_tile[:] = precip_ia_op.data
CPU times: user 22.6 s, sys: 6.33 s, total: 29 s
Wall time: 2.31 s
[17]:
%%mprof_run 4.tiledb::dim0
with tiledb.DenseArray(mem_tiledb_urlpath, mode="r") as precip_tile:
    for i in ind:
        tiledb_0 = precip_tile[i, :, :]
memprofiler: used 1838.72 MiB RAM (peak of 3271.30 MiB) in 6.2459 s, total RAM usage 6541.73 MiB
[18]:
%%mprof_run 4.tiledb::dim1
with tiledb.DenseArray(mem_tiledb_urlpath, mode="r") as precip_tile:
    for i in ind:
        tiledb_1 = precip_tile[:, i, :]
memprofiler: used -1850.36 MiB RAM (peak of 0.00 MiB) in 0.1744 s, total RAM usage 4691.41 MiB
[19]:
%%mprof_run 4.tiledb::dim2
with tiledb.DenseArray(mem_tiledb_urlpath, mode="r") as precip_tile:
    for i in ind:
        tiledb_2 = precip_tile[:, :, i]
memprofiler: used 1969.28 MiB RAM (peak of 3329.96 MiB) in 7.1302 s, total RAM usage 6660.69 MiB

NumPy

[20]:
precip_np = precip_ia_op.data

NumPy slicing returns views, so we force a copy to compare apples to apples (see the quick check after these cells):

[21]:
%%mprof_run 5.numpy::dim0

for i in ind:
    np_0 = precip_np[i, :, :].copy()
memprofiler: used 0.00 MiB RAM (peak of 0.00 MiB) in 0.0076 s, total RAM usage 9513.91 MiB
[22]:
%%mprof_run 5.numpy::dim1

for i in ind:
    np_1 = precip_np[:, i, :].copy()
memprofiler: used 0.00 MiB RAM (peak of 0.00 MiB) in 0.0089 s, total RAM usage 9513.91 MiB
[23]:
%%mprof_run 5.numpy::dim2

for i in ind:
    np_2 = precip_np[:, :, i].copy()
memprofiler: used 0.00 MiB RAM (peak of 0.00 MiB) in 0.0317 s, total RAM usage 9513.93 MiB
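
The forced copy matters: a basic NumPy slice is just a view that shares memory with the parent array, so without .copy() NumPy would barely move any data at all. A quick check:

sl = precip_np[:, ind[0], :]
print(np.shares_memory(sl, precip_np))          # True: the slice is a view, no data copied
print(np.shares_memory(sl.copy(), precip_np))   # False: the copy owns its own buffer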

Results

[24]:
np.testing.assert_almost_equal(ia_0, zarr_0)
np.testing.assert_almost_equal(ia_1, zarr_1)
np.testing.assert_almost_equal(ia_2, zarr_2)
np.testing.assert_almost_equal(ia_0, h5_0)
np.testing.assert_almost_equal(ia_1, h5_1)
np.testing.assert_almost_equal(ia_2, h5_2)
np.testing.assert_almost_equal(ia_0, np_0)
np.testing.assert_almost_equal(ia_1, np_1)
np.testing.assert_almost_equal(ia_2, np_2)
np.testing.assert_almost_equal(ia_0, tiledb_0["a"])
np.testing.assert_almost_equal(ia_1, tiledb_1["a"])
np.testing.assert_almost_equal(ia_2, tiledb_2["a"])

Now, since we cannot control the Python memory manager, we are only going to analyze the execution times:

[25]:
%mprof_barplot -t "Slicing Performance (with an optimized dimension)" --variable time  .*zarr::.* .*hdf5::.* .*iarray::.* .*numpy::.* .*tiledb::.*
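
For completeness, if the memprofiler extension is not available, the wall-clock part of the comparison can be reproduced with the standard library alone; a minimal sketch that reuses the arrays created above (timings will not match the magics exactly):

from time import perf_counter

def time_slices(getter, label):
    """Time slicing at the same random indices along dimension 1."""
    t0 = perf_counter()
    for i in ind:
        _ = getter(i)
    print(f"{label:8s} dim1: {perf_counter() - t0:.4f} s")

time_slices(lambda i: precip_ia_op[:, i, :].data, "iarray")
time_slices(lambda i: precip_zarr[:, i, :], "zarr")
time_slices(lambda i: precip_h5[:, i, :], "hdf5")
time_slices(lambda i: precip_np[:, i, :].copy(), "numpy")
with tiledb.DenseArray(mem_tiledb_urlpath, mode="r") as precip_tile:
    time_slices(lambda i: precip_tile[:, i, :], "tiledb")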