Source code for xdas.io.utils
"""
HDF5 utility functions for compressing datasets.
Preserves file structure and metadata during compression.
"""
import h5py
import hdf5plugin # noqa
[docs]
def compress(src_path: str, dst_path: str, dataset_location: str, encoding: dict):
"""
Compress a specific dataset in an HDF5 file while preserving the rest of the file structure and metadata.
Parameters
----------
src_path : str
Path to the original .hdf5 file.
dst_path : str
Path to save the compressed .hdf5 file.
dataset_location : str
Path to the dataset to compress inside the HDF5 file.
encoding : dict
Dictionary of encoding options for the dataset.
Should contain the following keys:
- 'compression': the compression algorithm to use and its parameters, part of the hdf5plugin library
- 'chunks': the chunk size for the dataset, should be a tuple of integers, default to False for no chunking
"""
if "chunks" in encoding.keys() and not encoding["chunks"]:
encoding.pop("chunks")
with h5py.File(src_path, "r") as src_file, h5py.File(dst_path, "w") as dst_file:
dataset_name = "/" + dataset_location.lstrip("/")
def _copy(src_group, dst_group, current_path):
# Copy group attributes
dst_group.attrs.update(src_group.attrs)
for name, obj in src_group.items():
obj_path = current_path.rstrip("/") + "/" + name
# Compress the chosen dataset
if obj_path == dataset_name:
data = src_file[dataset_name][()]
ds = dst_group.create_dataset(name, data=data, **encoding)
for key, val in src_file[dataset_name].attrs.items():
ds.attrs[key] = val
# Copy the group
elif isinstance(obj, h5py.Group):
grp = dst_group.create_group(name)
_copy(obj, grp, obj_path)
# Copy the rest
else:
src_group.file.copy(obj, dst_group, name=name)
_copy(src_file, dst_file, "/")