Skip to content
Open
6 changes: 4 additions & 2 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -5,16 +5,17 @@ description = "Add your description here"
readme = "README.md"
requires-python = ">=3.12"
dependencies = [
"astropy~=7.0.1",
"astropy~=7.2.0",
"cartopy~=0.24.1",
"cf-xarray~=0.10",
"cftime~=1.6.0",
"dask>=2024.12.0,<2025.4",
"matplotlib~=3.8",
"netcdf4==1.7.3",
"numcodecs>=0.13.0,<0.17",
"numcodecs-combinators[xarray]~=0.2.10",
"numcodecs-combinators[xarray]~=0.2.13",
"numcodecs-observers~=0.1.2",
"numcodecs-safeguards==0.1.0b2",
"numcodecs-wasm~=0.2.2",
"numcodecs-wasm-bit-round~=0.4.0",
"numcodecs-wasm-fixed-offset-scale~=0.4.0",
Expand All @@ -28,6 +29,7 @@ dependencies = [
"numcodecs-wasm-zfp~=0.6.0",
"numcodecs-wasm-zfp-classic~=0.4.0",
"numcodecs-wasm-zstd~=0.4.0",
"numcodecs-zero~=0.1.2",
"pandas~=2.2",
"scipy~=1.14",
"seaborn~=0.13.2",
Expand Down
14 changes: 14 additions & 0 deletions src/climatebenchpress/compressor/compressors/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,12 @@
"BitRound",
"BitRoundPco",
"Jpeg2000",
"SafeguardedBitRoundPco",
"SafeguardedSperr",
"SafeguardedSz3",
"SafeguardedZero",
"SafeguardedZeroDssim",
"SafeguardedZfpRound",
"Sperr",
"StochRound",
"StochRoundPco",
Expand All @@ -15,6 +21,14 @@
from .bitround import BitRound
from .bitround_pco import BitRoundPco
from .jpeg2000 import Jpeg2000
from .safeguarded import (
SafeguardedBitRoundPco,
SafeguardedSperr,
SafeguardedSz3,
SafeguardedZero,
SafeguardedZeroDssim,
SafeguardedZfpRound,
)
from .sperr import Sperr
from .stochround import StochRound
from .stochround_pco import StochRoundPco
Expand Down
24 changes: 24 additions & 0 deletions src/climatebenchpress/compressor/compressors/abc.py
Original file line number Diff line number Diff line change
Expand Up @@ -88,6 +88,10 @@ def abs_bound_codec(
dtype: Optional[np.dtype] = None,
data_min: Optional[float] = None,
data_max: Optional[float] = None,
data_abs_min: Optional[float] = None,
data_abs_max: Optional[float] = None,
data_min_2d: Optional[np.ndarray] = None,
data_max_2d: Optional[np.ndarray] = None,
) -> Codec:
"""Create a codec with an absolute error bound."""
pass
Expand All @@ -100,6 +104,10 @@ def rel_bound_codec(
dtype: Optional[np.dtype] = None,
data_min: Optional[float] = None,
data_max: Optional[float] = None,
data_abs_min: Optional[float] = None,
data_abs_max: Optional[float] = None,
data_min_2d: Optional[np.ndarray] = None,
data_max_2d: Optional[np.ndarray] = None,
) -> Codec:
"""Create a codec with a relative error bound."""
pass
Expand All @@ -112,6 +120,8 @@ def build(
data_abs_max: dict[VariableName, float],
data_min: dict[VariableName, float],
data_max: dict[VariableName, float],
data_min_2d: dict[VariableName, np.ndarray],
data_max_2d: dict[VariableName, np.ndarray],
error_bounds: list[dict[VariableName, ErrorBound]],
) -> dict[VariantName, list[NamedPerVariableCodec]]:
"""
Expand All @@ -135,6 +145,12 @@ def build(
Dict mapping from variable name to minimum value for the variable.
data_max : dict[VariableName, float]
Dict mapping from variable name to maximum value for the variable.
data_min_2d : dict[VariableName, np.ndarray]
Dict mapping from variable name to per-lat-lon-slice minimum value for the
variable.
data_max_2d : dict[VariableName, np.ndarray]
Dict mapping from variable name to per-lat-lon-slice maximum value for the
variable.
error_bounds: list[ErrorBound]
List of error bounds to use for the compressor.

Expand Down Expand Up @@ -167,6 +183,10 @@ def build(
dtype=dtypes[var],
data_min=data_min[var],
data_max=data_max[var],
data_abs_min=data_abs_min[var],
data_abs_max=data_abs_max[var],
data_min_2d=data_min_2d[var],
data_max_2d=data_max_2d[var],
)
elif eb.rel_error is not None and cls.has_rel_error_impl:
new_codecs[var] = partial(
Expand All @@ -175,6 +195,10 @@ def build(
dtype=dtypes[var],
data_min=data_min[var],
data_max=data_max[var],
data_abs_min=data_abs_min[var],
data_abs_max=data_abs_max[var],
data_min_2d=data_min_2d[var],
data_max_2d=data_max_2d[var],
)
else:
# This should never happen as we have already transformed the error bounds.
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
# Public API of the package: these names are imported from the sibling
# submodules below and are the ones exposed by a star-import.
__all__ = [
    "SafeguardedBitRoundPco",
    "SafeguardedSperr",
    "SafeguardedSz3",
    "SafeguardedZero",
    "SafeguardedZeroDssim",
    "SafeguardedZfpRound",
]

from .bitround_pco import SafeguardedBitRoundPco
from .sperr import SafeguardedSperr
from .sz3 import SafeguardedSz3
from .zero import SafeguardedZero
from .zero_dssim import SafeguardedZeroDssim
from .zfp_round import SafeguardedZfpRound
Original file line number Diff line number Diff line change
@@ -0,0 +1,66 @@
# Only the compressor class is part of this module's public API.
__all__ = ["SafeguardedBitRoundPco"]


import numcodecs_safeguards
import numcodecs_wasm_bit_round
import numcodecs_wasm_pco

from ..abc import Compressor
from ..utils import compute_keepbits


class SafeguardedBitRoundPco(Compressor):
    """Bit Rounding + PCodec compressor wrapped in error-bound safeguards.

    Bit rounding is applied to the data first, lowering its precision while
    keeping its overall structure; PCodec is then used for further
    compression. The requested error bound is additionally handed to the
    safeguards wrapper.
    """

    name = "safeguarded-bitround-pco"
    description = "Safeguarded(Bit Rounding + PCodec)"

    @staticmethod
    def abs_bound_codec(error_bound, *, dtype=None, data_abs_max=None, **kwargs):
        """Build the safeguarded codec for an absolute error bound.

        ``dtype`` and ``data_abs_max`` are required despite their ``None``
        defaults; any further keyword arguments are accepted and ignored.
        """
        assert dtype is not None, "dtype must be provided"
        assert data_abs_max is not None, "data_abs_max must be provided"

        # Bit rounding is parameterised by a relative precision, so the
        # absolute bound is conservatively converted to a relative one (same
        # as convert_abs_error_to_rel_error). The safeguards still receive the
        # original absolute bound.
        relative_bound = error_bound / data_abs_max
        n_keepbits = compute_keepbits(dtype, relative_bound)

        return _safeguarded_bitround_pco(n_keepbits, "abs", error_bound)

    @staticmethod
    def rel_bound_codec(error_bound, *, dtype=None, **kwargs):
        """Build the safeguarded codec for a relative error bound.

        ``dtype`` is required despite its ``None`` default; any further
        keyword arguments are accepted and ignored.
        """
        assert dtype is not None, "dtype must be provided"

        n_keepbits = compute_keepbits(dtype, error_bound)

        return _safeguarded_bitround_pco(n_keepbits, "rel", error_bound)


def _safeguarded_bitround_pco(keepbits, bound_type, error_bound):
    """Assemble BitRound + PCodec inside a ``SafeguardedCodec``.

    ``bound_type`` is the safeguard's ``type`` entry (``"abs"`` or ``"rel"``)
    and ``error_bound`` its ``eb`` entry.
    """
    rounding = numcodecs_wasm_bit_round.BitRound(keepbits=keepbits)
    pco = numcodecs_wasm_pco.Pco(
        level=8,
        mode="auto",
        delta="auto",
        paging="equal-pages-up-to",
    )
    lossless_stage = numcodecs_safeguards.lossless.Lossless(for_codec=pco)
    bound_safeguard = {
        "kind": "eb",
        "type": bound_type,
        "eb": error_bound,
        "equal_nan": True,
    }

    return numcodecs_safeguards.SafeguardedCodec(
        codec=rounding,
        lossless=lossless_stage,
        safeguards=[bound_safeguard],
    )
60 changes: 60 additions & 0 deletions src/climatebenchpress/compressor/compressors/safeguarded/sperr.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,60 @@
# Only the compressor class is public; the NaNToMean helper codec defined in
# this module is deliberately not listed here.
__all__ = ["SafeguardedSperr"]

import numcodecs
import numcodecs.abc
import numcodecs.compat
import numcodecs_safeguards
import numcodecs_wasm_sperr
import numpy as np
from numcodecs_combinators.stack import CodecStack

from ..abc import Compressor


class SafeguardedSperr(Compressor):
    """SPERR compressor wrapped in error-bound safeguards.

    NaN values are replaced via ``NaNToMean`` before the data reaches SPERR,
    which runs in point-wise-error (``pwe``) mode.
    """

    name = "safeguarded-sperr"
    description = "Safeguarded(SPERR)"

    @staticmethod
    def abs_bound_codec(error_bound, **kwargs):
        """Build the safeguarded codec for an absolute error bound.

        Extra keyword arguments are accepted and ignored.
        """
        nan_filter = NaNToMean()
        sperr = numcodecs_wasm_sperr.Sperr(mode="pwe", pwe=error_bound)
        abs_safeguard = {
            "kind": "eb",
            "type": "abs",
            "eb": error_bound,
            "equal_nan": True,
        }

        return numcodecs_safeguards.SafeguardedCodec(
            codec=CodecStack(nan_filter, sperr),
            safeguards=[abs_safeguard],
        )

    @staticmethod
    def rel_bound_codec(error_bound, *, data_abs_min=None, **kwargs):
        """Build the safeguarded codec for a relative error bound.

        ``data_abs_min`` is required despite its ``None`` default; extra
        keyword arguments are accepted and ignored.
        """
        assert data_abs_min is not None, "data_abs_min must be provided"

        nan_filter = NaNToMean()
        # SPERR only receives a point-wise absolute tolerance here, so the
        # relative bound is conservatively converted to an absolute one (same
        # as convert_rel_error_to_abs_error). The safeguards still receive the
        # original relative bound.
        pointwise_tolerance = error_bound * data_abs_min
        sperr = numcodecs_wasm_sperr.Sperr(mode="pwe", pwe=pointwise_tolerance)
        rel_safeguard = {
            "kind": "eb",
            "type": "rel",
            "eb": error_bound,
            "equal_nan": True,
        }

        return numcodecs_safeguards.SafeguardedCodec(
            codec=CodecStack(nan_filter, sperr),
            safeguards=[rel_safeguard],
        )


# inspired by H5Z-SPERR's treatment of NaN values:
# https://github.com/NCAR/H5Z-SPERR/blob/72ebcb00e382886c229c5ef5a7e237fe451d5fb8/src/h5z-sperr.c#L464-L473
# https://github.com/NCAR/H5Z-SPERR/blob/72ebcb00e382886c229c5ef5a7e237fe451d5fb8/src/h5zsperr_helper.cpp#L179-L212
class NaNToMean(numcodecs.abc.Codec):
    """Codec that overwrites NaN values with the mean of the non-NaN values.

    Encoding is one-way: the positions of the replaced NaNs are not recorded,
    and decoding simply copies the buffer through.
    """

    codec_id = "nan-to-mean"  # type: ignore

    def encode(self, buf):
        """Return a copy of ``buf`` with every NaN replaced by a fill value.

        The fill value is ``np.nanmean(buf)``. When ``buf`` is empty or holds
        nothing but NaNs there is no mean to take, so ``0.0`` is used instead.
        Positive and negative infinities are passed through unchanged.
        """
        values = np.asanyarray(buf)

        # np.nanmean returns NaN (and emits a RuntimeWarning) for all-NaN or
        # empty input, which would leave the NaNs in place and hand them to
        # the next codec. Fall back to a finite fill value in that case.
        # (np.all over an empty array is True, so empty input is covered too.)
        if np.all(np.isnan(values)):
            fill_value = 0.0
        else:
            fill_value = np.nanmean(values)

        return np.nan_to_num(buf, nan=fill_value, posinf=np.inf, neginf=-np.inf)

    def decode(self, buf, out=None):
        """Copy ``buf`` into ``out`` (or a new array) without altering values."""
        return numcodecs.compat.ndarray_copy(buf, out)
31 changes: 31 additions & 0 deletions src/climatebenchpress/compressor/compressors/safeguarded/sz3.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
# Only the compressor class is part of this module's public API.
__all__ = ["SafeguardedSz3"]

import numcodecs_safeguards
import numcodecs_wasm_sz3

from ..abc import Compressor


class SafeguardedSz3(Compressor):
    """SZ3 compressor wrapped in error-bound safeguards.

    The same error bound is given both to SZ3 itself and to the safeguards.
    """

    name = "safeguarded-sz3"
    description = "Safeguarded(SZ3)"

    @staticmethod
    def abs_bound_codec(error_bound, **kwargs):
        """Build the safeguarded codec for an absolute error bound.

        Extra keyword arguments are accepted and ignored.
        """
        sz3 = numcodecs_wasm_sz3.Sz3(eb_mode="abs", eb_abs=error_bound)
        abs_safeguard = {
            "kind": "eb",
            "type": "abs",
            "eb": error_bound,
            "equal_nan": True,
        }

        return numcodecs_safeguards.SafeguardedCodec(
            codec=sz3,
            safeguards=[abs_safeguard],
        )

    @staticmethod
    def rel_bound_codec(error_bound, **kwargs):
        """Build the safeguarded codec for a relative error bound.

        Extra keyword arguments are accepted and ignored.
        """
        sz3 = numcodecs_wasm_sz3.Sz3(eb_mode="rel", eb_rel=error_bound)
        rel_safeguard = {
            "kind": "eb",
            "type": "rel",
            "eb": error_bound,
            "equal_nan": True,
        }

        return numcodecs_safeguards.SafeguardedCodec(
            codec=sz3,
            safeguards=[rel_safeguard],
        )
31 changes: 31 additions & 0 deletions src/climatebenchpress/compressor/compressors/safeguarded/zero.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
# Only the compressor class is part of this module's public API.
__all__ = ["SafeguardedZero"]

import numcodecs_safeguards
import numcodecs_zero

from ..abc import Compressor


class SafeguardedZero(Compressor):
    """All-zero codec wrapped in error-bound safeguards.

    The inner ``ZeroCodec`` takes no error bound; only the safeguards are
    told the requested bound.
    """

    name = "safeguarded-zero"
    description = "Safeguarded(0)"

    @staticmethod
    def abs_bound_codec(error_bound, **kwargs):
        """Build the safeguarded codec for an absolute error bound.

        Extra keyword arguments are accepted and ignored.
        """
        zero = numcodecs_zero.ZeroCodec()
        abs_safeguard = {
            "kind": "eb",
            "type": "abs",
            "eb": error_bound,
            "equal_nan": True,
        }

        return numcodecs_safeguards.SafeguardedCodec(
            codec=zero,
            safeguards=[abs_safeguard],
        )

    @staticmethod
    def rel_bound_codec(error_bound, **kwargs):
        """Build the safeguarded codec for a relative error bound.

        Extra keyword arguments are accepted and ignored.
        """
        zero = numcodecs_zero.ZeroCodec()
        rel_safeguard = {
            "kind": "eb",
            "type": "rel",
            "eb": error_bound,
            "equal_nan": True,
        }

        return numcodecs_safeguards.SafeguardedCodec(
            codec=zero,
            safeguards=[rel_safeguard],
        )
Loading