From 6dc32250402895cc05a6cab697c41a60d96a80de Mon Sep 17 00:00:00 2001 From: Juniper Tyree Date: Fri, 5 Sep 2025 15:48:16 +0300 Subject: [PATCH 01/14] Draft safeguards compressor wrappers --- .../compressor/compressors/__init__.py | 10 ++++++ .../compressors/safeguards/__init__.py | 11 ++++++ .../compressors/safeguards/sperr.py | 22 ++++++++++++ .../compressor/compressors/safeguards/sz3.py | 31 +++++++++++++++++ .../compressor/compressors/safeguards/zero.py | 31 +++++++++++++++++ .../compressors/safeguards/zfp_round.py | 34 +++++++++++++++++++ 6 files changed, 139 insertions(+) create mode 100644 src/climatebenchpress/compressor/compressors/safeguards/__init__.py create mode 100644 src/climatebenchpress/compressor/compressors/safeguards/sperr.py create mode 100644 src/climatebenchpress/compressor/compressors/safeguards/sz3.py create mode 100644 src/climatebenchpress/compressor/compressors/safeguards/zero.py create mode 100644 src/climatebenchpress/compressor/compressors/safeguards/zfp_round.py diff --git a/src/climatebenchpress/compressor/compressors/__init__.py b/src/climatebenchpress/compressor/compressors/__init__.py index b523c75..31bcf95 100644 --- a/src/climatebenchpress/compressor/compressors/__init__.py +++ b/src/climatebenchpress/compressor/compressors/__init__.py @@ -2,6 +2,10 @@ "BitRound", "BitRoundPco", "Jpeg2000", + "SafeguardsSperr", + "SafeguardsSz3", + "SafeguardsZero", + "SafeguardsZfpRound", "Sperr", "StochRound", "StochRoundPco", @@ -15,6 +19,12 @@ from .bitround import BitRound from .bitround_pco import BitRoundPco from .jpeg2000 import Jpeg2000 +from .safeguards import ( + SafeguardsSperr, + SafeguardsSz3, + SafeguardsZero, + SafeguardsZfpRound, +) from .sperr import Sperr from .stochround import StochRound from .stochround_pco import StochRoundPco diff --git a/src/climatebenchpress/compressor/compressors/safeguards/__init__.py b/src/climatebenchpress/compressor/compressors/safeguards/__init__.py new file mode 100644 index 0000000..ffce923 --- 
/dev/null +++ b/src/climatebenchpress/compressor/compressors/safeguards/__init__.py @@ -0,0 +1,11 @@ +__all__ = [ + "SafeguardsSperr", + "SafeguardsSz3", + "SafeguardsZero", + "SafeguardsZfpRound", +] + +from .sperr import SafeguardsSperr +from .sz3 import SafeguardsSz3 +from .zero import SafeguardsZero +from .zfp_round import SafeguardsZfpRound diff --git a/src/climatebenchpress/compressor/compressors/safeguards/sperr.py b/src/climatebenchpress/compressor/compressors/safeguards/sperr.py new file mode 100644 index 0000000..c49183c --- /dev/null +++ b/src/climatebenchpress/compressor/compressors/safeguards/sperr.py @@ -0,0 +1,22 @@ +__all__ = ["SafeguardsSperr"] + +import numcodecs_safeguards +import numcodecs_wasm_sperr + +from ..abc import Compressor + + +class SafeguardsSperr(Compressor): + """Safeguarded SPERR compressor.""" + + name = "safeguards-sperr" + description = "Safeguards(SPERR)" + + @staticmethod + def abs_bound_codec(error_bound, **kwargs): + return numcodecs_safeguards.SafeguardsCodec( + codec=numcodecs_wasm_sperr.Sperr(mode="pwe", pwe=error_bound), + safeguards=[ + dict(kind="eb", type="abs", eb=error_bound, equal_nan=True), + ], + ) diff --git a/src/climatebenchpress/compressor/compressors/safeguards/sz3.py b/src/climatebenchpress/compressor/compressors/safeguards/sz3.py new file mode 100644 index 0000000..480992c --- /dev/null +++ b/src/climatebenchpress/compressor/compressors/safeguards/sz3.py @@ -0,0 +1,31 @@ +__all__ = ["SafeguardsSz3"] + +import numcodecs_safeguards +import numcodecs_wasm_sz3 + +from ..abc import Compressor + + +class SafeguardsSz3(Compressor): + """Safeguarded SZ3 compressor.""" + + name = "safeguards-sz3" + description = "Safeguards(SZ3)" + + @staticmethod + def abs_bound_codec(error_bound, **kwargs): + return numcodecs_safeguards.SafeguardsCodec( + codec=numcodecs_wasm_sz3.Sz3(eb_mode="abs", eb_abs=error_bound), + safeguards=[ + dict(kind="eb", type="abs", eb=error_bound, equal_nan=True), + ], + ) + + @staticmethod + def 
rel_bound_codec(error_bound, **kwargs): + return numcodecs_safeguards.SafeguardsCodec( + codec=numcodecs_wasm_sz3.Sz3(eb_mode="rel", eb_rel=error_bound), + safeguards=[ + dict(kind="eb", type="rel", eb=error_bound, equal_nan=True), + ], + ) diff --git a/src/climatebenchpress/compressor/compressors/safeguards/zero.py b/src/climatebenchpress/compressor/compressors/safeguards/zero.py new file mode 100644 index 0000000..d156c8a --- /dev/null +++ b/src/climatebenchpress/compressor/compressors/safeguards/zero.py @@ -0,0 +1,31 @@ +__all__ = ["SafeguardsZero"] + +import numcodecs_safeguards +import numcodecs_zero + +from ..abc import Compressor + + +class SafeguardsZero(Compressor): + """Safeguarded all-zero compressor.""" + + name = "safeguards-zero" + description = "Safeguards(0)" + + @staticmethod + def abs_bound_codec(error_bound, **kwargs): + return numcodecs_safeguards.SafeguardsCodec( + codec=numcodecs_zero.ZeroCodec(), + safeguards=[ + dict(kind="eb", type="abs", eb=error_bound, equal_nan=True), + ], + ) + + @staticmethod + def rel_bound_codec(error_bound, **kwargs): + return numcodecs_safeguards.SafeguardsCodec( + codec=numcodecs_zero.ZeroCodec(), + safeguards=[ + dict(kind="eb", type="rel", eb=error_bound, equal_nan=True), + ], + ) diff --git a/src/climatebenchpress/compressor/compressors/safeguards/zfp_round.py b/src/climatebenchpress/compressor/compressors/safeguards/zfp_round.py new file mode 100644 index 0000000..81b8caa --- /dev/null +++ b/src/climatebenchpress/compressor/compressors/safeguards/zfp_round.py @@ -0,0 +1,34 @@ +__all__ = ["SafeguardsZfpRound"] + +import numcodecs_safeguards +import numcodecs_wasm_zfp + +from ..abc import Compressor + + +class SafeguardsZfpRound(Compressor): + """Safeguarded ZFP-ROUND compressor. + + This is an adjusted version of the ZFP compressor with an improved rounding mechanism + for the transform coefficients. 
+ """ + + name = "safeguards-zfp-round" + description = "Safeguards(ZFP-ROUND)" + + # NOTE: + # ZFP mechanism for strictly supporting relative error bounds is to + # truncate the floating point bit representation and then use ZFP's lossless + # mode for compression. This is essentially equivalent to the BitRound + # compressors we are already implementing (with a difference what the lossless + # compression algorithm is). + # See https://zfp.readthedocs.io/en/release1.0.1/faq.html#q-relerr for more details. + + @staticmethod + def abs_bound_codec(error_bound, **kwargs): + return numcodecs_safeguards.SafeguardsCodec( + codec=numcodecs_wasm_zfp.Zfp(mode="fixed-accuracy", tolerance=error_bound), + safeguards=[ + dict(kind="eb", type="abs", eb=error_bound, equal_nan=True), + ], + ) From 222618680c4881ebcf3417923bc41781f981dead Mon Sep 17 00:00:00 2001 From: Juniper Tyree Date: Mon, 8 Sep 2025 10:57:26 +0300 Subject: [PATCH 02/14] Add a very simple, cheeky dSSIM safeguard --- .../compressor/compressors/__init__.py | 2 + .../compressors/safeguards/__init__.py | 2 + .../compressors/safeguards/zero_dssim.py | 79 +++++++++++++++++++ 3 files changed, 83 insertions(+) create mode 100644 src/climatebenchpress/compressor/compressors/safeguards/zero_dssim.py diff --git a/src/climatebenchpress/compressor/compressors/__init__.py b/src/climatebenchpress/compressor/compressors/__init__.py index 31bcf95..b86813c 100644 --- a/src/climatebenchpress/compressor/compressors/__init__.py +++ b/src/climatebenchpress/compressor/compressors/__init__.py @@ -5,6 +5,7 @@ "SafeguardsSperr", "SafeguardsSz3", "SafeguardsZero", + "SafeguardsZeroDssim", "SafeguardsZfpRound", "Sperr", "StochRound", @@ -23,6 +24,7 @@ SafeguardsSperr, SafeguardsSz3, SafeguardsZero, + SafeguardsZeroDssim, SafeguardsZfpRound, ) from .sperr import Sperr diff --git a/src/climatebenchpress/compressor/compressors/safeguards/__init__.py b/src/climatebenchpress/compressor/compressors/safeguards/__init__.py index ffce923..e75d448 
100644 --- a/src/climatebenchpress/compressor/compressors/safeguards/__init__.py +++ b/src/climatebenchpress/compressor/compressors/safeguards/__init__.py @@ -2,10 +2,12 @@ "SafeguardsSperr", "SafeguardsSz3", "SafeguardsZero", + "SafeguardsZeroDssim", "SafeguardsZfpRound", ] from .sperr import SafeguardsSperr from .sz3 import SafeguardsSz3 from .zero import SafeguardsZero +from .zero_dssim import SafeguardsZeroDssim from .zfp_round import SafeguardsZfpRound diff --git a/src/climatebenchpress/compressor/compressors/safeguards/zero_dssim.py b/src/climatebenchpress/compressor/compressors/safeguards/zero_dssim.py new file mode 100644 index 0000000..d0d3c71 --- /dev/null +++ b/src/climatebenchpress/compressor/compressors/safeguards/zero_dssim.py @@ -0,0 +1,79 @@ +__all__ = ["SafeguardsZeroDssim"] + +import numcodecs_safeguards +import numcodecs_zero + +from ..abc import Compressor + + +class SafeguardsZeroDssim(Compressor): + """Safeguarded all-zero compressor that also safeguards the dSSIM score.""" + + name = "safeguards-zero-dssim" + description = "Safeguards(0, dSSIM)" + + @staticmethod + def abs_bound_codec(error_bound, **kwargs): + return numcodecs_safeguards.SafeguardsCodec( + codec=numcodecs_zero.ZeroCodec(), + safeguards=[ + dict(kind="eb", type="abs", eb=error_bound, equal_nan=True), + # guarantee that the global minimum and maximum are preserved, + # which simplifies the rescaling + dict(kind="sign", offset="$x_min"), + dict(kind="sign", offset="$x_max"), + dict( + kind="qoi_eb_pw", + qoi=""" + # we guarantee that + # min(data) = min(corrected) and + # max(data) = max(corrected) + # with the sign safeguards above + v["smin"] = c["$x_min"]; + v["smax"] = c["$x_max"]; + v["r"] = v["smax"] - v["smin"]; + + # re-scale to [0-1] and quantize to 256 bins + v["sc_a2"] = round_ties_even(((x - v["smin"]) / v["r"]) * 255) / 255; + + # force the quantized value to stay the same + return v["sc_a2"]; + """, + type="abs", + eb=0, + ), + ], + ) + + @staticmethod + def 
rel_bound_codec(error_bound, **kwargs): + return numcodecs_safeguards.SafeguardsCodec( + codec=numcodecs_zero.ZeroCodec(), + safeguards=[ + dict(kind="eb", type="rel", eb=error_bound, equal_nan=True), + # guarantee that the global minimum and maximum are preserved, + # which simplifies the rescaling + dict(kind="sign", offset="$x_min"), + dict(kind="sign", offset="$x_max"), + dict( + kind="qoi_eb_pw", + qoi=""" + # we guarantee that + # min(data) = min(corrected) and + # max(data) = max(corrected) + # with the sign safeguards above + v["smin"] = c["$x_min"]; + v["smax"] = c["$x_max"]; + v["r"] = v["smax"] - v["smin"]; + + # re-scale to [0-1] and quantize to 256 bins + v["sc_a2"] = round_ties_even(((x - v["smin"]) / v["r"]) * 255) / 255; + + # force the quantized value to stay the same + return v["sc_a2"]; + """, + type="abs", + eb=0, + ), + ], + ) From 93c903fcf5036e48fe32f427de65642d8f47e185 Mon Sep 17 00:00:00 2001 From: Juniper Tyree Date: Thu, 11 Dec 2025 10:33:59 +0200 Subject: [PATCH 03/14] Add numcodecs-safeguards PyPI dependency --- pyproject.toml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/pyproject.toml b/pyproject.toml index 596e327..c654e8d 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -15,6 +15,7 @@ dependencies = [ "numcodecs>=0.13.0,<0.17", "numcodecs-combinators[xarray]~=0.2.10", "numcodecs-observers~=0.1.2", + "numcodecs-safeguards==0.1.0a1", "numcodecs-wasm~=0.2.2", "numcodecs-wasm-bit-round~=0.4.0", "numcodecs-wasm-fixed-offset-scale~=0.4.0", @@ -28,6 +29,7 @@ dependencies = [ "numcodecs-wasm-zfp~=0.6.0", "numcodecs-wasm-zfp-classic~=0.4.0", "numcodecs-wasm-zstd~=0.4.0", + "numcodecs-zero~=0.1.0", "pandas~=2.2", "scipy~=1.14", "seaborn~=0.13.2", From 6cddcd892d1724c47a153e91ea42076ce7fa7000 Mon Sep 17 00:00:00 2001 From: Juniper Tyree Date: Fri, 23 Jan 2026 15:21:05 +0200 Subject: [PATCH 04/14] safeguard (conservative) relative error bounds --- pyproject.toml | 8 ++--- .../compressor/compressors/abc.py | 4 +++ 
.../compressors/safeguards/sperr.py | 35 ++++++++++++++++++- .../compressors/safeguards/zfp_round.py | 16 +++++++++ 4 files changed, 58 insertions(+), 5 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index c654e8d..73bf9a3 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -13,9 +13,9 @@ dependencies = [ "matplotlib~=3.8", "netcdf4==1.7.3", "numcodecs>=0.13.0,<0.17", - "numcodecs-combinators[xarray]~=0.2.10", + "numcodecs-combinators[xarray]~=0.2.13", "numcodecs-observers~=0.1.2", - "numcodecs-safeguards==0.1.0a1", + "numcodecs-safeguards==0.1.0b1", "numcodecs-wasm~=0.2.2", "numcodecs-wasm-bit-round~=0.4.0", "numcodecs-wasm-fixed-offset-scale~=0.4.0", @@ -24,12 +24,12 @@ dependencies = [ "numcodecs-wasm-round~=0.5.0", "numcodecs-wasm-sperr~=0.2.0", "numcodecs-wasm-stochastic-rounding~=0.2.0", - "numcodecs-wasm-sz3~=0.7.0", + "numcodecs-wasm-sz3~=0.8.0", "numcodecs-wasm-tthresh~=0.3.0", "numcodecs-wasm-zfp~=0.6.0", "numcodecs-wasm-zfp-classic~=0.4.0", "numcodecs-wasm-zstd~=0.4.0", - "numcodecs-zero~=0.1.0", + "numcodecs-zero~=0.1.2", "pandas~=2.2", "scipy~=1.14", "seaborn~=0.13.2", diff --git a/src/climatebenchpress/compressor/compressors/abc.py b/src/climatebenchpress/compressor/compressors/abc.py index c4dacb8..9029a83 100644 --- a/src/climatebenchpress/compressor/compressors/abc.py +++ b/src/climatebenchpress/compressor/compressors/abc.py @@ -167,6 +167,8 @@ def build( dtype=dtypes[var], data_min=data_min[var], data_max=data_max[var], + data_abs_min=data_abs_min[var], + data_abs_max=data_abs_max[var], ) elif eb.rel_error is not None and cls.has_rel_error_impl: new_codecs[var] = partial( @@ -175,6 +177,8 @@ def build( dtype=dtypes[var], data_min=data_min[var], data_max=data_max[var], + data_abs_min=data_abs_min[var], + data_abs_max=data_abs_max[var], ) else: # This should never happen as we have already transformed the error bounds. 
diff --git a/src/climatebenchpress/compressor/compressors/safeguards/sperr.py b/src/climatebenchpress/compressor/compressors/safeguards/sperr.py index c49183c..815290d 100644 --- a/src/climatebenchpress/compressor/compressors/safeguards/sperr.py +++ b/src/climatebenchpress/compressor/compressors/safeguards/sperr.py @@ -1,7 +1,10 @@ __all__ = ["SafeguardsSperr"] +import numcodecs import numcodecs_safeguards import numcodecs_wasm_sperr +import numpy as np +from numcodecs_combinators.stack import CodecStack from ..abc import Compressor @@ -15,8 +18,38 @@ class SafeguardsSperr(Compressor): @staticmethod def abs_bound_codec(error_bound, **kwargs): return numcodecs_safeguards.SafeguardsCodec( - codec=numcodecs_wasm_sperr.Sperr(mode="pwe", pwe=error_bound), + codec=CodecStack( + NaNToZero(), + numcodecs_wasm_sperr.Sperr(mode="pwe", pwe=error_bound), + ), safeguards=[ dict(kind="eb", type="abs", eb=error_bound, equal_nan=True), ], ) + + @staticmethod + def rel_bound_codec(error_bound, *, data_abs_min=None, **kwargs): + assert data_abs_min is not None, "data_abs_min must be provided" + + return numcodecs_safeguards.SafeguardsCodec( + codec=CodecStack( + NaNToZero(), + # conservative rel->abs error bound transformation, + # same as convert_rel_error_to_abs_error + # so that we can inform the safeguards of the rel bound + numcodecs_wasm_sperr.Sperr(mode="pwe", pwe=error_bound * data_abs_min), + ), + safeguards=[ + dict(kind="eb", type="rel", eb=error_bound, equal_nan=True), + ], + ) + + +class NaNToZero(numcodecs.abc.Codec): + codec_id = "nan-to-zero" + + def encode(self, buf): + return np.nan_to_num(buf, nan=0, posinf=np.inf, neginf=-np.inf) + + def decode(self, buf, out=None): + return numcodecs.compat.ndarray_copy(buf, out) diff --git a/src/climatebenchpress/compressor/compressors/safeguards/zfp_round.py b/src/climatebenchpress/compressor/compressors/safeguards/zfp_round.py index 81b8caa..de28f08 100644 --- 
a/src/climatebenchpress/compressor/compressors/safeguards/zfp_round.py +++ b/src/climatebenchpress/compressor/compressors/safeguards/zfp_round.py @@ -32,3 +32,19 @@ def abs_bound_codec(error_bound, **kwargs): dict(kind="eb", type="abs", eb=error_bound, equal_nan=True), ], ) + + @staticmethod + def rel_bound_codec(error_bound, *, data_abs_min=None, **kwargs): + assert data_abs_min is not None, "data_abs_min must be provided" + + return numcodecs_safeguards.SafeguardsCodec( + # conservative rel->abs error bound transformation, + # same as convert_rel_error_to_abs_error + # so that we can inform the safeguards of the rel bound + codec=numcodecs_wasm_zfp.Zfp( + mode="fixed-accuracy", tolerance=error_bound * data_abs_min + ), + safeguards=[ + dict(kind="eb", type="rel", eb=error_bound, equal_nan=True), + ], + ) From 916373f1bd2a559be4f00bda20a1f99208b2b153 Mon Sep 17 00:00:00 2001 From: Juniper Tyree Date: Sat, 24 Jan 2026 00:11:59 +0200 Subject: [PATCH 05/14] fix mypy --- src/climatebenchpress/compressor/compressors/abc.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/climatebenchpress/compressor/compressors/abc.py b/src/climatebenchpress/compressor/compressors/abc.py index 9029a83..b72eeb9 100644 --- a/src/climatebenchpress/compressor/compressors/abc.py +++ b/src/climatebenchpress/compressor/compressors/abc.py @@ -88,6 +88,8 @@ def abs_bound_codec( dtype: Optional[np.dtype] = None, data_min: Optional[float] = None, data_max: Optional[float] = None, + data_abs_min: Optional[float] = None, + data_abs_max: Optional[float] = None, ) -> Codec: """Create a codec with an absolute error bound.""" pass @@ -100,6 +102,8 @@ def rel_bound_codec( dtype: Optional[np.dtype] = None, data_min: Optional[float] = None, data_max: Optional[float] = None, + data_abs_min: Optional[float] = None, + data_abs_max: Optional[float] = None, ) -> Codec: """Create a codec with a relative error bound.""" pass From cc764a291ecad04e416df26f386ac937f6d23bc9 Mon Sep 17 00:00:00 2001 From: 
Juniper Tyree <50025784+juntyr@users.noreply.github.com> Date: Sun, 25 Jan 2026 11:18:20 +0200 Subject: [PATCH 06/14] Update sz3 to v0.8.1 --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 73bf9a3..2540959 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -24,7 +24,7 @@ dependencies = [ "numcodecs-wasm-round~=0.5.0", "numcodecs-wasm-sperr~=0.2.0", "numcodecs-wasm-stochastic-rounding~=0.2.0", - "numcodecs-wasm-sz3~=0.8.0", + "numcodecs-wasm-sz3~=0.8.1", "numcodecs-wasm-tthresh~=0.3.0", "numcodecs-wasm-zfp~=0.6.0", "numcodecs-wasm-zfp-classic~=0.4.0", From d879ca5a2b6c5d09d7da3b8e932ce256bbad9b4f Mon Sep 17 00:00:00 2001 From: Juniper Tyree Date: Thu, 29 Jan 2026 08:27:19 +0200 Subject: [PATCH 07/14] Replace NaNs with the mean before SPERR --- .../compressor/compressors/safeguards/sperr.py | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/src/climatebenchpress/compressor/compressors/safeguards/sperr.py b/src/climatebenchpress/compressor/compressors/safeguards/sperr.py index 815290d..fd82304 100644 --- a/src/climatebenchpress/compressor/compressors/safeguards/sperr.py +++ b/src/climatebenchpress/compressor/compressors/safeguards/sperr.py @@ -19,7 +19,7 @@ class SafeguardsSperr(Compressor): def abs_bound_codec(error_bound, **kwargs): return numcodecs_safeguards.SafeguardsCodec( codec=CodecStack( - NaNToZero(), + NaNToMean(), numcodecs_wasm_sperr.Sperr(mode="pwe", pwe=error_bound), ), safeguards=[ @@ -33,7 +33,7 @@ def rel_bound_codec(error_bound, *, data_abs_min=None, **kwargs): return numcodecs_safeguards.SafeguardsCodec( codec=CodecStack( - NaNToZero(), + NaNToMean(), # conservative rel->abs error bound transformation, # same as convert_rel_error_to_abs_error # so that we can inform the safeguards of the rel bound @@ -45,11 +45,14 @@ def rel_bound_codec(error_bound, *, data_abs_min=None, **kwargs): ) -class NaNToZero(numcodecs.abc.Codec): - codec_id = 
"nan-to-zero" +# inspired by H5Z-SPERR's treatment of NaN values: +# https://github.com/NCAR/H5Z-SPERR/blob/72ebcb00e382886c229c5ef5a7e237fe451d5fb8/src/h5z-sperr.c#L464-L473 +# https://github.com/NCAR/H5Z-SPERR/blob/72ebcb00e382886c229c5ef5a7e237fe451d5fb8/src/h5zsperr_helper.cpp#L179-L212 +class NaNToMean(numcodecs.abc.Codec): + codec_id = "nan-to-mean" def encode(self, buf): - return np.nan_to_num(buf, nan=0, posinf=np.inf, neginf=-np.inf) + return np.nan_to_num(buf, nan=np.nanmean(buf), posinf=np.inf, neginf=-np.inf) def decode(self, buf, out=None): return numcodecs.compat.ndarray_copy(buf, out) From 4837f76a985c1c8fa7d29d65f2928e672f99924f Mon Sep 17 00:00:00 2001 From: Juniper Tyree Date: Fri, 30 Jan 2026 21:51:54 +0200 Subject: [PATCH 08/14] some improvements --- pyproject.toml | 2 +- .../compressor/compressors/__init__.py | 22 +++++------ .../compressors/safeguarded/__init__.py | 13 +++++++ .../{safeguards => safeguarded}/sperr.py | 10 ++--- .../{safeguards => safeguarded}/sz3.py | 8 ++-- .../{safeguards => safeguarded}/zero.py | 8 ++-- .../{safeguards => safeguarded}/zero_dssim.py | 38 ++++++++++++------- .../{safeguards => safeguarded}/zfp_round.py | 16 +++++--- .../compressors/safeguards/__init__.py | 13 ------- 9 files changed, 72 insertions(+), 58 deletions(-) create mode 100644 src/climatebenchpress/compressor/compressors/safeguarded/__init__.py rename src/climatebenchpress/compressor/compressors/{safeguards => safeguarded}/sperr.py (90%) rename src/climatebenchpress/compressor/compressors/{safeguards => safeguarded}/sz3.py (85%) rename src/climatebenchpress/compressor/compressors/{safeguards => safeguarded}/zero.py (84%) rename src/climatebenchpress/compressor/compressors/{safeguards => safeguarded}/zero_dssim.py (65%) rename src/climatebenchpress/compressor/compressors/{safeguards => safeguarded}/zfp_round.py (78%) delete mode 100644 src/climatebenchpress/compressor/compressors/safeguards/__init__.py diff --git a/pyproject.toml b/pyproject.toml 
index 2540959..7a48e7f 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -5,7 +5,7 @@ description = "Add your description here" readme = "README.md" requires-python = ">=3.12" dependencies = [ - "astropy~=7.0.1", + "astropy~=7.2.0", "cartopy~=0.24.1", "cf-xarray~=0.10", "cftime~=1.6.0", diff --git a/src/climatebenchpress/compressor/compressors/__init__.py b/src/climatebenchpress/compressor/compressors/__init__.py index b86813c..46638b3 100644 --- a/src/climatebenchpress/compressor/compressors/__init__.py +++ b/src/climatebenchpress/compressor/compressors/__init__.py @@ -2,11 +2,11 @@ "BitRound", "BitRoundPco", "Jpeg2000", - "SafeguardsSperr", - "SafeguardsSz3", - "SafeguardsZero", - "SafeguardsZeroDssim", - "SafeguardsZfpRound", + "SafeguardedSperr", + "SafeguardedSz3", + "SafeguardedZero", + "SafeguardedZeroDssim", + "SafeguardedZfpRound", "Sperr", "StochRound", "StochRoundPco", @@ -20,12 +20,12 @@ from .bitround import BitRound from .bitround_pco import BitRoundPco from .jpeg2000 import Jpeg2000 -from .safeguards import ( - SafeguardsSperr, - SafeguardsSz3, - SafeguardsZero, - SafeguardsZeroDssim, - SafeguardsZfpRound, +from .safeguarded import ( + SafeguardedSperr, + SafeguardedSz3, + SafeguardedZero, + SafeguardedZeroDssim, + SafeguardedZfpRound, ) from .sperr import Sperr from .stochround import StochRound diff --git a/src/climatebenchpress/compressor/compressors/safeguarded/__init__.py b/src/climatebenchpress/compressor/compressors/safeguarded/__init__.py new file mode 100644 index 0000000..7660f38 --- /dev/null +++ b/src/climatebenchpress/compressor/compressors/safeguarded/__init__.py @@ -0,0 +1,13 @@ +__all__ = [ + "SafeguardedSperr", + "SafeguardedSz3", + "SafeguardedZero", + "SafeguardedZeroDssim", + "SafeguardedZfpRound", +] + +from .sperr import SafeguardedSperr +from .sz3 import SafeguardedSz3 +from .zero import SafeguardedZero +from .zero_dssim import SafeguardedZeroDssim +from .zfp_round import SafeguardedZfpRound diff --git 
a/src/climatebenchpress/compressor/compressors/safeguards/sperr.py b/src/climatebenchpress/compressor/compressors/safeguarded/sperr.py similarity index 90% rename from src/climatebenchpress/compressor/compressors/safeguards/sperr.py rename to src/climatebenchpress/compressor/compressors/safeguarded/sperr.py index fd82304..58b0a1c 100644 --- a/src/climatebenchpress/compressor/compressors/safeguards/sperr.py +++ b/src/climatebenchpress/compressor/compressors/safeguarded/sperr.py @@ -1,4 +1,4 @@ -__all__ = ["SafeguardsSperr"] +__all__ = ["SafeguardedSperr"] import numcodecs import numcodecs_safeguards @@ -9,11 +9,11 @@ from ..abc import Compressor -class SafeguardsSperr(Compressor): +class SafeguardedSperr(Compressor): """Safeguarded SPERR compressor.""" - name = "safeguards-sperr" - description = "Safeguards(SPERR)" + name = "safeguarded-sperr" + description = "Safeguarded(SPERR)" @staticmethod def abs_bound_codec(error_bound, **kwargs): @@ -49,7 +49,7 @@ def rel_bound_codec(error_bound, *, data_abs_min=None, **kwargs): # https://github.com/NCAR/H5Z-SPERR/blob/72ebcb00e382886c229c5ef5a7e237fe451d5fb8/src/h5z-sperr.c#L464-L473 # https://github.com/NCAR/H5Z-SPERR/blob/72ebcb00e382886c229c5ef5a7e237fe451d5fb8/src/h5zsperr_helper.cpp#L179-L212 class NaNToMean(numcodecs.abc.Codec): - codec_id = "nan-to-mean" + codec_id = "nan-to-mean" # type: ignore def encode(self, buf): return np.nan_to_num(buf, nan=np.nanmean(buf), posinf=np.inf, neginf=-np.inf) diff --git a/src/climatebenchpress/compressor/compressors/safeguards/sz3.py b/src/climatebenchpress/compressor/compressors/safeguarded/sz3.py similarity index 85% rename from src/climatebenchpress/compressor/compressors/safeguards/sz3.py rename to src/climatebenchpress/compressor/compressors/safeguarded/sz3.py index 480992c..8d89e03 100644 --- a/src/climatebenchpress/compressor/compressors/safeguards/sz3.py +++ b/src/climatebenchpress/compressor/compressors/safeguarded/sz3.py @@ -1,4 +1,4 @@ -__all__ = ["SafeguardsSz3"] 
+__all__ = ["SafeguardedSz3"] import numcodecs_safeguards import numcodecs_wasm_sz3 @@ -6,11 +6,11 @@ from ..abc import Compressor -class SafeguardsSz3(Compressor): +class SafeguardedSz3(Compressor): """Safeguarded SZ3 compressor.""" - name = "safeguards-sz3" - description = "Safeguards(SZ3)" + name = "safeguarded-sz3" + description = "Safeguarded(SZ3)" @staticmethod def abs_bound_codec(error_bound, **kwargs): diff --git a/src/climatebenchpress/compressor/compressors/safeguards/zero.py b/src/climatebenchpress/compressor/compressors/safeguarded/zero.py similarity index 84% rename from src/climatebenchpress/compressor/compressors/safeguards/zero.py rename to src/climatebenchpress/compressor/compressors/safeguarded/zero.py index d156c8a..c15919a 100644 --- a/src/climatebenchpress/compressor/compressors/safeguards/zero.py +++ b/src/climatebenchpress/compressor/compressors/safeguarded/zero.py @@ -1,4 +1,4 @@ -__all__ = ["SafeguardsZero"] +__all__ = ["SafeguardedZero"] import numcodecs_safeguards import numcodecs_zero @@ -6,11 +6,11 @@ from ..abc import Compressor -class SafeguardsZero(Compressor): +class SafeguardedZero(Compressor): """Safeguarded all-zero compressor.""" - name = "safeguards-zero" - description = "Safeguards(0)" + name = "safeguarded-zero" + description = "Safeguarded(0)" @staticmethod def abs_bound_codec(error_bound, **kwargs): diff --git a/src/climatebenchpress/compressor/compressors/safeguards/zero_dssim.py b/src/climatebenchpress/compressor/compressors/safeguarded/zero_dssim.py similarity index 65% rename from src/climatebenchpress/compressor/compressors/safeguards/zero_dssim.py rename to src/climatebenchpress/compressor/compressors/safeguarded/zero_dssim.py index d0d3c71..cf1fefc 100644 --- a/src/climatebenchpress/compressor/compressors/safeguards/zero_dssim.py +++ b/src/climatebenchpress/compressor/compressors/safeguarded/zero_dssim.py @@ -1,4 +1,4 @@ -__all__ = ["SafeguardsZeroDssim"] +__all__ = ["SafeguardedZeroDssim"] import 
numcodecs_safeguards import numcodecs_zero @@ -6,22 +6,25 @@ from ..abc import Compressor -class SafeguardsZeroDssim(Compressor): +class SafeguardedZeroDssim(Compressor): """Safeguarded all-zero compressor that also safeguards the dSSIM score.""" - name = "safeguards-zero-dssim" - description = "Safeguards(0, dSSIM)" + name = "safeguarded-zero-dssim" + description = "Safeguarded(0, dSSIM)" @staticmethod - def abs_bound_codec(error_bound, **kwargs): + def abs_bound_codec(error_bound, data_min=None, data_max=None, **kwargs): + assert data_min is not None, "data_min must be provided" + assert data_max is not None, "data_max must be provided" + return numcodecs_safeguards.SafeguardsCodec( codec=numcodecs_zero.ZeroCodec(), safeguards=[ dict(kind="eb", type="abs", eb=error_bound, equal_nan=True), # guarantee that the global minimum and maximum are preserved, # which simplifies the rescaling - dict(kind="sign", offset="$x_min"), - dict(kind="sign", offset="$x_max"), + dict(kind="sign", offset="x_min"), + dict(kind="sign", offset="x_max"), dict( kind="qoi_eb_pw", qoi=""" @@ -29,8 +32,8 @@ def abs_bound_codec(error_bound, **kwargs): # min(data) = min(corrected) and # max(data) = max(corrected) # with the sign safeguards above - v["smin"] = c["$x_min"]; - v["smax"] = c["$x_max"]; + v["smin"] = c["x_min"]; + v["smax"] = c["x_max"]; v["r"] = v["smax"] - v["smin"]; # re-scale to [0-1] and quantize to 256 bins @@ -43,18 +46,23 @@ def abs_bound_codec(error_bound, **kwargs): eb=0, ), ], + # use data_min instead of $x_min to allow for chunking + fixed_constants=dict(x_min=data_min, x_max=data_max), ) @staticmethod - def rel_bound_codec(error_bound, **kwargs): + def rel_bound_codec(error_bound, data_min=None, data_max=None, **kwargs): + assert data_min is not None, "data_min must be provided" + assert data_max is not None, "data_max must be provided" + return numcodecs_safeguards.SafeguardsCodec( codec=numcodecs_zero.ZeroCodec(), safeguards=[ dict(kind="eb", type="rel", 
eb=error_bound, equal_nan=True), # guarantee that the global minimum and maximum are preserved, # which simplifies the rescaling - dict(kind="sign", offset="$x_min"), - dict(kind="sign", offset="$x_max"), + dict(kind="sign", offset="x_min"), + dict(kind="sign", offset="x_max"), dict( kind="qoi_eb_pw", qoi=""" @@ -62,8 +70,8 @@ def rel_bound_codec(error_bound, **kwargs): # min(data) = min(corrected) and # max(data) = max(corrected) # with the sign safeguards above - v["smin"] = c["$x_min"]; - v["smax"] = c["$x_max"]; + v["smin"] = c["x_min"]; + v["smax"] = c["x_max"]; v["r"] = v["smax"] - v["smin"]; # re-scale to [0-1] and quantize to 256 bins @@ -76,4 +84,6 @@ def rel_bound_codec(error_bound, **kwargs): eb=0, ), ], + # use data_min instead of $x_min to allow for chunking + fixed_constants=dict(x_min=data_min, x_max=data_max), ) diff --git a/src/climatebenchpress/compressor/compressors/safeguards/zfp_round.py b/src/climatebenchpress/compressor/compressors/safeguarded/zfp_round.py similarity index 78% rename from src/climatebenchpress/compressor/compressors/safeguards/zfp_round.py rename to src/climatebenchpress/compressor/compressors/safeguarded/zfp_round.py index de28f08..89f1641 100644 --- a/src/climatebenchpress/compressor/compressors/safeguards/zfp_round.py +++ b/src/climatebenchpress/compressor/compressors/safeguarded/zfp_round.py @@ -1,4 +1,4 @@ -__all__ = ["SafeguardsZfpRound"] +__all__ = ["SafeguardedZfpRound"] import numcodecs_safeguards import numcodecs_wasm_zfp @@ -6,15 +6,15 @@ from ..abc import Compressor -class SafeguardsZfpRound(Compressor): +class SafeguardedZfpRound(Compressor): """Safeguarded ZFP-ROUND compressor. This is an adjusted version of the ZFP compressor with an improved rounding mechanism for the transform coefficients. 
""" - name = "safeguards-zfp-round" - description = "Safeguards(ZFP-ROUND)" + name = "safeguarded-zfp-round" + description = "Safeguarded(ZFP-ROUND)" # NOTE: # ZFP mechanism for strictly supporting relative error bounds is to @@ -27,7 +27,9 @@ class SafeguardsZfpRound(Compressor): @staticmethod def abs_bound_codec(error_bound, **kwargs): return numcodecs_safeguards.SafeguardsCodec( - codec=numcodecs_wasm_zfp.Zfp(mode="fixed-accuracy", tolerance=error_bound), + codec=numcodecs_wasm_zfp.Zfp( + mode="fixed-accuracy", tolerance=error_bound, non_finite="allow-unsafe" + ), safeguards=[ dict(kind="eb", type="abs", eb=error_bound, equal_nan=True), ], @@ -42,7 +44,9 @@ def rel_bound_codec(error_bound, *, data_abs_min=None, **kwargs): # same as convert_rel_error_to_abs_error # so that we can inform the safeguards of the rel bound codec=numcodecs_wasm_zfp.Zfp( - mode="fixed-accuracy", tolerance=error_bound * data_abs_min + mode="fixed-accuracy", + tolerance=error_bound * data_abs_min, + non_finite="allow-unsafe", ), safeguards=[ dict(kind="eb", type="rel", eb=error_bound, equal_nan=True), diff --git a/src/climatebenchpress/compressor/compressors/safeguards/__init__.py b/src/climatebenchpress/compressor/compressors/safeguards/__init__.py deleted file mode 100644 index e75d448..0000000 --- a/src/climatebenchpress/compressor/compressors/safeguards/__init__.py +++ /dev/null @@ -1,13 +0,0 @@ -__all__ = [ - "SafeguardsSperr", - "SafeguardsSz3", - "SafeguardsZero", - "SafeguardsZeroDssim", - "SafeguardsZfpRound", -] - -from .sperr import SafeguardsSperr -from .sz3 import SafeguardsSz3 -from .zero import SafeguardsZero -from .zero_dssim import SafeguardsZeroDssim -from .zfp_round import SafeguardsZfpRound From f302b39fccd6b68a8f48316db2176603dd616dcf Mon Sep 17 00:00:00 2001 From: Juniper Tyree Date: Sat, 31 Jan 2026 00:58:33 +0200 Subject: [PATCH 09/14] fix compression stats --- .../compressor/scripts/compress.py | 50 ++++++++++++------- 1 file changed, 32 insertions(+), 18 
deletions(-) diff --git a/src/climatebenchpress/compressor/scripts/compress.py b/src/climatebenchpress/compressor/scripts/compress.py index 98b71e4..96fa51f 100644 --- a/src/climatebenchpress/compressor/scripts/compress.py +++ b/src/climatebenchpress/compressor/scripts/compress.py @@ -199,24 +199,38 @@ def compress_decompress( ) as codec_: variables[v] = codec_.encode_decode_data_array(ds[v]).compute() - measurements[v] = { - "encoded_bytes": sum( - b.post for b in nbytes.encode_sizes[HashableCodec(codec[-1])] - ), - "decoded_bytes": sum( - b.post for b in nbytes.decode_sizes[HashableCodec(codec[0])] - ), - "encode_timing": sum(t for ts in timing.encode_times.values() for t in ts), - "decode_timing": sum(t for ts in timing.decode_times.values() for t in ts), - "encode_instructions": sum( - i for is_ in instructions.encode_instructions.values() for i in is_ - ) - or None, - "decode_instructions": sum( - i for is_ in instructions.decode_instructions.values() for i in is_ - ) - or None, - } + cs = [c._codec for c in codec_.__iter__()] + + measurements[v] = { + # bytes measurements: only look at the first and last codec in + # the top level stack, which gives the total encoded and + # decoded sizes + "encoded_bytes": sum( + b.post for b in nbytes.encode_sizes[HashableCodec(cs[-1])] + ), + "decoded_bytes": sum( + b.post for b in nbytes.decode_sizes[HashableCodec(cs[0])] + ), + # time measurements: only sum over the top level stack members + # to avoid double counting from nested codec combinators + "encode_timing": sum( + t for c in cs for t in timing.encode_times[HashableCodec(c)] + ), + "decode_timing": sum( + t for c in cs for t in timing.decode_times[HashableCodec(c)] + ), + # instruction measurements: sum over all codecs since WASM + # instruction counts are currently not aggregated in codec + # combinators + "encode_instructions": sum( + i for is_ in instructions.encode_instructions.values() for i in is_ + ) + or None, + "decode_instructions": sum( + i for is_ in
instructions.decode_instructions.values() for i in is_ + ) + or None, + } return xr.Dataset(variables, coords=ds.coords, attrs=ds.attrs), measurements From 8f54e8e9f5e8063caa3ac12abc19c0d4a4059900 Mon Sep 17 00:00:00 2001 From: Juniper Tyree Date: Sat, 31 Jan 2026 09:23:04 +0200 Subject: [PATCH 10/14] downgrade SZ3 to 0.7.0 --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 7a48e7f..3e4f33d 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -24,7 +24,7 @@ dependencies = [ "numcodecs-wasm-round~=0.5.0", "numcodecs-wasm-sperr~=0.2.0", "numcodecs-wasm-stochastic-rounding~=0.2.0", - "numcodecs-wasm-sz3~=0.8.1", + "numcodecs-wasm-sz3~=0.7.0", "numcodecs-wasm-tthresh~=0.3.0", "numcodecs-wasm-zfp~=0.6.0", "numcodecs-wasm-zfp-classic~=0.4.0", From 3772370faa0b0a2be64e5f2297c3b43b01e589d0 Mon Sep 17 00:00:00 2001 From: Juniper Tyree Date: Mon, 2 Feb 2026 11:03:42 +0200 Subject: [PATCH 11/14] Add safeguarded BitRound+PCO --- .../compressor/compressors/__init__.py | 2 + .../compressors/safeguarded/__init__.py | 2 + .../compressors/safeguarded/bitround_pco.py | 66 +++++++++++++++++++ .../compressors/safeguarded/sperr.py | 2 + 4 files changed, 72 insertions(+) create mode 100644 src/climatebenchpress/compressor/compressors/safeguarded/bitround_pco.py diff --git a/src/climatebenchpress/compressor/compressors/__init__.py b/src/climatebenchpress/compressor/compressors/__init__.py index 46638b3..63da691 100644 --- a/src/climatebenchpress/compressor/compressors/__init__.py +++ b/src/climatebenchpress/compressor/compressors/__init__.py @@ -2,6 +2,7 @@ "BitRound", "BitRoundPco", "Jpeg2000", + "SafeguardedBitRoundPco", "SafeguardedSperr", "SafeguardedSz3", "SafeguardedZero", @@ -21,6 +22,7 @@ from .bitround_pco import BitRoundPco from .jpeg2000 import Jpeg2000 from .safeguarded import ( + SafeguardedBitRoundPco, SafeguardedSperr, SafeguardedSz3, SafeguardedZero, diff --git 
a/src/climatebenchpress/compressor/compressors/safeguarded/__init__.py b/src/climatebenchpress/compressor/compressors/safeguarded/__init__.py index 7660f38..2fb5669 100644 --- a/src/climatebenchpress/compressor/compressors/safeguarded/__init__.py +++ b/src/climatebenchpress/compressor/compressors/safeguarded/__init__.py @@ -1,4 +1,5 @@ __all__ = [ + "SafeguardedBitRoundPco", "SafeguardedSperr", "SafeguardedSz3", "SafeguardedZero", @@ -6,6 +7,7 @@ "SafeguardedZfpRound", ] +from .bitround_pco import SafeguardedBitRoundPco from .sperr import SafeguardedSperr from .sz3 import SafeguardedSz3 from .zero import SafeguardedZero diff --git a/src/climatebenchpress/compressor/compressors/safeguarded/bitround_pco.py b/src/climatebenchpress/compressor/compressors/safeguarded/bitround_pco.py new file mode 100644 index 0000000..b175fff --- /dev/null +++ b/src/climatebenchpress/compressor/compressors/safeguarded/bitround_pco.py @@ -0,0 +1,66 @@ +__all__ = ["SafeguardedBitRoundPco"] + + +import numcodecs_safeguards +import numcodecs_wasm_bit_round +import numcodecs_wasm_pco + +from ..abc import Compressor +from ..utils import compute_keepbits + + +class SafeguardedBitRoundPco(Compressor): + """Safeguarded Bit Rounding + PCodec compressor. + + This compressor first applies bit rounding to the data, which reduces the precision of the data + while preserving its overall structure. After that, it uses PCodec for further compression. 
+ """ + + name = "safeguarded-bitround-pco" + description = "Safeguarded(Bit Rounding + PCodec)" + + @staticmethod + def abs_bound_codec(error_bound, *, dtype=None, data_abs_max=None, **kwargs): + assert dtype is not None, "dtype must be provided" + assert data_abs_max is not None, "data_abs_max must be provided" + + # conservative abs->rel error bound transformation, + # same as convert_abs_error_to_rel_error + # so that we can inform the safeguards of the abs bound + keepbits = compute_keepbits(dtype, error_bound / data_abs_max) + + return numcodecs_safeguards.SafeguardsCodec( + codec=numcodecs_wasm_bit_round.BitRound(keepbits=keepbits), + lossless=numcodecs_safeguards.lossless.Lossless( + for_codec=numcodecs_wasm_pco.Pco( + level=8, + mode="auto", + delta="auto", + paging="equal-pages-up-to", + ) + ), + safeguards=[ + dict(kind="eb", type="abs", eb=error_bound, equal_nan=True), + ], + ) + + @staticmethod + def rel_bound_codec(error_bound, *, dtype=None, **kwargs): + assert dtype is not None, "dtype must be provided" + + keepbits = compute_keepbits(dtype, error_bound) + + return numcodecs_safeguards.SafeguardsCodec( + codec=numcodecs_wasm_bit_round.BitRound(keepbits=keepbits), + lossless=numcodecs_safeguards.lossless.Lossless( + for_codec=numcodecs_wasm_pco.Pco( + level=8, + mode="auto", + delta="auto", + paging="equal-pages-up-to", + ) + ), + safeguards=[ + dict(kind="eb", type="rel", eb=error_bound, equal_nan=True), + ], + ) diff --git a/src/climatebenchpress/compressor/compressors/safeguarded/sperr.py b/src/climatebenchpress/compressor/compressors/safeguarded/sperr.py index 58b0a1c..7498f8c 100644 --- a/src/climatebenchpress/compressor/compressors/safeguarded/sperr.py +++ b/src/climatebenchpress/compressor/compressors/safeguarded/sperr.py @@ -1,6 +1,8 @@ __all__ = ["SafeguardedSperr"] import numcodecs +import numcodecs.abc +import numcodecs.compat import numcodecs_safeguards import numcodecs_wasm_sperr import numpy as np From 
a9b373e07ec79f42f1c763bf9b4fc5146e20eb0e Mon Sep 17 00:00:00 2001 From: Juniper Tyree Date: Mon, 2 Feb 2026 11:21:59 +0200 Subject: [PATCH 12/14] Upgrade to numcodecs-safeguards==0.1.0b2 --- pyproject.toml | 2 +- .../compressor/compressors/safeguarded/bitround_pco.py | 4 ++-- .../compressor/compressors/safeguarded/sperr.py | 4 ++-- .../compressor/compressors/safeguarded/sz3.py | 4 ++-- .../compressor/compressors/safeguarded/zero.py | 4 ++-- .../compressor/compressors/safeguarded/zero_dssim.py | 4 ++-- .../compressor/compressors/safeguarded/zfp_round.py | 4 ++-- 7 files changed, 13 insertions(+), 13 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 3e4f33d..ff7f490 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -15,7 +15,7 @@ dependencies = [ "numcodecs>=0.13.0,<0.17", "numcodecs-combinators[xarray]~=0.2.13", "numcodecs-observers~=0.1.2", - "numcodecs-safeguards==0.1.0b1", + "numcodecs-safeguards==0.1.0b2", "numcodecs-wasm~=0.2.2", "numcodecs-wasm-bit-round~=0.4.0", "numcodecs-wasm-fixed-offset-scale~=0.4.0", diff --git a/src/climatebenchpress/compressor/compressors/safeguarded/bitround_pco.py b/src/climatebenchpress/compressor/compressors/safeguarded/bitround_pco.py index b175fff..36cd540 100644 --- a/src/climatebenchpress/compressor/compressors/safeguarded/bitround_pco.py +++ b/src/climatebenchpress/compressor/compressors/safeguarded/bitround_pco.py @@ -29,7 +29,7 @@ def abs_bound_codec(error_bound, *, dtype=None, data_abs_max=None, **kwargs): # so that we can inform the safeguards of the abs bound keepbits = compute_keepbits(dtype, error_bound / data_abs_max) - return numcodecs_safeguards.SafeguardsCodec( + return numcodecs_safeguards.SafeguardedCodec( codec=numcodecs_wasm_bit_round.BitRound(keepbits=keepbits), lossless=numcodecs_safeguards.lossless.Lossless( for_codec=numcodecs_wasm_pco.Pco( @@ -50,7 +50,7 @@ def rel_bound_codec(error_bound, *, dtype=None, **kwargs): keepbits = compute_keepbits(dtype, error_bound) - return 
numcodecs_safeguards.SafeguardsCodec( + return numcodecs_safeguards.SafeguardedCodec( codec=numcodecs_wasm_bit_round.BitRound(keepbits=keepbits), lossless=numcodecs_safeguards.lossless.Lossless( for_codec=numcodecs_wasm_pco.Pco( diff --git a/src/climatebenchpress/compressor/compressors/safeguarded/sperr.py b/src/climatebenchpress/compressor/compressors/safeguarded/sperr.py index 7498f8c..5bb2373 100644 --- a/src/climatebenchpress/compressor/compressors/safeguarded/sperr.py +++ b/src/climatebenchpress/compressor/compressors/safeguarded/sperr.py @@ -19,7 +19,7 @@ class SafeguardedSperr(Compressor): @staticmethod def abs_bound_codec(error_bound, **kwargs): - return numcodecs_safeguards.SafeguardsCodec( + return numcodecs_safeguards.SafeguardedCodec( codec=CodecStack( NaNToMean(), numcodecs_wasm_sperr.Sperr(mode="pwe", pwe=error_bound), @@ -33,7 +33,7 @@ def abs_bound_codec(error_bound, **kwargs): def rel_bound_codec(error_bound, *, data_abs_min=None, **kwargs): assert data_abs_min is not None, "data_abs_min must be provided" - return numcodecs_safeguards.SafeguardsCodec( + return numcodecs_safeguards.SafeguardedCodec( codec=CodecStack( NaNToMean(), # conservative rel->abs error bound transformation, diff --git a/src/climatebenchpress/compressor/compressors/safeguarded/sz3.py b/src/climatebenchpress/compressor/compressors/safeguarded/sz3.py index 8d89e03..609f1de 100644 --- a/src/climatebenchpress/compressor/compressors/safeguarded/sz3.py +++ b/src/climatebenchpress/compressor/compressors/safeguarded/sz3.py @@ -14,7 +14,7 @@ class SafeguardedSz3(Compressor): @staticmethod def abs_bound_codec(error_bound, **kwargs): - return numcodecs_safeguards.SafeguardsCodec( + return numcodecs_safeguards.SafeguardedCodec( codec=numcodecs_wasm_sz3.Sz3(eb_mode="abs", eb_abs=error_bound), safeguards=[ dict(kind="eb", type="abs", eb=error_bound, equal_nan=True), @@ -23,7 +23,7 @@ def abs_bound_codec(error_bound, **kwargs): @staticmethod def rel_bound_codec(error_bound, **kwargs): - 
return numcodecs_safeguards.SafeguardsCodec( + return numcodecs_safeguards.SafeguardedCodec( codec=numcodecs_wasm_sz3.Sz3(eb_mode="rel", eb_rel=error_bound), safeguards=[ dict(kind="eb", type="rel", eb=error_bound, equal_nan=True), diff --git a/src/climatebenchpress/compressor/compressors/safeguarded/zero.py b/src/climatebenchpress/compressor/compressors/safeguarded/zero.py index c15919a..99e1790 100644 --- a/src/climatebenchpress/compressor/compressors/safeguarded/zero.py +++ b/src/climatebenchpress/compressor/compressors/safeguarded/zero.py @@ -14,7 +14,7 @@ class SafeguardedZero(Compressor): @staticmethod def abs_bound_codec(error_bound, **kwargs): - return numcodecs_safeguards.SafeguardsCodec( + return numcodecs_safeguards.SafeguardedCodec( codec=numcodecs_zero.ZeroCodec(), safeguards=[ dict(kind="eb", type="abs", eb=error_bound, equal_nan=True), @@ -23,7 +23,7 @@ def abs_bound_codec(error_bound, **kwargs): @staticmethod def rel_bound_codec(error_bound, **kwargs): - return numcodecs_safeguards.SafeguardsCodec( + return numcodecs_safeguards.SafeguardedCodec( codec=numcodecs_zero.ZeroCodec(), safeguards=[ dict(kind="eb", type="rel", eb=error_bound, equal_nan=True), diff --git a/src/climatebenchpress/compressor/compressors/safeguarded/zero_dssim.py b/src/climatebenchpress/compressor/compressors/safeguarded/zero_dssim.py index cf1fefc..0e9b295 100644 --- a/src/climatebenchpress/compressor/compressors/safeguarded/zero_dssim.py +++ b/src/climatebenchpress/compressor/compressors/safeguarded/zero_dssim.py @@ -17,7 +17,7 @@ def abs_bound_codec(error_bound, data_min=None, data_max=None, **kwargs): assert data_min is not None, "data_min must be provided" assert data_max is not None, "data_max must be provided" - return numcodecs_safeguards.SafeguardsCodec( + return numcodecs_safeguards.SafeguardedCodec( codec=numcodecs_zero.ZeroCodec(), safeguards=[ dict(kind="eb", type="abs", eb=error_bound, equal_nan=True), @@ -55,7 +55,7 @@ def rel_bound_codec(error_bound, 
data_min=None, data_max=None, **kwargs): assert data_min is not None, "data_min must be provided" assert data_max is not None, "data_max must be provided" - return numcodecs_safeguards.SafeguardsCodec( + return numcodecs_safeguards.SafeguardedCodec( codec=numcodecs_zero.ZeroCodec(), safeguards=[ dict(kind="eb", type="rel", eb=error_bound, equal_nan=True), diff --git a/src/climatebenchpress/compressor/compressors/safeguarded/zfp_round.py b/src/climatebenchpress/compressor/compressors/safeguarded/zfp_round.py index 89f1641..d98f3d9 100644 --- a/src/climatebenchpress/compressor/compressors/safeguarded/zfp_round.py +++ b/src/climatebenchpress/compressor/compressors/safeguarded/zfp_round.py @@ -26,7 +26,7 @@ class SafeguardedZfpRound(Compressor): @staticmethod def abs_bound_codec(error_bound, **kwargs): - return numcodecs_safeguards.SafeguardsCodec( + return numcodecs_safeguards.SafeguardedCodec( codec=numcodecs_wasm_zfp.Zfp( mode="fixed-accuracy", tolerance=error_bound, non_finite="allow-unsafe" ), @@ -39,7 +39,7 @@ def abs_bound_codec(error_bound, **kwargs): def rel_bound_codec(error_bound, *, data_abs_min=None, **kwargs): assert data_abs_min is not None, "data_abs_min must be provided" - return numcodecs_safeguards.SafeguardsCodec( + return numcodecs_safeguards.SafeguardedCodec( # conservative rel->abs error bound transformation, # same as convert_rel_error_to_abs_error # so that we can inform the safeguards of the rel bound From 35b25919ac6d9515414ad50ffa1117a74e68e2b2 Mon Sep 17 00:00:00 2001 From: Juniper Tyree Date: Wed, 4 Feb 2026 12:01:19 +0200 Subject: [PATCH 13/14] fix Safeguarded(0, dSSIM) for multiple 2D slices --- .../compressor/compressors/abc.py | 16 +++++++++ .../compressors/safeguarded/zero_dssim.py | 22 ++++++------ .../compressor/plotting/plot_metrics.py | 5 +-- .../compressor/scripts/compress.py | 34 +++++++++++++++++-- 4 files changed, 63 insertions(+), 14 deletions(-) diff --git a/src/climatebenchpress/compressor/compressors/abc.py 
b/src/climatebenchpress/compressor/compressors/abc.py index b72eeb9..e20429f 100644 --- a/src/climatebenchpress/compressor/compressors/abc.py +++ b/src/climatebenchpress/compressor/compressors/abc.py @@ -90,6 +90,8 @@ def abs_bound_codec( data_max: Optional[float] = None, data_abs_min: Optional[float] = None, data_abs_max: Optional[float] = None, + data_min_2d: Optional[np.ndarray] = None, + data_max_2d: Optional[np.ndarray] = None, ) -> Codec: """Create a codec with an absolute error bound.""" pass @@ -104,6 +106,8 @@ def rel_bound_codec( data_max: Optional[float] = None, data_abs_min: Optional[float] = None, data_abs_max: Optional[float] = None, + data_min_2d: Optional[np.ndarray] = None, + data_max_2d: Optional[np.ndarray] = None, ) -> Codec: """Create a codec with a relative error bound.""" pass @@ -116,6 +120,8 @@ def build( data_abs_max: dict[VariableName, float], data_min: dict[VariableName, float], data_max: dict[VariableName, float], + data_min_2d: dict[VariableName, np.ndarray], + data_max_2d: dict[VariableName, np.ndarray], error_bounds: list[dict[VariableName, ErrorBound]], ) -> dict[VariantName, list[NamedPerVariableCodec]]: """ @@ -139,6 +145,12 @@ def build( Dict mapping from variable name to minimum value for the variable. data_max : dict[VariableName, float] Dict mapping from variable name to maximum value for the variable. + data_min_2d : dict[VariableName, np.ndarray] + Dict mapping from variable name to per-lat-lon-slice minimum value for the + variable. + data_max_2d : dict[VariableName, np.ndarray] + Dict mapping from variable name to per-lat-lon-slice maximum value for the + variable. error_bounds: list[ErrorBound] List of error bounds to use for the compressor. 
@@ -173,6 +185,8 @@ def build( data_max=data_max[var], data_abs_min=data_abs_min[var], data_abs_max=data_abs_max[var], + data_min_2d=data_min_2d[var], + data_max_2d=data_max_2d[var], ) elif eb.rel_error is not None and cls.has_rel_error_impl: new_codecs[var] = partial( @@ -183,6 +197,8 @@ def build( data_max=data_max[var], data_abs_min=data_abs_min[var], data_abs_max=data_abs_max[var], + data_min_2d=data_min_2d[var], + data_max_2d=data_max_2d[var], ) else: # This should never happen as we have already transformed the error bounds. diff --git a/src/climatebenchpress/compressor/compressors/safeguarded/zero_dssim.py b/src/climatebenchpress/compressor/compressors/safeguarded/zero_dssim.py index 0e9b295..6f300ad 100644 --- a/src/climatebenchpress/compressor/compressors/safeguarded/zero_dssim.py +++ b/src/climatebenchpress/compressor/compressors/safeguarded/zero_dssim.py @@ -13,9 +13,9 @@ class SafeguardedZeroDssim(Compressor): description = "Safeguarded(0, dSSIM)" @staticmethod - def abs_bound_codec(error_bound, data_min=None, data_max=None, **kwargs): - assert data_min is not None, "data_min must be provided" - assert data_max is not None, "data_max must be provided" + def abs_bound_codec(error_bound, data_min_2d=None, data_max_2d=None, **kwargs): + assert data_min_2d is not None, "data_min_2d must be provided" + assert data_max_2d is not None, "data_max_2d must be provided" return numcodecs_safeguards.SafeguardedCodec( codec=numcodecs_zero.ZeroCodec(), @@ -46,14 +46,15 @@ def abs_bound_codec(error_bound, data_min=None, data_max=None, **kwargs): eb=0, ), ], - # use data_min instead of $x_min to allow for chunking - fixed_constants=dict(x_min=data_min, x_max=data_max), + # use data_min_2d instead of $x_min since we need the minimum per + # 2d latitude-longitude slice + fixed_constants=dict(x_min=data_min_2d, x_max=data_max_2d), ) @staticmethod - def rel_bound_codec(error_bound, data_min=None, data_max=None, **kwargs): - assert data_min is not None, "data_min must be 
provided" - assert data_max is not None, "data_max must be provided" + def rel_bound_codec(error_bound, data_min_2d=None, data_max_2d=None, **kwargs): + assert data_min_2d is not None, "data_min_2d must be provided" + assert data_max_2d is not None, "data_max_2d must be provided" return numcodecs_safeguards.SafeguardedCodec( codec=numcodecs_zero.ZeroCodec(), @@ -84,6 +85,7 @@ def rel_bound_codec(error_bound, data_min=None, data_max=None, **kwargs): eb=0, ), ], - # use data_min instead of $x_min to allow for chunking - fixed_constants=dict(x_min=data_min, x_max=data_max), + # use data_min_2d instead of $x_min since we need the minimum per + # 2d latitude-longitude slice + fixed_constants=dict(x_min=data_min_2d, x_max=data_max_2d), ) diff --git a/src/climatebenchpress/compressor/plotting/plot_metrics.py b/src/climatebenchpress/compressor/plotting/plot_metrics.py index b00d92c..408b098 100644 --- a/src/climatebenchpress/compressor/plotting/plot_metrics.py +++ b/src/climatebenchpress/compressor/plotting/plot_metrics.py @@ -207,8 +207,9 @@ def _normalize(data): # Normalize each variable by its mean and std normalized[new_col] = normalized.apply( - lambda x: (x[col] - mean_std[x["Variable"]][0]) - / mean_std[x["Variable"]][1], + lambda x: ( + (x[col] - mean_std[x["Variable"]][0]) / mean_std[x["Variable"]][1] + ), axis=1, ) diff --git a/src/climatebenchpress/compressor/scripts/compress.py b/src/climatebenchpress/compressor/scripts/compress.py index 96fa51f..df7d40f 100644 --- a/src/climatebenchpress/compressor/scripts/compress.py +++ b/src/climatebenchpress/compressor/scripts/compress.py @@ -87,6 +87,8 @@ def compress( ds_abs_maxs: dict[str, float] = dict() ds_mins: dict[str, float] = dict() ds_maxs: dict[str, float] = dict() + ds_min_2ds: dict[str, np.ndarray] = dict() + ds_max_2ds: dict[str, np.ndarray] = dict() for v in ds: vs: str = str(v) abs_vals = xr.ufuncs.abs(ds[v]) @@ -96,6 +98,16 @@ def compress( ds_abs_maxs[vs] = abs_vals.max().values.item() ds_mins[vs] = 
ds[v].min().values.item() ds_maxs[vs] = ds[v].max().values.item() + ds_min_2ds[vs] = ( + ds[v] + .min(dim=[ds[v].cf["Y"].name, ds[v].cf["X"].name], keepdims=True) + .values + ) + ds_max_2ds[vs] = ( + ds[v] + .max(dim=[ds[v].cf["Y"].name, ds[v].cf["X"].name], keepdims=True) + .values + ) if chunked: for v in ds: @@ -115,7 +127,14 @@ def compress( compressor_variants: dict[str, list[NamedPerVariableCodec]] = ( compressor.build( - ds_dtypes, ds_abs_mins, ds_abs_maxs, ds_mins, ds_maxs, error_bounds + ds_dtypes, + ds_abs_mins, + ds_abs_maxs, + ds_mins, + ds_maxs, + ds_min_2ds, + ds_max_2ds, + error_bounds, ) ) @@ -189,6 +208,15 @@ def compress_decompress( if not isinstance(codec, CodecStack): codec = CodecStack(codec) + # HACK: Safeguarded(0, dSSIM) requires the per-lat-lon-slice minimum + # and maximum + # for potentially-chunked data we should really use xarray-safeguards, + # but not using chunks also works (for now) + is_safeguarded_zero_dssim = ( + "# === pointwise dSSIM quantity of interest === #" + in json.dumps(codec.get_config()) + ) + with numcodecs_observers.observe( codec, observers=[ @@ -197,7 +225,9 @@ def compress_decompress( timing, ], ) as codec_: - variables[v] = codec_.encode_decode_data_array(ds[v]).compute() + variables[v] = codec_.encode_decode_data_array( + ds[v].compute() if is_safeguarded_zero_dssim else ds[v] + ).compute() cs = [c._codec for c in codec_.__iter__()] From de79c732ccf324ae6413059c73c99926585f7d3b Mon Sep 17 00:00:00 2001 From: Juniper Tyree Date: Wed, 4 Feb 2026 12:04:33 +0200 Subject: [PATCH 14/14] small cleanup --- .../compressor/compressors/safeguarded/zero_dssim.py | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/src/climatebenchpress/compressor/compressors/safeguarded/zero_dssim.py b/src/climatebenchpress/compressor/compressors/safeguarded/zero_dssim.py index 6f300ad..ea483ba 100644 --- a/src/climatebenchpress/compressor/compressors/safeguarded/zero_dssim.py +++ 
b/src/climatebenchpress/compressor/compressors/safeguarded/zero_dssim.py @@ -21,13 +21,15 @@ def abs_bound_codec(error_bound, data_min_2d=None, data_max_2d=None, **kwargs): codec=numcodecs_zero.ZeroCodec(), safeguards=[ dict(kind="eb", type="abs", eb=error_bound, equal_nan=True), - # guarantee that the global minimum and maximum are preserved, - # which simplifies the rescaling + # guarantee that the per-latitude-longitude-slice minimum and + # maximum are preserved, which simplifies the rescaling dict(kind="sign", offset="x_min"), dict(kind="sign", offset="x_max"), dict( kind="qoi_eb_pw", qoi=""" + # === pointwise dSSIM quantity of interest === # + # we guarantee that # min(data) = min(corrected) and # max(data) = max(corrected) @@ -60,13 +62,15 @@ def rel_bound_codec(error_bound, data_min_2d=None, data_max_2d=None, **kwargs): codec=numcodecs_zero.ZeroCodec(), safeguards=[ dict(kind="eb", type="rel", eb=error_bound, equal_nan=True), - # guarantee that the global minimum and maximum are preserved, - # which simplifies the rescaling + # guarantee that the per-latitude-longitude-slice minimum and + # maximum are preserved, which simplifies the rescaling dict(kind="sign", offset="x_min"), dict(kind="sign", offset="x_max"), dict( kind="qoi_eb_pw", qoi=""" + # === pointwise dSSIM quantity of interest === # + # we guarantee that # min(data) = min(corrected) and # max(data) = max(corrected)