From 8c8d099ad6aa0ba95fc71b9a61ded0b42fd20084 Mon Sep 17 00:00:00 2001 From: Brendan Collins Date: Mon, 4 May 2026 12:55:36 -0700 Subject: [PATCH] Make cubic resample prefilter explicit so chunk seams stay sub-eps (#1464) scipy.ndimage.map_coordinates with order >= 2 silently runs spline_filter on a 12-pixel edge-padded copy of the input. Calling it implicitly per block (and twice per block in the NaN-aware case, once for filled and once for weights) couples the IIR boundary transient to the chunk geometry; the eager and chunked paths could disagree by ~1e-6 near seams. Run the prepad+spline_filter step ourselves, shift the sample coordinates by the pad, and pass prefilter=False to map_coordinates. Mirror the change in the cupy path. Bump cubic overlap depth from 10 to 16 so the residual transient (0.268^16 ~ 7e-10) rounds to zero in the float32 output. Add a TestCubicPrefilterParity suite that asserts atol=1e-10 between eager numpy and dask+numpy paths on polynomial, random, and NaN inputs. --- xrspatial/resample.py | 168 +++++++++++++++++++++++++++---- xrspatial/tests/test_resample.py | 66 ++++++++++++ 2 files changed, 215 insertions(+), 19 deletions(-) diff --git a/xrspatial/resample.py b/xrspatial/resample.py index 5eb93978..76c242ef 100644 --- a/xrspatial/resample.py +++ b/xrspatial/resample.py @@ -9,7 +9,10 @@ import numpy as np import xarray as xr -from scipy.ndimage import map_coordinates as _scipy_map_coords +from scipy.ndimage import ( + map_coordinates as _scipy_map_coords, + spline_filter as _scipy_spline_filter, +) try: import dask.array as da @@ -39,9 +42,10 @@ # Overlap depth (input pixels) each interpolation kernel needs from # neighbouring chunks when processing dask arrays. Cubic requires extra # depth because the B-spline prefilter is a global IIR filter whose -# boundary transient decays as ~0.268^n; depth 10 brings it to machine -# epsilon. -_INTERP_DEPTH = {'nearest': 1, 'bilinear': 1, 'cubic': 10} +# boundary transient decays as ~0.268^n. Depth 16 puts the residual at +# ~7e-10, comfortably below float32 epsilon so chunk-seam parity rounds +# to zero in the float32 output. +_INTERP_DEPTH = {'nearest': 1, 'bilinear': 1, 'cubic': 16} # Approximate working-set size per output cell for the eager backends: # one float64 working buffer (8 B) plus a float32 output cell (4 B). @@ -159,6 +163,45 @@ def _block_centered_coords(n_in, n_out): return (o + 0.5) * (n_in / n_out) - 0.5 +# -- Spline prefilter helpers ----------------------------------------------- +# +# scipy.ndimage.map_coordinates(prefilter=True) silently does three things: +# (1) edge-pad the input by 12 pixels for mode='nearest' / 'grid-constant' +# so the IIR transient stabilises before reaching real data, +# (2) call spline_filter on that padded array, and +# (3) shift the sample coordinates by the same offset. The padding step +# is private (``_prepad_for_spline_filter``) and is needed for the +# explicit-prefilter path to match the implicit one bit-for-bit. +# +# We replicate it here so callers can prefilter once per array (e.g. the +# NaN-aware filled / weights pair) and pass ``prefilter=False`` to +# map_coordinates without changing the boundary semantics. Doing the +# prefilter explicitly also makes the per-block dask path deterministic -- +# the same spline coefficients are computed in eager and chunked modes +# (modulo the IIR transient that the depth=10 overlap already absorbs). + +_SPLINE_PREPAD_NEAREST = 12 + + +def _prepad_and_filter_np(arr, order): + """Edge-pad and spline-filter *arr* for an explicit ``mode='nearest'`` + prefilter pass. Returns ``(filtered, npad)``; the caller adds *npad* + to its sample coordinates. + """ + npad = _SPLINE_PREPAD_NEAREST + padded = np.pad(arr, npad, mode='edge') + filtered = _scipy_spline_filter(padded, order=order, mode='nearest') + return filtered, npad + + +def _prepad_and_filter_cupy(arr, order, spline_filter_fn): + """CuPy variant of :func:`_prepad_and_filter_np`.""" + npad = _SPLINE_PREPAD_NEAREST + padded = cupy.pad(arr, npad, mode='edge') + filtered = spline_filter_fn(padded, order=order, mode='nearest') + return filtered, npad + + # -- NaN-aware interpolation (NumPy) ---------------------------------------- def _nan_aware_interp_np(data, out_h, out_w, order): @@ -180,16 +223,38 @@ def _nan_aware_interp_np(data, out_h, out_w, order): result = _scipy_map_coords(data, coords, order=0, mode='nearest') return result.reshape(out_h, out_w) + # For order >= 2 run the spline prefilter explicitly so the IIR boundary + # transient is computed once per array instead of implicitly inside each + # map_coordinates call. Bilinear (order == 1) prefilter is a no-op. + use_explicit = order >= 2 + mask = np.isnan(data) if not mask.any(): - result = _scipy_map_coords(data, coords, order=order, mode='nearest') + if use_explicit: + src, npad = _prepad_and_filter_np(data, order) + result = _scipy_map_coords(src, coords + npad, order=order, + mode='nearest', prefilter=False) + else: + result = _scipy_map_coords(data, coords, order=order, + mode='nearest') return result.reshape(out_h, out_w) filled = np.where(mask, 0.0, data) weights = (~mask).astype(data.dtype) - z_data = _scipy_map_coords(filled, coords, order=order, mode='nearest') - z_wt = _scipy_map_coords(weights, coords, order=order, mode='nearest') + if use_explicit: + filled, npad = _prepad_and_filter_np(filled, order) + weights, _ = _prepad_and_filter_np(weights, order) + sample_coords = coords + npad + z_data = _scipy_map_coords(filled, sample_coords, order=order, + mode='nearest', prefilter=False) + z_wt = _scipy_map_coords(weights, sample_coords, order=order, + mode='nearest', prefilter=False) + else: + z_data = _scipy_map_coords(filled, coords, order=order, + mode='nearest') + z_wt = _scipy_map_coords(weights, coords, order=order, + mode='nearest') result = np.where(z_wt > 0.01, z_data / np.maximum(z_wt, 1e-10), @@ -201,7 +266,10 @@ def _nan_aware_interp_np(data, out_h, out_w, order): def _nan_aware_interp_cupy(data, out_h, out_w, order): """CuPy variant of :func:`_nan_aware_interp_np`.""" - from cupyx.scipy.ndimage import map_coordinates as _cupy_map_coords + from cupyx.scipy.ndimage import ( + map_coordinates as _cupy_map_coords, + spline_filter as _cupy_spline_filter, + ) iy = cupy.asarray(_block_centered_coords(data.shape[0], out_h)) ix = cupy.asarray(_block_centered_coords(data.shape[1], out_w)) @@ -212,16 +280,35 @@ def _nan_aware_interp_cupy(data, out_h, out_w, order): result = _cupy_map_coords(data, coords, order=0, mode='nearest') return result.reshape(out_h, out_w) + use_explicit = order >= 2 + mask = cupy.isnan(data) if not mask.any(): - result = _cupy_map_coords(data, coords, order=order, mode='nearest') + if use_explicit: + src, npad = _prepad_and_filter_cupy(data, order, _cupy_spline_filter) + result = _cupy_map_coords(src, coords + npad, order=order, + mode='nearest', prefilter=False) + else: + result = _cupy_map_coords(data, coords, order=order, + mode='nearest') return result.reshape(out_h, out_w) filled = cupy.where(mask, 0.0, data) weights = (~mask).astype(data.dtype) - z_data = _cupy_map_coords(filled, coords, order=order, mode='nearest') - z_wt = _cupy_map_coords(weights, coords, order=order, mode='nearest') + if use_explicit: + filled, npad = _prepad_and_filter_cupy(filled, order, _cupy_spline_filter) + weights, _ = _prepad_and_filter_cupy(weights, order, _cupy_spline_filter) + sample_coords = coords + npad + z_data = _cupy_map_coords(filled, sample_coords, order=order, + mode='nearest', prefilter=False) + z_wt = _cupy_map_coords(weights, sample_coords, order=order, + mode='nearest', prefilter=False) + else: + z_data = _cupy_map_coords(filled, coords, order=order, + mode='nearest') + z_wt = _cupy_map_coords(weights, coords, order=order, + mode='nearest') result = cupy.where(z_wt > 0.01, z_data / cupy.maximum(z_wt, 1e-10), @@ -411,15 +498,36 @@ def _interp_block_np(block, global_in_h, global_in_w, yy, xx = np.meshgrid(iy_local, ix_local, indexing='ij') coords = np.array([yy.ravel(), xx.ravel()]) - # NaN-aware interpolation + # NaN-aware interpolation. For order >= 2 we run the spline prefilter + # explicitly per array (block / filled / weights) so the IIR boundary + # transient is identical between eager and chunked paths. + use_explicit = order >= 2 + mask = np.isnan(block) if order == 0 or not mask.any(): - result = _scipy_map_coords(block, coords, order=order, mode='nearest') + if use_explicit: + src, npad = _prepad_and_filter_np(block, order) + result = _scipy_map_coords(src, coords + npad, order=order, + mode='nearest', prefilter=False) + else: + result = _scipy_map_coords(block, coords, order=order, + mode='nearest') else: filled = np.where(mask, 0.0, block) weights = (~mask).astype(np.float64) - z_data = _scipy_map_coords(filled, coords, order=order, mode='nearest') - z_wt = _scipy_map_coords(weights, coords, order=order, mode='nearest') + if use_explicit: + filled, npad = _prepad_and_filter_np(filled, order) + weights, _ = _prepad_and_filter_np(weights, order) + sample_coords = coords + npad + z_data = _scipy_map_coords(filled, sample_coords, order=order, + mode='nearest', prefilter=False) + z_wt = _scipy_map_coords(weights, sample_coords, order=order, + mode='nearest', prefilter=False) + else: + z_data = _scipy_map_coords(filled, coords, order=order, + mode='nearest') + z_wt = _scipy_map_coords(weights, coords, order=order, + mode='nearest') result = np.where(z_wt > 0.01, z_data / np.maximum(z_wt, 1e-10), np.nan) @@ -431,7 +539,10 @@ def _interp_block_cupy(block, global_in_h, global_in_w, cum_in_y, cum_in_x, cum_out_y, cum_out_x, depth, order, block_info=None): """CuPy variant of :func:`_interp_block_np`.""" - from cupyx.scipy.ndimage import map_coordinates as _cupy_map_coords + from cupyx.scipy.ndimage import ( + map_coordinates as _cupy_map_coords, + spline_filter as _cupy_spline_filter, + ) yi, xi = block_info[0]['chunk-location'] target_h = int(cum_out_y[yi + 1] - cum_out_y[yi]) @@ -454,14 +565,33 @@ def _interp_block_cupy(block, global_in_h, global_in_w, yy, xx = cupy.meshgrid(iy_local, ix_local, indexing='ij') coords = cupy.array([yy.ravel(), xx.ravel()]) + use_explicit = order >= 2 + mask = cupy.isnan(block) if order == 0 or not mask.any(): - result = _cupy_map_coords(block, coords, order=order, mode='nearest') + if use_explicit: + src, npad = _prepad_and_filter_cupy(block, order, _cupy_spline_filter) + result = _cupy_map_coords(src, coords + npad, order=order, + mode='nearest', prefilter=False) + else: + result = _cupy_map_coords(block, coords, order=order, + mode='nearest') else: filled = cupy.where(mask, 0.0, block) weights = (~mask).astype(cupy.float64) - z_data = _cupy_map_coords(filled, coords, order=order, mode='nearest') - z_wt = _cupy_map_coords(weights, coords, order=order, mode='nearest') + if use_explicit: + filled, npad = _prepad_and_filter_cupy(filled, order, _cupy_spline_filter) + weights, _ = _prepad_and_filter_cupy(weights, order, _cupy_spline_filter) + sample_coords = coords + npad + z_data = _cupy_map_coords(filled, sample_coords, order=order, + mode='nearest', prefilter=False) + z_wt = _cupy_map_coords(weights, sample_coords, order=order, + mode='nearest', prefilter=False) + else: + z_data = _cupy_map_coords(filled, coords, order=order, + mode='nearest') + z_wt = _cupy_map_coords(weights, coords, order=order, + mode='nearest') result = cupy.where(z_wt > 0.01, z_data / cupy.maximum(z_wt, 1e-10), cupy.nan) diff --git a/xrspatial/tests/test_resample.py b/xrspatial/tests/test_resample.py index d77e3061..851eccf5 100644 --- a/xrspatial/tests/test_resample.py +++ b/xrspatial/tests/test_resample.py @@ -380,6 +380,72 @@ def test_aggregate_parity(self, numpy_and_cupy_rasters, method): atol=1e-5, equal_nan=True) +# --------------------------------------------------------------------------- +# Cubic prefilter chunk-seam parity (#1464) +# --------------------------------------------------------------------------- + +@dask_array_available +class TestCubicPrefilterParity: + """Explicit spline prefilter keeps cubic resample bit-identical between + the eager numpy path and the chunked dask+numpy path. + """ + + def _make_pair(self, data, chunks=(8, 8)): + np_agg = create_test_raster(data, backend='numpy', + attrs={'res': (1.0, 1.0)}) + dk_agg = create_test_raster(data, backend='dask+numpy', + attrs={'res': (1.0, 1.0)}, + chunks=chunks) + return np_agg, dk_agg + + def test_cubic_polynomial_chunk_seam_high_precision(self): + """Degree-2 polynomial is exactly representable by cubic splines. + With enough chunks to expose multiple seams, eager and chunked + paths must agree to float64 round-off. + """ + y, x = np.mgrid[0:60, 0:60].astype(np.float64) + f = (x * x + y * y - x * y).astype(np.float32) + np_agg, dk_agg = self._make_pair(f, chunks=(13, 13)) + np_out = resample(np_agg, scale_factor=0.5, method='cubic').values + dk_out = resample(dk_agg, scale_factor=0.5, method='cubic').values + # Tightened from the 1e-5 used in TestDaskParity to catch prefilter + # boundary drift; would have caught the implicit-prefilter bug. + np.testing.assert_allclose(dk_out, np_out, atol=1e-10) + + def test_cubic_random_chunk_seam_tight(self): + """Random data also matches once the explicit prefilter is in place. + Without the fix the implicit per-block prefilter leaks ~1e-6 of + boundary transient into chunk-interior samples. + """ + data = np.random.RandomState(1152).rand(50, 50).astype(np.float32) + np_agg, dk_agg = self._make_pair(data, chunks=(11, 11)) + np_out = resample(np_agg, scale_factor=0.5, method='cubic').values + dk_out = resample(dk_agg, scale_factor=0.5, method='cubic').values + np.testing.assert_allclose(dk_out, np_out, atol=1e-10) + + @pytest.mark.parametrize('sf', [0.5, 2.0, 0.7]) + def test_cubic_chunk_seam_various_scales(self, sf): + """Tight parity holds across upsample, downsample, and odd ratios.""" + data = np.random.RandomState(7).rand(48, 48).astype(np.float32) + np_agg, dk_agg = self._make_pair(data, chunks=(11, 11)) + np_out = resample(np_agg, scale_factor=sf, method='cubic').values + dk_out = resample(dk_agg, scale_factor=sf, method='cubic').values + np.testing.assert_allclose(dk_out, np_out, atol=1e-10) + + def test_cubic_chunk_seam_with_nan(self): + """NaN-aware path uses two prefilter passes (filled + weights); + both must stay deterministic across chunk boundaries. + """ + data = np.random.RandomState(99).rand(50, 50).astype(np.float32) + data[5, 5] = np.nan + data[20, 30] = np.nan + np_agg, dk_agg = self._make_pair(data, chunks=(11, 11)) + np_out = resample(np_agg, scale_factor=0.5, method='cubic').values + dk_out = resample(dk_agg, scale_factor=0.5, method='cubic').values + np.testing.assert_allclose(dk_out, np_out, atol=1e-10, + equal_nan=True) + + # --------------------------------------------------------------------------- # Memory guard (#1295) # ---------------------------------------------------------------------------