Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
31 changes: 31 additions & 0 deletions docs/faq.md
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,37 @@ Use `grid_shape` when a fixed pixel width/height is required (e.g., ML model inp
## I get 429 quota errors. What do I do?
Reduce parallelism (fewer Dask workers), narrow the AOI or time range, combine server-side operations before opening, or switch to the standard endpoint for computed collections.

Xee also exposes retry controls for both pixel fetches (`getitem_kwargs`) and
metadata `getInfo()` calls (`getinfo_kwargs`); `initial_delay` is given in
milliseconds. For example:

```python
import xarray as xr

ds = xr.open_dataset(
'ee://ECMWF/ERA5_LAND/MONTHLY_AGGR',
engine='ee',
crs='EPSG:4326',
crs_transform=(0.25, 0, -180, 0, -0.25, 90),
shape_2d=(1440, 720),
getitem_kwargs={
'max_retries': 8,
'initial_delay': 500,
},
getinfo_kwargs={
'max_retries': 8,
'initial_delay': 1000,
},
)
```

Defaults are:

- `getitem_kwargs`: `max_retries=6`, `initial_delay=500` ms
- `getinfo_kwargs`: `max_retries=6`, `initial_delay=1000` ms

`helpers.extract_grid_params(...)` also supports `getinfo_kwargs` if metadata
calls need tuning under quota pressure.

## Can I open a computed `ee.ImageCollection`?
Yes. Build the collection with filtering / mapping functions, then pass the resulting collection object directly to `xr.open_dataset(..., engine='ee')` with grid parameters.

Expand Down
34 changes: 34 additions & 0 deletions docs/guide.md
Original file line number Diff line number Diff line change
Expand Up @@ -154,6 +154,40 @@ temp_slice = ds['temperature_2m'].isel(time=0)
temp_slice.plot()
```

## Configure Retries

Xee supports configurable retries for two paths:

- Pixel reads via `getitem_kwargs`
- Metadata `getInfo()` calls via `getinfo_kwargs`

```python
import ee
import xarray as xr
from xee import helpers

ic = ee.ImageCollection('ECMWF/ERA5_LAND/MONTHLY_AGGR')

# Optional: tune helper metadata fetch retries.
grid_params = helpers.extract_grid_params(
ic,
getinfo_kwargs={'max_retries': 8, 'initial_delay': 1000},
)

ds = xr.open_dataset(
ic,
engine='ee',
**grid_params,
getitem_kwargs={'max_retries': 8, 'initial_delay': 500},
getinfo_kwargs={'max_retries': 8, 'initial_delay': 1000},
)
```

Defaults:

- `getitem_kwargs`: `max_retries=6`, `initial_delay=500` ms
- `getinfo_kwargs`: `max_retries=6`, `initial_delay=1000` ms

## Further Resources

- [Core Concepts](concepts.md)
Expand Down
36 changes: 36 additions & 0 deletions docs/performance.md
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,42 @@ Recommendations:
3. Consolidate operations server-side (EE `.map`, `.select`, band math) before opening in Xee.
4. Cache intermediate results in memory rather than re-opening repeatedly.

## Retry Tuning

Xee uses exponential backoff with jitter for:

- Pixel requests (`getitem_kwargs`) used during array reads.
- Metadata `getInfo()` requests (`getinfo_kwargs`) used during dataset setup and
helper metadata fetches.

Defaults:

- `getitem_kwargs`: `max_retries=6`, `initial_delay=500` ms
- `getinfo_kwargs`: `max_retries=6`, `initial_delay=1000` ms

`getinfo_kwargs` starts with a longer default delay to reduce setup-time retry bursts against EE metadata endpoints.

You can tune these in `xr.open_dataset(...)` (here `collection` is the `ee.ImageCollection` you are opening):

```python
ds = xr.open_dataset(
collection,
engine='ee',
crs='EPSG:4326',
crs_transform=(0.25, 0, -180, 0, -0.25, 90),
shape_2d=(1440, 720),
getitem_kwargs={'max_retries': 8, 'initial_delay': 500},
getinfo_kwargs={'max_retries': 8, 'initial_delay': 1000},
)
```

Rule of thumb:

1. If failures happen during dataset open / metadata fetch, tune
`getinfo_kwargs` first.
2. If failures happen during chunk reads / compute, tune `getitem_kwargs` first.
3. Reduce Dask concurrency before increasing retries too aggressively.

## Chunk Size Considerations

EE responses have an upper size limit (tens of MB). Xee's backend picks reasonable pixel window sizes automatically. If you see many small requests, consider choosing a coarser grid or limiting variable selection to needed bands.
Expand Down
38 changes: 35 additions & 3 deletions xee/ext.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,7 @@
from xarray.backends import store as backends_store
from xarray.core import indexing
from xarray.core import utils
from xee import retries
from xee import types

import ee
Expand Down Expand Up @@ -128,6 +129,11 @@ class EarthEngineStore(common.AbstractDataStore):
'initial_delay': 500,
}

GETINFO_KWARGS: dict[str, int] = {
'max_retries': 6,
'initial_delay': 1000,
}

SCALE_UNITS: dict[str, int] = {
'degree': 1,
'metre': 10_000,
Expand Down Expand Up @@ -166,6 +172,7 @@ def open(
ee_init_if_necessary: bool = False,
executor_kwargs: dict[str, Any] | None = None,
getitem_kwargs: dict[str, int] | None = None,
getinfo_kwargs: dict[str, int] | None = None,
fast_time_slicing: bool = False,
) -> EarthEngineStore:
if mode != 'r':
Expand All @@ -188,6 +195,7 @@ def open(
ee_init_if_necessary=ee_init_if_necessary,
executor_kwargs=executor_kwargs,
getitem_kwargs=getitem_kwargs,
getinfo_kwargs=getinfo_kwargs,
fast_time_slicing=fast_time_slicing,
)

Expand All @@ -207,6 +215,7 @@ def __init__(
ee_init_if_necessary: bool = False,
executor_kwargs: dict[str, Any] | None = None,
getitem_kwargs: dict[str, int] | None = None,
getinfo_kwargs: dict[str, int] | None = None,
fast_time_slicing: bool = False,
):
# Ensure crs_transform is a tuple and create the affine.Affine object.
Expand Down Expand Up @@ -236,6 +245,7 @@ def __init__(
self.executor_kwargs = executor_kwargs

self.getitem_kwargs = {**self.GETITEM_KWARGS, **(getitem_kwargs or {})}
self.getinfo_kwargs = {**self.GETINFO_KWARGS, **(getinfo_kwargs or {})}

self.image_collection = image_collection
if n_images != -1:
Expand Down Expand Up @@ -306,7 +316,11 @@ def get_info(self) -> dict[str, Any]:
)
)

info = ee.List([rpc for _, rpc in rpcs]).getInfo()
info = retries.robust_call(
lambda: ee.List([rpc for _, rpc in rpcs]).getInfo(),
catch=ee.ee_exception.EEException,
**self.getinfo_kwargs,
)

return dict(zip((name for name, _ in rpcs), info))

Expand Down Expand Up @@ -657,9 +671,20 @@ def _ee_bounds_to_bounds(bounds: dict[str, Any]) -> types.Bounds:
return x_min, y_min, x_max, y_max


def geometry_to_bounds(
    geom: ee.Geometry,
    getinfo_kwargs: dict[str, int] | None = None,
) -> types.Bounds:
  """Finds the CRS bounds from a ee.Geometry polygon.

  The `getInfo()` round trip is issued only through `retries.robust_call`, so
  there is no separate un-retried fetch ahead of it.

  Args:
    geom: The Earth Engine geometry whose bounding box is requested.
    getinfo_kwargs: Optional retry settings for the `getInfo()` call. Any keys
      given here override `EarthEngineStore.GETINFO_KWARGS`; keys that are
      omitted keep the store defaults. The caller's dict is not modified.

  Returns:
    The bounds produced by `_ee_bounds_to_bounds` from the fetched geometry
    bounds.
  """
  # Merge into a fresh dict so neither the class-level defaults nor the
  # caller-supplied mapping are mutated.
  retry_kwargs = {
      **EarthEngineStore.GETINFO_KWARGS,
      **(getinfo_kwargs or {}),
  }
  # The lambda defers the network call so robust_call can re-invoke it.
  bounds = retries.robust_call(
      lambda: geom.bounds().getInfo(),
      catch=ee.ee_exception.EEException,
      **retry_kwargs,
  )
  return _ee_bounds_to_bounds(bounds)


Expand Down Expand Up @@ -920,6 +945,7 @@ def open_dataset(
ee_init_kwargs: dict[str, Any] | None = None,
executor_kwargs: dict[str, Any] | None = None,
getitem_kwargs: dict[str, int] | None = None,
getinfo_kwargs: dict[str, int] | None = None,
fast_time_slicing: bool = False,
) -> xarray.Dataset: # type: ignore
"""Open an Earth Engine ImageCollection as an Xarray Dataset.
Expand Down Expand Up @@ -989,6 +1015,11 @@ def open_dataset(
- 'max_retries', the maximum number of retry attempts. Defaults to 6.
- 'initial_delay', the initial delay in milliseconds before the first
retry. Defaults to 500.
getinfo_kwargs (optional): Exponential backoff kwargs applied to
Earth Engine `getInfo()` calls used by Xee metadata workflows.
- 'max_retries', the maximum number of retry attempts. Defaults to 6.
- 'initial_delay', the initial delay in milliseconds before the first
retry. Defaults to 1000.
fast_time_slicing (optional): Whether to perform an optimization that
makes slicing an ImageCollection across time faster. This optimization
loads EE images in a slice by ID, so any modifications to images in a
Expand Down Expand Up @@ -1023,6 +1054,7 @@ def open_dataset(
ee_init_if_necessary=ee_init_if_necessary,
executor_kwargs=executor_kwargs,
getitem_kwargs=getitem_kwargs,
getinfo_kwargs=getinfo_kwargs,
fast_time_slicing=fast_time_slicing,
)

Expand Down
26 changes: 26 additions & 0 deletions xee/ext_integration_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -105,6 +105,14 @@ def setUp(self):
n_images=64,
getitem_kwargs={'max_retries': 9},
)
self.getinfo_tuned_store = xee.EarthEngineStore(
ee.ImageCollection('LANDSAT/LC08/C02/T1').filterDate(
'2017-01-01', '2017-01-03'
),
n_images=64,
getinfo_kwargs={'max_retries': 9, 'initial_delay': 1200},
**_TEST_GRID_PARAMS,
)
self.all_img_store = xee.EarthEngineStore(
ee.ImageCollection('LANDSAT/LC08/C02/T1').filterDate(
'2017-01-01', '2017-01-03'
Expand Down Expand Up @@ -298,6 +306,15 @@ def test_getitem_kwargs(self):
self.assertEqual(arr2.store.getitem_kwargs['initial_delay'], 500)
self.assertEqual(arr2.store.getitem_kwargs['max_retries'], 9)

def test_getinfo_kwargs(self):
  """Checks the `getinfo_kwargs` values exposed on the backing store."""
  # Store constructed with explicit getinfo retry overrides.
  tuned_array = xee.EarthEngineBackendArray('B4', self.getinfo_tuned_store)
  tuned_kwargs = tuned_array.store.getinfo_kwargs
  self.assertEqual(tuned_kwargs['initial_delay'], 1200)
  self.assertEqual(tuned_kwargs['max_retries'], 9)

  # This store is expected to report the default retry settings.
  default_array = xee.EarthEngineBackendArray('longitude', self.lnglat_store)
  default_kwargs = default_array.store.getinfo_kwargs
  self.assertEqual(default_kwargs['initial_delay'], 1000)
  self.assertEqual(default_kwargs['max_retries'], 6)


class EEBackendEntrypointTest(absltest.TestCase):

Expand Down Expand Up @@ -632,6 +649,15 @@ def test_extract_grid_params_from_image(self):
self.assertEqual(grid_params['crs'], 'EPSG:32613')
np.allclose(grid_params['crs_transform'], [30, 0, 643185, 0, -30, 4255815])

def test_extract_grid_params_from_image_with_getinfo_kwargs(self):
  """Checks `extract_grid_params` on an image when `getinfo_kwargs` is set."""
  img = ee.Image('LANDSAT/LT05/C02/T1_TOA/LT05_031034_20110619')
  grid_params = helpers.extract_grid_params(
      img, getinfo_kwargs={'max_retries': 8, 'initial_delay': 1100}
  )
  self.assertEqual(grid_params['shape_2d'], (7881, 6981))
  self.assertEqual(grid_params['crs'], 'EPSG:32613')
  # np.allclose only returns a bool; without an assertion a mismatching
  # transform would pass silently.
  self.assertTrue(
      np.allclose(
          grid_params['crs_transform'], [30, 0, 643185, 0, -30, 4255815]
      )
  )

def test_extract_grid_params_from_image_collection(self):
dem = ee.ImageCollection('COPERNICUS/DEM/GLO30')
grid_params = helpers.extract_grid_params(dem)
Expand Down
Loading
Loading