diff --git a/docs/reference/configurator.config_entsoe.ConfigEntsoe.qmd b/docs/reference/configurator.config_entsoe.ConfigEntsoe.qmd index 8609e324..9247f8bd 100644 --- a/docs/reference/configurator.config_entsoe.ConfigEntsoe.qmd +++ b/docs/reference/configurator.config_entsoe.ConfigEntsoe.qmd @@ -61,6 +61,7 @@ configurator.config_entsoe.ConfigEntsoe( task='lazy', agg_weights=None, forecaster_factory=None, + lgbm_n_jobs=1, data_loader=None, test_data_loader=None, auto_save_models=True, diff --git a/docs/reference/configurator.config_multi.ConfigMulti.qmd b/docs/reference/configurator.config_multi.ConfigMulti.qmd index ba76fab4..93cef29b 100644 --- a/docs/reference/configurator.config_multi.ConfigMulti.qmd +++ b/docs/reference/configurator.config_multi.ConfigMulti.qmd @@ -61,6 +61,7 @@ configurator.config_multi.ConfigMulti( task='lazy', agg_weights=None, forecaster_factory=None, + lgbm_n_jobs=1, data_loader=None, test_data_loader=None, auto_save_models=True, @@ -130,6 +131,7 @@ API queries and holiday feature generation. | poly_features_degree | [int](`int`) | Polynomial-interaction degree. ``1`` (default) generates no interactions; ``2`` adds pairwise bilinear terms; ``3+`` higher order. | `1` | | max_poly_features | [int](`int`) | Cap on polynomial interaction columns; only the top ``max_poly_features`` ranked by mutual information with the target are kept (``<= 0`` disables). Defaults to ``10``. | `10` | | poly_mi_n_jobs | [Optional](`typing.Optional`)\[[int](`int`)\] | Parallel jobs for the mutual-information ranking that enforces ``max_poly_features``. ``-1`` (default) uses all cores; ``None`` runs single-threaded. Parallelism does not change the selection. | `-1` | +| lgbm_n_jobs | [int](`int`) | Thread count for the LightGBM estimators built by the lgbm forecaster factories (``LGBMRegressor(n_jobs=...)``). Defaults to ``1`` so the backtester parallelises CV folds across processes instead of relying on LightGBM's in-model OpenMP, which anti-scales on heterogeneous-core CPUs (e.g. Apple Silicon). Set ``-1`` / a larger value on many-core homogeneous machines (e.g. Linux Xeon). | `1` | | poly_mi_sample_size | [Optional](`typing.Optional`)\[[int](`int`)\] | Row cap for that ranking; longer series are scored on a reproducible random subsample of this size (seeded by ``random_state``), which can change which borderline columns make the top K. ``None`` scores every row (the pre-15.8 behaviour). Defaults to ``4000``. | `4000` | | index_name | [str](`str`) | Name assigned to the datetime column when the index is reset. Defaults to ``"DateTime"``. | `'DateTime'` | | bounds | [Optional](`typing.Optional`)\[[List](`typing.List`)\[[tuple](`tuple`)\]\] | Per-column outlier bounds as a list of ``(lower, upper)`` tuples, one entry per target column. ``None`` until set. | `None` | @@ -184,6 +186,7 @@ API queries and holiday feature generation. | poly_features_degree | [int](`int`) | Polynomial-interaction degree (1 = off). | | max_poly_features | [int](`int`) | Cap on kept ``poly_*`` columns (top-K by MI). | | poly_mi_n_jobs | [Optional](`typing.Optional`)\[[int](`int`)\] | Parallel jobs for the MI ranking (``-1`` = all cores; selection-invariant). | +| lgbm_n_jobs | [int](`int`) | LightGBM estimator thread count for the lgbm forecaster factories (default ``1``; favours per-fold process parallelism over in-model OpenMP, which anti-scales on Apple Silicon). Raise on many-core homogeneous CPUs. | | poly_mi_sample_size | [Optional](`typing.Optional`)\[[int](`int`)\] | Row cap for the MI ranking (``None`` = score every row). | | include_covid_infection_rate | [bool](`bool`) | Append the bundled RKI German national COVID-19 7-day incidence as an exogenous regressor. | | include_entsoe_forecast_load | [bool](`bool`) | Append the ENTSO-E day-ahead Forecasted Load as a near-oracle exogenous prior. | diff --git a/docs/reference/multitask.factories.default_lgbm_forecaster_factory.qmd b/docs/reference/multitask.factories.default_lgbm_forecaster_factory.qmd index 7ee88354..443e2027 100644 --- a/docs/reference/multitask.factories.default_lgbm_forecaster_factory.qmd +++ b/docs/reference/multitask.factories.default_lgbm_forecaster_factory.qmd @@ -18,11 +18,11 @@ a signature change. ## Parameters {.doc-section .doc-section-parameters} -| Name | Type | Description | Default | -|-------------|------------------------------------------------------|--------------------------------------------------------------------------------------------------------------------------------------------------------------------|------------| -| config | [Any](`typing.Any`) | Any object satisfying the ``PipelineConfig`` protocol from ``spotforecast2_safe.multitask.base``. Reads ``random_state``, ``lags_consider``, and ``window_size``. | _required_ | -| weight_func | [Optional](`typing.Optional`)\[[Any](`typing.Any`)\] | Optional per-sample weight function produced by the imputation step (``apply_imputation``). | `None` | -| target | [Optional](`typing.Optional`)\[[str](`str`)\] | Target column name. Ignored by this default factory; provided for the benefit of custom factories that need it. | `None` | +| Name | Type | Description | Default | +|-------------|------------------------------------------------------|---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|------------| +| config | [Any](`typing.Any`) | Any object satisfying the ``PipelineConfig`` protocol from ``spotforecast2_safe.multitask.base``. Reads ``random_state``, ``lags_consider``, ``window_size`` and ``lgbm_n_jobs`` (the LightGBM thread count, ``getattr`` default ``1`` for config-like objects that predate the field; see ``ConfigMulti.lgbm_n_jobs``). | _required_ | +| weight_func | [Optional](`typing.Optional`)\[[Any](`typing.Any`)\] | Optional per-sample weight function produced by the imputation step (``apply_imputation``). | `None` | +| target | [Optional](`typing.Optional`)\[[str](`str`)\] | Target column name. Ignored by this default factory; provided for the benefit of custom factories that need it. | `None` | ## Returns {.doc-section .doc-section-returns} diff --git a/docs/reference/multitask.factories.quantile_lgbm_forecaster_factory.qmd b/docs/reference/multitask.factories.quantile_lgbm_forecaster_factory.qmd index 7620083d..3768ad58 100644 --- a/docs/reference/multitask.factories.quantile_lgbm_forecaster_factory.qmd +++ b/docs/reference/multitask.factories.quantile_lgbm_forecaster_factory.qmd @@ -20,12 +20,12 @@ sf2-safe (LightGBM only, no torch/optuna). Refs ``hong16b``, ``roma19a``. ## Parameters {.doc-section .doc-section-parameters} -| Name | Type | Description | Default | -|-------------|------------------------------------------------------|----------------------------------------------------------------------------------------------------------------|---------------------| -| config | [Any](`typing.Any`) | Object satisfying the ``PipelineConfig`` protocol; reads ``random_state``, ``lags_consider``, ``window_size``. | _required_ | -| quantiles | [Sequence](`typing.Sequence`)\[[float](`float`)\] | Quantile levels in the open interval ``(0, 1)``. Defaults to ``(0.1, 0.5, 0.9)``. | `DEFAULT_QUANTILES` | -| weight_func | [Optional](`typing.Optional`)\[[Any](`typing.Any`)\] | Optional per-sample weight function. | `None` | -| target | [Optional](`typing.Optional`)\[[str](`str`)\] | Accepted and ignored (parity with the default factory). | `None` | +| Name | Type | Description | Default | +|-------------|------------------------------------------------------|---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|---------------------| +| config | [Any](`typing.Any`) | Object satisfying the ``PipelineConfig`` protocol; reads ``random_state``, ``lags_consider``, ``window_size`` and ``lgbm_n_jobs`` (LightGBM thread count, ``getattr`` default ``1``). | _required_ | +| quantiles | [Sequence](`typing.Sequence`)\[[float](`float`)\] | Quantile levels in the open interval ``(0, 1)``. Defaults to ``(0.1, 0.5, 0.9)``. | `DEFAULT_QUANTILES` | +| weight_func | [Optional](`typing.Optional`)\[[Any](`typing.Any`)\] | Optional per-sample weight function. | `None` | +| target | [Optional](`typing.Optional`)\[[str](`str`)\] | Accepted and ignored (parity with the default factory). | `None` | ## Returns {.doc-section .doc-section-returns} diff --git a/src/spotforecast2_safe/configurator/config_multi.py b/src/spotforecast2_safe/configurator/config_multi.py index 4489b0a3..bdb4919a 100644 --- a/src/spotforecast2_safe/configurator/config_multi.py +++ b/src/spotforecast2_safe/configurator/config_multi.py @@ -114,6 +114,12 @@ class features (``is_workday``, ``day_type``). Defaults to ``False``. ranking that enforces ``max_poly_features``. ``-1`` (default) uses all cores; ``None`` runs single-threaded. Parallelism does not change the selection. + lgbm_n_jobs (int): Thread count for the LightGBM estimators built by the + lgbm forecaster factories (``LGBMRegressor(n_jobs=...)``). Defaults + to ``1`` so the backtester parallelises CV folds across processes + instead of relying on LightGBM's in-model OpenMP, which anti-scales + on heterogeneous-core CPUs (e.g. Apple Silicon). Set ``-1`` / a + larger value on many-core homogeneous machines (e.g. Linux Xeon). poly_mi_sample_size (Optional[int]): Row cap for that ranking; longer series are scored on a reproducible random subsample of this size (seeded by ``random_state``), which can change which borderline @@ -237,6 +243,10 @@ class features (``is_workday``, ``day_type``). Defaults to ``False``. max_poly_features (int): Cap on kept ``poly_*`` columns (top-K by MI). poly_mi_n_jobs (Optional[int]): Parallel jobs for the MI ranking (``-1`` = all cores; selection-invariant). + lgbm_n_jobs (int): LightGBM estimator thread count for the lgbm + forecaster factories (default ``1``; favours per-fold process + parallelism over in-model OpenMP, which anti-scales on Apple + Silicon). Raise on many-core homogeneous CPUs. poly_mi_sample_size (Optional[int]): Row cap for the MI ranking (``None`` = score every row). include_covid_infection_rate (bool): Append the bundled RKI German @@ -451,6 +461,17 @@ class features (``is_workday``, ``day_type``). Defaults to ``False``. # ``BaseTask.create_forecaster`` falls back to # ``default_lgbm_forecaster_factory``. forecaster_factory: Optional[Any] = None + # Thread count for the LightGBM estimators built by the lgbm forecaster + # factories (``default_lgbm_forecaster_factory`` / + # ``quantile_lgbm_forecaster_factory``), forwarded as ``LGBMRegressor(n_jobs=...)``. + # Default ``1``: on heterogeneous-core CPUs (e.g. Apple Silicon's + # performance + efficiency cores) LightGBM's all-core OpenMP *anti-scales* + # (the fork-join barrier stalls on the slow E-cores). With ``n_jobs=1`` the + # backtesting heuristic (``select_n_jobs_backtesting``) instead parallelises + # the CV folds across processes, which scales cleanly. Raise it (or set + # ``-1`` for all cores) on many-core homogeneous machines (e.g. Linux Xeon) + # where in-model threading scales well. + lgbm_n_jobs: int = 1 # Data-loader hook (consumed by ``BaseTask.prepare_data``): # ``data_loader(config) -> pd.DataFrame``. Invoked iff no DataFrame is # supplied via the constructor or ``prepare_data``. diff --git a/src/spotforecast2_safe/multitask/base.py b/src/spotforecast2_safe/multitask/base.py index 405fd9d1..4906f031 100644 --- a/src/spotforecast2_safe/multitask/base.py +++ b/src/spotforecast2_safe/multitask/base.py @@ -156,6 +156,10 @@ class PipelineConfig(Protocol): random_state: int verbose: bool cache_home: Optional[Any] + # LightGBM estimator thread count for the lgbm forecaster factories + # (LGBMRegressor(n_jobs=...)); default 1 favours per-fold process + # parallelism over in-model OpenMP. See ConfigMulti.lgbm_n_jobs. + lgbm_n_jobs: int # Optional callables forecaster_factory: Optional[Any] data_loader: Optional[Any] diff --git a/src/spotforecast2_safe/multitask/factories.py b/src/spotforecast2_safe/multitask/factories.py index 8d0aaaed..36267bb3 100644 --- a/src/spotforecast2_safe/multitask/factories.py +++ b/src/spotforecast2_safe/multitask/factories.py @@ -47,7 +47,9 @@ def default_lgbm_forecaster_factory( Args: config: Any object satisfying the ``PipelineConfig`` protocol from ``spotforecast2_safe.multitask.base``. Reads ``random_state``, - ``lags_consider``, and ``window_size``. + ``lags_consider``, ``window_size`` and ``lgbm_n_jobs`` (the + LightGBM thread count, ``getattr`` default ``1`` for config-like + objects that predate the field; see ``ConfigMulti.lgbm_n_jobs``). weight_func: Optional per-sample weight function produced by the imputation step (``apply_imputation``). target: Target column name. Ignored by this default factory; provided @@ -79,7 +81,11 @@ def default_lgbm_forecaster_factory( """ del target # default factory does not specialise per target return ForecasterRecursive( - estimator=LGBMRegressor(random_state=config.random_state, verbose=-1), + estimator=LGBMRegressor( + random_state=config.random_state, + verbose=-1, + n_jobs=getattr(config, "lgbm_n_jobs", 1), + ), lags=config.lags_consider[-1], window_features=RollingFeaturesUnified( stats=["mean"], window_sizes=config.window_size @@ -117,7 +123,8 @@ def quantile_lgbm_forecaster_factory( Args: config: Object satisfying the ``PipelineConfig`` protocol; reads - ``random_state``, ``lags_consider``, ``window_size``. + ``random_state``, ``lags_consider``, ``window_size`` and + ``lgbm_n_jobs`` (LightGBM thread count, ``getattr`` default ``1``). quantiles: Quantile levels in the open interval ``(0, 1)``. Defaults to ``(0.1, 0.5, 0.9)``. weight_func: Optional per-sample weight function. @@ -155,6 +162,7 @@ def quantile_lgbm_forecaster_factory( alpha=q, random_state=config.random_state, verbose=-1, + n_jobs=getattr(config, "lgbm_n_jobs", 1), ), lags=config.lags_consider[-1], window_features=RollingFeaturesUnified( diff --git a/src/spotforecast2_safe/weather/client.py b/src/spotforecast2_safe/weather/client.py index 6aeddd9f..8237cc51 100644 --- a/src/spotforecast2_safe/weather/client.py +++ b/src/spotforecast2_safe/weather/client.py @@ -25,20 +25,23 @@ os.environ.get("SPOTFORECAST2_WEATHER_MIN_REQUEST_INTERVAL", "0.5") ) _THROTTLE_LOCK = threading.Lock() -_LAST_REQUEST_MONOTONIC = 0.0 +# One-element list holding the monotonic timestamp of the most recent Open-Meteo +# request. A mutable container is updated in place under the lock, so +# ``_throttle_open_meteo`` needs no module-level ``global`` rebind (which static +# analysis flags as an unused global, py/unused-global-variable). +_LAST_REQUEST_MONOTONIC = [0.0] def _throttle_open_meteo() -> None: """Block until at least ``_MIN_REQUEST_INTERVAL_S`` has passed since the last Open-Meteo request (process-wide). No-op when the interval is non-positive.""" - global _LAST_REQUEST_MONOTONIC if _MIN_REQUEST_INTERVAL_S <= 0: return with _THROTTLE_LOCK: - wait = _MIN_REQUEST_INTERVAL_S - (monotonic() - _LAST_REQUEST_MONOTONIC) + wait = _MIN_REQUEST_INTERVAL_S - (monotonic() - _LAST_REQUEST_MONOTONIC[0]) if wait > 0: sleep(wait) - _LAST_REQUEST_MONOTONIC = monotonic() + _LAST_REQUEST_MONOTONIC[0] = monotonic() class WeatherFetchError(ValueError): diff --git a/tests/multitask/test_multitask_base.py b/tests/multitask/test_multitask_base.py index dfe775e2..83d454c0 100644 --- a/tests/multitask/test_multitask_base.py +++ b/tests/multitask/test_multitask_base.py @@ -492,6 +492,18 @@ def test_weight_func_none_initially(self, tmp_path): fc = task.create_forecaster() assert fc.weight_func is None + def test_lgbm_n_jobs_defaults_to_one(self, tmp_path): + """The default factory pins LightGBM to a single thread by default.""" + task = LazyTask(_minimal_cfg(cache_home=tmp_path)) + fc = task.create_forecaster() + assert fc.estimator.get_params()["n_jobs"] == 1 + + def test_lgbm_n_jobs_honoured_from_config(self, tmp_path): + """config.lgbm_n_jobs flows through to the LightGBM estimator.""" + task = LazyTask(_minimal_cfg(cache_home=tmp_path, lgbm_n_jobs=-1)) + fc = task.create_forecaster() + assert fc.estimator.get_params()["n_jobs"] == -1 + def test_custom_factory_override(self, tmp_path): """config.forecaster_factory replaces the default factory.""" sentinel = object() diff --git a/tests/test_per_zone_weather.py b/tests/test_per_zone_weather.py index a190e1b9..907e08e6 100644 --- a/tests/test_per_zone_weather.py +++ b/tests/test_per_zone_weather.py @@ -624,7 +624,7 @@ def test_throttle_spaces_open_meteo_requests(self, monkeypatch): import spotforecast2_safe.weather.client as wc monkeypatch.setattr(wc, "_MIN_REQUEST_INTERVAL_S", 0.5) - monkeypatch.setattr(wc, "_LAST_REQUEST_MONOTONIC", 0.0) + monkeypatch.setattr(wc, "_LAST_REQUEST_MONOTONIC", [0.0]) clock = [1000.0] sleeps: list[float] = [] monkeypatch.setattr(wc, "monotonic", lambda: clock[0]) diff --git a/tests/test_quantile_factory.py b/tests/test_quantile_factory.py index fcac6029..bd88f2f0 100644 --- a/tests/test_quantile_factory.py +++ b/tests/test_quantile_factory.py @@ -50,6 +50,21 @@ def test_invalid_quantiles_raise(self, bad): with pytest.raises(ValueError): quantile_lgbm_forecaster_factory(_config(), quantiles=bad) + def test_lgbm_n_jobs_defaults_to_one(self): + """Configs without the field (getattr default) pin LightGBM to 1 thread.""" + heads = quantile_lgbm_forecaster_factory(_config(), quantiles=[0.1, 0.9]) + for q in (0.1, 0.9): + assert heads[q].estimator.get_params()["n_jobs"] == 1 + + def test_lgbm_n_jobs_honoured(self): + """config.lgbm_n_jobs flows through to every quantile head.""" + cfg = types.SimpleNamespace( + random_state=0, lags_consider=[1, 24], window_size=24, lgbm_n_jobs=-1 + ) + heads = quantile_lgbm_forecaster_factory(cfg, quantiles=[0.1, 0.9]) + for q in (0.1, 0.9): + assert heads[q].estimator.get_params()["n_jobs"] == -1 + class _StubForecaster: """Minimal fitted-forecaster stand-in returning fixed predictions."""