From f8b7dd29b429dc5136076d70ff1118cf7fb389ee Mon Sep 17 00:00:00 2001
From: bartzbeielstein <32470350+bartzbeielstein@users.noreply.github.com>
Date: Sat, 13 Jun 2026 08:25:05 +0200
Subject: [PATCH] test(preprocessing,data): fix CodeQL alerts in tests
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Resolve the three open code-scanning alerts on the default branch:

- py/uninitialized-local-variable (#211, #212) in
  test_target_corruption.py: `mask` was assigned inside a `try` whose
  `except` called `pytest.fail()`. CodeQL does not model `pytest.fail`
  as NoReturn, so it saw `mask` as possibly-unbound at the trailing
  `assert not mask.any()`. Drop the try/except wrapper and call
  detect_target_corruption directly — an exception from the call
  already fails the test with a full traceback, so the "must not raise"
  guarantee these cases assert is still enforced.
- py/import-and-import-from (#213) in test_entsoe_loader.py: the module
  was imported both as `import ... as entsoe_loader` and via
  `from ... import (...)`. Drop the from-import and reference the
  loaders through the module alias. The alias must stay regardless —
  monkeypatch.setattr(entsoe_loader, "get_data_home", ...) patches
  `get_data_home` in the loader module's own namespace, which is where
  the loader functions look it up.

Test-only change; no shipped behavior affected.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
---
 tests/preprocessing/test_target_corruption.py | 26 +++++++------------
 tests/test_entsoe_loader.py                   | 18 +++++--------
 2 files changed, 17 insertions(+), 27 deletions(-)

diff --git a/tests/preprocessing/test_target_corruption.py b/tests/preprocessing/test_target_corruption.py
index 911e3bc1..aa0e07ab 100644
--- a/tests/preprocessing/test_target_corruption.py
+++ b/tests/preprocessing/test_target_corruption.py
@@ -650,14 +650,11 @@ def test_fall_back_no_raise(self):
         )
         vals = [BASE_MW] * len(idx)
         df = pd.DataFrame({"load": vals}, index=idx)
-        try:
-            mask = detect_target_corruption(
-                df, targets=["load"], range_mw=5_000, step_mw=8_000, window_days=7
-            )
-        except Exception as exc:
-            pytest.fail(
-                f"detect_target_corruption raised on fall-back DST index: {exc}"
-            )
+        # A raise here fails the test (with a full traceback); that is exactly
+        # the "must not raise" guarantee this case is asserting.
+        mask = detect_target_corruption(
+            df, targets=["load"], range_mw=5_000, step_mw=8_000, window_days=7
+        )
         assert not mask.any(), "Clean DST week must produce no flags."
 
     def test_fall_back_dropout_is_flagged(self):
@@ -694,14 +691,11 @@ def test_spring_forward_no_raise(self):
         )
         vals = [BASE_MW] * len(idx)
         df = pd.DataFrame({"load": vals}, index=idx)
-        try:
-            mask = detect_target_corruption(
-                df, targets=["load"], range_mw=5_000, step_mw=8_000, window_days=7
-            )
-        except Exception as exc:
-            pytest.fail(
-                f"detect_target_corruption raised on spring-forward DST index: {exc}"
-            )
+        # A raise here fails the test (with a full traceback); that is exactly
+        # the "must not raise" guarantee this case is asserting.
+        mask = detect_target_corruption(
+            df, targets=["load"], range_mw=5_000, step_mw=8_000, window_days=7
+        )
         assert not mask.any(), "Clean spring-forward DST week must produce no flags."
 
 
diff --git a/tests/test_entsoe_loader.py b/tests/test_entsoe_loader.py
index 35e1f28e..6b91d8b7 100644
--- a/tests/test_entsoe_loader.py
+++ b/tests/test_entsoe_loader.py
@@ -8,10 +8,6 @@
 
 import spotforecast2_safe.data.entsoe_loader as entsoe_loader
 from spotforecast2_safe.configurator import ConfigEntsoe
-from spotforecast2_safe.data.entsoe_loader import (
-    entsoe_data_loader,
-    entsoe_test_data_loader,
-)
 
 
 def _write_interim_csv(path, start: str, periods: int, tz: str | None = "UTC"):
@@ -29,7 +25,7 @@ def test_absolute_path_loads_full_frame(self, tmp_path):
 
         config = ConfigEntsoe()
         config.data_filename = str(csv_path)
-        df = entsoe_data_loader(config)
+        df = entsoe_loader.entsoe_data_loader(config)
 
         assert df.shape == (48, 1)
         assert df.index.name == "Time (UTC)"
@@ -41,7 +37,7 @@ def test_relative_path_resolves_against_data_home(self, tmp_path, monkeypatch):
 
         config = ConfigEntsoe()
         config.data_filename = "energy_load.csv"
-        df = entsoe_data_loader(config)
+        df = entsoe_loader.entsoe_data_loader(config)
 
         assert df.shape == (24, 1)
 
@@ -50,7 +46,7 @@ def test_missing_file_raises_with_cli_hint(self, tmp_path):
         config.data_filename = str(tmp_path / "does_not_exist.csv")
 
         with pytest.raises(FileNotFoundError, match="spotforecast2-entsoe"):
-            entsoe_data_loader(config)
+            entsoe_loader.entsoe_data_loader(config)
 
 
 class TestEntsoeTestDataLoader:
@@ -66,7 +62,7 @@ def test_slices_predict_size_steps_after_end_train(self, tmp_path):
         _write_interim_csv(csv_path, "2025-12-29 00:00", 120)
         config = self._config(csv_path, "2025-12-31 00:00+00:00")
 
-        test_df = entsoe_test_data_loader(config)
+        test_df = entsoe_loader.entsoe_test_data_loader(config)
 
         assert test_df.shape == (24, 1)
         assert test_df.index[0] == pd.Timestamp("2025-12-31 01:00", tz="UTC")
@@ -77,7 +73,7 @@ def test_naive_end_train_is_localized_to_utc(self, tmp_path):
         _write_interim_csv(csv_path, "2025-12-29 00:00", 120)
         config = self._config(csv_path, "2025-12-31 00:00")  # no tz marker
 
-        test_df = entsoe_test_data_loader(config)
+        test_df = entsoe_loader.entsoe_test_data_loader(config)
 
         assert test_df.shape == (24, 1)
         assert test_df.index[0] == pd.Timestamp("2025-12-31 01:00", tz="UTC")
@@ -87,7 +83,7 @@ def test_naive_csv_index_is_supported(self, tmp_path):
         _write_interim_csv(csv_path, "2025-12-29 00:00", 120, tz=None)
         config = self._config(csv_path, "2025-12-31 00:00+00:00")
 
-        test_df = entsoe_test_data_loader(config)
+        test_df = entsoe_loader.entsoe_test_data_loader(config)
 
         assert test_df.shape == (24, 1)
         assert test_df.index[0] == pd.Timestamp("2025-12-31 01:00")
@@ -98,6 +94,6 @@ def test_window_shorter_when_data_runs_out(self, tmp_path):
         _write_interim_csv(csv_path, "2025-12-29 00:00", 60)  # ends 12-31 11:00
         config = self._config(csv_path, "2025-12-31 00:00+00:00")
 
-        test_df = entsoe_test_data_loader(config)
+        test_df = entsoe_loader.entsoe_test_data_loader(config)
 
         assert len(test_df) == 11  # only the rows that exist after the cutoff