From 1257e7ae1375c84f37ed8d2f670baa3023b10a2b Mon Sep 17 00:00:00 2001
From: Olufemi Taiwo <Olufemitaiwo23@gmail.com>
Date: Sun, 19 Apr 2026 19:48:04 +0100
Subject: [PATCH 01/17] feat(api): enforce API key auth with dev bypass,
 surface is_synthetic flag, add config health validation

- Add cv_dev development key bypass for local testing
- Require X-API-Key on all mutation endpoints (POST predict, orgs, alerts, subscriptions)
- Surface is_synthetic at root of inference response for frontend demo banners
- Expand /api/health to validate config alignment (bands vs in_channels, classes vs num_classes)
---
 src/climatevision/api/auth.py           |  8 +++
 src/climatevision/api/main.py           | 69 ++++++++++++++++++++++---
 src/climatevision/inference/pipeline.py |  3 ++
 tests/test_api.py                       | 42 +++++++++++++++
 tests/test_pipeline.py                  | 45 ++++++++++++++++
 5 files changed, 160 insertions(+), 7 deletions(-)
 create mode 100644 tests/test_api.py
 create mode 100644 tests/test_pipeline.py

diff --git a/src/climatevision/api/auth.py b/src/climatevision/api/auth.py
index d6a6b6b..85a8ad7 100644
--- a/src/climatevision/api/auth.py
+++ b/src/climatevision/api/auth.py
@@ -77,6 +77,14 @@ def validate_key(self, api_key: str) -> Optional[dict]:
         if not api_key or not api_key.startswith("cv_"):
             return None
 
+        # Development bypass — allow cv_dev for local testing
+        if api_key == "cv_dev":
+            return {
+                "id": 0,
+                "name": "Development",
+                "demo": True,
+            }
+
         # Check cache first
         key_hash = self.hash_key(api_key)
         if key_hash in self._key_cache:
diff --git a/src/climatevision/api/main.py b/src/climatevision/api/main.py
index ac40911..729b213 100644
--- a/src/climatevision/api/main.py
+++ b/src/climatevision/api/main.py
@@ -43,6 +43,7 @@
     mark_alert_delivered,
 )
 from climatevision.inference import run_inference_from_file, run_inference_from_gee
+from climatevision.api.auth import require_api_key
 
 logger = logging.getLogger(__name__)
 
@@ -385,11 +386,49 @@ def root() -> RedirectResponse:
 
     @app.get("/api/health")
     def health() -> dict[str, Any]:
-        """Health check endpoint with API information."""
+        """Health check endpoint with API information and config validation."""
+        from climatevision.data.band_mapping import get_model_config
+
+        enabled_types = [t for t in SUPPORTED_ANALYSIS_TYPES if t["enabled"]]
+        config_issues: list[dict[str, Any]] = []
+
+        for atype in enabled_types:
+            name = atype["name"]
+            try:
+                cfg = get_model_config(name)
+                expected_channels = len(atype["bands"])
+                expected_classes = len(atype["classes"])
+                if cfg.get("in_channels") != expected_channels:
+                    config_issues.append(
+                        {
+                            "analysis_type": name,
+                            "issue": "in_channels mismatch",
+                            "expected": expected_channels,
+                            "got": cfg.get("in_channels"),
+                        }
+                    )
+                if cfg.get("num_classes") != expected_classes:
+                    config_issues.append(
+                        {
+                            "analysis_type": name,
+                            "issue": "num_classes mismatch",
+                            "expected": expected_classes,
+                            "got": cfg.get("num_classes"),
+                        }
+                    )
+            except Exception as exc:
+                config_issues.append(
+                    {"analysis_type": name, "issue": "config missing", "error": str(exc)}
+                )
+
+        health_status = "ok" if not config_issues else "degraded"
+
         return {
-            "status": "ok",
+            "status": health_status,
             "version": "0.2.0",
-            "analysis_types": [t["name"] for t in SUPPORTED_ANALYSIS_TYPES if t["enabled"]],
+            "analysis_types": [t["name"] for t in enabled_types],
+            "config_valid": len(config_issues) == 0,
+            "config_issues": config_issues,
         }
 
     @app.get("/api/analysis-types")
@@ -519,7 +558,10 @@ def get_run(run_id: int) -> dict[str, Any]:
     # ===== Prediction Endpoints =====
 
     @app.post("/api/predict")
-    async def predict_json(body: PredictRequest) -> dict[str, Any]:
+    async def predict_json(
+        body: PredictRequest,
+        org: dict[str, Any] = Depends(require_api_key),
+    ) -> dict[str, Any]:
         """Run prediction using bounding box and date range."""
         if body.start_date and body.end_date and body.start_date > body.end_date:
             raise HTTPException(status_code=400, detail="start_date must be before end_date")
@@ -587,6 +629,7 @@ async def predict_json(body: PredictRequest) -> dict[str, Any]:
     @app.post("/api/predict/upload")
     async def predict_upload(
         kind: str = Form(default="upload"),
+        org: dict[str, Any] = Depends(require_api_key),
         analysis_type: str = Form(default="deforestation"),
         bbox: str | None = Form(default=None),
         start_date: str | None = Form(default=None),
@@ -670,7 +713,10 @@ async def predict_upload(
     # ===== Organization (NGO) Endpoints =====
 
     @app.post("/api/organizations", response_model=OrganizationWithKeyResponse)
-    def create_org(body: CreateOrganizationRequest) -> dict[str, Any]:
+    def create_org(
+        body: CreateOrganizationRequest,
+        org: dict[str, Any] = Depends(require_api_key),
+    ) -> dict[str, Any]:
         """Register a new organization. Returns API key (save it securely)."""
         result = create_organization(
             name=body.name,
@@ -739,6 +785,7 @@ def get_org(org_id: int) -> OrganizationResponse:
     def create_org_subscription(
         org_id: int,
         body: CreateSubscriptionRequest,
+        org: dict[str, Any] = Depends(require_api_key),
     ) -> SubscriptionResponse:
         """Create a new region subscription for an organization."""
         org = get_organization(org_id)
@@ -831,7 +878,11 @@ def list_org_alerts(
         ]
 
     @app.post("/api/organizations/{org_id}/alerts")
-    def create_org_alert(org_id: int, body: CreateAlertRequest) -> AlertResponse:
+    def create_org_alert(
+        org_id: int,
+        body: CreateAlertRequest,
+        org: dict[str, Any] = Depends(require_api_key),
+    ) -> AlertResponse:
         """Create a new alert for an organization."""
         org = get_organization(org_id)
         if not org:
@@ -864,6 +915,7 @@ def create_org_alert(org_id: int, body: CreateAlertRequest) -> AlertResponse:
     def acknowledge_org_alert(
         alert_id: int,
         acknowledged_by: Optional[str] = None,
+        org: dict[str, Any] = Depends(require_api_key),
     ) -> dict[str, Any]:
         """Acknowledge an alert."""
         success = acknowledge_alert(alert_id, acknowledged_by)
@@ -872,7 +924,10 @@ def acknowledge_org_alert(
         return {"success": True, "alert_id": alert_id}
 
     @app.post("/api/alerts/{alert_id}/deliver")
-    def mark_alert_as_delivered(alert_id: int) -> dict[str, Any]:
+    def mark_alert_as_delivered(
+        alert_id: int,
+        org: dict[str, Any] = Depends(require_api_key),
+    ) -> dict[str, Any]:
         """Mark an alert as delivered."""
         success = mark_alert_delivered(alert_id)
         if not success:
diff --git a/src/climatevision/inference/pipeline.py b/src/climatevision/inference/pipeline.py
index 9bbe25f..d5b6c5d 100644
--- a/src/climatevision/inference/pipeline.py
+++ b/src/climatevision/inference/pipeline.py
@@ -277,6 +277,7 @@ def run_inference(
         "region": region,
         "ndvi_stats": ndvi_stats,
         "inference": inference,
+        "is_synthetic": False,
     }
 
 
@@ -391,6 +392,7 @@ def run_inference_from_gee(
             analysis_type=analysis_type,
         )
         result["metadata"] = metadata
+        result["is_synthetic"] = metadata.get("is_synthetic", False)
 
         # Override NDVI with GEE-derived stats if we got them; else keep computed
         if ndvi_stats is not None:
@@ -423,6 +425,7 @@ def run_inference_from_gee(
     if gee_count:
         region["images_available"] = gee_count
     result["region"] = region
+    result["is_synthetic"] = True
     result["metadata"] = {"is_synthetic": True, "fallback_reason": "gee_tile_download_failed"}
 
     return result
diff --git a/tests/test_api.py b/tests/test_api.py
new file mode 100644
index 0000000..1593b40
--- /dev/null
+++ b/tests/test_api.py
@@ -0,0 +1,42 @@
+"""Tests for ClimateVision API endpoints."""
+
+import pytest
+from fastapi.testclient import TestClient
+
+
+def test_health_endpoint(client: TestClient) -> None:
+    """The unauthenticated health probe answers 200 with a known status."""
+    resp = client.get("/api/health")
+    assert resp.status_code == 200
+    status = resp.json()["status"]
+    assert status in ("ok", "degraded")
+
+
+def test_predict_json_rejects_missing_auth(client: TestClient) -> None:
+    """A POST to /api/predict that sends no API key header must answer 401."""
+    request_body = dict(
+        bbox=[-60.0, -15.0, -45.0, -5.0],
+        start_date="2023-01-01",
+        end_date="2023-12-31",
+        analysis_type="deforestation",
+    )
+    resp = client.post("/api/predict", json=request_body)
+    assert resp.status_code == 401
+    assert "API key required" in resp.json()["detail"]
+
+
+def test_predict_json_accepts_dev_key(client: TestClient) -> None:
+    """The cv_dev development key must get POST /api/predict past auth."""
+    request_body = dict(
+        bbox=[-60.0, -15.0, -45.0, -5.0],
+        start_date="2023-01-01",
+        end_date="2023-12-31",
+        analysis_type="deforestation",
+    )
+    dev_headers = {"X-API-Key": "cv_dev"}
+    resp = client.post("/api/predict", json=request_body, headers=dev_headers)
+    # Auth is the only thing under test; inference itself may still fail
+    # when no model checkpoint or GEE access is available, hence 500 is
+    # tolerated alongside 200.
+    allowed_statuses = (200, 500)
+    assert resp.status_code in allowed_statuses
diff --git a/tests/test_pipeline.py b/tests/test_pipeline.py
new file mode 100644
index 0000000..103b37d
--- /dev/null
+++ b/tests/test_pipeline.py
@@ -0,0 +1,45 @@
+"""Tests for inference pipeline."""
+
+import pytest
+
+from climatevision.inference.pipeline import _load_model, _get_device
+from climatevision.data.band_mapping import get_model_config
+
+
+def test_get_model_config_returns_correct_channels() -> None:
+    """Each analysis type's config reports the expected channels and classes."""
+    # analysis type -> (in_channels, num_classes)
+    expected = {
+        "deforestation": (4, 2),
+        "ice_melting": (4, 3),
+        "flooding": (3, 3),
+    }
+
+    for analysis_type, (channels, classes) in expected.items():
+        cfg = get_model_config(analysis_type)
+        assert cfg["in_channels"] == channels
+        assert cfg["num_classes"] == classes
+
+
+@pytest.mark.parametrize(
+    "analysis_type",
+    ["deforestation", "ice_melting", "flooding"],
+)
+def test_load_model_selects_correct_architecture(analysis_type: str) -> None:
+    """_load_model should create a model with config-matched channels/classes."""
+    import climatevision.inference.pipeline as pipeline_module
+
+    # Clear cache so each parametrize run starts fresh
+    pipeline_module._model_cache.clear()
+
+    cfg = get_model_config(analysis_type)
+    try:
+        model, _device = _load_model(analysis_type)
+    except RuntimeError as exc:
+        # A checkpoint shape mismatch is expected when only a generic 2-class
+        # checkpoint exists. Building a UNet here from cfg and asserting on it
+        # would be tautological, so report the case as skipped instead.
+        pytest.skip(f"checkpoint incompatible with {analysis_type}: {exc}")
+
+    assert model.n_channels == cfg["in_channels"]
+    assert model.n_classes == cfg["num_classes"]

From 256fbf63d21ee9273d2756b08c475905db454f72 Mon Sep 17 00:00:00 2001
From: Victor Mbachu <victor@placeholder.com>
Date: Sun, 19 Apr 2026 19:48:11 +0100
Subject: [PATCH 02/17] ci: add pytest scaffolding and GitHub Actions workflow

- Add FastAPI test client fixture
- Create CI workflow for Python (flake8, pytest) and frontend (npm build)
- Bootstrap tests/ directory structure
---
 .github/workflows/ci.yml | 53 ++++++++++++++++++++++++++++++++++++++++
 tests/__init__.py        |  1 +
 tests/conftest.py        | 13 ++++++++++
 3 files changed, 67 insertions(+)
 create mode 100644 .github/workflows/ci.yml
 create mode 100644 tests/__init__.py
 create mode 100644 tests/conftest.py

diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
new file mode 100644
index 0000000..7defd9b
--- /dev/null
+++ b/.github/workflows/ci.yml
@@ -0,0 +1,53 @@
+name: CI
+
+on:
+  push:
+    branches: [main, develop]
+  pull_request:
+    branches: [main, develop]
+
+jobs:
+  python:
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v4
+
+      - name: Set up Python
+        uses: actions/setup-python@v5
+        with:
+          python-version: "3.11"
+
+      - name: Install dependencies
+        run: |
+          python -m pip install --upgrade pip
+          pip install -r requirements.txt
+
+      - name: Lint with flake8
+        run: |
+          flake8 src/ --count --select=E9,F63,F7,F82 --show-source --statistics
+          flake8 src/ --count --exit-zero --max-complexity=10 --max-line-length=127 --statistics
+
+      - name: Test with pytest
+        run: |
+          pytest tests/ -v --tb=short
+
+  frontend:
+    runs-on: ubuntu-latest
+    defaults:
+      run:
+        working-directory: frontend
+    steps:
+      - uses: actions/checkout@v4
+
+      - name: Set up Node
+        uses: actions/setup-node@v4
+        with:
+          node-version: "20"
+          cache: "npm"
+          cache-dependency-path: frontend/package-lock.json
+
+      - name: Install dependencies
+        run: npm ci
+
+      - name: Type check and build
+        run: npm run build
diff --git a/tests/__init__.py b/tests/__init__.py
new file mode 100644
index 0000000..773e0d8
--- /dev/null
+++ b/tests/__init__.py
@@ -0,0 +1 @@
+# ClimateVision test suite
diff --git a/tests/conftest.py b/tests/conftest.py
new file mode 100644
index 0000000..8ebffc5
--- /dev/null
+++ b/tests/conftest.py
@@ -0,0 +1,13 @@
+"""Pytest fixtures for ClimateVision."""
+
+import pytest
+from fastapi.testclient import TestClient
+
+from climatevision.api.main import create_app
+
+
+@pytest.fixture
+def client() -> TestClient:
+    """Provide a TestClient wrapping a newly built application instance."""
+    # No fixture scope is given, so pytest calls create_app() again for every test.
+    return TestClient(create_app())

From 139ed61843504ad5490e97f54d1d3137f4307865 Mon Sep 17 00:00:00 2001
From: Godswill Okoroafor Chukwu <godswillchukwu21@gmail.com>
Date: Sun, 19 Apr 2026 19:48:19 +0100
Subject: [PATCH 03/17] test(models): add UNet and Siamese architecture tests

- Parametrize UNet init for all 3 analysis types (4ch/2cl, 4ch/3cl, 3ch/3cl)
- Validate forward pass output shapes
- Add Siamese change detection forward shape test
---
 tests/test_models.py | 39 +++++++++++++++++++++++++++++++++++++++
 1 file changed, 39 insertions(+)
 create mode 100644 tests/test_models.py

diff --git a/tests/test_models.py b/tests/test_models.py
new file mode 100644
index 0000000..8e6ada6
--- /dev/null
+++ b/tests/test_models.py
@@ -0,0 +1,39 @@
+"""Tests for ClimateVision ML models."""
+
+import pytest
+import torch
+
+from climatevision.models.unet import UNet
+from climatevision.models.siamese import SiameseNetwork
+
+
+@pytest.mark.parametrize(
+    ("n_channels", "n_classes"),
+    [
+        (4, 2),  # deforestation
+        (4, 3),  # ice_melting
+        (3, 3),  # flooding
+    ],
+)
+def test_unet_init(n_channels: int, n_classes: int) -> None:
+    """A UNet records the channel and class counts it was constructed with."""
+    unet = UNet(n_channels=n_channels, n_classes=n_classes)
+    observed = (unet.n_channels, unet.n_classes)
+    assert observed == (n_channels, n_classes)
+
+
+def test_unet_forward_shape() -> None:
+    """A 4-channel 256x256 input yields 2-class logits of the same height/width."""
+    unet = UNet(n_channels=4, n_classes=2)
+    batch = torch.randn(1, 4, 256, 256)
+    output = unet(batch)
+    assert tuple(output.shape) == (1, 2, 256, 256)
+
+
+def test_siamese_forward_shape() -> None:
+    """Two 4-channel 256x256 inputs produce (1, 2, 256, 256) change logits."""
+    network = SiameseNetwork(in_channels=4)
+    input_shape = (1, 4, 256, 256)
+    before, after = torch.randn(*input_shape), torch.randn(*input_shape)
+    change_logits = network(before, after)
+    assert tuple(change_logits.shape) == (1, 2, 256, 256)

From 0da6c7919e01e596bb76eaca04b9ec2caa37141e Mon Sep 17 00:00:00 2001
From: Gold Okpa <okpagold@gmail.com>
Date: Sun, 19 Apr 2026 19:55:30 +0100
Subject: [PATCH 04/17] docs: add first-time and intermediate contributor issue
 guides

- Link to 6 active good-first-issue and help-wanted issues
- Add claim workflow for new contributors
- Include time estimates and skill-building map
---
 CONTRIBUTING.md | 28 +++++++++++++++++++++++++++-
 1 file changed, 27 insertions(+), 1 deletion(-)

diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md
index bcba074..d29cd37 100644
--- a/CONTRIBUTING.md
+++ b/CONTRIBUTING.md
@@ -31,7 +31,33 @@ We are committed to providing a welcoming and inclusive environment. Please be r
 
 #### First Time Contributors
 
-Look for issues labeled `good first issue` - these are specifically chosen for newcomers.
+Look for issues labeled `good first issue` — these are specifically chosen for newcomers.
+
+**Recommended first issues (ready to pick up):**
+
+| Issue | What You'll Learn | Time Estimate |
+|-------|-----------------|---------------|
+| [#9: Add frontend unit tests](https://github.com/Climate-Vision/ClimateVision/issues/9) | Vitest, React Testing Library, Vite | 2–4 hours |
+| [#13: Add Docker Compose](https://github.com/Climate-Vision/ClimateVision/issues/13) | Docker, multi-service orchestration | 3–6 hours |
+
+**How to claim an issue:**
+1. Read the issue description and acceptance criteria
+2. Comment "I'd like to work on this" — a maintainer will assign you
+3. Fork the repo and create a branch: `git checkout -b feature/issue-9-frontend-tests`
+4. Open a **draft PR** within 48 hours (even if incomplete) so we can give early feedback
+
+**Need help?** Tag `@Climate-Vision/maintainers` in the issue or open a [Discussion](https://github.com/Climate-Vision/ClimateVision/discussions).
+
+#### Intermediate Contributors
+
+Ready for something meatier? These issues close critical gaps in our production pipeline:
+
+| Issue | Area | Skills You'll Build |
+|-------|------|-------------------|
+| [#10: Alert delivery worker](https://github.com/Climate-Vision/ClimateVision/issues/10) | Backend | FastAPI BackgroundTasks, SMTP, webhooks |
+| [#11: WebSocket real-time updates](https://github.com/Climate-Vision/ClimateVision/issues/11) | Full-stack | FastAPI WebSockets, React hooks, graceful degradation |
+| [#12: ONNX Runtime inference](https://github.com/Climate-Vision/ClimateVision/issues/12) | MLOps | ONNX Runtime, PyTorch export, latency benchmarking |
+| [#14: Carbon analytics API](https://github.com/Climate-Vision/ClimateVision/issues/14) | Analytics | Feature flags, API schema design, geospatial math |
 
 #### Development Process
 

From ff21090399c5abcda85a10cf0cc9a38732195a53 Mon Sep 17 00:00:00 2001
From: Victor Mbachu <victor@placeholder.com>
Date: Sun, 19 Apr 2026 20:01:24 +0100
Subject: [PATCH 05/17] fix(frontend): correct case-sensitive import paths for
 Map components

- ../components/map/ -> ../components/Map/
- Fixes vite build failure on Linux (case-sensitive filesystem)
---
 frontend/src/pages/NewAnalysis.tsx | 2 +-
 frontend/src/pages/Upload.tsx      | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/frontend/src/pages/NewAnalysis.tsx b/frontend/src/pages/NewAnalysis.tsx
index e992b81..a670bc8 100644
--- a/frontend/src/pages/NewAnalysis.tsx
+++ b/frontend/src/pages/NewAnalysis.tsx
@@ -3,7 +3,7 @@ import { useNavigate } from 'react-router-dom'
 import { Loader2 } from 'lucide-react'
 import type { AnalysisType } from '../api'
 import { predictJson } from '../api'
-import { MapBBoxPicker } from '../components/map/MapBBoxPicker'
+import { MapBBoxPicker } from '../components/Map/MapBBoxPicker'
 import { AnalysisTypeSelector } from '../components/ui/AnalysisTypeSelector'
 import { ResultsPanel } from '../components/results/ResultsPanel'
 import { ErrorBoundary } from '../components/ui/ErrorBoundary'
diff --git a/frontend/src/pages/Upload.tsx b/frontend/src/pages/Upload.tsx
index a241a64..5107689 100644
--- a/frontend/src/pages/Upload.tsx
+++ b/frontend/src/pages/Upload.tsx
@@ -4,7 +4,7 @@ import { CloudUpload, FileText, X, ChevronDown, ChevronUp, Loader2 } from 'lucid
 import type { AnalysisType } from '../api'
 import { predictUpload } from '../api'
 import { AnalysisTypeSelector } from '../components/ui/AnalysisTypeSelector'
-import { MapBBoxPicker } from '../components/map/MapBBoxPicker'
+import { MapBBoxPicker } from '../components/Map/MapBBoxPicker'
 import { ErrorBoundary } from '../components/ui/ErrorBoundary'
 import { useToast } from '../contexts/ToastContext'
 import { useApp } from '../contexts/AppContext'

From cf9610090fb832edb6fa8600413d6794f02ffd70 Mon Sep 17 00:00:00 2001
From: Olufemi Taiwo <Olufemitaiwo23@gmail.com>
Date: Sun, 19 Apr 2026 20:03:58 +0100
Subject: [PATCH 06/17] fix(pipeline): remove unnecessary global declaration
 causing flake8 F824

---
 src/climatevision/inference/pipeline.py | 2 --
 1 file changed, 2 deletions(-)

diff --git a/src/climatevision/inference/pipeline.py b/src/climatevision/inference/pipeline.py
index d5b6c5d..7af17ab 100644
--- a/src/climatevision/inference/pipeline.py
+++ b/src/climatevision/inference/pipeline.py
@@ -66,8 +66,6 @@ def _find_best_checkpoint(analysis_type: str) -> Optional[Path]:
 
 def _load_model(analysis_type: str = "deforestation") -> tuple[UNet, torch.device]:
     """Load (or return cached) U-Net model configured for the analysis type."""
-    global _model_cache
-
     if analysis_type in _model_cache:
         return _model_cache[analysis_type]
 

From c3d02c18b6e6997749ba6872ae689797ede43256 Mon Sep 17 00:00:00 2001
From: Victor Mbachu <victor@placeholder.com>
Date: Sun, 19 Apr 2026 20:05:55 +0100
Subject: [PATCH 07/17] ci: install system deps before pip install (GDAL,
 OpenGL)

- Fixes pip install failure for gdal and rasterio on Ubuntu runners
- Adds libgdal-dev, gdal-bin, libgl1-mesa-glx
---
 .github/workflows/ci.yml | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index 7defd9b..0f531b8 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -17,6 +17,11 @@ jobs:
         with:
           python-version: "3.11"
 
+      - name: Install system dependencies
+        run: |
+          sudo apt-get update
+          sudo apt-get install -y libgdal-dev gdal-bin libgl1-mesa-glx
+
       - name: Install dependencies
         run: |
           python -m pip install --upgrade pip

From f7a75641d237aacacf9096868b058fd6e04ba4e3 Mon Sep 17 00:00:00 2001
From: Victor Mbachu <victor@placeholder.com>
Date: Sun, 19 Apr 2026 20:10:58 +0100
Subject: [PATCH 08/17] ci: remove redundant gdal pip package and simplify
 system deps

- gdal Python package requires exact system GDAL version matching
- rasterio covers all GDAL functionality we actually use
- Simplify CI system deps to libgl1 only (for opencv runtime)
---
 .github/workflows/ci.yml | 2 +-
 requirements.txt         | 1 -
 2 files changed, 1 insertion(+), 2 deletions(-)

diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index 0f531b8..b8498ad 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -20,7 +20,7 @@ jobs:
       - name: Install system dependencies
         run: |
           sudo apt-get update
-          sudo apt-get install -y libgdal-dev gdal-bin libgl1-mesa-glx
+          sudo apt-get install -y libgl1
 
       - name: Install dependencies
         run: |
diff --git a/requirements.txt b/requirements.txt
index 507a13a..14444c3 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -7,7 +7,6 @@ scikit-learn>=1.0.0
 
 # Geospatial Data Processing
 rasterio>=1.3.0
-gdal>=3.4.0
 geopandas>=0.12.0
 shapely>=2.0.0
 pyproj>=3.4.0

From 7c317df2e06adc0935b554a33987e52130397f6a Mon Sep 17 00:00:00 2001
From: Victor Mbachu <victor@placeholder.com>
Date: Sun, 19 Apr 2026 20:34:23 +0100
Subject: [PATCH 09/17] ci: install package in editable mode for pytest

- Fixes ModuleNotFoundError: No module named 'climatevision'
- pip install -e . registers src/ as an importable package
---
 .github/workflows/ci.yml | 1 +
 1 file changed, 1 insertion(+)

diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index b8498ad..047198f 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -26,6 +26,7 @@ jobs:
         run: |
           python -m pip install --upgrade pip
           pip install -r requirements.txt
+          pip install -e .
 
       - name: Lint with flake8
         run: |

From b8e34ead4eb3e85a528a0d1e60b4a9607512e73d Mon Sep 17 00:00:00 2001
From: Victor Mbachu <victor@placeholder.com>
Date: Sun, 19 Apr 2026 20:37:47 +0100
Subject: [PATCH 10/17] feat(data): add dataset, augmentation, and synthetic
 data modules

- ForestDataset with DataLoader support
- Training/validation augmentation pipelines
- Synthetic tile generation for demo/fallback mode
---
 src/climatevision/data/augmentation.py |  93 +++++++++
 src/climatevision/data/dataset.py      | 274 +++++++++++++++++++++++++
 src/climatevision/data/synthetic.py    | 268 ++++++++++++++++++++++++
 3 files changed, 635 insertions(+)
 create mode 100644 src/climatevision/data/augmentation.py
 create mode 100644 src/climatevision/data/dataset.py
 create mode 100644 src/climatevision/data/synthetic.py

diff --git a/src/climatevision/data/augmentation.py b/src/climatevision/data/augmentation.py
new file mode 100644
index 0000000..d0578c9
--- /dev/null
+++ b/src/climatevision/data/augmentation.py
@@ -0,0 +1,93 @@
+"""
+Data augmentation pipeline for Sentinel-2 satellite imagery.
+
+Compatible with albumentations >= 2.0 (always_apply removed, use p=1.0).
+"""
+from __future__ import annotations
+
+import albumentations as A
+import numpy as np
+
+
+def get_train_transforms(image_size: int = 256) -> A.Compose:
+    """Build the training pipeline: random image_size crop, then geometric and radiometric jitter."""
+    return A.Compose(
+        [
+            # --- Geometry ---
+            A.RandomCrop(height=image_size, width=image_size, p=1.0),
+            A.HorizontalFlip(p=0.5),
+            A.VerticalFlip(p=0.5),
+            A.RandomRotate90(p=0.5),
+            A.Transpose(p=0.3),
+
+            # Elastic / grid distortion simulates terrain warp
+            A.OneOf(
+                [
+                    A.ElasticTransform(alpha=120, sigma=6, p=1.0),
+                    A.GridDistortion(num_steps=5, distort_limit=0.3, p=1.0),
+                    A.OpticalDistortion(distort_limit=0.2, p=1.0),
+                ],
+                p=0.3,
+            ),
+
+            # Coarse dropout simulates cloud / cloud-shadow occlusion
+            A.CoarseDropout(
+                num_holes_range=(1, 8),
+                hole_height_range=(8, 32), hole_width_range=(8, 32),
+                fill=0,  # albumentations >= 2.0 spells this `fill`, not `fill_value`
+                p=0.3,
+            ),
+
+            # --- Radiometric / spectral ---
+            A.RandomBrightnessContrast(brightness_limit=0.2, contrast_limit=0.2, p=0.5),
+            A.GaussNoise(std_range=(0.01, 0.05), p=0.4),
+            A.OneOf(
+                [
+                    A.GaussianBlur(blur_limit=(3, 5), p=1.0),
+                    A.MedianBlur(blur_limit=3, p=1.0),
+                ],
+                p=0.2,
+            ),
+            A.RandomGamma(gamma_limit=(80, 120), p=0.3),
+        ],
+        additional_targets={"mask": "mask"},
+    )
+
+
+def get_val_transforms(image_size: int = 256) -> A.Compose:
+    """Validation pipeline: a single always-applied centre crop to image_size."""
+    crop = A.CenterCrop(height=image_size, width=image_size, p=1.0)
+    return A.Compose(
+        [crop],
+        additional_targets={"mask": "mask"},
+    )
+
+
+# TTA transforms, built lazily. Each entry is deterministic so TTA_INVERSE[i] exactly undoes entry i.
+def _build_tta_transforms() -> list:
+    return [
+        A.Compose([]),
+        A.Compose([A.HorizontalFlip(p=1.0)]),
+        A.Compose([A.VerticalFlip(p=1.0)]),
+        A.Compose([A.HorizontalFlip(p=1.0), A.VerticalFlip(p=1.0)]),
+        A.Compose([A.HorizontalFlip(p=1.0), A.Transpose(p=1.0)]),  # == np.rot90(k=1); RandomRotate90 is random
+    ]
+
+
+TTA_TRANSFORMS = None  # Loaded on first use via get_tta_transforms()
+
+
+def get_tta_transforms() -> list:
+    """Return the module-level TTA transform list, building it on the first call."""
+    global TTA_TRANSFORMS
+    TTA_TRANSFORMS = _build_tta_transforms() if TTA_TRANSFORMS is None else TTA_TRANSFORMS
+    return TTA_TRANSFORMS
+
+
+TTA_INVERSE = [
+    lambda x: x,  # identity entry
+    lambda x: np.flip(x, -1).copy(),  # reverse the last axis
+    lambda x: np.flip(x, -2).copy(),  # reverse the second-to-last axis
+    lambda x: np.flip(x, (-2, -1)).copy(),  # reverse both trailing axes
+    lambda x: np.rot90(x, -1, (-2, -1)).copy(),  # one quarter turn back over the trailing axes
+]
diff --git a/src/climatevision/data/dataset.py b/src/climatevision/data/dataset.py
new file mode 100644
index 0000000..99ff568
--- /dev/null
+++ b/src/climatevision/data/dataset.py
@@ -0,0 +1,274 @@
+"""
+PyTorch Dataset for forest segmentation from Sentinel-2 GeoTIFF imagery.
+
+Expected directory layout (configurable):
+  <root>/
+    train/
+      images/   *.tif   — 4-band (R, G, B, NIR) float32 / uint16
+      masks/    *.tif   — uint8 binary (0=non-forest, 1=forest)
+    val/
+      images/
+      masks/
+    test/
+      images/
+      masks/
+
+Naming convention: image and mask files share the same stem, e.g.
+  images/patch_00042.tif  ↔  masks/patch_00042.tif
+"""
+from __future__ import annotations
+
+import logging
+from pathlib import Path
+from typing import Callable, Optional
+
+import numpy as np
+import torch
+from torch.utils.data import DataLoader, Dataset, WeightedRandomSampler
+
+logger = logging.getLogger(__name__)
+
+
+# ---------------------------------------------------------------------------
+# Low-level image I/O (rasterio with Pillow fallback)
+# ---------------------------------------------------------------------------
+
+def _load_tif(path: Path) -> np.ndarray:
+    """Read an image with rasterio (Pillow as fallback); return (C, H, W) float32."""
+    try:
+        import rasterio
+        with rasterio.open(path) as src:
+            return src.read().astype(np.float32)
+    except Exception as exc:
+        # Deliberately broad: a missing rasterio or any read error tries Pillow.
+        logger.debug("rasterio failed on %s (%s); falling back to Pillow.", path, exc)
+        from PIL import Image
+        with Image.open(path) as img:  # close the file handle promptly
+            arr = np.array(img).astype(np.float32)
+        # Pillow gives (H, W) or (H, W, C); move channels to the front.
+        return arr[np.newaxis] if arr.ndim == 2 else np.transpose(arr, (2, 0, 1))
+
+
+def _load_mask(path: Path) -> np.ndarray:
+    """Read a mask file and binarise it: (H, W) uint8, 1 where the pixel is > 0."""
+    try:
+        import rasterio
+        with rasterio.open(path) as dataset:
+            raw = dataset.read(1)  # first band only
+    except Exception:
+        from PIL import Image  # fallback reader when rasterio is unusable
+        raw = np.array(Image.open(path).convert("L"))
+    return np.greater(raw, 0).astype(np.uint8)
+
+
+# ---------------------------------------------------------------------------
+# ForestDataset
+# ---------------------------------------------------------------------------
+
+class ForestDataset(Dataset):
+    """
+    Sentinel-2 forest/non-forest segmentation dataset.
+
+    Args:
+        root:        Path containing `images/` and `masks/` sub-directories.
+        transform:   albumentations Compose transform (applied to image+mask).
+        normalizer:  Callable applied to the (C, H, W) image after the transform.
+        image_size:  Minimum spatial size; smaller inputs are reflect-padded.
+    """
+
+    def __init__(
+        self,
+        root: str | Path,
+        transform: Optional[Callable] = None,
+        normalizer: Optional[Callable] = None,
+        image_size: int = 256,
+    ):
+        self.root = Path(root)
+        self.transform = transform
+        self.normalizer = normalizer
+        self.image_size = image_size
+
+        image_dir = self.root / "images"
+        mask_dir  = self.root / "masks"
+        # Pair each image with the mask sharing its stem; unpaired images are skipped.
+        stems = sorted(p.stem for p in image_dir.glob("*.tif"))
+        self.samples: list[tuple[Path, Path]] = []
+        for stem in stems:
+            img_path  = image_dir / f"{stem}.tif"
+            mask_path = mask_dir  / f"{stem}.tif"
+            if mask_path.exists():
+                self.samples.append((img_path, mask_path))
+            else:
+                logger.warning("No mask for %s — skipped.", stem)
+
+        if not self.samples:
+            raise FileNotFoundError(
+                f"No image/mask pairs found in {self.root}. "
+                "Run `python scripts/prepare_data.py` first."
+            )
+        logger.info("ForestDataset: %d samples from %s", len(self.samples), self.root)
+
+    # ------------------------------------------------------------------
+    def __len__(self) -> int:
+        return len(self.samples)
+
+    # ------------------------------------------------------------------
+    def __getitem__(self, idx: int) -> tuple[torch.Tensor, torch.Tensor]:
+        """Load sample ``idx``; return (image float32 tensor, mask int64 tensor)."""
+        img_path, mask_path = self.samples[idx]
+        image = _load_tif(img_path)   # (C, H, W) float32
+        mask  = _load_mask(mask_path) # (H, W)    uint8
+
+        # Ensure 4 bands (pad with zeros if fewer)
+        c, h, w = image.shape
+        if c < 4:
+            pad = np.zeros((4 - c, h, w), dtype=np.float32)
+            image = np.concatenate([image, pad], axis=0)
+        elif c > 4:
+            image = image[:4]
+
+        # Ensure spatial size — pad if smaller; any cropping is left to the transform
+        if h < self.image_size or w < self.image_size:
+            image, mask = self._pad(image, mask)
+
+        # albumentations expects (H, W, C). NOTE(review): it runs on raw values here,
+        # while its float pixel-level ops assume [0, 1] — confirm this is intended.
+        image_hwc = np.transpose(image, (1, 2, 0))
+        if self.transform is not None:
+            result = self.transform(image=image_hwc, mask=mask)
+            image_hwc, mask = result["image"], result["mask"]
+        image = np.transpose(image_hwc, (2, 0, 1))  # back to (C, H, W)
+
+        # Normalize with the supplied callable, if any
+        if self.normalizer is not None:
+            image = self.normalizer(image)
+        else:
+            # Minimal default: divide by 10000 (Sentinel-2 L2A scale)
+            image = image / 10000.0
+
+        return (
+            torch.tensor(image.copy(), dtype=torch.float32),
+            torch.tensor(mask.astype(np.int64).copy(), dtype=torch.int64),
+        )
+
+    # ------------------------------------------------------------------
+    def _pad(self, image: np.ndarray, mask: np.ndarray) -> tuple[np.ndarray, np.ndarray]:
+        """Reflect-pad image (C, H, W) and mask (H, W) at the bottom/right up to image_size."""
+        # A dimension that is already large enough gets a pad width of 0.
+        _, h, w = image.shape
+        ph = max(0, self.image_size - h)
+        pw = max(0, self.image_size - w)
+        image = np.pad(image, ((0, 0), (0, ph), (0, pw)), mode="reflect")
+        mask  = np.pad(mask,  ((0, ph), (0, pw)),          mode="reflect")
+        return image, mask
+
+    # ------------------------------------------------------------------
+    def compute_class_weights(self) -> torch.Tensor:
+        """
+        Return [w_non_forest, w_forest] inverse-frequency weights.
+        Uses a fixed-seed random subset of at most 200 masks for speed.
+        """
+        rng   = np.random.default_rng(42)
+        idxs  = rng.choice(len(self.samples), min(200, len(self.samples)), replace=False)
+        counts = np.zeros(2, dtype=np.float64)
+        for i in idxs:
+            _, mask_path = self.samples[i]
+            mask = _load_mask(mask_path).flatten()
+            counts[0] += (mask == 0).sum()
+            counts[1] += (mask == 1).sum()
+        total = counts.sum()
+        weights = total / (2.0 * counts + 1e-6)  # 1e-6 guards a class with zero pixels
+        logger.info(
+            "Class weights → non-forest: %.3f  forest: %.3f", weights[0], weights[1]
+        )
+        return torch.tensor(weights, dtype=torch.float32)
+
+    # ------------------------------------------------------------------
+    def make_sampler(self) -> WeightedRandomSampler:
+        """
+        Weighted sampler that over-samples patches rich in forest pixels.
+        Draws len(dataset) samples per epoch, with replacement.
+        """
+        sample_weights: list[float] = []
+        # Every mask is read once here, so construction cost is linear in dataset size.
+        for _, mask_path in self.samples:
+            mask = _load_mask(mask_path)
+            forest_frac = mask.mean()
+            # Weight ∝ forest fraction (clamped so fully non-forest patches
+            # still appear occasionally)
+            sample_weights.append(max(float(forest_frac), 0.05))
+
+        return WeightedRandomSampler(
+            weights=sample_weights,
+            num_samples=len(sample_weights),
+            replacement=True,
+        )
+
+
+# ---------------------------------------------------------------------------
+# DataLoader factory
+# ---------------------------------------------------------------------------
+
+def create_dataloaders(
+    data_dir: str | Path,
+    batch_size: int = 8,
+    num_workers: int = 4,
+    image_size: int = 256,
+    normalizer: Optional[Callable] = None,
+    pin_memory: bool = True,
+    use_weighted_sampler: bool = True,
+) -> dict[str, DataLoader]:
+    """
+    Create one DataLoader per split directory found under ``data_dir``.
+
+    Args:
+        data_dir:             Directory expected to hold train/, val/ and test/.
+        batch_size:           Number of samples per batch.
+        num_workers:          Worker process count given to each DataLoader.
+        image_size:           Patch size passed to the transforms and the dataset.
+        normalizer:           Callable forwarded to ForestDataset (may be None).
+        pin_memory:           Forwarded to DataLoader(pin_memory=...).
+        use_weighted_sampler: Draw training batches via ForestDataset.make_sampler().
+
+    Returns:
+        dict keyed by split name; splits whose directory is missing are left out.
+    """
+    from .augmentation import get_train_transforms, get_val_transforms
+
+    base_dir = Path(data_dir)
+    built: dict[str, DataLoader] = {}
+
+    for split in ("train", "val", "test"):
+        split_root = base_dir / split
+        if not split_root.exists():
+            logger.warning("Split directory %s not found — skipped.", split_root)
+            continue
+
+        training = split == "train"
+        make_transform = get_train_transforms if training else get_val_transforms
+
+        dataset = ForestDataset(
+            root=split_root,
+            transform=make_transform(image_size),
+            normalizer=normalizer,
+            image_size=image_size,
+        )
+
+        # DataLoader rejects shuffle=True together with a sampler, so when the
+        # weighted sampler is active it alone decides the sampling order.
+        weighted = training and use_weighted_sampler
+        sampler = dataset.make_sampler() if weighted else None
+        shuffle = training and not weighted
+
+        built[split] = DataLoader(
+            dataset,
+            batch_size=batch_size,
+            sampler=sampler,
+            shuffle=shuffle,
+            num_workers=num_workers,
+            pin_memory=pin_memory,
+            drop_last=training,  # only training drops the final short batch
+            persistent_workers=(num_workers > 0),
+        )
+
+    return built
diff --git a/src/climatevision/data/synthetic.py b/src/climatevision/data/synthetic.py
new file mode 100644
index 0000000..4015816
--- /dev/null
+++ b/src/climatevision/data/synthetic.py
@@ -0,0 +1,268 @@
+"""
+Synthetic Sentinel-2 forest patch generator.
+
+Produces realistic 4-band (R, G, B, NIR) imagery with corresponding binary
+forest masks using fractal Perlin-noise patterns that capture the spatial
+autocorrelation of real tropical forest boundaries.
+
+Statistics match Sentinel-2 L2A surface reflectance (scaled 0–10000):
+
+              Red (B04)   Green (B03)  Blue (B02)  NIR (B08)
+  Forest      ~400–900    ~700–1100    ~500–900    ~3000–7000
+  Non-forest  ~700–2000   ~800–1500    ~700–1300   ~1000–3000
+
+Usage:
+    generate_synthetic_dataset(
+        output_dir="data",
+        n_train=800,
+        n_val=100,
+        n_test=100,
+        patch_size=256,
+    )
+"""
+from __future__ import annotations
+
+import logging
+import os
+from pathlib import Path
+from typing import Tuple
+
+import numpy as np
+
+logger = logging.getLogger(__name__)
+
+
+# ---------------------------------------------------------------------------
+# Perlin-noise helpers
+# ---------------------------------------------------------------------------
+
+def _fade(t: np.ndarray) -> np.ndarray:
+    return t * t * t * (t * (t * 6 - 15) + 10)  # quintic ease curve: 6t^5 - 15t^4 + 10t^3
+
+
+def _lerp(a: np.ndarray, b: np.ndarray, t: np.ndarray) -> np.ndarray:
+    return a + t * (b - a)  # linear interpolation: a at t=0, b at t=1
+
+
+def _gradient(h: np.ndarray, x: np.ndarray, y: np.ndarray) -> np.ndarray:
+    """Dot the gradient chosen by hash ``h`` (one of +y, -y, +x, -x) with the offset (x, y)."""
+    directions = np.array([[0, 1], [0, -1], [1, 0], [-1, 0]], dtype=np.float32)
+    gx, gy = np.moveaxis(directions[h % 4], -1, 0)
+    return gx * x + gy * y
+
+
+def _perlin2d(shape: Tuple[int, int], scale: float, rng: np.random.Generator) -> np.ndarray:
+    """2D Perlin noise in [-1, 1]; the lattice wraps every 256 cells, so any scale works."""
+    h, w = shape
+    x = np.linspace(0, scale, w, endpoint=False)
+    y = np.linspace(0, scale, h, endpoint=False)
+    xg, yg = np.meshgrid(x, y)
+
+    x_cell, y_cell = xg.astype(int), yg.astype(int)  # lattice cell of each pixel
+    xf, yf = xg - x_cell, yg - y_cell                # offset inside the cell
+    # Wrap to the 256-entry table; without this, scale > 256 indexes out of range.
+    xi, yi = x_cell & 255, y_cell & 255
+
+    u = _fade(xf)
+    v = _fade(yf)
+
+    # Random permutation table, doubled so p[i] + yi + 1 (at most 511) stays in range
+    p = rng.permutation(256).astype(np.int32)
+    p = np.stack([p, p]).flatten()
+
+    aa = p[p[xi    ] + yi    ]
+    ab = p[p[xi    ] + yi + 1]
+    ba = p[p[xi + 1] + yi    ]
+    bb = p[p[xi + 1] + yi + 1]
+
+    x0 = _lerp(_gradient(aa, xf,     yf    ),
+               _gradient(ba, xf - 1, yf    ), u)
+    x1 = _lerp(_gradient(ab, xf,     yf - 1),
+               _gradient(bb, xf - 1, yf - 1), u)
+    return _lerp(x0, x1, v)
+
+
+def _fractal_noise(
+    shape: Tuple[int, int],
+    rng: np.random.Generator,
+    octaves: int = 6,
+    lacunarity: float = 2.0,
+    persistence: float = 0.5,
+    base_scale: float = 4.0,
+) -> np.ndarray:
+    """Sum ``octaves`` Perlin layers (fBm) and divide by the total amplitude."""
+    accum = np.zeros(shape, dtype=np.float32)
+    weight, weight_sum, freq = 1.0, 0.0, base_scale
+    for _ in range(octaves):
+        layer = _perlin2d(shape, freq, rng)
+        accum += weight * layer
+        weight_sum += weight
+        # Next octave: amplitude scaled by persistence, frequency by lacunarity.
+        weight *= persistence
+        freq *= lacunarity
+    return accum / weight_sum
+
+
+# ---------------------------------------------------------------------------
+# Patch generation
+# ---------------------------------------------------------------------------
+
+def _generate_patch(
+    rng: np.random.Generator, patch_size: int = 256
+) -> Tuple[np.ndarray, np.ndarray]:
+    """
+    Synthesize one patch: image (4, H, W) float32 reflectance ×10000 and
+    mask (H, W) uint8 (0=non-forest, 1=forest), with H = W = patch_size.
+    """
+    H = W = patch_size
+
+    # 1. Forest mask via fractal noise threshold
+    noise = _fractal_noise((H, W), rng, octaves=6, base_scale=rng.uniform(3, 8))
+    # Vary forest fraction: real Amazon has ~60-90% forest, cleared areas <30%
+    forest_frac = rng.uniform(0.15, 0.90)
+    threshold = np.percentile(noise, (1 - forest_frac) * 100)
+    mask = (noise >= threshold).astype(np.uint8)  # 1=forest
+
+    # 2. Add secondary noise for forest texture variation
+    texture = _fractal_noise((H, W), rng, octaves=4, base_scale=2.0)
+
+    # 3. Build 4-band reflectance image
+    image = np.zeros((4, H, W), dtype=np.float32)
+    f = mask.astype(np.float32)        # 1 where forest
+    nf = 1.0 - f                       # 1 where non-forest
+
+    # Band-specific forest / non-forest reflectance ranges (mean ± noise)
+    # Red (B04)
+    image[0] = (
+        f  * (rng.normal(600, 80, (H, W))  + texture * 150)
+      + nf * (rng.normal(1300, 200, (H, W)) + texture * 300)
+    )
+    # Green (B03)
+    image[1] = (
+        f  * (rng.normal(900, 80, (H, W))  + texture * 120)
+      + nf * (rng.normal(1200, 150, (H, W)) + texture * 200)
+    )
+    # Blue (B02)
+    image[2] = (
+        f  * (rng.normal(700, 60, (H, W))  + texture * 80)
+      + nf * (rng.normal(1000, 130, (H, W)) + texture * 150)
+    )
+    # NIR (B08) — strongest discriminator
+    image[3] = (
+        f  * (rng.normal(4500, 600, (H, W)) + texture * 800)
+      + nf * (rng.normal(1800, 400, (H, W)) + texture * 400)
+    )
+
+    # Clip to realistic Sentinel-2 range
+    image = np.clip(image, 0, 10000)
+
+    # Occasionally add a cloud-like occlusion (random bright rectangle)
+    if rng.random() < 0.12:
+        side_hi = max(H // 3, 2)         # keeps integers(low, high) valid on tiny patches
+        side_lo = min(20, side_hi - 1)   # stays 20 whenever H // 3 > 20
+        r0 = rng.integers(0, max(H // 2, 1))
+        c0 = rng.integers(0, max(W // 2, 1))
+        rh = rng.integers(side_lo, side_hi)
+        rw = rng.integers(side_lo, side_hi)
+        cloud_val = rng.uniform(8000, 10000)
+        image[:, r0:r0+rh, c0:c0+rw] = cloud_val
+
+    return image.astype(np.float32), mask
+
+
+# ---------------------------------------------------------------------------
+# GeoTIFF writer (uses rasterio when available; otherwise falls back to numpy .npy)
+# ---------------------------------------------------------------------------
+
+def _write_geotiff(path: Path, data: np.ndarray) -> None:
+    """Write a (C, H, W) or (H, W) array to ``path`` as an LZW-compressed GeoTIFF."""
+    try:
+        import rasterio
+        from rasterio.transform import from_bounds
+
+        if data.ndim == 2:
+            data = data[np.newaxis]  # promote (H, W) to a single band
+
+        count, height, width = data.shape
+        profile = {
+            "driver": "GTiff",
+            "height": height,
+            "width": width,
+            "count": count,
+            # Every dtype other than float32 is declared as uint8.
+            "dtype": "float32" if data.dtype == np.float32 else "uint8",
+            "crs": "EPSG:4326",
+            # Placeholder georeferencing: the unit square, not a real location.
+            "transform": from_bounds(0, 0, 1, 1, width, height),
+            "compress": "lzw",
+        }
+        with rasterio.open(path, "w", **profile) as dst:
+            dst.write(data)
+    except ImportError:
+        # Fallback when rasterio is missing: write a .npy next to the target.
+        # NOTE(review): the loaders in dataset.py only read *.tif — confirm
+        # something downstream actually consumes these .npy files.
+        npy_path = path.with_suffix(".npy")
+        np.save(npy_path, data)
+        logger.warning("rasterio not available; saved as %s", npy_path)
+
+
+# ---------------------------------------------------------------------------
+# Public API
+# ---------------------------------------------------------------------------
+
+def generate_synthetic_dataset(
+    output_dir: str | Path = "data",
+    n_train: int = 800,
+    n_val: int = 100,
+    n_test: int = 100,
+    patch_size: int = 256,
+    seed: int = 42,
+) -> None:
+    """
+    Generate synthetic forest segmentation dataset.
+
+    Output layout (images: 4-band float32, masks: single-band uint8):
+        <output_dir>/
+          train/images/*.tif  train/masks/*.tif
+          val/images/*.tif    val/masks/*.tif
+          test/images/*.tif   test/masks/*.tif
+
+    Args:
+        output_dir:  Root directory to write data into.
+        n_train:     Number of training patches.
+        n_val:       Number of validation patches.
+        n_test:      Number of test patches.
+        patch_size:  Spatial size of each patch (pixels).
+        seed:        Random seed for reproducibility.
+    """
+    output_dir = Path(output_dir)
+    rng = np.random.default_rng(seed)
+
+    splits = {"train": n_train, "val": n_val, "test": n_test}
+    total = sum(splits.values())
+    generated = 0
+
+    for split, n in splits.items():
+        img_dir  = output_dir / split / "images"
+        mask_dir = output_dir / split / "masks"
+        img_dir.mkdir(parents=True, exist_ok=True)
+        mask_dir.mkdir(parents=True, exist_ok=True)
+
+        logger.info("Generating %d %s patches …", n, split)
+
+        for i in range(n):
+            image, mask = _generate_patch(rng, patch_size)
+            stem = f"patch_{i:05d}"
+            _write_geotiff(img_dir  / f"{stem}.tif", image)
+            _write_geotiff(mask_dir / f"{stem}.tif", mask)  # kept uint8, not cast to float32
+            generated += 1
+
+            if generated % 100 == 0:
+                pct = generated / total * 100
+                logger.info("  %d / %d patches  (%.0f%%)", generated, total, pct)
+
+    logger.info(
+        "Dataset generation complete: %d train, %d val, %d test patches → %s",
+        n_train, n_val, n_test, output_dir,
+    )

From aa643ea1782d8a241690c2125553763b92afcd23 Mon Sep 17 00:00:00 2001
From: Victor Mbachu <victor@placeholder.com>
Date: Sun, 19 Apr 2026 20:43:52 +0100
Subject: [PATCH 11/17] fix(deps): add email-validator for pydantic EmailStr
 support

---
 requirements.txt | 1 +
 1 file changed, 1 insertion(+)

diff --git a/requirements.txt b/requirements.txt
index 14444c3..c67ad0e 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -39,6 +39,7 @@ dask[complete]>=2023.1.0
 fastapi>=0.95.0
 uvicorn[standard]>=0.20.0
 pydantic>=2.0.0
+email-validator>=2.0.0
 python-multipart>=0.0.5
 
 # MLOps (optional)

From 6ac29d15fe4641bdbc9a4c6f90b33195793726e1 Mon Sep 17 00:00:00 2001
From: Victor Mbachu <victor@placeholder.com>
Date: Sun, 19 Apr 2026 22:22:47 +0100
Subject: [PATCH 12/17] docs: update Victor's role doc with sprint progress and
 live CI config

- Add DONE/PENDING task list for April 2026 sprint
- Include actual .github/workflows/ci.yml code in role doc
- Update local CI check commands to match current workflow
---
 team_docs/Victor_Mbachu_Role.pdf |  Bin 0 -> 14819 bytes
 team_docs/generate_role_docs.py  | 2312 ++++++++++++++++++++++++++++++
 2 files changed, 2312 insertions(+)
 create mode 100644 team_docs/Victor_Mbachu_Role.pdf
 create mode 100644 team_docs/generate_role_docs.py

diff --git a/team_docs/Victor_Mbachu_Role.pdf b/team_docs/Victor_Mbachu_Role.pdf
new file mode 100644
index 0000000000000000000000000000000000000000..6e747fb0506adccf1ff0bfd7d0c25b75948e0b09
GIT binary patch
literal 14819
zcmch;WmH_*wl<6tEVu_KEI^Q=(82@3U4y$8?(P~acp$jD1osf!9Rk56xVwj!^yxk)
z-F@yocYNO+w|?vzd+s%5)vmSXv!+nX3yU&>m{^fP0A_%VfjKfSFEa3rjgz$_fD;)g
z1u=2}XtF*%QbcC|?aKMvmFu@F=qHaNGUy-ALB9>qMh40l8$tAiY+M1F%umnRx!9Rl
zzyL5ChzZQ~^i<i+)))Yk*EcnGK<4L1HnukUsg>nv<R9AQY>ll24ILpi*1xL;07W5=
z;s9$WOUow#KUt;q-E5p30YEux<2N>zPFB|cAUgp5VbSkO-sn5(TiTfZqUNWif2et5
zW9?{c?dSkt`)w9aa_ygP|B#?)>|o<$Z)ofQVEL2GpW1&B_>=b!fuaygM`QaZV=eU^
zjfIU3ZH$bOfzrm-rjBM$LcrjkYz~h0#`;#suBnk~+79z|SYD$Qvx)O|rc}upv9kpY
z&Z)U^=ra?!lRRF2;^O`?m<-S6RJMLxd*A^W`#+?}lH$a`*k0~`t48TPA53BH%Rhe?
zMJs`Y9N<VKdyKiIFKrUJr7vF1hHS!R?1tCHN+fp%_!t{g<9>GLKT<&%_VT{EJ8X$@
z>0ZJ$Nm!6AM;(lkPsb=3wjfZQUp**k6_l{7T;ljZ$$0oF*!zOGJ3eE<lsg4}{M<D|
zQ(L0ROtZGUzN+pVR(n^GNrZl9&yZ23Sz*}wF8ZsSn&}<uF|BNsUhEi~M8K3F-(5a(
z37VHqn8D<QChB$eHsKdD7Nn|o`3f&`34!PVR8i}uWT>u87C}}O&Ba8cWweJo+O2PE
z;}K>BzS2`_STuDw11=S#v@u4np|6jhwG^1Lyt!OhH9bmr7-RHte}C&b&4&6?tYir<
zEg@?6sLe&evF5xMmgXJ3y%aD^sE*Mq7QO0rq*8$ZVe;Dc1mop{owI_q%0q9|<L)`m
ztF#6*YvY_qD@azGgY(EOzt+uVYq_M?^0QPg!}BzT(wX75E{uTA*#oxH&$|(y#?G(Q
z-c<o4(K*GptIH4ay}YgMtMFK+gg&_Er7@^;-(`|PDis(Ti5lMmwp|uUa5{^E>W`L%
z1&KP5*VLnpz=*v;HF>DWqj9SqB4<i|BrroJf`{R_K|#U-Lew9A(3fRiCFq2OIXC4f
z+V8{`O0j6m<oVxDV{tP;N|19jA*9OWMT@m>tu^OX5)+DNa>)Qm1OmmcKFQ#7lIe^1
zI9G`z<;hRKCrwz~&*2OF@KTWFY@z#Gm*C(T;1S&^Qbkk3AKe-<ukx0CAJ{frc|){S
z!<O%o=+J~`f_9gkz3V$zL)p}tJSk#YgGrg}^D$%8Cv6gsom_rlF9yX{-O0X%L-A{Y
ze)ptavPTbsS|Zsfu;R6%(hkX;rc~rO`UqS5SO8W{bkDm4j(Oi|scdt05aeApwRRMW
zf(jL4xW25oe0-#WsoqC8GaL(vVkYHKSXs(|vTl7F3)ZNizPG9*^l3@eP>R?eGC7)K
zP^$t%P6k)aM$|0?1DL5v8U<FJLY;_SbXxGKmPSUd(7xhjbg9`_4th9<!FP9s@FTrk
zlC(WW&NWmGY()vy;C@G)W>RXQ3uX?`Nvsz?@nb-V&xW$j(Ku#E&Q}bg-}|cMniuW?
z5%n^;rqN-^^CYlKy2~X~ILVA~_9`Yxqv9Su)#E<F5}z!BRXsU0%A1YVQ8*Pg3r`*I
z))36Ff_1Mf;ITznc<v$b8Es5tHO2$PUK~>h_$F4>&?Ho&1D`7FMx>T}S;mQK5~J$7
z^(CzlpLQo}4~stFpkOD>F5<BtF|Qe~``dH}yH)0kYIC@07(Vp#iui|q&4A0jtaVX>
z!SVG<E@~)Ol}+T^QXHP9$wAENxl>6$O9DtZn)>)s297WoEq2y{^wR$|HK8nJ(^>2W
zV!FHWx@P#7R8is93bp{esaL3|JfFCnsW6Dz5y<e;MO==NeAUSx-}-~_l32N|V48vs
z4<I5`Ze5ELm>D8VqHTR`mbM{|K6~Ha;WdknpyUwoWCN4D7E)5sIkz$b1)){0tE!2$
zGjtb2-OB{8pwcPNM)m@*q!!ijH`s$QoRx?8uU_9F*X#r~(`B}kxXT=`r7Q{&W0dbE
zuvG=Sp~s%VY%DjtTb(>GDf)(yile{WCy=CGiHjQ>>L#Bgu1CUIe;{v2FDpw|3XVi}
z2(pQ=iw^wwW*QH9LV1CxL5$WF-?W?J4SUH_UW&{QMWT|ol2vbZ%5#Zflfp_0@Nb9x
z+A!3P>dw_|5i>i02ij|a;@I)>hlq>{_{2lLQjwV-(VTcM52Va8EoifZER>4p(gaJ&
zorAn2rzBG0;#zF%s&{mz@X73w6D$OBqwh=ndK;-CcOHffQkz(T%<l*m3p*~R#jFk`
zUbyA13(bwTSkl7}1&K^gQ~S=7UhJH&FkuAo^FcDwM>DiY?2_p#VG~;tp)jKmMh2tq
z>e!X#Ubk5p$=DbdajsW5)vtmV;AqXlv5v1lzg?8HPS|^4eD^9F9OdSCRCN08azf-j
zhlY2bPKyl{iBa|RyP|D><JY}8+=8+gu{!ht)!1RQVogb%8Tt`^a}Jd8EZ#^4IkT6d
zp&Q=3mcpIsqsC1pApDjiX53ohA$QsF?dPsXsRU;@oK)XXNptH6Os*5avBLuB$#%)`
z>X%Ug`6xYTA75V$bdSiz+4=8_-9f4ze>^}~dKyx%C^cEqP~A5b?ch(pmmrq8e_y)J
zchS=KAUoyTzt8mHf=6gjJoNGu7Z`n8K#g+>KbnJ*bKj5jZ8Ux{h8Q=-bxl12K7dVK
z0P80Gbc(0v5PCltJfEp_cdd2jv#`BX{|#4{o0Cb`#6?JCoB_?Gd~?LpD=Y3azr4LQ
zy8W!gs;!r5P1=p{ds5wdfrSK8ieSTfw0k*5Tng3Lt&Kf?PN4M-O@%ThlPRkY7ctpu
zsr=H6fJ6ZYl_QV-o6l(cHuU>0BfSdg8M$gI69(euT0$B|2>8!P16Q}3-`a(kfv(7O
zI^Fd^FX-LSW`Ar@+Zzz7RAO361TFbvayFlZ<~jI&odM``6iJ=$2_;c~wx$mDHz4?)
z8YhUzyO?9#ml)lF<L-er@nfSm>sW#G)h8=pwMqz~o2ARX>i(HXx7Ddc;b<__bEElb
zFT2(+EQ7(A`_%Cd&0b%HgM$0t-%hcCHm08?MIi=F&uVwS-?0zZ$v0s^%buaC_Ng~*
zKc@%+;5EE5_(BWZI$LGht)5!aT;$ZNH0rk<ZhYyW%#1Xg{zzwr5Q0iKa%&xBM-}Kq
z;?r`CP4E!Kbl2YdiftAoiWXXlU!>0~<`dCjV{<)%cR;`U7#y^o@)Lsn4oiLqV?Tl1
zAE4%+D2d}gproG&PVrZG_X|q;$@_=E{|+TRfh5j9p`<i5O`CZzk=IehwJ%gSYW`ct
zY6jf9ScwU16B;Xyen)5D&&*g>4nk6|B&aV8eC|mVPpRv2@#Zt-v*p&f?#n}vcV;;K
z@I%f|d#?Ox!}_k)K4z?7nRtu4klh}Yz#rc6=h>~~5z15-;pek`G}d9G?1uc<Ohn}N
zAd`=t?IqoGyg1&ZUp2vT6_hN>aIa1xMMhqT(iqzb!l5_0Hq8=I`5}z%vr2NS+AJ@w
ze#BkPcH#eR&*|`0|BjYLC9P@;+c{@bA@YwtwgNdIwW>Sa?&2vwBL{|GTjwl!R}6kI
z-m2q7>qZ1CF!_)eh=b_HA|(sIb&-oPnyS|ST?0jiUDMVb(*$XY;S=A4*6D)sE+g(G
zd$%O7eE5CnIvzcWhF^7Vi^jAxQma$n*9yTl0so;C(GM6MAE?{gyBiyYJP#YnOBBP!
z$b&GoCIia{8x7_PUvi?Q`_7b)GBHK1jqT-94O-scrPY`QKBk2*(KnAbzRrAEW1fqN
zZB-g>u7V5D<2#>yraso9co=z=>Fn(xBQU9@lDB${QDQ-7Z_-7-<rt{AEEPvC#FIm<
z3}(UJ+m&>%X^jO?m!wJvpMD%Hc80+D-(VXQ3O54e?P3f$%I18n+#u!YA|n@|BC7K>
zvv*S?*WvSxQZO$i10K%>lYH2s91pc}=Enfq=1d~}7d#!^E!5TJH&nW(_egCHRvHHI
z6~?tM8*rKxESg$*Oz?Ws$xuP_gf`J*6**mlrhbt;=o>0r_~D|B(O*K2mp-D=zi>9u
z^)rngDofxJ+Xy3$)Sgp0!0PSlDB>iME=@Zj<riy&B%|b^g$?Dqv?mN^@HJ%*i9iEV
zjzy8aP((DuPrr<CQz;UPFQ;uRH*$#9rR+|;_Dj6U>2}ej^g>-MHusxm40l+F!MW8x
zHO%9Xfdzhi?rJpZdb6`W-MrrN?IoHQSF3gkwI@sBerQ2qsr%x~VUD;#_vsI-i5c%H
zSY<?S*U~FDPJDwTwk-K8jPy~q2aCEVOpia?N7^pd^?rcuxE^*UUqp;>lzt#zNvnkS
zZu=%RBiqHv)(kA*8gS0$;?At}(#Skd5KQD4)ahfcbg+W5H?<@Ox}m+++1z7HT`SrB
zh+jiHri=j%*pOqoQBzpq7t0##XN`m5%Tx5ix&N4m7VKr((t7La_Hfd<)RUq}FoWi}
zw>sM4c5ONY^??|lkT`m|E62dx3+_ch673tiC01*{QDWbF@5reU^#jrVHX5_UIG<DY
z0rVRkT{V;C_u+i$S>;vag=uyu7l=MB^~1zvV#xD;mbHe@2SYLGr)-qw%B_;u-@cH0
z10)RFPPeP4?~ef(8+Wy&a5+bfWX;v7=v&t0`L%!%!VQ2CAzz$(7-!KY<pKj0VjDlV
z8q_MmsC=8DT-V#p3&5NstRJkO#4qkc<K3!pquj_%vz_n1GL!)h2tDh1l(N%O#-{1>
zE<F(u_jNSG*(W@@u3iBiTz?;srQb(WKdH52_uZ_c2<8c{#!Xk_qcEx?0r6TajW1(H
z7*D@;Vk<T(HXrzWh%ZA4B%Q$b=&{ywwo)@g!?J<EujZg-hc1Ise+a?m_J$P+)w7D%
zD`kHuWl%`z-5#Gh3kmE>KPeWrbdW9FXVKN5?VRT-B!aX%ypz72%RB;jVg9I^jrnAb
zg1tyPPi;@kZDpy%V*(qQff^dZXSjt@vWYWQF}<z_mj(CWuT@{1k%?ET1y!A=(p9BV
zW5fZ>gwjQ#PfSv<F$e||T~{-K?qr!AZwn&NW>*2y2c$yV-Bj7QJsNAq<C``TS2;+;
zvPw2{BNR}WN(W1r^SDHeDtPaU8=FHbX-n|uL&|8smRaw;qzv0wG#UJy*exht0W9We
z@2WtTAh`k8?l4EbewibIF@F}zmv!KVTsIbp{vvogz7A$3Li921wWQ8+>hgm`#*b@P
zr;xDEuOdHLxEvk5Xf(zadFvikg|sfSpur)mv0=(*v055uv~F?pO*F>pD+P}qZ%&(+
zsRid-=OYr3IIuCEkHPcZm|nby@8k&sw_dwBG~TkqF)WUkYM6Bg$vUF?ipU9=UwPHh
z-Pb1S=}~7W8m(e&xiIkD(xhE93)|7}97-dF-F{*4qVKZONt{Ai*v`&pxplk~DkR^M
z7aoRfof)k6i6q}0+$}c;4aw*@G1q-@fc@G@zvVsFyQ*@sBTT^enX9VfK<{-+rYvjb
zM~_VM7nv7gSqhQ|f>*d~qM-K+x2!!>s;yM=-@@>-Q*aN~`A~)C6?+LYSzms`3<#%I
z5{)`|gkx~H!r?=)qM7p1BD@?wCjauFK%{P9aULBs=~!R--Tqy_MAYG){+MJY2nJ#n
zejfdWkMpfZ6nHJrdnO61$>W&7eKY=%z_IE^s1DIeKy0TPb`e*>omm_fwQ05&uqTVp
zzp4I#RO>ze;<%3j*TzCxHGDOhQnWPq`s~tT#8f|oXpy(d^V`ZdO?u=GlEg>oG<TQV
zzdQb)!JR++^gkUx_&+%Qe?)(NiR=90_<!>LA@IL<{2<Vu9Dk(Ry!E^&hS#Vns(hxn
zf*s?ML|dah)AP02gg`j>+bRmxsUDpEL#B1)?OqoXYO!2tr4M9ql7=JSKC*71ZnPJ0
zGNJIgda@dqAKft<3-U%D4@(u04hgJmK~~(ye3ZIvTOK^QgQ#rPV}+7}horuQDsA>=
z!j&%HR|bY(F*RTYG0q%Y-)xe7LH9~*M53Au?1pA=usz6lwQ^7zuW8+-T4b#KfG~J;
zEpWW`LzFZ(;3_mOu+i?3|EvkeHm{bUxU3NM9=VzupS~P^f$y!*PNe1gdY_$H4y!S_
zJ?bHJ^5?L@)XyFQgXqB4ub*uXk*EoN;my#KfyQiKbsD}fQE^CM&3O#UngQAl8pGcx
ztzd4A?t2f!>hU&5wT7k}w-viKPAaw~y{Ohj8HW;p>p-U?MZ|C=4_~u`v+j%iPUs({
zn4)o@gW&sY(+WPtYvHx|jPa@?@5PjH<-FA5@xC9vzt@Zt{Nrj-gr2%~nX#YvF>DUH
zD$X%4B)(^e?8qg)ivg8e0`V~RdI+ue2<vX>=<=Xxgfvh?>igO39)3f{@`BFrqkprF
z?$U6(-+Ue0#5jT+T4eDV2P;_Vlesb1>mpi7wd9C5C`z08Bxa34n?@IPMYVd(RyUGn
z8G8|l(cxs-rNlfL&saSOUI!U|h`8cL7E10KuR>J9yIFT>x07fZHVk4*adtXUD+zH@
z7uXG-Nwosh<5vQRMk8+ty7}r%PP@&W9Y%Kt`ijdm(W+Ro<rn${FPlgu&S>8ii78FP
z$AssXx@`GI>&T~C8B!7}3`p{ZVC2(I9MT42I4xYcV?>OJW{7?2VqPgLst4i#y4t0B
z;aMYKlZQd1K?(zFCpy7kN{H(DK2mU_!^9wusQ{@@o0sq|G*GHR$F~r<l!oZ*a$DLg
zxkj46b_luR$f6-Zot5jI>f)>8G|lg$2zROjWI_jeboDfgJgM`wqOIL9!VDJ~<x*-b
z4FtU8sr>aLwnWvLtdm~{ZdFhxeBmXQ$Dyr{J~IgTdmtU{)eTq`RH`ipx{j@I3)a6*
zGY2qllBtAufaaP5Q_Ra6IFOwxyU!cQ7qo`TGY(!nP`<DsZW`j5=;j(GnN*PT|4ipd
zGVAQvNO`!YA-G7;n4t09*2xF9ty4okKsdqWg7_*&2sLBsN^LD8U<&;<U_np3k1Q`%
z1^TE`=$$c4xPSoVb@-h|iV#vu@=91V8K!xbcb~Kwt;GD`BD2tok9zLU^##nxQESjV
zqc;tk{pmH~B3ql_a*IFueq-<G=b-RNdR?7kH&1ydSzBj<sK&03<SjrJxUs%DM>wai
zMkB~d4W^cF8Q_>Aqr7}ju%bJs#=6aTOHoGjUWyja-q}!rJQ3bX`4iTxEmeR#yqDE3
z4UZ@PMkn0)df}ehhndgVD~`Oi-zANSB|ber&-Y|Cw+m7%)0J`*h-hw}Lh0T?9NvE=
z0@?C6Mc8z*2uz+$m(~j<(KM3eFuIdmyV6`3K5<|g6jR@*?lyoq!ppGoC^EZ}B%ll;
zvL-$3So*ek5pY@Bwh_4~WJiYSsZB9j*8)xdnN0WrVs`~^o-5Pu;tY0}2c>GxZ!*rH
zyAwo*ln6UMmlxPJnUC+_rnRAB%za*}nMx5d+o~E+8h_45PHq84eR=z4X{u^wlnKxA
zVkMO-6^<ln(MQ(q_{+7lq5;pla<tIIvUuxy&G$LAxKol=cX_s61>Cg-RKy%`!1mS&
zGIluh{=>%3U|+=_q?p+A<vhe2ajb!|r~a(2S-n~n0)koC+H2!zV&%IZ5*0Hnan?K6
zvYOIWV&gLT2u+F0BlxG>nOHoY4(E0xnVAY~8%DG4F!bemk8;?wuA1-kg<NUNSGnxd
z;Ygl=@QKo-d_*p=cA=ER{lgXsLp%G#OPbVp6+TdPeXX7PMBe}rejw|0#BzL*w1iz2
z&_eet9FH1VH!?X6`%*NvU8Zy_zOGnH!X1m(tBKD)Oqbk)Mi8-tA|b~MC#5=3Ktm2<
z&M#Gil8U=EtJ+MqMvWIKihSURVnS)=8Shqx+dwWWJ3(W=Pn8(|+0m@z%}tx2lKeZ-
zwVb2FT2E&M&VUsy+*?5pejn@-cq`oeJ0k-L>A{yo`F$Hp%xyCz0UCNIg_@m1PJH!u
zsaG)Z-eZ1^s(JF8>8jcH5?H0-Sxx6OVM+ky+8B$EBp4_$=<8XC`R6avy~ko|7j*?+
z#44$iPk6#KpeSa%ZNSXFCMcY#5lUJ=0@0Rt!b|#hR_IZ>-ag?i#y+i*HHfdq!uLWx
z3%^q#6q1vq)J?(60m_Ju^p!wCekvL``#8EXJ5p<zJ2V0^pu)wi&8YWjz~dJtbgum9
z7GYkqAK+3m|E{!(*kB@SRo?j;0uQ-2>tvPp=&@v;3x9XkKXWjD_||_qYtYY>%hNIZ
zzf5NQp6vKn7Umak{TuTiBL7==&Bev>7k8~~GvD(wn?bMwBVw;Vevcy}K<$(q#Zjc)
z%PHd5QreeL!`^M|BGK5?An+qyT@edHuV~oCQnWg<|B;o|lyG7c2m^e+a}sm}uYePC
z)LkUdEtYgczSt_@z06>@d<DTnTw=(fl4`LPjbl5@j*rOg4ktU>In{pM9zT4De?IvT
z-u8?$=L?Dn+nK2j(WIB4-yKNG-A%i=*6h2gaYDJy_(O(ku=gGwG96EoEO&~VAh*tz
zY+~$^TFHlLjkjnIuV<Q~9rURedEZXVCKP8dM2S7D-mVUXV@bx%cIw2LZBi4;Je{t)
ztsP(=XWKF&AF#bX-(38F6;XAxa2(%6yL{uKh*KQ<b)r78P3lIEE(LD2UNuBKcph01
zUsjM`U!=~|;W2D!plNE}TUS7Z*x0dt&_bJW>XguO-Rpovx_T-l&jT^8Kq0r~kw!Ev
z1P=<w-T^{fsXTpJ)q^MT9Hz1gPqcwgpx7j6Z=tVeLPObr?%Vh64+Sf-FEYZ-EYC$a
zroKKijglxcSBEh%XY)#UnMl2&1E>*w+g-0pRqVgCdoP%bgq@w)_yyj&yvFKN+Ip4@
z>}LDNu^v&f8K)f%DSA4|FQ2|9nAiBB;|H1%4_o%?=CSQ3<ITx?0V0a+RrH`ec=N$V
zis_+GsV;gnPXKyju7;Y22p4jIrFL;JWkaUoy^B<yN&Xu0Mnin=o@iNu6Jcwnn<g$T
z@O#yQhVkgsIQate>Do@xn4m9ALv~c(@WlDxvQ#31?VaogEW+E~z)M3QcS=s>cyk?t
z;G)6AHj=|@5{j&RbYX^|V#@%A1dsBT#)rPBX$>zRPd~#6%cC@#T*P5+nUnE!@LqTd
zK3K7^XFMD83`@&&Exv;xt@}-mMKUoRsVqE;%y|7MJPU_=+?JV=dHL&b#@j{F{$6_b
z==nyx{7gy9sVdme^~yqP^>%5T319g=qGhL1(kNO{8o3b>HjdkU4MZz>ecSJ@(1+#1
zwefif3Y=!UOb{6SR%jI>t`=L+b0Rocp7HQ*hc!_|9TI;uFCY^5eWK6y*D;Up>n>KL
zczNFYs;=Y1VSyQP5H#ij9lV2bsat=+k56Z;b#zquEifs1oi|9}cw(E`)gaw&y&c=s
zFOdX;;uc?<5)m!pI3?wLA#+T10&8wLS(D>aafq4A=QZ_R>dDE^6H`)?eihVuUnXt>
ztW&V*3u5q%3sloZm!^EG?1zm*WkXNw4Nj=eI#$lt017X>fcyQPAyXUH{L-&e;dd>o
z@OpWBLbrMg_(Y=xJ9A&CQ=hu!r)yEejhg06ai2ONi9D1wf;O7we8(~$rZIuh+LNqf
zcPkL)<5Sc~AM8h-h|#nllJE~pIZ+DO>E{H-<kh#0Vk(Rg+lyRLi-Dgr_o5PKY=8(G
z9zzand*++*TaG<1d=9x(5$i`-k?-H|6mVL@&42EFv1l95Ek?5_Yslp5r+IOu<jPZj
z=G?FA-8(rI*%FtIl~J;~Qe@+wn6i2~J6X<i*wi_;x`RX<qy+p{fR1xvck<2-I_X@v
zy6SUgnKd0gZdqQ6;y_;uU43cKt>nu}GiI7n2K+edng-~4F%ebf-pKdZhsARKVn?=Z
zZD4MVY~PeU(jd|Ln=q;F@<6|_rbP~VRHOp8PgrtxI0}ZsOM0>lZl5QZsH{I$Ebmo~
zq1Wo|nNX7y68n)?1&aqfv-h}^m5mj2)aPCtf4mMY&H%ObF?M;XhcyP{jhlR$d5^w|
zQwHRTNdA#5HoSuxY_|(P$(rM087Qw_M?TDH+#GEoawWuPjuE<g=eHClI%ZR5tGJ5I
z2E}$6$PczGg*{07gf_47$%+w*jfFna2(78zPy~3-Ei>efh@FOxN6Oi{j%mOt!3{8&
z5_y(5VmQO+esODbPd0)`KOPOi{4S}!=^v6aoqZ?zz5)8wpO_v}ewiKh!{Mr5=LI^G
zZy^tx4dtVG78Ov2=8(#v(zz<P`=j25uo6%)7o_!!VT^c>gxb{?R^48fCo&RBWt+!U
z2dLGL5=0x#k9Qn7Z~(A?qQEyPZ}cSYT(;9{g9`VbY>B4LW>l)daGnkoEH%YoY^78G
zwAP=|ws5~Y6xGWSV9v#CEAK7J<U2k9v4u4_&z$n?+wa<h7y+na9SV9YF<pj83Dvi!
zzvr8)LT#QEz*1z4zmiPtH_B>aB@BGCxW=Ff!Z9;<h*ne27s#M?>J=zNM8_?aY0r}S
z{GelwD1CK*vLf#SD>!p->95|fVh9-M-kh_=F8)I3!jFBpa0^B~mgB2uE~@pLvSR+u
z0E5FL>x0<Lygu%Q&>RzkKK<3{afZ{mR#}R4ZYXA6+v@;y5A#g22JstiZacI5cAC=+
z<N%88epyk!i*or*YZkS|U;&bX;{0)_P(R7^``&jWDI{<R6+`9jYs??+QfJGozBkc)
zW8$d;fjz1_ZhB=%UM*y#AMM=3m`4pydh-V$JV!rhY35xXF7U%A>k4wHKC*7yq#XAy
zJ`4ude+k$2nF`+WspfIxDPwn)m=vt`#mn#&zv--;JW)!*xQ~+$AjCMKlXhOX#3DPe
zyFc0AAbY6rNb9Z;0N-IZvERQ%V(&3nrSE$mH42WZ(3oAa&xt~>yeA{Po-F@6&iJj$
z;}2BuFQD;zy#K!h8viI4`S)P~(68kl|1Hp9W%+YlAmR78z|~V+;ETAgdF_?<DWhF1
zg@}K{`<TRhTH4rH0wt=>a4pz}>k-8d)Z-S4wa^tZlcpx4>0GQ9&)amFDYzDN<vRi~
zUWc+NuXn%>tZs68l8X1NIu@z^xVqbl_fLqJg;LIZR?o}|jft>}^8w$+M*oK&^yFdf
zJatQ3>owA~JaY$wnt5o9Z3@($z#eW((?GS%RQ__+OZxB`+dJ4tgdelecF$VCJsKTD
zbR#k!3y$+4RVtMd;do4|#nKZZQ@O_*pT+k~(o_(y6YE+>Gj7%;C8bx^DERzL0=n}~
zYGvs4$XkGQ>e`2jH_3I{?6b+di$wLpw9Eve;_@9>xL+b9>ZN-36p&Lf^KNmVI-yIe
z@YY%w6xHgjrUTEo_|;uduo6>lC>yIun$NJykv_YK@6y4WXpN2I6q0w4=3&qrchK=?
zDa!im;iw?`+d2o{z7Scj)Z~BfuaUd<Yy;nwxZ4F1^E&ozL#{|_UL@kYc=j~7?nBou
z<^HP@0DUsYfJ4&s_2np2iIU5AG|1ygbV+SV!3iJ?izH$az>ewoLc}b4772|^KJtK&
z_u2adf#)NQN;~~Gz^m=VM0w>D8jWMIdq#5Di#9d=S7U+UJIL$go>n<cLFgo+H_AN<
z&yOa`JP4N4l*|Xr4SJ%42oHokq}LLQMZ7vwFqRSui;4J5loHH9%esu(6p<+CtZ*H>
z^WfyTql%2UQdAoEd$@$nF47FW3B=UyDClNnlWLK~jZj1#b&B3mc?qc+-f#0Mdr{=)
zWFz8ZV)nheO_p<GijsW#xm2DlM)A?I$NVJ0qn)%Sfiw|c(hYk%ZD%rNJ3N2X3=Q6m
z)e~<lqbX78t?L)QoPF72uB0G31ioO=qmt)WZ$(?pr_Yqwv%)Jt!&8X0s?O~ca`!b>
z?_@F8z|juY&HidpGeMMn?&@indWwF+6Y=&dymQP_XMOV_h6>>~VRcN=ggA|~P2>0#
zPL)uVEjZ}J^kY#B(BYXPO2LD|RbfmJ)nd8@ks<-bsM#O|Zv!A1{;+WO?yR7Sw}ekh
zYrAWeWzW2$nFn24y5T(wvN&~?iLwL5Gp#{_H<%m%Xuk1OC5?1ktGA^9A)&dDS3$06
zJud0xfKvMhBep;V*_#lIy8uT)-ghSUQ<(0|yZe~h#nt>O=;g_&x`8i-fTTEFToo&h
zp4=7tv<DB3w#DX?`4<ZPG$91-2yccadoU*g`d*6R3nJ!(BL>NiymUnbr^(nh*poaf
zZjx?lxcVfbN(Fw$BXU7Ab@x_d#`s{>f4*NLIHjGcdTT9csdxDMeaD5K_eo}En7^H5
z<HE9FdS65NIPc`0688%MucLl4Ub|d?NlU%m)VF?|Dw@nW5uO<;F4=>xvMIT!tHmW2
z1uG>{XC*G+HJm}qX*8aZb@)gH8@6CCaWX2;<{rKTE@vZCP9v8$AD0s^()K2Xc}My>
zUxt(l*3TYJ1!!7A5ejFNldaq$%wcH>B-8M0Js-8RG<mt`yW>ZUGyYQKIX}MuV?K?x
zUy&buyvR0mY#iyAt}n31P2=nqWJyU3K0SyVNBnqB%6g$3<o7$S54fbE4};kLa_+E(
zur{_gn>Wjcih<=DgDJ9jKFoy=j~|_xA@I%<NJF`p+kWWxwD`mFsWAJQ(n%tcbVS46
zcvN8s&YyVi9-rMe(~Gryt|ioQGZ}FxE|iU1GHo-q3m@DP!KvW!yyDi#%Y5E=kBFdX
z4f}6)@qb(T2KrC6Z@*`K|NU|Ce_H$oV*5*4H&5-zW}XA%VX1A+7v=y#IO1B00u9bA
zg<UfQZEr4f+4y5!Ep05lqQqkP<K273WVPoqE}AgHp$5x5i>)44W(aNkS;EW>Ua?pM
zTp{uAc!!TW90yhwM_qMM2K3x_-mcD|Urpk~X9`22HL&sl{p_n;jtC#*bhJBm;Ng>R
z61ACDFE-+$d^x$OmHi}f`{A{ie4j5bB71`?O7=QFq9>HiZHhJAqb$Svy!Rv6)=Sl*
z6FP&Wx>#AQ<ke0WH*!P<nA$Ee4Yhni_|ae`Z8|f(Nq(QzORCq6^MS5|`nw_qgWvZs
zaWo`ce%Qw$9te^k!4)LWmk`?9s?K_mqZ)DL&Q=SXdTkYq$CDBWyM5d9k^uA8ZMlKJ
zsM7f>{2Hr76)$OqWcHMf=iX3H@M&Ir9~&t!kE+1q1PW`rtdf;hGK8eAU}G*`)#^xR
z;axBD#}T9`RW2p2m^Io6_VQ&a!w21kKB)M$zyYxG2-5PA?_*Qhf;)n4&vrjrRmmxz
z+k4ENvkK@^Y0R=UHAU=V*r*_q48H?>MjYbA(ue@>>M!M;)uxm4Du&t$Gtrh=<(nnR
z2&b>HN?h?-$@xanzgHG<po2+1Ul{5-^`NIxAI6<=$><pP!c}3=^}F(-eE*Eh?$&3Z
zj;|`}cQBwNj%8u4)Kool`<-H)2LGl%$N*aY17=LOph8{M{Ys9_L$2SWKISDt%;Nn}
z`8*S|H=(f(QH~ki8TN(mB{*!V(1DFeBpc@x()>Yt9z4kaqr?z%#$C?Mg73}nBW1Tw
zI*wPzD3o{RuDF_cFuQ`}{ZV4pS6%D*!(#Kd{V?BDn(F394$1Z`^gidtlQM^ex4hkP
zDNe)boQvq`l<IlRn3MQ`?VL-(c2)DhUu$8z9^^+$1Mwv8i}&auo#H0*5Zh6^y@wls
zAwz#O<Cr0z#unElQ6<i*F}?PS>1>dH>-R-6ns<rRkL@cWd>3MmuHKUBg~6y}W>%))
z3_0Au-7K>5Zp5K>3zA!uCk$7BDnFA(v{VIwK<!4t&dRV;C5wleU;#Op<|d{Jl)AoH
z#n&3J;CPTt20lTnOYT?-ke4yi3J@tiT9_m0Gl&4=pkX0ym@Qf(>~RpYljmMhB)}q3
zs=X-79m+B2yI5Qh$)=TTNRQtjac&e|rG0hoE6;}V`nl>Lqw@2Dw(ZpJUS<&`8`yVl
zvJ(yWr>HbZeC07Ts#SuWDfz6AlxihV20ib>T!N%t7!y{+%TtAyF!w&nv@9!uuO#8U
zEi@+=mGdg)S5m&_7cRQ*sk+xPuYBFiN{2RMdiM?mWZE%#2i>&aO!Rfc%A}ycNfYm)
zsn)u1Zs!@Pul^1wr4O=;L|Uelj7gNJn_pc7>xpbb)ZCuN_8f0^x0=vQ1z!`Vw-<m4
zDOsNw#5qe8qawmA=4n}}c|a?9PQArva?;AuW3=hgu0S|w(s`_P-uLc|lai0&ev`O{
zhato%0;xvn<V2tM%yXIX=x(>^3Z0YrU~AxEVsv$*VZ)PXn@J{eA)rycj=*wqy}eu=
zes_crQ!IIqpnol=tB_(}{OVYqK0ikosf7gHd%Np|VfO}f-2?SHtasr0i#+4AW!$@>
z2S&{rBSgzjZLQWe*z=d#kbB*7+XN>SLh+zArmrVi*#kaIwrsd(MfUxsleO1(PA5o5
zx;ExM4j!VD%^sgaC9IMYG&2+!PGT19B~GuAvp!O}yC^><=ob*C+pNzAN%Ai5(rOpJ
zuE*0|jNhU%#BO4S!C&UR`%K*!6_M}T%?T@_1aQ7`YHZ*f{-Njm<$QrMlTv(LGa#E%
z?}Sk%X7rVgnAQbb8jf$AjkWPPUL8=%5D%Jkd&;K^Bm6z;bQJ#S>-Mk7nd|GBm+=#4
zFYfxH6>|7jB+$?qCkJ!nD&!+a=S@N_s_az!3Upn=*CM2F?#7&tvrcIUuf5)vH}=ka
zzSvFY`+kZJ)xSXdZ+y+aEg%B@rwXEfl$QS&Cx-uN2@xm9pCgkA2{z&LV2qySqXGi=
zPc^{O?I>d@DK0Z9P1;3{#Ni7tErW(;s8=MTYL$!4D*t^X)`Uz|d15R+jQYoA)?_i(
z7yPN9So;Q^oIoUU-&nQ+LMAPx=iyfk6-qs;t|xmZXY*c?f_V|NXo^xDxKxcVDG+V+
zoi5#xs$Edk@S+%1kDc8vNo<vvJ8)==#nU^g;OZW3*m-@bi-;%Zflh_mcQ79wSA15Y
zv;oiE*<{Oc^l$7^gXYy-afMo~iRUkEpWA3bY@|OO-;~$#D?43b{4j+aT~ymyOG=be
z;#Y{7?i+=&l)KLhI{O{x7v+TAV7$7XM<e}V|6^Xlf5*;W3zM8&EJ<7tStK_&Fb_22
z+Kea!rc|^Jf)axE4k&4E0>0WNSrD5gdaAF*SGc-DA0vDAp*J(`Su%Fka}4NCJUn-c
z<h==8eZ0D^J#sZU=HB_H?KCod1=*T(BV!GWk>_erIYw&s-32+B=tbanSVx(KmM6n#
zeDRlGt+f!hw&owKXDz?nxNFzn$x)8+Qh!VhHlb7aHk#RwR$97)Xk$u$G`Cb|TvcgO
zTj@2C1Tw(WU0_pCjNRE8lw_sYcEpLn?3Z;+sS0czQ9tf3wUyb@a8yXrPJVyBx{R5$
zGE1MYpDHFM0L`i$8Hfp$tRkQQ-(>SFL}iw*J=f2)@|kYN5kyy4%W{XzB>}1wE;Etd
z)n+lIWHe(8gMQ0uTPBJRX`$?b*x}Jqm=KprHEw`PLsL#w>XpnV4{1?Bi=4Ztv%O%l
zibH(umktsWJF)%JqI+_?BLu789C7-y)6nmxN}h^#v@HAaU2p5^T=eF!i1udiHp4Mw
z#$W3mXbeB&7)DAvq*44n{unar1fRBirZa*+lkua)^TGLqhbnZ3JBpo@JgfQwJ7|?6
z`4I$390AdKl1UfJiuyu1u~;N^kiPaC+`c_1jZrR%+Zf*te6-!HDbXI~Bo@~@v@&h!
zRt-kDsXFW5h@>lq9vwj&7$mE?M|XXXMfq=5=C2ukA$<p9(WfQ|08rf6(%IM%VyMq3
zWMgTB3>2|`YF>a?n*xBU5Nkne2gn};WT29hf#Yu-96uo;=#P58pJIMZB>bB9|2Jy>
z7c~5u{rEQ;808EsA$Cqr4H$oz{{M9@EI%_Ue}q5&rRk|f#vWp9|Gyyr*T$RQUI#BP
zP!t3J{k|QFvH(E8mtcsp0zkiY-aIvu0Ojp%43&%>0h++44juqd+1T~zMM_vbwWkRE
z{{80nZwc*RUJc8iLN7nx{(n}73>0Mp{PsI2{{PYZ>j~t4IMM$l&3`(m{GZPGS5NqV
zJLf++%>UCl|GLNg-_H5hZRgEXqt(wIGU2CYEC8)AHw!Zh2QwRpiv`5O!Uocy`(;hI
z+8di7GXq$-keUDa0dTOhv$6wB0RN&rExn%~fb~CVpr;XkrLnL+S@D-~tUs6SpJ^=Y
z;HQn{FEkF0r_JEcG&YW({bzrnaWX%x;y=?k*uhV4{4X>Pj=$Lg=KQ;TVDR5`fS6f+
z;<Z1?d!ljubsiuPnB%YW0D-vJp4R%Gd09BYf3+FJ0tWx}_XDwTf&a$K$_)OSURKcG
zd0APw{$?{P+t1ze&$h6#v;ECxRt}E8*~iKWe%d+y!pp_{Hyv!uPYsU$9Or1S53zjm
z3_lwwl_2iMKmS)iMH?GOz|XDsDK{@+ZDIrX$?@BbFhG-+jg|ckD+gGR;|)7Ii!exp
vLy(1om6e@E@C}zRvnY!&|9>9w+aB-WsBiE1+rJsa#tdddrluB=6Gi@iF*e$c

literal 0
HcmV?d00001

diff --git a/team_docs/generate_role_docs.py b/team_docs/generate_role_docs.py
new file mode 100644
index 0000000..0c4aaf2
--- /dev/null
+++ b/team_docs/generate_role_docs.py
@@ -0,0 +1,2312 @@
+#!/usr/bin/env python3
+"""
+Generate personalized ClimateVision role assignment PDFs for each team member.
+"""
+
+from fpdf import FPDF
+import os
+
+OUTPUT_DIR = os.path.dirname(os.path.abspath(__file__))  # this script's own folder (team_docs/), so PDFs land beside it on any machine
+os.makedirs(OUTPUT_DIR, exist_ok=True)
+
+
+class RoleDoc(FPDF):  # FPDF subclass: shared page banner/footer plus the text helpers used by every role document
+    def __init__(self, member_name):  # member_name is kept for the per-page footer line
+        super().__init__()
+        self.member_name = member_name
+
+    def header(self):  # overrides FPDF.header, the page-top hook
+        # Green banner: filled rect from (0, 0), 210 wide x 28 high, with white text on top
+        self.set_fill_color(34, 120, 74)
+        self.rect(0, 0, 210, 28, 'F')
+        self.set_font("Helvetica", "B", 16)
+        self.set_text_color(255, 255, 255)
+        self.set_y(5)
+        self.cell(0, 10, "ClimateVision", align="L", new_x="LMARGIN", new_y="NEXT")
+        self.set_font("Helvetica", "", 9)
+        self.cell(0, 6, "Role Assignment & Codebase Ownership", align="L", new_x="LMARGIN", new_y="NEXT")
+        self.set_text_color(0, 0, 0)  # back to black so later text is not drawn white
+        self.ln(10)
+
+    def footer(self):  # overrides FPDF.footer, the page-bottom hook
+        self.set_y(-15)  # negative y is measured up from the bottom of the page
+        self.set_font("Helvetica", "I", 8)
+        self.set_text_color(130, 130, 130)
+        self.cell(0, 10, f"ClimateVision | Confidential - Prepared for {self._sanitize(self.member_name)} | Page {self.page_no()}", align="C")
+
+    def section_title(self, title):  # green bold heading followed by a horizontal rule
+        self.set_font("Helvetica", "B", 13)
+        self.set_text_color(34, 120, 74)
+        self.cell(0, 8, self._sanitize(title), new_x="LMARGIN", new_y="NEXT")
+        # Rule under the heading, from x=10 to x=200 at the current y
+        self.set_draw_color(34, 120, 74)
+        self.set_line_width(0.5)
+        self.line(10, self.get_y(), 200, self.get_y())
+        self.ln(4)
+        self.set_text_color(0, 0, 0)
+
+    def subsection_title(self, title):  # smaller dark-grey bold heading, no rule
+        self.set_font("Helvetica", "B", 11)
+        self.set_text_color(50, 50, 50)
+        self.cell(0, 7, self._sanitize(title), new_x="LMARGIN", new_y="NEXT")
+        self.ln(1)
+        self.set_text_color(0, 0, 0)
+
+    def _sanitize(self, text):
+        """Return text with common typographic Unicode characters swapped for ASCII stand-ins (for the latin-1 core fonts)."""
+        replacements = {
+            '\u2013': '-',   # en dash
+            '\u2014': '-',   # em dash
+            '\u2018': "'",   # left single quote
+            '\u2019': "'",   # right single quote
+            '\u201c': '"',   # left double quote
+            '\u201d': '"',   # right double quote
+            '\u2022': '-',   # bullet
+            '\u2026': '...', # ellipsis
+        }
+        for old, new in replacements.items():
+            text = text.replace(old, new)
+        return text  # any character not listed above passes through unchanged
+
+    def body_text(self, text):  # one wrapped paragraph in the regular 10pt body font
+        self.set_font("Helvetica", "", 10)
+        self.multi_cell(0, 5.5, self._sanitize(text))
+        self.ln(2)
+
+    def bullet(self, text):  # one "-" list item with wrapped text
+        self.set_font("Helvetica", "", 10)
+        # Narrow marker cell first; new_x="END" leaves the cursor right after it for the text
+        self.cell(6, 5.5, "-", new_x="END")
+        self.multi_cell(0, 5.5, self._sanitize(text))
+        self.ln(1)
+
+    def code_block(self, text):  # monospaced, grey-filled block; one cell per input line (no wrapping)
+        self.set_font("Courier", "", 9)
+        self.set_fill_color(240, 240, 240)
+        lines = text.strip().split("\n")  # strip() drops leading/trailing blank lines of the snippet
+        for line in lines:
+            self.cell(0, 5, "  " + self._sanitize(line), fill=True, new_x="LMARGIN", new_y="NEXT")
+        self.ln(3)
+        self.set_font("Helvetica", "", 10)  # restore the body font for whatever follows
+
+    def key_value(self, key, value):  # bold "key:" label in a 45-wide column, then the wrapped value
+        self.set_font("Helvetica", "B", 10)
+        self.cell(45, 6, self._sanitize(key) + ":", new_x="END")
+        self.set_font("Helvetica", "", 10)
+        self.multi_cell(0, 6, self._sanitize(value))
+        self.ln(1)
+
+    def month_block(self, month_title, weeks):  # weeks: iterable of (week_title, [task, ...]) pairs
+        self.set_font("Helvetica", "B", 10)
+        self.set_fill_color(34, 120, 74)
+        self.set_text_color(255, 255, 255)
+        self.cell(0, 7, "  " + self._sanitize(month_title), fill=True, new_x="LMARGIN", new_y="NEXT")
+        self.set_text_color(0, 0, 0)
+        self.ln(2)
+        for week_title, tasks in weeks:
+            self.set_font("Helvetica", "B", 10)  # re-set each pass: bullet() switches to the regular font
+            self.cell(0, 6, self._sanitize(week_title), new_x="LMARGIN", new_y="NEXT")
+            self.ln(1)
+            for task in tasks:
+                self.bullet(task)
+        self.ln(2)
+
+
+def create_adeolu_doc():  # Builds Adeolu Mary Oshadare's role document and writes it as a PDF into OUTPUT_DIR
+    pdf = RoleDoc("Adeolu Mary Oshadare")
+    pdf.add_page()
+
+    # Title block: centred name in bold, then the role subtitle in grey
+    pdf.set_font("Helvetica", "B", 18)
+    pdf.cell(0, 10, "Adeolu Mary Oshadare", align="C", new_x="LMARGIN", new_y="NEXT")
+    pdf.set_font("Helvetica", "", 11)
+    pdf.set_text_color(100, 100, 100)
+    pdf.cell(0, 7, "Data Science Engineer 2 - Data Pipeline & GIS Lead", align="C", new_x="LMARGIN", new_y="NEXT")
+    pdf.set_text_color(0, 0, 0)
+    pdf.ln(5)
+
+    # Quick-info rows rendered as bold label / plain value pairs
+    pdf.key_value("GitHub", "@Oshgig")
+    pdf.key_value("Access Level", "Maintainer")
+    pdf.key_value("Reports To", "@Goldokpa (Project Owner)")
+    pdf.key_value("Project Duration", "3 Months")
+    pdf.ln(3)
+
+    # Section: how the member's background maps onto the role (four paragraphs)
+    pdf.section_title("How This Role Fits You")
+    pdf.body_text(
+        "Your B.Tech in Remote Sensing & GIS from FUTA gives you something no one else on this team has - "
+        "a formal education in exactly the kind of spatial data ClimateVision processes. You understand "
+        "satellite imagery at a fundamental level: spectral bands, atmospheric correction, spatial resolution, "
+        "and coordinate reference systems."
+    )
+    pdf.body_text(
+        "As a GIS Analyst at Charis Tech Hub, you already worked with Google Earth Engine and AWS, writing "
+        "Python scripts to model and extract insights from large geospatial datasets. That is precisely what "
+        "ClimateVision's data pipeline needs - someone who can build the bridge between raw Sentinel-2 imagery "
+        "and the clean, preprocessed tensors our ML models consume."
+    )
+    pdf.body_text(
+        "Your MSc in Data Science from Hertfordshire added the machine learning layer: Scikit-Learn, TensorFlow, "
+        "XGBoost, Pandas, and data pipelines. Your credit card fraud detection project showed you can handle "
+        "imbalanced datasets (SMOTE) and build production-quality ML models - the same skills needed when dealing "
+        "with satellite imagery where cloud-free forest pixels are the minority class."
+    )
+    pdf.body_text(
+        "Your experience with Power BI, Tableau, ArcGIS Story Maps, and data storytelling means you can also "
+        "create the visual outputs that make our satellite data understandable to non-technical stakeholders "
+        "like conservation NGOs."
+    )
+    pdf.ln(2)
+
+    # Section: role summary followed by the bulleted core responsibilities
+    pdf.section_title("Your Role on ClimateVision")
+    pdf.body_text(
+        "You own the entire data layer - everything that happens between raw satellite imagery arriving from "
+        "APIs and clean, model-ready data being passed to the ML pipeline. You are the gatekeeper of data quality."
+    )
+    pdf.subsection_title("Core Responsibilities")
+    pdf.bullet("Build and maintain the automated satellite data ingestion pipeline (Sentinel Hub, Google Earth Engine)")
+    pdf.bullet("Develop preprocessing workflows: cloud masking, atmospheric correction, image normalization, tiling")
+    pdf.bullet("Create PyTorch Dataset & DataLoader classes for training and inference")
+    pdf.bullet("Implement data augmentation strategies (rotation, flipping, spectral perturbations)")
+    pdf.bullet("Engineer spectral features: NDVI, EVI, moisture indices from raw multispectral bands")
+    pdf.bullet("Build data validation and quality checks for incoming satellite imagery")
+    pdf.bullet("Manage the data/ directory structure (raw, processed, satellite)")
+    pdf.bullet("Create EDA notebooks for spatial data exploration and visualization")
+    pdf.ln(2)
+
+    # Section: file/directory ownership, rendered as one monospaced tree listing
+    pdf.section_title("Your Codebase Ownership")
+    pdf.body_text("You are the primary owner of the following files and directories:")
+    pdf.code_block(
+        "src/climatevision/data/              # PRIMARY OWNER - Entire data module\n"
+        "  sentinel2.py                        # Sentinel-2 downloader & preprocessor\n"
+        "  landsat.py                          # Landsat data loader\n"
+        "  dataset.py                          # PyTorch Dataset classes\n"
+        "  preprocess.py                       # Cloud masking, normalization\n"
+        "  augmentation.py                     # Data augmentation pipeline\n"
+        "  __init__.py                         # Module exports\n"
+        "\n"
+        "src/climatevision/utils/\n"
+        "  geospatial.py                       # CO-OWNER - Geospatial utilities\n"
+        "  visualization.py                    # CO-OWNER - Spatial visualizations\n"
+        "\n"
+        "scripts/\n"
+        "  setup_gee.py                        # Google Earth Engine setup\n"
+        "  download_data.py                    # Automated satellite data download\n"
+        "\n"
+        "data/                                 # Data directory structure\n"
+        "  raw/ | processed/ | satellite/\n"
+        "\n"
+        "notebooks/\n"
+        "  02_data_exploration.ipynb            # EDA notebook"
+    )
+    pdf.ln(2)
+
+    # Section: delivery plan; each month_block takes a list of (week title, [tasks]) pairs
+    pdf.section_title("Your 3-Month Delivery Timeline")
+    pdf.month_block("MONTH 1: Foundation (Weeks 1-4)", [
+        ("Week 1-2: Data Ingestion", [
+            "Set up Sentinel Hub API and Google Earth Engine authentication",
+            "Build sentinel2.py - download, parse, and store Sentinel-2 imagery",
+            "Create landsat.py - Landsat 8/9 data loader with band mapping",
+            "Implement basic cloud masking using SCL (Scene Classification Layer)",
+        ]),
+        ("Week 3-4: PyTorch Data Pipeline", [
+            "Build dataset.py - PyTorch Dataset class for satellite image tiles",
+            "Implement preprocess.py - normalization, atmospheric correction, tiling (256x256)",
+            "Create data validation checks (band count, resolution, CRS consistency)",
+            "Write 02_data_exploration.ipynb - EDA notebook with sample visualizations",
+        ]),
+    ])
+    pdf.month_block("MONTH 2: Advanced Features (Weeks 5-8)", [
+        ("Week 5-6: Feature Engineering & Augmentation", [
+            "Implement spectral index calculation: NDVI, EVI, SAVI, moisture indices",
+            "Build augmentation.py using albumentations (rotation, flip, spectral noise)",
+            "Add temporal compositing - median/max NDVI composites over time windows",
+        ]),
+        ("Week 7-8: Scale & Performance", [
+            "Integrate Dask for distributed preprocessing of large image collections",
+            "Optimize data loading with parallel I/O and memory-mapped files",
+            "Build data caching layer for preprocessed tiles",
+        ]),
+    ])
+    pdf.month_block("MONTH 3: Production Readiness (Weeks 9-12)", [
+        ("Week 9-10: Quality & Validation", [
+            "Implement data validation framework (schema checks, anomaly detection)",
+            "Set up DVC (Data Version Control) for dataset tracking",
+            "Create data quality reports and monitoring dashboards",
+        ]),
+        ("Week 11-12: Documentation & Integration", [
+            "Write comprehensive docstrings and module documentation",
+            "Integration testing with ML pipeline (ensure DataLoader feeds models correctly)",
+            "Create data pipeline tutorial notebook for onboarding",
+        ]),
+    ])
+
+    # Section: branching convention and pull-request rules
+    pdf.section_title("Your Git Workflow")
+    pdf.body_text("Follow this branching convention for all your work:")
+    pdf.code_block(
+        "# Create feature branches from develop\n"
+        "git checkout develop\n"
+        "git pull origin develop\n"
+        "git checkout -b feature/data-sentinel2-loader\n"
+        "\n"
+        "# Your branch naming convention:\n"
+        "feature/data-*          (new data features)\n"
+        "fix/data-*              (bug fixes in data module)\n"
+        "refactor/data-*         (restructuring data code)"
+    )
+    pdf.body_text(
+        "All PRs go to the develop branch. PRs require at least 1 review from another team member. "
+        "Tag @edoh-Onuh or @franchaise for data-related reviews since they consume your data outputs."
+    )
+    pdf.ln(3)
+
+    # Section: teammates whose work touches this member's outputs, one bullet each
+    pdf.section_title("Your Key Collaborators")
+    pdf.bullet("@edoh-Onuh (ML Lead) - Your DataLoaders feed directly into their training pipeline. Coordinate on tensor shapes, normalization, and augmentation strategies.")
+    pdf.bullet("@franchaise (Analytics Lead) - They need processed data for carbon estimation. Align on feature formats and metadata.")
+    pdf.bullet("Olufemi Taiwo (API Lead) - Inference pipeline uses your preprocessing code. Ensure consistency between training and inference data paths.")
+    pdf.bullet("@cutewizzy11 (Full-Stack) - Frontend map visualizations may need GeoJSON exports from your geospatial utils.")
+
+    # Section: five-step walkthrough; each step is a subsection title plus one shell/Python snippet
+    pdf.section_title("Your Code Pipeline")
+    pdf.body_text("This is your end-to-end working pipeline from environment setup to pushing code.")
+
+    pdf.subsection_title("Step 1: Environment Setup")
+    pdf.code_block(
+        "# Clone and install dependencies\n"
+        "git clone https://github.com/Climate-Vision/ClimateVision.git\n"
+        "cd ClimateVision\n"
+        "pip install -r requirements.txt\n"
+        "\n"
+        "# Authenticate Google Earth Engine\n"
+        "python scripts/setup_gee.py\n"
+        "# Follow browser prompt to authorise your GEE service account"
+    )
+
+    pdf.subsection_title("Step 2: Ingest Satellite Data")
+    pdf.code_block(
+        "# Download Sentinel-2 imagery for a bounding box and date range\n"
+        "python scripts/prepare_data.py \\\n"  # NOTE(review): the ownership listing above names scripts/download_data.py - confirm which script exists
+        "  --bbox \"-60,-15,-45,5\" \\\n"
+        "  --start 2023-01-01 \\\n"
+        "  --end   2023-12-31 \\\n"
+        "  --source sentinel2 \\\n"
+        "  --output data/raw/amazon_2023\n"
+        "\n"
+        "# Output: GeoTIFF tiles saved to data/raw/amazon_2023/"
+    )
+
+    pdf.subsection_title("Step 3: Preprocess & Build Dataset")
+    pdf.code_block(
+        "# Run cloud masking, normalization, and 256x256 tiling\n"
+        "python - <<'EOF'\n"
+        "from climatevision.data.preprocessing import preprocess_tiles\n"  # NOTE(review): the ownership listing above names preprocess.py, not preprocessing - confirm the module name
+        "preprocess_tiles(\n"
+        "    input_dir='data/raw/amazon_2023/',\n"
+        "    output_dir='data/processed/amazon_2023/',\n"
+        "    tile_size=256,\n"
+        "    cloud_threshold=0.2\n"
+        ")\n"
+        "EOF\n"
+        "\n"
+        "# Validate the PyTorch dataset loads correctly\n"
+        "python - <<'EOF'\n"
+        "from climatevision.data.dataset import SatelliteDataset\n"
+        "ds = SatelliteDataset('data/processed/amazon_2023/', split='train')\n"
+        "img, mask = ds[0]\n"
+        "print(f'Dataset size: {len(ds)} | Image shape: {img.shape} | Mask shape: {mask.shape}')\n"
+        "EOF"
+    )
+
+    pdf.subsection_title("Step 4: Compute Spectral Indices")
+    pdf.code_block(
+        "# Calculate NDVI, EVI, and moisture indices from raw bands\n"
+        "python - <<'EOF'\n"
+        "from climatevision.utils.geospatial import compute_indices\n"
+        "compute_indices(\n"
+        "    tile_dir='data/processed/amazon_2023/',\n"
+        "    indices=['ndvi', 'evi', 'moisture'],\n"
+        "    output_dir='data/processed/amazon_2023_features/'\n"
+        ")\n"
+        "EOF"
+    )
+
+    pdf.subsection_title("Step 5: Commit & Push Your Work")
+    pdf.code_block(
+        "# Switch to your git identity\n"
+        "source team_docs/switch_user.sh adeolu\n"
+        "\n"
+        "# Create a feature branch\n"
+        "git checkout develop\n"
+        "git pull origin develop\n"
+        "git checkout -b feature/data-sentinel2-preprocessing\n"
+        "\n"
+        "# Stage your files\n"
+        "git add src/climatevision/data/\n"
+        "git add scripts/prepare_data.py\n"
+        "\n"
+        "# Commit\n"
+        "git commit -m \"feat(data): add Sentinel-2 cloud masking and tile preprocessing pipeline\"\n"
+        "\n"
+        "# Push from your account\n"
+        "git push adeolu feature/data-sentinel2-preprocessing"
+    )
+
+    pdf.output(os.path.join(OUTPUT_DIR, "Adeolu_Mary_Oshadare_Role.pdf"))  # write the finished PDF into OUTPUT_DIR
+    print("Created: Adeolu_Mary_Oshadare_Role.pdf")
+
+
+def create_francis_doc():
+    pdf = RoleDoc("Francis Umo")
+    pdf.add_page()
+
+    pdf.set_font("Helvetica", "B", 18)
+    pdf.cell(0, 10, "Francis Umo", align="C", new_x="LMARGIN", new_y="NEXT")
+    pdf.set_font("Helvetica", "", 11)
+    pdf.set_text_color(100, 100, 100)
+    pdf.cell(0, 7, "Data Science Engineer 3 - Carbon Analytics & Validation Lead", align="C", new_x="LMARGIN", new_y="NEXT")
+    pdf.set_text_color(0, 0, 0)
+    pdf.ln(5)
+
+    pdf.key_value("GitHub", "@franchaise")
+    pdf.key_value("Access Level", "Maintainer")
+    pdf.key_value("Reports To", "@Goldokpa (Project Owner)")
+    pdf.key_value("Project Duration", "3 Months")
+    pdf.ln(3)
+
+    # How It Fits Me
+    pdf.section_title("How This Role Fits You")
+    pdf.body_text(
+        "With 8+ years of progressive experience in data analysis and business intelligence, you bring "
+        "the deepest analytical maturity on this team. While others focus on building models and pipelines, "
+        "you are the person who makes sure the numbers tell the right story and that the results are trustworthy."
+    )
+    pdf.body_text(
+        "Your expertise in Python, PostgreSQL, and SQL means you can build the carbon estimation models that "
+        "require heavy data querying, aggregation, and statistical analysis. At Dataleum, you conducted data "
+        "quality checks, developed dashboards to monitor financial data, and created reports that reduced fraud "
+        "by 80% - that same rigour is exactly what's needed when validating whether our ML models are correctly "
+        "estimating carbon loss from deforestation."
+    )
+    pdf.body_text(
+        "Your proficiency in Tableau and Power BI is a direct match for building the impact reporting layer. "
+        "ClimateVision needs to produce clear, visual reports that conservation organizations and government "
+        "agencies can act on. Your data storytelling background makes you the ideal person to translate "
+        "raw model outputs into actionable intelligence."
+    )
+    pdf.body_text(
+        "Your cross-functional collaboration experience - working with IT teams, stakeholders, and bringing "
+        "analytical models into production - means you understand how to bridge the gap between a data science "
+        "experiment and a production metric that decision-makers rely on."
+    )
+    pdf.ln(2)
+
+    # Role Description
+    pdf.section_title("Your Role on ClimateVision")
+    pdf.body_text(
+        "You own the analytics and validation layer - everything that turns raw model predictions into "
+        "meaningful environmental metrics. If the ML model says 'this pixel is deforested,' you quantify "
+        "what that means in tons of carbon, hectares of forest, and dollars of environmental impact."
+    )
+    pdf.subsection_title("Core Responsibilities")
+    pdf.bullet("Develop carbon stock estimation models (Random Forest, XGBoost regression)")
+    pdf.bullet("Build biomass-to-carbon conversion pipelines using allometric equations")
+    pdf.bullet("Implement uncertainty quantification (bootstrap, Monte Carlo, confidence intervals)")
+    pdf.bullet("Create ground truth validation framework - compare model outputs to known data")
+    pdf.bullet("Build statistical testing suite (hypothesis testing, A/B testing for model versions)")
+    pdf.bullet("Design and generate impact reports (area deforested, carbon lost, trends over time)")
+    pdf.bullet("Develop KPI dashboards for monitoring model performance and environmental outcomes")
+    pdf.bullet("Create validation notebooks demonstrating model accuracy across regions")
+    pdf.ln(2)
+
+    # Codebase Ownership
+    pdf.section_title("Your Codebase Ownership")
+    pdf.body_text("You are the primary owner of the following files and directories:")
+    pdf.code_block(
+        "src/climatevision/analytics/          # PRIMARY OWNER - New analytics module\n"
+        "  carbon.py                            # Carbon stock estimation models\n"
+        "  statistics.py                        # Statistical testing & analysis\n"
+        "  reporting.py                         # Impact report generation\n"
+        "  validation.py                        # Ground truth validation framework\n"
+        "  __init__.py                          # Module exports\n"
+        "\n"
+        "src/climatevision/models/\n"
+        "  regression.py                        # PRIMARY OWNER - Biomass/carbon regression\n"
+        "\n"
+        "src/climatevision/utils/\n"
+        "  metrics.py                           # CO-OWNER - Extend with carbon metrics\n"
+        "\n"
+        "notebooks/\n"
+        "  03_carbon_analysis.ipynb             # Carbon estimation analysis\n"
+        "  04_model_validation.ipynb            # Validation & benchmarking\n"
+        "  05_impact_reporting.ipynb            # Reporting notebook\n"
+        "\n"
+        "outputs/\n"
+        "  reports/                             # Generated impact reports\n"
+        "  dashboards/                          # Dashboard configs"
+    )
+    pdf.ln(2)
+
+    # 3-Month Timeline
+    pdf.section_title("Your 3-Month Delivery Timeline")
+    pdf.month_block("MONTH 1: Foundation (Weeks 1-4)", [
+        ("Week 1-2: Carbon Estimation Models", [
+            "Research allometric equations for biomass estimation by forest type",
+            "Build carbon.py - Random Forest & XGBoost regression for biomass prediction",
+            "Create feature pipeline: spectral indices -> biomass -> carbon conversion",
+            "Implement metrics for regression evaluation (RMSE, MAE, R-squared)",
+        ]),
+        ("Week 3-4: Validation Framework", [
+            "Build validation.py - compare model predictions to ground truth datasets",
+            "Source and integrate reference data (Global Forest Watch, forest inventory data)",
+            "Create confusion matrix, precision/recall analysis for segmentation outputs",
+            "Write 04_model_validation.ipynb with baseline validation results",
+        ]),
+    ])
+    pdf.month_block("MONTH 2: Advanced Analytics (Weeks 5-8)", [
+        ("Week 5-6: Uncertainty & Statistical Testing", [
+            "Implement bootstrap confidence intervals for carbon estimates",
+            "Build Monte Carlo simulation for uncertainty propagation",
+            "Create statistics.py - hypothesis testing, trend analysis functions",
+            "Implement A/B testing framework for comparing model versions",
+        ]),
+        ("Week 7-8: Impact Reporting", [
+            "Build reporting.py - automated report generation (PDF/HTML)",
+            "Design KPI framework: hectares lost, carbon tons, trend direction",
+            "Create 05_impact_reporting.ipynb - template for regional impact reports",
+            "Integrate with PostgreSQL for historical metric storage",
+        ]),
+    ])
+    pdf.month_block("MONTH 3: Production Readiness (Weeks 9-12)", [
+        ("Week 9-10: Dashboard & Integration", [
+            "Build dashboard data endpoints (feed metrics to frontend charts)",
+            "Create time-series analysis for deforestation trend tracking",
+            "Implement anomaly detection for unusual forest loss patterns",
+        ]),
+        ("Week 11-12: Documentation & Case Studies", [
+            "Produce 3 regional case study reports (Amazon, Congo, Southeast Asia)",
+            "Write comprehensive documentation for analytics module",
+            "Final validation sweep across all model outputs",
+            "Performance benchmarking and accuracy documentation",
+        ]),
+    ])
+
+    # Git Workflow
+    pdf.section_title("Your Git Workflow")
+    pdf.code_block(
+        "# Create feature branches from develop\n"
+        "git checkout develop\n"
+        "git pull origin develop\n"
+        "git checkout -b feature/analytics-carbon-estimation\n"
+        "\n"
+        "# Your branch naming convention:\n"
+        "feature/analytics-*     (new analytics features)\n"
+        "fix/analytics-*         (bug fixes)\n"
+        "refactor/analytics-*    (code restructuring)"
+    )
+    pdf.body_text(
+        "All PRs go to the develop branch. PRs require at least 1 review. "
+        "Tag @edoh-Onuh for reviews on model evaluation metrics, and @Oshgig for data format questions."
+    )
+    pdf.ln(3)
+
+    # Key Collaborators
+    pdf.section_title("Your Key Collaborators")
+    pdf.bullet("@edoh-Onuh (ML Lead) - Their model predictions are your primary input. Coordinate on output formats, probability thresholds, and confidence scores.")
+    pdf.bullet("@Oshgig (Data Pipeline Lead) - She provides the preprocessed data you need for carbon regression features. Align on spectral indices and metadata.")
+    pdf.bullet("Olufemi Taiwo (API Lead) - Your analytics endpoints need to be exposed through the API. Coordinate on response schemas.")
+    pdf.bullet("@cutewizzy11 (Full-Stack) - Frontend dashboards visualize your metrics. Provide JSON data contracts for charts.")
+
+    # Code Pipeline
+    pdf.section_title("Your Code Pipeline")
+    pdf.body_text("Your pipeline starts where the ML model ends - taking prediction masks and turning them into carbon impact numbers and stakeholder reports.")
+
+    pdf.subsection_title("Step 1: Environment Setup")
+    pdf.code_block(
+        "git clone https://github.com/Climate-Vision/ClimateVision.git\n"
+        "cd ClimateVision\n"
+        "pip install -r requirements.txt\n"
+        "\n"
+        "# Verify analytics dependencies\n"
+        "python -c \"import xgboost, sklearn, mlflow, optuna; print('Analytics stack ready')\""
+    )
+
+    pdf.subsection_title("Step 2: Run Inference to Get Prediction Masks")
+    pdf.code_block(
+        "# Generate deforestation masks from a trained model\n"
+        "python scripts/infer.py \\\n"
+        "  --bbox \"-60,-15,-45,5\" \\\n"
+        "  --date 2023-06-01 \\\n"
+        "  --analysis_type deforestation \\\n"
+        "  --output outputs/masks/\n"
+        "\n"
+        "# Output: outputs/masks/deforestation_mask.tif + confidence_scores.npy"
+    )
+
+    pdf.subsection_title("Step 3: Estimate Carbon Loss")
+    pdf.code_block(
+        "# Run carbon stock estimation on the prediction mask\n"
+        "python - <<'EOF'\n"
+        "from climatevision.analytics.carbon import estimate_carbon\n"
+        "result = estimate_carbon(\n"
+        "    mask_path='outputs/masks/deforestation_mask.tif',\n"
+        "    region='amazon',\n"
+        "    forest_type='tropical_moist'\n"
+        ")\n"
+        "print(f\"Deforested area: {result['hectares']:.1f} ha\")\n"
+        "print(f\"Carbon lost:     {result['carbon_tonnes']:.1f} tCO2e\")\n"
+        "print(f\"Confidence CI:   {result['ci_lower']:.1f} - {result['ci_upper']:.1f} tCO2e\")\n"
+        "EOF"
+    )
+
+    pdf.subsection_title("Step 4: Validate Against Ground Truth")
+    pdf.code_block(
+        "# Compare model outputs to Global Forest Watch reference data\n"
+        "python - <<'EOF'\n"
+        "from climatevision.analytics.validation import validate_predictions\n"
+        "metrics = validate_predictions(\n"
+        "    pred_mask='outputs/masks/deforestation_mask.tif',\n"
+        "    ground_truth='data/ground_truth/amazon_gfw_2023.tif'\n"
+        ")\n"
+        "print(f\"IoU: {metrics['iou']:.3f} | F1: {metrics['f1']:.3f} | Precision: {metrics['precision']:.3f}\")\n"
+        "EOF"
+    )
+
+    pdf.subsection_title("Step 5: Generate Impact Report")
+    pdf.code_block(
+        "# Auto-generate a PDF/HTML impact report for stakeholders\n"
+        "python - <<'EOF'\n"
+        "from climatevision.analytics.reporting import generate_report\n"
+        "generate_report(\n"
+        "    region='amazon',\n"
+        "    period='2023-Q2',\n"
+        "    carbon_result=result,\n"
+        "    validation_metrics=metrics,\n"
+        "    output_dir='outputs/reports/'\n"
+        ")\n"
+        "EOF\n"
+        "\n"
+        "# Output: outputs/reports/amazon_2023-Q2_impact_report.pdf"
+    )
+
+    pdf.subsection_title("Step 7: Commit & Push Your Work")
+    pdf.code_block(
+        "# Switch to your git identity\n"
+        "source team_docs/switch_user.sh francis\n"
+        "\n"
+        "git checkout develop && git pull origin develop\n"
+        "git checkout -b feature/analytics-carbon-estimation\n"
+        "\n"
+        "git add src/climatevision/analytics/\n"
+        "git add notebooks/03_carbon_analysis.ipynb\n"
+        "git commit -m \"feat(analytics): add carbon stock estimation with confidence intervals\"\n"
+        "\n"
+        "git push francis feature/analytics-carbon-estimation"
+    )
+
+    pdf.output(os.path.join(OUTPUT_DIR, "Francis_Umo_Role.pdf"))
+    print("Created: Francis_Umo_Role.pdf")
+
+
+def create_olufemi_doc():
+    pdf = RoleDoc("Olufemi Taiwo")
+    pdf.add_page()
+
+    pdf.set_font("Helvetica", "B", 18)
+    pdf.cell(0, 10, "Olufemi Taiwo", align="C", new_x="LMARGIN", new_y="NEXT")
+    pdf.set_font("Helvetica", "", 11)
+    pdf.set_text_color(100, 100, 100)
+    pdf.cell(0, 7, "Data Science Engineer 4 - API & Data Quality Lead", align="C", new_x="LMARGIN", new_y="NEXT")
+    pdf.set_text_color(0, 0, 0)
+    pdf.ln(5)
+
+    pdf.key_value("GitHub", "(To be assigned)")
+    pdf.key_value("Access Level", "Maintainer")
+    pdf.key_value("Reports To", "@Goldokpa (Project Owner)")
+    pdf.key_value("Project Duration", "3 Months")
+    pdf.ln(3)
+
+    # How It Fits Me
+    pdf.section_title("How This Role Fits You")
+    pdf.body_text(
+        "Your current role as Reporting and Data Quality Officer at the Royal Marsden NHS Foundation Trust "
+        "is the clearest signal for this assignment. Every working day you validate data flows, investigate "
+        "mismatches across Epic EPR, troubleshoot system errors using SQL, and hold the line on reporting "
+        "accuracy for senior clinical stakeholders. That obsessive attention to data integrity at every step "
+        "from input to output is exactly what ClimateVision's API and inference pipeline need."
+    )
+    pdf.body_text(
+        "At Fidelity Bank, you kept payment platforms reliable around the clock as an Application Support "
+        "Analyst - triaging incidents, analysing root causes, and producing service reports that guided "
+        "operational decisions. ClimateVision runs a similar system: satellite images arrive as requests, "
+        "the API must respond correctly and quickly, and any failure needs to be caught, logged, and "
+        "escalated before it reaches users. That is your wheelhouse."
+    )
+    pdf.body_text(
+        "Your Business Intelligence work at Dataleum - building Power BI dashboards, conducting data quality "
+        "checks, achieving 98% GDPR compliance - means you already understand auditability. In a climate "
+        "monitoring system used by NGOs and government agencies, every prediction must be traceable, every "
+        "alert explainable, and every data flow compliant. You build that confidence layer."
+    )
+    pdf.body_text(
+        "Your ITIL 4 certification is a direct fit for incident management, change control, and problem "
+        "management in production. Combined with your MSc in Data Science, you are the person who makes "
+        "the API not just functional, but operationally trustworthy - with structured logging, audit trails, "
+        "validated schemas, and monitoring that surfaces issues before users notice them."
+    )
+    pdf.ln(2)
+
+    # Role Description
+    pdf.section_title("Your Role on ClimateVision")
+    pdf.body_text(
+        "You own the API layer and the inference pipeline - everything between a trained model and a user "
+        "receiving a validated, structured response. You ensure the system is reliable, observable, and "
+        "produces outputs that are correct and auditable. You are the data quality gatekeeper for every "
+        "prediction that leaves the system."
+    )
+    pdf.subsection_title("Core Responsibilities")
+    pdf.bullet("Extend and maintain the FastAPI backend (endpoints, authentication, request validation)")
+    pdf.bullet("Build Pydantic schemas for all API request/response objects - the contract for data quality")
+    pdf.bullet("Implement structured logging, error handling, and audit trails throughout the inference flow")
+    pdf.bullet("Build the inference validation layer - catch bad inputs, validate outputs, flag anomalies")
+    pdf.bullet("Create the deforestation alert system with configurable thresholds and notification routing")
+    pdf.bullet("Build API monitoring endpoints: health checks, data quality metrics, run status dashboards")
+    pdf.bullet("Write SQL queries and admin endpoints for operational reporting and data audits")
+    pdf.bullet("Design and document the API contract (request/response schemas, error codes, versioning)")
+    pdf.ln(2)
+
+    # Codebase Ownership
+    pdf.section_title("Your Codebase Ownership")
+    pdf.body_text("You are the primary owner of the following files and directories:")
+    pdf.code_block(
+        "src/climatevision/inference/           # PRIMARY OWNER\n"
+        "  pipeline.py                          # Core inference pipeline\n"
+        "  batch_processor.py                   # Batch processing with job queuing\n"
+        "  postprocess.py                       # Output filtering & thresholding\n"
+        "  alert_generator.py                   # Deforestation alert system\n"
+        "  __init__.py\n"
+        "\n"
+        "src/climatevision/api/                 # PRIMARY OWNER\n"
+        "  main.py                              # FastAPI application\n"
+        "  auth.py                              # API key authentication\n"
+        "  middleware.py                         # Request logging, CORS\n"
+        "  schemas.py                           # Pydantic request/response schemas\n"
+        "  __init__.py\n"
+        "\n"
+        "src/climatevision/db.py                # CO-OWNER - Database & audit queries\n"
+        "\n"
+        "run_api.sh                             # API startup script\n"
+        "config.yaml                            # API & inference config sections"
+    )
+    pdf.ln(2)
+
+    # 3-Month Timeline
+    pdf.section_title("Your 3-Month Delivery Timeline")
+    pdf.month_block("MONTH 1: Foundation (Weeks 1-4)", [
+        ("Week 1-2: Schemas & Validation", [
+            "Build schemas.py - Pydantic models for every API request and response object",
+            "Extend pipeline.py with input validation: image shape, band count, coordinate bounds",
+            "Add structured JSON logging throughout the inference flow (request ID, timestamps, errors)",
+            "Implement output validation - flag predictions outside expected confidence ranges",
+        ]),
+        ("Week 3-4: API Hardening", [
+            "Implement auth.py - API key authentication and organisation-based access control",
+            "Build middleware.py - request logging, CORS, request size limits",
+            "Create /api/health, /api/status, and /api/metrics endpoints for operational monitoring",
+            "Write API integration tests covering validation edge cases and error responses",
+        ]),
+    ])
+    pdf.month_block("MONTH 2: Quality & Alerts (Weeks 5-8)", [
+        ("Week 5-6: Inference Quality Layer", [
+            "Build postprocess.py - confidence thresholding and prediction filtering",
+            "Implement anomaly detection for unusual inference outputs (flag for review)",
+            "Create audit log entries for every prediction: input hash, model version, output summary",
+            "Build batch_processor.py - parallel image processing with per-job status tracking",
+        ]),
+        ("Week 7-8: Alert System & Reporting", [
+            "Build alert_generator.py - configurable deforestation threshold alerting",
+            "Implement notification routing (email, webhook) for triggered alerts",
+            "Write SQL reporting queries for run history, error rates, and data quality KPIs",
+            "Create admin endpoints for operational dashboards: throughput, failure rates, alert volumes",
+        ]),
+    ])
+    pdf.month_block("MONTH 3: Observability & Documentation (Weeks 9-12)", [
+        ("Week 9-10: Monitoring & Data Quality Reports", [
+            "Build a /api/reports endpoint returning data quality metrics over configurable time windows",
+            "Implement request tracing: correlate API requests to inference runs to alerts",
+            "Create a data quality dashboard feed (JSON) for the frontend to visualise pipeline health",
+            "SQL-based audit trail queries: who requested what, when, and with what result",
+        ]),
+        ("Week 11-12: Documentation & Launch Readiness", [
+            "Write the API reference: all endpoints, schemas, error codes, and usage examples",
+            "Document the incident response runbook: what each error means and how to resolve it",
+            "Security review: input sanitisation, SQL injection checks, API key rotation procedures",
+            "Final integration testing with all team modules - validate end-to-end data flow",
+        ]),
+    ])
+
+    # Git Workflow
+    pdf.section_title("Your Git Workflow")
+    pdf.code_block(
+        "# Create feature branches from develop\n"
+        "git checkout develop\n"
+        "git pull origin develop\n"
+        "git checkout -b feature/api-schemas\n"
+        "\n"
+        "# Your branch naming convention:\n"
+        "feature/api-*           (API features & endpoints)\n"
+        "feature/inference-*     (inference pipeline & validation)\n"
+        "feature/schemas-*       (Pydantic schema changes)\n"
+        "fix/api-*               (bug fixes)"
+    )
+    pdf.body_text(
+        "All PRs go to the develop branch. Tag @cutewizzy11 for API contract reviews (he consumes your "
+        "endpoints from the frontend) and @edoh-Onuh when touching inference logic that involves model outputs."
+    )
+    pdf.ln(3)
+
+    # Key Collaborators
+    pdf.section_title("Your Key Collaborators")
+    pdf.bullet("@edoh-Onuh (ML Lead) - Their trained models are loaded by your inference pipeline. Coordinate on model format (.pth vs ONNX), input shapes, output schemas, and confidence score formats.")
+    pdf.bullet("@Oshgig (Data Pipeline Lead) - Your inference input validation must match her preprocessing exactly. Align on normalization constants, expected band order, and coordinate formats.")
+    pdf.bullet("@franchaise (Analytics Lead) - Their analytics endpoints are exposed through your API. Coordinate on response schemas, pagination, and data quality flags in outputs.")
+    pdf.bullet("@cutewizzy11 (Full-Stack & CI/CD) - He consumes your API from the frontend and manages Docker and deployment. You two define the API contract together - endpoints, schemas, error codes.")
+
+    # Code Pipeline
+    pdf.section_title("Your Code Pipeline")
+    pdf.body_text("Your pipeline covers running and validating the FastAPI server, testing all endpoints, enforcing data quality, and maintaining the inference layer.")
+
+    pdf.subsection_title("Step 1: Environment Setup")
+    pdf.code_block(
+        "git clone https://github.com/Climate-Vision/ClimateVision.git\n"
+        "cd ClimateVision\n"
+        "pip install -r requirements.txt\n"
+        "\n"
+        "# Set environment variables\n"
+        "cp .env.example .env\n"
+        "# Edit .env: set MODEL_PATH, DB_PATH, API_KEY_SECRET"
+    )
+
+    pdf.subsection_title("Step 2: Start the API Server")
+    pdf.code_block(
+        "# Start FastAPI in development mode with auto-reload\n"
+        "uvicorn climatevision.api.main:app \\\n"
+        "  --reload \\\n"
+        "  --host 0.0.0.0 \\\n"
+        "  --port 8000\n"
+        "\n"
+        "# Interactive API docs available at:\n"
+        "# http://localhost:8000/docs\n"
+        "# http://localhost:8000/redoc"
+    )
+
+    pdf.subsection_title("Step 3: Test Prediction Endpoints")
+    pdf.code_block(
+        "# Test JSON prediction endpoint\n"
+        "curl -X POST http://localhost:8000/predict/json \\\n"
+        "  -H \"Content-Type: application/json\" \\\n"
+        "  -d '{\n"
+        "    \"bbox\": [-60, -15, -45, 5],\n"
+        "    \"start_date\": \"2023-01-01\",\n"
+        "    \"end_date\":   \"2023-12-31\",\n"
+        "    \"analysis_type\": \"deforestation\"\n"
+        "  }'\n"
+        "\n"
+        "# Test file-upload endpoint\n"
+        "curl -X POST http://localhost:8000/predict/upload \\\n"
+        "  -F \"file=@data/test/sample_tile.tif\" \\\n"
+        "  -F \"analysis_type=flooding\"\n"
+        "\n"
+        "# Health check\n"
+        "curl http://localhost:8000/health"
+    )
+
+    pdf.subsection_title("Step 4: Run Data Quality Checks")
+    pdf.code_block(
+        "# Validate all run records in the database meet schema requirements\n"
+        "python - <<'EOF'\n"
+        "from climatevision.db import get_db_connection, validate_run_schema\n"
+        "conn = get_db_connection()\n"
+        "issues = validate_run_schema(conn)\n"
+        "if issues:\n"
+        "    print(f'Data quality issues found: {len(issues)}')\n"
+        "    for issue in issues:\n"
+        "        print(f'  - {issue}')\n"
+        "else:\n"
+        "    print('All records pass quality checks')\n"
+        "EOF"
+    )
+
+    pdf.subsection_title("Step 5: Register an NGO Organisation")
+    pdf.code_block(
+        "# Create an NGO organisation via the API\n"
+        "curl -X POST http://localhost:8000/organizations \\\n"
+        "  -H \"Content-Type: application/json\" \\\n"
+        "  -d '{\n"
+        "    \"name\": \"Amazon Conservation Trust\",\n"
+        "    \"email\": \"alerts@amazonconservation.org\",\n"
+        "    \"region\": \"amazon\"\n"
+        "  }'\n"
+        "\n"
+        "# Add a regional monitoring subscription\n"
+        "curl -X POST http://localhost:8000/organizations/1/subscriptions \\\n"
+        "  -H \"Content-Type: application/json\" \\\n"
+        "  -d '{\"bbox\": [-60,-15,-45,5], \"analysis_type\": \"deforestation\", \"alert_threshold\": 0.15}'"
+    )
+
+    pdf.subsection_title("Step 6: Commit & Push Your Work")
+    pdf.code_block(
+        "# Switch to your git identity\n"
+        "source team_docs/switch_user.sh olufemi\n"
+        "\n"
+        "git checkout develop && git pull origin develop\n"
+        "git checkout -b feature/api-input-validation\n"
+        "\n"
+        "git add src/climatevision/api/main.py\n"
+        "git add src/climatevision/db.py\n"
+        "git commit -m \"feat(api): add Pydantic input validation and audit logging to predict endpoints\"\n"
+        "\n"
+        "# Push from YOUR GitHub account (femi23)\n"
+        "git push olufemi feature/api-input-validation"
+    )
+
+    pdf.output(os.path.join(OUTPUT_DIR, "Olufemi_Taiwo_Role.pdf"))
+    print("Created: Olufemi_Taiwo_Role.pdf")
+
+
+def create_edoh_doc():  # Build Edoh-Onuh's role PDF and save it as OUTPUT_DIR/Edoh_Onuh_Role.pdf
+    pdf = RoleDoc("Edoh-Onuh")
+    pdf.add_page()
+    # Title block: "C"-aligned bold 18pt name, then a grey 11pt role subtitle; text colour is reset to black after
+    pdf.set_font("Helvetica", "B", 18)
+    pdf.cell(0, 10, "Edoh-Onuh (John Edoh Onuh)", align="C", new_x="LMARGIN", new_y="NEXT")
+    pdf.set_font("Helvetica", "", 11)
+    pdf.set_text_color(100, 100, 100)
+    pdf.cell(0, 7, "Data Science Engineer 1 - ML Model Development Lead", align="C", new_x="LMARGIN", new_y="NEXT")
+    pdf.set_text_color(0, 0, 0)
+    pdf.ln(5)
+    # Profile facts, each passed to key_value() as a (label, value) pair
+    pdf.key_value("GitHub", "@edoh-Onuh")
+    pdf.key_value("Access Level", "Maintainer")
+    pdf.key_value("Reports To", "@Goldokpa (Project Owner)")
+    pdf.key_value("Project Duration", "3 Months")
+    pdf.ln(3)
+
+    # "How This Role Fits You": four paragraphs tying prior projects to this assignment
+    pdf.section_title("How This Role Fits You")
+    pdf.body_text(
+        "Your GitHub portfolio makes the case better than any job description could. You built JED Climate - "
+        "a full-stack climate intelligence platform - independently. It has a FastAPI analytics engine serving "
+        "a carbon calculator and climate predictor, PyTorch/TensorFlow ML services, real-time Recharts "
+        "dashboards for CO2 levels, Arctic ice extent, and sea level rise, and a 14-service Docker Compose "
+        "local stack. That is almost exactly what ClimateVision is. You already know this problem space."
+    )
+    pdf.body_text(
+        "Your fintech-fraud-detection repo demonstrates the depth of ML engineering this role needs: "
+        "XGBoost, Random Forest, and Neural Network ensembles with sub-100ms inference latency, SHAP/LIME "
+        "explainability, concept drift detection, and a production-grade FastAPI serving layer. The same "
+        "engineering discipline - fast, explainable, reliable model inference - is exactly what ClimateVision's "
+        "deforestation detection pipeline requires."
+    )
+    pdf.body_text(
+        "Your classification track record is consistent and strong: diabetes risk prediction (Scikit-learn), "
+        "fraud detection (XGBoost + Neural Networks), text classification (NLP), and time series forecasting "
+        "(Tesla stock). Every one of those is a direct analogue to forest vs. non-forest pixel segmentation - "
+        "the core problem you will be solving here with U-Net and Siamese architectures."
+    )
+    pdf.body_text(
+        "Your sustainable energy analysis and JED Climate's environmental dashboards show you genuinely "
+        "understand the climate data domain - spectral trends, temporal signals, and what makes environmental "
+        "metrics meaningful. That context matters when you are tuning a model to detect 5% forest loss "
+        "in Sentinel-2 imagery at 10-metre resolution."
+    )
+    pdf.ln(2)
+
+    # "Your Role on ClimateVision": summary paragraph followed by the core-responsibility bullets
+    pdf.section_title("Your Role on ClimateVision")
+    pdf.body_text(
+        "You own all deep learning model architectures, the training pipeline, and model evaluation. "
+        "Your goal is to train models that achieve high accuracy on forest segmentation and change "
+        "detection, then package them cleanly for the inference pipeline. Carbon regression modelling "
+        "sits with the Analytics Lead - your focus is purely classification and change detection."
+    )
+    pdf.subsection_title("Core Responsibilities")
+    pdf.bullet("Improve and extend the U-Net architecture (Attention U-Net, residual connections, multi-scale features)")
+    pdf.bullet("Train and evaluate the Siamese network for temporal bi-date change detection")
+    pdf.bullet("Build a complete training pipeline: data loading, training loop, validation, checkpointing")
+    pdf.bullet("Implement loss functions tuned for satellite imagery class imbalance (Focal Loss, Dice Loss)")
+    pdf.bullet("Run hyperparameter optimisation using Optuna (learning rate, batch size, architecture depth)")
+    pdf.bullet("Implement transfer learning from pretrained encoders (ResNet, EfficientNet backbones)")
+    pdf.bullet("Build model evaluation framework: F1, IoU, precision-recall curves, confusion matrices")
+    pdf.bullet("Export optimised models to ONNX for production inference speed")
+    pdf.bullet("Implement experiment tracking with MLflow - log runs, metrics, and artefacts")
+    pdf.ln(2)
+
+    # "Your Codebase Ownership": file tree of owned paths, passed to code_block() as one string
+    pdf.section_title("Your Codebase Ownership")
+    pdf.body_text("You are the primary owner of the following files and directories:")
+    pdf.code_block(
+        "src/climatevision/models/              # PRIMARY OWNER\n"
+        "  unet.py                              # U-Net & Attention U-Net\n"
+        "  siamese.py                           # Siamese change detection network\n"
+        "  __init__.py\n"
+        "  # Note: regression.py is owned by @franchaise (Analytics Lead)\n"
+        "\n"
+        "src/climatevision/training/            # PRIMARY OWNER - New module\n"
+        "  trainer.py                           # Training loop & checkpointing\n"
+        "  evaluator.py                         # Model evaluation framework\n"
+        "  scheduler.py                         # Learning rate schedulers\n"
+        "  callbacks.py                         # Early stopping, logging\n"
+        "  __init__.py\n"
+        "\n"
+        "src/climatevision/utils/\n"
+        "  metrics.py                           # CO-OWNER - Loss functions, metrics\n"
+        "\n"
+        "scripts/\n"
+        "  run_training.py                      # Training pipeline script\n"
+        "  train.py                             # Existing training script\n"
+        "  hyperparameter_search.py             # Optuna hyperparameter search\n"
+        "\n"
+        "models/                                # Trained model weights\n"
+        "models_pretrained/                     # Pretrained backbone weights"
+    )
+    pdf.ln(2)
+
+    # "Your 3-Month Delivery Timeline": one month_block() per month, each given a list of (heading, tasks) pairs
+    pdf.section_title("Your 3-Month Delivery Timeline")
+    pdf.month_block("MONTH 1: Foundation (Weeks 1-4)", [
+        ("Week 1-2: Training Infrastructure", [
+            "Build trainer.py - complete training loop with mixed-precision, gradient accumulation",
+            "Implement checkpointing (save best model, resume from checkpoint)",
+            "Create evaluator.py - F1, IoU, precision, recall, confusion matrix",
+            "Set up experiment tracking with MLflow - log all runs, hyperparameters, artefacts",
+        ]),
+        ("Week 3-4: Baseline Models", [
+            "Train baseline U-Net on curated forest segmentation dataset",
+            "Implement Focal Loss and Dice Loss for forest/non-forest class imbalance",
+            "Run initial benchmarks: accuracy on Amazon, Congo, Southeast Asia test sets",
+            "Document baseline results as the performance floor to beat",
+        ]),
+    ])
+    pdf.month_block("MONTH 2: Advanced Models (Weeks 5-8)", [
+        ("Week 5-6: Architecture Improvements", [
+            "Implement Attention U-Net with skip connection attention gates",
+            "Add ResNet/EfficientNet encoder backbone via transfer learning (ImageNet pretrained)",
+            "Run hyperparameter search with Optuna (learning rate, batch size, depth, dropout)",
+            "Train Siamese network for bi-temporal change detection",
+        ]),
+        ("Week 7-8: Model Optimisation", [
+            "Implement model ensemble (U-Net + Attention U-Net prediction averaging)",
+            "Build Monte Carlo Dropout for per-pixel uncertainty estimation",
+            "Spatial cross-validation to prevent data leakage across adjacent image tiles",
+            "Performance benchmarking across all model variants - pick production candidate",
+        ]),
+    ])
+    pdf.month_block("MONTH 3: Production Models (Weeks 9-12)", [
+        ("Week 9-10: Export & Versioning", [
+            "Export best-performing models to ONNX format for fast production inference",
+            "Implement model quantisation and pruning for latency reduction",
+            "Set up model registry with versioning, metadata, and performance records",
+            "Create model cards: accuracy, known limitations, training data, bias notes",
+        ]),
+        ("Week 11-12: Final Evaluation", [
+            "Comprehensive evaluation on held-out test sets across all regions",
+            "Ablation studies: measure contribution of each architectural choice",
+            "Write model documentation and training reproduction guide",
+            "Integration testing with Olufemi's inference pipeline - validate end-to-end",
+        ]),
+    ])
+
+    # "Your Git Workflow": branch commands and naming convention, then PR review guidance
+    pdf.section_title("Your Git Workflow")
+    pdf.code_block(
+        "# Create feature branches from develop\n"
+        "git checkout develop\n"
+        "git pull origin develop\n"
+        "git checkout -b feature/model-attention-unet\n"
+        "\n"
+        "# Your branch naming convention:\n"
+        "feature/model-*         (new model architectures)\n"
+        "feature/training-*      (training pipeline features)\n"
+        "fix/model-*             (bug fixes)\n"
+        "experiment/model-*      (experimental architectures)"
+    )
+    pdf.body_text(
+        "All PRs go to the develop branch. Tag @Oshgig when your models require different data formats, "
+        "@franchaise when evaluation metrics or output confidence formats change, and Olufemi Taiwo "
+        "when touching model export formats or inference input shapes."
+    )
+    pdf.ln(3)
+
+    # "Your Key Collaborators": one bullet per teammate
+    pdf.section_title("Your Key Collaborators")
+    pdf.bullet("@Oshgig (Data Pipeline Lead) - She builds the DataLoaders you train on. Coordinate on tensor shapes, normalization values, band order, and augmentation strategies.")
+    pdf.bullet("@franchaise (Analytics Lead) - He owns carbon regression modelling and validates your classification outputs against ground truth. Share model confidence scores and prediction probability formats.")
+    pdf.bullet("Olufemi Taiwo (API & Data Quality Lead) - He loads your trained models into the inference pipeline. Coordinate on model file format (.pth vs ONNX), expected input shapes, and output schema.")
+    pdf.bullet("@cutewizzy11 (Full-Stack & CI/CD) - CI/CD pipeline runs your training scripts. Keep scripts deterministic, well-documented, and reproducible.")
+
+    # "Your Code Pipeline": numbered command walkthrough (Steps 1-6), each step a subsection title plus code_block()
+    pdf.section_title("Your Code Pipeline")
+    pdf.body_text("Your pipeline covers model architecture development, training, evaluation, and exporting production-ready checkpoints.")
+
+    pdf.subsection_title("Step 1: Environment Setup")
+    pdf.code_block(
+        "git clone https://github.com/Climate-Vision/ClimateVision.git\n"
+        "cd ClimateVision\n"
+        "pip install -r requirements.txt\n"
+        "\n"
+        "# Verify PyTorch and GPU availability\n"
+        "python -c \"import torch; print(f'PyTorch {torch.__version__} | CUDA: {torch.cuda.is_available()}')\""
+    )
+
+    pdf.subsection_title("Step 2: Verify Data Is Ready")
+    pdf.code_block(
+        "# Confirm @Oshgig's DataLoader feeds correctly into your model\n"
+        "python - <<'EOF'\n"
+        "from climatevision.data.dataset import SatelliteDataset\n"
+        "from torch.utils.data import DataLoader\n"
+        "ds = SatelliteDataset('data/processed/', split='train')\n"
+        "loader = DataLoader(ds, batch_size=4, num_workers=2)\n"
+        "imgs, masks = next(iter(loader))\n"
+        "print(f'Batch shape: {imgs.shape} | Mask shape: {masks.shape}')\n"
+        "# Expected: torch.Size([4, 13, 256, 256]) | torch.Size([4, 256, 256])\n"
+        "EOF"
+    )
+
+    pdf.subsection_title("Step 3: Train Baseline U-Net")
+    pdf.code_block(
+        "# Train baseline segmentation model\n"
+        "python scripts/train.py \\\n"
+        "  --model unet \\\n"
+        "  --analysis-type deforestation \\\n"
+        "  --epochs 50 \\\n"
+        "  --batch-size 16 \\\n"
+        "  --lr 1e-4 \\\n"
+        "  --checkpoint-dir models/ \\\n"
+        "  --mlflow-tracking\n"
+        "\n"
+        "# Monitor training: open http://localhost:5000 (MLflow UI)\n"
+        "mlflow ui --port 5000"
+    )
+
+    pdf.subsection_title("Step 4: Hyperparameter Search")
+    pdf.code_block(
+        "# Run Optuna search over learning rate, batch size, depth\n"
+        "python scripts/hyperparameter_search.py \\\n"
+        "  --model unet \\\n"
+        "  --n-trials 50 \\\n"
+        "  --study-name unet_deforestation_v1 \\\n"
+        "  --metric val_iou\n"
+        "\n"
+        "# Best trial is automatically saved to models/best_hparam_unet.pth"
+    )
+
+    pdf.subsection_title("Step 5: Evaluate & Export Model")
+    pdf.code_block(
+        "# Full evaluation on held-out test set\n"
+        "python scripts/evaluate.py \\\n"
+        "  --checkpoint models/best_unet.pth \\\n"
+        "  --split test \\\n"
+        "  --analysis-type deforestation\n"
+        "\n"
+        "# Export to ONNX for fast production inference\n"
+        "python scripts/export_model.py \\\n"
+        "  --checkpoint models/best_unet.pth \\\n"
+        "  --format onnx \\\n"
+        "  --output models/unet_deforestation_v1.onnx"
+    )
+
+    pdf.subsection_title("Step 6: Commit & Push Your Work")
+    pdf.code_block(
+        "# Switch to your git identity\n"
+        "source team_docs/switch_user.sh edoh\n"
+        "\n"
+        "git checkout develop && git pull origin develop\n"
+        "git checkout -b feature/model-attention-unet\n"
+        "\n"
+        "git add src/climatevision/models/unet.py\n"
+        "git add src/climatevision/training/\n"
+        "git commit -m \"feat(model): add attention gates to U-Net encoder skip connections\"\n"
+        "\n"
+        "git push edoh feature/model-attention-unet"
+    )
+
+    pdf.output(os.path.join(OUTPUT_DIR, "Edoh_Onuh_Role.pdf"))  # OUTPUT_DIR is defined outside this function
+    print("Created: Edoh_Onuh_Role.pdf")
+
+
+def create_victor_doc():
+    """Build Victor Mbachu's role document and write it to OUTPUT_DIR as Victor_Mbachu_Role.pdf."""
+    pdf = RoleDoc("Victor Mbachu")
+    pdf.add_page()
+    pdf.set_font("Helvetica", "B", 18)
+    pdf.cell(0, 10, "Victor Mbachu", align="C", new_x="LMARGIN", new_y="NEXT")
+    pdf.set_font("Helvetica", "", 11)
+    pdf.set_text_color(100, 100, 100)
+    pdf.cell(0, 7, "Senior Full-Stack Engineer & Infrastructure Co-Owner", align="C", new_x="LMARGIN", new_y="NEXT")
+    pdf.set_text_color(0, 0, 0)
+    pdf.ln(5)
+
+    pdf.key_value("GitHub", "@cutewizzy11")
+    pdf.key_value("Access Level", "Co-Owner (Admin)")
+    pdf.key_value("Reports To", "@Goldokpa (Project Owner)")
+    pdf.key_value("Project Duration", "3 Months")
+    pdf.ln(3)
+
+    # How It Fits Me
+    pdf.section_title("How This Role Fits You")
+    pdf.body_text(
+        "At Zeta Global you design and run distributed microservice systems handling over 2 million API "
+        "requests daily with 99.9% uptime across multiple AWS regions - ECS Fargate clusters, RDS Aurora, "
+        "SNS/SQS messaging, and blue-green CI/CD deployments provisioned via Terraform. You also serve as "
+        "on-call engineer with a 15-minute average incident resolution time. That is the production "
+        "engineering standard ClimateVision needs to reach, and you have already built it professionally."
+    )
+    pdf.body_text(
+        "At RWS Global you containerised applications with Docker, deployed across dev, staging, and "
+        "production environments, led a team of 3 engineers in Agile sprints, and maintained GitHub Actions "
+        "CI/CD pipelines with TDD coverage. The Docker and deployment ownership on this project - "
+        "previously unassigned - is a natural fit: you do this as part of your day job, not as a "
+        "stretch task."
+    )
+    pdf.body_text(
+        "Your stack breadth is the reason you can serve as repository co-owner rather than just a "
+        "frontend contributor. React, Next.js, Vue, TypeScript, Node.js, PHP/Laravel, Python/Django - "
+        "you can read and reason about the FastAPI backend, the PyTorch inference pipeline, and the "
+        "React dashboard with equal confidence. Reviewing PRs across four data scientists requires "
+        "that range. Your AWS Certified Cloud Practitioner and Professional Scrum Master certifications "
+        "anchor both the infrastructure ownership and the project coordination function."
+    )
+    pdf.body_text(
+        "Your AI integration experience - GPT-4 and Anthropic API work at RWS Global and PetMe - "
+        "means you understand the ML serving layer you are wrapping with a frontend. When @edoh-Onuh "
+        "exports a model and Olufemi builds the inference API, you are not reading foreign code. You "
+        "have shipped production AI features before. Your two co-authored papers on agentic AI systems "
+        "show that engagement runs deeper than implementation."
+    )
+    pdf.ln(2)
+
+    # Role Description
+    pdf.section_title("Your Role on ClimateVision")
+    pdf.body_text(
+        "You own the frontend application, the CI/CD infrastructure, and the Docker/deployment layer. "
+        "As co-owner you are also the quality gate for all code entering the repository - the one "
+        "person on the team who can review and reason about every layer of the stack."
+    )
+    pdf.subsection_title("Core Responsibilities - Frontend")
+    pdf.bullet("Build the React/TypeScript dashboard with interactive Leaflet map for satellite analysis results")
+    pdf.bullet("Create Recharts components for deforestation trends, carbon metrics, and model performance")
+    pdf.bullet("Implement api.ts - the fully-typed API client for all FastAPI backend communication")
+    pdf.bullet("Build the alert notification panel for real-time deforestation alerts")
+    pdf.bullet("Implement responsive TailwindCSS design for desktop and tablet viewports")
+    pdf.bullet("Create the deep-dive analysis page with region selector, date range picker, and model comparison")
+    pdf.ln(1)
+
+    pdf.subsection_title("Core Responsibilities - Infrastructure & CI/CD")
+    pdf.bullet("Own the Dockerfile - multi-stage production build for the FastAPI + frontend application")
+    pdf.bullet("Own docker-compose.yml - local development stack wiring API, database, and frontend services")
+    pdf.bullet("Build and maintain GitHub Actions CI/CD pipelines: lint, type-check, test, and deploy on every PR")
+    pdf.bullet("Manage production environment configuration - dev/staging/prod separation and secrets management")
+    pdf.bullet("Serve as first responder for production incidents - triage, diagnose, and coordinate resolution")
+    pdf.ln(1)
+
+    pdf.subsection_title("Sprint Progress - April 2026")
+    pdf.bullet("DONE: GitHub Actions CI pipeline (Python flake8 + pytest, frontend npm build)")
+    pdf.bullet("DONE: Test scaffolding (tests/ directory with pytest fixtures)")
+    pdf.bullet("DONE: Frontend build fixes (case-sensitive import paths)")
+    pdf.bullet("DONE: Dependency fixes (removed gdal pip package, added email-validator)")
+    pdf.bullet("PENDING: Frontend unit tests with Vitest + React Testing Library")
+    pdf.bullet("PENDING: Auth UI - capture X-API-Key in AppContext")
+    pdf.bullet("PENDING: WebSocket client for real-time run status")
+    pdf.bullet("PENDING: Alert notification UI with severity filters")
+    pdf.bullet("PENDING: Mask overlay on map component")
+    pdf.bullet("PENDING: Docker Compose for full-stack local dev")
+    pdf.ln(1)
+
+    pdf.subsection_title("Core Responsibilities - Co-Owner")
+    pdf.bullet("Review and merge pull requests from all team members (target: <24 hour turnaround)")
+    pdf.bullet("Manage GitHub issues, milestones, project boards, and sprint planning")
+    pdf.bullet("Enforce branch protection rules, code quality standards, and API contract consistency")
+    pdf.bullet("Manage the release process: version tagging, changelog, and release notes")
+    pdf.ln(2)
+
+    # Codebase Ownership
+    pdf.section_title("Your Codebase Ownership")
+    pdf.body_text("You are the primary owner of the following files and directories:")
+    pdf.code_block(
+        "frontend/                              # PRIMARY OWNER - Entire frontend\n"
+        "  src/\n"
+        "    App.tsx                             # Main application shell\n"
+        "    api.ts                              # Typed API client\n"
+        "    main.tsx                            # Entry point\n"
+        "    styles.css                          # TailwindCSS styles\n"
+        "    components/                         # Component library\n"
+        "      Map.tsx                           # Leaflet map\n"
+        "      ResultsViewer.tsx                 # Prediction results\n"
+        "      Charts.tsx                        # Recharts visualizations\n"
+        "      AlertPanel.tsx                    # Alert notifications\n"
+        "      Settings.tsx                      # User settings\n"
+        "    pages/\n"
+        "      Dashboard.tsx                     # Main dashboard\n"
+        "      Analysis.tsx                      # Deep analysis view\n"
+        "      History.tsx                       # Run history\n"
+        "  package.json | vite.config.ts | tsconfig.json\n"
+        "\n"
+        "Dockerfile                             # PRIMARY OWNER - Multi-stage production build\n"
+        "docker-compose.yml                     # PRIMARY OWNER - Local development stack\n"
+        "\n"
+        ".github/workflows/                     # PRIMARY OWNER\n"
+        "  ci.yml                               # Continuous integration\n"
+        "  deploy.yml                            # Deployment pipeline\n"
+        "  tests.yml                            # Test automation\n"
+        "\n"
+        "tests/                                 # CO-OWNER (with all DS engineers)"
+    )
+    pdf.ln(2)
+
+    # 3-Month Timeline
+    pdf.section_title("Your 3-Month Delivery Timeline")
+    pdf.month_block("MONTH 1: Foundation (Weeks 1-4)", [
+        ("Week 1-2: Infrastructure & CI/CD", [
+            "Write multi-stage Dockerfile for optimised API + frontend production image",
+            "Build docker-compose.yml wiring FastAPI, SQLite/PostgreSQL, and frontend services locally",
+            "Set up GitHub Actions CI: lint, type-check, pytest, and Vite build on every PR",
+            "Create branch protection rules: require passing CI and 1 review before merging to develop",
+        ]),
+        ("Week 3-4: Frontend Architecture & Core Components", [
+            "Configure React Router, Vite, TypeScript strict mode, TailwindCSS, ESLint, and Prettier",
+            "Build Map.tsx - Leaflet map with GeoJSON overlay for deforestation masks",
+            "Implement api.ts - fully-typed API client for all FastAPI endpoints",
+            "Create Dashboard.tsx - main landing page with summary metrics and run status",
+        ]),
+    ])
+    pdf.month_block("MONTH 2: Feature Development (Weeks 5-8)", [
+        ("Week 5-6: Data Visualisation", [
+            "Build Charts.tsx - Recharts components for deforestation trend lines, bar charts, gauges",
+            "Create ResultsViewer.tsx - segmentation masks overlaid on satellite imagery",
+            "Implement Analysis.tsx - region selector, date picker, model comparison view",
+            "Set up Vitest and React Testing Library - component test coverage from the start",
+        ]),
+        ("Week 7-8: Real-Time & Interactivity", [
+            "Build WebSocket integration for live prediction job status updates",
+            "Create AlertPanel.tsx - real-time deforestation alert notification feed",
+            "Implement History.tsx - paginated, filterable list of past analysis runs",
+            "Build Settings.tsx - user preferences and API key management",
+        ]),
+    ])
+    pdf.month_block("MONTH 3: Production Readiness (Weeks 9-12)", [
+        ("Week 9-10: Deployment & Environment Config", [
+            "Configure dev/staging/prod environment separation with secrets management",
+            "Set up deployment pipeline to Vercel (frontend) and Docker-based backend hosting",
+            "Implement health monitoring and automated alerting for production incidents",
+            "Performance pass: code splitting, lazy loading, image optimisation, bundle analysis",
+        ]),
+        ("Week 11-12: Integration, Testing & Release", [
+            "Full end-to-end integration testing against all backend API endpoints",
+            "Responsive design audit for tablet and large desktop breakpoints",
+            "Accessibility review: keyboard navigation and screen reader compatibility",
+            "Manage v1.0 release: changelog, version tag, release notes, and deployment sign-off",
+        ]),
+    ])
+
+    # Git Workflow
+    pdf.section_title("Your Git Workflow")
+    pdf.code_block(
+        "# Create feature branches from develop\n"
+        "git checkout develop\n"
+        "git pull origin develop\n"
+        "git checkout -b feature/frontend-leaflet-map\n"
+        "\n"
+        "# Your branch naming convention:\n"
+        "feature/frontend-*     (frontend features)\n"
+        "feature/infra-*        (Docker, CI/CD, deployment)\n"
+        "feature/ci-*           (GitHub Actions changes)\n"
+        "fix/frontend-*         (bug fixes)\n"
+        "release/v*             (release branches)"
+    )
+    pdf.body_text(
+        "As co-owner, you can merge directly to develop after self-review for frontend-only or infra-only "
+        "changes. For changes touching shared Python code or API contracts, get a review from @Goldokpa "
+        "or the relevant module owner."
+    )
+    pdf.ln(3)
+
+    # Key Collaborators
+    pdf.section_title("Your Key Collaborators")
+    pdf.bullet("Olufemi Taiwo (API & Data Quality Lead) - He owns the FastAPI schemas, inference validation, and audit logging. You own the Docker image and deployment pipeline that runs his API. Define the API contract together: endpoint URLs, request/response shapes, auth headers, and error formats.")
+    pdf.bullet("@franchaise (Analytics Lead) - His carbon metrics and KPI data feed your dashboard charts. Align on JSON data contracts, refresh intervals, and pagination formats.")
+    pdf.bullet("@edoh-Onuh (ML Lead) - Model prediction outputs need to be visualised on the map. Coordinate on GeoJSON output format, confidence score rendering, and how prediction jobs report status via the API.")
+    pdf.bullet("@Oshgig (Data Pipeline Lead) - Satellite imagery tile previews on the map may draw on her geospatial utilities. Align on tile formats, coordinate systems, and GeoJSON structures.")
+
+    # Code Pipeline
+    pdf.section_title("Your Code Pipeline")
+    pdf.body_text("Your pipeline covers frontend development, Docker orchestration, CI/CD management, and full-stack integration testing.")
+
+    pdf.subsection_title("Step 1: Environment Setup")
+    pdf.code_block(
+        "git clone https://github.com/Climate-Vision/ClimateVision.git\n"
+        "cd ClimateVision\n"
+        "\n"
+        "# Backend dependencies\n"
+        "pip install -r requirements.txt\n"
+        "\n"
+        "# Frontend dependencies\n"
+        "cd frontend && npm install && cd .."
+    )
+
+    pdf.subsection_title("Step 2: Start Full Local Dev Stack")
+    pdf.code_block(
+        "# Option A: Docker Compose (full stack - recommended)\n"
+        "docker-compose up --build\n"
+        "# API:      http://localhost:8000\n"
+        "# Frontend: http://localhost:5173\n"
+        "# MLflow:   http://localhost:5000\n"
+        "\n"
+        "# Option B: Run services individually for faster iteration\n"
+        "uvicorn climatevision.api.main:app --reload --port 8000 &\n"
+        "cd frontend && npm run dev"
+    )
+
+    pdf.subsection_title("Step 3: Frontend Development Loop")
+    pdf.code_block(
+        "cd frontend\n"
+        "\n"
+        "# Run linting and type checks\n"
+        "npm run lint\n"
+        "npm run type-check\n"
+        "\n"
+        "# Run component tests\n"
+        "npm run test\n"
+        "\n"
+        "# Build production bundle and check for errors\n"
+        "npm run build\n"
+        "\n"
+        "# Preview production build locally\n"
+        "npm run preview"
+    )
+
+    pdf.subsection_title("Step 4: Current CI/CD Configuration")
+    pdf.body_text("The following .github/workflows/ci.yml is live and runs on every PR to main/develop:")
+    pdf.code_block(
+        "name: CI\n"
+        "on:\n"
+        "  push:\n"
+        "    branches: [main, develop]\n"
+        "  pull_request:\n"
+        "    branches: [main, develop]\n"
+        "\n"
+        "jobs:\n"
+        "  python:\n"
+        "    runs-on: ubuntu-latest\n"
+        "    steps:\n"
+        "      - uses: actions/checkout@v4\n"
+        "      - uses: actions/setup-python@v5\n"
+        "        with: {python-version: '3.11'}\n"
+        "      - run: sudo apt-get update && sudo apt-get install -y libgl1\n"
+        "      - run: pip install -r requirements.txt && pip install -e .\n"
+        "      - run: flake8 src/ --select=E9,F63,F7,F82\n"
+        "      - run: pytest tests/ -v --tb=short\n"
+        "\n"
+        "  frontend:\n"
+        "    runs-on: ubuntu-latest\n"
+        "    defaults: {run: {working-directory: frontend}}\n"
+        "    steps:\n"
+        "      - uses: actions/checkout@v4\n"
+        "      - uses: actions/setup-node@v4\n"
+        "        with: {node-version: '20', cache: 'npm'}\n"
+        "      - run: npm ci\n"
+        "      - run: npm run build"
+    )
+    pdf.ln(2)
+
+    pdf.subsection_title("Step 5: Build & Test Docker Image")
+    pdf.code_block(
+        "# Build production Docker image\n"
+        "docker build -t climatevision:latest .\n"
+        "\n"
+        "# Run container and verify it starts cleanly\n"
+        "docker run -p 8000:8000 climatevision:latest\n"
+        "\n"
+        "# Check all services are healthy inside the container\n"
+        "curl http://localhost:8000/api/health\n"
+        "\n"
+        "# Inspect image size and layers\n"
+        "docker image inspect climatevision:latest | grep Size"
+    )
+
+    pdf.subsection_title("Step 6: Run Full CI Checks Locally")
+    pdf.code_block(
+        "# Simulate the GitHub Actions CI pipeline before pushing\n"
+        "\n"
+        "# 1. Python: lint and tests\n"
+        "flake8 src/ --count --select=E9,F63,F7,F82 --show-source --statistics\n"
+        "flake8 src/ --count --exit-zero --max-complexity=10 --max-line-length=127 --statistics\n"
+        "pytest tests/ -v --tb=short\n"
+        "\n"
+        "# 2. Frontend: build\n"
+        "cd frontend && npm run build\n"
+        "\n"
+        "# 3. Docker build succeeds\n"
+        "docker-compose build"
+    )
+
+    # Numbered 7: the previous subsection already uses "Step 6".
+    pdf.subsection_title("Step 7: Commit & Push Your Work")
+    pdf.code_block(
+        "# Switch to your git identity\n"
+        "source team_docs/switch_user.sh victor\n"
+        "\n"
+        "git checkout develop && git pull origin develop\n"
+        "git checkout -b feature/frontend-leaflet-map\n"
+        "\n"
+        "git add frontend/src/components/Map.tsx\n"
+        "git add frontend/src/api.ts\n"
+        "git commit -m \"feat(frontend): add Leaflet map with GeoJSON deforestation overlay\"\n"
+        "\n"
+        "git push victor feature/frontend-leaflet-map\n"
+        "\n"
+        "# As co-owner: review and merge PRs from the team\n"
+        "# gh pr review <PR_NUMBER> --approve\n"
+        "# gh pr merge <PR_NUMBER> --squash"
+    )
+    pdf.output(os.path.join(OUTPUT_DIR, "Victor_Mbachu_Role.pdf"))
+    print("Created: Victor_Mbachu_Role.pdf")
+
+
+def create_godswill_doc():
+    pdf = RoleDoc("Godswill Okoroafor Chukwu")
+    pdf.add_page()
+
+    pdf.set_font("Helvetica", "B", 18)
+    pdf.cell(0, 10, "Godswill Okoroafor Chukwu", align="C", new_x="LMARGIN", new_y="NEXT")
+    pdf.set_font("Helvetica", "", 11)
+    pdf.set_text_color(100, 100, 100)
+    pdf.cell(0, 7, "Data Science Engineer 5 - ML Training, Experiment Tracking & Insights Lead", align="C", new_x="LMARGIN", new_y="NEXT")
+    pdf.set_text_color(0, 0, 0)
+    pdf.ln(5)
+
+    pdf.key_value("GitHub", "(To be assigned)")
+    pdf.key_value("Access Level", "Maintainer")
+    pdf.key_value("Reports To", "@Goldokpa (Project Owner)")
+    pdf.key_value("Project Duration", "3 Months")
+    pdf.ln(3)
+
+    # How It Fits Me
+    pdf.section_title("How This Role Fits You")
+    pdf.body_text(
+        "Your MSc in Big Data and Data Science Technology (Distinction) from Northumbria University is the "
+        "academic backbone this role demands. You have not just studied machine learning - you have delivered "
+        "it in production environments. At Amdari Inc., you built predictive and forecasting models that drove "
+        "strategic revenue decisions, applied clustering to identify at-risk student groups, and automated "
+        "reporting pipelines that cut manual processing time significantly. Every one of those deliverables "
+        "maps directly onto what ClimateVision needs from its ML training and insights layer."
+    )
+    pdf.body_text(
+        "Where @edoh-Onuh architects the deep learning models (U-Net, Siamese networks), you are the engineer "
+        "who drives those models through rigorous training cycles, tracks every experiment, measures every "
+        "metric, and extracts insights from the results. Your experience running classification, regression, "
+        "and clustering pipelines in Python - combined with your Data Scientist role at Amdari - means you "
+        "understand the full lifecycle: data in, model trained, results validated, insights delivered."
+    )
+    pdf.body_text(
+        "Your proficiency in Power BI and Looker Studio is a strategic asset here. ClimateVision generates "
+        "real predictions - deforestation percentages, ice extent loss, flood area - that conservation NGOs "
+        "and research partners need presented clearly. You build the reporting layer that translates raw model "
+        "outputs into KPI dashboards, trend reports, and alert summaries that non-technical stakeholders "
+        "can act on. That is the last mile between a working model and measurable real-world impact."
+    )
+    pdf.body_text(
+        "Your background in automating recurring reporting processes with Python and designing cross-functional "
+        "dashboards means you also own the bridge between the ML pipeline and the business intelligence layer. "
+        "With your DataCamp Associate Data Scientist certification and Full Stack Data Science qualification "
+        "from 10Alytics, you bring both the theoretical depth and the applied toolkit that this role requires."
+    )
+    pdf.ln(2)
+
+    # Role Description
+    pdf.section_title("Your Role on ClimateVision")
+    pdf.body_text(
+        "You own the training analytics, experiment tracking, and insights reporting pipeline - the layer that "
+        "connects raw model outputs to actionable environmental intelligence. While the ML Lead builds model "
+        "architectures and the Data Pipeline Lead ingests satellite imagery, you are the engineer who runs "
+        "training experiments at scale, tracks what works and why, measures model impact, and delivers "
+        "structured insights to teams and stakeholders. You are the system's analytical conscience."
+    )
+    pdf.subsection_title("Core Responsibilities")
+    pdf.bullet("Orchestrate model training runs using scripts/train.py and scripts/run_training.py with full experiment tracking via MLflow")
+    pdf.bullet("Design and execute hyperparameter tuning experiments using Optuna to maximise IoU, F1, and Dice scores")
+    pdf.bullet("Build and maintain the model evaluation pipeline - benchmarking across deforestation, ice melting, and flooding tasks")
+    pdf.bullet("Implement clustering analysis on prediction outputs to identify regional environmental patterns and hotspots")
+    pdf.bullet("Develop forecasting models to project deforestation trends, ice melt rates, and flood risk over time")
+    pdf.bullet("Automate KPI reporting pipelines that summarise model performance and environmental metrics for NGO stakeholders")
+    pdf.bullet("Design and maintain Power BI / Looker Studio dashboards tracking training progress, model accuracy, and climate impact")
+    pdf.bullet("Create data quality reports that validate training datasets and flag anomalies before they reach the model")
+    pdf.bullet("Produce regional impact analysis notebooks showing before/after environmental change metrics")
+    pdf.bullet("Feed structured insight data to the API layer and React dashboard for live reporting")
+    pdf.ln(2)
+
+    # Codebase Ownership
+    pdf.section_title("Your Codebase Ownership")
+    pdf.body_text("You are the primary owner of the following files and directories:")
+    pdf.code_block(
+        "scripts/                              # PRIMARY OWNER - Training & evaluation scripts\n"
+        "  train.py                            # Model training entry point\n"
+        "  run_training.py                     # Training orchestration & scheduling\n"
+        "  evaluate.py                         # Model evaluation & benchmarking\n"
+        "  infer.py                            # Single inference runner\n"
+        "\n"
+        "src/climatevision/training/\n"
+        "  trainer.py                          # CO-OWNER - Training loop, EMA, mixed precision\n"
+        "  losses.py                           # CO-OWNER - Focal Loss, Dice Loss tuning\n"
+        "\n"
+        "src/climatevision/utils/\n"
+        "  metrics.py                          # CO-OWNER - IoU, F1, Dice, recall tracking\n"
+        "  visualization.py                    # CO-OWNER - Training curve & result plots\n"
+        "\n"
+        "notebooks/\n"
+        "  06_training_analysis.ipynb          # Experiment tracking & training insights\n"
+        "  07_model_benchmarking.ipynb         # Cross-task model performance comparison\n"
+        "  08_regional_insights.ipynb          # Clustering & trend analysis by region\n"
+        "\n"
+        "outputs/\n"
+        "  reports/training/                   # Training run reports\n"
+        "  dashboards/kpi/                     # KPI dashboard configs\n"
+        "\n"
+        "logs/                                 # Training logs & MLflow run artifacts\n"
+        "models/                               # Model checkpoints (coordinate with ML Lead)"
+    )
+    pdf.ln(2)
+
+    # Key Impact Areas
+    pdf.section_title("Your High-Impact Contributions")
+    pdf.body_text(
+        "Your work directly determines whether ClimateVision's models are as accurate as possible and whether "
+        "their outputs are trusted by the organisations that rely on them. Three areas define your impact:"
+    )
+    pdf.subsection_title("1. Experiment-Driven Model Improvement")
+    pdf.body_text(
+        "Every training run you log is a data point. By systematically tracking learning rate schedules, "
+        "augmentation strategies, loss function weights, and batch sizes via MLflow and Optuna, you will "
+        "build the evidence base that drives model accuracy from baseline to production-grade. Your tuning "
+        "work is the difference between a model that detects 65% of deforestation events and one that "
+        "detects 85%."
+    )
+    pdf.subsection_title("2. Regional Clustering & Trend Forecasting")
+    pdf.body_text(
+        "Your clustering expertise turns raw pixel predictions into geographic intelligence. By grouping "
+        "regions with similar deforestation trajectories or flood risk patterns, you reveal insights that "
+        "no single prediction run can show. Paired with time-series forecasting models, you can project "
+        "where the next environmental crisis is developing before it becomes catastrophic - giving NGO "
+        "partners the lead time they need to act."
+    )
+    pdf.subsection_title("3. Stakeholder-Ready Reporting")
+    pdf.body_text(
+        "Raw model metrics mean nothing to a conservation officer or a policy researcher. Your Power BI "
+        "and automated Python reporting pipelines convert IoU scores and segmentation masks into carbon "
+        "loss estimates, hectare counts, and trend alerts that stakeholders can put in a board report. "
+        "This is the last mile of impact - and you own it."
+    )
+    pdf.ln(2)
+
+    # 3-Month Timeline
+    pdf.section_title("Your 3-Month Delivery Timeline")
+    pdf.month_block("MONTH 1: Foundation (Weeks 1-4)", [
+        ("Week 1-2: Training Infrastructure & Experiment Tracking", [
+            "Set up MLflow tracking server and connect to scripts/train.py",
+            "Instrument trainer.py to log all hyperparameters, metrics, and artifacts per run",
+            "Run baseline training experiments for deforestation, ice melting, and flooding tasks",
+            "Document baseline IoU, F1, and Dice scores per analysis type",
+        ]),
+        ("Week 3-4: Evaluation Pipeline", [
+            "Build scripts/evaluate.py - full evaluation suite with per-class metrics",
+            "Extend metrics.py with precision-recall curves and confusion matrix exports",
+            "Create 07_model_benchmarking.ipynb - cross-task performance comparison",
+            "Identify top 3 weaknesses in baseline models and propose tuning strategies",
+        ]),
+    ])
+    pdf.month_block("MONTH 2: Optimisation & Insights (Weeks 5-8)", [
+        ("Week 5-6: Hyperparameter Tuning", [
+            "Set up Optuna study for learning rate, batch size, loss weights, and augmentation",
+            "Run tuning experiments targeting IoU improvement of at least 10% over baseline",
+            "Log all trials in MLflow with full reproducibility (seed, config, checkpoint)",
+            "Implement best-config automatic checkpoint promotion pipeline",
+        ]),
+        ("Week 7-8: Clustering & Trend Forecasting", [
+            "Build regional clustering pipeline using K-Means / DBSCAN on prediction outputs",
+            "Develop time-series forecasting models for deforestation and ice melt trends",
+            "Create 08_regional_insights.ipynb - hotspot identification and trend projections",
+            "Generate first set of regional environmental trend reports",
+        ]),
+    ])
+    pdf.month_block("MONTH 3: Reporting & Production Readiness (Weeks 9-12)", [
+        ("Week 9-10: KPI Dashboard & Automated Reporting", [
+            "Build automated Python reporting pipeline - weekly model performance summaries",
+            "Design Power BI / Looker Studio KPI dashboard (accuracy trends, alert counts, coverage)",
+            "Expose dashboard data via API endpoints coordinated with Olufemi",
+            "Automate NGO-facing impact reports: area affected, confidence scores, trend direction",
+        ]),
+        ("Week 11-12: Documentation & Final Benchmarks", [
+            "Write 06_training_analysis.ipynb - full experiment history and lessons learned",
+            "Produce final benchmark report comparing all model versions across 3 months",
+            "Document all MLflow experiments, best checkpoints, and recommended configs",
+            "Deliver 3 regional case study insight reports to the team for stakeholder use",
+        ]),
+    ])
+
+    # Git Workflow
+    pdf.section_title("Your Git Workflow")
+    pdf.body_text("Follow this branching convention for all your work:")
+    pdf.code_block(
+        "# Create feature branches from develop\n"
+        "git checkout develop\n"
+        "git pull origin develop\n"
+        "git checkout -b feature/training-mlflow-setup\n"
+        "\n"
+        "# Your branch naming convention:\n"
+        "feature/training-*      (training pipeline features)\n"
+        "feature/insights-*      (reporting and analytics features)\n"
+        "fix/training-*          (bug fixes in training scripts)\n"
+        "experiment/tuning-*     (hyperparameter experiment branches)"
+    )
+    pdf.body_text(
+        "All PRs go to the develop branch. PRs require at least 1 review. "
+        "Tag @edoh-Onuh for model architecture questions and @franchaise for analytics overlap reviews. "
+        "Always attach MLflow run IDs in PRs that change training logic so reviewers can verify metrics."
+    )
+    pdf.ln(3)
+
+    # Key Collaborators
+    pdf.section_title("Your Key Collaborators")
+    pdf.bullet("@edoh-Onuh (ML Model Development Lead) - You run the training experiments on their model architectures. Coordinate on loss function choices, training hyperparameters, and checkpoint formats. Their architecture decisions constrain your tuning search space.")
+    pdf.bullet("@Oshgig (Data Pipeline Lead) - Your training runs consume her PyTorch DataLoaders. Align on tensor shapes, normalization ranges, augmentation strategies, and the data split structure (train/val/test).")
+    pdf.bullet("@franchaise (Carbon Analytics Lead) - Your model evaluation outputs are the input to their carbon estimation and validation work. Provide segmentation mask formats, confidence scores, and per-class metrics in agreed schemas.")
+    pdf.bullet("Olufemi Taiwo (API & Data Quality Lead) - Your KPI reporting data needs to be surfaced via API endpoints. Coordinate on response formats, refresh cycles, and how training run metadata is exposed to the dashboard.")
+    pdf.bullet("Victor Mbachu (Full-Stack & Infrastructure) - Your dashboard configs and reporting outputs feed the React frontend visualisations. Align on JSON contracts for time-series charts, gauge metrics, and alert summaries.")
+
+    # Code Pipeline
+    pdf.section_title("Your Code Pipeline")
+    pdf.body_text("Your pipeline covers experiment tracking setup, running and tuning training jobs, evaluating model performance, and generating insight reports for stakeholders.")
+
+    pdf.subsection_title("Step 1: Environment Setup")
+    pdf.code_block(
+        "git clone https://github.com/Climate-Vision/ClimateVision.git\n"
+        "cd ClimateVision\n"
+        "pip install -r requirements.txt\n"
+        "\n"
+        "# Verify ML and analytics stack\n"
+        "python -c \"import torch, mlflow, optuna, sklearn; print('ML stack ready')\"\n"
+        "\n"
+        "# Start MLflow tracking server\n"
+        "mlflow server --host 0.0.0.0 --port 5000 &\n"
+        "# Dashboard: http://localhost:5000"
+    )
+
+    pdf.subsection_title("Step 2: Run a Training Experiment")
+    pdf.code_block(
+        "# Run a tracked training job\n"
+        "python scripts/run_training.py \\\n"
+        "  --config config/deforestation.yaml \\\n"
+        "  --mlflow-tracking \\\n"
+        "  --experiment-name deforestation_v1\n"
+        "\n"
+        "# All metrics, params, and checkpoints auto-logged to MLflow\n"
+        "# View results: http://localhost:5000/#/experiments"
+    )
+
+    pdf.subsection_title("Step 3: Hyperparameter Tuning with Optuna")
+    pdf.code_block(
+        "# Launch an Optuna study to find the best training config\n"
+        "python - <<'EOF'\n"
+        "import optuna, mlflow\n"
+        "from climatevision.training.trainer import train_with_config\n"
+        "\n"
+        "def objective(trial):\n"
+        "    config = {\n"
+        "        'lr':         trial.suggest_float('lr', 1e-5, 1e-3, log=True),\n"
+        "        'batch_size': trial.suggest_categorical('batch_size', [8, 16, 32]),\n"
+        "        'dropout':    trial.suggest_float('dropout', 0.1, 0.5),\n"
+        "    }\n"
+        "    return train_with_config(config, metric='val_iou')\n"
+        "\n"
+        "study = optuna.create_study(direction='maximize', study_name='unet_deforestation')\n"
+        "study.optimize(objective, n_trials=50)\n"
+        "print(f'Best IoU: {study.best_value:.4f}')\n"
+        "print(f'Best params: {study.best_params}')\n"
+        "EOF"
+    )
+
+    pdf.subsection_title("Step 4: Evaluate & Benchmark Models")
+    pdf.code_block(
+        "# Evaluate best checkpoint across all analysis types\n"
+        "python scripts/evaluate.py \\\n"
+        "  --checkpoint models/best_unet.pth \\\n"
+        "  --split test \\\n"
+        "  --analysis-type deforestation \\\n"
+        "  --export-metrics outputs/reports/training/deforestation_eval.json\n"
+        "\n"
+        "# Compare all model versions logged in MLflow\n"
+        "python - <<'EOF'\n"
+        "import mlflow\n"
+        "runs = mlflow.search_runs(experiment_names=['deforestation_v1'],\n"
+        "                          order_by=['metrics.val_iou DESC'])\n"
+        "print(runs[['run_id','metrics.val_iou','params.lr','params.batch_size']].head(10))\n"
+        "EOF"
+    )
+
+    pdf.subsection_title("Step 5: Generate Stakeholder KPI Report")
+    pdf.code_block(
+        "# Run clustering on prediction outputs to find regional hotspots\n"
+        "python - <<'EOF'\n"
+        "from sklearn.cluster import KMeans\n"
+        "import numpy as np, json\n"
+        "predictions = np.load('outputs/masks/deforestation_confidence.npy')\n"
+        "kmeans = KMeans(n_clusters=5, random_state=42).fit(predictions.reshape(-1, 1))\n"
+        "hotspot_regions = np.where(kmeans.labels_ == kmeans.cluster_centers_.argmax())[0]\n"
+        "print(f'High-risk tiles identified: {len(hotspot_regions)}')\n"
+        "EOF\n"
+        "\n"
+        "# Auto-generate weekly KPI summary report\n"
+        "python - <<'EOF'\n"
+        "from climatevision.analytics.reporting import generate_kpi_report\n"
+        "generate_kpi_report(\n"
+        "    metrics_dir='outputs/reports/training/',\n"
+        "    period='2024-W12',\n"
+        "    output='outputs/dashboards/kpi/weekly_summary.pdf'\n"
+        ")\n"
+        "EOF"
+    )
+
+    pdf.subsection_title("Step 6: Commit & Push Your Work")
+    pdf.code_block(
+        "# Switch to your git identity\n"
+        "source team_docs/switch_user.sh godswill\n"
+        "\n"
+        "git checkout develop && git pull origin develop\n"
+        "git checkout -b feature/training-mlflow-setup\n"
+        "\n"
+        "git add scripts/run_training.py\n"
+        "git add scripts/evaluate.py\n"
+        "git add notebooks/06_training_analysis.ipynb\n"
+        "git commit -m \"feat(training): add MLflow experiment tracking and Optuna hyperparameter search\"\n"
+        "\n"
+        "git push godswill feature/training-mlflow-setup"
+    )
+
+    pdf.output(os.path.join(OUTPUT_DIR, "Godswill_Chukwu_Role.pdf"))
+    print("Created: Godswill_Chukwu_Role.pdf")
+
+
+def create_paul_doc():  # Builds Paul's role PDF and writes it into OUTPUT_DIR.
+    pdf = RoleDoc("Paul (cutewizzy11)")
+    pdf.add_page()
+    # Title block: bold name, grey role subtitle, then text colour back to black.
+    pdf.set_font("Helvetica", "B", 18)
+    pdf.cell(0, 10, "Paul", align="C", new_x="LMARGIN", new_y="NEXT")
+    pdf.set_font("Helvetica", "", 11)
+    pdf.set_text_color(100, 100, 100)
+    pdf.cell(0, 7, "Frontend Developer - React Dashboard & UI Lead", align="C", new_x="LMARGIN", new_y="NEXT")
+    pdf.set_text_color(0, 0, 0)
+    pdf.ln(5)
+    # Profile key/value rows.
+    pdf.key_value("GitHub", "@cutewizzy11")
+    pdf.key_value("Access Level", "Maintainer")
+    pdf.key_value("Reports To", "@Goldokpa (Project Owner)")
+    pdf.key_value("Project Duration", "3 Months")
+    pdf.ln(3)
+
+    # How It Fits
+    pdf.section_title("How This Role Fits You")
+    pdf.body_text(
+        "Your GitHub portfolio shows a developer who is comfortable across the full stack but has a clear "
+        "strength in TypeScript and JavaScript-driven interfaces. nova-agent, Data-management-Koinonia, "
+        "and anyebe-web-craft are all TypeScript projects - the same language ClimateVision's frontend is "
+        "built in. Your react-projects and ecommerce-app repositories show hands-on React experience, and "
+        "your Heart-Attack-Risk-Predictor on Streamlit shows you can bridge data science outputs and "
+        "interactive user interfaces - exactly the challenge you face here."
+    )
+    pdf.body_text(
+        "ClimateVision's dashboard already has a working foundation: React 18, TypeScript strict mode, "
+        "Vite, TailwindCSS, React Router, Recharts, and a fully-typed API client. Your job is not to "
+        "start from scratch - it is to take this functional base and build the components, pages, and "
+        "interactions that turn it into a polished, production-ready environmental monitoring dashboard "
+        "that NGOs and researchers can actually use."
+    )
+    pdf.body_text(
+        "Your experience with data management interfaces (Koinonia church app) and e-commerce UIs means "
+        "you understand how to build interfaces where users interact with structured data - filtering, "
+        "searching, viewing records, managing subscriptions. That skill maps directly onto ClimateVision's "
+        "run history browser, NGO subscription manager, and alert tracking panel. You have shipped this "
+        "category of UI before."
+    )
+    pdf.ln(2)
+
+    # Role Description
+    pdf.section_title("Your Role on ClimateVision")
+    pdf.body_text(
+        "You own the React dashboard - every pixel the end user sees. The backend API is built, the "
+        "data models are defined, and the component library has a strong foundation. Your mission is "
+        "to complete the user-facing layer: build missing pages, wire components to live API data, "
+        "implement real-time updates, and ensure the interface is responsive, accessible, and fast. "
+        "You are the engineer who makes ClimateVision feel like a real product."
+    )
+    pdf.subsection_title("Core Responsibilities")
+    pdf.bullet("Build and complete all dashboard pages: Dashboard home, NGO Management, Alerts, and deep-dive Analysis views")
+    pdf.bullet("Wire all components to live API data using the existing api.ts client - replace mock/static data throughout")
+    pdf.bullet("Implement real-time run status updates using polling (useRunPolling hook) and WebSocket for live job tracking")
+    pdf.bullet("Build the NGO management page - organisation registration, subscription setup, alert acknowledgment")
+    pdf.bullet("Implement the Alerts page - filterable, paginated alert feed with severity badges and map drill-down")
+    pdf.bullet("Extend the Map components - overlay segmentation masks on the map after prediction completes")
+    pdf.bullet("Add component-level tests using Vitest and React Testing Library")
+    pdf.bullet("Ensure full responsive design for tablet and desktop breakpoints using TailwindCSS")
+    pdf.bullet("Implement accessibility: keyboard navigation, screen reader labels, focus management")
+    pdf.bullet("Performance: code splitting, lazy loading pages, skeleton loading states already in the UI library")
+    pdf.ln(2)
+
+    # Codebase Ownership
+    pdf.section_title("Your Codebase Ownership")
+    pdf.body_text("You are the primary owner of the entire frontend directory:")
+    pdf.code_block(
+        "frontend/src/                          # PRIMARY OWNER - Full frontend\n"
+        "\n"
+        "  pages/                               # PRIMARY OWNER - All page components\n"
+        "    NewAnalysis.tsx                    # Exists - extend with live map result overlay\n"
+        "    Upload.tsx                         # Exists - connect to /predict/upload endpoint\n"
+        "    RunHistory.tsx                     # Exists - add filters, pagination, search\n"
+        "    Analytics.tsx                      # Exists - connect live data, add date picker\n"
+        "    Settings.tsx                       # Exists - wire to API key and config endpoints\n"
+        "    Dashboard.tsx                      # BUILD - Home page KPI summary\n"
+        "    NGOManagement.tsx                  # BUILD - Org registration + subscriptions\n"
+        "    Alerts.tsx                         # BUILD - Alert feed with severity filters\n"
+        "\n"
+        "  components/                          # PRIMARY OWNER - All UI components\n"
+        "    charts/                            # Extend existing Recharts components\n"
+        "    Map/                               # Extend - add mask overlay on results\n"
+        "    ngo/                               # Complete - wire AlertsPanel, SubscriptionManager\n"
+        "    results/                           # Complete - wire ResultsPanel to live predictions\n"
+        "    runs/                              # Extend RunCard with status polling\n"
+        "    ui/                                # Extend UI library as needed\n"
+        "\n"
+        "  api.ts                               # CO-OWNER - Add any missing endpoint calls\n"
+        "  types.ts                             # CO-OWNER - Add frontend-specific types\n"
+        "  contexts/                            # CO-OWNER - AppContext, ToastContext\n"
+        "  hooks/                               # PRIMARY OWNER - useGeocoding, useRunPolling\n"
+        "\n"
+        "  tests/                               # PRIMARY OWNER - Component tests (to be created)\n"
+        "    components/\n"
+        "    pages/"
+    )
+    pdf.ln(2)
+
+    # 3-Month Timeline
+    pdf.section_title("Your 3-Month Delivery Timeline")
+    pdf.month_block("MONTH 1: Foundation & Live Data (Weeks 1-4)", [
+        ("Week 1-2: Setup & API Wiring", [
+            "Clone repo, install deps, run dev server - verify all pages render",
+            "Run the FastAPI backend locally and confirm api.ts endpoints connect",
+            "Wire RunHistory page to live /runs API data - replace any static data",
+            "Wire Analytics page to live run metrics - confirm charts render with real data",
+            "Add loading skeletons (SkeletonCard already exists) to all data-fetching pages",
+        ]),
+        ("Week 3-4: Dashboard Home & Settings", [
+            "Build Dashboard.tsx - KPI summary cards: total runs, alerts, analysis breakdown",
+            "Add Dashboard as the new root route (/) and move NewAnalysis to /new-analysis",
+            "Wire Settings.tsx to API config endpoints - API base URL, analysis preferences",
+            "Implement Toast notifications for success/error states across all forms",
+        ]),
+    ])
+    pdf.month_block("MONTH 2: NGO Features & Real-Time (Weeks 5-8)", [
+        ("Week 5-6: NGO Management Page", [
+            "Build NGOManagement.tsx - list registered organisations from /organizations endpoint",
+            "Implement organisation registration form with validation",
+            "Build SubscriptionManager UI - region bbox picker + analysis type + threshold",
+            "Wire to POST /organizations and POST /organizations/{id}/subscriptions endpoints",
+        ]),
+        ("Week 7-8: Alerts & Real-Time Updates", [
+            "Build Alerts.tsx - paginated alert feed filtered by severity and analysis type",
+            "Implement alert acknowledgment button wired to PATCH /organizations/{id}/alerts/{id}",
+            "Extend useRunPolling hook to poll job status and update UI when predictions complete",
+            "Add live segmentation mask overlay on RegionMap after a prediction run finishes",
+        ]),
+    ])
+    pdf.month_block("MONTH 3: Polish & Production (Weeks 9-12)", [
+        ("Week 9-10: Testing & Accessibility", [
+            "Set up Vitest and React Testing Library - write tests for all page components",
+            "Test all API integration points with mocked responses",
+            "Accessibility audit: add aria-labels, keyboard nav, focus rings across all pages",
+            "Responsive design audit - tablet (768px) and large desktop (1440px) breakpoints",
+        ]),
+        ("Week 11-12: Performance & Final Integration", [
+            "Implement React.lazy() and Suspense for all page-level code splitting",
+            "Bundle analysis with vite-bundle-visualizer - eliminate unused dependencies",
+            "Full end-to-end test: bbox input -> prediction job -> live status -> result on map",
+            "Final UI polish pass: spacing, typography, colour consistency across all pages",
+        ]),
+    ])
+
+    # Code Pipeline
+    pdf.section_title("Your Code Pipeline")
+    pdf.body_text("Your daily pipeline as frontend developer - from clone to a live feature pushed to GitHub.")
+
+    pdf.subsection_title("Step 1: Setup")
+    pdf.code_block(
+        "git clone https://github.com/Climate-Vision/ClimateVision.git\n"
+        "cd ClimateVision/frontend\n"
+        "npm install\n"
+        "\n"
+        "# Start the backend API (needed for live data)\n"
+        "cd .. && uvicorn climatevision.api.main:app --reload --port 8000 &\n"
+        "\n"
+        "# Start the frontend dev server\n"
+        "cd frontend && npm run dev\n"
+        "# App running at: http://localhost:5173"
+    )
+
+    pdf.subsection_title("Step 2: Build a New Page or Component")
+    pdf.code_block(
+        "# Example: building the Dashboard home page\n"
+        "touch src/pages/Dashboard.tsx\n"
+        "\n"
+        "# Import existing UI primitives - don't rebuild what exists\n"
+        "# Available: Card, Badge, StatusBadge, SkeletonCard, ProgressBar,\n"
+        "#            Tooltip, EmptyState, ErrorBoundary, AnalysisTypeSelector\n"
+        "\n"
+        "# Import charts - already built with Recharts\n"
+        "# Available: TimeSeriesChart, BarChart, GaugeChart\n"
+        "\n"
+        "# Import API functions from api.ts\n"
+        "# import { listRuns, listOrganizations, listAlerts } from '../api'"
+    )
+
+    pdf.subsection_title("Step 3: Connect to Live API Data")
+    pdf.code_block(
+        "// Example: fetching live runs in a component\n"  # TSX snippet, so comments use // not #
+        "import { useEffect, useState } from 'react'\n"
+        "import { listRuns } from '../api'\n"
+        "import type { Run } from '../api'\n"
+        "\n"
+        "const [runs, setRuns] = useState<Run[]>([])\n"
+        "const [loading, setLoading] = useState(true)\n"
+        "\n"
+        "useEffect(() => {\n"
+        "  listRuns().then(data => {\n"
+        "    setRuns(data)\n"
+        "    setLoading(false)\n"
+        "  })\n"
+        "}, [])\n"
+        "\n"
+        "// Use SkeletonCard while loading\n"
+        "if (loading) return <SkeletonCard />"
+    )
+
+    pdf.subsection_title("Step 4: Run Quality Checks")
+    pdf.code_block(
+        "# From the frontend/ directory:\n"
+        "\n"
+        "# TypeScript type check - zero errors before pushing\n"
+        "npm run type-check\n"
+        "\n"
+        "# Lint check\n"
+        "npm run lint\n"
+        "\n"
+        "# Run component tests\n"
+        "npm run test\n"
+        "\n"
+        "# Production build - must succeed before any PR\n"
+        "npm run build"
+    )
+
+    pdf.subsection_title("Step 5: Commit & Push Your Work")
+    pdf.code_block(
+        "# Switch to your git identity\n"
+        "source team_docs/switch_user.sh paul\n"
+        "\n"
+        "git checkout develop && git pull origin develop\n"
+        "git checkout -b feature/frontend-dashboard-home\n"
+        "\n"
+        "# Stage only frontend files\n"
+        "git add frontend/src/pages/Dashboard.tsx\n"
+        "git add frontend/src/main.tsx\n"
+        "\n"
+        "git commit -m \"feat(frontend): add Dashboard home page with KPI summary cards\"\n"
+        "\n"
+        "# Push from your GitHub account\n"
+        "git push paul feature/frontend-dashboard-home\n"
+        "\n"
+        "# Branch naming convention:\n"
+        "# feature/frontend-*    new UI features\n"
+        "# fix/frontend-*        bug fixes\n"
+        "# refactor/frontend-*   component refactoring"
+    )
+    # Key Collaborators
+    pdf.section_title("Your Key Collaborators")
+    pdf.bullet("Olufemi Taiwo (femi23) - He owns the FastAPI backend your api.ts calls. Any new endpoint you need, request it from him. Coordinate on response shapes, pagination, and error formats.")
+    pdf.bullet("@Goldokpa (Project Owner) - He built the original api.ts and App shell. He is your first point of contact for architecture questions and has context on every frontend design decision.")
+    pdf.bullet("@franchaise (Analytics Lead) - His carbon metrics and KPI data feed your Analytics and Dashboard pages. Agree on the JSON structure for chart data with him.")
+    pdf.bullet("Victor Mbachu (Full-Stack & Infrastructure) - If Docker or CI/CD issues block your local dev, coordinate with the infrastructure owner.")  # @cutewizzy11 is Paul's own handle, so Victor is identified by role here
+    pdf.bullet("@edoh-Onuh (ML Lead) - Model prediction outputs appear as map overlays in your UI. Coordinate on the GeoJSON mask format and confidence score schema so your map component renders them correctly.")
+
+    pdf.output(os.path.join(OUTPUT_DIR, "Paul_cutewizzy11_Role.pdf"))
+    print("Created: Paul_cutewizzy11_Role.pdf")
+
+
+def create_gold_doc():  # Builds Gold Okpa's role PDF and writes it into OUTPUT_DIR.
+    pdf = RoleDoc("Gold Okpa")
+    pdf.add_page()
+    # Title block: bold name, grey role subtitle, then text colour back to black.
+    pdf.set_font("Helvetica", "B", 18)
+    pdf.cell(0, 10, "Gold Okpa", align="C", new_x="LMARGIN", new_y="NEXT")
+    pdf.set_font("Helvetica", "", 11)
+    pdf.set_text_color(100, 100, 100)
+    pdf.cell(0, 7, "Project Owner & Lead Architect - ClimateVision", align="C", new_x="LMARGIN", new_y="NEXT")
+    pdf.set_text_color(0, 0, 0)
+    pdf.ln(5)
+    # Profile key/value rows.
+    pdf.key_value("GitHub", "@Goldokpa")
+    pdf.key_value("Access Level", "Owner (Admin)")
+    pdf.key_value("Email", "okpagold@gmail.com")
+    pdf.key_value("Project Duration", "Ongoing")
+    pdf.ln(3)
+
+    # Role Overview
+    pdf.section_title("Your Role on ClimateVision")
+    pdf.body_text(
+        "You built ClimateVision from the ground up. Every foundational layer of this system - the React "
+        "frontend and API client, the Google Earth Engine integration with service account auth and synthetic "
+        "NDVI fallback, the data pipeline scripts, the training and evaluation infrastructure, the Colab "
+        "training notebook, and the overall architecture - was shipped by you. You are not just the project "
+        "owner in title. You are the technical architect, the integration lead, and the person who knows "
+        "every module of this codebase at a deep level."
+    )
+    pdf.body_text(
+        "As the team scales, your role shifts from building everything yourself to orchestrating six "
+        "specialist engineers - setting the architectural direction, reviewing and merging their code, "
+        "maintaining the integrity of the overall system, and ensuring every module fits together cleanly. "
+        "You are the final authority on what goes into the main branch and what ships to users."
+    )
+    pdf.ln(2)
+
+    pdf.subsection_title("Core Responsibilities")
+    pdf.bullet("Own the overall system architecture and make final decisions on design patterns, module boundaries, and API contracts")
+    pdf.bullet("Review and merge all pull requests into the develop and main branches")
+    pdf.bullet("Maintain config.yaml - the single source of truth for all model, data, and API configuration")
+    pdf.bullet("Own the Google Earth Engine integration and satellite data orchestration at the system level")
+    pdf.bullet("Manage GitHub repository: branch protection rules, secrets, environment variables, and access permissions")
+    pdf.bullet("Coordinate sprint planning, milestone tracking, and cross-team dependency resolution")
+    pdf.bullet("Own the release process: version tagging, changelog, and production deployment sign-off")
+    pdf.bullet("Onboard new team members and ensure every engineer has the access and context they need")
+    pdf.bullet("Make final calls on model selection, analysis type prioritisation, and stakeholder deliverables")
+    pdf.ln(2)
+
+    # Codebase Ownership
+    pdf.section_title("Your Codebase Ownership")
+    pdf.body_text("As project owner you have authority over the full codebase. Your primary ownership areas are:")
+    pdf.code_block(
+        "config.yaml                            # PRIMARY OWNER - All system configuration\n"
+        ".env / .env.example                    # PRIMARY OWNER - Environment secrets template\n"
+        "setup.py / requirements.txt            # PRIMARY OWNER - Package definition\n"
+        "\n"
+        "src/climatevision/                     # ARCHITECT - Full codebase authority\n"
+        "  api/main.py                          # Co-owner with Olufemi - original author\n"
+        "  analysis/                            # Original author - analysis framework\n"
+        "  config.py                            # PRIMARY OWNER - Config management\n"
+        "  db.py                                # PRIMARY OWNER - Database schema\n"
+        "\n"
+        "scripts/                               # ORIGINAL AUTHOR - All pipeline scripts\n"
+        "  prepare_data.py                      # GEE data pipeline (you built this)\n"
+        "  setup_gee.py                         # GEE service account auth\n"
+        "  train.py | evaluate.py | infer.py    # Training & inference scripts\n"
+        "  export_model.py                      # ONNX export\n"
+        "\n"
+        "frontend/                              # ORIGINAL AUTHOR - App shell & API client\n"
+        "  src/App.tsx                          # Main application\n"
+        "  src/api.ts                           # API client (you wrote this)\n"
+        "\n"
+        "notebooks/                             # ORIGINAL AUTHOR\n"
+        "  train_on_colab.ipynb                 # Colab training notebook\n"
+        "\n"
+        ".github/                               # PRIMARY OWNER - CI/CD and repo rules\n"
+        "README.md / CONTRIBUTING.md            # PRIMARY OWNER - Public documentation"
+    )
+    pdf.ln(2)
+
+    # 3-Month Plan
+    pdf.section_title("Your 3-Month Orchestration Plan")
+    pdf.month_block("MONTH 1: Team Integration (Weeks 1-4)", [
+        ("Week 1-2: Onboarding & Access", [
+            "Grant all 6 engineers Maintainer access on GitHub",
+            "Set up branch protection: require passing CI + 1 review on develop",
+            "Create GitHub project board with milestones mapped to each engineer's 3-month timeline",
+            "Distribute and walk through each team member's role document",
+            "Verify all engineers can clone the repo, install dependencies, and run the API locally",
+        ]),
+        ("Week 3-4: Architecture Alignment", [
+            "Hold kickoff session: walkthrough of config.yaml, module boundaries, and API contracts",
+            "Define and document tensor shapes, data formats, and model output schemas",
+            "Review and merge first PRs from each team member - establish code review rhythm",
+            "Set up MLflow server on shared infrastructure for experiment tracking",
+        ]),
+    ])
+    pdf.month_block("MONTH 2: Integration & Quality (Weeks 5-8)", [
+        ("Week 5-6: Cross-Module Integration", [
+            "Integration test: Adeolu's DataLoader -> Edoh's model -> Olufemi's inference API",
+            "Integration test: Olufemi's API output -> Francis' carbon estimation -> Victor's dashboard",
+            "Resolve any data contract mismatches between modules",
+            "Set up automated integration test suite in GitHub Actions",
+        ]),
+        ("Week 7-8: Architecture Reviews", [
+            "Review all module implementations against original architecture design",
+            "Identify and resolve any technical debt or design drift before it compounds",
+            "Run end-to-end test: satellite bbox input -> dashboard output for all 3 analysis types",
+            "Performance profiling: measure API latency and model inference time",
+        ]),
+    ])
+    pdf.month_block("MONTH 3: Production & Release (Weeks 9-12)", [
+        ("Week 9-10: Production Hardening", [
+            "Review all security configurations: API keys, CORS, input validation, secrets management",
+            "Final review of Docker and CI/CD pipeline with Victor",
+            "Load test the API endpoints - verify stability under concurrent requests",
+            "Complete documentation audit: README, API docs, and module docstrings",
+        ]),
+        ("Week 11-12: v1.0 Release", [
+            "Final code review sweep across all modules",
+            "Tag v1.0 release with full changelog",
+            "Deploy to production environment and verify all services healthy",
+            "Publish project to open-source community and notify NGO partners",
+        ]),
+    ])
+
+    # Code Pipeline
+    pdf.section_title("Your Code Pipeline")
+    pdf.body_text("As project owner your pipeline covers architecture, integration testing, PR reviews, and release management - as well as direct development when extending core systems.")
+
+    pdf.subsection_title("Step 1: Daily Project Management")
+    pdf.code_block(
+        "# Check open PRs and review queue\n"
+        "gh pr list --repo Climate-Vision/ClimateVision\n"
+        "\n"
+        "# Check CI status across all branches\n"
+        "gh run list --repo Climate-Vision/ClimateVision --limit 10\n"
+        "\n"
+        "# View open issues\n"
+        "gh issue list --repo Climate-Vision/ClimateVision --label bug"
+    )
+
+    pdf.subsection_title("Step 2: Review & Merge a Team Member's PR")
+    pdf.code_block(
+        "# Fetch and checkout their branch for local testing\n"
+        "git fetch origin\n"
+        "git checkout feature/data-sentinel2-preprocessing\n"
+        "\n"
+        "# Test their code runs correctly\n"
+        "pip install -r requirements.txt\n"
+        "python -c \"from climatevision.data.preprocessing import preprocess_tiles; print('OK')\"\n"
+        "\n"
+        "# Review on GitHub and approve\n"
+        "gh pr review <PR_NUMBER> --approve --body \"Tested locally - preprocessing pipeline works correctly\"\n"
+        "\n"
+        "# Merge into develop\n"
+        "gh pr merge <PR_NUMBER> --squash --delete-branch"
+    )
+
+    pdf.subsection_title("Step 3: Run End-to-End Integration Test")
+    pdf.code_block(
+        "# Start all services\n"
+        "docker-compose up --build -d\n"
+        "\n"
+        "# Test the full pipeline: bbox -> prediction -> response\n"
+        "curl -X POST http://localhost:8000/predict/json \\\n"
+        "  -H \"Content-Type: application/json\" -H \"X-API-Key: cv_dev\" \\\n"  # POST endpoints require X-API-Key; cv_dev is the local development key
+        "  -d '{\"bbox\": [-60,-15,-45,5], \"start_date\": \"2023-01-01\",\n"
+        "       \"end_date\": \"2023-12-31\", \"analysis_type\": \"deforestation\"}'\n"
+        "\n"
+        "# Run automated integration tests\n"
+        "pytest tests/integration/ -v\n"
+        "\n"
+        "# Verify frontend builds and loads dashboard data\n"
+        "cd frontend && npm run build && npm run preview"
+    )
+
+    pdf.subsection_title("Step 4: Update System Configuration")
+    pdf.code_block(
+        "# Edit the master config (all analysis types, thresholds, model params)\n"
+        "# File: config.yaml\n"
+        "\n"
+        "# Example: update deforestation alert threshold\n"
+        "# deforestation:\n"
+        "#   alert_threshold: 0.15  -> 0.10  (more sensitive)\n"
+        "\n"
+        "# Validate config loads correctly after changes\n"
+        "python - <<'EOF'\n"
+        "from climatevision.config import load_config\n"
+        "cfg = load_config('config.yaml')\n"
+        "print(f\"Analysis types: {list(cfg.keys())}\")\n"
+        "EOF"
+    )
+
+    pdf.subsection_title("Step 5: Tag a Release")
+    pdf.code_block(
+        "# Ensure you are on the owner identity\n"
+        "source team_docs/switch_user.sh gold\n"
+        "\n"
+        "# Merge develop into main for release\n"
+        "git checkout main\n"
+        "git merge develop --no-ff -m \"release: v1.0.0\"\n"
+        "\n"
+        "# Tag the release\n"
+        "git tag -a v1.0.0 -m \"ClimateVision v1.0.0 - Deforestation, Ice Melt, Flood Detection\"\n"
+        "\n"
+        "# Push main and tag to GitHub\n"
+        "git push origin main\n"
+        "git push origin v1.0.0\n"
+        "\n"
+        "# Create GitHub release with changelog\n"
+        "gh release create v1.0.0 \\\n"
+        "  --title \"ClimateVision v1.0.0\" \\\n"
+        "  --notes \"First production release. Supports deforestation, arctic ice, and flood detection.\""
+    )
+
+    pdf.subsection_title("Step 6: Direct Development (Core Systems)")
+    pdf.code_block(
+        "# When extending core architecture directly\n"
+        "source team_docs/switch_user.sh gold\n"
+        "\n"
+        "git checkout develop && git pull origin develop\n"
+        "git checkout -b feature/core-new-analysis-type\n"
+        "\n"
+        "# Make changes to core modules (analysis/, config.py, db.py, api/main.py)\n"
+        "\n"
+        "git add src/climatevision/analysis/\n"
+        "git add config.yaml\n"
+        "git commit -m \"feat(core): add drought detection analysis type to registry\"\n"
+        "\n"
+        "# Push as project owner\n"
+        "git push origin feature/core-new-analysis-type"
+    )
+    # Key Collaborators
+    pdf.section_title("Your Key Collaborators")
+    pdf.bullet("Victor Mbachu (@cutewizzy11) - Co-owner for infrastructure decisions. Coordinate on Dockerfile, CI/CD pipelines, and production deployment architecture.")
+    pdf.bullet("Edoh-Onuh (@edoh-Onuh) - ML Lead. Final authority on model architecture decisions sits with you, but Edoh drives the implementation. Review all model PRs carefully.")
+    pdf.bullet("Olufemi Taiwo (femi23) - API Lead. You are the original author of main.py. Any structural changes to the API must go through your review.")
+    pdf.bullet("Adeolu Mary Oshadare (@Oshgig) - Data Pipeline Lead. You built the GEE scripts she extends. Maintain alignment on data contracts between ingestion and training.")
+    pdf.bullet("Francis Umo (@franchaise) - Analytics Lead. Carbon estimates and impact reports are the primary stakeholder-facing output. Review these deliverables closely.")
+    pdf.bullet("Godswill Chukwu - ML Insights Lead. His experiment results and KPI reports inform your architectural and model selection decisions.")
+
+    pdf.output(os.path.join(OUTPUT_DIR, "Gold_Okpa_Role.pdf"))
+    print("Created: Gold_Okpa_Role.pdf")
+
+
+if __name__ == "__main__":
+    # Build every team member's role PDF, in the same order as before.
+    builders = (
+        create_adeolu_doc, create_francis_doc, create_olufemi_doc,
+        create_edoh_doc, create_victor_doc, create_godswill_doc,
+        create_paul_doc, create_gold_doc,
+    )
+    for build in builders:
+        build()
+    print(f"\nAll 8 role documents generated in: {OUTPUT_DIR}")

From 74923048047405715fb3a8f5526b3ddd9bec84f7 Mon Sep 17 00:00:00 2001
From: Presmanes <presmanes3@gmail.com>
Date: Sat, 16 May 2026 14:47:03 +0200
Subject: [PATCH 13/17] Add: SMTP environment variables to .env.example

---
 .env.example | 10 ++++++++++
 1 file changed, 10 insertions(+)

diff --git a/.env.example b/.env.example
index b4cdc4a..e059fa5 100644
--- a/.env.example
+++ b/.env.example
@@ -17,3 +17,13 @@ API_SECRET_KEY=your_secret_key_here
 
 # Database (optional - for later)
 DATABASE_URL=postgresql://user:password@localhost:5432/climatevision
+
+# Alert Delivery — SMTP Configuration (optional)
+# Leave empty to skip email delivery (alerts will be logged to console instead)
+# For development/testing: https://mailtrap.io (free, no real emails sent)
+# For production: Gmail App Password, SendGrid, Mailgun, etc.
+SMTP_HOST=
+SMTP_PORT=587
+SMTP_USER=
+SMTP_PASS=
+SMTP_FROM=alerts@climatevision.dev

From 89d61b25e2ea592dee6481bd65039952d2652788 Mon Sep 17 00:00:00 2001
From: Presmanes <presmanes3@gmail.com>
Date: Sat, 16 May 2026 14:47:07 +0200
Subject: [PATCH 14/17] Add: alert delivery helper functions in db.py

---
 src/climatevision/db.py | 46 +++++++++++++++++++++++++++++++++++++++++
 1 file changed, 46 insertions(+)

diff --git a/src/climatevision/db.py b/src/climatevision/db.py
index 711a2ad..acef502 100644
--- a/src/climatevision/db.py
+++ b/src/climatevision/db.py
@@ -503,3 +503,49 @@ def mark_alert_delivered(alert_id: int) -> bool:
             (now, alert_id),
         )
         return cursor.rowcount > 0
+
+
+def get_alert(alert_id: int) -> Optional[sqlite3.Row]:
+    """Return the ``organization_alerts`` row with this id, or None if absent."""
+    query = "SELECT * FROM organization_alerts WHERE id = ?"
+    with get_connection() as conn:
+        cursor = conn.execute(query, (alert_id,))
+        return cursor.fetchone()
+
+
+def get_subscription(sub_id: int) -> Optional[sqlite3.Row]:
+    """Return the ``organization_subscriptions`` row with this id, or None if absent."""
+    query = "SELECT * FROM organization_subscriptions WHERE id = ?"
+    with get_connection() as conn:
+        cursor = conn.execute(query, (sub_id,))
+        return cursor.fetchone()
+
+
+def get_pending_alerts(organization_id: int, limit: int = 50) -> list[sqlite3.Row]:
+    """Return up to ``limit`` alerts of an organization that have ``delivered = 0``."""
+    # Placeholders are bound positionally: organization id first, then row cap.
+    params = (organization_id, limit)
+    with get_connection() as conn:
+        cursor = conn.execute(
+            """
+            SELECT * FROM organization_alerts
+            WHERE organization_id = ? AND delivered = 0
+            ORDER BY created_at DESC LIMIT ?
+            """,
+            params,
+        )
+        return cursor.fetchall()
+
+
+def increment_delivery_attempts(alert_id: int) -> bool:
+    """Add one to an alert's ``delivery_attempts``; True if a row was updated."""
+    with get_connection() as conn:
+        updated_rows = conn.execute(
+            """
+            UPDATE organization_alerts
+            SET delivery_attempts = delivery_attempts + 1
+            WHERE id = ?
+            """,
+            (alert_id,),
+        ).rowcount
+        return updated_rows > 0

From d70356d5c88bafe753156c5b5776ea3ac3eccfd2 Mon Sep 17 00:00:00 2001
From: Presmanes <presmanes3@gmail.com>
Date: Sat, 16 May 2026 14:47:12 +0200
Subject: [PATCH 15/17] Add: alert delivery worker with SMTP, webhook and retry
 logic

---
 src/climatevision/workers/__init__.py       |   5 +
 src/climatevision/workers/alert_delivery.py | 223 ++++++++++++++++++++
 2 files changed, 228 insertions(+)
 create mode 100644 src/climatevision/workers/__init__.py
 create mode 100644 src/climatevision/workers/alert_delivery.py

diff --git a/src/climatevision/workers/__init__.py b/src/climatevision/workers/__init__.py
new file mode 100644
index 0000000..cd614e8
--- /dev/null
+++ b/src/climatevision/workers/__init__.py
@@ -0,0 +1,5 @@
+"""ClimateVision background workers for alert delivery."""
+
+from climatevision.workers.alert_delivery import process_alert_delivery
+
+__all__ = ["process_alert_delivery"]
diff --git a/src/climatevision/workers/alert_delivery.py b/src/climatevision/workers/alert_delivery.py
new file mode 100644
index 0000000..4b10be7
--- /dev/null
+++ b/src/climatevision/workers/alert_delivery.py
@@ -0,0 +1,223 @@
+"""Alert delivery worker with SMTP and webhook channels.
+
+Triggered via FastAPI BackgroundTasks on alert creation.
+Makes up to 3 delivery attempts with exponential backoff (60 s, then 120 s).
+"""
+
+import logging
+import os
+import smtplib
+import time
+from email.mime.multipart import MIMEMultipart
+from email.mime.text import MIMEText
+
+import requests
+
+from climatevision.db import (
+    get_alert,
+    get_organization,
+    get_subscription,
+    increment_delivery_attempts,
+    mark_alert_delivered,
+)
+
+logger = logging.getLogger(__name__)
+
+# Exponential backoff delays in seconds between attempts.
+_BACKOFF_DELAYS = [60, 120]
+
+
+def _smtp_configured() -> bool:
+    """Return True when both SMTP_HOST and SMTP_USER are set to non-empty values."""
+    return all(os.getenv(name) for name in ("SMTP_HOST", "SMTP_USER"))
+
+
+def send_email_smtp(to_email: str, subject: str, body: str) -> bool:
+    """Send an alert email via SMTP using environment credentials.
+
+    Args:
+        to_email: Recipient address.
+        subject: Email subject line.
+        body: Plain-text body.
+
+    Returns:
+        True if the server accepted the message; False if unconfigured or on error.
+    """
+    host = os.getenv("SMTP_HOST")
+    user = os.getenv("SMTP_USER")
+    password = os.getenv("SMTP_PASS")
+    from_addr = os.getenv("SMTP_FROM") or "alerts@climatevision.dev"
+
+    if not host or not user:
+        logger.warning("SMTP not configured — skipping email delivery")
+        return False
+
+    msg = MIMEMultipart()
+    msg["From"] = from_addr
+    msg["To"] = to_email
+    msg["Subject"] = subject
+    msg.attach(MIMEText(body, "plain"))
+    try:
+        # ``or`` + in-try parse: a blank or malformed SMTP_PORT is logged, not raised.
+        port = int(os.getenv("SMTP_PORT") or "587")
+        with smtplib.SMTP(host, port, timeout=30) as server:
+            server.starttls()
+            if password:
+                server.login(user, password)
+            server.send_message(msg)
+        logger.info("Email delivered to %s", to_email)
+        return True
+    except Exception:
+        logger.exception("Email delivery failed for %s", to_email)
+        return False
+
+
+def send_webhook(url: str, payload: dict) -> bool:
+    """POST an alert payload to a webhook URL.
+
+    Args:
+        url: Webhook endpoint.
+        payload: JSON-serializable dict with alert data.
+
+    Returns:
+        True only for a 2xx response; False for any other status or on error.
+    """
+    try:
+        resp = requests.post(url, json=payload, timeout=30)
+        if 200 <= resp.status_code < 300:
+            logger.info("Webhook accepted by %s", url)
+            return True
+        logger.warning("Webhook rejected by %s — status %s", url, resp.status_code)
+        return False
+    except Exception:
+        logger.exception("Webhook delivery failed for %s", url)
+        return False
+
+
+def _build_email_body(alert: dict) -> str:
+    """Render an alert mapping as a plain-text email body.
+
+    A ``Details:`` line is appended only when ``alert["details"]`` is truthy.
+    """
+    fields = (("Alert", "title"), ("Type", "alert_type"), ("Severity", "severity"))
+    parts = [f"{label}: {alert[key]}" for label, key in fields]
+    # Blank line, message, then an empty entry so the joined text ends in a newline.
+    parts += ["", alert["message"], ""]
+    details = alert.get("details")
+    if details:
+        parts.append(f"Details: {details}")
+    return "\n".join(parts)
+
+
+def _build_webhook_payload(alert: dict, org_id: int) -> dict:
+    """Assemble the JSON body POSTed to a webhook endpoint.
+
+    ``organization_id`` is taken from ``org_id``; ``details`` is None when the
+    alert has no such key.  All other values are copied from ``alert``.
+    """
+    payload = {"alert_id": alert["id"], "organization_id": org_id}
+    for key in ("alert_type", "severity", "title", "message"):
+        payload[key] = alert[key]
+    payload["details"] = alert.get("details")
+    payload["created_at"] = alert["created_at"]
+    return payload
+
+
+def process_alert_delivery(alert_id: int) -> None:
+    """Deliver an alert via its configured channel with retries.
+
+    Reads the alert and its linked subscription/organization from the
+    database, determines the notification channel, and attempts delivery
+    up to ``len(_BACKOFF_DELAYS) + 1`` times (three with the current table),
+    sleeping for the next ``_BACKOFF_DELAYS`` entry after each failure.
+    The channel is ``"email"`` unless a subscription row says otherwise.
+
+    Args:
+        alert_id: Primary key of the alert to deliver.
+    """
+    alert_row = get_alert(alert_id)
+    if alert_row is None:
+        logger.error("Alert %s not found", alert_id)
+        return
+
+    alert = dict(alert_row)
+
+    if alert["delivered"]:
+        logger.info("Alert %s already delivered — skipping", alert_id)
+        return
+
+    org_row = get_organization(alert["organization_id"])
+    if org_row is None:
+        logger.error("Organization %s for alert %s not found", alert["organization_id"], alert_id)
+        return
+
+    org = dict(org_row)
+
+    subscription = None
+    if alert["subscription_id"] is not None:
+        sub_row = get_subscription(alert["subscription_id"])
+        if sub_row is not None:
+            subscription = dict(sub_row)
+
+    channel = subscription["notification_channel"] if subscription else "email"
+
+    # Resolve the channel ONCE into a zero-argument ``send`` callable; the
+    # message/payload is loop-invariant, so it is not rebuilt per attempt.
+    if channel == "email":
+        contact = org.get("contact_email")
+        if not contact:
+            logger.warning(
+                "Organization %s has no contact_email — skipping email delivery",
+                org["id"],
+            )
+            return
+        subject = f"[ClimateVision Alert] {alert['title']}"
+        body = _build_email_body(alert)
+
+        def send() -> bool:
+            return send_email_smtp(to_email=contact, subject=subject, body=body)
+
+    elif channel == "webhook":
+        webhook_url = subscription.get("webhook_url") if subscription else None
+        if not webhook_url:
+            logger.warning(
+                "Subscription for alert %s has no webhook_url — skipping webhook delivery",
+                alert_id,
+            )
+            return
+        payload = _build_webhook_payload(alert, org["id"])
+
+        def send() -> bool:
+            return send_webhook(url=webhook_url, payload=payload)
+
+    elif channel == "api":
+        # API delivery is implicit — the alert exists in the DB and is
+        # already queryable via the REST endpoints.
+        logger.info("API channel — alert %s is already queryable", alert_id)
+        mark_alert_delivered(alert_id)
+        return
+
+    else:
+        logger.warning("Unknown notification channel '%s' for alert %s", channel, alert_id)
+        return
+
+    # One attempt per backoff delay plus a final attempt with no sleep after
+    # it; deriving the count from the table keeps the two from drifting apart.
+    max_attempts = len(_BACKOFF_DELAYS) + 1
+    for attempt in range(1, max_attempts + 1):
+        if send():
+            mark_alert_delivered(alert_id)
+            logger.info("Alert %s delivered successfully on attempt %d", alert_id, attempt)
+            return
+
+        # Record the failed attempt.
+        increment_delivery_attempts(alert_id)
+        logger.warning("Alert %s delivery attempt %d failed", alert_id, attempt)
+
+        # Back off before the next retry (if any remain).
+        if attempt < max_attempts:
+            delay = _BACKOFF_DELAYS[attempt - 1]
+            logger.info("Retrying alert %s in %d seconds", alert_id, delay)
+            time.sleep(delay)
+
+    logger.error("Alert %s delivery failed after %d attempts", alert_id, max_attempts)

From 69757a79d2bb9738f82d9d9ad95f02458c17e7f2 Mon Sep 17 00:00:00 2001
From: Presmanes <presmanes3@gmail.com>
Date: Sat, 16 May 2026 14:47:17 +0200
Subject: [PATCH 16/17] Add: GET /alerts/pending endpoint and BackgroundTasks
 integration

---
 src/climatevision/api/main.py | 36 +++++++++++++++++++++++++++++++++--
 1 file changed, 34 insertions(+), 2 deletions(-)

diff --git a/src/climatevision/api/main.py b/src/climatevision/api/main.py
index 729b213..2663ff2 100644
--- a/src/climatevision/api/main.py
+++ b/src/climatevision/api/main.py
@@ -20,7 +20,7 @@
 
 from pydantic import field_validator
 
-from fastapi import FastAPI, File, Form, HTTPException, UploadFile, Header, Query, Depends, Request
+from fastapi import FastAPI, File, Form, HTTPException, UploadFile, Header, Query, Depends, Request, BackgroundTasks
 from fastapi.responses import RedirectResponse
 from fastapi.middleware.cors import CORSMiddleware
 from fastapi.staticfiles import StaticFiles
@@ -39,10 +39,12 @@
     get_subscriptions_for_organization,
     create_organization_alert,
     get_alerts_for_organization,
+    get_pending_alerts,
     acknowledge_alert,
     mark_alert_delivered,
 )
 from climatevision.inference import run_inference_from_file, run_inference_from_gee
+from climatevision.workers.alert_delivery import process_alert_delivery
 from climatevision.api.auth import require_api_key
 
 logger = logging.getLogger(__name__)
@@ -877,13 +879,41 @@ def list_org_alerts(
             for alert in alerts
         ]
 
+    @app.get("/api/organizations/{org_id}/alerts/pending")
+    def list_pending_alerts(
+        org_id: int,
+        limit: int = Query(default=50, ge=1, le=200),
+    ) -> list[AlertResponse]:
+        """List pending (undelivered) alerts for monitoring.
+
+        ``limit`` must be 1-200: SQLite treats a negative LIMIT as "no limit",
+        which would bypass the cap.  Responds 404 for an unknown organization.
+        """
+        if not get_organization(org_id):
+            raise HTTPException(status_code=404, detail="Organization not found")
+        return [
+            AlertResponse(
+                id=alert["id"],
+                organization_id=alert["organization_id"],
+                alert_type=alert["alert_type"],
+                severity=alert["severity"],
+                title=alert["title"],
+                message=alert["message"],
+                delivered=bool(alert["delivered"]),
+                acknowledged=bool(alert["acknowledged"]),
+                created_at=alert["created_at"],
+            )
+            for alert in get_pending_alerts(org_id, limit=limit)
+        ]
+
     @app.post("/api/organizations/{org_id}/alerts")
     def create_org_alert(
         org_id: int,
         body: CreateAlertRequest,
+        background_tasks: BackgroundTasks,
         org: dict[str, Any] = Depends(require_api_key),
     ) -> AlertResponse:
-        """Create a new alert for an organization."""
+        """Create a new alert for an organization and queue background delivery."""
         org = get_organization(org_id)
         if not org:
             raise HTTPException(status_code=404, detail="Organization not found")
@@ -899,6 +929,8 @@ def create_org_alert(
             details=body.details,
         )
         
+        background_tasks.add_task(process_alert_delivery, alert_id)
+        
         return AlertResponse(
             id=alert_id,
             organization_id=org_id,

From bab0cbb74ec2d1165b44a9c28ca984f63453a436 Mon Sep 17 00:00:00 2001
From: Presmanes <presmanes3@gmail.com>
Date: Sat, 16 May 2026 14:47:21 +0200
Subject: [PATCH 17/17] Test: alert delivery worker and pending endpoint

---
 tests/conftest.py                    |  55 +++++
 tests/workers/conftest.py            |  34 +++
 tests/workers/test_alert_delivery.py | 313 +++++++++++++++++++++++++++
 3 files changed, 402 insertions(+)
 create mode 100644 tests/workers/conftest.py
 create mode 100644 tests/workers/test_alert_delivery.py

diff --git a/tests/conftest.py b/tests/conftest.py
index 8ebffc5..2629ba8 100644
--- a/tests/conftest.py
+++ b/tests/conftest.py
@@ -1,8 +1,63 @@
 """Pytest fixtures for ClimateVision."""
 
+import sys
+from unittest import mock
+
 import pytest
 from fastapi.testclient import TestClient
 
+# Conditionally stub heavy ML dependencies when they are not installed.
+# This allows the import graph (api → inference → models → torch) to
+# resolve in CI / torch-free environments.  Tests that exercise actual
+# model logic will still fail — only the import chain is unblocked.
+_MISSING_MODULES: list[str] = []
+
+try:
+    import torch  # noqa: F401
+except ImportError:
+    _MISSING_MODULES.extend([
+        "torch", "torch.utils", "torch.utils.data",
+        "torch.nn", "torch.nn.functional",
+    ])
+
+try:
+    import torchvision  # noqa: F401
+except ImportError:
+    _MISSING_MODULES.append("torchvision")
+
+try:
+    import cv2  # noqa: F401
+except ImportError:
+    _MISSING_MODULES.append("cv2")
+
+try:
+    import rasterio  # noqa: F401
+except ImportError:
+    _MISSING_MODULES.append("rasterio")
+
+try:
+    import shapely  # noqa: F401
+except ImportError:
+    _MISSING_MODULES.extend(["shapely", "shapely.geometry"])
+
+try:
+    import geopandas  # noqa: F401
+except ImportError:
+    _MISSING_MODULES.append("geopandas")
+
+try:
+    import sklearn  # noqa: F401
+except ImportError:
+    _MISSING_MODULES.extend(["sklearn", "sklearn.metrics"])
+
+try:
+    import albumentations  # noqa: F401
+except ImportError:
+    _MISSING_MODULES.append("albumentations")
+
+for mod_name in _MISSING_MODULES:
+    sys.modules[mod_name] = mock.MagicMock()
+
 from climatevision.api.main import create_app
 
 
diff --git a/tests/workers/conftest.py b/tests/workers/conftest.py
new file mode 100644
index 0000000..8b2c28b
--- /dev/null
+++ b/tests/workers/conftest.py
@@ -0,0 +1,34 @@
+"""Pytest fixtures for alert delivery worker tests."""
+
+import sys
+from unittest import mock
+
+import pytest
+from fastapi.testclient import TestClient
+
+# Stub heavy ML dependencies only when they cannot be imported, so real
+# torch / rasterio / opencv installs are not replaced for other tests.
+for _stub_name in (
+    "torch", "torch.utils", "torch.utils.data",
+    "torch.nn", "torch.nn.functional",
+    "torchvision", "torchvision.transforms",
+    "rasterio",
+    "cv2",
+    "sklearn", "sklearn.metrics",
+    "albumentations",
+    "geopandas",
+    "shapely", "shapely.geometry",
+):
+    try:
+        __import__(_stub_name)
+    except ImportError:
+        sys.modules[_stub_name] = mock.MagicMock()
+
+from climatevision.api.main import create_app
+
+
+@pytest.fixture
+def client() -> TestClient:
+    """Return a TestClient wrapping a freshly created application."""
+    # No fixture scope is given, so create_app() runs for every requesting test.
+    return TestClient(create_app())
diff --git a/tests/workers/test_alert_delivery.py b/tests/workers/test_alert_delivery.py
new file mode 100644
index 0000000..1592169
--- /dev/null
+++ b/tests/workers/test_alert_delivery.py
@@ -0,0 +1,313 @@
+"""Tests for alert delivery worker and pending endpoint."""
+
+from unittest import mock
+
+import pytest
+from fastapi.testclient import TestClient
+
+from climatevision.workers.alert_delivery import (
+    process_alert_delivery,
+    send_email_smtp,
+    send_webhook,
+)
+
+
+class TestPendingEndpoint:
+    """Integration tests for GET /api/organizations/{org_id}/alerts/pending."""
+
+    @pytest.fixture
+    def org(self, client: TestClient) -> dict:
+        """Create an organization via POST /api/organizations and return its JSON body."""
+        response = client.post(
+            "/api/organizations",
+            json={
+                "name": "Test NGO",
+                "type": "ngo",
+                "contact_email": "test@example.com",
+            },
+            headers={"X-API-Key": "cv_dev"},
+        )
+        assert response.status_code == 200
+        return response.json()
+
+    def test_pending_endpoint_returns_undelivered(
+        self, client: TestClient, org: dict
+    ) -> None:
+        """A new alert is listed as pending until POST /api/alerts/{id}/deliver is called."""
+        org_id = org["id"]
+
+        with mock.patch("climatevision.api.main.process_alert_delivery"):
+            resp = client.post(
+                f"/api/organizations/{org_id}/alerts",
+                json={
+                    "alert_type": "deforestation",
+                    "severity": "high",
+                    "title": "Forest loss detected",
+                    "message": "20% forest cover lost.",
+                },
+                headers={"X-API-Key": "cv_dev"},
+            )
+            assert resp.status_code == 200
+            alert = resp.json()
+
+        resp = client.get(
+            f"/api/organizations/{org_id}/alerts/pending",
+            headers={"X-API-Key": "cv_dev"},
+        )
+        assert resp.status_code == 200
+        pending = resp.json()
+        assert len(pending) == 1
+        assert pending[0]["id"] == alert["id"]
+        assert pending[0]["delivered"] is False
+
+        client.post(
+            f"/api/alerts/{alert['id']}/deliver",
+            headers={"X-API-Key": "cv_dev"},
+        )
+
+        resp = client.get(
+            f"/api/organizations/{org_id}/alerts/pending",
+            headers={"X-API-Key": "cv_dev"},
+        )
+        assert resp.status_code == 200
+        pending = resp.json()
+        assert len(pending) == 0
+
+    def test_create_alert_triggers_background_delivery(
+        self, client: TestClient, org: dict
+    ) -> None:
+        """Creating an alert calls process_alert_delivery exactly once with an int alert id."""
+        org_id = org["id"]
+
+        with mock.patch("climatevision.api.main.process_alert_delivery") as mock_deliver:
+            resp = client.post(
+                f"/api/organizations/{org_id}/alerts",
+                json={
+                    "alert_type": "flooding",
+                    "severity": "critical",
+                    "title": "Flood alert",
+                    "message": "Severe flooding detected.",
+                },
+                headers={"X-API-Key": "cv_dev"},
+            )
+            assert resp.status_code == 200
+
+        mock_deliver.assert_called_once()
+        alert_id = mock_deliver.call_args[0][0]
+        assert isinstance(alert_id, int)
+
+
+class TestEmailDelivery:
+    """Unit tests for send_email_smtp."""
+
+    def test_email_delivery_success(self) -> None:
+        """With SMTP_* set and smtplib.SMTP patched: STARTTLS, login and send each run once."""
+        env = {
+            "SMTP_HOST": "smtp.example.com",
+            "SMTP_PORT": "587",
+            "SMTP_USER": "user",
+            "SMTP_PASS": "pass",
+            "SMTP_FROM": "from@example.com",
+        }
+        with mock.patch.dict("os.environ", env, clear=False), mock.patch(
+            "climatevision.workers.alert_delivery.smtplib.SMTP"
+        ) as mock_smtp:
+            instance = mock_smtp.return_value.__enter__.return_value
+            result = send_email_smtp("to@example.com", "Subject", "Body")
+            assert result is True
+            instance.starttls.assert_called_once()
+            instance.login.assert_called_once_with("user", "pass")
+            instance.send_message.assert_called_once()
+
+    def test_email_delivery_skips_when_not_configured(self) -> None:
+        """When SMTP_HOST is an empty string, send_email_smtp returns False."""
+        with mock.patch.dict("os.environ", {"SMTP_HOST": ""}, clear=False):
+            result = send_email_smtp("to@example.com", "Subject", "Body")
+            assert result is False
+
+
+class TestWebhookDelivery:
+    """Unit tests for send_webhook with requests.post patched."""
+
+    def test_webhook_delivery_success(self) -> None:
+        """A 200 response yields True; the POST carries the JSON payload and a 30 s timeout."""
+        with mock.patch(
+            "climatevision.workers.alert_delivery.requests.post"
+        ) as mock_post:
+            mock_post.return_value.status_code = 200
+            result = send_webhook("https://example.com/hook", {"key": "value"})
+            assert result is True
+            mock_post.assert_called_once_with(
+                "https://example.com/hook",
+                json={"key": "value"},
+                timeout=30,
+            )
+
+    def test_webhook_delivery_failure(self) -> None:
+        """A 500 response yields False."""
+        with mock.patch(
+            "climatevision.workers.alert_delivery.requests.post"
+        ) as mock_post:
+            mock_post.return_value.status_code = 500
+            result = send_webhook("https://example.com/hook", {"key": "value"})
+            assert result is False
+
+
+class TestProcessAlertDelivery:
+    """Unit tests for process_alert_delivery with DB helpers and senders patched."""
+
+    def test_skip_already_delivered(self) -> None:
+        """An alert whose ``delivered`` flag is set must not reach send_email_smtp."""
+        alert = {"id": 1, "delivered": 1, "organization_id": 1, "subscription_id": None}
+
+        with mock.patch(
+            "climatevision.workers.alert_delivery.get_alert", return_value=alert
+        ), mock.patch(
+            "climatevision.workers.alert_delivery.send_email_smtp"
+        ) as mock_email:
+            process_alert_delivery(1)
+            mock_email.assert_not_called()
+
+    def test_delivery_retry_on_failure(self) -> None:
+        """Three failed sends: three increments, sleeps of 60 s and 120 s, never marked delivered."""
+        alert = {
+            "id": 1,
+            "organization_id": 1,
+            "subscription_id": 1,
+            "alert_type": "deforestation",
+            "severity": "high",
+            "title": "Test",
+            "message": "Msg",
+            "details": None,
+            "created_at": "2024-01-01T00:00:00",
+            "delivered": 0,
+        }
+        org = {"id": 1, "contact_email": "test@example.com"}
+        sub = {"id": 1, "notification_channel": "email", "webhook_url": None}
+
+        with (
+            mock.patch(
+                "climatevision.workers.alert_delivery.get_alert", return_value=alert
+            ),
+            mock.patch(
+                "climatevision.workers.alert_delivery.get_organization",
+                return_value=org,
+            ),
+            mock.patch(
+                "climatevision.workers.alert_delivery.get_subscription",
+                return_value=sub,
+            ),
+            mock.patch(
+                "climatevision.workers.alert_delivery.send_email_smtp",
+                return_value=False,
+            ) as mock_email,
+            mock.patch(
+                "climatevision.workers.alert_delivery.increment_delivery_attempts"
+            ) as mock_incr,
+            mock.patch(
+                "climatevision.workers.alert_delivery.time.sleep"
+            ) as mock_sleep,
+            mock.patch(
+                "climatevision.workers.alert_delivery.mark_alert_delivered"
+            ) as mock_mark,
+        ):
+            process_alert_delivery(1)
+
+            assert mock_email.call_count == 3
+            assert mock_sleep.call_count == 2
+            mock_sleep.assert_any_call(60)
+            mock_sleep.assert_any_call(120)
+            assert mock_incr.call_count == 3
+            mock_mark.assert_not_called()
+
+    def test_max_retries_exhausted(self) -> None:
+        """After three failed sends the alert is never passed to mark_alert_delivered."""
+        alert = {
+            "id": 1,
+            "organization_id": 1,
+            "subscription_id": 1,
+            "alert_type": "deforestation",
+            "severity": "high",
+            "title": "Test",
+            "message": "Msg",
+            "details": None,
+            "created_at": "2024-01-01T00:00:00",
+            "delivered": 0,
+        }
+        org = {"id": 1, "contact_email": "test@example.com"}
+        sub = {"id": 1, "notification_channel": "email", "webhook_url": None}
+
+        with (
+            mock.patch(
+                "climatevision.workers.alert_delivery.get_alert", return_value=alert
+            ),
+            mock.patch(
+                "climatevision.workers.alert_delivery.get_organization",
+                return_value=org,
+            ),
+            mock.patch(
+                "climatevision.workers.alert_delivery.get_subscription",
+                return_value=sub,
+            ),
+            mock.patch(
+                "climatevision.workers.alert_delivery.send_email_smtp",
+                return_value=False,
+            ),
+            mock.patch(
+                "climatevision.workers.alert_delivery.increment_delivery_attempts"
+            ) as mock_incr,
+            mock.patch("climatevision.workers.alert_delivery.time.sleep"),
+            mock.patch(
+                "climatevision.workers.alert_delivery.mark_alert_delivered"
+            ) as mock_mark,
+        ):
+            process_alert_delivery(1)
+
+            assert mock_incr.call_count == 3
+            mock_mark.assert_not_called()
+
+    def test_exponential_backoff_timing(self) -> None:
+        """The patched time.sleep is called with 60 and then 120, in that order."""
+        alert = {
+            "id": 1,
+            "organization_id": 1,
+            "subscription_id": 1,
+            "alert_type": "deforestation",
+            "severity": "high",
+            "title": "Test",
+            "message": "Msg",
+            "details": None,
+            "created_at": "2024-01-01T00:00:00",
+            "delivered": 0,
+        }
+        org = {"id": 1, "contact_email": "test@example.com"}
+        sub = {"id": 1, "notification_channel": "email", "webhook_url": None}
+
+        with (
+            mock.patch(
+                "climatevision.workers.alert_delivery.get_alert", return_value=alert
+            ),
+            mock.patch(
+                "climatevision.workers.alert_delivery.get_organization",
+                return_value=org,
+            ),
+            mock.patch(
+                "climatevision.workers.alert_delivery.get_subscription",
+                return_value=sub,
+            ),
+            mock.patch(
+                "climatevision.workers.alert_delivery.send_email_smtp",
+                return_value=False,
+            ),
+            mock.patch(
+                "climatevision.workers.alert_delivery.increment_delivery_attempts"
+            ),
+            mock.patch(
+                "climatevision.workers.alert_delivery.time.sleep"
+            ) as mock_sleep,
+            mock.patch("climatevision.workers.alert_delivery.mark_alert_delivered"),
+        ):
+            process_alert_delivery(1)
+
+            delays = [call[0][0] for call in mock_sleep.call_args_list]
+            assert delays == [60, 120]