HackYourFuture · lassebenni · Jun 9, 2026 · Jun 9, 2026 · Jun 9, 2026 · Jun 9, 2026
diff --git a/.hyf/test.sh b/.hyf/test.sh
@@ -1,13 +1,74 @@
 #!/usr/bin/env bash
-set -euo pipefail
+# NOTE(review): -e is left out, presumably because the checks below rely on
+# commands that legitimately exit non-zero (e.g. grep finding no match) and the
+# script must still reach the score.json write at the end.
+set -uo pipefail
 
-# Run your test scripts here.
-# Auto grade tool will execute this file within the .hyf working directory.
-# The result should be stored in score.json file with the format shown below.
-cat << EOF > score.json
+# Week 12 autograder.
+# Ladder: the untouched scaffold must FAIL; a completed Minimum solution passes.
+# Work-verifying checks (not mere file presence):
+#   - metric Names filled with real (non-placeholder) values
+#   - Panel 1 implemented (no NotImplementedError / TODO: implement left)
+#   - AI_ASSIST.md filled with real content
+
+PASSING=6
+SCORE=0
+
+# check LABEL RESULT
+#   Prints "  PASS  LABEL" and adds one point to the global SCORE when RESULT
+#   is the literal string "true"; for any other RESULT prints "  FAIL  LABEL"
+#   and leaves SCORE untouched.
+check() {
+    case "$2" in
+        true)
+            echo "  PASS  $1"
+            SCORE=$((SCORE + 1))
+            ;;
+        *)
+            echo "  FAIL  $1"
+            ;;
+    esac
+}
+
+# Run from this script's own directory so the ../ paths below (and score.json)
+# resolve the same way whether the script is started inside .hyf or invoked as
+# `bash .hyf/test.sh` from the repository root.
+cd "$(dirname "${BASH_SOURCE[0]:-$0}")" \
+    || echo "warning: could not cd to the script directory; paths may not resolve" >&2
+
+METRIC_FILE="../task-1/metric_definitions.md"
+APP_FILE="../task-2/app.py"
+AI_FILE="../AI_ASSIST.md"
+
+# 1. metric_definitions.md exists
+if [ -f "$METRIC_FILE" ]; then
+    check "metric_definitions.md exists" "true"
+else
+    check "metric_definitions.md exists" "false"
+fi
+
+# 2. >=3 metric Name fields filled with a REAL value (not empty, not _(...)_ placeholder).
+#    Spacing around the cell borders is matched loosely so a table that a Markdown
+#    formatter re-aligned (padded cells, indented rows) is still recognised.
+#    Pipeline: keep the Name rows -> extract the Value cell -> drop blank cells
+#    -> count the cells that do not contain the "_(" placeholder marker.
+#    A missing file makes the first grep fail quietly and yields a count of 0.
+FILLED=$(grep -E '^[[:space:]]*\|[[:space:]]*\*\*Name\*\*[[:space:]]*\|' "$METRIC_FILE" 2>/dev/null \
+    | sed -E 's/^[[:space:]]*\|[[:space:]]*\*\*Name\*\*[[:space:]]*\|([^|]*)\|.*/\1/' \
+    | grep -vE '^[[:space:]]*$' | grep -cvF '_(')
+if [ "${FILLED:-0}" -ge 3 ]; then
+    check "3+ metric Name fields filled" "true"
+else
+    check "3+ metric Name fields filled (found ${FILLED:-0})" "false"
+fi
+
+# 3. task-2/app.py exists
+if [ -f "$APP_FILE" ]; then
+    check "task-2/app.py exists" "true"
+else
+    check "task-2/app.py exists" "false"
+fi
+
+# 4. Panel 1 implemented: no NotImplementedError / TODO: implement remaining.
+#    A missing app.py fails this check explicitly: grep on a missing file also
+#    exits non-zero, which would otherwise be read as "no markers left".
+if [ ! -f "$APP_FILE" ] \
+    || grep -qE 'raise NotImplementedError|TODO: implement' "$APP_FILE" 2>/dev/null; then
+    check "Panel 1 implemented (get_dag_runs)" "false"
+else
+    check "Panel 1 implemented (get_dag_runs)" "true"
+fi
+
+# 5. app.py uses @st.cache_data (quality: do not strip caching).
+#    -F matches the text literally, so the "." is not treated as a wildcard.
+if grep -qF "st.cache_data" "$APP_FILE" 2>/dev/null; then
+    check "app.py uses @st.cache_data" "true"
+else
+    check "app.py uses @st.cache_data" "false"
+fi
+
+# 6. app.py has no hardcoded password: flags password = "..." and password = '...'
+#    with a non-empty literal. A missing app.py fails here too, for the same
+#    reason as check 4.
+if [ ! -f "$APP_FILE" ] \
+    || grep -qE "password[[:space:]]*=[[:space:]]*[\"'][^\"']+[\"']" "$APP_FILE" 2>/dev/null; then
+    check "app.py has no hardcoded password" "false"
+else
+    check "app.py has no hardcoded password" "true"
+fi
+
+# 7. AI_ASSIST.md filled: >=3 real content lines (exclude blanks, headings, placeholders).
+#    Both lines of the template's instruction paragraph (starting "Document" and
+#    "and what you accepted") are excluded, so the untouched template counts as 0
+#    and the trainee has to add 3 lines of their own.
+if [ -f "$AI_FILE" ]; then
+    AI_LINES=$(grep -vE '^[[:space:]]*$|^#|^⚠️|^Document|^and what you accepted' "$AI_FILE" \
+        | grep -cvF '_(')
+    if [ "${AI_LINES:-0}" -ge 3 ]; then
+        check "AI_ASSIST.md documents LLM usage" "true"
+    else
+        check "AI_ASSIST.md documents LLM usage (needs 3+ entries)" "false"
+    fi
+else
+    check "AI_ASSIST.md exists" "false"
+fi
+
+# Pass only when the number of passed checks reaches the PASSING threshold.
+if [ "$SCORE" -ge "$PASSING" ]; then PASS=true; else PASS=false; fi
+
+# Write the result to score.json in the current directory. The heredoc delimiter
+# is unquoted, so $SCORE, $PASS and $PASSING are expanded into the JSON.
+cat > score.json <<JSONEOF
 {
-  "score": 0,
-  "pass": true,
-  "passingScore": 0
+  "score": $SCORE,
+  "pass": $PASS,
+  "passingScore": $PASSING
 }
-EOF
+JSONEOF
+
+# Human-readable summary; the threshold is printed from $PASSING so it cannot
+# drift from the value written to score.json.
+echo ""
+echo "Score: $SCORE / 7  (passingScore $PASSING)  pass: $PASS"
diff --git a/AI_ASSIST.md b/AI_ASSIST.md
@@ -0,0 +1,10 @@
+# AI Assistance
+
+Document how you used an LLM on this assignment: which tool, what you asked,
+and what you accepted or rejected. One bullet per use is enough.
+
+⚠️ Never paste real credentials, personal data, or unapproved company data into an LLM.
+
+- _(tool + what you asked + what you did with the answer)_
+- _(...)_
+- _(...)_
diff --git a/README.md b/README.md
@@ -1,17 +1,51 @@
-# [Track] week X assignment
-HackYourFuture <Track> week X assignment
-The Week X assignment for the HackYourFuture <TRACK> can be found at the following link: [TODO: Assignment url in the learning platform]
+# Data Track: Week 12 Assignment
 
+Week 12 assignment for the [HackYourFuture Data Track](https://www.notion.so/hackyourfuture/Data-Track-Overview).
 
-## Implementation Instructions
+The assignment for this week can be found at: [Week 12: Assignment](https://www.notion.so/hackyourfuture/Assignment-Build-Two-Dashboards)
 
-Provide clear instructions on how trainees should implement the tasks.
+## What you will build
 
-### Task 1
-Instructions for Task 1
+- **Task 1: Metabase dashboard** — three analytical Questions on your dbt mart tables, arranged into a dashboard with a date filter, and a `metric_definitions.md` file documenting each panel.
+- **Task 2: Streamlit engineering dashboard** — a Python app that shows Airflow DAG run status and data freshness from Azure Postgres.
 
-### Task 2
-Instructions for Task 2
+## Getting started
 
-...
+### Task 1: Metabase
 
+1. Log in to the HYF Metabase instance (URL in `task-1/README.md`).
+2. Build three Questions in SQL mode on your `dev_<name>.fct_trips` table.
+3. Arrange them into a Dashboard and add a date-range filter.
+4. Fill in `task-1/metric_definitions.md` for each panel.
+5. Copy the dashboard URL (or take screenshots) into `task-1/README.md`.
+
+### Task 2: Streamlit
+
+1. Open `task-2/app.py` and follow the `TODO` comments.
+2. Copy `task-2/.env.example` to `task-2/.env` and fill in your credentials (never commit `.env`).
+3. Run `pip install -r task-2/requirements.txt`.
+4. Run `streamlit run task-2/app.py` and verify the panels load with real data.
+5. Push `task-2/` to GitHub.
+
+## Autograder
+
+```bash
+bash .hyf/test.sh
+```
+
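+The scaffold returns `pass: false`. It returns `pass: true` once at least 6 of the 7 checks pass. Besides the two file-existence checks, the autograder verifies that:
+- `task-1/metric_definitions.md` has a real (non-placeholder) **Name** value for at least 3 metrics
+- `task-2/app.py` implements `get_dag_runs` (no `raise NotImplementedError` or `TODO: implement` left), still contains `st.cache_data`, and has no hardcoded passwords
+- `AI_ASSIST.md` contains at least 3 lines of real content (not headings or `_(...)_` placeholders)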
+
+## Submission
+
+Submit the following in the class assignment tracker:
+
+| Item | Required for |
+|---|---|
+| Metabase dashboard link / screenshots | Minimum |
+| `task-2/` GitHub link | Minimum |
+| `task-1/metric_definitions.md` | Minimum |
+| Date filter on ≥2 Questions | Target |
+| Streamlit freshness panel | Target |
+| 5-minute presentation recording link | Target |
diff --git a/task-1/README.md b/task-1/README.md
@@ -0,0 +1,27 @@
+# Task 1: Metabase Analytical Dashboard
+
+## Metabase instance
+
+```text
+https://metabase-hyf.politepebble-abd3ebc2.westeurope.azurecontainerapps.io
+```
+
+Log in with the credentials your teacher provided.
+
+## Your dashboard URL
+
+<!-- Replace the line below with your actual dashboard URL -->
+**Dashboard URL:** _(paste here after building)_
+
+<!-- OR attach screenshots if a public link is unavailable -->
+
+## Screenshots (optional)
+
+Add screenshots here if the public link is not shareable.
+
+## Checklist
+
+- [ ] 3 Questions built in SQL mode on `dev_<name>.fct_trips` or `fct_daily_borough_stats`
+- [ ] Questions arranged in one Dashboard named "NYC Taxi Analytics: [Your Name]"
+- [ ] Date-range filter connected to at least 2 Questions (Target)
+- [ ] `metric_definitions.md` filled in for all panels
diff --git a/task-1/metric_definitions.md b/task-1/metric_definitions.md
@@ -0,0 +1,40 @@
+# Metric Definitions
+
+Document every Metabase panel here using all five fields.
+A metric without a complete definition will not pass the grading criteria.
+
+---
+
+## Metric 1: _(name)_
+
+| Field | Value |
+|---|---|
+| **Name** | _(e.g. `trip_count_by_borough`)_ |
+| **Description** | _(one sentence: what does this number measure, for whom?)_ |
+| **Calculation** | _(the SQL logic or formula: e.g. `COUNT(*) FROM fct_trips GROUP BY pickup_borough`)_ |
+| **Data source** | _(table + schema: e.g. `dev_yourname.fct_trips`)_ |
+| **Refresh frequency** | _(e.g. Daily, after 03:00 UTC Airflow run)_ |
+
+---
+
+## Metric 2: _(name)_
+
+| Field | Value |
+|---|---|
+| **Name** | |
+| **Description** | |
+| **Calculation** | |
+| **Data source** | |
+| **Refresh frequency** | |
+
+---
+
+## Metric 3: _(name)_
+
+| Field | Value |
+|---|---|
+| **Name** | |
+| **Description** | |
+| **Calculation** | |
+| **Data source** | |
+| **Refresh frequency** | |
diff --git a/task-2/.env.example b/task-2/.env.example
@@ -0,0 +1,9 @@
+# Copy this file to .env and fill in your values.
+# .env is in .gitignore — never commit it.
+
+AIRFLOW_URL=https://your-airflow-instance.example.com
+AIRFLOW_USER=admin
+AIRFLOW_PASS=your-airflow-password
+
+# PostgreSQL connection string (format: postgresql://user:password@host:5432/dbname)
+PG_URL=postgresql://pipeline_user:your-pg-password@your-pg-host:5432/team1
diff --git a/task-2/app.py b/task-2/app.py
@@ -0,0 +1,81 @@
+"""
+Week 12 Engineering Dashboard: Streamlit app
+
+Run (from the task-2/ directory):  streamlit run app.py
+
+Credentials: copy .env.example to .env and fill in values.
+             Load them with python-dotenv (listed in requirements.txt) by
+             uncommenting the load_dotenv() lines below.
+             Never hardcode credentials in this file.
+"""
+
+import os
+
+import requests
+import streamlit as st
+
+# TODO: uncomment these two lines after installing python-dotenv
+# from dotenv import load_dotenv
+# load_dotenv()
+
+# Connection settings are read from environment variables; each falls back to
+# an empty string when the variable is not set.
+AIRFLOW_URL = os.environ.get("AIRFLOW_URL", "")
+AIRFLOW_USER = os.environ.get("AIRFLOW_USER", "")
+AIRFLOW_PASS = os.environ.get("AIRFLOW_PASS", "")
+PG_URL = os.environ.get("PG_URL", "")  # postgresql://user:pass@host/db
+
+# Streamlit page setup: page title, wide layout, and the on-page heading.
+st.set_page_config(page_title="Pipeline Health", layout="wide")
+st.title("Pipeline Health Dashboard")
+
+
+# ── Panel 1: Last DAG run status ─────────────────────────────────────────────
+
+
+# Results are cached by Streamlit; ttl=60 is the cache lifetime in seconds.
+@st.cache_data(ttl=60)
+def get_dag_runs(dag_id: str, limit: int = 10) -> list:
+    """Return recent DAG runs from the Airflow REST API.
+
+    Scaffold stub: the body below is not implemented yet.
+
+    Args:
+        dag_id: Airflow DAG id whose runs are requested.
+        limit: presumably the maximum number of runs to return (the Panel 1
+            code passes limit=1 and reads the first element); confirm when
+            implementing.
+
+    Returns:
+        A list of run dicts, each with "state", "start_date" and "end_date".
+
+    Raises:
+        NotImplementedError: for as long as this stub is left unimplemented.
+    """
+    # TODO: implement this function
+    # Endpoint: GET {AIRFLOW_URL}/api/v2/dags/{dag_id}/dagRuns
+    # Auth (Airflow 3): POST {AIRFLOW_URL}/auth/token with username/password to
+    #   get an access_token, then send headers={"Authorization": f"Bearer {token}"}.
+    #   Basic Auth (auth=(user, pass)) returns 401 on Airflow 3. Cache the token.
+    # Return: list of run dicts, each with "state", "start_date", "end_date"
+    raise NotImplementedError("TODO: implement get_dag_runs")
+
+
+dag_id = "ingest_taxi_month"  # TODO: on the shared Airflow your DAG id is
+#                                 prefixed: <yourname>_ingest_taxi_month
+
+st.subheader("Last DAG run")
+try:
+    # Only the first returned run is displayed.
+    # NOTE(review): this assumes get_dag_runs returns the newest run first;
+    # confirm the ordering when implementing it.
+    runs = get_dag_runs(dag_id, limit=1)
+    if runs:
+        last = runs[0]
+        state = last["state"]
+        # Pick the banner style from the run state; any state other than
+        # "success" or "failed" is shown as a warning.
+        if state == "success":
+            st.success(f"Last run: **{state}**, started {last['start_date']}")
+        elif state == "failed":
+            st.error(f"Last run: **{state}**, check Airflow logs")
+        else:
+            st.warning(f"Last run: **{state}**")
+    else:
+        st.info("No runs found for this DAG.")
+except NotImplementedError:
+    # The unimplemented scaffold stub ends up here: show a hint, not an error.
+    st.warning("Panel 1: implement `get_dag_runs` to show live data.")
+except Exception as exc:
+    # Any other failure inside the try block is reported as an Airflow
+    # connectivity problem.
+    st.error(f"Could not reach Airflow: {exc}")
+
+
+# ── Panel 2 (Target): Run duration trend ─────────────────────────────────────
+
+st.subheader("Run duration trend (last 30 runs)")
+# TODO (Target): call get_dag_runs(dag_id, limit=30), compute duration from
+# start_date and end_date, and plot with st.line_chart.
+# The st.info call below is only a placeholder until the chart is added.
+st.info("TODO (Target): add a line chart of run durations.")
+
+
+# ── Panel 3 (Target): Data freshness from Postgres ───────────────────────────
+
+st.subheader("Data freshness")
+# TODO (Target): query MAX(pickup_datetime) and COUNT(*) from
+# dev_<name>.fct_trips using psycopg2 or sqlalchemy + PG_URL.
+# Display as st.metric widgets.
+# Note: requirements.txt lists psycopg2-binary but not sqlalchemy; add it there
+# if you choose sqlalchemy.
+# The st.info call below is only a placeholder until the metrics are added.
+st.info("TODO (Target): add freshness metrics from Postgres.")
diff --git a/task-2/requirements.txt b/task-2/requirements.txt
@@ -0,0 +1,4 @@
+streamlit>=1.35.0
+requests>=2.31.0
+psycopg2-binary>=2.9.9
+python-dotenv>=1.0.0