From bdf339156a2d2c55ffa490b39d2f084bf93fc08d Mon Sep 17 00:00:00 2001 From: Lasse Benninga Date: Tue, 9 Jun 2026 17:27:33 +0200 Subject: [PATCH 1/9] scaffold: README.md --- README.md | 54 ++++++++++++++++++++++++++++++++++++++++++++---------- 1 file changed, 44 insertions(+), 10 deletions(-) diff --git a/README.md b/README.md index 96ce7bc..f73bc5f 100644 --- a/README.md +++ b/README.md @@ -1,17 +1,51 @@ -# [Track] week X assignment -HackYourFuture week X assignment -The Week X assignment for the HackYourFuture can be found at the following link: [TODO: Assignment url in the learning platform] +# Data Track: Week 12 Assignment +Week 12 assignment for the [HackYourFuture Data Track](https://www.notion.so/hackyourfuture/Data-Track-Overview). -## Implementation Instructions +The assignment for this week can be found at: [Week 12: Assignment](https://www.notion.so/hackyourfuture/Assignment-Build-Two-Dashboards) -Provide clear instructions on how trainees should implement the tasks. +## What you will build -### Task 1 -Instructions for Task 1 +- **Task 1: Metabase dashboard** — three analytical Questions on your dbt mart tables, arranged into a dashboard with a date filter, and a `metric_definitions.md` file documenting each panel. +- **Task 2: Streamlit engineering dashboard** — a Python app that shows Airflow DAG run status and data freshness from Azure Postgres. -### Task 2 -Instructions for Task 2 +## Getting started -... +### Task 1: Metabase +1. Log in to the HYF Metabase instance (URL in `task-1/README.md`). +2. Build three Questions in SQL mode on your `dev_<yourname>.fct_trips` table. +3. Arrange them into a Dashboard and add a date-range filter. +4. Fill in `task-1/metric_definitions.md` for each panel. +5. Copy the dashboard URL (or take screenshots) into `task-1/README.md`. + +### Task 2: Streamlit + +1. Open `task-2/app.py` and follow the `TODO` comments. +2. Copy `.env.example` to `.env` and fill in your credentials (never commit `.env`). +3. 
Run `pip install -r task-2/requirements.txt`. +4. Run `streamlit run task-2/app.py` and verify the panels load with real data. +5. Push `task-2/` to GitHub. + +## Autograder + +```bash +bash .hyf/test.sh +``` + +The scaffold returns `pass: false`. It returns `pass: true` once: +- `task-1/metric_definitions.md` contains at least 3 metric definitions +- `task-2/app.py` contains `st.cache_data` and no hardcoded passwords + +## Submission + +Submit the following in the class assignment tracker: + +| Item | Required for | +|---|---| +| Metabase dashboard link / screenshots | Minimum | +| `task-2/` GitHub link | Minimum | +| `task-1/metric_definitions.md` | Minimum | +| Date filter on ≥2 Questions | Target | +| Streamlit freshness panel | Target | +| 5-minute presentation recording link | Target | From ede2b5b2ab9b8f5a962fbac58816753fbcd12679 Mon Sep 17 00:00:00 2001 From: Lasse Benninga Date: Tue, 9 Jun 2026 17:27:43 +0200 Subject: [PATCH 2/9] scaffold: task-1/README.md --- task-1/README.md | 27 +++++++++++++++++++++++++++ 1 file changed, 27 insertions(+) create mode 100644 task-1/README.md diff --git a/task-1/README.md b/task-1/README.md new file mode 100644 index 0000000..b0b39e9 --- /dev/null +++ b/task-1/README.md @@ -0,0 +1,27 @@ +# Task 1: Metabase Analytical Dashboard + +## Metabase instance + +```text +https://metabase-hyf.politepebble-abd3ebc2.westeurope.azurecontainerapps.io +``` + +Log in with the credentials your teacher provided. + +## Your dashboard URL + + +**Dashboard URL:** _(paste here after building)_ + + + +## Screenshots (optional) + +Add screenshots here if the public link is not shareable. 
+ +## Checklist + +- [ ] 3 Questions built in SQL mode on `dev_<yourname>.fct_trips` or `fct_daily_borough_stats` +- [ ] Questions arranged in one Dashboard named "NYC Taxi Analytics: [Your Name]" +- [ ] Date-range filter connected to at least 2 Questions (Target) +- [ ] `metric_definitions.md` filled in for all panels From c669e289c0c4728cf798b44d35b6b6f66297d8d8 Mon Sep 17 00:00:00 2001 From: Lasse Benninga Date: Tue, 9 Jun 2026 17:27:54 +0200 Subject: [PATCH 3/9] scaffold: task-1/metric_definitions.md --- task-1/metric_definitions.md | 40 ++++++++++++++++++++++++++++++++++++ 1 file changed, 40 insertions(+) create mode 100644 task-1/metric_definitions.md diff --git a/task-1/metric_definitions.md b/task-1/metric_definitions.md new file mode 100644 index 0000000..bfe1acb --- /dev/null +++ b/task-1/metric_definitions.md @@ -0,0 +1,40 @@ +# Metric Definitions + +Document every Metabase panel here using all five fields. +A metric without a complete definition will not pass the grading criteria. + +--- + +## Metric 1: _(name)_ + +| Field | Value | +|---|---| +| **Name** | _(e.g. `trip_count_by_borough`)_ | +| **Description** | _(one sentence: what does this number measure, for whom?)_ | +| **Calculation** | _(the SQL logic or formula: e.g. `COUNT(*) FROM fct_trips GROUP BY pickup_borough`)_ | +| **Data source** | _(table + schema: e.g. `dev_yourname.fct_trips`)_ | +| **Refresh frequency** | _(e.g. 
Daily, after 03:00 UTC Airflow run)_ | + +--- + +## Metric 2: _(name)_ + +| Field | Value | +|---|---| +| **Name** | | +| **Description** | | +| **Calculation** | | +| **Data source** | | +| **Refresh frequency** | | + +--- + +## Metric 3: _(name)_ + +| Field | Value | +|---|---| +| **Name** | | +| **Description** | | +| **Calculation** | | +| **Data source** | | +| **Refresh frequency** | | From 1a458848dce1fd1fcbc55221fd8f990acec7805e Mon Sep 17 00:00:00 2001 From: Lasse Benninga Date: Tue, 9 Jun 2026 17:28:12 +0200 Subject: [PATCH 4/9] scaffold: task-2/app.py --- task-2/app.py | 78 +++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 78 insertions(+) create mode 100644 task-2/app.py diff --git a/task-2/app.py b/task-2/app.py new file mode 100644 index 0000000..3c3e0d9 --- /dev/null +++ b/task-2/app.py @@ -0,0 +1,78 @@ +""" +Week 12 Engineering Dashboard — Streamlit app + +Run: streamlit run app.py + +Credentials: copy .env.example to .env and fill in values. + Load with: pip install python-dotenv, then call load_dotenv() below. + Never hardcode credentials in this file. 
+""" + +import os + +import requests +import streamlit as st + +# TODO: uncomment these two lines after installing python-dotenv +# from dotenv import load_dotenv +# load_dotenv() + +AIRFLOW_URL = os.environ.get("AIRFLOW_URL", "") +AIRFLOW_USER = os.environ.get("AIRFLOW_USER", "") +AIRFLOW_PASS = os.environ.get("AIRFLOW_PASS", "") +PG_URL = os.environ.get("PG_URL", "") # postgresql://user:pass@host/db + +st.set_page_config(page_title="Pipeline Health", layout="wide") +st.title("Pipeline Health Dashboard") + + +# ── Panel 1: Last DAG run status ───────────────────────────────────────────── + + +@st.cache_data(ttl=60) +def get_dag_runs(dag_id: str, limit: int = 10) -> list: + """Return recent DAG runs from the Airflow REST API.""" + # TODO: implement this function + # Endpoint: GET {AIRFLOW_URL}/api/v1/dags/{dag_id}/dagRuns + # Auth: requests.get(..., auth=(AIRFLOW_USER, AIRFLOW_PASS)) + # Return: list of run dicts, each with "state", "start_date", "end_date" + raise NotImplementedError("TODO: implement get_dag_runs") + + +dag_id = "ingest_taxi_month" # TODO: update with your actual DAG id + +st.subheader("Last DAG run") +try: + runs = get_dag_runs(dag_id, limit=1) + if runs: + last = runs[0] + state = last["state"] + if state == "success": + st.success(f"Last run: **{state}** — started {last['start_date']}") + elif state == "failed": + st.error(f"Last run: **{state}** — check Airflow logs") + else: + st.warning(f"Last run: **{state}**") + else: + st.info("No runs found for this DAG.") +except NotImplementedError: + st.warning("Panel 1: implement `get_dag_runs` to show live data.") +except Exception as exc: + st.error(f"Could not reach Airflow: {exc}") + + +# ── Panel 2 (Target): Run duration trend ───────────────────────────────────── + +st.subheader("Run duration trend (last 30 runs)") +# TODO (Target): call get_dag_runs(dag_id, limit=30), compute duration from +# start_date and end_date, and plot with st.line_chart. 
+st.info("TODO (Target): add a line chart of run durations.") + + +# ── Panel 3 (Target): Data freshness from Postgres ─────────────────────────── + +st.subheader("Data freshness") +# TODO (Target): query MAX(pickup_datetime) and COUNT(*) from +# dev_<yourname>.fct_trips using psycopg2 or sqlalchemy + PG_URL. +# Display as st.metric widgets. +st.info("TODO (Target): add freshness metrics from Postgres.") From 9327106a540b6b5ea9886c74b4483ea8ddf25dbd Mon Sep 17 00:00:00 2001 From: Lasse Benninga Date: Tue, 9 Jun 2026 17:28:20 +0200 Subject: [PATCH 5/9] scaffold: task-2/requirements.txt --- task-2/requirements.txt | 4 ++++ 1 file changed, 4 insertions(+) create mode 100644 task-2/requirements.txt diff --git a/task-2/requirements.txt b/task-2/requirements.txt new file mode 100644 index 0000000..2cf1fd3 --- /dev/null +++ b/task-2/requirements.txt @@ -0,0 +1,4 @@ +streamlit>=1.35.0 +requests>=2.31.0 +psycopg2-binary>=2.9.9 +python-dotenv>=1.0.0 From 6af2af2843634602936a5926d43e90fd3db5ee06 Mon Sep 17 00:00:00 2001 From: Lasse Benninga Date: Tue, 9 Jun 2026 17:28:22 +0200 Subject: [PATCH 6/9] scaffold: task-2/.env.example --- task-2/.env.example | 9 +++++++++ 1 file changed, 9 insertions(+) create mode 100644 task-2/.env.example diff --git a/task-2/.env.example b/task-2/.env.example new file mode 100644 index 0000000..affce79 --- /dev/null +++ b/task-2/.env.example @@ -0,0 +1,9 @@ +# Copy this file to .env and fill in your values. +# .env is in .gitignore — never commit it. 
+ +AIRFLOW_URL=https://your-airflow-instance.example.com +AIRFLOW_USER=admin +AIRFLOW_PASS=your-airflow-password + +# PostgreSQL connection string (format: postgresql://user:password@host:5432/dbname) +PG_URL=postgresql://pipeline_user:your-pg-password@your-pg-host:5432/team1 From 66eb2becc127c5f298207d06b11ff721d0637b98 Mon Sep 17 00:00:00 2001 From: Lasse Benninga Date: Tue, 9 Jun 2026 17:28:39 +0200 Subject: [PATCH 7/9] scaffold: .hyf/test.sh --- .hyf/test.sh | 84 +++++++++++++++++++++++++++++++++++++++++++++++----- 1 file changed, 76 insertions(+), 8 deletions(-) diff --git a/.hyf/test.sh b/.hyf/test.sh index ee037fc..365dcbc 100644 --- a/.hyf/test.sh +++ b/.hyf/test.sh @@ -1,13 +1,81 @@ #!/usr/bin/env bash set -euo pipefail -# Run your test scripts here. -# Auto grade tool will execute this file within the .hyf working directory. -# The result should be stored in score.json file with the format shown below. -cat << EOF > score.json +# Week 12 autograder +# Checks: +# 1. metric_definitions.md has at least 3 metric entries +# 2. app.py uses @st.cache_data +# 3. 
app.py does not contain hardcoded passwords (heuristic: no "password" = "...") + +PASS=true +SCORE=0 + +check() { + local description="$1" + local result="$2" + if [ "$result" = "true" ]; then + echo " PASS $description" + SCORE=$((SCORE + 1)) + else + echo " FAIL $description" + PASS=false + fi +} + +METRIC_FILE="../task-1/metric_definitions.md" +APP_FILE="../task-2/app.py" + +# Check 1: metric definitions file exists +if [ -f "$METRIC_FILE" ]; then + check "metric_definitions.md exists" "true" +else + check "metric_definitions.md exists" "false" +fi + +# Check 2: at least 3 metric sections +METRIC_COUNT=$(grep -c "^## Metric" "$METRIC_FILE" 2>/dev/null || echo 0) +if [ "$METRIC_COUNT" -ge 3 ]; then + check "metric_definitions.md has ≥3 metrics" "true" +else + check "metric_definitions.md has ≥3 metrics (found $METRIC_COUNT)" "false" +fi + +# Check 3: at least 3 metrics have a non-empty Name field +FILLED=$(grep -A1 "\*\*Name\*\*" "$METRIC_FILE" 2>/dev/null | grep -v "^--$" | grep -v "Name" | grep -v "^$" | grep -v "e.g\." 
| wc -l | tr -d ' ') +if [ "$FILLED" -ge 3 ]; then + check "≥3 metric Name fields are filled" "true" +else + check "≥3 metric Name fields are filled (found $FILLED)" "false" +fi + +# Check 4: app.py exists +if [ -f "$APP_FILE" ]; then + check "task-2/app.py exists" "true" +else + check "task-2/app.py exists" "false" +fi + +# Check 5: app.py uses @st.cache_data +if grep -q "st.cache_data" "$APP_FILE" 2>/dev/null; then + check "app.py uses @st.cache_data" "true" +else + check "app.py uses @st.cache_data" "false" +fi + +# Check 6: app.py does not hardcode a password (heuristic) +if grep -qE 'password\s*=\s*"[^"]+"' "$APP_FILE" 2>/dev/null; then + check "app.py has no hardcoded password string" "false" +else + check "app.py has no hardcoded password string" "true" +fi + +cat << JSONEOF > score.json { - "score": 0, - "pass": true, - "passingScore": 0 + "score": $SCORE, + "pass": $PASS, + "passingScore": 4 } -EOF +JSONEOF + +echo "" +echo "Score: $SCORE / 6 — pass: $PASS" From 211017454eb5cf1d1c9db896545bbc0cf58098d8 Mon Sep 17 00:00:00 2001 From: Lasse Benninga Date: Thu, 11 Jun 2026 08:17:00 +0200 Subject: [PATCH 8/9] fix(week-12): repair autograder ladder + Airflow 3 API in assignment scaffold MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The untouched scaffold previously scored 6/6 and PASSED — every check was a file-presence check the scaffold satisfied. 
Rebuilt test.sh with work-verifying checks so the scaffold fails (4/7) and a completed Minimum solution passes (7/7): - metric Name fields filled with real (non-placeholder) values - Panel 1 implemented (no `raise NotImplementedError` / `TODO: implement`) - AI_ASSIST.md filled with 3+ real entries - pass is now score >= passingScore (6), not "every check passed" Also: - task-2/app.py: /api/v1 -> /api/v2 (Airflow 3), remove em dashes, note the shared-Airflow <yourname>_ DAG-id prefix - add AI_ASSIST.md template (Minimum deliverable, matches the chapter) Verified locally: scaffold 4/7 fail, reference solution 7/7 pass. Co-Authored-By: Claude Opus 4.8 (1M context) --- .hyf/test.sh | 95 ++++++++++++++++++++++++--------------------------- AI_ASSIST.md | 10 ++++++ task-2/app.py | 11 +++--- 3 files changed, 60 insertions(+), 56 deletions(-) mode change 100644 => 100755 .hyf/test.sh create mode 100644 AI_ASSIST.md diff --git a/.hyf/test.sh b/.hyf/test.sh old mode 100644 new mode 100755 index 365dcbc..1d04ac9 --- a/.hyf/test.sh +++ b/.hyf/test.sh @@ -1,81 +1,74 @@ #!/usr/bin/env bash -set -euo pipefail +set -uo pipefail -# Week 12 autograder -# Checks: -# 1. metric_definitions.md has at least 3 metric entries -# 2. app.py uses @st.cache_data -# 3. app.py does not contain hardcoded passwords (heuristic: no "password" = "...") +# Week 12 autograder. +# Ladder: the untouched scaffold must FAIL; a completed Minimum solution passes. 
+# Work-verifying checks (not mere file presence): +# - metric Names filled with real (non-placeholder) values +# - Panel 1 implemented (no NotImplementedError / TODO: implement left) +# - AI_ASSIST.md filled with real content -PASS=true +PASSING=6 SCORE=0 check() { - local description="$1" - local result="$2" - if [ "$result" = "true" ]; then - echo " PASS $description" - SCORE=$((SCORE + 1)) - else - echo " FAIL $description" - PASS=false - fi + if [ "$2" = "true" ]; then echo " PASS $1"; SCORE=$((SCORE + 1)); + else echo " FAIL $1"; fi } METRIC_FILE="../task-1/metric_definitions.md" APP_FILE="../task-2/app.py" +AI_FILE="../AI_ASSIST.md" -# Check 1: metric definitions file exists -if [ -f "$METRIC_FILE" ]; then - check "metric_definitions.md exists" "true" -else - check "metric_definitions.md exists" "false" -fi +# 1. metric_definitions.md exists +[ -f "$METRIC_FILE" ] && check "metric_definitions.md exists" "true" || check "metric_definitions.md exists" "false" -# Check 2: at least 3 metric sections -METRIC_COUNT=$(grep -c "^## Metric" "$METRIC_FILE" 2>/dev/null || echo 0) -if [ "$METRIC_COUNT" -ge 3 ]; then - check "metric_definitions.md has ≥3 metrics" "true" -else - check "metric_definitions.md has ≥3 metrics (found $METRIC_COUNT)" "false" -fi +# 2. >=3 metric Name fields filled with a REAL value (not empty, not _(...)_ placeholder) +FILLED=$(grep -E '^\| \*\*Name\*\* \|' "$METRIC_FILE" 2>/dev/null \ + | sed -E 's/^\| \*\*Name\*\* \|([^|]*)\|.*/\1/' \ + | grep -vE '^[[:space:]]*$' | grep -vF '_(' | wc -l | tr -d ' ') +[ "${FILLED:-0}" -ge 3 ] && check "3+ metric Name fields filled" "true" \ + || check "3+ metric Name fields filled (found ${FILLED:-0})" "false" -# Check 3: at least 3 metrics have a non-empty Name field -FILLED=$(grep -A1 "\*\*Name\*\*" "$METRIC_FILE" 2>/dev/null | grep -v "^--$" | grep -v "Name" | grep -v "^$" | grep -v "e.g\." 
| wc -l | tr -d ' ') -if [ "$FILLED" -ge 3 ]; then - check "≥3 metric Name fields are filled" "true" -else - check "≥3 metric Name fields are filled (found $FILLED)" "false" -fi +# 3. task-2/app.py exists +[ -f "$APP_FILE" ] && check "task-2/app.py exists" "true" || check "task-2/app.py exists" "false" -# Check 4: app.py exists -if [ -f "$APP_FILE" ]; then - check "task-2/app.py exists" "true" +# 4. Panel 1 implemented: no NotImplementedError / TODO: implement remaining +if grep -qE 'raise NotImplementedError|TODO: implement' "$APP_FILE" 2>/dev/null; then + check "Panel 1 implemented (get_dag_runs)" "false" else - check "task-2/app.py exists" "false" + check "Panel 1 implemented (get_dag_runs)" "true" fi -# Check 5: app.py uses @st.cache_data -if grep -q "st.cache_data" "$APP_FILE" 2>/dev/null; then - check "app.py uses @st.cache_data" "true" +# 5. app.py uses @st.cache_data (quality: do not strip caching) +grep -q "st.cache_data" "$APP_FILE" 2>/dev/null \ + && check "app.py uses @st.cache_data" "true" || check "app.py uses @st.cache_data" "false" + +# 6. app.py has no hardcoded password +if grep -qE 'password[[:space:]]*=[[:space:]]*"[^"]+"' "$APP_FILE" 2>/dev/null; then + check "app.py has no hardcoded password" "false" else - check "app.py uses @st.cache_data" "false" + check "app.py has no hardcoded password" "true" fi -# Check 6: app.py does not hardcode a password (heuristic) -if grep -qE 'password\s*=\s*"[^"]+"' "$APP_FILE" 2>/dev/null; then - check "app.py has no hardcoded password string" "false" +# 7. 
AI_ASSIST.md filled: >=3 real content lines (exclude blanks, headings, placeholders) +if [ -f "$AI_FILE" ]; then + AI_LINES=$(grep -vE '^[[:space:]]*$|^#|^⚠️|^Document' "$AI_FILE" | grep -vF '_(' | wc -l | tr -d ' ') + [ "${AI_LINES:-0}" -ge 3 ] && check "AI_ASSIST.md documents LLM usage" "true" \ + || check "AI_ASSIST.md documents LLM usage (needs 3+ entries)" "false" else - check "app.py has no hardcoded password string" "true" + check "AI_ASSIST.md exists" "false" fi -cat << JSONEOF > score.json +if [ "$SCORE" -ge "$PASSING" ]; then PASS=true; else PASS=false; fi + +cat > score.json < list: """Return recent DAG runs from the Airflow REST API.""" # TODO: implement this function - # Endpoint: GET {AIRFLOW_URL}/api/v1/dags/{dag_id}/dagRuns + # Endpoint: GET {AIRFLOW_URL}/api/v2/dags/{dag_id}/dagRuns # Auth: requests.get(..., auth=(AIRFLOW_USER, AIRFLOW_PASS)) # Return: list of run dicts, each with "state", "start_date", "end_date" raise NotImplementedError("TODO: implement get_dag_runs") -dag_id = "ingest_taxi_month" # TODO: update with your actual DAG id +dag_id = "ingest_taxi_month" # TODO: on the shared Airflow your DAG id is +# prefixed: _ingest_taxi_month st.subheader("Last DAG run") try: @@ -48,9 +49,9 @@ def get_dag_runs(dag_id: str, limit: int = 10) -> list: last = runs[0] state = last["state"] if state == "success": - st.success(f"Last run: **{state}** — started {last['start_date']}") + st.success(f"Last run: **{state}**, started {last['start_date']}") elif state == "failed": - st.error(f"Last run: **{state}** — check Airflow logs") + st.error(f"Last run: **{state}**, check Airflow logs") else: st.warning(f"Last run: **{state}**") else: From 4f132b92d26411b483f91ddcc684a116b0de1c25 Mon Sep 17 00:00:00 2001 From: Lasse Benninga Date: Thu, 11 Jun 2026 16:00:41 +0200 Subject: [PATCH 9/9] fix(week-12): assignment task-2 TODO describes Airflow 3 token auth The shared Airflow 3.2 VM rejects Basic Auth (401 on /api/v2, verified live). 
Updated the get_dag_runs TODO to the token flow: POST /auth/token for a JWT, then Authorization: Bearer. Stub stays NotImplementedError (autograder ladder unchanged: scaffold still fails). Co-Authored-By: Claude Opus 4.8 (1M context) --- task-2/app.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/task-2/app.py b/task-2/app.py index e94e8cd..7df7c3d 100644 --- a/task-2/app.py +++ b/task-2/app.py @@ -34,7 +34,9 @@ def get_dag_runs(dag_id: str, limit: int = 10) -> list: """Return recent DAG runs from the Airflow REST API.""" # TODO: implement this function # Endpoint: GET {AIRFLOW_URL}/api/v2/dags/{dag_id}/dagRuns - # Auth: requests.get(..., auth=(AIRFLOW_USER, AIRFLOW_PASS)) + # Auth (Airflow 3): POST {AIRFLOW_URL}/auth/token with username/password to + # get an access_token, then send headers={"Authorization": f"Bearer {token}"}. + # Basic Auth (auth=(user, pass)) returns 401 on Airflow 3. Cache the token. # Return: list of run dicts, each with "state", "start_date", "end_date" raise NotImplementedError("TODO: implement get_dag_runs")