diff --git a/.github/autograder.py b/.github/autograder.py index 3c76050..752d523 100644 --- a/.github/autograder.py +++ b/.github/autograder.py @@ -6,11 +6,13 @@ Reads PYTEST_EXIT_CODE from the environment to know whether tests passed. Output: - - checkmark per check printed to stdout + - ✅/❌ per check printed to stdout - Markdown summary written to $GITHUB_STEP_SUMMARY (if set) - Exit 1 when any *critical* check fails; exit 0 otherwise """ +from __future__ import annotations + import os import re import subprocess @@ -23,27 +25,30 @@ # --------------------------------------------------------------------------- class Result: - def __init__(self, label, passed, critical, detail=""): + def __init__(self, label: str, passed: bool, critical: bool, detail: str = ""): self.label = label self.passed = passed self.critical = critical self.detail = detail @property - def icon(self): + def icon(self) -> str: return "✅" if self.passed else "❌" - def __str__(self): + def __str__(self) -> str: suffix = f" ({self.detail})" if self.detail else "" crit_tag = " [CRITICAL]" if not self.passed and self.critical else "" - return f" {self.icon} {self.label}{crit_tag}{suffix}" + return f"{self.icon} {self.label}{crit_tag}{suffix}" + +Results = List[Result] -def ok(label, critical=False, detail=""): + +def ok(label: str, critical: bool = False, detail: str = "") -> Result: return Result(label, True, critical, detail) -def fail(label, critical=False, detail=""): +def fail(label: str, critical: bool = False, detail: str = "") -> Result: return Result(label, False, critical, detail) @@ -54,12 +59,16 @@ def fail(label, critical=False, detail=""): ROOT = Path(".") -def file_exists(path): +def file_exists(path: str) -> bool: return (ROOT / path).exists() -def grep_src(pattern): - """Search src/ for pattern. Returns list of matching lines, or None if src/ missing.""" +def grep_src(pattern: str, *, invert: bool = False) -> Optional[Tuple[str, ...]]: + """ + Search src/ for *pattern*. + Returns a tuple of matching lines, or None if the directory is missing. + When invert=True, returns lines that do NOT match (grep -v style). + """ src_dir = ROOT / "src" if not src_dir.is_dir(): return None @@ -67,12 +76,14 @@ def grep_src(pattern): regex = re.compile(pattern) for py_file in src_dir.rglob("*.py"): for line in py_file.read_text(errors="replace").splitlines(): - if regex.search(line): + hit = bool(regex.search(line)) + if (hit and not invert) or (not hit and invert): matches.append(line) - return matches + return tuple(matches) -def count_test_functions(): +def count_test_functions() -> int: + """Count `def test_` definitions across all test files.""" count = 0 tests_dir = ROOT / "tests" if not tests_dir.is_dir(): @@ -84,7 +95,8 @@ def count_test_functions(): return count -def readme_is_filled_in(): +def readme_is_filled_in() -> bool: + """Return True if README.md has more than 5 non-heading lines.""" readme = ROOT / "README.md" if not readme.exists(): return False @@ -95,38 +107,77 @@ def readme_is_filled_in(): return len(non_heading) > 5 -def ai_assist_is_filled_in(): - """Return True if AI_ASSIST.md has at least one real 'What I asked:' entry.""" +def ai_assist_is_filled_in() -> bool: + """ + Return True if AI_ASSIST.md contains at least one 'What I asked:' entry + beyond the default stub placeholder. + """ ai_file = ROOT / "AI_ASSIST.md" if not ai_file.exists(): return False content = ai_file.read_text(errors="replace") - entries = re.findall(r"What I asked:\*\*\s*(.+)", content) - # Also match unbolded variant - entries += re.findall(r"What I asked:\s*(.+)", content) - real_entries = [e for e in entries if e.strip() and not e.strip().startswith("<")] + # The stub contains a single placeholder like "What I asked: " + # A real entry would be "What I asked: " followed by actual text (not angle-bracket placeholder) + entries = re.findall(r"What I asked:\s*(.+)", content) + real_entries = [e for e in entries if not e.strip().startswith("<")] return len(real_entries) >= 1 -def env_example_has_empty_secrets(): +def readme_has_architecture_overview() -> bool: + """ + Return True if README.md contains an architecture overview or data-flow description. + Accepts: a mermaid code block, an ASCII diagram (arrows/boxes), or a paragraph + containing 'architecture', 'data flow', or 'pipeline' followed by directional words. + """ + readme = ROOT / "README.md" + if not readme.exists(): + return False + content = readme.read_text(errors="replace") + # mermaid block + if "```mermaid" in content: + return True + # ASCII diagram indicators (arrows between words) + if re.search(r"[→←↓↑]|->|-->|\|.*\|", content): + return True + # Explicit architecture/flow section heading or paragraph + if re.search(r"(?i)(##\s*(architecture|data.?flow|pipeline.?flow)|architecture\s*overview)", content): + return True + return False + + +def env_file_not_committed() -> bool: + """Return True if .env is NOT present in the working tree (not committed).""" + return not (ROOT / ".env").exists() + + +def env_example_has_empty_secrets() -> bool: + """ + .env.example lines for POSTGRES_URL and AZURE_STORAGE_CONNECTION_STRING + must have empty (or absent) values. + """ env_example = ROOT / ".env.example" if not env_example.exists(): - return True + return True # checked separately for line in env_example.read_text(errors="replace").splitlines(): for key in ("POSTGRES_URL", "AZURE_STORAGE_CONNECTION_STRING"): if line.startswith(f"{key}="): - value = line[len(f"{key}="):].strip().strip("\"'") + value = line[len(f"{key}="):].strip() + # Strip surrounding quotes + value = value.strip("\"'") if value: return False return True -def no_hardcoded_secrets(): - """Return (clean, detail). clean=True means no secrets found.""" +def no_hardcoded_secrets() -> Tuple[bool, str]: + """ + Scan src/ for patterns that look like hardcoded credentials. + Returns (clean, detail_string). + """ patterns = [ - r"postgres://\w+:\w+@", - r"DefaultEndpointsProtocol=https;AccountKey=", - r"AccountKey=[A-Za-z0-9+/]{40,}", + r"postgres://\w+:\w+@", # postgres DSN with credentials + r"DefaultEndpointsProtocol=https;AccountKey=", # Azure storage conn string + r"AccountKey=[A-Za-z0-9+/]{40,}", # raw account key ] hits = [] src_dir = ROOT / "src" @@ -136,19 +187,24 @@ def no_hardcoded_secrets(): text = py_file.read_text(errors="replace") for pat in patterns: if re.search(pat, text): - hits.append(f"{py_file.name}: matched pattern") + hits.append(f"{py_file}: matched '{pat}'") if hits: return False, "; ".join(hits[:3]) return True, "" -def no_bare_print_in_src(): +def no_bare_print_in_src() -> bool: + """ + Return True if src/ contains no bare `print(` calls. + We allow `print` in tests but not in src/. + """ src_dir = ROOT / "src" if not src_dir.is_dir(): return True for py_file in src_dir.rglob("*.py"): for line in py_file.read_text(errors="replace").splitlines(): stripped = line.lstrip() + # Allow commented lines and logging.info style if stripped.startswith("#"): continue if re.search(r"\bprint\s*\(", line): @@ -156,7 +212,8 @@ def no_bare_print_in_src(): return True -def merge_commit_count(): +def merge_commit_count() -> int: + """Count merge commits on main using git log.""" try: result = subprocess.run( ["git", "log", "--merges", "main", "--oneline"], @@ -164,26 +221,32 @@ def merge_commit_count(): text=True, timeout=30, ) - return len([l for l in result.stdout.splitlines() if l.strip()]) + lines = [l for l in result.stdout.splitlines() if l.strip()] + return len(lines) except Exception: return 0 -def has_non_autograder_workflow(): +def has_non_autograder_workflow() -> bool: + """Return True if .github/workflows/ has at least one yml that isn't autograder.yml.""" wf_dir = ROOT / ".github" / "workflows" if not wf_dir.is_dir(): return False - return bool([f for f in wf_dir.glob("*.yml") if f.name != "autograder.yml"]) + ymls = [ + f for f in wf_dir.glob("*.yml") + if f.name != "autograder.yml" + ] + return len(ymls) > 0 # --------------------------------------------------------------------------- -# Check groups +# Check runners # --------------------------------------------------------------------------- -def check_file_structure(): +def check_file_structure() -> Results: results = [] - for path, critical in [ + required_files = [ ("Dockerfile", True), ("pyproject.toml", True), ("uv.lock", False), @@ -195,148 +258,218 @@ def check_file_structure(): ("src/pipeline.py", False), ("src/models.py", False), ("src/storage.py", False), - ]: + ] + + for path, critical in required_files: label = f"File exists: {path}" - results.append(ok(label, critical) if file_exists(path) else fail(label, critical)) + if file_exists(path): + results.append(ok(label, critical)) + else: + results.append(fail(label, critical)) + # tests/ has at least one test_*.py tests_dir = ROOT / "tests" has_test_file = tests_dir.is_dir() and bool(list(tests_dir.rglob("test_*.py"))) - results.append( - ok("tests/ has at least one test_*.py") if has_test_file - else fail("tests/ has at least one test_*.py") - ) + if has_test_file: + results.append(ok("tests/ has at least one test_*.py")) + else: + results.append(fail("tests/ has at least one test_*.py")) - results.append( - ok("README.md has been filled in") if readme_is_filled_in() - else fail("README.md has been filled in") - ) + # README filled in + if readme_is_filled_in(): + results.append(ok("README.md has been filled in (>5 non-heading lines)")) + else: + results.append(fail("README.md has been filled in (>5 non-heading lines)")) - results.append( - ok("AI_ASSIST.md has been filled in") if ai_assist_is_filled_in() - else fail("AI_ASSIST.md has been filled in") - ) + # AI_ASSIST filled in + if ai_assist_is_filled_in(): + results.append(ok("AI_ASSIST.md has at least one real 'What I asked:' entry")) + else: + results.append(fail("AI_ASSIST.md has at least one real 'What I asked:' entry")) + + # Architecture overview in README (diagram, data-flow description, or mermaid block) + if readme_has_architecture_overview(): + results.append(ok("README.md contains an architecture overview or data-flow description")) + else: + results.append(fail("README.md contains an architecture overview or data-flow description", + detail="add a diagram (mermaid/ASCII) or a short description of the data flow")) return results -def check_code_patterns(): +def check_code_patterns() -> Results: results = [] + # HTTP library: proxy for "uses a live external API" + http_lines = grep_src(r"import requests|from requests|import httpx|from httpx|import aiohttp|from aiohttp") + if http_lines is None: + results.append(fail("src/ exists", critical=True)) + elif http_lines: + results.append(ok("HTTP library imported in src/ (requests / httpx / aiohttp)", critical=True)) + else: + results.append(fail("HTTP library imported in src/ (requests / httpx / aiohttp)", critical=True, + detail="no requests/httpx/aiohttp found; pipeline must use a live external API")) + + # pandas import pandas_lines = grep_src(r"import pandas|from pandas") - if pandas_lines is None: - results.append(fail("src/ directory exists", critical=True)) - elif pandas_lines: + if pandas_lines: results.append(ok("pandas imported in src/", critical=True)) else: results.append(fail("pandas imported in src/", critical=True)) - pydantic_lines = grep_src(r"BaseModel|import pydantic|from pydantic") - results.append( - ok("Pydantic BaseModel used in src/", critical=True) if pydantic_lines - else fail("Pydantic BaseModel used in src/", critical=True) + # pandas real transform work (beyond a bare DataFrame constructor) + transform_lines = grep_src( + r"pd\.to_datetime|\.dt\.|\.groupby\(|\.agg\(|\.merge\(|\.assign\(|\.rename\(|\.fillna\(|\.dropna\(" ) + if transform_lines: + results.append(ok("pandas transform operations present (groupby/agg/to_datetime/etc.)", critical=True)) + else: + results.append(fail( + "pandas transform operations present (groupby/agg/to_datetime/etc.)", + critical=True, + detail="wrapping records in pd.DataFrame() without transforming doesn't count", + )) + + # pydantic / BaseModel + pydantic_lines = grep_src(r"BaseModel|import pydantic|from pydantic") + if pydantic_lines: + results.append(ok("Pydantic / BaseModel used in src/", critical=True)) + else: + results.append(fail("Pydantic / BaseModel used in src/", critical=True)) + # sys.exit (fail-fast on missing env vars) sys_exit_lines = grep_src(r"\bsys\.exit\b") - results.append( - ok("sys.exit present in src/ (fail-fast on missing env vars)") if sys_exit_lines - else fail("sys.exit present in src/ (fail-fast on missing env vars)") - ) + if sys_exit_lines: + results.append(ok("sys.exit present in src/ (fail-fast pattern)")) + else: + results.append(fail("sys.exit present in src/ (fail-fast pattern)")) - results.append( - ok("No bare print() in src/ (logging used instead)") if no_bare_print_in_src() - else fail("No bare print() in src/ (logging used instead)") - ) + # no bare print() in src/ + if no_bare_print_in_src(): + results.append(ok("No bare print() in src/ (logging used instead)")) + else: + results.append(fail("No bare print() in src/ (logging used instead)")) return results -def check_security(): +def check_security() -> Results: results = [] - results.append( - ok(".env not committed", critical=True) if not (ROOT / ".env").exists() - else fail(".env not committed", critical=True, detail=".env found in working tree") - ) + # .env not committed + if env_file_not_committed(): + results.append(ok(".env not committed to the repo", critical=True)) + else: + results.append(fail(".env not committed to the repo", critical=True, + detail=".env file found in working tree")) - results.append( - ok(".env.example has empty values for secret keys") if env_example_has_empty_secrets() - else fail(".env.example has empty values for secret keys") - ) + # .env.example has empty secret values + if env_example_has_empty_secrets(): + results.append(ok(".env.example has empty values for secret keys")) + else: + results.append(fail(".env.example has empty values for secret keys", + detail="POSTGRES_URL or AZURE_STORAGE_CONNECTION_STRING appear to have values")) + # no hardcoded secrets in src/ clean, detail = no_hardcoded_secrets() - results.append( - ok("No hardcoded secrets in src/", critical=True) if clean - else fail("No hardcoded secrets in src/", critical=True, detail=detail) - ) + if clean: + results.append(ok("No hardcoded secrets in src/", critical=True)) + else: + results.append(fail("No hardcoded secrets in src/", critical=True, detail=detail)) return results -def check_tests(pytest_exit_code): +def check_tests(pytest_exit_code: int) -> Results: results = [] fn_count = count_test_functions() - results.append( - ok(f"At least 2 test functions found ({fn_count} total)") if fn_count >= 2 - else fail(f"At least 2 test functions found (only {fn_count} found)") - ) + if fn_count >= 2: + results.append(ok(f"At least 2 test functions found ({fn_count} total)")) + else: + results.append(fail(f"At least 2 test functions found ({fn_count} found)")) - results.append( - ok("pytest passes", critical=True) if pytest_exit_code == 0 - else fail("pytest passes", critical=True, detail=f"exit code {pytest_exit_code}") - ) + if pytest_exit_code == 0: + results.append(ok("pytest passes", critical=True)) + else: + results.append(fail("pytest passes", critical=True, + detail=f"pytest exited with code {pytest_exit_code}")) return results -def check_cicd(): +def check_cicd() -> Results: results = [] - results.append( - ok("CI workflow exists (.github/workflows/)") if has_non_autograder_workflow() - else fail("CI workflow exists (.github/workflows/)") - ) + + if has_non_autograder_workflow(): + results.append(ok(".github/workflows/ has a student-authored workflow")) + else: + results.append(fail(".github/workflows/ has a student-authored workflow")) + return results -def check_git_workflow(): +def check_git_workflow() -> Results: results = [] + merge_count = merge_commit_count() - results.append( - ok(f"At least 3 PRs merged ({merge_count} merge commits on main)") if merge_count >= 3 - else fail(f"At least 3 PRs merged ({merge_count} merge commits found)") - ) + if merge_count >= 3: + results.append(ok(f"At least 3 merge commits on main ({merge_count} found)")) + else: + results.append(fail(f"At least 3 merge commits on main ({merge_count} found)", + detail="expected pull-request-based workflow with feature branches")) + return results # --------------------------------------------------------------------------- -# Manual checklist +# Manual checklist (printed but never fails the grade) # --------------------------------------------------------------------------- MANUAL_CHECKS = [ - "Docker image builds and runs locally (`docker run --env-file .env`)", - "Transform does real work: pipeline.py calls at least one of parse_dates / dropna / fillna / rename / assign / a new derived column (not just pd.DataFrame(records) → storage)", - "Azure ACR: image exists with a tagged version", - "Azure: Container App Job created with --trigger-type Schedule --cron-expression", - "Azure: job ran successfully (execution history shows Succeeded)", - "Azure: job output verifiable (rows in Postgres AND blobs in Blob Storage)", - "Azure: job uses --registry-server, --replica-timeout 300, --env-vars", - "Cleanup: Container App Job deleted after evaluation", + "Docker image builds and runs locally", + "Azure Container Registry image exists with a tagged version", + "Container App Job created in the shared environment", + "Job ran successfully (execution history shows Succeeded)", + "Job output verifiable (rows in Postgres or blobs in storage)", + "Job uses --registry-server, --replica-timeout 300, --env-vars", + "Container App Job deleted after evaluation", ] # --------------------------------------------------------------------------- -# Markdown summary +# Output helpers # --------------------------------------------------------------------------- -def build_markdown(all_results, score, total): +def print_section(title: str, results: Results) -> None: + print(f"\n{'─' * 60}") + print(f" {title}") + print(f"{'─' * 60}") + for r in results: + print(r) + + +def build_markdown_summary(all_results: Results, score: int, total: int) -> str: lines = [ "# Week 7 Autograder Results", "", - f"**Automated checks: {score}/{total} passed**", + f"**Score: {score}/{total} automated checks passed**", "", - "| # | Check | Result |", - "|---|-------|--------|", ] + + sections = { + "File structure": [], + "Code patterns": [], + "Security": [], + "Tests": [], + "CI/CD": [], + "Git workflow": [], + } + + # We re-use the grouped results passed in by the caller via all_results. + # Since we don't have section metadata here, we print a flat table. + lines.append("| # | Check | Result |") + lines.append("|---|-------|--------|") for i, r in enumerate(all_results, 1): status = "✅ Pass" if r.passed else "❌ Fail" crit = " *(critical)*" if r.critical and not r.passed else "" @@ -348,8 +481,8 @@ def build_markdown(all_results, score, total): "## Manual checks (teacher review required)", "", ] - for c in MANUAL_CHECKS: - lines.append(f"- [ ] {c}") + for check in MANUAL_CHECKS: + lines.append(f"- [ ] {check}") return "\n".join(lines) @@ -358,17 +491,37 @@ def build_markdown(all_results, score, total): # Main # --------------------------------------------------------------------------- -def main(): +def is_template_stub() -> bool: + """Return True when running inside the unmodified starter template. + + The template's src/pipeline.py contains a placeholder comment that + students are expected to replace. If it's still there, this is the + template repo, not a student submission. Skip grading. + """ + pipeline = ROOT / "src" / "pipeline.py" + if not pipeline.exists(): + return False + return "# TODO: Replace with your API call" in pipeline.read_text(errors="replace") + + +def main() -> int: + pytest_exit_code_str = os.environ.get("PYTEST_EXIT_CODE", "1") try: - pytest_exit_code = int(os.environ.get("PYTEST_EXIT_CODE", "1")) + pytest_exit_code = int(pytest_exit_code_str) except ValueError: pytest_exit_code = 1 print("=" * 60) - print(" HackYourFuture — Week 7 Autograder") + print(" HackYourFuture: Week 7 Autograder") print("=" * 60) - grouped = [ + if is_template_stub(): + print("\n Template repo detected: skipping grading.") + print(" Replace the stubs in src/ before the autograder runs.") + print("=" * 60) + return 0 + + grouped: List[Tuple[str, Results]] = [ ("File structure", check_file_structure()), ("Code patterns", check_code_patterns()), ("Security", check_security()), @@ -377,13 +530,9 @@ def main(): ("Git workflow", check_git_workflow()), ] - all_results = [] + all_results: Results = [] for section_name, results in grouped: - print(f"\n{'─' * 60}") - print(f" {section_name}") - print(f"{'─' * 60}") - for r in results: - print(r) + print_section(section_name, results) all_results.extend(results) passed = sum(1 for r in all_results if r.passed) @@ -391,26 +540,31 @@ def main(): critical_failures = [r for r in all_results if not r.passed and r.critical] print(f"\n{'=' * 60}") - print(f" Automated checks: {passed}/{total} passed") + print(f" Score: {passed}/{total} automated checks passed") if critical_failures: - print(f"\n Critical failures:") + print(f"\n Critical failures ({len(critical_failures)}):") for r in critical_failures: print(f" {r.icon} {r.label}") print(f"{'=' * 60}") - print("\n Manual checks (teacher review required):") - for c in MANUAL_CHECKS: - print(f" [ ] {c}") + # Manual checklist + print("\n📋 Manual checks (teacher review required):") + for check in MANUAL_CHECKS: + print(f" ☐ {check}") + # Write GitHub Step Summary summary_path = os.environ.get("GITHUB_STEP_SUMMARY") if summary_path: try: with open(summary_path, "w") as fh: - fh.write(build_markdown(all_results, passed, total)) + fh.write(build_markdown_summary(all_results, passed, total)) except OSError as exc: print(f"\nWarning: could not write step summary: {exc}", file=sys.stderr) - return 1 if critical_failures else 0 + # Exit 1 only when critical checks fail + if critical_failures: + return 1 + return 0 if __name__ == "__main__": diff --git a/src/pipeline.py b/src/pipeline.py index 0d19b1e..8492785 100644 --- a/src/pipeline.py +++ b/src/pipeline.py @@ -4,16 +4,15 @@ import os import sys -from dotenv import load_dotenv - -load_dotenv() - import pandas as pd +from dotenv import load_dotenv from pydantic import ValidationError from src.models import WeatherReading from src.storage import insert_readings, upload_raw_json +load_dotenv() + logging.basicConfig( level=os.getenv("LOG_LEVEL", "INFO"), format="%(asctime)s %(levelname)s %(message)s",