diff --git a/.codeboarding/analysis.json b/.codeboarding/analysis.json index a7d7e4f..04dceb1 100644 --- a/.codeboarding/analysis.json +++ b/.codeboarding/analysis.json @@ -1,20 +1,20 @@ { "metadata": { - "generated_at": "2026-06-11T16:41:16.710439+00:00", - "commit_hash": "ba5d7787eaecfa38964dd4663606927d98e501ba", + "generated_at": "2026-06-12T12:22:29.920827+00:00", + "commit_hash": "20bfd68682a5f048e8120701230c98539acff2ca", "repo_name": "CodeBoarding-action", "depth_level": 1, "file_coverage_summary": { - "total_files": 15, - "analyzed": 3, - "not_analyzed": 12, + "total_files": 17, + "analyzed": 4, + "not_analyzed": 13, "not_analyzed_by_reason": { "other": 9, - "codeboardingignore": 3 + "codeboardingignore": 4 } } }, - "description": "The CodeBoarding-action architecture is a CI/CD pipeline that automates architectural observability by comparing codebase versions, generating visual Mermaid.js diffs, and providing actionable IDE deep-links within GitHub Pull Requests.", + "description": "The CodeBoarding-action project implements a pipeline that orchestrates static analysis of code branches, performs structural diffing to generate visual Mermaid.js diagrams, and integrates these insights into GitHub Pull Requests via interactive deep links and editor-specific metadata.", "files": { "scripts/cb_engine.py": { "method_keys": [ @@ -30,6 +30,17 @@ "scripts/cb_engine.py|scripts.cb_engine.main" ] }, + "scripts/build_component_files.py": { + "method_keys": [ + "scripts/build_component_files.py|scripts.build_component_files._walk", + "scripts/build_component_files.py|scripts.build_component_files._subtree_files", + "scripts/build_component_files.py|scripts.build_component_files._subtree_methods", + "scripts/build_component_files.py|scripts.build_component_files._changed_files_for", + "scripts/build_component_files.py|scripts.build_component_files._block", + "scripts/build_component_files.py|scripts.build_component_files.render_component_files", + "scripts/build_component_files.py|scripts.build_component_files.main" + ] + }, "scripts/diff_to_mermaid.py": { "method_keys": [ "scripts/diff_to_mermaid.py|scripts.diff_to_mermaid.load_analysis", @@ -50,7 +61,6 @@ "scripts/diff_to_mermaid.py|scripts.diff_to_mermaid._display_status", "scripts/diff_to_mermaid.py|scripts.diff_to_mermaid._Scope", "scripts/diff_to_mermaid.py|scripts.diff_to_mermaid._Scope.__init__", - "scripts/diff_to_mermaid.py|scripts.diff_to_mermaid._Scope.resolve", "scripts/diff_to_mermaid.py|scripts.diff_to_mermaid._filter_changed", "scripts/diff_to_mermaid.py|scripts.diff_to_mermaid._filter_changed.touches", "scripts/diff_to_mermaid.py|scripts.diff_to_mermaid._init_directive", @@ -66,6 +76,7 @@ "scripts/build_cta.py": { "method_keys": [ "scripts/build_cta.py|scripts.build_cta.detect_editors", + "scripts/build_cta.py|scripts.build_cta.build_webview_link", "scripts/build_cta.py|scripts.build_cta.build_cta", "scripts/build_cta.py|scripts.build_cta.build_cta.link", "scripts/build_cta.py|scripts.build_cta.main" @@ -143,6 +154,55 @@ "end_line": 257, "type": "FUNCTION" }, + "scripts/build_component_files.py|scripts.build_component_files._walk": { + "file_path": "scripts/build_component_files.py", + "qualified_name": "scripts.build_component_files._walk", + "start_line": 56, + "end_line": 62, + "type": "FUNCTION" + }, + "scripts/build_component_files.py|scripts.build_component_files._subtree_files": { + "file_path": "scripts/build_component_files.py", + "qualified_name": "scripts.build_component_files._subtree_files", + "start_line": 65, + "end_line": 74, + "type": "FUNCTION" + }, + "scripts/build_component_files.py|scripts.build_component_files._subtree_methods": { + "file_path": "scripts/build_component_files.py", + "qualified_name": "scripts.build_component_files._subtree_methods", + "start_line": 77, + "end_line": 85, + "type": "FUNCTION" + }, + "scripts/build_component_files.py|scripts.build_component_files._changed_files_for": { + "file_path": "scripts/build_component_files.py", + "qualified_name": "scripts.build_component_files._changed_files_for", + "start_line": 88, + "end_line": 101, + "type": "FUNCTION" + }, + "scripts/build_component_files.py|scripts.build_component_files._block": { + "file_path": "scripts/build_component_files.py", + "qualified_name": "scripts.build_component_files._block", + "start_line": 104, + "end_line": 121, + "type": "FUNCTION" + }, + "scripts/build_component_files.py|scripts.build_component_files.render_component_files": { + "file_path": "scripts/build_component_files.py", + "qualified_name": "scripts.build_component_files.render_component_files", + "start_line": 124, + "end_line": 175, + "type": "FUNCTION" + }, + "scripts/build_component_files.py|scripts.build_component_files.main": { + "file_path": "scripts/build_component_files.py", + "qualified_name": "scripts.build_component_files.main", + "start_line": 178, + "end_line": 213, + "type": "FUNCTION" + }, "scripts/diff_to_mermaid.py|scripts.diff_to_mermaid.load_analysis": { "file_path": "scripts/diff_to_mermaid.py", "qualified_name": "scripts.diff_to_mermaid.load_analysis", @@ -269,13 +329,6 @@ "end_line": 298, "type": "METHOD" }, - "scripts/diff_to_mermaid.py|scripts.diff_to_mermaid._Scope.resolve": { - "file_path": "scripts/diff_to_mermaid.py", - "qualified_name": "scripts.diff_to_mermaid._Scope.resolve", - "start_line": 300, - "end_line": 309, - "type": "METHOD" - }, "scripts/diff_to_mermaid.py|scripts.diff_to_mermaid._filter_changed": { "file_path": "scripts/diff_to_mermaid.py", "qualified_name": "scripts.diff_to_mermaid._filter_changed", @@ -349,36 +402,43 @@ "scripts/build_cta.py|scripts.build_cta.detect_editors": { "file_path": "scripts/build_cta.py", "qualified_name": "scripts.build_cta.detect_editors", - "start_line": 35, - "end_line": 47, + "start_line": 36, + "end_line": 48, + "type": "FUNCTION" + }, + "scripts/build_cta.py|scripts.build_cta.build_webview_link": { + "file_path": "scripts/build_cta.py", + "qualified_name": "scripts.build_cta.build_webview_link", + "start_line": 62, + "end_line": 77, "type": "FUNCTION" }, "scripts/build_cta.py|scripts.build_cta.build_cta": { "file_path": "scripts/build_cta.py", "qualified_name": "scripts.build_cta.build_cta", - "start_line": 61, - "end_line": 96, + "start_line": 80, + "end_line": 137, "type": "FUNCTION" }, "scripts/build_cta.py|scripts.build_cta.build_cta.link": { "file_path": "scripts/build_cta.py", "qualified_name": "scripts.build_cta.build_cta.link", - "start_line": 79, - "end_line": 80, + "start_line": 120, + "end_line": 121, "type": "FUNCTION" }, "scripts/build_cta.py|scripts.build_cta.main": { "file_path": "scripts/build_cta.py", "qualified_name": "scripts.build_cta.main", - "start_line": 99, - "end_line": 114, + "start_line": 140, + "end_line": 176, "type": "FUNCTION" } }, "components": [ { "name": "Analysis Orchestrator", - "description": "Manages the execution lifecycle of the GitHub Action, including environment setup, triggering analysis, and data validation.", + "description": "The central controller that manages the GitHub Action lifecycle, coordinating environment setup and triggering static analysis for base and head branches.", "key_entities": [ { "qualified_name": "scripts.cb_engine.main", @@ -399,10 +459,10 @@ "reference_end_line": 153 }, { - "qualified_name": "scripts.cb_engine.validate_base_analysis", + "qualified_name": "scripts.cb_engine.run_health", "reference_file": "scripts/cb_engine.py", - "reference_start_line": 45, - "reference_end_line": 76 + "reference_start_line": 183, + "reference_end_line": 212 } ], "source_cluster_ids": [ @@ -429,15 +489,9 @@ "can_expand": true }, { - "name": "Visual Diff Generator", - "description": "The core logic engine that compares structural metadata between codebase versions and renders differences into Mermaid.js graph syntax.", + "name": "Visual Diff Engine", + "description": "Responsible for calculating differences between base and head states, generating Mermaid.js visualizations, and mapping code-level changes to architectural components. This cluster (scripts.build_component_files.py) handles subtree walking, identifying changed files, and mapping methods to components via _subtree_methods and render_component_files. It interacts with the Analysis Orchestrator and Engagement Layer by providing the structured diff data required for visualization.", "key_entities": [ - { - "qualified_name": "scripts.diff_to_mermaid.main", - "reference_file": "scripts/diff_to_mermaid.py", - "reference_start_line": 527, - "reference_end_line": 568 - }, { "qualified_name": "scripts.diff_to_mermaid.build_diff", "reference_file": "scripts/diff_to_mermaid.py", @@ -455,15 +509,34 @@ "reference_file": "scripts/diff_to_mermaid.py", "reference_start_line": 162, "reference_end_line": 207 + }, + { + "qualified_name": "scripts.diff_to_mermaid.load_analysis", + "reference_file": "scripts/diff_to_mermaid.py", + "reference_start_line": 50, + "reference_end_line": 54 } ], "source_cluster_ids": [ 2, 3, 4, - 5 + 5, + 7 ], "file_methods": [ + { + "file_path": "scripts/build_component_files.py", + "methods": [ + "scripts.build_component_files._walk", + "scripts.build_component_files._subtree_files", + "scripts.build_component_files._subtree_methods", + "scripts.build_component_files._changed_files_for", + "scripts.build_component_files._block", + "scripts.build_component_files.render_component_files", + "scripts.build_component_files.main" + ] + }, { "file_path": "scripts/diff_to_mermaid.py", "methods": [ @@ -485,7 +558,6 @@ "scripts.diff_to_mermaid._display_status", "scripts.diff_to_mermaid._Scope", "scripts.diff_to_mermaid._Scope.__init__", - "scripts.diff_to_mermaid._Scope.resolve", "scripts.diff_to_mermaid._filter_changed", "scripts.diff_to_mermaid._filter_changed.touches", "scripts.diff_to_mermaid._init_directive", @@ -503,26 +575,26 @@ "can_expand": true }, { - "name": "UX & Integration Helper", - "description": "Manages the generation of user-facing outputs, including GitHub comments, status checks, and interactive links (CTAs) for external viewing. This component integrates the new 'build_cta.py' logic to generate interactive webview links and editor-specific deep links, expanding its role from simple reporting to providing actionable CI/CD report navigation. It receives processed data from the Visual Diff Generator to finalize the GitHub Action output.", + "name": "Engagement & Integration Layer", + "description": "Generates user-facing metadata, detects developer environments, and creates CTA links to bridge GitHub PR comments with the interactive CodeBoarding platform.", "key_entities": [ { - "qualified_name": "scripts.build_cta.main", + "qualified_name": "scripts.build_cta.build_cta", "reference_file": "scripts/build_cta.py", - "reference_start_line": 99, - "reference_end_line": 114 + "reference_start_line": 80, + "reference_end_line": 137 }, { - "qualified_name": "scripts.build_cta.build_cta", + "qualified_name": "scripts.build_cta.build_webview_link", "reference_file": "scripts/build_cta.py", - "reference_start_line": 61, - "reference_end_line": 96 + "reference_start_line": 62, + "reference_end_line": 77 }, { "qualified_name": "scripts.build_cta.detect_editors", "reference_file": "scripts/build_cta.py", - "reference_start_line": 35, - "reference_end_line": 47 + "reference_start_line": 36, + "reference_end_line": 48 } ], "source_cluster_ids": [ @@ -533,6 +605,7 @@ "file_path": "scripts/build_cta.py", "methods": [ "scripts.build_cta.detect_editors", + "scripts.build_cta.build_webview_link", "scripts.build_cta.build_cta", "scripts.build_cta.build_cta.link", "scripts.build_cta.main" @@ -545,29 +618,29 @@ ], "components_relations": [ { - "relation": "Passes validated analysis artifacts to initiate structural comparison", + "relation": "triggers state comparison and visualization", "src_name": "Analysis Orchestrator", - "dst_name": "Visual Diff Generator", + "dst_name": "Visual Diff Engine", "src_id": "1", "dst_id": "2", "edge_count": 0, "is_static": false }, { - "relation": "Provides context of changed files and components for deep-link generation", - "src_name": "Visual Diff Generator", - "dst_name": "UX & Integration Helper", - "src_id": "2", + "relation": "provides execution context for CTA generation", + "src_name": "Analysis Orchestrator", + "dst_name": "Engagement & Integration Layer", + "src_id": "1", "dst_id": "3", "edge_count": 0, "is_static": false }, { - "relation": "Returns formatted markdown snippets and CTA links for final output", - "src_name": "UX & Integration Helper", - "dst_name": "Analysis Orchestrator", - "src_id": "3", - "dst_id": "1", + "relation": "provides structured diff data for reporting", + "src_name": "Visual Diff Engine", + "dst_name": "Engagement & Integration Layer", + "src_id": "2", + "dst_id": "3", "edge_count": 0, "is_static": false } diff --git a/.codeboarding/health/health_report.json b/.codeboarding/health/health_report.json index 5dc4c6e..d8a0138 100644 --- a/.codeboarding/health/health_report.json +++ b/.codeboarding/health/health_report.json @@ -1,13 +1,13 @@ { "repository_name": "CodeBoarding-action", - "timestamp": "2026-06-11T16:41:11.294364+00:00", - "overall_score": 0.9996183206106869, + "timestamp": "2026-06-12T12:22:22.416630+00:00", + "overall_score": 0.9996710526315788, "check_summaries": [ { "check_name": "function_size", "description": "Checks that functions/methods do not exceed line count thresholds", "check_type": "standard", - "total_entities_checked": 43, + "total_entities_checked": 50, "findings_count": 0, "warning_count": 0, "score": 1.0, @@ -17,7 +17,7 @@ "check_name": "fan_out", "description": "Checks efferent coupling: how many other functions each function calls", "check_type": "standard", - "total_entities_checked": 43, + "total_entities_checked": 50, "findings_count": 0, "warning_count": 0, "score": 1.0, @@ -27,7 +27,7 @@ "check_name": "fan_in", "description": "Checks afferent coupling: how many other functions call each function", "check_type": "standard", - "total_entities_checked": 43, + "total_entities_checked": 50, "findings_count": 0, "warning_count": 0, "score": 1.0, diff --git a/action.yml b/action.yml index e9b24b8..2e72261 100644 --- a/action.yml +++ b/action.yml @@ -706,6 +706,8 @@ runs: ACTION_PATH: ${{ github.action_path }} TARGET_REPO: ${{ github.workspace }}/target-repo DIAGRAM_MD: ${{ steps.diagram.outputs.diagram_md }} + BASE_ANALYSIS: ${{ steps.analyze.outputs.base_analysis }} + HEAD_ANALYSIS: ${{ steps.analyze.outputs.head_analysis }} RUN_ID: ${{ github.run_id }} N: ${{ steps.diagram.outputs.n_changed }} CHANGED: ${{ steps.diagram.outputs.changed }} @@ -744,6 +746,26 @@ runs: --repo-path "$TARGET_REPO" --issues "${ISSUES:-0}" "${extra[@]}" } + # Per-component changed-file dropdowns: which files made each node change + # color. The git listing is the PR's own changes (three-dot merge-base..head, + # the same set as the Files-changed tab; --no-renames so a moved file's old + # path still appears for the donor component). Node colors compare against + # the base *tip*, so a node colored only by base-branch churn on a stale PR + # gets no dropdown. Best-effort: if the git diff fails (e.g. merge-base + # unreachable on a shallow fork fetch) the script falls back to + # analysis-derived changes, and if the script fails the section is omitted — + # the comment always posts. + FILES_MD="${RUNNER_TEMP}/component_files.md" + : > "$FILES_MD" + if [ -n "$BASE_ANALYSIS" ] && [ -n "$HEAD_ANALYSIS" ]; then + CHANGED_LIST="${RUNNER_TEMP}/changed_files.txt" + files_args=(--base "$BASE_ANALYSIS" --head "$HEAD_ANALYSIS" --out "$FILES_MD") + if git -C "$TARGET_REPO" -c core.quotepath=off diff --no-renames --name-only "$BASE_SHA...HEAD" > "$CHANGED_LIST" 2>/dev/null; then + files_args+=(--changed-files "$CHANGED_LIST") + fi + python3 "$ACTION_PATH/scripts/build_component_files.py" "${files_args[@]}" || : > "$FILES_MD" + fi + { echo "### ${HEADER} · $(headline)" echo "" @@ -761,6 +783,10 @@ runs: else echo "No architectural changes detected versus \`${BASE_REF}\`." fi + if [ -s "$FILES_MD" ]; then + echo "" + cat "$FILES_MD" + fi cta echo "" echo "codeboarding-action · run ${RUN_ID}" diff --git a/scripts/build_component_files.py b/scripts/build_component_files.py new file mode 100644 index 0000000..0ef64d5 --- /dev/null +++ b/scripts/build_component_files.py @@ -0,0 +1,217 @@ +"""Render per-component changed-file dropdowns for the sticky PR comment. + +Takes the same base/head ``analysis.json`` pair as ``diff_to_mermaid.py`` (the +diff logic is imported from there, so the dropdowns and the diagram always +agree on what changed) and emits one collapsed ``
`` block per changed +top-level component, listing the files that made it change color — the question +the colored diagram raises but can't answer. + +Which files count as "changed" for a component: + + * With ``--changed-files`` (a ``git diff --no-renames --name-only`` listing of + the PR's own changes, merge-base..head — the same set as the Files-changed + tab): the intersection of the component subtree's file paths with that + listing. A component can own 40 files while the PR touched 2 — listing all + 40 would answer the wrong question. ``--no-renames`` matters: with rename + detection on, a moved file's old path never appears in ``--name-only`` and + the donor component's dropdown would silently vanish. Node colors compare + head against the base *tip*, so a node colored only by base-branch churn + on a stale PR intentionally gets no dropdown. + * Without it: the analysis-derived change set — files added to / removed + from the component plus files whose method set changed. This misses + body-only edits (the analysis can't see them), so the git listing is + preferred. + +Component file paths come from ``file_methods[].file_path`` plus +``key_entities[].reference_file`` — some engine outputs (observed on +TypeScript repos) carry file linkage only in ``key_entities``. + +Names and paths are emitted as HTML (````/````, escaped) rather than +markdown spans so arbitrary repo content can't break the comment markup. +Per-component and total size caps keep the block a small fraction of GitHub's +65,536-char comment limit (the Mermaid diagram alone may use ~45k). + +Self-contained stdlib; imports the diff from its sibling diff_to_mermaid.py. +""" + +from __future__ import annotations + +import argparse +import html +import json +import sys +from pathlib import Path + +sys.path.insert(0, str(Path(__file__).resolve().parent)) +import diff_to_mermaid as dm # noqa: E402 + +# Budget: ~45k diagram + this block + ~1.5k header/CTA/footer must stay under +# GitHub's 65,536-char comment cap; overflow drops dropdowns, never the diagram. +MAX_TEXT = 10_000 +MAX_FILES_PER_COMPONENT = 15 + +_NOUN = {"added": "added", "modified": "changed", "deleted": "removed"} + + +def _walk(comp: dict, skip_deleted: bool = False): + """Yield ``comp`` and its whole subtree; optionally prune deleted ghosts.""" + if skip_deleted and comp.get("diff_status") == "deleted": + return + yield comp + for sub in comp.get("components") or []: + yield from _walk(sub, skip_deleted) + + +def _subtree_files(comp: dict | None, skip_deleted: bool = False) -> set: + if comp is None: + return set() + files: set = set() + for c in _walk(comp, skip_deleted): + files.update(fm.get("file_path") or "" for fm in c.get("file_methods") or []) + # Some engine outputs carry file linkage only in key_entities. + files.update(ke.get("reference_file") or "" for ke in c.get("key_entities") or []) + files.discard("") + return files + + +def _subtree_methods(comp: dict | None, skip_deleted: bool = False) -> dict: + merged: dict = {} + if comp is None: + return merged + for c in _walk(comp, skip_deleted): + for fp, names in dm._methods_by_file(c).items(): + merged.setdefault(fp, set()).update(names) + merged.pop("", None) # entries lacking file_path must not become a phantom file + return merged + + +def _changed_files_for(comp: dict, base_match: dict | None, changed_files: set | None) -> list: + """Files to list for one changed top-level component (see module docstring).""" + if changed_files is not None: + # Ghost subtrees inside the diff carry base-side files, so the union of + # both sides covers added, modified, and deleted components alike. + return sorted((_subtree_files(comp) | _subtree_files(base_match)) & changed_files) + head_files = _subtree_files(comp, skip_deleted=True) + base_files = _subtree_files(base_match) + head_methods = _subtree_methods(comp, skip_deleted=True) + base_methods = _subtree_methods(base_match) + method_changed = { + fp for fp in set(head_methods) | set(base_methods) if head_methods.get(fp, set()) != base_methods.get(fp, set()) + } + return sorted((head_files ^ base_files) | method_changed) + + +def _block(name: str, status: str, files: list, n_sub: int = 0) -> str: + shown = files[:MAX_FILES_PER_COMPONENT] + hidden = len(files) - len(shown) + n = len(files) + # Rollup parents (only nested components changed) carry the recursive count + # the headline and diagram use, so the dropdown explains "3 components + # changed" instead of contradicting it. + rollup = f"{n_sub} changed sub-component{'' if n_sub == 1 else 's'}, " if n_sub else "" + lines = [ + "
", + f"{html.escape(name)} : {rollup}{n} file{'' if n == 1 else 's'} {_NOUN[status]}", + "", # blank line: required for GitHub to render markdown after + ] + lines += [f"- {html.escape(fp)}" for fp in shown] + if hidden: + lines.append(f"- …and {hidden} more") + lines += ["", "
"] + return "\n".join(lines) + + +def render_component_files( + diff: dict, + base: dict, + changed_files: set | None = None, + max_chars: int = MAX_TEXT, +) -> tuple: + """Return (markdown_text, meta). ``markdown_text`` is "" when there's nothing to list.""" + base_by_name = {dm._comp_name(c): c for c in base.get("components") or []} + blocks: list = [] # (block_text, n_files_listed) + truncated = False + for comp in diff.get("components") or []: + status = dm._display_status(comp) + if status not in dm.CHANGED: + continue + # A deleted ghost IS its base component (the diff builds it from base), + # so use it directly; the name lookup would misattribute files when two + # top-level components share a name. + base_match = comp if comp.get("diff_status") == "deleted" else base_by_name.get(dm._comp_name(comp)) + files = _changed_files_for(comp, base_match, changed_files) + if not files: + continue # e.g. relation-only change, base-branch churn, or a reorg the PR didn't touch + truncated = truncated or len(files) > MAX_FILES_PER_COMPONENT + n_sub = ( + dm._count_changed_components(comp.get("components") or []) + if comp.get("diff_status") not in dm.CHANGED + else 0 + ) + blocks.append( + ( + _block(dm._comp_name(comp) or dm._comp_id(comp) or "(unnamed)", status, files, n_sub), + min(len(files), MAX_FILES_PER_COMPONENT), + ) + ) + + rendered: list = [] + size = 0 + n_components = n_files = 0 + for i, (block, n_listed) in enumerate(blocks): + if size + len(block) > max_chars: + truncated = True + if rendered: # never emit a dangling "…and N more" with no blocks above it + left = len(blocks) - i + rendered.append(f"…and {left} more changed component{'' if left == 1 else 's'}") + break + rendered.append(block) + size += len(block) + 1 + n_components += 1 + n_files += n_listed + + text = "\n".join(rendered) + meta = {"rendered": bool(text), "n_components": n_components, "n_files": n_files, "truncated": truncated} + return text, meta + + +def main() -> int: + p = argparse.ArgumentParser(description=__doc__) + p.add_argument("--base", required=True, type=Path, help="Path to the base (before) analysis.json") + p.add_argument("--head", required=True, type=Path, help="Path to the head (after) analysis.json") + p.add_argument("--out", required=True, type=Path, help="Where to write the
markdown block") + p.add_argument( + "--changed-files", + type=Path, + default=None, + help="File with the PR's changed paths, one per line (git diff --no-renames " + "--name-only merge-base..head). Omit to fall back to analysis-derived changes.", + ) + args = p.parse_args() + + changed: set | None = None + if args.changed_files is not None: + try: + # surrogateescape: core.quotepath=off emits raw filename bytes; a + # non-UTF-8 path must not kill the whole section (it simply won't + # intersect with the analysis's UTF-8 paths). + raw = args.changed_files.read_text(encoding="utf-8", errors="surrogateescape") + except OSError as exc: + sys.exit(f"::error::Could not read changed-files list at {args.changed_files}: {exc}") + changed = {line.strip() for line in raw.splitlines() if line.strip()} + + base = dm.load_analysis(args.base) + head = dm.load_analysis(args.head) + text, meta = render_component_files(dm.build_diff(base, head), base, changed) + + # Trailing newline so the following CTA's "---" isn't absorbed into the + #
HTML block; conditional so an empty result stays 0 bytes for + # the action's [ -s ] gate. + args.out.write_text(text + "\n" if text else "", encoding="utf-8") + # Machine-readable summary on stdout for the action to consume. + print(json.dumps(meta)) + return 0 + + +if __name__ == "__main__": + sys.exit(main()) diff --git a/tests/test_build_component_files.py b/tests/test_build_component_files.py new file mode 100644 index 0000000..9dc7307 --- /dev/null +++ b/tests/test_build_component_files.py @@ -0,0 +1,301 @@ +"""Unit tests for scripts/build_component_files.py — per-component changed-file dropdowns.""" + +import json +import re +import subprocess +import sys +import tempfile +import unittest +from pathlib import Path + +sys.path.insert(0, str(Path(__file__).resolve().parent.parent / "scripts")) +import build_component_files as bcf # noqa: E402 +import diff_to_mermaid as dm # noqa: E402 + +SCRIPT = Path(__file__).resolve().parent.parent / "scripts" / "build_component_files.py" + + +def comp(name, files=None, subs=None, key_files=None): + c = { + "name": name, + "component_id": name, + "file_methods": [{"file_path": f, "methods": m} for f, m in (files or {}).items()], + } + if key_files is not None: + c["key_entities"] = [{"reference_file": f} for f in key_files] + if subs is not None: + c["components"] = subs + return c + + +def render(base, head, changed_files=None, max_chars=bcf.MAX_TEXT): + diff = dm.build_diff(base, head) + return bcf.render_component_files(diff, base, changed_files, max_chars) + + +class TestGitIntersection(unittest.TestCase): + def test_modified_component_lists_only_touched_files(self): + base = {"components": [comp("Auth", {"a.py": ["f"], "b.py": ["g"], "c.py": ["h"]})]} + head = {"components": [comp("Auth", {"a.py": ["f", "f2"], "b.py": ["g"], "c.py": ["h"]})]} + text, meta = render(base, head, changed_files={"a.py", "unrelated.py"}) + self.assertIn("a.py", text) + self.assertNotIn("b.py", text) # owned but untouched + self.assertNotIn("unrelated.py", text) # touched but not owned + self.assertIn("Auth : 1 file changed", text) + self.assertEqual(meta["n_components"], 1) + self.assertEqual(meta["n_files"], 1) + + def test_added_component_wording(self): + base = {"components": []} + head = {"components": [comp("RateLimiter", {"rl/bucket.py": ["acquire"], "rl/config.py": ["load"]})]} + text, _ = render(base, head, changed_files={"rl/bucket.py", "rl/config.py"}) + self.assertIn("RateLimiter : 2 files added", text) + + def test_deleted_component_lists_base_files(self): + base = {"components": [comp("Legacy", {"legacy/store.py": ["get"], "legacy/migrations.py": ["mig"]})]} + head = {"components": []} + text, _ = render(base, head, changed_files={"legacy/store.py", "legacy/migrations.py"}) + self.assertIn("Legacy : 2 files removed", text) + self.assertIn("legacy/migrations.py", text) + + def test_unchanged_component_emits_nothing(self): + base = {"components": [comp("A", {"a.py": ["f"]})]} + head = {"components": [comp("A", {"a.py": ["f"]})]} + text, meta = render(base, head, changed_files={"a.py"}) + self.assertEqual(text, "") + self.assertFalse(meta["rendered"]) + + def test_changed_component_with_no_touched_files_is_skipped(self): + # Model reorg: file moved between components, but the PR's git diff is elsewhere. + base = {"components": [comp("A", {"a.py": ["f"], "b.py": ["g"]})]} + head = {"components": [comp("A", {"a.py": ["f"]})]} + text, _ = render(base, head, changed_files={"elsewhere.py"}) + self.assertEqual(text, "") + + def test_empty_changed_files_set_means_no_dropdowns_not_fallback(self): + # Empty git diff (net-zero PR / re-run): an empty set must NOT fall + # back to analysis-derived changes — only None (flag omitted) does. + base = {"components": [comp("A", {"a.py": ["f"]})]} + head = {"components": [comp("A", {"a.py": ["f", "g"]})]} + text, meta = render(base, head, changed_files=set()) + self.assertEqual(text, "") + self.assertFalse(meta["rendered"]) + + def test_nested_subtree_files_aggregate_to_top_level(self): + base = {"components": [comp("Parent", {}, subs=[comp("Child", {"deep/x.py": ["f"]})])]} + head = {"components": [comp("Parent", {}, subs=[comp("Child", {"deep/x.py": ["f", "g"]})])]} + text, _ = render(base, head, changed_files={"deep/x.py"}) + self.assertIn("Parent", text) + self.assertIn("deep/x.py", text) + self.assertNotIn("Child", text) # one dropdown per top-level component + + def test_rollup_parent_labels_changed_subcomponents(self): + # Parent unchanged itself (display_status rollup): the summary carries the + # recursive count the headline/diagram use, so counts don't contradict. + base = {"components": [comp("Parent", {"p.py": ["f"]}, subs=[comp("Child", {"c.py": ["g"]})])]} + head = {"components": [comp("Parent", {"p.py": ["f"]}, subs=[comp("Child", {"c.py": ["g", "g2"]})])]} + text, _ = render(base, head, changed_files={"c.py"}) + self.assertIn("Parent : 1 changed sub-component, 1 file changed", text) + + def test_deleted_nested_child_files_list_under_modified_parent(self): + base = {"components": [comp("Parent", {"p.py": ["f"]}, subs=[comp("Child", {"child/x.py": ["g"]})])]} + head = {"components": [comp("Parent", {"p.py": ["f"]}, subs=[])]} + text, _ = render(base, head, changed_files={"child/x.py"}) + self.assertIn("Parent", text) + self.assertIn("child/x.py", text) + + def test_key_entities_only_shape(self): + # Some engine outputs have file_methods: [] everywhere and carry file + # linkage only in key_entities[].reference_file (observed on TS repos). + base = {"components": []} + head = {"components": [comp("Webview", key_files=["src/panel.ts", "src/render.ts"])]} + text, _ = render(base, head, changed_files={"src/panel.ts", "src/render.ts"}) + self.assertIn("Webview : 2 files added", text) + self.assertIn("src/panel.ts", text) + + def test_duplicate_deleted_names_attribute_files_to_own_block(self): + base = {"components": [comp("Dup", {"one.py": ["f"]}), comp("Dup", {"two.py": ["g"]})]} + head = {"components": []} + for changed in (None, {"one.py", "two.py"}): + text, _ = render(base, head, changed_files=changed) + self.assertEqual(text.count("one.py"), 1, text) + self.assertEqual(text.count("two.py"), 1, text) + + +class TestAnalysisFallback(unittest.TestCase): + def test_fallback_lists_structural_and_method_changes(self): + base = {"components": [comp("A", {"kept.py": ["f"], "gone.py": ["g"], "same.py": ["h"]})]} + head = {"components": [comp("A", {"kept.py": ["f", "f2"], "new.py": ["n"], "same.py": ["h"]})]} + text, _ = render(base, head, changed_files=None) + self.assertIn("kept.py", text) # method set changed + self.assertIn("gone.py", text) # removed from component + self.assertIn("new.py", text) # added to component + self.assertNotIn("same.py", text) + + def test_fallback_deleted_component_lists_all_base_files(self): + base = {"components": [comp("Legacy", {"l/a.py": ["f"], "l/b.py": ["g"]})]} + head = {"components": []} + text, _ = render(base, head, changed_files=None) + self.assertIn("Legacy : 2 files removed", text) + + def test_missing_file_path_entry_emits_no_phantom(self): + base = {"components": [comp("A", {"a.py": ["f"]})]} + head = {"components": [comp("A", {"a.py": ["f"]})]} + head["components"][0]["file_methods"].append({"methods": ["orphan"]}) # no file_path + text, _ = render(base, head, changed_files=None) + self.assertNotIn("", text) + + +class TestOrdering(unittest.TestCase): + def test_file_lists_are_sorted(self): + files = {f: ["m"] for f in ["e.py", "b.py", "f.py", "a.py", "d.py", "c.py"]} + base = {"components": []} + head = {"components": [comp("A", files)]} + text, _ = render(base, head, changed_files=set(files)) + paths = re.findall(r"([^<]+)", text) + self.assertEqual(paths, ["a.py", "b.py", "c.py", "d.py", "e.py", "f.py"]) + + def test_blocks_follow_diagram_order_deleted_ghosts_last(self): + # Head order first (matches Mermaid node emission), deleted ghosts appended + # last — NOT alphabetical: Alpha is deleted and must render after Zeta. + base = {"components": [comp("Zeta", {"z.py": ["f"]}), comp("Alpha", {"a.py": ["g"]})]} + head = {"components": [comp("Zeta", {"z.py": ["f", "f2"]})]} + text, _ = render(base, head, changed_files={"z.py", "a.py"}) + self.assertEqual(re.findall(r"(\w+)", text), ["Zeta", "Alpha"]) + + +class TestCapsAndEscaping(unittest.TestCase): + def test_per_component_file_cap(self): + files = {f"src/f{i:02}.py": ["m"] for i in range(20)} + base = {"components": []} + head = {"components": [comp("Big", files)]} + text, meta = render(base, head, changed_files=set(files)) + self.assertEqual(text.count(""), bcf.MAX_FILES_PER_COMPONENT) + self.assertIn("…and 5 more", text) + self.assertIn(": 20 files added", text) # count reflects reality, list is capped + self.assertTrue(meta["truncated"]) + + def test_total_char_budget_drops_whole_components(self): + base = {"components": []} + head = {"components": [comp(f"C{i}", {f"c{i}/f.py": ["m"]}) for i in range(10)]} + text, meta = render(base, head, changed_files={f"c{i}/f.py" for i in range(10)}, max_chars=300) + self.assertIn("more changed components", text) + self.assertTrue(meta["truncated"]) + # meta counts what actually rendered, not what the budget dropped + self.assertEqual(meta["n_files"], text.count("")) + self.assertEqual(meta["n_components"], text.count("
")) + + def test_first_block_exceeding_budget_renders_nothing(self): + # Never a dangling "…and N more" with no blocks above it. + base = {"components": []} + head = {"components": [comp("Big", {f"very/long/path/file{i}.py": ["m"] for i in range(15)})]} + text, meta = render(base, head, changed_files={f"very/long/path/file{i}.py" for i in range(15)}, max_chars=100) + self.assertEqual(text, "") + self.assertFalse(meta["rendered"]) + self.assertEqual(meta["n_files"], 0) + self.assertTrue(meta["truncated"]) + + def test_html_escaping_of_names_and_paths(self): + base = {"components": []} + head = {"components": [comp("A <& B", {"weird/&.py": ["m"]})]} + text, _ = render(base, head, changed_files={"weird/&.py"}) + self.assertIn("A <& B", text) + self.assertIn("weird/<path>&.py", text) + self.assertNotIn("", text) + + def test_blank_line_after_summary(self): + # GitHub only renders markdown inside
after a blank line. + base = {"components": []} + head = {"components": [comp("A", {"a.py": ["m"]})]} + text, _ = render(base, head, changed_files={"a.py"}) + self.assertIn("\n\n-", text) + self.assertIn("\n\n
", text) + + +class TestCLI(unittest.TestCase): + def _analyses(self, d): + (d / "base.json").write_text(json.dumps({"components": [comp("Auth", {"a.py": ["f"], "b.py": ["g"]})]})) + (d / "head.json").write_text(json.dumps({"components": [comp("Auth", {"a.py": ["f", "f2"], "b.py": ["g"]})]})) + + def _run(self, d, *extra): + out = d / "out.md" + args = [ + sys.executable, + str(SCRIPT), + "--base", + str(d / "base.json"), + "--head", + str(d / "head.json"), + "--out", + str(out), + ] + return out, subprocess.run([*args, *extra], capture_output=True, text=True) + + def test_main_writes_out_file_and_prints_meta(self): + with tempfile.TemporaryDirectory() as tmp: + d = Path(tmp) + self._analyses(d) + (d / "changed.txt").write_text("a.py\nunrelated.py\n") + out, r = self._run(d, "--changed-files", str(d / "changed.txt")) + self.assertEqual(r.returncode, 0, r.stderr) + content = out.read_text(encoding="utf-8") + self.assertIn("a.py", content) + self.assertTrue(content.endswith("
\n")) # trailing newline: see main() + meta = json.loads(r.stdout) + self.assertEqual(set(meta), {"rendered", "n_components", "n_files", "truncated"}) + self.assertTrue(meta["rendered"]) + + def test_non_utf8_changed_files_does_not_crash(self): + # core.quotepath=off emits raw filename bytes; a non-UTF-8 path must not + # kill the section — it just can't intersect with the analysis's paths. + with tempfile.TemporaryDirectory() as tmp: + d = Path(tmp) + self._analyses(d) + (d / "changed.txt").write_bytes(b"r\xe9sum\xe9.py\na.py\n") + out, r = self._run(d, "--changed-files", str(d / "changed.txt")) + self.assertEqual(r.returncode, 0, r.stderr) + self.assertIn("a.py", out.read_text(encoding="utf-8")) + + def test_empty_result_writes_zero_bytes(self): + # The action gates the section on [ -s "$FILES_MD" ]. + with tempfile.TemporaryDirectory() as tmp: + d = Path(tmp) + self._analyses(d) + (d / "changed.txt").write_text("elsewhere.py\n") + out, r = self._run(d, "--changed-files", str(d / "changed.txt")) + self.assertEqual(r.returncode, 0, r.stderr) + self.assertEqual(out.read_bytes(), b"") + + +class TestEngineGitPathContract(unittest.TestCase): + """file_methods[].file_path must be repo-relative forward-slash paths identical + to git --name-only output; pinned against the committed engine artifact (the + dogfood workflows regenerate it on engine bumps, so format drift fails here).""" + + def test_committed_analysis_paths_are_git_name_only_format(self): + root = Path(__file__).resolve().parent.parent + analysis = json.loads((root / ".codeboarding" / "analysis.json").read_text()) + paths = set() + + def collect(c): + for fm in c.get("file_methods") or []: + paths.add(fm["file_path"]) + for ke in c.get("key_entities") or []: + if ke.get("reference_file"): + paths.add(ke["reference_file"]) + for s in c.get("components") or []: + collect(s) + + for c in analysis.get("components") or []: + collect(c) + self.assertTrue(paths, "committed analysis.json has no file paths") + tracked = set( + subprocess.run( + ["git", "-C", str(root), "ls-files"], capture_output=True, text=True, check=True + ).stdout.splitlines() + ) + self.assertLessEqual(paths, tracked, f"paths not in git --name-only format: {sorted(paths - tracked)[:5]}") + + +if __name__ == "__main__": + unittest.main()