diff --git a/.codeboarding/analysis.json b/.codeboarding/analysis.json
index a7d7e4f..04dceb1 100644
--- a/.codeboarding/analysis.json
+++ b/.codeboarding/analysis.json
@@ -1,20 +1,20 @@
{
"metadata": {
- "generated_at": "2026-06-11T16:41:16.710439+00:00",
- "commit_hash": "ba5d7787eaecfa38964dd4663606927d98e501ba",
+ "generated_at": "2026-06-12T12:22:29.920827+00:00",
+ "commit_hash": "20bfd68682a5f048e8120701230c98539acff2ca",
"repo_name": "CodeBoarding-action",
"depth_level": 1,
"file_coverage_summary": {
- "total_files": 15,
- "analyzed": 3,
- "not_analyzed": 12,
+ "total_files": 17,
+ "analyzed": 4,
+ "not_analyzed": 13,
"not_analyzed_by_reason": {
"other": 9,
- "codeboardingignore": 3
+ "codeboardingignore": 4
}
}
},
- "description": "The CodeBoarding-action architecture is a CI/CD pipeline that automates architectural observability by comparing codebase versions, generating visual Mermaid.js diffs, and providing actionable IDE deep-links within GitHub Pull Requests.",
+ "description": "The CodeBoarding-action project implements a pipeline that orchestrates static analysis of code branches, performs structural diffing to generate visual Mermaid.js diagrams, and integrates these insights into GitHub Pull Requests via interactive deep links and editor-specific metadata.",
"files": {
"scripts/cb_engine.py": {
"method_keys": [
@@ -30,6 +30,17 @@
"scripts/cb_engine.py|scripts.cb_engine.main"
]
},
+ "scripts/build_component_files.py": {
+ "method_keys": [
+ "scripts/build_component_files.py|scripts.build_component_files._walk",
+ "scripts/build_component_files.py|scripts.build_component_files._subtree_files",
+ "scripts/build_component_files.py|scripts.build_component_files._subtree_methods",
+ "scripts/build_component_files.py|scripts.build_component_files._changed_files_for",
+ "scripts/build_component_files.py|scripts.build_component_files._block",
+ "scripts/build_component_files.py|scripts.build_component_files.render_component_files",
+ "scripts/build_component_files.py|scripts.build_component_files.main"
+ ]
+ },
"scripts/diff_to_mermaid.py": {
"method_keys": [
"scripts/diff_to_mermaid.py|scripts.diff_to_mermaid.load_analysis",
@@ -50,7 +61,6 @@
"scripts/diff_to_mermaid.py|scripts.diff_to_mermaid._display_status",
"scripts/diff_to_mermaid.py|scripts.diff_to_mermaid._Scope",
"scripts/diff_to_mermaid.py|scripts.diff_to_mermaid._Scope.__init__",
- "scripts/diff_to_mermaid.py|scripts.diff_to_mermaid._Scope.resolve",
"scripts/diff_to_mermaid.py|scripts.diff_to_mermaid._filter_changed",
"scripts/diff_to_mermaid.py|scripts.diff_to_mermaid._filter_changed.touches",
"scripts/diff_to_mermaid.py|scripts.diff_to_mermaid._init_directive",
@@ -66,6 +76,7 @@
"scripts/build_cta.py": {
"method_keys": [
"scripts/build_cta.py|scripts.build_cta.detect_editors",
+ "scripts/build_cta.py|scripts.build_cta.build_webview_link",
"scripts/build_cta.py|scripts.build_cta.build_cta",
"scripts/build_cta.py|scripts.build_cta.build_cta.link",
"scripts/build_cta.py|scripts.build_cta.main"
@@ -143,6 +154,55 @@
"end_line": 257,
"type": "FUNCTION"
},
+ "scripts/build_component_files.py|scripts.build_component_files._walk": {
+ "file_path": "scripts/build_component_files.py",
+ "qualified_name": "scripts.build_component_files._walk",
+ "start_line": 56,
+ "end_line": 62,
+ "type": "FUNCTION"
+ },
+ "scripts/build_component_files.py|scripts.build_component_files._subtree_files": {
+ "file_path": "scripts/build_component_files.py",
+ "qualified_name": "scripts.build_component_files._subtree_files",
+ "start_line": 65,
+ "end_line": 74,
+ "type": "FUNCTION"
+ },
+ "scripts/build_component_files.py|scripts.build_component_files._subtree_methods": {
+ "file_path": "scripts/build_component_files.py",
+ "qualified_name": "scripts.build_component_files._subtree_methods",
+ "start_line": 77,
+ "end_line": 85,
+ "type": "FUNCTION"
+ },
+ "scripts/build_component_files.py|scripts.build_component_files._changed_files_for": {
+ "file_path": "scripts/build_component_files.py",
+ "qualified_name": "scripts.build_component_files._changed_files_for",
+ "start_line": 88,
+ "end_line": 101,
+ "type": "FUNCTION"
+ },
+ "scripts/build_component_files.py|scripts.build_component_files._block": {
+ "file_path": "scripts/build_component_files.py",
+ "qualified_name": "scripts.build_component_files._block",
+ "start_line": 104,
+ "end_line": 121,
+ "type": "FUNCTION"
+ },
+ "scripts/build_component_files.py|scripts.build_component_files.render_component_files": {
+ "file_path": "scripts/build_component_files.py",
+ "qualified_name": "scripts.build_component_files.render_component_files",
+ "start_line": 124,
+ "end_line": 175,
+ "type": "FUNCTION"
+ },
+ "scripts/build_component_files.py|scripts.build_component_files.main": {
+ "file_path": "scripts/build_component_files.py",
+ "qualified_name": "scripts.build_component_files.main",
+ "start_line": 178,
+ "end_line": 213,
+ "type": "FUNCTION"
+ },
"scripts/diff_to_mermaid.py|scripts.diff_to_mermaid.load_analysis": {
"file_path": "scripts/diff_to_mermaid.py",
"qualified_name": "scripts.diff_to_mermaid.load_analysis",
@@ -269,13 +329,6 @@
"end_line": 298,
"type": "METHOD"
},
- "scripts/diff_to_mermaid.py|scripts.diff_to_mermaid._Scope.resolve": {
- "file_path": "scripts/diff_to_mermaid.py",
- "qualified_name": "scripts.diff_to_mermaid._Scope.resolve",
- "start_line": 300,
- "end_line": 309,
- "type": "METHOD"
- },
"scripts/diff_to_mermaid.py|scripts.diff_to_mermaid._filter_changed": {
"file_path": "scripts/diff_to_mermaid.py",
"qualified_name": "scripts.diff_to_mermaid._filter_changed",
@@ -349,36 +402,43 @@
"scripts/build_cta.py|scripts.build_cta.detect_editors": {
"file_path": "scripts/build_cta.py",
"qualified_name": "scripts.build_cta.detect_editors",
- "start_line": 35,
- "end_line": 47,
+ "start_line": 36,
+ "end_line": 48,
+ "type": "FUNCTION"
+ },
+ "scripts/build_cta.py|scripts.build_cta.build_webview_link": {
+ "file_path": "scripts/build_cta.py",
+ "qualified_name": "scripts.build_cta.build_webview_link",
+ "start_line": 62,
+ "end_line": 77,
"type": "FUNCTION"
},
"scripts/build_cta.py|scripts.build_cta.build_cta": {
"file_path": "scripts/build_cta.py",
"qualified_name": "scripts.build_cta.build_cta",
- "start_line": 61,
- "end_line": 96,
+ "start_line": 80,
+ "end_line": 137,
"type": "FUNCTION"
},
"scripts/build_cta.py|scripts.build_cta.build_cta.link": {
"file_path": "scripts/build_cta.py",
"qualified_name": "scripts.build_cta.build_cta.link",
- "start_line": 79,
- "end_line": 80,
+ "start_line": 120,
+ "end_line": 121,
"type": "FUNCTION"
},
"scripts/build_cta.py|scripts.build_cta.main": {
"file_path": "scripts/build_cta.py",
"qualified_name": "scripts.build_cta.main",
- "start_line": 99,
- "end_line": 114,
+ "start_line": 140,
+ "end_line": 176,
"type": "FUNCTION"
}
},
"components": [
{
"name": "Analysis Orchestrator",
- "description": "Manages the execution lifecycle of the GitHub Action, including environment setup, triggering analysis, and data validation.",
+ "description": "The central controller that manages the GitHub Action lifecycle, coordinating environment setup and triggering static analysis for base and head branches.",
"key_entities": [
{
"qualified_name": "scripts.cb_engine.main",
@@ -399,10 +459,10 @@
"reference_end_line": 153
},
{
- "qualified_name": "scripts.cb_engine.validate_base_analysis",
+ "qualified_name": "scripts.cb_engine.run_health",
"reference_file": "scripts/cb_engine.py",
- "reference_start_line": 45,
- "reference_end_line": 76
+ "reference_start_line": 183,
+ "reference_end_line": 212
}
],
"source_cluster_ids": [
@@ -429,15 +489,9 @@
"can_expand": true
},
{
- "name": "Visual Diff Generator",
- "description": "The core logic engine that compares structural metadata between codebase versions and renders differences into Mermaid.js graph syntax.",
+ "name": "Visual Diff Engine",
+ "description": "Responsible for calculating differences between base and head states, generating Mermaid.js visualizations, and mapping code-level changes to architectural components. This cluster (scripts.build_component_files.py) handles subtree walking, identifying changed files, and mapping methods to components via _subtree_methods and render_component_files. It interacts with the Analysis Orchestrator and Engagement Layer by providing the structured diff data required for visualization.",
"key_entities": [
- {
- "qualified_name": "scripts.diff_to_mermaid.main",
- "reference_file": "scripts/diff_to_mermaid.py",
- "reference_start_line": 527,
- "reference_end_line": 568
- },
{
"qualified_name": "scripts.diff_to_mermaid.build_diff",
"reference_file": "scripts/diff_to_mermaid.py",
@@ -455,15 +509,34 @@
"reference_file": "scripts/diff_to_mermaid.py",
"reference_start_line": 162,
"reference_end_line": 207
+ },
+ {
+ "qualified_name": "scripts.diff_to_mermaid.load_analysis",
+ "reference_file": "scripts/diff_to_mermaid.py",
+ "reference_start_line": 50,
+ "reference_end_line": 54
}
],
"source_cluster_ids": [
2,
3,
4,
- 5
+ 5,
+ 7
],
"file_methods": [
+ {
+ "file_path": "scripts/build_component_files.py",
+ "methods": [
+ "scripts.build_component_files._walk",
+ "scripts.build_component_files._subtree_files",
+ "scripts.build_component_files._subtree_methods",
+ "scripts.build_component_files._changed_files_for",
+ "scripts.build_component_files._block",
+ "scripts.build_component_files.render_component_files",
+ "scripts.build_component_files.main"
+ ]
+ },
{
"file_path": "scripts/diff_to_mermaid.py",
"methods": [
@@ -485,7 +558,6 @@
"scripts.diff_to_mermaid._display_status",
"scripts.diff_to_mermaid._Scope",
"scripts.diff_to_mermaid._Scope.__init__",
- "scripts.diff_to_mermaid._Scope.resolve",
"scripts.diff_to_mermaid._filter_changed",
"scripts.diff_to_mermaid._filter_changed.touches",
"scripts.diff_to_mermaid._init_directive",
@@ -503,26 +575,26 @@
"can_expand": true
},
{
- "name": "UX & Integration Helper",
- "description": "Manages the generation of user-facing outputs, including GitHub comments, status checks, and interactive links (CTAs) for external viewing. This component integrates the new 'build_cta.py' logic to generate interactive webview links and editor-specific deep links, expanding its role from simple reporting to providing actionable CI/CD report navigation. It receives processed data from the Visual Diff Generator to finalize the GitHub Action output.",
+ "name": "Engagement & Integration Layer",
+ "description": "Generates user-facing metadata, detects developer environments, and creates CTA links to bridge GitHub PR comments with the interactive CodeBoarding platform.",
"key_entities": [
{
- "qualified_name": "scripts.build_cta.main",
+ "qualified_name": "scripts.build_cta.build_cta",
"reference_file": "scripts/build_cta.py",
- "reference_start_line": 99,
- "reference_end_line": 114
+ "reference_start_line": 80,
+ "reference_end_line": 137
},
{
- "qualified_name": "scripts.build_cta.build_cta",
+ "qualified_name": "scripts.build_cta.build_webview_link",
"reference_file": "scripts/build_cta.py",
- "reference_start_line": 61,
- "reference_end_line": 96
+ "reference_start_line": 62,
+ "reference_end_line": 77
},
{
"qualified_name": "scripts.build_cta.detect_editors",
"reference_file": "scripts/build_cta.py",
- "reference_start_line": 35,
- "reference_end_line": 47
+ "reference_start_line": 36,
+ "reference_end_line": 48
}
],
"source_cluster_ids": [
@@ -533,6 +605,7 @@
"file_path": "scripts/build_cta.py",
"methods": [
"scripts.build_cta.detect_editors",
+ "scripts.build_cta.build_webview_link",
"scripts.build_cta.build_cta",
"scripts.build_cta.build_cta.link",
"scripts.build_cta.main"
@@ -545,29 +618,29 @@
],
"components_relations": [
{
- "relation": "Passes validated analysis artifacts to initiate structural comparison",
+ "relation": "triggers state comparison and visualization",
"src_name": "Analysis Orchestrator",
- "dst_name": "Visual Diff Generator",
+ "dst_name": "Visual Diff Engine",
"src_id": "1",
"dst_id": "2",
"edge_count": 0,
"is_static": false
},
{
- "relation": "Provides context of changed files and components for deep-link generation",
- "src_name": "Visual Diff Generator",
- "dst_name": "UX & Integration Helper",
- "src_id": "2",
+ "relation": "provides execution context for CTA generation",
+ "src_name": "Analysis Orchestrator",
+ "dst_name": "Engagement & Integration Layer",
+ "src_id": "1",
"dst_id": "3",
"edge_count": 0,
"is_static": false
},
{
- "relation": "Returns formatted markdown snippets and CTA links for final output",
- "src_name": "UX & Integration Helper",
- "dst_name": "Analysis Orchestrator",
- "src_id": "3",
- "dst_id": "1",
+ "relation": "provides structured diff data for reporting",
+ "src_name": "Visual Diff Engine",
+ "dst_name": "Engagement & Integration Layer",
+ "src_id": "2",
+ "dst_id": "3",
"edge_count": 0,
"is_static": false
}
diff --git a/.codeboarding/health/health_report.json b/.codeboarding/health/health_report.json
index 5dc4c6e..d8a0138 100644
--- a/.codeboarding/health/health_report.json
+++ b/.codeboarding/health/health_report.json
@@ -1,13 +1,13 @@
{
"repository_name": "CodeBoarding-action",
- "timestamp": "2026-06-11T16:41:11.294364+00:00",
- "overall_score": 0.9996183206106869,
+ "timestamp": "2026-06-12T12:22:22.416630+00:00",
+ "overall_score": 0.9996710526315788,
"check_summaries": [
{
"check_name": "function_size",
"description": "Checks that functions/methods do not exceed line count thresholds",
"check_type": "standard",
- "total_entities_checked": 43,
+ "total_entities_checked": 50,
"findings_count": 0,
"warning_count": 0,
"score": 1.0,
@@ -17,7 +17,7 @@
"check_name": "fan_out",
"description": "Checks efferent coupling: how many other functions each function calls",
"check_type": "standard",
- "total_entities_checked": 43,
+ "total_entities_checked": 50,
"findings_count": 0,
"warning_count": 0,
"score": 1.0,
@@ -27,7 +27,7 @@
"check_name": "fan_in",
"description": "Checks afferent coupling: how many other functions call each function",
"check_type": "standard",
- "total_entities_checked": 43,
+ "total_entities_checked": 50,
"findings_count": 0,
"warning_count": 0,
"score": 1.0,
diff --git a/action.yml b/action.yml
index e9b24b8..2e72261 100644
--- a/action.yml
+++ b/action.yml
@@ -706,6 +706,8 @@ runs:
ACTION_PATH: ${{ github.action_path }}
TARGET_REPO: ${{ github.workspace }}/target-repo
DIAGRAM_MD: ${{ steps.diagram.outputs.diagram_md }}
+ BASE_ANALYSIS: ${{ steps.analyze.outputs.base_analysis }}
+ HEAD_ANALYSIS: ${{ steps.analyze.outputs.head_analysis }}
RUN_ID: ${{ github.run_id }}
N: ${{ steps.diagram.outputs.n_changed }}
CHANGED: ${{ steps.diagram.outputs.changed }}
@@ -744,6 +746,26 @@ runs:
--repo-path "$TARGET_REPO" --issues "${ISSUES:-0}" "${extra[@]}"
}
+ # Per-component changed-file dropdowns: which files made each node change
+ # color. The git listing is the PR's own changes (three-dot merge-base..head,
+ # the same set as the Files-changed tab; --no-renames so a moved file's old
+ # path still appears for the donor component). Node colors compare against
+ # the base *tip*, so a node colored only by base-branch churn on a stale PR
+ # gets no dropdown. Best-effort: if the git diff fails (e.g. merge-base
+ # unreachable on a shallow fork fetch) the script falls back to
+ # analysis-derived changes, and if the script fails the section is omitted —
+ # the comment always posts.
+ FILES_MD="${RUNNER_TEMP}/component_files.md"
+ : > "$FILES_MD"
+ if [ -n "$BASE_ANALYSIS" ] && [ -n "$HEAD_ANALYSIS" ]; then
+ CHANGED_LIST="${RUNNER_TEMP}/changed_files.txt"
+ files_args=(--base "$BASE_ANALYSIS" --head "$HEAD_ANALYSIS" --out "$FILES_MD")
+ if git -C "$TARGET_REPO" -c core.quotepath=off diff --no-renames --name-only "$BASE_SHA...HEAD" > "$CHANGED_LIST" 2>/dev/null; then
+ files_args+=(--changed-files "$CHANGED_LIST")
+ fi
+ python3 "$ACTION_PATH/scripts/build_component_files.py" "${files_args[@]}" || : > "$FILES_MD"
+ fi
+
{
echo "### ${HEADER} · $(headline)"
echo ""
@@ -761,6 +783,10 @@ runs:
else
echo "No architectural changes detected versus \`${BASE_REF}\`."
fi
+ if [ -s "$FILES_MD" ]; then
+ echo ""
+ cat "$FILES_MD"
+ fi
cta
echo ""
echo "codeboarding-action · run ${RUN_ID}"
diff --git a/scripts/build_component_files.py b/scripts/build_component_files.py
new file mode 100644
index 0000000..0ef64d5
--- /dev/null
+++ b/scripts/build_component_files.py
@@ -0,0 +1,217 @@
+"""Render per-component changed-file dropdowns for the sticky PR comment.
+
+Takes the same base/head ``analysis.json`` pair as ``diff_to_mermaid.py`` (the
+diff logic is imported from there, so the dropdowns and the diagram always
+agree on what changed) and emits one collapsed ```` block per changed
+top-level component, listing the files that made it change color — the question
+the colored diagram raises but can't answer.
+
+Which files count as "changed" for a component:
+
+ * With ``--changed-files`` (a ``git diff --no-renames --name-only`` listing of
+ the PR's own changes, merge-base..head — the same set as the Files-changed
+ tab): the intersection of the component subtree's file paths with that
+ listing. A component can own 40 files while the PR touched 2 — listing all
+ 40 would answer the wrong question. ``--no-renames`` matters: with rename
+ detection on, a moved file's old path never appears in ``--name-only`` and
+ the donor component's dropdown would silently vanish. Node colors compare
+ head against the base *tip*, so a node colored only by base-branch churn
+ on a stale PR intentionally gets no dropdown.
+ * Without it: the analysis-derived change set — files added to / removed
+ from the component plus files whose method set changed. This misses
+ body-only edits (the analysis can't see them), so the git listing is
+ preferred.
+
+Component file paths come from ``file_methods[].file_path`` plus
+``key_entities[].reference_file`` — some engine outputs (observed on
+TypeScript repos) carry file linkage only in ``key_entities``.
+
+Names and paths are emitted as HTML (````/````, escaped) rather than
+markdown spans so arbitrary repo content can't break the comment markup.
+Per-component and total size caps keep the block a small fraction of GitHub's
+65,536-char comment limit (the Mermaid diagram alone may use ~45k).
+
+Self-contained stdlib; imports the diff from its sibling diff_to_mermaid.py.
+"""
+
+from __future__ import annotations
+
+import argparse
+import html
+import json
+import sys
+from pathlib import Path
+
+sys.path.insert(0, str(Path(__file__).resolve().parent))
+import diff_to_mermaid as dm # noqa: E402
+
+# Budget: ~45k diagram + this block + ~1.5k header/CTA/footer must stay under
+# GitHub's 65,536-char comment cap; overflow drops dropdowns, never the diagram.
+MAX_TEXT = 10_000
+MAX_FILES_PER_COMPONENT = 15
+
+_NOUN = {"added": "added", "modified": "changed", "deleted": "removed"}
+
+
+def _walk(comp: dict, skip_deleted: bool = False):
+ """Yield ``comp`` and its whole subtree; optionally prune deleted ghosts."""
+ if skip_deleted and comp.get("diff_status") == "deleted":
+ return
+ yield comp
+ for sub in comp.get("components") or []:
+ yield from _walk(sub, skip_deleted)
+
+
+def _subtree_files(comp: dict | None, skip_deleted: bool = False) -> set:
+ if comp is None:
+ return set()
+ files: set = set()
+ for c in _walk(comp, skip_deleted):
+ files.update(fm.get("file_path") or "" for fm in c.get("file_methods") or [])
+ # Some engine outputs carry file linkage only in key_entities.
+ files.update(ke.get("reference_file") or "" for ke in c.get("key_entities") or [])
+ files.discard("")
+ return files
+
+
+def _subtree_methods(comp: dict | None, skip_deleted: bool = False) -> dict:
+ merged: dict = {}
+ if comp is None:
+ return merged
+ for c in _walk(comp, skip_deleted):
+ for fp, names in dm._methods_by_file(c).items():
+ merged.setdefault(fp, set()).update(names)
+ merged.pop("", None) # entries lacking file_path must not become a phantom file
+ return merged
+
+
+def _changed_files_for(comp: dict, base_match: dict | None, changed_files: set | None) -> list:
+ """Files to list for one changed top-level component (see module docstring)."""
+ if changed_files is not None:
+ # Ghost subtrees inside the diff carry base-side files, so the union of
+ # both sides covers added, modified, and deleted components alike.
+ return sorted((_subtree_files(comp) | _subtree_files(base_match)) & changed_files)
+ head_files = _subtree_files(comp, skip_deleted=True)
+ base_files = _subtree_files(base_match)
+ head_methods = _subtree_methods(comp, skip_deleted=True)
+ base_methods = _subtree_methods(base_match)
+ method_changed = {
+ fp for fp in set(head_methods) | set(base_methods) if head_methods.get(fp, set()) != base_methods.get(fp, set())
+ }
+ return sorted((head_files ^ base_files) | method_changed)
+
+
+def _block(name: str, status: str, files: list, n_sub: int = 0) -> str:
+ shown = files[:MAX_FILES_PER_COMPONENT]
+ hidden = len(files) - len(shown)
+ n = len(files)
+ # Rollup parents (only nested components changed) carry the recursive count
+ # the headline and diagram use, so the dropdown explains "3 components
+ # changed" instead of contradicting it.
+ rollup = f"{n_sub} changed sub-component{'' if n_sub == 1 else 's'}, " if n_sub else ""
+ lines = [
+ "",
+ f"{html.escape(name)} : {rollup}{n} file{'' if n == 1 else 's'} {_NOUN[status]}
",
+ "", # blank line: required for GitHub to render markdown after
+ ]
+ lines += [f"- {html.escape(fp)}" for fp in shown]
+ if hidden:
+ lines.append(f"- …and {hidden} more")
+ lines += ["", " "]
+ return "\n".join(lines)
+
+
+def render_component_files(
+ diff: dict,
+ base: dict,
+ changed_files: set | None = None,
+ max_chars: int = MAX_TEXT,
+) -> tuple:
+ """Return (markdown_text, meta). ``markdown_text`` is "" when there's nothing to list."""
+ base_by_name = {dm._comp_name(c): c for c in base.get("components") or []}
+ blocks: list = [] # (block_text, n_files_listed)
+ truncated = False
+ for comp in diff.get("components") or []:
+ status = dm._display_status(comp)
+ if status not in dm.CHANGED:
+ continue
+ # A deleted ghost IS its base component (the diff builds it from base),
+ # so use it directly; the name lookup would misattribute files when two
+ # top-level components share a name.
+ base_match = comp if comp.get("diff_status") == "deleted" else base_by_name.get(dm._comp_name(comp))
+ files = _changed_files_for(comp, base_match, changed_files)
+ if not files:
+ continue # e.g. relation-only change, base-branch churn, or a reorg the PR didn't touch
+ truncated = truncated or len(files) > MAX_FILES_PER_COMPONENT
+ n_sub = (
+ dm._count_changed_components(comp.get("components") or [])
+ if comp.get("diff_status") not in dm.CHANGED
+ else 0
+ )
+ blocks.append(
+ (
+ _block(dm._comp_name(comp) or dm._comp_id(comp) or "(unnamed)", status, files, n_sub),
+ min(len(files), MAX_FILES_PER_COMPONENT),
+ )
+ )
+
+ rendered: list = []
+ size = 0
+ n_components = n_files = 0
+ for i, (block, n_listed) in enumerate(blocks):
+ if size + len(block) > max_chars:
+ truncated = True
+ if rendered: # never emit a dangling "…and N more" with no blocks above it
+ left = len(blocks) - i
+ rendered.append(f"…and {left} more changed component{'' if left == 1 else 's'}")
+ break
+ rendered.append(block)
+ size += len(block) + 1
+ n_components += 1
+ n_files += n_listed
+
+ text = "\n".join(rendered)
+ meta = {"rendered": bool(text), "n_components": n_components, "n_files": n_files, "truncated": truncated}
+ return text, meta
+
+
+def main() -> int:
+ p = argparse.ArgumentParser(description=__doc__)
+ p.add_argument("--base", required=True, type=Path, help="Path to the base (before) analysis.json")
+ p.add_argument("--head", required=True, type=Path, help="Path to the head (after) analysis.json")
+ p.add_argument("--out", required=True, type=Path, help="Where to write the markdown block")
+ p.add_argument(
+ "--changed-files",
+ type=Path,
+ default=None,
+ help="File with the PR's changed paths, one per line (git diff --no-renames "
+ "--name-only merge-base..head). Omit to fall back to analysis-derived changes.",
+ )
+ args = p.parse_args()
+
+ changed: set | None = None
+ if args.changed_files is not None:
+ try:
+ # surrogateescape: core.quotepath=off emits raw filename bytes; a
+ # non-UTF-8 path must not kill the whole section (it simply won't
+ # intersect with the analysis's UTF-8 paths).
+ raw = args.changed_files.read_text(encoding="utf-8", errors="surrogateescape")
+ except OSError as exc:
+ sys.exit(f"::error::Could not read changed-files list at {args.changed_files}: {exc}")
+ changed = {line.strip() for line in raw.splitlines() if line.strip()}
+
+ base = dm.load_analysis(args.base)
+ head = dm.load_analysis(args.head)
+ text, meta = render_component_files(dm.build_diff(base, head), base, changed)
+
+ # Trailing newline so the following CTA's "---" isn't absorbed into the
+ # HTML block; conditional so an empty result stays 0 bytes for
+ # the action's [ -s ] gate.
+ args.out.write_text(text + "\n" if text else "", encoding="utf-8")
+ # Machine-readable summary on stdout for the action to consume.
+ print(json.dumps(meta))
+ return 0
+
+
+if __name__ == "__main__":
+ sys.exit(main())
diff --git a/tests/test_build_component_files.py b/tests/test_build_component_files.py
new file mode 100644
index 0000000..9dc7307
--- /dev/null
+++ b/tests/test_build_component_files.py
@@ -0,0 +1,301 @@
+"""Unit tests for scripts/build_component_files.py — per-component changed-file dropdowns."""
+
+import json
+import re
+import subprocess
+import sys
+import tempfile
+import unittest
+from pathlib import Path
+
+sys.path.insert(0, str(Path(__file__).resolve().parent.parent / "scripts"))
+import build_component_files as bcf # noqa: E402
+import diff_to_mermaid as dm # noqa: E402
+
+SCRIPT = Path(__file__).resolve().parent.parent / "scripts" / "build_component_files.py"
+
+
+def comp(name, files=None, subs=None, key_files=None):
+ c = {
+ "name": name,
+ "component_id": name,
+ "file_methods": [{"file_path": f, "methods": m} for f, m in (files or {}).items()],
+ }
+ if key_files is not None:
+ c["key_entities"] = [{"reference_file": f} for f in key_files]
+ if subs is not None:
+ c["components"] = subs
+ return c
+
+
+def render(base, head, changed_files=None, max_chars=bcf.MAX_TEXT):
+ diff = dm.build_diff(base, head)
+ return bcf.render_component_files(diff, base, changed_files, max_chars)
+
+
+class TestGitIntersection(unittest.TestCase):
+ def test_modified_component_lists_only_touched_files(self):
+ base = {"components": [comp("Auth", {"a.py": ["f"], "b.py": ["g"], "c.py": ["h"]})]}
+ head = {"components": [comp("Auth", {"a.py": ["f", "f2"], "b.py": ["g"], "c.py": ["h"]})]}
+ text, meta = render(base, head, changed_files={"a.py", "unrelated.py"})
+ self.assertIn("a.py", text)
+ self.assertNotIn("b.py", text) # owned but untouched
+ self.assertNotIn("unrelated.py", text) # touched but not owned
+ self.assertIn("Auth : 1 file changed", text)
+ self.assertEqual(meta["n_components"], 1)
+ self.assertEqual(meta["n_files"], 1)
+
+ def test_added_component_wording(self):
+ base = {"components": []}
+ head = {"components": [comp("RateLimiter", {"rl/bucket.py": ["acquire"], "rl/config.py": ["load"]})]}
+ text, _ = render(base, head, changed_files={"rl/bucket.py", "rl/config.py"})
+ self.assertIn("RateLimiter : 2 files added", text)
+
+ def test_deleted_component_lists_base_files(self):
+ base = {"components": [comp("Legacy", {"legacy/store.py": ["get"], "legacy/migrations.py": ["mig"]})]}
+ head = {"components": []}
+ text, _ = render(base, head, changed_files={"legacy/store.py", "legacy/migrations.py"})
+ self.assertIn("Legacy : 2 files removed", text)
+ self.assertIn("legacy/migrations.py", text)
+
+ def test_unchanged_component_emits_nothing(self):
+ base = {"components": [comp("A", {"a.py": ["f"]})]}
+ head = {"components": [comp("A", {"a.py": ["f"]})]}
+ text, meta = render(base, head, changed_files={"a.py"})
+ self.assertEqual(text, "")
+ self.assertFalse(meta["rendered"])
+
+ def test_changed_component_with_no_touched_files_is_skipped(self):
+ # Model reorg: file moved between components, but the PR's git diff is elsewhere.
+ base = {"components": [comp("A", {"a.py": ["f"], "b.py": ["g"]})]}
+ head = {"components": [comp("A", {"a.py": ["f"]})]}
+ text, _ = render(base, head, changed_files={"elsewhere.py"})
+ self.assertEqual(text, "")
+
+ def test_empty_changed_files_set_means_no_dropdowns_not_fallback(self):
+ # Empty git diff (net-zero PR / re-run): an empty set must NOT fall
+ # back to analysis-derived changes — only None (flag omitted) does.
+ base = {"components": [comp("A", {"a.py": ["f"]})]}
+ head = {"components": [comp("A", {"a.py": ["f", "g"]})]}
+ text, meta = render(base, head, changed_files=set())
+ self.assertEqual(text, "")
+ self.assertFalse(meta["rendered"])
+
+ def test_nested_subtree_files_aggregate_to_top_level(self):
+ base = {"components": [comp("Parent", {}, subs=[comp("Child", {"deep/x.py": ["f"]})])]}
+ head = {"components": [comp("Parent", {}, subs=[comp("Child", {"deep/x.py": ["f", "g"]})])]}
+ text, _ = render(base, head, changed_files={"deep/x.py"})
+ self.assertIn("Parent", text)
+ self.assertIn("deep/x.py", text)
+ self.assertNotIn("Child", text) # one dropdown per top-level component
+
+ def test_rollup_parent_labels_changed_subcomponents(self):
+ # Parent unchanged itself (display_status rollup): the summary carries the
+ # recursive count the headline/diagram use, so counts don't contradict.
+ base = {"components": [comp("Parent", {"p.py": ["f"]}, subs=[comp("Child", {"c.py": ["g"]})])]}
+ head = {"components": [comp("Parent", {"p.py": ["f"]}, subs=[comp("Child", {"c.py": ["g", "g2"]})])]}
+ text, _ = render(base, head, changed_files={"c.py"})
+ self.assertIn("Parent : 1 changed sub-component, 1 file changed", text)
+
+ def test_deleted_nested_child_files_list_under_modified_parent(self):
+ base = {"components": [comp("Parent", {"p.py": ["f"]}, subs=[comp("Child", {"child/x.py": ["g"]})])]}
+ head = {"components": [comp("Parent", {"p.py": ["f"]}, subs=[])]}
+ text, _ = render(base, head, changed_files={"child/x.py"})
+ self.assertIn("Parent", text)
+ self.assertIn("child/x.py", text)
+
+ def test_key_entities_only_shape(self):
+ # Some engine outputs have file_methods: [] everywhere and carry file
+ # linkage only in key_entities[].reference_file (observed on TS repos).
+ base = {"components": []}
+ head = {"components": [comp("Webview", key_files=["src/panel.ts", "src/render.ts"])]}
+ text, _ = render(base, head, changed_files={"src/panel.ts", "src/render.ts"})
+ self.assertIn("Webview : 2 files added", text)
+ self.assertIn("src/panel.ts", text)
+
+ def test_duplicate_deleted_names_attribute_files_to_own_block(self):
+ base = {"components": [comp("Dup", {"one.py": ["f"]}), comp("Dup", {"two.py": ["g"]})]}
+ head = {"components": []}
+ for changed in (None, {"one.py", "two.py"}):
+ text, _ = render(base, head, changed_files=changed)
+ self.assertEqual(text.count("one.py"), 1, text)
+ self.assertEqual(text.count("two.py"), 1, text)
+
+
+class TestAnalysisFallback(unittest.TestCase):
+ def test_fallback_lists_structural_and_method_changes(self):
+ base = {"components": [comp("A", {"kept.py": ["f"], "gone.py": ["g"], "same.py": ["h"]})]}
+ head = {"components": [comp("A", {"kept.py": ["f", "f2"], "new.py": ["n"], "same.py": ["h"]})]}
+ text, _ = render(base, head, changed_files=None)
+ self.assertIn("kept.py", text) # method set changed
+ self.assertIn("gone.py", text) # removed from component
+ self.assertIn("new.py", text) # added to component
+ self.assertNotIn("same.py", text)
+
+ def test_fallback_deleted_component_lists_all_base_files(self):
+ base = {"components": [comp("Legacy", {"l/a.py": ["f"], "l/b.py": ["g"]})]}
+ head = {"components": []}
+ text, _ = render(base, head, changed_files=None)
+ self.assertIn("Legacy : 2 files removed", text)
+
+ def test_missing_file_path_entry_emits_no_phantom(self):
+ base = {"components": [comp("A", {"a.py": ["f"]})]}
+ head = {"components": [comp("A", {"a.py": ["f"]})]}
+ head["components"][0]["file_methods"].append({"methods": ["orphan"]}) # no file_path
+ text, _ = render(base, head, changed_files=None)
+ self.assertNotIn("", text)
+
+
+class TestOrdering(unittest.TestCase):
+ def test_file_lists_are_sorted(self):
+ files = {f: ["m"] for f in ["e.py", "b.py", "f.py", "a.py", "d.py", "c.py"]}
+ base = {"components": []}
+ head = {"components": [comp("A", files)]}
+ text, _ = render(base, head, changed_files=set(files))
+ paths = re.findall(r"([^<]+)", text)
+ self.assertEqual(paths, ["a.py", "b.py", "c.py", "d.py", "e.py", "f.py"])
+
+ def test_blocks_follow_diagram_order_deleted_ghosts_last(self):
+ # Head order first (matches Mermaid node emission), deleted ghosts appended
+ # last — NOT alphabetical: Alpha is deleted and must render after Zeta.
+ base = {"components": [comp("Zeta", {"z.py": ["f"]}), comp("Alpha", {"a.py": ["g"]})]}
+ head = {"components": [comp("Zeta", {"z.py": ["f", "f2"]})]}
+ text, _ = render(base, head, changed_files={"z.py", "a.py"})
+ self.assertEqual(re.findall(r"(\w+)", text), ["Zeta", "Alpha"])
+
+
+class TestCapsAndEscaping(unittest.TestCase):
+ def test_per_component_file_cap(self):
+ files = {f"src/f{i:02}.py": ["m"] for i in range(20)}
+ base = {"components": []}
+ head = {"components": [comp("Big", files)]}
+ text, meta = render(base, head, changed_files=set(files))
+ self.assertEqual(text.count(""), bcf.MAX_FILES_PER_COMPONENT)
+ self.assertIn("…and 5 more", text)
+ self.assertIn(": 20 files added", text) # count reflects reality, list is capped
+ self.assertTrue(meta["truncated"])
+
+ def test_total_char_budget_drops_whole_components(self):
+ base = {"components": []}
+ head = {"components": [comp(f"C{i}", {f"c{i}/f.py": ["m"]}) for i in range(10)]}
+ text, meta = render(base, head, changed_files={f"c{i}/f.py" for i in range(10)}, max_chars=300)
+ self.assertIn("more changed components", text)
+ self.assertTrue(meta["truncated"])
+ # meta counts what actually rendered, not what the budget dropped
+ self.assertEqual(meta["n_files"], text.count(""))
+ self.assertEqual(meta["n_components"], text.count(""))
+
+ def test_first_block_exceeding_budget_renders_nothing(self):
+ # Never a dangling "…and N more" with no blocks above it.
+ base = {"components": []}
+ head = {"components": [comp("Big", {f"very/long/path/file{i}.py": ["m"] for i in range(15)})]}
+ text, meta = render(base, head, changed_files={f"very/long/path/file{i}.py" for i in range(15)}, max_chars=100)
+ self.assertEqual(text, "")
+ self.assertFalse(meta["rendered"])
+ self.assertEqual(meta["n_files"], 0)
+ self.assertTrue(meta["truncated"])
+
+ def test_html_escaping_of_names_and_paths(self):
+ base = {"components": []}
+ head = {"components": [comp("A <& B", {"weird/&.py": ["m"]})]}
+ text, _ = render(base, head, changed_files={"weird/&.py"})
+ self.assertIn("A <& B", text)
+ self.assertIn("weird/<path>&.py", text)
+ self.assertNotIn("", text)
+
+ def test_blank_line_after_summary(self):
+ # GitHub only renders markdown inside after a blank line.
+ base = {"components": []}
+ head = {"components": [comp("A", {"a.py": ["m"]})]}
+ text, _ = render(base, head, changed_files={"a.py"})
+ self.assertIn("\n\n-", text)
+ self.assertIn("\n\n ", text)
+
+
+class TestCLI(unittest.TestCase):
+ def _analyses(self, d):
+ (d / "base.json").write_text(json.dumps({"components": [comp("Auth", {"a.py": ["f"], "b.py": ["g"]})]}))
+ (d / "head.json").write_text(json.dumps({"components": [comp("Auth", {"a.py": ["f", "f2"], "b.py": ["g"]})]}))
+
+ def _run(self, d, *extra):
+ out = d / "out.md"
+ args = [
+ sys.executable,
+ str(SCRIPT),
+ "--base",
+ str(d / "base.json"),
+ "--head",
+ str(d / "head.json"),
+ "--out",
+ str(out),
+ ]
+ return out, subprocess.run([*args, *extra], capture_output=True, text=True)
+
+ def test_main_writes_out_file_and_prints_meta(self):
+ with tempfile.TemporaryDirectory() as tmp:
+ d = Path(tmp)
+ self._analyses(d)
+ (d / "changed.txt").write_text("a.py\nunrelated.py\n")
+ out, r = self._run(d, "--changed-files", str(d / "changed.txt"))
+ self.assertEqual(r.returncode, 0, r.stderr)
+ content = out.read_text(encoding="utf-8")
+ self.assertIn("a.py", content)
+ self.assertTrue(content.endswith(" \n")) # trailing newline: see main()
+ meta = json.loads(r.stdout)
+ self.assertEqual(set(meta), {"rendered", "n_components", "n_files", "truncated"})
+ self.assertTrue(meta["rendered"])
+
+ def test_non_utf8_changed_files_does_not_crash(self):
+ # core.quotepath=off emits raw filename bytes; a non-UTF-8 path must not
+ # kill the section — it just can't intersect with the analysis's paths.
+ with tempfile.TemporaryDirectory() as tmp:
+ d = Path(tmp)
+ self._analyses(d)
+ (d / "changed.txt").write_bytes(b"r\xe9sum\xe9.py\na.py\n")
+ out, r = self._run(d, "--changed-files", str(d / "changed.txt"))
+ self.assertEqual(r.returncode, 0, r.stderr)
+ self.assertIn("a.py", out.read_text(encoding="utf-8"))
+
+ def test_empty_result_writes_zero_bytes(self):
+ # The action gates the section on [ -s "$FILES_MD" ].
+ with tempfile.TemporaryDirectory() as tmp:
+ d = Path(tmp)
+ self._analyses(d)
+ (d / "changed.txt").write_text("elsewhere.py\n")
+ out, r = self._run(d, "--changed-files", str(d / "changed.txt"))
+ self.assertEqual(r.returncode, 0, r.stderr)
+ self.assertEqual(out.read_bytes(), b"")
+
+
+class TestEngineGitPathContract(unittest.TestCase):
+ """file_methods[].file_path must be repo-relative forward-slash paths identical
+ to git --name-only output; pinned against the committed engine artifact (the
+ dogfood workflows regenerate it on engine bumps, so format drift fails here)."""
+
+ def test_committed_analysis_paths_are_git_name_only_format(self):
+ root = Path(__file__).resolve().parent.parent
+ analysis = json.loads((root / ".codeboarding" / "analysis.json").read_text())
+ paths = set()
+
+ def collect(c):
+ for fm in c.get("file_methods") or []:
+ paths.add(fm["file_path"])
+ for ke in c.get("key_entities") or []:
+ if ke.get("reference_file"):
+ paths.add(ke["reference_file"])
+ for s in c.get("components") or []:
+ collect(s)
+
+ for c in analysis.get("components") or []:
+ collect(c)
+ self.assertTrue(paths, "committed analysis.json has no file paths")
+ tracked = set(
+ subprocess.run(
+ ["git", "-C", str(root), "ls-files"], capture_output=True, text=True, check=True
+ ).stdout.splitlines()
+ )
+ self.assertLessEqual(paths, tracked, f"paths not in git --name-only format: {sorted(paths - tracked)[:5]}")
+
+
+if __name__ == "__main__":
+ unittest.main()