From 4b8ad0491d95293d6aa52af5dc6f45089d3f0ecb Mon Sep 17 00:00:00 2001
From: snowfox1003 <snowfox1003@gmail.com>
Date: Fri, 10 Apr 2026 16:57:29 -0400
Subject: [PATCH] feat(boost_usage): resolve missing headers via catalog lookup
 and batch prepass - #138

---
 boost_usage_tracker/admin.py                  |  45 ++++-
 .../commands/run_boost_usage_tracker.py       |   7 +
 boost_usage_tracker/post_process.py           |  48 ++---
 boost_usage_tracker/services.py               | 168 ++++++++++++++++-
 boost_usage_tracker/tests/test_services.py    | 177 ++++++++++++++++++
 docs/service_api/boost_usage_tracker.md       |  32 ++++
 github_activity_tracker/admin.py              |  74 +++++++-
 github_activity_tracker/models.py             |  13 ++
 8 files changed, 518 insertions(+), 46 deletions(-)

diff --git a/boost_usage_tracker/admin.py b/boost_usage_tracker/admin.py
index 48650d39..20bcfb92 100644
--- a/boost_usage_tracker/admin.py
+++ b/boost_usage_tracker/admin.py
@@ -1,6 +1,8 @@
 from django.contrib import admin
 from django.contrib.admin import ModelAdmin
 
+from boost_usage_tracker import services as boost_usage_services
+
 from .models import BoostExternalRepository, BoostMissingHeaderTmp, BoostUsage
 
 
@@ -41,7 +43,46 @@ class BoostUsageAdmin(ModelAdmin):
 
 @admin.register(BoostMissingHeaderTmp)
 class BoostMissingHeaderTmpAdmin(ModelAdmin):
-    list_display = ("id", "usage", "header_name", "created_at")
+    list_display = (
+        "id",
+        "header_name",
+        "usage_repo",
+        "usage_file_path",
+        "usage",
+        "created_at",
+    )
     list_filter = ("created_at",)
-    search_fields = ("header_name",)
+    search_fields = (
+        "header_name",
+        "usage__repo__repo_name",
+        "usage__file_path__filename",
+    )
     raw_id_fields = ("usage",)
+    list_select_related = ("usage__repo__owner_account", "usage__file_path")
+    actions = ("resolve_selected_if_in_catalog",)
+
+    @admin.display(description="External repo", ordering="usage__repo__repo_name")
+    def usage_repo(self, obj):
+        r = obj.usage.repo
+        return r.full_name if r else "—"
+
+    @admin.display(description="File path", ordering="usage__file_path__filename")
+    def usage_file_path(self, obj):
+        fp = obj.usage.file_path
+        return fp.filename if fp else "—"
+
+    @admin.action(
+        description="Resolve selected (if header exists in BoostFile catalog)"
+    )
+    def resolve_selected_if_in_catalog(self, request, queryset):
+        counts: dict[str, int] = {}
+        for tmp in queryset.select_related("usage__repo", "usage__file_path"):
+            outcome = boost_usage_services.resolve_missing_header_tmp_auto(tmp)
+            counts[outcome] = counts.get(outcome, 0) + 1
+        parts = [
+            f"resolved: {counts.get('resolved', 0)}",
+            f"skipped (no catalog match): {counts.get('skipped_no_match', 0)}",
+            f"skipped (ambiguous): {counts.get('skipped_ambiguous', 0)}",
+            f"errors: {counts.get('error', 0)}",
+        ]
+        self.message_user(request, "; ".join(parts))
diff --git a/boost_usage_tracker/management/commands/run_boost_usage_tracker.py b/boost_usage_tracker/management/commands/run_boost_usage_tracker.py
index 72d2993e..4265519e 100644
--- a/boost_usage_tracker/management/commands/run_boost_usage_tracker.py
+++ b/boost_usage_tracker/management/commands/run_boost_usage_tracker.py
@@ -21,6 +21,7 @@
 from django.utils.dateparse import parse_datetime
 
 from boost_usage_tracker.models import BoostExternalRepository
+from boost_usage_tracker.services import resolve_all_missing_header_tmp_batch
 from github_activity_tracker.models import GitHubRepository
 from boost_usage_tracker.boost_searcher import (
     BOOST_INCLUDE_SEARCH_BATCH_SIZE,
@@ -390,6 +391,12 @@ def _parse_ymd_or_none(value, opt_name):
 
         try:
             if not task_filter or task_filter == "monitor_content":
+                prepass_stats = resolve_all_missing_header_tmp_batch(dry_run=dry_run)
+                logger.info(
+                    "missing_header_tmp prepass (dry_run=%s): %s",
+                    dry_run,
+                    prepass_stats,
+                )
                 task_monitor_content(since, until, min_stars, dry_run)
 
             if not task_filter or task_filter == "monitor_stars":
diff --git a/boost_usage_tracker/post_process.py b/boost_usage_tracker/post_process.py
index c7a05199..d683f966 100644
--- a/boost_usage_tracker/post_process.py
+++ b/boost_usage_tracker/post_process.py
@@ -14,14 +14,16 @@
 from datetime import datetime
 from typing import TYPE_CHECKING
 
-from boost_library_tracker.models import BoostFile
 from boost_usage_tracker.boost_searcher import (
     detect_boost_version_in_repo,
     extract_boost_includes,
 )
 from boost_usage_tracker.repo_searcher import RepoSearchResult
 from boost_usage_tracker.services import (
+    boost_catalog_filename,
     bulk_create_or_update_boost_usage,
+    find_boost_file_for_header_name_detailed,
+    find_boost_files_exact_by_catalog_names,
     get_active_usages_for_repo,
     get_or_create_boost_external_repo,
     get_or_create_missing_header_usage,
@@ -36,59 +38,33 @@
 logger = logging.getLogger(__name__)
 
 
-def _resolve_boost_header(header_path: str):
-    """Resolve a Boost include path to a :class:`BoostFile` or *None*."""
-    parts = header_path.split("/")
-    for i in range(len(parts)):
-        suffix = "/".join(parts[i:])
-        boost_file = (
-            BoostFile.objects.filter(  # pylint: disable=no-member
-                github_file__filename__endswith=suffix
-            )
-            .select_related("github_file")
-            .first()
-        )  # pylint: disable=no-member
-        if boost_file:
-            return boost_file
-    return None
-
-
 def _resolve_boost_headers_bulk(header_paths: set[str]) -> dict[str, object]:
     """Resolve a set of Boost include paths to BoostFile instances in one pass.
 
     Returns a dict ``{header_path: BoostFile | None}``.  Deduplicates the
-    incoming paths and performs one bulk exact-match query first; unresolved
-    paths are then handled by suffix fallback.
+    incoming paths and performs one bulk exact-match query on
+    ``include/<header_path>`` first; unresolved paths are then retried with a
+    per-path exact lookup (no suffix matching).
     """
     if not header_paths:
         return {}
 
-    # Fast path: one bulk query for exact filename matches.
-    exact_rows = (
-        BoostFile.objects.filter(
-            github_file__filename__in=header_paths
-        )  # pylint: disable=no-member
-        .select_related("github_file")
-        .order_by("github_file_id")
-    )
-    by_filename: dict[str, object] = {}
-    for row in exact_rows:
-        filename = row.github_file.filename
-        if filename not in by_filename:
-            by_filename[filename] = row
+    catalog_names = {boost_catalog_filename(p) for p in header_paths}
+    exact_map = find_boost_files_exact_by_catalog_names(catalog_names)
 
     resolved: dict[str, object] = {}
     unresolved: list[str] = []
     for path in header_paths:
-        boost_file = by_filename.get(path)
+        cn = boost_catalog_filename(path)
+        boost_file = exact_map.get(cn)
         if boost_file is not None:
             resolved[path] = boost_file
         else:
             unresolved.append(path)
 
-    # Fallback for non-exact cases (still deduplicated by unique header path).
     for path in unresolved:
-        resolved[path] = _resolve_boost_header(path)
+        bf, _ = find_boost_file_for_header_name_detailed(path)
+        resolved[path] = bf
 
     return resolved
 
diff --git a/boost_usage_tracker/services.py b/boost_usage_tracker/services.py
index 936ab6e9..6b7e51b8 100644
--- a/boost_usage_tracker/services.py
+++ b/boost_usage_tracker/services.py
@@ -13,12 +13,13 @@
 
 import logging
 from datetime import date, datetime
-from typing import TYPE_CHECKING, Optional
+from typing import TYPE_CHECKING, Literal, Optional
+
+from boost_library_tracker.models import BoostFile
 
 from .models import BoostExternalRepository, BoostMissingHeaderTmp, BoostUsage
 
 if TYPE_CHECKING:
-    from boost_library_tracker.models import BoostFile
     from github_activity_tracker.models import GitHubFile, GitHubRepository
 
 logger = logging.getLogger(__name__)
@@ -124,7 +125,7 @@ def update_boost_external_repo(
 
 def create_or_update_boost_usage(
     repo: BoostExternalRepository,
-    boost_header: "BoostFile",
+    boost_header: BoostFile,
     file_path: "GitHubFile",
     last_commit_date: Optional[datetime] = None,
 ) -> tuple[BoostUsage, bool]:
@@ -171,6 +172,165 @@ def get_active_usages_for_repo(
     )
 
 
+def boost_catalog_filename(header_path: str) -> str:
+    """Normalize a Boost include path to ``GitHubFile.filename`` in the Boost tree.
+
+    Catalog rows use ``include/<header_path>`` (e.g. ``include/boost/asio.hpp``).
+    """
+    if header_path.startswith("include/"):
+        return header_path
+    return f"include/{header_path}"
+
+
+def _disambiguate_boost_file_candidates(
+    candidates: list[BoostFile],
+) -> Optional[BoostFile]:
+    """Pick one :class:`~boost_library_tracker.models.BoostFile` when several match.
+
+    Rules:
+    - Exactly one non-deleted ``GitHubFile`` → return that ``BoostFile``.
+    - More than one non-deleted → ambiguous, return ``None``.
+    - No non-deleted candidates: exactly one (deleted) candidate → return it;
+      several deleted candidates (ambiguous) or an empty list → ``None``.
+    """
+    if not candidates:
+        return None
+    active = [c for c in candidates if not c.github_file.is_deleted]
+    all_n = len(candidates)
+    if len(active) == 1:
+        return active[0]
+    if len(active) > 1:
+        return None
+    if all_n == 1:
+        return candidates[0]
+    return None
+
+
+def find_boost_files_exact_by_catalog_names(
+    catalog_names: set[str],
+) -> dict[str, Optional[BoostFile]]:
+    """Map each catalog filename to a disambiguated ``BoostFile`` (or ``None``)."""
+    if not catalog_names:
+        return {}
+    rows = list(
+        BoostFile.objects.filter(
+            github_file__filename__in=catalog_names
+        ).select_related("github_file")
+    )
+    by_filename: dict[str, list[BoostFile]] = {}
+    for row in rows:
+        by_filename.setdefault(row.github_file.filename, []).append(row)
+    return {
+        name: _disambiguate_boost_file_candidates(by_filename.get(name, []))
+        for name in catalog_names
+    }
+
+
+def find_boost_file_for_header_name_detailed(
+    header_path: str,
+) -> tuple[Optional[BoostFile], Literal["found", "not_found", "ambiguous"]]:
+    """Resolve a Boost include path to ``BoostFile`` with a status for metrics."""
+    full_path = boost_catalog_filename(header_path)
+    exact = list(
+        BoostFile.objects.filter(github_file__filename=full_path).select_related(
+            "github_file"
+        )
+    )
+    picked = _disambiguate_boost_file_candidates(exact)
+    if picked is not None:
+        return picked, "found"
+    if len(exact) > 0:
+        return None, "ambiguous"
+
+    # Do not use substring or ``endswith`` on ``full_path``: a longer path such as
+    # ``libs/asio/include/boost/asio.hpp`` is a different file than
+    # ``include/boost/asio.hpp`` and must not be treated as the same header.
+    return None, "not_found"
+
+
+def find_boost_file_for_header_name(header_path: str) -> Optional[BoostFile]:
+    """Resolve a Boost include path to a ``BoostFile`` or ``None``."""
+    bf, _ = find_boost_file_for_header_name_detailed(header_path)
+    return bf
+
+
+def delete_boost_missing_header_tmp(tmp: BoostMissingHeaderTmp) -> None:
+    """Delete a temporary missing-header row (service-layer delete)."""
+    tmp.delete()
+
+
+def maybe_delete_placeholder_boost_usage_after_tmp_removed(usage_pk: int) -> bool:
+    """If *usage* is still a null-header placeholder with no tmp rows, delete it.
+
+    Returns ``True`` if a row was deleted.
+    """
+    usage = BoostUsage.objects.filter(pk=usage_pk).first()
+    if usage is None:
+        return False
+    if usage.boost_header_id is not None:
+        return False
+    if usage.missing_header_tmp.exists():
+        return False
+    usage.delete()
+    return True
+
+
+def resolve_missing_header_tmp_auto(tmp: BoostMissingHeaderTmp) -> str:
+    """Resolve one tmp row when the header exists unambiguously in the catalog.
+
+    Creates/updates real ``BoostUsage``, deletes *tmp*, and drops the placeholder
+    usage when it has no remaining tmp rows.
+
+    Returns one of: ``resolved``, ``skipped_no_match``, ``skipped_ambiguous``,
+    ``error`` (logged on exception).
+    """
+    boost_file, status = find_boost_file_for_header_name_detailed(tmp.header_name)
+    if status == "ambiguous":
+        return "skipped_ambiguous"
+    if boost_file is None:
+        return "skipped_no_match"
+    usage_pk = tmp.usage_id
+    try:
+        usage = tmp.usage
+        create_or_update_boost_usage(
+            usage.repo,
+            boost_file,
+            usage.file_path,
+            last_commit_date=usage.last_commit_date,
+        )
+        delete_boost_missing_header_tmp(tmp)
+        maybe_delete_placeholder_boost_usage_after_tmp_removed(usage_pk)
+    except Exception:  # pylint: disable=broad-exception-caught
+        logger.exception("resolve_missing_header_tmp_auto failed for tmp_id=%s", tmp.pk)
+        return "error"
+    return "resolved"
+
+
+def resolve_all_missing_header_tmp_batch(*, dry_run: bool = False) -> dict[str, int]:
+    """Process every ``BoostMissingHeaderTmp`` row (iterator, chunk-friendly).
+
+    When *dry_run* is ``True``, no writes; counts ``would_resolve`` / ``skipped_*``.
+    """
+    from collections import Counter
+
+    counts: Counter[str] = Counter()
+    qs = BoostMissingHeaderTmp.objects.all().select_related(
+        "usage__repo", "usage__file_path"
+    )
+    for tmp in qs.iterator(chunk_size=500):
+        if dry_run:
+            _, status = find_boost_file_for_header_name_detailed(tmp.header_name)
+            if status == "found":
+                counts["would_resolve"] += 1
+            elif status == "ambiguous":
+                counts["skipped_ambiguous"] += 1
+            else:
+                counts["skipped_no_match"] += 1
+        else:
+            counts[resolve_missing_header_tmp_auto(tmp)] += 1
+    return dict(counts)
+
+
 def get_or_create_missing_header_usage(
     repo: BoostExternalRepository,
     file_path: "GitHubFile",
@@ -208,7 +368,7 @@ def get_or_create_missing_header_usage(
 
 def bulk_create_or_update_boost_usage(
     repo: BoostExternalRepository,
-    items: list[tuple["BoostFile", "GitHubFile", Optional[datetime]]],
+    items: list[tuple[BoostFile, "GitHubFile", Optional[datetime]]],
 ) -> tuple[int, int]:
     """Create or update many BoostUsage rows in bulk.
 
diff --git a/boost_usage_tracker/tests/test_services.py b/boost_usage_tracker/tests/test_services.py
index d3cef058..08f8d279 100644
--- a/boost_usage_tracker/tests/test_services.py
+++ b/boost_usage_tracker/tests/test_services.py
@@ -694,3 +694,180 @@ def test_mark_usages_excepted_bulk_sets_excepted_at(
     assert n == 1
     usage.refresh_from_db()
     assert usage.excepted_at is not None
+
+
+# --- Boost header catalog lookup + missing-header resolution ---
+
+
+@pytest.mark.django_db
+def test_find_boost_file_for_header_name_single_match(boost_file, github_file):
+    """find_boost_file_for_header_name returns BoostFile when catalog has one match."""
+    assert github_file.filename == "include/boost/algorithm.hpp"
+    got = services.find_boost_file_for_header_name("boost/algorithm.hpp")
+    assert got is not None
+    assert got.pk == boost_file.pk
+
+
+@pytest.mark.django_db
+def test_find_boost_file_for_header_name_not_found():
+    """find_boost_file_for_header_name returns None when nothing matches."""
+    assert services.find_boost_file_for_header_name("boost/does/not/exist.hpp") is None
+
+
+@pytest.mark.django_db
+def test_find_boost_file_for_header_name_does_not_match_longer_path(
+    boost_library_repository,
+    boost_library,
+):
+    """Only ``include/<header>`` is the catalog key; longer paths are not aliases."""
+    from boost_library_tracker import services as bl_services
+    from model_bakery import baker
+
+    gf = baker.make(
+        "github_activity_tracker.GitHubFile",
+        repo=boost_library_repository,
+        filename="libs/asio/include/boost/asio/stuff.hpp",
+    )
+    bl_services.get_or_create_boost_file(gf, boost_library)
+    assert services.find_boost_file_for_header_name("boost/asio/stuff.hpp") is None
+
+
+@pytest.mark.django_db
+def test_find_boost_file_for_header_name_two_active_same_filename_ambiguous():
+    """Two non-deleted files with the same path in different repos → ambiguous."""
+    import uuid
+
+    from boost_library_tracker import services as bl_services
+    from model_bakery import baker
+
+    def _one_boost_file():
+        owner = baker.make("cppa_user_tracker.GitHubAccount")
+        gh_repo = baker.make(
+            "github_activity_tracker.GitHubRepository",
+            owner_account=owner,
+            repo_name="boostdup-" + uuid.uuid4().hex[:6],
+        )
+        bl_repo, _ = bl_services.get_or_create_boost_library_repo(gh_repo)
+        lib, _ = bl_services.get_or_create_boost_library(bl_repo, "dup-lib")
+        gf = baker.make(
+            "github_activity_tracker.GitHubFile",
+            repo=gh_repo,
+            filename="include/boost/dup_header.hpp",
+            is_deleted=False,
+        )
+        return bl_services.get_or_create_boost_file(gf, lib)[0]
+
+    _one_boost_file()
+    _one_boost_file()
+    bf, status = services.find_boost_file_for_header_name_detailed(
+        "boost/dup_header.hpp"
+    )
+    assert bf is None
+    assert status == "ambiguous"
+
+
+@pytest.mark.django_db
+def test_find_boost_file_for_header_name_single_deleted_still_picked():
+    """Only matching row is deleted → still linked (disambiguation rule)."""
+    import uuid
+
+    from boost_library_tracker import services as bl_services
+    from model_bakery import baker
+
+    owner = baker.make("cppa_user_tracker.GitHubAccount")
+    gh_repo = baker.make(
+        "github_activity_tracker.GitHubRepository",
+        owner_account=owner,
+        repo_name="boostdel-" + uuid.uuid4().hex[:6],
+    )
+    bl_repo, _ = bl_services.get_or_create_boost_library_repo(gh_repo)
+    lib, _ = bl_services.get_or_create_boost_library(bl_repo, "del-lib")
+    gf = baker.make(
+        "github_activity_tracker.GitHubFile",
+        repo=gh_repo,
+        filename="include/boost/deleted_only.hpp",
+        is_deleted=True,
+    )
+    bf, _ = bl_services.get_or_create_boost_file(gf, lib)
+    got = services.find_boost_file_for_header_name("boost/deleted_only.hpp")
+    assert got is not None
+    assert got.pk == bf.pk
+
+
+@pytest.mark.django_db
+def test_resolve_missing_header_tmp_auto_resolves(
+    ext_repo, external_github_file, boost_file
+):
+    """resolve_missing_header_tmp_auto creates real usage and removes tmp."""
+    _, tmp, _ = services.get_or_create_missing_header_usage(
+        ext_repo,
+        external_github_file,
+        "boost/algorithm.hpp",
+    )
+    placeholder_pk = tmp.usage_id
+    assert services.resolve_missing_header_tmp_auto(tmp) == "resolved"
+    assert BoostMissingHeaderTmp.objects.count() == 0
+    assert BoostUsage.objects.filter(
+        repo=ext_repo,
+        boost_header=boost_file,
+        file_path=external_github_file,
+    ).exists()
+    assert not BoostUsage.objects.filter(pk=placeholder_pk).exists()
+
+
+@pytest.mark.django_db
+def test_resolve_missing_header_tmp_auto_two_tmps_keeps_placeholder_until_last(
+    ext_repo,
+    external_github_file,
+    boost_library_repository,
+    boost_library,
+):
+    """Two tmp rows on same placeholder: first resolve keeps usage; second deletes it."""
+    from boost_library_tracker import services as bl_services
+    from model_bakery import baker
+
+    gf_a = baker.make(
+        "github_activity_tracker.GitHubFile",
+        repo=boost_library_repository,
+        filename="include/boost/resolve_a.hpp",
+    )
+    gf_b = baker.make(
+        "github_activity_tracker.GitHubFile",
+        repo=boost_library_repository,
+        filename="include/boost/resolve_b.hpp",
+    )
+    bl_services.get_or_create_boost_file(gf_a, boost_library)
+    bl_services.get_or_create_boost_file(gf_b, boost_library)
+
+    usage, tmp_a, _ = services.get_or_create_missing_header_usage(
+        ext_repo, external_github_file, "boost/resolve_a.hpp"
+    )
+    _, tmp_b, _ = services.get_or_create_missing_header_usage(
+        ext_repo, external_github_file, "boost/resolve_b.hpp"
+    )
+    assert tmp_b.usage_id == usage.pk
+
+    assert services.resolve_missing_header_tmp_auto(tmp_a) == "resolved"
+    assert BoostUsage.objects.filter(pk=usage.pk).exists()
+    assert BoostMissingHeaderTmp.objects.count() == 1
+
+    assert services.resolve_missing_header_tmp_auto(tmp_b) == "resolved"
+    assert BoostMissingHeaderTmp.objects.count() == 0
+    assert not BoostUsage.objects.filter(pk=usage.pk).exists()
+
+
+@pytest.mark.django_db
+def test_resolve_all_missing_header_tmp_batch_dry_run_no_writes(
+    ext_repo,
+    external_github_file,
+    boost_file,
+):
+    """Dry-run batch does not delete tmp rows."""
+    services.get_or_create_missing_header_usage(
+        ext_repo,
+        external_github_file,
+        "boost/algorithm.hpp",
+    )
+    stats = services.resolve_all_missing_header_tmp_batch(dry_run=True)
+    assert stats.get("would_resolve", 0) >= 1
+    assert BoostMissingHeaderTmp.objects.count() == 1
diff --git a/docs/service_api/boost_usage_tracker.md b/docs/service_api/boost_usage_tracker.md
index 82dba8da..a7d0ec1f 100644
--- a/docs/service_api/boost_usage_tracker.md
+++ b/docs/service_api/boost_usage_tracker.md
@@ -29,6 +29,38 @@
 
 ---
 
+## Boost header catalog lookup (read + disambiguation)
+
+Catalog paths use `include/<header_path>` (see `boost_catalog_filename`). Lookup uses **only** an exact `GitHubFile.filename` match to that full path (e.g. `include/boost/asio.hpp`). Longer paths such as `libs/asio/include/boost/asio.hpp` are different files and are **not** matched via substring or `endswith`. When several `BoostFile` rows share the same `GitHubFile.filename` (e.g. across repos), resolution uses `GitHubFile.is_deleted`:
+
+- Exactly one non-deleted match → that `BoostFile`.
+- More than one non-deleted match → ambiguous (`None`).
+- No non-deleted matches: exactly one candidate total (even if deleted) → that `BoostFile`; otherwise ambiguous or no match.
+
+| Function                               | Parameter types                                      | Return type                                           | Raises |
+| -------------------------------------- | ---------------------------------------------------- | ----------------------------------------------------- | ------ |
+| `boost_catalog_filename`               | `header_path: str`                                   | `str`                                                 | —      |
+| `find_boost_file_for_header_name`      | `header_path: str`                                   | `BoostFile \| None`                                   | —      |
+| `find_boost_file_for_header_name_detailed` | `header_path: str`                               | `tuple[BoostFile \| None, "found"\|"not_found"\|"ambiguous"]` | —      |
+| `find_boost_files_exact_by_catalog_names` | `catalog_names: set[str]`                        | `dict[str, BoostFile \| None]`                        | —      |
+
+---
+
+## BoostMissingHeaderTmp resolution
+
+| Function                              | Parameter types                    | Return type        | Raises |
+| ------------------------------------- | ---------------------------------- | ------------------ | ------ |
+| `delete_boost_missing_header_tmp`     | `tmp: BoostMissingHeaderTmp`       | `None`             | —      |
+| `maybe_delete_placeholder_boost_usage_after_tmp_removed` | `usage_pk: int`         | `bool`             | —      |
+| `resolve_missing_header_tmp_auto`     | `tmp: BoostMissingHeaderTmp`       | `str` (outcome tag)| —      |
+| `resolve_all_missing_header_tmp_batch`| `dry_run: bool = False`            | `dict[str, int]`   | —      |
+
+**Outcome tags for `resolve_missing_header_tmp_auto`:** `resolved`, `skipped_no_match`, `skipped_ambiguous`, `error`.
+
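+**Note:** `resolve_all_missing_header_tmp_batch` iterates all tmp rows. With `dry_run=True`, no DB writes; keys include `would_resolve`, `skipped_no_match`, `skipped_ambiguous`. With `dry_run=False`, keys are the `resolve_missing_header_tmp_auto` outcome tags. It runs before `monitor_content` in `run_boost_usage_tracker`; the Django admin action instead calls `resolve_missing_header_tmp_auto` on each selected row.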
+
+---
+
 ## Related docs
 
 - [Schema.md](../Schema.md) – Section 4: Boost Usage Tracker.
diff --git a/github_activity_tracker/admin.py b/github_activity_tracker/admin.py
index 94f8d465..ce91907c 100644
--- a/github_activity_tracker/admin.py
+++ b/github_activity_tracker/admin.py
@@ -1,5 +1,5 @@
 from django.contrib import admin
-from django.contrib.admin import ModelAdmin
+from django.contrib.admin import ModelAdmin, TabularInline
 
 from .models import (
     GitCommit,
@@ -31,16 +31,47 @@ class LicenseAdmin(ModelAdmin):
     search_fields = ("name", "spdx_id")
 
 
+class RepoLanguageInline(TabularInline):
+    """Languages use ``through=RepoLanguage``; edit links here, not as a raw M2M widget."""
+
+    model = RepoLanguage
+    extra = 0
+    raw_id_fields = ("language",)
+
+
 @admin.register(GitHubRepository)
 class GitHubRepositoryAdmin(ModelAdmin):
     list_display = (
         "id",
+        "full_name",
         "owner_account",
         "repo_name",
         "stars",
         "forks",
         "repo_pushed_at",
     )
+    list_select_related = ("owner_account",)
+    readonly_fields = ("full_name",)
+    fieldsets = (
+        (
+            None,
+            {
+                "fields": (
+                    "full_name",
+                    "owner_account",
+                    "repo_name",
+                    "stars",
+                    "forks",
+                    "description",
+                    "repo_pushed_at",
+                    "repo_created_at",
+                    "repo_updated_at",
+                ),
+            },
+        ),
+        ("Relations", {"fields": ("licenses",)}),
+    )
+    inlines = (RepoLanguageInline,)
     list_filter = ("repo_created_at",)
     search_fields = ("repo_name", "description")
     raw_id_fields = ("owner_account",)
@@ -62,10 +93,45 @@ class GitCommitAdmin(ModelAdmin):
 
 @admin.register(GitHubFile)
 class GitHubFileAdmin(ModelAdmin):
-    list_display = ("id", "repo", "filename", "is_deleted", "created_at")
+    list_display = (
+        "id",
+        "filename",
+        "repo_full_name",
+        "previous_path",
+        "is_deleted",
+        "boost_library_name",
+        "created_at",
+    )
     list_filter = ("is_deleted",)
-    search_fields = ("filename",)
-    raw_id_fields = ("repo",)
+    search_fields = (
+        "filename",
+        "repo__repo_name",
+        "repo__owner_account__username",
+        "previous_filename__filename",
+    )
+    raw_id_fields = ("repo", "previous_filename")
+    list_select_related = (
+        "repo__owner_account",
+        "previous_filename",
+        "boost_file__library",
+    )
+
+    @admin.display(description="Repository", ordering="repo__repo_name")
+    def repo_full_name(self, obj):
+        return obj.repo.full_name if obj.repo_id else "—"
+
+    @admin.display(description="Previous path")
+    def previous_path(self, obj):
+        if obj.previous_filename_id and obj.previous_filename:
+            return obj.previous_filename.filename
+        return "—"
+
+    @admin.display(description="Boost library", ordering="boost_file__library__name")
+    def boost_library_name(self, obj):
+        bf = getattr(obj, "boost_file", None)
+        if bf is not None and bf.library_id:
+            return bf.library.name
+        return "—"
 
 
 @admin.register(GitCommitFileChange)
diff --git a/github_activity_tracker/models.py b/github_activity_tracker/models.py
index edfcfa36..cc98d10c 100644
--- a/github_activity_tracker/models.py
+++ b/github_activity_tracker/models.py
@@ -125,6 +125,19 @@ class GitHubRepository(models.Model):
         blank=True,
     )
 
+    @property
+    def full_name(self) -> str:
+        """``owner_login/repo_name``, same shape as GitHub API ``full_name``."""
+        if not self.owner_account_id:
+            return self.repo_name
+        login = (self.owner_account.username or "").strip()
+        if not login:
+            return self.repo_name
+        return f"{login}/{self.repo_name}"
+
+    def __str__(self):
+        return self.full_name
+
     class Meta:
         db_table = "github_activity_tracker_githubrepository"
         ordering = ["owner_account", "repo_name"]