From 092562d6b657df0babe15c7097227ce628730797 Mon Sep 17 00:00:00 2001 From: JeffreyChen Date: Sat, 6 Jun 2026 22:10:39 +0800 Subject: [PATCH 01/16] Strengthen the paper-writing and slide-deck subagent rules MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fill real, high-value gaps in the authoring rule docs (each with the project's required Why + example + anti-pattern format): - paper_rule: add "Verb tense" — the per-section tense map (abstract / related-work / method / experiment / conclusion / future-work). A wrong tense is a top "this wasn't written by a researcher" tell, and it was only covered by one Abstract sentence before. - paper_rule: add "Reporting numbers and statistics" — significant figures, percentage-point vs relative %, p-value format, uncertainty, unit consistency. The no-fabrication rule governed whether a number is real; this governs how a real number is written. - slide-deck-rules: add Section 9 (one message per slide; assertion title + evidence body — the biggest "designed for a defence vs paper dumped onto slides" lever) and Section 10 (choose chart vs table vs KPI vs bullets to fit the data). - paper-summary-author: add a field-content quality bar that lands those rules on concrete PaperSummary fields, closing the loop at authoring time. Docs only; tests/test_agents_md.py passes. --- .claude/agents/rules/paper_rule.md | 79 ++++++++++++++++++++ .claude/agents/rules/slide-deck-rules.md | 27 +++++++ .claude/agents/tasks/paper-summary-author.md | 9 +++ 3 files changed, 115 insertions(+) diff --git a/.claude/agents/rules/paper_rule.md b/.claude/agents/rules/paper_rule.md index b957907..002e509 100644 --- a/.claude/agents/rules/paper_rule.md +++ b/.claude/agents/rules/paper_rule.md @@ -1145,6 +1145,85 @@ stack of independently-edited paragraphs. 
Two requirements: --- +## 時態規範 / Verb tense (HARD RULE) + +Academic English fixes verb tense by a section's **function**, and the wrong +tense is one of the loudest "this wasn't written by a researcher" tells — a +reviewer notices it in the first paragraph. The Abstract rule (§Abstract) says +"present perfect or past"; this is the whole-document map. 中文無時態變化,但 +「動機用現在式陳述仍存在的問題、方法與實驗用過去式敘述已完成的工作、結論用 +現在式陳述成立的事實」這個**語氣對應**仍然適用。 + +**Per-section tense (English):** + +| Where | Tense | Example | Why | +|---|---|---|---| +| Abstract / Intro — what *you did* | past / present perfect | "We **proposed** a method and **evaluated** it"; "experiments **showed**…" | reports completed work | +| Background, Related Work — established facts | present | "Transformers **dominate** NLP"; "X **is** widely used" | general truths hold now | +| Problem / research gap | present | "no existing method **handles** Y" | the gap exists now | +| Methodology — what your artefact *does* | present | "the encoder **splits** the prompt into za / zb" | describing the system's behaviour | +| Methodology — what you *did* to build / run it | past | "we **trained** on 3 GPUs"; "we **set** λ = 0.1" | completed actions | +| Experiment — setup + results | past | "we **evaluated** on three benchmarks"; "APD **reached** 92.3% ADA" | completed measurements | +| Result analysis / Conclusion — interpretation | present | "this **shows** that…"; "our method **improves**…" | the conclusion holds now | +| Future Work | future / modal | "we **plan to**…"; "this **could**…" | not yet done | + +**Why it matters:** mixing tenses *inside one paragraph* (e.g. "We **propose** a +method and **evaluated** it and it **will show** gains") reads as +machine-stitched. Choose the tense from the section's function, not sentence by sentence. 
+ +**Anti-pattern:** "In this paper we **will present** a method… Experiments +**show**… We **concluded**…" — future in the abstract, present for completed +experiments, past for a standing conclusion: three wrong choices in one breath. +**Pattern:** "In this paper we **present** a method… Experiments **showed**… +We **conclude**…" + +**Audit checklist:** +- [ ] 摘要與緒論描述「我做了什麼」用過去式/完成式,**不**用 will。 +- [ ] 同一段落內時態一致(除非語意確實跨越「已完成的動作 vs 普遍成立的事實」)。 +- [ ] 只有 Future Work 用 will / could / plan to;其餘章節不用 will 描述已完成的實驗。 + +--- + +## 數字與統計呈現 / Reporting numbers and statistics (HARD RULE) + +The "no fabrication" rule (§不謊造) governs *whether* a number is real; this +rule governs *how* a real number is written. Sloppy number formatting is both a +credibility tell and a back-door to accidental fabrication — an over-precise +digit invents data you don't actually have. + +1. **Significant figures match the measurement.** Report 92.3%, not 92.31748% — + three sig-figs is plausible for an accuracy over a few-thousand-example + benchmark; six implies a precision you cannot have. Never paste a raw float + straight from a `.log`. +2. **Percentage point vs percent.** "Accuracy rose from 88% to 92%" is **+4 + percentage points (pp)**, NOT "+4%" (which reads as 88 × 1.04 = 91.5%) and + NOT "a 4% improvement" unless you mean *relative*. State which one. (Mirrors + the CLAUDE.md prose convention; it is a top reviewer nit.) +3. **p-values:** give the actual value (p = 0.003), not just "p < 0.05", and + never "p = 0.000" (write p < 0.001). A p-value without the test name and + sample size is unreviewable. +4. **Pair an effect with its uncertainty** where the field expects it: mean ± + std, or a 95% CI. A bare "92.3% over 3 seeds" hides the variance that decides + whether a 0.4-pp lead is real. +5. **Units, one per quantity, defined at first use.** Latency as "12.3 ms" + everywhere — not "0.0123 s" in one table and "12ms" in another. (See + §Technical terminology for first-use definition.) +6. 
**Round consistently within a table column** — every entry in an "ADA (%)" + column to the same number of decimals. + +**Anti-pattern:** "Our method is 4% better (accuracy 92.31748%, p = 0.000)." — +relative/absolute ambiguous, false precision, impossible p-value. +**Pattern:** "Our method improves accuracy by 4.0 pp (88.3% → 92.3%, mean of 3 +seeds; paired t-test, n = 3000, p = 0.003)." + +**Audit checklist:** +- [ ] 每個百分比都標明是「百分點 (absolute)」還是「相對 (%)」。 +- [ ] 沒有從 .log 直接貼出的超長浮點數;有效數字與量測精度相符。 +- [ ] p 值寫實際值,不寫 p = 0.000;附上檢定方法與樣本數。 +- [ ] 同一表格欄位的小數位數一致;單位全文統一且首次出現即定義。 + +--- + ## 字型規範 / Typography (HARD RULE) **EN.** Every academic paper / thesis produced through this project — whether diff --git a/.claude/agents/rules/slide-deck-rules.md b/.claude/agents/rules/slide-deck-rules.md index e9b8579..2c395f6 100644 --- a/.claude/agents/rules/slide-deck-rules.md +++ b/.claude/agents/rules/slide-deck-rules.md @@ -111,6 +111,33 @@ When changing the deck or i18n, delegate to the `slide-overflow-check` subagent **Interaction with content caps:** glosses cost chars and may push a bullet over `_BULLET_MAX_CHARS = 96`. When they do, the priority order from `paper_rule`'s tech-term rule applies: keep the gloss, trim adjacent filler, never drop the gloss. +### 9. One message per slide — assertion headline + evidence (HARD) + +A thesis-style deck is read by an audience watching a talk, not by someone reading a document. Each content slide must carry **one** takeaway, stated *as the title* (an **assertion** — a full claim, not a topic label), with the body acting as the **evidence** for that claim. This is the single biggest lever on whether a deck reads as "designed for a defence" vs "a paper dumped onto slides", and — unlike the geometry rules — it binds the **authoring** step (`paper-summary-author` / `regen_*.py`), not `PptxExporter`. + +- **Assertion title, not topic label.** The title is a sentence-shaped claim the audience should remember. 
+ - ❌ topic label: "Results", "Method", "Evaluation" + - ✅ assertion: "APD beats the 4 SOTA defences by ≥ 5.6 pp", "Disentangling za / zb cuts adversarial leakage to near-zero", "Distillation makes detection 2.3× faster" +- **One message.** If a slide needs two unrelated takeaways, it is two slides. The `PaperSummary` schema already encodes one-message units — each `headline_metrics` row, each `rq_results` block, each `pain_points` quadrant. Do NOT merge two RQs onto one slide to save space; `max_slides_per_paper` (default 25) exists so you don't have to. +- **Body = evidence for the title.** A KPI callout, one chart, one comparison table, or 3-5 tight bullets that *support the assertion* — never a wall of text restating it. If the body doesn't back the title's claim, one of the two is wrong. + +**Why:** a slide titled "Method" with eight bullets forces the audience to find the point themselves; a slide whose title *is* the point, evidenced below it, lands in five seconds. The exporter renders whatever the summary provides, so the assertion has to be authored into the slide's `title` / `subhead`, not left as a section label. + +**Anti-pattern:** title "Experiment Results", body = 9 bullets spanning 3 different findings. **Pattern:** three slides, each titled with one finding, each body = that finding's KPI / table / chart. + +### 10. Choose the evidence form that fits the data (HARD) + +§9 says the body is *evidence*; this says which **form** it takes. Authoring a deck means picking, per slide, between a chart, a table, a KPI callout, and bullets — the wrong choice buries the point even when the content is right. + +- **Trend / comparison across many values → chart.** "ADA across 3 benchmarks × 5 defences" is a grouped bar chart, not a 15-cell table the speaker reads aloud. The eye sees "ours is highest" instantly; it cannot from a number grid. 
+- **A few exact numbers that *are* the point → KPI callout.** "92.3% ADA · +5.6 pp · 12.3 ms" as three big bold numbers, not a sentence. `headline_metrics` is exactly this. +- **Structured many-row comparison where exact cells matter → table.** Literature positioning (§2.3) and per-RQ result tables, because the reader compares specific cells. Keep them ≤ ~5 rows on a slide (overflow rule §7). +- **Qualitative / sequential points → 3-5 bullets.** Pain-points, method steps, limitations — not numbers. + +**Why:** the exporter already supports figures (`figures`) and tables (`paper_tables` / `rq_results`) — a deck that renders every result as bullets leaves the exporter's strongest slide types unused and makes the audience do the comparison in their heads. + +**Anti-pattern:** a 5×4 accuracy table read cell-by-cell (should be a bar chart); or a single 92.3% drowned in a paragraph (should be a KPI). **Pattern:** chart for "who wins", table for "exact cells", KPI for "the one number", bullets for "the qualitative points". + --- ## LLM-as-agent vs Python pipeline (enrichment dispatch) diff --git a/.claude/agents/tasks/paper-summary-author.md b/.claude/agents/tasks/paper-summary-author.md index ad88bb0..a150540 100644 --- a/.claude/agents/tasks/paper-summary-author.md +++ b/.claude/agents/tasks/paper-summary-author.md @@ -212,6 +212,15 @@ For each paper that is on-topic for the user's actual intent (see "Off-topic pap 7. **Run the script.** `py scripts/regen_<...>.py`. Confirm each `.pptx` written. +### Field-content quality bar (apply the deck + paper rules at authoring time) + +The fields you write here are what `slide-deck-rules` and `paper_rule` later govern — satisfy those rules **as you author**, not after the deck renders: + +- **Each slide-driving string is an assertion, not a topic label** (slide-deck-rules §9). 
Write a `rq_results` question / `pain_points` sub-head / contribution heading as a claim — "Disentangling za / zb cuts adversarial leakage to near-zero", not "Method". One message per unit: never fold two RQs into one `rq_results` block to save a slide. +- **Pick the field that fits the data** (slide-deck-rules §10). A trend / many-value comparison goes in a `technique_table` / `rq_results.table` (→ table) or a `figures` entry (→ chart); the headline numbers go in `headline_metrics` (→ KPI callout); qualitative / sequential points go in the bullet fields. Don't cram a 5×4 result grid into prose bullets. +- **Numbers follow the reporting rules** (paper_rule §數字與統計呈現). `headline_metrics` values use measurement-appropriate significant figures (92.3%, not 92.31748%), label percentage-points vs relative %, and report p-values as actual values — and never invent a digit the PDF doesn't state. +- **No fabrication** (paper_rule §不謊造). Every number / RQ result / limitation must come from the PDF you read. If the paper doesn't report it, leave the field empty — the exporter skips empty fields, which is correct. + ## After all papers are authored Delegate two audits before handing the deck back — these are non-negotiable: From 72344e45fc2895a476599757c269c6767c598a84 Mon Sep 17 00:00:00 2001 From: JeffreyChen Date: Sat, 6 Jun 2026 22:26:24 +0800 Subject: [PATCH 02/16] Add figure-design and visual-hierarchy rules to deck-design MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Two real gaps in the slide visual-identity doc: - Figures & charts: the exporter inserts figures as PNGs (it draws no native charts), so figure quality is an authoring concern. Add dark-mode adaptation (transparent background — a white PNG on the #12151B slide is the figure version of rgb=None text), chartjunk stripping, brand-palette series, projector-readable label sizes, print DPI, and "re-plot beats screenshot". 
- Visual hierarchy & focal point: one focal element per slide, hierarchy by size (title > headline number > evidence > caption), whitespace, reading order — the visual rendering of slide-deck-rules Section 9's "one takeaway". Docs only; tests/test_agents_md.py passes. --- .claude/agents/rules/deck-design.md | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) diff --git a/.claude/agents/rules/deck-design.md b/.claude/agents/rules/deck-design.md index 05060fe..de460d6 100644 --- a/.claude/agents/rules/deck-design.md +++ b/.claude/agents/rules/deck-design.md @@ -310,6 +310,30 @@ provided every slide ends up with: column is clearly numeric values, prefer right-align so units / digits line up. Out of scope for the v1 ship.) +### Figures & charts (an "AI-generated" tell as loud as Calibri or black grids) + +The exporter inserts figures as PNGs via the `figures=` field (`_add_figure_image`); it does **not** draw native charts. So figure *quality* is an authoring responsibility — a default-matplotlib plot or a low-res screenshot undoes the brand discipline the rest of the deck earns. + +- **Dark-mode adaptation is mandatory.** The slide background is `_DARK_SLIDE_BG` (`#12151B`) by default. A white-background PNG dropped onto it shows a glaring white rectangle — the figure equivalent of `rgb=None` text on dark. Export plots with a **transparent background** (`savefig(..., transparent=True)`) and light foreground (axes / labels / lines in near-white or brand teal `#2DD4BF`), OR place the figure on a card whose fill has a `_LIGHT_TO_DARK_FILL` entry. Never a bare white PNG on the dark slide. +- **Strip chartjunk.** No default matplotlib grey panel, no spines on all four sides, no dense gridlines, no 3-D bars / pies, no drop shadows. Top + right spines off, at most one light horizontal gridline set. Data-ink first. 
+- **Brand palette, not library defaults.** Series colours come from the deck palette (navy / teal / grey), never matplotlib's `C0` blue / `C1` orange — default colours read as "pasted from a notebook". (Red stays banned here too, per the no-red contract.) +- **Readable when projected.** Axis labels + tick labels + legend ≥ ~14pt *in the rendered figure* (a 6pt matplotlib label is unreadable from row 10). Label every axis with its quantity AND unit ("Latency (ms)"), per `paper_rule`'s number-reporting rule. +- **Export at print DPI.** `dpi >= 150` (200 for line-heavy plots). A 72-DPI screenshot pixelates on a projector. +- **Paper screenshots are a last resort.** Re-plotting your own data beats screenshotting the paper's figure — a screenshot carries the paper's off-brand fonts / colours, JPEG artefacts, and usually a white background. Crop tightly; only screenshot when re-plotting is impossible (e.g. a qualitative architecture diagram). + +**Anti-pattern:** `plt.savefig("fig.png")` with defaults → grey panel, blue/orange series, 6pt labels, white border, dropped onto the dark slide. **Pattern:** `savefig("fig.png", dpi=200, transparent=True, bbox_inches="tight")` with teal / navy series, 14pt labels, top + right spines removed. + +### Visual hierarchy & focal point + +Each slide needs one element the eye lands on first — the takeaway from `slide-deck-rules` §9. Size, weight, colour and position build that hierarchy; without it every element competes and the audience reads top-to-bottom hunting for the point. + +- **One focal point per slide.** The biggest / boldest / most-saturated element *is* the takeaway — usually the KPI value (teal, bold, large) or the winning row of a table. Exactly one. +- **Hierarchy by size, not just order.** Title > headline number > evidence > caption, each visibly smaller. A KPI value at the same size as its label has no hierarchy. 
Caption / provenance text uses `_BRAND_GREY` so it recedes — the palette already encodes this (teal emphasises, grey recedes); don't invert it. +- **Whitespace is not wasted space.** A slide filled edge-to-edge has no focal point. Leave margins and let the KPI block breathe. The `FOOTER_GUARD` (7.05") and per-slide content caps exist partly so content can't sprawl across the whole canvas. +- **Reading order follows the layout.** Assertion title on top, evidence beneath it, provenance / caption last. Don't bury the conclusion in a footnote while the setup sits in the headline. + +**Anti-pattern:** title, three KPIs, a table and a caption all the same size and colour — no focal point, the eye wanders. **Pattern:** one KPI value ~2× the size of its label in teal, the table muted beneath it, caption small and grey. + ## Anti-patterns (instant "AI-generated" tells) - Plain `prs.slide_layouts[6]` (blank) with no programmatic accent. Every From ee3c1e42f576b32c1e149abc1ffa705865a76bf8 Mon Sep 17 00:00:00 2001 From: JeffreyChen Date: Sat, 6 Jun 2026 22:34:30 +0800 Subject: [PATCH 03/16] Add structural-slide and colour-accessibility rules to the deck agents MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Two more real ppt-agent gaps: - slide-deck-rules: add Section 11 (structural slides). The exporter renders cover / agenda / section-divider / Q&A / references, but only their *visual* accent was documented, not their job. Each structural slide has one navigational role: cover = title (not the raw query) + authors/year/venue; agenda only for multi-paper decks (pointers, not abstracts); divider = a cognitive reset, name+number only; Q&A = minimal, not a second conclusion; references = only the works actually cited, numbered, split on overflow — not a BibTeX dump. This is where "a paper dumped onto slides" leaks back in. 
- deck-design (Figures): don't encode meaning by colour alone — teal vs navy is hard for colour-blind viewers and indistinguishable in greyscale; encode twice (colour + marker shape / line style / direct label). Docs only; tests/test_agents_md.py passes. --- .claude/agents/rules/deck-design.md | 1 + .claude/agents/rules/slide-deck-rules.md | 14 ++++++++++++++ 2 files changed, 15 insertions(+) diff --git a/.claude/agents/rules/deck-design.md b/.claude/agents/rules/deck-design.md index de460d6..0c4a3e6 100644 --- a/.claude/agents/rules/deck-design.md +++ b/.claude/agents/rules/deck-design.md @@ -317,6 +317,7 @@ The exporter inserts figures as PNGs via the `figures=` field (`_add_figure_imag - **Dark-mode adaptation is mandatory.** The slide background is `_DARK_SLIDE_BG` (`#12151B`) by default. A white-background PNG dropped onto it shows a glaring white rectangle — the figure equivalent of `rgb=None` text on dark. Export plots with a **transparent background** (`savefig(..., transparent=True)`) and light foreground (axes / labels / lines in near-white or brand teal `#2DD4BF`), OR place the figure on a card whose fill has a `_LIGHT_TO_DARK_FILL` entry. Never a bare white PNG on the dark slide. - **Strip chartjunk.** No default matplotlib grey panel, no spines on all four sides, no dense gridlines, no 3-D bars / pies, no drop shadows. Top + right spines off, at most one light horizontal gridline set. Data-ink first. - **Brand palette, not library defaults.** Series colours come from the deck palette (navy / teal / grey), never matplotlib's `C0` blue / `C1` orange — default colours read as "pasted from a notebook". (Red stays banned here too, per the no-red contract.) +- **Don't encode meaning by colour alone.** Teal vs navy is hard for some colour-blind viewers and *indistinguishable* in a black-and-white printout. When two series must be told apart, encode them twice — colour **plus** a marker shape / line style (solid vs dashed) or a direct end-of-line label. 
The winning series can also be the only solid/heavy one. (The 4-colour brand palette is small precisely so it can't carry many simultaneous distinctions — lean on shape and labels.) - **Readable when projected.** Axis labels + tick labels + legend ≥ ~14pt *in the rendered figure* (a 6pt matplotlib label is unreadable from row 10). Label every axis with its quantity AND unit ("Latency (ms)"), per `paper_rule`'s number-reporting rule. - **Export at print DPI.** `dpi >= 150` (200 for line-heavy plots). A 72-DPI screenshot pixelates on a projector. - **Paper screenshots are a last resort.** Re-plotting your own data beats screenshotting the paper's figure — a screenshot carries the paper's off-brand fonts / colours, JPEG artefacts, and usually a white background. Crop tightly; only screenshot when re-plotting is impossible (e.g. a qualitative architecture diagram). diff --git a/.claude/agents/rules/slide-deck-rules.md b/.claude/agents/rules/slide-deck-rules.md index 2c395f6..4088f89 100644 --- a/.claude/agents/rules/slide-deck-rules.md +++ b/.claude/agents/rules/slide-deck-rules.md @@ -138,6 +138,20 @@ A thesis-style deck is read by an audience watching a talk, not by someone readi **Anti-pattern:** a 5×4 accuracy table read cell-by-cell (should be a bar chart); or a single 92.3% drowned in a paragraph (should be a KPI). **Pattern:** chart for "who wins", table for "exact cells", KPI for "the one number", bullets for "the qualitative points". +### 11. Structural slides (cover / agenda / divider / Q&A / references) + +Content slides carry the findings (§9); **structural** slides carry the *navigation*. They have different jobs, and over-filling them is a common "paper dumped onto slides" tell — a divider with eight bullets, or a references slide pasting a whole BibTeX file. Each structural slide has exactly **one** navigational job. + +- **Cover** (`_cover_title` + `_cover_subtitle`). 
Title = the paper's title run through `_cover_title` (title-cased, period / locale suffix added) — NEVER the raw search query (deck-design anti-pattern). Subtitle = authors · year · venue. For a multi-paper survey deck the cover title is the *survey topic*, not paper #1's title. Presenter name / affiliation / date belong here (a defence), not repeated on every slide. +- **Agenda** (`_agenda_line`). A multi-paper deck lists each paper as one pointer line so the audience can place each paper. A single-paper deck does **not** need an agenda — go cover → content; an agenda for one paper is filler. Agenda lines are pointers, never content (no abstracts on the agenda). +- **Section divider** (the larger top accent band, deck-design). A divider is a *cognitive reset* between topics — section name + number, nothing else. Resist putting the next section's first bullet on it. Its whole value is telling the audience "we've moved from Method to Results". +- **Q&A / closing.** One slide, minimal — "Q&A" or a thanks line + contact. It is NOT a second conclusion; the takeaways already landed on the findings slides. Don't restate results here. +- **References.** List ONLY the works the deck actually cites (the comparison table, the SOTA baselines), numbered to match the in-deck citation markers — not a full bibliography dump. Split across slides when it overflows `FOOTER_GUARD` (§7). Reference text may be small but must stay readable (contrast contract) and on-brand grey, not bright. + +**Why:** structural slides are exactly where "a paper dumped onto slides" leaks back in — a 40-entry references slide, an agenda restating abstracts, a divider doubling as a content slide. Keep each to its one navigational job. + +**Anti-pattern:** a references slide with 35 BibTeX entries in 9pt overflowing the footer; an agenda whose lines are one-sentence paper summaries. 
**Pattern:** references = the ~8 works actually cited, numbered [1]..[8], split across 2 slides if needed; agenda = "Paper N of M: <title>" pointers. + --- ## LLM-as-agent vs Python pipeline (enrichment dispatch) From c40b876b84e45a89794e25479f41611b7401e678 Mon Sep 17 00:00:00 2001 From: JeffreyChen Date: Sat, 6 Jun 2026 22:39:40 +0800 Subject: [PATCH 04/16] Add math-rendering and deck-length rules to slide-deck-rules The last two ppt-agent gaps with real deck-building value: - Section 12 (Math notation rendering): Section 8 says to *gloss* a symbol; this says how to *render* it. The exporter flattens everything to ASCII ("za" not z-subscript-a). Rule: real subscripts/superscripts (python-pptx baseline shift), italic variables + upright operators, Unicode math symbols not ASCII stand-ins, complex formulae as transparent-bg LaTeX PNGs (per the Figures dark-mode rule), one notation per concept deck-wide. - Section 13 (Deck length and pacing): max_slides_per_paper (25) is a talk-time budget (~1-1.5 min/slide -> ~20-30 min). Prune to takeaways rather than cramming past the per-slide caps; a multi-paper survey divides the budget; structural slides count but aren't content. Docs only; tests/test_agents_md.py passes. --- .claude/agents/rules/slide-deck-rules.md | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) diff --git a/.claude/agents/rules/slide-deck-rules.md b/.claude/agents/rules/slide-deck-rules.md index 4088f89..7d5a7cd 100644 --- a/.claude/agents/rules/slide-deck-rules.md +++ b/.claude/agents/rules/slide-deck-rules.md @@ -152,6 +152,28 @@ Content slides carry the findings (§9); **structural** slides carry the *naviga **Anti-pattern:** a references slide with 35 BibTeX entries in 9pt overflowing the footer; an agenda whose lines are one-sentence paper summaries. **Pattern:** references = the ~8 works actually cited, numbered [1]..[8], split across 2 slides if needed; agenda = "Paper N of M: <title>" pointers. +### 12. 
Math notation rendering (presentation, not just glossing) + +§8 says every math symbol must be *glossed* at first use; this says how to *render* the symbol itself. They are independent — `min 互資訊 I(za;zb|Ep)` glosses the operator but still renders the variable as the bare ASCII string "za", which reads as a word, not "z subscript a". + +- **Real subscripts / superscripts, not flattened ASCII.** `za` is z-sub-a, `λmax` is λ-sub-max, `x²` is x-super-2. python-pptx supports run-level baseline shift (`baseline="-25000"` for subscript, `30000` for superscript) — use it, or Unicode subscript glyphs (`z` + `ₐ`) as a fallback. Typing "za" / "lambda_max" / "x^2" literally is a tell. (The exporter currently flattens these to ASCII — surfacing it here so a builder fixes the run rather than copying the flat form.) +- **Variables italic, operators upright** (standard math typesetting). Variables `z`, `λ`, `x` italic; multi-letter operators `min`, `argmin`, `log`, `softmax` upright. `min` set in italic reads as m·i·n multiplied. +- **Unicode math symbols, not ASCII stand-ins.** `≤ ≥ × · ‖·‖ λ ∑ ∫ ∇ ∈ →`, not `<=`, `>=`, `x`, `sum`, `integral`, `->`. The per-language font stack renders these; ASCII substitutes look like code, not math. +- **Complex formulae → image, not text.** Multi-line equations, fractions, integrals / sums with limits, and matrices cannot be laid out in a pptx text run. Render them with LaTeX to a **transparent-background** PNG (per the Figures dark-mode rule in deck-design) and place via `figures=`. Don't fake a fraction by stacking "a / b" in two textboxes. +- **One notation per concept across the whole deck.** If the paper writes `z_a`, every slide writes `z_a` — not `za` here and `z_adv` there. (Mirrors the paper-side notation-consistency rule.) + +**Anti-pattern:** a slide reading `min I(za;zb|Ep) s.t. ||za-zb||_2 <= eps` — ASCII subscripts, ASCII norm, ASCII `<=`, operator unnamed. 
**Pattern:** `min I(z_a; z_b | E_p)` with real subscripts + italic variables, `‖z_a − z_b‖₂ ≤ ε`, and the operator named ("minimise the mutual information …") per §8. + +### 13. Deck length and pacing + +`max_slides_per_paper` (default 25) is a **talk-time budget**, not an arbitrary cap. A defence / seminar audience absorbs ~1-1.5 minutes per content slide, so ~25 slides ≈ a 20-30 minute talk for one paper. Authoring past the cap produces a deck that can't be delivered in the slot — the cap exists so you prune at *authoring* time, not live. + +- **Prune to the takeaways, don't shrink to fit.** When a paper has more than fits, drop the weakest unit (an extra method sub-section, a secondary RQ) — do NOT cram everything onto fewer slides past the per-slide caps (§4); that just recreates the wall-of-text tell. +- **A multi-paper survey divides the budget.** 5 papers in one 25-slide deck is ~5 slides each — a one-highlight-per-paper survey (cover / agenda / per-paper highlight / references), not a full thesis deck per paper. Set `max_slides_per_paper` to match the slot. +- **Structural slides count toward the budget but aren't content.** Cover + agenda + dividers + Q&A + references (§11) are ~5-6 of the 25, leaving ~19 for findings — plan around that, don't discover it at slide 25. + +**Anti-pattern:** 40 dense slides "because the paper is rich" — undeliverable, and every slide over-caps. **Pattern:** the cap forces the one-assertion-per-slide discipline of §9; if the content doesn't fit, it wasn't prioritised, not "the cap is too small". 
+ --- ## LLM-as-agent vs Python pipeline (enrichment dispatch) From c17466f76dbce01a8b99207e9d9fbd9c53b6f84c Mon Sep 17 00:00:00 2001 From: JeffreyChen Date: Sat, 6 Jun 2026 22:56:01 +0800 Subject: [PATCH 05/16] =?UTF-8?q?Render=20inline=20math=20as=20real=20subs?= =?UTF-8?q?cripts/superscripts=20in=20pptx=20(slide-deck-rules=20=C2=A712)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The exporter flattened math to ASCII ("za", not z-subscript-a). Add inline math rendering: authoring marks math with $...$; inside it `_x` / `_{xy}` become a real subscript and `^x` / `^{xy}` a superscript (via the run's OOXML `baseline` attribute, since python-pptx has no Font.subscript), single-letter tokens (variables z / λ / I) are italicised while multi-letter operators (min / log) stay upright. Plain `_` outside $...$ (file names, prose) is left untouched. - _render_math_paragraph + helpers; every run sets an explicit colour (dark-mode contract — a None-coloured run renders black on the dark slide). - Wired into _add_bullet_box, where math most often appears. A plain bullet still renders as one run exactly as before, so existing decks are unchanged. - 6 unit tests (subscript / superscript / braced / italic-variable-vs-upright- operator / plain-text / bullet integration). 598 tests pass; ruff + bandit clean. 
--- tests/test_exporters.py | 108 +++++++++++++++++++++++++++++ thesisagents/exporters/pptx.py | 123 ++++++++++++++++++++++++++++++--- 2 files changed, 220 insertions(+), 11 deletions(-) diff --git a/tests/test_exporters.py b/tests/test_exporters.py index f612a0f..db096fb 100644 --- a/tests/test_exporters.py +++ b/tests/test_exporters.py @@ -1389,3 +1389,111 @@ def test_export_unknown_format_raises(sample_papers, tmp_path): ) with pytest.raises(ExportError): export_collection(collection, options) + + +# --------------------------------------------------------------------------- +# Inline math rendering ($...$ -> real subscripts / superscripts + italic vars) +# --------------------------------------------------------------------------- + + +def _new_paragraph(): + from pptx import Presentation + from pptx.util import Inches + + prs = Presentation() + slide = prs.slides.add_slide(prs.slide_layouts[6]) + box = slide.shapes.add_textbox(Inches(1), Inches(1), Inches(8), Inches(1)) + return box.text_frame.paragraphs[0] + + +def _baseline(run): + rpr = run._r.rPr # noqa: SLF001 + return rpr.get("baseline") if rpr is not None else None + + +def _near_white(): + from pptx.dml.color import RGBColor + + return RGBColor(0xE5, 0xE7, 0xEB) + + +def test_render_math_subscript_and_italic_variable(): + from thesisagents.exporters import pptx as pptx_mod + + para = _new_paragraph() + pptx_mod._render_math_paragraph( # noqa: SLF001 + para, "$z_a$", size_pt=18, colour=_near_white() + ) + runs = para.runs + assert [r.text for r in runs] == ["z", "a"] + assert runs[0].font.italic is True # single-letter variable z + assert _baseline(runs[0]) is None # base char, normal baseline + assert _baseline(runs[1]) == "-25000" # a rendered as subscript + # Dark-mode contract: every run carries an explicit colour. 
+ assert all(r.font.color.rgb == _near_white() for r in runs) + + +def test_render_math_superscript_and_multiletter_operator_upright(): + from thesisagents.exporters import pptx as pptx_mod + + para = _new_paragraph() + pptx_mod._render_math_paragraph( # noqa: SLF001 + para, "$min x^2$", size_pt=18, colour=_near_white() + ) + by_text = {r.text: r for r in para.runs} + assert by_text["min"].font.italic is False # multi-letter operator upright + assert by_text["x"].font.italic is True # single-letter variable italic + assert _baseline(by_text["2"]) == "30000" # superscript + + +def test_render_math_braced_subscript(): + from thesisagents.exporters import pptx as pptx_mod + + para = _new_paragraph() + pptx_mod._render_math_paragraph( # noqa: SLF001 + para, "$z_{adv}$", size_pt=18, colour=_near_white() + ) + sub = next(r for r in para.runs if r.text == "adv") + assert _baseline(sub) == "-25000" # multi-char braced subscript + + +def test_render_math_plain_text_is_one_normal_run(): + from thesisagents.exporters import pptx as pptx_mod + + para = _new_paragraph() + pptx_mod._render_math_paragraph( # noqa: SLF001 + para, "plain text, no math here", size_pt=18, colour=_near_white() + ) + assert len(para.runs) == 1 + assert para.runs[0].text == "plain text, no math here" + assert _baseline(para.runs[0]) is None + + +def test_render_math_mixed_prose_and_span(): + from thesisagents.exporters import pptx as pptx_mod + + para = _new_paragraph() + pptx_mod._render_math_paragraph( # noqa: SLF001 + para, "loss $z_a$ over batch", size_pt=18, colour=_near_white() + ) + assert para.runs[0].text == "loss " # prose before the span + assert para.runs[-1].text == " over batch" # prose after the span + assert any(_baseline(r) == "-25000" for r in para.runs) # subscript inside + + +def test_bullet_box_renders_math_subscript(): + from pptx import Presentation + from pptx.util import Inches + + from thesisagents.exporters import pptx as pptx_mod + + prs = Presentation() + slide = 
prs.slides.add_slide(prs.slide_layouts[6]) + pptx_mod._add_bullet_box( # noqa: SLF001 + slide, name="body", bullets=["uses $z_a$ latent"], + left=Inches(1), top=Inches(1), width=Inches(8), height=Inches(2), + font_pt=16, + ) + para = slide.shapes[0].text_frame.paragraphs[0] + assert any(_baseline(r) == "-25000" for r in para.runs) # subscript rendered + assert all(r.font.color.rgb is not None for r in para.runs) # dark-mode contract diff --git a/thesisagents/exporters/pptx.py b/thesisagents/exporters/pptx.py index e5ed0dc..53cbbac 100644 --- a/thesisagents/exporters/pptx.py +++ b/thesisagents/exporters/pptx.py @@ -1308,6 +1308,112 @@ def _add_horizontal_rule(slide, *, top) -> None: line.line.width = Pt(0.75) +# --------------------------------------------------------------------------- +# Inline math rendering. Authoring marks math with $...$; inside it, `_x` / +# `_{xy}` render as a real subscript and `^x` / `^{xy}` as a superscript (via +# the run's OOXML `baseline` attribute, since python-pptx has no Font.subscript), +# and a single-letter token (a variable like z / λ / I) is italicised while a +# multi-letter word (an operator like min / log / softmax) stays upright. See +# slide-deck-rules §12. Plain `_` outside `$...$` (file names, prose) is left +# alone — only $-delimited spans are parsed. +# --------------------------------------------------------------------------- +_SUBSCRIPT_BASELINE = -25000 +_SUPERSCRIPT_BASELINE = 30000 +_MATH_DELIM = re.compile(r"\$([^$]+)\$") + + +def _set_run_baseline(run, baseline: int) -> None: + """Shift a run's baseline (1/1000 of a percent): negative = subscript, + positive = superscript. 
python-pptx exposes no Font.subscript, so set the + OOXML attribute on the run's character properties directly.""" + run._r.get_or_add_rPr().set("baseline", str(baseline)) # noqa: SLF001 + + +def _add_math_run( + paragraph, text: str, *, size_pt: int, colour: RGBColor, + bold: bool, italic: bool = False, baseline: int = 0, +): + """Append one styled run. Always sets an explicit colour (dark-mode + contract — a run with ``color.rgb = None`` renders black on the dark slide + and can't be swapped by the post-pass).""" + run = paragraph.add_run() + run.text = text + run.font.size = Pt(size_pt) + run.font.bold = bold + run.font.italic = italic + run.font.color.rgb = colour + if baseline: + _set_run_baseline(run, baseline) + return run + + +def _math_tokens(span: str): + """Tokenise an inline-math string into (kind, text): ``sub`` / ``sup`` for + ``_x`` / ``_{xy}`` / ``^x`` / ``^{xy}``, ``word`` for a letter run, ``char`` + for anything else.""" + i, n = 0, len(span) + while i < n: + c = span[i] + if c in "_^" and i + 1 < n: + j = i + 1 + if span[j] == "{": + end = span.find("}", j) + if end == -1: + content, i = span[j + 1:], n + else: + content, i = span[j + 1:end], end + 1 + else: + content, i = span[j], j + 1 + yield ("sub" if c == "_" else "sup"), content + elif c.isalpha(): + j = i + while j < n and span[j].isalpha(): + j += 1 + yield "word", span[i:j] + i = j + else: + yield "char", c + i += 1 + + +def _render_math_span(paragraph, span: str, *, size_pt: int, colour: RGBColor, bold: bool) -> None: + """Render one ``$...$`` inner string with real sub/superscripts and italic + single-letter variables (multi-letter operators like ``min`` stay upright).""" + common = {"size_pt": size_pt, "colour": colour, "bold": bold} + for kind, text in _math_tokens(span): + if kind == "sub": + _add_math_run(paragraph, text, baseline=_SUBSCRIPT_BASELINE, **common) + elif kind == "sup": + _add_math_run(paragraph, text, baseline=_SUPERSCRIPT_BASELINE, **common) + elif kind == "word": + 
_add_math_run(paragraph, text, italic=(len(text) == 1), **common) + else: + _add_math_run(paragraph, text, **common) + + +def _render_math_paragraph( + paragraph, text: str, *, size_pt: int, colour: RGBColor, bold: bool = False, +) -> None: + """Fill ``paragraph`` with runs, rendering ``$...$`` spans as math. Plain + text outside ``$...$`` becomes one run. Use instead of ``paragraph.text = + ...`` wherever a string may contain math notation. + + Example: ``_render_math_paragraph(p, "loss $I(z_a;z_b)$", ...)`` yields + "loss " + I(italic) + "(" + z(italic) + a(subscript) + ";" + … . + """ + paragraph.clear() + pos = 0 + for m in _MATH_DELIM.finditer(text): + if m.start() > pos: + _add_math_run(paragraph, text[pos:m.start()], size_pt=size_pt, colour=colour, bold=bold) + _render_math_span(paragraph, m.group(1), size_pt=size_pt, colour=colour, bold=bold) + pos = m.end() + if pos < len(text): + _add_math_run(paragraph, text[pos:], size_pt=size_pt, colour=colour, bold=bold) + if not paragraph.runs: + _add_math_run(paragraph, "", size_pt=size_pt, colour=colour, bold=bold) + + def _add_textbox( slide, *, name: str, text: str, left, top, width, height, font_pt: int, bold: bool = False, colour: RGBColor | None = None, @@ -1352,18 +1458,13 @@ def _add_bullet_box( return for index, bullet in enumerate(bullets): paragraph = text_frame.paragraphs[0] if index == 0 else text_frame.add_paragraph() - paragraph.text = f"• {bullet}" paragraph.alignment = PP_ALIGN.LEFT - for run in paragraph.runs: - run.font.size = Pt(font_pt) - # ALWAYS set the run colour explicitly. A run with - # ``font.color.rgb = None`` inherits the theme's body-text - # colour (which renders as black) and the dark-mode - # post-pass cannot swap it because there's no source RGB - # to look up in the mapping. See deck-design.md - # "Dark-mode contract" — every text-adding helper sets a - # palette colour, no exceptions. 
- run.font.color.rgb = _BRAND_DARK + # Render via _render_math_paragraph so $...$ spans become real + # subscripts / superscripts + italic variables; a plain bullet is one + # run. It sets an explicit _BRAND_DARK colour on every run (dark-mode + # contract — a None-coloured run renders black on the dark slide and the + # post-pass can't swap it). See deck-design.md "Dark-mode contract". + _render_math_paragraph(paragraph, f"• {bullet}", size_pt=font_pt, colour=_BRAND_DARK) def _add_footer(slide, text: str) -> None: From 2a847cad94c7fd0e775b597269146fe80e479bf5 Mon Sep 17 00:00:00 2001 From: JeffreyChen Date: Sat, 6 Jun 2026 23:04:35 +0800 Subject: [PATCH 06/16] Extend pptx math rendering to KPI values and table cells MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Factor _append_math_runs out of _render_math_paragraph (the non-clearing core that appends math-aware runs to a paragraph already holding runs) and route two more text surfaces through it: - KPI value run: "$λ_max$=0.1" now renders a real subscript, while a plain "78% F1" stays one upright run (no $, so the "F" isn't italic-ised, and the label / value / baseline three-run structure is unchanged). - Table cells (_style_table_cell): a comparison-table cell like "$z_a$" or "O($n^2$)" renders real sub/superscripts. Style-by-position (header bold, data _BRAND_DARK, row-label column heavier) is preserved, \n-split keeps a multi-line cell's paragraphs, and one helper call replaces cell.text plus a font loop. 3 unit tests (append preserves existing runs + renders math; plain KPI value is one upright run; table cell subscript). 601 tests pass; ruff + bandit clean. 
--- tests/test_exporters.py | 50 +++++++++++++++++++++++++++ thesisagents/exporters/pptx.py | 62 +++++++++++++++++++++------------- 2 files changed, 88 insertions(+), 24 deletions(-) diff --git a/tests/test_exporters.py b/tests/test_exporters.py index db096fb..f8459c7 100644 --- a/tests/test_exporters.py +++ b/tests/test_exporters.py @@ -1497,3 +1497,53 @@ def test_bullet_box_renders_math_subscript(): para = slide.shapes[0].text_frame.paragraphs[0] assert any(_baseline(r) == "-25000" for r in para.runs) # subscript rendered assert all(r.font.color.rgb is not None for r in para.runs) # dark-mode contract + + +def test_append_math_runs_preserves_existing_runs_and_renders_math(): + from thesisagents.exporters import pptx as pptx_mod + + para = _new_paragraph() + pptx_mod._add_math_run( # noqa: SLF001 — pre-existing label run + para, "Acc: ", size_pt=12, colour=_near_white(), bold=False + ) + pptx_mod._append_math_runs( # noqa: SLF001 — append value, don't clear + para, "$z_a$", size_pt=14, colour=_near_white(), bold=True + ) + assert para.runs[0].text == "Acc: " # label not cleared + assert any(_baseline(r) == "-25000" for r in para.runs) # subscript appended + + +def test_append_math_runs_plain_value_is_one_upright_run(): + from thesisagents.exporters import pptx as pptx_mod + + para = _new_paragraph() + pptx_mod._append_math_runs( # noqa: SLF001 + para, "78% F1", size_pt=14, colour=_near_white(), bold=True + ) + # A KPI value with no $ stays a single run — crucially the "F" is NOT + # italic-ised (math italic only applies inside $...$). 
+ assert len(para.runs) == 1 + assert para.runs[0].text == "78% F1" + assert para.runs[0].font.italic is not True + + +def test_table_cell_renders_math_subscript(): + from pptx import Presentation + from pptx.util import Inches + + from thesisagents.exporters import pptx as pptx_mod + + prs = Presentation() + slide = prs.slides.add_slide(prs.slide_layouts[6]) + pptx_mod._add_table( # noqa: SLF001 + slide, + rows=[["Method", "Loss"], ["Ours", "$z_a$"]], + left=Inches(1), top=Inches(1), width=Inches(8), height=Inches(2), + col_widths=[Inches(4), Inches(4)], + ) + table = next(s.table for s in slide.shapes if getattr(s, "has_table", False)) + math_runs = [r for p in table.cell(1, 1).text_frame.paragraphs for r in p.runs] + assert any(_baseline(r) == "-25000" for r in math_runs) # "$z_a$" subscript + # A plain header cell stays upright with no baseline shift. + hdr_runs = [r for p in table.cell(0, 0).text_frame.paragraphs for r in p.runs] + assert hdr_runs and all(_baseline(r) is None for r in hdr_runs) diff --git a/thesisagents/exporters/pptx.py b/thesisagents/exporters/pptx.py index 53cbbac..13f26f0 100644 --- a/thesisagents/exporters/pptx.py +++ b/thesisagents/exporters/pptx.py @@ -1402,6 +1402,18 @@ def _render_math_paragraph( "loss " + I(italic) + "(" + z(italic) + a(subscript) + ";" + … . """ paragraph.clear() + _append_math_runs(paragraph, text, size_pt=size_pt, colour=colour, bold=bold) + if not paragraph.runs: + _add_math_run(paragraph, "", size_pt=size_pt, colour=colour, bold=bold) + + +def _append_math_runs( + paragraph, text: str, *, size_pt: int, colour: RGBColor, bold: bool = False, +) -> None: + """Append math-aware runs for ``text`` to ``paragraph`` **without clearing + it** — ``$...$`` spans render as math, plain segments as one run each. Use + to add a math-aware value to a paragraph that already has runs (e.g. a KPI + value after its grey label). 
A string with no ``$`` adds exactly one run.""" pos = 0 for m in _MATH_DELIM.finditer(text): if m.start() > pos: @@ -1410,8 +1422,6 @@ def _render_math_paragraph( pos = m.end() if pos < len(text): _add_math_run(paragraph, text[pos:], size_pt=size_pt, colour=colour, bold=bold) - if not paragraph.runs: - _add_math_run(paragraph, "", size_pt=size_pt, colour=colour, bold=bold) def _add_textbox( @@ -1596,16 +1606,16 @@ def _add_kpi_lines( run_label.text = f"• {label}: " run_label.font.size = Pt(_BODY_PT) run_label.font.color.rgb = _BRAND_GREY - run_value = paragraph.add_run() - run_value.text = str(value) - run_value.font.size = Pt(_BODY_PT + 2) - run_value.font.bold = True - # Teal accent for KPI numbers — they're the slide's punch line - # (a 2.3x speedup, a 78% F1, etc.). Bold + teal makes them pop - # without using red, which would read as error/warning. Was red, - # then briefly navy; teal restores a real emphasis colour. - # See deck-design.md "No red text" contract. - run_value.font.color.rgb = _BRAND_HIGHLIGHT + # Value runs are math-aware ($...$ -> real sub/superscripts), so a KPI + # like "$λ_max$=0.1" renders with a real subscript, while a plain + # "78% F1" stays one upright run (no $, so the "F" isn't italic-ised). + # Teal accent: KPI numbers are the slide's punch line (a 2.3x speedup, + # a 78% F1, etc.). Bold + teal makes them pop without red, which would + # read as error/warning. See deck-design.md "No red text" contract. + _append_math_runs( + paragraph, str(value), + size_pt=_BODY_PT + 2, colour=_BRAND_HIGHLIGHT, bold=True, + ) if baseline: run_base = paragraph.add_run() run_base.text = f" ({baseline_label}: {baseline})" @@ -1654,7 +1664,6 @@ def _style_table_cell(cell, value: str, r: int, c: int) -> None: Split out from ``_add_table`` so the cognitive-complexity budget fits — borders + fills + font + alignment all live here. 
""" - cell.text = value cell.vertical_anchor = MSO_ANCHOR.MIDDLE text_frame = cell.text_frame text_frame.word_wrap = True @@ -1662,17 +1671,22 @@ def _style_table_cell(cell, value: str, r: int, c: int) -> None: text_frame.margin_right = Inches(0.1) text_frame.margin_top = Inches(0.05) text_frame.margin_bottom = Inches(0.05) - for paragraph in text_frame.paragraphs: - for run in paragraph.runs: - run.font.size = Pt(_TABLE_PT) - if r == 0: - run.font.bold = True - run.font.color.rgb = _TABLE_HEADER_FG - else: - run.font.color.rgb = _BRAND_DARK - if c == 0: - # Row-label column gets a slightly heavier weight. - run.font.bold = True + # Cell text style by position: the header row is bold on the header FG, data + # rows are _BRAND_DARK with the row-label column (c == 0) slightly heavier. + if r == 0: + cell_colour, cell_bold = _TABLE_HEADER_FG, True + else: + cell_colour, cell_bold = _BRAND_DARK, (c == 0) + # Math-aware so a comparison-table cell like "$z_a$" or "O($n^2$)" renders + # real sub/superscripts; a plain cell is one run, exactly as before. Split + # on \n so a multi-line cell keeps its paragraphs (cell.text used to do this). 
+ for line_index, line in enumerate(value.split("\n")): + paragraph = ( + text_frame.paragraphs[0] if line_index == 0 else text_frame.add_paragraph() + ) + _render_math_paragraph( + paragraph, line, size_pt=_TABLE_PT, colour=cell_colour, bold=cell_bold, + ) _set_cell_fill(cell, r) _clear_cell_borders(cell) if r == 1: From d6801d20fc622dcce9384e3ca0fe7924270ebf1d Mon Sep 17 00:00:00 2001 From: JeffreyChen Date: Sat, 6 Jun 2026 23:14:06 +0800 Subject: [PATCH 07/16] Name the repeated colour-swap idioms in pptx for readability (no behaviour change) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The dark-mode post-pass inlined two idioms several times each; give them a name so the value/expression lives in one place: - _rgb_key(rgb): the (int(rgb[0]), int(rgb[1]), int(rgb[2])) tuple used as the light->dark map key — was inlined 3x across _swap_fill / _swap_text_colors. - _rgb_hex(rgb): the "%02X%02X%02X" srgbClr `val` string — was inlined in the cell-border drawer and the border-recolour pass. - _DARK_BODY_TEXT: the #E5E7EB near-white dark-mode body colour, promoted from a per-call local in _swap_text_colors to a module constant beside _DARK_SLIDE_BG (single source of truth, matching the _BRAND_* / _LIGHT_TO_DARK_TEXT style). Pure refactor — 601 tests pass unchanged (incl. the dark-mode contract, no-invisible-runs, and no-red-text regressions that exercise these passes); ruff + bandit clean. --- thesisagents/exporters/pptx.py | 35 ++++++++++++++++++++++++---------- 1 file changed, 25 insertions(+), 10 deletions(-) diff --git a/thesisagents/exporters/pptx.py b/thesisagents/exporters/pptx.py index 13f26f0..64280ab 100644 --- a/thesisagents/exporters/pptx.py +++ b/thesisagents/exporters/pptx.py @@ -158,6 +158,11 @@ # Dark-mode palette (post-build recolour, opt-in via # ``ExportOptions.dark_mode``). _DARK_SLIDE_BG = RGBColor(0x12, 0x15, 0x1B) +# Near-white body-text colour for dark mode. 
The post-build recolour pass +# promotes any run lacking an explicit colour (theme inheritance → renders +# black on the dark slide) to this, and it's the swap target for _BRAND_DARK +# body text in _LIGHT_TO_DARK_TEXT below. Named so the value lives in one place. +_DARK_BODY_TEXT = RGBColor(0xE5, 0xE7, 0xEB) # Light-palette RGB → dark-palette RGB mapping for TEXT colours. Keys # are 3-tuples (R, G, B) since python-pptx's RGBColor is tuple-comparable @@ -201,6 +206,20 @@ _TABLE_DIVIDER = RGBColor(0xD0, 0xD7, 0xE2) # row divider — soft grey-blue _TABLE_HEADER_RULE = RGBColor(0x1F, 0x3A, 0x66) # heavy nav rule under header + +def _rgb_key(rgb) -> tuple[int, int, int]: + """An RGBColor as a hashable ``(r, g, b)`` int tuple — the key type the + light→dark palette maps above are indexed by. Centralises the + ``(int(rgb[0]), int(rgb[1]), int(rgb[2]))`` idiom used by every swap pass.""" + return (int(rgb[0]), int(rgb[1]), int(rgb[2])) + + +def _rgb_hex(rgb) -> str: + """6-char uppercase hex (no ``#``) for an RGBColor or ``(r, g, b)`` tuple — + the form OOXML's ``<a:srgbClr val="…">`` attribute expects.""" + return f"{rgb[0]:02X}{rgb[1]:02X}{rgb[2]:02X}" + + # --------------------------------------------------------------------------- # Abstract segmentation (fallback when summary is absent / lightweight only) # --------------------------------------------------------------------------- @@ -1741,8 +1760,7 @@ def _set_cell_border(cell, edge: str, width, colour: RGBColor) -> None: nsmap=None, ) solid = ln.makeelement(qn("a:solidFill"), {}, nsmap=None) - rgb_hex = f"{colour[0]:02X}{colour[1]:02X}{colour[2]:02X}" - solid.append(solid.makeelement(qn("a:srgbClr"), {"val": rgb_hex}, nsmap=None)) + solid.append(solid.makeelement(qn("a:srgbClr"), {"val": _rgb_hex(colour)}, nsmap=None)) ln.append(solid) ln.append(ln.makeelement(qn("a:prstDash"), {"val": "solid"}, nsmap=None)) ln.append(ln.makeelement(qn("a:round"), {}, nsmap=None)) @@ -2091,8 +2109,7 @@ def _swap_fill(shape_or_cell) -> None: return if
rgb is None: return - key = (int(rgb[0]), int(rgb[1]), int(rgb[2])) - new = _LIGHT_TO_DARK_FILL.get(key) + new = _LIGHT_TO_DARK_FILL.get(_rgb_key(rgb)) if new is None: return fill.solid() @@ -2112,18 +2129,16 @@ def _swap_text_colors(shape_or_cell) -> None: text_frame = getattr(shape_or_cell, "text_frame", None) if text_frame is None: return - near_white = RGBColor(0xE5, 0xE7, 0xEB) for paragraph in text_frame.paragraphs: for run in paragraph.runs: try: rgb = run.font.color.rgb except (AttributeError, ValueError, TypeError): rgb = None - if rgb is None or (int(rgb[0]), int(rgb[1]), int(rgb[2])) == (0, 0, 0): - run.font.color.rgb = near_white + if rgb is None or _rgb_key(rgb) == (0, 0, 0): + run.font.color.rgb = _DARK_BODY_TEXT continue - key = (int(rgb[0]), int(rgb[1]), int(rgb[2])) - new = _LIGHT_TO_DARK_TEXT.get(key) + new = _LIGHT_TO_DARK_TEXT.get(_rgb_key(rgb)) if new is not None: run.font.color.rgb = RGBColor(*new) @@ -2155,4 +2170,4 @@ def _swap_cell_border_colors(cell) -> None: new = _LIGHT_TO_DARK_FILL.get(key) if new is None: continue - clr.set("val", f"{new[0]:02X}{new[1]:02X}{new[2]:02X}") + clr.set("val", _rgb_hex(new)) From 71628aa36e6fc755422a0606c34a0b81794f863c Mon Sep 17 00:00:00 2001 From: JeffreyChen Date: Sat, 6 Jun 2026 23:23:26 +0800 Subject: [PATCH 08/16] Normalise rule-prose semicolons to commas in paper_rule (CLAUDE.md punctuation rule) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit CLAUDE.md mandates ,/, over ;/; for clause-joining in the rule base — comma- joined clauses scan faster than semicolon-stacked compounds, and mixed semicolon use makes the rule base read unevenly. paper_rule.md carried 31 full-width ; in its Chinese rule prose, audit checklists, and table cells; each becomes ,. 
Kept on purpose: the three ; inside the "可寫:『…』" paper-writing SAMPLES in the no-fabrication section — those demonstrate real thesis prose (where ; is a legitimate academic separator), not rule text, so changing them would misrepresent the sample. Math notation (I(za;zb)) and APA citation grouping use half-width ; and were never in scope. No rule meaning changes. Other audited dimensions were already correct: no stale package name (autopapertoppt → thesisagents fully migrated), the PaperSummary→thesis-section mapping table matches CLAUDE.md exactly, and the seven-section skeleton is complete. --- .claude/agents/rules/paper_rule.md | 62 +++++++++++++++--------------- 1 file changed, 31 insertions(+), 31 deletions(-) diff --git a/.claude/agents/rules/paper_rule.md b/.claude/agents/rules/paper_rule.md index 002e509..a8c0437 100644 --- a/.claude/agents/rules/paper_rule.md +++ b/.claude/agents/rules/paper_rule.md @@ -96,7 +96,7 @@ Suggested citations: statistics, industry reports, policy documents. / 可引用 ### 1.2 Motivation / 研究動機 -Why existing methods are insufficient; why this is worth studying. / 現有方法的不足;為何值得研究。 +Why existing methods are insufficient; why this is worth studying. / 現有方法的不足,為何值得研究。 Common framings / 常見切入面向: @@ -133,7 +133,7 @@ Briefly describe each chapter (one paragraph, 1-2 sentences per chapter). / 簡 ### 2.1 Background Knowledge / 背景知識 -Introduce the core techniques used in the research. Tip: progress from simple to complex, use figures and tables. / 介紹研究會用到的核心技術;由淺入深、搭配圖表。 +Introduce the core techniques used in the research. Tip: progress from simple to complex, use figures and tables. / 介紹研究會用到的核心技術,由淺入深、搭配圖表。 ### 2.2 Related Techniques / 相關技術 @@ -398,7 +398,7 @@ Both pass the never-mix invariant: they always rewrite **both** the literature-r ## Technical terminology — must include AND must explain (HARD) / 技術名詞:必須出現,且必須解釋(HARD) -A research paper must demonstrate technical depth — so the relevant **technical terms have to appear**. 
But the audience is not just the author's lab or sub-field; it includes thesis committee members from adjacent areas, conference reviewers skimming many submissions, and (for IEEE-Trans-style journals) non-specialists in the broader software-engineering community. **Every technical term must be glossed in plain language at first use** so a reader without domain background can still follow the paragraph. / 論文須展現技術深度,**技術名詞必須出現**;但讀者不只是同實驗室或同子領域,還包括相鄰領域的口試委員、快速翻閱的審稿人,與非該領域之 IEEE Trans. 讀者。**每個技術名詞於首次出現時,都必須以淺顯易懂之文字解釋**,使無相關背景之讀者仍能理解該段落。 +A research paper must demonstrate technical depth — so the relevant **technical terms have to appear**. But the audience is not just the author's lab or sub-field; it includes thesis committee members from adjacent areas, conference reviewers skimming many submissions, and (for IEEE-Trans-style journals) non-specialists in the broader software-engineering community. **Every technical term must be glossed in plain language at first use** so a reader without domain background can still follow the paragraph. / 論文須展現技術深度,**技術名詞必須出現**,但讀者不只是同實驗室或同子領域,還包括相鄰領域的口試委員、快速翻閱的審稿人,與非該領域之 IEEE Trans. 讀者。**每個技術名詞於首次出現時,都必須以淺顯易懂之文字解釋**,使無相關背景之讀者仍能理解該段落。 ### What counts as a "technical term" / 何謂技術名詞 @@ -437,7 +437,7 @@ Use **parenthetical glosses at first use only** — do NOT repeat the gloss on e ### Interaction with the 6-page Word constraint (or any length budget) -Plain-language glosses cost characters. When the paper has a hard length cap (e.g. TCSE_v2.3.docx targets 6 Word pages), the gloss budget must be paid for by tightening adjacent filler — never by skipping the gloss. Trade-offs in priority order: / 解釋會吃字元;於頁數受限時,**砍冗詞、保留解釋**: +Plain-language glosses cost characters. When the paper has a hard length cap (e.g. TCSE_v2.3.docx targets 6 Word pages), the gloss budget must be paid for by tightening adjacent filler — never by skipping the gloss. Trade-offs in priority order: / 解釋會吃字元,於頁數受限時,**砍冗詞、保留解釋**: 1. Keep the gloss. / 解釋優先保留。 2. 
Remove AI-tell fillers (`此外`, `進而`, `首先...其次`, `透過此一`, `從而提升`, `日益廣泛`) — see `compress_tcse_v22.py` for the inventory. / 移除 AI-tell。 @@ -546,7 +546,7 @@ is research misconduct and is forbidden without exception. model variant 都必須是作者親自跑過或在 cited 文獻中明確記載者。不 可加入「邏輯上應該存在」但未實際執行之比較欄位。 4. **不得編造參考文獻**。每筆引文都必須是作者親自確認存在且讀過之文獻。 - 看 title 像是真的、看 venue 像是真的、看作者像是真的,不代表它存在; + 看 title 像是真的、看 venue 像是真的、看作者像是真的,不代表它存在, 需以 DOI / arXiv ID / ISBN / 官方 venue 程式集驗證。 5. **不得編造系統參數**。閾值(如 τ = 0.85)、top-K(如 K = 3)、 threshold(如 0.7)若未由作者在程式碼或實驗中實際使用過、且未在實驗 @@ -603,11 +603,11 @@ is research misconduct and is forbidden without exception. 論文之根本價值在於「可被獨立驗證」。一旦某段內容無法被驗證,整篇論文 之可信度即受質疑——即便其他段落皆為真實。捏造之代價遠高於誠實標示 「未驗證」之代價。在期刊投稿環境中,被審稿人或讀者發現捏造數據等同 -於 retraction risk;在學位論文環境中,可被認定為學術不誠實。 +於 retraction risk,在學位論文環境中,可被認定為學術不誠實。 **任何 AI 協作工具(含本 agent set)若為「填滿論文」而產出看似合理但 無證據之內容,皆屬於違反此規則。作者收到 AI 產出之段落後,必須對每 -個實證宣稱執行上述 self-audit;無法通過者一律移除或改寫為未來工作。** +個實證宣稱執行上述 self-audit,無法通過者一律移除或改寫為未來工作。** > 本規則優先序最高。當其與「不重複」、「無 AI 口頭禪」、「圖表必須解釋」 > 等其他硬規則衝突時,本規則優先 —— 寧可省略某段、寧可標示「未驗證」、 @@ -631,29 +631,29 @@ seven canonical sections, all tables, and all figure captions. 1. **摘要 vs 緒論 1.1-1.2** — Abstract compresses the whole paper; the Introduction expands it. Do **not** copy-paste abstract sentences into 1.1 or 1.2. Re-phrase, expand with citation + data, or refer back. - / 摘要與緒論 1.1-1.2 不可逐句重複;緒論應展開、加引用、加數據,或直接回指。 + / 摘要與緒論 1.1-1.2 不可逐句重複,緒論應展開、加引用、加數據,或直接回指。 2. **緒論 1.5 貢獻 vs 結論 5.2 貢獻** — Introduction states contributions as *promises*; Conclusion restates them as *delivered, with quantified evidence*. Same items, different framing — never identical wording. - / 1.5 是「將提出」的承諾,5.2 是「已達成」的成果並附量化證據;條目相同, - 敘述角度不同;逐字相同視為重複。 + / 1.5 是「將提出」的承諾,5.2 是「已達成」的成果並附量化證據,條目相同, + 敘述角度不同,逐字相同視為重複。 3. **方法 3.x vs 實驗 4.3 設計** — Methodology defines the *method*; Experimental Design describes the *settings used to test it* (hyperparameters, baselines). Do not re-derive the algorithm in §4. 
- / 3.x 寫方法本身;4.3 寫測試設定(超參、baseline);不要在 4.x 再推導一次演算法。 + / 3.x 寫方法本身,4.3 寫測試設定(超參、baseline),不要在 4.x 再推導一次演算法。 4. **實驗 4.4 結果 vs 4.5 分析 vs 5.1 成果** — 4.4 reports numbers (tables / charts); 4.5 explains *why* the numbers came out that way; 5.1 summarises the headline result. The same number must not appear verbatim in all three. - / 4.4 報數字(表格 / 圖),4.5 解釋原因,5.1 摘要結論;同一數字不應於三處原樣重列。 + / 4.4 報數字(表格 / 圖),4.5 解釋原因,5.1 摘要結論,同一數字不應於三處原樣重列。 5. **圖 vs 內文** — Do not transcribe an entire table or chart into the body text. Cite it ("see Table 3") and discuss *the takeaway*, not the data. - / 不要把整張表或圖的內容用文字再列一次;以「見表 3」引述,內文只討論洞見。 + / 不要把整張表或圖的內容用文字再列一次,以「見表 3」引述,內文只討論洞見。 6. **章節銜接段落** — Section-bridging paragraphs must move the argument forward, not summarise what was just said. "In the previous section we discussed X" is a duplication smell — replace with a forward-pointing transition. - / 章節銜接段落應「推進論點」,不是「複述上一節」;「前一節討論了 X」屬重複氣味, + / 章節銜接段落應「推進論點」,不是「複述上一節」,「前一節討論了 X」屬重複氣味, 改寫為向前指的轉接句。 ### When recurrence is allowed / 例外(非重複) @@ -701,7 +701,7 @@ locales). Three sub-rules: **ZH-TW.** 學術論文須讀起來像「真人撰寫的繁體中文學術文字」。三條子規則: 1. **禁用 AI 口頭禪**:與 LLM 輸出強相關的詞語,一律改為人類學術寫法。 -2. **禁用冗詞**:把動詞包進名詞、同義反覆、開場白;直接用該動詞。 +2. **禁用冗詞**:把動詞包進名詞、同義反覆、開場白,直接用該動詞。 3. **用正確繁體中文用詞**:不混用簡體常用語(即使寫成繁體字形也不行), 詳細詞表交由 `language-vocabulary-check`,本 agent 把原則寫死。 @@ -746,13 +746,13 @@ locales). Three sub-rules: | 深入探討 | 探討、分析、研究 | | 值得注意的是 | (刪除,直接寫該事實) | | 至關重要 / 至為關鍵 | 重要、關鍵、決定性的 | -| 不僅……而且…… (每段) | 一節最多一次;其餘用句點分開 | +| 不僅……而且…… (每段) | 一節最多一次,其餘用句點分開 | | 首先……其次……再者……最後…… (每段) | 視論證需要使用,不要每段套 | | 在當今 …… 的時代 | (刪除開場白,直接寫主題) | | 隨著 …… 的快速發展 | (刪除或具體化:「自 20XX 年 N 倍成長」) | | 綜上所述(章名已是「結論」時) | (刪除) | | 換言之 / 也就是說 (連續出現) | 一節最多一次 | -| 顛覆性 / 革命性 | 顯著、改寫;以數字量化 | +| 顛覆性 / 革命性 | 顯著、改寫,以數字量化 | | 進行深入的分析 | 分析 | | 探索性的研究 | 探索性研究 | | 一系列的 …… | (多刪「一系列的」,直接寫名詞複數) | @@ -813,7 +813,7 @@ locales). 
Three sub-rules: | 為了 X 起見 | 為 X / 為了 X(刪「起見」)| | 採取 …… 措施 | 採取 …… 做法(「措施」為簡體高頻詞,視語境可改)| -> 完整詞表與 14 個 locale 的對應由 `language-vocabulary-check` 維護; +> 完整詞表與 14 個 locale 的對應由 `language-vocabulary-check` 維護, > 本 agent 規範**原則**:locale = zh-tw 的論文,任何詞請以《教育部國語辭典》/ > 國家教育研究院《學術名詞資訊網》為準,不以中國大陸常用語為準。 @@ -870,7 +870,7 @@ undefined acronym is a HARD violation. - **編號 + 標題**(表 1 CRSCORE++ 整體評估 / Table 1 CRSCORE++ overall)。 - **欄位定義**:欄位名(comprehensiveness、conciseness、relevance、…) - 在 §3.5 或圖說中已定義;首次出現於本表時可不重述,但必須有「指標 + 在 §3.5 或圖說中已定義,首次出現於本表時可不重述,但必須有「指標 定義見 §3.5」式回指(per 不重複內容例外條款)。 - **內文討論 takeaway,不是抄數字**:per「不重複內容」§4,4.4 報數字、 4.5 解釋為何 → 4.5 段落必須回答「這張表告訴讀者什麼?哪個欄位最關鍵? @@ -945,7 +945,7 @@ ANN(Approximate Nearest Neighbour)、F1、AUC、mAP、BLEU、ROUGE。 **必須**以內文解釋。 - 「解釋圖」≠「複述圖」:流程圖內每個方框的內容不必逐個用文字再列一次, 但讀者看圖後該得到什麼結論 **必須**在內文點明。 -- 兩條規則合作:圖表 = 視覺壓縮,內文 = 解釋洞見;視覺與洞見都不可缺。 +- 兩條規則合作:圖表 = 視覺壓縮,內文 = 解釋洞見,視覺與洞見都不可缺。 --- @@ -997,16 +997,16 @@ depends on. ### Visual style / 視覺樣式 - **粗體**(bold)必加。 -- 字級介於章標題與正文之間(章 sz=22 → 子章節 sz=20–21;若章 sz=24 → +- 字級介於章標題與正文之間(章 sz=22 → 子章節 sz=20–21,若章 sz=24 → 子章節 sz=22)。**不可與正文同字級且不加粗** —— 不可區分等同沒有。 -- 字型沿用內文 East-Asian 字體(zh-tw 論文:標楷體;簡報:JhengHei UI)。 +- 字型沿用內文 East-Asian 字體(zh-tw 論文:標楷體,簡報:JhengHei UI)。 - 左對齊(少數會議模板要求齊頭縮排,視模板辦理)。 - 上方留空:若前面是正文段落,子章節標題前空一個段距(或設定 - `space_before`);緊接章標題後則不必加距。 + `space_before`),緊接章標題後則不必加距。 ### Paragraph distribution rule / 段落分佈規則(與子章節呼應) -每個子章節必須有**至少一段內文**承接其標題;空標題(標題後直接接下一個 +每個子章節必須有**至少一段內文**承接其標題,空標題(標題後直接接下一個 子章節)是孤兒標題(orphan heading),同樣違反規則。反之,若一段內文 跨越兩個 paper_rule 子章節的範圍(例:一段同時寫了 1.4 目的 + 1.5 貢獻 + 系統設計),就**必須拆段**並補上中間的子章節標題。 @@ -1063,8 +1063,8 @@ stack of independently-edited paragraphs. Two requirements: 1. **段間連貫性**:每段都要承接前一段(呼應或推進)並鋪陳下一段(指向後續 主題)。不可有孤立段、不可有突兀跳題、不可有無錨代名詞。 2. 
**全文脈絡可追溯**:讀者能從 §1 動機 → §3 方法 → §4 設計 → §5 結果 - → §6 結論一氣讀完,不需要往回翻找定義。結論主張必須呼應引言目標; - 方法選擇必須由 §2 研究缺口或 §3 設計理由支撐;實驗設計必須由 §3 方法 + → §6 結論一氣讀完,不需要往回翻找定義。結論主張必須呼應引言目標, + 方法選擇必須由 §2 研究缺口或 §3 設計理由支撐,實驗設計必須由 §3 方法 支撐。 ### Common coherence violations / 常見連貫性違規 @@ -1080,7 +1080,7 @@ stack of independently-edited paragraphs. Two requirements: 它指的是什麼名詞。若不能 → 改寫為具體名詞。 4. **術語不一致(terminology drift)** — 同一概念在不同章節用不同名稱 (例:§3 寫「思維鏈管線」、§4 寫「CoT 流程」、§5 寫「多階段推理」)。 - 挑一個名稱,全文統一;首次出現括弧內附縮寫,之後一律縮寫。 + 挑一個名稱,全文統一,首次出現括弧內附縮寫,之後一律縮寫。 5. **倒序鋪陳(forward reference)** — 在概念被介紹前先使用它(例:§1 提到「圖 3 評估流程」而圖 3 在 §4 才出現)。改寫為「於第四節提出之 評估流程(圖 3)」或先在 §1 簡述。 @@ -1180,7 +1180,7 @@ We **conclude**…" **Audit checklist:** - [ ] 摘要與緒論描述「我做了什麼」用過去式/完成式,**不**用 will。 - [ ] 同一段落內時態一致(除非語意確實跨越「已完成的動作 vs 普遍成立的事實」)。 -- [ ] 只有 Future Work 用 will / could / plan to;其餘章節不用 will 描述已完成的實驗。 +- [ ] 只有 Future Work 用 will / could / plan to,其餘章節不用 will 描述已完成的實驗。 --- @@ -1218,9 +1218,9 @@ seeds; paired t-test, n = 3000, p = 0.003)." **Audit checklist:** - [ ] 每個百分比都標明是「百分點 (absolute)」還是「相對 (%)」。 -- [ ] 沒有從 .log 直接貼出的超長浮點數;有效數字與量測精度相符。 -- [ ] p 值寫實際值,不寫 p = 0.000;附上檢定方法與樣本數。 -- [ ] 同一表格欄位的小數位數一致;單位全文統一且首次出現即定義。 +- [ ] 沒有從 .log 直接貼出的超長浮點數,有效數字與量測精度相符。 +- [ ] p 值寫實際值,不寫 p = 0.000,附上檢定方法與樣本數。 +- [ ] 同一表格欄位的小數位數一致,單位全文統一且首次出現即定義。 --- From 36b0ed3f990fc9182a75cafccf38e2b12e100f1f Mon Sep 17 00:00:00 2001 From: JeffreyChen Date: Wed, 10 Jun 2026 01:07:08 +0800 Subject: [PATCH 09/16] Default decks to light navy-band style; add math rendering and thesis-defence authoring - Flip ExportOptions.dark_mode default to False: the project deck is now the light navy-band style (white slides, navy header band, navy cover), with --dark-mode / GUI checkbox opting into the dark palette. - Render inline $...$ math as real subscripts/superscripts in pptx. - Add the thesis-deck-author task agent for oral-defence decks built from the candidate's own thesis, plus post-author-audit math/metadata passes. 
- Add dark-text and overflow audit scripts with regression tests. - Sync rule docs, CLI/GUI/MCP surfaces, and Sphinx docs to the new default. --- .claude/agents/rules/deck-design.md | 93 +++-- .claude/agents/rules/paper_rule.md | 87 ++++ .claude/agents/rules/slide-deck-rules.md | 22 +- .claude/agents/tasks/paper-summary-author.md | 13 +- .claude/agents/tasks/post-author-audit.md | 106 ++++- .claude/agents/tasks/slide-overflow-check.md | 66 +-- .claude/agents/tasks/thesis-deck-author.md | 234 +++++++++++ AGENTS.md | 18 +- CLAUDE.md | 47 ++- docs/cli.md | 4 +- docs/en/index.rst | 17 +- docs/gui.md | 8 +- scripts/_audit_dark_text.py | 153 +++++++ scripts/check_overflow.py | 251 ++++++++++++ scripts/regen_code_review_thesis.py | 364 +++++++++++++++++ scripts/regen_thesis_demo.py | 335 +++++++++++++++ tests/test_audit_dark_text.py | 89 ++++ tests/test_check_overflow.py | 181 +++++++++ tests/test_exporters.py | 132 +++++- thesisagents/cli.py | 14 +- thesisagents/core/models.py | 17 +- thesisagents/exporters/i18n.py | 28 +- thesisagents/exporters/pptx.py | 407 ++++++++++++++++--- thesisagents/gui/i18n.py | 30 +- thesisagents/gui/pages/deck.py | 13 +- thesisagents/mcp/server.py | 18 +- 26 files changed, 2490 insertions(+), 257 deletions(-) create mode 100644 .claude/agents/tasks/thesis-deck-author.md create mode 100644 scripts/_audit_dark_text.py create mode 100644 scripts/check_overflow.py create mode 100644 scripts/regen_code_review_thesis.py create mode 100644 scripts/regen_thesis_demo.py create mode 100644 tests/test_audit_dark_text.py create mode 100644 tests/test_check_overflow.py diff --git a/.claude/agents/rules/deck-design.md b/.claude/agents/rules/deck-design.md index 0c4a3e6..47f07c9 100644 --- a/.claude/agents/rules/deck-design.md +++ b/.claude/agents/rules/deck-design.md @@ -48,23 +48,25 @@ Already pinned in `pptx.py`: | Constant | RGB | Use | |---|---|---| | `_BRAND_DARK` | `#1F3A66` (deep navy) | Primary text + accent bar | -| `_BRAND_HIGHLIGHT` | `#0E7490` 
(teal-700) | Text emphasis — KPI values, RQ question callout, "this stands out" headlines. The sanctioned replacement for the banned red accent. | +| `_BRAND_HIGHLIGHT` | `#2563EB` (academic blue-600) | Text emphasis — KPI values, RQ question callout, "this stands out" headlines. The sanctioned replacement for the banned red accent. Same blue family as `_BRAND_DARK` navy for a cohesive white + blue academic look. | | `_BRAND_ACCENT` | `#C0392B` (warm red) | BANNED for text (see "No red text" contract). Reserved for potential future non-text accent shapes only. | | `_BRAND_GREY` | `#555555` | Metadata, captions, secondary text, placeholder/error states | | `_BRAND_LIGHT` | `#AAAAAA` | Rule lines, dividers | Do NOT introduce new brand colours casually — every additional colour fights for attention. Reuse the five above unless the user explicitly -adds one. Note the deliberate split: **teal is the headline emphasis, -grey is the label/chrome emphasis** — picking the wrong one (e.g. teal +adds one. Note the deliberate split: **blue is the headline emphasis, +grey is the label/chrome emphasis** — picking the wrong one (e.g. blue for a figure caption) makes captions compete with KPIs for the eye. -#### Dark-mode palette (default; opt-out with `dark_mode=False` / `--light-mode` / GUI "Light mode") +#### Dark-mode palette (opt-in with `dark_mode=True` / `--dark-mode` / GUI "Dark mode") -**Dark mode is the project default.** OLED projectors and low-light -presentation venues are the common case; bright-white slides glare -under both. The exporter builds with the light palette first, then -runs `_apply_dark_mode(prs)` as a post-build pass. +**The light navy-band deck is the project default; dark mode is opt-in.** +(It used to be the reverse — the default flipped when the light navy-band +chrome landed.) 
The exporter always builds with the light palette first, +and only when dark mode is requested runs `_apply_dark_mode(prs)` as a +post-build pass — useful for OLED projectors and low-light venues where +bright-white slides glare. The pass re-colours individual runs / shape fills / cell borders by looking up their current RGB in two mapping dicts. No builder needs to know about dark mode at construction time. @@ -75,7 +77,7 @@ to know about dark mode at construction time. | `_BRAND_DARK` text | `#1F3A66` → `#E5E7EB` | Body text near-white | | `_BRAND_GREY` text | `#555555` → `#9CA3AF` | Metadata mid grey | | `_BRAND_LIGHT` text | `#AAAAAA` → `#6B7280` | Subtle dividers / page numbers | -| `_BRAND_HIGHLIGHT` text | `#0E7490` → `#2DD4BF` | Teal-700 → teal-400; brighter teal reads on the dark slide bg without losing the accent identity | +| `_BRAND_HIGHLIGHT` text | `#2563EB` → `#60A5FA` | Blue-600 → blue-400, brighter blue reads on the dark slide bg without losing the accent identity | | `_BRAND_ACCENT` | `#C0392B` (unchanged) | BANNED for text in both modes (see "No red text" contract). If reused for a non-text shape, kept as-is for brand consistency. | | `_BRAND_DARK` fill (accent bars / table header) | `#1F3A66` → `#3B5AA0` | Lighter navy reads against the dark slide background | | `_TABLE_ROW_ALT` | `#F4F6F9` → `#1F232C` | Dark stripe | @@ -141,9 +143,9 @@ manual inspection of a single rendered deck. constant ``_BRAND_ACCENT`` (= ``#C0392B`` warm red) stays in the palette for potential future non-text accent shapes (sparkline highlight, status badge, etc.), but every TEXT call site has been migrated off it. -The sanctioned text-emphasis colour is **``_BRAND_HIGHLIGHT`` (teal-700, -``#0E7490``)** — pair with ``run.font.bold = True``. Use ``_BRAND_GREY`` -for chrome / label / placeholder emphasis (never teal — teal is reserved +The sanctioned text-emphasis colour is **``_BRAND_HIGHLIGHT`` (academic +blue-600, ``#2563EB``)** — pair with ``run.font.bold = True``. 
Use ``_BRAND_GREY`` +for chrome / label / placeholder emphasis (never blue — blue is reserved for "this matters", grey is for "this is context"). Why banned: @@ -164,8 +166,8 @@ is closest at hand. The four migrated sites split: | Call site | Role | Replacement | |---|---|---| -| KPI value (`_add_kpi_lines`) | "the slide's punch line" headline | `_BRAND_HIGHLIGHT` (teal) | -| RQ question (`_add_rq_result_slide`) | "the question being answered" headline | `_BRAND_HIGHLIGHT` (teal) | +| KPI value (`_add_kpi_lines`) | "the slide's punch line" headline | `_BRAND_HIGHLIGHT` (blue) | +| RQ question (`_add_rq_result_slide`) | "the question being answered" headline | `_BRAND_HIGHLIGHT` (blue) | | Paper-table caption (`_add_paper_table_slides`) | caption label below subhead | `_BRAND_GREY` (muted) | | Figure-unavailable fallback (`_add_figure_image`) | placeholder / error state | `_BRAND_GREY` (muted) | @@ -176,7 +178,7 @@ Implementation contract: 3. For emphasis on a value (e.g. a KPI number) use: ``run.font.bold = True`` + ``run.font.color.rgb = _BRAND_HIGHLIGHT``. 4. For caption / placeholder / chrome text, use ``_BRAND_GREY`` — not - teal, not navy. Reserving teal for headlines is what makes headlines + blue, not navy. Reserving blue for headlines is what makes headlines actually read as headlines. 5. Regression test ``test_pptx_no_red_text_runs`` walks every run on a default-rendered deck and fails if any run uses ``#C0392B``. @@ -185,7 +187,7 @@ Implementation contract: pass wouldn't quietly map it; the run would carry red through to the dark deck where the regression test fires. 7. The audit script's ``_ACCEPTED_DARK_RUN_COLORS`` set includes the - dark-mode teal variant ``#2DD4BF``; if you introduce another accent + dark-mode blue variant ``#60A5FA``; if you introduce another accent colour, update both the map AND the audit set in the same commit. If a future "non-text accent" use of red comes up (e.g. a tiny status @@ -225,14 +227,14 @@ stayed light. 
White-on-white. Fixed by adding the mapping reports failure-mode B — run it on a rendered deck during manual inspection. -Exposure surfaces (dark is default; the toggles flip to LIGHT): -- CLI: `--light-mode` opt-out flag (when absent → dark) -- GUI: Deck tab `deck.light_mode_label` checkbox (unchecked → dark) -- Programmatic: `ExportOptions(dark_mode=False)` to opt out -- Regen script: pass `dark_mode=False` per variant — see - `scripts/regen_speculative_decoding_zh_tw.py` which ships both the - default dark deck (`-zh-tw.pptx`) and a light opt-out - (`-zh-tw-light.pptx`). +Exposure surfaces (light is default; the toggles opt IN to DARK): +- CLI: `--dark-mode` opt-in flag (when absent → light) +- GUI: Deck tab `deck.dark_mode_label` checkbox (unchecked → light) +- Programmatic: `ExportOptions(dark_mode=True)` to opt in +- Regen script: pass `dark_mode=` in `ExportOptions` per variant — + `scripts/regen_fang2026.py` is the worked example (it sets + `dark_mode=True` for the dark variant). The default omits the field + (or passes `dark_mode=False`) for the light navy-band deck. ### Table styling (the second-biggest "AI-generated" tell after Calibri) @@ -269,16 +271,27 @@ flows through this single helper, so the change applies uniformly. ### Accent geometry (the "this is a designed deck" tell) -Every content slide gets a thin top accent bar: -- Position: `left=0, top=0, width=_SLIDE_WIDTH (13.333"), height=Inches(0.08)` -- Fill: `_BRAND_DARK` solid -- Name: `accent_top` (semantic name so `pptx_edit` can target it) - -The cover slide gets a left vertical band: -- Position: `left=0, top=0, width=Inches(0.4), height=_SLIDE_HEIGHT (7.5")` -- Fill: `_BRAND_DARK` solid -- Name: `accent_left` -- Cover textboxes shift right by `Inches(0.4)` worth of margin to clear it. 
+Every content slide gets a **full-width navy header band** (the deck's +signature chrome) with the white title sitting inside it and a thin blue +accent rule along its bottom edge: +- Band position: `left=0, top=0, width=_SLIDE_WIDTH (13.333"), height=_HEADER_BAND_HEIGHT (1.18")` +- Band fill: `_BRAND_DARK` solid; name `accent_top` (kept on the band so + `pptx_edit` / audits still target the content-slide accent by name even + though it's now a band, not a hairline). +- Accent rule: `left=0, top=1.18", width=_SLIDE_WIDTH, height=_ACCENT_RULE_HEIGHT (0.06")`, + fill `_HEADER_ACCENT_FILL` (= `_BRAND_HIGHLIGHT` blue); name `accent_rule`. +- The title (`_new_section_slide`) is WHITE (`_HEADER_TITLE_FG`, the same + white as `_TABLE_HEADER_FG`) and middle-anchored inside the band — navy + `_BRAND_DARK` would be navy-on-navy = invisible. `_BODY_TOP` (1.5") sits + below the band so body content never moves. + +The cover slide gets a **full-bleed navy panel** (not a left band): +- Position: `left=0, top=0, width=_SLIDE_WIDTH, height=_SLIDE_HEIGHT (7.5")` +- Fill: `_BRAND_DARK` solid; name `accent_left` (kept for the cover-accent + semantic name even though it now spans the whole slide). +- Cover title is WHITE; subtitle / meta are near-white (`_DARK_BODY_TEXT`) — + these light colours are correct in BOTH modes because the cover stays + navy either way. Section-divider slides may use a larger top band (`height=Inches(0.6)`) with the section title overlaid in light text — but this is optional @@ -314,26 +327,26 @@ provided every slide ends up with: The exporter inserts figures as PNGs via the `figures=` field (`_add_figure_image`); it does **not** draw native charts. So figure *quality* is an authoring responsibility — a default-matplotlib plot or a low-res screenshot undoes the brand discipline the rest of the deck earns. -- **Dark-mode adaptation is mandatory.** The slide background is `_DARK_SLIDE_BG` (`#12151B`) by default. 
A white-background PNG dropped onto it shows a glaring white rectangle — the figure equivalent of `rgb=None` text on dark. Export plots with a **transparent background** (`savefig(..., transparent=True)`) and light foreground (axes / labels / lines in near-white or brand teal `#2DD4BF`), OR place the figure on a card whose fill has a `_LIGHT_TO_DARK_FILL` entry. Never a bare white PNG on the dark slide. +- **Dark-mode adaptation is mandatory when the deck is rendered dark.** In dark mode the slide background is `_DARK_SLIDE_BG` (`#12151B`). A white-background PNG dropped onto it shows a glaring white rectangle — the figure equivalent of `rgb=None` text on dark. Export plots with a **transparent background** (`savefig(..., transparent=True)`) and light foreground (axes / labels / lines in near-white or brand blue `#60A5FA`), OR place the figure on a card whose fill has a `_LIGHT_TO_DARK_FILL` entry. Never a bare white PNG on the dark slide. - **Strip chartjunk.** No default matplotlib grey panel, no spines on all four sides, no dense gridlines, no 3-D bars / pies, no drop shadows. Top + right spines off, at most one light horizontal gridline set. Data-ink first. -- **Brand palette, not library defaults.** Series colours come from the deck palette (navy / teal / grey), never matplotlib's `C0` blue / `C1` orange — default colours read as "pasted from a notebook". (Red stays banned here too, per the no-red contract.) +- **Brand palette, not library defaults.** Series colours come from the deck palette (navy / blue / grey), never matplotlib's `C0` blue / `C1` orange — default colours read as "pasted from a notebook". (Red stays banned here too, per the no-red contract.) - **Don't encode meaning by colour alone.** Teal vs navy is hard for some colour-blind viewers and *indistinguishable* in a black-and-white printout. When two series must be told apart, encode them twice — colour **plus** a marker shape / line style (solid vs dashed) or a direct end-of-line label. 
The winning series can also be the only solid/heavy one. (The 4-colour brand palette is small precisely so it can't carry many simultaneous distinctions — lean on shape and labels.) - **Readable when projected.** Axis labels + tick labels + legend ≥ ~14pt *in the rendered figure* (a 6pt matplotlib label is unreadable from row 10). Label every axis with its quantity AND unit ("Latency (ms)"), per `paper_rule`'s number-reporting rule. - **Export at print DPI.** `dpi >= 150` (200 for line-heavy plots). A 72-DPI screenshot pixelates on a projector. - **Paper screenshots are a last resort.** Re-plotting your own data beats screenshotting the paper's figure — a screenshot carries the paper's off-brand fonts / colours, JPEG artefacts, and usually a white background. Crop tightly; only screenshot when re-plotting is impossible (e.g. a qualitative architecture diagram). -**Anti-pattern:** `plt.savefig("fig.png")` with defaults → grey panel, blue/orange series, 6pt labels, white border, dropped onto the dark slide. **Pattern:** `savefig("fig.png", dpi=200, transparent=True, bbox_inches="tight")` with teal / navy series, 14pt labels, top + right spines removed. +**Anti-pattern:** `plt.savefig("fig.png")` with defaults → grey panel, blue/orange series, 6pt labels, white border, dropped onto the dark slide. **Pattern:** `savefig("fig.png", dpi=200, transparent=True, bbox_inches="tight")` with blue / navy series, 14pt labels, top + right spines removed. ### Visual hierarchy & focal point Each slide needs one element the eye lands on first — the takeaway from `slide-deck-rules` §9. Size, weight, colour and position build that hierarchy; without it every element competes and the audience reads top-to-bottom hunting for the point. -- **One focal point per slide.** The biggest / boldest / most-saturated element *is* the takeaway — usually the KPI value (teal, bold, large) or the winning row of a table. Exactly one. 
-- **Hierarchy by size, not just order.** Title > headline number > evidence > caption, each visibly smaller. A KPI value at the same size as its label has no hierarchy. Caption / provenance text uses `_BRAND_GREY` so it recedes — the palette already encodes this (teal emphasises, grey recedes); don't invert it. +- **One focal point per slide.** The biggest / boldest / most-saturated element *is* the takeaway — usually the KPI value (blue, bold, large) or the winning row of a table. Exactly one. +- **Hierarchy by size, not just order.** Title > headline number > evidence > caption, each visibly smaller. A KPI value at the same size as its label has no hierarchy. Caption / provenance text uses `_BRAND_GREY` so it recedes — the palette already encodes this (blue emphasises, grey recedes); don't invert it. - **Whitespace is not wasted space.** A slide filled edge-to-edge has no focal point. Leave margins and let the KPI block breathe. The `FOOTER_GUARD` (7.05") and per-slide content caps exist partly so content can't sprawl across the whole canvas. - **Reading order follows the layout.** Assertion title on top, evidence beneath it, provenance / caption last. Don't bury the conclusion in a footnote while the setup sits in the headline. -**Anti-pattern:** title, three KPIs, a table and a caption all the same size and colour — no focal point, the eye wanders. **Pattern:** one KPI value ~2× the size of its label in teal, the table muted beneath it, caption small and grey. +**Anti-pattern:** title, three KPIs, a table and a caption all the same size and colour — no focal point, the eye wanders. **Pattern:** one KPI value ~2× the size of its label in blue, the table muted beneath it, caption small and grey. 
## Anti-patterns (instant "AI-generated" tells) diff --git a/.claude/agents/rules/paper_rule.md b/.claude/agents/rules/paper_rule.md index a8c0437..aa7b49c 100644 --- a/.claude/agents/rules/paper_rule.md +++ b/.claude/agents/rules/paper_rule.md @@ -834,6 +834,60 @@ locales). Three sub-rules: --- +## 草稿管理元資訊不得進入交付內容 / Drafting-management metadata must never reach the delivered content (HARD RULE) + +**EN.** The paper / slide a reader sees must contain only the *content*, never the +*bookkeeping* used while writing it. Drafting-management metadata is anything that +helps the author track revisions but means nothing to the reader: version numbers +(`v3.2`, 修正版, 「v3 既有」, 「v3.1 新增」), the file name of a writing guide or +internal tool (`paper_rule.md`, an internal `.rst` path), drop-in / insertion +markers (`INSERT INTO §3.2`, a drop-in block id like `§2.4`, `Renumbered`), +author-facing notes (「目前 §5 僅有…缺少…」, 「本節僅基於…不引入…」), and +version-relative phrasing (「原 v3 設計之…」, 「v3 追加之第 5 點」). One audit of a +thesis assembled from a drop-in insert set found **8** such leaks — `v3`–`v3.5` +version tags in the §3.7 preamble and §6.4.5, a `paper_rule.md` citation in two +mechanism subsections, and three 「原 v3 設計/之」 phrasings — every one had to be +stripped before the document could ship. + +**Why this is its own HARD rule.** A drop-in 段落集 / regen script / insert file is +written with version tags and insertion notes so the *author* can manage which +paragraph came from which revision. The instinct is to paste those paragraphs +verbatim into the deliverable, and the version tag rides along. The reader then +meets 「§3.7.23 為 v3.5 新增之無模型定向信號」 and has no idea what v3.5 is, or +「依 paper_rule.md 不輸出分數」 and no idea what paper_rule.md is. The bookkeeping is +invisible to the author (who knows what v3.5 means) and glaring to the reader (who +does not). It is distinct from the no-fabrication rule — the content is *true*, it +is just *addressed to the wrong audience*. 
+ +**繁中.** 讀者看到的論文/投影片只應有「內容」,不應有你寫作時的「管理記號」。 +草稿管理元資訊指對作者追蹤改版有用、但對讀者毫無意義者:版本號(`v3.2`、修正版、 +「v3 既有」、「v3.1 新增」)、寫作指引或內部工具之檔名(`paper_rule.md`、指向專案 +內部之 `.rst` 路徑)、插入/drop-in 標記(`INSERT INTO §3.2`、drop-in 段落集之 +區塊編號如 `§2.4`、`Renumbered`)、給作者之元說明(「目前 §5 僅有…缺少…」、 +「本節僅基於…不引入…」)、以版本指代之措辭(「原 v3 設計之…」、「v3 追加之 +第 5 點」)。一次以 drop-in 段落集組裝之學位論文審核發現 **8** 處此類洩漏,全部 +須於交付前清除。 + +**How to apply.** +- **Authoring 時**:你寫的是論文內容,不是修改日誌。「這節是新加的」屬於 commit/ + PR/外部筆記,不進正文,以節號(§3.7.23)指代即可,不附版本號。 +- **指代「原本的設計」時**:用「原設計」「初版」等讀者可理解之詞,不用「原 v3 設計」。 +- **引用文件時**:引用隨附開源框架之公開文件(如其 GitHub 倉庫之 docs)可保留,惟 + 須確認該引用對讀者有意義,指向寫作流程內部之指引/腳本檔名一律刪除。 +- **Audit 時**:交付前以正規表示式掃過這些 token(版本號 `v\d`、`paper_rule`、 + `INSERT INTO`、`drop-in`、`Renumber`、「原 v\d」)。對應審核步驟見 + `post-author-audit` 之「Drafting-metadata leak scan」。 + +**Example(正確)**:「§3.7.1–§3.7.13 與 §3.7.15–§3.7.18 合為十七項研究級機制, +§3.7.14 為部署層工程設計、§3.7.23 為無模型定向信號(均不計入十七項)。」── 只用 +節號與分類,讀者自足。 + +**Anti-pattern(錯誤)**:「§3.7.1–§3.7.13 為 v3 既有之十三項,§3.7.19 為 v3.2 +新增之可觀測層,§3.7.23 為 v3.5 新增之無模型定向信號。」──「v3」「v3.2」「v3.5」是 +drop-in 段落集之版本號,讀者無從理解,且洩漏了寫作流程。 + +--- + ## 圖、表、內容必須清楚解釋 / Every figure, table, and content item must be clearly explained (HARD RULE) **EN.** Every figure, every table, every pseudo-code block, every dataset, @@ -1265,6 +1319,39 @@ complex-script fallback (Arabic / Devanagari — not used in zh-tw papers but must be set so Word's "Use East-Asian font for CJK" rendering rule applies correctly).
### 字級規範 / Font sizes (HARD) + +**ZH-TW 學位論文預設字級:本文 14pt、標題(章與各級節)20pt 並加粗。** 此為國立 +高雄師範大學等碩士論文格式手冊之常見規範,本文(body)一律 14pt,章標題 +(第 N 章 / Heading 1)與節/子節標題(3.1、3.2.1 …)一律 20pt 加粗,字型仍依 +locale 規則(zh-tw 為標楷體 CJK + Times New Roman Latin)。 + +**Why**: 以 `python-docx` 程式化插入新節時,新段落若沿用「複製來的參考 run」或 +樣式預設,極易帶到錯誤字級——曾發生新插入的「3.1 系統架構」一節本文落在 18pt、 +標題落在 24pt(Heading 2 樣式預設值),與全文 14/20pt 不一致而需補正。明訂絕對 +字級可讓插入後的稽核一眼可驗,不必逐處目視比對。 + +**OOXML 對應**:`<w:sz>`(Latin)與 `<w:szCs>`(CJK/複雜文字)之 `w:val` 以**半點** +為單位,故 val = pt × 2——本文 14pt → `w:val="28"`,標題 20pt → `w:val="40"`。兩者 +都要設,只設 `w:sz` 時 CJK 字仍會回退樣式字級。 + +```xml +<w:rPr> + <w:sz w:val="28"/> + <w:szCs w:val="28"/> +</w:rPr> +``` + +**與上方〈Visual style〉相對字級之關係**:上方「章 sz=22 → 子章節 sz=20-21」是給 +**只要求「標題需與正文可區分」之模板**的**相對**示例。當論文手冊**明訂絕對字級** +(如本文 14、標題 20)時以絕對值為準,相對示例不適用——兩者不衝突,一個是 +「沒給絕對值時怎麼拉開層級」,一個是「給了絕對值就照辦」。 + +**Anti-pattern**: 插入新節後沿用 `insert_paragraph_before` 的樣式預設,或把非本文 +段落(caption / 標題)的 run 格式複製到本文,導致 body 18pt/heading 24pt。務必在 +寫檔前逐 run 顯式設 `w:sz` / `w:szCs`,並以下方 audit 連同字型一起驗。 + **Implementation in .pptx**: handled by `deck-design` subagent (see `thesisagents/exporters/pptx.py` `_apply_typography` pass + the per-language `_FONT_FAMILIES` table). The pptx-side default for zh-tw is **Microsoft diff --git a/.claude/agents/rules/slide-deck-rules.md b/.claude/agents/rules/slide-deck-rules.md index 7d5a7cd..b7f6300 100644 --- a/.claude/agents/rules/slide-deck-rules.md +++ b/.claude/agents/rules/slide-deck-rules.md @@ -48,6 +48,14 @@ The pptx exporter is the most visually-sensitive surface in the project. Several - `_EVALUATION_SECTIONS_PER_SLIDE = 2` - KPI blocks and core-observation callouts are **always** split onto their own slide (`_add_kpi_slide`, separate core-observation slide). Never balance "stacks + tail callout" inside a fixed height. +**Count caps are upper bounds — pagination is height-aware.** A fixed *count* can't guarantee fit, because section bodies vary in length: a slide of four 3-line contributions overflows where four 1-line ones fit.
So the exporter now sizes and paginates by **estimated rendered height**, not count alone: + +- `_add_stacked_section` body height is **adaptive** — `_stacked_body_height_in(body)` estimates the wrapped line count (same model as the overflow inspector, full-width CJK ≈ 1.0 em / half-width Latin ≈ 0.55 em) and sizes the box to it, floored at `_STACK_BODY_MIN_IN = 0.85"`. A 3-line contribution gets a 3-line box instead of spilling into the next subhead. +- `_paginate_stacks` packs sections into slides by a **height budget** (`_STACK_TOP_IN = 1.7"` → `_FOOTER_GUARD = 7.0"`), starting a new `(i/N)`-titled slide before the cumulative height would cross the guard. `_MAX_STACKS_PER_SLIDE` is kept as an upper bound so a slide never crowds in more than the documented maximum even when sections are tiny. +- `_pain_points_per_slide` drops the pain-point grid from 2×2 to one row of two when cells are text-heavy, so a tall 3-bullet cell gets the full `_PAIN_QUADRANT_HEIGHT_IN = 4.4"` row instead of `4.4/2 = 2.2"`. Short cells keep the 2×2 look. + +**Why height-aware, not truncation:** the project never silently truncates body text (`_cap_bullets` caps *count* with a `(+N more)` marker, `_clean` never lops). So the only overflow-safe lever for variable-length content is box height + pagination. The regression guard is `tests/test_check_overflow.py::test_exported_rich_deck_has_no_overflow`, which exports a deliberately long-content rich deck and asserts the inspector finds zero violations. + ### 5. Semantic shape names Every textbox is named with one of: `title` / `meta` / `body` / `subhead` / `footer` / `page_number` / `kpi` / `kpi_label` / `rq_box` / `paper_subtitle`. `pptx_edit.update_slide(..., title=...)` looks them up by name; **never break this contract** — silently renaming a shape will break the MCP edit tools. @@ -65,13 +73,17 @@ SUPPORTED_LANGUAGES = ( Every language has every key — enforced by `test_every_language_has_every_key`. 
Untranslated locales fall back silently to `en` via `normalise_language`. +**Deck locales ≠ `paper_rule` writing locales (a real trap).** This list — the locales the *exporter* can render — includes `id` (Indonesian) and has **no Arabic**. The `paper_rule` subagent's writing-locale set instead includes `ar` (Arabic, RTL) and is about authoring *paper text*, not rendering slides. So a request for an Arabic deck does not raise — it falls back silently to `en`. Do not promise Arabic deck output on the strength of `paper_rule` covering Arabic — the two sets are deliberately different surfaces. If Arabic deck rendering is ever needed, it requires an `i18n.py` entry **and** RTL paragraph support in the exporter, neither of which exists today. + When adding a new template string: 1. Add the key to all 14 languages in `i18n.py`. 2. Run `py -m pytest tests/exporters/test_i18n.py` to confirm the parity test stays green. ### 7. No overflow regressions -When changing the deck or i18n, delegate to the `slide-overflow-check` subagent — it walks every shape on every slide and checks rendered-text height vs. the box's reserved height, and confirms no shape extends past the footer guard. +When changing the deck or i18n, delegate to the `slide-overflow-check` subagent — it runs `scripts/check_overflow.py`, which walks every shape on every slide and checks rendered-text height vs. the box's reserved height, and confirms no shape extends past the footer guard. + +**Tables are estimated, not trusted.** python-pptx grows a table row to fit wrapped cell text, but the GraphicFrame's *declared* `height` does not change — so a many-row or long-cell table renders far taller than declared and can cross the footer guard while `shape.height` claims it fits. The inspector therefore **estimates** a table's rendered height (sum of each row's tallest-cell wrapped lines) rather than reading the declared height.
This is what enforces the "≤ ~5 rows per slide" authoring guidance in §10 — a 10-row table is now caught, not silently shipped. The exporter deliberately does **not** auto-paginate author tables (splitting a comparison mid-table would reorder / duplicate its semantics, against the project's never-silently-restructure-author-content principle), so the fix for a flagged table is to split it at the **authoring** layer (`rq_results` / `paper_tables`), not in `pptx.py`. ### 8. Content clarity & first-use context (HARD) @@ -156,13 +168,17 @@ Content slides carry the findings (§9); **structural** slides carry the *naviga §8 says every math symbol must be *glossed* at first use; this says how to *render* the symbol itself. They are independent — `min 互資訊 I(za;zb|Ep)` glosses the operator but still renders the variable as the bare ASCII string "za", which reads as a word, not "z subscript a". -- **Real subscripts / superscripts, not flattened ASCII.** `za` is z-sub-a, `λmax` is λ-sub-max, `x²` is x-super-2. python-pptx supports run-level baseline shift (`<a:rPr baseline="-25000">` for subscript, `30000` for superscript) — use it, or Unicode subscript glyphs (`z` + `ₐ`) as a fallback. Typing "za" / "lambda_max" / "x^2" literally is a tell. (The exporter currently flattens these to ASCII — surfacing it here so a builder fixes the run rather than copying the flat form.) +- **Real subscripts / superscripts, not flattened ASCII.** `za` is z-sub-a, `λmax` is λ-sub-max, `x²` is x-super-2. python-pptx supports run-level baseline shift (`<a:rPr baseline="-25000">` for subscript, `30000` for superscript) — use it, or Unicode subscript glyphs (`z` + `ₐ`) as a fallback. Typing "za" / "lambda_max" / "x^2" literally is a tell.
**The exporter renders this for you — but only when the authoring wraps the notation in `$...$` (the math-delimiter contract below).** Bare `I(za;zb|Ep)` typed without `$...$` stays flat ASCII on the slide; that is the single most common way the feature goes unused (the original fang2026 deck shipped flat because its regen script never used `$...$`). - **Variables italic, operators upright** (standard math typesetting). Variables `z`, `λ`, `x` italic; multi-letter operators `min`, `argmin`, `log`, `softmax` upright. `min` set in italic reads as m·i·n multiplied. - **Unicode math symbols, not ASCII stand-ins.** `≤ ≥ × · ‖·‖ λ ∑ ∫ ∇ ∈ →`, not `<=`, `>=`, `x`, `sum`, `integral`, `->`. The per-language font stack renders these; ASCII substitutes look like code, not math. - **Complex formulae → image, not text.** Multi-line equations, fractions, integrals / sums with limits, and matrices cannot be laid out in a pptx text run. Render them with LaTeX to a **transparent-background** PNG (per the Figures dark-mode rule in deck-design) and place via `figures=`. Don't fake a fraction by stacking "a / b" in two textboxes. - **One notation per concept across the whole deck.** If the paper writes `z_a`, every slide writes `z_a` — not `za` here and `z_adv` there. (Mirrors the paper-side notation-consistency rule.) -**Anti-pattern:** a slide reading `min I(za;zb|Ep) s.t. ||za-zb||_2 <= eps` — ASCII subscripts, ASCII norm, ASCII `<=`, operator unnamed. **Pattern:** `min I(z_a; z_b | E_p)` with real subscripts + italic variables, `‖z_a − z_b‖₂ ≤ ε`, and the operator named ("minimise the mutual information …") per §8. +**The `$...$` math-delimiter authoring contract (HARD).** The exporter only renders real subscripts / superscripts / italic variables for notation the authoring **wraps in `$...$`**, using `_x` / `_{xy}` for subscript and `^x` / `^{xy}` for superscript. So the authoring side (`paper-summary-author`, `regen_*.py`) must write `$I(z_a;z_b|E_p)$`, not bare `I(za;zb|Ep)`. 
Inside a `$...$` span a single-letter token is italicised as a variable and a multi-letter token stays upright as an operator (`$min$` → upright). Plain `_` outside `$...$` (file names, prose) is left alone, so the delimiter is opt-in and never mangles non-math text. + +Surfaces that render the contract (`_render_math_paragraph` / `_append_math_runs` in `pptx.py`): **bullets** (`_add_bullet_box`), **KPI values** (`_add_kpi_lines`), **table cells** (`_add_table`), **contribution / method body paragraphs** (`_add_stacked_section` via `_add_textbox(math=True)`), and **RQ / core-observation callouts** (`_add_rq_callout`). Together these cover every content surface a thesis deck puts math on. **Why:** body paragraphs and the RQ callout are exactly where the objective formula lives (`I(z_a;z_b|E_p)`), and they were the last two surfaces to bypass the renderer — a deck that glosses the operator (§8) but still shows flat "za" in its contribution paragraph reads half-finished. + +**Anti-pattern:** a slide reading `min I(za;zb|Ep) s.t. ||za-zb||_2 <= eps` — ASCII subscripts, ASCII norm, ASCII `<=`, operator unnamed, and (the authoring-side root cause) no `$...$` so the renderer never fires. **Pattern:** author `$min$ $I(z_a;z_b|E_p)$` with real subscripts + italic variables, `‖z_a − z_b‖₂ ≤ ε`, and the operator named ("minimise the mutual information …") per §8. ### 13. Deck length and pacing diff --git a/.claude/agents/tasks/paper-summary-author.md b/.claude/agents/tasks/paper-summary-author.md index a150540..72ec60d 100644 --- a/.claude/agents/tasks/paper-summary-author.md +++ b/.claude/agents/tasks/paper-summary-author.md @@ -198,13 +198,7 @@ For each paper that is on-topic for the user's actual intent (see "Off-topic pap Concretely: when authoring each `Paper`, copy column 7 of the xlsx → `Paper.doi`, column 8 → `Paper.url`. 
For arxiv URLs, strip a trailing `v1` / `v2` version suffix: `https://arxiv.org/abs/2506.09580v1` → `arxiv_id="2506.09580"`, `url="https://arxiv.org/abs/2506.09580"`. Leave empty cells as `None` — never fabricate to fill. -4. **Drop a regen script.** Save under `scripts/regen__.py` or `scripts/regen_.py` for batches. Working templates already in the repo: - - `scripts/regen_llm_security_batch.py` — batch, 7 papers - - `scripts/regen_ling2026_agent_skills.py` — single paper en - - `scripts/regen_ling2026_agent_skills_zh_tw.py` — single paper zh-tw - - `scripts/regen_ieee_thesis_style.py` — single paper - - Read the closest template first and follow its shape. +4. **Drop a regen script.** Save under `scripts/regen__.py` or `scripts/regen_.py` for batches. The canonical worked template in the repo is **`scripts/regen_fang2026.py`** — a single-paper, rich-tier, zh-tw, dark-mode build with every rich field populated. Read it first and follow its shape (`_build_summary()` → `_build_paper()` → `PaperCollection` → `PptxExporter().export(...)`). For a **batch**, use the same shape but pass a tuple of papers to `PaperCollection(papers=(...))` and loop the per-paper export. There is no separate batch template in-repo today — the single-paper one generalises directly. 5. **Canonical filename, no `-rich` suffix.** In the script, set `filename_stem=paper.bibtex_key()` so the rich deck overwrites the CLI's lightweight emit at the same path. One `.pptx` per paper, the rich one. Language variants are the only exception (`f"{key}-zh-tw"`). @@ -219,6 +213,7 @@ The fields you write here are what `slide-deck-rules` and `paper_rule` later gov - **Each slide-driving string is an assertion, not a topic label** (slide-deck-rules §9). Write a `rq_results` question / `pain_points` sub-head / contribution heading as a claim — "Disentangling za / zb cuts adversarial leakage to near-zero", not "Method". One message per unit: never fold two RQs into one `rq_results` block to save a slide.
- **Pick the field that fits the data** (slide-deck-rules §10). A trend / many-value comparison goes in a `technique_table` / `rq_results.table` (→ table) or a `figures` entry (→ chart); the headline numbers go in `headline_metrics` (→ KPI callout); qualitative / sequential points go in the bullet fields. Don't cram a 5×4 result grid into prose bullets. - **Numbers follow the reporting rules** (paper_rule §數字與統計呈現). `headline_metrics` values use measurement-appropriate significant figures (92.3%, not 92.31748%), label percentage-points vs relative %, and report p-values as actual values — and never invent a digit the PDF doesn't state. +- **Wrap math notation in `$...$`** (slide-deck-rules §12 math-delimiter contract). The exporter renders real subscripts / superscripts / italic variables only for `$...$`-delimited spans, and it does so on every content surface a thesis deck uses — bullets, KPI values, table cells, contribution / method body paragraphs, and RQ / core-observation callouts. So author `$I(z_a;z_b|E_p)$`, `$λ_{max}$`, `$x^2$` — never bare `I(za;zb|Ep)` / `lambda_max` / `x^2`, which ship flat ASCII. Use `_x` / `_{xy}` for subscript, `^x` / `^{xy}` for superscript; a single letter inside the span italicises as a variable, a multi-letter token stays upright as an operator. This is the same notation in `contributions_detailed`, `method_sections`, `core_observation`, and any `rq_results` text — keep it consistent across fields (one notation per concept). The original fang2026 deck shipped flat `za` precisely because its regen script omitted the `$...$`; don't repeat it. - **No fabrication** (paper_rule §不謊造). Every number / RQ result / limitation must come from the PDF you read. If the paper doesn't report it, leave the field empty — the exporter skips empty fields, which is correct. ## After all papers are authored @@ -244,7 +239,7 @@ The search is keyword-based, so off-topic papers slip in: **Rich thesis-style PPT is the default deliverable. 
Lightweight is a fallback, never the goal when an LLM agent is in the loop.** 1. `ANTHROPIC_API_KEY` set in the environment? → CLI auto-enriches via the Python pipeline; just run it. -2. No key but you (an LLM agent) drive the session? → **you write the rich summary yourself.** The per-paper lightweight `.pptx` the CLI just emitted is an intermediate artefact, not the deliverable. Read each PDF, hand-author a `PaperSummary` with rich-tier fields, drop a `scripts/regen_.py`, run it. Worked example: `scripts/regen_llm_security_batch.py` ships 7 hand-authored rich summaries built exactly this way. +2. No key but you (an LLM agent) drive the session? → **you write the rich summary yourself.** The per-paper lightweight `.pptx` the CLI just emitted is an intermediate artefact, not the deliverable. Read each PDF, hand-author a `PaperSummary` with rich-tier fields, drop a `scripts/regen_.py`, run it. Worked example: `scripts/regen_fang2026.py` is a complete hand-authored rich summary built exactly this way. 3. No LLM in the loop (CI / cron / unattended) → lightweight is acceptable. ### Default CLI invocation (when the user asks for a deck) @@ -281,7 +276,7 @@ When the user says "search X and make a [lang] PPT", run the runbook below strai 1. For each downloaded PDF in `exports//pdfs/`, read it (use the Read tool; large PDFs go through `thesisagents.intelligence.pdf._extract_text`). 2. Classify off-topic — see "Off-topic papers" below. Off-topic PDFs get deleted along with their lightweight `.pptx`, BUT stay in the xlsx + bib (honest record). 3. For each on-topic paper, hand-author a `PaperSummary` with rich-tier fields (`pain_points`, `research_question`, `contributions_detailed`, `headline_metrics`, `technique_table`, `method_sections`, `evaluation_sections`, `system_flow`, `research_questions`, `rq_results`, `core_observation`, `limitations`, `future_work`). All in the user's requested language. -4. 
Drop `scripts/regen_.py` modelled on `scripts/regen_llm_security_batch.py`. Each entry: `Paper(...summary=PaperSummary(...))`. Export with `filename_stem=paper.bibtex_key()` (NO `-rich` suffix) and `language=`. +4. Drop `scripts/regen_.py` modelled on `scripts/regen_fang2026.py`. Each entry: `Paper(...summary=PaperSummary(...))`. Export with `filename_stem=paper.bibtex_key()` (NO `-rich` suffix) and `language=`. 5. Run the regen. It overwrites the lightweight `.pptx` at the canonical path with the rich-tier deck. **Phase 4 — Audits** diff --git a/.claude/agents/tasks/post-author-audit.md b/.claude/agents/tasks/post-author-audit.md index 2780f86..adebc25 100644 --- a/.claude/agents/tasks/post-author-audit.md +++ b/.claude/agents/tasks/post-author-audit.md @@ -1,13 +1,15 @@ --- name: post-author-audit -description: After a regen_*.py with hand-authored PaperSummary entries has been written and run, perform two mandatory audits before the deck ships — (1) compare each authored Paper.url/doi/arxiv_id against the search xlsx to catch fabricated URLs, and (2) classify off-topic downloads (keyword matches that don't fit the user's actual intent) and delete their pdf + lightweight pptx. Use after paper-summary-author finishes, before reporting deck-ready. +description: After a regen_*.py with hand-authored PaperSummary entries has been written and run, perform four mandatory audits before the deck ships — (1) compare each authored Paper.url/doi/arxiv_id against the search xlsx to catch fabricated URLs, (2) classify off-topic downloads (keyword matches that don't fit the user's actual intent) and delete their pdf + lightweight pptx, (3) scan authored fields for drafting-management metadata (version tags, writing-guide file names, insertion markers) that must not reach the slides, and (4) scan for bare math notation not wrapped in $...$ (which ships as flat ASCII instead of real subscripts). Use after paper-summary-author finishes, before reporting deck-ready. 
tools: Read, Bash, Edit, Grep, Glob --- -You are the post-authoring auditor for ThesisAgents's LLM-as-agent flow. You run AFTER `paper-summary-author` has authored a regen script and produced rich `.pptx` files. Your job is to catch the two failure modes that have historically slipped through: +You are the post-authoring auditor for ThesisAgents's LLM-as-agent flow. You run AFTER `paper-summary-author` has authored a regen script and produced rich `.pptx` files. Your job is to catch the four failure modes that have historically slipped through: 1. **Fabricated URL / DOI / arxiv_id** in a hand-authored `Paper`. Publisher URL paths cannot be guessed; the agent's first instinct is often wrong (e.g. inventing `view/fang2026` for AAAI when AAAI uses numeric volume IDs). A fabricated URL in the deck is worse than no URL — it visibly 404s the user. 2. **Off-topic downloads left in the run directory.** The search is keyword-based, so off-topic papers slip in (e.g. a Viterbi decoder paper matching "Claude code" because both contain "code"). The user sees the run dir; leaving off-topic pdf + lightweight pptx there is noise. +3. **Drafting-management metadata in an authored field.** Summaries assembled from a drop-in insert set or an earlier draft often carry version tags (「v3.5 新增」), a writing-guide file name (`paper_rule.md`), or insertion markers. Pasted verbatim into a `PaperSummary` field, they ride onto the slide where the reader cannot parse them. +4. **Bare math notation that ships flat.** The exporter renders real subscripts / superscripts only for notation wrapped in `$...$` (slide-deck-rules §12). An authored field that writes `I(za;zb|Ep)` or `lambda_max` without the delimiters renders as flat ASCII — "za" reads as a word, not z-sub-a. The original fang2026 deck shipped exactly this way, so it is a confirmed, recurring failure mode. You do NOT modify the rich summaries themselves — that's `paper-summary-author`'s job. You only audit + prune.
@@ -97,6 +99,92 @@ What you KEEP intact (pruning them would rewrite history): - The aggregate `exports//-.bib` - Every rich `.pptx` (and language variants like `-zh-tw.pptx`) for ON-topic papers +## Audit 3 — Drafting-metadata leak scan + +Hand-authored `PaperSummary` fields are *delivered content* — they land verbatim on +slides. A field that quotes the writing process leaks bookkeeping the reader cannot +parse. Scan every authored string for drafting-management metadata (full rule: +`paper_rule` "草稿管理元資訊不得進入交付內容"): + +```python +import re +from pathlib import Path + +LEAK_PATTERNS = [ + (r"\bv\d+(\.\d+)?\b", "version tag"), # v3, v3.5, v2.1 + (r"paper_rule", "writing-guide file name"), + (r"INSERT INTO|drop-in|Renumber", "insertion marker"), + (r"原 v\d", "version-relative phrasing"), + (r"目前 §\d.*僅有|本節僅基於", "author-facing note"), +] +src = Path("scripts/regen_<...>.py").read_text(encoding="utf-8") +for pat, kind in LEAK_PATTERNS: + for m in re.finditer(pat, src): + ctx = src[max(0, m.start() - 30):m.start() + 30].replace("\n", " ") + print(f" {kind}: …{ctx}…") +``` + +Two judgement calls before flagging a hit: +- **`v\d` false positives.** A real model / library / action version in the content + (`Qwen3`, `LoRA`, `CUDA 13.0.1`, `cache/save@v4`) is legitimate — it means + something to the reader. Only flag a `v\d` that refers to *the draft's own + revision* (「v3 既有」, 「v3.5 新增」, 「原 v3 設計」). +- **Public-doc citations are fine.** Citing the bundled framework's public docs + (its GitHub `docs/…`) can stay, an internal writing-guide / script file name + (`paper_rule.md`, `regen_*.py`) cannot. + +For each real leak, the parent must fix the authored field in the regen script and +re-run — do NOT edit the emitted `.pptx` directly, it is overwritten on the next +regen. This audit caught **8** leaks in one drop-in-assembled thesis (`v3`–`v3.5` +tags, a `paper_rule.md` citation, three 「原 v3 設計」 phrasings) that all read as +noise to the reviewer. 
+ +## Audit 4 — Flat-math notation scan + +Math notation only renders as real subscripts / superscripts when the authoring +wraps it in `$...$` (slide-deck-rules §12 math-delimiter contract). A field that +writes `I(za;zb|Ep)` bare ships flat — the reader sees "za" as a word, not +z-sub-a. Scan every authored string for **subscript-shaped tokens that are not +inside a `$...$` span**: + +```python +import re +from pathlib import Path + +src = Path("scripts/regen_<...>.py").read_text(encoding="utf-8") + +# Strip $...$ spans first — anything left that looks like math is unwrapped. +unwrapped = re.sub(r"\$[^$]+\$", "", src) + +MATH_SHAPES = [ + (r"\bI\([A-Za-z]+;[A-Za-z]+(\|[A-Za-z]+)?\)", "mutual-information term I(..;..|..)"), + (r"\b[a-zA-Z]_?\{?(adv|ben|max|min|p|a|b)\}?\b(?/ ... on-topic kept: verdict: DONE + +[3] Drafting-metadata leak scan + scanned: scripts/regen_<...>.py + leaks: + : — …… + ... + verdict: PASS / FAIL + +[4] Flat-math notation scan + scanned: scripts/regen_<...>.py + unwrapped: + — …… + ... + verdict: PASS / FAIL ``` If audit 1 FAILs, the parent must fix and re-run — do NOT prune anything for a paper that has a URL/DOI violation, because the parent may decide to rewrite or remove that entry entirely. diff --git a/.claude/agents/tasks/slide-overflow-check.md b/.claude/agents/tasks/slide-overflow-check.md index 10b0ea6..ec11f0b 100644 --- a/.claude/agents/tasks/slide-overflow-check.md +++ b/.claude/agents/tasks/slide-overflow-check.md @@ -28,38 +28,44 @@ You'll be told (or you can infer from context) which deck(s) to check. Typical i - A specific path: `exports//.pptx` - Or a regen script the parent just ran: re-derive the path from the script's `out_dir` + `filename_stem`. -For each deck, run a headless inspection that walks every slide, every shape, estimates the wrapped text height, and flags violations. The reference inspection pattern is `scripts/regen_ieee_thesis_style.py` and the report shape is in `exports/v3-final-overflow-check.txt`. 
If neither exists in the current repo, write the inspection inline with `python-pptx`: - -```python -from pptx import Presentation -from pptx.util import Emu - -FOOTER_GUARD_EMU = int(7.05 * 914400) # 7.05" in EMU - -def estimate_wrapped_height(shape) -> int: - """Rough wrap estimator: count lines including soft-wraps at ~chars/width.""" - # Implementation: walk paragraphs, measure font size, estimate chars-per-line - # from shape width and font, sum line heights. Project's inspector script - # already does this — prefer importing it over reinventing. - ... - -prs = Presentation(pptx_path) -violations = [] -for idx, slide in enumerate(prs.slides, start=1): - for shape in slide.shapes: - if not shape.has_text_frame: - continue - top = shape.top or 0 - height = shape.height or 0 - rendered = estimate_wrapped_height(shape) - bottom = top + rendered - if rendered > height: - violations.append((idx, shape.name, "overflows its box", rendered, height)) - if bottom > FOOTER_GUARD_EMU: - violations.append((idx, shape.name, "crosses footer guard", bottom, FOOTER_GUARD_EMU)) +**Use the canonical inspector — `scripts/check_overflow.py`.** It already encodes +the project's wrap estimate and calibration, so run it rather than reinventing one: + +``` +.venv/Scripts/python.exe scripts/check_overflow.py exports/.pptx [more.pptx ...] ``` -Prefer reusing the project's existing inspector (look for `scripts/regen_ieee_thesis_style.py` or any `overflow_check.py`) over writing your own — it already knows the per-font-size estimation constants the project uses. +It prints the report block below per deck and exits with the count of failed decks +(0 = all clean), so you can assert on the exit code. It is also importable — +`from check_overflow import check_pptx, check_pptx_from_prs` — returning a list of +`Violation(slide, shape, kind, rendered_in, limit_in)`; `check_pptx_from_prs(prs)` +takes an already-open `Presentation` so a test can build a deck in memory. 
+ +What it does (so you can trust / explain its output): + +- Reads each run's actual `font.size` (the exporter sets it per run), classifies + each character as full-width (CJK / kana / hangul ≈ 1.0 em) or half-width + (Latin / digits / punctuation ≈ 0.55 em), accumulates width per line, wraps at + the box's inner width, and sums line heights at `font_pt × 1.2` (single spacing). +- **Exempts exporter-placed chrome** — shape names `page_number`, `footer`, and any + `accent*` bar. These are fixed-geometry decoration: the page number / footer live + *at* the 7.05" line by design, so checking them is a false positive (an early + version flagged 48 "violations" on a clean deck, 40 of them chrome). +- Treats an **empty text frame as zero height** (a blank decorative rectangle is + not charged a fallback line). +- **Estimates table rendered height** rather than trusting the declared one: + the renderer (PowerPoint) grows a row to fit wrapped cell text, but the GraphicFrame's + `height` written by python-pptx stays unchanged, so a many-row / long-cell table overflows the footer guard + while `shape.height` claims it fits. The inspector sums each row's tallest-cell + wrapped lines. A flagged table is fixed by splitting it at the authoring layer + (`rq_results` / `paper_tables`); the exporter does not auto-paginate tables. +- Applies a `0.08"` box tolerance so a sub-third-of-a-line rounding overshoot + doesn't flag a box that visually fits. + +It is a deliberately rough estimate (no font-metrics library) — the same trade-off +the manual check always made. It catches gross overflow; it will not catch a +1-pixel clip. If the canonical script is somehow absent, replicate its logic +inline — do **not** revert to a stub that returns nothing.
## Reporting format diff --git a/.claude/agents/tasks/thesis-deck-author.md b/.claude/agents/tasks/thesis-deck-author.md new file mode 100644 index 0000000..1e0311d --- /dev/null +++ b/.claude/agents/tasks/thesis-deck-author.md @@ -0,0 +1,234 @@ +--- +name: thesis-deck-author +description: Author a degree-thesis ORAL-DEFENCE deck (學位論文口試/答辯簡報) from the candidate's OWN thesis — not a summary of someone else's paper. Reads the candidate's thesis files (PDF / .md / .docx / chapter drafts) when supplied, or consumes a section-by-section content brief the parent gathered interactively, then hand-authors a rich PaperSummary covering the seven canonical paper_rule sections, drops a scripts/regen_.py, runs it, and chains the completeness + overflow + math audits. Use when the user wants a defence deck for their own dissertation, with or without ANTHROPIC_API_KEY (you, the LLM, are the author either way). +tools: Read, Write, Edit, Bash, Grep, Glob +--- + +You are the degree-thesis oral-defence deck author for ThesisAgents. Your +deliverable is the slide deck a candidate presents at their own 口試 / 答辯 — +**their original research**, structured for a committee, covering the seven +canonical sections of `paper_rule`. This is a different job from +`paper-summary-author`, which summarises *someone else's* downloaded paper. The +two share the exporter and the rich `PaperSummary` schema, but the source, the +audits, and the completeness bar differ — read this whole doc before authoring. 
+ +## How this differs from `paper-summary-author` (read first) + +| | `paper-summary-author` | `thesis-deck-author` (this agent) | +|---|---|---| +| Source | a downloaded PDF of *another* author's paper | the **candidate's own** thesis files, or an elicited content brief | +| Goal | one rich slide per *relevant search result* | **one** complete defence deck for *one* thesis | +| Network / browser path | yes (VPN gate, paywalled-PDF WebRunner) | **none** — the candidate owns the source, nothing is fetched | +| URL/DOI fabrication audit | mandatory (`post-author-audit` Audit 1) | N/A — a thesis has no publisher URL to verify | +| Off-topic pruning | mandatory (search returns noise) | N/A — there is one intended thesis | +| Completeness bar | render only populated fields | **all seven `paper_rule` sections present** (see audit below) | +| Fabrication rule | every claim from the PDF you read | every result from the candidate's actual thesis / data — never invent a number to fill a section | + +The shared rules still bind: `slide-deck-rules` (geometry, content caps, §8 +glossing, §9 one-assertion-per-slide, §10 evidence form, §12 `$...$` math), +`deck-design` (dark-mode contract, no-red-text, contrast, accent geometry), +`paper_rule` (the seven sections + numbering). Consult them as you author — they +are not re-derived here. + +## Input modes (both supported) + +**Mode A — candidate's own thesis files.** The user points you at their thesis +(`thesis.pdf`, a chapter folder of `.md` / `.docx`, a results spreadsheet, figure +PNGs). Read them with `Read` (PDF via the `pages` arg, ≤ 20 pages/request), then +author each section from what the document actually says. This is the closest +analogue to `paper-summary-author`'s PDF-reading flow — except the PDF is the +candidate's own work, so there is no download / VPN / off-topic step. 
+ +**Mode B — interactive content brief.** When there is no complete draft, the +**parent agent** (the main Claude session, which has `AskUserQuestion`) elicits +the content section by section and hands it to you as a brief. You do **not** call +`AskUserQuestion` yourself — a spawned subagent cannot drive the interactive +prompt, so asking is the parent's job. If you are invoked in Mode B with a brief +that is missing a canonical section, do not invent it: return a short list of the +missing sections to the parent so it can ask, then resume. (See "When a section +is genuinely empty" below for the one exception.) + +If you are unsure which mode you are in, check for thesis files first +(`Glob` the path the user named). Files present → Mode A. Only a topic / +outline in the prompt → Mode B brief. + +## Seven-section → PaperSummary field mapping (the defence skeleton) + +`paper_rule` defines the seven canonical sections, every defence deck must cover +them, and each maps onto rich `PaperSummary` fields the exporter already renders. +Author in this order, it is also the slide order: + +| `paper_rule` section | Authored field(s) | Renders as | +|---|---|---| +| **Abstract** (one-slide thesis-in-brief) | `core_observation` *or* a tight `headline_metrics` | callout box / KPI block | +| **1. Introduction** — 1.2 motivation, 1.3 problem, 1.5 contributions | `pain_points` (1.2), `research_question` (1.3), `contributions_detailed` (1.5, **cap 4**) | pain-point quadrant + RQ callout + structured contributions | +| **2. Literature Review** — 2.3 comparison, 2.4 gap | `literature_table` (2.3, first row = header), `technique_table` (technique → role) | positioning table(s) | +| **3. Methodology** — 3.1 architecture, 3.2-3.4 components, 3.5 metrics | `system_flow` (3.1 steps), `method_sections` (3.2-3.4, 2/slide), `evaluation_sections` metric defs (3.5) | flow + method-detail slides | +| **4. 
Experiment & Evaluation** — 4.4 charts, 4.5 analysis | `research_questions` (the eval questions), `rq_results` (table + analysis per RQ), `headline_metrics`, `figures` (charts), `paper_tables` | per-RQ result tables + KPI + figures | +| **5. Conclusion** — 5.1 findings, 5.3 limitations, 5.4 future work | `core_observation` (5.1), `limitations` (5.3), `future_work` (5.4) | core-observation box + limitations/future slide | +| **References** | the works the deck actually *cites* — never the thesis itself | references slide(s), or none | + +**The candidate's own thesis is not a source to cite.** The exporter detects the +own-thesis cover (`source="local"` with no `url` / `doi` / `arxiv_id` / `pdf_url`, +via `_is_own_thesis`) and deliberately drops two borrowed-paper artefacts: the +**source / overview slide** (which exists to attribute a fetched paper) and its +**BibTeX cite-key**, and it **excludes the thesis from its own references slide**. +A rich `PaperSummary` carries no citation list, so an own-thesis-only deck ends up +with **no references slide at all** — that is correct (a defence deck does not cite +itself), not a completeness gap. The works the thesis *cites* appear inside the +content (the `literature_table` / `technique_table` rows, prose), not as an +auto-generated bibliography. **Why this matters at author time:** do not try to +"fix" the missing references slide by inventing a publisher `url` / `doi` on the +cover `Paper` — that flips it back to a borrowed paper and re-introduces the +source slide + BibTeX key the candidate should not see. + +The exporter renders only the fields that have content, so a complete deck means +**every section above has at least its primary field populated**. 
A thesis deck +that ships with an empty `literature_table` and no `rq_results` is incomplete even +if it passes `slide-overflow-check` geometrically — that is the §completeness +audit below, and it is the single most important difference from the per-paper +summary flow. + +### Cover mapping (defence-specific) + +The cover is a `Paper` record, map the defence metadata onto it: + +- `title` = the **thesis title** (run through the exporter's `_cover_title`, never a topic keyword). +- `authors` = `("",)` — the candidate is the sole author of a thesis. +- `year` = the **defence year**. +- `venue` = `" · · 學位論文"` (e.g. `"國立臺灣大學 · 資訊工程學系 · 碩士學位論文"`). This is where school / department / degree live, the cover renders authors · year · venue. +- `doi` / `url` = empty (`""` / `None`) — a thesis has no publisher DOI. Do **not** invent one. +- Advisor / committee: fold the advisor into `venue` as `… · 指導教授:` when the user supplies it, or carry it on the abstract slide. There is no dedicated committee shape, do not add one to the exporter, the venue line is sufficient for a defence cover. + +## Authoring quality bar (apply the shared rules at author time) + +Satisfy these as you write the fields, not after the deck renders — they are the +same rules `paper-summary-author` lists, restated for the defence context: + +- **One assertion per slide** (`slide-deck-rules` §9). Each `rq_results` question, `contributions_detailed` heading, `pain_points` sub-head is a *claim*, not a topic label — "本方法在 X 上較 baseline 高 4.2 個百分點", not "實驗結果". +- **Pick the evidence form that fits** (`slide-deck-rules` §10). A many-value comparison → `figures` (chart) or a table, the headline numbers → `headline_metrics` (KPI), qualitative points → bullet fields. A defence audience reads a grouped bar chart of "ours vs baselines" instantly, a 20-cell table read aloud loses them. +- **Wrap math in `$...$`** (`slide-deck-rules` §12 math-delimiter contract). 
Author `$\mathcal{L}$`-style notation as `$I(z_a;z_b|E_p)$`, `$λ_{max}$`, `$x^2$` — never bare `I(za;zb|Ep)`. The exporter renders real subscripts only inside `$...$`, on every content surface (bullets, KPI values, table cells, contribution / method body paragraphs, RQ / core-observation callouts). A defence deck whose objective formula shows flat "za" reads half-finished. +- **Gloss every term at first use** (`slide-deck-rules` §8). The committee may include an examiner outside the sub-field — define each acronym / library / metric the first time it appears. +- **Numbers follow the reporting rules** (`paper_rule` §數字與統計呈現). Measurement-appropriate significant figures, label pp vs relative %, report p-values as actual values. +- **No fabrication** (`paper_rule` §不謊造). Every metric, RQ result, and limitation comes from the candidate's actual thesis / experiments. If the thesis does not report a number, leave the field empty or carry the qualitative claim — never invent a digit to make a KPI slide look fuller. + +## The build (reuse the regen pattern, no exporter changes) + +The defence deck uses the **existing rich tier unchanged** — author a +`PaperSummary`, attach it to a `Paper`, and export with the default +**white + blue academic-paper** style (`dark_mode=False`: white background, +navy `#1F3A66` headings / body, blue `#2563EB` emphasis). Dark mode stays opt-in +(`dark_mode=True`) for OLED / low-light venues. The worked template is +`scripts/regen_thesis_demo.py` (and `scripts/regen_fang2026.py`), copy its structure: + +```python +# scripts/regen_.py +from thesisagents.core.models import ( + ExportOptions, Paper, PaperCollection, PaperSummary, Query, RqResult, +) +from thesisagents.exporters.pptx import PptxExporter + +OUT_DIR = "exports" +FILENAME_STEM = "-" # e.g. 
"chen2026-fed-learning-zh-tw" +LANGUAGE = "zh-tw" # one of slide-deck-rules SUPPORTED_LANGUAGES + +def _build_summary() -> PaperSummary: + return PaperSummary(language=LANGUAGE, pain_points=..., research_question=..., + contributions_detailed=..., literature_table=..., + method_sections=..., evaluation_sections=..., + research_questions=..., rq_results=..., headline_metrics=..., + core_observation=..., limitations=..., future_work=..., + figures=..., model="hand-authored:thesis-deck-author") + +def _build_paper() -> Paper: + return Paper(source="local", source_id="", title="", + authors=("",), year=, + venue=" · · 學位論文", + abstract="", url="", doi=None, summary=_build_summary()) + +def main() -> None: + collection = PaperCollection( + query=Query(keywords="", sources=("local",)), + papers=(_build_paper(),), + ) + options = ExportOptions(formats=("pptx",), out_dir=OUT_DIR, + filename_stem=FILENAME_STEM, language=LANGUAGE, + dark_mode=False) # white + blue academic default + print(f"saved: {PptxExporter().export(collection, options)}") + +if __name__ == "__main__": + main() +``` + +Run from the project root: `.venv/Scripts/python.exe scripts/regen_.py`. + +`max_slides_per_paper` (default 25 ≈ a 20-30 min talk, `slide-deck-rules` §13) is +the talk-time budget for the slot. A masters defence is often 15-20 min, set a +lower cap and prune to the takeaways rather than cramming (§13). Confirm the slot +length with the user (via the parent) when it is not stated. + +## Mandatory audits before reporting deck-ready + +Run all three, in order: + +1. **Seven-section completeness** (this agent's signature check). After the deck + builds, dump the slide titles and confirm each *content* `paper_rule` section is + present: Abstract, Introduction, Literature Review, Methodology, Experiment & + Evaluation, Conclusion. A missing one is a FAIL — go back and author the field, + do not ship a deck that skips Literature Review or Conclusion. 
Quote the relevant + `paper_rule` clause for any gap (e.g.「2.3 缺比較表」,「5.4 缺未來工作」). + **References is the one exception:** an own-thesis deck legitimately has **no** + references slide (the thesis does not cite itself — see the mapping table above), + so its absence is expected, not a gap. Report it as `References n/a (own thesis)`, + not as a failure. +2. **Flat-math + overflow** — chain `slide-overflow-check` on the `.pptx` (every + shape's wrapped height fits its box and clears `FOOTER_GUARD = 7.05"`), and run + `post-author-audit` Audit 4 (flat-math scan) over the regen script so no + formula ships as ASCII. +3. **Visual contract** — confirm the dark-mode / no-red-text / contrast contracts + from `deck-design` (the exporter applies them, but verify the render: every run + has an explicit non-black colour, no `#C0392B` text). + +`post-author-audit` Audits 1-3 (URL/DOI, off-topic, drafting-metadata) do not +apply to a thesis — there is no search xlsx and no publisher URL. Audit 4 +(flat-math) does apply and is the only one to run from that agent. + +## When a section is genuinely empty + +A real thesis sometimes lacks a formal Literature-Review comparison table or has a +single RQ. Do not fabricate to fill the skeleton — instead: + +- **Carry the qualitative form.** No `literature_table`? Author the gap as + `pain_points` prose ("既有方法 A/B/C 在 X 上的不足") so §2 is still covered, just + not as a grid. +- **Surface the gap to the user** (via the parent) when a *required* defence + section is missing from the source — "你的草稿沒有 5.4 未來工作,要補一段嗎?". + A defence committee expects all seven, so flag it rather than silently omitting. + +## Anti-patterns (HARD) + +1. **Summarising the thesis like a borrowed paper.** Reusing + `paper-summary-author`'s "one slide per search result" framing — a thesis is one + coherent argument across seven sections, not a stack of independent highlights. +2. 
**A topic-keyword cover.** Cover title = the thesis title, never the search + query or the topic phrase (`deck-design` anti-pattern). The candidate's name + + school + degree go in `authors` / `venue`, not invented onto a new shape. +3. **Fabricated results to fill a KPI / RQ slide.** A thinner-than-ideal Experiment + section is honest, an invented 92.3% is misconduct. Leave it empty or qualitative. +4. **Flat math in the objective.** `min I(za;zb|Ep)` on the methodology slide — + author `$min$ $I(z_a;z_b|E_p)$` so the renderer fires (§12). +5. **Shipping an incomplete skeleton.** A deck with no Literature Review or no + Conclusion passes overflow but fails the defence — the completeness audit is + the gate, not geometry. + +## Reporting back + +``` +thesis-deck-author — +mode: A (files) / B (brief) +deck: exports/-.pptx +slides: (cap , ~ min talk) +sections: Abstract ✓ · Intro ✓ · LitReview ✓ · Method ✓ · Experiment ✓ · Conclusion ✓ · References n/a (own thesis) +audits: completeness PASS · overflow PASS · flat-math PASS · visual PASS +gaps surfaced: +``` diff --git a/AGENTS.md b/AGENTS.md index ed7f47a..140fe86 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -62,10 +62,10 @@ fallback, never the goal when an LLM is in the loop.** the full search and stay as-is. ### Worked example -`scripts/regen_llm_security_batch.py` ships 7 hand-authored rich -summaries built exactly this way — read each PDF, author the summary, -batch-export. Reuse it as the template for any future multi-paper -search. +`scripts/regen_fang2026.py` is the canonical hand-authored rich summary +built exactly this way — read the PDF, author the `PaperSummary`, export +through `PptxExporter`. Reuse it as the template, a multi-paper batch is +the same shape with a tuple of papers in `PaperCollection`. ### Per-paper flow @@ -108,10 +108,9 @@ search. non-`footer` / non-`page_number` shape must have `top + height ≤ 7.05"` on a 16:9 widescreen slide. 
-Working templates: `scripts/regen_llm_security_batch.py` (batch, 7 -papers), `scripts/regen_ling2026_agent_skills.py` (single paper en), -`scripts/regen_ling2026_agent_skills_zh_tw.py` (single paper zh-tw), -`scripts/regen_ieee_thesis_style.py` (single paper). +Working template: `scripts/regen_fang2026.py` (single paper, rich-tier, +zh-tw, dark mode — every rich field populated). A batch follows the same +shape with a tuple of papers in `PaperCollection`. ### URL / DOI verification (mandatory) @@ -279,6 +278,9 @@ flagships (Nature, Science, PNAS, CACM, Lecture Notes in CS, …). Pass - DoD gate runner: `.claude/agents/tasks/dod-verify.md`. - LLM-as-agent thesis-style authoring: `.claude/agents/tasks/paper-summary-author.md` + `tasks/post-author-audit.md` + `tasks/slide-overflow-check.md`. +- Degree-thesis ORAL-DEFENCE deck from the candidate's own thesis (seven + `paper_rule` sections → rich deck via the existing exporter): + `.claude/agents/tasks/thesis-deck-author.md`. - Per-source plugin contract and recorded fixtures: `thesisagents/sources//` + `tests/fixtures//`. - LLM-as-agent flow examples: `scripts/regen_*.py`. diff --git a/CLAUDE.md b/CLAUDE.md index 3ac2ba8..5ab3fd1 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -7,7 +7,8 @@ > `code-quality-reviewer`, `compliance-auditor`, `slide-deck-rules`, > `deck-design`, `env-vars`, `language-vocabulary-check`, `paper_rule`) and > `tasks/` (task-running / multi-tasking agents: `dod-verify`, -> `paper-summary-author`, `post-author-audit`, `slide-overflow-check`). Claude +> `paper-summary-author`, `thesis-deck-author`, `post-author-audit`, +> `slide-overflow-check`). Claude > Code discovers them recursively by `name:`, so the path never affects how an > agent is invoked. Full path index: see "Where the detailed rules live" below. @@ -185,14 +186,17 @@ to every `.pptx` this project produces or modifies. After the edit, audit the resulting deck against those subagents' contracts: -1. 
**Background is `_DARK_SLIDE_BG` (`#12151B`)** unless the user explicitly - opted into light mode (`--light-mode`, `dark_mode=False`, GUI checkbox). +1. **Background is white (the default light navy-band deck)** unless the + user explicitly opted into dark mode (`--dark-mode`, `dark_mode=True`, + GUI "Dark mode" checkbox), in which case it is `_DARK_SLIDE_BG` + (`#12151B`). 2. **No text run has `rgb=None` or `rgb=(0,0,0)`** (invisible on dark bg). 3. **No light fill contains light text** (luminance > 0.7 × 255 on both). 4. **No `#C0392B` red text runs** (banned in both modes). -5. **Top accent bar + cover left-band geometry** present on ThesisAgents - generated decks (hand-made decks are exempt from accent geometry but - not from dark-mode / no-red / contrast contracts). +5. **Navy header band (`accent_top`) + full-bleed navy cover (`accent_left`) + geometry** present on ThesisAgents generated decks (hand-made decks are + exempt from accent geometry but not from dark-mode / no-red / contrast + contracts). **For hand-made decks that don't follow the project's `_BRAND_*` constants**, run `_apply_dark_mode(prs)` from `thesisagents.exporters.pptx` as a @@ -211,6 +215,7 @@ deliverable surfaces: | Source plugins under `sources/` or `thesisagents/fetchers/` | `compliance-auditor` | | Paper / thesis text in any locale | `paper_rule` | | Hand-authoring `PaperSummary` from a PDF | `paper-summary-author`, `paper_rule` | +| Authoring a degree-thesis defence deck from the candidate's own thesis | `thesis-deck-author`, `paper_rule`, `deck-design` | | Anything before commit | `dod-verify` (gate runner) | When in doubt, read more subagents, not fewer. The subagent rules exist @@ -219,13 +224,16 @@ failure mode, not a corner case. ## Dark-Mode Contract: Every Text Run Sets an Explicit Colour (HARD RULE) -Dark mode is the project's default pptx render path. 
The post-build -recolour pass swaps light-palette RGB values to their dark-palette -equivalents — but it can only swap colours it can read. **A text run -with `run.font.color.rgb = None` inherits the slide-master's theme -colour, renders as near-black on the dark slide background, and is -invisible.** Every text-adding helper in `thesisagents/exporters/pptx.py` -MUST therefore assign `run.font.color.rgb = _BRAND_*` (one of the four +Dark mode is an opt-in pptx render path (the default is the light +navy-band deck), but the contract still binds **every** build because the +exporter always builds with the light palette first and a deck may be +rendered dark at any time. The post-build recolour pass swaps light-palette +RGB values to their dark-palette equivalents — but it can only swap colours +it can read. **A text run with `run.font.color.rgb = None` inherits the +slide-master's theme colour, renders as near-black on the dark slide +background, and is invisible.** Every text-adding helper in +`thesisagents/exporters/pptx.py` MUST therefore assign +`run.font.color.rgb = _BRAND_*` (one of the four palette constants) after creating or overwriting a run. Never leave the colour at its default; never pass `colour=None` to `_add_textbox`; never write `RGBColor(0, 0, 0)` — use `_BRAND_DARK` instead. @@ -252,12 +260,12 @@ default-dark-mode render. as a TEXT colour across both light and dark modes. Red text reads as error / warning in slide conventions and pattern-matches strongly to AI-generated KPI emphasis. The sanctioned text-emphasis colour is -**``_BRAND_HIGHLIGHT``** (teal-700, ``#0E7490``) — pair with +**``_BRAND_HIGHLIGHT``** (academic blue-600, ``#2563EB``) — pair with ``run.font.bold = True``. Use ``_BRAND_GREY`` for caption / placeholder / chrome text so headlines stay headlines. 
Variety rule: KPI value + RQ -question use teal; figure caption + figure-unavailable use grey — do +question use blue, figure caption + figure-unavailable use grey — do not collapse all four to the same colour. The dark-mode pass swaps -teal-700 → teal-400 (``#2DD4BF``) via ``_LIGHT_TO_DARK_TEXT``; the +blue-600 → blue-400 (``#60A5FA``) via ``_LIGHT_TO_DARK_TEXT``, the audit script's ``_ACCEPTED_DARK_RUN_COLORS`` set knows about both. Regression test ``test_pptx_no_red_text_runs`` walks every run on a default-rendered deck and fails if any run uses ``#C0392B``. The red @@ -320,8 +328,8 @@ two kinds of subagent this project uses: .claude/agents/ ├── rules/ code-quality-reviewer · compliance-auditor · slide-deck-rules · │ deck-design · env-vars · language-vocabulary-check · paper_rule -└── tasks/ dod-verify · paper-summary-author · post-author-audit · - slide-overflow-check +└── tasks/ dod-verify · paper-summary-author · thesis-deck-author · + post-author-audit · slide-overflow-check ``` | Topic | Subagent | File (under `.claude/agents/`) | @@ -335,5 +343,6 @@ two kinds of subagent this project uses: | Academic paper writing rules (multilingual, all 14 locales) — Abstract / Introduction / Literature Review / Methodology / Experiment / Conclusion / References structure + PaperSummary-to-section mapping | `paper_rule` | `rules/paper_rule.md` | | Definition-of-Done gate runner | `dod-verify` | `tasks/dod-verify.md` | | LLM-as-agent thesis-style authoring (PDF → rich PaperSummary) | `paper-summary-author` | `tasks/paper-summary-author.md` | -| URL-fabrication / off-topic audits after authoring | `post-author-audit` | `tasks/post-author-audit.md` | +| Degree-thesis ORAL-DEFENCE deck from the candidate's OWN thesis (seven `paper_rule` sections → rich deck) | `thesis-deck-author` | `tasks/thesis-deck-author.md` | +| URL-fabrication / off-topic / drafting-metadata-leak / flat-math audits after authoring | `post-author-audit` | `tasks/post-author-audit.md` | | Slide-overflow 
regression check | `slide-overflow-check` | `tasks/slide-overflow-check.md` | diff --git a/docs/cli.md b/docs/cli.md index f06c014..4f4e9c4 100644 --- a/docs/cli.md +++ b/docs/cli.md @@ -25,7 +25,7 @@ thesisagents (--query KEYWORDS | --paper IDENTIFIER) [--llm-model MODEL] [--all-venues] [--paywall-threshold FLOAT] [--yes] - [--max-slides N] [--light-mode] + [--max-slides N] [--dark-mode] [--quiet] ``` @@ -54,7 +54,7 @@ thesisagents (--query KEYWORDS | --paper IDENTIFIER) | `--paywall-threshold` | `0.30` | Fraction of paywalled results above which the search-mode pipeline asks the user before generating per-paper PPTs. | | `--yes` | off | Auto-accept the paywall prompt. | | `--max-slides` | `25` | Per-paper slide cap. Pass `0` for unlimited. | -| `--light-mode` | off | Render the pptx in light mode (white slide background + navy text). **Dark mode is the default** — the exporter swaps the brand palette via a post-build pass (slide bg `#12151B`, body text `#E5E7EB`, lighter table-row stripe) so OLED projectors and low-light venues don't glare. Pass this flag for projectors in well-lit rooms or when the deck will be printed. | +| `--dark-mode` | off | Render the pptx in dark mode. **The light navy-band deck is the default** (white slides, full-width navy header band with a white title, navy cover panel). Pass this flag for the dark variant — a post-build pass swaps to a dark slide background (`#12151B`) + near-white text (`#E5E7EB`) and lightens the navy band / cover / table-row fills so the same chrome reads on OLED projectors and in low-light venues. | | `--quiet` | off | Suppress the per-paper one-line printout to stdout. | ## Examples diff --git a/docs/en/index.rst b/docs/en/index.rst index f634517..14c959b 100644 --- a/docs/en/index.rst +++ b/docs/en/index.rst @@ -354,13 +354,16 @@ doesn't look like generic ``add_slide()`` output: Dark mode ^^^^^^^^^ -Dark mode is the **default** render path. 
The post-build pass swaps -the light palette to a dark deck (slide bg ``#12151B``, body text -``#E5E7EB``, brighter teal accent ``#2DD4BF``) — designed for OLED -projectors and low-light venues. Opt out per render with -``--light-mode`` on the CLI, the **Light mode** checkbox on the -GUI's Deck tab, or ``ExportOptions(dark_mode=False)`` in Python. -Over MCP, pass ``dark_mode: false`` to the ``export`` tool. +The **light navy-band deck is the default** render path: white +slides, a full-width navy header band with a white title, and a +full-bleed navy cover panel. Opt **in** to dark mode per render with +``--dark-mode`` on the CLI, the **Dark mode** checkbox on the GUI's +Deck tab, or ``ExportOptions(dark_mode=True)`` in Python (over MCP, +pass ``dark_mode: true`` to the ``export`` tool). The dark post-build +pass swaps to a dark deck (slide bg ``#12151B``, body text +``#E5E7EB``, brighter teal accent ``#2DD4BF``) and lightens the navy +band / cover / table fills — designed for OLED projectors and +low-light venues. Red is **banned as a text colour** in both modes. The sanctioned emphasis colour is teal ``#0E7490`` (bold + teal for KPI values and diff --git a/docs/gui.md b/docs/gui.md index 2314c00..77f42ea 100644 --- a/docs/gui.md +++ b/docs/gui.md @@ -111,10 +111,12 @@ so a subsequent **Export** picks up the rich-tier layout automatically. The pre-export deck-shaping controls. Wires `ExportOptions` fields behind Qt widgets: -- **Light mode** checkbox — unchecked (default) ships the dark deck +- **Dark mode** checkbox — unchecked (default) ships the light + navy-band deck (white slides, navy header band with a white title, + navy cover panel). Tick it for the dark OLED / low-light variant (slide bg `#12151B`, body text `#E5E7EB`, brighter teal accent - `#2DD4BF`). Tick it for the printable / well-lit-room variant - (white bg + navy text). Mirrors the CLI `--light-mode` flag. + `#2DD4BF`, lightened navy band / cover / table fills). 
Mirrors the + CLI `--dark-mode` flag. - **Max slides per paper** — integer spinner. Defaults to 25; `0` means unlimited. - **Max figures per paper** — controls how many `figures=` entries the
diff --git a/scripts/_audit_dark_text.py b/scripts/_audit_dark_text.py
new file mode 100644
index 0000000..f670dca
--- /dev/null
+++ b/scripts/_audit_dark_text.py
@@ -0,0 +1,178 @@
+"""Manual dark-mode text auditor for a single rendered ThesisAgents deck.
+
+Why this script exists
+----------------------
+The dark-mode / no-red / contrast contracts (see `.claude/agents/rules/deck-design.md`)
+are pinned by `tests/test_exporters.py` regression tests — but those only run on
+decks the exporter *generates*. CLAUDE.md's "Read Subagents BEFORE Editing Any
+.pptx" rule extends the same contracts to **hand-made decks** (anything under
+`exports/`, `assets/template/`, etc.), which no test covers. This is the companion
+debug script the deck-design doc refers to: point it at any `.pptx` and it reports
+every run that would render invisible / off-contract.
+
+Checks (each maps to a deck-design contract):
+
+1. **Invisible run** — `rgb is None` or `rgb == (0,0,0)`: inherits the theme
+   colour and renders near-black on the dark slide background.
+2. **Red text** — `#C0392B` (`_BRAND_ACCENT`): banned as a TEXT colour in both
+   modes (reads as error, pattern-matches AI-generated emphasis).
+3. **Light-on-light** — a near-white run inside a near-white-fill shape (both
+   luminances > 0.7 × 255): the contrast-contract invisibility bug.
+4. **Off-palette run (warning)** — a run whose colour is none of the sanctioned
+   dark-mode run colours (`_ACCEPTED_DARK_RUN_COLORS`). Informational: a custom
+   colour may be intentional, but it's worth a human glance.
+
+Runs are read from every shape's text frame and from every table cell. A run whose
+colour is not a literal RGB (e.g. a theme / scheme reference) counts as check 1.
+
+Usage:
+    .venv/Scripts/python.exe scripts/_audit_dark_text.py exports/<deck>.pptx [more.pptx ...]
+
+Exit code is the number of decks with at least one hard issue (checks 1-3); the
+off-palette warning alone does not fail a deck. Importable: ``audit_deck(path)``.
+"""
+from __future__ import annotations
+
+import sys
+from dataclasses import dataclass
+from pathlib import Path
+
+from pptx import Presentation
+
+# Sanctioned dark-mode run colours — the *values* of _LIGHT_TO_DARK_TEXT plus the
+# near-white promotion target and the white table-header foreground (which sits on
+# the dark navy header fill). Keep in sync with pptx.py if the palette changes.
+_ACCEPTED_DARK_RUN_COLORS = frozenset({
+    (0xE5, 0xE7, 0xEB),  # near-white body text
+    (0x9C, 0xA3, 0xAF),  # mid grey
+    (0x6B, 0x72, 0x80),  # muted grey
+    (0x60, 0xA5, 0xFA),  # blue-400 highlight
+    (0xFF, 0xFF, 0xFF),  # white table-header foreground (on navy fill)
+})
+_RED_ACCENT = (0xC0, 0x39, 0x2B)
+_LIGHT_LUMA = 0.7 * 255  # luminance above which a colour counts as "light"
+
+
+@dataclass(frozen=True)
+class Issue:
+    """One finding on one run; ``hard`` decides whether it fails the deck."""
+
+    slide: int
+    shape: str
+    kind: str  # "invisible" | "red text" | "light-on-light" | "off-palette"
+    detail: str
+    hard: bool  # hard issues fail the deck; warnings do not
+
+
+def _rgb_tuple(rgb):
+    """Convert an indexable RGB value (or None) into a plain ``(r, g, b)`` int tuple."""
+    if rgb is None:
+        return None
+    return (int(rgb[0]), int(rgb[1]), int(rgb[2]))
+
+
+def _luma(rgb: tuple[int, int, int]) -> float:
+    """Weighted luminance (0-255 scale) of an ``(r, g, b)`` tuple."""
+    r, g, b = rgb
+    return 0.299 * r + 0.587 * g + 0.114 * b
+
+
+def _shape_fill_rgb(shape):
+    """The shape's solid fill colour as an (r,g,b) tuple, or None if it has no
+    readable solid fill (background / pattern / inherited)."""
+    try:
+        fill = shape.fill
+        if fill.type is not None and fill.fore_color.type is not None:
+            return _rgb_tuple(fill.fore_color.rgb)
+    except (TypeError, ValueError, AttributeError):
+        return None
+    return None
+
+
+def _run_rgb(run):
+    """The run's literal RGB colour as an (r,g,b) tuple, or None if it has none.
+
+    ``ColorFormat.rgb`` raises AttributeError for every colour that is not a
+    literal RGB (unset, theme / scheme, preset ...), so it is read through
+    ``getattr`` with a default: such a run is reported as "invisible" instead of
+    aborting the whole audit.
+    """
+    return _rgb_tuple(getattr(run.font.color, "rgb", None))
+
+
+def _iter_runs(shape):
+    """Yield every non-blank run in the shape's text frame and table cells."""
+    frames = []
+    if getattr(shape, "has_text_frame", False):
+        frames.append(shape.text_frame)
+    if getattr(shape, "has_table", False):
+        # Table text lives in per-cell text frames, not on the graphic frame itself.
+        frames.extend(cell.text_frame for row in shape.table.rows for cell in row.cells)
+    for frame in frames:
+        for paragraph in frame.paragraphs:
+            for run in paragraph.runs:
+                if run.text.strip():
+                    yield run
+
+
+def audit_deck(path: str | Path) -> list[Issue]:
+    """Return every dark-mode / contrast / red-text issue in the deck."""
+    prs = Presentation(str(path))
+    issues: list[Issue] = []
+    for idx, slide in enumerate(prs.slides, start=1):
+        for shape in slide.shapes:
+            name = getattr(shape, "name", "?") or "?"
+            fill_rgb = _shape_fill_rgb(shape)
+            fill_light = fill_rgb is not None and _luma(fill_rgb) > _LIGHT_LUMA
+            for run in _iter_runs(shape):
+                rgb = _run_rgb(run)
+                if rgb is None or rgb == (0, 0, 0):
+                    issues.append(Issue(idx, name, "invisible",
+                                        f"rgb={rgb} renders near-black on dark bg", True))
+                    continue
+                if rgb == _RED_ACCENT:
+                    issues.append(Issue(idx, name, "red text", "#C0392B is banned", True))
+                    continue
+                if fill_light and _luma(rgb) > _LIGHT_LUMA:
+                    issues.append(Issue(idx, name, "light-on-light",
+                                        f"text {rgb} on fill {fill_rgb}", True))
+                    continue
+                if rgb not in _ACCEPTED_DARK_RUN_COLORS:
+                    issues.append(Issue(idx, name, "off-palette",
+                                        f"rgb={rgb} not a sanctioned dark-mode colour", False))
+    return issues
+
+
+def _report(path: str | Path) -> bool:
+    """Print the audit for one deck; return True when it has no hard issue."""
+    issues = audit_deck(path)
+    hard = [i for i in issues if i.hard]
+    warn = [i for i in issues if not i.hard]
+    print(f"dark-text audit — {path}")
+    print(f"hard issues: {len(hard)}")
+    for i in hard:
+        print(f" slide {i.slide}, shape \"{i.shape}\": {i.kind} — {i.detail}")
+    print(f"warnings: {len(warn)}")
+    for i in warn:
+        print(f" slide {i.slide}, shape \"{i.shape}\": {i.kind} — {i.detail}")
+    verdict = "PASS" if not hard else "FAIL"
+    print(f"verdict: {verdict}")
+    return not hard
+
+
+def main(argv: list[str]) -> int:
+    """Audit each deck in ``argv``; return the failed-deck count (2 on a usage error)."""
+    if not argv:
+        print("usage: _audit_dark_text.py <deck.pptx> [more.pptx ...]", file=sys.stderr)
+        return 2
+    failed = 0
+    for i, path in enumerate(argv):
+        if i:
+            print()
+        if not _report(path):
+            failed += 1
+    return failed
+
+
+if __name__ == "__main__":
+    raise SystemExit(main(sys.argv[1:]))
diff --git a/scripts/check_overflow.py b/scripts/check_overflow.py
new file mode 100644
index 0000000..1d3a356
--- /dev/null
+++ b/scripts/check_overflow.py
@@ -0,0 +1,251 @@
+"""Canonical slide-overflow inspector for ThesisAgents
decks. + +Why this script exists +---------------------- +The `slide-overflow-check` subagent is supposed to "reuse the project's existing +inspector" — but no such file existed, so the agent had to reinvent an estimator +from a stub every run. This is that inspector: one runnable, importable module the +agent (and a regression test) can call. + +What "overflow" means here (mirrors slide-overflow-check.md): + +- 16:9 widescreen: slide is 13.333" x 7.5". +- Body sits between ``BODY_TOP = 1.5"`` and ``FOOTER_GUARD = 7.05"`` (the line + where page numbers / footer copy live). Nothing may render past 7.05". +- A shape overflows when its *wrapped, rendered* text height exceeds either + (1) the shape's own height, or (2) ``7.05"`` measured from the slide top. + +The wrap estimate reads each run's actual ``font.size`` (the exporter sets it +explicitly per run), classifies each character as full-width (CJK / kana / hangul +/ full-width forms ≈ 1.0 em) or half-width (Latin / digits / punctuation ≈ 0.55 +em), accumulates width per line, and wraps when a line exceeds the box's inner +width. Line height is the run's font size × 1.2 (PowerPoint single spacing). It is +a *rough* estimate — deliberately conservative, the same trade-off the manual +agent made — so it catches gross overflow without needing a font-metrics library. + +Usage: + .venv/Scripts/python.exe scripts/check_overflow.py exports/.pptx [more.pptx ...] + +Exit code is the number of decks that FAILED (0 = all clean), so CI / a test can +assert on it. Importable: ``check_pptx(path) -> list[Violation]``. 
+""" +from __future__ import annotations + +import sys +import unicodedata +from dataclasses import dataclass +from pathlib import Path + +from pptx import Presentation +from pptx.enum.text import MSO_AUTO_SIZE +from pptx.util import Emu + +_EMU_PER_INCH = 914400 +_FOOTER_GUARD_IN = 7.05 +_FOOTER_GUARD_EMU = int(_FOOTER_GUARD_IN * _EMU_PER_INCH) +# python-pptx default textbox inner margins are 0.1" left + 0.1" right. +_DEFAULT_SIDE_MARGIN_IN = 0.1 +_FULL_WIDTH_EM = 1.0 # CJK / kana / hangul / full-width forms +_HALF_WIDTH_EM = 0.55 # Latin / digits / ASCII punctuation +_LINE_SPACING = 1.2 # PowerPoint single line spacing ≈ 1.2 × font size +_DEFAULT_FONT_PT = 18 # used only when a run carries no explicit size +_TABLE_FONT_PT = 14 # _TABLE_PT in pptx.py — cell font when a run has none +_CELL_V_MARGIN_IN = 0.1 # exporter sets 0.05" top + 0.05" bottom per cell +# Box-overflow tolerance: ignore a sub-fraction-of-a-line overshoot so rounding +# in the estimate doesn't flag a box that visually fits. +_BOX_TOLERANCE_IN = 0.08 + +# Chrome / decoration the exporter places intentionally — these are NOT body +# content and never "overflow" in the meaningful sense: the top/left accent bars +# are fixed-geometry rectangles, and the page number + footer live *at* the +# footer line (7.05") by design, so a footer-guard check on them is a false +# positive. Everything else (title / body / subhead / kpi / rq_box / +# paper_subtitle / tables / figures) is real content and gets checked. 
+_CHROME_NAMES = frozenset({"page_number", "footer"}) +_CHROME_PREFIXES = ("accent",) + + +def _is_chrome(name: str) -> bool: + return name in _CHROME_NAMES or name.startswith(_CHROME_PREFIXES) + + +@dataclass(frozen=True) +class Violation: + slide: int + shape: str + kind: str # "overflows its box" | "crosses footer guard" + rendered_in: float # measured value, inches + limit_in: float # the limit it broke, inches + + +def _is_full_width(ch: str) -> bool: + """True for characters that occupy ~1 em (CJK, kana, hangul, full-width).""" + if ch in ("\t", "\n"): + return False + return unicodedata.east_asian_width(ch) in ("F", "W") + + +def _char_em(ch: str) -> float: + return _FULL_WIDTH_EM if _is_full_width(ch) else _HALF_WIDTH_EM + + +def _run_font_pt(run, fallback: int) -> int: + size = run.font.size + return int(size.pt) if size is not None else fallback + + +def _paragraph_lines(paragraph, inner_width_pt: float, fallback_pt: int) -> tuple[int, int]: + """Estimate (wrapped line count, max font pt) for one paragraph. + + Width is accumulated per character at the run's own font size, so a mixed + CJK + Latin line wraps where it actually would. An empty paragraph still + occupies one line at the fallback size. + """ + runs = list(paragraph.runs) + if not runs: + return 1, fallback_pt + max_pt = fallback_pt + line_w = 0.0 + lines = 1 + for run in runs: + pt = _run_font_pt(run, fallback_pt) + max_pt = max(max_pt, pt) + for ch in run.text: + if ch == "\n": + lines += 1 + line_w = 0.0 + continue + char_w = _char_em(ch) * pt + if line_w + char_w > inner_width_pt and line_w > 0: + lines += 1 + line_w = char_w + else: + line_w += char_w + return lines, max_pt + + +def _text_height_in(text_frame, box_width_emu: int) -> float: + """Estimated rendered height of a text frame, in inches. + + An empty text frame (decorative rectangle, blank placeholder) contributes + no height — it must not be charged a fallback line. 
+    """
+    if not (text_frame.text or "").strip():
+        return 0.0
+    box_width_in = box_width_emu / _EMU_PER_INCH
+    side = Emu(int(_DEFAULT_SIDE_MARGIN_IN * _EMU_PER_INCH))  # only if None; 0 is a valid margin
+    ml, mr = (side if m is None else m for m in (text_frame.margin_left, text_frame.margin_right))
+    inner_width_in = max(0.1, box_width_in - (ml + mr) / _EMU_PER_INCH)
+    inner_width_pt = inner_width_in * 72
+    total_pt = 0.0
+    for paragraph in text_frame.paragraphs:
+        lines, max_pt = _paragraph_lines(paragraph, inner_width_pt, _DEFAULT_FONT_PT)
+        total_pt += lines * max_pt * _LINE_SPACING
+    return total_pt / 72
+
+
+def _table_height_in(shape) -> float:
+    """Estimated *rendered* height of a table, in inches. python-pptx grows a
+    row to fit wrapped cell text, but the GraphicFrame's declared ``height``
+    does not change — so a many-row or long-cell table renders far taller than
+    declared and can cross the footer guard while ``shape.height`` says it fits.
+    Sum each row's height from its tallest cell's wrapped line count.
+    """
+    table = shape.table
+    col_w = [c.width or 0 for c in table.columns]
+    total = 0.0
+    for r in range(len(table.rows)):
+        row_lines = 1
+        for c in range(len(table.columns)):
+            inner_in = max(0.1, col_w[c] / _EMU_PER_INCH - 2 * _CELL_V_MARGIN_IN)
+            inner_pt = inner_in * 72
+            cell_lines = sum(
+                _paragraph_lines(p, inner_pt, _TABLE_FONT_PT)[0]
+                for p in table.cell(r, c).text_frame.paragraphs
+            )
+            row_lines = max(row_lines, cell_lines)
+        total += row_lines * _TABLE_FONT_PT * _LINE_SPACING / 72 + _CELL_V_MARGIN_IN
+    return total
+
+
+def _shape_violations(slide_idx: int, shape) -> list[Violation]:
+    out: list[Violation] = []
+    name = getattr(shape, "name", "?") or "?"
+ if _is_chrome(name): + return out # exporter-placed accent bars / page number / footer + top = shape.top or 0 + height = shape.height or 0 + if getattr(shape, "has_table", False): + # Use the estimated rendered height (≥ declared) for the footer-guard + # check, since the table grows past its declared box when cells wrap. + rendered_in = _table_height_in(shape) + height = max(height, int(rendered_in * _EMU_PER_INCH)) + if shape.has_text_frame: + tf = shape.text_frame + shrink = tf.auto_size == MSO_AUTO_SIZE.TEXT_TO_FIT_SHAPE + if not shrink: + rendered_in = _text_height_in(tf, shape.width or 0) + if rendered_in - height / _EMU_PER_INCH > _BOX_TOLERANCE_IN: + out.append(Violation( + slide_idx, name, "overflows its box", + round(rendered_in, 2), round(height / _EMU_PER_INCH, 2), + )) + # Footer-guard check applies to every content shape (incl. tables, pictures, + # shrink-to-fit titles): the box itself must clear 7.05". + bottom = top + height + if bottom > _FOOTER_GUARD_EMU + 1: + out.append(Violation( + slide_idx, name, "crosses footer guard", + round(bottom / _EMU_PER_INCH, 2), _FOOTER_GUARD_IN, + )) + return out + + +def check_pptx_from_prs(prs) -> list[Violation]: + """Walk every slide / shape of an open Presentation and return overflow + violations (empty = clean). 
Split from ``check_pptx`` so tests can build a
+    deck in memory without writing a temp file."""
+    violations: list[Violation] = []
+    for idx, slide in enumerate(prs.slides, start=1):
+        for shape in slide.shapes:
+            violations.extend(_shape_violations(idx, shape))
+    return violations
+
+
+def check_pptx(path: str | Path) -> list[Violation]:
+    """Walk every slide / shape and return overflow violations (empty = clean)."""
+    return check_pptx_from_prs(Presentation(str(path)))
+
+
+def _report(path: str | Path) -> bool:
+    prs = Presentation(str(path))
+    n_slides = len(prs.slides)
+    n_shapes = sum(len(s.shapes) for s in prs.slides)
+    violations = check_pptx_from_prs(prs)
+    print(f"overflow check — {path}")
+    print(f"slides: {n_slides}")
+    print(f"shapes: {n_shapes}")
+    print(f"violations: {len(violations)}")
+    for v in violations:
+        print(f" slide {v.slide}, shape \"{v.shape}\": {v.kind} "
+              f"— rendered {v.rendered_in}\" vs {v.limit_in}\"")
+    verdict = "PASS" if not violations else "FAIL"
+    print(f"verdict: {verdict}")
+    return not violations
+
+
+def main(argv: list[str]) -> int:
+    if not argv:
+        print("usage: check_overflow.py <deck.pptx> [more.pptx ...]", file=sys.stderr)
+        return 2  # usage error: at least one deck path is required
+    failed = 0
+    for i, path in enumerate(argv):
+        if i:
+            print()
+        if not _report(path):
+            failed += 1
+    return failed
+
+
+if __name__ == "__main__":
+    raise SystemExit(main(sys.argv[1:]))
diff --git a/scripts/regen_code_review_thesis.py b/scripts/regen_code_review_thesis.py
new file mode 100644
index 0000000..0e111b1
--- /dev/null
+++ b/scripts/regen_code_review_thesis.py
@@ -0,0 +1,364 @@
+"""Degree-thesis ORAL-DEFENCE deck for 陳冠穎's master's thesis, authored via the
+`thesis-deck-author` Mode A flow (candidate's own thesis file).
+
+Source: D:/Codes/Code-Review-Framework-.../paper/論文_v1.9.docx
+All content (metrics, RQs, tables, limitations) is drawn from that thesis — no
+number is invented.
Cross-reference markers (§5.2, 表 2, [7]) and draft version +tags are intentionally NOT carried onto slides (post-author-audit Audit 3/4). + +Seven-section coverage (paper_rule → PaperSummary field): + Abstract ............... core_observation + headline_metrics + 1. Introduction ........ pain_points (1.2) + research_question (1.3) + + contributions_detailed (1.5, cap 4) + 2. Literature Review ... literature_table (2.3) + technique_table + 3. Methodology ......... system_flow (3.1 系統架構) + method_sections + (3.2 訓練流程 起,其餘往後移) + evaluation_sections (3.5/4) + 4. Experiment .......... research_questions + rq_results (5.1-5.3) + 5. Conclusion .......... core_observation (5.1) + limitations (6.3) + future_work (6.4) + +Output: the paper's own folder (white + blue academic default, dark_mode=False). +Run: .venv/Scripts/python.exe scripts/regen_code_review_thesis.py +""" +from __future__ import annotations + +import sys +from pathlib import Path + +_PROJECT_ROOT = Path(__file__).resolve().parents[1] +if str(_PROJECT_ROOT) not in sys.path: + sys.path.insert(0, str(_PROJECT_ROOT)) + +from thesisagents.core.models import ( # noqa: E402 + ExportOptions, + Paper, + PaperCollection, + PaperSummary, + Query, + RqResult, +) +from thesisagents.exporters.pptx import PptxExporter # noqa: E402 + +OUT_DIR = ( + r"D:\Codes\Code-Review-Framework-Combining-Large-Language-Models-" + r"and-Chain-of-Thought-Reasoning\paper" +) +FILENAME_STEM = "code-review-thesis-defense-zh-tw" +LANGUAGE = "zh-tw" + + +def _build_summary() -> PaperSummary: + return PaperSummary( + language=LANGUAGE, + # ---- 1.2 研究動機 / 痛點(四宮格)------------------------------------ + pain_points=( + ( + "人工程式碼審查已成開發瓶頸", + ( + "程式碼審查可在整合前發現 50–70% 缺陷,卻耗費可觀人力與時間", + "處理審查意見的時間幾乎隨評論數線性增加", + "大型專案中審查標準不一、審查者主觀性偏高", + ), + ), + ( + "LLM 直接審查仍有風險", + ( + "易產生幻覺(編造看似合理卻不正確的內容)", + "輸出不穩定、缺乏領域規範約束", + "單一提示詞同時做摘要/審查/風格/異味,造成 context 過載", + ), + ), + ( + "傳統靜態分析工具不足", + ( + "規則僵化,多僅能偵測語法錯誤或已知模式", + "缺乏語義層理解與設計合理性判斷", + "約 2/3 的 LLM 
檢測結果超出傳統工具能力範圍", + ), + ), + ( + "既有 LLM 審查研究的缺口", + ( + "多採單一提示詞或單一技術面向改良", + "鮮少同時整合多階段 CoT、知識蒸餾、RAG 與細粒度評估", + "評估多沿用字串重疊或粗粒度 1–5 分,解析度不足", + ), + ), + ), + research_question=( + "在資源受限的前提下,能否以「多階段思維鏈提示詞 + 知識蒸餾微調 + " + "RAG 規則注入」建立一套兼具效率與一致性,且品質可量化交叉驗證的" + "自動化程式碼審查框架?" + ), + # ---- 1.5 研究貢獻(cap 4)------------------------------------------ + contributions_detailed=( + ( + "1. 多階段思維鏈審查流程", + "將審查拆解為摘要、初步審查、靜態分析(linter)、程式碼異味偵測與" + "總彙整五個循序步驟,以 build_global_rule_template 統一注入全域規則," + "降低單一提示詞的 context 負載並形成可追蹤的審查鏈。", + ), + ( + "2. 知識蒸餾 + QLoRA 輕量化", + "以教師模型生成帶 Chain-of-Thought 推理軌跡的審查資料,蒸餾至學生模型 " + "Qwen3-Coder-30B-A3B-Instruct,並以 QLoRA(NF4 4-bit + LoRA rank " + "$r$=64)在兩張 L40S 上完成微調,以可拆卸的 LoRA 適配器保留彈性。", + ), + ( + "3. FAISS RAG 規則檢索層", + "以 Qwen3-Embedding-4B 將領域規則向量化寫入 FAISS,推論時以餘弦相似度 " + "≥ 0.7 動態注入相關規則,抑制幻覺,並避免規則總量隨 context window 擴張" + "而線性增加。", + ), + ( + "4. LLM-as-a-Judge-Our 五維百分制評估", + "設計可讀性、建設性、正確性、多評論覆蓋與完整性五維百分制評分,並以 " + "GPT-5 / Gemini-3 雙裁判加人工評分三方交叉驗證,提升評估解析度與可信度。", + ), + ), + # ---- 摘要 / §5 主要量化成果(KPI)---------------------------------- + headline_metrics=( + ("完整性 (CRSCORE++)", "0.86", "基準 0.67"), + ("相關性 (CRSCORE++)", "0.83", "基準 0.63"), + ("簡潔性 (CRSCORE++)", "0.64", "基準 0.57"), + ("多階段提示詞邊際貢獻", "+34 分", "品質提升的主導因素"), + ("模型微調邊際貢獻", "+2 分", "與流程設計相差逾一個數量級"), + ("測試資料", "44 筆", "GPT-5 + Copilot 生成、人工驗證"), + ), + # ---- 2.10 文獻比較與研究缺口 --------------------------------------- + literature_table=( + ("方法", "多階段提示詞", "模型微調", "RAG", "評估方式"), + ("CRScore (Naik 2024)", "✗", "✗", "✗", "3 維 1–5 分"), + ("CRScore++ (Kapadnis 2025)", "✗", "✗", "✗", "3 維 1–5 分 + RL"), + ("LLaMA-Reviewer (Lu 2023)", "✗", "✓ LoRA", "✗", "BLEU / ROUGE"), + ("AutoReview (Chen 2025)", "✓ 多代理", "✗", "✗", "質性分析"), + ("本研究 Ours", "✓ 5 階段", "✓ QLoRA", "✓ 規則注入", "5 維百分制 + 人工"), + ), + # ---- 關鍵技術 → 角色 ---------------------------------------------- + technique_table=( + ("多階段 CoT 提示詞", "降 context 負載,形成可追蹤、可介入的審查鏈"), + ("知識蒸餾 + QLoRA", "把 30B 教師能力移轉到可在有限資源部署的學生模型"), + ("RAG(FAISS)", 
"以相似度檢索注入專案規範,抑制幻覺"), + ("LLM-as-a-Judge", "GPT-5 / Gemini-3 雙裁判自動量化評分"), + ("人工評分", "交叉驗證自動評分的可信度"), + ), + # ---- 3.1 系統架構(整體五大組件,對應論文圖二)-------------------- + system_flow=( + "測試資料:ChatGPT 與 Copilot 生成的 Source Code,依 bad_data / code_diff / only_code 三類組織", + "RAG 規則檢索:規則文件經 Qwen3-Embedding-4B 向量化寫入 FAISS,取回相似度 ≥ 0.7 的規則注入提示詞", + "核心模型:Qwen3-Coder-30B-A3B + 4-bit 量化 + 微調 LoRA Adapter,三條 Pipeline 共用", + "審查策略:CoT(五階段)、Single Prompt 基準、Skills(Explainer + Review 雙角色)三者對照", + "評估方法:LLM-as-a-Judge(五維百分制)、CRScore、人工評估三方交叉驗證", + ), + # ---- 3.2 起 方法細節(訓練流程提到最前,其餘依序往後移)---------- + method_sections=( + ( + "訓練流程:知識蒸餾 + QLoRA 微調", + ( + "教師模型以 CoT 提示生成帶推理軌跡的審查樣本(instruction / question / think / answer)作為監督訊號", + "資料 tokenize 後做 label masking,只對 answer 計算 loss", + "學生 Qwen3-Coder-30B-A3B-Instruct 以 NF4 4-bit 量化載入,LoRA rank $r$=64、$α$=64,注入 q/k/v/o 與 gate/up/down_proj", + ), + ), + ( + "多階段思維鏈審查", + ( + "五階段串接:摘要 → 初步審查 → Linter → Code Smell → 總彙整", + "每階段專注單一明確任務,降低模型認知負擔", + "中間文件形成可追蹤、可人工介入的審查鏈", + ), + ), + ( + "RAG 規則注入", + ( + "Qwen3-Embedding-4B 將規則向量化後寫入 FAISS 索引", + "推論時取回餘弦相似度 ≥ 0.7 的規則", + "與七大審查標準一併注入全域提示詞", + ), + ), + # —— 以下為隨附開源框架 prthinker 之設計貢獻,端到端量化效益列為未來工作 —— + ( + "prthinker:把審查接上 GitHub PR 與 IDE", + ( + "以下皆為開源框架 prthinker 的設計貢獻,量化效益列為未來工作", + "JudgeStep 輸出 {verdict, score, reasons} 裁決,保守聚合後映射為 PR 的 APPROVE / REQUEST_CHANGES / COMMENT 事件", + "可替換推論後端(本機 HF / FastAPI / OpenAI / Anthropic),並以 MCP 將審查管線暴露為 IDE 可直接調用的工具", + ), + ), + ( + "prthinker:從作者反饋持續學習", + ( + "Dismissed 語料:對被作者拒絕的留言以相似度過濾抑制重複噪音(作用於輸出端)", + "Accepted 語料:以被採納的建議作 in-context 範例提升採納率(作用於輸入端)", + "再衍生可重用規則、跨 PR finding 聚類,並以 repo 知識圖譜接地符號以抑制虛構", + ), + ), + ( + "prthinker:強化審查品質的研究級機制", + ( + "prompt-injection 強健性、counterfactual 替代方案、provenance 引用鏈", + "reviewer personas 多視角 + 衝突顯化、reproducibility 一致性訊號", + "PR 類型自適應審查深度、risk-weighted attention 分配 findings 預算", + ), + ), + ( + "prthinker:CI/CD 部署與工程化", + ( + "CI matrix 分片 + 非同步 job endpoint,化解 30B 推論的逾時與 GPU OOM", + "force-push 差分 
cache、崩潰安全部分結果、PR 留言冪等性", + "secret 預過濾、SARIF / HTML 報告匯出、diff bomb 偵測", + ), + ), + ( + "prthinker:零推論的無模型定向信號", + ( + "12 項確定性啟發式,無需 GPU 或 API 即可作前置分流", + "遺留衝突標記、Trojan-Source 隱形字元、遺留除錯敘述、吞噬例外", + "大塊新增、新增 TODO / FIXME、純格式變更、檔案模式變更等", + ), + ), + ), + # ---- 4 評估指標與實驗設計 ----------------------------------------- + evaluation_sections=( + ( + "評估指標", + ( + "CRSCORE++:完整性 / 簡潔性 / 相關性 三維 1–5 分", + "LLM-as-a-Judge-Our:五維百分制(可讀性 / 建設性 / 正確性 / 多評論覆蓋 / 完整性)", + "百分制較 1–5 分更能反映細微差異", + ), + ), + ( + "實驗設計與環境", + ( + "44 筆測試資料,由 GPT-5 + Copilot 生成並經人工驗證", + "GPT-5 與 Gemini-3 雙裁判,降低單一裁判偏差", + "硬體:2× Intel Xeon Gold 6526Y、512 GB RAM、2× NVIDIA L40S(96 GB)", + ), + ), + ), + # ---- 研究問題 ----------------------------------------------------- + research_questions=( + ("RQ1", "多階段 CoT 提示詞 + LoRA 微調,相較傳統單一提示詞,能否在完整性 / 簡潔性 / 相關性顯著提升審查品質?"), + ("RQ2", "固定參數規模下,僅引入多階段提示詞而不微調,是否即可帶來品質提升?"), + ("RQ3", "多階段提示詞與模型微調各自的邊際貢獻為何?何者主導品質改善?"), + ("RQ4", "自動 LLM-as-a-Judge 評分,能否由獨立人工評分交叉驗證並保持一致?"), + ), + # ---- 5.1-5.3 各 RQ 結果(真實表格)------------------------------- + rq_results=( + RqResult( + rq_id="RQ1", + question="多階段 CoT + 微調相較單一提示詞,能否顯著提升審查品質?", + table=( + ("CRSCORE++ 維度", "本研究 Ours", "基準"), + ("完整性 comprehensiveness", "0.86", "0.67"), + ("相關性 relevance", "0.83", "0.63"), + ("簡潔性 conciseness", "0.64", "0.57"), + ), + analysis=( + "本研究在 CRSCORE++ 三維度全面優於基準", + "GPT-5 裁判下,多階段較單一提示詞於正確性 90 vs 82、簡潔性 96 vs 78 明顯領先", + "7B 學生模型仍可達完整性 0.79–0.80,顯示框架在不同模型尺寸的可行性", + ), + ), + RqResult( + rq_id="RQ2", + question="不微調、僅加多階段提示詞,是否即可提升品質?", + table=( + ("Gemini-3 裁判", "多階段 Ours", "單一提示詞"), + ("正確性 Correctness", "98", "95"), + ("可維護性 Maintainability", "95", "88"), + ("簡潔性 conciseness", "100", "85"), + ), + analysis=( + "即使不微調,僅靠多階段提示詞即帶來顯著提升", + "顯示主要瓶頸是單一提示詞的 context 過載,而非模型能力不足", + "Qwen3-Coder-30B 本身已具備充分的程式碼語義理解能力", + ), + ), + RqResult( + rq_id="RQ3", + question="多階段提示詞 vs 模型微調,何者主導品質改善?", + table=( + ("消融變化(自動評分)", "可維護性", "正確性"), + ("基礎 → 微調 + 多階段", "85 → 95", "82 → 98"), + ("僅多階段(未微調) → 微調 + 
多階段", "95 → 95", "98 → 98"), + ), + analysis=( + "多階段提示詞邊際貢獻 +34 分,為品質提升的主導因素", + "模型微調邊際貢獻僅 +2 分,兩者相差逾一個數量級", + "資源受限下,流程結構化設計較參數微調更具決定性", + ), + ), + RqResult( + rq_id="RQ4", + question="自動評分能否由人工評分交叉驗證並保持一致?", + table=( + ("人工評分", "本研究 Ours", "基礎模型"), + ("正確性 Correctness", "87.75", "80.75"), + ("可維護性 Maintainability", "86.25", "79.88"), + ("多評論覆蓋 Multi-Review Coverage", "86.25", "74.13"), + ), + analysis=( + "人工評分與自動評分方向一致,完整方法於三維度同時領先", + "惟可讀性出現 LLM 評分(92)高於人工(83.50)的系統性偏差", + "此偏差與文獻所述 LLM 對語感類指標較人類寬鬆一致,可作為後續校正依據", + ), + ), + ), + # ---- 5.1 / 6.1 核心觀察(callout)-------------------------------- + core_observation=( + "在資源受限的程式碼審查情境下,把單一提示詞拆解為多階段思維鏈流程帶來的" + "品質提升(+34 分),遠大於對模型做 LoRA 微調(+2 分),相差逾一個數量級 —— " + "流程的結構化設計,比模型參數微調更具決定性。" + ), + # ---- 6.3 研究限制 ------------------------------------------------- + limitations=( + "資料規模:僅 44 筆 Python 測試資料,未驗證 C++ / Java / Go 等語言與其他專案類型的泛化", + "評審偏差:GPT-5 / Gemini-3 同屬商用 LLM,可能共享預訓練分布傾向", + "微調範圍:僅微調 Qwen3-Coder-30B,未比較其他基座模型與 LoRA rank / 量化精度", + "部署實證:已整合 CI/CD,但未於真實團隊長期試行,缺採用率 / 省時 / 誤報等實務指標", + ), + # ---- 6.4 未來工作 ------------------------------------------------- + future_work=( + "跨後端(本機 / FastAPI / OpenAI / Anthropic)的品質、成本與延遲偏序評估", + "作者反饋語料(dismissed / accepted)累積後的採納率與精確率量化驗證", + "跨平台(GitLab / Bitbucket / Gitea)支援與多模型仲裁擴展", + "IDE 觸發 vs CI 觸發的開發者接受率比較,並補強生產級維運", + ), + model="hand-authored:regen_code_review_thesis", + ) + + +def _build_paper() -> Paper: + return Paper( + source="local", + source_id="code-review-thesis-2026", + title="基於大語言模型和思維鏈推理的程式碼審查框架", + authors=("陳冠穎",), + year=2026, + venue="國立高雄師範大學 · 軟體工程與管理學系 · 碩士學位論文 · 指導教授:李文廷 博士", + abstract="", + url="", + summary=_build_summary(), + ) + + +def main() -> None: + collection = PaperCollection( + query=Query(keywords="code review llm chain-of-thought", sources=("local",)), + papers=(_build_paper(),), + ) + options = ExportOptions( + formats=("pptx",), + out_dir=OUT_DIR, + filename_stem=FILENAME_STEM, + language=LANGUAGE, + dark_mode=False, # white + 
blue academic default + max_slides_per_paper=30, # framework feature coverage fits in a 30-slide budget + ) + out_path = PptxExporter().export(collection, options) + print(f"saved: {out_path}") + + +if __name__ == "__main__": + main() diff --git a/scripts/regen_thesis_demo.py b/scripts/regen_thesis_demo.py new file mode 100644 index 0000000..8269052 --- /dev/null +++ b/scripts/regen_thesis_demo.py @@ -0,0 +1,335 @@ +"""Demonstration degree-thesis ORAL-DEFENCE deck (學位論文口試簡報) built via the +`thesis-deck-author` flow. + +Why this script exists +---------------------- +A request to "see" the degree-thesis deck capability. Rather than fabricate a +fake thesis's results (which the thesis-deck-author "no fabrication" rule +forbids), this deck is **about ThesisAgents itself** and every number is real — +the test count, the fang2026 overflow before/after, the math-rendering surface +count, the dark-text audit outcome are all drawn from this codebase and the work +recorded in the session. It is a DEMONSTRATION of the deck format (seven +`paper_rule` sections, defence cover, light palette, $...$ math, KPI + tables, +height-adaptive overflow-safe layout), with the candidate / institution on the +cover marked as placeholders to edit. + +Seven-section coverage (paper_rule → PaperSummary field): + Abstract ............... core_observation + headline_metrics + 1. Introduction ........ pain_points (1.2) + research_question (1.3) + + contributions_detailed (1.5, cap 4) + 2. Literature Review ... literature_table (2.3) + technique_table + 3. Methodology ......... system_flow (3.1) + method_sections (3.2-3.4) + + evaluation_sections (3.5) + 4. Experiment .......... research_questions + rq_results (4.4/4.5) + headline_metrics + 5. Conclusion .......... core_observation (5.1) + limitations (5.3) + future_work (5.4) + References .............
the collection's Paper record + +Run from the project root: .venv/Scripts/python.exe scripts/regen_thesis_demo.py +""" +from __future__ import annotations + +import sys +from pathlib import Path + +_PROJECT_ROOT = Path(__file__).resolve().parents[1] +if str(_PROJECT_ROOT) not in sys.path: + sys.path.insert(0, str(_PROJECT_ROOT)) + +from thesisagents.core.models import ( # noqa: E402 + ExportOptions, + Paper, + PaperCollection, + PaperSummary, + Query, + RqResult, +) +from thesisagents.exporters.pptx import PptxExporter # noqa: E402 + +OUT_DIR = "exports" +FILENAME_STEM = "thesisagents-thesis-demo-zh-tw" +LANGUAGE = "zh-tw" + + +def _build_summary() -> PaperSummary: + return PaperSummary( + language=LANGUAGE, + # ---- 1. Introduction:1.2 痛點 + 1.3 研究問題 ------------------------- + pain_points=( + ( + "把論文做成口試簡報,耗時且品質不一", + ( + "研究生需手動把數十頁論文濃縮成 20 分鐘的 deck", + "排版、配色、字體靠人工反覆調整", + "同一份內容換語系就得整份重做", + ), + ), + ( + "通用 LLM 產出的簡報一看就是機器生成", + ( + "預設 Calibri 字體、純白底、置中標題是明顯 AI tell", + "整頁文字牆,沒有圖表與視覺層次", + "紅字過度強調,像 error 而非重點", + ), + ), + ( + "版面與可讀性問題反覆出現", + ( + "長段落溢出固定高度的文字框", + "暗色背景上出現看不見的黑字", + "數學記號被壓成扁平 ASCII(za 而非 $z_a$)", + ), + ), + ( + "缺乏把七大論文章節結構化落地的流程", + ( + "工具多半只填模板,不檢查章節完整性", + "沒有機器可讀的視覺契約可稽核", + "品質仰賴人工巡檢,難以回歸守護", + ), + ), + ), + research_question=( + "能否用一套多代理人規則 + 規則化匯出引擎,自動產生" + "結構完整、視覺專業、且可被機器稽核的論文級簡報?" + ), + # ---- 1.5 貢獻(cap 4)------------------------------------------------ + contributions_detailed=( + ( + "1. 多代理人規則架構", + "把製作知識拆成唯讀規則代理(rules/:slide-deck-rules、deck-design、" + "paper_rule …)與任務代理(tasks/:paper-summary-author、" + "thesis-deck-author、稽核代理),規則與執行分離,可平行調度。", + ), + ( + "2. 三層渲染引擎", + "依 PaperSummary 內容自動分派 lightweight / enriched-flat / " + "thesis-style 三層,豐富層把每個欄位對映到一張論文章節投影片。", + ), + ( + "3. 可機器稽核的視覺契約", + "暗色不可見字、紅字、淺底淺字、版面溢出皆有可執行稽核器" + "(check_overflow.py、_audit_dark_text.py)+ pytest 回歸守護。", + ), + ( + "4. 
自適應版面,從不截斷作者內容", + "文字框高度依估算行數自適應,堆疊段落與四宮格依高度預算分頁" + "而非固定張數,內容過多時分頁而非砍字。", + ), + ), + # ---- 摘要 / 5.1 核心觀察(callout)---------------------------------- + core_observation=( + "把論文簡報生成拆成三層:結構化內容(七大章節)、規則化視覺契約、" + "與機器稽核;就能在不犧牲視覺品質的前提下自動產生可口試的 deck," + "且品質由測試守住而非靠人工巡檢。" + ), + # ---- 摘要 / 4 主要量化成果(KPI)----------------------------------- + headline_metrics=( + ("回歸測試總數", "614", "改善前 603"), + ("範例 deck 版面溢出", "0 處", "改善前 8 處"), + ("數學記號渲染介面數", "5", "改善前 3(bullets/KPI/表格)"), + ("暗色硬性問題", "0", "_audit_dark_text 全 PASS"), + ("支援語系", "14", "i18n SUPPORTED_LANGUAGES"), + ("渲染層", "3 層", "lightweight / flat / thesis"), + ), + # ---- 2. Literature Review:2.3 比較表 ------------------------------- + literature_table=( + ("做法", "章節完整性", "視覺品質", "自動稽核"), + ("純手動製作", "高(人工)", "視人而定", "無"), + ("通用 LLM 產 slide", "低", "低(AI tell)", "無"), + ("模板填空工具", "中", "中", "無"), + ("ThesisAgents(本系統)", "高(七章)", "高(品牌契約)", "有"), + ), + # ---- 2.x 關鍵技術 → 角色 ------------------------------------------- + technique_table=( + ("python-pptx", "OOXML 簡報生成底層"), + ("規則子代理(rules/)", "設計與稽核知識來源"), + ("check_overflow.py", "版面溢出稽核器"), + ("_audit_dark_text.py", "暗色文字契約稽核器"), + ), + # ---- 3. 
Methodology:3.1 架構流程 ---------------------------------- + system_flow=( + "多來源關鍵字搜尋,正規化為 Paper 記錄", + "依 DOI / arXiv ID / 標題模糊比對去重,再依新近度與引用排序", + "LLM-as-agent 或 Python pipeline 充實成結構化 PaperSummary", + "三層渲染引擎產出 pptx / xlsx / bib / md / json", + "子代理稽核:版面溢出、暗色契約、七章完整性", + ), + # ---- 3.2-3.4 方法細節 ---------------------------------------------- + method_sections=( + ( + "渲染層分派", + ( + "依 summary.has_rich_fields() 判斷走哪一層", + "豐富層:痛點四宮格、RQ callout、KPI、技術表、各 RQ 結果表", + "每個 PaperSummary 欄位對映一張論文章節投影片", + ), + ), + ( + "自適應堆疊分頁", + ( + "_stacked_body_height_in 依估算換行數定本文框高度", + "_paginate_stacks 依高度預算(1.7\" → 7.0\")分頁", + "四宮格文字多時自動降為一列兩格,不砍字", + ), + ), + ( + "數學記號渲染", + ( + "$...$ 契約:_x / ^x → 真下標 / 上標,單字母變數轉斜體", + "涵蓋 bullets、KPI 值、表格、貢獻本文、RQ callout 五介面", + "離 $...$ 的底線(檔名、prose)不受影響", + ), + ), + ( + "暗色後處理", + ( + "_apply_dark_mode 以兩個對映字典重上色", + "兩層防線:每個 helper 顯式設色 + 後處理把 None/黑字提升為近白", + "淺底淺字由對比契約攔截", + ), + ), + ), + # ---- 3.5 評估方法 -------------------------------------------------- + evaluation_sections=( + ( + "版面溢出量測", + ( + "check_overflow.py 估算每形狀換行高度 vs 框高與 footer guard 7.05\"", + "全形 CJK ≈ 1.0 em,半形拉丁 ≈ 0.55 em", + "表格改為估算實際列高,可抓宣告值騙過的長表", + ), + ), + ( + "視覺契約量測", + ( + "_audit_dark_text 掃隱形字、紅字、淺底淺字、離調色盤色", + "可套用於手作 deck(回歸測試只覆蓋生成 deck)", + "硬性問題數需為 0 才放行", + ), + ), + ), + # ---- 4. 
Experiment:研究問題 --------------------------------------- + research_questions=( + ("RQ1", "能否在不截斷作者內容下消除版面溢出?"), + ("RQ2", "數學記號能否渲染為真上下標而非扁平 ASCII?"), + ("RQ3", "暗色與版面契約能否被自動稽核?"), + ("RQ4", "系統是否覆蓋七大論文章節?"), + ), + # ---- 4.4 / 4.5 各 RQ 結果(真實數據)------------------------------ + rq_results=( + RqResult( + rq_id="RQ1", + question="能否在不截斷作者內容下消除版面溢出?", + table=( + ("版本", "範例 deck 溢出", "回歸測試"), + ("固定高度(改善前)", "8 處", "603"), + ("自適應分頁(改善後)", "0 處", "614"), + ), + analysis=( + "自適應高度 + 高度預算分頁把 fang2026 範例 deck 的本文溢出從 8 降到 0", + "全程不截斷任何作者文字,改以分頁吸收超長內容", + "表格估算上線後可抓 9 列長表(估算 9.03\" > 7.05\")", + ), + ), + RqResult( + rq_id="RQ2", + question="數學記號能否渲染為真上下標而非扁平 ASCII?", + table=( + ("介面", "改善前", "改善後"), + ("貢獻 / 方法本文", "扁平 za", "真下標 $z_a$"), + ("RQ / 核心觀察 callout", "扁平", "真下標"), + ("涵蓋介面數", "3", "5"), + ), + analysis=( + "$...$ 契約讓五個內容介面都渲染真上下標與斜體變數", + "互資訊式目標如 $I(z_a;z_b|E_p)$ 不再顯示為扁平字串", + "新增 paper-summary-author / thesis-deck-author 的授權規則", + ), + ), + RqResult( + rq_id="RQ3", + question="暗色與版面契約能否被自動稽核?", + table=( + ("檢查項", "稽核器", "最終 deck"), + ("隱形字(None/黑)", "_audit_dark_text", "0"), + ("紅字 #C0392B", "_audit_dark_text", "0"), + ("版面溢出", "check_overflow", "0"), + ), + analysis=( + "三項自動稽核在最終 deck 全數 PASS", + "稽核器可獨立套用於手作 deck,補上回歸測試的覆蓋缺口", + "_ACCEPTED_DARK_RUN_COLORS 與 exporter 調色盤同步", + ), + ), + RqResult( + rq_id="RQ4", + question="系統是否覆蓋七大論文章節?", + table=( + ("論文章節", "對映欄位", "覆蓋"), + ("Introduction", "pain_points / RQ / contributions", "✓"), + ("Literature", "literature_table", "✓"), + ("Methodology", "system_flow / method_sections", "✓"), + ("Experiment", "rq_results / headline_metrics", "✓"), + ("Conclusion", "core_observation / limitations / future_work", "✓"), + ), + analysis=( + "七大章節皆有對映的 PaperSummary 欄位", + "thesis-deck-author 以完整度稽核把關,缺章即視為未完成", + "本 deck 自身即覆蓋全部七章", + ), + ), + ), + # ---- 5.3 限制 ------------------------------------------------------ + limitations=( + "量化評估以系統內部指標為主,尚缺正式使用者研究", + "內容充實仰賴 LLM-as-agent 或 API,離線品質有限", + 
"表格不自動分頁,超大表需在作者層拆分", + "暗色 RTL(阿拉伯文)deck 尚未支援", + ), + # ---- 5.4 未來工作 -------------------------------------------------- + future_work=( + "加入使用者研究與主觀品質評分", + "多模態:自動生成結果圖表並嵌入", + "擴充 RTL 與更多語系的 deck 渲染", + "線上協作與即時編輯", + ), + model="hand-authored:regen_thesis_demo", + ) + + +def _build_paper() -> Paper: + return Paper( + source="local", + source_id="thesisagents-thesis-demo", + title="ThesisAgents:從研究主題到論文級簡報的多代理人生成系統", + # 候選人 / 校系為示範佔位,實際使用時請替換。 + authors=("Jeffrey Chen",), + year=2026, + venue="資訊工程學系 · 碩士學位論文(示範範本)", + abstract="", + url="", + summary=_build_summary(), + ) + + +def main() -> None: + collection = PaperCollection( + query=Query(keywords="thesisagents thesis deck", sources=("local",)), + papers=(_build_paper(),), + ) + options = ExportOptions( + formats=("pptx",), + out_dir=OUT_DIR, + filename_stem=FILENAME_STEM, + language=LANGUAGE, + # White + blue academic-paper style is the default deliverable + # (light palette: white bg, navy headings/body, blue emphasis). + # Dark mode stays opt-in via dark_mode=True for OLED / low-light venues. + dark_mode=False, + ) + out_path = PptxExporter().export(collection, options) + print(f"saved: {out_path}") + + +if __name__ == "__main__": + main() diff --git a/tests/test_audit_dark_text.py b/tests/test_audit_dark_text.py new file mode 100644 index 0000000..164d4f5 --- /dev/null +++ b/tests/test_audit_dark_text.py @@ -0,0 +1,89 @@ +"""Regression tests for the dark-text auditor (scripts/_audit_dark_text.py). + +The auditor is the manual companion to the exporter's dark-mode regression tests — +it must agree with the exporter (a generated dark deck audits clean) and must catch +the invisibility / red-text failure modes on an arbitrary deck (e.g. a hand-made +one the regression tests never see). 
+""" +from __future__ import annotations + +import importlib.util +import sys +from pathlib import Path + +from pptx import Presentation +from pptx.dml.color import RGBColor +from pptx.util import Inches, Pt + +_SCRIPT = Path(__file__).resolve().parents[1] / "scripts" / "_audit_dark_text.py" + + +def _load_auditor(): + spec = importlib.util.spec_from_file_location("_audit_dark_text", _SCRIPT) + mod = importlib.util.module_from_spec(spec) + sys.modules[spec.name] = mod # so the frozen dataclass resolves its module + spec.loader.exec_module(mod) + return mod + + +def test_generated_dark_deck_audits_clean(tmp_path): + from thesisagents.core.models import ( + ExportOptions, + Paper, + PaperCollection, + PaperSummary, + Query, + ) + from thesisagents.exporters.pptx import PptxExporter + + mod = _load_auditor() + summary = PaperSummary( + language="zh-tw", + contributions_detailed=(("1. 方法", "以互資訊分解對抗成分,既準又快。"),), + headline_metrics=(("準確率", "92.3%", "baseline 65.4"),), + core_observation="把對抗與良性成分在 latent 空間分離,事前中和。", + model="test", + ) + paper = Paper( + source="local", source_id="t", title="Dark Mode Audit Smoke", + authors=("A",), year=2026, venue="Test", abstract="", url="", summary=summary, + ) + collection = PaperCollection( + query=Query(keywords="x", sources=("local",)), papers=(paper,), + ) + out = PptxExporter().export(collection, ExportOptions( + formats=("pptx",), out_dir=str(tmp_path), filename_stem="audit-smoke", + language="zh-tw", dark_mode=True, + )) + hard = [i for i in mod.audit_deck(out) if i.hard] + assert hard == [], "\n".join(f"slide {i.slide} {i.shape}: {i.kind} {i.detail}" for i in hard) + + +def _audit_one_run(mod, tmp_path, *, name, text, rgb): + prs = Presentation() + slide = prs.slides.add_slide(prs.slide_layouts[6]) + box = slide.shapes.add_textbox(Inches(1), Inches(1), Inches(6), Inches(1)) + box.name = name + box.text_frame.text = text + run = box.text_frame.paragraphs[0].runs[0] + run.font.size = Pt(18) + run.font.color.rgb = rgb + path 
= tmp_path / f"{name}.pptx" + prs.save(str(path)) + return mod.audit_deck(path) + + +def test_black_run_is_flagged_invisible(tmp_path): + mod = _load_auditor() + issues = _audit_one_run( + mod, tmp_path, name="body", text="黑字在深色背景上看不見", rgb=RGBColor(0, 0, 0) + ) + assert any(i.kind == "invisible" and i.hard for i in issues) + + +def test_red_run_is_flagged(tmp_path): + mod = _load_auditor() + issues = _audit_one_run( + mod, tmp_path, name="kpi", text="92.3%", rgb=RGBColor(0xC0, 0x39, 0x2B) + ) + assert any(i.kind == "red text" and i.hard for i in issues) diff --git a/tests/test_check_overflow.py b/tests/test_check_overflow.py new file mode 100644 index 0000000..e403b5a --- /dev/null +++ b/tests/test_check_overflow.py @@ -0,0 +1,181 @@ +"""Regression tests for the canonical overflow inspector (scripts/check_overflow.py). + +The inspector is what the `slide-overflow-check` subagent runs, so its calibration +matters: it must flag genuine content overflow, ignore exporter-placed chrome +(accent bars / page number / footer), and treat empty decorative frames as +zero-height. These tests pin that behaviour with synthetic decks so they don't +depend on whether any shipped deck happens to be clean. +""" +from __future__ import annotations + +import importlib.util +import sys +from pathlib import Path + +from pptx import Presentation +from pptx.util import Inches, Pt + +_SCRIPT = Path(__file__).resolve().parents[1] / "scripts" / "check_overflow.py" + + +def _load_inspector(): + spec = importlib.util.spec_from_file_location("check_overflow", _SCRIPT) + mod = importlib.util.module_from_spec(spec) + # Register before exec so the frozen dataclass can resolve its own module + # (dataclasses looks the class's module up in sys.modules on 3.14+). 
+ sys.modules[spec.name] = mod + spec.loader.exec_module(mod) + return mod + + +def _textbox(slide, *, name, text, left, top, width, height, font_pt=19): + box = slide.shapes.add_textbox(Inches(left), Inches(top), Inches(width), Inches(height)) + box.name = name + tf = box.text_frame + tf.word_wrap = True + tf.text = text + for run in tf.paragraphs[0].runs: + run.font.size = Pt(font_pt) + return box + + +def _blank_slide(prs): + return prs.slides.add_slide(prs.slide_layouts[6]) + + +def test_short_text_in_big_box_is_clean(): + mod = _load_inspector() + prs = Presentation() + slide = _blank_slide(prs) + _textbox(slide, name="body", text="一句話", left=0.5, top=2.0, width=12.0, height=2.0) + assert mod.check_pptx_from_prs(prs) == [] + + +def test_long_text_in_tiny_box_overflows(): + mod = _load_inspector() + prs = Presentation() + slide = _blank_slide(prs) + # ~200 CJK chars at 19pt in a 0.4"-high box must overflow its box. + _textbox(slide, name="body", text="字" * 200, left=0.5, top=2.0, width=6.0, height=0.4) + violations = mod.check_pptx_from_prs(prs) + assert any(v.kind == "overflows its box" and v.shape == "body" for v in violations) + + +def test_content_shape_past_footer_guard_is_flagged(): + mod = _load_inspector() + prs = Presentation() + slide = _blank_slide(prs) + _textbox(slide, name="body", text="尾端內容", left=0.5, top=7.2, width=6.0, height=0.5) + violations = mod.check_pptx_from_prs(prs) + assert any(v.kind == "crosses footer guard" and v.shape == "body" for v in violations) + + +def test_chrome_shapes_are_exempt(): + # page_number / footer live at the 7.05" line by design; accent bars are + # fixed-geometry decoration. None should ever be flagged. 
+ mod = _load_inspector() + prs = Presentation() + slide = _blank_slide(prs) + _textbox(slide, name="page_number", text="1 / 19", left=11.0, top=7.2, width=2.0, height=0.3) + _textbox(slide, name="footer", text="footer copy", left=0.5, top=7.2, width=6.0, height=0.3) + accent = slide.shapes.add_textbox(Inches(0), Inches(0), Inches(13.333), Inches(7.5)) + accent.name = "accent_left" + assert mod.check_pptx_from_prs(prs) == [] + + +def test_empty_decorative_frame_is_zero_height(): + # A blank accent rectangle inside the body must not be charged a fallback line. + mod = _load_inspector() + prs = Presentation() + slide = _blank_slide(prs) + box = slide.shapes.add_textbox(Inches(0.5), Inches(2.0), Inches(12.0), Inches(0.08)) + box.name = "subhead" # non-chrome name, but empty text → no overflow + box.text_frame.text = "" + assert mod.check_pptx_from_prs(prs) == [] + + +def test_small_table_is_clean(): + # A few short-cell rows near the top render within the footer guard. + mod = _load_inspector() + prs = Presentation() + slide = _blank_slide(prs) + tbl = slide.shapes.add_table(4, 3, Inches(0.5), Inches(2.0), Inches(12), Inches(2.0)) + for r in range(4): + for c in range(3): + tbl.table.cell(r, c).text = f"r{r}c{c}" + assert mod.check_pptx_from_prs(prs) == [] + + +def test_long_table_overflow_is_estimated(): + # python-pptx keeps the declared 3.0" height, but 10 rows of wrapping cells + # render far taller and cross the footer guard. The inspector must estimate + # the grown height, not trust shape.height. 
+ mod = _load_inspector() + prs = Presentation() + slide = _blank_slide(prs) + tbl = slide.shapes.add_table(10, 3, Inches(0.5), Inches(2.3), Inches(12), Inches(3.0)) + long = "a long table cell value that wraps across several lines on the slide " * 2 + for r in range(10): + for c in range(3): + tbl.table.cell(r, c).text = long + violations = mod.check_pptx_from_prs(prs) + assert any(v.kind == "crosses footer guard" for v in violations) + + +def test_exported_rich_deck_has_no_overflow(tmp_path): + """End-to-end guard: a rich deck with deliberately long contribution / + method / pain-point text (the content that used to overflow fixed-height + boxes) must export overflow-free. This wires the inspector into the suite + so the adaptive stacked-section + quadrant pagination can't silently + regress. See pptx.py `_add_stacked_section` / `_pain_points_per_slide`. + """ + from thesisagents.core.models import ( + ExportOptions, + Paper, + PaperCollection, + PaperSummary, + Query, + ) + from thesisagents.exporters.pptx import PptxExporter + + mod = _load_inspector() + long_body = ( + "VAE(變分自編碼器)編碼器把 prompt 切成對抗潛在向量 $z_a$ 與良性潛在向量 " + "$z_b$,訓練目標最小化互資訊 $I(z_a;z_b|E_p)$,透過 Data Processing " + "Inequality(DPI,限制條件互資訊在資料處理鏈中不會上升)保證分離,對改述攻擊具備強穩定性。" + ) + long_bullets = ( + "規則式過濾遇到改述(攻擊者改寫提示文字)或混淆(以異常編碼或拼字繞過)攻擊就破功", + "對抗訓練要 fine-tune LLM,正常任務的效能會明顯下降而且部署成本高", + "每出現一類新攻擊就得再補一批新規則,缺乏對對抗與良性訊號的原理性分離", + ) + summary = PaperSummary( + language="zh-tw", + pain_points=tuple( + (f"痛點 {i}:既有防禦的根本侷限與成本問題", long_bullets) for i in range(1, 5) + ), + research_question="在維持即時延遲且不傷害正常查詢效能的前提下能否事前中和對抗成分?", + contributions_detailed=tuple( + (f"{i}. 
互資訊式語意分解與譜圖意圖分類", long_body) for i in range(1, 5) + ), + core_observation=long_body, + model="test", + ) + paper = Paper( + source="local", source_id="t", title="A Long-Content Thesis Deck for Overflow", + authors=("Test Author",), year=2026, venue="Test University · 碩士學位論文", + abstract="", url="", summary=summary, + ) + collection = PaperCollection( + query=Query(keywords="overflow", sources=("local",)), papers=(paper,), + ) + options = ExportOptions( + formats=("pptx",), out_dir=str(tmp_path), filename_stem="overflow-e2e", + language="zh-tw", dark_mode=True, + ) + out_path = PptxExporter().export(collection, options) + violations = mod.check_pptx(out_path) + assert violations == [], "\n".join( + f"slide {v.slide} {v.shape}: {v.kind} {v.rendered_in}\" vs {v.limit_in}\"" + for v in violations + ) diff --git a/tests/test_exporters.py b/tests/test_exporters.py index f8459c7..6b44ca9 100644 --- a/tests/test_exporters.py +++ b/tests/test_exporters.py @@ -311,6 +311,57 @@ def test_pptx_exporter_single_paper_skips_agenda_and_divider(sample_papers, tmp_ assert "References" in titles +def test_pptx_own_thesis_drops_source_slide_bibtex_and_self_reference(tmp_path): + """A candidate's own thesis (source=local, no external ids) renders no + source/overview slide, no BibTeX cite-key, and no references-of-self slide. + + Why: the source/overview slide and BibTeX key exist to attribute a + *borrowed* paper; a defence deck of the candidate's own dissertation has + no fetched source to credit and must not list itself as a reference + (slide-deck-rules §11). The cover already carries title/authors/venue. + """ + from pptx import Presentation + + from thesisagents.core.models import Paper, PaperSummary + + summary = PaperSummary( + language="en", + pain_points=(("Manual decks are slow", ("a", "b", "c")),), + contributions_detailed=(("1. 
A rule-driven engine", "renders rich tiers"),), + core_observation="Structure + visual contract + audit yields a defence deck.", + model="test", + ) + own = Paper( + source="local", source_id="my-thesis", title="My Own Thesis", + authors=("Jeffrey Chen",), year=2026, venue="NTU · MSc Thesis", + abstract="", url="", doi=None, arxiv_id=None, summary=summary, + ) + collection = PaperCollection( + query=Query(keywords="my thesis", sources=("local",)), papers=(own,), + ) + options = ExportOptions(formats=("pptx",), out_dir=str(tmp_path), filename_stem="own") + written = export_collection(collection, options) + prs = Presentation(str(written["pptx"])) + + titles = [_slide_text(s, "title") for s in prs.slides] + # No references-of-self slide (the thesis is not its own citation). + assert "References" not in titles + # The title appears only on the cover — no source/overview slide repeats + # it as a body section (before the fix it appeared on both). + assert titles.count("My Own Thesis") == 1 + # No BibTeX cite-key (or "Source:" attribution) anywhere in the deck. + every_run_text = " ".join( + run.text + for slide in prs.slides + for shape in slide.shapes + if shape.has_text_frame + for para in shape.text_frame.paragraphs + for run in para.runs + ) + assert own.bibtex_key() not in every_run_text + assert "BibTeX" not in every_run_text + + def _find_run_color(prs, target_rgb: tuple[int, int, int]) -> bool: for slide in prs.slides: for shape in slide.shapes: @@ -327,29 +378,38 @@ def _find_run_color(prs, target_rgb: tuple[int, int, int]) -> bool: return False -def test_pptx_default_is_dark_mode(sample_papers, tmp_path): - """``dark_mode`` defaults to True, so an ExportOptions that doesn't - explicitly pass the field still produces a dark deck. +def test_pptx_default_is_light_mode(sample_papers, tmp_path): + """``dark_mode`` defaults to False, so an ExportOptions that doesn't + explicitly pass the field produces the light navy-band deck. Confirms: - 1. 
Slide background fill is the dark colour (`#12151B`). - 2. At least one run carries the swapped near-white text colour. + 1. No dark slide-background fill is applied (the post-build dark pass + is skipped, so slide 0's background carries no explicit fore-colour + — reading it raises, as for any un-filled background). + 2. At least one run keeps the original navy ``_BRAND_DARK`` (#1F3A66) + body-text colour (i.e. the dark recolour pass did NOT run). """ from pptx import Presentation - from pptx.dml.color import RGBColor collection = _collection(sample_papers) options = ExportOptions( formats=("pptx",), out_dir=str(tmp_path), - filename_stem="default-dark", + filename_stem="default-light", ) written = export_collection(collection, options) prs = Presentation(str(written["pptx"])) - bg_rgb = list(prs.slides)[0].background.fill.fore_color.rgb - assert tuple(bg_rgb) == tuple(RGBColor(0x12, 0x15, 0x1B)) - assert _find_run_color(prs, (0xE5, 0xE7, 0xEB)), ( - "no run was re-coloured to the dark-mode near-white text" + # No dark background was stamped — the un-filled background raises when + # asked for a foreground colour, the same signal the light-mode opt-out + # test relies on. 
+ try: + bg_rgb = list(prs.slides)[0].background.fill.fore_color.rgb + except (TypeError, ValueError, AttributeError): + bg_rgb = None + assert bg_rgb is None or tuple(bg_rgb) != (0x12, 0x15, 0x1B) + assert _find_run_color(prs, (0x1F, 0x3A, 0x66)), ( + "no run kept the navy _BRAND_DARK body text — the dark recolour " + "pass should be skipped by default now that light is the default" ) @@ -371,6 +431,7 @@ def test_pptx_dark_mode_has_no_invisible_runs(sample_papers, tmp_path): formats=("pptx",), out_dir=str(tmp_path), filename_stem="dark-readability", + dark_mode=True, ) written = export_collection(collection, options) prs = Presentation(str(written["pptx"])) @@ -462,6 +523,7 @@ def test_pptx_dark_mode_no_light_text_on_light_fill(sample_papers, tmp_path): formats=("pptx",), out_dir=str(tmp_path), filename_stem="dark-contrast", + dark_mode=True, ) written = export_collection(collection, options) prs = Presentation(str(written["pptx"])) @@ -486,7 +548,7 @@ def test_pptx_dark_mode_no_light_text_on_light_fill(sample_papers, tmp_path): def test_pptx_no_red_text_runs(sample_papers, tmp_path): """The "No red text" contract: ``_BRAND_ACCENT`` (#C0392B) must never be written as a run colour. Bold + ``_BRAND_HIGHLIGHT`` - (teal-700 ``#0E7490``) is the approved emphasis pattern for + (blue-600 ``#2563EB``) is the approved emphasis pattern for headline text (KPI value, RQ question); ``_BRAND_GREY`` is the approved pattern for caption / placeholder / chrome text. 
Red font runs read as error / warning in slide-deck conventions and @@ -504,6 +566,7 @@ def test_pptx_no_red_text_runs(sample_papers, tmp_path): formats=("pptx",), out_dir=str(tmp_path), filename_stem="no-red", + dark_mode=True, ) written = export_collection(collection, options) prs = Presentation(str(written["pptx"])) @@ -528,7 +591,7 @@ def test_pptx_no_red_text_runs(sample_papers, tmp_path): f"slide {s_idx} shape {shape.name!r}: {text[:40]!r}" ) assert not offenders, ( - "red text (#C0392B) found — use bold + _BRAND_HIGHLIGHT (teal) " + "red text (#C0392B) found — use bold + _BRAND_HIGHLIGHT (blue) " "for headlines or _BRAND_GREY for captions instead " "(deck-design 'No red text' contract):\n " + "\n ".join(offenders[:10]) @@ -1547,3 +1610,46 @@ def test_table_cell_renders_math_subscript(): # A plain header cell stays upright with no baseline shift. hdr_runs = [r for p in table.cell(0, 0).text_frame.paragraphs for r in p.runs] assert hdr_runs and all(_baseline(r) is None for r in hdr_runs) + + +def test_stacked_section_body_renders_math_subscript(): + # The contribution / method body paragraph is the densest math surface in a + # thesis deck (it states the objective formula in prose). It routes through + # _add_textbox(math=True); without that flag it flattened "$I(z_a;z_b)$" to + # ASCII even though bullets / KPIs / tables already rendered it. Regression. 
+ from pptx import Presentation + + from thesisagents.exporters import pptx as pptx_mod + + prs = Presentation() + slide = prs.slides.add_slide(prs.slide_layouts[6]) + pptx_mod._add_stacked_section( # noqa: SLF001 + slide, + "互資訊式語意分解", + "訓練目標最小化互資訊 $I(z_a;z_b|E_p)$ 以保證分離", + pptx_mod.Inches(2), + ) + body = next(s for s in slide.shapes if s.name == "body") + runs = [r for p in body.text_frame.paragraphs for r in p.runs] + assert any(_baseline(r) == "-25000" for r in runs) # z_a / z_b subscripts + assert all(r.font.color.rgb is not None for r in runs) # dark-mode contract + + +def test_rq_callout_renders_math_subscript(): + # The RQ / core-observation box states the paper's objective formula; it is a + # shape (rounded rectangle), not _add_textbox, so it needs its own math wiring. + from pptx import Presentation + from pptx.util import Inches + + from thesisagents.exporters import pptx as pptx_mod + + prs = Presentation() + slide = prs.slides.add_slide(prs.slide_layouts[6]) + pptx_mod._add_rq_callout( # noqa: SLF001 + slide, "把對抗成分和良性成分分離(最小化 $I(z_a;z_b|E_p)$),既準又快", + left=Inches(1), top=Inches(2), width=Inches(8), height=Inches(2), + ) + rect = next(s for s in slide.shapes if s.name == "rq_box") + runs = [r for p in rect.text_frame.paragraphs for r in p.runs] + assert any(_baseline(r) == "-25000" for r in runs) # subscript rendered + assert all(r.font.color.rgb is not None for r in runs) # dark-mode contract diff --git a/thesisagents/cli.py b/thesisagents/cli.py index 67797d9..c67c4e9 100644 --- a/thesisagents/cli.py +++ b/thesisagents/cli.py @@ -273,14 +273,14 @@ def build_parser() -> argparse.ArgumentParser: ), ) parser.add_argument( - "--light-mode", + "--dark-mode", action="store_true", help=( - "Render the pptx with the classic white slide background + " - "navy text. Default is dark mode (dark slide background, " - "near-white text) — pass this flag for projectors in " - "well-lit rooms or when the deck will be printed / read on " - "paper." 
+ "Render the pptx in dark mode (dark slide background, " + "near-white text, lightened navy band / cover / table " + "fills). Default is the light navy-band deck (white slides, " + "navy header band with a white title, navy cover panel) — " + "pass this flag for OLED projectors or low-light venues." ), ) parser.add_argument( @@ -433,7 +433,7 @@ async def _run(args: argparse.Namespace) -> int: include_abstract=not args.no_abstract, language=args.lang, max_slides_per_paper=args.max_slides, - dark_mode=not args.light_mode, + dark_mode=args.dark_mode, ) needs_pptx = EXPORT_PPTX in formats # ``--pdf`` already supplies the PDF — the paywall gate is irrelevant diff --git a/thesisagents/core/models.py b/thesisagents/core/models.py index 9d08309..7dd09c1 100644 --- a/thesisagents/core/models.py +++ b/thesisagents/core/models.py @@ -437,13 +437,16 @@ class ExportOptions: #: render the full deck regardless of size; ``None`` is treated #: identically to the default. max_slides_per_paper: int | None = 25 - #: When True (default), the pptx exporter applies a dark-mode - #: palette post-build: dark slide background, light text, dark - #: table-row stripe. Set False (or pass ``--light-mode`` on the - #: CLI / tick the "Light mode" box in the GUI Deck tab) to keep - #: the classic white-background light deck — useful on projectors - #: in well-lit rooms or when the audience reads on paper after. - dark_mode: bool = True + #: When True, the pptx exporter applies a dark-mode palette + #: post-build: dark slide background, light text, dark table-row + #: stripe. **Defaults to False** — the project's default deck is now + #: the light navy-band style (white slides, full-width navy header + #: band with a white title, navy cover panel). 
Pass ``--dark-mode`` + #: on the CLI / tick the "Dark mode" box in the GUI Deck tab (or set + #: this True) for OLED projectors / low-light venues, where the + #: post-build pass lightens the band / cover / table fills so the + #: same chrome reads on the dark background. + dark_mode: bool = False def __post_init__(self) -> None: if not self.formats: diff --git a/thesisagents/exporters/i18n.py b/thesisagents/exporters/i18n.py index c6fe47f..2a63b72 100644 --- a/thesisagents/exporters/i18n.py +++ b/thesisagents/exporters/i18n.py @@ -40,7 +40,7 @@ "section_research_question": "Research Question", "section_technique_overview": "Key Technologies", "section_literature_positioning": "Literature Positioning", - "section_system_overview": "System Overview", + "section_system_overview": "System Architecture", "section_method_details": "Method Details", "section_evaluation": "Evaluation Method", "section_research_questions": "Research Questions", @@ -89,7 +89,7 @@ "section_research_question": "研究問題", "section_technique_overview": "關鍵技術概覽", "section_literature_positioning": "文獻定位", - "section_system_overview": "系統總覽", + "section_system_overview": "系統架構", "section_method_details": "方法細節", "section_evaluation": "評估方法", "section_research_questions": "研究問題", @@ -138,7 +138,7 @@ "section_research_question": "研究问题", "section_technique_overview": "关键技术概览", "section_literature_positioning": "文献定位", - "section_system_overview": "系统总览", + "section_system_overview": "系统架构", "section_method_details": "方法细节", "section_evaluation": "评估方法", "section_research_questions": "研究问题", @@ -187,7 +187,7 @@ "section_research_question": "リサーチクエスチョン", "section_technique_overview": "主要技術", "section_literature_positioning": "文献内での位置づけ", - "section_system_overview": "システム概要", + "section_system_overview": "システムアーキテクチャ", "section_method_details": "手法詳細", "section_evaluation": "評価方法", "section_research_questions": "リサーチクエスチョン", @@ -236,7 +236,7 @@ "section_research_question": "Pregunta de investigación", 
"section_technique_overview": "Tecnologías clave", "section_literature_positioning": "Posicionamiento en la literatura", - "section_system_overview": "Visión general del sistema", + "section_system_overview": "Arquitectura del sistema", "section_method_details": "Detalle del método", "section_evaluation": "Método de evaluación", "section_research_questions": "Preguntas de investigación", @@ -285,7 +285,7 @@ "section_research_question": "Question de recherche", "section_technique_overview": "Technologies clés", "section_literature_positioning": "Positionnement dans la littérature", - "section_system_overview": "Vue d'ensemble du système", + "section_system_overview": "Architecture du système", "section_method_details": "Détails de la méthode", "section_evaluation": "Méthode d'évaluation", "section_research_questions": "Questions de recherche", @@ -334,7 +334,7 @@ "section_research_question": "Forschungsfrage", "section_technique_overview": "Schlüsseltechnologien", "section_literature_positioning": "Einordnung in die Literatur", - "section_system_overview": "Systemüberblick", + "section_system_overview": "Systemarchitektur", "section_method_details": "Methode im Detail", "section_evaluation": "Evaluationsmethode", "section_research_questions": "Forschungsfragen", @@ -383,7 +383,7 @@ "section_research_question": "연구 질문", "section_technique_overview": "핵심 기술", "section_literature_positioning": "문헌적 위치", - "section_system_overview": "시스템 개요", + "section_system_overview": "시스템 아키텍처", "section_method_details": "방법 상세", "section_evaluation": "평가 방법", "section_research_questions": "연구 질문", @@ -432,7 +432,7 @@ "section_research_question": "Questão de pesquisa", "section_technique_overview": "Tecnologias-chave", "section_literature_positioning": "Posicionamento na literatura", - "section_system_overview": "Visão geral do sistema", + "section_system_overview": "Arquitetura do sistema", "section_method_details": "Detalhes do método", "section_evaluation": "Método de avaliação", 
"section_research_questions": "Questões de pesquisa", @@ -481,7 +481,7 @@ "section_research_question": "Исследовательский вопрос", "section_technique_overview": "Ключевые технологии", "section_literature_positioning": "Позиционирование в литературе", - "section_system_overview": "Обзор системы", + "section_system_overview": "Архитектура системы", "section_method_details": "Детали метода", "section_evaluation": "Метод оценки", "section_research_questions": "Исследовательские вопросы", @@ -530,7 +530,7 @@ "section_research_question": "Domanda di ricerca", "section_technique_overview": "Tecnologie chiave", "section_literature_positioning": "Posizionamento in letteratura", - "section_system_overview": "Panoramica del sistema", + "section_system_overview": "Architettura del sistema", "section_method_details": "Dettagli del metodo", "section_evaluation": "Metodo di valutazione", "section_research_questions": "Domande di ricerca", @@ -579,7 +579,7 @@ "section_research_question": "Câu hỏi nghiên cứu", "section_technique_overview": "Công nghệ chính", "section_literature_positioning": "Vị trí trong tài liệu", - "section_system_overview": "Tổng quan hệ thống", + "section_system_overview": "Kiến trúc hệ thống", "section_method_details": "Chi tiết phương pháp", "section_evaluation": "Phương pháp đánh giá", "section_research_questions": "Câu hỏi nghiên cứu", @@ -628,7 +628,7 @@ "section_research_question": "शोध प्रश्न", "section_technique_overview": "मुख्य तकनीकें", "section_literature_positioning": "साहित्य में स्थान", - "section_system_overview": "सिस्टम अवलोकन", + "section_system_overview": "सिस्टम आर्किटेक्चर", "section_method_details": "विधि विवरण", "section_evaluation": "मूल्यांकन विधि", "section_research_questions": "शोध प्रश्न", @@ -677,7 +677,7 @@ "section_research_question": "Pertanyaan Penelitian", "section_technique_overview": "Teknologi Utama", "section_literature_positioning": "Posisi dalam Literatur", - "section_system_overview": "Tinjauan Sistem", + 
"section_system_overview": "Arsitektur Sistem", "section_method_details": "Rincian Metode", "section_evaluation": "Metode Evaluasi", "section_research_questions": "Pertanyaan Penelitian", diff --git a/thesisagents/exporters/pptx.py b/thesisagents/exporters/pptx.py index 64280ab..72a623d 100644 --- a/thesisagents/exporters/pptx.py +++ b/thesisagents/exporters/pptx.py @@ -30,6 +30,7 @@ from __future__ import annotations import re +import unicodedata from collections.abc import Iterable from pathlib import Path @@ -109,19 +110,19 @@ #: WARNING — DO NOT use _BRAND_ACCENT as a TEXT colour. #: Red text in slide decks is consistently associated with errors, #: warnings, and AI-generated KPI emphasis ("look at this number!"). -#: The project bans red font runs entirely; use _BRAND_HIGHLIGHT (teal) +#: The project bans red font runs entirely; use _BRAND_HIGHLIGHT (blue) #: for emphasis instead. The constant is kept around in case a future #: non-text accent shape (sparkline, badge, etc.) needs it, but every #: existing TEXT callsite has been migrated to _BRAND_HIGHLIGHT. #: See .claude/agents/rules/deck-design.md "No red text" contract. _BRAND_ACCENT = RGBColor(0xC0, 0x39, 0x2B) -#: Emphasis text colour — teal-700 (#0E7490). Replaces the banned red -#: _BRAND_ACCENT for KPI values, RQ question callouts, figure +#: Emphasis text colour — academic blue-600 (#2563EB). Replaces the banned +#: red _BRAND_ACCENT for KPI values, RQ question callouts, figure #: captions, and other "this stands out" use cases. Pairs well with -#: bold; pairs cleanly with _BRAND_DARK navy as the secondary; reads -#: as professional/modern (think academic posters, not error banners). -#: Dark-mode pass swaps to teal-400 (#2DD4BF) via _LIGHT_TO_DARK_TEXT. -_BRAND_HIGHLIGHT = RGBColor(0x0E, 0x74, 0x90) +#: bold; sits in the same blue family as _BRAND_DARK navy for a cohesive +#: white + blue academic-paper look (not a competing hue). 
Dark-mode pass +#: swaps to blue-400 (#60A5FA) via _LIGHT_TO_DARK_TEXT. +_BRAND_HIGHLIGHT = RGBColor(0x25, 0x63, 0xEB) _BRAND_GREY = RGBColor(0x55, 0x55, 0x55) _BRAND_LIGHT = RGBColor(0xAA, 0xAA, 0xAA) @@ -150,10 +151,28 @@ } _DEFAULT_FONT_FAMILY: tuple[str, str | None] = ("Inter", None) -# Accent geometry (set on every content slide by the typography / -# accent pass so a stock blank layout still reads as a designed deck). -_ACCENT_TOP_HEIGHT = Inches(0.08) +# Accent geometry — the deck's signature chrome. +# +# Why a band, not a thin bar: a full-width filled navy header band with a +# white title (and a thin blue accent rule along its bottom edge) is what +# makes every content slide read as "designed for a defence" rather than a +# blank layout with a hairline at the top. _BODY_TOP (1.5") already sits +# below _HEADER_BAND_HEIGHT (1.18") so body content never collides with the +# band — no body-position constants need to move. +_HEADER_BAND_HEIGHT = Inches(1.18) +_ACCENT_RULE_HEIGHT = Inches(0.06) +# Legacy cover left-band width; the cover is now a full-bleed navy panel +# (see _add_cover_left_band) but the constant is kept for any external ref. _ACCENT_LEFT_WIDTH = Inches(0.4) +# Title text sits ON the navy band / navy cover, so it must be white — +# navy _BRAND_DARK on the navy band would be navy-on-navy (invisible). +# White-on-navy is already the established pattern for table headers +# (_TABLE_HEADER_FG), so this reuses that idea rather than adding a hue. +_HEADER_TITLE_FG = RGBColor(0xFF, 0xFF, 0xFF) +# The under-band accent rule + cover accent use the brand blue. It is a +# non-text shape, so the "blue is for emphasis TEXT only" split does not +# bind it; staying on-palette keeps the four-colour discipline. +_HEADER_ACCENT_FILL = _BRAND_HIGHLIGHT # Dark-mode palette (post-build recolour, opt-in via # ``ExportOptions.dark_mode``). 
@@ -171,7 +190,7 @@ (0x1F, 0x3A, 0x66): (0xE5, 0xE7, 0xEB), # _BRAND_DARK → near-white text (0x55, 0x55, 0x55): (0x9C, 0xA3, 0xAF), # _BRAND_GREY → mid grey (0xAA, 0xAA, 0xAA): (0x6B, 0x72, 0x80), # _BRAND_LIGHT → muted grey - (0x0E, 0x74, 0x90): (0x2D, 0xD4, 0xBF), # _BRAND_HIGHLIGHT → bright teal-400 + (0x25, 0x63, 0xEB): (0x60, 0xA5, 0xFA), # _BRAND_HIGHLIGHT → bright blue-400 # _BRAND_ACCENT (#C0392B) intentionally NOT mapped — red text was # banned per the deck-design "No red text" contract, and the # `test_pptx_no_red_text_runs` regression test fails if any run @@ -196,6 +215,11 @@ # is re-coloured to near-white = white-on-white = invisible. This # specific bug is what the dark-mode contrast contract guards. (0xF3, 0xF6, 0xFA): (0x1E, 0x26, 0x38), + # _HEADER_ACCENT_FILL / blue callout fill (_BRAND_HIGHLIGHT, #2563EB) → + # brighter blue-400 so the under-band accent rule and any blue-filled + # callout read on the dark slide, mirroring how blue emphasis TEXT + # swaps in _LIGHT_TO_DARK_TEXT. + (0x25, 0x63, 0xEB): (0x60, 0xA5, 0xFA), } _BRAND_RULE = RGBColor(0xCC, 0xCC, 0xCC) _RQ_BOX_FILL = RGBColor(0xF3, 0xF6, 0xFA) @@ -387,12 +411,45 @@ def _build( # --------------------------------------------------------------------------- +def _is_own_thesis(paper: Paper) -> bool: + """True when ``paper`` is the candidate's OWN thesis, not a fetched paper. + + The boundary this guards: a borrowed paper needs a source/overview slide + and a BibTeX cite-key (so the deck can attribute and cite it), whereas the + candidate's own dissertation has neither a fetched source to credit nor a + publisher DOI nor a cite-key worth advertising in their own defence deck. + + The ``thesis-deck-author`` flow builds the cover ``Paper`` with + ``source="local"`` and *no* external provenance — empty ``url``, no + ``doi`` / ``arxiv_id`` / ``pdf_url`` (see ``scripts/regen_thesis_demo.py``). 
+ The discriminator is deliberately that *combination*, not ``source`` + alone: the CLI ``--pdf`` flow also stamps ``source="local"`` when it + analyses *someone else's* downloaded paper, but it always records a + ``file://`` ``url`` (and often a real ``doi`` / ``arxiv_id`` from + PDF-metadata extraction), so it stays a borrowed paper and keeps its + source slide. Only an identifier-less local paper is the own-thesis cover. + + >>> own = Paper(source="local", source_id="t", title="My Thesis", + ... authors=("Me",), year=2026, venue="NTU", abstract="", url="") + >>> _is_own_thesis(own) + True + """ + if paper.source != "local": + return False + return not (paper.url or paper.doi or paper.arxiv_id or paper.pdf_url) + + def _add_paper_slides( prs: Presentation, layout, index: int, total: int, paper: Paper, ctx: _BuildContext ) -> None: if total > 1: _add_section_divider(prs, layout, index, total, paper, ctx) - _add_overview_slide(prs, layout, index, total, paper, ctx) + # The source/overview slide (and its BibTeX cite-key) exists to attribute a + # *borrowed* paper; the candidate's own thesis has no fetched source to + # credit, and the cover already carries its title / authors / venue. Skip + # it so a defence deck doesn't open on a "來源 / BibTeX key" page. + if not _is_own_thesis(paper): + _add_overview_slide(prs, layout, index, total, paper, ctx) if not ctx.include_abstract: return summary = paper.summary @@ -507,11 +564,15 @@ def _add_cover_slide( ) -> None: slide = prs.slides.add_slide(layout) title_text = _cover_title(collection, ctx) + # Cover is a full-bleed navy panel (placed by _add_cover_left_band), so + # the title is WHITE and the subtitle / meta are near-white — navy text + # would vanish into the navy. These light colours are correct in BOTH + # light (default) and dark modes because the cover stays navy either way. 
_add_textbox( slide, name="title", text=title_text, left=_MARGIN_X, top=_COVER_TITLE_TOP, width=_BODY_WIDTH, height=_COVER_TITLE_HEIGHT, - font_pt=_COVER_TITLE_PT, bold=True, colour=_BRAND_DARK, + font_pt=_COVER_TITLE_PT, bold=True, colour=_HEADER_TITLE_FG, align=PP_ALIGN.CENTER, shrink_to_fit=True, ) @@ -526,7 +587,7 @@ def _add_cover_slide( slide, name="subtitle", text=subtitle, left=_MARGIN_X, top=_COVER_SUBTITLE_TOP, width=_BODY_WIDTH, height=_COVER_SUBTITLE_HEIGHT, - font_pt=_COVER_SUBTITLE_PT, colour=_BRAND_GREY, + font_pt=_COVER_SUBTITLE_PT, colour=_DARK_BODY_TEXT, align=PP_ALIGN.CENTER, ) meta_text = _cover_subtitle(collection, ctx) @@ -534,7 +595,7 @@ def _add_cover_slide( slide, name="meta", text=meta_text, left=_MARGIN_X, top=_COVER_META_TOP, width=_BODY_WIDTH, height=_COVER_META_HEIGHT, - font_pt=_COVER_META_PT, colour=_BRAND_GREY, + font_pt=_COVER_META_PT, colour=_DARK_BODY_TEXT, align=PP_ALIGN.CENTER, ) @@ -603,8 +664,16 @@ def _add_overview_slide( def _add_references_slide( prs: Presentation, layout, collection: PaperCollection, ctx: _BuildContext ) -> None: + # A References section lists the works the deck *cites* (slide-deck-rules + # §11), never the presenting paper itself — so the candidate's own thesis + # is excluded. The rich PaperSummary carries no citation list, so a deck of + # only the candidate's own thesis gets no references slide at all rather + # than one that points back at the thesis (and shows an empty identifier). 
+ cited = [p for p in collection.papers if not _is_own_thesis(p)] + if not cited: + return slide = _new_section_slide(prs, layout, t(ctx.language, "references")) - bullets = [_reference_line(i + 1, p, ctx) for i, p in enumerate(collection.papers)] + bullets = [_reference_line(i + 1, p, ctx) for i, p in enumerate(cited)] _add_bullet_box( slide, name="body", bullets=bullets, left=_MARGIN_X, top=_BODY_TOP, @@ -642,19 +711,47 @@ def _add_qa_slide(prs: Presentation, layout, paper: Paper, ctx: _BuildContext) - _PAIN_POINTS_PER_SLIDE = 4 # 2 columns × 2 rows +_PAIN_QUADRANT_HEIGHT_IN = 4.4 # vertical span the quadrant grid occupies + + +def _pain_points_per_slide(sections) -> int: + """Cells per pain-point slide. The grid is normally 2×2, but when cells are + text-heavy a 2×2 forces ``row_h = 4.4/2 = 2.2"`` (body ≈ 1.55") which a + 3-long-bullet cell overflows. Since the project never truncates text, the + fix is to **paginate to one row of two** for tall content so each cell gets + the full 4.4" — fewer cells per slide, never a clipped bullet. Short cells + keep the 2×2 look. Estimates use the same wrap model as the overflow + inspector so the result is one it agrees fits. + """ + col_w_in = (_BODY_WIDTH.inches - 0.2) / 2 + max_cell = 0.0 + for _heading, bullets in sections: + lines = sum( + _estimate_wrapped_lines( + _strip_math_markup("• " + b), width_in=col_w_in, font_pt=_BODY_PT + ) + for b in _cap_bullets(bullets) + ) + cell = 0.55 + lines * _BODY_LINE_HEIGHT_IN + 0.1 # subhead + body + gap + max_cell = max(max_cell, cell) + rows_fit = int(_PAIN_QUADRANT_HEIGHT_IN // max_cell) if max_cell else 2 + return max(2, min(_PAIN_POINTS_PER_SLIDE, rows_fit * 2)) + + def _add_pain_points_slide( prs: Presentation, layout, paper: Paper, summary: PaperSummary, ctx: _BuildContext, ) -> None: - """Pain-points quadrant slide, paginated when more than 4 sections - are supplied. 
The research-question callout sits on the first slide + """Pain-points quadrant slide, paginated by content height so a text-heavy + cell never overflows. The research-question callout sits on the first slide only — subsequent pages are full quadrants of pain points.""" title = t(ctx.language, "section_pain_points") sections = list(summary.pain_points) if not sections: return + per_slide = _pain_points_per_slide(sections) chunks = [ - sections[i : i + _PAIN_POINTS_PER_SLIDE] - for i in range(0, len(sections), _PAIN_POINTS_PER_SLIDE) + sections[i : i + per_slide] + for i in range(0, len(sections), per_slide) ] for chunk_index, chunk in enumerate(chunks): chunk_title = title @@ -718,29 +815,55 @@ def _add_contributions_detailed_slide( _add_kpi_slide(prs, layout, paper, ctx, summary.headline_metrics) +_STACK_TOP_IN = 1.7 # first stacked section's Y on the slide + + +def _paginate_stacks(stack_list): + """Pack (heading, body) sections into per-slide chunks by **height budget** + so no chunk's cumulative height crosses the footer guard. ``(heading, body)`` + sections vary in body length, so a fixed count (``_MAX_STACKS_PER_SLIDE``) + can't guarantee fit — a slide of four 3-line contributions overflows where + four 1-line ones fit. ``_MAX_STACKS_PER_SLIDE`` is kept as an upper bound so + a slide never crowds in more than the documented maximum even when tiny. 
+ """ + budget = _FOOTER_GUARD.inches + chunks: list[list] = [] + current: list = [] + y = _STACK_TOP_IN + for heading, body in stack_list: + height = _stacked_section_height_in(body) + over_budget = current and y + height > budget + over_count = len(current) >= _MAX_STACKS_PER_SLIDE + if over_budget or over_count: + chunks.append(current) + current = [] + y = _STACK_TOP_IN + current.append((heading, body)) + y += height + if current: + chunks.append(current) + return chunks + + def _add_stacks_slide( prs: Presentation, layout, paper: Paper, ctx: _BuildContext, *, title: str, stacks, ) -> None: - """Render stacked sections. Paginates when stacks exceed - ``_MAX_STACKS_PER_SLIDE`` so author bullets are never silently - dropped — instead the title gets ``(1/N)`` and overflow spills onto - the next slide. + """Render stacked sections, paginating by height budget so author bullets + are never silently dropped — instead the title gets ``(1/N)`` and overflow + spills onto the next slide. See ``_paginate_stacks``. 
""" stack_list = list(stacks) if not stack_list: return - chunks = [ - stack_list[i : i + _MAX_STACKS_PER_SLIDE] - for i in range(0, len(stack_list), _MAX_STACKS_PER_SLIDE) - ] + chunks = _paginate_stacks(stack_list) for chunk_index, chunk in enumerate(chunks): chunk_title = title if len(chunks) > 1: chunk_title = f"{title} ({chunk_index + 1}/{len(chunks)})" slide = _new_section_slide(prs, layout, chunk_title) _add_paper_subtitle(slide, paper, ctx) - cursor = Inches(1.7) + cursor = Inches(_STACK_TOP_IN) for heading, body in chunk: cursor = _add_stacked_section(slide, heading, body, cursor) @@ -1078,7 +1201,7 @@ def _add_contribution_summary_slide( _add_rq_callout( slide, summary.core_observation, left=_MARGIN_X, top=Inches(2.5), - width=_BODY_WIDTH, height=Inches(2.0), + width=_BODY_WIDTH, height=Inches(2.0), highlight=True, ) @@ -1285,17 +1408,20 @@ def _new_section_slide( prs: Presentation, layout, title: str, *, font_pt: int = _SECTION_TITLE_PT, ): slide = prs.slides.add_slide(layout) + # The title sits inside the navy header band (placed later by the accent + # pass), so it is WHITE, not navy — navy-on-navy would be invisible. # ``shrink_to_fit`` lets a long title (e.g. a verbatim paper title) wrap - # within the fixed-height title box and PowerPoint scales the font down - # so the text never crosses the horizontal rule below. + # within the band's height and PowerPoint scales the font down so the + # text never spills past the band's bottom accent rule. The separate + # horizontal rule is gone — the band + its blue under-rule replace it. 
_add_textbox( slide, name="title", text=_clean(title), - left=_MARGIN_X, top=_TITLE_TOP, - width=_BODY_WIDTH, height=_TITLE_HEIGHT, - font_pt=font_pt, bold=True, colour=_BRAND_DARK, + left=_MARGIN_X, top=Inches(0.18), + width=_BODY_WIDTH, height=Inches(0.86), + font_pt=font_pt, bold=True, colour=_HEADER_TITLE_FG, + align=PP_ALIGN.LEFT, anchor=MSO_ANCHOR.MIDDLE, shrink_to_fit=True, ) - _add_horizontal_rule(slide, top=_RULE_TOP) return slide @@ -1447,7 +1573,9 @@ def _add_textbox( slide, *, name: str, text: str, left, top, width, height, font_pt: int, bold: bool = False, colour: RGBColor | None = None, align: PP_ALIGN | None = None, + anchor: MSO_ANCHOR | None = None, shrink_to_fit: bool = False, + math: bool = False, ) -> None: """Render a textbox. @@ -1457,13 +1585,35 @@ def _add_textbox( gets rendered inside its allotted height — PowerPoint shrinks the font at open time rather than letting the text bleed past the horizontal rule. + + ``math`` routes the text through ``_render_math_paragraph`` so ``$...$`` + spans become real subscripts / superscripts + italic variables (see + slide-deck-rules §12). Turn it on for content surfaces that carry math + notation — contribution / method body paragraphs, callouts — so the + feature actually fires there, not only in bullets / KPIs / table cells. + A math run always needs an explicit colour for the dark-mode contract, + so ``colour`` falls back to ``_BRAND_DARK`` when ``None``. The math path + fills a single paragraph; its callers pass whitespace-collapsed, + newline-free text, so multi-paragraph splitting is intentionally skipped. """ box = slide.shapes.add_textbox(left, top, width, height) box.name = name text_frame = box.text_frame text_frame.word_wrap = True + # Vertical anchor lets the header-band title sit visually centred within + # the band's height instead of top-aligned against the band's top edge. 
+ if anchor is not None: + text_frame.vertical_anchor = anchor if shrink_to_fit: text_frame.auto_size = MSO_AUTO_SIZE.TEXT_TO_FIT_SHAPE + if math: + paragraph = text_frame.paragraphs[0] + if align is not None: + paragraph.alignment = align + _render_math_paragraph( + paragraph, text, size_pt=font_pt, colour=colour or _BRAND_DARK, bold=bold + ) + return text_frame.text = text for paragraph in text_frame.paragraphs: if align is not None: @@ -1505,17 +1655,81 @@ def _add_footer(slide, text: str) -> None: ) +# Line-height of a body paragraph at single spacing (PowerPoint ≈ 1.2 × font). +_BODY_LINE_HEIGHT_IN = _BODY_PT * 1.2 / 72 +_STACK_HEAD_HEIGHT_IN = 0.42 +_STACK_GAP_IN = 0.05 +_STACK_BODY_MIN_IN = 0.85 +_STACK_BODY_PAD_IN = 0.06 # keep the box just taller than the rendered text +_TEXTBOX_SIDE_MARGIN_IN = 0.1 # python-pptx default left/right inner margin + + +def _strip_math_markup(text: str) -> str: + """Reduce ``$...$`` math markup to its visible glyphs for length estimation: + ``$I(z_a)$`` → ``I(za)``. The delimiters / sub-/superscript markers are not + rendered as characters, so counting them would over-size the box.""" + cleaned = text.replace("$", "") + cleaned = re.sub(r"[_^]\{([^}]*)\}", r"\1", cleaned) # _{xy} / ^{xy} -> xy + return re.sub(r"[_^](.)", r"\1", cleaned) # _x / ^x -> x + + +def _estimate_wrapped_lines(text: str, *, width_in: float, font_pt: int) -> int: + """Estimate wrapped line count for ``text`` in a box ``width_in`` wide. + + Mirrors ``scripts/check_overflow.py``'s estimator (full-width CJK ≈ 1.0 em, + half-width Latin ≈ 0.55 em, wrap at the box's inner width) so a body box + this sizes is one the overflow inspector agrees fits — the exporter and the + inspector stay on one geometry contract instead of drifting apart. 
+ """ + inner_pt = max(0.1, width_in - 2 * _TEXTBOX_SIDE_MARGIN_IN) * 72 + line_w = 0.0 + lines = 1 + for ch in text: + if ch == "\n": + lines += 1 + line_w = 0.0 + continue + em = 1.0 if unicodedata.east_asian_width(ch) in ("F", "W") else 0.55 + char_w = em * font_pt + if line_w + char_w > inner_pt and line_w > 0: + lines += 1 + line_w = char_w + else: + line_w += char_w + return lines + + +def _stacked_body_height_in(body: str) -> float: + """Body-box height (inches) needed for ``body``, adaptive to its wrapped + line count so a 3-line contribution gets a 3-line box instead of spilling + into the next subhead. Floors at ``_STACK_BODY_MIN_IN`` so a one-line body + still has breathing room.""" + text = _strip_math_markup(" ".join((body or "").split())) + # Length arithmetic returns a plain int (EMU), so re-wrap in Emu to read .inches. + width_in = Emu(_BODY_WIDTH - Inches(0.2)).inches + lines = _estimate_wrapped_lines(text, width_in=width_in, font_pt=_BODY_PT) + return max(_STACK_BODY_MIN_IN, lines * _BODY_LINE_HEIGHT_IN + _STACK_BODY_PAD_IN) + + +def _stacked_section_height_in(body: str) -> float: + """Total vertical span of one stacked section (head + body + gap), used by + the height-aware paginator in ``_add_stacks_slide``.""" + return _STACK_HEAD_HEIGHT_IN + _stacked_body_height_in(body) + _STACK_GAP_IN + + def _add_stacked_section(slide, heading: str, body: str, cursor) -> int: """Render an inline (heading bold + body grey) block; return next cursor Y. - Body height is sized for a 2-line wrap at the current body font so a - full-sentence contribution doesn't visually spill into the next - subhead. Body text is not truncated — see ``_sentences_to_bullets`` - for the rationale. + Body height is **adaptive** — sized to the body's estimated wrapped line + count (see ``_stacked_body_height_in``) so a full-sentence contribution + that needs three lines gets a three-line box rather than overflowing a + fixed two-line one. 
Body text is not truncated — see ``_sentences_to_bullets`` + for the rationale. The caller (``_add_stacks_slide``) paginates by the same + height estimate so the cumulative stack never crosses the footer guard. """ - head_height = Inches(0.42) - body_height = Inches(0.85) - gap = Inches(0.05) + head_height = Inches(_STACK_HEAD_HEIGHT_IN) + body_height = Inches(_stacked_body_height_in(body)) + gap = Inches(_STACK_GAP_IN) _add_textbox( slide, name="subhead", text=_clean(heading), left=_MARGIN_X, top=cursor, @@ -1526,18 +1740,34 @@ def _add_stacked_section(slide, heading: str, body: str, cursor) -> int: slide, name="body", text=" ".join((body or "").split()), left=Inches(0.7), top=cursor + head_height, width=_BODY_WIDTH - Inches(0.2), height=body_height, - font_pt=_BODY_PT, colour=_BRAND_GREY, + font_pt=_BODY_PT, colour=_BRAND_GREY, math=True, ) return cursor + head_height + body_height + gap +def _bullets_box_height_in(bullets, width_in: float) -> float: + """Estimated height (inches) of a bullet box, adaptive to each bullet's + wrapped line count rather than a flat per-bullet allowance. A long + framework-feature bullet that wraps to two lines gets two lines of height + instead of overflowing a fixed 0.5"/bullet box. Uses the same wrap model as + the overflow inspector. Floors at 0.5" so a single short bullet still has + breathing room.""" + total = 0.0 + for bullet in bullets: + lines = _estimate_wrapped_lines( + _strip_math_markup(f"• {bullet}"), width_in=width_in, font_pt=_BODY_PT + ) + total += lines * _BODY_LINE_HEIGHT_IN + 0.06 + return max(0.5, total) + + def _add_subsection(slide, heading: str, bullets, cursor, *, width) -> int: head_height = Inches(0.5) # Subsection bullets — show up to 6 so authors don't lose detail. # Method/eval slides paginate at the section-list level, so 6 per # subsection won't push past the footer in the worst case. 
capped = _cap_bullets(bullets, max_count=6) - bullet_height = Inches(0.5 * max(1, len(capped))) + bullet_height = Inches(_bullets_box_height_in(capped, Emu(width - Inches(0.2)).inches)) _add_textbox( slide, name="subhead", text=_clean(heading), left=_MARGIN_X, top=cursor, @@ -1583,27 +1813,47 @@ def _render_multi_column( ) -def _add_rq_callout(slide, text: str, *, left, top, width, height) -> None: - """A boxed highlight: filled rectangle + bold text on top.""" +def _add_rq_callout( + slide, text: str, *, left, top, width, height, highlight: bool = False +) -> None: + """A boxed highlight: filled rectangle + bold text on top. + + ``highlight=False`` (default) is the light off-white RQ-question box + (navy text + navy border) used for research-question callouts. + ``highlight=True`` is the **filled takeaway box** — solid navy fill + + white text, no border — reserved for the single core-observation / + key-finding slide so the deck's one "punch line" reads as a filled + panel that mirrors the navy header band, not as just another light box. + Both fill RGBs (``_BRAND_DARK`` navy, ``_RQ_BOX_FILL`` off-white) have + ``_LIGHT_TO_DARK_FILL`` entries, so dark mode recolours them and the + text stays readable in both modes. 
+ """ from pptx.enum.shapes import MSO_SHAPE rect = slide.shapes.add_shape(MSO_SHAPE.ROUNDED_RECTANGLE, left, top, width, height) rect.name = "rq_box" rect.fill.solid() - rect.fill.fore_color.rgb = _RQ_BOX_FILL - rect.line.color.rgb = _RQ_BOX_BORDER - rect.line.width = Pt(1.0) + if highlight: + rect.fill.fore_color.rgb = _BRAND_DARK + rect.line.fill.background() + text_colour = _HEADER_TITLE_FG + else: + rect.fill.fore_color.rgb = _RQ_BOX_FILL + rect.line.color.rgb = _RQ_BOX_BORDER + rect.line.width = Pt(1.0) + text_colour = _BRAND_DARK rect.text_frame.word_wrap = True rect.text_frame.margin_left = Inches(0.2) rect.text_frame.margin_right = Inches(0.2) rect.text_frame.margin_top = Inches(0.1) rect.text_frame.margin_bottom = Inches(0.1) - rect.text_frame.text = text - for paragraph in rect.text_frame.paragraphs: - for run in paragraph.runs: - run.font.size = Pt(_BODY_PT) - run.font.bold = True - run.font.color.rgb = _BRAND_DARK + # Math-aware: an RQ / core-observation callout often states the paper's + # objective formula (e.g. "$I(z_a;z_b|E_p)$"), so render $...$ spans as real + # subscripts rather than flat ASCII (slide-deck-rules §12). + _render_math_paragraph( + rect.text_frame.paragraphs[0], text, + size_pt=_BODY_PT, colour=text_colour, bold=True, + ) def _add_kpi_lines( @@ -1996,12 +2246,20 @@ def _decorate_with_accents(prs: Presentation) -> None: def _add_cover_left_band(slide) -> None: + """Cover chrome — a full-bleed navy panel behind the centred white title. + + Kept under the name ``accent_left`` (the cover-band semantic name) even + though it now spans the whole slide, so pptx_edit / audits that look up + the cover accent by name still find it. Sent to back so the cover + title / subtitle / meta (set to white / near-white in ``_add_cover_slide``) + render on top of the navy. 
+ """ if _has_named_shape(slide, "accent_left"): return shape = slide.shapes.add_shape( MSO_SHAPE.RECTANGLE, Emu(0), Emu(0), - _ACCENT_LEFT_WIDTH, _SLIDE_HEIGHT, + _SLIDE_WIDTH, _SLIDE_HEIGHT, ) shape.name = "accent_left" shape.line.fill.background() @@ -2011,18 +2269,39 @@ def _add_cover_left_band(slide) -> None: def _add_top_accent_bar(slide) -> None: + """Content-slide chrome — the full-width navy header band plus a thin + teal accent rule along its bottom edge. + + The band keeps the name ``accent_top`` (the content-slide accent + semantic name) so edit tools / the deck-design audit still locate the + top accent by name. Both shapes are sent to back so the white title + placed by ``_new_section_slide`` renders on top of the band. The band + fill (_BRAND_DARK) and accent rule fill (_HEADER_ACCENT_FILL) both have + ``_LIGHT_TO_DARK_FILL`` entries, so dark mode lightens them rather than + leaving them as-is. + """ if _has_named_shape(slide, "accent_top"): return - shape = slide.shapes.add_shape( + band = slide.shapes.add_shape( MSO_SHAPE.RECTANGLE, Emu(0), Emu(0), - _SLIDE_WIDTH, _ACCENT_TOP_HEIGHT, + _SLIDE_WIDTH, _HEADER_BAND_HEIGHT, ) - shape.name = "accent_top" - shape.line.fill.background() - shape.fill.solid() - shape.fill.fore_color.rgb = _BRAND_DARK - _send_shape_to_back(shape, slide) + band.name = "accent_top" + band.line.fill.background() + band.fill.solid() + band.fill.fore_color.rgb = _BRAND_DARK + _send_shape_to_back(band, slide) + rule = slide.shapes.add_shape( + MSO_SHAPE.RECTANGLE, + Emu(0), _HEADER_BAND_HEIGHT, + _SLIDE_WIDTH, _ACCENT_RULE_HEIGHT, + ) + rule.name = "accent_rule" + rule.line.fill.background() + rule.fill.solid() + rule.fill.fore_color.rgb = _HEADER_ACCENT_FILL + _send_shape_to_back(rule, slide) def _has_named_shape(slide, name: str) -> bool: diff --git a/thesisagents/gui/i18n.py b/thesisagents/gui/i18n.py index 0a3f891..313d11e 100644 --- a/thesisagents/gui/i18n.py +++ b/thesisagents/gui/i18n.py @@ -1522,21 +1522,21 @@ "hi": "Abstract 
slides शामिल करें", "id": "Sertakan slide abstrak", }, - "deck.light_mode_label": { - "en": "Light mode (white background, dark mode is default)", - "zh-tw": "亮色模式(白色背景,預設為暗色)", - "zh-cn": "亮色模式(白色背景,默认为暗色)", - "ja": "ライトモード(白背景,既定はダーク)", - "es": "Modo claro (fondo blanco; oscuro por defecto)", - "fr": "Mode clair (fond blanc ; sombre par défaut)", - "de": "Heller Modus (weißer Hintergrund; dunkel ist Standard)", - "ko": "라이트 모드 (흰 배경, 기본은 다크)", - "pt": "Modo claro (fundo branco; escuro por padrão)", - "ru": "Светлый режим (белый фон; по умолчанию тёмный)", - "it": "Modalità chiara (sfondo bianco; scuro per default)", - "vi": "Chế độ sáng (nền trắng; mặc định là tối)", - "hi": "Light mode (सफ़ेद पृष्ठभूमि; डिफ़ॉल्ट dark है)", - "id": "Mode terang (latar putih; default gelap)", + "deck.dark_mode_label": { + "en": "Dark mode (dark background; light navy-band deck is default)", + "zh-tw": "暗色模式(深色背景,預設為淺色藍帶版面)", + "zh-cn": "暗色模式(深色背景,默认为浅色蓝带版面)", + "ja": "ダークモード(暗い背景,既定はライトの紺帯デッキ)", + "es": "Modo oscuro (fondo oscuro; claro es el predeterminado)", + "fr": "Mode sombre (fond sombre ; le clair est par défaut)", + "de": "Dunkler Modus (dunkler Hintergrund; hell ist Standard)", + "ko": "다크 모드 (어두운 배경, 기본은 라이트)", + "pt": "Modo escuro (fundo escuro; claro é o padrão)", + "ru": "Тёмный режим (тёмный фон; по умолчанию светлый)", + "it": "Modalità scura (sfondo scuro; chiaro è il default)", + "vi": "Chế độ tối (nền tối; mặc định là sáng)", + "hi": "Dark mode (गहरी पृष्ठभूमि; डिफ़ॉल्ट light है)", + "id": "Mode gelap (latar gelap; default terang)", }, "deck.export_button": { "en": "Export", diff --git a/thesisagents/gui/pages/deck.py b/thesisagents/gui/pages/deck.py index b9e8036..4f819d7 100644 --- a/thesisagents/gui/pages/deck.py +++ b/thesisagents/gui/pages/deck.py @@ -140,12 +140,13 @@ def _build_ui(self) -> None: ) self._include_abstract_check.setChecked(True) options_form.addRow(self._include_abstract_check) - # Default is DARK; this checkbox is the opt-OUT toggle for 
light. - self._light_mode_check = QCheckBox( - t("deck.light_mode_label", self._ui_language), self, + # Default is the light navy-band deck; this checkbox is the + # opt-IN toggle for dark mode (OLED / low-light venues). + self._dark_mode_check = QCheckBox( + t("deck.dark_mode_label", self._ui_language), self, ) - self._light_mode_check.setChecked(False) - options_form.addRow(self._light_mode_check) + self._dark_mode_check.setChecked(False) + options_form.addRow(self._dark_mode_check) outer.addWidget(options_box) # Action row @@ -266,7 +267,7 @@ def _on_export_clicked(self) -> None: include_abstract=self._include_abstract_check.isChecked(), language=language, max_slides_per_paper=self._max_slides_spin.value(), - dark_mode=not self._light_mode_check.isChecked(), + dark_mode=self._dark_mode_check.isChecked(), ) collection = self._collection self._export_button.setEnabled(False) diff --git a/thesisagents/mcp/server.py b/thesisagents/mcp/server.py index d86465f..eadd886 100644 --- a/thesisagents/mcp/server.py +++ b/thesisagents/mcp/server.py @@ -23,8 +23,8 @@ papers[*].summary may include rich fields (pain_points, research_question, headline_metrics, technique_table, literature_table, method_sections, research_questions, rq_results, …) — when present, the PPT switches to - thesis-style layout. ``dark_mode`` defaults to True (project default); - pass False for the light/printable variant. + thesis-style layout. ``dark_mode`` defaults to False (project default + is the light navy-band deck); pass True for the dark OLED/low-light variant. - pptx_inspect(path) -> {slides: [...]} - pptx_update_slide(path, slide_index, title?, body?, meta?, shape_updates?) -> {path} - pptx_delete_slide(path, slide_index) -> {path} @@ -333,7 +333,7 @@ def export( include_abstract: bool = True, language: str = "en", max_slides_per_paper: int | None = 25, - dark_mode: bool = True, + dark_mode: bool = False, ) -> dict[str, Any]: """Export a list of papers (from search / fetch_paper) to disk. 
@@ -349,11 +349,13 @@ def export( Q&A/figure slides drop first). Default 25; pass ``0`` (or ``None``) for unlimited. - ``dark_mode`` defaults to True — the post-build pass swaps the - brand palette to dark slide background (#12151B) + near-white - text (#E5E7EB) so OLED projectors and low-light venues don't - glare. Pass False for the light/printable variant (white slide - background + navy text). + ``dark_mode`` defaults to False — the project default is the + light navy-band deck (white slides, full-width navy header band + with a white title, navy cover panel). Pass True for the dark + variant: the post-build pass swaps to a dark slide background + (#12151B) + near-white text (#E5E7EB) and lightens the navy + band / cover / table fills so the same chrome reads on OLED + projectors and in low-light venues. """ if not papers: raise ThesisAgentsError("export requires at least one paper") From f0c84715c588863946e06340b9578d08408241e3 Mon Sep 17 00:00:00 2001 From: JeffreyChen Date: Wed, 10 Jun 2026 02:12:17 +0800 Subject: [PATCH 10/16] Add plain-language comprehensibility rule across paper/deck subagents First-use glossing made each term decodable but not the whole argument; a reader could parse every word and still miss the point. Add an argument-level comprehensibility rule so a non-expert (adjacent-discipline committee member, skimming reviewer, undergraduate) can grasp what each section/slide claims, roughly how, and why it matters. - paper_rule: new authoritative bilingual HARD section (term-level -> argument-level), with intuition-before-formalism, plain per-section "so what", real-world number anchors, one-analogy, and a cross-department self-test. - slide-deck-rules: new section 14 (slide implementation) + forward-ref from section 9. - deck-design: visual-side subsection + anti-pattern bullet. - paper-summary-author / thesis-deck-author: authoring-time bullet. - post-author-audit: Audit 5 (judgement scan) + reporting wiring. 
- CLAUDE.md: wired into the context-clear + detail-explained governance. Additive to depth, never a dumbing-down; enforced by Audit 5. --- .claude/agents/rules/deck-design.md | 15 ++++ .claude/agents/rules/paper_rule.md | 32 ++++++++ .claude/agents/rules/slide-deck-rules.md | 18 ++++- .claude/agents/tasks/paper-summary-author.md | 1 + .claude/agents/tasks/post-author-audit.md | 83 +++++++++++++++++++- .claude/agents/tasks/thesis-deck-author.md | 1 + CLAUDE.md | 20 +++-- 7 files changed, 159 insertions(+), 11 deletions(-) diff --git a/.claude/agents/rules/deck-design.md b/.claude/agents/rules/deck-design.md index 47f07c9..9b18600 100644 --- a/.claude/agents/rules/deck-design.md +++ b/.claude/agents/rules/deck-design.md @@ -348,6 +348,15 @@ Each slide needs one element the eye lands on first — the takeaway from `slide **Anti-pattern:** title, three KPIs, a table and a caption all the same size and colour — no focal point, the eye wanders. **Pattern:** one KPI value ~2× the size of its label in blue, the table muted beneath it, caption small and grey. +### Designing for the non-expert eye (comprehension is a visual job too) + +Every contract above makes a deck look *professional*, the typography pass kills the Calibri tell, the palette stays disciplined, the contrast contracts keep text readable. But "looks professional" and "an outside examiner actually understood it" are different wins — a deck can pass every visual contract in this doc and still lose half the committee, because the adjacent-discipline 口試委員 / the skimming reviewer never grasped *what the slide was claiming*. The authoritative definition of that bar is `paper_rule`'s "Plain-language comprehensibility — a non-expert must grasp the point (HARD)", and the wording-side slide rules (a plain "so what" per slide, intuition before formula, a real-world anchor for every headline number, one sparing analogy) live in `slide-deck-rules` §14. 
This subsection is the **visual-side complement**, the parts of comprehension that are a *design* decision rather than a *wording* one. It is additive to depth, never a dumbing-down — the rigour stays, the entry ramp gets built. + +- **Show the intuition, don't only state it.** The single most effective way to land a hard idea for a non-expert is a SIMPLE diagram / schematic / annotated example placed BEFORE the dense table or formula — a labelled "before vs after", a one-arrow flow, a single worked example with the moving part highlighted does what a paragraph cannot. The exporter already supports this via `figures=` (`_add_figure_image`) and `system_flow`, so an intuition diagram is a legitimate, high-value figure, not decoration that pads the slide count. Render it on-brand exactly as the "Figures & charts" rules above require (brand palette, no chartjunk, transparent background in dark mode, ≥ 14pt labels). **Anti-pattern:** the first slide on a method opens with the full objective `$I(z_a;z_b|E_p)$` and a 6-row hyperparameter table, the non-expert is lost before the result slide. **Pattern:** a one-arrow "raw input → our transform → cleaner signal" schematic first, THEN the formula on the next slide for the experts who want it. +- **Visual hierarchy should foreground the plain takeaway.** This ties directly to "Visual hierarchy & focal point" above, the one focal element — the biggest / boldest / most-saturated thing — must be the **plain-language point** (the anchored KPI, the winning row, the one-line assertion), not the most technical-looking object on the slide. A dense equation rendered large and centred makes a slide LOOK rigorous while burying the takeaway, which is the exact opposite of comprehensibility, the eye lands on the symbol soup and bounces off. **Anti-pattern:** a five-term equation set 40pt dead-centre while the result it produces sits 14pt in a corner. 
**Pattern:** the result ("2.3× faster, same accuracy") is the focal KPI in bold blue, the equation is supporting evidence beneath it at body size. +- **Annotate figures so they're self-explaining.** A chart a non-expert can read needs axis labels with units, a one-line "what to notice" caption, and the winning series called out DIRECTLY on the plot (a label or arrow at the end of the line), not left for the audience to reverse-engineer from a legend in the corner. This extends the Figures rule "Label every axis with its quantity AND unit" by adding the *"tell the reader what to conclude"* layer on top of the *"tell the reader what the axes are"* layer. **Anti-pattern:** four unlabelled coloured lines and a legend that says `model_a / model_b / baseline_v2`, the examiner has no idea which line is good or why. **Pattern:** the winning line is the only heavy solid one, labelled "Ours" at its endpoint, with a caption「越低越好,本方法在所有負載下延遲最低」(lower is better, our method has the lowest latency at every load). +- **Don't let visual polish substitute for comprehension.** A beautifully styled deck whose every slide is still opaque to an outside examiner has failed `slide-deck-rules` §14, full stop. Brand discipline (this doc) and accessibility-to-a-non-expert (§14) are **independent axes, both required** — passing the typography / palette / contrast contracts buys you "professional", it does NOT buy you "understood". When you audit a deck, ask both questions separately, "does it look on-brand?" AND "would someone outside this sub-field know what each slide is claiming?". A yes to the first and a no to the second is still a fail. + ## Anti-patterns (instant "AI-generated" tells) - Plain `prs.slide_layouts[6]` (blank) with no programmatic accent. Every @@ -370,6 +379,12 @@ Each slide needs one element the eye lands on first — the takeaway from `slide look generated. See [paper-summary-author](paper-summary-author.md). - Identical line-height across heading + body. 
Headings should have tighter line-height than body. +- A dense formula or 15-cell table rendered as the slide's largest, most + central element — looks rigorous, but the plain takeaway is buried and + the outside examiner bounces off the symbol soup. The focal point should + be the anchored takeaway (the KPI / winning row / one-line assertion), + with the formula or full table as supporting evidence beneath it (see + "Designing for the non-expert eye" above + `slide-deck-rules` §14). ## How to audit a deck diff --git a/.claude/agents/rules/paper_rule.md b/.claude/agents/rules/paper_rule.md index aa7b49c..a03a730 100644 --- a/.claude/agents/rules/paper_rule.md +++ b/.claude/agents/rules/paper_rule.md @@ -448,6 +448,38 @@ If after step 3 the budget still won't close, surface the trade-off to the user --- +## Plain-language comprehensibility — a non-expert must grasp the point (HARD) / 深入淺出:非專業讀者也要看得懂重點(HARD) + +The sibling Technical-terminology rule (above) makes each individual **term** understandable, this rule makes the whole **argument** understandable. First-use glossing fixes the word level, this fixes the paragraph-and-section level: a reader who is NOT in the sub-field — an adjacent-discipline 口試委員, a final-year undergraduate, a reviewer skimming many submissions, a family member sitting in on a defence — must be able to follow what problem is being solved, roughly how, and why the result matters. **Depth is kept in full**, every dense technical passage is simply accompanied by a plain-language "what this means / why it matters" layer, so comprehension never REQUIRES prior expertise. Accessibility is **additive to** depth, never a replacement for it (深入 AND 淺出, not 淺 instead of 深). 
/ 上一條技術名詞規則處理的是「單一名詞」看不看得懂,本條處理的是「整段論述」看不看得懂。首次解釋修的是字詞層級,本條修的是段落與章節層級:非該子領域之讀者(相鄰領域口試委員、大四學生、快速翻閱之審稿人、旁聽答辯的家人)都必須能跟上「在解什麼問題、大致怎麼做、結果為何重要」。**技術深度完整保留**,只是在每段艱深技術旁,加一層白話的「這是什麼意思/為何重要」,使理解不必以先備專業為前提。淺出是**疊加在**深入之上,而非取代深入(深入 AND 淺出,不是用淺取代深)。 + +**Why this is a separate HARD rule.** Depth and accessibility are not in tension — the project's existing first-use-gloss rule already proves the stance (a term must both appear AND be explained). This section extends the very same stance from the **term level** to the **argument level**. The single most common review complaint on this project's generated papers and decks is 「技術上對,但跨領域/外行委員抓不到重點」, the content is technically correct yet the point does not land for anyone outside the sub-field. A correct argument that only the author's lab can parse fails its real audience (committee + reviewers + broader-community readers). / **為何另立一條 HARD 規則。** 深入與淺出並不衝突,本專案既有的首次解釋規則已經證明了立場(名詞必須出現「且」必須解釋)。本條把同一立場從**名詞層級**延伸到**論述層級**。本專案產生的論文與簡報,最常見的審閱抱怨就是「技術上對,但跨領域/外行委員抓不到重點」,內容技術上正確,但對子領域外的人重點傳不到。一個只有作者實驗室看得懂的正確論述,對其真正的讀者(口試委員+審稿人+廣義社群讀者)而言是失敗的。 + +### Sub-rules / 子規則 + +1. **Intuition before formalism / 先講直覺,再上公式或術語.** Before a formula, algorithm block, or dense passage, state in one plain everyday-language sentence what it accomplishes and why. / 在公式、演算法區塊或艱深段落之前,先用一句白話講清楚它做到什麼、為什麼要這樣做。 + - Worked example (zh-tw): before the formula 「minimise the mutual information $I(z_a;z_b\mid E_p)$」, first write the plain lead-in 「我們要讓對抗訊號與正常訊號在表示空間裡盡量不重疊,攻擊者就無法藏在正常請求裡」, **then** give the formula. Math notation stays wrapped in `$...$` per the paper/deck math rules. / 範例:在公式之前,先寫白話引導句,再列出公式,數學記號仍以 `$...$` 包覆。 +2. **Every section answers "so what" in plain language / 每節都要有白話的「所以呢」.** Each section carries a one-line plain takeaway a non-expert could repeat verbatim. / 每節都要有一句非專家能照樣複述的白話結論。 + - Not 「達到 92.3% ADA」 but 「我們的方法比現有最好的防禦多擋下約 6% 的攻擊,速度也更快」. / 不是「達到 92.3% ADA」,而是「我們的方法比現有最好的防禦多擋下約 6% 的攻擊,速度也更快」。 +3. 
**Analogy or concrete micro-example for the single hardest concept / 用類比或具體小例子解釋最難的概念.** One per method, used sparingly, never decorative. / 每個方法最多一個,點到為止,不可為了好看而加。 +4. **Quantities given meaning, not just magnitude / 數字要給意義.** A non-expert cannot calibrate a bare number, so anchor it to the real world. / 非專家無法校準一個赤裸的數字,要把它錨定到現實感受。 + - 「延遲 12.3 ms」 → 「延遲 12.3 ms(約眨眼的 1/30,使用者幾乎無感)」, 「F1 0.87」 → 「F1 0.87(每 100 次判斷約 87 次正確)」. This **complements** the number-reporting rule (`§ 數字與統計呈現 / Reporting numbers and statistics`, which governs significant figures, pp-vs-% and p-values) by adding the real-world anchor on top of the correctly-formatted number. / 此規則在「數字呈現」規則(管有效數字、pp 對 %、p 值)之上,再加一層現實世界的錨點。 +5. **Self-test (audit) / 自我檢核.** After each section, ask: could a smart reader from a DIFFERENT department say (a) what problem this solves, (b) roughly how, (c) why the result matters — WITHOUT Googling? If not, add the plain layer. / 寫完每節後自問:來自「不同系所」的聰明讀者能否在不 Google 的前提下講出(a)這解了什麼問題、(b)大致怎麼做、(c)結果為何重要?不能,就補上白話層。 + +### Anti-patterns / 反例(不可這樣寫) + +1. **Formula or algorithm with no plain lead-in** — the reader sees symbols and never learns the goal. / 公式或演算法前面沒有白話引導句,讀者只看到符號,始終不知道目標是什麼。 +2. **A bare number as the entire takeaway** (「92.3%」) with no real-world meaning attached. / 整段結論只丟一個赤裸數字(「92.3%」),沒有任何現實意義。 +3. **"Dumbing down" by deletion** — removing the rigorous statement and keeping only the hand-wave is the WRONG direction. Keep **both** the rigorous claim AND the plain layer. / 用刪除來「降低難度」,把嚴謹陳述刪掉、只留下含糊比喻,方向錯了。嚴謹宣稱與白話層**兩者都要留**。 +4. **Plain layer only in the Abstract and Introduction**, then pure jargon for §3–§5. Comprehensibility must hold section by section, not just at the top. / 白話層只出現在摘要與緒論,§3–§5 又退回純術語,深入淺出必須逐節成立,不是只在開頭做。 + +### Cross-references / 交叉參照 + +- Pairs with **Technical terminology — must include AND must explain** above: that rule is **term-level** comprehensibility, this rule is **argument-level** comprehensibility. 
/ 與上方「技術名詞」規則成對:該條是**名詞層級**,本條是**論述層級**。 +- Builds on the number-reporting rule **§ 數字與統計呈現 / Reporting numbers and statistics (HARD RULE)**: that rule fixes how a number is formatted, sub-rule 4 here adds what the number means to a non-expert. / 承接「數字與統計呈現」規則:該條管數字怎麼寫,本條子規則 4 管這個數字對外行人代表什麼。 +- The slide-deck surface implements this same principle as **`slide-deck-rules §14`**. / 簡報面以 **`slide-deck-rules §14`** 落實同一原則。 + +--- + ## Other recommendations / 論文其他建議 ### Figures / 圖表 diff --git a/.claude/agents/rules/slide-deck-rules.md b/.claude/agents/rules/slide-deck-rules.md index b7f6300..be2d4fe 100644 --- a/.claude/agents/rules/slide-deck-rules.md +++ b/.claude/agents/rules/slide-deck-rules.md @@ -135,7 +135,7 @@ A thesis-style deck is read by an audience watching a talk, not by someone readi **Why:** a slide titled "Method" with eight bullets forces the audience to find the point themselves; a slide whose title *is* the point, evidenced below it, lands in five seconds. The exporter renders whatever the summary provides, so the assertion has to be authored into the slide's `title` / `subhead`, not left as a section label. -**Anti-pattern:** title "Experiment Results", body = 9 bullets spanning 3 different findings. **Pattern:** three slides, each titled with one finding, each body = that finding's KPI / table / chart. +**Anti-pattern:** title "Experiment Results", body = 9 bullets spanning 3 different findings. **Pattern:** three slides, each titled with one finding, each body = that finding's KPI / table / chart. (See §14 for the argument-level requirement that the assertion be sayable by a non-expert, not only correct.) ### 10. Choose the evidence form that fits the data (HARD) @@ -190,6 +190,22 @@ Surfaces that render the contract (`_render_math_paragraph` / `_append_math_runs **Anti-pattern:** 40 dense slides "because the paper is rich" — undeliverable, and every slide over-caps. 
**Pattern:** the cap forces the one-assertion-per-slide discipline of §9; if the content doesn't fit, it wasn't prioritised, not "the cap is too small". +### 14. Plain-language comprehensibility for a mixed audience (HARD) + +§8 makes each **term** on a slide decodable; this section makes the **slide's whole point** graspable by a non-expert. They are complementary, not the same gate: a slide can pass §8 (every acronym / symbol / library name glossed at first use) and still leave the audience knowing what each word means yet not what the slide is *for*. A thesis-defence audience is mixed: it includes an examiner from an adjacent sub-field, and the deck is read at presentation speed (~30 s/slide) with no chance to ask "what did that mean?". §14 ensures that examiner leaves each slide able to say what it shows and why it matters. **Accessibility here is ADDITIVE to depth, never a dumbing-down** — the rigorous content stays; a plain layer is laid on top of it. The authoritative cross-surface definition is `paper_rule`'s "Plain-language comprehensibility" section; §14 is its slide implementation — defer to `paper_rule` when the two surfaces (paper text vs slide) need to agree. + +- **Plain-language takeaway lives in the assertion title (ties to §9).** The §9 assertion-headline must be *sayable by a non-expert*, not merely correct. The formula then lives in the body, glossed per §8 and wrapped in `$...$` per §12. + - ❌ formula-as-title: "Disentangling $z_a$/$z_b$ minimises $I(z_a;z_b|E_p)$" — correct, but opaque to anyone outside the sub-field. + - ✅ plain assertion: "Separating attack signal from normal signal cuts the leak attackers exploit to near-zero" — the same claim a non-expert can repeat, with $I(z_a;z_b|E_p)$ moved into the (glossed) body. +- **Intuition before the formula on the slide.** When a slide shows an objective / equation, one plain bullet ABOVE or beside it states what it *accomplishes* in everyday terms before the symbols appear (e.g. 
「先把攻擊訊號和正常訊號分開,讓攻擊者能利用的洩漏降到接近零」 above the `$...$` objective). The exporter renders whatever the body provides, so this is an AUTHORING duty (`paper-summary-author` / `thesis-deck-author` / regen scripts), not an exporter change. +- **Give every headline number a real-world anchor.** A KPI of "12.3 ms" or "F1 0.87" on its own means nothing to a non-expert; pair it with a plain anchor where the layout allows: "12.3 ms — faster than a blink", "F1 0.87 — ~87 of 100 calls correct". This complements §10 (which picks KPI vs chart vs table) by governing how the chosen number is *labelled*. +- **One analogy for the single hardest idea, at most.** A one-line analogy on the method slide for the most counter-intuitive concept — sparing, never decorative. An analogy on every slide is noise; the value comes from spending it on the one idea the audience is most likely to stumble on. +- **Self-test:** could an examiner from a DIFFERENT department, watching at presentation speed, leave this slide able to say what it shows and why it matters? If not, the plain layer is missing — add the plain assertion / intuition bullet / number anchor that closes the gap. + +**Why:** a deck can pass every geometry gate (§1–§7) and every §8 gloss yet still lose the half of the committee that isn't in the sub-field — the single most common「技術對但抓不到重點」complaint. §8 fixes "I don't know that word"; §14 fixes "I followed every word and still don't know the point". + +**Anti-pattern:** a slide titled with a raw formula, body = symbols only, KPI "92.3%" with no anchor — every term may be glossed yet the slide is opaque to anyone outside the sub-field. **Pattern:** assertion title in plain language (§9), an intuition bullet above the glossed `$...$`-wrapped formula (§8 / §12), and the KPI carrying a real-world anchor (§10) — depth preserved, point delivered. 
+ --- ## LLM-as-agent vs Python pipeline (enrichment dispatch) diff --git a/.claude/agents/tasks/paper-summary-author.md b/.claude/agents/tasks/paper-summary-author.md index 72ec60d..2ac6024 100644 --- a/.claude/agents/tasks/paper-summary-author.md +++ b/.claude/agents/tasks/paper-summary-author.md @@ -213,6 +213,7 @@ The fields you write here are what `slide-deck-rules` and `paper_rule` later gov - **Each slide-driving string is an assertion, not a topic label** (slide-deck-rules §9). Write a `rq_results` question / `pain_points` sub-head / contribution heading as a claim — "Disentangling za / zb cuts adversarial leakage to near-zero", not "Method". One message per unit: never fold two RQs into one `rq_results` block to save a slide. - **Pick the field that fits the data** (slide-deck-rules §10). A trend / many-value comparison goes in a `technique_table` / `rq_results.table` (→ table) or a `figures` entry (→ chart); the headline numbers go in `headline_metrics` (→ KPI callout); qualitative / sequential points go in the bullet fields. Don't cram a 5×4 result grid into prose bullets. - **Numbers follow the reporting rules** (paper_rule §數字與統計呈現). `headline_metrics` values use measurement-appropriate significant figures (92.3%, not 92.31748%), label percentage-points vs relative %, and report p-values as actual values — and never invent a digit the PDF doesn't state. +- **A non-expert must grasp the point, not just decode each term** (paper_rule "Plain-language comprehensibility", slide-deck-rules §14). §8 first-use glossing makes every *term* decodable; this bar makes the whole *argument* graspable by an adjacent-discipline 口試委員 / undergraduate / skimming reviewer. It is additive to depth, never a dumbing-down. 
Apply it to the specific fields you author: (a) every `pain_points` / `core_observation` / `rq_results` analysis string carries a plain "so what" a non-expert could repeat in one breath — ❌ "驗證器將拒絕率降至 0.03" ✅ "驗證器先攔下八成的錯誤草稿,所以最終輸出幾乎不再出錯(拒絕率 0.03)"; (b) a `headline_metrics` value pairs the number with a real-world anchor wherever the field's text allows — ❌ "延遲 12.3 ms" ✅ "延遲 12.3 ms(比一次眨眼還快)", ❌ "F1 0.87" ✅ "F1 0.87(約每 100 通電話對 87 通)"; (c) a `method_sections` body leads with one plain-language intuition sentence *before* any formula — ❌ opening on "$min\ I(z_a;z_b|E_p)$" ✅ "直覺上,我們要讓內容向量 $z_a$ 與風格向量 $z_b$ 互不洩漏,形式上即最小化 $I(z_a;z_b|E_p)$". Self-test before you ship a field: could a reader from a *different* department say what problem this solves, roughly how, and why it matters, without Googling? If not, add the one plain sentence — do NOT delete the technical content. - **Wrap math notation in `$...$`** (slide-deck-rules §12 math-delimiter contract). The exporter renders real subscripts / superscripts / italic variables only for `$...$`-delimited spans, and it does so on every content surface a thesis deck uses — bullets, KPI values, table cells, contribution / method body paragraphs, and RQ / core-observation callouts. So author `$I(z_a;z_b|E_p)$`, `$λ_{max}$`, `$x^2$` — never bare `I(za;zb|Ep)` / `lambda_max` / `x^2`, which ship flat ASCII. Use `_x` / `_{xy}` for subscript, `^x` / `^{xy}` for superscript; a single letter inside the span italicises as a variable, a multi-letter token stays upright as an operator. This is the same notation in `contributions_detailed`, `method_sections`, `core_observation`, and any `rq_results` text — keep it consistent across fields (one notation per concept). The original fang2026 deck shipped flat `za` precisely because its regen script omitted the `$...$`; don't repeat it. - **No fabrication** (paper_rule §不謊造). Every number / RQ result / limitation must come from the PDF you read. 
If the paper doesn't report it, leave the field empty — the exporter skips empty fields, which is correct. diff --git a/.claude/agents/tasks/post-author-audit.md b/.claude/agents/tasks/post-author-audit.md index adebc25..0d3ad2b 100644 --- a/.claude/agents/tasks/post-author-audit.md +++ b/.claude/agents/tasks/post-author-audit.md @@ -1,17 +1,24 @@ --- name: post-author-audit -description: After a regen_*.py with hand-authored PaperSummary entries has been written and run, perform four mandatory audits before the deck ships — (1) compare each authored Paper.url/doi/arxiv_id against the search xlsx to catch fabricated URLs, (2) classify off-topic downloads (keyword matches that don't fit the user's actual intent) and delete their pdf + lightweight pptx, (3) scan authored fields for drafting-management metadata (version tags, writing-guide file names, insertion markers) that must not reach the slides, and (4) scan for bare math notation not wrapped in $...$ (which ships as flat ASCII instead of real subscripts). Use after paper-summary-author finishes, before reporting deck-ready. +description: After a regen_*.py with hand-authored PaperSummary entries has been written and run, perform five mandatory audits before the deck ships — (1) compare each authored Paper.url/doi/arxiv_id against the search xlsx to catch fabricated URLs, (2) classify off-topic downloads (keyword matches that don't fit the user's actual intent) and delete their pdf + lightweight pptx, (3) scan authored fields for drafting-management metadata (version tags, writing-guide file names, insertion markers) that must not reach the slides, (4) scan for bare math notation not wrapped in $...$ (which ships as flat ASCII instead of real subscripts), and (5) judgement-scan the section-driving strings for correct-but-opaque content a non-expert cannot grasp (formula with no plain lead-in, bare number with no real-world anchor, section with no plain "so what"). 
Use after paper-summary-author or thesis-deck-author finishes, before reporting deck-ready. tools: Read, Bash, Edit, Grep, Glob --- -You are the post-authoring auditor for ThesisAgents's LLM-as-agent flow. You run AFTER `paper-summary-author` has authored a regen script and produced rich `.pptx` files. Your job is to catch the three failure modes that have historically slipped through: +You are the post-authoring auditor for ThesisAgents's LLM-as-agent flow. You run AFTER `paper-summary-author` **or** `thesis-deck-author` has authored a regen script and produced rich `.pptx` files. Your job is to catch the failure modes that have historically slipped through: 1. **Fabricated URL / DOI / arxiv_id** in a hand-authored `Paper`. Publisher URL paths cannot be guessed; the agent's first instinct is often wrong (e.g. inventing `view/fang2026` for AAAI when AAAI uses numeric volume IDs). A fabricated URL in the deck is worse than no URL — it visibly 404s the user. 2. **Off-topic downloads left in the run directory.** The search is keyword-based, so off-topic papers slip in (e.g. a Viterbi decoder paper matching "Claude code" because both contain "code"). The user sees the run dir; leaving off-topic pdf + lightweight pptx there is noise. 3. **Drafting-management metadata in an authored field.** Summaries assembled from a drop-in insert set or an earlier draft often carry version tags (「v3.5 新增」), a writing-guide file name (`paper_rule.md`), or insertion markers. Pasted verbatim into a `PaperSummary` field, they ride onto the slide where the reader cannot parse them. 4. **Bare math notation that ships flat.** The exporter renders real subscripts / superscripts only for notation wrapped in `$...$` (slide-deck-rules §12). An authored field that writes `I(za;zb|Ep)` or `lambda_max` without the delimiters renders as flat ASCII — "za" reads as a word, not z-sub-a. The original fang2026 deck shipped exactly this way, so it is a confirmed, recurring failure mode. +5. 
**Correct-but-opaque content a non-expert cannot grasp.** A field can be factually right, term-glossed, and still leave an adjacent-discipline reader unable to say what it means — a formula with no plain lead-in, a bare number with no real-world anchor, a section with no plain "so what" (paper_rule "Plain-language comprehensibility", slide-deck-rules §14). Unlike audits 1-4 this is a *judgement* scan, not a grep, so it reads each section-driving string and asks whether a reader from a different department could repeat the point. -You do NOT modify the rich summaries themselves — that's `paper-summary-author`'s job. You only audit + prune. +You do NOT modify the rich summaries themselves — that's the author agent's job (`paper-summary-author` or `thesis-deck-author`). You only audit + prune. + +### Which audit applies to which author agent + +- **Audits 1-3** (URL/DOI, off-topic prune, drafting-metadata) are **`paper-summary-author`-only** — they assume a search xlsx, a keyword-noise run directory, and drop-in-assembled drafts. A `thesis-deck-author` deck has no publisher URL, no search noise, and one intended thesis, so these do not apply (see `thesis-deck-author` "Mandatory audits"). +- **Audit 4** (flat-math) applies to **both** author agents — any authored field on either surface can ship bare notation. +- **Audit 5** (plain-language) applies to **both** author agents — a summary of someone else's paper and a candidate's own defence deck both face the same mixed audience, so both must read graspably end-to-end. ## Inputs you need @@ -185,6 +192,65 @@ attribute and confirm at least one subscript run exists on each slide that state a formula; a formula slide with zero baseline-shifted runs means the `$...$` was omitted. +## Audit 5 — Plain-language comprehensibility scan + +The point of this scan is the whole *argument*, not each term — §8 / Audit 4 +already cover term-level decodability. 
Here you ask whether a non-expert +(adjacent-discipline 口試委員, undergraduate, skimming reviewer) could grasp what +each section means, roughly how it works, and why it matters (paper_rule +"Plain-language comprehensibility", slide-deck-rules §14). **This is a judgement +audit, not a grep** — there is no regex that decides "graspable", so you read the +strings and apply the self-test, you do not pattern-match. + +Sample the **section-driving strings** in the regen script — the ones that title +or carry a slide, where opacity does the most damage: + +- every assertion title / sub-head (`pain_points` heads, `contributions_detailed` + headings, each `rq_results` question) +- `core_observation` (its own slide — the single most-repeated takeaway) +- each `rq_results` analysis string (the per-RQ "what this result means") +- each `headline_metrics` value+label (the KPI a reader quotes back) + +For each sampled string, flag it when it is **correct but opaque** in one of three +shapes: + +1. **Formula / symbol with no plain lead-in** — the string opens on or hinges on + `$...$` notation with no one-sentence intuition first. ❌ `core_observation` = + "最小化 $I(z_a;z_b|E_p)$ 即可解耦" → flag. ✅ "讓內容與風格互不洩漏(形式上即最小化 + $I(z_a;z_b|E_p)$)" → pass. +2. **Bare number with no real-world anchor** — a `headline_metrics` value that + states a magnitude with no sense of scale. ❌ "延遲 12.3 ms" → flag. ✅ "延遲 + 12.3 ms,比一次眨眼還快" → pass. ❌ "F1 0.87" → flag. ✅ "F1 0.87(約每 100 通 + 電話對 87 通)" → pass. +3. **Section with no plain "so what"** — an `rq_results` analysis or section head + that states *what was measured* but not *why it matters* to a non-expert. ❌ + "RQ2:拒絕率為 0.03" → flag. ✅ "RQ2:驗證器先攔下多數錯誤草稿,最終輸出幾乎不再 + 出錯(拒絕率 0.03)" → pass. 
+ +Judgement calls (avoid over-flagging — additive clarity, not dumbing-down): + +- **Do not flag depth.** A string that *also* carries the rigorous form is correct + — you flag only the *absence* of the plain lead-in / anchor / "so what", never + the presence of the formula. The fix is to *add* a plain sentence, never to + delete technical content. +- **Anchors are field-permitting.** A one-cell KPI value with no room for prose + is anchored on its companion caption / analysis string instead — flag only when + *no* nearby authored string gives the number a sense of scale. +- **One analogy is enough.** The rule asks for an analogy on the *single hardest* + concept, used sparingly — do not flag every section for lacking an analogy, and + do flag a deck that analogises everything (that is its own anti-pattern). + +The self-test to apply per deck: **could a reader from a *different* department +say what problem this solves, roughly how, and why it matters, without Googling?** +If not, list the offending strings. + +For each flagged string, output a **one-line suggested plain-language fix** (the +parent rewrites the authored field in the regen script and re-runs — same as +Audits 3-4, never edit the emitted `.pptx`). **PASS only when no flagged string +remains.** This audit runs over **both** `paper-summary-author` and +`thesis-deck-author` output (see "Which audit applies to which author agent" +above). + ## Reporting format ``` @@ -219,8 +285,19 @@ post-author audit — exports// — …… ... verdict: PASS / FAIL + +[5] Plain-language comprehensibility scan + sampled: section-driving strings + flagged: + : — "" → suggested: "" + ... + verdict: PASS / FAIL ``` +For a `thesis-deck-author` deck, audits [1]-[3] are reported as `n/a (own thesis)` +and only [4]-[5] carry a PASS/FAIL verdict (see "Which audit applies to which +author agent"). 
+ If audit 1 FAILs, the parent must fix and re-run — do NOT prune anything for a paper that has a URL/DOI violation, because the parent may decide to rewrite or remove that entry entirely. ## Things you do NOT do diff --git a/.claude/agents/tasks/thesis-deck-author.md b/.claude/agents/tasks/thesis-deck-author.md index 1e0311d..689d39a 100644 --- a/.claude/agents/tasks/thesis-deck-author.md +++ b/.claude/agents/tasks/thesis-deck-author.md @@ -111,6 +111,7 @@ same rules `paper-summary-author` lists, restated for the defence context: - **Gloss every term at first use** (`slide-deck-rules` §8). The committee may include an examiner outside the sub-field — define each acronym / library / metric the first time it appears. - **Numbers follow the reporting rules** (`paper_rule` §數字與統計呈現). Measurement-appropriate significant figures, label pp vs relative %, report p-values as actual values. - **No fabrication** (`paper_rule` §不謊造). Every metric, RQ result, and limitation comes from the candidate's actual thesis / experiments. If the thesis does not report a number, leave the field empty or carry the qualitative claim — never invent a digit to make a KPI slide look fuller. +- **A non-expert committee member must grasp the point** (`paper_rule` "Plain-language comprehensibility", `slide-deck-rules` §14). §8 glossing makes each *term* decodable, this bar makes the whole *argument* graspable. It matters acutely at a defence: a 口試 committee routinely seats an examiner *outside* the candidate's sub-field, and that examiner asks the first question — so author each section's plain "so what" and one plain-language intuition sentence *before* any formula, rather than assuming shared background. It is additive to rigour, never a dumbing-down. 
Concretely: (a) every section carries a takeaway a non-specialist could repeat — ❌ a methodology slide opening on "$min\ I(z_a;z_b|E_p)$" ✅ "直覺上,我們要讓內容與風格兩個向量互不洩漏,形式上即最小化 $I(z_a;z_b|E_p)$"; (b) every headline number gets a real-world anchor — ❌ "推論延遲 12.3 ms" ✅ "推論延遲 12.3 ms,比一次眨眼還快"; (c) one analogy / micro-example for the single hardest concept, used sparingly. Self-test each section: could an examiner from a *different* department state what problem this thesis solves, roughly how, and why it matters, without Googling? If not, add the one plain sentence — keep all the technical depth. ## The build (reuse the regen pattern, no exporter changes) diff --git a/CLAUDE.md b/CLAUDE.md index 5ab3fd1..fcfa043 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -97,8 +97,8 @@ Concretely: | Addition type | "Clear context" means | "Detailed explanation" means | |---|---|---| | Subagent rule | A **Why:** clause naming the past incident / failure mode the rule prevents. | At least one **example** of the rule applied + one **anti-pattern** showing how it fails. Rules without examples bit-rot. | -| Paper / thesis paragraph | A topic sentence locating the paragraph in the larger argument (which section, which RQ, which contribution). | Every technical term defined at first use (see `paper_rule` "Technical terminology"), and every quantitative claim cited or shown in a table. | -| Slide bullet | A sub-head that says what the slide as a whole is about. | Every acronym / math notation / library name glossed at first use (see `slide-deck-rules` §8 "Content clarity & first-use context"). | +| Paper / thesis paragraph | A topic sentence locating the paragraph in the larger argument (which section, which RQ, which contribution). 
| Every technical term defined at first use (see `paper_rule` "Technical terminology"), every quantitative claim cited or shown in a table, and an argument-level plain-language layer so a non-expert grasps the point, not just the words (see `paper_rule` "Plain-language comprehensibility"). | +| Slide bullet | A sub-head that says what the slide as a whole is about. | Every acronym / math notation / library name glossed at first use (see `slide-deck-rules` §8 "Content clarity & first-use context"), plus a plain-language takeaway so the slide's whole point lands for a non-expert reading at presentation speed (see `slide-deck-rules` §14). | | Code helper | A docstring naming the boundary the helper guards and the failure mode it prevents. | Type hints + one usage example in either the docstring or a unit test. | **Why this is a top-level rule rather than buried in one subagent**: it @@ -106,11 +106,17 @@ applies across every surface this project produces (rules, papers, slides, code) and has been the single most common review-cycle source of churn — "why does X exist?" / "what does Y do?" questions that should have been answered at write-time. The subagent-specific applications -(`paper_rule` "Technical terminology", `slide-deck-rules` §8 "Content -clarity & first-use context", `code-quality-reviewer` "docstring + -example") all derive from this top-level principle. When in doubt about -how to phrase an addition, default to "explain like the reader just -joined the conversation". +(`paper_rule` "Technical terminology" + "Plain-language +comprehensibility", `slide-deck-rules` §8 "Content clarity & first-use +context" + §14 "Plain-language comprehensibility for a mixed audience", +`code-quality-reviewer` "docstring + example") all derive from this +top-level principle. 
Term-level glossing answers "what does this word +mean", the comprehensibility rules answer the harder "I read every word +and still don't see the point" — both are required on any paper / deck +this project ships, so a non-expert (an adjacent-discipline committee +member, a reviewer skimming, an undergraduate) can follow it. When in +doubt about how to phrase an addition, default to "explain like the +reader just joined the conversation". **Prose punctuation in additions**: prefer `,` (Chinese) or `,` (English) to join clauses, and avoid `;` / `;`. **Why**: short comma-joined From d950c14757f82accbf5ff40b2f821f6abef23250 Mon Sep 17 00:00:00 2001 From: JeffreyChen Date: Sun, 14 Jun 2026 18:27:42 +0800 Subject: [PATCH 11/16] Strengthen search relevance ranking (stemming, phrase, synonyms, CJK) Relevance was exact lowercase-token overlap, so "transformer" missed "transformers", multi-word queries got no adjacency credit, acronyms never matched their expansions, and CJK queries got no signal at all. - conservative English stemming, min-stem >= 4 guards over-stripping - adjacency bonus so a query's phrase outranks scattered terms - small acronym <-> expansion synonym map (llm, rag, gnn, ...), expanding documents not the query so the relevance denominator stays honest - CJK character bigrams so Chinese / Japanese / Korean queries rank too Adds unit tests plus a golden-query regression set that pins relevance dominance over a 180k-citation off-topic paper. 
--- tests/test_dedup_ranking.py | 125 +++++++++++++++++++++++ thesisagents/core/ranking.py | 189 ++++++++++++++++++++++++++++++++--- 2 files changed, 298 insertions(+), 16 deletions(-) diff --git a/tests/test_dedup_ranking.py b/tests/test_dedup_ranking.py index 2a761dc..fdf715e 100644 --- a/tests/test_dedup_ranking.py +++ b/tests/test_dedup_ranking.py @@ -2,6 +2,8 @@ from __future__ import annotations +import pytest + from thesisagents.core.dedup import dedupe from thesisagents.core.models import Paper from thesisagents.core.ranking import rank @@ -193,3 +195,126 @@ def test_rank_short_query_terms_are_dropped(): miss = _paper(source_id="miss", title="A study of birds", year=2024) ordered = rank([miss, hit], keywords="a llm of", current_year=2025) assert ordered[0].source_id == "hit" + + +def test_rank_stemming_matches_plural(): + """A singular query term matches a plural/inflected title — same concept. + + Without stemming, "transformer" would miss "Transformers" entirely and the + on-topic paper would sort below an unrelated one on recency/citation alone. 
+ """ + hit = _paper(source_id="hit", title="Transformers in Vision", year=2024) + miss = _paper(source_id="miss", title="Graph Theory Basics", year=2024) + ordered = rank([miss, hit], keywords="transformer", current_year=2025) + assert ordered[0].source_id == "hit" + + +def test_rank_phrase_adjacency_bonus(): + """Two titles share all query words, but the one where they appear ADJACENT + (a real phrase) ranks above the one where they are scattered.""" + phrase = _paper( + source_id="phrase", title="Retrieval-Augmented Generation", year=2024, + ) + scattered = _paper( + source_id="scattered", title="Generation Augmented by Retrieval", year=2024, + ) + ordered = rank( + [scattered, phrase], + keywords="retrieval augmented generation", current_year=2025, + ) + assert ordered[0].source_id == "phrase" + + +def test_rank_synonym_acronym_expands(): + """A query for an acronym matches a title that only writes the long form.""" + hit = _paper( + source_id="hit", title="A Survey of Large Language Models", year=2024, + ) + miss = _paper(source_id="miss", title="Image Segmentation Methods", year=2024) + ordered = rank([miss, hit], keywords="llm", current_year=2025) + assert ordered[0].source_id == "hit" + + +def test_rank_synonym_does_not_overmatch_shared_word(): + """A lone shared word ("language") must NOT inject an unrelated acronym: + a query for "nlp" must not match a pure "language model" title.""" + lang_only = _paper( + source_id="lang", title="Sign Language Recognition", year=2024, + ) + nlp_paper = _paper( + source_id="nlp", title="Natural Language Processing Advances", year=2024, + ) + ordered = rank([lang_only, nlp_paper], keywords="nlp", current_year=2025) + assert ordered[0].source_id == "nlp" + + +def test_rank_cjk_query_has_relevance(): + """A Chinese-keyword search gets a real relevance signal (CJK bigrams), so an + on-topic Chinese title beats an off-topic one rather than tying on recency.""" + hit = _paper(source_id="hit", title="視覺注意力機制研究", year=2024) + 
miss = _paper(source_id="miss", title="區塊鏈技術概論", year=2024) + ordered = rank([miss, hit], keywords="注意力機制", current_year=2025) + assert ordered[0].source_id == "hit" + + +def test_rank_stemming_does_not_overstrip(): + """The min-stem-length guard keeps short words intact: "ring" must stay + "ring" (not collapse to "r"), so it still matches a "Ring ..." title and + "Rendering" correctly stems to "render" without colliding.""" + hit = _paper(source_id="hit", title="Ring Signatures", year=2024) + miss = _paper(source_id="miss", title="Rendering Pipelines", year=2024) + ordered = rank([miss, hit], keywords="ring", current_year=2025) + assert ordered[0].source_id == "hit" + + +# --- Golden-query regression set ------------------------------------------- +# A realistic candidate pool ranked end-to-end (relevance + recency + citation +# together), so a future change to the weights / stemmer / synonyms that quietly +# degrades real-world ranking is caught — not just the isolated-axis unit tests +# above. The invariant each case pins: the on-topic paper takes the #1 slot even +# though the pool contains a 180k-citation OFF-topic paper (resnet) — i.e. +# relevance still dominates raw citation count. (Below #1, the off-topic paper +# may legitimately fill a slot on a query where only one paper is on-topic; that +# is correct recency+citation behaviour, so the test pins #1, not the whole top +# 3.) The gaps are wide enough to survive small weight retunes; if one breaks, +# the ranking changed materially and the expectation should be re-justified. 
+def _golden_pool() -> list[Paper]: + return [ + _paper(source_id="attn", title="Attention Is All You Need", + year=2017, citation_count=90000), + _paper(source_id="xformer_survey", title="A Survey of Transformer Architectures", + year=2024, citation_count=120), + _paper(source_id="resnet", title="Deep Residual Learning for Image Recognition", + year=2016, citation_count=180000), + _paper(source_id="rag", + title="Retrieval-Augmented Generation for Knowledge-Intensive NLP", + year=2020, citation_count=5000), + _paper(source_id="gpt3", title="Large Language Models are Few-Shot Learners", + year=2020, citation_count=30000), + _paper(source_id="gnn", title="Graph Neural Networks: A Comprehensive Review", + year=2021, citation_count=800), + ] + + +@pytest.mark.parametrize( + ("query", "expected_top"), + [ + # Case-insensitive title-word match: "transformer" -> "Transformer". + ("transformer", "xformer_survey"), + # Exact multi-word phrase wins on adjacency + full overlap. + ("retrieval augmented generation", "rag"), + # Acronym synonym: "llm" surfaces the long-form "Large Language Models". + ("llm", "gpt3"), + # Plural in title ("Networks") + phrase; on-topic beats the 180k-cite resnet. + ("graph neural network", "gnn"), + ], +) +def test_rank_golden_queries(query, expected_top): + ordered = rank(_golden_pool(), keywords=query, current_year=2026) + top_ids = [p.source_id for p in ordered] + assert top_ids[0] == expected_top, f"{query!r} -> {top_ids[:3]}" + # Relevance dominance: the on-topic paper outranks the 180k-citation + # off-topic paper, even at a ~1500x citation disadvantage (120 vs 180000). 
+ assert top_ids.index(expected_top) < top_ids.index("resnet"), ( + f"off-topic resnet outranked {expected_top!r} for {query!r}: {top_ids}" + ) diff --git a/thesisagents/core/ranking.py b/thesisagents/core/ranking.py index 32a4dc3..c0e2917 100644 --- a/thesisagents/core/ranking.py +++ b/thesisagents/core/ranking.py @@ -13,7 +13,23 @@ * **relevance** (dominant) — overlap between the query keywords and the paper's title (weighted heavily) + abstract (weighted lightly). Research starts from "is this on my topic?", so an on-topic paper should beat an off-topic one even - when the off-topic one is older-and-more-cited. + when the off-topic one is older-and-more-cited. The relevance axis goes beyond + exact word matching in four ways so it does not silently miss on-topic papers: + + 1. **Light stemming** — a query for ``transformer`` matches a ``Transformers`` + title (a plural/inflection difference is the same concept). Stemming is + deliberately conservative: only a small whitelist of suffixes is stripped, + and only when the remaining stem stays ``>= _MIN_STEM_LEN`` so short words + (``bias``, ``ring``, ``gas``) are never mangled into a false match. + 2. **Phrase adjacency bonus** — for a multi-word query, a title where the + query words appear *adjacent* (``Retrieval-Augmented Generation``) scores + above one where they are merely *scattered* across the title. + 3. **Acronym synonyms** — a query for ``llm`` matches a title that only ever + writes ``large language model`` (and vice-versa), via a small curated map. + 4. **CJK support** — Chinese/Japanese/Korean runs are tokenised into character + bigrams, so a Chinese-keyword search gets a real relevance signal instead + of falling back to recency+citation only (the prior ``[a-z0-9]+`` tokenizer + dropped every CJK character). * **recency** — exponential decay over paper age (~5-year scale). 
* **citation** — ``log10`` of the citation count (diminishing returns), then damped by ``_CITATION_WEIGHT`` so a huge citation count is a strong tie-break @@ -40,14 +56,121 @@ # (citation term ≈ _CITATION_WEIGHT * log10(1e5) = 0.4 * 5 = 2.0 < 3.0). _TITLE_WEIGHT = 3.0 _ABSTRACT_WEIGHT = 0.6 +# Bonus when the query's adjacent word pairs (bigrams) also appear adjacent in +# the title. Smaller than _TITLE_WEIGHT so it only *re-orders* papers that +# already share the query words — a phrase match is a tie-break in favour of the +# more on-topic title, not a signal that can outweigh actual word overlap. +_PHRASE_WEIGHT = 1.0 # Damping on the log10 citation term. Keeps "most-cited" a meaningful tie-break # among similarly-relevant papers without letting it swamp the topic signal. _CITATION_WEIGHT = 0.4 # Query/title tokens shorter than this are dropped as stop-word-ish noise # ("a", "of", "is", "the"). 3 keeps useful short terms like "llm", "rag", "gan". _MIN_TERM_LEN = 3 +# A suffix is only stripped when the remaining stem stays at least this long. +# Guards against over-stripping short words into spurious collisions, e.g. +# "ring" -> "r", "bias" -> "bia", "gas" -> "ga". 4 keeps stemming useful for +# real content words ("transformers" -> "transformer", "learning" -> "learn") +# while leaving every short word untouched. +_MIN_STEM_LEN = 4 + +# One regex, two alternatives: an ASCII alphanumeric run, OR a run of CJK +# characters (CJK unified incl. Ext-A, hiragana, katakana, hangul syllables, +# CJK compatibility ideographs). Matching both in one pass keeps token order so +# adjacency (the phrase bonus) is computed across the original sequence. +_TOKEN_RE = re.compile( + r"[a-z0-9]+" + r"|[぀-ヿ㐀-鿿가-힯豈-﫿]+" +) + +# Conservative English suffix rules, longest/most-specific first so "ies" wins +# over "s" ("studies" -> "study", not "studie"). Each maps a suffix to its +# replacement. Only applied when the resulting stem stays >= _MIN_STEM_LEN. 
+_SUFFIX_RULES: tuple[tuple[str, str], ...] = ( + ("ies", "y"), + ("es", ""), + ("ed", ""), + ("ing", ""), + ("s", ""), +) + +# Acronym <-> expansion synonyms. Each entry lets a search for the short form +# surface papers that only write the long form, and vice-versa. +# Why: without this the relevance axis misses a whole class of on-topic papers +# (a "llm" search never matching a "Large Language Models: A Survey" title). +# Only acronyms of length >= _MIN_TERM_LEN are listed — shorter ones ("rl", +# "ml") would be dropped by the stop-word floor before they could be matched. +_SYNONYM_GROUPS: tuple[tuple[str, str], ...] = ( + ("llm", "large language model"), + ("rag", "retrieval augmented generation"), + ("gnn", "graph neural network"), + ("cnn", "convolutional neural network"), + ("rnn", "recurrent neural network"), + ("nlp", "natural language processing"), + ("vlm", "vision language model"), + ("gan", "generative adversarial network"), +) + + +def _stem(token: str) -> str: + """Conservatively normalise one token's English inflection. + + CJK bigrams, digits, and mixed alphanumerics pass through unchanged — only + pure ASCII alphabetic tokens are stemmed, and only when the stem stays + ``>= _MIN_STEM_LEN``. A trailing "ss" (``process``, ``address``) is left + alone so the plural "s" rule does not bite into a doubled consonant. + + Example: ``_stem("transformers") == "transformer"``; + ``_stem("ring") == "ring"`` (stripping "ing" -> "r" fails the length guard). + """ + if not token.isascii() or not token.isalpha(): + return token + for suffix, repl in _SUFFIX_RULES: + if suffix == "s" and token.endswith("ss"): + continue + if token.endswith(suffix): + stem = token[: len(token) - len(suffix)] + repl + return stem if len(stem) >= _MIN_STEM_LEN else token + return token -_WORD_RE = re.compile(r"[a-z0-9]+") + +def _ordered_tokens(text: str) -> list[str]: + """Position-ordered, stemmed, stop-word-filtered tokens of ``text``. 
+ + ASCII runs become stemmed words kept only when ``len >= _MIN_TERM_LEN``; CJK + runs become character bigrams (a one-character run is kept as-is). Order is + preserved across scripts so the bigram (phrase) pass sees real adjacency. + """ + out: list[str] = [] + for match in _TOKEN_RE.finditer(text.lower()): + chunk = match.group() + if chunk[0].isascii(): + stem = _stem(chunk) + if len(stem) >= _MIN_TERM_LEN: + out.append(stem) + elif len(chunk) == 1: + out.append(chunk) + else: + out.extend(chunk[i : i + 2] for i in range(len(chunk) - 1)) + return out + + +# Acronym -> stemmed long-form tokens. One direction only: an acronym is +# specific, so seeing "llm" in a document safely implies "large language model". +# The REVERSE (long form -> acronym) deliberately is NOT a per-token map — a lone +# shared word like "language" must not inject "nlp"/"vlm"; it requires the whole +# long form and is handled by _SYNONYM_LONG_TO_SHORT below. +_SYNONYM_EXPAND: dict[str, tuple[str, ...]] = { + short: tuple(_ordered_tokens(long_form)) + for short, long_form in _SYNONYM_GROUPS +} +# Whole stemmed long forms -> acronym: the acronym is added to a document's term +# set only when every token of the long form is present (so "Large Language +# Models" gains "llm", but "language models" alone does not). +_SYNONYM_LONG_TO_SHORT: tuple[tuple[frozenset[str], str], ...] = tuple( + (frozenset(_ordered_tokens(long_form)), short) + for short, long_form in _SYNONYM_GROUPS +) def rank( @@ -62,38 +185,72 @@ def rank( axis (single-paper / query-less callers). 
""" year_base = current_year or _CURRENT_YEAR_FALLBACK - terms = _terms(keywords) if keywords else frozenset() + terms = frozenset(_ordered_tokens(keywords)) if keywords else frozenset() + bigrams = _bigrams(keywords) if keywords else frozenset() return sorted( papers, - key=lambda paper: _score(paper, year_base, terms), + key=lambda paper: _score(paper, year_base, terms, bigrams), reverse=True, ) -def _score(paper: Paper, current_year: int, terms: frozenset[str]) -> float: +def _score( + paper: Paper, + current_year: int, + terms: frozenset[str], + query_bigrams: frozenset[tuple[str, str]], +) -> float: return ( - _relevance_score(paper, terms) + _relevance_score(paper, terms, query_bigrams) + _recency_score(paper.year, current_year) + _citation_score(paper.citation_count) ) -def _terms(text: str) -> frozenset[str]: - """Lowercase alphanumeric tokens of length >= ``_MIN_TERM_LEN``.""" - return frozenset(w for w in _WORD_RE.findall(text.lower()) if len(w) >= _MIN_TERM_LEN) +def _bigrams(text: str) -> frozenset[tuple[str, str]]: + """Adjacent token pairs of ``text`` (empty when fewer than two tokens).""" + tokens = _ordered_tokens(text) + return frozenset(zip(tokens, tokens[1:], strict=False)) + + +def _term_set(text: str) -> frozenset[str]: + """Synonym-expanded term set of a document field (title / abstract). + + Documents — not the query — are expanded, so the relevance denominator stays + the user's actual query size (expanding the query would dilute the fraction). + """ + base = set(_ordered_tokens(text)) + expanded = set(base) + for token in base: + expanded.update(_SYNONYM_EXPAND.get(token, ())) + for long_tokens, short in _SYNONYM_LONG_TO_SHORT: + if long_tokens <= base: + expanded.add(short) + return frozenset(expanded) -def _relevance_score(paper: Paper, terms: frozenset[str]) -> float: - """Fraction of query terms appearing in title / abstract, title-weighted. 
+def _relevance_score( + paper: Paper, + terms: frozenset[str], + query_bigrams: frozenset[tuple[str, str]], +) -> float: + """Title/abstract overlap with the query, title-weighted, plus a phrase bonus. - Range ``[0, _TITLE_WEIGHT + _ABSTRACT_WEIGHT]``. ``0.0`` when no query terms - were supplied, so the score reduces to recency + citation. + Range ``[0, _TITLE_WEIGHT + _ABSTRACT_WEIGHT + _PHRASE_WEIGHT]``. ``0.0`` + when no query terms were supplied, so the score reduces to recency + + citation. """ if not terms: return 0.0 - title_hit = len(terms & _terms(paper.title)) / len(terms) - abstract_hit = len(terms & _terms(paper.abstract)) / len(terms) - return title_hit * _TITLE_WEIGHT + abstract_hit * _ABSTRACT_WEIGHT + title_terms = _term_set(paper.title) + abstract_terms = _term_set(paper.abstract) + title_hit = len(terms & title_terms) / len(terms) + abstract_hit = len(terms & abstract_terms) / len(terms) + score = title_hit * _TITLE_WEIGHT + abstract_hit * _ABSTRACT_WEIGHT + if query_bigrams: + matched = len(query_bigrams & _bigrams(paper.title)) + score += _PHRASE_WEIGHT * (matched / len(query_bigrams)) + return score def _recency_score(year: int | None, current_year: int) -> float: From bf74306bd25922192f99d255c88f3df9ce3e7803 Mon Sep 17 00:00:00 2001 From: JeffreyChen Date: Sun, 14 Jun 2026 18:27:52 +0800 Subject: [PATCH 12/16] Add one-stop deck review (overflow + contrast + section completeness) Overflow and colour-contract checks lived in two scripts and the seven-section judgement was human-only. Move both checks into the package (exporters/overflow.py, exporters/audit.py; the scripts become thin re-export wrappers so existing imports keep working) and add exporters/review.py to bundle them with a paper_rule section-completeness check that reuses the exporter's own slide classifier. Exposed as the CLI `python -m thesisagents review [--lang] [--json]` and the MCP `pptx_review` tool. 
Completeness only fails a thesis-style deck, and references is gated only for multi-paper decks (a single-paper rich deck folds references into the cover, an own-thesis deck omits self-citation). --- scripts/_audit_dark_text.py | 144 +--------------- scripts/check_overflow.py | 247 ++-------------------------- tests/test_review.py | 203 +++++++++++++++++++++++ thesisagents/cli.py | 6 + thesisagents/exporters/audit.py | 152 +++++++++++++++++ thesisagents/exporters/overflow.py | 247 ++++++++++++++++++++++++++++ thesisagents/exporters/review.py | 255 +++++++++++++++++++++++++++++ thesisagents/mcp/server.py | 18 +- 8 files changed, 899 insertions(+), 373 deletions(-) create mode 100644 tests/test_review.py create mode 100644 thesisagents/exporters/audit.py create mode 100644 thesisagents/exporters/overflow.py create mode 100644 thesisagents/exporters/review.py diff --git a/scripts/_audit_dark_text.py b/scripts/_audit_dark_text.py index f670dca..d24c5ef 100644 --- a/scripts/_audit_dark_text.py +++ b/scripts/_audit_dark_text.py @@ -1,28 +1,10 @@ -"""Manual dark-mode text auditor for a single rendered ThesisAgents deck. +"""Manual dark-mode text auditor CLI (thin wrapper). -Why this script exists ----------------------- -The dark-mode / no-red / contrast contracts (see `.claude/agents/rules/deck-design.md`) -are pinned by `tests/test_exporters.py` regression tests — but those only run on -decks the exporter *generates*. CLAUDE.md's "Read Subagents BEFORE Editing Any -.pptx" rule extends the same contracts to **hand-made decks** (anything under -`exports/`, `assets/template/`, etc.), which no test covers. This is the companion -debug script the deck-design doc refers to: point it at any `.pptx` and it reports -every run that would render invisible / off-contract. +The implementation now lives in ``thesisagents.exporters.audit`` so the MCP +``pptx_review`` tool, the CLI ``review`` subcommand, the ``review_deck`` audit, +and this script all share one auditor. 
This file stays as the documented +command-line entry point: -Checks (each maps to a deck-design contract): - -1. **Invisible run** — `rgb is None` or `rgb == (0,0,0)`: inherits the theme - colour and renders near-black on the dark slide background. -2. **Red text** — `#C0392B` (`_BRAND_ACCENT`): banned as a TEXT colour in both - modes (reads as error, pattern-matches AI-generated emphasis). -3. **Light-on-light** — a near-white run inside a near-white-fill shape (both - luminances > 0.7 × 255): the contrast-contract invisibility bug. -4. **Off-palette run (warning)** — a run whose colour is none of the sanctioned - dark-mode run colours (`_ACCEPTED_DARK_RUN_COLORS`). Informational: a custom - colour may be intentional, but it's worth a human glance. - -Usage: .venv/Scripts/python.exe scripts/_audit_dark_text.py exports/.pptx [more.pptx ...] Exit code is the number of decks with at least one hard issue (checks 1-3); the @@ -31,122 +13,10 @@ from __future__ import annotations import sys -from dataclasses import dataclass -from pathlib import Path - -from pptx import Presentation - -# Sanctioned dark-mode run colours — the *values* of _LIGHT_TO_DARK_TEXT plus the -# near-white promotion target and the white table-header foreground (which sits on -# the dark navy header fill). Keep in sync with pptx.py if the palette changes. 
-_ACCEPTED_DARK_RUN_COLORS = frozenset({ - (0xE5, 0xE7, 0xEB), # near-white body text - (0x9C, 0xA3, 0xAF), # mid grey - (0x6B, 0x72, 0x80), # muted grey - (0x60, 0xA5, 0xFA), # blue-400 highlight - (0xFF, 0xFF, 0xFF), # white table-header foreground (on navy fill) -}) -_RED_ACCENT = (0xC0, 0x39, 0x2B) -_LIGHT_LUMA = 0.7 * 255 # luminance above which a colour counts as "light" - - -@dataclass(frozen=True) -class Issue: - slide: int - shape: str - kind: str # "invisible" | "red text" | "light-on-light" | "off-palette" - detail: str - hard: bool # hard issues fail the deck; warnings do not - - -def _rgb_tuple(rgb): - if rgb is None: - return None - return (int(rgb[0]), int(rgb[1]), int(rgb[2])) - - -def _luma(rgb: tuple[int, int, int]) -> float: - r, g, b = rgb - return 0.299 * r + 0.587 * g + 0.114 * b - - -def _shape_fill_rgb(shape): - """The shape's solid fill colour as an (r,g,b) tuple, or None if it has no - readable solid fill (background / pattern / inherited).""" - try: - fill = shape.fill - if fill.type is not None and fill.fore_color.type is not None: - return _rgb_tuple(fill.fore_color.rgb) - except (TypeError, ValueError, AttributeError): - return None - return None - - -def _iter_runs(shape): - if not shape.has_text_frame: - return - for paragraph in shape.text_frame.paragraphs: - for run in paragraph.runs: - if run.text.strip(): - yield run - - -def audit_deck(path: str | Path) -> list[Issue]: - """Return every dark-mode / contrast / red-text issue in the deck.""" - prs = Presentation(str(path)) - issues: list[Issue] = [] - for idx, slide in enumerate(prs.slides, start=1): - for shape in slide.shapes: - name = getattr(shape, "name", "?") or "?" 
- fill_rgb = _shape_fill_rgb(shape) - fill_light = fill_rgb is not None and _luma(fill_rgb) > _LIGHT_LUMA - for run in _iter_runs(shape): - rgb = _rgb_tuple(run.font.color.rgb if run.font.color and run.font.color.type - else None) - if rgb is None or rgb == (0, 0, 0): - issues.append(Issue(idx, name, "invisible", - f"rgb={rgb} renders near-black on dark bg", True)) - continue - if rgb == _RED_ACCENT: - issues.append(Issue(idx, name, "red text", "#C0392B is banned", True)) - continue - if fill_light and _luma(rgb) > _LIGHT_LUMA: - issues.append(Issue(idx, name, "light-on-light", - f"text {rgb} on fill {fill_rgb}", True)) - continue - if rgb not in _ACCEPTED_DARK_RUN_COLORS: - issues.append(Issue(idx, name, "off-palette", - f"rgb={rgb} not a sanctioned dark-mode colour", False)) - return issues - - -def _report(path: str | Path) -> bool: - issues = audit_deck(path) - hard = [i for i in issues if i.hard] - warn = [i for i in issues if not i.hard] - print(f"dark-text audit — {path}") - print(f"hard issues: {len(hard)}") - for i in hard: - print(f" slide {i.slide}, shape \"{i.shape}\": {i.kind} — {i.detail}") - print(f"warnings: {len(warn)}") - for i in warn: - print(f" slide {i.slide}, shape \"{i.shape}\": {i.kind} — {i.detail}") - verdict = "PASS" if not hard else "FAIL" - print(f"verdict: {verdict}") - return not hard +from thesisagents.exporters.audit import Issue, audit_deck, main # re-exported -def main(argv: list[str]) -> int: - if not argv: - print("usage: _audit_dark_text.py [more.pptx ...]") - return 2 - failed = 0 - for i, path in enumerate(argv): - if i: - print() - if not _report(path): - failed += 1 - return failed +__all__ = ["Issue", "audit_deck", "main"] if __name__ == "__main__": diff --git a/scripts/check_overflow.py b/scripts/check_overflow.py index 1d3a356..3ab593b 100644 --- a/scripts/check_overflow.py +++ b/scripts/check_overflow.py @@ -1,29 +1,10 @@ -"""Canonical slide-overflow inspector for ThesisAgents decks. 
+"""Canonical slide-overflow inspector CLI (thin wrapper). -Why this script exists ----------------------- -The `slide-overflow-check` subagent is supposed to "reuse the project's existing -inspector" — but no such file existed, so the agent had to reinvent an estimator -from a stub every run. This is that inspector: one runnable, importable module the -agent (and a regression test) can call. +The implementation now lives in ``thesisagents.exporters.overflow`` so the MCP +``pptx_review`` tool, the CLI ``review`` subcommand, the ``review_deck`` audit, +the regression test, and this script all share one inspector. This file stays as +the documented command-line entry point: -What "overflow" means here (mirrors slide-overflow-check.md): - -- 16:9 widescreen: slide is 13.333" x 7.5". -- Body sits between ``BODY_TOP = 1.5"`` and ``FOOTER_GUARD = 7.05"`` (the line - where page numbers / footer copy live). Nothing may render past 7.05". -- A shape overflows when its *wrapped, rendered* text height exceeds either - (1) the shape's own height, or (2) ``7.05"`` measured from the slide top. - -The wrap estimate reads each run's actual ``font.size`` (the exporter sets it -explicitly per run), classifies each character as full-width (CJK / kana / hangul -/ full-width forms ≈ 1.0 em) or half-width (Latin / digits / punctuation ≈ 0.55 -em), accumulates width per line, and wraps when a line exceeds the box's inner -width. Line height is the run's font size × 1.2 (PowerPoint single spacing). It is -a *rough* estimate — deliberately conservative, the same trade-off the manual -agent made — so it catches gross overflow without needing a font-metrics library. - -Usage: .venv/Scripts/python.exe scripts/check_overflow.py exports/.pptx [more.pptx ...] 
Exit code is the number of decks that FAILED (0 = all clean), so CI / a test can @@ -32,219 +13,15 @@ from __future__ import annotations import sys -import unicodedata -from dataclasses import dataclass -from pathlib import Path - -from pptx import Presentation -from pptx.enum.text import MSO_AUTO_SIZE -from pptx.util import Emu - -_EMU_PER_INCH = 914400 -_FOOTER_GUARD_IN = 7.05 -_FOOTER_GUARD_EMU = int(_FOOTER_GUARD_IN * _EMU_PER_INCH) -# python-pptx default textbox inner margins are 0.1" left + 0.1" right. -_DEFAULT_SIDE_MARGIN_IN = 0.1 -_FULL_WIDTH_EM = 1.0 # CJK / kana / hangul / full-width forms -_HALF_WIDTH_EM = 0.55 # Latin / digits / ASCII punctuation -_LINE_SPACING = 1.2 # PowerPoint single line spacing ≈ 1.2 × font size -_DEFAULT_FONT_PT = 18 # used only when a run carries no explicit size -_TABLE_FONT_PT = 14 # _TABLE_PT in pptx.py — cell font when a run has none -_CELL_V_MARGIN_IN = 0.1 # exporter sets 0.05" top + 0.05" bottom per cell -# Box-overflow tolerance: ignore a sub-fraction-of-a-line overshoot so rounding -# in the estimate doesn't flag a box that visually fits. -_BOX_TOLERANCE_IN = 0.08 - -# Chrome / decoration the exporter places intentionally — these are NOT body -# content and never "overflow" in the meaningful sense: the top/left accent bars -# are fixed-geometry rectangles, and the page number + footer live *at* the -# footer line (7.05") by design, so a footer-guard check on them is a false -# positive. Everything else (title / body / subhead / kpi / rq_box / -# paper_subtitle / tables / figures) is real content and gets checked. 
-_CHROME_NAMES = frozenset({"page_number", "footer"}) -_CHROME_PREFIXES = ("accent",) - - -def _is_chrome(name: str) -> bool: - return name in _CHROME_NAMES or name.startswith(_CHROME_PREFIXES) - - -@dataclass(frozen=True) -class Violation: - slide: int - shape: str - kind: str # "overflows its box" | "crosses footer guard" - rendered_in: float # measured value, inches - limit_in: float # the limit it broke, inches - - -def _is_full_width(ch: str) -> bool: - """True for characters that occupy ~1 em (CJK, kana, hangul, full-width).""" - if ch in ("\t", "\n"): - return False - return unicodedata.east_asian_width(ch) in ("F", "W") - - -def _char_em(ch: str) -> float: - return _FULL_WIDTH_EM if _is_full_width(ch) else _HALF_WIDTH_EM - - -def _run_font_pt(run, fallback: int) -> int: - size = run.font.size - return int(size.pt) if size is not None else fallback - - -def _paragraph_lines(paragraph, inner_width_pt: float, fallback_pt: int) -> tuple[int, int]: - """Estimate (wrapped line count, max font pt) for one paragraph. - - Width is accumulated per character at the run's own font size, so a mixed - CJK + Latin line wraps where it actually would. An empty paragraph still - occupies one line at the fallback size. - """ - runs = list(paragraph.runs) - if not runs: - return 1, fallback_pt - max_pt = fallback_pt - line_w = 0.0 - lines = 1 - for run in runs: - pt = _run_font_pt(run, fallback_pt) - max_pt = max(max_pt, pt) - for ch in run.text: - if ch == "\n": - lines += 1 - line_w = 0.0 - continue - char_w = _char_em(ch) * pt - if line_w + char_w > inner_width_pt and line_w > 0: - lines += 1 - line_w = char_w - else: - line_w += char_w - return lines, max_pt - - -def _text_height_in(text_frame, box_width_emu: int) -> float: - """Estimated rendered height of a text frame, in inches. - - An empty text frame (decorative rectangle, blank placeholder) contributes - no height — it must not be charged a fallback line. 
- """ - if not (text_frame.text or "").strip(): - return 0.0 - box_width_in = box_width_emu / _EMU_PER_INCH - ml = (text_frame.margin_left or Emu(int(_DEFAULT_SIDE_MARGIN_IN * _EMU_PER_INCH))) - mr = (text_frame.margin_right or Emu(int(_DEFAULT_SIDE_MARGIN_IN * _EMU_PER_INCH))) - inner_width_in = max(0.1, box_width_in - (ml + mr) / _EMU_PER_INCH) - inner_width_pt = inner_width_in * 72 - total_pt = 0.0 - for paragraph in text_frame.paragraphs: - lines, max_pt = _paragraph_lines(paragraph, inner_width_pt, _DEFAULT_FONT_PT) - total_pt += lines * max_pt * _LINE_SPACING - return total_pt / 72 - - -def _table_height_in(shape) -> float: - """Estimated *rendered* height of a table, in inches. python-pptx grows a - row to fit wrapped cell text, but the GraphicFrame's declared ``height`` - does not change — so a many-row or long-cell table renders far taller than - declared and can cross the footer guard while ``shape.height`` says it fits. - Sum each row's height from its tallest cell's wrapped line count. - """ - table = shape.table - col_w = [c.width or 0 for c in table.columns] - total = 0.0 - for r in range(len(table.rows)): - row_lines = 1 - for c in range(len(table.columns)): - inner_in = max(0.1, col_w[c] / _EMU_PER_INCH - 2 * _CELL_V_MARGIN_IN) - inner_pt = inner_in * 72 - cell_lines = sum( - _paragraph_lines(p, inner_pt, _TABLE_FONT_PT)[0] - for p in table.cell(r, c).text_frame.paragraphs - ) - row_lines = max(row_lines, cell_lines) - total += row_lines * _TABLE_FONT_PT * _LINE_SPACING / 72 + _CELL_V_MARGIN_IN - return total - - -def _shape_violations(slide_idx: int, shape) -> list[Violation]: - out: list[Violation] = [] - name = getattr(shape, "name", "?") or "?" 
- if _is_chrome(name): - return out # exporter-placed accent bars / page number / footer - top = shape.top or 0 - height = shape.height or 0 - if getattr(shape, "has_table", False): - # Use the estimated rendered height (≥ declared) for the footer-guard - # check, since the table grows past its declared box when cells wrap. - rendered_in = _table_height_in(shape) - height = max(height, int(rendered_in * _EMU_PER_INCH)) - if shape.has_text_frame: - tf = shape.text_frame - shrink = tf.auto_size == MSO_AUTO_SIZE.TEXT_TO_FIT_SHAPE - if not shrink: - rendered_in = _text_height_in(tf, shape.width or 0) - if rendered_in - height / _EMU_PER_INCH > _BOX_TOLERANCE_IN: - out.append(Violation( - slide_idx, name, "overflows its box", - round(rendered_in, 2), round(height / _EMU_PER_INCH, 2), - )) - # Footer-guard check applies to every content shape (incl. tables, pictures, - # shrink-to-fit titles): the box itself must clear 7.05". - bottom = top + height - if bottom > _FOOTER_GUARD_EMU + 1: - out.append(Violation( - slide_idx, name, "crosses footer guard", - round(bottom / _EMU_PER_INCH, 2), _FOOTER_GUARD_IN, - )) - return out - - -def check_pptx_from_prs(prs) -> list[Violation]: - """Walk every slide / shape of an open Presentation and return overflow - violations (empty = clean). 
Split from ``check_pptx`` so tests can build a - deck in memory without writing a temp file.""" - violations: list[Violation] = [] - for idx, slide in enumerate(prs.slides, start=1): - for shape in slide.shapes: - violations.extend(_shape_violations(idx, shape)) - return violations - - -def check_pptx(path: str | Path) -> list[Violation]: - """Walk every slide / shape and return overflow violations (empty = clean).""" - return check_pptx_from_prs(Presentation(str(path))) - - -def _report(path: str | Path) -> bool: - prs = Presentation(str(path)) - n_slides = len(prs.slides) - n_shapes = sum(len(s.shapes) for s in prs.slides) - violations = check_pptx_from_prs(prs) - print(f"overflow check — {path}") - print(f"slides: {n_slides}") - print(f"shapes: {n_shapes}") - print(f"violations: {len(violations)}") - for v in violations: - print(f" slide {v.slide}, shape \"{v.shape}\": {v.kind} " - f"— rendered {v.rendered_in}\" vs {v.limit_in}\"") - verdict = "PASS" if not violations else "FAIL" - print(f"verdict: {verdict}") - return not violations +from thesisagents.exporters.overflow import ( # re-exported for callers/tests + Violation, + check_pptx, + check_pptx_from_prs, + main, +) -def main(argv: list[str]) -> int: - if not argv: - print("usage: check_overflow.py [more.pptx ...]") - return 2 - failed = 0 - for i, path in enumerate(argv): - if i: - print() - if not _report(path): - failed += 1 - return failed +__all__ = ["Violation", "check_pptx", "check_pptx_from_prs", "main"] if __name__ == "__main__": diff --git a/tests/test_review.py b/tests/test_review.py new file mode 100644 index 0000000..504f6de --- /dev/null +++ b/tests/test_review.py @@ -0,0 +1,203 @@ +"""Tests for the one-stop deck reviewer (thesisagents.exporters.review). 
+ +review_deck folds three audits into one call — overflow, colour contracts, and +paper_rule section completeness — so these pin that each is wired in and that +completeness only gates a thesis-style deck (a lightweight abstract-only deck +must never be failed for legitimately lacking sections). +""" +from __future__ import annotations + +import json + +from pptx import Presentation +from pptx.dml.color import RGBColor +from pptx.util import Inches, Pt + +from thesisagents.core.models import ( + ExportOptions, + Paper, + PaperCollection, + PaperSummary, + Query, +) +from thesisagents.exporters.pptx import PptxExporter +from thesisagents.exporters.review import review_deck + + +def _export(tmp_path, summary, *, language="zh-tw", stem="review"): + paper = Paper( + source="local", source_id="t", title="審片測試論文", + authors=("A",), year=2026, venue="Test", abstract="一段研究摘要。", + url="", summary=summary, + ) + collection = PaperCollection( + query=Query(keywords="x", sources=("local",)), papers=(paper,), + ) + return PptxExporter().export(collection, ExportOptions( + formats=("pptx",), out_dir=str(tmp_path), filename_stem=stem, + language=language, + )) + + +_FULL_THESIS_SUMMARY = PaperSummary( + language="zh-tw", + contributions_detailed=(("貢獻一", "提出新方法。"),), + technique_table=(("互資訊", "分解對抗成分"),), + method_sections=(("步驟", ("先分離成分。", "再中和。")),), + evaluation_sections=(("資料集", ("用三組基準。",)),), + headline_metrics=(("準確率", "92.3%", "65.4"),), + core_observation="把對抗與良性成分在 latent 空間分離。", + model="test", +) + + +def test_review_clean_thesis_deck_ok(tmp_path): + out = _export(tmp_path, _FULL_THESIS_SUMMARY) + review = review_deck(out) + assert review.language == "zh-tw", "language should auto-detect from titles" + assert review.thesis_style is True + assert review.overflow == () + assert review.hard_contrast == [] + assert review.missing_sections == (), f"unexpected gaps: {review.missing_sections}" + assert review.ok is True + + +def 
test_review_explicit_language_override(tmp_path): + out = _export(tmp_path, _FULL_THESIS_SUMMARY) + review = review_deck(out, language="zh-tw") + assert review.language == "zh-tw" + assert review.ok is True + + +def test_review_flags_overflow(tmp_path): + prs = Presentation() + slide = prs.slides.add_slide(prs.slide_layouts[6]) + # A real content shape whose box bottom (top 7.0" + height 1.0") sits past the + # 7.05" footer guard. + box = slide.shapes.add_textbox(Inches(1), Inches(7.0), Inches(6), Inches(1.0)) + box.name = "body" + box.text_frame.text = "這段內容越過了頁尾守線" + box.text_frame.paragraphs[0].runs[0].font.size = Pt(18) + box.text_frame.paragraphs[0].runs[0].font.color.rgb = RGBColor(0x1F, 0x3A, 0x66) + path = tmp_path / "overflow.pptx" + prs.save(str(path)) + + review = review_deck(path) + assert any(v.kind == "crosses footer guard" for v in review.overflow) + assert review.ok is False + + +def test_review_flags_invisible_run(tmp_path): + prs = Presentation() + slide = prs.slides.add_slide(prs.slide_layouts[6]) + box = slide.shapes.add_textbox(Inches(1), Inches(1), Inches(6), Inches(1)) + box.name = "body" + box.text_frame.text = "黑字在深色背景上看不見" + box.text_frame.paragraphs[0].runs[0].font.size = Pt(18) + box.text_frame.paragraphs[0].runs[0].font.color.rgb = RGBColor(0, 0, 0) + path = tmp_path / "invisible.pptx" + prs.save(str(path)) + + review = review_deck(path) + assert any(i.kind == "invisible" and i.hard for i in review.contrast) + assert review.ok is False + + +def test_review_lightweight_deck_not_failed_for_missing_sections(tmp_path): + # No summary -> lightweight abstract-only deck. It legitimately lacks most + # canonical sections, but must NOT be failed for it. 
+ out = _export(tmp_path, None, stem="lightweight") + review = review_deck(out) + assert review.thesis_style is False + assert review.missing_sections, "a lightweight deck is expected to lack sections" + assert review.ok is True, "completeness must not gate a lightweight deck" + + +def test_review_thesis_deck_missing_section_fails(tmp_path): + # A thesis-style deck (has metrics + core observation) but no methodology or + # literature slides -> those sections are missing and the deck FAILS. + partial = PaperSummary( + language="zh-tw", + headline_metrics=(("準確率", "92.3%", "65.4"),), + core_observation="一段核心觀察。", + model="test", + ) + out = _export(tmp_path, partial, stem="partial") + review = review_deck(out) + assert review.thesis_style is True + assert "methodology" in review.missing_sections + assert "literature_review" in review.missing_sections + assert review.ok is False + + +def _export_multi(tmp_path, n=2): + papers = tuple( + Paper( + source="arxiv", source_id=f"p{i}", title=f"Paper {i} on Attention", + authors=("A",), year=2024, venue="V", abstract="Some abstract text.", + url=f"https://e.com/{i}", arxiv_id=f"24{i:02d}.0000{i}", + ) + for i in range(1, n + 1) + ) + collection = PaperCollection( + query=Query(keywords="attention", sources=("arxiv",)), papers=papers, + ) + return PptxExporter().export(collection, ExportOptions( + formats=("pptx",), out_dir=str(tmp_path), filename_stem="multi", language="en", + )) + + +def test_review_multipaper_deck_has_references(tmp_path): + # A real multi-paper deck carries an agenda + references slide; references is + # therefore present and not flagged. 
+ out = _export_multi(tmp_path) + review = review_deck(out) + assert review.references_missing is False + assert "references" not in review.missing_sections + + +def test_review_multipaper_missing_references_fails(tmp_path): + # A multi-paper-shaped deck (has an Agenda slide) with NO references slide: + # references is a genuine gap, so the deck FAILS even though it isn't + # thesis-style. + prs = Presentation() + blank = prs.slide_layouts[6] + for title in ("Cover", "Agenda", "Paper 1 on Attention", "Paper 2 on Attention"): + slide = prs.slides.add_slide(blank) + box = slide.shapes.add_textbox(Inches(1), Inches(0.4), Inches(8), Inches(1)) + box.name = "title" + box.text_frame.text = title + box.text_frame.paragraphs[0].runs[0].font.size = Pt(28) + box.text_frame.paragraphs[0].runs[0].font.color.rgb = RGBColor(0x1F, 0x3A, 0x66) + path = tmp_path / "no_refs.pptx" + prs.save(str(path)) + + review = review_deck(path, language="en") + assert review.references_missing is True + assert "references" in review.missing_sections + assert review.ok is False + + +def test_review_single_paper_rich_deck_does_not_gate_references(tmp_path): + # A single-paper rich deck (no agenda) folds references into the cover, so a + # missing references slide must NOT be flagged. 
+ out = _export(tmp_path, _FULL_THESIS_SUMMARY, stem="single") + review = review_deck(out) + assert review.references_missing is False + assert "references" not in review.missing_sections + + +def test_review_json_output(tmp_path, capsys): + from thesisagents.exporters import review as review_mod + + out = _export(tmp_path, _FULL_THESIS_SUMMARY, stem="jsondeck") + code = review_mod.main(["--json", str(out)]) + payload = json.loads(capsys.readouterr().out) + assert isinstance(payload, list) and len(payload) == 1 + entry = payload[0] + assert set(entry) >= { + "path", "language", "ok", "overflow", "contrast", + "missing_sections", "references_missing", + } + assert entry["ok"] is True + assert code == 0 diff --git a/thesisagents/cli.py b/thesisagents/cli.py index c67c4e9..1fbdced 100644 --- a/thesisagents/cli.py +++ b/thesisagents/cli.py @@ -983,6 +983,12 @@ def main(argv: list[str] | None = None) -> int: if not raw_argv or raw_argv[0] == "gui": gui_extra_argv = raw_argv[1:] if raw_argv else [] return _dispatch_gui(gui_extra_argv) + # ``review`` audits an existing .pptx (overflow + colour contracts + section + # completeness) and short-circuits before argparse for the same reason as the + # gui shim: it takes file paths, not the query/paper/pdf mode mutex group. + if raw_argv[0] == "review": + from thesisagents.exporters.review import main as review_main + return review_main(raw_argv[1:]) # Discovery flags short-circuit before argparse: they answer "what can I # search / export?" and so must NOT trip the required query/paper/pdf mutex # group (same reasoning as the bare-invocation gui shim above). diff --git a/thesisagents/exporters/audit.py b/thesisagents/exporters/audit.py new file mode 100644 index 0000000..9af4c49 --- /dev/null +++ b/thesisagents/exporters/audit.py @@ -0,0 +1,152 @@ +"""Dark-mode / contrast / red-text deck auditor (library home). + +This is the importable home of the colour-contract audit. 
``scripts/_audit_dark_text.py`` +is a thin CLI wrapper around it, and ``thesisagents.exporters.review`` folds it +into the one-stop ``review_deck`` audit. The dark-mode / no-red / contrast +contracts (see ``.claude/agents/rules/deck-design.md``) are also pinned by the +``tests/test_exporters.py`` regression tests — but those only run on decks the +exporter *generates*. CLAUDE.md's "Read Subagents BEFORE Editing Any .pptx" rule +extends the same contracts to **hand-made decks**, which no test covers; this +module is what catches them. + +Checks (each maps to a deck-design contract): + +1. **Invisible run** — ``rgb is None`` or ``rgb == (0,0,0)``: inherits the theme + colour and renders near-black on the dark slide background. +2. **Red text** — ``#C0392B`` (``_BRAND_ACCENT``): banned as a TEXT colour in + both modes (reads as error, pattern-matches AI-generated emphasis). +3. **Light-on-light** — a near-white run inside a near-white-fill shape (both + luminances > 0.7 × 255): the contrast-contract invisibility bug. +4. **Off-palette run (warning)** — a run whose colour is none of the sanctioned + dark-mode run colours (``_ACCEPTED_DARK_RUN_COLORS``). Informational. + +Importable: ``audit_deck(path) -> list[Issue]``. +""" +from __future__ import annotations + +import sys +from dataclasses import dataclass +from pathlib import Path + +from pptx import Presentation + +# Sanctioned dark-mode run colours — the *values* of _LIGHT_TO_DARK_TEXT plus the +# near-white promotion target and the white table-header foreground (which sits on +# the dark navy header fill). Keep in sync with pptx.py if the palette changes. 
+_ACCEPTED_DARK_RUN_COLORS = frozenset({ + (0xE5, 0xE7, 0xEB), # near-white body text + (0x9C, 0xA3, 0xAF), # mid grey + (0x6B, 0x72, 0x80), # muted grey + (0x60, 0xA5, 0xFA), # blue-400 highlight + (0xFF, 0xFF, 0xFF), # white table-header foreground (on navy fill) +}) +_RED_ACCENT = (0xC0, 0x39, 0x2B) +_LIGHT_LUMA = 0.7 * 255 # luminance above which a colour counts as "light" + + +@dataclass(frozen=True) +class Issue: + slide: int + shape: str + kind: str # "invisible" | "red text" | "light-on-light" | "off-palette" + detail: str + hard: bool # hard issues fail the deck; warnings do not + + +def _rgb_tuple(rgb): + if rgb is None: + return None + return (int(rgb[0]), int(rgb[1]), int(rgb[2])) + + +def _luma(rgb: tuple[int, int, int]) -> float: + r, g, b = rgb + return 0.299 * r + 0.587 * g + 0.114 * b + + +def _shape_fill_rgb(shape): + """The shape's solid fill colour as an (r,g,b) tuple, or None if it has no + readable solid fill (background / pattern / inherited).""" + try: + fill = shape.fill + if fill.type is not None and fill.fore_color.type is not None: + return _rgb_tuple(fill.fore_color.rgb) + except (TypeError, ValueError, AttributeError): + return None + return None + + +def _iter_runs(shape): + if not shape.has_text_frame: + return + for paragraph in shape.text_frame.paragraphs: + for run in paragraph.runs: + if run.text.strip(): + yield run + + +def audit_deck(path: str | Path) -> list[Issue]: + """Return every dark-mode / contrast / red-text issue in the deck.""" + return audit_prs(Presentation(str(path))) + + +def audit_prs(prs) -> list[Issue]: + """Audit an already-open Presentation. Split from ``audit_deck`` so the + one-stop ``review_deck`` can load the file once and share the object.""" + issues: list[Issue] = [] + for idx, slide in enumerate(prs.slides, start=1): + for shape in slide.shapes: + name = getattr(shape, "name", "?") or "?" 
+ fill_rgb = _shape_fill_rgb(shape) + fill_light = fill_rgb is not None and _luma(fill_rgb) > _LIGHT_LUMA + for run in _iter_runs(shape): + rgb = _rgb_tuple(run.font.color.rgb if run.font.color and run.font.color.type + else None) + if rgb is None or rgb == (0, 0, 0): + issues.append(Issue(idx, name, "invisible", + f"rgb={rgb} renders near-black on dark bg", True)) + continue + if rgb == _RED_ACCENT: + issues.append(Issue(idx, name, "red text", "#C0392B is banned", True)) + continue + if fill_light and _luma(rgb) > _LIGHT_LUMA: + issues.append(Issue(idx, name, "light-on-light", + f"text {rgb} on fill {fill_rgb}", True)) + continue + if rgb not in _ACCEPTED_DARK_RUN_COLORS: + issues.append(Issue(idx, name, "off-palette", + f"rgb={rgb} not a sanctioned dark-mode colour", False)) + return issues + + +def _report(path: str | Path) -> bool: + issues = audit_deck(path) + hard = [i for i in issues if i.hard] + warn = [i for i in issues if not i.hard] + print(f"dark-text audit — {path}") + print(f"hard issues: {len(hard)}") + for i in hard: + print(f" slide {i.slide}, shape \"{i.shape}\": {i.kind} — {i.detail}") + print(f"warnings: {len(warn)}") + for i in warn: + print(f" slide {i.slide}, shape \"{i.shape}\": {i.kind} — {i.detail}") + verdict = "PASS" if not hard else "FAIL" + print(f"verdict: {verdict}") + return not hard + + +def main(argv: list[str]) -> int: + if not argv: + print("usage: _audit_dark_text.py <deck.pptx> [more.pptx ...]") + return 2 + failed = 0 + for i, path in enumerate(argv): + if i: + print() + if not _report(path): + failed += 1 + return failed + + +if __name__ == "__main__": + raise SystemExit(main(sys.argv[1:])) diff --git a/thesisagents/exporters/overflow.py b/thesisagents/exporters/overflow.py new file mode 100644 index 0000000..f18b3f7 --- /dev/null +++ b/thesisagents/exporters/overflow.py @@ -0,0 +1,247 @@ +"""Slide-overflow inspector for ThesisAgents decks (library home). + +This is the importable home of the overflow check.
``scripts/check_overflow.py`` +is a thin CLI wrapper around it, and ``thesisagents.exporters.review`` folds it +into the one-stop ``review_deck`` audit. Keeping the logic in the package (not a +script) means the MCP ``pptx_review`` tool, the CLI ``review`` subcommand, the +regression test, and the manual script all share one implementation. + +What "overflow" means here (mirrors ``slide-overflow-check.md``): + +- 16:9 widescreen: slide is 13.333" x 7.5". +- Body sits between ``BODY_TOP = 1.5"`` and ``FOOTER_GUARD = 7.05"`` (the line + where page numbers / footer copy live). Nothing may render past 7.05". +- A shape overflows when its *wrapped, rendered* text height exceeds either + (1) the shape's own height, or (2) ``7.05"`` measured from the slide top. + +The wrap estimate reads each run's actual ``font.size`` (the exporter sets it +explicitly per run), classifies each character as full-width (CJK / kana / hangul +/ full-width forms ≈ 1.0 em) or half-width (Latin / digits / punctuation ≈ 0.55 +em), accumulates width per line, and wraps when a line exceeds the box's inner +width. Line height is the run's font size × 1.2 (PowerPoint single spacing). It is +a *rough* estimate — deliberately conservative — so it catches gross overflow +without needing a font-metrics library. + +Importable: ``check_pptx(path) -> list[Violation]`` and +``check_pptx_from_prs(prs) -> list[Violation]``. +""" +from __future__ import annotations + +import sys +import unicodedata +from dataclasses import dataclass +from pathlib import Path + +from pptx import Presentation +from pptx.enum.text import MSO_AUTO_SIZE +from pptx.util import Emu + +_EMU_PER_INCH = 914400 +_FOOTER_GUARD_IN = 7.05 +_FOOTER_GUARD_EMU = int(_FOOTER_GUARD_IN * _EMU_PER_INCH) +# python-pptx default textbox inner margins are 0.1" left + 0.1" right. 
+_DEFAULT_SIDE_MARGIN_IN = 0.1 +_FULL_WIDTH_EM = 1.0 # CJK / kana / hangul / full-width forms +_HALF_WIDTH_EM = 0.55 # Latin / digits / ASCII punctuation +_LINE_SPACING = 1.2 # PowerPoint single line spacing ≈ 1.2 × font size +_DEFAULT_FONT_PT = 18 # used only when a run carries no explicit size +_TABLE_FONT_PT = 14 # _TABLE_PT in pptx.py — cell font when a run has none +_CELL_V_MARGIN_IN = 0.1 # exporter sets 0.05" top + 0.05" bottom per cell +# Box-overflow tolerance: ignore a sub-fraction-of-a-line overshoot so rounding +# in the estimate doesn't flag a box that visually fits. +_BOX_TOLERANCE_IN = 0.08 + +# Chrome / decoration the exporter places intentionally — these are NOT body +# content and never "overflow" in the meaningful sense: the top/left accent bars +# are fixed-geometry rectangles, and the page number + footer live *at* the +# footer line (7.05") by design, so a footer-guard check on them is a false +# positive. Everything else (title / body / subhead / kpi / rq_box / +# paper_subtitle / tables / figures) is real content and gets checked. 
+_CHROME_NAMES = frozenset({"page_number", "footer"}) +_CHROME_PREFIXES = ("accent",) + + +def _is_chrome(name: str) -> bool: + return name in _CHROME_NAMES or name.startswith(_CHROME_PREFIXES) + + +@dataclass(frozen=True) +class Violation: + slide: int + shape: str + kind: str # "overflows its box" | "crosses footer guard" + rendered_in: float # measured value, inches + limit_in: float # the limit it broke, inches + + +def _is_full_width(ch: str) -> bool: + """True for characters that occupy ~1 em (CJK, kana, hangul, full-width).""" + if ch in ("\t", "\n"): + return False + return unicodedata.east_asian_width(ch) in ("F", "W") + + +def _char_em(ch: str) -> float: + return _FULL_WIDTH_EM if _is_full_width(ch) else _HALF_WIDTH_EM + + +def _run_font_pt(run, fallback: int) -> int: + size = run.font.size + return int(size.pt) if size is not None else fallback + + +def _paragraph_lines(paragraph, inner_width_pt: float, fallback_pt: int) -> tuple[int, int]: + """Estimate (wrapped line count, max font pt) for one paragraph. + + Width is accumulated per character at the run's own font size, so a mixed + CJK + Latin line wraps where it actually would. An empty paragraph still + occupies one line at the fallback size. + """ + runs = list(paragraph.runs) + if not runs: + return 1, fallback_pt + max_pt = fallback_pt + line_w = 0.0 + lines = 1 + for run in runs: + pt = _run_font_pt(run, fallback_pt) + max_pt = max(max_pt, pt) + for ch in run.text: + if ch == "\n": + lines += 1 + line_w = 0.0 + continue + char_w = _char_em(ch) * pt + if line_w + char_w > inner_width_pt and line_w > 0: + lines += 1 + line_w = char_w + else: + line_w += char_w + return lines, max_pt + + +def _text_height_in(text_frame, box_width_emu: int) -> float: + """Estimated rendered height of a text frame, in inches. + + An empty text frame (decorative rectangle, blank placeholder) contributes + no height — it must not be charged a fallback line. 
+ """ + if not (text_frame.text or "").strip(): + return 0.0 + box_width_in = box_width_emu / _EMU_PER_INCH + ml = (text_frame.margin_left or Emu(int(_DEFAULT_SIDE_MARGIN_IN * _EMU_PER_INCH))) + mr = (text_frame.margin_right or Emu(int(_DEFAULT_SIDE_MARGIN_IN * _EMU_PER_INCH))) + inner_width_in = max(0.1, box_width_in - (ml + mr) / _EMU_PER_INCH) + inner_width_pt = inner_width_in * 72 + total_pt = 0.0 + for paragraph in text_frame.paragraphs: + lines, max_pt = _paragraph_lines(paragraph, inner_width_pt, _DEFAULT_FONT_PT) + total_pt += lines * max_pt * _LINE_SPACING + return total_pt / 72 + + +def _table_height_in(shape) -> float: + """Estimated *rendered* height of a table, in inches. python-pptx grows a + row to fit wrapped cell text, but the GraphicFrame's declared ``height`` + does not change — so a many-row or long-cell table renders far taller than + declared and can cross the footer guard while ``shape.height`` says it fits. + Sum each row's height from its tallest cell's wrapped line count. + """ + table = shape.table + col_w = [c.width or 0 for c in table.columns] + total = 0.0 + for r in range(len(table.rows)): + row_lines = 1 + for c in range(len(table.columns)): + inner_in = max(0.1, col_w[c] / _EMU_PER_INCH - 2 * _CELL_V_MARGIN_IN) + inner_pt = inner_in * 72 + cell_lines = sum( + _paragraph_lines(p, inner_pt, _TABLE_FONT_PT)[0] + for p in table.cell(r, c).text_frame.paragraphs + ) + row_lines = max(row_lines, cell_lines) + total += row_lines * _TABLE_FONT_PT * _LINE_SPACING / 72 + _CELL_V_MARGIN_IN + return total + + +def _shape_violations(slide_idx: int, shape) -> list[Violation]: + out: list[Violation] = [] + name = getattr(shape, "name", "?") or "?" 
+ if _is_chrome(name): + return out # exporter-placed accent bars / page number / footer + top = shape.top or 0 + height = shape.height or 0 + if getattr(shape, "has_table", False): + # Use the estimated rendered height (≥ declared) for the footer-guard + # check, since the table grows past its declared box when cells wrap. + rendered_in = _table_height_in(shape) + height = max(height, int(rendered_in * _EMU_PER_INCH)) + if shape.has_text_frame: + tf = shape.text_frame + shrink = tf.auto_size == MSO_AUTO_SIZE.TEXT_TO_FIT_SHAPE + if not shrink: + rendered_in = _text_height_in(tf, shape.width or 0) + if rendered_in - height / _EMU_PER_INCH > _BOX_TOLERANCE_IN: + out.append(Violation( + slide_idx, name, "overflows its box", + round(rendered_in, 2), round(height / _EMU_PER_INCH, 2), + )) + # Footer-guard check applies to every content shape (incl. tables, pictures, + # shrink-to-fit titles): the box itself must clear 7.05". + bottom = top + height + if bottom > _FOOTER_GUARD_EMU + 1: + out.append(Violation( + slide_idx, name, "crosses footer guard", + round(bottom / _EMU_PER_INCH, 2), _FOOTER_GUARD_IN, + )) + return out + + +def check_pptx_from_prs(prs) -> list[Violation]: + """Walk every slide / shape of an open Presentation and return overflow + violations (empty = clean). 
Split from ``check_pptx`` so tests can build a + deck in memory without writing a temp file.""" + violations: list[Violation] = [] + for idx, slide in enumerate(prs.slides, start=1): + for shape in slide.shapes: + violations.extend(_shape_violations(idx, shape)) + return violations + + +def check_pptx(path: str | Path) -> list[Violation]: + """Walk every slide / shape and return overflow violations (empty = clean).""" + return check_pptx_from_prs(Presentation(str(path))) + + +def _report(path: str | Path) -> bool: + prs = Presentation(str(path)) + n_slides = len(prs.slides) + n_shapes = sum(len(s.shapes) for s in prs.slides) + violations = check_pptx_from_prs(prs) + print(f"overflow check — {path}") + print(f"slides: {n_slides}") + print(f"shapes: {n_shapes}") + print(f"violations: {len(violations)}") + for v in violations: + print(f" slide {v.slide}, shape \"{v.shape}\": {v.kind} " + f"— rendered {v.rendered_in}\" vs {v.limit_in}\"") + verdict = "PASS" if not violations else "FAIL" + print(f"verdict: {verdict}") + return not violations + + +def main(argv: list[str]) -> int: + if not argv: + print("usage: check_overflow.py [more.pptx ...]") + return 2 + failed = 0 + for i, path in enumerate(argv): + if i: + print() + if not _report(path): + failed += 1 + return failed + + +if __name__ == "__main__": + raise SystemExit(main(sys.argv[1:])) diff --git a/thesisagents/exporters/review.py b/thesisagents/exporters/review.py new file mode 100644 index 0000000..e6d9a24 --- /dev/null +++ b/thesisagents/exporters/review.py @@ -0,0 +1,255 @@ +"""One-stop deck reviewer: overflow + colour-contract + section completeness. + +Why this exists +--------------- +The three deck-quality contracts used to live in separate places — slide overflow +in ``scripts/check_overflow.py``, the dark-mode / no-red / contrast rules in +``scripts/_audit_dark_text.py``, and the "does a thesis deck cover the seven +``paper_rule`` sections" judgement only in a human-read subagent. 
To review a +deck you had to run several tools and eyeball the section coverage. ``review_deck`` +folds all three into one call, exposed as the CLI ``review`` subcommand and the +MCP ``pptx_review`` tool. + +What it reports +--------------- +1. **overflow** — shapes whose wrapped text crosses their box or the 7.05" footer + guard (reuses ``overflow.check_pptx_from_prs``). +2. **contrast** — invisible / red / light-on-light runs (reuses + ``audit.audit_prs``); ``hard`` issues fail the deck, warnings don't. +3. **completeness** — which of the canonical ``paper_rule`` sections + (Introduction, Literature Review, Methodology, Experiment, Conclusion, + References) the deck's slides cover, recovered from the exporter's own + ``_categorise_slides`` title classifier. The five body sections gate only a + *thesis-style* deck (``thesis_style`` flag) — a lightweight abstract-only deck + legitimately lacks them; References gates only a multi-paper (agenda) deck. + +``review_deck(path, language=None) -> DeckReview``. ``language`` may be omitted — +it is then auto-detected from the slide titles. +""" +from __future__ import annotations + +import json +import sys +from dataclasses import dataclass +from pathlib import Path + +from pptx import Presentation + +from thesisagents.exporters.audit import Issue, audit_prs +from thesisagents.exporters.i18n import ( + DEFAULT_LANGUAGE, + SUPPORTED_LANGUAGES, + normalise_language, +) +from thesisagents.exporters.overflow import Violation, check_pptx_from_prs + +# Reuse the exporter's own tested title -> category classifier rather than +# re-deriving one. Same package; the leading underscore marks it internal to the +# exporters layer, which review.py is part of. +from thesisagents.exporters.pptx import _categorise_slides + +# Canonical paper_rule body sections -> the slide CATEGORIES that satisfy each. A +# deck "covers" a section if at least one slide carries one of its categories.
+# Two of the seven paper_rule sections are intentionally NOT gated by this table: +# * Abstract — the cover + overview always provide it, so it can't go missing. +# * References — the rich single-paper tier folds the cited paper into the +# cover / contribution-summary instead of a distinct references slide, so a +# table entry would false-fail every such deck; review_deck gates References +# separately, and only for a multi-paper deck (one with an agenda slide). +# What remains are exactly the five body sections CLAUDE.md flags as the ones a +# rich deck must not drop (Literature Review, Experiment, Conclusion in particular). +_SECTION_CATEGORIES: tuple[tuple[str, frozenset[str]], ...] = ( + ("introduction", frozenset({ + "overview", "pain_points", "research_question", + "contributions", "contribution_summary", + })), + ("literature_review", frozenset({"technique_table", "literature_table"})), + ("methodology", frozenset({"method_details", "system_overview"})), + ("experiment", frozenset({ + "evaluation", "research_questions", "rq_results", "metrics", + })), + ("conclusion", frozenset({"limitations_future", "core_observation"})), +) + +# Categories only a thesis-style (rich) deck emits. Body-section completeness is +# a PASS/FAIL gate only when one of these is present; otherwise the deck is +# lightweight and its missing body sections are advisory, not a failure. +_THESIS_CATEGORIES = frozenset({ + "pain_points", "research_question", "method_details", "evaluation", + "research_questions", "rq_results", "technique_table", "literature_table", + "system_overview", "metrics", "core_observation", "limitations_future", + "contribution_summary", +}) + + +@dataclass(frozen=True) +class DeckReview: + path: str + language: str + thesis_style: bool + overflow: tuple[Violation, ...] + contrast: tuple[Issue, ...] + missing_sections: tuple[str, ...]
+ references_missing: bool = False + + @property + def hard_contrast(self) -> list[Issue]: + """Contrast issues that fail the deck (invisible / red / light-on-light).""" + return [i for i in self.contrast if i.hard] + + @property + def completeness_failed(self) -> bool: + """When missing sections fail the deck. + + Two different gates, because the exporter emits the sections under + different conditions: + * the five body sections gate only a *thesis-style* deck (a lightweight + abstract-only deck legitimately lacks them), and + * ``references`` gates only a *multi-paper* deck, which always carries a + references slide — a single-paper rich deck folds references into the + cover and an own-thesis deck omits self-citation, so neither is failed. + """ + body_missing = any(s != "references" for s in self.missing_sections) + return self.references_missing or (self.thesis_style and body_missing) + + @property + def ok(self) -> bool: + return not (self.overflow or self.hard_contrast or self.completeness_failed) + + def to_dict(self) -> dict: + """JSON-friendly shape for the MCP ``pptx_review`` tool.""" + return { + "path": self.path, + "language": self.language, + "thesis_style": self.thesis_style, + "ok": self.ok, + "overflow": [ + { + "slide": v.slide, "shape": v.shape, "kind": v.kind, + "rendered_in": v.rendered_in, "limit_in": v.limit_in, + } + for v in self.overflow + ], + "contrast": [ + { + "slide": i.slide, "shape": i.shape, "kind": i.kind, + "detail": i.detail, "hard": i.hard, + } + for i in self.contrast + ], + "missing_sections": list(self.missing_sections), + "references_missing": self.references_missing, + "completeness_gated": self.thesis_style, + } + + +def _detect_language(prs) -> str: + """Best-guess the deck's language from how many slide titles a locale matches. 
+ + The exporter localises every section title, so the deck's real language is + the one whose strings classify the most slides (``cover`` / ``overview`` are + language-independent and excluded from the score). Ties / no match -> ``en``. + """ + best_lang, best_score = DEFAULT_LANGUAGE, -1 + for lang in SUPPORTED_LANGUAGES: + cats = _categorise_slides(prs, lang) + score = sum(c not in ("unknown", "cover", "overview") for c in cats) + if score > best_score: + best_lang, best_score = lang, score + return best_lang + + +def review_deck(path: str | Path, language: str | None = None) -> DeckReview: + """Run all three deck audits and return a consolidated ``DeckReview``. + + Loads the ``.pptx`` once and shares the open object across the overflow, + contrast, and completeness passes. ``language`` is auto-detected when omitted. + """ + prs = Presentation(str(path)) + lang = normalise_language(language) if language else _detect_language(prs) + overflow = tuple(check_pptx_from_prs(prs)) + contrast = tuple(audit_prs(prs)) + categories = set(_categorise_slides(prs, lang)) + body_missing = tuple( + name for name, satisfied_by in _SECTION_CATEGORIES + if not (satisfied_by & categories) + ) + thesis_style = bool(categories & _THESIS_CATEGORIES) + # A multi-paper deck is the one with a shared "agenda" slide; it always + # carries a references slide listing the cited papers, so a missing one is a + # real gap. A single-paper rich deck (no agenda) folds references into the + # cover, and an own-thesis deck omits self-citation — neither is flagged. 
+ references_missing = "agenda" in categories and "references" not in categories + missing = body_missing + (("references",) if references_missing else ()) + return DeckReview( + str(path), lang, thesis_style, overflow, contrast, missing, references_missing, + ) + + +def format_report(review: DeckReview) -> str: + """Human-readable one-deck report for the CLI ``review`` subcommand.""" + lines = [ + f"deck review — {review.path}", + f"language: {review.language}" + + ("" if review.thesis_style else " (lightweight deck)"), + ] + hard = review.hard_contrast + warn = [i for i in review.contrast if not i.hard] + lines.append(f"overflow: {len(review.overflow)}") + for v in review.overflow: + lines.append( + f' slide {v.slide}, shape "{v.shape}": {v.kind} ' + f"— rendered {v.rendered_in}\" vs {v.limit_in}\"" + ) + lines.append(f"contrast: {len(hard)} hard, {len(warn)} warning") + for i in hard: + lines.append(f' slide {i.slide}, shape "{i.shape}": {i.kind} — {i.detail}') + if review.missing_sections: + lines.append( + f"completeness: {len(review.missing_sections)} section(s) missing " + f"— {', '.join(review.missing_sections)}" + ) + elif review.thesis_style: + lines.append("completeness: all sections present") + else: + lines.append("completeness: not gated (lightweight single-paper deck)") + lines.append(f"verdict: {'PASS' if review.ok else 'FAIL'}") + return "\n".join(lines) + + +def main(argv: list[str]) -> int: + """CLI entry: ``review [--lang XX] [--json] [more.pptx ...]``. + + Exit code is the number of decks that FAILED (0 = all clean), so CI / a + wrapper can assert on it. ``--json`` emits one machine-readable array of + ``DeckReview.to_dict()`` objects instead of the human report, so a CI step + can parse the overflow / contrast / missing-section detail. 
+ """ + language: str | None = None + as_json = False + paths: list[str] = [] + i = 0 + while i < len(argv): + if argv[i] in ("--lang", "--language") and i + 1 < len(argv): + language = argv[i + 1] + i += 2 + continue + if argv[i] == "--json": + as_json = True + i += 1 + continue + paths.append(argv[i]) + i += 1 + if not paths: + print("usage: review [--lang XX] [--json] [more.pptx ...]") + return 2 + reviews = [review_deck(path, language) for path in paths] + if as_json: + print(json.dumps([r.to_dict() for r in reviews], ensure_ascii=False, indent=2)) + else: + print("\n\n".join(format_report(r) for r in reviews)) + return sum(1 for r in reviews if not r.ok) + + +if __name__ == "__main__": + raise SystemExit(main(sys.argv[1:])) diff --git a/thesisagents/mcp/server.py b/thesisagents/mcp/server.py index eadd886..a40bd7e 100644 --- a/thesisagents/mcp/server.py +++ b/thesisagents/mcp/server.py @@ -26,6 +26,10 @@ thesis-style layout. ``dark_mode`` defaults to False (project default is the light navy-band deck); pass True for the dark OLED/low-light variant. - pptx_inspect(path) -> {slides: [...]} +- pptx_review(path, language?) -> {overflow, contrast, missing_sections, ok} + Audit an existing deck against the overflow, colour-contract, and + paper_rule section-completeness rules in one call. ``language`` is + auto-detected from the slide titles when omitted. - pptx_update_slide(path, slide_index, title?, body?, meta?, shape_updates?) 
-> {path} - pptx_delete_slide(path, slide_index) -> {path} - pptx_reorder_slides(path, new_order) -> {path} @@ -61,7 +65,7 @@ from thesisagents.core.pdf_download import download_pdfs as core_download_pdfs from thesisagents.core.pipeline import run_search, run_single_paper from thesisagents.core.query import normalize_query -from thesisagents.exporters import export_collection, pptx_edit +from thesisagents.exporters import export_collection, pptx_edit, review from thesisagents.fetchers.http import shutdown_clients from thesisagents.utils.logging import get_logger @@ -407,6 +411,18 @@ def pptx_inspect(path: str) -> dict[str, Any]: ], } + @server.tool() + def pptx_review(path: str, language: str | None = None) -> dict[str, Any]: + """Audit a deck: overflow + colour contracts + section completeness. + + Returns overflow violations, contrast issues (invisible / red text / + light-on-light), and which canonical paper_rule sections the deck is + missing. ``language`` is auto-detected from the slide titles when + omitted. Section completeness only fails a thesis-style deck; a + lightweight abstract-only deck legitimately lacks most sections. + """ + return review.review_deck(path, language).to_dict() + @server.tool() def pptx_update_slide( path: str, From 1f6d5b98f53cece7e53a49ae8c2d0761fd56f676 Mon Sep 17 00:00:00 2001 From: JeffreyChen Date: Sun, 14 Jun 2026 18:28:00 +0800 Subject: [PATCH 13/16] Localise deck truncation marker and preserve over-cap bullets The "(+N more)" over-cap marker was hard-coded English, wrong on a zh-tw / ja deck. Add a more_items key across all 14 locales and thread the deck language into _cap_bullets and its multi-column callers. When an evaluation or limitations section exceeds the per-cell bullet cap, render it full-width paginated (at most cap bullets per page) instead of dropping the overflow behind the marker, so no author content is lost. 
--- tests/test_exporters.py | 86 +++++++++++++++++++++++++++ thesisagents/exporters/i18n.py | 14 +++++ thesisagents/exporters/pptx.py | 103 ++++++++++++++++++++++++++++----- 3 files changed, 190 insertions(+), 13 deletions(-) diff --git a/tests/test_exporters.py b/tests/test_exporters.py index 6b44ca9..d686af6 100644 --- a/tests/test_exporters.py +++ b/tests/test_exporters.py @@ -362,6 +362,92 @@ def test_pptx_own_thesis_drops_source_slide_bibtex_and_self_reference(tmp_path): assert "BibTeX" not in every_run_text +def test_pptx_over_cap_marker_is_localised(tmp_path): + """When a section exceeds the per-slide bullet cap, the "+N more" marker is + rendered in the deck's language — never a hard-coded English "(+N more)". + + Why: `_cap_bullets` appends a count marker (slide-deck-rules §3) instead of + silently dropping bullets; before this fix the marker was English on every + deck, so a zh-tw deck showed "(+2 more)" amid Traditional Chinese body text. + """ + from pptx import Presentation + + from thesisagents.core.models import Paper, PaperSummary + + summary = PaperSummary( + language="zh-tw", + # Eight bullets in one method section overruns the 6-per-subsection cap. 
+ method_sections=(("步驟", tuple(f"第{i}步驟說明文字" for i in range(1, 9))),), + core_observation="核心觀察。", + model="test", + ) + paper = Paper( + source="local", source_id="t", title="標註在地化測試", + authors=("A",), year=2026, venue="T", abstract="摘要", url="", summary=summary, + ) + collection = PaperCollection( + query=Query(keywords="x", sources=("local",)), papers=(paper,), + ) + options = ExportOptions( + formats=("pptx",), out_dir=str(tmp_path), filename_stem="marker", + language="zh-tw", + ) + written = export_collection(collection, options) + prs = Presentation(str(written["pptx"])) + every_run_text = " ".join( + run.text + for slide in prs.slides + for shape in slide.shapes + if shape.has_text_frame + for para in shape.text_frame.paragraphs + for run in para.runs + ) + assert "(還有 2 項)" in every_run_text, "localised over-cap marker missing" + assert "more)" not in every_run_text, "English marker leaked into a zh-tw deck" + + +def test_pptx_long_evaluation_section_preserves_all_bullets(tmp_path): + """An evaluation section with more bullets than a 2-column grid cell holds + renders as height-paginated stacks so EVERY bullet survives — none is dropped + behind a "(+N more)" marker — and the result still clears the footer guard. 
+ """ + from pptx import Presentation + + from thesisagents.core.models import Paper, PaperSummary + from thesisagents.exporters.overflow import check_pptx + + bullets = tuple(f"Finding number {i} with its own distinct text" for i in range(1, 10)) + summary = PaperSummary( + language="en", + evaluation_sections=(("Benchmark results", bullets),), # 9 > the 6 cell cap + core_observation="Stacked rendering preserves every evaluation bullet.", + model="test", + ) + paper = Paper( + source="local", source_id="t", title="Evaluation Heavy", + authors=("A",), year=2026, venue="V", abstract="", url="", summary=summary, + ) + collection = PaperCollection( + query=Query(keywords="x", sources=("local",)), papers=(paper,), + ) + written = export_collection(collection, ExportOptions( + formats=("pptx",), out_dir=str(tmp_path), filename_stem="evalheavy", language="en", + )) + prs = Presentation(str(written["pptx"])) + every_run_text = " ".join( + run.text + for slide in prs.slides + for shape in slide.shapes + if shape.has_text_frame + for para in shape.text_frame.paragraphs + for run in para.runs + ) + for i in range(1, 10): + assert f"Finding number {i} " in every_run_text, f"evaluation bullet {i} dropped" + assert "more)" not in every_run_text, "content was truncated instead of paginated" + assert check_pptx(written["pptx"]) == [], "stacked evaluation overflowed" + + def _find_run_color(prs, target_rgb: tuple[int, int, int]) -> bool: for slide in prs.slides: for shape in slide.shapes: diff --git a/thesisagents/exporters/i18n.py b/thesisagents/exporters/i18n.py index 2a63b72..63d1b32 100644 --- a/thesisagents/exporters/i18n.py +++ b/thesisagents/exporters/i18n.py @@ -21,6 +21,7 @@ _TABLE: dict[str, dict[str, str]] = { "en": { + "more_items": "(+{n} more)", "agenda": "Agenda", "references": "References", "paper_review_prefix": "Paper Review:", @@ -70,6 +71,7 @@ "no_authors_dash": "—", }, "zh-tw": { + "more_items": "(還有 {n} 項)", "agenda": "議程", "references": "參考文獻", 
"paper_review_prefix": "論文回顧:", @@ -119,6 +121,7 @@ "no_authors_dash": "—", }, "zh-cn": { + "more_items": "(还有 {n} 项)", "agenda": "议程", "references": "参考文献", "paper_review_prefix": "论文回顾:", @@ -168,6 +171,7 @@ "no_authors_dash": "—", }, "ja": { + "more_items": "(他 {n} 件)", "agenda": "目次", "references": "参考文献", "paper_review_prefix": "論文レビュー:", @@ -217,6 +221,7 @@ "no_authors_dash": "—", }, "es": { + "more_items": "(+{n} más)", "agenda": "Índice", "references": "Referencias", "paper_review_prefix": "Revisión del artículo:", @@ -266,6 +271,7 @@ "no_authors_dash": "—", }, "fr": { + "more_items": "(+{n} autres)", "agenda": "Sommaire", "references": "Références", "paper_review_prefix": "Revue d'article :", @@ -315,6 +321,7 @@ "no_authors_dash": "—", }, "de": { + "more_items": "(+{n} weitere)", "agenda": "Inhalt", "references": "Literatur", "paper_review_prefix": "Paper-Review:", @@ -364,6 +371,7 @@ "no_authors_dash": "—", }, "ko": { + "more_items": "(외 {n}개)", "agenda": "목차", "references": "참고문헌", "paper_review_prefix": "논문 리뷰:", @@ -413,6 +421,7 @@ "no_authors_dash": "—", }, "pt": { + "more_items": "(+{n} mais)", "agenda": "Sumário", "references": "Referências", "paper_review_prefix": "Análise do artigo:", @@ -462,6 +471,7 @@ "no_authors_dash": "—", }, "ru": { + "more_items": "(ещё {n})", "agenda": "Содержание", "references": "Список литературы", "paper_review_prefix": "Обзор статьи:", @@ -511,6 +521,7 @@ "no_authors_dash": "—", }, "it": { + "more_items": "(+{n} altri)", "agenda": "Indice", "references": "Bibliografia", "paper_review_prefix": "Recensione dell'articolo:", @@ -560,6 +571,7 @@ "no_authors_dash": "—", }, "vi": { + "more_items": "(+{n} mục nữa)", "agenda": "Mục lục", "references": "Tài liệu tham khảo", "paper_review_prefix": "Tổng quan bài báo:", @@ -609,6 +621,7 @@ "no_authors_dash": "—", }, "hi": { + "more_items": "(+{n} और)", "agenda": "विषय-सूची", "references": "संदर्भ", "paper_review_prefix": "शोध-पत्र समीक्षा:", @@ -658,6 +671,7 @@ "no_authors_dash": 
"—", }, "id": { + "more_items": "(+{n} lainnya)", "agenda": "Daftar Isi", "references": "Referensi", "paper_review_prefix": "Tinjauan Makalah:", diff --git a/thesisagents/exporters/pptx.py b/thesisagents/exporters/pptx.py index 72a623d..0df89bf 100644 --- a/thesisagents/exporters/pptx.py +++ b/thesisagents/exporters/pptx.py @@ -765,6 +765,7 @@ def _add_pain_points_slide( left=_MARGIN_X, top=Inches(1.7), width=_BODY_WIDTH, height=Inches(4.4), columns=2, + language=ctx.language, ) if chunk_index == 0 and summary.research_question: _add_rq_callout( @@ -974,6 +975,51 @@ def _add_method_details_slides( for heading, bullets in chunk: cursor = _add_subsection( slide, heading, bullets, cursor, width=_BODY_WIDTH, + language=ctx.language, + ) + + +def _any_section_over_cell_cap(sections) -> bool: + """True when a section has more bullets than a 2-column grid cell shows. + + The grid path caps a cell at ``_BULLETS_PER_CELL_MAX`` and appends a + "(+N more)" marker — fine when content fits, but a section with more bullets + than that loses the overflow. Callers test this and, when True, render the + section list via ``_add_paginated_bullet_sections`` (full-width, one section + per slide, split into <= cap pages) where every bullet survives instead of + being dropped from a cramped cell. + + Example: an evaluation section with eight findings renders as two full-width + pages ("… (1/2)" with six, "… (2/2)" with two) rather than a 2-column grid + that would show six + "(+2 more)". + """ + return any(len(bullets) > _BULLETS_PER_CELL_MAX for _heading, bullets in sections) + + +def _add_paginated_bullet_sections( + prs: Presentation, layout, paper: Paper, ctx: _BuildContext, *, title, sections, +) -> None: + """Render (heading, bullets) sections full-width, one section per slide, with + any section's bullets split into consecutive `` (i/N)`` pages of at + most ``_BULLETS_PER_CELL_MAX`` bullets — so no bullet is ever dropped. 
+ + Each page carries at most the cap the 2-column grid cell was already sized + for, so the result clears the footer guard the same way the grid does. This + is the overflow-safe fallback the grid callers use when + ``_any_section_over_cell_cap`` is True; short content keeps the compact grid. + """ + cap = _BULLETS_PER_CELL_MAX + for heading, bullets in sections: + chunks = [bullets[i : i + cap] for i in range(0, len(bullets), cap)] or [()] + for chunk_index, chunk in enumerate(chunks): + slide = _new_section_slide(prs, layout, title) + _add_paper_subtitle(slide, paper, ctx) + sub_heading = heading + if len(chunks) > 1: + sub_heading = f"{heading} ({chunk_index + 1}/{len(chunks)})" + _add_subsection( + slide, sub_heading, chunk, Inches(1.7), + width=_BODY_WIDTH, language=ctx.language, ) @@ -981,6 +1027,15 @@ def _add_evaluation_slide( prs: Presentation, layout, paper: Paper, summary: PaperSummary, ctx: _BuildContext, ) -> None: sections = summary.evaluation_sections + if not sections: + return + if _any_section_over_cell_cap(sections): + # Full-width, paginated: no bullet dropped from a cramped cell. 
+ _add_paginated_bullet_sections( + prs, layout, paper, ctx, + title=t(ctx.language, "section_evaluation"), sections=sections, + ) + return chunk_size = _EVALUATION_SECTIONS_PER_SLIDE chunks = [sections[i : i + chunk_size] for i in range(0, len(sections), chunk_size)] for chunk_index, chunk in enumerate(chunks): @@ -995,6 +1050,7 @@ def _add_evaluation_slide( left=_MARGIN_X, top=Inches(1.7), width=_BODY_WIDTH, height=Inches(5.0), columns=min(2, len(chunk)), + language=ctx.language, ) @@ -1032,7 +1088,7 @@ def _add_figure_slides( if description: _add_bullet_box( slide, name="body", - bullets=_cap_bullets(description, max_count=4), + bullets=_cap_bullets(description, max_count=4, language=ctx.language), left=_MARGIN_X, top=Inches(5.95), width=_BODY_WIDTH, height=Inches(1.05), font_pt=_BODY_PT - 2, @@ -1113,7 +1169,7 @@ def _add_paper_table_slides( if analysis: _add_bullet_box( slide, name="body", - bullets=_cap_bullets(analysis, max_count=6), + bullets=_cap_bullets(analysis, max_count=6, language=ctx.language), left=_MARGIN_X, top=Inches(5.6), width=_BODY_WIDTH, height=Inches(1.4), font_pt=_BODY_PT, @@ -1176,7 +1232,8 @@ def _add_rq_result_slide( # Show up to 6 analysis bullets so authors don't lose argument # detail to a silent 3-bullet cap. 
_add_bullet_box( - slide, name="body", bullets=_cap_bullets(rq.analysis, max_count=6), + slide, name="body", + bullets=_cap_bullets(rq.analysis, max_count=6, language=ctx.language), left=_MARGIN_X, top=analysis_top, width=_BODY_WIDTH, height=Inches(1.4 if rq.table else 4.5), font_pt=_BODY_PT, @@ -1212,8 +1269,6 @@ def _add_limitations_future_slide( f"{t(ctx.language, 'section_limitations')} & " f"{t(ctx.language, 'section_future_work')}" ) - slide = _new_section_slide(prs, layout, title) - _add_paper_subtitle(slide, paper, ctx) sections: list[tuple[str, tuple[str, ...]]] = [] if summary.limitations: sections.append( @@ -1223,12 +1278,22 @@ def _add_limitations_future_slide( sections.append( (t(ctx.language, "section_future_work"), tuple(summary.future_work)) ) + if _any_section_over_cell_cap(sections): + # A long limitations / future-work list would lose bullets in the + # 2-column grid; render full-width paginated so all survive. + _add_paginated_bullet_sections( + prs, layout, paper, ctx, title=title, sections=tuple(sections), + ) + return + slide = _new_section_slide(prs, layout, title) + _add_paper_subtitle(slide, paper, ctx) _render_multi_column( slide, sections=tuple(sections), left=_MARGIN_X, top=Inches(1.7), width=_BODY_WIDTH, height=Inches(5.0), columns=2, + language=ctx.language, ) @@ -1255,7 +1320,9 @@ def _add_content_slide( ) fallback = [_clean(paper.abstract)] raw_bullets = _sentences_to_bullets(sentences) or fallback - bullets = _cap_bullets(raw_bullets, max_count=6, max_chars=_BULLET_MAX_CHARS) + bullets = _cap_bullets( + raw_bullets, max_count=6, max_chars=_BULLET_MAX_CHARS, language=ctx.language, + ) _add_bullet_box( slide, name="body", bullets=bullets, left=_MARGIN_X, top=_BODY_TOP + Inches(0.6), @@ -1372,18 +1439,23 @@ def _cap_bullets( bullets, max_count: int = _BULLETS_PER_CELL_MAX, max_chars: int = _BULLET_MAX_CHARS, # kept for back-compat; ignored + *, + language: str = "en", ) -> list[str]: """Cap the *number* of bullets shown; never 
truncate a bullet's text. - A trailing "(+N more)" marker calls out genuine overflow by count so - authors notice they exceeded the cap, without silently chewing - characters off a kept bullet. + A trailing localised "(+N more)" marker (the ``more_items`` i18n key) calls + out genuine overflow by count so authors notice they exceeded the cap, + without silently chewing characters off a kept bullet. ``language`` selects + the marker's locale so a zh-tw / ja deck never shows an English "(+N more)"; + it defaults to ``en`` for callers (e.g. height estimators) where the marker + text is immaterial because only the line *count* matters. """ del max_chars # intentionally ignored; see docstring sliced = [" ".join(b.split()) for b in bullets[:max_count]] overflow = len(bullets) - len(sliced) if overflow > 0: - sliced.append(f"(+{overflow} more)") + sliced.append(t(language, "more_items", n=overflow)) return sliced @@ -1761,12 +1833,14 @@ def _bullets_box_height_in(bullets, width_in: float) -> float: return max(0.5, total) -def _add_subsection(slide, heading: str, bullets, cursor, *, width) -> int: +def _add_subsection( + slide, heading: str, bullets, cursor, *, width, language: str = "en", +) -> int: head_height = Inches(0.5) # Subsection bullets — show up to 6 so authors don't lose detail. # Method/eval slides paginate at the section-list level, so 6 per # subsection won't push past the footer in the worst case. 
- capped = _cap_bullets(bullets, max_count=6) + capped = _cap_bullets(bullets, max_count=6, language=language) bullet_height = Inches(_bullets_box_height_in(capped, Emu(width - Inches(0.2)).inches)) _add_textbox( slide, name="subhead", text=_clean(heading), @@ -1785,6 +1859,7 @@ def _add_subsection(slide, heading: str, bullets, cursor, *, width) -> int: def _render_multi_column( slide, *, sections, left, top, width, height, columns: int = 2, + language: str = "en", ) -> None: if not sections: return @@ -1806,7 +1881,9 @@ def _render_multi_column( ) _add_bullet_box( slide, name="body", - bullets=_cap_bullets(bullets, max_chars=_BULLET_MAX_CHARS_COL), + bullets=_cap_bullets( + bullets, max_chars=_BULLET_MAX_CHARS_COL, language=language, + ), left=x, top=y + Inches(0.55), width=col_w, height=row_h - Inches(0.65), font_pt=_BODY_PT, From 614db13d30f28a8ed71ef8e7170f66b60aa98b6f Mon Sep 17 00:00:00 2001 From: JeffreyChen Date: Sun, 14 Jun 2026 18:28:08 +0800 Subject: [PATCH 14/16] Point the deck-audit docs at the unified review entry point Update the overflow / contrast subagent docs to reference the package modules and review_deck, and add pptx_review and the review subcommand to README, the MCP and CLI references, and the en / zh-tw / zh-cn Sphinx docs. --- .claude/agents/rules/deck-design.md | 14 +++++--- .claude/agents/tasks/slide-overflow-check.md | 16 +++++++-- CLAUDE.md | 6 ++-- README.md | 7 ++-- docs/cli.md | 18 +++++++++++ docs/en/index.rst | 4 +++ docs/mcp.md | 34 ++++++++++++++++++++ docs/zh-cn/index.rst | 4 +++ docs/zh-tw/index.rst | 4 +++ 9 files changed, 94 insertions(+), 13 deletions(-) diff --git a/.claude/agents/rules/deck-design.md b/.claude/agents/rules/deck-design.md index 9b18600..8d0aea0 100644 --- a/.claude/agents/rules/deck-design.md +++ b/.claude/agents/rules/deck-design.md @@ -134,8 +134,11 @@ Both layers ship together; tests pin both. 
The regression test ``test_pptx_dark_mode_has_no_invisible_runs`` (in ``tests/test_exporters.py``) walks every run on every slide of a default-dark-mode deck and fails if any non-empty run has ``rgb is None`` or ``rgb == (0,0,0)``. A -companion debug script lives at ``scripts/_audit_dark_text.py`` for -manual inspection of a single rendered deck. +companion auditor lives at ``thesisagents.exporters.audit.audit_deck`` (the +``scripts/_audit_dark_text.py`` CLI is a thin wrapper) for manual inspection +of a single rendered deck — or use the one-stop ``review_deck`` / +``python -m thesisagents review`` / MCP ``pptx_review``, which bundles this +contrast audit with the overflow and section-completeness checks. #### "No red text" contract (HARD) @@ -223,9 +226,10 @@ stayed light. White-on-white. Fixed by adding the mapping text colour, and fails when both > 0.7 × 255 (= 178). Adding a new light-fill shape without a corresponding dark mapping will fail this test. -3. **The audit script** ``scripts/_audit_dark_text.py`` now also - reports failure-mode B — run it on a rendered deck during manual - inspection. +3. **The auditor** ``thesisagents.exporters.audit.audit_deck`` (CLI + ``scripts/_audit_dark_text.py``, or bundled into ``review_deck`` / + ``pptx_review``) now also reports failure-mode B — run it on a rendered + deck during manual inspection. Exposure surfaces (light is default; the toggles opt IN to DARK): - CLI: `--dark-mode` opt-in flag (when absent → light) diff --git a/.claude/agents/tasks/slide-overflow-check.md b/.claude/agents/tasks/slide-overflow-check.md index ec11f0b..d75da0b 100644 --- a/.claude/agents/tasks/slide-overflow-check.md +++ b/.claude/agents/tasks/slide-overflow-check.md @@ -28,8 +28,9 @@ You'll be told (or you can infer from context) which deck(s) to check. Typical i - A specific path: `exports//.pptx` - Or a regen script the parent just ran: re-derive the path from the script's `out_dir` + `filename_stem`. 
-**Use the canonical inspector — `scripts/check_overflow.py`.** It already encodes -the project's wrap estimate and calibration, so run it rather than reinventing one: +**Use the canonical inspector.** Its logic now lives in the package at +`thesisagents.exporters.overflow` (the `scripts/check_overflow.py` CLI is a thin +wrapper re-exporting it), so run it rather than reinventing one: ``` .venv/Scripts/python.exe scripts/check_overflow.py exports/.pptx [more.pptx ...] @@ -37,10 +38,19 @@ the project's wrap estimate and calibration, so run it rather than reinventing o It prints the report block below per deck and exits with the count of failed decks (0 = all clean), so you can assert on the exit code. It is also importable — -`from check_overflow import check_pptx, check_pptx_from_prs` — returning a list of +`from thesisagents.exporters.overflow import check_pptx, check_pptx_from_prs` (the +script path `from check_overflow import …` still works too) — returning a list of `Violation(slide, shape, kind, rendered_in, limit_in)`; `check_pptx_from_prs(prs)` takes an already-open `Presentation` so a test can build a deck in memory. +**For a full deck audit (overflow + colour contracts + section completeness) in +one pass, prefer `thesisagents.exporters.review.review_deck(path)`** — exposed as +the CLI `python -m thesisagents review ` and the MCP `pptx_review` +tool. It bundles this overflow check with the dark-mode / no-red / contrast audit +and the `paper_rule` seven-section completeness check, returning a single +`DeckReview` (`.ok`, `.overflow`, `.contrast`, `.missing_sections`). Use the +standalone overflow inspector above when you only need the geometry check. + What it does (so you can trust / explain its output): - Reads each run's actual `font.size` (the exporter sets it per run), classifies diff --git a/CLAUDE.md b/CLAUDE.md index fcfa043..1ab94b8 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -47,8 +47,10 @@ Concretely, the assistant: 4. 
**Generates** `.pptx` (three rendering tiers — lightweight / enriched-flat / thesis-style), `.xlsx`, `.bib`, `.md`, `.json` outputs. 5. **Exposes** every step as an MCP tool (`search`, `fetch_paper`, `fetch_pdf_text`, - `export`, `pptx_inspect`, `pptx_update_slide`, `pptx_delete_slide`, - `pptx_reorder_slides`, `pptx_add_slide`). + `export`, `pptx_inspect`, `pptx_review`, `pptx_update_slide`, `pptx_delete_slide`, + `pptx_reorder_slides`, `pptx_add_slide`). `pptx_review` audits an existing deck + (overflow + colour contracts + `paper_rule` section completeness) in one call — + the same audit the CLI exposes as `python -m thesisagents review `. Single-process, Python 3.12+. Heavy I/O off the event loop; shared `httpx.AsyncClient` registry pools connections per source. diff --git a/README.md b/README.md index 3b9f8ca..4ee9c83 100644 --- a/README.md +++ b/README.md @@ -64,9 +64,9 @@ it. 6. export(papers=[{...paper, "summary": {...}}], language="zh-tw", ...) ``` -All twelve MCP tools (including `list_sources`, `list_exports`, -`download_pdfs`, `pptx_inspect` / `pptx_update_slide` / `pptx_add_slide` -/ etc.) are +All thirteen MCP tools (including `list_sources`, `list_exports`, +`download_pdfs`, `pptx_inspect` / `pptx_review` / `pptx_update_slide` / +`pptx_add_slide` / etc.) are documented in [`docs/mcp.md`](docs/mcp.md). ### Mandatory: URL / DOI verification before shipping @@ -411,6 +411,7 @@ Tools: | `download_pdfs` | Batch-download a papers list's PDFs into `{out_dir}/pdfs/`. Returns per-paper results keyed by BibTeX key. | | `export` | Papers list + formats → writes `.pptx/.xlsx/.md/.bib/.json/.ris/.csv/.csl.json`. Accepts a `summary` field per paper for the rich thesis-style schema, `max_slides_per_paper` (default 25), and `dark_mode` (default `true` — the project's dark-deck post-pass; pass `false` for the printable light variant). | | `pptx_inspect` | Read slide / shape structure of an existing deck. 
| +| `pptx_review` | Audit a deck in one call — overflow + colour contracts + `paper_rule` section completeness. Auto-detects the deck language; also the CLI `python -m thesisagents review `. | | `pptx_update_slide` | Replace `title` / `body` / `meta` (by shape name) or arbitrary shapes by index. | | `pptx_delete_slide` | Remove a slide and its part relationship. | | `pptx_reorder_slides` | Permute slides via `sldIdLst`. | diff --git a/docs/cli.md b/docs/cli.md index 4f4e9c4..dc9c0b7 100644 --- a/docs/cli.md +++ b/docs/cli.md @@ -131,6 +131,24 @@ research question, KPI metrics, technique table, literature positioning, per-RQ result tables, …), and the PPT exporter renders the thesis-style layout. +### Review an existing deck + +```bash +thesisagents review ./exports/attention.pptx +thesisagents review ./exports/*.pptx --lang zh-tw +``` + +The `review` subcommand audits a finished `.pptx` against all three +deck-quality contracts in one pass — slide **overflow**, the dark-mode / +no-red / contrast **colour** contracts, and `paper_rule` **section +completeness** (Introduction, Literature Review, Methodology, Experiment, +Conclusion). `--lang` is optional; the deck's language is auto-detected +from its slide titles otherwise. It prints a per-deck report and exits +with the number of decks that failed (`0` = all clean), so it drops into +CI. Section completeness only fails a *thesis-style* deck — a lightweight +abstract-only deck is never failed for legitimately lacking sections. +The same audit is the MCP `pptx_review` tool. + ## Exit codes | Code | Meaning | diff --git a/docs/en/index.rst b/docs/en/index.rst index 14c959b..1497632 100644 --- a/docs/en/index.rst +++ b/docs/en/index.rst @@ -448,6 +448,10 @@ Tools at a glance: for the printable light variant). * - ``pptx_inspect`` - Read slide / shape structure of an existing deck. + * - ``pptx_review`` + - Audit a deck in one call — overflow + colour contracts + + ``paper_rule`` section completeness. 
Auto-detects the deck + language; also the CLI ``python -m thesisagents review``. * - ``pptx_update_slide`` - Replace ``title`` / ``body`` / ``meta`` (by shape name) or arbitrary shapes by index. diff --git a/docs/mcp.md b/docs/mcp.md index 88fccab..c640a0f 100644 --- a/docs/mcp.md +++ b/docs/mcp.md @@ -385,6 +385,40 @@ Shape names are set by `PptxExporter`: each slide carries `title`, elsewhere may not have these names — fall back to `shape_updates` addressed by integer index. +### `pptx_review` + +Audit an existing deck against all three deck-quality contracts in one +call: slide **overflow**, the dark-mode / no-red / contrast **colour** +contracts, and `paper_rule` **section completeness**. + +```json +{"path": "./exports/attention.pptx"} +``` + +`language` is optional — it is auto-detected from the slide titles when +omitted (pass e.g. `"zh-tw"` to force it). Returns: + +```text +{ + "path": "./exports/attention.pptx", + "language": "zh-tw", + "thesis_style": true, + "ok": true, + "overflow": [], // {slide, shape, kind, rendered_in, limit_in} + "contrast": [], // {slide, shape, kind, detail, hard} + "missing_sections": [], // canonical body sections with no covering slide + "completeness_gated": true // missing_sections only fail a thesis-style deck +} +``` + +`ok` is `false` when there is any overflow, any *hard* contrast issue +(invisible / red / light-on-light text), or — for a thesis-style deck — +any missing body section (Introduction, Literature Review, Methodology, +Experiment, Conclusion). A lightweight abstract-only deck is never failed +for lacking sections (`thesis_style` / `completeness_gated` say which). +The same audit is available on the command line as +`python -m thesisagents review [more.pptx ...] [--lang xx]`. + ### `pptx_update_slide` Replace text on one slide. 
diff --git a/docs/zh-cn/index.rst b/docs/zh-cn/index.rst index 798ef62..e7a3a6c 100644 --- a/docs/zh-cn/index.rst +++ b/docs/zh-cn/index.rst @@ -353,6 +353,10 @@ Claude Desktop、Cursor …)都能驱动整套流程。 代表不限)。 * - ``pptx_inspect`` - 读已有幻灯片文件的 slide / shape 结构。 + * - ``pptx_review`` + - 一次审核整份幻灯片 — 溢出 + 颜色契约 + ``paper_rule``\ + 章节完整度。自动检测语言,也是 CLI + ``python -m thesisagents review``。 * - ``pptx_update_slide`` - 替换 ``title``/ ``body``/ ``meta``\ (通过 shape name)或 任意 shape(通过 index)。 diff --git a/docs/zh-tw/index.rst b/docs/zh-tw/index.rst index 2e86175..d4d05e9 100644 --- a/docs/zh-tw/index.rst +++ b/docs/zh-tw/index.rst @@ -354,6 +354,10 @@ Claude Desktop、Cursor …)都能驅動整套流程。 代表不限)。 * - ``pptx_inspect`` - 讀既有投影片檔的 slide / shape 結構。 + * - ``pptx_review`` + - 一次稽核整份投影片 — 溢位 + 色彩契約 + ``paper_rule``\ + 章節完整度。自動偵測語言,亦為 CLI + ``python -m thesisagents review``。 * - ``pptx_update_slide`` - 取代 ``title``/ ``body``/ ``meta``\ (透過 shape name)或 任意 shape(透過 index)。 From 5fd5ca9c02cfcb4f264f01532b05906d6fccf2c4 Mon Sep 17 00:00:00 2001 From: JeffreyChen Date: Sun, 14 Jun 2026 22:33:20 +0800 Subject: [PATCH 15/16] Fix documented dark_mode default (it is off, not on) The light navy-band deck became the default in 36b0ed3, but several docs still said the export dark_mode option defaults to true. Correct the README, architecture, MCP and Sphinx references to state false (light), with dark as opt-in. --- README.md | 2 +- docs/architecture.md | 3 ++- docs/en/index.rst | 4 ++-- docs/mcp.md | 3 ++- 4 files changed, 7 insertions(+), 5 deletions(-) diff --git a/README.md b/README.md index 4ee9c83..093d347 100644 --- a/README.md +++ b/README.md @@ -409,7 +409,7 @@ Tools: | `fetch_paper` | arXiv / DOI / PMID / IEEE identifier → single paper. | | `fetch_pdf_text` | Download one PDF, return extracted body text. **The MCP path to "I read the paper".** | | `download_pdfs` | Batch-download a papers list's PDFs into `{out_dir}/pdfs/`. Returns per-paper results keyed by BibTeX key. 
| -| `export` | Papers list + formats → writes `.pptx/.xlsx/.md/.bib/.json/.ris/.csv/.csl.json`. Accepts a `summary` field per paper for the rich thesis-style schema, `max_slides_per_paper` (default 25), and `dark_mode` (default `true` — the project's dark-deck post-pass; pass `false` for the printable light variant). | +| `export` | Papers list + formats → writes `.pptx/.xlsx/.md/.bib/.json/.ris/.csv/.csl.json`. Accepts a `summary` field per paper for the rich thesis-style schema, `max_slides_per_paper` (default 25), and `dark_mode` (default `false` — the project default is the light navy-band deck; pass `true` for the dark OLED / low-light post-pass). | | `pptx_inspect` | Read slide / shape structure of an existing deck. | | `pptx_review` | Audit a deck in one call — overflow + colour contracts + `paper_rule` section completeness. Auto-detects the deck language; also the CLI `python -m thesisagents review `. | | `pptx_update_slide` | Replace `title` / `body` / `meta` (by shape name) or arbitrary shapes by index. | diff --git a/docs/architecture.md b/docs/architecture.md index 2760c84..c3273aa 100644 --- a/docs/architecture.md +++ b/docs/architecture.md @@ -392,7 +392,8 @@ non-invasive walk-and-rewrite passes run before the file is saved: the user never sees as separate shapes but instantly reads as "this deck has an identity". 3. **Dark-mode recolour** (`_apply_dark_mode(prs)`, runs when - `ExportOptions.dark_mode=True`, which is the default) — walks every + `ExportOptions.dark_mode=True`, which is opt-in — the default deck + is light) — walks every slide / shape / run / table cell and swaps light-palette RGBs to their dark equivalents via `_LIGHT_TO_DARK_TEXT` + `_LIGHT_TO_DARK_FILL` dicts.
The slide background switches to `#12151B`; body text goes diff --git a/docs/en/index.rst b/docs/en/index.rst index 1497632..a01509d 100644 --- a/docs/en/index.rst +++ b/docs/en/index.rst @@ -444,8 +444,8 @@ Tools at a glance: Accepts a ``summary`` field per paper that can carry the full thesis-style schema; accepts ``language`` for i18n, ``max_slides_per_paper`` (default 25; pass ``0`` for unlimited), - and ``dark_mode`` (default ``true`` — dark deck; pass ``false`` - for the printable light variant). + and ``dark_mode`` (default ``false`` — the light navy-band deck; + pass ``true`` for the dark OLED / low-light variant). * - ``pptx_inspect`` - Read slide / shape structure of an existing deck. * - ``pptx_review`` diff --git a/docs/mcp.md b/docs/mcp.md index c640a0f..73c7822 100644 --- a/docs/mcp.md +++ b/docs/mcp.md @@ -313,7 +313,8 @@ after the priority-based trim — cover / references / contributions are kept first; Q&A / figure / paper-table slides drop first. Pass `0` (or omit the field) for unlimited. -`dark_mode` (default `true`) toggles the post-build recolour pass. +`dark_mode` (default `false` — the project default is the light +navy-band deck) toggles the post-build recolour pass. On: dark slide background (`#12151B`) + near-white body text (`#E5E7EB`) + darker table-row stripe — designed for OLED projectors and low-light venues. 
Off: the light/printable variant (white background + navy text From ba7412703d90341f27dd3da6e651ff44fda4d99e Mon Sep 17 00:00:00 2001 From: JeffreyChen Date: Sun, 14 Jun 2026 22:33:27 +0800 Subject: [PATCH 16/16] Paginate over-cap pain points and add a review --help - a pain point with more bullets than a quadrant cell now renders full-width paginated (like evaluation / limitations), with the research-question callout moved to its own lead slide, so neither a bullet nor the RQ is lost - `thesisagents review -h/--help` prints usage instead of treating the flag as a deck path Adds content-preservation tests for the pain-point and future-work paths. --- tests/test_exporters.py | 74 ++++++++++++++++++++++++++++++++ thesisagents/exporters/pptx.py | 19 ++++++++ thesisagents/exporters/review.py | 6 ++- 3 files changed, 98 insertions(+), 1 deletion(-) diff --git a/tests/test_exporters.py b/tests/test_exporters.py index d686af6..f84681c 100644 --- a/tests/test_exporters.py +++ b/tests/test_exporters.py @@ -448,6 +448,80 @@ def test_pptx_long_evaluation_section_preserves_all_bullets(tmp_path): assert check_pptx(written["pptx"]) == [], "stacked evaluation overflowed" +def _export_one(tmp_path, summary, *, stem): + from thesisagents.core.models import Paper + + paper = Paper( + source="local", source_id="t", title="Content Preservation", + authors=("A",), year=2026, venue="V", abstract="", url="", summary=summary, + ) + collection = PaperCollection( + query=Query(keywords="x", sources=("local",)), papers=(paper,), + ) + return export_collection(collection, ExportOptions( + formats=("pptx",), out_dir=str(tmp_path), filename_stem=stem, language="en", + ))["pptx"] + + +def _all_run_text(pptx_path) -> str: + from pptx import Presentation + + prs = Presentation(str(pptx_path)) + return " ".join( + run.text + for slide in prs.slides + for shape in slide.shapes + if shape.has_text_frame + for para in shape.text_frame.paragraphs + for run in para.runs + ) + + +def 
test_pptx_long_future_work_preserves_all_bullets(tmp_path): + """A future-work list longer than a grid cell is paginated full-width (via + _add_limitations_future_slide's fallback), so no bullet is dropped.""" + from thesisagents.core.models import PaperSummary + from thesisagents.exporters.overflow import check_pptx + + summary = PaperSummary( + language="en", + future_work=tuple(f"Future direction {i} to pursue" for i in range(1, 10)), + core_observation="The future-work list paginates instead of truncating.", + model="test", + ) + out = _export_one(tmp_path, summary, stem="futureheavy") + text = _all_run_text(out) + for i in range(1, 10): + assert f"Future direction {i} " in text, f"future-work bullet {i} dropped" + assert "more)" not in text + assert check_pptx(out) == [] + + +def test_pptx_long_pain_point_preserves_bullets_and_keeps_rq(tmp_path): + """A pain point with more bullets than a quadrant cell holds paginates + full-width AND still shows the research-question callout on its own lead + slide — neither the bullets nor the RQ is lost.""" + from thesisagents.core.models import PaperSummary + from thesisagents.exporters.overflow import check_pptx + + rq = "How can the manual deck workflow be automated end to end?" 
+ summary = PaperSummary( + language="en", + pain_points=( + ("Slow manual workflow", tuple(f"Pain bullet {i} detail" for i in range(1, 10))), + ), + research_question=rq, + model="test", + ) + out = _export_one(tmp_path, summary, stem="painheavy") + text = _all_run_text(out) + for i in range(1, 10): + assert f"Pain bullet {i} " in text, f"pain-point bullet {i} dropped" + assert rq in text, "research question lost when pain points paginated" + assert "more)" not in text + assert check_pptx(out) == [] + + def _find_run_color(prs, target_rgb: tuple[int, int, int]) -> bool: for slide in prs.slides: for shape in slide.shapes: diff --git a/thesisagents/exporters/pptx.py b/thesisagents/exporters/pptx.py index 0df89bf..c9d7021 100644 --- a/thesisagents/exporters/pptx.py +++ b/thesisagents/exporters/pptx.py @@ -748,6 +748,25 @@ def _add_pain_points_slide( sections = list(summary.pain_points) if not sections: return + if _any_section_over_cell_cap(sections): + # A pain point with more bullets than a quadrant cell holds would lose + # content; render full-width paginated so every bullet survives. The RQ + # callout (normally inline under the quadrant) gets its own lead slide so + # it is still shown — _add_research_question_slide stays skipped because + # pain_points is non-empty, so there is no duplicate. 
+ if summary.research_question: + rq_slide = _new_section_slide( + prs, layout, t(ctx.language, "section_research_question"), + ) + _add_paper_subtitle(rq_slide, paper, ctx) + _add_rq_callout( + rq_slide, summary.research_question, + left=_MARGIN_X, top=Inches(2.5), width=_BODY_WIDTH, height=Inches(2.0), + ) + _add_paginated_bullet_sections( + prs, layout, paper, ctx, title=title, sections=tuple(sections), + ) + return per_slide = _pain_points_per_slide(sections) chunks = [ sections[i : i + per_slide] diff --git a/thesisagents/exporters/review.py b/thesisagents/exporters/review.py index e6d9a24..9a740dc 100644 --- a/thesisagents/exporters/review.py +++ b/thesisagents/exporters/review.py @@ -225,11 +225,15 @@ def main(argv: list[str]) -> int: ``DeckReview.to_dict()`` objects instead of the human report, so a CI step can parse the overflow / contrast / missing-section detail. """ + usage = "usage: review [--lang XX] [--json] [more.pptx ...]" language: str | None = None as_json = False paths: list[str] = [] i = 0 while i < len(argv): + if argv[i] in ("-h", "--help"): + print(usage) + return 0 if argv[i] in ("--lang", "--language") and i + 1 < len(argv): language = argv[i + 1] i += 2 @@ -241,7 +245,7 @@ def main(argv: list[str]) -> int: paths.append(argv[i]) i += 1 if not paths: - print("usage: review [--lang XX] [--json] [more.pptx ...]") + print(usage) return 2 reviews = [review_deck(path, language) for path in paths] if as_json: