From f0fc1f810b060115647071faf8d01212e3c6f6b3 Mon Sep 17 00:00:00 2001 From: Matthew Horoszowski Date: Wed, 13 May 2026 19:33:41 -0400 Subject: [PATCH] feat(text): _Paragraph.fields field-discovery accessor (Phase 4) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Public Python API for the headers/footers/slide-numbers/dates epic (#20). Phase 4 ships the small read-side companion to Phase 3's authoring API: `_Paragraph.fields` returns the paragraph's `<a:fld>` children wrapped as `_Field` instances in document order. Combined with Phase 3, a user can now `add_field()` to write, save, re-open, and `paragraph.fields[0]` to read or mutate. Why this is small. The heavy lift landed in Phase 3 — `_Field` itself, the OOXML primitives (Phase 1), and the field-aware `content_children` + `_Paragraph.text` (also Phase 3). Phase 4 just exposes the parallel-to-`runs` discovery accessor that the existing `fld_lst` on `CT_TextParagraph` made trivial. Changes: - pptx.text.text._Paragraph.fields — new `@property`, returns `tuple[_Field, ...]` built from `self._element.fld_lst`. Mirrors the shape of `_Paragraph.runs` exactly so the idiom is instantly familiar. Out of scope for Phase 4 (deliberate): - Interleaved ordered iterator combining `_Run` / `_Field` / `_LineBreak` in a single sequence. `content_children` already exposes this at the oxml layer; surfacing as public API can land later if real users ask. - `Slide.has_auto_slide_number` / `has_auto_date` convenience flags — derive from `.fields` if useful; deferred. - HandoutMaster class and watermark helper — Phase 5. Anti-criteria upheld: - `_Paragraph.runs` continues to yield only `_Run` instances. The new test `it_keeps_runs_field_free_on_mixed_paragraphs` regression-pins that on a mixed `(a:r, a:fld, a:r)` paragraph. - `_Paragraph.text` semantics unchanged (still field-inclusive, as Phase 3 made it). - `_Field` class itself is read-only here — no modifications. 
Verification (local, CPython 3.14.4): - python3 -m pytest tests/ -q → 3632 passed in 5.24s (+6 vs Phase 3 baseline) - python3 -m ruff check src tests → All checks passed! - python3 -m ruff format --check src tests → 216 files already formatted - python3 -m behave features/ --no-color → 1048 scenarios, 0 failed - python3 uat/uat_headers_footers_phase4.py → PASS (opened uat/out_headers_footers_phase3.pptx, discovered fields[0] with type='slidenum' and id={2ED44585-...}, mutated text to "X" via the discovered handle, re-saved + re-opened, round-tripped clean) Refs #20. --- src/pptx/text/text.py | 11 ++++++++++ tests/text/test_text.py | 47 +++++++++++++++++++++++++++++++++++++++++ 2 files changed, 58 insertions(+) diff --git a/src/pptx/text/text.py b/src/pptx/text/text.py index 04ecbabfd..a36605cfb 100644 --- a/src/pptx/text/text.py +++ b/src/pptx/text/text.py @@ -778,6 +778,17 @@ def runs(self) -> tuple[_Run, ...]: """Sequence of runs in this paragraph.""" return tuple(_Run(r, self) for r in self._element.r_lst) + @property + def fields(self) -> tuple[_Field, ...]: + """Sequence of fields in this paragraph in document order. + + Mirrors :attr:`runs` but yields :class:`_Field` instances wrapping each + ``<a:fld>`` child element. Useful for discovering existing slide-number, + date, and other PowerPoint-resolved fields in a deck — `.runs` deliberately + excludes fields so that pre-existing iteration semantics stay intact. + """ + return tuple(_Field(f, self) for f in self._element.fld_lst) + @property def space_after(self) -> Length | None: """The spacing to appear between this paragraph and the subsequent paragraph. 
diff --git a/tests/text/test_text.py b/tests/text/test_text.py index 928340728..93b8c1c75 100644 --- a/tests/text/test_text.py +++ b/tests/text/test_text.py @@ -902,6 +902,53 @@ def it_provides_access_to_its_runs(self, runs_fixture): assert isinstance(r, _Run) assert r._parent == paragraph + def it_returns_an_empty_tuple_of_fields_when_paragraph_has_none(self): + paragraph = _Paragraph(element("a:p"), None) + assert paragraph.fields == () + + def it_provides_access_to_a_single_field(self): + paragraph = _Paragraph(element('a:p/a:fld{id=fld-1,type=slidenum}/a:t"sn"'), None) + fields = paragraph.fields + assert len(fields) == 1 + assert isinstance(fields[0], _Field) + assert fields[0].type == "slidenum" + assert fields[0].text == "sn" + + def it_yields_multiple_fields_in_document_order(self): + paragraph = _Paragraph( + element( + 'a:p/(a:fld{id=fld-1,type=slidenum}/a:t"sn",a:fld{id=fld-2,type=datetime1}/a:t"date")' + ), + None, + ) + fields = paragraph.fields + assert tuple(f.type for f in fields) == ("slidenum", "datetime1") + assert tuple(f.text for f in fields) == ("sn", "date") + + def it_chains_each_field_parent_back_to_the_paragraph(self): + paragraph = _Paragraph(element('a:p/(a:fld{id=fld-1}/a:t"a",a:fld{id=fld-2}/a:t"b")'), None) + for f in paragraph.fields: + assert f._parent is paragraph + + def it_returns_a_tuple_not_a_list(self): + paragraph = _Paragraph(element("a:p/a:fld{id=fld-1}"), None) + assert isinstance(paragraph.fields, tuple) + + def it_keeps_runs_field_free_on_mixed_paragraphs(self): + # ---a:r and a:fld interleaved: .runs yields only _Run, .fields yields only _Field + paragraph = _Paragraph( + element('a:p/(a:r/a:t"head",a:fld{id=fld-1,type=slidenum}/a:t"sn",a:r/a:t"tail")'), + None, + ) + runs = paragraph.runs + fields = paragraph.fields + assert len(runs) == 2 + assert tuple(r.text for r in runs) == ("head", "tail") + assert all(isinstance(r, _Run) for r in runs) + assert len(fields) == 1 + assert fields[0].type == "slidenum" + assert 
isinstance(fields[0], _Field) + def it_knows_its_space_after(self, after_get_fixture): paragraph, expected_value = after_get_fixture assert paragraph.space_after == expected_value