Skip to content

Commit 8597d87

Browse files
cigrainger and claude committed
Pre-cached iterator, lazy ElementList, O(1) sibling lookup
#1 Iterator pre-caching: ElementIterator pre-builds all (tag_idx, cached_tag) pairs in a single Document borrow at creation time. __next__ no longer borrows Document — just clone_ref on pre-cached values.

#2 Lazy ElementList: Element.xpath() and CompiledXPath.eval() now return ElementList — holds one Py<Document> + Vec<usize> of tag indices. Elements created on demand via __getitem__/__iter__. compiled.eval() for 100K results: 4ms -> 0.07ms (57x faster). Supports __len__, __getitem__, __iter__, __bool__, __eq__ (with list comparison).

#7 O(1) sibling lookup: child_positions[i] stored in IndexWithMeta at parse time. getnext/getprevious use direct index instead of linear scan over siblings. O(1) instead of O(siblings).

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
1 parent d79d8be commit 8597d87

File tree

4 files changed

+167
-68
lines changed

4 files changed

+167
-68
lines changed

python/simdxml/__init__.py

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -34,6 +34,7 @@
3434
CompiledXPath,
3535
Document,
3636
Element,
37+
ElementList,
3738
compile,
3839
parse,
3940
)
@@ -42,6 +43,7 @@
4243
"CompiledXPath",
4344
"Document",
4445
"Element",
46+
"ElementList",
4547
"compile",
4648
"parse",
4749
]

python/simdxml/_core.pyi

Lines changed: 16 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -94,10 +94,10 @@ class Element:
9494
def text_content(self) -> str:
9595
"""All descendant text concatenated into a single string."""
9696
...
97-
def xpath(self, expr: str) -> list[Element]:
97+
def xpath(self, expr: str) -> ElementList:
9898
"""Evaluate an XPath 1.0 expression with this element as context.
9999
100-
Returns a list of matching Element objects.
100+
Returns an ElementList of matching elements (lazy — created on access).
101101
"""
102102
...
103103
def xpath_text(self, expr: str) -> list[str]:
@@ -143,6 +143,18 @@ class Element:
143143
def __eq__(self, other: object) -> bool: ...
144144
def __hash__(self) -> int: ...
145145

146+
class ElementList:
147+
"""A lazy sequence of elements from an XPath query.
148+
149+
Elements are created on demand when accessed by index or iteration.
150+
Holds a single Document reference regardless of result size.
151+
"""
152+
153+
def __len__(self) -> int: ...
154+
def __getitem__(self, index: int) -> Element: ...
155+
def __iter__(self) -> Iterator[Element]: ...
156+
def __bool__(self) -> bool: ...
157+
146158
class CompiledXPath:
147159
"""A compiled XPath expression for repeated use.
148160
@@ -153,8 +165,8 @@ class CompiledXPath:
153165
def eval_text(self, doc: Document) -> list[str]:
154166
"""Evaluate and return text content of matching nodes."""
155167
...
156-
def eval(self, doc: Document) -> list[Element]:
157-
"""Evaluate and return matching Element objects."""
168+
def eval(self, doc: Document) -> ElementList:
169+
"""Evaluate and return matching elements as an ElementList (lazy)."""
158170
...
159171
def eval_exists(self, doc: Document) -> bool:
160172
"""Check whether any nodes match the expression."""

python/simdxml/etree/ElementTree.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -167,6 +167,6 @@ def _findall(
167167
"""Find all matching subelements."""
168168
xpath = _path_to_xpath(path)
169169
try:
170-
return element.xpath(xpath)
170+
return list(element.xpath(xpath))
171171
except ValueError:
172172
return []

0 commit comments

Comments
 (0)