diff --git a/.codex/AGENTS_EXTRA.md b/.codex/AGENTS_EXTRA.md new file mode 100644 index 00000000..baf97a99 --- /dev/null +++ b/.codex/AGENTS_EXTRA.md @@ -0,0 +1,1436 @@ +# Critical + +1. Always load and use the .codex/AGENTS_EXTRA.md, if it exists, when working on the project. AGENTS_EXTRA.md contains specialized project-tailored information. +2. Before starting actual work on documentation, but not earlier than that, load the .codex/PROTOCOL_AFAD.md, and use it for all your work on the documentation. + +--- + +# 1. ARCHITECTURAL PRIME DIRECTIVE + +## 1.1 Library Identity + +FTLLexEngine is the Python runtime for the **Fluent Template Language specification**, with +**CLDR-backed locale-aware formatting** and **fail-fast boot validation with structured audit evidence**. Every +public symbol must arise from one of these three purposes. The library is not a general +utilities collection, not a financial domain toolkit, not a concurrency framework — it is +the i18n layer that production systems build directly on top of, and nothing else. + +The primary use case is production systems where every locale resource must load cleanly, +every message schema must match exactly, and every failure must produce named, traceable +evidence — regulated deployments, audited backends, compliance-constrained services. This +purpose drives every API design decision. + +**Three Design Axioms:** + +**Axiom 1 — Downstream Burden Elimination:** +Before adding any symbol to a public facade, ask: *what downstream composition does this +replace?* Every public surface must eliminate a pattern that serious callers would otherwise +implement themselves. `require_locale_code()` replaced per-caller trim/blank/length/normalize +chains. `LocalizationBootConfig` replaced per-caller boot sequence assembly. `make_fluent_number()` +replaced per-caller visible-precision inference. 
Primitives that serve only internal composition +belong in submodules, not in `ftllexengine`, `ftllexengine.runtime`, or +`ftllexengine.localization`. + +**Axiom 2 — Fail-Fast at Boot, Structured Evidence at Runtime:** +Validate everything before accepting traffic. The canonical boot chain — +`LocalizationBootConfig.boot()`, or `FluentLocalization` + `require_clean()` + +`validate_message_schemas()` — raises `IntegrityCheckFailedError` if any resource fails to +load cleanly or any schema mismatches. At runtime, errors are returned as immutable structured +evidence (`FrozenFluentError`, `WriteLogEntry`, `LoadSummary`) so callers can build auditable, +loggable, compliant systems on top. Silent degradation is prohibited; all failures are explicit. + +**Axiom 3 — Explicit Failures, Immutable Evidence:** +Every failure produces a named, typed, immutable error object with structured context. +`strict=True` is the default on `FluentBundle` and `FluentLocalization` — exceptions, not +silent empty strings, are the correct response to integrity failures. `strict=False` is an +explicit opt-in for soft-error return semantics where `format_pattern` returns a +`(result, errors)` tuple. Audit structures (`WriteLogEntry`, `IntegrityContext`) carry dual +timestamps (`timestamp` for monotonic ordering, `wall_time_unix` for cross-system correlation) +because compliance traces must be reproducible across restarts. + +**API Design Review — apply before any new public surface:** +1. What downstream composition does this replace? (Axiom 1) +2. Does construction fail fast? Does runtime return immutable structured evidence? (Axiom 2) +3. Does it belong in a facade `__init__`, or is it an internal primitive? (see §1.5) +4. Does it introduce any upward layer dependency? (see §1.5) +5. Does it fall within one of the owned domains in §1.6 — FTL spec, CLDR locale + formatting, compliance boot/audit, ISO 4217, or ISO 3166? Apply the full rejection + test (§1.6) before answering yes. 
+ +## 1.2 Runtime Environment Constraints (Python 3.13+) +**Constraint:** The solution space targets **Python 3.13** as the baseline, targeting forward +compatibility with the current and next CPython release by avoiding constructs documented as +deprecated or removed. +* **Version Support Policy:** Support the baseline release and forward. Current values (update + when a new CPython stable release occurs): baseline=3.13, current=3.14, next=3.15. +* **Forward Compatibility:** Use only stable language features. Avoid deprecated constructs and + CPython-specific internals that may change between releases. +* **Syntax Enforcement System:** + * **Type Topology:** Leverage **PEP 695** generics and `type` aliases as the foundational + data modeling layer (e.g., `class Buffer[T]: ...`). Type hints are not documentation; + they are structural contracts. + * **Control Flow:** Utilize `match/case` structural pattern matching as the primary dispatch + mechanism, reducing the cyclomatic complexity inherent in `if/elif` chains. +* **Dependency Isolation:** The **Python Standard Library** is the sole permitted toolkit, with + the below stated explicit permitted exception. External dependencies are treated as system + contaminants and are prohibited unless creating the solution within the Standard Library + bounds is not achievable. + * **Permitted Exception:** `Babel` is the sole external dependency (optional), providing + Unicode CLDR locale data (plural rules, currency symbols, number formatting). CLDR data + is a curated international standard dataset that cannot be derived algorithmically. Babel + is the canonical Python interface to CLDR. + * **Babel Optionality:** Babel is an **optional** dependency. The package supports two + installation modes: + * **Parser-only** (`pip install ftllexengine`): No external dependencies. Provides + syntax parsing (`parse_ftl`, `serialize_ftl`), AST manipulation, and validation. 
+ * **Full runtime** (`pip install ftllexengine[babel]`): Includes Babel for locale-aware + formatting via `FluentBundle`, `FluentLocalization`, and `ftllexengine.parsing` + modules. +* **Obsolescence Filter:** The system strictly rejects features scheduled for removal. + * Legacy import mechanics (`imp`, `sys.path` manipulation) and pre-PEP 585/PEP 695 typing (e.g., `typing.List`, `TypeVar`-declared generics) + are structurally invalid inputs. + +## 1.3 Structural Mechanics +* **Immutability Protocol:** State mutation creates hidden coupling and non-determinism. The + system defaults to **Immutable Data Structures** (`frozen=True` dataclasses, `tuple`) to + enforce referential transparency. Mutation is permitted in exactly two bounded cases: + 1. **Performance-critical accumulation buffers:** isolated parse-buffer components where + temporary accumulation is the direct implementation mechanism (e.g., parser's internal + character/token accumulation). + 2. **Scoped context managers:** classes implementing the `__enter__`/`__exit__` protocol + where tracked mutable state (e.g., a depth counter) has deterministic enter/exit + lifetime and no external visibility (e.g., `DepthGuard`). +* **Explicit Control Topology:** Implicit behavior and "magic" methods increase cognitive load + and reduce auditability. The system demands explicit control flow and dependency injection + over global state or `threading.local` thread-local storage. **ContextVars + (`contextvars.ContextVar`) are permitted** for task-scoped state in high-frequency primitive + operations — they provide automatic async task isolation and do not share state between + concurrent parse operations. Any `ContextVar` usage MUST be documented as an architectural + decision per §3.6 and included in the Known Waiver Registry (§3.7). +* **Constants Placement Policy:** The `constants.py` module is for **cross-package + configuration constants** (depth limits, cache sizes, input bounds). 
Module-local private + constants (leading underscore) whose meaning is specific to a single module's functionality belong IN + that module, not in `constants.py`. Examples: Unicode escape lengths in parser primitives, + indentation strings in serializer, cache tuning parameters in cache implementation. This + follows the principle of locality — implementation details stay with their implementation. + +## 1.4 Specification Authority (Fluent) +**Constraint:** The Fluent specification is the authoritative reference for runtime behavior. + +**Specification Sources:** +* Primary: [Project Fluent Guide](https://projectfluent.org/fluent/guide/) +* Syntax: [Fluent Syntax 1.0](https://github.com/projectfluent/fluent/blob/master/spec/fluent.ebnf) +* Validation: [valid.md](https://github.com/projectfluent/fluent/blob/master/spec/valid.md) +* Reference implementation: [Mozilla python-fluent](https://github.com/projectfluent/python-fluent) + +**Specification Primacy:** + +When AI agents or developers assume behavior that differs from the specification, the +specification wins. 
Common misunderstandings: + +| Assumption | Specification Reality | +|:-----------|:---------------------| +| `{ $count }` should format locale-aware | Variables are formatted as-is via `str()` | +| `NUMBER($count)` is optional for numbers | `NUMBER()` is REQUIRED for locale-aware formatting | +| Implicit date formatting exists | `DATETIME()` is REQUIRED for locale-aware dates | +| Messages and terms share a namespace | Separate namespaces: `foo` and `-foo` can coexist | +| `NUMBER(style: "currency")` for currency | Use `CURRENCY()` function, not NUMBER with style | +| `NUMBER(style: "percent")` for percent | No percent style; use `NUMBER()` with manual `%` | + +**Example: Locale-Aware Number Formatting** + +```python +# Input: count = 1000, locale = "de_DE" + +# Fluent message: { $count } +# Output: "1000" (NOT "1.000") +# Reason: Per spec, variables are interpolated as-is + +# Fluent message: { NUMBER($count) } +# Output: "1.000" (locale-aware) +# Reason: NUMBER() explicitly requests locale formatting +``` + +This is SPEC-COMPLIANT behavior, not a bug. The Fluent specification intentionally separates: +* Raw interpolation: `{ $var }` — developer controls formatting +* Locale-aware formatting: `{ NUMBER($var) }`, `{ DATETIME($var) }` — locale determines format + +**JavaScript Intl API Conflation (Common Agent Error):** + +Agents familiar with JavaScript's `Intl.NumberFormat` API frequently assume FTLLexEngine uses +the same patterns. This is incorrect. 
+ +| JavaScript Intl Pattern | FTLLexEngine Equivalent | +|:------------------------|:-----------------------| +| `Intl.NumberFormat(locale, {style: 'currency', currency: 'EUR'})` | `CURRENCY($val, currency: "EUR")` | +| `Intl.NumberFormat(locale, {style: 'percent'})` | Not supported; use `NUMBER()` + literal `%` | +| `Intl.NumberFormat(locale, {style: 'decimal'})` | `NUMBER($val)` (default behavior) | +| `Intl.DateTimeFormat(locale, {year: 'numeric', month: 'long'})` | `DATETIME($val, dateStyle: "long")` | + +**Root Cause:** JavaScript's `Intl` API uses a single constructor with a `style` parameter to +switch modes. Fluent/FTLLexEngine uses **separate functions** for each formatting type. The FTL +parser accepts any named arguments (it checks syntax only and does not validate argument names against function signatures), so `NUMBER($x, style: "currency")` +parses successfully but the `style` argument is ignored at runtime. + +**Agent Responsibility:** Before flagging runtime behavior as incorrect: +1. Verify behavior against Fluent specification +2. Check Mozilla python-fluent reference implementation +3. If behavior matches spec: NOT a bug, even if counterintuitive +4. If behavior differs from spec: VALID issue; proceed with filing +5. 
Never assume JavaScript API patterns apply; verify function signatures against + DOC_04_Runtime.md + +## 1.5 Layer Architecture and Facade Contract + +### 1.5.1 Layer Graph (Architectural Law) + +The package layer hierarchy is a hard structural invariant, not a style convention: + +``` +core ← syntax ← parsing ← runtime ← localization + ↑ ↑ + introspection analysis + ↑ + diagnostics ← validation +``` + +| Layer | Contents | May import from | +|:------|:---------|:----------------| +| `core` | Depth guards, Babel compat, locale utils, value types | stdlib only | +| `diagnostics` | Error types, validation results, formatter | `core` | +| `validation` | Resource validation | `core`, `syntax`, `diagnostics` | +| `syntax` | AST, parser, serializer, validator | `core`, `diagnostics` | +| `introspection` | Message introspection, ISO lookup (Babel) | `core`, `syntax` | +| `analysis` | Cycle detection, dependency graph | `core`, `syntax` | +| `parsing` | Locale-aware parsers (Babel required) | `core`, `syntax` | +| `runtime` | FluentBundle, resolver, cache, functions | `core`, `syntax`, `introspection`, `analysis`, `diagnostics` | +| `localization` | FluentLocalization, boot, loaders | `runtime` and all below | + +**Upward dependencies are structural violations, not style issues.** A module in layer N may +not import from layer M > N. Violations must be fixed by moving the symbol to the correct +layer, not by using a runtime local import to paper over the problem. + +**Detection pattern:** When layer N needs a symbol from layer M > N, ask: "Does this symbol +conceptually belong in layer ≤ N?" If yes, move the symbol. The 0.154.0 `FluentNumber` +relocation (`runtime.value_types` → `core.value_types`) is the canonical example — it was a +violation because `parsing` needed `FluentNumber` to implement `parse_fluent_number()`, but +`parsing` cannot import from `runtime`. + +### 1.5.2 Public Facade Contract + +The three public facades are permanent API contracts. 
A symbol on a facade cannot be removed +or renamed without a CHANGELOG.md `### Breaking Changes` entry. + +| Facade | Import path | Scope | +|:-------|:------------|:------| +| Root | `ftllexengine` | All end-user entry points | +| Runtime | `ftllexengine.runtime` | FluentBundle, AsyncFluentBundle, FluentNumber, FunctionRegistry | +| Localization | `ftllexengine.localization` | FluentLocalization, LocalizationBootConfig, loader types | + +**Submodule paths** (`ftllexengine.runtime.bundle`, `ftllexengine.core.value_types`) are +internal navigation paths, not contracted surfaces. They may be reorganized without breaking +the public contract provided facade re-exports are maintained. + +**Export hygiene:** Every symbol in a facade `__init__.py` must have an explicit `__all__` +entry. Implicit reachability via attribute traversal does not constitute a public contract. + +**Prohibited facade additions:** Symbols that exist only to expose implementation details +(internal cache structures, private lock primitives, parser internals) must not be promoted +to a facade even if callers request it. The facade is a curated surface, not a namespace dump. + +## 1.6 Public Surface Scope Constraint + +**Constraint:** FTLLexEngine is the Python runtime for the Fluent Template Language +specification, with CLDR-backed locale-aware formatting and fail-fast boot validation +with structured audit evidence. Its public surface is bounded by three owned domains plus two narrowly-named +standards datasets. Symbols outside these domains do not belong on any public facade, +regardless of technical merit or caller convenience. 
+ +**The Owned Domains (exhaustive — not a representative sample):** + +| Domain | Bounded by | Examples of in-scope symbols | +|:-------|:-----------|:-----------------------------| +| **FTL specification** | The Fluent 1.0 EBNF and valid.md | parse_ftl, serialize_ftl, validate_resource, AST nodes, FTL built-in functions | +| **CLDR-backed locale formatting** | Babel + Unicode CLDR | FluentBundle, FluentNumber, LocaleCode, normalize_locale, CLDR lookups | +| **Compliance-grade boot and audit** | The FTL/locale pipeline only | LocalizationBootConfig, IntegrityContext, LoadSummary, integrity exceptions arising from FTL resource loading | +| **ISO 4217 currency data** | The ISO 4217 standard as exposed by Babel/CLDR | CurrencyCode, is_valid_currency_code, get_currency_decimal_digits | +| **ISO 3166 territory data** | The ISO 3166-1 alpha-2 standard as exposed by Babel/CLDR | TerritoryCode, is_valid_territory_code, require_territory_code | + +The last two domains are named standards with fixed scope — not a generic "international +standards" category. A new standard (ISO 8601, IETF BCP-47 extensions, ITU-T E.164) is +NOT automatically in-scope because a standard exists; it must be added to this table with +explicit justification, because the table is exhaustive. + +**Mechanical Rejection Test — apply before any new public symbol:** + +1. Does this symbol address a failure mode or composition burden that arises specifically + from the FTL spec, CLDR locale formatting, or the boot/audit pipeline — and not from + general programming? +2. Would this symbol need to exist in a library that exclusively implements FTL parsing, + CLDR-backed locale formatting, and fail-fast boot validation — with no knowledge + of the caller's domain (financial, medical, logistics, etc.)? +3. Is this symbol's definition or behaviour meaningfully coupled to FTL, CLDR, or the + boot pipeline — or could it exist without modification in an unrelated Python library? 
+ +All three questions must be answered YES. A symbol that fails any one is OUT OF SCOPE for +the public facade. It may exist internally if the implementation requires it, but must not +appear in `__all__` of any facade module. + +**Bootstrapping trap:** Defining a new type (e.g., `PhoneNumber`) does not automatically +make a corresponding validator (`require_phone_number`) in-scope. Question 2 applies to +the type itself: would a pure FTL/CLDR/boot library need `PhoneNumber`? If not, neither +the type nor its validator belongs on a public facade. + +**Explicitly Out-of-Scope Categories:** + +* **Generic type validators** (`require_int`, `require_non_negative_int`, + `require_non_empty_str`, `coerce_tuple`, etc.): Every Python program needs integer and + string validation. A stripped FTL/CLDR/boot library would not. Validators are in-scope + only when the validated type is intrinsic to FTL, CLDR, or boot (e.g., + `require_fluent_number` — `FluentNumber` cannot exist outside this library; + `require_locale_code` — locale canonicalization is required by the CLDR formatting + pipeline). + +* **Fiscal calendar** (`FiscalCalendar`, `FiscalDelta`, `FiscalPeriod`, `MonthEndPolicy`, + `fiscal_year`, `fiscal_quarter`, `fiscal_month`, `fiscal_year_start`, `fiscal_year_end`, + `require_fiscal_calendar`, `require_fiscal_period`): Pure date arithmetic with no CLDR + interaction, no Babel dependency, and no FTL parser involvement. Not an ISO standard. + Fails the mechanical rejection test on all three questions — no FTL/CLDR/boot coupling; + would not exist in a stripped FTL/CLDR/boot library; could exist unmodified in any + financial or accounting library. + +* **Accounting/ledger domain** (`LedgerInvariantError`, invariant codes such as + `BALANCE`, `DUPLICATE_ACCOUNT`, `PERIOD_OVERLAP`): Financial ledger semantics are the + caller's domain. A stripped FTL/CLDR/boot library has no concept of a ledger. 
These + symbols would exist unchanged in a CRM or ERP library that never touches FTL. + +* **Storage and persistence domain** (`PersistenceIntegrityError`): Resource *loading* + into the FTL pipeline is in-scope (`ResourceLoader`, `PathResourceLoader` — these are + the boundary at which FTL resources enter the library). Storage layer failures below + that boundary are the caller's concern; a stripped FTL/CLDR/boot library would have + no concept of persistence integrity independent of FTL resource loading. + +* **General concurrency primitives** (`RWLock`, `InterpreterPool`): Concurrency is an + implementation detail of the runtime layer, not a contract offered to callers. Internal + modules use `RWLock` for bundle thread-safety; callers have no need to instantiate it. + `InterpreterPool` is a general PEP 734 pool with no FTL-specific semantics. + +* **Internal resolver machinery** (`FluentResolver`, `ResolutionContext`): These are + implementation details of message resolution. The extension API is `FunctionRegistry` + + `fluent_function`. Callers do not instantiate resolvers. + +**Scope Creep Detection:** + +Scope creep occurs when the library adds a symbol because a caller *could use it* rather +than because *the FTL/CLDR/boot pipeline specifically requires it*. The test is not +"does this help callers?" — everything helpful passes that test. The test is: would a +library stripped to only FTL parsing + CLDR formatting + boot validation still need this +symbol? If not, it does not belong. "Could use" adds surface; "the pipeline requires" +eliminates downstream burden. Only the latter justifies promotion. + +# 2. CODE & OUTPUT CONSTRAINTS + +## 2.1 Professional Output Standard (No-Emoji Policy) +**Constraint:** Enforce strict adherence to professional ASCII standards. +* **PROHIBITED:** Emojis or decorative characters in source code, comments, docstrings, or + commit messages. 
+* **PERMITTED:** Emojis are *only* permissible within **Test Data strings** to validate + Unicode/FTL specification handling (e.g., `parse_ftl("greeting = 👋")` as FTL message + content inside a test fixture). + +## 2.2 Status & Logging Indicators +Use only standardized ASCII indicators for logging and CLI output. + +| Status | Indicator | Rationale | +| :--- | :--- | :--- | +| **Success** | `[OK]`, `[PASS]` | Unambiguous status reporting. | +| **Failure** | `[FAIL]`, `[ERROR]` | High-priority failure flag. | +| **Warning** | `[WARN]` | Deprecation or non-critical state alert. | + +## 2.3 Documentation Standard +* **Docstrings:** All public modules, classes, and functions must have concise docstrings. +* **Style:** Use Google-style docstrings. This is the style established in the existing + codebase; consistency with existing code takes precedence. +* **Typing:** Do not duplicate type information in docstrings; rely on type hints. + +## 2.4 Self-Containment Principle +**Constraint:** Source code, tests, and documentation must NEVER reference CLAUDE.md. + +* **PROHIBITED:** Comments/docstrings referencing "CLAUDE.md", "Section X.Y", or + "per CLAUDE.md" +* **REQUIRED:** All architectural justifications must be self-contained and self-explanatory +* **RATIONALE:** CLAUDE.md is an AI agent directive, not developer documentation. Human + developers must understand design decisions without consulting AI protocols. + +**Examples:** +```python +# PROHIBITED +# Violates CLAUDE.md §1.3 explicit control flow principle + +# REQUIRED +# Uses task-local ContextVar for performance: primitives called 100+ times per parse. +# Explicit context parameter would require 10+ signature changes and 200+ call site updates. +``` + +**Scope:** Applies to all `.py` files, CHANGELOG.md, and user-facing documentation. Internal +protocol files (`.claude/*.md`, `.codex/*.md`, `.gemini/*.md`) are exempt. + +# 3. 
QUALITY HIERARCHY & WAIVERS + +Maintain distinct quality configurations for static analysis. You must respect the specific +configuration files associated with each directory scope. + +## 3.1 Core Production Code (`src/`): STRICT +* **Quality Target:** All linters exit 0: Ruff (zero errors), Mypy (`strict = true`). See §5.7 + for enforcement order. +* **Ruff Configuration:** `select = ["ALL"]` with focused `ignore` list in `pyproject.toml` + (D, ANN, COM812, ISC001, and framework-specific families). New rules apply automatically; + explicit `ignore` or per-file-ignores required for any suppression. +* **Mypy Configuration:** `strict = true`. No unchecked types; full type annotation coverage + required. +* **Waiver Philosophy:** Only permit **Architectural Waivers** (see §3.6). Never permit waivers + for logic bugs, security issues, performance flaws, or dead code. + +## 3.2 Verification Test Code (`tests/`): PRAGMATIC +* **Quality Target:** All linters exit 0: Ruff (zero errors), Mypy (pragmatic). See §5.7 for + enforcement order. +* **Configuration Scope:** + * **Linter:** `pyproject.toml` (Ruff per-directory overrides). + * **Type Checker:** `tests/mypy.ini`. +* **Key Allowed Waivers:** + * `N802` (Naming): Permitted for FTL specification mimicry (e.g., `UPPERCASE_functions`). + * `SLF001` (Private Access): Permitted for integration tests verifying internal object + state. + * `E402`, `PLC0415` (Import Position): Permitted for Hypothesis strategy isolation. + +## 3.3 Example Code (`examples/`): DEMONSTRATIVE +* **Configuration Scope:** + * **Type Checker:** `examples/mypy.ini`. +* **Waiver Philosophy:** Inline configuration is preferred here to serve as documentation for + users on how to handle linting in their own implementations. + +## 3.4 Operational Fuzzing Code (`fuzz_atheris/`): OPERATIONAL +* **Quality Target:** Ruff (zero errors), Mypy (operational — `fuzz_atheris/mypy.ini`). See + §5.7 for enforcement order. 
+* **Configuration Scope:** + * **Linter:** `pyproject.toml` (fuzz_atheris per-directory overrides). + * **Type Checker:** `fuzz_atheris/mypy.ini`. +* **Key Allowed Waivers:** + * `PLR0912`, `PLR0915` (Dispatch Complexity): Pattern handler functions in fuzz modules + MUST use dispatch-to-sub-handlers (see §4.3) rather than monolithic if/elif chains. + Sub-handler functions are individually simple; the dispatcher itself is a one-liner index + into a tuple of callables. This is the canonical pattern — do not suppress PLR0912 on + a monolithic function; refactor first. + * `S101` (assert): Permitted for invariant checks inside fuzz patterns. +* **Fuzz Pattern Architecture:** Each fuzz pattern function (`_pattern_*`) dispatches to a + tuple of sub-handler functions (`_check_*`). Each sub-handler tests one behavioral scenario. + This mirrors the dispatch-to-sub-handlers pattern in §4.3 and keeps individual functions + within McCabe complexity limits. + +## 3.5 No Deferrals Policy +**Constraint:** Technical debt is prohibited. Every issue identified must be resolved +immediately. + +**Prohibited Deferrals:** +* "Fix in next version" — If an issue is found, fix it now. +* "TODO: refactor later" — Refactor immediately or not at all. +* "Known issue" — Unknown issues become known; known issues become fixed. +* Backwards-compatibility shims — Make clean breaks; remove deprecated code entirely. +* Migration paths — Users adapt to the current API; old APIs are deleted, not deprecated. +* Suppression as fix — Never suppress lint/static analysis warnings when the underlying code + can be corrected. Suppression (`# noqa`, `# type: ignore`, `per-file-ignores`) is only + valid for permanent architectural patterns (see §3.6), not for avoiding proper remediation. + +**Rationale:** Deferred fixes accumulate interest. A "small" workaround today becomes an +architectural constraint tomorrow. The cost of immediate remediation is always lower than the +cost of accumulated technical debt. 
+ +## 3.6 Waiver Implementation Protocol +Waivers are for **permanent architectural necessities**, never for deferring fixes. + +1. **Fix First:** Attempt remediation before granting a waiver. Waivers are a last resort. +2. **Permanence Requirement:** A waiver must address a permanent constraint (e.g., Visitor + pattern naming), not a temporary inconvenience. +3. **Scope:** Use `per-file-ignores` in the relevant configuration file for patterns that + apply uniformly to an entire file or directory. Use inline `# noqa` directives for + isolated single-line exceptions within otherwise conformant files. +4. **Documentation:** Every waiver must be accompanied by a concise, high-value comment + justifying the *permanent architectural necessity*. + +**Prohibited Waiver Justifications:** +* "Will fix later" — No. Fix now or not at all. +* "Not enough time" — Time is not an accepted constraint; correctness is. +* "Too complex to refactor" — Complexity is a symptom of design issues; address the root + cause. + +## 3.7 Design Principle Hierarchy (Waiver Recognition) +**Constraint:** Documented architectural waivers OVERRIDE general principles stated in this +document. + +The codebase contains intentional deviations from stated principles where trade-off analysis +justifies the design. 
AI agents must distinguish between: + +| Category | Definition | Action | +|:---------|:-----------|:-------| +| **Principle** | Default mode of operation stated in CLAUDE.md | Apply unless waiver documented | +| **Waiver** | Documented exception with trade-off rationale | Respect; do NOT flag as violation | +| **Violation** | Undocumented deviation without justification | Flag for remediation | + +**Waiver Recognition Signals:** + +A design decision is a DOCUMENTED WAIVER if ANY of these signals are present: +* Module docstring explains trade-off (e.g., "Task-Local State (Architectural Decision)") +* Inline comment includes keywords: "intentional", "trade-off", "architectural", "design + decision" +* Suppression comment provides rationale (e.g., `# noqa: PLC0415 - circular import`) +* Comment explicitly states "permanent" or "accepted" + +**Example: Task-Local ContextVar vs. Explicit Control (§1.3)** + +§1.3 states: "The system demands explicit control flow... over global state or +`threading.local`." + +`primitives.py` uses `contextvars.ContextVar` task-local state (NOT `threading.local`) with +documented justification. ContextVars are async-safe and task-isolated; they do not violate +§1.3's prohibition. The waiver covers the *implicit state* aspect of the principle: +``` +# Task-Local State (Architectural Decision): +# - Primitive functions called 100+ times per parse operation +# - Explicit context threading would require ~10 signature changes +# - ContextVar.get()/set() is O(1) with automatic async task isolation +# This is a permanent architectural pattern... +``` + +This is a WAIVER, not a VIOLATION. The documentation: +1. Acknowledges the principle being relaxed (implicit state) +2. Provides quantitative justification (100+ calls, 10 signatures) +3. Explicitly marks it as a "permanent architectural pattern" + +**Violation Detection:** + +An issue is a TRUE VIOLATION only if ALL of the following hold: +1. 
Behavior contradicts a stated principle (e.g., uses `threading.local` or module-global + mutable state) +2. No documentation within the module docstring OR within the enclosing function/class scope + justifies the deviation +3. No suppression comment provides rationale + +**Agent Responsibility:** + +Before flagging ANY apparent principle violation: +1. Read the module docstring for architectural decisions +2. Search within the enclosing function or class scope for waiver documentation +3. Consult the Known Waiver Registry below +4. If documented with rationale: NOT a violation; respect the waiver +5. If undocumented: VALID violation; proceed with issue + +**Known Waiver Registry:** + +All architectural waivers in `src/`. Each entry is a documented, permanent decision — not a +deferral. + +| Module | Suppressed Rule(s) | Principle Relaxed | Permanent Justification | +|:-------|:------------------|:------------------|:------------------------| +| `syntax/parser/primitives.py` | §1.3 explicit control | §1.3 explicit control topology | `ContextVar` task-local state; 100+ calls/parse; threading via ContextVar gives automatic async isolation with O(1) overhead | +| `core/depth_guard.py` | §1.3 immutability | §1.3 immutability protocol | Mutable `current_depth` counter required by context-manager `__enter__`/`__exit__` protocol; state is strictly scoped to each `with` block | +| `core/babel_compat.py` | PLW0603, F401, PLC0415 | §1.3 explicit control (global singleton) | `_babel_available` is a module-level sentinel; computed once at first call; `global` statement is the only stdlib mechanism for a mutable module-level singleton without a class | +| `syntax/parser/core.py`, `rules.py` | PLR0911, PLR0912, PLR0915 | §4.3 dispatch complexity | EBNF grammar rule dispatch: one function = one grammar rule; branching is structural, not accidental | +| `syntax/serializer.py` | PLR0912 | §4.3 dispatch complexity | Classification-dispatch model (§4.6): `_serialize_pattern`, 
`_emit_classified_line`, `_serialize_expression` branches are exhaustive over closed grammar types | +| `syntax/visitor.py` | ERA001, PLR0911, PLR0912 | §4.3 dispatch complexity | Visitor dispatch + docstring examples (`ERA001`); branching from closed AST node set | +| `runtime/resolver.py` | PLR0911, type:ignore[unreachable] | §4.3 dispatch complexity | `_resolve_expression`, `_get_fallback_for_placeable`: closed `Expression` union type, one return path per variant; `type:ignore[unreachable]` on `_get_fallback_for_placeable` `case _:` — union is statically exhaustive but wildcard is retained as safety net: error-recovery contract must always return a string, never raise | +| `runtime/cache.py` | PLR0911, PLR0912 | §4.3 dispatch complexity | `_make_hashable`: type dispatch over heterogeneous Python values; each branch handles a distinct Python type | +| `introspection/message.py` | N802, RUF022 | §4.1 visitor naming | `visit_NodeName` methods follow stdlib `ast.NodeVisitor` convention; `__all__` organized by category for public/internal clarity | +| `runtime/bundle.py` | PLR0912, E501 | §4.3 dispatch complexity | Resource registration and validation coordination; long lines in structured logging messages | +| `parsing/currency.py` | PLR0911, PLR0912 | §4.3 dispatch complexity | Ambiguous currency symbol disambiguation requires exhaustive symbol/territory resolution | +| `parsing/dates.py` | DTZ007 | Naive datetime | Library does not impose timezone; caller provides timezone-aware values or explicitly opts into naive datetime | +| `runtime/locale_context.py` | DTZ001 | Naive datetime | `format_datetime` promotes a plain `date` to midnight `datetime` with no tzinfo; the date carried no timezone, so none is inferred — this is the correct semantics for a calendar date with no intrinsic time | +| `syntax/parser/whitespace.py` | SIM102 | Style | Nested `if` guards cursor state and EOF simultaneously; merging the conditions hides the state machine intent | +| 
`syntax/validator.py` | EM102 | Style | `TypeError` f-string messages: violation type includes dynamic type; static string would omit it | +| Babel-optional modules (`parsing/`, `runtime/`, `introspection/`, `core/`) | PLC0415 | §4.2 runtime imports | Babel is optional; imports inside functions are the only way to make them lazy (avoids `ImportError` at module load for parser-only installs) | +| `diagnostics/formatter.py`, `diagnostics/validation.py` | PLC0415 | §4.2 runtime imports | Mutual runtime circular: `ValidationError`/`ValidationWarning` require runtime `isinstance` checks in formatter; `DiagnosticFormatter` is instantiated at runtime in validation factory. Neither is type-only — both execute code at call time. §4.2 pattern 2 is the correct resolution. | +| `diagnostics/codes.py` | PLC0415 | §4.2 runtime imports | `Diagnostic.format()` instantiates `DiagnosticFormatter` at runtime; circular between codes and formatter resolved per §4.2 pattern 2. | +| `validation/resource.py` | PLC0415 | §4.2 runtime imports | Resource validation triggers re-parse for annotation extraction; runtime circular between validation and syntax/parser layers. | +| `runtime/resolution_context.py` | §1.3 immutability | §1.3 immutability protocol | `ResolutionContext` uses mutable `_stack`, `_seen`, `_total_chars`, and `_expression_guard` for cycle detection and expansion tracking; §1.3 explicitly permits mutable accumulation buffers in performance-critical operations; isolation is guaranteed by creating a fresh instance per resolution call — no state leaks between concurrent resolutions | +| `runtime/function_bridge.py` | PLC0415 | §4.2 runtime imports | Function metadata loaded lazily on first call; runtime circular between bridge and function_metadata modules. 
| +| `runtime/bundle.py` (PLC0415) | PLC0415 | §4.2 runtime imports | Bundle loads `analysis.graph.entry_dependency_set` and `introspection.extract_references` at runtime; circular between runtime layer and analysis/introspection layers. | +| `core/__init__.py` | PLC0415, module `__getattr__` | §1.3 immutability | Lazy-loads `DepthGuard`/`depth_clamp` via module `__getattr__` to break circular import: `depth_guard` → `diagnostics` → `syntax.__init__` → `serializer` → `core.depth_guard`. Eager import during `ftllexengine` package init would deadlock the import chain. `globals()` mutation in `__getattr__` is a permanent, accepted stdlib pattern for module-level lazy singletons. | +| `parsing/guards.py` | TC003 | §4.2 TYPE_CHECKING | `date`, `datetime`, `Decimal` cannot be moved under TYPE_CHECKING: `typing.get_type_hints()` evaluates TypeIs annotation strings at runtime and requires these names in module globals; moving them causes `NameError` in callers using `get_type_hints()` | +| `syntax/ast.py` | TC001 | §4.2 TYPE_CHECKING | `CommentType` is a public re-exported symbol; consumers do `from ftllexengine.syntax.ast import CommentType` at runtime; moving under TYPE_CHECKING would break this import | +| `localization/boot.py` | §1.3 immutability (`object.__setattr__`) | §1.3 immutability protocol | `_booted` guard requires a single post-init mutation (False→True) on a frozen dataclass. `object.__setattr__` bypasses the generated `__setattr__` — the same mechanism Python's own frozen dataclass `__init__` uses. Config fields remain permanently immutable; only the one-shot guard transitions, once, permanently. No alternative exists without abandoning `frozen=True` or changing the public API. | + + +# 4. DESIGN PATTERNS & LINT INTEGRATION + +## 4.1 Visitor Pattern Implementation +* **Pattern:** Follow the standard library's `ast.NodeVisitor` convention for AST traversal. 
+* **Waiver:** Suppress `N802` (function name snake_case) for dispatch methods like + `visit_Message` to match the node class names. + +## 4.2 Runtime Imports (Circular Dependency Avoidance) +**Two distinct patterns, applied in priority order:** + +1. **`TYPE_CHECKING` guard (preferred for type-only imports):** When a circular dependency + exists only because a type annotation references the other module, wrap the import under + `TYPE_CHECKING`. No `PLC0415` suppression is required (the import is still top-level); the + import is elided at runtime. + ```python + from typing import TYPE_CHECKING + if TYPE_CHECKING: + from ftllexengine.introspection import MessageIntrospection + ``` +2. **Function-local import (runtime circular dependency):** Use only when the circular + dependency cannot be resolved via `TYPE_CHECKING` because the import is needed at runtime + (not just for type annotations). Requires `PLC0415` suppression with rationale. + ```python + def _resolve(self) -> ...: + from ftllexengine.runtime.cache import IntegrityCache # noqa: PLC0415 - runtime circular + ... + ``` +* **Constraint:** `TYPE_CHECKING` is always preferred. New `PLC0415` suppressions require + explicit justification proving `TYPE_CHECKING` is insufficient. + +## 4.3 Handling Complex Dispatch Logic +* **Pattern:** Grammar-derived or specification-driven dispatch logic has inherently high + branching complexity. This applies to both the parser and the serializer. +* **Waiver:** Suppress `PLR0912` (too many branches) and `PLR0915` (too many statements) for: + * The main parser loop (`syntax/parser/core.py`) — EBNF grammar rule dispatch + * Serializer classification-dispatch methods (`syntax/serializer.py`: + `_serialize_pattern`, `_emit_classified_line`) — documented in §4.6 + +**Fuzz pattern handlers:** `_pattern_*` functions in `fuzz_atheris/` that cover many +behavioral scenarios MUST use dispatch-to-sub-handlers rather than a single if/elif chain. 
+The top-level handler selects a sub-handler via an integer index into a tuple; each +sub-handler is a standalone function covering one scenario. This keeps each function under +complexity limits and makes scenario coverage explicit. Do NOT suppress PLR0912 on a monolithic +function — refactor it. + +## 4.4 Type Narrowing (Union Types) +**Critical Implementation:** Never access attributes of a Union type without prior runtime +validation. +* **Action:** Always use explicit `isinstance()` checks or `match/case` blocks to narrow the + type before accessing specific attributes. + +```python +# Type-Safe Narrowing Example +from ftllexengine.syntax.ast import Message, Term, Pattern + +def get_entry_id(entry: Message | Term) -> str: + """Extract identifier from Message or Term using pattern matching.""" + match entry: + case Message(id=identifier): + return identifier.name + case Term(id=identifier): + return identifier.name + case _: + raise TypeError(f"Unexpected entry type: {type(entry)}") +``` + +## 4.5 Facade Layer (FluentBundle, FluentLocalization, LocalizationBootConfig) + +The facade layer is where the platform axioms from §1.1 are realized. All three facade classes +coordinate subsystems; none implement the logic they coordinate. The dependency graph is +**unidirectional** — delegate modules MUST NOT import any facade class. + +### 4.5.1 FluentBundle — Single-Locale Formatting Unit + +`FluentBundle` is the core formatting unit. It owns a single locale and a set of parsed FTL +resources. 
+ +| Responsibility | Delegate Module | FluentBundle Role | +|:---------------|:----------------|:------------------| +| Parsing | `syntax.parser.FluentParserV1` | Calls `parse()`, registers results | +| Resolution | `runtime.resolver.FluentResolver` | Instantiates, calls `resolve_message()` | +| Validation | `validation.validate_resource()` | Single-line delegation | +| Introspection | `introspection.extract_variables()`, `introspect_message()` | Single-line delegation | +| Caching | `runtime.cache.IntegrityCache` | Holds reference, calls `get()`/`put()` | + +**Metric Clarification:** FluentBundle has a high docstring-to-code ratio because it is the +primary public API facade. This is expected given the mandate in §2.3. High docstring ratio +is not debt. + +### 4.5.2 FluentLocalization — Multi-Locale Coordinator + +`FluentLocalization` coordinates a set of locale-scoped `FluentBundle` instances and +implements the fallback chain. It does not hold bundles eagerly — bundle creation is lazy on +first `format_pattern` call for a given locale. + +| Responsibility | Delegate | FluentLocalization Role | +|:---------------|:---------|:------------------------| +| Resource loading | `ResourceLoader` protocol | Calls `loader.load(locale, resource_id)` | +| Bundle management | `FluentBundle` | Creates on demand, holds in `_bundles` dict | +| Fallback resolution | Locale chain | Iterates locale list until format succeeds | +| Boot validation | `require_clean()`, `validate_message_schemas()` | Provides pre-traffic validation API | +| Audit log | `FluentBundle.get_cache_audit_log()` | Aggregates per-locale logs into dict | + +### 4.5.3 LocalizationBootConfig — Strict-Mode Boot Orchestrator + +`LocalizationBootConfig` is a one-shot boot coordinator, not a persistent object. It composes +`FluentLocalization`, `require_clean()`, and `validate_message_schemas()` into a single +audited boot sequence and discards itself after `boot()` returns the live `FluentLocalization`. 
+ +* `boot()` → `(FluentLocalization, LoadSummary, tuple[MessageVariableValidationResult, ...])`: + PRIMARY API; executes full boot sequence and returns structured evidence for audit trails; + raises `IntegrityCheckFailedError` on any load failure, required-message absence, or schema + mismatch. +* `boot_simple()` → `FluentLocalization`: simplified form; raises on failure but discards + audit evidence; use when structured evidence is not required. +* The `LocalizationBootConfig` instance has no role after `boot()` completes. It is not + thread-safe to share across calls. + +**PROHIBITED Refactorings (all three facades):** +* Extracting facade methods into mixins (creates hidden C3 linearization complexity) +* Creating "Service" wrappers around single-line delegation methods (adds indirection, zero + benefit) +* Lifting delegate module internals to the facade (violates the unidirectional dependency + graph) + +## 4.6 Serializer Architecture (FluentSerializer) +**Pattern:** The serializer is a deterministic AST-to-FTL compiler. Its architecture separates +three concern layers and enforces a classify-then-dispatch model for continuation line emission. 
+ +### 4.6.1 Architectural Layers + +| Layer | Responsibility | Methods | +|:------|:---------------|:--------| +| **Validation** | AST structural correctness (separate pass, runs first) | `_validate_resource`, `_validate_expression`, `_validate_pattern`, `_validate_call_arguments`, `_validate_identifier`, `_validate_select_expression` | +| **Node Serialization** | AST node dispatch via `match/case` | `_serialize_entry`, `_serialize_message`, `_serialize_term`, `_serialize_attribute`, `_serialize_comment`, `_serialize_junk`, `_serialize_expression`, `_serialize_call_arguments`, `_serialize_select_expression` | +| **Pattern Emission** | Continuation line classification, whitespace preservation, character escaping | `_serialize_pattern`, `_classify_line`, `_escape_text` | + +**Constraint:** Validation runs BEFORE serialization. Serialization code assumes validated +input. These layers MUST NOT be merged. + +### 4.6.2 Continuation Line Model + +The FTL parser interprets continuation lines structurally: leading whitespace is syntactic +indent, blank lines are stripped, and characters `.`, `*`, `[` as the first non-whitespace +trigger attribute/variant parsing. The serializer MUST ensure that content whitespace and +content syntax characters are not misinterpreted as structural. + +**Invariant:** Every continuation line emitted by the serializer must be unambiguous under FTL +parsing rules. Ambiguity is resolved by wrapping problematic content in `StringLiteral` +placeables (`{ "..." }`), which the parser treats as expression content, not structural syntax. + +**Classification-Before-Dispatch:** + +Each continuation line is classified ONCE by a pure function, then handled through a single +`match/case` dispatch: + +```python +class _LineKind(Enum): + EMPTY = auto() # No content (just structural indent) + WHITESPACE_ONLY = auto() # All spaces; parser would strip as blank line + SYNTAX_LEADING = auto() # First non-ws char is . 
or * or [; parser + # would interpret as attribute/variant + NORMAL = auto() # Unambiguous text content +``` + +| Kind | Ambiguity | Resolution | +|:-----|:----------|:-----------| +| `EMPTY` | None | Emit structural indent only | +| `WHITESPACE_ONLY` | Parser strips blank continuation lines | Wrap entire line in `StringLiteral` placeable | +| `SYNTAX_LEADING` | Parser treats first non-ws char as structural | Emit leading spaces as text, wrap syntax char in `StringLiteral` placeable | +| `NORMAL` | None (may contain braces that need escaping) | Emit with brace escaping via `_escape_text` | + +**PROHIBITED:** +* Handling whitespace ambiguity classes outside the classification-dispatch model (no scattered + `if` branches in multiple methods) +* Adding line-level concerns to `_escape_text` (it handles character-level brace escaping only) +* Modifying AST nodes to carry serializer-specific layout hints (AST represents language + structure, not rendering) +* Event/Layout/Emitter pipeline abstractions (overengineered for the Fluent 1.0 grammar, + which is a finalized specification with a fixed, closed node set) + +### 4.6.3 Separate-Line Mode + +When a pattern contains cross-element whitespace dependencies (a `TextElement` starting with +spaces follows a newline-ending element), the serializer outputs the pattern on a separate +line from `=` to establish `initial_common_indent` before any semantic whitespace. This is a +**pattern-level** decision, orthogonal to the per-line classification in §4.6.2. + +**Interaction:** `WHITESPACE_ONLY` and `SYNTAX_LEADING` lines are handled by per-line +wrapping, NOT by separate-line mode. Only `NORMAL` lines with leading whitespace after a +cross-element newline trigger separate-line mode. 
+ +### 4.6.4 Character-Level Escaping (`_escape_text`) + +The `_escape_text` function handles ONLY brace escaping: `{` and `}` at any position are +wrapped as `StringLiteral` placeables (per Fluent spec, braces in `TextElement` content must +be expressed as `{ "{" }` and `{ "}" }`). + +All other ambiguity concerns are resolved BEFORE `_escape_text` is called: +* Syntax characters (`.`, `*`, `[`) at continuation line starts: handled by + `_emit_classified_line` (`SYNTAX_LEADING` branch) +* Whitespace-only lines: handled by `_emit_classified_line` (`WHITESPACE_ONLY` branch) +* Newline detection and continuation line boundaries: text is pre-split by + `_serialize_pattern` + +### 4.6.5 Exhaustiveness + +All `match/case` dispatches on closed union types (`Entry`, `Expression`, `_LineKind`) MUST be +exhaustive. Use `assert_never()` from `typing` for enum dispatches and explicit +`case _: raise TypeError(...)` for AST union dispatches where the union may grow. + +## 4.7 Ruff Configuration and Operational Rules + +**Configuration:** `select = ["ALL"]` in `[tool.ruff.lint]`. New rules apply automatically; +explicit `ignore` or per-file-ignores required for any suppression. No curated allow-list — +the ignore list must justify every exception. + +### 4.7.1 Global `ignore` vs Per-File-Ignores + +| Mechanism | Use when | +|:----------|:---------| +| Global `ignore` | Rule NEVER applies anywhere in the codebase (wrong framework, redundant with mypy strict, formatter territory) | +| Per-file-ignores | Rule is valid for most files but a specific file has a documented architectural reason for an exception | +| Per-directory blanket | Entire directory has a distinct quality standard (`tests/`, `examples/`, `fuzz_atheris/`, `scripts/`) | + +**Prohibited:** Suppressing a rule globally because one file needs it. One file's exception +belongs in per-file-ignores, not the global ignore list. 
+ +### 4.7.2 TC001/TC003 (TYPE_CHECKING Imports) — Non-Negotiable Exceptions + +Two categories of imports **must never** be moved under `TYPE_CHECKING`, even when TC fires: + +1. **TypeIs annotation types**: `typing.get_type_hints()` evaluates annotation strings at + runtime in the module's `globals()`. If `date`, `datetime`, `Decimal` (or any type used in + `-> TypeIs[X]`) are under `TYPE_CHECKING`, `get_type_hints()` raises `NameError` at runtime + in callers. + - Affected: `parsing/guards.py` (`date`, `datetime`, `Decimal`) + - Fix: keep as direct import; add + `# noqa: TC003 - TypeIs return annotation requires X at runtime for get_type_hints() resolution` + +2. **Public re-exported symbols**: If callers do + `from ftllexengine.syntax.ast import CommentType` at runtime, moving `CommentType` under + `TYPE_CHECKING` in `ast.py` makes the import fail. + - Affected: `syntax/ast.py` (`CommentType`) + - Fix: keep as direct import; add + `# noqa: TC001 - CommentType is re-exported as a public runtime symbol` + +Both are in the Known Waiver Registry (§3.7). + +### 4.7.3 FBT001/FBT002 (Boolean Traps) — Fix Pattern + +Ruff FBT flags boolean-typed positional parameters. **Preferred fix:** make the argument +keyword-only by adding `*` before it. + +```python +# BEFORE (FBT001 fires) +def get_patterns(locale: str, allow_expansion: bool = True) -> list[str]: ... + +# AFTER (FBT resolved) +def get_patterns(locale: str, *, allow_expansion: bool = True) -> list[str]: ... +``` + +After making an arg keyword-only, check all call sites — mypy reports "too many positional +arguments" for any missed site. + +**Acceptable waiver** (for truly internal private functions): add to per-file-ignores with +rationale. Do not add FBT to the global ignore. + +### 4.7.4 C901 (McCabe Complexity) — Waiver Pattern + +Grammar rules, AST visitor dispatch, and closed-union dispatch legitimately exceed the McCabe +threshold. 
Add C901 alongside PLR0912 in per-file-ignores: + +```toml +"src/ftllexengine/syntax/parser/rules.py" = ["PLR0911", "PLR0912", "PLR0915", "C901"] +``` + +Rationale comment template: `"Grammar/AST dispatch: one function = one grammar rule; +cyclomatic complexity is structural, not accidental."` + + +# 5. VERIFICATION METHODOLOGY + +## 5.1 Test File Naming Schema + +Test file naming is a hard structural constraint, not a style preference. It determines +discoverability: an agent searching for tests covering `runtime/bundle.py` must be able to +predict the filename without scanning all 200+ test files. + +**Canonical schema:** `test_{package}_{module}[_{qualifier}].py` + +| Segment | Derived from | Examples | +|:--------|:-------------|:---------| +| `{package}` | `src/ftllexengine/` subpackage name | `runtime`, `syntax`, `parsing`, `diagnostics` | +| `{module}` | Module filename without `.py` | `bundle`, `resolver`, `serializer` | +| `{qualifier}` | Optional single axis (see permitted list) | `_property`, `_integration` | + +For nested subpackages, join segments with underscore: +`src/ftllexengine/syntax/parser/core.py` → `test_syntax_parser_core.py` + +For top-level modules (`src/ftllexengine/enums.py`), omit the package segment: +`test_enums.py` + +**Permitted qualifiers (exhaustive list):** + +| Qualifier | Meaning | Runs in CI? | +|:----------|:--------|:------------| +| *(none)* | Primary unit/contract tests | Yes | +| `_property` | Hypothesis `@given` tests | Yes | +| `_integration` | Multi-component tests crossing module boundaries | Yes | +| `_roundtrip` | Serialization/parse identity verification | Yes | +| `_state_machine` | `RuleBasedStateMachine` tests (in `tests/fuzz/` only) | No | + +No other qualifiers are permitted. If a file cannot be classified by one of these axes, +it belongs in an existing file or signals that file should be split. + +**Fuzz-marker test location:** All tests carrying `@pytest.mark.fuzz` MUST reside in +`tests/fuzz/`. 
The `tests/` root contains only tests that run in CI without the fuzz marker. +A `_property` file in `tests/` root is NOT a fuzz file even if it uses `@given`; the marker +and directory are what determine fuzz status (see §5.8). + +**Deprecated suffixes — prohibited for new files:** + +| Deprecated suffix | Canonical replacement | +|:------------------|:----------------------| +| `_hypothesis` | `_property` | +| `_fuzzing` | Move file to `tests/fuzz/` | +| `_properties` | `_property` | +| `_comprehensive` | *(none; split into focused files by axis)* | +| `_advanced` | *(none; not a behavioral axis)* | +| `_edge_cases` | *(none; fold edge cases into primary or property file)* | + +**Files name systems under test, not motivations for writing them:** + +``` +PROHIBITED: test_system_quality_audit_fixes.py (internal task reference) +PROHIBITED: test_diagnostics_and_runtime_behaviors.py ("and" = two subjects) +PROHIBITED: test_cross_module_branch_coverage.py (coverage technique, not subject) +PROHIBITED: test_bundle_advanced_hypothesis.py (two deprecated qualifiers) + +REQUIRED: test_runtime_bundle_property.py +REQUIRED: test_diagnostics_formatter_integration.py +REQUIRED: test_runtime_resolver_property.py +``` + +"And" in a filename is a mandatory split signal: the file covers two subjects and must +become two files. A file name that cannot map back to a single source module path is invalid. + +## 5.2 Hypothesis-First Protocol +Property-Based Testing (Hypothesis) is the **primary** mechanism for verification, not an +afterthought. Unit tests with fixed inputs are appropriate only for CLDR-mandated exact output +values and `@example`-promoted Hypothesis failures (regression cases). All other verification +uses Hypothesis. + +**HypoFuzz Symbiosis:** All Hypothesis tests are designed for coverage-guided fuzzing via +HypoFuzz. Tests and strategies MUST emit semantic coverage signals via `hypothesis.event()` to +guide the fuzzer toward interesting code paths. 
+ +## 5.3 Test Construction Strategy +Do not simply "fuzz" the code. You must construct tests based on deep code analysis: + +### 5.3.1 Identify Properties +Before writing code, identify the mathematical properties of the component: +* *Roundtrip:* `decode(encode(x)) == x` +* *Idempotence:* `parse(parse(x).to_string()) == parse(x)` +* *Oracle:* Compare behavior against ShadowBundle or reference implementation +* *Metamorphic:* Predictable relationships (e.g., `len(filter(xs)) <= len(xs)`) + +### 5.3.2 Emit Semantic Coverage Events (MANDATORY) +**Constraint:** Every `@given` test — regardless of file or marker — MUST use `hypothesis.event()` +to signal semantically interesting behaviors invisible to code coverage. HypoFuzz treats events +as virtual branches, actively seeking inputs that produce new events. Preflight enforces this +across ALL `@given` tests, not just fuzz-marked modules. + +```python +from hypothesis import event, given +from tests.strategies.ftl import ftl_placeables + +@given(placeable=ftl_placeables()) +def test_placeable_serialization(placeable: Placeable) -> None: + # REQUIRED: Emit event for expression type diversity + event(f"expr_type={type(placeable.expression).__name__}") + + result = serialize(placeable) + parsed = parse(result) + + # REQUIRED: Emit event for error paths + if parsed.errors: + event(f"error={type(parsed.errors[0]).__name__}") + + assert parsed.ast == placeable +``` + +**Event Taxonomy (Use Consistently):** + +| Category | Format | Examples | +|:---------|:-------|:---------| +| Strategy choice | `strategy={variant}` | `strategy=placeable_variable`, `strategy=chaos_prefix_brace` | +| Domain classification | `{domain}={variant}` | `currency_decimals=2`, `territory_region=europe` | +| Boundary/depth | `boundary={name}`, `depth={n}` | `boundary=at_max_depth`, `depth=99` | +| Unicode category | `unicode={category}` | `unicode=emoji`, `unicode=cjk` | +| Property outcome | `outcome={result}` | `outcome=roundtrip_success`, 
`outcome=immutability_enforced` | +| Test parameter | `{param}={value}` | `thread_count=20`, `cache_size=50`, `reentry_depth=3` | +| State machine | `rule={name}`, `invariant={name}` | `rule=add_simple_message`, `invariant=cache_stats_consistent` | + +**Strategy Events vs Test Events:** + +* **Strategy events** are emitted by strategy functions in `tests/strategies/`. They are + tracked by `EXPECTED_EVENTS` in `tests/strategy_metrics.py` and drive strategy-level coverage + metrics. Format: `strategy={family}_{variant}` or `{domain}={variant}`. +* **Test events** are emitted by individual `@given` test functions and `@rule`/`@invariant` + methods. They guide HypoFuzz per-test but are NOT tracked by `EXPECTED_EVENTS`. Format: + `{param}={value}`, `outcome={result}`, `rule={name}`. + +When adding a new strategy, update `EXPECTED_EVENTS`. When adding test events, no metrics +update is needed. + +### 5.3.3 Strategy Construction (Soundness Over Exhaustion) +* Use `st.from_type()` and `st.builds()` to construct valid domain objects +* **Avoid:** High-rejection-rate filters on loose primitives (e.g., + `st.text().filter(is_valid_ftl)`). Low-rejection filters on constrained strategies are + acceptable when they improve readability. +* **REQUIRED:** Strategies MUST emit events when selecting between semantically distinct + variants + +```python +@composite +def ftl_placeables(draw: st.DrawFn, max_depth: int = 2) -> Placeable: + """Generate Placeable AST nodes. + + Events emitted: + - strategy=placeable_{choice}: Type of expression generated + """ + choice = draw(st.sampled_from(["variable", "function_ref", "term_ref"])) + + # REQUIRED: Emit strategy choice for fuzzer guidance + event(f"strategy=placeable_{choice}") + + # ... generation logic ... +``` + +### 5.3.4 Contextual Awareness +Investigate how code is called. Define strategies that mirror real usage patterns (e.g., +chunked buffer inputs vs. whole-string inputs). 
+ +### 5.3.5 Event Verification +**Constraint:** Verify event infrastructure coverage. + +```bash +./scripts/fuzz_hypofuzz.sh --preflight +``` + +**Enforcement Levels:** +1. **File-level:** Every `@pytest.mark.fuzz` module MUST contain `event()` calls. +2. **Per-test (AST-based):** Every `@given` test function across ALL test files (both + `tests/` root and `tests/fuzz/`) MUST emit at least one semantic event. The preflight tool + parses all test files via Python AST to verify this — the check is not scoped to fuzz-marked + modules. Any `@given` test without `event()` fails preflight with exit code 1. +3. **Strategy file coverage:** Every strategy implementation file in `tests/strategies/` MUST + emit `event()` calls. `__init__.py` is exempt as a pure re-export aggregator (enforced by + `_STRATEGY_REEXPORT_FILES` in the preflight script). A strategy file with 0 events gives + HypoFuzz zero semantic guidance — treated as an error, not a warning. +4. **Zero gaps:** Preflight must report zero gaps at all three levels. Any gap causes exit + code 1. + +**Violation:** If preflight shows fuzz modules, individual tests, or strategy files without +events, fuzzing sessions will have reduced semantic guidance. HypoFuzz captures events +internally for coverage decisions — components without events provide no semantic signals. + +**Scope Limitation:** Preflight validates `@given` tests only. `RuleBasedStateMachine` rules +and invariants use `@rule`/`@invariant` decorators (not `@given`), so their event coverage +is not checked by preflight. State machine event coverage is verified manually. + +### 5.3.6 Runtime Strategy Metrics + +The runtime metrics system (`tests/strategy_metrics.py`) complements preflight's static +analysis with dynamic event collection during test execution. 
+ +**Three Core Constants:** + +| Constant | Purpose | +|:---------|:--------| +| `EXPECTED_EVENTS` | Set of fully-expanded event strings expected from all strategies | +| `STRATEGY_CATEGORIES` | Maps event prefixes to human-readable strategy family names | +| `INTENDED_WEIGHTS` | Expected per-variant distribution within each strategy family | + +**Metrics Collected:** Total events, per-strategy counts, weight skew (threshold: 0.15), +coverage gaps, performance percentiles. + +**Preflight vs Runtime Distinction:** + +| Aspect | Preflight (`--preflight`) | Runtime (`--deep --metrics`) | +|:-------|:--------------------------|:-----------------------------| +| Method | Static AST analysis | Dynamic event collection | +| Question | "Does `event()` exist in code?" | "Which events fired? At what frequencies?" | +| Catches | Missing instrumentation | Dead code paths, weight skew | +| Speed | Instant (no test execution) | Requires full test run | + +**Activation:** + +```bash +./scripts/fuzz_hypofuzz.sh --deep --metrics +``` + +Environment variables: `STRATEGY_METRICS=1`, `STRATEGY_METRICS_LIVE=1`, +`STRATEGY_METRICS_DETAILED=1`. Results saved to `.hypothesis/strategy_metrics.json`. + +**Maintenance:** When adding a new event-emitting strategy in `tests/strategies/`, update all +three constants in `tests/strategy_metrics.py`. Test-level events (emitted by `@given` tests, +not strategies) do not require metrics updates. + +## 5.4 The Feedback Loop (Regression Proofing) +* **Discovery:** When Hypothesis finds a failure, it caches the minimal failing example in + `.hypothesis/examples/` +* **Action:** Investigate the root cause. 
Distinguish between a genuine bug and an incorrect + test assumption +* **Promotion:** For every non-trivial bug found, **promote the failing example** into the + test suite using the `@example(...)` decorator + +```python +@example(ftl="edge-case = { $var") # Promoted from Hypothesis finding +@given(ftl=ftl_simple_messages()) +def test_roundtrip(ftl: str) -> None: + ... +``` + +**Crash Recording Infrastructure:** When a Hypothesis test fails, the `conftest.py` crash +recording hook (`pytest_runtest_makereport`) automatically: +1. Generates a standalone `repro_crash_.py` reproduction script in + `.hypothesis/crashes/` +2. Saves JSON metadata (test ID, example args, error type, timestamp) alongside the script +3. Creates portable crash files that persist independently of `.hypothesis/examples/` and + survive database cleanup + +Use `./scripts/fuzz_hypofuzz.sh --repro` or run crash scripts directly for reproduction. + +## 5.5 Database Persistence +The Hypothesis example database (`.hypothesis/examples/`) persists across fuzzing sessions. It +stores failing examples and covering examples (inputs that trigger distinct code paths during +`Phase.reuse`). + +**Cross-Session Value:** +* **Phase.reuse:** Replays stored examples FIRST, catching regressions immediately +* **Example accumulation:** Each `--deep` session discovers new covering examples and failures +* **Shrink memory:** Minimal failing examples preserved across runs + +**Constraint:** Do NOT delete `.hypothesis/` between fuzzing sessions unless intentionally +resetting the database. A 30-minute session today + 30-minute session tomorrow = 60 minutes +of cumulative learning. + +## 5.6 Hypothesis Profiles +Profiles are defined in `tests/conftest.py`. 
Use the appropriate profile for context: + +| Profile | max_examples | deadline | Use Case | +|:--------|:-------------|:---------|:---------| +| `dev` | 500 | 200ms | Local development | +| `ci` | 50 | 200ms | Fast CI feedback (reproducible) | +| `verbose` | 100 | 200ms | Debugging with progress output | +| `hypofuzz` | 10000 | None | Coverage-guided `--deep` runs | +| `stateful_fuzz` | 500 | None | State machine fuzzing | + +**Profile Details:** +* All profiles include `Phase.target` for targeted property exploration via `target()`. +* `ci` uses `derandomize=True` for reproducible builds and `print_blob=True` for failure + reproduction. +* `hypofuzz` suppresses `HealthCheck.too_slow` and `HealthCheck.data_too_large` for intensive + runs. +* `fuzz_hypofuzz.sh --deep` automatically sets `HYPOTHESIS_PROFILE=hypofuzz`. + +## 5.7 Workflow Execution Order +The execution of scripts defines the quality gate. **All three steps must pass in order.** + +1. **Lint:** `./scripts/lint.sh` (Ruff → Mypy). Must exit code 0. +2. **Test:** `./scripts/test.sh` (Pytest + Hypothesis + Coverage). Must meet the 95% + threshold. Must exit code 0. +3. **Preflight:** `./scripts/fuzz_hypofuzz.sh --preflight` (AST-based event audit). Must exit + code 0. Run whenever `tests/` or `tests/strategies/` files are modified. Runs in seconds + (no test execution); zero cost to always run. + +### Script Output Design (Agent-Native, Log-on-Fail) +Both `lint.sh` and `test.sh` are AI-agent-optimized with a **log-on-fail** design. Run them +directly without any output truncation: + +```bash +./scripts/lint.sh +./scripts/test.sh +``` + +**NEVER pipe through `tail`, `head`, or any output limiter. NEVER append redirection operators +(`2>&1`, `>`, `>>`).** The output is already appropriately sized: +* **On success:** emits only structured summary lines (`[PASS]`, JSON block). Already minimal + — no truncation needed. +* **On failure:** captures the full diagnostic log, then dumps it all at once. 
This dump IS + the analysis. Truncating it destroys the error context needed for diagnosis. + +Limiting output (e.g., `| tail -100`) means on failure you see only the summary footer, +missing the actual error details. Redirecting stderr (e.g., `2>&1`) loses the distinction +between stdout and the Bash tool's inherent stderr capture. The scripts are designed so the +agent never needs to re-run them to get more detail. + +## 5.8 Fuzz Test Skip Designation (Standardized) +**Constraint:** Intensive property tests excluded from normal runs use `@pytest.mark.fuzz` and +a standardized skip reason. + +### Decision Criteria: When to Apply `@pytest.mark.fuzz` + +The fuzz marker controls whether a test **runs at all** during `test.sh`. It is independent of +`event()` calls (which are mandatory in ALL `@given` tests per §5.3.2) and independent of +Hypothesis profiles (which control example counts when a test does run). + +| Test Category | Runs in CI? | Fuzz Marker? | Example Count | +|:--------------|:------------|:-------------|:--------------| +| Regular `@given` with `event()` | Yes | No | `ci`=50, `dev`=500 | +| Intensive fuzz-only | No (skipped) | `@pytest.mark.fuzz` | Only under `--deep` (10000) | + +**Apply `@pytest.mark.fuzz` ONLY when** the test meets one or more of these criteria: +* **State machines** (`RuleBasedStateMachine`) that explore exponential state spaces +* **Generators producing expensive objects** (deeply nested ASTs, large resources) where even + 50 examples would exceed CI time budgets +* **Tests with `deadline=None`** that intentionally allow slow individual examples +* **Tests requiring `suppress_health_check`** for `too_slow` or `data_too_large` + +**Hard placement rule:** Any test that uses `deadline=None` or +`suppress_health_check=[HealthCheck.too_slow]` MUST carry `@pytest.mark.fuzz` and reside in +`tests/fuzz/`. These settings signal that the test is intentionally slow — running 50 such +tests in CI would blow time budgets. 
Examples: boot-sequence tests that construct real loaders, +state machines. Do NOT place `deadline=None` +tests in `tests/` root even if they have bounded strategies. + +**Never hardcode `max_examples` in `tests/fuzz/`:** Fuzz tests MUST NOT set `max_examples=N` +in their `@settings` decorator. The `hypofuzz` profile controls exploration depth (10,000 for +`--deep --metrics`, continuous for HypoFuzz). Hardcoding `max_examples` overrides the profile +and artificially caps exploration — a `@settings(max_examples=20)` test runs only 20 examples +even under the `hypofuzz` profile's 10,000 budget. The only meaningful settings for fuzz tests +are `deadline=None`, `suppress_health_check`, and `stateful_step_count` (state machines only). + +**Do NOT apply `@pytest.mark.fuzz`** to standard `@given` tests with bounded strategies and +no deadline suppression. These run fast at 50 examples and benefit from CI regression +coverage. The Hypothesis profile system (`ci`/`dev`/`hypofuzz`) automatically scales example +counts — the same test runs with 50 examples in CI and 10000 under `--deep` without any +marker. + +### Marker Mechanics + +* **Marker:** `@pytest.mark.fuzz` at class or module level (`pytestmark = pytest.mark.fuzz`). +* **Skip Reason Prefix:** All fuzz skips use the reason prefix `"FUZZ:"`. The canonical reason + string is: + ``` + FUZZ: run with ./scripts/fuzz_hypofuzz.sh --deep or pytest -m fuzz + ``` +* **Prefix Requirement:** The `"FUZZ:"` prefix is a structural contract consumed by + `conftest.py` and `test.sh` for skip categorization. Do not alter the prefix. +* **Skip Breakdown Reporting:** `test.sh` emits `skipped_fuzz` and `skipped_other` in the + JSON summary. If `skipped_other > 0`, a `[WARN]` is emitted indicating non-fuzz tests were + skipped and require investigation. +* **Prohibited Variations:** `"SKIPPEDfuzz"`, `"SKIPPED fuzz"`, `"Fuzzing test"`, or any + other ad-hoc skip reason for fuzz tests. All fuzz skip reasons MUST use the `"FUZZ:"` prefix. 
+ +### HypoFuzz Targeting Rationale + +`--deep` targets `tests/fuzz/` exclusively — NOT `tests/`. This is a deliberate concentration +strategy: + +| Target | Effect | +|:-------|:-------| +| `tests/fuzz/` (correct) | 4 workers concentrated on ~35 high-value, slow, open-ended targets | +| `tests/` (wrong) | 4 workers diluted across 1500+ tests, most of which are fast and bounded | + +Pointing HypoFuzz at `tests/` wastes worker capacity on tests that already run fine under CI's +50-example budget. The fuzz directory exists precisely to give HypoFuzz a concentrated set of +targets where unlimited exploration has the highest marginal value: state machines, pool +concurrency, boot sequences, subinterpreters. When adding new fuzz targets, always place them +in `tests/fuzz/`; `tests/` tests are CI regression suites, not fuzzing targets. + +## 5.9 Advanced: Targeted Fuzzing with target() +All profiles include `Phase.target`, so `target()` is active in every test run. Use it to +guide Hypothesis toward inputs that maximize specific metrics: + +```python +from hypothesis import given, settings, target + +@settings(deadline=None) +@given(source=ftl_chaos_source()) +def test_parser_recovery(source: str) -> None: + result = parse(source) + # Guide fuzzer toward inputs with more junk nodes (parser stress) + target(len([e for e in result.body if isinstance(e, Junk)]), label="junk_count") +``` + +The `target()` function accepts a numeric value and an optional label. Hypothesis actively +seeks inputs that maximize the targeted metric, making it effective for hunting specific bug +classes (deep nesting, large error counts, parser recovery stress). + +# 6. DOCUMENTATION PROTOCOL (MANDATORY) + +## 6.1 Governing Protocol +**Constraint:** All markdown file operations MUST comply with PROTOCOL_AFAD.md (v4.0). 
+ +| File Pattern | Tier | Protocol Section | +|:-------------|:-----|:-----------------| +| `docs/DOC_*.md` | Reference | AFAD reference-doc rules | +| `README.md` (repository root) | Storefront special case | `AGENTS.md` root README exception | +| `*.md` (all other repo markdown) | Auxiliary / special | AFAD auxiliary-doc rules or native document convention | + +**Protocol Location:** `.codex/PROTOCOL_AFAD.md` + +## 6.2 Protocol Enforcement +**Before ANY markdown file operation**, the AI agent MUST: + +1. **LOAD** `.codex/PROTOCOL_AFAD.md`. +2. **IDENTIFY** the file tier (Reference = `DOC_*.md`, Auxiliary = other). +3. **COMPLY** with all schema requirements, formatting rules, and validation checks. +4. **REJECT** any user request that would violate the protocol. + +## 6.3 Reference Documentation (AFAD v4.0) +Applies to: `docs/DOC_00_Index.md`, `docs/DOC_01_*.md`, `docs/DOC_02_*.md`, etc. + +**Requirements:** +* YAML frontmatter with `afad: "4.0"`, `version`, `domain`, `updated`, `route` +* Component Entry Schema: Signature, Parameters table, Constraints +* First line states what symbol IS (embeddability) +* Minimal one-shot examples permitted (≤5 lines) +* Full type annotations on all signatures +* Entry ≤600 tokens (atomicity) + +## 6.4 Auxiliary Documentation (AFAD v4.0) +Applies to: any repo `*.md` file that does NOT match `docs/DOC_*.md`, except the repository +root `README.md` storefront special case. Examples: `CHANGELOG.md`, `docs/*_GUIDE.md`, +`docs/THREAD_SAFETY.md`, `examples/README.md`. 
+ +**Requirements:** +* YAML frontmatter with `afad: "4.0"`, `version`, `domain`, `updated`, `route` where the file convention permits it +* Purpose/Prerequisites/Overview structure for guides +* Economy of words (no filler phrases) +* All code blocks specify language and are runnable +* QUICK_REFERENCE: task-oriented, copy-paste, zero prose +* Root `README.md` stays human-first and does not require AFAD frontmatter + +## 6.5 Prohibited Actions +The AI agent MUST NOT: + +* Create or modify markdown files without loading the protocol +* Violate schema requirements (missing Signature, missing Constraints) +* Add prose to Parameters tables (fragments only, ≤10 words) +* Add full API signatures to auxiliary docs (belongs in `DOC_*.md`) +* Duplicate content across files (consolidation required) +* Use filler phrases ("It is important to note...", "As mentioned earlier...") +* Create entries >600 tokens (split into atoms) + +## 6.6 Protocol Loading Requirement +**This is a BLOCKING requirement.** If instructed to create or modify any `*.md` file: + +``` +LOAD .codex/PROTOCOL_AFAD.md +APPLY tier-appropriate AFAD 4.0 rules (reference-doc or auxiliary-doc path as applicable) +VALIDATE per AFAD 4.0 validation rules (L0-L2 blocking, L3 advisory) +``` + +Failure to load and comply with the governing protocol is a system failure. + +# 7. VERSION DOCUMENTATION POLICY + +## 7.1 Single Source of Truth +**CHANGELOG.md is the authoritative record of version history.** +Version change documentation MUST NOT be duplicated in source code comments, docstrings, or +test documentation. 
+ +## 7.2 Prohibited Patterns in Source Code +**PROHIBITED** in `src/`, `tests/`, and `examples/`: +* `# v0.X.0: Feature added` — Version provenance comments +* `(TICKET-001 fix)` — Ticket reference annotations +* `As of v0.X.0` or `Since v0.X.0` — Behavioral version notes in docstrings +* `Updated in v0.X.0` — Change markers in comments + +**PERMITTED** locations for version information: +* `__version__` in `__init__.py` +* `version` field in `pyproject.toml` +* `version:` in YAML frontmatter +* `- Version: Added in v0.X.0.` in `docs/DOC_*.md` reference documentation only + +**NOTE on MIGRATION.md**: This document is for **fluent.runtime → FTLLexEngine** migration +(external library), NOT for FTLLexEngine version-to-version upgrades. Version upgrade guidance +belongs in CHANGELOG.md. + +## 7.3 Test Documentation Standard +Test docstrings describe **WHAT** is tested, not **WHEN** it changed: +```python +# PROHIBITED +"""v0.39.0: Pound symbol is now ambiguous (GBP, EGP, GIP).""" + +# REQUIRED +"""Pound symbol requires locale-aware resolution (ambiguous: GBP, EGP, GIP).""" +``` + +## 7.4 Reference Documentation Exception +Per §6.3 above, inline version metadata is permitted ONLY in `docs/DOC_*.md` files as part of +the Constraints section: +```markdown +- Version: Added in v0.31.0. +``` +This is the single permitted location for inline version notes outside CHANGELOG.md. + +## 7.5 Rationale +* **Maintenance Burden:** Version references scattered across 200+ locations require manual + updates each release. +* **Duplication:** Same change documented in CHANGELOG.md and inline creates drift risk. +* **Staleness:** Old version numbers remain as historical noise. +* **Mixed Concerns:** Behavioral documentation entangled with change history obscures intent. + +## 7.6 Enforcement +* New code MUST NOT introduce version provenance comments. +* Existing version references are grandfathered but SHOULD be removed when the code section is + modified for other reasons. + +# 8. 
INCIDENTAL OBSERVATION PROTOCOL + +## 8.1 Passive Discovery Mandate +**Constraint:** While performing any task that involves reading source code, the AI agent +naturally forms assessments about code quality, defects, efficiency, and modernization +opportunities. These observations MUST be captured rather than discarded. + +**Rationale:** The agent processes significant context during routine operations (file reads, +debugging, implementation). Optimization opportunities and defects noticed during this work +have value but are typically lost because no explicit directive exists to record them. + +## 8.2 Observation Scope +Record optimization opportunities and defects in the following categories to +`.codex/OBSERVATIONS_INCIDENTAL.txt`: + +| Category | Examples | +|:---------|:---------| +| Performance | O(N) loop replaceable with O(1) lookup, unnecessary allocations | +| Modernization | Pre-PEP 695 patterns, deprecated stdlib usage | +| Simplification | Dead code paths, over-engineered abstractions | +| Memory | Cacheable computations, object pooling opportunities | +| Defects | Bugs, spec violations, security issues, API gaps | + +## 8.3 Recording Protocol +**Location:** `.codex/OBSERVATIONS_INCIDENTAL.txt` + +**When to Record:** Upon noticing an optimization opportunity or a defect during ANY file read +operation, append an entry. Do not interrupt the current task workflow — record concisely and +continue. 
+ +**Entry Format:** +``` +------------------------------------------------------------------------ +OBSERVED: <YYYY-MM-DD> +FILE: <path>:<line> +CATEGORY: PERF | MODERN | SIMPLIFY | MEMORY | DEFECT +OBSERVATION: <1-2 sentence description of what could be improved or fixed> +CURRENT: <current code or pattern> +SUGGESTED: <proposed replacement> +EFFORT: TRIVIAL | MINOR | MODERATE +------------------------------------------------------------------------ +``` + +**Field Definitions:** +* `EFFORT: TRIVIAL` — Single-line or mechanical change +* `EFFORT: MINOR` — Localized change, <20 lines affected +* `EFFORT: MODERATE` — Cross-function or requires careful testing + +## 8.4 Non-Interruption Principle +Recording an observation MUST NOT: +* Interrupt the user's current task +* Trigger immediate remediation (unless user requests) +* Generate chat output announcing the observation +* Slow down the primary workflow + +The file serves as a backlog for future optimization and defect sprints, not an action queue. + +## 8.5 Deduplication +Before recording, check if an equivalent observation already exists. If so, do not add a +duplicate entry. Observations that have been promoted to `ISSUES-VALID.txt` should be removed +from `OBSERVATIONS_INCIDENTAL.txt`. diff --git a/.codex/AGENTS_JAVA26_GRADLE.md b/.codex/AGENTS_JAVA26_GRADLE.md new file mode 100644 index 00000000..50deb46b --- /dev/null +++ b/.codex/AGENTS_JAVA26_GRADLE.md @@ -0,0 +1,691 @@ +# Java 26+ / Gradle Agent Protocol + +**Scope:** Java **26+** projects built with **Gradle**: applications, libraries, CLIs, services, frameworks, plugins, tools, and multi-module builds. + +**Primary objective:** produce Java that is correct, explicit, maintainable, compatible with the repository's real baseline, and validated through the narrowest sufficient feedback path. + +Optimize in this order: + +**correctness → explicit contracts → concurrency correctness → narrow API → evolution safety → readability → terseness** + +Terseness loses to clarity. Convenience loses to correctness. 
Cleverness loses to maintainability. + +## 1. Repository intake + +Before touching source, establish the repository baseline: + +1. **Build:** Gradle wrapper version, wrapper checksum posture, Gradle DSL (`.gradle.kts` vs `.gradle`), version catalog, toolchain configuration, `--release`/source/target settings, preview-feature wiring. +2. **Shape:** application, library, plugin, framework, CLI, or multi-module build; JPMS usage; generated code; publication targets; runtime packaging. +3. **Tests and CI:** test framework, canonical verification tasks, coverage tools, static analysis, CI matrix, release gates. +4. **Compatibility posture:** internal tool, published library, plugin, framework, service API, wire protocol, serialized format, or migration-sensitive data model. +5. **System map:** truth, evidence, consequence, invariant, and preservation for the touched surface. + +Do not assume the project wants the newest syntax, the broadest refactor, or a published-library compatibility posture. Derive the posture from the repository and task. + +## 2. Change loop + +For new behavior, start with the smallest failing proof of behavior: test, assertion, reproducible check, type-level constraint, contract test, or manual verification path. + +Then: + +1. make the minimal coherent implementation; +2. run the narrowest relevant verification task; +3. refactor immediately until the touched code is clearer and easier to change; +4. widen the change only when local repair would preserve or deepen a bad structure; +5. widen verification when contracts, module boundaries, public APIs, serialization, concurrency, or build logic change. + +When a build or test fails, read the actual failure output. Fix the structural, type, logic, or configuration cause. Do not pile up cascading errors, cargo-cult compiler suggestions, or suppress warnings merely to pass. + +## 3. Java 26 baseline posture + +Java 26 is the target baseline for this protocol. 
Use Java 26 capabilities when the repository baseline permits them and when they make the result clearer, safer, or more maintainable. + +### 3.1 Normal Java 26+ toolset + +Use these as ordinary tools when they improve the result: + +| Capability | Use when | Avoid when | +|---|---|---| +| **Records** | Immutable value carriers with named components and invariants | Identity-bearing, lifecycle-heavy, mutable, or behavior-rich objects | +| **Sealed types** | Closed result families, state machines, protocol variants, controlled error families | Future external extension is a real requirement | +| **Pattern matching `switch`** | Exhaustive dispatch over sealed families or enums | A single simple `if` is clearer | +| **Record patterns** | Multiple record components are consumed immediately in a pattern arm | Only one component is used, or a named variable is clearer | +| **Unnamed variables and patterns** | A binding is intentionally discarded | The ignored value carries useful meaning | +| **Sequenced Collections** | First/last/reversed operations on ordered collections | As a reason to prefer `LinkedList`; `ArrayList` remains the default list | +| **Stream Gatherers** | A named gatherer expresses reusable domain pipeline behavior | A plain loop or collector is clearer | +| **Text blocks** | Multiline SQL, JSON, XML, HTML, templates, and structured constants | Single-line strings or whitespace-sensitive values where escapes are clearer | +| **Virtual threads** | Highly concurrent blocking I/O workloads | CPU-bound parallelism or work that needs bounded compute pools | +| **Scoped values** | Immutable request/task context flowing down a bounded call tree | Recreating ambient global state under a new name | +| **Module import declarations** | Small tools or files with genuinely module-oriented imports | Blanket style that hides which types are actually used | +| **Compact source files / instance `main`** | Demos, learning material, scripts, one-file tools | Production 
code that benefits from explicit class structure | +| **Flexible constructor bodies** | Early validation, normalization, and computing delegation arguments | Hiding substantive business logic inside constructors | + +### 3.2 Java 26 changes to account for + +- **Final field mutation by deep reflection now warns by default.** Do not design new code, tests, serializers, or dependency-injection paths that mutate `final` fields by reflection. Prefer constructors, factories, builders with real invariants, or supported serialization mechanisms. +- **Applet API is removed.** Do not add or preserve applet dependencies. Remove applet-era compatibility paths when no real contract depends on them. +- **HTTP/3 is available through the JDK HTTP Client API.** Before adding a networking dependency solely for HTTP/3, check whether `java.net.http` satisfies the requirement. Keep fallback and compatibility behavior explicit. +- **AOT object caching and GC/runtime improvements are operational tools.** Use them only with deployment, measurement, and rollback discipline. Do not introduce JVM flags speculatively. +- **Synchronized virtual-thread pinning was largely eliminated in Java 24.** On Java 26+, do not replace `synchronized` with `ReentrantLock` solely to avoid historical virtual-thread pinning. Choose the locking primitive by semantics: `synchronized` where practical; `ReentrantLock`, `ReadWriteLock`, `StampedLock`, or `Condition` when their extra capabilities are needed. Still avoid blocking or slow I/O while holding any lock. + +### 3.3 Preview and incubator features + +Preview and incubator features are useful tools, not defaults. They require deliberate governance. + +A preview or incubator feature is acceptable only when all of the following are true: + +1. it materially improves design, control flow, concurrency semantics, API shape, security, or performance; +2. the repository already accepts the operational cost, or the task explicitly authorizes it; +3. 
compile, test, runtime, CI, IDE, and developer workflow implications are updated together; +4. the usage is contained to the smallest reasonable surface; +5. the repository can tolerate source, binary, or behavior changes in a later JDK. + +When introducing one: + +- enable it explicitly and consistently across all affected phases; +- keep the blast radius small; +- avoid leaking preview-dependent types through broad public APIs unless the project accepts that risk; +- document why it is worth the cost; +- prefer wrappers or adapters if later redesign is likely. + +Currently relevant Java 26 preview/incubator features: + +| Feature | Posture | +|---|---| +| **Structured Concurrency** | Prefer for related subtasks that need shared cancellation, ownership, and failure semantics, when preview is enabled. Otherwise preserve the same ownership discipline with the repository's approved concurrency model. | +| **Primitive types in patterns, `instanceof`, and `switch`** | Use only when it materially improves clarity or correctness. Do not enable preview syntax to look modern. | +| **Lazy Constants** | Specialized deferred-immutability tool for expensive values that should be initialized at most once and then treated as constant. Do not use as a general cache. | +| **PEM encodings of cryptographic objects** | Use when the task is actually about encoding or decoding cryptographic objects. Do not invent a custom parser or add a library first. | +| **Vector API** | Use only for measured or clearly motivated performance work. Do not introduce speculatively. | + +## 4. Domain modeling + +Choose the narrowest construct that represents domain truth. + +### 4.1 Preferred constructs + +- **Record:** immutable value carrier. Default when the type's main job is to carry named values with invariants. +- **Sealed interface/class:** closed alternatives callers must distinguish behaviorally: outcomes, state machines, protocol messages, controlled error descriptors. 
+- **Enum:** closed symbolic set with stable vocabulary. +- **Small semantic record/enum:** primitive values that are easy to confuse: `UserId`, `Port`, `CurrencyCode`, `Retries`. +- **Ordinary class:** identity-bearing, lifecycle-heavy, mutable, or behavior-rich object. + +Do not use `String`, boolean flags, integer codes, or `null` where the caller must distinguish domain alternatives behaviorally. + +### 4.2 Records + +A compact constructor is the normalization and invariant boundary. + +Rules: + +- Null-check required fields at the trust boundary. +- Reject blank semantic strings with precise messages. +- Defensively copy collection components with `List.copyOf`, `Set.copyOf`, or `Map.copyOf`. +- Store immutable views only. +- Normalize once inside the compact constructor, not at every call site. +- Throw `IllegalArgumentException` for business invariant violations. + +Every record with a collection component must have an explicit compact constructor that performs the defensive copy unless the component type is already an immutable project-owned type. A record with a `List`, `Set`, or `Map` component and no defensive-copy boundary is an invariant leak. + +### 4.3 Sealed hierarchies + +Keep sealed families coherent. Avoid catch-all variants such as `Unknown`, `Other`, or `GenericFailure` unless the boundary genuinely permits unknown values. + +For public sealed families, every subtype is part of the API. Adding a subtype is a compatibility event because exhaustive switches in consumers may need to change. + +### 4.4 Exception families + +Exception families that share accessor fields should use a sealed interface to declare those accessors. Each concrete subtype should extend the appropriate JDK exception class and implement the interface, carrying its own fields directly. 
+ +```java +public sealed interface ParseProblem + permits InvalidTokenException, UnexpectedEofException { + String source(); + int position(); +} + +public final class InvalidTokenException extends IllegalArgumentException + implements ParseProblem { + private final String source; + private final int position; + // ... +} +``` + +Do not use an abstract sealed exception class merely to share fields. That couples subtypes to a shared mutable state carrier rather than a pure interface contract. + +### 4.5 Construction + +Direct construction is preferred when the constructor is the clearest contract. + +Builders are justified for many independent optional fields, staged construction with real invariants, generated external APIs that conventionally use builders, or readability at complex call sites. Do not introduce builders by reflex. + +### 4.6 Shadow types and wire boundaries + +Fields representing a finite set of values must use the canonical enum or semantic type, not a `String` or a locally defined shadow type that duplicates a type already defined elsewhere. + +Wire serialization is a permitted translation boundary. Convert to canonical wire names at the boundary, not by storing wire strings in internal records. + +Never call `.name()` on a third-party or external-layer enum in application code to produce a wire string when the wire vocabulary is an external contract. Use an explicit exhaustive `switch` to produce the canonical wire string. + +## 5. Null discipline, outcomes, and exceptions + +### 5.1 Null policy + +Default posture is non-null. + +`null` is permitted only where absence is unsurprising and the API clearly models it: external APIs that use `null`, narrow legacy boundaries, internal caches where null means not yet loaded, or framework fields populated reflectively. + +In domain code, `null` must not represent business alternatives. 
Model absence with `Optional` for a simple optional return, a sealed result family for domain alternatives, or early rejection at the trust boundary. + +### 5.2 JSpecify discipline + +When the project uses JSpecify: + +- Annotate every production package with `@org.jspecify.annotations.NullMarked` in `package-info.java`. +- Use `@Nullable` only where null is a deliberate, documented value at that exact site. +- Add `org.jspecify:jspecify` as `compileOnly`. +- Wire enforcement through NullAway in JSpecify mode, Checker Framework, or the repository's chosen null checker. +- Adopt nullness at package or module boundaries, not as isolated individual-method decoration. + +Annotations are compile-time signals. Runtime checks at external boundaries and compact constructors remain necessary. + +### 5.3 Optional discipline + +`Optional` models deliberate absence of a single non-null value. + +Permitted: + +- return type where absence is an expected outcome and callers must handle it; +- a record component representing a single independent optional attribute. + +Avoid: + +- method parameters; +- serialized DTO/entity fields unless the framework explicitly handles the desired shape; +- multiple optionals that secretly encode mutually exclusive states. + +Never call `Optional.get()` without first establishing presence through control flow or by using `orElseThrow`, `ifPresent`, or another explicit handling method. + +### 5.4 Exceptions + +Throw exceptions for invariant breaches, contract violations, infrastructure failures, and states that should not occur under the type contract. + +Do not throw exceptions for ordinary business alternatives that callers are expected to handle. + +Prefer unchecked exceptions in domain and application logic. Checked exceptions are warranted at I/O, parsing, and external seams where callers must acknowledge a specific recoverable condition. Translate exceptions across boundaries only when the boundary requires a different contract. 
+ +### 5.5 Catch policy + +Catch narrowly. Preserve meaning and cause. Do not swallow exceptions, catch broadly and return fake success, destroy interrupt status, or convert cancellation into ordinary domain failure. + +### 5.6 No dead defensive checks + +Do not add null checks on values whose non-null return is guaranteed by JDK or library contract. Dead checks create branches that cannot be meaningfully covered. + +When uncertain, read the API contract. Add a null check only when the contract permits null or the boundary is demonstrably unreliable. + +## 6. Control flow and exhaustiveness + +### 6.1 Exhaustive switching + +For sealed families and enums, prefer exhaustive `switch` expressions. Do not add `default` branches when all real alternatives are known; defaults weaken compiler help and can hide missing handling during evolution. + +### 6.2 Pattern matching + +Use pattern matching `switch` for closed-domain dispatch. Use `instanceof` pattern matching for a single simple type test followed immediately by use of the bound variable. + +Do not build long `instanceof` ladders in place of an exhaustive switch over a sealed family. + +### 6.3 Record patterns + +Destructure a record in a pattern arm when multiple fields are consumed immediately and the record name adds no clarity in the arm body. + +```java +case Committed(PostingId id, _, LocalDate date, _) -> format(id, date) +``` + +Prefer a named binding when only one or two fields are used, or when the variable is reused later. + +Do not nest record patterns beyond two levels. Deep nesting is a signal to extract a named helper. + +### 6.4 Guards + +Use guarded pattern cases only when the guard materially improves clarity. If the guard-false path has meaningful behavior, prefer a separate case, an inner exhaustive switch, or a simple `if` inside the arm. + +When a guard tests a sealed component field, prefer an inner exhaustive switch over the component type. 
+ +### 6.5 Multi-label pattern arms + +Prefer one pattern subtype per arm. Multi-label pattern arms can obscure coverage and future evolution. + +```java +// Prefer +case Foo _ -> handleBoth(); +case Bar _ -> handleBoth(); + +// Avoid +case Foo _, Bar _ -> handleBoth(); +``` + +### 6.6 No pre-filter before exhaustive switch + +Do not filter out one subtype before an exhaustive switch over the same domain. The switch should be the sole dispatch site. + +```java +// Prefer +switch (cell) { + case BlankSnapshot _ -> blankCount++; + case TextSnapshot _ -> populatedCount++; +} +``` + +### 6.7 Loops, streams, and gatherers + +Choose the form with the clearest intent and cost model. Loops are fine. Streams are fine. Gatherers are justified only when they express a real, named stream transformation that is clearer than a loop or collector. + +### 6.8 Local variable type inference + +Use `var` when the type is obvious from the right-hand side and repeating it adds no information. + +```java +var counts = new HashMap<String, Integer>(); +``` + +Do not use `var` when the type name documents domain intent or the return type is non-obvious. + +```java +PostEntryResult result = applicationService.commit(command); +``` + +`var` is for local variables only, not fields, method signatures, or constructor parameters. + +## 7. Concurrency, parallelism, and context propagation + +### 7.1 Ownership + +Every asynchronous task needs an owner, lifetime, cancellation path, and shutdown path. No orphan tasks. No hidden background work. + +### 7.2 Virtual threads + +Prefer virtual threads for highly concurrent blocking I/O workloads. Do not use them as a CPU-bound speedup. + +Do not create unbounded work just because virtual threads are cheap. Upstream and downstream resources still have limits: databases, sockets, queues, rate limits, files, locks, memory, and external APIs. 
+ +### 7.3 Locking + +On Java 26+, choose between `synchronized` and `java.util.concurrent.locks` by semantics, not by obsolete virtual-thread pinning folklore. + +Use `synchronized` where it is practical and clear. Use `ReentrantLock` or related locks when you need interruptible acquisition, timed acquisition, fairness, multiple conditions, read/write semantics, optimistic reads, or other advanced behavior. + +Keep critical sections small. Avoid I/O, blocking calls, callbacks, or unknown user code while holding any lock. + +### 7.4 Structured concurrency + +When preview is enabled and subtasks are related, prefer Structured Concurrency for lexical ownership, shared cancellation, and coordinated failure handling. + +If preview is not enabled, preserve the same discipline with the repository's approved concurrency model. + +### 7.5 Context propagation + +Prefer explicit parameters for local context. Prefer scoped values for immutable context flowing down a bounded task tree. Avoid `ThreadLocal` proliferation, especially in virtual-thread-heavy code. + +### 7.6 Executors and pools + +If you introduce an executor, define who creates it, who closes it, what workload it serves, its bounds, and why an existing managed facility is insufficient. + +Do not create thread pools casually. + +### 7.7 Blocking and cancellation + +If a method blocks, make that operational fact discoverable through naming, placement, contract, or documentation. + +Treat interruption and cancellation as real control flow. Restore interrupt status where appropriate. Avoid retry loops that ignore cancellation. + +## 8. Architecture and boundaries + +### 8.1 Visibility + +Default to `private` or package-private. Widen visibility only when real consumers require it. For libraries and plugins, public surface is a compatibility commitment. + +### 8.2 Layering + +Keep domain logic separate from transport, persistence, framework glue, serialization, and generated code. 
Do not let framework annotations colonize the core domain by default. + +Reusing one type across layers is acceptable only when the sameness is genuinely true and stable. + +### 8.3 Serialization is a contract + +Serialized shape is external contract. Do not casually change field names, optionality, enum symbols, discriminator values, polymorphic structure, date/number formatting, or error envelope shape. + +For polymorphic sealed types, keep discriminator registration visible at the sealed family boundary when the serializer supports it. Discriminator values must be stable protocol vocabulary strings, not Java class names. + +Never use `Id.CLASS`-style discriminators for external protocols; they leak implementation names and break versioning. + +### 8.4 Naming and organization + +Names must reveal domain capability. Avoid vague type, package, or module names such as `Manager`, `Helper`, `Utils`, `Misc`, `Common`, `Shared`, or `Base` unless they carry precise domain meaning in context. + +If you cannot explain what a package contains without listing its members, the package needs a sharper name or a different structure. + +### 8.5 JPMS + +If the repository uses JPMS, module boundaries are architectural decisions. + +- `exports <package>` exposes public types to other modules. +- `opens <package> to <module>` grants targeted deep reflection access. + +Never add broad `opens` merely to silence an `InaccessibleObjectException`. Diagnose the specific reflective consumer and open the narrowest package to the narrowest module. + +When a type moves package, update `exports` and `opens` in the same change. + +### 8.6 Project-owned tooling seams + +When a project uses a narrow slice of a third-party or native-backed API, define a project-owned seam for that slice and keep application code behind it. + +Rules: + +- Expose only operations the project consumes today. +- Name the seam by domain purpose, not vendor type. +- Remove old direct third-party call sites once the seam exists.
+- Prefer deterministic pure-Java replay or test adapters when exact semantics can be reproduced locally. + +### 8.7 Canonical ownership + +If something is canonical, define it once: domain invariants, operation catalogs, protocol semantics, error classification systems, enum vocabularies, validation rules, configuration schema. + +Every surface that exposes the fact must derive from that owner or from generated artifacts rooted in it. + +## 9. Gradle and build logic + +### 9.1 Wrapper, toolchains, and Java 26 compatibility + +Use the Gradle wrapper. Do not invoke a globally installed `gradle`. + +Use Java toolchains for compilation and, where appropriate, test and runtime tasks. The build must not depend on whichever JDK happens to be installed on the machine. + +For Java 26: + +- Gradle must be new enough to support Java 26 toolchains and, if needed, running Gradle on Java 26. Use Gradle **9.4.0+** for Java 26 support. +- When upgrading the wrapper, prefer the current stable Gradle version supported by the repository's plugins rather than a minimal version alone. +- Verify Kotlin, Groovy, Android Gradle Plugin, JaCoCo, Error Prone, NullAway, Checkstyle, PMD, SpotBugs, and other tooling against the configured Java toolchain. + +### 9.2 Build authoring language + +For new build logic, prefer Gradle Kotlin DSL. If the repository uses Groovy DSL, preserve that choice unless migration is part of the task. + +Do not turn a Java task into an accidental DSL migration. + +### 9.3 Bytecode targeting + +For libraries, reusable modules, plugins, or mixed-JDK ecosystems, use explicit bytecode targeting with `--release`. Do not assume `sourceCompatibility` and `targetCompatibility` alone express compatibility intent precisely enough. + +### 9.4 Dependencies + +Prefer version catalogs (`libs.versions.toml`) for shared dependency coordinates. Do not scatter repeated version strings across build files. + +Pin versions. 
Avoid floating versions such as `latest.release`, `latest.integration`, or `1.+`. + +Before adding a dependency: + +- verify exact group ID, artifact ID, and version in the declared repository; +- verify it is not EOL or incompatible with Java 26; +- verify it is not already provided by the JDK, existing stack, or an existing dependency; +- verify the API from current documentation, not memory. + +Do not add a library to avoid writing a small amount of straightforward code. + +### 9.5 Repositories + +Keep repositories minimal and explicit. Do not add broad or duplicate repositories casually. + +### 9.6 Shared build logic + +For substantial shared build logic, prefer convention plugins in an included build such as `build-logic`. + +`buildSrc` is acceptable when the repository already uses it, the logic is small and local, or migration cost exceeds benefit. + +Convention plugin IDs must be qualified (`com.example.project.java-library`), not generic (`java-library`, `jvm-conventions`). + +### 9.7 Preview-feature wiring + +If preview syntax or APIs are used, synchronize configuration across compilation, test execution, runtime tasks, CI, IDE/developer workflow, packaging, and documentation. + +Do not wire preview support for only one phase. + +### 9.8 Build performance features + +Configuration cache, build cache, parallelism, and test distribution are good when correct for the repository. Correctness first. Do not cargo-cult performance flags. + +### 9.9 Multi-module structure + +Keep module responsibilities sharp. Avoid circular dependencies. Put shared policy in convention plugins rather than duplicated snippets. Do not create modules that exist only to look clean without reducing coupling. 
+ +### 9.10 Null annotation build wiring + +When adopting JSpecify: + +- add `org.jspecify:jspecify` as `compileOnly` in annotated modules; +- wire NullAway or the chosen checker in shared build logic; +- enable JSpecify mode where supported; +- enforce consistently across modules; +- avoid partial annotation that produces false confidence. + +### 9.11 Build isolation and daemon management + +Never run multiple Gradle invocations concurrently against the same project directory. + +For concurrent builds across different projects, isolate Gradle user homes: + +```bash +GRADLE_USER_HOME="$PROJECT_ROOT/.gradle-home" ./gradlew check +``` + +Add `.gradle-home/` to `.gitignore` if this convention is adopted. + +Do not use `./gradlew --stop` routinely. Stop daemons only to recover from confirmed daemon corruption. + +Keep `org.gradle.jvmargs` at the minimum heap the project actually requires. + +## 10. Testing and coverage + +### 10.1 Determinism + +Tests must be deterministic. Control time, randomness, environment variables, filesystem layout, locale, timezone, network behavior, and concurrency timing where practical. + +### 10.2 What to test + +Prioritize domain invariants, boundary mappings, result-shape decisions, serialization contracts, error translation, concurrency ownership/cancellation, public API behavior, and regressions for fixed bugs. + +### 10.3 Test style + +Test observable behavior and contract. Do not couple tests to incidental implementation details unless the task is specifically about those details. + +Avoid reflection in tests. If a class requires reflection to test, prefer improving the design. If a private branch is genuinely unreachable through public behavior but still important, expose a narrow package-private helper in the same package and test it directly. + +### 10.4 Coverage + +Coverage is a signal, not the goal. Do not distort design merely to satisfy a metric. Investigate meaningful blind spots. 
+ +Avoid dead branches created by default arms over sealed domains, pre-filters before exhaustive switches, and defensive checks against impossible nulls. + +### 10.5 Test organization + +Default style: + +- one top-level test class per production class: `<ClassName>Test`; +- nested classes to group scenarios; +- behavior names such as `execute_returnsFailure_whenSourcePathIsBlank()`; +- direct constructors for records; +- no mocking of record types; +- assert expected values directly, not through reflection, ordinals, or runtime type inspection. + +Repository convention overrides naming style when already consistent. + +## 11. Refactoring and deletion + +### 11.1 Coherent repair + +If a small patch would preserve or deepen a bad structure, widen to the nearest coherent boundary. Do not stack another workaround on a workaround. + +When existing code violates hard boundaries, fix it if the fix is small and local. If systemic, flag it or record it through the repository's observation process. Do not silently extend the bad pattern. + +### 11.2 Compatibility-aware refactoring + +Refactor aggressively inside private and internal surfaces. Refactor public or published surfaces deliberately, with migration cost, binary/source compatibility, serialization, and user contracts treated as design inputs. + +### 11.3 Structural tasks + +When the task is about scaffolding, architecture, or repository cleanup, audit the whole affected surface: module layout, package names, build logic, convention plugins, dependency centralization, CI assumptions, generated code, and verification tasks. + +Do not stop at the first file named in the prompt if the real problem is structural. + +### 11.4 God constructs + +A god construct concentrates unrelated responsibilities in one place. + +Refactoring signals: + +- **God class:** factory logic, descriptors, validation, lookup, and lifecycle logic mixed together. +- **God record:** many optional or nullable fields encoding mutually exclusive states.
+- **God method:** long method split by inline phase comments or mixing unrelated responsibilities. + +Refactor by extracting cohesive types or helpers named for domain purpose. Never extract merely to save lines. + +### 11.5 Safe deletion + +Before deleting code, prove the blast radius: static references, dynamic references, generated code, serialized forms, migrations, external consumers, jobs, dashboards, alerts, runbooks, and human workflows. + +If uncertainty remains, make the smallest reversible simplification and preserve the uncertainty in the work summary or observation log. + +## 12. Documentation and self-containment + +### 12.1 Javadoc + +Public APIs require Javadoc that states purpose and contract. Published-library APIs require especially careful compatibility and behavior prose. + +Package-private APIs require Javadoc when they are part of an internal contract, widened for testing, non-obvious, or reused across classes. + +Record component accessors usually do not need Javadoc beyond clear component names. `@Nullable` parameters and returns must explain when null is expected and what it means. + +### 12.2 Style + +- One clear sentence first. +- No filler such as "This method..." or "This class...". +- Use `@param` and `@return` only when names alone are insufficient. +- Do not add comments or Javadoc that merely restate code. +- Use inline comments only for non-obvious reasoning, invariants, or boundary decisions. + +### 12.3 Self-containment + +Source code, Javadoc, comments, and product documentation must not reference agent directive files by name, section number, or as justification for a design decision. + +```java +// Forbidden +// Per AGENTS.md, no default on sealed switch. + +// Correct +// No default: compiler enforces exhaustiveness over sealed subtypes. +``` + +Agent directive files are operational instructions, not developer-facing design records. + +## 13. 
CI and project automation + +### 13.1 CI mirrors local verification + +The canonical verification command must pass locally and in CI with identical strictness. Do not create CI-only checks that cannot be reproduced locally. Do not soften local checks based on `CI=true`. + +### 13.2 Pin third-party actions + +Third-party CI actions should be pinned to full-length commit SHAs, not mutable tags. + +```yaml +# Prefer +uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 + +# Avoid +uses: actions/checkout@v4 +``` + +### 13.3 Timeouts and stale runs + +Every CI job should declare `timeout-minutes` appropriate to observed runtime. + +Use concurrency groups with `cancel-in-progress: true` to abort obsolete runs on the same branch. + +```yaml +concurrency: + group: ci-${{ github.workflow }}-${{ github.ref }} + cancel-in-progress: true +``` + +### 13.4 Dependency freshness + +Use either asynchronous dependency automation or a sync gate paired with automation. A blocking dependency-freshness gate without automated PR creation turns unrelated work into manual dependency maintenance. + +## 14. Incidental observation protocol + +When reading a file surfaces a defect, rule violation, or clear improvement outside the active task, record it in the project's designated observation log if one exists. Do not derail the active task unless the issue blocks correctness or safety. + +Each entry should record: + +- stable ID; +- date; +- status; +- file and line range; +- category; +- what is wrong and why it matters; +- current pattern or excerpt; +- resolving change; +- effort level. + +When resolved, update the entry in place rather than deleting it. If no observation log is defined, mention material observations in the work summary only when relevant. + +## 15. Pre-output checklist + +Run this before producing output. + +### System theory + +- Did you identify truth, evidence, consequence, invariant, and preservation for the touched surface? 
+- Did you avoid patching derived state when the source of truth was wrong? +- Did you consider blast radius beyond direct callers? + +### Java semantics + +- Are domain alternatives explicit rather than hidden in `null`, flags, magic strings, or exceptions? +- Are invariants enforced at type boundaries or constructors? +- Are expected outcomes separated from exceptional failures? +- Are sealed or enum domains handled exhaustively? +- Are `Optional` and `@Nullable` used deliberately? + +### API and boundaries + +- Is visibility as narrow as possible? +- Are public surfaces compatible with their consumers? +- Are serialization and external contracts preserved or intentionally evolved? +- Are enum-to-wire mappings explicit where the wire vocabulary matters? +- Are canonical contract facts owned once? + +### Concurrency + +- Does every asynchronous task have an owner, cancellation path, and shutdown path? +- Are locks chosen by semantics rather than folklore? +- Is context propagation explicit and bounded? +- Is interruption/cancellation preserved? + +### Build + +- Did you use the wrapper and toolchains? +- Is Gradle new enough for Java 26 when Java 26 is required? +- Are versions pinned and centralized? +- Are preview features wired consistently if used? +- Did you avoid concurrent Gradle invocations in the same project? + +### Verification + +- Did you run the smallest sufficient verification path? +- Did verification widen when the change widened? +- Are warnings resolved rather than suppressed? +- Does the repository end more coherent than it started? diff --git a/.codex/AGENTS_KOTLIN24_GRADLE.md b/.codex/AGENTS_KOTLIN24_GRADLE.md new file mode 100644 index 00000000..35d3d23b --- /dev/null +++ b/.codex/AGENTS_KOTLIN24_GRADLE.md @@ -0,0 +1,926 @@ +# Kotlin 2.4+ / Gradle Agent Protocol + +**Scope:** Kotlin repositories that intentionally use Kotlin **2.4+** or are being migrated to it. 
This includes Kotlin/JVM, Kotlin Multiplatform, Kotlin/Native, Kotlin/Wasm, Kotlin/JS, Android Kotlin modules, libraries, CLIs, services, plugins, and multi-module Gradle builds. + +**Current posture:** Kotlin 2.4 may be an EAP/Beta in the target repository. Treat EAP adoption as an explicit repository decision, not as a default upgrade path. If the project is on Kotlin 2.3.x or lower, do not silently migrate it to Kotlin 2.4+ unless the task is a migration or the repository already opts in. + +**Build default:** Gradle Kotlin DSL. Do not introduce Groovy build logic. Use Maven guidance only when the repository is already Maven-based. + +**Compiler default:** K2 is the normal compiler path. Do not add compatibility shims for K1-era behavior unless the repository has a documented reason. + +Optimize in this order: + +```text +correctness → explicit contracts → concurrency correctness → narrow API → evolution safety → readability → terseness +``` + +Terseness loses to clarity. Convenience loses to correctness. Cleverness loses to maintainability. + +This protocol inherits `.codex/UNIVERSAL_ENGINEERING_CONTRACT.md`. Do not duplicate the universal contract here; apply it before all Kotlin-specific rules. + +--- + +## 1. Repository intake before touching code + +Before editing Kotlin, derive the repository's actual baseline. + +Check: + +- Kotlin version and plugin versions in `gradle/libs.versions.toml`, `settings.gradle.kts`, root `build.gradle.kts`, and convention plugins. +- Whether Kotlin 2.4 is GA, Beta, RC, or EAP in this repository, and whether EAP repositories are configured deliberately. +- Target platforms: JVM, Android, Multiplatform, Native, Wasm, JS. +- Java toolchain and `jvmTarget` / `compilerOptions` alignment. +- Gradle wrapper version and whether it supports the selected JDK/toolchain. 
+- Compiler flags: context parameters, collection literals, explicit context arguments, return-value checker, warning policy, explicit API mode, progressive mode, opt-ins. +- Kotlin compiler plugins: serialization, KSP, all-open, no-arg, Compose compiler, Spring, JPA, Dokka, binary compatibility validation, Android Gradle Plugin. +- Public API posture: application, internal library, published SDK, Gradle plugin, framework integration, or multiplatform package. +- Test infrastructure: JUnit, Kotest, kotlinx-coroutines-test, MockK, Testcontainers, Android instrumentation, Native/JS/Wasm test tasks. +- CI tasks and whether local verification exactly mirrors CI. + +Do not infer the baseline from file names alone. The version catalog and convention plugins are usually the canonical build truth. + +--- + +## 2. Kotlin 2.4+ feature posture + +Use Kotlin 2.4+ features only according to their stability and repository opt-in status. + +### 2.1 Stable in Kotlin 2.4.0-Beta2 + +When the repository is intentionally on Kotlin 2.4+, these can be treated as normal language or library tools unless project policy says otherwise: + +- Context parameters, except callable references and explicit context arguments. +- Explicit backing fields. +- `@all` meta-target for properties. +- New defaulting rules for annotation use-site targets. +- `kotlin.uuid.Uuid` common API, except UUID V4/V7 generation functions that still require opt-in. +- Sorted-order checks such as `isSorted`, `isSortedDescending`, `isSortedWith`, `isSortedBy`, and `isSortedByDescending`. +- JVM `UInt.toBigInteger()` and `ULong.toBigInteger()`. +- Kotlin/JVM support for Java 26 bytecode. +- Kotlin metadata annotations enabled by default on JVM. +- Kotlin/Wasm incremental compilation enabled by default. +- Kotlin/JS value class export to JavaScript/TypeScript and ES2015 support inside `js()` inline code. + +Use these features only when they improve the system's theory, not merely because they are new. 
+ +### 2.2 Experimental in Kotlin 2.4.0-Beta2 + +Do not introduce these unless the repository already enables the flag or the task explicitly asks for adoption: + +- Explicit context arguments: `-Xexplicit-context-arguments`. +- Collection literals: `-Xcollection-literals`. +- Improved compile-time constant evaluation: `-XXLanguage:+IntrinsicConstEvaluation`. +- WebAssembly Component Model support. +- UUID V4/V7 generation APIs. +- Any Kotlin/Native, Swift export, JS, Wasm, or metadata feature still marked Experimental by the compiler or documentation. + +When adding an experimental feature deliberately, update the canonical build policy, add a short rationale, isolate the feature behind explicit compiler flags, and add verification that fails if the flag is removed accidentally. + +### 2.3 Rich Errors posture + +Do **not** write production Kotlin as if Rich Errors are an available Kotlin 2.4 compiler feature unless the repository already contains an officially supported compiler build and syntax for them. + +Until Rich Errors are implemented and stabilized in the compiler, model recoverable domain failures with named sealed hierarchies, explicit result types, or a carefully justified `Result`/Either-style abstraction. + +Do not hallucinate syntax such as: + +```kotlin +fun loadUser(id: UserId): User | UserError +error class UserError(...) +``` + +Treat Rich Errors as a future design direction: valuable for thinking about explicit recoverable failures, not as code the agent may invent. + +--- + +## 3. Hard boundaries + +Violating these requires explicit repository policy or user authorization. + +### 3.1 Type and domain safety + +- Never use `!!` unless an invariant is already proven locally or by an external contract and the proof is visible. +- Never use unsafe casts where `as?`, smart casts, generics, sealed types, or better modeling remove the need. +- Never encode ordinary domain alternatives as crashes. 
+- Never use nullable return types to represent multiple distinct business outcomes. +- Never expose mutable collections or mutable state directly from public APIs. +- Never widen mutability or visibility for convenience. +- Never represent protocol facts as magic strings, booleans, or integers when a named type, enum, value class, or sealed family expresses the contract. + +### 3.2 Coroutine and concurrency safety + +- Never use `GlobalScope` in production code. +- Never launch coroutines without an owning lifecycle or scope. +- Never call `runBlocking` from suspending code or from code that may run on an event loop, UI thread, servlet thread, or test scheduler. +- Never convert `CancellationException` into ordinary failure. +- Never swallow failures with broad `catch` blocks that hide cancellation or erase evidence. +- Never expose fire-and-forget APIs unless the owning lifecycle, cancellation, and failure reporting are explicit. + +### 3.3 API and structure + +- Never make declarations `public` by accident. +- Never use boolean mode flags in public APIs when separate functions or a semantic enum is clearer. +- Never create generic buckets named `util`, `helpers`, `misc`, `common`, `manager`, `processor`, or `base` when a domain name exists. +- Never use inheritance where composition expresses the problem more directly. +- Never add abstraction layers that do not pay for themselves immediately. +- Never write Java-shaped Kotlin when idiomatic Kotlin is clearer and equally explicit. + +### 3.4 Build and dependency safety + +- Never guess Gradle plugin IDs, version catalog coordinates, Maven coordinates, compiler flags, or library APIs. +- Never introduce an EAP, Beta, Alpha, snapshot, or unreleased dependency unless the repository already has an EAP policy or the user explicitly asks. +- Never run concurrent `./gradlew` invocations against the same project directory. 
+- Never respond to Kotlin daemon failures by editing source or build logic before verifying daemon/process state. +- Never suppress warnings to make verification pass unless the suppression is narrow, documented, and tied to a real false positive or unavoidable interop boundary. + +--- + +## 4. Type system and domain modeling + +### 4.1 Prefer types that make invalid states difficult + +Choose types that express the domain: + +| Need | Preferred Kotlin construct | +|---|---| +| Stateless transformation | top-level or member function | +| Data carrier with value semantics | `data class` | +| Closed alternatives | `sealed interface` / `sealed class` | +| Single-instance alternative | `data object` | +| Small symbolic set | `enum class` | +| Primitive-shaped semantic identity | `@JvmInline value class` | +| Capability contract | `interface` | +| Internal naming shortcut | nested `typealias` when it improves locality | +| Shared dependency available in a lexical context | context parameter, when deliberate | + +Do not introduce a type merely to look abstract. Every type must either prevent misuse, name a domain concept, isolate a boundary, or make evolution safer. + +### 4.2 Nullability + +Use nullable types only when absence is a normal state: optional field, cache miss, absent parent, missing value in a parsed external record. + +Do not use nullable types for validation failure, protocol rejection, authorization failure, parse ambiguity, or multiple distinguishable outcomes. Use a named result model. + +Avoid nullable parameters in public APIs. Prefer overloads, default parameters, or a semantic option type. A nullable parameter pushes ambiguity to every caller. + +A data class with several nullable fields that are only valid in certain combinations is a hidden state machine. Refactor it into a sealed family or another explicit state representation. 
+ +### 4.3 Recoverable outcomes + +Use sealed hierarchies when callers must distinguish outcomes behaviorally: + +```kotlin +sealed interface RegistrationResult { + data class Success(val id: UserId) : RegistrationResult + data object EmailAlreadyTaken : RegistrationResult + data class ValidationError(val violations: List<Violation>) : RegistrationResult +} +``` + +Prefer named sealed outcomes for domain workflows. Use `kotlin.Result` only when the failure domain is naturally exception-shaped and the caller does not need a domain-specific error taxonomy. + +Do not use magic strings, booleans, or `Pair`-style structures to encode failures. + +### 4.4 Sealed families + +Use sealed families for protocol messages, parser outcomes, command results, state machines, domain failures, and capability results. + +Rules: + +- Variants must be coherent and semantically named. +- Exhaustive `when` is preferred for closed families. +- Do not add `else` to a `when` over a sealed family unless there is a real open-world boundary. +- Adding a public sealed subtype is a compatibility decision for libraries, not a refactor. +- Do not use `Unknown`, `Other`, or `GenericError` buckets unless the external boundary genuinely permits unknown cases. + +### 4.5 Value classes + +Use `@JvmInline value class` to prevent mix-ups of primitive-shaped values: + +```kotlin +@JvmInline value class UserId(val value: String) +@JvmInline value class Cents(val value: Long) +@JvmInline value class TenantSlug(val value: String) +``` + +Do not wrap primitives decoratively. A value class must encode domain meaning or boundary safety. + +For Java callers, verify constructor/function exposure. Use interop annotations only when they materially improve the Java API. + +### 4.6 Immutability + +Default to `val`. Prefer immutable state transitions over in-place mutation. + +`List`, `Set`, and `Map` are read-only interfaces, not proof of deep immutability. + +Rules: + +- Store mutable collections privately.
+- Expose read-only views only when aliasing is safe. +- Copy defensively at boundaries where mutation aliasing is dangerous. +- Use persistent immutable collections when real structural immutability is required. +- Keep state ownership explicit: source of truth, derived state, cache, and view state must not be confused. + +### 4.7 Nested type aliases + +Use nested type aliases when they keep implementation vocabulary local and reduce package-level clutter. + +Do not use type aliases to hide an important dependency, erase domain meaning, or create shadow contracts. A type alias is naming help, not a new type. + +--- + +## 5. Kotlin 2.4 language features in practice + +### 5.1 Context parameters + +Context parameters can express ambient capabilities that are genuinely shared across a lexical operation: clock, logger, transaction context, locale, authorization view, tracing span, or domain policy. + +Use them when they make dependency flow clearer than manual parameter threading. + +Do not use context parameters as a service locator, global registry, or hidden dependency bag. If a dependency is part of an object's durable state, constructor injection is usually clearer. If a dependency is part of one operation, an explicit parameter may be clearer. + +Rules: + +- Name context parameters unless `_` is materially clearer. +- Keep context sets small and capability-oriented. +- Do not mix `-Xcontext-receivers` and `-Xcontext-parameters`. +- Do not convert existing dependency injection to context parameters merely because the feature is stable. +- Treat explicit context arguments as experimental until the compiler stabilizes them. 
+ +### 5.2 Explicit backing fields + +Use explicit backing fields when they clarify state ownership and remove a noisy private backing property: + +```kotlin +val city: StateFlow<String> + field = MutableStateFlow("") + +fun updateCity(newCity: String) { + city.value = newCity +} +``` + +This is useful when the exposed type is narrower than the stored implementation type. + +Do not use explicit backing fields to hide mutation, bypass invariants, or make ownership unclear. If state is externally observable, the invariant and mutation paths must remain obvious. + +### 5.3 Annotation target rules and `@all` + +At framework and wire boundaries, annotations are part of the contract. + +Use explicit use-site targets when the target matters: + +```kotlin +data class UserDto( + @field:Email + @get:Email + val email: String, +) +``` + +Use `@all:` only when the annotation genuinely belongs on all relevant property targets and doing so does not change framework behavior unexpectedly. + +When migrating to Kotlin 2.4 annotation defaulting rules, verify serialization, validation, DI, persistence, reflection, and annotation-processing behavior. Annotation placement drift can be a runtime contract bug. + +### 5.4 Guard conditions and context-sensitive resolution + +Use guard conditions in `when` when they make closed-domain branching clearer. Keep guards simple and side-effect-free. + +Context-sensitive resolution can reduce noise around enum and sealed members, but do not sacrifice readability at module or API boundaries. In ambiguous files or public examples, explicit qualification may still be better. + +### 5.5 Multi-dollar interpolation and multiline strings + +Use triple-quoted strings for multiline SQL, JSON, XML, GraphQL, expected output, shell scripts, and generated snippets. + +Use multi-dollar interpolation only when it makes literal `$`-heavy content clearer. Do not introduce it for ordinary strings.
+ +### 5.6 Collection literals + +Collection literals are experimental in Kotlin 2.4. Do not use them in production code unless the repository explicitly enables `-Xcollection-literals`. + +Even when enabled, prefer conventional constructors when the expected collection type is not obvious. Bracket syntax must not hide mutability or custom `operator fun of` behavior. + +### 5.7 Compile-time constants + +Improved compile-time constant evaluation is experimental. Do not rely on it unless the repository enables the feature deliberately. + +If enabled, keep compile-time constants boring and auditable. Do not encode business logic into constants merely because the compiler can evaluate more expressions. + +### 5.8 UUID and sorted checks + +Use `kotlin.uuid.Uuid` for common multiplatform UUID values when it avoids platform-specific UUID shims. + +Do not use experimental UUID generation APIs without explicit opt-in. + +Use sorted-order functions such as `isSorted()` and `isSortedBy()` instead of hand-written loops when they directly express the invariant being checked. + +--- + +## 6. Functions, expressions, and control flow + +### 6.1 Function design + +A function should do one coherent thing at one abstraction level. Inputs, outputs, side effects, and failure shape must be visible. + +Prefer: + +- small parameter lists with semantic types, +- explicit return types for public/protected declarations, +- deterministic behavior where possible, +- no hidden I/O, +- no hidden mutation, +- named local values for multi-step reasoning. + +### 6.2 Expression style + +Expression-bodied functions are good when the expression is obvious. Block bodies are better for branching, local names, side effects, validation, or non-trivial reasoning. + +Do not compress code into a single expression to appear idiomatic. + +### 6.3 Scope functions + +Use scope functions only when the receiver or temporary name improves clarity. 
+ +| Function | Good use | +|---|---| +| `apply` | object configuration | +| `also` | observation side effect, such as logging or metrics | +| `let` | short transform, especially on nullable value | +| `run` | receiver-based block that genuinely helps | +| `with` | local block around one clear receiver | + +Rules: + +- Do not chain more than two scope functions. +- Do not nest scope functions when `this`/`it` becomes ambiguous. +- Prefer named locals over clever receiver gymnastics. +- Break call chains before stack traces and debugging become hard. + +### 6.4 Destructuring + +Use destructuring when all components are consumed immediately and their local names are clearer than property access. + +Do not destructure solely for brevity. Prefer named access when only one component is needed, when the original object is reused later, or when component order is not self-evident. + +### 6.5 Return-value discipline + +Ignored non-`Unit` return values are often bugs. When the repository enables the unused return-value checker, treat findings as correctness signals. + +Use `@MustUseReturnValues` for APIs where ignoring the result is dangerous. Use `@IgnorableReturnValue` only for functions where ignoring the result is conventional and safe. + +Assign to `val _ = ...` only when the discard is deliberate and locally obvious. + +--- + +## 7. Error handling and failure semantics + +### 7.1 Separate recoverable errors from exceptional failures + +Use exceptions for unrecoverable failures, broken preconditions, framework boundaries, or non-local failure handling where exception flow is the right contract. + +Use explicit result models for recoverable business outcomes, validation failures, parse results, authorization decisions, and protocol alternatives. + +### 7.2 Catch narrowly + +Catch the narrowest exception type you can handle meaningfully. 
+ +Do not catch `Throwable` except at process-level supervision, crash reporting, or framework boundaries that must prevent process termination. Always preserve the cause and context. + +### 7.3 `runCatching` + +Do not use `runCatching` as a blanket replacement for error modeling. + +Use it only when: + +- the failure domain is truly exception-shaped, +- cancellation is preserved, +- the resulting code is clearer than explicit `try`/`catch`, and +- callers still receive a meaningful failure shape. + +Do not accidentally normalize `CancellationException`. + +### 7.4 Validation + +Ordinary invalid user input should normally produce an explicit validation result, not an exception. + +Throw for violated programmer preconditions. Return a domain result for user/business validation. + +### 7.5 Error messages + +Error messages are user and operator feedback surfaces. They must preserve enough context to diagnose the failure without leaking secrets. + +Do not replace specific errors with vague strings like `failed`, `invalid`, or `unknown` unless the boundary requires redaction. + +--- + +## 8. Coroutines, Flow, and concurrency + +### 8.1 Structured concurrency + +Prefer `suspend` functions for one-shot async operations. Use `Flow` for asynchronous streams, not for a single immediate value. + +Child work must belong to a parent scope. Use `coroutineScope` when child failure should fail the whole operation. Use `supervisorScope` only when sibling isolation is intentional and failures are still observed. + +### 8.2 Cancellation + +Cancellation is normal control flow. + +Rules: + +- Rethrow `CancellationException` after cleanup. +- Do not log normal cancellation as an application error. +- Ensure long loops cooperate with cancellation through `ensureActive()`, `yield()`, suspending calls, or explicit checkpoints. +- Do not hide cancellation inside `Result`, `Either`, `runCatching`, retry wrappers, or broad catch blocks. 
+ +### 8.3 Dispatchers and blocking work + +Do not hide blocking I/O in CPU-oriented paths or default dispatchers. + +Use dispatcher boundaries deliberately. `withContext(Dispatchers.IO)` is not a magic fix; it is a statement that blocking or I/O work is happening. + +For libraries, avoid hardcoding dispatchers unless the dispatcher is part of the contract. Prefer accepting a dispatcher, scope, or execution policy when needed. + +### 8.4 Flow + +Use `Flow` for streams with multiple values over time, reactive pipelines, event feeds, or observable state. + +Rules: + +- Keep cold vs hot flow semantics explicit. +- Do not expose mutable flow types directly. +- Prefer `StateFlow` / `SharedFlow` only when their replay, lifecycle, and ownership semantics fit. +- Document threading, replay, completion, and error behavior for public flows. +- In Swift export / multiplatform APIs, verify how `Flow` appears to consumers. + +### 8.5 Shared mutable state + +Shared mutable state must have one owner and a synchronization strategy. + +Choose the simplest correct tool: + +- immutable snapshots, +- actor/message passing, +- `Mutex`, +- atomic primitives, +- database transaction, +- single-threaded confinement, +- framework-managed state container. + +Do not mix synchronization strategies casually. + +--- + +## 9. Architecture and module boundaries + +### 9.1 Visibility default + +Use the narrowest visibility: + +- `private` for file/class internals, +- `internal` for module collaboration, +- `public` only for deliberate external contracts. + +For libraries, enable explicit API mode. Public and protected declarations must have explicit visibility and return types. + +### 9.2 Layering + +Keep domain logic separate from transport, persistence, serialization, UI, CLI, HTTP, framework annotations, and infrastructure where that separation buys clarity and testability. + +Do not over-layer small applications. 
The correct boundary is the one that preserves truth, feedback, consequence, and invariants with minimum ceremony. + +### 9.3 Dependency injection + +Prefer constructor injection and explicit parameters. + +Reject service locators, ambient registries, hidden singletons, and global mutable objects unless a framework boundary makes them unavoidable. When unavoidable, isolate them at the adapter edge. + +Context parameters can support scoped capabilities, but they are not a substitute for architecture. + +### 9.4 Files, packages, and modules + +Organize by domain capability and boundary, not by vague reuse category. + +Good names: `billing`, `settlement`, `ledger`, `verification`, `identity`, `jurisdiction`, `invoice`, `outbox`. + +Bad names without sharper meaning: `utils`, `common`, `shared`, `misc`, `core`, `base`, `helpers`. + +A file should contain one coherent responsibility cluster. Do not create god files or dozens of trivial fragments. + +### 9.5 Canonical ownership + +Every contract-defining fact has one canonical owner: + +- enum wire vocabularies, +- validation limits, +- feature capabilities, +- command names, +- error codes, +- schema names, +- route names, +- event types, +- permission names, +- generated docs and examples. + +Other surfaces derive from the canonical owner or from generated artifacts rooted in it. Drift must fail verification. + +--- + +## 10. Serialization and external contracts + +### 10.1 Serialized shape is public API + +Persisted or published serialized shape is an external contract. Do not casually change field names, nullability, optionality, enum symbols, discriminator values, polymorphic structure, date/time formats, numeric precision, or default values. + +### 10.2 DTOs and domain models + +Use separate DTOs when external shape and domain shape differ materially. + +Do not contort domain objects to mirror poor transport formats unless the repository intentionally accepts that tradeoff. 
+ +### 10.3 Enum wire vocabulary + +Do not rely on enum `.name` for wire representation when the vocabulary is externally meaningful. + +For `kotlinx.serialization`, use `@SerialName` explicitly when the wire name differs from the Kotlin name or is part of a published contract. Treat `@SerialName` as stable once published. + +For Jackson, map wire values explicitly with the appropriate annotations or adapters. Verify Kotlin module behavior. + +### 10.4 Polymorphic serialization + +For sealed or polymorphic hierarchies, define discriminator policy at the sealed root or central serializers module. Do not scatter discriminator logic across subtypes. + +Adding a new subtype must force a visible update to serialization registration, docs, and tests. + +### 10.5 Parse defensively, emit canonically + +Accept external inputs defensively where the boundary requires tolerance. Emit one canonical form. Keep adaptation at the boundary, not in the domain core. + +--- + +## 11. Java, JVM, Android, and metadata interop + +### 11.1 Java 26 and JVM target alignment + +Kotlin 2.4.0-Beta2 can generate Java 26 bytecode. Use Java 26 only when the repository's deployment, Gradle wrapper, toolchain, test runtime, static analysis, and downstream consumers support it. + +Align: + +- Java toolchain, +- Kotlin `jvmTarget`, +- Java `--release` / source / target settings, +- Gradle wrapper version, +- CI JDK, +- runtime container/base image, +- bytecode consumers and published artifact expectations. + +Do not raise bytecode target as a local convenience. + +### 11.2 Java-facing APIs + +Design Java interop deliberately when Java callers exist. + +Use `@JvmStatic`, `@JvmOverloads`, `@Throws`, `@JvmName`, `@JvmSynthetic`, and boxed value-class exposure only when they improve the Java contract. Do not decorate APIs automatically. + +For public libraries, test Java call sites for important APIs. 
+ +### 11.3 Interface defaults + +Kotlin's JVM default-method behavior is a binary compatibility surface. For new code, `NO_COMPATIBILITY` may be appropriate. For existing published libraries, changing default-method mode can break consumers. + +Treat `jvmDefault` as build policy, not per-module improvisation. + +### 11.4 Kotlin metadata annotations + +Kotlin 2.4 enables annotations in metadata by default on JVM. This can affect annotation processors, metadata readers, serialization tooling, binary compatibility tools, and code generators. + +When modifying annotations on public declarations, verify both bytecode-level behavior and Kotlin metadata consumers where applicable. + +### 11.5 Android + +For Android projects, verify Android Gradle Plugin, Kotlin plugin, Compose compiler, KSP, desugaring, minSdk, targetSdk, JDK, and test tooling compatibility before upgrading Kotlin or Java target. + +Do not assume server-side JVM advice applies unchanged to Android. Android runtime, desugaring, bytecode target, resource processing, and instrumentation tests are separate constraints. + +--- + +## 12. Multiplatform, Native, Wasm, and JS + +### 12.1 Multiplatform source sets + +Keep common code genuinely platform-neutral. Do not leak JVM classes, Android classes, Foundation types, Node globals, or browser APIs into `commonMain`. + +Use `expect`/`actual` when the domain contract is common but implementation is platform-specific. + +### 12.2 Kotlin/Native and Swift export + +When exposing APIs to Swift: + +- verify exported names, nullability, generics, exceptions, suspend functions, `Flow`, enum mapping, and value semantics, +- keep Swift-facing APIs small and deliberate, +- test from Swift where the contract matters, +- do not assume a Kotlin-idiomatic API is Swift-idiomatic. + +Swift package import and Flow export are important Kotlin 2.4 Native capabilities, but adoption must be verified against Xcode, Gradle, dependency, and CI constraints. 
+ +### 12.3 Native memory and concurrency + +Kotlin/Native GC behavior changed over recent releases. Do not cargo-cult old freezing or memory-manager rules. Verify the current runtime behavior and repository target versions. + +Performance-sensitive Native changes need benchmarks or measurable runtime evidence. + +### 12.4 Kotlin/Wasm + +Kotlin/Wasm incremental compilation is stable and enabled by default in Kotlin 2.4. Do not disable it unless diagnosing a confirmed compiler/build issue. + +The WebAssembly Component Model support is experimental. Do not introduce it without explicit opt-in, runnable examples, and CI coverage. + +### 12.5 Kotlin/JS + +When exporting to JavaScript/TypeScript: + +- exported names and generated declarations are public API, +- verify TypeScript consumption, +- avoid exposing Kotlin-only domain shapes that are awkward in JS, +- treat value-class export as a contract decision, +- keep `js()` inline code constant, auditable, and minimal. + +Do not use JavaScript dynamic interop where typed declarations or generated bindings can express the contract. + +--- + +## 13. Build logic and Gradle + +### 13.1 Default build posture + +Use: + +- `settings.gradle.kts`, `build.gradle.kts`, and convention plugins, +- `gradle/libs.versions.toml` for plugin and dependency versions, +- `build-logic` included build for substantial shared build policy, +- explicit Java toolchains, +- centralized Kotlin `compilerOptions`, +- explicit test tasks and quality gate, +- type-safe project accessors for multi-module projects when appropriate. + +Do not hardcode versions in module build files. + +### 13.2 Kotlin and plugin version alignment + +Kotlin compiler plugins must be version-compatible with the Kotlin compiler: + +- `org.jetbrains.kotlin.plugin.serialization`, +- KSP, +- Compose compiler plugin, +- all-open/no-arg/spring/jpa plugins, +- Dokka, +- binary compatibility tooling, +- static analysis that embeds or expects a Kotlin compiler version. 
+ +Before upgrading Kotlin, verify every compiler plugin and static analysis tool. If a stable tool does not yet support Kotlin 2.4, prefer postponing that tool or the migration over adding unstable tooling to production. + +### 13.3 Compiler options + +Put shared compiler options in convention plugins, not copy-pasted module blocks. + +Use `compilerOptions {}` rather than deprecated configuration surfaces. + +Keep opt-ins explicit and scoped. A broad project-wide opt-in is a design decision and must be justified by the repository policy. + +### 13.4 Explicit API mode + +Enable explicit API mode for libraries and SDKs. Application modules usually do not need it. + +Use warning mode only as a migration step. Published libraries should converge to strict explicit API mode. + +### 13.5 Warning policy + +Warnings are feedback. Do not globally disable them. + +Use `-Werror` only when the repository can keep it green consistently. If specific diagnostics must be tuned, use a centralized warning policy and document why. + +The return-value checker is especially useful for command-style APIs, persistence writes, validation results, and domain operations where ignored results are dangerous. + +### 13.6 Build isolation and daemon management + +The Gradle daemon and Kotlin compile daemon are shared machine resources. + +Rules: + +- Do not run multiple Gradle invocations concurrently in the same project directory. +- In multi-agent/multi-project environments, isolate daemon pools with project-local `GRADLE_USER_HOME` when needed. +- Add project-local Gradle homes to `.gitignore`. +- Use `./gradlew --stop` only to recover from confirmed daemon corruption, not as routine build hygiene. +- Treat “Could not connect to Kotlin compile daemon” and similar failures as infrastructure first. Retry cleanly before editing build logic. 
+ +### 13.7 Dependency anti-hallucination + +Before adding or updating any dependency: + +- verify exact group, artifact, and version in the declared repository, +- verify Kotlin version compatibility, +- verify platform compatibility, +- verify license and security posture if relevant, +- verify scope: `implementation`, `api`, `compileOnly`, `runtimeOnly`, `testImplementation`, `ksp`, `kapt`, or platform-specific source set, +- verify whether the standard library, JDK, kotlinx library, or existing stack already covers the need. + +Do not invent coordinates or assume APIs from memory. + +--- + +## 14. Testing and verification + +### 14.1 Red → Green → Refactor + +For new behavior, start with the smallest failing proof: unit test, property test, integration test, compiler check, serialization round trip, or reproducible script. + +Then implement the minimum code that passes. Then refactor until the touched system is clearer and easier to change. + +### 14.2 Test behavior and invariants + +Prioritize: + +- domain invariants, +- error paths, +- cancellation behavior, +- boundary conditions, +- serialization round trips, +- Java/Swift/JS interop where public, +- concurrency correctness, +- numeric precision, +- public API compatibility, +- migration behavior. + +Do not overfit tests to private implementation details. + +### 14.3 Coroutine tests + +Use `kotlinx-coroutines-test` for coroutine behavior. Prefer `runTest` and controlled schedulers. + +Avoid sleeps, wall-clock timing, thread races, and remote networks. Advance virtual time deliberately. + +Verify cancellation and child failure semantics, not only happy-path results. + +### 14.4 Multiplatform tests + +For multiplatform code, test the common contract and platform-specific actual implementations. + +Do not assume JVM tests prove Native, JS, Wasm, or Android behavior. + +### 14.5 Verification scope + +Run the narrowest relevant verification first, then widen based on consequence: + +1. 
touched module compile/test, +2. dependent modules if public contracts changed, +3. affected platform tasks, +4. full quality gate for scaffolding, build, serialization, public API, or cross-module changes. + +Report what was run and what could not be run. + +--- + +## 15. Public API and evolution + +### 15.1 Published contracts + +For libraries, SDKs, Gradle plugins, public APIs, and multiplatform packages, treat the following as contract: + +- public/protected declarations, +- constructor signatures, +- default parameter values, +- type parameters and variance, +- sealed family shape, +- annotations visible to processors or reflection, +- serialized shape, +- generated TypeScript/Swift/Java API, +- binary compatibility and metadata, +- compiler flags that affect emitted ABI. + +### 15.2 Compatibility checks + +Use binary compatibility validation or equivalent where the project publishes Kotlin APIs. + +When changing public API, state whether the change is source-compatible, binary-compatible, and serialization-compatible. + +### 15.3 App vs library posture + +Application code can prioritize operational clarity and internal maintainability. + +Library code must prioritize consumer clarity, compatibility, conservative API surface, and predictable behavior across Kotlin/Java/platform versions. + +--- + +## 16. Documentation, KDoc, and generated surfaces + +Load `.codex/PROTOCOL_AFAD.md` when doing documentation work or code changes that alter documented public contracts, except for the repository root `README.md` special case defined in `AGENTS.md`. + +KDoc rules: + +- Add KDoc to public library APIs where the signature does not fully explain behavior, failure, threading, units, or compatibility. +- Do not add KDoc that restates obvious code. +- Document cancellation, dispatcher, and Flow semantics for coroutine APIs. +- Document serialization, wire names, and compatibility-sensitive defaults. 
+- Keep examples small, compilable, and synchronized with the canonical API. + +Generated documentation must derive from canonical code or schema. Do not maintain duplicate contract facts manually. + +--- + +## 17. Scaffolding and structural overhaul + +When creating or restructuring a Kotlin/Gradle project, audit the whole project shape, not just the requested file. + +Required surfaces: + +- `settings.gradle.kts` with included builds, project names, repositories, modules, and type-safe accessors when appropriate. +- `gradle/libs.versions.toml` with all plugin and dependency versions. +- `build-logic/` convention plugins for shared Kotlin/JVM, Kotlin library, Kotlin application, Kotlin Multiplatform, Android, serialization, test, and publishing policy as needed. +- Root `build.gradle.kts` with only root-level coordination. +- `gradle.properties` with deliberate Gradle/Kotlin daemon, cache, parallelism, configuration cache, and Kotlin code style settings. +- `.gitignore` matching Kotlin/Gradle/IDE/build artifacts and any project-local Gradle home. +- CI workflows that use the wrapper, correct JDK, Gradle caching, explicit timeouts, stale-run cancellation, and the same quality gate as local verification. + +Break old layout when it is structurally wrong. Backwards compatibility with a poor repository layout is not a goal unless consumers depend on it. + +Recommended multi-module physical grouping when no better domain layout exists: + +| Directory | Purpose | +|---|---| +| `libs/` | core domain libraries | +| `adapters/` | technology adapters: HTTP, DB, XML, PDF, messaging, persistence | +| `apps/` | application entry points: CLI, server, worker | +| `features/` or `packs/` | pluggable feature packs or jurisdiction/domain packs | +| `testkit/` | shared fixtures, generators, test utilities | +| `build-logic/` | convention plugins | + +Keep logical Gradle paths clear even if physical directories are grouped. + +--- + +## 18.
Agent output contract for Kotlin work + +For non-trivial Kotlin changes, the summary must include: + +- changed behavior, +- source of truth touched, +- validation or feedback added/used, +- blast radius considered, +- invariant preserved, +- verification run, +- Kotlin/Gradle/compiler flags affected, +- public or serialized contract impact, +- documentation or system theory preserved. + +Do not provide only “updated the code.” Explain the engineering consequence proportionally to risk. + +--- + +## 19. Pre-output checklist + +Before yielding Kotlin code, verify: + +**System theory** + +- [ ] Source of truth identified. +- [ ] Feedback path identified or added. +- [ ] Blast radius considered. +- [ ] Invariant preserved. +- [ ] Theory preserved in tests/docs/code where appropriate. + +**Semantics** + +- [ ] Nullability, mutability, and failure states are visible in types. +- [ ] Domain alternatives are explicit where behavior differs. +- [ ] Public APIs avoid nullable parameters unless `null` is semantically precise. +- [ ] Sealed families are exhaustive where closed. +- [ ] Rich Errors syntax was not invented. + +**Concurrency** + +- [ ] Coroutine work is lifecycle-owned. +- [ ] Cancellation is preserved. +- [ ] Blocking work is isolated. +- [ ] Flow semantics are clear. + +**API and compatibility** + +- [ ] Visibility is intentional. +- [ ] Public return types are explicit where required. +- [ ] Java/Swift/JS/serialization contracts were considered where applicable. +- [ ] Annotation targets are deliberate. + +**Build** + +- [ ] Kotlin version, Gradle version, JDK/toolchain, and compiler plugins are compatible. +- [ ] No guessed dependencies or compiler flags. +- [ ] Shared build policy lives in convention plugins. +- [ ] No concurrent Gradle invocations were used. + +**Testing** + +- [ ] Smallest relevant verification was run first. +- [ ] Wider verification was run when contracts changed. +- [ ] Coroutine/time/concurrency tests are deterministic. 
+- [ ] Any skipped verification is stated honestly. + +If any answer is “no” or “unclear,” refactor or surface the uncertainty before final output. diff --git a/.codex/AGENTS_PYTHON313.md b/.codex/AGENTS_PYTHON313.md new file mode 100644 index 00000000..e0d5b09e --- /dev/null +++ b/.codex/AGENTS_PYTHON313.md @@ -0,0 +1,765 @@ +# Python 3.13+ Agent Protocol + +This protocol governs agent work on Python projects that target Python 3.13 or newer, or that use Python 3.13 as the lowest supported runtime. + +Scope: libraries, services, CLIs, daemons, data pipelines, notebooks, web APIs, test suites, build scripts, code generators, Python-backed plugins, C/Rust extension packages, and mixed-language repositories with Python surfaces. + +Primary objective: produce Python that is explicit, typed where it matters, verifiable, maintainable, secure at boundaries, concurrency-safe, packaging-correct, and aligned with the repository's actual compatibility contract. + +Optimize in this order: + +```text +correctness → invariants → explicit contracts → observability → packaging compatibility → maintainability → performance where measured → terseness +``` + +Terseness loses to clarity. Dynamic convenience loses to explicit system boundaries. A passing import is not the finish line. A green test suite is not enough if the change weakens state ownership, API contracts, or failure evidence. + +This protocol inherits `.codex/UNIVERSAL_ENGINEERING_CONTRACT.md`. Do not duplicate the universal contract here; apply it before all Python-specific rules. + +--- + +## 1. Repository intake before touching Python + +Before editing Python, derive the repository's actual baseline. 
+ +Inspect the relevant subset of: + +- `pyproject.toml`, `setup.cfg`, `setup.py`, `requirements*.txt`, `constraints*.txt`, lock files, and dependency-group definitions; +- `.python-version`, `.tool-versions`, `runtime.txt`, Dockerfiles, CI matrices, tox/nox sessions, and declared supported Python versions; +- package manager and build backend: uv, pip, Poetry, PDM, Hatch, setuptools, Flit, Bazel, Pants, or project-specific tooling; +- test framework: pytest, unittest, doctest, hypothesis, tox, nox, coverage, integration harnesses, fixtures, snapshots, golden files, and service emulators; +- type checker: pyright, basedpyright, mypy, pyre, pytype, or no checker; +- linter/formatter: Ruff, Black, isort, pylint, flake8, bandit, custom checks, pre-commit hooks; +- package layout: `src/` layout, namespace packages, editable installs, generated code, vendored code, data files, entry points, extras, plugins, and import boundaries; +- runtime surface: web framework, ORM, async runtime, scheduler, worker queue, CLI framework, notebook/runtime environment, external APIs, databases, caches, message brokers, and config sources; +- public API surface: imports, type stubs, protocols, entry points, CLI flags, HTTP routes, event schemas, model schemas, generated clients, and documented examples; +- C/Rust/foreign extension surfaces, ABI policy, free-threaded compatibility, wheels, platform tags, and build isolation; +- repository verification commands and the exact CI gates that define success. + +Classify the touched Python surface before designing the change: + +- **Published library:** supported Python versions, public imports, SemVer, type hints, stubs, docs, examples, and extras are contracts. +- **Internal library:** API evolution is easier, but invariants, import boundaries, and type contracts still matter. +- **Application/service:** configuration, persistence, migrations, logs, metrics, alerts, operational safety, and dependency pins are contracts. 
+- **CLI:** flags, arguments, environment variables, exit codes, stdout/stderr shape, config files, and shell completions are contracts. +- **Data/ML pipeline:** schema, reproducibility, randomness, artifacts, data freshness, lineage, and idempotency are contracts. +- **Build/codegen/test tooling:** determinism, generated output, local/CI parity, and developer ergonomics are contracts. +- **Extension package:** ABI, wheel tags, platform support, free-threaded behavior, and memory/thread safety are contracts. + +Do not infer the baseline from a single file. In Python projects, compatibility truth is often split across packaging metadata, lock files, CI, docs, and release tooling. + +--- + +## 2. Change loop in Python terms + +For every non-trivial change, apply the Universal Engineering Contract concretely. + +### 2.1 Minimum system map + +Before editing, identify: + +```text +Truth: +- Source of truth for the relevant state, config, schema, model, dependency, generated artifact, cache, migration, or runtime value: +- Mutation paths: +- Derived/cached/generated copies: + +Evidence: +- Existing checks: unit/integration/property tests, type checks, lint, format, coverage, fixtures, logs, metrics, traces, CLI repros, notebooks, CI: +- Missing feedback worth adding: + +Consequence: +- Direct Python dependencies: imports, callers, subclasses, protocols, entry points, tests, generated clients, stubs: +- Indirect dependencies: serialization, CLI output, HTTP contracts, database schema, queues, cron jobs, docs, dashboards, support workflows: + +Invariant: +- Type, domain, data, idempotency, authorization, concurrency, compatibility, or operational rule that must remain true: + +Preservation: +- Where the learned theory should live: type, test, docstring, module name, comment, migration note, docs, runbook, schema, config validation: +``` + +Keep the map lightweight for low-risk changes. 
Do not skip it for changes that touch state, public APIs, persistence, concurrency, packaging, security, or external contracts. + +### 2.2 Red → Green → Refactor + +For new behavior, start with the smallest failing proof: + +- unit test; +- integration test; +- contract test; +- property test; +- regression fixture; +- CLI invocation; +- doctest; +- type-checking expectation; +- migration check; +- notebook smoke check; +- runtime reproduction from logs or issue evidence. + +Then make the smallest coherent implementation and immediately refactor until the touched Python is easier to understand, easier to test, and harder to misuse. + +### 2.3 Narrow-to-wide verification + +Work in small increments: + +1. make one coherent change; +2. run the narrowest useful check, such as the targeted pytest node, module import, type-check target, or CLI repro; +3. read the first real failure; +4. fix the root cause; +5. rerun the narrow check; +6. widen to repository-required checks before completion. + +Do not accumulate speculative edits while verification is failing. + +### 2.4 Root-cause fixes only + +When verification fails: + +- read the exact traceback, assertion diff, type-check message, linter diagnostic, or runtime log; +- identify whether the root is domain logic, type shape, import path, dependency version, fixture design, async scheduling, serialization, environment, permissions, or stale generated state; +- fix that cause; +- rerun the relevant check; +- preserve the failing proof if it guards a real regression. + +Do not: + +- catch broad exceptions to silence failures; +- loosen types to `Any` to appease a type checker; +- mutate fixtures or expected files without proving the new behavior is correct; +- skip tests because the environment is inconvenient; +- edit generated code without updating its generator or source of truth; +- claim completion while required checks still fail. + +--- + +## 3. 
Python 3.13+ baseline posture + +### 3.1 Runtime compatibility + +Use the repository's declared interpreter policy. If the repository is governed by this protocol and no stronger local policy exists, assume Python 3.13+ as the baseline. + +For new Python packages created under this protocol, prefer: + +```toml +[project] +requires-python = ">=3.13" +``` + +For existing projects: + +- do not raise `requires-python` without a concrete benefit and compatibility judgment; +- treat `requires-python`, CI Python matrices, lock files, Docker images, deployment runtimes, and docs as a single compatibility contract; +- do not use Python 3.14+ syntax or APIs in a Python 3.13-baseline project unless guarded, backported, or explicitly allowed; +- do not assume CPython-only behavior unless the repository declares CPython as part of the contract; +- if PyPy, GraalPy, embedded Python, iOS, Android, or WASI support matters, verify behavior on that target or preserve target-specific guards. + +### 3.2 Python 3.13 capabilities + +Use Python 3.13 capabilities when they make the system clearer and the repository baseline permits them. 
+ +Normal Python 3.13 tools include: + +- PEP 695 generic type-parameter syntax from Python 3.12, when supported by the repository's type checker; +- PEP 696 defaults for type parameters when they reduce overload noise or make generic APIs clearer; +- `typing.ReadOnly` for read-only `TypedDict` items; +- `typing.TypeIs` for precise user-defined narrowing; +- `warnings.deprecated()` for deprecations that should be visible both at runtime and to type checkers; +- defined `locals()` / `frame.f_locals` behavior, especially for debuggers, tracers, REPL tools, and dynamic execution code; +- `copy.replace()` and `__replace__` where immutable update semantics are clearer than hand-written reconstruction; +- `queue.Queue.shutdown()` / `queue.ShutDown` when coordinating queue lifecycle explicitly; +- standard-library improvements such as the `dbm.sqlite3` backend when the project actually benefits from them. + +Do not use new features merely for novelty. Prefer them when they reduce ambiguity, make invariants visible, or remove compatibility shims that no longer serve the baseline. + +### 3.3 Experimental CPython features + +Python 3.13 includes experimental implementation paths. Treat them as opt-in runtime targets, not assumptions. + +#### Free-threaded CPython + +Free-threaded CPython disables the GIL in a separate experimental build. 
+ +Rules: + +- do not rely on the GIL as a correctness mechanism for mutable shared state; +- protect shared mutable state with explicit ownership, locks, queues, actors, immutable snapshots, atomics in extension code, or process boundaries; +- assume C extensions may re-enable or require the GIL unless they explicitly advertise free-threaded support; +- do not claim free-threaded compatibility unless tests run under the free-threaded executable or the repository has equivalent CI evidence; +- treat hidden global caches, singletons, lazy imports, monkeypatching, module-level mutation, and process-wide environment changes as concurrency risks. + +#### Experimental JIT + +The Python 3.13 JIT is experimental and disabled by default unless CPython is built/configured for it. + +Rules: + +- do not depend on JIT availability for correctness; +- do not promise performance improvements without measurement on the target interpreter and workload; +- do not micro-optimize around undocumented JIT internals; +- prefer algorithmic improvements, I/O reduction, data-shape fixes, and measured hot-path changes. + +### 3.4 Removed Python 3.13 surfaces + +Do not introduce dependencies on modules and APIs removed in Python 3.13. + +Removed legacy standard-library modules include: + +```text +aifc, audioop, cgi, cgitb, chunk, crypt, imghdr, mailcap, msilib, nis, +nntplib, ossaudiodev, pipes, sndhdr, spwd, sunau, telnetlib, uu, xdrlib +``` + +Also avoid `lib2to3`, the `2to3` tool, `tkinter.tix`, `locale.resetlocale()`, `typing.io`, `typing.re`, and chained `classmethod` descriptor patterns. + +When migrating old code, remove the dependency, choose a maintained replacement, and add compatibility tests around the behavior that mattered. Do not vendor dead stdlib behavior by copying unreviewed code. + +--- + +## 4. Hard boundaries + +Violating these requires explicit repository policy or user authorization. 
+ +### 4.1 Correctness and contracts + +- Never change public API shape without compatibility analysis. +- Never change persisted data format, migration ordering, serialization keys, CLI output, error codes, route semantics, or environment-variable names without tracing downstream consumers. +- Never duplicate canonical contract facts across code, docs, tests, generated clients, schemas, or examples. +- Never edit generated files without editing the generator or canonical source unless the repository explicitly stores generated outputs as the source of truth. +- Never weaken validation to make tests pass. +- Never replace a failing proof with a weaker assertion unless the old assertion was wrong and the new one proves the real invariant. + +### 4.2 Type and dynamic-safety boundaries + +- Never introduce `Any` as an escape hatch where a protocol, type variable, overload, `TypedDict`, dataclass, Pydantic model, or narrower type can express the contract. +- Never suppress type errors globally; suppress locally only with a reason tied to a real limitation. +- Never use `cast()` to lie. A cast must document a boundary where runtime evidence already proves the type. +- Never use mutable default arguments. +- Never return heterogeneous dictionaries as domain objects when a named type would make the contract clear. +- Never use stringly typed state where an enum, literal, dataclass, typed model, or value object is the real contract. +- Never rely on import-time side effects unless the repository intentionally uses plugin registration or framework discovery and tests cover it. + +### 4.3 Error and security boundaries + +- Never use bare `except:` or broad `except Exception:` unless re-raising, narrowing, or preserving cancellation/interrupt semantics is explicit. +- Never swallow `KeyboardInterrupt`, `SystemExit`, `asyncio.CancelledError`, or process termination signals accidentally. 
+- Never log secrets, credentials, tokens, private keys, passwords, session cookies, PII, or unredacted authorization headers. +- Never use unsafe deserialization, `eval`, `exec`, shell interpolation, path traversal, or SSRF-prone URL handling without a narrowly justified, validated boundary. +- Never pass user-controlled strings to `subprocess` with `shell=True` unless the shell itself is the explicit product surface and inputs are safely quoted/validated. +- Never make network, filesystem, database, or process side effects at import time unless the project has a deliberate plugin/bootstrap pattern. + +### 4.4 Async and concurrency boundaries + +- Never create orphan tasks without ownership, cancellation, and failure observation. +- Never call blocking I/O inside an event loop without moving it to an executor or using an async-native API. +- Never ignore backpressure in queues, streams, workers, or message consumers. +- Never mutate process environment after concurrent work starts unless the operation is serialized and isolated. +- Never use global mutable caches without invalidation, capacity, thread-safety, and test evidence. +- Never rely on CPython's GIL for logical thread safety in code that may run on Python 3.13 free-threaded builds or alternative interpreters. + +### 4.5 Build and dependency boundaries + +- Never install packages globally for repository work. +- Never change dependency constraints or lock files without understanding direct, transitive, security, and deployment impact. +- Never add a dependency when a small local function or existing dependency is enough. +- Never vendor code without license, update, and security implications. +- Never mix package managers casually. Preserve the repository's canonical tool. +- Never claim a package is compatible with Python 3.13 unless tests/imports/builds verify the relevant dependency set. + +--- + +## 5. 
Types, domain modeling, and API design + +### 5.1 Prefer named domain shapes + +Choose constructs that express the domain: + +| Need | Preferred Python construct | +|---|---| +| Immutable data value | `@dataclass(frozen=True, slots=True)` or `NamedTuple` where appropriate | +| Mutable internal record | `@dataclass(slots=True)` with controlled mutation | +| External validated model | Repository-standard schema/model tool, such as Pydantic, attrs, dataclass, Marshmallow, or framework model | +| Closed symbolic states | `Enum` / `StrEnum` / `Literal` depending on runtime needs | +| Structural capability | `typing.Protocol` | +| Mapping with fixed keys | `TypedDict`, using `Required`, `NotRequired`, and `ReadOnly` when useful | +| API narrowing helper | `TypeIs` when both true and false branches narrow correctly; `TypeGuard` only when its semantics are intended | +| Simple result pair | named dataclass or tuple only when positional meaning is obvious | +| Distinct domain identity | small value object, validated newtype-like wrapper, or repository-standard model | + +Do not create a type merely to look enterprise. Every type must prevent misuse, name a domain concept, isolate a boundary, or make evolution safer. + +### 5.2 Type hints are contracts, not decoration + +Use type hints to communicate real API contracts. 
+ +Rules: + +- prefer precise collection and callable types from `collections.abc` for parameters; +- prefer concrete return types where callers depend on behavior; +- use `Self`, `Protocol`, `TypeVar`, `ParamSpec`, `TypeVarTuple`, overloads, and type aliases when they remove ambiguity; +- use `Literal` for small protocol strings only when the set is stable and public; +- use `Final` and `ClassVar` where mutation semantics matter; +- use `ReadOnly` for `TypedDict` items that callers must not mutate; +- keep annotations import-safe under the repository's chosen annotation policy; +- avoid runtime type introspection on annotations without understanding postponed annotation behavior and `typing.get_type_hints()` consequences. + +Avoid: + +- annotating everything as `dict`, `list`, `Callable`, or `Any` when the shape matters; +- type aliases that hide complexity without naming a domain concept; +- overloads where a small object model or enum would be clearer; +- casts that mask incorrect validation or parsing. + +### 5.3 Public API evolution + +For public packages: + +- preserve import paths unless the change is a deliberate deprecation or major-version break; +- add deprecation warnings through the repository's established mechanism, using `warnings.deprecated()` where appropriate; +- keep type stubs, `py.typed`, docs, examples, and runtime behavior synchronized; +- test public imports and representative type-checking examples; +- update changelog or migration notes when user behavior changes. + +For internal packages: + +- prefer cohesive refactors over compatibility shims that no real caller needs; +- delete dead wrappers once callers are migrated; +- keep import boundaries clean so internal convenience does not leak into public contracts. + +### 5.4 Dynamic behavior needs stronger evidence + +Python allows dynamic patterns. Use them only when they earn their keep. 
+ +Dynamic dispatch, monkeypatching, runtime imports, metaclasses, descriptors, decorators, `__getattr__`, `__getattribute__`, module-level plugin discovery, and reflection require: + +- a clear owner of the registry or dynamic state; +- tests for registration, lookup, error messages, and duplicate/missing cases; +- type stubs or protocols where static tools cannot infer the contract; +- documentation when public users must participate in the pattern. + +Do not use dynamic machinery to avoid naming the domain model. + +--- + +## 6. State, configuration, and truth ownership + +### 6.1 State must have one owner + +Before changing stateful code, identify the authority: + +- database row or transaction; +- migration or schema file; +- environment variable or config file; +- CLI argument or parsed settings object; +- cache, memoized value, singleton, or lazy-loaded object; +- queue message, event, task state, or job record; +- external API, webhook, or generated client; +- notebook cell state or pipeline artifact; +- package metadata or lock file. + +Rules: + +- centralize parsing and validation at the boundary; +- represent validated configuration as a named object rather than repeatedly reading `os.environ`; +- pass dependencies explicitly where practical; +- isolate global state behind a small owner with reset hooks for tests; +- keep derived state either recomputable or explicitly invalidated; +- make idempotency and transaction boundaries visible. + +### 6.2 Imports are a dependency graph + +Python imports execute code. Treat imports as design, not plumbing. 
+ +Rules: + +- avoid import cycles by improving module boundaries, not by adding random local imports; +- use local imports only for measured startup cost, optional dependencies, cycle-breaking with rationale, or plugin loading; +- keep `__init__.py` exports deliberate and tested; +- preserve package data and resources using `importlib.resources` rather than filesystem assumptions; +- do not shadow standard-library or dependency module names; +- avoid side effects at import time except deliberate registration patterns. + +### 6.3 Configuration is a contract + +Configuration facts must have one canonical owner. + +Rules: + +- prefer typed settings objects over scattered environment reads; +- validate config at startup with actionable errors; +- keep defaults in one place; +- ensure docs, examples, deployment manifests, and tests derive from or match the canonical config; +- keep secret values out of code, logs, tests, and docs; +- test missing, malformed, defaulted, and override cases. + +--- + +## 7. Errors, failures, and observability + +### 7.1 Model recoverable outcomes deliberately + +Use exceptions for exceptional control transfer and integration boundaries. Use named result objects or sealed-like domain models when callers must distinguish ordinary business outcomes. + +Examples of ordinary outcomes that should not be hidden in a generic exception: + +- validation failure; +- authorization denial; +- duplicate record; +- cache miss; +- parse ambiguity; +- idempotent no-op; +- unavailable optional feature. + +When using exceptions: + +- choose the narrowest meaningful exception type; +- include enough context to debug without leaking secrets; +- preserve cause chains with `raise ... from ...`; +- do not erase cancellation, timeout, or interrupt signals; +- use `ExceptionGroup` / `except*` where concurrent failures must be preserved. + +### 7.2 Logs and metrics are feedback surfaces + +Use structured, actionable observability. 
+ +Rules: + +- log at the boundary where context exists; +- include stable correlation identifiers where available; +- avoid duplicate noisy logs at every stack layer; +- make failure messages useful to operators and users; +- do not log secrets or sensitive payloads; +- add metrics/traces where tests cannot prove runtime health; +- keep CLI stdout for machine/user output and stderr for diagnostics. + +### 7.3 User-facing errors are contract surfaces + +For CLIs, APIs, SDKs, and libraries: + +- test error messages when users or tools depend on them; +- keep exit codes stable; +- preserve HTTP status semantics and response schemas; +- avoid exposing internal traceback details across service boundaries; +- document new failure modes when they affect users. + +--- + +## 8. Concurrency, async, and scheduling + +### 8.1 `asyncio` and structured concurrency + +Prefer structured ownership of async work. + +Rules: + +- use `asyncio.TaskGroup` or repository-standard structured-concurrency tools for related tasks; +- keep task ownership, cancellation, timeout, and error aggregation explicit; +- propagate `CancelledError` unless deliberately translating it at a boundary; +- use `asyncio.timeout()` or repository-standard timeout policy for bounded work; +- do not block the event loop with synchronous file, network, database, subprocess, CPU, or sleep calls; +- isolate sync/async boundaries with clear adapters; +- test cancellation, timeout, and partial-failure cases for non-trivial async code. + +### 8.2 Threads and processes + +Use threads for blocking I/O and integration with thread-safe libraries. Use processes for CPU-bound work unless free-threaded compatibility and measurement justify threads. 
+ +Rules: + +- guard shared mutable state explicitly; +- keep executor lifetime owned and shut down; +- pass immutable or serialized data across process boundaries; +- design worker shutdown and queue draining deliberately; +- test race-prone logic with deterministic synchronization where possible; +- do not mutate module globals from multiple threads without a lock or owner. + +### 8.3 Background jobs and queues + +For workers, schedulers, and queues: + +- define idempotency keys and retry semantics; +- record durable job state where work must survive process death; +- distinguish queued, running, succeeded, failed, cancelled, and retried states; +- preserve backpressure; +- make poison-message handling explicit; +- test shutdown and restart behavior. + +--- + +## 9. Packaging, environments, and dependencies + +### 9.1 `pyproject.toml` is a design surface + +`pyproject.toml` communicates build backend, project metadata, dependency groups, Python compatibility, entry points, tool configuration, and packaging behavior. + +Rules: + +- do not guess build-backend syntax; +- preserve the repository's package manager and lock-file semantics; +- keep package metadata, import package name, docs, and distribution name aligned; +- define console scripts through project entry points rather than ad hoc shell wrappers; +- include package data deliberately; +- ensure `py.typed` is present for typed published packages where appropriate; +- do not move tool configuration without checking whether the tool reads the new location. + +### 9.2 Dependencies and lock files + +Dependency changes are system changes. + +Before adding or changing dependencies, check: + +- direct need and alternatives; +- transitive dependency and license impact; +- Python 3.13 wheel availability; +- C extension and platform compatibility; +- free-threaded compatibility when relevant; +- security advisories; +- lock-file updates and deployment reproducibility; +- CI and Docker image implications. 
+ +Prefer narrow, explicit dependencies. Do not add a dependency merely for a small function unless the dependency already exists or the capability is non-trivial and maintained. + +### 9.3 Environments + +Rules: + +- use the repository's environment tool; +- never install into global Python for project work; +- prefer `python -m <module>` when it avoids PATH ambiguity; +- ensure local commands use the same interpreter and dependency group as CI; +- do not mix venvs, pyenv, uv, conda, Poetry, PDM, tox, and system Python without establishing which one is canonical; +- record new required environment variables, services, and system packages in the appropriate setup docs or runbook. + +### 9.4 Wheels, extensions, and ABI + +For packages with native extensions: + +- prefer the stable ABI / `abi3` only when the extension's API usage truly fits it; +- test source builds and wheels for supported platforms; +- keep build isolation correct; +- pin or declare build dependencies in `pyproject.toml`; +- account for Python 3.13 free-threaded builds only with explicit evidence; +- avoid private CPython C APIs unless the repository accepts version-specific breakage. + +--- + +## 10. Testing and verification + +### 10.1 Test the contract, not implementation trivia + +Good Python tests prove behavior, boundaries, and regressions. + +Prefer tests that cover: + +- public API behavior; +- domain invariants; +- edge cases and invalid inputs; +- serialization/deserialization round trips; +- CLI outputs and exit codes; +- config parsing and defaults; +- database migrations and rollback-sensitive paths; +- async cancellation and timeout behavior; +- dependency-injection boundaries; +- import/export compatibility; +- type-checking examples for library APIs. + +Avoid tests that only assert private call order unless the private order is itself the contract. 
+ +### 10.2 Pytest posture + +When pytest is used: + +- keep fixtures explicit, narrow, and named by domain role; +- avoid autouse fixtures unless they protect a global invariant; +- use `tmp_path`, monkeypatching, and dependency injection to isolate state; +- mark slow, integration, network, or flaky tests according to repository policy; +- do not hide real flakes by broad retries; identify race, time, I/O, or ordering causes; +- use parametrization to clarify behavior matrices without obscuring failures. + +### 10.3 Property, fuzz, and snapshot tests + +Use stronger test forms when examples are insufficient. + +- Use property tests for parsers, serializers, normalizers, validators, and state machines. +- Use fuzzing for untrusted input boundaries where the repository supports it. +- Use snapshots/golden files only when the output is a real contract. Review diffs manually. +- Keep fixtures minimal and meaningful. + +### 10.4 Type checking as verification + +For typed Python code: + +- run the repository's type checker on the narrowest useful target first; +- do not weaken annotations to make checks pass; +- add type tests or examples for generic public APIs; +- keep `py.typed` and stubs synchronized; +- treat type-checker differences as tool contracts, not as runtime truth. + +### 10.5 Required verification summary + +For non-trivial work, report: + +```text +Verification: +- Narrow checks run: +- Full or CI-equivalent checks run: +- Checks not run and why: +- Runtime/manual evidence: +``` + +Do not claim a check passed unless it actually ran and passed. + +--- + +## 11. 
Refactoring Python safely + +### 11.1 Boy Scout + Mikado + +When touching Python, leave the touched surface better: + +- improve names; +- extract coherent functions/classes; +- remove dead branches; +- collapse needless indirection; +- reduce import cycles; +- tighten types; +- delete stale compatibility shims; +- replace stringly contracts with named types; +- make validation central and explicit; +- improve test coverage around real behavior. + +Use Mikado sequencing for broader refactors: + +1. identify target design; +2. make the smallest safe prerequisite change; +3. verify; +4. repeat; +5. stop when the next improvement is a separate slice. + +Do not perform broad rewrites without executable evidence and a rollback path. + +### 11.2 Deleting code + +Before deleting Python code, trace the blast radius: + +- static imports and references; +- dynamic imports and plugin registrations; +- entry points and console scripts; +- framework discovery patterns; +- test fixtures and monkeypatch targets; +- docs, examples, generated clients, and stubs; +- serialized names, pickled paths, migration references, and config keys; +- external user imports and SemVer commitments. + +Deletion is safe only when the contract is gone, deprecated, or migrated and evidence proves no live dependency remains. + +### 11.3 Generated code and migrations + +Generated outputs and migrations require source-of-truth discipline. + +Rules: + +- update the generator, schema, or template first; +- regenerate outputs using the canonical command; +- inspect generated diffs for unexpected drift; +- preserve migration ordering; +- do not edit database migrations casually after release; +- test upgrade paths and data invariants where possible. + +--- + +## 12. 
Framework and boundary posture + +### 12.1 Web services and APIs + +For FastAPI, Django, Flask, Starlette, aiohttp, or other frameworks: + +- identify the canonical route/schema/dependency owner; +- keep request validation, authorization, business logic, and persistence boundaries distinct; +- avoid framework globals leaking into pure domain code; +- test auth, validation, error response, and serialization contracts; +- keep OpenAPI/schema/docs synchronized with runtime behavior; +- preserve middleware ordering and dependency-injection semantics. + +### 12.2 Databases and ORMs + +For SQLAlchemy, Django ORM, async ORMs, migrations, or raw SQL: + +- identify schema truth: migration, model, generated schema, or database introspection; +- use transactions deliberately; +- avoid N+1 regressions; +- handle isolation, locking, and retries explicitly; +- test migrations and representative queries; +- do not change cascade, nullability, uniqueness, or index semantics without blast-radius analysis. + +### 12.3 CLIs + +For CLIs: + +- treat flags, env vars, config files, output, and exit codes as public contracts; +- keep human-readable and machine-readable output distinct; +- avoid logging to stdout when stdout is data; +- test help text only where it is a maintained contract; +- preserve shell completion and packaging entry points. + +### 12.4 Data science, notebooks, and ML + +For notebooks and pipelines: + +- separate exploratory notebooks from production logic; +- move reusable logic into importable modules with tests; +- make randomness, data versions, feature definitions, and artifact paths explicit; +- preserve data lineage and reproducibility; +- avoid hidden state between notebook cells; +- test productionized transformations outside the notebook. + +--- + +## 13. 
Documentation and Python examples + +Use `.codex/PROTOCOL_AFAD.md` for agent-maintained documentation when public contracts, guides, runbooks, or code/document synchronization are involved, except for the repository root `README.md` exception defined in `AGENTS.md`. + +Python-specific documentation rules: + +- keep examples runnable against Python 3.13 unless explicitly marked otherwise; +- keep imports, entry points, package names, and type signatures synchronized with code; +- include version guards when behavior differs by Python version; +- prefer small complete examples over fragments that hide setup; +- document deprecations, migration paths, and public failure modes; +- use docstrings for local API semantics and AFAD-managed docs for broader contract theory. + +Root `README.md` remains a storefront. Keep it human-first and link to detailed docs rather than turning it into a reference database. + +--- + +## 14. Agent output checklist + +For non-trivial Python work, final output should include the relevant subset: + +```text +Python baseline: +- Interpreter/package baseline confirmed: +- Packaging/build tool used: + +System map: +- Truth owner: +- Evidence added/used: +- Blast radius checked: +- Invariant preserved: +- Theory preserved in: + +Change summary: +- Files changed: +- Public API/config/schema/CLI behavior changed: +- Dependencies or lock files changed: + +Verification: +- Narrow checks: +- Full checks: +- Checks not run: + +Risk: +- Remaining compatibility, concurrency, packaging, or operational risk: +``` + +Keep summaries proportional. Do not produce ceremony for a typo. Do not omit risk for changes that affect public contracts, persistence, packaging, concurrency, or security. 
diff --git a/.codex/AGENTS_RUST195_CARGO.md b/.codex/AGENTS_RUST195_CARGO.md new file mode 100644 index 00000000..a3f29744 --- /dev/null +++ b/.codex/AGENTS_RUST195_CARGO.md @@ -0,0 +1,753 @@ +# Rust 1.95+ / Cargo Agent Protocol + +This protocol governs agent work on Rust projects that target Rust 1.95 or newer and build with Cargo. + +Scope: libraries, services, CLIs, daemons, backends, systems tools, proc-macro crates, FFI crates, WebAssembly crates, embedded or `no_std` crates, Rust-backed desktop apps, and mixed-language repositories with Rust surfaces. + +Primary objective: produce Rust that is sound, explicit, type-driven, verifiable, maintainable, secure at boundaries, and aligned with the repository's actual compatibility contract. + +Optimize in this order: + +**soundness → invariants → ownership clarity → API compatibility → failure clarity → observability → performance where it matters → terseness** + +Terseness loses to explicitness. Local convenience loses to correctness. Borrow-checker workarounds lose to a clear ownership model. Passing `cargo check` is not the finish line. + +--- + +## 1. Repository intake + +Before touching Rust code, inspect the repository's actual shape. 
+ +Always inspect the relevant subset of: + +- `rust-toolchain.toml`, `rust-toolchain`, installed toolchain channel, and whether nightly is pinned; +- workspace and member `Cargo.toml` files; +- `Cargo.lock`, and whether the project is an application, library, or publishable workspace; +- `package.edition`, `package.rust-version`, workspace `resolver`, and workspace inheritance; +- feature flags, optional dependencies, target-specific dependencies, and default-feature policy; +- `.cargo/config.toml`, custom target settings, linker settings, environment assumptions, and aliases; +- `build.rs`, generated code, bindgen/cbindgen/prost/tonic/sqlx/diesel outputs, and checked-in generated artifacts; +- crate boundaries, public exports, module structure, trait definitions, and re-export surfaces; +- `unsafe` blocks, `unsafe fn`, FFI boundaries, `extern` blocks, `repr(...)` types, global state, and manual memory management; +- async runtime, thread ownership, channels, cancellation, shutdown, backpressure, and blocking boundaries; +- existing tests, doc tests, property tests, fuzz targets, Miri/Loom checks, benchmarks, CI, and project-specific verification commands. + +Classify the touched crate before designing the change: + +- **Published library:** MSRV, public API, SemVer, features, docs, and examples are contracts. +- **Internal library:** API evolution is easier, but invariants and ergonomics still matter. +- **Binary/service/CLI:** operational behavior, config, logs, exit codes, and runtime failure modes are contracts. +- **Proc macro/build tooling:** generated output, diagnostics, determinism, and compile-time cost are contracts. +- **FFI/embedded/WASM/no_std:** layout, panic behavior, allocation, target support, and host integration are contracts. + +Do not assume repository state. Verify it. + +--- + +## 2. Change loop + +For every non-trivial change, apply the Universal Engineering Contract concretely in Rust terms. 
+ +### 2.1 Minimum system map + +Before editing, identify: + +```text +Truth: +- Source of truth for the relevant state, config, schema, generated artifact, feature flag, or protocol value: +- Mutation paths: +- Derived/cached/generated copies: + +Evidence: +- Existing checks: cargo check/test/doc/clippy/fmt, contract tests, integration tests, property tests, fuzz/Miri/Loom, CI: +- Missing feedback worth adding: + +Consequence: +- Direct Rust dependencies: callers, trait impls, re-exports, features, cfg arms, tests: +- Indirect dependencies: serialization, FFI, generated code, build scripts, CLI output, docs, dashboards, human workflows: + +Invariant: +- Type, ownership, concurrency, memory-safety, protocol, or compatibility rule that must remain true: + +Preservation: +- Where the learned theory should live: type, test, rustdoc, safety comment, module name, build check, generated artifact, README, runbook: +``` + +Keep the map lightweight. For trivial changes, do not turn it into ceremony. For risky changes, do not skip it. + +### 2.2 Red → Green → Refactor + +For new behavior, start with the smallest failing proof: + +- unit test; +- integration test; +- doc test; +- compile-fail test where appropriate; +- property test; +- reproducible CLI invocation; +- fixture or golden file; +- Miri/Loom/fuzz reproduction; +- type-level or compile-time check. + +Then make the smallest coherent change, and immediately refactor until the touched surface is simpler, clearer, and easier to change. + +### 2.3 Compile-driven iteration + +Work in small increments: + +1. make one coherent change; +2. run the narrowest useful check, usually `cargo check -p <crate> --all-targets` or the repository's equivalent; +3. read the first real compiler error; +4. fix the root cause; +5. rerun the narrow check; +6. widen verification only after local shape is sound. + +Do not pile up cascading errors and try to reason about all of them at once. 
+ +### 2.4 Root-cause fixes only + +When verification fails: + +- read the actual failure output; +- identify the type, ownership, lifetime, feature, cfg, build, dependency, or logic cause; +- fix that cause; +- rerun the narrowest relevant check; +- rerun full required verification before declaring completion. + +Do not: + +- guess at compiler failures; +- blindly apply compiler suggestions without understanding the ownership or API consequence; +- add `.clone()`, `Arc`, `Mutex`, `Box`, `RefCell`, `unwrap`, `expect`, wildcard matches, or broad trait bounds just to quiet the compiler; +- suppress lints unless the suppression is narrowly scoped, justified, and better than the alternative; +- claim completion while required checks still fail. + +--- + +## 3. Rust 1.95+ baseline posture + +### 3.1 Stable toolchain + +Use the repository's pinned toolchain when present. Otherwise, assume stable Rust 1.95+ for projects governed by this protocol. + +For new crates created under this protocol: + +```toml +[package] +edition = "2024" +rust-version = "1.95" +``` + +For existing crates: + +- do not raise `rust-version` without a concrete benefit and explicit compatibility judgment; +- treat `rust-version` as a public contract for published crates; +- preserve the existing edition unless the task is an edition migration or the repository clearly standardizes on Rust 2024; +- if moving to edition 2024, run the appropriate migration checks, then manually review semantics rather than treating `cargo fix --edition` output as design guidance. + +Nightly is allowed only when the repository already pins nightly or the task explicitly requires an unstable capability. Nightly use must be isolated, named, justified, and wired consistently in local verification and CI. + +### 3.2 Rust 2024 expectations + +When using edition 2024, account for the edition's safety and semantics changes: + +- `unsafe_op_in_unsafe_fn` warns by default; keep explicit `unsafe {}` blocks inside `unsafe fn`. 
+- `extern` blocks require `unsafe`. +- `export_name`, `link_section`, and `no_mangle` require unsafe attributes. +- references to `static mut` are denied by default; redesign around atomics, locks, `OnceLock`, or other safe state owners. +- `std::env::set_var`, `std::env::remove_var`, and Unix `CommandExt::before_exec` are unsafe; avoid mutating process environment after concurrency begins. +- `Future` and `IntoFuture` are in the prelude; avoid redundant imports unless they improve local readability. +- migration fixes are conservative. Review temporary lifetime changes, macro fragment changes, and never-type fallback implications deliberately. + +### 3.3 Rust 1.95 language and library posture + +Rust 1.95 adds useful stable tools. Use them when they make the code clearer, not merely because they are new. + +- Prefer `cfg_select!` for readable compile-time configuration selection when the repository baseline is Rust 1.95+ and the pattern would otherwise need ad hoc `#[cfg]` branching or the `cfg-if` crate. +- Use `if let` guards in `match` arms when they make pattern-dependent conditions clearer. Remember that these guards do not contribute to exhaustiveness; the remaining arms must still handle all cases. +- Use collection insertion helpers such as `Vec::push_mut`, `Vec::insert_mut`, and the analogous `VecDeque`/`LinkedList` helpers when they avoid awkward indexing or double lookup while preserving clarity. +- Use `Atomic*::update` and `Atomic*::try_update` when they express an atomic read-modify-write loop more clearly than handwritten compare-exchange loops. State the ordering rationale. +- Use `std::hint::cold_path` only for genuinely cold paths where the intent is clearer than relying on profiling folklore. +- Custom JSON target specifications are not stable on Rust 1.95. If a custom target is required, pin and justify nightly rather than pretending the stable toolchain supports it. 
+ +### 3.4 Lint posture + +For new crates, prefer a strong but practical lint baseline: + +```rust +#![warn(missing_docs)] // libraries and public API crates +#![deny(unsafe_op_in_unsafe_fn)] +#![deny(unused_must_use)] +``` + +In `Cargo.toml`, prefer workspace-owned lint configuration where possible: + +```toml +[lints.rust] +unsafe_op_in_unsafe_fn = "deny" +unused_must_use = "deny" + +[lints.clippy] +all = "warn" +pedantic = "warn" +``` + +Do not enable noisy lint groups blindly in existing repositories. Match the repository's tolerance for warnings, then strengthen locally when it improves correctness and maintainability. + +--- + +## 4. Cargo and manifest contract + +### 4.1 `Cargo.toml` is a design surface + +`Cargo.toml` communicates the crate's identity, compatibility contract, feature model, dependency graph, build posture, and publication behavior. + +Rules: + +- no unused dependencies; +- no invented crate names, versions, or feature flags; +- no accidental default-feature sprawl; +- no duplicated package metadata where the workspace is the canonical owner; +- no path/git/registry dependency changes without compatibility and supply-chain judgment; +- no feature or dependency edits without checking the feature graph and build impact; +- no build-script side effects without explicit `cargo::rerun-if-*` discipline. + +Before modifying dependencies, verify actual crate versions and feature names through Cargo metadata, `cargo search`, crates.io, docs.rs, or authoritative upstream documentation. Do not hallucinate. + +### 4.2 Resolver, edition, and MSRV + +Cargo resolver behavior is part of the compatibility contract. + +- Edition 2024 implies resolver `"3"`, which uses Rust-version-aware dependency resolution. +- In virtual workspaces, set `resolver = "3"` explicitly at the workspace root when the workspace intends Rust 2024 resolver behavior. +- `package.rust-version` is an MSRV contract, not decoration. 
+- Do not run `cargo update` casually in published libraries or applications with locked dependency expectations. +- If a dependency upgrade raises MSRV, surface it explicitly and decide whether that is acceptable. + +### 4.3 Feature discipline + +Features must be additive capability switches. + +Use features for: + +- optional integrations; +- optional heavy dependencies; +- platform-specific support; +- `std` vs `alloc` vs `no_std` boundaries; +- runtime choices when the crate genuinely supports more than one. + +Do not use features to: + +- hide broken code; +- encode negative logic; +- change public API incompatibly; +- silently change serialization formats; +- create untested combinatorial explosions; +- make a dependency optional only in the manifest while code still assumes it exists. + +If feature combinations matter, verify them with the repository's feature-matrix tool or add one. `cargo hack` is appropriate when the repository already uses it or the feature matrix is non-trivial. + +### 4.4 Lockfiles + +Treat `Cargo.lock` according to crate posture: + +- applications, services, CLIs, and workspaces with binaries should usually check in `Cargo.lock`; +- published libraries may or may not check it in depending on repository policy; +- do not remove or rewrite the lockfile as incidental cleanup; +- when the lockfile changes, understand whether the change is required by the task or accidental dependency drift. + +### 4.5 Build scripts and generated code + +`build.rs` is part of the build contract. + +Build scripts must be deterministic, minimal, and explicit about inputs and outputs. Generated code must have a canonical source and a reproducible regeneration path. 
+ +When touching generated code: + +- find the generator and its inputs; +- modify the canonical input where possible; +- regenerate with the repository's command; +- do not hand-edit generated output unless the repository explicitly treats it as source; +- verify that checked-in generated artifacts and source inputs are not drifting. + +--- + +## 5. Type, API, and domain modeling + +### 5.1 Make invalid states hard to express + +Prefer Rust's type system over runtime conventions. + +Use: + +- enums for domain alternatives; +- newtypes for IDs, names, tokens, durations, counters, and units; +- structs for coherent state with real invariants; +- smart constructors when a value has validation rules; +- `NonZero*`, bounded numeric types, and domain-specific wrappers where they clarify invariants; +- `PhantomData` only when it encodes a real type-level relationship. + +Avoid: + +- boolean mode flags in public APIs; +- magic strings for states, capabilities, or protocols; +- parallel enums that shadow a canonical enum without a boundary reason; +- `Option` fields that together encode a hidden state machine; +- `String` where a semantic type or borrowed `str` boundary is clearer; +- widening visibility for tests or convenience. + +### 5.2 Public API discipline + +Every `pub` item is a promise unless the crate is clearly internal. + +- Use the narrowest visibility: private, `pub(super)`, `pub(crate)`, then `pub`. +- Re-export deliberately. A re-export can become part of the public contract. +- Do not expose implementation types that prevent future refactoring. +- Avoid public type aliases that obscure ownership or error semantics. +- For extensibility, prefer sealed traits when downstream implementation would create compatibility hazards. +- For public enums that may grow, consider `#[non_exhaustive]` deliberately and document how callers should match. + +### 5.3 Failure modeling + +Use `Result` for fallible operations and `Option` for genuine absence. 
+ +- Domain/library errors should usually be explicit enums, often implemented with `thiserror`. +- Binary/CLI/glue layers may use `anyhow`/`eyre` when precise downstream matching is not part of the contract. +- Do not use panics for expected domain failures. +- Do not use `unwrap` or `expect` in production paths unless the invariant is obvious, local, and explained by the surrounding code or a short message. +- Error messages at user or API boundaries are contract surfaces; keep them stable or versioned when consumers depend on them. +- Preserve source errors when context matters; do not flatten error chains into strings too early. + +### 5.4 Ownership and borrowing + +The ownership model is part of the design. + +- Prefer borrowing when the caller retains ownership and the callee only observes. +- Prefer owning when the value must outlive the call, move across threads, or become internal state. +- Add `Clone` only when duplication is semantically cheap and meaningful. +- Add `Copy` only for small value types where implicit duplication cannot hide cost or ownership meaning. +- Use `Arc` for shared ownership across threads, not as a borrow-checker escape hatch. +- Use `Rc` only for single-threaded shared ownership. +- Use `Cow` when the API genuinely benefits from accepting borrowed or owned data. +- Use `Box` for indirection, trait objects, or recursive types, not to hide design confusion. + +Do not convert everything to owned `String`, `Vec`, `Arc`, or `'static` merely to make lifetimes disappear. If lifetimes are painful, revisit the boundary and state ownership. + +### 5.5 Trait bounds and generics + +Trait bounds are API contracts. + +- Keep bounds as narrow as the implementation requires. +- Do not add `Clone`, `Default`, `Send`, `Sync`, `'static`, `Serialize`, or `Deserialize` bounds unless the function genuinely needs them. +- Prefer `impl Trait` for local API clarity when the concrete type should remain hidden. 
+- Prefer named generic parameters when callers or documentation need to reason about the relationship between types. +- Avoid blanket impls that block future specialization or create coherence hazards. + +--- + +## 6. Unsafe, FFI, and memory discipline + +### 6.1 Default stance + +Safe Rust is the default. `unsafe` is an implementation boundary that must buy something concrete: FFI, performance with proven invariants, low-level memory layout, atomics, embedded constraints, or API capabilities impossible in safe Rust. + +If a crate does not need unsafe, prefer: + +```rust +#![forbid(unsafe_code)] +``` + +If a crate needs unsafe, require: + +```rust +#![deny(unsafe_op_in_unsafe_fn)] +``` + +### 6.2 Unsafe block contract + +Every unsafe block must be small and must have a nearby `SAFETY:` explanation covering: + +- the exact invariant required; +- why it holds at that point; +- who maintains it in the future; +- what would make it invalid. + +Do not write vague safety comments such as "caller guarantees this" unless the caller contract is also expressed in the function signature and rustdoc. + +### 6.3 Unsafe functions + +Every `unsafe fn` must document: + +- `# Safety` preconditions; +- aliasing, lifetime, initialization, layout, threading, and ownership requirements; +- whether the function may be called concurrently; +- whether panic or unwind across the boundary is allowed. + +Inside `unsafe fn`, still use explicit unsafe blocks for unsafe operations. 
+ +### 6.4 FFI boundaries + +For FFI: + +- use `unsafe extern` blocks; +- make ownership transfer explicit; +- define who allocates and who frees; +- avoid unwinding across FFI boundaries unless the ABI and project explicitly support it; +- use `repr(C)` only when layout compatibility is required; +- validate pointers, lengths, alignment, initialization, and lifetime assumptions; +- keep conversion between raw and safe types narrow and tested; +- consider Miri, sanitizer, or integration tests when memory invariants are subtle. + +### 6.5 Global state + +Global state must have an owner and a mutation policy. + +Prefer `OnceLock`, `LazyLock`, atomics, or scoped dependency injection over mutable statics. Avoid `static mut`. Avoid process-wide environment mutation after threads, async runtimes, or libraries may have started. + +--- + +## 7. Async, concurrency, and cancellation + +### 7.1 Runtime ownership + +Do not add an async runtime casually. + +- Binaries and services may own a runtime. +- Libraries should usually expose async functions without constructing a runtime internally. +- Runtime choice is a contract when it appears in public types, features, or docs. +- Do not block inside async tasks unless using an explicit blocking boundary such as `spawn_blocking`. + +### 7.2 Task lifecycle + +Every spawned task must have an owner, purpose, and shutdown path. + +Do not launch fire-and-forget work without: + +- a retained `JoinHandle` or supervised task set; +- cancellation or shutdown signaling; +- error propagation or logging; +- backpressure where input can outpace processing. + +### 7.3 Cancellation safety + +For async code, identify what happens when a future is dropped. + +- Do not hold locks across `.await` unless the lock type and scope are deliberately async-safe. +- Do not assume `select!` branches are cancellation-safe; verify the operation. +- Keep transactions, locks, and partial writes scoped so cancellation cannot leave corrupt state. 
+- Prefer explicit state machines when retry, rollback, or idempotency matters. + +### 7.4 Channels and shared state + +- Prefer bounded channels unless unbounded growth is proven safe. +- Document message ownership and shutdown semantics. +- Use `Mutex`, `RwLock`, atomics, or channels according to the invariant, not habit. +- Do not use `Arc<Mutex<T>>` as a default architecture. Sometimes it is right; often it is an unmodeled ownership problem. +- For atomics, state the memory ordering rationale. Do not use `SeqCst` as a substitute for understanding. + +### 7.5 Testing concurrency + +For concurrency-sensitive code, ordinary tests are often insufficient. Use the strongest practical feedback: + +- Loom for interleaving-sensitive synchronization logic; +- Miri for undefined behavior and aliasing-sensitive unsafe code; +- stress tests for operational timing bugs; +- deterministic fake clocks or schedulers where available; +- integration tests for shutdown and cancellation paths. + +--- + +## 8. Boundaries, protocols, and observability + +### 8.1 Serialization is a contract + +Serialization shape is not an implementation detail once external systems, stored data, or users depend on it. + +- Do not derive `Serialize`/`Deserialize` on domain types when the wire format should evolve independently. +- Use DTOs or wire types when the external shape differs from the domain model. +- Make enum-to-wire mapping explicit where spelling, casing, aliases, or compatibility matter. +- Preserve backward compatibility for stored or external formats unless the task explicitly changes the contract. +- Add golden tests for important wire formats. 
+ +### 8.2 CLI and process boundaries + +For CLIs and process integration: + +- exit codes are contracts; +- stdout/stderr separation is a contract; +- human output and machine-readable output should not be casually mixed; +- environment variables and config keys must have canonical owners; +- secrets must not appear in logs, panic messages, debug output, or error chains. + +### 8.3 Configuration and platform gates + +Configuration facts must be canonical. + +- Use typed config structs at the boundary. +- Validate config once, early, and explicitly. +- Use `cfg_select!`, `#[cfg]`, and target-specific dependencies deliberately. +- Do not duplicate platform names, feature names, environment variable names, or protocol constants across code and docs. +- Test platform-specific code paths where feasible. If not feasible locally, preserve the verification story in CI or docs. + +### 8.4 Observability + +For services and operational tools, feedback must survive production. + +- Prefer structured logging/tracing at boundaries and state transitions. +- Do not log secrets or high-cardinality values casually. +- Attach context to errors close to where information is available. +- Add metrics or traces for behavior whose correctness cannot be inferred from tests alone. +- When fixing an incident-prone path, add the signal that would reveal recurrence. + +--- + +## 9. Testing and verification + +### 9.1 Verification ladder + +Use the cheapest check that proves the relevant behavior, then widen according to risk. + +Common ladder: + +```bash +cargo fmt --check +cargo check --workspace --all-targets +cargo clippy --workspace --all-targets --all-features -- -D warnings +cargo test --workspace --all-features +cargo test --workspace --doc +cargo doc --workspace --no-deps +``` + +Adapt the ladder to the repository. Do not force `--all-features` when features are intentionally mutually exclusive; use the repository's feature matrix instead. 
+ +### 9.2 What to test + +Test behavior and invariants, not implementation trivia. + +Prioritize: + +- domain invariants and edge cases; +- parser/serializer round trips and golden outputs; +- error cases and failure messages at user/API boundaries; +- feature-flag combinations that change behavior; +- concurrency cancellation and shutdown; +- FFI safety contracts; +- migration and backward compatibility behavior; +- bug reproductions before fixes. + +### 9.3 Property, fuzz, and snapshot tests + +Use stronger test forms when ordinary examples miss the risk: + +- property tests for algebraic invariants, parsers, encoders, and state transitions; +- fuzz tests for parsers, protocol inputs, FFI boundaries, and unsafe code; +- snapshot/golden tests for user-facing output or wire formats, with deliberate review of changes; +- compile-fail tests for macros, public API constraints, and type-level guarantees. + +### 9.4 Rustdoc and examples + +Rustdoc is executable documentation when examples are doc tests. + +Public APIs should document: + +- purpose; +- errors; +- panics; +- safety preconditions; +- cancellation behavior for async APIs where relevant; +- feature flags or platform limitations; +- examples for non-obvious use. + +Do not write examples that require unstated global state, network availability, or timing assumptions unless marked and justified. + +--- + +## 10. Refactoring, deletion, and module design + +### 10.1 Coherent repair + +When a local patch exposes an incoherent module boundary, type model, or feature contract, fix the smallest coherent area rather than stacking workarounds. 
+ +Examples of coherent repair: + +- replace stringly typed state with an enum and update the affected matches; +- move validation into a smart constructor and remove scattered checks; +- split a DTO from a domain type when serialization concerns are leaking inward; +- extract a module when a file mixes unrelated responsibilities; +- collapse a trait that has only one implementation and no current abstraction value. + +### 10.2 Compatibility-aware refactoring + +Refactor private/internal code aggressively when evidence stays green. Refactor public or published surfaces deliberately. + +Before changing public API: + +- check downstream compatibility promises; +- preserve SemVer where applicable; +- add deprecation paths when needed; +- update rustdoc and examples; +- verify feature flags and re-exports. + +### 10.3 No god constructs + +A god construct concentrates unrelated responsibilities. + +Refactor: + +- god modules that mix parsing, validation, storage, transport, and presentation; +- god structs with many optional fields representing multiple states; +- god enums that collapse unrelated protocols into a single catch-all type; +- god traits with broad, unrelated method sets; +- god functions with named comment phases that should be named helpers. + +Extraction must improve cohesion, not merely reduce line count. + +### 10.4 Safe deletion + +Before deleting Rust code, check: + +- direct references with search and compiler feedback; +- public exports and downstream API implications; +- feature-gated or cfg-gated references; +- proc macro or generated references; +- serialization formats and stored data; +- FFI symbols, `no_mangle`, exported names, and linker scripts; +- build scripts, examples, tests, benches, docs, CI, and human workflows. + +Deleting dead code is good. Deleting untraced contract surface is breakage. + +--- + +## 11. 
CI and project automation + +### 11.1 CI mirrors local verification + +The canonical verification path must be runnable locally and in CI with the same strictness. Do not create CI-only checks that developers or agents cannot reproduce. + +### 11.2 Toolchain pinning + +Use `rust-toolchain.toml` for repository toolchain policy when the project needs a specific toolchain, components, or targets. + +CI should install the same toolchain and components used locally, such as: + +- `rustfmt`; +- `clippy`; +- target triples; +- Miri/nightly only when explicitly part of the project policy. + +### 11.3 Supply-chain discipline + +- Pin third-party CI actions to immutable commit SHAs where repository policy requires supply-chain hardening. +- Do not add Git dependencies casually. +- Use `cargo audit`, `cargo deny`, SBOM generation, or equivalent checks when the repository already has them or the risk profile justifies them. +- Treat dependency updates as behavior changes unless proven otherwise. + +### 11.4 Build reproducibility + +- Avoid build scripts that depend on ambient machine state. +- Keep generated files reproducible. +- Use `--locked` or `--frozen` in CI when the lockfile is a contract. +- Do not rely on globally installed tools when the repository provides `just`, `xtask`, `cargo make`, `mise`, `nix`, or another pinned workflow. + +--- + +## 12. Documentation and self-containment + +### 12.1 Rustdoc requirements + +For public API crates: + +- public types, traits, functions, modules, and macros need rustdoc unless repository policy says otherwise; +- unsafe APIs require `# Safety`; +- fallible APIs should document errors; +- panicking APIs should document panics; +- async APIs with non-obvious cancellation behavior should document cancellation safety; +- feature-gated APIs should document the feature. + +### 12.2 Comments + +Comments should explain non-obvious invariants, safety, compatibility, or operational constraints. Do not comment what the code already says. 
+ +Good comments explain why a seemingly simpler change is wrong, where an invariant is maintained, or what external contract constrains the implementation. + +### 12.3 Self-containment + +Source code, rustdoc, comments, and project documentation must never reference the agent directive file by name, section, or as justification for a design decision. + +Agent directive files are operational instructions for agents. Code and docs must stand on their own. + +```rust +// Forbidden: references the agent protocol as justification. +// Per AGENTS.md, do not use a wildcard match here. + +// Correct: self-contained engineering reason. +// No wildcard arm: adding a new state must force every transition table to be reviewed. +``` + +--- + +## 13. Incidental observation protocol + +When reading a file surfaces a defect, rule violation, or clear improvement opportunity unrelated to the active task, record it in the project's designated observation log and continue the active task. + +Do not fix unrelated observations in the current change unless they are prerequisites for correctness. Do not interrupt the workflow to discuss every incidental finding. + +Each observation should record: + +- stable ID; +- date; +- status; +- file and line range; +- category; +- what is wrong and why it matters; +- current pattern or excerpt; +- resolving change; +- effort level. + +If the project has no observation log, include only high-value observations in the final summary when they affect future safety or maintainability. + +--- + +## 14. Pre-output checklist + +Run this before declaring completion. + +### System theory + +- Truth: is the source of truth identified and changed at the right layer? +- Evidence: did you add or run feedback proportional to risk? +- Consequence: did you trace direct and indirect blast radius? +- Invariant: is the important invariant protected by type, test, assertion, or documented contract? +- Preservation: did important theory land somewhere durable? 
+ +### Rust semantics + +- Are domain alternatives modeled with types rather than strings, flags, or scattered conventions? +- Are ownership and borrowing choices semantically justified? +- Are trait bounds no wider than needed? +- Are public APIs narrow, documented, and compatible with crate posture? +- Are `Option`, `Result`, and panic behavior used for their proper meanings? + +### Cargo and features + +- Are edition, resolver, MSRV, and feature changes deliberate? +- Are dependency versions and feature names verified? +- Are features additive and tested where meaningful? +- Did lockfile changes happen only when justified? +- Are generated artifacts and build scripts in sync with their canonical inputs? + +### Unsafe and concurrency + +- Is unsafe absent where unnecessary? +- Does every unsafe block or unsafe function have a real safety contract? +- Are task lifecycles, cancellation, blocking, locks, channels, and shutdown paths explicit? +- Are atomic orderings justified? +- Did you avoid global mutable state or give it a clear owner? + +### Verification + +- Did the narrow relevant check pass? +- Did verification widen when the change widened? +- Are formatting, linting, tests, doc tests, or stronger tools run as appropriate? +- Are remaining failures unrelated and explicitly stated? +- Is the touched Rust surface clearer and easier to change than before? diff --git a/.codex/AGENTS_SQLITE3MC233_SQLITE353.md b/.codex/AGENTS_SQLITE3MC233_SQLITE353.md new file mode 100644 index 00000000..54ea21e7 --- /dev/null +++ b/.codex/AGENTS_SQLITE3MC233_SQLITE353.md @@ -0,0 +1,755 @@ +# SQLite3 Multiple Ciphers 2.3.3 / SQLite 3.53.0 Agent Protocol + +This protocol governs agent work on projects that build, vendor, link, wrap, configure, distribute, test, or operate **SQLite3 Multiple Ciphers 2.3.3**, based on **SQLite 3.53.0**. 
+ +Scope: C and C++ integrations, amalgamation builds, static or shared library packaging, embedded applications, CLIs, services, language bindings, JNI/JNA, Python/Rust/Node/.NET/Java/Kotlin wrappers, SQL migrations, encrypted database files, PRAGMA/URI configuration, key and rekey flows, backups, WAL/journal behavior, build flags, and cross-platform distribution. + +Primary objective: preserve data integrity, encryption correctness, key safety, SQLite compatibility, build reproducibility, and clear ownership of database/file-format contracts. + +Optimize in this order: + +```text +data integrity → key safety → cipher/file-format compatibility → source-of-truth clarity → portability → observability without leakage → performance where measured → terseness +``` + +Convenience loses to data safety. Local build success loses to runtime link correctness. Encryption that is not tested as encryption is not verified. A wrapper API that hides key ownership, cipher selection, or migration behavior is not finished. + +This protocol inherits `.codex/UNIVERSAL_ENGINEERING_CONTRACT.md`. Apply the universal Truth / Evidence / Consequence / Invariant / Preservation map before all SQLite3MC-specific rules. When SQLite3MC is used from Java, Kotlin, Python, Rust, C, C++, or another runtime, apply this protocol in addition to the relevant language protocol. + +--- + +## 1. Repository intake before touching SQLite3MC surfaces + +Before editing anything related to SQLite3 Multiple Ciphers, determine the repository's actual integration model. 
+ +Inspect the relevant subset of: + +- vendored source files, especially `sqlite3mc_amalgamation.c`, `sqlite3mc_amalgamation.h`, `sqlite3.c`, `sqlite3.h`, `sqlite3ext.h`, `sqlite3mc.h`, patches, generated amalgamation scripts, and third-party manifests; +- version pins, release tags, commit hashes, checksums, package metadata, lock files, SBOM entries, release notes, and any source-ID assertions; +- build systems: Autotools, CMake, Premake, GNU Make, MSBuild/Visual Studio, Meson, Bazel, Gradle, Cargo build scripts, Python extension builds, npm native builds, or project-specific wrappers; +- compiler and linker flags, `SQLITE_*` options, `SQLITE3MC_*` options, default cipher configuration, legacy cipher flags, ICU/ZLIB/MINIZ configuration, and platform-specific defines; +- whether the repository links the SQLite3MC library directly, replaces a vanilla SQLite library, uses the amalgamation, or consumes a language binding that bundles SQLite3MC; +- runtime library resolution: static vs dynamic linking, DLL/shared-object search paths, rpath/install-name, package manager behavior, container images, Android/iOS/WASM targets, and CI artifacts; +- key paths: where passphrases, raw keys, KMS handles, secrets, environment variables, config values, user prompts, hardware-backed secrets, or test keys enter the system; +- SQL and API usage: `sqlite3_key`, `sqlite3_key_v2`, `sqlite3_rekey`, `sqlite3_rekey_v2`, `sqlite3mc_*` APIs, `PRAGMA key`, `PRAGMA rekey`, URI parameters, `ATTACH ... 
KEY`, backup APIs, and language-binding equivalents; +- database lifecycle: initial creation, open, authentication/keying, migration, attach/detach, backup, restore, VACUUM, WAL checkpointing, rekeying, decryption, compaction, corruption handling, and deletion; +- file-format assumptions: cipher scheme, page size, reserve bytes, plaintext header policy, KDF settings, legacy compatibility, `user_version`, schema migrations, and database compatibility fixtures; +- journaling and temp behavior: rollback journal, WAL, shared memory files, temporary tables, in-memory databases, temp-store configuration, and file-permission policy; +- tests and evidence: encrypted fixture files, wrong-key tests, rekey tests, migration tests, cross-platform CI, sanitizer runs, Valgrind, fuzzers, SQL logic tests, and production observability. + +Classify the touched surface before designing the change: + +- **Vendored native dependency:** version, patches, compile options, and source provenance are contracts. +- **Application database:** file format, key lifecycle, migrations, backups, durability, and restore behavior are contracts. +- **Published binding/package:** ABI/API, binary compatibility, platform wheels/artifacts, runtime linking, docs, examples, and package metadata are contracts. +- **Internal wrapper:** key ownership, error handling, connection lifecycle, and safe defaults are contracts. +- **CLI/tooling:** command-line flags, stdout/stderr shape, exit codes, script compatibility, secret handling, and non-interactive behavior are contracts. +- **Embedded/mobile/WASM build:** target support, compile flags, filesystem/VFS behavior, entropy source, memory constraints, and package size are contracts. + +Do not infer SQLite3MC behavior from ordinary SQLite alone. SQLite3MC is intentionally compatible with SQLite APIs, but encryption, VFS behavior, keying, and file-format state add contracts that ordinary SQLite does not have. + +--- + +## 2. 
Change loop in SQLite3MC terms + +### 2.1 Minimum system map + +For every non-trivial SQLite3MC change, identify: + +```text +Truth: +- Canonical owner of SQLite3MC version, SQLite source version, compile options, default cipher, legacy flags, and binding/runtime package version: +- Source of truth for key material and key lifecycle: +- Source of truth for database schema, migrations, cipher configuration, page format, and fixtures: +- Derived/generated copies: amalgamation, headers, wrappers, package metadata, docs, CI images, lock files: + +Evidence: +- Checks proving native build correctness, runtime link correctness, encryption roundtrip, wrong-key failure, rekey, migration, backup/restore, and language binding behavior: +- Missing feedback worth adding: + +Consequence: +- Direct dependencies: callers, wrappers, SQL scripts, migrations, bindings, tests, packaging, CLI tools, deployment images: +- Indirect dependencies: stored database files, backups, restore tools, support workflows, monitoring, user data, compliance, release process: + +Invariant: +- Data, encryption, file-format, key-safety, ABI/API, migration, or compatibility rule that must remain true: + +Preservation: +- Where learned theory belongs: build manifest, test fixture, migration note, wrapper API, safety comment, runbook, AFAD-managed doc, release checklist, CI assertion: +``` + +Keep this lightweight for low-risk edits. Do not skip it for changes that affect encryption, persisted files, build flags, runtime linking, migrations, or key handling. 
+ +### 2.2 Red → Green → Refactor + +For new behavior or bug fixes, start with the smallest failing proof: + +- encrypted open/read/write roundtrip; +- wrong-key rejection; +- rekey or decrypt migration fixture; +- cross-version or legacy-cipher fixture; +- SQL migration test; +- native build/link test; +- language-binding integration test; +- WAL/journal/backup/restore test; +- sanitizer or memory-leak reproduction; +- CLI invocation with deterministic output mode; +- file header or plaintext-leak check. + +Then make the smallest coherent implementation and immediately refactor until the touched surface has clearer ownership, fewer hidden states, and better verification. + +### 2.3 Narrow-to-wide verification + +Work in small increments: + +1. make one coherent change; +2. run the narrowest relevant check, such as a native build target, one integration test, a single binding test, or one fixture migration; +3. inspect the first real failure; +4. fix the root cause; +5. rerun the narrow check; +6. widen to repository-required verification before completion. + +For SQLite3MC, widening usually means verifying both compile-time and runtime facts: the code compiled against the intended headers and also loaded the intended library at runtime. + +### 2.4 Root-cause fixes only + +When verification fails: + +- read the actual SQLite error code, extended error code, native build diagnostic, linker output, sanitizer report, migration diff, or fixture mismatch; +- determine whether the root is key timing, wrong cipher configuration, stale generated source, mixed headers/library, runtime library shadowing, unsupported SQL, file permissions, WAL/journal mode, platform target, or actual corruption; +- fix that cause; +- preserve the failing proof if it guards real data safety or compatibility. 
+ +Do not: + +- swallow SQLite errors or collapse them into vague application errors; +- log passphrases, raw keys, key-bearing URIs, PRAGMA statements containing secrets, or decrypted data; +- downgrade to vanilla SQLite accidentally; +- mix SQLite headers from one version with a different runtime library; +- regenerate or edit amalgamation artifacts without updating the canonical generation path; +- change cipher defaults, page sizes, reserve bytes, KDF parameters, or legacy flags without a migration and fixture evidence; +- claim encryption correctness without a wrong-key failure test and a plaintext-leak check. + +--- + +## 3. Baseline posture: SQLite3MC 2.3.3 and SQLite 3.53.0 + +### 3.1 Version baseline + +For repositories governed by this protocol, assume: + +```text +SQLite3 Multiple Ciphers: 2.3.3 +Underlying SQLite: 3.53.0 +``` + +Use the repository's pinned version when it is more specific. Do not upgrade or downgrade SQLite3MC without a compatibility judgment, migration-risk assessment, and verification plan. + +SQLite3MC 2.3.3 includes the upstream SQLite 3.53.0 baseline and fixes secure nullification of cipher data structures on freeing. Treat any edit around cipher state cleanup as security-sensitive. Do not remove zeroization, nullification, or cleanup paths because they look redundant. + +SQLite 3.53.0 includes a fix for the WAL-reset database corruption bug. Do not downgrade to a pre-fix SQLite baseline without explicitly accepting the risk and preserving a reason. + +### 3.2 SQLite 3.53.0 feature posture + +Use SQLite 3.53.0 capabilities only when the deployed runtime is guaranteed to be SQLite3MC 2.3.3 / SQLite 3.53.0 or newer. + +Notable 3.53.0 behavior for agents: + +- `ALTER TABLE` can add and remove `NOT NULL` and `CHECK` constraints. Use this only when migration compatibility is acceptable. +- `REINDEX EXPRESSIONS` can rebuild expression indexes. 
Prefer it when repairing stale expression-index state rather than inventing application-level workarounds. +- `json_array_insert()` and `jsonb_array_insert()` are available in the 3.53.0 baseline. +- The CLI output defaults changed for interactive sessions through the Query Result Formatter (QRF). Tests and scripts must set explicit output modes instead of relying on human-oriented defaults. +- Bare semicolons at the end of dot-commands are silently ignored. Handle CLI script compatibility deliberately. +- New C interfaces such as `sqlite3_str_truncate()`, `sqlite3_str_free()`, `sqlite3_carray_bind_v2()`, `SQLITE_PREPARE_FROM_DDL`, `SQLITE_UTF8_ZT`, `SQLITE_LIMIT_PARSER_DEPTH`, and `SQLITE_DBCONFIG_FP_DIGITS` are available only when the runtime really is 3.53.0+. +- Floating-point text conversion behavior changed to round by default to 17 significant digits instead of the previous 15. Review golden outputs, text dumps, hash inputs, and deterministic serialization tests. +- The self-healing index feature may address stale expression index issues, but it does not replace tests for migration and query correctness. + +Do not write code or migrations that silently require 3.53.0 if production, tests, system packages, or bundled artifacts may still load an older SQLite. + +### 3.3 SQLite 3.52 warning + +SQLite 3.52.0 was withdrawn upstream. Do not select SQLite3MC 2.3.0 / SQLite 3.52.0 as a fallback baseline. If a repository already contains that version, surface the issue and prefer moving to SQLite3MC 2.3.3 or a project-approved fixed baseline. + +--- + +## 4. Canonical ownership and provenance + +### 4.1 One owner for version and build facts + +SQLite3MC version, SQLite source version, release tag, commit hash, checksums, compile flags, enabled extensions, default cipher, legacy options, and platform artifact versions must have one canonical owner.
+ +Acceptable owners include: + +- a third-party dependency manifest; +- a vendoring manifest; +- a build-system version catalog; +- a lock file plus package metadata; +- a dedicated `third_party/sqlite3mc/README` or manifest; +- a generated-source script with checksum assertions. + +Do not hard-code the SQLite3MC version, SQLite source ID, compile options, or cipher defaults independently across build scripts, docs, wrappers, and tests. Derive, generate, or validate secondary surfaces from the canonical owner. + +### 4.2 Provenance checks + +When adding or updating SQLite3MC: + +- use an authoritative upstream release, source archive, package, or repository tag; +- record the SQLite3MC version and underlying SQLite version; +- verify checksums or signed provenance when the repository supports it; +- preserve local patches as small, named, reviewable patches; +- update package metadata, lock files, SBOM, docs, and CI images together; +- run fixture tests against existing encrypted databases before release. + +If the repository uses prebuilt binaries, verify that binary provenance and compile options are inspectable. Opaque binaries are a supply-chain and compatibility risk. + +### 4.3 Header/library/runtime coherence + +The following must agree unless the repository has an explicit compatibility shim: + +- headers used at compile time; +- static or shared library linked at build time; +- dynamic library loaded at runtime; +- package metadata; +- `sqlite3_libversion()` and `sqlite3_sourceid()` observations; +- compile-option observations such as `PRAGMA compile_options` or `sqlite3_compileoption_get()`; +- language-binding reported versions. + +A common failure mode is compiling against the intended SQLite3MC headers while loading a system SQLite library at runtime. Always verify runtime identity when touching packaging, dynamic linking, containers, or language bindings. + +--- + +## 5. 
Build, linking, and packaging discipline + +### 5.1 Do not accidentally link vanilla SQLite + +SQLite3MC can be used as a drop-in replacement for SQLite in some build layouts, but replacement is not proof of correct encryption behavior. + +Agents must check: + +- the actual library file packaged into the application; +- symbol resolution order; +- DLL/shared-object search path; +- rpath/install-name settings; +- static-link symbol conflicts; +- transitive dependencies that also bundle SQLite; +- package manager postinstall behavior; +- runtime version reports. + +If both vanilla SQLite and SQLite3MC appear in the same process, trace which consumers bind to which symbols. Avoid duplicate SQLite global state unless the project deliberately isolates it. + +### 5.2 Amalgamation discipline + +When using the amalgamation: + +- treat the generated amalgamation as derived unless the repository explicitly vendors it as the source of truth; +- do not manually edit generated amalgamation code except for clearly named, documented emergency patches; +- keep headers, source, generated files, build flags, and docs in sync; +- preserve a reproducible regeneration path; +- validate the resulting source ID, version, and compile options. + +If the repository replaces `sqlite3.c` with `sqlite3mc_amalgamation.c`, ensure every consumer that expects encryption is compiled and linked against the replacement, not a system SQLite artifact. + +### 5.3 Compile-time options + +Compile-time options are contract facts. Changing them can alter SQL availability, file behavior, performance, compatibility, and security posture. 
+ +For encrypted databases, pay special attention to: + +- `SQLITE_TEMP_STORE`; +- `SQLITE_SECURE_DELETE`; +- `SQLITE_USE_URI`; +- enabled extensions such as FTS, JSON, RTREE, GEOPOLY, CARRAY, CSV, SHA3, UUID, FILEIO, REGEXP, SERIES, user authentication, and optional ZLIB-backed extensions; +- default cipher `CODEC_TYPE`; +- legacy compatibility flags such as sqleet or SQLCipher legacy modes; +- platform-specific flags for WASM, Android, Windows, or cross-compilation. + +Changing compile options requires tests and documentation because runtime SQL behavior and file handling may change even when application source code does not. + +### 5.4 Platform-specific builds + +For Windows, verify architecture naming, CRT expectations, DLL placement, `.lib` import libraries, Visual Studio/MSBuild files, and MinGW/GNU Make variants. + +For Linux and macOS, verify Autotools/CMake or project-specific build output, install names, rpath, shared-library versioning, pkg-config files, and container images. + +For Android/iOS/mobile, verify ABI splits, bundled native libraries, filesystem behavior, entropy, backup behavior, and secure storage for keys. + +For WebAssembly, verify VFS behavior, exported C APIs, memory model, JS glue, OPFS or browser storage behavior, and whether encryption keys cross JS/WASM boundaries safely. + +For language bindings, verify both the native artifact and the high-level package. The package version alone is insufficient evidence. + +--- + +## 6. Encryption, ciphers, and key lifecycle + +### 6.1 Key ownership + +Key material must have one explicit owner and lifecycle. 
+ +Identify: + +- where the key/passphrase originates; +- who can create, rotate, recover, or revoke it; +- how it is transported into SQLite3MC; +- whether it is a passphrase, raw key, KMS-derived secret, user credential, device secret, or test fixture; +- where it is stored, cached, zeroized, redacted, and destroyed; +- what happens on wrong key, missing key, expired key, or partial rekey failure. + +Do not hard-code production keys. Do not commit real encrypted database keys. Do not add default passphrases for convenience. Test keys must be visibly test-only and isolated from production configuration. + +### 6.2 Prefer safe API boundaries + +Prefer a wrapper API that applies the key immediately after opening a connection and before any schema reads, migrations, PRAGMAs, or application queries. + +C API posture: + +- `sqlite3_key()` and `sqlite3_key_v2()` set a database key and should normally be called immediately after `sqlite3_open()` / `sqlite3_open_v2()`. +- Use `sqlite3_key_v2()` when the schema name matters, including attached databases. +- `sqlite3_rekey()` and `sqlite3_rekey_v2()` change keys. They can also decrypt a database by specifying an empty key; require explicit migration intent for that path. +- SQLite3MC-specific functions use the `sqlite3mc_` prefix. Do not assume every SQLite Encryption Extension or SQLCipher convention is identical. + +SQL posture: + +- `PRAGMA key` and `PRAGMA rekey` are available, but they are easier to leak in logs, traces, query capture, debugging output, and crash reports. +- `ATTACH ... KEY` can attach encrypted databases, but the key string is still sensitive. +- URI parameters can configure encryption, but key-bearing URIs are high leakage risk because URIs commonly appear in logs, diagnostics, process listings, shell history, metrics, and crash reports. + +Use SQL or URI keying only when the repository has explicit redaction and logging discipline. 
+ +### 6.3 Cipher choice + +For new encrypted databases, prefer the repository's existing secure default. If no repository default exists, prefer the modern authenticated default used by SQLite3MC rather than legacy compatibility modes. + +SQLite3MC supports multiple cipher schemes, including: + +- wxSQLite3 AES-128 CBC without HMAC; +- wxSQLite3 AES-256 CBC without HMAC; +- sqleet ChaCha20-Poly1305 HMAC; +- SQLCipher AES-256 CBC with SHA HMAC variants; +- System.Data.SQLite RC4; +- Ascon-128 v1.2; +- AEGIS family algorithms. + +For new development, do not choose AES-CBC-without-HMAC or RC4 unless the task is explicitly legacy compatibility. Treat legacy modes as formats to migrate away from, not as modern defaults. + +Cipher configuration is file-format state. Changing cipher scheme, KDF parameters, page size, reserve bytes, plaintext header behavior, or legacy mode requires migration tests using real fixtures. + +### 6.4 Rekey and cipher migration + +Rekeying is a data migration, not a simple settings edit. + +Before implementing rekey or cipher migration, define: + +- the old cipher/key format; +- the new cipher/key format; +- whether migration is in-place or copy-based; +- transaction and crash-safety expectations; +- backup/rollback plan; +- verification after migration; +- behavior for wrong old key or failed new key; +- user-visible recovery path. + +Test rekey with fixtures, wrong keys, interrupted operations where feasible, and backup/restore workflows. + +### 6.5 Attachments and multiple databases + +SQLite3MC can handle encrypted and unencrypted databases together through `ATTACH`, and each database can use a different cipher scheme. + +When touching `ATTACH` behavior: + +- key each attached schema explicitly; +- test cross-database queries; +- test backup and detach behavior; +- verify that migration scripts do not accidentally copy plaintext into unencrypted files; +- ensure temp tables and intermediate data do not leak sensitive content to disk. + +--- + +## 7.
Database-file, journal, WAL, temp, and backup safety + +### 7.1 What encryption covers and does not cover + +SQLite3MC encrypts database files and journal files, but not every byte or every storage path is equally protected. + +Important boundaries: + +- `TEMP` tables are not encrypted by SQLite3MC. +- In-memory databases are not encrypted because they are not database files at rest. +- Bytes 16 through 23 of the database file contain header information that is usually not encrypted. +- Plaintext header features, if enabled, intentionally expose header bytes for compatibility. +- Application logs, caches, telemetry, memory dumps, backups, export files, and temp files are outside SQLite3MC's at-rest encryption boundary unless separately protected. + +For sensitive workloads, use `SQLITE_TEMP_STORE=2` or `SQLITE_TEMP_STORE=3` where appropriate, and use `PRAGMA temp_store=MEMORY` when compile-time temp-store policy is not sufficient. + +### 7.2 WAL and rollback journals + +When a database uses WAL or rollback journaling: + +- verify encryption of sidecar files where applicable; +- test checkpoints, crash recovery, and reopen behavior; +- preserve file permissions for `-wal`, `-shm`, and journal files; +- avoid deleting sidecar files as a substitute for proper checkpoint/recovery logic; +- test multiple connections if the application uses them. + +SQLite 3.53.0 includes an upstream fix for a WAL-reset corruption bug, but this does not remove the need for connection, checkpoint, and backup discipline. + +### 7.3 Backup, restore, VACUUM, and export + +Backup and export paths are common leakage points. 
+ +Rules: + +- distinguish encrypted database backup from plaintext export; +- document and test whether backups preserve encryption, cipher settings, page size, and reserve bytes; +- use SQLite backup APIs, `VACUUM INTO`, or application-specific copy flows deliberately; +- check whether `VACUUM INTO` target URI parameters such as `reserve=N` affect the generated database copy; +- protect dumps, CSV exports, JSON exports, logs, and support bundles separately from SQLite3MC encryption; +- test restore from real encrypted fixtures, not only creation of new databases. + +### 7.4 File permissions and deletion + +SQLite3MC is at-rest encryption, not a replacement for file permissions or access control. + +Preserve or improve: + +- restrictive permissions on database, WAL, SHM, journal, backup, and temp directories; +- secure deletion policy where the repository relies on it; +- cleanup of temporary exports and test fixtures; +- redaction in support bundles; +- platform-specific backup exclusion where applicable. + +--- + +## 8. SQLite API, SQL, and migration discipline + +### 8.1 SQLite error handling + +Expose enough SQLite detail to debug real failures without leaking secrets. + +Prefer preserving: + +- SQLite primary and extended error codes; +- connection/path context with redacted filenames when needed; +- operation phase: open, key, migrate, query, backup, checkpoint, rekey; +- whether failure was wrong key, missing key, unsupported cipher, corrupt file, permission failure, lock contention, or runtime link mismatch. + +Do not convert all SQLite failures into generic booleans or generic exceptions. + +### 8.2 SQL feature compatibility + +SQLite SQL compatibility is a runtime contract. + +Before using a 3.53.0 SQL feature in migrations or generated SQL, verify that all deployment targets load SQLite3MC 2.3.3 / SQLite 3.53.0 or newer. 
+ +Be especially cautious with: + +- `ALTER TABLE` constraint changes; +- `REINDEX EXPRESSIONS`; +- JSONB functions; +- temp triggers touching the main schema; +- query plans that rely on new optimizer behavior; +- deterministic text output involving floating-point values. + +If a repository supports multiple SQLite baselines, write migrations and SQL to the lowest supported runtime or guard/version-check the new feature. + +### 8.3 CLI scripts and golden outputs + +SQLite 3.53.0 changed human-oriented CLI formatting through QRF. + +For tests and automation: + +- set `.mode`, `.headers`, `.nullvalue`, `.separator`, and other output controls explicitly; +- avoid comparing default interactive output; +- avoid relying on shell history or command lines that contain keys; +- quote dot-commands deliberately; +- test batch and non-interactive behavior separately from interactive usability. + +### 8.4 Generated code and migrations + +If SQL is generated by an ORM, migration tool, code generator, or binding: + +- update the generator or schema source of truth, not only generated SQL; +- regenerate in a deterministic path; +- test generated migrations against encrypted fixtures; +- verify that schema introspection works after keying the database; +- preserve `user_version`, migration history, and compatibility checks. + +--- + +## 9. Language binding and FFI rules + +### 9.1 Apply both protocols + +When SQLite3MC is used through a language binding, use this protocol plus the relevant language protocol. + +Examples: + +- Java/JDBC or JNI/JNA: apply Java protocol and verify native library loading, classpath/resource packaging, and thread/connection lifecycle. +- Kotlin/SQLDelight or JVM/native wrappers: apply Kotlin protocol and verify Gradle metadata, generated database code, and native packaging. +- Python/APSW-style bindings or extension modules: apply Python protocol and verify wheels, ABI, free-threaded CPython posture, and runtime native identity. 
+- Rust FFI or crates bundling SQLite3MC: apply Rust protocol and verify `build.rs`, `links`, bindgen output, `unsafe` boundaries, and feature flags. +- Node/Electron/native modules: verify prebuilds, Electron ABI, install scripts, and runtime platform selection. +- .NET/native bundles: verify RID-specific packaging and native asset resolution. + +### 9.2 Wrapper API design + +A good wrapper makes unsafe states hard to represent. + +Prefer APIs that: + +- require keying before queries or migrations can run; +- distinguish encrypted, plaintext, and unknown database state; +- make cipher and migration intent explicit; +- preserve SQLite errors with redaction; +- close connections deterministically; +- prevent URI/PRAGMA secret leakage; +- expose version and compile-option diagnostics for support; +- allow test fixtures for wrong-key and migration cases. + +Avoid APIs that: + +- accept optional keys with ambiguous defaults; +- silently create plaintext databases when keying fails; +- auto-migrate cipher formats without backup or user intent; +- hide native-library identity; +- expose raw database handles without lifecycle rules; +- run migrations before applying the key. + +### 9.3 FFI safety + +For FFI surfaces: + +- treat SQLite handles, statement handles, allocated strings, key buffers, and callback pointers as ownership-sensitive; +- pair every allocation/free convention correctly; +- do not keep pointers to temporary key buffers beyond their valid lifetime; +- define thread ownership and callback threading; +- prevent exceptions/panics from crossing C ABI boundaries; +- test with sanitizers where practical; +- document safety preconditions in the native language's idiom. + +--- + +## 10. 
Testing and verification matrix + +### 10.1 Minimum verification for encryption-affecting changes + +For changes that affect encryption, keying, cipher config, database lifecycle, or persisted files, verify at least: + +- create encrypted database; +- reopen with correct key; +- fail to open/read with wrong key; +- ensure file does not contain obvious plaintext table names or inserted sentinel values where the expected encryption boundary applies; +- run schema migration on encrypted fixture; +- backup and restore encrypted database; +- rekey when relevant; +- verify runtime library identity and compile options; +- verify logs/traces do not include secrets. + +### 10.2 Compatibility fixtures + +Maintain real fixture files when compatibility matters: + +- current default cipher fixture; +- each supported legacy cipher fixture; +- plaintext fixture if the application supports plaintext databases; +- old application-version fixture; +- wrong-key fixture or negative test; +- corrupted/truncated fixture where recovery behavior matters; +- WAL/journal fixture when sidecar handling matters. + +Do not replace all fixture tests with mock-level tests. The file format is the contract. + +### 10.3 Native verification + +Use the repository's exact commands. Where no commands exist, useful checks may include: + +```text +native build for each supported platform/configuration +runtime sqlite3_libversion() / sqlite3_sourceid() assertion +PRAGMA compile_options assertion +unit/integration tests using the packaged artifact +ASan/UBSan/Valgrind leak checks where feasible +cross-platform CI smoke tests +package install/uninstall tests +``` + +For release artifacts, test the installed package, not only the build-tree binary. 
+ +### 10.4 Concurrency and durability tests + +If the application uses multiple connections, WAL, background workers, or concurrent readers/writers, add or preserve tests for: + +- multiple connections with correct keying; +- lock contention and busy timeouts; +- WAL checkpoint behavior; +- crash/restart or process-kill recovery where feasible; +- backup during active use; +- thread ownership rules in the language binding. + +### 10.5 Performance tests + +Measure before optimizing. + +Performance-sensitive changes should consider: + +- cipher cost; +- page size and reserve bytes; +- cache size; +- WAL vs rollback journal; +- synchronous mode; +- hardware acceleration and target CPU features; +- binding overhead; +- query planner changes in SQLite 3.53.0. + +Do not weaken encryption, durability, or compatibility for unmeasured performance claims. + +--- + +## 11. Security and operational posture + +### 11.1 Threat model clarity + +SQLite3MC protects database contents at rest under defined assumptions. It does not automatically protect: + +- data while the process is running; +- data returned through queries; +- temp tables unless temp storage is forced into memory; +- application logs and telemetry; +- exported files and backups; +- process memory dumps; +- keys stored beside the database; +- compromised application users or compromised hosts. + +State the real threat model when changing encryption behavior. + +### 11.2 Secret redaction + +Never emit secrets through: + +- logs; +- metrics; +- traces; +- SQL query capture; +- crash reports; +- exception messages; +- debug dumps; +- command-line arguments; +- test snapshots; +- support bundles; +- root README examples. + +Redact keys, passphrases, key IDs where needed, key-bearing URIs, and SQL statements containing `PRAGMA key`, `PRAGMA rekey`, or `ATTACH ... KEY`. 
+ +### 11.3 Secure defaults + +For new work: + +- require explicit key configuration for encrypted databases; +- fail closed if a key is missing where encryption is required; +- avoid silently falling back to plaintext; +- prefer modern authenticated ciphers; +- use memory temp storage for sensitive workloads; +- keep file permissions restrictive; +- expose diagnostics for version/build identity without exposing secrets. + +### 11.4 Supply-chain safety + +SQLite3MC is security-relevant native code. Treat dependency changes as security-sensitive. + +When touching vendored or prebuilt artifacts: + +- verify source and artifact provenance; +- review changelog and security-relevant fixes; +- update SBOM or dependency inventory; +- avoid unpinned downloads in build scripts; +- avoid executing downloaded build tools without checksum/provenance controls; +- test downstream packages after update. + +--- + +## 12. Observability without leakage + +Operational feedback should prove the database subsystem works without exposing secrets or sensitive data. + +Useful signals: + +- SQLite3MC/SQLite version and source ID; +- compile options; +- database open/key/migration phase failures; +- busy/locked timeout counts; +- checkpoint and backup outcomes; +- migration duration and success; +- corrupt-file or wrong-key failure classification; +- native-library load path in debug diagnostics, redacted as needed; +- package artifact version. + +Do not log full SQL statements if they can include keys or sensitive data. If query logging is necessary, redact keying operations and sensitive values first. + +--- + +## 13. Deletion and blast-radius rules + +Before deleting or replacing any SQLite3MC component, prove the blast radius. 
+ +Check: + +- native source files and generated amalgamation paths; +- headers and exported symbols; +- package artifacts, installers, Docker images, mobile bundles, and WASM glue; +- static and dynamic link references; +- language bindings and generated wrappers; +- SQL migrations and CLI scripts; +- encrypted fixtures and support tools; +- docs, examples, runbooks, and release checklists; +- production data files and backups that may require legacy cipher support. + +Removing a cipher, compile option, wrapper method, or legacy compatibility flag can strand existing encrypted databases. Treat such deletion as a data-migration decision, not cleanup. + +--- + +## 14. Documentation and preservation + +Use `.codex/PROTOCOL_AFAD.md` for docs that describe SQLite3MC integration, public APIs, migrations, operational procedures, or code/documentation synchronization. + +Preserve system theory in the smallest durable place: + +- version/build facts in the canonical dependency manifest; +- cipher choices and migration rationale in migration notes or AFAD-managed docs; +- key lifecycle in wrapper API docs or security runbooks; +- FFI safety rules in safety comments; +- compile options in build manifests and CI assertions; +- compatibility fixtures in tests; +- operational recovery in runbooks. + +The repository root `README.md` remains a storefront. It may mention that the project supports encrypted SQLite, but detailed cipher configuration, key management, and migration mechanics belong in deeper docs. + +--- + +## 15. Completion checklist + +Before declaring a SQLite3MC-related change complete, answer: + +```text +Baseline: +- Did I verify the intended SQLite3MC and SQLite versions at build time and runtime? + +Truth: +- Did I preserve one canonical owner for version, compile options, cipher defaults, key lifecycle, and migration state? + +Evidence: +- Did I run the narrow and required broad checks? 
+- For encryption changes, did I prove correct-key success, wrong-key failure, and absence of obvious plaintext leakage? + +Consequence: +- Did I trace packaging, linking, language bindings, stored files, backups, and support tools? + +Invariant: +- Did data integrity, key safety, cipher compatibility, ABI/API compatibility, and migration safety remain intact? + +Preservation: +- Did I update tests, fixtures, build assertions, docs, runbooks, or comments where the learned theory belongs? + +Leakage: +- Did I avoid logging, committing, or documenting real secrets or key-bearing commands? +``` + +Do not claim completion if runtime library identity is unverified, encryption behavior is untested, or existing encrypted database compatibility is unknown. diff --git a/.codex/PROTOCOL_AFAD.md b/.codex/PROTOCOL_AFAD.md new file mode 100644 index 00000000..13cdef7e --- /dev/null +++ b/.codex/PROTOCOL_AFAD.md @@ -0,0 +1,943 @@ +# PROTOCOL_AFAD.md — Agent-First Documentation Protocol + +Protocol: `AGENT_FIRST_DOCUMENTATION` +Version: `4.0` + +This protocol governs documentation that agents must maintain, retrieve, validate, or keep synchronized with code and system behavior. It is optimized for documentation that can be used by humans, retrieval systems, and future coding agents without requiring hidden context. + +It inherits the Universal Engineering Contract. Documentation work must still identify truth, evidence, consequence, invariants, and preservation. + +--- + +## 0. Agent routing and scope + +Do not maintain a long section index at the top of this file. Agents need a compact routing gateway more than a table of contents. Use this section to decide what kind of document you are touching and which rules apply. 
+ +| Situation | Apply | +|---|---| +| Public API, exported symbol, schema, route, event, config key, error, or test fixture must be documented | AFAD reference atom rules | +| Existing reference docs drift from code | AFAD sync loop | +| Guide, runbook, ADR, tutorial, or nested component README needs improvement | AFAD auxiliary-doc rules, adjusted to the document's purpose | +| Code change alters public behavior, compatibility, architecture, operation, or tooling | Update docs in the same change when the existing docs cover that surface | +| Repository root `README.md` is the only touched document | Do not apply AFAD; use the root README storefront rule in `AGENTS.md` | +| `CHANGELOG.md`, `LICENSE`, `NOTICE`, `SECURITY.md`, `CONTRIBUTING.md`, release notes, or legal/governance files | Follow their own conventions unless the repository opts them into AFAD | + +### Root README boundary + +The root `README.md` exemption is defined in `AGENTS.md` because it is a repository-wide routing rule. This protocol repeats the boundary only to prevent accidental over-application: do not force AFAD frontmatter, atom schemas, exhaustive API signatures, or routing metadata into the repository root `README.md`. + +Nested README files are not automatically exempt. Classify them by function: + +- user-facing landing page for a package/example/integration: keep human-first and light; +- component guide or operational documentation: use the auxiliary-doc rules; +- API/reference material disguised as a README: convert or link to AFAD reference docs. + +--- + +## 1. Documentation theory + +AFAD documentation is not prose storage. It is a durable theory surface for the system. + +Before changing non-trivial documentation, answer the documentation form of the Universal Engineering Contract: + +```text +Truth: +- What is the canonical source for this fact: code, schema, config, generated artifact, ADR, release policy, runtime behavior, or this document? 
+- Is this document allowed to define the fact, or must it derive/link from another owner? + +Evidence: +- What proves the documentation is accurate: tests, signatures, schemas, examples, generated docs, CI, runtime traces, release artifacts, or manual repro? + +Consequence: +- What breaks if this documented concept is removed, renamed, or changed? +- Which users, agents, tools, generated artifacts, docs, examples, or workflows depend on it? + +Invariant: +- What must remain true about the documented behavior, API, procedure, or constraint? + +Preservation: +- Where should the knowledge live after this change: code, type, test, generated file, reference atom, guide, runbook, ADR, comment, or README link? +``` + +A documentation change is incomplete when it makes text nicer but leaves truth ownership, verification, or drift risk unclear. + +--- + +## 2. Documentation classes + +AFAD distinguishes reference documentation from narrative documentation. Do not use one shape for all documents. + +### 2.1 Reference documents + +Reference documents are retrieval-oriented and schema-driven. They describe stable contract surfaces such as exported symbols, data types, config keys, routes, events, errors, generated schemas, test fixtures, and operational interfaces. + +Recommended naming: + +```text +docs/DOC_00_Index.md +docs/DOC_01_Core.md +docs/DOC_02_Types.md +docs/DOC_03_<Domain>.md +docs/DOC_04_<Domain>.md +docs/DOC_05_Errors.md +docs/DOC_06_Testing.md +``` + +`DOC_*.md` files are strict. They use frontmatter, atom schemas, short self-contained entries, and sync validation. + +### 2.2 Auxiliary documents + +Auxiliary documents are human-guided but still agent-maintainable. They may be narrative when narrative improves understanding. + +Examples: + +```text +docs/GUIDE_<Topic>.md +docs/RUNBOOK_<Topic>.md +docs/ADR_<NNN>_<Title>.md +docs/TUTORIAL_<Topic>.md +examples/<name>/README.md +packages/<name>/README.md +``` + +Auxiliary docs should not duplicate full reference atoms. 
They may link to reference docs, show runnable examples, explain workflows, record decisions, and preserve operational theory. + +### 2.3 Special documents + +The following are not AFAD reference docs by default: + +- root `README.md`; +- `CHANGELOG.md`; +- `LICENSE`, `NOTICE`, and legal files; +- `SECURITY.md`; +- `CONTRIBUTING.md`; +- release notes; +- governance documents. + +They must still be accurate and coherent, but their native conventions outrank AFAD structure unless project-specific instructions say otherwise. + +--- + +## 3. Core invariants + +The following invariants apply to AFAD-managed documents. + +### INV-1 Scope completeness + +Every documented public contract surface has exactly one canonical AFAD home. + +For published libraries and public APIs, the documented surface usually includes every externally visible public export. For applications, CLIs, services, and internal repositories, the documented surface is the contract users or operators rely on: commands, config, routes, schemas, events, error codes, public modules, runbooks, and externally meaningful behavior. + +Do not blindly document every language-level `public` or exported symbol when it is not part of the intended contract. Do not omit a public contract merely because the language visibility is narrow. + +### INV-2 Accuracy + +Documentation must match the canonical source of truth. + +- Signatures match code. +- Config keys, routes, event names, status values, limits, labels, and error codes match their canonical owner. +- Examples run as shown or are explicitly marked as conceptual. +- Operational procedures match current tooling and deployment shape. + +### INV-3 Canonical ownership + +Shared contract facts have one owner. Documentation may expose a contract fact, but it must not become an unmaintained parallel definition. + +If the owner is code, schema, generated artifact, config, or ADR, docs should derive from it, link to it, quote it minimally, or state the owner. 
If documentation is the owner, state that clearly and ensure code/tools derive from it or are validated against it. + +### INV-4 Atomicity + +Reference atoms are self-contained and retrieval-sized. + +- One concept per entry. +- First sentence states what the thing is. +- No `see above` or `see below` as required context. +- 200-400 tokens is the target for substantial entries. +- 600 tokens is the normal maximum for a reference atom. +- Split oversized entries unless splitting would damage correctness. + +The 600-token rule is about semantic precision and retrieval quality, not model context length. + +### INV-5 Current state + +Reference docs describe the current API and current behavior. + +Historical provenance such as `Added in vX.Y` belongs in `CHANGELOG.md`, release notes, or migration docs. Deprecation notices are allowed because they affect current user decisions. + +Preferred deprecation form: + +```text +Deprecated: vX.Y. Use <replacement>. Removal: vZ.0. +``` + +### INV-6 Renderability + +Markdown must render cleanly on the repository's normal platform. + +- Use language tags on code fences. +- Avoid decorative emoji in AFAD-managed docs. +- Avoid pseudo-code unless explicitly labeled conceptual. +- Avoid frontmatter in files where it degrades the user-facing rendering, especially root `README.md`. + +--- + +## 4. Metadata + +### 4.1 Reference frontmatter + +Every AFAD reference document should start with frontmatter. + +```yaml +--- +afad: "4.0" +domain: CORE +updated: "YYYY-MM-DD" +scope: + paths: ["src/path/or/package"] + symbols: ["OptionalSymbolOrNamespace"] +route: + keywords: [distinctive, terms] + questions: ["natural language query this file uniquely answers"] +--- +``` + +Field semantics: + +| Field | Meaning | +|---|---| +| `afad` | Protocol version used by this document. | +| `domain` | Semantic cluster such as `CORE`, `TYPES`, `ERRORS`, `TESTING`, `CONFIG`, `OPERATIONS`, or a project domain. 
| +| `updated` | Last meaningful documentation update, ISO date. | +| `scope.paths` | Source paths, packages, modules, schemas, or generated artifacts covered by the file. | +| `scope.symbols` | Optional major symbols/namespaces covered by the file. Use when helpful, not as a complete export list. | +| `route.keywords` | Distinctive retrieval terms. Avoid generic terms. | +| `route.questions` | Natural-language questions this file should answer. | + +Do not require `project.version` in every doc. If a repository has a versioned public API and the doc is version-specific, add a project-specific field such as `project_version`, but do not create a second drifting source of release truth. + +### 4.2 Auxiliary metadata + +Auxiliary docs may use the same frontmatter when it renders cleanly and helps routing. If frontmatter would harm presentation, use a short HTML comment instead: + +```html + +``` + +Never add AFAD metadata to the root `README.md` unless the repository already has a deliberate convention for hidden metadata there. + +### 4.3 Route guidance + +Route metadata is for disambiguation. It is not a substitute for clear content. + +- Use 5-10 distinctive keywords when possible. +- Avoid generic keywords such as `function`, `class`, `method`, `handler`, `service`, `docs`. +- Use 2-5 questions only when they uniquely route to the file. +- Do not duplicate the same route questions across files. +- If you cannot find distinctive route terms, the file may be too broad or too vague. + +--- + +## 5. File architecture + +### 5.1 Index file + +`DOC_00_Index.md` is the routing table for reference docs. Agents should consult it before guessing where an atom belongs. 
+ +Minimum structure: + +~~~markdown +| Contract surface | Kind | Canonical doc | Source owner | +|:--|:--|:--|:--| +| `Registry.resolve` | callable | `DOC_01_Core.md#registryresolve` | `src/registry.*` | +| `MAX_RETRIES` | constant | `DOC_04_Config.md#max_retries` | `src/config.*` | +~~~ + +Do not turn the index into a narrative guide. It is a route map. + +### 5.2 Domain files + +Use one coherent domain per reference file. Place high-frequency and high-risk atoms early, and rare edge cases later. + +Restructuring heuristics: + +| Action | Trigger | Reason | +|---|---|---| +| Create a domain file | More than 20 related contract surfaces | Improves routing and chunk precision | +| Merge files | Fewer than 8 sparse entries with no distinct domain | Avoids fragmented retrieval | +| Split files | More than 60 entries or repeated retrieval confusion | Reduces context and routing noise | + +Adjust thresholds to token density. Dense atoms require smaller files. Sparse atoms can tolerate larger files. + +### 5.3 Language adaptation + +AFAD is language-agnostic. Use the repository language in signatures and examples. + +```text +Java: public Result resolve(Key key) throws MissingKeyException +Rust: pub fn resolve(&self, key: &Key) -> Result<Item, ResolveError> +Python: def resolve(self, key: Key) -> Item | None: +TypeScript: resolve(key: Key): Item | undefined +Go: func (r *Registry) Resolve(key Key) (*Item, error) +``` + +Rules: + +- Code fence language tags must match the language or artifact: `java`, `rust`, `python`, `typescript`, `go`, `bash`, `yaml`, `json`, `toml`, `sql`, etc. +- Do not force Python terminology onto non-Python ecosystems. +- Translate `exception`, `error`, `fixture`, `property`, `type alias`, and `enum` into the language's actual constructs. +- Prefer the repository's domain vocabulary over generic schema names. + +--- + +## 6. Reference atom rules + +All reference atoms share the following shape unless a specific schema says otherwise. 
+ +~~~markdown +## `ContractName` + +One sentence stating what this thing is. + +### Signature +```language +exact signature, declaration, schema fragment, route, config key, or event shape +``` + +### Constraints +- Return/Output: What is produced, including empty, null, sentinel, or error cases. +- State: Pure, read-only, mutates X, persists Y, emits Z, or derived from owner. +- Failure: Error, exception, result variant, status code, or never-fails rule. +- Thread/Async/Concurrency: Safety, blocking, cancellation, ordering, or not applicable. +- Compatibility: Public contract, internal, experimental, deprecated, or migration note. + +--- +~~~ + +General rules: + +- Heading uses backticks for named symbols and contract facts. +- First sentence says what the thing is, not a vague action phrase. +- Signature or definition is required for symbol, schema, route, config, and event atoms. +- Constraints are semantic; they preserve the invariant users and agents need. +- Optional sections may be added when they aid decisions: `Parameters`, `Members`, `Fields`, `Usage`, `Example`, `Recovery`, `Operations`, `Deprecation`. +- Examples in reference atoms must be minimal: usually 5 lines or fewer. + +--- + +## 7. Schema selection + +Choose the narrowest schema that fits the documented contract. 
+ +| Contract kind | Schema | +|---|---| +| Function, method, constructor, command handler | Callable | +| Record, class, struct, interface, trait, data object | Type / data object | +| Enum, sealed hierarchy, algebraic data type, status set | Enum / variant set | +| Type alias, newtype, semantic wrapper | Alias / semantic type | +| Constant, config key, feature flag, limit | Constant / configuration | +| Route, endpoint, event, message, generated schema | Protocol surface | +| Error, exception, result variant, status code | Failure surface | +| Fixture, marker, test extension, shared test utility | Test infrastructure | +| Guide, runbook, ADR, tutorial | Auxiliary document | + +When a thing fits multiple schemas, use the schema that represents the user's decision point. For example, an HTTP endpoint is a protocol surface even if implemented by a method. + +--- + +## 8. Reference schemas + +### 8.1 Callable + +~~~markdown +## `Registry.resolve` + +Method that resolves a registered item by key. + +### Signature +```language + +``` + +### Parameters +| Name | Req | Semantics | +|:--|:--:|:--| +| `key` | Y | Registration key; non-empty | +| `strict` | N | Fail on missing key | + +### Constraints +- Return/Output: Registered item, optional value, result, response, or status. +- Failure: Exact failure mode and trigger; state `Never fails` only when true. +- State: Pure, read-only, mutates, persists, emits, or invalidates. +- Concurrency: Safe, unsafe, synchronized, async, blocking, cancellation-aware, or not applicable. +- Compatibility: Public, internal, deprecated, experimental, or migration-sensitive. + +### Usage +- Prefer when: Decision condition. +- Avoid when: Anti-pattern and reason. + +### Example +```language +minimal runnable example +``` + +--- +~~~ + +Parameter table rules: + +- `Name` is the exact parameter name in backticks. +- `Req` is `Y` or `N` only. +- `Semantics` is a short phrase, ideally 10 words or fewer. +- Do not include a Type column. 
Types live in the signature. + +Omit `Usage` and `Example` when they do not add decision value. + +### 8.2 Type / data object + +Use for records, structs, classes, interfaces, traits, DTOs, messages, and semantic wrappers. + +~~~markdown +## `UserRecord` + +Record representing an authenticated user visible to the API. + +### Signature +```language + +``` + +### Fields / Members +| Name | Req | Semantics | +|:--|:--:|:--| +| `id` | Y | Stable user identifier | +| `email` | Y | Normalized contact address | + +### Constraints +- Invariant: Rule that every instance must satisfy. +- Ownership: Who creates, mutates, serializes, or persists it. +- Compatibility: Wire format, database shape, public API, or internal-only. + +--- +~~~ + +For behavior-rich classes, document the class concept separately from major public methods. Do not pack every method into one atom. + +### 8.3 Enum / variant set + +~~~markdown +## `OrderStatus` + +Enumeration of externally visible order lifecycle states. + +### Signature +```language + +``` + +### Members +| Member | Value | Semantics | +|:--|:--|:--| +| `PENDING` | `"pending"` | Accepted, not fulfilled | +| `CANCELLED` | `"cancelled"` | Terminated before fulfillment | + +### Constraints +- Invariant: Allowed transitions, if applicable. +- Compatibility: Serialized values are stable public contract. + +--- +~~~ + +Use this schema for Rust enums, Java enums/sealed types, TypeScript union literals, Go tagged values, and similar closed vocabularies. + +### 8.4 Alias / semantic type + +~~~markdown +## `UserId` + +Semantic type representing a stable user identifier. + +### Definition +```language + +``` + +### Constraints +- Purpose: Prevents confusion with other identifiers. +- Validation: Format, normalization, or accepted range. +- Compatibility: Serialization or database representation. + +--- +~~~ + +Group related aliases only when they form one semantic family and remain under the token target. 
+ +### 8.5 Constant / configuration + +~~~markdown +## `MAX_RETRIES` + +Constant defining the maximum retry attempts for transient delivery failures. + +### Definition +```language + +``` + +### Constraints +- Owner: Canonical source that defines the value. +- Effect: Behavior controlled by the value. +- Range: Valid values or units, if applicable. +- Compatibility: User-visible, operational, internal, or generated. + +--- +~~~ + +Use this schema for constants, limits, feature flags, environment variables, config file keys, and externally meaningful labels. + +### 8.6 Protocol surface + +Use for routes, endpoints, events, messages, queue payloads, generated schemas, CLI commands, and wire contracts. + +~~~markdown +## `POST /v1/orders` + +Endpoint that creates an order from a validated checkout request. + +### Shape +```language + +``` + +### Constraints +- Input: Required fields, validation, authentication, or permissions. +- Output: Response, emitted event, persisted state, or side effect. +- Failure: Status codes, errors, retries, idempotency, or dead-letter behavior. +- Compatibility: Versioning, migration, backward compatibility, or deprecation. + +--- +~~~ + +The protocol surface is often the real public contract even when implementation symbols are internal. + +### 8.7 Failure surface + +Use for errors, exceptions, result variants, status codes, and recoverable operational failures. + +~~~markdown +## `ResolveError.MissingKey` + +Failure raised or returned when a registry key has no registered item. + +### Signature +```language + +``` + +### Constraints +- Trigger: Exact condition that produces this failure. +- Recovery: Caller or operator action. +- State: Whether anything was mutated before failure. +- Compatibility: Public error contract, internal diagnostic, or deprecated. + +--- +~~~ + +If there is an error hierarchy, include one compact hierarchy atom near the start of the errors file. 
+ +### 8.8 Test infrastructure + +Use for shared fixtures, markers, tags, extensions, hooks, test containers, test utilities, and golden data conventions. + +~~~markdown +## `databaseFixture` + +Test fixture providing an isolated database for integration tests. + +### Signature +```language + +``` + +### Constraints +- Scope: Per-test, per-class, per-suite, per-session, or repository-wide. +- Provides: Resource, state, or behavior made available. +- Cleanup: Teardown, rollback, deletion, or none. +- Concurrency: Parallel-safe or serial-only. + +--- +~~~ + +Terminology adapts to the framework: pytest fixtures, JUnit extensions/tags, Rust test helpers, Go test helpers, Jest setup hooks, property-test strategies, fuzz harnesses. + +--- + +## 9. Auxiliary document rules + +Auxiliary docs may be narrative, but they must not become stale parallel reference manuals. + +### 9.1 Guide + +Use for task-oriented explanation. + +~~~markdown +# Guide + +Purpose: What the reader will be able to do. +Prerequisites: What must already exist or be understood. + +## Overview + +Short context that explains the decision or workflow. + +## Procedure + +Concrete steps with runnable commands or examples. + +## Verification + +How the reader knows it worked. + +## Troubleshooting + +Common failures and recovery. + +## Related reference + +Links to AFAD reference atoms or canonical source owners. +~~~ + +### 9.2 Runbook + +Use for operational procedures. + +~~~markdown +# Runbook + +Purpose: Operational outcome. +When to use: Trigger condition. +When not to use: Unsafe or irrelevant cases. + +## Preconditions + +Access, environment, health checks, and safety checks. + +## Procedure + +Ordered steps with commands. + +## Verification + +Metrics, logs, traces, alerts, or user-visible checks. + +## Rollback + +How to restore the previous safe state. + +## Escalation + +Who or what to consult next. +~~~ + +### 9.3 ADR + +Use for architectural decisions that preserve theory. 
+ +~~~markdown +# ADR <NNN>: <Title> + +Status: Proposed | Accepted | Superseded | Rejected +Date: YYYY-MM-DD + +## Context + +Forces, constraints, and problem shape. + +## Decision + +Chosen direction. + +## Consequences + +Benefits, costs, risks, and follow-up work. + +## Alternatives considered + +Rejected options and why. +~~~ + +### 9.4 Nested README + +Nested READMEs should serve their directory's user. + +- Package/example landing page: short, human-first, no AFAD atom structure. +- Component guide: guide structure is appropriate. +- Reference material: move or link to `DOC_*.md` instead of embedding full API docs. + +--- + +## 10. Examples and snippets + +Examples are contract surfaces when users copy them. + +Rules: + +- Every code fence has a language tag. +- Prefer runnable examples over illustrative fragments. +- Keep reference atom examples short. +- Put larger examples in `examples/`, integration tests, doctests, or guide docs. +- Do not use placeholder ellipses in commands or code unless the text explicitly says the example is partial. +- Update examples when APIs, config, routes, flags, package names, or build commands change. + +If an example cannot be made runnable, label it clearly: + +~~~markdown +Conceptual sketch, not directly runnable: +~~~ + +--- + +## 11. Sync loop + +Run this loop when code changes may affect AFAD-managed docs, or when docs are suspected stale. + +```text +1. Inventory + Build the relevant contract map from code, schemas, generated artifacts, configs, routes, events, tests, and docs. + +2. Compare + Classify each contract surface: + - MATCH: doc and source agree. + - DRIFT: signature, shape, name, value, or behavior differs. + - ORPHAN-CODE: contract exists without required doc coverage. + - ORPHAN-DOC: doc exists for removed or non-contract surface. + - MOVE/RENAME: same concept moved or renamed. + - SEMANTIC-DRIFT: signature matches but behavior or invariant changed. + +3. 
Reconcile + Update, create, move, merge, split, or delete atoms. + +4. Validate + Check metadata, signatures, links, examples, routes, and token-sized atoms. + +5. Preserve + Put newly discovered theory in the most durable place: test, type, schema, doc atom, guide, runbook, ADR, or root README link. +``` + +### Co-evolution rule + +Docs and code should change together when a code change affects documented public behavior, public API, configuration, operational procedure, architecture boundary, generated schema, CLI contract, route, event, error, or user-visible example. + +Do not update docs for purely internal implementation changes unless the implementation change alters the theory users, operators, or future agents need. + +### Move detection + +When a doc atom appears orphaned, check for moves and renames before deleting it. + +```text +ORPHAN-DOC(A) + ORPHAN-CODE(B) with same concept, matching behavior, or compatible signature +→ classify as MOVE/RENAME +→ preserve useful constraints, examples, deprecation notes, and rationale +``` + +### Generated docs and hashes + +If the repository has tooling that generates docs, signatures, source maps, or implementation hashes, use that tooling. Do not invent manual hashes. Do not manually update generated docs without also updating the generator or source owner. + +--- + +## 12. Validation + +AFAD validation is layered. Block on correctness before style. 
+ +| Level | Check | Blocking | +|---|---|:---:| +| L0 | File is in scope for AFAD | Yes | +| L0 | Metadata is valid when metadata is required | Yes | +| L1 | Reference atoms have required heading, first sentence, signature/shape, and constraints | Yes | +| L1 | Code fences have language tags | Yes | +| L2 | Signatures, shapes, routes, config keys, event names, errors, and examples match canonical owners | Yes | +| L2 | No required contract surface is undocumented | Yes | +| L2 | No AFAD atom documents a removed or non-contract surface without explanation | Yes | +| L2 | Links and backtick references resolve where practical | Yes | +| L2 | Reference atoms stay within retrieval-sized bounds or have justified split exceptions | Yes | +| L3 | Parameter fragments are concise | No | +| L3 | Route keywords are distinctive | No | +| L3 | Style is economical and non-repetitive | No | + +Recovery: + +| Failure | Recovery | +|---|---| +| Out-of-scope file treated as AFAD | Remove AFAD structure and apply the file's native convention | +| Invalid metadata | Fix or remove metadata according to file class | +| Signature/shape drift | Update doc from canonical owner and verify examples | +| Missing doc atom | Create minimal accurate atom, then refine | +| Orphan doc atom | Classify as move/rename or delete | +| Oversized atom | Split by concept, not by arbitrary length | +| Stale example | Update, move to test/example, or delete if no longer useful | + +--- + +## 13. 
Anti-patterns + +| Anti-pattern | Why it is wrong | Fix | +|---|---|---| +| Long quick index listing every section | Consumes tokens and goes stale | Use compact routing gateway and clear headings | +| Root `README.md` forced into AFAD | Damages storefront role | Keep root README human-first and link to docs | +| Types repeated in parameter tables | Creates second drift source | Keep types in signature only | +| Full API reference inside a guide | Duplicates reference docs | Link to `DOC_*.md` atoms | +| Generic route keywords | Poor retrieval disambiguation | Use distinctive domain terms | +| `see above` as required context | Breaks atom self-containment | Repeat the minimal needed fact | +| Decorative emoji in AFAD docs | Adds noise and rendering variance | Use plain text | +| Historical `Added vX.Y` in reference atom | Wrong current-state surface | Put history in changelog/release notes | +| Pseudo-code presented as runnable | Misleads users and agents | Make it runnable or label conceptual | +| Documentation as second source of contract truth | Creates drift | Derive from or identify canonical owner | +| Blindly documenting every language-public symbol | Bloats docs and hides real contracts | Document intended contract surfaces | +| Deleting orphan docs before checking moves | Loses preserved theory | Classify move/rename first | + +--- + +## 14. 
Conflict resolution + +Priority order: + +```text +P0 Accuracy and safety +P1 Canonical ownership +P2 Completeness of intended contract surface +P3 Retrieval structure +P4 Human readability +P5 Style economy +``` + +Examples: + +| Conflict | Resolution | +|---|---| +| Exact signature is long and ugly | Keep the accurate signature; structure around it | +| Atom exceeds token target but cannot be split without losing correctness | Keep accurate atom and note split exception | +| Guide wants narrative but repeats full API details | Keep narrative, link to reference atoms | +| Root README would benefit from one example but AFAD prefers reference structure | README storefront rule wins; include one concise runnable example | +| Undocumented contract surface has unclear semantics | Create minimal atom with explicit TODO/unknown constraint, then preserve follow-up | +| Style violation but content is accurate and needed | Keep content; fix style in a later pass if needed | + +--- + +## 15. Agent output contract + +For non-trivial documentation work, the work summary should state: + +```text +Documentation scope: +- Files changed: +- File class: reference, guide, runbook, ADR, nested README, root README exception, or special file: + +Truth/evidence: +- Canonical sources checked: +- Verification performed: + +Changes: +- Atoms created/updated/deleted/moved: +- Examples updated or validated: +- Links/routes changed: + +Remaining risk: +- Missing source owners, uncertain semantics, skipped validation, or follow-up needed: +``` + +Do not dump this template into trivial summaries. Use it to ensure the agent did not produce prettier but less trustworthy documentation. + +--- + +## 16. Worked examples + +### 16.1 Callable atom, Java + +~~~markdown +## `Registry.resolve` + +Method that resolves a registered item by key. 
+ +### Signature +```java +public Item resolve(String key, boolean strict) throws KeyNotFoundException +``` + +### Parameters +| Name | Req | Semantics | +|:--|:--:|:--| +| `key` | Y | Registration key; non-blank | +| `strict` | N | Throw on missing key | + +### Constraints +- Return/Output: Registered `Item`, or `null` when `strict=false` and key is absent. +- Failure: Throws `KeyNotFoundException` when `strict=true` and key is absent. +- State: Read-only. +- Concurrency: Safe for concurrent reads. +- Compatibility: Public API. + +--- +~~~ + +### 16.2 Callable atom, Rust + +~~~markdown +## `Registry::resolve` + +Method that resolves a registered item by key. + +### Signature +```rust +pub fn resolve(&self, key: &str) -> Result<Option<Item>, ResolveError> +``` + +### Parameters +| Name | Req | Semantics | +|:--|:--:|:--| +| `key` | Y | Registration key; non-empty | + +### Constraints +- Return/Output: `Ok(Some(Item))` when registered; `Ok(None)` when absent and absence is allowed. +- Failure: Returns `Err(ResolveError)` for invalid keys or unavailable backing store. +- State: Read-only. +- Concurrency: Safe for shared access when the registry is shared immutably. +- Compatibility: Public crate API. + +--- +~~~ + +### 16.3 Protocol surface atom + +~~~markdown +## `order.created` + +Event emitted after an order is durably created. + +### Shape +```json +{ + "type": "order.created", + "order_id": "string", + "created_at": "RFC3339 timestamp" +} +``` + +### Constraints +- Input: Emitted only after the order row is committed. +- Output: Downstream fulfillment and analytics consumers may process the event independently. +- Failure: Publishing failure must be retried or dead-lettered according to the event pipeline policy. +- Compatibility: `type` and `order_id` are stable wire-contract fields. 
+ +--- +~~~ + +--- + +END OF PROTOCOL diff --git a/.codex/UNIVERSAL_ENGINEERING_CONTRACT.md b/.codex/UNIVERSAL_ENGINEERING_CONTRACT.md new file mode 100644 index 00000000..98b30c9f --- /dev/null +++ b/.codex/UNIVERSAL_ENGINEERING_CONTRACT.md @@ -0,0 +1,214 @@ +# Universal Engineering Contract + +This contract applies to all languages, runtimes, frameworks, tools, and repositories. + +## 1. Systems over goals + +The requested task is the entry point. The standard is to leave the touched system more coherent, more observable, and easier to change than it was before. + +Do not treat generated code, a passing build, or a closed issue as the whole outcome. The outcome is a validated improvement to the system's working theory: what is true, what changes it, what proves it works, what depends on it, and what must not break. + +Avoid orphan code: code that appears locally correct but has no clear owner, feedback loop, invariant, or understandable place in the system. + +## 2. Build the minimum system map before touching code + +Before making a non-trivial change, identify the relevant system theory. Keep this lightweight, but make it concrete enough that another engineer or agent could continue safely. + +### 2.1 Truth + +Ask: + +- Where does the relevant state live? +- What is the canonical source of truth? +- Who is allowed to mutate it? +- What state is cached, derived, duplicated, denormalized, persisted, remote, or eventually consistent? +- Where can this value become stale, invalid, or contradictory? + +Change the source of truth, not a symptom, unless the task is explicitly about presentation or derived behavior. + +### 2.2 Evidence + +Ask: + +- What tells us the system is working? +- What would tell us it is failing? +- Which tests, assertions, type checks, contracts, logs, metrics, traces, dashboards, alerts, or reproducible checks cover this behavior? +- If feedback is missing, what is the smallest useful feedback loop to add? 
+ +A change without evidence is incomplete unless there is a clear, stated reason evidence cannot be added. + +### 2.3 Consequence + +Ask: + +- What breaks if this file, function, module, class, endpoint, table, message, job, flag, or configuration disappears? +- Who calls it directly? +- Who depends on it indirectly through reflection, routing, serialization, dependency injection, schemas, generated code, conventions, plugins, events, queues, webhooks, cron jobs, dashboards, documentation, or human workflow? +- What is the blast radius across code, data, runtime behavior, users, and operations? + +Do not rely only on intuition. Prove blast radius with the available tools: search, static analysis, dependency graphs, tests, traces, logs, schemas, build output, or runtime inspection. + +### 2.4 Invariant + +Ask: + +- What must remain true after this change? +- What domain rule, security property, compatibility contract, performance bound, idempotency rule, ordering guarantee, data-shape guarantee, or user-visible behavior must not be violated? + +State the invariant before changing behavior. Add or update executable checks for it where practical. + +### 2.5 Preservation + +Ask: + +- Where should the discovered theory live after this work? + +Preserve important knowledge in the most durable appropriate place: tests, names, types, schemas, comments, documentation, runbooks, architecture decision records, generated artifacts, or agent directive files. Do not leave essential system knowledge trapped in a chat transcript or temporary reasoning. + +## 3. Red → Green → Refactor + +For new behavior, start with the smallest failing proof of behavior: a test, assertion, contract check, type-level check, reproducible script, golden case, or manual verification path. + +Then: + +1. **Red:** demonstrate the missing or broken behavior. +2. **Green:** make the smallest coherent change that satisfies the proof. +3. 
**Refactor:** immediately simplify names, boundaries, structure, duplication, and control flow while keeping feedback green. + +Passing is not finished. Understandable, coherent, and changeable is finished. + +## 4. Boy Scout + Mikado + +When touching existing code, leave the local area better than you found it. + +Prefer small, safe, validated improvements: + +- Rename unclear concepts. +- Extract coherent units. +- Inline needless indirection. +- Delete dead paths. +- Collapse accidental complexity. +- Remove obsolete compatibility shims when no real contract depends on them. +- Replace parallel definitions with derivation from the canonical owner. +- Strengthen tests, assertions, types, or runtime checks around changed behavior. + +Use Mikado-style sequencing for broader change: identify the desired improvement, discover prerequisites, make the smallest safe prerequisite change, validate it, and continue only while each step remains understandable and reversible. + +If a local refactor naturally unlocks a broader system-wide improvement, continue only while the scope remains controlled and evidence remains strong. Stop when the next improvement is a separate slice. + +## 5. Architecture as preserved theory + +Do not preserve architecture merely because it exists. Do not replace architecture merely because a new design seems cleaner in isolation. + +Treat architecture as accumulated system theory. Preserve the parts that encode real constraints, useful boundaries, domain language, operational lessons, or compatibility contracts. Improve the parts that are accidental, duplicated, misleading, obsolete, or unnecessarily complex. + +Architecture should emerge through repeated validated improvements, not speculative rewrites. When changing structure, make the new structure easier to explain, test, and modify than the old one. + +## 6. Canonical ownership of contract facts + +Shared contract facts must have exactly one canonical owner. 
+ +Contract facts include externally meaningful: + +- identifiers; +- names and labels; +- limits and quotas; +- permissions and capabilities; +- status values and state-machine transitions; +- routes, event names, message types, and schema fields; +- error codes and user-visible contract text; +- configuration keys and feature flags; +- protocol, API, CLI, UI, database, and integration contracts. + +Do not hard-code contract facts in parallel across code, interfaces, tools, tests, documentation, generated files, summaries, or error surfaces. + +Any surface that exposes a contract fact must derive it from the canonical source or from generated artifacts rooted in that source. Build-time or test-time validation should fail on drift, missing registration, contradictory definitions, or references to contract facts outside the canonical owner. + +When no canonical owner exists, create the smallest appropriate one before spreading the fact further. + +## 7. State ownership and mutation discipline + +Every meaningful piece of state needs an owner and a mutation policy. + +Before changing stateful behavior, identify: + +- the source of truth; +- all mutation paths; +- all readers; +- derived or cached copies; +- invalidation and reconciliation paths; +- concurrency, ordering, and idempotency assumptions; +- persistence, migration, and rollback implications. + +Do not introduce a second source of truth. Do not patch derived state when the canonical state or mutation path is wrong. Do not add hidden state that future maintainers cannot locate or reason about. + +## 8. Feedback must match risk + +Use the cheapest feedback that proves the important behavior, but do not confuse cheap feedback with sufficient feedback. + +A pure function may need a unit test. A protocol may need a contract test. A migration may need rollback validation. A distributed workflow may need integration coverage, idempotency checks, logs, metrics, and failure-mode tests. 
+ +When fixing a bug, reproduce it first if practical. When preventing recurrence, add the feedback that would have caught it. + +## 9. Deletion and simplification require proof + +Deleting code is good when the dependency theory is sound. + +Before deleting or simplifying, check for: + +- static references; +- dynamic references; +- generated references; +- serialized or persisted formats; +- migrations and historical data; +- external consumers; +- scheduled jobs and asynchronous workers; +- observability, alerting, and operations dependencies; +- documentation and human processes. + +If safe deletion cannot be proven fully, reduce uncertainty with tooling and make the smallest reversible change. + +## 10. Agent output contract + +For non-trivial changes, produce more than a patch. Include a compact summary covering: + +```text +Truth: +- Source of truth: +- Mutation paths: +- Derived/cached state: + +Evidence: +- Existing feedback: +- Added or updated feedback: +- Manual verification, if any: + +Consequence: +- Direct dependencies: +- Indirect or operational dependencies: +- Blast-radius judgment: + +Invariant: +- Must remain true: +- How it is protected: + +Preservation: +- Where the relevant theory was recorded: +``` + +Keep the summary proportional to the change. Small changes need small summaries. Risky changes need explicit reasoning. + +## 11. Stop conditions + +Stop when: + +- the requested behavior is implemented; +- the relevant feedback is green; +- touched code is clearer, simpler, and easier to change; +- shared contract facts have a canonical owner; +- important invariants are protected; +- blast radius has been considered and checked with available tools; +- newly discovered system knowledge has been preserved in a durable place; and +- the next improvement is a separate slice. + +Do not continue expanding scope after the next step stops being clearly connected, safe, and validated. 
diff --git a/.github/workflows/publish.yml b/.github/workflows/publish.yml index eba9c5c7..103383e1 100644 --- a/.github/workflows/publish.yml +++ b/.github/workflows/publish.yml @@ -198,6 +198,23 @@ jobs: fi set -o pipefail + # Repository-only agent guidance must not ship in released artifacts + set +o pipefail + if unzip -l dist/*.whl | grep -qE '(^|[[:space:]])AGENTS\.md$|(^|[[:space:]])\.codex/'; then + set -o pipefail + echo "::error::Wheel unexpectedly contains AGENTS.md or .codex/" + exit 1 + fi + set -o pipefail + + set +o pipefail + if tar -tzf dist/*.tar.gz | grep -qE '(^|/)AGENTS\.md$|(^|/)\.codex/'; then + set -o pipefail + echo "::error::Source distribution unexpectedly contains AGENTS.md or .codex/" + exit 1 + fi + set -o pipefail + echo "Package integrity verified" - name: Create release checksum receipt diff --git a/.gitignore b/.gitignore index d51d1138..c1fef1f3 100644 --- a/.gitignore +++ b/.gitignore @@ -133,3 +133,10 @@ temp/ pip-log.txt pip-delete-this-directory.txt pip-selfcheck.json + +# Repository policy files are tracked on purpose. +!/AGENTS.md +!/.codex/ +!/.codex/** +/.codex/.DS_Store +/.codex/**/.DS_Store diff --git a/AGENTS.md b/AGENTS.md new file mode 100644 index 00000000..484ec900 --- /dev/null +++ b/AGENTS.md @@ -0,0 +1,90 @@ +# AGENTS.md — Agent Entry Protocol + +This file is the repository entry point for agent work. It defines load order, precedence, repository-wide exceptions, and the universal minimum that applies before any specialized language, database/native, or documentation rule. + +## 1. Required context loading + +When opening a repository, load context in this order: + +1. Read this file completely. +2. Load `.codex/UNIVERSAL_ENGINEERING_CONTRACT.md`. This is the cross-language engineering contract. +3. Load `.codex/AGENTS_EXTRA.md` if it exists. This contains project-specific instructions. +4. 
Load the language/runtime protocol for each touched surface: + - Java 26+ / Gradle: `.codex/AGENTS_JAVA26_GRADLE.md` + - Kotlin 2.4+ / Gradle: `.codex/AGENTS_KOTLIN24_GRADLE.md` + - Python 3.13+: `.codex/AGENTS_PYTHON313.md` + - Rust 1.95+ / Cargo: `.codex/AGENTS_RUST195_CARGO.md` +5. Load the database/native dependency protocol for each touched surface: + - SQLite3 Multiple Ciphers 2.3.3 / SQLite 3.53.0: `.codex/AGENTS_SQLITE3MC233_SQLITE353.md` +6. For documentation authoring, documentation refactoring, or code changes that alter documented public contracts, load `.codex/PROTOCOL_AFAD.md` unless the only touched document is the repository root `README.md`. + +If a referenced file is absent, continue with the best available context and state the missing file in the work summary when it matters. + +## 2. Precedence + +Use the most specific applicable instruction, but do not silently relax correctness, security, compatibility, or verification requirements. + +Precedence order: + +1. Explicit user request for the current task. +2. Project-specific instructions in `.codex/AGENTS_EXTRA.md`. +3. Repository-wide rules in this `AGENTS.md`, including the root `README.md` exception. +4. Applicable language/runtime-specific protocol. +5. Applicable database/native dependency protocol. +6. Applicable documentation protocol. +7. Universal Engineering Contract. +8. General language, framework, ecosystem, and documentation norms. + +When instructions conflict, prefer the stricter or more specific instruction unless it would make the task incorrect. Surface the conflict rather than guessing. + +## 3. Universal minimum before changing a system + +For every non-trivial change, build the smallest useful system map: + +- **Truth:** Where does the relevant state live? What is authoritative? Who can mutate it? +- **Evidence:** What proves the system is working? What would reveal failure? +- **Consequence:** What breaks if the touched component disappears or changes shape? 
+- **Invariant:** What must remain true after the change? +- **Preservation:** Where should the discovered system theory live after the work? + +Use this map to decide what to change, how far to widen the change, what to verify, and what to document. + +## 4. Surface dispatch + +Language/runtime surfaces: + +- Java 26+ / Gradle projects use `.codex/AGENTS_JAVA26_GRADLE.md`. +- Kotlin 2.4+ / Gradle projects use `.codex/AGENTS_KOTLIN24_GRADLE.md`. +- Python 3.13+ projects use `.codex/AGENTS_PYTHON313.md`. +- Rust 1.95+ / Cargo projects use `.codex/AGENTS_RUST195_CARGO.md`. + +Database/native dependency surfaces: + +- SQLite3 Multiple Ciphers 2.3.3 / SQLite 3.53.0 surfaces use `.codex/AGENTS_SQLITE3MC233_SQLITE353.md` in addition to any applicable language protocol. + +Other surfaces: + +- Other languages, runtimes, databases, and native dependencies use the Universal Engineering Contract plus repository-specific instructions. Do not apply Java-, Kotlin-, Python-, Rust-, or SQLite3MC-specific rules to unrelated systems unless the repository explicitly asks for them. +- If a repository spans multiple languages or native dependencies, use the relevant protocol for each touched surface and the Universal Engineering Contract across all boundaries. + +## 5. Documentation dispatch and root README exception + +Use `.codex/PROTOCOL_AFAD.md` for agent-maintained documentation that is meant to stay synchronized with code, public APIs, architectural boundaries, operational procedures, or generated/reference material. + +The repository root `README.md` is a special case. Treat it as the front window of the store, not as ordinary documentation and not as an AFAD-managed reference file. + +Root `README.md` rules: + +- Do not add AFAD frontmatter, symbol atoms, exhaustive API signatures, or schema tables to the root `README.md`. 
+- Optimize for a human first impression: what the project is, why it matters, how to install or run it, the shortest credible example, and where to go next. +- Keep runnable snippets, but prefer brevity over completeness. +- Link to AFAD-managed docs, reference files, guides, changelogs, or runbooks for detail. +- Preserve project-specific brand, tone, and release positioning unless the user asks to change them. + +Nested `README.md` files are governed by their actual role. If a nested README is a component guide, package guide, or operational document, use the documentation protocol where it fits. If it is a user-facing landing page for a package, example, or integration, keep it reader-first and do not force reference-atom structure. + +`CHANGELOG.md`, `LICENSE`, `NOTICE`, `SECURITY.md`, `CONTRIBUTING.md`, governance files, release notes, and legal/compliance files follow their own conventions unless project-specific instructions opt them into AFAD. + +## 6. Work summary requirement + +For non-trivial changes, the final work summary must include the verification performed and any important system theory preserved or still missing. Keep the summary proportional to the risk of the change. diff --git a/CHANGELOG.md b/CHANGELOG.md index 02e92bae..c3a8799f 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,8 +1,8 @@ --- -afad: "3.5" -version: "0.164.0" +afad: "4.0" +version: "0.165.0" domain: CHANGELOG -updated: "2026-04-23" +updated: "2026-04-24" route: keywords: [changelog, release notes, version history, breaking changes, migration, fixed, what's new] questions: ["what changed in version X?", "what are the breaking changes?", "what was fixed in the latest release?", "what is the release history?"] @@ -14,12 +14,52 @@ Notable changes to this project are documented in this file. The format is based and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). 
## [Unreleased] + +## [0.165.0] - 2026-04-24 ### Changed - **The release protocol now treats `uv` as a required tool and uses a `uv`-managed Python 3.13 environment for public install verification.** The maintainer runbook no longer assumes a host `python3.13` binary is present on PATH during the final release smoke test; it now verifies the published package in a seeded `uv venv --python 3.13` environment, which matches the project's actual Python management model. +- **Release preflight now checks built artifacts for repository-only guidance file leaks.** + The maintainer runbook now tells release authors to inspect the locally built sdist and wheel for + repo-only policy files such as `AGENTS.md` and `/.codex/`, so packaging drift is caught before a + tag or publish job is ever created. +- **Runtime and diagnostics internals are now split into focused modules instead of continuing to grow monolithic owners.** + `runtime.bundle` now delegates lifecycle and mutation responsibilities into dedicated runtime + modules, `runtime.cache` separates audit/stat/key helpers, `runtime.function_bridge` delegates + decorator and introspection helpers, and `diagnostics.templates` now composes focused + reference/runtime/parsing template families. Public facade imports stay the same, while the + architecture regression tests now enforce tighter size budgets so these seams do not collapse + again. +- **Thread-safety-sensitive internals no longer rely on CPython-only atomicity claims.** + The AST visitor field cache now uses an interpreter-independent cached helper instead of a + mutable shared dictionary with a CPython dict-atomicity rationale, which aligns the parser and + introspection surface with Python 3.13 free-threaded expectations. 
+- **Optional Babel-backed facade exports now derive from one canonical ownership map.** + The root, runtime, and localization facades now share a single optional-export definition for + parser-only versus full-runtime gating instead of duplicating symbol lists and mutating module + globals at import time, which keeps capability probing (`hasattr()` and `getattr(..., default)`) + consistent across the public facades. +- **Validation dependency-graph helpers now have one internal owner module.** + The graph-building and longest-path helpers now live only in `validation.resource_graph`; + `validation.resource` no longer carries compatibility aliases for those private names, and the + test and fuzz surfaces now target the real owner directly. +- **Async cancellation and Babel-backed ISO helpers now preserve failure meaning more precisely.** + Async formatting paths now re-raise `asyncio.CancelledError` instead of collapsing it into a + generic runtime warning path, and the ISO metadata helpers now narrow their Babel-backed error + handling so unknown locales, broken imports, and valid empty-result cases are distinguished + cleanly. +- **Architecture and runtime docs now describe the live seam ownership and concurrency contract more accurately.** + The runtime, thread-safety, and integrity architecture guides now state that stable imports live + on the package facades, that concurrency guarantees come from internal synchronization rather + than CPython-only assumptions, and that the public runtime/integrity namespaces are implemented + by smaller focused internal modules. +- **Repository agent instructions are now explicitly trackable in git while staying out of release artifacts.** + The root `AGENTS.md` and the `/.codex` tree are now protected by explicit un-ignore rules for + repository use, while the sdist/wheel policy and release workflow checks continue to exclude + those repository-only guidance files from published distribution payloads. 
## [0.164.0] - 2026-04-23 ### Changed @@ -6982,7 +7022,8 @@ Both validators are re-exported from `ftllexengine.introspection` and the root [0.29.0]: https://github.com/resoltico/ftllexengine/releases/tag/v0.29.0 [0.28.1]: https://github.com/resoltico/ftllexengine/releases/tag/v0.28.1 [0.28.0]: https://github.com/resoltico/ftllexengine/releases/tag/v0.28.0 -[Unreleased]: https://github.com/resoltico/FTLLexEngine/compare/v0.164.0...HEAD +[Unreleased]: https://github.com/resoltico/FTLLexEngine/compare/v0.165.0...HEAD +[0.165.0]: https://github.com/resoltico/FTLLexEngine/compare/v0.164.0...v0.165.0 [0.164.0]: https://github.com/resoltico/FTLLexEngine/compare/v0.163.0...v0.164.0 [0.163.0]: https://github.com/resoltico/FTLLexEngine/compare/v0.162.0...v0.163.0 [0.162.0]: https://github.com/resoltico/FTLLexEngine/compare/v0.161.0...v0.162.0 diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index b410e7e2..06af2c09 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -1,8 +1,8 @@ --- -afad: "3.5" -version: "0.164.0" +afad: "4.0" +version: "0.165.0" domain: CONTRIBUTING -updated: "2026-04-23" +updated: "2026-04-24" route: keywords: [contributing, development, uv, lint, test, fuzz, benchmark, release, virtualenv] questions: ["how do I set up development?", "how do I run lint and tests?", "how do I work on fuzzing?", "how do I prepare a release?"] diff --git a/PATENTS.md b/PATENTS.md index 8618daa0..e6769e31 100644 --- a/PATENTS.md +++ b/PATENTS.md @@ -1,8 +1,8 @@ --- -afad: "3.5" -version: "0.164.0" +afad: "4.0" +version: "0.165.0" domain: LEGAL -updated: "2026-04-22" +updated: "2026-04-24" route: keywords: [patents, legal, license, fluent, apache, mit, babel] questions: ["what is the patent position?", "does the project include a patent grant?", "what about the Fluent specification license?"] diff --git a/README.md b/README.md index e65ade8f..2dfe9a5f 100644 --- a/README.md +++ b/README.md @@ -1,733 +1,83 @@ - +# FTLLexEngine — Fluent runtime for real-world localization 
-[![FTLLexEngine Art](https://raw.githubusercontent.com/resoltico/FTLLexEngine/main/images/FTLLexEngine.jpg)](https://github.com/resoltico/FTLLexEngine) +FTLLexEngine is a Python runtime and parsing toolkit for Fluent `.ftl` resources, built for teams that need locale-aware text, money, dates, and user-input parsing without rebuilding the same rules in application code. ------ +If you are still stitching this together with string interpolation, one-off parsers, and per-locale edge-case fixes, the same bug tends to get fixed in three places. [![PyPI](https://img.shields.io/pypi/v/ftllexengine.svg)](https://pypi.org/project/ftllexengine/) [![Python Versions](https://img.shields.io/pypi/pyversions/ftllexengine.svg)](https://pypi.org/project/ftllexengine/) -[![codecov](https://codecov.io/github/resoltico/FTLLexEngine/graph/badge.svg?token=Q5KUGU3S3U)](https://codecov.io/github/resoltico/FTLLexEngine) [![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg)](https://opensource.org/licenses/MIT) ------ +- Keep plural rules and locale formatting in `.ftl`, close to the messages themselves. +- Parse localized numbers, dates, and currency back into exact Python types. +- Fail startup early when resources or message schemas drift. +- Share internally synchronized bundles safely across concurrent requests. -# FTLLexEngine +The nearby alternative is a mix of hand-kept formatting rules, ad-hoc parsing helpers, and translation checks that only happen after a request is already live. FTLLexEngine turns that into one repeatable runtime. -**Python runtime for the Fluent (FTL) specification. Locale-aware numbers, dates, and currency — bidirectional, thread-safe, strict-mode validated, Decimal-precise — in `.ftl` files, not your code.** +[Try a working snippet](docs/QUICK_REFERENCE.md) · [Take the deeper workflow tour](docs/WORKFLOW_TOUR.md) · [Get the package on PyPI](https://pypi.org/project/ftllexengine/) -## Why FTLLexEngine? 
+## One Small Workflow -- **Locale-aware numbers, dates, and currency** -- `NUMBER()`, `DATETIME()`, `CURRENCY()` format values per Unicode CLDR for 200+ locales. One function call, correct output everywhere -- **Bidirectional** -- Format data for display *and* parse user input back to exact Python types. `"12.450,00 EUR"` → `(Decimal('12450.00'), 'EUR')` -- **Thread-safe** -- No global state. 100 concurrent requests, zero locale conflicts -- **Strict by default** -- Errors raise exceptions, not silent `{$amount}` fallbacks. Pass `strict=False` for soft error recovery -- **Boot validation** -- `LocalizationBootConfig` validates all resources and message schemas before your application accepts traffic. Fail before the first request, not during it -- **Introspectable** -- Query what variables a message needs before you call it -- **Declarative grammar** -- Plurals, gender, and cases in `.ftl` files. Code stays clean -- **Decimal precision** -- `Decimal` throughout. No float math, no rounding surprises - ---- - -Meet **Alice** and **Bob**. - -**Alice** exports specialty coffee. Her invoices ship to buyers in Tokyo, Hamburg, and New York. Three languages, three currency formats, zero tolerance for rounding errors. "1 bag" in English, "1 Sack" in German, "1袋" in Japanese -- and Polish has four plural forms, Arabic has six. She moved grammar rules to `.ftl` files and never looked back. - -**Bob** runs supply operations at Mars Colony 1. Personnel from Germany, Japan, and Colombia order provisions in their own locale. A German engineer types `"12.450,00 EUR"`. A Japanese technician enters `"¥1,245,000"`. Bob's system needs exact `Decimal` values from both. One parsing error on a cargo manifest means delayed shipments for 200 colonists. - -FTLLexEngine keeps their systems coherent. Built on the [Fluent specification](https://projectfluent.org/) that powers Firefox. 200+ locales via Unicode CLDR. Thread-safe by default. 
- ---- - -## Quick Start +For a coffee exporter, one invoice line and one buyer reply are enough to create drift: display logic in one place, parsing logic in another, validation nowhere. FTLLexEngine keeps that move in one stack. ```python +from decimal import Decimal from ftllexengine import FluentBundle +from ftllexengine.parsing import parse_currency -bundle = FluentBundle("en_US", use_isolating=False) -bundle.add_resource(""" -coffee-order = { $bags -> - [one] 1 bag of { $origin } coffee - *[other] { $bags } bags of { $origin } coffee -} -""") +bundle = FluentBundle("de_DE", use_isolating=False) +bundle.add_resource('quote = Angebot: { CURRENCY($amount, currency: "EUR") }') -result, errors = bundle.format_pattern("coffee-order", {"bags": 500, "origin": "Ethiopian"}) +text, errors = bundle.format_pattern("quote", {"amount": Decimal("12450.00")}) assert errors == () -assert result == "500 bags of Ethiopian coffee" -``` - -Unknown locales raise `ValueError` on `FluentBundle`, -`FluentLocalization`, `number_format()`, `datetime_format()`, and -`currency_format()` rather than silently formatting with a fallback locale. - -> `use_isolating=False` removes Unicode bidi isolation markers from output, making strings suitable for direct comparison and logging. The default `use_isolating=True` wraps each placeable in U+2068/U+2069 markers for correct bidirectional text rendering in UI contexts. 
- -**Parse user input back to Python types:** +assert text == "Angebot: 12.450,00\u00a0€" -```python -from decimal import Decimal -from ftllexengine.parsing import parse_currency - -# German buyer enters a bid price -result, errors = parse_currency("12.450,00 EUR", "de_DE", default_currency="EUR") -if not errors: - amount, currency = result # (Decimal('12450.00'), 'EUR') - assert amount == Decimal("12450.00") - assert currency == "EUR" +parsed, errors = parse_currency("12.450,00 EUR", "de_DE", default_currency="EUR") +assert errors == () +assert parsed == (Decimal("12450.00"), "EUR") ``` ---- +The same locale-aware runtime formats the outgoing quote and parses the buyer’s reply back into an exact `Decimal`. -## Table of Contents +## Where It Fits -- [Installation](#installation) -- [Multi-Locale Formatting — Alice Ships to Every Port](#multi-locale-formatting--alice-ships-to-every-port) -- [Bidirectional Parsing — Bob Parses Every Input](#bidirectional-parsing--bob-parses-every-input) -- [Thread-Safe Concurrency — 100 Threads, Zero Race Conditions](#thread-safe-concurrency--100-threads-zero-race-conditions) -- [Streaming Resource Loading — Large Files Without Peak Memory](#streaming-resource-loading--large-files-without-peak-memory) -- [Async Applications — Non-Blocking Formatting](#async-applications--non-blocking-formatting) -- [Message Introspection — Pre-Flight Checks](#message-introspection--pre-flight-checks) -- [Production Boot Validation — Systems That Accept Traffic Safely](#production-boot-validation--systems-that-accept-traffic-safely) -- [Currency Data — Operations Across Borders](#currency-data--operations-across-borders) -- [Architecture at a Glance](#architecture-at-a-glance) -- [When to Use FTLLexEngine](#when-to-use-ftllexengine) -- [Documentation](#documentation) -- [Contributing](#contributing) -- [Legal](#legal) +Use FTLLexEngine when the same message has to survive more than one locale, more than one direction, or more than one layer of your 
system. ---- +- Good fit: Fluent-based apps, invoice and checkout flows, localized forms, startup validation for translation packs, and systems that care about exact decimals instead of float luck. +- Good fit: Teams that want message grammar, money formatting, and localized input parsing to stay consistent instead of drifting between templates, helpers, and validation code. +- Keep it simple: single-locale apps, plain string formatting, or projects that do not need Fluent at all. -## Installation +## Start In Two Paths -```bash -uv add ftllexengine[babel] -``` - -Or with pip: +Use the full runtime when you need formatting, localization orchestration, and localized parsing: ```bash -pip install ftllexengine[babel] +uv add ftllexengine[babel] ``` -This is the **full runtime** install: locale-aware formatting, localization orchestration, -bidirectional parsing, and Babel-backed ISO helpers. - -**Requirements**: Python >= 3.13 | Babel >= 2.18 - -
-Parser-only installation (no Babel dependency) +Use the parser-only install when you only need syntax parsing, AST work, validation, and zero-dependency helper surfaces: ```bash uv add ftllexengine ``` -Or: `pip install ftllexengine` - -**Available in parser-only installs:** -- FTL syntax parsing (`parse_ftl()`, `serialize_ftl()`) -- AST manipulation and transformation -- Validation and message introspection -- Zero-dependency runtime helpers such as `CacheConfig`, `FluentNumber`, - `FunctionRegistry`, `fluent_function`, and `make_fluent_number` -- Zero-dependency localization loading types such as `PathResourceLoader`, - `FallbackInfo`, `ResourceLoadResult`, and `LoadSummary` -- Embedded ISO 4217 decimal precision lookup via `get_currency_decimal_digits()` - -**Requires the full runtime install:** -- `FluentBundle` (locale-aware formatting) -- `AsyncFluentBundle` -- `FluentLocalization` (multi-locale fallback) -- `LocalizationBootConfig` -- Runtime formatter and registry helpers such as `number_format()`, - `datetime_format()`, `currency_format()`, `select_plural_category()`, - `create_default_registry()`, and `get_shared_registry()` -- Bidirectional parsing (numbers, dates, currency) -- Localized ISO territory/currency metadata lookups and ISO code validation helpers - -Public formatting and localization entry points reject unknown locales -instead of silently falling back to `en_US`. -Parser-only facade probes such as `hasattr(ftllexengine.runtime, "number_format")` -and `getattr(ftllexengine, "FluentBundle", None)` treat Babel-backed names -as absent instead of raising during feature detection. - -
- ---- - -## Multi-Locale Formatting — Alice Ships to Every Port - -Alice's invoices go to Tokyo, Hamburg, and New York. Same data, different languages, different number formats. She maintains one `.ftl` file per locale. Translators edit the files. Her trading platform ships features. - -**English (New York buyer):** - -```python -from decimal import Decimal -from ftllexengine import FluentBundle - -bundle = FluentBundle("en_US", use_isolating=False) -bundle.add_resource(""" -shipment-line = { $bags -> - [0] No bags shipped - [one] 1 bag of { $origin } coffee - *[other] { $bags } bags of { $origin } coffee -} - -invoice-total = Total: { CURRENCY($amount, currency: "USD") } -""") - -result, _ = bundle.format_pattern("shipment-line", {"bags": 500, "origin": "Colombian"}) -assert result == "500 bags of Colombian coffee" - -result, _ = bundle.format_pattern("invoice-total", {"amount": Decimal("187500.00")}) -assert result == "Total: $187,500.00" -``` - -**German (Hamburg buyer):** - -```python -from decimal import Decimal -from ftllexengine import FluentBundle - -bundle_de = FluentBundle("de_DE", use_isolating=False) -bundle_de.add_resource(""" -shipment-line = { $bags -> - [0] Keine Saecke versandt - [one] 1 Sack { $origin } Kaffee - *[other] { $bags } Saecke { $origin } Kaffee -} - -invoice-total = Gesamt: { CURRENCY($amount, currency: "EUR") } -""") - -result, _ = bundle_de.format_pattern("shipment-line", {"bags": 500, "origin": "kolumbianischer"}) -assert result == "500 Saecke kolumbianischer Kaffee" - -result, _ = bundle_de.format_pattern("invoice-total", {"amount": Decimal("187500.00")}) -assert result == "Gesamt: 187.500,00\u00a0€" # CLDR: non-breaking space before symbol -``` - -**Japanese (Tokyo buyer):** - -```python -from decimal import Decimal -from ftllexengine import FluentBundle - -bundle_ja = FluentBundle("ja_JP", use_isolating=False) -bundle_ja.add_resource(""" -shipment-line = { $bags -> - [0] 出荷なし - *[other] { $origin }コーヒー { $bags }袋 -} - 
-invoice-total = 合計:{ CURRENCY($amount, currency: "JPY") } -""") - -result, _ = bundle_ja.format_pattern("shipment-line", {"bags": 500, "origin": "コロンビア"}) -assert result == "コロンビアコーヒー 500袋" - -result, _ = bundle_ja.format_pattern("invoice-total", {"amount": Decimal("28125000")}) -assert result == "合計:¥28,125,000" -``` - -Bob uses the same pattern at Mars Colony 1. Spanish for the Colombian agronomists? Add one `.ftl` file. Zero code changes. - -> In production, translators maintain separate `.ftl` files per locale. Your code loads them with `Path("invoice_de.ftl").read_text()`. - ---- - -## Bidirectional Parsing — Bob Parses Every Input - -Most libraries only format outbound data. That's a one-way trip. - -Bob's colonists type orders and quantities in their local format. A German engineer enters `"12.450,00 EUR"`. A Colombian agronomist enters `"45.000.000 COP"`. A Japanese technician files a delivery date as `"2026年3月15日"`. FTLLexEngine parses them all to exact Python types. - -```python -from decimal import Decimal -from ftllexengine.parsing import ( - parse_currency, - parse_date, - parse_decimal, - parse_fluent_number, -) - -# German engineer enters a bid in EUR -bid_result, errors = parse_currency("12.450,00 EUR", "de_DE", default_currency="EUR") -if not errors: - bid_amount, bid_currency = bid_result # (Decimal('12450.00'), 'EUR') - -# Colombian agronomist enters an ask in COP -ask_result, errors = parse_currency("45.000.000 COP", "es_CO", default_currency="COP") -if not errors: - ask_amount, ask_currency = ask_result # (Decimal('45000000'), 'COP') - -# Japanese technician enters a delivery date -contract_date, errors = parse_date("2026年3月15日", "ja_JP") -assert not errors -assert contract_date.isoformat() == "2026-03-15" - -# German engineer enters a localized amount for use in a Fluent message -fnum, errors = parse_fluent_number("12.450,00", "de_DE") -if not errors: - # FluentNumber(value=Decimal('12450.00'), formatted='12.450,00', precision=2) - # Pass 
fnum directly as a $variable — it carries its formatting metadata - assert fnum.value == Decimal("12450.00") - assert str(fnum) == "12.450,00" -``` - -```mermaid -flowchart TB - A["German Engineer<br/>12.450,00 EUR"] --> PA["parse_currency()<br/>de_DE"] - B["Colombian Agronomist<br/>45.000.000 COP"] --> PB["parse_currency()<br/>es_CO"] - C["Japanese Technician<br/>2026年3月15日"] --> PC["parse_date()<br/>ja_JP"] - - PA --> RA["Decimal('12450.00')<br/>EUR"] - PB --> RB["Decimal('45000000')<br/>COP"] - PC --> RC["date(2026, 3, 15)"] - - RA & RB & RC --> SYS[("Inventory System<br/>Exact Python types")] - - style PA fill:#f9f,stroke:#333,stroke-width:2px - style PB fill:#f9f,stroke:#333,stroke-width:2px - style PC fill:#f9f,stroke:#333,stroke-width:2px -``` - -**When parsing fails, you get structured errors -- not exceptions:** - -```python -from ftllexengine.parsing import parse_decimal - -price, errors = parse_decimal("twelve thousand", "en_US") -assert price is None -assert errors - -if errors: - err = errors[0] - print(err) # "Failed to parse decimal 'twelve thousand' for locale 'en_US': ..." -``` - -### Decimal Precision - -Alice calculates contract values. Float math fails: `0.1 + 0.2 = 0.30000000000000004`. - -FTLLexEngine uses `Decimal` throughout: - -```python -from decimal import Decimal -from ftllexengine.parsing import parse_currency - -price_result, errors = parse_currency("$4.25", "en_US", default_currency="USD") -if not errors: - price_per_lb, currency = price_result # (Decimal('4.25'), 'USD') - assert price_per_lb == Decimal("4.25") - assert currency == "USD" - - bags = 500 - lbs_per_bag = Decimal("132") # Standard 60kg bag - total_lbs = bags * lbs_per_bag - contract_value = total_lbs * price_per_lb - # Decimal('280500.00') -- exact, every time -``` - -### No Silent Failures - -> [!NOTE] -> A missing variable returns a fallback string like `"Contract: 500 bags at {!CURRENCY}/lb"`. In financial systems or mission-critical operations, displaying this to a user is unacceptable. - -`FluentBundle` defaults to `strict=True`. On any formatting error, FTLLexEngine raises immediately -- no bad data reaches the user. 
- -```python -from decimal import Decimal -from ftllexengine import CacheConfig, FluentBundle, FormattingIntegrityError - -# strict=True is the DEFAULT: raises FormattingIntegrityError on ANY formatting error -bundle = FluentBundle("en_US", cache=CacheConfig(), use_isolating=False) -bundle.add_resource('confirm = Contract: { $bags } bags at { CURRENCY($price, currency: "USD") }/lb') - -# Works normally when all variables are provided -result, _ = bundle.format_pattern("confirm", {"bags": 500, "price": Decimal("4.25")}) -assert result == "Contract: 500 bags at $4.25/lb" - -# Missing variable raises immediately (default strict=True behavior) -try: - bundle.format_pattern("confirm", {"bags": 500}) # forgot $price -except FormattingIntegrityError as e: - print(f"HALT: {e.message_id} failed") - # e.fallback_value = "Contract: 500 bags at {!CURRENCY}/lb" - # e.fluent_errors = (FrozenFluentError(...),) - -# For soft error recovery, opt in with strict=False -soft_bundle = FluentBundle("en_US", strict=False, use_isolating=False) -soft_result, soft_errors = soft_bundle.format_pattern("missing-message", {}) -assert soft_result == "{missing-message}" # fallback: key wrapped in braces -assert soft_errors -``` - ---- - -## Thread-Safe Concurrency — 100 Threads, Zero Race Conditions - -Alice's trading desk gets busy. Bids from Frankfurt, asks from Bogota, confirmations to Tokyo -- concurrent requests, each in a different locale. Bob's colony runs the same pattern: 200 settlers, simultaneous orders, mixed locales. - -**The problem:** Python's `locale` module uses global state. Thread A sets German, Thread B reads it, chaos ensues. - -**The solution:** FTLLexEngine bundles are isolated. No global state. No locks you manage. No race conditions. 
- -```python -from concurrent.futures import ThreadPoolExecutor -from decimal import Decimal -from ftllexengine import FluentBundle - -# Create locale-specific bundles (typically done once at startup) -de_bundle = FluentBundle("de_DE", use_isolating=False) -es_bundle = FluentBundle("es_CO", use_isolating=False) -ja_bundle = FluentBundle("ja_JP", use_isolating=False) - -ftl_source = 'confirm = { CURRENCY($amount, currency: "USD") } per { $unit }' -de_bundle.add_resource(ftl_source) -es_bundle.add_resource(ftl_source) -ja_bundle.add_resource(ftl_source) - -def format_confirmation(bundle, amount, unit): - result, _ = bundle.format_pattern("confirm", {"amount": amount, "unit": unit}) - return result - -with ThreadPoolExecutor(max_workers=100) as executor: - futures = [ - executor.submit(format_confirmation, de_bundle, Decimal("4.25"), "lb"), - executor.submit(format_confirmation, es_bundle, Decimal("4.25"), "lb"), - executor.submit(format_confirmation, ja_bundle, Decimal("4.25"), "lb"), - ] - confirmations = [f.result() for f in futures] - assert confirmations == ["4,25\u00a0$ per lb", "US$4,25 per lb", "$4.25 per lb"] - # CLDR locale-specific symbols; de_DE uses non-breaking space before $ -``` - -`FluentBundle` and `FluentLocalization` are thread-safe by design: -- Multiple threads can format messages simultaneously (read lock) -- Adding resources or functions acquires exclusive access (write lock) -- You don't manage any of this -- it just works - ---- - -## Streaming Resource Loading — Large Files Without Peak Memory - -Bob's colony manifest system loads `.ftl` files that grow as new message templates accumulate. Loading the entire file into a string before parsing wastes memory on large resources — and for pipelines reading from network streams, full-string loading isn't possible. - -`add_resource_stream` and `parse_stream_ftl` accept any line iterator. 
Memory stays proportional to the largest single FTL entry, not the full file: - -```python -from pathlib import Path -from tempfile import TemporaryDirectory -from ftllexengine import FluentBundle, parse_stream_ftl - -with TemporaryDirectory() as tmp: - source_path = Path(tmp) / "colony_messages.ftl" - source_path.write_text( - "hello = Hello from orbit\n" - "status = Cargo ready\n", - encoding="utf-8", - ) - - bundle = FluentBundle("en_US") - with source_path.open(encoding="utf-8") as handle: - junk = bundle.add_resource_stream(handle, source_path=source_path.name) - assert junk == () - - with source_path.open(encoding="utf-8") as handle: - entry_ids = [entry.id.name for entry in parse_stream_ftl(handle)] - assert entry_ids == ["hello", "status"] - print(entry_ids) -``` - -**Same guarantees as `add_resource`:** -- Strict mode: raises `SyntaxIntegrityError` on junk entries (default `strict=True`) -- Thread-safe: entries collected outside the lock, committed atomically -- Soft mode: `add_resource_stream` returns `tuple[Junk, ...]` when `strict=False` - -`FluentLocalization.add_resource_stream` works identically for multi-locale setups: - -```python -from pathlib import Path -from tempfile import TemporaryDirectory -from ftllexengine import FluentLocalization -from ftllexengine.localization import PathResourceLoader - -with TemporaryDirectory() as tmp: - base = Path(tmp) / "locales" - (base / "de_de").mkdir(parents=True) - (base / "en_us").mkdir(parents=True) - (base / "de_de" / "messages.ftl").write_text("hello = Hallo\n", encoding="utf-8") - (base / "en_us" / "messages.ftl").write_text("hello = Hello\n", encoding="utf-8") - extra_path = Path(tmp) / "extra_de.ftl" - extra_path.write_text("shipment = Zusatzdatei\n", encoding="utf-8") - - loader = PathResourceLoader(str(base / "{locale}")) - l10n = FluentLocalization(["de_DE", "en_US"], ["messages.ftl"], loader) - with extra_path.open(encoding="utf-8") as handle: - l10n.add_resource_stream("de_DE", handle, 
source_path=extra_path.name) - - shipment, errors = l10n.format_value("shipment") - assert errors == () - assert shipment == "Zusatzdatei" -``` - ---- - -## Async Applications — Non-Blocking Formatting - -Alice's trading platform runs on asyncio. `FluentBundle` is thread-safe, but calling it from an async handler blocks the event loop for the duration of each format call. `AsyncFluentBundle` eliminates this: every mutation and formatting operation runs in a thread pool via `asyncio.to_thread()`, leaving the event loop free. - -```python -import asyncio -from ftllexengine import AsyncFluentBundle - -async def handle_request(name: str, bags: int) -> str: - async with AsyncFluentBundle("en_US", use_isolating=False) as bundle: - await bundle.add_resource(""" -coffee-order = { $bags -> - [one] 1 bag for { $name } - *[other] { $bags } bags for { $name } -} -""") - result, _ = await bundle.format_pattern( - "coffee-order", {"name": name, "bags": bags} - ) - return result - -assert asyncio.run(handle_request("Alice", 2)) == "2 bags for Alice" - -# Shared bundle across requests (create once, reuse): -_bundle = AsyncFluentBundle("en_US") - -async def startup() -> None: - with open("messages.ftl", encoding="utf-8") as f: - await _bundle.add_resource_stream(f, source_path="messages.ftl") -``` - -**Same strict-mode guarantees as `FluentBundle`:** -- `strict=True` (default): raises `FormattingIntegrityError` on any resolution error -- Fast read operations (`has_message`, `get_message`, `introspect_message`) remain synchronous — O(1) dict lookups too short to block the event loop - ---- - -## Message Introspection — Pre-Flight Checks - -Bob's systems generate cargo manifests. Before calling `format_pattern()`, they verify: *what variables does this message require? Are all of them available?* - -Alice's compliance team uses the same introspection to catch missing variables at build time, not during live operations. 
- -```python -from ftllexengine import FluentBundle - -bundle = FluentBundle("en_US", use_isolating=False) -bundle.add_resource(""" -contract = { $buyer } purchases { $bags -> - [one] 1 bag - *[other] { $bags } bags - } of { $grade } coffee from { $seller } at { CURRENCY($price, currency: "USD") }/lb. - Shipment: { $port } by { DATETIME($ship_date) }. -""") - -info = bundle.introspect_message("contract") - -assert info.get_variable_names() == frozenset( - {"buyer", "bags", "grade", "seller", "price", "port", "ship_date"} -) -assert info.get_function_names() == frozenset({"CURRENCY", "DATETIME"}) -assert info.has_selectors is True -assert info.requires_variable("price") is True -``` - -**Use cases:** -- Verify all required data before generating manifests or confirmations -- Auto-generate input fields from message templates -- Catch missing variables at build time, not during live operations - ---- - -## Production Boot Validation — Systems That Accept Traffic Safely - -Alice's trading platform and Bob's colony manifest system can't discover a bad `.ftl` file mid-operation. They validate everything at startup. - -`LocalizationBootConfig` is the production boot sequence: load all resources, run `require_clean()` to assert every locale loaded without errors, and validate all message schemas before the first request arrives. If anything is wrong, it raises before traffic starts -- not during it. Each config instance is single-use, so create a new one for each boot attempt. 
- -```python -from pathlib import Path -from tempfile import TemporaryDirectory -from ftllexengine import LocalizationBootConfig - -with TemporaryDirectory() as tmp: - base = Path(tmp) / "locales" - for locale, invoice_label in { - "en_us": "Total", - "de_de": "Gesamt", - "ja_jp": "合計", - }.items(): - locale_dir = base / locale - locale_dir.mkdir(parents=True) - (locale_dir / "invoice.ftl").write_text( - f'invoice-total = {invoice_label}: {{ CURRENCY($amount, currency: "USD") }}\n', - encoding="utf-8", - ) - (locale_dir / "shipment.ftl").write_text( - 'shipment-line = { $bags } bags of { $origin }\n', - encoding="utf-8", - ) - - cfg = LocalizationBootConfig.from_path( - locales=("en_US", "de_DE", "ja_JP"), - resource_ids=("invoice.ftl", "shipment.ftl"), - base_path=base / "{locale}", - message_schemas={ - "invoice-total": {"amount"}, - "shipment-line": {"bags", "origin"}, - }, - required_messages=frozenset({"invoice-total", "shipment-line"}), - ) - - l10n, summary, schema_results = cfg.boot() - print(f"Loaded {summary.total_attempted} resources, {summary.errors} errors") - assert len(schema_results) == 2 -``` - -**When only the localization object is needed:** - -```python -from pathlib import Path -from tempfile import TemporaryDirectory -from ftllexengine import LocalizationBootConfig - -with TemporaryDirectory() as tmp: - base = Path(tmp) / "locales" - (base / "en_us").mkdir(parents=True) - (base / "en_us" / "main.ftl").write_text("ready = System ready\n", encoding="utf-8") - - cfg = LocalizationBootConfig.from_path( - locales=("en_US",), - resource_ids=("main.ftl",), - base_path=base / "{locale}", - required_messages=frozenset({"ready"}), - ) - - l10n = cfg.boot_simple() - result, errors = l10n.format_value("ready") - assert errors == () - print(result) -``` - -**Use cases:** -- Regulated systems that must prove clean boot before accepting requests -- Container health checks: boot validation as the readiness probe -- CI pipelines: fail the build if any `.ftl` 
file has junk or schema drift - ---- - -## Currency Data — Operations Across Borders - -Alice sources beans from Colombia, Ethiopia, and Brazil. She sells to importers in Japan, Germany, and the US. Each country uses different currencies with different decimal places. - -```python -from ftllexengine.introspection.iso import get_territory_currencies, get_currency - -# New buyer in Japan -- what currency? -currencies = get_territory_currencies("JP") -assert currencies == ("JPY",) - -# How many decimal places for yen? -jpy = get_currency("JPY") -assert jpy is not None -assert jpy.decimal_digits == 0 # no decimal places for yen - -# Compare to Colombian peso -cop = get_currency("COP") -assert cop is not None -assert cop.decimal_digits == 2 - -# Multi-currency territories -panama_currencies = get_territory_currencies("PA") -assert panama_currencies == ("PAB", "USD") # Panama uses both Balboa and US Dollar -``` - -Alice's invoices format correctly: JPY 28,125,000 in Tokyo, $187,500.00 in New York. 
- ---- - -## Architecture at a Glance - -| Component | What It Does | Install Mode | -|:----------|:-------------|:-------------| -| **Syntax** — `ftllexengine.syntax` | FTL parser, AST, serializer, visitor pattern | Parser-only install | -| **Runtime** — `ftllexengine.runtime` | `FluentBundle`, message resolution, thread-safe formatting, built-in functions (`NUMBER`, `CURRENCY`, `DATETIME`), plus zero-dependency helper types | Mixed: parser-only helpers + full-runtime formatters | -| **Localization** — `ftllexengine.localization` | `FluentLocalization` multi-locale fallback chains; `LocalizationBootConfig` strict-mode production boot; zero-dependency loading types | Mixed: parser-only loading types + full-runtime orchestration | -| **Parsing** — `ftllexengine.parsing` | Bidirectional parsing: numbers, dates, currency back to Python types | Full runtime install | -| **Introspection** — `ftllexengine.introspection` | Message-variable/function extraction, AST reference analysis, and ISO helpers; localized territory/currency metadata needs the full runtime while `get_currency_decimal_digits()` uses embedded tables | Mixed: parser-only helpers + full-runtime localized metadata | -| **Analysis** — `ftllexengine.analysis` | Dependency-graph helpers such as `detect_cycles()` | Parser-only install | -| **Validation** — `ftllexengine.validation` | Resource validation, unresolved-reference checks, semantic checks | Parser-only install | -| **Diagnostics** — `ftllexengine.diagnostics` | Structured error types, error codes, formatting | Parser-only install | -| **Integrity** — `ftllexengine.integrity` | BLAKE2b checksums, strict mode, immutable exceptions | Parser-only install | - ---- - -## When to Use FTLLexEngine - -### Use It When: - -| Scenario | Why FTLLexEngine | -| :--- | :--- | -| **Regulated / audited deployments** | Boot validation raises before traffic starts. Immutable structured errors for audit trails. 
| -| **Locale-aware numbers, dates, currency** | CLDR-backed `NUMBER()`, `DATETIME()`, `CURRENCY()` for 200+ locales. Correct by spec, not by approximation. | -| **Parsing user input** | Errors as data, not exceptions. Show helpful feedback. | -| **Financial calculations** | `Decimal` precision throughout. Strict mode on every bundle. | -| **Concurrent systems** | Thread-safe. No global locale state. | -| **Complex plurals** | Polish has 4 forms. Arabic has 6. Handle them declaratively. | -| **Multi-locale apps** | 200+ locales. CLDR-compliant. | -| **Multi-currency operations** | ISO 4217 data. Territory-to-currency mapping. Correct decimal places. | -| **AI integrations** | Introspect messages before formatting. | -| **Content/code separation** | Translators edit `.ftl` files. Developers ship code. | - -### Use Something Simpler When: - -| Scenario | Why Skip It | -| :--- | :--- | -| **Single locale, no user input** | `f"{value:,.2f}"` is enough | -| **No grammar logic** | No plurals, no conditionals | -| **Zero dependencies required** | You need pure stdlib | - ---- - -## Documentation - -| Resource | Description | -|:---------|:------------| -| [Quick Reference](docs/QUICK_REFERENCE.md) | Copy-paste patterns for common tasks | -| [API Reference](docs/DOC_00_Index.md) | Reference coverage for the exported package and module APIs | -| [Parsing Guide](docs/PARSING_GUIDE.md) | Bidirectional parsing deep-dive | -| [Data Integrity](docs/DATA_INTEGRITY_ARCHITECTURE.md) | Strict mode, checksums, immutable errors | -| [Terminology](docs/TERMINOLOGY.md) | Fluent and FTLLexEngine concepts | -| [Release Protocol](docs/RELEASE_PROTOCOL.md) | `gh`-first release-branch, tag, GitHub Release, and PyPI procedure | -| [Examples](examples/) | Working code you can run | - ---- +Start from the path that matches your job: -## Contributing +- [Copy the smallest working examples](docs/QUICK_REFERENCE.md) +- [Run the shipped examples](examples/README.md) +- [Browse parsing, 
thread-safety, and boot-validation guides](docs/DOC_00_Index.md) -Contributions welcome. See [CONTRIBUTING.md](CONTRIBUTING.md) for setup and guidelines. -The shortest full-repo verification path is `./check.sh`. +## Why It Feels Safe To Try ---- +- Published on [PyPI](https://pypi.org/project/ftllexengine/) for Python 3.13+. +- Built around the [Fluent specification](https://projectfluent.org/) and CLDR-backed locale data via Babel. +- Fully typed, MIT-licensed, and shipped with runnable examples plus repository checks for docs, examples, and version sync. +- Supports parser-only installs for syntax and validation work when you do not need the Babel-backed runtime surface. +- Release and publishing steps live in [docs/RELEASE_PROTOCOL.md](docs/RELEASE_PROTOCOL.md). ## Legal -ftllexengine is MIT-licensed. It has no required runtime dependencies. The optional -[babel] extra adds Babel (BSD 3-Clause). ftllexengine is an independent implementation -of the [FTL Syntax Specification](https://github.com/projectfluent/fluent/blob/master/spec/fluent.ebnf) -(Apache 2.0, Mozilla Foundation and others) and is not affiliated with or endorsed -by Mozilla. See [NOTICE](NOTICE) for attribution and [PATENTS.md](PATENTS.md) for -patent considerations. +FTLLexEngine is MIT-licensed. The optional `babel` extra adds Babel under BSD-3-Clause terms. FTLLexEngine is an independent implementation of the [Fluent syntax specification](https://github.com/projectfluent/fluent/blob/master/spec/fluent.ebnf) and is not affiliated with or endorsed by Mozilla. 
-[LICENSE](LICENSE) | [NOTICE](NOTICE) | [PATENTS.md](PATENTS.md) +[LICENSE](LICENSE) · [NOTICE](NOTICE) · [PATENTS.md](PATENTS.md) diff --git a/docs/CUSTOM_FUNCTIONS_GUIDE.md b/docs/CUSTOM_FUNCTIONS_GUIDE.md index 66952662..acede757 100644 --- a/docs/CUSTOM_FUNCTIONS_GUIDE.md +++ b/docs/CUSTOM_FUNCTIONS_GUIDE.md @@ -1,8 +1,8 @@ --- -afad: "3.5" -version: "0.164.0" +afad: "4.0" +version: "0.165.0" domain: CUSTOM_FUNCTIONS -updated: "2026-04-23" +updated: "2026-04-24" route: keywords: [custom functions, fluent_function, FunctionRegistry, locale injection, add_function] questions: ["how do I add a custom function?", "how does locale injection work?", "should I use a registry or add_function?"] diff --git a/docs/DATA_INTEGRITY_ARCHITECTURE.md b/docs/DATA_INTEGRITY_ARCHITECTURE.md index e164cfff..eb4c6b0f 100644 --- a/docs/DATA_INTEGRITY_ARCHITECTURE.md +++ b/docs/DATA_INTEGRITY_ARCHITECTURE.md @@ -1,8 +1,8 @@ --- -afad: "3.5" -version: "0.164.0" +afad: "4.0" +version: "0.165.0" domain: ARCHITECTURE -updated: "2026-04-23" +updated: "2026-04-24" route: keywords: [data integrity, strict mode, FrozenFluentError, IntegrityCheckFailedError, cache audit, boot validation] questions: ["how does strict mode relate to integrity?", "what audit evidence does the runtime expose?", "what is boot validation for?"] @@ -31,3 +31,14 @@ The library pushes validation as early as possible and represents runtime failur ## Boot Validation `LocalizationBootConfig.boot()` is the canonical fail-fast startup path when resources must be clean before the application accepts traffic. It combines resource loading, `require_clean()`, required-message enforcement, and message-schema validation. The config object is intentionally one-shot: create a new instance instead of reusing one after `boot()` or `boot_simple()`. 
+ +## Internal Seams + +The public contract stays centered on the facade types, but the implementation is intentionally partitioned so integrity behavior can evolve without collapsing back into single large modules: + +- `runtime.bundle` remains the public home of `FluentBundle`, while lifecycle and mutation responsibilities are delegated into focused internal runtime modules. +- `runtime.cache` remains the public cache surface, while audit-log behavior, stats helpers, and cache-key shaping live in dedicated internal cache modules. +- `runtime.function_bridge` remains the public registry surface, while decorator metadata attachment and registry introspection helpers are separated internally. +- `diagnostics.templates` remains the public diagnostic-template namespace, while reference, runtime, and parsing template families are maintained in smaller focused modules. + +This split does not change user imports. It preserves clearer ownership boundaries for audit evidence, strict-mode failures, and runtime mutation paths. 
diff --git a/docs/DOC_00_Index.md b/docs/DOC_00_Index.md index 824002b9..9a6ee52c 100644 --- a/docs/DOC_00_Index.md +++ b/docs/DOC_00_Index.md @@ -1,11 +1,11 @@ --- -afad: "3.5" -version: "0.164.0" +afad: "4.0" +version: "0.165.0" domain: INDEX -updated: "2026-04-23" +updated: "2026-04-24" route: - keywords: [api index, routing, FluentBundle, FluentLocalization, parse_ftl, FunctionRegistry, FrozenFluentError, introspection] - questions: ["where is a symbol documented?", "which file documents the runtime APIs?", "which file documents locale parsing and introspection APIs?", "where are syntax, parsing, and diagnostics references?"] + keywords: [api index, routing, FluentBundle, FluentLocalization, parse_ftl, FunctionRegistry, FrozenFluentError, introspection, detect_cycles, entry_dependency_set] + questions: ["where is a symbol documented?", "which file documents the runtime APIs?", "which file documents locale parsing, introspection, and analysis APIs?", "where are syntax, parsing, diagnostics, and dependency-graph references?"] --- # FTLLexEngine API Reference Index @@ -110,7 +110,9 @@ route: | `clear_module_caches` | [DOC_04_Runtime.md](DOC_04_Runtime.md) | `clear_module_caches` | | `CacheAuditLogEntry` | [DOC_04_Runtime.md](DOC_04_Runtime.md) | `CacheAuditLogEntry` | | `WriteLogEntry` | [DOC_04_Runtime.md](DOC_04_Runtime.md) | `WriteLogEntry` | -| `detect_cycles` | [DOC_04_RuntimeUtilities.md](DOC_04_RuntimeUtilities.md) | `detect_cycles` | +| `detect_cycles` | [DOC_04_Analysis.md](DOC_04_Analysis.md) | `detect_cycles` | +| `entry_dependency_set` | [DOC_04_Analysis.md](DOC_04_Analysis.md) | `entry_dependency_set` | +| `make_cycle_key` | [DOC_04_Analysis.md](DOC_04_Analysis.md) | `make_cycle_key` | | `normalize_locale` | [DOC_04_RuntimeUtilities.md](DOC_04_RuntimeUtilities.md) | `normalize_locale` | | `get_system_locale` | [DOC_04_RuntimeUtilities.md](DOC_04_RuntimeUtilities.md) | `get_system_locale` | | `require_locale_code` | 
[DOC_04_RuntimeUtilities.md](DOC_04_RuntimeUtilities.md) | `require_locale_code` | @@ -177,7 +179,16 @@ route: - [QUICK_REFERENCE.md](QUICK_REFERENCE.md) - [CUSTOM_FUNCTIONS_GUIDE.md](CUSTOM_FUNCTIONS_GUIDE.md) +- [DATA_INTEGRITY_ARCHITECTURE.md](DATA_INTEGRITY_ARCHITECTURE.md) +- [FUZZING_GUIDE.md](FUZZING_GUIDE.md) +- [FUZZING_GUIDE_ATHERIS.md](FUZZING_GUIDE_ATHERIS.md) +- [FUZZING_GUIDE_HYPOFUZZ.md](FUZZING_GUIDE_HYPOFUZZ.md) - [LOCALE_GUIDE.md](LOCALE_GUIDE.md) +- [MIGRATION.md](MIGRATION.md) - [PARSING_GUIDE.md](PARSING_GUIDE.md) - [RELEASE_PROTOCOL.md](RELEASE_PROTOCOL.md) +- [TERMINOLOGY.md](TERMINOLOGY.md) +- [THREAD_SAFETY.md](THREAD_SAFETY.md) +- [TYPE_HINTS_GUIDE.md](TYPE_HINTS_GUIDE.md) - [VALIDATION_GUIDE.md](VALIDATION_GUIDE.md) +- [WORKFLOW_TOUR.md](WORKFLOW_TOUR.md) diff --git a/docs/DOC_01_Core.md b/docs/DOC_01_Core.md index 4cb56584..5e995db8 100644 --- a/docs/DOC_01_Core.md +++ b/docs/DOC_01_Core.md @@ -1,8 +1,8 @@ --- -afad: "3.5" -version: "0.164.0" +afad: "4.0" +version: "0.165.0" domain: CORE -updated: "2026-04-23" +updated: "2026-04-24" route: keywords: [FluentBundle, AsyncFluentBundle, FluentLocalization, LocalizationBootConfig, PathResourceLoader, LoadSummary, ResourceLoadResult, LocalizationCacheStats, require_clean, get_load_summary] questions: ["how do I format messages?", "how do I load multiple locales?", "how do I inspect localization load results?", "how do I boot localization safely?"] diff --git a/docs/DOC_02_SyntaxExpressions.md b/docs/DOC_02_SyntaxExpressions.md index 9c52e6b5..8794fc72 100644 --- a/docs/DOC_02_SyntaxExpressions.md +++ b/docs/DOC_02_SyntaxExpressions.md @@ -1,8 +1,8 @@ --- -afad: "3.5" -version: "0.164.0" +afad: "4.0" +version: "0.165.0" domain: SYNTAX_EXPRESSIONS -updated: "2026-04-22" +updated: "2026-04-24" route: keywords: [TextElement, Placeable, SelectExpression, VariableReference, FunctionReference, Entry, Expression] questions: ["which AST node types model Fluent expressions and references?", "what public 
syntax union aliases exist?", "where are placeables and selectors documented?"] diff --git a/docs/DOC_02_SyntaxTypes.md b/docs/DOC_02_SyntaxTypes.md index 8e2a28fd..8aa521e9 100644 --- a/docs/DOC_02_SyntaxTypes.md +++ b/docs/DOC_02_SyntaxTypes.md @@ -1,8 +1,8 @@ --- -afad: "3.5" -version: "0.164.0" +afad: "4.0" +version: "0.165.0" domain: SYNTAX_TYPES -updated: "2026-04-22" +updated: "2026-04-24" route: keywords: [AST, Resource, Message, Term, Pattern, Span, Annotation, syntax nodes] questions: ["how is FTL represented in the AST?", "which public AST container and declaration node types exist?", "where are spans and parser annotations documented?"] diff --git a/docs/DOC_02_Types.md b/docs/DOC_02_Types.md index d554c3b5..32953a7d 100644 --- a/docs/DOC_02_Types.md +++ b/docs/DOC_02_Types.md @@ -1,8 +1,8 @@ --- -afad: "3.5" -version: "0.164.0" +afad: "4.0" +version: "0.165.0" domain: TYPES -updated: "2026-04-22" +updated: "2026-04-24" route: keywords: [FluentNumber, FluentValue, ParseResult, LocaleCode, CurrencyCode, TerritoryInfo, MessageIntrospection] questions: ["what public types does FTLLexEngine expose?", "what value types can formatting accept?", "which semantic aliases and lookup-result types exist?", "what introspection result types exist?"] diff --git a/docs/DOC_03_LocaleParsing.md b/docs/DOC_03_LocaleParsing.md index 04be3565..1afff2d9 100644 --- a/docs/DOC_03_LocaleParsing.md +++ b/docs/DOC_03_LocaleParsing.md @@ -1,8 +1,8 @@ --- -afad: "3.5" -version: "0.164.0" +afad: "4.0" +version: "0.165.0" domain: LOCALE_PARSING -updated: "2026-04-22" +updated: "2026-04-24" route: keywords: [parse_decimal, parse_fluent_number, parse_date, parse_datetime, parse_currency, is_valid_decimal, clear_date_caches] questions: ["how do I parse localized numbers and dates?", "what do the locale-aware parse helpers return?", "which parsing type guards and cache-clear helpers are public?"] diff --git a/docs/DOC_03_Parsing.md b/docs/DOC_03_Parsing.md index 0a2da68d..4ed740d8 100644 
--- a/docs/DOC_03_Parsing.md +++ b/docs/DOC_03_Parsing.md @@ -1,8 +1,8 @@ --- -afad: "3.5" -version: "0.164.0" +afad: "4.0" +version: "0.165.0" domain: PARSING -updated: "2026-04-22" +updated: "2026-04-24" route: keywords: [parse_ftl, serialize_ftl, validate_resource, FluentParserV1, Cursor, ASTVisitor, ASTTransformer, ParseError] questions: ["how do I parse FTL?", "what does validate_resource return?", "what syntax traversal helpers are public?", "where is the syntax parser API documented?"] diff --git a/docs/DOC_04_Analysis.md b/docs/DOC_04_Analysis.md new file mode 100644 index 00000000..460f6799 --- /dev/null +++ b/docs/DOC_04_Analysis.md @@ -0,0 +1,88 @@ +--- +afad: "4.0" +version: "0.165.0" +domain: ANALYSIS +updated: "2026-04-24" +route: + keywords: [analysis, detect_cycles, entry_dependency_set, make_cycle_key, dependency graph, cycle key] + questions: ["where are the dependency-graph helpers documented?", "how do I detect cycles in an FTL dependency graph?", "how do I build namespace-prefixed dependency sets?"] +--- + +# Analysis Reference + +Availability note: +- Parser-only safe: all `ftllexengine.analysis` exports are available without Babel. +- Public module surface: `detect_cycles()`, `entry_dependency_set()`, and `make_cycle_key()`. + +--- + +## `detect_cycles` + +Function that finds cyclic paths in a dependency graph. 
+ +### Signature +```python +def detect_cycles(dependencies: dict[str, set[str]]) -> list[list[str]]: +``` + +### Parameters +| Name | Req | Semantics | +|:-----|:----|:----------| +| `dependencies` | Y | Adjacency mapping keyed by node id | + +### Constraints +- Return: List of cycle paths, each closed by repeating the start node at the end +- State: Pure +- Thread: Safe +- Bounds: Honors the module-level cycle-count and DFS-stack limits used by the analysis facade +- Compatibility: Public parser-only helper exposed from `ftllexengine.analysis` + +--- + +## `entry_dependency_set` + +Function that builds the canonical mixed message/term dependency set. + +### Signature +```python +def entry_dependency_set( + message_refs: frozenset[str], + term_refs: frozenset[str], +) -> frozenset[str]: +``` + +### Parameters +| Name | Req | Semantics | +|:-----|:----|:----------| +| `message_refs` | Y | Unprefixed referenced message ids | +| `term_refs` | Y | Unprefixed referenced term ids | + +### Constraints +- Return: Immutable dependency set using `msg:` and `term:` namespace prefixes +- State: Pure +- Thread: Safe +- Purpose: Canonical public helper for callers that need the same dependency encoding used by runtime and validation internals +- Compatibility: Public parser-only helper exposed from `ftllexengine.analysis` + +--- + +## `make_cycle_key` + +Function that converts a cycle path into its canonical display key. 
+ +### Signature +```python +def make_cycle_key(cycle: list[str] | tuple[str, ...]) -> str: +``` + +### Parameters +| Name | Req | Semantics | +|:-----|:----|:----------| +| `cycle` | Y | Closed cycle path to canonicalize | + +### Constraints +- Return: Stable arrow-joined key such as `"msg:a -> term:b -> msg:a"` +- State: Pure +- Thread: Safe +- Purpose: Normalizes equivalent cycle rotations to the same display string +- Compatibility: Public parser-only helper exposed from `ftllexengine.analysis` diff --git a/docs/DOC_04_Introspection.md b/docs/DOC_04_Introspection.md index 98cb3f9b..09631016 100644 --- a/docs/DOC_04_Introspection.md +++ b/docs/DOC_04_Introspection.md @@ -1,8 +1,8 @@ --- -afad: "3.5" -version: "0.164.0" +afad: "4.0" +version: "0.165.0" domain: INTROSPECTION -updated: "2026-04-22" +updated: "2026-04-24" route: keywords: [introspection, validate_message_variables, extract_variables, extract_references, ISO 4217, ISO 3166, get_currency, get_territory] questions: ["how do I inspect a message's variables and references?", "which ISO lookup helpers exist?", "how do I validate message-variable schemas?", "which Babel-backed introspection helpers are public?"] diff --git a/docs/DOC_04_Runtime.md b/docs/DOC_04_Runtime.md index fe40c7fe..33495a0d 100644 --- a/docs/DOC_04_Runtime.md +++ b/docs/DOC_04_Runtime.md @@ -1,8 +1,8 @@ --- -afad: "3.5" -version: "0.164.0" +afad: "4.0" +version: "0.165.0" domain: RUNTIME -updated: "2026-04-23" +updated: "2026-04-24" route: keywords: [CacheConfig, FunctionRegistry, fluent_function, number_format, currency_format, select_plural_category, clear_module_caches] questions: ["how do I configure runtime formatting?", "how do custom functions and registries work?", "where are cache config and write-log entry types documented?"] @@ -18,6 +18,10 @@ Parser-only facade note: - `create_default_registry`, `get_shared_registry`, `number_format`, `datetime_format`, `currency_format`, `select_plural_category`, `FluentBundle`, and 
`AsyncFluentBundle` require the full runtime install and are absent from `ftllexengine.runtime` in parser-only installs. - `clear_module_caches()` is a root-level helper that works in both parser-only and full-runtime installs. +Facade ownership note: +- The stable contract is the facade import path (`ftllexengine.runtime`, plus the root and localization facades where noted), not the internal helper module that implements a detail today. +- Smaller internal runtime modules exist to keep cache, bundle, function-registry, and diagnostic responsibilities partitioned; callers should continue importing from the documented facades. + ## `CacheConfig` Dataclass that configures optional format-result caching. @@ -81,6 +85,7 @@ def fluent_function( ### Constraints - Purpose: Mark custom functions for locale injection behavior +- Ownership: attaches the locale-injection metadata that `FunctionRegistry` reads during registration - State: Pure decorator - Thread: Safe diff --git a/docs/DOC_04_RuntimeUtilities.md b/docs/DOC_04_RuntimeUtilities.md index 402fcbeb..2c8b73ab 100644 --- a/docs/DOC_04_RuntimeUtilities.md +++ b/docs/DOC_04_RuntimeUtilities.md @@ -1,38 +1,17 @@ --- -afad: "3.5" -version: "0.164.0" +afad: "4.0" +version: "0.165.0" domain: RUNTIME_UTILITIES -updated: "2026-04-23" +updated: "2026-04-24" route: - keywords: [detect_cycles, normalize_locale, get_system_locale, require_locale_code, __version__, require_date, require_datetime] - questions: ["where are runtime utility exports documented?", "what package metadata constants are public?", "which boundary validators and locale helpers are exported from the root package?"] + keywords: [normalize_locale, get_system_locale, require_locale_code, __version__, require_date, require_datetime, require_fluent_number] + questions: ["where are root-level runtime utility exports documented?", "what package metadata constants are public?", "which boundary validators and locale helpers are exported from the root package?"] --- # 
Runtime Utilities Reference This reference covers root-level runtime-adjacent utilities, package metadata constants, locale helpers, and boundary validators. -Formatting functions, registries, cache configuration, and audit entry types live in [DOC_04_Runtime.md](DOC_04_Runtime.md). - -## `detect_cycles` - -Function that detects cycles in a dependency graph. - -### Signature -```python -def detect_cycles(dependencies: dict[str, set[str]]) -> list[list[str]]: -``` - -### Parameters -| Name | Req | Semantics | -|:-----|:----|:----------| -| `dependencies` | Y | Graph adjacency mapping | - -### Constraints -- Return: Canonicalized cycle paths -- State: Pure -- Thread: Safe - ---- +Formatting functions, registries, cache configuration, and audit entry types live in [DOC_04_Runtime.md](DOC_04_Runtime.md). Dependency-graph helpers live in [DOC_04_Analysis.md](DOC_04_Analysis.md). ## `normalize_locale` diff --git a/docs/DOC_05_Diagnostics.md b/docs/DOC_05_Diagnostics.md index e7ed272c..d75acda6 100644 --- a/docs/DOC_05_Diagnostics.md +++ b/docs/DOC_05_Diagnostics.md @@ -1,8 +1,8 @@ --- -afad: "3.5" -version: "0.164.0" +afad: "4.0" +version: "0.165.0" domain: DIAGNOSTICS -updated: "2026-04-23" +updated: "2026-04-24" route: keywords: [ParserAnnotation, ValidationResult, ValidationError, ValidationWarning, DiagnosticCode, DiagnosticFormatter, OutputFormat, SourceSpan] questions: ["what validation result types exist?", "how do I format diagnostics output?", "where are diagnostic codes and source spans documented?"] diff --git a/docs/DOC_05_Errors.md b/docs/DOC_05_Errors.md index 646463e5..46e89368 100644 --- a/docs/DOC_05_Errors.md +++ b/docs/DOC_05_Errors.md @@ -1,8 +1,8 @@ --- -afad: "3.5" -version: "0.164.0" +afad: "4.0" +version: "0.165.0" domain: ERRORS -updated: "2026-04-22" +updated: "2026-04-24" route: keywords: [FrozenFluentError, ErrorCategory, FrozenErrorContext, DataIntegrityError, BabelImportError, ErrorTemplate] questions: ["what errors does FTLLexEngine 
expose?", "how do parse and format failures surface?", "what integrity exceptions exist?", "how does missing Babel surface?"] diff --git a/docs/DOC_06_Testing.md b/docs/DOC_06_Testing.md index c95d895f..955ac0b1 100644 --- a/docs/DOC_06_Testing.md +++ b/docs/DOC_06_Testing.md @@ -1,8 +1,8 @@ --- -afad: "3.5" -version: "0.164.0" +afad: "4.0" +version: "0.165.0" domain: TESTING -updated: "2026-04-23" +updated: "2026-04-24" route: keywords: [testing, lint, pytest, fuzz, HypoFuzz, Atheris, test.sh, lint.sh, check.sh] questions: ["how do I run lint and tests?", "what is the fuzz marker for?", "which scripts drive testing?"] diff --git a/docs/FUZZING_GUIDE.md b/docs/FUZZING_GUIDE.md index 42e02d5d..0ed82cc9 100644 --- a/docs/FUZZING_GUIDE.md +++ b/docs/FUZZING_GUIDE.md @@ -1,8 +1,8 @@ --- -afad: "3.5" -version: "0.164.0" +afad: "4.0" +version: "0.165.0" domain: FUZZING -updated: "2026-04-23" +updated: "2026-04-24" route: keywords: [fuzzing, HypoFuzz, Atheris, Hypothesis, fuzz_hypofuzz.sh, fuzz_atheris.sh] questions: ["which fuzzer should I use?", "how do I start fuzzing?", "how do I reproduce a fuzz failure?"] diff --git a/docs/FUZZING_GUIDE_ATHERIS.md b/docs/FUZZING_GUIDE_ATHERIS.md index 8bf7b1b6..998f8fb7 100644 --- a/docs/FUZZING_GUIDE_ATHERIS.md +++ b/docs/FUZZING_GUIDE_ATHERIS.md @@ -1,8 +1,8 @@ --- -afad: "3.5" -version: "0.164.0" +afad: "4.0" +version: "0.165.0" domain: FUZZING -updated: "2026-04-23" +updated: "2026-04-24" route: keywords: [atheris, libfuzzer, fuzz_atheris.sh, replay, minimize, corpus] questions: ["how do I run an Atheris target?", "how do I replay a finding?", "how does the Atheris environment get created?"] diff --git a/docs/FUZZING_GUIDE_HYPOFUZZ.md b/docs/FUZZING_GUIDE_HYPOFUZZ.md index 75821607..83f0b148 100644 --- a/docs/FUZZING_GUIDE_HYPOFUZZ.md +++ b/docs/FUZZING_GUIDE_HYPOFUZZ.md @@ -1,8 +1,8 @@ --- -afad: "3.5" -version: "0.164.0" +afad: "4.0" +version: "0.165.0" domain: FUZZING -updated: "2026-04-22" +updated: "2026-04-24" route: 
keywords: [hypofuzz, hypothesis, fuzz_hypofuzz.sh, deep mode, preflight, repro] questions: ["how do I run HypoFuzz?", "what does --deep do?", "how do I reproduce a Hypothesis failure?"] diff --git a/docs/LOCALE_GUIDE.md b/docs/LOCALE_GUIDE.md index a1c32520..2280f5e1 100644 --- a/docs/LOCALE_GUIDE.md +++ b/docs/LOCALE_GUIDE.md @@ -1,8 +1,8 @@ --- -afad: "3.5" -version: "0.164.0" +afad: "4.0" +version: "0.165.0" domain: LOCALE -updated: "2026-04-23" +updated: "2026-04-24" route: keywords: [locale, NUMBER, DATETIME, CURRENCY, normalize_locale, get_system_locale, use_isolating] questions: ["why did my number not format?", "what locale string should I use?", "what does use_isolating do?"] diff --git a/docs/MIGRATION.md b/docs/MIGRATION.md index df2f3fc8..709b0459 100644 --- a/docs/MIGRATION.md +++ b/docs/MIGRATION.md @@ -1,8 +1,8 @@ --- -afad: "3.5" -version: "0.164.0" +afad: "4.0" +version: "0.165.0" domain: MIGRATION -updated: "2026-04-23" +updated: "2026-04-24" route: keywords: [migration, fluent.runtime, FluentBundle, FluentLocalization, strict mode] questions: ["how do I migrate from fluent.runtime?", "what changes when I switch to FTLLexEngine?"] diff --git a/docs/PARSING_GUIDE.md b/docs/PARSING_GUIDE.md index ee560114..f17aa47a 100644 --- a/docs/PARSING_GUIDE.md +++ b/docs/PARSING_GUIDE.md @@ -1,8 +1,8 @@ --- -afad: "3.5" -version: "0.164.0" +afad: "4.0" +version: "0.165.0" domain: PARSING -updated: "2026-04-23" +updated: "2026-04-24" route: keywords: [parsing, parse_decimal, parse_currency, parse_date, parse_datetime, parse_fluent_number] questions: ["how do I parse localized user input?", "how do I do roundtrip formatting and parsing?", "what do parse errors look like?"] diff --git a/docs/QUICK_REFERENCE.md b/docs/QUICK_REFERENCE.md index fca8de48..0e939394 100644 --- a/docs/QUICK_REFERENCE.md +++ b/docs/QUICK_REFERENCE.md @@ -1,8 +1,8 @@ --- -afad: "3.5" -version: "0.164.0" +afad: "4.0" +version: "0.165.0" domain: REFERENCE -updated: "2026-04-23" +updated: 
"2026-04-24" route: keywords: [quick reference, cheat sheet, fluentbundle, fluentlocalization, parsing, validation, boot] questions: ["show me the common commands", "what is the smallest working example?", "how do I boot localization safely?"] diff --git a/docs/RELEASE_PROTOCOL.md b/docs/RELEASE_PROTOCOL.md index 092d1965..4609bec5 100644 --- a/docs/RELEASE_PROTOCOL.md +++ b/docs/RELEASE_PROTOCOL.md @@ -1,8 +1,8 @@ --- -afad: "3.5" -version: "0.164.0" +afad: "4.0" +version: "0.165.0" domain: RELEASE -updated: "2026-04-23" +updated: "2026-04-24" route: keywords: [release, gh, github release, pypi, tag, assets, publish, verify, worktree, main] questions: ["how do I cut a release?", "how do I publish GitHub assets?", "how do I verify a release handoff?", "how do I rerun publish for an existing tag?"] @@ -116,6 +116,21 @@ PY_VERSION=3.14 ./scripts/test.sh uv run python scripts/validate_docs.py uv run python scripts/validate_version.py uv build +tar -tzf "dist/ftllexengine-X.Y.Z.tar.gz" | rg '(^|/)AGENTS\.md$|(^|/)\.codex/' || true +python - <<'PY' +import zipfile +from pathlib import Path + +wheel = Path("dist/ftllexengine-X.Y.Z-py3-none-any.whl") +with zipfile.ZipFile(wheel) as zf: + leaked = [ + name + for name in zf.namelist() + if name.endswith("AGENTS.md") or name.startswith(".codex/") or "/.codex/" in name + ] +if leaked: + raise SystemExit(f"wheel leaked repository-only files: {leaked}") +PY ``` Also confirm: @@ -124,6 +139,9 @@ Also confirm: - `pyproject.toml` has the final target version. - all version-carrying metadata that ships with the repo (for example markdown frontmatter and `uv.lock`) is synchronized to that target version. +- built distributions exclude repository-only guidance files that are intentionally committed for + repo use but must never ship in public artifacts. For this repository, `AGENTS.md` and `/.codex` + must be absent from both the sdist and wheel. 
- the release checkout is based on current `origin/main` or you explicitly understand the delta. Do not cut the release branch or tag anything while any gate is red. diff --git a/docs/TERMINOLOGY.md b/docs/TERMINOLOGY.md index 3342a4fa..fd70ebbc 100644 --- a/docs/TERMINOLOGY.md +++ b/docs/TERMINOLOGY.md @@ -1,8 +1,8 @@ --- -afad: "3.5" -version: "0.164.0" +afad: "4.0" +version: "0.165.0" domain: TERMINOLOGY -updated: "2026-04-23" +updated: "2026-04-24" route: keywords: [terminology, glossary, message, term, resource, locale code, strict mode] questions: ["what does resource mean here?", "what is the difference between a message and a term?", "what does strict mode mean in FTLLexEngine?"] diff --git a/docs/THREAD_SAFETY.md b/docs/THREAD_SAFETY.md index 536063ce..a6d27431 100644 --- a/docs/THREAD_SAFETY.md +++ b/docs/THREAD_SAFETY.md @@ -1,8 +1,8 @@ --- -afad: "3.5" -version: "0.164.0" +afad: "4.0" +version: "0.165.0" domain: ARCHITECTURE -updated: "2026-04-23" +updated: "2026-04-24" route: keywords: [thread safety, concurrency, FluentBundle, FluentLocalization, AsyncFluentBundle, shared bundle] questions: ["is FluentBundle thread-safe?", "can I share a localization object across threads?", "what does AsyncFluentBundle do?"] @@ -17,11 +17,14 @@ route: `FluentBundle` and `FluentLocalization` are designed for concurrent use. Read operations can run concurrently, while resource and function mutations take exclusive access internally. Callers do not need to provide their own external lock around normal formatting calls. +These guarantees come from the runtime's own synchronization boundaries. They are not documented as a CPython-only or GIL-dependent property. + ## Practical Rules - Share a `FluentBundle` across threads when all requests use the same locale. - Share a `FluentLocalization` across threads when the locale fallback chain is fixed. - Use `AsyncFluentBundle` in asyncio handlers when you want bundle work offloaded through `asyncio.to_thread()`. 
+- Treat custom functions as external code: if they share mutable process state outside the bundle, that state still needs its own synchronization. - Do not try to mutate a bundle from inside a custom function triggered by that same bundle’s formatting call. ## Async diff --git a/docs/TYPE_HINTS_GUIDE.md b/docs/TYPE_HINTS_GUIDE.md index 1dee6daa..df517597 100644 --- a/docs/TYPE_HINTS_GUIDE.md +++ b/docs/TYPE_HINTS_GUIDE.md @@ -1,8 +1,8 @@ --- -afad: "3.5" -version: "0.164.0" +afad: "4.0" +version: "0.165.0" domain: TYPE_HINTS -updated: "2026-04-22" +updated: "2026-04-24" route: keywords: [type hints, mypy, FluentValue, ParseResult, TypeIs, LocaleCode] questions: ["what types does the library expose?", "how do I type parse results?", "which helpers are type guards?"] diff --git a/docs/VALIDATION_GUIDE.md b/docs/VALIDATION_GUIDE.md index f0549226..364ce5c7 100644 --- a/docs/VALIDATION_GUIDE.md +++ b/docs/VALIDATION_GUIDE.md @@ -1,8 +1,8 @@ --- -afad: "3.5" -version: "0.164.0" +afad: "4.0" +version: "0.165.0" domain: VALIDATION -updated: "2026-04-23" +updated: "2026-04-24" route: keywords: [validation, validate_resource, ValidationResult, require_clean, boot validation, message schemas] questions: ["how do I validate FTL before loading it?", "how do I fail fast at startup?", "how do I validate message variables?"] diff --git a/docs/WORKFLOW_TOUR.md b/docs/WORKFLOW_TOUR.md new file mode 100644 index 00000000..5a2ee3a3 --- /dev/null +++ b/docs/WORKFLOW_TOUR.md @@ -0,0 +1,178 @@ +--- +afad: "4.0" +version: "0.165.0" +domain: GUIDE +updated: "2026-04-24" +route: + keywords: [workflow tour, deeper readme material, multi-locale, streaming resources, async bundle, boot validation, introspection] + questions: ["where did the deeper README workflows move?", "how do I see FTLLexEngine end-to-end workflows?", "which docs cover streaming, async, and boot validation together?"] +--- + +# FTLLexEngine Workflow Tour + +**Purpose**: Preserve the deeper workflows that do not belong in 
the storefront `README.md` while keeping them easy to find and grounded in runnable examples. +**Prerequisites**: Full runtime install (`ftllexengine[babel]`) for formatting, localization, localized parsing, and ISO metadata lookups. + +## Overview + +The root `README.md` is the front window: short promise, shortest credible example, and quick next steps. This guide keeps the richer material that matters once you want to evaluate FTLLexEngine as a real working stack instead of a headline. + +The library is strongest when you need one coherent path for: + +- formatting Fluent messages with locale-aware numbers, dates, and currency, +- parsing localized user input back into exact Python values, +- validating resources before traffic, +- and keeping those operations safe in threaded or asyncio applications. + +## Where The Deeper Material Lives + +| Topic moved out of the storefront | Best current home | +|:----------------------------------|:------------------| +| Smallest working setup | [QUICK_REFERENCE.md](QUICK_REFERENCE.md) | +| Multi-locale fallback chains | [examples/locale_fallback.py](../examples/locale_fallback.py) and [LOCALE_GUIDE.md](LOCALE_GUIDE.md) | +| Parsing localized input | [PARSING_GUIDE.md](PARSING_GUIDE.md) and [examples/bidirectional_formatting.py](../examples/bidirectional_formatting.py) | +| Thread-safe shared bundles | [THREAD_SAFETY.md](THREAD_SAFETY.md) and [examples/thread_safety.py](../examples/thread_safety.py) | +| Async applications | [examples/async_bundle.py](../examples/async_bundle.py) | +| Streaming resource loading | [examples/streaming_resources.py](../examples/streaming_resources.py) and [DOC_03_Parsing.md](DOC_03_Parsing.md) | +| Message introspection | [examples/parser_only.py](../examples/parser_only.py) and [DOC_04_Introspection.md](DOC_04_Introspection.md) | +| Startup and schema validation | [VALIDATION_GUIDE.md](VALIDATION_GUIDE.md) and [QUICK_REFERENCE.md](QUICK_REFERENCE.md) | +| Currency and territory metadata | 
[DOC_04_Introspection.md](DOC_04_Introspection.md) | +| Symbol-by-symbol API routing | [DOC_00_Index.md](DOC_00_Index.md) | + +## One Runtime For Format And Parse + +The core value proposition from the old root README still stands: the same locale theory can format outbound text and parse inbound user input, so the invoice you emit and the reply you accept do not drift into separate rule systems. + +- For the fastest copy-paste path, use [QUICK_REFERENCE.md](QUICK_REFERENCE.md). +- For a fuller parsing walkthrough, use [PARSING_GUIDE.md](PARSING_GUIDE.md). +- For runnable end-to-end examples, use [examples/quickstart.py](../examples/quickstart.py) and [examples/bidirectional_formatting.py](../examples/bidirectional_formatting.py). + +## Stream Resources Without Building One Giant String + +`add_resource_stream()` and `parse_stream_ftl()` let you work from line iterators instead of pre-assembling the entire source in memory first. + +```python +from pathlib import Path +from tempfile import TemporaryDirectory + +from ftllexengine import FluentBundle, parse_stream_ftl + +with TemporaryDirectory() as tmp: + source_path = Path(tmp) / "messages.ftl" + source_path.write_text( + "hello = Hello from orbit\n" + "status = Cargo ready\n", + encoding="utf-8", + ) + + bundle = FluentBundle("en_US", use_isolating=False) + with source_path.open(encoding="utf-8") as handle: + junk = bundle.add_resource_stream(handle, source_path=str(source_path)) + assert junk == () + + status, errors = bundle.format_pattern("status") + assert errors == () + assert status == "Cargo ready" + + with source_path.open(encoding="utf-8") as handle: + entry_ids = [entry.id.name for entry in parse_stream_ftl(handle)] + assert entry_ids == ["hello", "status"] +``` + +For a runnable script that also shows streamed localization loads, use [examples/streaming_resources.py](../examples/streaming_resources.py). 
+ +## Use Async Bundles In Event-Loop Applications + +`AsyncFluentBundle` keeps the Fluent runtime behavior but offloads mutation and formatting work through `asyncio.to_thread()`, which is the right fit when your application is already organized around async request handling. + +```python +import asyncio +from decimal import Decimal + +from ftllexengine import AsyncFluentBundle + + +async def main() -> None: + async with AsyncFluentBundle("en_US", use_isolating=False) as bundle: + await bundle.add_resource( + 'price = Total: { CURRENCY($amount, currency: "USD") }\n' + "counter = Count: { $n }" + ) + + price, errors = await bundle.format_pattern("price", {"amount": Decimal("99.99")}) + assert errors == () + assert price == "Total: $99.99" + + results = await asyncio.gather( + *(bundle.format_pattern("counter", {"n": i}) for i in range(3)) + ) + assert [text for text, _ in results] == ["Count: 0", "Count: 1", "Count: 2"] + + +asyncio.run(main()) +``` + +For a fuller runnable script, use [examples/async_bundle.py](../examples/async_bundle.py). + +## Introspect Message Contracts Before Formatting + +The message-introspection APIs are the pre-flight surface: inspect required variables and called functions before a live format call, or use the same metadata to generate forms, validation rules, or build-time checks. + +```python +from ftllexengine import FluentBundle + +bundle = FluentBundle("en_US", use_isolating=False) +bundle.add_resource( + 'contract = { $buyer } pays { CURRENCY($amount, currency: "USD") } on { DATETIME($ship_date) }' +) + +info = bundle.introspect_message("contract") +assert info.get_variable_names() == frozenset({"buyer", "amount", "ship_date"}) +assert info.get_function_names() == frozenset({"CURRENCY", "DATETIME"}) +assert info.has_selectors is False +``` + +If you only need parsing, validation, and introspection without the Babel-backed runtime, start with [examples/parser_only.py](../examples/parser_only.py). 
+ +## Validate Before Traffic + +The fail-fast startup path also remains important. `LocalizationBootConfig.boot()` is the canonical way to prove that required resources loaded cleanly and that required message contracts exist before the application starts serving requests. + +- Use [VALIDATION_GUIDE.md](VALIDATION_GUIDE.md) for the startup pattern. +- Use [DATA_INTEGRITY_ARCHITECTURE.md](DATA_INTEGRITY_ARCHITECTURE.md) for the underlying fail-fast model. +- Use [QUICK_REFERENCE.md](QUICK_REFERENCE.md) for the shortest runnable boot snippet. + +## Query Territory And Currency Metadata + +The ISO and CLDR-backed helper layer stays useful when product decisions depend on territory defaults or currency precision. + +```python +from ftllexengine.introspection import get_currency, get_territory_currencies + +assert get_territory_currencies("JP") == ("JPY",) + +yen = get_currency("JPY") +assert yen is not None +assert yen.decimal_digits == 0 +``` + +For the full set of helpers, use [DOC_04_Introspection.md](DOC_04_Introspection.md). + +## Surface Map + +| Surface | Use it for | Install mode | +|:--------|:-----------|:-------------| +| Syntax and validation | Parse, transform, serialize, and validate `.ftl` resources | Parser-only | +| Runtime | `FluentBundle`, built-in functions, locale-aware formatting | Full runtime | +| Localization | `FluentLocalization`, fallback chains, loaders, boot validation | Mixed | +| Parsing | Localized numbers, dates, datetimes, and currency back to Python values | Full runtime | +| Introspection and analysis | Message variables, references, dependency graphs, ISO helpers | Mixed | +| Diagnostics and integrity | Structured errors, strict mode, audit evidence, immutable failure data | Parser-only | + +Use [DOC_00_Index.md](DOC_00_Index.md) when you need the exact symbol home instead of the high-level subsystem map. 
+ +## Good Fit Versus Simpler Fit + +- Strong fit: Fluent-based applications, invoice and checkout flows, localized forms, startup validation for translation packs, and systems that care about exact decimals. +- Strong fit: Teams that want message grammar, formatting rules, parsing rules, and startup checks to stay in one coherent runtime instead of drifting between template helpers and request-time patches. +- Simpler fit: single-locale applications, plain string formatting, or projects that do not need Fluent resources at all. diff --git a/examples/README.md b/examples/README.md index e05690c1..e34d369e 100644 --- a/examples/README.md +++ b/examples/README.md @@ -1,8 +1,8 @@ --- -afad: "3.5" -version: "0.164.0" +afad: "4.0" +version: "0.165.0" domain: EXAMPLES -updated: "2026-04-23" +updated: "2026-04-24" route: keywords: [examples, quickstart, parser-only, localization, custom functions, thread safety, benchmarks] questions: ["what examples are available?", "how do I run the examples?", "which example should I start with?"] @@ -37,10 +37,12 @@ uv run --python 3.13 python scripts/run_examples.py | `parser_only.py` | Parser-only install surface: zero-dependency helper facades, parse, validate, inspect, serialize | | `locale_fallback.py` | `FluentLocalization`, fallback chains, disk and custom loaders | | `bidirectional_formatting.py` | Locale-aware parsing for numbers, dates, currency | +| `async_bundle.py` | `AsyncFluentBundle`, concurrent formatting, streamed loads in asyncio apps | | `custom_functions.py` | `FunctionRegistry`, `bundle.add_function()`, `@fluent_function` | | `function_introspection.py` | Introspection APIs and function metadata | | `ftl_transform.py` | AST transforms and serialization | | `ftl_linter.py` | Validation and custom lint-style checks | +| `streaming_resources.py` | `add_resource_stream()`, `parse_stream_ftl()`, streamed localization loads | | `thread_safety.py` | Shared bundle and task-local patterns | | `property_based_testing.py` | 
Hypothesis-oriented usage examples | | `benchmark_loaders.py` | Loader micro-benchmarks | @@ -51,6 +53,8 @@ uv run --python 3.13 python scripts/run_examples.py - Working in a parser-only install: start with `examples/parser_only.py`. - Building a multi-locale app: use `examples/locale_fallback.py`. - Accepting localized user input: use `examples/bidirectional_formatting.py`. +- Building asyncio handlers: use `examples/async_bundle.py`. +- Loading large or streamed FTL resources: use `examples/streaming_resources.py`. ## Type Checking diff --git a/examples/README_TYPE_CHECKING.md b/examples/README_TYPE_CHECKING.md index 389d3de8..cf675031 100644 --- a/examples/README_TYPE_CHECKING.md +++ b/examples/README_TYPE_CHECKING.md @@ -1,8 +1,8 @@ --- -afad: "3.5" -version: "0.164.0" +afad: "4.0" +version: "0.165.0" domain: EXAMPLES -updated: "2026-04-22" +updated: "2026-04-24" route: keywords: [examples, mypy, type checking, strict, explicit ownership, thread safety] questions: ["how do I type-check the examples?", "what mypy config do the examples use?", "how do the examples stay strict without local stubs?"] diff --git a/examples/async_bundle.py b/examples/async_bundle.py new file mode 100644 index 00000000..dd68dd1b --- /dev/null +++ b/examples/async_bundle.py @@ -0,0 +1,83 @@ +"""AsyncFluentBundle examples for asyncio applications. + +Demonstrates: + +1. Async context-manager usage +2. Concurrent ``format_pattern()`` calls +3. ``add_resource_stream()`` in async code + +Python 3.13+. 
+""" + +from __future__ import annotations + +import asyncio +from decimal import Decimal + +from ftllexengine import AsyncFluentBundle + + +async def example_async_context_manager() -> None: + """Format a currency message through the async wrapper.""" + print("=" * 68) + print("Example 1: Async context manager") + print("=" * 68) + + async with AsyncFluentBundle("en_US", use_isolating=False) as bundle: + await bundle.add_resource('price = Total: { CURRENCY($amount, currency: "USD") }') + result, errors = await bundle.format_pattern("price", {"amount": Decimal("99.99")}) + assert errors == () + assert result == "Total: $99.99" + print(f"[OK] Formatted price: {result}") + + print("[PASS] Async context-manager formatting works") + + +async def example_concurrent_formatting() -> None: + """Show concurrent async formatting against one shared bundle.""" + print("\n" + "=" * 68) + print("Example 2: Concurrent async formatting") + print("=" * 68) + + bundle = AsyncFluentBundle("en_US", use_isolating=False) + await bundle.add_resource("counter = Count: { $n }") + + results = await asyncio.gather( + *(bundle.format_pattern("counter", {"n": i}) for i in range(5)) + ) + texts = [text for text, errors in results if errors == ()] + assert texts == [f"Count: {i}" for i in range(5)] + print(f"[OK] Concurrent results: {texts}") + + print("[PASS] Concurrent async formatting works") + + +async def example_stream_loading() -> None: + """Load streamed FTL lines through the async wrapper.""" + print("\n" + "=" * 68) + print("Example 3: Async add_resource_stream") + print("=" * 68) + + bundle = AsyncFluentBundle("en_US", use_isolating=False) + junk = await bundle.add_resource_stream(["hello = Hello!\n", "status = Ready\n"]) + assert junk == () + assert bundle.has_message("hello") + + status, errors = await bundle.format_pattern("status") + assert errors == () + assert status == "Ready" + print(f"[OK] Stream-loaded status: {status}") + + print("[PASS] Async stream loading works") + + 
+async def main() -> None: + """Run all async examples.""" + await example_async_context_manager() + await example_concurrent_formatting() + await example_stream_loading() + print("\n[SUCCESS] Async bundle examples complete!") + + +if __name__ == "__main__": + asyncio.run(main()) diff --git a/examples/streaming_resources.py b/examples/streaming_resources.py new file mode 100644 index 00000000..5a5014f8 --- /dev/null +++ b/examples/streaming_resources.py @@ -0,0 +1,92 @@ +"""Streaming Resource Examples. + +Demonstrates the line-oriented resource APIs that avoid requiring one +pre-assembled source string before parsing: + +1. ``FluentBundle.add_resource_stream()`` +2. ``parse_stream_ftl()`` +3. ``FluentLocalization.add_resource_stream()`` + +Python 3.13+. +""" + +from __future__ import annotations + +from pathlib import Path +from tempfile import TemporaryDirectory + +from ftllexengine import FluentBundle, FluentLocalization, parse_stream_ftl +from ftllexengine.localization import PathResourceLoader + + +def example_bundle_stream_load() -> None: + """Load one bundle from a file handle and parse the same stream directly.""" + print("=" * 68) + print("Example 1: FluentBundle.add_resource_stream") + print("=" * 68) + + with TemporaryDirectory() as tmp: + source_path = Path(tmp) / "messages.ftl" + source_path.write_text( + "hello = Hello from orbit\n" + "status = Cargo ready\n", + encoding="utf-8", + ) + + bundle = FluentBundle("en_US", use_isolating=False) + with source_path.open(encoding="utf-8") as handle: + junk = bundle.add_resource_stream(handle, source_path=str(source_path)) + assert junk == () + + status, errors = bundle.format_pattern("status") + assert errors == () + assert status == "Cargo ready" + print(f"[OK] Formatted status: {status}") + + with source_path.open(encoding="utf-8") as handle: + entry_ids = [ + entry.id.name + for entry in parse_stream_ftl(handle) + if hasattr(entry, "id") + ] + assert entry_ids == ["hello", "status"] + print(f"[OK] Parsed 
entries: {entry_ids}") + + print("[PASS] Bundle stream loading works") + + +def example_localization_stream_load() -> None: + """Add a streamed resource to one locale inside a fallback chain.""" + print("\n" + "=" * 68) + print("Example 2: FluentLocalization.add_resource_stream") + print("=" * 68) + + with TemporaryDirectory() as tmp: + base = Path(tmp) / "locales" + (base / "de_de").mkdir(parents=True) + (base / "en_us").mkdir(parents=True) + (base / "de_de" / "messages.ftl").write_text("hello = Hallo\n", encoding="utf-8") + (base / "en_us" / "messages.ftl").write_text("hello = Hello\n", encoding="utf-8") + + extra = Path(tmp) / "extra_de.ftl" + extra.write_text("shipment = Zusatzdatei\n", encoding="utf-8") + + loader = PathResourceLoader(str(base / "{locale}")) + l10n = FluentLocalization(["de_DE", "en_US"], ["messages.ftl"], loader) + + with extra.open(encoding="utf-8") as handle: + junk = l10n.add_resource_stream("de_DE", handle, source_path=str(extra)) + assert junk == () + + shipment, errors = l10n.format_value("shipment") + assert errors == () + assert shipment == "Zusatzdatei" + print(f"[OK] Localized streamed message: {shipment}") + + print("[PASS] Localization stream loading works") + + +if __name__ == "__main__": + example_bundle_stream_load() + example_localization_stream_load() + print("\n[SUCCESS] Streaming resource examples complete!") diff --git a/fuzz_atheris/README.md b/fuzz_atheris/README.md index 35feb829..f616f585 100644 --- a/fuzz_atheris/README.md +++ b/fuzz_atheris/README.md @@ -1,8 +1,8 @@ --- -afad: "3.5" -version: "0.164.0" +afad: "4.0" +version: "0.165.0" domain: FUZZING -updated: "2026-04-23" +updated: "2026-04-24" route: keywords: [atheris, fuzz inventory, fuzz targets, libfuzzer, corpus] questions: ["what do the Atheris fuzzers cover?", "which targets exist?", "how do I map a target name to a file?"] diff --git a/pyproject.toml b/pyproject.toml index 1a79e31f..24cc4345 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -31,6 +31,7 @@ 
python_exec_globs = [ "docs/QUICK_REFERENCE.md", "docs/TYPE_HINTS_GUIDE.md", "docs/VALIDATION_GUIDE.md", + "docs/WORKFLOW_TOUR.md", ] # Substrings that trigger skipping a specific FTL code block skip_markers = [ @@ -52,7 +53,7 @@ parser_path = "ftllexengine.syntax.parser:FluentParserV1" [project] name = "ftllexengine" -version = "0.164.0" +version = "0.165.0" description = "Python runtime for the Fluent (FTL) specification: bidirectional parsing, CLDR-backed locale-aware formatting, and fail-fast boot validation with structured audit evidence." readme = "README.md" requires-python = ">=3.13" @@ -162,6 +163,8 @@ license-files = ["LICENSE", "NOTICE", "PATENTS.md"] # - Working examples (/examples) - educational value and learning resources # - User documentation (*.md) - offline reference and API docs # - License files (LICENSE, NOTICE, PATENTS.md) - legal compliance +# Intentionally excludes repository-only agent guidance (`AGENTS.md`, `/.codex`) +# even when those files are committed to git # # IMPORTANT: Hatch uses VCS (git) mode by default and respects git tracking # Files must be committed to git to be included in sdist @@ -182,6 +185,7 @@ only-include = [ [tool.hatch.build.targets.wheel] # Wheel contains ONLY runtime code (src/ftllexengine/) # Automatically excludes: tests, examples, docs +# Also excludes repository-only agent guidance such as `AGENTS.md` and `/.codex/` # Hatch auto-detects packages in src-layout (src/ftllexengine/) # No explicit configuration needed - hatch finds src/ftllexengine/ automatically diff --git a/scripts/run_examples.py b/scripts/run_examples.py index 6703167c..a7acf581 100644 --- a/scripts/run_examples.py +++ b/scripts/run_examples.py @@ -47,6 +47,7 @@ def _validator(stdout: str) -> str | None: EXAMPLE_CONTRACTS: dict[str, ExampleContract] = { + "async_bundle.py": _require_output_markers("[SUCCESS] Async bundle examples complete!"), "benchmark_loaders.py": _require_output_markers("[OK] Benchmarks complete!"), 
"bidirectional_formatting.py": _require_output_markers("All examples completed!"), "custom_functions.py": _require_output_markers( @@ -77,6 +78,9 @@ def _validator(stdout: str) -> str | None: "quickstart.py": _require_output_markers( "[SUCCESS] All examples completed successfully!" ), + "streaming_resources.py": _require_output_markers( + "[SUCCESS] Streaming resource examples complete!" + ), "thread_safety.py": _require_output_markers( "[SUCCESS] All thread safety examples complete!" ), diff --git a/src/ftllexengine/__init__.py b/src/ftllexengine/__init__.py index 3e21707c..efe4e960 100644 --- a/src/ftllexengine/__init__.py +++ b/src/ftllexengine/__init__.py @@ -112,10 +112,9 @@ from typing import TYPE_CHECKING from ._optional_exports import ( - ROOT_BABEL_OPTIONAL_ATTRS as _BABEL_OPTIONAL_ATTRS, -) -from ._optional_exports import ( - load_root_babel_optional_exports, + babel_optional_attr_set, + babel_optional_attr_tuple, + load_babel_optional_export, raise_missing_babel_symbol, ) from .analysis import detect_cycles @@ -130,10 +129,6 @@ require_datetime, require_fluent_number, ) - -# Error types must load before core to avoid circular import: -# diagnostics -> validation -> syntax.ast -> syntax.__init__ -> serializer -> core.depth_guard -# depth_guard imports from diagnostics; diagnostics must be in sys.modules first. 
from .diagnostics import ( ErrorCategory, FrozenErrorContext, @@ -179,17 +174,33 @@ from .validation import validate_resource if TYPE_CHECKING: - from .localization import FluentLocalization, LocalizationBootConfig, LocalizationCacheStats - from .runtime import AsyncFluentBundle, FluentBundle + from .localization import ( + FluentLocalization as FluentLocalization, + ) + from .localization import ( + LocalizationBootConfig as LocalizationBootConfig, + ) + from .localization import ( + LocalizationCacheStats as LocalizationCacheStats, + ) + from .runtime import ( + AsyncFluentBundle as AsyncFluentBundle, + ) + from .runtime import ( + FluentBundle as FluentBundle, + ) _BABEL_AVAILABLE = is_babel_available() - -if _BABEL_AVAILABLE: - globals().update(load_root_babel_optional_exports()) +_BABEL_OPTIONAL_ATTRS = babel_optional_attr_set(__name__) +_BABEL_OPTIONAL_NAMES = babel_optional_attr_tuple(__name__) def __getattr__(name: str) -> object: """Provide a helpful missing-symbol error for Babel-backed facade symbols.""" + if _BABEL_AVAILABLE and name in _BABEL_OPTIONAL_ATTRS: + value = load_babel_optional_export(__name__, name) + globals()[name] = value + return value return raise_missing_babel_symbol( module_name=__name__, name=name, @@ -218,20 +229,14 @@ def __getattr__(name: str) -> object: # Encoding requirements per Fluent spec recommendations.md __recommended_encoding__ = "UTF-8" # Per spec: "The recommended encoding for Fluent files is UTF-8" -# ruff: noqa: RUF022 - __all__ organized by category for readability, not alphabetically -__all__ = [ - # Babel-backed facades - "AsyncFluentBundle", +# ruff: noqa: RUF022 - grouped public exports are easier to audit by contract area +__all__: list[str] = [ # Runtime helpers and localization loading (parser-only safe) "CacheConfig", "FallbackInfo", - "FluentBundle", "FluentNumber", - "FluentLocalization", "FluentValue", "LoadSummary", - "LocalizationBootConfig", - "LocalizationCacheStats", "PathResourceLoader", 
"ResourceLoadResult", "ResourceLoader", @@ -296,6 +301,7 @@ def __getattr__(name: str) -> object: "__spec_url__", "__version__", ] +__all__[0:0] = list(_BABEL_OPTIONAL_NAMES) if not _BABEL_AVAILABLE: __all__ = [name for name in __all__ if name not in _BABEL_OPTIONAL_ATTRS] diff --git a/src/ftllexengine/__init__.pyi b/src/ftllexengine/__init__.pyi index bdc7966d..75f6b7d7 100644 --- a/src/ftllexengine/__init__.pyi +++ b/src/ftllexengine/__init__.pyi @@ -184,10 +184,4 @@ __all__: list[str] = [ "__recommended_encoding__", "__spec_url__", "__version__", - # Babel-backed facades - "AsyncFluentBundle", - "FluentBundle", - "FluentLocalization", - "LocalizationBootConfig", - "LocalizationCacheStats", ] diff --git a/src/ftllexengine/_optional_exports.py b/src/ftllexengine/_optional_exports.py index 4e4d6354..059bb982 100644 --- a/src/ftllexengine/_optional_exports.py +++ b/src/ftllexengine/_optional_exports.py @@ -1,111 +1,152 @@ -"""Helpers for Babel-backed facade exports. +"""Canonical owner for Babel-backed facade exports. -Centralizes facade wiring for symbols that genuinely require the Babel-enabled -runtime at import time. Zero-dependency symbols should be imported directly by -their facade modules instead of being routed through this helper. +Facade modules derive their optional ``__all__`` entries, lazy attribute +resolution, and parser-only diagnostics from the definitions in this module. +Zero-dependency symbols should be imported directly by their facades instead of +being routed through this helper. 
""" from __future__ import annotations +from dataclasses import dataclass +from importlib import import_module from typing import NoReturn -ROOT_BABEL_OPTIONAL_ATTRS: frozenset[str] = frozenset({ - "AsyncFluentBundle", - "FluentBundle", - "FluentLocalization", - "LocalizationBootConfig", - "LocalizationCacheStats", -}) - -LOCALIZATION_BABEL_OPTIONAL_ATTRS: frozenset[str] = frozenset({ - "FluentLocalization", - "LocalizationBootConfig", - "LocalizationCacheStats", -}) - -RUNTIME_BABEL_OPTIONAL_ATTRS: frozenset[str] = frozenset({ - "AsyncFluentBundle", - "create_default_registry", - "currency_format", - "datetime_format", - "FluentBundle", - "get_shared_registry", - "number_format", - "select_plural_category", -}) - - -def load_root_babel_optional_exports() -> dict[str, object]: - """Return root-facade exports that require the Babel-enabled runtime.""" - from .localization.boot import ( # noqa: PLC0415 - intentionally deferred optional import - LocalizationBootConfig, - ) - from .localization.orchestrator import ( # noqa: PLC0415 - intentionally deferred optional import - FluentLocalization, - LocalizationCacheStats, - ) - from .runtime.async_bundle import ( # noqa: PLC0415 - intentionally deferred optional import - AsyncFluentBundle, - ) - from .runtime.bundle import ( # noqa: PLC0415 - intentionally deferred optional import - FluentBundle, - ) - - return { - "AsyncFluentBundle": AsyncFluentBundle, - "FluentBundle": FluentBundle, - "FluentLocalization": FluentLocalization, - "LocalizationBootConfig": LocalizationBootConfig, - "LocalizationCacheStats": LocalizationCacheStats, - } - - -def load_localization_babel_optional_exports() -> dict[str, object]: - """Return localization-facade exports that require the Babel runtime.""" - from ftllexengine.localization.boot import ( # noqa: PLC0415 - intentionally deferred optional import - LocalizationBootConfig, - ) - from ftllexengine.localization.orchestrator import ( # noqa: PLC0415 - intentionally deferred optional import 
- FluentLocalization, - LocalizationCacheStats, - ) - - return { - "FluentLocalization": FluentLocalization, - "LocalizationBootConfig": LocalizationBootConfig, - "LocalizationCacheStats": LocalizationCacheStats, - } - - -def load_runtime_babel_optional_exports() -> dict[str, object]: - """Return runtime-facade exports that require the Babel runtime.""" - from .runtime.async_bundle import ( # noqa: PLC0415 - intentionally deferred optional import - AsyncFluentBundle, - ) - from .runtime.bundle import ( # noqa: PLC0415 - intentionally deferred optional import - FluentBundle, - ) - from .runtime.functions import ( # noqa: PLC0415 - intentionally deferred optional import - create_default_registry, - currency_format, - datetime_format, - get_shared_registry, - number_format, - ) - from .runtime.plural_rules import ( # noqa: PLC0415 - intentionally deferred optional import - select_plural_category, - ) - - return { - "AsyncFluentBundle": AsyncFluentBundle, - "create_default_registry": create_default_registry, - "currency_format": currency_format, - "datetime_format": datetime_format, - "FluentBundle": FluentBundle, - "get_shared_registry": get_shared_registry, - "number_format": number_format, - "select_plural_category": select_plural_category, - } +__all__ = [ + "OptionalFacadeExport", + "babel_optional_attr_set", + "babel_optional_attr_tuple", + "load_babel_optional_export", + "raise_missing_babel_symbol", +] + + +@dataclass(frozen=True, slots=True) +class OptionalFacadeExport: + """One Babel-backed export owned by a public facade.""" + + public_name: str + source_module: str + source_name: str + + +_OPTIONAL_EXPORTS_BY_FACADE: dict[str, tuple[OptionalFacadeExport, ...]] = { + "ftllexengine": ( + OptionalFacadeExport( + public_name="AsyncFluentBundle", + source_module="ftllexengine.runtime.async_bundle", + source_name="AsyncFluentBundle", + ), + OptionalFacadeExport( + public_name="FluentBundle", + source_module="ftllexengine.runtime.bundle", + 
source_name="FluentBundle", + ), + OptionalFacadeExport( + public_name="FluentLocalization", + source_module="ftllexengine.localization.orchestrator", + source_name="FluentLocalization", + ), + OptionalFacadeExport( + public_name="LocalizationBootConfig", + source_module="ftllexengine.localization.boot", + source_name="LocalizationBootConfig", + ), + OptionalFacadeExport( + public_name="LocalizationCacheStats", + source_module="ftllexengine.localization.orchestrator", + source_name="LocalizationCacheStats", + ), + ), + "ftllexengine.localization": ( + OptionalFacadeExport( + public_name="FluentLocalization", + source_module="ftllexengine.localization.orchestrator", + source_name="FluentLocalization", + ), + OptionalFacadeExport( + public_name="LocalizationBootConfig", + source_module="ftllexengine.localization.boot", + source_name="LocalizationBootConfig", + ), + OptionalFacadeExport( + public_name="LocalizationCacheStats", + source_module="ftllexengine.localization.orchestrator", + source_name="LocalizationCacheStats", + ), + ), + "ftllexengine.runtime": ( + OptionalFacadeExport( + public_name="AsyncFluentBundle", + source_module="ftllexengine.runtime.async_bundle", + source_name="AsyncFluentBundle", + ), + OptionalFacadeExport( + public_name="create_default_registry", + source_module="ftllexengine.runtime.functions", + source_name="create_default_registry", + ), + OptionalFacadeExport( + public_name="currency_format", + source_module="ftllexengine.runtime.functions", + source_name="currency_format", + ), + OptionalFacadeExport( + public_name="datetime_format", + source_module="ftllexengine.runtime.functions", + source_name="datetime_format", + ), + OptionalFacadeExport( + public_name="FluentBundle", + source_module="ftllexengine.runtime.bundle", + source_name="FluentBundle", + ), + OptionalFacadeExport( + public_name="get_shared_registry", + source_module="ftllexengine.runtime.functions", + source_name="get_shared_registry", + ), + OptionalFacadeExport( + 
public_name="number_format", + source_module="ftllexengine.runtime.functions", + source_name="number_format", + ), + OptionalFacadeExport( + public_name="select_plural_category", + source_module="ftllexengine.runtime.plural_rules", + source_name="select_plural_category", + ), + ), +} + + +def _optional_exports_for(module_name: str) -> tuple[OptionalFacadeExport, ...]: + """Return the canonical optional-export definitions for one facade.""" + exports = _OPTIONAL_EXPORTS_BY_FACADE.get(module_name) + if exports is None: + msg = f"No optional export contract registered for facade {module_name!r}" + raise KeyError(msg) + return exports + + +def babel_optional_attr_tuple(module_name: str) -> tuple[str, ...]: + """Return Babel-backed public names for one facade in canonical order.""" + return tuple(export.public_name for export in _optional_exports_for(module_name)) + + +def babel_optional_attr_set(module_name: str) -> frozenset[str]: + """Return Babel-backed public names for one facade as a set.""" + return frozenset(babel_optional_attr_tuple(module_name)) + + +def load_babel_optional_export(module_name: str, name: str) -> object: + """Resolve one Babel-backed export from the canonical facade contract.""" + for export in _optional_exports_for(module_name): + if export.public_name == name: + module = import_module(export.source_module) + return getattr(module, export.source_name) + msg = f"module {module_name!r} has no optional Babel export {name!r}" + raise AttributeError(msg) def raise_missing_babel_symbol( diff --git a/src/ftllexengine/core/__init__.py b/src/ftllexengine/core/__init__.py index 50c48c3e..08abbea9 100644 --- a/src/ftllexengine/core/__init__.py +++ b/src/ftllexengine/core/__init__.py @@ -5,31 +5,21 @@ core <- syntax <- parsing <- runtime -Exports (eager — no Babel dependency, no circular import risk): +Exports: + DepthGuard: Context manager for recursion depth limiting FluentNumber: Formatted number preserving numeric identity and precision FluentValue: 
Union of all Fluent-compatible value types + depth_clamp: Clamp depth values against Python recursion limits make_fluent_number: Public helper for manual FluentNumber construction require_date: Validate that a boundary value is a date (not datetime) require_datetime: Validate that a boundary value is a datetime require_fluent_number: Validate that a boundary value is a FluentNumber require_positive_int: Validate that a boundary value is a positive integer (internal) -Exports (lazy — depth_guard is loaded on first access to break circular import): - DepthGuard: Context manager for recursion depth limiting - depth_clamp: Utility function for clamping depth values against recursion limit - - Circular import note: depth_guard imports from ftllexengine.diagnostics, which - imports from ftllexengine.syntax, which imports from ftllexengine.core.depth_guard. - Eager import of depth_guard here would create a cycle when ftllexengine.__init__ - triggers core.__init__ loading before diagnostics is loaded. Lazy loading via - __getattr__ defers depth_guard until after all modules are initialized, at which - point sys.modules contains the complete import graph. - Python 3.13+. No external dependencies. """ -from typing import TYPE_CHECKING, Any - +from .depth_guard import DepthGuard, depth_clamp from .validators import ( require_date, require_datetime, @@ -38,9 +28,6 @@ ) from .value_types import FluentNumber, FluentValue, make_fluent_number -if TYPE_CHECKING: - from .depth_guard import DepthGuard, depth_clamp - __all__ = [ "DepthGuard", "FluentNumber", @@ -52,26 +39,3 @@ "require_fluent_number", "require_positive_int", ] - -_LAZY_DEPTH_GUARD = frozenset({"DepthGuard", "depth_clamp"}) - - -def __getattr__(name: str) -> Any: - """Lazy-load depth_guard symbols to break the circular import. - - depth_guard imports ftllexengine.diagnostics, which imports ftllexengine.syntax, - which imports ftllexengine.core.depth_guard. 
Eager loading here during - ftllexengine package initialization creates a circular dependency. Deferred - loading via __getattr__ resolves it: by the time any caller requests DepthGuard - or depth_clamp, sys.modules already contains the full import graph. - """ - if name in _LAZY_DEPTH_GUARD: - from .depth_guard import ( # noqa: PLC0415 - lazy load to break circular import; see module docstring - DepthGuard, - depth_clamp, - ) - globals()["DepthGuard"] = DepthGuard - globals()["depth_clamp"] = depth_clamp - return globals()[name] - msg = f"module {__name__!r} has no attribute {name!r}" - raise AttributeError(msg) diff --git a/src/ftllexengine/diagnostics/template_parsing.py b/src/ftllexengine/diagnostics/template_parsing.py new file mode 100644 index 00000000..00686e09 --- /dev/null +++ b/src/ftllexengine/diagnostics/template_parsing.py @@ -0,0 +1,140 @@ +"""Parsing and locale-input error template mixins.""" + +from __future__ import annotations + +from .codes import Diagnostic, DiagnosticCode + + +class _ParsingErrorTemplateMixin: + """ErrorTemplate methods for user-input parsing failures.""" + + @staticmethod + def parse_decimal_failed( + value: str, + locale_code: str, + reason: str, + ) -> Diagnostic: + """Decimal parsing failed.""" + msg = f"Failed to parse decimal '{value}' for locale '{locale_code}': {reason}" + return Diagnostic( + code=DiagnosticCode.PARSE_DECIMAL_FAILED, + message=msg, + span=None, + hint="Check that the decimal format matches the locale's conventions", + ) + + @staticmethod + def parse_date_failed( + value: str, + locale_code: str, + reason: str, + ) -> Diagnostic: + """Date parsing failed.""" + msg = f"Failed to parse date '{value}' for locale '{locale_code}': {reason}" + return Diagnostic( + code=DiagnosticCode.PARSE_DATE_FAILED, + message=msg, + span=None, + hint="Use ISO 8601 (YYYY-MM-DD) for unambiguous, locale-independent dates", + ) + + @staticmethod + def parse_datetime_failed( + value: str, + locale_code: str, + reason: str, + ) 
-> Diagnostic: + """Datetime parsing failed.""" + msg = f"Failed to parse datetime '{value}' for locale '{locale_code}': {reason}" + return Diagnostic( + code=DiagnosticCode.PARSE_DATETIME_FAILED, + message=msg, + span=None, + hint="Use ISO 8601 (YYYY-MM-DD HH:MM:SS) for unambiguous, locale-independent datetimes", + ) + + @staticmethod + def parse_currency_failed( + value: str, + locale_code: str, + reason: str, + ) -> Diagnostic: + """Currency parsing failed.""" + msg = f"Failed to parse currency '{value}' for locale '{locale_code}': {reason}" + return Diagnostic( + code=DiagnosticCode.PARSE_CURRENCY_FAILED, + message=msg, + span=None, + hint="Use ISO currency codes (USD, EUR, GBP) for unambiguous parsing", + ) + + @staticmethod + def parse_locale_unknown(locale_code: str) -> Diagnostic: + """Unknown locale for parsing.""" + msg = f"Unknown locale '{locale_code}'" + return Diagnostic( + code=DiagnosticCode.PARSE_LOCALE_UNKNOWN, + message=msg, + span=None, + hint="Use BCP 47 locale codes (e.g., 'en_US', 'de_DE', 'lv_LV')", + ) + + @staticmethod + def parse_currency_ambiguous( + symbol: str, + value: str, + ) -> Diagnostic: + """Ambiguous currency symbol.""" + msg = ( + f"Ambiguous currency symbol '{symbol}' in '{value}'. " + f"Symbol '{symbol}' is used by multiple currencies." 
+ ) + return Diagnostic( + code=DiagnosticCode.PARSE_CURRENCY_AMBIGUOUS, + message=msg, + span=None, + hint="Use default_currency parameter, infer_from_locale=True, or ISO code (USD, EUR)", + ) + + @staticmethod + def parse_currency_symbol_unknown( + symbol: str, + value: str, + ) -> Diagnostic: + """Unknown currency symbol.""" + msg = f"Unknown currency symbol '{symbol}' in '{value}'" + return Diagnostic( + code=DiagnosticCode.PARSE_CURRENCY_SYMBOL_UNKNOWN, + message=msg, + span=None, + hint="Use ISO currency codes (USD, EUR, GBP) or supported symbols", + ) + + @staticmethod + def parse_currency_code_invalid( + code: str, + value: str, + ) -> Diagnostic: + """Invalid ISO 4217 currency code.""" + msg = f"Invalid ISO 4217 currency code '{code}' in '{value}'" + return Diagnostic( + code=DiagnosticCode.PARSE_CURRENCY_CODE_INVALID, + message=msg, + span=None, + hint="Use valid ISO 4217 codes (USD, EUR, GBP, JPY, etc.)", + ) + + @staticmethod + def parse_amount_invalid( + amount_str: str, + value: str, + reason: str, + ) -> Diagnostic: + """Invalid amount in currency string.""" + msg = f"Failed to parse amount '{amount_str}' from '{value}': {reason}" + return Diagnostic( + code=DiagnosticCode.PARSE_AMOUNT_INVALID, + message=msg, + span=None, + hint="Check that the amount format matches the locale's conventions", + ) diff --git a/src/ftllexengine/diagnostics/template_reference.py b/src/ftllexengine/diagnostics/template_reference.py new file mode 100644 index 00000000..6c82131f --- /dev/null +++ b/src/ftllexengine/diagnostics/template_reference.py @@ -0,0 +1,182 @@ +"""Reference and resolution error template mixins.""" + +from __future__ import annotations + +from .codes import Diagnostic, DiagnosticCode +from .template_shared import docs_url + + +class _ReferenceErrorTemplateMixin: + """ErrorTemplate methods for lookup, reference, and traversal failures.""" + + @staticmethod + def message_not_found(message_id: str) -> Diagnostic: + """Message reference not found in 
bundle.""" + msg = f"Message '{message_id}' not found" + return Diagnostic( + code=DiagnosticCode.MESSAGE_NOT_FOUND, + message=msg, + span=None, + hint="Check that the message is defined in the loaded resources", + help_url=docs_url("messages.html"), + ) + + @staticmethod + def attribute_not_found(attribute: str, message_id: str) -> Diagnostic: + """Message attribute not found.""" + msg = f"Attribute '{attribute}' not found in message '{message_id}'" + return Diagnostic( + code=DiagnosticCode.ATTRIBUTE_NOT_FOUND, + message=msg, + span=None, + hint=f"Check that message '{message_id}' has an attribute '.{attribute}'", + help_url=docs_url("attributes.html"), + ) + + @staticmethod + def term_not_found(term_id: str) -> Diagnostic: + """Term reference not found.""" + msg = f"Term '-{term_id}' not found" + return Diagnostic( + code=DiagnosticCode.TERM_NOT_FOUND, + message=msg, + span=None, + hint="Terms must be defined before they are referenced", + help_url=docs_url("terms.html"), + ) + + @staticmethod + def term_attribute_not_found(attribute: str, term_id: str) -> Diagnostic: + """Term attribute not found.""" + msg = f"Attribute '{attribute}' not found in term '-{term_id}'" + return Diagnostic( + code=DiagnosticCode.TERM_ATTRIBUTE_NOT_FOUND, + message=msg, + span=None, + hint=f"Check that term '-{term_id}' has an attribute '.{attribute}'", + help_url=docs_url("terms.html"), + ) + + @staticmethod + def term_positional_args_ignored(term_name: str, count: int) -> Diagnostic: + """Term positional arguments ignored.""" + plural = "argument" if count == 1 else "arguments" + msg = ( + f"Term '-{term_name}' does not accept positional arguments " + f"(got {count}). 
Use named arguments: -term(key: value)" + ) + return Diagnostic( + code=DiagnosticCode.TERM_POSITIONAL_ARGS_IGNORED, + message=msg, + span=None, + hint=f"Remove the {count} positional {plural} and use named arguments instead", + help_url=docs_url("terms.html"), + ) + + @staticmethod + def plural_support_unavailable() -> Diagnostic: + """Plural variant matching unavailable due to missing Babel dependency.""" + msg = ( + "Plural variant matching unavailable (Babel not installed). " + "Install with: pip install ftllexengine[babel]" + ) + return Diagnostic( + code=DiagnosticCode.PLURAL_SUPPORT_UNAVAILABLE, + message=msg, + span=None, + hint="Install Babel for CLDR-based plural category matching", + help_url=docs_url("selectors.html"), + ) + + @staticmethod + def variable_not_provided( + variable_name: str, + *, + resolution_path: tuple[str, ...] | None = None, + ) -> Diagnostic: + """Variable not provided in arguments.""" + msg = f"Variable '${variable_name}' not provided" + return Diagnostic( + code=DiagnosticCode.VARIABLE_NOT_PROVIDED, + message=msg, + span=None, + hint=f"Pass '{variable_name}' in the arguments dictionary", + help_url=docs_url("variables.html"), + resolution_path=resolution_path, + ) + + @staticmethod + def message_no_value(message_id: str) -> Diagnostic: + """Message has no value (only attributes).""" + msg = f"Message '{message_id}' has no value" + return Diagnostic( + code=DiagnosticCode.MESSAGE_NO_VALUE, + message=msg, + span=None, + hint="Message has only attributes; specify which attribute to format", + help_url=docs_url("messages.html"), + ) + + @staticmethod + def cyclic_reference(resolution_path: list[str]) -> Diagnostic: + """Circular reference detected.""" + cycle_chain = " -> ".join(resolution_path) + msg = f"Circular reference detected: {cycle_chain}" + return Diagnostic( + code=DiagnosticCode.CYCLIC_REFERENCE, + message=msg, + span=None, + hint="Break the circular dependency by removing one of the references", + 
help_url=docs_url("references.html"), + ) + + @staticmethod + def max_depth_exceeded(message_id: str, max_depth: int) -> Diagnostic: + """Maximum resolution depth exceeded.""" + msg = f"Maximum resolution depth ({max_depth}) exceeded while resolving '{message_id}'" + return Diagnostic( + code=DiagnosticCode.MAX_DEPTH_EXCEEDED, + message=msg, + span=None, + hint="Reduce message reference chain depth or refactor to avoid deep nesting", + help_url=docs_url("references.html"), + ) + + @staticmethod + def depth_exceeded(max_depth: int) -> Diagnostic: + """Maximum nesting depth exceeded.""" + msg = f"Maximum nesting depth ({max_depth}) exceeded" + return Diagnostic( + code=DiagnosticCode.MAX_DEPTH_EXCEEDED, + message=msg, + span=None, + hint="Reduce nesting depth or check for malformed AST construction", + help_url=docs_url("references.html"), + ) + + @staticmethod + def expansion_budget_exceeded(total_chars: int, max_chars: int) -> Diagnostic: + """Expansion budget exceeded during resolution.""" + msg = ( + f"Expansion budget exceeded: {total_chars} characters produced " + f"(limit: {max_chars})" + ) + return Diagnostic( + code=DiagnosticCode.EXPANSION_BUDGET_EXCEEDED, + message=msg, + span=None, + hint="Check for exponentially expanding message references (Billion Laughs pattern)", + help_url=docs_url("references.html"), + ) + + @staticmethod + def no_variants() -> Diagnostic: + """Select expression has no variants.""" + msg = "No variants in select expression" + return Diagnostic( + code=DiagnosticCode.NO_VARIANTS, + message=msg, + span=None, + hint="Select expressions must have at least one variant", + help_url=docs_url("selectors.html"), + ) diff --git a/src/ftllexengine/diagnostics/template_runtime.py b/src/ftllexengine/diagnostics/template_runtime.py new file mode 100644 index 00000000..9189a60d --- /dev/null +++ b/src/ftllexengine/diagnostics/template_runtime.py @@ -0,0 +1,182 @@ +"""Runtime and function error template mixins.""" + +from __future__ import 
class _RuntimeErrorTemplateMixin:
    """Diagnostic builders for function-call and runtime evaluation failures.

    Every method is a stateless ``@staticmethod`` that assembles exactly one
    ``Diagnostic`` from its arguments: a fixed ``DiagnosticCode``, a formatted
    message, a remediation hint and, for the function-related codes, a help
    URL built with ``docs_url`` plus the structured fields that describe the
    failing call. ``span`` is always ``None``.
    """

    @staticmethod
    def function_not_found(function_name: str) -> Diagnostic:
        """Build the diagnostic for a function that is not in the registry.

        Args:
            function_name: Name of the function that could not be found.

        Returns:
            Diagnostic with code ``FUNCTION_NOT_FOUND``.
        """
        return Diagnostic(
            code=DiagnosticCode.FUNCTION_NOT_FOUND,
            message=f"Function '{function_name}' not found",
            span=None,
            hint="Built-in functions: NUMBER, DATETIME, CURRENCY. Check spelling.",
            help_url=docs_url("functions.html"),
        )

    @staticmethod
    def function_failed(function_name: str, error_msg: str) -> Diagnostic:
        """Build the diagnostic for a function whose execution failed.

        Args:
            function_name: Name of the function that failed.
            error_msg: Failure description appended to the message.

        Returns:
            Diagnostic with code ``FUNCTION_FAILED``.
        """
        return Diagnostic(
            code=DiagnosticCode.FUNCTION_FAILED,
            message=f"Function '{function_name}' failed: {error_msg}",
            span=None,
            hint="Check the function arguments and their types",
            help_url=docs_url("functions.html"),
            function_name=function_name,
        )

    @staticmethod
    def formatting_failed(
        function_name: str,
        value: str,
        error_reason: str,
    ) -> Diagnostic:
        """Build the diagnostic for a locale-aware formatting failure.

        Args:
            function_name: Name of the formatting function.
            value: Textual form of the value that could not be formatted.
            error_reason: Why formatting failed.

        Returns:
            Diagnostic with code ``FORMATTING_FAILED``.
        """
        text = f"{function_name}() formatting failed for value '{value}': {error_reason}"
        return Diagnostic(
            code=DiagnosticCode.FORMATTING_FAILED,
            message=text,
            span=None,
            hint="Check that the value is valid for the specified format options",
            help_url=docs_url("functions.html"),
            function_name=function_name,
        )

    @staticmethod
    def function_arity_mismatch(
        function_name: str,
        expected: int,
        received: int,
    ) -> Diagnostic:
        """Build the diagnostic for a wrong number of positional arguments.

        Args:
            function_name: Name of the function that was called.
            expected: Number of positional arguments the function expects.
            received: Number of positional arguments actually passed.

        Returns:
            Diagnostic with code ``FUNCTION_ARITY_MISMATCH``.
        """
        text = f"Function '{function_name}' expects {expected} argument(s), got {received}"
        return Diagnostic(
            code=DiagnosticCode.FUNCTION_ARITY_MISMATCH,
            message=text,
            span=None,
            hint=f"Pass exactly {expected} value(s) to {function_name}()",
            help_url=docs_url("functions.html"),
            function_name=function_name,
        )

    @staticmethod
    def type_mismatch(
        function_name: str,
        argument_name: str,
        expected_type: str,
        received_type: str,
        *,
        ftl_location: str | None = None,
    ) -> Diagnostic:
        """Build the diagnostic for a function argument of the wrong type.

        Args:
            function_name: Name of the function that received the argument.
            argument_name: Name of the offending argument (used in the hint).
            expected_type: Name of the type the function expects.
            received_type: Name of the type that was actually passed.
            ftl_location: Optional FTL location stored on the diagnostic.

        Returns:
            Diagnostic with code ``TYPE_MISMATCH``.
        """
        text = (
            f"Type mismatch in {function_name}(): "
            f"expected {expected_type}, got {received_type}"
        )
        advice = (
            f"Convert '{argument_name}' to {expected_type} "
            f"before passing to {function_name}()"
        )
        return Diagnostic(
            code=DiagnosticCode.TYPE_MISMATCH,
            message=text,
            span=None,
            hint=advice,
            help_url=docs_url("functions.html"),
            function_name=function_name,
            argument_name=argument_name,
            expected_type=expected_type,
            received_type=received_type,
            ftl_location=ftl_location,
        )

    @staticmethod
    def invalid_argument(
        function_name: str,
        argument_name: str,
        reason: str,
        *,
        ftl_location: str | None = None,
    ) -> Diagnostic:
        """Build the diagnostic for an argument whose value is invalid.

        Args:
            function_name: Name of the function that received the argument.
            argument_name: Name of the invalid argument.
            reason: Why the value was rejected.
            ftl_location: Optional FTL location stored on the diagnostic.

        Returns:
            Diagnostic with code ``INVALID_ARGUMENT``.
        """
        return Diagnostic(
            code=DiagnosticCode.INVALID_ARGUMENT,
            message=f"Invalid argument '{argument_name}' in {function_name}(): {reason}",
            span=None,
            hint=f"Check the value of '{argument_name}' argument",
            help_url=docs_url("functions.html"),
            function_name=function_name,
            argument_name=argument_name,
            ftl_location=ftl_location,
        )

    @staticmethod
    def argument_required(
        function_name: str,
        argument_name: str,
        *,
        ftl_location: str | None = None,
    ) -> Diagnostic:
        """Build the diagnostic for a required argument that was not passed.

        Args:
            function_name: Name of the function missing the argument.
            argument_name: Name of the required argument.
            ftl_location: Optional FTL location stored on the diagnostic.

        Returns:
            Diagnostic with code ``ARGUMENT_REQUIRED``.
        """
        text = f"Required argument '{argument_name}' not provided for {function_name}()"
        return Diagnostic(
            code=DiagnosticCode.ARGUMENT_REQUIRED,
            message=text,
            span=None,
            hint=f"Add '{argument_name}' argument to {function_name}() call",
            help_url=docs_url("functions.html"),
            function_name=function_name,
            argument_name=argument_name,
            ftl_location=ftl_location,
        )

    @staticmethod
    def pattern_invalid(
        function_name: str,
        pattern: str,
        reason: str,
        *,
        ftl_location: str | None = None,
    ) -> Diagnostic:
        """Build the diagnostic for an invalid format pattern.

        The pattern text appears only in the hint; the diagnostic's
        ``argument_name`` is always the literal ``"pattern"`` and its
        severity is set explicitly to ``"error"``.

        Args:
            function_name: Name of the function that received the pattern.
            pattern: The rejected pattern string.
            reason: Why the pattern was rejected.
            ftl_location: Optional FTL location stored on the diagnostic.

        Returns:
            Diagnostic with code ``PATTERN_INVALID``.
        """
        return Diagnostic(
            code=DiagnosticCode.PATTERN_INVALID,
            message=f"Invalid pattern in {function_name}(): {reason}",
            span=None,
            hint=f"Check pattern syntax: '{pattern}'",
            help_url=docs_url("functions.html"),
            function_name=function_name,
            argument_name="pattern",
            ftl_location=ftl_location,
            severity="error",
        )

    @staticmethod
    def unknown_expression(expr_type: str) -> Diagnostic:
        """Build the diagnostic for an unrecognized expression type.

        No help URL is attached to this diagnostic.

        Args:
            expr_type: Name of the expression type that was encountered.

        Returns:
            Diagnostic with code ``UNKNOWN_EXPRESSION``.
        """
        return Diagnostic(
            code=DiagnosticCode.UNKNOWN_EXPRESSION,
            message=f"Unknown expression type: {expr_type}",
            span=None,
            hint="This is likely a bug in the parser or resolver",
        )

    @staticmethod
    def unexpected_eof(position: int) -> Diagnostic:
        """Build the diagnostic for an unexpected end of input.

        No help URL is attached to this diagnostic.

        Args:
            position: Position at which the end of input was reached.

        Returns:
            Diagnostic with code ``UNEXPECTED_EOF``.
        """
        return Diagnostic(
            code=DiagnosticCode.UNEXPECTED_EOF,
            message=f"Unexpected EOF at position {position}",
            span=None,
            hint="Check for unclosed braces or incomplete syntax",
        )
""" -from .codes import Diagnostic, DiagnosticCode +from .template_parsing import _ParsingErrorTemplateMixin +from .template_reference import _ReferenceErrorTemplateMixin +from .template_runtime import _RuntimeErrorTemplateMixin __all__ = ["ErrorTemplate"] -class ErrorTemplate: +class ErrorTemplate( + _ReferenceErrorTemplateMixin, + _RuntimeErrorTemplateMixin, + _ParsingErrorTemplateMixin, +): """Centralized error message templates. All error messages are created here. NO f-strings in exception constructors! @@ -19,753 +25,3 @@ class ErrorTemplate: - Easy i18n in the future - Documentation of all error cases """ - - # Base documentation URL - _DOCS_BASE = "https://projectfluent.org/fluent/guide" - - @staticmethod - def message_not_found(message_id: str) -> Diagnostic: - """Message reference not found in bundle. - - Args: - message_id: The message identifier that was not found - - Returns: - Diagnostic for MESSAGE_NOT_FOUND - """ - msg = f"Message '{message_id}' not found" - return Diagnostic( - code=DiagnosticCode.MESSAGE_NOT_FOUND, - message=msg, - span=None, - hint="Check that the message is defined in the loaded resources", - help_url=f"{ErrorTemplate._DOCS_BASE}/messages.html", - ) - - @staticmethod - def attribute_not_found(attribute: str, message_id: str) -> Diagnostic: - """Message attribute not found. - - Args: - attribute: The attribute name that was not found - message_id: The message containing (or not) the attribute - - Returns: - Diagnostic for ATTRIBUTE_NOT_FOUND - """ - msg = f"Attribute '{attribute}' not found in message '{message_id}'" - return Diagnostic( - code=DiagnosticCode.ATTRIBUTE_NOT_FOUND, - message=msg, - span=None, - hint=f"Check that message '{message_id}' has an attribute '.{attribute}'", - help_url=f"{ErrorTemplate._DOCS_BASE}/attributes.html", - ) - - @staticmethod - def term_not_found(term_id: str) -> Diagnostic: - """Term reference not found. 
- - Args: - term_id: The term identifier (without leading -) - - Returns: - Diagnostic for TERM_NOT_FOUND - """ - msg = f"Term '-{term_id}' not found" - return Diagnostic( - code=DiagnosticCode.TERM_NOT_FOUND, - message=msg, - span=None, - hint="Terms must be defined before they are referenced", - help_url=f"{ErrorTemplate._DOCS_BASE}/terms.html", - ) - - @staticmethod - def term_attribute_not_found(attribute: str, term_id: str) -> Diagnostic: - """Term attribute not found. - - Args: - attribute: The attribute name that was not found - term_id: The term identifier (without leading -) - - Returns: - Diagnostic for TERM_ATTRIBUTE_NOT_FOUND - """ - msg = f"Attribute '{attribute}' not found in term '-{term_id}'" - return Diagnostic( - code=DiagnosticCode.TERM_ATTRIBUTE_NOT_FOUND, - message=msg, - span=None, - hint=f"Check that term '-{term_id}' has an attribute '.{attribute}'", - help_url=f"{ErrorTemplate._DOCS_BASE}/terms.html", - ) - - @staticmethod - def term_positional_args_ignored(term_name: str, count: int) -> Diagnostic: - """Term positional arguments ignored. - - Args: - term_name: The term identifier (without leading -) - count: Number of positional arguments that were ignored - - Returns: - Diagnostic for TERM_POSITIONAL_ARGS_IGNORED - """ - plural = "argument" if count == 1 else "arguments" - msg = ( - f"Term '-{term_name}' does not accept positional arguments " - f"(got {count}). Use named arguments: -term(key: value)" - ) - return Diagnostic( - code=DiagnosticCode.TERM_POSITIONAL_ARGS_IGNORED, - message=msg, - span=None, - hint=f"Remove the {count} positional {plural} and use named arguments instead", - help_url=f"{ErrorTemplate._DOCS_BASE}/terms.html", - ) - - @staticmethod - def plural_support_unavailable() -> Diagnostic: - """Plural variant matching unavailable due to missing Babel dependency. - - Returns: - Diagnostic for PLURAL_SUPPORT_UNAVAILABLE - """ - msg = ( - "Plural variant matching unavailable (Babel not installed). 
" - "Install with: pip install ftllexengine[babel]" - ) - return Diagnostic( - code=DiagnosticCode.PLURAL_SUPPORT_UNAVAILABLE, - message=msg, - span=None, - hint="Install Babel for CLDR-based plural category matching", - help_url=f"{ErrorTemplate._DOCS_BASE}/selectors.html", - ) - - @staticmethod - def variable_not_provided( - variable_name: str, - *, - resolution_path: tuple[str, ...] | None = None, - ) -> Diagnostic: - """Variable not provided in arguments. - - Args: - variable_name: The variable name (without leading $) - resolution_path: Optional resolution stack for debugging nested references - - Returns: - Diagnostic for VARIABLE_NOT_PROVIDED - """ - msg = f"Variable '${variable_name}' not provided" - return Diagnostic( - code=DiagnosticCode.VARIABLE_NOT_PROVIDED, - message=msg, - span=None, - hint=f"Pass '{variable_name}' in the arguments dictionary", - help_url=f"{ErrorTemplate._DOCS_BASE}/variables.html", - resolution_path=resolution_path, - ) - - @staticmethod - def message_no_value(message_id: str) -> Diagnostic: - """Message has no value (only attributes). - - Args: - message_id: The message identifier - - Returns: - Diagnostic for MESSAGE_NO_VALUE - """ - msg = f"Message '{message_id}' has no value" - return Diagnostic( - code=DiagnosticCode.MESSAGE_NO_VALUE, - message=msg, - span=None, - hint="Message has only attributes; specify which attribute to format", - help_url=f"{ErrorTemplate._DOCS_BASE}/messages.html", - ) - - @staticmethod - def cyclic_reference(resolution_path: list[str]) -> Diagnostic: - """Circular reference detected. 
- - Args: - resolution_path: The path of message references forming the cycle - - Returns: - Diagnostic for CYCLIC_REFERENCE - """ - # Build cycle visualization - cycle_chain = " -> ".join(resolution_path) - msg = f"Circular reference detected: {cycle_chain}" - - return Diagnostic( - code=DiagnosticCode.CYCLIC_REFERENCE, - message=msg, - span=None, - hint="Break the circular dependency by removing one of the references", - help_url=f"{ErrorTemplate._DOCS_BASE}/references.html", - ) - - @staticmethod - def max_depth_exceeded(message_id: str, max_depth: int) -> Diagnostic: - """Maximum resolution depth exceeded. - - Args: - message_id: The message that was being resolved when depth limit hit - max_depth: The maximum allowed depth - - Returns: - Diagnostic for MAX_DEPTH_EXCEEDED - """ - msg = f"Maximum resolution depth ({max_depth}) exceeded while resolving '{message_id}'" - - return Diagnostic( - code=DiagnosticCode.MAX_DEPTH_EXCEEDED, - message=msg, - span=None, - hint="Reduce message reference chain depth or refactor to avoid deep nesting", - help_url=f"{ErrorTemplate._DOCS_BASE}/references.html", - ) - - @staticmethod - def depth_exceeded(max_depth: int) -> Diagnostic: - """Maximum nesting depth exceeded. - - Used when any nesting (expressions, validation traversal, serialization) - exceeds the configured depth limit. Protects against stack overflow from - adversarial or malformed input. - - Args: - max_depth: The maximum allowed nesting depth - - Returns: - Diagnostic for MAX_DEPTH_EXCEEDED - """ - msg = f"Maximum nesting depth ({max_depth}) exceeded" - - return Diagnostic( - code=DiagnosticCode.MAX_DEPTH_EXCEEDED, - message=msg, - span=None, - hint="Reduce nesting depth or check for malformed AST construction", - help_url=f"{ErrorTemplate._DOCS_BASE}/references.html", - ) - - @staticmethod - def expansion_budget_exceeded(total_chars: int, max_chars: int) -> Diagnostic: - """Expansion budget exceeded during resolution. 
- - Prevents Billion Laughs attacks where small FTL input expands to - gigabytes via nested message references. - - Args: - total_chars: Total characters produced so far - max_chars: Maximum allowed characters - - Returns: - Diagnostic for EXPANSION_BUDGET_EXCEEDED - """ - msg = ( - f"Expansion budget exceeded: {total_chars} characters produced " - f"(limit: {max_chars})" - ) - - return Diagnostic( - code=DiagnosticCode.EXPANSION_BUDGET_EXCEEDED, - message=msg, - span=None, - hint="Check for exponentially expanding message references (Billion Laughs pattern)", - help_url=f"{ErrorTemplate._DOCS_BASE}/references.html", - ) - - @staticmethod - def no_variants() -> Diagnostic: - """Select expression has no variants. - - Returns: - Diagnostic for NO_VARIANTS - """ - msg = "No variants in select expression" - return Diagnostic( - code=DiagnosticCode.NO_VARIANTS, - message=msg, - span=None, - hint="Select expressions must have at least one variant", - help_url=f"{ErrorTemplate._DOCS_BASE}/selectors.html", - ) - - @staticmethod - def function_not_found(function_name: str) -> Diagnostic: - """Function not found in registry. - - Args: - function_name: The function name (e.g., "NUMBER", "DATETIME", "CURRENCY") - - Returns: - Diagnostic for FUNCTION_NOT_FOUND - """ - msg = f"Function '{function_name}' not found" - return Diagnostic( - code=DiagnosticCode.FUNCTION_NOT_FOUND, - message=msg, - span=None, - hint="Built-in functions: NUMBER, DATETIME, CURRENCY. Check spelling.", - help_url=f"{ErrorTemplate._DOCS_BASE}/functions.html", - ) - - @staticmethod - def function_failed(function_name: str, error_msg: str) -> Diagnostic: - """Function execution failed. 
- - Args: - function_name: The function that failed - error_msg: The error message from the function - - Returns: - Diagnostic for FUNCTION_FAILED - """ - msg = f"Function '{function_name}' failed: {error_msg}" - return Diagnostic( - code=DiagnosticCode.FUNCTION_FAILED, - message=msg, - span=None, - hint="Check the function arguments and their types", - help_url=f"{ErrorTemplate._DOCS_BASE}/functions.html", - function_name=function_name, - ) - - @staticmethod - def formatting_failed( - function_name: str, - value: str, - error_reason: str, - ) -> Diagnostic: - """Locale-aware formatting failed. - - Used when NUMBER(), DATETIME(), or CURRENCY() fails to format a value. - - Args: - function_name: The formatting function (NUMBER, DATETIME, CURRENCY) - value: The value that failed to format - error_reason: The reason formatting failed - - Returns: - Diagnostic for FORMATTING_FAILED - """ - msg = f"{function_name}() formatting failed for value '{value}': {error_reason}" - return Diagnostic( - code=DiagnosticCode.FORMATTING_FAILED, - message=msg, - span=None, - hint="Check that the value is valid for the specified format options", - help_url=f"{ErrorTemplate._DOCS_BASE}/functions.html", - function_name=function_name, - ) - - @staticmethod - def function_arity_mismatch( - function_name: str, - expected: int, - received: int, - ) -> Diagnostic: - """Function called with wrong number of positional arguments. 
- - Args: - function_name: The function that was called - expected: Expected number of positional arguments - received: Actual number of positional arguments - - Returns: - Diagnostic for FUNCTION_ARITY_MISMATCH - """ - msg = ( - f"Function '{function_name}' expects {expected} argument(s), " - f"got {received}" - ) - return Diagnostic( - code=DiagnosticCode.FUNCTION_ARITY_MISMATCH, - message=msg, - span=None, - hint=f"Pass exactly {expected} value(s) to {function_name}()", - help_url=f"{ErrorTemplate._DOCS_BASE}/functions.html", - function_name=function_name, - ) - - @staticmethod - def type_mismatch( - function_name: str, - argument_name: str, - expected_type: str, - received_type: str, - *, - ftl_location: str | None = None, - ) -> Diagnostic: - """Type mismatch in function argument. - - Args: - function_name: Function where type mismatch occurred - argument_name: Argument name that has wrong type - expected_type: Expected type (e.g., "Number", "String") - received_type: Actual type received - ftl_location: FTL file location (optional) - - Returns: - Diagnostic for TYPE_MISMATCH - """ - msg = f"Type mismatch in {function_name}(): expected {expected_type}, got {received_type}" - hint = f"Convert '{argument_name}' to {expected_type} before passing to {function_name}()" - return Diagnostic( - code=DiagnosticCode.TYPE_MISMATCH, - message=msg, - span=None, - hint=hint, - help_url=f"{ErrorTemplate._DOCS_BASE}/functions.html", - function_name=function_name, - argument_name=argument_name, - expected_type=expected_type, - received_type=received_type, - ftl_location=ftl_location, - ) - - @staticmethod - def invalid_argument( - function_name: str, - argument_name: str, - reason: str, - *, - ftl_location: str | None = None, - ) -> Diagnostic: - """Invalid argument value. 
- - Args: - function_name: Function where invalid argument was provided - argument_name: Argument name that is invalid - reason: Why the argument is invalid - ftl_location: FTL file location (optional) - - Returns: - Diagnostic for INVALID_ARGUMENT - """ - msg = f"Invalid argument '{argument_name}' in {function_name}(): {reason}" - return Diagnostic( - code=DiagnosticCode.INVALID_ARGUMENT, - message=msg, - span=None, - hint=f"Check the value of '{argument_name}' argument", - help_url=f"{ErrorTemplate._DOCS_BASE}/functions.html", - function_name=function_name, - argument_name=argument_name, - ftl_location=ftl_location, - ) - - @staticmethod - def argument_required( - function_name: str, - argument_name: str, - *, - ftl_location: str | None = None, - ) -> Diagnostic: - """Required argument not provided. - - Args: - function_name: Function missing required argument - argument_name: Name of required argument - ftl_location: FTL file location (optional) - - Returns: - Diagnostic for ARGUMENT_REQUIRED - """ - msg = f"Required argument '{argument_name}' not provided for {function_name}()" - return Diagnostic( - code=DiagnosticCode.ARGUMENT_REQUIRED, - message=msg, - span=None, - hint=f"Add '{argument_name}' argument to {function_name}() call", - help_url=f"{ErrorTemplate._DOCS_BASE}/functions.html", - function_name=function_name, - argument_name=argument_name, - ftl_location=ftl_location, - ) - - @staticmethod - def pattern_invalid( - function_name: str, - pattern: str, - reason: str, - *, - ftl_location: str | None = None, - ) -> Diagnostic: - """Invalid format pattern. 
- - Args: - function_name: Function with invalid pattern - pattern: The invalid pattern string - reason: Why the pattern is invalid - ftl_location: FTL file location (optional) - - Returns: - Diagnostic for PATTERN_INVALID - """ - msg = f"Invalid pattern in {function_name}(): {reason}" - return Diagnostic( - code=DiagnosticCode.PATTERN_INVALID, - message=msg, - span=None, - hint=f"Check pattern syntax: '{pattern}'", - help_url=f"{ErrorTemplate._DOCS_BASE}/functions.html", - function_name=function_name, - argument_name="pattern", - ftl_location=ftl_location, - severity="error", - ) - - @staticmethod - def unknown_expression(expr_type: str) -> Diagnostic: - """Unknown expression type encountered. - - Args: - expr_type: The expression type name - - Returns: - Diagnostic for UNKNOWN_EXPRESSION - """ - msg = f"Unknown expression type: {expr_type}" - return Diagnostic( - code=DiagnosticCode.UNKNOWN_EXPRESSION, - message=msg, - span=None, - hint="This is likely a bug in the parser or resolver", - ) - - @staticmethod - def unexpected_eof(position: int) -> Diagnostic: - """Unexpected end of file. - - Args: - position: The position where EOF was encountered - - Returns: - Diagnostic for UNEXPECTED_EOF - """ - msg = f"Unexpected EOF at position {position}" - return Diagnostic( - code=DiagnosticCode.UNEXPECTED_EOF, - message=msg, - span=None, - hint="Check for unclosed braces or incomplete syntax", - ) - - # ========================================================================= - # PARSING ERRORS (4000-4999) - Bi-directional localization - # ========================================================================= - - @staticmethod - def parse_decimal_failed( - value: str, - locale_code: str, - reason: str, - ) -> Diagnostic: - """Decimal parsing failed. 
- - Args: - value: The input string that failed to parse - locale_code: The locale used for parsing - reason: The reason parsing failed - - Returns: - Diagnostic for PARSE_DECIMAL_FAILED - """ - msg = f"Failed to parse decimal '{value}' for locale '{locale_code}': {reason}" - return Diagnostic( - code=DiagnosticCode.PARSE_DECIMAL_FAILED, - message=msg, - span=None, - hint="Check that the decimal format matches the locale's conventions", - ) - - @staticmethod - def parse_date_failed( - value: str, - locale_code: str, - reason: str, - ) -> Diagnostic: - """Date parsing failed. - - Args: - value: The input string that failed to parse - locale_code: The locale used for parsing - reason: The reason parsing failed - - Returns: - Diagnostic for PARSE_DATE_FAILED - """ - msg = f"Failed to parse date '{value}' for locale '{locale_code}': {reason}" - return Diagnostic( - code=DiagnosticCode.PARSE_DATE_FAILED, - message=msg, - span=None, - hint="Use ISO 8601 (YYYY-MM-DD) for unambiguous, locale-independent dates", - ) - - @staticmethod - def parse_datetime_failed( - value: str, - locale_code: str, - reason: str, - ) -> Diagnostic: - """Datetime parsing failed. - - Args: - value: The input string that failed to parse - locale_code: The locale used for parsing - reason: The reason parsing failed - - Returns: - Diagnostic for PARSE_DATETIME_FAILED - """ - msg = f"Failed to parse datetime '{value}' for locale '{locale_code}': {reason}" - return Diagnostic( - code=DiagnosticCode.PARSE_DATETIME_FAILED, - message=msg, - span=None, - hint="Use ISO 8601 (YYYY-MM-DD HH:MM:SS) for unambiguous, locale-independent datetimes", - ) - - @staticmethod - def parse_currency_failed( - value: str, - locale_code: str, - reason: str, - ) -> Diagnostic: - """Currency parsing failed. 
- - Args: - value: The input string that failed to parse - locale_code: The locale used for parsing - reason: The reason parsing failed - - Returns: - Diagnostic for PARSE_CURRENCY_FAILED - """ - msg = f"Failed to parse currency '{value}' for locale '{locale_code}': {reason}" - return Diagnostic( - code=DiagnosticCode.PARSE_CURRENCY_FAILED, - message=msg, - span=None, - hint="Use ISO currency codes (USD, EUR, GBP) for unambiguous parsing", - ) - - @staticmethod - def parse_locale_unknown(locale_code: str) -> Diagnostic: - """Unknown locale for parsing. - - Args: - locale_code: The unknown locale code - - Returns: - Diagnostic for PARSE_LOCALE_UNKNOWN - """ - msg = f"Unknown locale '{locale_code}'" - return Diagnostic( - code=DiagnosticCode.PARSE_LOCALE_UNKNOWN, - message=msg, - span=None, - hint="Use BCP 47 locale codes (e.g., 'en_US', 'de_DE', 'lv_LV')", - ) - - @staticmethod - def parse_currency_ambiguous( - symbol: str, - value: str, - ) -> Diagnostic: - """Ambiguous currency symbol. - - Args: - symbol: The ambiguous currency symbol - value: The full currency string - - Returns: - Diagnostic for PARSE_CURRENCY_AMBIGUOUS - """ - msg = ( - f"Ambiguous currency symbol '{symbol}' in '{value}'. " - f"Symbol '{symbol}' is used by multiple currencies." - ) - return Diagnostic( - code=DiagnosticCode.PARSE_CURRENCY_AMBIGUOUS, - message=msg, - span=None, - hint="Use default_currency parameter, infer_from_locale=True, or ISO code (USD, EUR)", - ) - - @staticmethod - def parse_currency_symbol_unknown( - symbol: str, - value: str, - ) -> Diagnostic: - """Unknown currency symbol. 
- - Args: - symbol: The unknown currency symbol - value: The full currency string - - Returns: - Diagnostic for PARSE_CURRENCY_SYMBOL_UNKNOWN - """ - msg = f"Unknown currency symbol '{symbol}' in '{value}'" - return Diagnostic( - code=DiagnosticCode.PARSE_CURRENCY_SYMBOL_UNKNOWN, - message=msg, - span=None, - hint="Use ISO currency codes (USD, EUR, GBP) or supported symbols", - ) - - @staticmethod - def parse_currency_code_invalid( - code: str, - value: str, - ) -> Diagnostic: - """Invalid ISO 4217 currency code. - - Args: - code: The invalid 3-letter currency code - value: The full currency string - - Returns: - Diagnostic for PARSE_CURRENCY_CODE_INVALID - """ - msg = f"Invalid ISO 4217 currency code '{code}' in '{value}'" - return Diagnostic( - code=DiagnosticCode.PARSE_CURRENCY_CODE_INVALID, - message=msg, - span=None, - hint="Use valid ISO 4217 codes (USD, EUR, GBP, JPY, etc.)", - ) - - @staticmethod - def parse_amount_invalid( - amount_str: str, - value: str, - reason: str, - ) -> Diagnostic: - """Invalid amount in currency string. 
def _maybe_unknown_locale_error_class() -> type[Exception] | None:
    """Return Babel's UnknownLocaleError class when available.

    Returns:
        The class returned by ``get_unknown_locale_error_class()``, or ``None``
        when that call raises ``BabelImportError``.
    """
    try:
        return get_unknown_locale_error_class()
    except BabelImportError:
        return None


def _babel_lookup_errors() -> tuple[type[Exception], ...]:
    """Return the exception types the Babel lookups below treat as "no data".

    The tuple always holds ``ValueError``, ``LookupError``, ``KeyError`` and
    ``AttributeError``; Babel's ``UnknownLocaleError`` is appended when its
    class can be resolved. Building the tuple in one place lets every lookup
    use a single ``try``/``except`` instead of one copy per case.
    """
    expected: tuple[type[Exception], ...] = (
        ValueError,
        LookupError,
        KeyError,
        AttributeError,
    )
    unknown_locale_error = _maybe_unknown_locale_error_class()
    if unknown_locale_error is not None:
        expected = (*expected, unknown_locale_error)
    return expected


def _get_babel_territories(locale_str: str) -> dict[str, str]:
    """Get territory names from Babel for a locale.

    Args:
        locale_str: Locale identifier handed to ``_get_babel_locale``.

    Returns:
        A new ``dict`` copied from the locale's ``territories`` mapping, or an
        empty dict when the lookup raises one of ``_babel_lookup_errors()``.
        Any other exception propagates to the caller.
    """
    lookup_errors = _babel_lookup_errors()
    try:
        return dict(_get_babel_locale(locale_str).territories)
    except lookup_errors:
        return {}


@lru_cache(maxsize=1)
def _get_babel_currencies() -> dict[str, str]:
    """Get English currency names from Babel. Result is invariant; cached once.

    Returns:
        A ``dict`` copied from the ``"en"`` locale's ``currencies`` mapping.
        Because of ``lru_cache`` the same dict object is handed to every
        caller, so callers must treat it as read-only.
    """
    return dict(_get_babel_locale("en").currencies)


def _get_babel_currency_name(code: str, locale_str: str) -> str | None:
    """Get localized currency name from Babel.

    Args:
        code: Currency code; compared and looked up in upper case.
        locale_str: Locale identifier parsed with Babel's locale class.

    Returns:
        The currency name as ``str``, or ``None`` when the upper-cased code is
        not in the locale's ``currencies`` mapping or the lookup raises one of
        ``_babel_lookup_errors()``. Any other exception propagates.
    """
    locale_class = get_locale_class()
    babel_numbers = get_babel_numbers()
    lookup_errors = _babel_lookup_errors()
    try:
        locale = locale_class.parse(locale_str)
        # Normalize once and use the same spelling for the guard and the
        # lookup. Previously the guard used code.upper() but the lookup used
        # the raw code, so a lower-case code passed the guard and then missed
        # the table (Babel's get_currency_name falls back to echoing the code).
        currency_code = code.upper()
        if currency_code not in locale.currencies:
            return None
        return str(babel_numbers.get_currency_name(currency_code, locale=locale_str))
    except lookup_errors:
        return None


def _get_babel_currency_symbol(code: str, locale_str: str) -> str:
    """Get localized currency symbol from Babel.

    Args:
        code: Currency code passed to Babel unchanged.
        locale_str: Locale identifier passed to Babel.

    Returns:
        The symbol as ``str``; ``code`` itself when the lookup raises one of
        ``_babel_lookup_errors()``. Any other exception propagates.
    """
    babel_numbers = get_babel_numbers()
    lookup_errors = _babel_lookup_errors()
    try:
        return str(babel_numbers.get_currency_symbol(code, locale=locale_str))
    except lookup_errors:
        return code
localization symbols.""" + if _BABEL_AVAILABLE and name in _BABEL_OPTIONAL_ATTRS: + value = load_babel_optional_export(__name__, name) + globals()[name] = value + return value return raise_missing_babel_symbol( module_name=__name__, name=name, @@ -70,22 +74,21 @@ def __getattr__(name: str) -> object: ) -__all__ = [ +# ruff: noqa: RUF022 - grouped localization exports mirror the reader-facing facade +__all__: list[str] = [ "CacheAuditLogEntry", "FallbackInfo", "FTLSource", - "FluentLocalization", "LoadStatus", "LoadSummary", "LocaleCode", - "LocalizationBootConfig", - "LocalizationCacheStats", "MessageId", "PathResourceLoader", "ResourceId", "ResourceLoadResult", "ResourceLoader", ] +__all__[6:6] = list(_BABEL_OPTIONAL_NAMES) if not _BABEL_AVAILABLE: __all__ = [name for name in __all__ if name not in _BABEL_OPTIONAL_ATTRS] diff --git a/src/ftllexengine/runtime/__init__.py b/src/ftllexengine/runtime/__init__.py index cb4b6311..e873f5c0 100644 --- a/src/ftllexengine/runtime/__init__.py +++ b/src/ftllexengine/runtime/__init__.py @@ -11,10 +11,9 @@ from typing import TYPE_CHECKING from ftllexengine._optional_exports import ( - RUNTIME_BABEL_OPTIONAL_ATTRS as _BABEL_OPTIONAL_ATTRS, -) -from ftllexengine._optional_exports import ( - load_runtime_babel_optional_exports, + babel_optional_attr_set, + babel_optional_attr_tuple, + load_babel_optional_export, raise_missing_babel_symbol, ) from ftllexengine.core.babel_compat import is_babel_available @@ -26,25 +25,36 @@ from .value_types import make_fluent_number if TYPE_CHECKING: - from .async_bundle import AsyncFluentBundle - from .bundle import FluentBundle + from .async_bundle import AsyncFluentBundle as AsyncFluentBundle + from .bundle import FluentBundle as FluentBundle + from .functions import ( + create_default_registry as create_default_registry, + ) + from .functions import ( + currency_format as currency_format, + ) from .functions import ( - create_default_registry, - currency_format, - datetime_format, - 
get_shared_registry, - number_format, + datetime_format as datetime_format, ) - from .plural_rules import select_plural_category + from .functions import ( + get_shared_registry as get_shared_registry, + ) + from .functions import ( + number_format as number_format, + ) + from .plural_rules import select_plural_category as select_plural_category _BABEL_AVAILABLE = is_babel_available() - -if _BABEL_AVAILABLE: - globals().update(load_runtime_babel_optional_exports()) +_BABEL_OPTIONAL_ATTRS = babel_optional_attr_set(__name__) +_BABEL_OPTIONAL_NAMES = babel_optional_attr_tuple(__name__) def __getattr__(name: str) -> object: """Raise a targeted missing-symbol error for Babel-backed runtime symbols.""" + if _BABEL_AVAILABLE and name in _BABEL_OPTIONAL_ATTRS: + value = load_babel_optional_export(__name__, name) + globals()[name] = value + return value return raise_missing_babel_symbol( module_name=__name__, name=name, @@ -57,24 +67,18 @@ def __getattr__(name: str) -> object: ) -__all__ = [ - "AsyncFluentBundle", + +__all__: list[str] = [ "CacheAuditLogEntry", "CacheConfig", - "FluentBundle", "FluentNumber", "FunctionRegistry", "ValidationResult", "WriteLogEntry", - "create_default_registry", - "currency_format", - "datetime_format", "fluent_function", - "get_shared_registry", "make_fluent_number", - "number_format", - "select_plural_category", ] +__all__[0:0] = list(_BABEL_OPTIONAL_NAMES) if not _BABEL_AVAILABLE: __all__ = [name for name in __all__ if name not in _BABEL_OPTIONAL_ATTRS] diff --git a/src/ftllexengine/runtime/bundle.py b/src/ftllexengine/runtime/bundle.py index fa9f0242..100d0c01 100644 --- a/src/ftllexengine/runtime/bundle.py +++ b/src/ftllexengine/runtime/bundle.py @@ -1,97 +1,60 @@ -"""FluentBundle - Main API for Fluent message formatting. - -Python 3.13+. External dependency: Babel (CLDR locale data). 
-""" +"""FluentBundle public type composed from focused runtime mixins.""" from __future__ import annotations -import logging from typing import TYPE_CHECKING -from ftllexengine.constants import ( - DEFAULT_MAX_EXPANSION_SIZE, - MAX_DEPTH, - MAX_SOURCE_SIZE, -) -from ftllexengine.core.depth_guard import depth_clamp -from ftllexengine.core.locale_utils import get_system_locale, require_locale_code -from ftllexengine.runtime.bundle_formatting import _BundleFormattingMixin -from ftllexengine.runtime.bundle_queries import _BundleQueryMixin -from ftllexengine.runtime.bundle_registration import _BundleRegistrationMixin -from ftllexengine.runtime.cache import CacheAuditLogEntry, CacheStats, IntegrityCache -from ftllexengine.runtime.function_bridge import FunctionRegistry -from ftllexengine.runtime.functions import get_shared_registry -from ftllexengine.runtime.locale_context import LocaleContext -from ftllexengine.runtime.rwlock import RWLock -from ftllexengine.syntax import Entry, Junk, Message, Resource, Term -from ftllexengine.syntax.parser import FluentParserV1 -from ftllexengine.validation import validate_resource as _validate_resource_impl +from .bundle_formatting import _BundleFormattingMixin +from .bundle_lifecycle import _BundleLifecycleMixin +from .bundle_mutation import _BundleMutationMixin +from .bundle_queries import _BundleQueryMixin +from .bundle_registration import _BundleRegistrationMixin + +__all__ = ["FluentBundle"] if TYPE_CHECKING: - from collections.abc import Callable, Iterable, Mapping + from collections.abc import Mapping from ftllexengine.core.semantic_types import LocaleCode from ftllexengine.core.value_types import FluentValue - from ftllexengine.diagnostics import FrozenFluentError, ValidationResult - from ftllexengine.runtime.cache_config import CacheConfig - from ftllexengine.runtime.resolver import FluentResolver - -__all__ = ["FluentBundle"] - -logger = logging.getLogger(__name__) - - -class FluentBundle(_BundleQueryMixin, 
_BundleFormattingMixin, _BundleRegistrationMixin): - """Fluent message bundle for specific locale. - - Main public API for Fluent localization. Aligned with Mozilla python-fluent - error handling that returns (result, errors) tuples. - - Thread Safety: - FluentBundle is always thread-safe using a readers-writer lock (RWLock). - This enables high-concurrency access patterns: - - - Read operations (format_pattern, format_message, has_message, etc.) - can execute concurrently without blocking each other. - - Write operations (add_resource, add_function) acquire exclusive access. - - Writers have priority to prevent starvation in read-heavy workloads. - - This design provides superior throughput for multi-threaded applications - while maintaining full thread safety. Typical web servers with 100+ - concurrent format requests will see significant performance improvements - compared to coarse-grained locking. - - Reentrancy Limitation: - Modifying the bundle from within format operations is PROHIBITED and - raises RuntimeError. This includes calling add_resource() or add_function() - from custom functions invoked during formatting. The RWLock does not - support read-to-write lock upgrading (deadlock prevention). - - If you need lazy-loading patterns, load resources before formatting - or use a separate bundle instance for dynamic content. - - Parser Security: - Configurable limits prevent DoS attacks: - - max_source_size: Maximum FTL source length in characters (default: 10,000,000) - - max_nesting_depth: Maximum placeable nesting depth (default: 100) - - Examples: - >>> bundle = FluentBundle("lv_LV") # doctest: +SKIP - >>> bundle.add_resource(''' # doctest: +SKIP - ... hello = Sveiki, pasaule! - ... welcome = Laipni lūdzam, { $name }! - ... ''') - >>> result, errors = bundle.format_pattern("hello") # doctest: +SKIP - >>> assert result == 'Sveiki, pasaule!' 
# doctest: +SKIP - >>> assert errors == () # doctest: +SKIP - - >>> result, errors = bundle.format_pattern("welcome", {"name": "Jānis"}) # doctest: +SKIP - >>> assert result == 'Laipni lūdzam, Jānis!' # doctest: +SKIP - >>> assert errors == () # doctest: +SKIP - - Custom security limits for stricter environments: - >>> strict_bundle = FluentBundle("en_US", max_source_size=1_000_000) # doctest: +SKIP - """ + from ftllexengine.diagnostics import FrozenFluentError + from ftllexengine.syntax import Message, Term + from ftllexengine.syntax.parser import FluentParserV1 + + from .bundle_protocols import BundleStateProtocol + from .cache import IntegrityCache + from .cache_config import CacheConfig + from .function_bridge import FunctionRegistry + from .resolver import FluentResolver + from .rwlock import RWLock + + +class FluentBundle( + _BundleLifecycleMixin, + _BundleQueryMixin, + _BundleFormattingMixin, + _BundleRegistrationMixin, + _BundleMutationMixin, +): + """Fluent message bundle for specific locale.""" + + _cache: IntegrityCache | None + _cache_config: CacheConfig | None + _function_registry: FunctionRegistry + _locale: LocaleCode + _max_expansion_size: int + _max_nesting_depth: int + _max_source_size: int + _messages: dict[str, Message] + _msg_deps: dict[str, frozenset[str]] + _owns_registry: bool + _parser: FluentParserV1 + _resolver: FluentResolver + _rwlock: RWLock + _strict: bool + _term_deps: dict[str, frozenset[str]] + _terms: dict[str, Term] + _use_isolating: bool __slots__ = ( "_cache", @@ -113,717 +76,14 @@ class FluentBundle(_BundleQueryMixin, _BundleFormattingMixin, _BundleRegistratio "_use_isolating", ) - def __init__( - self, - locale: str, - /, - *, - use_isolating: bool = True, - cache: CacheConfig | None = None, - functions: FunctionRegistry | None = None, - max_source_size: int | None = None, - max_nesting_depth: int | None = None, - max_expansion_size: int | None = None, - strict: bool = True, - ) -> None: - """Initialize bundle for locale. 
- - Args: - locale: Locale code (lv_LV, en_US, de_DE, pl_PL) [positional-only] - use_isolating: Wrap interpolated values in Unicode bidi isolation marks (default: True). - Set to False only if you're certain RTL languages won't be used. - See Unicode TR9: http://www.unicode.org/reports/tr9/ - cache: Cache configuration (default: None = caching disabled). - Pass ``CacheConfig()`` for default settings or customize fields. - Cache provides 50x speedup on repeated format calls. - functions: Custom FunctionRegistry to use (default: standard registry with - NUMBER, DATETIME, CURRENCY). Pass a custom registry to use - pre-registered custom functions or override default behavior. - The registry is copied on construction; later mutations to the - original have no effect on this bundle. - max_source_size: Maximum FTL source length in characters (default: 10,000,000). - Set to 0 to disable limit (not recommended for untrusted input). - max_nesting_depth: Maximum placeable nesting depth (default: 100). - Prevents DoS via deeply nested { { { ... } } } structures. - max_expansion_size: Maximum total characters produced during resolution (default: 1,000,000). - Prevents Billion Laughs attacks via exponentially expanding message references. - strict: Fail-fast on formatting errors (default: True). - When True, format_pattern raises FormattingIntegrityError on ANY error - instead of returning fallback values. Set to False only for development - or when soft error recovery is explicitly required. Also affects cache - corruption handling: raises CacheCorruptionError instead of silent eviction. - - Raises: - ValueError: If locale code is empty, structurally invalid, or not - recognized by Babel/CLDR - - Thread Safety: - FluentBundle is always thread-safe using a readers-writer lock (RWLock). - Read operations (format calls) execute concurrently without blocking. - Write operations (add_resource, add_function) acquire exclusive access. 
- - Example: - >>> from ftllexengine.runtime.cache_config import CacheConfig # doctest: +SKIP - - Using the default registry (standard functions): - >>> bundle = FluentBundle("en") # doctest: +SKIP - - Using a custom registry with additional functions: - >>> from ftllexengine.runtime.functions import create_default_registry # doctest: +SKIP - >>> registry = create_default_registry() # doctest: +SKIP - >>> registry.register(my_custom_func, ftl_name="CUSTOM") # doctest: +SKIP - >>> bundle = FluentBundle("en", functions=registry) # doctest: +SKIP - - Stricter limits for untrusted input: - >>> bundle = FluentBundle("en", max_source_size=100_000, max_nesting_depth=20) # doctest: +SKIP - - Financial-grade default: `strict=True` with a write-once cache: - >>> bundle = FluentBundle("en", cache=CacheConfig(write_once=True)) # doctest: +SKIP - - Audit-enabled cache for compliance: - >>> bundle = FluentBundle("en", cache=CacheConfig(enable_audit=True)) # doctest: +SKIP - """ - # Validate against Babel/CLDR at the public boundary so the bundle never - # advertises one locale while formatting with a different fallback locale. - canonical_locale = require_locale_code(locale, "locale") - locale_context = LocaleContext.create_or_raise(canonical_locale) - self._locale = locale_context.locale_code - self._use_isolating = use_isolating - self._strict = strict - self._messages: dict[str, Message] = {} - self._terms: dict[str, Term] = {} - - # Dependency tracking for cross-resource cycle detection. - # Maps entry ID to set of (type-prefixed) dependencies. 
- # E.g., {"greeting": {"msg:welcome", "term:brand"}} - self._msg_deps: dict[str, frozenset[str]] = {} - self._term_deps: dict[str, frozenset[str]] = {} - - # Parser security configuration - self._max_source_size = max_source_size if max_source_size is not None else MAX_SOURCE_SIZE - requested_depth = max_nesting_depth if max_nesting_depth is not None else MAX_DEPTH - self._max_nesting_depth = depth_clamp(requested_depth) - self._max_expansion_size = ( - max_expansion_size if max_expansion_size is not None else DEFAULT_MAX_EXPANSION_SIZE - ) - self._parser = FluentParserV1( - max_source_size=self._max_source_size, - max_nesting_depth=self._max_nesting_depth, - ) - - # Thread safety: always enabled via RWLock (readers-writer lock) - self._rwlock = RWLock() - - # Function registry: copy-on-write optimization - if functions is not None: - if not isinstance(functions, FunctionRegistry): - msg = ( # type: ignore[unreachable] - f"functions must be FunctionRegistry, not {type(functions).__name__}. " - "Use create_default_registry() or FunctionRegistry() to create one." - ) - raise TypeError(msg) - self._function_registry = functions.copy() - self._owns_registry = True - else: - self._function_registry = get_shared_registry() - self._owns_registry = False - - # Cache configuration and instance - self._cache_config: CacheConfig | None = cache - self._cache: IntegrityCache | None = None - - if cache is not None: - # The bundle's strict flag gates cache exception propagation: a - # non-strict bundle must never raise CacheCorruptionError from - # format_pattern. When strict=False, corruption is always handled - # by silent eviction regardless of CacheConfig.integrity_strict. - # When strict=True, CacheConfig.integrity_strict is the user's - # explicit fine-grained control (AND-gate: both must be True for - # CacheCorruptionError to propagate). 
- self._cache = IntegrityCache( - maxsize=cache.size, - max_entry_weight=cache.max_entry_weight, - max_errors_per_entry=cache.max_errors_per_entry, - write_once=cache.write_once, - strict=cache.integrity_strict and strict, - enable_audit=cache.enable_audit, - max_audit_entries=cache.max_audit_entries, - ) - - # Resolver: eagerly created, re-created only when function_registry changes. - # Holds dict references (not copies) so add_resource() mutations are immediately - # visible without re-creation. Initialized here to eliminate the read-lock - # write race that existed in the previous lazy-initialization pattern. - self._resolver: FluentResolver = self._create_resolver() - - logger.info( - "FluentBundle initialized for locale: %s (use_isolating=%s, cache=%s, strict=%s)", - self._locale, - use_isolating, - "enabled" if cache is not None else "disabled", - strict, - ) - - @property - def locale(self) -> LocaleCode: - """Get the canonical locale code for this bundle (read-only). - - Returns: - LocaleCode: Canonical lowercase POSIX locale code (e.g., "en_us", "lv_lv") - - Example: - >>> bundle = FluentBundle("lv_LV") # doctest: +SKIP - >>> bundle.locale # doctest: +SKIP - 'lv_lv' - """ - return self._locale - - @property - def use_isolating(self) -> bool: - """Get whether Unicode bidi isolation is enabled (read-only). - - Returns: - bool: True if bidi isolation is enabled, False otherwise - - Example: - >>> bundle = FluentBundle("ar_EG", use_isolating=True) # doctest: +SKIP - >>> bundle.use_isolating # doctest: +SKIP - True - """ - return self._use_isolating - - @property - def strict(self) -> bool: - """Get whether strict mode is enabled (read-only). - - Strict mode raises FormattingIntegrityError on ANY formatting error - instead of returning fallback values. Essential for financial applications - where silent fallbacks are unacceptable. 
- - Returns: - bool: True if strict mode is enabled, False otherwise - - Example: - >>> bundle = FluentBundle("en", strict=True) # doctest: +SKIP - >>> bundle.strict # doctest: +SKIP - True - >>> bundle_normal = FluentBundle("en") # doctest: +SKIP - >>> bundle_normal.strict # doctest: +SKIP - True - """ - return self._strict - - @property - def cache_enabled(self) -> bool: - """Get whether format caching is enabled (read-only). - - Returns: - bool: True if caching is enabled, False otherwise - - Example: - >>> from ftllexengine.runtime.cache_config import CacheConfig # doctest: +SKIP - >>> bundle = FluentBundle("en", cache=CacheConfig()) # doctest: +SKIP - >>> bundle.cache_enabled # doctest: +SKIP - True - >>> bundle_no_cache = FluentBundle("en") # doctest: +SKIP - >>> bundle_no_cache.cache_enabled # doctest: +SKIP - False - """ - return self._cache is not None - - @property - def cache_config(self) -> CacheConfig | None: - """Get cache configuration (read-only). - - Returns: - CacheConfig if caching is enabled, None if caching is disabled. - - Example: - >>> from ftllexengine.runtime.cache_config import CacheConfig # doctest: +SKIP - >>> bundle = FluentBundle("en", cache=CacheConfig(size=500)) # doctest: +SKIP - >>> bundle.cache_config.size # doctest: +SKIP - 500 - >>> bundle_no_cache = FluentBundle("en") # doctest: +SKIP - >>> bundle_no_cache.cache_config is None # doctest: +SKIP - True - """ - return self._cache_config - - @property - def cache_usage(self) -> int: - """Get current number of cached format results (read-only). - - Returns: - int: Number of entries currently in cache (0 if caching disabled) - """ - if self._cache is None: - return 0 - return self._cache.size - - @property - def max_source_size(self) -> int: - """Maximum FTL source size in characters (read-only). - - Python measures string length in characters (code points), not bytes. - UTF-8 encoding means 1 character = 1-4 bytes, but this limit counts - characters as returned by len(source). 
- - Returns: - int: Maximum source size limit for add_resource() - - Example: - >>> bundle = FluentBundle("en", max_source_size=1_000_000) # doctest: +SKIP - >>> bundle.max_source_size # doctest: +SKIP - 1000000 - """ - return self._max_source_size - - @property - def max_nesting_depth(self) -> int: - """Maximum placeable nesting depth (read-only). - - Returns: - int: Maximum nesting depth limit for parser - - Example: - >>> bundle = FluentBundle("en", max_nesting_depth=50) # doctest: +SKIP - >>> bundle.max_nesting_depth # doctest: +SKIP - 50 - """ - return self._max_nesting_depth - - @property - def max_expansion_size(self) -> int: - """Maximum total characters produced during resolution (read-only). - - Returns: - int: Maximum expansion budget for DoS prevention - """ - return self._max_expansion_size - - @property - def function_registry(self) -> FunctionRegistry: - """Get the function registry for this bundle (read-only). - - Provides read access to the registered formatting functions without - requiring access to private attributes. - - Returns: - FunctionRegistry: The function registry for this bundle - - Example: - >>> bundle = FluentBundle("en") # doctest: +SKIP - >>> registry = bundle.function_registry # doctest: +SKIP - >>> "NUMBER" in registry # doctest: +SKIP - True - """ - return self._function_registry - - @classmethod - def for_system_locale( - cls, - *, - use_isolating: bool = True, - cache: CacheConfig | None = None, - functions: FunctionRegistry | None = None, - max_source_size: int | None = None, - max_nesting_depth: int | None = None, - max_expansion_size: int | None = None, - strict: bool = True, - ) -> FluentBundle: - """Factory method to create a FluentBundle using the system locale. - - Detects and uses the current system locale (from locale.getlocale(), - LC_ALL, LC_MESSAGES, or LANG environment variables). - - Args: - use_isolating: Wrap interpolated values in Unicode bidi isolation marks - cache: Cache configuration. 
Pass ``CacheConfig()`` to enable caching - with defaults, or ``CacheConfig(size=500, ...)`` for custom settings. - ``None`` disables caching (default). - functions: Custom FunctionRegistry to use (default: standard registry). - Copied on construction; later mutations to the original have no effect. - max_source_size: Maximum FTL source size in characters (default: 10,000,000) - max_nesting_depth: Maximum placeable nesting depth (default: 100) - strict: Fail-fast mode (default True): raises on formatting errors. Pass False for soft error recovery. - - Returns: - Configured FluentBundle instance for system locale - - Raises: - RuntimeError: If system locale cannot be determined - - Example: - >>> bundle = FluentBundle.for_system_locale() # doctest: +SKIP - >>> bundle.locale # Returns canonical detected system locale # doctest: +SKIP - 'en_us' - """ - # Delegate to unified locale detection (raises RuntimeError on failure) - system_locale = get_system_locale(raise_on_failure=True) - - return cls( - system_locale, - use_isolating=use_isolating, - cache=cache, - functions=functions, - max_source_size=max_source_size, - max_nesting_depth=max_nesting_depth, - max_expansion_size=max_expansion_size, - strict=strict, - ) - - def __repr__(self) -> str: - """Return string representation for debugging. - - Returns: - String representation showing locale and loaded messages count - - Example: - >>> bundle = FluentBundle("lv_LV") # doctest: +SKIP - >>> repr(bundle) # doctest: +SKIP - "FluentBundle(locale='lv_lv', messages=0, terms=0)" - """ - with self._rwlock.read(): - return ( - f"FluentBundle(locale={self._locale!r}, " - f"messages={len(self._messages)}, " - f"terms={len(self._terms)})" - ) - - def get_babel_locale(self) -> str: - """Get the Babel locale identifier for this bundle (introspection API). - - This is a debugging/introspection method that returns the actual Babel locale - identifier being used for NUMBER(), DATETIME(), and CURRENCY() formatting. 
- - Useful for troubleshooting locale-related formatting issues, especially when - verifying which CLDR data is being applied. - - Returns: - str: Babel locale identifier (e.g., "en_US", "lv_LV", "ar_EG") - - Example: - >>> bundle = FluentBundle("lv") # doctest: +SKIP - >>> bundle.get_babel_locale() # doctest: +SKIP - 'lv' - >>> bundle_us = FluentBundle("en-US") # doctest: +SKIP - >>> bundle_us.get_babel_locale() # doctest: +SKIP - 'en_US' - - Note: - This creates a LocaleContext temporarily to access Babel locale - information. The return value shows the Babel/CLDR locale, which - may differ in casing from bundle.locale. - - See Also: - - bundle.locale: The canonical LocaleCode stored by FluentBundle - - LocaleContext.babel_locale: The underlying Babel Locale object - """ - ctx = LocaleContext.create_or_raise(self._locale) - return str(ctx.babel_locale) - - def add_resource( - self, source: str, /, *, source_path: str | None = None - ) -> tuple[Junk, ...]: - """Add FTL resource to bundle. - - Parses FTL source and adds messages/terms to registry. - Thread-safe (uses internal RWLock). - - Parse operation occurs outside the write lock to minimize reader - contention. Only registration (dict updates) requires exclusive access. - - Args: - source: FTL file content [positional-only] - source_path: Optional path to source file for better error messages - (e.g., "locales/lv/ui.ftl"). Used as source identifier - in warning messages. Defaults to "" if not provided. - - Returns: - Tuple of Junk entries encountered during parsing. Empty tuple if - parsing succeeded without errors. Each Junk entry contains the - unparseable content and associated annotations. - - Logging: - Syntax errors (Junk entries) are logged at WARNING level regardless - of whether source_path is provided. This ensures syntax errors are - visible whether loading from files, databases, or in-memory strings. - - Note: - Parser continues after errors (robustness principle). 
Junk entries - are returned for programmatic error handling. - - Raises: - TypeError: If source is not a string (e.g., bytes were passed). - SyntaxIntegrityError: In strict mode only, if parsing produces any - Junk entries. Financial applications using strict=True get - fail-fast behavior on syntax errors. - - Thread Safety: - Parser is stateless and thread-safe. Parse operation can occur - outside write lock without risk. Only registration step requires - exclusive write access. - """ - # Type validation at API boundary - type hints are not enforced at runtime. - # Defensive check: users may pass bytes despite str annotation. - if not isinstance(source, str): - msg = ( # type: ignore[unreachable] - f"source must be str, not {type(source).__name__}. " - "Decode bytes to str (e.g., source.decode('utf-8')) before calling add_resource()." - ) - raise TypeError(msg) - - # Parse outside lock (expensive, but safe - parser is stateless, source is immutable) - resource = self._parser.parse(source) - - # Only hold lock for registration (fast, O(N) where N is entry count) - with self._rwlock.write(): - return self._register_resource(resource, source_path) - - def add_resource_stream( - self, lines: Iterable[str], /, *, source_path: str | None = None - ) -> tuple[Junk, ...]: - """Add FTL resource to bundle from a line-oriented source stream. - - Semantically identical to add_resource() but accepts any iterable of - lines rather than a pre-assembled source string. Memory usage is - proportional to the largest single FTL entry in the stream, not the - total resource size. - - The stream is split at blank-line boundaries (which delimit top-level - FTL entries). Each chunk is parsed independently, then all entries are - committed together via the same two-phase protocol used by add_resource(). - Strict mode, overwrite warnings, cache invalidation, and thread safety - are identical. - - Args: - lines: Iterable of FTL source lines [positional-only]. 
Trailing - newlines are stripped per line. - source_path: Optional path to source file for better error messages - (e.g., "locales/lv/ui.ftl"). Defaults to "". - - Returns: - Tuple of Junk entries encountered during parsing. Empty tuple if - parsing succeeded without errors. - - Raises: - SyntaxIntegrityError: In strict mode, if any Junk entries are parsed. - - Example: - >>> bundle = FluentBundle("en") # doctest: +SKIP - >>> with open("locales/en/ui.ftl") as f: # doctest: +SKIP - ... bundle.add_resource_stream(f, source_path="locales/en/ui.ftl") - """ - # Collect parsed entries outside lock (stateless parse, immutable input) - collected: list[Entry] = list(self._parser.parse_stream(lines)) - resource = Resource(entries=tuple(collected)) - - with self._rwlock.write(): - return self._register_resource(resource, source_path) - - def validate_resource(self, source: str) -> ValidationResult: - """Validate FTL resource without adding to bundle. - - Use this to check FTL files in CI/tooling before adding them. - Unlike add_resource(), this does not modify the bundle. - - Performs both syntax validation (errors) and semantic validation (warnings): - - Errors: Parse failures (Junk entries) - - Warnings: Duplicate IDs, messages without values, undefined references, - circular dependencies - - Args: - source: FTL file content - - Returns: - ValidationResult with parse errors and semantic warnings - - Raises: - TypeError: If source is not a string (e.g., bytes were passed). - - Example: - >>> bundle = FluentBundle("lv") # doctest: +SKIP - >>> result = bundle.validate_resource(ftl_source) # doctest: +SKIP - >>> if not result.is_valid: # doctest: +SKIP - ... for error in result.errors: - ... print(f"Error [{error.code}]: {error.message}") - >>> if result.warning_count > 0: # doctest: +SKIP - ... for warning in result.warnings: - ... 
print(f"Warning [{warning.code}]: {warning.message}") - - See Also: - ftllexengine.validation.validate_resource: Standalone validation function - """ - # Type validation at API boundary - type hints are not enforced at runtime. - # Defensive check: users may pass bytes despite str annotation. - if not isinstance(source, str): - msg = ( # type: ignore[unreachable] - f"source must be str, not {type(source).__name__}. " - "Decode bytes to str (e.g., source.decode('utf-8')) before calling validate_resource()." - ) - raise TypeError(msg) - - # Delegate to validation module, reusing bundle's parser for consistency - # Pass existing bundle entries and their dependencies for cross-resource validation - with self._rwlock.read(): - return _validate_resource_impl( - source, - parser=self._parser, - known_messages=frozenset(self._messages.keys()), - known_terms=frozenset(self._terms.keys()), - known_msg_deps=self._msg_deps, - known_term_deps=self._term_deps, - ) - def format_pattern( - self, + self: BundleStateProtocol, message_id: str, /, args: Mapping[str, FluentValue] | None = None, *, attribute: str | None = None, ) -> tuple[str, tuple[FrozenFluentError, ...]]: - """Format message to string with error reporting. - - Mozilla python-fluent aligned API that returns both the formatted - string and any errors encountered during resolution. Thread-safe. - - Args: - message_id: Message identifier [positional-only] - args: Variable arguments for interpolation - attribute: Attribute name (optional, keyword-only) - - Returns: - Tuple of (formatted_string, errors) - - formatted_string: Best-effort formatted output (never empty) - - errors: Tuple of FrozenFluentError instances encountered during resolution (immutable) - - Raises: - FormattingIntegrityError: In strict mode, if ANY error occurs during formatting. - The exception carries the original errors, fallback value, and message ID. 
- - Note: - In strict mode (default: strict=True), FormattingIntegrityError is raised - immediately when ANY error occurs. This is the default for financial applications - where silent fallbacks are unacceptable. The exception provides: - - fluent_errors: The original FrozenFluentError instances - - fallback_value: What would have been returned in soft mode - - message_id: The message that failed to format - - In soft error mode (strict=False), formatting errors are collected and - returned in the errors tuple. The formatted string always contains a - readable fallback value per Fluent specification. - - If an attribute name is duplicated within a message (validation warning), - the last definition is used during resolution (last-wins semantics). - This matches the Fluent specification and Mozilla reference implementation. - - Examples: - Successful formatting: - >>> result, errors = bundle.format_pattern("hello") # doctest: +SKIP - >>> assert result == 'Sveiki, pasaule!' # doctest: +SKIP - >>> assert errors == () # doctest: +SKIP - - Missing variable returns a fallback plus an error in non-strict mode: - >>> bundle.add_resource('msg = Hello { $name }!') # doctest: +SKIP - >>> result, errors = bundle.format_pattern("msg", {}) # doctest: +SKIP - >>> assert result == 'Hello {$name}!' 
# Readable fallback # doctest: +SKIP - >>> assert len(errors) == 1 # doctest: +SKIP - >>> assert errors[0].category == ErrorCategory.REFERENCE # doctest: +SKIP - - Attribute access: - >>> result, errors = bundle.format_pattern("button-save", attribute="tooltip") # doctest: +SKIP - >>> assert result == 'Saglabā pašreizējo ierakstu datubāzē' # doctest: +SKIP - >>> assert errors == () # doctest: +SKIP - - Default `strict=True` raises on errors, including missing `$name`: - >>> bundle_strict = FluentBundle("en") # doctest: +SKIP - >>> bundle_strict.add_resource('msg = Hello { $name }!') # doctest: +SKIP - >>> bundle_strict.format_pattern("msg", {}) # Raises FormattingIntegrityError # doctest: +SKIP - """ + """Format one message or attribute to a string.""" with self._rwlock.read(): return self._format_pattern_impl(message_id, args, attribute) - - def add_function(self, name: str, func: Callable[..., FluentValue]) -> None: - """Add custom function to bundle. - - Args: - name: Function name (UPPERCASE by convention) - func: Callable function that returns a FluentValue - - Example: - >>> def CUSTOM(value): # doctest: +SKIP - ... return value.upper() - >>> bundle.add_function("CUSTOM", CUSTOM) # doctest: +SKIP - """ - with self._rwlock.write(): - # Copy-on-write: copy the shared registry on first modification - if not self._owns_registry: - self._function_registry = self._function_registry.copy() - self._owns_registry = True - logger.debug("Registry copied on first add_function") - - self._function_registry.register(func, ftl_name=name) - logger.debug("Added custom function: %s", name) - - # Re-create resolver so it captures the updated function registry - self._resolver = self._create_resolver() - - # Invalidate cache (functions changed) - if self._cache is not None: - self._cache.clear() - logger.debug("Cache cleared after add_function") - - def clear_cache(self) -> None: - """Clear format cache. - - Call this when you want to force cache invalidation. 
- Automatically called by add_resource() and add_function(). - - Example: - >>> bundle = FluentBundle("en", cache=CacheConfig()) # doctest: +SKIP - >>> bundle.add_resource("msg = Hello") # doctest: +SKIP - >>> bundle.format_pattern("msg") # Caches result # doctest: +SKIP - >>> bundle.clear_cache() # Manual invalidation # doctest: +SKIP - """ - with self._rwlock.write(): - if self._cache is not None: - self._cache.clear() - logger.debug("Cache manually cleared") - - def get_cache_stats(self) -> CacheStats | None: - """Get cache statistics. - - Returns: - CacheStats snapshot, or None if caching is disabled. - All fields are read atomically under the cache lock. - See CacheStats for the complete field specification. - - Example: - >>> bundle = FluentBundle("en", cache=CacheConfig()) # doctest: +SKIP - >>> bundle.add_resource("msg = Hello") # doctest: +SKIP - >>> bundle.format_pattern("msg", {}) # Cache miss # doctest: +SKIP - >>> bundle.format_pattern("msg", {}) # Cache hit # doctest: +SKIP - >>> stats = bundle.get_cache_stats() # doctest: +SKIP - >>> stats["hits"] # doctest: +SKIP - 1 - >>> stats["misses"] # doctest: +SKIP - 1 - >>> isinstance(stats["hit_rate"], float) # doctest: +SKIP - True - """ - if self._cache is not None: - return self._cache.get_stats() - return None - - def get_cache_audit_log(self) -> tuple[CacheAuditLogEntry, ...] | None: - """Get immutable cache audit log entries. - - Returns: - Tuple of cache audit-log entry snapshots, or None if caching is disabled. - Returns an empty tuple when caching is enabled but audit logging is - disabled or no cache operations have been recorded. 
- """ - if self._cache is not None: - return self._cache.get_audit_log() - return None diff --git a/src/ftllexengine/runtime/bundle_lifecycle.py b/src/ftllexengine/runtime/bundle_lifecycle.py new file mode 100644 index 00000000..28aa75c8 --- /dev/null +++ b/src/ftllexengine/runtime/bundle_lifecycle.py @@ -0,0 +1,201 @@ +"""Lifecycle and configuration helpers for FluentBundle.""" + +from __future__ import annotations + +import logging +from typing import TYPE_CHECKING, cast + +from ftllexengine.constants import ( + DEFAULT_MAX_EXPANSION_SIZE, + MAX_DEPTH, + MAX_SOURCE_SIZE, +) +from ftllexengine.core.depth_guard import depth_clamp +from ftllexengine.core.locale_utils import get_system_locale, require_locale_code +from ftllexengine.syntax.parser import FluentParserV1 + +from .cache import IntegrityCache +from .function_bridge import FunctionRegistry +from .functions import get_shared_registry +from .locale_context import LocaleContext +from .rwlock import RWLock + +if TYPE_CHECKING: + from ftllexengine.core.semantic_types import LocaleCode + from ftllexengine.syntax import Message, Term + + from .bundle import FluentBundle + from .bundle_protocols import BundleStateProtocol + from .cache_config import CacheConfig + +logger = logging.getLogger("ftllexengine.runtime.bundle") + + +class _BundleLifecycleMixin: + """Construction, configuration, and identity behavior for FluentBundle.""" + + def __init__( + self: BundleStateProtocol, + locale: str, + /, + *, + use_isolating: bool = True, + cache: CacheConfig | None = None, + functions: FunctionRegistry | None = None, + max_source_size: int | None = None, + max_nesting_depth: int | None = None, + max_expansion_size: int | None = None, + strict: bool = True, + ) -> None: + """Initialize bundle state for one locale.""" + canonical_locale = require_locale_code(locale, "locale") + locale_context = LocaleContext.create_or_raise(canonical_locale) + self._locale = locale_context.locale_code + self._use_isolating = use_isolating + 
self._strict = strict + self._messages: dict[str, Message] = {} + self._terms: dict[str, Term] = {} + self._msg_deps: dict[str, frozenset[str]] = {} + self._term_deps: dict[str, frozenset[str]] = {} + + self._max_source_size = max_source_size if max_source_size is not None else MAX_SOURCE_SIZE + requested_depth = max_nesting_depth if max_nesting_depth is not None else MAX_DEPTH + self._max_nesting_depth = depth_clamp(requested_depth) + self._max_expansion_size = ( + max_expansion_size if max_expansion_size is not None else DEFAULT_MAX_EXPANSION_SIZE + ) + self._parser = FluentParserV1( + max_source_size=self._max_source_size, + max_nesting_depth=self._max_nesting_depth, + ) + self._rwlock = RWLock() + + provided_functions: object = functions + if provided_functions is not None: + if not isinstance(provided_functions, FunctionRegistry): + msg = ( + f"functions must be FunctionRegistry, not {type(provided_functions).__name__}. " + "Use create_default_registry() or FunctionRegistry() to create one." 
+ ) + raise TypeError(msg) + self._function_registry = provided_functions.copy() + self._owns_registry = True + else: + self._function_registry = get_shared_registry() + self._owns_registry = False + + self._cache_config = cache + self._cache: IntegrityCache | None = None + if cache is not None: + self._cache = IntegrityCache( + maxsize=cache.size, + max_entry_weight=cache.max_entry_weight, + max_errors_per_entry=cache.max_errors_per_entry, + write_once=cache.write_once, + strict=cache.integrity_strict and strict, + enable_audit=cache.enable_audit, + max_audit_entries=cache.max_audit_entries, + ) + + self._resolver = self._create_resolver() + logger.info( + "FluentBundle initialized for locale: %s (use_isolating=%s, cache=%s, strict=%s)", + self._locale, + use_isolating, + "enabled" if cache is not None else "disabled", + strict, + ) + + @property + def locale(self: BundleStateProtocol) -> LocaleCode: + """Get the canonical locale code for this bundle.""" + return self._locale + + @property + def use_isolating(self: BundleStateProtocol) -> bool: + """Get whether Unicode bidi isolation is enabled.""" + return self._use_isolating + + @property + def strict(self: BundleStateProtocol) -> bool: + """Get whether strict mode is enabled.""" + return self._strict + + @property + def cache_enabled(self: BundleStateProtocol) -> bool: + """Get whether format caching is enabled.""" + return self._cache is not None + + @property + def cache_config(self: BundleStateProtocol) -> CacheConfig | None: + """Get cache configuration.""" + return self._cache_config + + @property + def cache_usage(self: BundleStateProtocol) -> int: + """Get current number of cached format results.""" + if self._cache is None: + return 0 + return self._cache.size + + @property + def max_source_size(self: BundleStateProtocol) -> int: + """Maximum FTL source size in characters.""" + return self._max_source_size + + @property + def max_nesting_depth(self: BundleStateProtocol) -> int: + """Maximum placeable 
nesting depth.""" + return self._max_nesting_depth + + @property + def max_expansion_size(self: BundleStateProtocol) -> int: + """Maximum total characters produced during resolution.""" + return self._max_expansion_size + + @property + def function_registry(self: BundleStateProtocol) -> FunctionRegistry: + """Get the function registry for this bundle.""" + return self._function_registry + + @classmethod + def for_system_locale( + cls, + *, + use_isolating: bool = True, + cache: CacheConfig | None = None, + functions: FunctionRegistry | None = None, + max_source_size: int | None = None, + max_nesting_depth: int | None = None, + max_expansion_size: int | None = None, + strict: bool = True, + ) -> FluentBundle: + """Factory method to create a FluentBundle using the system locale.""" + system_locale = get_system_locale(raise_on_failure=True) + return cast( + "FluentBundle", + cls( + system_locale, + use_isolating=use_isolating, + cache=cache, + functions=functions, + max_source_size=max_source_size, + max_nesting_depth=max_nesting_depth, + max_expansion_size=max_expansion_size, + strict=strict, + ), + ) + + def __repr__(self: BundleStateProtocol) -> str: + """Return string representation for debugging.""" + with self._rwlock.read(): + return ( + f"FluentBundle(locale={self._locale!r}, " + f"messages={len(self._messages)}, " + f"terms={len(self._terms)})" + ) + + def get_babel_locale(self: BundleStateProtocol) -> str: + """Get the Babel locale identifier for this bundle.""" + ctx = LocaleContext.create_or_raise(self._locale) + return str(ctx.babel_locale) diff --git a/src/ftllexengine/runtime/bundle_mutation.py b/src/ftllexengine/runtime/bundle_mutation.py new file mode 100644 index 00000000..1e7f8df5 --- /dev/null +++ b/src/ftllexengine/runtime/bundle_mutation.py @@ -0,0 +1,121 @@ +"""Mutation and cache-management helpers for FluentBundle.""" + +from __future__ import annotations + +import logging +from typing import TYPE_CHECKING + +from ftllexengine.syntax import 
Resource +from ftllexengine.validation import validate_resource as _validate_resource_impl + +if TYPE_CHECKING: + from collections.abc import Callable, Iterable + + from ftllexengine.core.value_types import FluentValue + from ftllexengine.diagnostics import ValidationResult + from ftllexengine.syntax import Entry, Junk + + from .bundle_protocols import BundleStateProtocol + from .cache import CacheAuditLogEntry, CacheStats + +logger = logging.getLogger("ftllexengine.runtime.bundle") + + +class _BundleMutationMixin: + """Resource mutation, validation, and cache helpers for FluentBundle.""" + + def add_resource( + self: BundleStateProtocol, + source: str, + /, + *, + source_path: str | None = None, + ) -> tuple[Junk, ...]: + """Add FTL resource to bundle.""" + raw_source: object = source + if not isinstance(raw_source, str): + msg = ( + f"source must be str, not {type(raw_source).__name__}. " + "Decode bytes to str (e.g., source.decode('utf-8')) before calling add_resource()." + ) + raise TypeError(msg) + + resource = self._parser.parse(raw_source) + with self._rwlock.write(): + return self._register_resource(resource, source_path) + + def add_resource_stream( + self: BundleStateProtocol, + lines: Iterable[str], + /, + *, + source_path: str | None = None, + ) -> tuple[Junk, ...]: + """Add FTL resource to bundle from a line-oriented source stream.""" + collected: list[Entry] = list(self._parser.parse_stream(lines)) + resource = Resource(entries=tuple(collected)) + + with self._rwlock.write(): + return self._register_resource(resource, source_path) + + def validate_resource(self: BundleStateProtocol, source: str) -> ValidationResult: + """Validate FTL resource without adding to bundle.""" + raw_source: object = source + if not isinstance(raw_source, str): + msg = ( + f"source must be str, not {type(raw_source).__name__}. " + "Decode bytes to str (e.g., source.decode('utf-8')) " + "before calling validate_resource()." 
+ ) + raise TypeError(msg) + + with self._rwlock.read(): + return _validate_resource_impl( + raw_source, + parser=self._parser, + known_messages=frozenset(self._messages.keys()), + known_terms=frozenset(self._terms.keys()), + known_msg_deps=self._msg_deps, + known_term_deps=self._term_deps, + ) + + def add_function( + self: BundleStateProtocol, + name: str, + func: Callable[..., FluentValue], + ) -> None: + """Add custom function to bundle.""" + with self._rwlock.write(): + if not self._owns_registry: + self._function_registry = self._function_registry.copy() + self._owns_registry = True + logger.debug("Registry copied on first add_function") + + self._function_registry.register(func, ftl_name=name) + logger.debug("Added custom function: %s", name) + self._resolver = self._create_resolver() + + if self._cache is not None: + self._cache.clear() + logger.debug("Cache cleared after add_function") + + def clear_cache(self: BundleStateProtocol) -> None: + """Clear format cache.""" + with self._rwlock.write(): + if self._cache is not None: + self._cache.clear() + logger.debug("Cache manually cleared") + + def get_cache_stats(self: BundleStateProtocol) -> CacheStats | None: + """Get cache statistics.""" + if self._cache is not None: + return self._cache.get_stats() + return None + + def get_cache_audit_log( + self: BundleStateProtocol, + ) -> tuple[CacheAuditLogEntry, ...] 
| None: + """Get immutable cache audit log entries.""" + if self._cache is not None: + return self._cache.get_audit_log() + return None diff --git a/src/ftllexengine/runtime/bundle_protocols.py b/src/ftllexengine/runtime/bundle_protocols.py index e3d9e163..dc73b331 100644 --- a/src/ftllexengine/runtime/bundle_protocols.py +++ b/src/ftllexengine/runtime/bundle_protocols.py @@ -13,22 +13,28 @@ from ftllexengine.diagnostics.codes import DiagnosticCode from ftllexengine.runtime.bundle_registration import _PendingRegistration from ftllexengine.runtime.cache import IntegrityCache + from ftllexengine.runtime.cache_config import CacheConfig from ftllexengine.runtime.function_bridge import FunctionRegistry from ftllexengine.runtime.resolver import FluentResolver from ftllexengine.runtime.rwlock import RWLock - from ftllexengine.syntax import Message, Resource, Term + from ftllexengine.syntax import Junk, Message, Resource, Term + from ftllexengine.syntax.parser import FluentParserV1 class BundleStateProtocol(Protocol): """Structural contract implemented by FluentBundle for its mixins.""" _cache: IntegrityCache | None + _cache_config: CacheConfig | None _function_registry: FunctionRegistry _locale: LocaleCode _max_expansion_size: int _max_nesting_depth: int + _max_source_size: int _messages: dict[str, Message] _msg_deps: dict[str, frozenset[str]] + _owns_registry: bool + _parser: FluentParserV1 _resolver: FluentResolver _rwlock: RWLock _strict: bool @@ -39,6 +45,14 @@ class BundleStateProtocol(Protocol): def _collect_pending_entries(self, resource: Resource) -> _PendingRegistration: ... # pragma: no cover - typing-only protocol declaration + def _register_resource( + self, resource: Resource, source_path: str | None + ) -> tuple[Junk, ...]: + ... # pragma: no cover - typing-only protocol declaration + + def _create_resolver(self) -> FluentResolver: + ... 
# pragma: no cover - typing-only protocol declaration + def _raise_strict_error( self, message_id: str, @@ -73,3 +87,11 @@ def _lookup_cached_pattern( attribute: str | None, ) -> tuple[str, tuple[FrozenFluentError, ...]] | None: ... # pragma: no cover - typing-only protocol declaration + + def _format_pattern_impl( + self, + message_id: str, + args: Mapping[str, FluentValue] | None, + attribute: str | None, + ) -> tuple[str, tuple[FrozenFluentError, ...]]: + ... # pragma: no cover - typing-only protocol declaration diff --git a/src/ftllexengine/runtime/cache.py b/src/ftllexengine/runtime/cache.py index 3d018be0..a6b7696b 100644 --- a/src/ftllexengine/runtime/cache.py +++ b/src/ftllexengine/runtime/cache.py @@ -30,25 +30,20 @@ from __future__ import annotations -import hashlib import hmac import time from collections import OrderedDict, deque from threading import Lock from typing import TYPE_CHECKING, final -from ftllexengine.constants import DEFAULT_CACHE_SIZE, DEFAULT_MAX_ENTRY_WEIGHT, MAX_DEPTH +from ftllexengine.constants import DEFAULT_CACHE_SIZE, DEFAULT_MAX_ENTRY_WEIGHT from ftllexengine.integrity import ( CacheCorruptionError, IntegrityContext, WriteConflictError, ) -from ftllexengine.runtime.cache_keys import ( - HASHABLE_NODE_BUDGET, - compute_key_hash, - make_hashable, - make_key, -) +from ftllexengine.runtime.cache_audit import _CacheAuditMixin +from ftllexengine.runtime.cache_introspection import _CacheKeyMixin, _CacheStatsMixin from ftllexengine.runtime.cache_types import ( _DEFAULT_MAX_ERRORS_PER_ENTRY, CacheAuditLogEntry, @@ -77,7 +72,7 @@ @final -class IntegrityCache: +class IntegrityCache(_CacheStatsMixin, _CacheAuditMixin, _CacheKeyMixin): """Financial-grade format cache with integrity verification. Thread-safe LRU cache that provides: @@ -448,202 +443,3 @@ def clear(self) -> None: # — cumulative counters for production observability and audit. 
# Note: sequence NOT reset (monotonic for audit trail) # Note: audit log NOT cleared (historical record) - - def get_stats(self) -> CacheStats: - """Get cache statistics. - - Thread-safe. Returns a consistent snapshot taken under the lock. - All fields are read atomically; calling individual properties (e.g., - .hits, .misses) gives weaker consistency across multiple calls. - - Returns: - CacheStats TypedDict with per-field type precision. - See CacheStats for field documentation. - """ - with self._lock: - total = self._hits + self._misses - hit_rate = (self._hits / total * 100) if total > 0 else 0.0 - - return { - "size": len(self._cache), - "maxsize": self._maxsize, - "max_entry_weight": self._max_entry_weight, - "max_errors_per_entry": self._max_errors_per_entry, - "hits": self._hits, - "misses": self._misses, - "hit_rate": round(hit_rate, 2), - "unhashable_skips": self._unhashable_skips, - "oversize_skips": self._oversize_skips, - "error_bloat_skips": self._error_bloat_skips, - "combined_weight_skips": self._combined_weight_skips, - "corruption_detected": self._corruption_detected, - "idempotent_writes": self._idempotent_writes, - "write_once_conflicts": self._write_once_conflicts, - "sequence": self._sequence, - "write_once": self._write_once, - "strict": self._strict, - "audit_enabled": self._audit_log is not None, - "audit_entries": len(self._audit_log) if self._audit_log is not None else 0, - } - - def get_audit_log(self) -> tuple[WriteLogEntry, ...]: - """Get audit log entries. - - Thread-safe. Returns immutable copy of audit log. - - Returns: - Tuple of WriteLogEntry instances (empty if audit disabled) - """ - with self._lock: - if self._audit_log is None: - return () - return tuple(self._audit_log) - - def _audit( - self, - operation: str, - key: _CacheKey, - entry: IntegrityCacheEntry | None, - ) -> None: - """Record audit log entry (internal, assumes lock held). 
- - Args: - operation: Operation type (GET, PUT, HIT, MISS, EVICT, CORRUPTION) - key: Cache key - entry: Cache entry (None for MISS operations) - """ - if self._audit_log is None: - return - - # Create privacy-preserving key hash - key_hash = hashlib.blake2b( - str(key).encode("utf-8", errors="surrogatepass"), - digest_size=8, - ).hexdigest() - - log_entry = WriteLogEntry( - operation=operation, - key_hash=key_hash, - timestamp=time.monotonic(), - sequence=entry.sequence if entry is not None else 0, - checksum_hex=entry.checksum.hex() if entry is not None else "", - wall_time_unix=time.time(), - ) - - # deque with maxlen provides automatic O(1) eviction of oldest entries - self._audit_log.append(log_entry) - - # Bound recursive cache-key normalization to prevent DAG expansion abuse. - _MAX_HASHABLE_NODES: int = HASHABLE_NODE_BUDGET - - @staticmethod - def _make_hashable(value: object, depth: int = MAX_DEPTH) -> HashableValue: - """Convert potentially unhashable cache arguments into a stable hashable form.""" - return make_hashable(value, depth=depth) - - @staticmethod - def _compute_key_hash(key: _CacheKey) -> bytes: - """Compute the 8-byte key binding used to detect cache slot confusion.""" - return compute_key_hash(key) - - @staticmethod - def _make_key( - message_id: str, - args: Mapping[str, FluentValue] | None, - attribute: str | None, - locale_code: str, - *, - use_isolating: bool, - ) -> _CacheKey | None: - """Create the immutable lookup key for a formatting request.""" - return make_key( - message_id, - args, - attribute, - locale_code, - use_isolating=use_isolating, - ) - - def __len__(self) -> int: - """Get current cache size. Thread-safe.""" - with self._lock: - return len(self._cache) - - @property - def size(self) -> int: - """Current number of cached entries. Thread-safe.""" - return len(self) - - @property - def maxsize(self) -> int: - """Maximum cache size.""" - return self._maxsize - - @property - def hits(self) -> int: - """Number of cache hits. 
Thread-safe.""" - with self._lock: - return self._hits - - @property - def misses(self) -> int: - """Number of cache misses. Thread-safe.""" - with self._lock: - return self._misses - - @property - def unhashable_skips(self) -> int: - """Number of operations skipped due to unhashable args. Thread-safe.""" - with self._lock: - return self._unhashable_skips - - @property - def oversize_skips(self) -> int: - """Number of operations skipped due to result weight. Thread-safe.""" - with self._lock: - return self._oversize_skips - - @property - def max_entry_weight(self) -> int: - """Maximum memory weight for cached results.""" - return self._max_entry_weight - - @property - def corruption_detected(self) -> int: - """Number of checksum mismatches detected. Thread-safe.""" - with self._lock: - return self._corruption_detected - - @property - def idempotent_writes(self) -> int: - """Number of benign concurrent writes with identical content. Thread-safe.""" - with self._lock: - return self._idempotent_writes - - @property - def error_bloat_skips(self) -> int: - """Number of puts skipped due to excess error count. Thread-safe.""" - with self._lock: - return self._error_bloat_skips - - @property - def combined_weight_skips(self) -> int: - """Number of puts skipped due to combined formatted+error weight. Thread-safe.""" - with self._lock: - return self._combined_weight_skips - - @property - def write_once_conflicts(self) -> int: - """Number of true write-once conflicts (different content, same key). 
Thread-safe.""" - with self._lock: - return self._write_once_conflicts - - @property - def write_once(self) -> bool: - """Whether write-once mode is enabled.""" - return self._write_once - - @property - def strict(self) -> bool: - """Whether strict mode is enabled.""" - return self._strict diff --git a/src/ftllexengine/runtime/cache_audit.py b/src/ftllexengine/runtime/cache_audit.py new file mode 100644 index 00000000..53ba7ec7 --- /dev/null +++ b/src/ftllexengine/runtime/cache_audit.py @@ -0,0 +1,48 @@ +"""Audit helpers for IntegrityCache.""" + +from __future__ import annotations + +import hashlib +import time +from typing import TYPE_CHECKING + +from .cache_types import IntegrityCacheEntry, WriteLogEntry, _CacheKey + +if TYPE_CHECKING: + from .cache_protocols import CacheStateProtocol + + +class _CacheAuditMixin: + """Audit-log behavior for IntegrityCache.""" + + def get_audit_log(self: CacheStateProtocol) -> tuple[WriteLogEntry, ...]: + """Get audit log entries.""" + with self._lock: + if self._audit_log is None: + return () + return tuple(self._audit_log) + + def _audit( + self: CacheStateProtocol, + operation: str, + key: _CacheKey, + entry: IntegrityCacheEntry | None, + ) -> None: + """Record audit log entry (internal, assumes lock held).""" + if self._audit_log is None: + return + + key_hash = hashlib.blake2b( + str(key).encode("utf-8", errors="surrogatepass"), + digest_size=8, + ).hexdigest() + + log_entry = WriteLogEntry( + operation=operation, + key_hash=key_hash, + timestamp=time.monotonic(), + sequence=entry.sequence if entry is not None else 0, + checksum_hex=entry.checksum.hex() if entry is not None else "", + wall_time_unix=time.time(), + ) + self._audit_log.append(log_entry) diff --git a/src/ftllexengine/runtime/cache_introspection.py b/src/ftllexengine/runtime/cache_introspection.py new file mode 100644 index 00000000..99dfeab1 --- /dev/null +++ b/src/ftllexengine/runtime/cache_introspection.py @@ -0,0 +1,168 @@ +"""Stats and key-shaping helpers 
for IntegrityCache.""" + +from __future__ import annotations + +from typing import TYPE_CHECKING + +from ftllexengine.constants import MAX_DEPTH + +from .cache_keys import HASHABLE_NODE_BUDGET, compute_key_hash, make_hashable, make_key + +if TYPE_CHECKING: + from collections.abc import Mapping + + from ftllexengine.core.value_types import FluentValue + + from .cache_protocols import CacheStateProtocol + from .cache_types import CacheStats, HashableValue, _CacheKey + + +class _CacheKeyMixin: + """Static key-shaping helpers preserved on IntegrityCache.""" + + _MAX_HASHABLE_NODES: int = HASHABLE_NODE_BUDGET + + @staticmethod + def _make_hashable(value: object, depth: int = MAX_DEPTH) -> HashableValue: + """Convert potentially unhashable cache arguments into a stable hashable form.""" + return make_hashable(value, depth=depth) + + @staticmethod + def _compute_key_hash(key: _CacheKey) -> bytes: + """Compute the 8-byte key binding used to detect cache slot confusion.""" + return compute_key_hash(key) + + @staticmethod + def _make_key( + message_id: str, + args: Mapping[str, FluentValue] | None, + attribute: str | None, + locale_code: str, + *, + use_isolating: bool, + ) -> _CacheKey | None: + """Create the immutable lookup key for a formatting request.""" + return make_key( + message_id, + args, + attribute, + locale_code, + use_isolating=use_isolating, + ) + + +class _CacheStatsMixin: + """Stats and property accessors for IntegrityCache.""" + + def get_stats(self: CacheStateProtocol) -> CacheStats: + """Get cache statistics.""" + with self._lock: + total = self._hits + self._misses + hit_rate = (self._hits / total * 100) if total > 0 else 0.0 + + return { + "size": len(self._cache), + "maxsize": self._maxsize, + "max_entry_weight": self._max_entry_weight, + "max_errors_per_entry": self._max_errors_per_entry, + "hits": self._hits, + "misses": self._misses, + "hit_rate": round(hit_rate, 2), + "unhashable_skips": self._unhashable_skips, + "oversize_skips": 
self._oversize_skips, + "error_bloat_skips": self._error_bloat_skips, + "combined_weight_skips": self._combined_weight_skips, + "corruption_detected": self._corruption_detected, + "idempotent_writes": self._idempotent_writes, + "write_once_conflicts": self._write_once_conflicts, + "sequence": self._sequence, + "write_once": self._write_once, + "strict": self._strict, + "audit_enabled": self._audit_log is not None, + "audit_entries": len(self._audit_log) if self._audit_log is not None else 0, + } + + def __len__(self: CacheStateProtocol) -> int: + """Get current cache size. Thread-safe.""" + with self._lock: + return len(self._cache) + + @property + def size(self: CacheStateProtocol) -> int: + """Current number of cached entries. Thread-safe.""" + with self._lock: + return len(self._cache) + + @property + def maxsize(self: CacheStateProtocol) -> int: + """Maximum cache size.""" + return self._maxsize + + @property + def hits(self: CacheStateProtocol) -> int: + """Number of cache hits. Thread-safe.""" + with self._lock: + return self._hits + + @property + def misses(self: CacheStateProtocol) -> int: + """Number of cache misses. Thread-safe.""" + with self._lock: + return self._misses + + @property + def unhashable_skips(self: CacheStateProtocol) -> int: + """Number of operations skipped due to unhashable args. Thread-safe.""" + with self._lock: + return self._unhashable_skips + + @property + def oversize_skips(self: CacheStateProtocol) -> int: + """Number of operations skipped due to result weight. Thread-safe.""" + with self._lock: + return self._oversize_skips + + @property + def max_entry_weight(self: CacheStateProtocol) -> int: + """Maximum memory weight for cached results.""" + return self._max_entry_weight + + @property + def corruption_detected(self: CacheStateProtocol) -> int: + """Number of checksum mismatches detected. 
Thread-safe.""" + with self._lock: + return self._corruption_detected + + @property + def idempotent_writes(self: CacheStateProtocol) -> int: + """Number of benign concurrent writes with identical content. Thread-safe.""" + with self._lock: + return self._idempotent_writes + + @property + def error_bloat_skips(self: CacheStateProtocol) -> int: + """Number of puts skipped due to excess error count. Thread-safe.""" + with self._lock: + return self._error_bloat_skips + + @property + def combined_weight_skips(self: CacheStateProtocol) -> int: + """Number of puts skipped due to combined formatted+error weight. Thread-safe.""" + with self._lock: + return self._combined_weight_skips + + @property + def write_once_conflicts(self: CacheStateProtocol) -> int: + """Number of true write-once conflicts (different content, same key). Thread-safe.""" + with self._lock: + return self._write_once_conflicts + + @property + def write_once(self: CacheStateProtocol) -> bool: + """Whether write-once mode is enabled.""" + return self._write_once + + @property + def strict(self: CacheStateProtocol) -> bool: + """Whether strict mode is enabled.""" + return self._strict diff --git a/src/ftllexengine/runtime/cache_protocols.py b/src/ftllexengine/runtime/cache_protocols.py new file mode 100644 index 00000000..7de3ba63 --- /dev/null +++ b/src/ftllexengine/runtime/cache_protocols.py @@ -0,0 +1,37 @@ +"""Typing protocols for IntegrityCache mixins.""" + +from __future__ import annotations + +from typing import TYPE_CHECKING, Protocol + +if TYPE_CHECKING: + from collections import OrderedDict, deque + from threading import Lock + + from .cache_types import CacheStats, IntegrityCacheEntry, WriteLogEntry, _CacheKey + + +class CacheStateProtocol(Protocol): + """Structural contract implemented by IntegrityCache.""" + + _audit_log: deque[WriteLogEntry] | None + _cache: OrderedDict[_CacheKey, IntegrityCacheEntry] + _combined_weight_skips: int + _corruption_detected: int + _error_bloat_skips: int + 
_hits: int + _idempotent_writes: int + _lock: Lock + _max_entry_weight: int + _max_errors_per_entry: int + _maxsize: int + _misses: int + _oversize_skips: int + _sequence: int + _strict: bool + _unhashable_skips: int + _write_once: bool + _write_once_conflicts: int + + def get_stats(self) -> CacheStats: + ... # pragma: no cover - typing-only protocol declaration diff --git a/src/ftllexengine/runtime/function_bridge.py b/src/ftllexengine/runtime/function_bridge.py index 7da1cd7c..b75d2a24 100644 --- a/src/ftllexengine/runtime/function_bridge.py +++ b/src/ftllexengine/runtime/function_bridge.py @@ -1,49 +1,32 @@ -"""Function call bridge between Python and FTL calling conventions. - -Provides a bidirectional mapping layer: - - Python: snake_case parameters (PEP 8) - - FTL: camelCase parameters (JavaScript/ICU heritage) - -This allows Python functions to use Pythonic APIs while maintaining -compatibility with FTL syntax in .ftl files. - -Architecture: - - FunctionRegistry: Manages function registration and calling - - Auto-generates parameter mappings from function signatures - - Converts FTL camelCase args → Python snake_case args at call time - -Example: - # Python function (snake_case): - def number_format(value, *, minimum_fraction_digits=0): - ... - - # FTL file (camelCase): - price = { $amount NUMBER(minimumFractionDigits: 2) } - - # Bridge automatically converts: minimumFractionDigits → minimum_fraction_digits - -Python 3.13+. Zero external dependencies. 
-""" +"""Function call bridge between Python and FTL calling conventions.""" from __future__ import annotations -from collections.abc import Callable, Iterator, Mapping, Sequence -from functools import wraps -from inspect import Parameter, signature -from typing import TYPE_CHECKING, overload +from typing import TYPE_CHECKING from ftllexengine.core.value_types import FluentNumber, FluentValue from ftllexengine.diagnostics import ErrorCategory, ErrorTemplate, FrozenFluentError -from ftllexengine.runtime.value_types import FluentFunction, FunctionSignature -# Attribute name for marking functions that require locale injection. -# Defined here (the function bridge) because only fluent_function(), @fluent_function -# decorator, and FunctionRegistry.should_inject_locale() read/write this attribute. -# Exported so that runtime/functions.py can access it without importing from value_types. -_FTL_REQUIRES_LOCALE_ATTR: str = "_ftl_requires_locale" +from .function_decorator import ( + _FTL_REQUIRES_LOCALE_ATTR as _DECORATOR_REQUIRES_LOCALE_ATTR, +) +from .function_decorator import ( + fluent_function, +) +from .function_registry_helpers import ( + build_function_signature, + call_registered_function, + to_camel_case, +) +from .function_registry_introspection import ( + _FunctionRegistryIntrospectionMixin, +) +from .value_types import FluentFunction, FunctionSignature if TYPE_CHECKING: - from ftllexengine.runtime.function_metadata import FunctionMetadata + from collections.abc import Callable, Mapping, Sequence + +_FTL_REQUIRES_LOCALE_ATTR = _DECORATOR_REQUIRES_LOCALE_ATTR __all__ = [ "FluentFunction", @@ -55,97 +38,7 @@ def number_format(value, *, minimum_fraction_digits=0): ] -@overload -def fluent_function[F: Callable[..., FluentValue]]( # pragma: no cover - func: F, - *, - inject_locale: bool = False, -) -> F: ... 
- - -@overload -def fluent_function[F: Callable[..., FluentValue]]( # pragma: no cover - func: None = None, - *, - inject_locale: bool = False, -) -> Callable[[F], F]: ... - - -def fluent_function[F: Callable[..., FluentValue]]( - func: F | None = None, - *, - inject_locale: bool = False, -) -> F | Callable[[F], F]: - """Decorator for marking custom functions with Fluent metadata. - - Use this decorator to configure how your custom function integrates - with the Fluent resolution system. - - Args: - func: The function to decorate (auto-filled when used without parentheses) - inject_locale: If True, the bundle's locale code will be appended as - the final positional argument when the function is called from FTL. - Use this for locale-aware formatting functions. - - Returns: - Decorated function with Fluent metadata attributes set. - - Locale Injection Protocol: - When inject_locale=True, the bundle's locale code is APPENDED after all - positional arguments provided by FTL. For single-argument functions (the - common case for formatting), this effectively makes locale the second - positional argument. - - Expected function signature pattern: - def my_func(value: T, locale_code: str, *, keyword_args...) -> R - - FTL call pattern: - { MY_FUNC($value, kwarg: "x") } -> my_func(value, locale_code, kwarg="x") - - Built-in functions (NUMBER, DATETIME, CURRENCY) follow this pattern and - the resolver validates arity before injection. For custom functions, ensure - your signature matches the expected pattern. - - Example - Simple function (no locale): - >>> @fluent_function # doctest: +SKIP - ... def my_upper(value: str) -> str: - ... return value.upper() - >>> bundle.add_function("MYUPPER", my_upper) # doctest: +SKIP - FTL: `{ MY_UPPER($name) }` - - Example - Locale-aware function: - >>> @fluent_function(inject_locale=True) # doctest: +SKIP - ... def my_format(value: int, locale_code: str) -> str: - ... # Format number according to locale - ... 
return format_for_locale(value, locale_code) - >>> bundle.add_function("MYFORMAT", my_format) # doctest: +SKIP - FTL: `{ MY_FORMAT($count) }` - Bundle appends locale: `my_format(count_value, "en_US")` - """ - - def decorator(fn: F) -> F: - if inject_locale: - # Only create wrapper when we need to attach the locale marker. - # Wrapping is required because setattr on built-in functions or - # C-level callables may fail without a Python wrapper. - @wraps(fn) - def wrapper(*args: object, **kwargs: object) -> FluentValue: - return fn(*args, **kwargs) - - setattr(wrapper, _FTL_REQUIRES_LOCALE_ATTR, True) - return wrapper # type: ignore[return-value] # wrapper preserves F signature - - # No locale injection: return the original function unchanged, - # avoiding call-dispatch overhead on every invocation. - return fn - - # Handle both @fluent_function and @fluent_function() usage - if func is not None: - return decorator(func) - return decorator - - -class FunctionRegistry: +class FunctionRegistry(_FunctionRegistryIntrospectionMixin): """Manages Python ↔ FTL function calling convention bridge. Provides automatic parameter name conversion: @@ -224,83 +117,12 @@ def register( ) raise TypeError(msg) - # Default FTL name: UPPERCASE version of function name - if ftl_name is None: - ftl_name = getattr(func, "__name__", "unknown").upper() - - # Auto-generate parameter mappings from function signature - try: - sig = signature(func) - except ValueError as e: - # Some callables (certain C functions, mock objects) don't have signatures - msg = ( - f"Cannot register '{ftl_name}': callable has no inspectable signature. " - f"Use param_mapping parameter to provide explicit mappings. 
Error: {e}" - ) - raise TypeError(msg) from e - - # Validate signature compatibility with locale injection if required - if getattr(func, _FTL_REQUIRES_LOCALE_ATTR, False): - # Count positional-or-keyword parameters that can accept positional arguments - # POSITIONAL_ONLY and POSITIONAL_OR_KEYWORD both accept positional args - positional_capable = [ - p for p in sig.parameters.values() - if p.kind in (Parameter.POSITIONAL_ONLY, Parameter.POSITIONAL_OR_KEYWORD) - and p.name != "self" - ] - # Check if function has VAR_POSITIONAL (*args) which can accept any number of - # positional arguments. A function with *args can receive (value, locale_code). - has_var_positional = any( - p.kind == Parameter.VAR_POSITIONAL for p in sig.parameters.values() - ) - if not has_var_positional and len(positional_capable) < 2: - msg = ( - f"Function '{ftl_name}' marked with inject_locale=True requires " - f"at least 2 positional parameters (value, locale_code), but has " - f"{len(positional_capable)}. Signature: {sig}" - ) - raise TypeError(msg) - - auto_map: dict[str, str] = {} - - for param_name in sig.parameters: - # Skip 'self' and positional-only markers - if param_name in ("self", "/", "*"): - continue - - # Strip leading underscores for FTL name (Python convention for unused/private) - # but keep original param_name for the mapping value - stripped_name = param_name.lstrip("_") - - # Convert Python snake_case → FTL camelCase - camel_case = self._to_camel_case(stripped_name) - - # Detect underscore collision: e.g., both `_value` and `value` map to `value` - if camel_case in auto_map and auto_map[camel_case] != param_name: - msg = ( - f"Parameter name collision in function '{ftl_name}': " - f"'{auto_map[camel_case]}' and '{param_name}' both map to FTL " - f"parameter '{camel_case}'" - ) - raise ValueError(msg) - - auto_map[camel_case] = param_name - - # Merge custom mappings with auto-generated ones - # Custom mappings override auto-generated ones - final_map = {**auto_map, 
**(param_map or {})} - - # Convert to immutable sorted tuple for safe sharing across registries - # Sorting ensures deterministic ordering for testing and debugging - immutable_mapping = tuple(sorted(final_map.items())) - - # Store function signature - self._functions[ftl_name] = FunctionSignature( - python_name=getattr(func, "__name__", "unknown"), + signature_metadata = build_function_signature( + func, ftl_name=ftl_name, - param_mapping=immutable_mapping, - callable=func, + param_map=param_map, ) + self._functions[signature_metadata.ftl_name] = signature_metadata def call( self, @@ -326,48 +148,16 @@ def call( FrozenFluentError: If function not found (category=REFERENCE) FrozenFluentError: If function execution fails (category=RESOLUTION) """ - # Check if function exists if ftl_name not in self._functions: diag = ErrorTemplate.function_not_found(ftl_name) raise FrozenFluentError(str(diag), ErrorCategory.RESOLUTION, diagnostic=diag) - func_sig = self._functions[ftl_name] - - # Convert FTL camelCase args → Python snake_case args - # Uses cached MappingProxyType for O(1) lookup without per-call dict construction - python_kwargs = {} - for ftl_param, value in named.items(): - python_param = func_sig.param_dict.get(ftl_param, ftl_param) - python_kwargs[python_param] = value - - # Call Python function - # Only catch TypeError and ValueError which typically indicate argument issues: - # - TypeError: Wrong number/types of arguments passed to function - # - ValueError: Function explicitly rejected an argument value - # - # Do NOT catch KeyError, AttributeError, ArithmeticError, etc. These indicate - # bugs in the custom function implementation and should propagate to expose - # the real issue. Swallowing them masks debugging information. - # - # Type safety note: positional is Sequence[FluentValue] but custom functions - # may expect specific types. Type checking is enforced at runtime via - # TypeError, not at compile time. This is intentional for dynamic dispatch. 
- try: - return func_sig.callable(*positional, **python_kwargs) - except (TypeError, ValueError) as e: - diag = ErrorTemplate.function_failed(ftl_name, str(e)) - raise FrozenFluentError(str(diag), ErrorCategory.RESOLUTION, diagnostic=diag) from e - - def has_function(self, ftl_name: str) -> bool: - """Check if function is registered. - - Args: - ftl_name: Function name from FTL - - Returns: - True if function is registered - """ - return ftl_name in self._functions + return call_registered_function( + self._functions[ftl_name], + ftl_name=ftl_name, + positional=positional, + named=named, + ) def freeze(self) -> None: """Freeze registry to prevent further modifications. @@ -390,270 +180,13 @@ def frozen(self) -> bool: """ return self._frozen - def get_python_name(self, ftl_name: str) -> str | None: - """Get Python function name for FTL function. - - Args: - ftl_name: Function name from FTL - - Returns: - Python function name, or None if not found - """ - sig = self._functions.get(ftl_name) - return sig.python_name if sig else None - - def list_functions(self) -> list[str]: - """List all registered function names (FTL names). - - Returns: - List of FTL function names (e.g., ["NUMBER", "DATETIME", "CURRENCY"]) - - Example: - >>> registry = FunctionRegistry() # doctest: +SKIP - >>> registry.register(lambda x: str(x), ftl_name="CUSTOM") # doctest: +SKIP - >>> registry.list_functions() # doctest: +SKIP - ['CUSTOM'] - """ - return list(self._functions.keys()) - - def get_function_info(self, ftl_name: str) -> FunctionSignature | None: - """Get function metadata by FTL name. 
- - Args: - ftl_name: Function name from FTL (e.g., "NUMBER") - - Returns: - FunctionSignature with metadata, or None if not found - - Example: - >>> registry = FunctionRegistry() # doctest: +SKIP - >>> def my_func(value, *, min_digits=0): return str(value) # doctest: +SKIP - >>> registry.register(my_func, ftl_name="MYFUNC") # doctest: +SKIP - >>> info = registry.get_function_info("MYFUNC") # doctest: +SKIP - >>> info.python_name # doctest: +SKIP - 'my_func' - >>> info.ftl_name # doctest: +SKIP - 'MYFUNC' - """ - return self._functions.get(ftl_name) - - def get_callable(self, ftl_name: str) -> Callable[..., FluentValue] | None: - """Get the underlying callable for a registered function. - - Public API for accessing function callables without exposing internal - storage. Use this instead of accessing _functions directly. - - Args: - ftl_name: Function name from FTL (e.g., "NUMBER") - - Returns: - The registered callable, or None if function not found - - Example: - >>> registry = FunctionRegistry() # doctest: +SKIP - >>> def my_func(value): return str(value) # doctest: +SKIP - >>> registry.register(my_func, ftl_name="MYFUNC") # doctest: +SKIP - >>> callable_func = registry.get_callable("MYFUNC") # doctest: +SKIP - >>> callable_func is my_func # doctest: +SKIP - True - """ - sig = self._functions.get(ftl_name) - return sig.callable if sig else None - - def __iter__(self) -> Iterator[str]: - """Iterate over FTL function names. - - Returns: - Iterator over FTL function names - - Example: - >>> registry = FunctionRegistry() # doctest: +SKIP - >>> registry.register(lambda x: str(x), ftl_name="FUNC1") # doctest: +SKIP - >>> registry.register(lambda x: str(x), ftl_name="FUNC2") # doctest: +SKIP - >>> for name in registry: # doctest: +SKIP - ... print(name) - FUNC1 - FUNC2 - """ - return iter(self._functions) - - def __len__(self) -> int: - """Count of registered functions. 
- - Returns: - Number of registered functions - - Example: - >>> registry = FunctionRegistry() # doctest: +SKIP - >>> len(registry) # doctest: +SKIP - 0 - >>> registry.register(lambda x: str(x), ftl_name="FUNC") # doctest: +SKIP - >>> len(registry) # doctest: +SKIP - 1 - """ - return len(self._functions) - - def __contains__(self, ftl_name: str) -> bool: - """Check if function is registered using 'in' operator. - - Args: - ftl_name: Function name from FTL - - Returns: - True if function is registered - - Example: - >>> registry = FunctionRegistry() # doctest: +SKIP - >>> registry.register(lambda x: str(x), ftl_name="CUSTOM") # doctest: +SKIP - >>> "CUSTOM" in registry # doctest: +SKIP - True - >>> "MISSING" in registry # doctest: +SKIP - False - """ - return ftl_name in self._functions - - def __repr__(self) -> str: - """Return string representation for debugging. - - Returns: - String representation showing registered functions - - Example: - >>> registry = FunctionRegistry() # doctest: +SKIP - >>> repr(registry) # doctest: +SKIP - 'FunctionRegistry(functions=0)' - """ - return f"FunctionRegistry(functions={len(self._functions)})" - def copy(self) -> FunctionRegistry: - """Create an unfrozen copy of this registry. - - Returns: - New FunctionRegistry instance with the same functions. - The copy is always unfrozen, even if the original was frozen. - - Note: - FunctionSignature objects are shared between the original and - copy, but this is safe because FunctionSignature is fully - immutable (frozen dataclass with immutable tuple for param_mapping). - Modifications to the registry (adding/removing functions) in - either copy won't affect the other. - - Example: - >>> frozen_registry = get_shared_registry() # Frozen # doctest: +SKIP - >>> my_registry = frozen_registry.copy() # Unfrozen copy # doctest: +SKIP - >>> my_registry.register(my_custom_func) # Works! 
# doctest: +SKIP - """ + """Create an unfrozen copy of this registry.""" new_registry = FunctionRegistry() new_registry._functions = self._functions.copy() - # Note: _frozen is already False from __init__, copy is always unfrozen return new_registry - def should_inject_locale(self, ftl_name: str) -> bool: - """Check if locale should be injected for this function call. - - This is the canonical way to check locale injection requirements. - It checks the callable's _ftl_requires_locale attribute, which is - set by the @fluent_function decorator or _mark_locale_required(). - - Args: - ftl_name: FTL function name (e.g., "NUMBER", "CURRENCY") - - Returns: - True if locale should be injected, False otherwise. - - Logic: - 1. Check if function exists in registry - 2. Get the callable and check its _ftl_requires_locale attribute - 3. Only inject if the callable has the marker set to True - - Example: - >>> registry = FunctionRegistry() # doctest: +SKIP - >>> @fluent_function(inject_locale=True) # doctest: +SKIP - ... def my_format(value, locale_code): return str(value) - >>> registry.register(my_format, ftl_name="MYFORMAT") # doctest: +SKIP - >>> registry.should_inject_locale("MYFORMAT") # doctest: +SKIP - True - """ - if ftl_name not in self._functions: - return False - - callable_func = self._functions[ftl_name].callable - return getattr(callable_func, _FTL_REQUIRES_LOCALE_ATTR, False) is True - - def get_expected_positional_args(self, ftl_name: str) -> int | None: - """Get expected positional argument count for a built-in function. - - Used for arity validation before locale injection to prevent - TypeError from incorrect argument positioning. - - For custom functions (not in BUILTIN_FUNCTIONS), returns None - and the registry allows any number of positional arguments. 
- - Args: - ftl_name: FTL function name (e.g., "NUMBER", "CURRENCY") - - Returns: - Expected positional arg count (from FTL, before locale injection), - or None if not a built-in function with known arity. - - Example: - >>> registry = create_default_registry() # doctest: +SKIP - >>> registry.get_expected_positional_args("NUMBER") # doctest: +SKIP - 1 - >>> registry.get_expected_positional_args("CUSTOM") # doctest: +SKIP - None - """ - # Lazy import to avoid circular dependency at module load time - from ftllexengine.runtime.function_metadata import ( # noqa: PLC0415 - circular - BUILTIN_FUNCTIONS, - ) - - metadata = BUILTIN_FUNCTIONS.get(ftl_name) - return metadata.expected_positional_args if metadata else None - - def get_builtin_metadata(self, ftl_name: str) -> FunctionMetadata | None: - """Get metadata for a built-in function. - - Args: - ftl_name: FTL function name (e.g., "NUMBER", "DATETIME") - - Returns: - FunctionMetadata for built-in functions, None for custom functions. - - Example: - >>> registry = create_default_registry() # doctest: +SKIP - >>> meta = registry.get_builtin_metadata("NUMBER") # doctest: +SKIP - >>> meta.requires_locale # doctest: +SKIP - True - """ - # Lazy import to avoid circular dependency at module load time - from ftllexengine.runtime.function_metadata import ( # noqa: PLC0415 - circular - BUILTIN_FUNCTIONS, - ) - - return BUILTIN_FUNCTIONS.get(ftl_name) - @staticmethod def _to_camel_case(snake_case: str) -> str: - """Convert Python snake_case to FTL camelCase. 
- - Args: - snake_case: Python parameter name (e.g., "minimum_fraction_digits") - - Returns: - FTL parameter name (e.g., "minimumFractionDigits") - - Examples: - >>> FunctionRegistry._to_camel_case("minimum_fraction_digits") # doctest: +SKIP - 'minimumFractionDigits' - >>> FunctionRegistry._to_camel_case("use_grouping") # doctest: +SKIP - 'useGrouping' - >>> FunctionRegistry._to_camel_case("value") # doctest: +SKIP - 'value' - """ - # Split on underscores - components = snake_case.split("_") - - # First component stays lowercase, rest are capitalized - return components[0] + "".join(comp.capitalize() for comp in components[1:]) + """Convert Python snake_case to FTL camelCase.""" + return to_camel_case(snake_case) diff --git a/src/ftllexengine/runtime/function_decorator.py b/src/ftllexengine/runtime/function_decorator.py new file mode 100644 index 00000000..48b592db --- /dev/null +++ b/src/ftllexengine/runtime/function_decorator.py @@ -0,0 +1,52 @@ +"""Decorator helpers for Fluent-callable functions.""" + +from __future__ import annotations + +from collections.abc import Callable +from functools import wraps +from typing import overload + +from ftllexengine.core.value_types import FluentValue + +_FTL_REQUIRES_LOCALE_ATTR: str = "_ftl_requires_locale" + +__all__ = ["_FTL_REQUIRES_LOCALE_ATTR", "fluent_function"] + + +@overload +def fluent_function[F: Callable[..., FluentValue]]( # pragma: no cover + func: F, + *, + inject_locale: bool = False, +) -> F: ... + + +@overload +def fluent_function[F: Callable[..., FluentValue]]( # pragma: no cover + func: None = None, + *, + inject_locale: bool = False, +) -> Callable[[F], F]: ... 
+ + +def fluent_function[F: Callable[..., FluentValue]]( + func: F | None = None, + *, + inject_locale: bool = False, +) -> F | Callable[[F], F]: + """Decorator for marking custom functions with Fluent metadata.""" + + def decorator(fn: F) -> F: + if inject_locale: + @wraps(fn) + def wrapper(*args: object, **kwargs: object) -> FluentValue: + return fn(*args, **kwargs) + + setattr(wrapper, _FTL_REQUIRES_LOCALE_ATTR, True) + return wrapper # type: ignore[return-value] + + return fn + + if func is not None: + return decorator(func) + return decorator diff --git a/src/ftllexengine/runtime/function_registry_helpers.py b/src/ftllexengine/runtime/function_registry_helpers.py new file mode 100644 index 00000000..f6e94570 --- /dev/null +++ b/src/ftllexengine/runtime/function_registry_helpers.py @@ -0,0 +1,110 @@ +"""Helper functions for FunctionRegistry registration and dispatch.""" + +from __future__ import annotations + +from inspect import Parameter, signature +from typing import TYPE_CHECKING + +from ftllexengine.diagnostics import ErrorCategory, ErrorTemplate, FrozenFluentError + +from .function_decorator import _FTL_REQUIRES_LOCALE_ATTR +from .value_types import FunctionSignature + +__all__ = ["build_function_signature", "call_registered_function", "to_camel_case"] + +if TYPE_CHECKING: + from collections.abc import Callable, Mapping, Sequence + + from ftllexengine.core.value_types import FluentValue + + +def to_camel_case(snake_case: str) -> str: + """Convert Python snake_case to FTL camelCase.""" + components = snake_case.split("_") + return components[0] + "".join(comp.capitalize() for comp in components[1:]) + + +def build_function_signature( + func: Callable[..., FluentValue], + *, + ftl_name: str | None = None, + param_map: dict[str, str] | None = None, +) -> FunctionSignature: + """Build immutable registration metadata for one callable.""" + if ftl_name is None: + ftl_name = getattr(func, "__name__", "unknown").upper() + + try: + sig = signature(func) + except 
ValueError as e: + msg = ( + f"Cannot register '{ftl_name}': callable has no inspectable signature. " + f"Use param_mapping parameter to provide explicit mappings. Error: {e}" + ) + raise TypeError(msg) from e + + if getattr(func, _FTL_REQUIRES_LOCALE_ATTR, False): + positional_capable = [ + p + for p in sig.parameters.values() + if p.kind in (Parameter.POSITIONAL_ONLY, Parameter.POSITIONAL_OR_KEYWORD) + and p.name != "self" + ] + has_var_positional = any( + p.kind == Parameter.VAR_POSITIONAL for p in sig.parameters.values() + ) + if not has_var_positional and len(positional_capable) < 2: + msg = ( + f"Function '{ftl_name}' marked with inject_locale=True requires " + f"at least 2 positional parameters (value, locale_code), but has " + f"{len(positional_capable)}. Signature: {sig}" + ) + raise TypeError(msg) + + auto_map: dict[str, str] = {} + for param_name in sig.parameters: + if param_name in ("self", "/", "*"): + continue + + stripped_name = param_name.lstrip("_") + camel_case = to_camel_case(stripped_name) + + if camel_case in auto_map and auto_map[camel_case] != param_name: + msg = ( + f"Parameter name collision in function '{ftl_name}': " + f"'{auto_map[camel_case]}' and '{param_name}' both map to FTL " + f"parameter '{camel_case}'" + ) + raise ValueError(msg) + + auto_map[camel_case] = param_name + + final_map = {**auto_map, **(param_map or {})} + immutable_mapping = tuple(sorted(final_map.items())) + + return FunctionSignature( + python_name=getattr(func, "__name__", "unknown"), + ftl_name=ftl_name, + param_mapping=immutable_mapping, + callable=func, + ) + + +def call_registered_function( + func_sig: FunctionSignature, + *, + ftl_name: str, + positional: Sequence[FluentValue], + named: Mapping[str, FluentValue], +) -> FluentValue: + """Call a registered function signature with FTL-style named arguments.""" + python_kwargs = {} + for ftl_param, value in named.items(): + python_param = func_sig.param_dict.get(ftl_param, ftl_param) + 
python_kwargs[python_param] = value + + try: + return func_sig.callable(*positional, **python_kwargs) + except (TypeError, ValueError) as e: + diag = ErrorTemplate.function_failed(ftl_name, str(e)) + raise FrozenFluentError(str(diag), ErrorCategory.RESOLUTION, diagnostic=diag) from e diff --git a/src/ftllexengine/runtime/function_registry_introspection.py b/src/ftllexengine/runtime/function_registry_introspection.py new file mode 100644 index 00000000..cc6da725 --- /dev/null +++ b/src/ftllexengine/runtime/function_registry_introspection.py @@ -0,0 +1,88 @@ +"""Introspection mixin for FunctionRegistry.""" + +from __future__ import annotations + +from typing import TYPE_CHECKING, Protocol + +from .function_decorator import _FTL_REQUIRES_LOCALE_ATTR +from .function_metadata import BUILTIN_FUNCTIONS, FunctionMetadata + +if TYPE_CHECKING: + from collections.abc import Callable, Iterator + + from ftllexengine.core.value_types import FluentValue + + from .value_types import FunctionSignature + + +class _FunctionRegistryState(Protocol): + """Structural contract implemented by FunctionRegistry.""" + + _functions: dict[str, FunctionSignature] + + +class _FunctionRegistryIntrospectionMixin: + """Read-only and copy helpers for FunctionRegistry.""" + + def has_function(self: _FunctionRegistryState, ftl_name: str) -> bool: + """Check if function is registered.""" + return ftl_name in self._functions + + def get_python_name(self: _FunctionRegistryState, ftl_name: str) -> str | None: + """Get Python function name for FTL function.""" + sig = self._functions.get(ftl_name) + return sig.python_name if sig else None + + def list_functions(self: _FunctionRegistryState) -> list[str]: + """List all registered FTL function names.""" + return list(self._functions.keys()) + + def get_function_info( + self: _FunctionRegistryState, ftl_name: str + ) -> FunctionSignature | None: + """Get function metadata by FTL name.""" + return self._functions.get(ftl_name) + + def get_callable( + self: 
_FunctionRegistryState, ftl_name: str + ) -> Callable[..., FluentValue] | None: + """Get the underlying callable for a registered function.""" + sig = self._functions.get(ftl_name) + return sig.callable if sig else None + + def __iter__(self: _FunctionRegistryState) -> Iterator[str]: + """Iterate over registered FTL function names.""" + return iter(self._functions) + + def __len__(self: _FunctionRegistryState) -> int: + """Count registered functions.""" + return len(self._functions) + + def __contains__(self: _FunctionRegistryState, ftl_name: str) -> bool: + """Check if function is registered using the ``in`` operator.""" + return ftl_name in self._functions + + def __repr__(self: _FunctionRegistryState) -> str: + """Return string representation for debugging.""" + return f"FunctionRegistry(functions={len(self._functions)})" + + def should_inject_locale(self: _FunctionRegistryState, ftl_name: str) -> bool: + """Check if locale should be injected for this function call.""" + if ftl_name not in self._functions: + return False + + callable_func = self._functions[ftl_name].callable + return getattr(callable_func, _FTL_REQUIRES_LOCALE_ATTR, False) is True + + def get_expected_positional_args( + self: _FunctionRegistryState, ftl_name: str + ) -> int | None: + """Get expected positional argument count for a built-in function.""" + metadata = BUILTIN_FUNCTIONS.get(ftl_name) + return metadata.expected_positional_args if metadata else None + + def get_builtin_metadata( + self: _FunctionRegistryState, ftl_name: str + ) -> FunctionMetadata | None: + """Get metadata for a built-in function.""" + return BUILTIN_FUNCTIONS.get(ftl_name) diff --git a/src/ftllexengine/runtime/functions.py b/src/ftllexengine/runtime/functions.py index f7fa11f5..74283254 100644 --- a/src/ftllexengine/runtime/functions.py +++ b/src/ftllexengine/runtime/functions.py @@ -43,7 +43,8 @@ _make_fluent_number, ) -from .function_bridge import _FTL_REQUIRES_LOCALE_ATTR, FunctionRegistry +from .function_bridge 
import FunctionRegistry +from .function_decorator import _FTL_REQUIRES_LOCALE_ATTR from .locale_context import LocaleContext __all__ = ["create_default_registry", "get_shared_registry"] @@ -389,7 +390,7 @@ def currency_format( # Mark built-in functions that require locale injection. # This attribute is checked by FunctionRegistry.should_inject_locale() to determine # whether to append the bundle's locale to the function call arguments. -# The constant _FTL_REQUIRES_LOCALE_ATTR is imported from function_bridge.py +# The constant _FTL_REQUIRES_LOCALE_ATTR is imported from function_decorator.py # to ensure a single source of truth. diff --git a/src/ftllexengine/runtime/resolver_runtime.py b/src/ftllexengine/runtime/resolver_runtime.py index 1b952136..9868c13b 100644 --- a/src/ftllexengine/runtime/resolver_runtime.py +++ b/src/ftllexengine/runtime/resolver_runtime.py @@ -2,6 +2,7 @@ from __future__ import annotations +import asyncio import logging from collections.abc import Mapping, Sequence from typing import TYPE_CHECKING @@ -106,6 +107,8 @@ def _call_function_safe( return self._function_registry.call(func_name, positional, named) except FrozenFluentError: raise + except asyncio.CancelledError: + raise except Exception as error: # noqa: BLE001 - function adapters may raise arbitrary user exceptions logger.warning( "Custom function %s raised %s: %s", diff --git a/src/ftllexengine/syntax/visitor.py b/src/ftllexengine/syntax/visitor.py index 93a5b069..d4ceb442 100644 --- a/src/ftllexengine/syntax/visitor.py +++ b/src/ftllexengine/syntax/visitor.py @@ -16,7 +16,8 @@ """ from dataclasses import Field, fields, replace -from typing import ClassVar, cast +from functools import cache +from typing import Any, ClassVar, cast from ftllexengine.constants import MAX_DEPTH from ftllexengine.core.depth_guard import DepthGuard @@ -60,6 +61,12 @@ type TransformerResult = ASTNode | None | list[ASTNode] +@cache +def _cached_node_fields(node_type: type[object]) -> tuple[Field[object], 
...]: + """Cache dataclass field lookups with interpreter-managed locking.""" + return fields(cast("type[ASTNode]", node_type)) + + class ASTVisitor[T = ASTNode]: """Base visitor for traversing Fluent AST. @@ -106,10 +113,6 @@ class ASTVisitor[T = ASTNode]: # that require gc cycle collection. _class_visit_methods: ClassVar[dict[str, str]] = {} - # Class-level cache for dataclass fields per node type - # Avoids repeated introspection in generic_visit (PERF-VISITOR-002) - _fields_cache: ClassVar[dict[type[ASTNode], tuple[Field[object], ...]]] = {} - def __init_subclass__(cls, **kwargs: object) -> None: """Build class-level dispatch table when subclass is defined.""" super().__init_subclass__(**kwargs) @@ -172,14 +175,14 @@ def visit(self, node: ASTNode) -> T: node_type_name = type(node).__name__ method_name = self._class_visit_methods.get(node_type_name) if method_name is not None: - return getattr(self, method_name)(node) # type: ignore[no-any-return] + return cast("T", getattr(self, method_name)(node)) return self.generic_visit(node) def _get_node_fields(self, node_type: type[ASTNode]) -> tuple[Field[object], ...]: """Get cached dataclass fields for a node type. - Uses class-level cache to avoid repeated introspection. - Thread-safe: dict operations are atomic in CPython. + Uses an ``lru_cache``-backed helper to avoid repeated introspection + without relying on CPython dict atomicity for correctness. Args: node_type: The AST node type to get fields for @@ -187,9 +190,7 @@ def _get_node_fields(self, node_type: type[ASTNode]) -> tuple[Field[object], ... Returns: Tuple of dataclass Field objects """ - if node_type not in ASTVisitor._fields_cache: - ASTVisitor._fields_cache[node_type] = fields(node_type) - return ASTVisitor._fields_cache[node_type] + return _cached_node_fields(cast("Any", node_type)) def generic_visit(self, node: ASTNode) -> T: """Default visitor that traverses all child nodes. 
diff --git a/src/ftllexengine/validation/resource.py b/src/ftllexengine/validation/resource.py index f06edab3..27a9cd53 100644 --- a/src/ftllexengine/validation/resource.py +++ b/src/ftllexengine/validation/resource.py @@ -10,7 +10,7 @@ - _collect_entries(): Pass 2 - Collect messages/terms, check duplicates - _check_undefined_references(): Pass 3 - Validate message/term references - _detect_circular_references(): Pass 4 - Check for reference cycles - - _detect_long_chains(): Pass 5 - Check for chains exceeding MAX_DEPTH + - detect_long_chains(): Pass 5 - Check for chains exceeding MAX_DEPTH - SemanticValidator: Pass 6 - Fluent spec compliance Python 3.13+. @@ -35,9 +35,6 @@ from ftllexengine.syntax.cursor import LineOffsetCache from ftllexengine.syntax.reference_extraction import extract_references from ftllexengine.syntax.validator import SemanticValidator -from ftllexengine.validation.resource_graph import ( - _compute_longest_paths as _compute_longest_paths_impl, -) from ftllexengine.validation.resource_graph import ( build_dependency_graph, detect_long_chains, @@ -53,11 +50,6 @@ __all__ = ["validate_resource"] -# Backward-compatible private re-exports for existing tests and internal callers. 
-_build_dependency_graph = build_dependency_graph -_compute_longest_paths = _compute_longest_paths_impl -_detect_long_chains = detect_long_chains - logger = logging.getLogger(__name__) diff --git a/tests/fuzz/test_validation_resource_property.py b/tests/fuzz/test_validation_resource_property.py index 0135a800..658d06e3 100644 --- a/tests/fuzz/test_validation_resource_property.py +++ b/tests/fuzz/test_validation_resource_property.py @@ -23,10 +23,8 @@ from ftllexengine.diagnostics import ValidationResult, WarningSeverity from ftllexengine.diagnostics.codes import DiagnosticCode -from ftllexengine.validation.resource import ( - _compute_longest_paths, - validate_resource, -) +from ftllexengine.validation.resource import validate_resource +from ftllexengine.validation.resource_graph import _compute_longest_paths from tests.strategies import ( dependency_graphs, ftl_simple_messages, diff --git a/tests/strategies/validation.py b/tests/strategies/validation.py index d02ebc37..cc198416 100644 --- a/tests/strategies/validation.py +++ b/tests/strategies/validation.py @@ -461,7 +461,7 @@ def validation_dependency_graphs( """Generate typed dependency graphs for _compute_longest_paths testing. Generates adjacency lists with msg:/term: prefixed node names, mirroring - the output of _build_dependency_graph(). Covers msg-only, term-only, + the output of build_dependency_graph(). Covers msg-only, term-only, mixed-namespace, and empty graphs. 
Args: diff --git a/tests/test_architecture_contract.py b/tests/test_architecture_contract.py index e144123c..95be89e9 100644 --- a/tests/test_architecture_contract.py +++ b/tests/test_architecture_contract.py @@ -38,17 +38,29 @@ VERSION_PROVENANCE_PATTERN = re.compile(r"\b(?:Added|Pre|Post|Prior to)\s+v\d+\.\d+\.\d+\b|v\d+\.\d+\.\d+\+") FILE_LINE_BUDGETS = { - "src/ftllexengine/runtime/bundle.py": 900, - "src/ftllexengine/runtime/cache.py": 700, + "src/ftllexengine/runtime/bundle.py": 120, + "src/ftllexengine/runtime/bundle_lifecycle.py": 260, + "src/ftllexengine/runtime/bundle_mutation.py": 180, + "src/ftllexengine/runtime/cache.py": 500, + "src/ftllexengine/runtime/cache_audit.py": 80, + "src/ftllexengine/runtime/cache_introspection.py": 220, + "src/ftllexengine/runtime/cache_protocols.py": 80, "src/ftllexengine/runtime/locale_context.py": 500, "src/ftllexengine/runtime/locale_formatting.py": 400, "src/ftllexengine/runtime/resolver.py": 600, + "src/ftllexengine/runtime/function_bridge.py": 250, + "src/ftllexengine/runtime/function_decorator.py": 80, + "src/ftllexengine/runtime/function_registry_helpers.py": 160, + "src/ftllexengine/runtime/function_registry_introspection.py": 140, "src/ftllexengine/introspection/iso.py": 200, "src/ftllexengine/localization/orchestrator.py": 400, "src/ftllexengine/parsing/currency.py": 650, "src/ftllexengine/parsing/dates.py": 350, "src/ftllexengine/syntax/serializer.py": 700, - "src/ftllexengine/diagnostics/templates.py": 800, + "src/ftllexengine/diagnostics/templates.py": 80, + "src/ftllexengine/diagnostics/template_reference.py": 220, + "src/ftllexengine/diagnostics/template_runtime.py": 190, + "src/ftllexengine/diagnostics/template_parsing.py": 150, "src/ftllexengine/syntax/visitor.py": 750, "src/ftllexengine/syntax/cursor.py": 700, "tests/test_runtime_bundle_property_core.py": 800, diff --git a/tests/test_core_init.py b/tests/test_core_init.py index 0510e1bc..fd6e41e4 100644 --- a/tests/test_core_init.py +++ 
b/tests/test_core_init.py @@ -1,8 +1,4 @@ -"""Tests for ftllexengine.core package __getattr__ lazy-loading. - -Covers lines 85-86: AttributeError raised when a non-lazy attribute is -accessed on the core module (i.e., name not in _LAZY_DEPTH_GUARD). -""" +"""Tests for ftllexengine.core package re-exports.""" from __future__ import annotations @@ -11,29 +7,22 @@ import ftllexengine.core as core_module -class TestCoreModuleGetattr: - """Tests for core/__init__.py lazy-loading via __getattr__.""" +class TestCoreModuleExports: + """Tests for core/__init__.py stable re-exports.""" def test_unknown_attribute_raises_attribute_error(self) -> None: """Accessing a non-existent attribute on ftllexengine.core raises AttributeError. - Covers lines 85-86: the branch where name is not in _LAZY_DEPTH_GUARD. - The module __getattr__ falls through to raise AttributeError with - the standard module-attribute error message. + Unknown names should still raise the standard module-attribute error. """ + missing_name = "nonexistent_attribute" with pytest.raises(AttributeError, match="has no attribute"): - _ = core_module.nonexistent_attribute - - def test_depth_guard_lazy_loads_successfully(self) -> None: - """DepthGuard is lazily loaded from ftllexengine.core without error. + _ = getattr(core_module, missing_name) - Covers lines 77-84: the lazy-load branch for DepthGuard. - """ + def test_depth_guard_is_reexported(self) -> None: + """DepthGuard is re-exported from ftllexengine.core.""" assert core_module.DepthGuard is not None - def test_depth_clamp_lazy_loads_successfully(self) -> None: - """depth_clamp is lazily loaded from ftllexengine.core without error. - - Covers lines 77-84: the lazy-load branch for depth_clamp. 
- """ + def test_depth_clamp_is_reexported(self) -> None: + """depth_clamp is re-exported from ftllexengine.core.""" assert callable(core_module.depth_clamp) diff --git a/tests/test_documentation_tooling.py b/tests/test_documentation_tooling.py index 9cba86d0..a8a69e9b 100644 --- a/tests/test_documentation_tooling.py +++ b/tests/test_documentation_tooling.py @@ -25,6 +25,7 @@ "ftllexengine.parsing", "ftllexengine.diagnostics", "ftllexengine.introspection", + "ftllexengine.analysis", "ftllexengine.validation", ) DOCUMENTED_REPO_SCRIPTS = ( @@ -138,7 +139,7 @@ def test_run_examples_registers_contracts_for_all_shipped_examples() -> None: def test_validate_version_uses_afad_frontmatter_version_contract() -> None: - """validate_version should enforce the AFAD v3.5 `version:` frontmatter key.""" + """validate_version should enforce the AFAD v4.0 `version:` frontmatter key.""" pyproject = tomllib.loads((REPO_ROOT / "pyproject.toml").read_text(encoding="utf-8")) validate_version = _load_script_module( @@ -413,6 +414,22 @@ def test_sdist_includes_root_frontmatter_docs_and_readme() -> None: assert missing == [] +def test_repo_agent_guidance_is_git_trackable_but_not_in_sdist() -> None: + """Agent instructions should be committable without becoming package payload.""" + gitignore = (REPO_ROOT / ".gitignore").read_text(encoding="utf-8") + pyproject = tomllib.loads((REPO_ROOT / "pyproject.toml").read_text(encoding="utf-8")) + only_include = set(pyproject["tool"]["hatch"]["build"]["targets"]["sdist"]["only-include"]) + + assert "!/AGENTS.md" in gitignore + assert "!/.codex/" in gitignore + assert "!/.codex/**" in gitignore + + assert "AGENTS.md" not in only_include + assert "/AGENTS.md" not in only_include + assert "/.codex" not in only_include + assert "/.codex/" not in only_include + + def test_release_protocol_lives_under_docs_and_repo_links_follow_it() -> None: """Release protocol should live under docs/ and repo surfaces should link there.""" release_doc = REPO_ROOT / "docs" 
/ "RELEASE_PROTOCOL.md" diff --git a/tests/test_init_module.py b/tests/test_init_module.py index 60d009c6..3945bd5c 100644 --- a/tests/test_init_module.py +++ b/tests/test_init_module.py @@ -277,11 +277,22 @@ def test_parser_only_runtime_formatter_access_still_gives_install_hint(self) -> def test_internal_runtime_import_failure_is_not_masked_as_missing_babel(self) -> None: """A broken runtime import must surface its real error instead of a Babel hint.""" - with ( - pytest.raises(ModuleNotFoundError, match=r"ftllexengine\.runtime\.bundle"), - _fresh_ftl_import(blocked_imports=frozenset({"ftllexengine.runtime.bundle"})), + import importlib + + def fail_bundle_import(name: str, package: str | None = None) -> ModuleType: + if name == "ftllexengine.runtime.bundle": + msg = "blocked import: ftllexengine.runtime.bundle" + raise ModuleNotFoundError(msg) + return importlib.import_module(name, package) + + with _fresh_ftl_import() as ftllexengine, pytest.raises( + ModuleNotFoundError, + match=r"blocked import: ftllexengine\.runtime\.bundle", + ), patch( + "ftllexengine._optional_exports.import_module", + side_effect=fail_bundle_import, ): - pass + _ = ftllexengine.FluentBundle class TestUnknownAttributeError: @@ -328,6 +339,26 @@ def test_helper_without_parser_only_hint_raises_plain_attribute_error(self) -> N optional_attrs=frozenset({"FluentBundle"}), ) + def test_unknown_facade_contract_raises_key_error(self) -> None: + """Unknown facade names fail fast instead of fabricating empty optional exports.""" + from ftllexengine._optional_exports import babel_optional_attr_tuple + + with pytest.raises( + KeyError, + match=r"No optional export contract registered for facade 'ftllexengine\.unknown'", + ): + babel_optional_attr_tuple("ftllexengine.unknown") + + def test_unknown_optional_export_name_raises_attribute_error(self) -> None: + """Known facades reject optional-export names they do not own.""" + from ftllexengine._optional_exports import load_babel_optional_export + + with 
pytest.raises( + AttributeError, + match=r"module 'ftllexengine' has no optional Babel export 'DefinitelyMissing'", + ): + load_babel_optional_export("ftllexengine", "DefinitelyMissing") + class TestDirectImportIntrospectionSymbols: """MessageVariableValidationResult and validate_message_variables imported directly.""" diff --git a/tests/test_introspection_iso.py b/tests/test_introspection_iso.py index 80dfdf4c..dd5e562b 100644 --- a/tests/test_introspection_iso.py +++ b/tests/test_introspection_iso.py @@ -41,8 +41,10 @@ _get_babel_currency_name, _get_babel_currency_symbol, _get_babel_official_languages, + _get_babel_territories, _get_babel_territory_currencies, ) +from ftllexengine.introspection.iso_babel import _is_unknown_locale_error class TestTerritoryInfo: @@ -1739,6 +1741,193 @@ def mock_import( assert exc_info.value is original_exc +class TestIsoBabelDefensiveBranches: + """Direct coverage for defensive helper branches in iso_babel.py.""" + + def test_is_unknown_locale_error_returns_false_when_babel_is_unavailable(self) -> None: + """BabelImportError while resolving the error class yields False.""" + with patch( + "ftllexengine.introspection.iso_babel.get_unknown_locale_error_class", + side_effect=BabelImportError("UnknownLocaleError"), + ): + assert _is_unknown_locale_error(ValueError("not a locale error")) is False + + def test_is_unknown_locale_error_returns_true_for_matching_exception(self) -> None: + """The helper returns True when the exception matches Babel's error class.""" + + class FakeUnknownLocaleError(Exception): + """Stand-in for babel.core.UnknownLocaleError.""" + + with patch( + "ftllexengine.introspection.iso_babel.get_unknown_locale_error_class", + return_value=FakeUnknownLocaleError, + ): + assert _is_unknown_locale_error(FakeUnknownLocaleError("bad locale")) is True + + def test_get_babel_territories_without_unknown_locale_class_success(self) -> None: + """The no-UnknownLocaleError branch still returns territory data when lookup 
succeeds.""" + + class FakeLocale: + def __init__(self) -> None: + self.territories = {"US": "United States"} + + with ( + patch( + "ftllexengine.introspection.iso_babel._maybe_unknown_locale_error_class", + return_value=None, + ), + patch( + "ftllexengine.introspection.iso_babel._get_babel_locale", + return_value=FakeLocale(), + ), + ): + assert _get_babel_territories("en") == {"US": "United States"} + + def test_get_babel_territories_without_unknown_locale_class_failure(self) -> None: + """The no-UnknownLocaleError branch returns an empty mapping on locale lookup errors.""" + with ( + patch( + "ftllexengine.introspection.iso_babel._maybe_unknown_locale_error_class", + return_value=None, + ), + patch( + "ftllexengine.introspection.iso_babel._get_babel_locale", + side_effect=ValueError("bad locale"), + ), + ): + assert _get_babel_territories("en") == {} + + def test_get_babel_currency_name_without_unknown_locale_class_success(self) -> None: + """The no-UnknownLocaleError branch returns the localized currency name.""" + + class FakeLocale: + def __init__(self) -> None: + self.currencies = {"USD": "US Dollar"} + + class FakeLocaleClass: + @staticmethod + def parse(_locale_str: str) -> FakeLocale: + return FakeLocale() + + class FakeNumbers: + @staticmethod + def get_currency_name(_code: str, *, locale: str) -> str: + assert locale == "en" + return "US Dollar" + + with ( + patch( + "ftllexengine.introspection.iso_babel._maybe_unknown_locale_error_class", + return_value=None, + ), + patch( + "ftllexengine.introspection.iso_babel.get_locale_class", + return_value=FakeLocaleClass, + ), + patch( + "ftllexengine.introspection.iso_babel.get_babel_numbers", + return_value=FakeNumbers, + ), + ): + assert _get_babel_currency_name("USD", "en") == "US Dollar" + + def test_get_babel_currency_name_without_unknown_locale_class_failure(self) -> None: + """The no-UnknownLocaleError branch returns None on locale parse errors.""" + + class FakeLocaleClass: + @staticmethod + def 
parse(_locale_str: str) -> object: + msg = "bad locale" + raise ValueError(msg) + + with ( + patch( + "ftllexengine.introspection.iso_babel._maybe_unknown_locale_error_class", + return_value=None, + ), + patch( + "ftllexengine.introspection.iso_babel.get_locale_class", + return_value=FakeLocaleClass, + ), + patch( + "ftllexengine.introspection.iso_babel.get_babel_numbers", + return_value=MagicMock(), + ), + ): + assert _get_babel_currency_name("USD", "en") is None + + def test_get_babel_currency_name_without_unknown_locale_class_missing_code(self) -> None: + """The no-UnknownLocaleError branch returns None for absent currency codes.""" + + class FakeLocale: + def __init__(self) -> None: + self.currencies = {"EUR": "Euro"} + + class FakeLocaleClass: + @staticmethod + def parse(_locale_str: str) -> FakeLocale: + return FakeLocale() + + with ( + patch( + "ftllexengine.introspection.iso_babel._maybe_unknown_locale_error_class", + return_value=None, + ), + patch( + "ftllexengine.introspection.iso_babel.get_locale_class", + return_value=FakeLocaleClass, + ), + patch( + "ftllexengine.introspection.iso_babel.get_babel_numbers", + return_value=MagicMock(), + ), + ): + assert _get_babel_currency_name("USD", "en") is None + + def test_get_babel_currency_symbol_without_unknown_locale_class_success(self) -> None: + """The no-UnknownLocaleError branch returns the localized symbol when lookup succeeds.""" + + class FakeNumbers: + @staticmethod + def get_currency_symbol(_code: str, *, locale: str) -> str: + assert locale == "en" + return "$" + + with ( + patch( + "ftllexengine.introspection.iso_babel._maybe_unknown_locale_error_class", + return_value=None, + ), + patch( + "ftllexengine.introspection.iso_babel.get_babel_numbers", + return_value=FakeNumbers, + ), + ): + assert _get_babel_currency_symbol("USD", "en") == "$" + + def test_get_babel_currency_symbol_without_unknown_locale_class_failure(self) -> None: + """The no-UnknownLocaleError branch falls back to the code on lookup 
errors.""" + + class FakeNumbers: + @staticmethod + def get_currency_symbol(_code: str, *, locale: str) -> str: + _ = locale + msg = "bad locale" + raise ValueError(msg) + + with ( + patch( + "ftllexengine.introspection.iso_babel._maybe_unknown_locale_error_class", + return_value=None, + ), + patch( + "ftllexengine.introspection.iso_babel.get_babel_numbers", + return_value=FakeNumbers, + ), + ): + assert _get_babel_currency_symbol("USD", "en") == "USD" + + # =========================================================================== # get_currency_decimal_digits # =========================================================================== diff --git a/tests/test_regression_currency_decimal.py b/tests/test_regression_currency_decimal.py index 6841cbf4..25600bbb 100644 --- a/tests/test_regression_currency_decimal.py +++ b/tests/test_regression_currency_decimal.py @@ -16,7 +16,7 @@ from ftllexengine.constants import ISO_4217_DECIMAL_DIGITS from ftllexengine.runtime.locale_context import LocaleContext from ftllexengine.validation import validate_resource -from ftllexengine.validation.resource import _detect_long_chains +from ftllexengine.validation.resource_graph import detect_long_chains # ============================================================================ # LOGIC-CURRENCY-INCONSISTENCY-001: CURRENCY DECIMAL CONSISTENCY @@ -122,7 +122,7 @@ def test_single_long_chain_reported(self) -> None: "msg:d": {"msg:e"}, # Depth 4 chain: a->b->c->d->e } - warnings = _detect_long_chains(graph, max_depth=3) + warnings = detect_long_chains(graph, max_depth=3) assert len(warnings) == 1 assert "exceeds maximum" in warnings[0].message @@ -142,7 +142,7 @@ def test_multiple_long_chains_all_reported(self) -> None: "msg:w": set(), } - warnings = _detect_long_chains(graph, max_depth=2) + warnings = detect_long_chains(graph, max_depth=2) # Both chains should be reported assert len(warnings) >= 1, "At least one chain should be reported" @@ -153,7 +153,7 @@ def 
test_no_warnings_within_limit(self) -> None: "msg:b": set(), } - warnings = _detect_long_chains(graph, max_depth=5) + warnings = detect_long_chains(graph, max_depth=5) assert len(warnings) == 0 def test_warnings_sorted_by_depth(self) -> None: @@ -174,7 +174,7 @@ def test_warnings_sorted_by_depth(self) -> None: "msg:l5": set(), } - warnings = _detect_long_chains(graph, max_depth=2) + warnings = detect_long_chains(graph, max_depth=2) # If multiple warnings, deepest should be first if len(warnings) >= 2: # Extract depths from messages @@ -205,7 +205,7 @@ def test_validate_resource_reports_multiple_chains(self) -> None: def test_empty_graph_no_warnings(self) -> None: """Empty graph produces no warnings.""" - warnings = _detect_long_chains({}, max_depth=100) + warnings = detect_long_chains({}, max_depth=100) assert len(warnings) == 0 diff --git a/tests/test_runtime_bundle.py b/tests/test_runtime_bundle.py index 71d300d6..45e98da9 100644 --- a/tests/test_runtime_bundle.py +++ b/tests/test_runtime_bundle.py @@ -983,7 +983,7 @@ class TestBundleForSystemLocale: def test_creates_bundle_with_detected_locale(self) -> None: """for_system_locale creates bundle with system locale.""" with patch( - "ftllexengine.runtime.bundle.get_system_locale", + "ftllexengine.runtime.bundle_lifecycle.get_system_locale", return_value="en_US", ): bundle = FluentBundle.for_system_locale() @@ -992,7 +992,7 @@ def test_creates_bundle_with_detected_locale(self) -> None: def test_passes_configuration_parameters(self) -> None: """for_system_locale passes all configuration parameters.""" with patch( - "ftllexengine.runtime.bundle.get_system_locale", + "ftllexengine.runtime.bundle_lifecycle.get_system_locale", return_value="de_DE", ): bundle = FluentBundle.for_system_locale( @@ -1012,7 +1012,7 @@ def test_passes_configuration_parameters(self) -> None: def test_raises_when_locale_unavailable(self) -> None: """for_system_locale raises RuntimeError when locale unavailable.""" with patch( - 
"ftllexengine.runtime.bundle.get_system_locale", + "ftllexengine.runtime.bundle_lifecycle.get_system_locale", side_effect=RuntimeError("Cannot determine system locale"), ), pytest.raises(RuntimeError, match="Cannot determine"): FluentBundle.for_system_locale() diff --git a/tests/test_runtime_bundle_cache_security.py b/tests/test_runtime_bundle_cache_security.py index 927fc36d..64780b7d 100644 --- a/tests/test_runtime_bundle_cache_security.py +++ b/tests/test_runtime_bundle_cache_security.py @@ -297,7 +297,7 @@ def test_for_system_locale_accepts_cache_config(self) -> None: max_entry_weight=8000, max_errors_per_entry=30, ) - with patch("ftllexengine.runtime.bundle.get_system_locale", return_value="en_US"): + with patch("ftllexengine.runtime.bundle_lifecycle.get_system_locale", return_value="en_US"): bundle = FluentBundle.for_system_locale(cache=cfg, strict=True) assert bundle.cache_enabled is True @@ -312,7 +312,7 @@ def test_for_system_locale_accepts_cache_config(self) -> None: def test_for_system_locale_cache_parameters_default(self) -> None: """for_system_locale uses default CacheConfig values.""" - with patch("ftllexengine.runtime.bundle.get_system_locale", return_value="en_US"): + with patch("ftllexengine.runtime.bundle_lifecycle.get_system_locale", return_value="en_US"): bundle = FluentBundle.for_system_locale(cache=CacheConfig()) assert bundle.cache_enabled is True diff --git a/tests/test_runtime_function_bridge.py b/tests/test_runtime_function_bridge.py index ad1d783b..ae302882 100644 --- a/tests/test_runtime_function_bridge.py +++ b/tests/test_runtime_function_bridge.py @@ -936,7 +936,7 @@ def __call__(self, *args: object, **kwargs: object) -> str: # noqa: ARG002 - un with ( patch( - "ftllexengine.runtime.function_bridge.signature", + "ftllexengine.runtime.function_registry_helpers.signature", side_effect=ValueError("No signature"), ), pytest.raises( diff --git a/tests/test_runtime_function_registry.py b/tests/test_runtime_function_registry.py index 
61211ba3..79c00b45 100644 --- a/tests/test_runtime_function_registry.py +++ b/tests/test_runtime_function_registry.py @@ -126,7 +126,7 @@ def custom_curr(_value: int, *, currency: str = "USD") -> str: registry.register(custom_curr, ftl_name="CURRENCY") - with patch("ftllexengine.runtime.bundle.get_system_locale", return_value="en_US"): + with patch("ftllexengine.runtime.bundle_lifecycle.get_system_locale", return_value="en_US"): bundle = FluentBundle.for_system_locale(functions=registry) bundle.add_resource('test = { CURRENCY(100, currency: "EUR") }') result, errors = bundle.format_pattern("test") diff --git a/tests/test_runtime_resolver_functions.py b/tests/test_runtime_resolver_functions.py index 97f1e1d1..7799d27e 100644 --- a/tests/test_runtime_resolver_functions.py +++ b/tests/test_runtime_resolver_functions.py @@ -7,6 +7,9 @@ from __future__ import annotations +import asyncio + +import pytest from hypothesis import event, given from hypothesis import strategies as st @@ -79,6 +82,19 @@ def double_func(value: object) -> str: assert "10" in result assert len(errors) == 0 + def test_custom_function_cancelled_error_propagates(self) -> None: + """Custom function cancellation must propagate instead of degrading to fallback text.""" + + def cancelled_func(_value: object) -> str: + raise asyncio.CancelledError + + bundle = FluentBundle("en-US", strict=False) + bundle.add_function("CANCEL", cancelled_func) + bundle.add_resource("msg = { CANCEL($x) }") + + with pytest.raises(asyncio.CancelledError): + bundle.format_pattern("msg", {"x": 42}) + # ============================================================================ # FUNCTION EXCEPTION HANDLING (locale injection path - lines 913-928) diff --git a/tests/test_validation_resource.py b/tests/test_validation_resource.py index 62fd7110..1993ee8e 100644 --- a/tests/test_validation_resource.py +++ b/tests/test_validation_resource.py @@ -21,12 +21,14 @@ ) from ftllexengine.syntax.cursor import LineOffsetCache from 
ftllexengine.validation.resource import ( - _build_dependency_graph, - _compute_longest_paths, _detect_circular_references, _extract_syntax_errors, validate_resource, ) +from ftllexengine.validation.resource_graph import ( + _compute_longest_paths, + build_dependency_graph, +) class TestSyntaxErrorExtraction: @@ -746,7 +748,7 @@ def test_duplicate_cycle_detection_line_243(self) -> None: terms_dict: dict[str, Term] = {} # Build dependency graph - graph = _build_dependency_graph(messages_dict, terms_dict) + graph = build_dependency_graph(messages_dict, terms_dict) # Call the real function without mocking warnings = _detect_circular_references(graph) @@ -793,7 +795,7 @@ def test_duplicate_cycle_detection_line_257(self) -> None: terms_dict = {"ta": term_a, "tb": term_b} # Build dependency graph - graph = _build_dependency_graph(messages_dict, terms_dict) + graph = build_dependency_graph(messages_dict, terms_dict) # Call the real function without mocking warnings = _detect_circular_references(graph) @@ -1273,7 +1275,7 @@ def test_diamond_dependency_in_real_messages(self) -> None: terms_dict: dict[str, Term] = {} # Build dependency graph - graph = _build_dependency_graph(messages_dict, terms_dict) + graph = build_dependency_graph(messages_dict, terms_dict) # Compute longest paths (exercises diamond pattern) result = _compute_longest_paths(graph) @@ -1315,7 +1317,7 @@ def test_cross_type_diamond_message_and_term(self) -> None: terms_dict = {"termB": term_b} # Build dependency graph - graph = _build_dependency_graph(messages_dict, terms_dict) + graph = build_dependency_graph(messages_dict, terms_dict) # Compute longest paths result = _compute_longest_paths(graph) @@ -1375,7 +1377,7 @@ def test_property_complex_dependency_graphs(self, num_messages: int) -> None: terms_dict: dict[str, Term] = {} # Build and compute - should not raise - graph = _build_dependency_graph(messages_dict, terms_dict) + graph = build_dependency_graph(messages_dict, terms_dict) result = 
_compute_longest_paths(graph) # All messages should be in result diff --git a/tests/test_validation_resource_chain_depth.py b/tests/test_validation_resource_chain_depth.py index cd002358..e15220a7 100644 --- a/tests/test_validation_resource_chain_depth.py +++ b/tests/test_validation_resource_chain_depth.py @@ -25,11 +25,11 @@ Term, TextElement, ) -from ftllexengine.validation.resource import ( - _build_dependency_graph, +from ftllexengine.validation.resource import validate_resource +from ftllexengine.validation.resource_graph import ( _compute_longest_paths, - _detect_long_chains, - validate_resource, + build_dependency_graph, + detect_long_chains, ) # ============================================================================ @@ -142,9 +142,9 @@ def test_chain_longer_than_10_nodes_shows_truncation(self) -> None: terms_dict: dict[str, Term] = {} # Build dependency graph - graph = _build_dependency_graph(messages_dict, terms_dict) + graph = build_dependency_graph(messages_dict, terms_dict) # Use max_depth=5 to trigger warning - warnings = _detect_long_chains(graph, max_depth=5) + warnings = detect_long_chains(graph, max_depth=5) # VAL-REDUNDANT-REPORTS-001: Reports ALL chains exceeding max_depth # With 15 nodes and max_depth=5, chains from msg0-msg8 all exceed the limit @@ -181,9 +181,9 @@ def test_chain_exactly_10_nodes_no_truncation(self) -> None: terms_dict: dict[str, Term] = {} - graph = _build_dependency_graph(messages_dict, terms_dict) + graph = build_dependency_graph(messages_dict, terms_dict) # Use max_depth=5 to trigger warning - warnings = _detect_long_chains(graph, max_depth=5) + warnings = detect_long_chains(graph, max_depth=5) # VAL-REDUNDANT-REPORTS-001: Reports ALL chains exceeding max_depth assert len(warnings) >= 1 @@ -218,8 +218,8 @@ def test_chain_less_than_10_nodes_no_truncation(self) -> None: terms_dict: dict[str, Term] = {} - graph = _build_dependency_graph(messages_dict, terms_dict) - warnings = _detect_long_chains(graph, max_depth=3) + 
graph = build_dependency_graph(messages_dict, terms_dict) + warnings = detect_long_chains(graph, max_depth=3) # VAL-REDUNDANT-REPORTS-001: Reports ALL chains exceeding max_depth assert len(warnings) >= 1 @@ -269,8 +269,8 @@ def test_long_chain_truncation_property(self, chain_length: int) -> None: terms_dict: dict[str, Term] = {} - graph = _build_dependency_graph(messages_dict, terms_dict) - warnings = _detect_long_chains(graph, max_depth=5) + graph = build_dependency_graph(messages_dict, terms_dict) + warnings = detect_long_chains(graph, max_depth=5) # VAL-REDUNDANT-REPORTS-001: Reports ALL chains exceeding max_depth # First warning (deepest chain) should show truncation for > 10 nodes @@ -414,14 +414,14 @@ def test_validate_resource_detects_long_chain(self) -> None: def test_empty_graph_no_chain_warnings(self) -> None: """Empty dependency graph produces no chain warnings.""" graph: dict[str, set[str]] = {} - warnings = _detect_long_chains(graph) + warnings = detect_long_chains(graph) assert len(warnings) == 0 def test_single_node_no_chain_warnings(self) -> None: """Single node with no dependencies produces no chain warnings.""" graph: dict[str, set[str]] = {"msg:single": set()} - warnings = _detect_long_chains(graph) + warnings = detect_long_chains(graph) assert len(warnings) == 0 diff --git a/tests/test_validation_resource_cross_resource.py b/tests/test_validation_resource_cross_resource.py index 2054d12b..1eb7c16e 100644 --- a/tests/test_validation_resource_cross_resource.py +++ b/tests/test_validation_resource_cross_resource.py @@ -28,11 +28,11 @@ ) from ftllexengine.syntax.cursor import LineOffsetCache from ftllexengine.validation.resource import ( - _build_dependency_graph, _check_undefined_references, _collect_entries, validate_resource, ) +from ftllexengine.validation.resource_graph import build_dependency_graph # ============================================================================ # Shadow Warnings @@ -291,7 +291,7 @@ def 
test_known_messages_added_as_graph_nodes(self) -> None: messages_dict: dict[str, Message] = {} terms_dict: dict[str, Term] = {} - graph = _build_dependency_graph( + graph = build_dependency_graph( messages_dict, terms_dict, known_messages=frozenset(["known1", "known2"]), @@ -309,7 +309,7 @@ def test_known_terms_added_as_graph_nodes(self) -> None: messages_dict: dict[str, Message] = {} terms_dict: dict[str, Term] = {} - graph = _build_dependency_graph( + graph = build_dependency_graph( messages_dict, terms_dict, known_terms=frozenset(["known1", "known2"]), @@ -340,7 +340,7 @@ def test_local_entries_reference_known_entries_in_graph(self) -> None: messages_dict = {"local": message} terms_dict: dict[str, Term] = {} - graph = _build_dependency_graph( + graph = build_dependency_graph( messages_dict, terms_dict, known_messages=frozenset(["known"]), diff --git a/tests/test_validation_resource_dependency_graph.py b/tests/test_validation_resource_dependency_graph.py index 17ceeb3d..d8862560 100644 --- a/tests/test_validation_resource_dependency_graph.py +++ b/tests/test_validation_resource_dependency_graph.py @@ -1,7 +1,7 @@ -"""Dependency graph construction tests for validation/resource.py. +"""Dependency graph construction tests for validation/resource_graph.py. Tests attribute-qualified reference resolution and known entry dependency -propagation to achieve 100% coverage of _build_dependency_graph and +propagation to achieve 100% coverage of build_dependency_graph and related helper functions. 
Coverage targets: @@ -29,10 +29,8 @@ TextElement, Variant, ) -from ftllexengine.validation.resource import ( - _build_dependency_graph, - _detect_circular_references, -) +from ftllexengine.validation.resource import _detect_circular_references +from ftllexengine.validation.resource_graph import build_dependency_graph class TestAttributeQualifiedMessageReferences: @@ -64,7 +62,7 @@ def test_undefined_attribute_qualified_message_reference(self) -> None: messages_dict = {"referrer": ref_msg} terms_dict: dict[str, Term] = {} - graph = _build_dependency_graph(messages_dict, terms_dict) + graph = build_dependency_graph(messages_dict, terms_dict) # Should have "msg:referrer" node but NO dependency (undefined.tooltip ignored) assert "msg:referrer" in graph @@ -111,7 +109,7 @@ def test_message_attribute_reference_creates_qualified_node(self) -> None: messages_dict = {"base": base_msg, "referrer": ref_msg} terms_dict: dict[str, Term] = {} - graph = _build_dependency_graph(messages_dict, terms_dict) + graph = build_dependency_graph(messages_dict, terms_dict) # Should have "msg:referrer" node with dependency on "msg:base.tooltip" assert "msg:referrer" in graph @@ -143,7 +141,7 @@ def test_message_attribute_reference_with_known_messages(self) -> None: terms_dict: dict[str, Term] = {} known_messages = frozenset({"known"}) - graph = _build_dependency_graph( + graph = build_dependency_graph( messages_dict, terms_dict, known_messages=known_messages, @@ -175,7 +173,7 @@ def test_bare_message_reference_creates_unqualified_node(self) -> None: messages_dict = {"a": msg_a, "b": msg_b} terms_dict: dict[str, Term] = {} - graph = _build_dependency_graph(messages_dict, terms_dict) + graph = build_dependency_graph(messages_dict, terms_dict) # Should have "msg:b" -> "msg:a" (no attribute qualification) assert "msg:b" in graph @@ -211,7 +209,7 @@ def test_undefined_attribute_qualified_term_reference(self) -> None: messages_dict = {"msg": msg} terms_dict: dict[str, Term] = {} - graph = 
_build_dependency_graph(messages_dict, terms_dict) + graph = build_dependency_graph(messages_dict, terms_dict) # Should have "msg:msg" node but NO dependency (undefined term ignored) assert "msg:msg" in graph @@ -258,7 +256,7 @@ def test_term_attribute_reference_creates_qualified_node(self) -> None: messages_dict = {"welcome": msg} terms_dict = {"brand": base_term} - graph = _build_dependency_graph(messages_dict, terms_dict) + graph = build_dependency_graph(messages_dict, terms_dict) # Should have "msg:welcome" node with dependency on "term:brand.short" assert "msg:welcome" in graph @@ -290,7 +288,7 @@ def test_term_attribute_reference_with_known_terms(self) -> None: terms_dict: dict[str, Term] = {} known_terms = frozenset({"known_term"}) - graph = _build_dependency_graph( + graph = build_dependency_graph( messages_dict, terms_dict, known_terms=known_terms, @@ -323,7 +321,7 @@ def test_bare_term_reference_creates_unqualified_node(self) -> None: messages_dict = {"welcome": msg} terms_dict = {"brand": term_brand} - graph = _build_dependency_graph(messages_dict, terms_dict) + graph = build_dependency_graph(messages_dict, terms_dict) # Should have "msg:welcome" -> "term:brand" (no attribute qualification) assert "msg:welcome" in graph @@ -355,7 +353,7 @@ def test_known_message_with_dependencies_propagates_to_graph(self) -> None: "known_a": frozenset({"msg:known_b", "term:some_term"}), } - graph = _build_dependency_graph( + graph = build_dependency_graph( messages_dict, terms_dict, known_messages=known_messages, @@ -387,7 +385,7 @@ def test_known_message_without_deps_entry_gets_empty_set(self) -> None: "some_other_msg": frozenset({"msg:dependency"}), } - graph = _build_dependency_graph( + graph = build_dependency_graph( messages_dict, terms_dict, known_messages=known_messages, @@ -430,7 +428,7 @@ def test_known_message_already_in_graph_not_overwritten(self) -> None: "shared": frozenset({"msg:different_dependency"}), } - graph = _build_dependency_graph( + graph = 
build_dependency_graph( messages_dict, terms_dict, known_messages=known_messages, @@ -469,7 +467,7 @@ def test_known_term_with_dependencies_propagates_to_graph(self) -> None: "known_term_a": frozenset({"term:known_term_b", "msg:some_msg"}), } - graph = _build_dependency_graph( + graph = build_dependency_graph( messages_dict, terms_dict, known_terms=known_terms, @@ -501,7 +499,7 @@ def test_known_term_without_deps_entry_gets_empty_set(self) -> None: "some_other_term": frozenset({"term:dependency"}), } - graph = _build_dependency_graph( + graph = build_dependency_graph( messages_dict, terms_dict, known_terms=known_terms, @@ -544,7 +542,7 @@ def test_known_term_already_in_graph_not_overwritten(self) -> None: "shared_term": frozenset({"term:different_dependency"}), } - graph = _build_dependency_graph( + graph = build_dependency_graph( messages_dict, terms_dict, known_terms=known_terms, @@ -588,7 +586,7 @@ def test_cross_resource_cycle_detected_via_known_deps(self) -> None: "b": frozenset({"msg:a"}), } - graph = _build_dependency_graph( + graph = build_dependency_graph( messages_dict, terms_dict, known_messages=known_messages, @@ -658,7 +656,7 @@ def test_message_attribute_reference_roundtrip( messages_dict = {base_id: base_msg, "ref": ref_msg} terms_dict: dict[str, Term] = {} - graph = _build_dependency_graph(messages_dict, terms_dict) + graph = build_dependency_graph(messages_dict, terms_dict) # Property: qualified node exists expected_node = f"msg:{base_id}.{attr_id}" @@ -714,7 +712,7 @@ def test_term_attribute_reference_roundtrip( messages_dict = {"msg": msg} terms_dict = {base_id: base_term} - graph = _build_dependency_graph(messages_dict, terms_dict) + graph = build_dependency_graph(messages_dict, terms_dict) # Property: qualified node exists expected_node = f"term:{base_id}.{attr_id}" @@ -776,7 +774,7 @@ def test_message_with_multiple_attribute_references(self) -> None: messages_dict = {"a": msg_a, "b": msg_b, "complex": msg_complex} terms_dict: dict[str, Term] = 
{} - graph = _build_dependency_graph(messages_dict, terms_dict) + graph = build_dependency_graph(messages_dict, terms_dict) # Should have dependencies on both qualified attributes assert "msg:complex" in graph @@ -811,7 +809,7 @@ def test_message_attribute_itself_has_references(self) -> None: messages_dict = {"base": base_msg, "complex": msg_with_attr_ref} terms_dict: dict[str, Term] = {} - graph = _build_dependency_graph(messages_dict, terms_dict) + graph = build_dependency_graph(messages_dict, terms_dict) # Should have "msg:complex.tooltip" node with dependency on "msg:base" assert "msg:complex.tooltip" in graph @@ -857,7 +855,7 @@ def test_select_expression_in_attribute_creates_variant_dependencies(self) -> No messages_dict = {"base": base_msg, "selector": msg_with_select_attr} terms_dict: dict[str, Term] = {} - graph = _build_dependency_graph(messages_dict, terms_dict) + graph = build_dependency_graph(messages_dict, terms_dict) # Should have "msg:selector.dynamic" node with dependency on "msg:base" assert "msg:selector.dynamic" in graph @@ -923,7 +921,7 @@ def test_cycle_detection_loop_iterations(self) -> None: messages_dict = {"a": msg_a, "b": msg_b} terms_dict = {"x": term_x, "y": term_y} - graph = _build_dependency_graph(messages_dict, terms_dict) + graph = build_dependency_graph(messages_dict, terms_dict) warnings = _detect_circular_references(graph) cycle_warnings = [w for w in warnings if "circular" in w.message.lower()] @@ -954,7 +952,7 @@ def test_cross_type_cycle_detection(self) -> None: messages_dict = {"a": msg_a} terms_dict = {"t": term_t} - graph = _build_dependency_graph(messages_dict, terms_dict) + graph = build_dependency_graph(messages_dict, terms_dict) warnings = _detect_circular_references(graph) assert any("circular" in w.message.lower() for w in warnings) @@ -998,7 +996,7 @@ def test_cycle_detection_with_multiple_independent_cycles(self) -> None: messages_dict = {"a": msg_a, "b": msg_b, "x": msg_x, "y": msg_y} terms_dict: dict[str, Term] = 
{} - graph = _build_dependency_graph(messages_dict, terms_dict) + graph = build_dependency_graph(messages_dict, terms_dict) warnings = _detect_circular_references(graph) cycle_warnings = [w for w in warnings if "circular" in w.message.lower()] @@ -1036,7 +1034,7 @@ def test_no_cycles_in_linear_chain(self) -> None: messages_dict = {"a": msg_a, "b": msg_b, "c": msg_c, "d": msg_d} terms_dict: dict[str, Term] = {} - graph = _build_dependency_graph(messages_dict, terms_dict) + graph = build_dependency_graph(messages_dict, terms_dict) warnings = _detect_circular_references(graph) cycle_warnings = [w for w in warnings if "circular" in w.message.lower()] diff --git a/uv.lock b/uv.lock index fc80187e..ab5de8f2 100644 --- a/uv.lock +++ b/uv.lock @@ -301,7 +301,7 @@ wheels = [ [[package]] name = "ftllexengine" -version = "0.164.0" +version = "0.165.0" source = { editable = "." } [package.optional-dependencies]