diff --git a/.github/workflows/publish.yml b/.github/workflows/publish.yml index 367f7606..95e00f68 100644 --- a/.github/workflows/publish.yml +++ b/.github/workflows/publish.yml @@ -1,22 +1,40 @@ name: Build and Publish on: - release: - types: [published] + push: + tags: + - 'v*' workflow_dispatch: + inputs: + release_tag: + description: Existing release tag to build, publish, or verify + required: true + type: string + publish_to_testpypi: + description: Also publish the tagged distribution to TestPyPI + required: false + default: false + type: boolean permissions: - contents: read + contents: write + id-token: write + +concurrency: + group: publication-${{ github.workflow }}-${{ inputs.release_tag || github.ref_name }} + cancel-in-progress: true jobs: build: name: Build & Verify Quality (Python ${{ matrix.python-version }}) runs-on: ubuntu-latest + timeout-minutes: 60 # Force all uv commands (sync, run, etc.) to use the same versioned venv as the # scripts. Without this, bare "uv sync" creates the default ".venv" while the # scripts pivot to ".venv-", wasting ~150 MiB on a duplicate environment. env: UV_PROJECT_ENVIRONMENT: .venv-${{ matrix.python-version }} + RELEASE_TAG: ${{ github.event_name == 'workflow_dispatch' && inputs.release_tag || github.ref_name }} strategy: fail-fast: false matrix: @@ -25,7 +43,10 @@ jobs: # When Python 3.15 releases (~late 2026), add "3.15" here. 
python-version: ["3.13", "3.14"] steps: - - uses: actions/checkout@v4 + - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 + with: + fetch-depth: 0 + ref: ${{ github.event_name == 'workflow_dispatch' && inputs.release_tag || github.ref_name }} - name: Verify clean working tree run: | @@ -41,13 +62,16 @@ jobs: echo "Working tree is clean" - name: Set up uv - uses: astral-sh/setup-uv@v5 + uses: astral-sh/setup-uv@d4b2f3b6ecc6e67c4457f6d3e41ec42d3d0fcb86 # v5 with: enable-cache: true python-version: ${{ matrix.python-version }} - name: Make scripts executable - run: chmod +x ./scripts/lint.sh ./scripts/test.sh + run: chmod +x ./scripts/*.sh + + - name: Verify shell syntax + run: bash -n scripts/*.sh - name: Detect package name id: detect @@ -63,12 +87,10 @@ jobs: echo "PACKAGE_NAME=$PACKAGE_NAME" >> $GITHUB_ENV echo "Detected package: $PACKAGE_NAME" - - name: Validate version tag (Release only) - if: github.event_name == 'release' + - name: Validate version tag run: | set -euo pipefail - TAG_VERSION="${GITHUB_REF#refs/tags/}" - TAG_VERSION="${TAG_VERSION#v}" + TAG_VERSION="${RELEASE_TAG#v}" # Validate tag format if ! 
[[ "$TAG_VERSION" =~ ^[0-9]+\.[0-9]+\.[0-9]+(-[a-zA-Z0-9.]+)?$ ]]; then @@ -89,19 +111,17 @@ jobs: - name: Install dependencies run: | - uv sync --all-groups --locked + uv sync --group dev --group release --locked echo "Locked environment synchronized" - name: Validate runtime version matches tag - if: github.event_name == 'release' run: | set -euo pipefail PACKAGE="${{ steps.detect.outputs.name }}" RUNTIME_VERSION=$(uv run python -c "import ${PACKAGE}; print(${PACKAGE}.__version__)") - TAG_VERSION="${GITHUB_REF#refs/tags/}" - TAG_VERSION="${TAG_VERSION#v}" + TAG_VERSION="${RELEASE_TAG#v}" echo "Package: $PACKAGE" echo "Runtime version: $RUNTIME_VERSION" @@ -126,7 +146,7 @@ jobs: - name: Upload coverage reports to Codecov if: matrix.python-version == '3.14' - uses: codecov/codecov-action@v5 + uses: codecov/codecov-action@75cd11691c0faa626561e295848008c8a7dddffe # v5 with: token: ${{ secrets.CODECOV_TOKEN }} files: coverage.xml @@ -177,13 +197,26 @@ jobs: echo "Package integrity verified" + - name: Create release checksum receipt + if: matrix.python-version == '3.14' + run: | + set -euo pipefail + PACKAGE="${{ steps.detect.outputs.name }}" + VERSION="${RELEASE_TAG#v}" + + cd dist + shasum -a 256 \ + "${PACKAGE}-${VERSION}.tar.gz" \ + "${PACKAGE}-${VERSION}-py3-none-any.whl" \ + > "${PACKAGE}-${VERSION}.sha256" + - name: Debug Artifacts if: matrix.python-version == '3.14' run: ls -laR dist/ - name: Store build artifacts if: matrix.python-version == '3.14' - uses: actions/upload-artifact@v4 + uses: actions/upload-artifact@ea165f8d65b6e75b540449e92b4886f43607fa02 # v4.6.2 with: name: dist path: dist/ @@ -193,15 +226,16 @@ jobs: test-publish: needs: build runs-on: ubuntu-latest - if: github.event_name == 'workflow_dispatch' + timeout-minutes: 30 + if: github.event_name == 'workflow_dispatch' && inputs.publish_to_testpypi steps: - - uses: actions/download-artifact@v4 + - uses: actions/download-artifact@d3f86a106a0bac45b974a628896c90dbdf5c8093 # v4.3.0 with: name: dist 
path: dist/ - name: Publish to TestPyPI - uses: pypa/gh-action-pypi-publish@release/v1 + uses: pypa/gh-action-pypi-publish@ed0c53931b1dc9bd32cbe73a98c7f6766f8a527e # v1.13.0 with: repository-url: https://test.pypi.org/legacy/ password: ${{ secrets.TEST_PYPI_API_TOKEN }} @@ -211,16 +245,22 @@ jobs: name: Verify TestPyPI Publication needs: test-publish runs-on: ubuntu-latest - if: github.event_name == 'workflow_dispatch' + timeout-minutes: 30 + if: github.event_name == 'workflow_dispatch' && inputs.publish_to_testpypi strategy: matrix: # Test installation on both minimum and latest supported Python versions. python-version: ["3.13", "3.14"] + env: + RELEASE_TAG: ${{ inputs.release_tag }} steps: - - uses: actions/checkout@v4 + - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 + with: + fetch-depth: 0 + ref: ${{ inputs.release_tag }} - name: Set up Python ${{ matrix.python-version }} - uses: actions/setup-python@v5 + uses: actions/setup-python@a26af69be951a213d495a4c3e4e4022e16d87065 # v5 with: python-version: ${{ matrix.python-version }} @@ -236,14 +276,6 @@ jobs: echo "name=$PACKAGE_NAME" >> $GITHUB_OUTPUT echo "Detected package: $PACKAGE_NAME" - - name: Extract version - id: version - run: | - set -euo pipefail - VERSION=$(python -c "import tomllib; print(tomllib.load(open('pyproject.toml', 'rb'))['project']['version'])") - echo "version=$VERSION" >> $GITHUB_OUTPUT - echo "Version: $VERSION" - - name: Wait for TestPyPI CDN propagation run: | echo "Waiting 60 seconds for TestPyPI CDN to propagate..." 
@@ -253,7 +285,7 @@ jobs: run: | set -euo pipefail PACKAGE="${{ steps.detect.outputs.name }}" - VERSION="${{ steps.version.outputs.version }}" + VERSION="${RELEASE_TAG#v}" MAX_ATTEMPTS=5 ATTEMPT=1 @@ -282,7 +314,7 @@ jobs: set -euo pipefail PACKAGE="${{ steps.detect.outputs.name }}" INSTALLED_VERSION=$(python -c "import ${PACKAGE}; print(${PACKAGE}.__version__)") - EXPECTED_VERSION="${{ steps.version.outputs.version }}" + EXPECTED_VERSION="${RELEASE_TAG#v}" echo "Installed version: $INSTALLED_VERSION" echo "Expected version: $EXPECTED_VERSION" @@ -299,37 +331,86 @@ jobs: PACKAGE="${{ steps.detect.outputs.name }}" python -c "import ${PACKAGE} as pkg; r = pkg.parse_ftl('greeting = Hello, World!'); s = pkg.serialize_ftl(r); assert 'greeting' in s; print('Smoke test passed: ${PACKAGE} v' + pkg.__version__)" + publish-release-assets: + name: Publish GitHub Release Assets + needs: build + runs-on: ubuntu-latest + timeout-minutes: 20 + env: + GH_TOKEN: ${{ github.token }} + RELEASE_TAG: ${{ github.event_name == 'workflow_dispatch' && inputs.release_tag || github.ref_name }} + steps: + - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 + with: + fetch-depth: 0 + ref: ${{ github.event_name == 'workflow_dispatch' && inputs.release_tag || github.ref_name }} + + - name: Make scripts executable + run: chmod +x ./scripts/*.sh + + - uses: actions/download-artifact@d3f86a106a0bac45b974a628896c90dbdf5c8093 # v4.3.0 + with: + name: dist + path: dist/ + + - name: Publish GitHub release assets + run: ./scripts/publish-github-release-assets.sh "$RELEASE_TAG" + + verify-github-release: + name: Verify GitHub Release + needs: publish-release-assets + runs-on: ubuntu-latest + timeout-minutes: 15 + env: + GH_TOKEN: ${{ github.token }} + RELEASE_TAG: ${{ github.event_name == 'workflow_dispatch' && inputs.release_tag || github.ref_name }} + steps: + - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 + with: + fetch-depth: 0 + ref: ${{ 
github.event_name == 'workflow_dispatch' && inputs.release_tag || github.ref_name }} + + - name: Make scripts executable + run: chmod +x ./scripts/*.sh + + - name: Verify GitHub release handoff + run: ./scripts/verify-github-release.sh "$RELEASE_TAG" + publish: needs: build runs-on: ubuntu-latest - if: github.event_name == 'release' - permissions: - id-token: write + timeout-minutes: 20 steps: - - uses: actions/download-artifact@v4 + - uses: actions/download-artifact@d3f86a106a0bac45b974a628896c90dbdf5c8093 # v4.3.0 with: name: dist path: dist/ - name: Publish to PyPI - uses: pypa/gh-action-pypi-publish@release/v1 + uses: pypa/gh-action-pypi-publish@ed0c53931b1dc9bd32cbe73a98c7f6766f8a527e # v1.13.0 with: attestations: true + skip-existing: true verify-publish: name: Verify PyPI Publication needs: publish runs-on: ubuntu-latest - if: github.event_name == 'release' + timeout-minutes: 30 strategy: matrix: # Test installation on both minimum and latest supported Python versions. python-version: ["3.13", "3.14"] + env: + RELEASE_TAG: ${{ github.event_name == 'workflow_dispatch' && inputs.release_tag || github.ref_name }} steps: - - uses: actions/checkout@v4 + - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 + with: + fetch-depth: 0 + ref: ${{ github.event_name == 'workflow_dispatch' && inputs.release_tag || github.ref_name }} - name: Set up Python ${{ matrix.python-version }} - uses: actions/setup-python@v5 + uses: actions/setup-python@a26af69be951a213d495a4c3e4e4022e16d87065 # v5 with: python-version: ${{ matrix.python-version }} @@ -353,8 +434,7 @@ jobs: - name: Install from PyPI with retry run: | set -euo pipefail - TAG_VERSION="${GITHUB_REF#refs/tags/}" - TAG_VERSION="${TAG_VERSION#v}" + TAG_VERSION="${RELEASE_TAG#v}" PACKAGE="${{ steps.detect.outputs.name }}" MAX_ATTEMPTS=5 @@ -383,8 +463,7 @@ jobs: set -euo pipefail PACKAGE="${{ steps.detect.outputs.name }}" INSTALLED_VERSION=$(python -c "import ${PACKAGE}; 
print(${PACKAGE}.__version__)") - TAG_VERSION="${GITHUB_REF#refs/tags/}" - TAG_VERSION="${TAG_VERSION#v}" + TAG_VERSION="${RELEASE_TAG#v}" echo "Installed version: $INSTALLED_VERSION" echo "Expected version: $TAG_VERSION" @@ -405,9 +484,14 @@ jobs: name: Verify Python 3.13+ Requirement needs: publish runs-on: ubuntu-latest - if: github.event_name == 'release' + timeout-minutes: 15 + env: + RELEASE_TAG: ${{ github.event_name == 'workflow_dispatch' && inputs.release_tag || github.ref_name }} steps: - - uses: actions/checkout@v4 + - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 + with: + fetch-depth: 0 + ref: ${{ github.event_name == 'workflow_dispatch' && inputs.release_tag || github.ref_name }} - name: Detect package name id: detect @@ -428,21 +512,22 @@ jobs: # available on GitHub Actions runners by default. # GitHub Actions runners ship with Python 3.12 as their system Python as of 2025. - name: Set up Python 3.12 - uses: actions/setup-python@v5 + uses: actions/setup-python@a26af69be951a213d495a4c3e4e4022e16d87065 # v5 with: python-version: "3.12" - name: Verify install is blocked on Python 3.12 (below requires-python floor) run: | set -euo pipefail + TAG_VERSION="${RELEASE_TAG#v}" PACKAGE="${{ steps.detect.outputs.name }}" - echo "Attempting to install $PACKAGE on Python 3.12 (should fail)..." + echo "Attempting to install $PACKAGE==$TAG_VERSION on Python 3.12 (should fail)..." 
- if pip install "$PACKAGE" 2>/dev/null; then + if pip install --no-cache-dir "${PACKAGE}==${TAG_VERSION}" 2>/dev/null; then echo "::error::Installation succeeded on Python 3.12 (should have failed)" echo "::error::Check requires-python in pyproject.toml" exit 1 fi - echo "Installation correctly blocked on Python 3.12" \ No newline at end of file + echo "Installation correctly blocked on Python 3.12" diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index d78ed842..89e7a05e 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -9,10 +9,15 @@ on: permissions: contents: read +concurrency: + group: test-${{ github.workflow }}-${{ github.ref }} + cancel-in-progress: true + jobs: test: name: Test (Python ${{ matrix.python-version }}) runs-on: ubuntu-latest + timeout-minutes: 45 # Force all uv commands (sync, run, etc.) to use the same versioned venv as the # scripts. Without this, bare "uv sync" creates the default ".venv" while the # scripts pivot to ".venv-", wasting ~150 MiB on a duplicate environment. @@ -26,16 +31,19 @@ jobs: # When Python 3.15 releases (~late 2026), add "3.15" here. 
python-version: ["3.13", "3.14"] steps: - - uses: actions/checkout@v4 + - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 - name: Set up uv - uses: astral-sh/setup-uv@v5 + uses: astral-sh/setup-uv@d4b2f3b6ecc6e67c4457f6d3e41ec42d3d0fcb86 # v5 with: enable-cache: true python-version: ${{ matrix.python-version }} - name: Make scripts executable - run: chmod +x ./scripts/lint.sh ./scripts/test.sh + run: chmod +x ./scripts/*.sh + + - name: Verify shell syntax + run: bash -n scripts/*.sh - name: Detect package name id: detect @@ -51,7 +59,7 @@ jobs: - name: Install dependencies run: | - uv sync --all-groups --locked + uv sync --group dev --locked echo "Locked environment synchronized" - name: Run Linters diff --git a/.gitignore b/.gitignore index 268628f2..d51d1138 100644 --- a/.gitignore +++ b/.gitignore @@ -95,17 +95,6 @@ pdm.lock .env .env.local -# macOS -.DS_Store - -# Windows -Thumbs.db -ehthumbs.db -Desktop.ini - -# Linux -*~ - # Backup files (all variants) *.bak* *.backup* @@ -113,8 +102,6 @@ Desktop.ini *.orig *_backup *_old -*.swp -*.swo # Build artifacts *.whl @@ -127,6 +114,15 @@ docs/.doctrees/ # Security scans bandit-results.json +# OS and local scratch files +*.swp +*.swo +*~ +.DS_Store +Thumbs.db +ehthumbs.db +Desktop.ini + # Temporary files tmp/ temp/ diff --git a/CHANGELOG.md b/CHANGELOG.md index c1e84b77..90e2091e 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,8 +1,8 @@ --- -afad: "3.3" -version: "0.162.0" +afad: "3.5" +version: "0.163.0" domain: CHANGELOG -updated: "2026-03-24" +updated: "2026-04-22" route: keywords: [changelog, release notes, version history, breaking changes, migration, fixed, what's new] questions: ["what changed in version X?", "what are the breaking changes?", "what was fixed in the latest release?", "what is the release history?"] @@ -15,6 +15,75 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ## [Unreleased] +## [0.163.0] - 2026-04-22 + +### Added + +- 
**`docs/RELEASE_PROTOCOL.md` maintainer runbook.** Release operations are now documented as a + `gh`-first branch-protected procedure with clean-worktree guidance for dirty primary checkouts, + explicit PR scope checks, merged-`main` verification before tagging, remote tag verification, + workflow-dispatch reruns against an existing tag, GitHub Release inspection, and PyPI + verification. +- **`./check.sh`, `scripts/run_examples.py`, and architecture regression tests.** The repo + now has a single top-level verification entrypoint that runs version/docs validation, shipped + examples, lint, tests, HypoFuzz preflight, and bounded live Atheris smoke checks. The new + architecture tests also lock in layer direction and reject `sys.path`/`PYTHONPATH=src` + import hacks from code and public docs. + +### Changed + +- **Oversized runtime, introspection, and parsing modules are now split by responsibility, with regression guards to keep them that way.** `runtime.bundle`, `runtime.resolver`, `runtime.cache`, `runtime.locale_context`, `introspection.iso`, `localization.orchestrator`, `parsing.currency`, `parsing.dates`, and `syntax.serializer` now delegate focused responsibilities into dedicated internal modules instead of accumulating more behavior in single god-files, and `tests/test_architecture_contract.py` now enforces module line budgets, rejects tracked generated `,cover` artifacts, and blocks version-provenance annotations outside `CHANGELOG.md`. +- **Public examples now model explicit ownership instead of `threading.local()` tricks.** The thread-safety example switched its per-worker customization path to worker-owned bundles, the example mypy config no longer depends on a local threading stub overlay, and the example type-checking guide now documents the simpler strict-check workflow. 
+- **Fluent parser grammar internals are now split by responsibility instead of living in one monolith.** + `syntax.parser.rules` is now a thin aggregated surface over focused `context`, + `patterns`, `expressions`, and `entries` modules, so pattern continuation logic, + inline/select expression parsing, and entry parsing can evolve independently + without collapsing back into a single 2,100-line implementation bucket. +- **Release automation hardened around pinned actions and explicit handoff checks.** The `Test` + and `Build and Publish` workflows now pin GitHub Actions by commit SHA, add concurrency and job + timeouts, verify shell syntax for all `scripts/*.sh`, and allow targeted `workflow_dispatch` + reruns against an explicit `release_tag` instead of relying on ambient branch state. +- **Coverage policy is now explicitly enforced at 100% line and 100% branch coverage.** The + `pytest-cov` project configuration and `./scripts/test.sh` default gate now both require + `100.0%`, and the contributor/testing docs state the same baseline instead of leaving the + stricter standard implicit. +- **Fuzz-only toolchains are now outside the default gate surface.** `hypofuzz` moved out of the + default `dev` group into an explicit `fuzz` group, and GitHub Actions now sync only the + dependency groups required for routine lint, test, build, and release work. Atheris remains an + opt-in specialist toolchain instead of a release prerequisite. +- **Published GitHub Releases now carry the Python distribution artifacts.** The publish workflow + now attaches `ftllexengine-X.Y.Z.tar.gz`, `ftllexengine-X.Y.Z-py3-none-any.whl`, and + `ftllexengine-X.Y.Z.sha256` to the GitHub Release and verifies that handoff directly with `gh` + before treating the release as complete. 
+- **Maintainer docs now assume protected `main` and automatic branch cleanup.** `README.md`, + `CONTRIBUTING.md`, and the docs index now point to the release protocol under `docs/` so the + documented workflow matches the repository-side `main` protection and + `delete_branch_on_merge` posture. +- **Semantic localization aliases now live at the core layer and are exported from the root facade.** + `LocaleCode`, `MessageId`, `ResourceId`, and `FTLSource` now resolve from `ftllexengine` and + share a lower-layer implementation home, eliminating upward imports from core/runtime modules + and aligning public docs with stable package facades instead of helper submodules. +- **Reference extraction and dependency-graph helpers were split into lower, shared modules.** + AST-only reference extraction now lives under `ftllexengine.syntax`, dependency-graph + algorithms now live under `ftllexengine.core`, and `validation.resource`/`introspection.message` + were trimmed to orchestration-focused roles instead of carrying unrelated graph logic. +- **Repo tooling now runs against the installed package surface instead of `src` path injection.** + Documentation validation, example execution, lint, tests, and Atheris corpus health all clear + `PYTHONPATH`/`sys.path` overrides so quality gates exercise the same import contract users get. +- **Atheris corpus health now bootstraps its dedicated environment on demand.** The + `./scripts/fuzz_atheris.sh --corpus` path now creates `.venv-atheris` before invoking the + health checker, so fresh machines and `./check.sh` no longer depend on a pre-existing + Atheris venv. 
+- **Public examples and parser-focused tests now describe current behavior instead of stale cleanup notes or old line coordinates.** + The shipped transformer example no longer embeds inline `TODO` markers, parser coverage tests + now describe behavior rather than historical line numbers from the pre-split parser, and the + docs/tooling regression suite enforces that public docs/examples stay free of `TODO`/`FIXME`/`HACK` + markers. +- **`scripts/validate_version.py` now uses modern typed dataclasses and accurate check descriptions.** + The result model no longer relies on `NamedTuple` plus `type: ignore[type-arg]` workarounds, + configurable footer checks are documented alongside frontmatter checks, and display-name handling + now comes from explicit configuration instead of a misleading `capitalize()` fallback. + ## [0.162.0] - 2026-03-24 ### Breaking Changes @@ -6820,3 +6889,6 @@ Both validators are re-exported from `ftllexengine.introspection` and the root [0.29.0]: https://github.com/resoltico/ftllexengine/releases/tag/v0.29.0 [0.28.1]: https://github.com/resoltico/ftllexengine/releases/tag/v0.28.1 [0.28.0]: https://github.com/resoltico/ftllexengine/releases/tag/v0.28.0 +[Unreleased]: https://github.com/resoltico/FTLLexEngine/compare/v0.163.0...HEAD +[0.163.0]: https://github.com/resoltico/FTLLexEngine/compare/v0.162.0...v0.163.0 +[0.162.0]: https://github.com/resoltico/FTLLexEngine/compare/v0.161.0...v0.162.0 diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 73e04018..da2a5494 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -1,154 +1,135 @@ --- -afad: "3.1" -version: "0.107.0" -domain: contributing -updated: "2026-03-10" +afad: "3.5" +version: "0.163.0" +domain: CONTRIBUTING +updated: "2026-04-22" route: - keywords: [contributing, development, setup, pull request, code style, workflow, pivot] - questions: ["how to contribute?", "how to set up development?", "how to submit PR?"] + keywords: [contributing, development, uv, lint, test, fuzz, benchmark, 
release, virtualenv] + questions: ["how do I set up development?", "how do I run lint and tests?", "how do I work on fuzzing?", "how do I prepare a release?"] --- # Contributing to FTLLexEngine -## Setup +**Purpose**: Set up a working development environment and run the same validation paths the repo expects. +**Prerequisites**: `uv`, Bash 5+, Python 3.13 available locally. Python 3.14 is recommended for forward-compat checks. -FTLLexEngine uses `uv` for ultra-fast, deterministic dependency management. We employ a **Pivot** architecture that isolates your IDE environment from the validation silos. +## Overview -```bash -git clone https://github.com/resoltico/FTLLexEngine.git -cd ftllexengine +This repository uses `uv` for dependency management and self-isolating shell scripts for the main quality gates. The root `.venv` is the manual development environment; the scripted gates pivot into versioned environments such as `.venv-3.13`, `.venv-3.14`, and `.venv-atheris` as needed. -# 1. Setup your "IDE Sanctuary" (.venv) -# This environment is for your editor, LSP, and manual exploration. -uv sync --all-groups +The shortest reliable workflow is: -# 2. Verify and Initialize Atheris (macOS) -# This sets up the isolated .venv-fuzzing environment. -./scripts/check-atheris.sh --install +```bash +uv sync --group dev --group release +./check.sh ``` ---- +The default test gate enforces **100% line coverage and 100% branch coverage** for `src/ftllexengine`. -## Environment Hierarchy +## Setup -To ensure data integrity and zero "environment stomping," the project is strictly siloed: +```bash +git clone https://github.com/resoltico/FTLLexEngine.git +cd FTLLexEngine +uv sync --group dev --group release +uv sync --group fuzz +``` -| Environment | Purpose | Managed By | -|-------------|---------|------------| -| `.venv` (Root) | **IDE Sanctuary** - Autocomplete, LSP, manual runs. 
| You (`uv sync`) | -| `.venv-3.13` | **Validation Silo** - Clean-room lint/test baseline (Python 3.13, declared minimum). | `scripts/lint.sh`, `scripts/test.sh` | -| `.venv-atheris` | **Atheris Fuzzing** - Python 3.13 venv; active on 3.13 baseline. | `scripts/fuzz_atheris.sh --setup` | +Optional environments: ---- +- `PY_VERSION=3.14 ./scripts/lint.sh` and `PY_VERSION=3.14 ./scripts/test.sh` create or reuse `.venv-3.14`. +- `./scripts/fuzz_atheris.sh --help` bootstraps `.venv-atheris` on demand and requires Python 3.13. -## Automated Scripts (The Pivot) +## Daily Workflow -All validation scripts are **self-isolating**. They automatically "pivot" into `.venv-3.13`, ensuring a clean, reproducible baseline independent of your IDE venv or Atheris toolchain. +Run the repo gates directly; the scripts manage their own interpreter pivots. -| Script | Purpose | Preferred Command | -|--------|---------|-------------------| -| `scripts/lint.sh` | Quality checks (ruff, mypy, pylint) | `./scripts/lint.sh` | -| `scripts/test.sh` | Test suite with coverage | `./scripts/test.sh` | -| `scripts/check-atheris.sh` | Atheris/LLVM health check | `./scripts/check-atheris.sh` | -| `scripts/fuzz_hypofuzz.sh` | Hypothesis/HypoFuzz fuzzing | `./scripts/fuzz_hypofuzz.sh` | -| `scripts/fuzz_atheris.sh` | Atheris/libFuzzer fuzzing | `./scripts/fuzz_atheris.sh` | -| `scripts/benchmark.sh` | Performance benchmarks | `./scripts/benchmark.sh` | +```bash +./check.sh +``` -**Optimization**: Do not use `uv run --python X.Y` with these scripts. The scripts handle their internally versioned `uv run` pivots silently to avoid noise and environment overlap. 
+Useful variants: ---- +- `uv run python scripts/run_examples.py` +- `PY_VERSION=3.14 ./scripts/lint.sh` +- `PY_VERSION=3.14 ./scripts/test.sh` +- `./scripts/benchmark.sh` +- `./scripts/fuzz_hypofuzz.sh` +- `./scripts/fuzz_hypofuzz.sh --deep --time 300` +- `./scripts/fuzz_atheris.sh --list` -## Multi-Version Development +## Documentation Work -Python 3.13 is the declared minimum. Python 3.14 is the current stable target; Python 3.15 is the forward-compatibility target (N+1 policy). +Markdown changes should stay synchronized with the code and examples they describe. -### The Master Control: `PY_VERSION` +```bash +uv run python scripts/validate_docs.py +uv run python scripts/validate_version.py +uv run python scripts/run_examples.py +``` -The `PY_VERSION` environment variable selects the target Python version. The default is 3.13. +Expectations: -| Task | Command | Target Silo | -|------|---------|-------------| -| **Lint (default)** | `./scripts/lint.sh` | `.venv-3.13` | -| **Lint (3.14 forward-compat)** | `PY_VERSION=3.14 ./scripts/lint.sh` | `.venv-3.14` | -| **Test (default)** | `./scripts/test.sh` | `.venv-3.13` | -| **Test (3.14 forward-compat)** | `PY_VERSION=3.14 ./scripts/test.sh` | `.venv-3.14` | -| **Benchmark (default)** | `./scripts/benchmark.sh` | `.venv-3.13` | -| **Benchmark (3.14 forward-compat)** | `PY_VERSION=3.14 ./scripts/benchmark.sh` | `.venv-3.14` | +- README and guide Python snippets should run as written. +- `examples/*.py` should execute cleanly under the dev environment. +- Source-code docstring transcripts are illustrative API notes, not an executable test suite. Keep runnable examples in Markdown or `examples/`, and mark any source `>>>` transcript with `# doctest: +SKIP`. +- Reference docs should describe current symbols, not removed or internal machinery. -### Why this works -- **Zero Stomping**: Running 3.14 checks will **never** wipe your 3.13 environment. 
-- **Instant Switching**: Switching between 3.13 and 3.14 is instant (no `uv sync` overhead). -- **Parallel Testing**: You can run 3.13 tests in one terminal and 3.14 tests in another simultaneously. +## Type Checking Examples ---- +The `examples/` directory has its own `mypy.ini` and local stubs. -## Code Standards +```bash +uv run mypy --config-file examples/mypy.ini examples +``` -Style: -- **PEP 8** adherence via Ruff. -- **100 char** line limit. -- **Strict Typing**: Type hints are mandatory. -- **Immutability**: Preference for `frozen=True, slots=True` dataclasses. +## Fuzzing -```python -from __future__ import annotations -from dataclasses import dataclass +Two fuzzing surfaces are maintained: -@dataclass(frozen=True, slots=True) -class LocaleContext: - """Context-aware locale container.""" - tag: str - is_clobbered: bool = False -``` +- `./scripts/fuzz_hypofuzz.sh` for Hypothesis and HypoFuzz. +- `./scripts/fuzz_atheris.sh` for native Atheris/libFuzzer targets. ---- +See: -## Testing & Coverage +- [docs/FUZZING_GUIDE.md](docs/FUZZING_GUIDE.md) +- [docs/FUZZING_GUIDE_HYPOFUZZ.md](docs/FUZZING_GUIDE_HYPOFUZZ.md) +- [docs/FUZZING_GUIDE_ATHERIS.md](docs/FUZZING_GUIDE_ATHERIS.md) -All logic must be verified via deterministic unit tests and non-deterministic property tests. +## Benchmarks ```bash -./scripts/test.sh # Full suite (95%+ requirement) -./scripts/test.sh --quick # Fast mode (no coverage) +./scripts/benchmark.sh +./scripts/benchmark.sh --save baseline +./scripts/benchmark.sh --compare ``` -### Property-Based Testing (Hypothesis) -If you see `HYPOTHESIS DETECTED A LOGIC FLAW`, an edge case has been found. -1. The failing input is saved to `.hypothesis/examples/`. -2. Review the `Falsifying example:` output. -3. Fix the bug and re-run `./scripts/test.sh`. +## Releases ---- +Release work goes through a release branch and `gh`-driven verification. 
-## Pull Requests +Authoritative procedure: -### Mandatory Pre-Flight -Before submitting a PR, ensure both versions pass verification: +- [docs/RELEASE_PROTOCOL.md](docs/RELEASE_PROTOCOL.md) -```bash -# Verify Baseline (Python 3.13) -./scripts/lint.sh && ./scripts/test.sh +Support scripts: -# Verify Tomorrow (Python 3.15 forward-compat) -PY_VERSION=3.15 ./scripts/lint.sh && PY_VERSION=3.15 ./scripts/test.sh -``` +- `./scripts/publish-github-release-assets.sh` +- `./scripts/verify-github-release.sh` -### CI Requirements -- Parallel matrix testing on 3.13 and 3.14. -- Coverage >= 95.00%. -- Strict type checking (mypy) on all targets. -- Successful documentation validation (`scripts/validate_docs.py`). +## Pull Requests ---- +Before opening a PR, make sure the baseline gates pass: -## Versioning +```bash +./check.sh +``` -**Single Source of Truth**: The version is managed exclusively in `pyproject.toml`. Do not manually edit `__version__` in `src/`. It is auto-derived from package metadata at runtime to prevent version drift. +`./scripts/test.sh` is expected to fail on any coverage regression below the repository's 100% line-and-branch baseline. -Standard workflow: -1. Update version in pyproject.toml -2. Sync to refresh package metadata +When the change touches runtime behavior or supported Python versions, also run the forward-compat pass: ```bash -uv sync +PY_VERSION=3.14 ./scripts/lint.sh +PY_VERSION=3.14 ./scripts/test.sh ``` diff --git a/NOTICE b/NOTICE index 82d9e2ee..21ef1141 100644 --- a/NOTICE +++ b/NOTICE @@ -3,145 +3,69 @@ Copyright (c) 2025-2026 Ervins Strauhmanis Licensed under the MIT License. See LICENSE file for details. -This product is an independent implementation of the FTL Syntax -Specification (Apache License 2.0, Mozilla Foundation and others). 
- +================================================================================ +Specification Attribution ================================================================================ -FTL Specification Attribution ------------------------------ - -This software implements the FTL Syntax Specification. +FTLLexEngine is an independent implementation of the FTL Syntax Specification. Specification: https://github.com/projectfluent/fluent/blob/master/spec/fluent.ebnf Copyright: Mozilla Foundation and others License: Apache License 2.0 Project: https://projectfluent.org/ -The specification is licensed under the Apache License 2.0. This -implementation is independent work and licensed separately under MIT. +The specification is licensed under the Apache License, Version 2.0, full text +available at https://www.apache.org/licenses/LICENSE-2.0 and included in this +distribution as LICENSE-APACHE-2.0. -Full Apache License 2.0 text: http://www.apache.org/licenses/LICENSE-2.0 +This implementation is independent work — not a fork or derivative of Mozilla's +reference implementation — and is licensed separately under MIT. ================================================================================ - -Important Clarifications ------------------------- - -1. This library is an INDEPENDENT IMPLEMENTATION of the FTL Syntax Specification -2. This is NOT a fork or derivative of Mozilla's reference implementation code -3. All implementation code in this library is original work, licensed under MIT -4. This implementation is independent work and licensed separately from the - specification - +Optional Dependencies ================================================================================ -Trademark Usage ---------------- - -This library uses "FTL" (Fluent Translation List) as a descriptive term to -indicate technical compatibility with the FTL Syntax Specification v1.0. 
- -This usage is descriptive only and does not imply affiliation with or -endorsement by the Mozilla Foundation. - -"Fluent" and "FTL" may be trademarks of the Mozilla Foundation. All trademarks -are the property of their respective owners. +FTLLexEngine has no required runtime dependencies. The following package is an +optional dependency available via the [babel] extra: + +-------------------------------------------------------------------------------- +Babel + Copyright (c) 2013-2024 by the Babel Team + https://github.com/python-babel/babel + License: BSD 3-Clause + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are met: + + 1. Redistributions of source code must retain the above copyright notice, + this list of conditions and the following disclaimer. + 2. Redistributions in binary form must reproduce the above copyright notice, + this list of conditions and the following disclaimer in the documentation + and/or other materials provided with the distribution. + 3. Neither the name of the copyright holder nor the names of its contributors + may be used to endorse or promote products derived from this software + without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE + LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + POSSIBILITY OF SUCH DAMAGE. ================================================================================ - -Disclaimer of Affiliation and Endorsement ------------------------------------------- - -This project is NOT affiliated with, endorsed by, or sponsored by: -- Mozilla Foundation -- Mozilla Corporation -- Project Fluent -- Any Mozilla-related entity - -This is an independent, third-party implementation created by the copyright -holder listed above. - +Trademark Notice ================================================================================ -Patent Considerations ---------------------- - -The FTL Specification is licensed under Apache License 2.0, which includes an -explicit patent grant from specification contributors. - -This implementation (ftllexengine) is licensed under the MIT License, which -does not include explicit patent language. Users should be aware that: - -1. The Apache 2.0 patent grant applies to the specification itself -2. This independent implementation makes no patent claims -3. No patents are knowingly infringed by this implementation -4. Contributors to this implementation grant MIT License permissions only -5. The MIT License does not provide explicit patent protection - -For patent-related inquiries, consult with legal counsel. - -================================================================================ - -Third-Party Dependencies ------------------------- - -This software depends on the following third-party packages: - -1. 
Babel (BSD-3-Clause License) - Copyright (c) 2013-2024 by the Babel Team - https://github.com/python-babel/babel - - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions - are met: - - 1. Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - - 2. Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in - the documentation and/or other materials provided with the - distribution. - - 3. Neither the name of the copyright holder nor the names of its - contributors may be used to endorse or promote products derived - from this software without specific prior written permission. - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -2. returns (BSD-2-Clause License) - Copyright (c) 2018, dry-python team - https://github.com/dry-python/returns - - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions are met: - - 1. Redistributions of source code must retain the above copyright notice, - this list of conditions and the following disclaimer. - - 2. 
Redistributions in binary form must reproduce the above copyright notice, - this list of conditions and the following disclaimer in the documentation - and/or other materials provided with the distribution. +"Fluent" and "FTL" may be trademarks of the Mozilla Foundation. Their use in +this project is descriptive only — indicating technical compatibility with the +FTL Syntax Specification v1.0 — and does not imply affiliation with or +endorsement by the Mozilla Foundation, Mozilla Corporation, or Project Fluent. - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE - LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - POSSIBILITY OF SUCH DAMAGE. +ftllexengine is not affiliated with, endorsed by, or sponsored by the Mozilla +Foundation or any Mozilla-related entity. diff --git a/PATENTS.md b/PATENTS.md index ead4efc3..3f2412a3 100644 --- a/PATENTS.md +++ b/PATENTS.md @@ -1,214 +1,38 @@ -# Patent Considerations - -This document provides detailed information about patent considerations for FTLLexEngine users and contributors. - -## Summary for Users - -**FTLLexEngine is licensed under the MIT License, which does not include explicit patent grant language.** - -If patent protection is a concern for your use case, please consult with legal counsel to assess your specific situation. - -## Background: Specification vs. 
Implementation - -FTLLexEngine implements the FTL Syntax Specification, which has different licensing from this implementation: - -| Component | License | Patent Grant | -|-----------|---------|--------------| -| FTL Syntax Specification | Apache License 2.0 | Yes (explicit) | -| FTLLexEngine (this implementation) | MIT License | No (implicit only) | - -## Apache 2.0 Specification Patent Grant - -The FTL Syntax Specification is licensed under Apache License 2.0, which includes Section 3 (Grant of Patent License): - -> Subject to the terms and conditions of this License, each Contributor hereby grants to You a perpetual, worldwide, non-exclusive, no-charge, royalty-free, irrevocable (except as stated in this section) patent license to make, have made, use, offer to sell, sell, import, and otherwise transfer the Work... - -**This patent grant applies to the specification itself**, covering contributions made by specification authors (primarily Mozilla Foundation and contributors to the Fluent project). - -## MIT License and Patents - -The MIT License grants broad permissions ("to deal in the Software without restriction") but does not explicitly mention patents. Legal interpretation varies: - -### Implicit Patent License Theory - -Some legal scholars argue the MIT License includes an **implied patent license** through phrases like: -- "without restriction" -- "without limitation" -- "use, copy, modify, merge, publish, distribute, sublicense" - -However, this is **not universally accepted** and may not hold in all jurisdictions. - -### No Explicit Grant - -Unlike Apache 2.0, the MIT License: -- Does NOT explicitly grant patent rights -- Does NOT include patent retaliation clauses -- Does NOT define what "use" means in patent terms - -## What This Means for FTLLexEngine - -### For Users - -**FTLLexEngine makes no patent claims and knowingly infringes no patents.** - -This is an **independent, clean-room implementation** of a publicly available specification: - -1. 
**Specification Patents**: The Apache 2.0 patent grant from specification authors may provide coverage for implementing the specification -2. **Implementation Patents**: This implementation is original work with no known patent issues -3. **No Patent Claims**: The copyright holder (Ervins Strauhmanis) makes no patent claims on this implementation - -### Comparison with Other Licenses - -| License | Explicit Patent Grant | Patent Retaliation | Widely Used | -|---------|----------------------|-------------------|-------------| -| Apache 2.0 | Yes | Yes | Yes | -| MIT | No | No | Yes | -| BSD-2-Clause | No | No | Yes | -| BSD-3-Clause | No | No | Yes | -| GPL v3 | Yes | Yes | Yes | - -**Note**: Many successful open-source projects use MIT/BSD licenses without explicit patent grants, including: -- jQuery (MIT) -- Rails (MIT) -- Node.js (MIT) -- React (MIT, changed from BSD+Patents in 2017) -- Angular (MIT) - -## For Contributors - -By contributing to FTLLexEngine, you: - -1. **Grant MIT License permissions** for your contributions (copyright license) -2. **Do NOT explicitly grant patent rights** (MIT License has no patent clause) -3. **Should not contribute code** that you know infringes patents you hold -4. **Should disclose** if you have patent concerns about your contribution - -### Contribution Guidelines - -**Before contributing:** - -- Ensure your contribution is your original work -- Do not contribute code you know infringes patents (yours or others) -- If you hold patents related to your contribution, consider whether you're comfortable with the MIT License's implicit permissions -- Disclose any known patent issues in your pull request - -**By submitting a pull request, you represent that:** - -- You have the right to submit the code -- Your contribution does not knowingly infringe patents -- You grant MIT License permissions for your contribution - -## Why Not Use Apache 2.0? 
- -You might ask: "Why not license FTLLexEngine under Apache 2.0 to get explicit patent grants?" - -**Reasons for MIT License:** - -1. **Simplicity**: MIT is one of the shortest, easiest-to-understand licenses -2. **Compatibility**: MIT is compatible with virtually all other licenses -3. **Ecosystem**: Python ecosystem heavily uses MIT (matches community norms) -4. **Low Barrier**: MIT imposes minimal requirements on users -5. **No Patent Claims**: This implementation makes no patent claims to grant - -## Risk Assessment - -### Realistic Patent Risk - -For most users, patent risk is **extremely low**: - -1. **Specification Coverage**: Apache 2.0 patent grant from specification authors likely covers implementation -2. **Published Specification**: Implementing published specs is generally considered low-risk -3. **No Known Issues**: No known patent claims against Fluent implementations -4. **Defensive Publication**: Public specifications serve as prior art - -### Higher-Risk Scenarios - -Consult legal counsel if: - -- You work in highly patent-litigious industries (e.g., telecommunications) -- You have specific patent concerns about localization technology -- Your organization has strict patent policy requirements -- You're considering patenting derivative works - -## Alternative Implementations - -If explicit patent grants are required for your use case, consider: - -| Implementation | License | Patent Grant | -|----------------|---------|--------------| -| FTLLexEngine | MIT | No | -| fluent.runtime (Mozilla) | Apache 2.0 | Yes | -| fluent-compiler | Apache 2.0 | Yes | - -All three implement the same FTL Specification v1.0 and are functionally compatible. - -## Patent Non-Assertion - -The copyright holder (Ervins Strauhmanis) states: - -**"This implementation makes no patent claims and is not aware of any patents that this implementation infringes. 
If any patents are held by the copyright holder that relate to this implementation, permission is granted under the MIT License to use this implementation without patent liability."** - -This is a non-binding statement of intent, not a legal patent grant. - -## Questions and Concerns - -### I found a patent issue - -Please report immediately: -1. Open a GitHub issue (mark as SECURITY if sensitive) -2. Email: [your-contact-email] (TODO: Add contact email) -3. Include: patent number, jurisdiction, specific claims - -### I need explicit patent protection - -**Options:** -1. Use Apache 2.0-licensed alternatives (fluent.runtime, fluent-compiler) -2. Obtain legal opinion that MIT License provides sufficient coverage -3. Negotiate separate patent license (contact copyright holder) - -### I want to contribute but hold patents - -**Please:** -1. Disclose in your pull request -2. Confirm you're comfortable with MIT License implicit permissions -3. Consider whether you want to make a patent non-assertion statement - -### Can FTLLexEngine change to Apache 2.0? - -Relicensing would require: -1. Agreement from all past contributors -2. Architectural decision to prioritize patent grants over MIT simplicity -3. Community discussion - -This is possible but not currently planned. - -## Legal Disclaimer - -**This document is for informational purposes only and does not constitute legal advice.** +--- +afad: "3.5" +version: "0.163.0" +domain: LEGAL +updated: "2026-04-22" +route: + keywords: [patents, legal, license, fluent, apache, mit, babel] + questions: ["what is the patent position?", "does the project include a patent grant?", "what about the Fluent specification license?"] +--- -Patent law is complex and varies by jurisdiction. This document represents the copyright holder's understanding and intent but may not be legally binding. 
+# Patent Notes -**For patent-related concerns, consult qualified legal counsel in your jurisdiction.** +**Purpose**: Summarize the patent posture of FTLLexEngine and its main upstream legal inputs. +**Prerequisites**: None. -## Further Reading +## Overview -### MIT License and Patents +FTLLexEngine is distributed under the MIT License. MIT does not contain an explicit patent grant or patent retaliation clause, so this repository does not add one on top of the license text shipped in `LICENSE`. -- [MIT License on OSI](https://opensource.org/licenses/MIT) -- [MIT License Compatibility](https://en.wikipedia.org/wiki/MIT_License) +The project’s legal posture is shaped by two notable upstream inputs plus this repository’s own license choice: -### Apache 2.0 Patent Provisions +| Component | License | Explicit Patent Grant | +|:----------|:--------|:----------------------| +| FTLLexEngine | MIT | No explicit patent clause | +| Fluent specification materials | Apache-2.0 | Yes | +| Babel (optional dependency) | BSD-3-Clause | No explicit patent clause | -- [Apache 2.0 License Full Text](https://www.apache.org/licenses/LICENSE-2.0) -- [Understanding Apache 2.0 Patent Grant](https://opensource.com/article/18/2/apache-2-patent-license) +## Fluent Specification -### General Patent Information +Project Fluent specification materials are published under Apache License 2.0, which includes an explicit contributor patent license in Section 3. That grant applies to the specification materials and upstream contributions to them; it does not convert this repository into an Apache-licensed implementation. -- [USPTO: Explore Intellectual Property](https://www.uspto.gov/kids/explore-intellectual-property) -- [Open Source Licenses and Patents](https://www.fossa.com/blog/open-source-licenses-101-apache-license-2-0/) +## Contributor Guidance ---- +Contributors should only submit code they are authorized to license under this repository’s terms. 
If you know code or data is encumbered by patent restrictions that would conflict with normal project use, do not contribute it here. -**Last Updated**: 2025-11-25 +## Disclaimer -**Contact**: See [NOTICE](NOTICE) file for copyright holder information. +This file is informational and not legal advice. For legal interpretation or patent risk analysis, consult qualified counsel. diff --git a/README.md b/README.md index 4fae615d..ea8e2364 100644 --- a/README.md +++ b/README.md @@ -57,7 +57,8 @@ coffee-order = { $bags -> """) result, errors = bundle.format_pattern("coffee-order", {"bags": 500, "origin": "Ethiopian"}) -# "500 bags of Ethiopian coffee" +assert errors == () +assert result == "500 bags of Ethiopian coffee" ``` > `use_isolating=False` removes Unicode bidi isolation markers from output, making strings suitable for direct comparison and logging. The default `use_isolating=True` wraps each placeable in U+2068/U+2069 markers for correct bidirectional text rendering in UI contexts. @@ -65,12 +66,15 @@ result, errors = bundle.format_pattern("coffee-order", {"bags": 500, "origin": " **Parse user input back to Python types:** ```python +from decimal import Decimal from ftllexengine.parsing import parse_currency # German buyer enters a bid price result, errors = parse_currency("12.450,00 EUR", "de_DE", default_currency="EUR") if not errors: amount, currency = result # (Decimal('12450.00'), 'EUR') + assert amount == Decimal("12450.00") + assert currency == "EUR" ``` --- @@ -90,7 +94,7 @@ if not errors: - [When to Use FTLLexEngine](#when-to-use-ftllexengine) - [Documentation](#documentation) - [Contributing](#contributing) -- [License](#license) +- [Legal](#legal) --- @@ -120,12 +124,13 @@ Or: `pip install ftllexengine` **Works without Babel:** - FTL syntax parsing (`parse_ftl()`, `serialize_ftl()`) - AST manipulation and transformation -- Validation and introspection +- Validation and message introspection **Requires Babel:** - `FluentBundle` (locale-aware 
formatting) - `FluentLocalization` (multi-locale fallback) - Bidirectional parsing (numbers, dates, currency) +- ISO territory and currency lookups @@ -153,15 +158,18 @@ invoice-total = Total: { CURRENCY($amount, currency: "USD") } """) result, _ = bundle.format_pattern("shipment-line", {"bags": 500, "origin": "Colombian"}) -# "500 bags of Colombian coffee" +assert result == "500 bags of Colombian coffee" result, _ = bundle.format_pattern("invoice-total", {"amount": Decimal("187500.00")}) -# "Total: $187,500.00" +assert result == "Total: $187,500.00" ``` **German (Hamburg buyer):** ```python +from decimal import Decimal +from ftllexengine import FluentBundle + bundle_de = FluentBundle("de_DE", use_isolating=False) bundle_de.add_resource(""" shipment-line = { $bags -> @@ -174,15 +182,18 @@ invoice-total = Gesamt: { CURRENCY($amount, currency: "EUR") } """) result, _ = bundle_de.format_pattern("shipment-line", {"bags": 500, "origin": "kolumbianischer"}) -# "500 Saecke kolumbianischer Kaffee" +assert result == "500 Saecke kolumbianischer Kaffee" result, _ = bundle_de.format_pattern("invoice-total", {"amount": Decimal("187500.00")}) -# "Gesamt: 187.500,00\u00a0€" (CLDR: non-breaking space before symbol) +assert result == "Gesamt: 187.500,00\u00a0€" # CLDR: non-breaking space before symbol ``` **Japanese (Tokyo buyer):** ```python +from decimal import Decimal +from ftllexengine import FluentBundle + bundle_ja = FluentBundle("ja_JP", use_isolating=False) bundle_ja.add_resource(""" shipment-line = { $bags -> @@ -194,10 +205,10 @@ invoice-total = 合計:{ CURRENCY($amount, currency: "JPY") } """) result, _ = bundle_ja.format_pattern("shipment-line", {"bags": 500, "origin": "コロンビア"}) -# "コロンビアコーヒー 500袋" +assert result == "コロンビアコーヒー 500袋" result, _ = bundle_ja.format_pattern("invoice-total", {"amount": Decimal("28125000")}) -# "合計:¥28,125,000" +assert result == "合計:¥28,125,000" ``` Bob uses the same pattern at Mars Colony 1. Spanish for the Colombian agronomists? 
Add one `.ftl` file. Zero code changes. @@ -213,6 +224,7 @@ Most libraries only format outbound data. That's a one-way trip. Bob's colonists type orders and quantities in their local format. A German engineer enters `"12.450,00 EUR"`. A Colombian agronomist enters `"45.000.000 COP"`. A Japanese technician files a delivery date as `"2026年3月15日"`. FTLLexEngine parses them all to exact Python types. ```python +from decimal import Decimal from ftllexengine.parsing import ( parse_currency, parse_date, @@ -232,14 +244,16 @@ if not errors: # Japanese technician enters a delivery date contract_date, errors = parse_date("2026年3月15日", "ja_JP") -# datetime.date(2026, 3, 15) +assert not errors +assert contract_date.isoformat() == "2026-03-15" # German engineer enters a localized amount for use in a Fluent message fnum, errors = parse_fluent_number("12.450,00", "de_DE") if not errors: # FluentNumber(value=Decimal('12450.00'), formatted='12.450,00', precision=2) # Pass fnum directly as a $variable — it carries its formatting metadata - pass + assert fnum.value == Decimal("12450.00") + assert str(fnum) == "12.450,00" ``` ```mermaid @@ -262,9 +276,11 @@ flowchart TB **When parsing fails, you get structured errors -- not exceptions:** ```python +from ftllexengine.parsing import parse_decimal + price, errors = parse_decimal("twelve thousand", "en_US") -# price = None -# errors = (FrozenFluentError(...),) +assert price is None +assert errors if errors: err = errors[0] @@ -284,6 +300,8 @@ from ftllexengine.parsing import parse_currency price_result, errors = parse_currency("$4.25", "en_US", default_currency="USD") if not errors: price_per_lb, currency = price_result # (Decimal('4.25'), 'USD') + assert price_per_lb == Decimal("4.25") + assert currency == "USD" bags = 500 lbs_per_bag = Decimal("132") # Standard 60kg bag @@ -309,7 +327,7 @@ bundle.add_resource('confirm = Contract: { $bags } bags at { CURRENCY($price, cu # Works normally when all variables are provided result, _ = 
bundle.format_pattern("confirm", {"bags": 500, "price": Decimal("4.25")}) -# "Contract: 500 bags at $4.25/lb" +assert result == "Contract: 500 bags at $4.25/lb" # Missing variable raises immediately (default strict=True behavior) try: @@ -322,8 +340,8 @@ except FormattingIntegrityError as e: # For soft error recovery, opt in with strict=False soft_bundle = FluentBundle("en_US", strict=False, use_isolating=False) soft_result, soft_errors = soft_bundle.format_pattern("missing-message", {}) -# soft_result = "{missing-message}" (fallback: key wrapped in braces) -# soft_errors = (FrozenFluentError(...),) +assert soft_result == "{missing-message}" # fallback: key wrapped in braces +assert soft_errors ``` --- @@ -362,8 +380,8 @@ with ThreadPoolExecutor(max_workers=100) as executor: executor.submit(format_confirmation, ja_bundle, Decimal("4.25"), "lb"), ] confirmations = [f.result() for f in futures] - # ["4,25\u00a0$ per lb", "US$4,25 per lb", "$4.25 per lb"] - # (CLDR locale-specific symbols; de_DE uses non-breaking space before $) + assert confirmations == ["4,25\u00a0$ per lb", "US$4,25 per lb", "$4.25 per lb"] + # CLDR locale-specific symbols; de_DE uses non-breaking space before $ ``` `FluentBundle` and `FluentLocalization` are thread-safe by design: @@ -380,17 +398,27 @@ Bob's colony manifest system loads `.ftl` files that grow as new message templat `add_resource_stream` and `parse_stream_ftl` accept any line iterator. 
Memory stays proportional to the largest single FTL entry, not the full file: ```python +from pathlib import Path +from tempfile import TemporaryDirectory from ftllexengine import FluentBundle, parse_stream_ftl -# Load directly from a file object — no full-file read -bundle = FluentBundle("en_US") -with open("colony_messages.ftl", encoding="utf-8") as f: - junk = bundle.add_resource_stream(f, source_path="colony_messages.ftl") - -# Or iterate entries without a bundle (parser-only install works too) -with open("colony_messages.ftl", encoding="utf-8") as f: - for entry in parse_stream_ftl(f): - print(type(entry).__name__, getattr(entry.id, "name", "")) +with TemporaryDirectory() as tmp: + source_path = Path(tmp) / "colony_messages.ftl" + source_path.write_text( + "hello = Hello from orbit\n" + "status = Cargo ready\n", + encoding="utf-8", + ) + + bundle = FluentBundle("en_US") + with source_path.open(encoding="utf-8") as handle: + junk = bundle.add_resource_stream(handle, source_path=source_path.name) + assert junk == () + + with source_path.open(encoding="utf-8") as handle: + entry_ids = [entry.id.name for entry in parse_stream_ftl(handle)] + assert entry_ids == ["hello", "status"] + print(entry_ids) ``` **Same guarantees as `add_resource`:** @@ -401,12 +429,28 @@ with open("colony_messages.ftl", encoding="utf-8") as f: `FluentLocalization.add_resource_stream` works identically for multi-locale setups: ```python +from pathlib import Path +from tempfile import TemporaryDirectory from ftllexengine import FluentLocalization from ftllexengine.localization import PathResourceLoader -l10n = FluentLocalization(["de_DE", "en_US"], ["messages"], PathResourceLoader("locales/")) -with open("extra_de.ftl", encoding="utf-8") as f: - l10n.add_resource_stream("de_DE", f, source_path="extra_de.ftl") +with TemporaryDirectory() as tmp: + base = Path(tmp) / "locales" + (base / "de_de").mkdir(parents=True) + (base / "en_us").mkdir(parents=True) + (base / "de_de" / 
"messages.ftl").write_text("hello = Hallo\n", encoding="utf-8") + (base / "en_us" / "messages.ftl").write_text("hello = Hello\n", encoding="utf-8") + extra_path = Path(tmp) / "extra_de.ftl" + extra_path.write_text("shipment = Zusatzdatei\n", encoding="utf-8") + + loader = PathResourceLoader(str(base / "{locale}")) + l10n = FluentLocalization(["de_DE", "en_US"], ["messages.ftl"], loader) + with extra_path.open(encoding="utf-8") as handle: + l10n.add_resource_stream("de_DE", handle, source_path=extra_path.name) + + shipment, errors = l10n.format_value("shipment") + assert errors == () + assert shipment == "Zusatzdatei" ``` --- @@ -420,7 +464,7 @@ import asyncio from ftllexengine import AsyncFluentBundle async def handle_request(name: str, bags: int) -> str: - async with AsyncFluentBundle("en_US") as bundle: + async with AsyncFluentBundle("en_US", use_isolating=False) as bundle: await bundle.add_resource(""" coffee-order = { $bags -> [one] 1 bag for { $name } @@ -432,6 +476,8 @@ coffee-order = { $bags -> ) return result +assert asyncio.run(handle_request("Alice", 2)) == "2 bags for Alice" + # Shared bundle across requests (create once, reuse): _bundle = AsyncFluentBundle("en_US") @@ -466,17 +512,12 @@ contract = { $buyer } purchases { $bags -> info = bundle.introspect_message("contract") -info.get_variable_names() -# frozenset({'buyer', 'bags', 'grade', 'seller', 'price', 'port', 'ship_date'}) - -info.get_function_names() -# frozenset({'CURRENCY', 'DATETIME'}) - -info.has_selectors -# True (uses plural selection for bags) - -info.requires_variable("price") -# True +assert info.get_variable_names() == frozenset( + {"buyer", "bags", "grade", "seller", "price", "port", "ship_date"} +) +assert info.get_function_names() == frozenset({"CURRENCY", "DATETIME"}) +assert info.has_selectors is True +assert info.requires_variable("price") is True ``` **Use cases:** @@ -490,39 +531,70 @@ info.requires_variable("price") Alice's trading platform and Bob's colony manifest system 
can't discover a bad `.ftl` file mid-operation. They validate everything at startup. -`LocalizationBootConfig` is the production boot sequence: load all resources, run `require_clean()` to assert every locale loaded without errors, and validate all message schemas before the first request arrives. If anything is wrong, it raises before traffic starts -- not during it. +`LocalizationBootConfig` is the production boot sequence: load all resources, run `require_clean()` to assert every locale loaded without errors, and validate all message schemas before the first request arrives. If anything is wrong, it raises before traffic starts -- not during it. Each config instance is single-use, so create a new one for each boot attempt. ```python +from pathlib import Path +from tempfile import TemporaryDirectory from ftllexengine import LocalizationBootConfig -# Load .ftl files from disk, validate schemas, raise before accepting traffic -cfg = LocalizationBootConfig.from_path( - locales=("en_US", "de_DE", "ja_JP"), - resource_ids=("invoice.ftl", "shipment.ftl"), - base_path="locales/{locale}", - message_schemas={ - "invoice-total": {"amount"}, - "shipment-line": {"bags", "origin"}, - }, - # Enforce that critical messages exist in at least one locale - required_messages=frozenset({"invoice-total", "shipment-line"}), -) - -# Primary API: returns structured evidence for audit trails -l10n, summary, schema_results = cfg.boot() -# raises IntegrityCheckFailedError if any resource fails, required message is absent, or schema mismatches - -print(f"Loaded {summary.total_attempted} resources, {summary.errors} errors") -# "Loaded 6 resources, 0 errors" +with TemporaryDirectory() as tmp: + base = Path(tmp) / "locales" + for locale, invoice_label in { + "en_us": "Total", + "de_de": "Gesamt", + "ja_jp": "合計", + }.items(): + locale_dir = base / locale + locale_dir.mkdir(parents=True) + (locale_dir / "invoice.ftl").write_text( + f'invoice-total = {invoice_label}: {{ CURRENCY($amount, 
currency: "USD") }}\n', + encoding="utf-8", + ) + (locale_dir / "shipment.ftl").write_text( + 'shipment-line = { $bags } bags of { $origin }\n', + encoding="utf-8", + ) -# schema_results: tuple[MessageVariableValidationResult, ...] -- one per message_schemas entry + cfg = LocalizationBootConfig.from_path( + locales=("en_US", "de_DE", "ja_JP"), + resource_ids=("invoice.ftl", "shipment.ftl"), + base_path=base / "{locale}", + message_schemas={ + "invoice-total": {"amount"}, + "shipment-line": {"bags", "origin"}, + }, + required_messages=frozenset({"invoice-total", "shipment-line"}), + ) + + l10n, summary, schema_results = cfg.boot() + print(f"Loaded {summary.total_attempted} resources, {summary.errors} errors") + assert len(schema_results) == 2 ``` **When only the localization object is needed:** ```python -l10n = cfg.boot_simple() # raises on failure, discards audit evidence -# l10n is now safe to use for the lifetime of the application +from pathlib import Path +from tempfile import TemporaryDirectory +from ftllexengine import LocalizationBootConfig + +with TemporaryDirectory() as tmp: + base = Path(tmp) / "locales" + (base / "en_us").mkdir(parents=True) + (base / "en_us" / "main.ftl").write_text("ready = System ready\n", encoding="utf-8") + + cfg = LocalizationBootConfig.from_path( + locales=("en_US",), + resource_ids=("main.ftl",), + base_path=base / "{locale}", + required_messages=frozenset({"ready"}), + ) + + l10n = cfg.boot_simple() + result, errors = l10n.format_value("ready") + assert errors == () + print(result) ``` **Use cases:** @@ -541,21 +613,21 @@ from ftllexengine.introspection.iso import get_territory_currencies, get_currenc # New buyer in Japan -- what currency? currencies = get_territory_currencies("JP") -# ("JPY",) +assert currencies == ("JPY",) # How many decimal places for yen? 
jpy = get_currency("JPY") -jpy.decimal_digits -# 0 -- no decimal places for yen +assert jpy is not None +assert jpy.decimal_digits == 0 # no decimal places for yen # Compare to Colombian peso cop = get_currency("COP") -cop.decimal_digits -# 2 +assert cop is not None +assert cop.decimal_digits == 2 # Multi-currency territories panama_currencies = get_territory_currencies("PA") -# ("PAB", "USD") -- Panama uses both Balboa and US Dollar +assert panama_currencies == ("PAB", "USD") # Panama uses both Balboa and US Dollar ``` Alice's invoices format correctly: JPY 28,125,000 in Tokyo, $187,500.00 in New York. @@ -570,8 +642,9 @@ Alice's invoices format correctly: JPY 28,125,000 in Tokyo, $187,500.00 in New Y | **Runtime** — `ftllexengine.runtime` | `FluentBundle`, message resolution, thread-safe formatting, built-in functions (`NUMBER`, `CURRENCY`, `DATETIME`) | Yes | | **Localization** — `ftllexengine.localization` | `FluentLocalization` multi-locale fallback chains; `LocalizationBootConfig` strict-mode production boot | Yes | | **Parsing** — `ftllexengine.parsing` | Bidirectional parsing: numbers, dates, currency back to Python types | Yes | -| **Introspection** — `ftllexengine.introspection` | Message variable/function extraction, ISO 3166/4217 territory and currency data | Partial | -| **Validation** — `ftllexengine.validation` | Cycle detection, reference validation, semantic checks | No | +| **Introspection** — `ftllexengine.introspection` | Message-variable/function extraction, ISO 3166/4217 territory and currency data | Partial | +| **Analysis** — `ftllexengine.analysis` | Dependency-graph helpers such as `detect_cycles()` | No | +| **Validation** — `ftllexengine.validation` | Resource validation, unresolved-reference checks, semantic checks | No | | **Diagnostics** — `ftllexengine.diagnostics` | Structured error types, error codes, formatting | No | | **Integrity** — `ftllexengine.integrity` | BLAKE2b checksums, strict mode, immutable exceptions | No | @@ 
-609,10 +682,11 @@ Alice's invoices format correctly: JPY 28,125,000 in Tokyo, $187,500.00 in New Y | Resource | Description | |:---------|:------------| | [Quick Reference](docs/QUICK_REFERENCE.md) | Copy-paste patterns for common tasks | -| [API Reference](docs/DOC_00_Index.md) | Complete class and function documentation | +| [API Reference](docs/DOC_00_Index.md) | Reference coverage for the exported package and module APIs | | [Parsing Guide](docs/PARSING_GUIDE.md) | Bidirectional parsing deep-dive | | [Data Integrity](docs/DATA_INTEGRITY_ARCHITECTURE.md) | Strict mode, checksums, immutable errors | | [Terminology](docs/TERMINOLOGY.md) | Fluent and FTLLexEngine concepts | +| [Release Protocol](docs/RELEASE_PROTOCOL.md) | `gh`-first release-branch, tag, GitHub Release, and PyPI procedure | | [Examples](examples/) | Working code you can run | --- @@ -620,13 +694,17 @@ Alice's invoices format correctly: JPY 28,125,000 in Tokyo, $187,500.00 in New Y ## Contributing Contributions welcome. See [CONTRIBUTING.md](CONTRIBUTING.md) for setup and guidelines. +The shortest full-repo verification path is `./check.sh`. --- -## License - -MIT License - See [LICENSE](LICENSE). +## Legal -Implements the [Fluent Specification](https://github.com/projectfluent/fluent/blob/master/spec/fluent.ebnf) (Apache 2.0). +ftllexengine is MIT-licensed. It has no required runtime dependencies. The optional +[babel] extra adds Babel (BSD 3-Clause). ftllexengine is an independent implementation +of the [FTL Syntax Specification](https://github.com/projectfluent/fluent/blob/master/spec/fluent.ebnf) +(Apache 2.0, Mozilla Foundation and others) and is not affiliated with or endorsed +by Mozilla. See [NOTICE](NOTICE) for attribution and [PATENTS.md](PATENTS.md) for +patent considerations. 
-**Legal**: [PATENTS.md](PATENTS.md) | [NOTICE](NOTICE) +[LICENSE](LICENSE) | [NOTICE](NOTICE) | [PATENTS.md](PATENTS.md) diff --git a/check.sh b/check.sh new file mode 100755 index 00000000..a8dfeb52 --- /dev/null +++ b/check.sh @@ -0,0 +1,33 @@ +#!/usr/bin/env bash + +set -euo pipefail + +PY_VERSION="${PY_VERSION:-3.13}" +UV_ENV=".venv-${PY_VERSION}" +ATHERIS_SMOKE_TIME="${ATHERIS_SMOKE_TIME:-5}" + +ROOT_DIR="$(cd -- "$(dirname -- "${BASH_SOURCE[0]}")" && pwd)" +cd "$ROOT_DIR" + +run_step() { + local title="$1" + shift + printf '\n== %s ==\n' "$title" + "$@" +} + +uv_python() { + UV_PROJECT_ENVIRONMENT="$UV_ENV" uv run --python "$PY_VERSION" --group dev python "$@" +} + +run_step "Version Validation" uv_python scripts/validate_version.py +run_step "Documentation Validation" uv_python scripts/validate_docs.py +run_step "Examples" uv_python scripts/run_examples.py +run_step "Lint" ./scripts/lint.sh +run_step "Tests" ./scripts/test.sh +run_step "HypoFuzz Preflight" ./scripts/fuzz_hypofuzz.sh --preflight +run_step "Atheris Corpus Health" ./scripts/fuzz_atheris.sh --corpus +run_step "Atheris Graph Smoke" ./scripts/fuzz_atheris.sh graph --time "$ATHERIS_SMOKE_TIME" +run_step "Atheris Introspection Smoke" ./scripts/fuzz_atheris.sh introspection --time "$ATHERIS_SMOKE_TIME" + +printf '\n[PASS] Full repository check completed.\n' diff --git a/docs/CUSTOM_FUNCTIONS_GUIDE.md b/docs/CUSTOM_FUNCTIONS_GUIDE.md index 3b70e228..4873fdec 100644 --- a/docs/CUSTOM_FUNCTIONS_GUIDE.md +++ b/docs/CUSTOM_FUNCTIONS_GUIDE.md @@ -1,998 +1,80 @@ --- -afad: "3.3" -version: "0.153.0" -domain: custom-functions -updated: "2026-03-13" +afad: "3.5" +version: "0.163.0" +domain: CUSTOM_FUNCTIONS +updated: "2026-04-22" route: - keywords: [custom functions, add_function, fluent functions, factory pattern, locale-aware, formatting functions] - questions: ["how to create custom function?", "how to add custom function?", "how to make locale-aware function?"] + keywords: [custom functions, 
fluent_function, FunctionRegistry, locale injection, add_function] + questions: ["how do I add a custom function?", "how does locale injection work?", "should I use a registry or add_function?"] --- -# Advanced Custom Functions Guide +# Custom Functions Guide -**Purpose**: Extend FTLLexEngine with custom formatting functions. -**Prerequisites**: Basic FluentBundle usage. +**Purpose**: Add domain-specific functions to `FluentBundle` or `FluentLocalization`. +**Prerequisites**: Familiarity with `FluentBundle.format_pattern()` and FTL function calls. ---- - -## Table of Contents - -1. [Introduction](#introduction) -2. [When to Use Custom Functions](#when-to-use-custom-functions) -3. [Function Naming Conventions](#function-naming-conventions) -4. [Parameter Conventions](#parameter-conventions) -5. [Error Handling Patterns](#error-handling-patterns) -6. [Locale-Aware Functions (Factory Pattern)](#locale-aware-functions-factory-pattern) -7. [Integration with Babel for i18n](#integration-with-babel-for-i18n) -8. [Complete Examples](#complete-examples) -9. [Testing Custom Functions](#testing-custom-functions) -10. [Best Practices and Pitfalls](#best-practices-and-pitfalls) - ---- - -## Introduction - -FTLLexEngine includes built-in functions for common formatting needs: -- **NUMBER()**: Locale-aware number formatting -- **DATETIME()**: Locale-aware date/time formatting -- **CURRENCY()**: Locale-aware currency formatting - -However, domain-specific applications often require custom formatters for specialized data types. This guide shows you how to implement custom functions that integrate seamlessly with FTLLexEngine's i18n infrastructure. 
- -**What You'll Learn**: -- How to create custom functions that follow FTL conventions -- How to make functions locale-aware using the factory pattern -- How to integrate with Babel for CLDR-compliant formatting -- Best practices for error handling and thread safety - ---- - -## When to Use Custom Functions - -### Use Custom Functions For: -- **Domain-specific formatting**: Phone numbers, file sizes, durations, credit cards -- **Business logic**: Loyalty points, shipping estimates, inventory status -- **Rich text rendering**: Markdown, HTML sanitization, custom markup -- **Specialized localization**: Industry-specific terminology, custom plural rules - -### Use Built-in Functions For: -- **Numbers**: Always use `NUMBER()` instead of Python's `format()` -- **Dates/Times**: Always use `DATETIME()` instead of `strftime()` -- **Currency**: Always use `CURRENCY()` instead of custom implementations - -**Why?** Built-in functions use Babel for CLDR-compliant formatting, which handles: -- Locale-specific separators (1,234.56 vs 1.234,56 vs 1 234,56) -- Currency-specific decimal places (JPY: 0, BHD: 3) -- Symbol placement (en_US: "$1.23" vs lv_LV: "1,23 €") -- Right-to-left language support (Arabic, Hebrew) - ---- - -## Function Naming Conventions +## Overview -### FTL Naming Convention: UPPERCASE (Recommended) +FTLLexEngine supports two patterns: -By convention, FTL functions use UPPERCASE names to distinguish them from message references: +- `bundle.add_function("NAME", func)` for one bundle or one localization object. +- `FunctionRegistry` for reusable or shared function sets. -```python -# RECOMMENDED - FTL convention (UPPERCASE) -def FILESIZE(bytes_count: int) -> str: - ... - -def PHONE(number: str) -> str: - ... - -def MARKDOWN(text: str) -> str: - ... -``` - -```python -# VALID BUT NOT RECOMMENDED - lowercase/camelCase works but breaks convention -def filesize(bytes_count: int) -> str: # Works, but unconventional - ... 
- -def phoneNumber(number: str) -> str: # Works, but unconventional - ... -``` - -**Rationale**: FTL syntax is case-sensitive. UPPERCASE names are a convention that: -1. Visually distinguishes functions from message references in FTL code -2. Matches the style of built-in functions (NUMBER, DATETIME) -3. Makes function calls immediately recognizable - -**Note**: Function names can use any case (lowercase, camelCase, UPPERCASE). UPPERCASE remains the recommended convention for consistency with built-in functions. - -### Python Linters - -Disable naming warnings for FTL functions: - -```python -def FILESIZE(bytes_count: int) -> str: # noqa: N802 - """Format file size.""" - ... - -# Or use pylint disable -def PHONE(number: str) -> str: # pylint: disable=invalid-name - """Format phone number.""" - ... -``` - ---- +FTL uses uppercase function names by convention. Python callables can keep normal snake_case parameter names; the bridge maps FTL camelCase named arguments onto Python snake_case parameters automatically. -## Parameter Conventions - -### Positional vs Keyword Arguments - -FTL function calls use **named parameters** for all arguments except the first: - -```ftl -# FTL syntax (camelCase for multi-word parameter names) -file-size = { FILESIZE($bytes, precision: 2) } -phone = { PHONE($number, formatStyle: "international") } -``` - -**Python implementation should use keyword-only arguments** after the first parameter (recommended for clarity): - -```python -# CORRECT - Uses * to enforce keyword-only args -def FILESIZE(bytes_count: int, *, precision: int = 2) -> str: - """Format file size in human-readable format. - - Args: - bytes_count: Number of bytes (positional) - precision: Decimal precision (keyword-only with default) - """ - ... - -def PHONE(number: str, *, format_style: str = "international") -> str: - """Format phone number. - - Args: - number: Phone number (positional) - format_style: Format type (keyword-only with default) - """ - ... 
-``` - -```python -# WRONG - Missing * separator -def FILESIZE(bytes_count: int, precision: int = 2) -> str: # [WRONG] positional after keyword - ... -``` - -### Parameter Naming: snake_case - -Use Python's `snake_case` convention for parameter names: - -```python -# CORRECT -def PHONE(number: str, *, format_style: str = "international") -> str: - ... - -# WRONG - Don't use camelCase -def PHONE(number: str, *, formatStyle: str = "international") -> str: # [WRONG] use snake_case - ... -``` - -**Why?** FunctionRegistry automatically bridges FTL's camelCase to Python's snake_case: - -```ftl -# FTL uses camelCase -phone = { PHONE($number, formatStyle: "international") } -``` - -```python -# Python receives snake_case -def PHONE(number: str, *, format_style: str = "international") -> str: - # format_style receives "international" - ... -``` - -**Supported conversions**: -- `formatStyle` → `format_style` -- `currencyDisplay` → `currency_display` -- `minimumFractionDigits` → `minimum_fraction_digits` - ---- - -## Error Handling Patterns - -### Rule #1: Custom Functions MUST NEVER Raise Exceptions - -Fluent's error model requires graceful degradation: - -```python -# CORRECT - Returns fallback on error -def FILESIZE(bytes_count: int, *, precision: int = 2) -> str: - """Format file size.""" - try: - bytes_count = float(bytes_count) - # ... formatting logic ... - return f"{bytes_count:.{precision}f} {unit}" - except (ValueError, TypeError): - # Graceful fallback for invalid input - return f"{bytes_count} bytes" - except Exception: - # Catch-all for unexpected errors - return str(bytes_count) -``` +## Single-Bundle Function ```python -# WRONG - Raising exceptions crashes the application -def FILESIZE(bytes_count: int, *, precision: int = 2) -> str: - if not isinstance(bytes_count, int): - raise TypeError("bytes_count must be numeric") # [WRONG] NEVER raise from a custom function - ... 
-``` - -### Rule #2: Return Readable Fallbacks - -When errors occur, return a fallback that helps developers debug: - -```python -# GOOD - Descriptive fallback -def CURRENCY_CUSTOM(amount: int | Decimal, *, currency_code: str = "USD") -> str: - try: - from babel import numbers - return numbers.format_currency(amount, currency_code, locale="en_US") - except ImportError: - return f"{currency_code} {amount:.2f}" # Shows what failed - except Exception: - return f"{currency_code} {amount}" # Minimal fallback -``` - -```python -# BAD - Useless fallback -def CURRENCY_CUSTOM(amount: int | Decimal, *, currency_code: str = "USD") -> str: - try: - ... - except Exception: - return "???" # [WRONG] Not helpful for debugging -``` - -### Rule #3: Log Debug Information (Optional) - -For production deployments, log unexpected errors: - -```python -import logging - -logger = logging.getLogger(__name__) - -def PHONE(number: str, *, format_style: str = "international") -> str: - """Format phone number.""" - try: - # ... formatting logic ... - return formatted_number - except Exception as e: - logger.debug(f"PHONE formatting failed: {e}") - return str(number) -``` - -**Note**: Use `logger.debug()`, not `logger.warning()`, since formatting errors are expected in normal operation (e.g., user input). - ---- - -## Locale-Aware Functions (Factory Pattern) - -### Problem: Functions Need Bundle's Locale - -Custom functions often need to format differently based on the bundle's locale: - -```python -# How do we make GREETING() use the bundle's locale? -bundle = FluentBundle("lv_LV") -bundle.add_function("GREETING", ???) # Need access to "lv_LV" -``` - -### Solution: Factory Pattern - -Create a **factory function** that captures the bundle's locale in a closure: - -```python -def make_greeting_function(bundle_locale: str) -> Callable: - """Factory for locale-aware greeting function. 
- - Args: - bundle_locale: The bundle's locale string (e.g., "lv_LV", "de_DE") - - Returns: - GREETING function customized for the locale - """ - def GREETING(name: str, *, formal: str = "false") -> str: - """Locale-aware greeting. - - Args: - name: Person's name - formal: "true" for formal greeting, "false" for informal - - Returns: - Localized greeting - """ - is_formal = formal.lower() == "true" - locale_lower = bundle_locale.lower() - - if locale_lower.startswith("lv"): - return f"Labdien, {name}!" if is_formal else f"Sveiki, {name}!" - if locale_lower.startswith("de"): - return f"Guten Tag, {name}!" if is_formal else f"Hallo, {name}!" - if locale_lower.startswith("pl"): - return f"Dzień dobry, {name}!" if is_formal else f"Cześć, {name}!" - return f"Good day, {name}!" if is_formal else f"Hello, {name}!" - - return GREETING -``` - -### Usage - -```python -# Create locale-specific greeting function -bundle_en = FluentBundle("en_US") -bundle_en.add_function("GREETING", make_greeting_function(bundle_en.locale)) - -bundle_lv = FluentBundle("lv_LV") -bundle_lv.add_function("GREETING", make_greeting_function(bundle_lv.locale)) - -# FTL usage (same in all locales) -bundle_en.add_resource('greet = { GREETING($name, formal: "false") }') -bundle_lv.add_resource('greet = { GREETING($name, formal: "false") }') - -# Different output based on locale -result, _ = bundle_en.format_pattern("greet", {"name": "Alice"}) -# → "Hello, Alice!" - -result, _ = bundle_lv.format_pattern("greet", {"name": "Anna"}) -# → "Sveiki, Anna!" -``` - -### Alternative: Automatic Locale Injection - -Instead of using the factory pattern, use the `@fluent_function` decorator for automatic locale injection: - -```python -from ftllexengine import FluentBundle, fluent_function - - -@fluent_function(inject_locale=True) -def GREETING(name: str, locale_code: str, /, *, formal: str = "false") -> str: - """Locale-aware greeting with automatic locale injection. 
- - Args: - name: Person's name (positional, from FTL) - locale_code: Bundle's canonical lowercase underscore locale (auto-injected) - formal: "true" for formal greeting, "false" for informal (keyword) - - Returns: - Localized greeting - """ - is_formal = formal.lower() == "true" - locale_lower = locale_code.lower() - - if locale_lower.startswith("lv"): - return f"Labdien, {name}!" if is_formal else f"Sveiki, {name}!" - if locale_lower.startswith("de"): - return f"Guten Tag, {name}!" if is_formal else f"Hallo, {name}!" - return f"Good day, {name}!" if is_formal else f"Hello, {name}!" - - -# Register - locale will be injected automatically -bundle = FluentBundle("lv_LV") -bundle.add_function("GREETING", GREETING) - -bundle.add_resource('greet = { GREETING($name, formal: "false") }') -result, _ = bundle.format_pattern("greet", {"name": "Anna"}) -# → "Sveiki, Anna!" -``` - -**How it works:** -1. Apply `@fluent_function(inject_locale=True)` to your function -2. The runtime checks for this via `FunctionRegistry.should_inject_locale()` -3. 
When calling the function, the bundle's locale is injected as the second positional argument - -**When to use which approach:** - -| Approach | Use When | -|:---------|:---------| -| Factory pattern | Function needs locale at definition time (closures) | -| `@fluent_function(inject_locale=True)` | Function accepts locale as parameter at call time | -| Neither | Function doesn't need locale (e.g., FILESIZE) | - -### Alternative: Use Babel Locale - -For CLDR-compliant formatting, create a LocaleContext inside the function: - -```python -def make_date_range_function(bundle_locale: str) -> Callable: - """Factory for locale-aware date range formatter.""" - def DATE_RANGE(start: str, end: str) -> str: - """Format date range with locale-specific formatting.""" - from ftllexengine.runtime.locale_context import LocaleContext - from datetime import datetime - - try: - ctx = LocaleContext.create(bundle_locale) - start_dt = datetime.fromisoformat(start) - end_dt = datetime.fromisoformat(end) - - # Use Babel for locale-aware formatting - from babel.dates import format_date - - start_formatted = format_date(start_dt, format="medium", locale=ctx.babel_locale) - end_formatted = format_date(end_dt, format="medium", locale=ctx.babel_locale) - - return f"{start_formatted} – {end_formatted}" - except Exception: - return f"{start} – {end}" - - return DATE_RANGE -``` - ---- - -## Integration with Babel for i18n - -### When to Use Babel - -Use Babel for **international data types** where formatting rules vary by locale: -- Dates and times -- Numbers and percentages -- Currency -- Units (distances, weights, volumes) -- Lists and conjunctions - -### Example: Locale-Aware Custom Currency (Educational Only) - -**NOTE**: FTLLexEngine has a built-in `CURRENCY()` function. This example is for educational purposes only, demonstrating how to integrate Babel in custom functions. 
- -```python -def CURRENCY_CUSTOM_EXAMPLE(amount: int | Decimal, *, currency_code: str = "USD", locale: str = "en_US") -> str: - """Format currency with CLDR-compliant locale-aware formatting. - - EDUCATIONAL EXAMPLE ONLY - Use built-in CURRENCY() function instead! - - This demonstrates proper Babel integration for i18n-aware formatting. - - Args: - amount: Monetary amount - currency_code: ISO 4217 currency code (USD, EUR, JPY, BHD, etc.) - locale: Babel locale identifier for formatting - - Returns: - Formatted currency string using CLDR rules - - Why the naive approach is wrong: - - Hardcoded symbol placement (always before amount) - wrong for many locales - - Hardcoded 2 decimals - wrong for JPY (0 decimals), BHD (3 decimals) - - Ignored locale-specific spacing and formatting rules - - Did not use CLDR data - """ - try: - from babel import numbers - - # Use Babel's format_currency for proper CLDR compliance - return numbers.format_currency(amount, currency_code, locale=locale) - except ImportError: - # Fallback if Babel not installed (should never happen in FTLLexEngine env) - return f"{currency_code} {amount:.2f}" - except Exception: - # Fluent functions must never crash - return f"{currency_code} {amount}" -``` - -### Why Babel Integration Matters - -```python -# WRONG - Naive implementation -def CURRENCY_NAIVE(amount: int | Decimal, *, currency_code: str = "USD") -> str: - symbols = {"USD": "$", "EUR": "€", "JPY": "¥"} - symbol = symbols.get(currency_code, currency_code) - return f"{symbol}{amount:,.2f}" # [WRONG] Many problems! - -# Problems with naive approach: -# 1. Always puts symbol before amount (wrong for lv_LV, de_DE) -# 2. Always uses 2 decimals (wrong for JPY: 0, BHD: 3) -# 3. Uses English thousand separators (wrong for de_DE: period, lv_LV: space) -# 4. 
Missing currency codes (180+ currencies in ISO 4217) -``` - -```python -# CORRECT - Babel integration -from babel import numbers - -def CURRENCY_CORRECT(amount: int | Decimal, *, currency_code: str = "USD", locale: str = "en_US") -> str: - try: - return numbers.format_currency(amount, currency_code, locale=locale) - except Exception: - return f"{currency_code} {amount}" - -# Benefits: -# CLDR-compliant symbol placement -# Currency-specific decimal places -# Locale-specific grouping and separators -# Supports all ISO 4217 currencies -# Handles RTL languages (Arabic, Hebrew) -``` - ---- - -## Complete Examples - -### Example 1: PHONE Formatting - -```python -def PHONE(number: str, *, format_style: str = "international") -> str: - """Format phone number. - - Args: - number: Phone number (digits only or with separators) - format_style: "international", "national", or "compact" - - Returns: - Formatted phone number - """ - # Remove non-digits - digits = "".join(c for c in str(number) if c.isdigit()) - - if format_style == "international" and len(digits) >= 10: - # US/Canada format: +1 (555) 123-4567 - return f"+{digits[0]} ({digits[1:4]}) {digits[4:7]}-{digits[7:]}" - if format_style == "national" and len(digits) >= 10: - # (555) 123-4567 - return f"({digits[-10:-7]}) {digits[-7:-4]}-{digits[-4:]}" - if format_style == "compact": - # 5551234567 - return digits - return number # Fallback -``` - -**Usage**: -```python -bundle.add_function("PHONE", PHONE) -bundle.add_resource(""" -support-phone = Call us at { PHONE($number, formatStyle: "international") } -""") - -result, _ = bundle.format_pattern("support-phone", {"number": "15551234567"}) -# → "Call us at +1 (555) 123-4567" -``` - ---- - -### Example 2: MARKDOWN Rendering - -```python -import re - -def MARKDOWN(text: str, *, render: str = "html") -> str: - """Render markdown to HTML (simplified). 
- - Args: - text: Markdown text - render: Output format ("html" or "plain") - - Returns: - Rendered text - """ - if render == "plain": - # Strip markdown syntax - text = re.sub(r"\*\*(.*?)\*\*", r"\1", text) # Remove **bold** - text = re.sub(r"\*(.*?)\*", r"\1", text) # Remove *italic* - return re.sub(r"\[(.*?)\]\(.*?\)", r"\1", text) # Remove [links](url) - - # Simple HTML rendering - text = re.sub(r"\*\*(.*?)\*\*", r"\1", text) # **bold** - text = re.sub(r"\*(.*?)\*", r"\1", text) # *italic* - return re.sub(r"\[(.*?)\]\((.*?)\)", r'\1', text) # [text](url) -``` - -**Usage**: -```python -bundle.add_function("MARKDOWN", MARKDOWN) -bundle.add_resource(""" -welcome-html = { MARKDOWN($text, render: "html") } -welcome-plain = { MARKDOWN($text, render: "plain") } -""") - -result, _ = bundle.format_pattern("welcome-html", { - "text": "Welcome to **FTLLexEngine**! Visit [our site](https://example.com)." -}) -# → "Welcome to FTLLexEngine! Visit our site." -``` - ---- - -### Example 3: FILESIZE Formatting - -```python -def FILESIZE(bytes_count: int, *, precision: int = 2) -> str: - """Format file size in human-readable format. 
- - Args: - bytes_count: Number of bytes - precision: Decimal precision - - Returns: - Human-readable file size (e.g., "1.23 MB") - """ - try: - bytes_count = float(bytes_count) - units = ["B", "KB", "MB", "GB", "TB", "PB"] - - for unit in units: - if bytes_count < 1024.0: - return f"{bytes_count:.{precision}f} {unit}" - bytes_count /= 1024.0 +from ftllexengine import FluentBundle - return f"{bytes_count:.{precision}f} EB" - except (ValueError, TypeError): - return f"{bytes_count} bytes" -``` +def FILESIZE(value: int) -> str: + return f"{value / 1_000_000:.2f} MB" -**Usage**: -```python +bundle = FluentBundle("en_US", use_isolating=False) bundle.add_function("FILESIZE", FILESIZE) -bundle.add_resource(""" -file-info = { $filename } ({ FILESIZE($bytes) }) -""") - -result, _ = bundle.format_pattern("file-info", { - "filename": "video.mp4", - "bytes": 157286400 -}) -# → "video.mp4 (150.00 MB)" +bundle.add_resource("attachment = Size: { FILESIZE($bytes) }") +result, errors = bundle.format_pattern("attachment", {"bytes": 15_000_000}) +assert errors == () +assert result == "Size: 15.00 MB" ``` ---- - -### Example 4: DURATION Formatting +## Reusable Registry ```python -def DURATION(seconds: int | Decimal, *, format_style: str = "long") -> str: - """Format duration in human-readable format. 
- - Args: - seconds: Duration in seconds - format_style: "long", "short", or "compact" - - Returns: - Formatted duration - """ - try: - seconds = int(seconds) - days, remainder = divmod(seconds, 86400) - hours, remainder = divmod(remainder, 3600) - minutes, secs = divmod(remainder, 60) +from ftllexengine import FluentBundle +from ftllexengine.runtime import create_default_registry - if format_style == "long": - parts = [] - if days > 0: - parts.append(f"{days} day{'s' if days != 1 else ''}") - if hours > 0: - parts.append(f"{hours} hour{'s' if hours != 1 else ''}") - if minutes > 0: - parts.append(f"{minutes} minute{'s' if minutes != 1 else ''}") - if secs > 0 or not parts: - parts.append(f"{secs} second{'s' if secs != 1 else ''}") - return ", ".join(parts) +registry = create_default_registry() - if format_style == "short": - parts = [] - if days > 0: - parts.append(f"{days}d") - if hours > 0: - parts.append(f"{hours}h") - if minutes > 0: - parts.append(f"{minutes}m") - if secs > 0 or not parts: - parts.append(f"{secs}s") - return " ".join(parts) +def UPPER(value: str) -> str: + return value.upper() - # Compact - if days > 0: - return f"{days}d{hours}h" - if hours > 0: - return f"{hours}h{minutes}m" - if minutes > 0: - return f"{minutes}m{secs}s" - return f"{secs}s" - except (ValueError, TypeError): - return str(seconds) +registry.register(UPPER, ftl_name="UPPER") +bundle = FluentBundle("en_US", functions=registry, use_isolating=False) ``` -**Usage**: -```python -bundle.add_function("DURATION", DURATION) -bundle.add_resource(""" -video-duration = Duration: { DURATION($seconds, formatStyle: "short") } -""") - -result, _ = bundle.format_pattern("video-duration", {"seconds": 3725}) -# → "Duration: 1h 2m 5s" -``` - ---- - -## Testing Custom Functions +## Locale Injection -### Unit Tests +Use `@fluent_function(inject_locale=True)` when the callable needs the bundle’s locale code appended as the last positional argument. 
```python -import pytest from ftllexengine import FluentBundle +from ftllexengine.runtime import fluent_function -class TestFileSizeFunction: - """Test FILESIZE custom function.""" - - def test_filesize_bytes(self) -> None: - """Test file size in bytes.""" - bundle = FluentBundle("en_US", use_isolating=False) - bundle.add_function("FILESIZE", FILESIZE) - bundle.add_resource('size = { FILESIZE($bytes) }') - - result, errors = bundle.format_pattern("size", {"bytes": 512}) - assert result == "512.00 B" - assert not errors - - def test_filesize_megabytes(self) -> None: - """Test file size in megabytes.""" - bundle = FluentBundle("en_US", use_isolating=False) - bundle.add_function("FILESIZE", FILESIZE) - bundle.add_resource('size = { FILESIZE($bytes) }') - - result, errors = bundle.format_pattern("size", {"bytes": 157286400}) - assert result == "150.00 MB" - assert not errors - - def test_filesize_precision(self) -> None: - """Test file size with custom precision.""" - bundle = FluentBundle("en_US", use_isolating=False) - bundle.add_function("FILESIZE", FILESIZE) - bundle.add_resource('size = { FILESIZE($bytes, precision: 4) }') - - result, errors = bundle.format_pattern("size", {"bytes": 1536}) - assert result == "1.5000 KB" - assert not errors - - def test_filesize_error_handling(self) -> None: - """Test file size error handling.""" - bundle = FluentBundle("en_US", use_isolating=False) - bundle.add_function("FILESIZE", FILESIZE) - bundle.add_resource('size = { FILESIZE($bytes) }') - - result, errors = bundle.format_pattern("size", {"bytes": "invalid"}) - assert "bytes" in result # Should return fallback - assert not errors # Function handles error gracefully -``` - -### Property-Based Testing with Hypothesis - -```python -from hypothesis import given, strategies as st, settings - -class TestFileSizeHypothesis: - """Property-based tests for FILESIZE function.""" - - @given(bytes_count=st.integers(min_value=0, max_value=10**15)) - @settings(max_examples=100) - def 
test_filesize_never_crashes(self, bytes_count: int) -> None: - """FILESIZE must never crash for any valid byte count.""" - bundle = FluentBundle("en_US", use_isolating=False) - bundle.add_function("FILESIZE", FILESIZE) - bundle.add_resource('size = { FILESIZE($bytes) }') - - result, errors = bundle.format_pattern("size", {"bytes": bytes_count}) - assert isinstance(result, str) - assert len(result) > 0 - assert not errors - - @given( - bytes_count=st.integers(min_value=0, max_value=10**15), - precision=st.integers(min_value=0, max_value=10) - ) - @settings(max_examples=100) - def test_filesize_precision_never_crashes(self, bytes_count: int, precision: int) -> None: - """FILESIZE with precision must never crash.""" - bundle = FluentBundle("en_US", use_isolating=False) - bundle.add_function("FILESIZE", FILESIZE) - bundle.add_resource(f"size = {{ FILESIZE($bytes, precision: {precision}) }}") +@fluent_function(inject_locale=True) +def GREETING(name: str, locale_code: str) -> str: + return "Sveiki" if locale_code.startswith("lv") else "Hello" - result, errors = bundle.format_pattern("size", { - "bytes": bytes_count, - }) - assert isinstance(result, str) - assert len(result) > 0 +bundle = FluentBundle("lv_LV", use_isolating=False) +bundle.add_function("GREETING", GREETING) +bundle.add_resource("msg = { GREETING($name) }, { $name }!") +result, errors = bundle.format_pattern("msg", {"name": "Anna"}) +assert errors == () +assert result == "Sveiki, Anna!" ``` ---- - -## Best Practices and Pitfalls - -### Best Practices - -1. **Use `*` for keyword-only arguments** - ```python - def CUSTOM(value: str, *, option: str = "default") -> str: - ... - ``` - -2. **Return type must be `str`** - ```python - def CUSTOM(value: int) -> str: # Returns str - return str(value) - ``` - -3. **Never raise exceptions** - ```python - def CUSTOM(value: str) -> str: - try: - return process(value) - except Exception: - return value # Graceful fallback - ``` - -4. 
**Use factory pattern for locale-aware functions** - ```python - def make_custom_function(bundle_locale: str) -> Callable: - def CUSTOM(value: str) -> str: - # Use bundle_locale here - ... - return CUSTOM - - bundle.add_function("CUSTOM", make_custom_function(bundle.locale)) - ``` - -5. **Integrate with Babel for i18n data types** - ```python - from babel import numbers, dates - - def CUSTOM(value: int | Decimal, *, locale: str = "en_US") -> str: - return numbers.format_decimal(value, locale=locale) - ``` - -6. **Write comprehensive tests** - - Unit tests for expected behavior - - Property-based tests with Hypothesis for robustness - - Error handling tests - -7. **Document parameters clearly** - ```python - def CUSTOM(value: str, *, format_style: str = "default") -> str: - """Short description. - - Args: - value: Description of value parameter - format_style: Description of format_style parameter - - Returns: - Description of return value - """ - ... - ``` - ---- - -### Common Pitfalls: What to Avoid - -1. **Raising exceptions** - ```python - # WRONG - def CUSTOM(value: int) -> str: - if value < 0: - raise ValueError("Negative values not allowed") # [WRONG] Crashes! - return str(value) - - # CORRECT - def CUSTOM(value: int) -> str: - if value < 0: - return "0" # Graceful fallback - return str(value) - ``` - -2. **Not using keyword-only arguments** - ```python - # WRONG - def CUSTOM(value: str, option: str = "default") -> str: # [WRONG] Missing * - ... - - # CORRECT - def CUSTOM(value: str, *, option: str = "default") -> str: # Uses * - ... - ``` - -3. **Returning non-string types** - ```python - # WRONG - def CUSTOM(value: int) -> int: # [WRONG] Returns int - return value * 2 - - # CORRECT - def CUSTOM(value: int) -> str: # Returns str - return str(value * 2) - ``` - -4. **Using camelCase for parameters** - ```python - # WRONG - def CUSTOM(value: str, *, formatStyle: str = "default") -> str: # [WRONG] camelCase - ... 
- - # CORRECT - def CUSTOM(value: str, *, format_style: str = "default") -> str: # snake_case - ... - ``` - -5. **Hardcoding locale-specific formatting** - ```python - # WRONG - Only works for US locale - def CUSTOM_NUMBER(value: int | Decimal) -> str: - return f"${value:,.2f}" # [WRONG] Always uses $ and US formatting - - # CORRECT - Uses Babel for locale-aware formatting - def CUSTOM_NUMBER(value: int | Decimal, *, locale: str = "en_US") -> str: - from babel import numbers - return numbers.format_currency(value, "USD", locale=locale) - ``` - -6. **Not handling invalid input** - ```python - # WRONG - Crashes on invalid input - def FILESIZE(bytes_count: int) -> str: - return f"{bytes_count / 1024:.2f} KB" # [WRONG] Crashes if bytes_count is string - - # CORRECT - Handles invalid input - def FILESIZE(bytes_count: int) -> str: - try: - return f"{float(bytes_count) / 1024:.2f} KB" - except (ValueError, TypeError): - return f"{bytes_count} bytes" # Fallback - ``` - -7. **Ignoring thread safety** - ```python - # WRONG - Using global mutable state - _cache = {} # [WRONG] Not thread-safe! - - def CUSTOM(value: str) -> str: - if value not in _cache: - _cache[value] = expensive_computation(value) - return _cache[value] - - # CORRECT - Use a module-level dict protected by a lock, or a ContextVar - # for task-scoped state. ContextVar provides automatic async task isolation. - from contextvars import ContextVar - - _task_cache: ContextVar[dict[str, str]] = ContextVar("_task_cache", default={}) - - def CUSTOM(value: str) -> str: - cache = _task_cache.get() - if value not in cache: - cache = {**cache, value: expensive_computation(value)} - _task_cache.set(cache) - return cache[value] - ``` - ---- - -## Summary - -**Key Takeaways**: - -1. **Use built-in functions** for common data types (NUMBER, DATETIME, CURRENCY) -2. **Create custom functions** for domain-specific formatting needs -3. **Follow naming conventions**: UPPERCASE for function names, snake_case for parameters -4. 
**Never raise exceptions** - always return graceful fallbacks -5. **Use factory pattern** for locale-aware functions -6. **Integrate with Babel** for CLDR-compliant i18n formatting -7. **Test comprehensively** with unit tests and property-based tests - -**For More Examples**: -- See [examples/custom_functions.py](../examples/custom_functions.py) for complete working code -- See [tests/fuzz/test_runtime_bundle_functions.py](../tests/fuzz/test_runtime_bundle_functions.py) for fuzz test patterns - -**Questions?** -- Open an issue: https://github.com/resoltico/ftllexengine/issues -- Read the full API docs: [DOC_00_Index.md](DOC_00_Index.md) - ---- +## Guidance -**Python Requirement**: 3.13+ +- Prefer readable fallback values to raising exceptions from custom functions. +- Do not mutate a bundle from inside a formatting callback. +- Use a registry when the same function set must be reused across many bundles. diff --git a/docs/DATA_INTEGRITY_ARCHITECTURE.md b/docs/DATA_INTEGRITY_ARCHITECTURE.md index b7129425..38ca2b0e 100644 --- a/docs/DATA_INTEGRITY_ARCHITECTURE.md +++ b/docs/DATA_INTEGRITY_ARCHITECTURE.md @@ -1,323 +1,33 @@ --- -afad: "3.3" -version: "0.161.0" -domain: "architecture" -updated: "2026-03-21" +afad: "3.5" +version: "0.163.0" +domain: ARCHITECTURE +updated: "2026-04-22" route: - keywords: [data integrity, strict mode, FrozenFluentError, IntegrityCache, CacheCorruptionError, WriteConflictError, BLAKE2b, checksum, write-once, idempotent, RWLock, security, FormattingIntegrityError, SyntaxIntegrityError] - questions: ["how does data integrity work?", "what is strict mode?", "how does cache checksum verification work?", "what is write-once mode?", "how to detect cache corruption?", "what is the data integrity architecture?"] + keywords: [data integrity, strict mode, FrozenFluentError, IntegrityCheckFailedError, cache audit, boot validation] + questions: ["how does strict mode relate to integrity?", "what audit evidence does the runtime expose?", "what is boot 
validation for?"] --- # Data Integrity Architecture -This document describes the architectural design for data integrity in FTLLexEngine. +**Purpose**: Summarize the fail-fast and immutable-evidence patterns used by FTLLexEngine. +**Prerequisites**: Familiarity with `FluentBundle`, `FluentLocalization`, and `LocalizationBootConfig`. -## Design Principle: Configurable Data Safety +## Overview -The system separates two distinct safety concerns: formatting error handling and cache integrity. +The library pushes validation as early as possible and represents runtime failures as immutable, structured evidence: -**Formatting error handling** defaults to fail-fast (`strict=True`). On any formatting error, `FormattingIntegrityError` is raised immediately. Soft error recovery (returning a placeholder like `{$amount}` alongside errors as data) is opt-in via `strict=False`. The Fluent specification defines fallback behavior as valid — FTLLexEngine defaults to the stricter interpretation to prevent silent data errors in production. +- `FrozenFluentError` captures formatting and parsing failures without mutable side channels. +- `FormattingIntegrityError`, `SyntaxIntegrityError`, and `IntegrityCheckFailedError` surface strict-mode failures explicitly. +- `LoadSummary`, `ResourceLoadResult`, and boot schema results provide startup evidence for localization initialization. +- `CacheConfig(enable_audit=True)` exposes immutable audit-log entries for cache operations. -**Cache integrity** is always-on by default (`integrity_strict=True`). Cache corruption is a system-level failure independent of how an application handles formatting errors. 
+## Strict Mode -| Failure Mode | Default (`strict=True`) | Soft Mode (`strict=False`) | -|:-------------|:------------------------|:---------------------------| -| Missing message | Raise `FormattingIntegrityError` | Return placeholder `{message-id}` + error | -| Missing variable | Raise `FormattingIntegrityError` | Return placeholder `{$var}` + error | -| Cache corruption | Raise `CacheCorruptionError` (always) | Raise `CacheCorruptionError` (always) | -| Error mutation | Immutable `FrozenFluentError` (always) | Immutable `FrozenFluentError` (always) | +- `FluentBundle` and `FluentLocalization` default to `strict=True`. +- Resource junk and formatting failures raise instead of silently degrading. +- `strict=False` is an explicit opt-in for fallback-return behavior. -**Rationale for strict default:** A bank displaying `{$amount}` when a variable is missing may show no financial figure at all — which is worse than an explicit error. By defaulting to `strict=True`, FTLLexEngine guarantees that every successful `format_pattern()` return is a correct, fully-resolved result. Applications that prefer graceful degradation opt in to soft mode with `strict=False`. 
+## Boot Validation -## Architecture Overview - -``` -+------------------------------------------------------------------+ -| FluentBundle / FluentLocalization | -| +------------------------------------------------------------+ | -| | Strict Mode Layer | | -| | Responsibility: Fail-fast on ANY formatting error | | -| | Raises: FormattingIntegrityError | | -| | Scope: Both FluentBundle AND FluentLocalization | | -| +------------------------------------------------------------+ | -| | | -| +------------------------------------------------------------+ | -| | Error Layer | | -| | Responsibility: Immutable, verifiable error objects | | -| | Type: FrozenFluentError (sealed, content-addressed) | | -| +------------------------------------------------------------+ | -| | | -| +------------------------------------------------------------+ | -| | Cache Layer | | -| | Responsibility: Checksum-verified format result caching | | -| | Type: IntegrityCache (BLAKE2b-128, write-once option) | | -| | Integrity: Independent of strict mode (always-on default) | | -| +------------------------------------------------------------+ | -| | | -| +------------------------------------------------------------+ | -| | Integrity Exception Layer (cross-cutting) | | -| | Responsibility: System failure signaling (not Fluent) | | -| | Types: DataIntegrityError hierarchy | | -| | Used by: Strict Mode Layer and Cache Layer | | -| +------------------------------------------------------------+ | -+------------------------------------------------------------------+ -``` - -## Component Responsibilities - -### Strict Mode Layer - -**Responsibility:** Provide fail-fast behavior at both bundle and localization levels. - -**Design Decision:** Strict mode is the default (`strict=True`). Soft error recovery is opt-out via `strict=False`. - -**Why fail-fast by default?** - -Silent fallbacks are the primary source of data errors in localized applications. 
When formatting fails and returns `{$amount}`, the application may render no financial figure at all without any indication that something went wrong. Defaulting to fail-fast eliminates this silent failure class. - -| Consideration | Rationale | -|:--------------|:----------| -| Financial safety | Missing balance variable must not silently render as `{$amount}` | -| Explicit opt-out | Applications that prefer graceful degradation set `strict=False` deliberately | -| Spec compliance | Fluent spec defines fallback behavior; FTLLexEngine enforces it only when explicitly requested | -| Development feedback | Errors surface immediately rather than appearing as mysterious placeholders in the UI | - -**Why offer soft mode at all?** - -Some applications — particularly those with partially-translated resources or during migration — prefer graceful degradation: show something rather than crash. These applications opt in to soft mode: - -**Soft mode activation:** -- `FluentBundle(..., strict=False)` - explicit opt-out to soft error recovery -- `FluentLocalization(..., strict=False)` - propagates to all bundles -- Combine with `cache=CacheConfig()` for caching - -**Invariant:** When `strict=True`, NO formatting operation returns a fallback value. Every error path raises `FormattingIntegrityError`. 
This invariant holds at both levels: -- `FluentBundle.format_pattern()` - raises on resolver errors -- `FluentLocalization.format_value()` / `format_pattern()` - raises on missing messages across all locales - -### Strict Mode vs Cache Integrity - -These are independent concerns controlled by separate parameters: - -| Parameter | Controls | Default | -|:----------|:---------|:--------| -| `FluentBundle(strict=...)` | Formatting error handling (raise vs return fallback) | `True` | -| `FluentLocalization(strict=...)` | Propagated to each bundle | `True` | -| `CacheConfig(integrity_strict=...)` | Cache corruption response (raise vs evict) | `True` | - -**Rationale:** Cache corruption is a system-level integrity failure independent of how an application handles formatting errors. A non-strict application (returning fallbacks for missing translations) should still detect and report cache corruption. The default `integrity_strict=True` ensures this. - -### Error Layer (FrozenFluentError) - -**Responsibility:** Provide immutable, verifiable error objects. - -**Design Decisions:** - -| Decision | Rationale | -|:---------|:----------| -| Sealed class (`@final`) | Prevents subclass invariant violations | -| Content-addressed (BLAKE2b-128) | Enables corruption detection | -| Slots-only | Memory efficiency, prevents dynamic attributes | -| Composition over inheritance | `ErrorCategory` enum replaces class hierarchy | - -**Content Hash Composition:** - -The BLAKE2b-128 content hash includes ALL error fields for complete audit trail integrity: - -1. **Core fields:** `message`, `category.value` -2. **Diagnostic (if present):** - - Core: `code.name`, `message` - - Location: `span` (start, end, line, column as 4-byte big-endian) - - Context: `hint`, `help_url`, `function_name`, `argument_name`, `expected_type`, `received_type`, `ftl_location` - - Metadata: `severity`, `resolution_path` (each element) -3. 
**Context (if present):** `input_value`, `locale_code`, `parse_type`, `fallback_value` - -**Length-Prefixing:** All string fields are length-prefixed (4-byte big-endian UTF-8 byte length) before hashing. This prevents collision attacks where concatenating different field sequences produces identical byte streams (e.g., `("ab", "c")` vs `("a", "bc")`). - -**Sentinel Bytes:** None values are distinguished from empty values using sentinel bytes, preventing collision between `span=None` and `span=SourceSpan(0, 0, 0, 0)`. - -**Freeze Ordering:** `Exception.__init__` is called before `_frozen` is set to `True`. This ensures compatibility with alternative Python runtimes (PyPy, free-threaded builds) where `Exception.__init__` may route through `__setattr__`. - -**Invariants:** -- All attributes frozen after `__init__` completes -- `verify_integrity()` always returns True for uncorrupted errors -- Hash is stable for object lifetime - -**Security Properties:** -- Constant-time hash comparison (`hmac.compare_digest`) prevents timing attacks -- Surrogate handling (`errors="surrogatepass"`) prevents Unicode exploits -- Complete field coverage prevents metadata tampering - -### Cache Layer (IntegrityCache) - -**Responsibility:** Provide checksum-verified caching of format results. 
- -**Design Decisions:** - -| Decision | Rationale | -|:---------|:----------| -| BLAKE2b-128 checksums | Fast cryptographic hash, 16-byte overhead per entry | -| Write-once option | Prevents data races from overwriting cached results | -| Independent integrity_strict | Cache corruption detection decoupled from formatting strict mode | -| Audit logging | Compliance and debugging for financial systems | -| Sequence numbers | Monotonic ordering for audit trail integrity | -| Idempotent write detection | Content-hash comparison for thundering herd tolerance | -| Node budget protection | `_MAX_HASHABLE_NODES` prevents DAG expansion attacks on all paths | - -**Configuration via CacheConfig:** - -`CacheConfig` validates all parameters at construction time (fail-fast). Invalid values raise `ValueError` immediately rather than deferring to `IntegrityCache.__init__`. - -```python -config = CacheConfig( - size=500, - write_once=True, - integrity_strict=True, # Cache corruption: raise (default) - enable_audit=True, -) -bundle = FluentBundle("en", cache=config) # strict=True is the default -``` - -**Checksum Composition:** - -The BLAKE2b-128 checksum includes ALL entry fields for complete audit trail integrity: - -1. **Content:** `formatted` (UTF-8 encoded, length-prefixed message output) -2. **Errors:** Each error's `content_hash` bytes (BLAKE2b-128, always present; `FrozenFluentError` is `@final`) -3. **Metadata:** - - `created_at`: 8-byte IEEE 754 double (monotonic timestamp) - - `sequence`: 8-byte signed big-endian integer (audit trail ordering) - -**Length-Prefixing:** All variable-length fields (formatted string, error messages) are length-prefixed (4-byte big-endian UTF-8 byte length) before hashing, preventing collision attacks from field concatenation. - -This means different entries with identical content will have different checksums if their metadata differs. This is correct behavior: the checksum protects the complete entry, not just its content. 
- -**Idempotent Write Detection:** - -In write-once mode, concurrent writes of the same message pose a challenge: multiple threads may resolve the same message simultaneously (thundering herd). Without idempotent detection, all but the first thread would trigger `WriteConflictError`, even though all produced identical results. - -The cache computes a **content-only hash** (excluding metadata like `created_at` and `sequence`) to detect idempotent writes: - -1. Second write arrives for an existing key -2. Cache computes content hash of new entry: `BLAKE2b-128(formatted, errors)` -3. Compares with existing entry's content hash (constant-time via `hmac.compare_digest`) -4. If identical: increment `idempotent_writes` counter, return silently (benign race) -5. If different: TRUE conflict - raise `WriteConflictError` (integrity_strict) or log (non-strict) - -This allows write-once mode to work correctly under load without false-positive conflicts. - -**Type-Tagging for Cache Keys:** - -Cache keys must distinguish between values that hash identically but have different types. The `_make_hashable()` function applies type-tagging to prevent collisions: - -| Type | Tag Format | Purpose | -|:-----|:-----------|:--------| -| `bool` | `("__bool__", value)` | Distinguish `True` from `1` (`bool` subclasses `int`) | -| `int` | `("__int__", value)` | Distinguish `1` from `True` (must be checked after `bool`) | -| `Decimal` | `("__decimal__", str(value))` | Preserve scale for CLDR plural rules (`Decimal("1.0")` vs `Decimal("1.00")`) | -| `FluentNumber` | `("__fluentnumber__", type, value, formatted, precision)` | Preserve underlying type and formatting info | -| `list` | `("__list__", tuple(...))` | Distinguish from tuple in formatted output | -| `tuple` | `("__tuple__", tuple(...))` | Distinguish from list | - -**CLDR Plural Rule Preservation:** Decimal type-tagging uses `str(value)` instead of the numeric value. 
This preserves scale information critical for CLDR plural rules: `Decimal("1.0")` and `Decimal("1.00")` must cache separately because some locales have scale-dependent plural forms. - -**Recursive Verification:** - -The `IntegrityCacheEntry.verify()` method performs recursive integrity verification: - -1. Recomputes entry checksum from current field values -2. For each `FrozenFluentError` in the errors tuple, calls `verify_integrity()` -3. Returns `True` only if ALL checks pass (entry checksum AND all error content hashes) - -This defense-in-depth approach detects corruption at any level of the data hierarchy. - -**Invariants:** -- Every `get()` verifies checksum before returning -- Corrupted entries are never returned (either raise or evict) -- Sequence numbers never decrease, even after `clear()` -- Metadata tampering is detected by checksum verification -- Node budget enforced on all recursive `_make_hashable` paths (including Mapping ABC) - -**Trade-offs:** -- Checksum verification adds ~0.1 microseconds per `get()` - acceptable for financial correctness -- Write-once mode prevents legitimate cache updates - use only when data race prevention is critical -- Different timestamps produce different checksums - not suitable for content-only comparison -- Idempotent detection adds hash comparison on cache hit - negligible for concurrent workloads - -### Integrity Exception Layer - -**Responsibility:** Signal system failures distinct from Fluent errors. - -**Design Decision:** Separate hierarchy from `FrozenFluentError` because: -1. Different error domains (system failure vs. translation issue) -2. Different handling requirements (escalate vs. fallback) -3. 
Prevents confusion when catching exceptions - -**Hierarchy:** -``` -DataIntegrityError (base - immutable after construction) -+-- CacheCorruptionError - Checksum mismatch detected -+-- FormattingIntegrityError - Strict mode formatting failure -+-- ImmutabilityViolationError - Mutation attempt on frozen object -+-- IntegrityCheckFailedError - Generic verification failure (boot/load) -+-- SyntaxIntegrityError - Strict mode syntax error during resource loading -+-- WriteConflictError - Write-once cache violation -``` - -**Invariant:** All integrity exceptions carry `IntegrityContext` for post-mortem analysis. - -## Concurrency Model - -### Lock Architecture - -| Component | Lock Type | Rationale | -|:----------|:----------|:----------| -| `FluentBundle._rwlock` | Custom `RWLock` | High-concurrency format operations; read-heavy; writer-preference; reentrant reads supported (custom function re-entry); write reentrancy and downgrade prohibited | -| `FluentLocalization._lock` | Custom `RWLock` | Brief read lock for bundle map lookup; write lock for lazy bundle creation; exclusive writes for add_resource/add_function | -| `IntegrityCache._lock` | `threading.Lock` | Short operations; no reentrant acquisition in the call path; `RLock` thread-tracking overhead eliminated | -| `LocaleContext._cache_lock` | `threading.Lock` | Class-level LRU cache; two sequential (never nested) acquisitions; `RLock` thread-tracking overhead unnecessary | - -## Security Model - -### Attack Vectors Mitigated - -| Attack | Mitigation | -|:-------|:-----------| -| Error mutation after logging | `__setattr__` raises after freeze | -| Subclass overrides | `@final` + `__init_subclass__` raises | -| Dynamic attribute injection | `__slots__` only, no `__dict__` | -| Hash tampering | Constant-time comparison | -| Cache poisoning | Checksum verification on every read | -| Data race overwrites | Write-once semantics option | -| Metadata tampering | Complete field coverage in checksums/hashes | -| 
Diagnostic field tampering | All 12 Diagnostic fields included in error hash | -| Timestamp/sequence forgery | Metadata included in cache checksum | -| Field concatenation collision | Length-prefixing prevents `("ab","c")` = `("a","bc")` | -| Type confusion in cache keys | Type-tagging distinguishes `1` from `1.0` from `True` | -| Decimal scale loss | `str(Decimal)` preserves scale for CLDR plural rules | -| Nested error corruption | Recursive verification checks entry AND all contained errors | -| DAG expansion in cache keys | Node budget (`_MAX_HASHABLE_NODES`) on all recursive paths | - -### Trust Boundaries - -1. **External input** (FTL source, format arguments): Validated at parser/bundle boundary -2. **Cached data**: Verified on every read via checksum -3. **Error objects**: Immutable after construction -4. **Configuration**: Validated at `CacheConfig` construction (fail-fast) - -## Performance Characteristics - -| Operation | Overhead | Acceptable Because | -|:----------|:---------|:-------------------| -| Error hash computation | ~0.1 microseconds | One-time at construction | -| Cache checksum verification | ~0.1 microseconds | Correctness over speed for financial | -| Slots vs dict | ~200 bytes saved per error | Net memory reduction | -| RWLock vs RLock | Negligible for writes, better for reads | Concurrent format operations scale linearly | - -## References - -- [FrozenFluentError API](DOC_05_Errors.md) -- [ErrorCategory Enum](DOC_05_Errors.md) -- [FluentBundle strict mode](DOC_01_Core.md) -- [Thread Safety](THREAD_SAFETY.md) -- [BLAKE2 Specification](https://www.blake2.net/) +`LocalizationBootConfig.boot()` is the canonical fail-fast startup path when resources must be clean before the application accepts traffic. It combines resource loading, `require_clean()`, required-message enforcement, and message-schema validation. The config object is intentionally one-shot: create a new instance instead of reusing one after `boot()` or `boot_simple()`. 
diff --git a/docs/DOC_00_Index.md b/docs/DOC_00_Index.md index 1b6b1061..7c63e088 100644 --- a/docs/DOC_00_Index.md +++ b/docs/DOC_00_Index.md @@ -1,350 +1,182 @@ --- -afad: "3.3" -version: "0.161.0" +afad: "3.5" +version: "0.163.0" domain: INDEX -updated: "2026-03-21" +updated: "2026-04-22" route: - keywords: [api reference, documentation, exports, imports, AsyncFluentBundle, fluentbundle, fluentlocalization, cache-audit, boot-validation, LocalizationBootConfig, validate_message_variables, require_locale_code, make_fluent_number, parse_fluent_number, FluentNumber, decimal_value, iso, currency, get_currency_decimal_digits, LocaleCode, normalize_locale, get_system_locale, LoadSummary, ResourceLoadResult, FallbackInfo, LoadStatus, PathResourceLoader, ResourceLoader, LocalizationCacheStats, parse_stream, parse_stream_ftl, add_resource_stream, incremental, streaming, async, asyncio, CurrencyCode, TerritoryCode, NewType, require_date, require_datetime, require_fluent_number, require_currency_code, require_territory_code, detect_cycles, WarningSeverity] - questions: ["what classes are available?", "how to import ftllexengine?", "what are the module exports?", "how do I validate localization at boot?", "how do I validate one message schema?", "how do I canonicalize a locale code?", "how do I construct a FluentNumber manually?", "how do I parse a FluentNumber?", "how do I get the cache audit log?", "how to import ISO introspection?", "how do I boot FluentLocalization with strict validation?", "what is LocalizationBootConfig?", "how do I validate a date at a boundary?", "how do I validate a datetime?", "how do I validate a FluentNumber?", "how do I validate an ISO currency or territory code?"] + keywords: [api index, routing, FluentBundle, FluentLocalization, parse_ftl, FunctionRegistry, FrozenFluentError, introspection] + questions: ["where is a symbol documented?", "which file documents the runtime APIs?", "which file documents locale parsing and introspection APIs?", 
"where are syntax, parsing, and diagnostics references?"] --- # FTLLexEngine API Reference Index -## Module Exports - -### Root Exports (`from ftllexengine import ...`) -```python -from ftllexengine import ( - # Core API - AsyncFluentBundle, # Async-native wrapper around FluentBundle; offloads to thread pool - FluentBundle, - FluentLocalization, - CacheConfig, # Cache configuration dataclass - parse_ftl, - parse_stream_ftl, # Incremental FTL parse from line iterator, yields entries - serialize_ftl, - validate_resource, # FTL resource validation (no Babel required) - FluentNumber, # Immutable formatted-number wrapper - FluentValue, # Type alias for function argument values - fluent_function, # Decorator for custom functions - make_fluent_number, # Construct FluentNumber from int/Decimal - clear_module_caches, # Clear all library caches - # Errors - FrozenFluentError, # Immutable error type with ErrorCategory - ErrorCategory, # Error classification enum - FrozenErrorContext, # Context for parse/formatting errors - # Data Integrity - DataIntegrityError, - FormattingIntegrityError, - ImmutabilityViolationError, - SyntaxIntegrityError, - CacheCorruptionError, - WriteConflictError, - IntegrityCheckFailedError, - IntegrityContext, - # Localization boot (require Babel) - LocalizationBootConfig, # One-shot boot orchestrator for strict-mode assembly - LoadSummary, # Aggregate of all resource load results from initialization - ResourceLoadResult, # Immutable result of a single resource load attempt - FallbackInfo, # Immutable record of a locale fallback event - ResourceLoader, # Protocol for loading FTL resources (structural typing) - PathResourceLoader, # Disk-based loader with path-traversal prevention - LocalizationCacheStats, # Cache statistics across all locales - # Locale utilities (no Babel required) - LoadStatus, # Enum: SUCCESS, NOT_FOUND, ERROR, SKIPPED - LocaleCode, # Type alias for BCP-47 / POSIX locale codes - normalize_locale, # Convert BCP-47 to canonical 
lowercase POSIX form - get_system_locale, # Detect locale from OS environment variables - # Boundary validators (no Babel required) - require_locale_code, # Validate and canonicalize a locale code - require_date, # Validate date; rejects datetime subtypes - require_datetime, # Validate datetime; rejects plain date - require_fluent_number, # Validate FluentNumber at a system boundary - # Parsing return type (no Babel required; lazy-loaded) - ParseResult, # tuple[T | None, tuple[FrozenFluentError, ...]] - # Message introspection (no Babel required) - MessageVariableValidationResult, - validate_message_variables, - # Analysis (no Babel required) - detect_cycles, # Detect cycles in a dependency graph - # Diagnostics extras (no Babel required) - WarningSeverity, # Warning severity enum (ERROR, WARNING, INFO) - # ISO utilities (importable without Babel; call-time Babel for type guards/lookups) - CurrencyCode, # NewType for ISO 4217 currency codes - TerritoryCode, # NewType for ISO 3166-1 alpha-2 territory codes - get_currency_decimal_digits, # ISO 4217 decimal precision (no Babel required) - is_valid_currency_code, # TypeIs guard; Babel required at call time - is_valid_territory_code, # TypeIs guard; Babel required at call time - require_currency_code, # Validate ISO 4217 code at a boundary - require_territory_code, # Validate ISO 3166-1 alpha-2 code at a boundary - get_cldr_version, - # Metadata - __version__, - __fluent_spec_version__, - __spec_url__, - __recommended_encoding__, -) -``` - -### AST Types (`from ftllexengine.syntax.ast import ...`) -```python -from ftllexengine.syntax.ast import ( - Resource, Message, Term, Pattern, Attribute, - Placeable, TextElement, Identifier, Junk, Comment, - VariableReference, MessageReference, TermReference, FunctionReference, - SelectExpression, Variant, NumberLiteral, StringLiteral, - CallArguments, NamedArgument, Span, Annotation, - # Type aliases (PEP 695) - Entry, Expression, PatternElement, InlineExpression, VariantKey, - 
SelectorExpression, FTLLiteral, ASTNode, -) -``` - -### Syntax Utilities (`from ftllexengine.syntax import ...`) -```python -from ftllexengine.syntax import ( - FluentParserV1, ASTVisitor, ASTTransformer, - Cursor, ParseError, ParseResult, - parse, serialize, - SerializationValidationError, SerializationDepthError, -) -``` - -### Errors & Validation (`from ftllexengine.diagnostics import ...`) -```python -from ftllexengine.diagnostics import ( - FrozenFluentError, ErrorCategory, FrozenErrorContext, - Diagnostic, DiagnosticCode, - ValidationResult, ValidationError, ValidationWarning, WarningSeverity, - DiagnosticFormatter, OutputFormat, -) -``` - -### Introspection (`from ftllexengine.introspection import ...`) -```python -from ftllexengine.introspection import ( - # Message introspection (no Babel required) - introspect_message, MessageIntrospection, - extract_variables, extract_references, extract_references_by_attribute, - clear_introspection_cache, - VariableInfo, FunctionCallInfo, ReferenceInfo, - # Variable schema validation (no Babel required) - validate_message_variables, MessageVariableValidationResult, - # ISO introspection (requires Babel) - TerritoryCode, CurrencyCode, # Type aliases - TerritoryInfo, CurrencyInfo, # Data classes - get_territory, get_currency, get_currency_decimal_digits, - list_territories, list_currencies, - get_territory_currencies, # Lookup functions - is_valid_territory_code, is_valid_currency_code, # Type guards - require_currency_code, require_territory_code, # Boundary validators - clear_iso_cache, # Cache management - BabelImportError, # Exception - get_cldr_version, # Babel/CLDR diagnostics -) -``` - -### Enums (`from ftllexengine.enums import ...`) -```python -from ftllexengine.enums import ( - CommentType, # COMMENT, GROUP, RESOURCE - VariableContext, # PATTERN, SELECTOR, VARIANT, FUNCTION_ARG - ReferenceKind, # MESSAGE, TERM - LoadStatus, # SUCCESS, PARTIAL, FAILED -) -``` - -### Analysis (`from ftllexengine.analysis import 
...`) -```python -from ftllexengine.analysis import detect_cycles, entry_dependency_set, make_cycle_key -``` - -### Validation (`from ftllexengine.validation import ...`) -```python -from ftllexengine.validation import validate_resource -``` - -### Core Utilities (`from ftllexengine.core import ...`) -```python -from ftllexengine.core import ( - DepthGuard, depth_clamp, # Depth limiting -) -from ftllexengine.core.babel_compat import ( - BabelImportError, require_babel, # Babel availability checking - is_babel_available, get_locale_class, # Babel introspection - get_cldr_version, # CLDR version -) -``` - -### Runtime (`from ftllexengine.runtime import ...`) -```python -from ftllexengine.runtime import ( - CacheAuditLogEntry, FluentBundle, FluentNumber, - FunctionRegistry, WriteLogEntry, fluent_function, - create_default_registry, get_shared_registry, - number_format, datetime_format, currency_format, make_fluent_number, - select_plural_category, -) -``` - -### Localization (`from ftllexengine.localization import ...`) -```python -from ftllexengine.localization import ( - CacheAuditLogEntry, FluentLocalization, LocalizationBootConfig, LocalizationCacheStats, - PathResourceLoader, ResourceLoader, - LoadStatus, LoadSummary, ResourceLoadResult, FallbackInfo, - MessageId, LocaleCode, ResourceId, FTLSource, -) -``` - -### Parsing (`from ftllexengine.parsing import ...`) - -> **Babel required** for this entire module. 
- -```python -from ftllexengine.parsing import ( - # Parse functions (require Babel) - parse_decimal, parse_fluent_number, parse_date, parse_datetime, parse_currency, - # Type guards - is_valid_decimal, is_valid_date, is_valid_datetime, is_valid_currency, - # Type alias - ParseResult, - # Cache management - clear_date_caches, clear_currency_caches, -) -``` - ---- - -## File Routing Table - -| Query Pattern | Target File | Domain | -|:--------------|:------------|:-------| -| AsyncFluentBundle, FluentBundle, FluentLocalization, add_resource, add_resource_stream, format_pattern, require_clean, validate_message_schemas, validate_message_variables, require_locale_code, require_date, require_datetime, require_fluent_number, get_cache_audit_log, LocaleCode, normalize_locale, get_system_locale, LocalizationBootConfig, LoadSummary, ResourceLoadResult, FallbackInfo, LoadStatus, PathResourceLoader, ResourceLoader, LocalizationCacheStats | [DOC_01_Core.md](DOC_01_Core.md) | Core API | -| Message, Term, Pattern, Resource, AST, Identifier, FTLLiteral, NamedArgument, dataclass | [DOC_02_Types.md](DOC_02_Types.md) | AST Types | -| parse, parse_stream, parse_stream_ftl, serialize, parse_ftl, serialize_ftl, parse_decimal, parse_fluent_number, parse_date, parse_currency, FluentParserV1 | [DOC_03_Parsing.md](DOC_03_Parsing.md) | Parsing | -| NUMBER, DATETIME, CURRENCY, FluentNumber, make_fluent_number, fluent_function, add_function, FunctionRegistry, CacheAuditLogEntry, clear_module_caches | [DOC_04_Runtime.md](DOC_04_Runtime.md) | Runtime | -| FrozenFluentError, ErrorCategory, FrozenErrorContext, BabelImportError, DepthGuard, ValidationResult, Diagnostic, DiagnosticCode | [DOC_05_Errors.md](DOC_05_Errors.md) | Errors | -| detect_cycles, entry_dependency_set, make_cycle_key, validate_resource | [DOC_04_Runtime.md](DOC_04_Runtime.md) | Analysis | -| extract_variables, extract_references, extract_references_by_attribute, introspect_message, MessageIntrospection | 
[DOC_02_Types.md](DOC_02_Types.md) | Message Introspection | -| TerritoryInfo, CurrencyInfo, get_territory, get_currency, require_currency_code, require_territory_code, ISO 3166, ISO 4217 | [DOC_02_Types.md](DOC_02_Types.md) | ISO Introspection | -| WarningSeverity, detect_cycles | [DOC_05_Errors.md](DOC_05_Errors.md) | Diagnostics / Analysis | - ---- - -## Submodule Structure - -``` -ftllexengine/ - __init__.py # Public API exports - constants.py # MAX_DEPTH, MAX_IDENTIFIER_LENGTH, MAX_LOCALE_LENGTH_HARD_LIMIT, cache limits, fallback strings, ISO_4217_DECIMAL_DIGITS - enums.py # CommentType, VariableContext, ReferenceKind, LoadStatus - integrity.py # DataIntegrityError hierarchy (6 sealed subclasses), IntegrityContext - localization/ - __init__.py # FluentLocalization, LocalizationBootConfig, PathResourceLoader, ResourceLoader, LoadStatus, LoadSummary, ResourceLoadResult, FallbackInfo, type aliases - types.py # PEP 695 type aliases: MessageId, LocaleCode, ResourceId, FTLSource - loading.py # ResourceLoader protocol, PathResourceLoader, LoadSummary, ResourceLoadResult, FallbackInfo - boot.py # LocalizationBootConfig (strict-mode boot API) - orchestrator.py # FluentLocalization class, LocalizationCacheStats - introspection/ - __init__.py # Introspection API exports (message + ISO) - message.py # MessageIntrospection, introspect_message, extract_variables, extract_references, extract_references_by_attribute - iso.py # TerritoryInfo, CurrencyInfo, get_territory, get_currency, require_currency_code, require_territory_code - core/ - __init__.py # Core exports (BabelImportError, DepthGuard, FrozenFluentError, require_non_empty_str) - babel_compat.py # BabelImportError, Babel lazy import infrastructure - depth_guard.py # DepthGuard, depth_clamp - errors.py # ErrorCategory, FrozenErrorContext, FrozenFluentError (re-exports) - identifier_validation.py # FTL identifier validation utilities - validators.py # require_positive_int, require_date, require_datetime, 
require_fluent_number (internal validators) - locale_utils.py # require_locale_code, get_system_locale, normalize_locale, get_babel_locale, clear_locale_cache - analysis/ - __init__.py # Analysis API exports - graph.py # detect_cycles, entry_dependency_set, make_cycle_key - syntax/ - __init__.py # AST exports, parse(), serialize() - ast.py # AST node definitions - cursor.py # Cursor, ParseError, ParseResult - position.py # Source position tracking - validation_helpers.py # Shared validation helper functions - validator.py # SemanticValidator (AST node-level validation) - visitor.py # ASTVisitor, ASTTransformer - serializer.py # FluentSerializer - parser/ - __init__.py # FluentParserV1, ParseContext - core.py # Parser main entry point - primitives.py # Parser primitive operations (identifier, number, string literal parsing) - rules.py # ParseContext, pattern/expression parsing - whitespace.py # Whitespace handling - runtime/ - __init__.py # Runtime exports - bundle.py # FluentBundle - cache.py # IntegrityCache, IntegrityCacheEntry, CacheStats - function_bridge.py # FunctionRegistry, fluent_function - function_metadata.py # Function metadata helpers (requires_locale_injection, etc.) 
- functions.py # Built-in functions, create_default_registry, get_shared_registry - locale_context.py # Locale context for runtime formatting - plural_rules.py # select_plural_category - resolution_context.py # GlobalDepthGuard, ResolutionContext - resolver.py # FluentResolver - rwlock.py # RWLock (internal readers-writer lock) - value_types.py # FluentNumber, make_fluent_number, FluentValue, FluentFunction, FunctionSignature - parsing/ - __init__.py # Parsing API exports (requires Babel) - numbers.py # parse_decimal, parse_fluent_number - dates.py # parse_date, parse_datetime - currency.py # parse_currency - guards.py # Type guards - diagnostics/ - __init__.py # Error exports - errors.py # FrozenFluentError, ErrorCategory, FrozenErrorContext - codes.py # DiagnosticCode, Diagnostic, SourceSpan - templates.py # ErrorTemplate - validation.py # ValidationResult, ValidationError, ValidationWarning - formatter.py # DiagnosticFormatter, OutputFormat - validation/ - __init__.py # validate_resource - resource.py # Standalone resource validation -``` - ---- - -## Type Alias Quick Reference - -| Alias | Definition | Location | -|:------|:-----------|:---------| -| `FluentValue` | `str \| int \| Decimal \| datetime \| date \| FluentNumber \| None \| Sequence[FluentValue] \| Mapping[str, FluentValue]` | runtime/value_types.py (exported from root) | -| `ParseResult[T]` | `tuple[T \| None, tuple[FrozenFluentError, ...]]` | parsing/__init__.py (also `ftllexengine`) | -| `MessageId` | `str` | localization.py | -| `LocaleCode` | `str` | localization.py | -| `ResourceId` | `str` | localization.py | -| `FTLSource` | `str` | localization.py | -| `TerritoryCode` | `NewType("TerritoryCode", str)` | introspection/iso.py | -| `CurrencyCode` | `NewType("CurrencyCode", str)` | introspection/iso.py | -| `Entry` | `Message \| Term \| Comment \| Junk` | syntax/ast.py | -| `PatternElement` | `TextElement \| Placeable` | syntax/ast.py | -| `Expression` | `SelectExpression \| InlineExpression` | 
syntax/ast.py | -| `InlineExpression` | Union of inline AST types (superset of SelectorExpression) | syntax/ast.py | -| `SelectorExpression` | Restricted subset of InlineExpression valid as SelectExpression.selector (excludes Placeable) | syntax/ast.py | -| `FTLLiteral` | `StringLiteral \| NumberLiteral` | syntax/ast.py | -| `ASTNode` | Union of all AST node types | syntax/ast.py | -| `VariantKey` | `Identifier \| NumberLiteral` | syntax/ast.py | - ---- - -## Cross-Reference: Non-Reference Documentation - -| File | Purpose | Audience | -|:-----|:--------|:---------| -| [README.md](../README.md) | Entry point, installation, quick start | Humans | -| [QUICK_REFERENCE.md](QUICK_REFERENCE.md) | Cheat sheet, common patterns | Humans | -| [PARSING_GUIDE.md](PARSING_GUIDE.md) | Bi-directional parsing tutorial | Humans | -| [TYPE_HINTS_GUIDE.md](TYPE_HINTS_GUIDE.md) | Python 3.13+ type patterns | Humans | -| [TERMINOLOGY.md](TERMINOLOGY.md) | Glossary, disambiguation | Both | -| [MIGRATION.md](MIGRATION.md) | fluent.runtime migration guide | Humans | -| [CUSTOM_FUNCTIONS_GUIDE.md](CUSTOM_FUNCTIONS_GUIDE.md) | Custom function tutorial | Humans | -| [LOCALE_GUIDE.md](LOCALE_GUIDE.md) | Locale formatting behavior (str vs NUMBER) | Humans | -| [VALIDATION_GUIDE.md](VALIDATION_GUIDE.md) | Validation architecture and responsibility matrix | Humans | -| [THREAD_SAFETY.md](THREAD_SAFETY.md) | Thread safety architectural decisions | Humans | - ---- +## Routing Table + +| Symbol | File | Section | +|:-------|:-----|:--------| +| `FluentBundle` | [DOC_01_Core.md](DOC_01_Core.md) | `FluentBundle` | +| `AsyncFluentBundle` | [DOC_01_Core.md](DOC_01_Core.md) | `AsyncFluentBundle` | +| `FluentLocalization` | [DOC_01_Core.md](DOC_01_Core.md) | `FluentLocalization` | +| `LocalizationBootConfig` | [DOC_01_Core.md](DOC_01_Core.md) | `LocalizationBootConfig` | +| `PathResourceLoader` | [DOC_01_Core.md](DOC_01_Core.md) | `PathResourceLoader` | +| `ResourceLoader` | 
[DOC_01_Core.md](DOC_01_Core.md) | `ResourceLoader` | +| `LoadStatus` | [DOC_01_Core.md](DOC_01_Core.md) | `LoadStatus` | +| `LoadSummary` | [DOC_01_Core.md](DOC_01_Core.md) | `LoadSummary` | +| `ResourceLoadResult` | [DOC_01_Core.md](DOC_01_Core.md) | `ResourceLoadResult` | +| `FallbackInfo` | [DOC_01_Core.md](DOC_01_Core.md) | `FallbackInfo` | +| `LocalizationCacheStats` | [DOC_01_Core.md](DOC_01_Core.md) | `LocalizationCacheStats` | +| `FluentNumber` | [DOC_02_Types.md](DOC_02_Types.md) | `FluentNumber` | +| `FluentValue` | [DOC_02_Types.md](DOC_02_Types.md) | `FluentValue` | +| `ParseResult` | [DOC_02_Types.md](DOC_02_Types.md) | `ParseResult` | +| `LocaleCode` | [DOC_02_Types.md](DOC_02_Types.md) | `LocaleCode` | +| `MessageId` | [DOC_02_Types.md](DOC_02_Types.md) | `MessageId` | +| `ResourceId` | [DOC_02_Types.md](DOC_02_Types.md) | `ResourceId` | +| `FTLSource` | [DOC_02_Types.md](DOC_02_Types.md) | `FTLSource` | +| `CurrencyCode` | [DOC_02_Types.md](DOC_02_Types.md) | `CurrencyCode` | +| `TerritoryCode` | [DOC_02_Types.md](DOC_02_Types.md) | `TerritoryCode` | +| `Span` | [DOC_02_SyntaxTypes.md](DOC_02_SyntaxTypes.md) | `Span` | +| `Annotation` | [DOC_02_SyntaxTypes.md](DOC_02_SyntaxTypes.md) | `Annotation` | +| `Identifier` | [DOC_02_SyntaxTypes.md](DOC_02_SyntaxTypes.md) | `Identifier` | +| `Resource` | [DOC_02_SyntaxTypes.md](DOC_02_SyntaxTypes.md) | `Resource` | +| `Message` | [DOC_02_SyntaxTypes.md](DOC_02_SyntaxTypes.md) | `Message` | +| `Term` | [DOC_02_SyntaxTypes.md](DOC_02_SyntaxTypes.md) | `Term` | +| `Attribute` | [DOC_02_SyntaxTypes.md](DOC_02_SyntaxTypes.md) | `Attribute` | +| `Comment` | [DOC_02_SyntaxTypes.md](DOC_02_SyntaxTypes.md) | `Comment` | +| `Junk` | [DOC_02_SyntaxTypes.md](DOC_02_SyntaxTypes.md) | `Junk` | +| `Pattern` | [DOC_02_SyntaxTypes.md](DOC_02_SyntaxTypes.md) | `Pattern` | +| `TextElement` | [DOC_02_SyntaxExpressions.md](DOC_02_SyntaxExpressions.md) | `TextElement` | +| `Placeable` | 
[DOC_02_SyntaxExpressions.md](DOC_02_SyntaxExpressions.md) | `Placeable` | +| `SelectExpression` | [DOC_02_SyntaxExpressions.md](DOC_02_SyntaxExpressions.md) | `SelectExpression` | +| `Variant` | [DOC_02_SyntaxExpressions.md](DOC_02_SyntaxExpressions.md) | `Variant` | +| `StringLiteral` | [DOC_02_SyntaxExpressions.md](DOC_02_SyntaxExpressions.md) | `StringLiteral` | +| `NumberLiteral` | [DOC_02_SyntaxExpressions.md](DOC_02_SyntaxExpressions.md) | `NumberLiteral` | +| `VariableReference` | [DOC_02_SyntaxExpressions.md](DOC_02_SyntaxExpressions.md) | `VariableReference` | +| `MessageReference` | [DOC_02_SyntaxExpressions.md](DOC_02_SyntaxExpressions.md) | `MessageReference` | +| `TermReference` | [DOC_02_SyntaxExpressions.md](DOC_02_SyntaxExpressions.md) | `TermReference` | +| `FunctionReference` | [DOC_02_SyntaxExpressions.md](DOC_02_SyntaxExpressions.md) | `FunctionReference` | +| `CallArguments` | [DOC_02_SyntaxExpressions.md](DOC_02_SyntaxExpressions.md) | `CallArguments` | +| `NamedArgument` | [DOC_02_SyntaxExpressions.md](DOC_02_SyntaxExpressions.md) | `NamedArgument` | +| `Entry` | [DOC_02_SyntaxExpressions.md](DOC_02_SyntaxExpressions.md) | `Entry` | +| `PatternElement` | [DOC_02_SyntaxExpressions.md](DOC_02_SyntaxExpressions.md) | `PatternElement` | +| `Expression` | [DOC_02_SyntaxExpressions.md](DOC_02_SyntaxExpressions.md) | `Expression` | +| `SelectorExpression` | [DOC_02_SyntaxExpressions.md](DOC_02_SyntaxExpressions.md) | `SelectorExpression` | +| `FTLLiteral` | [DOC_02_SyntaxExpressions.md](DOC_02_SyntaxExpressions.md) | `FTLLiteral` | +| `MessageVariableValidationResult` | [DOC_02_Types.md](DOC_02_Types.md) | `MessageVariableValidationResult` | +| `MessageIntrospection` | [DOC_02_Types.md](DOC_02_Types.md) | `MessageIntrospection` | +| `VariableInfo` | [DOC_02_Types.md](DOC_02_Types.md) | `VariableInfo` | +| `FunctionCallInfo` | [DOC_02_Types.md](DOC_02_Types.md) | `FunctionCallInfo` | +| `ReferenceInfo` | [DOC_02_Types.md](DOC_02_Types.md) | 
`ReferenceInfo` | +| `TerritoryInfo` | [DOC_02_Types.md](DOC_02_Types.md) | `TerritoryInfo` | +| `CurrencyInfo` | [DOC_02_Types.md](DOC_02_Types.md) | `CurrencyInfo` | +| `CommentType` | [DOC_02_Types.md](DOC_02_Types.md) | `CommentType` | +| `VariableContext` | [DOC_02_Types.md](DOC_02_Types.md) | `VariableContext` | +| `ReferenceKind` | [DOC_02_Types.md](DOC_02_Types.md) | `ReferenceKind` | +| `parse_ftl` | [DOC_03_Parsing.md](DOC_03_Parsing.md) | `parse_ftl` | +| `parse_stream_ftl` | [DOC_03_Parsing.md](DOC_03_Parsing.md) | `parse_stream_ftl` | +| `serialize_ftl` | [DOC_03_Parsing.md](DOC_03_Parsing.md) | `serialize_ftl` | +| `validate_resource` | [DOC_03_Parsing.md](DOC_03_Parsing.md) | `validate_resource` | +| `FluentParserV1` | [DOC_03_Parsing.md](DOC_03_Parsing.md) | `FluentParserV1` | +| `parse` | [DOC_03_Parsing.md](DOC_03_Parsing.md) | `parse` | +| `parse_stream` | [DOC_03_Parsing.md](DOC_03_Parsing.md) | `parse_stream` | +| `serialize` | [DOC_03_Parsing.md](DOC_03_Parsing.md) | `serialize` | +| `Cursor` | [DOC_03_Parsing.md](DOC_03_Parsing.md) | `Cursor` | +| `ftllexengine.syntax.ParseResult` | [DOC_03_Parsing.md](DOC_03_Parsing.md) | `ftllexengine.syntax.ParseResult` | +| `ParseError` | [DOC_03_Parsing.md](DOC_03_Parsing.md) | `ParseError` | +| `SerializationValidationError` | [DOC_03_Parsing.md](DOC_03_Parsing.md) | `SerializationValidationError` | +| `SerializationDepthError` | [DOC_03_Parsing.md](DOC_03_Parsing.md) | `SerializationDepthError` | +| `ASTVisitor` | [DOC_03_Parsing.md](DOC_03_Parsing.md) | `ASTVisitor` | +| `ASTTransformer` | [DOC_03_Parsing.md](DOC_03_Parsing.md) | `ASTTransformer` | +| `parse_decimal` | [DOC_03_LocaleParsing.md](DOC_03_LocaleParsing.md) | `parse_decimal` | +| `parse_fluent_number` | [DOC_03_LocaleParsing.md](DOC_03_LocaleParsing.md) | `parse_fluent_number` | +| `parse_date` | [DOC_03_LocaleParsing.md](DOC_03_LocaleParsing.md) | `parse_date` | +| `parse_datetime` | [DOC_03_LocaleParsing.md](DOC_03_LocaleParsing.md) | 
`parse_datetime` | +| `parse_currency` | [DOC_03_LocaleParsing.md](DOC_03_LocaleParsing.md) | `parse_currency` | +| `is_valid_decimal` | [DOC_03_LocaleParsing.md](DOC_03_LocaleParsing.md) | `is_valid_decimal` | +| `is_valid_date` | [DOC_03_LocaleParsing.md](DOC_03_LocaleParsing.md) | `is_valid_date` | +| `is_valid_datetime` | [DOC_03_LocaleParsing.md](DOC_03_LocaleParsing.md) | `is_valid_datetime` | +| `is_valid_currency` | [DOC_03_LocaleParsing.md](DOC_03_LocaleParsing.md) | `is_valid_currency` | +| `clear_date_caches` | [DOC_03_LocaleParsing.md](DOC_03_LocaleParsing.md) | `clear_date_caches` | +| `clear_currency_caches` | [DOC_03_LocaleParsing.md](DOC_03_LocaleParsing.md) | `clear_currency_caches` | +| `CacheConfig` | [DOC_04_Runtime.md](DOC_04_Runtime.md) | `CacheConfig` | +| `FunctionRegistry` | [DOC_04_Runtime.md](DOC_04_Runtime.md) | `FunctionRegistry` | +| `fluent_function` | [DOC_04_Runtime.md](DOC_04_Runtime.md) | `fluent_function` | +| `create_default_registry` | [DOC_04_Runtime.md](DOC_04_Runtime.md) | `create_default_registry` | +| `get_shared_registry` | [DOC_04_Runtime.md](DOC_04_Runtime.md) | `get_shared_registry` | +| `number_format` | [DOC_04_Runtime.md](DOC_04_Runtime.md) | `number_format` | +| `datetime_format` | [DOC_04_Runtime.md](DOC_04_Runtime.md) | `datetime_format` | +| `currency_format` | [DOC_04_Runtime.md](DOC_04_Runtime.md) | `currency_format` | +| `select_plural_category` | [DOC_04_Runtime.md](DOC_04_Runtime.md) | `select_plural_category` | +| `make_fluent_number` | [DOC_04_Runtime.md](DOC_04_Runtime.md) | `make_fluent_number` | +| `clear_module_caches` | [DOC_04_Runtime.md](DOC_04_Runtime.md) | `clear_module_caches` | +| `CacheAuditLogEntry` | [DOC_04_Runtime.md](DOC_04_Runtime.md) | `CacheAuditLogEntry` | +| `WriteLogEntry` | [DOC_04_Runtime.md](DOC_04_Runtime.md) | `WriteLogEntry` | +| `detect_cycles` | [DOC_04_RuntimeUtilities.md](DOC_04_RuntimeUtilities.md) | `detect_cycles` | +| `normalize_locale` | 
[DOC_04_RuntimeUtilities.md](DOC_04_RuntimeUtilities.md) | `normalize_locale` | +| `get_system_locale` | [DOC_04_RuntimeUtilities.md](DOC_04_RuntimeUtilities.md) | `get_system_locale` | +| `require_locale_code` | [DOC_04_RuntimeUtilities.md](DOC_04_RuntimeUtilities.md) | `require_locale_code` | +| `require_currency_code` | [DOC_04_Introspection.md](DOC_04_Introspection.md) | `require_currency_code` | +| `require_territory_code` | [DOC_04_Introspection.md](DOC_04_Introspection.md) | `require_territory_code` | +| `is_valid_currency_code` | [DOC_04_Introspection.md](DOC_04_Introspection.md) | `is_valid_currency_code` | +| `is_valid_territory_code` | [DOC_04_Introspection.md](DOC_04_Introspection.md) | `is_valid_territory_code` | +| `get_currency_decimal_digits` | [DOC_04_Introspection.md](DOC_04_Introspection.md) | `get_currency_decimal_digits` | +| `get_cldr_version` | [DOC_04_Introspection.md](DOC_04_Introspection.md) | `get_cldr_version` | +| `__version__` | [DOC_04_RuntimeUtilities.md](DOC_04_RuntimeUtilities.md) | `__version__` | +| `__fluent_spec_version__` | [DOC_04_RuntimeUtilities.md](DOC_04_RuntimeUtilities.md) | `__fluent_spec_version__` | +| `__spec_url__` | [DOC_04_RuntimeUtilities.md](DOC_04_RuntimeUtilities.md) | `__spec_url__` | +| `__recommended_encoding__` | [DOC_04_RuntimeUtilities.md](DOC_04_RuntimeUtilities.md) | `__recommended_encoding__` | +| `require_date` | [DOC_04_RuntimeUtilities.md](DOC_04_RuntimeUtilities.md) | `require_date` | +| `require_datetime` | [DOC_04_RuntimeUtilities.md](DOC_04_RuntimeUtilities.md) | `require_datetime` | +| `require_fluent_number` | [DOC_04_RuntimeUtilities.md](DOC_04_RuntimeUtilities.md) | `require_fluent_number` | +| `validate_message_variables` | [DOC_04_Introspection.md](DOC_04_Introspection.md) | `validate_message_variables` | +| `introspect_message` | [DOC_04_Introspection.md](DOC_04_Introspection.md) | `introspect_message` | +| `extract_variables` | [DOC_04_Introspection.md](DOC_04_Introspection.md) | 
`extract_variables` | +| `extract_references` | [DOC_04_Introspection.md](DOC_04_Introspection.md) | `extract_references` | +| `extract_references_by_attribute` | [DOC_04_Introspection.md](DOC_04_Introspection.md) | `extract_references_by_attribute` | +| `clear_introspection_cache` | [DOC_04_Introspection.md](DOC_04_Introspection.md) | `clear_introspection_cache` | +| `get_territory` | [DOC_04_Introspection.md](DOC_04_Introspection.md) | `get_territory` | +| `get_currency` | [DOC_04_Introspection.md](DOC_04_Introspection.md) | `get_currency` | +| `list_territories` | [DOC_04_Introspection.md](DOC_04_Introspection.md) | `list_territories` | +| `list_currencies` | [DOC_04_Introspection.md](DOC_04_Introspection.md) | `list_currencies` | +| `get_territory_currencies` | [DOC_04_Introspection.md](DOC_04_Introspection.md) | `get_territory_currencies` | +| `clear_iso_cache` | [DOC_04_Introspection.md](DOC_04_Introspection.md) | `clear_iso_cache` | +| `FrozenFluentError` | [DOC_05_Errors.md](DOC_05_Errors.md) | `FrozenFluentError` | +| `ErrorCategory` | [DOC_05_Errors.md](DOC_05_Errors.md) | `ErrorCategory` | +| `ParseTypeLiteral` | [DOC_05_Errors.md](DOC_05_Errors.md) | `ParseTypeLiteral` | +| `FrozenErrorContext` | [DOC_05_Errors.md](DOC_05_Errors.md) | `FrozenErrorContext` | +| `BabelImportError` | [DOC_05_Errors.md](DOC_05_Errors.md) | `BabelImportError` | +| `ErrorTemplate` | [DOC_05_Errors.md](DOC_05_Errors.md) | `ErrorTemplate` | +| `DataIntegrityError` | [DOC_05_Errors.md](DOC_05_Errors.md) | `DataIntegrityError` | +| `IntegrityContext` | [DOC_05_Errors.md](DOC_05_Errors.md) | `IntegrityContext` | +| `CacheCorruptionError` | [DOC_05_Errors.md](DOC_05_Errors.md) | `CacheCorruptionError` | +| `ImmutabilityViolationError` | [DOC_05_Errors.md](DOC_05_Errors.md) | `ImmutabilityViolationError` | +| `IntegrityCheckFailedError` | [DOC_05_Errors.md](DOC_05_Errors.md) | `IntegrityCheckFailedError` | +| `FormattingIntegrityError` | [DOC_05_Errors.md](DOC_05_Errors.md) | 
`FormattingIntegrityError` | +| `SyntaxIntegrityError` | [DOC_05_Errors.md](DOC_05_Errors.md) | `SyntaxIntegrityError` | +| `WriteConflictError` | [DOC_05_Errors.md](DOC_05_Errors.md) | `WriteConflictError` | +| `ValidationResult` | [DOC_05_Diagnostics.md](DOC_05_Diagnostics.md) | `ValidationResult` | +| `ValidationError` | [DOC_05_Diagnostics.md](DOC_05_Diagnostics.md) | `ValidationError` | +| `ValidationWarning` | [DOC_05_Diagnostics.md](DOC_05_Diagnostics.md) | `ValidationWarning` | +| `WarningSeverity` | [DOC_05_Diagnostics.md](DOC_05_Diagnostics.md) | `WarningSeverity` | +| `Diagnostic` | [DOC_05_Diagnostics.md](DOC_05_Diagnostics.md) | `Diagnostic` | +| `DiagnosticCode` | [DOC_05_Diagnostics.md](DOC_05_Diagnostics.md) | `DiagnosticCode` | +| `DiagnosticFormatter` | [DOC_05_Diagnostics.md](DOC_05_Diagnostics.md) | `DiagnosticFormatter` | +| `OutputFormat` | [DOC_05_Diagnostics.md](DOC_05_Diagnostics.md) | `OutputFormat` | +| `SourceSpan` | [DOC_05_Diagnostics.md](DOC_05_Diagnostics.md) | `SourceSpan` | +| `scripts/validate_docs.py` | [DOC_06_Testing.md](DOC_06_Testing.md) | `scripts/validate_docs.py` | +| `scripts/validate_version.py` | [DOC_06_Testing.md](DOC_06_Testing.md) | `scripts/validate_version.py` | +| `scripts/run_examples.py` | [DOC_06_Testing.md](DOC_06_Testing.md) | `scripts/run_examples.py` | +| `check.sh` | [DOC_06_Testing.md](DOC_06_Testing.md) | `check.sh` | +| `scripts/lint.sh` | [DOC_06_Testing.md](DOC_06_Testing.md) | `scripts/lint.sh` | +| `scripts/test.sh` | [DOC_06_Testing.md](DOC_06_Testing.md) | `scripts/test.sh` | +| `scripts/fuzz_hypofuzz.sh` | [DOC_06_Testing.md](DOC_06_Testing.md) | `scripts/fuzz_hypofuzz.sh` | +| `scripts/fuzz_atheris.sh` | [DOC_06_Testing.md](DOC_06_Testing.md) | `scripts/fuzz_atheris.sh` | +| `pytest.mark.fuzz` | [DOC_06_Testing.md](DOC_06_Testing.md) | `pytest.mark.fuzz` | + +## Guide Links + +- [QUICK_REFERENCE.md](QUICK_REFERENCE.md) +- [CUSTOM_FUNCTIONS_GUIDE.md](CUSTOM_FUNCTIONS_GUIDE.md) +- 
[LOCALE_GUIDE.md](LOCALE_GUIDE.md) +- [PARSING_GUIDE.md](PARSING_GUIDE.md) +- [RELEASE_PROTOCOL.md](RELEASE_PROTOCOL.md) +- [VALIDATION_GUIDE.md](VALIDATION_GUIDE.md) diff --git a/docs/DOC_01_Core.md b/docs/DOC_01_Core.md index cc78b58e..f4afa2fc 100644 --- a/docs/DOC_01_Core.md +++ b/docs/DOC_01_Core.md @@ -1,56 +1,21 @@ --- -afad: "3.3" -version: "0.161.0" +afad: "3.5" +version: "0.163.0" domain: CORE -updated: "2026-03-21" +updated: "2026-04-22" route: - keywords: [AsyncFluentBundle, FluentBundle, FluentLocalization, add_resource, add_resource_stream, format_pattern, has_message, has_attribute, require_clean, validate_message_schemas, validate_message_variables, require_locale_code, require_date, require_datetime, require_fluent_number, validate_resource, introspect_message, introspect_term, get_cache_audit_log, strict, CacheConfig, IntegrityCache, CacheStats, LocalizationCacheStats, CacheAuditLogEntry, LocaleCode, normalize_locale, get_system_locale, LoadStatus, LoadSummary, ResourceLoadResult, FallbackInfo, ResourceLoader, PathResourceLoader, incremental, streaming, line iterator, async, asyncio, event loop, thread pool, CurrencyCode, TerritoryCode, NewType, date, datetime, FluentNumber] - questions: ["how to format message?", "how to add translations?", "how to validate ftl?", "how do I validate one message schema at boot?", "how do I validate localization at boot?", "how to check message exists?", "how do I canonicalize a locale code?", "is bundle thread safe?", "how to use strict mode?", "how to enable cache audit?", "how do I get the cache audit log?", "how do I validate a date at a boundary?", "how do I validate a datetime?", "how do I validate a FluentNumber?"] + keywords: [FluentBundle, AsyncFluentBundle, FluentLocalization, LocalizationBootConfig, PathResourceLoader, LoadSummary, ResourceLoadResult, LocalizationCacheStats, require_clean, get_load_summary] + questions: ["how do I format messages?", "how do I load multiple locales?", "how do I inspect 
localization load results?", "how do I boot localization safely?"] --- # Core API Reference --- -## `CacheConfig` - -`CacheConfig` is a frozen dataclass that encapsulates all cache configuration parameters for `FluentBundle` and `FluentLocalization`. - -### Signature -```python -@dataclass(frozen=True, slots=True) -class CacheConfig: - size: int = 1000 - write_once: bool = False - integrity_strict: bool = True - enable_audit: bool = False - max_audit_entries: int = 10000 - max_entry_weight: int = 10000 - max_errors_per_entry: int = 50 -``` - -### Parameters -| Field | Type | Default | Description | -|:------|:-----|:--------|:------------| -| `size` | `int` | 1000 | Maximum cache entries (LRU eviction). | -| `write_once` | `bool` | False | Reject updates to existing keys (data race prevention). | -| `integrity_strict` | `bool` | True | Raise on checksum mismatch and write-once violations. | -| `enable_audit` | `bool` | False | Maintain audit log of cache operations. | -| `max_audit_entries` | `int` | 10000 | Maximum audit log entries before oldest eviction. | -| `max_entry_weight` | `int` | 10000 | Maximum memory weight for cached results. | -| `max_errors_per_entry` | `int` | 50 | Maximum errors per cache entry. | - -### Constraints -- Immutable: Frozen dataclass; fields cannot be modified after construction. -- Validation: `__post_init__` rejects non-positive `size`, `max_entry_weight`, `max_errors_per_entry`, `max_audit_entries`. -- Independence: `integrity_strict` controls cache corruption response independently of `FluentBundle.strict` (formatting behavior). -- Import: `from ftllexengine import CacheConfig` or `from ftllexengine.runtime.cache_config import CacheConfig`. -- Usage: Pass `cache=CacheConfig()` to enable caching with defaults; `cache=None` (default) disables caching. - ---- - ## `FluentBundle` +Class that formats FTL messages for one locale. 
+ ### Signature ```python class FluentBundle: @@ -66,128 +31,33 @@ class FluentBundle: max_nesting_depth: int | None = None, max_expansion_size: int | None = None, strict: bool = True, - ) -> None: ... -``` - -### Parameters -| Parameter | Type | Req | Description | -|:----------|:-----|:----|:------------| -| `locale` | `str` | Y | BCP 47 locale code (positional-only, ASCII alphanumeric only). | -| `use_isolating` | `bool` | N | Wrap interpolated values in Unicode bidi marks. | -| `cache` | `CacheConfig \| None` | N | Cache configuration. `None` disables caching (default). `CacheConfig()` enables with defaults. | -| `functions` | `FunctionRegistry \| None` | N | Custom function registry (must be `FunctionRegistry`, not `dict`). | -| `max_source_size` | `int \| None` | N | Maximum FTL source length in characters (default: 10,000,000). | -| `max_nesting_depth` | `int \| None` | N | Maximum placeable nesting depth (default: 100). | -| `max_expansion_size` | `int \| None` | N | Maximum total characters produced during resolution (default: 1,000,000). Prevents Billion Laughs DoS. | -| `strict` | `bool` | N | Fail-fast mode (default `True`): raises `FormattingIntegrityError` on ANY error; raises `SyntaxIntegrityError` when `add_resource` produces junk. Pass `False` for soft-error recovery (returns `(fallback, errors)` tuple). | - -### Constraints -- Return: FluentBundle instance. -- Raises: `ValueError` on invalid locale format (must be ASCII alphanumeric with underscore/hyphen separators) or locale code exceeding 1000 characters (DoS prevention). -- State: Creates internal message/term registries. -- Thread: Always thread-safe via internal RWLock. -- Import: `FunctionRegistry` from `ftllexengine.runtime.function_bridge`. `FluentValue` from `ftllexengine.core.value_types`. -- Strict: Default `strict=True` raises `FormattingIntegrityError` on any resolution error and `SyntaxIntegrityError` on junk FTL. 
Use `strict=False` for soft-error recovery; errors are then returned as a tuple. Errors are cached before raising; subsequent cache hits re-raise without re-resolution. -- Cache: Security parameters expose `IntegrityCache` features for financial-grade applications. - ---- - -## `FluentBundle.for_system_locale` - -### Signature -```python -@classmethod -def for_system_locale( - cls, - *, - use_isolating: bool = True, - cache: CacheConfig | None = None, - functions: FunctionRegistry | None = None, - max_source_size: int | None = None, - max_nesting_depth: int | None = None, - max_expansion_size: int | None = None, - strict: bool = True, -) -> FluentBundle: -``` - -### Parameters -| Parameter | Type | Req | Description | -|:----------|:-----|:----|:------------| -| `use_isolating` | `bool` | N | Wrap interpolated values in Unicode bidi marks. | -| `cache` | `CacheConfig \| None` | N | Cache configuration. `None` disables (default). | -| `functions` | `FunctionRegistry \| None` | N | Custom function registry (must be `FunctionRegistry`, not `dict`). | -| `max_source_size` | `int \| None` | N | Maximum FTL source length in characters (default: 10,000,000). | -| `max_nesting_depth` | `int \| None` | N | Maximum placeable nesting depth (default: 100). | -| `max_expansion_size` | `int \| None` | N | Maximum total characters during resolution (default: 1,000,000). | -| `strict` | `bool` | N | Fail-fast mode (default `True`): raises on errors. Pass `False` for soft-error recovery. | - -### Constraints -- Return: FluentBundle with system locale. -- Raises: `RuntimeError` if locale cannot be determined. -- State: Delegates to `get_system_locale(raise_on_failure=True)`. -- Thread: Safe. 
- ---- - -## `FluentBundle.add_resource` - -### Signature -```python -def add_resource( - self, - source: str, - /, - *, - source_path: str | None = None -) -> tuple[Junk, ...]: -``` - -### Parameters -| Parameter | Type | Req | Description | -|:----------|:-----|:----|:------------| -| `source` | `str` | Y | FTL source code (positional-only). | -| `source_path` | `str \| None` | N | Path for error messages. | - -### Constraints -- Return: Tuple of Junk entries (syntax errors). Empty if parse succeeded. -- Raises: `TypeError` if source is not a str. `SyntaxIntegrityError` in strict mode if parsing produces any Junk. -- State: Mutates internal message/term registries. Clears cache. -- Thread: Safe (RWLock). Parse occurs outside write lock; only registration requires exclusive access. - ---- - -## `FluentBundle.add_resource_stream` - -### Signature -```python -def add_resource_stream( - self, - lines: Iterable[str], - /, - *, - source_path: str | None = None -) -> tuple[Junk, ...]: + ) -> None: ``` ### Parameters -| Parameter | Type | Req | Description | -|:----------|:-----|:----|:------------| -| `lines` | `Iterable[str]` | Y | FTL source as a line iterator (positional-only). | -| `source_path` | `str \| None` | N | Path for error messages. | +| Name | Req | Semantics | +|:-----|:----|:----------| +| `locale` | Y | Locale code for bundle | +| `use_isolating` | N | Enable bidi isolation | +| `cache` | N | Cache configuration | +| `functions` | N | Custom function registry | +| `max_source_size` | N | FTL input bound | +| `max_nesting_depth` | N | Nesting safety bound | +| `max_expansion_size` | N | Expansion safety bound | +| `strict` | N | Raise on integrity failures | ### Constraints -- Return: Tuple of Junk entries. Empty if parse succeeded. -- Purpose: Identical semantics to `add_resource()` but accepts a line iterator instead of a full string. Memory usage is proportional to the largest single FTL entry in the stream, not the total resource size. 
-- Raises: `SyntaxIntegrityError` in strict mode if parsing produces any Junk. -- State: Mutates internal message/term registries. Clears cache. -- Thread: Safe (RWLock). Stream is consumed and parsed outside write lock. -- Import: `from ftllexengine.runtime import FluentBundle` (method on `FluentBundle`). +- Return: Bundle with normalized locale and empty resource store +- Raises: `ValueError` on invalid locale; `TypeError` on invalid registry +- State: Mutable resources/functions; optional cache +- Thread: Safe +- Main methods: `add_resource()`, `add_resource_stream()`, `format_pattern()`, `add_function()`, `validate_resource()` --- ## `AsyncFluentBundle` -`AsyncFluentBundle` is an asyncio-native wrapper around `FluentBundle` that offloads all CPU-bound operations to a thread pool via `asyncio.to_thread()`, keeping the event loop unblocked. +Class that exposes the `FluentBundle` API for asyncio callers. ### Signature ```python @@ -204,1587 +74,250 @@ class AsyncFluentBundle: max_nesting_depth: int | None = None, max_expansion_size: int | None = None, strict: bool = True, - ) -> None: ... -``` - -### Parameters -| Parameter | Type | Req | Description | -|:----------|:-----|:----|:------------| -| `locale` | `str` | Y | BCP 47 locale code (positional-only). | -| `use_isolating` | `bool` | N | Wrap interpolated values in Unicode bidi marks. | -| `cache` | `CacheConfig \| None` | N | Cache configuration. `None` disables (default). | -| `functions` | `FunctionRegistry \| None` | N | Custom function registry. | -| `max_source_size` | `int \| None` | N | Maximum FTL source length in characters. | -| `max_nesting_depth` | `int \| None` | N | Maximum placeable nesting depth. | -| `max_expansion_size` | `int \| None` | N | Maximum total characters during resolution. | -| `strict` | `bool` | N | Fail-fast mode (default `True`). | - -### Constraints -- Return: `AsyncFluentBundle` instance. Supports `async with` (no cleanup required on exit). 
-- Async: `add_resource`, `add_resource_stream`, `format_pattern`, `add_function` are `async def`; offload to `asyncio.to_thread()`. -- Sync: `has_message`, `has_attribute`, `get_message_ids`, `get_message`, `get_term`, `introspect_message`, `clear_cache`, `get_cache_stats`, `get_cache_audit_log` are synchronous (O(1) dict lookups, hold read lock for nanoseconds). -- Concurrency: Underlying `FluentBundle` handles all thread safety via `RWLock`. No additional locking in `AsyncFluentBundle`. -- Strict: Same strict/soft-error semantics as `FluentBundle`. `strict=True` raises `FormattingIntegrityError`/`SyntaxIntegrityError`; `strict=False` returns `(fallback, errors)`. -- Import: `from ftllexengine import AsyncFluentBundle` or `from ftllexengine.runtime import AsyncFluentBundle`. - -```python -async with AsyncFluentBundle("en_US") as bundle: - await bundle.add_resource("greeting = Hello, { $name }!") - result, errors = await bundle.format_pattern("greeting", {"name": "Alice"}) -``` - ---- - -## `AsyncFluentBundle.for_system_locale` - -### Signature -```python -@classmethod -def for_system_locale( - cls, - *, - use_isolating: bool = True, - cache: CacheConfig | None = None, - functions: FunctionRegistry | None = None, - max_source_size: int | None = None, - max_nesting_depth: int | None = None, - max_expansion_size: int | None = None, - strict: bool = True, -) -> AsyncFluentBundle: -``` - -### Parameters -| Parameter | Type | Req | Description | -|:----------|:-----|:----|:------------| -| `use_isolating` | `bool` | N | Wrap interpolated values in Unicode bidi marks. | -| `cache` | `CacheConfig \| None` | N | Cache configuration. `None` disables (default). | -| `functions` | `FunctionRegistry \| None` | N | Custom function registry. | -| `max_source_size` | `int \| None` | N | Maximum FTL source length in characters. | -| `max_nesting_depth` | `int \| None` | N | Maximum placeable nesting depth. 
| -| `max_expansion_size` | `int \| None` | N | Maximum total characters during resolution. | -| `strict` | `bool` | N | Fail-fast mode (default `True`). | - -### Constraints -- Return: `AsyncFluentBundle` for the detected system locale. -- Raises: `RuntimeError` if locale cannot be determined from OS environment. -- State: Reads locale from `LANG`, `LC_ALL`, `LC_MESSAGES` environment variables. - ---- - -## `FluentBundle.format_pattern` - -### Signature -```python -def format_pattern( - self, - message_id: str, - /, - args: Mapping[str, FluentValue] | None = None, - *, - attribute: str | None = None, -) -> tuple[str, tuple[FrozenFluentError, ...]]: -``` - -### Parameters -| Parameter | Type | Req | Description | -|:----------|:-----|:----|:------------| -| `message_id` | `str` | Y | Message identifier (positional-only). | -| `args` | `Mapping[str, FluentValue] \| None` | N | Variable arguments. | -| `attribute` | `str \| None` | N | Attribute name to format. | - -### Constraints -- Return: Tuple of (formatted_string, errors). -- Raises: `FormattingIntegrityError` in strict mode (default) if ANY error occurs. In non-strict mode (`strict=False`), never raises; all errors collected in tuple. -- State: Read-only (may update cache). -- Thread: Safe for concurrent reads. -- Duplicate Attributes: When message has duplicate attributes with same name, last attribute wins (per Fluent spec). - ---- - -## `FluentBundle.validate_resource` - -### Signature -```python -def validate_resource(self, source: str) -> ValidationResult: -``` - -### Parameters -| Parameter | Type | Req | Description | -|:----------|:-----|:----|:------------| -| `source` | `str` | Y | FTL source code to validate. | - -### Constraints -- Return: ValidationResult with errors and warnings. -- Cross-Resource: References to existing bundle messages/terms do not produce undefined warnings. -- Raises: `TypeError` if source is not a str. -- State: None. Does not modify bundle. -- Thread: Safe. 
- ---- - -## `FluentBundle.has_message` - -### Signature -```python -def has_message(self, message_id: str) -> bool: + ) -> None: ``` ### Parameters -| Parameter | Type | Req | Description | -|:----------|:-----|:----|:------------| -| `message_id` | `str` | Y | Message identifier to check. | +| Name | Req | Semantics | +|:-----|:----|:----------| +| `locale` | Y | Locale code for bundle | +| `use_isolating` | N | Enable bidi isolation | +| `cache` | N | Cache configuration | +| `functions` | N | Custom function registry | +| `max_source_size` | N | FTL input bound | +| `max_nesting_depth` | N | Nesting safety bound | +| `max_expansion_size` | N | Expansion safety bound | +| `strict` | N | Raise on integrity failures | ### Constraints -- Return: True if message exists. -- Raises: None. -- State: Read-only. -- Thread: Safe. +- Return: Async wrapper around the same runtime semantics as `FluentBundle` +- State: Delegates to an internal bundle instance +- Thread: Safe +- Async: Formatting and mutation paths run through `asyncio.to_thread()` --- -## `FluentBundle.has_attribute` - -### Signature -```python -def has_attribute(self, message_id: str, attribute: str) -> bool: -``` - -### Parameters -| Parameter | Type | Req | Description | -|:----------|:-----|:----|:------------| -| `message_id` | `str` | Y | Message identifier to check. | -| `attribute` | `str` | Y | Attribute name to check. | - -### Constraints -- Return: True if message exists AND has the specified attribute. -- Raises: None. -- State: Read-only. -- Thread: Safe. -- Duplicate Attributes: Checks existence only; does not indicate which definition will be used if duplicates exist (see format_pattern for last-wins resolution). - ---- - -## `FluentBundle.get_message_ids` - -### Signature -```python -def get_message_ids(self) -> list[str]: -``` - -### Parameters -| Parameter | Type | Req | Description | -|:----------|:-----|:----|:------------| - -### Constraints -- Return: List of all message identifiers. 
-- Raises: None. -- State: Read-only. -- Thread: Safe. - ---- +## `FluentLocalization` -## `FluentBundle.get_message_variables` +Class that orchestrates multiple locale bundles with fallback chains. ### Signature ```python -def get_message_variables(self, message_id: str) -> frozenset[str]: +class FluentLocalization: + def __init__( + self, + locales: Iterable[LocaleCode], + resource_ids: Iterable[ResourceId] | None = None, + resource_loader: ResourceLoader | None = None, + *, + use_isolating: bool = True, + cache: CacheConfig | None = None, + on_fallback: Callable[[FallbackInfo], None] | None = None, + strict: bool = True, + ) -> None: ``` ### Parameters -| Parameter | Type | Req | Description | -|:----------|:-----|:----|:------------| -| `message_id` | `str` | Y | Message identifier. | +| Name | Req | Semantics | +|:-----|:----|:----------| +| `locales` | Y | Fallback-ordered locale chain | +| `resource_ids` | N | Resource identifiers to load | +| `resource_loader` | N | Loader for startup resources | +| `use_isolating` | N | Enable bidi isolation | +| `cache` | N | Per-bundle cache config | +| `on_fallback` | N | Fallback callback hook | +| `strict` | N | Raise on integrity failures | ### Constraints -- Return: Frozen set of variable names (without $ prefix). -- Raises: `KeyError` if message not found. -- State: Read-only. -- Thread: Safe. +- Return: Multi-locale runtime with canonicalized locale chain +- Raises: `ValueError` on empty locales or inconsistent loader inputs +- State: Hybrid initialization. 
When `resource_loader` and `resource_ids` are supplied, resource loads happen eagerly during `__init__()`, bundles for those loaded locales are created eagerly, and untouched fallback bundles stay lazy until first access +- Thread: Safe +- Main methods: `format_value()`, `format_pattern()`, `add_resource()`, `add_function()`, `get_load_summary()`, `require_clean()`, `validate_message_schemas()`, `get_cache_stats()` --- -## `FluentBundle.get_all_message_variables` - -### Signature -```python -def get_all_message_variables(self) -> dict[str, frozenset[str]]: -``` - -### Parameters -| Parameter | Type | Req | Description | -|:----------|:-----|:----|:------------| - -### Constraints -- Return: Dict mapping message IDs to variable sets. -- Raises: None. -- State: Read-only. Acquires single read lock for atomic snapshot. -- Thread: Safe. Provides consistent snapshot during concurrent mutations. - ---- +## `LocalizationBootConfig` -## `FluentBundle.introspect_message` +Dataclass that composes strict localization startup into one boot contract. ### Signature ```python -def introspect_message(self, message_id: str) -> MessageIntrospection: -``` - -### Parameters -| Parameter | Type | Req | Description | -|:----------|:-----|:----|:------------| -| `message_id` | `str` | Y | Message identifier. | - -### Constraints -- Return: MessageIntrospection with complete metadata. -- Raises: `KeyError` if message not found. -- State: Read-only. -- Thread: Safe. +@dataclass(frozen=True, slots=True) +class LocalizationBootConfig: + locales: tuple[str, ...] + resource_ids: tuple[str, ...] 
+ loader: ResourceLoader | None = None + base_path: str | None = None + message_schemas: Mapping[MessageId, frozenset[str] | set[str]] | None = None + required_messages: frozenset[str] | None = None + strict: bool = True + use_isolating: bool = True + cache: CacheConfig | None = None + on_fallback: Callable[[FallbackInfo], None] | None = None +``` + +### Parameters +| Name | Req | Semantics | +|:-----|:----|:----------| +| `locales` | Y | Fallback locale chain | +| `resource_ids` | Y | Required resource list | +| `loader` | N | Custom resource loader | +| `base_path` | N | Loader path template | +| `message_schemas` | N | Expected message variables | +| `required_messages` | N | Presence contract set | +| `strict` | N | Runtime strict mode | +| `use_isolating` | N | Enable bidi isolation | +| `cache` | N | Bundle cache config | +| `on_fallback` | N | Fallback callback hook | + +### Constraints +- Return: Immutable boot plan object +- Raises: `ValueError` when loader/base_path invariants are broken +- Raises: `RuntimeError` if `boot()` or `boot_simple()` is called more than once on the same instance +- State: One-shot boot coordinator +- Thread: Safe +- Main methods: `boot()`, `boot_simple()`, `from_path()` --- -## `FluentBundle.introspect_term` - -### Signature -```python -def introspect_term(self, term_id: str) -> MessageIntrospection: -``` - -### Parameters -| Parameter | Type | Req | Description | -|:----------|:-----|:----|:------------| -| `term_id` | `str` | Y | Term identifier (without leading dash). | - -### Constraints -- Return: MessageIntrospection with complete metadata. -- Raises: `KeyError` if term not found. -- State: Read-only. -- Thread: Safe. - ---- +## `PathResourceLoader` -## `FluentBundle.add_function` +Dataclass that loads FTL source from a locale-substituted path template. 
### Signature ```python -def add_function(self, name: str, func: Callable[..., FluentValue]) -> None: +@dataclass(frozen=True, slots=True) +class PathResourceLoader: + base_path: str + root_dir: str | None = None ``` ### Parameters -| Parameter | Type | Req | Description | -|:----------|:-----|:----|:------------| -| `name` | `str` | Y | Function name (UPPERCASE convention). | -| `func` | `Callable[..., FluentValue]` | Y | Python function returning FluentValue. | +| Name | Req | Semantics | +|:-----|:----|:----------| +| `base_path` | Y | Path template with `{locale}` | +| `root_dir` | N | Root for path safety checks | ### Constraints -- Return: None. -- Raises: `TypeError` if registry is frozen or if callable has no inspectable signature. -- State: Mutates function registry. Clears cache. -- Thread: Safe (RWLock). +- Raises: `ValueError` if `base_path` lacks `{locale}` +- Security: Rejects absolute paths and traversal-style `resource_id` values +- State: Immutable +- Thread: Safe --- -## `FluentBundle.clear_cache` - -### Signature -```python -def clear_cache(self) -> None: -``` - -### Parameters -| Parameter | Type | Req | Description | -|:----------|:-----|:----|:------------| - -### Constraints -- Return: None. -- Raises: None. -- State: Clears format cache. -- Thread: Safe. - ---- +## `ResourceLoader` -## `FluentBundle.get_cache_stats` +Protocol that supplies FTL source for a locale and resource id pair. ### Signature ```python -def get_cache_stats(self) -> CacheStats | None: +class ResourceLoader(Protocol): + def load(self, locale: LocaleCode, resource_id: ResourceId) -> FTLSource: ... + def describe_path(self, locale: LocaleCode, resource_id: ResourceId) -> str: ... ``` -### Parameters -| Parameter | Type | Req | Description | -|:----------|:-----|:----|:------------| - ### Constraints -- Return: `CacheStats` TypedDict snapshot, or `None` if caching disabled. See `CacheStats` for all 19 fields with precise per-field types. 
-- Import: `from ftllexengine.runtime.cache import CacheStats` -- Raises: Never. -- State: Read-only. -- Thread: Safe. +- Purpose: Loader contract for `FluentLocalization` and `LocalizationBootConfig` +- State: Implementation-defined +- Thread: Implementation-defined --- -## `FluentBundle.get_cache_audit_log` - -### Signature -```python -def get_cache_audit_log(self) -> tuple[CacheAuditLogEntry, ...] | None: -``` - -### Parameters -| Parameter | Type | Req | Description | -|:----------|:-----|:----|:------------| - -### Constraints -- Return: Tuple of immutable `CacheAuditLogEntry` snapshots, or `None` if caching disabled. Audit-disabled caches return `()`. -- Import: `from ftllexengine.localization import CacheAuditLogEntry` -- Raises: Never. -- State: Read-only. -- Thread: Safe. - ---- +## `LoadStatus` -## `FluentBundle.locale` +Enumeration of resource-load outcomes. ### Signature ```python -@property -def locale(self) -> str: +class LoadStatus(StrEnum): + SUCCESS = "success" + NOT_FOUND = "not_found" + ERROR = "error" ``` -### Parameters -| Parameter | Type | Req | Description | -|:----------|:-----|:----|:------------| - ### Constraints -- Return: Canonical lowercase underscore `LocaleCode`. -- Raises: None. -- State: Read-only property. -- Thread: Safe. +- Purpose: Classify startup load attempts +- Type: `StrEnum` --- -## `FluentBundle.use_isolating` - -### Signature -```python -@property -def use_isolating(self) -> bool: -``` - -### Parameters -| Parameter | Type | Req | Description | -|:----------|:-----|:----|:------------| - -### Constraints -- Return: Boolean indicating bidi isolation enabled. -- Raises: None. -- State: Read-only property. -- Thread: Safe. - ---- +## `ResourceLoadResult` -## `FluentBundle.cache_enabled` +Dataclass representing one locale/resource load attempt. 
### Signature ```python -@property -def cache_enabled(self) -> bool: +@dataclass(frozen=True, slots=True) +class ResourceLoadResult: + locale: LocaleCode + resource_id: ResourceId + status: LoadStatus + error: Exception | None = None + source_path: str | None = None + junk_entries: tuple[Junk, ...] = () ``` -### Parameters -| Parameter | Type | Req | Description | -|:----------|:-----|:----|:------------| - ### Constraints -- Return: True if caching enabled. -- Raises: None. -- State: Read-only property. -- Thread: Safe. +- Purpose: Immutable startup/load evidence record +- State: Immutable +- Thread: Safe +- Key properties: `is_success`, `is_not_found`, `is_error`, `has_junk` --- -## `FluentBundle.cache_config` - -### Signature -```python -@property -def cache_config(self) -> CacheConfig | None: -``` - -### Constraints -- Return: `CacheConfig` when caching enabled; `None` when `cache=None` was passed to constructor. -- Raises: None. -- State: Read-only property. -- Thread: Safe. - ---- +## `LoadSummary` -## `FluentBundle.strict` +Dataclass aggregating all startup load results. ### Signature ```python -@property -def strict(self) -> bool: +@dataclass(frozen=True, slots=True) +class LoadSummary: + results: tuple[ResourceLoadResult, ...] ``` ### Constraints -- Return: True if strict mode enabled (fail-fast on any error). -- Raises: None. -- State: Read-only property. -- Thread: Safe. -- Note: When True, any formatting error raises FormattingIntegrityError. Errors are cached before raising; cache hits re-raise without re-resolution. 
+- Purpose: Summarize boot cleanliness and resource outcomes +- State: Immutable +- Thread: Safe +- Key properties: `total_attempted`, `successful`, `not_found`, `errors`, `junk_count`, `has_errors`, `has_junk`, `all_successful`, `all_clean` +- Helper methods: `get_errors()`, `get_not_found()`, `get_successful()`, `get_by_locale()`, `get_with_junk()`, `get_all_junk()` --- -## `FluentBundle.cache_usage` - -### Signature -```python -@property -def cache_usage(self) -> int: -``` - -### Constraints -- Return: Current number of cached format results. -- Raises: None. -- State: Read-only property. -- Thread: Safe. - ---- +## `FallbackInfo` -## `FluentBundle.max_expansion_size` +Dataclass describing one fallback-resolution event. ### Signature ```python -@property -def max_expansion_size(self) -> int: +@dataclass(frozen=True, slots=True) +class FallbackInfo: + requested_locale: LocaleCode + resolved_locale: LocaleCode + message_id: MessageId ``` ### Constraints -- Return: Maximum total characters produced during resolution. -- Raises: None. -- State: Read-only property. -- Thread: Safe. -- Default: 1000000. +- Purpose: Callback payload for fallback observability +- State: Immutable +- Thread: Safe --- -## `FluentBundle.max_nesting_depth` - -### Signature -```python -@property -def max_nesting_depth(self) -> int: -``` - -### Constraints -- Return: Maximum placeable nesting depth. -- Raises: None. -- State: Read-only property. -- Thread: Safe. -- Default: 100. - ---- +## `LocalizationCacheStats` -## `FluentBundle.max_source_size` +Typed dict representing aggregate cache metrics across localization bundles. ### Signature ```python -@property -def max_source_size(self) -> int: +class LocalizationCacheStats(CacheStats, total=True): + bundle_count: int ``` ### Constraints -- Return: Maximum FTL source size in characters. -- Raises: None. -- State: Read-only property. -- Thread: Safe. -- Default: 10,000,000. 
- ---- - -## `FluentBundle.function_registry` - -### Signature -```python -@property -def function_registry(self) -> FunctionRegistry: -``` - -### Constraints -- Return: The `FunctionRegistry` for this bundle. -- Raises: None. -- State: Read-only property. -- Thread: Safe. - ---- - -## `FluentBundle.get_babel_locale` - -### Signature -```python -def get_babel_locale(self) -> str: -``` - -### Parameters -| Parameter | Type | Req | Description | -|:----------|:-----|:----|:------------| - -### Constraints -- Return: Babel locale identifier string. -- Raises: None. -- State: Read-only. -- Thread: Safe. - ---- - -## `FluentLocalization` - -### Signature -```python -class FluentLocalization: - def __init__( - self, - locales: Iterable[LocaleCode], - resource_ids: Iterable[ResourceId] | None = None, - resource_loader: ResourceLoader | None = None, - *, - use_isolating: bool = True, - cache: CacheConfig | None = None, - on_fallback: Callable[[FallbackInfo], None] | None = None, - strict: bool = True, - ) -> None: ... -``` - -### Parameters -| Parameter | Type | Req | Description | -|:----------|:-----|:----|:------------| -| `locales` | `Iterable[LocaleCode]` | Y | Locale codes in fallback order. | -| `resource_ids` | `Iterable[ResourceId] \| None` | N | FTL files to auto-load. | -| `resource_loader` | `ResourceLoader \| None` | N | Loader for FTL files. | -| `use_isolating` | `bool` | N | Wrap interpolated values in bidi marks. | -| `cache` | `CacheConfig \| None` | N | Cache configuration. `None` disables (default). | -| `on_fallback` | `Callable[[FallbackInfo], None] \| None` | N | Callback on fallback locale resolution. | -| `strict` | `bool` | N | Fail-fast mode (default `True`): raises `FormattingIntegrityError` on errors. Pass `False` for soft-error recovery. | - -### Constraints -- Return: FluentLocalization instance. -- Raises: `ValueError` if locales empty, invalid locale format, or resource_ids without loader. 
Locale codes must match `[a-zA-Z0-9]+([_-][a-zA-Z0-9]+)*` (BCP 47 subset). -- State: Lazy bundle initialization. Bundles created on first access. Locale format validated eagerly at construction. -- Thread: Safe (RWLock-protected; concurrent reads, exclusive writes). -- Fallback: `on_fallback` invoked when message resolved from non-primary locale. -- Strict: When True, all underlying FluentBundle instances use strict mode. `_handle_message_not_found` raises `FormattingIntegrityError`. - ---- - -## `FluentLocalization.add_resource` - -### Signature -```python -def add_resource(self, locale: LocaleCode, ftl_source: FTLSource) -> tuple[Junk, ...]: -``` - -### Parameters -| Parameter | Type | Req | Description | -|:----------|:-----|:----|:------------| -| `locale` | `LocaleCode` | Y | Target locale boundary value; canonicalized before fallback-chain lookup. | -| `ftl_source` | `FTLSource` | Y | FTL source code. | - -### Constraints -- Return: Tuple of Junk entries (syntax errors). Empty if parse succeeded. -- Raises: `ValueError` if locale is not in the fallback chain after canonicalization, or if the locale boundary is blank/invalid. -- State: Mutates target bundle. -- Thread: Safe (RWLock write lock). - ---- - -## `FluentLocalization.add_resource_stream` - -### Signature -```python -def add_resource_stream( - self, - locale: LocaleCode, - lines: Iterable[str], - *, - source_path: str | None = None -) -> tuple[Junk, ...]: -``` - -### Parameters -| Parameter | Type | Req | Description | -|:----------|:-----|:----|:------------| -| `locale` | `LocaleCode` | Y | Target locale; canonicalized before fallback-chain lookup. | -| `lines` | `Iterable[str]` | Y | FTL source as a line iterator. | -| `source_path` | `str \| None` | N | Path for error messages. | - -### Constraints -- Return: Tuple of Junk entries. Empty if parse succeeded. -- Purpose: Identical semantics to `add_resource()` but accepts a line iterator instead of a full string. 
-- Raises: `ValueError` if locale is not in the fallback chain. -- State: Mutates target bundle. -- Thread: Safe (RWLock write lock). - ---- - -## `FluentLocalization.format_value` - -### Signature -```python -def format_value( - self, - message_id: MessageId, - args: Mapping[str, FluentValue] | None = None -) -> tuple[str, tuple[FrozenFluentError, ...]]: -``` - -### Parameters -| Parameter | Type | Req | Description | -|:----------|:-----|:----|:------------| -| `message_id` | `MessageId` | Y | Message identifier. | -| `args` | `Mapping[str, FluentValue] \| None` | N | Variable arguments. | - -### Constraints -- Return: Tuple of (formatted_string, errors). -- Raises: `FormattingIntegrityError` when strict mode enabled. -- State: Read-only. -- Thread: Safe (RWLock read lock). - ---- - -## `FluentLocalization.format_pattern` - -### Signature -```python -def format_pattern( - self, - message_id: MessageId, - args: Mapping[str, FluentValue] | None = None, - *, - attribute: str | None = None, -) -> tuple[str, tuple[FrozenFluentError, ...]]: -``` - -### Parameters -| Parameter | Type | Req | Description | -|:----------|:-----|:----|:------------| -| `message_id` | `MessageId` | Y | Message identifier. | -| `args` | `Mapping[str, FluentValue] \| None` | N | Variable arguments. | -| `attribute` | `str \| None` | N | Attribute name. | - -### Constraints -- Return: Tuple of (formatted_string, errors). -- Raises: `FormattingIntegrityError` when strict mode enabled. -- State: Read-only. -- Thread: Safe (RWLock read lock). - ---- - -## `FluentLocalization.has_message` - -### Signature -```python -def has_message(self, message_id: MessageId) -> bool: -``` - -### Parameters -| Parameter | Type | Req | Description | -|:----------|:-----|:----|:------------| -| `message_id` | `MessageId` | Y | Message identifier. | - -### Constraints -- Return: True if message exists in any locale. -- Raises: None. -- State: Read-only. -- Thread: Safe. 
- ---- - -## `FluentLocalization.has_attribute` - -### Signature -```python -def has_attribute(self, message_id: MessageId, attribute: str) -> bool: -``` - -### Parameters -| Parameter | Type | Req | Description | -|:----------|:-----|:----|:------------| -| `message_id` | `MessageId` | Y | Message identifier. | -| `attribute` | `str` | Y | Attribute name to check. | - -### Constraints -- Return: True if message exists in any locale AND has the specified attribute. -- Raises: None. -- State: Read-only. -- Thread: Safe. - ---- - -## `FluentLocalization.get_message_ids` - -### Signature -```python -def get_message_ids(self) -> list[str]: -``` - -### Parameters -| Parameter | Type | Req | Description | -|:----------|:-----|:----|:------------| - -### Constraints -- Return: List of all message identifiers across all locales. -- Raises: None. -- State: Read-only. -- Thread: Safe. - ---- - -## `FluentLocalization.get_message_variables` - -### Signature -```python -def get_message_variables(self, message_id: MessageId) -> frozenset[str]: -``` - -### Parameters -| Parameter | Type | Req | Description | -|:----------|:-----|:----|:------------| -| `message_id` | `MessageId` | Y | Message identifier. | - -### Constraints -- Return: Frozen set of variable names (without $ prefix). -- Raises: `KeyError` if message not found in any locale. -- State: Read-only. -- Thread: Safe. - ---- - -## `FluentLocalization.get_all_message_variables` - -### Signature -```python -def get_all_message_variables(self) -> dict[str, frozenset[str]]: -``` - -### Parameters -| Parameter | Type | Req | Description | -|:----------|:-----|:----|:------------| - -### Constraints -- Return: Dict mapping message IDs to variable sets across all locales. -- Raises: None. -- State: Read-only. -- Thread: Safe. 
- ---- - -## `FluentLocalization.introspect_term` - -### Signature -```python -def introspect_term(self, term_id: str) -> MessageIntrospection | None: -``` - -### Parameters -| Parameter | Type | Req | Description | -|:----------|:-----|:----|:------------| -| `term_id` | `str` | Y | Term identifier (without leading dash). | - -### Constraints -- Return: MessageIntrospection from first bundle with term, or None. -- Raises: None. -- State: Read-only. -- Thread: Safe. - ---- - -## `FluentLocalization.add_function` - -### Signature -```python -def add_function(self, name: str, func: Callable[..., FluentValue]) -> None: -``` - -### Parameters -| Parameter | Type | Req | Description | -|:----------|:-----|:----|:------------| -| `name` | `str` | Y | Function name. | -| `func` | `Callable[..., FluentValue]` | Y | Python function returning FluentValue. | - -### Constraints -- Return: None. -- Raises: None. -- State: Stores function for existing and future bundles. -- Thread: Safe (RWLock write lock). -- Behavior: Preserves lazy bundle initialization. Functions are stored and applied when bundles are first accessed. - ---- - -## `FluentLocalization.get_bundles` - -### Signature -```python -def get_bundles(self) -> Generator[FluentBundle]: -``` - -### Parameters -| Parameter | Type | Req | Description | -|:----------|:-----|:----|:------------| - -### Constraints -- Return: Generator yielding bundles in fallback order. -- Raises: None. -- State: Read-only. -- Thread: Safe. - ---- - -## `FluentLocalization.locales` - -### Signature -```python -@property -def locales(self) -> tuple[LocaleCode, ...]: -``` - -### Constraints -- Return: Immutable tuple of canonical lowercase underscore `LocaleCode` values. -- Raises: None. -- State: Read-only property. -- Thread: Safe. 
- ---- - -## `FluentLocalization.get_load_summary` - -### Signature -```python -def get_load_summary(self) -> LoadSummary: -``` - -### Parameters -| Parameter | Type | Req | Description | -|:----------|:-----|:----|:------------| - -### Constraints -- Return: LoadSummary with aggregated load results from initialization. -- Raises: None. -- State: Read-only. -- Thread: Safe. - ---- - -## `FluentLocalization.require_clean` - -Method that enforces a clean initialization `LoadSummary`. - -### Signature -```python -def require_clean(self) -> LoadSummary: -``` - -### Parameters -| Parameter | Type | Req | Description | -|:----------|:-----|:----|:------------| - -### Constraints -- Return: Initialization `LoadSummary` when `all_clean` is true. -- Raises: `IntegrityCheckFailedError` when initialization had missing resources, load errors, or junk entries. -- State: Read-only. -- Thread: Safe. -- Scope: Checks only loader-driven initialization results. Dynamic `add_resource()` calls are excluded, matching `get_load_summary()`. - ---- - -## `FluentLocalization.validate_message_schemas` - -Method that enforces exact message-variable schemas across the fallback chain. - -### Signature -```python -def validate_message_schemas( - self, - expected_schemas: Mapping[MessageId, frozenset[str] | set[str]], -) -> tuple[MessageVariableValidationResult, ...]: -``` - -### Parameters -| Parameter | Type | Req | Description | -|:----------|:-----|:----|:------------| -| `expected_schemas` | `Mapping[MessageId, frozenset[str] \| set[str]]` | Y | Expected variables per message ID. | - -### Constraints -- Return: Immutable tuple of `MessageVariableValidationResult` values in input mapping order when every schema matches exactly. -- Raises: `IntegrityCheckFailedError` when a message is missing or its declared variables differ from the expected set. -- State: Read-only. -- Thread: Safe. 
-- Exactness: Declared variables must equal the expected set; both missing and extra variables fail validation. -- Scope: Resolves each message through the fallback chain via `get_message()`. Terms remain available through `get_term()` plus `validate_message_variables()`. - ---- - -## `FluentLocalization.validate_message_variables` - -Method that enforces an exact variable schema for a single fallback-resolved message. - -### Signature -```python -def validate_message_variables( - self, - message_id: str, - expected_variables: frozenset[str] | set[str], -) -> MessageVariableValidationResult: -``` - -### Parameters -| Parameter | Type | Req | Description | -|:----------|:-----|:----|:------------| -| `message_id` | `str` | Y | Message ID to resolve through the fallback chain. | -| `expected_variables` | `frozenset[str] \| set[str]` | Y | Exact variable set expected for that message. | - -### Constraints -- Return: Immutable `MessageVariableValidationResult` when the message exists and declares exactly the expected variables. -- Raises: `IntegrityCheckFailedError` when the message is missing or its declared variables differ from `expected_variables`. -- State: Read-only. -- Thread: Safe. -- Scope: Uses the same fallback lookup semantics as `get_message()`. - ---- - -## `FallbackInfo` - -Dataclass providing fallback event metadata when FluentLocalization resolves a message from a non-primary locale. - -### Signature -```python -@dataclass(frozen=True, slots=True) -class FallbackInfo: - requested_locale: LocaleCode - resolved_locale: LocaleCode - message_id: MessageId -``` - -### Parameters -| Field | Type | Description | -|:------|:-----|:------------| -| `requested_locale` | `LocaleCode` | Primary locale that was requested. | -| `resolved_locale` | `LocaleCode` | Locale that actually contained the message. | -| `message_id` | `MessageId` | Message identifier that triggered fallback. | - -### Constraints -- Return: Frozen dataclass instance. -- State: Immutable. 
-- Thread: Safe. -- Usage: Passed to `on_fallback` callback in FluentLocalization. -- Import: `from ftllexengine import FallbackInfo` or `from ftllexengine.localization import FallbackInfo`. - ---- - -## `LoadStatus` - -### Signature -```python -class LoadStatus(StrEnum): - SUCCESS = "success" - NOT_FOUND = "not_found" - ERROR = "error" -``` - -### Parameters -| Value | Description | -|:------|:------------| -| `SUCCESS` | Resource loaded successfully. | -| `NOT_FOUND` | Resource file not found (expected for optional locales). | -| `ERROR` | Resource load failed with error. | - -### Constraints -- StrEnum: Members ARE strings. -- Babel: NOT required. Defined in `ftllexengine.enums`; no Babel import chain. -- Import: `from ftllexengine import LoadStatus` or `from ftllexengine.localization import LoadStatus`. - ---- - -## `ResourceLoadResult` - -### Signature -```python -@dataclass(frozen=True, slots=True) -class ResourceLoadResult: - locale: LocaleCode - resource_id: ResourceId - status: LoadStatus - error: Exception | None = None - source_path: str | None = None - junk_entries: tuple[Junk, ...] = () - - @property - def is_success(self) -> bool: ... - @property - def is_not_found(self) -> bool: ... - @property - def is_error(self) -> bool: ... - @property - def has_junk(self) -> bool: ... -``` - -### Parameters -| Field | Type | Description | -|:------|:-----|:------------| -| `locale` | `LocaleCode` | Locale code for this resource. | -| `resource_id` | `ResourceId` | Resource identifier (e.g., 'main.ftl'). | -| `status` | `LoadStatus` | Load status (success, not_found, error). | -| `error` | `Exception \| None` | Exception if status is ERROR. | -| `source_path` | `str \| None` | Full path to resource (if available). | -| `junk_entries` | `tuple[Junk, ...]` | Unparseable content found during parsing. | - -### Constraints -- Return: Immutable load result record. -- State: Frozen dataclass. -- Junk: `has_junk` property returns True if any Junk entries present. 
-- Import: `from ftllexengine import ResourceLoadResult` or `from ftllexengine.localization import ResourceLoadResult`. - ---- - -## `LoadSummary` - -### Signature -```python -@dataclass(frozen=True, slots=True) -class LoadSummary: - results: tuple[ResourceLoadResult, ...] # sole dataclass field - - @property - def total_attempted(self) -> int: ... - @property - def successful(self) -> int: ... - @property - def not_found(self) -> int: ... - @property - def errors(self) -> int: ... - @property - def junk_count(self) -> int: ... - @property - def has_errors(self) -> bool: ... - @property - def all_successful(self) -> bool: ... - @property - def all_clean(self) -> bool: ... - @property - def has_junk(self) -> bool: ... - - def get_errors(self) -> tuple[ResourceLoadResult, ...]: ... - def get_not_found(self) -> tuple[ResourceLoadResult, ...]: ... - def get_successful(self) -> tuple[ResourceLoadResult, ...]: ... - def get_by_locale(self, locale: LocaleCode) -> tuple[ResourceLoadResult, ...]: ... - def get_with_junk(self) -> tuple[ResourceLoadResult, ...]: ... - def get_all_junk(self) -> tuple[Junk, ...]: ... -``` - -### Parameters -| Field | Type | Description | -|:------|:-----|:------------| -| `results` | `tuple[ResourceLoadResult, ...]` | All individual load results (sole constructor field). | - -### Constraints -- Return: Immutable summary record. -- State: Frozen dataclass. Statistics (`total_attempted`, `successful`, `not_found`, `errors`, `junk_count`) are `@property` methods computed from `results`; not constructor parameters. -- Junk: `get_with_junk()` returns results with Junk; `get_all_junk()` aggregates all Junk. -- Import: `from ftllexengine import LoadSummary` or `from ftllexengine.localization import LoadSummary`. - ---- - -## `LoadSummary.all_clean` - -Property checking if all resources loaded successfully without Junk entries. 
- -### Signature -```python -@property -def all_clean(self) -> bool: -``` - -### Constraints -- Return: True if errors == 0 and not_found == 0 and junk_count == 0. -- State: Read-only property. -- Purpose: Stricter validation than all_successful. Use for validation workflows requiring zero unparseable content. -- Contrast: `all_successful` ignores Junk (only checks I/O success), `all_clean` requires perfect parse. - ---- - -## `FluentLocalization.cache_enabled` - -### Signature -```python -@property -def cache_enabled(self) -> bool: -``` - -### Constraints -- Return: True if format caching enabled for all bundles. -- Raises: None. -- State: Read-only property. -- Thread: Safe. - ---- - -## `FluentLocalization.cache_config` - -### Signature -```python -@property -def cache_config(self) -> CacheConfig | None: -``` - -### Constraints -- Return: The `CacheConfig` instance passed to this localization, or `None` if caching disabled. -- Raises: None. -- State: Read-only property. -- Thread: Safe. - ---- - -## `FluentLocalization.get_babel_locale` - -### Signature -```python -def get_babel_locale(self) -> str: -``` - -### Parameters -| Parameter | Type | Req | Description | -|:----------|:-----|:----|:------------| - -### Constraints -- Return: Babel locale identifier from the primary bundle's canonical locale. -- Raises: None. -- State: Read-only. -- Thread: Safe. - ---- - -## `FluentLocalization.validate_resource` - -### Signature -```python -def validate_resource(self, ftl_source: FTLSource) -> ValidationResult: -``` - -### Parameters -| Parameter | Type | Req | Description | -|:----------|:-----|:----|:------------| -| `ftl_source` | `FTLSource` | Y | FTL source code to validate. | - -### Constraints -- Return: ValidationResult with errors and warnings. -- Raises: `TypeError` if ftl_source is not a str (propagated from primary bundle). -- State: None. Does not modify bundles. -- Thread: Safe. 
- ---- - -## `FluentLocalization.clear_cache` - -### Signature -```python -def clear_cache(self) -> None: -``` - -### Parameters -| Parameter | Type | Req | Description | -|:----------|:-----|:----|:------------| - -### Constraints -- Return: None. -- Raises: None. -- State: Clears format cache on all bundles. -- Thread: Safe. - ---- - -## `FluentLocalization.get_cache_stats` - -### Signature -```python -def get_cache_stats(self) -> LocalizationCacheStats | None: -``` - -### Parameters -| Parameter | Type | Req | Description | -|:----------|:-----|:----|:------------| - -### Constraints -- Return: `LocalizationCacheStats` TypedDict with 20 aggregated fields, or `None` if caching disabled. Extends `CacheStats` with `bundle_count`. See `LocalizationCacheStats` for all fields. -- Note: Numeric fields summed across all bundles; boolean fields reflect first bundle's `CacheConfig`. -- Note: `bundle_count` reflects only initialized bundles, not total locales. -- Import: `from ftllexengine import LocalizationCacheStats` or `from ftllexengine.localization import LocalizationCacheStats`. -- Raises: None. -- State: Reads cache statistics from all initialized bundles. -- Thread: Safe. - ---- - -## `FluentLocalization.get_cache_audit_log` - -### Signature -```python -def get_cache_audit_log(self) -> dict[LocaleCode, tuple[CacheAuditLogEntry, ...]] | None: -``` - -### Parameters -| Parameter | Type | Req | Description | -|:----------|:-----|:----|:------------| - -### Constraints -- Return: Per-locale mapping of immutable `CacheAuditLogEntry` tuples, or `None` if caching disabled. -- Note: Only initialized bundles are included; this method does not create lazy bundles. -- Note: Audit-disabled bundles return `()`. -- Import: `from ftllexengine.runtime import CacheAuditLogEntry` -- Raises: Never. -- State: Reads audit logs from initialized bundles. -- Thread: Safe. 
- ---- - -## `LocalizationCacheStats` - -TypedDict representing aggregate cache statistics across all bundles in a `FluentLocalization`. - -### Signature -```python -class LocalizationCacheStats(CacheStats, total=True): - bundle_count: int -``` - -### Constraints -- Purpose: Extends `CacheStats` with `bundle_count` for multi-bundle monitoring. All 19 `CacheStats` fields are inherited with the same semantics; numeric fields are summed across all bundles. -- `bundle_count`: number of initialized bundles contributing to the aggregated statistics. -- Import: `from ftllexengine import LocalizationCacheStats` or `from ftllexengine.localization import LocalizationCacheStats`. -- Boolean fields: `write_once`, `strict`, `audit_enabled` reflect the first bundle's `CacheConfig` (all bundles share one config). - ---- - -## `FluentLocalization.introspect_message` - -### Signature -```python -def introspect_message(self, message_id: MessageId) -> MessageIntrospection | None: -``` - -### Parameters -| Parameter | Type | Req | Description | -|:----------|:-----|:----|:------------| -| `message_id` | `MessageId` | Y | Message identifier. | - -### Constraints -- Return: MessageIntrospection from first bundle with message, or None. -- Raises: None. -- State: Read-only. -- Thread: Safe. - ---- - -## `PathResourceLoader` - -### Signature -```python -@dataclass(frozen=True, slots=True) -class PathResourceLoader: - base_path: str - root_dir: str | None = None - - def load(self, locale: LocaleCode, resource_id: ResourceId) -> FTLSource: ... -``` - -### Parameters -| Parameter | Type | Req | Description | -|:----------|:-----|:----|:------------| -| `base_path` | `str` | Y | Path template with {locale} placeholder. | -| `root_dir` | `str \| None` | N | Fixed root directory for path traversal validation. | - -### Constraints -- Return: FTL source string from file. -- Raises: `FileNotFoundError` if file missing, `OSError` on read error, `ValueError` on path traversal attempt. -- State: None. 
Immutable dataclass. -- Thread: Safe. -- Security: - - Validates `locale` parameter against directory traversal attacks (rejects "..", "/", "\\"). - - Validates `resource_id` against directory traversal attacks (rejects "..", absolute paths). - - Empty locale codes are rejected. - - `root_dir` provides fixed anchor unaffected by locale parameter. -- Import: `from ftllexengine import PathResourceLoader` or `from ftllexengine.localization import PathResourceLoader`. - ---- - -## `ResourceLoader` - -### Signature -```python -class ResourceLoader(Protocol): - def load(self, locale: LocaleCode, resource_id: ResourceId) -> FTLSource: ... - def describe_path(self, locale: LocaleCode, resource_id: ResourceId) -> str: ... -``` - -### Parameters -| Parameter | Type | Req | Description | -|:----------|:-----|:----|:------------| -| `locale` | `LocaleCode` | Y | Locale code. | -| `resource_id` | `ResourceId` | Y | Resource identifier. | - -### Constraints -- Return (`load`): FTL source string. -- Return (`describe_path`): Human-readable path string for diagnostics; default `"{locale}/{resource_id}"`. -- Raises: Implementation-dependent. -- State: Protocol. No implementation. -- Thread: Implementation-dependent. -- Import: `from ftllexengine import ResourceLoader` or `from ftllexengine.localization import ResourceLoader`. - ---- - -## `normalize_locale` - -### Signature -```python -def normalize_locale(locale_code: str) -> str: -``` - -### Parameters -| Parameter | Type | Req | Description | -|:----------|:-----|:----|:------------| -| `locale_code` | `str` | Y | BCP-47 or POSIX locale code. | - -### Constraints -- Return: Lowercase POSIX-formatted locale code (hyphens to underscores, lowercased). -- State: None. Pure function. -- Thread: Safe. -- Babel: NOT required. Pure string manipulation. -- Import: `from ftllexengine import normalize_locale` or `from ftllexengine.core.locale_utils import normalize_locale`. 
- ---- - -## `LocaleCode` - -Type alias for BCP-47 / POSIX locale codes. - -### Signature -```python -type LocaleCode = str -``` - -### Constraints -- Value: Any `str`; narrowed by context to a BCP-47 or POSIX locale code (e.g., `"en_US"`, `"de"`). -- Babel: NOT required. Defined in `ftllexengine.localization.types`; no Babel import chain. -- Import: `from ftllexengine import LocaleCode` or `from ftllexengine.localization.types import LocaleCode`. - ---- - -## `require_positive_int` - -Validate that a boundary value is a positive integer, rejecting bool, non-int types, zero, and negative values. - -### Signature -```python -def require_positive_int(value: object, field_name: str) -> int: -``` - -### Parameters -| Parameter | Type | Req | Description | -|:----------|:-----|:----|:------------| -| `value` | `object` | Y | Raw boundary value to validate. Any Python object; non-int and bool always raise TypeError. | -| `field_name` | `str` | Y | Human-readable field label used in error messages. | - -### Constraints -- Return: The validated integer, identical to the input value. -- Raises: `TypeError` if `value` is not an `int` instance, or if it is `bool` (bool is an int subtype but is rejected as semantically wrong for numeric-quantity fields). -- Raises: `ValueError` if `value` is zero or negative. -- State: Pure function; no side effects, no external dependencies. -- Thread: Safe. -- Babel: NOT required. -- Import: `from ftllexengine.core.validators import require_positive_int`. - ---- - -## `require_locale_code` - -Validate and canonicalize a locale code at a system boundary. - -### Signature -```python -def require_locale_code(value: object, field_name: str) -> LocaleCode: -``` - -### Parameters -| Parameter | Type | Req | Description | -|:----------|:-----|:----|:------------| -| `value` | `object` | Y | Raw boundary value to validate. | -| `field_name` | `str` | Y | Field label used in error messages. 
| - -### Constraints -- Return: Lowercase POSIX-formatted locale code (hyphens to underscores, lowercased). -- Raises: `TypeError` if `value` is not a string. -- Raises: `ValueError` if `value` is blank, too long, or structurally invalid. -- State: Trims surrounding whitespace and normalizes the accepted locale. -- Thread: Safe. -- Babel: NOT required. -- Import: `from ftllexengine import require_locale_code` or `from ftllexengine.core.locale_utils import require_locale_code`. - ---- - -## `get_babel_locale` - -### Signature -```python -def get_babel_locale(locale_code: str) -> Locale: -``` - -### Parameters -| Parameter | Type | Req | Description | -|:----------|:-----|:----|:------------| -| `locale_code` | `str` | Y | BCP-47 or POSIX locale code. Normalized before cache lookup. | - -### Constraints -- Return: Babel Locale object. Results are cached; semantically equivalent locale codes (e.g., `"en-US"` and `"en_US"`) share a single cache entry. -- Raises: `BabelImportError` if Babel not installed. -- Raises: `TypeError` / `ValueError` if the locale boundary value is not a valid locale string. -- Raises: `babel.core.UnknownLocaleError` when the canonical locale is structurally valid but unknown to Babel. -- State: Normalizes `locale_code` before delegating to the internal cache. -- Thread: Safe (internal LRU cache uses its own locking). -- Babel: REQUIRED. Install with `pip install ftllexengine[babel]`. -- Import: `from ftllexengine.core.locale_utils import get_babel_locale` - ---- - -## `get_system_locale` - -### Signature -```python -def get_system_locale(*, raise_on_failure: bool = False) -> str: -``` - -### Parameters -| Parameter | Type | Req | Description | -|:----------|:-----|:----|:------------| -| `raise_on_failure` | `bool` | N | Raise RuntimeError if locale cannot be determined. | - -### Constraints -- Return: Detected locale code in normalized POSIX format (lowercase), or "en_us" if not determinable. 
-- Raises: `RuntimeError` if raise_on_failure=True and locale cannot be determined. -- State: Reads OS locale via locale.getlocale() and env vars LC_ALL, LC_MESSAGES, LANG. -- Thread: Safe. -- Babel: NOT required. Uses only stdlib. -- Import: `from ftllexengine import get_system_locale` or `from ftllexengine.core.locale_utils import get_system_locale`. - ---- - -## `clear_module_caches` - -Function that clears all module-level caches in the library. - -### Signature -```python -def clear_module_caches() -> None: -``` - -### Constraints -- Return: None. -- Raises: Never. -- State: Clears currency caches, date caches, locale cache, LocaleContext cache, message introspection cache, and ISO introspection cache. -- Thread: Safe (each cache has internal thread safety). -- Babel: Clears Babel-related caches only if Babel was used. -- Import: `from ftllexengine import clear_module_caches` - ---- - -## `clear_locale_cache` - -Function that clears the Babel locale object cache. - -### Signature -```python -def clear_locale_cache() -> None: -``` - -### Constraints -- Return: None. -- Raises: Never. -- State: Clears the internal locale object cache shared by `get_babel_locale`. -- Thread: Safe (functools.cache internal locking). -- Babel: REQUIRED. Install with `pip install ftllexengine[babel]`. -- Import: `from ftllexengine.core.locale_utils import clear_locale_cache` - ---- - -## `require_date` - -Validate that a boundary value is a `date` (not `datetime`). Rejects `datetime` explicitly because `datetime` is a subclass of `date` and would otherwise pass an `isinstance(value, date)` check. - -### Signature -```python -def require_date(value: object, field_name: str) -> date: -``` - -### Parameters -| Parameter | Type | Req | Description | -|:----------|:-----|:----|:------------| -| `value` | `object` | Y | Raw boundary value to validate. | -| `field_name` | `str` | Y | Human-readable field label used in error messages. 
| - -### Constraints -- Return: The validated `date` object, unchanged. -- Raises: `TypeError` if `value` is a `datetime` (subtype check runs first), with message "must be date, got datetime". -- Raises: `TypeError` if `value` is not a `date` at all, with message including `field_name` and `type(value).__name__`. -- State: Pure function; no side effects. -- Thread: Safe. -- Babel: NOT required. -- Import: `from ftllexengine import require_date` or `from ftllexengine.core.validators import require_date`. - ---- - -## `require_datetime` - -Validate that a boundary value is a `datetime`. Rejects plain `date` objects (which are NOT subtypes of `datetime`). - -### Signature -```python -def require_datetime(value: object, field_name: str) -> datetime: -``` - -### Parameters -| Parameter | Type | Req | Description | -|:----------|:-----|:----|:------------| -| `value` | `object` | Y | Raw boundary value to validate. | -| `field_name` | `str` | Y | Human-readable field label used in error messages. | - -### Constraints -- Return: The validated `datetime` object, unchanged. -- Raises: `TypeError` if `value` is not a `datetime` instance (includes plain `date`). -- State: Pure function; no side effects. -- Thread: Safe. -- Babel: NOT required. -- Import: `from ftllexengine import require_datetime` or `from ftllexengine.core.validators import require_datetime`. - ---- - -## `require_fluent_number` - -Validate that a boundary value is a `FluentNumber`. - -### Signature -```python -def require_fluent_number(value: object, field_name: str) -> FluentNumber: -``` - -### Parameters -| Parameter | Type | Req | Description | -|:----------|:-----|:----|:------------| -| `value` | `object` | Y | Raw boundary value to validate. | -| `field_name` | `str` | Y | Human-readable field label used in error messages. | - -### Constraints -- Return: The validated `FluentNumber` object, unchanged. -- Raises: `TypeError` if `value` is not a `FluentNumber` instance. 
Plain `int`, `float`, and `Decimal` are rejected. -- State: Pure function; no side effects. -- Thread: Safe. -- Babel: NOT required. -- Import: `from ftllexengine import require_fluent_number` or `from ftllexengine.core.validators import require_fluent_number`. - ---- +- Purpose: Summarize aggregate cache state across bundles from `FluentLocalization.get_cache_stats()` +- Fields: Includes all `CacheStats` fields aggregated across initialized bundles, plus `bundle_count` +- State: Read-only result object diff --git a/docs/DOC_02_SyntaxExpressions.md b/docs/DOC_02_SyntaxExpressions.md new file mode 100644 index 00000000..694bbdb1 --- /dev/null +++ b/docs/DOC_02_SyntaxExpressions.md @@ -0,0 +1,341 @@ +--- +afad: "3.5" +version: "0.163.0" +domain: SYNTAX_EXPRESSIONS +updated: "2026-04-22" +route: + keywords: [TextElement, Placeable, SelectExpression, VariableReference, FunctionReference, Entry, Expression] + questions: ["which AST node types model Fluent expressions and references?", "what public syntax union aliases exist?", "where are placeables and selectors documented?"] +--- + +# Syntax Expression Types Reference + +This reference covers pattern elements, expression/reference nodes, call argument nodes, and public syntax union aliases. +Core resource and declaration nodes live in [DOC_02_SyntaxTypes.md](DOC_02_SyntaxTypes.md). + +## `TextElement` + +AST node for literal text inside a pattern. + +### Signature +```python +@dataclass(frozen=True, slots=True) +class TextElement: + value: str + span: Span | None = None +``` + +### Constraints +- Import: `from ftllexengine.syntax import TextElement` +- Helper: `TextElement.guard()` performs runtime narrowing + +--- + +## `Placeable` + +AST node for `{ ... }` dynamic content inside a pattern. 
+ +### Signature +```python +@dataclass(frozen=True, slots=True) +class Placeable: + expression: Expression + span: Span | None = None +``` + +### Constraints +- Import: `from ftllexengine.syntax import Placeable` +- Helper: `Placeable.guard()` performs runtime narrowing + +--- + +## `SelectExpression` + +AST node for Fluent select expressions. + +### Signature +```python +@dataclass(frozen=True, slots=True) +class SelectExpression: + selector: SelectorExpression + variants: tuple[Variant, ...] + span: Span | None = None +``` + +### Constraints +- Import: `from ftllexengine.syntax import SelectExpression` +- Invariants: at least one variant and exactly one default variant +- Selector type: restricted to the `SelectorExpression` union, not arbitrary `Expression` +- Helper: `SelectExpression.guard()` performs runtime narrowing + +--- + +## `Variant` + +AST node for one branch of a select expression. + +### Signature +```python +@dataclass(frozen=True, slots=True) +class Variant: + key: Identifier | NumberLiteral + value: Pattern + default: bool = False + span: Span | None = None +``` + +### Constraints +- Import: `from ftllexengine.syntax import Variant` +- `default=True` marks the `*` branch + +--- + +## `StringLiteral` + +AST node for quoted string literals. + +### Signature +```python +@dataclass(frozen=True, slots=True) +class StringLiteral: + value: str + span: Span | None = None +``` + +### Constraints +- Import: `from ftllexengine.syntax import StringLiteral` +- Helper: `StringLiteral.guard()` performs runtime narrowing + +--- + +## `NumberLiteral` + +AST node for integer or decimal literals. 
+ +### Signature +```python +@dataclass(frozen=True, slots=True) +class NumberLiteral: + value: int | Decimal + raw: str + span: Span | None = None +``` + +### Constraints +- Import: `from ftllexengine.syntax import NumberLiteral` +- Invariants: `value` cannot be `bool`; `raw` must parse back to the same finite numeric value +- Helper: `NumberLiteral.guard()` performs runtime narrowing + +--- + +## `VariableReference` + +AST node for `$name` variable references. + +### Signature +```python +@dataclass(frozen=True, slots=True) +class VariableReference: + id: Identifier + span: Span | None = None +``` + +### Constraints +- Import: `from ftllexengine.syntax import VariableReference` +- Helper: `VariableReference.guard()` performs runtime narrowing + +--- + +## `MessageReference` + +AST node for message references like `hello` or `hello.attr`. + +### Signature +```python +@dataclass(frozen=True, slots=True) +class MessageReference: + id: Identifier + attribute: Identifier | None = None + span: Span | None = None +``` + +### Constraints +- Import: `from ftllexengine.syntax import MessageReference` +- Helper: `MessageReference.guard()` performs runtime narrowing + +--- + +## `TermReference` + +AST node for term references like `-brand` or `-brand.attr`. + +### Signature +```python +@dataclass(frozen=True, slots=True) +class TermReference: + id: Identifier + attribute: Identifier | None = None + arguments: CallArguments | None = None + span: Span | None = None +``` + +### Constraints +- Import: `from ftllexengine.syntax import TermReference` +- Helper: `TermReference.guard()` performs runtime narrowing + +--- + +## `FunctionReference` + +AST node for function calls such as `NUMBER($count)`. 
+ +### Signature +```python +@dataclass(frozen=True, slots=True) +class FunctionReference: + id: Identifier + arguments: CallArguments + span: Span | None = None +``` + +### Constraints +- Import: `from ftllexengine.syntax import FunctionReference` +- Helper: `FunctionReference.guard()` performs runtime narrowing + +--- + +## `CallArguments` + +AST node for positional and named function arguments. + +### Signature +```python +@dataclass(frozen=True, slots=True) +class CallArguments: + positional: tuple[ + StringLiteral + | NumberLiteral + | VariableReference + | MessageReference + | TermReference + | FunctionReference + | Placeable, + ..., + ] + named: tuple[NamedArgument, ...] + span: Span | None = None +``` + +### Constraints +- Import: `from ftllexengine.syntax import CallArguments` +- Used by: `FunctionReference.arguments` and optional `TermReference.arguments` + +--- + +## `NamedArgument` + +AST node for `name: literal` function arguments. + +### Signature +```python +@dataclass(frozen=True, slots=True) +class NamedArgument: + name: Identifier + value: FTLLiteral + span: Span | None = None +``` + +### Constraints +- Import: `from ftllexengine.syntax import NamedArgument` +- Invariant: `value` is restricted to `FTLLiteral`, not any inline expression + +--- + +## `Entry` + +Type alias for every top-level AST entry that can appear in a `Resource`. + +### Signature +```python +type Entry = Message | Term | Comment | Junk +``` + +### Constraints +- Import: `from ftllexengine.syntax import Entry` +- Used by: `Resource.entries` and `parse_stream()` +- Purpose: closed top-level union for syntax tooling + +--- + +## `PatternElement` + +Type alias for the elements that make up a `Pattern`. 
+ +### Signature +```python +type PatternElement = TextElement | Placeable +``` + +### Constraints +- Import: `from ftllexengine.syntax import PatternElement` +- Used by: `Pattern.elements` +- Purpose: restrict pattern contents to literal text or embedded expressions + +--- + +## `Expression` + +Type alias for all expression forms valid inside a `Placeable`. + +### Signature +```python +type Expression = ( + SelectExpression + | StringLiteral + | NumberLiteral + | VariableReference + | MessageReference + | TermReference + | FunctionReference + | Placeable +) +``` + +### Constraints +- Import: `from ftllexengine.syntax import Expression` +- Used by: `Placeable.expression` +- Purpose: closed union for parser, serializer, and visitor dispatch + +--- + +## `SelectorExpression` + +Type alias for expression forms valid as a `SelectExpression.selector`. + +### Signature +```python +type SelectorExpression = ( + VariableReference | MessageReference | TermReference | FunctionReference | NumberLiteral +) +``` + +### Constraints +- Import: `from ftllexengine.syntax import SelectorExpression` +- Narrower than: `Expression` +- Purpose: encode the Fluent selector restriction at the type level + +--- + +## `FTLLiteral` + +Type alias for literal values allowed in named function and term arguments. 
+ +### Signature +```python +type FTLLiteral = StringLiteral | NumberLiteral +``` + +### Constraints +- Import: `from ftllexengine.syntax import FTLLiteral` +- Used by: `NamedArgument.value` +- Purpose: enforce the Fluent rule that named arguments are literal-only diff --git a/docs/DOC_02_SyntaxTypes.md b/docs/DOC_02_SyntaxTypes.md new file mode 100644 index 00000000..b56257cc --- /dev/null +++ b/docs/DOC_02_SyntaxTypes.md @@ -0,0 +1,211 @@ +--- +afad: "3.5" +version: "0.163.0" +domain: SYNTAX_TYPES +updated: "2026-04-22" +route: + keywords: [AST, Resource, Message, Term, Pattern, Span, Annotation, syntax nodes] + questions: ["how is FTL represented in the AST?", "which public AST container and declaration node types exist?", "where are spans and parser annotations documented?"] +--- + +# Syntax Types Reference + +This reference covers the core AST containers and declaration nodes. +Pattern/expression/reference nodes and union aliases are documented in [DOC_02_SyntaxExpressions.md](DOC_02_SyntaxExpressions.md). + +## `Span` + +Immutable source span in normalized character offsets. + +### Signature +```python +@dataclass(frozen=True, slots=True) +class Span: + start: int + end: int +``` + +### Constraints +- Import: `from ftllexengine.syntax import Span` +- Semantics: `start` is inclusive and `end` is exclusive, both in normalized character offsets +- Invariants: `start >= 0` and `end >= start` + +--- + +## `Annotation` + +Parser annotation attached to `Junk` nodes. + +### Signature +```python +@dataclass(frozen=True, slots=True) +class Annotation: + code: str + message: str + arguments: tuple[tuple[str, str], ...] | None = None + span: Span | None = None +``` + +### Constraints +- Import: `from ftllexengine.syntax import Annotation` +- Purpose: preserves parser error metadata during recovery +- Used by: `Junk.annotations` and `ValidationResult.annotations` + +--- + +## `Identifier` + +Immutable AST node for Fluent identifiers. 
+ +### Signature +```python +@dataclass(frozen=True, slots=True) +class Identifier: + name: str + span: Span | None = None +``` + +### Constraints +- Import: `from ftllexengine.syntax import Identifier` +- Purpose: wraps identifier text so AST nodes can retain spans +- Helper: `Identifier.guard()` performs runtime narrowing + +--- + +## `Resource` + +Root AST node returned by `parse_ftl()`. + +### Signature +```python +@dataclass(frozen=True, slots=True) +class Resource: + entries: tuple[Entry, ...] +``` + +### Constraints +- Import: `from ftllexengine.syntax import Resource` +- Purpose: immutable container for top-level Fluent entries in source order + +--- + +## `Message` + +AST node for a public Fluent message. + +### Signature +```python +@dataclass(frozen=True, slots=True, weakref_slot=True) +class Message: + id: Identifier + value: Pattern | None + attributes: tuple[Attribute, ...] + comment: Comment | None = None + span: Span | None = None +``` + +### Constraints +- Import: `from ftllexengine.syntax import Message` +- Invariant: a message must have `value`, `attributes`, or both +- Helper: `Message.guard()` performs runtime narrowing + +--- + +## `Term` + +AST node for a private Fluent term. + +### Signature +```python +@dataclass(frozen=True, slots=True, weakref_slot=True) +class Term: + id: Identifier + value: Pattern + attributes: tuple[Attribute, ...] + comment: Comment | None = None + span: Span | None = None +``` + +### Constraints +- Import: `from ftllexengine.syntax import Term` +- Invariant: a term always has a `value` +- Helper: `Term.guard()` performs runtime narrowing + +--- + +## `Attribute` + +AST node for a message or term attribute. 
+ +### Signature +```python +@dataclass(frozen=True, slots=True) +class Attribute: + id: Identifier + value: Pattern + span: Span | None = None +``` + +### Constraints +- Import: `from ftllexengine.syntax import Attribute` +- Used on: `Message.attributes` and `Term.attributes` + +--- + +## `Comment` + +AST node for Fluent comments. + +### Signature +```python +@dataclass(frozen=True, slots=True) +class Comment: + content: str + type: CommentType + span: Span | None = None +``` + +### Constraints +- Import: `from ftllexengine.syntax import Comment` +- `type` uses `CommentType` +- Helper: `Comment.guard()` performs runtime narrowing + +--- + +## `Junk` + +AST node for unparseable content preserved during recovery. + +### Signature +```python +@dataclass(frozen=True, slots=True) +class Junk: + content: str + annotations: tuple[Annotation, ...] = () + span: Span | None = None +``` + +### Constraints +- Import: `from ftllexengine.syntax import Junk` +- Purpose: keeps invalid source and its parser annotations available to tooling +- Helper: `Junk.guard()` performs runtime narrowing + +--- + +## `Pattern` + +AST node for a sequence of literal text and placeables. + +### Signature +```python +@dataclass(frozen=True, slots=True) +class Pattern: + elements: tuple[PatternElement, ...] 
+ span: Span | None = None +``` + +### Constraints +- Import: `from ftllexengine.syntax import Pattern` +- Used by: `Message.value`, `Term.value`, `Attribute.value`, `Variant.value` + +--- diff --git a/docs/DOC_02_Types.md b/docs/DOC_02_Types.md index 22c90f42..a5074d0b 100644 --- a/docs/DOC_02_Types.md +++ b/docs/DOC_02_Types.md @@ -1,680 +1,340 @@ --- -afad: "3.3" -version: "0.162.0" +afad: "3.5" +version: "0.163.0" domain: TYPES -updated: "2026-03-23" +updated: "2026-04-22" route: - keywords: [Resource, Message, Term, Pattern, Attribute, Placeable, AST, dataclass, FluentValue, FTLLiteral, TerritoryInfo, CurrencyInfo, ISO 3166, ISO 4217, require_currency_code, require_territory_code] - questions: ["what AST nodes exist?", "how is FTL represented?", "what is the Resource structure?", "what types can FluentValue hold?", "how to get territory info?", "how to get currency info?", "how do I validate an ISO currency code?", "how do I validate an ISO territory code?"] + keywords: [FluentNumber, FluentValue, ParseResult, LocaleCode, CurrencyCode, TerritoryInfo, MessageIntrospection] + questions: ["what public types does FTLLexEngine expose?", "what value types can formatting accept?", "which semantic aliases and lookup-result types exist?", "what introspection result types exist?"] --- -# AST Types Reference +# Types Reference --- -## `Resource` - -### Signature -```python -@dataclass(frozen=True, slots=True) -class Resource: - entries: tuple[Entry, ...] -``` - -### Parameters -| Parameter | Type | Req | Description | -|:----------|:-----|:----|:------------| -| `entries` | `tuple[Entry, ...]` | Y | All top-level entries. | - -### Constraints -- Return: Immutable root AST node. -- State: Frozen dataclass. - ---- +## `FluentNumber` -## `Message` +Immutable wrapper that keeps a numeric value, its rendered string, and visible precision together. 
### Signature ```python @dataclass(frozen=True, slots=True) -class Message: - id: Identifier - value: Pattern | None - attributes: tuple[Attribute, ...] - comment: Comment | None = None - span: Span | None = None - - @staticmethod - def guard(entry: object) -> TypeIs[Message]: ... +class FluentNumber: + value: int | Decimal + formatted: str + precision: int | None = None ``` -### Parameters -| Parameter | Type | Req | Description | -|:----------|:-----|:----|:------------| -| `id` | `Identifier` | Y | Message identifier. | -| `value` | `Pattern \| None` | Y | Message value pattern. | -| `attributes` | `tuple[Attribute, ...]` | Y | Message attributes. | -| `comment` | `Comment \| None` | N | Associated comment. | -| `span` | `Span \| None` | N | Source position. | - ### Constraints -- Return: Immutable message node. -- State: Frozen dataclass. -- Validation: `__post_init__` validates that value or attributes is non-empty. Raises `ValueError` if both value is None and attributes is empty. +- Import: `from ftllexengine import FluentNumber` +- Purpose: lets formatted numbers still participate in plural resolution and exact numeric comparisons +- Invariants: `value` must be `int | Decimal`, never `bool`; `precision` is `None` or `>= 0` +- Helpers: `decimal_value` returns exact `Decimal`; `str(value)` returns `formatted` +- Thread: safe --- -## `Term` - -### Signature -```python -@dataclass(frozen=True, slots=True) -class Term: - id: Identifier - value: Pattern - attributes: tuple[Attribute, ...] - comment: Comment | None = None - span: Span | None = None - - @staticmethod - def guard(entry: object) -> TypeIs[Term]: ... -``` - -### Parameters -| Parameter | Type | Req | Description | -|:----------|:-----|:----|:------------| -| `id` | `Identifier` | Y | Term identifier (without - prefix). | -| `value` | `Pattern` | Y | Term value pattern (required). | -| `attributes` | `tuple[Attribute, ...]` | Y | Term attributes. 
| -| `comment` | `Comment \| None` | N | Associated comment. | -| `span` | `Span \| None` | N | Source position. | - -### Constraints -- Return: Immutable term node. -- State: Frozen dataclass. -- Validation: `__post_init__` validates that value is not None. Raises `ValueError` if value is None. - ---- +## `FluentValue` -## `Attribute` +Recursive type alias for values accepted by runtime formatting and custom functions. ### Signature ```python -@dataclass(frozen=True, slots=True) -class Attribute: - id: Identifier - value: Pattern - span: Span | None = None +type FluentValue = ( + str + | int + | Decimal + | datetime + | date + | FluentNumber + | None + | Sequence["FluentValue"] + | Mapping[str, "FluentValue"] +) ``` -### Parameters -| Parameter | Type | Req | Description | -|:----------|:-----|:----|:------------| -| `id` | `Identifier` | Y | Attribute name. | -| `value` | `Pattern` | Y | Attribute value pattern. | -| `span` | `Span \| None` | N | Source position. | - ### Constraints -- Return: Immutable attribute node. -- State: Frozen dataclass. +- Import: `from ftllexengine import FluentValue` +- Includes: scalar values, `FluentNumber`, nested sequences, and string-keyed mappings +- Excludes: `float` by design; `bool` is not intended even though it is an `int` subtype +- Purpose: canonical runtime boundary type for formatting and custom functions --- -## `Comment` - -### Signature -```python -@dataclass(frozen=True, slots=True) -class Comment: - content: str - type: CommentType - span: Span | None = None - - @staticmethod - def guard(entry: object) -> TypeIs[Comment]: ... -``` - -### Parameters -| Parameter | Type | Req | Description | -|:----------|:-----|:----|:------------| -| `content` | `str` | Y | Comment text. | -| `type` | `CommentType` | Y | COMMENT, GROUP, or RESOURCE. | -| `span` | `Span \| None` | N | Source position. | - -### Constraints -- Return: Immutable comment node. -- State: Frozen dataclass. 
- ---- +## `ParseResult` -## `Junk` +Generic return type for locale-aware parse helpers. ### Signature ```python -@dataclass(frozen=True, slots=True) -class Junk: - content: str - annotations: tuple[Annotation, ...] = () - span: Span | None = None - - @staticmethod - def guard(entry: object) -> TypeIs[Junk]: ... +type ParseResult[T] = tuple[T | None, tuple[FrozenFluentError, ...]] ``` -### Parameters -| Parameter | Type | Req | Description | -|:----------|:-----|:----|:------------| -| `content` | `str` | Y | Unparseable source text. | -| `annotations` | `tuple[Annotation, ...]` | N | Parse error annotations. | -| `span` | `Span \| None` | N | Source position. | - ### Constraints -- Return: Immutable junk node. -- State: Frozen dataclass. +- Import: `from ftllexengine import ParseResult` +- Success contract: parsed value in slot 0 and empty error tuple in slot 1 +- Failure contract: `None` in slot 0 and one or more `FrozenFluentError` instances in slot 1 +- Used by: `parse_decimal()`, `parse_date()`, `parse_datetime()`, `parse_currency()`, `parse_fluent_number()` --- -## `Pattern` - -### Signature -```python -@dataclass(frozen=True, slots=True) -class Pattern: - elements: tuple[PatternElement, ...] - span: Span | None = None -``` - -### Parameters -| Parameter | Type | Req | Description | -|:----------|:-----|:----|:------------| -| `elements` | `tuple[PatternElement, ...]` | Y | Text and placeable elements. | -| `span` | `Span \| None` | N | Source location span. | - -### Constraints -- Return: Immutable pattern node. -- State: Frozen dataclass. - ---- +## `LocaleCode` -## `TextElement` +Semantic alias for locale identifiers in localization APIs. ### Signature ```python -@dataclass(frozen=True, slots=True) -class TextElement: - value: str - span: Span | None = None - - @staticmethod - def guard(elem: object) -> TypeIs[TextElement]: ... 
+type LocaleCode = str ``` -### Parameters -| Parameter | Type | Req | Description | -|:----------|:-----|:----|:------------| -| `value` | `str` | Y | Plain text content. | -| `span` | `Span \| None` | N | Source location span. | - ### Constraints -- Return: Immutable text element. -- State: Frozen dataclass. +- Import: `from ftllexengine import LocaleCode` +- Semantics: BCP-47 or POSIX-style locale code such as `"en"`, `"lv"`, `"de-DE"`, or `"en_US"` --- -## `Placeable` - -### Signature -```python -@dataclass(frozen=True, slots=True) -class Placeable: - expression: Expression - span: Span | None = None - - @staticmethod - def guard(elem: object) -> TypeIs[Placeable]: ... -``` - -### Parameters -| Parameter | Type | Req | Description | -|:----------|:-----|:----|:------------| -| `expression` | `Expression` | Y | Contained expression. | -| `span` | `Span \| None` | N | Source location span. | - -### Constraints -- Return: Immutable placeable node. -- State: Frozen dataclass. - ---- +## `MessageId` -## `SelectExpression` +Semantic alias for Fluent message identifiers. ### Signature ```python -@dataclass(frozen=True, slots=True) -class SelectExpression: - selector: SelectorExpression - variants: tuple[Variant, ...] - span: Span | None = None - - @staticmethod - def guard(expr: object) -> TypeIs[SelectExpression]: ... +type MessageId = str ``` -### Parameters -| Parameter | Type | Req | Description | -|:----------|:-----|:----|:------------| -| `selector` | `SelectorExpression` | Y | Value to select on (InlineExpression minus Placeable). | -| `variants` | `tuple[Variant, ...]` | Y | Selection variants. | -| `span` | `Span \| None` | N | Source position (start/end). | - ### Constraints -- Return: Immutable select expression. -- State: Frozen dataclass. -- Validation: `__post_init__` validates that variants is non-empty and exactly one default variant exists. Raises `ValueError` on constraint violation. 
-- Selector: `selector` is typed `SelectorExpression`, a restricted subset of `InlineExpression` that excludes `Placeable`. The parser enforces this; programmatic construction via `object.__setattr__` bypass is rejected at validation time. +- Import: `from ftllexengine import MessageId` +- Semantics: message key like `"welcome"` or `"error-network"` --- -## `Variant` - -### Signature -```python -@dataclass(frozen=True, slots=True) -class Variant: - key: VariantKey - value: Pattern - default: bool = False - span: Span | None = None -``` - -### Parameters -| Parameter | Type | Req | Description | -|:----------|:-----|:----|:------------| -| `key` | `VariantKey` | Y | Variant key (Identifier or NumberLiteral). | -| `value` | `Pattern` | Y | Variant pattern. | -| `default` | `bool` | N | True for default variant (*). | -| `span` | `Span \| None` | N | Source position. | - -### Constraints -- Return: Immutable variant node. -- State: Frozen dataclass. - ---- +## `ResourceId` -## `StringLiteral` +Semantic alias for resource identifiers used by localization loaders. ### Signature ```python -@dataclass(frozen=True, slots=True) -class StringLiteral: - value: str - span: Span | None = None - - @staticmethod - def guard(key: object) -> TypeIs[StringLiteral]: ... +type ResourceId = str ``` -### Parameters -| Parameter | Type | Req | Description | -|:----------|:-----|:----|:------------| -| `value` | `str` | Y | String content (without quotes). | -| `span` | `Span \| None` | N | Source position. | - ### Constraints -- Return: Immutable string literal. -- State: Frozen dataclass. -- Guard: `StringLiteral.guard(obj)` returns `TypeIs[StringLiteral]` for type narrowing. +- Import: `from ftllexengine import ResourceId` +- Semantics: logical resource name such as `"main.ftl"` or `"errors.ftl"` --- -## `NumberLiteral` +## `FTLSource` + +Semantic alias for raw Fluent source text. 
### Signature ```python -from decimal import Decimal - -@dataclass(frozen=True, slots=True) -class NumberLiteral: - value: int | Decimal - raw: str - span: Span | None = None - - @staticmethod - def guard(key: object) -> TypeIs[NumberLiteral]: ... +type FTLSource = str ``` -### Parameters -| Parameter | Type | Req | Description | -|:----------|:-----|:----|:------------| -| `value` | `int \| Decimal` | Y | Parsed numeric value (int for integers, Decimal for decimals). | -| `raw` | `str` | Y | Original source representation for serialization. | -| `span` | `Span \| None` | N | Source position. | - ### Constraints -- Return: Immutable number literal. -- State: Frozen dataclass. -- Precision: Integer literals use `int` for memory efficiency. Decimal literals use `Decimal` for financial-grade precision, eliminating float rounding errors (0.1 + 0.2 = 0.3, not 0.30000000000000004). -- Invariant: AST transformers creating new nodes must ensure raw represents value. Parser guarantees consistency at construction. +- Import: `from ftllexengine import FTLSource` +- Semantics: normalized or unnormalized FTL text before parsing --- -## `FTLLiteral` +## `CurrencyCode` -Type alias for the closed set of values valid as named-argument values in Fluent call expressions. +Nominal wrapper for ISO 4217 currency codes. ### Signature ```python -type FTLLiteral = StringLiteral | NumberLiteral +CurrencyCode = NewType("CurrencyCode", str) ``` -### Parameters -| Type | Description | -|:-----|:------------| -| `StringLiteral` | Quoted string value. | -| `NumberLiteral` | Numeric literal (int or Decimal). | - ### Constraints -- FTL EBNF: `named-argument ::= identifier ":" (StringLiteral \| NumberLiteral)`. -- `NamedArgument.value` is typed `FTLLiteral`; only these two variants are spec-compliant. -- Import: `from ftllexengine.syntax.ast import FTLLiteral` or `from ftllexengine.syntax import FTLLiteral`. 
+- Import: `from ftllexengine import CurrencyCode` +- Purpose: distinguish validated currency codes from arbitrary strings +- Validation path: use `is_valid_currency_code()` or `require_currency_code()` before constructing or narrowing --- -## `VariableReference` +## `TerritoryCode` + +Nominal wrapper for ISO 3166-1 alpha-2 territory codes. ### Signature ```python -@dataclass(frozen=True, slots=True) -class VariableReference: - id: Identifier - span: Span | None = None - - @staticmethod - def guard(expr: object) -> TypeIs[VariableReference]: ... +TerritoryCode = NewType("TerritoryCode", str) ``` -### Parameters -| Parameter | Type | Req | Description | -|:----------|:-----|:----|:------------| -| `id` | `Identifier` | Y | Variable identifier (without $). | -| `span` | `Span \| None` | N | Source position for IDE integration. | - ### Constraints -- Return: Immutable variable reference. -- State: Frozen dataclass. -- Span: Populated by parser for source-tracked ASTs. +- Import: `from ftllexengine import TerritoryCode` +- Purpose: distinguish validated territory codes from arbitrary strings +- Validation path: use `is_valid_territory_code()` or `require_territory_code()` before constructing or narrowing --- -## `MessageReference` +## `CurrencyInfo` + +Immutable ISO 4217 lookup result. ### Signature ```python @dataclass(frozen=True, slots=True) -class MessageReference: - id: Identifier - attribute: Identifier | None = None - span: Span | None = None - - @staticmethod - def guard(expr: object) -> TypeIs[MessageReference]: ... +class CurrencyInfo: + code: CurrencyCode + name: str + symbol: str + decimal_digits: int ``` -### Parameters -| Parameter | Type | Req | Description | -|:----------|:-----|:----|:------------| -| `id` | `Identifier` | Y | Message identifier. | -| `attribute` | `Identifier \| None` | N | Attribute name if present. | -| `span` | `Span \| None` | N | Source position for IDE integration. | - ### Constraints -- Return: Immutable message reference. 
-- State: Frozen dataclass. -- Span: Populated by parser for source-tracked ASTs. +- Import: `from ftllexengine.introspection.iso import CurrencyInfo` +- Produced by: `get_currency()` and `list_currencies()` +- Locale note: `name` and `symbol` depend on the lookup locale; `decimal_digits` follows the embedded ISO 4217 table +- Thread: safe --- -## `TermReference` +## `TerritoryInfo` + +Immutable ISO 3166-1 lookup result. ### Signature ```python @dataclass(frozen=True, slots=True) -class TermReference: - id: Identifier - attribute: Identifier | None = None - arguments: CallArguments | None = None - span: Span | None = None - - @staticmethod - def guard(expr: object) -> TypeIs[TermReference]: ... +class TerritoryInfo: + alpha2: TerritoryCode + name: str + currencies: tuple[CurrencyCode, ...] + official_languages: tuple[str, ...] ``` -### Parameters -| Parameter | Type | Req | Description | -|:----------|:-----|:----|:------------| -| `id` | `Identifier` | Y | Term identifier (without -). | -| `attribute` | `Identifier \| None` | N | Attribute name if present. | -| `arguments` | `CallArguments \| None` | N | Parameterized term args. | -| `span` | `Span \| None` | N | Source position for IDE integration. | - ### Constraints -- Return: Immutable term reference. -- State: Frozen dataclass. -- Span: Populated by parser for source-tracked ASTs. +- Import: `from ftllexengine.introspection.iso import TerritoryInfo` +- Produced by: `get_territory()` and `list_territories()` +- Locale note: `name` depends on the lookup locale; currencies and languages come from CLDR data +- Thread: safe --- -## `FunctionReference` +## `CommentType` + +Enum of Fluent comment kinds. ### Signature ```python -@dataclass(frozen=True, slots=True) -class FunctionReference: - id: Identifier - arguments: CallArguments - span: Span | None = None - - @staticmethod - def guard(expr: object) -> TypeIs[FunctionReference]: ... 
+class CommentType(StrEnum): + COMMENT = "comment" + GROUP = "group" + RESOURCE = "resource" ``` -### Parameters -| Parameter | Type | Req | Description | -|:----------|:-----|:----|:------------| -| `id` | `Identifier` | Y | Function name (e.g., NUMBER). | -| `arguments` | `CallArguments` | Y | Function arguments. | -| `span` | `Span \| None` | N | Source position for IDE integration. | - ### Constraints -- Return: Immutable function reference. -- State: Frozen dataclass. -- Span: Populated by parser for source-tracked ASTs. +- Import: `from ftllexengine.enums import CommentType` +- Used by: `syntax.ast.Comment.type` +- Type: `StrEnum` --- -## `CallArguments` +## `ReferenceKind` + +Enum describing whether a reference points at a message or a term. ### Signature ```python -@dataclass(frozen=True, slots=True) -class CallArguments: - positional: tuple[InlineExpression, ...] - named: tuple[NamedArgument, ...] - span: Span | None = None +class ReferenceKind(StrEnum): + MESSAGE = "message" + TERM = "term" ``` -### Parameters -| Parameter | Type | Req | Description | -|:----------|:-----|:----|:------------| -| `positional` | `tuple[InlineExpression, ...]` | Y | Positional arguments. | -| `named` | `tuple[NamedArgument, ...]` | Y | Named arguments. | -| `span` | `Span \| None` | N | Source position. | - ### Constraints -- Return: Immutable call arguments. -- State: Frozen dataclass. -- Validation: `serialize(validate=True)` rejects duplicate named argument names. +- Import: `from ftllexengine.enums import ReferenceKind` +- Used by: `ReferenceInfo.kind` +- Type: `StrEnum` --- -## `NamedArgument` +## `VariableContext` + +Enum describing where a variable appears inside a message. 
### Signature ```python -@dataclass(frozen=True, slots=True) -class NamedArgument: - name: Identifier - value: FTLLiteral - span: Span | None = None +class VariableContext(StrEnum): + PATTERN = "pattern" + SELECTOR = "selector" + VARIANT = "variant" + FUNCTION_ARG = "function_arg" ``` -### Parameters -| Parameter | Type | Req | Description | -|:----------|:-----|:----|:------------| -| `name` | `Identifier` | Y | Argument name. | -| `value` | `FTLLiteral` | Y | Literal argument value (StringLiteral or NumberLiteral). | -| `span` | `Span \| None` | N | Source position. | - ### Constraints -- Return: Immutable named argument. -- State: Frozen dataclass. -- FTL EBNF: `NamedArgument ::= Identifier blank? ":" blank? (StringLiteral | NumberLiteral)`. -- Type enforced: `value` is `FTLLiteral = StringLiteral | NumberLiteral`; passing any other `InlineExpression` subtype is a static type error. -- Defense: Serializer validates value type at runtime as defense-in-depth against `object.__setattr__` bypass. +- Import: `from ftllexengine.enums import VariableContext` +- Used by: `VariableInfo.context` +- Type: `StrEnum` --- -## `Identifier` +## `VariableInfo` + +Immutable metadata about a variable occurrence discovered during introspection. ### Signature ```python @dataclass(frozen=True, slots=True) -class Identifier: +class VariableInfo: name: str + context: VariableContext span: Span | None = None - - @staticmethod - def guard(key: object) -> TypeIs[Identifier]: ... ``` -### Parameters -| Parameter | Type | Req | Description | -|:----------|:-----|:----|:------------| -| `name` | `str` | Y | Identifier string. | -| `span` | `Span \| None` | N | Source position. | - ### Constraints -- Return: Immutable identifier. -- State: Frozen dataclass. 
+- Import: `from ftllexengine.introspection.message import VariableInfo` +- Produced by: `introspect_message()` --- -## `Span` - -### Signature -```python -@dataclass(frozen=True, slots=True) -class Span: - start: int - end: int -``` - -### Parameters -| Parameter | Type | Req | Description | -|:----------|:-----|:----|:------------| -| `start` | `int` | Y | Start character offset (inclusive). | -| `end` | `int` | Y | End character offset (exclusive). | - -### Constraints -- Return: Immutable span. -- Raises: `ValueError` if start < 0 or end < start. -- State: Frozen dataclass. -- Note: Positions are character offsets (code points), not bytes. - ---- +## `FunctionCallInfo` -## `Annotation` +Immutable metadata about a function call discovered during introspection. ### Signature ```python @dataclass(frozen=True, slots=True) -class Annotation: - code: str - message: str - arguments: tuple[tuple[str, str], ...] | None = None +class FunctionCallInfo: + name: str + positional_arg_vars: tuple[str, ...] + named_args: frozenset[str] span: Span | None = None ``` -### Parameters -| Parameter | Type | Req | Description | -|:----------|:-----|:----|:------------| -| `code` | `str` | Y | Error code. | -| `message` | `str` | Y | Error message. | -| `arguments` | `tuple[tuple[str, str], ...] \| None` | N | Additional context as key-value pairs. | -| `span` | `Span \| None` | N | Error location. | - ### Constraints -- Return: Immutable annotation. -- State: Frozen dataclass. +- Import: `from ftllexengine.introspection.message import FunctionCallInfo` +- Produced by: `introspect_message()` +- `positional_arg_vars` contains only variable names, not literal argument values --- -## `ASTVisitor` - -### Signature -```python -class ASTVisitor[T = ASTNode]: - def __init__(self, *, max_depth: int | None = None) -> None: ... - def visit(self, node: ASTNode) -> T: ... - def generic_visit(self, node: ASTNode) -> T: ... 
-``` - -### Parameters -| Parameter | Type | Req | Description | -|:----------|:-----|:----|:------------| -| `max_depth` | `int \| None` | N | Maximum traversal depth (default: 100). | - -### Constraints -- Return: Visited/transformed node. -- State: Maintains dispatch cache and depth guard. -- Thread: Not thread-safe (instance state). -- Subclass: MUST call `super().__init__()` to initialize depth guard. -- Raises: `FrozenFluentError` (category=RESOLUTION) when traversal exceeds max_depth. -- Depth: Guard in `visit()` protects all traversals (bypass-proof). - ---- +## `ReferenceInfo` -## `ASTTransformer` +Immutable metadata about a message or term dependency discovered during introspection. ### Signature ```python -class ASTTransformer(ASTVisitor[ASTNode | None | list[ASTNode]]): - def __init__(self, *, max_depth: int | None = None) -> None: ... - def transform(self, node: ASTNode) -> ASTNode | None | list[ASTNode]: ... - def generic_visit(self, node: ASTNode) -> ASTNode | None | list[ASTNode]: ... +@dataclass(frozen=True, slots=True) +class ReferenceInfo: + id: str + kind: ReferenceKind + attribute: str | None + span: Span | None = None ``` -### Parameters -| Parameter | Type | Req | Description | -|:----------|:-----|:----|:------------| -| `max_depth` | `int \| None` | N | Maximum traversal depth (default: 100). | - ### Constraints -- Return: Modified node, None (removes from optional fields or collections), or list (expands in collections). -- State: Maintains dispatch cache and depth guard. -- Thread: Not thread-safe (instance state). -- Subclass: MUST call `super().__init__()` to initialize depth guard. -- Raises: `FrozenFluentError` (category=RESOLUTION) when traversal exceeds max_depth. `TypeError` if visit method returns None for required scalar field, list for any scalar field, or a node whose type does not match the field's expected types. -- Depth: Guard inherited from ASTVisitor.visit() (bypass-proof). 
-- Immutable: Uses `dataclasses.replace()` for node modifications. -- Type Validation: `_transform_list` validates that each transformed node matches the field's expected types. For example, `Pattern.elements` accepts only `TextElement | Placeable`; producing a `Message` raises `TypeError` identifying the field and unexpected type. -- Required Fields: `Message.id`, `Term.id`, `Term.value`, `Placeable.expression`, `Variant.key`, `Variant.value`, etc. require single ASTNode return. Returning None or list raises TypeError. -- Optional Fields: `Message.comment`, `Message.value`, `Term.comment`, `MessageReference.attribute`, `TermReference.attribute`, `TermReference.arguments` accept None returns for node removal. Returning list still raises TypeError. +- Import: `from ftllexengine.introspection.message import ReferenceInfo` +- Produced by: `introspect_message()` and reference extraction helpers --- ## `MessageIntrospection` +Complete immutable summary of a message or term's variables, function calls, and references. + ### Signature ```python @dataclass(frozen=True, slots=True) @@ -684,106 +344,19 @@ class MessageIntrospection: functions: frozenset[FunctionCallInfo] references: frozenset[ReferenceInfo] has_selectors: bool - - def get_variable_names(self) -> frozenset[str]: ... - def requires_variable(self, name: str) -> bool: ... - def get_function_names(self) -> frozenset[str]: ... -``` - -### Parameters -| Parameter | Type | Req | Description | -|:----------|:-----|:----|:------------| -| `message_id` | `str` | Y | Message identifier. | -| `variables` | `frozenset[VariableInfo]` | Y | Variable references. | -| `functions` | `frozenset[FunctionCallInfo]` | Y | Function calls. | -| `references` | `frozenset[ReferenceInfo]` | Y | Message/term references. | -| `has_selectors` | `bool` | Y | Uses select expressions. | - -### Constraints -- Return: Immutable introspection result. -- State: Frozen dataclass. 
-- Import: `from ftllexengine.introspection import MessageIntrospection` - ---- - -## `introspect_message` - -Function that extracts complete metadata from a Message or Term AST node. - -### Signature -```python -def introspect_message( - message: Message | Term, - *, - use_cache: bool = True, -) -> MessageIntrospection: -``` - -### Parameters -| Parameter | Type | Req | Description | -|:----------|:-----|:----|:------------| -| `message` | `Message \| Term` | Y | AST node to introspect. | -| `use_cache` | `bool` | N | Use WeakKeyDictionary cache (default: True). | - -### Constraints -- Return: MessageIntrospection with variables, functions, references. -- Raises: `TypeError` if message is not Message or Term. -- State: Caches result in WeakKeyDictionary when use_cache=True. -- Thread: Safe (worst case: redundant computation on cache miss). -- Cache: WeakKeyDictionary auto-cleans when AST nodes garbage collected. -- Import: `from ftllexengine.introspection import introspect_message` - ---- - -## `clear_introspection_cache` - -Function that clears the introspection WeakKeyDictionary cache. - -### Signature -```python -def clear_introspection_cache() -> None: -``` - -### Parameters -| Parameter | Type | Req | Description | -|:----------|:-----|:----|:------------| - -### Constraints -- Return: None. -- Raises: Never. -- State: Clears module-level introspection cache. -- Thread: Safe. -- Usage: Testing, memory pressure. Normal usage relies on WeakKeyDictionary auto-cleanup. -- Import: `from ftllexengine.introspection import clear_introspection_cache` - ---- - -## `extract_variables` - -Function that extracts variable names from a Message or Term (simplified API). - -### Signature -```python -def extract_variables(message: Message | Term) -> frozenset[str]: ``` -### Parameters -| Parameter | Type | Req | Description | -|:----------|:-----|:----|:------------| -| `message` | `Message \| Term` | Y | AST node to analyze. 
| - ### Constraints -- Return: Frozen set of variable names (without $ prefix). -- Raises: Never. -- State: Delegates to introspect_message (uses cache). -- Thread: Safe. -- Import: `from ftllexengine.introspection import extract_variables` +- Import: `from ftllexengine.introspection.message import MessageIntrospection` +- Produced by: `introspect_message()` +- Helpers: `get_variable_names()`, `requires_variable()`, `get_function_names()` +- Cached: module-level weak-reference cache memoizes results per `Message` or `Term` --- ## `MessageVariableValidationResult` -Immutable result of comparing declared FTL message variables against an expected schema. +Structured diff between the variables a message declares and the variables you expect it to declare. ### Signature ```python @@ -797,787 +370,6 @@ class MessageVariableValidationResult: ``` ### Constraints -- `is_valid`: `True` only when `declared_variables == expected` exactly (no missing, no extra). -- `missing_variables`: variables present in expected but absent from the FTL message. -- `extra_variables`: variables declared in FTL but absent from expected. -- Immutable. Hashable. -- Import: `from ftllexengine.introspection import MessageVariableValidationResult` -- Also available: `from ftllexengine import MessageVariableValidationResult` - ---- - -## `validate_message_variables` - -Function that validates an FTL message or term declares exactly the expected variables. - -### Signature -```python -def validate_message_variables( - message: Message | Term, - expected_variables: frozenset[str] | set[str], -) -> MessageVariableValidationResult: -``` - -### Parameters -| Parameter | Type | Req | Description | -|:----------|:-----|:----|:------------| -| `message` | `Message \| Term` | Y | AST node to inspect. | -| `expected_variables` | `frozenset[str] \| set[str]` | Y | Variable names (without $ prefix) the message should declare. 
| - -### Constraints -- Return: `MessageVariableValidationResult`; `is_valid=True` iff declared == expected. -- Raises: Never. No Babel dependency. -- State: Delegates to `extract_variables()` (uses introspection cache). -- Thread: Safe. -- Import: `from ftllexengine.introspection import validate_message_variables` -- Also available: `from ftllexengine import validate_message_variables` -- Version: Added in v0.148.0. - ---- - -## `extract_references` - -Function that extracts message and term references from a Message or Term. - -### Signature -```python -def extract_references(entry: Message | Term) -> tuple[frozenset[str], frozenset[str]]: -``` - -### Parameters -| Parameter | Type | Req | Description | -|:----------|:-----|:----|:------------| -| `entry` | `Message \| Term` | Y | AST node to analyze. | - -### Constraints -- Return: `(message_refs, term_refs)` — two frozen sets. `message_refs` may be attribute-qualified (e.g., `"msg.tooltip"`). `term_refs` are bare IDs (e.g., `"brand"`). -- Raises: Never. -- State: No cache. Traverses the AST on every call. -- Thread: Safe (no shared mutable state). -- Import: `from ftllexengine.introspection import extract_references` - -```python -resource = parse_ftl("msg = { welcome } uses { -brand }") -msg_refs, term_refs = extract_references(resource.entries[0]) -# msg_refs == frozenset({"welcome"}) -# term_refs == frozenset({"brand"}) -``` - ---- - -## `extract_references_by_attribute` - -Function that extracts references per source attribute for attribute-granular analysis. - -### Signature -```python -def extract_references_by_attribute( - entry: Message | Term, -) -> dict[str | None, tuple[frozenset[str], frozenset[str]]]: -``` - -### Parameters -| Parameter | Type | Req | Description | -|:----------|:-----|:----|:------------| -| `entry` | `Message \| Term` | Y | AST node to analyze. | - -### Constraints -- Return: `{key: (message_refs, term_refs)}`. 
Key is `None` for the value pattern, or the attribute name string (e.g., `"tooltip"`). -- Raises: Never. -- State: No cache. Traverses the AST on every call. -- Thread: Safe. -- Use case: Attribute-granular dependency cycle detection. Finer-grained than `extract_references`. -- Import: `from ftllexengine.introspection import extract_references_by_attribute` - -```python -resource = parse_ftl("btn = Click\n .label = { -brand } button") -refs = extract_references_by_attribute(resource.entries[0]) -# refs[None] == (frozenset(), frozenset()) # value pattern -# refs["label"] == (frozenset(), frozenset({"brand"})) # attribute -``` - ---- - -## Type Aliases - -### Signature -```python -type Entry = Message | Term | Comment | Junk -type PatternElement = TextElement | Placeable -type Expression = SelectExpression | InlineExpression -type InlineExpression = ( - StringLiteral | NumberLiteral | VariableReference | - MessageReference | TermReference | FunctionReference | Placeable -) -type SelectorExpression = ( - StringLiteral | NumberLiteral | VariableReference | - MessageReference | TermReference | FunctionReference -) -type FTLLiteral = StringLiteral | NumberLiteral -type VariantKey = Identifier | NumberLiteral -type ASTNode = ( - Resource | Message | Term | Attribute | Comment | Junk | - Pattern | TextElement | Placeable | SelectExpression | Variant | - StringLiteral | NumberLiteral | VariableReference | MessageReference | - TermReference | FunctionReference | CallArguments | NamedArgument | - Identifier | Annotation | Span -) -``` - -### Constraints -- PEP 695 type aliases. Cannot use with isinstance(). -- Use pattern matching or `.guard()` methods for runtime checks. -- `SelectorExpression`: Subset of `InlineExpression` excluding `Placeable`; used as `SelectExpression.selector` type. -- `FTLLiteral`: Subset of `InlineExpression` restricted to literal values; used as `NamedArgument.value` type. 
-- `ASTNode`: Union of all 21 AST node types (includes `Span` and `Annotation` utility nodes). - ---- - -## `FluentValue` - -Type alias for values passable to Fluent functions and format_pattern(). - -### Signature -```python -type FluentValue = ( - str | int | Decimal | datetime | date | FluentNumber | None | - Sequence["FluentValue"] | Mapping[str, "FluentValue"] -) -``` - -### Parameters -| Type | Description | -|:-----|:------------| -| `str` | String arguments. | -| `int` | Integer arguments (includes `bool` via subtype; use `int(flag)` explicitly). | -| `Decimal` | Precise decimal arguments (currency, any fractional value). | -| `datetime` | Date-time arguments. | -| `date` | Date-only arguments. | -| `FluentNumber` | Formatted number from NUMBER() function. | -| `None` | Absent/null arguments. | -| `Sequence["FluentValue"]` | Lists, tuples of FluentValue (recursive). | -| `Mapping[str, "FluentValue"]` | Dicts with string keys (recursive). | - -### Constraints -- PEP 695 recursive type alias. Export: `from ftllexengine import FluentValue`. -- Used for type-hinting resolver arguments: `args: dict[str, FluentValue]`. -- `bool` is absent from the explicit union. `bool` is an `int` subtype so type checkers accept it; the explicit omission signals that `bool` has no numeric localization semantics. `NUMBER()` and `CURRENCY()` raise `TypeError` for `bool` at runtime. Convert explicitly: `int(flag)` or `str(flag)`. -- `float` is absent. IEEE 754 cannot represent most decimal fractions exactly. Use `Decimal(str(float_val))` at system boundaries. -- Collections: Arbitrarily nested structures supported (e.g., `{"items": [1, 2, {"nested": "value"}]}`). -- Cache: Collections handled correctly by `_make_hashable()` for cache key generation. -- Location: `runtime/value_types.py`, exported from package root. - ---- - -## `FluentNumber` - -Frozen dataclass wrapping a formatted number to preserve numeric identity, formatted display, and precision simultaneously. 
- -### Signature -```python -@dataclass(frozen=True, slots=True) -class FluentNumber: - value: int | Decimal - formatted: str - precision: int | None = None - - def __str__(self) -> str: ... -``` - -### Parameters -| Name | Type | Req | Semantics | -|:-----|:-----|:----|:----------| -| `value` | `int \| Decimal` | Y | Original numeric value for plural matching | -| `formatted` | `str` | Y | Locale-formatted string for display output | -| `precision` | `int \| None` | N | Visible fraction digit count (CLDR v operand); None if unspecified | - -### Constraints -- Return: Immutable number wrapper. -- State: Frozen dataclass. -- Purpose: NUMBER() produces FluentNumber so the resolver can use `formatted` for output while using `value` and `precision` for CLDR plural category matching. -- `__str__`: Returns `formatted` (the display string). -- `precision`: Reflects ACTUAL visible fraction digits from the formatted string, not the minimum_fraction_digits parameter. -- Import: `from ftllexengine.runtime import FluentNumber` -- Also available: `from ftllexengine import FluentNumber` - ---- - -## `VariableInfo` - -### Signature -```python -@dataclass(frozen=True, slots=True) -class VariableInfo: - name: str - context: VariableContext - span: Span | None = None -``` - -### Parameters -| Parameter | Type | Req | Description | -|:----------|:-----|:----|:------------| -| `name` | `str` | Y | Variable name (without $ prefix). | -| `context` | `VariableContext` | Y | Context where variable appears. | -| `span` | `Span \| None` | N | Source position for IDE integration. | - -### Constraints -- Return: Immutable variable metadata. -- State: Frozen dataclass. -- Span: Populated from VariableReference.span for parser-produced ASTs. -- Import: `from ftllexengine.introspection import VariableInfo` - ---- - -## `FunctionCallInfo` - -### Signature -```python -@dataclass(frozen=True, slots=True) -class FunctionCallInfo: - name: str - positional_arg_vars: tuple[str, ...] 
- named_args: frozenset[str] - span: Span | None = None -``` - -### Parameters -| Parameter | Type | Req | Description | -|:----------|:-----|:----|:------------| -| `name` | `str` | Y | Function name (e.g., 'NUMBER'). | -| `positional_arg_vars` | `tuple[str, ...]` | Y | Variable names used as positional arguments (excludes literals). | -| `named_args` | `frozenset[str]` | Y | Named argument keys. | -| `span` | `Span \| None` | N | Source position for IDE integration. | - -### Constraints -- Return: Immutable function call metadata. -- State: Frozen dataclass. -- Span: Populated from FunctionReference.span for parser-produced ASTs. -- positional_arg_vars: Contains only VariableReference names; literals and other expressions not included. -- Import: `from ftllexengine.introspection import FunctionCallInfo` - ---- - -## `ReferenceInfo` - -### Signature -```python -@dataclass(frozen=True, slots=True) -class ReferenceInfo: - id: str - kind: ReferenceKind - attribute: str | None - span: Span | None = None -``` - -### Parameters -| Parameter | Type | Req | Description | -|:----------|:-----|:----|:------------| -| `id` | `str` | Y | Referenced message or term ID. | -| `kind` | `ReferenceKind` | Y | Reference type (MESSAGE or TERM). | -| `attribute` | `str \| None` | N | Attribute name if present. | -| `span` | `Span \| None` | N | Source position for IDE integration. | - -### Constraints -- Return: Immutable reference metadata. -- State: Frozen dataclass. -- Span: Populated from MessageReference.span or TermReference.span for parser-produced ASTs. 
-- Import: `from ftllexengine.introspection import ReferenceInfo` - ---- - -## `CommentType` - -### Signature -```python -class CommentType(StrEnum): - COMMENT = "comment" - GROUP = "group" - RESOURCE = "resource" -``` - -### Parameters -| Value | Description | -|:------|:------------| -| `COMMENT` | Standalone comment: `# text` | -| `GROUP` | Group comment: `## text` | -| `RESOURCE` | Resource comment: `### text` | - -### Constraints -- StrEnum: Members ARE strings. `str(CommentType.COMMENT) == "comment"` -- Import: `from ftllexengine.enums import CommentType` - ---- - -## `VariableContext` - -### Signature -```python -class VariableContext(StrEnum): - PATTERN = "pattern" - SELECTOR = "selector" - VARIANT = "variant" - FUNCTION_ARG = "function_arg" -``` - -### Parameters -| Value | Description | -|:------|:------------| -| `PATTERN` | Variable in message pattern. | -| `SELECTOR` | Variable in select expression selector. | -| `VARIANT` | Variable in select variant. | -| `FUNCTION_ARG` | Variable in function argument. | - -### Constraints -- StrEnum: Members ARE strings. `str(VariableContext.PATTERN) == "pattern"` -- Import: `from ftllexengine.enums import VariableContext` - ---- - -## `ReferenceKind` - -### Signature -```python -class ReferenceKind(StrEnum): - MESSAGE = "message" - TERM = "term" -``` - -### Parameters -| Value | Description | -|:------|:------------| -| `MESSAGE` | Reference to a message: `{ message-id }` | -| `TERM` | Reference to a term: `{ -term-id }` | - -### Constraints -- StrEnum: Members ARE strings. `str(ReferenceKind.MESSAGE) == "message"` -- Import: `from ftllexengine.enums import ReferenceKind` - ---- - -## ISO Introspection Types - -The introspection module provides type-safe access to ISO 3166 (territories) and ISO 4217 (currencies) data via Babel CLDR. Requires Babel installation: `pip install ftllexengine[babel]`. - ---- - -## `BabelImportError` - -Exception raised when a Babel-dependent feature is called without Babel installed. 
- -### Signature -```python -class BabelImportError(ImportError): - feature: str - - def __init__(self, feature: str) -> None: ... -``` - -### Parameters -| Name | Type | Req | Semantics | -|:-----|:-----|:----|:----------| -| `feature` | `str` | Y | Name of the feature/function requiring Babel | - -### Constraints -- Purpose: Fail-fast error with installation instructions when Babel is absent. -- Message: `"{feature} requires Babel for CLDR locale data. Install with: pip install ftllexengine[babel]"` -- Raised by: `get_territory`, `get_currency`, `list_territories`, `list_currencies`, `get_territory_currencies`, `is_valid_territory_code`, `is_valid_currency_code`, `get_cldr_version`, `get_babel_locale`, `clear_locale_cache`, and all `ftllexengine.parsing` parse functions. -- Import: `from ftllexengine.introspection import BabelImportError` - ---- - -## `get_cldr_version` - -Function that returns the Unicode CLDR version used by Babel. - -### Signature -```python -def get_cldr_version() -> str: -``` - -### Constraints -- Return: CLDR version string (e.g., `"47"`). -- Raises: `BabelImportError` if Babel not installed. -- State: No mutable state. -- Thread: Safe. -- Purpose: Debugging locale-specific formatting differences; verifying deployment environments. -- Import: `from ftllexengine.introspection import get_cldr_version` -- Also available: `from ftllexengine import get_cldr_version` - ---- - -## `get_currency_decimal_digits` - -Convenience function returning the ISO 4217 standard decimal precision for a currency code without a locale parameter. - -### Signature -```python -def get_currency_decimal_digits(code: str) -> int | None: -``` - -### Parameters - -| Name | Type | Description | -|:-----|:-----|:------------| -| `code` | `str` | ISO 4217 currency code (e.g., `"USD"`, `"KWD"`). Case-insensitive. 
| - -### Constraints -- Return: ISO 4217 decimal digit count (`0` for JPY/XAU/XAG, `2` for USD/EUR, `3` for KWD, `4` for CLF), or `None` for unknown or invalid codes. -- Raises: Nothing. Babel is not required. -- State: Pure; consults static tables in `ftllexengine.constants` only. -- Thread: Safe. -- Babel: Not required. Available in parser-only installs (`pip install ftllexengine`). -- Casefold: `len(code) != 3` returns `None` before `.upper()`; single-char expansions (e.g., `'ß'`) are rejected. -- Import: `from ftllexengine.introspection import get_currency_decimal_digits` -- Also available: `from ftllexengine import get_currency_decimal_digits` - ---- - -## `TerritoryCode` - -Type alias for ISO 3166-1 alpha-2 territory codes. - -### Signature -```python -type TerritoryCode = str -``` - -### Constraints -- Purpose: Type annotation for territory codes (e.g., "US", "LV", "DE"). -- Validation: Use `is_valid_territory_code()` to verify. -- Import: `from ftllexengine.introspection import TerritoryCode` - ---- - -## `CurrencyCode` - -Type alias for ISO 4217 currency codes. - -### Signature -```python -type CurrencyCode = str -``` - -### Constraints -- Purpose: Type annotation for currency codes (e.g., "USD", "EUR", "GBP"). -- Validation: Use `is_valid_currency_code()` to verify. -- Import: `from ftllexengine.introspection import CurrencyCode` - ---- - -## `TerritoryInfo` - -ISO 3166-1 territory data with localized name and official language codes. - -### Signature -```python -@dataclass(frozen=True, slots=True) -class TerritoryInfo: - alpha2: TerritoryCode - name: str - currencies: tuple[CurrencyCode, ...] - official_languages: tuple[str, ...] 
-``` - -### Parameters -| Name | Type | Req | Semantics | -|:-----|:-----|:----|:----------| -| `alpha2` | `TerritoryCode` | Y | ISO 3166-1 alpha-2 code | -| `name` | `str` | Y | Localized display name | -| `currencies` | `tuple[CurrencyCode, ...]` | Y | Currency codes in priority order (may be empty) | -| `official_languages` | `tuple[str, ...]` | Y | BCP-47 official language codes (may be empty) | - -### Constraints -- Return: Immutable territory data. -- State: Frozen dataclass. -- Thread: Safe. -- Hashable: Yes. -- Multi-Currency: Territories may have multiple legal tender currencies (e.g., Panama: PAB, USD). -- Multi-Language: Territories may have multiple official languages (e.g., Belgium: fr, nl, de). -- Import: `from ftllexengine.introspection import TerritoryInfo` - ---- - -## `CurrencyInfo` - -ISO 4217 currency data with localized presentation. - -### Signature -```python -@dataclass(frozen=True, slots=True) -class CurrencyInfo: - code: CurrencyCode - name: str - symbol: str - decimal_digits: int -``` - -### Parameters -| Name | Type | Req | Semantics | -|:-----|:-----|:----|:----------| -| `code` | `CurrencyCode` | Y | ISO 4217 currency code | -| `name` | `str` | Y | Localized display name | -| `symbol` | `str` | Y | Locale-specific symbol | -| `decimal_digits` | `int` | Y | Standard decimal places (0, 2, 3, or 4) | - -### Constraints -- Return: Immutable currency data. -- State: Frozen dataclass. -- Thread: Safe. -- Hashable: Yes. -- Import: `from ftllexengine.introspection import CurrencyInfo` - ---- - -## `get_territory` - -Look up ISO 3166-1 territory by alpha-2 code. 
- -### Signature -```python -def get_territory( - code: str, - locale: str = "en", -) -> TerritoryInfo | None: -``` - -### Parameters -| Name | Type | Req | Semantics | -|:-----|:-----|:----|:----------| -| `code` | `str` | Y | ISO 3166-1 alpha-2 code (case-insensitive) | -| `locale` | `str` | N | Locale for name localization (default: "en") | - -### Constraints -- Return: TerritoryInfo if found, None if unknown. -- Raises: `BabelImportError` if Babel not installed. -- State: Bounded cache per normalized (code, locale) pair. -- Thread: Safe. -- Normalization: Code uppercased, locale normalized (BCP-47/POSIX/lowercase accepted). -- Import: `from ftllexengine.introspection import get_territory` - ---- - -## `get_currency` - -Look up ISO 4217 currency by code. - -### Signature -```python -def get_currency( - code: str, - locale: str = "en", -) -> CurrencyInfo | None: -``` - -### Parameters -| Name | Type | Req | Semantics | -|:-----|:-----|:----|:----------| -| `code` | `str` | Y | ISO 4217 currency code (case-insensitive) | -| `locale` | `str` | N | Locale for name/symbol localization (default: "en") | - -### Constraints -- Return: CurrencyInfo if found, None if unknown. -- Raises: `BabelImportError` if Babel not installed. -- State: Bounded cache per normalized (code, locale) pair. -- Thread: Safe. -- Normalization: Code uppercased, locale normalized (BCP-47/POSIX/lowercase accepted). -- Import: `from ftllexengine.introspection import get_currency` - ---- - -## `list_territories` - -List all known ISO 3166-1 territories. - -### Signature -```python -def list_territories( - locale: str = "en", -) -> frozenset[TerritoryInfo]: -``` - -### Parameters -| Name | Type | Req | Semantics | -|:-----|:-----|:----|:----------| -| `locale` | `str` | N | Locale for name localization (default: "en") | - -### Constraints -- Return: Frozen set of all TerritoryInfo objects. -- Raises: `BabelImportError` if Babel not installed. -- State: Bounded cache per normalized locale. 
-- Thread: Safe. -- Normalization: Locale normalized (BCP-47/POSIX/lowercase accepted). -- Import: `from ftllexengine.introspection import list_territories` - ---- - -## `list_currencies` - -List all known ISO 4217 currencies. - -### Signature -```python -def list_currencies( - locale: str = "en", -) -> frozenset[CurrencyInfo]: -``` - -### Parameters -| Name | Type | Req | Semantics | -|:-----|:-----|:----|:----------| -| `locale` | `str` | N | Locale for name/symbol localization (default: "en") | - -### Constraints -- Return: Frozen set of all CurrencyInfo objects. -- Raises: `BabelImportError` if Babel not installed. -- State: Bounded cache per normalized locale. -- Thread: Safe. -- Normalization: Locale normalized (BCP-47/POSIX/lowercase accepted). -- Consistency: Same currency count across all locales (uses English fallback for localized names). -- Import: `from ftllexengine.introspection import list_currencies` - ---- - -## `get_territory_currencies` - -Get all currencies for a territory in priority order. - -### Signature -```python -def get_territory_currencies(territory: str) -> tuple[CurrencyCode, ...]: -``` - -### Parameters -| Name | Type | Req | Semantics | -|:-----|:-----|:----|:----------| -| `territory` | `str` | Y | ISO 3166-1 alpha-2 code (case-insensitive) | - -### Constraints -- Return: Tuple of ISO 4217 currency codes (empty if unknown territory). -- Raises: `BabelImportError` if Babel not installed. -- State: Bounded cache per normalized territory code. -- Thread: Safe. -- Normalization: Territory code uppercased internally. -- Multi-Currency: Returns all legal tender currencies, primary first (e.g., Panama: ("PAB", "USD")). -- Import: `from ftllexengine.introspection import get_territory_currencies` - ---- - -## `is_valid_territory_code` - -Check if string is a valid ISO 3166-1 alpha-2 code. 
- -### Signature -```python -def is_valid_territory_code(value: str) -> TypeIs[TerritoryCode]: -``` - -### Parameters -| Name | Type | Req | Semantics | -|:-----|:-----|:----|:----------| -| `value` | `str` | Y | String to validate | - -### Constraints -- Return: True if known ISO 3166-1 alpha-2 code. -- Raises: `BabelImportError` if Babel not installed. -- State: Uses cached territory lookups. -- Thread: Safe. -- TypeIs: Narrows type in type checkers. -- Import: `from ftllexengine.introspection import is_valid_territory_code` - ---- - -## `is_valid_currency_code` - -Check if string is a valid ISO 4217 currency code. - -### Signature -```python -def is_valid_currency_code(value: str) -> TypeIs[CurrencyCode]: -``` - -### Parameters -| Name | Type | Req | Semantics | -|:-----|:-----|:----|:----------| -| `value` | `str` | Y | String to validate | - -### Constraints -- Return: True if known ISO 4217 currency code. -- Raises: `BabelImportError` if Babel not installed. -- State: Uses cached currency lookups. -- Thread: Safe. -- TypeIs: Narrows type in type checkers. -- Import: `from ftllexengine.introspection import is_valid_currency_code` - ---- - -## `require_currency_code` - -Validate, strip, and normalise a boundary value to a canonical `CurrencyCode`. Eliminates per-caller trim / blank / case normalisation chains for ISO 4217 codes. - -### Signature -```python -def require_currency_code(value: object, field_name: str) -> CurrencyCode: -``` - -### Parameters -| Parameter | Type | Req | Description | -|:----------|:-----|:----|:------------| -| `value` | `object` | Y | Raw boundary value. Non-str always raises TypeError. | -| `field_name` | `str` | Y | Human-readable field label used in error messages. | - -### Constraints -- Return: `CurrencyCode` — stripped, uppercased, valid ISO 4217 code. -- Raises: `TypeError` if `value` is not `str`. -- Raises: `ValueError` if the stripped/uppercased value is not a recognised ISO 4217 code. 
-- Raises: `BabelImportError` if Babel not installed (delegated from `is_valid_currency_code`). -- State: Pure function with cache delegation. -- Thread: Safe. -- Import: `from ftllexengine import require_currency_code` or `from ftllexengine.introspection import require_currency_code`. - ---- - -## `require_territory_code` - -Validate, strip, and normalise a boundary value to a canonical `TerritoryCode`. Eliminates per-caller trim / blank / case normalisation chains for ISO 3166-1 alpha-2 codes. - -### Signature -```python -def require_territory_code(value: object, field_name: str) -> TerritoryCode: -``` - -### Parameters -| Parameter | Type | Req | Description | -|:----------|:-----|:----|:------------| -| `value` | `object` | Y | Raw boundary value. Non-str always raises TypeError. | -| `field_name` | `str` | Y | Human-readable field label used in error messages. | - -### Constraints -- Return: `TerritoryCode` — stripped, uppercased, valid ISO 3166-1 alpha-2 code. -- Raises: `TypeError` if `value` is not `str`. -- Raises: `ValueError` if the stripped/uppercased value is not a recognised ISO 3166-1 territory code. -- Raises: `BabelImportError` if Babel not installed (delegated from `is_valid_territory_code`). -- Note: Casefold-expansion guard prevents `"ß"` (len=1) from matching `"SS"` — raw length is checked before `.upper()`. -- State: Pure function with cache delegation. -- Thread: Safe. -- Import: `from ftllexengine import require_territory_code` or `from ftllexengine.introspection import require_territory_code`. - ---- - -## `clear_iso_cache` - -Clear all ISO introspection caches. - -### Signature -```python -def clear_iso_cache() -> None: -``` - -### Constraints -- Return: None. -- Raises: Never. -- State: Clears all bounded ISO introspection caches. -- Thread: Safe. -- Usage: Testing, memory pressure, locale configuration changes. 
-- Import: `from ftllexengine.introspection import clear_iso_cache` - ---- +- Import: `from ftllexengine import MessageVariableValidationResult` +- Produced by: `validate_message_variables()` +- Valid when: both `missing_variables` and `extra_variables` are empty diff --git a/docs/DOC_03_LocaleParsing.md b/docs/DOC_03_LocaleParsing.md new file mode 100644 index 00000000..89506040 --- /dev/null +++ b/docs/DOC_03_LocaleParsing.md @@ -0,0 +1,231 @@ +--- +afad: "3.5" +version: "0.163.0" +domain: LOCALE_PARSING +updated: "2026-04-22" +route: + keywords: [parse_decimal, parse_fluent_number, parse_date, parse_datetime, parse_currency, is_valid_decimal, clear_date_caches] + questions: ["how do I parse localized numbers and dates?", "what do the locale-aware parse helpers return?", "which parsing type guards and cache-clear helpers are public?"] +--- + +# Locale Parsing Reference + +This reference covers locale-aware parsing helpers from `ftllexengine.parsing`, including type guards and cache lifecycle utilities. +FTL syntax parsing and AST traversal helpers live in [DOC_03_Parsing.md](DOC_03_Parsing.md). + +## `parse_decimal` + +Function that parses a localized number string into `Decimal`. + +### Signature +```python +def parse_decimal(value: str, locale_code: str) -> ParseResult[Decimal]: +``` + +### Parameters +| Name | Req | Semantics | +|:-----|:----|:----------| +| `value` | Y | Localized numeric input | +| `locale_code` | Y | Locale for parsing | + +### Constraints +- Return: Parsed `Decimal` or `None` with errors +- Raises: `BabelImportError` when Babel is unavailable +- State: Pure +- Thread: Safe + +--- + +## `parse_fluent_number` + +Function that parses a localized number into `FluentNumber`. 
+ +### Signature +```python +def parse_fluent_number(value: str, locale_code: str) -> ParseResult[FluentNumber]: +``` + +### Parameters +| Name | Req | Semantics | +|:-----|:----|:----------| +| `value` | Y | Localized numeric input | +| `locale_code` | Y | Locale for parsing | + +### Constraints +- Return: Parsed `FluentNumber` or `None` with errors +- Raises: `BabelImportError` when Babel is unavailable +- State: Pure +- Thread: Safe + +--- + +## `parse_date` + +Function that parses a localized date string into `datetime.date`. + +### Signature +```python +def parse_date(value: str, locale_code: str) -> tuple[date | None, tuple[FrozenFluentError, ...]]: +``` + +### Parameters +| Name | Req | Semantics | +|:-----|:----|:----------| +| `value` | Y | Localized date input | +| `locale_code` | Y | Locale for parsing | + +### Constraints +- Return: Parsed `date` or `None` with errors +- Raises: `BabelImportError` when Babel is unavailable +- State: Pure +- Thread: Safe + +--- + +## `parse_datetime` + +Function that parses a localized datetime string into `datetime.datetime`. + +### Signature +```python +def parse_datetime( + value: str, + locale_code: str, + *, + tzinfo: timezone | None = None, +) -> tuple[datetime | None, tuple[FrozenFluentError, ...]]: +``` + +### Parameters +| Name | Req | Semantics | +|:-----|:----|:----------| +| `value` | Y | Localized datetime input | +| `locale_code` | Y | Locale for parsing | +| `tzinfo` | N | Fallback timezone | + +### Constraints +- Return: Parsed `datetime` or `None` with errors +- Raises: `BabelImportError` when Babel is unavailable +- State: Pure +- Thread: Safe + +--- + +## `parse_currency` + +Function that parses localized money input into `(Decimal, ISO code)`. 
+ +### Signature +```python +def parse_currency( + value: str, + locale_code: str, + *, + default_currency: str | None = None, + infer_from_locale: bool = False, +) -> tuple[tuple[Decimal, str] | None, tuple[FrozenFluentError, ...]]: +``` + +### Parameters +| Name | Req | Semantics | +|:-----|:----|:----------| +| `value` | Y | Localized money input | +| `locale_code` | Y | Locale for parsing | +| `default_currency` | N | Explicit ISO code | +| `infer_from_locale` | N | Infer ISO code from locale | + +### Constraints +- Return: `(amount, code)` or `None` with errors +- Raises: `BabelImportError` when Babel is unavailable +- State: Pure +- Thread: Safe + +--- + +## `is_valid_decimal` + +Function that acts as a `TypeIs[Decimal]` guard for parsed decimal results. + +### Signature +```python +def is_valid_decimal(value: Decimal | None) -> TypeIs[Decimal]: +``` + +### Constraints +- Return: `True` only for usable decimal results +- State: Pure + +--- + +## `is_valid_date` + +Function that acts as a `TypeIs[date]` guard for parsed date results. + +### Signature +```python +def is_valid_date(value: date | None) -> TypeIs[date]: +``` + +### Constraints +- Return: `True` only for usable date results +- State: Pure + +--- + +## `is_valid_datetime` + +Function that acts as a `TypeIs[datetime]` guard for parsed datetime results. + +### Signature +```python +def is_valid_datetime(value: datetime | None) -> TypeIs[datetime]: +``` + +### Constraints +- Return: `True` only for usable datetime results +- State: Pure + +--- + +## `is_valid_currency` + +Function that acts as a `TypeIs[tuple[Decimal, str]]` guard for parsed currency results. + +### Signature +```python +def is_valid_currency(value: tuple[Decimal, str] | None) -> TypeIs[tuple[Decimal, str]]: +``` + +### Constraints +- Return: `True` only for usable currency results +- State: Pure + +--- + +## `clear_date_caches` + +Function that clears cached locale-specific date parsing patterns. 
+ +### Signature +```python +def clear_date_caches() -> None: +``` + +### Constraints +- State: Mutates module cache state +- Thread: Safe + +--- + +## `clear_currency_caches` + +Function that clears cached locale-specific currency parsing data. + +### Signature +```python +def clear_currency_caches() -> None: +``` + +### Constraints +- State: Mutates module cache state +- Thread: Safe diff --git a/docs/DOC_03_Parsing.md b/docs/DOC_03_Parsing.md index b615414a..9c40259c 100644 --- a/docs/DOC_03_Parsing.md +++ b/docs/DOC_03_Parsing.md @@ -1,97 +1,88 @@ --- -afad: "3.3" -version: "0.153.0" +afad: "3.5" +version: "0.163.0" domain: PARSING -updated: "2026-03-13" +updated: "2026-04-22" route: - keywords: [parse, parse_stream, parse_stream_ftl, serialize, validate_resource, FluentParserV1, parse_ftl, serialize_ftl, parse_decimal, parse_fluent_number, syntax, validation, BabelImportError, line_offset, column_offset, format_position, get_line_content, get_error_context, position, incremental, streaming, line iterator] - questions: ["how to parse FTL?", "how to serialize AST?", "how to validate FTL?", "what parser options exist?", "what exceptions do parsing functions raise?", "how do I parse a FluentNumber?", "how to get line and column from offset?", "how to format error position?", "how to get source context for errors?", "how to parse FTL incrementally?", "how to parse FTL from a line iterator?", "how to stream FTL parsing?"] + keywords: [parse_ftl, serialize_ftl, validate_resource, FluentParserV1, Cursor, ASTVisitor, ASTTransformer, ParseError] + questions: ["how do I parse FTL?", "what does validate_resource return?", "what syntax traversal helpers are public?", "where is the syntax parser API documented?"] --- # Parsing Reference ---- +This reference covers FTL syntax parsing, validation, serialization, cursor primitives, and AST traversal helpers. +Locale-aware number/date/currency parsing is documented in [DOC_03_LocaleParsing.md](DOC_03_LocaleParsing.md). 
-## `parse` +## `parse_ftl` + +Function that parses FTL source into a `Resource` AST. ### Signature ```python -def parse(source: str) -> Resource: +def parse_ftl(source: str) -> Resource: ``` ### Parameters -| Parameter | Type | Req | Description | -|:----------|:-----|:----|:------------| -| `source` | `str` | Y | FTL source code. | +| Name | Req | Semantics | +|:-----|:----|:----------| +| `source` | Y | Raw FTL source | ### Constraints -- Return: Resource AST containing parsed entries. -- Raises: Never (robustness principle: invalid syntax becomes Junk nodes). -- State: None. -- Thread: Safe. +- Return: Parsed `Resource` +- Raises: Never for syntax junk; parse recovery is represented in the AST +- State: Pure +- Thread: Safe --- -## `parse_stream` +## `parse_stream_ftl` + +Function that yields parsed FTL entries from a line iterator. ### Signature ```python -def parse_stream(lines: Iterable[str]) -> Iterator[Entry]: +def parse_stream_ftl(lines: Iterable[str]) -> Iterator[Entry]: ``` ### Parameters -| Parameter | Type | Req | Description | -|:----------|:-----|:----|:------------| -| `lines` | `Iterable[str]` | Y | FTL source lines. Trailing newlines stripped per line. | +| Name | Req | Semantics | +|:-----|:----|:----------| +| `lines` | Y | Line-oriented FTL source | ### Constraints -- Return: Iterator of `Message | Term | Comment | Junk` nodes in document order. -- Purpose: Incremental alternative to `parse()`. Splits at blank-line boundaries, yields entries as each chunk is parsed. Memory proportional to largest single entry, not full source. -- Note: Span positions in yielded entries are chunk-relative, not stream-relative. Use `parse()` when absolute spans are required (e.g., IDE tooling). -- Raises: Never (robustness: invalid syntax becomes Junk nodes). -- State: None. -- Thread: Safe. -- Import: `from ftllexengine.syntax import parse_stream` or `from ftllexengine import parse_stream_ftl`. 
+- Return: Entry iterator in source order +- State: Streaming parse +- Thread: Safe --- -## `serialize` +## `serialize_ftl` + +Function that serializes a `Resource` AST back to FTL text. ### Signature ```python -def serialize( - resource: Resource, - *, - validate: bool = True, - max_depth: int = MAX_DEPTH, -) -> str: +def serialize_ftl(resource: Resource, *, validate: bool = True, max_depth: int = 100) -> str: ``` ### Parameters -| Parameter | Type | Req | Description | -|:----------|:-----|:----|:------------| -| `resource` | `Resource` | Y | Resource AST node. | -| `validate` | `bool` | N | Validate AST before serialization (default: True). | -| `max_depth` | `int` | N | Maximum nesting depth (default: 100). | +| Name | Req | Semantics | +|:-----|:----|:----------| +| `resource` | Y | AST to serialize | +| `validate` | N | Validate before writing | +| `max_depth` | N | Serialization depth guard | ### Constraints -- Return: FTL source string. -- Raises: `SerializationValidationError` when `validate=True` and AST invalid: - - Identifier names violating grammar `[a-zA-Z][a-zA-Z0-9_-]*` - - Duplicate named argument names within CallArguments - - Named argument values not StringLiteral or NumberLiteral per FTL EBNF -- Note: `SelectExpression` with wrong default-variant count raises `ValueError` at construction (enforced by `SelectExpression.__post_init__`), not by the serializer. -- Raises: `SerializationDepthError` when AST exceeds `max_depth` during validation or serialization. -- State: None. -- Thread: Safe. -- Security: DepthGuard prevents stack overflow from adversarial ASTs. Identifier validation prevents invalid FTL output from programmatic AST construction. -- Roundtrip: Programmatic ASTs with embedded newlines followed by whitespace within a single TextElement are serialized using separate-line format with structural indentation, preserving whitespace through serialize-parse cycles. 
+- Return: FTL source string +- Raises: `SerializationValidationError` or `SerializationDepthError` +- State: Pure +- Thread: Safe --- ## `validate_resource` -Function validating FTL source for syntax and semantic errors. +Function that validates FTL source without loading it into a runtime bundle. ### Signature ```python @@ -107,35 +98,26 @@ def validate_resource( ``` ### Parameters -| Name | Type | Req | Semantics | -|:-----|:-----|:----|:----------| -| `source` | `str` | Y | FTL source code to validate | -| `parser` | `FluentParserV1 \| None` | N | Custom parser instance | -| `known_messages` | `frozenset[str] \| None` | N | Known message IDs from other resources | -| `known_terms` | `frozenset[str] \| None` | N | Known term IDs from other resources | -| `known_msg_deps` | `Mapping[str, frozenset[str]] \| None` | N | Dependency graph for known messages (enables cross-resource cycle detection) | -| `known_term_deps` | `Mapping[str, frozenset[str]] \| None` | N | Dependency graph for known terms (enables cross-resource cycle detection) | +| Name | Req | Semantics | +|:-----|:----|:----------| +| `source` | Y | Raw FTL source | +| `parser` | N | Parser override | +| `known_messages` | N | Cross-resource message ids | +| `known_terms` | N | Cross-resource term ids | +| `known_msg_deps` | N | Existing message deps | +| `known_term_deps` | N | Existing term deps | ### Constraints -- Return: ValidationResult with errors (syntax), warnings (semantic), metadata -- Raises: `TypeError` if source is not a str. 
-- State: Read-only +- Return: `ValidationResult` +- State: Pure - Thread: Safe -- Complexity: O(n) where n is AST node count - -### Usage -- When: Validate FTL files in CI/CD pipelines without runtime bundle -- Prefer: This over FluentBundle.validate_resource for parser-only workflows -- Avoid: Repeatedly parsing same FTL source (cache the Resource AST instead) - -### Notes -- Available at top-level: `from ftllexengine import validate_resource` -- No Babel dependency (uses AST inspection only) --- ## `FluentParserV1` +Class that parses FTL source with configurable safety limits. + ### Signature ```python class FluentParserV1: @@ -145,626 +127,248 @@ class FluentParserV1: max_source_size: int | None = None, max_nesting_depth: int | None = None, max_parse_errors: int | None = None, - ) -> None: ... - def parse(self, source: str) -> Resource: ... - @property - def max_source_size(self) -> int: ... - @property - def max_nesting_depth(self) -> int: ... - @property - def max_parse_errors(self) -> int: ... + ) -> None: ``` ### Parameters -| Parameter | Type | Req | Semantics | -|:----------|:-----|:----|:----------| -| `max_source_size` | `int \| None` | N | Maximum source size in characters (default: 10,000,000). | -| `max_nesting_depth` | `int \| None` | N | Maximum nesting depth (default: 100); must be positive if specified. | -| `max_parse_errors` | `int \| None` | N | Maximum Junk entries before parser aborts (default: 100). Set to 0 to disable limit. | +| Name | Req | Semantics | +|:-----|:----|:----------| +| `max_source_size` | N | Input length bound | +| `max_nesting_depth` | N | Nesting safety bound | +| `max_parse_errors` | N | Recovery error bound | ### Constraints -- Return: Parser instance. -- Raises: `ValueError` if max_nesting_depth is specified and <= 0. -- State: Stores max_source_size, max_nesting_depth, and max_parse_errors configuration. -- Thread: Safe for concurrent parse() calls. 
-- Security: Validates source size, nesting depth, and error accumulation (DoS prevention). After max_parse_errors Junk entries, parse() aborts to prevent memory exhaustion from malformed input. Setting max_parse_errors=0 disables the limit (not recommended for production). -- Depth Validation: max_nesting_depth automatically clamped to sys.getrecursionlimit() - 50. Logs warning if clamped. - ---- - -## `FluentParserV1.parse` - -### Signature -```python -def parse(self, source: str) -> Resource: -``` - -### Parameters -| Parameter | Type | Req | Description | -|:----------|:-----|:----|:------------| -| `source` | `str` | Y | FTL source code. | - -### Constraints -- Return: Resource AST containing parsed entries. -- Raises: `ValueError` if source exceeds max_source_size. -- State: None. -- Thread: Safe. -- Security: Enforces input size limit. +- Return: Parser instance +- State: Reusable parser configuration +- Thread: Safe +- Main methods: `parse()`, `parse_stream()` --- -## `FluentParserV1.parse_stream` - -### Signature -```python -def parse_stream(self, lines: Iterable[str]) -> Iterator[Entry]: -``` - -### Parameters -| Parameter | Type | Req | Description | -|:----------|:-----|:----|:------------| -| `lines` | `Iterable[str]` | Y | FTL source lines. Trailing newlines stripped per line. | - -### Constraints -- Return: Iterator of `Message | Term | Comment | Junk` nodes in document order. -- Purpose: Incremental parse. Splits stream at blank-line boundaries (top-level FTL entry delimiters per the Fluent spec); each chunk parsed independently. Memory proportional to largest single entry. -- Note: Span positions are chunk-relative, not stream-relative. Comment attachment is semantics-correct — a comment on the line immediately before a message/term (no blank line between) is attached to that entry; comments separated by blank lines are yielded as standalone Comment entries. -- Raises: `ValueError` if any single chunk exceeds max_source_size. -- State: None. 
-- Thread: Safe. -- Import: `from ftllexengine.syntax.parser import FluentParserV1` (method on FluentParserV1). - ---- +## `parse` -## `ParseContext` +Function that aliases `ftllexengine.syntax.parse()` to `FluentParserV1.parse()`. ### Signature ```python -@dataclass(slots=True) -class ParseContext: - max_nesting_depth: int = MAX_DEPTH - current_depth: int = 0 - - def is_depth_exceeded(self) -> bool: ... - def enter_nesting(self) -> ParseContext: ... +def parse(source: str) -> Resource: ``` ### Parameters -| Field | Type | Description | -|:------|:-----|:------------| -| `max_nesting_depth` | `int` | Maximum nesting depth for placeables and function calls. | -| `current_depth` | `int` | Current nesting depth (0 = top level). | +| Name | Req | Semantics | +|:-----|:----|:----------| +| `source` | Y | Raw FTL source | ### Constraints -- Immutable: Uses slots for memory efficiency. -- Thread: Safe (explicit parameter passing, no global state). -- Purpose: Replaces thread-local state for async/concurrent compatibility. -- Security: Tracks depth for BOTH placeables and function calls (DoS prevention). -- Import: `from ftllexengine.syntax.parser import ParseContext` +- Import: `from ftllexengine.syntax import parse` +- Return: Parsed `Resource` +- Purpose: syntax-module convenience alias for parser-only tooling code +- State: Pure +- Thread: Safe --- -## `ParseContext.is_depth_exceeded` - -### Signature -```python -def is_depth_exceeded(self) -> bool: -``` - -### Parameters -| Parameter | Type | Req | Description | -|:----------|:-----|:----|:------------| - -### Constraints -- Return: True if current_depth >= max_nesting_depth. -- State: Read-only. -- Thread: Safe. - ---- +## `parse_stream` -## `ParseContext.enter_nesting` +Function that aliases `ftllexengine.syntax.parse_stream()` to `FluentParserV1.parse_stream()`. 
### Signature ```python -def enter_nesting(self) -> ParseContext: +def parse_stream(lines: Iterable[str]) -> Iterator[Entry]: ``` ### Parameters -| Parameter | Type | Req | Description | -|:----------|:-----|:----|:------------| +| Name | Req | Semantics | +|:-----|:----|:----------| +| `lines` | Y | Line-oriented FTL source | ### Constraints -- Return: New ParseContext with incremented depth. -- State: None (returns new instance). -- Thread: Safe. -- Usage: Called when entering placeables, function calls, or term calls with arguments. +- Import: `from ftllexengine.syntax import parse_stream` +- Return: Entry iterator in source order +- Purpose: syntax-module convenience alias for streaming parse workflows +- State: Streaming parse +- Thread: Safe --- -## `parse_decimal` - -### Signature -```python -def parse_decimal( - value: str, - locale_code: str, -) -> ParseResult[Decimal]: -``` - -### Parameters -| Parameter | Type | Req | Description | -|:----------|:-----|:----|:------------| -| `value` | `str` | Y | Locale-formatted number string. | -| `locale_code` | `str` | Y | BCP 47 locale identifier. | - -### Constraints -- Return: Tuple of (Decimal or None, errors). -- Raises: `BabelImportError` if Babel not installed. -- State: None. -- Thread: Safe. -- Dependency: Requires Babel for CLDR data. - ---- +## `serialize` -## `parse_fluent_number` +Function that aliases `ftllexengine.syntax.serialize()` to the serializer implementation. ### Signature ```python -def parse_fluent_number( - value: str, - locale_code: str, -) -> ParseResult[FluentNumber]: +def serialize(resource: Resource, *, validate: bool = True, max_depth: int = 100) -> str: ``` ### Parameters -| Parameter | Type | Req | Description | -|:----------|:-----|:----|:------------| -| `value` | `str` | Y | Locale-formatted number string. | -| `locale_code` | `str` | Y | BCP 47 locale identifier. 
| +| Name | Req | Semantics | +|:-----|:----|:----------| +| `resource` | Y | AST to serialize | +| `validate` | N | Validate before writing | +| `max_depth` | N | Serialization depth guard | ### Constraints -- Return: Tuple of (`FluentNumber` or None, errors). -- Raises: `BabelImportError` if Babel not installed. -- State: None. -- Thread: Safe. -- Dependency: Requires Babel for CLDR data. -- Precision: Preserves visible fraction digits using the original localized display string. -- Usage: Use when parsed numeric input should flow back into Fluent formatting or select expressions without manual `parse_decimal()` + `make_fluent_number()` composition. +- Import: `from ftllexengine.syntax import serialize` +- Return: FTL source string +- Raises: `SerializationValidationError` or `SerializationDepthError` +- Purpose: syntax-module serializer entry point +- State: Pure +- Thread: Safe --- -## `parse_date` - -### Signature -```python -def parse_date( - value: str, - locale_code: str, -) -> tuple[date | None, tuple[FrozenFluentError, ...]]: -``` +## `Cursor` -### Parameters -| Parameter | Type | Req | Description | -|:----------|:-----|:----|:------------| -| `value` | `str` | Y | Locale-formatted date string. | -| `locale_code` | `str` | Y | BCP 47 locale identifier. | - -### Constraints -- Return: Tuple of (date or None, errors). -- Raises: `BabelImportError` if Babel not installed. -- State: None. -- Thread: Safe. -- Dependency: Requires Babel for CLDR data. -- Preprocessing: Era strings stripped (English defaults + localized from Babel CLDR). Timezone pattern tokens stripped from format. Leading/trailing whitespace normalized after pattern conversion. -- Styles: Tries "short", "medium", "long", and "full" CLDR date patterns. -- Year variants: CLDR short patterns often specify a 2-digit year (`yy`); a 4-digit year variant is also accepted automatically. Both `"15.01.26"` and `"15.01.2026"` succeed for `lv_LV`. 
-- Safety: Uses `hasattr` fallback for Babel format object attribute access. - ---- - -## `parse_datetime` +Class that tracks an immutable parse position inside LF-normalized source text. ### Signature ```python -def parse_datetime( - value: str, - locale_code: str, - *, - tzinfo: timezone | None = None, -) -> tuple[datetime | None, tuple[FrozenFluentError, ...]]: +class Cursor: + def __init__(self, source: str, pos: int) -> None: ``` ### Parameters -| Parameter | Type | Req | Description | -|:----------|:-----|:----|:------------| -| `value` | `str` | Y | Locale-formatted datetime string. | -| `locale_code` | `str` | Y | BCP 47 locale identifier. | -| `tzinfo` | `timezone \| None` | N | Timezone to assign. | +| Name | Req | Semantics | +|:-----|:----|:----------| +| `source` | Y | LF-normalized source text | +| `pos` | Y | Character offset | ### Constraints -- Return: Tuple of (datetime or None, errors). -- Raises: `BabelImportError` if Babel not installed. -- State: None. -- Thread: Safe. -- Dependency: Requires Babel for CLDR data. -- Preprocessing: Era strings stripped (English defaults + localized from Babel CLDR). Timezone pattern tokens stripped from format. Leading/trailing whitespace normalized after pattern conversion. -- Styles: Tries "short", "medium", "long", and "full" CLDR datetime patterns. -- Year variants: 4-digit year inputs accepted for CLDR styles specifying `yy` (same as `parse_date`). -- Safety: Uses `hasattr` fallback for Babel format object attribute access. 
+- Import: `from ftllexengine.syntax import Cursor` +- Purpose: parser-building primitive for forward-only source traversal +- Invariants: `0 <= pos <= len(source)` +- Helpers: `is_eof`, `current`, `peek()`, `advance()`, `compute_line_col()` +- State: Immutable +- Thread: Safe --- -## `parse_currency` - -### Signature -```python -def parse_currency( - value: str, - locale_code: str, - *, - default_currency: str | None = None, - infer_from_locale: bool = False, -) -> tuple[tuple[Decimal, str] | None, tuple[FrozenFluentError, ...]]: -``` - -### Parameters -| Parameter | Type | Req | Description | -|:----------|:-----|:----|:------------| -| `value` | `str` | Y | Currency string with amount and symbol. | -| `locale_code` | `str` | Y | BCP 47 locale identifier. | -| `default_currency` | `str \| None` | N | Fallback currency for ambiguous symbols ($, kr, ¥). | -| `infer_from_locale` | `bool` | N | Infer currency from locale if symbol ambiguous. | - -### Constraints -- Return: Tuple of ((amount, currency_code) or None, errors). -- Raises: `BabelImportError` if Babel not installed. -- State: None. -- Thread: Safe. -- Dependency: Requires Babel for CLDR data. -- Validation: ISO 4217 codes validated against CLDR data. Non-ASCII uppercase letters rejected (ASCII A-Z only). -- Ambiguous: Yen sign (`¥`) resolves to CNY for `zh_*` locales, JPY otherwise. -- Ambiguous: Pound sign (`£`) resolves to EGP for `ar_*` locales, GBP otherwise. -- Resolution: With `infer_from_locale=True`, ambiguous symbols use locale-aware defaults. - ---- +## `ftllexengine.syntax.ParseResult` -## `is_valid_decimal` +Generic syntax-parser result object carrying both a parsed value and the next cursor. 
### Signature ```python -def is_valid_decimal(value: Decimal | None) -> TypeIs[Decimal]: +@dataclass(frozen=True, slots=True) +class ParseResult[T]: + value: T + cursor: Cursor ``` -### Parameters -| Parameter | Type | Req | Description | -|:----------|:-----|:----|:------------| -| `value` | `Decimal \| None` | Y | Decimal to validate (may be None). | - ### Constraints -- Return: True if finite Decimal, False if None/NaN/Infinity. -- Raises: None. -- State: None. -- Thread: Safe. - ---- - -## `is_valid_date` - -### Signature -```python -def is_valid_date(value: date | None) -> TypeIs[date]: -``` - -### Parameters -| Parameter | Type | Req | Description | -|:----------|:-----|:----|:------------| -| `value` | `date \| None` | Y | Date to validate. | - -### Constraints -- Return: True if not None. -- Raises: None. -- State: None. +- Import: `from ftllexengine.syntax import ParseResult` +- Distinct from: root `ParseResult[T]`, which is the locale-parsing `(value, errors)` alias +- Purpose: low-level parser-combinator result for syntax internals and tooling +- State: Immutable +- Thread: Safe --- -## `is_valid_datetime` +## `ParseError` -### Signature -```python -def is_valid_datetime(value: datetime | None) -> TypeIs[datetime]: -``` - -### Parameters -| Parameter | Type | Req | Description | -|:----------|:-----|:----|:------------| -| `value` | `datetime \| None` | Y | Datetime to validate. | - -### Constraints -- Return: True if not None. -- Raises: None. -- State: None. - ---- - -## `is_valid_currency` +Immutable syntax parse error carrying a `Cursor` and optional expected-token list. ### Signature ```python -def is_valid_currency( - value: tuple[Decimal, str] | None, -) -> TypeIs[tuple[Decimal, str]]: +@dataclass(frozen=True, slots=True) +class ParseError: + message: str + cursor: Cursor + expected: tuple[str, ...] 
= () ``` ### Parameters -| Parameter | Type | Req | Description | -|:----------|:-----|:----|:------------| -| `value` | `tuple[Decimal, str] \| None` | Y | Currency tuple to validate. | +| Name | Req | Semantics | +|:-----|:----|:----------| +| `message` | Y | Human-readable failure message | +| `cursor` | Y | Error location | +| `expected` | N | Expected token spellings | ### Constraints -- Return: True if not None and amount is finite. -- Raises: None. -- State: None. +- Import: `from ftllexengine.syntax import ParseError` +- Helpers: `format_error()`, `format_with_context()` +- Purpose: parser-building error object for tooling and low-level syntax helpers +- State: Immutable +- Thread: Safe --- -## `clear_date_caches` +## `SerializationValidationError` -Function that clears date and datetime pattern caches. +Exception raised when an AST would serialize into invalid Fluent syntax. ### Signature ```python -def clear_date_caches() -> None: +class SerializationValidationError(ValueError): ... ``` ### Constraints -- Return: None. -- Raises: Never. -- State: Clears `_get_date_patterns` and `_get_datetime_patterns` functools.cache. -- Thread: Safe (functools.cache internal locking). -- Babel: REQUIRED. Install with `pip install ftllexengine[babel]`. -- Import: `from ftllexengine.parsing import clear_date_caches` +- Import: `from ftllexengine.syntax import SerializationValidationError` +- Typical triggers: invalid identifiers, duplicate named arguments, or non-literal named-argument values +- Raised by: `serialize()` when `validate=True` --- -## `clear_currency_caches` +## `SerializationDepthError` -Function that clears currency-related caches. +Exception raised when serialization exceeds the configured AST nesting limit. ### Signature ```python -def clear_currency_caches() -> None: +class SerializationDepthError(ValueError): ... ``` ### Constraints -- Return: None. -- Raises: Never. -- State: Clears CLDR currency maps, pattern, and locale caches. 
-- Thread: Safe (functools.cache internal locking). -- Babel: REQUIRED. Install with `pip install ftllexengine[babel]`. -- Import: `from ftllexengine.parsing import clear_currency_caches` - ---- - -## Module Constants - -### `ISO_CURRENCY_CODE_LENGTH` - -```python -ISO_CURRENCY_CODE_LENGTH: int = 3 -``` - -| Attribute | Value | -|:----------|:------| -| Type | `int` | -| Value | 3 | -| Location | `ftllexengine.parsing.currency` | - -- Purpose: ISO 4217 currency codes are exactly 3 uppercase ASCII letters (A-Z only; non-ASCII uppercase rejected). -- Usage: Validation of currency code format in parsing functions. - ---- - -### `MAX_DEPTH` - -```python -MAX_DEPTH: int = 100 -``` - -| Attribute | Value | -|:----------|:------| -| Type | `int` | -| Value | 100 | -| Location | `ftllexengine.constants` | - -- Purpose: Unified depth limit for parser, resolver, serializer, and validators. -- Usage: Default for ParseContext.max_nesting_depth, FluentParserV1, serialize(max_depth=...). -- Security: Prevents DoS via deeply nested placeables and stack overflow from adversarial ASTs. - ---- - -### `MAX_LOCALE_LENGTH_HARD_LIMIT` - -```python -MAX_LOCALE_LENGTH_HARD_LIMIT: int = 1000 -``` - -| Attribute | Value | -|:----------|:------| -| Type | `int` | -| Value | 1000 | -| Location | `ftllexengine.constants` | - -- Purpose: Hard limit on locale code length for DoS prevention. -- Usage: FluentBundle input validation. Codes exceeding limit are rejected. -- Security: Prevents memory exhaustion from extremely long locale strings. -- Note: MAX_LOCALE_CODE_LENGTH (35) triggers warnings; this limit triggers rejection. - ---- - -## Parsing Behavior - -### Line Ending Normalization - -Parser normalizes all line endings to LF before parsing. - -### Constraints -- Normalization: CRLF (`\r\n`) and CR (`\r`) converted to LF (`\n`). -- Timing: Applied before any parsing occurs. -- Scope: Affects all line/column tracking and comment merging. 
-- Rationale: Per Fluent spec, ensures consistent AST representation across platforms. +- Import: `from ftllexengine.syntax import SerializationDepthError` +- Typical trigger: adversarial or malformed AST nesting beyond `max_depth` +- Raised by: `serialize()` --- -### Column-1 Enforcement +## `ASTVisitor` -Top-level entries must start at column 1 (beginning of line). - -### Constraints -- Rule: Messages, terms, and comments must start at column 1. -- Indented: Indented content at top level becomes Junk entry. -- Error: Junk annotation includes "Entry must start at column 1". -- Rationale: Per Fluent spec for message/term/comment positioning. - ---- - -### Pattern Whitespace Handling - -Patterns have leading/trailing blank lines trimmed. - -### Constraints -- Leading: Leading whitespace/blank lines removed from first TextElement. -- Trailing: Trailing blank lines removed (but trailing spaces on content lines preserved). -- Continuation: Multi-line patterns joined with newline (`\n`), not space. -- Implementation: `_trim_pattern_blank_lines()` post-processes pattern elements. -- Rationale: Per Fluent spec whitespace handling rules. - ---- - -## Source Position Utilities - -Character-offset-to-line/column conversion for FTL error reporting. All positions are character-based (Unicode code points, not bytes). Import: `from ftllexengine.syntax.position import ...` - ---- - -## `line_offset` - -### Signature -```python -def line_offset(source: str, pos: int) -> int: -``` - -### Parameters -| Parameter | Type | Req | Description | -|:----------|:-----|:----|:------------| -| `source` | `str` | Y | Complete FTL source text. | -| `pos` | `int` | Y | Character offset (0-indexed). | - -### Constraints -- Return: 0-based line number at `pos`. -- Raises: `ValueError` if `pos < 0`. -- State: None. -- Thread: Safe. -- Clamping: `pos` clamped to `len(source)` before counting. -- Complexity: O(pos) — counts `\n` chars in range. 
-- Import: `from ftllexengine.syntax.position import line_offset` - ---- - -## `column_offset` - -### Signature -```python -def column_offset(source: str, pos: int) -> int: -``` - -### Parameters -| Parameter | Type | Req | Description | -|:----------|:-----|:----|:------------| -| `source` | `str` | Y | Complete FTL source text. | -| `pos` | `int` | Y | Character offset (0-indexed). | - -### Constraints -- Return: 0-based column number at `pos` (characters since last `\n`). -- Raises: `ValueError` if `pos < 0`. -- State: None. -- Thread: Safe. -- Clamping: `pos` clamped to `len(source)` before computing. -- Import: `from ftllexengine.syntax.position import column_offset` - ---- - -## `format_position` +Generic base visitor class for read-only Fluent AST traversal. ### Signature ```python -def format_position(source: str, pos: int, zero_based: bool = True) -> str: +class ASTVisitor[T = ASTNode]: + def __init__(self, *, max_depth: int | None = None) -> None: ``` ### Parameters -| Parameter | Type | Req | Description | -|:----------|:-----|:----|:------------| -| `source` | `str` | Y | Complete FTL source text. | -| `pos` | `int` | Y | Character offset (0-indexed). | -| `zero_based` | `bool` | N | Use 0-based indexing (default: True). | +| Name | Req | Semantics | +|:-----|:----|:----------| +| `max_depth` | N | Traversal depth guard | ### Constraints -- Return: `"line:col"` string (e.g., `"2:5"` zero-based, `"3:6"` one-based). -- Raises: `ValueError` if `pos < 0`. -- State: None. -- Thread: Safe. 
-- Import: `from ftllexengine.syntax.position import format_position` +- Import: `from ftllexengine.syntax import ASTVisitor` +- Purpose: subclass and override `visit_NodeType()` methods for analysis or linting +- Helpers: `visit()` dispatches by node type; `generic_visit()` traverses child nodes +- Depth: protected by `DepthGuard` +- Thread: Safe for independent visitor instances --- -## `get_line_content` - -### Signature -```python -def get_line_content(source: str, line_number: int, zero_based: bool = True) -> str: -``` - -### Parameters -| Parameter | Type | Req | Description | -|:----------|:-----|:----|:------------| -| `source` | `str` | Y | Complete FTL source text. | -| `line_number` | `int` | Y | Line to extract. | -| `zero_based` | `bool` | N | Line number is 0-based (default: True). | - -### Constraints -- Return: Content of requested line without trailing newline. -- Raises: `ValueError` if `line_number < 0` or `line_number >= len(lines)`. -- State: None. -- Thread: Safe. -- Import: `from ftllexengine.syntax.position import get_line_content` +## `ASTTransformer` ---- - -## `get_error_context` +Generic base visitor class for Fluent AST rewrite passes. ### Signature ```python -def get_error_context( - source: str, - pos: int, - context_lines: int = 2, - marker: str = "^", -) -> str: +class ASTTransformer(ASTVisitor[ASTNode | None | list[ASTNode]]): + def __init__(self, *, max_depth: int | None = None) -> None: ``` ### Parameters -| Parameter | Type | Req | Description | -|:----------|:-----|:----|:------------| -| `source` | `str` | Y | Complete FTL source text. | -| `pos` | `int` | Y | Character offset of error (0-indexed). | -| `context_lines` | `int` | N | Lines of context before/after error (default: 2). | -| `marker` | `str` | N | Character for error marker line (default: `"^"`). 
| +| Name | Req | Semantics | +|:-----|:----|:----------| +| `max_depth` | N | Traversal depth guard | ### Constraints -- Return: Multi-line string with context lines and marker pointing to error column. -- Raises: `ValueError` if `pos < 0`. -- State: None. -- Thread: Safe. -- EOF: If `pos` points past the last line, emits an empty line with marker. -- Import: `from ftllexengine.syntax.position import get_error_context` +- Import: `from ftllexengine.syntax import ASTTransformer` +- Purpose: return replacement nodes, `None`, or node lists while walking the AST +- Typical use: transforms, migrations, or source-to-source rewrites before `serialize()` +- Depth: protected by `DepthGuard` +- Thread: Safe for independent transformer instances --- diff --git a/docs/DOC_04_Introspection.md b/docs/DOC_04_Introspection.md new file mode 100644 index 00000000..0e094409 --- /dev/null +++ b/docs/DOC_04_Introspection.md @@ -0,0 +1,362 @@ +--- +afad: "3.5" +version: "0.163.0" +domain: INTROSPECTION +updated: "2026-04-22" +route: + keywords: [introspection, validate_message_variables, extract_variables, extract_references, ISO 4217, ISO 3166, get_currency, get_territory] + questions: ["how do I inspect a message's variables and references?", "which ISO lookup helpers exist?", "how do I validate message-variable schemas?", "which Babel-backed introspection helpers are public?"] +--- + +# Introspection Reference + +--- + +## `validate_message_variables` + +Function that checks a parsed message or term against an expected variable schema. 
+ +### Signature +```python +def validate_message_variables( + message: Message | Term, + expected_variables: frozenset[str] | set[str], +) -> MessageVariableValidationResult: +``` + +### Constraints +- Return: `MessageVariableValidationResult` +- Purpose: boot-time or CI enforcement that messages declare exactly the variables the caller expects +- Babel: not required; operates on AST only + +--- + +## `introspect_message` + +Function that extracts variables, function calls, references, and selector presence from a `Message` or `Term`. + +### Signature +```python +def introspect_message( + message: Message | Term, + *, + use_cache: bool = True, +) -> MessageIntrospection: +``` + +### Parameters +| Name | Req | Semantics | +|:-----|:----|:----------| +| `message` | Y | Message or term AST node | +| `use_cache` | N | Enable weak-reference memoization | + +### Constraints +- Return: `MessageIntrospection` +- Raises: `TypeError` when `message` is not a `Message` or `Term` +- Cache: weak-reference cache keyed by AST node identity +- Babel: not required; operates on AST only + +--- + +## `extract_variables` + +Function that returns the declared variable names for a `Message` or `Term`. + +### Signature +```python +def extract_variables(message: Message | Term) -> frozenset[str]: +``` + +### Parameters +| Name | Req | Semantics | +|:-----|:----|:----------| +| `message` | Y | Message or term AST node | + +### Constraints +- Return: variable names without `$` prefixes +- Purpose: simplified convenience wrapper over `introspect_message()` +- Babel: not required; operates on AST only + +--- + +## `extract_references` + +Function that returns all referenced message ids and term ids from a `Message` or `Term`. 
+ +### Signature +```python +def extract_references(entry: Message | Term) -> tuple[frozenset[str], frozenset[str]]: +``` + +### Parameters +| Name | Req | Semantics | +|:-----|:----|:----------| +| `entry` | Y | Message or term AST node | + +### Constraints +- Return: `(message_refs, term_refs)` with attribute-qualified ids preserved +- Purpose: dependency analysis and impact assessment +- Babel: not required; operates on AST only + +--- + +## `extract_references_by_attribute` + +Function that returns message and term references grouped by source attribute. + +### Signature +```python +def extract_references_by_attribute( + entry: Message | Term, +) -> dict[str | None, tuple[frozenset[str], frozenset[str]]]: +``` + +### Parameters +| Name | Req | Semantics | +|:-----|:----|:----------| +| `entry` | Y | Message or term AST node | + +### Constraints +- Return: mapping from attribute name, or `None` for the value pattern, to reference sets +- Purpose: attribute-granular dependency and cycle analysis +- Babel: not required; operates on AST only + +--- + +## `clear_introspection_cache` + +Function that clears the message-introspection weak-reference cache. + +### Signature +```python +def clear_introspection_cache() -> None: +``` + +### Constraints +- State: Mutates module cache state +- Purpose: testing, benchmarking, or manual memory-pressure relief +- Thread: Safe + +--- + +## `require_currency_code` + +Boundary validator for ISO 4217 currency codes. + +### Signature +```python +def require_currency_code(value: object, field_name: str) -> CurrencyCode: +``` + +### Constraints +- Return: canonical uppercase `CurrencyCode` +- Raises: `TypeError` for non-strings; `ValueError` for invalid codes; `BabelImportError` when Babel is unavailable +- Purpose: validated currency boundary input for formatting and domain models + +--- + +## `require_territory_code` + +Boundary validator for ISO 3166-1 alpha-2 territory codes. 
+ +### Signature +```python +def require_territory_code(value: object, field_name: str) -> TerritoryCode: +``` + +### Constraints +- Return: canonical uppercase `TerritoryCode` +- Raises: `TypeError` for non-strings; `ValueError` for invalid codes; `BabelImportError` when Babel is unavailable +- Purpose: validated territory boundary input for locale-aware domain logic + +--- + +## `is_valid_currency_code` + +Type guard for ISO 4217 currency codes. + +### Signature +```python +def is_valid_currency_code(value: str) -> TypeIs[CurrencyCode]: +``` + +### Constraints +- Return: `True` only for known ISO 4217 codes +- Raises: `BabelImportError` when Babel is unavailable +- Purpose: runtime narrowing from `str` to `CurrencyCode` + +--- + +## `is_valid_territory_code` + +Type guard for ISO 3166-1 alpha-2 territory codes. + +### Signature +```python +def is_valid_territory_code(value: str) -> TypeIs[TerritoryCode]: +``` + +### Constraints +- Return: `True` only for known ISO 3166-1 alpha-2 codes +- Raises: `BabelImportError` when Babel is unavailable +- Purpose: runtime narrowing from `str` to `TerritoryCode` + +--- + +## `get_currency_decimal_digits` + +Function that returns the embedded ISO 4217 decimal precision for a currency code. + +### Signature +```python +def get_currency_decimal_digits(code: str) -> int | None: +``` + +### Constraints +- Return: decimal precision for a known code, otherwise `None` +- Babel: not required; uses the embedded ISO 4217 tables +- Purpose: authoritative ISO currency exponent lookup + +--- + +## `get_cldr_version` + +Function that reports the Babel CLDR data version. + +### Signature +```python +def get_cldr_version() -> str: +``` + +### Constraints +- Return: CLDR version string from Babel +- Raises: `BabelImportError` when Babel is unavailable + +--- + +## `get_territory` + +Function that looks up localized ISO 3166-1 territory metadata. 
+ +### Signature +```python +def get_territory(code: str, locale: str = "en") -> TerritoryInfo | None: +``` + +### Parameters +| Name | Req | Semantics | +|:-----|:----|:----------| +| `code` | Y | ISO alpha-2 territory code | +| `locale` | N | Localization locale | + +### Constraints +- Return: `TerritoryInfo`, or `None` for unknown codes +- Raises: `BabelImportError` when Babel is unavailable +- Cache: cached per normalized `(code, locale)` pair +- Thread: Safe + +--- + +## `get_currency` + +Function that looks up localized ISO 4217 currency metadata. + +### Signature +```python +def get_currency(code: str, locale: str = "en") -> CurrencyInfo | None: +``` + +### Parameters +| Name | Req | Semantics | +|:-----|:----|:----------| +| `code` | Y | ISO currency code | +| `locale` | N | Localization locale | + +### Constraints +- Return: `CurrencyInfo`, or `None` for unknown codes +- Raises: `BabelImportError` when Babel is unavailable +- Cache: cached per normalized `(code, locale)` pair +- Thread: Safe + +--- + +## `list_territories` + +Function that lists all known territories for a locale. + +### Signature +```python +def list_territories(locale: str = "en") -> frozenset[TerritoryInfo]: +``` + +### Parameters +| Name | Req | Semantics | +|:-----|:----|:----------| +| `locale` | N | Localization locale | + +### Constraints +- Return: `frozenset[TerritoryInfo]` +- Raises: `BabelImportError` when Babel is unavailable +- Cache: cached per normalized locale +- Thread: Safe + +--- + +## `list_currencies` + +Function that lists all known currencies for a locale. 
+ +### Signature +```python +def list_currencies(locale: str = "en") -> frozenset[CurrencyInfo]: +``` + +### Parameters +| Name | Req | Semantics | +|:-----|:----|:----------| +| `locale` | N | Localization locale | + +### Constraints +- Return: `frozenset[CurrencyInfo]` +- Raises: `BabelImportError` when Babel is unavailable +- Completeness: returns the full ISO 4217 set, falling back to English names when CLDR localization is missing +- Cache: cached per normalized locale +- Thread: Safe + +--- + +## `get_territory_currencies` + +Function that returns the active legal-tender ISO currency codes for a territory. + +### Signature +```python +def get_territory_currencies(territory: str) -> tuple[CurrencyCode, ...]: +``` + +### Parameters +| Name | Req | Semantics | +|:-----|:----|:----------| +| `territory` | Y | ISO alpha-2 territory code | + +### Constraints +- Return: tuple of active ISO 4217 currency codes, or `()` for unknown territories +- Raises: `BabelImportError` when Babel is unavailable +- Cache: cached per normalized territory code +- Thread: Safe + +--- + +## `clear_iso_cache` + +Function that clears the ISO lookup caches used by territory and currency introspection. 
+ +### Signature +```python +def clear_iso_cache() -> None: +``` + +### Constraints +- State: Mutates module cache state +- Purpose: testing, benchmarking, or manual memory-pressure relief +- Thread: Safe diff --git a/docs/DOC_04_Runtime.md b/docs/DOC_04_Runtime.md index 83151e77..1b9d344f 100644 --- a/docs/DOC_04_Runtime.md +++ b/docs/DOC_04_Runtime.md @@ -1,1615 +1,280 @@ --- -afad: "3.3" -version: "0.162.0" +afad: "3.5" +version: "0.163.0" domain: RUNTIME -updated: "2026-03-23" +updated: "2026-04-22" route: - keywords: [number_format, datetime_format, currency_format, make_fluent_number, FluentNumber, fluent_function, formatting, locale, IntegrityCache, CacheConfig, CacheStats, LocalizationCacheStats, CacheAuditLogEntry, WriteLogEntry, audit-log, NaN, idempotent_writes, content_hash, IntegrityCacheEntry, detect_cycles, entry_dependency_set, make_cycle_key, required_messages, clear_module_caches, component_filter] - questions: ["how to format numbers?", "how to format dates?", "how to format currency?", "what is FluentNumber?", "how do I construct a FluentNumber manually?", "how do I register a custom Fluent function?", "what is IntegrityCache?", "how to enable cache audit?", "how do I read the cache audit log?", "how does cache handle NaN?", "what is idempotent write?", "how does thundering herd work?", "how to detect dependency cycles?", "what is CacheStats?", "what fields does get_cache_stats return?", "what is required_messages in LocalizationBootConfig?", "how do I clear specific caches?"] + keywords: [CacheConfig, FunctionRegistry, fluent_function, number_format, currency_format, select_plural_category, clear_module_caches] + questions: ["how do I configure runtime formatting?", "how do custom functions and registries work?", "where are cache config and write-log entry types documented?"] --- # Runtime Reference ---- - -## `FluentNumber` - -Wrapper preserving numeric identity and precision through NUMBER() formatting. 
- -### Signature -```python -@dataclass(frozen=True, slots=True) -class FluentNumber: - value: int | Decimal - formatted: str - precision: int | None = None -``` - -### Parameters -| Field | Type | Req | Description | -|:------|:-----|:----|:------------| -| `value` | `int \| Decimal` | Y | Original numeric value for plural matching. `bool` is rejected — use `int(b)` at call site. | -| `formatted` | `str` | Y | Locale-formatted string for display. | -| `precision` | `int \| None` | N | Visible fraction digit count (CLDR v operand). Must be >= 0 when set. None if not specified. | - -### Properties -| Property | Type | Description | -|:---------|:-----|:------------| -| `decimal_value` | `Decimal` | Returns `value` as exact `Decimal`. `int` is coerced via `Decimal(value)` (no precision loss); `Decimal` is returned as the same object. | - -### Constraints -- Return: Frozen dataclass instance. -- Raises: `TypeError` if `value` is `bool` (no numeric localization semantics). `ValueError` if `precision < 0` (CLDR v operand is always non-negative). -- State: Immutable. Safe for caching. -- Thread: Safe. -- Usage: Returned by `number_format()`, `currency_format()`, `parse_fluent_number()`, and `make_fluent_number()`. Preserves numeric identity and precision metadata for select expressions. -- Str: `str(fluent_number)` returns `formatted` for display. -- Plural: Precision affects CLDR plural category selection. For example, "1.00" with precision=2 selects "other" category (v=2), not "one" (v=0). -- Import: `from ftllexengine.runtime import FluentNumber` -- Also available: `from ftllexengine import FluentNumber` - ---- - -## `make_fluent_number` - -Helper that constructs a `FluentNumber` from a domain numeric value. 
- -### Signature -```python -def make_fluent_number( - value: int | Decimal, - *, - formatted: str | None = None, -) -> FluentNumber: -``` - -### Parameters -| Parameter | Type | Req | Description | -|:----------|:-----|:----|:------------| -| `value` | `int \| Decimal` | Y | Canonical numeric value. | -| `formatted` | `str \| None` | N | Display string. Defaults to `str(value)`. | - -### Constraints -- Return: `FluentNumber` with inferred visible precision. -- Raises: `TypeError` if `value` is `bool` or not `int \| Decimal`. -- State: Pure. -- Thread: Safe. -- Precision: When `formatted` is provided, visible fraction digits are inferred from the rendered string when it still represents `value`; otherwise precision falls back to the value's own decimal places. -- Usage: Use when downstream code already has an `int` or `Decimal` and needs Fluent selector semantics without calling `NUMBER()` or `CURRENCY()`. -- Import: `from ftllexengine.runtime import make_fluent_number` - ---- - -## `number_format` - -### Signature -```python -def number_format( - value: int | Decimal, - locale_code: str = "en-US", - *, - minimum_fraction_digits: int = 0, - maximum_fraction_digits: int = 3, - use_grouping: bool = True, - pattern: str | None = None, - numbering_system: str = "latn", -) -> FluentNumber: -``` - -### Parameters -| Parameter | Type | Req | Description | -|:----------|:-----|:----|:------------| -| `value` | `int \| Decimal` | Y | Number to format. | -| `locale_code` | `str` | N | BCP 47 locale code. | -| `minimum_fraction_digits` | `int` | N | Minimum decimal places. | -| `maximum_fraction_digits` | `int` | N | Maximum decimal places. | -| `use_grouping` | `bool` | N | Use thousands separator. | -| `pattern` | `str \| None` | N | Custom Babel number pattern. | -| `numbering_system` | `str` | N | CLDR numbering system (e.g. `"arab"`, `"deva"`). Default: `"latn"`. 
| - -### Constraints -- Return: `FluentNumber` with formatted string, original numeric value, and precision metadata. -- Raises: `TypeError` / `ValueError` for invalid locale boundary values. -- Raises: Unknown but structurally valid locales fall back to en_US with a logged warning. -- State: None. -- Thread: Safe. -- Plural: Original value and precision preserved for correct CLDR plural category matching in select expressions. Precision parameter affects plural category selection (e.g., "1.00" with minimum_fraction_digits=2 selects "other" category due to v=2, not "one"). -- Bounds: Fraction digit parameters clamped to `MAX_FORMAT_DIGITS` (100). Values exceeding the limit are rejected with `ValueError`. -- Clamp: When `minimum_fraction_digits > maximum_fraction_digits`, `maximum` is silently raised to `minimum`. Matches JavaScript `Intl.NumberFormat` semantics: `NUMBER($n, minimumFractionDigits: 4)` yields 4 decimal places, not an error. -- Rounding: Delegates to Babel `format_decimal` with `decimal_quantization=True` (default). Uses `ROUND_HALF_EVEN` (IEEE 754 banker's rounding). Non-finite `Decimal` values (`Infinity`, `NaN`) are forwarded as-is; behaviour is locale-defined. - ---- - -## `datetime_format` - -### Signature -```python -def datetime_format( - value: date | datetime | str, - locale_code: str = "en-US", - *, - date_style: Literal["short", "medium", "long", "full"] = "medium", - time_style: Literal["short", "medium", "long", "full"] | None = None, - pattern: str | None = None, -) -> str: -``` - -### Parameters -| Parameter | Type | Req | Description | -|:----------|:-----|:----|:------------| -| `value` | `date \| datetime \| str` | Y | Date, datetime, or ISO 8601 string. Plain `date` is promoted to midnight `datetime` when `time_style` or `pattern` is set. | -| `locale_code` | `str` | N | BCP 47 locale code. | -| `date_style` | `Literal[...]` | N | Date format style. | -| `time_style` | `Literal[...] \| None` | N | Time format style. 
| -| `pattern` | `str \| None` | N | Custom Babel datetime pattern. | - -### Constraints -- Return: Formatted date/datetime string. -- Raises: `FrozenFluentError` (FORMATTING) for invalid ISO 8601 strings. -- Raises: `TypeError` / `ValueError` for invalid locale boundary values. -- Raises: Unknown but structurally valid locales fall back to en_US. -- State: None. -- Thread: Safe. - ---- - -## `currency_format` - -### Signature -```python -def currency_format( - value: int | Decimal, - locale_code: str = "en-US", - *, - currency: str, - currency_display: Literal["symbol", "code", "name"] = "symbol", - pattern: str | None = None, - use_grouping: bool = True, - currency_digits: bool = True, - numbering_system: str = "latn", -) -> FluentNumber: -``` - -### Parameters -| Parameter | Type | Req | Description | -|:----------|:-----|:----|:------------| -| `value` | `int \| Decimal` | Y | Monetary amount. | -| `locale_code` | `str` | N | BCP 47 locale code. | -| `currency` | `str` | Y | ISO 4217 currency code. | -| `currency_display` | `Literal[...]` | N | Display style: `"symbol"`, `"code"`, or `"name"`. | -| `pattern` | `str \| None` | N | Custom CLDR currency pattern. | -| `use_grouping` | `bool` | N | Use thousands separator. Default: `True`. | -| `currency_digits` | `bool` | N | Apply ISO 4217 decimal digit count (e.g. JPY=0, BHD=3). Ignored when `pattern` is set. Default: `True`. | -| `numbering_system` | `str` | N | CLDR numbering system (e.g. `"arab"`, `"deva"`). Default: `"latn"`. | - -### Constraints -- Return: `FluentNumber` with formatted currency string and computed precision. Enables CURRENCY results as selectors in plural/select expressions. -- Raises: `TypeError` / `ValueError` for invalid locale boundary values. -- Raises: Unknown but structurally valid locales fall back to en_US with a logged warning. -- State: None. -- Thread: Safe. -- Rounding: Delegates to Babel `format_currency` with `decimal_quantization=True` (default). Uses `ROUND_HALF_EVEN`. 
`currency_digits=True` (default) applies ISO 4217 digit counts before formatting. `currency_digits` is ignored when `pattern` is provided — the pattern controls precision. - ---- - -## `FunctionSignature` - -### Signature -```python -@dataclass(frozen=True, slots=True) -class FunctionSignature: - python_name: str - ftl_name: str - param_mapping: tuple[tuple[str, str], ...] - callable: Callable[..., FluentValue] -``` - -### Parameters -| Field | Type | Description | -|:------|:-----|:------------| -| `python_name` | `str` | Python function name (snake_case). | -| `ftl_name` | `str` | FTL function name (UPPERCASE). | -| `param_mapping` | `tuple[tuple[str, str], ...]` | Immutable mapping of FTL camelCase to Python snake_case params. | -| `callable` | `Callable[..., FluentValue]` | The registered Python function. | - -### Constraints -- Return: Frozen dataclass instance. -- State: Fully immutable. param_mapping uses tuple for safe sharing across registries. -- Thread: Safe for reads. - ---- - -## `FunctionRegistry` - -### Signature -```python -class FunctionRegistry: - __slots__ = ("_frozen", "_functions") - - def __init__(self) -> None: ... - def register( - self, - func: Callable[..., FluentValue], - *, - ftl_name: str | None = None - ) -> None: ... - def call( - self, - ftl_name: str, - positional: Sequence[FluentValue], - named: Mapping[str, FluentValue], - ) -> FluentValue: ... - def has_function(self, ftl_name: str) -> bool: ... - def freeze(self) -> None: ... - @property - def frozen(self) -> bool: ... - def get_callable(self, ftl_name: str) -> Callable[..., FluentValue] | None: ... - def get_function_info(self, ftl_name: str) -> FunctionSignature | None: ... - def get_python_name(self, ftl_name: str) -> str | None: ... - def list_functions(self) -> list[str]: ... - def copy(self) -> FunctionRegistry: ... - def __iter__(self) -> Iterator[str]: ... - def __len__(self) -> int: ... - def __contains__(self, ftl_name: str) -> bool: ... 
-``` - -### Parameters -| Parameter | Type | Req | Description | -|:----------|:-----|:----|:------------| - -### Constraints -- Return: Registry instance. -- State: Mutable until frozen. Shared registry is frozen after creation. -- Thread: Unsafe for concurrent register(). Safe for reads after freeze(). -- Memory: Uses __slots__ for reduced memory footprint. -- Freeze: Once frozen, register() raises TypeError. Use copy() for mutable clone. - ---- - -## `FunctionRegistry.get_callable` - -### Signature -```python -def get_callable(self, ftl_name: str) -> Callable[..., FluentValue] | None: -``` - -### Parameters -| Parameter | Type | Req | Description | -|:----------|:-----|:----|:------------| -| `ftl_name` | `str` | Y | FTL function name (e.g., "NUMBER"). | - -### Constraints -- Return: Registered callable, or None if not found. -- State: Read-only access. -- Thread: Safe for reads. - ---- - -## `FunctionRegistry.call` - -### Signature -```python -def call( - self, - ftl_name: str, - positional: Sequence[FluentValue], - named: Mapping[str, FluentValue], -) -> FluentValue: -``` - -### Parameters -| Parameter | Type | Req | Description | -|:----------|:-----|:----|:------------| -| `ftl_name` | `str` | Y | Function name from FTL (e.g., "NUMBER"). | -| `positional` | `Sequence[FluentValue]` | Y | Positional arguments. | -| `named` | `Mapping[str, FluentValue]` | Y | Named arguments from FTL (camelCase). | - -### Constraints -- Return: Function result as FluentValue. -- Raises: `FrozenFluentError` (category=REFERENCE) if function not found. -- Raises: `FrozenFluentError` (category=RESOLUTION) if function execution fails. -- State: Read-only access to registry. -- Thread: Safe for calls. 
- ---- - -## `FunctionRegistry.register` - -### Signature -```python -def register( - self, - func: Callable[..., FluentValue], - *, - ftl_name: str | None = None, - param_map: dict[str, str] | None = None, -) -> None: -``` - -### Parameters -| Parameter | Type | Req | Description | -|:----------|:-----|:----|:------------| -| `func` | `Callable[..., FluentValue]` | Y | Function to register. | -| `ftl_name` | `str \| None` | N | FTL name override (default: UPPERCASE of func name). | -| `param_map` | `dict[str, str] \| None` | N | Custom parameter mappings (overrides auto-generation). | - -### Constraints -- Return: None. -- Raises: `TypeError` if registry is frozen (via `freeze()` method) or if function marked with `inject_locale=True` has incompatible signature (requires ≥2 positional parameters for value and locale_code). -- Raises: `ValueError` if parameter names collide after underscore stripping (e.g., `_value` and `value`). -- State: Mutates registry. Validates function signature at registration (fail-fast). -- Thread: Unsafe. - ---- - -## `create_default_registry` - -### Signature -```python -def create_default_registry() -> FunctionRegistry: -``` - -### Parameters -| Parameter | Type | Req | Description | -|:----------|:-----|:----|:------------| - -### Constraints -- Return: Fresh FunctionRegistry with NUMBER, DATETIME, CURRENCY registered. -- Raises: Never. -- State: Returns new isolated instance each call. -- Thread: Safe. -- Import: `from ftllexengine.runtime.functions import create_default_registry` - ---- - -## `get_shared_registry` - -### Signature -```python -def get_shared_registry() -> FunctionRegistry: -``` - -### Parameters -| Parameter | Type | Req | Description | -|:----------|:-----|:----|:------------| - -### Constraints -- Return: Frozen FunctionRegistry singleton with NUMBER, DATETIME, CURRENCY. -- Raises: Never. -- State: Returns shared frozen singleton (lazy initialized). Calling `register()` raises `TypeError`. 
-- Thread: Safe for reads. Use `copy()` to get mutable registry for customization. -- Performance: Avoids repeated registry creation for multi-bundle applications. -- Import: `from ftllexengine.runtime.functions import get_shared_registry` - ---- - -## `FunctionCategory` - -### Signature -```python -class FunctionCategory(StrEnum): - FORMATTING = "formatting" - TEXT = "text" - CUSTOM = "custom" -``` - -### Parameters -| Value | Description | -|:------|:------------| -| `FORMATTING` | Number, date, currency formatting functions. | -| `TEXT` | Text manipulation functions. | -| `CUSTOM` | User-defined functions. | - -### Constraints -- StrEnum: Members ARE strings. `str(FunctionCategory.FORMATTING) == "formatting"` -- Import: `from ftllexengine.runtime.function_metadata import FunctionCategory` - ---- - -## `FunctionMetadata` - -### Signature -```python -@dataclass(frozen=True, slots=True) -class FunctionMetadata: - python_name: str - ftl_name: str - requires_locale: bool - expected_positional_args: int = 1 - category: FunctionCategory = FunctionCategory.FORMATTING -``` - -### Parameters -| Field | Type | Req | Description | -|:------|:-----|:----|:------------| -| `python_name` | `str` | Y | Python function name (snake_case). | -| `ftl_name` | `str` | Y | FTL function name (UPPERCASE). | -| `requires_locale` | `bool` | Y | Whether function needs bundle locale injected. | -| `expected_positional_args` | `int` | N | Expected positional args from FTL (before locale). | -| `category` | `FunctionCategory` | N | Function category for documentation. | - -### Constraints -- Immutable: Frozen dataclass with slots. -- Thread: Safe. 
-- Import: `from ftllexengine.runtime.function_metadata import FunctionMetadata` - ---- - -## `BUILTIN_FUNCTIONS` - -### Signature -```python -BUILTIN_FUNCTIONS: dict[str, FunctionMetadata] = { - "NUMBER": FunctionMetadata(...), - "DATETIME": FunctionMetadata(...), - "CURRENCY": FunctionMetadata(...), -} -``` - -### Constraints -- Type: `dict[str, FunctionMetadata]` -- Contents: Metadata for NUMBER, DATETIME, CURRENCY. -- Read-only: Do not modify at runtime. -- Import: `from ftllexengine.runtime.function_metadata import BUILTIN_FUNCTIONS` - ---- - -## `is_builtin_with_locale_requirement` - -### Signature -```python -def is_builtin_with_locale_requirement(func: object) -> bool: -``` - -### Parameters -| Parameter | Type | Req | Description | -|:----------|:-----|:----|:------------| -| `func` | `object` | Y | Callable to check. | - -### Constraints -- Return: True if func has `_ftl_requires_locale = True`. -- Thread: Safe. -- Import: `from ftllexengine.runtime.functions import is_builtin_with_locale_requirement` - ---- - -## `FunctionRegistry.get_expected_positional_args` - -### Signature -```python -def get_expected_positional_args(self, ftl_name: str) -> int | None: -``` - -### Parameters -| Parameter | Type | Req | Description | -|:----------|:-----|:----|:------------| -| `ftl_name` | `str` | Y | FTL function name (e.g., "NUMBER"). | - -### Constraints -- Return: Expected positional arg count from `BUILTIN_FUNCTIONS` metadata, or None if not built-in. -- Thread: Safe. -- Access: Via `bundle.function_registry.get_expected_positional_args(name)` or registry instance. - ---- - -## `FunctionRegistry.should_inject_locale` - -### Signature -```python -def should_inject_locale(self, ftl_name: str) -> bool: -``` - -### Parameters -| Parameter | Type | Req | Description | -|:----------|:-----|:----|:------------| -| `ftl_name` | `str` | Y | FTL function name. | - -### Constraints -- Return: True if locale should be injected for this call. 
-- Logic: Checks callable's `_ftl_requires_locale` attribute set by `@fluent_function(inject_locale=True)`. -- Thread: Safe. -- Access: Via `bundle.function_registry.should_inject_locale(name)` or registry instance. - ---- - -## `fluent_function` - -### Signature -```python -@overload -def fluent_function[F: Callable[..., FluentValue]](func: F, *, inject_locale: bool = False) -> F: ... -@overload -def fluent_function[F: Callable[..., FluentValue]](func: None = None, *, inject_locale: bool = False) -> Callable[[F], F]: ... -``` - -### Parameters -| Parameter | Type | Req | Description | -|:----------|:-----|:----|:------------| -| `func` | `F \| None` | N | Function to decorate. | -| `inject_locale` | `bool` | N | If True, inject bundle locale as second argument. | - -### Constraints -- Return: Decorated function with Fluent metadata attributes. -- Thread: Safe. -- Import: `from ftllexengine.runtime import fluent_function` -- Also available: `from ftllexengine import fluent_function` - ---- - -## `select_plural_category` - -Function that selects the CLDR plural category for a number using Babel's CLDR data. - -### Signature -```python -def select_plural_category( - n: int | Decimal, - locale: str, - precision: int | None = None, - *, - ordinal: bool = False, -) -> str: -``` - -### Parameters -| Parameter | Type | Req | Semantics | -|:----------|:-----|:----|:----------| -| `n` | `int \| Decimal` | Y | Number to categorize | -| `locale` | `str` | Y | BCP-47 or POSIX locale code | -| `precision` | `int \| None` | N | Fraction digits for CLDR v operand | -| `ordinal` | `bool` | N | Use ordinal rules (1st/2nd/3rd); default `False` = cardinal | - -### Constraints -- Return: CLDR plural category (`"zero"`, `"one"`, `"two"`, `"few"`, `"many"`, `"other"`). -- Raises: `BabelImportError` if Babel not installed. Returns `"other"` on invalid locale. -- State: Read-only. -- Thread: Safe. 
-- Rounding: Uses `ROUND_HALF_EVEN` when `precision` is set, matching Babel's default rounding. -- Ordinal: When `ordinal=True`, uses `Locale.ordinal_form` (rank context: "1st", "2nd", "3rd") instead of `Locale.plural_form` (count context: "1 item", "2 items"). - ---- - -## FTL Function Name Mapping - -| FTL Name | Python Function | Parameter Mapping | -|:---------|:----------------|:------------------| -| `NUMBER` | `number_format` | minimumFractionDigits -> minimum_fraction_digits, useGrouping -> use_grouping, numberingSystem -> numbering_system | -| `DATETIME` | `datetime_format` | dateStyle -> date_style, timeStyle -> time_style | -| `CURRENCY` | `currency_format` | currencyDisplay -> currency_display, useGrouping -> use_grouping, currencyDigits -> currency_digits, numberingSystem -> numbering_system | - ---- - -## Custom Function Protocol - -### Signature (without locale injection) -```python -def CUSTOM_FUNCTION( - positional_arg: FluentValue, - /, - *, - keyword_arg: str = "default", -) -> FluentValue: -``` - -### Signature (with locale injection via `@fluent_function(inject_locale=True)`) -```python -@fluent_function(inject_locale=True) -def CUSTOM_FUNCTION( - positional_arg: FluentValue, - locale_code: str, - /, - *, - keyword_arg: str = "default", -) -> FluentValue: -``` - -### Parameters -| Parameter | Type | Req | Description | -|:----------|:-----|:----|:------------| -| First positional | `FluentValue` | Y | Primary input value. | -| `locale_code` | `str` | Opt | Canonical lowercase underscore locale code (positional-only). Only present when `@fluent_function(inject_locale=True)` is applied. | -| Keyword args | `FluentValue` | N | Named options. | - -### Constraints -- Return: FluentValue (typically str; non-string values converted by resolver). -- Raises: Should not raise. Return fallback on error. -- State: Should be stateless. -- Thread: Should be safe. -- Locale: `locale_code` is NOT automatically injected. 
Use `@fluent_function(inject_locale=True)` to opt in. When injected, it is the bundle's canonical lowercase underscore `LocaleCode`. - ---- - -## `validate_resource` - -### Signature -```python -def validate_resource( - source: str, - *, - parser: FluentParserV1 | None = None, - known_messages: frozenset[str] | None = None, - known_terms: frozenset[str] | None = None, - known_msg_deps: Mapping[str, frozenset[str]] | None = None, - known_term_deps: Mapping[str, frozenset[str]] | None = None, -) -> ValidationResult: -``` - -### Parameters -| Parameter | Type | Req | Description | -|:----------|:-----|:----|:------------| -| `source` | `str` | Y | FTL file content. | -| `parser` | `FluentParserV1 \| None` | N | Parser instance (creates default if not provided). | -| `known_messages` | `frozenset[str] \| None` | N | Message IDs from other resources (cross-resource validation). | -| `known_terms` | `frozenset[str] \| None` | N | Term IDs from other resources (cross-resource validation). | -| `known_msg_deps` | `Mapping[str, frozenset[str]] \| None` | N | Dependency graph for known messages (prefixed: "msg:name", "term:name"). | -| `known_term_deps` | `Mapping[str, frozenset[str]] \| None` | N | Dependency graph for known terms (prefixed: "msg:name", "term:name"). | - -### Constraints -- Return: ValidationResult with errors, warnings, and semantic annotations. -- Validation Passes: (1) Syntax errors, (2) Structural issues + duplicate attributes + shadow conflicts, (3) Undefined refs, (4) Cycles (intra-resource and cross-resource), (5) Chain depth, (6) Semantic (Fluent spec E0001-E0013). -- Chain Depth: Warns if reference chains exceed MAX_DEPTH (would fail at runtime with MAX_DEPTH_EXCEEDED). -- Cross-Resource: References to `known_messages`/`known_terms` do not produce undefined warnings. Cycles detected across resource boundaries. Shadow warnings emitted when current resource redefines known entry. 
-- Duplicate Attributes: Emits VALIDATION_DUPLICATE_ATTRIBUTE (5107) for duplicate attribute IDs within entry. -- Shadow Warnings: Emits VALIDATION_SHADOW_WARNING (5108) when entry ID matches known bundle entry. -- Raises: `TypeError` if source is not a str. -- State: None (creates isolated parser if not provided). -- Thread: Safe. -- Import: `from ftllexengine.validation import validate_resource` - ---- - -## `ResolutionContext` - -### Signature -```python -@dataclass(slots=True) -class ResolutionContext: - stack: list[str] = field(default_factory=list) - _seen: set[str] = field(default_factory=set) - max_depth: int = MAX_DEPTH - max_expression_depth: int = MAX_DEPTH - max_expansion_size: int = DEFAULT_MAX_EXPANSION_SIZE - _total_chars: int = 0 - _expression_guard: DepthGuard = field(init=False) - - def __post_init__(self) -> None: ... - def push(self, key: str) -> None: ... - def pop(self) -> str: ... - def contains(self, key: str) -> bool: ... - def track_expansion(self, char_count: int) -> None: ... - @property - def total_chars(self) -> int: ... - @property - def expression_guard(self) -> DepthGuard: ... - @property - def expression_depth(self) -> int: ... - @property - def depth(self) -> int: ... - def is_depth_exceeded(self) -> bool: ... - def get_cycle_path(self, key: str) -> list[str]: ... -``` - -### Parameters -| Field | Type | Description | -|:------|:-----|:------------| -| `stack` | `list[str]` | Resolution stack for cycle path. | -| `_seen` | `set[str]` | O(1) membership check set. | -| `max_depth` | `int` | Maximum resolution depth (default: MAX_DEPTH=100). | -| `max_expression_depth` | `int` | Maximum expression depth (default: MAX_DEPTH=100). | -| `max_expansion_size` | `int` | Maximum total output characters (default: 1,000,000). Prevents Billion Laughs. | -| `_total_chars` | `int` | Running character count (internal; use `total_chars` property). | -| `_expression_guard` | `DepthGuard` | Internal depth guard (init=False). 
| - -### Constraints -- Thread: Safe (explicit parameter passing, no global state). -- Purpose: Internal resolver state; created fresh per resolution call in FluentBundle. -- Complexity: contains() is O(1) via _seen set. -- Expansion: track_expansion() raises EXPANSION_BUDGET_EXCEEDED when total_chars exceeds max_expansion_size. -- Constants: `MAX_DEPTH`, `DEFAULT_MAX_EXPANSION_SIZE` from `ftllexengine.constants` - ---- - -## `ResolutionContext.push` - -### Signature -```python -def push(self, key: str) -> None: -``` - -### Parameters -| Parameter | Type | Req | Description | -|:----------|:-----|:----|:------------| -| `key` | `str` | Y | Message key to push onto stack. | - -### Constraints -- Return: None. -- State: Mutates stack. - ---- - -## `ResolutionContext.pop` - -### Signature -```python -def pop(self) -> str: -``` - -### Parameters -| Parameter | Type | Req | Description | -|:----------|:-----|:----|:------------| - -### Constraints -- Return: Removed message key. -- Raises: `DataIntegrityError` if stack is empty (underflow) or if `_stack` and `_seen` are out of sync (state corruption). Peek-before-mutate: neither structure is modified when corruption is detected. -- State: Mutates `_stack` and `_seen` atomically only after invariant verification. - ---- - -## `ResolutionContext.contains` - -### Signature -```python -def contains(self, key: str) -> bool: -``` - -### Parameters -| Parameter | Type | Req | Description | -|:----------|:-----|:----|:------------| -| `key` | `str` | Y | Message key to check. | - -### Constraints -- Return: True if key is in resolution stack (cycle detected). -- Complexity: O(1) via _seen set lookup. -- State: Read-only. - ---- - -## `ResolutionContext.total_chars` - -### Signature -```python -@property -def total_chars(self) -> int: -``` - -### Constraints -- Return: Running count of resolved characters. -- State: Read-only property over internal `_total_chars`. 
-- Usage: Preferred over direct `_total_chars` access for encapsulation. - ---- - -## `ResolutionContext.expression_guard` - -### Signature -```python -@property -def expression_guard(self) -> DepthGuard: -``` - -### Constraints -- Return: DepthGuard for expression depth tracking. -- Usage: Use as context manager (`with context.expression_guard:`). -- Raises: `FrozenFluentError` (category=RESOLUTION) when depth limit exceeded. -- State: Read-only property returning internal DepthGuard. - ---- - -## `ResolutionContext.expression_depth` - -### Signature -```python -@property -def expression_depth(self) -> int: -``` - -### Constraints -- Return: Current expression nesting depth. -- State: Read-only property (delegates to expression_guard.current_depth). - ---- - -## `ResolutionContext.depth` - -### Signature -```python -@property -def depth(self) -> int: -``` - -### Parameters -| Parameter | Type | Req | Description | -|:----------|:-----|:----|:------------| - -### Constraints -- Return: Current resolution depth (stack length). -- State: Read-only. - ---- - -## `ResolutionContext.is_depth_exceeded` - -### Signature -```python -def is_depth_exceeded(self) -> bool: -``` - -### Parameters -| Parameter | Type | Req | Description | -|:----------|:-----|:----|:------------| - -### Constraints -- Return: True if depth >= max_depth. -- State: Read-only. - ---- - -## `ResolutionContext.get_cycle_path` - -### Signature -```python -def get_cycle_path(self, key: str) -> list[str]: -``` - -### Parameters -| Parameter | Type | Req | Description | -|:----------|:-----|:----|:------------| -| `key` | `str` | Y | Message key that caused cycle. | - -### Constraints -- Return: List of keys showing cycle path including triggering key. -- State: Read-only. 
- ---- - -## `FluentResolver` - -### Signature -```python -class FluentResolver: - __slots__ = ("function_registry", "locale", "messages", "terms", "use_isolating", "_max_nesting_depth") - - def __init__( - self, - locale: str, - messages: dict[str, Message], - terms: dict[str, Term], - *, - function_registry: FunctionRegistry, - use_isolating: bool = True, - max_nesting_depth: int = 100, - ) -> None: ... - - def resolve_message( - self, - message: Message, - args: Mapping[str, FluentValue] | None = None, - attribute: str | None = None, - *, - context: ResolutionContext | None = None, - ) -> tuple[str, tuple[FrozenFluentError, ...]]: ... -``` - -### Parameters -| Parameter | Type | Req | Description | -|:----------|:-----|:----|:------------| -| `locale` | `str` | Y | Locale code for plural selection. | -| `messages` | `dict[str, Message]` | Y | Message registry. | -| `terms` | `dict[str, Term]` | Y | Term registry. | -| `function_registry` | `FunctionRegistry` | Y | Function registry. | -| `use_isolating` | `bool` | N | Wrap values in Unicode bidi marks. | -| `max_nesting_depth` | `int` | N | Maximum resolution depth limit (default: 100). | - -### Constraints -- Return: Resolver instance. -- State: Immutable after construction. -- Thread: Safe (uses explicit context). -- Internal: `FluentResolver` is an implementation detail of `FluentBundle`; callers do not instantiate it directly. - ---- - -## `FluentResolver.resolve_message` - -### Signature -```python -def resolve_message( - self, - message: Message, - args: Mapping[str, FluentValue] | None = None, - attribute: str | None = None, - *, - context: ResolutionContext | None = None, -) -> tuple[str, tuple[FrozenFluentError, ...]]: -``` - -### Parameters -| Parameter | Type | Req | Description | -|:----------|:-----|:----|:------------| -| `message` | `Message` | Y | Message AST. | -| `args` | `Mapping[str, FluentValue] \| None` | N | Variable arguments. 
| -| `attribute` | `str \| None` | N | Attribute name to resolve. | -| `context` | `ResolutionContext \| None` | N | Resolution context (creates fresh if None). | - -### Constraints -- Return: Tuple of (formatted_string, errors). -- Raises: Never. Collects errors in tuple. -- State: Read-only. -- Thread: Safe. -- Duplicate Attributes: When message has duplicate attributes with same name, last attribute wins (per Fluent spec). - ---- - -## Module Constants - -### `DEFAULT_CACHE_SIZE` - -```python -DEFAULT_CACHE_SIZE: int = 1000 -``` - -| Attribute | Value | -|:----------|:------| -| Type | `int` | -| Value | 1000 | -| Location | `ftllexengine.constants` | - -- Purpose: Default maximum cache entries for FluentBundle format results. -- Usage: Referenced by `FluentBundle.__init__`, `create()`, `for_system_locale()`. -- Import: `from ftllexengine.constants import DEFAULT_CACHE_SIZE` - ---- - -### `DEFAULT_MAX_EXPANSION_SIZE` +This reference covers cache configuration, function registries, built-in formatters, plural selection, and cache/audit entry types. +Runtime-adjacent utilities, validators, and package metadata constants are documented in [DOC_04_RuntimeUtilities.md](DOC_04_RuntimeUtilities.md). -```python -DEFAULT_MAX_EXPANSION_SIZE: int = 1_000_000 -``` - -| Attribute | Value | -|:----------|:------| -| Type | `int` | -| Value | 1,000,000 | -| Location | `ftllexengine.constants` | - -- Purpose: Maximum total characters produced during message resolution. Prevents Billion Laughs attacks. -- Usage: Referenced by `ResolutionContext`, `FluentResolver`, `FluentBundle`. -- Import: `from ftllexengine.constants import DEFAULT_MAX_EXPANSION_SIZE` - ---- - -### `MAX_CURRENCY_CACHE_SIZE` - -```python -MAX_CURRENCY_CACHE_SIZE: int = 300 -``` - -| Attribute | Value | -|:----------|:------| -| Type | `int` | -| Value | 300 | -| Location | `ftllexengine.constants` | - -- Purpose: Maximum LRU cache entries for individual currency lookups. 
-- Usage: `_get_currency_impl` in `ftllexengine.introspection.iso`. -- Import: `from ftllexengine.constants import MAX_CURRENCY_CACHE_SIZE` - ---- - -### `UNICODE_FSI` / `UNICODE_PDI` - -```python -UNICODE_FSI: str = "\u2068" # U+2068 FIRST STRONG ISOLATE -UNICODE_PDI: str = "\u2069" # U+2069 POP DIRECTIONAL ISOLATE -``` - -| Attribute | Value | -|:----------|:------| -| Type | `str` | -| Location | `ftllexengine.runtime.resolver` | - -- Purpose: Unicode bidirectional isolation characters per Unicode TR9. -- Usage: Wraps interpolated values when `use_isolating=True`. - ---- - -### `MAX_DEPTH` - -```python -MAX_DEPTH: int = 100 -``` - -| Attribute | Value | -|:----------|:------| -| Type | `int` | -| Value | 100 | -| Location | `ftllexengine.constants` | -| Re-exported | `ftllexengine.core.depth_guard` | - -- Purpose: Unified maximum depth for all recursion protection. -- Usage: Message reference chains, expression nesting, serialization, validation. -- Import: `from ftllexengine.constants import MAX_DEPTH` - ---- - -### `MAX_FORMAT_DIGITS` - -```python -MAX_FORMAT_DIGITS: int = 100 -``` - -| Attribute | Value | -|:----------|:------| -| Type | `int` | -| Value | 100 | -| Location | `ftllexengine.constants` | - -- Purpose: Upper bound on `minimum_fraction_digits` and `maximum_fraction_digits` in `number_format()` and `currency_format()`. -- Usage: Values exceeding this limit are clamped to prevent excessive memory allocation during formatting. -- Security: Prevents DoS via pathological fraction digit requests. -- Import: `from ftllexengine.constants import MAX_FORMAT_DIGITS` - ---- - -## `DepthGuard` - -### Signature -```python -@dataclass(slots=True) -class DepthGuard: - max_depth: int = MAX_DEPTH - current_depth: int = field(default=0, init=False) - - def __enter__(self) -> DepthGuard: ... - def __exit__(self, exc_type, exc_val, exc_tb) -> None: ... - @property - def depth(self) -> int: ... - def is_exceeded(self) -> bool: ... - def check(self) -> None: ... 
- def increment(self) -> None: ... - def decrement(self) -> None: ... - def reset(self) -> None: ... -``` - -### Parameters -| Field | Type | Description | -|:------|:-----|:------------| -| `max_depth` | `int` | Maximum allowed depth. | -| `current_depth` | `int` | Current recursion depth. | - -### Constraints -- Thread: Safe (explicit instance state, reentrant). -- Usage: Context manager or manual increment/decrement. -- Raises: `FrozenFluentError` (category=RESOLUTION) when depth limit exceeded. -- Behavior: `__enter__` validates limit BEFORE incrementing; prevents state corruption on exception. -- Import: `from ftllexengine.core.depth_guard import DepthGuard` - ---- - -## `GlobalDepthGuard` +## `CacheConfig` -Global depth tracking across format_pattern calls using `contextvars`. +Dataclass that configures optional format-result caching. ### Signature ```python -class GlobalDepthGuard: - __slots__ = ("_max_depth", "_token") - - def __init__(self, max_depth: int = MAX_DEPTH) -> None: ... - def __enter__(self) -> GlobalDepthGuard: ... - def __exit__(self, exc_type, exc_val, exc_tb) -> None: ... +@dataclass(frozen=True, slots=True) +class CacheConfig: + size: int = 1000 + write_once: bool = False + integrity_strict: bool = True + enable_audit: bool = False + max_audit_entries: int = 10000 + max_entry_weight: int = 10000 + max_errors_per_entry: int = 50 ``` -### Parameters -| Field | Type | Description | -|:------|:-----|:------------| -| `max_depth` | `int` | Maximum allowed global depth (default: MAX_DEPTH=100). | - ### Constraints -- Thread: Safe (uses `contextvars.ContextVar` for async-safe per-task state). -- Purpose: Prevents depth limit bypass via custom function callbacks. -- Security: Custom functions calling `bundle.format_pattern()` cannot bypass limits. -- Raises: `FrozenFluentError` (category=RESOLUTION) when global depth limit exceeded. -- Internal: Used automatically by `FluentResolver.resolve_message()`. 
- ---- +- Purpose: Single cache configuration object for bundle/localization runtime +- State: Immutable +- Thread: Safe -## `RWLock` - -Readers-writer lock with writer preference for high-concurrency FluentBundle access. - -### Signature -```python -class RWLock: - def __init__(self) -> None: ... - def read(self, timeout: float | None = None) -> Generator[None, None, None]: ... - def write(self, timeout: float | None = None) -> Generator[None, None, None]: ... -``` - -### Constraints -- Return: RWLock instance. -- State: Tracks active readers, active writer (as `int` thread identity), waiting writers, reader thread counts. -- Thread: Safe for all operations. Read lock is reentrant (same thread can reacquire multiple times). Write lock is non-reentrant. -- Purpose: Allows multiple concurrent readers OR single exclusive writer. -- Timeout: Optional `timeout` parameter on `read()` and `write()`. `None` (default) waits indefinitely; `0.0` is non-blocking; positive float is deadline in seconds. Raises `TimeoutError` on expiry, `ValueError` if negative. -- Writer Preference: Writers are prioritized when waiting to prevent reader starvation. -- Upgrade Limitation: Read-to-write lock upgrades are prohibited (raises RuntimeError). -- Downgrade Limitation: Write-to-read lock downgrade is prohibited (raises RuntimeError). -- Reentry Limitation: Write lock cannot be reacquired by the same thread (raises RuntimeError). -- Usage: FluentBundle and FluentLocalization use RWLock internally for concurrent read-heavy operations; downstream code can use the same public primitive. -- Import: `from ftllexengine.runtime import RWLock` --- -## `RWLock.read` +## `FunctionRegistry` -Context manager acquiring read lock for shared access. +Class that maps Python callables onto FTL function names and argument conventions. 
### Signature ```python -@contextmanager -def read(self, timeout: float | None = None) -> Generator[None, None, None]: +class FunctionRegistry: + def __init__(self) -> None: ``` -### Parameters -| Parameter | Type | Req | Semantics | -|:----------|:-----|:----|:----------| -| `timeout` | `float \| None` | N | Max seconds to wait. `None` = indefinite, `0.0` = non-blocking | - ### Constraints -- Return: Context manager yielding None. -- State: Increments active readers count. Reentrant for same thread. -- Thread: Safe. Multiple threads can hold read locks concurrently. -- Blocks: When writer is active or writers are waiting (writer preference). -- Raises: `RuntimeError` if thread holds write lock (downgrade prohibited). `TimeoutError` if lock not acquired within timeout. `ValueError` if timeout negative. -- Usage: `with lock.read(): # read data` or `with lock.read(timeout=1.0): # bounded wait` +- Purpose: Register, freeze, copy, and dispatch custom functions +- State: Mutable until `freeze()` +- Thread: Safe for normal runtime use after registration +- Main methods: `register()`, `call()`, `get_callable()`, `list_functions()`, `copy()` --- -## `RWLock.write` +## `fluent_function` -Context manager acquiring write lock for exclusive access. +Decorator that attaches Fluent-specific metadata to a Python callable. ### Signature ```python -@contextmanager -def write(self, timeout: float | None = None) -> Generator[None, None, None]: +def fluent_function( + func: F | None = None, + *, + inject_locale: bool = False, +) -> F | Callable[[F], F]: ``` ### Parameters -| Parameter | Type | Req | Semantics | -|:----------|:-----|:----|:----------| -| `timeout` | `float \| None` | N | Max seconds to wait. `None` = indefinite, `0.0` = non-blocking | - -### Constraints -- Return: Context manager yielding None. -- State: Sets active writer. Blocks all other readers and writers. Non-reentrant: raises `RuntimeError` if called while already holding write lock. -- Thread: Safe. 
Only one thread can hold write lock at a time. -- Blocks: Until all readers release their locks. -- Raises: `RuntimeError` if thread attempts read-to-write lock upgrade. `RuntimeError` if thread already holds the write lock. `TimeoutError` if lock not acquired within timeout. `ValueError` if timeout negative. -- Usage: `with lock.write(): # modify data` or `with lock.write(timeout=2.0): # bounded wait` - ---- - -## `RWLock.reader_count` - -Read-only snapshot of the number of threads currently holding read locks. - -### Signature -```python -@property -def reader_count(self) -> int: -``` +| Name | Req | Semantics | +|:-----|:----|:----------| +| `func` | N | Callable to decorate | +| `inject_locale` | N | Append locale argument | ### Constraints -- Return: Non-negative integer; 0 when no readers are active. -- State: Thread-safe point-in-time snapshot. A thread holding a reentrant read lock (acquired multiple times) counts as one reader. -- Thread: Safe. -- Usage: Production monitoring for read lock contention. +- Purpose: Mark custom functions for locale injection behavior +- State: Pure decorator +- Thread: Safe --- -## `RWLock.writer_active` +## `create_default_registry` -Read-only flag indicating whether any thread currently holds the write lock. +Function that returns a mutable registry seeded with built-in functions. ### Signature ```python -@property -def writer_active(self) -> bool: +def create_default_registry() -> FunctionRegistry: ``` ### Constraints -- Return: True if write lock is held, False otherwise. -- State: Thread-safe point-in-time snapshot. -- Thread: Safe. -- Usage: Production monitoring to detect write lock contention or stalled writers. +- Return: New mutable registry +- State: Fresh object on each call --- -## `RWLock.writers_waiting` +## `get_shared_registry` -Read-only snapshot of the number of threads currently blocked waiting for the write lock. +Function that returns the shared frozen registry of built-in functions. 
### Signature ```python -@property -def writers_waiting(self) -> int: +def get_shared_registry() -> FunctionRegistry: ``` ### Constraints -- Return: Non-negative integer; 0 when no writers are waiting. -- State: Thread-safe point-in-time snapshot. A non-zero value means new readers are also being blocked (writer preference). -- Thread: Safe. -- Usage: Diagnosing write starvation or identifying write-heavy contention patterns. - ---- - -## Analysis Functions +- Return: Shared frozen registry +- State: Shared singleton-style object --- -## `entry_dependency_set` +## `number_format` -Function that builds a namespace-prefixed dependency frozenset from reference sets. +Function that formats a numeric value as `FluentNumber`. ### Signature ```python -def entry_dependency_set( - message_refs: frozenset[str], - term_refs: frozenset[str], -) -> frozenset[str]: +def number_format( + value: int | Decimal, + locale_code: str = "en-US", + *, + minimum_fraction_digits: int = 0, + maximum_fraction_digits: int = 3, + use_grouping: bool = True, + pattern: str | None = None, + numbering_system: str = "latn", +) -> FluentNumber: ``` -### Parameters -| Parameter | Type | Req | Semantics | -|:----------|:-----|:----|:----------| -| `message_refs` | `frozenset[str]` | Y | Message IDs referenced by entry | -| `term_refs` | `frozenset[str]` | Y | Term IDs referenced by entry | - ### Constraints -- Return: Frozenset of prefixed dependency keys (e.g., `frozenset({"msg:welcome", "term:brand"})`). -- Raises: Never. -- State: None (pure function). -- Thread: Safe. -- Namespace: `msg:` prefix for message refs, `term:` prefix for term refs. Prevents collisions between same-name messages and terms. -- Complexity: O(N) where N = total references. 
-- Import: `from ftllexengine.analysis import entry_dependency_set` - -### Example -```python -deps = entry_dependency_set(frozenset({"greeting"}), frozenset({"brand"})) -# deps: frozenset({"msg:greeting", "term:brand"}) -``` +- Return: `FluentNumber` +- Raises: Locale/value boundary errors +- State: Pure +- Thread: Safe --- -## `make_cycle_key` +## `datetime_format` -Function that creates a canonical display string from a cycle path. +Function that formats a date or datetime value for a locale. ### Signature ```python -def make_cycle_key(cycle: Sequence[str]) -> str: +def datetime_format( + value: date | datetime | str, + locale_code: str = "en-US", + *, + date_style: Literal["short", "medium", "long", "full"] = "medium", + time_style: Literal["short", "medium", "long", "full"] | None = None, + pattern: str | None = None, +) -> str: ``` -### Parameters -| Parameter | Type | Req | Semantics | -|:----------|:-----|:----|:----------| -| `cycle` | `Sequence[str]` | Y | Cycle path with closing repeat | - ### Constraints -- Return: Canonical arrow-separated string (e.g., `"A -> B -> C -> A"`). Empty string for empty input. -- Raises: Never. -- State: None (pure function). -- Thread: Safe. -- Canonical: Rotates cycle to start with lexicographically smallest node. All rotations of the same cycle produce identical keys. -- Import: `from ftllexengine.analysis import make_cycle_key` - -### Example -```python -key = make_cycle_key(["B", "C", "A", "B"]) -# key: "A -> B -> C -> A" -``` +- Return: Formatted string +- Raises: Locale/value boundary errors +- State: Pure +- Thread: Safe --- -## `detect_cycles` +## `currency_format` -Function that detects all cycles in a dependency graph using iterative DFS. +Function that formats a monetary value as `FluentNumber`. 
### Signature ```python -def detect_cycles(dependencies: Mapping[str, set[str]]) -> list[list[str]]: +def currency_format( + value: int | Decimal, + locale_code: str = "en-US", + *, + currency: str, + currency_display: Literal["symbol", "code", "name"] = "symbol", + pattern: str | None = None, + use_grouping: bool = True, + currency_digits: bool = True, + numbering_system: str = "latn", +) -> FluentNumber: ``` -### Parameters -| Parameter | Type | Req | Semantics | -|:----------|:-----|:----|:----------| -| `dependencies` | `Mapping[str, set[str]]` | Y | Node ID to set of referenced node IDs | - ### Constraints -- Return: List of cycles where each cycle is a list of node IDs forming the cycle path (closed: last element repeats first). Empty list if no cycles detected. Cycles are deduplicated via canonical tuple form. -- Raises: Never. -- State: None (pure function). -- Thread: Safe. -- Algorithm: Iterative DFS with explicit stack. Prevents RecursionError on deep graphs (>1000 nodes in linear chain). -- Complexity: O(V + E) time, O(V) space where V = nodes, E = edges. -- Security: Uses iterative DFS to prevent stack overflow attacks via deeply nested dependency chains in untrusted FTL resources. -- Import: `from ftllexengine.analysis import detect_cycles` - -### Example -```python -deps = {"a": {"b"}, "b": {"c"}, "c": {"a"}} -cycles = detect_cycles(deps) -# cycles: [['a', 'b', 'c', 'a']] (canonical rotation) -``` +- Return: `FluentNumber` +- Raises: Locale/value boundary errors +- State: Pure +- Thread: Safe --- -## `IntegrityCache` +## `select_plural_category` -Format result cache with cryptographic integrity verification for financial-grade applications. +Function that resolves a CLDR plural category for a locale-aware number. 
### Signature ```python -class IntegrityCache: - def __init__( - self, - maxsize: int = 1000, - max_entry_weight: int = 10000, - max_errors_per_entry: int = 50, - *, - write_once: bool = False, - strict: bool = True, - enable_audit: bool = False, - max_audit_entries: int = 10000, - ) -> None: ... +def select_plural_category( + n: int | Decimal, + locale: str, + precision: int | None = None, + *, + ordinal: bool = False, +) -> str: ``` -### Parameters -| Parameter | Type | Req | Description | -|:----------|:-----|:----|:------------| -| `maxsize` | `int` | N | Maximum cache entries (LRU eviction). | -| `max_entry_weight` | `int` | N | Maximum memory weight per entry in approximate bytes. | -| `max_errors_per_entry` | `int` | N | Maximum errors stored per entry. | -| `write_once` | `bool` | N | Reject updates to existing keys (data race prevention). | -| `strict` | `bool` | N | Raise on corruption/write conflicts. Sourced from `CacheConfig.integrity_strict`. | -| `enable_audit` | `bool` | N | Maintain operation history for compliance. | -| `max_audit_entries` | `int` | N | Maximum audit log entries before oldest eviction. | - ### Constraints -- Return: IntegrityCache instance. -- State: Mutable cache with integrity verification. -- Thread: Safe (internal locking). -- Integrity: Each entry has BLAKE2b-128 checksum computed at creation and verified on retrieval. -- Corruption: Corrupted entries are evicted silently (strict=False) or raise CacheCorruptionError (strict=True). -- Key Normalization: Cache keys are normalized to prevent hash collisions between values that format differently: - - NaN: `Decimal("NaN")` normalized to `"__NaN__"` (IEEE 754 NaN != NaN; prevents unretrievable cache entries). - - Decimal: Uses `str(value)` to preserve scale (`Decimal("1.0")` vs `Decimal("1.00")` are distinct for CLDR plural rules). - - Datetime: Includes isoformat and tzinfo string; same-UTC-instant different-timezone datetimes produce distinct keys. 
- - Collections: Supports Sequence/Mapping ABCs (UserList, ChainMap) in addition to list/tuple/dict. -- Idempotent Writes: When `write_once=True`, concurrent writes with identical content are treated as idempotent success (not conflict). Content comparison uses `IntegrityCacheEntry.content_hash` which excludes metadata (created_at, sequence). -- Import: `from ftllexengine.runtime.cache import IntegrityCache` -- Independence: `strict` controls cache corruption response independently of `FluentBundle.strict` (formatting behavior). Sourced from `CacheConfig.integrity_strict`. -- Access: Typically accessed via FluentBundle cache parameters, not directly constructed. +- Return: CLDR plural category string +- State: Pure +- Thread: Safe --- -## `IntegrityCache.get` +## `make_fluent_number` -Retrieve cached format result with integrity verification. +Function that constructs a `FluentNumber` from an `int` or `Decimal`. ### Signature ```python -def get( - self, - message_id: str, - args: Mapping[str, FluentValue] | None, - attribute: str | None, - locale_code: str, - use_isolating: bool, -) -> IntegrityCacheEntry | None: +def make_fluent_number(value: int | Decimal, *, formatted: str | None = None) -> FluentNumber: ``` -### Parameters -| Parameter | Type | Req | Description | -|:----------|:-----|:----|:------------| -| `message_id` | `str` | Y | Message identifier. | -| `args` | `Mapping[str, FluentValue] \| None` | Y | Message arguments (may contain unhashable values). | -| `attribute` | `str \| None` | Y | Attribute name. | -| `locale_code` | `str` | Y | Locale code. | -| `use_isolating` | `bool` | Y | Whether Unicode isolation marks are used. | - ### Constraints -- Return: IntegrityCacheEntry if found and valid, None otherwise. -- Raises: `CacheCorruptionError` if strict=True and entry fails verification. -- State: Read (may evict corrupted entries). -- Thread: Safe. 
+- Return: `FluentNumber` +- State: Pure +- Thread: Safe --- -## `IntegrityCache.put` +## `clear_module_caches` -Store format result with integrity checksum. +Function that clears selected module-level caches or all of them. ### Signature ```python -def put( - self, - message_id: str, - args: Mapping[str, FluentValue] | None, - attribute: str | None, - locale_code: str, - use_isolating: bool, - formatted: str, - errors: tuple[FrozenFluentError, ...], -) -> None: +def clear_module_caches(components: frozenset[str] | None = None) -> None: ``` ### Parameters -| Parameter | Type | Req | Description | -|:----------|:-----|:----|:------------| -| `message_id` | `str` | Y | Message identifier. | -| `args` | `Mapping[str, FluentValue] \| None` | Y | Message arguments (may contain unhashable values). | -| `attribute` | `str \| None` | Y | Attribute name. | -| `locale_code` | `str` | Y | Locale code. | -| `use_isolating` | `bool` | Y | Whether Unicode isolation marks are used. | -| `formatted` | `str` | Y | Formatted message result. | -| `errors` | `tuple[FrozenFluentError, ...]` | Y | Frozen errors from resolution. | - -### Constraints -- Return: None. -- Raises: `WriteConflictError` if write_once=True and strict=True and key exists with different content. -- Idempotent: When write_once=True, identical content (same formatted+errors) silently succeeds without error (thundering herd safe). -- State: Mutates cache. -- Thread: Safe. -- Skip: Entry not stored if weight exceeds max_entry_weight or error count exceeds max_errors_per_entry. - ---- - -## `IntegrityCache.get_stats` - -Get cache statistics including security parameters. - -### Signature -```python -def get_stats(self) -> CacheStats: -``` - -### Constraints -- Return: `CacheStats` TypedDict snapshot with 19 precisely-typed fields. See `CacheStats`. -- State: Read-only. -- Thread: Safe. - ---- - -## `CacheStats` - -TypedDict representing a cache statistics snapshot returned by `IntegrityCache.get_stats`. 
- -### Signature -```python -class CacheStats(TypedDict): - size: int - maxsize: int - max_entry_weight: int - max_errors_per_entry: int - hits: int - misses: int - hit_rate: float - unhashable_skips: int - oversize_skips: int - error_bloat_skips: int - corruption_detected: int - idempotent_writes: int - write_once_conflicts: int - combined_weight_skips: int - sequence: int - write_once: bool - strict: bool - audit_enabled: bool - audit_entries: int -``` - -### Constraints -- Purpose: Precise per-field types for cache monitoring (hits/misses → int, hit_rate → float, write_once/strict/audit_enabled → bool). -- Corruption: `corruption_detected` is the primary financial-grade alert field; non-zero requires investigation. -- Conflicts: `write_once_conflicts` counts PUT attempts on an existing key when `write_once=True`; non-zero may indicate bugs in calling code. -- Weight: `combined_weight_skips` counts PUT rejections where the entry weight (size + error count) exceeds `max_entry_weight`; non-zero indicates entries too large for the configured cache policy. -- Import: `from ftllexengine.runtime.cache import CacheStats` -- Extension: `LocalizationCacheStats(CacheStats)` adds `bundle_count: int` for multi-bundle aggregates. - ---- - -## `IntegrityCache.idempotent_writes` - -Property returning count of benign concurrent writes with identical content. - -### Signature -```python -@property -def idempotent_writes(self) -> int: -``` +| Name | Req | Semantics | +|:-----|:----|:----------| +| `components` | N | Specific cache components | ### Constraints -- Return: Number of writes detected as idempotent (identical content already cached). -- State: Read-only. -- Thread: Safe. -- Counter: Reset to 0 when cache is cleared. +- State: Mutates module cache state +- Thread: Safe --- -## `IntegrityCacheEntry` +## `CacheAuditLogEntry` -Immutable cache entry with cryptographic integrity metadata. +Dataclass representing one immutable cache audit-log record. 
### Signature ```python @dataclass(frozen=True, slots=True) -class IntegrityCacheEntry: - formatted: str - errors: tuple[FrozenFluentError, ...] - checksum: bytes - created_at: float +class CacheAuditLogEntry: + operation: str + key_hash: str + timestamp: float sequence: int - key_hash: bytes - content_hash: bytes # field(init=False) — computed in __post_init__, not an __init__ parameter -``` - -### Constraints -- Return: Frozen dataclass instance. -- Immutable: All fields are read-only after creation. -- Init: `key_hash` is a required `__init__` parameter (BLAKE2b-8 hash of the cache key, for privacy-preserving audit). `content_hash` is NOT an `__init__` parameter; it is computed in `__post_init__` as a BLAKE2b-128 hash of `(formatted, errors)`. -- Checksum: BLAKE2b-128 hash of all init fields (content + metadata) for complete audit trail integrity. -- Import: `from ftllexengine.runtime.cache import IntegrityCacheEntry` - ---- - -## `IntegrityCacheEntry.content_hash` - -Computed field holding content-only hash for idempotent write detection. - -### Signature -```python -content_hash: bytes # field(init=False, repr=False, compare=False, hash=False) -``` - -### Constraints -- Type: `bytes` — dataclass `field(init=False)`, set by `__post_init__` via `object.__setattr__()`. -- Value: 16-byte BLAKE2b digest of `(formatted, errors)` only. -- Excludes: Does NOT include metadata (`created_at`, `sequence`, `key_hash`). -- Purpose: Two entries with identical content have identical `content_hash` regardless of when they were created. -- Usage: Used by `IntegrityCache.put()` for idempotent write detection in thundering herd scenarios. -- Note: Because `repr=False, compare=False, hash=False`, this field is excluded from `__repr__`, `__eq__`, and `__hash__`; it participates only in integrity lookups. - ---- - -## `IntegrityCacheEntry.verify` - -Method to verify entry integrity. 
- -### Signature -```python -def verify(self) -> bool: + checksum_hex: str + wall_time_unix: float ``` ### Constraints -- Return: True if checksum matches recomputed value AND all errors verify, False otherwise. -- Thread: Safe (read-only). -- Recursive: Verifies each FrozenFluentError's integrity if verify_integrity() method available. +- Purpose: Public audit-log payload +- State: Immutable +- Thread: Safe --- -## `CacheAuditLogEntry` +## `WriteLogEntry` -Public alias for the immutable audit log entry used by cache-observability APIs. +Dataclass alias used by the runtime cache implementation for audit-log records. ### Signature ```python -CacheAuditLogEntry = WriteLogEntry -``` - -### Underlying Dataclass -```python @dataclass(frozen=True, slots=True) class WriteLogEntry: operation: str @@ -1620,145 +285,9 @@ class WriteLogEntry: wall_time_unix: float ``` -### Parameters -| Field | Type | Description | -|:------|:-----|:------------| -| `operation` | `str` | Operation type (GET, PUT, HIT, MISS, EVICT, CORRUPTION). | -| `key_hash` | `str` | BLAKE2b hash of cache key (privacy-preserving). | -| `timestamp` | `float` | Monotonic clock timestamp of operation. Preserves ordering; not wall-clock aligned. | -| `sequence` | `int` | Cache entry sequence number (for PUT operations). | -| `checksum_hex` | `str` | Hex representation of entry checksum (for tracing). | -| `wall_time_unix` | `float` | Unix wall-clock timestamp (`time.time()`). Enables cross-system log correlation. | - -### Constraints -- Immutable: Frozen dataclass with slots. -- Purpose: Post-mortem analysis and debugging when audit logging enabled. -- Facade: Returned by `FluentBundle.get_cache_audit_log()` and `FluentLocalization.get_cache_audit_log()`. -- Import: `from ftllexengine.runtime import CacheAuditLogEntry` -- Identity: `CacheAuditLogEntry is WriteLogEntry` - ---- - -## `IntegrityCache.get_audit_log` - -Get audit log entries. 
- -### Signature -```python -def get_audit_log(self) -> tuple[WriteLogEntry, ...]: -``` - -### Constraints -- Return: Tuple of audit-log entry instances (empty if audit disabled). -- Prefer: Use bundle/localization facade accessors unless managing `IntegrityCache` directly. -- State: Read-only. -- Thread: Safe. - ---- - -## `IntegrityCache.clear` - -Clear all cached entries. Observability metrics are preserved. - -### Signature -```python -def clear(self) -> None: -``` - -### Constraints -- Return: None. -- State: Removes all cached entries from the LRU store. All counters (hits, misses, unhashable_skips, oversize_skips, error_bloat_skips, corruption_detected, idempotent_writes, write_once_conflicts, combined_weight_skips) and sequence number accumulate across `clear()` calls; they are never reset. Audit log is NOT cleared (historical record). -- Thread: Safe. -- Usage: Called automatically by FluentBundle on `add_resource()` or `add_function()`. - ---- - -## `LocalizationBootConfig` - -Frozen dataclass providing a canonical strict-mode boot API for `FluentLocalization`. - -Composes `PathResourceLoader` (or a custom `ResourceLoader`), `FluentLocalization`, -`require_clean()`, and `validate_message_schemas()` into a single audited sequence. -Designed for regulated systems where every resource must load cleanly and all declared -message schemas must match exactly before the application accepts traffic. - -### Signature -```python -@dataclass(frozen=True, slots=True) -class LocalizationBootConfig: - locales: tuple[str, ...] - resource_ids: tuple[str, ...] 
- loader: ResourceLoader | None = None - base_path: str | None = None - message_schemas: Mapping[MessageId, frozenset[str] | set[str]] | None = None - required_messages: frozenset[str] | None = None - strict: bool = True - use_isolating: bool = True - cache: CacheConfig | None = None - on_fallback: Callable[[FallbackInfo], None] | None = None -``` - -### Parameters -| Field | Type | Req | Description | -|:------|:-----|:----|:------------| -| `locales` | `tuple[str, ...]` | Y | Locale codes in fallback priority order (e.g., `('lv', 'en')`). | -| `resource_ids` | `tuple[str, ...]` | Y | FTL file identifiers to load (e.g., `('ui.ftl',)`). | -| `loader` | `ResourceLoader \| None` | — | Custom loader implementing `ResourceLoader`. Mutually exclusive with `base_path`. | -| `base_path` | `str \| None` | — | Path template with `{locale}` placeholder (e.g., `'locales/{locale}'`). Mutually exclusive with `loader`. | -| `message_schemas` | `Mapping[...] \| None` | N | Message ID → expected variable frozenset. When set, `boot()` enforces exact variable contracts. | -| `required_messages` | `frozenset[str] \| None` | N | Set of message IDs that must exist in at least one locale. Raises `IntegrityCheckFailedError` if any are absent from all locales. | -| `strict` | `bool` | N | Fail-fast on formatting errors (default: `True`). | -| `use_isolating` | `bool` | N | Unicode bidi isolation marks (default: `True`). | -| `cache` | `CacheConfig \| None` | N | Cache configuration, or `None` to disable. | -| `on_fallback` | `Callable \| None` | N | Callback invoked when a message resolves from a fallback locale. | - -### Methods - -#### `boot() -> tuple[FluentLocalization, LoadSummary, tuple[MessageVariableValidationResult, ...]]` -PRIMARY boot API. Executes the full boot sequence and returns structured evidence: -1. Create `FluentLocalization` (loads all resources). -2. Call `require_clean()` — raises `IntegrityCheckFailedError` on any load failure or junk entries. -3. 
If `required_messages` set, verify each ID is resolvable in at least one locale. -4. If `message_schemas` set, call `validate_message_schemas()` — raises `IntegrityCheckFailedError` on mismatch. -Returns `(FluentLocalization, LoadSummary, tuple[MessageVariableValidationResult, ...])` for audit trails. - -#### `boot_simple() -> FluentLocalization` -Simplified form. Executes the identical boot sequence but discards audit evidence. Use when structured evidence is not needed. - -#### `from_path(locales, resource_ids, base_path, *, ...) -> LocalizationBootConfig` -Static factory. `base_path` accepts `str` or `pathlib.Path` (converted to POSIX string). - -### Constraints -- Raises (`__post_init__`): `ValueError` if `locales` or `resource_ids` is empty; `ValueError` if neither or both of `loader`/`base_path` are provided. -- Raises (`boot`/`boot_simple`): `IntegrityCheckFailedError` on load failures, required-message absence, or schema mismatches; `ValueError` if `base_path` lacks `{locale}`. -- State: Immutable (frozen dataclass). -- Thread: Safe. Each `boot()` call creates a new `FluentLocalization` instance. -- Version: `required_messages` field added in v0.155.0; `boot()` returns 3-tuple as of v0.155.0 (was `FluentLocalization`). -- Import: `from ftllexengine import LocalizationBootConfig` or `from ftllexengine.localization import LocalizationBootConfig`. - ---- - -## `clear_module_caches` - -Clear module-level LRU caches to release cached memory. - -### Signature -```python -def clear_module_caches(components: frozenset[str] | None = None) -> None: -``` - -### Parameters -| Parameter | Type | Req | Description | -|:----------|:-----|:----|:------------| -| `components` | `frozenset[str] \| None` | N | Component names to clear. `None` clears all. | - ### Constraints -- Default: `components=None` clears all six caches. 
-- Component Names: `'introspection.iso'`, `'introspection.message'`, `'parsing.currency'`, `'parsing.dates'`, `'runtime.functions'`, `'runtime.locale_context'`. -- Unknown Components: Silently ignored (no error raised for unrecognized names). -- Empty Set: `frozenset()` clears nothing. -- Thread: Not safe to call concurrently with active cache reads (no lock held). -- Import: `from ftllexengine import clear_module_caches` -- Version: `components` parameter added in v0.155.0. +- Purpose: Same public payload shape as `CacheAuditLogEntry` +- State: Immutable +- Thread: Safe --- diff --git a/docs/DOC_04_RuntimeUtilities.md b/docs/DOC_04_RuntimeUtilities.md new file mode 100644 index 00000000..6b7dff2b --- /dev/null +++ b/docs/DOC_04_RuntimeUtilities.md @@ -0,0 +1,194 @@ +--- +afad: "3.5" +version: "0.163.0" +domain: RUNTIME_UTILITIES +updated: "2026-04-22" +route: + keywords: [detect_cycles, normalize_locale, get_system_locale, require_locale_code, __version__, require_date, require_datetime] + questions: ["where are runtime utility exports documented?", "what package metadata constants are public?", "which boundary validators and locale helpers are exported from the root package?"] +--- + +# Runtime Utilities Reference + +This reference covers root-level runtime-adjacent utilities, package metadata constants, locale helpers, and boundary validators. +Formatting functions, registries, cache configuration, and audit entry types live in [DOC_04_Runtime.md](DOC_04_Runtime.md). + +## `detect_cycles` + +Function that detects cycles in a dependency graph. + +### Signature +```python +def detect_cycles(dependencies: Mapping[str, set[str]]) -> list[list[str]]: +``` + +### Parameters +| Name | Req | Semantics | +|:-----|:----|:----------| +| `dependencies` | Y | Graph adjacency mapping | + +### Constraints +- Return: Canonicalized cycle paths +- State: Pure +- Thread: Safe + +--- + +## `normalize_locale` + +Function that canonicalizes locale codes to lowercase POSIX form. 
+ +### Signature +```python +def normalize_locale(locale_code: str) -> str: +``` + +### Constraints +- Return: lowercase locale code with hyphens converted to underscores +- Purpose: canonical cache key and comparison form for locale handling +- State: Pure +- Thread: Safe + +--- + +## `get_system_locale` + +Function that detects the process locale from Python and environment variables. + +### Signature +```python +def get_system_locale(*, raise_on_failure: bool = False) -> str: +``` + +### Parameters +| Name | Req | Semantics | +|:-----|:----|:----------| +| `raise_on_failure` | N | Raise instead of falling back to `"en_us"` | + +### Constraints +- Return: normalized POSIX-style locale string +- Fallback: returns `"en_us"` when detection fails and `raise_on_failure` is false +- State: Pure with respect to library state; reads OS process locale and env vars + +--- + +## `require_locale_code` + +Boundary validator for locale-code inputs. + +### Signature +```python +def require_locale_code(value: object, field_name: str) -> LocaleCode: +``` + +### Constraints +- Return: canonical normalized locale code +- Raises: `TypeError` for non-strings; `ValueError` for blank, overlong, or structurally invalid codes +- Purpose: system-boundary validation before locale lookup or cache-key creation + +--- + +## `__version__` + +Package version string for the installed `ftllexengine` distribution. + +### Signature +```python +__version__: str +``` + +### Constraints +- Return: Installed package version from distribution metadata, or `"0.0.0+dev"` when running from an uninstalled development checkout +- Purpose: Runtime-visible package version for diagnostics, tooling, and support reporting + +--- + +## `__fluent_spec_version__` + +Constant declaring the Fluent specification version targeted by the package. 
+ +### Signature +```python +__fluent_spec_version__: str = "1.0" +``` + +### Constraints +- Return: `"1.0"` +- Purpose: Exposes the Fluent spec baseline used by the runtime and parser + +--- + +## `__spec_url__` + +Constant pointing to the upstream Fluent grammar/specification reference. + +### Signature +```python +__spec_url__: str = "https://github.com/projectfluent/fluent/blob/master/spec/fluent.ebnf" +``` + +### Constraints +- Return: Canonical upstream Fluent EBNF/spec URL string +- Purpose: Lets tooling and diagnostics point back to the normative grammar source + +--- + +## `__recommended_encoding__` + +Constant declaring the recommended encoding for Fluent resource files. + +### Signature +```python +__recommended_encoding__: str = "UTF-8" +``` + +### Constraints +- Return: `"UTF-8"` +- Purpose: Mirrors the package guidance and upstream Fluent recommendation for `.ftl` resources + +--- + +## `require_date` + +Boundary validator for strict calendar-date values. + +### Signature +```python +def require_date(value: object, field_name: str) -> date: +``` + +### Constraints +- Return: validated `date` +- Raises: `TypeError` for non-dates and for `datetime` instances specifically +- Purpose: reject accidental time-bearing values at system boundaries + +--- + +## `require_datetime` + +Boundary validator for strict `datetime` values. + +### Signature +```python +def require_datetime(value: object, field_name: str) -> datetime: +``` + +### Constraints +- Return: validated `datetime` +- Raises: `TypeError` for non-`datetime` values, including plain `date` + +--- + +## `require_fluent_number` + +Boundary validator for `FluentNumber` values. 
+ +### Signature +```python +def require_fluent_number(value: object, field_name: str) -> FluentNumber: +``` + +### Constraints +- Return: validated `FluentNumber` +- Raises: `TypeError` for all other values +- Purpose: domain-boundary validation for preformatted numeric values diff --git a/docs/DOC_05_Diagnostics.md b/docs/DOC_05_Diagnostics.md new file mode 100644 index 00000000..7199d229 --- /dev/null +++ b/docs/DOC_05_Diagnostics.md @@ -0,0 +1,207 @@ +--- +afad: "3.5" +version: "0.163.0" +domain: DIAGNOSTICS +updated: "2026-04-22" +route: + keywords: [ValidationResult, ValidationError, ValidationWarning, DiagnosticCode, DiagnosticFormatter, OutputFormat, SourceSpan] + questions: ["what validation result types exist?", "how do I format diagnostics output?", "where are diagnostic codes and source spans documented?"] +--- + +# Diagnostics Reference + +This reference covers validation result types, diagnostic codes, spans, and formatter APIs. +Immutable Fluent errors and integrity exceptions live in [DOC_05_Errors.md](DOC_05_Errors.md). + +## `WarningSeverity` + +Severity levels for validation warnings. + +### Signature +```python +class WarningSeverity(StrEnum): + CRITICAL = "critical" + WARNING = "warning" + INFO = "info" +``` + +### Constraints +- Import: `from ftllexengine import WarningSeverity` +- Type: `StrEnum` +- Used by: `ValidationWarning.severity` + +--- + +## `ValidationError` + +Structured validation error for invalid resource content. 
+ +### Signature +```python +@dataclass(frozen=True, slots=True) +class ValidationError: + code: DiagnosticCode + message: str + content: str + line: int | None = None + column: int | None = None +``` + +### Constraints +- Import: `from ftllexengine.diagnostics import ValidationError` +- Produced by: `validate_resource()` +- Formatting helper: `.format()` delegates to `DiagnosticFormatter` +- Security note: `content` may contain source text; use sanitizing formatter options for multi-tenant logs + +--- + +## `ValidationWarning` + +Structured semantic warning returned by resource validation. + +### Signature +```python +@dataclass(frozen=True, slots=True) +class ValidationWarning: + code: DiagnosticCode + message: str + context: str | None = None + line: int | None = None + column: int | None = None + severity: WarningSeverity = WarningSeverity.WARNING +``` + +### Constraints +- Import: `from ftllexengine.diagnostics import ValidationWarning` +- Produced by: `validate_resource()` +- Formatting helper: `.format()` delegates to `DiagnosticFormatter` + +--- + +## `ValidationResult` + +Unified immutable validation result. + +### Signature +```python +@dataclass(frozen=True, slots=True) +class ValidationResult: + errors: tuple[ValidationError, ...] + warnings: tuple[ValidationWarning, ...] + annotations: tuple[Annotation, ...] +``` + +### Constraints +- Import: `from ftllexengine.diagnostics import ValidationResult` +- Produced by: `validate_resource()` +- Properties: `is_valid`, `error_count`, `warning_count`, `annotation_count` +- Factories: `valid()`, `invalid()`, `from_annotations()` +- Formatting helper: `.format()` delegates to `DiagnosticFormatter` + +--- + +## `DiagnosticCode` + +Enum of stable diagnostic identifiers. + +### Signature +```python +class DiagnosticCode(Enum): ... 
+``` + +### Constraints +- Import: `from ftllexengine.diagnostics import DiagnosticCode` +- Coverage: reference, resolution, syntax, parsing, and validation categories +- Stability: intended for programmatic handling and log/search indexing + +--- + +## `SourceSpan` + +Immutable source location for diagnostics. + +### Signature +```python +@dataclass(frozen=True, slots=True) +class SourceSpan: + start: int + end: int + line: int + column: int +``` + +### Constraints +- Import: `from ftllexengine.diagnostics import SourceSpan` +- Semantics: `start`/`end` are character offsets, `line`/`column` are 1-indexed +- Invariants: `start >= 0`, `end >= start`, `line >= 1`, `column >= 1` + +--- + +## `Diagnostic` + +Structured diagnostic payload for tool and human consumption. + +### Signature +```python +@dataclass(frozen=True, slots=True) +class Diagnostic: + code: DiagnosticCode + message: str + span: SourceSpan | None = None + hint: str | None = None + help_url: str | None = None + function_name: str | None = None + argument_name: str | None = None + expected_type: str | None = None + received_type: str | None = None + ftl_location: str | None = None + severity: Literal["error", "warning"] = "error" + resolution_path: tuple[str, ...] | None = None +``` + +### Constraints +- Import: `from ftllexengine.diagnostics import Diagnostic` +- Purpose: single structured payload for rich error reporting +- Helper: `format_error()` delegates to `DiagnosticFormatter` + +--- + +## `OutputFormat` + +Enum of supported diagnostic formatter output styles. + +### Signature +```python +class OutputFormat(StrEnum): + RUST = "rust" + SIMPLE = "simple" + JSON = "json" +``` + +### Constraints +- Import: `from ftllexengine.diagnostics import OutputFormat` +- Type: `StrEnum` +- Used by: `DiagnosticFormatter.output_format` + +--- + +## `DiagnosticFormatter` + +Central formatting service for diagnostics and validation output. 
+ +### Signature +```python +@dataclass(frozen=True, slots=True) +class DiagnosticFormatter: + output_format: OutputFormat = OutputFormat.RUST + sanitize: bool = False + redact_content: bool = False + color: bool = False + max_content_length: int = 100 +``` + +### Constraints +- Import: `from ftllexengine.diagnostics import DiagnosticFormatter` +- Main methods: `format()`, `format_all()`, `format_error()`, `format_warning()`, `format_validation_result()` +- Output styles: Rust-style multi-line, simple one-line, and JSON +- Sanitization: can truncate or redact source-bearing fields before output diff --git a/docs/DOC_05_Errors.md b/docs/DOC_05_Errors.md index f9c8d147..ed61bd4f 100644 --- a/docs/DOC_05_Errors.md +++ b/docs/DOC_05_Errors.md @@ -1,20 +1,21 @@ --- -afad: "3.3" -version: "0.161.0" +afad: "3.5" +version: "0.163.0" domain: ERRORS -updated: "2026-03-21" +updated: "2026-04-22" route: - keywords: [FrozenFluentError, ErrorCategory, FrozenErrorContext, ParseTypeLiteral, ImmutabilityViolationError, DataIntegrityError, IntegrityContext, CacheCorruptionError, IntegrityCheckFailedError, WriteConflictError, SyntaxIntegrityError, FormattingIntegrityError, ValidationResult, DiagnosticCode, Diagnostic, VALIDATION_PLACEABLE_SELECTOR] - questions: ["what errors can occur?", "how to handle errors?", "what are the error codes?", "how to format diagnostics?", "what exceptions do parsing functions raise?", "how to verify error integrity?", "what is SyntaxIntegrityError?", "what is FormattingIntegrityError?", "what is IntegrityContext?", "what is CacheCorruptionError?", "what is WriteConflictError?"] + keywords: [FrozenFluentError, ErrorCategory, FrozenErrorContext, DataIntegrityError, BabelImportError, ErrorTemplate] + questions: ["what errors does FTLLexEngine expose?", "how do parse and format failures surface?", "what integrity exceptions exist?", "how does missing Babel surface?"] --- # Errors Reference ---- +This reference covers immutable Fluent errors, 
optional-dependency failures, and fail-fast integrity exceptions. +Validation result types and formatter infrastructure are documented in [DOC_05_Diagnostics.md](DOC_05_Diagnostics.md). ## `ErrorCategory` -Error categorization string enum replacing the exception class hierarchy. +Enum that classifies `FrozenFluentError` instances. ### Signature ```python @@ -26,27 +27,53 @@ class ErrorCategory(StrEnum): FORMATTING = "formatting" ``` -### Parameters -| Value | Description | -|:------|:------------| -| `REFERENCE` | Unknown message, term, or variable reference. | -| `RESOLUTION` | Runtime resolution failure (depth exceeded, function error). | -| `CYCLIC` | Cyclic reference detected (e.g., `hello = { hello }`). | -| `PARSE` | Bi-directional parsing failure (number, date, currency). | -| `FORMATTING` | Locale-aware formatting failure. | +### Constraints +- Import: `from ftllexengine import ErrorCategory` +- Type: `StrEnum` +- Purpose: replaces a subclass hierarchy for normal Fluent errors + +--- + +## `ParseTypeLiteral` + +Closed literal set for parse/format context. + +### Signature +```python +type ParseTypeLiteral = Literal["", "currency", "date", "datetime", "decimal", "number"] +``` + +### Constraints +- Import: `from ftllexengine import ParseTypeLiteral` +- `""` is the sentinel meaning "not applicable" +- Used by: `FrozenErrorContext.parse_type` + +--- + +## `FrozenErrorContext` + +Immutable context attached to parse and formatting failures. + +### Signature +```python +@dataclass(frozen=True, slots=True) +class FrozenErrorContext: + input_value: str = "" + locale_code: str = "" + parse_type: ParseTypeLiteral = "" + fallback_value: str = "" +``` ### Constraints -- Type: `StrEnum` — each member IS a `str`; `ErrorCategory.REFERENCE == "reference"` is `True` -- String repr: `str(ErrorCategory.REFERENCE) == "reference"` (not `"ErrorCategory.REFERENCE"`) -- Value: `.value` is still the plain string (`"reference"`, `"resolution"`, etc.) 
-- Usage: Check category instead of using isinstance() on subclasses. -- Import: `from ftllexengine.diagnostics import ErrorCategory` +- Import: `from ftllexengine import FrozenErrorContext` +- Purpose: keeps parse/format metadata immutable and hashable +- Typical use: attached to `FrozenFluentError.context` --- ## `FrozenFluentError` -Immutable, content-addressable Fluent error for financial-grade data integrity. +Sealed, immutable, content-addressable Fluent error type. ### Signature ```python @@ -59,122 +86,60 @@ class FrozenFluentError(Exception): diagnostic: Diagnostic | None = None, context: FrozenErrorContext | None = None, ) -> None: ... - - def verify_integrity(self) -> bool: ... - - @property - def message(self) -> str: ... - @property - def category(self) -> ErrorCategory: ... - @property - def diagnostic(self) -> Diagnostic | None: ... - @property - def context(self) -> FrozenErrorContext | None: ... - @property - def content_hash(self) -> bytes: ... - @property - def fallback_value(self) -> str: ... - @property - def input_value(self) -> str: ... - @property - def locale_code(self) -> str: ... - @property - def parse_type(self) -> Literal["", "currency", "date", "datetime", "decimal", "number"]: ... ``` -### Parameters -| Parameter | Type | Req | Description | -|:----------|:-----|:----|:------------| -| `message` | `str` | Y | Human-readable error description. | -| `category` | `ErrorCategory` | Y | Error categorization (replaces subclass hierarchy). | -| `diagnostic` | `Diagnostic \| None` | N | Structured diagnostic information. | -| `context` | `FrozenErrorContext \| None` | N | Additional context for parse/formatting errors. | - ### Constraints -- Immutable: All attributes frozen after construction. Mutation raises `ImmutabilityViolationError`. 
-- Exception Attributes: Python exception mechanism attributes (`__traceback__`, `__context__`, `__cause__`, `__suppress_context__`, `__notes__`) are allowed even after freeze to support exception chaining and Python 3.11+ exception groups. -- Sealed: Cannot be subclassed. Use `ErrorCategory` for classification. -- Content-Addressed: BLAKE2b-128 hash computed at construction for integrity verification. -- Hashable: Can be used in sets and as dict keys. Hash based on content, not identity. `__hash__` returns `int.from_bytes(content_hash, "big")` using all 16 bytes of the BLAKE2b-128 hash; Python's `hash()` protocol then reduces via `int.__hash__()` (Mersenne prime modulus). -- Convenience Properties: `input_value`, `locale_code`, `parse_type` delegate to `context` (return empty string if context is None). -- Hash Composition: Content hash includes ALL fields for complete audit trail integrity: - - Core: `message`, `category.value` - - Diagnostic (if present): `code.name`, `message`, `span` (start/end/line/column), `hint`, `help_url`, `function_name`, `argument_name`, `expected_type`, `received_type`, `ftl_location`, `severity`, `resolution_path` - - Context (if present): `input_value`, `locale_code`, `parse_type`, `fallback_value` -- Length-Prefixing: All string fields are length-prefixed (4-byte big-endian) before hashing. This prevents collision attacks where `("ab", "c")` and `("a", "bc")` would hash identically. 
-- Import: `from ftllexengine.diagnostics import FrozenFluentError` +- Import: `from ftllexengine import FrozenFluentError` +- Purpose: normal parse, formatting, resolution, and reference failures +- Integrity: stores a BLAKE2b-128 `content_hash` and exposes `verify_integrity()` +- Immutability: attribute mutation or deletion raises `ImmutabilityViolationError` +- Sealed: runtime and static checks prevent subclassing +- Convenience properties: `input_value`, `locale_code`, `parse_type`, and `fallback_value` proxy `context` --- -## `FrozenFluentError.verify_integrity` +## `BabelImportError` -Verify error content hasn't been corrupted. +Exception raised when a Babel-backed feature is called in a parser-only installation. ### Signature ```python -def verify_integrity(self) -> bool: +class BabelImportError(ImportError): + def __init__(self, feature: str) -> None: ... ``` ### Parameters -| Parameter | Type | Req | Description | -|:----------|:-----|:----|:------------| - -### Constraints -- Return: True if content hash matches, False if corrupted. -- Method: Recomputes BLAKE2b-128 hash and compares using constant-time comparison. -- Security: Defense against timing attacks via `hmac.compare_digest()`. - ---- - -## `ParseTypeLiteral` - -Type alias for the `parse_type` field of `FrozenErrorContext`. - -### Signature -```python -type ParseTypeLiteral = Literal["", "currency", "date", "datetime", "decimal", "number"] -``` +| Name | Req | Semantics | +|:-----|:----|:----------| +| `feature` | Y | Missing Babel feature label | ### Constraints -- `""` is the absent sentinel meaning "no parse type associated with this error", not an empty string value. -- Closed set: any value outside this literal is a static type error. 
-- Import: `from ftllexengine.diagnostics import ParseTypeLiteral` +- Import: `from ftllexengine.introspection import BabelImportError` +- Purpose: consistent optional-dependency failure for CLDR-backed features +- Message: instructs callers to install `ftllexengine[babel]` --- -## `FrozenErrorContext` +## `ErrorTemplate` -Immutable context for parse/formatting errors. +Class namespace that builds standardized `Diagnostic` objects for common runtime failures. ### Signature ```python -@dataclass(frozen=True, slots=True) -class FrozenErrorContext: - input_value: str - locale_code: str - parse_type: ParseTypeLiteral - fallback_value: str +class ErrorTemplate: ... ``` -### Parameters -| Parameter | Type | Req | Description | -|:----------|:-----|:----|:------------| -| `input_value` | `str` | Y | String that failed parsing/formatting. | -| `locale_code` | `str` | Y | Locale used for parsing/formatting. | -| `parse_type` | `ParseTypeLiteral` | Y | Formatting type domain; `""` when absent. | -| `fallback_value` | `str` | Y | Value to use when formatting fails. | - ### Constraints -- Immutable: Frozen dataclass, cannot be modified. -- `parse_type`: see `ParseTypeLiteral`. The `""` sentinel means "not applicable". -- Usage: Passed to `FrozenFluentError` for PARSE/FORMATTING errors. -- Import: `from ftllexengine.diagnostics import FrozenErrorContext` +- Import: `from ftllexengine.diagnostics import ErrorTemplate` +- Purpose: centralized diagnostic-message construction instead of ad-hoc exception strings +- Output: returns `Diagnostic` objects from named factory methods such as `message_not_found()` and `plural_support_unavailable()` +- State: Stateless factory namespace --- ## `DataIntegrityError` -Base exception for data integrity violations. +Base exception for system integrity failures, distinct from normal Fluent errors. 
### Signature ```python @@ -184,29 +149,19 @@ class DataIntegrityError(Exception): message: str, context: IntegrityContext | None = None, ) -> None: ... - - @property - def context(self) -> IntegrityContext | None: ... ``` -### Parameters -| Parameter | Type | Req | Description | -|:----------|:-----|:----|:------------| -| `message` | `str` | Y | Human-readable error description. | -| `context` | `IntegrityContext \| None` | N | Structured diagnostic context for post-mortem analysis. | - ### Constraints -- Purpose: Base class for all system-level integrity exceptions (NOT a `FrozenFluentError` subclass — different error domain). -- Hierarchy: Six sealed `@final` subclasses: `CacheCorruptionError`, `FormattingIntegrityError`, `ImmutabilityViolationError`, `IntegrityCheckFailedError`, `SyntaxIntegrityError`, `WriteConflictError`. -- Immutable: Attributes frozen after construction. Mutation raises `ImmutabilityViolationError` (except Python exception machinery attributes). -- Context: `context` property exposes the `IntegrityContext` passed at construction. -- Import: `from ftllexengine.integrity import DataIntegrityError` or `from ftllexengine import DataIntegrityError` +- Import: `from ftllexengine import DataIntegrityError` +- Purpose: corruption, strict-mode violations, or mutation attempts that should fail fast +- Domain boundary: not a `FrozenFluentError` subclass +- Immutability: frozen after initialization; mutation raises `ImmutabilityViolationError` --- ## `IntegrityContext` -Structured diagnostic context for post-mortem analysis of integrity failures. +Structured context for `DataIntegrityError` instances. 
### Signature ```python @@ -218,195 +173,92 @@ class IntegrityContext: expected: str | None = None actual: str | None = None timestamp: float | None = None + wall_time_unix: float | None = None ``` -### Parameters -| Field | Type | Req | Description | -|:------|:-----|:----|:------------| -| `component` | `str` | Y | System component where error occurred (e.g., `"cache"`, `"error"`, `"bundle"`). | -| `operation` | `str` | Y | Operation being performed (e.g., `"get"`, `"put"`, `"verify"`, `"mutate"`). | -| `key` | `str \| None` | N | Cache key or identifier involved. | -| `expected` | `str \| None` | N | Expected value or hash. | -| `actual` | `str \| None` | N | Actual value or hash found. | -| `timestamp` | `float \| None` | N | Time of error detection (`time.monotonic()`). | - ### Constraints -- Immutable: Frozen dataclass; all fields read-only after construction. -- Purpose: Passed to all `DataIntegrityError` subclass constructors as `context=` to enable structured post-mortem analysis. -- Import: `from ftllexengine.integrity import IntegrityContext` or `from ftllexengine import IntegrityContext` +- Import: `from ftllexengine import IntegrityContext` +- Purpose: post-mortem metadata for cache, formatting, and strict-load failures +- Timestamps: `timestamp` is monotonic-process time; `wall_time_unix` is wall-clock correlation time --- ## `CacheCorruptionError` -Checksum mismatch detected in a cache entry. +Raised when cached content fails checksum verification. ### Signature ```python @final -class CacheCorruptionError(DataIntegrityError): - def __init__( - self, - message: str, - context: IntegrityContext | None = None, - ) -> None: ... +class CacheCorruptionError(DataIntegrityError): ... ``` -### Parameters -| Parameter | Type | Req | Description | -|:----------|:-----|:----|:------------| -| `message` | `str` | Y | Human-readable error description. | -| `context` | `IntegrityContext \| None` | N | Structured diagnostic context. 
| - ### Constraints -- Purpose: Raised when a cached entry's checksum does not match the stored checksum, indicating memory corruption, hardware fault, or tampering. -- Severity: CRITICAL — should trigger immediate investigation and incident response. -- Sealed: `@final` decorator prevents subclassing. -- Import: `from ftllexengine.integrity import CacheCorruptionError` or `from ftllexengine import CacheCorruptionError` +- Import: `from ftllexengine import CacheCorruptionError` +- Typical meaning: corruption, tampering, or checksum mismatch in cached data --- -## `IntegrityCheckFailedError` +## `ImmutabilityViolationError` -Generic integrity verification failure not covered by a more specific subclass. +Raised when frozen error or integrity objects are mutated. ### Signature ```python @final -class IntegrityCheckFailedError(DataIntegrityError): - def __init__( - self, - message: str, - context: IntegrityContext | None = None, - ) -> None: ... +class ImmutabilityViolationError(DataIntegrityError): ... ``` -### Parameters -| Parameter | Type | Req | Description | -|:----------|:-----|:----|:------------| -| `message` | `str` | Y | Human-readable error description. | -| `context` | `IntegrityContext \| None` | N | Structured diagnostic context. | - ### Constraints -- Purpose: Raised for generic integrity failures (e.g., newly created cache entry fails immediate verification, write log integrity check fails, error verification fails). -- Sealed: `@final` decorator prevents subclassing. -- Import: `from ftllexengine.integrity import IntegrityCheckFailedError` or `from ftllexengine import IntegrityCheckFailedError` +- Import: `from ftllexengine import ImmutabilityViolationError` +- Triggered by: invalid mutation attempts on `FrozenFluentError`, `DataIntegrityError`, or related frozen evidence --- -## `WriteConflictError` - -Write-once violation: attempt to overwrite an existing cache entry in write-once mode. 
- -### Signature -```python -@final -class WriteConflictError(DataIntegrityError): - def __init__( - self, - message: str, - context: IntegrityContext | None = None, - *, - existing_seq: int = 0, - new_seq: int = 0, - ) -> None: ... - - @property - def existing_seq(self) -> int: ... - @property - def new_seq(self) -> int: ... -``` - -### Parameters -| Parameter | Type | Req | Description | -|:----------|:-----|:----|:------------| -| `message` | `str` | Y | Human-readable error description. | -| `context` | `IntegrityContext \| None` | N | Structured diagnostic context. | -| `existing_seq` | `int` | N | Sequence number of the existing cache entry (keyword-only). | -| `new_seq` | `int` | N | Sequence number of the rejected write attempt (keyword-only). | - -### Constraints -- Purpose: Raised when `CacheConfig.write_once=True` and code attempts to overwrite an existing cached entry. This is a security feature: overwrites can mask data races in financial applications. -- Properties: `existing_seq` and `new_seq` identify the conflicting entries for audit. -- Sealed: `@final` decorator prevents subclassing. -- Import: `from ftllexengine.integrity import WriteConflictError` or `from ftllexengine import WriteConflictError` - ---- - -## `ImmutabilityViolationError` +## `IntegrityCheckFailedError` -Attempt to mutate an immutable object. +Generic integrity-verification failure when no narrower subtype applies. ### Signature ```python @final -class ImmutabilityViolationError(DataIntegrityError): - def __init__( - self, - message: str, - context: IntegrityContext | None = None, - ) -> None: ... +class IntegrityCheckFailedError(DataIntegrityError): ... ``` -### Parameters -| Parameter | Type | Req | Description | -|:----------|:-----|:----|:------------| -| `message` | `str` | Y | Description of mutation attempt. | -| `context` | `IntegrityContext \| None` | N | Structured diagnostic context. 
| - ### Constraints -- Purpose: Raised when code attempts to modify a frozen `FrozenFluentError`, `DataIntegrityError`, or cache entry. -- Raised By: `FrozenFluentError.__setattr__()`, `FrozenFluentError.__delattr__()`, `DataIntegrityError.__setattr__()`, `DataIntegrityError.__delattr__()`. -- Sealed: `@final` decorator prevents subclassing. -- Import: `from ftllexengine.integrity import ImmutabilityViolationError` or `from ftllexengine import ImmutabilityViolationError` +- Import: `from ftllexengine import IntegrityCheckFailedError` +- Used for: verification failures that are not specifically checksum, write-conflict, syntax, or formatting failures --- -## `SyntaxIntegrityError` +## `WriteConflictError` -Syntax errors detected in strict mode during FTL source loading. +Raised when write-once cache mode rejects an overwrite. ### Signature ```python @final -class SyntaxIntegrityError(DataIntegrityError): +class WriteConflictError(DataIntegrityError): def __init__( self, message: str, context: IntegrityContext | None = None, *, - junk_entries: tuple[Junk, ...] = (), - source_path: str | None = None, + existing_seq: int = 0, + new_seq: int = 0, ) -> None: ... - - @property - def junk_entries(self) -> tuple[Junk, ...]: ... - @property - def source_path(self) -> str | None: ... - @property - def context(self) -> IntegrityContext | None: ... ``` -### Parameters -| Parameter | Type | Req | Description | -|:----------|:-----|:----|:------------| -| `message` | `str` | Y | Human-readable error description. | -| `context` | `IntegrityContext \| None` | N | Structured diagnostic context. | -| `junk_entries` | `tuple[Junk, ...]` | N | Junk AST nodes representing syntax errors. | -| `source_path` | `str \| None` | N | Path to source file for error context. | - ### Constraints -- Purpose: Raised by `FluentBundle.add_resource()` in strict mode when syntax errors (Junk entries) are detected. -- Immutable: All attributes frozen after construction. 
Mutation raises `ImmutabilityViolationError`. -- Sealed: `@final` decorator prevents subclassing. -- Financial: Financial applications require fail-fast behavior. Silent failures during FTL source loading are unacceptable for monetary formatting. -- Import: `from ftllexengine.integrity import SyntaxIntegrityError` or `from ftllexengine import SyntaxIntegrityError` +- Import: `from ftllexengine import WriteConflictError` +- Extra properties: `existing_seq` and `new_seq` +- Typical meaning: concurrent or forbidden overwrite in write-once cache mode --- ## `FormattingIntegrityError` -Formatting errors detected in strict mode during message formatting. +Strict-mode formatting failure that carries the underlying Fluent errors. ### Signature ```python @@ -421,607 +273,36 @@ class FormattingIntegrityError(DataIntegrityError): fallback_value: str = "", message_id: str = "", ) -> None: ... - - @property - def fluent_errors(self) -> tuple[FrozenFluentError, ...]: ... - @property - def fallback_value(self) -> str: ... - @property - def message_id(self) -> str: ... - @property - def context(self) -> IntegrityContext | None: ... ``` -### Parameters -| Parameter | Type | Req | Description | -|:----------|:-----|:----|:------------| -| `message` | `str` | Y | Human-readable error description. | -| `context` | `IntegrityContext \| None` | N | Structured diagnostic context. | -| `fluent_errors` | `tuple[FrozenFluentError, ...]` | N | Original Fluent errors that triggered exception. | -| `fallback_value` | `str` | N | Fallback value that would have been returned in non-strict mode. | -| `message_id` | `str` | N | Message ID that failed to format. | - ### Constraints -- Purpose: Raised by `FluentBundle.format_pattern()` and `FluentLocalization.format_pattern()` / `format_value()` in strict mode when formatting errors occur. -- Immutable: All attributes frozen after construction. Mutation raises `ImmutabilityViolationError`. -- Sealed: `@final` decorator prevents subclassing. 
-- Financial: Financial applications require fail-fast behavior. Silent fallback values are unacceptable when formatting monetary amounts. -- Import: `from ftllexengine.integrity import FormattingIntegrityError` or `from ftllexengine import FormattingIntegrityError` +- Import: `from ftllexengine import FormattingIntegrityError` +- Extra properties: `fluent_errors`, `fallback_value`, `message_id` +- Raised by: strict formatting paths that refuse to return fallback text --- -## `BabelImportError` - -### Signature -```python -class BabelImportError(ImportError): - feature: str - - def __init__(self, feature: str) -> None: ... -``` - -### Parameters -| Parameter | Type | Req | Description | -|:----------|:-----|:----|:------------| -| `feature` | `str` | Y | Feature name requiring Babel (e.g., "parse_date"). | - -### Constraints -- Purpose: Raised when Babel is required but not installed. -- Behavior: Provides installation instructions in error message. -- Raised by: `parse_decimal()`, `parse_fluent_number()`, `parse_date()`, `parse_datetime()`, `parse_currency()`, `select_plural_category()`, `LocaleContext.create()`, `get_cldr_version()`, `get_territory()`, `get_currency()`, `list_territories()`, `list_currencies()`, `get_territory_currencies()`, `is_valid_territory_code()`, `is_valid_currency_code()`. -- Import: `from ftllexengine.core.babel_compat import BabelImportError` - ---- - -## `SerializationValidationError` - -### Signature -```python -class SerializationValidationError(ValueError): ... -``` - -### Parameters -| Parameter | Type | Req | Description | -|:----------|:-----|:----|:------------| - -### Constraints -- Purpose: AST validation errors during serialization. -- Raised: When `serialize(validate=True)` detects invalid AST. -- Common: Identifier names violating `[a-zA-Z][a-zA-Z0-9_-]*`, duplicate named argument names, named argument values not StringLiteral or NumberLiteral. 
-- Import: `from ftllexengine.syntax import SerializationValidationError` - ---- - -## `SerializationDepthError` - -### Signature -```python -class SerializationDepthError(ValueError): ... -``` - -### Parameters -| Parameter | Type | Req | Description | -|:----------|:-----|:----|:------------| - -### Constraints -- Purpose: AST nesting exceeds maximum serialization depth. -- Cause: Adversarial input, malformed AST, or deep Placeable nesting. -- Raised: When AST depth exceeds `max_depth` parameter (default: 100). -- Security: Prevents stack overflow from adversarially constructed ASTs. -- Import: `from ftllexengine.syntax import SerializationDepthError` - ---- - -## `ValidationResult` - -### Signature -```python -@dataclass(frozen=True, slots=True) -class ValidationResult: - errors: tuple[ValidationError, ...] - warnings: tuple[ValidationWarning, ...] - annotations: tuple[Annotation, ...] - - @property - def is_valid(self) -> bool: ... - @property - def error_count(self) -> int: ... # len(self.errors) only - @property - def annotation_count(self) -> int: ... # len(self.annotations) only - @property - def warning_count(self) -> int: ... - @staticmethod - def valid() -> ValidationResult: ... - @staticmethod - def invalid(...) -> ValidationResult: ... - @staticmethod - def from_annotations(annotations: tuple[Annotation, ...]) -> ValidationResult: ... -``` - -### Parameters -| Parameter | Type | Req | Description | -|:----------|:-----|:----|:------------| -| `errors` | `tuple[ValidationError, ...]` | Y | Syntax validation errors. | -| `warnings` | `tuple[ValidationWarning, ...]` | Y | Semantic warnings. | -| `annotations` | `tuple[Annotation, ...]` | Y | Parser annotations. | - -### Constraints -- Return: Immutable validation result. -- State: Frozen dataclass. -- `error_count`: Count of `ValidationError` entries only (syntax errors); does not include annotations. 
-- `annotation_count`: Count of `Annotation` entries only (parser informational notes); does not include syntax errors. -- `is_valid`: True iff `error_count == 0 AND annotation_count == 0`; annotations DO affect validity — parser annotations indicate parse failures and render the resource invalid. - ---- - -## `ValidationResult.format` - -### Signature -```python -def format( - self, - *, - sanitize: bool = False, - redact_content: bool = False, - include_warnings: bool = True, -) -> str: -``` - -### Parameters -| Parameter | Type | Req | Description | -|:----------|:-----|:----|:------------| -| `sanitize` | `bool` | N | Truncate content to prevent information leakage. | -| `redact_content` | `bool` | N | Completely redact content (requires sanitize=True). | -| `include_warnings` | `bool` | N | Include warnings in output (default: True). | - -### Constraints -- Return: Formatted string with errors, annotations, optionally warnings. -- Security: Set sanitize=True for multi-tenant applications. - ---- - -## `ValidationError` - -### Signature -```python -@dataclass(frozen=True, slots=True) -class ValidationError: - code: DiagnosticCode - message: str - content: str - line: int | None = None - column: int | None = None -``` - -### Parameters -| Parameter | Type | Req | Description | -|:----------|:-----|:----|:------------| -| `code` | `DiagnosticCode` | Y | Structured error code. | -| `message` | `str` | Y | Error message. | -| `content` | `str` | Y | Unparseable FTL content. | -| `line` | `int \| None` | N | Line number (1-indexed). | -| `column` | `int \| None` | N | Column number (1-indexed). | - -### Constraints -- Return: Immutable error record. -- State: Frozen dataclass. -- Code: `DiagnosticCode` enum, not a string. Use `.name` for the string form. 
- ---- - -## `ValidationError.format` - -### Signature -```python -def format( - self, - *, - sanitize: bool = False, - redact_content: bool = False, -) -> str: -``` - -### Parameters -| Parameter | Type | Req | Description | -|:----------|:-----|:----|:------------| -| `sanitize` | `bool` | N | Truncate content to prevent information leakage. | -| `redact_content` | `bool` | N | Completely redact content (requires sanitize=True). | - -### Constraints -- Return: Formatted error string with location and content. -- Security: Set sanitize=True for multi-tenant applications. - ---- - -## `WarningSeverity` - -Severity levels for validation warnings. - -### Signature -```python -class WarningSeverity(StrEnum): - CRITICAL = "critical" - WARNING = "warning" - INFO = "info" -``` - -### Parameters -| Value | Description | -|:------|:------------| -| `CRITICAL` | Will cause runtime failure (undefined reference). | -| `WARNING` | May cause issues (duplicate ID, missing value). | -| `INFO` | Informational only (style suggestions). | - -### Constraints -- StrEnum: Members ARE strings. `str(WarningSeverity.CRITICAL) == "critical"` -- Usage: Filter warnings by severity in tooling. -- Import: `from ftllexengine.diagnostics import WarningSeverity` - ---- +## `SyntaxIntegrityError` -## `ValidationWarning` +Strict-load failure raised when resource loading encounters syntax junk. ### Signature ```python -@dataclass(frozen=True, slots=True) -class ValidationWarning: - code: DiagnosticCode - message: str - context: str | None = None - line: int | None = None - column: int | None = None - severity: WarningSeverity = WarningSeverity.WARNING - - def format( +@final +class SyntaxIntegrityError(DataIntegrityError): + def __init__( self, + message: str, + context: IntegrityContext | None = None, *, - sanitize: bool = False, - redact_content: bool = False, - ) -> str: ... 
-``` - -### Parameters -| Parameter | Type | Req | Description | -|:----------|:-----|:----|:------------| -| `code` | `DiagnosticCode` | Y | Structured warning code. | -| `message` | `str` | Y | Warning message. | -| `context` | `str \| None` | N | Additional context. | -| `line` | `int \| None` | N | Line number (1-indexed). | -| `column` | `int \| None` | N | Column number (1-indexed). | -| `severity` | `WarningSeverity` | N | Severity level (default: WARNING). | - -### Constraints -- Return: Immutable warning record. -- State: Frozen dataclass. -- Code: `DiagnosticCode` enum, not a string. Use `.name` for the string form. -- IDE: Line/column fields enable IDE/LSP integration for warning display. -- Severity: Enables filtering by importance (CRITICAL > WARNING > INFO). - ---- - -## `ValidationWarning.format` - -### Signature -```python -def format( - self, - *, - sanitize: bool = False, - redact_content: bool = False, -) -> str: -``` - -### Parameters -| Parameter | Type | Req | Description | -|:----------|:-----|:----|:------------| -| `sanitize` | `bool` | N | Truncate context to prevent information leakage. | -| `redact_content` | `bool` | N | Completely redact context (requires sanitize=True). | - -### Constraints -- Return: Formatted warning string with location and optional context. -- Format: `[code] at line N, column M: message (context: 'ctx')` -- Security: Set sanitize=True for multi-tenant applications. 
- ---- - -## `DiagnosticCode` - -### Signature -```python -class DiagnosticCode(Enum): - # Reference errors (1000-1999) - MESSAGE_NOT_FOUND = 1001 - ATTRIBUTE_NOT_FOUND = 1002 - TERM_NOT_FOUND = 1003 - TERM_ATTRIBUTE_NOT_FOUND = 1004 - VARIABLE_NOT_PROVIDED = 1005 - MESSAGE_NO_VALUE = 1006 - - # Resolution errors (2000-2999) - CYCLIC_REFERENCE = 2001 - NO_VARIANTS = 2002 - FUNCTION_NOT_FOUND = 2003 - FUNCTION_FAILED = 2004 - UNKNOWN_EXPRESSION = 2005 - TYPE_MISMATCH = 2006 - INVALID_ARGUMENT = 2007 - ARGUMENT_REQUIRED = 2008 - PATTERN_INVALID = 2009 - MAX_DEPTH_EXCEEDED = 2010 - FUNCTION_ARITY_MISMATCH = 2011 - TERM_POSITIONAL_ARGS_IGNORED = 2012 - PLURAL_SUPPORT_UNAVAILABLE = 2013 - FORMATTING_FAILED = 2014 - EXPANSION_BUDGET_EXCEEDED = 2015 - - # Syntax errors (3000-3999) - UNEXPECTED_EOF = 3001 - # 3002, 3003: not assigned — character/token-level errors are AST Annotation codes - PARSE_JUNK = 3004 - PARSE_NESTING_DEPTH_EXCEEDED = 3005 - - # Parsing errors (4000-4999) - Bi-directional localization - # 4001: not assigned - PARSE_DECIMAL_FAILED = 4002 - PARSE_DATE_FAILED = 4003 - PARSE_DATETIME_FAILED = 4004 - PARSE_CURRENCY_FAILED = 4005 - PARSE_LOCALE_UNKNOWN = 4006 - PARSE_CURRENCY_AMBIGUOUS = 4007 - PARSE_CURRENCY_SYMBOL_UNKNOWN = 4008 - PARSE_AMOUNT_INVALID = 4009 - PARSE_CURRENCY_CODE_INVALID = 4010 - - # Validation errors (5000-5099) - Fluent spec semantic validation - # 5001-5003: handled as parse-time syntax errors (Junk), not post-parse codes. - # 5008-5009: represented at runtime via VARIABLE_NOT_PROVIDED and NO_VARIANTS. - # 5012-5013: not defined by the current Fluent spec valid.md revision. 
- VALIDATION_TERM_NO_VALUE = 5004 - VALIDATION_SELECT_NO_DEFAULT = 5005 - VALIDATION_SELECT_NO_VARIANTS = 5006 - VALIDATION_VARIANT_DUPLICATE = 5007 - VALIDATION_NAMED_ARG_DUPLICATE = 5010 - VALIDATION_PLACEABLE_SELECTOR = 5011 - - # Validation warnings (5100-5199) - Resource-level validation - VALIDATION_PARSE_ERROR = 5100 - VALIDATION_CRITICAL_PARSE_ERROR = 5101 - VALIDATION_DUPLICATE_ID = 5102 - VALIDATION_NO_VALUE_OR_ATTRS = 5103 - VALIDATION_UNDEFINED_REFERENCE = 5104 - VALIDATION_CIRCULAR_REFERENCE = 5105 - VALIDATION_CHAIN_DEPTH_EXCEEDED = 5106 - VALIDATION_DUPLICATE_ATTRIBUTE = 5107 - VALIDATION_SHADOW_WARNING = 5108 - VALIDATION_TERM_POSITIONAL_ARGS = 5109 - # 5110: not assigned (VALIDATION_PLACEABLE_SELECTOR reclassified to error range, code 5011) -``` - -### Parameters -| Parameter | Type | Req | Description | -|:----------|:-----|:----|:------------| - -### Constraints -- Purpose: Unique error code identifiers for diagnostics. -- Ranges: 1000-1999 (reference), 2000-2999 (resolution), 3000-3999 (syntax), 4000-4999 (parsing), 5000-5099 (validation errors), 5100-5199 (validation warnings). -- Gaps: 4001 is unassigned. Codes 5001-5003, 5008-5009, 5012-5013, 5110 are intentionally unassigned (see inline comments). -- Import: `from ftllexengine.diagnostics import DiagnosticCode` - ---- - -## `Diagnostic` - -### Signature -```python -@dataclass(frozen=True, slots=True) -class Diagnostic: - code: DiagnosticCode - message: str - span: SourceSpan | None = None - hint: str | None = None - help_url: str | None = None - function_name: str | None = None - argument_name: str | None = None - expected_type: str | None = None - received_type: str | None = None - ftl_location: str | None = None - severity: Literal["error", "warning"] = "error" - resolution_path: tuple[str, ...] | None = None - - def format_error(self) -> str: ... 
-``` - -### Parameters -| Parameter | Type | Req | Description | -|:----------|:-----|:----|:------------| -| `code` | `DiagnosticCode` | Y | Error code. | -| `message` | `str` | Y | Error description. | -| `span` | `SourceSpan \| None` | N | Source location. | -| `hint` | `str \| None` | N | Fix suggestion. | -| `help_url` | `str \| None` | N | Documentation URL. | -| `function_name` | `str \| None` | N | Function where error occurred. | -| `argument_name` | `str \| None` | N | Argument causing error. | -| `expected_type` | `str \| None` | N | Expected type. | -| `received_type` | `str \| None` | N | Actual type received. | -| `ftl_location` | `str \| None` | N | FTL file location. | -| `severity` | `Literal[...]` | N | "error" or "warning". | -| `resolution_path` | `tuple[str, ...] \| None` | N | Resolution stack for debugging nested references. | - -### Constraints -- Return: Immutable diagnostic record. -- State: Frozen dataclass. -- Resolution Path: Shows message reference chain (e.g., `("welcome", "greeting", "base")`). - ---- - -## `SourceSpan` - -### Signature -```python -@dataclass(frozen=True, slots=True) -class SourceSpan: - start: int - end: int - line: int - column: int -``` - -### Parameters -| Parameter | Type | Req | Description | -|:----------|:-----|:----|:------------| -| `start` | `int` | Y | Start character offset (0-indexed). | -| `end` | `int` | Y | End character offset (exclusive, 0-indexed). | -| `line` | `int` | Y | Line number (1-indexed). | -| `column` | `int` | Y | Column number (1-indexed). | - -### Constraints -- Immutable: Frozen dataclass. Mutation raises `FrozenInstanceError`. -- Invariants (enforced by `__post_init__`): `start >= 0`; `end >= start`; `line >= 1` (1-indexed); `column >= 1` (1-indexed). Violation raises `ValueError`. 
-- Import: `from ftllexengine.diagnostics import SourceSpan` - ---- - -## `OutputFormat` - -### Signature -```python -class OutputFormat(StrEnum): - RUST = "rust" - SIMPLE = "simple" - JSON = "json" -``` - -### Parameters -| Value | Description | -|:------|:------------| -| `RUST` | Rust compiler-style output with hints and help URLs. | -| `SIMPLE` | Single-line format (code: message). | -| `JSON` | JSON format for tooling integration. | - -### Constraints -- StrEnum: Members ARE strings. `str(OutputFormat.RUST) == "rust"` -- Import: `from ftllexengine.diagnostics import OutputFormat` - ---- - -## `DiagnosticFormatter` - -### Signature -```python -@dataclass(frozen=True, slots=True) -class DiagnosticFormatter: - output_format: OutputFormat = OutputFormat.RUST - sanitize: bool = False - redact_content: bool = False - color: bool = False - max_content_length: int = 100 - - def format(self, diagnostic: Diagnostic) -> str: ... - def format_all(self, diagnostics: Iterable[Diagnostic]) -> str: ... - def format_validation_result(self, result: ValidationResult) -> str: ... - def format_error(self, error: ValidationError) -> str: ... - def format_warning(self, warning: ValidationWarning) -> str: ... -``` - -### Parameters -| Field | Type | Description | -|:------|:-----|:------------| -| `output_format` | `OutputFormat` | Output style (rust, simple, json). | -| `sanitize` | `bool` | Truncate content to prevent information leakage. | -| `redact_content` | `bool` | Completely redact content (requires sanitize=True). | -| `color` | `bool` | Enable ANSI color codes (for terminal output). | -| `max_content_length` | `int` | Maximum content length when sanitizing. | - -### Constraints -- Return: Immutable formatter instance. -- State: Frozen dataclass. -- Thread: Safe. -- Security: All formatted output passes through `_escape_control_chars()` (full C0 range 0x00–0x1f and DEL 0x7f) to prevent log injection via embedded control characters in diagnostic messages. 
-- Import: `from ftllexengine.diagnostics import DiagnosticFormatter` - ---- - -## `DiagnosticFormatter.format` - -### Signature -```python -def format(self, diagnostic: Diagnostic) -> str: -``` - -### Parameters -| Parameter | Type | Req | Description | -|:----------|:-----|:----|:------------| -| `diagnostic` | `Diagnostic` | Y | Diagnostic to format. | - -### Constraints -- Return: Formatted diagnostic string. -- State: Read-only. -- Thread: Safe. - ---- - -## `DiagnosticFormatter.format_all` - -### Signature -```python -def format_all(self, diagnostics: Iterable[Diagnostic]) -> str: -``` - -### Parameters -| Parameter | Type | Req | Description | -|:----------|:-----|:----|:------------| -| `diagnostics` | `Iterable[Diagnostic]` | Y | Diagnostics to format. | - -### Constraints -- Return: Formatted string with all diagnostics separated by newlines. -- State: Read-only. -- Thread: Safe. - ---- - -## `DiagnosticFormatter.format_validation_result` - -### Signature -```python -def format_validation_result( - self, - result: ValidationResult, - *, - include_warnings: bool = True, -) -> str: + junk_entries: tuple[Junk, ...] = (), + source_path: str | None = None, + ) -> None: ... ``` -### Parameters -| Parameter | Type | Req | Description | -|:----------|:-----|:----|:------------| -| `result` | `ValidationResult` | Y | Validation result to format. | -| `include_warnings` | `bool` | N | Include warnings in output (default: True). | - -### Constraints -- Return: Formatted string with summary, errors, warnings, and annotations. -- State: Read-only. -- Thread: Safe. - ---- - -## Error Fallback Formats - -When resolution errors occur, FTLLexEngine returns readable fallback strings instead of raising exceptions. The fallback format varies by expression type. 
- -### Fallback Format Table - -| Expression Type | Fallback Format | Example | -|:----------------|:----------------|:--------| -| `VariableReference` | `{$name}` | `{$count}` | -| `MessageReference` | `{message-id}` | `{welcome}` | -| `TermReference` | `{-term-id}` | `{-brand}` | -| `FunctionReference` | `{FUNC(...)}` | `{NUMBER(...)}` | -| `SelectExpression` | `{{selector} -> ...}` | `{{$count} -> ...}` | -| Unknown expression | `{???}` | `{???}` | - ### Constraints -- Fallbacks preserve FTL-like syntax for debugging. -- SelectExpression fallback shows selector context. -- All fallbacks wrapped in braces for visual distinction. +- Import: `from ftllexengine import SyntaxIntegrityError` +- Extra properties: `junk_entries`, `source_path` +- Raised by: strict boot and resource-loading paths that reject partial parse success --- diff --git a/docs/DOC_06_Testing.md b/docs/DOC_06_Testing.md index 3aa0a4ae..9d2417ed 100644 --- a/docs/DOC_06_Testing.md +++ b/docs/DOC_06_Testing.md @@ -1,676 +1,161 @@ --- -afad: "3.3" -version: "0.153.0" +afad: "3.5" +version: "0.163.0" domain: TESTING -updated: "2026-03-13" +updated: "2026-04-22" route: - keywords: [pytest, hypothesis, fuzz, marker, profile, conftest, fixture, test.sh, metrics] - questions: ["how to run tests?", "how to skip fuzz tests?", "what hypothesis profiles exist?", "what test markers are available?", "how to see strategy metrics?"] + keywords: [testing, lint, pytest, fuzz, HypoFuzz, Atheris, test.sh, lint.sh, check.sh] + questions: ["how do I run lint and tests?", "what is the fuzz marker for?", "which scripts drive testing?"] --- -# DOC_06_Testing - -Testing infrastructure reference. Pytest configuration, Hypothesis profiles, markers, scripts, contracts. +# Testing Reference --- -## Test Categories +## `scripts/validate_docs.py` -### Categories +Repository script that validates runnable Markdown examples against the live package behavior. 
-| Category | Location | Execution | Duration | Marker | -|:---------|:---------|:----------|:---------|:-------| -| Unit | `tests/test_*.py` | `uv run scripts/test.sh` | Seconds | N/A | -| Property | `tests/test_*_property.py` | `uv run scripts/test.sh` | Minutes | N/A | -| Fuzzing | `tests/fuzz/*.py` | `pytest -m fuzz` | 10+ min | `fuzz` | -| Oracle | `tests/fuzz/test_runtime_bundle_oracle.py` | `pytest -m fuzz` | 10+ min | `fuzz` | -| Depth | `tests/fuzz/test_core_depth_guard_exhaustion.py` | `pytest -m fuzz` | 5+ min | `fuzz` | +### Signature +```bash +uv run python scripts/validate_docs.py +``` ### Constraints -- Categories are mutually exclusive: No. -- Default category: Unit. -- Property tests use Hypothesis with moderate `max_examples` (50 in CI, 500 in dev). -- Fuzzing tests use Hypothesis with high `max_examples` (10000 under `--deep` via `hypofuzz` profile). +- Purpose: parse repository Markdown, run configured Python fences, and validate FTL fences with the project parser +- Coverage: executes the runnable example set configured in `pyproject.toml` +- Failure mode: exits non-zero on invalid snippets, parser errors, or failing Python blocks +- Related guard: `tests/test_documentation_tooling.py` verifies the validator configuration --- -## `pytest.mark.fuzz` +## `scripts/validate_version.py` -### Rationale -Fuzzing tests use `max_examples=10000` (via `hypofuzz` profile) and take 10+ minutes to complete; excluding them from normal test runs keeps `uv run scripts/test.sh` fast while preserving the option for dedicated deep testing. +Repository script that enforces package-version sync across code, metadata, and documentation frontmatter. -### Usage -```python -@pytest.mark.fuzz -def test_example() -> None: ... 
- -# File-level: -pytestmark = pytest.mark.fuzz +### Signature +```bash +uv run python scripts/validate_version.py ``` -### Behavior - -| Trigger | Action | -|:--------|:-------| -| Normal test run (`pytest tests/`) | Test SKIPPED | -| Marker filter (`pytest -m fuzz`) | Test RUNS | -| Specific file (`pytest tests/fuzz/test_syntax_parser_grammar.py`) | Test RUNS (bypass) | - ### Constraints -- Location: Defined in `tests/conftest.py`. -- Arguments: None. -- Run: `pytest -m fuzz` or `./scripts/fuzz_hypofuzz.sh --deep`. -- Skip: `uv run scripts/test.sh` (default behavior). +- Purpose: verify `pyproject.toml`, runtime version exports, and configured Markdown frontmatter stay synchronized +- Coverage: enforces the AFAD `version:` contract over the configured Markdown set +- Failure mode: exits non-zero on version drift or metadata mismatch +- Related guard: `tests/test_documentation_tooling.py` verifies the frontmatter key contract --- -## `pytest_configure` +## `scripts/run_examples.py` + +Repository script that executes every shipped example under the active project interpreter. ### Signature -```python -def pytest_configure(config: pytest.Config) -> None: +```bash +uv run python scripts/run_examples.py [--pattern '*.py'] [--list] ``` -### Parameters - -| Parameter | Type | Req | Description | -|:----------|:-----|:----|:------------| -| `config` | `pytest.Config` | Y | Pytest configuration object. | - -### Behavior - -| Phase | Action | -|:------|:-------| -| Configure | Registers `fuzz` marker via `addinivalue_line`. | - ### Constraints -- Return: None. -- Location: `tests/conftest.py`. -- Hook Type: configure. +- Purpose: keep `examples/*.py` runnable as a supported, repeatable gate +- Import mode: clears `PYTHONPATH` so examples run against the installed package contract +- Failure mode: exits non-zero when any example script fails --- -## `pytest_collection_modifyitems` +## `check.sh` + +Top-level orchestration script for the repository's full quality surface. 
### Signature -```python -def pytest_collection_modifyitems( - config: pytest.Config, - items: list[pytest.Item] -) -> None: +```bash +./check.sh ``` -### Parameters - -| Parameter | Type | Req | Description | -|:----------|:-----|:----|:------------| -| `config` | `pytest.Config` | Y | Pytest configuration object. | -| `items` | `list[pytest.Item]` | Y | Collected test items. | - -### Behavior - -| Phase | Action | -|:------|:-------| -| Collection | Adds skip marker to fuzz-tagged tests unless bypass condition met. | - ### Constraints -- Return: None. -- Location: `tests/conftest.py`. -- Hook Type: collect. -- Bypass conditions: `-m fuzz` in args OR fuzz-related file patterns in args. +- Purpose: run version/docs validation, examples, lint, tests, HypoFuzz preflight, and bounded Atheris checks in one command +- Environment: uses the same Python-versioned uv environment contract as the repo shell gates +- Fuzzing scope: includes corpus health plus short live Atheris smoke runs for graph and introspection targets --- -## `pytest_runtest_makereport` +## `pytest.mark.fuzz` + +Pytest marker indicating an intensive fuzz-only test surface. ### Signature ```python -def pytest_runtest_makereport( - item: pytest.Item, - call: pytest.CallInfo[None] -) -> None: +@pytest.mark.fuzz ``` -### Parameters - -| Parameter | Type | Req | Description | -|:----------|:-----|:----|:------------| -| `item` | `pytest.Item` | Y | Test item being reported on. | -| `call` | `pytest.CallInfo[None]` | Y | Call information with exception details. | - -### Behavior - -| Phase | Action | -|:------|:-------| -| Report | Detects Hypothesis failures via "Falsifying example" in exception. | -| Report | Generates standalone `repro_crash__.py` script. | -| Report | Writes JSON metadata to `.hypothesis/crashes/`. 
| - -### Output Files - -| File | Contents | -|:-----|:---------| -| `repro_crash__.py` | Standalone reproduction script | -| `crash__.json` | Machine-readable crash metadata | - ### Constraints -- Return: None. -- Location: `tests/conftest.py`. -- Hook Type: runtest. -- Trigger: Only on test failures containing "Falsifying example". +- Purpose: Separate slow or open-ended fuzz tests from default test runs +- Behavior: Normal `./scripts/test.sh` runs skip these tests +- Location: Declared in `pyproject.toml` --- -## Strategy Metrics Collection - -### Hooks - -Two session hooks in `tests/conftest.py` manage strategy metrics: - -| Hook | Phase | Action | -|:-----|:------|:-------| -| `pytest_sessionstart` | Start | Enable metrics, install event hook | -| `pytest_sessionfinish` | End | Write JSON report, stop live reporter | - -### Enabling Conditions - -Metrics are automatically enabled when any of: -- `STRATEGY_METRICS=1` environment variable -- `HYPOTHESIS_PROFILE=hypofuzz` -- Running with `-m fuzz` marker - -### Event Interception +## `scripts/lint.sh` -The `_install_event_hook()` function patches `hypothesis.event()` to also record to the metrics collector: +Repository lint runner script for the main static-analysis gate. -```python -_original_event = hypothesis.event -hypothesis.event = _wrapped_event # Calls original + records metrics -``` - -This enables automatic metrics capture from all event-emitting strategies without modifying strategy code. 
- -### Output Files - -| File | When Generated | Contents | -|:-----|:---------------|:---------| -| `.hypothesis/strategy_metrics.json` | Every session | Full metrics report | -| `.hypothesis/strategy_metrics_summary.txt` | If issues detected | Human-readable summary | - -### Per-Strategy Metrics - -With `--metrics` flag (or `STRATEGY_METRICS_DETAILED=1`), tracks per strategy: -- `invocations`: Total event count -- `wall_time_ms`: Total execution time -- `mean_cost_ms`: Average time per invocation -- `weight_pct`: Percentage of total events - -### Environment Variables - -| Variable | Default | Description | -|:---------|:--------|:------------| -| `STRATEGY_METRICS` | `0` | Enable collection | -| `STRATEGY_METRICS_LIVE` | `0` | Enable live console output | -| `STRATEGY_METRICS_DETAILED` | `0` | Show per-strategy table | -| `STRATEGY_METRICS_INTERVAL` | `10` | Live reporting interval (seconds) | - ---- - -## Hypothesis Profiles - -### Registration -```python -from hypothesis import settings, Phase, Verbosity - -settings.register_profile( - name, - max_examples=N, - phases=[Phase.explicit, Phase.reuse, Phase.generate, Phase.shrink], - derandomize=bool, - verbosity=Verbosity.X, -) +### Signature +```bash +./scripts/lint.sh [--verbose] ``` -### Registered Profiles - -| Profile | `max_examples` | `derandomize` | `deadline` | Use Case | -|:--------|---------------:|:--------------|:-----------|:---------| -| `dev` | 500 | False | default | Local development | -| `ci` | 50 | True | default | GitHub Actions | -| `verbose` | 100 | False | default | Debugging | -| `hypofuzz` | 10000 | False | None | Coverage-guided --deep runs | -| `stateful_fuzz` | 500 | False | None | RuleBasedStateMachine tests | - -### Selection Logic - -| Priority | Condition | Profile | -|---------:|:----------|:--------| -| 1 | `HYPOTHESIS_PROFILE=` env var | Explicit override | -| 2 | `CI=true` env var | `ci` | -| 3 | Default | `dev` | - ### Constraints -- Location: `tests/conftest.py`. 
-- Override: `HYPOTHESIS_PROFILE=verbose pytest tests/`. -- Note: Individual `@settings(max_examples=N)` overrides profile. +- Purpose: Run Ruff then mypy under the repo's expected isolated environment pivot +- Behavior: Pivots to `.venv-3.13` by default; `PY_VERSION` overrides target +- Import mode: keeps `PYTHONPATH` unset so tooling resolves the installed package surface +- Output: Quiet-on-success, log-on-fail, agent-oriented summary markers +- Failure mode: exits non-zero on any Ruff or mypy violation --- ## `scripts/test.sh` -### Purpose -Run full test suite with coverage enforcement. +Repository test runner script for the main correctness gate. -### Invocation +### Signature ```bash -uv run scripts/test.sh [--quick] [--ci] [--no-clean] [-- pytest-args] +./scripts/test.sh [--quick] [--ci] [--verbose] [-- ...pytest args] ``` -### Options - -| Option | Default | Description | -|:-------|:--------|:------------| -| `--quick` | Off | Skip coverage measurement | -| `--ci` | Off | CI mode, verbose output | -| `--no-clean` | Off | Preserve pytest caches | -| `--` | N/A | Pass remaining args to pytest | - -### Environment - -| Variable | Default | Description | -|:---------|:--------|:------------| -| `CI` | `false` | Triggers `ci` Hypothesis profile | -| `HYPOTHESIS_PROFILE` | auto | Override profile selection | - -### Exit Codes - -| Code | Meaning | -|-----:|:--------| -| 0 | All tests passed, coverage >= 95% | -| 1 | Test failures or coverage below threshold | - -### Output - -| Format | Location | -|:-------|:---------| -| JSON summary | stdout (`[SUMMARY-JSON-BEGIN]...[SUMMARY-JSON-END]`) | -| Coverage XML | `coverage.xml` | +### Constraints +- Purpose: Run pytest with the project’s expected environment pivot and reporting +- Behavior: Pivots to `.venv-3.13` by default; `PY_VERSION` overrides target +- Import mode: keeps `PYTHONPATH` unset so tests exercise the installed package surface +- Coverage: Enforces 100% line coverage and 100% branch coverage for 
`src/ftllexengine` in normal full mode +- Output: Log-on-fail summary plus structured status markers --- -## `scripts/fuzz_hypofuzz.sh` (HypoFuzz Interface) +## `scripts/fuzz_hypofuzz.sh` -### Purpose -Entry point for HypoFuzz-based fuzzing operations. +Repository script for Hypothesis and HypoFuzz workflows. -### Invocation +### Signature ```bash -./scripts/fuzz_hypofuzz.sh [MODE] [OPTIONS] +./scripts/fuzz_hypofuzz.sh [--deep | --preflight | --repro TEST | --list | --clean] [OPTIONS] ``` -### Modes - -| Mode | Description | -|:-----|:------------| -| (default) | Fast property tests (500 examples) | -| `--deep` | Continuous HypoFuzz (until Ctrl+C) | -| `--preflight` | Audit test infrastructure (events, strategies) | -| `--repro FILE` | Reproduce crash file | -| `--list` | List captured failures (with ages) | -| `--clean` | Remove all failure artifacts | - -### Options - -| Option | Default | Description | -|:-------|:--------|:------------| -| `--json` | Off | Output JSON for CI | -| `--verbose` | Off | Detailed progress | -| `--metrics` | Off | Single-pass pytest mode with per-strategy metrics (10s interval) | -| `--workers N` | 4 | Parallel workers | -| `--time N` | Endless | Time limit (seconds) | - -### Exit Codes - -| Code | Meaning | -|-----:|:--------| -| 0 | All tests passed, no findings | -| 1 | Findings detected | -| 2 | Error (environment/script) | -| 3 | Python version incompatible (Atheris requires 3.11-3.13) | +### Constraints +- Purpose: Run default property checks, deep fuzzing, preflight audits, and repro flows +- Behavior: Supports `--deep`, `--preflight`, `--repro`, `--metrics` +- Output: Structured heartbeat and summary markers --- ## `scripts/fuzz_atheris.sh` -### Purpose -Byte-level mutation fuzzing with Atheris/libFuzzer. +Repository script for native Atheris/libFuzzer targets. 
-### Invocation +### Signature ```bash -./scripts/fuzz_atheris.sh [MODE] [OPTIONS] -``` - -### Modes - -| Mode | Description | -|:-----|:------------| -| (default) | Interactive target selection | -| `--target NAME` | Run specific target | -| `--repro FILE` | Reproduce crash file | -| `--list` | List available targets | - -### Options - -| Option | Default | Description | -|:-------|:--------|:------------| -| `--workers N` | 4 | Number of parallel workers | -| `--time N` | Endless | Time limit (seconds) | - -### Available Targets - -| Target | Strategy | Use Case | -|:-------|:---------|:---------| -| `fuzz_roundtrip` | Parse-serialize roundtrip | Parser/serializer consistency | -| `fuzz_structured` | Grammar-aware generation | Deep logic bugs | -| `fuzz_oom` | Memory exhaustion | Resource limits | -| `fuzz_serializer` | Serializer edge cases | Whitespace/escaping correctness | -| `fuzz_runtime` | Bundle format operations | Runtime resolution bugs | -| `fuzz_builtins` | Built-in functions | NUMBER/DATETIME/CURRENCY edge cases | -| `fuzz_numbers` | Number formatting | Locale-aware number handling | -| `fuzz_currency` | Currency parsing | Symbol detection, ambiguity | -| `fuzz_plural` | Plural rules | CLDR plural category selection | -| `fuzz_iso` | ISO introspection | Territory/currency lookups | -| `fuzz_integrity` | Data integrity | Cache, error immutability | -| `fuzz_cache` | IntegrityCache | Checksum, write-once, eviction | -| `fuzz_bridge` | Function bridge | Custom function invocation | -| `fuzz_graph` | Dependency graph | Cycle detection, chain analysis | -| `fuzz_lock` | RWLock | Concurrency, timeout behavior | -| `fuzz_scope` | Scope resolution | Variable/term scoping | - -### Environment - -| Variable | Default | Description | -|:---------|:--------|:------------| -| `TMPDIR` | `/tmp` | Temp directory | - -### Exit Codes - -| Code | Meaning | -|-----:|:--------| -| 0 | Completed without crashes | -| 1+ | Crash detected | -| 3 | Python version 
incompatible | - -### Output - -| Format | Location | -|:-------|:---------| -| JSON summary | stdout (`[SUMMARY-JSON-BEGIN]...[SUMMARY-JSON-END]`) | -| Crash files | `.fuzz_atheris_corpus//crash_*` | - ---- - -## Fuzz Exclusion Behavior - -### Normal Test Run - -``` -pytest tests/ - | - +-- test_syntax_parser.py [RUN] - +-- test_syntax_parser_property.py [RUN] - +-- tests/fuzz/test_syntax_parser_grammar.py - | +-- has pytestmark = fuzz - | +-- conftest adds skip marker [SKIP] - +-- tests/fuzz/test_runtime_bundle_oracle.py - +-- has pytestmark = fuzz - +-- conftest adds skip marker [SKIP] -``` - -### Explicit Fuzz Run - -``` -pytest -m fuzz - | - +-- test_syntax_parser.py [SKIP - no fuzz marker] - +-- tests/fuzz/test_syntax_parser_grammar.py - | +-- has pytestmark = fuzz [RUN] - +-- tests/fuzz/test_runtime_bundle_oracle.py - +-- has pytestmark = fuzz [RUN] -``` - -### Specific File Bypass - -``` -pytest tests/fuzz/test_runtime_bundle_oracle.py - | - +-- conftest detects tests/fuzz/ path prefix - +-- Bypass skip logic - +-- All tests in file [RUN] +./scripts/fuzz_atheris.sh [TARGET | --setup | --list | --corpus | --minimize TARGET FILE | --replay TARGET [DIR] | --report TARGET | --clean TARGET] [OPTIONS] ``` ### Constraints -- Logic location: `tests/conftest.py:pytest_collection_modifyitems`. -- Bypass: Target file explicitly or use `-m fuzz`. -- Fuzz patterns: `tests/fuzz/` directory (any file directly invoked from this path prefix). - ---- - -## Parser Exception Contract - -### Rule -Parser must only raise `ValueError`, `RecursionError`, or `MemoryError` on invalid input. 
- -### Allowed - -| Exception | Condition | -|:----------|:----------| -| `ValueError` | Invalid syntax, malformed input | -| `RecursionError` | Deeply nested expressions exceeding stack | -| `MemoryError` | Extremely large input exhausting memory | - -### Violation - -| Trigger | Result | -|:--------|:-------| -| Any other exception type | `pytest.fail()` with exception details | - -### Location -- Enforced in: `src/ftllexengine/syntax/parser/`. -- Tested by: `tests/fuzz/test_syntax_parser_grammar.py:test_random_input_stability`. - ---- - -## Performance Contract - -### Rule -Parser must complete in adaptive time threshold based on input size. - -### Allowed - -| Input Size | Threshold | -|-----------:|----------:| -| 1 KB | 120ms | -| 10 KB | 300ms | -| 50 KB | 1100ms | - -Formula: `threshold = 100ms + (20ms * input_size_kb)` - -### Violation - -| Trigger | Result | -|:--------|:-------| -| Exceeds threshold | `SlowParsing` exception from Atheris target | - -### Location -- Enforced in: `fuzz_atheris/fuzz_perf.py`. -- Tested by: `./scripts/fuzz_atheris.sh perf`. - ---- - -## Test Artifact Storage - -| Location | Contents | Git Status | Distinguishable? 
| -|:---------|:---------|:-----------|:-----------------| -| `.hypothesis/` | Entire Hypothesis database | Ignored | N/A | -| `.hypothesis/examples/` | Coverage + failures mixed | Ignored | No | -| `.hypothesis/crashes/` | Portable crash reproduction files | Ignored | Yes (auto-generated) | -| `.hypothesis/hypofuzz.log` | HypoFuzz session log | Ignored | N/A | -| `.hypothesis/strategy_metrics.json` | Strategy metrics report | Ignored | N/A | -| `.hypothesis/strategy_metrics_summary.txt` | Human-readable summary | Ignored | N/A | -| `.pytest_cache/` | Pytest cache | Ignored | N/A | -| `.fuzz_atheris_corpus/` | Atheris working corpus | Ignored | N/A | -| `.fuzz_atheris_corpus/*/crash_*` | Crash artifacts | Ignored | Yes (prefix) | -| `fuzz_atheris/seeds/` | Atheris seed corpus | Tracked | N/A | -| `coverage.xml` | Coverage report | Ignored | N/A | - -### Automatic Failure Capture - -The `pytest_runtest_makereport` hook in `conftest.py` automatically generates crash files: - -```bash -./scripts/fuzz_hypofuzz.sh --list # List captured failures -``` - -### Hypothesis Bug Preservation - -When Hypothesis finds a failing input: - -1. **Automatic**: `pytest_runtest_makereport` hook generates `.hypothesis/crashes/repro_crash_*.py` -2. **Manual**: Promote the failing example to an `@example()` decorator - -```python -from hypothesis import example, given -from hypothesis import strategies as st - -@given(st.text()) -@example("the_failing_input") # Preserved in version control -def test_parser_handles_input(text: str) -> None: - ... -``` - -**Crash reproduction:** -```bash -# Run auto-generated reproduction script -uv run python .hypothesis/crashes/repro_crash_20260204_103000_a1b2c3d4.py - -# Or use the repro tool for JSON output -uv run python scripts/fuzz_hypofuzz_repro.py --json test_module::test_name -``` - -**Rationale**: HypoFuzz stores 100k+ coverage examples in `.hypothesis/examples/`. Committing would add 100MB+ to git. Instead: -1. 
Keep corpus local to each machine -2. Auto-capture crashes to `.hypothesis/crashes/` (portable, shareable) -3. Promote failures to `@example()` decorators (version controlled) -4. Each machine rebuilds its own coverage corpus - -### Atheris Bug Preservation - -When Atheris finds a crash, use the replay script for reproduction: - -| Step | Action | -|:-----|:-------| -| 1 | Reproduce: `uv run python fuzz_atheris/fuzz_atheris_replay_finding.py .fuzz_atheris_corpus//crash_*` | -| 2 | Add `@example(...)` decorator to relevant test function | -| 3 | Fix the bug, run tests to confirm | -| 4 | Delete crash file after committing test | - -**Crash-proof reporting**: Fuzz targets emit `[SUMMARY-JSON-BEGIN]...[SUMMARY-JSON-END]` -on exit via atexit handler, ensuring metadata is never lost on crash. - -**Rationale**: `.fuzz_atheris_corpus/` is git-ignored (contains binary seeds, machine-specific). -Unit tests with literal inputs are permanent, readable, and version-controlled. - ---- - -## File Pattern Reference - -| Pattern | Category | Marker | Notes | -|:--------|:---------|:-------|:------| -| `tests/test_*.py` | Unit | N/A | Standard tests | -| `tests/test_*_property.py` | Property | N/A | Hypothesis property-based | -| `tests/fuzz/*.py` | Fuzzing | `fuzz` | Excluded from normal runs | -| `tests/fuzz/test_runtime_bundle_oracle.py` | Oracle | `fuzz` | Differential testing vs ShadowBundle | -| `tests/fuzz/test_core_depth_guard_exhaustion.py` | Depth | `fuzz` | MAX_DEPTH boundary testing | -| `tests/fuzz/shadow_bundle.py` | Support | N/A | Reference implementation (not a test) | - ---- - -## Hypothesis Precedence - -| Priority | Source | Example | Notes | -|---------:|:-------|:--------|:------| -| 1 (highest) | `@settings` decorator | `@settings(max_examples=1500)` | Per-test override | -| 2 | `@settings` with profile | `@settings(settings.get_profile("dev"))` | Explicit profile use | -| 3 | Loaded profile | `settings.load_profile("dev")` | From conftest.py | -| 4 (lowest) 
| Default | `max_examples=100` | Hypothesis default | - -### Interaction Rules -- Decorator `@settings(max_examples=N)` ALWAYS overrides profile. -- Fuzzing tests intentionally hardcode high values to ensure deep exploration. -- Profile-based testing only works when tests don't specify their own settings. - ---- - -## Design Decisions - -### Marker-Based Exclusion -**Question**: How to separate fast unit tests from slow fuzzing tests? -**Decision**: `tests/fuzz/` subdirectory + `@pytest.mark.fuzz` marker + conftest.py skip logic. -**Rationale**: Directory isolates fuzz tests visually and structurally; marker integrates with -pytest `-m` filter; conftest enforces skipping in normal runs while allowing direct file bypass. -**Alternatives Rejected**: -- Naming convention only: No enforcement mechanism, easily forgotten. -- Environment variable check in tests: Invisible, hard to audit. - -### Profile Auto-Detection -**Question**: How to configure Hypothesis for different environments? -**Decision**: Auto-detect CI vs local via environment variables. -**Rationale**: Zero-config for common cases; explicit override available. -**Alternatives Rejected**: -- Always use same settings: CI would be slow, local would miss bugs. -- Require explicit configuration: Developer friction, easy to forget. - -### File Bypass Logic -**Question**: What if developer wants to run fuzz tests directly? -**Decision**: Detect specific file in pytest args and bypass skip. -**Rationale**: Allows `pytest tests/fuzz/test_syntax_parser_grammar.py` without `-m fuzz`. -**Alternatives Rejected**: -- Always require `-m fuzz`: Extra typing, breaks muscle memory. -- Never skip if any fuzz file mentioned: Too broad, confusing. 
- ---- - -## Quirks - -| Quirk | Behavior | Reason | -|:------|:---------|:-------| -| File bypass | `pytest tests/fuzz/test_syntax_parser_grammar.py` runs despite fuzz marker | Detected in `pytest_collection_modifyitems` via `config.invocation_params.args` | -| String matching | Bypass uses substring match, not exact | Allows both absolute and relative paths | -| Profile load timing | Profiles loaded at import time in conftest.py | Must happen before any test collection | -| `pytestmark` position | Must be before any `from` imports in file | Python module-level variable ordering | - ---- - -## Pitfalls - -| Mistake | Consequence | Correct Approach | -|:--------|:------------|:-----------------| -| Commit `.hypothesis/` | 100MB+ git bloat from HypoFuzz corpus | Keep ignored; use `.hypothesis/crashes/` for portable repros | -| Rely on `.fuzz_atheris_corpus/*/crash_*` | Bug lost when files cleaned up | Create unit test with crash input as literal | -| Hardcode `@settings(max_examples=N)` | Overrides profile, CI takes forever | Omit decorator or use profile-based settings for fuzz-only | -| Forget `pytestmark` in new fuzz file | Tests run in normal suite, slow | Add `pytestmark = pytest.mark.fuzz` at top of file | -| Run `pytest tests/` expecting fuzz tests | Fuzz tests silently skipped | Use `pytest -m fuzz` or `./scripts/fuzz_hypofuzz.sh --deep` | -| Set `HYPOTHESIS_PROFILE` wrong | Unexpected example counts | Valid values: `dev`, `ci`, `verbose`, `hypofuzz`, `stateful_fuzz` | -| Long socket paths in fuzzing | `AF_UNIX path too long` error | Scripts set `TMPDIR=/tmp` automatically | -| Ignore `.hypothesis/crashes/` | Miss portable crash reproductions | Check for auto-generated `repro_crash_*.py` scripts | - ---- - -## See Also - -- [FUZZING_GUIDE.md](FUZZING_GUIDE.md): Overview and comparison of fuzzing approaches -- [FUZZING_GUIDE_HYPOFUZZ.md](FUZZING_GUIDE_HYPOFUZZ.md): HypoFuzz operational guide -- [FUZZING_GUIDE_ATHERIS.md](FUZZING_GUIDE_ATHERIS.md): Atheris 
operational guide -- [tests/conftest.py](../tests/conftest.py): Profile, marker, and crash recording configuration -- [tests/fuzz/](../tests/fuzz/): Oracle and depth exhaustion fuzz tests +- Purpose: Run, replay, list, and minimize Atheris findings +- Behavior: Manages `.venv-atheris` separately from the main project venvs +- Output: Target-oriented CLI workflow around the `fuzz_atheris/` tree diff --git a/docs/FUZZING_GUIDE.md b/docs/FUZZING_GUIDE.md index 9947b2a2..6b25bbe5 100644 --- a/docs/FUZZING_GUIDE.md +++ b/docs/FUZZING_GUIDE.md @@ -1,225 +1,40 @@ --- -afad: "3.3" -version: "0.153.0" -domain: fuzzing -updated: "2026-03-13" +afad: "3.5" +version: "0.163.0" +domain: FUZZING +updated: "2026-04-22" route: - keywords: [fuzzing, testing, hypothesis, hypofuzz, atheris, property-based, coverage, crash, security, metrics, workers] - questions: ["how to run fuzzing?", "how to fuzz the parser?", "how to find bugs with fuzzing?", "which fuzzer to use?", "how do workers affect metrics?"] + keywords: [fuzzing, HypoFuzz, Atheris, Hypothesis, fuzz_hypofuzz.sh, fuzz_atheris.sh] + questions: ["which fuzzer should I use?", "how do I start fuzzing?", "how do I reproduce a fuzz failure?"] --- # Fuzzing Guide -**Purpose**: Overview of FTLLexEngine's fuzzing infrastructure. -**Prerequisites**: Basic pytest knowledge. +**Purpose**: Choose the right fuzzing entry point and run it with the repo-supported scripts. +**Prerequisites**: Dev environment synced with `uv`; Python 3.13 available locally for Atheris. 
-FTLLexEngine uses two complementary fuzzing systems: +## Overview -| System | Script | Best For | -|--------|--------|----------| -| **HypoFuzz** (Hypothesis) | `fuzz_hypofuzz.sh` | Logic errors, property violations, edge cases | -| **Atheris** (libFuzzer) | `fuzz_atheris.sh` | Crashes, security issues, memory safety | +Use: -**Detailed Guides:** -- [FUZZING_GUIDE_HYPOFUZZ.md](FUZZING_GUIDE_HYPOFUZZ.md) - Hypothesis/HypoFuzz property testing -- [FUZZING_GUIDE_ATHERIS.md](FUZZING_GUIDE_ATHERIS.md) - Native fuzzing with Atheris +- `./scripts/fuzz_hypofuzz.sh` for Hypothesis and HypoFuzz property exploration. +- `./scripts/fuzz_atheris.sh` for native Atheris/libFuzzer targets. ---- - -## Quick Start (30 Seconds) +## Fast Start ```bash -# Quick property test check (recommended before committing) ./scripts/fuzz_hypofuzz.sh -``` - -You will see either: -- **[PASS]** - All tests passed -- **[FAIL]** - A bug was found. Follow the "Next steps" shown. - ---- - -## Testing Pyramid - -``` -+---------------------------------------------------------------+ -| FUZZING LAYER | -| +---------------------------+ +---------------------------+ | -| | fuzz_hypofuzz.sh --deep | | fuzz_atheris.sh native | | -| | - Property-based testing | | - Byte-level mutation | | -| | - Coverage-guided | | - Crash detection | | -| | - Logic errors | | - Security issues | | -| +---------------------------+ +---------------------------+ | -+---------------------------------------------------------------+ -| UNIT TEST LAYER | -| +----------------------------------------------------------+ | -| | uv run scripts/test.sh | | -| | - Comprehensive unit tests (95%+ coverage) | | -| +----------------------------------------------------------+ | -+---------------------------------------------------------------+ -``` - -Fuzzing finds issues that traditional unit tests miss by exploring vast input spaces automatically. 
- ---- - -## Choosing a Fuzzer - -### Use HypoFuzz (Hypothesis) When: - -- Testing **properties** (e.g., "parse then serialize equals original") -- Finding **logic errors** and **edge cases** -- Working with **typed data structures** (strategies generate valid Python objects) -- You want **automatic shrinking** to minimal failing examples -- Running **before every commit** (fast, no special setup) - -```bash -./scripts/fuzz_hypofuzz.sh # Quick check -./scripts/fuzz_hypofuzz.sh --deep # Continuous fuzzing (until Ctrl+C) -./scripts/fuzz_hypofuzz.sh --deep --metrics # Single-pass with metrics -``` - -### Use Atheris (libFuzzer) When: - -- Testing **crash resistance** and **memory safety** -- Looking for **security vulnerabilities** -- Testing **byte-level parsing** robustness -- Doing **security audits** before releases -- You need **raw mutation** that ignores grammar rules - -```bash -./scripts/fuzz_atheris.sh native # Crash detection -./scripts/fuzz_atheris.sh structured # Grammar-aware fuzzing -``` - ---- - -## Command Reference - -### HypoFuzz Commands - -| Command | Description | -|---------|-------------| -| `./scripts/fuzz_hypofuzz.sh` | Quick property tests | -| `./scripts/fuzz_hypofuzz.sh --deep` | Continuous HypoFuzz (until Ctrl+C) | -| `./scripts/fuzz_hypofuzz.sh --deep --metrics` | Single-pass pytest with strategy metrics | -| `./scripts/fuzz_hypofuzz.sh --preflight` | Audit test infrastructure | -| `./scripts/fuzz_hypofuzz.sh --list` | Show reproduction info | -| `./scripts/fuzz_hypofuzz.sh --repro TEST` | Reproduce failing test | -| `./scripts/fuzz_hypofuzz.sh --clean` | Remove .hypothesis/ | - -### Atheris Commands - -| Command | Description | -|---------|-------------| -| `./scripts/fuzz_atheris.sh native` | Stability fuzzing | -| `./scripts/fuzz_atheris.sh structured` | Grammar-aware fuzzing | -| `./scripts/fuzz_atheris.sh --list` | List crashes/findings | -| `./scripts/fuzz_atheris.sh --corpus` | Check seed health | -| `./scripts/fuzz_atheris.sh 
--replay TARGET` | Replay findings | - ---- - -## Key Differences - -| Aspect | HypoFuzz | Atheris | -|--------|----------|---------| -| Input type | Python objects | Raw bytes | -| Grammar awareness | Yes (via strategies) | No (mutations) | -| Storage | `.hypothesis/examples/` | `.fuzz_atheris_corpus/` | -| Filename format | SHA-384 hashes | `crash_*`, `finding_p{PID}_*` | -| Reproduction | Automatic pytest replay | Manual via repro script | -| Corpus | Implicit (coverage DB) | Explicit (seeds/) | -| Workers | Multiprocessing (metrics need single-worker) | fork() (metrics need `--workers 1`) | -| Best for | Logic bugs | Crashes, security | - ---- - -## Parallelism and Metrics - -Both fuzzing systems use multiprocessing, which fragments in-process -metrics across worker processes. Each system handles this differently: - -| System | Worker Model | Metrics Constraint | -|--------|-------------|-------------------| -| HypoFuzz | Python `multiprocessing` | `--metrics` forces single-process pytest | -| Atheris | libFuzzer `fork()` | `--workers 1` (default) for reliable metrics | - -**Root cause:** Both systems accumulate metrics (iterations, coverage, -performance history) in process-local state. Forked/spawned workers each -get independent copies. There is no cross-process aggregation. - -**Recommendation:** Use single-worker mode for metrics-sensitive runs -(debugging, performance analysis, weight skew detection). Use -multi-worker mode only for throughput-oriented crash detection. - -See [FUZZING_GUIDE_ATHERIS.md](FUZZING_GUIDE_ATHERIS.md) for details on -Atheris worker behavior under `fork()`. 
- ---- - -## Data Directories - -| Directory | System | Git Tracked | Contents | -|-----------|--------|-------------|----------| -| `.hypothesis/examples/` | HypoFuzz | No | Coverage database | -| `.hypothesis/hypofuzz.log` | HypoFuzz | No | Session log | -| `.fuzz_atheris_corpus/` | Atheris | No | Working corpus, crashes | -| `fuzz_atheris/seeds/` | Atheris | Yes | Seed corpus | - ---- - -## Workflow Summary - -### Daily Development - -```bash -# Before committing -./scripts/fuzz_hypofuzz.sh -``` - -### Deep Testing - -```bash -# Run HypoFuzz for extended period ./scripts/fuzz_hypofuzz.sh --deep --time 300 +./scripts/fuzz_atheris.sh --list ``` -### Security Audit +## Choosing A Surface -```bash -# Run all Atheris modes -./scripts/fuzz_atheris.sh native --time 300 -./scripts/fuzz_atheris.sh structured --time 300 -./scripts/fuzz_atheris.sh perf --time 300 -``` - -### Reproducing Failures - -```bash -# HypoFuzz failures (automatic replay) -./scripts/fuzz_hypofuzz.sh --repro tests/fuzz/test_syntax_parser_property.py::test_roundtrip - -# Atheris crashes (manual) -uv run python scripts/fuzz_atheris_repro.py .fuzz_atheris_corpus/crash_xxx -``` - ---- - -## Scripts Reference - -| Script | Purpose | -|--------|---------| -| `scripts/fuzz_hypofuzz.sh` | HypoFuzz entry point | -| `scripts/fuzz_hypofuzz_repro.py` | Reproduce Hypothesis failures | -| `scripts/fuzz_atheris.sh` | Atheris entry point | -| `scripts/fuzz_atheris_repro.py` | Reproduce Atheris crashes | -| `scripts/fuzz_atheris_corpus_health.py` | Seed corpus health check | - ---- +- Prefer HypoFuzz when you are exploring Python-level invariants and stateful/property-based tests. +- Prefer Atheris when you need native-style mutation, corpus management, or target-specific replay/minimization. 
-## See Also +## Related Guides -- [FUZZING_GUIDE_HYPOFUZZ.md](FUZZING_GUIDE_HYPOFUZZ.md) - Full HypoFuzz documentation -- [FUZZING_GUIDE_ATHERIS.md](FUZZING_GUIDE_ATHERIS.md) - Full Atheris documentation -- [DOC_06_Testing.md](DOC_06_Testing.md) - Testing overview +- [FUZZING_GUIDE_HYPOFUZZ.md](FUZZING_GUIDE_HYPOFUZZ.md) +- [FUZZING_GUIDE_ATHERIS.md](FUZZING_GUIDE_ATHERIS.md) +- [DOC_06_Testing.md](DOC_06_Testing.md) diff --git a/docs/FUZZING_GUIDE_ATHERIS.md b/docs/FUZZING_GUIDE_ATHERIS.md index 361ae524..559392a5 100644 --- a/docs/FUZZING_GUIDE_ATHERIS.md +++ b/docs/FUZZING_GUIDE_ATHERIS.md @@ -1,514 +1,34 @@ --- -afad: "3.3" -version: "0.155.0" -domain: fuzzing -updated: "2026-03-16" +afad: "3.5" +version: "0.163.0" +domain: FUZZING +updated: "2026-04-22" route: - keywords: [fuzzing, atheris, libfuzzer, native, crash, security, corpus, workers, metrics, subinterpreters, concurrent.interpreters] - questions: ["how to run atheris?", "how to do native fuzzing?", "how to reproduce crashes?", "how to manage corpus?", "how do atheris workers work?", "why are metrics wrong with multiple workers?", "how to fuzz interpreter pool?", "concurrent.interpreters missing?"] + keywords: [atheris, libfuzzer, fuzz_atheris.sh, replay, minimize, corpus] + questions: ["how do I run an Atheris target?", "how do I replay a finding?", "how does the Atheris environment get created?"] --- -# Atheris Guide (Native Fuzzing with libFuzzer) +# Atheris Guide -**Purpose**: Run byte-level mutation fuzzing for crash and security testing. -**Prerequisites**: macOS with LLVM, isolated virtualenv. +**Purpose**: Run and manage the native Atheris/libFuzzer targets in `fuzz_atheris/`. +**Prerequisites**: Python 3.13 available locally. 
---- - -## Quick Start - -```bash -# Run native fuzzing on parser -./scripts/fuzz_atheris.sh native - -# List crashes and findings -./scripts/fuzz_atheris.sh --list - -# Check corpus health -./scripts/fuzz_atheris.sh --corpus - -# Reproduce a crash -uv run python scripts/fuzz_atheris_repro.py .fuzz_atheris_corpus/crash_xxx -``` - ---- - -## How Atheris Works - -Atheris is a coverage-guided Python fuzzer built on libFuzzer: - -1. **Mutates** raw bytes using libFuzzer's mutation strategies -2. **Instruments** Python code for coverage feedback -3. **Crashes** are captured as binary files in `.fuzz_atheris_corpus/` -4. **Minimizes** crash inputs automatically - -Key difference from Hypothesis: Atheris works with **raw bytes**, not Python objects. - ---- - -## Prerequisites (macOS) - -Atheris requires LLVM. Install it once: - -```bash -brew install llvm -``` - -No other manual setup is needed. The script bootstraps `.venv-atheris` automatically -on first run. - -### Verification +## Common Commands ```bash ./scripts/fuzz_atheris.sh --help -``` - -Lists all available fuzzing targets. The first run creates `.venv-atheris` if it does -not exist. - ---- - -## Environment Architecture - -`.venv-atheris` is managed **independently of uv's project system**. This is a hard -architectural constraint: - -| Constraint | Detail | -|:-----------|:-------| -| Atheris Python | <= 3.13 (Atheris does not support Python 3.14+) | -| Project baseline | Python 3.13 (`requires-python = ">=3.13"` in `pyproject.toml`) | -| Consequence | Atheris and the project baseline are aligned on 3.13 — no bypass needed | - -**Solution**: the script uses `.venv-atheris` managed independently of uv's project -system. On every run, `ensure_atheris_venv` checks `.venv-atheris`: - -1. If it exists with Python 3.13 — proceed immediately. -2. 
If it is missing or has the wrong Python — locate Python 3.13 via pyenv (or - `python3.13`), create the venv directly with `python3.13 -m venv`, then install - `atheris`, `psutil`, and `ftllexengine[babel]` via pip. - -**`UV_PROJECT_ENVIRONMENT` must not be set** for `fuzz_atheris.sh`. If it were set to -`.venv-atheris`, uv would silently recreate that venv with Python 3.14 on each run, -making Atheris uninstallable. - -All fuzzer execution, report parsing, corpus listing, and replay go through -`$ATHERIS_PYTHON` (`.venv-atheris/bin/python`) directly — no `uv run` in the atheris -execution path. - ---- - -## Command Reference - -| Command | Description | -|---------|-------------| -| `./scripts/fuzz_atheris.sh native` | Stability fuzzing (crash detection) | -| `./scripts/fuzz_atheris.sh runtime` | Runtime stack fuzzing | -| `./scripts/fuzz_atheris.sh structured` | Grammar-aware FTL fuzzing | -| `./scripts/fuzz_atheris.sh perf` | Performance/ReDoS fuzzing | -| `./scripts/fuzz_atheris.sh iso` | ISO introspection fuzzing | -| `./scripts/fuzz_atheris.sh graph` | Dependency graph algorithm fuzzing | -| `./scripts/fuzz_atheris.sh builtins` | Built-in function rounding oracle fuzzing | -| `./scripts/fuzz_atheris.sh --list` | List crashes and findings | -| `./scripts/fuzz_atheris.sh --corpus` | Check seed corpus health | -| `./scripts/fuzz_atheris.sh --replay TARGET` | Replay findings without Atheris | -| `./scripts/fuzz_atheris.sh --clean TARGET` | Clean corpus for target | -| `./scripts/fuzz_atheris.sh --workers N` | Parallel workers (default: 1; see Workers section) | -| `./scripts/fuzz_atheris.sh --time N` | Time limit in seconds | - ---- - -## Fuzzing Targets - -Targets are dynamically discovered from `fuzz_atheris/fuzz_*.py` files: - -| Target | Mode | Focus | -|--------|------|-------| -| `native` | Stability | Crashes, hangs, memory safety in FTL parser | -| `runtime` | End-to-End | FluentBundle, IntegrityCache, Strict Mode | -| `structured` | Structured | 
Syntactically plausible FTL for deeper paths | -| `perf` | Performance | Algorithmic complexity, ReDoS vulnerabilities | -| `iso` | Introspection | ISO 3166/4217 lookups, type guards, cache | -| `integrity` | Validation | IntegrityCache hash verification | -| `lock` | Concurrency | RWLock timeout and contention paths | -| `roundtrip` | Convergence | Parser-serializer round-trip consistency | -| `serializer` | AST-construction | Serializer idempotence via programmatic AST | -| `graph` | Algorithms | Dependency graph cycle detection, adversarial topologies | -| `builtins` | Functions | NUMBER/CURRENCY/DATETIME rounding oracles, locale coverage | - ---- - -## Workflow 1: Crash Detection - -```bash -# Run stability fuzzing -./scripts/fuzz_atheris.sh native --time 300 - -# Or until Ctrl+C -./scripts/fuzz_atheris.sh native -``` - -**Interpreting Output:** - -``` -#12345 REDUCE cov: 1234 ft: 567 corp: 89/10Kb exec/s: 456 -``` - -- `#12345` - Iteration count -- `cov:` - Code coverage (edges) -- `ft:` - Feature count -- `corp:` - Corpus size -- `exec/s` - Executions per second - -**Crash Detected:** - -``` -==12345== ERROR: libFuzzer: deadly signal -``` - -Crash file saved to `.fuzz_atheris_corpus/crash_*`. - ---- - -## Workflow 2: Reproducing Crashes - -When a crash is found: - -```bash -# List all crashes +./scripts/fuzz_atheris.sh numbers --time 60 ./scripts/fuzz_atheris.sh --list - -# Reproduce a specific crash -uv run python scripts/fuzz_atheris_repro.py .fuzz_atheris_corpus/crash_abc123 - -# Generate @example decorator -uv run python scripts/fuzz_atheris_repro.py --example .fuzz_atheris_corpus/crash_abc123 -``` - -**Output:** - -``` -[FINDING] Parser crashed with ValueError: ... - -Full traceback: ------------------------------------------------------------- -... ------------------------------------------------------------- - -Next steps: - 1. Add @example decorator to preserve this case: - @example(ftl='...') - 2. Fix the bug in the parser - 3. 
Re-run fuzzing to verify -``` - ---- - -## Workflow 3: Structured Fuzzing - -For grammar-aware fuzzing that generates syntactically valid FTL: - -```bash -./scripts/fuzz_atheris.sh structured --time 300 -``` - -Structured fuzzing finds issues in deeper parser code paths that random bytes cannot reach. - -**Finding Artifacts:** - -Structured fuzzing saves finding details to `.fuzz_atheris_corpus//findings/`: - -``` -.fuzz_atheris_corpus/structured/findings/ -├── finding_p12345_0001_source.ftl # Original FTL (PID-prefixed) -├── finding_p12345_0001_s1.ftl # Serialized once -├── finding_p12345_0001_s2.ftl # Serialized twice -└── finding_p12345_0001_meta.json # Finding metadata -``` - ---- - -## Corpus Management - -### Seed Corpus - -Seeds are stored in `fuzz_atheris/seeds/` and are **git tracked**: - -``` -fuzz_atheris/seeds/ -├── 00_minimal.ftl # Minimal valid FTL -├── 01_message.ftl # Basic message -├── 10_select.ftl # Select expressions -├── 20_terms.ftl # Terms and references -├── 30_complex.ftl # Complex nested structures -└── iso_baseline.bin # Binary seed for ISO fuzzer -``` - -### Working Corpus - -The working corpus in `.fuzz_atheris_corpus/` is **not git tracked**: - -``` -.fuzz_atheris_corpus/ -├── crash_abc123 # Crash artifacts -├── native/ # Native fuzzer corpus -├── structured/ # Structured fuzzer corpus -│ └── findings/ # Finding artifacts -└── ... -``` - -### Health Check - -```bash -./scripts/fuzz_atheris.sh --corpus -``` - -Reports: -- Total seeds and byte count -- Parse success/failure rate -- Grammar feature coverage -- Duplicate detection - -### Adding Seeds - -1. Create seed file: - ```ftl - # fuzz_atheris/seeds/40_new_feature.ftl - new-feature = { $arg -> - [case1] Value 1 - *[other] Default - } - ``` - -2. 
Verify: - ```bash - ./scripts/fuzz_atheris.sh --corpus - ``` - -### Binary Seeds - -For ISO fuzzers that expect structured binary input: - -```python -# Example: ISO seed -seed = bytes([ - 5, # locale length - *b"en_US", # locale string - 3, # code length - *b"USD", # currency code - 0b00000011, # flags -]) - -with open("fuzz_atheris/seeds/iso_custom.bin", "wb") as f: - f.write(seed) -``` - -### Removing Duplicates - -```bash -uv run python scripts/fuzz_atheris_corpus_health.py --dedupe -``` - -Shows duplicates. Add `--execute` to remove them. - ---- - -## Adding New Fuzzers - -FTLLexEngine uses a dynamic plugin system. New fuzzers are automatically discovered. - -### Plugin Header Schema - -All `fuzz_atheris/fuzz_*.py` files must include: - -```python -# FUZZ_PLUGIN_HEADER_START -# FUZZ_PLUGIN: - -# Intentional: This header is intentionally placed for dynamic plugin discovery. -# CRITICAL: DO NOT REMOVE THIS HEADER - REQUIRED FOR FUZZ_ATHERIS.SH -# FUZZ_PLUGIN_HEADER_END -``` - -### Creating a New Fuzzer - -1. Create `fuzz_atheris/fuzz_myfuzzer.py` with the header -2. Implement the fuzzer target function -3. Run `./scripts/fuzz_atheris.sh --help` to verify discovery -4. Execute with `./scripts/fuzz_atheris.sh myfuzzer` - ---- - -## Troubleshooting - -### ImportError: symbol not found (LLVM mismatch) - -Atheris was compiled against a different LLVM than the one on PATH. The script -auto-heals this on macOS by reinstalling from source with the Homebrew LLVM. To -trigger manually: - -```bash -./scripts/fuzz_atheris.sh --setup -``` - -If the auto-heal fails, install LLVM and reset the venv: - -```bash -brew install llvm -rm -rf .venv-atheris -./scripts/fuzz_atheris.sh --setup -``` - -### `.venv-atheris` has the wrong Python (e.g. rebuilt with 3.14) - -The script detects this automatically and rebuilds the venv. 
To force a rebuild: - -```bash -rm -rf .venv-atheris -./scripts/fuzz_atheris.sh --setup +./scripts/fuzz_atheris.sh --replay runtime path/to/finding ``` -The setup step creates `.venv-atheris` with Python 3.13, installs `atheris` and -`psutil`, and installs `ftllexengine[babel]` in editable mode. +## Environment -### WARNING: Failed to find function "__sanitizer_..." - -Harmless. This occurs because Python isn't compiled with AddressSanitizer. Coverage collection still works. - -### Slow Execution - -Check corpus size: - -```bash -ls -la .fuzz_atheris_corpus// | wc -l -``` - -Large corpus (100k+) slows startup. Clean periodically: - -```bash -./scripts/fuzz_atheris.sh --clean native -``` - -### Timeout/Hang Detection - -For performance fuzzing: - -```bash -./scripts/fuzz_atheris.sh perf --time 600 -``` - -Perf fuzzing detects ReDoS and algorithmic complexity issues. - ---- - -## Workers and Metrics - -### Single-Worker Mode (Default) - -The default `--workers 1` runs Atheris in a single process. All metrics -(iterations, findings, pattern coverage, performance history, memory -tracking, weight skew detection) are collected in-process and written to -the JSON report file at exit. This mode provides reliable, complete metrics. - -```bash -# Reliable metrics (default) -./scripts/fuzz_atheris.sh roundtrip --time 300 -``` - -### Multi-Worker Mode - -`--workers N` (N > 1) uses libFuzzer's `fork()`-based parallelism. 
Each -worker is a forked child process with its own copy of all state: - -| Concern | Behavior Under fork() | -|---------|----------------------| -| `BaseFuzzerState` | Independent copy per worker; never aggregated | -| JSON report file | All workers write to the same path; last writer wins | -| Finding artifacts | PID-prefixed filenames prevent collisions | -| `atexit` handlers | Fire in each worker independently | -| Performance history | Per-worker only | -| Pattern coverage | Per-worker shard, not global distribution | - -**When to use multi-worker mode:** - -- Maximum throughput for crash detection (findings are raw crash files, - not dependent on metrics) -- You do not need accurate aggregate metrics -- Corpus evolution (libFuzzer shares corpus via filesystem, not memory) - -```bash -# Throughput-oriented crash detection (metrics unreliable) -./scripts/fuzz_atheris.sh native --workers 4 --time 600 -``` - -**Metrics limitation:** This is the same class of problem that -HypoFuzz encounters with multiprocessing. HypoFuzz solves it by -falling back to single-process pytest when `--metrics` is enabled. -For Atheris, the solution is simpler: use `--workers 1` (the default) -when you need reliable metrics, and `--workers N` only for throughput. - -### Signal Handling - -In single-worker mode, the script disables libFuzzer's SIGINT handler -(`-handle_int=0`) so Python owns Ctrl+C and can run `atexit` handlers -cleanly. In multi-worker mode, libFuzzer's SIGINT handler is preserved -because the parent process needs it to propagate shutdown to children. - ---- - -## Architecture - -### Directory Structure - -``` -fuzz_atheris/ -├── seeds/ # Seed corpus (git tracked) -│ ├── bridge/, builtins/, cache/, ... 
# Per-target seed dirs -│ ├── *.ftl # FTL text seeds (top-level legacy) -│ └── *.bin # Binary seeds (top-level legacy) -├── fuzz_native.py # Stability fuzzer -├── fuzz_runtime.py # Runtime fuzzer -├── fuzz_structured.py # Grammar-aware fuzzer -├── fuzz_serializer.py # AST-construction serializer fuzzer -├── fuzz_scope.py # Variable scoping / expansion budget -├── fuzz_bridge.py # Function bridge machinery -├── fuzz_builtins.py # NUMBER/DATETIME/CURRENCY builtins -├── fuzz_cache.py # Cache concurrency and integrity -├── fuzz_currency.py # Currency symbol extraction -├── fuzz_dates.py # Date/datetime locale-aware parsing -├── fuzz_diagnostics_formatter.py # DiagnosticFormatter output/escaping -├── fuzz_graph.py # Dependency graph -├── fuzz_integrity.py # Semantic validation + strict mode -├── fuzz_introspection.py # IntrospectionVisitor + ReferenceExtractor -├── fuzz_iso.py # ISO 3166/4217 introspection -├── fuzz_locale_context.py # LocaleContext direct formatting API -├── fuzz_localization.py # FluentLocalization multi-locale orchestration -├── fuzz_lock.py # RWLock concurrency primitives -├── fuzz_numbers.py # Locale-aware numeric parsing -├── fuzz_oom.py # Parser object explosion (DoS) -├── fuzz_perf.py # Performance fuzzer -├── fuzz_plural.py # CLDR plural category selection -├── fuzz_roundtrip.py # Parser-serializer convergence -├── fuzz_atheris_replay_finding.py # Finding replay utility -└── mypy.ini # Type checking config - -.fuzz_atheris_corpus/ # Working corpus (gitignored) -├── crash_* # Crash artifacts -└── / # Per-target corpus - └── findings/ # Finding artifacts -``` - -### libFuzzer Integration - -Atheris wraps libFuzzer, providing: - -- **Corpus management**: Automatic minimization and evolution -- **Coverage tracking**: Inline 8-bit counters -- **Crash isolation**: Precise reproduction via crash files -- **Parallel fuzzing**: fork()-based workers (metrics per-worker only; see Workers section) - ---- +The script manages `.venv-atheris` itself and keeps 
it separate from the normal project venvs. If the Atheris environment is missing or built with the wrong Python version, the script recreates it automatically. -## See Also +## Useful Operations -- [FUZZING_GUIDE.md](FUZZING_GUIDE.md) - Overview and comparison -- [FUZZING_GUIDE_HYPOFUZZ.md](FUZZING_GUIDE_HYPOFUZZ.md) - Hypothesis property testing -- [DOC_06_Testing.md](DOC_06_Testing.md) - Full testing documentation +- `--list` to inspect captured findings. +- `--replay` to replay stored findings without starting a fresh fuzz run. +- `--minimize TARGET FILE` to shrink a failing input for one target. +- `--corpus` to run the corpus health check. diff --git a/docs/FUZZING_GUIDE_HYPOFUZZ.md b/docs/FUZZING_GUIDE_HYPOFUZZ.md index cd6c7056..ab4bf46d 100644 --- a/docs/FUZZING_GUIDE_HYPOFUZZ.md +++ b/docs/FUZZING_GUIDE_HYPOFUZZ.md @@ -1,687 +1,35 @@ --- -afad: "3.3" -version: "0.153.0" -domain: fuzzing -updated: "2026-03-13" +afad: "3.5" +version: "0.163.0" +domain: FUZZING +updated: "2026-04-22" route: - keywords: [fuzzing, hypothesis, hypofuzz, property-based, testing, coverage, metrics, workers] - questions: ["how to run hypothesis tests?", "how to use hypofuzz?", "how to reproduce hypothesis failures?", "how to see strategy metrics?", "why does --metrics use pytest instead of hypofuzz?"] + keywords: [hypofuzz, hypothesis, fuzz_hypofuzz.sh, deep mode, preflight, repro] + questions: ["how do I run HypoFuzz?", "what does --deep do?", "how do I reproduce a Hypothesis failure?"] --- -# HypoFuzz Guide (Hypothesis Property-Based Testing) +# HypoFuzz Guide -**Purpose**: Run and understand Hypothesis/HypoFuzz property-based testing. -**Prerequisites**: Basic pytest knowledge. +**Purpose**: Run the property-testing and HypoFuzz entry points shipped by the repository. +**Prerequisites**: `uv sync --group dev --group fuzz`. 
---- - -## Quick Start - -```bash -# Quick property test check -./scripts/fuzz_hypofuzz.sh - -# Continuous deep fuzzing -./scripts/fuzz_hypofuzz.sh --deep - -# Reproduce a specific failing test -./scripts/fuzz_hypofuzz.sh --repro tests/fuzz/test_syntax_parser_property.py::test_roundtrip -``` - ---- - -## How Hypothesis Works - -Hypothesis is a property-based testing framework that: - -1. **Generates** inputs based on type strategies -2. **Tests** properties you define (e.g., "parsing then serializing round-trips") -3. **Shrinks** failing inputs to minimal reproducible examples -4. **Stores** examples in `.hypothesis/examples/` for regression testing - -Key difference from Atheris: Hypothesis generates **Python objects** via strategies, not raw bytes. - ---- - -## Command Reference - -| Command | Description | -|---------|-------------| -| `./scripts/fuzz_hypofuzz.sh` | Quick property tests (default mode) | -| `./scripts/fuzz_hypofuzz.sh --deep` | Continuous HypoFuzz fuzzing (until Ctrl+C) | -| `./scripts/fuzz_hypofuzz.sh --deep --metrics` | Single-pass pytest with strategy metrics | -| `./scripts/fuzz_hypofuzz.sh --preflight` | Audit test infrastructure (events, strategies) | -| `./scripts/fuzz_hypofuzz.sh --list` | Show reproduction info and failures | -| `./scripts/fuzz_hypofuzz.sh --repro TEST` | Reproduce failing test with verbose output | -| `./scripts/fuzz_hypofuzz.sh --clean` | Remove .hypothesis/ database | -| `./scripts/fuzz_hypofuzz.sh --verbose` | Show detailed progress | -| `./scripts/fuzz_hypofuzz.sh --metrics` | Enable periodic per-strategy metrics | -| `./scripts/fuzz_hypofuzz.sh --workers N` | Parallel workers (default: 4; see Workers section) | -| `./scripts/fuzz_hypofuzz.sh --time N` | Time limit in seconds (--deep mode) | - ---- - -## Workflow 1: Quick Check (Recommended) - -Run before committing: - -```bash -./scripts/fuzz_hypofuzz.sh -``` - -**Interpreting Results:** - -- `[PASS]` - All property tests passed -- `[FAIL]` - Failures detected with 
falsifying examples shown -- `[STOPPED]` - Run interrupted by user (Ctrl+C) - ---- - -## Workflow 2: Deep Fuzzing - -For thorough exploration: - -```bash -# Run for 5 minutes -./scripts/fuzz_hypofuzz.sh --deep --time 300 - -# Run until Ctrl+C -./scripts/fuzz_hypofuzz.sh --deep -``` - -HypoFuzz uses coverage-guided fuzzing to explore new code paths. It runs all Hypothesis tests continuously, learning which inputs increase coverage. - -### Deep Mode with Metrics - -Adding `--metrics` changes the behavior: - -```bash -# Single-pass with strategy metrics (NOT continuous) -./scripts/fuzz_hypofuzz.sh --deep --metrics -``` - -**Trade-off**: `--metrics` uses pytest instead of HypoFuzz because HypoFuzz's multiprocessing prevents metrics collection across workers. This runs all fuzz tests once with 10,000 examples per test (hypofuzz profile) and emits live metrics every 10 seconds. - -For continuous fuzzing, use `--deep` without `--metrics`. - -**Session Log:** - -HypoFuzz output is logged to `.hypothesis/hypofuzz.log`. View failures: - -```bash -./scripts/fuzz_hypofuzz.sh --list -``` - ---- - -## Workflow 3: Reproducing Failures - -When Hypothesis finds a failing example: - -1. It prints the "Falsifying example" to the terminal -2. It stores the shrunk example in `.hypothesis/examples/` -3. On re-run, it **automatically replays** the failure - -**To reproduce with verbose output:** - -```bash -./scripts/fuzz_hypofuzz.sh --repro tests/fuzz/test_syntax_parser_property.py::test_roundtrip -``` - -**To extract @example decorator:** - -```bash -uv run python scripts/fuzz_hypofuzz_repro.py --example tests/fuzz/test_syntax_parser_property.py::test_roundtrip -``` - -This parses the failure output and generates a copy-paste ready `@example` decorator. 
- -**JSON output for automation:** - -```bash -uv run python scripts/fuzz_hypofuzz_repro.py --json tests/fuzz/test_syntax_parser_property.py::test_roundtrip -``` - -Output format: - -```json -{ - "test_path": "tests/fuzz/test_syntax_parser_property.py::test_roundtrip", - "status": "fail", - "exit_code": 1, - "timestamp": "2026-02-04T10:30:00+00:00", - "error_type": "AssertionError", - "traceback": "E AssertionError: ...", - "example": {"ftl": "msg = { $x"}, - "example_decorator": "@example(ftl='msg = { $x')", - "hypothesis_seed": 12345 -} -``` - ---- - -## Crash Recording - -When a Hypothesis test fails, the `conftest.py` hook automatically: - -1. Extracts the falsifying example from the failure -2. Generates a standalone `repro_crash__.py` script -3. Saves it to `.hypothesis/crashes/` with a companion JSON file - -**Benefits:** -- Crashes are never lost (even if `.hypothesis/examples/` is cleared) -- Each crash has a portable, standalone reproduction script -- Crash files can be shared between developers - -**Run a crash reproduction:** - -```bash -uv run python .hypothesis/crashes/repro_crash_20260204_103000_a1b2c3d4.py -``` - ---- - -## Database Structure - -Hypothesis stores data in `.hypothesis/`: - -``` -.hypothesis/ -├── examples/ # Coverage database (SHA-384 hashed filenames) -│ ├── a1b2c3d4... # Stored example (pickled Python objects) -│ └── ... -├── crashes/ # Portable crash reproduction files -│ ├── repro_crash_20260204_103000_a1b2c3d4.py # Standalone repro script -│ ├── crash_20260204_103000_a1b2c3d4.json # Machine-readable details -│ └── ... 
-└── hypofuzz.log # Session log from --deep runs -``` - -**Key Points:** - -- Examples are **pickled Python objects**, not text files -- Filenames are SHA-384 hashes of the test function signature -- Hypothesis automatically replays stored examples on test re-runs -- The `crashes/` directory contains portable reproduction scripts (auto-generated on failure) -- There is NO `.hypothesis/failures/` directory (this is a common misconception) - ---- - -## Promoting Failures to @example - -When a bug is found and fixed, promote the failing example to a permanent regression test: - -```python -from hypothesis import given, example -from tests.strategies.ftl import valid_ftl - -@example(ftl="edge-case = { $var }") # Promoted from Hypothesis failure -@given(ftl=valid_ftl()) -def test_roundtrip(ftl: str) -> None: - """Parse-serialize-parse produces identical AST.""" - ... -``` - -This ensures the edge case is tested deterministically in every run. - ---- - -## Oracle Testing (Differential Fuzzing) - -The `tests/fuzz/` directory contains oracle-based fuzzers that compare `FluentBundle` against a reference implementation: - -``` -tests/fuzz/ -├── __init__.py -├── shadow_bundle.py # Reference implementation (unoptimized, simple) -├── test_runtime_bundle_oracle.py # State machine oracle fuzzer -├── test_core_depth_guard_exhaustion.py # MAX_DEPTH boundary testing -└── ... # Additional fuzz modules (grammar, serializer, etc.) -``` - -**ShadowBundle** is a deliberately simple implementation for differential testing: -- No caching (computes everything fresh) -- No optimizations (simple recursive traversal) -- Explicit error handling (no silent failures) - -**Run oracle tests:** - -```bash -# Run all oracle fuzz tests -uv run pytest tests/fuzz/ -v -m fuzz - -# Run state machine fuzzer -uv run pytest tests/fuzz/test_runtime_bundle_oracle.py -v -m fuzz -``` - -The state machine fuzzer generates random sequences of operations and verifies both implementations produce consistent results. 
- ---- - -## Depth Exhaustion Testing - -`test_core_depth_guard_exhaustion.py` tests behavior at the `MAX_DEPTH` boundary (100 in `constants.py`): - -- **99-deep nesting**: Should succeed normally -- **100-deep nesting**: Should hit limit gracefully -- **101-deep nesting**: Should fail cleanly (no crash) - -```bash -uv run pytest tests/fuzz/test_core_depth_guard_exhaustion.py -v -m fuzz -``` - -This ensures the resolver handles pathological inputs without stack overflow or infinite recursion. - ---- - -## Test Markers - -FTLLexEngine uses pytest markers to categorize tests: - -| Marker | Purpose | When Run | -|--------|---------|----------| -| `@pytest.mark.hypothesis` | Standard Hypothesis tests | CI and fuzzing | -| `@pytest.mark.fuzz` | Intensive fuzz tests | --deep mode only | - -Tests marked with `@pytest.mark.fuzz` are skipped in normal runs: - -```python -pytestmark = pytest.mark.fuzz # Module-level marker - -@given(ftl=valid_ftl()) -@settings(max_examples=10000) -def test_intensive_fuzzing(ftl: str) -> None: - ... -``` - ---- - -## Troubleshooting - -### "AF_UNIX path too long" Error - -macOS limits Unix socket paths. The script sets `TMPDIR=/tmp` automatically. If running HypoFuzz directly: - -```bash -TMPDIR=/tmp uv run hypothesis fuzz tests/ -``` - -### Failures Not Reproducing - -Hypothesis stores failures in `.hypothesis/examples/`. If a test passes when you expect failure: - -1. The bug may have been fixed -2. The example database may be stale - -Try clearing and re-running: +## Common Commands ```bash -./scripts/fuzz_hypofuzz.sh --clean ./scripts/fuzz_hypofuzz.sh -``` - -### Finding the Falsifying Example - -The failing example is printed to stdout. Look for: - -``` -Falsifying example: test_roundtrip( - ftl='problematic input here', -) -``` - -Use `--repro` with `--verbose` for full output. 
- ---- - -## Workers and Metrics - -### Multi-Worker Mode (Default for --deep) - -HypoFuzz uses Python `multiprocessing` to run N worker processes in -parallel (default: `--workers 4`). Each worker is a separate Python -process with its own memory space. This provides high throughput for -continuous coverage-guided fuzzing. - -However, the strategy metrics collector (`tests/strategy_metrics.py`) -runs in-process and cannot aggregate events across worker boundaries. -Each worker accumulates its own event counts independently, and there -is no cross-process shared state. - -### --metrics Forces Single-Process Mode - -When `--metrics` is enabled, the script bypasses HypoFuzz entirely -and runs `pytest -m fuzz` in a single process. This ensures all -`hypothesis.event()` calls are captured by the same metrics collector: - -```bash -# Continuous fuzzing (multi-worker, no detailed metrics) -./scripts/fuzz_hypofuzz.sh --deep - -# Single-pass with reliable metrics (single-process pytest) -./scripts/fuzz_hypofuzz.sh --deep --metrics -``` - -**Trade-off**: `--metrics` mode runs a finite pass (10,000 examples -per test via the `hypofuzz` profile) instead of continuous fuzzing. -Use `--deep` for throughput and `--deep --metrics` for diagnostics. - -### Comparison with Atheris - -Both fuzzing systems face the same fundamental constraint: metrics -collected in process-local state cannot be shared across forked or -spawned workers. Each system handles it differently: - -| Aspect | HypoFuzz | Atheris | -|--------|----------|---------| -| Worker model | Python `multiprocessing` | libFuzzer `fork()` | -| Default workers | 4 (throughput-oriented) | 1 (metrics-reliable) | -| Metrics mode | `--metrics` forces single-process pytest | `--workers 1` (default) | -| Multi-worker metrics | Not collected (bypassed) | Per-worker only, last writer wins | - -See [FUZZING_GUIDE.md](FUZZING_GUIDE.md) for the cross-system overview. 
- ---- - -## Strategy Reference - -Custom strategies in `tests/strategies/ftl.py` generate valid FTL constructs: - -| Strategy | Description | -|----------|-------------| -| `ftl_identifiers()` | Valid FTL identifiers (`[a-zA-Z][a-zA-Z0-9_-]*`) | -| `ftl_simple_messages()` | Simple message definitions (`id = value`) | -| `ftl_simple_text()` | Text without FTL special characters | -| `ftl_terms()` | Term definitions (`-id = value`) | -| `ftl_placeables()` | Placeable expressions (variables, literals, nested) | -| `ftl_function_references()` | Function calls (`{ NUMBER($x) }`) | -| `ftl_message_references()` | Message references (`{ other-msg }`) | -| `ftl_term_references()` | Term references (`{ -brand }`) | -| `ftl_select_expressions()` | Select expressions with variants | -| `resolver_mixed_args()` | Mixed argument dictionaries for formatting | - -**Chaos mode strategies** (for parser stress testing): - -| Strategy | Description | -|----------|-------------| -| `ftl_chaos_text()` | Text including FTL structural characters | -| `ftl_boundary_depth_placeables()` | Placeables at MAX_DEPTH boundary | -| `ftl_circular_references()` | A -> B -> A reference patterns | -| `ftl_invalid_ftl()` | Structurally invalid FTL for error handling | -| `ftl_semantically_broken()` | Parses successfully but fails at runtime | - -**Usage:** - -```python -from hypothesis import given -from tests.strategies.ftl import ftl_simple_messages, resolver_mixed_args - -@given(source=ftl_simple_messages(), args=resolver_mixed_args()) -def test_bundle_format(source: str, args: dict) -> None: - bundle = FluentBundle("en_US") - bundle.add_resource(source) - # ... assertions -``` - ---- - -## Semantic Coverage with Events - -HypoFuzz uses code coverage to guide mutation. But code coverage cannot see *semantic* differences - two inputs that execute the same lines but test different logical cases appear identical to the fuzzer. 
- -`hypothesis.event()` creates "virtual branches" that guide HypoFuzz toward semantically interesting inputs even when code paths are identical. - -### Why Events Matter - -Consider a currency formatting test: - -```python -# Without events - fuzzer sees same coverage for JPY and USD -@given(currency=currency_codes) -def test_format_currency(currency: str) -> None: - result = format_currency(1000, currency) - assert isinstance(result, str) -``` - -```python -# With events - fuzzer actively seeks 0, 2, and 3 decimal currencies -from hypothesis import event - -@given(currency=currency_by_decimals()) # Strategy emits events -def test_format_currency(currency: str) -> None: - result = format_currency(1000, currency) - assert isinstance(result, str) -``` - -The `currency_by_decimals()` strategy emits `currency_decimals=0`, `currency_decimals=2`, or `currency_decimals=3` events, telling HypoFuzz these are distinct semantic cases to explore. - -### Event Taxonomy - -Use consistent event naming across the codebase: - -| Category | Format | Examples | -|:---------|:-------|:---------| -| Strategy choice | `strategy={variant}` | `strategy=placeable_variable`, `strategy=chaos_prefix_brace` | -| Domain classification | `{domain}={variant}` | `currency_decimals=2`, `territory_region=europe` | -| Boundary/depth | `boundary={name}`, `depth={n}` | `boundary=at_max_depth`, `depth=99` | -| Unicode category | `unicode={category}` | `unicode=emoji`, `unicode=cjk` | -| Property outcome | `outcome={result}` | `outcome=roundtrip_success`, `outcome=immutability_enforced` | -| Test parameter | `{param}={value}` | `thread_count=20`, `cache_size=50`, `reentry_depth=3` | -| State machine | `rule={name}`, `invariant={name}` | `rule=add_simple_message`, `invariant=cache_stats_consistent` | - -**Strategy events vs test events:** - -* **Strategy events** are emitted by strategy functions in `tests/strategies/`. 
They are tracked by `EXPECTED_EVENTS` in `tests/strategy_metrics.py` and drive strategy-level coverage metrics. -* **Test events** are emitted by individual `@given` test functions and `@rule`/`@invariant` methods. They guide HypoFuzz per-test but are NOT tracked by `EXPECTED_EVENTS`. - -### Writing Event-Aware Tests - -**Option 1: Use event-emitting strategies** - -Pre-built strategies in `tests/strategies/` emit events automatically: - -```python -from tests.strategies.ftl import ftl_placeables # Emits strategy=placeable_* -from tests.strategies.iso import currency_by_decimals # Emits currency_decimals=* - -@given(source=ftl_placeables()) -def test_placeables(source: str) -> None: - ... -``` - -**Option 2: Emit events in tests** - -For test-specific semantic categories: - -```python -from hypothesis import given, event - -@given(ftl=valid_ftl()) -def test_parser_coverage(ftl: str) -> None: - result = parse(ftl) - - # Emit events based on parse result characteristics - if result.errors: - event("parse_result=has_errors") - else: - event("parse_result=clean") - - for entry in result.body: - event(f"entry_type={type(entry).__name__}") -``` - -**Option 3: Composite strategies with events** - -Create domain-specific strategies: - -```python -from hypothesis import event -from hypothesis.strategies import composite - -@composite -def ftl_by_complexity(draw): - complexity = draw(st.sampled_from(["minimal", "moderate", "complex"])) - - match complexity: - case "minimal": - source = draw(ftl_simple_messages()) - case "moderate": - source = draw(ftl_with_placeables()) - case "complex": - source = draw(ftl_with_selects()) - - event(f"ftl_complexity={complexity}") - return source -``` - -### Checking Event Diversity - -After a `--deep` fuzzing session, the script reports event diversity: - -```bash ./scripts/fuzz_hypofuzz.sh --deep --time 300 - -# Output includes: -# [EVENT DIVERSITY] -# Top 15 events observed: -# 1247 expr_type=Message -# 892 expr_type=Term -# 456 
strategy=placeable_variable -# ... -``` - -**Good diversity**: Events are distributed across categories, indicating the fuzzer explored varied semantic paths. - -**Poor diversity**: One event dominates (e.g., 95% `expr_type=Message`), indicating the fuzzer is stuck in a narrow region. - -**Fix poor diversity**: Add more event-emitting strategies or adjust strategy weights to guide exploration. - -### Event-Enabled Strategies Reference - -| Module | Strategy | Events Emitted | -|:-------|:---------|:---------------| -| `ftl.py` | `ftl_placeables()` | `strategy=placeable_{variable,literal,nested,...}` | -| `ftl.py` | `ftl_chaos_source()` | `strategy=chaos_{prefix_brace,unbalanced,...}` | -| `ftl.py` | `ftl_pathological_nesting()` | `boundary={under,at,over}_max_depth`, `depth={N}` | -| `ftl.py` | `ftl_circular_references()` | `strategy=circular_{self_ref,two_way,...}` | -| `ftl.py` | `ftl_invalid_ftl()` | `strategy=invalid_{unclosed,missing_id,...}` | -| `ftl.py` | `ftl_unicode_stress_text()` | `unicode={bidi,combining,emoji,surrogate,...}` | -| `iso.py` | `currency_by_decimals()` | `currency_decimals={0,2,3}` | -| `iso.py` | `territory_by_region()` | `territory_region={g7,brics,baltic,...}` | -| `iso.py` | `locale_by_script()` | `locale_script={latin,cjk,cyrillic,arabic,other}` | - ---- - -## Strategy Metrics - -The metrics system tracks per-strategy behavior during `--deep` runs, similar to Atheris fuzzer target metrics. - -### Enabling Metrics - -```bash -# Continuous HypoFuzz (no detailed metrics, multiprocessing) -./scripts/fuzz_hypofuzz.sh --deep - -# Single-pass pytest with live metrics every 10 seconds -./scripts/fuzz_hypofuzz.sh --deep --metrics +./scripts/fuzz_hypofuzz.sh --preflight +./scripts/fuzz_hypofuzz.sh --repro tests/fuzz/test_runtime_bundle_state_machine.py::test_state_machine ``` -**Note**: `--metrics` mode uses pytest instead of HypoFuzz because HypoFuzz's multiprocessing prevents metrics collection across workers. 
The trade-off is single-pass execution (10,000 examples) instead of continuous fuzzing. - -### What Metrics Track - -**Universal metrics:** -- Total event counts across all strategies -- Weight skew detection (intended vs actual distribution) -- Coverage gaps (expected events not observed) -- Performance percentiles (p95, p99, max) - -**Per-strategy metrics (with `--metrics`):** - -| Metric | Description | -|--------|-------------| -| `invocations` | Total event count for this strategy | -| `wall_time_ms` | Total time spent in this strategy | -| `mean_cost_ms` | Average time per invocation | -| `weight_pct` | Percentage of total invocations | - -### Live Output Example - -With `--metrics`, every 10 seconds you see: - -``` -[METRICS] 120s | 45,678 events | 380/s | +11,234 since last -[METRICS] Top: strategy=placeable_variable=1234, currency_decimals=2=890, ... - -[METRICS] Per-Strategy Breakdown: -Strategy Invocations Wall Time Mean Cost Weight ------------------------------------------------------------------------------- -ftl_placeables 1,234 456.7ms 0.370ms 15.2% -currency_by_decimals 890 123.4ms 0.139ms 10.9% -... 
------------------------------------------------------------------------------- -``` - -### Output Files - -After each session, metrics are saved to: - -| File | Contents | -|------|----------| -| `.hypothesis/strategy_metrics.json` | Full metrics report (JSON) | -| `.hypothesis/strategy_metrics_summary.txt` | Human-readable summary (if issues detected) | - -### Environment Variables - -| Variable | Default | Description | -|----------|---------|-------------| -| `STRATEGY_METRICS` | `0` | Enable metrics collection (`1` to enable) | -| `STRATEGY_METRICS_LIVE` | `0` | Enable live console output (`1` to enable) | -| `STRATEGY_METRICS_DETAILED` | `0` | Show per-strategy table (`1` to enable) | -| `STRATEGY_METRICS_INTERVAL` | `10` | Reporting interval in seconds (with --metrics) | - -### Weight Skew Detection +## Modes -The system detects when actual strategy distribution deviates from intended weights by more than 15%. This indicates: - -- Strategy filtering issues -- Biased random generation -- Dead code paths - -Example skew warning: - -``` -[WARN] Weight skew detected: - - strategy=placeable_variable (intended=40.00%, actual=72.00%, deviation=32.00%) -``` - -### Integration with Events - -Strategy metrics work by intercepting `hypothesis.event()` calls. Event-emitting strategies (see Event-Enabled Strategies Reference above) automatically contribute to metrics collection without modification. - ---- - -## Architecture - -### How HypoFuzz Works - -1. **Discovery**: Finds all `@given` decorated test functions -2. **Execution**: Runs tests with coverage instrumentation -3. **Learning**: Identifies which inputs increase code coverage -4. **Mutation**: Generates new inputs based on coverage feedback -5. 
**Shrinking**: When a failure is found, shrinks to minimal example - -### Database Management - -- `.hypothesis/examples/` grows as coverage increases -- Large databases (100k+ entries) are normal for HypoFuzz runs -- The database is gitignored (see `.gitignore`) -- Promote important failures to `@example` decorators for permanent regression tests - ---- +- Default mode runs the standard Hypothesis-backed checks. +- `--deep` runs the intensive fuzz surface. +- `--preflight` audits event instrumentation and strategy coverage. +- `--repro` replays a known failing target. -## See Also +## Notes -- [FUZZING_GUIDE.md](FUZZING_GUIDE.md) - Overview and comparison -- [FUZZING_GUIDE_ATHERIS.md](FUZZING_GUIDE_ATHERIS.md) - Native fuzzing with Atheris -- [DOC_06_Testing.md](DOC_06_Testing.md) - Full testing documentation +- The script pivots into `.venv-3.13` by default. +- `--metrics` is intended for metric-focused runs rather than indefinite continuous fuzzing. diff --git a/docs/LOCALE_GUIDE.md b/docs/LOCALE_GUIDE.md index 26af277d..3741e2ef 100644 --- a/docs/LOCALE_GUIDE.md +++ b/docs/LOCALE_GUIDE.md @@ -1,317 +1,54 @@ --- -afad: "3.3" -version: "0.153.0" -domain: locale -updated: "2026-03-13" +afad: "3.5" +version: "0.163.0" +domain: LOCALE +updated: "2026-04-22" route: - keywords: [locale, NUMBER, DATETIME, CURRENCY, formatting, BCP-47, locale normalization, str vs NUMBER] - questions: ["why isn't my number formatted?", "how does locale formatting work?", "NUMBER vs raw variable?", "how to format numbers with locale?", "how to format currency?"] + keywords: [locale, NUMBER, DATETIME, CURRENCY, normalize_locale, get_system_locale, use_isolating] + questions: ["why did my number not format?", "what locale string should I use?", "what does use_isolating do?"] --- -# Locale Formatting Guide +# Locale Guide -**Purpose**: Understand Fluent's locale-aware formatting behavior and common misconceptions. -**Prerequisites**: Basic FluentBundle usage. 
+**Purpose**: Explain how locale normalization and locale-aware formatting work in FTLLexEngine. +**Prerequisites**: Basic Fluent syntax. ## Overview -Fluent uses **explicit formatting functions** for locale-aware output. Raw variable interpolation produces unformatted strings. This is **by design** per the Fluent specification. - -**Key Insight**: If your numbers don't have grouping separators, you need `NUMBER()`. This is not a bug. - ---- - -## Raw Interpolation vs. Formatted Output - -| Pattern | Input | Output (de_DE) | Reason | -|:--------|:------|:---------------|:-------| -| `{ $count }` | `1000` | `1000` | Raw interpolation via `str()` | -| `{ NUMBER($count) }` | `1000` | `1.000` | Locale-aware via `NUMBER()` | -| `{ $date }` | `datetime(...)` | `2026-01-12 14:30:00` | Raw `str()` representation | -| `{ DATETIME($date) }` | `datetime(...)` | `12.01.2026, 14:30` | Locale-aware via `DATETIME()` | - ---- - -## Why This Design? - -The Fluent specification intentionally separates: - -1. **Raw interpolation** (`{ $var }`): Developer controls formatting -2. **Locale-aware formatting** (`{ NUMBER($var) }`): Locale determines format - -**Rationale**: -- Not all numbers need locale formatting (IDs, codes, versions) -- Explicit is better than implicit (Python Zen) -- Developers choose when localization applies -- Consistent behavior across implementations - -**Reference**: [Project Fluent Guide - Variables](https://projectfluent.org/fluent/guide/variables.html) - -**Note**: Examples use `use_isolating=False` for readable output. Default bundles wrap interpolated values in Unicode bidi isolation marks (U+2068/U+2069), which are invisible but present in the string. Never disable bidi isolation in production applications that support RTL languages. - ---- - -## NUMBER() Function - -Formats numeric values with locale-appropriate separators and decimal points. +Raw Fluent variable interpolation does not perform locale formatting. 
Locale-aware rendering only happens when the message explicitly calls `NUMBER()`, `DATETIME()`, or `CURRENCY()`. ```python +from decimal import Decimal from ftllexengine import FluentBundle bundle = FluentBundle("de_DE", use_isolating=False) bundle.add_resource(""" -raw-count = Count: { $count } -formatted-count = Count: { NUMBER($count) } -""") - -# Raw interpolation -result, _ = bundle.format_pattern("raw-count", {"count": 1234567}) -# → "Count: 1234567" - -# Locale-aware formatting -result, _ = bundle.format_pattern("formatted-count", {"count": 1234567}) -# → "Count: 1.234.567" -``` - -**NUMBER() Options**: - -```python -bundle.add_resource(""" -decimal = { NUMBER($value, minimumFractionDigits: 2) } -no-grouping = { NUMBER($value, useGrouping: 0) } -custom = { NUMBER($value, minimumFractionDigits: 2, maximumFractionDigits: 4) } +raw = { $amount } +fmt = { CURRENCY($amount, currency: "EUR") } """) +raw, _ = bundle.format_pattern("raw", {"amount": Decimal("1234.50")}) +fmt, _ = bundle.format_pattern("fmt", {"amount": Decimal("1234.50")}) +assert raw == "1234.50" +assert fmt == "1.234,50\u00a0€" ``` -| Option | Values | Effect | -|:-------|:-------|:-------| -| `minimumFractionDigits` | integer | Minimum decimal places | -| `maximumFractionDigits` | integer | Maximum decimal places | -| `useGrouping` | `0` or `1` | Thousands separators (default: enabled). FTL named args accept NumberLiteral, not booleans. | -| `pattern` | string | Custom Babel number pattern | - ---- - -## CURRENCY() Function +## Locale Codes -Formats monetary values with currency symbol and locale-appropriate formatting. +- Public runtime APIs normalize locale codes to the canonical internal form. +- `normalize_locale()` is useful when you need the exact canonical string yourself. +- `get_system_locale()` reads the OS and environment variables for a default locale. 
```python -bundle = FluentBundle("de_DE", use_isolating=False) -bundle.add_resource(""" -price = { CURRENCY($amount, currency: "EUR") } -price-code = { CURRENCY($amount, currency: "EUR", currencyDisplay: "code") } -""") - -result, _ = bundle.format_pattern("price", {"amount": 1234.56}) -# → "1.234,56\xa0€" (NBSP U+00A0 before symbol, per CLDR) +from ftllexengine import get_system_locale, normalize_locale -result, _ = bundle.format_pattern("price-code", {"amount": 1234.56}) -# → "1.234,56 EUR" +assert normalize_locale("de-DE") == "de_de" +try: + detected = get_system_locale() +except ValueError: + detected = None +assert detected is None or isinstance(detected, str) ``` -**Non-Breaking Spaces**: CLDR uses non-breaking space (U+00A0) as the grouping separator in many locales (lv_LV, fr_FR, etc.) and between the amount and currency symbol (de_DE, etc.). This is intentional per the Unicode CLDR standard -- it prevents line breaks between amounts and symbols. When comparing formatted output in tests, use `"\xa0"` or `"\u00a0"` instead of regular space. - -**CURRENCY() Options**: - -| Option | Values | Effect | -|:-------|:-------|:-------| -| `currency` | ISO 4217 code | Required. Currency code (e.g., "EUR", "USD") | -| `currencyDisplay` | `"symbol"`, `"code"`, `"name"` | Display style (default: "symbol") | -| `pattern` | string | Custom CLDR currency pattern | - ---- - -## DATETIME() Function - -Formats date/datetime values with locale-appropriate patterns. - -```python -from datetime import datetime -from ftllexengine import FluentBundle - -bundle = FluentBundle("lv_LV", use_isolating=False) -bundle.add_resource(""" -raw-date = Date: { $date } -formatted-date = Date: { DATETIME($date) } -""") - -now = datetime(2026, 1, 12, 14, 30) - -# Raw interpolation -result, _ = bundle.format_pattern("raw-date", {"date": now}) -# → "Date: 2026-01-12 14:30:00" - -# Locale-aware formatting -result, _ = bundle.format_pattern("formatted-date", {"date": now}) -# → "Date: 2026. gada 12. 
janv." -``` - -**DATETIME() Options**: - -```python -bundle.add_resource(""" -short = { DATETIME($date, dateStyle: "short") } -long = { DATETIME($date, dateStyle: "long", timeStyle: "short") } -date-only = { DATETIME($date, dateStyle: "medium") } -""") -``` - -| Option | Values | Effect | -|:-------|:-------|:-------| -| `dateStyle` | `"full"`, `"long"`, `"medium"`, `"short"` | Preset date format (default: "medium") | -| `timeStyle` | `"full"`, `"long"`, `"medium"`, `"short"` | Preset time format (omit for date-only) | -| `pattern` | string | Custom Babel datetime pattern | - ---- - -## Common Misconceptions - -### "My numbers should auto-format" - -**Misconception**: `{ $count }` should produce locale-formatted output. - -**Reality**: Raw variables use `str()`. Use `NUMBER($count)` for locale formatting. - -**Why**: Fluent is explicit by design. Not all numbers need localization (IDs, version numbers, codes). - -### "This must be a bug" - -**Misconception**: Seeing `1000` instead of `1,000` means something is broken. - -**Reality**: This is spec-compliant behavior. The Fluent specification explicitly requires `NUMBER()` for locale-aware number formatting. - -### "Other i18n libraries auto-format" - -**Misconception**: Because ICU MessageFormat auto-formats, Fluent should too. - -**Reality**: Fluent made a deliberate design choice for explicit formatting. This matches Mozilla's implementation and the official specification. - ---- - -## Locale Handling - -### Locale Property vs. 
Babel Locale - -`FluentBundle` provides two locale-related properties: - -```python -bundle = FluentBundle("en-US") - -# Returns FTLLexEngine's canonical LocaleCode -bundle.locale # → "en_us" - -# Returns Babel's CLDR identifier -bundle.get_babel_locale() # → "en_US" -``` - -**Design Rationale**: -- `locale`: Returns the canonical lowercase underscore `LocaleCode` -- `get_babel_locale()`: Returns Babel's CLDR-facing identifier - -### Locale Normalization - -Internally, locales are normalized for consistent cache keys: - -```python -# All of these produce the same canonical LocaleCode: -"en-US" → "en_us" -"en_US" → "en_us" -"EN-US" → "en_us" -"en-us" → "en_us" -``` - -BCP-47 is case-insensitive by specification, so all variants are equivalent. - -### Locale Context Caching - -`LocaleContext` normalizes and caches Babel locale objects: - -```python -# Internal: cache key uses normalized form -cache_key = normalize_locale(locale_code) # "en_us" - -# The public locale_code is canonical too -ctx.locale_code # "en_us" -``` - ---- - -## Troubleshooting - -### Numbers Not Formatted - -**Symptom**: `{ $count }` produces `1000` instead of `1,000`. - -**Solution**: Use `{ NUMBER($count) }`. - -```python -# Before -bundle.add_resource("count = { $count }") -# Output: "1000" - -# After -bundle.add_resource("count = { NUMBER($count) }") -# Output: "1,000" (for en_US) -``` - -### Dates Not Formatted - -**Symptom**: `{ $date }` produces `2026-01-12 14:30:00`. - -**Solution**: Use `{ DATETIME($date) }`. - -```python -# Before -bundle.add_resource("date = { $date }") -# Output: "2026-01-12 14:30:00" - -# After -bundle.add_resource('date = { DATETIME($date, dateStyle: "long") }') -# Output: "January 12, 2026" (for en_US) -``` - -### Wrong Locale Format - -**Symptom**: Numbers/dates formatted for wrong locale. - -**Check**: -1. Verify bundle locale: `bundle.locale` -2. Verify Babel locale: `bundle.get_babel_locale()` -3. 
Ensure Babel is installed: `pip install ftllexengine[babel]` - ---- - -## Bi-Directional Localization - -For parsing locale-formatted user input back to Python types, see [PARSING_GUIDE.md](PARSING_GUIDE.md). - -```python -from ftllexengine import FluentBundle -from ftllexengine.parsing import parse_decimal - -# Format for display -bundle = FluentBundle("de_DE") -bundle.add_resource("price = { NUMBER($amount) } EUR") -formatted, _ = bundle.format_pattern("price", {"amount": 1234.56}) -# → "1.234,56 EUR" - -# Parse user input back -result, errors = parse_decimal("1.234,56", "de_DE") -# → Decimal('1234.56') -``` - ---- - -## Summary - -| Concept | Behavior | -|:--------|:---------| -| `{ $var }` | Raw `str()` interpolation | -| `{ NUMBER($var) }` | Locale-aware number formatting | -| `{ DATETIME($var) }` | Locale-aware date/time formatting | -| `{ CURRENCY($var, currency: "XXX") }` | Locale-aware currency formatting | -| `bundle.locale` | Canonical lowercase underscore `LocaleCode` | -| `bundle.get_babel_locale()` | Normalized Babel identifier | +## Bidi Isolation -**Remember**: Fluent's explicit formatting is a feature, not a bug. When in doubt, check the [Fluent specification](https://projectfluent.org/fluent/guide/). +`use_isolating=True` is the default on bundle and localization classes. It wraps placeables with Unicode bidi isolation marks so interpolated values do not corrupt surrounding RTL/LTR text. Keep it enabled for UI output unless you know the output will stay LTR-only and you need plain strings for logging or snapshot assertions. 
diff --git a/docs/MIGRATION.md b/docs/MIGRATION.md index d766fe93..0c77e329 100644 --- a/docs/MIGRATION.md +++ b/docs/MIGRATION.md @@ -1,786 +1,40 @@ --- -afad: "3.3" -version: "0.153.0" -domain: migration -updated: "2026-03-13" +afad: "3.5" +version: "0.163.0" +domain: MIGRATION +updated: "2026-04-22" route: - keywords: [migration, fluent.runtime, upgrade, breaking changes, mozilla fluent, python fluent] - questions: ["how to migrate from fluent.runtime?", "how to upgrade to ftllexengine?"] + keywords: [migration, fluent.runtime, FluentBundle, FluentLocalization, strict mode] + questions: ["how do I migrate from fluent.runtime?", "what changes when I switch to FTLLexEngine?"] --- -# Migration Guide: fluent.runtime → FTLLexEngine +# Migration From `fluent.runtime` -**Complete guide for migrating from Mozilla's fluent.runtime to FTLLexEngine** +**Purpose**: Highlight the main API and behavior differences when moving to FTLLexEngine. +**Prerequisites**: Familiarity with `fluent.runtime`. -**Target Audience**: Developers currently using `fluent.runtime` (Mozilla's Python implementation) who want to migrate to FTLLexEngine for Python 3.13+ features, better type safety, and improved performance. +## High-Level Differences -**[IMPORTANT] REQUIREMENT: Python 3.13+** -FTLLexEngine requires Python 3.13 or later. If your project uses Python 3.12 or earlier, you must upgrade your Python version before migrating. +- `FluentBundle.format_pattern()` takes a message id directly; you do not fetch a message object first. +- `FluentLocalization` is the multi-locale orchestration layer for fallback chains. +- Strict mode is the default. Formatting and resource-integrity problems raise immediately unless you opt into `strict=False`. +- Boot validation is built in through `LocalizationBootConfig`. ---- - -## Why Migrate? - -### FTLLexEngine Advantages - -1. 
**Python 3.13+ Modern Features**: - - PEP 695 `type` keyword aliases - - PEP 742 `TypeIs` type guards - - Pattern matching for cleaner code - - Frozen dataclasses with slots - -2. **Better Type Safety**: - - Full `mypy --strict` compatibility - - Type-safe introspection APIs - - Complete type annotations - -3. **Single Dependency**: - - Only requires Babel (vs fluent.runtime: fluent.syntax, attrs, babel, pytz, typing-extensions) - - Smaller dependency footprint - -4. **Simpler Architecture**: - - Cleaner API with fewer abstractions - - Direct imports from main package - - No separate fluent.syntax dependency - -5. **Comprehensive Documentation**: - - 100% API coverage - - Working examples - - Quick reference guide - - Migration guide (this document!) - -### When to Stay with fluent.runtime - -- Your project requires Python 3.6-3.12 (FTLLexEngine requires 3.13+) -- You need Mozilla's exact reference implementation behavior -- Your project is tightly integrated with Firefox/Thunderbird ecosystem -- Migration effort outweighs benefits for your use case - ---- - -## Quick Migration Checklist - -- [ ] Verify Python 3.13+ is available -- [ ] Update dependencies in requirements.txt/pyproject.toml -- [ ] Change import statements -- [ ] Update FluentBundle constructor (remove transform_func) -- [ ] Update FluentResource → parse_ftl() and add_resource() -- [ ] Update error handling (errors are already in list, not separate iteration) -- [ ] Test with existing .ftl files -- [ ] Update type annotations to use FTLLexEngine type aliases -- [ ] Run test suite and verify behavior - ---- - -## API Comparison - -### Installation - -```bash -# fluent.runtime -pip install fluent.runtime - -# FTLLexEngine -pip install ftllexengine -``` - -**Dependencies Comparison**: -``` -fluent.runtime → fluent.syntax, attrs, babel, pytz, typing-extensions -FTLLexEngine → Babel only -``` - ---- - -### Import Statements - -#### fluent.runtime - -```python -from fluent.runtime import FluentBundle, 
FluentResource -from fluent.runtime.errors import FluentFormatError -``` - -#### FTLLexEngine - -```python -from ftllexengine import FluentBundle, parse_ftl -from ftllexengine import FrozenFluentError, ErrorCategory -``` - -**Changes**: -- Top-level imports (no `ftllexengine.runtime` submodule needed) -- All public APIs available from main package -- `FluentResource` replaced by `parse_ftl()` -- `FluentFormatError` replaced by `FrozenFluentError` with `ErrorCategory` for classification - ---- - -### Creating a Bundle - -#### fluent.runtime - -```python -bundle = FluentBundle(['en-US'], use_isolating=True) -``` - -#### FTLLexEngine - -```python -bundle = FluentBundle('en-US', use_isolating=True) -``` - -**Changes**: -- [WARN] **Single locale string** instead of list: `'en-US'` not `['en-US']` -- [OK] `use_isolating` parameter works identically - -**Migration**: -```python -# fluent.runtime -bundle = FluentBundle(['en-US'], use_isolating=True) - -# FTLLexEngine - extract first locale from list -locales = ['en-US'] -bundle = FluentBundle(locales[0], use_isolating=True) -``` - ---- - -### Adding Resources - -#### fluent.runtime - -```python -from fluent.runtime import FluentResource - -# Parse resource -resource = FluentResource(""" -hello = Hello, World! -""") - -# Add to bundle -bundle.add_resource(resource) -``` - -#### FTLLexEngine - -```python -# Direct string - simpler! -bundle.add_resource(""" -hello = Hello, World! 
-""") - -# If you need AST for introspection, use parse_ftl() separately -from ftllexengine import parse_ftl -resource_ast = parse_ftl("""hello = Hello, World!""") -# Inspect AST here if needed -# Then add the original string to bundle -bundle.add_resource("""hello = Hello, World!""") -``` - -**Changes**: -- [OK] **No FluentResource wrapper needed** - pass string directly to `add_resource()` -- [OK] Simpler API with one less step -- [INFO] `parse_ftl()` is for AST introspection only - `add_resource()` accepts strings - -**Migration**: -```python -# fluent.runtime -resource = FluentResource(ftl_source) -bundle.add_resource(resource) - -# FTLLexEngine - direct string -bundle.add_resource(ftl_source) -``` - ---- - -### Formatting Messages - -#### fluent.runtime - -```python -# Get message first, then format pattern -msg = bundle.get_message('hello') -result, errors = bundle.format_pattern(msg.value, {}) -``` - -#### FTLLexEngine - -```python -# Direct message ID - no get_message() step needed -result, errors = bundle.format_pattern('hello', {}) -``` - -**Changes**: -- [OK] **Simpler API**: Direct message ID, no `get_message()` step needed -- [OK] **Same return pattern**: Both return `(result, errors)` tuple -- [OK] **Cleaner code**: One call instead of two - -**Migration**: -```python -# fluent.runtime -msg = bundle.get_message('hello') -if msg: - result, errors = bundle.format_pattern(msg.value, {}) - -# FTLLexEngine - simpler (no get_message step) -result, errors = bundle.format_pattern('hello', {}) -``` - ---- - -### Error Handling - -#### fluent.runtime - -```python -from fluent.runtime.errors import FluentFormatError - -# Get message first, then format -msg = bundle.get_message('hello') -result, errors = bundle.format_pattern(msg.value, {}) - -for error in errors: - if isinstance(error, FluentFormatError): - print(f"Error: {error}") -``` - -#### FTLLexEngine - -```python -from ftllexengine import FrozenFluentError - -# Direct message ID, no get_message() needed 
-result, errors = bundle.format_pattern('hello', {}) - -for error in errors: - print(f"Error ({error.category}): {error}") -``` - -**Changes**: -- Simpler API: Use message ID directly, no `get_message()` step -- Same return pattern: Both return `(result, errors)` tuple -- Single error type `FrozenFluentError` with `ErrorCategory` replaces class hierarchy - ---- - -### Attribute Access - -#### fluent.runtime - -```python -msg = bundle.get_message('login-button') -value, errors = bundle.format_pattern(msg.value, {}) -tooltip, errors = bundle.format_pattern(msg.attributes['tooltip'], {}) -``` - -#### FTLLexEngine - -```python -# Value -result, errors = bundle.format_pattern('login-button') - -# Attribute -result, errors = bundle.format_pattern('login-button', attribute='tooltip') -``` - -**Changes**: -- [OK] **Much simpler**: Use `attribute` parameter instead of `get_message().attributes[...]` -- [OK] **No manual attribute lookup**: Bundle handles it - ---- - -### Custom Functions - -#### fluent.runtime - -```python -def number_formatter(num, **options): - return str(num) - -bundle.add_function('NUMBER', number_formatter) -``` - -#### FTLLexEngine - -```python -def NUMBER(num, **options): - return str(num) - -bundle.add_function('NUMBER', NUMBER) -``` - -**Changes**: -- [OK] **Identical API**: Works the same way -- [OK] Function names should be UPPERCASE (convention in both) - ---- - -### Multi-Locale Support - -#### fluent.runtime - -```python -from fluent.runtime import FluentLocalization, FluentResourceLoader - -# Load from files with resource loader -loader = FluentResourceLoader("locales/{locale}") -l10n = FluentLocalization(['lv', 'en'], ['main.ftl'], loader) - -result = l10n.format_value('hello') -``` - -#### FTLLexEngine - -```python -from ftllexengine import FluentLocalization -from ftllexengine.localization import PathResourceLoader - -# Similar API with resource loader -loader = PathResourceLoader("locales/{locale}") -l10n = FluentLocalization(['lv', 'en'], 
['main.ftl'], loader) - -result, errors = l10n.format_value('hello') -``` - -**Note**: `PathResourceLoader` is in `ftllexengine.localization`, not the main package. - -**Changes**: -- [OK] **Similar API**: Both have FluentLocalization for multi-locale -- [WARN] **CRITICAL Return difference**: - - fluent.runtime: Returns just string: `result = l10n.format_value('hello')` - - FTLLexEngine: Returns tuple: `result, errors = l10n.format_value('hello')` (errors is immutable tuple) -- [OK] **PathResourceLoader**: Similar to FluentResourceLoader - ---- - -## Complete Migration Example - -### Before (fluent.runtime) - -```python -from fluent.runtime import FluentBundle, FluentResource -from fluent.runtime.errors import FluentFormatError - -# Create bundle -bundle = FluentBundle(['en-US'], use_isolating=True) - -# Load resource -resource = FluentResource(""" -welcome = Welcome, { $name }! -emails = You have { $count -> - [one] one email - *[other] { $count } emails -}. -""") -bundle.add_resource(resource) - -# Format message -msg = bundle.get_message('welcome') -if msg: - result, errors = bundle.format_pattern(msg.value, {'name': 'Alice'}) - if errors: - for error in errors: - print(f"Error: {error}") - print(result) -``` - -### After (FTLLexEngine) - -```python -from ftllexengine import FluentBundle - -# Create bundle -bundle = FluentBundle('en-US', use_isolating=True) - -# Load resource - direct string, no wrapper -bundle.add_resource(""" -welcome = Welcome, { $name }! -emails = You have { $count -> - [one] one email - *[other] { $count } emails -}. 
-""") - -# Format message - simpler API -result, errors = bundle.format_pattern('welcome', {'name': 'Alice'}) -if errors: - for error in errors: - print(f"Error: {error}") -print(result) -``` - -**Lines of Code**: -- fluent.runtime: 19 lines -- FTLLexEngine: 13 lines (32% reduction) - ---- - -## API Mapping Table - -### Core Classes - -| fluent.runtime | FTLLexEngine | Notes | -|----------------|--------------|-------| -| `FluentBundle(['locale'])` | `FluentBundle('locale')` | Single locale, not list | -| `FluentResource(str)` | Direct string to `add_resource()` | No wrapper needed; use `parse_ftl(str)` only for AST introspection | -| N/A | `FluentLocalization` | Built-in multi-locale support | -| N/A | `PathResourceLoader` | File system loader | - -### Methods - -| fluent.runtime | FTLLexEngine | Changes | -|----------------|--------------|---------| -| `bundle.add_resource(FluentResource)` | `bundle.add_resource(str)` | Direct string, no wrapper | -| `bundle.get_message(id).value` then `format_pattern()` | `bundle.format_pattern(id, args)` | Direct formatting - no intermediate Message object needed | -| `bundle.has_message(id)` | `bundle.has_message(id)` | Identical | -| N/A | `bundle.get_message_ids()` | List all messages | -| N/A | `bundle.get_message_variables(id)` | Get required variables | -| N/A | `bundle.introspect_message(id)` | Full message metadata | -| N/A | `bundle.validate_resource(str)` | Validate before loading | - -### Error Types - -| fluent.runtime | FTLLexEngine | Notes | -|----------------|--------------|-------| -| `FluentFormatError` | `FrozenFluentError` | Immutable error with `ErrorCategory` (main package) | -| N/A | `ErrorCategory.REFERENCE` | Missing messages/variables | -| N/A | `ErrorCategory.RESOLUTION` | Runtime errors | -| N/A | `ErrorCategory.CYCLIC` | Circular references | - ---- - -## Migration Patterns - -### Pattern 1: Simple Single-Locale App - -#### fluent.runtime -```python -from fluent.runtime import FluentBundle, 
FluentResource - -def setup_i18n(locale): - bundle = FluentBundle([locale]) - with open(f'locales/{locale}/main.ftl') as f: - resource = FluentResource(f.read()) - bundle.add_resource(resource) - return bundle - -bundle = setup_i18n('en-US') -``` - -#### FTLLexEngine -```python -from pathlib import Path -from ftllexengine import FluentBundle - -def setup_i18n(locale): - bundle = FluentBundle(locale) - ftl_source = Path(f'locales/{locale}/main.ftl').read_text() - bundle.add_resource(ftl_source) - return bundle - -bundle = setup_i18n('en-US') -``` +## Example ---- - -### Pattern 2: Multi-Locale with Manual Fallback - -#### fluent.runtime -```python -from fluent.runtime import FluentLocalization, FluentResourceLoader - -# Built-in multi-locale support -loader = FluentResourceLoader('locales/{locale}') -l10n = FluentLocalization(['lv', 'en'], ['main.ftl'], loader) - -# Returns just the string (no error tuple) -result = l10n.format_value('welcome', {'name': 'Anna'}) -``` - -#### FTLLexEngine -```python -from ftllexengine import FluentLocalization -from ftllexengine.localization import PathResourceLoader - -# Similar API -loader = PathResourceLoader('locales/{locale}') -l10n = FluentLocalization(['lv', 'en'], ['main.ftl'], loader) - -# Returns (result, errors) tuple -result, errors = l10n.format_value('welcome', {'name': 'Anna'}) -``` - -**Benefit**: Similar API, FTLLexEngine returns errors for better handling - ---- - -### Pattern 3: Custom Functions - -#### fluent.runtime -```python -from fluent.runtime import FluentBundle - -def upper_formatter(text, **options): - return str(text).upper() - -bundle = FluentBundle(['en']) -bundle.add_function('UPPER', upper_formatter) -``` - -#### FTLLexEngine ```python from ftllexengine import FluentBundle -def UPPER(text, **options): - return str(text).upper() - -bundle = FluentBundle('en') -bundle.add_function('UPPER', UPPER) -``` - -**Identical API** - no changes needed! 
- ---- - -## Type Annotations Migration - -### fluent.runtime (Limited typing) - -```python -from fluent.runtime import FluentBundle -from typing import Dict, Any - -def format_message(bundle: FluentBundle, msg_id: str, args: Dict[str, Any]) -> str: - msg = bundle.get_message(msg_id) - if msg: - result, errors = bundle.format_pattern(msg.value, args) - return result - return msg_id -``` - -### FTLLexEngine (Full mypy --strict) - -```python -from ftllexengine import FluentBundle, FrozenFluentError -from ftllexengine.localization import MessageId - -def format_message(bundle: FluentBundle, msg_id: MessageId, args: dict[str, object]) -> str: - """Format message with error logging.""" - result, errors = bundle.format_pattern(msg_id, args) - if errors: - for error in errors: - # error is properly typed as FrozenFluentError - logger.warning(f"Translation error: {error}") - return result -``` - -**Improvements**: -- [OK] Full type safety with `mypy --strict` -- [OK] Type aliases for clarity (`MessageId`) -- [OK] Modern Python 3.13 dict syntax (`dict[str, object]` vs `Dict[str, Any]`) - ---- - -## Testing Migration - -### Update Test Assertions - -#### fluent.runtime -```python -def test_message_formatting(): - bundle = FluentBundle(['en']) - bundle.add_resource(FluentResource("hello = Hello!")) - - # Two-step process: get_message() then format_pattern() - msg = bundle.get_message('hello') - result, errors = bundle.format_pattern(msg.value, {}) - - assert result == "Hello!" - assert not errors -``` - -#### FTLLexEngine -```python -def test_message_formatting(): - bundle = FluentBundle('en', use_isolating=False) # Clean assertions - bundle.add_resource("hello = Hello!") - - # One-step process: format_pattern() with message ID directly - result, errors = bundle.format_pattern('hello') - - assert result == "Hello!" 
- assert errors == () # Empty immutable tuple -``` - -**Benefits**: -- **Simpler**: One call instead of two (no `get_message()` step) -- **Cleaner**: Fewer lines, direct assertions -- **Type-safe**: Returns tuple instead of mutable error list - ---- - -## Troubleshooting Migration - -### Issue 1: "FluentBundle() takes 1 positional argument but 1 list was given" - -**Cause**: Using fluent.runtime syntax with list of locales - -**Solution**: -```python -# [OLD] fluent.runtime syntax -bundle = FluentBundle(['en-US']) - -# [NEW] FTLLexEngine syntax -bundle = FluentBundle('en-US') -``` - ---- - -### Issue 2: "FluentResource is not defined" - -**Cause**: Importing non-existent FluentResource - -**Solution**: -```python -# [OLD] fluent.runtime -from fluent.runtime import FluentResource # Old library had wrapper class - -# [NEW] FTLLexEngine - no wrapper needed -bundle.add_resource(ftl_source) # Direct string - -# Or if you need AST manipulation -from ftllexengine import parse_ftl -from ftllexengine.syntax.ast import Resource -resource_ast = parse_ftl(ftl_source) -``` - ---- - -### Issue 3: "format_pattern() missing required argument: 'message_id'" - -**Cause**: Using fluent.runtime pattern-first API - -**Solution**: -```python -# [OLD] fluent.runtime - requires get_message step -msg = bundle.get_message('hello') -result, errors = bundle.format_pattern(msg.value, {}) - -# [NEW] FTLLexEngine - direct message ID -result, errors = bundle.format_pattern('hello', {}) +bundle = FluentBundle("en_US", use_isolating=False) +bundle.add_resource("welcome = Hello, { $name }!") +result, errors = bundle.format_pattern("welcome", {"name": "Alice"}) +assert errors == () +assert result == "Hello, Alice!" 
``` ---- - -### Issue 4: Import errors for specific exception types - -**Cause**: Different exception hierarchy - -**Solution**: -```python -# fluent.runtime -from fluent.runtime.errors import FluentFormatError - -# FTLLexEngine - single error type with category classification -from ftllexengine import FrozenFluentError, ErrorCategory - -# Classify errors by category: -# ErrorCategory.REFERENCE - Missing messages/variables -# ErrorCategory.RESOLUTION - Runtime errors -# ErrorCategory.CYCLIC - Circular references -# ErrorCategory.PARSE - Parse failures -# ErrorCategory.FORMATTING - Formatting failures -``` - ---- - -## Compatibility Notes - -### What Works Identically - -[OK] Custom functions -[OK] Built-in NUMBER and DATETIME functions -[OK] Select expressions and plural rules -[OK] Terms and message references -[OK] Unicode bidi isolation -[OK] Error handling philosophy (graceful degradation) - -### What's Different - -[WARN] Constructor takes single locale, not list -[WARN] No FluentResource wrapper - direct string to `add_resource()` -[WARN] Different exception types (but same behavior) -Return immutable error tuples instead of mutable lists (`tuple[FrozenFluentError, ...]`) -[WARN] Python 3.13+ required (vs 3.6+) - -### What's New in FTLLexEngine - -[NEW] `FluentLocalization` for multi-locale -[NEW] `PathResourceLoader` for file systems -[NEW] `validate_resource()` for pre-flight validation -[NEW] `introspect_message()` for metadata -[NEW] `get_message_variables()` for variable discovery -[NEW] `get_message_ids()` for listing messages -[NEW] Full `mypy --strict` type safety -[NEW] Python 3.13 modern features -[NEW] **Bi-directional parsing** (not in fluent.runtime): - - `parse_decimal()` - locale-aware number parsing (financial-grade Decimal precision) - - `parse_date()`, `parse_datetime()` - locale-aware date parsing - - `parse_currency()` - currency parsing with symbol detection - - Type guards: `is_valid_decimal()`, `is_valid_currency()` - ---- - -## 
Migration Checklist - -### Pre-Migration - -- [ ] Verify Python 3.13+ available in all environments -- [ ] Review breaking changes section -- [ ] Identify custom function usage -- [ ] List all multi-locale fallback logic -- [ ] Backup current codebase - -### During Migration - -- [ ] Update `requirements.txt` or `pyproject.toml` -- [ ] Install FTLLexEngine: `pip install ftllexengine` -- [ ] Update imports: `fluent.runtime` → `ftllexengine` -- [ ] Change FluentBundle constructor (list → single locale) -- [ ] Remove FluentResource wrappers -- [ ] Update format_pattern calls (use message ID directly) -- [ ] Update error handling (tuple returns) -- [ ] Add multi-locale support with FluentLocalization -- [ ] Update type annotations - -### Post-Migration - -- [ ] Run full test suite -- [ ] Verify all .ftl files load correctly -- [ ] Test custom functions -- [ ] Test multi-locale fallback -- [ ] Test error handling -- [ ] Update documentation -- [ ] Update CI/CD to use Python 3.13+ -- [ ] Performance testing (should be faster!) - ---- - -## Getting Help - -- **FTLLexEngine Documentation**: [DOC_00_Index.md](DOC_00_Index.md) -- **Examples**: [examples/](../examples/) -- **Quick Reference**: [QUICK_REFERENCE.md](QUICK_REFERENCE.md) -- **Issues**: https://github.com/resoltico/ftllexengine/issues - ---- - -**fluent.runtime Version Referenced**: 0.4.0 - -**Note**: Babel is optional. Parser-only installation (`pip install ftllexengine`) works without external dependencies. For locale-aware formatting, install with `pip install ftllexengine[babel]`. - -**Note**: For FTLLexEngine version-to-version upgrade guidance, see [CHANGELOG.md](../CHANGELOG.md). +## Recommended Migration Order -**Feedback**: If you encounter migration issues not covered here, please open an issue! +1. Replace message-object lookup flows with direct message-id formatting calls. +2. Decide whether you need single-locale `FluentBundle` or multi-locale `FluentLocalization`. +3. 
Make strict-mode behavior explicit in tests. +4. Add startup validation with `LocalizationBootConfig` if resources come from disk or another loader. diff --git a/docs/PARSING_GUIDE.md b/docs/PARSING_GUIDE.md index dabdd257..e29a420f 100644 --- a/docs/PARSING_GUIDE.md +++ b/docs/PARSING_GUIDE.md @@ -1,854 +1,66 @@ --- -afad: "3.3" -version: "0.153.0" -domain: parsing -updated: "2026-03-13" +afad: "3.5" +version: "0.163.0" +domain: PARSING +updated: "2026-04-22" route: - keywords: [parsing, parse_decimal, parse_fluent_number, parse_date, parse_currency, bi-directional, user input, forms, BabelImportError] - questions: ["how to parse user input?", "how to parse number?", "how to parse a FluentNumber?", "how to parse date?", "how to parse currency?", "what exceptions do parsing functions raise?"] + keywords: [parsing, parse_decimal, parse_currency, parse_date, parse_datetime, parse_fluent_number] + questions: ["how do I parse localized user input?", "how do I do roundtrip formatting and parsing?", "what do parse errors look like?"] --- -# Parsing Guide - Bi-Directional Localization +# Parsing Guide -**Purpose**: Parse locale-formatted user input back to Python types. -**Prerequisites**: Basic FluentBundle usage. +**Purpose**: Parse locale-formatted numbers, currency, dates, and datetimes back into Python values. +**Prerequisites**: Babel-enabled install (`ftllexengine[babel]`). -FTLLexEngine provides comprehensive **bi-directional localization**: both formatting (data → display) and parsing (display → data). +## Overview -## Table of Contents - -1. [Quick Start](#quick-start) -2. [API Reference](#api-reference) -3. [Best Practices](#best-practices) -4. [Common Patterns](#common-patterns) -5. [Migration from Babel](#migration-from-babel) -6. 
[Troubleshooting](#troubleshooting) - ---- - -## Quick Start - -### Basic Number Parsing - -```python -from ftllexengine.parsing import parse_decimal -from ftllexengine.parsing import is_valid_decimal - -# Parse locale-formatted number -result, errors = parse_decimal("1 234,56", "lv_LV") -if is_valid_decimal(result): # guards accept None, return False - amount = result # Decimal('1234.56') - -# Parse US format -result, errors = parse_decimal("1,234.56", "en_US") -if is_valid_decimal(result): - amount_us = result # Decimal('1234.56') -``` - -**Note**: Type guards (`is_valid_decimal`, `is_valid_date`, `is_valid_currency`) accept `None` and return `False`. This simplifies the pattern from `if not errors and is_valid_decimal(result)` to just `if is_valid_decimal(result)`. - -### Direct FluentNumber Parsing - -```python -from decimal import Decimal - -from ftllexengine.parsing import parse_fluent_number - -result, errors = parse_fluent_number("1 234,56", "lv_LV") -if not errors and result is not None: - assert result.value == Decimal("1234.56") - assert str(result) == "1 234,56" - assert result.precision == 2 -``` - -### Bi-Directional Workflow +The parsing API returns `(result, errors)` tuples. Success means `errors == ()`; failure means `result is None` and `errors` contains immutable `FrozenFluentError` objects. 
```python from decimal import Decimal -from ftllexengine import FluentBundle -from ftllexengine.parsing import parse_decimal -from ftllexengine.parsing import is_valid_decimal - -# Create bundle -bundle = FluentBundle("lv_LV") -bundle.add_resource('price = { CURRENCY($amount, currency: "EUR") }') - -# Format for display -formatted, _ = bundle.format_pattern("price", {"amount": 1234.56}) -# → "1 234,56 €" - -# Parse user input back to data -user_input = "1 234,56" -result, errors = parse_decimal(user_input, "lv_LV") - -if is_valid_decimal(result): # guards accept None, return False - # Roundtrip: format → parse → format preserves value - assert result == Decimal("1234.56") -``` - ---- +from ftllexengine.parsing import parse_currency, parse_date, parse_decimal -## API Reference +amount, errors = parse_decimal("12.450,50", "de_DE") +assert errors == () +assert amount == Decimal("12450.50") -### parse_decimal() +money, errors = parse_currency("12.450,50 EUR", "de_DE", default_currency="EUR") +assert errors == () +assert money == (Decimal("12450.50"), "EUR") -Parse locale-formatted number string to `Decimal` (financial precision). - -Returns `tuple[Decimal | None, tuple[FrozenFluentError, ...]]`. - -```python -from decimal import Decimal -from ftllexengine.parsing import parse_decimal -from ftllexengine.parsing import is_valid_decimal - -# Financial precision - no float rounding errors -result, errors = parse_decimal("100,50", "lv_LV") -if is_valid_decimal(result): # guards accept None - vat = result * Decimal("0.21") # → Decimal('21.105') - exact! - -# Float would lose precision -float_amount = 100.50 -float_vat = float_amount * 0.21 # → 21.105000000000004 - precision loss! 
+delivery_date, errors = parse_date("2026年3月15日", "ja_JP") +assert errors == () +assert delivery_date.isoformat() == "2026-03-15" ``` -**When to use**: -- Financial calculations (invoices, payments, VAT) -- Currency amounts -- Any calculation where precision matters - -### parse_fluent_number() +## FluentNumber Parsing -Parse locale-formatted number string directly to `FluentNumber`. - -Returns `tuple[FluentNumber | None, tuple[FrozenFluentError, ...]]`. +`parse_fluent_number()` returns a `FluentNumber`, preserving both the numeric value and the localized display string. ```python from decimal import Decimal - from ftllexengine.parsing import parse_fluent_number -result, errors = parse_fluent_number("100,50", "lv_LV") -if not errors and result is not None: - assert result.value == Decimal("100.50") - assert result.precision == 2 -``` - -**When to use**: -- User-entered numeric text should go back into `FluentBundle` / `FluentLocalization` -- Select expressions must preserve visible precision (`1.0` vs `1.00`) -- You want the public one-step API instead of manual `parse_decimal()` + `make_fluent_number()` composition - -### parse_date() - -Parse locale-formatted date string to `date` object. - -Returns `tuple[date | None, tuple[FrozenFluentError, ...]]`. 
- -```python -from ftllexengine.parsing import parse_date -from ftllexengine.parsing import is_valid_date - -# US format (MM/DD/YYYY) -result, errors = parse_date("1/28/2025", "en_US") -if is_valid_date(result): # guards accept None - date_value = result # date(2025, 1, 28) - -# European format (DD.MM.YYYY) -result, errors = parse_date("28.01.2025", "lv_LV") -# result → date(2025, 1, 28) - -# ISO 8601 (works everywhere) -result, errors = parse_date("2025-01-28", "en_US") -# result → date(2025, 1, 28) -``` - -**Implementation Details**: -- **Python 3.13 stdlib only** - Uses `datetime.strptime()` and `datetime.fromisoformat()` -- **Babel CLDR patterns** - Converts Babel date patterns to strptime format directives - - Example conversions: `"M/d/yy"` → `"%m/%d/%y"`, `"dd.MM.yyyy"` → `"%d.%m.%Y"` -- **Token-based converter** - Replaces fragile regex approach for pattern conversion -- **Fast path optimization** - ISO 8601 dates (`"2025-01-28"`) use native `fromisoformat()` for maximum speed -- **Safe pattern matching** - No ambiguous fallback patterns: - 1. ISO 8601 format (fastest, unambiguous, always works) - 2. Locale-specific CLDR patterns from Babel ONLY - 3. No generic fallback patterns (prevents misinterpretation) -- **Locale determines interpretation** - Day-first (EU) vs month-first (US) based on CLDR patterns -- **Thread-safe** - No global state, immutable pattern lists -- **Zero external dependencies** - Uses only Python 3.13 stdlib + Babel (already a dependency) - -**Important**: Ambiguous dates like "1/2/25" will FAIL unless: -- Input matches locale's CLDR pattern (e.g., "1/2/25" only works for en_US, not lv_LV) -- Input uses ISO 8601 format "2025-01-02" (works everywhere, recommended) - -### parse_datetime() - -Parse locale-formatted datetime string to `datetime` object. - -Returns `tuple[datetime | None, tuple[FrozenFluentError, ...]]`. 
- -```python -from datetime import timezone -from ftllexengine.parsing import parse_datetime -from ftllexengine.parsing import is_valid_datetime - -# Parse datetime -result, errors = parse_datetime("1/28/2025 14:30", "en_US") -if is_valid_datetime(result): # guards accept None - dt = result # datetime(2025, 1, 28, 14, 30) - -# With timezone -result, errors = parse_datetime("2025-01-28 14:30", "en_US", tzinfo=timezone.utc) -# result → datetime(2025, 1, 28, 14, 30, tzinfo=timezone.utc) -``` - -**Implementation Details**: -- Same implementation as `parse_date()` but with time components -- Uses Babel CLDR datetime patterns converted to strptime format -- Pattern conversion includes time directives: `"HH:mm:ss"` → `"%H:%M:%S"` -- Fast path for ISO 8601 datetime strings -- Thread-safe, no external dependencies beyond Babel - -### parse_currency() - -Parse locale-formatted currency string to `(Decimal, currency_code)` tuple. - -Returns `tuple[tuple[Decimal, str] | None, tuple[FrozenFluentError, ...]]`. 
- -```python -from ftllexengine.parsing import parse_currency -from ftllexengine.parsing import is_valid_currency - -# Unambiguous symbols - work without default_currency -result, errors = parse_currency("€100.50", "en_US") -if is_valid_currency(result): # guards accept None - amount, currency = result # (Decimal('100.50'), 'EUR') - -# Latvian format -result, errors = parse_currency("1 234,56 €", "lv_LV") -# result → (Decimal('1234.56'), 'EUR') - -# ISO codes - always unambiguous -result, errors = parse_currency("USD 1,234.56", "en_US") -# result → (Decimal('1234.56'), 'USD') - -# Ambiguous symbols require explicit currency -result, errors = parse_currency("$100", "en_US", default_currency="USD") -# result → (Decimal('100'), 'USD') - -result, errors = parse_currency("$100", "en_CA", default_currency="CAD") -# result → (Decimal('100'), 'CAD') - -# Or infer from locale -result, errors = parse_currency("$100", "en_CA", infer_from_locale=True) -# result → (Decimal('100'), 'CAD') # Inferred from Canadian locale - -# Ambiguous symbols without default_currency return error -result, errors = parse_currency("$100", "en_US") -if errors: - print(f"Ambiguous currency: {errors[0]}") -``` - -**Currency Symbol Handling**: -- **Ambiguous**: $ (USD/CAD/AUD/SGD/HKD/NZD/MXN), ¢, ₨, ₱, kr, ¥ (JPY/CNY), £ (GBP/EGP/GIP/FKP/SHP/SSP) -- **Unambiguous**: € (EUR), ₹ (INR), ₽ (RUB), etc. -- **Always safe**: ISO codes (USD, CAD, EUR, etc.) -- **Locale resolution**: ¥ -> CNY for zh_* locales, JPY otherwise. £ -> EGP for ar_* locales, GBP otherwise. - -**Supported currencies**: All ISO 4217 codes plus major currency symbols (€, $, £, ¥, etc.) - ---- - -## Best Practices - -### 1. Always Use Same Locale - -**CRITICAL**: Format and parse must use the **same locale** for roundtrip correctness. - -```python -# CORRECT - Same locale -locale = "lv_LV" -formatted, _ = bundle.format_pattern("price", {"amount": 1234.56}) -result, errors = parse_decimal(formatted, locale) # Same locale! 
- -# WRONG - Different locales break roundtrip -formatted, _ = bundle.format_pattern("price", {"amount": 1234.56}) # lv_LV -result, errors = parse_decimal(formatted, "en_US") # Wrong locale! -# Result: errors will contain parse error -``` - -### 2. Check Errors in Production - -```python -from ftllexengine.parsing import parse_decimal -from ftllexengine.parsing import is_valid_decimal - -# Check errors tuple instead of try/except -result, errors = parse_decimal(user_input, locale) - -if errors: - show_error_to_user(f"Invalid amount: {errors[0]}") - return - -if not is_valid_decimal(result): - show_error_to_user("Amount cannot be NaN or Infinity") - return - -# Safe to use result -process_payment(result) +fnum, errors = parse_fluent_number("12.450,00", "de_DE") +assert errors == () +assert fnum.value == Decimal("12450.00") +assert fnum.precision == 2 +assert str(fnum) == "12.450,00" ``` -### 3. Financial Precision - Always Use Decimal +## Type Guards -```python -from decimal import Decimal -from ftllexengine.parsing import parse_decimal -from ftllexengine.parsing import is_valid_decimal - -# CORRECT - Decimal for financial data -result, errors = parse_decimal("100,50", "lv_LV") -if is_valid_decimal(result): # guards accept None - vat = result * Decimal("0.21") # → Decimal('21.105') - exact - -# WRONG - Float arithmetic loses precision -amount = 100.50 # float -vat = amount * 0.21 # → 21.105000000000004 - precision loss! -``` - -**Impact**: Float precision loss accumulates in calculations and causes rounding errors in financial reports. - -**Note on Special Values**: Babel's `parse_decimal()` accepts `NaN`, `Infinity`, and `Inf` (case-insensitive) as valid Decimal values per IEEE 754 standard. Use `is_valid_decimal()` to reject these for financial data: +The `is_valid_*` helpers are useful when you want a boolean guard after parsing. 
```python -from ftllexengine.parsing import is_valid_decimal - -result, errors = parse_decimal(user_input, locale) - -if errors: - raise ValueError(f"Parse error: {errors[0]}") +from ftllexengine.parsing import is_valid_decimal, parse_decimal -# Reject special values for financial data -if not is_valid_decimal(result): - raise ValueError("Amount must be a finite number") +value, errors = parse_decimal("not-a-number", "en_US") +assert not is_valid_decimal(value) +assert errors ``` -### 4. Validate Before Processing - -```python -from ftllexengine.parsing import parse_decimal -from ftllexengine.parsing import is_valid_decimal - -def parse_user_amount(input_str: str, locale: str) -> Decimal | None: - # Trim whitespace - input_str = input_str.strip() - - # Check not empty - if not input_str: - return None - - # Parse and validate - result, errors = parse_decimal(input_str, locale) - - if not is_valid_decimal(result): # guards handle None - return None - - return result - -# Usage -amount = parse_user_amount(user_input, "lv_LV") -if amount is None: - show_error("Please enter a valid amount") -``` - -### 5. Roundtrip Validation - -```python -from decimal import Decimal -from ftllexengine.parsing import parse_currency -from ftllexengine.runtime import currency_format -from ftllexengine.parsing import is_valid_currency - -# Verify roundtrip in tests -def test_roundtrip(): - original = Decimal("1234.56") - formatted = currency_format(original, "lv-LV", currency="EUR") - result, errors = parse_currency(formatted, "lv_LV") - - assert not errors - assert is_valid_currency(result) - assert result[0] == original # Roundtrip preserved! 
-``` - ---- - -## Common Patterns - -### Invoice Processing - -```python -from decimal import Decimal -from ftllexengine import FluentBundle -from ftllexengine.parsing import parse_decimal -from ftllexengine.parsing import is_valid_decimal - -bundle = FluentBundle("lv_LV") -bundle.add_resource(""" - subtotal = Summa: { CURRENCY($amount, currency: "EUR") } - vat = PVN (21%): { CURRENCY($vat, currency: "EUR") } - total = Kopa: { CURRENCY($total, currency: "EUR") } -""") - -def process_invoice(user_input: str) -> dict | None: - # Parse user input (subtotal) - result, errors = parse_decimal(user_input, "lv_LV") - - if not is_valid_decimal(result): # guards handle None - return None - - subtotal = result - - # Calculate VAT (financial precision) - vat_rate = Decimal("0.21") - vat = subtotal * vat_rate - total = subtotal + vat - - # Format for display - display = { - "subtotal": bundle.format_pattern("subtotal", {"amount": subtotal})[0], - "vat": bundle.format_pattern("vat", {"vat": vat})[0], - "total": bundle.format_pattern("total", {"total": total})[0], - } - - return { - "display": display, - "data": {"subtotal": subtotal, "vat": vat, "total": total} - } - -# Example usage -result = process_invoice("1 234,56") -# display: {"subtotal": "Summa: 1 234,56 €", ...} -# data: {"subtotal": Decimal('1234.56'), ...} -``` - -### Form Input Validation - -```python -from decimal import Decimal -from ftllexengine.parsing import parse_decimal -from ftllexengine.parsing import is_valid_decimal - -def validate_amount_field(input_value: str, locale: str) -> tuple[Decimal | None, str | None]: - """Validate and parse amount input field. 
- - Returns: - (parsed_value, error_message) - error_message is None if valid - """ - # Trim whitespace - input_value = input_value.strip() - - # Check not empty - if not input_value: - return (None, "Amount is required") - - # Parse - result, errors = parse_decimal(input_value, locale) - - if errors: - return (None, f"Invalid amount format for {locale}") - - # Validate finite (not NaN/Infinity) - if not is_valid_decimal(result): - return (None, "Amount must be a finite number") - - # Validate range - if result <= 0: - return (None, "Amount must be positive") - - if result > Decimal("1000000"): - return (None, "Amount exceeds maximum (1,000,000)") - - return (result, None) - -# Usage in web form -amount, error = validate_amount_field(request.form['amount'], user_locale) -if error: - flash(error, 'error') - return redirect(url_for('form')) - -# Amount is valid Decimal, use in calculations -process_payment(amount) -``` - -### Data Import from CSV - -```python -from ftllexengine.parsing import parse_decimal, parse_date -from ftllexengine.parsing import is_valid_date, is_valid_decimal - -def import_transactions_csv(csv_path: str, locale: str) -> tuple[list[dict], list[str]]: - """Import financial transactions from CSV.""" - import csv - - transactions = [] - import_errors = [] - - with open(csv_path, 'r', encoding='utf-8') as f: - reader = csv.DictReader(f) - - for row_num, row in enumerate(reader, start=2): # Start at 2 (header is row 1) - # Parse date (type guards accept None) - date_result, _ = parse_date(row['date'], locale) - if not is_valid_date(date_result): - import_errors.append(f"Row {row_num}: Invalid date '{row['date']}'") - continue - - # Parse amount (type guards accept None) - amount_result, _ = parse_decimal(row['amount'], locale) - if not is_valid_decimal(amount_result): - import_errors.append(f"Row {row_num}: Invalid amount '{row['amount']}'") - continue - - transactions.append({ - "date": date_result, - "amount": amount_result, - "description": 
row['description'] - }) - - return transactions, import_errors - -# Usage -transactions, errors = import_transactions_csv("export.csv", "lv_LV") -if errors: - print(f"Import completed with {len(errors)} errors:") - for error in errors: - print(f" - {error}") -``` - ---- - -## Migration from Babel - -### Before (Babel only) - -```python -from babel.numbers import parse_decimal as babel_parse_decimal -from ftllexengine import FluentBundle - -# Formatting: FTLLexEngine -bundle = FluentBundle("lv_LV") -formatted, _ = bundle.format_pattern("price", {"amount": 1234.56}) - -# Parsing: Babel directly -user_input = "1 234,56" -parsed = babel_parse_decimal(user_input, locale="lv_LV") -``` - -### After (FTLLexEngine for both) - -```python -from ftllexengine import FluentBundle -from ftllexengine.parsing import parse_decimal -from ftllexengine.parsing import is_valid_decimal - -# Formatting: FTLLexEngine -bundle = FluentBundle("lv_LV") -formatted, _ = bundle.format_pattern("price", {"amount": 1234.56}) - -# Parsing: FTLLexEngine (consistent API) -user_input = "1 234,56" -result, errors = parse_decimal(user_input, "lv_LV") - -if is_valid_decimal(result): # guards accept None - parsed = result # Same locale format! -``` - -**Benefits**: -- Single import source -- Consistent locale code format -- Symmetric API design (format ↔ parse) -- Better error handling with structured errors - ---- - -## Troubleshooting - -### BabelImportError When Parsing - -**Problem**: Calling `parse_decimal()`, `parse_date()`, or other parsing functions raises `BabelImportError` - -**Cause**: Parsing functions require Babel for CLDR locale data. Babel is not installed. 
- -**Solution**: -```bash -# Install with Babel support -pip install ftllexengine[babel] - -# Or install Babel separately -pip install Babel -``` - -**Context**: FTLLexEngine supports two installation modes: -- **Parser-only** (`pip install ftllexengine`): Syntax parsing only, no Babel dependency -- **Full runtime** (`pip install ftllexengine[babel]`): Includes Babel for locale-aware formatting and parsing - -All functions in `ftllexengine.parsing` require Babel and will raise `BabelImportError` if Babel is not installed. This is a configuration error, not a parse error. - -### Parse Returns Errors - -**Problem**: `parse_decimal()` returns non-empty errors tuple - -**Common causes**: -1. **Wrong locale**: Make sure parsing locale matches formatting locale -2. **Invalid format**: Input doesn't match locale's number format -3. **Non-numeric input**: Input contains letters or unexpected characters - -**Solution**: -```python -from ftllexengine.parsing import parse_decimal -# Use `if errors:` to check for parse errors - -# Debug: Print the error details -result, errors = parse_decimal(user_input, locale) -if errors: - print(f"Parse error: {errors[0]}") - print(f"Input: '{user_input}'") - print(f"Locale: {locale}") - if errors[0].diagnostic: - print(f"Error code: {errors[0].diagnostic.code}") -``` - -### Roundtrip Doesn't Preserve Value - -**Problem**: format → parse → format changes the value - -**Cause**: Different locales used for format and parse - -**Solution**: -```python -from decimal import Decimal -from ftllexengine.parsing import parse_decimal -from ftllexengine.runtime import number_format - -# Correct: Same locale throughout -locale = "lv_LV" -formatted = str(number_format(Decimal("1234.56"), f"{locale.replace('_', '-')}")) -result, errors = parse_decimal(formatted, locale) # Same locale! - -# Wrong: Different locales -formatted = str(number_format(Decimal("1234.56"), "lv-LV")) -result, errors = parse_decimal(formatted, "en_US") # Different locale! 
-``` - -### Float Arithmetic Precision Loss - -**Problem**: Calculations give unexpected results like `21.105000000000004` - -**Cause**: Using Python `float` arithmetic instead of `Decimal` for financial data - -**Solution**: -```python -from decimal import Decimal -from ftllexengine.parsing import parse_decimal -# Use `if errors:` to check for parse errors - -# Wrong: Float arithmetic loses precision -amount = 100.50 # float -vat = amount * 0.21 # 21.105000000000004 - -# Correct: Decimal arithmetic is exact -result, errors = parse_decimal("100,50", "lv_LV") -if not errors: - vat = result * Decimal("0.21") # Decimal('21.105') - exact! -``` - -### Special Values (NaN, Infinity) Accepted - -**Problem**: `parse_decimal("NaN", locale)` succeeds instead of returning error - -**Cause**: Babel's `parse_decimal()` accepts `NaN`, `Infinity`, and `Inf` (case-insensitive) as valid Decimal values per IEEE 754 standard - -**Solution**: -```python -from ftllexengine.parsing import parse_decimal -from ftllexengine.parsing import is_valid_decimal - -result, errors = parse_decimal(user_input, locale) - -if errors: - raise ValueError(f"Parse failed: {errors[0]}") - -# Reject NaN and Infinity for financial calculations -if not is_valid_decimal(result): - raise ValueError("Amount must be a finite number") -``` - -**Background**: These special values are mathematically valid but typically inappropriate for financial calculations. Use `is_valid_decimal()` type guard to reject them. - -### Date Parsing Ambiguity - -**Problem**: `parse_date("01/02/2025")` - is this Jan 2 or Feb 1? 
- -**Cause**: Ambiguous date format depends on locale - -**Solution**: -```python -from ftllexengine.parsing import parse_date -from ftllexengine.parsing import is_valid_date - -# US: Interprets as month-first (Jan 2) -result, _ = parse_date("01/02/2025", "en_US") # → date(2025, 1, 2) - -# Europe: Interprets as day-first (Feb 1) -result, _ = parse_date("01/02/2025", "lv_LV") # → date(2025, 2, 1) - -# Recommendation: Use ISO 8601 (unambiguous) -result, errors = parse_date("2025-01-02", locale) # Always Jan 2 -``` - -### Timezone Pattern Limitations - -**Problem**: `parse_datetime()` returns error with input containing timezone names - -**Cause**: Timezone name patterns (z, zz, zzz, zzzz, v, V series, O series, ZZZZ) are locale-specific and cannot be parsed by Python's `strptime`. These patterns are silently skipped during pattern conversion. - -**Unsupported patterns**: -- Timezone names: `z`, `zz`, `zzz`, `zzzz` (e.g., "PST", "Pacific Standard Time") -- Generic timezone: `v`, `vvvv` (e.g., "PT", "Pacific Time") -- Location timezone: `V`, `VV`, `VVV`, `VVVV` (e.g., "America/Los_Angeles") -- Localized GMT: `ZZZZ`, `O`, `OOOO` (e.g., "GMT-08:00") - -**Supported patterns**: -- UTC offset: `Z`, `ZZ`, `ZZZ`, `ZZZZZ` (e.g., "-0800", "-08:00") -- ISO offset: `x`, `xx`, `xxx`, `xxxx`, `xxxxx`, `X`, `XX`, `XXX`, `XXXX`, `XXXXX` - -**Solution**: -```python -from ftllexengine.parsing import parse_datetime - -# Wrong: Input with timezone name -result, errors = parse_datetime("2025-01-28 14:30 PST", "en_US") -# → errors (timezone name not parsed) - -# Correct: Pre-strip timezone name or use UTC offset -result, errors = parse_datetime("2025-01-28 14:30", "en_US") -# Or use ISO 8601 with offset -result, errors = parse_datetime("2025-01-28T14:30:00-08:00", "en_US") -``` - ---- - -## Known Limitations (strptime) - -Date/datetime parsing uses Python's `strptime()` with CLDR pattern conversion. Some Babel CLDR patterns cannot be represented in strptime format. 
- -### Pattern Conversion Reference - -**Fully Supported Patterns**: - -| CLDR | strptime | Example | -|:-----|:---------|:--------| -| `yyyy` | `%Y` | 2026 | -| `yy` | `%y` | 26 | -| `MM`, `M` | `%m` | 01, 1 | -| `dd`, `d` | `%d` | 28, 8 | -| `HH`, `H` | `%H` | 14, 4 | -| `mm`, `m` | `%M` | 30, 5 | -| `ss`, `s` | `%S` | 45, 5 | -| `a` | `%p` | AM, PM | - -**Partially Supported Patterns**: - -| CLDR | Issue | Workaround | -|:-----|:------|:-----------| -| `MMM`, `MMMM` | Language-dependent | Works for English locales only | -| `EEE`, `EEEE` | Language-dependent | Works for English locales only | -| `SSS` (milliseconds) | strptime expects 6 digits | Pad to microseconds or pre-process | - -**Unsupported Patterns (Silently Skipped)**: - -| Pattern | Description | Reason | -|:--------|:------------|:-------| -| `z`, `zz`, `zzz`, `zzzz` | Timezone names (PST) | Locale-specific, no strptime equivalent | -| `v`, `vvvv` | Generic timezone (PT) | No strptime equivalent | -| `V`, `VV`, `VVV`, `VVVV` | Location timezone | No strptime equivalent | -| `O`, `OOOO`, `ZZZZ` | Localized GMT | No strptime equivalent | -| `G`, `GG`, `GGG`, `GGGG`, `GGGGG` | Era (AD, BC) | No strptime equivalent | -| `Q`, `QQ`, `QQQ`, `QQQQ` | Quarter | No strptime equivalent | -| `w`, `ww` | Week of year | Limited strptime support | -| `W` | Week of month | No strptime equivalent | - -### Fractional Seconds - -**Limitation**: strptime `%f` expects exactly 6 digits (microseconds). - -**CLDR Pattern**: `SSS` (3 digits for milliseconds) - -**Behavior**: Patterns with `SSS` are converted to `%f`, which may fail if input has exactly 3 digits. - -```python -# Problem: strptime expects 6 digits -"14:30:45.123" # 3-digit milliseconds → parse error - -# Solutions: -# 1. Pad to 6 digits before parsing -input_padded = "14:30:45.123000" - -# 2. 
Use ISO 8601 format -"14:30:45.123000" # 6 digits → works -``` - -### Two-Digit Year Century Cutoff - -**Limitation**: strptime interprets `%y` (2-digit year) with a 2000 cutoff. - -| Input | Interpretation | -|:------|:---------------| -| `00-68` | 2000-2068 | -| `69-99` | 1969-1999 | - -```python -# "25" → 2025 (correct for current decade) -# "85" → 1985 (may be unexpected) -``` - -**Recommendation**: Use 4-digit years (`yyyy`) when possible. - -### Non-English Month/Day Names - -**Limitation**: strptime only parses English month/day names by default. - -```python -# Works: English locale -parse_date("January 15, 2026", "en_US") # OK - -# May fail: Non-English locale -parse_date("Januar 15, 2026", "de_DE") # German month name -# Depends on system locale configuration -``` - -**Recommendation**: Use numeric formats for non-English locales. - -### Workarounds Summary - -| Limitation | Workaround | -|:-----------|:-----------| -| Timezone names | Strip timezone before parsing; add after | -| Era patterns | Assume AD; handle BC separately | -| Milliseconds | Pad to 6 digits or remove fractional part | -| Non-English names | Use numeric date formats | -| Two-digit years | Use 4-digit years (yyyy) | - -**Design Note**: These limitations are inherent to Python's `strptime()` function. FTLLexEngine uses strptime intentionally to avoid external dependencies beyond Babel. For applications requiring broader pattern support, pre-process input strings before calling parse functions. - ---- - -## See Also - -- [LOCALE_GUIDE.md](LOCALE_GUIDE.md) - Locale formatting behavior -- [DOC_00_Index.md](DOC_00_Index.md) - Complete API reference -- [README.md](../README.md) - Getting started -- [CHANGELOG.md](../CHANGELOG.md) - Version history -- [Babel Documentation](https://babel.pocoo.org/) - Number and date formatting patterns - ---- +## Roundtrip Rule -**Python Requirement**: 3.13+ +For format → parse workflows, use the same locale on both sides. 
That keeps separators, currency symbols, and CLDR patterns aligned. diff --git a/docs/QUICK_REFERENCE.md b/docs/QUICK_REFERENCE.md index 918e8c5a..7d8bcb5f 100644 --- a/docs/QUICK_REFERENCE.md +++ b/docs/QUICK_REFERENCE.md @@ -1,966 +1,120 @@ --- -afad: "3.3" -version: "0.161.0" -domain: reference -updated: "2026-03-21" +afad: "3.5" +version: "0.163.0" +domain: REFERENCE +updated: "2026-04-22" route: - keywords: [cheat sheet, quick reference, examples, code snippets, patterns, copy paste, BabelImportError, cache, clear cache, CacheConfig, audit-log, require_clean, validate_message_schemas, validate_message_variables, require_locale_code, make_fluent_number, parse_fluent_number, FluentNumber] - questions: ["how to format message?", "how to parse number?", "how to use bundle?", "what exceptions can occur?", "how do I validate localization at boot?", "how do I validate one message schema?", "how do I canonicalize a locale code?", "how do I construct a FluentNumber manually?", "how to clear cache?", "how do I get the cache audit log?"] + keywords: [quick reference, cheat sheet, fluentbundle, fluentlocalization, parsing, validation, boot] + questions: ["show me the common commands", "what is the smallest working example?", "how do I boot localization safely?"] --- # FTLLexEngine Quick Reference -**One-page cheat sheet for common tasks** - -Python 3.13+ | [Full API Documentation](DOC_00_Index.md) | [Examples](../examples/) - --- - -## Installation +## Install ```bash -# Parser-only (no external dependencies) -pip install ftllexengine - -# Full runtime with locale formatting -pip install ftllexengine[babel] +uv add "ftllexengine[babel]" ``` -**Requirements**: Python 3.13+ | Babel>=2.18.0 (optional for locale formatting) - --- - -## Parser-Only Usage (No Babel Required) - -```python -from ftllexengine import parse_ftl, serialize_ftl, validate_resource - -# Parse FTL source to AST -resource = parse_ftl(""" -hello = Hello, World! -greeting = Welcome, { $name }! 
-""") - -# Inspect AST -for entry in resource.entries: - print(f"Message: {entry.id.name}") - -# Validate FTL source (syntax and semantic checks) -result = validate_resource(""" -hello = Hello, World! -greeting = Welcome, { $name }! -""") -if result.errors: - print(f"Errors: {result.errors}") - -# Serialize back to FTL string -ftl_source = serialize_ftl(resource) +```bash +uv add ftllexengine ``` ---- - -## Basic Usage - -### Single Locale Application +## Format One Message ```python from ftllexengine import FluentBundle -# Create bundle -bundle = FluentBundle("en_US") - -# Load translations -bundle.add_resource(""" -hello = Hello, World! -welcome = Welcome, { $name }! -emails = You have { $count -> - [one] one email - *[other] { $count } emails -}. -""") - -# Format messages -result, errors = bundle.format_pattern("hello") -# → "Hello, World!" - +bundle = FluentBundle("en_US", use_isolating=False) +bundle.add_resource("welcome = Hello, { $name }!") result, errors = bundle.format_pattern("welcome", {"name": "Alice"}) -# → "Welcome, Alice!" - -result, errors = bundle.format_pattern("emails", {"count": 5}) -# → "You have 5 emails." -``` - ---- - -### Multi-Locale Application (with fallback) - -```python -from ftllexengine import FluentLocalization - -# Create with fallback chain: Latvian → English -l10n = FluentLocalization(['lv', 'en']) - -# Add translations -l10n.add_resource('lv', """ -welcome = Laipni lūdzam, { $name }! -cart = Grozs -""") - -l10n.add_resource('en', """ -welcome = Welcome, { $name }! -cart = Cart -checkout = Checkout -""") - -# Format with automatic fallback -result, errors = l10n.format_value('welcome', {'name': 'Anna'}) -# → "Laipni lūdzam, Anna!" 
(from Latvian) - -result, errors = l10n.format_value('checkout') -# → "Checkout" (falls back to English) -``` - ---- - -### Loading from Files - -```python -from pathlib import Path -from ftllexengine import FluentBundle - -# Read .ftl file -ftl_source = Path("locales/en/main.ftl").read_text(encoding="utf-8") - -# Add to bundle -bundle = FluentBundle("en") -bundle.add_resource(ftl_source) - -result, errors = bundle.format_pattern("message-id") +assert errors == () +assert result == "Hello, Alice!" ``` ---- - -### Loading from Directory Structure +## Multi-Locale Fallback ```python from ftllexengine import FluentLocalization -from ftllexengine.localization import PathResourceLoader - -# Directory structure: -# locales/en/main.ftl -# locales/en/errors.ftl -# locales/lv/main.ftl - -loader = PathResourceLoader("locales/{locale}") -l10n = FluentLocalization(['lv', 'en'], ['main.ftl', 'errors.ftl'], loader) - -result, errors = l10n.format_value('welcome') -``` - ---- - -## Common Patterns - -### Error Handling (Production Pattern) - -```python -# ALWAYS check errors in production -result, errors = bundle.format_pattern("msg", {"var": value}) - -if errors: - for error in errors: - logger.warning(f"Translation error: {error}") - # error is FrozenFluentError; use error.category for classification - -print(result) # Always returns usable fallback -``` - -### Error Handling (Test Pattern) - -```python -# In tests/examples, use underscore to explicitly ignore errors -# (When errors are not relevant to what you're testing) -result, _ = bundle.format_pattern("msg", {"var": value}) -assert result == "Expected output" -``` - ---- - -### Accessing Attributes - -```python -# FTL with attributes -bundle.add_resource(""" -submit-button = Submit - .tooltip = Click to submit form - .aria-label = Submit button -""") - -# Access attribute -result, errors = bundle.format_pattern("submit-button", attribute="tooltip") -# → "Click to submit form" - -# Access value (default) -result, errors = 
bundle.format_pattern("submit-button") -# → "Submit" -``` - ---- - -### Validation Before Loading - -```python -from pathlib import Path - -bundle = FluentBundle("en") -ftl_source = Path("locale/main.ftl").read_text() - -# Validate before adding -result = bundle.validate_resource(ftl_source) - -if not result.is_valid: - print(f"Found {result.error_count} syntax errors:") - for error in result.errors: - location = f"line {error.line}" if error.line else "unknown" - print(f" - {location}: {error.message[:80]}") - sys.exit(1) - -if result.warning_count > 0: - print(f"Found {result.warning_count} warnings:") - for warning in result.warnings: - location = f"line {warning.line}" if warning.line else "unknown" - print(f" - {location}: {warning.message}") - -# Safe to add -bundle.add_resource(ftl_source) -``` - ---- - -### Custom Functions - -```python -# Define custom function (FILESIZE example) -def FILESIZE(bytes_count: int, *, precision: int = 2) -> str: - """Format file size in human-readable format.""" - size = float(bytes_count) - units = ["B", "KB", "MB", "GB", "TB"] - - for unit in units: - if size < 1024.0: - return f"{size:.{precision}f} {unit}" - size /= 1024.0 - return f"{size:.{precision}f} PB" - -# Register function -bundle = FluentBundle("en") -bundle.add_function("FILESIZE", FILESIZE) - -# Use in FTL -bundle.add_resource(""" -file-info = { $filename } ({ FILESIZE($bytes) }) -""") - -result, errors = bundle.format_pattern("file-info", {"filename": "video.mp4", "bytes": 157286400}) -# → "video.mp4 (150.00 MB)" -``` - -**Note**: For currency formatting, use the built-in `CURRENCY()` function instead of custom implementations. See Built-in Functions section below. 
- ---- - -### Locale-Aware Custom Functions (Factory Pattern) - -```python -def make_greeting_function(locale: str): - """Factory for locale-aware custom function.""" - def GREETING(name: str, *, formal: str = "false") -> str: - is_formal = formal.lower() == "true" - if locale.startswith("lv"): - return f"Labdien, {name}!" if is_formal else f"Sveiki, {name}!" - return f"Good day, {name}!" if is_formal else f"Hello, {name}!" - return GREETING - -bundle = FluentBundle("lv_LV") -bundle.add_function("GREETING", make_greeting_function(bundle.locale)) - -bundle.add_resource('msg = { GREETING($name, formal: "false") }') -result, errors = bundle.format_pattern("msg", {"name": "Anna"}) -# → "Sveiki, Anna!" -``` - ---- - -## Core API Reference - -### FluentBundle - -**Constructor**: -```python -FluentBundle( - locale: str, - /, - *, - use_isolating: bool = True, - cache: CacheConfig | None = None, - functions: FunctionRegistry | None = None, - max_source_size: int | None = None, - max_nesting_depth: int | None = None, - max_expansion_size: int | None = None, - strict: bool = True, -) -``` - -**Factory Methods**: -```python -# Auto-detect system locale (from LC_ALL, LC_MESSAGES, LANG) -bundle = FluentBundle.for_system_locale() - -# Context manager support (no-op: use for structured scoping only) -with FluentBundle("en", cache=CacheConfig()) as bundle: - bundle.add_resource("hello = Hello!") # Cache cleared immediately - result, _ = bundle.format_pattern("hello") # Cache populated -``` - -**Key Methods**: -```python -bundle.add_resource(ftl_source: str) -> tuple[Junk, ...] 
-bundle.format_pattern(message_id, args=None, *, attribute=None) -> tuple[str, tuple[FrozenFluentError, ...]] -bundle.validate_resource(ftl_source: str) -> ValidationResult -bundle.has_message(message_id: str) -> bool -bundle.has_attribute(message_id: str, attribute: str) -> bool -bundle.get_message_ids() -> list[str] -bundle.get_message_variables(message_id: str) -> frozenset[str] -bundle.get_all_message_variables() -> dict[str, frozenset[str]] -bundle.introspect_message(message_id: str) -> MessageIntrospection -bundle.introspect_term(term_id: str) -> MessageIntrospection -bundle.add_function(name: str, func: Callable) -> None -bundle.clear_cache() -> None -bundle.get_cache_stats() -> CacheStats | None -bundle.get_cache_audit_log() -> tuple[CacheAuditLogEntry, ...] | None -bundle.get_babel_locale() -> str -``` - -**Properties**: -```python -bundle.locale -> LocaleCode # Read-only canonical lowercase underscore locale -bundle.use_isolating -> bool # Read-only -bundle.cache_config -> CacheConfig | None # Read-only; None when caching disabled -bundle.cache_enabled -> bool # Read-only -bundle.cache_usage -> int # Read-only -bundle.max_source_size -> int # Read-only -bundle.max_nesting_depth -> int # Read-only -bundle.max_expansion_size -> int # Read-only -bundle.strict -> bool # Read-only -bundle.function_registry -> FunctionRegistry # Read-only -``` - ---- - -### FluentLocalization - -**Constructor**: -```python -FluentLocalization( - locales: Iterable[str], - resource_ids: Iterable[str] | None = None, - resource_loader: ResourceLoader | None = None, - *, - use_isolating: bool = True, - cache: CacheConfig | None = None, - on_fallback: Callable[[FallbackInfo], None] | None = None, - strict: bool = True, -) -``` - -**Key Methods**: -```python -l10n.add_resource(locale: str, ftl_source: str) -> tuple[Junk, ...] 
-l10n.format_pattern(message_id, args=None, *, attribute=None) -> tuple[str, tuple[FrozenFluentError, ...]] -l10n.format_value(message_id, args=None) -> tuple[str, tuple[FrozenFluentError, ...]] -l10n.validate_resource(ftl_source: str) -> ValidationResult -l10n.has_message(message_id: str) -> bool -l10n.has_attribute(message_id: str, attribute: str) -> bool -l10n.get_message_ids() -> list[str] -l10n.get_message_variables(message_id: str) -> frozenset[str] -l10n.get_all_message_variables() -> dict[str, frozenset[str]] -l10n.introspect_message(message_id: str) -> MessageIntrospection -l10n.introspect_term(term_id: str) -> MessageIntrospection | None -l10n.add_function(name: str, func: Callable) -> None -l10n.clear_cache() -> None -l10n.get_cache_stats() -> LocalizationCacheStats | None -l10n.get_cache_audit_log() -> dict[str, tuple[CacheAuditLogEntry, ...]] | None -l10n.get_load_summary() -> LoadSummary -l10n.require_clean() -> LoadSummary -l10n.validate_message_variables(message_id: str, expected_variables: frozenset[str] | set[str]) -> MessageVariableValidationResult -l10n.validate_message_schemas(expected_schemas: Mapping[str, frozenset[str] | set[str]]) -> tuple[MessageVariableValidationResult, ...] -l10n.get_bundles() -> Generator[FluentBundle] -l10n.get_babel_locale() -> str -``` - -**Properties**: -```python -l10n.locales -> tuple[str, ...] # Read-only -l10n.strict -> bool # Read-only -l10n.cache_config -> CacheConfig | None # Read-only -l10n.cache_enabled -> bool # Read-only -``` - -**Caching**: Pass `cache=CacheConfig()` for 50x speedup on repeated format calls. - -**Boot Validation**: -```python -l10n.require_clean() -l10n.validate_message_variables("invoice-total", frozenset({"amount", "customer"})) -l10n.validate_message_schemas({ - "invoice-total": frozenset({"amount", "customer"}), -}) -``` - ---- - -## FTL Syntax Quick Reference - -### Messages - -```ftl -# Simple message -hello = Hello, World! - -# With variable -welcome = Welcome, { $name }! 
- -# Multi-line -description = This is a long message - that spans multiple lines. -``` - -### Attributes - -```ftl -login-button = Login - .tooltip = Click to log in - .aria-label = Login button -``` - -### Select Expressions (Plurals) - -```ftl -emails = You have { $count -> - [one] one email - *[other] { $count } emails -}. -``` - -### Select Expressions (Gender/Custom) - -```ftl -greeting = { $gender -> - [male] Mr. { $name } - [female] Ms. { $name } - *[other] { $name } -} -``` - -### Terms (Reusable) - -```ftl --brand-name = Acme Corp --product-name = Super Widget - -welcome = Welcome to { -brand-name }! -about = About { -product-name } -``` - -### Functions - -```ftl -# Built-in NUMBER function -quantity = { NUMBER($amount, minimumFractionDigits: 2) } - -# Built-in DATETIME function -date = { DATETIME($timestamp, dateStyle: "short") } -# Built-in CURRENCY function -price = { CURRENCY($amount, currency: "EUR") } - -# Custom function -file-size = { FILESIZE($bytes) } -``` - ---- - -## Built-in Functions - -### NUMBER(value, options) - -**Options**: -- `minimumFractionDigits` (int): Minimum decimal places (default: 0) -- `maximumFractionDigits` (int): Maximum decimal places (default: 3) -- `useGrouping` (bool): Use thousand separators (default: true) -- `pattern` (string): Custom number pattern (overrides other options) -**Examples**: -```ftl -price = { NUMBER($amount, minimumFractionDigits: 2) } -percent = { NUMBER($value, maximumFractionDigits: 0) }% -accounting = { NUMBER($amount, pattern: "#,##0.00;(#,##0.00)") } -``` - -### DATETIME(value, options) - -**Options**: -- `dateStyle`: "short" | "medium" | "long" | "full" (default: "medium") -- `timeStyle`: "short" | "medium" | "long" | "full" | null (default: null) -- `pattern` (string): Custom datetime pattern (overrides style options) -**Examples**: -```ftl -short-date = { DATETIME($timestamp, dateStyle: "short") } -full-datetime = { DATETIME($timestamp, dateStyle: "long", timeStyle: "short") } -iso-date = { 
DATETIME($timestamp, pattern: "yyyy-MM-dd") } +l10n = FluentLocalization(["lv_LV", "en_US"], strict=False) +l10n.add_resource("en_US", "checkout = Checkout") +l10n.add_resource("lv_LV", "checkout = Apmaksa") +result, errors = l10n.format_value("checkout") +assert errors == () +assert result == "Apmaksa" ``` -### CURRENCY(value, options) - -**Returns**: `FluentNumber` (usable as selector in plural/select expressions). - -**Options**: -- `currency` (string, **required**): ISO 4217 currency code (e.g., "USD", "EUR", "JPY") -- `currencyDisplay`: "symbol" | "code" | "name" (default: "symbol") - -**Examples**: -```ftl -# Symbol display (default) -price = { CURRENCY($amount, currency: "USD") } -# en_US → "$1,234.56" -# lv_LV → "1\xa0234,56\xa0$" (CLDR uses NBSP U+00A0) - -# Code display -price-code = { CURRENCY($amount, currency: "EUR", currencyDisplay: "code") } -# en_US → "EUR1,234.56" - -# Name display -price-name = { CURRENCY($amount, currency: "EUR", currencyDisplay: "name") } -# → "1,234.56 euros" -``` - -**CLDR Compliance**: -- Currency-specific decimals: JPY (0), BHD/KWD/OMR (3), most others (2) -- Locale-specific symbol placement: en_US (before), lv_LV/de_DE (after with space) -- Uses Babel for CLDR-compliant formatting - ---- - -## Manual FluentNumber Construction +## Parse Localized Input ```python from decimal import Decimal +from ftllexengine.parsing import parse_currency, parse_decimal -from ftllexengine import FluentNumber -from ftllexengine.parsing import parse_fluent_number -from ftllexengine.runtime import make_fluent_number - -raw_amount = make_fluent_number(Decimal("12.3400")) -rendered_amount = make_fluent_number(42, formatted="42.00") -localized_amount = make_fluent_number(Decimal("1234.50"), formatted="1 234,50 EUR") -manual_amount = FluentNumber(value=Decimal("5.00"), formatted="5.00", precision=2) -parsed_amount, errors = parse_fluent_number("1 234,50", "lv_LV") -``` - ---- - -## Parsing API - -**Bi-directional localization**: Parse 
locale-formatted strings back to Python types. - -```python -from ftllexengine.parsing import parse_decimal, parse_fluent_number, parse_date, parse_currency -from ftllexengine.parsing import is_valid_decimal, is_valid_date, is_valid_currency - -# Parse numbers (guards accept None) -result, errors = parse_decimal("1 234,56", "lv_LV") -if is_valid_decimal(result): - amount = result # Decimal('1234.56') - -# Parse directly to FluentNumber -result, errors = parse_fluent_number("1 234,56", "lv_LV") -if not errors and result is not None: - amount_for_ftl = result # FluentNumber(value=Decimal('1234.56'), formatted='1 234,56', precision=2) - -# Parse dates -result, errors = parse_date("28.01.2025", "lv_LV") -if is_valid_date(result): - date_value = result # date(2025, 1, 28) - -# Parse currency -result, errors = parse_currency("1 234,56 €", "lv_LV") -if is_valid_currency(result): - amount, currency = result # (Decimal('1234.56'), 'EUR') - -# Note: Yen sign (¥) is ambiguous -# Resolves to CNY for zh_* locales, JPY otherwise -result, errors = parse_currency("¥1,234", "ja_JP") # JPY -result, errors = parse_currency("¥1,234", "zh_CN") # CNY - -# Note: Pound sign (£) is ambiguous -# Resolves to EGP for ar_* locales, GBP otherwise -result, errors = parse_currency("£100", "en_GB", infer_from_locale=True) # GBP -result, errors = parse_currency("£100", "ar_EG", infer_from_locale=True) # EGP -``` - -**Key Functions**: -- `parse_decimal(value, locale)` → `tuple[Decimal | None, tuple[FrozenFluentError, ...]]` -- `parse_fluent_number(value, locale)` → `tuple[FluentNumber | None, tuple[FrozenFluentError, ...]]` -- `parse_date(value, locale)` → `tuple[date | None, tuple[FrozenFluentError, ...]]` -- `parse_datetime(value, locale, tzinfo=None)` → `tuple[datetime | None, tuple[FrozenFluentError, ...]]` -- `parse_currency(value, locale)` → `tuple[tuple[Decimal, str] | None, tuple[FrozenFluentError, ...]]` - -**Implementation**: Uses Babel for number parsing, Python 3.13 stdlib (`strptime`, 
`fromisoformat`) with Babel CLDR patterns for date parsing. - -**Babel Required**: All parsing functions raise `BabelImportError` if Babel is not installed. Install with `pip install ftllexengine[babel]`. - -**See**: [PARSING_GUIDE.md](PARSING_GUIDE.md) for complete guide with best practices and examples. - ---- - -## Introspection - -### Get Message Variables - -```python -bundle.add_resource("welcome = Hello, { $firstName } { $lastName }!") - -variables = bundle.get_message_variables("welcome") -print(variables) # frozenset({'firstName', 'lastName'}) -``` - -### Batch Variable Extraction - -```python -bundle.add_resource(""" -greeting = Hello, { $name }! -farewell = Goodbye, { $firstName } { $lastName }! -simple = No variables -""") +amount, errors = parse_decimal("12,450.50", "en_US") +assert errors == () +assert amount == Decimal("12450.50") -all_vars = bundle.get_all_message_variables() -print(all_vars["greeting"]) # frozenset({'name'}) -print(all_vars["farewell"]) # frozenset({'firstName', 'lastName'}) -print(all_vars["simple"]) # frozenset() +money, errors = parse_currency("12.450,50 EUR", "de_DE", default_currency="EUR") +assert errors == () +assert money == (Decimal("12450.50"), "EUR") ``` -### Full Introspection +## Validate FTL Before Loading ```python -bundle.add_resource(""" -msg = Hello, { $name }! You have { NUMBER($count) } items. 
-""") - -info = bundle.introspect_message("msg") - -print(info.get_variable_names()) -# → frozenset({'name', 'count'}) - -print(info.get_function_names()) -# → frozenset({'NUMBER'}) -``` - -### Function Introspection - -```python -# Access the function registry (read-only property) -registry = bundle.function_registry - -# List all available functions -functions = registry.list_functions() -print(functions) # ["NUMBER", "DATETIME", "CURRENCY"] - -# Check if function exists -if "CURRENCY" in registry: - print("CURRENCY available") - -# Get function metadata -info = registry.get_function_info("NUMBER") -print(f"Python name: {info.python_name}") -print(f"Parameters: {info.param_mapping}") +from ftllexengine import validate_resource -# Iterate over all functions -for func_name in registry: - info = registry.get_function_info(func_name) - print(f"{func_name}: {info.python_name}") +result = validate_resource("welcome = Hello, { $name }!") +assert result.is_valid +assert result.error_count == 0 ``` ---- - -## Type Annotations +## Boot Validation ```python -from ftllexengine import FluentBundle, FluentValue -from ftllexengine.localization import MessageId, LocaleCode, FTLSource - -def format_message( - bundle: FluentBundle, - msg_id: MessageId, - args: dict[str, FluentValue] | None = None, -) -> str: - """Format message with error logging.""" - result, errors = bundle.format_pattern(msg_id, args) - if errors: - for error in errors: - logger.warning(f"Translation error: {error}") - return result - -def create_bundle(locale: LocaleCode, ftl_source: FTLSource) -> FluentBundle: - """Create and populate bundle.""" - bundle = FluentBundle(locale) - bundle.add_resource(ftl_source) - return bundle -``` - -**`FluentValue`**: Type-hint for resolver arguments. Union of `str | int | Decimal | datetime | date | FluentNumber | None | Sequence[FluentValue] | Mapping[str, FluentValue]`. 
(`bool` is absent: it is an `int` subtype accepted by raw interpolation but rejected by numeric formatting functions; convert explicitly with `int(flag)` or `str(flag)`.) - -**`ParseResult[T]`**: Type-hint for parsing function returns. Alias for `tuple[T | None, tuple[FrozenFluentError, ...]]`. Import from `ftllexengine` or `ftllexengine.parsing`. - ---- - -## Thread Safety - -**FluentBundle and FluentLocalization are always thread-safe**. All public methods are synchronized via internal RWLock (readers-writer lock). Multiple concurrent read operations execute in parallel; write operations acquire exclusive access. +from pathlib import Path +from tempfile import TemporaryDirectory +from ftllexengine import LocalizationBootConfig -### Pattern 1: Shared Bundle (Recommended) +with TemporaryDirectory() as tmp: + base = Path(tmp) / "locales" / "en_us" + base.mkdir(parents=True) + (base / "main.ftl").write_text("welcome = Hello, { $name }!\n", encoding="utf-8") -```python -# Create bundle once, share across threads -bundle = FluentBundle("en_US") -bundle.add_resource(ftl_source) - -# All operations are thread-safe (reads AND writes) -bundle.add_resource(more_ftl) # Thread-safe -bundle.add_function("CUSTOM", my_function) # Thread-safe -result, errors = bundle.format_pattern("msg") # Thread-safe + cfg = LocalizationBootConfig.from_path( + locales=("en_US",), + resource_ids=("main.ftl",), + base_path=Path(tmp) / "locales" / "{locale}", + message_schemas={"welcome": {"name"}}, + required_messages=frozenset({"welcome"}), + ) + l10n, summary, schema_results = cfg.boot() + assert summary.all_clean + assert schema_results[0].is_valid ``` -### Pattern 2: Task-Local Bundles (Per-Task Customization) +## Register A Custom Function ```python -from contextvars import ContextVar +from ftllexengine import FluentBundle -_bundle_var: ContextVar[FluentBundle | None] = ContextVar("_bundle_var", default=None) +def UPPER(value: str) -> str: + return value.upper() -def get_bundle() -> 
FluentBundle: - bundle = _bundle_var.get() - if bundle is None: - bundle = FluentBundle("en_US") - bundle.add_resource(ftl_source) - _bundle_var.set(bundle) - return bundle +bundle = FluentBundle("en_US", use_isolating=False) +bundle.add_function("UPPER", UPPER) +bundle.add_resource("headline = { UPPER($text) }") +result, errors = bundle.format_pattern("headline", {"text": "coffee"}) +assert errors == () +assert result == "COFFEE" ``` -`ContextVar` provides automatic isolation per thread and per async task, with no dynamic attribute access. - ---- - -## Cache Management - -Clear module-level caches for testing, hot-reload, or memory management. +## Clear Module Caches ```python from ftllexengine import clear_module_caches -# Clear all library caches in one call clear_module_caches() +clear_module_caches(frozenset({"parsing.dates", "locale"})) ``` - -**Individual Cache Clear Functions**: -```python -from ftllexengine.core.locale_utils import clear_locale_cache -from ftllexengine.core.locale_utils import require_locale_code -from ftllexengine.parsing import clear_date_caches, clear_currency_caches -from ftllexengine.introspection import clear_introspection_cache, clear_iso_cache -from ftllexengine.runtime.locale_context import LocaleContext - -# Clear specific caches -clear_locale_cache() # Babel locale objects -locale_code = require_locale_code(" en-US ", "user.locale") -clear_date_caches() # Date/datetime patterns -clear_currency_caches() # Currency maps and patterns -clear_introspection_cache() # Message introspection results -clear_iso_cache() # ISO territory/currency data -LocaleContext.clear_cache() # Locale context instances -``` - ---- - -## Common Checks - -### Check if Message Exists - -```python -if bundle.has_message("premium-feature"): - result, _ = bundle.format_pattern("premium-feature") -else: - print("Feature not available") -``` - -### List All Messages - -```python -message_ids = bundle.get_message_ids() -print(f"Loaded {len(message_ids)} 
messages") -for msg_id in sorted(message_ids): - print(f" - {msg_id}") -``` - -### Check Required Variables - -```python -required = bundle.get_message_variables("welcome") -provided = {"firstName": "John", "lastName": "Doe"} - -missing = required - set(provided.keys()) -if missing: - print(f"Missing variables: {missing}") -``` - ---- - -## Important Warnings - -### RTL Languages Require use_isolating=True - -```python -# WRONG - Breaks Arabic/Hebrew -bundle = FluentBundle("ar_EG", use_isolating=False) - -# CORRECT - Default is safe -bundle = FluentBundle("ar_EG") # use_isolating=True by default -``` - -**Rule**: Only use `use_isolating=False` for: -- Documentation examples (cleaner output) -- Unit tests (exact assertions) -- LTR-only applications (verifiable constraint) - -### Errors Raise in Strict Mode (Default) - -```python -# In strict mode (default), format_pattern() raises FormattingIntegrityError on ANY error -# In non-strict mode (strict=False), format_pattern() returns (result, errors) tuple -bundle = FluentBundle("en", strict=False) # opt in to soft-error recovery -result, errors = bundle.format_pattern("missing-message") -# result → "{missing-message}" # Readable fallback -# errors → (FrozenFluentError(...),) # category=ErrorCategory.REFERENCE - -# Always check errors in non-strict production code -if errors: - logger.warning(f"Translation errors: {errors}") -``` - ---- - -## Exception Types - -```python -from ftllexengine import ( - FrozenFluentError, # Immutable error (returned in errors tuple) - ErrorCategory, # Error classification: REFERENCE, RESOLUTION, CYCLIC, PARSE, FORMATTING -) - -from ftllexengine.core.babel_compat import BabelImportError # Raised when Babel not installed -``` - -**Note**: `FrozenFluentError` instances are returned in the errors tuple, NOT raised. Use `error.category` to classify errors (e.g., `ErrorCategory.REFERENCE` for missing messages). 
`BabelImportError` is raised when parsing functions are called without Babel installed. - ---- - -## AST Manipulation (Advanced) - -### Parse and Serialize - -```python -from ftllexengine import parse_ftl, serialize_ftl -from ftllexengine.syntax.ast import Message - -# Parse FTL to AST -resource = parse_ftl(ftl_source) - -# Inspect AST -for entry in resource.entries: - if isinstance(entry, Message): - print(f"Message: {entry.id.name}") - -# Serialize back to FTL -ftl_output = serialize_ftl(resource) - -# Validate AST before serialization -from ftllexengine.syntax import SerializationValidationError - -try: - ftl_output = serialize_ftl(resource, validate=True) -except SerializationValidationError as e: - print(f"Invalid AST: {e}") - -# Depth guard prevents stack overflow from malicious ASTs -from ftllexengine.syntax import SerializationDepthError - -try: - ftl_output = serialize_ftl(resource, max_depth=100) # default -except SerializationDepthError as e: - print(f"AST too deep: {e}") -``` - -### Visitor Pattern - -```python -from ftllexengine import parse_ftl -from ftllexengine.syntax.ast import Message -from ftllexengine.syntax.visitor import ASTVisitor - -class MessageCollector(ASTVisitor): - def __init__(self): - super().__init__() - self.messages = [] - - def visit_Message(self, node: Message): - self.messages.append(node.id.name) - return self.generic_visit(node) - -resource = parse_ftl(ftl_source) -collector = MessageCollector() -collector.visit(resource) -print(f"Found messages: {collector.messages}") -``` - ---- - -## Supported Locales - -**CLDR plural rules via Babel**: 200+ locales with full Unicode CLDR compliance. Includes all major languages (English, Spanish, French, German, Chinese, Japanese, Arabic, Russian, etc.) plus regional variants. Unsupported locales fall back to English-style one/other rules. 
- ---- - -## Getting Help - -- **Full API Documentation**: [DOC_00_Index.md](DOC_00_Index.md) -- **Examples**: [examples/](../examples/) -- **Contributing**: [CONTRIBUTING.md](../CONTRIBUTING.md) -- **Issues**: https://github.com/resoltico/ftllexengine/issues - ---- - -## Version Info - -```python -from ftllexengine import ( - __version__, # Package version - __fluent_spec_version__, # FTL spec version (1.0) - __spec_url__, # Spec URL - __recommended_encoding__, # UTF-8 -) - -print(f"FTLLexEngine {__version__}") -print(f"Fluent Specification {__fluent_spec_version__}") -``` - ---- - -**Python Requirement**: 3.13+ diff --git a/docs/RELEASE_PROTOCOL.md b/docs/RELEASE_PROTOCOL.md new file mode 100644 index 00000000..878cc83f --- /dev/null +++ b/docs/RELEASE_PROTOCOL.md @@ -0,0 +1,308 @@ +--- +afad: "3.5" +version: "0.163.0" +domain: RELEASE +updated: "2026-04-22" +route: + keywords: [release, gh, github release, pypi, tag, assets, publish, verify, worktree, main] + questions: ["how do I cut a release?", "how do I publish GitHub assets?", "how do I verify a release handoff?", "how do I rerun publish for an existing tag?"] +--- + +# Release Protocol + +**Purpose**: Publish a tagged FTLLexEngine release through GitHub CLI and verify the GitHub Release and PyPI handoff. +**Prerequisites**: `gh` installed and authenticated, release version already set in `pyproject.toml`, and a checkout topology that can produce a clean release payload. + +## Overview + +The release flow is `gh`-first and branch-based. Do not push release commits directly to `main`. +Use a release branch, open a PR, merge it, verify the merged `main` commit is green, tag that +commit, and then verify the GitHub Release and published artifacts directly. + +## Step 0: Verify GitHub CLI Readiness + +Before doing anything else, run: + +```bash +gh --version +gh auth status +``` + +If either command fails, stop immediately. 
Do not continue with release work until `gh` is both +installed and authenticated for the target repository. + +## Step 1: Choose The Authoritative Checkout + +Before any release build or branch creation, inspect the checkout that the user will keep using +after the release. Call it the primary checkout. + +```bash +git rev-parse --show-toplevel +git branch --show-current +git status --short +git fetch origin --prune +git fetch origin --tags +git rev-list --left-right --count HEAD...origin/main +``` + +Rules: + +- If the primary checkout is clean and current enough for release work, release from it directly. +- If the primary checkout is intentionally dirty, contains unrelated unpublished work, or should + not be disturbed, create a clean release worktree from the same repository and do release work + there. +- Do not run the release from a dirty checkout just because the intended payload currently lives + there. +- If `git fetch origin --tags` fails with `would clobber existing tag`, stop and inspect the tag + divergence before continuing. Compare the local and remote tag directly, delete only the stale + local tag, and rerun the tag fetch: + +```bash +TAG=v0.36.0 +git rev-parse "$TAG" +git ls-remote --tags origin "refs/tags/$TAG" "refs/tags/$TAG^{}" +git tag -d "$TAG" +git fetch origin --tags +``` + +Recommended clean-worktree flow: + +```bash +PRIMARY_CHECKOUT="$(git rev-parse --show-toplevel)" +git fetch origin --prune +git fetch origin --tags +RELEASE_WORKTREE="$(mktemp -d -t ftllexengine-release-XXXXXX)" +git worktree add -b release/X.Y.Z "$RELEASE_WORKTREE" origin/main +cd "$RELEASE_WORKTREE" +``` + +If the unpublished release payload exists only in the dirty primary checkout, move it explicitly +before running release gates in the clean worktree. Preferred: create a local bootstrap branch that +captures the payload, then add the release worktree from that branch. Acceptable: export one +explicit patch and apply it inside the release worktree. 
+ +Bootstrap-branch example: + +```bash +git switch -c codex/release-bootstrap-X.Y.Z +git add -A +git commit -m "release: bootstrap X.Y.Z payload" +RELEASE_WORKTREE="$(mktemp -d -t ftllexengine-release-XXXXXX)" +git worktree add -b release/X.Y.Z "$RELEASE_WORKTREE" codex/release-bootstrap-X.Y.Z +cd "$RELEASE_WORKTREE" +``` + +## Step 2: Pre-flight And Release Readiness + +Run the local gates first: + +```bash +gh pr list --state open \ + --json number,title,url,headRefName,mergeStateStatus,isDraft,author,statusCheckRollup +bash -n scripts/*.sh +./check.sh +PY_VERSION=3.14 ./scripts/lint.sh +PY_VERSION=3.14 ./scripts/test.sh +uv run python scripts/validate_docs.py +uv run python scripts/validate_version.py +uv build +``` + +Also confirm: + +- `CHANGELOG.md` contains the target release entry. +- `pyproject.toml` has the final target version. +- the release checkout is based on current `origin/main` or you explicitly understand the delta. + +Do not cut the release branch or tag anything while any gate is red. + +## Step 3: Release Branch And Staging Checkpoint + +Create the release branch and treat staging as a scope-verification checkpoint: + +```bash +git checkout -b release/X.Y.Z +git add -A +git status --short +git diff --cached --name-status +git diff --cached --stat +git commit -m "release: bump version to X.Y.Z" +git push origin release/X.Y.Z +``` + +Requirements before continuing: + +- `git status --short` shows no intended release file left unstaged or untracked. +- `git diff --cached --name-status` matches the expected file set. +- `git diff --cached --stat` confirms the staged payload is the release you intend to ship. + +If the staged diff is incomplete or polluted, fix the branch before committing. 
+ +## Step 4: Pull Request And CI Checkpoint + +Open the pull request: + +```bash +gh pr create \ + --title "release: bump version to X.Y.Z" \ + --base main \ + --head release/X.Y.Z \ + --body "Release X.Y.Z" +``` + +Then verify scope and wait for checks: + +```bash +gh pr diff --name-only +gh pr view --json number,state,mergeStateStatus,statusCheckRollup,url +gh pr checks +``` + +Rules: + +- `gh pr diff --name-only` must still match the intended release file set. +- If `gh pr diff --name-only` fails with HTTP 406 because the PR diff is too large, fall back + to GitHub's paginated file list API and the local branch comparison: + +```bash +REPO="$(gh repo view --json nameWithOwner -q .nameWithOwner)" +PR_NUMBER="$(gh pr view --json number -q .number)" +gh api "repos/$REPO/pulls/$PR_NUMBER/files" --paginate --jq '.[].filename' +git diff --name-only origin/main...HEAD +``` + +- If you push another commit, reopen both the staging checkpoint and this PR diff checkpoint. +- Do not continue until the required PR checks are green. + +## Step 5: Merge, Verify `main`, And Handle Partial Merge Failures + +Merge the PR through GitHub, then verify the merged `main` commit itself before tagging: + +```bash +REPO="$(gh repo view --json nameWithOwner -q .nameWithOwner)" +gh pr merge --repo "$REPO" --merge --delete-branch \ + --subject "release: bump version to X.Y.Z (#<PR_NUMBER>)" +``` + +If `gh pr merge` exits non-zero, do not assume the merge failed. Inspect the PR directly: + +```bash +gh pr view --repo "$REPO" --json number,state,mergedAt,headRefName,baseRefName,url +``` + +If GitHub already reports `state` as `MERGED` and `mergedAt` is populated, treat that merged +state as authoritative and continue with the post-merge checks instead of retrying blindly. 
+ +Then fetch and verify the merged `main` handoff: + +```bash +git fetch origin --prune +git fetch origin --tags +git switch --detach origin/main +MAIN_SHA="$(git rev-parse HEAD)" +gh run list --workflow=test.yml --branch=main --commit "$MAIN_SHA" --limit=20 +gh run view --log-failed +``` + +Do not create the tag until the exact merged `main` commit you intend to tag has a successful +`test.yml` run. + +## Step 6: Tag, Publish Workflow, And Asset Convergence + +Create and push the version tag only after Step 5 is green: + +```bash +git tag vX.Y.Z +git push origin vX.Y.Z +``` + +Verify the remote tag exists: + +```bash +REPO="$(gh repo view --json nameWithOwner -q .nameWithOwner)" +gh api "repos/$REPO/git/ref/tags/vX.Y.Z" +``` + +The tag push triggers `.github/workflows/publish.yml`. Monitor it directly: + +```bash +TAG_SHA="$(git rev-list -n 1 vX.Y.Z)" +gh run list --workflow=publish.yml --event=push --commit "$TAG_SHA" --limit=20 +gh run view --log-failed +``` + +If you need to rerun publication for the existing tag, rerun the workflow against that tag. Do not +move or recreate the tag: + +```bash +gh workflow run publish.yml -f release_tag=vX.Y.Z +gh workflow run publish.yml -f release_tag=vX.Y.Z -f publish_to_testpypi=true +``` + +If GitHub Release assets need manual convergence after the workflow, use: + +```bash +GH_TOKEN=... ./scripts/publish-github-release-assets.sh vX.Y.Z +GH_TOKEN=... ./scripts/verify-github-release.sh vX.Y.Z +``` + +## Step 7: Verify Public Release State + +Do not treat workflow success alone as authoritative. 
Inspect the published release object: + +```bash +gh release view vX.Y.Z --json tagName,isDraft,isPrerelease,publishedAt,url,assets +``` + +Required assets: + +- `ftllexengine-X.Y.Z.tar.gz` +- `ftllexengine-X.Y.Z-py3-none-any.whl` +- `ftllexengine-X.Y.Z.sha256` + +Then verify download, checksum, and installability: + +```bash +TMP_DIR="$(mktemp -d)" +gh release download vX.Y.Z \ + -p 'ftllexengine-X.Y.Z-py3-none-any.whl' \ + -p 'ftllexengine-X.Y.Z.tar.gz' \ + -p 'ftllexengine-X.Y.Z.sha256' \ + -D "$TMP_DIR" + +( + cd "$TMP_DIR" + shasum -a 256 -c "ftllexengine-X.Y.Z.sha256" +) + +python3.13 -m venv "$TMP_DIR/py313" +"$TMP_DIR/py313/bin/pip" install --no-cache-dir "ftllexengine==X.Y.Z" +"$TMP_DIR/py313/bin/python" -c "import ftllexengine as pkg; print(pkg.__version__)" +rm -rf "$TMP_DIR" +``` + +The release is not complete until the release object, assets, and real install test all succeed. + +## Step 8: Branch And Checkout Hygiene + +Clean up the release branch topology and reconcile the primary checkout: + +```bash +git remote prune origin +REPO="$(gh repo view --json nameWithOwner -q .nameWithOwner)" +gh api "repos/$REPO/branches" --paginate --jq '.[].name' +``` + +Requirements: + +- The remote `release/X.Y.Z` branch is gone. +- No stale historical `release/` branches remain locally or remotely. +- If a dedicated release worktree was used, the primary checkout is explicitly returned to a + truthful `main`: + +```bash +git -C "$PRIMARY_CHECKOUT" switch main +git -C "$PRIMARY_CHECKOUT" pull --ff-only +``` + +- Any still-needed unpublished local work from the old primary checkout is moved to a named branch + or exported patch. +- Disposable release worktrees are removed after the release closes. 
diff --git a/docs/TERMINOLOGY.md b/docs/TERMINOLOGY.md index 7829e44f..6bea954e 100644 --- a/docs/TERMINOLOGY.md +++ b/docs/TERMINOLOGY.md @@ -1,443 +1,41 @@ --- -afad: "3.3" -version: "0.153.0" -domain: terminology -updated: "2026-03-13" +afad: "3.5" +version: "0.163.0" +domain: TERMINOLOGY +updated: "2026-04-22" route: - keywords: [terminology, definitions, glossary, fluent terms, message, term, pattern, placeable, resource] - questions: ["what is a message?", "what is a term?", "what is a pattern?", "fluent terminology?"] + keywords: [terminology, glossary, message, term, resource, locale code, strict mode] + questions: ["what does resource mean here?", "what is the difference between a message and a term?", "what does strict mode mean in FTLLexEngine?"] --- -# FTLLexEngine Terminology Guide +# Terminology -**Purpose**: Standard terminology reference for FTLLexEngine. +**Purpose**: Keep the project’s documentation and code comments aligned on a small set of terms. **Prerequisites**: None. -This document establishes the standard terminology used throughout FTLLexEngine's codebase, documentation, and communication. Consistent terminology improves clarity and reduces confusion. +## Core Terms ---- - -## Core Terminology - -### Fluent System Terms - -| Term | Definition | Usage Example | -|------|------------|---------------| -| **Fluent** | The localization system and specification | "Fluent supports asymmetric localization" | -| **FTL** | The file format (Fluent Translation List) | "Save translations in .ftl files" | -| **.ftl files** | Files using the FTL format | "Load main.ftl and errors.ftl" | -| **Fluent syntax** | The language syntax used in .ftl files | "Learn Fluent syntax at projectfluent.org" | -| **FTL specification** | The formal grammar and rules (v1.0) | "Implements FTL specification v1.0" | - -**Rationale**: "Fluent" is the system, "FTL" is the file format, "Fluent syntax" is the language. 
- ---- - -### Message Structure Terms - -| Term | Definition | Code Example | Prose Example | -|------|------------|--------------|---------------| -| **Message** | A translatable unit with an ID | `Message` class | "The welcome message" | -| **Message ID** | The identifier for a message | `message_id` (snake_case) | "message ID" (two words) | -| **Message identifier** | Formal variant of message ID | `message_id: str` | "message identifier" | -| **Term** | Reusable translation (prefixed with `-`) | `Term` class | "The -brand term" | -| **Term ID** | The identifier for a term | `term_id` (snake_case) | "term ID" | -| **Pattern** | The text content of a message/term | `Pattern` class | "The message pattern" | -| **Placeable** | An expression wrapped in `{ }` braces | `Placeable` class | "A variable placeable" | -| **Attribute** | Named sub-value of a message | `Attribute` class | "The tooltip attribute" | - -**Naming Conventions**: -- **Code**: Use snake_case (`message_id`, `term_id`) -- **Prose**: Use two words ("message ID", "term ID") -- **Classes**: Use PascalCase (`Message`, `Term`, `Pattern`) - ---- - -## CRITICAL: "Resource" Disambiguation - -**WARNING**: The term "resource" has **three distinct meanings** in FTLLexEngine. **ALWAYS specify which meaning** when using this term. - -### The Three Meanings of "Resource" - -#### 1. FTL Resource (AST) - -**What it is**: The parsed Abstract Syntax Tree (AST) root node returned by `parse_ftl()`. 
- -**Type**: `Resource` class from `ftllexengine.syntax.ast` - -**Usage Context**: AST manipulation, linting, transformation, serialization - -**How to Reference**: -- [OK] **"the Resource AST node"** -- [OK] **"the parsed Resource"** -- [OK] **"Resource object"** -- [OK] **"AST Resource"** -- [AVOID] ~~"resource"~~ (ambiguous) - -**Code Example**: -```python -from ftllexengine import parse_ftl -from ftllexengine.syntax.ast import Resource - -# Correct: Clear context -resource_ast: Resource = parse_ftl(ftl_source) # Resource AST node -for entry in resource_ast.entries: - print(entry) - -# Ambiguous: What type of resource? -resource = parse_ftl(ftl_source) # Is this AST, source, or loader? -``` - -**Prose Example**: -```markdown -[OK] "The Resource AST node contains all parsed entries" -[OK] "parse_ftl() returns a Resource object representing the AST" -[AVOID] "The resource contains all entries" (which resource?) -``` - ---- - -#### 2. FTL Source (String) - -**What it is**: The string content containing FTL syntax, passed to `add_resource(source: str)`. - -**Type**: `str` (or `FTLSource` type alias) - -**Usage Context**: Loading translations at runtime, validation, file I/O - -**How to Reference**: -- [OK] **"FTL source"** -- [OK] **"FTL source text"** -- [OK] **"FTL source code"** -- [OK] **"ftl_source"** (variable name) -- [AVOID] ~~"resource"~~ (ambiguous) - -**Code Example**: -```python -from ftllexengine import FluentBundle -from ftllexengine.localization import FTLSource - -# Correct: Clear naming -ftl_source: FTLSource = """ -hello = Hello, World! -""" -bundle.add_resource(ftl_source) # String parameter - -# Ambiguous: What type of resource? -resource = "hello = Hello!" # Is this a source string or AST? -bundle.add_resource(resource) -``` - -**Prose Example**: -```markdown -[OK] "Pass FTL source to add_resource()" -[OK] "The FTL source text is validated before loading" -[AVOID] "Pass the resource to add_resource()" (which resource?) -``` - ---- - -#### 3. 
Resource Loader - -**What it is**: System for loading .ftl files from disk/network. - -**Types**: `PathResourceLoader` (file system), `ResourceLoader` (protocol) - -**Usage Context**: Multi-locale applications with file-based translations - -**How to Reference**: -- [OK] **"resource loader"** -- [OK] **"PathResourceLoader instance"** -- [OK] **"ResourceLoader protocol"** -- [OK] **"loader"** (variable name) -- [AVOID] ~~"resource"~~ (ambiguous) - -**Code Example**: -```python -from ftllexengine.localization import PathResourceLoader, ResourceLoader - -# Correct: Clear naming -loader: ResourceLoader = PathResourceLoader("locales/{locale}") -ftl_source = loader.load("en", "main.ftl") - -# Ambiguous: What type of resource? -resource = PathResourceLoader("locales/{locale}") # This is a LOADER -``` - -**Prose Example**: -```markdown -[OK] "PathResourceLoader loads .ftl files from disk" -[OK] "Implement the ResourceLoader protocol for custom loaders" -[AVOID] "The resource loads .ftl files" (resource doesn't load files, loaders do!) -``` - ---- - -### Disambiguation Decision Tree - -When writing documentation or code, ask: - -``` -Am I talking about... - -├─ An AST object from parse_ftl()? -│ └─ Use: "Resource AST", "Resource object", variable: resource_ast -│ -├─ A string containing FTL syntax? -│ └─ Use: "FTL source", "FTL source text", variable: ftl_source -│ -└─ A system that loads .ftl files? - └─ Use: "resource loader", "PathResourceLoader", variable: loader -``` - ---- - -### Variable Naming Conventions - -**Recommended variable names** to avoid ambiguity: - -```python -# FTL Resource (AST) -resource_ast = parse_ftl(ftl_source) -ast_root = parse_ftl(ftl_source) -parsed_resource = parse_ftl(ftl_source) - -# FTL Source (String) -ftl_source = "hello = World" -ftl_content = Path("main.ftl").read_text() -source_text = "..." 
- -# Resource Loader -loader = PathResourceLoader("locales/{locale}") -resource_loader = PathResourceLoader("locales/{locale}") -disk_loader = PathResourceLoader("locales/{locale}") -``` - -**Avoid**: -```python -# [WRONG] Ambiguous - which type of resource? -resource = ... -res = ... -r = ... -``` +| Term | Meaning | +|:-----|:--------| +| Message | Public FTL entry such as `welcome = Hello` | +| Term | Private reusable FTL entry such as `-brand = FTLLexEngine` | +| FTL source | Raw `.ftl` text before parsing | +| FTL resource | Parsed `Resource` AST | +| Resource loader | Object that returns FTL source for a locale/resource id pair | +| Locale code | Canonical locale identifier used by the runtime | +| Strict mode | Fail-fast behavior that raises integrity exceptions instead of returning soft fallbacks | +| Boot validation | Startup path that proves resource cleanliness and schema correctness before traffic | ---- - -### Method Naming Context - -Some methods use "resource" in their name - context determines meaning: - -| Method | "Resource" Meaning | Full Type | -|--------|-------------------|-----------| -| `add_resource(source)` | **FTL source (string)** | Parameter is `str` | -| `validate_resource(source)` | **FTL source (string)** | Parameter is `str` | -| `parse_ftl(source)` returns | **Resource AST** | Returns `Resource` object | -| `PathResourceLoader(...)` | **Resource loader** | Creates loader instance | -| `ResourceLoader.load(...)` | **FTL source (string)** | Returns `str` | - -**Note**: `add_resource()` and `validate_resource()` take **FTL source** (string), NOT Resource AST objects. 
- ---- - -## Other Important Terms - -### Locale Terms - -| Term | Definition | Examples | -|------|------------|----------| -| **Locale** | Language and regional variant | "en_US", "lv_LV", "ar_SA" | -| **Locale code** | String identifier for locale | `locale: str`, `LocaleCode` type alias | -| **Language code** | Two-letter ISO 639-1 code | "en", "lv", "ar" | -| **Territory code** | Two-letter ISO 3166-1 code | "US", "LV", "SA" | -| **CLDR** | Common Locale Data Repository | "CLDR plural rules" | - -**Formatting**: Use underscore (`en_US`) or hyphen (`en-US`) - both supported. - ---- - -### Error Handling Terms - -| Term | Definition | Usage | -|------|------------|-------| -| **Errors tuple** | Immutable tuple of FrozenFluentError instances | `errors: tuple[FrozenFluentError, ...]` | -| **Fallback** | Default value when error occurs | "Returns readable fallback" | -| **Graceful degradation** | Continues with fallback instead of crashing | "Never raises, always degrades gracefully" | -| **Junk entry** | Unparseable FTL syntax | `Junk` AST node type | - ---- - -### AST Terms - -| Term | Definition | Type | -|------|------------|------| -| **AST** | Abstract Syntax Tree | `Resource` root with entries | -| **Entry** | Top-level AST node | `Message`, `Term`, `Comment`, `Junk` | -| **Expression** | Evaluable AST node | `VariableReference`, `FunctionReference`, etc. 
| -| **Selector** | Select expression condition | Part of `SelectExpression` | -| **Variant** | Select expression branch | `Variant` with key and value | - ---- - -### Function Terms - -| Term | Definition | Examples | -|------|------------|----------| -| **Built-in function** | Provided by FTLLexEngine | NUMBER, DATETIME, CURRENCY | -| **Custom function** | User-defined function | FILESIZE, PHONE | -| **Function name** | UPPERCASE identifier | "NUMBER", "CURRENCY" | -| **Function parameter** | Named argument to function | minimumFractionDigits, currencyCode | -| **camelCase** | FTL parameter convention | minimumFractionDigits | -| **snake_case** | Python parameter convention | minimum_fraction_digits | - ---- - -## Writing Guidelines - -### Documentation Style - -1. **Be explicit about "resource" meaning**: - ```markdown - [AVOID] "Load the resource into the bundle" - [OK] "Load the FTL source into the bundle" - [OK] "Parse the Resource AST using parse_ftl()" - [OK] "Use the resource loader to fetch .ftl files" - ``` - -2. **Use consistent capitalization**: - ```markdown - [OK] "Fluent" (system), "FTL" (format), "Fluent syntax" (language) - [AVOID] "fluent", "ftl", "FTL syntax" - ``` - -3. **Prose vs Code formatting**: - ```markdown - [OK] "The message ID 'welcome' is used in `bundle.format_pattern()`" - [AVOID] "The `message ID` welcome is used in bundle.format_pattern()" - ``` +## Resource Disambiguation -### Code Style +“Resource” can mean different things in localization systems. In this repository, prefer explicit phrases: -1. **Variable names should indicate type**: - ```python - # Good - ftl_source = "hello = World" - resource_ast = parse_ftl(ftl_source) - loader = PathResourceLoader("...") +- Say `FTL source` for raw text. +- Say `Resource` or `FTL resource` for the parsed AST. +- Say `resource loader` for the object that loads source material. - # Avoid - resource = "hello = World" # Which type? - r = parse_ftl(ftl_source) # Unclear - ``` - -2. 
**Type annotations clarify intent**: - ```python - from ftllexengine.syntax.ast import Resource - from ftllexengine.localization import FTLSource, ResourceLoader - - def process_ftl(ftl_source: FTLSource) -> Resource: - resource_ast: Resource = parse_ftl(ftl_source) - return resource_ast - - def load_translations(loader: ResourceLoader, locale: str) -> FTLSource: - ftl_source: FTLSource = loader.load(locale, "main.ftl") - return ftl_source - ``` - ---- - -## Terminology Checklist - -When reviewing documentation or code, verify: - -- [ ] "Resource" is always qualified (AST, source, or loader) -- [ ] Message ID uses correct case (prose: "message ID", code: `message_id`) -- [ ] "Fluent" refers to system, "FTL" refers to file format -- [ ] Variable names indicate their type (`ftl_source` vs `resource_ast`) -- [ ] Capitalization is consistent ("Fluent", not "fluent") -- [ ] Parameter names use correct case (FTL: camelCase, Python: snake_case) - ---- - -## Common Pitfalls - -### Pitfall 1: Ambiguous "resource" - -```markdown -[AVOID] "The bundle loads resources from disk" -[OK] "The bundle loads FTL source from resource loaders on disk" -``` - -### Pitfall 2: Mixing "Fluent" and "FTL" - -```markdown -[AVOID] "FTL is a localization system for .ftl files" -[OK] "Fluent is a localization system using .ftl files (FTL format)" -``` - -### Pitfall 3: Inconsistent capitalization - -```python -# [WRONG] Inconsistent -from ftllexengine import fluentBundle # Wrong -from ftllexengine import FLUENTBUNDLE # Wrong - -# Correct -from ftllexengine import FluentBundle -``` - ---- - -## Quick Reference - -**When in doubt**: - -| Context | Use This Term | -|---------|---------------| -| AST object from parse_ftl() | "Resource AST", `resource_ast` | -| String with FTL syntax | "FTL source", `ftl_source` | -| File loader system | "resource loader", `loader` | -| The Fluent system | "Fluent" | -| File format (.ftl) | "FTL" or ".ftl files" | -| The syntax language | "Fluent syntax" | -| 
Translatable unit | "message" | -| Message identifier | "message ID" (prose), `message_id` (code) | - ---- - -## Glossary - -Complete alphabetical reference: - -| Term | Short Definition | Full Details | -|------|------------------|--------------| -| **AST** | Abstract Syntax Tree | Parsed representation of FTL source | -| **Attribute** | Named sub-value of message | `.tooltip`, `.aria-label` | -| **Bundle** | Single-locale message collection | `FluentBundle` class | -| **CLDR** | Common Locale Data Repository | Unicode locale data standard | -| **Entry** | Top-level AST node | Message, Term, Comment, or Junk | -| **Expression** | Evaluable AST component | Variables, functions, selects | -| **Fallback** | Default when error occurs | Readable placeholder value | -| **Fluent** | The localization system | Overall specification and ecosystem | -| **Fluent syntax** | The language syntax | Grammar rules for .ftl files | -| **FTL** | Fluent Translation List file format | .ftl file extension | -| **FTL source** | String containing FTL syntax | What you pass to `add_resource()` | -| **Function** | Formatting function | NUMBER, DATETIME, custom | -| **Junk** | Unparseable FTL syntax | Parser error recovery node | -| **Locale** | Language and region | "en_US", "lv_LV" | -| **Localization** | Multi-locale orchestration | `FluentLocalization` class | -| **Message** | Translatable unit with ID | `Message` AST node | -| **Message ID** | Message identifier | Key used in `format_pattern()` | -| **Pattern** | Text content of message | `Pattern` AST node | -| **Placeable** | Expression in `{ }` braces | `Placeable` AST node | -| **Resource (AST)** | Parsed FTL structure | `Resource` object from `parse_ftl()` | -| **Resource loader** | System loading .ftl files | `PathResourceLoader`, custom loaders | -| **Selector** | Select expression condition | Plural category, gender, etc. 
| -| **Term** | Reusable translation | Prefixed with `-` | -| **Variant** | Select expression branch | Key-value pair in select | - ---- +## Naming Style -**See Also**: -- [README.md](../README.md) - Project overview -- [DOC_00_Index.md](DOC_00_Index.md) - Complete API reference -- [QUICK_REFERENCE.md](QUICK_REFERENCE.md) - Quick examples +- Use `Fluent` when referring to the Fluent specification or runtime concepts. +- Use `FTL` when referring to the language syntax or `.ftl` files. +- Use readable input examples such as `en_US`, `de_DE`, and `lv_LV`; reserve lowercase forms like `en_us` for normalized internal/cache-key examples. diff --git a/docs/THREAD_SAFETY.md b/docs/THREAD_SAFETY.md index 4ca68ff9..f360662e 100644 --- a/docs/THREAD_SAFETY.md +++ b/docs/THREAD_SAFETY.md @@ -1,225 +1,29 @@ --- -afad: "3.3" -version: "0.161.0" -domain: architecture -updated: "2026-03-21" +afad: "3.5" +version: "0.163.0" +domain: ARCHITECTURE +updated: "2026-04-22" route: - keywords: [thread safety, concurrency, async, thread-local, contextvars, race condition, WeakKeyDictionary, timeout, TimeoutError] - questions: ["is FTLLexEngine thread-safe?", "can I use FluentBundle in async?", "what are the thread-safety guarantees?", "how to set lock timeout?"] + keywords: [thread safety, concurrency, FluentBundle, FluentLocalization, AsyncFluentBundle, shared bundle] + questions: ["is FluentBundle thread-safe?", "can I share a localization object across threads?", "what does AsyncFluentBundle do?"] --- -# Thread Safety Reference +# Thread Safety -**Purpose**: Document thread-safety architectural decisions and guarantees. -**Prerequisites**: Basic concurrency concepts. +**Purpose**: Describe the concurrency guarantees of the public runtime classes. +**Prerequisites**: None. ## Overview -FTLLexEngine provides explicit thread-safety guarantees for different components. This document consolidates all architectural decisions related to concurrency. 
+`FluentBundle` and `FluentLocalization` are designed for concurrent use. Read operations can run concurrently, while resource and function mutations take exclusive access internally. Callers do not need to provide their own external lock around normal formatting calls. -**Quick Reference**: +## Practical Rules -| Component | Thread-Safe | Async-Safe | Notes | -|:----------|:------------|:-----------|:------| -| `FluentBundle` | Yes (all ops) | Yes | RWLock-protected reads and writes | -| `FluentLocalization` | Yes (all ops) | Yes | RWLock-protected bundle map; brief per-operation lock for bundle lookup | -| `FluentParserV1` | Yes | Yes | Stateless parsing | -| `IntegrityCache` | Yes | Yes | Lock-protected | -| `FunctionRegistry` | Copy-on-write | Copy-on-write | Copied on bundle init | -| Introspection cache | Accepted race | Accepted race | Redundant computation, no corruption | -| Parse error context | Thread-local | Requires clear | Call `clear_parse_error()` before parse | +- Share a `FluentBundle` across threads when all requests use the same locale. +- Share a `FluentLocalization` across threads when the locale fallback chain is fixed. +- Use `AsyncFluentBundle` in asyncio handlers when you want bundle work offloaded through `asyncio.to_thread()`. +- Do not try to mutate a bundle from inside a custom function triggered by that same bundle’s formatting call. ---- - -## FluentBundle Thread Safety - -`FluentBundle` is **fully thread-safe** for all operations via internal RWLock. 
- -**Guarantees**: -- All read operations (`format_pattern()`, `has_message()`, introspection) acquire read lock (concurrent) -- All write operations (`add_resource()`, `add_function()`) acquire write lock (exclusive) -- `format_pattern()` creates isolated `ResolutionContext` per call -- `IntegrityCache` uses `Lock` for internal synchronization -- `FunctionRegistry` is copied on initialization (copy-on-write) -- Batch operations (`get_all_message_variables()`) acquire single read lock for atomic snapshot - -**Write Operations**: -- `add_resource()` - Parses outside lock (stateless parser), acquires write lock for registration only -- `add_function()` - Acquires write lock for registry mutation - -**Acquisition Limitations**: -The following acquisition patterns raise `RuntimeError`: -- Read-to-write upgrade: a thread holding a read lock cannot acquire the write lock (deadlock prevention). -- Write-to-read downgrade: a thread holding the write lock cannot acquire a read lock. FluentBundle write paths are single-level operations; they do not need to read-validate while holding the write lock. -- Write lock reentrancy: a thread holding the write lock cannot acquire it again. FluentBundle write paths (`add_resource`, `add_function`) are single-level; nested acquisition is a design error. - -Read lock reentrancy is supported: a thread holding a read lock can acquire it again (enables custom function re-entry into `format_pattern`). - -If you need lazy-loading patterns, load resources before formatting or use a separate bundle instance. - -**Timeout Support**: -`RWLock.read()` and `RWLock.write()` accept an optional `timeout` parameter (seconds). `None` (default) waits indefinitely. `0.0` attempts non-blocking acquisition. Positive float sets a deadline. Raises `TimeoutError` on expiry. Reentrant read acquisitions never wait, so timeout is irrelevant in that path. 
On write timeout, the internal `_waiting_writers` counter is correctly decremented (via `try/finally`), preventing reader starvation from abandoned writes. - -```python -# RWLock is an internal implementation detail used by FluentBundle and FluentLocalization. -# It is not part of the public API. Thread safety is provided automatically by those classes. -lock = bundle._rwlock # internal only; callers use FluentBundle directly -try: - with lock.write(timeout=5.0): - ... -except TimeoutError: - ... -``` - ---- - -## FluentLocalization Thread Safety - -`FluentLocalization` is **fully thread-safe** for all operations via internal RWLock. The RWLock protects the bundle map; per-bundle formatting is independently protected by each bundle's own RWLock. - -**Guarantees**: -- `format_value()`, `format_pattern()`, `has_message()`: briefly acquire read lock per bundle map lookup only; actual formatting proceeds under the per-bundle RWLock after the bundle reference is retrieved -- `require_clean()`: reads the immutable initialization snapshot without mutating bundle state -- `validate_message_variables()`: resolves one message through the fallback chain and validates its AST without mutation -- `validate_message_schemas()`: reuses fallback-chain bundle lookups and message AST reads; no mutation -- `get_cache_stats()` and `get_cache_audit_log()`: acquire read lock for the full duration to produce a consistent aggregate snapshot -- All write operations (`add_resource()`, `add_function()`, `clear_cache()`) acquire write lock (exclusive) -- Lazy bundle creation via `_get_or_create_bundle()` uses double-checked locking: read lock for already-initialized bundles (concurrent), write lock with double-check only when creating a new bundle; callers already holding the write lock (`add_resource`) use `_create_bundle()` directly (no lock re-acquisition) - -**Cache Invalidation**: -`add_resource()`, `add_function()`, and `clear_cache()` clear the relevant cache as part of the mutation — 
not deferred to any exit point. - ---- - -## Resolution Context (Explicit State) - -The resolver uses **explicit context passing** instead of thread-local state. - -**Design**: -```python -@dataclass(slots=True) -class ResolutionContext: - """Per-resolution state, isolated per call.""" - stack: list[str] # Cycle detection path - _seen: set[str] # O(1) membership check - max_depth: int # Stack overflow protection - depth_guard: DepthGuard # Per-call depth tracking -``` - -**Why Explicit**: -- Thread-safe without locks -- Async framework compatible (no thread-local conflicts) -- Easier testing (no state reset needed) -- Clear dependency flow - -**Instance Lifecycle**: -Each `format_pattern()` call creates a fresh `ResolutionContext`. This ensures complete isolation between concurrent resolutions. Object pooling is intentionally avoided to prevent synchronization overhead. - ---- - -## Global Depth Guard (Contextvars) - -Global resolution depth uses `contextvars` for async-safe per-task state. - -**Purpose**: Prevent custom functions from bypassing depth limits by calling back into `bundle.format_pattern()`. - -```python -from contextvars import ContextVar - -_global_resolution_depth: ContextVar[int] = ContextVar( - "fluent_resolution_depth", default=0 -) - -class GlobalDepthGuard: - """Track depth across format_pattern calls.""" - def __enter__(self): - current = _global_resolution_depth.get() - if current >= self._max_depth: - raise FrozenFluentError(...) # category=RESOLUTION - self._token = _global_resolution_depth.set(current + 1) -``` - -**Security Model**: -Without global tracking, a malicious custom function could: -1. Receive control during resolution -2. Call `bundle.format_pattern()` (creates fresh context) -3. Repeat recursively, bypassing per-context limits -4. Cause stack overflow - -`GlobalDepthGuard` prevents this by tracking depth across all contexts per async task. - -**Thread Spawning Limitation**: -`ContextVar` provides per-thread isolation. 
Custom functions that spawn **new threads** bypass the guard: each new thread starts with the `ContextVar` default (depth 0) and can initiate its own full-depth resolution chain independent of the spawning thread. The guard prevents re-entry within a single thread or async task; it does not prevent cross-thread recursive invocation. If custom functions may spawn threads that call `format_pattern()`, apply additional rate limiting at the custom function level. - ---- - -## Introspection Cache (Accepted Race) - -The introspection module uses `WeakKeyDictionary` **without locking**. - -**Architectural Decision**: This is an **intentional trade-off** accepting potential race conditions for better common-case performance. - -**Location**: `src/ftllexengine/introspection/message.py:59-83` - -**Trade-off Analysis**: - -| Alternative | Overhead | Benefit | -|:------------|:---------|:--------| -| RLock | Synchronization on every read | Full thread safety | -| Thread-local cache | Memory duplication | No contention | -| **Current (lock-free)** | **None** | **Best read performance** | - -**Why Acceptable**: -- Introspection is a **pure read operation** on immutable AST nodes -- Worst case: redundant computation (cache miss), **never data corruption** -- Typical usage: read-mostly workload, concurrent introspection is rare -- Cache entries are computed identically regardless of which thread wins - -**Explicit Documentation**: -```python -# Thread Safety (Accepted Race Condition): -# WeakKeyDictionary is NOT thread-safe for concurrent writes. Concurrent -# introspection of the same Message/Term from multiple threads may cause -# race conditions during cache write operations. -# -# Trade-off: Lock-free reads provide better performance than synchronized access. -``` - -**When This Matters**: Only if multiple threads simultaneously introspect the same `Message`/`Term` object for the first time. The only consequence is both threads compute and cache the same result. 
- ---- - -## Copy-on-Write Registry - -`FluentBundle` copies any registry passed to the constructor. - -**Purpose**: Prevent shared mutable state between bundles. - -```python -class FluentBundle: - def __init__(self, locale: str, /, *, functions: FunctionRegistry | None = None): - # Always copy to prevent external mutation affecting this bundle - if functions is not None: - self._functions = functions.copy() - else: - self._functions = get_shared_registry().copy() -``` - -**Guarantees**: -- No bundle shares a mutable registry with another bundle -- Modifications to the original registry after bundle creation have no effect -- The built-in registry is frozen and copied if `add_function()` is called - ---- - -## Summary - -| Pattern | Rationale | -|:--------|:----------| -| Explicit `ResolutionContext` | Thread isolation without locks | -| `contextvars` for depth | Async-safe global state | -| Lock-free introspection cache | Performance over perfect synchronization | -| Thread-local parse errors | Hot-path optimization | -| Copy-on-write registry | Prevent shared mutable state | +## Async -**Key Principle**: FTLLexEngine optimizes for the common case (single-threaded or read-heavy concurrent workloads) while documenting explicit requirements for edge cases (async thread reuse, concurrent cache writes). +`AsyncFluentBundle` is not a separate resolver implementation. It wraps the same runtime behavior in an async-facing API and delegates the heavy work to worker threads so the event loop stays responsive. 
diff --git a/docs/TYPE_HINTS_GUIDE.md b/docs/TYPE_HINTS_GUIDE.md index 9ab13095..8d32d035 100644 --- a/docs/TYPE_HINTS_GUIDE.md +++ b/docs/TYPE_HINTS_GUIDE.md @@ -1,812 +1,41 @@ --- -afad: "3.3" -version: "0.153.0" -domain: type-hints -updated: "2026-03-13" +afad: "3.5" +version: "0.163.0" +domain: TYPE_HINTS +updated: "2026-04-22" route: - keywords: [type hints, typing, mypy, type safety, pep 695, typeis, type guards, annotations, FluentValue] - questions: ["how to use type hints?", "how to configure mypy?", "how to write type safe code?", "what types can FluentValue hold?"] + keywords: [type hints, mypy, FluentValue, ParseResult, TypeIs, LocaleCode] + questions: ["what types does the library expose?", "how do I type parse results?", "which helpers are type guards?"] --- -# Type Hints Guide - FTLLexEngine +# Type Hints Guide -**Purpose**: Leverage Python 3.13+ type hints with FTLLexEngine. -**Prerequisites**: Basic Python typing knowledge. +**Purpose**: Show the main public typing surfaces exposed by FTLLexEngine. +**Prerequisites**: Python typing basics and mypy or another static checker. -FTLLexEngine is built with modern Python 3.13+ features and provides full `mypy --strict` type safety. This guide shows how to leverage type hints for better code quality and IDE support. +## Overview ---- - -## Quick Start - -### Basic Type-Safe Function - -```python -from ftllexengine import FluentBundle -from ftllexengine.localization import MessageId - -def format_message(bundle: FluentBundle, msg_id: MessageId) -> str: - """Format message with proper type annotations.""" - result, errors = bundle.format_pattern(msg_id) - if errors: - # errors is tuple[FrozenFluentError, ...] - fully typed, immutable - for error in errors: - print(f"Error: {error}") - return result - -# Usage -bundle = FluentBundle("en") -bundle.add_resource("hello = Hello, World!") -output = format_message(bundle, "hello") -``` - ---- - -## Python 3.13+ Features in FTLLexEngine - -### 1. 
PEP 695: Type Parameter Syntax (`type` keyword) - -FTLLexEngine uses the new `type` keyword for type aliases: - -```python -# FTLLexEngine source code (Python 3.13+) -type MessageId = str -type LocaleCode = str -type ResourceId = str -type FTLSource = str -``` - -**Your Code**: -```python -from ftllexengine.localization import MessageId, LocaleCode, FTLSource - -# More descriptive than plain 'str' -def load_translations(locale: LocaleCode, source: FTLSource) -> None: - bundle = FluentBundle(locale) - bundle.add_resource(source) -``` +The package is fully typed and exposes useful public aliases and guard-style helpers. -**Benefits**: -- Better IDE autocomplete -- Self-documenting code -- Type checker understands intent -- Easier refactoring - ---- +Common surfaces: -### 2. PEP 742: TypeIs for Type Guards +- `FluentValue`: values accepted by formatting functions. +- `ParseResult[T]`: standard `(value | None, tuple[FrozenFluentError, ...])` parsing return type. +- `LocaleCode`, `MessageId`, `ResourceId`, `FTLSource`: semantic aliases for localization boundaries. +- `is_valid_decimal()`, `is_valid_date()`, `is_valid_datetime()`, `is_valid_currency()`: `TypeIs` guards for parse results. +- `CurrencyCode` and `TerritoryCode`: typed ISO identifiers. 
-FTLLexEngine uses `TypeIs` for runtime type narrowing: +## ParseResult Pattern ```python -from typing import TypeIs -from ftllexengine import parse_ftl -from ftllexengine.syntax.ast import Message, Term +from decimal import Decimal +from ftllexengine import ParseResult -# Using built-in type guards (static methods) -ftl_source = "hello = World" -resource = parse_ftl(ftl_source) - -for entry in resource.entries: - if Message.guard(entry): - # TypeIs narrows type to Message - print(entry.id.name) # Type checker knows entry is Message - print(entry.value) # Safe access to message-specific attributes - - if Term.guard(entry): - # TypeIs narrows type to Term - print(entry.id.name) # entry is Term here +def parse_amount(raw: str) -> ParseResult[Decimal]: + from ftllexengine.parsing import parse_decimal + return parse_decimal(raw, "en_US") ``` ---- - -### 3. Pattern Matching with Type Safety - -Python 3.10+ pattern matching + FTLLexEngine types: - -```python -from ftllexengine import parse_ftl -from ftllexengine.syntax.ast import Message, Term, Comment, Junk - -resource = parse_ftl(ftl_source) - -for entry in resource.entries: - match entry: - case Message(id=id_node, value=pattern): - print(f"Message: {id_node.name}") - # Pattern matching provides type narrowing - if pattern: - print(f" Value: {pattern}") - - case Term(id=id_node): - print(f"Term: {id_node.name}") - - case Comment(): - print("Comment found") - - case Junk(content=content): - print(f"Parse error: {content[:50]}") -``` - ---- - -## Type Aliases Reference - -### Core Type Aliases - -```python -from ftllexengine.localization import MessageId, LocaleCode, ResourceId, FTLSource - -# MessageId - Message identifiers -msg_id: MessageId = "welcome" - -# LocaleCode - Locale codes -locale: LocaleCode = "en_US" - -# ResourceId - Resource file identifiers -resource_id: ResourceId = "main.ftl" - -# FTLSource - FTL source strings -ftl_source: FTLSource = "hello = Hello!" 
-``` - -**Type Hierarchy**: -```python -# PEP 695 type aliases are TypeAliasType objects -type(MessageId) # -MessageId.__value__ == str # True (underlying type is str) - -# Values are plain str at runtime -isinstance("hello", str) # True - MessageId values are str instances - -# Type aliases are transparent: both resolve to str -# They serve as documentation aids, not enforcement mechanisms -def format(msg_id: MessageId) -> str: ... -format("en_US") # Accepted by type checkers (MessageId is just str) -``` - ---- - -## Practical Examples - -### Example 1: Message Formatter Service - -```python -from __future__ import annotations - -from ftllexengine import FluentBundle, FrozenFluentError -from ftllexengine.localization import MessageId, LocaleCode, FTLSource -import logging - -logger = logging.getLogger(__name__) - - -class MessageFormatter: - """Type-safe message formatting service.""" - - def __init__(self, locale: LocaleCode) -> None: - """Initialize formatter for locale. - - Args: - locale: Locale code (e.g., "en_US", "lv_LV") - """ - self._bundle: FluentBundle = FluentBundle(locale) - self._locale: LocaleCode = locale - - def load_translations(self, ftl_source: FTLSource) -> None: - """Load FTL translations into bundle. - - Args: - ftl_source: FTL source string - """ - self._bundle.add_resource(ftl_source) - - def format( - self, - msg_id: MessageId, - args: dict[str, object] | None = None, - ) -> str: - """Format message with error logging. - - Args: - msg_id: Message identifier - args: Variable substitutions - - Returns: - Formatted message string - """ - result, errors = self._bundle.format_pattern(msg_id, args) - - if errors: - self._log_errors(msg_id, errors) - - return result - - def _log_errors( - self, - msg_id: MessageId, - errors: tuple[FrozenFluentError, ...], - ) -> None: - """Log translation errors. 
- - Args: - msg_id: Message that had errors - errors: Tuple of errors encountered (immutable) - """ - for error in errors: - logger.warning( - "Translation error in message %r: %s", - msg_id, - error, - extra={"locale": self._locale}, - ) - - @property - def locale(self) -> LocaleCode: - """Get current locale.""" - return self._locale - - -# Usage -formatter = MessageFormatter("en_US") -formatter.load_translations(""" -welcome = Hello, { $name }! -""") - -message = formatter.format("welcome", {"name": "Alice"}) -print(message) # "Hello, Alice!" -``` - ---- - -### Example 2: Multi-Locale Manager with Type Safety - -```python -from __future__ import annotations - -from collections.abc import Generator -from typing import Protocol - -from ftllexengine import FluentBundle, FluentLocalization -from ftllexengine.localization import ( - LocaleCode, - MessageId, - ResourceId, - FTLSource, -) - - -class TranslationLoader(Protocol): - """Protocol for translation loading systems.""" - - def load(self, locale: LocaleCode, resource_id: ResourceId) -> FTLSource: - """Load FTL resource for locale. - - Args: - locale: Locale code - resource_id: Resource identifier - - Returns: - FTL source content - """ - ... - - -class LocalizationManager: - """Type-safe multi-locale manager.""" - - def __init__( - self, - locales: list[LocaleCode], - loader: TranslationLoader, - ) -> None: - """Initialize manager. - - Args: - locales: Locale codes in fallback order - loader: Translation loading system - """ - self._locales: tuple[LocaleCode, ...] = tuple(locales) - self._loader: TranslationLoader = loader - self._l10n: FluentLocalization | None = None - - def initialize(self, resource_ids: list[ResourceId]) -> None: - """Load all resources. 
- - Args: - resource_ids: List of resource file identifiers - """ - self._l10n = FluentLocalization( - self._locales, - resource_ids, - self._loader, - ) - - def translate( - self, - msg_id: MessageId, - args: dict[str, object] | None = None, - ) -> str: - """Translate message with fallback. - - Args: - msg_id: Message identifier - args: Variable substitutions - - Returns: - Translated message - - Raises: - RuntimeError: If not initialized - """ - if self._l10n is None: - raise RuntimeError("Manager not initialized") - - result, errors = self._l10n.format_value(msg_id, args) - - if errors: - # Handle errors (log, report, etc.) - pass - - return result - - def has_translation(self, msg_id: MessageId) -> bool: - """Check if message exists in any locale. - - Args: - msg_id: Message identifier - - Returns: - True if message exists - """ - if self._l10n is None: - return False - - return self._l10n.has_message(msg_id) - - def get_bundles(self) -> Generator[FluentBundle, None, None]: - """Get all bundles in fallback order. - - Yields: - FluentBundle instances - - Raises: - RuntimeError: If not initialized - """ - if self._l10n is None: - raise RuntimeError("Manager not initialized") - - yield from self._l10n.get_bundles() -``` - ---- - -### Example 3: Custom Function with Full Type Safety - -```python -from __future__ import annotations - -from ftllexengine import FluentBundle -from ftllexengine.runtime.functions import create_default_registry -from typing import Literal - -# Python 3.13+ with precise types -def format_currency( - amount: int | Decimal, - *, - currency_code: Literal["USD", "EUR", "GBP", "JPY"] = "USD", - show_symbol: bool = True, -) -> str: - """Format currency with type-safe currency codes. 
- - Args: - amount: Monetary amount - currency_code: ISO currency code (limited set) - show_symbol: Include currency symbol - - Returns: - Formatted currency string - """ - symbols: dict[str, str] = { - "USD": "$", - "EUR": "€", - "GBP": "£", - "JPY": "¥", - } - - symbol = symbols[currency_code] if show_symbol else currency_code - return f"{symbol}{amount:,.2f}" - - -# Create isolated registry and register function -registry = create_default_registry() -registry.register( - format_currency, - ftl_name="CURRENCY", - param_map={ - "currencyCode": "currency_code", - "showSymbol": "show_symbol", - }, -) - -# Type-safe usage with custom registry -bundle: FluentBundle = FluentBundle("en", functions=registry) -bundle.add_resource(""" -price = { CURRENCY($amount, currencyCode: "EUR") } -""") - -result, _ = bundle.format_pattern("price", {"amount": 99.95}) -``` - ---- - -### Example 4: AST Visitor with Type Guards - -```python -from __future__ import annotations - -from ftllexengine import parse_ftl -from ftllexengine.syntax.ast import Message, Term, VariableReference, FunctionReference -from ftllexengine.syntax.visitor import ASTVisitor - - -class VariableCollector(ASTVisitor): - """Collect all variables from FTL source with type safety.""" - - def __init__(self) -> None: - """Initialize collector.""" - super().__init__() - self.variables: set[str] = set() - self.current_message: str | None = None - - def visit_Message(self, node: Message) -> None: - """Visit message node. - - Args: - node: Message AST node - """ - self.current_message = node.id.name - super().visit_Message(node) - self.current_message = None - - def visit_VariableReference(self, node: VariableReference) -> None: - """Collect variable reference. - - Args: - node: VariableReference AST node - """ - self.variables.add(node.id.name) - super().visit_VariableReference(node) - - -# Usage with type checking -ftl_source = """ -welcome = Hello, { $name }! -farewell = Goodbye, { $firstName } { $lastName }! 
-""" - -resource = parse_ftl(ftl_source) -collector = VariableCollector() -collector.visit(resource) - -# Type checker knows variables is set[str] -all_vars: set[str] = collector.variables -print(f"Found variables: {sorted(all_vars)}") -# → Found variables: ['firstName', 'lastName', 'name'] -``` - ---- - -## Type-Safe Error Handling - -### Pattern 1: Exhaustive Error Handling - -```python -from ftllexengine import FrozenFluentError, ErrorCategory - - -def handle_errors(errors: tuple[FrozenFluentError, ...]) -> None: - """Handle translation errors with exhaustive matching. - - Args: - errors: Tuple of errors from formatting (immutable) - """ - for error in errors: - match error.category: - case ErrorCategory.REFERENCE: - # Missing message, variable, or term - print(f"Reference error: {error}") - - case ErrorCategory.RESOLUTION: - # Runtime error during function execution - print(f"Resolution error: {error}") - - case ErrorCategory.CYCLIC: - # Circular dependency detected - print(f"Circular reference: {error}") - - case ErrorCategory.PARSE: - # Bi-directional parsing failure - print(f"Parse error: {error}") - - case ErrorCategory.FORMATTING: - # Locale-aware formatting failure - print(f"Formatting error: {error}") -``` - ---- - -### Pattern 2: Type-Safe Error Categorization - -```python -from __future__ import annotations - -from dataclasses import dataclass -from ftllexengine import FrozenFluentError, ErrorCategory - - -@dataclass(frozen=True, slots=True) -class ErrorReport: - """Type-safe error categorization.""" - - critical: tuple[FrozenFluentError, ...] - warnings: tuple[FrozenFluentError, ...] - - @classmethod - def from_errors(cls, errors: tuple[FrozenFluentError, ...]) -> ErrorReport: - """Categorize errors by severity. 
- - Args: - errors: Tuple of translation errors (immutable) - - Returns: - Categorized error report - """ - critical_list: list[FrozenFluentError] = [] - warnings_list: list[FrozenFluentError] = [] - - for error in errors: - if error.category in (ErrorCategory.REFERENCE, ErrorCategory.CYCLIC): - critical_list.append(error) - else: - warnings_list.append(error) - - return cls(critical=tuple(critical_list), warnings=tuple(warnings_list)) - - @property - def has_critical(self) -> bool: - """Check if critical errors exist.""" - return len(self.critical) > 0 - - @property - def error_count(self) -> int: - """Total error count.""" - return len(self.critical) + len(self.warnings) -``` - ---- - -## Advanced Type Patterns - -### Generic Wrapper for Bundles - -```python -from __future__ import annotations - -from dataclasses import dataclass -from ftllexengine import FluentBundle -from ftllexengine.localization import MessageId - - -class TypedBundle[T]: - """Type-safe wrapper for FluentBundle with custom context. - - This pattern allows attaching typed metadata to bundles. - """ - - def __init__(self, bundle: FluentBundle, context: T) -> None: - """Initialize typed bundle. - - Args: - bundle: FluentBundle instance - context: Custom context data - """ - self._bundle: FluentBundle = bundle - self._context: T = context - - @property - def bundle(self) -> FluentBundle: - """Get underlying bundle.""" - return self._bundle - - @property - def context(self) -> T: - """Get typed context.""" - return self._context - - def format(self, msg_id: MessageId, args: dict[str, object] | None = None) -> str: - """Format message. 
- - Args: - msg_id: Message identifier - args: Variable substitutions - - Returns: - Formatted string - """ - result, _ = self._bundle.format_pattern(msg_id, args) - return result - - -# Usage with typed context -@dataclass -class UserContext: - """User-specific localization context.""" - - user_id: int - timezone: str - date_format: str - - -user_ctx = UserContext(user_id=123, timezone="America/New_York", date_format="MM/DD/YYYY") -bundle = FluentBundle("en_US") - -typed_bundle: TypedBundle[UserContext] = TypedBundle(bundle, user_ctx) - -# Type checker knows context is UserContext -print(typed_bundle.context.timezone) # Type-safe access -``` - ---- - -## mypy Configuration - -For maximum type safety with FTLLexEngine: - -```ini -# mypy.ini or pyproject.toml [tool.mypy] -[mypy] -python_version = 3.13 -strict = true -warn_return_any = true -warn_unused_configs = true -disallow_untyped_defs = true -disallow_any_generics = true - -# FTLLexEngine is fully typed -[mypy-ftllexengine] -ignore_missing_imports = false -``` - ---- - -## Type Checking Best Practices - -### DO: Use Type Aliases - -```python -from ftllexengine.localization import MessageId, LocaleCode - -# [OK] Good - descriptive types -def format_message(msg_id: MessageId, locale: LocaleCode) -> str: - ... - -# Less clear - generic str -def format_message(msg_id: str, locale: str) -> str: - ... 
-``` - ---- - -### DO: Annotate Return Types - -```python -from ftllexengine import FluentBundle - -# [OK] Good - explicit return type -def create_bundle(locale: str) -> FluentBundle: - return FluentBundle(locale) - -# Less safe - inferred return type -def create_bundle(locale: str): - return FluentBundle(locale) -``` - ---- - -### DO: Use Type Guards for AST - -```python -from ftllexengine import parse_ftl -from ftllexengine.syntax.ast import Message - -resource = parse_ftl(ftl_source) - -for entry in resource.entries: - # [OK] Good - type guard provides narrowing (static method) - if Message.guard(entry): - # entry is Message here - print(entry.value) - - # [OK] Also correct - isinstance() works fine - if isinstance(entry, Message): - # Type checker narrows to Message here too - print(entry.value) - # Note: Message.guard() is preferred for FTLLexEngine style consistency -``` - ---- - -### DO: Use dict[K, V] Syntax (Python 3.9+) - -```python -# [OK] Good - modern syntax (Python 3.9+) -def format(msg_id: str, args: dict[str, object]) -> str: - ... - -# Old - deprecated typing.Dict -from typing import Dict -def format(msg_id: str, args: Dict[str, object]) -> str: - ... 
-``` - ---- - -## Troubleshooting Type Errors - -### Error: "Argument has incompatible type" - -```python -# [WRONG] Type error -locale_codes: list[LocaleCode] = ["en", "fr"] -bundle = FluentBundle(locale_codes) # Error: expected str, got list - -# [OK] Fixed - extract single locale -bundle = FluentBundle(locale_codes[0]) -``` - ---- - -### Error: "Item 'None' of 'Optional[...]' has no attribute" - -```python -from ftllexengine import parse_ftl -from ftllexengine.syntax.ast import Message - -resource = parse_ftl(ftl_source) -msg = resource.entries[0] - -# [WRONG] Type error - entry might not be Message -print(msg.value) # Error: entry could be Term, Comment, Junk - -# [OK] Fixed - use type guard (static method) -if Message.guard(msg): - print(msg.value) # Safe - type narrowed to Message -``` - ---- - -## Summary - -**FTLLexEngine provides**: -- Full `mypy --strict` compatibility -- Python 3.13+ modern type features -- Type aliases for clarity -- Type guards for runtime narrowing -- Complete type annotations - -**Best practices**: -- Use provided type aliases (`MessageId`, `LocaleCode`, etc.) -- Annotate function return types -- Use type guards for AST manipulation -- Enable `mypy --strict` in your project -- Leverage Python 3.13+ features - ---- - -**Python Requirement**: 3.13+ +## Mypy -**See Also**: -- [DOC_00_Index.md](DOC_00_Index.md) - Complete API reference -- [QUICK_REFERENCE.md](QUICK_REFERENCE.md) - Quick examples -- [examples/](../examples/) - Working code samples +Project-wide mypy configuration lives in `pyproject.toml`. The examples directory has its own strict config in `examples/mypy.ini`. 
diff --git a/docs/VALIDATION_GUIDE.md b/docs/VALIDATION_GUIDE.md index 03e89d9a..9d139238 100644 --- a/docs/VALIDATION_GUIDE.md +++ b/docs/VALIDATION_GUIDE.md @@ -1,332 +1,45 @@ --- -afad: "3.3" -version: "0.153.0" -domain: validation -updated: "2026-03-13" +afad: "3.5" +version: "0.163.0" +domain: VALIDATION +updated: "2026-04-22" route: - keywords: [validation, validate_resource, SemanticValidator, duplicate, cycle detection, FTL validation] - questions: ["how to validate FTL?", "what validation checks exist?", "where is duplicate detection?", "how to detect cycles?"] + keywords: [validation, validate_resource, ValidationResult, require_clean, boot validation, message schemas] + questions: ["how do I validate FTL before loading it?", "how do I fail fast at startup?", "how do I validate message variables?"] --- # Validation Guide -**Purpose**: Understand FTLLexEngine's validation architecture and responsibility distribution. -**Prerequisites**: Basic FTL syntax knowledge. +**Purpose**: Validate FTL source, loaded resources, and message-variable contracts before serving traffic. +**Prerequisites**: Basic familiarity with `FluentBundle` or `FluentLocalization`. -## Overview +## Resource Validation -FTLLexEngine implements a **two-tier validation architecture**: - -1. **Resource-level validation** (`validate_resource()`): Checks spanning multiple entries -2. **AST node-level validation** (`SemanticValidator`): Checks within individual AST nodes - -This separation follows single-responsibility principle: each validator handles checks appropriate to its scope. - ---- - -## Validation Responsibility Matrix - -Function names prefixed with `_` are internal implementation details. They are listed -for traceability only; direct calls or imports of private functions are unsupported. 
- -| Check | Module | Function/Class | Scope | -|:------|:-------|:---------------|:------| -| Syntax errors (Junk) | `validation.resource` | `_extract_syntax_errors()` (internal) | Resource | -| Duplicate message IDs | `validation.resource` | `_collect_entries()` (internal) | Resource | -| Duplicate term IDs | `validation.resource` | `_collect_entries()` (internal) | Resource | -| Duplicate attribute IDs | `validation.resource` | `_collect_entries()` (internal) | Entry | -| Messages without value/attrs | `validation.resource` | `_collect_entries()` (internal) | Entry | -| Shadow warnings | `validation.resource` | `_collect_entries()` (internal) | Resource | -| Undefined message refs | `validation.resource` | `_check_undefined_references()` (internal) | Resource | -| Undefined term refs | `validation.resource` | `_check_undefined_references()` (internal) | Resource | -| Circular references | `validation.resource` | `_detect_circular_references()` (internal) | Resource | -| Long reference chains | `validation.resource` | `_detect_long_chains()` (internal) | Resource | -| Term missing value | `syntax.validator` | `SemanticValidator` | Node | -| Select without default | `syntax.validator` | `SemanticValidator` | Node | -| Select without variants | `syntax.validator` | `SemanticValidator` | Node | -| Duplicate variant keys | `syntax.validator` | `SemanticValidator` | Node | -| Duplicate named arguments | `syntax.validator` | `SemanticValidator` | Node | -| Term positional args warning | `syntax.validator` | `SemanticValidator` | Node | -| Placeable as selector (bypass guard) | `syntax.validator` | `SemanticValidator` | Node | - ---- - -## Quick Start +Use `validate_resource()` to check FTL source before adding it to a bundle. ```python -from ftllexengine.validation import validate_resource - -source = """ -hello = Hello, { $name }! 
--brand = FTLLexEngine -welcome = Welcome to { -brand } -""" +from ftllexengine import validate_resource -# Validate -result = validate_resource(source) - -if result.is_valid: - print("Validation passed") -else: - for error in result.errors: - print(f"Error: {error.code} - {error.message}") - for warning in result.warnings: - print(f"Warning: {warning.code} - {warning.message}") +result = validate_resource("welcome = Hello, { $name }!") +assert result.is_valid is True +assert result.error_count == 0 +assert result.warning_count == 0 ``` ---- - -## Resource-Level Validation +`ValidationResult` separates: -`validate_resource()` orchestrates six validation passes: - -### Pass 1: Syntax Error Extraction - -Converts `Junk` entries (unparseable content) to structured errors. - -```python -# FTL with syntax error -source = "hello = Hello { missing-close" -result = validate_resource(source) -# Error: VALIDATION_PARSE_ERROR -``` - -### Pass 2: Entry Collection and Duplicates - -Checks for duplicate IDs within namespaces and duplicate attributes within entries. - -```python -# Duplicate message ID -source = """ -hello = First -hello = Second -""" -# Warning: VALIDATION_DUPLICATE_ID - "Duplicate message ID 'hello'" -``` - -**Namespace Separation**: Per Fluent spec, messages and terms have separate namespaces: -```python -# NOT a duplicate - different namespaces -source = """ -brand = Brand message --brand = Brand term -""" -# No warning: 'brand' and '-brand' coexist -``` +- `errors`: structural or syntax validation failures. +- `warnings`: semantic problems such as unresolved references. +- `annotations`: parser-level annotations recovered from junk input. -### Pass 3: Undefined Reference Detection - -Identifies references to non-existent messages or terms. 
- -```python -source = """ -hello = { greeting } --missing = { -nonexistent } -""" -# Warning: VALIDATION_UNDEFINED_REFERENCE - "Message 'hello' references undefined message 'greeting'" -# Warning: VALIDATION_UNDEFINED_REFERENCE - "Term '-missing' references undefined term '-nonexistent'" -``` - -### Pass 4: Circular Reference Detection - -Detects cycles in the message/term dependency graph. - -```python -source = """ -a = { b } -b = { c } -c = { a } -""" -# Warning: VALIDATION_CIRCULAR_REFERENCE - "Circular reference detected: a -> b -> c -> a" -``` - -**Cross-Type Cycles**: The validator builds a unified graph to detect cycles spanning both messages and terms: -```python -source = """ -msg = { -term } --term = { msg } -""" -# Warning: Detects message -> term -> message cycle -``` - -**Cross-Resource Cycles**: When validating via `FluentBundle.validate_resource()`, the validator also detects cycles involving entries already loaded in the bundle: -```python -bundle = FluentBundle("en") -bundle.add_resource("msg_a = { msg_b }") # msg_a depends on msg_b - -# Now validate a resource that completes the cycle -result = bundle.validate_resource("msg_b = { msg_a }") -# Warning: Circular reference detected - msg_a and msg_b form a cycle -``` - -This cross-resource detection works because the bundle tracks dependencies for all loaded entries. - -### Pass 5: Long Chain Detection - -Warns about reference chains approaching `MAX_DEPTH` limit. - -```python -# Chain of 90 messages (warning threshold at MAX_DEPTH - 10) -# Warning: VALIDATION_LONG_CHAIN -``` - -### Pass 6: Semantic Validation - -Delegates to `SemanticValidator` for AST node-level checks. - ---- +## Loaded-Resource Validation -## AST Node-Level Validation (SemanticValidator) +`FluentLocalization.require_clean()` converts load summary problems into an `IntegrityCheckFailedError`. This is the fail-fast path for production startup. -`SemanticValidator` checks semantic correctness within individual AST nodes. 
- -### Term Must Have Value - -```python -source = "-empty" # Term without value -# Error: VALIDATION_TERM_NO_VALUE -``` - -### Select Expression Requirements - -```python -# Missing default variant -source = """ -count = { $n -> - [one] One - [other] Other -} -""" -# Error: VALIDATION_SELECT_NO_DEFAULT - must have exactly one *[default] - -# Missing variants -source = "count = { $n -> }" -# Error: VALIDATION_SELECT_NO_VARIANTS - -# Duplicate variant keys -source = """ -count = { $n -> - [one] First one - [one] Second one - *[other] Other -} -""" -# Error: VALIDATION_VARIANT_DUPLICATE -``` - -### Duplicate Named Arguments - -```python -source = 'msg = { NUMBER($n, style: "decimal", style: "percent") }' -# Error: VALIDATION_NAMED_ARG_DUPLICATE -``` - -### Term Positional Arguments Warning - -Per Fluent specification, terms only accept named arguments. Positional arguments are silently ignored at runtime. The validator warns about this to catch likely user errors: - -```python -source = """ --brand = Acme Corp -msg = { -brand($value) } -""" -# Warning: VALIDATION_TERM_POSITIONAL_ARGS - "Term '-brand' called with positional arguments; positional arguments are ignored for term references" -``` - ---- - -## Architecture Rationale - -**Why Two Tiers?** - -| Concern | Level | Example | -|:--------|:------|:--------| -| Cross-entry relationships | Resource | Circular references between messages | -| Entry-spanning checks | Resource | Duplicate attribute IDs across attributes | -| Node-internal rules | AST Node | Select expression must have default | -| Call argument rules | AST Node | Named argument uniqueness | - -Attempting to consolidate all checks into one validator would create a "god class" with mixed concerns. The current design: - -1. `validate_resource()` owns the resource-level view -2. `SemanticValidator` owns the node-level view -3. Each is testable independently -4. 
Each has clear responsibility boundaries - ---- - -## Integration with FluentBundle - -`FluentBundle.add_resource()` automatically validates during FTL source loading: - -```python -from ftllexengine import FluentBundle - -bundle = FluentBundle("en_US") -junk_entries = bundle.add_resource("hello = Hello") - -if junk_entries: - for junk in junk_entries: - print(f"Parse error: {junk}") -``` - -For standalone validation without a bundle (CI/CD pipelines, linters): - -```python -from ftllexengine.validation import validate_resource - -result = validate_resource(ftl_content) -``` - ---- - -## Validation Result Structure - -```python -@dataclass(frozen=True, slots=True) -class ValidationResult: - errors: tuple[ValidationError, ...] # Blocking issues - warnings: tuple[ValidationWarning, ...] # Non-blocking; do not affect is_valid - annotations: tuple[Annotation, ...] # Parser annotations; affect is_valid - - # Validity check - @property - def is_valid(self) -> bool: ... # True if no errors AND no annotations - - # Counts - @property - def error_count(self) -> int: ... # len(errors) - @property - def annotation_count(self) -> int: ... # len(annotations) - @property - def warning_count(self) -> int: ... # len(warnings) - - # Factories - @staticmethod - def valid() -> ValidationResult: ... # Empty result (all tuples empty) - @staticmethod - def invalid( - errors: tuple[ValidationError, ...] = (), - warnings: tuple[ValidationWarning, ...] = (), - annotations: tuple[Annotation, ...] = (), - ) -> ValidationResult: ... - @staticmethod - def from_annotations( - annotations: tuple[Annotation, ...] - ) -> ValidationResult: ... 
# Convenience: parser annotations only -``` - -**Error vs Warning**: -- **Errors**: Prevent correct resolution (syntax errors, missing term values) -- **Warnings**: May indicate issues but don't prevent resolution (duplicates, undefined refs) - ---- +## Message Variable Contracts -## Summary +Use `validate_message_variables()` when you already have an AST node, or `FluentLocalization.validate_message_schemas()` when you want to enforce contracts across a loaded localization set. -| Validator | Location | Checks | -|:----------|:---------|:-------| -| `validate_resource()` | `validation/resource.py` | Duplicates, cycles, undefined refs, chains | -| `SemanticValidator` | `syntax/validator.py` | Term values, select rules, argument uniqueness | +## Recommended Startup Pattern -**Key Insight**: If you're looking for a specific check, consult the responsibility matrix. Checks spanning multiple entries are in `validate_resource()`; checks within a single AST node are in `SemanticValidator`. +For audited startup, prefer `LocalizationBootConfig.boot()` or `boot_simple()` over assembling the boot sequence by hand. That path loads resources, checks load cleanliness, enforces required message presence, and validates declared message schemas in one place. `LocalizationBootConfig` instances are one-shot coordinators, so create a fresh instance for each boot attempt. diff --git a/examples/README.md b/examples/README.md index c03293a9..5136b70c 100644 --- a/examples/README.md +++ b/examples/README.md @@ -1,417 +1,65 @@ - -# ftllexengine Examples - -Comprehensive examples demonstrating all FTLLexEngine features. - -**Requirements**: Python 3.13+ - -## Note on Bidi Isolation Marks - -By default, FTLLexEngine wraps interpolated variables in Unicode bidi isolation marks (FSI U+2068 and PDI U+2069). 
You may see `⁨` and `⁩` characters in terminal output like: `"Sveiki, ⁨Anna⁩!"` These marks are: -- **Critical for RTL languages** (Arabic, Hebrew, Persian, Urdu) - prevents text corruption -- **Invisible in proper Unicode rendering** (browsers, most GUI apps) -- **May appear as symbols** in some terminals (this is a terminal limitation, not a bug) -- **Recommended to keep enabled** (`use_isolating=True`, the default) unless your app will only ever support LTR languages - -**Important:** Examples in this directory use `use_isolating=False` for cleaner terminal demonstrations. **Never disable bidi isolation in production applications** that may support RTL languages. - -## Common Import Patterns - -All FTLLexEngine APIs are available as top-level imports for maximum convenience: - -### Core Message Formatting - -```python -from ftllexengine import FluentBundle, FluentLocalization - -# Single locale -bundle = FluentBundle("en") - -# Multi-locale fallback -l10n = FluentLocalization(['lv', 'en']) -``` - -### Resource Loading - -```python -from ftllexengine.localization import PathResourceLoader, ResourceLoader - -# File system loader -loader = PathResourceLoader("locales/{locale}") -ftl_source = loader.load("en", "main.ftl") - -# Custom loader (implement ResourceLoader protocol) -class MyLoader: - def load(self, locale: str, resource_id: str) -> str: - # Your custom loading logic - ... 
-``` - -### AST Manipulation (Linters, Transformers) - -```python -from ftllexengine import parse_ftl, serialize_ftl -from ftllexengine.syntax import ASTVisitor, ASTTransformer -from ftllexengine.syntax.ast import Message, Term, VariableReference - -# Parse FTL to AST -resource_ast = parse_ftl(ftl_source) - -# Traverse AST -class MyVisitor(ASTVisitor): - def visit_Message(self, node): - print(f"Found message: {node.id.name}") - -# Serialize back to FTL -ftl_output = serialize_ftl(resource_ast) -``` - -### Error Handling - -```python -from ftllexengine import FrozenFluentError, ErrorCategory - -# Robust error handling -# Parser uses Junk nodes for syntax errors (robustness principle) -# and never raises exceptions. -result, errors = bundle.format_pattern("msg", {"var": value}) -if errors: - for error in errors: - diag = error.diagnostic - msg = diag.message if diag else error.message - match error.category: - case ErrorCategory.REFERENCE: - logger.warning(f"Missing translation: {msg}") - case ErrorCategory.CYCLIC: - logger.error(f"Cyclic reference: {msg}") - case ErrorCategory.RESOLUTION: - logger.error(f"Runtime error: {msg}") - case _: - logger.error(f"Error: {msg}") -``` - -### Advanced - Function Registry - -```python -from ftllexengine import FluentBundle -from ftllexengine.runtime.functions import create_default_registry - -# Create custom registry -registry = create_default_registry() - -# Register custom function -def UPPER(text: str) -> str: - return text.upper() - -registry.register(UPPER, ftl_name="UPPER") - -# Pass registry to bundle (isolated, no global state) -bundle = FluentBundle("en", functions=registry) -``` - -**Recommended Pattern**: Use `create_default_registry()` and pass to `FluentBundle` constructor for isolated function registries. For single-bundle functions, use `bundle.add_function()` method. 
- -### Introspection - -```python -from ftllexengine import parse_ftl -from ftllexengine.introspection import introspect_message, extract_variables - -# Module-level introspection (works with AST nodes) -resource_ast = parse_ftl(ftl_source) -msg = resource_ast.entries[0] -variables = extract_variables(msg) - -# Bundle method (works with message IDs) -bundle = FluentBundle("en") -bundle.add_resource(ftl_source) -info = bundle.introspect_message("welcome") -print(info.get_variable_names()) -``` - -**Note**: All examples in this directory use these top-level imports. - -## Available Examples - -### [quickstart.py](quickstart.py) - -**Basic usage of FluentBundle** - Start here for single-locale applications. - -Demonstrates: -1. Simple messages -2. Variable interpolation -3. English plurals (one, other) -4. Latvian plurals (zero, one, other) -5. Select expressions -6. Number formatting -7. Loading from files -8. **Proper error handling** (production pattern with error logging) - -**Run**: `python examples/quickstart.py` - ---- - -### [parser_only.py](parser_only.py) - -**Parser-only mode (no Babel required)** - Everything you can do without the Babel dependency. - -Demonstrates: -1. Parse FTL source to AST -2. Inspect message and term structure -3. Extract variables and function references -4. Validate FTL syntax -5. Serialize AST back to FTL -6. Roundtrip validation - -**Run**: `python examples/parser_only.py` - -**Note**: Works with `pip install ftllexengine` (no `[babel]` extra needed). - ---- - -### [locale_fallback.py](locale_fallback.py) - -**Multi-locale with fallback chains** - Use this for applications supporting multiple languages. - -Demonstrates: -1. Basic two-locale fallback (Latvian → English) -2. Three-locale fallback chains (Latvian → Lithuanian → English) -3. Disk-based resource loading with PathResourceLoader -4. Custom in-memory resource loaders -5. Database/cache resource loaders (production pattern with Redis example) -6. 
Realistic e-commerce application example -7. Checking message availability -8. Iterating through bundles for introspection - -**Run**: `python examples/locale_fallback.py` - ---- - -### [bidirectional_formatting.py](bidirectional_formatting.py) - -**Bi-directional localization** - Parse locale-formatted strings back to Python types. - -Demonstrates: -1. Invoice processing with bi-directional localization (Latvian) -2. Form input validation with locale-aware parsing (German) -3. Currency parsing with automatic symbol detection (multiple locales) -4. Date parsing with locale-aware format detection (US vs European) -5. Roundtrip validation (format → parse → format) -6. CSV data import with locale-aware parsing - -**Run**: `python examples/bidirectional_formatting.py` - -**Key Features**: -- Number/currency parsing via Babel (CLDR-compliant) -- Date/datetime parsing via Python 3.13 stdlib with Babel CLDR patterns -- Financial precision with Decimal type -- Form validation patterns -- Import/export workflows -- Use `has_parse_errors()` and type guards from `ftllexengine.parsing.guards` -- **Note**: Babel's `parse_decimal()` accepts `NaN`, `Infinity`, and `Inf` (case-insensitive) as valid Decimal values - use `is_valid_decimal()` to reject these for financial data - ---- - -### [ftl_transform.py](ftl_transform.py) - -**AST transformation and manipulation** - Build tools that modify FTL files programmatically. - -Demonstrates: -1. Removing comments from FTL source -2. Renaming variables (refactoring) -3. Extracting hardcoded strings to variables -4. Removing deprecated messages by prefix -5. Chaining multiple transformations -6. Real-world modernization workflow (camelCase → snake_case) - -**Run**: `python examples/ftl_transform.py` - ---- - -### [ftl_linter.py](ftl_linter.py) - -**Static analysis and linting** - Build quality tools for FTL files. - -Demonstrates: -1. Detecting duplicate message IDs -2. Finding undefined variables -3. Validating function calls -4. 
Checking message/term references -5. Identifying messages without values -6. Building custom lint rules with ASTVisitor - -**Run**: `python examples/ftl_linter.py` - ---- - -### [custom_functions.py](custom_functions.py) - -**Custom formatting functions** - Extend FTLLexEngine with domain-specific formatters. - -Demonstrates: -1. CURRENCY formatting with symbols -2. PHONE number formatting -3. MARKDOWN rendering (simplified) -4. FILESIZE human-readable formatting -5. DURATION time formatting -6. Locale-aware custom functions using factory pattern - -**Run**: `python examples/custom_functions.py` - -**See also**: [CUSTOM_FUNCTIONS_GUIDE.md](../docs/CUSTOM_FUNCTIONS_GUIDE.md) - Comprehensive guide to custom function development including error handling patterns, Babel integration, testing strategies, and best practices. - ---- - -### [function_introspection.py](function_introspection.py) - -**Runtime function discovery and introspection** - Discover and inspect functions at runtime. - -Demonstrates: -1. Basic introspection operations (list, iterate, check membership) -2. Function metadata inspection (parameter mappings, Python names) -3. Custom function introspection workflows -4. Financial application validation patterns -5. Auto-documentation generation -6. Safe function usage with existence checks -7. Registry copying for isolated customization - -**Run**: `python examples/function_introspection.py` - -**Note**: Uses the new FunctionRegistry introspection API (`list_functions()`, `get_function_info()`, `__iter__`, `__len__`, `__contains__`) for runtime function discovery. - --- - -### [thread_safety.py](thread_safety.py) - -**Thread-safe FluentBundle usage** - Patterns for multi-threaded applications. - -Demonstrates: -1. Single-threaded initialization (recommended for static resources) -2. Concurrent read operations with ThreadPoolExecutor -3. Thread-local bundles (for per-thread customization) -4. 
Dynamic resource loading (always thread-safe) - -**Run**: `python examples/thread_safety.py` - -**Note**: FluentBundle is always thread-safe. No manual locks or special parameters needed. - +afad: "3.5" +version: "0.163.0" +domain: EXAMPLES +updated: "2026-04-22" +route: + keywords: [examples, quickstart, parser-only, localization, custom functions, thread safety, benchmarks] + questions: ["what examples are available?", "how do I run the examples?", "which example should I start with?"] --- -### [benchmark_loaders.py](benchmark_loaders.py) +# FTLLexEngine Examples -**Performance benchmarks for resource loaders** - Compare different loader implementations. +**Purpose**: Show which runnable example scripts ship with the repository and what each one demonstrates. +**Prerequisites**: Development environment synced with `uv sync --group dev`. -Demonstrates: -1. In-memory loader benchmarks (baseline performance) -2. Disk loader benchmarks (PathResourceLoader) -3. Database loader benchmarks without cache (worst case) -4. Database loader benchmarks with cache (production pattern) -5. Cache hit rate analysis -6. Throughput and latency measurements -7. Production recommendations based on app size and requirements +## Overview -**Run**: `python examples/benchmark_loaders.py` - -**Output**: Comprehensive performance comparison with initialization times, throughput metrics, and best practice recommendations for choosing the optimal loader pattern. - ---- +Every `examples/*.py` script is intended to run directly from the repository root. The commands below show how to run a single example and how to exercise the full shipped example set under the project’s Python 3.13 environment. -### [property_based_testing.py](property_based_testing.py) - -**Property-based testing with Hypothesis** - Advanced testing techniques for discovering edge cases. - -Demonstrates: -1. Testing universal properties (format_pattern never raises exceptions) -2. Testing idempotence (parse → serialize → parse roundtrip) -3. 
Testing invariants (message count consistency) -4. Testing symmetry (fallback chain locale precedence) -5. Testing batch operations equivalence (batch vs individual introspection) -6. Stateful property testing with RuleBasedStateMachine (advanced) -7. Custom Hypothesis strategies for valid FTL generation - -**Run**: `python examples/property_based_testing.py` - -**Note**: This example demonstrates advanced testing techniques using property-based testing, which generates hundreds of random test cases to verify universal properties of the library. Excellent for discovering edge cases and verifying API contracts. - ---- - -## Running All Examples +Run one example: ```bash -# Run each example individually -python examples/quickstart.py -python examples/parser_only.py -python examples/locale_fallback.py -python examples/bidirectional_formatting.py -python examples/ftl_transform.py -python examples/ftl_linter.py -python examples/custom_functions.py -python examples/function_introspection.py -python examples/thread_safety.py -python examples/benchmark_loaders.py -python examples/property_based_testing.py +uv run --python 3.13 python examples/quickstart.py ``` -## Basic Usage +Run all examples: -```python -from ftllexengine import FluentBundle - -bundle = FluentBundle("en") -bundle.add_resource(""" -my-message = Hello, { $name }! -""") - -result, errors = bundle.format_pattern("my-message", {"name": "World"}) -print(result) # Hello, World! 
+```bash +uv run --python 3.13 python scripts/run_examples.py ``` -## Loading from Files +## Example Map -```python -from pathlib import Path -from ftllexengine import FluentBundle +| Script | Focus | +|:-------|:------| +| `quickstart.py` | Single-locale bundle usage, variables, plurals, parsing handoff | +| `parser_only.py` | Parser-only install surface: parse, validate, inspect, serialize | +| `locale_fallback.py` | `FluentLocalization`, fallback chains, disk and custom loaders | +| `bidirectional_formatting.py` | Locale-aware parsing for numbers, dates, currency | +| `custom_functions.py` | `FunctionRegistry`, `bundle.add_function()`, `@fluent_function` | +| `function_introspection.py` | Introspection APIs and function metadata | +| `ftl_transform.py` | AST transforms and serialization | +| `ftl_linter.py` | Validation and custom lint-style checks | +| `thread_safety.py` | Shared bundle and task-local patterns | +| `property_based_testing.py` | Hypothesis-oriented usage examples | +| `benchmark_loaders.py` | Loader micro-benchmarks | -ftl_source = Path("locales/en/messages.ftl").read_text(encoding="utf-8") -bundle = FluentBundle("en") -bundle.add_resource(ftl_source) -result, errors = bundle.format_pattern("welcome") -print(result) -``` +## Picking A Starting Point -## FTL File Example +- New to the runtime: start with `examples/quickstart.py`. +- Working without Babel: start with `examples/parser_only.py`. +- Building a multi-locale app: use `examples/locale_fallback.py`. +- Accepting localized user input: use `examples/bidirectional_formatting.py`. -`locales/en/messages.ftl`: +## Type Checking -```ftl -hello = Hello, World! -greeting = Hello, { $name }! +The examples have a dedicated mypy configuration: -emails = You have { NUMBER($count) -> - [one] one email - *[other] { $count } emails -}. - -greeting-formal = { $gender -> - [male] Mr. { $name } - [female] Ms. 
{ $name } - *[other] { $name } -} - -price = Price: { NUMBER($amount, minimumFractionDigits: 2) } EUR +```bash +uv run mypy --config-file examples/mypy.ini examples ``` -## See Also +Related guide: -- [docs/DOC_00_Index.md](../docs/DOC_00_Index.md) - Complete API reference -- [README.md](../README.md) - Project overview and getting started guide -- [CUSTOM_FUNCTIONS_GUIDE.md](../docs/CUSTOM_FUNCTIONS_GUIDE.md) - Comprehensive guide to extending FTLLexEngine with custom formatting functions -- [CONTRIBUTING.md](../CONTRIBUTING.md) - Contribution guidelines for developers +- [README_TYPE_CHECKING.md](README_TYPE_CHECKING.md) diff --git a/examples/README_TYPE_CHECKING.md b/examples/README_TYPE_CHECKING.md index a0c00c26..e15f5102 100644 --- a/examples/README_TYPE_CHECKING.md +++ b/examples/README_TYPE_CHECKING.md @@ -1,152 +1,37 @@ --- -afad: "3.3" -version: "0.143.0" -domain: examples -updated: "2026-02-28" +afad: "3.5" +version: "0.163.0" +domain: EXAMPLES +updated: "2026-04-22" route: - keywords: [type checking, mypy, strict, threading, examples, stubs] - questions: ["how to type check examples?", "how to use mypy with examples?", "how to type threading.local?"] + keywords: [examples, mypy, type checking, strict, explicit ownership, thread safety] + questions: ["how do I type-check the examples?", "what mypy config do the examples use?", "how do the examples stay strict without local stubs?"] --- -# Type Checking Configuration for Examples +# Example Type Checking -This directory includes enhanced type checking configuration for example code. +**Purpose**: Explain how the example scripts are type-checked and how they stay strict without local stub overlays. +**Prerequisites**: Dev environment synced with `uv sync --group dev`. ## Overview -The examples/ directory uses **mypy --strict** mode to demonstrate best practices for type-safe FTL localization code. 
This includes: +The `examples/` directory uses its own `mypy.ini` so the example code stays strict and self-contained. The examples now model explicit object ownership directly in Python instead of relying on dynamic per-thread attributes, so they type-check cleanly with standard library types alone. -1. **Local mypy configuration** (`mypy.ini`) - Examples-specific type checking settings -2. **Custom type stubs** (`stubs/threading.pyi`) - Enhanced typing for threading.local() - -## Usage - -### Type-check Examples from Project Root - -```bash -# Check all examples with strict typing -python -m mypy examples/ --strict - -# Output: Success: no issues found in 11 source files -``` - -### Type-check Examples from examples/ Directory - -```bash -cd examples -python -m mypy . - -# Uses examples/mypy.ini configuration automatically -``` - -## Configuration Files - -### examples/mypy.ini - -Examples-specific mypy configuration that: -- Enables **strict mode** (demonstrates best practices) -- Uses Python 3.13 features (type aliases, pattern matching) -- Points to local type stubs in `stubs/` directory - -**Philosophy**: Examples should demonstrate production-quality type safety. - -### examples/stubs/threading.pyi - -Custom type stub for `threading` module that provides: -- Enhanced typing for `threading.local()` with dynamic attributes -- Type annotations for `Thread`, `Lock`, `current_thread()` - -**Why?**: The standard library's `threading.local()` uses dynamic attributes (set at runtime), which confuses type checkers. Our stub file helps mypy understand the thread-local bundle pattern used in [thread_safety.py](thread_safety.py). - -## Thread-Local Typing Example - -**Problem**: Standard typing can't track dynamic attributes on threading.local() - -```python -thread_local = threading.local() -thread_local.bundle = FluentBundle(...) 
# mypy: error - no attribute 'bundle' -return thread_local.bundle # mypy: error - Returning Any -``` - -**Solution**: Our `stubs/threading.pyi` provides `__setattr__` and `__getattribute__` stubs - -```python -# With our stub file: -thread_local = threading.local() -bundle: FluentBundle = FluentBundle("en", use_isolating=False) -thread_local.bundle = bundle # mypy understands -return thread_local.bundle # type: ignore[no-any-return] # Still needed for return type -``` - -## Production Recommendation - -For production code, prefer **strongly-typed wrappers** instead of dynamic attributes: - -```python -from dataclasses import dataclass -import threading - -@dataclass -class ThreadLocalState: - """Strongly-typed thread-local state.""" - bundle: FluentBundle - -thread_local: threading.local = threading.local() - -def get_bundle() -> FluentBundle: - """Get bundle for current thread (type-safe).""" - if not hasattr(thread_local, "state"): - thread_local.state = ThreadLocalState( - bundle=FluentBundle("en", use_isolating=False) - ) - state: ThreadLocalState = thread_local.state - return state.bundle # No type: ignore needed! -``` - -## Why Not Contribute to typeshed? - -The `threading.local()` typing challenge is a known limitation in Python's type system: - -1. **Dynamic attributes by design** - threading.local() intentionally uses `__setattr__`/`__getattribute__` for thread-isolation -2. **Generic solutions are complex** - Would require Generic[TypedDict] or Protocol, which adds complexity -3. **Production pattern exists** - Strongly-typed wrappers (dataclass pattern above) are the recommended approach - -Our local stub is a **pragmatic solution for examples**, showing users how to work with threading.local() while maintaining type safety. 
- -## File Structure - -``` -examples/ -├── mypy.ini # Examples-specific mypy config -├── stubs/ # Local type stubs -│ └── threading.pyi # threading.local() enhancements -├── README_TYPE_CHECKING.md # This file -└── *.py # Example scripts (all pass mypy --strict) -``` - -## Verification - -All examples pass strict type checking: +Run the examples type check from the repository root: ```bash -$ python -m mypy examples/ --strict -Success: no issues found in 11 source files +uv run mypy --config-file examples/mypy.ini examples ``` -All examples execute successfully: +## Files -```bash -$ python examples/thread_safety.py -[OK] All thread safety examples complete! -``` +| Path | Role | +|:-----|:-----| +| `examples/mypy.ini` | Strict mypy configuration for example code | +| `examples/thread_safety.py` | Thread-safety examples that keep worker-owned state explicit | -## Related Documentation +## Why No Local Stub Is Needed -- [TYPE_HINTS_GUIDE.md](../docs/TYPE_HINTS_GUIDE.md) - Type hints guide for FTLLexEngine -- [thread_safety.py](thread_safety.py) - Thread safety patterns with type hints - ---- +The examples avoid patterns that depend on dynamic attributes or implicit shared mutation. That keeps the scripts easier to reason about, easier to audit, and naturally compatible with strict type checking. -**Last Updated**: 2026-02-28 -**Python Version**: 3.13+ -**Mypy Version**: Compatible with latest stable mypy +When per-worker customization is needed, build the owned object directly inside the worker or pass it explicitly. That approach matches the project’s architecture guidance and removes the need for local stub maintenance. 
diff --git a/examples/ftl_transform.py b/examples/ftl_transform.py index ed78563c..b75896c3 100644 --- a/examples/ftl_transform.py +++ b/examples/ftl_transform.py @@ -262,8 +262,8 @@ def visit_Message(self, node: Message) -> Message | None: # pylint: disable=inv userProfile = { $firstName } { $lastName } accountBalance = Balance: { NUMBER($currentBalance) } -# TODO: Refactor variable names -# TODO: Remove obsolete messages +# Legacy review notes kept as comments in the source file +# Variable names and obsolete-message cleanup are shown below """ print("BEFORE:") diff --git a/examples/mypy.ini b/examples/mypy.ini index 4293d4bd..cdbbe34b 100644 --- a/examples/mypy.ini +++ b/examples/mypy.ini @@ -1,11 +1,9 @@ # Mypy configuration for examples/ # -# This configuration provides enhanced type checking for example code while -# maintaining practical type safety standards. +# This configuration provides strict type checking for example code. # # Philosophy: # - Examples should demonstrate best practices -# - Type stubs help with third-party libraries (threading.local) # - Strict mode enabled to catch real bugs # # Usage: @@ -19,9 +17,6 @@ strict = True # Python version python_version = 3.13 -# Local type stubs for threading.local() enhancement -mypy_path = stubs - # Strict type checking settings (applied globally) disallow_untyped_defs = True warn_return_any = True diff --git a/examples/property_based_testing.py b/examples/property_based_testing.py index 377112d2..be88ca59 100644 --- a/examples/property_based_testing.py +++ b/examples/property_based_testing.py @@ -29,6 +29,9 @@ from __future__ import annotations +import contextlib +import io + from hypothesis import assume, given, settings from hypothesis import strategies as st from hypothesis.stateful import RuleBasedStateMachine, initialize, rule @@ -79,8 +82,10 @@ def test_format_never_raises(message_id: str, args: dict[str, str]) -> None: # Always returns a tuple of errors assert isinstance(errors, tuple) - # Run 
the property test - test_format_never_raises() + # Invalid random message IDs can legitimately trigger parser diagnostics on stderr. + # Suppress that noise so the example stays focused on the property outcome. + with contextlib.redirect_stderr(io.StringIO()): + test_format_never_raises() print("Property verified: format_pattern() never raises in soft mode (strict=False)\n") diff --git a/examples/quickstart.py b/examples/quickstart.py index e2b38c62..d1941e78 100644 --- a/examples/quickstart.py +++ b/examples/quickstart.py @@ -10,6 +10,8 @@ always check errors and log/report translation issues. """ +import contextlib +import io import tempfile from decimal import Decimal from pathlib import Path @@ -285,7 +287,8 @@ # Missing variable in strict mode raises exception try: - strict_bundle.format_pattern("amount", {}) # Missing $value + with contextlib.redirect_stderr(io.StringIO()): + strict_bundle.format_pattern("amount", {}) # Missing $value except FormattingIntegrityError as e: print(f"[FAIL-FAST] {e.message_id}: {len(e.fluent_errors)} error(s)") print(f" Fallback would have been: {e.fallback_value!r}") diff --git a/examples/stubs/threading.pyi b/examples/stubs/threading.pyi deleted file mode 100644 index 33265572..00000000 --- a/examples/stubs/threading.pyi +++ /dev/null @@ -1,84 +0,0 @@ -"""Type stubs for threading module to improve type checking in examples. - -This stub file provides enhanced typing for threading.local() and other -threading primitives used in examples/thread_safety.py. - -NOTE: This is a local stub for example code only. It augments the standard -typeshed threading stubs with better support for dynamic attributes on -threading.local(). - -For production code, consider using TypedDict or dataclass wrappers around -threading.local() instead of relying on dynamic attributes. -""" - -# ruff: noqa: PIE790 # ... is required in stub files, not unnecessary -# pylint: disable=unnecessary-ellipsis # ... 
is required in stub files per PEP 484 - -from collections.abc import Callable -from typing import Any - -class local: # noqa: N801 # pylint: disable=invalid-name - """Enhanced type stub for threading.local() with dynamic attribute support. - - Type stub: Matches stdlib threading.local class naming convention. - This stub allows mypy to understand that threading.local() instances can - have arbitrary attributes set at runtime. - - Usage in examples/thread_safety.py: - thread_local = threading.local() - thread_local.bundle = FluentBundle(...) # mypy understands this - return thread_local.bundle # type: ignore still needed for generic return - """ - - def __init__(self) -> None: ... - def __getattribute__(self, name: str) -> Any: ... - def __setattr__(self, name: str, value: Any) -> None: ... - def __delattr__(self, name: str) -> None: ... - -class Thread: # pylint: disable=too-many-positional-arguments - """Thread class stub.""" - - def __init__( # pylint: disable=unused-argument - self, - group: None = None, - target: Callable[..., Any] | None = None, - name: str | None = None, - args: tuple[Any, ...] = (), - kwargs: dict[str, Any] | None = None, - *, - daemon: bool | None = None, - ) -> None: - """Initialize thread.""" - ... - def start(self) -> None: - """Start thread execution.""" - ... - def join(self, timeout: float | None = None) -> None: # pylint: disable=unused-argument - """Wait for thread completion.""" - ... - @property - def ident(self) -> int | None: - """Thread identifier.""" - ... - -class Lock: - """Lock class stub.""" - - def __init__(self) -> None: - """Initialize lock.""" - ... - def acquire(self, blocking: bool = True, timeout: float = -1) -> bool: # pylint: disable=unused-argument - """Acquire lock.""" - ... - def release(self) -> None: - """Release lock.""" - ... - def __enter__(self) -> bool: - """Enter context manager.""" - ... - def __exit__(self, *args: object) -> None: # pylint: disable=unused-argument - """Exit context manager.""" - ... 
- -def current_thread() -> Thread: - """Return current thread.""" diff --git a/examples/thread_safety.py b/examples/thread_safety.py index 8325cdaf..25c9b611 100644 --- a/examples/thread_safety.py +++ b/examples/thread_safety.py @@ -14,7 +14,7 @@ Demonstrates: 1. Single-threaded initialization pattern (recommended for static resources) 2. Concurrent read operations (always safe) -3. Thread-local bundles (alternative for per-thread customization) +3. Per-worker bundles (alternative for per-thread customization) 4. Dynamic resource loading (always safe without manual locks) WARNING: Examples use use_isolating=False for cleaner terminal output. @@ -112,55 +112,47 @@ def process_file(filename: str, bundle_ref: FluentBundle) -> str: print("\n[SUCCESS] All files processed") -# Example 3: Thread-local bundles (alternative for per-thread customization) -def example_3_thread_local_bundles() -> None: - """Example 3: Each thread gets its own bundle (for per-thread customization).""" +# Example 3: Per-worker bundles (alternative for per-thread customization) +def example_3_per_worker_bundles() -> None: + """Example 3: Each worker builds its own bundle when customization is isolated.""" print("\n" + "=" * 60) - print("Example 3: Thread-local Bundles") + print("Example 3: Per-worker Bundles") print("=" * 60) - print("[NOTE] Useful when each thread needs different resources or functions\n") - - # Thread-local storage for bundles - thread_local = threading.local() - - def get_or_create_bundle() -> FluentBundle: - """Get bundle for current thread (creates if needed).""" - if not hasattr(thread_local, "bundle"): - # Each thread creates its own bundle - bundle: FluentBundle = FluentBundle("en", use_isolating=False) - thread_local.bundle = bundle - thread_local.bundle.add_resource(""" + print("[NOTE] Useful when each worker needs different resources or functions\n") + + def build_worker_bundle() -> FluentBundle: + """Create a bundle owned by the current worker.""" + bundle = 
FluentBundle("en", use_isolating=False) + bundle.add_resource(""" worker-msg = Worker thread { $tid } initialized task = Processing task { $task_id } - """) - print(f" [Thread-{threading.current_thread().ident}] Created bundle") - - # Type ignore: threading.local() has dynamic attributes - return thread_local.bundle # type: ignore[no-any-return] + """) + return bundle - def worker_with_local_bundle(task_id: int) -> None: - """Worker that uses thread-local bundle.""" - bundle = get_or_create_bundle() + def worker_with_own_bundle(task_id: int) -> None: + """Worker that uses its own dedicated bundle.""" + bundle = build_worker_bundle() tid = threading.current_thread().ident + print(f" [Thread-{tid}] Created bundle") result, _ = bundle.format_pattern("worker-msg", {"tid": tid}) print(f" {result}") task_result, _ = bundle.format_pattern("task", {"task_id": task_id}) print(f" {task_result}") - print("[EXECUTION] Creating thread-local bundles:") + print("[EXECUTION] Creating per-worker bundles:") threads = [] for i in range(3): - t = threading.Thread(target=worker_with_local_bundle, args=(i,)) + t = threading.Thread(target=worker_with_own_bundle, args=(i,)) threads.append(t) t.start() for t in threads: t.join() - print("\n[SUCCESS] Thread-local bundles pattern complete") + print("\n[SUCCESS] Per-worker bundles pattern complete") # Example 4: Dynamic resource loading (always safe) @@ -213,7 +205,7 @@ def add_and_read(worker_id: int) -> None: if __name__ == "__main__": example_1_recommended_pattern() example_2_threadpool_pattern() - example_3_thread_local_bundles() + example_3_per_worker_bundles() example_4_dynamic_loading() print("\n" + "=" * 60) @@ -222,5 +214,5 @@ def add_and_read(worker_id: int) -> None: print("\nRECOMMENDATIONS:") print(" - Static resources: Use Example 1 (single-threaded init)") print(" - Dynamic resources: Use Example 4 (always safe)") - print(" - Per-thread customization: Use Example 3 (thread-local)") + print(" - Per-worker customization: Use 
Example 3 (per-worker bundles)") print("\n[NOTE] All FluentBundle instances are thread-safe.") diff --git a/fuzz_atheris/README.md b/fuzz_atheris/README.md index 8fee84ed..b4b8fe5e 100644 --- a/fuzz_atheris/README.md +++ b/fuzz_atheris/README.md @@ -1,1087 +1,48 @@ --- -afad: "3.3" +afad: "3.5" version: "0.163.0" domain: FUZZING -updated: "2026-03-23" +updated: "2026-04-22" route: - keywords: [fuzzing, coverage, atheris, libfuzzer, fuzz, seeds, corpus] - questions: ["what do the fuzzers cover?", "what modules are fuzzed?", "what is not fuzzed?"] ---- - -# Fuzzer Coverage Inventory - -**Purpose**: Stock-taking of what the Atheris/libFuzzer fuzzing infrastructure covers, enabling gap analysis and planning. - -## Fuzzer Summary - -| Fuzzer | Target Module(s) | Patterns | Seeds | Concern | -|:-------|:-----------------|:---------|:------|:--------| -| `fuzz_bridge.py` | `runtime.function_bridge`, `core.value_types` | 16 | 33 (.bin) | FunctionRegistry machinery, FluentNumber contracts, `make_fluent_number()` | -| `fuzz_graph.py` | `analysis.graph` | 12 | 24 (.bin) | Dependency graph cycle detection, canonicalization | -| `fuzz_builtins.py` | `runtime.functions` | 13 | 24 (.bin) | Babel formatting boundary (NUMBER, DATETIME, CURRENCY); ROUND_HALF_EVEN oracle | -| `fuzz_cache.py` | `runtime.bundle`, `runtime.cache`, `integrity` | 14 | 38 (.ftl) + 15 (.bin) | Cache concurrency, integrity, and public audit-trail access | -| `fuzz_currency.py` | `runtime.functions` | 11 | 65 (.txt) + 26 (.bin) | ROUND_HALF_EVEN oracle, `use_grouping`, `currency_digits`, `numbering_system` parameters, custom `pattern=` precision alignment, locale matrix (CURRENCY) | -| `fuzz_parse_currency.py` | `parsing.currency`, `parsing.guards` | 9 | 5 (.txt) + 20 (.bin) | Locale-aware currency parsing, symbol disambiguation, cache stability | -| `fuzz_iso.py` | `introspection.iso`, `ftllexengine` | 12 | 36 (.bin) | ISO 3166/4217 introspection; `get_currency_decimal_digits` oracle; 
`clear_module_caches(components=...)`; `TerritoryInfo.official_languages` invariants | -| `fuzz_lock.py` | `runtime`, `runtime.rwlock` | 16 | 39 (.bin) | RWLock concurrency primitives and public runtime export | -| `fuzz_numbers.py` | `runtime.functions` | 9 | 70 (.txt) + 19 (.bin) | ROUND_HALF_EVEN oracle, `numbering_system` parameter, custom `pattern=` path, boundary values, min>max clamping (NUMBER) | -| `fuzz_parse_decimal.py` | `parsing.numbers`, `parsing.guards`, `core.locale_utils` | 9 | 9 (.txt) + 1 (.bin) | Locale-aware decimal parsing, FluentNumber parsing, locale normalization/cache behavior, boundary locale validation, pseudo-locale fallback | -| `fuzz_plural.py` | `runtime.plural_rules` | 11 | 37 (.bin) | CLDR plural category selection; ordinal plural rules (`ordinal=True`) | -| `fuzz_oom.py` | `syntax.parser` | 16 | 42 (.ftl) + 8 (.bin) | Parser object explosion (DoS) | -| `fuzz_roundtrip.py` | `syntax.parser`, `syntax.serializer` | 13 | 31 (.bin) + 4 (.ftl) | Parser-serializer convergence | -| `fuzz_runtime.py` | `runtime.bundle`, `runtime.cache`, `integrity`, `diagnostics.errors` | 6+8 | 100 (.bin) | Full runtime stack, strict mode, FluentBundle AST lookup facade, canonical locale boundary | -| `fuzz_serializer.py` | `syntax.serializer`, `syntax.parser`, `syntax.visitor` | 13 | 26 (.bin) | AST-construction serializer roundtrip, visitor/transformer validation | -| `fuzz_scope.py` | `runtime.resolver`, `runtime.bundle` | 13 | 29 (.bin) | Variable scoping, term isolation, depth guards, expansion budget | -| `fuzz_structured.py` | `syntax.parser`, `syntax.serializer` | 10 | 16 (.ftl) + 6 (.bin) | Grammar-aware AST construction | -| `fuzz_cursor.py` | `syntax.cursor`, `syntax.position` | 8 | 5 (.txt) + 35 (.bin) | Cursor state machine, ParseError formatting, position helper parity | -| `fuzz_localization.py` | `localization.orchestrator`, `localization.loading` | 24 | 13 (.bin) | FluentLocalization orchestration, canonical locale boundary, boot 
validation, `required_messages`, `boot()`/`boot_simple()` 3-tuple API, single-message schema validation, AST lookup, cache audit trails, loader init, LoadSummary, fallback chains | -| `fuzz_dates.py` | `parsing.dates` | 14 | 59 (.bin) | CLDR→strptime token mapping, parse_date/parse_datetime locale-aware parsing; 4-digit year oracle (lv-LV/de-DE) | -| `fuzz_locale_context.py` | `runtime.locale_context`, `core.locale_utils` | 18 | 29 (.bin) | LocaleContext direct formatting, canonical locale_code contract, ROUND_HALF_EVEN oracle, `numbering_system`/`use_grouping`/`currency_digits` parameters, cross-locale determinism | -| `fuzz_introspection.py` | `introspection.message` | 13 | 25 (.bin) | IntrospectionVisitor, ReferenceExtractor, programmatic AST construction; `validate_message_variables` schema oracle | -| `fuzz_diagnostics_formatter.py` | `diagnostics.formatter`, `diagnostics.validation` | 12 | 23 (.bin) | Control-char escaping, RUST/SIMPLE/JSON output, sanitize/redact modes | - -## Module Coverage Matrix - -| Source Module | Fuzzers Covering It | -|:--------------|:--------------------| -| `analysis.graph` | graph | -| `core.locale_utils` | parse_decimal, runtime, localization, locale_context | -| `diagnostics.errors` | runtime, oom, numbers, currency, cache, integrity, builtins | -| `diagnostics.formatter` | diagnostics_formatter | -| `diagnostics.validation` | diagnostics_formatter, integrity | -| `integrity` | runtime, cache, integrity | -| `introspection.iso` | iso | -| `introspection.message` | introspection | -| `localization.loading` | localization | -| `localization.orchestrator` | localization | -| `parsing.currency` | parse_currency | -| `parsing.dates` | dates | -| `parsing.guards` | parse_currency, parse_decimal | -| `parsing.numbers` | parse_decimal | -| `runtime` | lock | -| `runtime.function_bridge` | bridge | -| `runtime.functions` | builtins, runtime, currency, numbers | -| `runtime.bundle` | runtime, cache, integrity, scope, localization | -| 
`runtime.locale_context` | locale_context, builtins | -| `runtime.resolver` | scope | -| `runtime.cache` | runtime, cache | -| `runtime.plural_rules` | plural | -| `runtime.rwlock` | lock | -| `core.value_types` | bridge | -| `syntax.cursor` | cursor | -| `syntax.parser` | oom, roundtrip, serializer, structured | -| `syntax.position` | cursor | -| `syntax.serializer` | roundtrip, serializer, structured | -| `syntax.visitor` | serializer | - -## `fuzz_bridge` - -Target: `runtime.function_bridge`, `core.value_types` -- FunctionRegistry lifecycle, `_to_camel_case`, parameter mapping, FluentNumber contracts, `make_fluent_number()`, `fluent_function` decorator, freeze/copy isolation, dict-like interface, metadata API, signature validation error paths. - -Concern boundary: This fuzzer stress-tests the bridge machinery that connects FTL function calls to Python implementations. Distinct from fuzz_builtins which tests built-in functions (NUMBER, DATETIME, CURRENCY) through the bridge; this fuzzer tests the bridge itself: registration, dispatch, parameter conversion, lifecycle, direct FluentNumber construction, and introspection. Tests registration error paths (inject_locale arity validation, underscore collision detection, auto-naming), metadata API (get_expected_positional_args, get_builtin_metadata, has_function), `make_fluent_number()` visible-precision inference, and adversarial Python objects through FluentBundle resolution. - -Shared infrastructure imported from `fuzz_common` (`BaseFuzzerState`, metrics, reporting); domain-specific metrics tracked in `BridgeMetrics` dataclass (register calls/failures, call dispatch tests/errors, FluentNumber checks, `make_fluent_number()` checks, camel case tests, freeze/copy tests, locale injection tests, signature validation tests, metadata API tests, evil object tests). Pattern selection uses deterministic round-robin through a pre-built weighted schedule (`select_pattern_round_robin`), immune to coverage-guided mutation bias. 
Periodic `gc.collect()` every 256 iterations and `-rss_limit_mb=4096` default. - -### Patterns - -16 patterns across 4 categories: - -**REGISTRATION (4)** - Function registration and validation: - -| Pattern | Weight | Invariants Checked | -|:--------|-------:|:-------------------| -| `register_basic` | 10 | len(registry) matches registration count | -| `register_signatures` | 12 | Positional-only, *args, **kwargs, many params, lambda, overwrite | -| `param_mapping_custom` | 8 | Custom param_map overrides auto-generated mapping | -| `signature_validation` | 6 | inject_locale arity TypeError, underscore collision ValueError, auto-naming | - -**CONTRACTS (4)** - Object immutability and type contracts: - -| Pattern | Weight | Invariants Checked | -|:--------|-------:|:-------------------| -| `fluent_number_contracts` | 12 | str, __contains__, __len__, repr, frozen, precision=None | -| `make_fluent_number_api` | 10 | default Decimal precision, grouped/localized formatting inference, bool rejection | -| `signature_immutability` | 5 | FunctionSignature frozen, param_mapping tuple, ftl_name, fuzzed lookup | -| `camel_case_conversion` | 10 | Known snake->camelCase pairs, fuzzed input returns str | - -**DISPATCH (4)** - Call dispatch and error handling: - -| Pattern | Weight | Invariants Checked | -|:--------|-------:|:-------------------| -| `call_dispatch` | 12 | call() returns result or raises for unknown function | -| `locale_injection` | 10 | should_inject_locale flag, FluentBundle locale protocol | -| `error_wrapping` | 7 | TypeError/ValueError wrapped as FrozenFluentError | -| `evil_objects` | 5 | Evil __str__, __hash__, recursive list/dict, huge str, None through FluentBundle | - -**INTROSPECTION (4)** - Registry introspection and lifecycle: - -| Pattern | Weight | Invariants Checked | -|:--------|-------:|:-------------------| -| `dict_interface` | 8 | __contains__, __iter__, list_functions, get_python_name, get_callable, __repr__ | -| `freeze_copy_lifecycle` | 8 | 
Freeze prevents registration, copy is independent+unfrozen, idempotent | -| `fluent_function_decorator` | 8 | Bare, parenthesized, inject_locale=True attribute, registry integration | -| `metadata_api` | 6 | get_expected_positional_args, get_builtin_metadata, has_function vs __contains__ | - -### Allowed Exceptions - -`ValueError`, `TypeError`, `OverflowError`, `ArithmeticError`, `FrozenFluentError`, `RecursionError`, `RuntimeError` -- invalid inputs, frozen registry mutations, and adversarial object interactions. - ---- - -## `fuzz_builtins` - -Target: `runtime.functions` (NUMBER, DATETIME, CURRENCY) -- direct Babel formatting API boundary testing. - -Concern boundary: This fuzzer stress-tests the Babel formatting boundary by calling NUMBER, DATETIME, and CURRENCY functions directly through the Python API. This is distinct from fuzz_runtime which invokes these functions through FTL syntax and the resolver stack. Direct API testing isolates the Babel layer from resolver/cache behavior and enables: fuzz-generated Babel pattern strings (pattern= parameter), FluentNumber precision (CLDR v operand) correctness verification, currency-specific decimal digit enforcement (JPY=0, BHD=3), ROUND_HALF_EVEN rounding oracle verification (NUMBER and CURRENCY), type coercion across int/float/Decimal/FluentNumber inputs, cross-locale formatting consistency, and edge value handling (NaN, Inf, -0.0, extreme magnitudes). FunctionRegistry lifecycle, parameter mapping, and locale injection protocol are covered by fuzz_bridge.py. - -Shared infrastructure imported from `fuzz_common` (`BaseFuzzerState`, metrics, reporting); domain-specific metrics tracked in `BuiltinsMetrics` dataclass (per-function call counts, precision checks/violations, cross-locale tests, type coercion tests, custom pattern tests, edge value tracking, rounding oracle checks/violations, min_gt_max coverage). 
Pattern selection uses deterministic round-robin through a pre-built weighted schedule (`select_pattern_round_robin`), immune to coverage-guided mutation bias. Periodic `gc.collect()` every 256 iterations and `-rss_limit_mb=4096` default. - -Run this fuzzer in isolation: `./scripts/fuzz_atheris.sh builtins` (uses `.venv-atheris`, independent of the project venv). Linting of this directory is covered by `./scripts/lint.sh` (auto-discovers all directories with `.py` files). - -### Patterns - -13 patterns across 4 categories: - -**NUMBER (4)** - Decimal/FluentNumber formatting: - -| Pattern | Weight | Invariants Checked | -|:--------|-------:|:-------------------| -| `number_basic` | 12 | Result is FluentNumber, fraction/grouping variation; independent min/max draws | -| `number_precision` | 15 | CLDR v operand non-negative; ROUND_HALF_EVEN oracle (all ASCII-digit locales); independent min/max draws (covers min > max clamp path) | -| `number_edges` | 8 | NaN, Inf, -0.0, huge, tiny stability | -| `number_type_variety` | 8 | int/float/Decimal/FluentNumber all produce FluentNumber | - -**DATETIME (3)** - Date/time formatting: - -| Pattern | Weight | Invariants Checked | -|:--------|-------:|:-------------------| -| `datetime_styles` | 10 | Non-empty string result, all style combos | -| `datetime_edges` | 8 | Epoch, Y2K, max timestamp, timezone offsets | -| `datetime_timezone_stress` | 6 | Fixed-offset timezones (-12h to +14h), UTC, naive | - -**CURRENCY (3)** - Currency formatting: - -| Pattern | Weight | Invariants Checked | -|:--------|-------:|:-------------------| -| `currency_codes` | 12 | FluentNumber result, valid/fuzzed ISO codes | -| `currency_precision` | 10 | Currency-specific decimals (JPY=0, BHD=3); ROUND_HALF_EVEN oracle (all ASCII-digit locales) | -| `currency_cross_locale` | 8 | Same currency formatted across locales | - -**CROSS-CUTTING (3)** - Multi-function and consistency: - -| Pattern | Weight | Invariants Checked | 
-|:--------|-------:|:-------------------| -| `custom_pattern` | 8 | Custom Babel patterns for all 3 functions | -| `cross_locale_consistency` | 8 | Same value, 3+ locales, deterministic results; independent min/max draws | -| `error_paths` | 5 | Negative/huge fraction digits, empty/invalid currency | - -### Allowed Exceptions - -`ValueError`, `TypeError`, `OverflowError`, `InvalidOperation`, `OSError`, `ArithmeticError` -- invalid inputs and Babel formatting limitations. - -### Rounding Oracle Design - -`number_precision` and `currency_precision` include a ROUND_HALF_EVEN oracle that verifies Babel's default rounding mode. Babel uses `decimal_quantization=True` which applies ROUND_HALF_EVEN (IEEE 754 banker's rounding) — 2.5 rounds to 2, 3.5 rounds to 4. The oracle covers all locales where digit extraction is possible. - -**Digit extraction** (`_extract_oracle_digits`): Uses `babel.numbers.get_decimal_symbol(locale)` and `get_group_symbol(locale)` to normalize the formatted string for any locale. Normalization removes group separators first (critical for de-DE where group separator is `.`), replaces the decimal separator with ASCII `.`, then strips all remaining non-digit characters (currency codes, whitespace, signs). Locales with non-ASCII digits (ar-EG Arabic-Indic, hi-IN Devanagari) are detected via `c.isdigit() and not c.isascii()` and skipped. Unknown locales (Babel raises `UnknownLocaleError`) are skipped via `except ValueError`. - -**Oracle check**: For each non-NaN/non-Inf Decimal result where `_extract_oracle_digits` returns a value: `expected = abs(val).quantize(10^-precision, rounding=ROUND_HALF_EVEN)` is compared against the extracted digits. NaN and Infinity skip the oracle via `InvalidOperation` from `quantize()`. - -**Input domain**: `min_frac` and `max_frac` are drawn independently (not `max_frac = ConsumeIntInRange(min_frac, N)`). 
This ensures the `min > max` clamping path in `format_number()` is exercised — a path that previously could trigger incorrect digit counts. - ---- - -## `fuzz_cache` - -Target: `runtime.cache` (via `FluentBundle` public API) -- cache parameter combinations, multi-threaded access (2-8 threads), LRU eviction stress, concurrent resource modification, write-once cache behavior, cache key complexity, and public audit-log visibility. - -Concern boundary: This fuzzer stress-tests the cache subsystem by systematically varying ALL cache constructor parameters (size, entry weight, error limits, write-once, audit mode) under concurrent multi-threaded access. This is distinct from the runtime fuzzer which tests the full resolver stack with fixed cache configs and only 2 threads. Unique coverage includes: cache parameter combinations (5 params = large state space), high thread concurrency (2-8 threads vs runtime's 2), cache eviction/LRU stress, concurrent resource modification during formatting, write-once cache behavior, cache key complexity via `_make_hashable`, and `FluentBundle.get_cache_audit_log()` consistency against `get_cache_stats()`. - -Shared infrastructure imported from `fuzz_common` (`BaseFuzzerState`, metrics, reporting); domain-specific metrics tracked in `CacheMetrics` dataclass (cache operations, write conflicts, oversize skips, error bloat, corruption events, audit-log checks, thread timeouts). Cache stats and audit logs are collected per-iteration via `bundle.get_cache_stats()` and `bundle.get_cache_audit_log()`, with invariant checks on tuple shape, `WriteLogEntry` typing, stats/log count agreement, non-decreasing audit timestamps, and operation-specific sequence/checksum structure. Pattern selection uses deterministic round-robin through a pre-built weighted schedule (`select_pattern_round_robin`), immune to coverage-guided mutation bias. Periodic `gc.collect()` every 256 iterations and `-rss_limit_mb=4096` default. 
- -### Patterns - -14 patterns across 3 categories: - -**CACHE_KEYS (7)** - Cache key variation and complexity: - -| Pattern | Weight | Invariants Checked | -|:--------|-------:|:-------------------| -| `variable_messages` | 10 | Cache key varies with args | -| `attribute_messages` | 8 | Attribute-qualified cache keys | -| `select_expressions` | 8 | Complex pattern caching | -| `message_references` | 6 | Cross-message resolution cache | -| `term_references` | 6 | Namespace variation in keys | -| `many_variables` | 6 | Key complexity scaling (5-10 placeables) | -| `deep_args` | 8 | Nested dicts/lists, unhashable types stress `_make_hashable` | - -**STRESS_PATTERNS (4)** - Capacity and resource stress: - -| Pattern | Weight | Invariants Checked | -|:--------|-------:|:-------------------| -| `long_values` | 5 | Memory weight enforcement | -| `circular_refs` | 5 | Error caching on cycles | -| `minimal_resource` | 4 | Empty/trivial resource edge cases | -| `hotspot` | 8 | Repeated access cache hit efficiency | - -**CONCURRENCY (3)** - Multi-threaded scenarios: - -| Pattern | Weight | Invariants Checked | -|:--------|-------:|:-------------------| -| `capacity_stress` | 10 | LRU eviction under capacity pressure | -| `concurrent_modify` | 8 | Race conditions: resource modification during formatting | -| `frozen_cache` | 8 | Write-once cache behavior (immutable entries) | - -### Allowed Exceptions - -`CacheCorruptionError`, `WriteConflictError`, `DataIntegrityError`, `FrozenFluentError`, `ValueError`, `TypeError`, `KeyError`, `RecursionError`, `MemoryError` -- cache integrity violations are expected findings; other exceptions from invalid inputs and depth guards. - ---- - -## `fuzz_graph` - -Target: `analysis.graph` -- `_canonicalize_cycle`, `make_cycle_key`, `detect_cycles`, `entry_dependency_set`. Validates cycle detection correctness, canonicalization invariants, and namespace-prefixed dependency set construction. 
- -### Patterns - -| Pattern | Weight | Invariants Checked | -|:--------|-------:|:-------------------| -| `canonicalize_idempotence` | 12 | Double canonicalization is identity, closing element preserved | -| `canonicalize_direction` | 10 | A->B->C and A->C->B produce distinct canonical forms | -| `make_cycle_key_consistency` | 8 | Key matches joined canonical form, rotation-invariant (unique nodes) | -| `canonicalize_edge_cases` | 6 | Empty, single, two-element sequences handled correctly | -| `detect_self_loops` | 10 | Self-referencing node detected as cycle of length 2 | -| `detect_simple_cycles` | 12 | Known N-node ring detected, all nodes present | -| `detect_dag_no_cycles` | 10 | Acyclic graphs return empty cycle list | -| `detect_disconnected` | 8 | Independent components each detect their own cycles | -| `detect_dense_mesh` | 8 | Complete graph cycle detection stability | -| `detect_deep_chain` | 8 | Long chain (up to 200 nodes) with back-edge cycle detection | -| `entry_dependency_set` | 10 | Namespace prefixing, frozenset return, count preservation | -| `adversarial_graph` | 5 | Unicode node IDs, empty strings, whitespace-only identifiers | - -### Allowed Exceptions - -`ValueError`, `TypeError`, `RecursionError` -- invalid inputs and graph construction edge cases. - ---- - -## `fuzz_currency` - -Target: `runtime.functions.currency_format` -- ROUND_HALF_EVEN oracle testing, custom `pattern=` precision alignment, locale matrix, display mode preservation, `use_grouping`/`currency_digits`/`numbering_system` parameter coverage, and `FluentNumber` wrapper contracts. - -Concern boundary: This fuzzer stress-tests the runtime CURRENCY function formatting path. Distinct from `fuzz_builtins` (which covers NUMBER/DATETIME/CURRENCY via the FTL `FluentBundle` evaluation pipeline); this fuzzer calls `currency_format` directly to probe oracle correctness, custom pattern precision alignment, and boundary-value rounding at precision 0, 2, and 3. 
Found production bug FIX-CURRENCY-PATTERN-PREC-001 on its first run (~1009 iterations): `format_currency` with a custom pattern and a currency whose CLDR precision differs from the pattern's declared decimal count produced incorrect rounding because `currency_digits=True` caused Babel to override the pattern's decimal count after pre-quantization. - -Shared infrastructure imported from `fuzz_common` (`BaseFuzzerState`, metrics, reporting); domain-specific metrics tracked in `CurrencyMetrics` dataclass. Pattern selection uses deterministic round-robin through a pre-built weighted schedule (`select_pattern_round_robin`), immune to coverage-guided mutation bias. Periodic `gc.collect()` every 256 iterations and `-rss_limit_mb=4096` default. - -### Patterns - -11 patterns across 5 categories: - -**PRECISION (3)** - Per-currency decimal precision and custom pattern precision: - -| Pattern | Weight | Invariants Checked | -|:--------|-------:|:-------------------| -| `0decimal_oracle` | 12 | JPY, KRW (0 decimals): ROUND_HALF_EVEN at integer boundary | -| `3decimal_oracle` | 13 | BHD, KWD, OMR (3 decimals): x.0005 midpoints | -| `pattern_oracle` | 16 | Custom `pattern=` with CLDR-differing currency: precision must match pattern, not CLDR | - -**ORACLE (3)** - ROUND_HALF_EVEN correctness across value ranges: - -| Pattern | Weight | Invariants Checked | -|:--------|-------:|:-------------------| -| `boundary_values` | 15 | 2-decimal currencies (USD, EUR, GBP, AUD, CAD, CHF): x.005 midpoints | -| `large_oracle` | 11 | Large positive amounts (>1e6): non-empty, ROUND_HALF_EVEN preserved | -| `negative_oracle` | 11 | Negative amounts: ROUND_HALF_EVEN preserved; abs() applied before oracle comparison | - -**LOCALE (1)** - Cross-locale consistency: - -| Pattern | Weight | Invariants Checked | -|:--------|-------:|:-------------------| -| `locale_matrix` | 11 | Same value across 10 locales: all non-empty, no exception | - -**DISPLAY (1)** - Currency display mode contracts: - -| 
Pattern | Weight | Invariants Checked | -|:--------|-------:|:-------------------| -| `display_preservation` | 11 | symbol/code/name: result contains currency identifier; not empty | - -**PARAMETERS (3)** - New formatting parameter coverage: - -| Pattern | Weight | Invariants Checked | -|:--------|-------:|:-------------------| -| `use_grouping` | 8 | `use_grouping=True/False` on large amounts: non-empty FluentNumber returned | -| `digits_override` | 8 | `currency_digits=True/False`: ISO 4217 precision applied or bypassed; non-empty | -| `numbering_system` | 7 | Non-Latin digit systems (arab, arabext, deva, beng): non-empty; determinism | - -### Allowed Exceptions - -`ValueError`, `TypeError` -- invalid currency codes and type validation. - ---- - -## `fuzz_parse_currency` - -Target: `parsing.currency.parse_currency`, `parsing.currency.resolve_ambiguous_symbol`, `parsing.currency._get_currency_pattern` -- locale-aware currency parsing, ambiguous symbol disambiguation, longest-match symbol regex behavior, and cache stability. - -Concern boundary: This fuzzer owns the text-to-`(Decimal, ISO code)` parse surface that the runtime-formatting fuzzers do not reach. It covers ISO-code parsing, symbol-only parsing, `default_currency=` and `infer_from_locale=` disambiguation, public soft-error contracts, direct ambiguous-symbol helper behavior, and cache-clearing stability. It also targets the longest-match regex path for multi-character symbols such as `R$` and `S/`, which is distinct from runtime formatting. - -Shared infrastructure imported from `fuzz_common`; domain-specific metrics tracked in `ParseCurrencyMetrics` dataclass. Pattern selection uses deterministic round-robin. Periodic `gc.collect()` every 256 iterations and `-rss_limit_mb=4096` default. 
- -### Patterns - -| Pattern | Weight | Invariants Checked | -|:--------|-------:|:-------------------| -| `iso_code_values` | 14 | `USD 1,234.56`-style inputs parse to exact `(Decimal, code)` pairs | -| `default_currency_ambiguous` | 12 | Ambiguous symbols resolve to explicit `default_currency` | -| `infer_from_locale` | 12 | Ambiguous symbols resolve from locale inference when enabled | -| `ambiguous_symbol_resolution` | 10 | `resolve_ambiguous_symbol()` matches locale defaults for `$`, `£`, `¥` | -| `longest_symbol_match` | 10 | Multi-character symbols beat shorter prefixes in regex matching | -| `invalid_currency_inputs` | 12 | Invalid codes/ambiguous inputs return soft errors, not silent success | -| `cache_clear_cycle` | 10 | `clear_currency_caches()` does not change parse semantics | -| `type_guard_contract` | 10 | `is_valid_currency()` accepts valid tuples and rejects malformed values | -| `raw_unicode_stability` | 12 | Arbitrary Unicode inputs preserve the public result-or-errors contract | - -### Allowed Exceptions - -`ValueError`, `TypeError`, `OSError`, `UnicodeEncodeError`, `FrozenFluentError` -- invalid locale/input handling and soft-error plumbing. - ---- - -## `fuzz_fiscal` - -Target: `parsing.fiscal` -- `FiscalCalendar`, `FiscalDelta`, `FiscalPeriod`, `MonthEndPolicy`, and 5 convenience functions (`fiscal_quarter`, `fiscal_year`, `fiscal_month`, `fiscal_year_start`, `fiscal_year_end`). Tests date arithmetic correctness, boundary conditions, month-end policy handling, algebraic properties, type validation error paths, and immutability contracts. - -Concern boundary: Sole owner of the `parsing.fiscal` module. No other fuzzer imports or exercises any fiscal API. 
Tests FiscalCalendar cross-consistency (fiscal_year/quarter/month/period agreement, quarter contiguity, year span 365/366), FiscalDelta algebraic properties (commutativity, double negation, __sub__ == __add__ + __neg__, __mul__/__rmul__ symmetry, total_months), cross-policy ValueError enforcement, MonthEndPolicy CLAMP/STRICT invariants, FiscalPeriod frozen dataclass contracts (hash, eq, ordering, repr, validation), and convenience function oracle testing against FiscalCalendar methods. - -Shared infrastructure imported from `fuzz_common` (`BaseFuzzerState`, metrics, reporting); domain-specific metrics tracked in `FiscalMetrics` dataclass (per-pattern check counts). Pattern selection uses deterministic round-robin through a pre-built weighted schedule (`select_pattern_round_robin`), immune to coverage-guided mutation bias. Periodic `gc.collect()` every 256 iterations and `-rss_limit_mb=4096` default. - -### Patterns - -10 patterns across 4 categories: - -**CALENDAR (3)** - FiscalCalendar invariants and identity: - -| Pattern | Weight | Invariants Checked | -|:--------|-------:|:-------------------| -| `calendar_invariants` | 15 | Quarter 1-4, month 1-12, date in fiscal year, period agreement | -| `quarter_boundaries` | 10 | Quarter start/end contiguous, span 365/366 days | -| `calendar_identity` | 5 | Hash, equality, repr, frozen, type validation, range validation | - -**ARITHMETIC (4)** - FiscalDelta operations and policies: - -| Pattern | Weight | Invariants Checked | -|:--------|-------:|:-------------------| -| `delta_add_subtract` | 12 | add_to returns date, subtract_from == negate().add_to(), CLAMP month-end, STRICT ValueError | -| `delta_algebra` | 12 | Commutativity, double negation, __neg__ == negate(), total_months, __mul__/__rmul__, __sub__ | -| `policy_cross` | 8 | with_policy preserves components, cross-policy add/sub ValueError, all policies valid | -| `delta_validation` | 5 | Non-int fields TypeError, non-MonthEndPolicy TypeError, valid construction | 
- -**CONTRACTS (2)** - Immutability and oracle testing: - -| Pattern | Weight | Invariants Checked | -|:--------|-------:|:-------------------| -| `period_contracts` | 8 | Hash, equality, ordering (__lt__/__gt__/__le__/__ge__), frozen, repr, validation | -| `convenience_oracle` | 8 | All 5 convenience functions match FiscalCalendar methods | - -**STRESS (1)** - Boundary conditions: - -| Pattern | Weight | Invariants Checked | -|:--------|-------:|:-------------------| -| `boundary_stress` | 5 | Extreme dates (year 1-9999), large deltas, result type assertion | - -### Allowed Exceptions - -`ValueError`, `OverflowError`, `TypeError` -- invalid dates, arithmetic overflow, and type validation. - ---- - -## `fuzz_integrity` - -Target: `validation.validate_resource` (standalone 6-pass validation), `syntax.validator.SemanticValidator` (E0001-E0013), `integrity` (DataIntegrityError hierarchy), `FluentBundle` strict mode (SyntaxIntegrityError, FormattingIntegrityError), `diagnostics.errors.FrozenFluentError` (integrity, immutability, sealed type, hash stability). - -Concern boundary: Validation gauntlet -- semantic integrity checks, cross-resource validation with `known_messages`/`known_terms`/`known_msg_deps`, chain depth limits (>MAX_DEPTH), strict mode DataIntegrityError triggering, and FrozenFluentError Error Layer properties. Distinct from fuzz_graph (direct cycle detection API) and fuzz_runtime (resolver stack, not validation). - -Shared infrastructure imported from `fuzz_common` (`BaseFuzzerState`, metrics, reporting); domain-specific metrics tracked in `IntegrityMetrics` dataclass (validation codes, strict mode exceptions, cross-resource conflicts, chain depth violations, FrozenFluentError coverage counters). Pattern selection uses deterministic round-robin through a pre-built weighted schedule (`select_pattern_round_robin`), immune to coverage-guided mutation bias. Periodic `gc.collect()` every 256 iterations and `-rss_limit_mb=4096` default. 
- -### Patterns - -31 patterns across 6 categories: - -**VALIDATION (10)** - Standalone `validate_resource()`: - -| Pattern | Weight | Invariants Checked | -|:--------|-------:|:-------------------| -| `valid_simple` | 8 | Valid FTL accepted without errors | -| `valid_complex` | 6 | Multi-entry with refs, terms, selects | -| `syntax_errors` | 8 | Junk extraction, parse error codes | -| `undefined_refs` | 10 | UNDEFINED_REFERENCE warning | -| `circular_2way` | 8 | 2-node cycle detection | -| `circular_3way` | 6 | 3-node cycle detection | -| `circular_self` | 6 | Self-reference detection | -| `duplicate_ids` | 8 | DUPLICATE_ID warning | -| `chain_depth_limit` | 10 | CHAIN_DEPTH_EXCEEDED for >MAX_DEPTH | -| `mixed_issues` | 6 | Multiple validation issues | - -**SEMANTIC (6)** - SemanticValidator (E0001-E0013): - -| Pattern | Weight | Invariants Checked | -|:--------|-------:|:-------------------| -| `semantic_no_default` | 8 | SELECT_NO_DEFAULT detection | -| `semantic_duplicate_variant` | 6 | VARIANT_DUPLICATE detection | -| `semantic_duplicate_named_arg` | 6 | NAMED_ARG_DUPLICATE detection | -| `semantic_term_positional` | 6 | TERM_POSITIONAL_ARGS warning | -| `semantic_no_variants` | 6 | Malformed select -> Junk | -| `semantic_combined` | 5 | Multiple semantic issues | - -**STRICT_MODE (5)** - DataIntegrityError triggering: - -| Pattern | Weight | Invariants Checked | -|:--------|-------:|:-------------------| -| `strict_syntax_junk` | 10 | SyntaxIntegrityError on Junk | -| `strict_format_missing` | 8 | FormattingIntegrityError on missing msg | -| `strict_format_cycle` | 6 | Cycle in format triggers error | -| `strict_add_invalid` | 8 | Multiple Junk -> SyntaxIntegrityError | -| `strict_combined` | 5 | Various strict mode failures | - -**CROSS_RESOURCE (4)** - Multi-resource scenarios: - -| Pattern | Weight | Invariants Checked | -|:--------|-------:|:-------------------| -| `cross_shadow` | 8 | SHADOW_WARNING with known_messages | -| `cross_cycle` | 10 | 
Cross-resource cycle via known_msg_deps | -| `cross_undefined` | 8 | Reference resolved by known_messages | -| `cross_chain_depth` | 6 | Chain depth spanning resources | - -**FROZEN_ERROR (4)** - FrozenFluentError Error Layer: - -| Pattern | Weight | Invariants Checked | -|:--------|-------:|:-------------------| -| `frozen_error_integrity` | 8 | verify_integrity() True for uncorrupted errors across all ErrorCategory values | -| `frozen_error_immutability` | 8 | setattr and delattr raise ImmutabilityViolationError after construction | -| `frozen_error_sealed` | 6 | type() subclassing raises TypeError (fuzzed subclass name) | -| `frozen_error_hash_stability` | 6 | hash() and content_hash stable across repeated calls | - -**DOMAIN_INTEGRITY (2)** - LedgerInvariantError and PersistenceIntegrityError: - -| Pattern | Weight | Invariants Checked | -|:--------|-------:|:-------------------| -| `domain_integrity_ledger` | 6 | `invariant_code`/`entity_ref` properties; `None` default for entity_ref; ImmutabilityViolationError on setattr; isinstance DataIntegrityError | -| `domain_integrity_persistence` | 6 | Basic construction matches message; ImmutabilityViolationError on setattr; isinstance DataIntegrityError | - -### Allowed Exceptions - -`DataIntegrityError` (and subclasses), `LedgerInvariantError`, `PersistenceIntegrityError`, `FrozenFluentError`, `ValueError`, `TypeError`, `KeyError`, `RecursionError`, `MemoryError` -- expected for strict mode and adversarial inputs. - ---- - -## `fuzz_iso` - -Target: `introspection.iso` -- ISO 3166-1 territory and ISO 4217 currency lookups, type guards, cache; `ftllexengine.clear_module_caches` -- selective and full module cache clearing. 
- -### Patterns - -| Pattern | Weight | Invariants Checked | -|:--------|-------:|:-------------------| -| `territory_lookup` | 15 | alpha2 matches input, name non-empty, hashable | -| `currency_lookup` | 15 | code matches input, decimal_digits in {0,2,3,4} | -| `type_guards` | 15 | Guard consistent with lookup result | -| `cache_consistency` | 12 | Repeated lookup returns same object | -| `list_functions` | 10 | Returns frozenset, elements typed, cardinality | -| `territory_currencies` | 10 | Returns tuple of 3-char uppercase codes | -| `cache_clear_stress` | 8 | Post-clear value equality preserved | -| `cross_reference` | 8 | Territory currencies resolve via get_currency | -| `invalid_input_stress` | 7 | Empty, long, null, unicode, mixed case | -| `decimal_digits_convenience` | 8 | `get_currency_decimal_digits` == `get_currency().decimal_digits`; valid range {0,2,3,4} | -| `clear_module_caches` | 6 | All-clear, single component, subset, unknown (silently ignored), re-lookup after clear | - -### Allowed Exceptions - -`BabelImportError`, `ValueError`, `KeyError`, `LookupError` -- Babel not installed or invalid locale/CLDR data. - ---- - -## `fuzz_lock` - -Target: `runtime.RWLock`, `runtime.rwlock.RWLock` -- public facade export identity, reader/writer exclusion, reentrant reads, write-reentry rejection, write-to-read downgrade rejection, read-to-write upgrade rejection, timeout, deadlock detection, negative timeout rejection, release-without-acquire rejection, zero-timeout non-blocking paths. - -Shared infrastructure imported from `fuzz_common` (`BaseFuzzerState`, metrics, reporting); domain-specific metrics tracked in `LockMetrics` dataclass (deadlocks detected, public export checks, timeouts, thread creation count, max concurrent threads). Weight skew detection compares actual vs intended pattern distribution. Periodic `gc.collect()` every 256 iterations and `-rss_limit_mb=4096` default. Corpus retention rate and eviction tracking enabled. 
- -### Patterns - -Ordered cheapest-first. Pattern selection uses deterministic round-robin through a pre-built weighted schedule (`select_pattern_round_robin`), immune to coverage-guided mutation bias that caused severe weight skew under FDP-based selection. Seed files provide FDP parameter bytes only (pattern is determined by iteration counter). - -| Pattern | Weight | Invariants Checked | -|:--------|-------:|:-------------------| -| `public_export_surface` | 4 | `ftllexengine.runtime.RWLock` aliases direct implementation and appears in runtime `__all__` | -| `reentrant_reads` | 5 | Same thread acquires read lock N times | -| `write_reentry_rejection` | 4 | Write-to-write reentry raises RuntimeError | -| `downgrade_rejection` | 4 | Write-to-read downgrade raises RuntimeError | -| `negative_timeout` | 4 | Negative timeout raises ValueError, lock still usable | -| `release_without_acquire` | 4 | Release without acquire raises RuntimeError, lock still usable | -| `upgrade_rejection` | 8 | Read-to-write upgrade raises RuntimeError | -| `zero_timeout_nonblocking` | 5 | timeout=0.0 fails immediately when lock held, sub-1ms | -| `rapid_lock_cycling` | 8 | Shared counter correct after rapid cycles | -| `cross_thread_handoff` | 6 | Rapid write handoff between threads, no lost entries | -| `concurrent_readers` | 12 | Multiple readers hold lock simultaneously | -| `timeout_acquisition` | 8 | TimeoutError raised, lock usable after timeout | -| `reader_writer_exclusion` | 15 | No concurrent reader+writer, no multi-writer | -| `writer_preference` | 10 | Waiting writer blocks new readers (fuzz-controlled timing) | -| `reader_starvation` | 6 | Continuous readers cannot starve waiting writer | -| `mixed_contention` | 7 | All prohibition checks and permitted ops interleaved across threads | - -### Allowed Exceptions - -`RuntimeError`, `TimeoutError`, `ValueError` -- expected from upgrade rejection, lock protocol violations, negative timeout rejection, and timeout-based 
acquisition. - ---- - -## `fuzz_numbers` - -Target: `runtime.functions.number_format` -- ROUND_HALF_EVEN oracle testing, custom `pattern=` path, grouping separator correctness, boundary values, min>max clamping, `numbering_system` parameter coverage, and `FluentNumber` wrapper contracts. - -Concern boundary: This fuzzer stress-tests the runtime NUMBER function formatting path. Distinct from `fuzz_builtins` (which covers NUMBER/DATETIME via the FTL `FluentBundle` evaluation pipeline with structural invariants); this fuzzer calls `number_format` directly to probe oracle correctness and covers the custom `pattern=` fast-path that `fuzz_builtins` does not reach. Key gap: `fuzz_builtins` verifies non-empty output and ROUND_HALF_EVEN at specific boundary values; this fuzzer covers ROUND_HALF_EVEN across 35 boundary pairs at precisions 0-3, grouping separator interaction with rounding, and the `minimumFractionDigits > maximumFractionDigits` clamping path. - -Shared infrastructure imported from `fuzz_common` (`BaseFuzzerState`, metrics, reporting); domain-specific metrics tracked in `NumbersMetrics` dataclass. Pattern selection uses deterministic round-robin through a pre-built weighted schedule (`select_pattern_round_robin`), immune to coverage-guided mutation bias. Periodic `gc.collect()` every 256 iterations and `-rss_limit_mb=4096` default. 
- -### Patterns - -9 patterns across 4 categories: - -**ORACLE (4)** - ROUND_HALF_EVEN correctness at precision boundaries: - -| Pattern | Weight | Invariants Checked | -|:--------|-------:|:-------------------| -| `boundary_values` | 15 | 35 `(precision, x.y5)` midpoints: even-digit midpoints round down (ROUND_HALF_EVEN) | -| `grouping_oracle` | 16 | ROUND_HALF_EVEN with `use_grouping=True` across en-US/de-DE/fr-FR (gap in builtins) | -| `negative_oracle` | 12 | ROUND_HALF_EVEN preserved for negative values: abs() applied before oracle comparison | -| `pattern_oracle` | 13 | Custom `pattern=` path: `parse_pattern(p).frac_prec[1]` precision, ROUND_HALF_EVEN oracle | - -**BOUNDARY (2)** - Edge-case value handling: - -| Pattern | Weight | Invariants Checked | -|:--------|-------:|:-------------------| -| `large_integers` | 11 | Values >1e9: non-empty, grouping separators present for en-US | -| `min_gt_max` | 11 | `minimumFractionDigits > maximumFractionDigits`: result non-empty, no exception | - -**CONTRACTS (2)** - Determinism and value preservation: - -| Pattern | Weight | Invariants Checked | -|:--------|-------:|:-------------------| -| `determinism` | 11 | Same `(value, locale, kwargs)` always produces identical output | -| `value_preservation` | 11 | Formatted numeric content matches `Decimal` input through grouping/sign stripping | - -**PARAMETERS (1)** - New formatting parameter coverage: - -| Pattern | Weight | Invariants Checked | -|:--------|-------:|:-------------------| -| `numbering_system` | 9 | Non-Latin digit systems (arab, arabext, deva, beng): non-empty FluentNumber; determinism | - -### Allowed Exceptions - -`ValueError`, `TypeError` -- invalid parameters and type validation. 
- ---- - -## `fuzz_parse_decimal` - -Target: `parsing.numbers.parse_decimal`, `parsing.numbers.parse_fluent_number`, `parsing.guards.is_valid_decimal`, `core.locale_utils` helpers -- locale-aware decimal parsing, FluentNumber parsing, locale normalization equivalence, locale boundary validation, Babel locale cache behavior, and system locale resolution. - -Concern boundary: This fuzzer owns the text-to-`Decimal` and text-to-`FluentNumber` parse surface that the runtime NUMBER-formatting fuzzers do not touch. It covers canonical locale-formatted inputs, the public `parse_decimal()` + `make_fluent_number()` composition contract exposed as `parse_fluent_number()`, locale spelling normalization (`en-US` vs `en_US` vs mixed case), `require_locale_code()` trim/type/structure/canonicalization rules, public soft-error contracts, Babel locale cache reuse/clearing, and `get_system_locale()` precedence through environment variables and `locale.getlocale()`, including encoded `C.UTF-8` / `POSIX.UTF-8` pseudo-locale fallback. - -Shared infrastructure imported from `fuzz_common`; domain-specific metrics tracked in `ParseDecimalMetrics` dataclass (`parse_calls`, `parse_successes`, `soft_errors`, `fluent_number_checks`, locale variant/boundary/cache/system checks). Pattern selection uses deterministic round-robin. Periodic `gc.collect()` every 256 iterations and `-rss_limit_mb=4096` default. 
- -### Patterns - -| Pattern | Weight | Invariants Checked | -|:--------|-------:|:-------------------| -| `canonical_values` | 14 | Known locale-formatted decimals parse to exact `Decimal` values | -| `parse_fluent_number_api` | 12 | `parse_fluent_number()` matches public `parse_decimal()` + `make_fluent_number()` composition | -| `locale_variants` | 12 | Equivalent locale spellings produce identical parse results | -| `invalid_soft_error` | 12 | Invalid decimal text returns soft errors, not silent success | -| `require_locale_code_api` | 10 | `require_locale_code()` trims/canonicalizes valid input and rejects blank, invalid, non-string, and overlong values | -| `type_guard_contract` | 10 | `is_valid_decimal()` accepts valid finite decimals and rejects bad values | -| `babel_locale_cache` | 10 | Locale normalization/cache clear cycles preserve locale-object equivalence | -| `system_locale_resolution` | 10 | `get_system_locale()` respects precedence and skips encoded C/POSIX pseudo-locales | -| `raw_unicode_stability` | 12 | Arbitrary Unicode inputs preserve the public result-or-errors contract | - -### Allowed Exceptions - -`ValueError`, `TypeError`, `OSError`, `RuntimeError`, `UnicodeEncodeError`, `FrozenFluentError` -- invalid locale/input handling and soft-error plumbing. - ---- - -## `fuzz_plural` - -Target: `runtime.plural_rules.select_plural_category` -- CLDR plural category selection across locales, number types, and precision-aware v-operand handling. 
- -### Patterns - -| Pattern | Weight | Invariants Checked | -|:--------|-------:|:-------------------| -| `category_validity` | 15 | Result in {zero, one, two, few, many, other} | -| `precision_sensitivity` | 15 | Precision changes v operand, both results valid | -| `locale_coverage` | 12 | High-leverage locales with boundary numbers | -| `locale_fallback` | 8 | Invalid/unknown locales fall back gracefully | -| `determinism` | 12 | Same inputs always return same category | -| `number_type_variety` | 10 | int, float, Decimal all produce valid categories | -| `boundary_numbers` | 12 | CLDR boundary values (0, 1, 2, 5, 11, 21, 100) | -| `cache_consistency` | 8 | LRU-cached locale returns consistent results | -| `extreme_inputs` | 5 | Huge, negative, NaN, Inf, high precision | -| `raw_bytes` | 3 | Malformed input stability | - -### Allowed Exceptions - -`ValueError`, `TypeError`, `OverflowError`, `InvalidOperation` -- invalid numbers and arithmetic edge cases. - ---- - -## `fuzz_oom` - -Target: `syntax.parser.FluentParserV1` -- small inputs producing massive ASTs ("Billion Laughs" style DoS). 
- -### Patterns - -| Pattern | Weight | Invariants Checked | -|:--------|-------:|:-------------------| -| `placeable_nest` | 8 | Nested placeable depth limit | -| `attribute_explosion` | 8 | Many attributes per message | -| `select_nest` | 7 | Nested select expressions | -| `variant_explosion` | 7 | Many variants per select | -| `reference_chain` | 8 | Long message reference chains | -| `term_nest` | 7 | Nested term references | -| `mixed_placeable_select` | 7 | Combined placeable/select nesting | -| `attribute_select_combo` | 7 | Attributes with selects inside | -| `raw_bytes` | 10 | Malformed input stability | -| `comment_flood` | 6 | Many comments before message | -| `message_flood` | 6 | Many small messages | -| `multiline_value` | 6 | Long multiline continuations | -| `variant_expression_explosion` | 6 | Variants with placeables in arms | -| `cyclic_chain` | 6 | Self-referencing message cycles | -| `term_message_cross_ref` | 6 | Terms and messages cross-referencing | -| `attr_deep_placeable` | 5 | Attributes with deep nesting | - -### Allowed Exceptions - -`FrozenFluentError` -- depth guard and max nesting enforcement. - ---- - -## `fuzz_roundtrip` - -Target: `syntax.parser.FluentParserV1`, `syntax.serializer.serialize` -- parser-serializer convergence property S(P(S(P(x)))) == S(P(x)) across all grammar productions. - -Pattern selection uses deterministic round-robin through a pre-built weighted schedule (`select_pattern_round_robin`), immune to coverage-guided mutation bias. FDP bytes are used exclusively for pattern parameters, not pattern selection. String and RegEx instrumentation hooks enabled for deeper coverage of identifier lookups and pattern-based parsing. Custom mutator parses valid FTL, applies AST-level mutations (swap variants, duplicate attributes, mutate variant keys, nest placeables, shuffle entries) using `dataclasses.replace()` on frozen AST nodes, serializes, then applies byte-level mutation on top. 
Multi-pass convergence checks (S2 == S3 == S4) verify serialization stabilizes within 3 passes. AST structural comparison (ignoring spans) catches bugs where serialization normalizes structural differences that string comparison misses. Junk ratio tracked and warned when >50%. Periodic `gc.collect()` every 256 iterations and `-rss_limit_mb=4096` default. - -When a convergence failure is detected, the fuzzer writes finding artifacts (source.ftl, s1.ftl, s2.ftl, meta.json) to `.fuzz_atheris_corpus/roundtrip/findings/`. These artifacts enable post-mortem debugging without Atheris and can be replayed via `python fuzz_atheris/fuzz_atheris_replay_finding.py`. - -### Patterns - -| Pattern | Weight | Invariants Checked | -|:--------|-------:|:-------------------| -| `simple_message` | 10 | Basic id = value roundtrips | -| `variable_placeable` | 12 | { $var } placeables survive roundtrip | -| `term_reference` | 8 | -term definitions and { -term } references | -| `message_reference` | 8 | { other-msg } cross-references | -| `select_expression` | 15 | Plural/string selector with variants | -| `attributes` | 10 | .attr = value on messages | -| `comments` | 5 | #, ##, ### comment types | -| `function_call` | 8 | NUMBER, DATETIME, CURRENCY with args | -| `multiline_pattern` | 7 | Continuation line values | -| `mixed_resource` | 12 | Multiple entry types combined | -| `deep_nesting` | 5 | String literals, nested variable refs | -| `raw_unicode` | 5 | Random Unicode junk-free convergence | -| `convergence_stress` | 5 | Multi-pass S2 == S3 == S4 stabilization | - -### Allowed Exceptions - -`ValueError`, `RecursionError`, `MemoryError`, `UnicodeDecodeError`, `UnicodeEncodeError` -- parser/serializer resource limits and encoding edge cases. 
- ---- - -## `fuzz_serializer` - -Target: `syntax.serializer.serialize`, `syntax.parser.FluentParserV1`, `syntax.visitor.ASTVisitor`, `syntax.visitor.ASTTransformer` -- AST-construction serializer roundtrip idempotence plus visitor/transformer dispatch and validation. - -Concern boundary: This fuzzer programmatically constructs AST nodes (bypassing the parser) and feeds them to the serializer. This is the ONLY Atheris fuzzer that can produce AST states the parser would never emit -- e.g. TextElement values with leading whitespace, syntax characters in pattern-initial positions, or structurally valid but semantically unusual combinations. The same AST-construction model now also drives direct `ASTVisitor` and `ASTTransformer` coverage: custom dispatch methods, list-expanding transforms, and invalid scalar-field replacements that must raise `TypeError`. - -Shared infrastructure imported from `fuzz_common` (`BaseFuzzerState`, `gen_ftl_identifier`, `gen_ftl_value`, `write_finding_artifact`, `print_fuzzer_banner`, metrics, reporting); domain-specific metrics tracked in `SerializerMetrics` dataclass (ast_construction_failures, convergence_failures, junk_on_reparse, validation_errors, visitor_runs, transformer_runs). Pattern selection uses deterministic round-robin through a pre-built weighted schedule (`select_pattern_round_robin`), immune to coverage-guided mutation bias. Custom mutator applies whitespace injection and syntax character insertion mutations before byte-level mutation. Finding artifacts written to `.fuzz_atheris_corpus/serializer/findings/`. Periodic `gc.collect()` every 256 iterations and `-rss_limit_mb=4096` default. 
- -### Patterns - -13 patterns across 5 categories: - -**WHITESPACE (2)** - Leading/trailing whitespace in TextElement values: - -| Pattern | Weight | Invariants Checked | -|:--------|-------:|:-------------------| -| `leading_whitespace` | 18 | Leading spaces in message and attribute values roundtrip correctly | -| `trailing_whitespace` | 8 | Trailing spaces in values roundtrip correctly | - -**SYNTAX (2)** - FTL syntax characters in values: - -| Pattern | Weight | Invariants Checked | -|:--------|-------:|:-------------------| -| `syntax_chars_value` | 15 | Braces, dots, hash, asterisk, brackets in values | -| `string_literal_placeable` | 10 | StringLiteral placeables with edge-case content | - -**STRUCTURE (4)** - Structural edge cases: - -| Pattern | Weight | Invariants Checked | -|:--------|-------:|:-------------------| -| `simple_message` | 8 | Baseline AST-constructed message roundtrip | -| `attribute_edge_cases` | 12 | Attributes with whitespace/syntax-char values | -| `term_edge_cases` | 8 | Terms with whitespace in values and attributes | -| `select_expression` | 8 | AST-constructed select expressions with leading-space variant values | - -**COMPOSITION (2)** - Complex element combinations: - -| Pattern | Weight | Invariants Checked | -|:--------|-------:|:-------------------| -| `mixed_elements` | 8 | Interleaved TextElement/Placeable with leading spaces | -| `multiline_value` | 5 | Multi-line values with indentation edge cases | - -**VISITOR (3)** - Direct `syntax.visitor` coverage: - -| Pattern | Weight | Invariants Checked | -|:--------|-------:|:-------------------| -| `visitor_dispatch` | 8 | Custom `visit_*` handlers and generic traversal both execute | -| `transformer_roundtrip` | 8 | List-expanding `ASTTransformer` output remains serializable and convergent | -| `transformer_validation` | 6 | Invalid scalar replacements for required fields raise `TypeError` | - -### Allowed Exceptions - -`ValueError`, `TypeError`, `RecursionError`, 
`MemoryError`, `UnicodeDecodeError`, `UnicodeEncodeError` -- AST construction edge cases, serializer limits, visitor validation, and encoding boundaries. - ---- - -## `fuzz_cursor` - -Target: `syntax.cursor.Cursor`, `syntax.cursor.LineOffsetCache`, `syntax.cursor.ParseError`, `syntax.cursor.ParseResult`, `syntax.position` helpers -- cursor state-machine behavior, contextual parse-error rendering, and line/column parity across normalized source. - -Concern boundary: Existing parser fuzzers only touch `Cursor` indirectly through parser control flow. This fuzzer hits the subsystem directly: constructor guards, `peek()`/`advance()`/`expect()` semantics, whitespace skipping, line navigation, parity between cursor-computed positions and standalone helper functions, `ParseError.format_with_context()` rendering, and `ParseResult` contract behavior on arbitrary raw and CRLF-normalized sources. - -Shared infrastructure imported from `fuzz_common`; domain-specific metrics tracked in `CursorMetrics` dataclass. Pattern selection uses deterministic round-robin. Periodic `gc.collect()` every 256 iterations and `-rss_limit_mb=4096` default. 
- -### Patterns - -| Pattern | Weight | Invariants Checked | -|:--------|-------:|:-------------------| -| `constructor_guards` | 12 | Negative/out-of-range positions reject; EOF cursor behavior is correct | -| `peek_advance_expect` | 14 | `peek`, `advance`, `slice_*`, and `expect` match manual semantics | -| `whitespace_skips` | 12 | `skip_spaces`/`skip_whitespace` agree with reference implementations | -| `line_navigation` | 10 | Line-start/line-end helpers preserve valid cursor positions | -| `line_col_parity` | 12 | Cursor-computed line/column matches cached position helper results | -| `parse_error_formatting` | 10 | Context rendering stays non-empty and positionally coherent | -| `position_helpers` | 12 | `line_offset`, `column_offset`, `get_line_content`, `format_position` stay consistent | -| `parse_result_contract` | 8 | `ParseResult` success/error states preserve documented invariants | - -### Allowed Exceptions - -`ValueError`, `EOFError`, `UnicodeEncodeError` -- invalid positions, EOF access, and encoding edge cases. - ---- - -## `fuzz_runtime` - -Target: `runtime.bundle.FluentBundle` -- full resolver stack, strict mode, caching, concurrency, security, AST lookup facade, and canonical constructor locale boundary contracts. - -Scenario selection uses deterministic round-robin through a pre-built weighted schedule (`select_pattern_round_robin`), immune to coverage-guided mutation bias. Security sub-pattern selection within `_perform_security_fuzzing` remains FDP-based (second-level, already guaranteed execution). FDP bytes are used exclusively for scenario parameters, not scenario selection. String and RegEx instrumentation hooks enabled for deeper coverage of message ID lookups, selector matching, and pattern-based parsing. 
Shared infrastructure imported from `fuzz_common` (`BaseFuzzerState`, metrics, reporting); domain-specific metrics tracked in `RuntimeMetrics` dataclass, including `FluentBundle.get_message()` / `get_term()` AST lookup checks, constructor locale boundary checks, and direct `validate_message_variables()` compatibility. Weight skew detection compares actual vs intended scenario distribution. Periodic `gc.collect()` every 256 iterations and `-rss_limit_mb=4096` default. - -### Patterns - -| Pattern | Weight | Invariants Checked | -|:--------|-------:|:-------------------| -| `core_runtime` | 40 | Frozen error checksum, cache stability, determinism | -| `strict_mode` | 20 | Zero errors in strict format_pattern | -| `caching` | 15 | Cache hit determinism, corruption detection | -| `security` | 10 | *(8 sub-patterns below)* | -| `concurrent` | 10 | No deadlocks, 2-thread barrier test | -| `differential` | 5 | Same FTL, different configs, no crash divergence | - -Security sub-patterns: - -| Sub-pattern | Weight | Attack Vector | -|:------------|-------:|:--------------| -| `security_recursion` | 25 | Deep placeables, cyclic refs, self-ref terms | -| `security_memory` | 20 | Large values, many variants/attributes | -| `security_cache_poison` | 15 | inf/nan/None/list as args | -| `security_function_inject` | 12 | Custom function registration + recursive cross-context calls | -| `security_locale_boundary` | 8 | Canonicalize valid locales to lowercase underscore; reject blank, non-string, invalid, and overlong constructor locales | -| `security_expansion_budget` | 8 | Billion Laughs exponential message expansion (max_expansion_size) | -| `security_dag_expansion` | 7 | DAG shared-reference args stress _make_hashable node budget | -| `security_dict_functions` | 5 | Dict-as-functions constructor rejection (TypeError guard) | - -### Memory Management - -Two reference cycle sources were fixed in ftllexengine 0.101.0 (MEM-REFCYCLE-001): (1) `ASTVisitor._instance_dispatch_cache` 
stored bound methods referencing `self`, and (2) `FrozenFluentError.__traceback__` retained resolver frames. Both are now eliminated at source. The fuzzer still runs `gc.collect()` every 256 iterations as a defensive measure against Atheris instrumentation overhead, and defaults to `-rss_limit_mb=4096` as a safety net. - -### Allowed Exceptions - -`CacheCorruptionError`, `FormattingIntegrityError`, `WriteConflictError`, `FrozenFluentError`, `RecursionError`, `MemoryError` -- integrity violations are findings; depth guards and resource limits are safety mechanisms. - ---- - -## `fuzz_scope` - -Target: `runtime.resolver` (via `FluentBundle`) -- variable scoping, term argument isolation, message reference scope inheritance, ResolutionContext push/pop, GlobalDepthGuard cross-context depth tracking, select expression scope, bidi isolation marks. - -### Patterns - -| Pattern | Weight | Invariants Checked | -|:--------|-------:|:-------------------| -| `term_arg_isolation` | 12 | Terms see ONLY explicit args, not caller's scope | -| `variable_shadowing` | 12 | External $var preserved around term call | -| `message_ref_scope` | 10 | Referenced messages share caller's args | -| `select_scope` | 10 | Selector and variant bodies share message scope | -| `attribute_scope` | 8 | Attribute patterns share message scope | -| `bidi_isolation` | 8 | FSI/PDI wrap values, don't alter content | -| `function_arg_scope` | 8 | Function args evaluated in calling scope | -| `nested_term_scope` | 8 | Nested terms maintain independent scopes | -| `scope_chain` | 8 | Message ref chains share args (depth 2-4) | -| `cross_message_isolation` | 6 | Independent messages don't pollute each other | -| `depth_guard_boundary` | 5 | Self-ref, mutual recursion, deep chains hit limits | -| `adversarial_scope` | 5 | Scope leaks, missing vars, empty values, fuzzed IDs | -| `expansion_size_limit` | 5 | `_total_chars` budget fires, EXPANSION_BUDGET_EXCEEDED returned | - -### Allowed Exceptions - 
-`ValueError`, `TypeError`, `OverflowError`, `FrozenFluentError`, `RecursionError`, `RuntimeError` -- invalid inputs, depth guard enforcement, and resolution errors. - ---- - -## `fuzz_structured` - -Target: `syntax.parser.FluentParserV1`, `syntax.serializer.FluentSerializer` -- grammar-aware AST construction and roundtrip verification. - -Pattern selection uses deterministic round-robin through a pre-built weighted schedule (`select_pattern_round_robin`), immune to coverage-guided mutation bias. FDP bytes are used exclusively for pattern parameters, not pattern selection. String and RegEx instrumentation hooks enabled for deeper coverage of identifier lookups and pattern-based parsing. Custom mutator parses valid FTL, applies AST-level mutations (swap variants, duplicate attributes, mutate variant keys, nest placeables, shuffle entries), serializes, then applies byte-level mutation on top. Module-level `FluentSerializer` instance reused across iterations (avoids per-call allocation). Junk ratio tracked and warned when >50%. Periodic `gc.collect()` every 256 iterations and `-rss_limit_mb=4096` default. - -When a convergence failure is detected, the fuzzer writes finding artifacts (source.ftl, s1.ftl, s2.ftl, meta.json) to `.fuzz_atheris_corpus/structured/findings/`. These artifacts enable post-mortem debugging without Atheris and can be replayed via `python fuzz_atheris/fuzz_atheris_replay_finding.py`. 
- -### Patterns - -| Pattern | Weight | Invariants Checked | -|:--------|-------:|:-------------------| -| `simple_messages` | 10 | Parse produces entries | -| `variable_messages` | 12 | Variable placeables parse correctly | -| `term_definitions` | 8 | Term definitions accepted | -| `attribute_messages` | 10 | Attribute parsing | -| `select_expressions` | 15 | Select with plural/string keys | -| `comment_entries` | 5 | Comment parsing | -| `multi_entry` | 15 | Multi-message resource handling | -| `corrupted_input` | 10 | Malformed input stability | -| `deep_nesting` | 8 | Deep placeable/reference nesting | -| `roundtrip_verify` | 7 | S(P(S(P(x)))) == S(P(x)) convergence | - -### Allowed Exceptions - -`RecursionError`, `MemoryError`, `UnicodeDecodeError`, `UnicodeEncodeError` -- resource limits and encoding edge cases. - ---- - -## `fuzz_localization` - -Target: `localization.orchestrator.FluentLocalization`, `localization.loading.PathResourceLoader` -- multi-locale orchestration, canonical locale boundary and boot validation APIs, eager loader-backed initialization, fallback chains, `LoadSummary`, and post-construction mutation APIs. - -Concern boundary: This fuzzer stress-tests the FluentLocalization lifecycle orthogonal to FluentBundle. Distinct from fuzz_runtime (single bundle) and fuzz_integrity (validation). 
It covers constructor locale canonicalization/deduplication and unified rejection errors, multi-locale fallback traversal, `add_resource()` mutation between calls, `has_message()`/`get_message_ids()` API contracts, `get_message()`/`get_term()` AST lookup precedence, `require_clean()`, `validate_message_variables()`, and `validate_message_schemas()` boot-validation APIs, per-locale `get_cache_audit_log()` access, custom function registration and invocation, `on_fallback` callback delivery, introspection delegation, and the loader-backed initialization path: eager resource loading, canonical `{locale}` directory substitution, `PathResourceLoader` path validation, per-locale success/not-found/error accounting, junk-bearing loads, and `source_path` propagation into `LoadSummary`. - -Shared infrastructure imported from `fuzz_common`; domain-specific metrics tracked in `LocalizationMetrics` dataclass (including AST lookup, schema-validation, cache-audit, constructor locale-boundary, and loader/boot-validation counters). Pattern selection uses deterministic round-robin. Periodic `gc.collect()` every 256 iterations and `-rss_limit_mb=4096` default. 
- -### Patterns - -| Pattern | Weight | Invariants Checked | -|:--------|-------:|:-------------------| -| `single_locale_add_resource` | 10 | add_resource accepts valid FTL; format returns the stored value | -| `multi_locale_fallback` | 10 | Primary miss triggers fallback; callback sees resolved locale | -| `chain_of_3_fallback` | 8 | 3-locale chain traverses correctly | -| `format_value_missing` | 7 | Missing message returns fallback text plus errors | -| `format_with_variables` | 9 | Variables propagate through fallback chain | -| `add_resource_mutation` | 7 | add_resource between format calls updates visible state | -| `has_message_api` | 7 | has_message/has_attribute contracts hold across locales | -| `ast_lookup_api` | 7 | get_message/get_term honor fallback precedence and namespace boundaries | -| `get_message_ids_api` | 6 | get_message_ids returns all IDs without duplicates | -| `validate_resource_api` | 7 | validate_resource delegates and returns structured results | -| `validate_message_variables_api` | 6 | single-message schema validation matches AST lookup and raises integrity context on missing/mismatched schemas | -| `validate_message_schemas_api` | 6 | exact schema order, fallback resolution, and missing/extra variable failures | -| `add_function_custom` | 6 | Custom UPPER function registration/invocation works | -| `introspect_api` | 7 | get_message_variables/introspect_message stay consistent | -| `cache_audit_api` | 6 | get_cache_audit_log matches initialized locales and stats | -| `locale_boundary_api` | 5 | Constructor canonicalizes/deduplicates valid locales and rejects blank/non-string/invalid/overlong input | -| `on_fallback_callback` | 6 | on_fallback receives requested/resolved locale data | -| `loader_init_success` | 5 | Eager loader initialization records all-success summary data | -| `loader_not_found_fallback` | 5 | Primary miss increments not_found while fallback still resolves | -| `loader_junk_summary` | 4 | Junk-bearing resources 
are surfaced through LoadSummary | -| `loader_path_error` | 4 | Invalid `resource_id` becomes a loader error, not a crash | -| `require_clean_api` | 5 | clean initialization returns summary; missing/junk/error states raise integrity context | -| `boot_config_api` | 6 | `LocalizationBootConfig` validation, `boot_simple()` → FluentLocalization, `boot()` → 3-tuple, `required_messages` absent raises IntegrityCheckFailedError, `required_messages` present succeeds, second `boot()` call raises RuntimeError (one-shot enforcement) | - -### Allowed Exceptions - -`ValueError`, `TypeError`, `UnicodeEncodeError`, `FrozenFluentError`, `DataIntegrityError`, `FormattingIntegrityError`, `SyntaxIntegrityError`, `IntegrityCheckFailedError` -- invalid locale/resource input, loader validation, strict mode enforcement, required-message absence, and resolution errors. - ---- - -## `fuzz_dates` - -Target: `parsing.dates.parse_date`, `parsing.dates.parse_datetime` -- CLDR→strptime token mapping, locale-aware date/datetime parsing across 24 test locales (Latin-DMY, Latin-MDY, Latin-YMD, CJK, RTL). - -Concern boundary: This fuzzer stress-tests the bidirectional date parsing pipeline. Covers the `_babel_to_strptime` token mapping, all 14 pattern variants (short/medium/long/full plus 4-digit year oracle), adversarial inputs (null bytes, ANSI escapes, surrogates, 10000-char strings, invalid month/day values), and cross-locale format string generation. Key invariants: if result is None, errors must be non-empty; if result is not None, it must be a `date`/`datetime` instance; `parse_datetime` result must be instance of `datetime` (not bare `date`). The `four_digit_year_acceptance` pattern uses ISO 8601 as a ground-truth oracle: `parse_date("dd.MM.yyyy", locale)` for locales whose CLDR short pattern uses `yy` must return the same date as `parse_date("yyyy-MM-dd", locale)`. - -Shared infrastructure imported from `fuzz_common`; domain-specific metrics tracked in `DatesMetrics` dataclass. 
Pattern selection uses deterministic round-robin. Periodic `gc.collect()` every 256 iterations and `-rss_limit_mb=4096` default. - -### Patterns - -| Pattern | Weight | Invariants Checked | -|:--------|-------:|:-------------------| -| `parse_date_generated` | 14 | Generated date strings parse successfully per locale | -| `parse_datetime_generated` | 12 | Generated datetime strings parse per locale | -| `locale_variation` | 12 | Same date parsed in multiple locales | -| `style_variation` | 12 | short/medium/long/full styles all produce parseable strings | -| `cross_locale_agreement` | 10 | Parsing is consistent across 3 random locales | -| `adversarial_input` | 10 | Null bytes, surrogates, ANSI escapes handled without crash | -| `format_then_parse` | 12 | format → parse roundtrip (DATETIME output is parseable) | -| `invalid_date_values` | 10 | month=13, day=99 produce errors, not silent wrong dates | -| `empty_string` | 8 | Empty string → errors, not crash | -| `whitespace_input` | 8 | Whitespace-only → errors, not crash | -| `partial_date_strings` | 6 | Partial (year-only, month-only) inputs handled | -| `unicode_month_names` | 8 | Non-ASCII month names in CJK/RTL locales | -| `leap_year_boundary` | 8 | Feb 29 on leap/non-leap years | -| `four_digit_year_acceptance` | 8 | lv-LV/de-DE/pl-PL/fi-FI/ru-RU: dd.MM.yyyy == ISO oracle; must not return None | - -### Allowed Exceptions - -`ValueError`, `TypeError`, `UnicodeDecodeError`, `UnicodeEncodeError`, `FrozenFluentError` -- invalid locale, encoding edge cases, and bidirectional parse errors. - ---- - -## `fuzz_locale_context` - -Target: `runtime.locale_context.LocaleContext`, `core.locale_utils.normalize_locale` -- direct formatting API: `format_number()`, `format_currency()`, `format_datetime()`, canonical locale boundary handling, ROUND_HALF_EVEN rounding oracle, `numbering_system`/`use_grouping`/`currency_digits` parameter coverage, cross-locale determinism. 
- -Concern boundary: This fuzzer stress-tests the LocaleContext formatting layer, distinct from fuzz_builtins (which goes through FluentBundle/FTL) and fuzz_runtime (full runtime stack). Directly exercises the locale-aware formatting primitives that underpin the NUMBER and CURRENCY functions. Key invariant (oracle-based): `format_number(val, max_frac=N)` must round half-even (Babel default), verified by `Decimal.quantize(10^-N, ROUND_HALF_EVEN)`. Also covers `numbering_system`, `use_grouping`, and `currency_digits` parameters added to `format_number`/`format_currency`. Control chars stripped from currency symbols prevent log injection through formatted output. - -Shared infrastructure imported from `fuzz_common`; domain-specific metrics tracked in `LocaleContextMetrics` dataclass. Pattern selection uses deterministic round-robin. Periodic `gc.collect()` every 256 iterations and `-rss_limit_mb=2048` default (no Babel concurrency needed). - -### Patterns - -| Pattern | Weight | Invariants Checked | -|:--------|-------:|:-------------------| -| `format_number_int` | 8 | Integers format to non-empty output with canonical locale_code | -| `format_number_decimal` | 8 | Decimal values format to non-empty output | -| `format_number_precision` | 8 | ROUND_HALF_EVEN oracle at explicit min/max precision | -| `format_number_custom_pattern` | 6 | Custom number patterns produce non-empty output | -| `format_number_grouping` | 6 | Grouping on/off remains stable | -| `format_currency_standard` | 8 | Standard currency formatting is non-empty and oracle-safe | -| `format_currency_precision_override` | 6 | Currency display variants remain non-empty | -| `format_currency_custom_pattern` | 5 | Custom currency patterns produce non-empty output | -| `format_datetime_date_obj` | 7 | `date` promotion to midnight datetime formats correctly | -| `format_datetime_datetime_obj` | 7 | `datetime` formatting is non-empty | -| `format_datetime_style_combo` | 7 | Date/time style combinations remain 
non-empty | -| `format_datetime_pattern` | 6 | Custom datetime patterns produce non-empty output | -| `locale_create_adversarial` | 8 | Successful creates store canonical locale_code; invalid boundaries reject cleanly | -| `cross_locale_determinism` | 5 | Same value+locale → identical output on repeated calls | -| `format_number_numbering_system` | 7 | Non-Latin digit systems via `numbering_system=`: non-empty output; determinism | -| `format_currency_grouping` | 6 | `use_grouping=True/False` on large amounts: non-empty FluentNumber returned | -| `format_currency_digits` | 6 | `currency_digits=True/False`: ISO 4217 precision applied or bypassed; non-empty | -| `format_currency_numbering_system` | 7 | Non-Latin digit systems on `format_currency`: non-empty output; determinism | - -### Allowed Exceptions - -`ValueError`, `TypeError`, `OverflowError`, `UnicodeEncodeError`, `FrozenFluentError` -- invalid locale, Babel formatting errors, and out-of-range values. - ---- - -## `fuzz_introspection` - -Target: `introspection.message.IntrospectionVisitor`, `introspection.message.ReferenceExtractor`, `introspection.message.MessageIntrospection` -- `extract_variables()`, `extract_references()`, `extract_references_by_attribute()`, `introspect_message()`, `clear_introspection_cache()`, `FluentBundle` introspection facade. - -Concern boundary: This fuzzer uses programmatic AST construction (bypasses the FTL parser) to reach introspection code paths that parser-generated ASTs would never produce. Tests the `MAX_DEPTH` guard via `SelectExpression` chain at depths ± `MAX_DEPTH`, frozenset deduplication (same variable referenced N times → 1 entry), `requires_variable(x)` ↔ `get_variable_names()` consistency, cache correctness under repeated calls, and weakref/lock safety under the `threading.Lock`-protected result cache. - -Shared infrastructure imported from `fuzz_common`; domain-specific metrics tracked in `IntrospectionMetrics` dataclass. 
Pattern selection uses deterministic round-robin. Periodic `gc.collect()` every 256 iterations and `-rss_limit_mb=2048` default. - -### Patterns - -| Pattern | Weight | Invariants Checked | -|:--------|-------:|:-------------------| -| `extract_variables_simple` | 14 | Variables returned as frozenset; deduplication correct | -| `extract_references_simple` | 12 | Message/term refs returned correctly | -| `attribute_variables` | 10 | Attribute-level variables extracted | -| `deduplication_invariant` | 12 | N references to same var → 1 frozenset entry | -| `requires_variable_consistency` | 10 | requires_variable ↔ get_variable_names match | -| `deep_nesting_guard` | 10 | MAX_DEPTH chain triggers guard, no unhandled crash | -| `select_expression_vars` | 8 | Variables in select variants extracted | -| `term_reference_vars` | 8 | Term args as variable references | -| `cache_correctness` | 10 | Repeated calls return identical frozensets | -| `clear_cache` | 8 | clear_introspection_cache() resets state correctly | -| `bundle_facade` | 8 | FluentBundle.introspect_message() delegation | -| `adversarial_ast` | 5 | Programmatic AST edge cases (empty pattern, no elements) | -| `validate_variables_schema` | 8 | exact/superset/subset invariants; frozen result; missing/extra sets correct | - -### Allowed Exceptions - -`ValueError`, `TypeError`, `RecursionError`, `FrozenFluentError` -- invalid AST nodes, depth guard enforcement, and resolution errors. - ---- - -## `fuzz_diagnostics_formatter` - -Target: `diagnostics.formatter.DiagnosticFormatter`, `diagnostics.validation.ValidationError`, `diagnostics.validation.ValidationWarning`, `diagnostics.validation.ValidationResult` -- RUST/SIMPLE/JSON output formats, control-character escaping (log injection prevention), sanitize/redact modes, `format_validation_result()`, `format_error()`, `format_warning()`, `format_all()`. - -Concern boundary: This fuzzer stress-tests the diagnostic formatting pipeline as a security boundary. 
The primary invariant is that no raw ASCII control character (0x00-0x1F, 0x7F) survives into RUST or SIMPLE formatted output when injected into any diagnostic field (message, hint, function_name, argument_name, expected_type, received_type, ftl_location, resolution_path). Secondary invariants: JSON output always parses as valid JSON; sanitize mode bounds output length; redact mode replaces content with `[content redacted]` sentinel; `format_all()` contains each individual formatted diagnostic; `color=True` produces ANSI escape sequences. - -Shared infrastructure imported from `fuzz_common`; domain-specific metrics tracked in `FormatterMetrics` dataclass. Pattern selection uses deterministic round-robin. Periodic `gc.collect()` every 256 iterations and `-rss_limit_mb=2048` default. - -### Patterns - -| Pattern | Weight | Invariants Checked | -|:--------|-------:|:-------------------| -| `control_char_escaping` | 14 | No raw C0/DEL in RUST/SIMPLE output after injection | -| `format_rust_all_fields` | 12 | RUST non-empty; span line present; resolution_path separator | -| `format_json_valid` | 12 | JSON parseable; mandatory keys present; code matches | -| `format_simple` | 10 | Single-line for clean messages; code name in output | -| `sanitize_truncation` | 10 | Sanitize mode truncates; ellipsis marker present | -| `sanitize_redact` | 8 | Redact mode hides content; sentinel present | -| `format_error_location` | 10 | line/column present when set; absent when not set | -| `format_warning_context` | 8 | Context present in output; redact removes it | -| `format_validation_result_mixed` | 8 | passed/failed summary; include_warnings respected | -| `format_all_multiple` | 8 | Each individual diagnostic present in combined output | -| `color_ansi_mode` | 5 | color=True longer than color=False; ESC byte present | -| `adversarial_fields` | 5 | Control chars in rich fields all escaped | - -### Allowed Exceptions - -`ValueError`, `TypeError` -- invalid input types to 
format_error/format_warning. - ---- - -## Observability Standard - -All fuzzers import shared infrastructure from `fuzz_common.py` (`BaseFuzzerState`, metrics, reporting) and compose domain-specific metrics via separate dataclasses: - -- `BaseFuzzerState` dataclass with bounded deques (shared via `fuzz_common`) -- Domain metrics: `RoundtripMetrics`, `SerializerMetrics`, `StructuredMetrics`, `RuntimeMetrics`, `LockMetrics`, `IntegrityMetrics`, `CacheMetrics`, `BuiltinsMetrics`, `BridgeMetrics`, `FiscalMetrics`, `ISOMetrics`, `CurrencyMetrics`, `NumbersMetrics`, `OOMMetrics`, `PluralMetrics`, `ScopeMetrics`, `LocalizationMetrics`, `DatesMetrics`, `LocaleContextMetrics`, `IntrospectionMetrics`, `FormatterMetrics` (per-fuzzer) -- psutil RSS memory tracking with leak detection (quartile comparison) -- Performance percentiles: min/mean/median/p95/p99/max -- Per-pattern wall-time accumulation -- Weight skew detection: actual vs intended distribution per pattern, warns when >3x deviation -- Corpus retention rate: `corpus_evictions` / `corpus_entries_added` tracks FIFO churn -- Crash-proof JSON report via `atexit` (stderr + `.fuzz_atheris_corpus//`) -- argparse CLI (`--checkpoint-interval`, `--seed-corpus-size`) -- Top-10 slowest operations (max-heap) -- FIFO seed corpus management (`dict[str, bytes]`) with configurable max size and eviction tracking -- Deterministic round-robin weighted pattern routing (immune to coverage-guided mutation bias) -- Pattern-stratified corpus retention (per-pattern FIFO buckets preserve diversity) -- `atheris.enabled_hooks` for `str` and `RegEx` comparison feedback -- Periodic `gc.collect()` every 256 iterations -- `-rss_limit_mb=4096` default safety net -- Custom mutator (roundtrip, serializer, structured): AST-level mutations + byte-level mutation for structurally valid inputs -- Finding artifact system (roundtrip, serializer, structured): source/s1/s2/meta.json written to `.fuzz_atheris_corpus//findings/` -- 
`fuzz_atheris_replay_finding.py`: standalone reproduction of finding artifacts without Atheris instrumentation -- Adaptive time budgets: patterns exceeding 10x their mean cost are tracked (`time_budget_skips`) -- Performance outlier tracking: inputs exceeding 2x P99 latency are recorded with timestamps -- Per-pattern mean cost tracking: exponential moving average for cost-aware scheduling -- Graceful Ctrl+C handling: custom mutators catch `KeyboardInterrupt` and set status to "stopped" -- FTL-safe text generation (structured): 90% safe ASCII, 8% Unicode, 2% inline-safe special chars -- Consolidated `record_iteration_metrics`: single function for all fuzzers (time budgets, outlier tracking, corpus retention) -- Common FTL generation: `gen_ftl_identifier` and `gen_ftl_value` for deterministic FDP-based identifier/value generation -- Common finding artifacts: `write_finding_artifact` with parametric `extra_meta` for per-fuzzer metadata -- Common banner: `print_fuzzer_banner` for consistent startup output across all fuzzers + keywords: [atheris, fuzz inventory, fuzz targets, libfuzzer, corpus] + questions: ["what do the Atheris fuzzers cover?", "which targets exist?", "how do I map a target name to a file?"] +--- + +# Atheris Target Inventory + +## Summary + +| Target | File | Concern | +|:-------|:-----|:--------| +| `bridge` | `fuzz_bridge.py` | Function bridge and registry | +| `builtins` | `fuzz_builtins.py` | Built-in formatting functions | +| `cache` | `fuzz_cache.py` | Cache concurrency and audit behavior | +| `currency` | `fuzz_currency.py` | Currency formatting oracle | +| `cursor` | `fuzz_cursor.py` | Cursor and parse-position helpers | +| `dates` | `fuzz_dates.py` | Locale-aware date/datetime parsing | +| `diagnostics_formatter` | `fuzz_diagnostics_formatter.py` | Diagnostic formatter output | +| `graph` | `fuzz_graph.py` | Dependency graph algorithms | +| `integrity` | `fuzz_integrity.py` | Integrity and validation surfaces | +| `introspection` | 
`fuzz_introspection.py` | Message introspection | +| `iso` | `fuzz_iso.py` | ISO lookup/introspection | +| `locale_context` | `fuzz_locale_context.py` | LocaleContext formatting paths | +| `localization` | `fuzz_localization.py` | `FluentLocalization` orchestration | +| `lock` | `fuzz_lock.py` | RWLock contention behavior | +| `numbers` | `fuzz_numbers.py` | Number formatting oracle | +| `oom` | `fuzz_oom.py` | Parser object-density limits | +| `parse_currency` | `fuzz_parse_currency.py` | Currency parsing and symbol resolution | +| `parse_decimal` | `fuzz_parse_decimal.py` | Decimal and FluentNumber parsing | +| `plural` | `fuzz_plural.py` | CLDR plural category boundaries | +| `roundtrip` | `fuzz_roundtrip.py` | Parser/serializer roundtrip | +| `runtime` | `fuzz_runtime.py` | End-to-end runtime behavior | +| `scope` | `fuzz_scope.py` | Variable scoping invariants | +| `serializer` | `fuzz_serializer.py` | AST-construction serializer paths | +| `structured` | `fuzz_structured.py` | Structure-aware parser stress | + +## How To Run + +```bash +./scripts/fuzz_atheris.sh numbers --time 60 +./scripts/fuzz_atheris.sh --list +./scripts/fuzz_atheris.sh --replay runtime path/to/finding +``` diff --git a/images/FTLLexEngine.png b/images/FTLLexEngine.png index 7e10326c..5b640eee 100644 Binary files a/images/FTLLexEngine.png and b/images/FTLLexEngine.png differ diff --git a/pyproject.toml b/pyproject.toml index e368c280..97a41280 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -7,15 +7,31 @@ managed = true package = true [tool.validate-version] -# Check for `project_version: X.Y.Z` in the YAML frontmatter of these files -frontmatter_globs = ["docs/DOC_*.md", "DOC_*.md"] -frontmatter_key = "project_version" -# Check for `**FTLLexEngine Version**: X.Y.Z` footer in these specific files -footer_files = ["docs/QUICK_REFERENCE.md", "docs/TERMINOLOGY.md"] +# Check for `version: X.Y.Z` in YAML frontmatter for non-README documentation. 
+frontmatter_globs = [ + "CHANGELOG.md", + "CONTRIBUTING.md", + "PATENTS.md", + "docs/**/*.md", + "examples/**/*.md", + "fuzz_atheris/README.md", +] +frontmatter_key = "version" [tool.validate-docs] # Glob patterns for markdown files to scan -scan_globs = ["README.md", "CHANGELOG.md", "docs/**/*.md"] +scan_globs = ["README.md", "CHANGELOG.md", "docs/**/*.md", "examples/**/*.md", "fuzz_atheris/README.md"] +# Markdown files whose Python fences are intended to be copy-paste runnable. +python_exec_globs = [ + "README.md", + "docs/CUSTOM_FUNCTIONS_GUIDE.md", + "docs/LOCALE_GUIDE.md", + "docs/MIGRATION.md", + "docs/PARSING_GUIDE.md", + "docs/QUICK_REFERENCE.md", + "docs/TYPE_HINTS_GUIDE.md", + "docs/VALIDATION_GUIDE.md", +] # Substrings that trigger skipping a specific FTL code block skip_markers = [ "# ←", @@ -36,7 +52,7 @@ parser_path = "ftllexengine.syntax.parser:FluentParserV1" [project] name = "ftllexengine" -version = "0.162.0" +version = "0.163.0" description = "Python runtime for the Fluent (FTL) specification: bidirectional parsing, CLDR-backed locale-aware formatting, and fail-fast boot validation with structured audit evidence." 
readme = "README.md" requires-python = ">=3.13" @@ -103,15 +119,18 @@ babel = [ [dependency-groups] dev = [ "Babel>=2.18.0,<3.0.0", # Required for tests (locale formatting, parsing) - "pytest>=9.0.2", - "pytest-cov>=7.0.0", - "hypothesis>=6.151.9", - "mypy>=1.19.1", - "ruff>=0.15.7", + "pytest>=9.0.3", + "pytest-cov>=7.1.0", + "hypothesis>=6.152.1", + "mypy>=1.20.2", + "ruff>=0.15.11", "pytest-benchmark>=5.2.3", - "hypofuzz>=25.11.1", "psutil>=7.2.2", - "types-psutil>=7.2.2.20260130", + "types-psutil>=7.2.2.20260408", +] + +fuzz = [ + "hypofuzz>=25.11.1", ] atheris = [ @@ -119,7 +138,7 @@ atheris = [ ] release = [ - "build>=1.4.0", + "build>=1.4.3", "twine>=6.2.0", ] @@ -200,7 +219,7 @@ omit = [ precision = 2 show_missing = true skip_covered = false -fail_under = 95.0 +fail_under = 100.0 exclude_lines = [ "pragma: no cover", "if TYPE_CHECKING:", @@ -313,7 +332,9 @@ ignore = [ # Parser: Inherent complexity from EBNF grammar # C901: Each function implements one EBNF grammar rule; branching is structural, not accidental "src/ftllexengine/syntax/parser/core.py" = ["PLR0912", "C901"] -"src/ftllexengine/syntax/parser/rules.py" = ["PLR0911", "PLR0912", "PLR0915", "C901"] +"src/ftllexengine/syntax/parser/entries.py" = ["PLR0911", "C901"] +"src/ftllexengine/syntax/parser/expressions.py" = ["PLR0911", "PLR0912", "C901"] +"src/ftllexengine/syntax/parser/patterns.py" = ["PLR0911", "PLR0912", "PLR0915", "C901"] "src/ftllexengine/syntax/parser/primitives.py" = ["C901"] # Visitor: Commented example code + inherent complexity from AST node dispatch diff --git a/scripts/fuzz_atheris.sh b/scripts/fuzz_atheris.sh index 04603f3d..f80d8e5e 100755 --- a/scripts/fuzz_atheris.sh +++ b/scripts/fuzz_atheris.sh @@ -162,6 +162,15 @@ discover_plugins # ============================================================================= _find_python313() { + # Prefer uv-managed interpreters in the current workspace. 
+ if command -v uv &>/dev/null; then + local uv_python + uv_python=$(uv python find 3.13 2>/dev/null || echo "") + if [[ -n "$uv_python" ]] && [[ -x "$uv_python" ]]; then + echo "$uv_python" + return 0 + fi + fi # Prefer pyenv (most reliable on macOS dev machines) if command -v pyenv &>/dev/null; then local pyenv_root resolved @@ -181,20 +190,30 @@ _find_python313() { } ensure_atheris_venv() { + if [[ -d "$ATHERIS_VENV" ]] && [[ ! -x "$ATHERIS_PYTHON" ]]; then + echo -e "${YELLOW}[WARN] .venv-atheris exists but its Python is missing or broken. Recreating...${NC}" + rm -rf "$ATHERIS_VENV" + fi + # If venv exists and is already Python 3.13, nothing to do. - if [[ -f "$ATHERIS_PYTHON" ]]; then + if [[ -x "$ATHERIS_PYTHON" ]]; then local venv_mm venv_mm=$("$ATHERIS_PYTHON" --version 2>&1 | grep -oE '[0-9]+\.[0-9]+' | head -1) if [[ "$venv_mm" == "3.13" ]]; then - return 0 + if "$ATHERIS_PYTHON" -c "import atheris, ftllexengine, psutil" &>/dev/null; then + return 0 + fi + echo -e "${YELLOW}[WARN] .venv-atheris is missing required packages. Recreating...${NC}" + rm -rf "$ATHERIS_VENV" + else + echo -e "${YELLOW}[WARN] .venv-atheris has Python $venv_mm (need 3.13). Recreating...${NC}" + rm -rf "$ATHERIS_VENV" fi - echo -e "${YELLOW}[WARN] .venv-atheris has Python $venv_mm (need 3.13). Recreating...${NC}" - rm -rf "$ATHERIS_VENV" fi local python313 if ! python313=$(_find_python313); then - log_error "Python 3.13 not found. Install with: pyenv install 3.13" + log_error "Python 3.13 not found. Install it or make it discoverable via uv/python3.13/pyenv." 
exit 1 fi @@ -409,7 +428,7 @@ AUTO-HEAL: EXAMPLES: ./scripts/fuzz_atheris.sh currency --time 60 - ./scripts/fuzz_atheris.sh stability --workers 8 + ./scripts/fuzz_atheris.sh runtime --workers 8 ./scripts/fuzz_atheris.sh --setup ./scripts/fuzz_atheris.sh --minimize currency .fuzz_atheris_corpus/crash_abc123 ./scripts/fuzz_atheris.sh --replay structured @@ -499,6 +518,7 @@ run_corpus_health() { log_error "Corpus health script not found: $health_script" exit 1 fi + run_diagnostics echo -e "${BOLD}Checking Corpus Health...${NC}" "$ATHERIS_PYTHON" "$health_script" } diff --git a/scripts/fuzz_atheris_corpus_health.py b/scripts/fuzz_atheris_corpus_health.py index 99dfe4da..4b55f2af 100755 --- a/scripts/fuzz_atheris_corpus_health.py +++ b/scripts/fuzz_atheris_corpus_health.py @@ -25,10 +25,7 @@ from pathlib import Path from typing import TYPE_CHECKING -# Add src to path for imports -sys.path.insert(0, str(Path(__file__).parent.parent / "src")) - -from ftllexengine.syntax.ast import ( # pylint: disable=C0413 +from ftllexengine.syntax.ast import ( Attribute, Comment, FunctionReference, @@ -44,7 +41,7 @@ TextElement, VariableReference, ) -from ftllexengine.syntax.parser import FluentParserV1 # pylint: disable=C0413 +from ftllexengine.syntax.parser import FluentParserV1 if TYPE_CHECKING: from ftllexengine.syntax.ast import Resource diff --git a/scripts/fuzz_hypofuzz.sh b/scripts/fuzz_hypofuzz.sh index 3e2d5e2b..a033bef6 100755 --- a/scripts/fuzz_hypofuzz.sh +++ b/scripts/fuzz_hypofuzz.sh @@ -216,7 +216,7 @@ _run_with_heartbeat() { show_help() { local project_name="Project" if [[ -f "$PROJECT_ROOT/pyproject.toml" ]]; then - project_name=$(python -c 'import sys; sys.path.append(sys.argv[1]); import tomllib; print(tomllib.load(open(sys.argv[2], "rb")).get("project", {}).get("name", "Project").capitalize())' "$PROJECT_ROOT" "$PROJECT_ROOT/pyproject.toml" 2>/dev/null || echo "Project") + project_name=$(python -c 'import pathlib, tomllib; pyproject = 
pathlib.Path(__import__("sys").argv[1]); print(tomllib.loads(pyproject.read_text(encoding="utf-8")).get("project", {}).get("name", "Project").capitalize())' "$PROJECT_ROOT/pyproject.toml" 2>/dev/null || echo "Project") fi cat << HELPEOF diff --git a/scripts/lint.sh b/scripts/lint.sh index 1856e8e3..8c012e16 100755 --- a/scripts/lint.sh +++ b/scripts/lint.sh @@ -76,13 +76,7 @@ unset _script_src PY_VERSION_NODOT="${PY_VERSION//./}" FAILED_ITEMS_FILE=$(mktemp) -# Auto-configure PYTHONPATH to include 'src' if it exists -# This solves 'Module not found' in examples/tests for 99% of projects -if [[ -d "src" ]]; then - export PYTHONPATH="${PWD}/src:${PYTHONPATH:-}" -else - export PYTHONPATH="${PWD}:${PYTHONPATH:-}" -fi +unset PYTHONPATH while [[ $# -gt 0 ]]; do case "$1" in @@ -118,7 +112,7 @@ pre_flight_diagnostics() { echo "[ INFO ] Environment : System/User ($VIRTUAL_ENV)" fi echo "[ INFO ] Python : $(python --version)" - echo "[ INFO ] PYTHONPATH : ${PYTHONPATH:-}" + echo "[ INFO ] Import Mode : Installed package (PYTHONPATH unset)" # Tool Availability Check for tool in ruff mypy; do @@ -159,6 +153,7 @@ declare -a TARGETS=() for dir in */; do dir=${dir%/} [[ "$dir" == .* ]] && continue # Skip hidden directories (.git, .venv, etc.) + [[ "$dir" == "tmp" ]] && continue # Scratch workspace: intentionally excluded from gates. # Only include if it contains at least one .py file (recursively) if find "$dir" -maxdepth 5 -name "*.py" -print -quit 2>/dev/null | grep -q ".py"; then @@ -268,7 +263,6 @@ run_mypy() { log_info " + Using ${config_source}: ${config}" # Flags: --no-color-output (agent), --no-error-summary (quiet) - # Note: We rely on PYTHONPATH being set correctly above local cmd=(mypy --config-file "$config" --python-version "$PY_VERSION" --no-color-output --no-error-summary) execute_tool "mypy" "$target" "${cmd[@]}" "$target" done @@ -449,4 +443,4 @@ else log_pass "All checks passed in $TARGET_VENV." 
echo "[EXIT-CODE] 0" >&2 exit 0 -fi \ No newline at end of file +fi diff --git a/scripts/publish-github-release-assets.sh b/scripts/publish-github-release-assets.sh new file mode 100755 index 00000000..b5892643 --- /dev/null +++ b/scripts/publish-github-release-assets.sh @@ -0,0 +1,111 @@ +#!/usr/bin/env bash + +set -euo pipefail + +die() { + printf 'error: %s\n' "$1" >&2 + exit 1 +} + +resolve_script_dir() { + local source_path="${BASH_SOURCE[0]}" + while [[ -h "${source_path}" ]]; do + local source_dir + source_dir="$(cd -P -- "$(dirname -- "${source_path}")" && pwd)" + source_path="$(readlink "${source_path}")" + if [[ "${source_path}" != /* ]]; then + source_path="${source_dir}/${source_path}" + fi + done + cd -P -- "$(dirname -- "${source_path}")" && pwd +} + +read_pyproject_field() { + local field="$1" + python3 - "${repo_root}/pyproject.toml" "${field}" <<'PY' +from __future__ import annotations + +import sys +import tomllib + +pyproject_path = sys.argv[1] +field = sys.argv[2] + +with open(pyproject_path, "rb") as handle: + project = tomllib.load(handle)["project"] + +if field == "normalized_name": + print(project["name"].replace("-", "_")) +elif field == "version": + print(project["version"]) +else: + raise SystemExit(f"unsupported field: {field}") +PY +} + +release_exists() { + gh release view "${tag_name}" >/dev/null 2>&1 +} + +ensure_release() { + if release_exists; then + return + fi + + if gh release create "${tag_name}" --verify-tag --title "${tag_name}" --generate-notes \ + >/dev/null 2>&1; then + return + fi + + release_exists || die "failed to converge GitHub release ${tag_name}" +} + +release_has_asset() { + local asset_name="$1" + gh release view "${tag_name}" --json assets --jq \ + ".assets | map(.name) | index(\"${asset_name}\") != null" +} + +upload_if_missing() { + local asset_path="$1" + local asset_name + asset_name="$(basename -- "${asset_path}")" + + [[ -f "${asset_path}" ]] || die "missing asset ${asset_path}" + + if [[ 
"$(release_has_asset "${asset_name}")" == "true" ]]; then + return + fi + + if gh release upload "${tag_name}" "${asset_path}" >/dev/null 2>&1; then + return + fi + + [[ "$(release_has_asset "${asset_name}")" == "true" ]] || die \ + "failed to upload ${asset_name} to release ${tag_name}" +} + +readonly script_dir="$(resolve_script_dir)" +readonly repo_root="$(cd -P -- "${script_dir}/.." && pwd)" +readonly normalized_name="$(read_pyproject_field normalized_name)" +readonly version="$(read_pyproject_field version)" +readonly tag_name="${1:-${RELEASE_TAG:-${GITHUB_REF_NAME:-}}}" +readonly expected_tag="v${version}" + +[[ -n "${GH_TOKEN:-}" ]] || die "GH_TOKEN is required" +[[ -n "${tag_name}" ]] || die "tag name is required" +[[ "${tag_name}" == "${expected_tag}" ]] || die "expected tag ${expected_tag}, got ${tag_name}" + +readonly assets=( + "${repo_root}/dist/${normalized_name}-${version}.tar.gz" + "${repo_root}/dist/${normalized_name}-${version}-py3-none-any.whl" + "${repo_root}/dist/${normalized_name}-${version}.sha256" +) + +ensure_release + +for asset_path in "${assets[@]}"; do + upload_if_missing "${asset_path}" +done + +printf 'GitHub release asset upload converged for %s\n' "${tag_name}" diff --git a/scripts/run_examples.py b/scripts/run_examples.py new file mode 100644 index 00000000..a8359ea9 --- /dev/null +++ b/scripts/run_examples.py @@ -0,0 +1,110 @@ +#!/usr/bin/env python3 +"""Run all shipped example scripts under the current project interpreter. + +This keeps example verification as a first-class repository workflow instead of +an ad-hoc manual step. The runner intentionally clears ``PYTHONPATH`` so +examples execute against the installed package contract, not a local path hack. 
+""" + +from __future__ import annotations + +import argparse +import os +import subprocess +import sys +from dataclasses import dataclass +from pathlib import Path + +REPO_ROOT = Path(__file__).resolve().parent.parent +EXAMPLES_DIR = REPO_ROOT / "examples" + + +@dataclass(slots=True) +class ExampleFailure: + """Captured example-run failure details.""" + + path: Path + returncode: int + stderr: str + + +def _clean_env() -> dict[str, str]: + """Return subprocess environment without legacy path overrides.""" + env = dict(os.environ) + env.pop("PYTHONPATH", None) + return env + + +def _discover_examples(pattern: str) -> list[Path]: + """Return runnable example scripts matching a glob pattern.""" + return sorted( + path + for path in EXAMPLES_DIR.glob(pattern) + if path.is_file() and path.suffix == ".py" + ) + + +def _run_example(path: Path) -> ExampleFailure | None: + """Execute one example script and return failure details if it fails.""" + result = subprocess.run( + [sys.executable, str(path)], + cwd=REPO_ROOT, + env=_clean_env(), + text=True, + capture_output=True, + timeout=60, + check=False, + ) + if result.returncode == 0: + return None + + stderr = result.stderr.strip() or result.stdout.strip() + return ExampleFailure(path=path, returncode=result.returncode, stderr=stderr) + + +def main() -> int: + """Run selected examples and return a process exit code.""" + parser = argparse.ArgumentParser(description=__doc__) + parser.add_argument( + "--pattern", + default="*.py", + help="Glob pattern inside examples/ (default: %(default)s)", + ) + parser.add_argument( + "--list", + action="store_true", + help="List matching examples without executing them.", + ) + args = parser.parse_args() + + examples = _discover_examples(args.pattern) + if not examples: + print(f"[FAIL] No examples matched pattern: {args.pattern}") + return 1 + + if args.list: + for path in examples: + print(path.relative_to(REPO_ROOT)) + return 0 + + failures: list[ExampleFailure] = [] + for path in 
examples: + rel_path = path.relative_to(REPO_ROOT) + print(f"[RUN] {rel_path}") + failure = _run_example(path) + if failure is not None: + failures.append(failure) + + if failures: + print("\n[FAIL] Example execution failures:") + for failure in failures: + rel_path = failure.path.relative_to(REPO_ROOT) + print(f" {rel_path} (exit {failure.returncode}): {failure.stderr}") + return 1 + + print(f"[PASS] Executed {len(examples)} example script(s).") + return 0 + + +if __name__ == "__main__": + raise SystemExit(main()) diff --git a/scripts/test.sh b/scripts/test.sh index 3515545a..2b3a13fb 100755 --- a/scripts/test.sh +++ b/scripts/test.sh @@ -57,7 +57,7 @@ else fi # [SECTION: SETUP] -DEFAULT_COV_LIMIT=95 +DEFAULT_COV_LIMIT=100 QUICK_MODE=false CI_MODE=false CLEAN_CACHE=true @@ -261,12 +261,7 @@ if ! [[ "$FAILURE_TAIL_LINES" =~ ^[0-9]+$ ]]; then exit 1 fi -# Auto-configure PYTHONPATH to include 'src' if it exists (Parity with lint.sh) -if [[ -d "src" ]]; then - export PYTHONPATH="${PWD}/src:${PYTHONPATH:-}" -else - export PYTHONPATH="${PWD}:${PYTHONPATH:-}" -fi +unset PYTHONPATH # [SECTION: DIAGNOSTICS] pre_flight_diagnostics() { @@ -280,7 +275,7 @@ pre_flight_diagnostics() { echo "[ INFO ] Environment : System/User ($VIRTUAL_ENV)" fi echo "[ INFO ] Python : $(python --version)" - echo "[ INFO ] PYTHONPATH : ${PYTHONPATH:-}" + echo "[ INFO ] Import Mode : Installed package (PYTHONPATH unset)" if ! 
command -v pytest >/dev/null 2>&1; then echo "[ FAIL ] Tooling : Pytest missing (uv sync required)" diff --git a/scripts/validate_docs.py b/scripts/validate_docs.py index 156aca2c..40a082c9 100755 --- a/scripts/validate_docs.py +++ b/scripts/validate_docs.py @@ -33,7 +33,9 @@ import importlib import json +import os import re +import subprocess import sys import tomllib from dataclasses import dataclass, field @@ -56,6 +58,7 @@ class CheckConfig: skip_markers: list[str] parser_path: str language: str = "ftl" + python_exec_globs: list[str] = field(default_factory=list) @classmethod def from_pyproject(cls, root: Path) -> Self: @@ -81,6 +84,7 @@ def from_pyproject(cls, root: Path) -> Self: skip_markers=config.get("skip_markers", []), parser_path=config.get("parser_path", ""), language=config.get("language", "ftl"), + python_exec_globs=config.get("python_exec_globs", []), ) @@ -93,6 +97,7 @@ class ExampleFailure: content: str error: str error_type: str = "SyntaxError" + language: str = "ftl" @dataclass @@ -120,6 +125,7 @@ def to_json(self) -> str: { "file": f.file, "line": f.line, + "language": f.language, "error_type": f.error_type, "message": f.error, "snippet": f.content[:100] + "...", @@ -146,6 +152,37 @@ def get_parser(path: str) -> Any: return None +def _python_env(root: Path) -> dict[str, str]: + """Return subprocess environment for installed-package snippet execution.""" + del root + env = dict(**os.environ) + env.pop("PYTHONPATH", None) + return env + + +def validate_python_code(code: str, root: Path) -> str | None: + """Execute a Python documentation block in isolation.""" + try: + result = subprocess.run( + [sys.executable, "-c", code], + cwd=root, + env=_python_env(root), + text=True, + capture_output=True, + timeout=20, + check=False, + ) + except subprocess.TimeoutExpired as exc: + return f"TimeoutExpired: {exc!s}" + + if result.returncode == 0: + return None + + stderr = result.stderr.strip() + stdout = result.stdout.strip() + return stderr or stdout or 
f"process exited with code {result.returncode}" + + def validate_code(code: str, parser: Any) -> str | None: """Validate a code block using the provided parser. @@ -197,13 +234,16 @@ def process_file( report.files_checked += 1 rel_path = str(md_file.relative_to(root)) + python_enabled = any(md_file.match(pattern) for pattern in config.python_exec_globs) for match in pattern.finditer(content): indent = match.group(1) language = match.group(2).lower() code_block = match.group(3) - if language != config.language: + should_validate_ftl = language == config.language + should_validate_python = python_enabled and language == "python" + if not should_validate_ftl and not should_validate_python: continue report.examples_validated += 1 @@ -218,11 +258,25 @@ def process_file( if any(m in code_block for m in config.skip_markers): continue - error = validate_code(code_block, parser) + if should_validate_python: + error = validate_python_code(code_block, root) + error_type = "PythonRuntimeError" + failure_language = "python" + else: + error = validate_code(code_block, parser) + error_type = "SyntaxError" + failure_language = config.language if error: line_num = content[: match.start()].count("\n") + 2 report.failures.append( - ExampleFailure(file=rel_path, line=line_num, content=code_block, error=error) + ExampleFailure( + file=rel_path, + line=line_num, + content=code_block, + error=error, + error_type=error_type, + language=failure_language, + ) ) diff --git a/scripts/validate_version.py b/scripts/validate_version.py index 00918f22..f4ed1eb4 100755 --- a/scripts/validate_version.py +++ b/scripts/validate_version.py @@ -12,22 +12,21 @@ CHECKS PERFORMED: CRITICAL (exit 1 — fail build): - 1. Runtime __version__ matches pyproject.toml [version_sync] + 1. Installed package version matches pyproject.toml [version_sync] 2. Version follows semantic versioning (MAJOR.MINOR.PATCH) [semver] 3. 
Version is not a development placeholder [not_placeholder] DOCUMENTATION (exit 2 — fail build): - 4. All docs/DOC_*.md frontmatter has correct project_version [doc_frontmatter] - 5. docs/QUICK_REFERENCE.md footer has correct version [quick_reference] - 6. docs/TERMINOLOGY.md footer has correct version [terminology] + 4. Configured markdown frontmatter declares the current version [configurable_frontmatter] + 5. Configured markdown footers declare the current version [configurable_footers] INFORMATIONAL (exit 0 — warn only): - 7. CHANGELOG.md mentions current version [changelog_entry] - 8. CHANGELOG.md has version link at bottom [changelog_link] + 6. CHANGELOG.md mentions current version [changelog_entry] + 7. CHANGELOG.md has version link at bottom [changelog_link] NOTES ON VACUOUS PASSES: - Checks 4-6 are "if present, must be correct." If a doc file does not - exist, or does not contain the version field/footer, the check passes and + Documentation checks are "if present, must be correct." If a configured doc + file does not exist, or does not contain the configured version field, the check passes and reports "(skipped)". This is intentional: the checks activate as the project grows, without requiring maintenance of this script. 
@@ -52,14 +51,16 @@ import re import sys import tomllib +from dataclasses import dataclass from pathlib import Path -from typing import Any, NamedTuple +from typing import Any # ============================================================================== # CONFIGURATION # ============================================================================== NO_COLOR = os.environ.get("NO_COLOR", "") == "1" +type PyProjectData = dict[str, Any] class Colors: @@ -80,7 +81,8 @@ class Colors: SEVERITY_WARNING = "warning" # exit 0 (informational) -class CheckResult(NamedTuple): +@dataclass(frozen=True, slots=True) +class CheckResult: """Result of a single validation check.""" name: str @@ -94,7 +96,7 @@ class CheckResult(NamedTuple): # ============================================================================== -def load_pyproject(root: Path) -> dict: # type: ignore[type-arg] +def load_pyproject(root: Path) -> PyProjectData: """Load and return the pyproject.toml data dict. Raises SystemExit(3) if the file is missing or malformed. 
@@ -117,13 +119,13 @@ def load_pyproject(root: Path) -> dict: # type: ignore[type-arg] sys.exit(3) -def get_pyproject_version(data: dict) -> str | None: # type: ignore[type-arg] +def get_pyproject_version(data: PyProjectData) -> str | None: """Extract version from already-loaded pyproject.toml data.""" raw = data.get("project", {}).get("version") return str(raw) if raw is not None else None -def get_project_name(data: dict) -> str: # type: ignore[type-arg] +def get_project_name(data: PyProjectData) -> str: """Extract [project].name from already-loaded pyproject.toml data.""" return str(data.get("project", {}).get("name", "unknown")) @@ -150,7 +152,7 @@ def get_runtime_version(package_name: str) -> str | None: # ============================================================================== -def check_version_sync(data: dict, package_name: str) -> CheckResult: # type: ignore[type-arg] +def check_version_sync(data: PyProjectData, package_name: str) -> CheckResult: """CRITICAL: installed package version must match pyproject.toml.""" pyproject_version = get_pyproject_version(data) @@ -194,7 +196,7 @@ def check_version_sync(data: dict, package_name: str) -> CheckResult: # type: i ) -def check_semver(data: dict) -> CheckResult: # type: ignore[type-arg] +def check_semver(data: PyProjectData) -> CheckResult: """CRITICAL: version must be valid semantic versioning (MAJOR.MINOR.PATCH) with non-negative integer components. 
@@ -244,7 +246,7 @@ def check_semver(data: dict) -> CheckResult: # type: ignore[type-arg] ) -def check_not_placeholder(data: dict) -> CheckResult: # type: ignore[type-arg] +def check_not_placeholder(data: PyProjectData) -> CheckResult: """CRITICAL: version must not be a development placeholder.""" version = get_pyproject_version(data) @@ -439,7 +441,7 @@ def check_configurable_footers( return results -def check_changelog_entry(data: dict, root: Path) -> CheckResult: # type: ignore[type-arg] +def check_changelog_entry(data: PyProjectData, root: Path) -> CheckResult: """INFORMATIONAL: CHANGELOG.md should document the current version.""" version = get_pyproject_version(data) if version is None: @@ -497,7 +499,7 @@ def check_changelog_entry(data: dict, root: Path) -> CheckResult: # type: ignor ) -def check_changelog_link(data: dict, root: Path) -> CheckResult: # type: ignore[type-arg] +def check_changelog_link(data: PyProjectData, root: Path) -> CheckResult: """INFORMATIONAL: CHANGELOG.md should have a hyperlink for the current version.""" version = get_pyproject_version(data) if version is None: @@ -629,11 +631,9 @@ def main() -> int: data = load_pyproject(root) # Derive project identity dynamically — no hardcoded strings - package_name = get_project_name(data) # e.g. "ftllexengine" - project_display_name = package_name.capitalize() # e.g. "Ftllexengine" - # Better: if the project name uses title-casing hints, derive it properly - # e.g. 
"ftllexengine" → "FTLLexEngine" via pyproject [tool.project-display-name] - # Fallback: capitalise first letter only (safe for any project name) + package_name = get_project_name(data) + val_config = data.get("tool", {}).get("validate-version", {}) + project_display_name = str(val_config.get("project_display_name", package_name)) canonical_version = get_pyproject_version(data) or "unknown" print(f"{Colors.BOLD}{Colors.CYAN}=== Version Consistency Check ==={Colors.RESET}") @@ -651,7 +651,6 @@ def main() -> int: ] # Configurable documentation checks - val_config = data.get("tool", {}).get("validate-version", {}) if val_config: frontmatter_globs = val_config.get("frontmatter_globs", []) frontmatter_key = val_config.get("frontmatter_key", "") diff --git a/scripts/verify-github-release.sh b/scripts/verify-github-release.sh new file mode 100755 index 00000000..dc8b790d --- /dev/null +++ b/scripts/verify-github-release.sh @@ -0,0 +1,80 @@ +#!/usr/bin/env bash + +set -euo pipefail + +die() { + printf 'error: %s\n' "$1" >&2 + exit 1 +} + +resolve_script_dir() { + local source_path="${BASH_SOURCE[0]}" + while [[ -h "${source_path}" ]]; do + local source_dir + source_dir="$(cd -P -- "$(dirname -- "${source_path}")" && pwd)" + source_path="$(readlink "${source_path}")" + if [[ "${source_path}" != /* ]]; then + source_path="${source_dir}/${source_path}" + fi + done + cd -P -- "$(dirname -- "${source_path}")" && pwd +} + +read_pyproject_field() { + local field="$1" + python3 - "${repo_root}/pyproject.toml" "${field}" <<'PY' +from __future__ import annotations + +import sys +import tomllib + +pyproject_path = sys.argv[1] +field = sys.argv[2] + +with open(pyproject_path, "rb") as handle: + project = tomllib.load(handle)["project"] + +if field == "normalized_name": + print(project["name"].replace("-", "_")) +elif field == "version": + print(project["version"]) +else: + raise SystemExit(f"unsupported field: {field}") +PY +} + +readonly script_dir="$(resolve_script_dir)" +readonly 
repo_root="$(cd -P -- "${script_dir}/.." && pwd)" +readonly normalized_name="$(read_pyproject_field normalized_name)" +readonly version="$(read_pyproject_field version)" +readonly tag_name="${1:-${RELEASE_TAG:-${GITHUB_REF_NAME:-}}}" +readonly expected_tag="v${version}" +readonly expected_assets=( + "${normalized_name}-${version}.tar.gz" + "${normalized_name}-${version}-py3-none-any.whl" + "${normalized_name}-${version}.sha256" +) + +[[ -n "${GH_TOKEN:-}" ]] || die "GH_TOKEN is required" +[[ -n "${tag_name}" ]] || die "tag name is required" +[[ "${tag_name}" == "${expected_tag}" ]] || die "expected tag ${expected_tag}, got ${tag_name}" + +release_tag="$(gh release view "${tag_name}" --json tagName --jq '.tagName')" +[[ "${release_tag}" == "${tag_name}" ]] || die \ + "expected release tag ${tag_name}, got ${release_tag}" + +is_draft="$(gh release view "${tag_name}" --json isDraft --jq '.isDraft')" +[[ "${is_draft}" == "false" ]] || die "release ${tag_name} is still a draft" + +is_prerelease="$(gh release view "${tag_name}" --json isPrerelease --jq '.isPrerelease')" +[[ "${is_prerelease}" == "false" ]] || die "release ${tag_name} is marked prerelease" + +for asset_name in "${expected_assets[@]}"; do + has_asset="$(gh release view "${tag_name}" --json assets --jq \ + ".assets | map(.name) | index(\"${asset_name}\") != null")" + [[ "${has_asset}" == "true" ]] || die \ + "release ${tag_name} is missing required asset ${asset_name}" +done + +release_url="$(gh release view "${tag_name}" --json url --jq '.url')" +printf 'Verified GitHub release handoff: %s\n' "${release_url}" diff --git a/src/ftllexengine/__init__.py b/src/ftllexengine/__init__.py index 9ea9d2e1..83e06d3e 100644 --- a/src/ftllexengine/__init__.py +++ b/src/ftllexengine/__init__.py @@ -28,8 +28,11 @@ LocalizationCacheStats - Cache statistics for all locales in a FluentLocalization Locale Utilities (no Babel dependency): - LoadStatus - Enum of resource load statuses (SUCCESS, NOT_FOUND, ERROR, SKIPPED) + 
LoadStatus - Enum of resource load statuses (SUCCESS, NOT_FOUND, ERROR) LocaleCode - Type alias for BCP-47 / POSIX locale codes (e.g. "en_US", "de") + MessageId - Type alias for Fluent message identifiers + ResourceId - Type alias for loader resource identifiers + FTLSource - Type alias for raw Fluent source text normalize_locale - Convert BCP-47 to canonical lowercase POSIX form get_system_locale - Detect locale from OS environment variables @@ -103,6 +106,7 @@ from .analysis import detect_cycles from .core.locale_utils import get_system_locale, normalize_locale, require_locale_code +from .core.semantic_types import FTLSource, LocaleCode, MessageId, ResourceId # Domain validators - no Babel dependency; no circular import risk from .core.validators import ( @@ -143,7 +147,6 @@ require_territory_code, ) from .introspection.message import MessageVariableValidationResult, validate_message_variables -from .localization.types import LocaleCode from .syntax import parse as parse_ftl from .syntax import parse_stream as parse_stream_ftl from .syntax import serialize as serialize_ftl @@ -288,9 +291,9 @@ def clear_module_caches( ``bundle.clear_cache()``. Example: - >>> import ftllexengine - >>> ftllexengine.clear_module_caches() # Clear all caches - >>> ftllexengine.clear_module_caches( # Clear only ISO + message caches + >>> import ftllexengine # doctest: +SKIP + >>> ftllexengine.clear_module_caches() # Clear all caches # doctest: +SKIP + >>> ftllexengine.clear_module_caches( # Clear only ISO + message caches # doctest: +SKIP ... components=frozenset({'introspection.iso', 'introspection.message'}) ... 
) """ @@ -400,7 +403,10 @@ def _want(name: str) -> bool: "SyntaxIntegrityError", "WriteConflictError", # Locale utilities (no Babel dependency) + "FTLSource", "LocaleCode", + "MessageId", + "ResourceId", "get_system_locale", "normalize_locale", # Domain validators (no Babel dependency) diff --git a/src/ftllexengine/__init__.pyi b/src/ftllexengine/__init__.pyi index d98ac4b2..f1af4a18 100644 --- a/src/ftllexengine/__init__.pyi +++ b/src/ftllexengine/__init__.pyi @@ -6,6 +6,10 @@ from .core.babel_compat import get_cldr_version as get_cldr_version from .core.locale_utils import get_system_locale as get_system_locale from .core.locale_utils import normalize_locale as normalize_locale from .core.locale_utils import require_locale_code as require_locale_code +from .core.semantic_types import FTLSource as FTLSource +from .core.semantic_types import LocaleCode as LocaleCode +from .core.semantic_types import MessageId as MessageId +from .core.semantic_types import ResourceId as ResourceId # Domain validators (no Babel dependency) from .core.validators import require_date as require_date @@ -81,7 +85,6 @@ from .localization import LocalizationCacheStats as LocalizationCacheStats from .localization import PathResourceLoader as PathResourceLoader from .localization import ResourceLoader as ResourceLoader from .localization import ResourceLoadResult as ResourceLoadResult -from .localization.types import LocaleCode as LocaleCode from .runtime import AsyncFluentBundle as AsyncFluentBundle from .runtime import FluentBundle as FluentBundle from .runtime import FluentNumber as FluentNumber @@ -143,7 +146,10 @@ __all__: list[str] = [ "SyntaxIntegrityError", "WriteConflictError", # Locale utilities (no Babel dependency) + "FTLSource", "LocaleCode", + "MessageId", + "ResourceId", "get_system_locale", "normalize_locale", # Domain validators (no Babel dependency) diff --git a/src/ftllexengine/analysis/graph.py b/src/ftllexengine/analysis/graph.py index f6400bfd..6793f88f 100644 --- 
a/src/ftllexengine/analysis/graph.py +++ b/src/ftllexengine/analysis/graph.py @@ -1,208 +1,60 @@ -"""Graph algorithms for dependency analysis. +"""Graph analysis facade for public dependency helpers. -Provides cycle detection using iterative depth-first search and -namespace-prefixed dependency set construction for validating -message/term reference graphs in FTL resources. - -Python 3.13+. +The implementation lives in ``ftllexengine.core.reference_graph`` so lower +layers can use the same algorithms without importing the higher-level analysis +package. This module remains the stable public namespace for callers and keeps +module-level compatibility for monkeypatch-based tests. """ from __future__ import annotations -from typing import TYPE_CHECKING, Final - -if TYPE_CHECKING: - from collections.abc import Mapping, Sequence +import ftllexengine.core.reference_graph as _core_graph +from ftllexengine.constants import ( + MAX_DETECTED_CYCLES as _DEFAULT_MAX_DETECTED_CYCLES, +) +from ftllexengine.constants import ( + MAX_GRAPH_DFS_STACK as _DEFAULT_MAX_GRAPH_DFS_STACK, +) -from ftllexengine.constants import MAX_DETECTED_CYCLES, MAX_GRAPH_DFS_STACK +MAX_DETECTED_CYCLES = _DEFAULT_MAX_DETECTED_CYCLES +MAX_GRAPH_DFS_STACK = _DEFAULT_MAX_GRAPH_DFS_STACK __all__ = [ + "MAX_DETECTED_CYCLES", + "MAX_GRAPH_DFS_STACK", + "_canonicalize_cycle", "detect_cycles", "entry_dependency_set", "make_cycle_key", ] -_ENTERING: Final[bool] = True -_EXITING: Final[bool] = False - def entry_dependency_set( message_refs: frozenset[str], term_refs: frozenset[str], ) -> frozenset[str]: - """Build a namespace-prefixed dependency set from reference sets. - - Combines message and term references into a single frozenset with - ``msg:`` and ``term:`` prefixes. This is the canonical key format - used by ``detect_cycles`` for cross-namespace cycle detection. - - Args: - message_refs: Message IDs referenced by the entry. - term_refs: Term IDs referenced by the entry. 
- - Returns: - Frozenset of prefixed dependency keys - (e.g., ``frozenset({"msg:welcome", "term:brand"})``). - """ - return frozenset( - f"{prefix}:{r}" - for prefix, refs in (("msg", message_refs), ("term", term_refs)) - for r in refs - ) - - -def _canonicalize_cycle(cycle: Sequence[str]) -> tuple[str, ...]: - """Canonicalize a cycle path by rotating to start with smallest element. - - Preserves directional information (A->B->C vs A->C->B remain distinct) - while normalizing the starting point for deduplication. - - The input cycle has the format ``[A, B, C, A]`` where the last element - repeats the first to close the cycle. - - Args: - cycle: Cycle path with closing repeat - (e.g., ``["A", "B", "C", "A"]``). - - Returns: - Canonicalized cycle as tuple, rotated to start with - lexicographically smallest element. Closing repeat preserved. - """ - if len(cycle) <= 1: - return tuple(cycle) - - nodes = list(cycle[:-1]) - min_idx = nodes.index(min(nodes)) - rotated = nodes[min_idx:] + nodes[:min_idx] - return (*rotated, rotated[0]) - - -def make_cycle_key(cycle: Sequence[str]) -> str: - """Create a canonical string key from a cycle for display. - - Args: - cycle: Cycle path as sequence of node IDs. - - Returns: - Canonical string key in format ``"A -> B -> C -> A"``. - """ - canonical = _canonicalize_cycle(cycle) - return " -> ".join(canonical) - - -def detect_cycles(dependencies: Mapping[str, set[str]]) -> list[list[str]]: - """Detect cycles in a dependency graph using bounded iterative DFS. - - Implements iterative DFS with explicit stack to avoid RecursionError - on deep graphs (>1000 nodes in linear chain). Returns up to - ``MAX_DETECTED_CYCLES`` unique cycles; exits early once that limit is - reached, as FTL validation requires actionable diagnostics rather than - exhaustive cycle enumeration. - - Args: - dependencies: Mapping from node ID to set of referenced node IDs. 
- Example: ``{"a": {"b", "c"}, "b": {"c"}, "c": {"a"}}`` - - Returns: - List of up to ``MAX_DETECTED_CYCLES`` cycles, where each cycle is - a list of node IDs forming the cycle path (closed: last element - repeats first). Empty list if no cycles detected. Cycles are - deduplicated via canonical tuple form. - - Complexity: - Time: O(V * E) for typical sparse FTL graphs. In adversarial dense - graphs (complete K_n), exploration is bounded by ``MAX_GRAPH_DFS_STACK`` - work-queue entries and ``MAX_DETECTED_CYCLES`` cycle collection. - Space: O(MAX_GRAPH_DFS_STACK) worst case for the DFS work queue; - O(V) for path and rec_stack tracking. - - Correctness: - The ``visited`` guard used in a simple DFS causes false negatives - when a graph has multiple paths to the same intermediate node that - closes a cycle. Example: A→B→D→A and A→C→D→A share node D; the - second cycle is missed if D is marked globally visited after the - first traversal. This implementation avoids that defect by NOT - applying a global visited guard on neighbors. Instead: - - ``rec_stack`` prevents re-entering nodes already on the current - DFS path (back-edge detection and termination guarantee). - - ``globally_visited`` tracks only start nodes whose reachable - subgraphs have been fully explored, safely pruning the outer - for-loop without suppressing intra-DFS re-exploration. - - ``MAX_GRAPH_DFS_STACK`` prevents the O(n!) work-queue growth that - occurs in dense graphs where every node is reachable via - exponentially many distinct paths. - - Security: - Uses iterative DFS to prevent stack overflow attacks via - deeply nested dependency chains in untrusted FTL resources. - ``MAX_DETECTED_CYCLES`` and ``MAX_GRAPH_DFS_STACK`` prevent memory - exhaustion from adversarial complete or near-complete graphs. 
- """ - globally_visited: set[str] = set() - cycles: list[list[str]] = [] - seen_canonical: set[tuple[str, ...]] = set() - - for start_node in dependencies: - if start_node in globally_visited: - continue - if len(cycles) >= MAX_DETECTED_CYCLES: - break - - path: list[str] = [] - rec_stack: set[str] = set() - - stack: list[tuple[str, bool, list[str]]] = [ - (start_node, _ENTERING, list(dependencies.get(start_node, set()))) - ] - - while stack and len(cycles) < MAX_DETECTED_CYCLES: - node, entering, neighbors = stack.pop() + """Build a namespace-prefixed dependency set from reference sets.""" + return _core_graph.entry_dependency_set(message_refs, term_refs) - if entering: - # Prevent re-entering a node already on the current DFS path. - # Without this guard the same node could be pushed repeatedly, - # creating an infinite exploration loop through the cycle. - # Nodes in rec_stack are caught by the back-edge guard before - # being pushed (ENTERING), so this branch is a permanent safety - # net that cannot be triggered by the current algorithm. - if node in rec_stack: # pragma: no cover - continue - globally_visited.add(node) - rec_stack.add(node) - path.append(node) +def make_cycle_key(cycle: list[str] | tuple[str, ...]) -> str: + """Create a canonical display key from a cycle path.""" + return _core_graph.make_cycle_key(cycle) - stack.append((node, _EXITING, [])) - for neighbor in neighbors: - if neighbor not in rec_stack: - # Forward/cross edge: push for exploration if budget allows. - # No globally_visited guard here: nodes reachable from - # multiple branches of the current path must be explored - # via each branch independently to find all cycles - # (e.g., A→B→D→A and A→C→D→A both require exploring D). - # MAX_GRAPH_DFS_STACK caps the work queue: without it, - # dense graphs cause O(n!) queue growth as every node - # is re-pushed for each distinct incoming path. 
- if len(stack) < MAX_GRAPH_DFS_STACK: - stack.append(( - neighbor, - _ENTERING, - list(dependencies.get(neighbor, set())), - )) - continue - # Back edge: neighbor is an ancestor in the current path. - cycle_start = path.index(neighbor) - cycle = [*path[cycle_start:], neighbor] - canonical = _canonicalize_cycle(cycle) - if canonical not in seen_canonical: - seen_canonical.add(canonical) - cycles.append(cycle) - if len(cycles) >= MAX_DETECTED_CYCLES: - break +def _canonicalize_cycle(cycle: list[str] | tuple[str, ...]) -> tuple[str, ...]: + """Canonicalize a cycle path for compatibility callers and fuzzers.""" + return _core_graph.canonicalize_cycle(cycle) - else: - path.pop() - rec_stack.discard(node) - return cycles +def detect_cycles(dependencies: dict[str, set[str]]) -> list[list[str]]: + """Detect cycles while honoring monkeypatched module-level limits.""" + original_max_cycles = _core_graph.MAX_DETECTED_CYCLES + original_max_stack = _core_graph.MAX_GRAPH_DFS_STACK + _core_graph.MAX_DETECTED_CYCLES = MAX_DETECTED_CYCLES + _core_graph.MAX_GRAPH_DFS_STACK = MAX_GRAPH_DFS_STACK + try: + return _core_graph.detect_cycles(dependencies) + finally: + _core_graph.MAX_DETECTED_CYCLES = original_max_cycles + _core_graph.MAX_GRAPH_DFS_STACK = original_max_stack diff --git a/src/ftllexengine/core/depth_guard.py b/src/ftllexengine/core/depth_guard.py index e8623f2a..453cf7f3 100644 --- a/src/ftllexengine/core/depth_guard.py +++ b/src/ftllexengine/core/depth_guard.py @@ -13,17 +13,33 @@ import logging import sys +from collections.abc import Callable from dataclasses import dataclass, field from typing import Self from ftllexengine.constants import MAX_DEPTH -from ftllexengine.diagnostics import ErrorCategory, FrozenFluentError -from ftllexengine.diagnostics.templates import ErrorTemplate -__all__ = ["DepthGuard", "depth_clamp"] +__all__ = ["DepthGuard", "DepthLimitExceededError", "depth_clamp"] logger = logging.getLogger(__name__) +type DepthErrorFactory = Callable[[int], 
BaseException] + + +class DepthLimitExceededError(ValueError): + """Raised when DepthGuard detects recursion beyond the configured limit.""" + + __slots__ = ("max_depth",) + + def __init__(self, max_depth: int) -> None: + self.max_depth = max_depth + super().__init__(f"Depth limit exceeded (max_depth={max_depth})") + + +def _default_depth_error(max_depth: int) -> DepthLimitExceededError: + """Build the default core-layer depth exception.""" + return DepthLimitExceededError(max_depth) + @dataclass(slots=True) class DepthGuard: @@ -41,7 +57,7 @@ class DepthGuard: Explicit check (non-context-manager call sites): guard = DepthGuard() - guard.check() # Raises FrozenFluentError if limit exceeded + guard.check() # Raises DepthLimitExceededError if limit exceeded Mutability Note: Intentionally mutable (not frozen=True) to enable stateful depth @@ -59,24 +75,30 @@ class DepthGuard: max_depth: int = MAX_DEPTH current_depth: int = field(default=0, init=False) + error_factory: DepthErrorFactory = field( + default=_default_depth_error, + repr=False, + compare=False, + ) def __post_init__(self) -> None: """Clamp max_depth against Python recursion limit.""" self.max_depth = depth_clamp(self.max_depth) + def _raise_depth_error(self) -> None: + """Raise the configured exception for a depth-limit breach.""" + raise self.error_factory(self.max_depth) + def __enter__(self) -> Self: """Enter guarded section, increment depth. Validates depth limit BEFORE incrementing to prevent state corruption - if FrozenFluentError is raised. Since __exit__ is not called when + if the configured exception is raised. Since __exit__ is not called when __enter__ raises, incrementing first would leave current_depth permanently elevated, causing all subsequent operations to fail. 
""" if self.current_depth >= self.max_depth: - diag = ErrorTemplate.depth_exceeded(self.max_depth) - raise FrozenFluentError( - str(diag), ErrorCategory.RESOLUTION, diagnostic=diag, - ) + self._raise_depth_error() self.current_depth += 1 return self @@ -95,13 +117,13 @@ def check(self) -> None: Use when context manager pattern is not convenient. Raises: - FrozenFluentError: If depth limit exceeded (category=RESOLUTION) + DepthLimitExceededError: If depth limit exceeded and the default + core-layer error factory is in use. + BaseException: Any caller-supplied exception produced by + ``error_factory``. """ if self.current_depth >= self.max_depth: - diag = ErrorTemplate.depth_exceeded(self.max_depth) - raise FrozenFluentError( - str(diag), ErrorCategory.RESOLUTION, diagnostic=diag, - ) + self._raise_depth_error() def depth_clamp(requested_depth: int, reserve_frames: int = 50) -> int: @@ -119,11 +141,11 @@ def depth_clamp(requested_depth: int, reserve_frames: int = 50) -> int: Safe depth value, clamped if necessary Example: - >>> import sys - >>> sys.setrecursionlimit(200) - >>> depth_clamp(100) # OK, within limit + >>> import sys # doctest: +SKIP + >>> sys.setrecursionlimit(200) # doctest: +SKIP + >>> depth_clamp(100) # OK, within limit # doctest: +SKIP 100 - >>> depth_clamp(500) # Exceeds limit, clamped to 150 + >>> depth_clamp(500) # Exceeds limit, clamped to 150 # doctest: +SKIP 150 """ max_safe_depth = sys.getrecursionlimit() - reserve_frames diff --git a/src/ftllexengine/core/identifier_validation.py b/src/ftllexengine/core/identifier_validation.py index d27290ae..db0a4c20 100644 --- a/src/ftllexengine/core/identifier_validation.py +++ b/src/ftllexengine/core/identifier_validation.py @@ -61,11 +61,11 @@ def is_identifier_start(ch: str) -> bool: True if character is ASCII letter (a-z, A-Z), False otherwise Example: - >>> is_identifier_start('a') + >>> is_identifier_start('a') # doctest: +SKIP True - >>> is_identifier_start('1') + >>> is_identifier_start('1') # 
doctest: +SKIP False - >>> is_identifier_start('é') + >>> is_identifier_start('é') # doctest: +SKIP False """ return len(ch) == 1 and ch.isascii() and ch.isalpha() @@ -87,15 +87,15 @@ def is_identifier_char(ch: str) -> bool: True if character is ASCII letter, ASCII digit, hyphen, or underscore Example: - >>> is_identifier_char('a') + >>> is_identifier_char('a') # doctest: +SKIP True - >>> is_identifier_char('5') + >>> is_identifier_char('5') # doctest: +SKIP True - >>> is_identifier_char('-') + >>> is_identifier_char('-') # doctest: +SKIP True - >>> is_identifier_char('_') + >>> is_identifier_char('_') # doctest: +SKIP True - >>> is_identifier_char('é') + >>> is_identifier_char('é') # doctest: +SKIP False """ return len(ch) == 1 and ch.isascii() and (ch.isalnum() or ch in "-_") @@ -121,15 +121,15 @@ def is_valid_identifier(name: str) -> bool: - Length must not exceed MAX_IDENTIFIER_LENGTH (256 characters) Example: - >>> is_valid_identifier("message-id") + >>> is_valid_identifier("message-id") # doctest: +SKIP True - >>> is_valid_identifier("message_id_2") + >>> is_valid_identifier("message_id_2") # doctest: +SKIP True - >>> is_valid_identifier("1message") + >>> is_valid_identifier("1message") # doctest: +SKIP False - >>> is_valid_identifier("") + >>> is_valid_identifier("") # doctest: +SKIP False - >>> is_valid_identifier("a" * 300) + >>> is_valid_identifier("a" * 300) # doctest: +SKIP False """ # Empty check diff --git a/src/ftllexengine/core/locale_utils.py b/src/ftllexengine/core/locale_utils.py index 6b305939..1b5a4fea 100644 --- a/src/ftllexengine/core/locale_utils.py +++ b/src/ftllexengine/core/locale_utils.py @@ -31,7 +31,7 @@ if TYPE_CHECKING: from babel import Locale - from ftllexengine.localization.types import LocaleCode + from ftllexengine.core.semantic_types import LocaleCode __all__ = [ "clear_locale_cache", @@ -90,13 +90,13 @@ def normalize_locale(locale_code: str) -> str: Lowercase POSIX-formatted locale code (e.g., "en_us", "pt_br") Example: - >>> 
normalize_locale("en-US") + >>> normalize_locale("en-US") # doctest: +SKIP 'en_us' - >>> normalize_locale("EN-US") + >>> normalize_locale("EN-US") # doctest: +SKIP 'en_us' - >>> normalize_locale("pt-BR") + >>> normalize_locale("pt-BR") # doctest: +SKIP 'pt_br' - >>> normalize_locale("en") # Already normalized + >>> normalize_locale("en") # Already normalized # doctest: +SKIP 'en' """ return locale_code.replace("-", "_").lower() @@ -205,10 +205,10 @@ def get_babel_locale(locale_code: str) -> Locale: ValueError: If locale format is invalid Example: - >>> locale = get_babel_locale("en-US") - >>> locale.language + >>> locale = get_babel_locale("en-US") # doctest: +SKIP + >>> locale.language # doctest: +SKIP 'en' - >>> locale.territory + >>> locale.territory # doctest: +SKIP 'US' """ normalized_code = require_locale_code(locale_code, "locale_code") @@ -242,12 +242,12 @@ def get_system_locale(*, raise_on_failure: bool = False) -> str: RuntimeError: If raise_on_failure is True and locale cannot be determined. Example: - >>> import os - >>> os.environ['LANG'] = 'de_DE.UTF-8' - >>> get_system_locale() + >>> import os # doctest: +SKIP + >>> os.environ['LANG'] = 'de_DE.UTF-8' # doctest: +SKIP + >>> get_system_locale() # doctest: +SKIP 'de_de' - >>> get_system_locale(raise_on_failure=True) # May raise if no locale set + >>> get_system_locale(raise_on_failure=True) # May raise if no locale set # doctest: +SKIP 'de_de' """ # stdlib locale module deferred: has significant initialization overhead @@ -303,7 +303,7 @@ def clear_locale_cache() -> None: regardless of whether Babel is installed. 
Example: - >>> from ftllexengine.core.locale_utils import clear_locale_cache - >>> clear_locale_cache() # Clears all cached Locale objects + >>> from ftllexengine.core.locale_utils import clear_locale_cache # doctest: +SKIP + >>> clear_locale_cache() # Clears all cached Locale objects # doctest: +SKIP """ _get_babel_locale_normalized.cache_clear() diff --git a/src/ftllexengine/core/reference_graph.py b/src/ftllexengine/core/reference_graph.py new file mode 100644 index 00000000..fab324f5 --- /dev/null +++ b/src/ftllexengine/core/reference_graph.py @@ -0,0 +1,188 @@ +"""Reference-graph helpers shared across validation and runtime. + +Provides bounded dependency-graph algorithms plus the canonical namespace +encoding used for mixed message/term graphs. + +Python 3.13+. +""" + +from __future__ import annotations + +from typing import TYPE_CHECKING, Final + +if TYPE_CHECKING: + from collections.abc import Mapping, Sequence + +from ftllexengine.constants import ( + MAX_DETECTED_CYCLES as _DEFAULT_MAX_DETECTED_CYCLES, +) +from ftllexengine.constants import ( + MAX_GRAPH_DFS_STACK as _DEFAULT_MAX_GRAPH_DFS_STACK, +) + +MAX_DETECTED_CYCLES = _DEFAULT_MAX_DETECTED_CYCLES +MAX_GRAPH_DFS_STACK = _DEFAULT_MAX_GRAPH_DFS_STACK + +__all__ = [ + "MAX_DETECTED_CYCLES", + "MAX_GRAPH_DFS_STACK", + "_canonicalize_cycle", + "canonicalize_cycle", + "detect_cycles", + "entry_dependency_set", + "make_cycle_key", +] + +_ENTERING: Final[bool] = True +_EXITING: Final[bool] = False + + +def _append_cycle( + *, + path: list[str], + neighbor: str, + cycles: list[list[str]], + seen_canonical: set[tuple[str, ...]], +) -> None: + """Append a newly discovered cycle if its canonical form is unique.""" + cycle_start = path.index(neighbor) + cycle = [*path[cycle_start:], neighbor] + canonical = canonicalize_cycle(cycle) + if canonical in seen_canonical: + return + seen_canonical.add(canonical) + cycles.append(cycle) + + +def _queue_neighbor( + *, + neighbor: str, + dependencies: Mapping[str, 
set[str]], + rec_stack: set[str], + stack: list[tuple[str, bool, list[str]]], +) -> None: + """Queue an unvisited neighbor while honoring DFS stack limits.""" + if neighbor in rec_stack or len(stack) >= MAX_GRAPH_DFS_STACK: + return + stack.append((neighbor, _ENTERING, list(dependencies.get(neighbor, set())))) + + +def _visit_entering_node( + *, + node: str, + neighbors: list[str], + dependencies: Mapping[str, set[str]], + globally_visited: set[str], + path: list[str], + rec_stack: set[str], + stack: list[tuple[str, bool, list[str]]], + cycles: list[list[str]], + seen_canonical: set[tuple[str, ...]], +) -> None: + """Process the DFS enter phase for one node.""" + if node in rec_stack: # pragma: no cover + return + + globally_visited.add(node) + rec_stack.add(node) + path.append(node) + stack.append((node, _EXITING, [])) + + for neighbor in neighbors: + if neighbor in rec_stack: + _append_cycle( + path=path, + neighbor=neighbor, + cycles=cycles, + seen_canonical=seen_canonical, + ) + if len(cycles) >= MAX_DETECTED_CYCLES: + return + continue + + _queue_neighbor( + neighbor=neighbor, + dependencies=dependencies, + rec_stack=rec_stack, + stack=stack, + ) + + +def _finish_node(*, node: str, path: list[str], rec_stack: set[str]) -> None: + """Process the DFS exit phase for one node.""" + path.pop() + rec_stack.discard(node) + + +def entry_dependency_set( + message_refs: frozenset[str], + term_refs: frozenset[str], +) -> frozenset[str]: + """Build a namespace-prefixed dependency set from reference sets.""" + return frozenset( + f"{prefix}:{ref}" + for prefix, refs in (("msg", message_refs), ("term", term_refs)) + for ref in refs + ) + + +def _canonicalize_cycle(cycle: Sequence[str]) -> tuple[str, ...]: + """Canonicalize a cycle path by rotating to start with smallest element.""" + if len(cycle) <= 1: + return tuple(cycle) + + nodes = list(cycle[:-1]) + min_idx = nodes.index(min(nodes)) + rotated = nodes[min_idx:] + nodes[:min_idx] + return (*rotated, rotated[0]) + + +def 
canonicalize_cycle(cycle: Sequence[str]) -> tuple[str, ...]: + """Canonicalize a cycle path into its stable tuple representation.""" + return _canonicalize_cycle(cycle) + + +def make_cycle_key(cycle: Sequence[str]) -> str: + """Create a canonical string key from a cycle for display.""" + canonical = canonicalize_cycle(cycle) + return " -> ".join(canonical) + + +def detect_cycles(dependencies: Mapping[str, set[str]]) -> list[list[str]]: + """Detect cycles in a dependency graph using bounded iterative DFS.""" + globally_visited: set[str] = set() + cycles: list[list[str]] = [] + seen_canonical: set[tuple[str, ...]] = set() + + for start_node in dependencies: + if start_node in globally_visited: + continue + if len(cycles) >= MAX_DETECTED_CYCLES: + break + + path: list[str] = [] + rec_stack: set[str] = set() + stack: list[tuple[str, bool, list[str]]] = [ + (start_node, _ENTERING, list(dependencies.get(start_node, set()))) + ] + + while stack and len(cycles) < MAX_DETECTED_CYCLES: + node, entering, neighbors = stack.pop() + + if not entering: + _finish_node(node=node, path=path, rec_stack=rec_stack) + continue + + _visit_entering_node( + node=node, + neighbors=neighbors, + dependencies=dependencies, + globally_visited=globally_visited, + path=path, + rec_stack=rec_stack, + stack=stack, + cycles=cycles, + seen_canonical=seen_canonical, + ) + + return cycles diff --git a/src/ftllexengine/core/semantic_types.py b/src/ftllexengine/core/semantic_types.py new file mode 100644 index 00000000..a1548e25 --- /dev/null +++ b/src/ftllexengine/core/semantic_types.py @@ -0,0 +1,30 @@ +"""Project-wide semantic type aliases. + +These aliases are the canonical low-layer home for semantic string types used +across runtime, localization, and documentation surfaces. Keeping them in the +core layer prevents lower modules from importing higher-level localization +helpers just to annotate identifiers or locale codes. + +Python 3.13+. Zero external dependencies. 
+""" + +from __future__ import annotations + +__all__ = [ + "FTLSource", + "LocaleCode", + "MessageId", + "ResourceId", +] + +type MessageId = str +"""Identifier for a Fluent message (for example ``"welcome"``).""" + +type LocaleCode = str +"""Locale identifier in BCP-47 or normalized POSIX form.""" + +type ResourceId = str +"""Logical Fluent resource identifier (for example ``"main.ftl"``).""" + +type FTLSource = str +"""Raw Fluent source text before parsing.""" diff --git a/src/ftllexengine/core/validators.py b/src/ftllexengine/core/validators.py index 5c14004a..28c5882a 100644 --- a/src/ftllexengine/core/validators.py +++ b/src/ftllexengine/core/validators.py @@ -61,17 +61,17 @@ def require_positive_int(value: object, field_name: str) -> int: ValueError: If value is zero or negative. Example: - >>> require_positive_int(42, "size") + >>> require_positive_int(42, "size") # doctest: +SKIP 42 - >>> require_positive_int(0, "size") + >>> require_positive_int(0, "size") # doctest: +SKIP Traceback (most recent call last): ... ValueError: size must be positive - >>> require_positive_int(-1, "size") + >>> require_positive_int(-1, "size") # doctest: +SKIP Traceback (most recent call last): ... ValueError: size must be positive - >>> require_positive_int(True, "size") + >>> require_positive_int(True, "size") # doctest: +SKIP Traceback (most recent call last): ... TypeError: size must be int, got bool @@ -114,14 +114,14 @@ def require_date(value: object, field_name: str) -> _date: TypeError: If value is not a date instance. Example: - >>> from datetime import date, datetime - >>> require_date(date(2024, 1, 15), "effective_date") + >>> from datetime import date, datetime # doctest: +SKIP + >>> require_date(date(2024, 1, 15), "effective_date") # doctest: +SKIP datetime.date(2024, 1, 15) - >>> require_date(datetime(2024, 1, 15, 9, 0), "effective_date") + >>> require_date(datetime(2024, 1, 15, 9, 0), "effective_date") # doctest: +SKIP Traceback (most recent call last): ... 
TypeError: effective_date must be date, got datetime - >>> require_date("2024-01-15", "effective_date") + >>> require_date("2024-01-15", "effective_date") # doctest: +SKIP Traceback (most recent call last): ... TypeError: effective_date must be date, got str @@ -160,14 +160,14 @@ def require_datetime(value: object, field_name: str) -> _datetime: TypeError: If value is not a datetime instance (including plain date). Example: - >>> from datetime import date, datetime - >>> require_datetime(datetime(2024, 1, 15, 9, 0), "created_at") + >>> from datetime import date, datetime # doctest: +SKIP + >>> require_datetime(datetime(2024, 1, 15, 9, 0), "created_at") # doctest: +SKIP datetime.datetime(2024, 1, 15, 9, 0) - >>> require_datetime(date(2024, 1, 15), "created_at") + >>> require_datetime(date(2024, 1, 15), "created_at") # doctest: +SKIP Traceback (most recent call last): ... TypeError: created_at must be datetime, got date - >>> require_datetime("2024-01-15T09:00:00", "created_at") + >>> require_datetime("2024-01-15T09:00:00", "created_at") # doctest: +SKIP Traceback (most recent call last): ... TypeError: created_at must be datetime, got str @@ -199,12 +199,14 @@ def require_fluent_number(value: object, field_name: str) -> FluentNumber: TypeError: If value is not a FluentNumber instance. Example: - >>> from ftllexengine.core.value_types import FluentNumber - >>> from decimal import Decimal - >>> fn = FluentNumber(value=Decimal("9.99"), formatted="9.99", precision=2) - >>> require_fluent_number(fn, "amount") + >>> from ftllexengine.core.value_types import FluentNumber # doctest: +SKIP + >>> from decimal import Decimal # doctest: +SKIP + >>> fn = FluentNumber( # doctest: +SKIP + ... value=Decimal("9.99"), formatted="9.99", precision=2 + ... 
) + >>> require_fluent_number(fn, "amount") # doctest: +SKIP FluentNumber(value=Decimal('9.99'), formatted='9.99', precision=2) - >>> require_fluent_number(9.99, "amount") + >>> require_fluent_number(9.99, "amount") # doctest: +SKIP Traceback (most recent call last): ... TypeError: amount must be FluentNumber, got float @@ -214,4 +216,3 @@ def require_fluent_number(value: object, field_name: str) -> FluentNumber: raise TypeError(msg) return value - diff --git a/src/ftllexengine/core/value_types.py b/src/ftllexengine/core/value_types.py index e15116b7..39c56752 100644 --- a/src/ftllexengine/core/value_types.py +++ b/src/ftllexengine/core/value_types.py @@ -98,12 +98,12 @@ class FluentNumber: Must be >= 0 when set; negative precision has no CLDR meaning. Example: - >>> fn = FluentNumber(value=1, formatted="1.00", precision=2) - >>> str(fn) # Used in output + >>> fn = FluentNumber(value=1, formatted="1.00", precision=2) # doctest: +SKIP + >>> str(fn) # Used in output # doctest: +SKIP '1.00' - >>> fn.value # Used for plural matching + >>> fn.value # Used for plural matching # doctest: +SKIP 1 - >>> fn.precision # CLDR v operand: 2 visible fraction digits + >>> fn.precision # CLDR v operand: 2 visible fraction digits # doctest: +SKIP 2 Precision Semantics: @@ -179,11 +179,13 @@ def decimal_value(self) -> Decimal: returned unchanged. Example: - >>> fn = FluentNumber(value=42, formatted="42", precision=0) - >>> fn.decimal_value + >>> fn = FluentNumber(value=42, formatted="42", precision=0) # doctest: +SKIP + >>> fn.decimal_value # doctest: +SKIP Decimal('42') - >>> fn2 = FluentNumber(value=Decimal("1234.50"), formatted="1,234.50", precision=2) - >>> fn2.decimal_value + >>> fn2 = FluentNumber( # doctest: +SKIP + ... value=Decimal("1234.50"), formatted="1,234.50", precision=2 + ... ) + >>> fn2.decimal_value # doctest: +SKIP Decimal('1234.50') """ if isinstance(self.value, Decimal): @@ -446,9 +448,9 @@ def make_fluent_number( the formatted string, not a valid locale output). 
Example: - >>> make_fluent_number(Decimal("1234.50"), formatted="1 234,50") + >>> make_fluent_number(Decimal("1234.50"), formatted="1 234,50") # doctest: +SKIP FluentNumber(value=Decimal('1234.50'), formatted='1 234,50', precision=2) - >>> make_fluent_number(42) + >>> make_fluent_number(42) # doctest: +SKIP FluentNumber(value=42, formatted='42', precision=0) """ rendered = str(value) if formatted is None else formatted diff --git a/src/ftllexengine/diagnostics/depth.py b/src/ftllexengine/diagnostics/depth.py new file mode 100644 index 00000000..38cc5576 --- /dev/null +++ b/src/ftllexengine/diagnostics/depth.py @@ -0,0 +1,23 @@ +"""Depth-limit error adapters for higher layers. + +Core depth tracking stays independent of diagnostics; higher layers that want +domain-specific ``FrozenFluentError`` instances opt into them through these +builders. +""" + +from __future__ import annotations + +from ftllexengine.diagnostics.errors import ErrorCategory, FrozenFluentError +from ftllexengine.diagnostics.templates import ErrorTemplate + +__all__ = ["resolution_depth_error"] + + +def resolution_depth_error(max_depth: int) -> FrozenFluentError: + """Build the canonical resolution-category depth error.""" + diagnostic = ErrorTemplate.depth_exceeded(max_depth) + return FrozenFluentError( + str(diagnostic), + ErrorCategory.RESOLUTION, + diagnostic=diagnostic, + ) diff --git a/src/ftllexengine/diagnostics/errors.py b/src/ftllexengine/diagnostics/errors.py index a9da9823..ad9a50e7 100644 --- a/src/ftllexengine/diagnostics/errors.py +++ b/src/ftllexengine/diagnostics/errors.py @@ -43,10 +43,10 @@ The second element is a tuple of errors (empty on success). 
Example: - >>> from ftllexengine import ParseResult - >>> result: ParseResult[Decimal] - >>> value, errors = result - >>> if not errors and value is not None: + >>> from ftllexengine import ParseResult # doctest: +SKIP + >>> result: ParseResult[Decimal] # doctest: +SKIP + >>> value, errors = result # doctest: +SKIP + >>> if not errors and value is not None: # doctest: +SKIP ... total = value.quantize(Decimal("0.01")) """ @@ -77,17 +77,17 @@ class FrozenFluentError(Exception): context: Additional context for parse/formatting errors (optional) Example: - >>> error = FrozenFluentError( + >>> error = FrozenFluentError( # doctest: +SKIP ... "Message 'hello' not found", ... ErrorCategory.REFERENCE, ... diagnostic=some_diagnostic, ... ) - >>> error.category == ErrorCategory.REFERENCE + >>> error.category == ErrorCategory.REFERENCE # doctest: +SKIP True - >>> error.verify_integrity() + >>> error.verify_integrity() # doctest: +SKIP True - >>> # Attempting mutation raises: - >>> error._message = "modified" # Raises ImmutabilityViolationError + Attempting mutation raises: + >>> error._message = "modified" # Raises ImmutabilityViolationError # doctest: +SKIP """ __slots__ = ( diff --git a/src/ftllexengine/diagnostics/formatter.py b/src/ftllexengine/diagnostics/formatter.py index 1e06dfea..a08116cf 100644 --- a/src/ftllexengine/diagnostics/formatter.py +++ b/src/ftllexengine/diagnostics/formatter.py @@ -70,19 +70,19 @@ class DiagnosticFormatter: max_content_length: Maximum content length when sanitizing Example: - >>> formatter = DiagnosticFormatter() - >>> diagnostic = ErrorTemplate.message_not_found("hello") - >>> print(formatter.format(diagnostic)) + >>> formatter = DiagnosticFormatter() # doctest: +SKIP + >>> diagnostic = ErrorTemplate.message_not_found("hello") # doctest: +SKIP + >>> print(formatter.format(diagnostic)) # doctest: +SKIP error[MESSAGE_NOT_FOUND]: Message 'hello' not found = help: Check that the message is defined in the loaded resources = note: see 
https://projectfluent.org/fluent/guide/messages.html - >>> formatter = DiagnosticFormatter(output_format=OutputFormat.SIMPLE) - >>> print(formatter.format(diagnostic)) + >>> formatter = DiagnosticFormatter(output_format=OutputFormat.SIMPLE) # doctest: +SKIP + >>> print(formatter.format(diagnostic)) # doctest: +SKIP MESSAGE_NOT_FOUND: Message 'hello' not found - >>> formatter = DiagnosticFormatter(output_format=OutputFormat.JSON) - >>> print(formatter.format(diagnostic)) + >>> formatter = DiagnosticFormatter(output_format=OutputFormat.JSON) # doctest: +SKIP + >>> print(formatter.format(diagnostic)) # doctest: +SKIP {"code": "MESSAGE_NOT_FOUND", "message": "Message 'hello' not found", ...} """ diff --git a/src/ftllexengine/diagnostics/validation.py b/src/ftllexengine/diagnostics/validation.py index 24a8e1e0..0b95c28a 100644 --- a/src/ftllexengine/diagnostics/validation.py +++ b/src/ftllexengine/diagnostics/validation.py @@ -17,11 +17,10 @@ from dataclasses import dataclass from enum import StrEnum -from typing import TYPE_CHECKING +from typing import TYPE_CHECKING, Protocol if TYPE_CHECKING: from ftllexengine.diagnostics.codes import DiagnosticCode - from ftllexengine.syntax.ast import Annotation from .formatter import DiagnosticFormatter @@ -50,6 +49,30 @@ class WarningSeverity(StrEnum): INFO = "info" # Informational only +class ParserAnnotation(Protocol): + """Structural contract for parser annotations stored in ValidationResult.""" + + @property + def code(self) -> str: + """Machine-readable annotation code.""" + ... # pragma: no cover - typing-only protocol declaration + + @property + def message(self) -> str: + """Human-readable parser annotation message.""" + ... # pragma: no cover - typing-only protocol declaration + + @property + def arguments(self) -> tuple[tuple[str, str], ...] | None: + """Structured parser annotation arguments.""" + ... 
# pragma: no cover - typing-only protocol declaration + + @property + def span(self) -> object | None: + """Optional source span for the annotation.""" + ... # pragma: no cover - typing-only protocol declaration + + # ============================================================================ # VALIDATION ERROR & WARNING TYPES # ============================================================================ @@ -114,9 +137,13 @@ def format( Formatted error string with optional content sanitization. Examples: - >>> error = ValidationError(DiagnosticCode.PARSE_JUNK, "Syntax error", "bad ftl") - >>> error.format(sanitize=True) # Truncates to 100 chars - >>> error.format(sanitize=True, redact_content=True) # Redacts entirely + >>> error = ValidationError( # doctest: +SKIP + ... DiagnosticCode.PARSE_JUNK, "Syntax error", "bad ftl" + ... ) + >>> error.format(sanitize=True) # Truncates to 100 chars # doctest: +SKIP + >>> error.format( # Redacts entirely # doctest: +SKIP + ... sanitize=True, redact_content=True + ... ) """ formatter = _get_formatter(sanitize=sanitize, redact_content=redact_content) return formatter.format_error(self) @@ -196,14 +223,14 @@ class ValidationResult: annotations: Parser-level AST annotations. Example: - >>> result = ValidationResult.valid() - >>> result.is_valid + >>> result = ValidationResult.valid() # doctest: +SKIP + >>> result.is_valid # doctest: +SKIP True - >>> result.error_count + >>> result.error_count # doctest: +SKIP 0 - >>> # With errors - >>> result = ValidationResult.invalid( + With errors: + >>> result = ValidationResult.invalid( # doctest: +SKIP ... errors=(ValidationError( ... code=DiagnosticCode.PARSE_JUNK, ... message="Expected '=' but found EOF", @@ -212,15 +239,15 @@ class ValidationResult: ... column=4 ... ),) ... ) - >>> result.is_valid + >>> result.is_valid # doctest: +SKIP False - >>> result.error_count + >>> result.error_count # doctest: +SKIP 1 """ errors: tuple[ValidationError, ...] 
warnings: tuple[ValidationWarning, ...] - annotations: tuple[Annotation, ...] + annotations: tuple[ParserAnnotation, ...] @property def is_valid(self) -> bool: @@ -273,7 +300,7 @@ def valid() -> ValidationResult: def invalid( errors: tuple[ValidationError, ...] = (), warnings: tuple[ValidationWarning, ...] = (), - annotations: tuple[Annotation, ...] = (), + annotations: tuple[ParserAnnotation, ...] = (), ) -> ValidationResult: """Create an invalid result with errors and/or annotations. @@ -290,7 +317,9 @@ def invalid( ) @staticmethod - def from_annotations(annotations: tuple[Annotation, ...]) -> ValidationResult: + def from_annotations( + annotations: tuple[ParserAnnotation, ...] + ) -> ValidationResult: """Create result from parser-level annotations only. Convenience factory for semantic validator usage. @@ -333,9 +362,9 @@ def format( set redact_content=True. Examples: - >>> result.format() # Full output for debugging - >>> result.format(sanitize=True) # Truncated content - >>> result.format(sanitize=True, redact_content=True) # No content + >>> result.format() # Full output for debugging # doctest: +SKIP + >>> result.format(sanitize=True) # Truncated content # doctest: +SKIP + >>> result.format(sanitize=True, redact_content=True) # No content # doctest: +SKIP """ formatter = _get_formatter(sanitize=sanitize, redact_content=redact_content) return formatter.format_validation_result(self, include_warnings=include_warnings) diff --git a/src/ftllexengine/introspection/iso.py b/src/ftllexengine/introspection/iso.py index 775b7f76..962e8d16 100644 --- a/src/ftllexengine/introspection/iso.py +++ b/src/ftllexengine/introspection/iso.py @@ -14,13 +14,12 @@ from __future__ import annotations -from dataclasses import dataclass from functools import lru_cache # TypeIs (PEP 742) is available unconditionally on Python 3.13+, which is the # minimum supported version. 
The import is placed here at module level so that # typing.get_type_hints() callers resolve the name from this module's globals. -from typing import NewType, TypeIs +from typing import TypeIs from ftllexengine.constants import ( ISO_4217_DECIMAL_DIGITS, @@ -30,14 +29,22 @@ MAX_LOCALE_CACHE_SIZE, MAX_TERRITORY_CACHE_SIZE, ) -from ftllexengine.core.babel_compat import ( - BabelImportError, - get_babel_languages, - get_babel_numbers, - get_locale_class, - get_unknown_locale_error_class, -) +from ftllexengine.core.babel_compat import BabelImportError from ftllexengine.core.locale_utils import normalize_locale +from ftllexengine.introspection.iso_babel import ( + _get_babel_currencies, + _get_babel_currency_name, + _get_babel_currency_symbol, + _get_babel_official_languages, + _get_babel_territories, + _get_babel_territory_currencies, +) +from ftllexengine.introspection.iso_types import ( + CurrencyCode, + CurrencyInfo, + TerritoryCode, + TerritoryInfo, +) # ruff: noqa: RUF022 - __all__ organized by category for readability __all__ = [ @@ -66,201 +73,6 @@ "BabelImportError", ] -_BABEL_FEATURE = "ISO introspection" - - -# ============================================================================ -# NEWTYPES -# ============================================================================ - -TerritoryCode = NewType("TerritoryCode", str) -"""ISO 3166-1 alpha-2 territory code (e.g., 'US', 'LV', 'DE'). - -Nominal subtype of str. Use is_valid_territory_code() to narrow a plain str -to TerritoryCode; both branches are then reachable, preventing false -unreachable diagnostics at validation sites. -""" - -CurrencyCode = NewType("CurrencyCode", str) -"""ISO 4217 currency code (e.g., 'USD', 'EUR', 'GBP'). - -Nominal subtype of str. Use is_valid_currency_code() to narrow a plain str -to CurrencyCode; both branches are then reachable, preventing false -unreachable diagnostics at validation sites. 
-""" - - -# ============================================================================ -# DATA CLASSES -# ============================================================================ - - -@dataclass(frozen=True, slots=True) -class TerritoryInfo: - """ISO 3166-1 territory data with localized name. - - Immutable, thread-safe, hashable. Safe for use as dict key or set member. - - Attributes: - alpha2: ISO 3166-1 alpha-2 code (e.g., 'US', 'DE'). - name: Localized display name (depends on locale used for lookup). - currencies: All active legal tender currencies for this territory. - Multi-currency territories (e.g., Panama: PAB, USD) have multiple entries. - Empty tuple if no currency data available. - official_languages: BCP-47 language codes of official languages for this - territory (e.g., ('en',) for 'US', ('fr', 'nl', 'de') for 'BE'). - Empty tuple if no language data is available in CLDR. - """ - - alpha2: TerritoryCode - name: str - currencies: tuple[CurrencyCode, ...] - official_languages: tuple[str, ...] - - -@dataclass(frozen=True, slots=True) -class CurrencyInfo: - """ISO 4217 currency data with localized presentation. - - Immutable, thread-safe, hashable. Safe for use as dict key or set member. - - Attributes: - code: ISO 4217 currency code (e.g., 'USD', 'EUR'). - name: Localized display name (depends on locale used for lookup). - symbol: Locale-specific symbol (e.g., '$', 'EUR', 'USD'). - decimal_digits: Standard decimal places (0, 2, 3, or 4). 
- """ - - code: CurrencyCode - name: str - symbol: str - decimal_digits: int - - -# ============================================================================ -# BABEL INTERFACE (LAZY IMPORT) -# ============================================================================ - - -def _get_babel_locale(locale_str: str) -> object: - """Get Babel Locale object, raising BabelImportError if unavailable.""" - locale_class = get_locale_class() - return locale_class.parse(locale_str) - - -def _is_unknown_locale_error(exc: Exception) -> bool: - """Return True if exc is Babel's UnknownLocaleError. - - Babel's UnknownLocaleError inherits directly from Exception (not LookupError), - requiring explicit runtime type checking. Returns False when Babel is unavailable, - allowing the caller to re-raise the original exception via a bare `raise`. - """ - try: - unknown_locale_error_class = get_unknown_locale_error_class() - except BabelImportError: - return False - return isinstance(exc, unknown_locale_error_class) - - -def _get_babel_territories(locale_str: str) -> dict[str, str]: - """Get territory names from Babel for a locale. - - Returns empty dict if locale is invalid or data unavailable. - """ - try: - locale = _get_babel_locale(locale_str) - return locale.territories # type: ignore[attr-defined, no-any-return] - except (ValueError, LookupError, KeyError, AttributeError): - # Standard library exceptions from invalid data - return {} - except Exception as exc: - if _is_unknown_locale_error(exc): - return {} - raise # Re-raise unexpected errors (logic bugs) - - -@lru_cache(maxsize=1) -def _get_babel_currencies() -> dict[str, str]: - """Get English currency names from Babel. Result is invariant; cached once. - - The English CLDR currency map never changes within a process lifetime. - Caching with maxsize=1 avoids redundant Babel round-trips when list_currencies - is called for multiple locales (all calls share the same English source map). 
- """ - locale = _get_babel_locale("en") - return locale.currencies # type: ignore[attr-defined, no-any-return] - - -def _get_babel_currency_name(code: str, locale_str: str) -> str | None: - """Get localized currency name from Babel. - - Returns None if the currency code is not found in CLDR data. - """ - locale_class = get_locale_class() - babel_numbers = get_babel_numbers() - try: - # Validate code exists in CLDR currency data before getting name - # Babel returns input code if not found, so we check explicitly - locale = locale_class.parse(locale_str) - if code.upper() not in locale.currencies: - return None - return str(babel_numbers.get_currency_name(code, locale=locale_str)) - except (ValueError, LookupError, KeyError, AttributeError): - # Babel raises ValueError/LookupError for invalid locales, - # KeyError/AttributeError for missing data. Logic bugs (NameError, - # TypeError) propagate to fail fast in financial-grade contexts. - return None - except Exception as exc: - if _is_unknown_locale_error(exc): - return None - raise # Re-raise unexpected errors (logic bugs) - - -def _get_babel_currency_symbol(code: str, locale_str: str) -> str: - """Get localized currency symbol from Babel.""" - babel_numbers = get_babel_numbers() - try: - return str(babel_numbers.get_currency_symbol(code, locale=locale_str)) - except (ValueError, LookupError, KeyError, AttributeError): - # Babel raises ValueError/LookupError for invalid locales, - # KeyError/AttributeError for unknown codes. Logic bugs propagate. - return code - except Exception as exc: - if _is_unknown_locale_error(exc): - return code - raise # Re-raise unexpected errors (logic bugs) - - -def _get_babel_territory_currencies(territory: str) -> list[str]: - """Get currencies used by a territory from Babel. - - Returns list of currently active legal tender currencies. - Uses babel.numbers.get_territory_currencies() — the stable public API — - rather than accessing the raw CLDR data table via get_global(). 
- """ - babel_numbers = get_babel_numbers() - try: - return list(babel_numbers.get_territory_currencies(territory, tender=True)) - except (ValueError, LookupError, KeyError, AttributeError): - return [] - - -def _get_babel_official_languages(territory: str) -> tuple[str, ...]: - """Get official language codes for a territory from Babel CLDR data. - - Returns BCP-47 language codes of officially recognized languages. - Uses babel.languages.get_official_languages() — the stable public API. - - Returns empty tuple if the territory is unknown or no language data exists. - """ - babel_languages = get_babel_languages() - try: - return tuple(babel_languages.get_official_languages(territory)) - except (ValueError, LookupError, KeyError, AttributeError): - return () - - -# ============================================================================ # CACHED LOOKUP FUNCTIONS # ============================================================================ @@ -434,15 +246,15 @@ def get_currency_decimal_digits(code: str) -> int | None: process-immutable tables. Examples: - >>> get_currency_decimal_digits("KWD") + >>> get_currency_decimal_digits("KWD") # doctest: +SKIP 3 - >>> get_currency_decimal_digits("JPY") + >>> get_currency_decimal_digits("JPY") # doctest: +SKIP 0 - >>> get_currency_decimal_digits("EUR") + >>> get_currency_decimal_digits("EUR") # doctest: +SKIP 2 - >>> get_currency_decimal_digits("IQD") + >>> get_currency_decimal_digits("IQD") # doctest: +SKIP 3 - >>> get_currency_decimal_digits("XYZ") is None + >>> get_currency_decimal_digits("XYZ") is None # doctest: +SKIP True """ # ISO 4217 codes are exactly 3 characters before uppercasing. @@ -755,15 +567,15 @@ def require_currency_code(value: object, field_name: str) -> CurrencyCode: BabelImportError: If Babel is not installed. 
Example: - >>> require_currency_code("usd", "currency") + >>> require_currency_code("usd", "currency") # doctest: +SKIP 'USD' - >>> require_currency_code(" EUR ", "currency") + >>> require_currency_code(" EUR ", "currency") # doctest: +SKIP 'EUR' - >>> require_currency_code("XYZ", "currency") + >>> require_currency_code("XYZ", "currency") # doctest: +SKIP Traceback (most recent call last): ... ValueError: currency must be a valid ISO 4217 currency code, got 'XYZ' - >>> require_currency_code(840, "currency") + >>> require_currency_code(840, "currency") # doctest: +SKIP Traceback (most recent call last): ... TypeError: currency must be str, got int @@ -772,10 +584,14 @@ def require_currency_code(value: object, field_name: str) -> CurrencyCode: msg = f"{field_name} must be str, got {type(value).__name__}" raise TypeError(msg) stripped = value.strip() - if not is_valid_currency_code(stripped): + code = stripped.upper() + if len(stripped) != 3: + msg = f"{field_name} must be a valid ISO 4217 currency code, got {value!r}" + raise ValueError(msg) + if code not in _currency_codes_impl(normalize_locale("en")): msg = f"{field_name} must be a valid ISO 4217 currency code, got {value!r}" raise ValueError(msg) - return CurrencyCode(stripped.upper()) + return CurrencyCode(code) def require_territory_code(value: object, field_name: str) -> TerritoryCode: @@ -805,15 +621,15 @@ def require_territory_code(value: object, field_name: str) -> TerritoryCode: BabelImportError: If Babel is not installed. Example: - >>> require_territory_code("us", "territory") + >>> require_territory_code("us", "territory") # doctest: +SKIP 'US' - >>> require_territory_code(" DE ", "territory") + >>> require_territory_code(" DE ", "territory") # doctest: +SKIP 'DE' - >>> require_territory_code("XX", "territory") + >>> require_territory_code("XX", "territory") # doctest: +SKIP Traceback (most recent call last): ... 
ValueError: territory must be a valid ISO 3166-1 alpha-2 territory code, got 'XX' - >>> require_territory_code(840, "territory") + >>> require_territory_code(840, "territory") # doctest: +SKIP Traceback (most recent call last): ... TypeError: territory must be str, got int @@ -822,12 +638,18 @@ def require_territory_code(value: object, field_name: str) -> TerritoryCode: msg = f"{field_name} must be str, got {type(value).__name__}" raise TypeError(msg) stripped = value.strip() - if not is_valid_territory_code(stripped): + code = stripped.upper() + if len(stripped) != 2: + msg = ( + f"{field_name} must be a valid ISO 3166-1 alpha-2 territory code, got {value!r}" + ) + raise ValueError(msg) + if code not in _territory_codes_impl(normalize_locale("en")): msg = ( f"{field_name} must be a valid ISO 3166-1 alpha-2 territory code, got {value!r}" ) raise ValueError(msg) - return TerritoryCode(stripped.upper()) + return TerritoryCode(code) def clear_iso_cache() -> None: diff --git a/src/ftllexengine/introspection/iso_babel.py b/src/ftllexengine/introspection/iso_babel.py new file mode 100644 index 00000000..7781630f --- /dev/null +++ b/src/ftllexengine/introspection/iso_babel.py @@ -0,0 +1,107 @@ +"""Lazy Babel bridge helpers for ISO introspection.""" + +from __future__ import annotations + +from functools import lru_cache + +from ftllexengine.core.babel_compat import ( + BabelImportError, + get_babel_languages, + get_babel_numbers, + get_locale_class, + get_unknown_locale_error_class, +) + +__all__ = [ + "_get_babel_currencies", + "_get_babel_currency_name", + "_get_babel_currency_symbol", + "_get_babel_locale", + "_get_babel_official_languages", + "_get_babel_territories", + "_get_babel_territory_currencies", + "_is_unknown_locale_error", +] + + +def _get_babel_locale(locale_str: str) -> object: + """Get Babel Locale object, raising BabelImportError if unavailable.""" + locale_class = get_locale_class() + return locale_class.parse(locale_str) + + +def 
_is_unknown_locale_error(exc: Exception) -> bool: + """Return True if exc is Babel's UnknownLocaleError.""" + try: + unknown_locale_error_class = get_unknown_locale_error_class() + except BabelImportError: + return False + return isinstance(exc, unknown_locale_error_class) + + +def _get_babel_territories(locale_str: str) -> dict[str, str]: + """Get territory names from Babel for a locale.""" + try: + locale = _get_babel_locale(locale_str) + return locale.territories # type: ignore[attr-defined, no-any-return] + except (ValueError, LookupError, KeyError, AttributeError): + return {} + except Exception as exc: + if _is_unknown_locale_error(exc): + return {} + raise + + +@lru_cache(maxsize=1) +def _get_babel_currencies() -> dict[str, str]: + """Get English currency names from Babel. Result is invariant; cached once.""" + locale = _get_babel_locale("en") + return locale.currencies # type: ignore[attr-defined, no-any-return] + + +def _get_babel_currency_name(code: str, locale_str: str) -> str | None: + """Get localized currency name from Babel.""" + locale_class = get_locale_class() + babel_numbers = get_babel_numbers() + try: + locale = locale_class.parse(locale_str) + if code.upper() not in locale.currencies: + return None + return str(babel_numbers.get_currency_name(code, locale=locale_str)) + except (ValueError, LookupError, KeyError, AttributeError): + return None + except Exception as exc: + if _is_unknown_locale_error(exc): + return None + raise + + +def _get_babel_currency_symbol(code: str, locale_str: str) -> str: + """Get localized currency symbol from Babel.""" + babel_numbers = get_babel_numbers() + try: + return str(babel_numbers.get_currency_symbol(code, locale=locale_str)) + except (ValueError, LookupError, KeyError, AttributeError): + return code + except Exception as exc: + if _is_unknown_locale_error(exc): + return code + raise + + +def _get_babel_territory_currencies(territory: str) -> list[str]: + """Get currencies used by a territory from Babel.""" 
+ babel_numbers = get_babel_numbers() + try: + return list(babel_numbers.get_territory_currencies(territory, tender=True)) + except (ValueError, LookupError, KeyError, AttributeError): + return [] + + +def _get_babel_official_languages(territory: str) -> tuple[str, ...]: + """Get official language codes for a territory from Babel CLDR data.""" + babel_languages = get_babel_languages() + try: + return tuple(babel_languages.get_official_languages(territory)) + except (ValueError, LookupError, KeyError, AttributeError): + return () diff --git a/src/ftllexengine/introspection/iso_types.py b/src/ftllexengine/introspection/iso_types.py new file mode 100644 index 00000000..4e0b0f4d --- /dev/null +++ b/src/ftllexengine/introspection/iso_types.py @@ -0,0 +1,73 @@ +"""Type definitions for ISO territory and currency introspection.""" + +from __future__ import annotations + +from dataclasses import dataclass +from typing import NewType + +__all__ = [ + "CurrencyCode", + "CurrencyInfo", + "TerritoryCode", + "TerritoryInfo", +] + + +TerritoryCode = NewType("TerritoryCode", str) +"""ISO 3166-1 alpha-2 territory code (e.g., 'US', 'LV', 'DE'). + +Nominal subtype of str. Use is_valid_territory_code() to narrow a plain str +to TerritoryCode; both branches are then reachable, preventing false +unreachable diagnostics at validation sites. +""" + + +CurrencyCode = NewType("CurrencyCode", str) +"""ISO 4217 currency code (e.g., 'USD', 'EUR', 'GBP'). + +Nominal subtype of str. Use is_valid_currency_code() to narrow a plain str +to CurrencyCode; both branches are then reachable, preventing false +unreachable diagnostics at validation sites. +""" + + +@dataclass(frozen=True, slots=True) +class TerritoryInfo: + """ISO 3166-1 territory data with localized name. + + Immutable, thread-safe, hashable. Safe for use as dict key or set member. + + Attributes: + alpha2: ISO 3166-1 alpha-2 code (e.g., 'US', 'DE'). + name: Localized display name (depends on locale used for lookup). 
+ currencies: All active legal tender currencies for this territory. + Multi-currency territories (e.g., Panama: PAB, USD) have multiple entries. + Empty tuple if no currency data available. + official_languages: BCP-47 language codes of official languages for this + territory (e.g., ('en',) for 'US', ('fr', 'nl', 'de') for 'BE'). + Empty tuple if no language data is available in CLDR. + """ + + alpha2: TerritoryCode + name: str + currencies: tuple[CurrencyCode, ...] + official_languages: tuple[str, ...] + + +@dataclass(frozen=True, slots=True) +class CurrencyInfo: + """ISO 4217 currency data with localized presentation. + + Immutable, thread-safe, hashable. Safe for use as dict key or set member. + + Attributes: + code: ISO 4217 currency code (e.g., 'USD', 'EUR'). + name: Localized display name (depends on locale used for lookup). + symbol: Locale-specific symbol (e.g., '$', 'EUR', 'USD'). + decimal_digits: Standard decimal places (0, 2, 3, or 4). + """ + + code: CurrencyCode + name: str + symbol: str + decimal_digits: int diff --git a/src/ftllexengine/introspection/message.py b/src/ftllexengine/introspection/message.py index b6f24574..ad3304a3 100644 --- a/src/ftllexengine/introspection/message.py +++ b/src/ftllexengine/introspection/message.py @@ -36,6 +36,11 @@ VariableReference, Variant, ) +from ftllexengine.syntax.reference_extraction import ( + ReferenceExtractor, + extract_references, + extract_references_by_attribute, +) from ftllexengine.syntax.visitor import ASTVisitor if TYPE_CHECKING: @@ -456,145 +461,6 @@ def _visit_variant(self, variant: Variant) -> None: self._context = old_context -# ============================================================================== -# REFERENCE EXTRACTION (Specialized Visitor) -# ============================================================================== - - -class ReferenceExtractor(ASTVisitor[MessageReference | TermReference]): - """Extract message and term references from AST for validation. 
- - Specialized visitor that collects only MessageReference and TermReference - nodes. Used by validation tools to build dependency graphs for circular - reference detection. - - This is intentionally simpler than IntrospectionVisitor - it does one thing - well: extract reference IDs for dependency analysis. - - Depth Limiting: - Includes DepthGuard to prevent stack overflow on adversarial or - programmatically constructed deeply nested ASTs. - - Memory Optimization: - Uses __slots__ to restrict attribute creation and reduce memory overhead. - """ - - __slots__ = ("message_refs", "term_refs") - - def __init__(self, *, max_depth: int = MAX_DEPTH) -> None: - """Initialize reference collector. - - Args: - max_depth: Maximum expression nesting depth (default: MAX_DEPTH). - Prevents stack overflow on adversarial ASTs. - """ - super().__init__(max_depth=max_depth) - self.message_refs: set[str] = set() - self.term_refs: set[str] = set() - - def visit_MessageReference(self, node: MessageReference) -> MessageReference: - """Collect message reference ID with optional attribute qualification. - - Stores attribute-qualified references ("msg.attr") when the reference - targets a specific attribute, or unqualified ("msg") for base message - references. This enables attribute-granular cycle detection. - - MessageReference contains only Identifier children (leaf nodes with - just name: str). No nested references are possible, so generic_visit() - is unnecessary and would waste cycles traversing leaf nodes. - """ - if node.attribute is not None: - self.message_refs.add(f"{node.id.name}.{node.attribute.name}") - else: - self.message_refs.add(node.id.name) - return node - - def visit_TermReference(self, node: TermReference) -> TermReference: - """Collect term reference with depth tracking. - - Unlike MessageReference, TermReference has arguments: CallArguments | None - which CAN contain nested expressions (including MessageReference, - TermReference, VariableReference). 
Must traverse children to find all - nested references. - """ - if node.attribute is not None: - self.term_refs.add(f"{node.id.name}.{node.attribute.name}") - else: - self.term_refs.add(node.id.name) - with self._depth_guard: - self.generic_visit(node) - return node - - -def extract_references(entry: Message | Term) -> tuple[frozenset[str], frozenset[str]]: - """Extract message and term references from an AST entry. - - Traverses the entry's value pattern and all attribute patterns to collect - all referenced message and term IDs. References include attribute - qualification: "msg.attr" for attribute references, "msg" for base - message references. - - Args: - entry: Message or Term AST node to analyze - - Returns: - Tuple of (message_refs, term_refs) as frozen sets of IDs. - - message_refs: Set of referenced message IDs, possibly attribute-qualified - (e.g., {"welcome", "msg.tooltip"}) - - term_refs: Set of referenced term IDs (e.g., {"brand", "app-name"}) - - Example: - >>> from ftllexengine import parse_ftl - >>> resource = parse_ftl("msg = { welcome } uses { -brand }") - >>> message = resource.entries[0] - >>> msg_refs, term_refs = extract_references(message) - >>> assert "welcome" in msg_refs - >>> assert "brand" in term_refs - """ - extractor = ReferenceExtractor() - - # Visit value pattern (Message.value can be None, Term.value is always present) - if entry.value is not None: - extractor.visit(entry.value) - - # Visit all attribute patterns - for attr in entry.attributes: - extractor.visit(attr.value) - - return frozenset(extractor.message_refs), frozenset(extractor.term_refs) - - -def extract_references_by_attribute( - entry: Message | Term, -) -> dict[str | None, tuple[frozenset[str], frozenset[str]]]: - """Extract references per source attribute for attribute-granular cycle detection. - - Returns a mapping from source attribute name (None for value pattern) to - the (message_refs, term_refs) found in that attribute's pattern. 
- - Args: - entry: Message or Term AST node to analyze - - Returns: - Dict mapping attribute name (or None for value) to (message_refs, term_refs). - """ - result: dict[str | None, tuple[frozenset[str], frozenset[str]]] = {} - - # Extract from value pattern - if entry.value is not None: - extractor = ReferenceExtractor() - extractor.visit(entry.value) - result[None] = (frozenset(extractor.message_refs), frozenset(extractor.term_refs)) - - # Extract from each attribute pattern separately - for attr in entry.attributes: - extractor = ReferenceExtractor() - extractor.visit(attr.value) - result[attr.id.name] = (frozenset(extractor.message_refs), frozenset(extractor.term_refs)) - - return result - - # ============================================================================== # PUBLIC API # ============================================================================== @@ -622,12 +488,12 @@ def introspect_message( TypeError: If message is not a Message or Term AST node Example: - >>> from ftllexengine.syntax.parser import FluentParserV1 - >>> parser = FluentParserV1() - >>> resource = parser.parse("greeting = Hello, { $name }!") - >>> msg = resource.entries[0] - >>> info = introspect_message(msg) - >>> print(info.get_variable_names()) + >>> from ftllexengine.syntax.parser import FluentParserV1 # doctest: +SKIP + >>> parser = FluentParserV1() # doctest: +SKIP + >>> resource = parser.parse("greeting = Hello, { $name }!") # doctest: +SKIP + >>> msg = resource.entries[0] # doctest: +SKIP + >>> info = introspect_message(msg) # doctest: +SKIP + >>> print(info.get_variable_names()) # doctest: +SKIP frozenset({'name'}) """ # Validate input type at API boundary (runtime check for callers ignoring type hints) @@ -691,8 +557,8 @@ def extract_variables(message: Message | Term) -> frozenset[str]: Frozen set of variable names (without $ prefix) Example: - >>> vars = extract_variables(msg) - >>> assert 'name' in vars + >>> vars = extract_variables(msg) # doctest: +SKIP + >>> assert 
'name' in vars # doctest: +SKIP """ return introspect_message(message).get_variable_names() @@ -747,16 +613,18 @@ def validate_message_variables( variables exactly match expected_variables (no missing, no extra). Example: - >>> from ftllexengine.syntax import parse - >>> resource = parse("greeting = Hello, { $name }! You have { $count } items.") - >>> msg = resource.body[0] - >>> result = validate_message_variables(msg, {"name", "count"}) - >>> result.is_valid + >>> from ftllexengine.syntax import parse # doctest: +SKIP + >>> resource = parse( # doctest: +SKIP + ... "greeting = Hello, { $name }! You have { $count } items." + ... ) + >>> msg = resource.entries[0] # doctest: +SKIP + >>> result = validate_message_variables(msg, {"name", "count"}) # doctest: +SKIP + >>> result.is_valid # doctest: +SKIP True - >>> result = validate_message_variables(msg, {"name"}) - >>> result.is_valid + >>> result = validate_message_variables(msg, {"name"}) # doctest: +SKIP + >>> result.is_valid # doctest: +SKIP False - >>> result.extra_variables + >>> result.extra_variables # doctest: +SKIP frozenset({'count'}) """ declared = extract_variables(message) diff --git a/src/ftllexengine/localization/__init__.py b/src/ftllexengine/localization/__init__.py index 311bea29..a3bc1873 100644 --- a/src/ftllexengine/localization/__init__.py +++ b/src/ftllexengine/localization/__init__.py @@ -22,6 +22,7 @@ # ruff: noqa: RUF022 - __all__ organized by category for readability +from ftllexengine.core.semantic_types import FTLSource, LocaleCode, MessageId, ResourceId from ftllexengine.enums import LoadStatus from ftllexengine.localization.loading import ( FallbackInfo, @@ -30,7 +31,6 @@ ResourceLoader, ResourceLoadResult, ) -from ftllexengine.localization.types import FTLSource, LocaleCode, MessageId, ResourceId # Babel-optional: orchestrator and boot depend on FluentBundle (runtime → Babel). 
# On parser-only installs these imports fail; the names are absent from this @@ -48,8 +48,8 @@ from ftllexengine.runtime.cache import ( CacheAuditLogEntry as CacheAuditLogEntry, ) -except ImportError: - pass # Parser-only install; Babel-dependent localization types unavailable +except ImportError: # pragma: no cover - parser-only install; Babel-dependent names unavailable + pass # pragma: no cover - parser-only install; Babel-dependent names unavailable __all__ = [ # Main orchestrator (Babel-optional; absent in parser-only installs) diff --git a/src/ftllexengine/localization/boot.py b/src/ftllexengine/localization/boot.py index ae446cea..cc6530a4 100644 --- a/src/ftllexengine/localization/boot.py +++ b/src/ftllexengine/localization/boot.py @@ -31,8 +31,8 @@ if TYPE_CHECKING: from collections.abc import Callable, Mapping + from ftllexengine.core.semantic_types import MessageId from ftllexengine.introspection import MessageVariableValidationResult - from ftllexengine.localization.types import MessageId from ftllexengine.runtime.cache_config import CacheConfig __all__ = ["LocalizationBootConfig"] @@ -77,14 +77,14 @@ class LocalizationBootConfig: fallback locale (optional). Receives a FallbackInfo instance. Example: - >>> config = LocalizationBootConfig( + >>> config = LocalizationBootConfig( # doctest: +SKIP ... locales=('lv', 'en'), ... resource_ids=('ui.ftl',), ... base_path='locales/{locale}', ... message_schemas={'welcome': frozenset({'name'})}, ... required_messages=frozenset({'welcome', 'farewell'}), ... ) - >>> l10n, summary, schema_results = config.boot() + >>> l10n, summary, schema_results = config.boot() # doctest: +SKIP """ locales: tuple[str, ...] 
diff --git a/src/ftllexengine/localization/loading.py b/src/ftllexengine/localization/loading.py index 078645aa..19c0194d 100644 --- a/src/ftllexengine/localization/loading.py +++ b/src/ftllexengine/localization/loading.py @@ -24,7 +24,7 @@ from ftllexengine.enums import LoadStatus if TYPE_CHECKING: - from ftllexengine.localization.types import FTLSource, LocaleCode, MessageId, ResourceId + from ftllexengine.core.semantic_types import FTLSource, LocaleCode, MessageId, ResourceId from ftllexengine.syntax.ast import Junk # ruff: noqa: RUF022 - __all__ organized by category for readability @@ -56,15 +56,15 @@ class ResourceLoader(Protocol): description. Example: - >>> class DiskLoader: + >>> class DiskLoader: # doctest: +SKIP ... def load(self, locale: str, resource_id: str) -> str: ... path = Path(f"locales/{locale}/{resource_id}") ... return path.read_text(encoding="utf-8") ... def describe_path(self, locale: str, resource_id: str) -> str: ... return f"locales/{locale}/{resource_id}" ... - >>> loader = DiskLoader() - >>> l10n = FluentLocalization(['en', 'fr'], ['main.ftl'], loader) + >>> loader = DiskLoader() # doctest: +SKIP + >>> l10n = FluentLocalization(['en', 'fr'], ['main.ftl'], loader) # doctest: +SKIP """ def load(self, locale: LocaleCode, resource_id: ResourceId) -> FTLSource: @@ -114,8 +114,8 @@ class PathResourceLoader: All resolved paths are validated against a fixed root directory. Example: - >>> loader = PathResourceLoader("locales/{locale}") - >>> ftl = loader.load("en", "main.ftl") + >>> loader = PathResourceLoader("locales/{locale}") # doctest: +SKIP + >>> ftl = loader.load("en", "main.ftl") # doctest: +SKIP # Loads from: locales/en/main.ftl Attributes: @@ -286,10 +286,10 @@ class FallbackInfo: message_id: The message identifier that was resolved Example: - >>> def log_fallback(info: FallbackInfo) -> None: + >>> def log_fallback(info: FallbackInfo) -> None: # doctest: +SKIP ... print(f"Fallback: {info.message_id} resolved from " ... 
f"{info.resolved_locale} (requested {info.requested_locale})") - >>> l10n = FluentLocalization(['lv', 'en'], on_fallback=log_fallback) + >>> l10n = FluentLocalization(['lv', 'en'], on_fallback=log_fallback) # doctest: +SKIP """ requested_locale: LocaleCode @@ -353,12 +353,12 @@ class LoadSummary: results: All individual load results (immutable tuple) Example: - >>> l10n = FluentLocalization(['en', 'de'], ['ui.ftl'], loader) - >>> summary = l10n.get_load_summary() - >>> if summary.errors > 0: + >>> l10n = FluentLocalization(['en', 'de'], ['ui.ftl'], loader) # doctest: +SKIP + >>> summary = l10n.get_load_summary() # doctest: +SKIP + >>> if summary.errors > 0: # doctest: +SKIP ... for result in summary.get_errors(): ... print(f"Failed: {result.locale}/{result.resource_id}: {result.error}") - >>> if summary.has_junk: + >>> if summary.has_junk: # doctest: +SKIP ... for result in summary.get_with_junk(): ... print(f"Junk in {result.source_path}: {len(result.junk_entries)} entries") """ diff --git a/src/ftllexengine/localization/orchestrator.py b/src/ftllexengine/localization/orchestrator.py index a48e49e6..98a50fa9 100644 --- a/src/ftllexengine/localization/orchestrator.py +++ b/src/ftllexengine/localization/orchestrator.py @@ -33,43 +33,23 @@ from __future__ import annotations -import time -from collections.abc import Callable, Generator, Iterable, Mapping -from typing import TYPE_CHECKING, NoReturn +from typing import TYPE_CHECKING -from ftllexengine.constants import FALLBACK_INVALID, FALLBACK_MISSING_MESSAGE from ftllexengine.core.locale_utils import require_locale_code -from ftllexengine.diagnostics.codes import Diagnostic, DiagnosticCode -from ftllexengine.diagnostics.errors import ErrorCategory, FrozenFluentError -from ftllexengine.enums import LoadStatus -from ftllexengine.integrity import ( - FormattingIntegrityError, - IntegrityCheckFailedError, - IntegrityContext, -) -from ftllexengine.introspection import ( - MessageVariableValidationResult, -) -from 
ftllexengine.introspection import ( - validate_message_variables as validate_message_ast_variables, -) -from ftllexengine.localization.loading import ( - FallbackInfo, - LoadSummary, - ResourceLoader, - ResourceLoadResult, -) -from ftllexengine.runtime.bundle import FluentBundle -from ftllexengine.runtime.cache import CacheAuditLogEntry, CacheStats +from ftllexengine.localization.orchestrator_formatting import _LocalizationFormattingMixin +from ftllexengine.localization.orchestrator_loading import _LocalizationLoadingMixin +from ftllexengine.localization.orchestrator_queries import _LocalizationQueryMixin +from ftllexengine.runtime.cache import CacheStats from ftllexengine.runtime.rwlock import RWLock if TYPE_CHECKING: + from collections.abc import Callable, Iterable + + from ftllexengine.core.semantic_types import LocaleCode, ResourceId from ftllexengine.core.value_types import FluentValue - from ftllexengine.diagnostics import ValidationResult - from ftllexengine.introspection import MessageIntrospection - from ftllexengine.localization.types import FTLSource, LocaleCode, MessageId, ResourceId + from ftllexengine.localization.loading import FallbackInfo, ResourceLoader, ResourceLoadResult + from ftllexengine.runtime.bundle import FluentBundle from ftllexengine.runtime.cache_config import CacheConfig - from ftllexengine.syntax import Junk, Message, Term __all__ = ["FluentLocalization", "LocalizationCacheStats"] @@ -85,7 +65,11 @@ class LocalizationCacheStats(CacheStats, total=True): """Number of initialized bundles contributing to these statistics.""" -class FluentLocalization: +class FluentLocalization( + _LocalizationQueryMixin, + _LocalizationFormattingMixin, + _LocalizationLoadingMixin, +): """Multi-locale message formatting with fallback chains. 
Orchestrates multiple FluentBundle instances (one per locale) and implements @@ -103,16 +87,16 @@ class FluentLocalization: - Match statements for error handling Example - Disk-based resources: - >>> loader = PathResourceLoader("locales/{locale}") - >>> l10n = FluentLocalization(['lv', 'en'], ['ui.ftl'], loader) - >>> result, errors = l10n.format_value('welcome', {'name': 'Anna'}) + >>> loader = PathResourceLoader("locales/{locale}") # doctest: +SKIP + >>> l10n = FluentLocalization(['lv', 'en'], ['ui.ftl'], loader) # doctest: +SKIP + >>> result, errors = l10n.format_value('welcome', {'name': 'Anna'}) # doctest: +SKIP # Tries 'lv' first, falls back to 'en' if message not found Example - Direct resource provision: - >>> l10n = FluentLocalization(['lv', 'en']) - >>> l10n.add_resource('lv', 'welcome = Sveiki, { $name }!') - >>> l10n.add_resource('en', 'welcome = Hello, { $name }!') - >>> result, errors = l10n.format_value('welcome', {'name': 'Anna'}) + >>> l10n = FluentLocalization(['lv', 'en']) # doctest: +SKIP + >>> l10n.add_resource('lv', 'welcome = Sveiki, { $name }!') # doctest: +SKIP + >>> l10n.add_resource('en', 'welcome = Hello, { $name }!') # doctest: +SKIP + >>> result, errors = l10n.format_value('welcome', {'name': 'Anna'}) # doctest: +SKIP # Returns: ('Sveiki, Anna!', ()) Attributes: @@ -224,145 +208,6 @@ def __init__( result = self._load_single_resource(locale, resource_id, resource_loader) self._load_results.append(result) - def _create_bundle(self, locale: LocaleCode) -> FluentBundle: - """Create and register a bundle for locale. Caller must hold write lock. - - Applies any pending functions registered before bundle creation. 
- - Args: - locale: Locale code (must be in _locales tuple) - - Returns: - Newly created and registered FluentBundle instance - """ - bundle = FluentBundle( - locale, - use_isolating=self._use_isolating, - cache=self._cache_config, - strict=self._strict, - ) - for name, func in self._pending_functions.items(): - bundle.add_function(name, func) - self._bundles[locale] = bundle - return bundle - - def _get_or_create_bundle(self, locale: LocaleCode) -> FluentBundle: - """Get existing bundle or create one lazily. - - Implements lazy bundle initialization to reduce memory usage when - fallback locales are rarely accessed. - - Thread-safe via double-checked locking: read lock for the common - already-initialized case (allows concurrent format operations), write - lock only when a new bundle must be created. - - Must be called WITHOUT holding any lock. Use _create_bundle() directly - when already holding the write lock. - - Args: - locale: Locale code (must be in _locales tuple) - - Returns: - FluentBundle instance for the locale - """ - # Fast path: read lock allows concurrent format operations. - with self._lock.read(): - if locale in self._bundles: - return self._bundles[locale] - - # Slow path: bundle does not exist; acquire write lock and create it. - # Double-check after acquiring write lock: another thread may have - # created the bundle between our read-lock release and write-lock acquire. - with self._lock.write(): - if locale in self._bundles: # pragma: no cover - return self._bundles[locale] - return self._create_bundle(locale) - - def _load_single_resource( - self, - locale: LocaleCode, - resource_id: ResourceId, - resource_loader: ResourceLoader, - ) -> ResourceLoadResult: - """Load a single FTL resource and record the result. - - Encapsulates the logic for loading one resource for one locale, - including path construction, error handling, and result recording. 
- - Args: - locale: Locale code to load resource for - resource_id: Resource identifier (e.g., 'main.ftl') - resource_loader: Loader implementation to use - - Returns: - ResourceLoadResult indicating success, not_found, or error - """ - # Delegate path description to loader via protocol method. - # ResourceLoader.describe_path() returns a human-readable path string. - # PathResourceLoader overrides this with the actual locale-substituted path. - # Custom loaders use the default "{locale}/{resource_id}" implementation. - source_path = resource_loader.describe_path(locale, resource_id) - - try: - ftl_source = resource_loader.load(locale, resource_id) - bundle = self._get_or_create_bundle(locale) - junk_entries = bundle.add_resource(ftl_source, source_path=source_path) - return ResourceLoadResult( - locale=locale, - resource_id=resource_id, - status=LoadStatus.SUCCESS, - source_path=source_path, - junk_entries=junk_entries, - ) - except FileNotFoundError: - # Resource doesn't exist for this locale - expected for optional locales - return ResourceLoadResult( - locale=locale, - resource_id=resource_id, - status=LoadStatus.NOT_FOUND, - source_path=source_path, - ) - except (OSError, ValueError) as e: - # Permission errors, path traversal errors, etc. - return ResourceLoadResult( - locale=locale, - resource_id=resource_id, - status=LoadStatus.ERROR, - error=e, - source_path=source_path, - ) - - @staticmethod - def _check_mapping_arg( - args: Mapping[str, FluentValue] | None, - errors: list[FrozenFluentError], - ) -> bool: - """Validate that args is None or a Mapping (defensive runtime check). - - Callers annotate args as Mapping | None, but external callers may - violate the contract at runtime. This static method provides the - shared guard used by both format_value() and format_pattern(). 
- - Args: - args: The args argument from format_value or format_pattern - errors: Mutable error list; an error is appended if args is invalid - - Returns: - True if args is valid (None or Mapping), False otherwise - """ - if args is not None and not isinstance(args, Mapping): - diagnostic = Diagnostic( # type: ignore[unreachable] - code=DiagnosticCode.INVALID_ARGUMENT, - message=f"Invalid args type: expected Mapping or None, got {type(args).__name__}", - ) - errors.append( - FrozenFluentError( - str(diagnostic), ErrorCategory.RESOLUTION, diagnostic=diagnostic - ) - ) - return False - return True - @property def locales(self) -> tuple[LocaleCode, ...]: """Get immutable locale fallback chain. @@ -372,233 +217,6 @@ def locales(self) -> tuple[LocaleCode, ...]: """ return self._locales - def get_load_summary(self) -> LoadSummary: - """Get summary of resource load attempts during initialization. - - Returns a LoadSummary with information about which resources loaded - successfully, which were not found, and which failed with errors - during the __init__() resource loading phase. - - IMPORTANT: This only reflects resources loaded via the ResourceLoader - during construction. Resources added dynamically via add_resource() - are NOT included in this summary. This maintains a clear semantic - distinction between initialization-time (fail-fast) loading and - runtime (dynamic) resource additions. - - Use this to diagnose loading issues, especially in multi-locale setups - where some locales may have missing or broken resources. - - Returns: - LoadSummary with aggregated load results from initialization - - Example: - >>> loader = PathResourceLoader("locales/{locale}") - >>> l10n = FluentLocalization(['en', 'de', 'fr'], ['ui.ftl'], loader) - >>> summary = l10n.get_load_summary() - >>> print(f"Loaded: {summary.successful}/{summary.total_attempted}") - Loaded: 2/3 - >>> if summary.has_errors: - ... for result in summary.get_errors(): - ... 
print(f"Error loading {result.source_path}: {result.error}") - >>> for result in summary.get_not_found(): - ... print(f"Missing: {result.locale}/{result.resource_id}") - """ - return LoadSummary(results=tuple(self._load_results)) - - @staticmethod - def _describe_unclean_load_result( - result: ResourceLoadResult, - ) -> tuple[str, str]: - """Describe the first non-clean initialization result.""" - key = result.source_path or f"{result.locale}/{result.resource_id}" - if result.is_error: - error_name = type(result.error).__name__ if result.error is not None else "UnknownError" - return (key, f"load error ({error_name})") - if result.is_not_found: - return (key, "resource not found") - - junk_count = len(result.junk_entries) - noun = "entry" if junk_count == 1 else "entries" - return (key, f"{junk_count} junk {noun}") - - def _raise_integrity_check_failed( - self, - operation: str, - message: str, - *, - key: str | None = None, - expected: str | None = None, - actual: str | None = None, - ) -> NoReturn: - """Raise IntegrityCheckFailedError with localization context.""" - context = IntegrityContext( - component="localization", - operation=operation, - key=key, - expected=expected, - actual=actual, - timestamp=time.monotonic(), - wall_time_unix=time.time(), - ) - raise IntegrityCheckFailedError(message, context=context) - - def require_clean(self) -> LoadSummary: - """Require a clean initialization load summary. - - Returns the immutable initialization LoadSummary when every resource - loaded successfully and produced no junk. Raises IntegrityCheckFailedError - when initialization had missing resources, load errors, or junk entries. 
- """ - summary = self.get_load_summary() - if summary.all_clean: - return summary - - issue_key: str | None = None - issue_detail: str | None = None - for result in summary.results: # pragma: no branch - if result.is_error or result.is_not_found or result.has_junk: - issue_key, issue_detail = self._describe_unclean_load_result(result) - break - - actual = repr(summary) - detail = ( - f" First issue: {issue_detail} at {issue_key}." - if issue_key and issue_detail - else "" - ) - msg = f"Localization initialization is not clean: {actual}.{detail}" - self._raise_integrity_check_failed( - "require_clean", - msg, - key=issue_key, - expected="LoadSummary(all_clean=True)", - actual=actual, - ) - raise AssertionError # pragma: no cover - - @staticmethod - def _format_schema_difference( - validation: MessageVariableValidationResult, - ) -> str: - """Render a concise schema mismatch description.""" - parts: list[str] = [] - if validation.missing_variables: - missing = ", ".join(sorted(validation.missing_variables)) - parts.append(f"missing {{{missing}}}") - if validation.extra_variables: - extra = ", ".join(sorted(validation.extra_variables)) - parts.append(f"extra {{{extra}}}") - return "; ".join(parts) - - def _resolve_message_schema_validation( - self, - message_id: MessageId, - expected_variables: frozenset[str] | set[str], - ) -> MessageVariableValidationResult | None: - """Resolve a message through the fallback chain and validate its schema.""" - message = self.get_message(message_id) - if message is None: - return None - return validate_message_ast_variables(message, frozenset(expected_variables)) - - def validate_message_variables( - self, - message_id: str, - expected_variables: frozenset[str] | set[str], - ) -> MessageVariableValidationResult: - """Require an exact variable schema match for a single fallback-resolved message. - - Resolves ``message_id`` using the same fallback-chain semantics as - ``get_message()``. 
Returns the immutable validation result when the - message exists and its declared variables exactly match - ``expected_variables``. Missing messages and exact-schema mismatches - raise ``IntegrityCheckFailedError`` with localization-scoped context. - """ - validation = self._resolve_message_schema_validation(message_id, expected_variables) - if validation is None: - msg = f"Localization message schema validation failed: {message_id}: not found" - self._raise_integrity_check_failed( - "validate_message_variables", - msg, - key=message_id, - expected="1 exact schema match", - actual="missing_messages=1", - ) - - if validation.is_valid: - return validation - - difference = self._format_schema_difference(validation) - msg = f"Localization message schema validation failed: {message_id}: {difference}" - self._raise_integrity_check_failed( - "validate_message_variables", - msg, - key=message_id, - expected="1 exact schema match", - actual="schema_mismatches=1", - ) - raise AssertionError # pragma: no cover - - def validate_message_schemas( - self, - expected_schemas: Mapping[MessageId, frozenset[str] | set[str]], - ) -> tuple[MessageVariableValidationResult, ...]: - """Require exact variable-schema matches for specific messages. - - Validates messages using the existing fallback chain and returns one - MessageVariableValidationResult per requested message when every schema - matches exactly. Raises IntegrityCheckFailedError if any message is - missing or if any declared variable set differs from the expected set. 
- """ - results: list[MessageVariableValidationResult] = [] - mismatches: list[str] = [] - first_failure: str | None = None - missing_messages = 0 - schema_mismatches = 0 - - for message_id, expected_variables in expected_schemas.items(): - validation = self._resolve_message_schema_validation(message_id, expected_variables) - if validation is None: - first_failure = first_failure or str(message_id) - missing_messages += 1 - mismatches.append(f"{message_id}: not found") - continue - - results.append(validation) - if validation.is_valid: - continue - - first_failure = first_failure or message_id - schema_mismatches += 1 - difference = self._format_schema_difference(validation) - mismatches.append(f"{message_id}: {difference}") - - if missing_messages > 0 or schema_mismatches > 0: - fragments = mismatches[:3] - remaining = len(mismatches) - len(fragments) - if remaining > 0: - noun = "issue" if remaining == 1 else "issues" - fragments.append(f"... {remaining} more {noun}") - - actual_parts: list[str] = [] - if missing_messages > 0: - actual_parts.append(f"missing_messages={missing_messages}") - if schema_mismatches > 0: - actual_parts.append(f"schema_mismatches={schema_mismatches}") - - actual = ", ".join(actual_parts) - summary = "; ".join(fragments) - msg = f"Localization message schema validation failed: {summary}" - self._raise_integrity_check_failed( - "validate_message_schemas", - msg, - key=first_failure, - expected=f"{len(expected_schemas)} exact schema match(es)", - actual=actual, - ) - - return tuple(results) - @property def cache_enabled(self) -> bool: """Get whether format caching is enabled for all bundles (read-only). 
@@ -607,12 +225,12 @@ def cache_enabled(self) -> bool: bool: True if caching is enabled, False otherwise Example: - >>> from ftllexengine.runtime.cache_config import CacheConfig - >>> l10n = FluentLocalization(['lv', 'en'], cache=CacheConfig()) - >>> l10n.cache_enabled + >>> from ftllexengine.runtime.cache_config import CacheConfig # doctest: +SKIP + >>> l10n = FluentLocalization(['lv', 'en'], cache=CacheConfig()) # doctest: +SKIP + >>> l10n.cache_enabled # doctest: +SKIP True - >>> l10n_no_cache = FluentLocalization(['lv', 'en']) - >>> l10n_no_cache.cache_enabled + >>> l10n_no_cache = FluentLocalization(['lv', 'en']) # doctest: +SKIP + >>> l10n_no_cache.cache_enabled # doctest: +SKIP False """ return self._cache_config is not None @@ -625,9 +243,11 @@ def cache_config(self) -> CacheConfig | None: CacheConfig or None if caching is disabled. Example: - >>> from ftllexengine.runtime.cache_config import CacheConfig - >>> l10n = FluentLocalization(['lv', 'en'], cache=CacheConfig(size=500)) - >>> l10n.cache_config.size + >>> from ftllexengine.runtime.cache_config import CacheConfig # doctest: +SKIP + >>> l10n = FluentLocalization( # doctest: +SKIP + ... ['lv', 'en'], cache=CacheConfig(size=500) + ... ) + >>> l10n.cache_config.size # doctest: +SKIP 500 """ return self._cache_config @@ -651,744 +271,11 @@ def __repr__(self) -> str: String representation showing locales and bundle count Example: - >>> l10n = FluentLocalization(['lv', 'en']) - >>> repr(l10n) + >>> l10n = FluentLocalization(['lv', 'en']) # doctest: +SKIP + >>> repr(l10n) # doctest: +SKIP "FluentLocalization(locales=('lv', 'en'), bundles=0/2)" """ with self._lock.read(): initialized = len(self._bundles) total = len(self._locales) return f"FluentLocalization(locales={self._locales!r}, bundles={initialized}/{total})" - - def add_resource( - self, locale: LocaleCode, ftl_source: FTLSource - ) -> tuple[Junk, ...]: - """Add FTL resource to specific locale bundle. 
- - Allows dynamic resource loading without ResourceLoader. - - Thread-safe via internal RWLock. - - Args: - locale: Locale code (must resolve to an entry in the fallback chain) - ftl_source: FTL source code - - Returns: - Tuple of Junk entries encountered during parsing. Empty tuple if - parsing succeeded without errors. - - Raises: - ValueError: If locale does not resolve to a locale in the fallback chain. - """ - normalized_locale = require_locale_code(locale, "locale") - - with self._lock.write(): - if normalized_locale not in self._locales: - msg = ( - f"Locale '{normalized_locale}' not in fallback chain {self._locales}" - ) - raise ValueError(msg) - - # Direct lookup/create under write lock. _get_or_create_bundle cannot - # be used here because it acquires a read lock, and RWLock prohibits - # acquiring a read lock while holding the write lock. - if normalized_locale not in self._bundles: - self._create_bundle(normalized_locale) - return self._bundles[normalized_locale].add_resource(ftl_source) - - def add_resource_stream( - self, locale: LocaleCode, lines: Iterable[str], *, source_path: str | None = None - ) -> tuple[Junk, ...]: - """Add FTL resource to specific locale bundle from a line-oriented stream. - - Semantically identical to add_resource() but accepts any iterable of - lines rather than a pre-assembled source string. Memory usage is - proportional to the largest single FTL entry in the stream, not the - total resource size. - - Thread-safe via internal RWLock. - - Args: - locale: Locale code (must resolve to an entry in the fallback chain) - lines: Iterable of FTL source lines. Trailing newlines are stripped - per line. - source_path: Optional path to source file for better error messages - (e.g., "locales/lv/ui.ftl"). Defaults to "". - - Returns: - Tuple of Junk entries encountered during parsing. Empty tuple if - parsing succeeded without errors. - - Raises: - ValueError: If locale does not resolve to a locale in the fallback chain. 
- """ - normalized_locale = require_locale_code(locale, "locale") - - with self._lock.write(): - if normalized_locale not in self._locales: - msg = ( - f"Locale '{normalized_locale}' not in fallback chain {self._locales}" - ) - raise ValueError(msg) - - if normalized_locale not in self._bundles: - self._create_bundle(normalized_locale) - return self._bundles[normalized_locale].add_resource_stream( - lines, source_path=source_path - ) - - def _handle_message_not_found( - self, - message_id: MessageId, - errors: list[FrozenFluentError], - ) -> tuple[str, tuple[FrozenFluentError, ...]]: - """Handle message-not-found with consistent validation. - - Uses pattern matching to distinguish between empty/invalid message IDs - and valid IDs that simply weren't found in any locale. - - In strict mode, raises FormattingIntegrityError instead of returning - a fallback value. Financial applications must never silently display - placeholder text like ``{message_id}`` to end users. - - Args: - message_id: The message ID that was not found - errors: Mutable error list to append to - - Returns: - Tuple of (fallback_value, errors_tuple) - - Raises: - FormattingIntegrityError: In strict mode, always raised - """ - match message_id: - case str() if message_id: - # Valid but not found - diagnostic = Diagnostic( - code=DiagnosticCode.MESSAGE_NOT_FOUND, - message=f"Message '{message_id}' not found in any locale", - ) - error = FrozenFluentError( - str(diagnostic), ErrorCategory.REFERENCE, diagnostic=diagnostic - ) - errors.append(error) - fallback = FALLBACK_MISSING_MESSAGE.format(id=message_id) - case _: - # Empty or invalid message ID - diagnostic = Diagnostic( - code=DiagnosticCode.MESSAGE_NOT_FOUND, - message="Empty or invalid message ID", - ) - error = FrozenFluentError( - str(diagnostic), ErrorCategory.REFERENCE, diagnostic=diagnostic - ) - errors.append(error) - fallback = FALLBACK_INVALID - - errors_tuple = tuple(errors) - - if self._strict: - self._raise_strict_error(message_id, 
fallback, error) - - return (fallback, errors_tuple) - - def _raise_strict_error( - self, - message_id: MessageId, - fallback_value: str, - error: FrozenFluentError, - ) -> NoReturn: - """Raise FormattingIntegrityError for strict mode. - - Called from three single-error paths: - - format_pattern: invalid args type (not a Mapping or None) - - format_pattern: invalid attribute type (not str or None) - - _handle_message_not_found: message not found or invalid message ID - - Each call site produces exactly one error, matching the single-error - signature. The single-error constraint is enforced by the type signature. - - Args: - message_id: The message ID that failed - fallback_value: Value that would be returned in non-strict mode - error: The FrozenFluentError describing the failure - - Raises: - FormattingIntegrityError: Always raised with error details - """ - context = IntegrityContext( - component="localization", - operation="format_pattern", - key=str(message_id), - expected="", - actual="<1 error>", - timestamp=time.monotonic(), - wall_time_unix=time.time(), - ) - - msg = f"Strict mode: '{message_id}' failed: {error}" - raise FormattingIntegrityError( - msg, - context=context, - fluent_errors=(error,), - fallback_value=fallback_value, - message_id=str(message_id), - ) - - def format_value( - self, message_id: MessageId, args: Mapping[str, FluentValue] | None = None - ) -> tuple[str, tuple[FrozenFluentError, ...]]: - """Format message with fallback chain. - - Delegates to format_pattern() with attribute=None. Provided as a - convenience alias that matches Mozilla python-fluent's format_value API. 
- - Args: - message_id: Message identifier (e.g., 'welcome', 'error-404') - args: Message arguments for variable interpolation - - Returns: - Tuple of (formatted_value, errors) - - If message found: Returns formatted result from first bundle with message - - If not found: Returns ({message_id}, (error,)) - - Raises: - FormattingIntegrityError: In strict mode, raised when formatting - produces errors or when the message is not found in any locale. - - Example: - >>> l10n = FluentLocalization(['lv', 'en']) - >>> l10n.add_resource('lv', 'welcome = Sveiki!') - >>> l10n.add_resource('en', 'welcome = Hello!') - >>> result, errors = l10n.format_value('welcome') - >>> result - 'Sveiki!' - """ - return self.format_pattern(message_id, args) - - def has_message(self, message_id: MessageId) -> bool: - """Check if message exists in any locale. - - Args: - message_id: Message identifier - - Returns: - True if message exists in at least one locale - """ - for locale in self._locales: - bundle = self._get_or_create_bundle(locale) - if bundle.has_message(message_id): - return True - return False - - def format_pattern( - self, - message_id: MessageId, - args: Mapping[str, FluentValue] | None = None, - *, - attribute: str | None = None, - ) -> tuple[str, tuple[FrozenFluentError, ...]]: - """Format message with attribute support (fallback chain). - - Extends format_value() with attribute access. - - Args: - message_id: Message identifier - args: Variable arguments - attribute: Attribute name (e.g., "tooltip", "aria-label") - - Returns: - Tuple of (formatted_value, errors) - - Raises: - FormattingIntegrityError: In strict mode, raised when formatting - produces errors or when the message is not found in any locale. - - Example: - >>> l10n = FluentLocalization(['lv', 'en']) - >>> l10n.add_resource('lv', ''' - ... button = Klikšķināt - ... .tooltip = Klikšķiniet, lai iesniegtu - ... 
''') - >>> result, errors = l10n.format_pattern("button", attribute="tooltip") - >>> result - 'Klikšķiniet, lai iesniegtu' - """ - errors: list[FrozenFluentError] = [] - - if not self._check_mapping_arg(args, errors): - if self._strict: - self._raise_strict_error(message_id, FALLBACK_INVALID, errors[-1]) - return (FALLBACK_INVALID, tuple(errors)) - - # Validate attribute is None or a string - if attribute is not None and not isinstance(attribute, str): - attr_type = type(attribute).__name__ # type: ignore[unreachable] - diagnostic = Diagnostic( - code=DiagnosticCode.INVALID_ARGUMENT, - message=f"Invalid attribute type: expected str or None, got {attr_type}", - ) - attr_error = FrozenFluentError( - str(diagnostic), ErrorCategory.RESOLUTION, diagnostic=diagnostic - ) - errors.append(attr_error) - if self._strict: - self._raise_strict_error(message_id, FALLBACK_INVALID, attr_error) - return (FALLBACK_INVALID, tuple(errors)) - - for locale in self._locales: - bundle = self._get_or_create_bundle(locale) - - if bundle.has_message(message_id): - try: - value, bundle_errors = bundle.format_pattern( - message_id, args, attribute=attribute - ) - except FormattingIntegrityError as exc: - # Re-raise with corrected component: the caller invoked - # localization.format_pattern(), not bundle.format_pattern() directly. 
- old_ctx = exc.context - err_count = len(exc.fluent_errors) - new_ctx = IntegrityContext( - component="localization", - operation=old_ctx.operation if old_ctx else "format_pattern", - key=old_ctx.key if old_ctx else str(message_id), - expected=old_ctx.expected if old_ctx else "", - actual=old_ctx.actual if old_ctx else f"<{err_count} error(s)>", - timestamp=old_ctx.timestamp if old_ctx else time.monotonic(), - wall_time_unix=old_ctx.wall_time_unix if old_ctx else time.time(), - ) - raise FormattingIntegrityError( - str(exc), - context=new_ctx, - fluent_errors=exc.fluent_errors, - fallback_value=exc.fallback_value, - message_id=exc.message_id, - ) from exc - errors.extend(bundle_errors) - - if ( - self._on_fallback is not None - and locale != self._primary_locale - ): - fallback_info = FallbackInfo( - requested_locale=self._primary_locale, - resolved_locale=locale, - message_id=message_id, - ) - self._on_fallback(fallback_info) - - return (value, tuple(errors)) - - return self._handle_message_not_found(message_id, errors) - - def add_function(self, name: str, func: Callable[..., FluentValue]) -> None: - """Register custom function on all bundles. - - Functions are applied immediately to any already-created bundles, - and stored for deferred application to bundles created later. - This preserves lazy bundle initialization. - - Thread-safe via internal RWLock. - - Args: - name: Function name (UPPERCASE by convention) - func: Python function implementation returning FluentValue - - Example: - >>> l10n = FluentLocalization(['lv', 'en']) - >>> def CUSTOM(value: str) -> str: - ... 
return value.upper() - >>> l10n.add_function("CUSTOM", CUSTOM) - >>> l10n.add_resource('en', 'msg = { CUSTOM($text) }') - >>> result, _ = l10n.format_value('msg', {'text': 'hello'}) - >>> result - 'HELLO' - """ - with self._lock.write(): - # Store for future bundle creation (lazy loading support) - self._pending_functions[name] = func - - # Apply to any already-created bundles - for bundle in self._bundles.values(): - bundle.add_function(name, func) - - def introspect_message( - self, - message_id: MessageId, - ) -> MessageIntrospection | None: - """Get message introspection from first bundle with message. - - Args: - message_id: Message identifier - - Returns: - MessageIntrospection or None if not found - """ - for locale in self._locales: - bundle = self._get_or_create_bundle(locale) - if bundle.has_message(message_id): - return bundle.introspect_message(message_id) - return None - - def has_attribute( - self, - message_id: MessageId, - attribute: str, - ) -> bool: - """Check if message has specific attribute in any locale. - - Tries bundles in fallback order. Returns True if any bundle - has the message AND the specified attribute. - - Args: - message_id: Message identifier - attribute: Attribute name - - Returns: - True if attribute exists in at least one locale - """ - for locale in self._locales: - bundle = self._get_or_create_bundle(locale) - if bundle.has_attribute(message_id, attribute): - return True - return False - - def get_message_ids(self) -> list[str]: - """Get all message IDs across all locales. - - Returns the union of message IDs from all bundles, ordered by - first appearance in locale priority order. Primary locale IDs - appear first. 
- - Returns: - List of unique message identifiers - """ - seen: set[str] = set() - result: list[str] = [] - for locale in self._locales: - bundle = self._get_or_create_bundle(locale) - for msg_id in bundle.get_message_ids(): - if msg_id not in seen: - seen.add(msg_id) - result.append(msg_id) - return result - - def get_message_variables( - self, - message_id: MessageId, - ) -> frozenset[str]: - """Get variables required by a message. - - Delegates to the first bundle in fallback order that has the - message. - - Args: - message_id: Message identifier - - Returns: - Frozen set of variable names (without $ prefix) - - Raises: - KeyError: If message not found in any locale - """ - for locale in self._locales: - bundle = self._get_or_create_bundle(locale) - if bundle.has_message(message_id): - return bundle.get_message_variables(message_id) - msg = f"Message '{message_id}' not found in any locale" - raise KeyError(msg) - - def get_all_message_variables(self) -> dict[str, frozenset[str]]: - """Get variables for all messages across all locales. - - Merges variables from all bundles. For messages present in - multiple locales, the primary locale's variables take - precedence (first-wins). - - Returns: - Dictionary mapping message IDs to variable sets - """ - result: dict[str, frozenset[str]] = {} - for locale in self._locales: - bundle = self._get_or_create_bundle(locale) - for msg_id, variables in bundle.get_all_message_variables().items(): - if msg_id not in result: - result[msg_id] = variables - return result - - def introspect_term( - self, - term_id: str, - ) -> MessageIntrospection | None: - """Get term introspection from first bundle with term. - - Tries bundles in fallback order. 
- - Args: - term_id: Term identifier (without leading dash) - - Returns: - MessageIntrospection or None if not found - """ - for locale in self._locales: - bundle = self._get_or_create_bundle(locale) - try: - return bundle.introspect_term(term_id) - except KeyError: - continue - return None - - def get_message(self, message_id: MessageId) -> Message | None: - """Return the parsed AST node for a message using the fallback chain. - - Searches bundles in locale priority order and returns the Message from - the first locale that contains it. Returns None if no locale has the - message. - - This enables callers to use validate_message_variables() directly with - the structured MessageVariableValidationResult return type, rather than - performing variable set arithmetic via get_message_variables(). - - Args: - message_id: Message identifier - - Returns: - Message AST node from the highest-priority locale that has it, - or None if not found in any locale - - Example: - >>> l10n = FluentLocalization(['lv', 'en']) - >>> l10n.add_resource('lv', 'greeting = Sveiki, { $name }!') - >>> msg = l10n.get_message('greeting') - >>> if msg is not None: - ... from ftllexengine import validate_message_variables - ... result = validate_message_variables(msg, frozenset({'name'})) - ... assert result.is_valid - """ - for locale in self._locales: - bundle = self._get_or_create_bundle(locale) - msg = bundle.get_message(message_id) - if msg is not None: - return msg - return None - - def get_term(self, term_id: str) -> Term | None: - """Return the parsed AST node for a term using the fallback chain. - - Searches bundles in locale priority order and returns the Term from - the first locale that contains it. The term_id should be supplied - without the leading dash (e.g., ``"brand"`` for ``-brand``). 
- - Args: - term_id: Term identifier without leading dash - - Returns: - Term AST node from the highest-priority locale that has it, - or None if not found in any locale - - Example: - >>> l10n = FluentLocalization(['lv', 'en']) - >>> l10n.add_resource('lv', '-brand = Firefox') - >>> term = l10n.get_term('brand') - >>> assert term is not None - """ - for locale in self._locales: - bundle = self._get_or_create_bundle(locale) - term = bundle.get_term(term_id) - if term is not None: - return term - return None - - def get_babel_locale(self) -> str: - """Get Babel locale identifier from primary bundle. - - Returns Babel Locale for the first locale in fallback chain. - Useful for integrating with Babel's formatting functions. - - Returns: - Babel locale identifier - - Example: - >>> l10n = FluentLocalization(['lv', 'en']) - >>> locale = l10n.get_babel_locale() - >>> locale - 'lv' - """ - primary_locale = self._locales[0] - bundle = self._get_or_create_bundle(primary_locale) - return bundle.get_babel_locale() - - def validate_resource(self, ftl_source: FTLSource) -> ValidationResult: - """Validate FTL resource without adding to bundles. - - Uses primary locale's bundle for validation. - - Args: - ftl_source: FTL source code - - Returns: - ValidationResult with errors and warnings - - Example: - >>> l10n = FluentLocalization(['lv', 'en']) - >>> result = l10n.validate_resource("msg = Hello") - >>> result.is_valid - True - """ - primary_locale = self._locales[0] - bundle = self._get_or_create_bundle(primary_locale) - return bundle.validate_resource(ftl_source) - - def clear_cache(self) -> None: - """Clear format cache on all initialized bundles. - - Calls clear_cache() on each bundle that has been created. - Does not create new bundles. - - Thread-safe via internal RWLock. 
- """ - with self._lock.write(): - for bundle in self._bundles.values(): - bundle.clear_cache() - - def get_cache_stats(self) -> LocalizationCacheStats | None: - """Get aggregate cache statistics across all initialized bundles. - - Aggregates cache metrics from all bundles that have been created. - Useful for production monitoring of multi-locale deployments. - All fields from IntegrityCache.get_stats() are included so callers - can monitor corruption events, oversize skips, and audit state. - - Returns: - LocalizationCacheStats with aggregated metrics, or None if caching disabled. - Numeric fields are summed across all bundles; boolean fields - (write_once, strict, audit_enabled) reflect the first bundle's - configuration (all bundles share the same CacheConfig). - See LocalizationCacheStats and CacheStats for field definitions. - - Thread-safe via internal RWLock (read lock). - - Example: - >>> l10n = FluentLocalization(['en', 'de'], cache=CacheConfig()) - >>> l10n.add_resource('en', 'msg = Hello') - >>> l10n.add_resource('de', 'msg = Hallo') - >>> l10n.format_value('msg') # Uses 'en' bundle - >>> stats = l10n.get_cache_stats() - >>> stats["bundle_count"] - 2 - >>> stats["size"] # Total entries across all bundles - 1 - >>> stats["corruption_detected"] # Zero for healthy cache - 0 - """ - if self._cache_config is None: - return None - - with self._lock.read(): - total_size = 0 - total_maxsize = 0 - total_hits = 0 - total_misses = 0 - total_unhashable = 0 - total_oversize = 0 - total_error_bloat = 0 - total_combined_weight = 0 - total_corruption = 0 - total_idempotent = 0 - total_write_once_conflicts = 0 - total_sequence = 0 - total_audit_entries = 0 - # Boolean fields: representative from first bundle (all share same CacheConfig) - first_write_once: bool = False - first_strict: bool = False - first_audit_enabled: bool = False - first_max_entry_weight: int = 0 - first_max_errors: int = 0 - is_first = True - - for bundle in self._bundles.values(): - stats = 
bundle.get_cache_stats() - if stats is not None: - total_size += stats["size"] - total_maxsize += stats["maxsize"] - total_hits += stats["hits"] - total_misses += stats["misses"] - total_unhashable += stats["unhashable_skips"] - total_oversize += stats["oversize_skips"] - total_error_bloat += stats["error_bloat_skips"] - total_combined_weight += stats["combined_weight_skips"] - total_corruption += stats["corruption_detected"] - total_idempotent += stats["idempotent_writes"] - total_write_once_conflicts += stats["write_once_conflicts"] - total_sequence += stats["sequence"] - total_audit_entries += stats["audit_entries"] - if is_first: - first_write_once = stats["write_once"] - first_strict = stats["strict"] - first_audit_enabled = stats["audit_enabled"] - first_max_entry_weight = stats["max_entry_weight"] - first_max_errors = stats["max_errors_per_entry"] - is_first = False - - total_requests = total_hits + total_misses - hit_rate = (total_hits / total_requests * 100) if total_requests > 0 else 0.0 - - return { - "size": total_size, - "maxsize": total_maxsize, - "max_entry_weight": first_max_entry_weight, - "max_errors_per_entry": first_max_errors, - "hits": total_hits, - "misses": total_misses, - "hit_rate": round(hit_rate, 2), - "unhashable_skips": total_unhashable, - "oversize_skips": total_oversize, - "error_bloat_skips": total_error_bloat, - "combined_weight_skips": total_combined_weight, - "corruption_detected": total_corruption, - "idempotent_writes": total_idempotent, - "write_once_conflicts": total_write_once_conflicts, - "sequence": total_sequence, - "write_once": first_write_once, - "strict": first_strict, - "audit_enabled": first_audit_enabled, - "audit_entries": total_audit_entries, - "bundle_count": len(self._bundles), - } - - def get_cache_audit_log(self) -> dict[LocaleCode, tuple[CacheAuditLogEntry, ...]] | None: - """Get per-locale cache audit logs for initialized bundles. 
- - Returns: - Mapping of initialized locale codes to immutable cache audit-log entry - tuples, or None if caching is disabled. Bundles with audit logging - disabled return empty tuples. Uninitialized bundles are omitted and - this method does not create them. - """ - if self._cache_config is None: - return None - - with self._lock.read(): - audit_logs: dict[LocaleCode, tuple[CacheAuditLogEntry, ...]] = {} - for locale in self._locales: - bundle = self._bundles.get(locale) - if bundle is None: - continue - - audit_log = bundle.get_cache_audit_log() - if audit_log is not None: - audit_logs[locale] = audit_log - - return audit_logs - - def get_bundles(self) -> Generator[FluentBundle]: - """Lazy generator yielding bundles in fallback order. - - Enables advanced use cases where direct bundle access is needed. - Creates bundles lazily if they don't exist yet. - - Yields: - FluentBundle instances in locale priority order - """ - yield from (self._get_or_create_bundle(locale) for locale in self._locales) diff --git a/src/ftllexengine/localization/orchestrator_formatting.py b/src/ftllexengine/localization/orchestrator_formatting.py new file mode 100644 index 00000000..832e044c --- /dev/null +++ b/src/ftllexengine/localization/orchestrator_formatting.py @@ -0,0 +1,216 @@ +"""Formatting and mutation helpers for FluentLocalization.""" + +from __future__ import annotations + +import time +from typing import TYPE_CHECKING, NoReturn + +from ftllexengine.constants import FALLBACK_INVALID, FALLBACK_MISSING_MESSAGE +from ftllexengine.core.locale_utils import require_locale_code +from ftllexengine.diagnostics.codes import Diagnostic, DiagnosticCode +from ftllexengine.diagnostics.errors import ErrorCategory, FrozenFluentError +from ftllexengine.integrity import FormattingIntegrityError, IntegrityContext +from ftllexengine.localization.loading import FallbackInfo + +if TYPE_CHECKING: + from collections.abc import Callable, Iterable, Mapping + + from ftllexengine.core.semantic_types 
import FTLSource, LocaleCode, MessageId + from ftllexengine.core.value_types import FluentValue + from ftllexengine.localization.orchestrator_protocols import LocalizationStateProtocol + from ftllexengine.syntax import Junk + + +class _LocalizationFormattingMixin: + """Formatting and mutation behavior for FluentLocalization.""" + + def add_resource( + self: LocalizationStateProtocol, locale: LocaleCode, ftl_source: FTLSource + ) -> tuple[Junk, ...]: + """Add FTL resource to a specific locale bundle.""" + normalized_locale = require_locale_code(locale, "locale") + + with self._lock.write(): + if normalized_locale not in self._locales: + msg = f"Locale '{normalized_locale}' not in fallback chain {self._locales}" + raise ValueError(msg) + + if normalized_locale not in self._bundles: + self._create_bundle(normalized_locale) + return self._bundles[normalized_locale].add_resource(ftl_source) + + def add_resource_stream( + self: LocalizationStateProtocol, + locale: LocaleCode, + lines: Iterable[str], + *, + source_path: str | None = None, + ) -> tuple[Junk, ...]: + """Add FTL resource to a locale bundle from a line-oriented stream.""" + normalized_locale = require_locale_code(locale, "locale") + + with self._lock.write(): + if normalized_locale not in self._locales: + msg = f"Locale '{normalized_locale}' not in fallback chain {self._locales}" + raise ValueError(msg) + + if normalized_locale not in self._bundles: + self._create_bundle(normalized_locale) + return self._bundles[normalized_locale].add_resource_stream( + lines, source_path=source_path + ) + + def _handle_message_not_found( + self: LocalizationStateProtocol, + message_id: MessageId, + errors: list[FrozenFluentError], + ) -> tuple[str, tuple[FrozenFluentError, ...]]: + """Handle missing-message fallbacks consistently.""" + match message_id: + case str() if message_id: + diagnostic = Diagnostic( + code=DiagnosticCode.MESSAGE_NOT_FOUND, + message=f"Message '{message_id}' not found in any locale", + ) + error = 
FrozenFluentError( + str(diagnostic), ErrorCategory.REFERENCE, diagnostic=diagnostic + ) + errors.append(error) + fallback = FALLBACK_MISSING_MESSAGE.format(id=message_id) + case _: + diagnostic = Diagnostic( + code=DiagnosticCode.MESSAGE_NOT_FOUND, + message="Empty or invalid message ID", + ) + error = FrozenFluentError( + str(diagnostic), ErrorCategory.REFERENCE, diagnostic=diagnostic + ) + errors.append(error) + fallback = FALLBACK_INVALID + + errors_tuple = tuple(errors) + if self._strict: + self._raise_strict_error(message_id, fallback, error) + return (fallback, errors_tuple) + + def _raise_strict_error( + self: LocalizationStateProtocol, + message_id: MessageId, + fallback_value: str, + error: FrozenFluentError, + ) -> NoReturn: + """Raise FormattingIntegrityError for localization-level failures.""" + context = IntegrityContext( + component="localization", + operation="format_pattern", + key=str(message_id), + expected="", + actual="<1 error>", + timestamp=time.monotonic(), + wall_time_unix=time.time(), + ) + msg = f"Strict mode: '{message_id}' failed: {error}" + raise FormattingIntegrityError( + msg, + context=context, + fluent_errors=(error,), + fallback_value=fallback_value, + message_id=str(message_id), + ) + + def format_value( + self: LocalizationStateProtocol, + message_id: MessageId, + args: Mapping[str, FluentValue] | None = None, + ) -> tuple[str, tuple[FrozenFluentError, ...]]: + """Format a value by delegating to ``format_pattern``.""" + return self.format_pattern(message_id, args) + + def has_message(self: LocalizationStateProtocol, message_id: MessageId) -> bool: + """Return whether any locale in the chain contains ``message_id``.""" + for locale in self._locales: + bundle = self._get_or_create_bundle(locale) + if bundle.has_message(message_id): + return True + return False + + def format_pattern( + self: LocalizationStateProtocol, + message_id: MessageId, + args: Mapping[str, FluentValue] | None = None, + *, + attribute: str | None = None, + ) 
-> tuple[str, tuple[FrozenFluentError, ...]]: + """Format a message with fallback-chain semantics.""" + errors: list[FrozenFluentError] = [] + + if not self._check_mapping_arg(args, errors): + if self._strict: + self._raise_strict_error(message_id, FALLBACK_INVALID, errors[-1]) + return (FALLBACK_INVALID, tuple(errors)) + + raw_attribute: object = attribute + if raw_attribute is not None and not isinstance(raw_attribute, str): + attr_type = type(raw_attribute).__name__ + diagnostic = Diagnostic( + code=DiagnosticCode.INVALID_ARGUMENT, + message=f"Invalid attribute type: expected str or None, got {attr_type}", + ) + attr_error = FrozenFluentError( + str(diagnostic), ErrorCategory.RESOLUTION, diagnostic=diagnostic + ) + errors.append(attr_error) + if self._strict: + self._raise_strict_error(message_id, FALLBACK_INVALID, attr_error) + return (FALLBACK_INVALID, tuple(errors)) + + for locale in self._locales: + bundle = self._get_or_create_bundle(locale) + if not bundle.has_message(message_id): + continue + + try: + value, bundle_errors = bundle.format_pattern(message_id, args, attribute=attribute) + except FormattingIntegrityError as exc: + old_ctx = exc.context + err_count = len(exc.fluent_errors) + new_ctx = IntegrityContext( + component="localization", + operation=old_ctx.operation if old_ctx else "format_pattern", + key=old_ctx.key if old_ctx else str(message_id), + expected=old_ctx.expected if old_ctx else "", + actual=old_ctx.actual if old_ctx else f"<{err_count} error(s)>", + timestamp=old_ctx.timestamp if old_ctx else time.monotonic(), + wall_time_unix=old_ctx.wall_time_unix if old_ctx else time.time(), + ) + raise FormattingIntegrityError( + str(exc), + context=new_ctx, + fluent_errors=exc.fluent_errors, + fallback_value=exc.fallback_value, + message_id=exc.message_id, + ) from exc + + errors.extend(bundle_errors) + + if self._on_fallback is not None and locale != self._primary_locale: + self._on_fallback( + FallbackInfo( + 
requested_locale=self._primary_locale, + resolved_locale=locale, + message_id=message_id, + ) + ) + + return (value, tuple(errors)) + + return self._handle_message_not_found(message_id, errors) + + def add_function( + self: LocalizationStateProtocol, name: str, func: Callable[..., FluentValue] + ) -> None: + """Register a custom function on current and future bundles.""" + with self._lock.write(): + self._pending_functions[name] = func + for bundle in self._bundles.values(): + bundle.add_function(name, func) diff --git a/src/ftllexengine/localization/orchestrator_loading.py b/src/ftllexengine/localization/orchestrator_loading.py new file mode 100644 index 00000000..e16a230b --- /dev/null +++ b/src/ftllexengine/localization/orchestrator_loading.py @@ -0,0 +1,294 @@ +"""Loading and validation helpers for FluentLocalization.""" + +from __future__ import annotations + +import time +from collections.abc import Mapping +from typing import TYPE_CHECKING, NoReturn + +from ftllexengine.diagnostics.codes import Diagnostic, DiagnosticCode +from ftllexengine.diagnostics.errors import ErrorCategory, FrozenFluentError +from ftllexengine.enums import LoadStatus +from ftllexengine.integrity import IntegrityCheckFailedError, IntegrityContext +from ftllexengine.introspection import MessageVariableValidationResult +from ftllexengine.introspection import ( + validate_message_variables as validate_message_ast_variables, +) +from ftllexengine.localization.loading import LoadSummary, ResourceLoader, ResourceLoadResult +from ftllexengine.runtime.bundle import FluentBundle + +if TYPE_CHECKING: + from ftllexengine.core.semantic_types import LocaleCode, MessageId, ResourceId + from ftllexengine.core.value_types import FluentValue + from ftllexengine.localization.orchestrator_protocols import LocalizationStateProtocol + + +class _LocalizationLoadingMixin: + """Lifecycle and schema-validation behavior for FluentLocalization.""" + + def _create_bundle( + self: LocalizationStateProtocol, locale: 
LocaleCode + ) -> FluentBundle: + """Create and register a bundle for ``locale``.""" + bundle = FluentBundle( + locale, + use_isolating=self._use_isolating, + cache=self._cache_config, + strict=self._strict, + ) + for name, func in self._pending_functions.items(): + bundle.add_function(name, func) + self._bundles[locale] = bundle + return bundle + + def _get_or_create_bundle( + self: LocalizationStateProtocol, locale: LocaleCode + ) -> FluentBundle: + """Get an existing bundle or create it lazily.""" + with self._lock.read(): + if locale in self._bundles: + return self._bundles[locale] + + with self._lock.write(): + if locale in self._bundles: # pragma: no cover + return self._bundles[locale] + return self._create_bundle(locale) + + def _load_single_resource( + self: LocalizationStateProtocol, + locale: LocaleCode, + resource_id: ResourceId, + resource_loader: ResourceLoader, + ) -> ResourceLoadResult: + """Load one resource for one locale and capture the outcome.""" + source_path = resource_loader.describe_path(locale, resource_id) + + try: + ftl_source = resource_loader.load(locale, resource_id) + bundle = self._get_or_create_bundle(locale) + junk_entries = bundle.add_resource(ftl_source, source_path=source_path) + return ResourceLoadResult( + locale=locale, + resource_id=resource_id, + status=LoadStatus.SUCCESS, + source_path=source_path, + junk_entries=junk_entries, + ) + except FileNotFoundError: + return ResourceLoadResult( + locale=locale, + resource_id=resource_id, + status=LoadStatus.NOT_FOUND, + source_path=source_path, + ) + except (OSError, ValueError) as error: + return ResourceLoadResult( + locale=locale, + resource_id=resource_id, + status=LoadStatus.ERROR, + error=error, + source_path=source_path, + ) + + @staticmethod + def _check_mapping_arg( + args: Mapping[str, FluentValue] | None, + errors: list[FrozenFluentError], + ) -> bool: + """Validate that ``args`` is ``None`` or a mapping.""" + raw_args: object = args + if raw_args is not None and not 
isinstance(raw_args, Mapping): + diagnostic = Diagnostic( + code=DiagnosticCode.INVALID_ARGUMENT, + message=( + f"Invalid args type: expected Mapping or None, got " + f"{type(raw_args).__name__}" + ), + ) + errors.append( + FrozenFluentError( + str(diagnostic), ErrorCategory.RESOLUTION, diagnostic=diagnostic + ) + ) + return False + return True + + def get_load_summary(self: LocalizationStateProtocol) -> LoadSummary: + """Return the immutable initialization load summary.""" + return LoadSummary(results=tuple(self._load_results)) + + @staticmethod + def _describe_unclean_load_result( + result: ResourceLoadResult, + ) -> tuple[str, str]: + """Describe the first non-clean initialization result.""" + key = result.source_path or f"{result.locale}/{result.resource_id}" + if result.is_error: + error_name = type(result.error).__name__ if result.error is not None else "UnknownError" + return (key, f"load error ({error_name})") + if result.is_not_found: + return (key, "resource not found") + + junk_count = len(result.junk_entries) + noun = "entry" if junk_count == 1 else "entries" + return (key, f"{junk_count} junk {noun}") + + def _raise_integrity_check_failed( + self: LocalizationStateProtocol, + operation: str, + message: str, + *, + key: str | None = None, + expected: str | None = None, + actual: str | None = None, + ) -> NoReturn: + """Raise IntegrityCheckFailedError with localization-scoped context.""" + context = IntegrityContext( + component="localization", + operation=operation, + key=key, + expected=expected, + actual=actual, + timestamp=time.monotonic(), + wall_time_unix=time.time(), + ) + raise IntegrityCheckFailedError(message, context=context) + + def require_clean(self: LocalizationStateProtocol) -> LoadSummary: + """Require a clean initialization load summary.""" + summary = self.get_load_summary() + if summary.all_clean: + return summary + + issue_key: str | None = None + issue_detail: str | None = None + for result in summary.results: # pragma: no branch + 
if result.is_error or result.is_not_found or result.has_junk: + issue_key, issue_detail = self._describe_unclean_load_result(result) + break + + actual = repr(summary) + detail = ( + f" First issue: {issue_detail} at {issue_key}." + if issue_key and issue_detail + else "" + ) + msg = f"Localization initialization is not clean: {actual}.{detail}" + self._raise_integrity_check_failed( + "require_clean", + msg, + key=issue_key, + expected="LoadSummary(all_clean=True)", + actual=actual, + ) + raise AssertionError # pragma: no cover + + @staticmethod + def _format_schema_difference( + validation: MessageVariableValidationResult, + ) -> str: + """Render a concise schema mismatch description.""" + parts: list[str] = [] + if validation.missing_variables: + missing = ", ".join(sorted(validation.missing_variables)) + parts.append(f"missing {{{missing}}}") + if validation.extra_variables: + extra = ", ".join(sorted(validation.extra_variables)) + parts.append(f"extra {{{extra}}}") + return "; ".join(parts) + + def _resolve_message_schema_validation( + self: LocalizationStateProtocol, + message_id: MessageId, + expected_variables: frozenset[str] | set[str], + ) -> MessageVariableValidationResult | None: + """Resolve a message through the fallback chain and validate its schema.""" + message = self.get_message(message_id) + if message is None: + return None + return validate_message_ast_variables(message, frozenset(expected_variables)) + + def validate_message_variables( + self: LocalizationStateProtocol, + message_id: str, + expected_variables: frozenset[str] | set[str], + ) -> MessageVariableValidationResult: + """Require an exact variable schema match for one fallback-resolved message.""" + validation = self._resolve_message_schema_validation(message_id, expected_variables) + if validation is None: + msg = f"Localization message schema validation failed: {message_id}: not found" + self._raise_integrity_check_failed( + "validate_message_variables", + msg, + key=message_id, + 
expected="1 exact schema match", + actual="missing_messages=1", + ) + + if validation.is_valid: + return validation + + difference = self._format_schema_difference(validation) + msg = f"Localization message schema validation failed: {message_id}: {difference}" + self._raise_integrity_check_failed( + "validate_message_variables", + msg, + key=message_id, + expected="1 exact schema match", + actual="schema_mismatches=1", + ) + raise AssertionError # pragma: no cover + + def validate_message_schemas( + self: LocalizationStateProtocol, + expected_schemas: Mapping[MessageId, frozenset[str] | set[str]], + ) -> tuple[MessageVariableValidationResult, ...]: + """Require exact variable-schema matches for specific messages.""" + results: list[MessageVariableValidationResult] = [] + mismatches: list[str] = [] + first_failure: str | None = None + missing_messages = 0 + schema_mismatches = 0 + + for message_id, expected_variables in expected_schemas.items(): + validation = self._resolve_message_schema_validation(message_id, expected_variables) + if validation is None: + first_failure = first_failure or str(message_id) + missing_messages += 1 + mismatches.append(f"{message_id}: not found") + continue + + results.append(validation) + if validation.is_valid: + continue + + first_failure = first_failure or message_id + schema_mismatches += 1 + difference = self._format_schema_difference(validation) + mismatches.append(f"{message_id}: {difference}") + + if missing_messages > 0 or schema_mismatches > 0: + fragments = mismatches[:3] + remaining = len(mismatches) - len(fragments) + if remaining > 0: + noun = "issue" if remaining == 1 else "issues" + fragments.append(f"... 
{remaining} more {noun}") + + actual_parts: list[str] = [] + if missing_messages > 0: + actual_parts.append(f"missing_messages={missing_messages}") + if schema_mismatches > 0: + actual_parts.append(f"schema_mismatches={schema_mismatches}") + + actual = ", ".join(actual_parts) + summary = "; ".join(fragments) + msg = f"Localization message schema validation failed: {summary}" + self._raise_integrity_check_failed( + "validate_message_schemas", + msg, + key=first_failure, + expected=f"{len(expected_schemas)} exact schema match(es)", + actual=actual, + ) + + return tuple(results) diff --git a/src/ftllexengine/localization/orchestrator_protocols.py b/src/ftllexengine/localization/orchestrator_protocols.py new file mode 100644 index 00000000..49e83d5b --- /dev/null +++ b/src/ftllexengine/localization/orchestrator_protocols.py @@ -0,0 +1,108 @@ +"""Type-checking protocols for FluentLocalization mixins.""" + +from __future__ import annotations + +from typing import TYPE_CHECKING, NoReturn, Protocol + +if TYPE_CHECKING: + from collections.abc import Callable, Mapping + + from ftllexengine.core.semantic_types import LocaleCode, MessageId + from ftllexengine.core.value_types import FluentValue + from ftllexengine.diagnostics import FrozenFluentError + from ftllexengine.introspection import MessageVariableValidationResult + from ftllexengine.localization.loading import ( + FallbackInfo, + LoadSummary, + ResourceLoadResult, + ) + from ftllexengine.runtime.bundle import FluentBundle + from ftllexengine.runtime.cache_config import CacheConfig + from ftllexengine.runtime.rwlock import RWLock + from ftllexengine.syntax import Message + + +class LocalizationStateProtocol(Protocol): + """Structural contract implemented by FluentLocalization for its mixins.""" + + _bundles: dict[LocaleCode, FluentBundle] + _cache_config: CacheConfig | None + _load_results: list[ResourceLoadResult] + _locales: tuple[LocaleCode, ...] 
+ _lock: RWLock + _on_fallback: Callable[[FallbackInfo], None] | None + _pending_functions: dict[str, Callable[..., FluentValue]] + _primary_locale: LocaleCode + _strict: bool + _use_isolating: bool + + def _create_bundle(self, locale: LocaleCode) -> FluentBundle: + ... # pragma: no cover - typing-only protocol declaration + + def _get_or_create_bundle(self, locale: LocaleCode) -> FluentBundle: + ... # pragma: no cover - typing-only protocol declaration + + @staticmethod + def _check_mapping_arg( + args: Mapping[str, FluentValue] | None, + errors: list[FrozenFluentError], + ) -> bool: + ... # pragma: no cover - typing-only protocol declaration + + def get_message(self, message_id: MessageId) -> Message | None: + ... # pragma: no cover - typing-only protocol declaration + + def _handle_message_not_found( + self, + message_id: MessageId, + errors: list[FrozenFluentError], + ) -> tuple[str, tuple[FrozenFluentError, ...]]: + ... # pragma: no cover - typing-only protocol declaration + + def format_pattern( + self, + message_id: MessageId, + args: Mapping[str, FluentValue] | None = None, + *, + attribute: str | None = None, + ) -> tuple[str, tuple[FrozenFluentError, ...]]: + ... # pragma: no cover - typing-only protocol declaration + + def _raise_strict_error( + self, + message_id: MessageId, + fallback_value: str, + error: FrozenFluentError, + ) -> NoReturn: + ... # pragma: no cover - typing-only protocol declaration + + def get_load_summary(self) -> LoadSummary: + ... # pragma: no cover - typing-only protocol declaration + + @staticmethod + def _describe_unclean_load_result(result: ResourceLoadResult) -> tuple[str, str]: + ... # pragma: no cover - typing-only protocol declaration + + def _raise_integrity_check_failed( + self, + operation: str, + message: str, + *, + key: str | None = None, + expected: str | None = None, + actual: str | None = None, + ) -> NoReturn: + ... 
# pragma: no cover - typing-only protocol declaration + + @staticmethod + def _format_schema_difference( + validation: MessageVariableValidationResult, + ) -> str: + ... # pragma: no cover - typing-only protocol declaration + + def _resolve_message_schema_validation( + self, + message_id: MessageId, + expected_variables: frozenset[str] | set[str], + ) -> MessageVariableValidationResult | None: + ... # pragma: no cover - typing-only protocol declaration diff --git a/src/ftllexengine/localization/orchestrator_queries.py b/src/ftllexengine/localization/orchestrator_queries.py new file mode 100644 index 00000000..f282bf6b --- /dev/null +++ b/src/ftllexengine/localization/orchestrator_queries.py @@ -0,0 +1,240 @@ +"""Query and cache-reporting helpers for FluentLocalization.""" + +from __future__ import annotations + +from typing import TYPE_CHECKING, cast + +if TYPE_CHECKING: + from collections.abc import Iterator + + from ftllexengine.core.semantic_types import FTLSource, LocaleCode, MessageId + from ftllexengine.diagnostics import ValidationResult + from ftllexengine.introspection import MessageIntrospection + from ftllexengine.localization.orchestrator import LocalizationCacheStats + from ftllexengine.localization.orchestrator_protocols import LocalizationStateProtocol + from ftllexengine.runtime.bundle import FluentBundle + from ftllexengine.runtime.cache import CacheAuditLogEntry + from ftllexengine.syntax import Message, Term + + +class _LocalizationQueryMixin: + """Read-only query behavior for FluentLocalization.""" + + def introspect_message( + self: LocalizationStateProtocol, + message_id: MessageId, + ) -> MessageIntrospection | None: + """Return introspection for the first locale containing ``message_id``.""" + for locale in self._locales: + bundle = self._get_or_create_bundle(locale) + if bundle.has_message(message_id): + return bundle.introspect_message(message_id) + return None + + def has_attribute( + self: LocalizationStateProtocol, + message_id: 
MessageId, + attribute: str, + ) -> bool: + """Return whether any locale exposes ``attribute`` for ``message_id``.""" + for locale in self._locales: + bundle = self._get_or_create_bundle(locale) + if bundle.has_attribute(message_id, attribute): + return True + return False + + def get_message_ids(self: LocalizationStateProtocol) -> list[str]: + """Return the union of message IDs across the fallback chain.""" + seen: set[str] = set() + result: list[str] = [] + for locale in self._locales: + bundle = self._get_or_create_bundle(locale) + for msg_id in bundle.get_message_ids(): + if msg_id not in seen: + seen.add(msg_id) + result.append(msg_id) + return result + + def get_message_variables( + self: LocalizationStateProtocol, + message_id: MessageId, + ) -> frozenset[str]: + """Return variables from the first locale that contains ``message_id``.""" + for locale in self._locales: + bundle = self._get_or_create_bundle(locale) + if bundle.has_message(message_id): + return bundle.get_message_variables(message_id) + msg = f"Message '{message_id}' not found in any locale" + raise KeyError(msg) + + def get_all_message_variables( + self: LocalizationStateProtocol, + ) -> dict[str, frozenset[str]]: + """Return variables for all messages across the fallback chain.""" + result: dict[str, frozenset[str]] = {} + for locale in self._locales: + bundle = self._get_or_create_bundle(locale) + for msg_id, variables in bundle.get_all_message_variables().items(): + if msg_id not in result: + result[msg_id] = variables + return result + + def introspect_term( + self: LocalizationStateProtocol, + term_id: str, + ) -> MessageIntrospection | None: + """Return term introspection from the first locale that contains it.""" + for locale in self._locales: + bundle = self._get_or_create_bundle(locale) + try: + return bundle.introspect_term(term_id) + except KeyError: + continue + return None + + def get_message( + self: LocalizationStateProtocol, message_id: MessageId + ) -> Message | None: + 
"""Return the first message AST node found across the fallback chain.""" + for locale in self._locales: + bundle = self._get_or_create_bundle(locale) + message = bundle.get_message(message_id) + if message is not None: + return message + return None + + def get_term(self: LocalizationStateProtocol, term_id: str) -> Term | None: + """Return the first term AST node found across the fallback chain.""" + for locale in self._locales: + bundle = self._get_or_create_bundle(locale) + term = bundle.get_term(term_id) + if term is not None: + return term + return None + + def get_babel_locale(self: LocalizationStateProtocol) -> str: + """Return the primary bundle's Babel locale identifier.""" + primary_locale = self._locales[0] + bundle = self._get_or_create_bundle(primary_locale) + return bundle.get_babel_locale() + + def validate_resource( + self: LocalizationStateProtocol, ftl_source: FTLSource + ) -> ValidationResult: + """Validate FTL source using the primary locale bundle.""" + primary_locale = self._locales[0] + bundle = self._get_or_create_bundle(primary_locale) + return bundle.validate_resource(ftl_source) + + def clear_cache(self: LocalizationStateProtocol) -> None: + """Clear caches on all initialized bundles.""" + with self._lock.write(): + for bundle in self._bundles.values(): + bundle.clear_cache() + + def get_cache_stats( + self: LocalizationStateProtocol, + ) -> LocalizationCacheStats | None: + """Aggregate cache statistics across initialized bundles.""" + if self._cache_config is None: + return None + + with self._lock.read(): + total_size = 0 + total_maxsize = 0 + total_hits = 0 + total_misses = 0 + total_unhashable = 0 + total_oversize = 0 + total_error_bloat = 0 + total_combined_weight = 0 + total_corruption = 0 + total_idempotent = 0 + total_write_once_conflicts = 0 + total_sequence = 0 + total_audit_entries = 0 + first_write_once = False + first_strict = False + first_audit_enabled = False + first_max_entry_weight = 0 + first_max_errors = 0 + is_first = 
True + + for bundle in self._bundles.values(): + stats = bundle.get_cache_stats() + if stats is None: + continue + + total_size += stats["size"] + total_maxsize += stats["maxsize"] + total_hits += stats["hits"] + total_misses += stats["misses"] + total_unhashable += stats["unhashable_skips"] + total_oversize += stats["oversize_skips"] + total_error_bloat += stats["error_bloat_skips"] + total_combined_weight += stats["combined_weight_skips"] + total_corruption += stats["corruption_detected"] + total_idempotent += stats["idempotent_writes"] + total_write_once_conflicts += stats["write_once_conflicts"] + total_sequence += stats["sequence"] + total_audit_entries += stats["audit_entries"] + if is_first: + first_write_once = stats["write_once"] + first_strict = stats["strict"] + first_audit_enabled = stats["audit_enabled"] + first_max_entry_weight = stats["max_entry_weight"] + first_max_errors = stats["max_errors_per_entry"] + is_first = False + + total_requests = total_hits + total_misses + hit_rate = (total_hits / total_requests * 100) if total_requests > 0 else 0.0 + + return cast( + "LocalizationCacheStats", + { + "size": total_size, + "maxsize": total_maxsize, + "max_entry_weight": first_max_entry_weight, + "max_errors_per_entry": first_max_errors, + "hits": total_hits, + "misses": total_misses, + "hit_rate": round(hit_rate, 2), + "unhashable_skips": total_unhashable, + "oversize_skips": total_oversize, + "error_bloat_skips": total_error_bloat, + "combined_weight_skips": total_combined_weight, + "corruption_detected": total_corruption, + "idempotent_writes": total_idempotent, + "write_once_conflicts": total_write_once_conflicts, + "sequence": total_sequence, + "write_once": first_write_once, + "strict": first_strict, + "audit_enabled": first_audit_enabled, + "audit_entries": total_audit_entries, + "bundle_count": len(self._bundles), + }, + ) + + def get_cache_audit_log( + self: LocalizationStateProtocol, + ) -> dict[LocaleCode, tuple[CacheAuditLogEntry, ...]] | 
None: + """Return per-locale audit logs for initialized bundles.""" + if self._cache_config is None: + return None + + with self._lock.read(): + audit_logs: dict[LocaleCode, tuple[CacheAuditLogEntry, ...]] = {} + for locale in self._locales: + bundle = self._bundles.get(locale) + if bundle is None: + continue + + audit_log = bundle.get_cache_audit_log() + if audit_log is not None: + audit_logs[locale] = audit_log + + return audit_logs + + def get_bundles(self: LocalizationStateProtocol) -> Iterator[FluentBundle]: + """Yield bundles in fallback order, creating them lazily as needed.""" + yield from (self._get_or_create_bundle(locale) for locale in self._locales) diff --git a/src/ftllexengine/localization/types.py b/src/ftllexengine/localization/types.py index 8ebf8c89..028b4b59 100644 --- a/src/ftllexengine/localization/types.py +++ b/src/ftllexengine/localization/types.py @@ -1,28 +1,13 @@ -"""Type aliases for the localization domain. +"""Compatibility re-export facade for localization semantic aliases. -Provides semantic type aliases used throughout the localization package -and by user code when annotating FluentLocalization call sites. - -Python 3.13+. Zero external dependencies. +The canonical definitions live in ``ftllexengine.core.semantic_types`` so lower +layers can annotate locale and resource boundaries without importing the +localization package. This module remains as the stable localization namespace +for callers that prefer ``ftllexengine.localization``-scoped imports. 
""" +from __future__ import annotations +from ftllexengine.core.semantic_types import FTLSource, LocaleCode, MessageId, ResourceId -__all__ = [ - "FTLSource", - "LocaleCode", - "MessageId", - "ResourceId", -] - -type MessageId = str -"""Identifier for a Fluent message (e.g., 'welcome', 'error-404').""" - -type LocaleCode = str -"""BCP-47 locale code (e.g., 'en', 'lv', 'zh-Hans-CN').""" - -type ResourceId = str -"""FTL resource file identifier (e.g., 'main.ftl', 'errors.ftl').""" - -type FTLSource = str -"""Raw FTL source text as a Python string.""" +__all__ = ["FTLSource", "LocaleCode", "MessageId", "ResourceId"] diff --git a/src/ftllexengine/parsing/__init__.py b/src/ftllexengine/parsing/__init__.py index caa2d85f..ebfcbe7e 100644 --- a/src/ftllexengine/parsing/__init__.py +++ b/src/ftllexengine/parsing/__init__.py @@ -32,9 +32,9 @@ clear_currency_caches - Clear cached CLDR currency data Examples: - >>> from ftllexengine.parsing import parse_decimal, is_valid_decimal - >>> result, errors = parse_decimal("1 234,56", "lv_LV") - >>> if not errors and is_valid_decimal(result): + >>> from ftllexengine.parsing import parse_decimal, is_valid_decimal # doctest: +SKIP + >>> result, errors = parse_decimal("1 234,56", "lv_LV") # doctest: +SKIP + >>> if not errors and is_valid_decimal(result): # doctest: +SKIP ... total = result.quantize(Decimal("0.01")) Python 3.13+. Requires Babel for CLDR patterns. 
diff --git a/src/ftllexengine/parsing/currency.py b/src/ftllexengine/parsing/currency.py index bcd82e2e..321536d2 100644 --- a/src/ftllexengine/parsing/currency.py +++ b/src/ftllexengine/parsing/currency.py @@ -32,7 +32,6 @@ """ from __future__ import annotations -# ruff: noqa: ERA001 - Section comments in data structures are documentation, not dead code import functools import re from typing import TYPE_CHECKING, Any @@ -41,13 +40,10 @@ from decimal import Decimal from ftllexengine.core.babel_compat import ( - get_babel_numbers, get_locale_class, - get_locale_identifiers_func, get_number_format_error_class, get_parse_decimal_func, get_unknown_locale_error_class, - is_babel_available, require_babel, ) from ftllexengine.core.locale_utils import ( @@ -56,449 +52,37 @@ ) from ftllexengine.diagnostics import ErrorCategory, FrozenErrorContext, FrozenFluentError from ftllexengine.diagnostics.templates import ErrorTemplate - -__all__ = ["clear_currency_caches", "parse_currency"] - -# ISO 4217 currency codes are exactly 3 uppercase ASCII letters. -# This is per the ISO 4217 standard and is guaranteed not to change. -ISO_CURRENCY_CODE_LENGTH: int = 3 - -# ============================================================================= -# FAST TIER: Common currencies with unambiguous symbols (no CLDR scan required) -# ============================================================================= -# These symbols map to exactly one currency worldwide. -# Loaded immediately at import time (zero CLDR overhead). -_FAST_TIER_UNAMBIGUOUS_SYMBOLS: dict[str, str] = { - # European currencies - "\u20ac": "EUR", # Euro sign - # NOTE: Pound sign (U+00A3) is in ambiguous set (GBP, EGP, GIP, etc.) 
- "\u20a4": "ITL", # Lira sign (historical) - # Asian currencies (truly unambiguous symbols) - # NOTE: Yen sign (U+00A5) is NOT here - it's ambiguous (JPY vs CNY) - "\u20b9": "INR", # Indian Rupee - "\u20a9": "KRW", # Korean Won - "\u20ab": "VND", # Vietnamese Dong - "\u20ae": "MNT", # Mongolian Tugrik - "\u20b1": "PHP", # Philippine Peso - "\u20b4": "UAH", # Ukrainian Hryvnia - "\u20b8": "KZT", # Kazakhstani Tenge - "\u20ba": "TRY", # Turkish Lira - "\u20bd": "RUB", # Russian Ruble - "\u20be": "GEL", # Georgian Lari - "\u20bf": "BTC", # Bitcoin (cryptocurrency) - # Americas (unambiguous) - "\u20b2": "PYG", # Paraguayan Guarani - # Middle East - "\u20aa": "ILS", # Israeli New Shekel - "\u20bc": "AZN", # Azerbaijani Manat - # African currencies - "\u20a6": "NGN", # Nigerian Naira - "\u20b5": "GHS", # Ghanaian Cedi - # Text symbols (less common but unambiguous) - "zl": "PLN", # Polish Zloty (text form) - "Ft": "HUF", # Hungarian Forint - "Ls": "LVL", # Latvian Lats (historical, pre-Euro) - "Lt": "LTL", # Lithuanian Litas (historical, pre-Euro) -} - -# Ambiguous symbols that require locale context or explicit currency code. -# These are NOT in the fast tier unambiguous map - they require context. -_FAST_TIER_AMBIGUOUS_SYMBOLS: frozenset[str] = frozenset({ - "$", # USD, CAD, AUD, NZD, SGD, HKD, MXN, ARS, CLP, COP, etc. - "kr", # SEK, NOK, DKK, ISK - "R", # ZAR, BRL (R$), INR (historical) - "R$", # BRL - "S/", # PEN - "\u00a5", # Yen/Yuan sign - JPY (Japanese) or CNY (Chinese) - "\u00a3", # Pound sign - GBP (British), EGP (Egyptian), GIP (Gibraltar), etc. -}) - -# Locale-aware resolution for ambiguous symbols. -# Maps (symbol, locale_prefix) -> currency_code for context-sensitive resolution. -# Keys use lowercase normalized locale format (BCP-47 is case-insensitive). 
-_AMBIGUOUS_SYMBOL_LOCALE_RESOLUTION: dict[tuple[str, str], str] = { - # Yen/Yuan sign: CNY for Chinese locales, JPY otherwise - ("\u00a5", "zh"): "CNY", # Chinese locales use Yuan - # Dollar sign: locale-specific resolution - ("$", "en_us"): "USD", - ("$", "en_ca"): "CAD", - ("$", "en_au"): "AUD", - ("$", "en_nz"): "NZD", - ("$", "en_sg"): "SGD", - ("$", "en_hk"): "HKD", - ("$", "es_mx"): "MXN", - ("$", "es_ar"): "ARS", - ("$", "es_cl"): "CLP", - ("$", "es_co"): "COP", - # Pound sign: locale-specific resolution - ("\u00a3", "en_gb"): "GBP", # British Pound - ("\u00a3", "en"): "GBP", # English locales default to British - ("\u00a3", "ar_eg"): "EGP", # Egyptian Pound - ("\u00a3", "ar"): "EGP", # Arabic locales default to Egyptian - ("\u00a3", "en_gi"): "GIP", # Gibraltar Pound - ("\u00a3", "en_fk"): "FKP", # Falkland Islands Pound - ("\u00a3", "en_sh"): "SHP", # Saint Helena Pound - ("\u00a3", "en_ss"): "SSP", # South Sudanese Pound -} - -# Default resolution for ambiguous symbols when locale doesn't match -_AMBIGUOUS_SYMBOL_DEFAULTS: dict[str, str] = { - "\u00a5": "JPY", # Default to JPY for non-Chinese locales - "\u00a3": "GBP", # Default to GBP when locale not recognized - "$": "USD", # Default to USD when locale not recognized - "kr": "SEK", # Default to SEK for Nordic kr - "R": "ZAR", # Default to ZAR for R - "R$": "BRL", # R$ is unambiguous as BRL - "S/": "PEN", # S/ is unambiguous as PEN -} - -# Common locale-to-currency mappings for fast tier (no CLDR scan needed) -# Keys use lowercase normalized locale format (BCP-47 is case-insensitive). 
-_FAST_TIER_LOCALE_CURRENCIES: dict[str, str] = { - # North America - "en_us": "USD", "es_us": "USD", - "en_ca": "CAD", "fr_ca": "CAD", - "es_mx": "MXN", - # Europe - Eurozone - "de_de": "EUR", "de_at": "EUR", - "fr_fr": "EUR", "it_it": "EUR", - "es_es": "EUR", "pt_pt": "EUR", - "nl_nl": "EUR", "fi_fi": "EUR", - "el_gr": "EUR", "et_ee": "EUR", - "lt_lt": "EUR", "lv_lv": "EUR", - "sk_sk": "EUR", "sl_si": "EUR", - # Europe - Non-Eurozone - "en_gb": "GBP", "de_ch": "CHF", "fr_ch": "CHF", "it_ch": "CHF", - "sv_se": "SEK", "no_no": "NOK", "da_dk": "DKK", - "pl_pl": "PLN", "cs_cz": "CZK", "hu_hu": "HUF", - "ro_ro": "RON", "bg_bg": "BGN", "hr_hr": "HRK", - "uk_ua": "UAH", "ru_ru": "RUB", "is_is": "ISK", - # Asia-Pacific - "ja_jp": "JPY", "zh_cn": "CNY", "zh_tw": "TWD", "zh_hk": "HKD", - "ko_kr": "KRW", "hi_in": "INR", "th_th": "THB", - "vi_vn": "VND", "id_id": "IDR", "ms_my": "MYR", - "fil_ph": "PHP", "en_sg": "SGD", "en_au": "AUD", "en_nz": "NZD", - # Middle East / Africa - "ar_sa": "SAR", "ar_eg": "EGP", "ar_ae": "AED", - "he_il": "ILS", "tr_tr": "TRY", - "en_za": "ZAR", "pt_br": "BRL", - # South America - "es_ar": "ARS", "es_cl": "CLP", "es_co": "COP", "es_pe": "PEN", -} - -# Fast tier valid ISO codes (subset for quick validation before full CLDR) -_FAST_TIER_VALID_CODES: frozenset[str] = frozenset({ - "USD", "EUR", "GBP", "JPY", "CNY", "CHF", "CAD", "AUD", "NZD", - "HKD", "SGD", "SEK", "NOK", "DKK", "ISK", "PLN", "CZK", "HUF", - "RON", "BGN", "HRK", "UAH", "RUB", "TRY", "ILS", "INR", "KRW", - "THB", "VND", "IDR", "MYR", "PHP", "TWD", "SAR", "AED", "EGP", - "ZAR", "BRL", "ARS", "CLP", "COP", "PEN", "MXN", "KZT", "GEL", - "AZN", "NGN", "GHS", "BTC", -}) - -# Curated list of locales for currency symbol lookup. -# Selected to cover major world currencies and regional variants. -# Add locales here to support additional currency symbol mappings. -_SYMBOL_LOOKUP_LOCALE_IDS: tuple[str, ...] 
= ( - "en_US", "en_GB", "en_CA", "en_AU", "en_NZ", "en_SG", "en_HK", "en_IN", - "de_DE", "de_CH", "de_AT", "fr_FR", "fr_CH", "fr_CA", - "es_ES", "es_MX", "es_AR", "it_IT", "it_CH", "nl_NL", "pt_PT", "pt_BR", - "ja_JP", "zh_CN", "zh_TW", "zh_HK", "ko_KR", - "ru_RU", "pl_PL", "sv_SE", "no_NO", "da_DK", "fi_FI", - "tr_TR", "ar_SA", "ar_EG", "he_IL", "hi_IN", - "th_TH", "vi_VN", "id_ID", "ms_MY", "fil_PH", - "lv_LV", "et_EE", "lt_LT", "cs_CZ", "sk_SK", "hu_HU", - "ro_RO", "bg_BG", "hr_HR", "sl_SI", "sr_RS", - "uk_UA", "ka_GE", "az_AZ", "kk_KZ", "is_IS", +from ftllexengine.parsing.currency_maps import ( + _FAST_TIER_UNAMBIGUOUS_SYMBOLS, + ISO_CURRENCY_CODE_LENGTH, + _build_currency_maps_from_cldr, + _get_currency_maps, + _get_currency_maps_fast, + _get_currency_maps_full, + resolve_ambiguous_symbol, +) +from ftllexengine.parsing.currency_maps import ( + clear_currency_caches as _clear_currency_maps_caches, ) -# ============================================================================= -# Locale-Aware Symbol Resolution -# ============================================================================= - - -def resolve_ambiguous_symbol( - symbol: str, - locale_code: str | None = None, -) -> str | None: - """Resolve ambiguous symbol to currency code with locale context. - - Resolution order: - 1. Exact locale match in _AMBIGUOUS_SYMBOL_LOCALE_RESOLUTION - 2. Locale prefix match (e.g., "zh" for "zh_CN", "zh_TW") - 3. 
Default from _AMBIGUOUS_SYMBOL_DEFAULTS - - Args: - symbol: The currency symbol to resolve - locale_code: Optional locale for context-sensitive resolution - - Returns: - ISO 4217 currency code, or None if symbol not in ambiguous set - """ - if symbol not in _FAST_TIER_AMBIGUOUS_SYMBOLS: - return None - - if locale_code: - # Normalize locale for lookup - normalized = normalize_locale(locale_code) - - # Try exact locale match first - exact_key = (symbol, normalized) - if exact_key in _AMBIGUOUS_SYMBOL_LOCALE_RESOLUTION: - return _AMBIGUOUS_SYMBOL_LOCALE_RESOLUTION[exact_key] - - # Try locale prefix match (language code only) - # e.g., "zh" matches "zh_CN", "zh_TW", "zh_HK" - if "_" in normalized: - lang_prefix = normalized.split("_")[0] - prefix_key = (symbol, lang_prefix) - if prefix_key in _AMBIGUOUS_SYMBOL_LOCALE_RESOLUTION: - return _AMBIGUOUS_SYMBOL_LOCALE_RESOLUTION[prefix_key] - - # Fall back to default - return _AMBIGUOUS_SYMBOL_DEFAULTS.get(symbol) - - -def _collect_all_currencies( - locale_ids: list[str], - locale_parse: Any, - unknown_locale_error: type[Exception], -) -> set[str]: - """Collect all currency codes from CLDR by scanning all locales. - - Ensures complete currency coverage (JPY, KRW, CNY, etc.). - - Args: - locale_ids: All available CLDR locale identifiers. - locale_parse: Babel's Locale.parse function. - unknown_locale_error: Babel's UnknownLocaleError class. - - Returns: - Set of all ISO 4217 currency codes found in CLDR. 
- """ - all_currencies: set[str] = set() - for locale_id in locale_ids: - try: - locale = locale_parse(locale_id) - if hasattr(locale, "currencies") and locale.currencies: - all_currencies.update(locale.currencies.keys()) - except (unknown_locale_error, ValueError, AttributeError, KeyError): - continue - return all_currencies - - -def _build_symbol_mappings( - all_currencies: set[str], - locale_ids: list[str], - locale_parse: Any, - unknown_locale_error: type[Exception], - get_currency_symbol: Any, -) -> tuple[dict[str, str], set[str]]: - """Build symbol-to-currency mappings, separating ambiguous from unambiguous. - - For each currency, finds all symbols it uses across a curated locale sample. - A symbol is ambiguous if multiple currencies use it. - - Args: - all_currencies: All ISO 4217 codes from CLDR. - locale_ids: All available CLDR locale identifiers. - locale_parse: Babel's Locale.parse function. - unknown_locale_error: Babel's UnknownLocaleError class. - get_currency_symbol: Babel's get_currency_symbol function. 
- - Returns: - Tuple of (unambiguous_map, ambiguous_set): - - unambiguous_map: Symbol -> single ISO 4217 code - - ambiguous_set: Symbols mapping to multiple currencies - """ - symbol_to_codes: dict[str, set[str]] = {} - - symbol_lookup_locales = [ - locale_parse(lid) for lid in _SYMBOL_LOOKUP_LOCALE_IDS - if lid in locale_ids - ] - - for currency_code in all_currencies: - for locale in symbol_lookup_locales: - try: - symbol = get_currency_symbol( - currency_code, locale=locale, - ) - is_iso_format = ( - len(symbol) == ISO_CURRENCY_CODE_LENGTH - and symbol.isupper() - and symbol.isalpha() - ) - if symbol and symbol != currency_code and not is_iso_format: - if symbol not in symbol_to_codes: - symbol_to_codes[symbol] = set() - symbol_to_codes[symbol].add(currency_code) - except ( - unknown_locale_error, ValueError, AttributeError, KeyError, - ): - continue - - unambiguous_map: dict[str, str] = {} - ambiguous_set: set[str] = set() - for symbol, codes in symbol_to_codes.items(): - if len(codes) == 1: - unambiguous_map[symbol] = next(iter(codes)) - else: - ambiguous_set.add(symbol) - - return unambiguous_map, ambiguous_set - - -def _build_locale_currency_map( - locale_ids: list[str], - locale_parse: Any, - unknown_locale_error: type[Exception], - get_territory_currencies: Any, -) -> dict[str, str]: - """Build locale-to-default-currency mapping from CLDR territory data. - - Args: - locale_ids: All available CLDR locale identifiers. - locale_parse: Babel's Locale.parse function. - unknown_locale_error: Babel's UnknownLocaleError class. - get_territory_currencies: Babel's get_territory_currencies function. - - Returns: - Mapping of locale code -> default ISO 4217 currency code. 
- """ - locale_to_currency: dict[str, str] = {} - for locale_id in locale_ids: - try: - locale = locale_parse(locale_id) - if not locale.territory: - continue - territory_currencies = get_territory_currencies( - locale.territory, - ) - if territory_currencies: - locale_str = str(locale) - if "_" in locale_str: - locale_to_currency[locale_str] = territory_currencies[0] - except ( - unknown_locale_error, ValueError, AttributeError, KeyError, - ): - continue - return locale_to_currency - - -@functools.cache -def _build_currency_maps_from_cldr() -> tuple[ - dict[str, str], set[str], dict[str, str], frozenset[str] -]: - """Build currency maps from Unicode CLDR data via Babel. - - Thread-safe via functools.cache internal locking. - Called once per process lifetime; subsequent calls return cached result. - - Orchestrates three sub-operations: - 1. Collect all currency codes from CLDR locale scan - 2. Build symbol-to-currency mappings (unambiguous vs ambiguous) - 3. Build locale-to-default-currency mapping from territory data - - Returns: - Tuple of (symbol_to_code, ambiguous_symbols, locale_to_currency, valid_codes): - - symbol_to_code: Unambiguous currency symbol -> ISO 4217 code - - ambiguous_symbols: Symbols that map to multiple currencies - - locale_to_currency: Locale code -> default ISO 4217 currency code - - valid_codes: Frozenset of all valid ISO 4217 currency codes from CLDR - Returns empty maps if Babel is not installed (fast tier still available). 
- """ - if not is_babel_available(): - # Babel not installed - return empty maps, fast tier still available - return ({}, set(), {}, frozenset()) - - locale_class = get_locale_class() - unknown_locale_error_class = get_unknown_locale_error_class() - locale_identifiers_fn = get_locale_identifiers_func() - babel_numbers = get_babel_numbers() - get_currency_symbol = babel_numbers.get_currency_symbol - get_territory_currencies = babel_numbers.get_territory_currencies - - all_locale_ids = list(locale_identifiers_fn()) - - all_currencies = _collect_all_currencies( - all_locale_ids, locale_class.parse, unknown_locale_error_class, - ) - - unambiguous_map, ambiguous_set = _build_symbol_mappings( - all_currencies, all_locale_ids, - locale_class.parse, unknown_locale_error_class, get_currency_symbol, - ) - - locale_to_currency = _build_locale_currency_map( - all_locale_ids, - locale_class.parse, unknown_locale_error_class, get_territory_currencies, - ) - - return ( - unambiguous_map, ambiguous_set, - locale_to_currency, frozenset(all_currencies), - ) - - -def _get_currency_maps_fast() -> tuple[ - dict[str, str], frozenset[str], dict[str, str], frozenset[str] -]: - """Get fast tier currency maps (no CLDR scan, immediate). - - Returns: - Tuple of (symbol_to_code, ambiguous_symbols, locale_to_currency, valid_codes) - from the fast tier (hardcoded common currencies). - """ - return ( - _FAST_TIER_UNAMBIGUOUS_SYMBOLS, - _FAST_TIER_AMBIGUOUS_SYMBOLS, - _FAST_TIER_LOCALE_CURRENCIES, - _FAST_TIER_VALID_CODES, - ) - - -def _get_currency_maps_full() -> tuple[dict[str, str], set[str], dict[str, str], frozenset[str]]: - """Get full CLDR currency maps (lazy-loaded on first call). - - Thread-safe via functools.cache on _build_currency_maps_from_cldr. - - Returns: - Tuple of (symbol_to_code, ambiguous_symbols, locale_to_currency, valid_codes) - from complete CLDR data. 
- """ - return _build_currency_maps_from_cldr() - - -@functools.cache -def _get_currency_maps() -> tuple[dict[str, str], set[str], dict[str, str], frozenset[str]]: - """Get merged currency maps (fast tier + full CLDR). - - Thread-safe via functools.cache internal locking. - Called once per process lifetime; subsequent calls return cached result. - - Tiered Loading Strategy: - - Fast tier data is always included (zero overhead) - - Full CLDR data is merged in (loaded lazily on first call to this function) - - Returns: - Tuple of (symbol_to_code, ambiguous_symbols, locale_to_currency, valid_codes): - - symbol_to_code: Unambiguous currency symbol → ISO 4217 code - - ambiguous_symbols: Symbols that map to multiple currencies - - locale_to_currency: Locale code → default ISO 4217 currency code - - valid_codes: Frozenset of all valid ISO 4217 currency codes from CLDR - """ - # Get both tiers - fast_symbols, fast_ambiguous, fast_locales, fast_codes = _get_currency_maps_fast() - full_symbols, full_ambiguous, full_locales, full_codes = _get_currency_maps_full() - - # Merge: fast tier has priority for unambiguous symbols - merged_symbols = {**full_symbols, **fast_symbols} # fast overwrites full - merged_ambiguous = full_ambiguous | set(fast_ambiguous) - merged_locales = {**full_locales, **fast_locales} # fast overwrites full - merged_codes = full_codes | fast_codes +__all__ = [ + "_FAST_TIER_UNAMBIGUOUS_SYMBOLS", + "_build_currency_maps_from_cldr", + "_get_currency_maps", + "_get_currency_maps_fast", + "_get_currency_maps_full", + "clear_currency_caches", + "parse_currency", + "resolve_ambiguous_symbol", +] + +_PRIVATE_CURRENCY_EXPORTS = ( + _FAST_TIER_UNAMBIGUOUS_SYMBOLS, + _build_currency_maps_from_cldr, + _get_currency_maps_fast, + _get_currency_maps_full, +) - return merged_symbols, merged_ambiguous, merged_locales, merged_codes def _is_valid_iso_4217_format(code: str) -> bool: @@ -776,36 +360,42 @@ def parse_currency( BabelImportError: If Babel is not installed 
Examples: - >>> result, errors = parse_currency("EUR100.50", "en_US") - >>> result + >>> result, errors = parse_currency("EUR100.50", "en_US") # doctest: +SKIP + >>> result # doctest: +SKIP (Decimal('100.50'), 'EUR') - >>> errors + >>> errors # doctest: +SKIP () - >>> result, errors = parse_currency("100,50 EUR", "lv_LV") - >>> result + >>> result, errors = parse_currency("100,50 EUR", "lv_LV") # doctest: +SKIP + >>> result # doctest: +SKIP (Decimal('100.50'), 'EUR') - >>> result, errors = parse_currency("USD 1,234.56", "en_US") - >>> result + >>> result, errors = parse_currency("USD 1,234.56", "en_US") # doctest: +SKIP + >>> result # doctest: +SKIP (Decimal('1234.56'), 'USD') - >>> result, errors = parse_currency("$100", "en_US", default_currency="USD") - >>> result + >>> result, errors = parse_currency( # doctest: +SKIP + ... "$100", "en_US", default_currency="USD" + ... ) + >>> result # doctest: +SKIP (Decimal('100'), 'USD') - >>> result, errors = parse_currency("$100", "en_CA", default_currency="CAD") - >>> result + >>> result, errors = parse_currency( # doctest: +SKIP + ... "$100", "en_CA", default_currency="CAD" + ... ) + >>> result # doctest: +SKIP (Decimal('100'), 'CAD') - >>> result, errors = parse_currency("$100", "en_CA", infer_from_locale=True) - >>> result + >>> result, errors = parse_currency( # doctest: +SKIP + ... "$100", "en_CA", infer_from_locale=True + ... ) + >>> result # doctest: +SKIP (Decimal('100'), 'CAD') - >>> result, errors = parse_currency("$100", "en_US") - >>> result is None + >>> result, errors = parse_currency("$100", "en_US") # doctest: +SKIP + >>> result is None # doctest: +SKIP True - >>> len(errors) + >>> len(errors) # doctest: +SKIP 1 Note: @@ -980,9 +570,8 @@ def clear_currency_caches() -> None: clearing; only the full CLDR scan results are invalidated. 
Example: - >>> from ftllexengine.parsing.currency import clear_currency_caches - >>> clear_currency_caches() # Clears all cached currency data + >>> from ftllexengine.parsing.currency import clear_currency_caches # doctest: +SKIP + >>> clear_currency_caches() # Clears all cached currency data # doctest: +SKIP """ - _build_currency_maps_from_cldr.cache_clear() - _get_currency_maps.cache_clear() + _clear_currency_maps_caches() _get_currency_pattern.cache_clear() diff --git a/src/ftllexengine/parsing/currency_maps.py b/src/ftllexengine/parsing/currency_maps.py new file mode 100644 index 00000000..2ff5a53e --- /dev/null +++ b/src/ftllexengine/parsing/currency_maps.py @@ -0,0 +1,432 @@ +"""Currency map data and CLDR-backed lookup helpers.""" + +from __future__ import annotations + +import functools +from typing import Any + +from ftllexengine.core.babel_compat import ( + get_babel_numbers, + get_locale_class, + get_locale_identifiers_func, + get_unknown_locale_error_class, + is_babel_available, +) +from ftllexengine.core.locale_utils import normalize_locale + +ISO_CURRENCY_CODE_LENGTH: int = 3 + +_FAST_TIER_UNAMBIGUOUS_SYMBOLS: dict[str, str] = { + "\u20ac": "EUR", + "\u20a4": "ITL", + "\u20b9": "INR", + "\u20a9": "KRW", + "\u20ab": "VND", + "\u20ae": "MNT", + "\u20b1": "PHP", + "\u20b4": "UAH", + "\u20b8": "KZT", + "\u20ba": "TRY", + "\u20bd": "RUB", + "\u20be": "GEL", + "\u20bf": "BTC", + "\u20b2": "PYG", + "\u20aa": "ILS", + "\u20bc": "AZN", + "\u20a6": "NGN", + "\u20b5": "GHS", + "zl": "PLN", + "Ft": "HUF", + "Ls": "LVL", + "Lt": "LTL", +} + +_FAST_TIER_AMBIGUOUS_SYMBOLS: frozenset[str] = frozenset( + { + "$", + "kr", + "R", + "R$", + "S/", + "\u00a5", + "\u00a3", + } +) + +_AMBIGUOUS_SYMBOL_LOCALE_RESOLUTION: dict[tuple[str, str], str] = { + ("\u00a5", "zh"): "CNY", + ("$", "en_us"): "USD", + ("$", "en_ca"): "CAD", + ("$", "en_au"): "AUD", + ("$", "en_nz"): "NZD", + ("$", "en_sg"): "SGD", + ("$", "en_hk"): "HKD", + ("$", "es_mx"): "MXN", + ("$", "es_ar"): "ARS", + 
("$", "es_cl"): "CLP", + ("$", "es_co"): "COP", + ("\u00a3", "en_gb"): "GBP", + ("\u00a3", "en"): "GBP", + ("\u00a3", "ar_eg"): "EGP", + ("\u00a3", "ar"): "EGP", + ("\u00a3", "en_gi"): "GIP", + ("\u00a3", "en_fk"): "FKP", + ("\u00a3", "en_sh"): "SHP", + ("\u00a3", "en_ss"): "SSP", +} + +_AMBIGUOUS_SYMBOL_DEFAULTS: dict[str, str] = { + "\u00a5": "JPY", + "\u00a3": "GBP", + "$": "USD", + "kr": "SEK", + "R": "ZAR", + "R$": "BRL", + "S/": "PEN", +} + +_FAST_TIER_LOCALE_CURRENCIES: dict[str, str] = { + "en_us": "USD", + "es_us": "USD", + "en_ca": "CAD", + "fr_ca": "CAD", + "es_mx": "MXN", + "de_de": "EUR", + "de_at": "EUR", + "fr_fr": "EUR", + "it_it": "EUR", + "es_es": "EUR", + "pt_pt": "EUR", + "nl_nl": "EUR", + "fi_fi": "EUR", + "el_gr": "EUR", + "et_ee": "EUR", + "lt_lt": "EUR", + "lv_lv": "EUR", + "sk_sk": "EUR", + "sl_si": "EUR", + "en_gb": "GBP", + "de_ch": "CHF", + "fr_ch": "CHF", + "it_ch": "CHF", + "sv_se": "SEK", + "no_no": "NOK", + "da_dk": "DKK", + "pl_pl": "PLN", + "cs_cz": "CZK", + "hu_hu": "HUF", + "ro_ro": "RON", + "bg_bg": "BGN", + "hr_hr": "HRK", + "uk_ua": "UAH", + "ru_ru": "RUB", + "is_is": "ISK", + "ja_jp": "JPY", + "zh_cn": "CNY", + "zh_tw": "TWD", + "zh_hk": "HKD", + "ko_kr": "KRW", + "hi_in": "INR", + "th_th": "THB", + "vi_vn": "VND", + "id_id": "IDR", + "ms_my": "MYR", + "fil_ph": "PHP", + "en_sg": "SGD", + "en_au": "AUD", + "en_nz": "NZD", + "ar_sa": "SAR", + "ar_eg": "EGP", + "ar_ae": "AED", + "he_il": "ILS", + "tr_tr": "TRY", + "en_za": "ZAR", + "pt_br": "BRL", + "es_ar": "ARS", + "es_cl": "CLP", + "es_co": "COP", + "es_pe": "PEN", +} + +_FAST_TIER_VALID_CODES: frozenset[str] = frozenset( + { + "USD", + "EUR", + "GBP", + "JPY", + "CNY", + "CHF", + "CAD", + "AUD", + "NZD", + "HKD", + "SGD", + "SEK", + "NOK", + "DKK", + "ISK", + "PLN", + "CZK", + "HUF", + "RON", + "BGN", + "HRK", + "UAH", + "RUB", + "TRY", + "ILS", + "INR", + "KRW", + "THB", + "VND", + "IDR", + "MYR", + "PHP", + "TWD", + "SAR", + "AED", + "EGP", + "ZAR", + "BRL", + "ARS", + 
"CLP", + "COP", + "PEN", + "MXN", + "KZT", + "GEL", + "AZN", + "NGN", + "GHS", + "BTC", + } +) + +_SYMBOL_LOOKUP_LOCALE_IDS: tuple[str, ...] = ( + "en_US", + "en_GB", + "en_CA", + "en_AU", + "en_NZ", + "en_SG", + "en_HK", + "en_IN", + "de_DE", + "de_CH", + "de_AT", + "fr_FR", + "fr_CH", + "fr_CA", + "es_ES", + "es_MX", + "es_AR", + "it_IT", + "it_CH", + "nl_NL", + "pt_PT", + "pt_BR", + "ja_JP", + "zh_CN", + "zh_TW", + "zh_HK", + "ko_KR", + "ru_RU", + "pl_PL", + "sv_SE", + "no_NO", + "da_DK", + "fi_FI", + "tr_TR", + "ar_SA", + "ar_EG", + "he_IL", + "hi_IN", + "th_TH", + "vi_VN", + "id_ID", + "ms_MY", + "fil_PH", + "lv_LV", + "et_EE", + "lt_LT", + "cs_CZ", + "sk_SK", + "hu_HU", + "ro_RO", + "bg_BG", + "hr_HR", + "sl_SI", + "sr_RS", + "uk_UA", + "ka_GE", + "az_AZ", + "kk_KZ", + "is_IS", +) + + +def resolve_ambiguous_symbol( + symbol: str, + locale_code: str | None = None, +) -> str | None: + """Resolve ambiguous currency symbols using locale context when available.""" + if symbol not in _FAST_TIER_AMBIGUOUS_SYMBOLS: + return None + + if locale_code: + normalized = normalize_locale(locale_code) + exact_key = (symbol, normalized) + if exact_key in _AMBIGUOUS_SYMBOL_LOCALE_RESOLUTION: + return _AMBIGUOUS_SYMBOL_LOCALE_RESOLUTION[exact_key] + + if "_" in normalized: + lang_prefix = normalized.split("_")[0] + prefix_key = (symbol, lang_prefix) + if prefix_key in _AMBIGUOUS_SYMBOL_LOCALE_RESOLUTION: + return _AMBIGUOUS_SYMBOL_LOCALE_RESOLUTION[prefix_key] + + return _AMBIGUOUS_SYMBOL_DEFAULTS.get(symbol) + + +def _collect_all_currencies( + locale_ids: list[str], + locale_parse: Any, + unknown_locale_error: type[Exception], +) -> set[str]: + all_currencies: set[str] = set() + for locale_id in locale_ids: + try: + locale = locale_parse(locale_id) + if hasattr(locale, "currencies") and locale.currencies: + all_currencies.update(locale.currencies.keys()) + except (unknown_locale_error, ValueError, AttributeError, KeyError): + continue + return all_currencies + + +def 
_build_symbol_mappings( + all_currencies: set[str], + locale_ids: list[str], + locale_parse: Any, + unknown_locale_error: type[Exception], + get_currency_symbol: Any, +) -> tuple[dict[str, str], set[str]]: + symbol_to_codes: dict[str, set[str]] = {} + symbol_lookup_locales = [ + locale_parse(locale_id) + for locale_id in _SYMBOL_LOOKUP_LOCALE_IDS + if locale_id in locale_ids + ] + + for currency_code in all_currencies: + for locale in symbol_lookup_locales: + try: + symbol = get_currency_symbol(currency_code, locale=locale) + is_iso_format = ( + len(symbol) == ISO_CURRENCY_CODE_LENGTH + and symbol.isupper() + and symbol.isalpha() + ) + if symbol and symbol != currency_code and not is_iso_format: + symbol_to_codes.setdefault(symbol, set()).add(currency_code) + except (unknown_locale_error, ValueError, AttributeError, KeyError): + continue + + unambiguous_map: dict[str, str] = {} + ambiguous_set: set[str] = set() + for symbol, codes in symbol_to_codes.items(): + if len(codes) == 1: + unambiguous_map[symbol] = next(iter(codes)) + else: + ambiguous_set.add(symbol) + + return unambiguous_map, ambiguous_set + + +def _build_locale_currency_map( + locale_ids: list[str], + locale_parse: Any, + unknown_locale_error: type[Exception], + get_territory_currencies: Any, +) -> dict[str, str]: + locale_to_currency: dict[str, str] = {} + for locale_id in locale_ids: + try: + locale = locale_parse(locale_id) + if not locale.territory: + continue + territory_currencies = get_territory_currencies(locale.territory) + if territory_currencies: + locale_str = str(locale) + if "_" in locale_str: + locale_to_currency[locale_str] = territory_currencies[0] + except (unknown_locale_error, ValueError, AttributeError, KeyError): + continue + return locale_to_currency + + +@functools.cache +def _build_currency_maps_from_cldr() -> tuple[ + dict[str, str], set[str], dict[str, str], frozenset[str] +]: + if not is_babel_available(): + return ({}, set(), {}, frozenset()) + + locale_class = 
get_locale_class() + unknown_locale_error_class = get_unknown_locale_error_class() + locale_identifiers_fn = get_locale_identifiers_func() + babel_numbers = get_babel_numbers() + get_currency_symbol = babel_numbers.get_currency_symbol + get_territory_currencies = babel_numbers.get_territory_currencies + + all_locale_ids = list(locale_identifiers_fn()) + all_currencies = _collect_all_currencies( + all_locale_ids, locale_class.parse, unknown_locale_error_class + ) + unambiguous_map, ambiguous_set = _build_symbol_mappings( + all_currencies, + all_locale_ids, + locale_class.parse, + unknown_locale_error_class, + get_currency_symbol, + ) + locale_to_currency = _build_locale_currency_map( + all_locale_ids, + locale_class.parse, + unknown_locale_error_class, + get_territory_currencies, + ) + return (unambiguous_map, ambiguous_set, locale_to_currency, frozenset(all_currencies)) + + +def _get_currency_maps_fast() -> tuple[ + dict[str, str], frozenset[str], dict[str, str], frozenset[str] +]: + return ( + _FAST_TIER_UNAMBIGUOUS_SYMBOLS, + _FAST_TIER_AMBIGUOUS_SYMBOLS, + _FAST_TIER_LOCALE_CURRENCIES, + _FAST_TIER_VALID_CODES, + ) + + +def _get_currency_maps_full() -> tuple[dict[str, str], set[str], dict[str, str], frozenset[str]]: + return _build_currency_maps_from_cldr() + + +@functools.cache +def _get_currency_maps() -> tuple[dict[str, str], set[str], dict[str, str], frozenset[str]]: + fast_symbols, fast_ambiguous, fast_locales, fast_codes = _get_currency_maps_fast() + full_symbols, full_ambiguous, full_locales, full_codes = _get_currency_maps_full() + return ( + {**full_symbols, **fast_symbols}, + full_ambiguous | set(fast_ambiguous), + {**full_locales, **fast_locales}, + full_codes | fast_codes, + ) + + +def clear_currency_caches() -> None: + """Clear cached currency map data.""" + _get_currency_maps.cache_clear() + _build_currency_maps_from_cldr.cache_clear() diff --git a/src/ftllexengine/parsing/date_patterns.py b/src/ftllexengine/parsing/date_patterns.py new file mode 
100644 index 00000000..c453ef4b --- /dev/null +++ b/src/ftllexengine/parsing/date_patterns.py @@ -0,0 +1,375 @@ +"""Cached CLDR pattern extraction and conversion helpers for date parsing.""" + +from __future__ import annotations + +from functools import lru_cache +from typing import Any + +from ftllexengine.constants import MAX_LOCALE_CACHE_SIZE +from ftllexengine.core.babel_compat import ( + get_locale_class, + get_unknown_locale_error_class, + is_babel_available, + require_babel, +) +from ftllexengine.core.locale_utils import normalize_locale + +__all__ = [ + "_babel_to_strptime", + "_get_date_patterns", + "_get_datetime_patterns", + "_is_word_boundary", + "_preprocess_datetime_input", + "_strip_era", + "_tokenize_babel_pattern", + "clear_date_caches", +] + +# CLDR date format styles used for parsing. +# Both date and datetime use the same styles for consistency. +_DATE_PARSE_STYLES: tuple[str, ...] = ("short", "medium", "long", "full") +_DATETIME_PARSE_STYLES: tuple[str, ...] = ("short", "medium", "long", "full") + +# Default separator between date and time components (fallback only). +# Used when locale-specific dateTimeFormat pattern extraction fails. 
+_DATETIME_SEPARATOR_FALLBACK: str = " " + + +def _extract_cldr_patterns( + format_dict: Any, + styles: tuple[str, ...], +) -> list[tuple[str, bool]]: + """Extract strptime patterns from a Babel CLDR format dictionary.""" + patterns: list[tuple[str, bool]] = [] + for style in styles: + try: + fmt = format_dict[style] + babel_pattern = fmt.pattern if hasattr(fmt, "pattern") else str(fmt) + strptime_pattern, has_era = _babel_to_strptime(babel_pattern) + patterns.append((strptime_pattern, has_era)) + if "%y" in strptime_pattern: + patterns.append((strptime_pattern.replace("%y", "%Y"), has_era)) + except (AttributeError, KeyError): + pass + return patterns + + +@lru_cache(maxsize=MAX_LOCALE_CACHE_SIZE) +def _get_date_patterns(locale_code: str) -> tuple[tuple[str, bool], ...]: + """Get cached strptime date patterns for one locale.""" + require_babel("parse_date") + locale_class = get_locale_class() + unknown_locale_error_class = get_unknown_locale_error_class() + + try: + locale = locale_class.parse(normalize_locale(locale_code)) + return tuple(_extract_cldr_patterns(locale.date_formats, _DATE_PARSE_STYLES)) + except (unknown_locale_error_class, ValueError, RuntimeError, AttributeError): + return () + + +def _extract_datetime_separator(locale: Any, style: str = "medium") -> tuple[str, bool]: + """Extract the locale-specific separator and ordering for date-time formats.""" + try: + datetime_format = locale.datetime_formats.get(style) + if datetime_format is None: + return _DATETIME_SEPARATOR_FALLBACK, False + + pattern = str(datetime_format) + date_placeholder = "{1}" + time_placeholder = "{0}" + + date_idx = pattern.find(date_placeholder) + time_idx = pattern.find(time_placeholder) + + if date_idx == -1 or time_idx == -1: + return _DATETIME_SEPARATOR_FALLBACK, False + + is_time_first = time_idx < date_idx + + if date_idx < time_idx: + sep_start = date_idx + len(date_placeholder) + sep_end = time_idx + else: + sep_start = time_idx + len(time_placeholder) + sep_end = 
date_idx + + if sep_start < sep_end: + return pattern[sep_start:sep_end], is_time_first + + return _DATETIME_SEPARATOR_FALLBACK, is_time_first + except (AttributeError, TypeError, ValueError): + return _DATETIME_SEPARATOR_FALLBACK, False + + +@lru_cache(maxsize=MAX_LOCALE_CACHE_SIZE) +def _get_datetime_patterns(locale_code: str) -> tuple[tuple[str, bool], ...]: + """Get cached strptime datetime patterns for one locale.""" + require_babel("parse_datetime") + locale_class = get_locale_class() + unknown_locale_error_class = get_unknown_locale_error_class() + + try: + locale = locale_class.parse(normalize_locale(locale_code)) + patterns = _extract_cldr_patterns(locale.datetime_formats, _DATETIME_PARSE_STYLES) + date_patterns = _get_date_patterns(locale_code) + sep, is_time_first = _extract_datetime_separator(locale) + + time_formats = [ + "%H:%M:%S", + "%H:%M", + "%I:%M:%S %p", + "%I:%M %p", + ] + + for date_pat, has_era in date_patterns: + for time_pat in time_formats: + combined = ( + f"{time_pat}{sep}{date_pat}" if is_time_first else f"{date_pat}{sep}{time_pat}" + ) + patterns.append((combined, has_era)) + + return tuple(patterns) + except (unknown_locale_error_class, ValueError, RuntimeError, AttributeError): + return () + + +# ============================================================================== +# TOKEN-BASED BABEL-TO-STRPTIME CONVERTER +# ============================================================================== + + +_BABEL_TOKEN_MAP: dict[str, str | None] = { + "yyyy": "%Y", + "yy": "%y", + "y": "%Y", + "MMMM": "%B", + "MMM": "%b", + "MM": "%m", + "M": "%m", + "LLLL": "%B", + "LLL": "%b", + "LL": "%m", + "L": "%m", + "dd": "%d", + "d": "%d", + "EEEE": "%A", + "EEE": "%a", + "E": "%a", + "cccc": "%A", + "ccc": "%a", + "cc": "%w", + "c": "%w", + "GGGG": None, + "GGG": None, + "GG": None, + "G": None, + "HH": "%H", + "H": "%H", + "hh": "%I", + "h": "%I", + "mm": "%M", + "m": "%M", + "ss": "%S", + "s": "%S", + "SSSSSS": "%f", + "SSSSS": "%f", + "SSSS": 
"%f", + "SSS": "%f", + "SS": "%f", + "S": "%f", + "a": "%p", + "kk": "%H", + "k": "%H", + "KK": "%I", + "K": "%I", + "ZZZZZ": "%z", + "ZZZZ": None, + "ZZZ": "%z", + "ZZ": "%z", + "Z": "%z", + "xxxxx": "%z", + "xxxx": "%z", + "xxx": "%z", + "xx": "%z", + "x": "%z", + "XXXXX": "%z", + "XXXX": "%z", + "XXX": "%z", + "XX": "%z", + "X": "%z", + "zzzz": None, + "zzz": None, + "zz": None, + "z": None, + "vvvv": None, + "v": None, + "VVVV": None, + "VVV": None, + "VV": None, + "V": None, + "OOOO": None, + "O": None, +} + +_ERA_STRINGS: tuple[str, ...] = ( + "Anno Domini", + "Before Christ", + "Common Era", + "Before Common Era", + "A.D.", + "B.C.", + "C.E.", + "BCE", + "AD", + "BC", + "CE", +) + + +def _is_word_boundary(text: str, idx: int, *, is_start: bool) -> bool: + """Check whether a position is a word boundary.""" + if is_start: + return idx == 0 or not text[idx - 1].isalnum() + return idx >= len(text) or not text[idx].isalnum() + + +def _extract_era_strings_from_babel_locale(babel_locale: Any) -> list[str]: + """Extract localized era strings from one Babel locale.""" + localized_eras: list[str] = [] + if not hasattr(babel_locale, "eras") or not babel_locale.eras: + return localized_eras + + for width_key in ("wide", "abbreviated", "narrow"): + era_dict = babel_locale.eras.get(width_key, {}) + for era_idx in (0, 1): + era_text = era_dict.get(era_idx) + if era_text and era_text not in localized_eras: + localized_eras.append(era_text) + return localized_eras + + +@lru_cache(maxsize=64) +def _get_localized_era_strings(locale_code: str) -> tuple[str, ...]: + """Get cached localized era strings for one locale.""" + if not is_babel_available(): + return () + + locale_class = get_locale_class() + unknown_locale_error_class = get_unknown_locale_error_class() + + try: + babel_locale = locale_class.parse(locale_code) + return tuple(_extract_era_strings_from_babel_locale(babel_locale)) + except (unknown_locale_error_class, ValueError): + return () + + +def _strip_era(value: 
str, locale_code: str | None = None) -> str: + """Strip era designations from a date string.""" + era_strings: list[str] = list(_ERA_STRINGS) + + if locale_code is not None: + localized = _get_localized_era_strings(locale_code) + for era_text in localized: + if era_text not in era_strings: + era_strings.append(era_text) + + result = value + for era in era_strings: + upper_result = result.upper() + upper_era = era.upper() + idx = upper_result.find(upper_era) + if idx != -1: + end_idx = idx + len(era) + if _is_word_boundary(result, idx, is_start=True) and _is_word_boundary( + result, end_idx, is_start=False + ): + result = result[:idx] + result[end_idx:] + return " ".join(result.split()) + + +def _preprocess_datetime_input( + value: str, locale_code: str | None = None, *, has_era: bool +) -> str: + """Strip era text when a pattern requires era preprocessing.""" + if has_era: + return _strip_era(value, locale_code) + return value + + +def _tokenize_babel_pattern(pattern: str) -> list[str]: + """Tokenize a CLDR pattern into atomic tokens.""" + tokens: list[str] = [] + i = 0 + n = len(pattern) + + while i < n: + char = pattern[i] + + if char == "'": + if i + 1 < n and pattern[i + 1] == "'": + tokens.append("'") + i += 2 + continue + + i += 1 + literal_chars: list[str] = [] + + while i < n: + if pattern[i] == "'": + if i + 1 < n and pattern[i + 1] == "'": + literal_chars.append("'") + i += 2 + else: + i += 1 + break + else: + literal_chars.append(pattern[i]) + i += 1 + + if literal_chars: + tokens.append("".join(literal_chars)) + continue + + if char.isalpha(): + j = i + 1 + while j < n and pattern[j] == char: + j += 1 + tokens.append(pattern[i:j]) + i = j + continue + + tokens.append(char) + i += 1 + + return tokens + + +def _babel_to_strptime(babel_pattern: str) -> tuple[str, bool]: + """Convert one CLDR date/time pattern to a Python strptime pattern.""" + tokens = _tokenize_babel_pattern(babel_pattern) + result_parts: list[str] = [] + has_era = False + + for token in 
tokens: + if token in _BABEL_TOKEN_MAP: + mapped = _BABEL_TOKEN_MAP[token] + if mapped is None: + if token.startswith("G"): + has_era = True + if result_parts and result_parts[-1].strip() == "": + result_parts.pop() + else: + result_parts.append(mapped) + else: + result_parts.append(token) + + return ("".join(result_parts).strip(), has_era) + + +def clear_date_caches() -> None: + """Clear cached locale-specific date and datetime parsing patterns.""" + _get_date_patterns.cache_clear() + _get_datetime_patterns.cache_clear() + _get_localized_era_strings.cache_clear() diff --git a/src/ftllexengine/parsing/dates.py b/src/ftllexengine/parsing/dates.py index 8d8f0653..b9e8b87d 100644 --- a/src/ftllexengine/parsing/dates.py +++ b/src/ftllexengine/parsing/dates.py @@ -38,30 +38,68 @@ """ from datetime import date, datetime, timezone -from functools import lru_cache -from typing import Any - -from ftllexengine.constants import MAX_LOCALE_CACHE_SIZE -from ftllexengine.core.babel_compat import ( - get_locale_class, - get_unknown_locale_error_class, - is_babel_available, - require_babel, -) -from ftllexengine.core.locale_utils import normalize_locale +from importlib import import_module +from typing import TYPE_CHECKING, cast + from ftllexengine.diagnostics import ErrorCategory, FrozenErrorContext, FrozenFluentError from ftllexengine.diagnostics.templates import ErrorTemplate -__all__ = ["clear_date_caches", "parse_date", "parse_datetime"] +from .date_patterns import ( + _BABEL_TOKEN_MAP, + _extract_datetime_separator, + _extract_era_strings_from_babel_locale, + _get_date_patterns, + _get_datetime_patterns, + _get_localized_era_strings, + _is_word_boundary, + _preprocess_datetime_input, + _strip_era, + _tokenize_babel_pattern, + clear_date_caches, +) -# CLDR date format styles used for parsing. -# Both date and datetime use the same styles for consistency. -_DATE_PARSE_STYLES: tuple[str, ...] = ("short", "medium", "long", "full") -_DATETIME_PARSE_STYLES: tuple[str, ...] 
= ("short", "medium", "long", "full") +if TYPE_CHECKING: + from collections.abc import Callable + +__all__ = [ + "_BABEL_TOKEN_MAP", + "_babel_to_strptime", + "_extract_datetime_separator", + "_extract_era_strings_from_babel_locale", + "_get_date_patterns", + "_get_datetime_patterns", + "_get_localized_era_strings", + "_is_word_boundary", + "_preprocess_datetime_input", + "_strip_era", + "_tokenize_babel_pattern", + "clear_date_caches", + "parse_date", + "parse_datetime", +] + +_DATE_PATTERNS_MODULE = import_module("ftllexengine.parsing.date_patterns") +_PRIVATE_DATE_EXPORTS = ( + _BABEL_TOKEN_MAP, + _extract_datetime_separator, + _extract_era_strings_from_babel_locale, + _get_localized_era_strings, + _is_word_boundary, + _strip_era, + _tokenize_babel_pattern, +) -# Default separator between date and time components (fallback only). -# Used when locale-specific dateTimeFormat pattern extraction fails. -_DATETIME_SEPARATOR_FALLBACK: str = " " + +def _babel_to_strptime(babel_pattern: str) -> tuple[str, bool]: + """Convert one CLDR pattern using the patchable module-level token map.""" + module_vars = vars(_DATE_PATTERNS_MODULE) + original_map = cast("dict[str, str | None]", module_vars["_BABEL_TOKEN_MAP"]) + module_vars["_BABEL_TOKEN_MAP"] = _BABEL_TOKEN_MAP + try: + converter = cast("Callable[[str], tuple[str, bool]]", module_vars["_babel_to_strptime"]) + return converter(babel_pattern) + finally: + module_vars["_BABEL_TOKEN_MAP"] = original_map def parse_date( @@ -92,20 +130,20 @@ def parse_date( BabelImportError: If Babel is not installed Examples: - >>> result, errors = parse_date("2025-01-28", "en_US") # ISO 8601 - >>> result + >>> result, errors = parse_date("2025-01-28", "en_US") # ISO 8601 # doctest: +SKIP + >>> result # doctest: +SKIP datetime.date(2025, 1, 28) - >>> errors + >>> errors # doctest: +SKIP () - >>> result, errors = parse_date("1/28/25", "en_US") # US locale format - >>> result + >>> result, errors = parse_date("1/28/25", "en_US") # US locale 
format # doctest: +SKIP + >>> result # doctest: +SKIP datetime.date(2025, 1, 28) - >>> result, errors = parse_date("invalid", "en_US") - >>> result is None + >>> result, errors = parse_date("invalid", "en_US") # doctest: +SKIP + >>> result is None # doctest: +SKIP True - >>> len(errors) + >>> len(errors) # doctest: +SKIP 1 Thread Safety: @@ -205,20 +243,24 @@ def parse_datetime( BabelImportError: If Babel is not installed Examples: - >>> result, errors = parse_datetime("2025-01-28 14:30", "en_US") # ISO 8601 - >>> result + >>> result, errors = parse_datetime( # ISO 8601 # doctest: +SKIP + ... "2025-01-28 14:30", "en_US" + ... ) + >>> result # doctest: +SKIP datetime.datetime(2025, 1, 28, 14, 30) - >>> errors + >>> errors # doctest: +SKIP () - >>> result, errors = parse_datetime("1/28/25 2:30 PM", "en_US") # US locale - >>> result + >>> result, errors = parse_datetime( # US locale # doctest: +SKIP + ... "1/28/25 2:30 PM", "en_US" + ... ) + >>> result # doctest: +SKIP datetime.datetime(2025, 1, 28, 14, 30) - >>> result, errors = parse_datetime("invalid", "en_US") - >>> result is None + >>> result, errors = parse_datetime("invalid", "en_US") # doctest: +SKIP + >>> result is None # doctest: +SKIP True - >>> len(errors) + >>> len(errors) # doctest: +SKIP 1 Thread Safety: @@ -292,725 +334,3 @@ def parse_datetime( ) errors.append(error) return (None, tuple(errors)) - - -def _extract_cldr_patterns( - format_dict: Any, - styles: tuple[str, ...], -) -> list[tuple[str, bool]]: - """Extract strptime patterns from a Babel CLDR format dictionary. - - Iterates CLDR format styles, converting each Babel pattern to a - (strptime_pattern, has_era) pair via _babel_to_strptime. - - Args: - format_dict: Babel locale format dict (e.g., locale.date_formats). - styles: CLDR style names to try (e.g., ("short", "medium", "long", "full")). - - Returns: - List of (strptime_pattern, has_era) tuples for styles that succeeded. 
- """ - patterns: list[tuple[str, bool]] = [] - for style in styles: - try: - fmt = format_dict[style] - babel_pattern = ( - fmt.pattern if hasattr(fmt, "pattern") else str(fmt) - ) - strptime_pattern, has_era = _babel_to_strptime(babel_pattern) - patterns.append((strptime_pattern, has_era)) - # CLDR short patterns often use 2-digit years (yy -> %y). Real-world - # documents frequently write the year in 4-digit form even when the - # CLDR short style specifies 2 digits (e.g. lv-LV "dd.MM.yy" vs the - # common handwritten form "dd.MM.yyyy"). Generate a 4-digit variant so - # both "15.01.26" (%y) and "15.01.2026" (%Y) are accepted. The 2-digit - # variant is listed first so that an unambiguous 2-digit input matches - # its canonical CLDR interpretation (2000-based expansion via %y) rather - # than being mis-parsed as the year 0026 AD via %Y. - if "%y" in strptime_pattern: - patterns.append((strptime_pattern.replace("%y", "%Y"), has_era)) - except (AttributeError, KeyError): - pass - return patterns - - -@lru_cache(maxsize=MAX_LOCALE_CACHE_SIZE) -def _get_date_patterns(locale_code: str) -> tuple[tuple[str, bool], ...]: - """Get strptime date patterns for locale with era flag. - - Uses ONLY Babel CLDR date format patterns specific to the locale. - No fallback patterns to avoid ambiguous date interpretation. - - Results are cached per locale_code for performance. - - Args: - locale_code: BCP 47 locale identifier - - Returns: - Tuple of (strptime_pattern, has_era) pairs to try. - has_era is True if the pattern contains era tokens requiring preprocessing. - Empty tuple if locale parsing fails. 
- - Raises: - BabelImportError: If Babel is not installed - """ - require_babel("parse_date") - locale_class = get_locale_class() - unknown_locale_error_class = get_unknown_locale_error_class() - - try: - locale = locale_class.parse(normalize_locale(locale_code)) - return tuple( - _extract_cldr_patterns(locale.date_formats, _DATE_PARSE_STYLES), - ) - except (unknown_locale_error_class, ValueError, RuntimeError, AttributeError): - # AttributeError: locale.date_formats may raise if CLDR data is unavailable - return () - - -def _extract_datetime_separator(locale: Any, style: str = "medium") -> tuple[str, bool]: - """Extract the date-time separator and component order from locale's CLDR dateTimeFormat. - - CLDR dateTimeFormat patterns use {0} for time and {1} for date, e.g.: - - en_US: "{1}, {0}" -> separator is ", ", is_time_first=False - - ja_JP: "{1} {0}" -> separator is " ", is_time_first=False - - Some locales: "{0} {1}" -> separator is " ", is_time_first=True - - Args: - locale: Babel Locale object - style: Format style to extract from ("short" or "medium") - - Returns: - Tuple of (separator, is_time_first): - - separator: The string between date and time components - - is_time_first: True if locale uses time-before-date order (pattern "{0}...{1}") - Falls back to (' ', False) if extraction fails. 
- """ - try: - datetime_format = locale.datetime_formats.get(style) - if datetime_format is None: - return _DATETIME_SEPARATOR_FALLBACK, False - - # Get the pattern string - may be str or DateTimePattern object - pattern = str(datetime_format) - - # Pattern format: "{1}{0}" where {1}=date, {0}=time - # Find the text between {1} and {0} - date_placeholder = "{1}" - time_placeholder = "{0}" - - date_idx = pattern.find(date_placeholder) - time_idx = pattern.find(time_placeholder) - - if date_idx == -1 or time_idx == -1: - return _DATETIME_SEPARATOR_FALLBACK, False - - # Determine if time comes first: "{0}...{1}" means time first - is_time_first = time_idx < date_idx - - # Handle both "{1}{0}" and "{0}{1}" orderings - if date_idx < time_idx: - # Normal order: date first, then time - sep_start = date_idx + len(date_placeholder) - sep_end = time_idx - else: - # Reversed order: time first, then date - sep_start = time_idx + len(time_placeholder) - sep_end = date_idx - - if sep_start < sep_end: - return pattern[sep_start:sep_end], is_time_first - - return _DATETIME_SEPARATOR_FALLBACK, is_time_first - - except (AttributeError, TypeError, ValueError): - return _DATETIME_SEPARATOR_FALLBACK, False - - -@lru_cache(maxsize=MAX_LOCALE_CACHE_SIZE) -def _get_datetime_patterns(locale_code: str) -> tuple[tuple[str, bool], ...]: - """Get strptime datetime patterns for locale with era flag. - - Uses ONLY Babel CLDR datetime format patterns specific to the locale. - No fallback patterns to avoid ambiguous datetime interpretation. - - Results are cached per locale_code for performance. - - Args: - locale_code: BCP 47 locale identifier - - Returns: - Tuple of (strptime_pattern, has_era) pairs to try. - has_era is True if the pattern contains era tokens requiring preprocessing. - Empty tuple if locale parsing fails. 
- - Raises: - BabelImportError: If Babel is not installed - """ - require_babel("parse_datetime") - locale_class = get_locale_class() - unknown_locale_error_class = get_unknown_locale_error_class() - - try: - locale = locale_class.parse(normalize_locale(locale_code)) - - patterns = _extract_cldr_patterns( - locale.datetime_formats, _DATETIME_PARSE_STYLES, - ) - - # Get date patterns and add time components for locale - date_patterns = _get_date_patterns(locale_code) - - # Get locale-specific separator and component order from CLDR dateTimeFormat - sep, is_time_first = _extract_datetime_separator(locale) - - # Time format patterns (no era - era is carried by date pattern) - time_formats = [ - "%H:%M:%S", # 24-hour with seconds - "%H:%M", # 24-hour without seconds - "%I:%M:%S %p", # 12-hour with seconds - "%I:%M %p", # 12-hour without seconds - ] - - for date_pat, has_era in date_patterns: - for time_pat in time_formats: - # Respect locale's component order: date-first or time-first - if is_time_first: - combined = f"{time_pat}{sep}{date_pat}" - else: - combined = f"{date_pat}{sep}{time_pat}" - patterns.append((combined, has_era)) - - return tuple(patterns) - - except (unknown_locale_error_class, ValueError, RuntimeError, AttributeError): - # AttributeError: locale.datetime_formats may raise if CLDR data is unavailable - return () - - -# ============================================================================== -# TOKEN-BASED BABEL-TO-STRPTIME CONVERTER -# ============================================================================== -# ruff: noqa: ERA001 - Documentation table is not commented-out code -# -# ARCHITECTURAL OVERVIEW: -# -# The Unicode CLDR (Common Locale Data Repository) defines locale-specific date -# patterns using a standardized format. Babel provides access to CLDR data. -# Python's strptime uses a different directive syntax. This module bridges them. 
-# -# CLDR Pattern Syntax (subset relevant to parsing): -# Pattern | Meaning | Example -# --------|------------------------|-------- -# y/yy | 2-digit year | 25 -# yyyy | 4-digit year | 2025 -# M/MM | Month (numeric) | 1, 01 -# MMM | Month (short name) | Jan -# MMMM | Month (full name) | January -# d/dd | Day of month | 5, 05 -# E/EEE | Weekday (short) | Mon -# EEEE | Weekday (full) | Monday -# G | Era (AD/BC) | AD (no strptime equivalent) -# H/HH | Hour (0-23) | 14 -# h/hh | Hour (1-12) | 2 -# m/mm | Minute | 30 -# s/ss | Second | 45 -# a | AM/PM marker | PM -# S+ | Fractional seconds | 123 -# -# CONVERSION STRATEGY: -# 1. Tokenize: Split CLDR pattern into tokens (letters, literals, quotes) -# 2. Map: Convert each token using _BABEL_TOKEN_MAP -# 3. Handle special cases: -# - Era tokens (G): Mark pattern for preprocessing, strip era from input -# - Timezone names (z): Cannot be parsed by strptime, marked for skip -# - Stand-alone month/weekday (L/c): Map to format context equivalents -# -# QUOTE ESCAPING (CLDR): -# - Single quotes delimit literal text: 'at' -> "at" -# - Double single quotes escape: '' -> "'" -# - Example: "h 'o''clock' a" -> "2 o'clock PM" -# -# ERA HANDLING: -# Python's strptime has no era support. Patterns containing G tokens are -# marked with has_era=True. At parse time, _strip_era() removes era text -# from input before parsing. See _ERA_STRINGS for supported designations. -# -# KNOWN LIMITATIONS: -# - Fractional seconds: CLDR uses S/SS/SSS for 1-3 digits, strptime %f -# expects 6 digits (microseconds). Best-effort mapping is applied. -# - Timezone names: strptime cannot parse "PST" or "America/Los_Angeles". -# These tokens are marked for skip. -# - Hour 1-24 (k) and 0-11 (K): Mapped to closest strptime equivalent -# with potential off-by-one at midnight/noon boundaries. 
-# -# ============================================================================== - -# Token mapping: Babel CLDR pattern -> Python strptime directive -# None values indicate tokens that require preprocessing (e.g., era stripping) -_BABEL_TOKEN_MAP: dict[str, str | None] = { - # Year - "yyyy": "%Y", # 4-digit year - "yy": "%y", # 2-digit year - "y": "%Y", # Year (default to 4-digit) - # Month (format context) - "MMMM": "%B", # Full month name - "MMM": "%b", # Short month name - "MM": "%m", # 2-digit month - "M": "%m", # Month - # Month (stand-alone context) - used in some locales for headers/labels - "LLLL": "%B", # Full month name (stand-alone) - "LLL": "%b", # Short month name (stand-alone) - "LL": "%m", # 2-digit month (stand-alone) - "L": "%m", # Month (stand-alone) - # Day - "dd": "%d", # 2-digit day - "d": "%d", # Day - # Weekday (format context) - "EEEE": "%A", # Full weekday name - "EEE": "%a", # Short weekday name - "E": "%a", # Weekday - # Weekday (stand-alone context) - used in some locales for headers/labels - "cccc": "%A", # Full weekday name (stand-alone) - "ccc": "%a", # Short weekday name (stand-alone) - "cc": "%w", # Numeric weekday (stand-alone) - "c": "%w", # Numeric weekday (stand-alone) - # Era (AD/BC) - strptime doesn't support era - # Map to None to signal that era stripping is needed - # See _ERA_STRINGS and _strip_era() for runtime handling - "GGGG": None, # Full era name (Anno Domini) - "GGG": None, # Abbreviated era (AD) - "GG": None, # Abbreviated era (AD) - "G": None, # Era abbreviation (AD) - # Hour - "HH": "%H", # 2-digit hour (0-23) - "H": "%H", # Hour (0-23) - "hh": "%I", # 2-digit hour (1-12) - "h": "%I", # Hour (1-12) - # Minute - "mm": "%M", # 2-digit minute - "m": "%M", # Minute - # Second - "ss": "%S", # 2-digit second - "s": "%S", # Second - # Fractional seconds - # Python's %f expects 6 digits (microseconds); CLDR uses variable precision - # Map to %f and accept precision mismatch as best-effort - "SSSSSS": "%f", # 
Microseconds (6 digits) - "SSSSS": "%f", # 5 fractional digits - "SSSS": "%f", # 4 fractional digits - "SSS": "%f", # Milliseconds (3 digits) - "SS": "%f", # 2 fractional digits - "S": "%f", # 1 fractional digit - # AM/PM - "a": "%p", # AM/PM marker - # Hour (1-24 and 0-11 variants) - # Python doesn't have direct equivalents; map to closest - "kk": "%H", # Hour 1-24 -> 0-23 (off-by-one at midnight) - "k": "%H", # Hour 1-24 -> 0-23 - "KK": "%I", # Hour 0-11 -> 1-12 (off-by-one at noon) - "K": "%I", # Hour 0-11 -> 1-12 - # Timezone tokens - # Python strptime has limited timezone support; map what's possible - "ZZZZZ": "%z", # Extended offset (e.g., +01:00) -> +HHMM - "ZZZZ": None, # Localized GMT (e.g., GMT+01:00) - strptime cannot parse GMT prefix - "ZZZ": "%z", # RFC 822 offset (e.g., +0100) - "ZZ": "%z", # RFC 822 offset - "Z": "%z", # Basic offset - "xxxxx": "%z", # ISO 8601 extended (+01:00:00) - "xxxx": "%z", # ISO 8601 basic (+0100) - "xxx": "%z", # ISO 8601 extended (+01:00) - "xx": "%z", # ISO 8601 basic (+01) - "x": "%z", # ISO 8601 basic (+01) - "XXXXX": "%z", # ISO 8601 extended with Z - "XXXX": "%z", # ISO 8601 basic with Z - "XXX": "%z", # ISO 8601 extended with Z - "XX": "%z", # ISO 8601 basic with Z - "X": "%z", # ISO 8601 basic with Z - # Timezone names - strptime has limited support - # These often fail in strptime; map to None like era tokens - "zzzz": None, # Full timezone name (e.g., Pacific Standard Time) - "zzz": None, # Abbreviated timezone (e.g., PST) - "zz": None, # Abbreviated timezone - "z": None, # Abbreviated timezone - "vvvv": None, # Generic non-location timezone - "v": None, # Generic non-location timezone short - "VVVV": None, # Generic location timezone - "VVV": None, # City timezone - "VV": None, # Timezone ID (e.g., America/Los_Angeles) - "V": None, # Short timezone ID - "OOOO": None, # Localized GMT long - "O": None, # Localized GMT short -} - -# Era strings to strip from input when pattern contains era tokens -# Sorted by length 
descending to match longer strings first -# Covers common English and Latin era designations -# Localized era names are dynamically added from Babel when available -_ERA_STRINGS: tuple[str, ...] = ( - "Anno Domini", # GGGG full form - "Before Christ", # GGGG full form (BC) - "Common Era", # CE variant - "Before Common Era", # BCE variant - "A.D.", # With periods - "B.C.", # With periods - "C.E.", # Common Era with periods - "BCE", # Before Common Era - "AD", # Standard abbreviation - "BC", # Standard abbreviation - "CE", # Common Era -) - -# NOTE: Timezone name stripping is not implemented. -# English-only timezone stripping would be incomplete: -# - Only worked for English timezone names (PST, EST, etc.) -# - Failed for localized timezone names (French, Spanish, etc.) -# - Created inconsistent behavior across locales -# -# Timezone tokens mapped to None (silently skipped from pattern): -# - Timezone name tokens (z, zz, zzz, zzzz, v, V, O series) -# - Localized GMT format (ZZZZ) - produces "GMT-08:00" which strptime cannot parse -# - NOT stripped from input (users must pre-strip or use UTC offset patterns) -# -# Supported timezone patterns (mapped to strptime %z): -# - UTC offset patterns: Z, ZZ, ZZZ, ZZZZZ, x, xx, xxx, xxxx, xxxxx, X, XX, XXX, XXXX, XXXXX -# - These are locale-agnostic and parse offset formats like +0100, +01:00 -# -# Unsupported timezone patterns (input must be pre-stripped by caller): -# - Timezone name patterns: z, zz, zzz, zzzz, v, vvvv, V, VV, VVV, VVVV, O, OOOO -# - Localized GMT format: ZZZZ (produces "GMT-08:00" which strptime cannot parse) - - -def _is_word_boundary(text: str, idx: int, *, is_start: bool) -> bool: - """Check if position is at a word boundary. - - A word boundary occurs when the adjacent character is non-alphanumeric - or the position is at the start/end of the string. 
- - Args: - text: The text to check - idx: Position index - is_start: True to check start boundary, False for end boundary - - Returns: - True if position is at a word boundary - """ - if is_start: - return idx == 0 or not text[idx - 1].isalnum() - return idx >= len(text) or not text[idx].isalnum() - - -def _extract_era_strings_from_babel_locale(babel_locale: Any) -> list[str]: - """Extract era strings from a Babel Locale object. - - Helper for _get_localized_era_strings to reduce nesting. - - Args: - babel_locale: Babel Locale instance with eras attribute. - - Returns: - List of unique era strings from all width variants. - """ - localized_eras: list[str] = [] - if not hasattr(babel_locale, "eras") or not babel_locale.eras: - return localized_eras - - # Babel eras: dict with keys 'wide', 'abbreviated', 'narrow' - # Each key maps to dict {0: 'BCE string', 1: 'CE string'} - for width_key in ("wide", "abbreviated", "narrow"): - era_dict = babel_locale.eras.get(width_key, {}) - for era_idx in (0, 1): - era_text = era_dict.get(era_idx) - if era_text and era_text not in localized_eras: - localized_eras.append(era_text) - return localized_eras - - -@lru_cache(maxsize=64) -def _get_localized_era_strings(locale_code: str) -> tuple[str, ...]: - """Get localized era strings from Babel for a locale. - - Cached per locale to avoid repeated Locale object instantiation. - Returns empty tuple if Babel unavailable or locale has no era data. - - Args: - locale_code: Locale code (e.g., "ja_JP", "zh_Hans"). - - Returns: - Tuple of localized era strings from Babel CLDR data. - Empty tuple if Babel unavailable or locale invalid. 
- """ - if not is_babel_available(): - return () - - locale_class = get_locale_class() - unknown_locale_error_class = get_unknown_locale_error_class() - - try: - babel_locale = locale_class.parse(locale_code) - return tuple(_extract_era_strings_from_babel_locale(babel_locale)) - except (unknown_locale_error_class, ValueError): - return () - - -def _strip_era(value: str, locale_code: str | None = None) -> str: - """Strip era designations from date string. - - Used when pattern contains era tokens (G/GG/GGG/GGGG) since Python's - strptime doesn't support era parsing. - - Uses word boundary detection to avoid stripping partial matches - (e.g., "bad" should not match "AD", "cereal" should not match "CE"). - - Supports localized era strings via Babel when available. Falls back to - English/Latin era designations if Babel is unavailable or locale has no - era data. - - Args: - value: Date string potentially containing era text - locale_code: Optional locale code for localized era strings - - Returns: - Date string with era text removed and whitespace normalized - """ - # Build era strings list: English defaults + localized from cached Babel lookup - era_strings: list[str] = list(_ERA_STRINGS) - - if locale_code is not None: - # Cached lookup for localized era strings (avoids repeated Locale instantiation) - localized = _get_localized_era_strings(locale_code) - for era_text in localized: - if era_text not in era_strings: - era_strings.append(era_text) - - result = value - for era in era_strings: - # Case-insensitive search with word boundary validation - upper_result = result.upper() - upper_era = era.upper() - idx = upper_result.find(upper_era) - if idx != -1: - end_idx = idx + len(era) - # Only strip if both boundaries are word boundaries - if _is_word_boundary(result, idx, is_start=True) and _is_word_boundary( - result, end_idx, is_start=False - ): - result = result[:idx] + result[end_idx:] - # Normalize whitespace (collapse multiple spaces) - return " 
".join(result.split()) - - -def _preprocess_datetime_input( - value: str, locale_code: str | None = None, *, has_era: bool -) -> str: - """Preprocess datetime input by stripping unsupported tokens. - - Currently only handles era tokens. Timezone name tokens (z, zz, zzz, zzzz, - v, V, O series) are stripped from the pattern but NOT from the input. - Users must pre-strip timezone text from input or use UTC offset patterns - (Z, x, X series) which are locale-agnostic. - - Args: - value: Date/datetime string to preprocess - has_era: True if pattern contained era tokens (G/GG/GGG/GGGG) - locale_code: Optional locale code for localized era stripping - - Returns: - Preprocessed string with era text removed (using localized era names - from Babel when available) - """ - if has_era: - return _strip_era(value, locale_code) - return value - - -def _tokenize_babel_pattern(pattern: str) -> list[str]: - """Tokenize Babel CLDR pattern into individual tokens. - - This correctly handles patterns like "d.MM.yyyy" where "d" is adjacent - to punctuation without word boundaries. 
- - CLDR quote escaping rules: - - Single quotes delimit literal text: 'at' produces "at" - - Two consecutive single quotes '' produce a literal single quote - - '' inside quoted text also produces a literal single quote - - Examples: - "h 'o''clock' a" -> ["h", " ", "o'clock", " ", "a"] - "yyyy-MM-dd" -> ["yyyy", "-", "MM", "-", "dd"] - "d.MM.yyyy" -> ["d", ".", "MM", ".", "yyyy"] - - Args: - pattern: Babel CLDR date pattern (e.g., "d.MM.yyyy") - - Returns: - List of tokens (e.g., ["d", ".", "MM", ".", "yyyy"]) - """ - tokens: list[str] = [] - i = 0 - n = len(pattern) - - while i < n: - char = pattern[i] - - # Check for quoted literal (single quotes in CLDR patterns) - if char == "'": - # Check for escaped quote '' (produces literal single quote) - if i + 1 < n and pattern[i + 1] == "'": - # '' outside quoted section -> literal single quote - tokens.append("'") - i += 2 - continue - - # Start of quoted literal section - i += 1 # Skip opening quote - literal_chars: list[str] = [] - - while i < n: - if pattern[i] == "'": - # Check for escaped quote '' inside quoted section - if i + 1 < n and pattern[i + 1] == "'": - # '' inside quoted section -> literal single quote - literal_chars.append("'") - i += 2 - else: - # Closing quote found - i += 1 - break - else: - literal_chars.append(pattern[i]) - i += 1 - - # Add collected literal as single token - if literal_chars: - tokens.append("".join(literal_chars)) - continue - - # Check for pattern letter sequences (a-zA-Z) - if char.isalpha(): - # Collect consecutive same letters (e.g., "yyyy", "MM", "dd") - j = i + 1 - while j < n and pattern[j] == char: - j += 1 - tokens.append(pattern[i:j]) - i = j - continue - - # Everything else is a literal (punctuation, spaces, etc.) - tokens.append(char) - i += 1 - - return tokens - - -def _babel_to_strptime(babel_pattern: str) -> tuple[str, bool]: - """Convert Babel CLDR pattern to Python strptime format. - - Fixes edge cases with word boundaries in patterns like "d.MM.yyyy". 
- - Babel uses Unicode CLDR date pattern syntax, Python uses strptime directives. - - Era tokens (G/GG/GGG/GGGG) require preprocessing to strip era text from input - before parsing. Timezone name tokens (z/v/V/O series) are stripped from the - pattern but NOT from input - users must pre-strip timezone names. - - Babel Patterns: - y, yy = 2-digit year - yyyy = 4-digit year - M, MM = month (1-12) - MMM = short month name (Jan, Feb) - MMMM = full month name (January, February) - d, dd = day of month - E, EEE = short weekday (Mon) - EEEE = full weekday (Monday) - H, HH = hour 0-23 - h, hh = hour 1-12 - m, mm = minute - s, ss = second - a = AM/PM - - Python strptime: - %y = 2-digit year - %Y = 4-digit year - %m = month (01-12) - %b = short month name - %B = full month name - %d = day of month - %a = short weekday - %A = full weekday - %H = hour 0-23 - %I = hour 1-12 - %M = minute - %S = second - %p = AM/PM - - Args: - babel_pattern: Babel CLDR date pattern - - Returns: - Tuple of (strptime_pattern, has_era): - - strptime_pattern: Python strptime pattern - - has_era: True if pattern contained era tokens (G/GG/GGG/GGGG) - """ - tokens = _tokenize_babel_pattern(babel_pattern) - result_parts: list[str] = [] - has_era = False - - for token in tokens: - # Check if token is a Babel pattern token - if token in _BABEL_TOKEN_MAP: - mapped = _BABEL_TOKEN_MAP[token] - if mapped is None: - # Token maps to None (era, timezone) - skip it - # Timezone tokens (z/v/V/O/ZZZZ) also map to None but are silently skipped - if token.startswith("G"): - has_era = True - # Don't add to result_parts - this token is skipped - # Adjacent separator cleanup: if previous token was whitespace/separator - # and current token is skipped, remove that separator - if result_parts and result_parts[-1].strip() == "": - result_parts.pop() - else: - result_parts.append(mapped) - else: - # Literal: pass through (punctuation, spaces, etc.) 
- result_parts.append(token) - - # Join and normalize leading/trailing whitespace from skipped tokens - # Example 1: "HH:mm zzzz" -> tokens ["HH", ":", "mm", " ", "zzzz"] - # Without normalization: "%H:%M " (trailing space causes strptime failure) - # With normalization: "%H:%M" (trailing space removed) - # - # Example 2: "zzzz HH:mm" -> tokens ["zzzz", " ", "HH", ":", "mm"] - # Previous: " %H:%M" (leading space from skipped token, adjacent separator removed) - # Now: "%H:%M" (leading space also stripped) - result = "".join(result_parts).strip() - return (result, has_era) - - -def clear_date_caches() -> None: - """Clear all date pattern caches. - - Clears cached CLDR date and datetime patterns from: - - _get_date_patterns() - locale-specific date format patterns - - _get_datetime_patterns() - locale-specific datetime format patterns - - _get_localized_era_strings() - locale-specific era designations - - Useful for: - - Memory reclamation in long-running applications - - Testing scenarios requiring fresh cache state - - After Babel/CLDR data updates - - Thread-safe via functools.cache internal locking. - - Note: - This function does NOT require Babel. It clears the caches - regardless of whether Babel is installed. - - Example: - >>> from ftllexengine.parsing.dates import clear_date_caches - >>> clear_date_caches() # Clears all cached date patterns - """ - _get_date_patterns.cache_clear() - _get_datetime_patterns.cache_clear() - _get_localized_era_strings.cache_clear() diff --git a/src/ftllexengine/parsing/fiscal.py,cover b/src/ftllexengine/parsing/fiscal.py,cover deleted file mode 100644 index c87ae066..00000000 --- a/src/ftllexengine/parsing/fiscal.py,cover +++ /dev/null @@ -1,615 +0,0 @@ -> """Fiscal calendar arithmetic for financial date calculations. 
- -> Provides types for fiscal calendar configuration and date arithmetic: -> - FiscalCalendar: Configuration for fiscal year boundaries -> - FiscalDelta: Immutable period delta (years, quarters, months, days) -> - FiscalPeriod: Immutable fiscal period (year, quarter, month) - -> Month-End Policy: -> When adding months to a month-end date (e.g., Jan 31 + 1 month), the target -> month may have fewer days. The month_end_policy parameter controls behavior: - -> - "preserve": Try to preserve the day-of-month; clamp if out of range. -> Jan 31 + 1 month -> Feb 28/29 (clamped to last day) -> This is the default and most common business rule. - -> - "clamp": Always clamp to last day if original was month-end. -> Jan 31 + 1 month -> Feb 28/29 (last day of Feb) -> Mar 15 + 1 month -> Apr 15 (day preserved, not month-end) -> Useful for month-end reporting (always lands on month-end). - -> - "strict": Raise ValueError if day would be out of range. -> Jan 31 + 1 month -> ValueError (no Feb 31) -> Useful for validation where inexact dates are errors. - -> No external dependencies. Thread-safe. Python 3.13+. -> """ - -> from __future__ import annotations - -> import calendar -> from dataclasses import dataclass -> from datetime import date, timedelta -> from enum import StrEnum -> from typing import TYPE_CHECKING, Self - -- if TYPE_CHECKING: -- pass - - # ruff: noqa: RUF022 - __all__ organized by category for readability -> __all__ = [ - # Enums -> "MonthEndPolicy", - # Data classes -> "FiscalCalendar", -> "FiscalDelta", -> "FiscalPeriod", - # Factory functions -> "fiscal_quarter", -> "fiscal_year_start", -> "fiscal_year_end", -> ] - - - # ============================================================================ - # ENUMS - # ============================================================================ - - -> class MonthEndPolicy(StrEnum): -> """Policy for handling month-end dates in date arithmetic. 
- -> Controls behavior when adding months to a date where the target month -> has fewer days than the source day-of-month. -> """ - -> PRESERVE = "preserve" -> """Try to preserve day-of-month; clamp to last day if out of range.""" - -> CLAMP = "clamp" -> """If original date was month-end, result is also month-end.""" - -> STRICT = "strict" -> """Raise ValueError if resulting day would be out of range.""" - - - # ============================================================================ - # FISCAL PERIOD - # ============================================================================ - - -> @dataclass(frozen=True, slots=True, order=True) -> class FiscalPeriod: -> """Immutable fiscal period identifier. - -> Represents a specific fiscal year, quarter, or month within a fiscal calendar. -> Ordering is by (fiscal_year, quarter, month). - -> Attributes: -> fiscal_year: The fiscal year number. -> quarter: Quarter within fiscal year (1-4). -> month: Month within fiscal year (1-12). -> """ - -> fiscal_year: int -> quarter: int -> month: int - -> def __post_init__(self) -> None: -> """Validate period values.""" -> if not 1 <= self.quarter <= 4: -> msg = f"Quarter must be 1-4, got {self.quarter}" -> raise ValueError(msg) -> if not 1 <= self.month <= 12: -> msg = f"Month must be 1-12, got {self.month}" -> raise ValueError(msg) - - - # ============================================================================ - # FISCAL CALENDAR - # ============================================================================ - - -> @dataclass(frozen=True, slots=True) -> class FiscalCalendar: -> """Configuration for a fiscal calendar. - -> Defines when the fiscal year starts. All fiscal period calculations -> are relative to this configuration. - -> Attributes: -> start_month: Calendar month when fiscal year begins (1-12). 
-> 1 = Calendar year (Jan-Dec fiscal year) -> 4 = UK/Japan government (Apr-Mar fiscal year) -> 7 = Australia/NZ (Jul-Jun fiscal year) -> 10 = US federal government (Oct-Sep fiscal year) - -> Thread-safe. Immutable. Hashable. -> """ - -> start_month: int = 1 - -> def __post_init__(self) -> None: -> """Validate start_month is 1-12.""" -> if not isinstance(self.start_month, int): -> msg = f"start_month must be int, got {type(self.start_month).__name__}" # type: ignore[unreachable] -> raise TypeError(msg) -> if not 1 <= self.start_month <= 12: -> msg = f"start_month must be 1-12, got {self.start_month}" -> raise ValueError(msg) - -> def fiscal_year(self, d: date) -> int: -> """Get the fiscal year containing a date. - -> Args: -> d: Calendar date. - -> Returns: -> Fiscal year number. For calendars starting in month > 1, -> the fiscal year is typically labeled by the ending calendar year. - -> Examples: -> >>> cal = FiscalCalendar(start_month=4) # Apr-Mar fiscal year -> >>> cal.fiscal_year(date(2024, 3, 15)) # Before fiscal year start -> 2024 -> >>> cal.fiscal_year(date(2024, 4, 1)) # First day of FY2025 -> 2025 -> """ -> if d.month >= self.start_month: - # Date is in first part of fiscal year -> return d.year + (1 if self.start_month > 1 else 0) - # Date is in second part of fiscal year (after calendar year boundary) -> return d.year - -> def fiscal_quarter(self, d: date) -> int: -> """Get the fiscal quarter (1-4) containing a date. - -> Args: -> d: Calendar date. - -> Returns: -> Quarter number (1-4) within the fiscal year. - -> Examples: -> >>> cal = FiscalCalendar(start_month=4) # Apr-Mar fiscal year -> >>> cal.fiscal_quarter(date(2024, 4, 15)) # Apr = Q1 -> 1 -> >>> cal.fiscal_quarter(date(2024, 7, 15)) # Jul = Q2 -> 2 -> """ -> fiscal_month = self.fiscal_month(d) -> return (fiscal_month - 1) // 3 + 1 - -> def fiscal_month(self, d: date) -> int: -> """Get the fiscal month (1-12) of a date. - -> Args: -> d: Calendar date. 
- -> Returns: -> Month number (1-12) within the fiscal year. -> Month 1 is the first month of the fiscal year. - -> Examples: -> >>> cal = FiscalCalendar(start_month=4) # Apr-Mar fiscal year -> >>> cal.fiscal_month(date(2024, 4, 15)) # Apr = Month 1 -> 1 -> >>> cal.fiscal_month(date(2024, 3, 15)) # Mar = Month 12 -> 12 -> """ - # Calculate months since fiscal year start -> month_offset = d.month - self.start_month -> if month_offset < 0: -> month_offset += 12 -> return month_offset + 1 - -> def fiscal_period(self, d: date) -> FiscalPeriod: -> """Get the full fiscal period for a date. - -> Args: -> d: Calendar date. - -> Returns: -> FiscalPeriod with fiscal year, quarter, and month. -> """ -> return FiscalPeriod( -> fiscal_year=self.fiscal_year(d), -> quarter=self.fiscal_quarter(d), -> month=self.fiscal_month(d), -> ) - -> def fiscal_year_start_date(self, fiscal_year: int) -> date: -> """Get the first day of a fiscal year. - -> Args: -> fiscal_year: The fiscal year number. - -> Returns: -> First date of the fiscal year. - -> Examples: -> >>> cal = FiscalCalendar(start_month=4) # Apr-Mar -> >>> cal.fiscal_year_start_date(2025) -> datetime.date(2024, 4, 1) -> """ -> if self.start_month == 1: -> return date(fiscal_year, 1, 1) - # Fiscal year labeled by end year, so start is in prior calendar year -> return date(fiscal_year - 1, self.start_month, 1) - -> def fiscal_year_end_date(self, fiscal_year: int) -> date: -> """Get the last day of a fiscal year. - -> Args: -> fiscal_year: The fiscal year number. - -> Returns: -> Last date of the fiscal year. 
- -> Examples: -> >>> cal = FiscalCalendar(start_month=4) # Apr-Mar -> >>> cal.fiscal_year_end_date(2025) -> datetime.date(2025, 3, 31) -> """ -> if self.start_month == 1: -> return date(fiscal_year, 12, 31) - # End month is the month before start_month -> end_month = self.start_month - 1 if self.start_month > 1 else 12 -> end_year = fiscal_year -> last_day = calendar.monthrange(end_year, end_month)[1] -> return date(end_year, end_month, last_day) - -> def quarter_start_date(self, fiscal_year: int, quarter: int) -> date: -> """Get the first day of a fiscal quarter. - -> Args: -> fiscal_year: The fiscal year number. -> quarter: Quarter number (1-4). - -> Returns: -> First date of the quarter. - -> Raises: -> ValueError: If quarter is not 1-4. -> """ -> if not 1 <= quarter <= 4: -> msg = f"Quarter must be 1-4, got {quarter}" -> raise ValueError(msg) - - # Calculate the calendar month for this quarter start -> fiscal_month = (quarter - 1) * 3 + 1 # Fiscal months 1, 4, 7, 10 -> return self._fiscal_month_to_date(fiscal_year, fiscal_month, day=1) - -> def quarter_end_date(self, fiscal_year: int, quarter: int) -> date: -> """Get the last day of a fiscal quarter. - -> Args: -> fiscal_year: The fiscal year number. -> quarter: Quarter number (1-4). - -> Returns: -> Last date of the quarter. - -> Raises: -> ValueError: If quarter is not 1-4. -> """ -> if not 1 <= quarter <= 4: -> msg = f"Quarter must be 1-4, got {quarter}" -> raise ValueError(msg) - - # Calculate the calendar month for this quarter end -> fiscal_month = quarter * 3 # Fiscal months 3, 6, 9, 12 -> cal_year, cal_month = self._fiscal_to_calendar_month(fiscal_year, fiscal_month) -> last_day = calendar.monthrange(cal_year, cal_month)[1] -> return date(cal_year, cal_month, last_day) - -> def _fiscal_to_calendar_month(self, fiscal_year: int, fiscal_month: int) -> tuple[int, int]: -> """Convert fiscal year and month to calendar year and month. - -> Args: -> fiscal_year: The fiscal year number. 
-> fiscal_month: Month within fiscal year (1-12). - -> Returns: -> Tuple of (calendar_year, calendar_month). -> """ - # Calculate calendar month -> cal_month = (self.start_month + fiscal_month - 2) % 12 + 1 - - # Calculate calendar year -> if self.start_month == 1: -> cal_year = fiscal_year -> elif cal_month >= self.start_month: - # In first part of fiscal year (same calendar year as start) -> cal_year = fiscal_year - 1 -> else: - # In second part of fiscal year (after calendar year boundary) -> cal_year = fiscal_year - -> return (cal_year, cal_month) - -> def _fiscal_month_to_date(self, fiscal_year: int, fiscal_month: int, day: int) -> date: -> """Convert fiscal year, month, and day to calendar date. - -> Args: -> fiscal_year: The fiscal year number. -> fiscal_month: Month within fiscal year (1-12). -> day: Day of month. - -> Returns: -> Calendar date. -> """ -> cal_year, cal_month = self._fiscal_to_calendar_month(fiscal_year, fiscal_month) -> return date(cal_year, cal_month, day) - - - # ============================================================================ - # FISCAL DELTA - # ============================================================================ - - -> @dataclass(frozen=True, slots=True) -> class FiscalDelta: -> """Immutable fiscal period delta for date arithmetic. - -> Represents a duration in fiscal terms: years, quarters, months, and days. -> Can be added to or subtracted from dates. - -> The month_end_policy controls how month-end dates are handled when -> the target month has fewer days. - -> Attributes: -> years: Number of years (positive or negative). -> quarters: Number of quarters (positive or negative). -> months: Number of months (positive or negative). -> days: Number of days (positive or negative). -> month_end_policy: How to handle month-end date edge cases. - -> Thread-safe. Immutable. Hashable. 
-> """ - -> years: int = 0 -> quarters: int = 0 -> months: int = 0 -> days: int = 0 -> month_end_policy: MonthEndPolicy = MonthEndPolicy.PRESERVE - -> def __post_init__(self) -> None: -> """Validate inputs are integers.""" -> for field in ("years", "quarters", "months", "days"): -> value = getattr(self, field) -> if not isinstance(value, int): -> msg = f"{field} must be int, got {type(value).__name__}" -> raise TypeError(msg) - -> def total_months(self) -> int: -> """Get total delta in months (years + quarters + months). - -> Does not include days (those are applied separately). - -> Returns: -> Total months from years, quarters, and months. -> """ -> return self.years * 12 + self.quarters * 3 + self.months - -> def add_to(self, d: date) -> date: -> """Add this delta to a date. - -> Applies month delta first (years + quarters + months), then days. - -> Args: -> d: Starting date. - -> Returns: -> Resulting date after adding delta. - -> Raises: -> ValueError: If month_end_policy is STRICT and day overflows. -> OverflowError: If resulting date is out of range. -> """ - # First apply months (years + quarters + months combined) -> total_months = self.total_months() -> result = _add_months(d, total_months, self.month_end_policy) - - # Then apply days -> if self.days != 0: -> result = result + timedelta(days=self.days) - -> return result - -> def subtract_from(self, d: date) -> date: -> """Subtract this delta from a date. - -> Equivalent to adding the negation of this delta. - -> Args: -> d: Starting date. - -> Returns: -> Resulting date after subtracting delta. - -> Raises: -> ValueError: If month_end_policy is STRICT and day overflows. -> OverflowError: If resulting date is out of range. -> """ -> return self.negate().add_to(d) - -> def negate(self) -> Self: -> """Return negation of this delta. - -> Returns: -> New FiscalDelta with all values negated. 
-> """ -> return type(self)( -> years=-self.years, -> quarters=-self.quarters, -> months=-self.months, -> days=-self.days, -> month_end_policy=self.month_end_policy, -> ) - -> def __add__(self, other: FiscalDelta) -> FiscalDelta: -> """Add two FiscalDeltas.""" -> if not isinstance(other, FiscalDelta): -> return NotImplemented -> return FiscalDelta( -> years=self.years + other.years, -> quarters=self.quarters + other.quarters, -> months=self.months + other.months, -> days=self.days + other.days, -> month_end_policy=self.month_end_policy, -> ) - -> def __sub__(self, other: FiscalDelta) -> FiscalDelta: -> """Subtract two FiscalDeltas.""" -> if not isinstance(other, FiscalDelta): -> return NotImplemented -> return FiscalDelta( -> years=self.years - other.years, -> quarters=self.quarters - other.quarters, -> months=self.months - other.months, -> days=self.days - other.days, -> month_end_policy=self.month_end_policy, -> ) - -> def __neg__(self) -> Self: -> """Negate this delta.""" -> return self.negate() - -> def __mul__(self, factor: int) -> FiscalDelta: -> """Multiply delta by an integer factor.""" -> if not isinstance(factor, int): -> return NotImplemented -> return FiscalDelta( -> years=self.years * factor, -> quarters=self.quarters * factor, -> months=self.months * factor, -> days=self.days * factor, -> month_end_policy=self.month_end_policy, -> ) - -> def __rmul__(self, factor: int) -> FiscalDelta: -> """Right multiply delta by an integer factor.""" -> return self.__mul__(factor) - - - # ============================================================================ - # HELPER FUNCTIONS - # ============================================================================ - - -> def _is_last_day_of_month(d: date) -> bool: -> """Check if a date is the last day of its month.""" -> return d.day == calendar.monthrange(d.year, d.month)[1] - - -> def _add_months(d: date, months: int, policy: MonthEndPolicy) -> date: -> """Add months to a date with month-end policy. 
- -> Args: -> d: Starting date. -> months: Number of months to add (can be negative). -> policy: How to handle month-end dates. - -> Returns: -> Resulting date. - -> Raises: -> ValueError: If policy is STRICT and day overflows. -> """ -> if months == 0: -> return d - - # Calculate target year and month -> total_months = d.year * 12 + d.month - 1 + months -> target_year = total_months // 12 -> target_month = total_months % 12 + 1 - - # Get max day of target month -> max_day = calendar.monthrange(target_year, target_month)[1] - - # Apply month-end policy -> match policy: -> case MonthEndPolicy.PRESERVE: - # Clamp day to max if needed -> target_day = min(d.day, max_day) - -> case MonthEndPolicy.CLAMP: - # If original was month-end, result is month-end -> target_day = max_day if _is_last_day_of_month(d) else min(d.day, max_day) - -> case MonthEndPolicy.STRICT: - # Raise if day would overflow -> if d.day > max_day: -> msg = ( -> f"Day {d.day} does not exist in {target_year}-{target_month:02d} " -> f"(max day: {max_day})" -> ) -> raise ValueError(msg) -> target_day = d.day - -> return date(target_year, target_month, target_day) - - - # ============================================================================ - # CONVENIENCE FACTORIES - # ============================================================================ - - -> def fiscal_quarter(d: date, start_month: int = 1) -> int: -> """Get fiscal quarter for a date with given fiscal year start. - -> Convenience function for one-off lookups without creating FiscalCalendar. - -> Args: -> d: Calendar date. -> start_month: Month when fiscal year begins (1-12). Default 1 (calendar year). - -> Returns: -> Quarter number (1-4) within fiscal year. 
- -> Examples: -> >>> fiscal_quarter(date(2024, 4, 15), start_month=4) # UK fiscal Q1 -> 1 -> >>> fiscal_quarter(date(2024, 7, 15), start_month=1) # Calendar Q3 -> 3 -> """ -> return FiscalCalendar(start_month=start_month).fiscal_quarter(d) - - -> def fiscal_year_start(fiscal_year: int, start_month: int = 1) -> date: -> """Get first day of a fiscal year. - -> Convenience function for one-off lookups without creating FiscalCalendar. - -> Args: -> fiscal_year: The fiscal year number. -> start_month: Month when fiscal year begins (1-12). Default 1 (calendar year). - -> Returns: -> First date of the fiscal year. - -> Examples: -> >>> fiscal_year_start(2025, start_month=4) # UK FY2025 starts Apr 2024 -> datetime.date(2024, 4, 1) -> """ -> return FiscalCalendar(start_month=start_month).fiscal_year_start_date(fiscal_year) - - -> def fiscal_year_end(fiscal_year: int, start_month: int = 1) -> date: -> """Get last day of a fiscal year. - -> Convenience function for one-off lookups without creating FiscalCalendar. - -> Args: -> fiscal_year: The fiscal year number. -> start_month: Month when fiscal year begins (1-12). Default 1 (calendar year). - -> Returns: -> Last date of the fiscal year. - -> Examples: -> >>> fiscal_year_end(2025, start_month=4) # UK FY2025 ends Mar 2025 -> datetime.date(2025, 3, 31) -> """ -> return FiscalCalendar(start_month=start_month).fiscal_year_end_date(fiscal_year) diff --git a/src/ftllexengine/parsing/guards.py b/src/ftllexengine/parsing/guards.py index d79bfbe8..d65f6ebe 100644 --- a/src/ftllexengine/parsing/guards.py +++ b/src/ftllexengine/parsing/guards.py @@ -17,9 +17,9 @@ `if not errors and is_valid_decimal(result)` to just `if is_valid_decimal(result)`. 
Example: - >>> from ftllexengine.parsing import parse_decimal, is_valid_decimal - >>> result, errors = parse_decimal("1,234.56", "en_US") - >>> if is_valid_decimal(result): + >>> from ftllexengine.parsing import parse_decimal, is_valid_decimal # doctest: +SKIP + >>> result, errors = parse_decimal("1,234.56", "en_US") # doctest: +SKIP + >>> if is_valid_decimal(result): # doctest: +SKIP ... # mypy knows result is finite Decimal ... amount = result.quantize(Decimal("0.01")) """ @@ -60,8 +60,8 @@ def is_valid_decimal(value: Decimal | None) -> TypeIs[Decimal]: True if value is a finite Decimal, False otherwise Example: - >>> result, errors = parse_decimal("1,234.56", "en_US") - >>> if is_valid_decimal(result): + >>> result, errors = parse_decimal("1,234.56", "en_US") # doctest: +SKIP + >>> if is_valid_decimal(result): # doctest: +SKIP ... # Type-safe: mypy knows result is finite Decimal ... total = result * Decimal("1.21") # Add VAT """ @@ -83,8 +83,8 @@ def is_valid_currency( True if value is (Decimal, str) with finite amount, False otherwise Example: - >>> result, errors = parse_currency("EUR1,234.56", "en_US") - >>> if is_valid_currency(result): + >>> result, errors = parse_currency("EUR1,234.56", "en_US") # doctest: +SKIP + >>> if is_valid_currency(result): # doctest: +SKIP ... # Type-safe: mypy knows result is tuple[Decimal, str] ... amount, currency = result ... total = amount * Decimal("1.21") @@ -104,8 +104,8 @@ def is_valid_date(value: date | None) -> TypeIs[date]: True if value is a date object, False otherwise Example: - >>> result, errors = parse_date("2025-01-28", "en_US") - >>> if is_valid_date(result): + >>> result, errors = parse_date("2025-01-28", "en_US") # doctest: +SKIP + >>> if is_valid_date(result): # doctest: +SKIP ... # Type-safe: mypy knows result is date ... 
year = result.year """ @@ -124,8 +124,8 @@ def is_valid_datetime(value: datetime | None) -> TypeIs[datetime]: True if value is a datetime object, False otherwise Example: - >>> result, errors = parse_datetime("2025-01-28 14:30", "en_US") - >>> if is_valid_datetime(result): + >>> result, errors = parse_datetime("2025-01-28 14:30", "en_US") # doctest: +SKIP + >>> if is_valid_datetime(result): # doctest: +SKIP ... # Type-safe: mypy knows result is datetime ... timestamp = result.timestamp() """ diff --git a/src/ftllexengine/parsing/numbers.py b/src/ftllexengine/parsing/numbers.py index b6810eb7..6752c4bc 100644 --- a/src/ftllexengine/parsing/numbers.py +++ b/src/ftllexengine/parsing/numbers.py @@ -103,26 +103,26 @@ def parse_decimal( BabelImportError: If Babel is not installed Examples: - >>> result, errors = parse_decimal("1,234.56", "en_US") - >>> result + >>> result, errors = parse_decimal("1,234.56", "en_US") # doctest: +SKIP + >>> result # doctest: +SKIP Decimal('1234.56') - >>> errors + >>> errors # doctest: +SKIP () - >>> result, errors = parse_decimal("1 234,56", "lv_LV") - >>> result + >>> result, errors = parse_decimal("1 234,56", "lv_LV") # doctest: +SKIP + >>> result # doctest: +SKIP Decimal('1234.56') - >>> result, errors = parse_decimal("invalid", "en_US") - >>> result + >>> result, errors = parse_decimal("invalid", "en_US") # doctest: +SKIP + >>> result # doctest: +SKIP None - >>> len(errors) + >>> len(errors) # doctest: +SKIP 1 Financial Use Cases: # VAT calculations (no float precision loss) - >>> amount, errors = parse_decimal("100,50", "lv_LV") - >>> if amount is not None: + >>> amount, errors = parse_decimal("100,50", "lv_LV") # doctest: +SKIP + >>> if amount is not None: # doctest: +SKIP ... vat = amount * Decimal("0.21") ... 
print(vat) 21.105 @@ -250,12 +250,12 @@ def parse_fluent_number( BabelImportError: If Babel is not installed Examples: - >>> result, errors = parse_fluent_number("1 234,50", "lv_LV") - >>> str(result) + >>> result, errors = parse_fluent_number("1 234,50", "lv_LV") # doctest: +SKIP + >>> str(result) # doctest: +SKIP '1 234,50' - >>> result.value + >>> result.value # doctest: +SKIP Decimal('1234.50') - >>> result.precision + >>> result.precision # doctest: +SKIP 2 """ require_babel("parse_fluent_number") diff --git a/src/ftllexengine/runtime/async_bundle.py b/src/ftllexengine/runtime/async_bundle.py index de2482d8..39ce6b3d 100644 --- a/src/ftllexengine/runtime/async_bundle.py +++ b/src/ftllexengine/runtime/async_bundle.py @@ -21,10 +21,10 @@ from collections.abc import Callable, Iterable, Mapping from types import TracebackType + from ftllexengine.core.semantic_types import LocaleCode from ftllexengine.core.value_types import FluentValue from ftllexengine.diagnostics import FrozenFluentError from ftllexengine.introspection import MessageIntrospection - from ftllexengine.localization.types import LocaleCode from ftllexengine.runtime.cache import CacheAuditLogEntry, CacheStats from ftllexengine.syntax.ast import Junk, Message, Term @@ -48,15 +48,15 @@ class AsyncFluentBundle: Supports the async context manager protocol: Examples: - >>> import asyncio - >>> async def example() -> None: + >>> import asyncio # doctest: +SKIP + >>> async def example() -> None: # doctest: +SKIP ... async with AsyncFluentBundle("en_US") as bundle: ... await bundle.add_resource("greeting = Hello, { $name }!") ... result, errors = await bundle.format_pattern( ... "greeting", {"name": "Alice"} ... ) ... assert errors == () - >>> asyncio.run(example()) + >>> asyncio.run(example()) # doctest: +SKIP """ __slots__ = ("_bundle",) @@ -240,7 +240,7 @@ async def add_resource_stream( SyntaxIntegrityError: In strict mode, if any Junk entries are parsed. 
Example: - >>> async with AsyncFluentBundle("en_US") as bundle: + >>> async with AsyncFluentBundle("en_US") as bundle: # doctest: +SKIP ... with open("locales/en/ui.ftl") as f: ... await bundle.add_resource_stream(f, source_path="locales/en/ui.ftl") """ diff --git a/src/ftllexengine/runtime/bundle.py b/src/ftllexengine/runtime/bundle.py index 008b7b82..c063dba4 100644 --- a/src/ftllexengine/runtime/bundle.py +++ b/src/ftllexengine/runtime/bundle.py @@ -6,77 +6,42 @@ from __future__ import annotations import logging -import time -from collections.abc import Callable, Iterable, Mapping -from dataclasses import dataclass, field -from typing import TYPE_CHECKING, Literal, NoReturn, assert_never +from typing import TYPE_CHECKING from ftllexengine.constants import ( DEFAULT_MAX_EXPANSION_SIZE, - FALLBACK_INVALID, - FALLBACK_MISSING_MESSAGE, MAX_DEPTH, MAX_SOURCE_SIZE, ) from ftllexengine.core.depth_guard import depth_clamp from ftllexengine.core.locale_utils import get_system_locale, require_locale_code -from ftllexengine.diagnostics import ( - Diagnostic, - DiagnosticCode, - ErrorCategory, - ErrorTemplate, - FrozenFluentError, - ValidationResult, -) -from ftllexengine.integrity import ( - FormattingIntegrityError, - IntegrityContext, - SyntaxIntegrityError, -) -from ftllexengine.introspection import extract_variables, introspect_message +from ftllexengine.runtime.bundle_formatting import _BundleFormattingMixin +from ftllexengine.runtime.bundle_queries import _BundleQueryMixin +from ftllexengine.runtime.bundle_registration import _BundleRegistrationMixin from ftllexengine.runtime.cache import CacheAuditLogEntry, CacheStats, IntegrityCache from ftllexengine.runtime.function_bridge import FunctionRegistry from ftllexengine.runtime.functions import get_shared_registry from ftllexengine.runtime.locale_context import LocaleContext -from ftllexengine.runtime.resolver import FluentResolver from ftllexengine.runtime.rwlock import RWLock -from ftllexengine.syntax import Comment, 
Entry, Junk, Message, Resource, Term +from ftllexengine.syntax import Entry, Junk, Message, Resource, Term from ftllexengine.syntax.parser import FluentParserV1 from ftllexengine.validation import validate_resource as _validate_resource_impl if TYPE_CHECKING: + from collections.abc import Callable, Iterable, Mapping + + from ftllexengine.core.semantic_types import LocaleCode from ftllexengine.core.value_types import FluentValue - from ftllexengine.introspection import MessageIntrospection - from ftllexengine.localization.types import LocaleCode + from ftllexengine.diagnostics import FrozenFluentError, ValidationResult from ftllexengine.runtime.cache_config import CacheConfig + from ftllexengine.runtime.resolver import FluentResolver __all__ = ["FluentBundle"] logger = logging.getLogger(__name__) -# Logging truncation limit for warning messages (surfaced to users, more context helpful). -_LOG_TRUNCATE_WARNING: int = 100 - - -@dataclass(slots=True) -class _PendingRegistration: - """Collected entries from a parsed resource, prior to bundle state mutation. - - Intermediate result of Phase 1 (collection) in the two-phase commit - protocol used by ``_register_resource``. Separating collection from - mutation makes strict-mode atomicity explicit: if strict validation - rejects the resource, no bundle state has been touched. - """ - - messages: dict[str, Message] = field(default_factory=dict) - terms: dict[str, Term] = field(default_factory=dict) - msg_deps: dict[str, frozenset[str]] = field(default_factory=dict) - term_deps: dict[str, frozenset[str]] = field(default_factory=dict) - junk: list[Junk] = field(default_factory=list) - overwrite_warnings: list[tuple[Literal["message", "term"], str]] = field(default_factory=list) - -class FluentBundle: +class FluentBundle(_BundleQueryMixin, _BundleFormattingMixin, _BundleRegistrationMixin): """Fluent message bundle for specific locale. Main public API for Fluent localization. 
Aligned with Mozilla python-fluent @@ -111,21 +76,21 @@ class FluentBundle: - max_nesting_depth: Maximum placeable nesting depth (default: 100) Examples: - >>> bundle = FluentBundle("lv_LV") - >>> bundle.add_resource(''' + >>> bundle = FluentBundle("lv_LV") # doctest: +SKIP + >>> bundle.add_resource(''' # doctest: +SKIP ... hello = Sveiki, pasaule! ... welcome = Laipni lūdzam, { $name }! ... ''') - >>> result, errors = bundle.format_pattern("hello") - >>> assert result == 'Sveiki, pasaule!' - >>> assert errors == () - >>> - >>> result, errors = bundle.format_pattern("welcome", {"name": "Jānis"}) - >>> assert result == 'Laipni lūdzam, Jānis!' - >>> assert errors == () - >>> - >>> # Custom security limits for stricter environments - >>> strict_bundle = FluentBundle("en_US", max_source_size=1_000_000) + >>> result, errors = bundle.format_pattern("hello") # doctest: +SKIP + >>> assert result == 'Sveiki, pasaule!' # doctest: +SKIP + >>> assert errors == () # doctest: +SKIP + + >>> result, errors = bundle.format_pattern("welcome", {"name": "Jānis"}) # doctest: +SKIP + >>> assert result == 'Laipni lūdzam, Jānis!' # doctest: +SKIP + >>> assert errors == () # doctest: +SKIP + + Custom security limits for stricter environments: + >>> strict_bundle = FluentBundle("en_US", max_source_size=1_000_000) # doctest: +SKIP """ __slots__ = ( @@ -197,25 +162,25 @@ def __init__( Write operations (add_resource, add_function) acquire exclusive access. 
Example: - >>> from ftllexengine.runtime.cache_config import CacheConfig - >>> - >>> # Using default registry (standard functions) - >>> bundle = FluentBundle("en") - >>> - >>> # Using custom registry with additional functions - >>> from ftllexengine.runtime.functions import create_default_registry - >>> registry = create_default_registry() - >>> registry.register(my_custom_func, ftl_name="CUSTOM") - >>> bundle = FluentBundle("en", functions=registry) - >>> - >>> # Stricter limits for untrusted input - >>> bundle = FluentBundle("en", max_source_size=100_000, max_nesting_depth=20) - >>> - >>> # Financial-grade: default strict=True with write-once cache - >>> bundle = FluentBundle("en", cache=CacheConfig(write_once=True)) - >>> - >>> # Audit-enabled cache for compliance - >>> bundle = FluentBundle("en", cache=CacheConfig(enable_audit=True)) + >>> from ftllexengine.runtime.cache_config import CacheConfig # doctest: +SKIP + + Using the default registry (standard functions): + >>> bundle = FluentBundle("en") # doctest: +SKIP + + Using a custom registry with additional functions: + >>> from ftllexengine.runtime.functions import create_default_registry # doctest: +SKIP + >>> registry = create_default_registry() # doctest: +SKIP + >>> registry.register(my_custom_func, ftl_name="CUSTOM") # doctest: +SKIP + >>> bundle = FluentBundle("en", functions=registry) # doctest: +SKIP + + Stricter limits for untrusted input: + >>> bundle = FluentBundle("en", max_source_size=100_000, max_nesting_depth=20) # doctest: +SKIP + + Financial-grade default: `strict=True` with a write-once cache: + >>> bundle = FluentBundle("en", cache=CacheConfig(write_once=True)) # doctest: +SKIP + + Audit-enabled cache for compliance: + >>> bundle = FluentBundle("en", cache=CacheConfig(enable_audit=True)) # doctest: +SKIP """ # Canonicalize at the boundary so every runtime-facing locale API uses # the same LocaleCode representation. 
@@ -304,8 +269,8 @@ def locale(self) -> LocaleCode: LocaleCode: Canonical lowercase POSIX locale code (e.g., "en_us", "lv_lv") Example: - >>> bundle = FluentBundle("lv_LV") - >>> bundle.locale + >>> bundle = FluentBundle("lv_LV") # doctest: +SKIP + >>> bundle.locale # doctest: +SKIP 'lv_lv' """ return self._locale @@ -318,8 +283,8 @@ def use_isolating(self) -> bool: bool: True if bidi isolation is enabled, False otherwise Example: - >>> bundle = FluentBundle("ar_EG", use_isolating=True) - >>> bundle.use_isolating + >>> bundle = FluentBundle("ar_EG", use_isolating=True) # doctest: +SKIP + >>> bundle.use_isolating # doctest: +SKIP True """ return self._use_isolating @@ -336,11 +301,11 @@ def strict(self) -> bool: bool: True if strict mode is enabled, False otherwise Example: - >>> bundle = FluentBundle("en", strict=True) - >>> bundle.strict + >>> bundle = FluentBundle("en", strict=True) # doctest: +SKIP + >>> bundle.strict # doctest: +SKIP True - >>> bundle_normal = FluentBundle("en") - >>> bundle_normal.strict + >>> bundle_normal = FluentBundle("en") # doctest: +SKIP + >>> bundle_normal.strict # doctest: +SKIP True """ return self._strict @@ -353,12 +318,12 @@ def cache_enabled(self) -> bool: bool: True if caching is enabled, False otherwise Example: - >>> from ftllexengine.runtime.cache_config import CacheConfig - >>> bundle = FluentBundle("en", cache=CacheConfig()) - >>> bundle.cache_enabled + >>> from ftllexengine.runtime.cache_config import CacheConfig # doctest: +SKIP + >>> bundle = FluentBundle("en", cache=CacheConfig()) # doctest: +SKIP + >>> bundle.cache_enabled # doctest: +SKIP True - >>> bundle_no_cache = FluentBundle("en") - >>> bundle_no_cache.cache_enabled + >>> bundle_no_cache = FluentBundle("en") # doctest: +SKIP + >>> bundle_no_cache.cache_enabled # doctest: +SKIP False """ return self._cache is not None @@ -371,12 +336,12 @@ def cache_config(self) -> CacheConfig | None: CacheConfig if caching is enabled, None if caching is disabled. 
Example: - >>> from ftllexengine.runtime.cache_config import CacheConfig - >>> bundle = FluentBundle("en", cache=CacheConfig(size=500)) - >>> bundle.cache_config.size + >>> from ftllexengine.runtime.cache_config import CacheConfig # doctest: +SKIP + >>> bundle = FluentBundle("en", cache=CacheConfig(size=500)) # doctest: +SKIP + >>> bundle.cache_config.size # doctest: +SKIP 500 - >>> bundle_no_cache = FluentBundle("en") - >>> bundle_no_cache.cache_config is None + >>> bundle_no_cache = FluentBundle("en") # doctest: +SKIP + >>> bundle_no_cache.cache_config is None # doctest: +SKIP True """ return self._cache_config @@ -404,8 +369,8 @@ def max_source_size(self) -> int: int: Maximum source size limit for add_resource() Example: - >>> bundle = FluentBundle("en", max_source_size=1_000_000) - >>> bundle.max_source_size + >>> bundle = FluentBundle("en", max_source_size=1_000_000) # doctest: +SKIP + >>> bundle.max_source_size # doctest: +SKIP 1000000 """ return self._max_source_size @@ -418,8 +383,8 @@ def max_nesting_depth(self) -> int: int: Maximum nesting depth limit for parser Example: - >>> bundle = FluentBundle("en", max_nesting_depth=50) - >>> bundle.max_nesting_depth + >>> bundle = FluentBundle("en", max_nesting_depth=50) # doctest: +SKIP + >>> bundle.max_nesting_depth # doctest: +SKIP 50 """ return self._max_nesting_depth @@ -444,9 +409,9 @@ def function_registry(self) -> FunctionRegistry: FunctionRegistry: The function registry for this bundle Example: - >>> bundle = FluentBundle("en") - >>> registry = bundle.function_registry - >>> "NUMBER" in registry + >>> bundle = FluentBundle("en") # doctest: +SKIP + >>> registry = bundle.function_registry # doctest: +SKIP + >>> "NUMBER" in registry # doctest: +SKIP True """ return self._function_registry @@ -486,8 +451,8 @@ def for_system_locale( RuntimeError: If system locale cannot be determined Example: - >>> bundle = FluentBundle.for_system_locale() - >>> bundle.locale # Returns canonical detected system locale + >>> 
bundle = FluentBundle.for_system_locale() # doctest: +SKIP + >>> bundle.locale # Returns canonical detected system locale # doctest: +SKIP 'en_us' """ # Delegate to unified locale detection (raises RuntimeError on failure) @@ -511,8 +476,8 @@ def __repr__(self) -> str: String representation showing locale and loaded messages count Example: - >>> bundle = FluentBundle("lv_LV") - >>> repr(bundle) + >>> bundle = FluentBundle("lv_LV") # doctest: +SKIP + >>> repr(bundle) # doctest: +SKIP "FluentBundle(locale='lv_lv', messages=0, terms=0)" """ with self._rwlock.read(): @@ -535,11 +500,11 @@ def get_babel_locale(self) -> str: str: Babel locale identifier (e.g., "en_US", "lv_LV", "ar_EG") Example: - >>> bundle = FluentBundle("lv") - >>> bundle.get_babel_locale() + >>> bundle = FluentBundle("lv") # doctest: +SKIP + >>> bundle.get_babel_locale() # doctest: +SKIP 'lv' - >>> bundle_us = FluentBundle("en-US") - >>> bundle_us.get_babel_locale() + >>> bundle_us = FluentBundle("en-US") # doctest: +SKIP + >>> bundle_us.get_babel_locale() # doctest: +SKIP 'en_US' Note: @@ -642,8 +607,8 @@ def add_resource_stream( SyntaxIntegrityError: In strict mode, if any Junk entries are parsed. Example: - >>> bundle = FluentBundle("en") - >>> with open("locales/en/ui.ftl") as f: + >>> bundle = FluentBundle("en") # doctest: +SKIP + >>> with open("locales/en/ui.ftl") as f: # doctest: +SKIP ... bundle.add_resource_stream(f, source_path="locales/en/ui.ftl") """ # Collect parsed entries outside lock (stateless parse, immutable input) @@ -653,159 +618,6 @@ def add_resource_stream( with self._rwlock.write(): return self._register_resource(resource, source_path) - def _collect_pending_entries( - self, resource: Resource - ) -> _PendingRegistration: - """Phase 1: Collect entries from a parsed resource without mutating state. - - Iterates over all resource entries, partitioning them into messages, - terms, and junk. 
Detects overwrites against both existing bundle state - and entries already collected in this batch. - - Args: - resource: Parsed FTL resource - - Returns: - Collected entries ready for Phase 2 (commit). - """ - from ftllexengine.analysis.graph import entry_dependency_set # noqa: PLC0415 - circular - from ftllexengine.introspection import extract_references # noqa: PLC0415 - circular - - pending = _PendingRegistration() - - for entry in resource.entries: - match entry: - case Message(): - msg_id = entry.id.name - if msg_id in self._messages or msg_id in pending.messages: - pending.overwrite_warnings.append(("message", msg_id)) - pending.messages[msg_id] = entry - pending.msg_deps[msg_id] = entry_dependency_set( - *extract_references(entry) - ) - case Term(): - term_id = entry.id.name - if term_id in self._terms or term_id in pending.terms: - pending.overwrite_warnings.append(("term", term_id)) - pending.terms[term_id] = entry - pending.term_deps[term_id] = entry_dependency_set( - *extract_references(entry) - ) - case Junk(): - pending.junk.append(entry) - case Comment(): - pass # Comments carry no runtime state; silently skip. - case _: # pragma: no cover - Entry union is closed (Message|Term|Comment|Junk) - assert_never(entry) - - return pending - - def _register_resource( - self, resource: Resource, source_path: str | None - ) -> tuple[Junk, ...]: - """Register parsed resource entries via two-phase commit. - - Phase 1 (collection) delegates to ``_collect_pending_entries``. - Phase 2 (commit) applies mutations only after strict-mode validation - passes, ensuring atomicity: a resource with syntax errors never - partially populates the bundle. - - Assumes caller holds write lock. 
- - Args: - resource: Parsed FTL resource - source_path: Optional path for logging - - Returns: - Tuple of Junk entries from resource - """ - # Phase 1: Collect without mutation - pending = self._collect_pending_entries(resource) - junk_tuple = tuple(pending.junk) - - # Strict mode: fail fast on syntax errors BEFORE any state mutation - if self._strict and junk_tuple: - source_desc = source_path or "" - error_summary = "; ".join( - repr(j.content[:50]) for j in junk_tuple[:3] - ) - if len(junk_tuple) > 3: - error_summary += f" (and {len(junk_tuple) - 3} more)" - - context = IntegrityContext( - component="bundle", - operation="add_resource", - key=source_desc, - expected="", - actual=f"<{len(junk_tuple)} syntax error(s)>", - timestamp=time.monotonic(), - wall_time_unix=time.time(), - ) - - error_msg = ( - f"Strict mode: {len(junk_tuple)} syntax error(s) in " - f"{source_desc}: {error_summary}" - ) - raise SyntaxIntegrityError( - error_msg, - context=context, - junk_entries=junk_tuple, - source_path=source_path, - ) - - # Phase 2: Commit — apply mutations - for entry_type, entry_id in pending.overwrite_warnings: - if entry_type == "message": - logger.warning( - "Overwriting existing message '%s' with new definition", - entry_id, - ) - else: - logger.warning( - "Overwriting existing term '-%s' with new definition", - entry_id, - ) - - self._messages.update(pending.messages) - self._terms.update(pending.terms) - self._msg_deps.update(pending.msg_deps) - self._term_deps.update(pending.term_deps) - - for msg_id in pending.messages: - logger.debug("Registered message: %s", msg_id) - for term_id in pending.terms: - logger.debug("Registered term: %s", term_id) - - source_desc = source_path or "" - for junk in pending.junk: - logger.warning( - "Syntax error in %s: %s", - source_desc, - repr(junk.content[:_LOG_TRUNCATE_WARNING]), - ) - - if source_path: - logger.info( - "Added resource %s: %d messages, %d terms, %d junk entries", - source_path, - len(self._messages), - 
len(self._terms), - len(pending.junk), - ) - else: - logger.info( - "Added resource: %d messages, %d terms, %d junk entries", - len(self._messages), - len(self._terms), - len(pending.junk), - ) - - if self._cache is not None: - self._cache.clear() - logger.debug("Cache cleared after add_resource") - - return junk_tuple - def validate_resource(self, source: str) -> ValidationResult: """Validate FTL resource without adding to bundle. @@ -827,12 +639,12 @@ def validate_resource(self, source: str) -> ValidationResult: TypeError: If source is not a string (e.g., bytes were passed). Example: - >>> bundle = FluentBundle("lv") - >>> result = bundle.validate_resource(ftl_source) - >>> if not result.is_valid: + >>> bundle = FluentBundle("lv") # doctest: +SKIP + >>> result = bundle.validate_resource(ftl_source) # doctest: +SKIP + >>> if not result.is_valid: # doctest: +SKIP ... for error in result.errors: ... print(f"Error [{error.code}]: {error.message}") - >>> if result.warning_count > 0: + >>> if result.warning_count > 0: # doctest: +SKIP ... for warning in result.warnings: ... print(f"Warning [{warning.code}]: {warning.message}") @@ -904,446 +716,31 @@ def format_pattern( This matches the Fluent specification and Mozilla reference implementation. Examples: - >>> # Successful formatting - >>> result, errors = bundle.format_pattern("hello") - >>> assert result == 'Sveiki, pasaule!' - >>> assert errors == () - - >>> # Missing variable - returns fallback and error (non-strict mode) - >>> bundle.add_resource('msg = Hello { $name }!') - >>> result, errors = bundle.format_pattern("msg", {}) - >>> assert result == 'Hello {$name}!' 
# Readable fallback - >>> assert len(errors) == 1 - >>> assert errors[0].category == ErrorCategory.REFERENCE - - >>> # Attribute access - >>> result, errors = bundle.format_pattern("button-save", attribute="tooltip") - >>> assert result == 'Saglabā pašreizējo ierakstu datubāzē' - >>> assert errors == () - - >>> # Default strict=True - raises on errors (no missing $name) - >>> bundle_strict = FluentBundle("en") - >>> bundle_strict.add_resource('msg = Hello { $name }!') - >>> bundle_strict.format_pattern("msg", {}) # Raises FormattingIntegrityError + Successful formatting: + >>> result, errors = bundle.format_pattern("hello") # doctest: +SKIP + >>> assert result == 'Sveiki, pasaule!' # doctest: +SKIP + >>> assert errors == () # doctest: +SKIP + + Missing variable returns a fallback plus an error in non-strict mode: + >>> bundle.add_resource('msg = Hello { $name }!') # doctest: +SKIP + >>> result, errors = bundle.format_pattern("msg", {}) # doctest: +SKIP + >>> assert result == 'Hello {$name}!' 
# Readable fallback # doctest: +SKIP + >>> assert len(errors) == 1 # doctest: +SKIP + >>> assert errors[0].category == ErrorCategory.REFERENCE # doctest: +SKIP + + Attribute access: + >>> result, errors = bundle.format_pattern("button-save", attribute="tooltip") # doctest: +SKIP + >>> assert result == 'Saglabā pašreizējo ierakstu datubāzē' # doctest: +SKIP + >>> assert errors == () # doctest: +SKIP + + Default `strict=True` raises on errors, including missing `$name`: + >>> bundle_strict = FluentBundle("en") # doctest: +SKIP + >>> bundle_strict.add_resource('msg = Hello { $name }!') # doctest: +SKIP + >>> bundle_strict.format_pattern("msg", {}) # Raises FormattingIntegrityError # doctest: +SKIP """ with self._rwlock.read(): return self._format_pattern_impl(message_id, args, attribute) - def _raise_strict_error( - self, - message_id: str, - fallback_value: str, - errors: tuple[FrozenFluentError, ...], - ) -> NoReturn: - """Raise FormattingIntegrityError for strict mode (internal helper). - - Args: - message_id: The message ID that failed to format - fallback_value: The fallback value that would be returned in non-strict mode - errors: Tuple of FrozenFluentError instances - - Raises: - FormattingIntegrityError: Always raised with error details - """ - error_summary = "; ".join(str(e) for e in errors[:3]) - if len(errors) > 3: - error_summary += f" (and {len(errors) - 3} more)" - - context = IntegrityContext( - component="bundle", - operation="format_pattern", - key=message_id, - expected="", - actual=f"<{len(errors)} error(s)>", - timestamp=time.monotonic(), - wall_time_unix=time.time(), - ) - - msg = ( - f"Strict mode: formatting '{message_id}' produced {len(errors)} error(s): " - f"{error_summary}" - ) - raise FormattingIntegrityError( - msg, - context=context, - fluent_errors=errors, - fallback_value=fallback_value, - message_id=message_id, - ) - - def _create_resolver(self) -> FluentResolver: - """Create a new FluentResolver from current bundle state. 
- - Called once at initialization and again whenever the function_registry - changes (add_function). The resolver holds references to self._messages - and self._terms (not copies), so add_resource() mutations are immediately - visible without re-creation. - """ - return FluentResolver( - locale=self._locale, - messages=self._messages, - terms=self._terms, - function_registry=self._function_registry, - use_isolating=self._use_isolating, - max_nesting_depth=self._max_nesting_depth, - max_expansion_size=self._max_expansion_size, - ) - - def _format_pattern_impl( - self, - message_id: str, - args: Mapping[str, FluentValue] | None, - attribute: str | None, - ) -> tuple[str, tuple[FrozenFluentError, ...]]: - """Internal implementation of format_pattern (no locking).""" - # Validate message_id is non-empty string BEFORE cache lookup. - # Invalid inputs must be rejected immediately; caching invalid-ID results - # would waste entries and could produce misleading cache hits. - if not message_id or not isinstance(message_id, str): - logger.warning("Invalid message ID: empty or non-string") - diagnostic = Diagnostic( - code=DiagnosticCode.MESSAGE_NOT_FOUND, - message="Invalid message ID: empty or non-string", - ) - error = FrozenFluentError( - str(diagnostic), ErrorCategory.REFERENCE, diagnostic=diagnostic - ) - if self._strict: - self._raise_strict_error("", FALLBACK_INVALID, (error,)) - return (FALLBACK_INVALID, (error,)) - - # Validate args is None or a Mapping (defensive check for callers ignoring type hints) - if args is not None and not isinstance(args, Mapping): - logger.warning( # type: ignore[unreachable] - "Invalid args type: expected Mapping or None, got %s", type(args).__name__ - ) - diagnostic = Diagnostic( - code=DiagnosticCode.INVALID_ARGUMENT, - message=f"Invalid args type: expected Mapping or None, got {type(args).__name__}", - ) - error = FrozenFluentError( - str(diagnostic), ErrorCategory.RESOLUTION, diagnostic=diagnostic - ) - # Strict mode: raise instead 
of returning fallback - if self._strict: - self._raise_strict_error(message_id, FALLBACK_INVALID, (error,)) - return (FALLBACK_INVALID, (error,)) - - # Validate attribute is None or a string - if attribute is not None and not isinstance(attribute, str): - logger.warning( # type: ignore[unreachable] - "Invalid attribute type: expected str or None, got %s", type(attribute).__name__ - ) - diagnostic = Diagnostic( - code=DiagnosticCode.INVALID_ARGUMENT, - message=f"Invalid attribute type: expected str or None, got {type(attribute).__name__}", - ) - error = FrozenFluentError( - str(diagnostic), ErrorCategory.RESOLUTION, diagnostic=diagnostic - ) - # Strict mode: raise instead of returning fallback - if self._strict: - self._raise_strict_error(message_id, FALLBACK_INVALID, (error,)) - return (FALLBACK_INVALID, (error,)) - - # Check cache after input validation (validated inputs are safe to use as key). - # Placing cache lookup here — after validation, before the message-exists check — - # ensures invalid inputs are never cached and avoids wasting a cache round-trip - # on inputs that would be rejected anyway. - if self._cache is not None: - cached_entry = self._cache.get( - message_id, args, attribute, self._locale, - use_isolating=self._use_isolating, - ) - if cached_entry is not None: - result, errors_tuple = cached_entry.as_result() - if errors_tuple and self._strict: - self._raise_strict_error(message_id, result, errors_tuple) - return (result, errors_tuple) - - # Check if message exists - if message_id not in self._messages: - # strict=True: missing message is unexpected — WARNING for ops visibility. - # strict=False: caller opted into soft-error return semantics; missing messages - # are a legitimate return path, not an anomaly. Use DEBUG to avoid log noise. 
- (logger.warning if self._strict else logger.debug)( - "Message '%s' not found", message_id - ) - diag = ErrorTemplate.message_not_found(message_id) - error = FrozenFluentError(str(diag), ErrorCategory.REFERENCE, diagnostic=diag) - # Don't cache missing message errors - fallback = FALLBACK_MISSING_MESSAGE.format(id=message_id) - # Strict mode: raise instead of returning fallback - if self._strict: - self._raise_strict_error(message_id, fallback, (error,)) - return (fallback, (error,)) - - message = self._messages[message_id] - - # The resolver is stateless: all per-call state lives in ResolutionContext. - # It holds references to self._messages and self._terms dicts directly, - # so mutations from add_resource are visible without re-creation. The - # resolver is only re-created when function_registry changes (add_function). - resolver = self._resolver - - # Resolve message (resolver handles all errors internally including cycles) - # Note: No try-except here. The resolver is designed to collect all expected - # errors (missing references, type errors, etc.) and return them in the tuple. - # If a raw KeyError/AttributeError/RuntimeError escapes the resolver, that - # indicates a bug in the resolver implementation that should be exposed, - # not swallowed. This follows the principle of failing fast on internal bugs. - result, errors_tuple = resolver.resolve_message(message, args, attribute) - - if errors_tuple: - # strict=True: errors are unexpected — use WARNING so ops alerts fire. - # strict=False: errors are the explicit return-value API; caller receives - # them in the tuple and handles them. WARNING would fire on every expected - # soft-error call, polluting logs. Use DEBUG instead. 
- log_fn = logger.warning if self._strict else logger.debug - log_fn( - "Message resolution errors for '%s': %d error(s)", message_id, len(errors_tuple) - ) - for err in errors_tuple: - logger.debug(" - %s: %s", type(err).__name__, err) - else: - logger.debug("Resolved message '%s' successfully", message_id) - - # Cache resolution result (including errors) BEFORE strict mode check. - # This ensures repeated calls for the same erroneous message in strict mode - # hit the cache instead of triggering expensive re-resolution each time. - if self._cache is not None: - self._cache.put( - message_id, args, attribute, self._locale, - use_isolating=self._use_isolating, formatted=result, errors=errors_tuple, - ) - - # Strict mode: raise after caching so subsequent calls can use cached result - if errors_tuple and self._strict: - self._raise_strict_error(message_id, result, errors_tuple) - - return (result, errors_tuple) - - def has_message(self, message_id: str) -> bool: - """Check if message exists. - - Args: - message_id: Message identifier - - Returns: - True if message exists in bundle - """ - with self._rwlock.read(): - return message_id in self._messages - - def has_attribute(self, message_id: str, attribute: str) -> bool: - """Check if message has specific attribute. - - Args: - message_id: Message identifier - attribute: Attribute name - - Returns: - True if message exists AND has the specified attribute - - Note: - This method checks if any attribute with the given name exists. - If duplicate attribute names exist (validation warning), this returns - True without indicating which definition will be used. See format_pattern - for resolution semantics (last-wins for duplicates). - - Example: - >>> bundle.add_resource(''' - ... button = Click - ... .tooltip = Click to save - ... 
''') - >>> bundle.has_message("button") - True - >>> bundle.has_attribute("button", "tooltip") - True - >>> bundle.has_attribute("button", "missing") - False - >>> bundle.has_attribute("nonexistent", "tooltip") - False - """ - with self._rwlock.read(): - if message_id not in self._messages: - return False - message = self._messages[message_id] - return any(attr.id.name == attribute for attr in message.attributes) - - def get_message_ids(self) -> list[str]: - """Get all message IDs in bundle. - - Returns: - List of message identifiers - """ - with self._rwlock.read(): - return list(self._messages.keys()) - - def get_message_variables(self, message_id: str) -> frozenset[str]: - """Get all variables required by a message (introspection API). - - This is a value-add feature not present in Mozilla's python-fluent. - Enables FTL file validation in CI/CD pipelines. - - Args: - message_id: Message identifier - - Returns: - Frozen set of variable names (without $ prefix) - - Raises: - KeyError: If message doesn't exist - - Example: - >>> bundle.add_resource("greeting = Hello, { $name }!") - >>> vars = bundle.get_message_variables("greeting") - >>> assert "name" in vars - """ - with self._rwlock.read(): - if message_id not in self._messages: - msg = f"Message '{message_id}' not found" - raise KeyError(msg) - - return extract_variables(self._messages[message_id]) - - def get_all_message_variables(self) -> dict[str, frozenset[str]]: - """Get variables for all messages in bundle (batch introspection API). - - Convenience method for extracting variables from all messages at once. - Useful for CI/CD validation pipelines that need to analyze entire - FTL resources in a single operation. - - This is equivalent to calling get_message_variables() for each message - ID, but provides a cleaner API for batch operations. - - Returns: - Dictionary mapping message IDs to their required variable sets. - Empty dict if bundle has no messages. - - Example: - >>> bundle.add_resource(''' - ... 
greeting = Hello, { $name }! - ... farewell = Goodbye, { $firstName } { $lastName }! - ... simple = No variables here - ... ''') - >>> all_vars = bundle.get_all_message_variables() - >>> assert all_vars["greeting"] == frozenset({"name"}) - >>> assert all_vars["farewell"] == frozenset({"firstName", "lastName"}) - >>> assert all_vars["simple"] == frozenset() - - See Also: - - get_message_variables(): Get variables for single message - - introspect_message(): Get complete metadata (variables + functions + references) - - Note: - Acquires a single read lock for atomic snapshot of all message variables. - """ - with self._rwlock.read(): - return { - message_id: extract_variables(message) - for message_id, message in self._messages.items() - } - - def introspect_message(self, message_id: str) -> MessageIntrospection: - """Get complete introspection data for a message. - - Returns comprehensive metadata about variables, functions, and references - used in the message. Uses Python 3.13's TypeIs for type-safe results. - - Args: - message_id: Message identifier - - Returns: - MessageIntrospection with complete metadata - - Raises: - KeyError: If message doesn't exist - - Example: - >>> bundle.add_resource("price = { NUMBER($amount, minimumFractionDigits: 2) }") - >>> info = bundle.introspect_message("price") - >>> assert "amount" in info.get_variable_names() - >>> assert "NUMBER" in info.get_function_names() - """ - with self._rwlock.read(): - if message_id not in self._messages: - msg = f"Message '{message_id}' not found" - raise KeyError(msg) - - return introspect_message(self._messages[message_id]) - - def introspect_term(self, term_id: str) -> MessageIntrospection: - """Get complete introspection data for a term. - - Returns comprehensive metadata about variables, functions, and references - used in the term. Mirrors introspect_message() for API symmetry. 
- - Args: - term_id: Term identifier (without leading dash) - - Returns: - MessageIntrospection with complete metadata - - Raises: - KeyError: If term doesn't exist - - Example: - >>> bundle.add_resource("-brand = { $case -> \\n [nominative] Firefox\\n *[other] Firefox\\n}") - >>> info = bundle.introspect_term("brand") - >>> assert "case" in info.get_variable_names() - """ - with self._rwlock.read(): - if term_id not in self._terms: - msg = f"Term '{term_id}' not found" - raise KeyError(msg) - - return introspect_message(self._terms[term_id]) - - def get_message(self, message_id: str) -> Message | None: - """Return the parsed AST node for a message, or None if not found. - - Provides direct access to the Message AST node, enabling callers to use - structured introspection APIs such as validate_message_variables() without - re-parsing the FTL source. - - Args: - message_id: Message identifier - - Returns: - Message AST node, or None if the message does not exist - - Example: - >>> bundle.add_resource("greeting = Hello, { $name }!") - >>> msg = bundle.get_message("greeting") - >>> if msg is not None: - ... from ftllexengine import validate_message_variables - ... result = validate_message_variables(msg, frozenset({"name"})) - ... assert result.is_valid - """ - with self._rwlock.read(): - return self._messages.get(message_id) - - def get_term(self, term_id: str) -> Term | None: - """Return the parsed AST node for a term, or None if not found. - - Provides direct access to the Term AST node. The term_id should be - supplied without the leading dash (e.g., ``"brand"`` for ``-brand``). 
- - Args: - term_id: Term identifier without leading dash - - Returns: - Term AST node, or None if the term does not exist - - Example: - >>> bundle.add_resource("-brand = Firefox") - >>> term = bundle.get_term("brand") - >>> assert term is not None - """ - with self._rwlock.read(): - return self._terms.get(term_id) - def add_function(self, name: str, func: Callable[..., FluentValue]) -> None: """Add custom function to bundle. @@ -1352,9 +749,9 @@ def add_function(self, name: str, func: Callable[..., FluentValue]) -> None: func: Callable function that returns a FluentValue Example: - >>> def CUSTOM(value): + >>> def CUSTOM(value): # doctest: +SKIP ... return value.upper() - >>> bundle.add_function("CUSTOM", CUSTOM) + >>> bundle.add_function("CUSTOM", CUSTOM) # doctest: +SKIP """ with self._rwlock.write(): # Copy-on-write: copy the shared registry on first modification @@ -1381,10 +778,10 @@ def clear_cache(self) -> None: Automatically called by add_resource() and add_function(). Example: - >>> bundle = FluentBundle("en", cache=CacheConfig()) - >>> bundle.add_resource("msg = Hello") - >>> bundle.format_pattern("msg") # Caches result - >>> bundle.clear_cache() # Manual invalidation + >>> bundle = FluentBundle("en", cache=CacheConfig()) # doctest: +SKIP + >>> bundle.add_resource("msg = Hello") # doctest: +SKIP + >>> bundle.format_pattern("msg") # Caches result # doctest: +SKIP + >>> bundle.clear_cache() # Manual invalidation # doctest: +SKIP """ with self._rwlock.write(): if self._cache is not None: @@ -1400,16 +797,16 @@ def get_cache_stats(self) -> CacheStats | None: See CacheStats for the complete field specification. 
Example: - >>> bundle = FluentBundle("en", cache=CacheConfig()) - >>> bundle.add_resource("msg = Hello") - >>> bundle.format_pattern("msg", {}) # Cache miss - >>> bundle.format_pattern("msg", {}) # Cache hit - >>> stats = bundle.get_cache_stats() - >>> stats["hits"] + >>> bundle = FluentBundle("en", cache=CacheConfig()) # doctest: +SKIP + >>> bundle.add_resource("msg = Hello") # doctest: +SKIP + >>> bundle.format_pattern("msg", {}) # Cache miss # doctest: +SKIP + >>> bundle.format_pattern("msg", {}) # Cache hit # doctest: +SKIP + >>> stats = bundle.get_cache_stats() # doctest: +SKIP + >>> stats["hits"] # doctest: +SKIP 1 - >>> stats["misses"] + >>> stats["misses"] # doctest: +SKIP 1 - >>> isinstance(stats["hit_rate"], float) + >>> isinstance(stats["hit_rate"], float) # doctest: +SKIP True """ if self._cache is not None: diff --git a/src/ftllexengine/runtime/bundle.py,cover b/src/ftllexengine/runtime/bundle.py,cover deleted file mode 100644 index 2fdf510e..00000000 --- a/src/ftllexengine/runtime/bundle.py,cover +++ /dev/null @@ -1,1613 +0,0 @@ -> """FluentBundle - Main API for Fluent message formatting. - -> Python 3.13+. External dependency: Babel (CLDR locale data). 
-> """ - -> from __future__ import annotations - -> import logging -> import re -> import time -> from collections.abc import Callable, Mapping -> from typing import TYPE_CHECKING, NoReturn - -> from ftllexengine.constants import ( -> DEFAULT_CACHE_SIZE, -> DEFAULT_MAX_ENTRY_SIZE, -> DEFAULT_MAX_EXPANSION_SIZE, -> FALLBACK_INVALID, -> FALLBACK_MISSING_MESSAGE, -> MAX_DEPTH, -> MAX_LOCALE_LENGTH_HARD_LIMIT, -> MAX_SOURCE_SIZE, -> ) -> from ftllexengine.core.depth_guard import depth_clamp -> from ftllexengine.diagnostics import ( -> Diagnostic, -> DiagnosticCode, -> ErrorCategory, -> ErrorTemplate, -> FrozenFluentError, -> ValidationResult, -> ) -> from ftllexengine.integrity import ( -> FormattingIntegrityError, -> IntegrityContext, -> SyntaxIntegrityError, -> ) -> from ftllexengine.introspection import extract_variables, introspect_message -> from ftllexengine.locale_utils import get_system_locale -> from ftllexengine.runtime.cache import IntegrityCache -> from ftllexengine.runtime.function_bridge import FluentValue, FunctionRegistry -> from ftllexengine.runtime.functions import get_shared_registry -> from ftllexengine.runtime.locale_context import LocaleContext -> from ftllexengine.runtime.resolver import FluentResolver -> from ftllexengine.runtime.rwlock import RWLock -> from ftllexengine.syntax import Comment, Junk, Message, Resource, Term -> from ftllexengine.syntax.parser import FluentParserV1 -> from ftllexengine.validation import validate_resource as _validate_resource_impl - -- if TYPE_CHECKING: -- from ftllexengine.introspection import MessageIntrospection - -> __all__ = ["FluentBundle"] - -> logger = logging.getLogger(__name__) - - # Logging truncation limits for error messages. - # Warnings show more context (100 chars) as they're surfaced to users. - # Debug messages are high-volume, shorter (50 chars) keeps logs manageable. 
_LOG_TRUNCATE_WARNING: int = 100
_LOG_TRUNCATE_DEBUG: int = 50

# BCP 47 locale code pattern (ASCII-only alphanumerics with underscore/hyphen separators).
# Rejects non-ASCII characters like accented letters (e.g., "e_FR" with accented e).
# Uses \Z instead of $ to match only at end-of-string, not before trailing newline.
_LOCALE_PATTERN: re.Pattern[str] = re.compile(r"^[a-zA-Z0-9]+([_-][a-zA-Z0-9]+)*\Z")


class FluentBundle:
    """Fluent message bundle for specific locale.

    Main public API for Fluent localization. Aligned with Mozilla python-fluent
    error handling that returns (result, errors) tuples.

    Thread Safety:
        FluentBundle is always thread-safe using a readers-writer lock (RWLock).
        This enables high-concurrency access patterns:

        - Read operations (format_pattern, format_message, has_message, etc.)
          can execute concurrently without blocking each other.
        - Write operations (add_resource, add_function) acquire exclusive access.
        - Writers have priority to prevent starvation in read-heavy workloads.

        This design provides superior throughput for multi-threaded applications
        while maintaining full thread safety. Typical web servers with 100+
        concurrent format requests will see significant performance improvements
        compared to coarse-grained locking.

    Reentrancy Limitation:
        Modifying the bundle from within format operations is PROHIBITED and
        raises RuntimeError. This includes calling add_resource() or add_function()
        from custom functions invoked during formatting. The RWLock does not
        support read-to-write lock upgrading (deadlock prevention).

        If you need lazy-loading patterns, load resources before formatting
        or use a separate bundle instance for dynamic content.

    Parser Security:
        Configurable limits prevent DoS attacks:
        - max_source_size: Maximum FTL source length in characters (default: 10 MiB / 10,485,760 chars)
        - max_nesting_depth: Maximum placeable nesting depth (default: 100)

    Examples:
        >>> bundle = FluentBundle("lv_LV")
        >>> bundle.add_resource('''
        ... hello = Sveiki, pasaule!
        ... welcome = Laipni lūdzam, { $name }!
        ... ''')
        >>> result, errors = bundle.format_pattern("hello")
        >>> assert result == 'Sveiki, pasaule!'
        >>> assert errors == ()
        >>>
        >>> result, errors = bundle.format_pattern("welcome", {"name": "Jānis"})
        >>> assert result == 'Laipni lūdzam, Jānis!'
        >>> assert errors == ()
        >>>
        >>> # Custom security limits for stricter environments
        >>> strict_bundle = FluentBundle("en_US", max_source_size=1_000_000)
    """

    __slots__ = (
        "_cache",
        "_cache_enable_audit",
        "_cache_max_audit_entries",
        "_cache_max_entry_weight",
        "_cache_max_errors_per_entry",
        "_cache_size",
        "_cache_write_once",
        "_function_registry",
        "_locale",
        "_max_expansion_size",
        "_max_nesting_depth",
        "_max_source_size",
        "_messages",
        "_modified_in_context",
        "_msg_deps",
        "_owns_registry",
        "_parser",
        "_rwlock",
        "_strict",
        "_term_deps",
        "_terms",
        "_use_isolating",
    )

    @staticmethod
    def _validate_locale_format(locale: str) -> None:
        """Validate locale code format.

        Checks that locale is non-empty and contains only ASCII alphanumeric
        characters with optional underscore or hyphen separators. Enforces
        BCP 47 compliance by rejecting non-ASCII characters.

        Rejects obviously malicious inputs (>1000 characters) to prevent DoS.
        Locale codes exceeding standard BCP 47 length (35 chars) trigger warnings
        in LocaleContext but are accepted here.

        Args:
            locale: Locale code to validate

        Raises:
            ValueError: If locale code is empty, excessively long (>1000),
                contains non-ASCII characters, or has invalid format
        """
        if not locale:
            msg = "Locale code cannot be empty"
            raise ValueError(msg)

        # Reject obviously malicious inputs (DoS prevention)
        if len(locale) > MAX_LOCALE_LENGTH_HARD_LIMIT:
            msg = (
                f"Locale code exceeds maximum length of {MAX_LOCALE_LENGTH_HARD_LIMIT} characters: "
                f"'{locale[:50]}...' ({len(locale)} characters)"
            )
            raise ValueError(msg)

        if not _LOCALE_PATTERN.match(locale):
            msg = f"Invalid locale code format: '{locale}' (must be ASCII alphanumeric)"
            raise ValueError(msg)

    def __init__(
        self,
        locale: str,
        /,
        *,
        use_isolating: bool = True,
        enable_cache: bool = False,
        cache_size: int = DEFAULT_CACHE_SIZE,
        cache_write_once: bool = False,
        cache_enable_audit: bool = False,
        cache_max_audit_entries: int = 10000,
        cache_max_entry_weight: int = DEFAULT_MAX_ENTRY_SIZE,
        cache_max_errors_per_entry: int = 50,
        functions: FunctionRegistry | None = None,
        max_source_size: int | None = None,
        max_nesting_depth: int | None = None,
        max_expansion_size: int | None = None,
        strict: bool = False,
    ) -> None:
        """Initialize bundle for locale.

        Args:
            locale: Locale code (lv_LV, en_US, de_DE, pl_PL) [positional-only]
            use_isolating: Wrap interpolated values in Unicode bidi isolation marks (default: True)
                Set to False only if you're certain RTL languages won't be used.
                See Unicode TR9: http://www.unicode.org/reports/tr9/
            enable_cache: Enable format caching for performance (default: False)
                Cache provides 50x speedup on repeated format calls.
            cache_size: Maximum cache entries when caching enabled (default: 1000)
            cache_write_once: Reject updates to existing cache keys (default: False).
                Enables data race prevention for financial applications.
                When True and strict=True, raises WriteConflictError on overwrite attempt.
            cache_enable_audit: Maintain audit log of all cache operations (default: False).
                Enables post-mortem analysis and compliance logging.
            cache_max_audit_entries: Maximum audit log entries before oldest eviction (default: 10000).
                Only relevant when cache_enable_audit=True.
            cache_max_entry_weight: Maximum memory weight for cached results (default: 10000).
                Weight is calculated as: len(formatted_str) + sum(error_weights).
                Results exceeding this limit are computed but not cached.
            cache_max_errors_per_entry: Maximum errors per cache entry (default: 50).
                Prevents memory exhaustion from pathological error cases.
            functions: Custom FunctionRegistry to use (default: standard registry with
                NUMBER, DATETIME, CURRENCY). Pass a custom registry to:
                - Use pre-registered custom functions
                - Share function registrations between bundles
                - Override default function behavior
            max_source_size: Maximum FTL source length in characters (default: 10 MiB / 10,485,760 chars).
                Set to 0 to disable limit (not recommended for untrusted input).
            max_nesting_depth: Maximum placeable nesting depth (default: 100).
                Prevents DoS via deeply nested { { { ... } } } structures.
            max_expansion_size: Maximum total characters produced during resolution (default: 1,000,000).
                Prevents Billion Laughs attacks via exponentially expanding message references.
            strict: Enable strict mode for financial applications (default: False).
                When True, format_pattern raises FormattingIntegrityError on ANY error
                instead of returning fallback values. Use for monetary/critical data
                where silent fallbacks are unacceptable. Also affects cache corruption
                handling: raises CacheCorruptionError instead of silent eviction.

        Raises:
            ValueError: If locale code is empty or has invalid format

        Thread Safety:
            FluentBundle is always thread-safe using a readers-writer lock (RWLock).
            Read operations (format calls) execute concurrently without blocking.
            Write operations (add_resource, add_function) acquire exclusive access.

        Example:
            >>> # Using default registry (standard functions)
            >>> bundle = FluentBundle("en")
            >>>
            >>> # Using custom registry with additional functions
            >>> from ftllexengine.runtime.functions import create_default_registry
            >>> registry = create_default_registry()
            >>> registry.register(my_custom_func, ftl_name="CUSTOM")
            >>> bundle = FluentBundle("en", functions=registry)
            >>>
            >>> # Stricter limits for untrusted input
            >>> bundle = FluentBundle("en", max_source_size=100_000, max_nesting_depth=20)
            >>>
            >>> # Financial-grade strict mode with write-once cache
            >>> bundle = FluentBundle("en", strict=True, enable_cache=True, cache_write_once=True)
            >>>
            >>> # Audit-enabled cache for compliance
            >>> bundle = FluentBundle("en", enable_cache=True, cache_enable_audit=True)
        """
        # Validate locale format (raises ValueError before any state is created)
        FluentBundle._validate_locale_format(locale)

        self._locale = locale
        self._use_isolating = use_isolating
        self._strict = strict
        self._messages: dict[str, Message] = {}
        self._terms: dict[str, Term] = {}

        # Dependency tracking for cross-resource cycle detection.
        # Maps entry ID to set of (type-prefixed) dependencies.
        # E.g., {"greeting": {"msg:welcome", "term:brand"}}
        self._msg_deps: dict[str, set[str]] = {}
        self._term_deps: dict[str, set[str]] = {}

        # Parser security configuration.
        # depth_clamp() bounds the requested nesting depth to a safe range.
        self._max_source_size = max_source_size if max_source_size is not None else MAX_SOURCE_SIZE
        requested_depth = max_nesting_depth if max_nesting_depth is not None else MAX_DEPTH
        self._max_nesting_depth = depth_clamp(requested_depth)
        self._max_expansion_size = (
            max_expansion_size if max_expansion_size is not None else DEFAULT_MAX_EXPANSION_SIZE
        )
        self._parser = FluentParserV1(
            max_source_size=self._max_source_size,
            max_nesting_depth=self._max_nesting_depth,
        )

        # Thread safety: always enabled via RWLock (readers-writer lock)
        # Allows concurrent read operations (format calls) while ensuring
        # exclusive write access (add_resource, add_function)
        self._rwlock = RWLock()

        # Function registry: copy-on-write optimization
        # Using the shared registry avoids re-registering built-in functions for each bundle.
        # Copy is deferred until add_function() is called (copy-on-write pattern).
        if functions is not None:
            # Type validation at API boundary: reject non-FunctionRegistry objects early.
            # dict, OrderedDict, and other Mapping types have .copy() but lack the
            # FunctionRegistry interface (should_inject_locale, call, etc.), causing
            # opaque AttributeErrors during format_pattern() if not caught here.
            if not isinstance(functions, FunctionRegistry):
                msg = (  # type: ignore[unreachable]
                    f"functions must be FunctionRegistry, not {type(functions).__name__}. "
                    "Use create_default_registry() or FunctionRegistry() to create one."
                )
                raise TypeError(msg)
            # User provided a registry - copy it for isolation
            self._function_registry = functions.copy()
            self._owns_registry = True
        else:
            # Use shared registry directly (frozen, so safe to share)
            # Will be copied on first add_function() call
            self._function_registry = get_shared_registry()
            self._owns_registry = False

        # Format cache (opt-in) with integrity verification
        # Store cache configuration for introspection; created eagerly when enable_cache=True
        self._cache: IntegrityCache | None = None
        self._cache_size = cache_size
        self._cache_write_once = cache_write_once
        self._cache_enable_audit = cache_enable_audit
        self._cache_max_audit_entries = cache_max_audit_entries
        self._cache_max_entry_weight = cache_max_entry_weight
        self._cache_max_errors_per_entry = cache_max_errors_per_entry

        if enable_cache:
            # Cache strict mode matches bundle strict mode for consistent error handling.
            # When bundle strict=True, cache corruption raises CacheCorruptionError.
            # When bundle strict=False, cache corruption silently evicts the entry.
            self._cache = IntegrityCache(
                maxsize=cache_size,
                max_entry_weight=cache_max_entry_weight,
                max_errors_per_entry=cache_max_errors_per_entry,
                write_once=cache_write_once,
                strict=self._strict,
                enable_audit=cache_enable_audit,
                max_audit_entries=cache_max_audit_entries,
            )

        # Context manager state tracking (cache invalidation optimization)
        self._modified_in_context = False

        logger.info(
            "FluentBundle initialized for locale: %s (use_isolating=%s, cache=%s, strict=%s)",
            locale,
            use_isolating,
            "enabled" if enable_cache else "disabled",
            strict,
        )

    @property
    def locale(self) -> str:
        """Get the locale code for this bundle (read-only).

        Returns:
            str: Locale code (e.g., "en_US", "lv_LV")

        Example:
            >>> bundle = FluentBundle("lv_LV")
            >>> bundle.locale
            'lv_LV'
        """
        return self._locale

    @property
    def use_isolating(self) -> bool:
        """Get whether Unicode bidi isolation is enabled (read-only).

        Returns:
            bool: True if bidi isolation is enabled, False otherwise

        Example:
            >>> bundle = FluentBundle("ar_EG", use_isolating=True)
            >>> bundle.use_isolating
            True
        """
        return self._use_isolating

    @property
    def strict(self) -> bool:
        """Get whether strict mode is enabled (read-only).

        Strict mode raises FormattingIntegrityError on ANY formatting error
        instead of returning fallback values. Essential for financial applications
        where silent fallbacks are unacceptable.

        Returns:
            bool: True if strict mode is enabled, False otherwise

        Example:
            >>> bundle = FluentBundle("en", strict=True)
            >>> bundle.strict
            True
            >>> bundle_normal = FluentBundle("en")
            >>> bundle_normal.strict
            False
        """
        return self._strict

    @property
    def cache_enabled(self) -> bool:
        """Get whether format caching is enabled (read-only).

        Returns:
            bool: True if caching is enabled, False otherwise

        Example:
            >>> bundle = FluentBundle("en", enable_cache=True)
            >>> bundle.cache_enabled
            True
            >>> bundle_no_cache = FluentBundle("en")
            >>> bundle_no_cache.cache_enabled
            False
        """
        # Cache object is only constructed when enable_cache=True in __init__
        return self._cache is not None

    @property
    def cache_size(self) -> int:
        """Get maximum cache size configuration (read-only).

        Returns:
            int: Configured maximum cache entries

        Example:
            >>> bundle = FluentBundle("en", enable_cache=True, cache_size=500)
            >>> bundle.cache_size
            500
            >>> # Cache size is returned even when caching is disabled
            >>> bundle_no_cache = FluentBundle("en", cache_size=200)
            >>> bundle_no_cache.cache_size
            200
            >>> bundle_no_cache.cache_enabled
            False

        Note:
            Returns configured size regardless of whether caching is enabled.
            Use cache_enabled to check if caching is active.
        """
        return self._cache_size

    @property
    def cache_usage(self) -> int:
        """Get current number of cached format results (read-only).

        Returns:
            int: Number of entries currently in cache (0 if caching disabled)

        Example:
            >>> bundle = FluentBundle("en", enable_cache=True, cache_size=500)
            >>> bundle.add_resource("msg = Hello")
            >>> bundle.format_pattern("msg", {})
            ('Hello', ())
            >>> bundle.cache_usage  # One entry cached
            1
            >>> bundle.cache_size  # Configured limit
            500

        Note:
            Use with cache_size to calculate utilization: cache_usage / cache_size
        """
        if self._cache is None:
            return 0
        return self._cache.size

    @property
    def cache_write_once(self) -> bool:
        """Get whether cache write-once mode is enabled (read-only).

        Write-once mode rejects updates to existing cache keys, preventing
        data races in concurrent environments. Essential for financial
        applications where cache overwrites could indicate race conditions.

        Returns:
            bool: True if write-once mode is configured

        Example:
            >>> bundle = FluentBundle("en", enable_cache=True, cache_write_once=True)
            >>> bundle.cache_write_once
            True
            >>> bundle_normal = FluentBundle("en", enable_cache=True)
            >>> bundle_normal.cache_write_once
            False

        Note:
            Returns configured value regardless of whether caching is enabled.
        """
        return self._cache_write_once

    @property
    def cache_enable_audit(self) -> bool:
        """Get whether cache audit logging is enabled (read-only).

        Audit logging maintains a history of all cache operations for
        compliance and debugging purposes. Each operation (GET, PUT, HIT,
        MISS, EVICT, CORRUPTION) is recorded with timestamps.

        Returns:
            bool: True if audit logging is configured

        Example:
            >>> bundle = FluentBundle("en", enable_cache=True, cache_enable_audit=True)
            >>> bundle.cache_enable_audit
            True

        Note:
            Returns configured value regardless of whether caching is enabled.
        """
        return self._cache_enable_audit

    @property
    def cache_max_audit_entries(self) -> int:
        """Get maximum audit log entries configuration (read-only).

        The audit log uses a bounded deque with O(1) eviction of oldest
        entries when the limit is reached.

        Returns:
            int: Configured maximum audit log entries

        Example:
            >>> bundle = FluentBundle("en", enable_cache=True, cache_enable_audit=True, cache_max_audit_entries=5000)
            >>> bundle.cache_max_audit_entries
            5000

        Note:
            Returns configured value regardless of whether caching or audit is enabled.
        """
        return self._cache_max_audit_entries

    @property
    def cache_max_entry_weight(self) -> int:
        """Get maximum cache entry weight configuration (read-only).

        Weight is calculated as: len(formatted_str) + sum(error_weights).
        Results exceeding this limit are computed but not cached, protecting
        against memory exhaustion from large formatted outputs.

        Returns:
            int: Configured maximum entry weight in approximate bytes

        Example:
            >>> bundle = FluentBundle("en", enable_cache=True, cache_max_entry_weight=5000)
            >>> bundle.cache_max_entry_weight
            5000

        Note:
            Returns configured value regardless of whether caching is enabled.
        """
        return self._cache_max_entry_weight

    @property
    def cache_max_errors_per_entry(self) -> int:
        """Get maximum errors per cache entry configuration (read-only).

        Entries with more errors than this limit are not cached, preventing
        memory exhaustion from pathological cases where resolution produces
        many errors (e.g., cyclic references, deeply nested validation failures).

        Returns:
            int: Configured maximum errors per cache entry

        Example:
            >>> bundle = FluentBundle("en", enable_cache=True, cache_max_errors_per_entry=25)
            >>> bundle.cache_max_errors_per_entry
            25

        Note:
            Returns configured value regardless of whether caching is enabled.
        """
        return self._cache_max_errors_per_entry

    @property
    def max_source_size(self) -> int:
        """Maximum FTL source size in characters (read-only).

        Python measures string length in characters (code points), not bytes.
        UTF-8 encoding means 1 character = 1-4 bytes, but this limit counts
        characters as returned by len(source).

        Returns:
            int: Maximum source size limit for add_resource()

        Example:
            >>> bundle = FluentBundle("en", max_source_size=1_000_000)
            >>> bundle.max_source_size
            1000000
        """
        return self._max_source_size

    @property
    def max_nesting_depth(self) -> int:
        """Maximum placeable nesting depth (read-only).

        Returns:
            int: Maximum nesting depth limit for parser

        Example:
            >>> bundle = FluentBundle("en", max_nesting_depth=50)
            >>> bundle.max_nesting_depth
            50
        """
        return self._max_nesting_depth

    @property
    def max_expansion_size(self) -> int:
        """Maximum total characters produced during resolution (read-only).

        Returns:
            int: Maximum expansion budget for DoS prevention
        """
        return self._max_expansion_size

    @property
    def function_registry(self) -> FunctionRegistry:
        """Get the function registry for this bundle (read-only).

        Provides read access to the registered formatting functions without
        requiring access to private attributes.

        Returns:
            FunctionRegistry: The function registry for this bundle

        Example:
            >>> bundle = FluentBundle("en")
            >>> registry = bundle.function_registry
            >>> "NUMBER" in registry
            True
        """
        return self._function_registry

    @classmethod
    def for_system_locale(
        cls,
        *,
        use_isolating: bool = True,
        enable_cache: bool = False,
        cache_size: int = DEFAULT_CACHE_SIZE,
        cache_write_once: bool = False,
        cache_enable_audit: bool = False,
        cache_max_audit_entries: int = 10000,
        cache_max_entry_weight: int = DEFAULT_MAX_ENTRY_SIZE,
        cache_max_errors_per_entry: int = 50,
        functions: FunctionRegistry | None = None,
        max_source_size: int | None = None,
        max_nesting_depth: int | None = None,
        max_expansion_size: int | None = None,
        strict: bool = False,
    ) -> FluentBundle:
        """Factory method to create a FluentBundle using the system locale.

        Detects and uses the current system locale (from locale.getlocale(),
        LC_ALL, LC_MESSAGES, or LANG environment variables).

        Args:
            use_isolating: Wrap interpolated values in Unicode bidi isolation marks
            enable_cache: Enable format caching for performance
            cache_size: Maximum cache entries when caching enabled
            cache_write_once: Reject updates to existing cache keys (data race prevention)
            cache_enable_audit: Maintain audit log of all cache operations
            cache_max_audit_entries: Maximum audit log entries before oldest eviction
            cache_max_entry_weight: Maximum memory weight for cached results
            cache_max_errors_per_entry: Maximum errors per cache entry
            functions: Custom FunctionRegistry to use (default: standard registry)
            max_source_size: Maximum FTL source size in characters (default: 10 MiB / 10,485,760 chars)
            max_nesting_depth: Maximum placeable nesting depth (default: 100)
            max_expansion_size: Maximum total characters produced during resolution
                (default: 1,000,000; Billion Laughs protection)
            strict: Enable strict mode (fail-fast on errors, strict cache corruption handling)

        Returns:
            Configured FluentBundle instance for system locale

        Raises:
            RuntimeError: If system locale cannot be determined

        Example:
            >>> bundle = FluentBundle.for_system_locale()
            >>> bundle.locale  # Returns detected system locale
            'en_US'
        """
        # Delegate to unified locale detection (raises RuntimeError on failure)
        system_locale = get_system_locale(raise_on_failure=True)

        return cls(
            system_locale,
            use_isolating=use_isolating,
            enable_cache=enable_cache,
            cache_size=cache_size,
            cache_write_once=cache_write_once,
            cache_enable_audit=cache_enable_audit,
            cache_max_audit_entries=cache_max_audit_entries,
            cache_max_entry_weight=cache_max_entry_weight,
            cache_max_errors_per_entry=cache_max_errors_per_entry,
            functions=functions,
            max_source_size=max_source_size,
            max_nesting_depth=max_nesting_depth,
            max_expansion_size=max_expansion_size,
            strict=strict,
        )

    def __repr__(self) -> str:
        """Return string representation for debugging.

        Returns:
            String representation showing locale and loaded messages count

        Example:
            >>> bundle = FluentBundle("lv_LV")
            >>> repr(bundle)
            "FluentBundle(locale='lv_LV', messages=0, terms=0)"
        """
        return (
            f"FluentBundle(locale={self._locale!r}, "
            f"messages={len(self._messages)}, "
            f"terms={len(self._terms)})"
        )

    def __enter__(self) -> FluentBundle:
        """Enter context manager.

        Enables use of FluentBundle with 'with' statement. The context manager
        clears the format cache on exit only if the bundle was modified during
        the context (add_resource, add_function, or clear_cache called). For
        read-only operations, the cache is preserved for better performance.

        Messages and terms are always preserved so the bundle remains usable
        after the with block.

        Returns:
            Self (the FluentBundle instance)

        Example:
            >>> with FluentBundle("en_US", enable_cache=True) as bundle:
            ...     bundle.add_resource("hello = Hello")  # Modifying operation
            ...     result = bundle.format_pattern("hello")
            ...     # Cache cleared (bundle was modified)
            >>>
            >>> with bundle:  # Read-only context
            ...     result = bundle.format_pattern("hello")
            ...     # Cache preserved (bundle NOT modified)
        """
        # Reset modification tracking for new context
        self._modified_in_context = False
        return self

    def __exit__(
        self,
        exc_type: type[BaseException] | None,
        exc_val: BaseException | None,
        exc_tb: object | None,
    ) -> None:
        """Exit context manager with conditional cache cleanup.

        Clears the format cache only if the bundle was modified during the
        context (add_resource, add_function, or clear_cache called). For
        read-only contexts, the cache is preserved to avoid invalidating
        cached results in shared bundle scenarios.

        Messages and terms are always preserved so the bundle remains usable
        after the with block. Does not suppress exceptions.

        Args:
            exc_type: Exception type (if any)
            exc_val: Exception value (if any)
            exc_tb: Exception traceback (if any)
        """
        # Clear cache only if bundle was modified during context
        # Read-only operations (format_pattern) preserve cache for performance
        if self._modified_in_context and self._cache is not None:
            self._cache.clear()
            logger.debug(
                "FluentBundle cache cleared on context exit (modified): %s",
                self._locale,
            )
        else:
            logger.debug(
                "FluentBundle cache preserved on context exit (read-only): %s",
                self._locale,
            )

        # Reset flag for next context (defensive)
        self._modified_in_context = False

    def get_babel_locale(self) -> str:
        """Get the Babel locale identifier for this bundle (introspection API).

        This is a debugging/introspection method that returns the actual Babel locale
        identifier being used for NUMBER(), DATETIME(), and CURRENCY() formatting.

        Useful for troubleshooting locale-related formatting issues, especially when
        verifying which CLDR data is being applied.

        Returns:
            str: Babel locale identifier (e.g., "en_US", "lv_LV", "ar_EG")

        Example:
            >>> bundle = FluentBundle("lv")
            >>> bundle.get_babel_locale()
            'lv'
            >>> bundle_us = FluentBundle("en-US")
            >>> bundle_us.get_babel_locale()
            'en_US'

        Note:
            This creates a LocaleContext temporarily to access Babel locale information.
            The return value shows what locale Babel is using for CLDR-based formatting.

        See Also:
            - bundle.locale: The original locale code passed to FluentBundle
            - LocaleContext.babel_locale: The underlying Babel Locale object
        """
        # create() always returns LocaleContext with en_US fallback for invalid locales
        ctx = LocaleContext.create(self._locale)
        return str(ctx.babel_locale)

    def add_resource(
        self, source: str, /, *, source_path: str | None = None
    ) -> tuple[Junk, ...]:
        """Add FTL resource to bundle.

        Parses FTL source and adds messages/terms to registry.
        Thread-safe (uses internal RWLock).

        Parse operation occurs outside the write lock to minimize reader
        contention. Only registration (dict updates) requires exclusive access.

        Args:
            source: FTL file content [positional-only]
            source_path: Optional path to source file for better error messages
                (e.g., "locales/lv/ui.ftl"). Used as source identifier
                in warning messages. Defaults to "" if not provided.

        Returns:
            Tuple of Junk entries encountered during parsing. Empty tuple if
            parsing succeeded without errors. Each Junk entry contains the
            unparseable content and associated annotations.

        Logging:
            Syntax errors (Junk entries) are logged at WARNING level regardless
            of whether source_path is provided. This ensures syntax errors are
            visible whether loading from files, databases, or in-memory strings.

        Note:
            Parser continues after errors (robustness principle). Junk entries
            are returned for programmatic error handling.

        Raises:
            TypeError: If source is not a string (e.g., bytes were passed).
            SyntaxIntegrityError: In strict mode only, if parsing produces any
                Junk entries. Financial applications using strict=True get
                fail-fast behavior on syntax errors.

        Thread Safety:
            Parser is stateless and thread-safe. Parse operation can occur
            outside write lock without risk. Only registration step requires
            exclusive write access.
        """
        # Type validation at API boundary - type hints are not enforced at runtime.
        # Defensive check: users may pass bytes despite str annotation.
        if not isinstance(source, str):
            msg = (  # type: ignore[unreachable]
                f"source must be str, not {type(source).__name__}. "
                "Decode bytes to str (e.g., source.decode('utf-8')) before calling add_resource()."
            )
            raise TypeError(msg)

        # Parse outside lock (expensive, but safe - parser is stateless, source is immutable)
        resource = self._parser.parse(source)

        # Only hold lock for registration (fast, O(N) where N is entry count)
        with self._rwlock.write():
            return self._register_resource(resource, source_path)

    def _register_resource(  # noqa: PLR0915 - Two-phase commit requires statement count
        self, resource: Resource, source_path: str | None
    ) -> tuple[Junk, ...]:
        """Register parsed resource entries (messages, terms, junk).

        Assumes caller holds write lock. Internal method for add_resource.

        Two-phase commit for strict mode atomicity:
            Phase 1: Collect all entries and validate (no state mutation)
            Phase 2: Apply mutations only if strict mode check passes

        This ensures that in strict mode, a resource with syntax errors
        does not partially populate the bundle before raising an exception.

        Args:
            resource: Parsed FTL resource
            source_path: Optional path for logging

        Returns:
            Tuple of Junk entries from resource
        """
        # Local import avoids a circular dependency at module load time.
        from ftllexengine.introspection import extract_references  # noqa: PLC0415

        # Phase 1: Collect entries without mutating bundle state
        pending_messages: dict[str, Message] = {}
        pending_terms: dict[str, Term] = {}
        pending_msg_deps: dict[str, set[str]] = {}
        pending_term_deps: dict[str, set[str]] = {}
        junk_entries: list[Junk] = []
        overwrite_warnings: list[tuple[str, str]] = []  # (type, id)

        for entry in resource.entries:
            match entry:
                case Message():
                    msg_id = entry.id.name
                    # Check for overwrites (log later, after strict check)
                    if msg_id in self._messages or msg_id in pending_messages:
                        overwrite_warnings.append(("message", msg_id))
                    pending_messages[msg_id] = entry
                    # Extract dependencies for cross-resource cycle detection
                    msg_refs, term_refs = extract_references(entry)
                    deps: set[str] = set()
                    for ref in msg_refs:
                        deps.add(f"msg:{ref}")
                    for ref in term_refs:
                        deps.add(f"term:{ref}")
                    pending_msg_deps[msg_id] = deps
                case Term():
                    term_id = entry.id.name
                    # Check for overwrites (log later, after strict check)
                    if term_id in self._terms or term_id in pending_terms:
                        overwrite_warnings.append(("term", term_id))
                    pending_terms[term_id] = entry
                    # Extract dependencies for cross-resource cycle detection
                    msg_refs, term_refs = extract_references(entry)
                    deps_term: set[str] = set()
                    for ref in msg_refs:
                        deps_term.add(f"msg:{ref}")
                    for ref in term_refs:
                        deps_term.add(f"term:{ref}")
                    pending_term_deps[term_id] = deps_term
                case Junk():
                    junk_entries.append(entry)
                case Comment():
                    # Comments don't need registration
                    logger.debug("Skipping comment entry")

        junk_tuple = tuple(junk_entries)

        # Strict mode: fail fast on syntax errors BEFORE any state mutation
        if self._strict and junk_tuple:
            source_desc = source_path or ""
            # Summarize at most the first three junk entries to keep the message bounded
            error_summary = "; ".join(
                repr(j.content[:50]) for j in junk_tuple[:3]
            )
            if len(junk_tuple) > 3:
                error_summary += f" (and {len(junk_tuple) - 3} more)"

            context = IntegrityContext(
                component="bundle",
                operation="add_resource",
                key=source_desc,
                expected="",
                actual=f"<{len(junk_tuple)} syntax error(s)>",
                timestamp=time.monotonic(),
            )

            error_msg = (
                f"Strict mode: {len(junk_tuple)} syntax error(s) in "
                f"{source_desc}: {error_summary}"
            )
            raise SyntaxIntegrityError(
                error_msg,
                context=context,
                junk_entries=junk_tuple,
                source_path=source_path,
            )

        # Phase 2: Apply mutations (only reached if strict check passes or not strict)
        # Log overwrite warnings now that we know we'll proceed
        for entry_type, entry_id in overwrite_warnings:
            if entry_type == "message":
                logger.warning(
                    "Overwriting existing message '%s' with new definition",
                    entry_id,
                )
            else:
                logger.warning(
                    "Overwriting existing term '-%s' with new definition",
                    entry_id,
                )

        # Apply collected entries to bundle state
        self._messages.update(pending_messages)
        self._terms.update(pending_terms)
        self._msg_deps.update(pending_msg_deps)
        self._term_deps.update(pending_term_deps)

        # Log registration of individual entries
        for msg_id in pending_messages:
            logger.debug("Registered message: %s", msg_id)
        for term_id in pending_terms:
            logger.debug("Registered term: %s", term_id)

        # Log junk entries (always WARNING level for syntax errors)
        source_desc = source_path or ""
        for junk in junk_entries:
            logger.warning(
                "Syntax error in %s: %s",
                source_desc,
                repr(junk.content[:_LOG_TRUNCATE_WARNING]),
            )

        # Log summary with file context
        if source_path:
            logger.info(
                "Added resource %s: %d messages, %d terms, %d junk entries",
                source_path,
                len(self._messages),
                len(self._terms),
                len(junk_entries),
            )
        else:
            logger.info(
                "Added resource: %d messages, %d terms, %d junk entries",
                len(self._messages),
                len(self._terms),
                len(junk_entries),
            )

        # Invalidate cache (messages changed)
        if self._cache is not None:
            self._cache.clear()
            logger.debug("Cache cleared after add_resource")

        # Mark bundle as modified for context manager tracking
        self._modified_in_context = True

        return junk_tuple

    def validate_resource(self, source: str) -> ValidationResult:
        """Validate FTL resource without adding to bundle.

        Use this to check FTL files in CI/tooling before adding them.
        Unlike add_resource(), this does not modify the bundle.

        Performs both syntax validation (errors) and semantic validation (warnings):
        - Errors: Parse failures (Junk entries)
        - Warnings: Duplicate IDs, messages without values, undefined references,
          circular dependencies

        Args:
            source: FTL file content

        Returns:
            ValidationResult with parse errors and semantic warnings

        Raises:
            TypeError: If source is not a string (e.g., bytes were passed).

        Example:
            >>> bundle = FluentBundle("lv")
            >>> result = bundle.validate_resource(ftl_source)
            >>> if not result.is_valid:
            ...     for error in result.errors:
            ...         print(f"Error [{error.code}]: {error.message}")
            >>> if result.warning_count > 0:
            ...     for warning in result.warnings:
            ...         print(f"Warning [{warning.code}]: {warning.message}")

        See Also:
            ftllexengine.validation.validate_resource: Standalone validation function
        """
        # Type validation at API boundary - type hints are not enforced at runtime.
        # Defensive check: users may pass bytes despite str annotation.
        if not isinstance(source, str):
            msg = (  # type: ignore[unreachable]
                f"source must be str, not {type(source).__name__}. "
                "Decode bytes to str (e.g., source.decode('utf-8')) before calling validate_resource()."
            )
            raise TypeError(msg)

        # Delegate to validation module, reusing bundle's parser for consistency
        # Pass existing bundle entries and their dependencies for cross-resource validation
        with self._rwlock.read():
            return _validate_resource_impl(
                source,
                parser=self._parser,
                known_messages=frozenset(self._messages.keys()),
                known_terms=frozenset(self._terms.keys()),
                known_msg_deps=self._msg_deps,
                known_term_deps=self._term_deps,
            )

    def format_pattern(
        self,
        message_id: str,
        /,
        args: Mapping[str, FluentValue] | None = None,
        *,
        attribute: str | None = None,
    ) -> tuple[str, tuple[FrozenFluentError, ...]]:
        """Format message to string with error reporting.

        Mozilla python-fluent aligned API that returns both the formatted
        string and any errors encountered during resolution. Thread-safe.

        Args:
            message_id: Message identifier [positional-only]
            args: Variable arguments for interpolation
            attribute: Attribute name (optional, keyword-only)

        Returns:
            Tuple of (formatted_string, errors)
            - formatted_string: Best-effort formatted output (never empty)
            - errors: Tuple of FrozenFluentError instances encountered during resolution (immutable)

        Raises:
            FormattingIntegrityError: In strict mode, if ANY error occurs during formatting.
                The exception carries the original errors, fallback value, and message ID.

        Note:
            In non-strict mode (default), this method handles expected formatting
            errors gracefully. All anticipated errors (missing messages, variables,
            references) are collected and returned in the errors list. The formatted
            string always contains a readable fallback value per Fluent specification.

            In strict mode (bundle.strict=True), FormattingIntegrityError is raised
            immediately when ANY error occurs. This is required for financial applications
            where silent fallbacks are unacceptable. The exception provides:
            - fluent_errors: The original FrozenFluentError instances
            - fallback_value: What would have been returned in non-strict mode
            - message_id: The message that failed to format

            If an attribute name is duplicated within a message (validation warning),
            the last definition is used during resolution (last-wins semantics).
            This matches the Fluent specification and Mozilla reference implementation.

        Examples:
            >>> # Successful formatting
            >>> result, errors = bundle.format_pattern("hello")
            >>> assert result == 'Sveiki, pasaule!'
            >>> assert errors == ()

            >>> # Missing variable - returns fallback and error (non-strict mode)
            >>> bundle.add_resource('msg = Hello { $name }!')
            >>> result, errors = bundle.format_pattern("msg", {})
            >>> assert result == 'Hello {$name}!'  # Readable fallback
            >>> assert len(errors) == 1
            >>> assert errors[0].category == ErrorCategory.REFERENCE

            >>> # Attribute access
            >>> result, errors = bundle.format_pattern("button-save", attribute="tooltip")
            >>> assert result == 'Saglabā pašreizējo ierakstu datubāzē'
            >>> assert errors == ()

            >>> # Strict mode - raises on errors
            >>> strict_bundle = FluentBundle("en", strict=True)
            >>> strict_bundle.add_resource('msg = Hello { $name }!')
            >>> strict_bundle.format_pattern("msg", {})  # Raises FormattingIntegrityError
        """
        # Read lock: format operations may run concurrently with other readers
        with self._rwlock.read():
            return self._format_pattern_impl(message_id, args, attribute)

    def _raise_strict_error(
        self,
        message_id: str,
        fallback_value: str,
        errors: tuple[FrozenFluentError, ...],
    ) -> NoReturn:
        """Raise FormattingIntegrityError for strict mode (internal helper).

        Args:
            message_id: The message ID that failed to format
            fallback_value: The fallback value that would be returned in non-strict mode
            errors: Tuple of FrozenFluentError instances

        Raises:
            FormattingIntegrityError: Always raised with error details
        """
error_summary = "; ".join(str(e) for e in errors[:3]) -! if len(errors) > 3: -! error_summary += f" (and {len(errors) - 3} more)" - -! context = IntegrityContext( -! component="bundle", -! operation="format_pattern", -! key=message_id, -! expected="", -! actual=f"<{len(errors)} error(s)>", -! timestamp=time.monotonic(), -! ) - -! msg = ( -! f"Strict mode: formatting '{message_id}' produced {len(errors)} error(s): " -! f"{error_summary}" -! ) -! raise FormattingIntegrityError( -! msg, -! context=context, -! fluent_errors=errors, -! fallback_value=fallback_value, -! message_id=message_id, -! ) - -> def _format_pattern_impl( -> self, -> message_id: str, -> args: Mapping[str, FluentValue] | None, -> attribute: str | None, -> ) -> tuple[str, tuple[FrozenFluentError, ...]]: -> """Internal implementation of format_pattern (no locking).""" - # Check cache first (if enabled) -! if self._cache is not None: -! cached_entry = self._cache.get( -! message_id, args, attribute, self._locale, self._use_isolating -! ) -! if cached_entry is not None: -! result, errors_tuple = cached_entry.to_tuple() -! if errors_tuple and self._strict: -! self._raise_strict_error(message_id, result, errors_tuple) -! return (result, errors_tuple) - - # Validate message_id is non-empty string -! if not message_id or not isinstance(message_id, str): -! logger.warning("Invalid message ID: empty or non-string") -! diagnostic = Diagnostic( -! code=DiagnosticCode.MESSAGE_NOT_FOUND, -! message="Invalid message ID: empty or non-string", -! ) -! error = FrozenFluentError( -! str(diagnostic), ErrorCategory.REFERENCE, diagnostic=diagnostic -! ) - # Strict mode: raise instead of returning fallback -! if self._strict: -! self._raise_strict_error("", FALLBACK_INVALID, (error,)) - # Don't cache errors -! return (FALLBACK_INVALID, (error,)) - - # Validate args is None or a Mapping (defensive check for callers ignoring type hints) -! if args is not None and not isinstance(args, Mapping): -! 
logger.warning( # type: ignore[unreachable] -! "Invalid args type: expected Mapping or None, got %s", type(args).__name__ -! ) -! diagnostic = Diagnostic( -! code=DiagnosticCode.INVALID_ARGUMENT, -! message=f"Invalid args type: expected Mapping or None, got {type(args).__name__}", -! ) -! error = FrozenFluentError( -! str(diagnostic), ErrorCategory.RESOLUTION, diagnostic=diagnostic -! ) - # Strict mode: raise instead of returning fallback -! if self._strict: -! self._raise_strict_error(message_id, FALLBACK_INVALID, (error,)) -! return (FALLBACK_INVALID, (error,)) - - # Validate attribute is None or a string -! if attribute is not None and not isinstance(attribute, str): -! logger.warning( # type: ignore[unreachable] -! "Invalid attribute type: expected str or None, got %s", type(attribute).__name__ -! ) -! diagnostic = Diagnostic( -! code=DiagnosticCode.INVALID_ARGUMENT, -! message=f"Invalid attribute type: expected str or None, got {type(attribute).__name__}", -! ) -! error = FrozenFluentError( -! str(diagnostic), ErrorCategory.RESOLUTION, diagnostic=diagnostic -! ) - # Strict mode: raise instead of returning fallback -! if self._strict: -! self._raise_strict_error(message_id, FALLBACK_INVALID, (error,)) -! return (FALLBACK_INVALID, (error,)) - - # Check if message exists -! if message_id not in self._messages: -! logger.warning("Message '%s' not found", message_id) -! diag = ErrorTemplate.message_not_found(message_id) -! error = FrozenFluentError(str(diag), ErrorCategory.REFERENCE, diagnostic=diag) - # Don't cache missing message errors -! fallback = FALLBACK_MISSING_MESSAGE.format(id=message_id) - # Strict mode: raise instead of returning fallback -! if self._strict: -! self._raise_strict_error(message_id, fallback, (error,)) -! return (fallback, (error,)) - -! message = self._messages[message_id] - - # Create resolver -! resolver = FluentResolver( -! locale=self._locale, -! messages=self._messages, -! terms=self._terms, -! 
function_registry=self._function_registry, -! use_isolating=self._use_isolating, -! max_nesting_depth=self._max_nesting_depth, -! max_expansion_size=self._max_expansion_size, -! ) - - # Resolve message (resolver handles all errors internally including cycles) - # Note: No try-except here. The resolver is designed to collect all expected - # errors (missing references, type errors, etc.) and return them in the tuple. - # If a raw KeyError/AttributeError/RuntimeError escapes the resolver, that - # indicates a bug in the resolver implementation that should be exposed, - # not swallowed. This follows the principle of failing fast on internal bugs. -! result, errors_tuple = resolver.resolve_message(message, args, attribute) - -! if errors_tuple: -! logger.warning( -! "Message resolution errors for '%s': %d error(s)", message_id, len(errors_tuple) -! ) -! for err in errors_tuple: -! logger.debug(" - %s: %s", type(err).__name__, err) -! else: -! logger.debug("Resolved message '%s': %s", message_id, result[:50]) - - # Cache resolution result (including errors) BEFORE strict mode check. - # This ensures repeated calls for the same erroneous message in strict mode - # hit the cache instead of triggering expensive re-resolution each time. -! if self._cache is not None: -! self._cache.put( -! message_id, args, attribute, self._locale, self._use_isolating, result, errors_tuple -! ) - - # Strict mode: raise after caching so subsequent calls can use cached result -! if errors_tuple and self._strict: -! self._raise_strict_error(message_id, result, errors_tuple) - -! return (result, errors_tuple) - -> def format_value( -> self, message_id: str, args: Mapping[str, FluentValue] | None = None -> ) -> tuple[str, tuple[FrozenFluentError, ...]]: -> """Format message to string (alias for format_pattern without attribute access). - -> This method provides API consistency with FluentLocalization.format_value() -> for users who don't need attribute access. 
It's an alias for -> format_pattern(message_id, args, attribute=None). - -> Args: -> message_id: Message identifier -> args: Variable arguments for interpolation - -> Returns: -> Tuple of (formatted_string, errors) -> - formatted_string: Best-effort formatted output (never empty) -> - errors: Tuple of FrozenFluentError instances encountered during resolution (immutable) - -> Raises: -> FormattingIntegrityError: In strict mode, if ANY error occurs during formatting - -> Note: -> In non-strict mode, this method never raises exceptions. All errors -> are collected and returned in the errors list. - -> In strict mode (bundle.strict=True), FormattingIntegrityError is raised -> instead of returning fallback values when errors occur. - -> Example: -> >>> bundle.add_resource("welcome = Hello, { $name }!") -> >>> result, errors = bundle.format_value("welcome", {"name": "Alice"}) -> >>> assert result == "Hello, Alice!" -> >>> assert errors == () -> """ -! return self.format_pattern(message_id, args, attribute=None) - -> def has_message(self, message_id: str) -> bool: -> """Check if message exists. - -> Args: -> message_id: Message identifier - -> Returns: -> True if message exists in bundle -> """ -> with self._rwlock.read(): -> return message_id in self._messages - -> def has_attribute(self, message_id: str, attribute: str) -> bool: -> """Check if message has specific attribute. - -> Args: -> message_id: Message identifier -> attribute: Attribute name - -> Returns: -> True if message exists AND has the specified attribute - -> Note: -> This method checks if any attribute with the given name exists. -> If duplicate attribute names exist (validation warning), this returns -> True without indicating which definition will be used. See format_pattern -> for resolution semantics (last-wins for duplicates). - -> Example: -> >>> bundle.add_resource(''' -> ... button = Click -> ... .tooltip = Click to save -> ... 
''') -> >>> bundle.has_message("button") -> True -> >>> bundle.has_attribute("button", "tooltip") -> True -> >>> bundle.has_attribute("button", "missing") -> False -> >>> bundle.has_attribute("nonexistent", "tooltip") -> False -> """ -! with self._rwlock.read(): -! if message_id not in self._messages: -! return False -! message = self._messages[message_id] -! return any(attr.id.name == attribute for attr in message.attributes) - -> def get_message_ids(self) -> list[str]: -> """Get all message IDs in bundle. - -> Returns: -> List of message identifiers -> """ -! with self._rwlock.read(): -! return list(self._messages.keys()) - -> def get_message_variables(self, message_id: str) -> frozenset[str]: -> """Get all variables required by a message (introspection API). - -> This is a value-add feature not present in Mozilla's python-fluent. -> Enables FTL file validation in CI/CD pipelines. - -> Args: -> message_id: Message identifier - -> Returns: -> Frozen set of variable names (without $ prefix) - -> Raises: -> KeyError: If message doesn't exist - -> Example: -> >>> bundle.add_resource("greeting = Hello, { $name }!") -> >>> vars = bundle.get_message_variables("greeting") -> >>> assert "name" in vars -> """ -! with self._rwlock.read(): -! if message_id not in self._messages: -! msg = f"Message '{message_id}' not found" -! raise KeyError(msg) - -! return extract_variables(self._messages[message_id]) - -> def get_all_message_variables(self) -> dict[str, frozenset[str]]: -> """Get variables for all messages in bundle (batch introspection API). - -> Convenience method for extracting variables from all messages at once. -> Useful for CI/CD validation pipelines that need to analyze entire -> FTL resources in a single operation. - -> This is equivalent to calling get_message_variables() for each message -> ID, but provides a cleaner API for batch operations. - -> Returns: -> Dictionary mapping message IDs to their required variable sets. -> Empty dict if bundle has no messages. 
- -> Example: -> >>> bundle.add_resource(''' -> ... greeting = Hello, { $name }! -> ... farewell = Goodbye, { $firstName } { $lastName }! -> ... simple = No variables here -> ... ''') -> >>> all_vars = bundle.get_all_message_variables() -> >>> assert all_vars["greeting"] == frozenset({"name"}) -> >>> assert all_vars["farewell"] == frozenset({"firstName", "lastName"}) -> >>> assert all_vars["simple"] == frozenset() - -> See Also: -> - get_message_variables(): Get variables for single message -> - introspect_message(): Get complete metadata (variables + functions + references) - -> Note: -> Acquires a single read lock for atomic snapshot of all message variables. -> """ -! with self._rwlock.read(): -! return { -! message_id: extract_variables(message) -! for message_id, message in self._messages.items() -! } - -> def introspect_message(self, message_id: str) -> MessageIntrospection: -> """Get complete introspection data for a message. - -> Returns comprehensive metadata about variables, functions, and references -> used in the message. Uses Python 3.13's TypeIs for type-safe results. - -> Args: -> message_id: Message identifier - -> Returns: -> MessageIntrospection with complete metadata - -> Raises: -> KeyError: If message doesn't exist - -> Example: -> >>> bundle.add_resource("price = { NUMBER($amount, minimumFractionDigits: 2) }") -> >>> info = bundle.introspect_message("price") -> >>> assert "amount" in info.get_variable_names() -> >>> assert "NUMBER" in info.get_function_names() -> """ -! with self._rwlock.read(): -! if message_id not in self._messages: -! msg = f"Message '{message_id}' not found" -! raise KeyError(msg) - -! return introspect_message(self._messages[message_id]) - -> def introspect_term(self, term_id: str) -> MessageIntrospection: -> """Get complete introspection data for a term. - -> Returns comprehensive metadata about variables, functions, and references -> used in the term. Mirrors introspect_message() for API symmetry. 
- -> Args: -> term_id: Term identifier (without leading dash) - -> Returns: -> MessageIntrospection with complete metadata - -> Raises: -> KeyError: If term doesn't exist - -> Example: -> >>> bundle.add_resource("-brand = { $case -> \\n [nominative] Firefox\\n *[other] Firefox\\n}") -> >>> info = bundle.introspect_term("brand") -> >>> assert "case" in info.get_variable_names() -> """ -! with self._rwlock.read(): -! if term_id not in self._terms: -! msg = f"Term '{term_id}' not found" -! raise KeyError(msg) - -! return introspect_message(self._terms[term_id]) - -> def add_function(self, name: str, func: Callable[..., FluentValue]) -> None: -> """Add custom function to bundle. - -> Args: -> name: Function name (UPPERCASE by convention) -> func: Callable function that returns a FluentValue - -> Example: -> >>> def CUSTOM(value): -> ... return value.upper() -> >>> bundle.add_function("CUSTOM", CUSTOM) -> """ -! with self._rwlock.write(): - # Copy-on-write: copy the shared registry on first modification -! if not self._owns_registry: -! self._function_registry = self._function_registry.copy() -! self._owns_registry = True -! logger.debug("Registry copied on first add_function") - -! self._function_registry.register(func, ftl_name=name) -! logger.debug("Added custom function: %s", name) - - # Invalidate cache (functions changed) -! if self._cache is not None: -! self._cache.clear() -! logger.debug("Cache cleared after add_function") - - # Mark bundle as modified for context manager tracking -! self._modified_in_context = True - -> def clear_cache(self) -> None: -> """Clear format cache. - -> Call this when you want to force cache invalidation. -> Automatically called by add_resource() and add_function(). - -> Example: -> >>> bundle = FluentBundle("en", enable_cache=True) -> >>> bundle.add_resource("msg = Hello") -> >>> bundle.format_pattern("msg") # Caches result -> >>> bundle.clear_cache() # Manual invalidation -> """ -! with self._rwlock.write(): -! 
if self._cache is not None: -! self._cache.clear() -! logger.debug("Cache manually cleared") - - # Mark bundle as modified for context manager tracking -! self._modified_in_context = True - -> def get_cache_stats(self) -> dict[str, int | float | bool] | None: -> """Get cache statistics. - -> Returns: -> Dict with cache metrics or None if caching disabled. -> Keys: size (int), maxsize (int), hits (int), misses (int), -> hit_rate (float 0.0-100.0), unhashable_skips (int) - -> Example: -> >>> bundle = FluentBundle("en", enable_cache=True) -> >>> bundle.add_resource("msg = Hello") -> >>> bundle.format_pattern("msg", {}) # Cache miss -> >>> bundle.format_pattern("msg", {}) # Cache hit -> >>> stats = bundle.get_cache_stats() -> >>> stats["hits"] -> 1 -> >>> stats["misses"] -> 1 -> >>> isinstance(stats["hit_rate"], float) -> True -> """ -! if self._cache is not None: -! return self._cache.get_stats() -! return None diff --git a/src/ftllexengine/runtime/bundle_formatting.py b/src/ftllexengine/runtime/bundle_formatting.py new file mode 100644 index 00000000..7be10248 --- /dev/null +++ b/src/ftllexengine/runtime/bundle_formatting.py @@ -0,0 +1,221 @@ +"""Formatting helpers for FluentBundle.""" + +from __future__ import annotations + +import logging +import time +from collections.abc import Mapping +from typing import TYPE_CHECKING, NoReturn + +from ftllexengine.constants import FALLBACK_INVALID, FALLBACK_MISSING_MESSAGE +from ftllexengine.diagnostics import ( + Diagnostic, + DiagnosticCode, + ErrorCategory, + ErrorTemplate, + FrozenFluentError, +) +from ftllexengine.integrity import FormattingIntegrityError, IntegrityContext +from ftllexengine.runtime.resolver import FluentResolver + +if TYPE_CHECKING: + from ftllexengine.core.value_types import FluentValue + from ftllexengine.runtime.bundle_protocols import BundleStateProtocol + +logger = logging.getLogger("ftllexengine.runtime.bundle") + + +class _BundleFormattingMixin: + """Formatting behavior for FluentBundle.""" + + def 
_invalid_request_result( + self: BundleStateProtocol, + message_id: str, + fallback_value: str, + *, + category: ErrorCategory, + code: DiagnosticCode, + message: str, + ) -> tuple[str, tuple[FrozenFluentError, ...]]: + """Build a one-error failure result for invalid formatting input.""" + diagnostic = Diagnostic(code=code, message=message) + error = FrozenFluentError(str(diagnostic), category, diagnostic=diagnostic) + if self._strict: + self._raise_strict_error(message_id, fallback_value, (error,)) + return (fallback_value, (error,)) + + def _validate_format_request( + self: BundleStateProtocol, + message_id: str, + args: Mapping[str, FluentValue] | None, + attribute: str | None, + ) -> tuple[str, tuple[FrozenFluentError, ...]] | None: + """Validate top-level format_pattern inputs.""" + if not message_id or not isinstance(message_id, str): + logger.warning("Invalid message ID: empty or non-string") + return self._invalid_request_result( + "", + FALLBACK_INVALID, + category=ErrorCategory.REFERENCE, + code=DiagnosticCode.MESSAGE_NOT_FOUND, + message="Invalid message ID: empty or non-string", + ) + + raw_args: object = args + if raw_args is not None and not isinstance(raw_args, Mapping): + arg_type = type(raw_args).__name__ + logger.warning("Invalid args type: expected Mapping or None, got %s", arg_type) + return self._invalid_request_result( + message_id, + FALLBACK_INVALID, + category=ErrorCategory.RESOLUTION, + code=DiagnosticCode.INVALID_ARGUMENT, + message=f"Invalid args type: expected Mapping or None, got {arg_type}", + ) + + raw_attribute: object = attribute + if raw_attribute is not None and not isinstance(raw_attribute, str): + attribute_type = type(raw_attribute).__name__ + logger.warning( + "Invalid attribute type: expected str or None, got %s", + attribute_type, + ) + return self._invalid_request_result( + message_id, + FALLBACK_INVALID, + category=ErrorCategory.RESOLUTION, + code=DiagnosticCode.INVALID_ARGUMENT, + message=( + f"Invalid attribute type: 
expected str or None, got {attribute_type}" + ), + ) + + return None + + def _lookup_cached_pattern( + self: BundleStateProtocol, + message_id: str, + args: Mapping[str, FluentValue] | None, + attribute: str | None, + ) -> tuple[str, tuple[FrozenFluentError, ...]] | None: + """Return a cached formatting result when available.""" + if self._cache is None: + return None + + cached_entry = self._cache.get( + message_id, + args, + attribute, + self._locale, + use_isolating=self._use_isolating, + ) + if cached_entry is None: + return None + + result, errors_tuple = cached_entry.as_result() + if errors_tuple and self._strict: + self._raise_strict_error(message_id, result, errors_tuple) + return (result, errors_tuple) + + def _raise_strict_error( + self: BundleStateProtocol, + message_id: str, + fallback_value: str, + errors: tuple[FrozenFluentError, ...], + ) -> NoReturn: + """Raise FormattingIntegrityError for strict formatting failures.""" + error_summary = "; ".join(str(error) for error in errors[:3]) + if len(errors) > 3: + error_summary += f" (and {len(errors) - 3} more)" + + context = IntegrityContext( + component="bundle", + operation="format_pattern", + key=message_id, + expected="", + actual=f"<{len(errors)} error(s)>", + timestamp=time.monotonic(), + wall_time_unix=time.time(), + ) + msg = ( + f"Strict mode: formatting '{message_id}' produced {len(errors)} error(s): " + f"{error_summary}" + ) + raise FormattingIntegrityError( + msg, + context=context, + fluent_errors=errors, + fallback_value=fallback_value, + message_id=message_id, + ) + + def _create_resolver(self: BundleStateProtocol) -> FluentResolver: + """Create a resolver bound to the current bundle state.""" + return FluentResolver( + locale=self._locale, + messages=self._messages, + terms=self._terms, + function_registry=self._function_registry, + use_isolating=self._use_isolating, + max_nesting_depth=self._max_nesting_depth, + max_expansion_size=self._max_expansion_size, + ) + + def 
_format_pattern_impl( + self: BundleStateProtocol, + message_id: str, + args: Mapping[str, FluentValue] | None, + attribute: str | None, + ) -> tuple[str, tuple[FrozenFluentError, ...]]: + """Format a message without acquiring bundle locks.""" + invalid_result = self._validate_format_request(message_id, args, attribute) + if invalid_result is not None: + return invalid_result + + cached_result = self._lookup_cached_pattern(message_id, args, attribute) + if cached_result is not None: + return cached_result + + if message_id not in self._messages: + (logger.warning if self._strict else logger.debug)( + "Message '%s' not found", + message_id, + ) + diag = ErrorTemplate.message_not_found(message_id) + error = FrozenFluentError(str(diag), ErrorCategory.REFERENCE, diagnostic=diag) + fallback = FALLBACK_MISSING_MESSAGE.format(id=message_id) + if self._strict: + self._raise_strict_error(message_id, fallback, (error,)) + return (fallback, (error,)) + + message = self._messages[message_id] + resolver = self._resolver + result, errors_tuple = resolver.resolve_message(message, args, attribute) + + if errors_tuple: + log_fn = logger.warning if self._strict else logger.debug + log_fn( + "Message resolution errors for '%s': %d error(s)", + message_id, + len(errors_tuple), + ) + for error in errors_tuple: + logger.debug(" - %s: %s", type(error).__name__, error) + else: + logger.debug("Resolved message '%s' successfully", message_id) + + if self._cache is not None: + self._cache.put( + message_id, + args, + attribute, + self._locale, + use_isolating=self._use_isolating, + formatted=result, + errors=errors_tuple, + ) + + if errors_tuple and self._strict: + self._raise_strict_error(message_id, result, errors_tuple) + + return (result, errors_tuple) diff --git a/src/ftllexengine/runtime/bundle_protocols.py b/src/ftllexengine/runtime/bundle_protocols.py new file mode 100644 index 00000000..e3d9e163 --- /dev/null +++ b/src/ftllexengine/runtime/bundle_protocols.py @@ -0,0 +1,75 @@ 
+"""Type-checking protocols for FluentBundle mixins.""" + +from __future__ import annotations + +from typing import TYPE_CHECKING, NoReturn, Protocol + +if TYPE_CHECKING: + from collections.abc import Mapping + + from ftllexengine.core.semantic_types import LocaleCode + from ftllexengine.core.value_types import FluentValue + from ftllexengine.diagnostics import ErrorCategory, FrozenFluentError + from ftllexengine.diagnostics.codes import DiagnosticCode + from ftllexengine.runtime.bundle_registration import _PendingRegistration + from ftllexengine.runtime.cache import IntegrityCache + from ftllexengine.runtime.function_bridge import FunctionRegistry + from ftllexengine.runtime.resolver import FluentResolver + from ftllexengine.runtime.rwlock import RWLock + from ftllexengine.syntax import Message, Resource, Term + + +class BundleStateProtocol(Protocol): + """Structural contract implemented by FluentBundle for its mixins.""" + + _cache: IntegrityCache | None + _function_registry: FunctionRegistry + _locale: LocaleCode + _max_expansion_size: int + _max_nesting_depth: int + _messages: dict[str, Message] + _msg_deps: dict[str, frozenset[str]] + _resolver: FluentResolver + _rwlock: RWLock + _strict: bool + _term_deps: dict[str, frozenset[str]] + _terms: dict[str, Term] + _use_isolating: bool + + def _collect_pending_entries(self, resource: Resource) -> _PendingRegistration: + ... # pragma: no cover - typing-only protocol declaration + + def _raise_strict_error( + self, + message_id: str, + fallback_value: str, + errors: tuple[FrozenFluentError, ...], + ) -> NoReturn: + ... # pragma: no cover - typing-only protocol declaration + + def _invalid_request_result( + self, + message_id: str, + fallback_value: str, + *, + category: ErrorCategory, + code: DiagnosticCode, + message: str, + ) -> tuple[str, tuple[FrozenFluentError, ...]]: + ... 
# pragma: no cover - typing-only protocol declaration + + def _validate_format_request( + self, + message_id: str, + args: Mapping[str, FluentValue] | None, + attribute: str | None, + ) -> tuple[str, tuple[FrozenFluentError, ...]] | None: + ... # pragma: no cover - typing-only protocol declaration + + def _lookup_cached_pattern( + self, + message_id: str, + args: Mapping[str, FluentValue] | None, + attribute: str | None, + ) -> tuple[str, tuple[FrozenFluentError, ...]] | None: + ... # pragma: no cover - typing-only protocol declaration diff --git a/src/ftllexengine/runtime/bundle_queries.py b/src/ftllexengine/runtime/bundle_queries.py new file mode 100644 index 00000000..5f94000b --- /dev/null +++ b/src/ftllexengine/runtime/bundle_queries.py @@ -0,0 +1,88 @@ +"""Query and introspection helpers for FluentBundle.""" + +from __future__ import annotations + +from typing import TYPE_CHECKING + +from ftllexengine.introspection import extract_variables, introspect_message + +if TYPE_CHECKING: + from ftllexengine.introspection import MessageIntrospection + from ftllexengine.runtime.bundle_protocols import BundleStateProtocol + from ftllexengine.syntax import Message, Term + + +class _BundleQueryMixin: + """Read-only query behavior for FluentBundle.""" + + def has_message(self: BundleStateProtocol, message_id: str) -> bool: + """Return whether the bundle contains ``message_id``.""" + with self._rwlock.read(): + return message_id in self._messages + + def has_attribute( + self: BundleStateProtocol, message_id: str, attribute: str + ) -> bool: + """Return whether ``message_id`` exposes ``attribute``.""" + with self._rwlock.read(): + message = self._messages.get(message_id) + if message is None: + return False + return any(attr.id.name == attribute for attr in message.attributes) + + def get_message_ids(self: BundleStateProtocol) -> list[str]: + """Return message IDs in insertion order.""" + with self._rwlock.read(): + return list(self._messages.keys()) + + def 
get_message_variables( + self: BundleStateProtocol, message_id: str + ) -> frozenset[str]: + """Return the variables referenced by one message.""" + with self._rwlock.read(): + if message_id not in self._messages: + msg = f"Message '{message_id}' not found" + raise KeyError(msg) + return frozenset(extract_variables(self._messages[message_id])) + + def get_all_message_variables( + self: BundleStateProtocol, + ) -> dict[str, frozenset[str]]: + """Return variables for every registered message.""" + with self._rwlock.read(): + return { + message_id: frozenset(extract_variables(message)) + for message_id, message in self._messages.items() + } + + def introspect_message( + self: BundleStateProtocol, message_id: str + ) -> MessageIntrospection: + """Return structured introspection for ``message_id``.""" + with self._rwlock.read(): + if message_id not in self._messages: + msg = f"Message '{message_id}' not found" + raise KeyError(msg) + return introspect_message(self._messages[message_id]) + + def introspect_term( + self: BundleStateProtocol, term_id: str + ) -> MessageIntrospection: + """Return structured introspection for ``term_id``.""" + with self._rwlock.read(): + if term_id not in self._terms: + msg = f"Term '{term_id}' not found" + raise KeyError(msg) + return introspect_message(self._terms[term_id]) + + def get_message( + self: BundleStateProtocol, message_id: str + ) -> Message | None: + """Return the raw message AST node when present.""" + with self._rwlock.read(): + return self._messages.get(message_id) + + def get_term(self: BundleStateProtocol, term_id: str) -> Term | None: + """Return the raw term AST node when present.""" + with self._rwlock.read(): + return self._terms.get(term_id) diff --git a/src/ftllexengine/runtime/bundle_registration.py b/src/ftllexengine/runtime/bundle_registration.py new file mode 100644 index 00000000..315f9782 --- /dev/null +++ b/src/ftllexengine/runtime/bundle_registration.py @@ -0,0 +1,150 @@ +"""Registration helpers for 
FluentBundle resource ingestion.""" + +from __future__ import annotations + +import logging +import time +from dataclasses import dataclass, field +from typing import TYPE_CHECKING, Literal, assert_never + +from ftllexengine.core.reference_graph import entry_dependency_set +from ftllexengine.integrity import IntegrityContext, SyntaxIntegrityError +from ftllexengine.introspection import extract_references +from ftllexengine.syntax import Comment, Junk, Message, Resource, Term + +if TYPE_CHECKING: + from ftllexengine.runtime.bundle_protocols import BundleStateProtocol + +logger = logging.getLogger("ftllexengine.runtime.bundle") + +_LOG_TRUNCATE_WARNING: int = 100 + + +@dataclass(slots=True) +class _PendingRegistration: + """Collected resource entries prior to mutating bundle state.""" + + messages: dict[str, Message] = field(default_factory=dict) + terms: dict[str, Term] = field(default_factory=dict) + msg_deps: dict[str, frozenset[str]] = field(default_factory=dict) + term_deps: dict[str, frozenset[str]] = field(default_factory=dict) + junk: list[Junk] = field(default_factory=list) + overwrite_warnings: list[tuple[Literal["message", "term"], str]] = field(default_factory=list) + + +class _BundleRegistrationMixin: + """Resource registration behavior for FluentBundle.""" + + def _collect_pending_entries( + self: BundleStateProtocol, resource: Resource + ) -> _PendingRegistration: + """Collect parsed entries without mutating bundle state.""" + pending = _PendingRegistration() + + for entry in resource.entries: + match entry: + case Message(): + msg_id = entry.id.name + if msg_id in self._messages or msg_id in pending.messages: + pending.overwrite_warnings.append(("message", msg_id)) + pending.messages[msg_id] = entry + pending.msg_deps[msg_id] = entry_dependency_set(*extract_references(entry)) + case Term(): + term_id = entry.id.name + if term_id in self._terms or term_id in pending.terms: + pending.overwrite_warnings.append(("term", term_id)) + pending.terms[term_id] 
= entry + pending.term_deps[term_id] = entry_dependency_set(*extract_references(entry)) + case Junk(): + pending.junk.append(entry) + case Comment(): + pass + case _ as unreachable: # pragma: no cover + assert_never(unreachable) + + return pending + + def _register_resource( + self: BundleStateProtocol, resource: Resource, source_path: str | None + ) -> tuple[Junk, ...]: + """Register parsed resource entries via a two-phase commit.""" + pending = self._collect_pending_entries(resource) + junk_tuple = tuple(pending.junk) + + if self._strict and junk_tuple: + source_desc = source_path or "" + error_summary = "; ".join(repr(junk.content[:50]) for junk in junk_tuple[:3]) + if len(junk_tuple) > 3: + error_summary += f" (and {len(junk_tuple) - 3} more)" + + context = IntegrityContext( + component="bundle", + operation="add_resource", + key=source_desc, + expected="", + actual=f"<{len(junk_tuple)} syntax error(s)>", + timestamp=time.monotonic(), + wall_time_unix=time.time(), + ) + msg = ( + f"Strict mode: {len(junk_tuple)} syntax error(s) in " + f"{source_desc}: {error_summary}" + ) + raise SyntaxIntegrityError( + msg, + context=context, + junk_entries=junk_tuple, + source_path=source_path, + ) + + for entry_type, entry_id in pending.overwrite_warnings: + if entry_type == "message": + logger.warning( + "Overwriting existing message '%s' with new definition", + entry_id, + ) + else: + logger.warning( + "Overwriting existing term '-%s' with new definition", + entry_id, + ) + + self._messages.update(pending.messages) + self._terms.update(pending.terms) + self._msg_deps.update(pending.msg_deps) + self._term_deps.update(pending.term_deps) + + for msg_id in pending.messages: + logger.debug("Registered message: %s", msg_id) + for term_id in pending.terms: + logger.debug("Registered term: %s", term_id) + + source_desc = source_path or "" + for junk in pending.junk: + logger.warning( + "Syntax error in %s: %s", + source_desc, + repr(junk.content[:_LOG_TRUNCATE_WARNING]), + ) + + 
if source_path: + logger.info( + "Added resource %s: %d messages, %d terms, %d junk entries", + source_path, + len(self._messages), + len(self._terms), + len(pending.junk), + ) + else: + logger.info( + "Added resource: %d messages, %d terms, %d junk entries", + len(self._messages), + len(self._terms), + len(pending.junk), + ) + + if self._cache is not None: + self._cache.clear() + logger.debug("Cache cleared after add_resource") + + return junk_tuple diff --git a/src/ftllexengine/runtime/cache.py b/src/ftllexengine/runtime/cache.py index 9f21ca51..3d018be0 100644 --- a/src/ftllexengine/runtime/cache.py +++ b/src/ftllexengine/runtime/cache.py @@ -32,24 +32,39 @@ import hashlib import hmac -import struct import time from collections import OrderedDict, deque -from collections.abc import Mapping, Sequence -from dataclasses import dataclass, field -from datetime import date, datetime -from decimal import Decimal from threading import Lock -from typing import TypedDict, final +from typing import TYPE_CHECKING, final from ftllexengine.constants import DEFAULT_CACHE_SIZE, DEFAULT_MAX_ENTRY_WEIGHT, MAX_DEPTH -from ftllexengine.core.value_types import FluentNumber, FluentValue -from ftllexengine.diagnostics import FrozenFluentError from ftllexengine.integrity import ( CacheCorruptionError, IntegrityContext, WriteConflictError, ) +from ftllexengine.runtime.cache_keys import ( + HASHABLE_NODE_BUDGET, + compute_key_hash, + make_hashable, + make_key, +) +from ftllexengine.runtime.cache_types import ( + _DEFAULT_MAX_ERRORS_PER_ENTRY, + CacheAuditLogEntry, + CacheStats, + HashableValue, + IntegrityCacheEntry, + WriteLogEntry, + _CacheKey, + _estimate_error_weight, +) + +if TYPE_CHECKING: + from collections.abc import Mapping + + from ftllexengine.core.value_types import FluentValue + from ftllexengine.diagnostics import FrozenFluentError __all__ = [ "CacheAuditLogEntry", @@ -60,408 +75,6 @@ "WriteLogEntry", ] -class CacheStats(TypedDict): - """Typed statistics snapshot returned 
by IntegrityCache.get_stats(). - - All fields are point-in-time readings taken under the cache lock. - Use get_stats() (not individual properties) for a consistent snapshot. - - Attributes: - size: Current number of cached entries. - maxsize: Maximum cache capacity. - max_entry_weight: Maximum memory weight for a single cached result. - max_errors_per_entry: Maximum errors stored per cache entry. - hits: Total cache hits since creation (not reset on clear()). - misses: True cache misses since creation: key was hashable, looked up, - but not found (or found corrupted in non-strict mode). Unhashable - bypasses are excluded; those increment unhashable_skips only. Not - reset on clear(). - hit_rate: Hit rate as a percentage (0.0-100.0), rounded to 2 decimal - places. Computed over hashable-key interactions only: - hits / (hits + misses). Unhashable bypasses do not affect this - metric, so the rate reflects true cache efficiency. - unhashable_skips: Puts skipped because the args could not be hashed. - oversize_skips: Puts skipped because the formatted string alone exceeded - max_entry_weight (before errors are considered). - error_bloat_skips: Puts skipped because the number of errors exceeded - max_errors_per_entry. - combined_weight_skips: Puts skipped because len(formatted) + total error - weight exceeded max_entry_weight (formatted alone was within limit but - combined content was not). Distinct from oversize_skips and error_bloat_skips - to enable accurate diagnosis: high combined_weight_skips points to the - combination of message length and error payload, not one alone. - corruption_detected: Number of BLAKE2b checksum mismatches detected. - idempotent_writes: Concurrent puts of identical content (benign races). - write_once_conflicts: True write-once violations: different content attempted - for an existing key under write_once=True. In strict mode these raise - WriteConflictError; in non-strict mode they are silently rejected. 
This - counter increments for both modes, enabling detection of data races without - requiring the audit log. - sequence: Monotonically increasing total-put counter (audit trail). - write_once: Whether write-once mode is enabled. - strict: Whether strict (fail-fast) mode is enabled. - audit_enabled: Whether the audit log is active. - audit_entries: Current number of entries in the audit log. - """ - - size: int - maxsize: int - max_entry_weight: int - max_errors_per_entry: int - hits: int - misses: int - hit_rate: float - unhashable_skips: int - oversize_skips: int - error_bloat_skips: int - corruption_detected: int - idempotent_writes: int - write_once_conflicts: int - combined_weight_skips: int - sequence: int - write_once: bool - strict: bool - audit_enabled: bool - audit_entries: int - - -# Base overhead per FrozenFluentError object (dataclass, slots, references). -# Dynamic weight calculation adds actual string lengths on top of this. -_ERROR_BASE_OVERHEAD: int = 100 - -# Maximum number of errors allowed per cache entry. -# Prevents memory exhaustion from pathological cases where resolution produces -# many errors (e.g., cyclic references, deeply nested validation failures). -_DEFAULT_MAX_ERRORS_PER_ENTRY: int = 50 - - -def _estimate_error_weight(error: FrozenFluentError) -> int: - """Estimate memory weight of a FrozenFluentError. - - Computes actual weight based on error content rather than using a static - estimate. This provides accurate memory budget enforcement for financial - applications where complex errors with detailed diagnostics may exceed - simple estimates. 
- - Args: - error: FrozenFluentError to estimate - - Returns: - Estimated byte weight of the error - """ - weight = _ERROR_BASE_OVERHEAD + len(error.message) - - if error.diagnostic is not None: - diag = error.diagnostic - weight += len(diag.message) - # Optional string fields - for attr in ( - diag.hint, - diag.help_url, - diag.function_name, - diag.argument_name, - diag.expected_type, - diag.received_type, - diag.ftl_location, - ): - if attr is not None: - weight += len(attr) - # Resolution path - if diag.resolution_path is not None: - for path_element in diag.resolution_path: - weight += len(path_element) - - if error.context is not None: - ctx = error.context - weight += len(ctx.input_value) - weight += len(ctx.locale_code) - weight += len(ctx.parse_type) - weight += len(ctx.fallback_value) - - return weight - -# Type alias for hashable values produced by _make_hashable(). -# Recursive definition: primitives plus tuple/frozenset of self. -# -# Type-Tagging: _make_hashable never returns primitive types (int, bool, Decimal, -# datetime, date, FluentNumber) directly. Every non-string, non-None value is -# converted to a type-tagged tuple: e.g., 1 -> ("__int__", 1), True -> ("__bool__", -# True), Decimal("1.5") -> ("__decimal__", "1.5"). These primitives appear in the -# union because the tagged tuples contain them as second/subsequent elements, and -# HashableValue is recursive — tuple["HashableValue", ...] must accept int, bool, etc. -# as inner elements. str and None are the only types returned directly (as-is). -# -# Collision Prevention Rationale: -# Python's hash equality means hash(1) == hash(True), causing cache collisions -# when these values produce different formatted outputs. -# To prevent this, _make_hashable() returns type-tagged tuples for bool/int: -# - True -> ("__bool__", True) -# - 1 -> ("__int__", 1) -# - Decimal("1") -> ("__decimal__", "1") -# These are distinct cache keys despite Python's hash equality. 
-type HashableValue = ( - str - | int - | bool - | Decimal - | datetime - | date - | FluentNumber - | None - | tuple["HashableValue", ...] - | frozenset["HashableValue"] -) - -# Internal type alias for cache keys (prefixed with _ per naming convention) -# 5-tuple: (message_id, args_tuple, attribute, locale_code, use_isolating) -type _CacheKey = tuple[str, tuple[tuple[str, HashableValue], ...], str | None, str, bool] - -# Internal type alias for cache entry values returned by IntegrityCacheEntry.as_result() -type _CacheValue = tuple[str, tuple[FrozenFluentError, ...]] - - -@dataclass(frozen=True, slots=True) -class IntegrityCacheEntry: - """Immutable cache entry with integrity metadata. - - Each entry contains the formatted result, any errors, and two BLAKE2b-128 - hashes: a content-only hash and a full checksum covering content + metadata. - Both enable detection of memory corruption, hardware faults, or tampering. - - Attributes: - formatted: Formatted message string - errors: Tuple of FrozenFluentError instances (immutable) - checksum: BLAKE2b-128 hash of (formatted, errors, created_at, sequence, key_hash) - created_at: Monotonic timestamp when entry was created (time.monotonic()) - sequence: Monotonically increasing sequence number for audit trail - key_hash: BLAKE2b-8 hash of the cache key computed at put() time. Included - in the checksum to make the entry tamper-evident: the key_hash field cannot - be altered without invalidating the checksum. IntegrityCache.get() verifies - entry.key_hash matches the lookup key's hash (key confusion detection). - content_hash: BLAKE2b-128 hash of (formatted, errors) only. Computed once - at construction via __post_init__; not part of the constructor signature. - Used for idempotent write detection without recomputation. Intentionally - key-agnostic: same content under any key has the same content_hash, which - is correct for the thundering-herd idempotency check in write_once mode. 
- """ - - formatted: str - errors: tuple[FrozenFluentError, ...] - checksum: bytes - created_at: float - sequence: int - key_hash: bytes - # Computed once from (formatted, errors) at construction; not an __init__ parameter. - # Stored to avoid BLAKE2b recomputation on every put() idempotency check. - # Uses object.__setattr__ because frozen=True prevents normal assignment in __post_init__. - content_hash: bytes = field(init=False, repr=False, compare=False, hash=False) - - def __post_init__(self) -> None: - """Compute and store content_hash after field initialization.""" - object.__setattr__( - self, "content_hash", self._compute_content_hash(self.formatted, self.errors) - ) - - @classmethod - def create( - cls, - formatted: str, - errors: tuple[FrozenFluentError, ...], - sequence: int, - key_hash: bytes, - ) -> IntegrityCacheEntry: - """Create entry with computed checksum. - - Factory method that computes the BLAKE2b-128 checksum from the content - and creates an immutable entry with the current monotonic timestamp. - - Args: - formatted: Formatted message string - errors: Tuple of FrozenFluentError instances - sequence: Sequence number for audit trail - key_hash: BLAKE2b-8 hash of the cache key (from IntegrityCache._compute_key_hash). - Binds the entry to its storage position for key confusion detection. - - Returns: - New IntegrityCacheEntry with computed checksum and content_hash - """ - # Capture timestamp BEFORE computing checksum to ensure consistency - created_at = time.monotonic() - checksum = cls._compute_checksum(formatted, errors, created_at, sequence, key_hash) - return cls( - formatted=formatted, - errors=errors, - checksum=checksum, - created_at=created_at, - sequence=sequence, - key_hash=key_hash, - ) - - @staticmethod - def _feed_errors(h: hashlib.blake2b, errors: tuple[FrozenFluentError, ...]) -> None: - """Feed error sequence into hasher via content_hash. 
- - Shared by both _compute_checksum and _compute_content_hash to eliminate - duplicated hashing logic. FrozenFluentError is @final and always carries - a content_hash (bytes), so direct attribute access is safe and correct. - The b"\\x01" type marker provides structural disambiguation between the - count field and each hash entry. - - Args: - h: Active BLAKE2b hasher to update in-place - errors: Tuple of errors to include in hash - """ - h.update(len(errors).to_bytes(4, "big")) - for error in errors: - # FrozenFluentError is @final; content_hash is always a bytes field. - # Accessing it directly enforces the type contract and eliminates dead code. - h.update(b"\x01") # Type marker: content hash follows - h.update(error.content_hash) - - @staticmethod - def _compute_checksum( - formatted: str, - errors: tuple[FrozenFluentError, ...], - created_at: float, - sequence: int, - key_hash: bytes, - ) -> bytes: - """Compute BLAKE2b-128 hash of cache entry (content + metadata + key binding). - - Uses BLAKE2b with 128-bit (16 byte) digest for fast cryptographic - hashing. This provides collision resistance sufficient for integrity - verification while minimizing memory overhead. - - Hash Composition: - All variable-length fields are length-prefixed to prevent collision - between semantically different values. The checksum covers ALL entry - fields for complete audit trail integrity: - 1. formatted: Message output (length-prefixed UTF-8) - 2. errors: Count + each error as (b"\\x01" + content_hash) using - FrozenFluentError.content_hash (BLAKE2b-128, always present) - 3. created_at: Monotonic timestamp (8-byte IEEE 754 double) - 4. sequence: Entry sequence number (8-byte unsigned big-endian) - 5. key_hash: Cache key binding (8 bytes, BLAKE2b-8 of storage key). - Including key_hash makes it tamper-evident: moving this entry to a - different cache slot and altering key_hash would break the checksum. 
- - Args: - formatted: Formatted message string - errors: Tuple of errors to include in hash - created_at: Monotonic timestamp when entry was created - sequence: Sequence number for audit trail - key_hash: BLAKE2b-8 hash of cache key (8 bytes, fixed length) - - Returns: - 16-byte BLAKE2b digest - """ - h = hashlib.blake2b(digest_size=16) - # Length-prefix formatted string for collision resistance - encoded = formatted.encode("utf-8", errors="surrogatepass") - h.update(len(encoded).to_bytes(4, "big")) - h.update(encoded) - IntegrityCacheEntry._feed_errors(h, errors) - # Include metadata fields for complete audit trail integrity - h.update(struct.pack(">d", created_at)) # 8-byte big-endian IEEE 754 double - h.update(sequence.to_bytes(8, "big")) # 8-byte unsigned int; sequence is always >= 0 - h.update(key_hash) # 8 bytes, fixed length; no length prefix needed - return h.digest() - - def verify(self) -> bool: - """Verify entry integrity recursively. - - Recomputes both the content hash and the full checksum from current - content, then compares against stored values using constant-time - comparison (defense against timing attacks). Also recursively verifies - each contained error's integrity for defense-in-depth. - - Returns: - True if content_hash matches AND checksum matches AND all errors verify - """ - # Verify stored content_hash matches recomputed (catches field-level corruption) - expected_content = self._compute_content_hash(self.formatted, self.errors) - if not hmac.compare_digest(self.content_hash, expected_content): - return False - # Verify full checksum (includes metadata and key binding) - expected = self._compute_checksum( - self.formatted, self.errors, self.created_at, self.sequence, self.key_hash - ) - if not hmac.compare_digest(self.checksum, expected): - return False - # Recursively verify each error's integrity (defense-in-depth). - # FrozenFluentError is @final, so verify_integrity() is always present. 
- # Direct call eliminates the duck-typing overhead and clarifies intent. - return all(error.verify_integrity() for error in self.errors) - - def as_result(self) -> _CacheValue: - """Extract formatted result and errors as a tuple. - - Returns: - (formatted, errors) pair for resolver consumption. - """ - return (self.formatted, self.errors) - - @staticmethod - def _compute_content_hash( - formatted: str, - errors: tuple[FrozenFluentError, ...], - ) -> bytes: - """Compute BLAKE2b-128 hash of content only (excludes metadata). - - Used for idempotent write detection: two entries with identical content - should have identical content hashes regardless of created_at/sequence. - - Hash Composition: - 1. formatted: Message output (length-prefixed UTF-8) - 2. errors: Count + each error as (b"\\x01" + content_hash) using - FrozenFluentError.content_hash (BLAKE2b-128, always present) - - Args: - formatted: Formatted message string - errors: Tuple of errors to include in hash - - Returns: - 16-byte BLAKE2b digest of content only - """ - h = hashlib.blake2b(digest_size=16) - # Length-prefix formatted string for collision resistance - encoded = formatted.encode("utf-8", errors="surrogatepass") - h.update(len(encoded).to_bytes(4, "big")) - h.update(encoded) - IntegrityCacheEntry._feed_errors(h, errors) - return h.digest() - - -@dataclass(frozen=True, slots=True) -class WriteLogEntry: - """Immutable audit log entry for cache operations. - - Records cache operations for post-mortem analysis and debugging. - Used when audit logging is enabled on IntegrityCache. - - Attributes: - operation: Operation type (GET, PUT, HIT, MISS, EVICT, CORRUPTION) - key_hash: Hash of cache key (privacy-preserving) - timestamp: Monotonic timestamp of operation (time.monotonic()). - Use for ordering within a single process. 
- sequence: Cache entry sequence number (for PUT operations) - checksum_hex: Hex representation of entry checksum (for tracing) - wall_time_unix: Unix wall-clock timestamp of operation (time.time()). - Use for cross-system incident correlation and persisting audit - trails as standalone evidence outside the originating process. - """ - - operation: str - key_hash: str - timestamp: float - sequence: int - checksum_hex: str - wall_time_unix: float - - -# Public alias for cache audit-log entries returned by runtime/localization facades. -CacheAuditLogEntry = WriteLogEntry - @final class IntegrityCache: @@ -492,12 +105,20 @@ class IntegrityCache: - Audit log provides complete operation history Example: - >>> cache = IntegrityCache(maxsize=1000, strict=True) - >>> cache.put("msg", None, None, "en_US", use_isolating=False, formatted="Hello", errors=()) - >>> entry = cache.get("msg", None, None, "en_US", use_isolating=False) - >>> assert entry is not None - >>> assert entry.verify() # Integrity check - >>> result, errors = entry.as_result() + >>> cache = IntegrityCache(maxsize=1000, strict=True) # doctest: +SKIP + >>> cache.put( # doctest: +SKIP + ... "msg", + ... None, + ... None, + ... "en_US", + ... use_isolating=False, + ... formatted="Hello", + ... errors=(), + ... ) + >>> entry = cache.get("msg", None, None, "en_US", use_isolating=False) # doctest: +SKIP + >>> assert entry is not None # doctest: +SKIP + >>> assert entry.verify() # Integrity check # doctest: +SKIP + >>> result, errors = entry.as_result() # doctest: +SKIP """ __slots__ = ( @@ -912,208 +533,18 @@ def _audit( # deque with maxlen provides automatic O(1) eviction of oldest entries self._audit_log.append(log_entry) - # Maximum nodes traversed during _make_hashable to prevent exponential - # expansion of DAG structures with shared references. A 25-level binary - # DAG has only 25 nodes but expands to 2^25 during tree flattening. - # 10,000 nodes is generous for legitimate use while blocking abuse. 
- _MAX_HASHABLE_NODES: int = 10_000 + # Bound recursive cache-key normalization to prevent DAG expansion abuse. + _MAX_HASHABLE_NODES: int = HASHABLE_NODE_BUDGET @staticmethod - def _make_hashable( - value: object, depth: int = MAX_DEPTH - ) -> HashableValue: - """Convert potentially unhashable value to hashable equivalent. - - Converts: - - list -> ("__list__", tuple) - type-tagged for collision prevention - - tuple -> ("__tuple__", tuple) - type-tagged for collision prevention - - dict -> tuple of sorted key-value tuples (recursively) - - set -> frozenset (recursively) - - Decimal -> ("__decimal__", str) - str preserves scale for CLDR rules - - datetime -> ("__datetime__", isoformat, tzinfo_str) - includes timezone - - date -> ("__date__", isoformat) - no timezone - - FluentNumber -> type-tagged with underlying type info - - Mapping ABC -> tuple of sorted key-value tuples (for ChainMap, etc.) - - Sequence ABC -> ("__seq__", tuple) - for UserList, etc. - - Known primitive types -> type-tagged tuples - - Type-Tagging Rationale: - Python's hash equality creates collision risk: - - hash(1) == hash(True) - - Decimal("1.0") == Decimal("1") but produce different plural forms - - datetime objects at same UTC instant with different tzinfo are equal - but format to different local time strings - - list vs tuple: str([1,2]) != str((1,2)) but would hash same - Type-tagging creates distinct cache keys for semantically different values. - - Depth Protection: - Uses explicit depth tracking consistent with codebase pattern - (parser, resolver, serializer all use MAX_DEPTH=100). Raises - TypeError when depth is exhausted, which is caught by _make_key - and results in graceful cache bypass. - - Node Budget Protection: - An integer counter in the enclosing scope, mutated by ``_go`` via - ``nonlocal``, tracks total nodes visited across all recursive calls. 
- This prevents exponential expansion of DAG structures where shared - references are traversed independently (e.g., l=[l,l] repeated 25 - times creates 2^25 tree traversal despite only 25 depth levels). - The counter is fully encapsulated inside the method body and is not - part of the public signature. - - Args: - value: Value to convert (typically FluentValue or nested collection) - depth: Remaining recursion depth (default: MAX_DEPTH) - - Returns: - Hashable equivalent of the value - - Raises: - TypeError: If depth limit exceeded, node budget exceeded, or unknown type - """ - # Node budget counter shared across all recursive calls via closure. - # nonlocal allows _go to mutate _node_count in the enclosing _make_hashable scope. - _node_count: int = 0 - - def _go(v: object, d: int) -> HashableValue: - nonlocal _node_count - _node_count += 1 - if _node_count > IntegrityCache._MAX_HASHABLE_NODES: - msg = ( - "Node budget exceeded in cache key conversion " - "(possible DAG expansion attack)" - ) - raise TypeError(msg) - if d <= 0: - msg = "Maximum nesting depth exceeded in cache key conversion" - raise TypeError(msg) - - def _recurse(x: object) -> HashableValue: - return _go(x, d - 1) - - match v: - # str and None: return as-is. Must check str before Sequence (str is Sequence). - case str() | None: - return v - # Type-tag list and tuple distinctly: str([1,2])="[1, 2]" vs str((1,2))="(1, 2)" - case list(): - return ( - "__list__", - tuple(_recurse(i) for i in v), - ) - case tuple(): - return ( - "__tuple__", - tuple(_recurse(i) for i in v), - ) - case dict(): - # Type-tag dict to distinguish from Mapping ABC (e.g., ChainMap). - # str(dict({"a": 1})) = "{'a': 1}" vs str(ChainMap({"a": 1})) differs. - # Both must produce distinct cache keys since formatting differs. - return ( - "__dict__", - tuple( - sorted( - (k, _recurse(val)) for k, val in v.items() - ) - ), - ) - case set(): - # Convert mutable set to immutable frozenset for hashability. 
- # Tag distinguishes from frozenset since str(set) != str(frozenset). - return ( - "__set__", - frozenset(_recurse(i) for i in v), - ) - case frozenset(): - # Explicit frozenset case - already hashable but tag for type distinction. - # str(frozenset({1})) = "frozenset({1})" vs str({1}) = "{1}" - return ( - "__frozenset__", - frozenset(_recurse(i) for i in v), - ) - # Type-tagging for collision prevention: bool MUST be checked before int - # because bool is a subclass of int in Python. Without separate cases, - # True and 1 would hash-collide despite producing different formatted output. - case bool(): - return ("__bool__", v) - case int(): - return ("__int__", v) - # Decimal: use str() to preserve scale (Decimal("1.0") vs Decimal("1")) - # CLDR plural rules use visible fraction digits (v operand) which differs - case Decimal(): - # NaN normalization: Decimal("NaN").is_nan() for IEEE 754 compliance. - # Same rationale as float NaN - prevents cache pollution. - if v.is_nan(): - return ("__decimal__", "__NaN__") - return ("__decimal__", str(v)) - case datetime(): - # Include timezone info to distinguish same-instant different-offset datetimes. - # Two datetimes representing the same UTC instant but with different tzinfo - # compare equal, but they format to different local time strings. - tz_key = str(v.tzinfo) if v.tzinfo else "__naive__" - return ("__datetime__", v.isoformat(), tz_key) - case date(): - # date has no timezone, isoformat is sufficient for unique key - return ("__date__", v.isoformat()) - # FluentNumber: type-tag with underlying value type for financial precision - # Recursively normalize inner value to handle Decimal NaN correctly. - # Without this, FluentNumber(value=Decimal('NaN')...) creates unretrievable keys. - case FluentNumber(): - return ( - "__fluentnumber__", - type(v.value).__name__, - _recurse(v.value), - v.formatted, - v.precision, - ) - case _: - # Handle Mapping and Sequence ABCs for types like ChainMap, UserList. 
- # This fallback catches any Mapping/Sequence not matched above. - # Must be after specific type checks (dict, list, tuple, str). - if isinstance(v, Mapping): - # Type-tag Mapping ABC to distinguish from dict. - return ( - "__mapping__", - tuple( - sorted( - (k, _recurse(val)) - for k, val in v.items() - ) - ), - ) - if isinstance(v, Sequence): - # Generic Sequence (UserList, etc.) - tag distinctly from list/tuple - return ( - "__seq__", - tuple(_recurse(i) for i in v), - ) - msg = f"Unknown type in cache key: {type(v).__name__}" - raise TypeError(msg) - - return _go(value, depth) + def _make_hashable(value: object, depth: int = MAX_DEPTH) -> HashableValue: + """Convert potentially unhashable cache arguments into a stable hashable form.""" + return make_hashable(value, depth=depth) @staticmethod def _compute_key_hash(key: _CacheKey) -> bytes: - """Compute BLAKE2b-8 hash of a cache key for entry binding. - - Returns an 8-byte digest used to bind an IntegrityCacheEntry to its - storage position. Called by put() to compute the key_hash stored in the - entry, and by get() to verify the stored key_hash matches the lookup key. - - 8-byte (64-bit) digest provides sufficient collision resistance for - integrity binding while keeping per-entry memory overhead minimal. - - Args: - key: Cache key tuple (message_id, args_tuple, attribute, locale_code, use_isolating) - - Returns: - 8-byte BLAKE2b digest - """ - return hashlib.blake2b( - str(key).encode("utf-8", errors="surrogatepass"), - digest_size=8, - ).digest() + """Compute the 8-byte key binding used to detect cache slot confusion.""" + return compute_key_hash(key) @staticmethod def _make_key( @@ -1124,33 +555,14 @@ def _make_key( *, use_isolating: bool, ) -> _CacheKey | None: - """Create immutable cache key from arguments. - - Converts unhashable types (lists, dicts, sets) to hashable equivalents. 
- - Args: - message_id: Message identifier - args: Message arguments (may contain unhashable values) - attribute: Attribute name - locale_code: Locale code - use_isolating: Whether Unicode isolation marks are used - - Returns: - Immutable cache key tuple, or None if conversion fails - """ - if args is None: - args_tuple: tuple[tuple[str, HashableValue], ...] = () - else: - try: - items: list[tuple[str, HashableValue]] = [] - for k, v in args.items(): - items.append((k, IntegrityCache._make_hashable(v))) - args_tuple = tuple(sorted(items)) - hash(args_tuple) - except (TypeError, RecursionError): - return None - - return (message_id, args_tuple, attribute, locale_code, use_isolating) + """Create the immutable lookup key for a formatting request.""" + return make_key( + message_id, + args, + attribute, + locale_code, + use_isolating=use_isolating, + ) def __len__(self) -> int: """Get current cache size. Thread-safe.""" diff --git a/src/ftllexengine/runtime/cache_config.py b/src/ftllexengine/runtime/cache_config.py index 4e6061a8..79b0f381 100644 --- a/src/ftllexengine/runtime/cache_config.py +++ b/src/ftllexengine/runtime/cache_config.py @@ -45,24 +45,24 @@ class CacheConfig: Prevents memory exhaustion from pathological cases. 
Example: - >>> from ftllexengine import FluentBundle - >>> from ftllexengine.runtime.cache_config import CacheConfig - >>> config = CacheConfig(size=500, write_once=True) - >>> bundle = FluentBundle("en", cache=config) - >>> bundle.cache_enabled + >>> from ftllexengine import FluentBundle # doctest: +SKIP + >>> from ftllexengine.runtime.cache_config import CacheConfig # doctest: +SKIP + >>> config = CacheConfig(size=500, write_once=True) # doctest: +SKIP + >>> bundle = FluentBundle("en", cache=config) # doctest: +SKIP + >>> bundle.cache_enabled # doctest: +SKIP True - >>> assert bundle.cache_config is not None - >>> bundle.cache_config.size + >>> assert bundle.cache_config is not None # doctest: +SKIP + >>> bundle.cache_config.size # doctest: +SKIP 500 Example - Financial application: - >>> config = CacheConfig( + >>> config = CacheConfig( # doctest: +SKIP ... write_once=True, ... integrity_strict=True, ... enable_audit=True, ... max_audit_entries=50000, ... ) - >>> bundle = FluentBundle("en", cache=config, strict=True) + >>> bundle = FluentBundle("en", cache=config, strict=True) # doctest: +SKIP """ size: int = DEFAULT_CACHE_SIZE diff --git a/src/ftllexengine/runtime/cache_keys.py b/src/ftllexengine/runtime/cache_keys.py new file mode 100644 index 00000000..6fe29975 --- /dev/null +++ b/src/ftllexengine/runtime/cache_keys.py @@ -0,0 +1,177 @@ +"""Hashable-key conversion helpers for IntegrityCache.""" + +from __future__ import annotations + +import hashlib +from collections.abc import Mapping, Sequence +from datetime import date, datetime +from decimal import Decimal +from typing import TYPE_CHECKING, cast + +from ftllexengine.constants import MAX_DEPTH +from ftllexengine.core.value_types import FluentNumber, FluentValue + +if TYPE_CHECKING: + from collections.abc import Callable + + from ftllexengine.runtime.cache_types import HashableValue, _CacheKey + +__all__ = ["HASHABLE_NODE_BUDGET", "compute_key_hash", "make_hashable", "make_key"] + +HASHABLE_NODE_BUDGET: int 
= 10_000 + + +def _hashable_decimal(value: Decimal) -> HashableValue: + if value.is_nan(): + return ("__decimal__", "__NaN__") + return ("__decimal__", str(value)) + + +def _hashable_datetime(value: datetime) -> HashableValue: + tz_key = str(value.tzinfo) if value.tzinfo else "__naive__" + return ("__datetime__", value.isoformat(), tz_key) + + +def _hashable_mapping( + tag: str, + value: Mapping[object, object], + recurse: Callable[[object], HashableValue], +) -> HashableValue: + return cast( + "HashableValue", + (tag, tuple(sorted((key, recurse(item)) for key, item in value.items()))), + ) + + +def _hashable_sequence( + tag: str, + value: Sequence[object], + recurse: Callable[[object], HashableValue], +) -> HashableValue: + return (tag, tuple(recurse(item) for item in value)) + + +def _hashable_set( + tag: str, + value: set[object] | frozenset[object], + recurse: Callable[[object], HashableValue], +) -> HashableValue: + return (tag, frozenset(recurse(item) for item in value)) + + +def _hashable_scalar_value( + value: object, recurse: Callable[[object], HashableValue] +) -> HashableValue | None: + result: HashableValue | None = None + match value: + case str() | None: + result = value + case bool(): + result = ("__bool__", value) + case int(): + result = ("__int__", value) + case Decimal(): + result = _hashable_decimal(value) + case datetime(): + result = _hashable_datetime(value) + case date(): + result = ("__date__", value.isoformat()) + case FluentNumber(): + result = ( + "__fluentnumber__", + type(value.value).__name__, + recurse(value.value), + value.formatted, + value.precision, + ) + case _: + pass + return result + + +def _hashable_container_value( + value: object, + recurse: Callable[[object], HashableValue], +) -> HashableValue | None: + match value: + case list(): + return _hashable_sequence("__list__", value, recurse) + case tuple(): + return _hashable_sequence("__tuple__", value, recurse) + case dict(): + return _hashable_mapping("__dict__", value, 
recurse) + case set(): + return _hashable_set("__set__", value, recurse) + case frozenset(): + return _hashable_set("__frozenset__", value, recurse) + case _: + return None + + +def make_hashable(value: object, depth: int = MAX_DEPTH) -> HashableValue: + """Convert potentially unhashable values into a stable hashable form.""" + node_count = 0 + + def go(current: object, remaining_depth: int) -> HashableValue: + nonlocal node_count + node_count += 1 + if node_count > HASHABLE_NODE_BUDGET: + msg = "Node budget exceeded in cache key conversion (possible DAG expansion attack)" + raise TypeError(msg) + if remaining_depth <= 0: + msg = "Maximum nesting depth exceeded in cache key conversion" + raise TypeError(msg) + if current is None: + return None + + def recurse(item: object) -> HashableValue: + return go(item, remaining_depth - 1) + + known_value = _hashable_scalar_value(current, recurse) + if known_value is not None: + return known_value + + known_value = _hashable_container_value(current, recurse) + if known_value is not None: + return known_value + if isinstance(current, Mapping): + return _hashable_mapping("__mapping__", current, recurse) + if isinstance(current, Sequence): + return _hashable_sequence("__seq__", current, recurse) + + msg = f"Unknown type in cache key: {type(current).__name__}" + raise TypeError(msg) + + return go(value, depth) + + +def compute_key_hash(key: _CacheKey) -> bytes: + """Compute the 8-byte BLAKE2b key binding used by cache entries.""" + return hashlib.blake2b( + str(key).encode("utf-8", errors="surrogatepass"), + digest_size=8, + ).digest() + + +def make_key( + message_id: str, + args: Mapping[str, FluentValue] | None, + attribute: str | None, + locale_code: str, + *, + use_isolating: bool, +) -> _CacheKey | None: + """Create an immutable cache key tuple from formatting arguments.""" + if args is None: + args_tuple: tuple[tuple[str, HashableValue], ...] 
= () + else: + try: + items: list[tuple[str, HashableValue]] = [] + for key, value in args.items(): + items.append((key, make_hashable(value))) + args_tuple = tuple(sorted(items)) + hash(args_tuple) + except (TypeError, RecursionError): + return None + + return (message_id, args_tuple, attribute, locale_code, use_isolating) diff --git a/src/ftllexengine/runtime/cache_types.py b/src/ftllexengine/runtime/cache_types.py new file mode 100644 index 00000000..b62692a2 --- /dev/null +++ b/src/ftllexengine/runtime/cache_types.py @@ -0,0 +1,216 @@ +"""Shared cache types and immutable entry structures.""" + +from __future__ import annotations + +import hashlib +import hmac +import struct +import time +from dataclasses import dataclass, field +from datetime import date, datetime +from decimal import Decimal +from typing import TypedDict + +from ftllexengine.core.value_types import FluentNumber +from ftllexengine.diagnostics import FrozenFluentError + +__all__ = [ + "_DEFAULT_MAX_ERRORS_PER_ENTRY", + "CacheAuditLogEntry", + "CacheStats", + "HashableValue", + "IntegrityCacheEntry", + "WriteLogEntry", + "_CacheKey", + "_CacheValue", + "_estimate_error_weight", +] + + +class CacheStats(TypedDict): + """Typed statistics snapshot returned by IntegrityCache.get_stats().""" + + size: int + maxsize: int + max_entry_weight: int + max_errors_per_entry: int + hits: int + misses: int + hit_rate: float + unhashable_skips: int + oversize_skips: int + error_bloat_skips: int + corruption_detected: int + idempotent_writes: int + write_once_conflicts: int + combined_weight_skips: int + sequence: int + write_once: bool + strict: bool + audit_enabled: bool + audit_entries: int + + +_ERROR_BASE_OVERHEAD: int = 100 +_DEFAULT_MAX_ERRORS_PER_ENTRY: int = 50 + + +def _estimate_error_weight(error: FrozenFluentError) -> int: + """Estimate the memory weight of one FrozenFluentError.""" + weight = _ERROR_BASE_OVERHEAD + len(error.message) + + if error.diagnostic is not None: + diag = error.diagnostic + 
weight += len(diag.message) + for attr in ( + diag.hint, + diag.help_url, + diag.function_name, + diag.argument_name, + diag.expected_type, + diag.received_type, + diag.ftl_location, + ): + if attr is not None: + weight += len(attr) + if diag.resolution_path is not None: + for path_element in diag.resolution_path: + weight += len(path_element) + + if error.context is not None: + ctx = error.context + weight += len(ctx.input_value) + weight += len(ctx.locale_code) + weight += len(ctx.parse_type) + weight += len(ctx.fallback_value) + + return weight + + +type HashableValue = ( + str + | int + | bool + | Decimal + | datetime + | date + | FluentNumber + | None + | tuple["HashableValue", ...] + | frozenset["HashableValue"] +) + +type _CacheKey = tuple[str, tuple[tuple[str, HashableValue], ...], str | None, str, bool] +type _CacheValue = tuple[str, tuple[FrozenFluentError, ...]] + + +@dataclass(frozen=True, slots=True) +class IntegrityCacheEntry: + """Immutable cache entry with integrity metadata.""" + + formatted: str + errors: tuple[FrozenFluentError, ...] 
+ checksum: bytes + created_at: float + sequence: int + key_hash: bytes + content_hash: bytes = field(init=False, repr=False, compare=False, hash=False) + + def __post_init__(self) -> None: + """Compute and store content_hash after field initialization.""" + object.__setattr__( + self, "content_hash", self._compute_content_hash(self.formatted, self.errors) + ) + + @classmethod + def create( + cls, + formatted: str, + errors: tuple[FrozenFluentError, ...], + sequence: int, + key_hash: bytes, + ) -> IntegrityCacheEntry: + """Create entry with computed checksum.""" + created_at = time.monotonic() + checksum = cls._compute_checksum(formatted, errors, created_at, sequence, key_hash) + return cls( + formatted=formatted, + errors=errors, + checksum=checksum, + created_at=created_at, + sequence=sequence, + key_hash=key_hash, + ) + + @staticmethod + def _feed_errors(h: hashlib.blake2b, errors: tuple[FrozenFluentError, ...]) -> None: + """Feed error sequence into an active hasher.""" + h.update(len(errors).to_bytes(4, "big")) + for error in errors: + h.update(b"\x01") + h.update(error.content_hash) + + @staticmethod + def _compute_checksum( + formatted: str, + errors: tuple[FrozenFluentError, ...], + created_at: float, + sequence: int, + key_hash: bytes, + ) -> bytes: + """Compute a BLAKE2b-128 checksum for content plus metadata.""" + h = hashlib.blake2b(digest_size=16) + encoded = formatted.encode("utf-8", errors="surrogatepass") + h.update(len(encoded).to_bytes(4, "big")) + h.update(encoded) + IntegrityCacheEntry._feed_errors(h, errors) + h.update(struct.pack(">d", created_at)) + h.update(sequence.to_bytes(8, "big")) + h.update(key_hash) + return h.digest() + + def verify(self) -> bool: + """Verify entry integrity recursively.""" + expected_content = self._compute_content_hash(self.formatted, self.errors) + if not hmac.compare_digest(self.content_hash, expected_content): + return False + + expected = self._compute_checksum( + self.formatted, self.errors, self.created_at, 
self.sequence, self.key_hash + ) + if not hmac.compare_digest(self.checksum, expected): + return False + + return all(error.verify_integrity() for error in self.errors) + + def as_result(self) -> _CacheValue: + """Extract formatted result and errors as a tuple.""" + return (self.formatted, self.errors) + + @staticmethod + def _compute_content_hash( + formatted: str, + errors: tuple[FrozenFluentError, ...], + ) -> bytes: + """Compute a BLAKE2b-128 hash of content only.""" + h = hashlib.blake2b(digest_size=16) + encoded = formatted.encode("utf-8", errors="surrogatepass") + h.update(len(encoded).to_bytes(4, "big")) + h.update(encoded) + IntegrityCacheEntry._feed_errors(h, errors) + return h.digest() + + +@dataclass(frozen=True, slots=True) +class WriteLogEntry: + """Immutable audit log entry for cache operations.""" + + operation: str + key_hash: str + timestamp: float + sequence: int + checksum_hex: str + wall_time_unix: float + + +CacheAuditLogEntry = WriteLogEntry diff --git a/src/ftllexengine/runtime/function_bridge.py b/src/ftllexengine/runtime/function_bridge.py index c5dccbb6..7da1cd7c 100644 --- a/src/ftllexengine/runtime/function_bridge.py +++ b/src/ftllexengine/runtime/function_bridge.py @@ -107,20 +107,20 @@ def my_func(value: T, locale_code: str, *, keyword_args...) -> R your signature matches the expected pattern. Example - Simple function (no locale): - >>> @fluent_function + >>> @fluent_function # doctest: +SKIP ... def my_upper(value: str) -> str: ... return value.upper() - >>> bundle.add_function("MYUPPER", my_upper) - >>> # FTL: { MY_UPPER($name) } + >>> bundle.add_function("MYUPPER", my_upper) # doctest: +SKIP + FTL: `{ MY_UPPER($name) }` Example - Locale-aware function: - >>> @fluent_function(inject_locale=True) + >>> @fluent_function(inject_locale=True) # doctest: +SKIP ... def my_format(value: int, locale_code: str) -> str: ... # Format number according to locale ... 
return format_for_locale(value, locale_code) - >>> bundle.add_function("MYFORMAT", my_format) - >>> # FTL: { MY_FORMAT($count) } - >>> # Bundle appends locale: my_format(count_value, "en_US") + >>> bundle.add_function("MYFORMAT", my_format) # doctest: +SKIP + FTL: `{ MY_FORMAT($count) }` + Bundle appends locale: `my_format(count_value, "en_US")` """ def decorator(fn: F) -> F: @@ -171,13 +171,13 @@ class FunctionRegistry: Uses __slots__ for memory efficiency (avoids per-instance __dict__). Example: - >>> registry = FunctionRegistry() - >>> registry.register(my_func, ftl_name="CUSTOM") - >>> "CUSTOM" in registry + >>> registry = FunctionRegistry() # doctest: +SKIP + >>> registry.register(my_func, ftl_name="CUSTOM") # doctest: +SKIP + >>> "CUSTOM" in registry # doctest: +SKIP True - >>> len(registry) + >>> len(registry) # doctest: +SKIP 1 - >>> for name in registry: + >>> for name in registry: # doctest: +SKIP ... print(name) CUSTOM """ @@ -210,12 +210,12 @@ def register( 2 positional parameters to receive (value, locale_code). Example: - >>> def number_format(value, *, minimum_fraction_digits=0): + >>> def number_format(value, *, minimum_fraction_digits=0): # doctest: +SKIP ... 
return str(value) - >>> registry = FunctionRegistry() - >>> registry.register(number_format, ftl_name="NUMBER") - >>> # FTL: { $x NUMBER(minimumFractionDigits: 2) } - >>> # Python: number_format(x, minimum_fraction_digits=2) + >>> registry = FunctionRegistry() # doctest: +SKIP + >>> registry.register(number_format, ftl_name="NUMBER") # doctest: +SKIP + FTL: `{ $x NUMBER(minimumFractionDigits: 2) }` + Python: `number_format(x, minimum_fraction_digits=2)` """ if self._frozen: msg = ( @@ -409,9 +409,9 @@ def list_functions(self) -> list[str]: List of FTL function names (e.g., ["NUMBER", "DATETIME", "CURRENCY"]) Example: - >>> registry = FunctionRegistry() - >>> registry.register(lambda x: str(x), ftl_name="CUSTOM") - >>> registry.list_functions() + >>> registry = FunctionRegistry() # doctest: +SKIP + >>> registry.register(lambda x: str(x), ftl_name="CUSTOM") # doctest: +SKIP + >>> registry.list_functions() # doctest: +SKIP ['CUSTOM'] """ return list(self._functions.keys()) @@ -426,13 +426,13 @@ def get_function_info(self, ftl_name: str) -> FunctionSignature | None: FunctionSignature with metadata, or None if not found Example: - >>> registry = FunctionRegistry() - >>> def my_func(value, *, min_digits=0): return str(value) - >>> registry.register(my_func, ftl_name="MYFUNC") - >>> info = registry.get_function_info("MYFUNC") - >>> info.python_name + >>> registry = FunctionRegistry() # doctest: +SKIP + >>> def my_func(value, *, min_digits=0): return str(value) # doctest: +SKIP + >>> registry.register(my_func, ftl_name="MYFUNC") # doctest: +SKIP + >>> info = registry.get_function_info("MYFUNC") # doctest: +SKIP + >>> info.python_name # doctest: +SKIP 'my_func' - >>> info.ftl_name + >>> info.ftl_name # doctest: +SKIP 'MYFUNC' """ return self._functions.get(ftl_name) @@ -450,11 +450,11 @@ def get_callable(self, ftl_name: str) -> Callable[..., FluentValue] | None: The registered callable, or None if function not found Example: - >>> registry = FunctionRegistry() - >>> def 
my_func(value): return str(value) - >>> registry.register(my_func, ftl_name="MYFUNC") - >>> callable_func = registry.get_callable("MYFUNC") - >>> callable_func is my_func + >>> registry = FunctionRegistry() # doctest: +SKIP + >>> def my_func(value): return str(value) # doctest: +SKIP + >>> registry.register(my_func, ftl_name="MYFUNC") # doctest: +SKIP + >>> callable_func = registry.get_callable("MYFUNC") # doctest: +SKIP + >>> callable_func is my_func # doctest: +SKIP True """ sig = self._functions.get(ftl_name) @@ -467,10 +467,10 @@ def __iter__(self) -> Iterator[str]: Iterator over FTL function names Example: - >>> registry = FunctionRegistry() - >>> registry.register(lambda x: str(x), ftl_name="FUNC1") - >>> registry.register(lambda x: str(x), ftl_name="FUNC2") - >>> for name in registry: + >>> registry = FunctionRegistry() # doctest: +SKIP + >>> registry.register(lambda x: str(x), ftl_name="FUNC1") # doctest: +SKIP + >>> registry.register(lambda x: str(x), ftl_name="FUNC2") # doctest: +SKIP + >>> for name in registry: # doctest: +SKIP ... 
print(name) FUNC1 FUNC2 @@ -484,11 +484,11 @@ def __len__(self) -> int: Number of registered functions Example: - >>> registry = FunctionRegistry() - >>> len(registry) + >>> registry = FunctionRegistry() # doctest: +SKIP + >>> len(registry) # doctest: +SKIP 0 - >>> registry.register(lambda x: str(x), ftl_name="FUNC") - >>> len(registry) + >>> registry.register(lambda x: str(x), ftl_name="FUNC") # doctest: +SKIP + >>> len(registry) # doctest: +SKIP 1 """ return len(self._functions) @@ -503,11 +503,11 @@ def __contains__(self, ftl_name: str) -> bool: True if function is registered Example: - >>> registry = FunctionRegistry() - >>> registry.register(lambda x: str(x), ftl_name="CUSTOM") - >>> "CUSTOM" in registry + >>> registry = FunctionRegistry() # doctest: +SKIP + >>> registry.register(lambda x: str(x), ftl_name="CUSTOM") # doctest: +SKIP + >>> "CUSTOM" in registry # doctest: +SKIP True - >>> "MISSING" in registry + >>> "MISSING" in registry # doctest: +SKIP False """ return ftl_name in self._functions @@ -519,8 +519,8 @@ def __repr__(self) -> str: String representation showing registered functions Example: - >>> registry = FunctionRegistry() - >>> repr(registry) + >>> registry = FunctionRegistry() # doctest: +SKIP + >>> repr(registry) # doctest: +SKIP 'FunctionRegistry(functions=0)' """ return f"FunctionRegistry(functions={len(self._functions)})" @@ -540,9 +540,9 @@ def copy(self) -> FunctionRegistry: either copy won't affect the other. Example: - >>> frozen_registry = get_shared_registry() # Frozen - >>> my_registry = frozen_registry.copy() # Unfrozen copy - >>> my_registry.register(my_custom_func) # Works! + >>> frozen_registry = get_shared_registry() # Frozen # doctest: +SKIP + >>> my_registry = frozen_registry.copy() # Unfrozen copy # doctest: +SKIP + >>> my_registry.register(my_custom_func) # Works! 
# doctest: +SKIP """ new_registry = FunctionRegistry() new_registry._functions = self._functions.copy() @@ -568,11 +568,11 @@ def should_inject_locale(self, ftl_name: str) -> bool: 3. Only inject if the callable has the marker set to True Example: - >>> registry = FunctionRegistry() - >>> @fluent_function(inject_locale=True) + >>> registry = FunctionRegistry() # doctest: +SKIP + >>> @fluent_function(inject_locale=True) # doctest: +SKIP ... def my_format(value, locale_code): return str(value) - >>> registry.register(my_format, ftl_name="MYFORMAT") - >>> registry.should_inject_locale("MYFORMAT") + >>> registry.register(my_format, ftl_name="MYFORMAT") # doctest: +SKIP + >>> registry.should_inject_locale("MYFORMAT") # doctest: +SKIP True """ if ftl_name not in self._functions: @@ -598,10 +598,10 @@ def get_expected_positional_args(self, ftl_name: str) -> int | None: or None if not a built-in function with known arity. Example: - >>> registry = create_default_registry() - >>> registry.get_expected_positional_args("NUMBER") + >>> registry = create_default_registry() # doctest: +SKIP + >>> registry.get_expected_positional_args("NUMBER") # doctest: +SKIP 1 - >>> registry.get_expected_positional_args("CUSTOM") + >>> registry.get_expected_positional_args("CUSTOM") # doctest: +SKIP None """ # Lazy import to avoid circular dependency at module load time @@ -622,9 +622,9 @@ def get_builtin_metadata(self, ftl_name: str) -> FunctionMetadata | None: FunctionMetadata for built-in functions, None for custom functions. 
Example: - >>> registry = create_default_registry() - >>> meta = registry.get_builtin_metadata("NUMBER") - >>> meta.requires_locale + >>> registry = create_default_registry() # doctest: +SKIP + >>> meta = registry.get_builtin_metadata("NUMBER") # doctest: +SKIP + >>> meta.requires_locale # doctest: +SKIP True """ # Lazy import to avoid circular dependency at module load time @@ -645,11 +645,11 @@ def _to_camel_case(snake_case: str) -> str: FTL parameter name (e.g., "minimumFractionDigits") Examples: - >>> FunctionRegistry._to_camel_case("minimum_fraction_digits") + >>> FunctionRegistry._to_camel_case("minimum_fraction_digits") # doctest: +SKIP 'minimumFractionDigits' - >>> FunctionRegistry._to_camel_case("use_grouping") + >>> FunctionRegistry._to_camel_case("use_grouping") # doctest: +SKIP 'useGrouping' - >>> FunctionRegistry._to_camel_case("value") + >>> FunctionRegistry._to_camel_case("value") # doctest: +SKIP 'value' """ # Split on underscores diff --git a/src/ftllexengine/runtime/function_metadata.py b/src/ftllexengine/runtime/function_metadata.py index 8ad0e7c1..7cfbcf38 100644 --- a/src/ftllexengine/runtime/function_metadata.py +++ b/src/ftllexengine/runtime/function_metadata.py @@ -52,7 +52,7 @@ class FunctionMetadata: category: Function category for documentation Example: - >>> NUMBER_META = FunctionMetadata( + >>> NUMBER_META = FunctionMetadata( # doctest: +SKIP ... python_name="number_format", ... ftl_name="NUMBER", ... 
requires_locale=True, @@ -108,9 +108,9 @@ def requires_locale_injection(func_name: str) -> bool: True if function requires locale injection, False otherwise Example: - >>> requires_locale_injection("NUMBER") + >>> requires_locale_injection("NUMBER") # doctest: +SKIP True - >>> requires_locale_injection("CUSTOM") + >>> requires_locale_injection("CUSTOM") # doctest: +SKIP False """ metadata = BUILTIN_FUNCTIONS.get(func_name) @@ -127,9 +127,9 @@ def is_builtin_function(func_name: str) -> bool: True if function is built-in, False otherwise Example: - >>> is_builtin_function("NUMBER") + >>> is_builtin_function("NUMBER") # doctest: +SKIP True - >>> is_builtin_function("CUSTOM") + >>> is_builtin_function("CUSTOM") # doctest: +SKIP False """ return func_name in BUILTIN_FUNCTIONS @@ -145,9 +145,9 @@ def get_python_name(ftl_name: str) -> str | None: Python function name (e.g., "number_format") or None if not found Example: - >>> get_python_name("NUMBER") + >>> get_python_name("NUMBER") # doctest: +SKIP 'number_format' - >>> get_python_name("CUSTOM") + >>> get_python_name("CUSTOM") # doctest: +SKIP None """ metadata = BUILTIN_FUNCTIONS.get(ftl_name) diff --git a/src/ftllexengine/runtime/functions.py b/src/ftllexengine/runtime/functions.py index 61dc5bfd..2ff143a6 100644 --- a/src/ftllexengine/runtime/functions.py +++ b/src/ftllexengine/runtime/functions.py @@ -87,16 +87,18 @@ def number_format( FluentNumber with formatted string and computed precision for plural matching Examples: - >>> from decimal import Decimal - >>> number_format(Decimal('1234.5'), "en-US") + >>> from decimal import Decimal # doctest: +SKIP + >>> number_format(Decimal('1234.5'), "en-US") # doctest: +SKIP FluentNumber(value=Decimal('1234.5'), formatted='1,234.5', precision=1) - >>> number_format(Decimal('1234.5'), "de-DE") + >>> number_format(Decimal('1234.5'), "de-DE") # doctest: +SKIP FluentNumber(value=Decimal('1234.5'), formatted='1.234,5', precision=1) - >>> number_format(Decimal('1234.5'), "lv-LV") + 
>>> number_format(Decimal('1234.5'), "lv-LV") # doctest: +SKIP FluentNumber(value=Decimal('1234.5'), formatted='1 234,5', precision=1) - >>> number_format(42, "en-US", minimum_fraction_digits=2) + >>> number_format(42, "en-US", minimum_fraction_digits=2) # doctest: +SKIP FluentNumber(value=42, formatted='42.00', precision=2) - >>> number_format(Decimal('-1234.56'), "en-US", pattern="#,##0.00;(#,##0.00)") + >>> number_format( # doctest: +SKIP + ... Decimal('-1234.56'), "en-US", pattern="#,##0.00;(#,##0.00)" + ... ) FluentNumber(value=Decimal('-1234.56'), formatted='(1,234.56)', precision=2) FTL Usage: @@ -202,18 +204,20 @@ def datetime_format( Formatted date/datetime string Examples: - >>> from datetime import date, datetime, UTC - >>> dt = datetime(2025, 10, 27, tzinfo=UTC) - >>> datetime_format(dt, "en-US", date_style="short") + >>> from datetime import date, datetime, UTC # doctest: +SKIP + >>> dt = datetime(2025, 10, 27, tzinfo=UTC) # doctest: +SKIP + >>> datetime_format(dt, "en-US", date_style="short") # doctest: +SKIP '10/27/25' - >>> datetime_format(dt, "de-DE", date_style="short") + >>> datetime_format(dt, "de-DE", date_style="short") # doctest: +SKIP '27.10.25' - >>> dt_with_time = datetime(2025, 10, 27, 14, 30, tzinfo=UTC) - >>> datetime_format(dt_with_time, "en-US", date_style="medium", time_style="short") + >>> dt_with_time = datetime(2025, 10, 27, 14, 30, tzinfo=UTC) # doctest: +SKIP + >>> datetime_format( # doctest: +SKIP + ... dt_with_time, "en-US", date_style="medium", time_style="short" + ... ) 'Oct 27, 2025, 2:30 PM' - >>> datetime_format(dt, "en-US", pattern="yyyy-MM-dd") + >>> datetime_format(dt, "en-US", pattern="yyyy-MM-dd") # doctest: +SKIP '2025-10-27' - >>> datetime_format(date(2025, 10, 27), "en-US", date_style="short") + >>> datetime_format(date(2025, 10, 27), "en-US", date_style="short") # doctest: +SKIP '10/27/25' FTL Usage: @@ -284,14 +288,14 @@ def currency_format( in plural/select expressions, matching NUMBER() behavior. 
Examples: - >>> from decimal import Decimal - >>> currency_format(Decimal('123.45'), "en-US", currency="EUR") + >>> from decimal import Decimal # doctest: +SKIP + >>> currency_format(Decimal('123.45'), "en-US", currency="EUR") # doctest: +SKIP FluentNumber(value=Decimal('123.45'), formatted='€123.45', precision=2) - >>> currency_format(Decimal('123.45'), "lv-LV", currency="EUR") + >>> currency_format(Decimal('123.45'), "lv-LV", currency="EUR") # doctest: +SKIP FluentNumber(value=Decimal('123.45'), formatted='123,45 €', precision=2) - >>> currency_format(12345, "ja-JP", currency="JPY") + >>> currency_format(12345, "ja-JP", currency="JPY") # doctest: +SKIP FluentNumber(value=12345, formatted='¥12,345', precision=0) - >>> currency_format(Decimal('123.456'), "ar-BH", currency="BHD") + >>> currency_format(Decimal('123.456'), "ar-BH", currency="BHD") # doctest: +SKIP FluentNumber(value=Decimal('123.456'), formatted='123.456 د.ب.', precision=3) FTL Usage: @@ -419,21 +423,21 @@ def create_default_registry() -> FunctionRegistry: FunctionRegistry with NUMBER, DATETIME, and CURRENCY functions registered. Example: - >>> registry = create_default_registry() - >>> "NUMBER" in registry + >>> registry = create_default_registry() # doctest: +SKIP + >>> "NUMBER" in registry # doctest: +SKIP True - >>> "DATETIME" in registry + >>> "DATETIME" in registry # doctest: +SKIP True - >>> "CURRENCY" in registry + >>> "CURRENCY" in registry # doctest: +SKIP True Use Case: FluentBundle uses this internally to create isolated function registries. 
Users who need custom registries can call this and then modify the result: - >>> registry = create_default_registry() - >>> registry.register(my_custom_func, ftl_name="CUSTOM") - >>> bundle = FluentBundle("en", functions=registry) + >>> registry = create_default_registry() # doctest: +SKIP + >>> registry.register(my_custom_func, ftl_name="CUSTOM") # doctest: +SKIP + >>> bundle = FluentBundle("en", functions=registry) # doctest: +SKIP See Also: get_shared_registry: Returns a shared cached registry for performance. @@ -499,18 +503,18 @@ def get_shared_registry() -> FunctionRegistry: TypeError: If you attempt to call register() on the returned registry. Example: - >>> # Efficient: Share registry across multiple bundles - >>> shared = get_shared_registry() - >>> bundle_en = FluentBundle("en", functions=shared) - >>> bundle_de = FluentBundle("de", functions=shared) - >>> bundle_fr = FluentBundle("fr", functions=shared) - >>> - >>> # Registry is frozen - attempting to modify raises TypeError - >>> shared.register(my_func) # Raises TypeError! - >>> - >>> # To add custom functions, use copy() to get unfrozen copy: - >>> my_registry = shared.copy() - >>> my_registry.register(my_custom_func, ftl_name="CUSTOM") + Efficient: share the registry across multiple bundles. + >>> shared = get_shared_registry() # doctest: +SKIP + >>> bundle_en = FluentBundle("en", functions=shared) # doctest: +SKIP + >>> bundle_de = FluentBundle("de", functions=shared) # doctest: +SKIP + >>> bundle_fr = FluentBundle("fr", functions=shared) # doctest: +SKIP + + The registry is frozen, so modification attempts raise `TypeError`. + >>> shared.register(my_func) # Raises TypeError! # doctest: +SKIP + + To add custom functions, use `copy()` to get an unfrozen copy: + >>> my_registry = shared.copy() # doctest: +SKIP + >>> my_registry.register(my_custom_func, ftl_name="CUSTOM") # doctest: +SKIP See Also: create_default_registry: Creates a new unfrozen registry for customization. 
diff --git a/src/ftllexengine/runtime/locale_context.py b/src/ftllexengine/runtime/locale_context.py index 613105e5..5a5295b7 100644 --- a/src/ftllexengine/runtime/locale_context.py +++ b/src/ftllexengine/runtime/locale_context.py @@ -35,32 +35,33 @@ import logging from collections import OrderedDict from dataclasses import dataclass, field -from datetime import date, datetime -from decimal import Decimal, InvalidOperation from threading import Lock from typing import TYPE_CHECKING, ClassVar, Literal from ftllexengine.constants import ( - FALLBACK_FUNCTION_ERROR, - MAX_FORMAT_DIGITS, MAX_LOCALE_CACHE_SIZE, MAX_LOCALE_CODE_LENGTH, ) from ftllexengine.core.babel_compat import ( - get_babel_dates, - get_babel_numbers, get_locale_class, get_unknown_locale_error_class, require_babel, ) from ftllexengine.core.locale_utils import require_locale_code -from ftllexengine.diagnostics import ErrorCategory, FrozenErrorContext, FrozenFluentError -from ftllexengine.diagnostics.templates import ErrorTemplate +from ftllexengine.runtime.locale_formatting import ( + format_currency_for_locale, + format_datetime_for_locale, + format_number_for_locale, + get_iso_code_pattern_for_locale, +) if TYPE_CHECKING: + from datetime import date, datetime + from decimal import Decimal + from babel import Locale - from ftllexengine.localization.types import LocaleCode + from ftllexengine.core.semantic_types import LocaleCode __all__ = ["LocaleContext"] @@ -89,20 +90,20 @@ class LocaleContext: - LocaleContext.cache_info(): Get detailed cache statistics Examples: - >>> from decimal import Decimal - >>> ctx = LocaleContext.create('en-US') - >>> ctx.format_number(Decimal('1234.5'), use_grouping=True) + >>> from decimal import Decimal # doctest: +SKIP + >>> ctx = LocaleContext.create('en-US') # doctest: +SKIP + >>> ctx.format_number(Decimal('1234.5'), use_grouping=True) # doctest: +SKIP '1,234.5' - >>> ctx = LocaleContext.create('lv-LV') - >>> ctx.format_number(Decimal('1234.5'), use_grouping=True) + 
>>> ctx = LocaleContext.create('lv-LV') # doctest: +SKIP + >>> ctx.format_number(Decimal('1234.5'), use_grouping=True) # doctest: +SKIP '1 234,5' - >>> # Unknown locales fall back to en_US formatting rules with a warning - >>> ctx = LocaleContext.create('xx-UNKNOWN') - >>> ctx.locale_code + Unknown locales fall back to `en_US` formatting rules with a warning: + >>> ctx = LocaleContext.create('xx-UNKNOWN') # doctest: +SKIP + >>> ctx.locale_code # doctest: +SKIP 'xx_unknown' - >>> ctx.is_fallback # Programmatic detection of fallback + >>> ctx.is_fallback # Programmatic detection of fallback # doctest: +SKIP True Thread Safety: @@ -145,11 +146,11 @@ def clear_cache(cls) -> None: Thread-safe via Lock. Example: - >>> LocaleContext.create('en-US') # Cached - >>> LocaleContext.cache_size() + >>> LocaleContext.create('en-US') # Cached # doctest: +SKIP + >>> LocaleContext.cache_size() # doctest: +SKIP 1 - >>> LocaleContext.clear_cache() - >>> LocaleContext.cache_size() + >>> LocaleContext.clear_cache() # doctest: +SKIP + >>> LocaleContext.cache_size() # doctest: +SKIP 0 """ with cls._cache_lock: @@ -163,10 +164,10 @@ def cache_size(cls) -> int: Number of cached instances Example: - >>> LocaleContext.clear_cache() - >>> LocaleContext.create('en-US') - >>> LocaleContext.create('de-DE') - >>> LocaleContext.cache_size() + >>> LocaleContext.clear_cache() # doctest: +SKIP + >>> LocaleContext.create('en-US') # doctest: +SKIP + >>> LocaleContext.create('de-DE') # doctest: +SKIP + >>> LocaleContext.cache_size() # doctest: +SKIP 2 """ with cls._cache_lock: @@ -183,9 +184,9 @@ def cache_info(cls) -> dict[str, int | tuple[str, ...]]: - locales: Tuple of cached locale codes (LRU order) Example: - >>> LocaleContext.clear_cache() - >>> LocaleContext.create('en-US') - >>> LocaleContext.cache_info() + >>> LocaleContext.clear_cache() # doctest: +SKIP + >>> LocaleContext.create('en-US') # doctest: +SKIP + >>> LocaleContext.cache_info() # doctest: +SKIP {'size': 1, 'max_size': 128, 'locales': 
('en_us',)} """ with cls._cache_lock: @@ -217,14 +218,14 @@ def create(cls, locale_code: str) -> LocaleContext: canonical locale_code and setting is_fallback=True. Examples: - >>> ctx = LocaleContext.create('en-US') - >>> ctx.locale_code + >>> ctx = LocaleContext.create('en-US') # doctest: +SKIP + >>> ctx.locale_code # doctest: +SKIP 'en_us' - >>> ctx = LocaleContext.create('xx_UNKNOWN') # Unknown locale - >>> ctx.locale_code + >>> ctx = LocaleContext.create('xx_UNKNOWN') # Unknown locale # doctest: +SKIP + >>> ctx.locale_code # doctest: +SKIP 'xx_unknown' - >>> # But formatting uses en_US rules (with warning logged) + Formatting still uses `en_US` rules, with a warning logged: """ normalized_locale = require_locale_code(locale_code, "locale_code") @@ -328,11 +329,13 @@ def create_or_raise(cls, locale_code: str) -> LocaleContext: ValueError: If locale code is invalid or unknown Examples: - >>> ctx = LocaleContext.create_or_raise('en-US') - >>> ctx.locale_code + >>> ctx = LocaleContext.create_or_raise('en-US') # doctest: +SKIP + >>> ctx.locale_code # doctest: +SKIP 'en_us' - >>> LocaleContext.create_or_raise('invalid-locale') # doctest: +IGNORE_EXCEPTION_DETAIL + >>> LocaleContext.create_or_raise( # doctest: +IGNORE_EXCEPTION_DETAIL, +SKIP + ... 'invalid-locale' + ... ) Traceback (most recent call last): ... ValueError: Unknown locale identifier 'invalid-locale' @@ -386,119 +389,17 @@ def format_number( pattern: str | None = None, numbering_system: str = "latn", ) -> str: - """Format number with locale-specific separators. - - Implements Fluent NUMBER function semantics using Babel. - - Args: - value: Number to format (int or Decimal). float is not accepted; - use Decimal(str(float_val)) to convert at system boundaries. 
- minimum_fraction_digits: Minimum decimal places (default: 0) - maximum_fraction_digits: Maximum decimal places (default: 3) - use_grouping: Use thousands separator (default: True) - pattern: Custom number pattern (overrides other parameters) - numbering_system: CLDR numbering system identifier (default: "latn"). - Controls which numeral glyphs are used in the output. - Examples: "latn" (0-9), "arab" (Arabic-Indic), "deva" (Devanagari). - - Returns: - Formatted number string according to locale rules - - Examples: - >>> ctx = LocaleContext.create('en-US') - >>> from decimal import Decimal - >>> ctx.format_number(Decimal('1234.5')) - '1,234.5' - - >>> ctx = LocaleContext.create('de-DE') - >>> ctx.format_number(Decimal('1234.5')) - '1.234,5' - - >>> ctx = LocaleContext.create('lv-LV') - >>> ctx.format_number(Decimal('1234.5')) - '1 234,5' - - >>> ctx = LocaleContext.create('en-US') - >>> ctx.format_number(Decimal('-1234.56'), pattern="#,##0.00;(#,##0.00)") - '(1,234.56)' - - CLDR Compliance: - Uses Babel's format_decimal() which implements CLDR rules. - Matches Intl.NumberFormat behavior in JavaScript. - """ - # Validate digit parameters to prevent DoS via unbounded string allocation - if not 0 <= minimum_fraction_digits <= MAX_FORMAT_DIGITS: - msg = ( - f"minimum_fraction_digits must be 0-{MAX_FORMAT_DIGITS}, " - f"got {minimum_fraction_digits}" - ) - raise ValueError(msg) - if not 0 <= maximum_fraction_digits <= MAX_FORMAT_DIGITS: - msg = ( - f"maximum_fraction_digits must be 0-{MAX_FORMAT_DIGITS}, " - f"got {maximum_fraction_digits}" - ) - raise ValueError(msg) - # When minimum exceeds maximum (e.g., minimumFractionDigits: 4 with - # default maximumFractionDigits: 3), clamp maximum up to minimum. - # Matches JavaScript Intl.NumberFormat semantics: specifying only - # minimumFractionDigits=4 should yield 4 decimal places, not an error. 
- maximum_fraction_digits = max(maximum_fraction_digits, minimum_fraction_digits) - - babel_numbers = get_babel_numbers() - - try: - # Use custom pattern if provided. - if pattern is not None: - return str( - babel_numbers.format_decimal( - value, - format=pattern, - locale=self.babel_locale, - numbering_system=numbering_system, - ) - ) - - # Build format pattern from parameters. - # '#,##0' = integer with grouping; '0' = integer without grouping. - # '#,##0.0##' = 1-3 decimal places with grouping; '0.00' = fixed 2. - integer_part = "#,##0" if use_grouping else "0" - - if maximum_fraction_digits == 0: - format_pattern = integer_part - elif minimum_fraction_digits == maximum_fraction_digits: - decimal_part = "0" * minimum_fraction_digits - format_pattern = f"{integer_part}.{decimal_part}" - else: - required = "0" * minimum_fraction_digits - optional = "#" * (maximum_fraction_digits - minimum_fraction_digits) - format_pattern = f"{integer_part}.{required}{optional}" - - # Babel's decimal_quantization=True (default) applies ROUND_HALF_EVEN, - # which is the IEEE 754 / CLDR-neutral rounding mode. 
- return str( - babel_numbers.format_decimal( - value, - format=format_pattern, - locale=self.babel_locale, - numbering_system=numbering_system, - ) - ) - - except (ValueError, TypeError, InvalidOperation, AttributeError, KeyError) as e: - # Formatting failed - raise FrozenFluentError with fallback value - # The resolver will catch this error, collect it, and use the fallback - fallback = str(value) - diagnostic = ErrorTemplate.formatting_failed("NUMBER", str(value), str(e)) - context = FrozenErrorContext( - input_value=str(value), - locale_code=self.locale_code, - parse_type="number", - fallback_value=fallback, - ) - raise FrozenFluentError( - str(diagnostic), ErrorCategory.FORMATTING, diagnostic=diagnostic, context=context - ) from e + """Format number with locale-specific separators.""" + return format_number_for_locale( + locale_code=self.locale_code, + babel_locale=self.babel_locale, + value=value, + minimum_fraction_digits=minimum_fraction_digits, + maximum_fraction_digits=maximum_fraction_digits, + use_grouping=use_grouping, + pattern=pattern, + numbering_system=numbering_system, + ) def format_datetime( self, @@ -508,168 +409,15 @@ def format_datetime( time_style: Literal["short", "medium", "long", "full"] | None = None, pattern: str | None = None, ) -> str: - """Format datetime with locale-specific formatting. - - Implements Fluent DATETIME function semantics using Babel. - - Args: - value: date, datetime, or ISO 8601 string. FluentValue includes both - date and datetime, so both are accepted. Strings are converted via - datetime.fromisoformat() which accepts formats like: - - "2025-10-27" (date only, time defaults to 00:00:00) - - "2025-10-27T14:30:00" (date and time) - - "2025-10-27T14:30:00+00:00" (with timezone) - - date objects (without time): for date-only formatting (time_style=None), - formatted directly. When time_style is also requested, the date is - promoted to midnight datetime (00:00:00, no tzinfo) so Babel can - format the time component. 
This is the natural behavior for a calendar - date with no intrinsic time. - date_style: Date format style (default: "medium") - time_style: Time format style (default: None - date only) - pattern: Custom datetime pattern (overrides style parameters) - - Returns: - Formatted datetime string according to locale rules - - Raises: - FrozenFluentError: If string value is not valid ISO 8601 format - (category=FORMATTING) - - Examples: - >>> from datetime import date, datetime, UTC - >>> ctx = LocaleContext.create('en-US') - >>> dt = datetime(2025, 10, 27, 14, 30, tzinfo=UTC) - >>> ctx.format_datetime(dt, date_style='short') - '10/27/25' - - >>> ctx = LocaleContext.create('de-DE') - >>> ctx.format_datetime(dt, date_style='short') - '27.10.25' - - >>> ctx = LocaleContext.create('en-US') - >>> ctx.format_datetime(dt, pattern='yyyy-MM-dd') - '2025-10-27' - - >>> ctx = LocaleContext.create('en-US') - >>> ctx.format_datetime(date(2025, 10, 27), date_style='short') - '10/27/25' - - CLDR Compliance: - Uses Babel's format_datetime() which implements CLDR rules. - Matches Intl.DateTimeFormat behavior in JavaScript. - """ - babel_dates = get_babel_dates() - - # Type narrowing: produce a datetime for all paths. - # datetime must be checked before date because datetime IS a date subtype; - # isinstance(some_datetime, date) is True, so order matters here. 
- dt_value: datetime | date - - if isinstance(value, str): - try: - dt_value = datetime.fromisoformat(value) - except ValueError as e: - # Invalid datetime string - raise FrozenFluentError with fallback - # This ensures consistent error handling across all format_* methods - fallback = FALLBACK_FUNCTION_ERROR.format(name="DATETIME") - diagnostic = ErrorTemplate.formatting_failed( - "DATETIME", value, "not ISO 8601 format" - ) - context = FrozenErrorContext( - input_value=value, - locale_code=self.locale_code, - parse_type="datetime", - fallback_value=fallback, - ) - raise FrozenFluentError( - str(diagnostic), ErrorCategory.FORMATTING, - diagnostic=diagnostic, context=context - ) from e - elif isinstance(value, datetime): - # datetime is a subtype of date — must check datetime first - dt_value = value - else: - # Plain date object. - dt_value = value - - # Promote plain date to midnight datetime when a time component is needed. - # babel_dates.format_datetime() and format_time() require a datetime, not - # a bare date. A calendar date with no intrinsic time promotes to 00:00:00 - # (no tzinfo — the date carried no timezone, so none is inferred). 
- if isinstance(dt_value, date) and not isinstance(dt_value, datetime) and ( - time_style is not None or pattern is not None - ): - dt_value = datetime( # noqa: DTZ001 - date carries no tz; midnight promotion is explicitly naive - dt_value.year, dt_value.month, dt_value.day - ) - - try: - # Use custom pattern if provided - if pattern is not None: - return str( - babel_dates.format_datetime( - dt_value, - format=pattern, - locale=self.babel_locale, - ) - ) - - # Map Fluent styles to Babel format strings - if time_style: - # Both date and time - use locale's dateTimeFormat to combine - date_str = babel_dates.format_date( - dt_value, format=date_style, locale=self.babel_locale - ) - time_str = babel_dates.format_time( - dt_value, format=time_style, locale=self.babel_locale - ) - # Get locale's dateTimeFormat pattern for combining date and time - # Pattern uses {0} for time and {1} for date per CLDR spec - # Use multi-level fallback: requested style -> medium -> short -> hardcoded - # - # Ultimate fallback "{1} {0}" rationale: - # - {1} = date, {0} = time per CLDR convention - # - Space separator is universally acceptable (no locale uses no separator) - # - date-before-time order is most common globally (ISO 8601, CJK, most of Europe) - # - For locales where time-before-date is preferred (e.g., some EN variants), - # Babel should always provide CLDR data, so this fallback rarely triggers - datetime_pattern = ( - self.babel_locale.datetime_formats.get(date_style) - or self.babel_locale.datetime_formats.get("medium") - or self.babel_locale.datetime_formats.get("short") - or "{1} {0}" # Ultimate fallback (Western LTR: date space time) - ) - # DateTimePattern objects have format() method, strings use str.format() - if hasattr(datetime_pattern, "format"): - return str(datetime_pattern.format(time_str, date_str)) - return str(datetime_pattern).format(time_str, date_str) - # Date only - return str( - babel_dates.format_date( - dt_value, - format=date_style, - 
locale=self.babel_locale, - ) - ) - - except (ValueError, OverflowError, AttributeError, KeyError) as e: - # Formatting failed - raise FrozenFluentError with fallback value - # The resolver will catch this error, collect it, and use the fallback - fallback = dt_value.isoformat() - diagnostic = ErrorTemplate.formatting_failed( - "DATETIME", str(dt_value), str(e) - ) - context = FrozenErrorContext( - input_value=str(dt_value), - locale_code=self.locale_code, - parse_type="datetime", - fallback_value=fallback, - ) - raise FrozenFluentError( - str(diagnostic), ErrorCategory.FORMATTING, - diagnostic=diagnostic, context=context - ) from e + """Format datetime with locale-specific formatting.""" + return format_datetime_for_locale( + locale_code=self.locale_code, + babel_locale=self.babel_locale, + value=value, + date_style=date_style, + time_style=time_style, + pattern=pattern, + ) def format_currency( self, @@ -682,174 +430,24 @@ def format_currency( currency_digits: bool = True, numbering_system: str = "latn", ) -> str: - """Format currency with locale-specific rules. - - Implements Fluent CURRENCY function semantics using Babel. - - Args: - value: Monetary amount (int or Decimal). float is not accepted; - use Decimal(str(float_val)) to convert at system boundaries. - currency: ISO 4217 currency code (EUR, USD, JPY, BHD, etc.) - currency_display: Display style for currency - - "symbol": Use currency symbol (default) - - "code": Use currency code (EUR, USD, JPY) - - "name": Use currency name (euros, dollars, yen) - pattern: Custom currency pattern (overrides currency_display). - CLDR currency pattern placeholders: - - Use double currency sign for ISO code display - - Standard patterns use single currency sign for symbol - use_grouping: Use thousands separator (default: True) - currency_digits: Use ISO 4217 decimal places for the currency - (default: True). When False, no automatic decimal place adjustment - is made and the value is formatted as-is. 
Has no effect when - ``pattern`` is provided (pattern precision takes precedence). - numbering_system: CLDR numbering system identifier (default: "latn"). - Controls which numeral glyphs are used in the output. - Examples: "latn" (0-9), "arab" (Arabic-Indic), "deva" (Devanagari). - - Returns: - Formatted currency string according to locale rules - - Examples: - >>> from decimal import Decimal - >>> ctx = LocaleContext.create('en-US') - >>> ctx.format_currency(Decimal('123.45'), currency='EUR') - '€123.45' - - >>> ctx = LocaleContext.create('lv-LV') - >>> ctx.format_currency(Decimal('123.45'), currency='EUR') - '123,45 €' - - >>> ctx = LocaleContext.create('ja-JP') - >>> ctx.format_currency(12345, currency='JPY') - '¥12,345' - - >>> ctx = LocaleContext.create('ar-BH') - >>> ctx.format_currency(Decimal('123.456'), currency='BHD') - '123.456 د.ب.' - - >>> # Custom pattern example - >>> ctx = LocaleContext.create('en-US') - >>> ctx.format_currency(Decimal('1234.56'), currency='USD', pattern='#,##0.00 ¤') - '1,234.56 $' - - CLDR Compliance: - Uses Babel's format_currency() which implements CLDR rules. - Matches Intl.NumberFormat with style: 'currency'. - Automatically applies currency-specific decimal places when - currency_digits=True (default): - - JPY: 0 decimals - - BHD, KWD, OMR: 3 decimals - - Most others: 2 decimals - """ - babel_numbers = get_babel_numbers() - - try: - # Custom pattern overrides currency_display. - # currency_digits=False: the pattern explicitly controls decimal places; - # CLDR ISO 4217 defaults must not override the pattern's precision. - if pattern is not None: - return str( - babel_numbers.format_currency( - value, - currency, - format=pattern, - locale=self.babel_locale, - currency_digits=False, - group_separator=use_grouping, - numbering_system=numbering_system, - ) - ) - - # Map currency_display to Babel's format_type parameter. 
- if currency_display == "name": - format_type: Literal["name", "standard", "accounting"] = "name" - return str( - babel_numbers.format_currency( - value, - currency, - locale=self.babel_locale, - currency_digits=currency_digits, - format_type=format_type, - group_separator=use_grouping, - numbering_system=numbering_system, - ) - ) - - if currency_display == "code": - # Double currency sign (¤¤) per CLDR displays ISO code. - code_pattern = self._get_iso_code_pattern() - if code_pattern is not None: - return str( - babel_numbers.format_currency( - value, - currency, - format=code_pattern, - locale=self.babel_locale, - currency_digits=currency_digits, - group_separator=use_grouping, - numbering_system=numbering_system, - ) - ) - # Fallback: use standard format if pattern extraction fails. - - # Default: symbol display using standard format. - return str( - babel_numbers.format_currency( - value, - currency, - locale=self.babel_locale, - currency_digits=currency_digits, - format_type="standard", - group_separator=use_grouping, - numbering_system=numbering_system, - ) - ) - - except (ValueError, TypeError, InvalidOperation, AttributeError, KeyError) as e: - # Formatting failed - raise FrozenFluentError with fallback value - # The resolver will catch this error, collect it, and use the fallback - fallback = f"{currency} {value}" - diagnostic = ErrorTemplate.formatting_failed( - "CURRENCY", f"{currency} {value}", str(e) - ) - context = FrozenErrorContext( - input_value=f"{currency} {value}", - locale_code=self.locale_code, - parse_type="currency", - fallback_value=fallback, - ) - raise FrozenFluentError( - str(diagnostic), ErrorCategory.FORMATTING, - diagnostic=diagnostic, context=context - ) from e + """Format currency with locale-specific rules.""" + return format_currency_for_locale( + locale_code=self.locale_code, + babel_locale=self.babel_locale, + value=value, + currency=currency, + currency_display=currency_display, + pattern=pattern, + use_grouping=use_grouping, 
+ currency_digits=currency_digits, + numbering_system=numbering_system, + debug_logger=logger, + ) def _get_iso_code_pattern(self) -> str | None: - """Get CLDR pattern for ISO currency code display. - - Per CLDR specification: - - Single currency sign (U+00A4) displays currency symbol - - Double currency sign (U+00A4 U+00A4) displays ISO code - - This helper extracts the standard currency pattern and replaces - single currency signs with double signs for ISO code display. - - Returns: - Modified pattern for ISO code display, or None if extraction fails. - """ - locale_currency_formats = self.babel_locale.currency_formats - standard_pattern = locale_currency_formats.get("standard") - if standard_pattern is None or not hasattr(standard_pattern, "pattern"): - return None - - raw_pattern = standard_pattern.pattern - # Guard: verify currency placeholder exists before replacement - # Single U+00A4 = symbol, Double U+00A4 U+00A4 = ISO code per CLDR - if "\xa4" not in raw_pattern: - logger.debug( - "Currency pattern for locale %s lacks placeholder", - self.locale_code, - ) - return None - - return str(raw_pattern.replace("\xa4", "\xa4\xa4")) + """Get CLDR pattern for ISO currency code display.""" + return get_iso_code_pattern_for_locale( + locale_code=self.locale_code, + babel_locale=self.babel_locale, + debug_logger=logger, + ) diff --git a/src/ftllexengine/runtime/locale_formatting.py b/src/ftllexengine/runtime/locale_formatting.py new file mode 100644 index 00000000..3bc9d4c8 --- /dev/null +++ b/src/ftllexengine/runtime/locale_formatting.py @@ -0,0 +1,330 @@ +"""Locale-scoped formatting helpers used by ``LocaleContext``. + +Keeps the public ``LocaleContext`` facade focused on cache and lifecycle +management while the heavy number/date/currency formatting machinery lives in a +dedicated internal module. 
+""" + +from __future__ import annotations + +import logging +from datetime import date, datetime +from decimal import Decimal, InvalidOperation +from typing import TYPE_CHECKING, Literal + +from ftllexengine.constants import FALLBACK_FUNCTION_ERROR, MAX_FORMAT_DIGITS +from ftllexengine.core.babel_compat import get_babel_dates, get_babel_numbers +from ftllexengine.diagnostics import ErrorCategory, FrozenErrorContext, FrozenFluentError +from ftllexengine.diagnostics.templates import ErrorTemplate + +if TYPE_CHECKING: + from babel import Locale + + from ftllexengine.core.semantic_types import LocaleCode + +logger = logging.getLogger(__name__) + +__all__ = [ + "format_currency_for_locale", + "format_datetime_for_locale", + "format_number_for_locale", + "get_iso_code_pattern_for_locale", +] + + +def format_number_for_locale( + *, + locale_code: LocaleCode, + babel_locale: Locale, + value: int | Decimal, + minimum_fraction_digits: int = 0, + maximum_fraction_digits: int = 3, + use_grouping: bool = True, + pattern: str | None = None, + numbering_system: str = "latn", +) -> str: + """Format a number using the supplied Babel locale.""" + if not 0 <= minimum_fraction_digits <= MAX_FORMAT_DIGITS: + msg = ( + f"minimum_fraction_digits must be 0-{MAX_FORMAT_DIGITS}, " + f"got {minimum_fraction_digits}" + ) + raise ValueError(msg) + if not 0 <= maximum_fraction_digits <= MAX_FORMAT_DIGITS: + msg = ( + f"maximum_fraction_digits must be 0-{MAX_FORMAT_DIGITS}, " + f"got {maximum_fraction_digits}" + ) + raise ValueError(msg) + maximum_fraction_digits = max(maximum_fraction_digits, minimum_fraction_digits) + + babel_numbers = get_babel_numbers() + + try: + if pattern is not None: + return str( + babel_numbers.format_decimal( + value, + format=pattern, + locale=babel_locale, + numbering_system=numbering_system, + ) + ) + + integer_part = "#,##0" if use_grouping else "0" + + if maximum_fraction_digits == 0: + format_pattern = integer_part + elif minimum_fraction_digits == 
maximum_fraction_digits: + decimal_part = "0" * minimum_fraction_digits + format_pattern = f"{integer_part}.{decimal_part}" + else: + required = "0" * minimum_fraction_digits + optional = "#" * (maximum_fraction_digits - minimum_fraction_digits) + format_pattern = f"{integer_part}.{required}{optional}" + + return str( + babel_numbers.format_decimal( + value, + format=format_pattern, + locale=babel_locale, + numbering_system=numbering_system, + ) + ) + + except (ValueError, TypeError, InvalidOperation, AttributeError, KeyError) as e: + fallback = str(value) + diagnostic = ErrorTemplate.formatting_failed("NUMBER", str(value), str(e)) + context = FrozenErrorContext( + input_value=str(value), + locale_code=locale_code, + parse_type="number", + fallback_value=fallback, + ) + raise FrozenFluentError( + str(diagnostic), + ErrorCategory.FORMATTING, + diagnostic=diagnostic, + context=context, + ) from e + + +def format_datetime_for_locale( + *, + locale_code: LocaleCode, + babel_locale: Locale, + value: date | datetime | str, + date_style: Literal["short", "medium", "long", "full"] = "medium", + time_style: Literal["short", "medium", "long", "full"] | None = None, + pattern: str | None = None, +) -> str: + """Format a date or datetime using the supplied Babel locale.""" + babel_dates = get_babel_dates() + dt_value: datetime | date + + if isinstance(value, str): + try: + dt_value = datetime.fromisoformat(value) + except ValueError as e: + fallback = FALLBACK_FUNCTION_ERROR.format(name="DATETIME") + diagnostic = ErrorTemplate.formatting_failed( + "DATETIME", value, "not ISO 8601 format" + ) + context = FrozenErrorContext( + input_value=value, + locale_code=locale_code, + parse_type="datetime", + fallback_value=fallback, + ) + raise FrozenFluentError( + str(diagnostic), + ErrorCategory.FORMATTING, + diagnostic=diagnostic, + context=context, + ) from e + elif isinstance(value, datetime): + dt_value = value + else: + dt_value = value + + if isinstance(dt_value, date) and not 
isinstance(dt_value, datetime) and ( + time_style is not None or pattern is not None + ): + dt_value = datetime( # noqa: DTZ001 - date carries no tz; midnight promotion is explicitly naive + dt_value.year, + dt_value.month, + dt_value.day, + ) + + try: + if pattern is not None: + return str( + babel_dates.format_datetime( + dt_value, + format=pattern, + locale=babel_locale, + ) + ) + + if time_style: + date_str = babel_dates.format_date( + dt_value, + format=date_style, + locale=babel_locale, + ) + time_str = babel_dates.format_time( + dt_value, + format=time_style, + locale=babel_locale, + ) + datetime_pattern = ( + babel_locale.datetime_formats.get(date_style) + or babel_locale.datetime_formats.get("medium") + or babel_locale.datetime_formats.get("short") + or "{1} {0}" + ) + if hasattr(datetime_pattern, "format"): + return str(datetime_pattern.format(time_str, date_str)) + return str(datetime_pattern).format(time_str, date_str) + + return str( + babel_dates.format_date( + dt_value, + format=date_style, + locale=babel_locale, + ) + ) + + except (ValueError, OverflowError, AttributeError, KeyError) as e: + fallback = dt_value.isoformat() + diagnostic = ErrorTemplate.formatting_failed("DATETIME", str(dt_value), str(e)) + context = FrozenErrorContext( + input_value=str(dt_value), + locale_code=locale_code, + parse_type="datetime", + fallback_value=fallback, + ) + raise FrozenFluentError( + str(diagnostic), + ErrorCategory.FORMATTING, + diagnostic=diagnostic, + context=context, + ) from e + + +def format_currency_for_locale( + *, + locale_code: LocaleCode, + babel_locale: Locale, + value: int | Decimal, + currency: str, + currency_display: Literal["symbol", "code", "name"] = "symbol", + pattern: str | None = None, + use_grouping: bool = True, + currency_digits: bool = True, + numbering_system: str = "latn", + debug_logger: logging.Logger | None = None, +) -> str: + """Format a currency value using the supplied Babel locale.""" + babel_numbers = get_babel_numbers() + 
+ try: + if pattern is not None: + return str( + babel_numbers.format_currency( + value, + currency, + format=pattern, + locale=babel_locale, + currency_digits=False, + group_separator=use_grouping, + numbering_system=numbering_system, + ) + ) + + if currency_display == "name": + format_type: Literal["name", "standard", "accounting"] = "name" + return str( + babel_numbers.format_currency( + value, + currency, + locale=babel_locale, + currency_digits=currency_digits, + format_type=format_type, + group_separator=use_grouping, + numbering_system=numbering_system, + ) + ) + + if currency_display == "code": + code_pattern = get_iso_code_pattern_for_locale( + locale_code=locale_code, + babel_locale=babel_locale, + debug_logger=debug_logger, + ) + if code_pattern is not None: + return str( + babel_numbers.format_currency( + value, + currency, + format=code_pattern, + locale=babel_locale, + currency_digits=currency_digits, + group_separator=use_grouping, + numbering_system=numbering_system, + ) + ) + + return str( + babel_numbers.format_currency( + value, + currency, + locale=babel_locale, + currency_digits=currency_digits, + format_type="standard", + group_separator=use_grouping, + numbering_system=numbering_system, + ) + ) + + except (ValueError, TypeError, InvalidOperation, AttributeError, KeyError) as e: + fallback = f"{currency} {value}" + diagnostic = ErrorTemplate.formatting_failed( + "CURRENCY", f"{currency} {value}", str(e) + ) + context = FrozenErrorContext( + input_value=f"{currency} {value}", + locale_code=locale_code, + parse_type="currency", + fallback_value=fallback, + ) + raise FrozenFluentError( + str(diagnostic), + ErrorCategory.FORMATTING, + diagnostic=diagnostic, + context=context, + ) from e + + +def get_iso_code_pattern_for_locale( + *, + locale_code: LocaleCode, + babel_locale: Locale, + debug_logger: logging.Logger | None = None, +) -> str | None: + """Return a CLDR currency pattern rewritten for ISO code display.""" + active_logger = logger if 
debug_logger is None else debug_logger + locale_currency_formats = babel_locale.currency_formats + standard_pattern = locale_currency_formats.get("standard") + if standard_pattern is None or not hasattr(standard_pattern, "pattern"): + return None + + raw_pattern = standard_pattern.pattern + if "\xa4" not in raw_pattern: + active_logger.debug( + "Currency pattern for locale %s lacks placeholder", + locale_code, + ) + return None + + return str(raw_pattern.replace("\xa4", "\xa4\xa4")) diff --git a/src/ftllexengine/runtime/plural_rules.py b/src/ftllexengine/runtime/plural_rules.py index d971d314..9936f945 100644 --- a/src/ftllexengine/runtime/plural_rules.py +++ b/src/ftllexengine/runtime/plural_rules.py @@ -49,25 +49,25 @@ def select_plural_category( BabelImportError: If Babel is not installed Examples: - >>> select_plural_category(0, "lv_LV") + >>> select_plural_category(0, "lv_LV") # doctest: +SKIP 'zero' - >>> select_plural_category(1, "en_US") + >>> select_plural_category(1, "en_US") # doctest: +SKIP 'one' - >>> select_plural_category(1, "en_US", precision=2) # "1.00" has v=2 + >>> select_plural_category(1, "en_US", precision=2) # "1.00" has v=2 # doctest: +SKIP 'other' - >>> select_plural_category(5, "ru_RU") + >>> select_plural_category(5, "ru_RU") # doctest: +SKIP 'many' - >>> select_plural_category(2, "ar_SA") + >>> select_plural_category(2, "ar_SA") # doctest: +SKIP 'two' - >>> select_plural_category(42, "ja_JP") + >>> select_plural_category(42, "ja_JP") # doctest: +SKIP 'other' - >>> select_plural_category(1, "en_US", ordinal=True) + >>> select_plural_category(1, "en_US", ordinal=True) # doctest: +SKIP 'one' - >>> select_plural_category(2, "en_US", ordinal=True) + >>> select_plural_category(2, "en_US", ordinal=True) # doctest: +SKIP 'two' - >>> select_plural_category(3, "en_US", ordinal=True) + >>> select_plural_category(3, "en_US", ordinal=True) # doctest: +SKIP 'few' - >>> select_plural_category(11, "en_US", ordinal=True) + >>> select_plural_category(11, 
"en_US", ordinal=True) # doctest: +SKIP 'other' Architecture: diff --git a/src/ftllexengine/runtime/resolution_context.py b/src/ftllexengine/runtime/resolution_context.py index 0d12d794..9bdc998c 100644 --- a/src/ftllexengine/runtime/resolution_context.py +++ b/src/ftllexengine/runtime/resolution_context.py @@ -29,6 +29,7 @@ ErrorTemplate, FrozenFluentError, ) +from ftllexengine.diagnostics.depth import resolution_depth_error from ftllexengine.integrity import DataIntegrityError, IntegrityContext __all__ = ["GlobalDepthGuard", "ResolutionContext"] @@ -149,7 +150,8 @@ class ResolutionContext: def __post_init__(self) -> None: """Initialize the expression depth guard with configured max depth.""" self._expression_guard = DepthGuard( - max_depth=self.max_expression_depth + max_depth=self.max_expression_depth, + error_factory=resolution_depth_error, ) def push(self, key: str) -> None: diff --git a/src/ftllexengine/runtime/resolver.py b/src/ftllexengine/runtime/resolver.py index 0311a4aa..c43e1db7 100644 --- a/src/ftllexengine/runtime/resolver.py +++ b/src/ftllexengine/runtime/resolver.py @@ -16,36 +16,32 @@ from __future__ import annotations import logging -from collections.abc import Mapping, Sequence -from decimal import Decimal from typing import TYPE_CHECKING from ftllexengine.constants import ( DEFAULT_MAX_EXPANSION_SIZE, - FALLBACK_FUNCTION_ERROR, - FALLBACK_INVALID, FALLBACK_MISSING_MESSAGE, FALLBACK_MISSING_TERM, - FALLBACK_MISSING_VARIABLE, MAX_DEPTH, ) from ftllexengine.core import depth_clamp -from ftllexengine.core.babel_compat import BabelImportError -from ftllexengine.core.value_types import FluentNumber from ftllexengine.diagnostics import ( ErrorCategory, ErrorTemplate, FrozenFluentError, ) -from ftllexengine.runtime.plural_rules import select_plural_category +from ftllexengine.runtime.plural_rules import ( + select_plural_category as _select_plural_category, +) from ftllexengine.runtime.resolution_context import ( GlobalDepthGuard, ResolutionContext, ) 
+from ftllexengine.runtime.resolver_runtime import _ResolverRuntimeMixin +from ftllexengine.runtime.resolver_selection import _ResolverSelectionMixin from ftllexengine.syntax import ( Expression, FunctionReference, - Identifier, Message, MessageReference, NumberLiteral, @@ -57,15 +53,18 @@ TermReference, TextElement, VariableReference, - Variant, ) if TYPE_CHECKING: + from collections.abc import Mapping + from ftllexengine.core.value_types import FluentValue from ftllexengine.runtime.function_bridge import FunctionRegistry __all__ = ["FluentResolver", "GlobalDepthGuard", "ResolutionContext"] +select_plural_category = _select_plural_category + logger = logging.getLogger(__name__) # Unicode bidirectional isolation characters per Unicode TR9. @@ -73,13 +72,8 @@ UNICODE_FSI: str = "\u2068" # U+2068 FIRST STRONG ISOLATE UNICODE_PDI: str = "\u2069" # U+2069 POP DIRECTIONAL ISOLATE -# Maximum recursion depth for fallback string generation in _get_fallback_for_placeable. -# Fallback rendering is purely diagnostic (shown when resolution fails), so a shallow -# depth limit prevents runaway recursion while still capturing meaningful context. -_FALLBACK_MAX_DEPTH: int = 10 - -class FluentResolver: +class FluentResolver(_ResolverRuntimeMixin, _ResolverSelectionMixin): """Resolves Fluent messages to strings. Aligned with Mozilla python-fluent error handling: @@ -525,478 +519,3 @@ def _resolve_term_reference( return self._resolve_pattern(pattern, term_args, errors, context) finally: context.pop() - - def _find_exact_variant( - self, - variants: Sequence[Variant], - selector_value: FluentValue, - selector_str: str, - ) -> Variant | None: - """Pass 1: Find variant with exact string or number match. - - Args: - variants: Sequence of variants to search - selector_value: Resolved selector value (for numeric comparison) - selector_str: String representation of selector (for string comparison) - - Returns: - Matching variant or None if no exact match found. 
- """ - # Compute numeric selector once before the loop. Only int, Decimal, and FluentNumber - # are valid numeric selector types (float is excluded from FluentValue). - # bool is excluded: isinstance(True, int) is True but str(True) == "True" is not - # a valid Decimal literal. FluentNumber wraps NUMBER()-formatted values while - # preserving the original numeric value for matching (.value attribute). - numeric_for_match: int | Decimal | None = None - if isinstance(selector_value, FluentNumber): - numeric_for_match = selector_value.value - elif isinstance(selector_value, (int, Decimal)) and not isinstance(selector_value, bool): - numeric_for_match = selector_value - - # Pre-convert selector to Decimal once for all NumberLiteral comparisons in the loop. - # Decimal(raw_str) per variant still executes inside the loop since raw_str varies. - # NumberLiteral.__post_init__ guarantees raw is a parseable finite number. - sel_decimal: Decimal | None = None - if numeric_for_match is not None: - sel_decimal = Decimal(str(numeric_for_match)) - - for variant in variants: - match variant.key: - case Identifier(name=key_name): - if key_name == selector_str: - return variant - case NumberLiteral(raw=raw_str): - if sel_decimal is not None and Decimal(raw_str) == sel_decimal: - return variant - return None - - def _find_plural_variant( - self, - variants: Sequence[Variant], - plural_category: str, - ) -> Variant | None: - """Pass 2: Find variant matching plural category. - - Args: - variants: Sequence of variants to search - plural_category: CLDR plural category (zero, one, two, few, many, other) - - Returns: - Matching variant or None if no plural category match found. - """ - for variant in variants: - match variant.key: - case Identifier(name=key_name): - if key_name == plural_category: - return variant - return None - - def _find_default_variant(self, variants: Sequence[Variant]) -> Variant | None: - """Find the default variant (marked with *). 
- - Args: - variants: Sequence of variants to search - - Returns: - Default variant or None if no default marked. - """ - for variant in variants: - if variant.default: - return variant - return None - - def _resolve_select_expression( - self, - expr: SelectExpression, - args: Mapping[str, FluentValue], - errors: list[FrozenFluentError], - context: ResolutionContext, - ) -> str: - """Resolve select expression by matching variant. - - Matching priority (two-pass linear scan): - 1. Exact string/number match (pass 1) - 2. Plural category match for numeric selectors (pass 2) - 3. Default variant - 4. First variant (fallback) - - For typical FTL files with <5 variants, linear scan is more efficient - than building dictionary indices. Exact matches always take precedence - over plural category matches, regardless of variant order in FTL source. - - Error handling: - If the selector expression fails (e.g., missing variable), the error - is collected and resolution falls back to the default variant. This - ensures robustness and matches the Fluent spec behavior. - """ - # Evaluate selector with error resilience. - # If selector evaluation fails (e.g., missing variable), collect the error - # and fall back to the default variant per Fluent spec. - # Wrap in expression_guard to track depth for DoS protection. - try: - with context.expression_guard: - selector_value = self._resolve_expression( - expr.selector, args, errors, context - ) - except FrozenFluentError as e: - # Collect the error but don't propagate - fall back to default variant - errors.append(e) - return self._resolve_fallback_variant(expr, args, errors, context) - - # Use _format_value for consistent string representation. 
- # This ensures: - # - None -> "" (falls through to default variant) - # - bool -> "true"/"false" (matches FTL variant keys, not Python "True"/"False") - # - FluentNumber -> formatted string (display representation) - # - Other types -> str() representation - selector_str = self._format_value(selector_value) - - # Pass 1: Exact match (takes priority) - exact_match = self._find_exact_variant(expr.variants, selector_value, selector_str) - if exact_match is not None: - return self._resolve_pattern(exact_match.value, args, errors, context) - - # Pass 2: Plural category match (numeric selectors only) - # FluentValue includes Decimal for currency/financial values. - # FluentNumber wraps formatted numbers while preserving numeric identity. - # float is not in FluentValue: only int and Decimal are valid numeric types. - # Note: Exclude bool since isinstance(True, int) is True in Python, - # but booleans should match [true]/[false] variants, not plural categories. - # - # Extract numeric value and precision from FluentNumber for plural matching. - numeric_value: int | Decimal | None = None - precision: int | None = None - if isinstance(selector_value, FluentNumber): - numeric_value = selector_value.value - precision = selector_value.precision - elif isinstance(selector_value, (int, Decimal)) and not isinstance( - selector_value, bool - ): - numeric_value = selector_value - - if numeric_value is not None: - # Try plural category matching (requires Babel for CLDR data). - # If Babel is not installed (parser-only mode), collect error and - # fall through to default variant. - try: - # Pass precision to ensure CLDR v operand (fraction digit count) is correct. - # Example: NUMBER(1, minimumFractionDigits: 2) creates FluentNumber with - # precision=2, which makes select_plural_category treat it as "1.00" (v=2), - # selecting "other" instead of "one" in English plural rules. 
- plural_category = select_plural_category(numeric_value, self._locale, precision) - plural_match = self._find_plural_variant(expr.variants, plural_category) - if plural_match is not None: - return self._resolve_pattern( - plural_match.value, args, errors, context - ) - except BabelImportError: - # Babel not installed - collect error, fall through to default - diag = ErrorTemplate.plural_support_unavailable() - errors.append( - FrozenFluentError(str(diag), ErrorCategory.RESOLUTION, diagnostic=diag) - ) - - # Fallback: default variant - default_variant = self._find_default_variant(expr.variants) - if default_variant is not None: - return self._resolve_pattern(default_variant.value, args, errors, context) - - # Fallback: first variant - if expr.variants: - return self._resolve_pattern(expr.variants[0].value, args, errors, context) - - diag = ErrorTemplate.no_variants() - raise FrozenFluentError(str(diag), ErrorCategory.RESOLUTION, diagnostic=diag) - - def _resolve_fallback_variant( - self, - expr: SelectExpression, - args: Mapping[str, FluentValue], - errors: list[FrozenFluentError], - context: ResolutionContext, - ) -> str: - """Resolve fallback variant when selector evaluation fails. - - Attempts to resolve in order: - 1. Default variant (marked with *) - 2. 
First variant - - Args: - expr: The SelectExpression to resolve - args: Arguments for pattern resolution - errors: Error list for error collection - context: Resolution context - - Returns: - Resolved variant pattern string - - Raises: - FrozenFluentError: If no variants exist (category=RESOLUTION) - """ - # Try default variant first - default_variant = self._find_default_variant(expr.variants) - if default_variant is not None: - return self._resolve_pattern(default_variant.value, args, errors, context) - - # Fall back to first variant - if expr.variants: - return self._resolve_pattern(expr.variants[0].value, args, errors, context) - - diag = ErrorTemplate.no_variants() - raise FrozenFluentError(str(diag), ErrorCategory.RESOLUTION, diagnostic=diag) - - def _resolve_function_call( - self, - func_ref: FunctionReference, - args: Mapping[str, FluentValue], - errors: list[FrozenFluentError], - context: ResolutionContext, - ) -> FluentValue: - """Resolve function call. - - Uses FunctionRegistry to handle camelCase → snake_case parameter conversion. - Uses metadata system to determine if locale injection is needed. - - Exception Handling: - FrozenFluentError from registry (TypeError/ValueError) propagates to - pattern-level handler. Other exceptions (bugs in custom functions) - are caught here to provide graceful degradation per Fluent spec. - This ensures resolution "never fails catastrophically." - - Security: - Wraps argument resolution in expression_guard to prevent DoS via deeply - nested function calls like NUMBER(A(B(C(...)))). Each nested call - consumes stack frames during resolution. - - Returns FluentValue which the resolver will convert to string for final output. - """ - func_name = func_ref.id.name - - # Evaluate arguments within depth guard (DoS prevention) - # Function arguments can contain nested function calls: NUMBER(ABS(FLOOR($x))) - # Without depth tracking, deeply nested calls can exhaust the Python stack. 
- with context.expression_guard: - positional_values: list[FluentValue] = [ - self._resolve_expression(arg, args, errors, context) - for arg in func_ref.arguments.positional - ] - - # Evaluate named arguments (camelCase from FTL) - named_values: dict[str, FluentValue] = { - arg.name.name: self._resolve_expression(arg.value, args, errors, context) - for arg in func_ref.arguments.named - } - - # Check if locale injection is needed (metadata-driven, not magic tuple) - # This correctly handles custom functions with same name as built-ins - if self._function_registry.should_inject_locale(func_name): - # Validate arity before injection to provide clear error messages - # instead of opaque TypeError from incorrect argument positioning - expected_args = self._function_registry.get_expected_positional_args(func_name) - if expected_args is not None and len(positional_values) != expected_args: - diag = ErrorTemplate.function_arity_mismatch( - func_name, expected_args, len(positional_values) - ) - raise FrozenFluentError(str(diag), ErrorCategory.RESOLUTION, diagnostic=diag) - - # Built-in formatting functions expect signature: func(value, locale, *, ...) - # Append locale after positional args (FTL passes exactly one value arg, - # so this places locale as the second positional argument by contract) - # FunctionRegistry.call() handles camelCase -> snake_case conversion - return self._call_function_safe( - func_name, - [*positional_values, self._locale], - named_values, - errors, - ) - - # Custom function or built-in that doesn't need locale: pass args as-is - return self._call_function_safe( - func_name, - positional_values, - named_values, - errors, - ) - - def _call_function_safe( - self, - func_name: str, - positional: list[FluentValue], - named: dict[str, FluentValue], - errors: list[FrozenFluentError], - ) -> FluentValue: - """Call a registered function with graceful error handling. - - FrozenFluentError from the registry propagates directly (already - structured). 
Any other exception is caught and converted to a - diagnostic error per Fluent spec requirement that resolution must - "never fail catastrophically." - - Args: - func_name: Function name as it appears in FTL. - positional: Positional argument values (locale may be appended). - named: Named argument values (camelCase keys from FTL). - errors: Mutable error accumulator for the current resolution. - - Returns: - Function result on success, or fallback error string on failure. - """ - try: - return self._function_registry.call(func_name, positional, named) - except FrozenFluentError: - # Already structured error from registry (TypeError/ValueError), - # let it propagate to pattern-level handler - raise - except Exception as e: # noqa: BLE001 - spec requires graceful degradation for custom functions - # Intentionally broad: Fluent spec requires graceful degradation - # for ANY exception from custom functions. - logger.warning( - "Custom function %s raised %s: %s", - func_name, - type(e).__name__, - str(e), - ) - diag = ErrorTemplate.function_failed( - func_name, f"Uncaught exception: {type(e).__name__}: {e}" - ) - errors.append( - FrozenFluentError(str(diag), ErrorCategory.RESOLUTION, diagnostic=diag) - ) - return FALLBACK_FUNCTION_ERROR.format(name=func_name) - - def _format_value(self, value: FluentValue) -> str: - """Format FluentValue to string for final output. - - Handles all types in the FluentValue union: - - str: returned as-is - - bool: "true"/"false" (Fluent convention) - - int: string representation - - Decimal/datetime/date/FluentNumber: string representation via __str__ - - Sequence/Mapping: type name (collections are for function args, not display) - - None: empty string - - float is not in FluentValue and is explicitly rejected here at runtime. 
- Mypy strict mode catches float misuse at statically typed call sites, but - custom functions registered via FunctionRegistry return Python values through - function_bridge.py without return-type validation — a float return bypasses - mypy entirely. Without this guard, float silently produces IEEE 754 noise - (e.g., "3.1400000000000001") in financial output. Use int for whole amounts - or Decimal for fractional amounts. - - The float case raises FrozenFluentError (not TypeError) so callers' - ``except FrozenFluentError`` handlers catch it and return a graceful - fallback per Fluent spec requirement that resolution never fails - catastrophically. TypeError would escape those handlers entirely. - - Pattern order rationale: - - str before Sequence: str implements Sequence; must match str first to avoid - the collection guard path. - - bool before int: bool is a subclass of int; must match bool first to produce - "true"/"false" rather than "1"/"0". - - float before the wildcard: explicit rejection as FrozenFluentError so the - caller's error handler catches it; float is not in FluentValue but the - wildcard would silently accept it. - - None as explicit case: avoids the collection guard and the str(None) path. - - Sequence | Mapping before the default str() fallback: guards against - exponential str() expansion on deeply shared collection structures. - """ - match value: - case str(): - return value - # bool must precede int: isinstance(True, int) is True. - case bool(): - return "true" if value else "false" - case int(): - return str(value) - case None: - return "" - # Explicit float rejection. Statically unreachable for typed callers - # (FluentValue excludes float), but custom functions registered via - # FunctionRegistry return untyped Python values — a float return bypasses - # mypy and would silently reach str() without this guard, producing IEEE - # 754 noise in financial output (e.g., "3.1400000000000001"). 
- # FrozenFluentError (not TypeError) ensures callers' except handlers - # catch this and produce a graceful fallback per Fluent spec. - case float(): - msg = ( - f"float value {value!r} is not a valid FluentValue. " - "IEEE 754 float cannot represent most decimal fractions exactly. " - "Use int for whole amounts or decimal.Decimal for fractional amounts." - ) - raise FrozenFluentError(msg, ErrorCategory.RESOLUTION) - # Guard against str() on collections (Sequence/Mapping). These are valid - # FluentValue types for passing structured data to custom functions, but - # str() on deeply nested/shared structures causes exponential expansion - # (e.g., DAG with depth 30 → 2^30 nodes in str() output). - case Sequence() | Mapping(): - return f"[{type(value).__name__}]" - # Handles Decimal, datetime, date, and FluentNumber via __str__. - case _: - return str(value) - - def _get_fallback_for_placeable( - self, expr: Expression, depth: int = _FALLBACK_MAX_DEPTH - ) -> str: - """Get readable fallback for failed placeable per Fluent spec. - - Per Fluent specification, when a placeable fails to resolve, - we return a human-readable representation of what was attempted. - This is superior to {ERROR: ...} as it: - 1. Doesn't expose internal diagnostics - 2. Shows what the translator expected - 3. 
Makes errors visible but not alarming - - Args: - expr: The expression that failed to resolve - depth: Remaining recursion depth (prevents stack overflow) - - Returns: - Readable fallback string - - Examples: - VariableReference($name) -> "{$name}" - MessageReference(welcome) -> "{welcome}" - TermReference(-brand) -> "{-brand}" - FunctionReference(NUMBER) -> "{NUMBER(...)}" - SelectExpression($count) -> "{{$count} -> ...}" - """ - # Depth protection: prevent recursion overflow on adversarial ASTs - if depth <= 0: - return FALLBACK_INVALID - - match expr: - case VariableReference(): - return FALLBACK_MISSING_VARIABLE.format(name=expr.id.name) - case MessageReference(): - msg_id = expr.id.name - if expr.attribute: - msg_id = f"{msg_id}.{expr.attribute.name}" - return FALLBACK_MISSING_MESSAGE.format(id=msg_id) - case TermReference(): - term_id = expr.id.name - if expr.attribute: - term_id = f"{term_id}.{expr.attribute.name}" - return FALLBACK_MISSING_TERM.format(name=term_id) - case FunctionReference(): - return FALLBACK_FUNCTION_ERROR.format(name=expr.id.name) - case SelectExpression(): - # Provide context by showing the selector expression - selector_fallback = self._get_fallback_for_placeable(expr.selector, depth - 1) - return f"{{{selector_fallback} -> ...}}" - case Placeable(): - # Nested placeable: delegate to the inner expression - return self._get_fallback_for_placeable(expr.expression, depth - 1) - case StringLiteral(): - # Literal string value is the best fallback for a failed string literal - return expr.value - case NumberLiteral(): - # Raw source representation is the best fallback for a failed number literal - return expr.raw - case _: - # Statically unreachable (Expression union is exhaustively covered above), - # but defensively necessary: _get_fallback_for_placeable is an error-recovery - # function whose contract is to ALWAYS return a string. Tests intentionally - # bypass the type system (passing Mock objects) to verify graceful degradation. 
- # assert_never() would change the contract from "always return" to "may raise", - # breaking the fallback guarantee. This wildcard is the safety net. - return FALLBACK_INVALID # type: ignore[unreachable] diff --git a/src/ftllexengine/runtime/resolver.py,cover b/src/ftllexengine/runtime/resolver.py,cover deleted file mode 100644 index 46aab997..00000000 --- a/src/ftllexengine/runtime/resolver.py,cover +++ /dev/null @@ -1,954 +0,0 @@ -> """Fluent message resolver - converts AST to formatted strings. - -> Resolves patterns by walking AST, interpolating variables, evaluating selectors. -> Python 3.13+. Indirect dependency: Babel (via plural_rules). - -> Thread Safety: -> Resolution state is passed explicitly via ResolutionContext, making the -> resolver fully reentrant and compatible with async frameworks. Each -> resolution operation creates its own isolated context. - -> Global depth tracking uses contextvars for async-safe per-task state, -> preventing custom functions from bypassing depth limits by calling -> back into bundle.format_pattern(). 
-> """ - -> from __future__ import annotations - -> from collections.abc import Mapping, Sequence -> from contextvars import ContextVar, Token -> from dataclasses import dataclass, field -> from decimal import Decimal, InvalidOperation - -> from ftllexengine.constants import ( -> FALLBACK_FUNCTION_ERROR, -> FALLBACK_INVALID, -> FALLBACK_MISSING_MESSAGE, -> FALLBACK_MISSING_TERM, -> FALLBACK_MISSING_VARIABLE, -> MAX_DEPTH, -> ) -> from ftllexengine.core.babel_compat import BabelImportError -> from ftllexengine.core.depth_guard import DepthGuard, depth_clamp -> from ftllexengine.core.errors import FormattingError -> from ftllexengine.diagnostics import ( -> ErrorTemplate, -> FluentCyclicReferenceError, -> FluentError, -> FluentReferenceError, -> FluentResolutionError, -> ) -> from ftllexengine.runtime.function_bridge import FluentNumber, FluentValue, FunctionRegistry -> from ftllexengine.runtime.plural_rules import select_plural_category -> from ftllexengine.syntax import ( -> Expression, -> FunctionReference, -> Identifier, -> Message, -> MessageReference, -> NumberLiteral, -> Pattern, -> Placeable, -> SelectExpression, -> StringLiteral, -> Term, -> TermReference, -> TextElement, -> VariableReference, -> Variant, -> ) - - # Re-export FluentValue for public API compatibility - # Canonical definition is in function_bridge.py to avoid circular imports -> __all__ = ["FluentResolver", "FluentValue", "ResolutionContext"] - - # Unicode bidirectional isolation characters per Unicode TR9. - # Used to prevent RTL/LTR text interference when interpolating values. -> UNICODE_FSI: str = "\u2068" # U+2068 FIRST STRONG ISOLATE -> UNICODE_PDI: str = "\u2069" # U+2069 POP DIRECTIONAL ISOLATE - - # Global resolution depth tracking via contextvars. - # Prevents custom functions from bypassing depth limits by calling back into - # bundle.format_pattern(). Each async task/thread maintains independent state. - # This tracks the number of nested resolve_message() calls across all contexts. 
-> _global_resolution_depth: ContextVar[int] = ContextVar( -> "fluent_resolution_depth", default=0 -> ) - - -> class GlobalDepthGuard: -> """Context manager for tracking global resolution depth across format_pattern calls. - -> Uses contextvars for async-safe per-task state. This prevents custom functions -> from bypassing depth limits by creating new ResolutionContext instances. - -> Usage: -> with GlobalDepthGuard(max_depth=100): - # Nested format_pattern calls are tracked globally -> result = resolver.resolve_message(message, args) - -> Security: -> Without global depth tracking, a malicious custom function could: -> 1. Receive control during resolution -> 2. Call bundle.format_pattern() which creates a fresh ResolutionContext -> 3. Repeat step 2 recursively, bypassing per-context depth limits -> 4. Eventually cause stack overflow - -> GlobalDepthGuard prevents this by tracking depth across all contexts. -> """ - -> __slots__ = ("_max_depth", "_token") - -> def __init__(self, max_depth: int = MAX_DEPTH) -> None: -> """Initialize guard with maximum depth limit.""" -> self._max_depth = depth_clamp(max_depth) -> self._token: Token[int] | None = None - -> def __enter__(self) -> GlobalDepthGuard: -> """Enter guarded section, increment global depth.""" -> current = _global_resolution_depth.get() -> if current >= self._max_depth: -> raise FluentResolutionError( -> ErrorTemplate.expression_depth_exceeded(self._max_depth) -> ) -> self._token = _global_resolution_depth.set(current + 1) -> return self - -> def __exit__( -> self, -> exc_type: type[BaseException] | None, -> exc_val: BaseException | None, -> exc_tb: object, -> ) -> None: -> """Exit guarded section, restore previous depth.""" -> if self._token is not None: -> _global_resolution_depth.reset(self._token) - - -> @dataclass(slots=True) -> class ResolutionContext: -> """Explicit context for message resolution. 
- -> Replaces thread-local state with explicit parameter passing for: -> - Thread safety without global state -> - Async framework compatibility (no thread-local conflicts) -> - Easier testing (no state reset needed) -> - Clear dependency flow - -> Performance: Uses both list (for ordered path) and set (for O(1) lookup) -> to optimize cycle detection while preserving path information for errors. - -> Instance Lifecycle: -> Each resolution operation creates a fresh ResolutionContext instance. -> This ensures complete isolation between concurrent resolutions. -> The per-resolution DepthGuard allocation is intentional for thread safety; -> object pooling is not used to avoid synchronization overhead. - -> Attributes: -> stack: Resolution stack for cycle detection (message keys being resolved) -> _seen: Set for O(1) membership checking (internal) -> max_depth: Maximum resolution depth (prevents stack overflow) -> max_expression_depth: Maximum expression nesting depth -> _expression_guard: DepthGuard for expression depth tracking (internal) -> """ - -> stack: list[str] = field(default_factory=list) -> _seen: set[str] = field(default_factory=set) -> max_depth: int = MAX_DEPTH -> max_expression_depth: int = MAX_DEPTH -> _expression_guard: DepthGuard = field(init=False) - -> def __post_init__(self) -> None: -> """Initialize the expression depth guard with configured max depth.""" -> self._expression_guard = DepthGuard(max_depth=self.max_expression_depth) - -> def push(self, key: str) -> None: -> """Push message key onto resolution stack.""" -> self.stack.append(key) -> self._seen.add(key) - -> def pop(self) -> str: -> """Pop message key from resolution stack.""" -> key = self.stack.pop() -> self._seen.discard(key) -> return key - -> def contains(self, key: str) -> bool: -> """Check if key is in resolution stack (cycle detection). - -> Performance: O(1) set lookup instead of O(N) list scan. 
-> """ -> return key in self._seen - -> @property -> def depth(self) -> int: -> """Current resolution depth.""" -> return len(self.stack) - -> def is_depth_exceeded(self) -> bool: -> """Check if maximum depth has been exceeded.""" -> return self.depth >= self.max_depth - -> def get_cycle_path(self, key: str) -> list[str]: -> """Get the cycle path for error reporting.""" -> return [*self.stack, key] - -> @property -> def expression_guard(self) -> DepthGuard: -> """Get the expression depth guard for context manager use. - -> Usage: -> with context.expression_guard: -> result = self._resolve_expression(nested_expr, ...) -> """ -> return self._expression_guard - -> @property -> def expression_depth(self) -> int: -> """Current expression nesting depth (read-only, delegates to guard).""" -> return self._expression_guard.current_depth - - -> class FluentResolver: -> """Resolves Fluent messages to strings. - -> Aligned with Mozilla python-fluent error handling: -> - Collects errors instead of embedding them in output -> - Returns (result, errors) tuples -> - Provides readable fallbacks per Fluent specification - -> Thread Safety: -> Uses explicit ResolutionContext instead of thread-local state for -> full reentrancy and async framework compatibility. -> """ - -> __slots__ = ( -> "_max_nesting_depth", -> "function_registry", -> "locale", -> "messages", -> "terms", -> "use_isolating", -> ) - -> def __init__( -> self, -> locale: str, -> messages: dict[str, Message], -> terms: dict[str, Term], -> *, -> function_registry: FunctionRegistry, -> use_isolating: bool = True, -> max_nesting_depth: int = MAX_DEPTH, -> ) -> None: -> """Initialize resolver. 
- -> Args: -> locale: Locale code for plural selection -> messages: Message registry -> terms: Term registry -> function_registry: Function registry with camelCase conversion (keyword-only) -> use_isolating: Wrap interpolated values in Unicode bidi marks (keyword-only) -> max_nesting_depth: Maximum resolution depth limit (keyword-only) -> """ -> self.locale = locale -> self.use_isolating = use_isolating -> self.messages = messages -> self.terms = terms -> self.function_registry = function_registry -> self._max_nesting_depth = depth_clamp(max_nesting_depth) - -> def resolve_message( -> self, -> message: Message, -> args: Mapping[str, FluentValue] | None = None, -> attribute: str | None = None, -> *, -> context: ResolutionContext | None = None, -> ) -> tuple[str, tuple[FluentError, ...]]: -> """Resolve message to final string with error collection. - -> Mozilla python-fluent aligned API: -> - Returns (result, errors) tuple -> - Collects all errors during resolution -> - Never raises exceptions (graceful degradation) - -> Args: -> message: Message AST -> args: Variable arguments -> attribute: Attribute name (optional) -> context: Resolution context for cycle detection and depth tracking. -> If None, creates a fresh context for this resolution. - -> Typical Usage: Leave as None (default). Each format_pattern() -> call creates a fresh context automatically. - -> Advanced Usage: Provide a custom ResolutionContext when: -> - Batching multiple resolutions with shared cycle detection -> - Implementing custom depth limits via ResolutionContext(max_depth=N) -> - Building resolution pipelines that need cross-call state - -> See ResolutionContext class for configuration options. - -> Returns: -> Tuple of (formatted_string, errors) -> - formatted_string: Best-effort output (never empty) -> - errors: Tuple of FluentError instances encountered (immutable) - -> Note: -> Per Fluent spec, resolution never fails catastrophically. -> Errors are collected and fallback values are used. 
- -> Attribute resolution uses last-wins semantics for duplicate attribute -> names. If a message contains multiple attributes with the same name -> (which triggers a validation warning), the last definition is used -> during resolution. This matches the Fluent specification and Mozilla -> reference implementation behavior. -> """ -> errors: list[FluentError] = [] -> args = args or {} - - # Create fresh context if not provided (top-level call) -> if context is None: -> context = ResolutionContext( -> max_depth=self._max_nesting_depth, -> max_expression_depth=self._max_nesting_depth, -> ) - - # Select pattern (value or attribute) -> if attribute: -> attr = next((a for a in reversed(message.attributes) if a.id.name == attribute), None) -> if not attr: -> error = FluentReferenceError( -> ErrorTemplate.attribute_not_found(attribute, message.id.name) -> ) -> errors.append(error) -> fallback = FALLBACK_MISSING_MESSAGE.format(id=f"{message.id.name}.{attribute}") -> return (fallback, tuple(errors)) -> pattern = attr.value -> else: -> if message.value is None: -> error = FluentReferenceError(ErrorTemplate.message_no_value(message.id.name)) -> errors.append(error) -> fallback = FALLBACK_MISSING_MESSAGE.format(id=message.id.name) -> return (fallback, tuple(errors)) -> pattern = message.value - - # Check for circular references using explicit context -> msg_key = f"{message.id.name}.{attribute}" if attribute else message.id.name -> if context.contains(msg_key): -> cycle_path = context.get_cycle_path(msg_key) -> error = FluentCyclicReferenceError(ErrorTemplate.cyclic_reference(cycle_path)) -> errors.append(error) -> fallback = FALLBACK_MISSING_MESSAGE.format(id=msg_key) -> return (fallback, tuple(errors)) - - # Check for maximum depth (prevents stack overflow from long non-cyclic chains) -> if context.is_depth_exceeded(): -> error = FluentReferenceError( -> ErrorTemplate.max_depth_exceeded(msg_key, context.max_depth) -> ) -> errors.append(error) -> fallback = 
FALLBACK_MISSING_MESSAGE.format(id=msg_key) -> return (fallback, tuple(errors)) - - # Use GlobalDepthGuard to track depth across separate format_pattern() calls. - # This prevents custom functions from bypassing depth limits by calling - # back into bundle.format_pattern() which creates a fresh ResolutionContext. -> try: -> with GlobalDepthGuard(max_depth=context.max_depth): -> context.push(msg_key) -> try: -> result = self._resolve_pattern(pattern, args, errors, context) -> return (result, tuple(errors)) -> finally: -> context.pop() -> except FluentResolutionError as e: - # Global depth exceeded - collect error and return fallback -> errors.append(e) -> fallback = FALLBACK_MISSING_MESSAGE.format(id=msg_key) -> return (fallback, tuple(errors)) - -> def _resolve_pattern( -> self, -> pattern: Pattern, -> args: Mapping[str, FluentValue], -> errors: list[FluentError], -> context: ResolutionContext, -> ) -> str: -> """Resolve pattern by walking elements. - -> Uses list accumulation with join() for O(N) performance instead of -> repeated string concatenation which is O(N^2). -> """ -> parts: list[str] = [] - -> for element in pattern.elements: -> match element: -> case TextElement(): -> parts.append(element.value) -> case Placeable(): -> try: - # Track expression depth to prevent stack overflow from deeply - # nested SelectExpressions. The guard must be applied HERE at - # the Pattern->Placeable entry point, not just in _resolve_expression - # for nested Placeables. Without this, the recursion path: - # Pattern -> Placeable -> SelectExpression -> Variant Pattern -> ... - # bypasses depth limiting entirely. 
-> with context.expression_guard: -> value = self._resolve_expression( -> element.expression, args, errors, context -> ) -> formatted = self._format_value(value) - - # Wrap in Unicode bidi isolation marks (FSI/PDI) - # Per Unicode TR9, prevents RTL/LTR text interference -> if self.use_isolating: -> parts.append(f"{UNICODE_FSI}{formatted}{UNICODE_PDI}") -> else: -> parts.append(formatted) - -> except (FluentReferenceError, FluentResolutionError) as e: - # Mozilla-aligned error handling: - # Collect error, show readable fallback (not {ERROR: ...}) -> errors.append(e) - # Use pattern matching for type-safe fallback extraction -> match e: -> case FormattingError(fallback_value=fallback): - # FormattingError carries the original value as fallback -> parts.append(fallback) -> case _: -> parts.append(self._get_fallback_for_placeable(element.expression)) - -> return "".join(parts) - -> def _resolve_expression( # noqa: PLR0911 # Complex dispatch logic expected -> self, -> expr: Expression, -> args: Mapping[str, FluentValue], -> errors: list[FluentError], -> context: ResolutionContext, -> ) -> FluentValue: -> """Resolve expression to value. - -> Uses pattern matching (PEP 636) to reduce complexity. -> Each case delegates to a specialized resolver method. - -> Note: PLR0911 (too many returns) is acceptable here - each case -> represents a distinct expression type in the Fluent AST. 
-> """ -> match expr: -> case SelectExpression(): -> return self._resolve_select_expression(expr, args, errors, context) -> case VariableReference(): -> return self._resolve_variable_reference(expr, args, context) -> case MessageReference(): -> return self._resolve_message_reference(expr, args, errors, context) -> case TermReference(): -> return self._resolve_term_reference(expr, args, errors, context) -> case FunctionReference(): -> return self._resolve_function_call(expr, args, errors, context) -> case StringLiteral(): -> return expr.value -> case NumberLiteral(): -> return expr.value -> case Placeable(): - # Track expression depth to prevent stack overflow from deep nesting -> with context.expression_guard: -> return self._resolve_expression(expr.expression, args, errors, context) -> case _: -> raise FluentResolutionError(ErrorTemplate.unknown_expression(type(expr).__name__)) - -> def _resolve_variable_reference( -> self, -> expr: VariableReference, -> args: Mapping[str, FluentValue], -> context: ResolutionContext, -> ) -> FluentValue: -> """Resolve variable reference from args.""" -> var_name = expr.id.name -> if var_name not in args: - # Include resolution path for debugging nested references -> resolution_path = tuple(context.stack) if context.stack else None -> raise FluentReferenceError( -> ErrorTemplate.variable_not_provided( -> var_name, resolution_path=resolution_path -> ) -> ) -> return args[var_name] - -> def _resolve_message_reference( -> self, -> expr: MessageReference, -> args: Mapping[str, FluentValue], -> errors: list[FluentError], -> context: ResolutionContext, -> ) -> str: -> """Resolve message reference.""" -> msg_id = expr.id.name -> if msg_id not in self.messages: -> raise FluentReferenceError(ErrorTemplate.message_not_found(msg_id)) -> message = self.messages[msg_id] - # resolve_message returns (result, errors) tuple - # Pass the same context for proper cycle detection across nested calls -> result, nested_errors = self.resolve_message( -> 
message, -> args, -> attribute=expr.attribute.name if expr.attribute else None, -> context=context, -> ) - # Add nested errors to our error list -> errors.extend(nested_errors) -> return result - -> def _resolve_term_reference( -> self, -> expr: TermReference, -> args: Mapping[str, FluentValue], -> errors: list[FluentError], -> context: ResolutionContext, -> ) -> str: -> """Resolve term reference with cycle detection and argument handling. - -> Per Fluent spec, terms can be parameterized with arguments: -> -brand(case: "nominative") - -> Term arguments are evaluated and merged into the resolution context, -> allowing term patterns to reference them as variables. -> """ -> term_id = expr.id.name -> if term_id not in self.terms: -> raise FluentReferenceError(ErrorTemplate.term_not_found(term_id)) -> term = self.terms[term_id] - - # Select pattern (value or attribute) -> if expr.attribute: -> attr = next((a for a in term.attributes if a.id.name == expr.attribute.name), None) -> if not attr: -> raise FluentReferenceError( -> ErrorTemplate.term_attribute_not_found(expr.attribute.name, term_id) -> ) -> pattern = attr.value -> else: -> pattern = term.value - - # Build term key for cycle detection (use -prefix to match FTL syntax) -> term_key = f"-{term_id}.{expr.attribute.name}" if expr.attribute else f"-{term_id}" - - # Check for circular references -> if context.contains(term_key): -> cycle_path = context.get_cycle_path(term_key) -> cycle_error = FluentCyclicReferenceError(ErrorTemplate.cyclic_reference(cycle_path)) -> errors.append(cycle_error) - # term_key already has '-' prefix, strip it for the template -> return FALLBACK_MISSING_TERM.format(name=term_key.lstrip("-")) - - # Check for maximum depth -> if context.is_depth_exceeded(): -> depth_error = FluentReferenceError( -> ErrorTemplate.max_depth_exceeded(term_key, context.max_depth) -> ) -> errors.append(depth_error) - # term_key already has '-' prefix, strip it for the template -> return 
FALLBACK_MISSING_TERM.format(name=term_key.lstrip("-")) - - # Evaluate term arguments - terms are ISOLATED from calling context - # Per Fluent spec: terms can ONLY access explicitly passed arguments - # https://projectfluent.org/fluent/guide/terms.html - # "Terms receive such data from messages in which they are used" - # This means ONLY explicit parameterization like -term(arg: val), NOT - # implicit access to the calling message's $variables. -> term_args: dict[str, FluentValue] = {} -> if expr.arguments is not None: - # Evaluate named arguments (the primary use case for term args) -> for named_arg in expr.arguments.named: -> arg_name = named_arg.name.name -> arg_value = self._resolve_expression(named_arg.value, args, errors, context) -> term_args[arg_name] = arg_value - - # Evaluate positional arguments (per Fluent spec, term arguments section) - # Reference: https://projectfluent.org/fluent/guide/terms.html#parameterized-terms - # The spec defines term arguments as named only (e.g., -term(case: "gen")). - # Positional arguments in term references are technically parsed but have - # no binding semantics - there's no parameter name to assign the value to. - # We evaluate them to catch expression errors but discard the result. -> if expr.arguments.positional: -> for pos_arg in expr.arguments.positional: -> self._resolve_expression(pos_arg, args, errors, context) - - # Emit warning that positional arguments are ignored -> errors.append( -> FluentResolutionError( -> ErrorTemplate.term_positional_args_ignored( -> term_name=term_id, -> count=len(expr.arguments.positional), -> ) -> ) -> ) - -> try: -> context.push(term_key) -> return self._resolve_pattern(pattern, term_args, errors, context) -> finally: -> context.pop() - -> def _find_exact_variant( -> self, -> variants: Sequence[Variant], -> selector_value: FluentValue, -> selector_str: str, -> ) -> Variant | None: -> """Pass 1: Find variant with exact string or number match. 
- -> Args: -> variants: Sequence of variants to search -> selector_value: Resolved selector value (for numeric comparison) -> selector_str: String representation of selector (for string comparison) - -> Returns: -> Matching variant or None if no exact match found. -> """ -> for variant in variants: -> match variant.key: -> case Identifier(name=key_name): -> if key_name == selector_str: -> return variant -> case NumberLiteral(raw=raw_str): - # Handle int, float, Decimal, and FluentNumber for exact numeric match. - # Use raw string representation for maximum precision. - # Problem: float(1.1) != Decimal("1.1") due to IEEE 754. - # Solution: Use NumberLiteral.raw (exact source string) for key, - # and convert selector to Decimal via str for comparison. - # Edge case: Float arithmetic results (e.g., 0.1 + 0.2) may produce - # values like 0.30000000000000004 that won't match literal "0.3". - # For exact matching with computed values, use Decimal arithmetic. - # Note: Exclude bool since isinstance(True, int) is True in Python, - # but str(True) == "True" which is not a valid Decimal. - # - # FluentNumber wraps formatted numbers (from NUMBER() function) while - # preserving the original numeric value for matching. Extract .value - # for numeric comparison so [1000] matches FluentNumber(1000, "1,000"). -> numeric_for_match: int | float | Decimal | None = None -> if isinstance(selector_value, FluentNumber): -> numeric_for_match = selector_value.value -> elif ( -> isinstance(selector_value, (int, float, Decimal)) -> and not isinstance(selector_value, bool) -> ): -> numeric_for_match = selector_value - -> if numeric_for_match is not None: - # Use raw string for key to preserve exact source precision -> try: -> key_decimal = Decimal(raw_str) -> except InvalidOperation: - # Malformed NumberLiteral.raw from programmatic AST construction. - # Fall through to next variant instead of crashing. 
-> continue -> sel_decimal = Decimal(str(numeric_for_match)) -> if key_decimal == sel_decimal: -> return variant -> return None - -> def _find_plural_variant( -> self, -> variants: Sequence[Variant], -> plural_category: str, -> ) -> Variant | None: -> """Pass 2: Find variant matching plural category. - -> Args: -> variants: Sequence of variants to search -> plural_category: CLDR plural category (zero, one, two, few, many, other) - -> Returns: -> Matching variant or None if no plural category match found. -> """ -> for variant in variants: -> match variant.key: -> case Identifier(name=key_name): -> if key_name == plural_category: -> return variant -> return None - -> def _find_default_variant(self, variants: Sequence[Variant]) -> Variant | None: -> """Find the default variant (marked with *). - -> Args: -> variants: Sequence of variants to search - -> Returns: -> Default variant or None if no default marked. -> """ -> for variant in variants: -> if variant.default: -> return variant -> return None - -> def _resolve_select_expression( -> self, -> expr: SelectExpression, -> args: Mapping[str, FluentValue], -> errors: list[FluentError], -> context: ResolutionContext, -> ) -> str: -> """Resolve select expression by matching variant. - -> Matching priority (two-pass linear scan): -> 1. Exact string/number match (pass 1) -> 2. Plural category match for numeric selectors (pass 2) -> 3. Default variant -> 4. First variant (fallback) - -> For typical FTL files with <5 variants, linear scan is more efficient -> than building dictionary indices. Exact matches always take precedence -> over plural category matches, regardless of variant order in FTL source. - -> Error handling: -> If the selector expression fails (e.g., missing variable), the error -> is collected and resolution falls back to the default variant. This -> ensures robustness and matches the Fluent spec behavior. -> """ - # Evaluate selector with error resilience. 
- # If selector evaluation fails (e.g., missing variable), collect the error - # and fall back to the default variant per Fluent spec. - # Wrap in expression_guard to track depth for DoS protection. -> try: -> with context.expression_guard: -> selector_value = self._resolve_expression( -> expr.selector, args, errors, context -> ) -> except (FluentReferenceError, FluentResolutionError) as e: - # Collect the error but don't propagate - fall back to default variant -> errors.append(e) -> return self._resolve_fallback_variant(expr, args, errors, context) - - # Use _format_value for consistent string representation. - # This ensures: - # - None -> "" (falls through to default variant) - # - bool -> "true"/"false" (matches FTL variant keys, not Python "True"/"False") - # - FluentNumber -> formatted string (display representation) - # - Other types -> str() representation -> selector_str = self._format_value(selector_value) - - # Pass 1: Exact match (takes priority) -> exact_match = self._find_exact_variant(expr.variants, selector_value, selector_str) -> if exact_match is not None: -> return self._resolve_pattern(exact_match.value, args, errors, context) - - # Pass 2: Plural category match (numeric selectors only) - # FluentValue includes Decimal for currency/financial values. - # FluentNumber wraps formatted numbers while preserving numeric identity. - # Note: Exclude bool since isinstance(True, int) is True in Python, - # but booleans should match [true]/[false] variants, not plural categories. - # - # Extract numeric value from FluentNumber for plural matching. -> numeric_value: int | float | Decimal | None = None -> if isinstance(selector_value, FluentNumber): -> numeric_value = selector_value.value -> elif isinstance(selector_value, (int, float, Decimal)) and not isinstance( -> selector_value, bool -> ): -> numeric_value = selector_value - -> if numeric_value is not None: - # Try plural category matching (requires Babel for CLDR data). 
- # If Babel is not installed (parser-only mode), collect error and - # fall through to default variant. -> try: -> plural_category = select_plural_category(numeric_value, self.locale) -> plural_match = self._find_plural_variant(expr.variants, plural_category) -> if plural_match is not None: -> return self._resolve_pattern( -> plural_match.value, args, errors, context -> ) -> except BabelImportError: - # Babel not installed - collect error, fall through to default -> errors.append( -> FluentResolutionError(ErrorTemplate.plural_support_unavailable()) -> ) - - # Fallback: default variant -> default_variant = self._find_default_variant(expr.variants) -> if default_variant is not None: -> return self._resolve_pattern(default_variant.value, args, errors, context) - - # Fallback: first variant -> if expr.variants: -> return self._resolve_pattern(expr.variants[0].value, args, errors, context) - -> raise FluentResolutionError(ErrorTemplate.no_variants()) - -> def _resolve_fallback_variant( -> self, -> expr: SelectExpression, -> args: Mapping[str, FluentValue], -> errors: list[FluentError], -> context: ResolutionContext, -> ) -> str: -> """Resolve fallback variant when selector evaluation fails. - -> Attempts to resolve in order: -> 1. Default variant (marked with *) -> 2. 
First variant - -> Args: -> expr: The SelectExpression to resolve -> args: Arguments for pattern resolution -> errors: Error list for error collection -> context: Resolution context - -> Returns: -> Resolved variant pattern string - -> Raises: -> FluentResolutionError: If no variants exist -> """ - # Try default variant first -> default_variant = self._find_default_variant(expr.variants) -> if default_variant is not None: -> return self._resolve_pattern(default_variant.value, args, errors, context) - - # Fall back to first variant -> if expr.variants: -> return self._resolve_pattern(expr.variants[0].value, args, errors, context) - -> raise FluentResolutionError(ErrorTemplate.no_variants()) - -> def _resolve_function_call( -> self, -> func_ref: FunctionReference, -> args: Mapping[str, FluentValue], -> errors: list[FluentError], -> context: ResolutionContext, -> ) -> FluentValue: -> """Resolve function call. - -> Uses FunctionRegistry to handle camelCase → snake_case parameter conversion. -> Uses metadata system to determine if locale injection is needed. - -> Security: -> Wraps argument resolution in expression_guard to prevent DoS via deeply -> nested function calls like NUMBER(A(B(C(...)))). Each nested call -> consumes stack frames during resolution. - -> Returns FluentValue which the resolver will convert to string for final output. -> """ -> func_name = func_ref.id.name - - # Evaluate arguments within depth guard (DoS prevention) - # Function arguments can contain nested function calls: NUMBER(ABS(FLOOR($x))) - # Without depth tracking, deeply nested calls can exhaust the Python stack. 
-> with context.expression_guard: -> positional_values: list[FluentValue] = [ -> self._resolve_expression(arg, args, errors, context) -> for arg in func_ref.arguments.positional -> ] - - # Evaluate named arguments (camelCase from FTL) -> named_values: dict[str, FluentValue] = { -> arg.name.name: self._resolve_expression(arg.value, args, errors, context) -> for arg in func_ref.arguments.named -> } - - # Check if locale injection is needed (metadata-driven, not magic tuple) - # This correctly handles custom functions with same name as built-ins -> if self.function_registry.should_inject_locale(func_name): - # Validate arity before injection to provide clear error messages - # instead of opaque TypeError from incorrect argument positioning -> expected_args = self.function_registry.get_expected_positional_args(func_name) -> if expected_args is not None and len(positional_values) != expected_args: -> raise FluentResolutionError( -> ErrorTemplate.function_arity_mismatch( -> func_name, expected_args, len(positional_values) -> ) -> ) - - # Built-in formatting functions expect signature: func(value, locale, *, ...) - # Append locale after positional args (FTL passes exactly one value arg, - # so this places locale as the second positional argument by contract) - # FunctionRegistry.call() handles camelCase -> snake_case conversion -> return self.function_registry.call( -> func_name, -> [*positional_values, self.locale], -> named_values, -> ) - - # Custom function or built-in that doesn't need locale: pass args as-is -> return self.function_registry.call( -> func_name, -> positional_values, -> named_values, -> ) - -> def _format_value(self, value: FluentValue) -> str: -> """Format FluentValue to string for final output. 
- -> Handles all types in the FluentValue union: -> - str: returned as-is -> - bool: "true"/"false" (Fluent convention) -> - int/float: string representation -> - Decimal/datetime/date: string representation via __str__ -> - None: empty string -> """ -> if isinstance(value, str): -> return value - # Check bool BEFORE int/float (bool is subclass of int in Python) -> if isinstance(value, bool): -> return "true" if value else "false" -> if isinstance(value, (int, float)): -> return str(value) -> if value is None: -> return "" - # Handles Decimal, datetime, date, and any other types -> return str(value) - -> def _get_fallback_for_placeable(self, expr: Expression, depth: int = MAX_DEPTH) -> str: # noqa: PLR0911 -> """Get readable fallback for failed placeable per Fluent spec. - -> Per Fluent specification, when a placeable fails to resolve, -> we return a human-readable representation of what was attempted. -> This is superior to {ERROR: ...} as it: -> 1. Doesn't expose internal diagnostics -> 2. Shows what the translator expected -> 3. 
Makes errors visible but not alarming - -> Args: -> expr: The expression that failed to resolve -> depth: Remaining recursion depth (prevents stack overflow) - -> Returns: -> Readable fallback string - -> Examples: -> VariableReference($name) -> "{$name}" -> MessageReference(welcome) -> "{welcome}" -> TermReference(-brand) -> "{-brand}" -> FunctionReference(NUMBER) -> "{NUMBER(...)}" -> SelectExpression($count) -> "{{$count} -> ...}" -> """ - # Depth protection: prevent recursion overflow on adversarial ASTs -> if depth <= 0: -> return FALLBACK_INVALID - -> match expr: -> case VariableReference(): -> return FALLBACK_MISSING_VARIABLE.format(name=expr.id.name) -> case MessageReference(): -> msg_id = expr.id.name -> if expr.attribute: -> msg_id = f"{msg_id}.{expr.attribute.name}" -> return FALLBACK_MISSING_MESSAGE.format(id=msg_id) -> case TermReference(): -> term_id = expr.id.name -> if expr.attribute: -> term_id = f"{term_id}.{expr.attribute.name}" -> return FALLBACK_MISSING_TERM.format(name=term_id) -> case FunctionReference(): -> return FALLBACK_FUNCTION_ERROR.format(name=expr.id.name) -> case SelectExpression(): - # Provide context by showing the selector expression -> selector_fallback = self._get_fallback_for_placeable(expr.selector, depth - 1) -> return f"{{{selector_fallback} -> ...}}" -> case _: -> return FALLBACK_INVALID diff --git a/src/ftllexengine/runtime/resolver_protocols.py b/src/ftllexengine/runtime/resolver_protocols.py new file mode 100644 index 00000000..e578b62e --- /dev/null +++ b/src/ftllexengine/runtime/resolver_protocols.py @@ -0,0 +1,87 @@ +"""Type-checking protocols for FluentResolver mixins.""" + +from __future__ import annotations + +from typing import TYPE_CHECKING, Protocol + +if TYPE_CHECKING: + from collections.abc import Mapping, Sequence + + from ftllexengine.core.value_types import FluentValue + from ftllexengine.diagnostics import FrozenFluentError + from ftllexengine.runtime.function_bridge import FunctionRegistry + from 
ftllexengine.runtime.resolution_context import ResolutionContext + from ftllexengine.syntax import Expression, Pattern, SelectExpression, Variant + + +class ResolverStateProtocol(Protocol): + """Structural contract implemented by FluentResolver for its mixins.""" + + _function_registry: FunctionRegistry + _locale: str + + def _format_value(self, value: object) -> str: + ... # pragma: no cover - typing-only protocol declaration + + def _resolve_expression( + self, + expr: Expression, + args: Mapping[str, object], + errors: list[FrozenFluentError], + context: ResolutionContext, + ) -> FluentValue: + ... # pragma: no cover - typing-only protocol declaration + + def _resolve_pattern( + self, + pattern: Pattern, + args: Mapping[str, object], + errors: list[FrozenFluentError], + context: ResolutionContext, + ) -> str: + ... # pragma: no cover - typing-only protocol declaration + + def _call_function_safe( + self, + func_name: str, + positional: Sequence[FluentValue], + named: Mapping[str, FluentValue], + errors: list[FrozenFluentError], + ) -> FluentValue: + ... # pragma: no cover - typing-only protocol declaration + + def _get_fallback_for_placeable( + self, expr: Expression, depth: int = 10 + ) -> str: + ... # pragma: no cover - typing-only protocol declaration + + def _resolve_fallback_variant( + self, + expr: SelectExpression, + args: Mapping[str, object], + errors: list[FrozenFluentError], + context: ResolutionContext, + ) -> str: + ... # pragma: no cover - typing-only protocol declaration + + def _find_exact_variant( + self, + variants: Sequence[Variant], + selector_value: object, + selector_str: str, + ) -> Variant | None: + ... # pragma: no cover - typing-only protocol declaration + + def _find_plural_variant( + self, variants: Sequence[Variant], plural_category: str + ) -> Variant | None: + ... # pragma: no cover - typing-only protocol declaration + + def _find_default_variant(self, variants: Sequence[Variant]) -> Variant | None: + ... 
# pragma: no cover - typing-only protocol declaration + + def _get_reference_fallback(self, expr: Expression) -> str | None: + ... # pragma: no cover - typing-only protocol declaration + + def _get_nested_fallback(self, expr: Expression, depth: int) -> str: + ... # pragma: no cover - typing-only protocol declaration diff --git a/src/ftllexengine/runtime/resolver_runtime.py b/src/ftllexengine/runtime/resolver_runtime.py new file mode 100644 index 00000000..1b952136 --- /dev/null +++ b/src/ftllexengine/runtime/resolver_runtime.py @@ -0,0 +1,196 @@ +"""Function-call and fallback helpers for FluentResolver.""" + +from __future__ import annotations + +import logging +from collections.abc import Mapping, Sequence +from typing import TYPE_CHECKING + +from ftllexengine.constants import ( + FALLBACK_FUNCTION_ERROR, + FALLBACK_INVALID, + FALLBACK_MISSING_MESSAGE, + FALLBACK_MISSING_TERM, + FALLBACK_MISSING_VARIABLE, +) +from ftllexengine.diagnostics import ErrorCategory, ErrorTemplate, FrozenFluentError +from ftllexengine.syntax import ( + Expression, + FunctionReference, + MessageReference, + NumberLiteral, + Placeable, + SelectExpression, + StringLiteral, + TermReference, + VariableReference, +) + +if TYPE_CHECKING: + from ftllexengine.core.value_types import FluentValue + from ftllexengine.runtime.function_bridge import FunctionRegistry + from ftllexengine.runtime.resolution_context import ResolutionContext + +logger = logging.getLogger("ftllexengine.runtime.resolver") + +_FALLBACK_MAX_DEPTH: int = 10 + + +class _ResolverRuntimeMixin: + """Function-call, formatting, and fallback behavior for FluentResolver.""" + + _function_registry: FunctionRegistry + _locale: str + + if TYPE_CHECKING: + + def _resolve_expression( + self, + expr: Expression, + args: Mapping[str, FluentValue], + errors: list[FrozenFluentError], + context: ResolutionContext, + ) -> FluentValue: ... 
+ + def _resolve_function_call( + self, + func_ref: FunctionReference, + args: Mapping[str, FluentValue], + errors: list[FrozenFluentError], + context: ResolutionContext, + ) -> FluentValue: + """Resolve a function call with guarded argument evaluation.""" + func_name = func_ref.id.name + + with context.expression_guard: + positional_values = [ + self._resolve_expression(arg, args, errors, context) + for arg in func_ref.arguments.positional + ] + named_values = { + arg.name.name: self._resolve_expression(arg.value, args, errors, context) + for arg in func_ref.arguments.named + } + + if self._function_registry.should_inject_locale(func_name): + expected_args = self._function_registry.get_expected_positional_args(func_name) + if expected_args is not None and len(positional_values) != expected_args: + diag = ErrorTemplate.function_arity_mismatch( + func_name, expected_args, len(positional_values) + ) + raise FrozenFluentError(str(diag), ErrorCategory.RESOLUTION, diagnostic=diag) + + return self._call_function_safe( + func_name, + [*positional_values, self._locale], + named_values, + errors, + ) + + return self._call_function_safe( + func_name, + positional_values, + named_values, + errors, + ) + + def _call_function_safe( + self, + func_name: str, + positional: Sequence[FluentValue], + named: Mapping[str, FluentValue], + errors: list[FrozenFluentError], + ) -> FluentValue: + """Call a registered function and normalize unexpected exceptions.""" + try: + return self._function_registry.call(func_name, positional, named) + except FrozenFluentError: + raise + except Exception as error: # noqa: BLE001 - function adapters may raise arbitrary user exceptions + logger.warning( + "Custom function %s raised %s: %s", + func_name, + type(error).__name__, + str(error), + ) + diag = ErrorTemplate.function_failed( + func_name, f"Uncaught exception: {type(error).__name__}: {error}" + ) + errors.append( + FrozenFluentError(str(diag), ErrorCategory.RESOLUTION, diagnostic=diag) + ) + 
return FALLBACK_FUNCTION_ERROR.format(name=func_name) + + def _format_value(self, value: object) -> str: + """Format a resolved FluentValue for final output.""" + match value: + case str(): + return value + case bool(): + return "true" if value else "false" + case int(): + return str(value) + case None: + return "" + case float(): + msg = ( + f"float value {value!r} is not a valid FluentValue. " + "IEEE 754 float cannot represent most decimal fractions exactly. " + "Use int for whole amounts or decimal.Decimal for fractional amounts." + ) + raise FrozenFluentError(msg, ErrorCategory.RESOLUTION) + case Sequence() | Mapping(): + return f"[{type(value).__name__}]" + case _: + return str(value) + + def _get_reference_fallback( + self, expr: Expression + ) -> str | None: + """Return direct fallback text for simple reference expressions.""" + match expr: + case VariableReference(): + return FALLBACK_MISSING_VARIABLE.format(name=expr.id.name) + case MessageReference(): + msg_id = expr.id.name + if expr.attribute: + msg_id = f"{msg_id}.{expr.attribute.name}" + return FALLBACK_MISSING_MESSAGE.format(id=msg_id) + case TermReference(): + term_id = expr.id.name + if expr.attribute: + term_id = f"{term_id}.{expr.attribute.name}" + return FALLBACK_MISSING_TERM.format(name=term_id) + case FunctionReference(): + return FALLBACK_FUNCTION_ERROR.format(name=expr.id.name) + case _: + return None + + def _get_nested_fallback( + self, expr: Expression, depth: int + ) -> str: + """Return fallback text for nested or literal expressions.""" + match expr: + case SelectExpression(): + selector_fallback = self._get_fallback_for_placeable(expr.selector, depth - 1) + return f"{{{selector_fallback} -> ...}}" + case Placeable(): + return self._get_fallback_for_placeable(expr.expression, depth - 1) + case StringLiteral(): + return expr.value + case NumberLiteral(): + return expr.raw + case _: + return FALLBACK_INVALID + + def _get_fallback_for_placeable( + self, expr: Expression, depth: int = 
_FALLBACK_MAX_DEPTH + ) -> str: + """Render a readable fallback string for a failed placeable.""" + if depth <= 0: + return FALLBACK_INVALID + + reference_fallback = self._get_reference_fallback(expr) + if reference_fallback is not None: + return reference_fallback + return self._get_nested_fallback(expr, depth) diff --git a/src/ftllexengine/runtime/resolver_selection.py b/src/ftllexengine/runtime/resolver_selection.py new file mode 100644 index 00000000..3c5189fd --- /dev/null +++ b/src/ftllexengine/runtime/resolver_selection.py @@ -0,0 +1,159 @@ +"""Variant-selection helpers for FluentResolver.""" + +from __future__ import annotations + +from decimal import Decimal +from typing import TYPE_CHECKING + +from ftllexengine.core.babel_compat import BabelImportError +from ftllexengine.core.value_types import FluentNumber +from ftllexengine.diagnostics import ErrorCategory, ErrorTemplate, FrozenFluentError +from ftllexengine.runtime import resolver as _resolver_module +from ftllexengine.syntax import Expression, Identifier, NumberLiteral, SelectExpression, Variant + +if TYPE_CHECKING: + from collections.abc import Mapping, Sequence + + from ftllexengine.core.value_types import FluentValue + from ftllexengine.runtime.resolution_context import ResolutionContext + from ftllexengine.syntax import Pattern + + +class _ResolverSelectionMixin: + """Select-expression behavior for FluentResolver.""" + + _locale: str + + if TYPE_CHECKING: + + def _format_value(self, value: object) -> str: ... + + def _resolve_expression( + self, + expr: Expression, + args: Mapping[str, FluentValue], + errors: list[FrozenFluentError], + context: ResolutionContext, + ) -> FluentValue: ... + + def _resolve_pattern( + self, + pattern: Pattern, + args: Mapping[str, FluentValue], + errors: list[FrozenFluentError], + context: ResolutionContext, + ) -> str: ... 
+ + def _find_exact_variant( + self, + variants: Sequence[Variant], + selector_value: object, + selector_str: str, + ) -> Variant | None: + """Pass 1: find an exact string or numeric variant match.""" + numeric_for_match: int | Decimal | None = None + if isinstance(selector_value, FluentNumber): + numeric_for_match = selector_value.value + elif isinstance(selector_value, (int, Decimal)) and not isinstance(selector_value, bool): + numeric_for_match = selector_value + + sel_decimal: Decimal | None = None + if numeric_for_match is not None: + sel_decimal = Decimal(str(numeric_for_match)) + + for variant in variants: + match variant.key: + case Identifier(name=key_name): + if key_name == selector_str: + return variant + case NumberLiteral(raw=raw_str): + if sel_decimal is not None and Decimal(raw_str) == sel_decimal: + return variant + return None + + def _find_plural_variant( + self, variants: Sequence[Variant], plural_category: str + ) -> Variant | None: + """Pass 2: find a plural-category variant match.""" + for variant in variants: + match variant.key: + case Identifier(name=key_name): + if key_name == plural_category: + return variant + return None + + def _find_default_variant( + self, variants: Sequence[Variant] + ) -> Variant | None: + """Return the default variant, if one exists.""" + for variant in variants: + if variant.default: + return variant + return None + + def _resolve_select_expression( + self, + expr: SelectExpression, + args: Mapping[str, FluentValue], + errors: list[FrozenFluentError], + context: ResolutionContext, + ) -> str: + """Resolve a select expression using Fluent's matching order.""" + try: + with context.expression_guard: + selector_value = self._resolve_expression(expr.selector, args, errors, context) + except FrozenFluentError as error: + errors.append(error) + return self._resolve_fallback_variant(expr, args, errors, context) + + selector_str = self._format_value(selector_value) + + exact_match = 
self._find_exact_variant(expr.variants, selector_value, selector_str) + if exact_match is not None: + return self._resolve_pattern(exact_match.value, args, errors, context) + + numeric_value: int | Decimal | None = None + precision: int | None = None + if isinstance(selector_value, FluentNumber): + numeric_value = selector_value.value + precision = selector_value.precision + elif isinstance(selector_value, (int, Decimal)) and not isinstance( + selector_value, bool + ): + numeric_value = selector_value + + if numeric_value is not None: + try: + plural_category = _resolver_module.select_plural_category( + numeric_value, + self._locale, + precision, + ) + plural_match = self._find_plural_variant(expr.variants, plural_category) + if plural_match is not None: + return self._resolve_pattern(plural_match.value, args, errors, context) + except BabelImportError: + diag = ErrorTemplate.plural_support_unavailable() + errors.append( + FrozenFluentError(str(diag), ErrorCategory.RESOLUTION, diagnostic=diag) + ) + + return self._resolve_fallback_variant(expr, args, errors, context) + + def _resolve_fallback_variant( + self, + expr: SelectExpression, + args: Mapping[str, FluentValue], + errors: list[FrozenFluentError], + context: ResolutionContext, + ) -> str: + """Resolve the default or first variant after selector failure.""" + default_variant = self._find_default_variant(expr.variants) + if default_variant is not None: + return self._resolve_pattern(default_variant.value, args, errors, context) + + if expr.variants: + return self._resolve_pattern(expr.variants[0].value, args, errors, context) + + diag = ErrorTemplate.no_variants() + raise FrozenFluentError(str(diag), ErrorCategory.RESOLUTION, diagnostic=diag) diff --git a/src/ftllexengine/runtime/rwlock.py b/src/ftllexengine/runtime/rwlock.py index e476669c..3be289ab 100644 --- a/src/ftllexengine/runtime/rwlock.py +++ b/src/ftllexengine/runtime/rwlock.py @@ -62,20 +62,20 @@ class RWLock: Write lock reentrancy is prohibited: 
raises RuntimeError. Example: - >>> lock = RWLock() - >>> - >>> # Multiple readers can proceed concurrently - >>> with lock.read(): + >>> lock = RWLock() # doctest: +SKIP + + Multiple readers can proceed concurrently: + >>> with lock.read(): # doctest: +SKIP ... # Read data ... pass - >>> - >>> # Writers get exclusive access - >>> with lock.write(): + + Writers get exclusive access: + >>> with lock.write(): # doctest: +SKIP ... # Modify data ... pass - >>> - >>> # Reentrant read locks work - >>> with lock.read(): + + Reentrant read locks work: + >>> with lock.read(): # doctest: +SKIP ... with lock.read(): # Same thread can reacquire ... # Still shared ... pass @@ -128,10 +128,10 @@ def read(self, timeout: float | None = None) -> Generator[None]: None Example: - >>> with lock.read(): + >>> with lock.read(): # doctest: +SKIP ... # Safe to read data ... pass - >>> with lock.read(timeout=1.0): + >>> with lock.read(timeout=1.0): # doctest: +SKIP ... # Acquired within 1 second or TimeoutError raised ... pass """ @@ -164,10 +164,10 @@ def write(self, timeout: float | None = None) -> Generator[None]: None Example: - >>> with lock.write(): + >>> with lock.write(): # doctest: +SKIP ... # Exclusive access to modify data ... pass - >>> with lock.write(timeout=2.0): + >>> with lock.write(timeout=2.0): # doctest: +SKIP ... # Acquired within 2 seconds or TimeoutError raised ... 
pass """ diff --git a/src/ftllexengine/syntax/__init__.py b/src/ftllexengine/syntax/__init__.py index 5180d133..c78417ba 100644 --- a/src/ftllexengine/syntax/__init__.py +++ b/src/ftllexengine/syntax/__init__.py @@ -104,9 +104,9 @@ def parse(source: str) -> Resource: Resource containing parsed entries Example: - >>> from ftllexengine.syntax import parse - >>> resource = parse("hello = Hello, world!") - >>> resource.entries[0].id.name + >>> from ftllexengine.syntax import parse # doctest: +SKIP + >>> resource = parse("hello = Hello, world!") # doctest: +SKIP + >>> resource.entries[0].id.name # doctest: +SKIP 'hello' """ parser = FluentParserV1() @@ -131,12 +131,12 @@ def parse_stream(lines: Iterable[str]) -> Iterator[Entry]: Message, Term, Comment, or Junk AST nodes in document order. Example: - >>> from ftllexengine.syntax import parse_stream - >>> lines = ["greeting = Hello\\n", "\\n", "farewell = Bye\\n"] - >>> entries = list(parse_stream(lines)) - >>> len(entries) + >>> from ftllexengine.syntax import parse_stream # doctest: +SKIP + >>> lines = ["greeting = Hello\\n", "\\n", "farewell = Bye\\n"] # doctest: +SKIP + >>> entries = list(parse_stream(lines)) # doctest: +SKIP + >>> len(entries) # doctest: +SKIP 2 - >>> entries[0].id.name + >>> entries[0].id.name # doctest: +SKIP 'greeting' """ parser = FluentParserV1() diff --git a/src/ftllexengine/syntax/cursor.py b/src/ftllexengine/syntax/cursor.py index aa0afb16..5e9e9b71 100644 --- a/src/ftllexengine/syntax/cursor.py +++ b/src/ftllexengine/syntax/cursor.py @@ -58,20 +58,20 @@ class Cursor: len(source) is rejected to make the invariant explicit. Example: - >>> cursor = Cursor("hello", 0) - >>> cursor.current # Type: str (not str | None!) + >>> cursor = Cursor("hello", 0) # doctest: +SKIP + >>> cursor.current # Type: str (not str | None!) 
# doctest: +SKIP 'h' - >>> new_cursor = cursor.advance() - >>> new_cursor.current + >>> new_cursor = cursor.advance() # doctest: +SKIP + >>> new_cursor.current # doctest: +SKIP 'e' - >>> cursor.current # Original unchanged (immutability) + >>> cursor.current # Original unchanged (immutability) # doctest: +SKIP 'h' - >>> cursor.is_eof + >>> cursor.is_eof # doctest: +SKIP False - >>> eof_cursor = Cursor("hi", 2) - >>> eof_cursor.is_eof + >>> eof_cursor = Cursor("hi", 2) # doctest: +SKIP + >>> eof_cursor.is_eof # doctest: +SKIP True - >>> eof_cursor.current # Raises EOFError + >>> eof_cursor.current # Raises EOFError # doctest: +SKIP Traceback (most recent call last): ... EOFError: Unexpected EOF at position 2 @@ -203,14 +203,14 @@ def advance(self, count: int = 1) -> Cursor: The compiler enforces progress! Example: - >>> cursor = Cursor("hello", 0) - >>> cursor2 = cursor.advance() - >>> cursor.pos # Original unchanged + >>> cursor = Cursor("hello", 0) # doctest: +SKIP + >>> cursor2 = cursor.advance() # doctest: +SKIP + >>> cursor.pos # Original unchanged # doctest: +SKIP 0 - >>> cursor2.pos # New cursor advanced + >>> cursor2.pos # New cursor advanced # doctest: +SKIP 1 - >>> cursor.advance(0) # Raises: must advance by at least 1 + >>> cursor.advance(0) # Raises: must advance by at least 1 # doctest: +SKIP Traceback (most recent call last): ... ValueError: advance() count must be >= 1, got 0 @@ -239,11 +239,11 @@ def slice_to(self, end_pos: int) -> str: Source substring from current position to end_pos Example: - >>> cursor = Cursor("hello world", 0) - >>> start_cursor = cursor - >>> while not cursor.is_eof and cursor.current != ' ': + >>> cursor = Cursor("hello world", 0) # doctest: +SKIP + >>> start_cursor = cursor # doctest: +SKIP + >>> while not cursor.is_eof and cursor.current != ' ': # doctest: +SKIP ... 
cursor = cursor.advance() - >>> start_cursor.slice_to(cursor.pos) + >>> start_cursor.slice_to(cursor.pos) # doctest: +SKIP 'hello' """ return self.source[self.pos : end_pos] @@ -259,16 +259,16 @@ def skip_spaces(self) -> Cursor: This matches Fluent parser specification for inline whitespace. Example: - >>> cursor = Cursor(" hello", 0) - >>> new_cursor = cursor.skip_spaces() - >>> new_cursor.pos + >>> cursor = Cursor(" hello", 0) # doctest: +SKIP + >>> new_cursor = cursor.skip_spaces() # doctest: +SKIP + >>> new_cursor.pos # doctest: +SKIP 3 - >>> new_cursor.current + >>> new_cursor.current # doctest: +SKIP 'h' - >>> cursor = Cursor("hello", 0) - >>> new_cursor = cursor.skip_spaces() - >>> new_cursor.pos # No spaces to skip + >>> cursor = Cursor("hello", 0) # doctest: +SKIP + >>> new_cursor = cursor.skip_spaces() # doctest: +SKIP + >>> new_cursor.pos # No spaces to skip # doctest: +SKIP 0 """ # O(1) cursor allocation: compute final position via integer arithmetic @@ -291,16 +291,16 @@ def skip_whitespace(self) -> Cursor: to LF at parser entry (see FluentParserV1.parse()). Example: - >>> cursor = Cursor(" \\n hello", 0) - >>> new_cursor = cursor.skip_whitespace() - >>> new_cursor.pos + >>> cursor = Cursor(" \\n hello", 0) # doctest: +SKIP + >>> new_cursor = cursor.skip_whitespace() # doctest: +SKIP + >>> new_cursor.pos # doctest: +SKIP 5 - >>> new_cursor.current + >>> new_cursor.current # doctest: +SKIP 'h' - >>> cursor = Cursor("hello", 0) - >>> new_cursor = cursor.skip_whitespace() - >>> new_cursor.pos # No whitespace to skip + >>> cursor = Cursor("hello", 0) # doctest: +SKIP + >>> new_cursor = cursor.skip_whitespace() # doctest: +SKIP + >>> new_cursor.pos # No whitespace to skip # doctest: +SKIP 0 """ # O(1) cursor allocation: compute final position via integer arithmetic @@ -326,17 +326,17 @@ def expect(self, char: str) -> Cursor | None: For required characters, use current property with explicit check. 
Example: - >>> cursor = Cursor("hello", 0) - >>> new_cursor = cursor.expect('h') - >>> new_cursor.pos if new_cursor else None + >>> cursor = Cursor("hello", 0) # doctest: +SKIP + >>> new_cursor = cursor.expect('h') # doctest: +SKIP + >>> new_cursor.pos if new_cursor else None # doctest: +SKIP 1 - >>> cursor = Cursor("hello", 0) - >>> cursor.expect('x') # No match + >>> cursor = Cursor("hello", 0) # doctest: +SKIP + >>> cursor.expect('x') # No match # doctest: +SKIP None - >>> eof_cursor = Cursor("hi", 2) - >>> eof_cursor.expect('h') # At EOF + >>> eof_cursor = Cursor("hi", 2) # doctest: +SKIP + >>> eof_cursor.expect('h') # At EOF # doctest: +SKIP None """ if not self.is_eof and self.current == char: @@ -358,12 +358,12 @@ def slice_ahead(self, n: int) -> str: Does not advance the cursor position. Example: - >>> cursor = Cursor("hello", 0) - >>> cursor.slice_ahead(3) + >>> cursor = Cursor("hello", 0) # doctest: +SKIP + >>> cursor.slice_ahead(3) # doctest: +SKIP 'hel' - >>> cursor.pos # Unchanged + >>> cursor.pos # Unchanged # doctest: +SKIP 0 - >>> cursor.slice_ahead(10) # More than available + >>> cursor.slice_ahead(10) # More than available # doctest: +SKIP 'hello' """ return self.source[self.pos : self.pos + n] @@ -380,9 +380,9 @@ def skip_line_end(self) -> Cursor: performs this normalization automatically. Example: - >>> cursor = Cursor("hello\\nworld", 5) # At \\n - >>> new_cursor = cursor.skip_line_end() - >>> new_cursor.pos + >>> cursor = Cursor("hello\\nworld", 5) # At \\n # doctest: +SKIP + >>> new_cursor = cursor.skip_line_end() # doctest: +SKIP + >>> new_cursor.pos # doctest: +SKIP 6 """ if not self.is_eof and self.current == "\n": @@ -404,11 +404,11 @@ def skip_to_line_end(self) -> Cursor: performs this normalization automatically. 
Example: - >>> cursor = Cursor("hello\\nworld", 0) - >>> new_cursor = cursor.skip_to_line_end() - >>> new_cursor.pos + >>> cursor = Cursor("hello\\nworld", 0) # doctest: +SKIP + >>> new_cursor = cursor.skip_to_line_end() # doctest: +SKIP + >>> new_cursor.pos # doctest: +SKIP 5 - >>> new_cursor.current + >>> new_cursor.current # doctest: +SKIP '\\n' """ # C-level str.find() avoids O(N) cursor allocations @@ -429,8 +429,8 @@ def count_newlines_before(self) -> int: More efficient than source[:pos].count() for large files. Example: - >>> cursor = Cursor("a\\nb\\nc", 4) # At 'c' - >>> cursor.count_newlines_before() + >>> cursor = Cursor("a\\nb\\nc", 4) # At 'c' # doctest: +SKIP + >>> cursor.count_newlines_before() # doctest: +SKIP 2 """ return self.source.count("\n", 0, self.pos) @@ -446,15 +446,15 @@ def compute_line_col(self) -> tuple[int, int]: Only call for error reporting, not during normal parsing! Example: - >>> source = "line1\\nline2\\nline3" - >>> cursor = Cursor(source, 0) - >>> cursor.compute_line_col() + >>> source = "line1\\nline2\\nline3" # doctest: +SKIP + >>> cursor = Cursor(source, 0) # doctest: +SKIP + >>> cursor.compute_line_col() # doctest: +SKIP (1, 1) - >>> cursor = Cursor(source, 6) # Start of line2 - >>> cursor.compute_line_col() + >>> cursor = Cursor(source, 6) # Start of line2 # doctest: +SKIP + >>> cursor.compute_line_col() # doctest: +SKIP (2, 1) - >>> cursor = Cursor(source, 8) # Middle of line2 - >>> cursor.compute_line_col() + >>> cursor = Cursor(source, 8) # Middle of line2 # doctest: +SKIP + >>> cursor.compute_line_col() # doctest: +SKIP (2, 3) """ # Count newlines before current position (O(1) memory) @@ -480,13 +480,13 @@ class LineOffsetCache: each position, which is O(n) per call. 
Example: - >>> source = "line1\\nline2\\nline3" - >>> cache = LineOffsetCache(source) - >>> cache.get_line_col(0) # Start of line 1 + >>> source = "line1\\nline2\\nline3" # doctest: +SKIP + >>> cache = LineOffsetCache(source) # doctest: +SKIP + >>> cache.get_line_col(0) # Start of line 1 # doctest: +SKIP (1, 1) - >>> cache.get_line_col(6) # Start of line 2 + >>> cache.get_line_col(6) # Start of line 2 # doctest: +SKIP (2, 1) - >>> cache.get_line_col(8) # Third char of line 2 + >>> cache.get_line_col(8) # Third char of line 2 # doctest: +SKIP (2, 3) Thread Safety: @@ -531,10 +531,10 @@ def get_line_col(self, pos: int) -> tuple[int, int]: O(log n) where n = number of lines Example: - >>> cache = LineOffsetCache("abc\\ndef\\nghi") - >>> cache.get_line_col(0) + >>> cache = LineOffsetCache("abc\\ndef\\nghi") # doctest: +SKIP + >>> cache.get_line_col(0) # doctest: +SKIP (1, 1) - >>> cache.get_line_col(4) # 'd' in "def" + >>> cache.get_line_col(4) # 'd' in "def" # doctest: +SKIP (2, 1) """ # Clamp position to valid range @@ -580,14 +580,14 @@ def parse_foo(cursor: Cursor) -> Result[ParseResult[Foo], ParseError]: return Success(ParseResult(parsed_value, new_cursor)) Example: - >>> cursor = Cursor("hello", 0) - >>> # Parse single character - >>> result = ParseResult('h', cursor.advance()) - >>> result.value + >>> cursor = Cursor("hello", 0) # doctest: +SKIP + Parse a single character: + >>> result = ParseResult('h', cursor.advance()) # doctest: +SKIP + >>> result.value # doctest: +SKIP 'h' - >>> result.cursor.pos + >>> result.cursor.pos # doctest: +SKIP 1 - >>> result.cursor.current + >>> result.cursor.current # doctest: +SKIP 'e' """ @@ -606,9 +606,9 @@ class ParseError: - Immutable for error chaining Example: - >>> cursor = Cursor("hello", 2) - >>> error = ParseError("Expected '}'", cursor, expected=('}', ']')) - >>> error.format_error() + >>> cursor = Cursor("hello", 2) # doctest: +SKIP + >>> error = ParseError("Expected '}'", cursor, expected=('}', ']')) # doctest: 
+SKIP + >>> error.format_error() # doctest: +SKIP "1:3: Expected '}' (expected: '}', ']')" """ @@ -623,13 +623,13 @@ def format_error(self) -> str: Formatted error string with location Example: - >>> cursor = Cursor("hello\\nworld", 7) - >>> error = ParseError("Expected ']'", cursor) - >>> error.format_error() + >>> cursor = Cursor("hello\\nworld", 7) # doctest: +SKIP + >>> error = ParseError("Expected ']'", cursor) # doctest: +SKIP + >>> error.format_error() # doctest: +SKIP "2:2: Expected ']'" - >>> error2 = ParseError("Unexpected", cursor, expected=(']', '}')) - >>> error2.format_error() + >>> error2 = ParseError("Unexpected", cursor, expected=(']', '}')) # doctest: +SKIP + >>> error2.format_error() # doctest: +SKIP "2:2: Unexpected (expected: ']', '}')" """ line, col = self.cursor.compute_line_col() @@ -653,10 +653,10 @@ def format_with_context(self, context_lines: int = 2) -> str: Multi-line formatted error with context Example: - >>> source = "hello = Hi\\nworld = { $name\\nfoo = Bar" - >>> cursor = Cursor(source, 26) # After $name - >>> error = ParseError("Expected '}'", cursor) - >>> print(error.format_with_context()) + >>> source = "hello = Hi\\nworld = { $name\\nfoo = Bar" # doctest: +SKIP + >>> cursor = Cursor(source, 26) # After $name # doctest: +SKIP + >>> error = ParseError("Expected '}'", cursor) # doctest: +SKIP + >>> print(error.format_with_context()) # doctest: +SKIP 2:15: Expected '}' 1 | hello = Hi diff --git a/src/ftllexengine/syntax/parser/__init__.py b/src/ftllexengine/syntax/parser/__init__.py index 929a9786..e3027550 100644 --- a/src/ftllexengine/syntax/parser/__init__.py +++ b/src/ftllexengine/syntax/parser/__init__.py @@ -5,16 +5,20 @@ Module Organization: - core.py: Main FluentParserV1 class and parse() entry point +- context.py: ParseContext depth-tracking state +- entries.py: Message, term, and comment parsing +- expressions.py: Inline expressions, calls, and select expressions +- patterns.py: Pattern parsing and multiline continuation 
handling - primitives.py: Basic parsers (identifiers, numbers, strings) - whitespace.py: Whitespace handling and continuation detection -- rules.py: All grammar rules (patterns, expressions, entries) +- rules.py: Aggregated grammar surface for advanced internal/test usage Public API: FluentParserV1: Main parser class ParseContext: Parse context for depth tracking (advanced usage) """ +from ftllexengine.syntax.parser.context import ParseContext from ftllexengine.syntax.parser.core import FluentParserV1 -from ftllexengine.syntax.parser.rules import ParseContext __all__ = ["FluentParserV1", "ParseContext"] diff --git a/src/ftllexengine/syntax/parser/context.py b/src/ftllexengine/syntax/parser/context.py new file mode 100644 index 00000000..f09cba33 --- /dev/null +++ b/src/ftllexengine/syntax/parser/context.py @@ -0,0 +1,69 @@ +"""Parse-context state shared across Fluent grammar modules.""" + +from __future__ import annotations + +from dataclasses import dataclass + +from ftllexengine.constants import MAX_DEPTH + +__all__ = ["ParseContext"] + + +@dataclass(slots=True) +class ParseContext: + """Explicit context for parsing operations. + + Replaces thread-local state with explicit parameter passing for: + - Thread safety without global state + - Async framework compatibility + - Easier testing (no state reset needed) + - Clear dependency flow + + Security: + Tracks nesting depth for BOTH placeables and function calls to prevent + stack overflow DoS attacks. Deeply nested constructs like: + - { { { ... } } } (nested placeables) + - { A(B(C(D(...)))) } (nested function calls) + Both consume stack frames and must be bounded. + + Attributes: + max_nesting_depth: Maximum allowed nesting depth for placeables and calls + current_depth: Current nesting depth (0 = top level) + _depth_exceeded_flag: Mutable flag (list container) shared across all nested + contexts to track if depth limit was exceeded during parse. 
Uses list[bool] + as a mutable reference that persists when context objects are copied during + enter_nesting(). Set to [True] when depth exceeded; checked at Junk creation + to emit specific PARSE_NESTING_DEPTH_EXCEEDED diagnostic. + """ + + max_nesting_depth: int = MAX_DEPTH + current_depth: int = 0 + _depth_exceeded_flag: list[bool] | None = None + + def __post_init__(self) -> None: + """Initialize mutable depth exceeded flag if not provided.""" + if self._depth_exceeded_flag is None: + self._depth_exceeded_flag = [False] + + def is_depth_exceeded(self) -> bool: + """Check if maximum nesting depth has been exceeded.""" + return self.current_depth >= self.max_nesting_depth + + def mark_depth_exceeded(self) -> None: + """Mark that depth limit was exceeded during parse.""" + if self._depth_exceeded_flag is not None: + self._depth_exceeded_flag[0] = True + + def was_depth_exceeded(self) -> bool: + """Check if depth limit was exceeded at any point during parse.""" + return bool( + self._depth_exceeded_flag is not None and self._depth_exceeded_flag[0] + ) + + def enter_nesting(self) -> ParseContext: + """Create new context with incremented depth for entering nested construct.""" + return ParseContext( + max_nesting_depth=self.max_nesting_depth, + current_depth=self.current_depth + 1, + _depth_exceeded_flag=self._depth_exceeded_flag, + ) diff --git a/src/ftllexengine/syntax/parser/core.py b/src/ftllexengine/syntax/parser/core.py index b0f75abb..e77ca382 100644 --- a/src/ftllexengine/syntax/parser/core.py +++ b/src/ftllexengine/syntax/parser/core.py @@ -5,7 +5,9 @@ Architecture: The parser uses an immutable cursor pattern (:class:`~ftllexengine.syntax.cursor.Cursor`) - to traverse source text. Each sub-parser (in :mod:`~ftllexengine.syntax.parser.rules`, + to traverse source text. 
Each sub-parser (in :mod:`~ftllexengine.syntax.parser.entries`, + :mod:`~ftllexengine.syntax.parser.expressions`, + :mod:`~ftllexengine.syntax.parser.patterns`, :mod:`~ftllexengine.syntax.parser.primitives`, etc.) returns either a :class:`~ftllexengine.syntax.cursor.ParseResult` containing the parsed AST node and updated cursor position, or None on parse failure. @@ -25,7 +27,9 @@ See Also: - :mod:`ftllexengine.syntax.ast` - All AST node type definitions - :mod:`ftllexengine.syntax.cursor` - Cursor and ParseResult types - - :mod:`ftllexengine.syntax.parser.rules` - Grammar rules (patterns, expressions, entries) + - :mod:`ftllexengine.syntax.parser.entries` - Top-level entry parsing + - :mod:`ftllexengine.syntax.parser.expressions` - Inline and select expressions + - :mod:`ftllexengine.syntax.parser.patterns` - Pattern parsing and continuation rules """ from __future__ import annotations @@ -49,13 +53,9 @@ Term, ) from ftllexengine.syntax.cursor import Cursor +from ftllexengine.syntax.parser.context import ParseContext +from ftllexengine.syntax.parser.entries import parse_comment, parse_message, parse_term from ftllexengine.syntax.parser.primitives import is_identifier_start -from ftllexengine.syntax.parser.rules import ( - ParseContext, - parse_comment, - parse_message, - parse_term, -) from ftllexengine.syntax.parser.whitespace import skip_blank if TYPE_CHECKING: @@ -296,10 +296,10 @@ def parse(self, source: str) -> Resource: # noqa: PLR0915 - main parser loop parameter in constructor. 
Example: - >>> parser = FluentParserV1() - >>> resource = parser.parse("hello = World") - >>> message = resource.entries[0] - >>> message.id.name + >>> parser = FluentParserV1() # doctest: +SKIP + >>> resource = parser.parse("hello = World") # doctest: +SKIP + >>> message = resource.entries[0] # doctest: +SKIP + >>> message.id.name # doctest: +SKIP 'hello' See Also: @@ -577,12 +577,12 @@ def parse_stream(self, lines: Iterable[str]) -> Iterator[Entry]: Message, Term, Comment, or Junk AST nodes in document order. Example: - >>> parser = FluentParserV1() - >>> ftl_lines = ["greeting = Hello\\n", "\\n", "farewell = Bye\\n"] - >>> entries = list(parser.parse_stream(ftl_lines)) - >>> len(entries) + >>> parser = FluentParserV1() # doctest: +SKIP + >>> ftl_lines = ["greeting = Hello\\n", "\\n", "farewell = Bye\\n"] # doctest: +SKIP + >>> entries = list(parser.parse_stream(ftl_lines)) # doctest: +SKIP + >>> len(entries) # doctest: +SKIP 2 - >>> entries[0].id.name + >>> entries[0].id.name # doctest: +SKIP 'greeting' """ chunk: list[str] = [] diff --git a/src/ftllexengine/syntax/parser/entries.py b/src/ftllexengine/syntax/parser/entries.py new file mode 100644 index 00000000..cd8991b6 --- /dev/null +++ b/src/ftllexengine/syntax/parser/entries.py @@ -0,0 +1,229 @@ +"""Entry-oriented Fluent grammar rules.""" + +from __future__ import annotations + +from typing import TYPE_CHECKING + +from ftllexengine.enums import CommentType +from ftllexengine.syntax.ast import Attribute, Comment, Identifier, Message, Pattern, Span, Term +from ftllexengine.syntax.cursor import Cursor, ParseError, ParseResult +from ftllexengine.syntax.parser.patterns import parse_pattern +from ftllexengine.syntax.parser.primitives import parse_identifier +from ftllexengine.syntax.parser.whitespace import skip_blank_inline, skip_multiline_pattern_start + +if TYPE_CHECKING: + from ftllexengine.syntax.parser.context import ParseContext + +__all__ = [ + "parse_comment", + "parse_message", + 
"parse_message_attributes", + "parse_message_header", + "parse_term", + "validate_message_content", +] + +_COMMENT_TYPE_BY_HASH_COUNT: tuple[CommentType, CommentType, CommentType] = ( + CommentType.COMMENT, + CommentType.GROUP, + CommentType.RESOURCE, +) + + +def parse_message_header(cursor: Cursor) -> ParseResult[tuple[str, int]] | None: + """Parse message header: Identifier "=".""" + id_result = parse_identifier(cursor) + if isinstance(id_result, ParseError): + return None + + id_end_pos = id_result.cursor.pos + cursor = skip_blank_inline(id_result.cursor) + if cursor.is_eof or cursor.current != "=": + return None + + cursor = cursor.advance() + return ParseResult((id_result.value, id_end_pos), cursor) + + +def parse_message_attributes( + cursor: Cursor, + context: ParseContext | None = None, +) -> ParseResult[list[Attribute]] | None: + """Parse zero or more message attributes.""" + attributes: list[Attribute] = [] + + while not cursor.is_eof: + if cursor.current != "\n": + break + + cursor = cursor.advance() + while not cursor.is_eof and cursor.current == "\n": + cursor = cursor.advance() + + saved_cursor = cursor + cursor = cursor.skip_spaces() + if cursor.is_eof or cursor.current != ".": + cursor = saved_cursor + break + + attr_result = parse_attribute(saved_cursor, context) + if attr_result is None: + cursor = saved_cursor + break + + attributes.append(attr_result.value) + cursor = attr_result.cursor + + return ParseResult(attributes, cursor) + + +def validate_message_content(pattern: Pattern | None, attributes: list[Attribute]) -> bool: + """Validate message has either pattern or attributes.""" + has_pattern = pattern is not None and len(pattern.elements) > 0 + has_attributes = len(attributes) > 0 + return has_pattern or has_attributes + + +def parse_message( + cursor: Cursor, + context: ParseContext | None = None, +) -> ParseResult[Message] | None: + """Parse message with full support for select expressions.""" + start_pos = cursor.pos + id_result = 
parse_message_header(cursor) + if id_result is None: + return id_result + + id_name, id_end_pos = id_result.value + cursor = id_result.cursor + cursor, initial_indent = skip_multiline_pattern_start(cursor) + pattern_result = parse_pattern(cursor, context, initial_common_indent=initial_indent) + if pattern_result is None: + return pattern_result + + cursor = pattern_result.cursor + attributes_result = parse_message_attributes(cursor, context) + if attributes_result is None: + return attributes_result + + cursor = attributes_result.cursor + if not validate_message_content(pattern_result.value, attributes_result.value): + return None + + message = Message( + id=Identifier(id_name, span=Span(start=start_pos, end=id_end_pos)), + value=pattern_result.value, + attributes=tuple(attributes_result.value), + span=Span(start=start_pos, end=cursor.pos), + ) + return ParseResult(message, cursor) + + +def parse_attribute( + cursor: Cursor, + context: ParseContext | None = None, +) -> ParseResult[Attribute] | None: + """Parse message attribute (.attribute = pattern).""" + cursor = skip_blank_inline(cursor) + if cursor.is_eof or cursor.current != ".": + return None + + attr_start_pos = cursor.pos + cursor = cursor.advance() + id_start_pos = cursor.pos + + id_result = parse_identifier(cursor) + if isinstance(id_result, ParseError): + return None + + id_end_pos = id_result.cursor.pos + cursor = skip_blank_inline(id_result.cursor) + if cursor.is_eof or cursor.current != "=": + return None + + cursor = cursor.advance() + cursor, initial_indent = skip_multiline_pattern_start(cursor) + pattern_result = parse_pattern(cursor, context, initial_common_indent=initial_indent) + if pattern_result is None: + return pattern_result + + attribute = Attribute( + id=Identifier(id_result.value, span=Span(start=id_start_pos, end=id_end_pos)), + value=pattern_result.value, + span=Span(start=attr_start_pos, end=pattern_result.cursor.pos), + ) + return ParseResult(attribute, pattern_result.cursor) + + 
+def parse_term( + cursor: Cursor, + context: ParseContext | None = None, +) -> ParseResult[Term] | None: + """Parse term definition (-term-id = pattern).""" + start_pos = cursor.pos + if cursor.is_eof or cursor.current != "-": + return None + + cursor = cursor.advance() + id_start_pos = cursor.pos + id_result = parse_identifier(cursor) + if isinstance(id_result, ParseError): + return None + + id_end_pos = id_result.cursor.pos + cursor = skip_blank_inline(id_result.cursor) + if cursor.is_eof or cursor.current != "=": + return None + + cursor = cursor.advance() + cursor, initial_indent = skip_multiline_pattern_start(cursor) + pattern_result = parse_pattern(cursor, context, initial_common_indent=initial_indent) + if pattern_result is None: + return pattern_result + + cursor = pattern_result.cursor + if not pattern_result.value.elements: + return None + + attributes_result = parse_message_attributes(cursor, context) + if attributes_result is None: + return None + + cursor = attributes_result.cursor + term = Term( + id=Identifier(id_result.value, span=Span(start=id_start_pos, end=id_end_pos)), + value=pattern_result.value, + attributes=tuple(attributes_result.value), + span=Span(start=start_pos, end=cursor.pos), + ) + return ParseResult(term, cursor) + + +def parse_comment(cursor: Cursor) -> ParseResult[Comment] | None: + """Parse comment line per Fluent spec.""" + start_pos = cursor.pos + hash_count = 0 + temp_cursor = cursor + while not temp_cursor.is_eof and temp_cursor.current == "#": + hash_count += 1 + temp_cursor = temp_cursor.advance() + + if hash_count > 3: + return None + + comment_type = _COMMENT_TYPE_BY_HASH_COUNT[hash_count - 1] + cursor = temp_cursor + if not cursor.is_eof and cursor.current == " ": + cursor = cursor.advance() + + content_start = cursor.pos + cursor = cursor.skip_to_line_end() + content = cursor.source[content_start : cursor.pos] + cursor = cursor.skip_line_end() + + comment_node = Comment( + content=content, + type=comment_type, + 
span=Span(start=start_pos, end=cursor.pos), + ) + return ParseResult(comment_node, cursor) diff --git a/src/ftllexengine/syntax/parser/expressions.py b/src/ftllexengine/syntax/parser/expressions.py new file mode 100644 index 00000000..d0d75cd9 --- /dev/null +++ b/src/ftllexengine/syntax/parser/expressions.py @@ -0,0 +1,594 @@ +"""Expression-oriented Fluent grammar rules.""" + +from __future__ import annotations + +from typing import cast + +from ftllexengine.syntax.ast import ( + CallArguments, + FunctionReference, + Identifier, + InlineExpression, + MessageReference, + NamedArgument, + NumberLiteral, + Placeable, + SelectExpression, + SelectorExpression, + Span, + StringLiteral, + TermReference, + VariableReference, + Variant, +) +from ftllexengine.syntax.cursor import Cursor, ParseError, ParseResult +from ftllexengine.syntax.parser.context import ParseContext +from ftllexengine.syntax.parser.patterns import parse_simple_pattern +from ftllexengine.syntax.parser.primitives import ( + _ASCII_DIGITS, + is_identifier_start, + parse_identifier, + parse_number, + parse_number_value, + parse_string_literal, +) +from ftllexengine.syntax.parser.whitespace import skip_blank, skip_blank_inline + +__all__ = [ + "parse_argument_expression", + "parse_call_arguments", + "parse_function_reference", + "parse_inline_expression", + "parse_placeable", + "parse_select_expression", + "parse_term_reference", + "parse_variable_reference", + "parse_variant", + "parse_variant_key", +] + + +def parse_variable_reference(cursor: Cursor) -> ParseResult[VariableReference] | None: + """Parse variable reference: $variable.""" + start_pos = cursor.pos + + if cursor.is_eof or cursor.current != "$": + return None + + cursor = cursor.advance() + id_start_pos = cursor.pos + + result = parse_identifier(cursor) + if isinstance(result, ParseError): + return None + + var_ref = VariableReference( + id=Identifier( + result.value, + span=Span(start=id_start_pos, end=result.cursor.pos), + ), + 
span=Span(start=start_pos, end=result.cursor.pos), + ) + return ParseResult(var_ref, result.cursor) + + +def parse_variant_key(cursor: Cursor) -> ParseResult[Identifier | NumberLiteral] | None: + """Parse variant key (identifier or number).""" + start_pos = cursor.pos + + if not cursor.is_eof and (cursor.current in _ASCII_DIGITS or cursor.current == "-"): + num_result = parse_number(cursor) + if not isinstance(num_result, ParseError): + num_str = num_result.value + num_value = parse_number_value(num_str) + return ParseResult( + NumberLiteral(value=num_value, raw=num_str), num_result.cursor + ) + + id_result = parse_identifier(cursor) + if isinstance(id_result, ParseError): + return None + + return ParseResult( + Identifier(id_result.value, span=Span(start=start_pos, end=id_result.cursor.pos)), + id_result.cursor, + ) + + id_result = parse_identifier(cursor) + if isinstance(id_result, ParseError): + return None + + return ParseResult( + Identifier(id_result.value, span=Span(start=start_pos, end=id_result.cursor.pos)), + id_result.cursor, + ) + + +def parse_variant( + cursor: Cursor, + context: ParseContext | None = None, +) -> ParseResult[Variant] | None: + """Parse variant: [key] pattern or *[key] pattern.""" + is_default = False + if not cursor.is_eof and cursor.current == "*": + is_default = True + cursor = cursor.advance() + + if cursor.is_eof or cursor.current != "[": + return None + + cursor = cursor.advance() + cursor = skip_blank(cursor) + key_result = parse_variant_key(cursor) + if key_result is None: + return key_result + + cursor = skip_blank(key_result.cursor) + if cursor.is_eof or cursor.current != "]": + return None + + cursor = cursor.advance() + cursor = skip_blank_inline(cursor) + pattern_result = parse_simple_pattern(cursor, context) + if pattern_result is None: + return pattern_result + + variant = Variant(key=key_result.value, value=pattern_result.value, default=is_default) + return ParseResult(variant, pattern_result.cursor) + + +def 
parse_select_expression( + cursor: Cursor, + selector: SelectorExpression, + start_pos: int, + context: ParseContext | None = None, +) -> ParseResult[SelectExpression] | None: + """Parse select expression after seeing selector and ->.""" + cursor = skip_blank(cursor) + variants: list[Variant] = [] + + while not cursor.is_eof: + cursor = skip_blank(cursor) + + if cursor.is_eof or cursor.current == "}": + break + + variant_result = parse_variant(cursor, context) + if variant_result is None: + return variant_result + + variants.append(variant_result.value) + cursor = variant_result.cursor + + if not variants: + return None + + default_count = sum(1 for variant in variants if variant.default) + if default_count != 1: + return None + + span = Span(start=start_pos, end=cursor.pos) + select_expr = SelectExpression(selector=selector, variants=tuple(variants), span=span) + return ParseResult(select_expr, cursor) + + +def _parse_message_attribute(cursor: Cursor) -> tuple[Identifier | None, Cursor]: + """Parse optional .attribute suffix on message/function references.""" + if cursor.is_eof or cursor.current != ".": + return None, cursor + cursor = cursor.advance() + attr_start = cursor.pos + attr_id_result = parse_identifier(cursor) + if isinstance(attr_id_result, ParseError): + return None, cursor + attr_id = Identifier( + attr_id_result.value, + span=Span(start=attr_start, end=attr_id_result.cursor.pos), + ) + return attr_id, attr_id_result.cursor + + +def parse_argument_expression( + cursor: Cursor, + context: ParseContext | None = None, +) -> ParseResult[InlineExpression] | None: + """Parse a single argument expression per FTL spec.""" + if cursor.is_eof: + return None + + start_pos = cursor.pos + ch = cursor.current + + if ch == "$": + var_result = parse_variable_reference(cursor) + if var_result is None: + return None + return ParseResult(var_result.value, var_result.cursor) + + if ch == '"': + str_result = parse_string_literal(cursor) + if isinstance(str_result, 
ParseError): + return None + return ParseResult(StringLiteral(value=str_result.value), str_result.cursor) + + if ch == "-": + next_cursor = cursor.advance() + if not next_cursor.is_eof and is_identifier_start(next_cursor.current): + term_result = parse_term_reference(cursor, context) + if term_result is None: + return None + return ParseResult(term_result.value, term_result.cursor) + + num_result = parse_number(cursor) + if isinstance(num_result, ParseError): + return None + num_value = parse_number_value(num_result.value) + return ParseResult( + NumberLiteral(value=num_value, raw=num_result.value), num_result.cursor + ) + + if ch in _ASCII_DIGITS: + num_result = parse_number(cursor) + if isinstance(num_result, ParseError): + return None + num_value = parse_number_value(num_result.value) + return ParseResult( + NumberLiteral(value=num_value, raw=num_result.value), num_result.cursor + ) + + if ch == "{": + cursor = cursor.advance() + placeable_result = parse_placeable(cursor, context) + if placeable_result is None: + return None + return ParseResult(placeable_result.value, placeable_result.cursor) + + if is_identifier_start(ch) or ch == "_": + id_result = parse_identifier(cursor) + if isinstance(id_result, ParseError): + return None + + name = id_result.value + cursor_after_id = id_result.cursor + lookahead = skip_blank_inline(cursor_after_id) + if not lookahead.is_eof and lookahead.current == "(": + func_result = parse_function_reference(cursor, context) + if func_result is None: + return None + return ParseResult(func_result.value, func_result.cursor) + + attribute, final_cursor = _parse_message_attribute(cursor_after_id) + return ParseResult( + MessageReference( + id=Identifier(name, span=Span(start=start_pos, end=cursor_after_id.pos)), + attribute=attribute, + span=Span(start=start_pos, end=final_cursor.pos), + ), + final_cursor, + ) + + return None + + +def parse_call_arguments( + cursor: Cursor, + context: ParseContext | None = None, +) -> 
ParseResult[CallArguments] | None: + """Parse function call arguments: (pos1, pos2, name1: val1, name2: val2).""" + cursor = skip_blank(cursor) + + positional: list[InlineExpression] = [] + named: list[NamedArgument] = [] + seen_named_arg_names: set[str] = set() + seen_named = False + + while not cursor.is_eof: + cursor = skip_blank(cursor) + if cursor.current == ")": + break + + arg_result = parse_argument_expression(cursor, context) + if arg_result is None: + return arg_result + + arg_expr = arg_result.value + cursor = skip_blank(arg_result.cursor) + + if not cursor.is_eof and cursor.current == ":": + cursor = cursor.advance() + cursor = skip_blank(cursor) + + if not isinstance(arg_expr, MessageReference): + return None + + arg_name = arg_expr.id.name + if arg_name in seen_named_arg_names: + return None + seen_named_arg_names.add(arg_name) + + if cursor.is_eof: + return None + + value_result = parse_argument_expression(cursor, context) + if value_result is None: + return value_result + + value_expr = value_result.value + cursor = value_result.cursor + if not isinstance(value_expr, (StringLiteral, NumberLiteral)): + return None + + named.append( + NamedArgument( + name=Identifier(arg_name, span=arg_expr.id.span), + value=value_expr, + ) + ) + seen_named = True + else: + if seen_named: + return None + positional.append(arg_expr) + + cursor = skip_blank(cursor) + if not cursor.is_eof and cursor.current == ",": + cursor = cursor.advance() + cursor = skip_blank(cursor) + + call_args = CallArguments(positional=tuple(positional), named=tuple(named)) + return ParseResult(call_args, cursor) + + +def parse_function_reference( + cursor: Cursor, + context: ParseContext | None = None, +) -> ParseResult[FunctionReference] | None: + """Parse function reference: identifier(args).""" + if context is None: + context = ParseContext() + + if context.is_depth_exceeded(): + return None + + start_pos = cursor.pos + id_result = parse_identifier(cursor) + if isinstance(id_result, 
ParseError): + return None + + func_name = id_result.value + cursor = skip_blank_inline(id_result.cursor) + if cursor.is_eof or cursor.current != "(": + return None + + cursor = cursor.advance() + nested_context = context.enter_nesting() + args_result = parse_call_arguments(cursor, nested_context) + if args_result is None: + return args_result + + cursor = skip_blank_inline(args_result.cursor) + if cursor.is_eof or cursor.current != ")": + return None + + cursor = cursor.advance() + func_ref = FunctionReference( + id=Identifier(func_name, span=Span(start=start_pos, end=id_result.cursor.pos)), + arguments=args_result.value, + span=Span(start=start_pos, end=cursor.pos), + ) + return ParseResult(func_ref, cursor) + + +def parse_term_reference( + cursor: Cursor, + context: ParseContext | None = None, +) -> ParseResult[TermReference] | None: + """Parse term reference in inline expression (-term-id or -term.attr).""" + if context is None: + context = ParseContext() + + start_pos = cursor.pos + if cursor.is_eof or cursor.current != "-": + return None + + cursor = cursor.advance() + id_start = cursor.pos + id_result = parse_identifier(cursor) + if isinstance(id_result, ParseError): + return None + + cursor = id_result.cursor + attribute: Identifier | None = None + if not cursor.is_eof and cursor.current == ".": + cursor = cursor.advance() + attr_start = cursor.pos + attr_id_result = parse_identifier(cursor) + if isinstance(attr_id_result, ParseError): + return None + attribute = Identifier( + attr_id_result.value, + span=Span(start=attr_start, end=attr_id_result.cursor.pos), + ) + cursor = attr_id_result.cursor + + cursor = skip_blank_inline(cursor) + arguments: CallArguments | None = None + if not cursor.is_eof and cursor.current == "(": + if context.is_depth_exceeded(): + return None + + cursor = cursor.advance() + nested_context = context.enter_nesting() + args_result = parse_call_arguments(cursor, nested_context) + if args_result is None: + return args_result + + 
cursor = skip_blank_inline(args_result.cursor) + if cursor.is_eof or cursor.current != ")": + return None + + cursor = cursor.advance() + arguments = args_result.value + + term_ref = TermReference( + id=Identifier(id_result.value, span=Span(start=id_start, end=id_result.cursor.pos)), + attribute=attribute, + arguments=arguments, + span=Span(start=start_pos, end=cursor.pos), + ) + return ParseResult(term_ref, cursor) + + +def _parse_inline_string_literal(cursor: Cursor) -> ParseResult[InlineExpression] | None: + """Parse string literal inline expression.""" + str_result = parse_string_literal(cursor) + if isinstance(str_result, ParseError): + return None + return ParseResult(StringLiteral(value=str_result.value), str_result.cursor) + + +def _parse_inline_number_literal(cursor: Cursor) -> ParseResult[InlineExpression] | None: + """Parse number literal inline expression.""" + num_result = parse_number(cursor) + if isinstance(num_result, ParseError): + return None + num_str = num_result.value + num_value = parse_number_value(num_str) + return ParseResult(NumberLiteral(value=num_value, raw=num_str), num_result.cursor) + + +def _parse_inline_hyphen( + cursor: Cursor, + context: ParseContext | None = None, +) -> ParseResult[InlineExpression] | None: + """Parse hyphen-prefixed expression.""" + next_cursor = cursor.advance() + if not next_cursor.is_eof and is_identifier_start(next_cursor.current): + term_result = parse_term_reference(cursor, context) + if term_result is None: + return None + return ParseResult(term_result.value, term_result.cursor) + return _parse_inline_number_literal(cursor) + + +def _parse_inline_identifier( + cursor: Cursor, + context: ParseContext | None = None, +) -> ParseResult[InlineExpression] | None: + """Parse identifier-based expression: function call or message reference.""" + start_pos = cursor.pos + id_result = parse_identifier(cursor) + if isinstance(id_result, ParseError): + return None + + name = id_result.value + cursor_after_id = 
id_result.cursor + lookahead = skip_blank_inline(cursor_after_id) + if not lookahead.is_eof and lookahead.current == "(": + func_result = parse_function_reference(cursor, context) + if func_result is None: + return None + return ParseResult(func_result.value, func_result.cursor) + + attribute, final_cursor = _parse_message_attribute(cursor_after_id) + return ParseResult( + MessageReference( + id=Identifier(name, span=Span(start=start_pos, end=cursor_after_id.pos)), + attribute=attribute, + span=Span(start=start_pos, end=final_cursor.pos), + ), + final_cursor, + ) + + +def parse_inline_expression( + cursor: Cursor, + context: ParseContext | None = None, +) -> ParseResult[InlineExpression] | None: + """Parse inline expression per Fluent spec.""" + if cursor.is_eof: + return None + + ch = cursor.current + match ch: + case "$": + var_result = parse_variable_reference(cursor) + if var_result is None: + return None + return ParseResult(var_result.value, var_result.cursor) + case '"': + return _parse_inline_string_literal(cursor) + case "-": + return _parse_inline_hyphen(cursor, context) + case "{": + placeable_result = parse_placeable(cursor.advance(), context) + if placeable_result is None: + return None + return ParseResult(placeable_result.value, placeable_result.cursor) + case _ if ch in _ASCII_DIGITS: + return _parse_inline_number_literal(cursor) + case _ if is_identifier_start(ch): + return _parse_inline_identifier(cursor, context) + case _: + return None + + +def parse_placeable( + cursor: Cursor, + context: ParseContext | None = None, +) -> ParseResult[Placeable] | None: + """Parse placeable expression.""" + if context is None: + context = ParseContext() + + if context.is_depth_exceeded(): + context.mark_depth_exceeded() + return None + + nested_context = context.enter_nesting() + cursor = skip_blank(cursor) + expr_start_pos = cursor.pos + + expr_result = parse_inline_expression(cursor, nested_context) + if expr_result is None: + return expr_result + + expression 
= expr_result.value + cursor = skip_blank(expr_result.cursor) + + is_valid_selector = isinstance( + expression, + ( + VariableReference, + StringLiteral, + NumberLiteral, + FunctionReference, + MessageReference, + TermReference, + ), + ) + if is_valid_selector and not cursor.is_eof and cursor.current == "-": + next_cursor = cursor.advance() + if not next_cursor.is_eof and next_cursor.current == ">": + cursor = next_cursor.advance() + select_result = parse_select_expression( + cursor, + cast("SelectorExpression", expression), + expr_start_pos, + nested_context, + ) + if select_result is None: + return select_result + + cursor = skip_blank(select_result.cursor) + if cursor.is_eof or cursor.current != "}": + return None + + cursor = cursor.advance() + return ParseResult(Placeable(expression=select_result.value), cursor) + + if cursor.is_eof or cursor.current != "}": + return None + + cursor = cursor.advance() + return ParseResult(Placeable(expression=expression), cursor) diff --git a/src/ftllexengine/syntax/parser/patterns.py b/src/ftllexengine/syntax/parser/patterns.py new file mode 100644 index 00000000..78cb1f88 --- /dev/null +++ b/src/ftllexengine/syntax/parser/patterns.py @@ -0,0 +1,358 @@ +"""Pattern-oriented Fluent grammar rules.""" + +from __future__ import annotations + +import importlib +from dataclasses import dataclass +from typing import TYPE_CHECKING, cast + +from ftllexengine.syntax.ast import Pattern, Placeable, TextElement +from ftllexengine.syntax.cursor import Cursor, ParseResult +from ftllexengine.syntax.parser.primitives import ( + _ASCII_DIGITS, + is_identifier_char, + is_identifier_start, +) +from ftllexengine.syntax.parser.whitespace import is_indented_continuation + +if TYPE_CHECKING: + from ftllexengine.syntax.parser.context import ParseContext + +__all__ = [ + "_MAX_LOOKAHEAD_CHARS", + "parse_pattern", + "parse_simple_pattern", +] + +# Maximum lookahead distance for variant marker detection. 
+# Must accommodate: '[' + optional_spaces + identifier (up to MAX_IDENTIFIER_LENGTH chars) +# + optional_spaces + ']'. Value of 300 ensures variant keys with maximum-length +# identifiers parse correctly while bounding lookahead on adversarial inputs. +_MAX_LOOKAHEAD_CHARS: int = 300 + + +def _parse_placeable( + cursor: Cursor, + context: ParseContext | None = None, +) -> ParseResult[Placeable] | None: + """Load the placeable parser lazily to keep grammar modules acyclic.""" + expressions = importlib.import_module("ftllexengine.syntax.parser.expressions") + return cast("ParseResult[Placeable] | None", expressions.parse_placeable(cursor, context)) + + +def _is_valid_variant_key_char(ch: str, *, is_first: bool) -> bool: + """Check if character is valid in a variant key (identifier or number).""" + if is_first: + return is_identifier_start(ch) or ch == "_" or ch in _ASCII_DIGITS + return is_identifier_char(ch) or ch == "." + + +def _is_variant_marker(cursor: Cursor) -> bool: + """Check if cursor is at a variant marker using bounded lookahead.""" + max_lookahead = _MAX_LOOKAHEAD_CHARS + + if cursor.is_eof: + return False + + ch = cursor.current + + if ch == "*": + next_cursor = cursor.advance() + return not next_cursor.is_eof and next_cursor.current == "[" + + if ch == "[": + scan = cursor.advance() + is_first = True + has_content = False + lookahead_count = 0 + + while not scan.is_eof and scan.current == " " and lookahead_count < max_lookahead: + scan = scan.advance() + lookahead_count += 1 + + while not scan.is_eof and lookahead_count < max_lookahead: + current = scan.current + lookahead_count += 1 + + if current == "]": + if not has_content: + return False + + after_bracket = scan.advance() + while ( + not after_bracket.is_eof + and after_bracket.current == " " + and lookahead_count < max_lookahead + ): + after_bracket = after_bracket.advance() + lookahead_count += 1 + + if after_bracket.is_eof: + return True + + return after_bracket.current in ("\n", "}", "[", 
"*") + + if current in ("\n", "{", "}", " ", "\t", ",", ":", ";", "=", "+", "*", "/"): + return False + if not _is_valid_variant_key_char(current, is_first=is_first): + return False + has_content = True + is_first = False + scan = scan.advance() + + return False + + +def _trim_pattern_blank_lines( + elements: list[TextElement | Placeable], +) -> tuple[TextElement | Placeable, ...]: + """Trim leading and trailing blank lines from pattern elements.""" + if not elements: + return () + + result = list(elements) + + while result and isinstance(result[0], TextElement): + first = result[0] + stripped = first.value.lstrip(" \n") + if stripped: + result[0] = TextElement(value=stripped) + break + result.pop(0) + + while result and isinstance(result[-1], TextElement): + last = result[-1] + text = last.value + last_newline = text.rfind("\n") + + if last_newline == -1: + break + + after_newline = text[last_newline + 1 :] + if after_newline.strip(" "): + break + + trimmed = text[:last_newline] + if trimmed: + result[-1] = TextElement(value=trimmed) + else: + result.pop() + + return tuple(result) + + +class _TextAccumulator: + """Accumulator for building TextElement with efficient string concatenation.""" + + __slots__ = ("fragments",) + + def __init__(self) -> None: + self.fragments: list[str] = [] + + def add(self, text: str) -> None: + """Add text fragment to accumulator.""" + self.fragments.append(text) + + def has_content(self) -> bool: + """Check if accumulator has any content.""" + return len(self.fragments) > 0 + + def finalize(self) -> TextElement: + """Create TextElement from accumulated fragments.""" + return TextElement(value="".join(self.fragments)) + + def clear(self) -> None: + """Clear accumulated fragments.""" + self.fragments.clear() + + +@dataclass(slots=True) +class _ContinuationResult: + """Result of processing a continuation line.""" + + cursor: Cursor + common_indent: int + extra_spaces: str + + +def _count_leading_spaces(cursor: Cursor) -> int: + """Count 
leading spaces at current position.""" + pos = cursor.pos + source = cursor.source + length = len(source) + start = pos + while pos < length and source[pos] == " ": + pos += 1 + return pos - start + + +def _skip_common_indent(cursor: Cursor, common_indent: int) -> tuple[Cursor, str]: + """Skip common indentation and return any extra spaces.""" + skipped = 0 + while skipped < common_indent and not cursor.is_eof and cursor.current == " ": + cursor = cursor.advance() + skipped += 1 + + extra_spaces: list[str] = [] + while not cursor.is_eof and cursor.current == " ": + extra_spaces.append(" ") + cursor = cursor.advance() + + return cursor, "".join(extra_spaces) + + +def _process_continuation_line( + cursor: Cursor, + common_indent: int | None, +) -> _ContinuationResult: + """Process a continuation line after newline.""" + while not cursor.is_eof and cursor.current == "\n": + cursor = cursor.advance() + + if common_indent is None: + common_indent = _count_leading_spaces(cursor) + cursor = cursor.skip_spaces() + extra_spaces = "" + else: + cursor, extra_spaces = _skip_common_indent(cursor, common_indent) + + return _ContinuationResult( + cursor=cursor, + common_indent=common_indent, + extra_spaces=extra_spaces, + ) + + +def _append_newline_to_elements( + elements: list[TextElement | Placeable], +) -> None: + """Append newline to last element or create new TextElement.""" + if elements and not isinstance(elements[-1], Placeable): + last_elem = elements[-1] + elements[-1] = TextElement(value=last_elem.value + "\n") + else: + elements.append(TextElement(value="\n")) + + +def parse_simple_pattern( + cursor: Cursor, + context: ParseContext | None = None, +) -> ParseResult[Pattern] | None: + """Parse simple pattern (text with optional placeables).""" + elements: list[TextElement | Placeable] = [] + common_indent: int | None = None + text_acc = _TextAccumulator() + + while not cursor.is_eof: + ch = cursor.current + + if ch == "}": + break + + if ch in ("[", "*") and 
_is_variant_marker(cursor): + break + + if ch == "\n": + if is_indented_continuation(cursor): + cursor = cursor.advance() + result = _process_continuation_line(cursor, common_indent) + cursor = result.cursor + common_indent = result.common_indent + _append_newline_to_elements(elements) + if result.extra_spaces: + text_acc.add(result.extra_spaces) + continue + break + + if ch == "{": + if text_acc.has_content(): + elements.append(text_acc.finalize()) + text_acc.clear() + + cursor = cursor.advance() + placeable_result = _parse_placeable(cursor, context) + if placeable_result is None: + return placeable_result + + cursor = placeable_result.cursor + elements.append(placeable_result.value) + else: + text_start = cursor.pos + while not cursor.is_eof: # pragma: no branch + ch = cursor.current + if ch in ("{", "\n", "}"): + break + if ch in ("[", "*") and _is_variant_marker(cursor): + break + cursor = cursor.advance() + + if cursor.pos > text_start: # pragma: no branch + text = Cursor(cursor.source, text_start).slice_to(cursor.pos) + if text_acc.has_content(): + text = text_acc.finalize().value + text + text_acc.clear() + elements.append(TextElement(value=text)) + + if text_acc.has_content(): + elements.append(text_acc.finalize()) + + return ParseResult(Pattern(elements=_trim_pattern_blank_lines(elements)), cursor) + + +def parse_pattern( + cursor: Cursor, + context: ParseContext | None = None, + *, + initial_common_indent: int | None = None, +) -> ParseResult[Pattern] | None: + """Parse full pattern with multi-line continuation support.""" + elements: list[TextElement | Placeable] = [] + common_indent: int | None = initial_common_indent or None + text_acc = _TextAccumulator() + + while not cursor.is_eof: + ch = cursor.current + + if ch == "\n": + if is_indented_continuation(cursor): + cursor = cursor.advance() + result = _process_continuation_line(cursor, common_indent) + cursor = result.cursor + common_indent = result.common_indent + _append_newline_to_elements(elements) 
+ if result.extra_spaces: + text_acc.add(result.extra_spaces) + continue + break + + if ch == "{": + if text_acc.has_content(): + elements.append(text_acc.finalize()) + text_acc.clear() + + cursor = cursor.advance() + placeable_result = _parse_placeable(cursor, context) + if placeable_result is None: + return placeable_result + + elements.append(placeable_result.value) + cursor = placeable_result.cursor + else: + text_start = cursor.pos + while not cursor.is_eof: + ch = cursor.current + if ch in ("{", "\n"): + break + cursor = cursor.advance() + + if cursor.pos > text_start: # pragma: no branch + text = Cursor(cursor.source, text_start).slice_to(cursor.pos) + if text_acc.has_content(): + text = text_acc.finalize().value + text + text_acc.clear() + elements.append(TextElement(value=text)) + + if text_acc.has_content(): + elements.append(text_acc.finalize()) + + return ParseResult(Pattern(elements=_trim_pattern_blank_lines(elements)), cursor) diff --git a/src/ftllexengine/syntax/parser/rules.py b/src/ftllexengine/syntax/parser/rules.py index 6bf5ddea..834fc8b8 100644 --- a/src/ftllexengine/syntax/parser/rules.py +++ b/src/ftllexengine/syntax/parser/rules.py @@ -1,2144 +1,74 @@ -"""Grammar rules for Fluent FTL parser. - -This module provides all parsing rules for FTL grammar constructs: -- Pattern parsing (variable references, text elements, placeables) -- Expression parsing (inline expressions, select expressions, function calls) -- Entry parsing (messages, terms, attributes, comments) - -All grammar rules are co-located in a single module to: -1. Eliminate circular imports between interdependent parsing functions -2. Simplify the import graph -3. 
Allow direct function calls instead of function-local imports - -Lookahead Patterns: - The parser uses character-based lookahead for disambiguation: - - `{` starts a Placeable - - `$` starts a VariableReference - - `-` followed by identifier starts a TermReference - - `.` in specific contexts starts an attribute access - - `*[` marks the default variant in SelectExpression - - These single-character or two-character lookaheads are implemented inline - using cursor.peek(n) rather than separate Lookahead helper classes. While - this creates some code duplication, it keeps the parsing logic explicit - and easy to trace. Future refactoring could extract common patterns into - a Lookahead utility class if the grammar expands significantly. - -Security: - Includes configurable nesting depth limit to prevent DoS attacks via - deeply nested placeables (e.g., { { { { ... } } } }). -""" +"""Composable Fluent grammar surface assembled from focused parser modules.""" from __future__ import annotations -from dataclasses import dataclass -from typing import cast - -from ftllexengine.constants import MAX_DEPTH -from ftllexengine.enums import CommentType -from ftllexengine.syntax.ast import ( - Attribute, - CallArguments, - Comment, - FunctionReference, - Identifier, - InlineExpression, - Message, - MessageReference, - NamedArgument, - NumberLiteral, - Pattern, - Placeable, - SelectExpression, - SelectorExpression, - Span, - StringLiteral, - Term, - TermReference, - TextElement, - VariableReference, - Variant, -) -from ftllexengine.syntax.cursor import Cursor, ParseError, ParseResult -from ftllexengine.syntax.parser.primitives import ( - _ASCII_DIGITS, - is_identifier_char, - is_identifier_start, - parse_identifier, - parse_number, - parse_number_value, - parse_string_literal, +from ftllexengine.syntax.parser.context import ParseContext +from ftllexengine.syntax.parser.entries import ( + parse_attribute, + parse_comment, + parse_message, + parse_message_attributes, + 
parse_message_header, + parse_term, + validate_message_content, ) -from ftllexengine.syntax.parser.whitespace import ( - is_indented_continuation, - skip_blank, - skip_blank_inline, - skip_multiline_pattern_start, +from ftllexengine.syntax.parser.expressions import ( + _parse_inline_hyphen, + _parse_inline_identifier, + _parse_inline_number_literal, + _parse_inline_string_literal, + _parse_message_attribute, + parse_argument_expression, + parse_call_arguments, + parse_function_reference, + parse_inline_expression, + parse_placeable, + parse_select_expression, + parse_term_reference, + parse_variable_reference, + parse_variant, + parse_variant_key, ) - -__all__ = ["ParseContext", "parse_comment", "parse_message", "parse_term"] - -# Maximum lookahead distance for variant marker detection. -# Must accommodate: '[' + optional_spaces + identifier (up to MAX_IDENTIFIER_LENGTH chars) -# + optional_spaces + ']'. Value of 300 ensures variant keys with maximum-length -# identifiers parse correctly while bounding lookahead on adversarial inputs. -_MAX_LOOKAHEAD_CHARS: int = 300 - -# Ordered comment types indexed by hash count minus one (hash_count in [1, 2, 3]). -# Pre-computed tuple eliminates per-call dict allocation in parse_comment(). -_COMMENT_TYPE_BY_HASH_COUNT: tuple[CommentType, CommentType, CommentType] = ( - CommentType.COMMENT, - CommentType.GROUP, - CommentType.RESOURCE, +from ftllexengine.syntax.parser.patterns import ( + _MAX_LOOKAHEAD_CHARS, + _is_valid_variant_key_char, + _is_variant_marker, + _trim_pattern_blank_lines, + parse_pattern, + parse_simple_pattern, ) - - -@dataclass(slots=True) -class ParseContext: - """Explicit context for parsing operations. 
- - Replaces thread-local state with explicit parameter passing for: - - Thread safety without global state - - Async framework compatibility - - Easier testing (no state reset needed) - - Clear dependency flow - - Security: - Tracks nesting depth for BOTH placeables and function calls to prevent - stack overflow DoS attacks. Deeply nested constructs like: - - { { { ... } } } (nested placeables) - - { A(B(C(D(...)))) } (nested function calls) - Both consume stack frames and must be bounded. - - Attributes: - max_nesting_depth: Maximum allowed nesting depth for placeables and calls - current_depth: Current nesting depth (0 = top level) - _depth_exceeded_flag: Mutable flag (list container) shared across all nested - contexts to track if depth limit was exceeded during parse. Uses list[bool] - as a mutable reference that persists when context objects are copied during - enter_nesting(). Set to [True] when depth exceeded; checked at Junk creation - to emit specific PARSE_NESTING_DEPTH_EXCEEDED diagnostic. - """ - - max_nesting_depth: int = MAX_DEPTH - current_depth: int = 0 - _depth_exceeded_flag: list[bool] | None = None - - def __post_init__(self) -> None: - """Initialize mutable depth exceeded flag if not provided.""" - if self._depth_exceeded_flag is None: - # Create mutable flag container shared across all nested contexts - self._depth_exceeded_flag = [False] - - def is_depth_exceeded(self) -> bool: - """Check if maximum nesting depth has been exceeded.""" - return self.current_depth >= self.max_nesting_depth - - def mark_depth_exceeded(self) -> None: - """Mark that depth limit was exceeded during parse. - - Sets persistent flag that survives context unwinding, allowing Junk creation - sites to detect depth-exceeded failures and emit specific diagnostics. - """ - if self._depth_exceeded_flag is not None: - self._depth_exceeded_flag[0] = True - - def was_depth_exceeded(self) -> bool: - """Check if depth limit was exceeded at any point during parse. 
- - Returns: - True if depth exceeded, False otherwise - """ - return bool( - self._depth_exceeded_flag is not None and self._depth_exceeded_flag[0] - ) - - def enter_nesting(self) -> ParseContext: - """Create new context with incremented depth for entering nested construct. - - Used for both placeables and function/term calls with arguments. - Each recursive descent into nested syntax increments depth. - Shares depth_exceeded flag across all nested contexts. - """ - return ParseContext( - max_nesting_depth=self.max_nesting_depth, - current_depth=self.current_depth + 1, - _depth_exceeded_flag=self._depth_exceeded_flag, - ) - - -# ============================================================================= -# Pattern Parsing -# ============================================================================= - - -def parse_variable_reference(cursor: Cursor) -> ParseResult[VariableReference] | None: - """Parse variable reference: $variable - - Variables start with $ followed by an identifier. 
- - Examples: - $name -> VariableReference(Identifier("name")) - $count -> VariableReference(Identifier("count")) - - Args: - cursor: Current position in source - - Returns: - Success(ParseResult(VariableReference, new_cursor)) on success - Failure(ParseError(...)) if not a variable reference - """ - # Capture start position for span - start_pos = cursor.pos - - # Expect $ - if cursor.is_eof or cursor.current != "$": - return None # "Expected variable reference (starts with $)", cursor, expected=["$"] - - cursor = cursor.advance() # Skip $ - id_start_pos = cursor.pos # Start of identifier (after '$') - - # Parse identifier - result = parse_identifier(cursor) - if isinstance(result, ParseError): - return None - - parse_result = result - var_ref = VariableReference( - id=Identifier( - parse_result.value, - span=Span(start=id_start_pos, end=parse_result.cursor.pos), - ), - span=Span(start=start_pos, end=parse_result.cursor.pos), - ) - return ParseResult(var_ref, parse_result.cursor) - - -def _is_valid_variant_key_char(ch: str, *, is_first: bool) -> bool: - """Check if character is valid in a variant key (identifier or number). - - Variant keys are either identifiers or number literals: - - Identifiers: [a-zA-Z_][a-zA-Z0-9_-]* - - Numbers: [0-9]+ or [0-9]+.[0-9]+ - - Note: - This helper permits '.' for number literals (e.g., "1.5") but identifiers - cannot contain '.'. The caller (_is_variant_marker) uses this for lookahead - scanning, not strict grammar validation. A key like "foo.bar" would pass - this check but fail later grammar validation as an invalid identifier. 
- - Args: - ch: Character to check - is_first: True if this is the first character - - Returns: - True if character is valid for variant key content - """ - if is_first: - # First char: ASCII letter (for identifiers), underscore, or digit (for numbers) - # Note: Uses ASCII-only check per Fluent spec for cross-implementation compatibility - return is_identifier_start(ch) or ch == "_" or ch in _ASCII_DIGITS - # Subsequent chars: ASCII alphanumeric, underscore, hyphen, or dot (for decimals) - # Note: '.' is only valid in number literals, not identifiers - return is_identifier_char(ch) or ch == "." - - -def _is_variant_marker(cursor: Cursor) -> bool: - """Check if cursor is at a variant marker using bounded lookahead. - - Distinguishes actual variant syntax from literal text: - - '*' is a variant marker only if followed by '[' - - '[' is a variant marker only if: - 1. Content is valid identifier/number - 2. Ends with ']' - 3. After ']', no non-whitespace text before newline/variant/end - - Valid variant keys (stop parsing): - - [one] (followed by newline, }, or another variant) - - *[other] (default variant) - - NOT variant keys (literal text): - - [1, 2, 3] - contains comma and spaces - - [INFO] message - has text after ] on same line - - [matrix * vector] - contains spaces and operators - - Security: - Uses bounded lookahead (max 128 chars) to prevent O(N^2) parsing - on adversarial input like `[[[[...` with many unclosed brackets. - Variant keys are identifiers/numbers which are always short. - - Args: - cursor: Current position in source - - Returns: - True if at variant marker syntax, False if literal text - - Note: - PLR0911 waiver: Multiple returns are intentional for early-exit - pattern matching, which is clearer than nested conditionals. - """ - # Use bounded lookahead limit - variant keys are short (identifiers/numbers). - # This prevents O(N^2) worst-case on adversarial input like [[[[... 
- max_lookahead = _MAX_LOOKAHEAD_CHARS - - if cursor.is_eof: - return False - - ch = cursor.current - - if ch == "*": - # '*' is variant marker only if followed by '[' - next_cursor = cursor.advance() - return not next_cursor.is_eof and next_cursor.current == "[" - - if ch == "[": - # '[' is variant marker only if: - # 1. Content is valid identifier or number (no spaces, commas, etc.) - # 2. Ends with ']' - # 3. After ']', the next thing is whitespace leading to newline, }, [, or *[ - scan = cursor.advance() - is_first = True - has_content = False - lookahead_count = 0 - - # Skip blank? after opening bracket per Fluent spec - # Per Fluent EBNF: VariantKey ::= "[" blank? (NumberLiteral | Identifier) blank? "]" - # blank_inline ::= "\u0020"+ (spaces only, not tabs) - while not scan.is_eof and scan.current == " " and lookahead_count < max_lookahead: - scan = scan.advance() - lookahead_count += 1 - - # Find the closing ] with bounded lookahead - while not scan.is_eof and lookahead_count < max_lookahead: - c = scan.current - lookahead_count += 1 - - if c == "]": - # Found closing bracket - now check what follows - if not has_content: - return False # Empty [] is not a variant key - - # Check what comes after ] - after_bracket = scan.advance() - - # Skip inline whitespace (ONLY space per spec, NOT tab) - # Per Fluent EBNF: blank_inline ::= "\u0020"+ - while ( - not after_bracket.is_eof - and after_bracket.current == " " - and lookahead_count < max_lookahead - ): - after_bracket = after_bracket.advance() - lookahead_count += 1 - - if after_bracket.is_eof: - return True # EOF after ] - valid variant - - # Valid if followed by: newline, }, [, or * (for *[other]) - # Note: Line endings are normalized to LF at parser entry. 
- return after_bracket.current in ("\n", "}", "[", "*") - - if c in ("\n", "{", "}", " ", "\t", ",", ":", ";", "=", "+", "*", "/"): - # Invalid char for variant key - this is literal text - return False - if not _is_valid_variant_key_char(c, is_first=is_first): - # Character not valid for identifier/number - return False - has_content = True - is_first = False - scan = scan.advance() - - # Exceeded lookahead or EOF before ']' - treat as literal text - return False - - return False - - -def _trim_pattern_blank_lines( - elements: list[TextElement | Placeable], -) -> tuple[TextElement | Placeable, ...]: - """Trim leading and trailing blank lines from pattern elements. - - Per Fluent spec, patterns should not include leading or trailing blank lines. - A blank line is defined as a line containing only ASCII spaces (U+0020), - matching blank_inline ::= "\u0020"+ — NOT arbitrary Unicode whitespace. - Characters like U+00A0 (NO-BREAK SPACE) are valid inline-char values and - must be preserved, not stripped. - - This function: - 1. Strips leading whitespace/blank lines from the first TextElement - 2. Strips trailing blank lines from the last TextElement (but preserves - trailing whitespace on content lines - only removes after last newline) - 3. Removes empty TextElements resulting from stripping - - Args: - elements: List of pattern elements (TextElement or Placeable) - - Returns: - Tuple of trimmed pattern elements - """ - if not elements: - return () - - result = list(elements) - - # Trim leading whitespace from first element if it's a TextElement. - # Per Fluent spec, blank_inline ::= "\u0020"+ (ASCII space only). - # Use lstrip(" \n") — not lstrip() — to avoid stripping valid Unicode - # space separators like U+00A0 (NO-BREAK SPACE) that are legal inline-char - # values per the FTL grammar. 
- while result and isinstance(result[0], TextElement): - first = result[0] - stripped = first.value.lstrip(" \n") - if stripped: - # Keep non-empty content - result[0] = TextElement(value=stripped) - break - # Element was all whitespace - remove it - result.pop(0) - - # Trim trailing BLANK LINES from last element if it's a TextElement. - # Per Fluent spec, only trailing blank lines should be removed, - # NOT trailing whitespace on content lines. - # Example: "Firefox " should preserve trailing spaces, - # but "Firefox\n \n" should become "Firefox". - # A "blank line" is a line containing only ASCII spaces (U+0020), not - # arbitrary Unicode whitespace — use strip(" ") not strip(). - while result and isinstance(result[-1], TextElement): - last = result[-1] - text = last.value - - # Find the last newline in the text - last_newline = text.rfind("\n") - - if last_newline == -1: - # No newlines - this is a single-line text element. - # Do NOT strip trailing whitespace (it's significant per Fluent spec). - break - - # Check if everything after the last newline is ASCII spaces only (blank line). - # strip(" ") matches Fluent spec blank_inline ::= "\u0020"+ definition. - after_newline = text[last_newline + 1 :] - if after_newline.strip(" "): - # Content after last newline - preserve it all (including trailing spaces) - break - - # Everything after last newline is whitespace - trim this blank line - trimmed = text[:last_newline] - if trimmed: - result[-1] = TextElement(value=trimmed) - # Continue loop to check for more trailing blank lines - else: - # Element was all whitespace - remove it - result.pop() - - return tuple(result) - - -class _TextAccumulator: - """Accumulator for building TextElement with efficient string concatenation. - - Avoids O(N^2) behavior when processing continuation lines by collecting - text fragments in a list and joining once. 
- """ - - __slots__ = ("fragments",) - - def __init__(self) -> None: - """Initialize empty accumulator.""" - self.fragments: list[str] = [] - - def add(self, text: str) -> None: - """Add text fragment to accumulator. - - Args: - text: Text fragment to add - """ - self.fragments.append(text) - - def has_content(self) -> bool: - """Check if accumulator has any content. - - Returns: - True if accumulator has fragments, False otherwise - """ - return len(self.fragments) > 0 - - def finalize(self) -> TextElement: - """Create TextElement from accumulated fragments. - - Returns: - TextElement with joined content - """ - return TextElement(value="".join(self.fragments)) - - def clear(self) -> None: - """Clear accumulated fragments.""" - self.fragments.clear() - - -def parse_simple_pattern( - cursor: Cursor, - context: ParseContext | None = None, -) -> ParseResult[Pattern] | None: - """Parse simple pattern (text with optional placeables). - - Used for parsing variant value patterns within select expressions. - Stops at variant delimiters to allow proper parsing of inline and - multiline select expressions. - - Supports multiline continuation: variant values can span multiple lines when - continuation lines are indented, matching the behavior of top-level patterns. 
- - Per Fluent spec, common indentation handling: - - The first continuation line's indentation sets the "common indent" baseline - - Subsequent continuation lines have only the common indent stripped - - Extra indentation beyond the common baseline is preserved in the pattern - - Handles: - - Plain text with multi-line continuation (indented lines) - - All placeable types: {$var}, {-term}, {NUMBER(...)}, {"string"}, {42} - - Stop conditions: - - Close brace (}): End of containing select expression - - Open bracket ([): Start of next variant key (with lookahead) - - Asterisk (*): Start of default variant marker (only if followed by '[') - - Newline (\\n): End of variant value UNLESS followed by indented continuation - - Lookahead: - '*' and '[' are only treated as variant markers when they form valid - variant syntax. Standalone '*' or '[' without matching pattern are - treated as literal text, enabling values like "[INFO]" or "3 * 5". - - Examples: - "Hello" -> Pattern([TextElement("Hello")]) - "Hi {$name}" -> Pattern([TextElement("Hi "), Placeable(...)]) - "[INFO] msg" -> Pattern([TextElement("[INFO] msg")]) # [ is literal - "3 * 5" -> Pattern([TextElement("3 * 5")]) # * is literal - "Line 1\\n Line 2" -> Pattern with multiline content - - Args: - cursor: Current position in source - context: Parse context for depth tracking - - Returns: - ParseResult(Pattern, new_cursor) on success, None on parse error - """ - elements: list[TextElement | Placeable] = [] - # Track common indentation (set on first continuation line) - common_indent: int | None = None - # Accumulate text fragments to avoid O(N^2) string concatenation - text_acc = _TextAccumulator() - - while not cursor.is_eof: - ch = cursor.current - - # Stop condition: end of select expression - if ch == "}": - break - - # Check variant markers with lookahead - # - [: start of next variant key (only if followed by text and ]) - # - *: start of default variant marker (only if followed by [) - if ch in ("[", "*") 
and _is_variant_marker(cursor): - break - - # Handle newline - check for indented continuation. - # Note: Line endings are normalized to LF at parser entry. - if ch == "\n": - if is_indented_continuation(cursor): - # Skip newline and process continuation - cursor = cursor.advance() - result = _process_continuation_line(cursor, common_indent) - cursor = result.cursor - common_indent = result.common_indent - - # Merge newline with previous element - _append_newline_to_elements(elements) - - # Store extra_spaces to prepend to next text element - if result.extra_spaces: - text_acc.add(result.extra_spaces) - continue # Continue parsing on next line - break # Not a continuation, stop parsing pattern - - # Parse placeable expression - if ch == "{": - # Add accumulated extra_spaces as text element before placeable - if text_acc.has_content(): - elements.append(text_acc.finalize()) - text_acc.clear() - - cursor = cursor.advance() # Skip { - - # Use full placeable parser which handles all expression types - # (variables, terms, functions, strings, numbers, select expressions) - placeable_result = parse_placeable(cursor, context) - if placeable_result is None: - return placeable_result - - placeable_parse = placeable_result - cursor = placeable_parse.cursor - elements.append(placeable_parse.value) - - else: - # Parse text until { or stop condition - text_start = cursor.pos - while not cursor.is_eof: # pragma: no branch - ch = cursor.current - # Stop at: placeable start, newline, closing brace - # Note: Line endings are normalized to LF at parser entry. - if ch in ("{", "\n", "}"): - break - # Check variant markers with lookahead - if ch in ("[", "*") and _is_variant_marker(cursor): - break - cursor = cursor.advance() - - if cursor.pos > text_start: # pragma: no branch - # Note: This condition is always True because entering the else block - # at line 355 means ch was not a stop character, so the inner while - # loop at 358 will always advance at least once before breaking. 
- # The False branch (cursor.pos == text_start) is structurally unreachable. - text = Cursor(cursor.source, text_start).slice_to(cursor.pos) - # Prepend extra_spaces from continuation to new text element - if text_acc.has_content(): - text = text_acc.finalize().value + text - text_acc.clear() - elements.append(TextElement(value=text)) - - # Finalize any remaining accumulated extra_spaces (trailing spaces at end of pattern) - if text_acc.has_content(): - # These are just trailing extra_spaces; add as text element (may be trimmed) - elements.append(text_acc.finalize()) - - # Per Fluent spec, trim leading and trailing blank lines from patterns - trimmed_elements = _trim_pattern_blank_lines(elements) - pattern = Pattern(elements=trimmed_elements) - return ParseResult(pattern, cursor) - - -def _count_leading_spaces(cursor: Cursor) -> int: - """Count leading spaces at current position (for common indentation tracking). - - Args: - cursor: Current position (at start of line content after newline) - - Returns: - Number of leading space characters (U+0020 only, not tabs) - """ - # Integer arithmetic avoids O(N) cursor allocations on hot path - pos = cursor.pos - source = cursor.source - length = len(source) - start = pos - while pos < length and source[pos] == " ": - pos += 1 - return pos - start - - -def _skip_common_indent(cursor: Cursor, common_indent: int) -> tuple[Cursor, str]: - """Skip common indentation and return any extra spaces. - - Per Fluent spec, only the common indentation is stripped from continuation - lines. Extra indentation beyond the common baseline is preserved. 
- - Args: - cursor: Current position (at start of line content after newline) - common_indent: Number of spaces to strip (common indentation) - - Returns: - Tuple of (new cursor position, extra spaces to preserve) - """ - # Skip common indent spaces - skipped = 0 - while skipped < common_indent and not cursor.is_eof and cursor.current == " ": - cursor = cursor.advance() - skipped += 1 - - # Collect extra spaces beyond common indent - extra_spaces: list[str] = [] - while not cursor.is_eof and cursor.current == " ": - extra_spaces.append(" ") - cursor = cursor.advance() - - return cursor, "".join(extra_spaces) - - -@dataclass(slots=True) -class _ContinuationResult: - """Result of processing a continuation line. - - Encapsulates all state changes from continuation processing to reduce - duplication between parse_simple_pattern and parse_pattern. - """ - - cursor: Cursor - common_indent: int - extra_spaces: str - - -def _process_continuation_line( - cursor: Cursor, - common_indent: int | None, -) -> _ContinuationResult: - """Process a continuation line after newline. - - Shared logic for handling indented continuation lines in patterns. - Skips blank lines, tracks common indentation, and preserves extra spaces. - - Per Fluent spec, common indentation handling: - - The first continuation line's indentation sets the "common indent" baseline - - Subsequent continuation lines have only the common indent stripped - - Extra indentation beyond the common baseline is preserved in the pattern - - Args: - cursor: Position after the newline character (already advanced past newline) - common_indent: Current common indentation (None if not yet set) - - Returns: - ContinuationResult with updated cursor, common_indent, and extra_spaces - """ - # Skip any blank lines (consecutive newlines) before measuring indent. - # This matches is_indented_continuation() which looks past blank lines - # to find indented content. 
Without this, blank lines before first - # content would set common_indent to 0 (measuring at newline position). - while not cursor.is_eof and cursor.current == "\n": - cursor = cursor.advance() - - # Track common indentation from first continuation line - if common_indent is None: - common_indent = _count_leading_spaces(cursor) - # Skip the common indent - cursor = cursor.skip_spaces() - extra_spaces = "" - else: - # Skip only common indent, preserve extra spaces - cursor, extra_spaces = _skip_common_indent(cursor, common_indent) - - return _ContinuationResult( - cursor=cursor, - common_indent=common_indent, - extra_spaces=extra_spaces, - ) - - -def _append_newline_to_elements( - elements: list[TextElement | Placeable], -) -> None: - """Append newline to last element or create new TextElement. - - Per Fluent spec, continuation lines are joined with newlines. - The newline belongs to the END of the previous element. - - Args: - elements: List of pattern elements (mutated in place) - """ - if elements and not isinstance(elements[-1], Placeable): - last_elem = elements[-1] - elements[-1] = TextElement(value=last_elem.value + "\n") - else: - # No previous text element to merge with - elements.append(TextElement(value="\n")) - - -def parse_pattern( - cursor: Cursor, - context: ParseContext | None = None, - *, - initial_common_indent: int | None = None, -) -> ParseResult[Pattern] | None: - """Parse full pattern with multi-line continuation support. - - Use this for top-level message/attribute patterns. For variant patterns - inside select expressions, use parse_simple_pattern() which has simpler - stop conditions (no multi-line continuation). 
- - Per Fluent spec, common indentation handling: - - The first continuation line's indentation sets the "common indent" baseline - - Subsequent continuation lines have only the common indent stripped - - Extra indentation beyond the common baseline is preserved in the pattern - - Handles: - - Plain text with multi-line continuation (indented lines) - - All placeable types: {$var}, {-term}, {NUMBER(...)}, {"string"}, {42} - - Select expressions: {$var -> [key] value} - - Args: - cursor: Current position in source - context: Parse context for depth tracking - initial_common_indent: Pre-computed common indent from skip_multiline_pattern_start. - When provided, this is the indentation of the first line of a multiline - pattern (already skipped by skip_multiline_pattern_start). - - Returns: - ParseResult with Pattern on success, None on parse error - """ - elements: list[TextElement | Placeable] = [] - # Track common indentation (set on first continuation line, or from initial_common_indent) - common_indent: int | None = initial_common_indent or None - # Accumulate text fragments to avoid O(N^2) string concatenation - text_acc = _TextAccumulator() - - while not cursor.is_eof: - ch = cursor.current - - # Handle newline - check for indented continuation. - # Note: Line endings are normalized to LF at parser entry. - if ch == "\n": - if is_indented_continuation(cursor): - # Skip newline and process continuation - cursor = cursor.advance() - result = _process_continuation_line(cursor, common_indent) - cursor = result.cursor - common_indent = result.common_indent - - # Merge newline with previous element - _append_newline_to_elements(elements) - - # Store extra_spaces to prepend to next text element - if result.extra_spaces: - text_acc.add(result.extra_spaces) - continue # Continue parsing on next line - break # Not a continuation, stop parsing pattern - - # Note: '.' is removed from stop conditions here. - # Per Fluent spec, '.' 
only starts an attribute when it appears at the - # beginning of a NEW LINE (after newline + optional indentation). - # A '.' on the same line as '=' is valid text content. - # Attributes are detected in message/term parsing after pattern completes. - - # Placeable: {$var} or {$var -> ...} - if ch == "{": - # Add accumulated extra_spaces as text element before placeable - if text_acc.has_content(): - elements.append(text_acc.finalize()) - text_acc.clear() - - cursor = cursor.advance() # Skip { - - # Use helper method to parse placeable (reduces nesting!) - placeable_result = parse_placeable(cursor, context) - if placeable_result is None: - return placeable_result - - placeable_parse = placeable_result - elements.append(placeable_parse.value) - cursor = placeable_parse.cursor - - else: - # Parse text until { or stop condition - text_start = cursor.pos - while not cursor.is_eof: - ch = cursor.current - # Stop at: placeable start or newline only. - # Note: '}', '[', '*' are valid text in top-level patterns. - # They only have special meaning inside select expressions (handled - # by parse_simple_pattern). An unescaped '}' is technically invalid - # FTL syntax, but treating it as text is more robust than skipping. - # Note: Line endings are normalized to LF at parser entry. - if ch in ("{", "\n"): - break - cursor = cursor.advance() - - if cursor.pos > text_start: # pragma: no branch - # Note: False branch (cursor.pos == text_start) occurs when inner loop - # breaks immediately without consuming text. This happens when cursor - # starts on a stop char ('{', '\n'). However, outer loop checks for '\n' - # before text parsing, and '{' enters placeable parsing, so this condition - # is always True when reached. 
- text = Cursor(cursor.source, text_start).slice_to(cursor.pos) - # Prepend extra_spaces from continuation to new text element - if text_acc.has_content(): - text = text_acc.finalize().value + text - text_acc.clear() - elements.append(TextElement(value=text)) - - # Finalize any remaining accumulated extra_spaces (trailing spaces at end of pattern) - if text_acc.has_content(): - # These are just trailing extra_spaces; add as text element (may be trimmed) - elements.append(text_acc.finalize()) - - # Per Fluent spec, trim leading and trailing blank lines from patterns - trimmed_elements = _trim_pattern_blank_lines(elements) - pattern = Pattern(elements=trimmed_elements) - return ParseResult(pattern, cursor) - - -# ============================================================================= -# Expression Parsing -# ============================================================================= - - -def parse_variant_key(cursor: Cursor) -> ParseResult[Identifier | NumberLiteral] | None: - """Parse variant key (identifier or number). - - Helper method extracted from parse_variant to reduce complexity. 
- - Args: - cursor: Current position in source - - Returns: - Success(ParseResult(Identifier | NumberLiteral, cursor)) on success - Failure(ParseError(...)) on parse error - """ - start_pos = cursor.pos - - # Try number first (ASCII digits only, not Unicode like 2) - if not cursor.is_eof and (cursor.current in _ASCII_DIGITS or cursor.current == "-"): - num_result = parse_number(cursor) - if not isinstance(num_result, ParseError): - num_parse = num_result - num_str = num_parse.value - num_value = parse_number_value(num_str) - return ParseResult( - NumberLiteral(value=num_value, raw=num_str), num_parse.cursor - ) - - # Failed to parse as number, try identifier - id_result = parse_identifier(cursor) - if isinstance(id_result, ParseError): - # Both failed - return parse error - return None # "Expected variant key (identifier or number)", cursor - - id_parse = id_result - return ParseResult( - Identifier(id_parse.value, span=Span(start=start_pos, end=id_parse.cursor.pos)), - id_parse.cursor, - ) - - # Parse as identifier - id_result = parse_identifier(cursor) - if isinstance(id_result, ParseError): - return None - - id_parse = id_result - return ParseResult( - Identifier(id_parse.value, span=Span(start=start_pos, end=id_parse.cursor.pos)), - id_parse.cursor, - ) - - -def parse_variant( - cursor: Cursor, - context: ParseContext | None = None, -) -> ParseResult[Variant] | None: - """Parse variant: [key] pattern or *[key] pattern - - Variants are the cases in a select expression. 
- - Examples: - [zero] no items - *[other] many items - - Args: - cursor: Current position in source - context: Parse context for depth tracking - - Returns: - Success(ParseResult(Variant, new_cursor)) on success - Failure(ParseError(...)) on parse error - """ - # Check for default marker * - is_default = False - if not cursor.is_eof and cursor.current == "*": - is_default = True - cursor = cursor.advance() - - # Expect [ - if cursor.is_eof or cursor.current != "[": - return None # "Expected '[' at start of variant", cursor - - cursor = cursor.advance() # Skip [ - - # Parse variant key (identifier or number) using extracted helper - # Per spec: VariantKey ::= "[" blank? (NumberLiteral | Identifier) blank? "]" - cursor = skip_blank(cursor) - key_result = parse_variant_key(cursor) - if key_result is None: - return key_result - - key_parse = key_result - variant_key = key_parse.value - cursor = skip_blank(key_parse.cursor) - - # Expect ] - if cursor.is_eof or cursor.current != "]": - return None # "Expected ']' after variant key", cursor - - cursor = cursor.advance() # Skip ] - # After ], before pattern: blank_inline (same line) or newline+indent - cursor = skip_blank_inline(cursor) - - # Parse pattern (on same line or next line with indent) - # Simplified: parse until newline that's not indented - pattern_result = parse_simple_pattern(cursor, context) - if pattern_result is None: - return pattern_result - - pattern_parse = pattern_result - - # Don't skip trailing whitespace - let select expression parser handle it - variant = Variant(key=variant_key, value=pattern_parse.value, default=is_default) - return ParseResult(variant, pattern_parse.cursor) - - -def parse_select_expression( - cursor: Cursor, - selector: SelectorExpression, - start_pos: int, - context: ParseContext | None = None, -) -> ParseResult[SelectExpression] | None: - """Parse select expression after seeing selector and -> - - Format: {$var -> [key1] value1 *[key2] value2} - - The selector has already 
been parsed. - - Example: - After parsing {$count and seeing ->, we parse: - [zero] {$count} items - [one] {$count} item - *[other] {$count} items - } - - Args: - cursor: Current position (should be after ->) - selector: The selector expression (e.g., VariableReference($count)) - start_pos: Start position of the select expression (for span tracking) - context: Parse context for depth tracking - - Returns: - Success(ParseResult(SelectExpression, new_cursor)) on success - Failure(ParseError(...)) on parse error - """ - # Per spec: SelectExpression ::= InlineExpression blank? "->" blank_inline? variant_list - # After ->, we need blank_inline before variant list starts (could be on next line) - # variant_list allows line_end, so use skip_blank to handle newlines - cursor = skip_blank(cursor) - - # Parse variants - variants: list[Variant] = [] - - while not cursor.is_eof: - # Within variant_list, allow blank (spaces and newlines) - cursor = skip_blank(cursor) - - if cursor.is_eof: - break - - # Check for end of select } - if cursor.current == "}": - break - - # Parse variant (pass context for nested placeable depth tracking) - variant_result = parse_variant(cursor, context) - if variant_result is None: - return variant_result - - variant_parse = variant_result - variants.append(variant_parse.value) - cursor = variant_parse.cursor - - if not variants: - return None # "Select expression must have at least one variant", cursor - - # Validate exactly one default variant (FTL spec requirement) - default_count = sum(1 for v in variants if v.default) - if default_count == 0: - return None # "Select expression must have exactly one default variant (marked with *)" - if default_count > 1: - return None # "Select expression must have exactly one default variant, found multiple" - - # Create span from start position to current position (end of last variant) - span = Span(start=start_pos, end=cursor.pos) - select_expr = SelectExpression(selector=selector, variants=tuple(variants), 
span=span) - return ParseResult(select_expr, cursor) - - -def _parse_message_attribute(cursor: Cursor) -> tuple[Identifier | None, Cursor]: - """Parse optional .attribute suffix on message/function references.""" - if cursor.is_eof or cursor.current != ".": - return None, cursor - cursor = cursor.advance() # Skip '.' - attr_start = cursor.pos # Start of attribute identifier - attr_id_result = parse_identifier(cursor) - if isinstance(attr_id_result, ParseError): - return None, cursor - attr_id = Identifier( - attr_id_result.value, - span=Span(start=attr_start, end=attr_id_result.cursor.pos), - ) - return attr_id, attr_id_result.cursor - - -def parse_argument_expression( - cursor: Cursor, - context: ParseContext | None = None, -) -> ParseResult[InlineExpression] | None: - """Parse a single argument expression per FTL spec. - - FTL Argument Grammar: - InlineExpression ::= StringLiteral | NumberLiteral | FunctionReference - | MessageReference | TermReference | VariableReference - | inline_placeable - - This handles all valid positional argument types including: - - Variable references: $var - - String literals: "text" - - Number literals: 42, -123 - - Term references: -brand - - Function references: NUMBER($val) - - Inline placeables: { expr } - - Message references: identifier - - Args: - cursor: Current position in source - context: Parse context for nested placeable depth tracking - - Returns: - Success(ParseResult(InlineExpression, cursor)) on success - None on parse error - """ - if cursor.is_eof: - return None - - # Capture start position for span (used by identifier-based expressions) - start_pos = cursor.pos - ch = cursor.current - - # Variable reference: $var - if ch == "$": - var_result = parse_variable_reference(cursor) - if var_result is None: - return None - return ParseResult(var_result.value, var_result.cursor) - - # String literal: "text" - if ch == '"': - str_result = parse_string_literal(cursor) - if isinstance(str_result, ParseError): - return None - 
return ParseResult(StringLiteral(value=str_result.value), str_result.cursor) - - # Hyphen: could be TermReference (-brand) or negative number (-123) - if ch == "-": - next_cursor = cursor.advance() - if not next_cursor.is_eof and is_identifier_start(next_cursor.current): - # Term reference: -brand (ASCII letter after hyphen) - term_result = parse_term_reference(cursor, context) - if term_result is None: - return None - return ParseResult(term_result.value, term_result.cursor) - # Negative number: -123 - num_result = parse_number(cursor) - if isinstance(num_result, ParseError): - return None - num_value = parse_number_value(num_result.value) - return ParseResult( - NumberLiteral(value=num_value, raw=num_result.value), num_result.cursor - ) - - # Positive number: 42 - if ch in _ASCII_DIGITS: - num_result = parse_number(cursor) - if isinstance(num_result, ParseError): - return None - num_value = parse_number_value(num_result.value) - return ParseResult( - NumberLiteral(value=num_value, raw=num_result.value), num_result.cursor - ) - - # Inline placeable: { expr } - if ch == "{": - cursor = cursor.advance() # Skip opening { - placeable_result = parse_placeable(cursor, context) - if placeable_result is None: - return None - return ParseResult(placeable_result.value, placeable_result.cursor) - - # Identifier: function call (any case per spec) or message reference - # Note: ASCII letter check per Fluent spec for identifier start - if is_identifier_start(ch) or ch == "_": - id_result = parse_identifier(cursor) - if isinstance(id_result, ParseError): - return None - - name = id_result.value - cursor_after_id = id_result.cursor - - # Check if identifier followed by '(' -> function call (any case per spec) - lookahead = skip_blank_inline(cursor_after_id) - if not lookahead.is_eof and lookahead.current == "(": - func_result = parse_function_reference(cursor, context) - if func_result is None: - return None - return ParseResult(func_result.value, func_result.cursor) - - # 
Message reference with optional attribute (e.g., msg.attr) - # Per FTL spec: MessageReference can have attribute access - attribute, final_cursor = _parse_message_attribute(cursor_after_id) - return ParseResult( - MessageReference( - id=Identifier(name, span=Span(start=start_pos, end=cursor_after_id.pos)), - attribute=attribute, - span=Span(start=start_pos, end=final_cursor.pos), - ), - final_cursor, - ) - - return None # "Expected argument expression" - - -def parse_call_arguments( - cursor: Cursor, - context: ParseContext | None = None, -) -> ParseResult[CallArguments] | None: - """Parse function call arguments: (pos1, pos2, name1: val1, name2: val2) - - Arguments consist of positional arguments followed by named arguments. - Positional arguments must come before named arguments. - Named argument names must be unique. - - Examples: - ($value) -> CallArguments(positional=[$value], named=[]) - ($value, minimumFractionDigits: 2) -> CallArguments with both types - - Args: - cursor: Position AFTER the opening '(' - context: Parse context for nested placeable depth tracking - - Returns: - Success(ParseResult(CallArguments, cursor_after_))) on success - Failure(ParseError(...)) on parse error - """ - # Per spec: CallArguments ::= blank? "(" blank? argument_list blank? ")" - # The spec uses blank (spaces AND newlines), not blank_inline (spaces only). - # This enables multiline formatting of function/term arguments. 
- cursor = skip_blank(cursor) - - positional: list[InlineExpression] = [] - named: list[NamedArgument] = [] - seen_named_arg_names: set[str] = set() - seen_named = False # Track if we've seen any named args - - # Parse comma-separated arguments - while not cursor.is_eof: - cursor = skip_blank(cursor) - - # Check for end of arguments - if cursor.current == ")": - break - - # Parse the argument expression using extracted helper - arg_result = parse_argument_expression(cursor, context) - if arg_result is None: - return arg_result - - arg_parse = arg_result - arg_expr = arg_parse.value - cursor = skip_blank(arg_parse.cursor) - - # Check if this is a named argument (followed by :) - if not cursor.is_eof and cursor.current == ":": - # This is a named argument - cursor = cursor.advance() # Skip : - cursor = skip_blank(cursor) - - # The argument expression must be an identifier (MessageReference) - if not isinstance(arg_expr, MessageReference): - return None # "Named argument name must be an identifier", cursor - - arg_name = arg_expr.id.name - - # Check for duplicate named argument names - if arg_name in seen_named_arg_names: - return None # f"Duplicate named argument: '{arg_name}'", cursor - seen_named_arg_names.add(arg_name) - - # Parse the value (must be inline expression) - if cursor.is_eof: - return None # "Expected value after ':'", cursor - - # Parse value expression using extracted helper - value_result = parse_argument_expression(cursor, context) - if value_result is None: - return value_result - - value_parse = value_result - value_expr = value_parse.value - cursor = value_parse.cursor - - # Per FTL spec: NamedArgument ::= Identifier ":" (StringLiteral | NumberLiteral) - # Named argument values MUST be literals, NOT references or variables - if not isinstance(value_expr, (StringLiteral, NumberLiteral)): - # Named argument values must be literals per FTL spec - # This restriction enables static analysis by translation tools - return None # f"Named argument 
'{arg_name}' requires a literal value", cursor - - # Reuse span from original identifier in MessageReference - named.append(NamedArgument( - name=Identifier(arg_name, span=arg_expr.id.span), - value=value_expr, - )) - seen_named = True - - else: - # This is a positional argument - if seen_named: - return None # "Positional arguments must come before named arguments", cursor - positional.append(arg_expr) - - cursor = skip_blank(cursor) - - # Check for comma (optional before closing paren) - if not cursor.is_eof and cursor.current == ",": - cursor = cursor.advance() # Skip comma - cursor = skip_blank(cursor) - - call_args = CallArguments(positional=tuple(positional), named=tuple(named)) - return ParseResult(call_args, cursor) - - -def parse_function_reference( - cursor: Cursor, - context: ParseContext | None = None, -) -> ParseResult[FunctionReference] | None: - """Parse function reference: identifier(args) - - Per Fluent 1.0 spec, function names follow the standard Identifier grammar - which allows any case. The convention of uppercase function names (NUMBER, - DATETIME) is stylistic, not syntactic. - - FTL EBNF: FunctionReference ::= Identifier CallArguments - - Security: - Function calls increment nesting depth to prevent DoS via deeply nested - calls like NUMBER(A(B(C(...)))). Each level consumes stack frames. 
- - Examples: - NUMBER($value) - number($value) - DateTime($date, dateStyle: "full") - - Args: - cursor: Position at start of function name - context: Parse context for nesting depth tracking - - Returns: - Success(ParseResult(FunctionReference, cursor_after_))) on success - None on parse error or nesting depth exceeded - """ - # Create default context if not provided - if context is None: - context = ParseContext() - - # Check nesting depth limit (DoS prevention) - # Function calls can nest arbitrarily: A(B(C(D(...)))) - if context.is_depth_exceeded(): - return None - - # Capture start position for span - start_pos = cursor.pos - - # Parse function name (any case per spec) - id_result = parse_identifier(cursor) - if isinstance(id_result, ParseError): - return None - - id_parse = id_result - func_name = id_parse.value - - # Per spec: FunctionReference uses blank? before "(" - cursor = skip_blank_inline(id_parse.cursor) - - # Expect opening parenthesis - if cursor.is_eof or cursor.current != "(": - return None # "Expected '(' after function name", cursor - - cursor = cursor.advance() # Skip ( - - # Create nested context with incremented depth for argument parsing - nested_context = context.enter_nesting() - - # Parse arguments with nested context - args_result = parse_call_arguments(cursor, nested_context) - if args_result is None: - return args_result - - args_parse = args_result - cursor = skip_blank_inline(args_parse.cursor) - - # Expect closing parenthesis - if cursor.is_eof or cursor.current != ")": - return None # "Expected ')' after function arguments" - - cursor = cursor.advance() # Skip ) - - # Identifier span ends at id_parse.cursor.pos (before any whitespace) - func_ref = FunctionReference( - id=Identifier(func_name, span=Span(start=start_pos, end=id_parse.cursor.pos)), - arguments=args_parse.value, - span=Span(start=start_pos, end=cursor.pos), - ) - return ParseResult(func_ref, cursor) - - -def parse_term_reference( - cursor: Cursor, - context: 
ParseContext | None = None, -) -> ParseResult[TermReference] | None: - """Parse term reference in inline expression (-term-id or -term.attr). - - FTL syntax: - { -brand } - { -brand.short } - { -brand(case: "nominative") } - - Term references can have optional attribute access and arguments. - - Security: - Term calls with arguments increment nesting depth to prevent DoS via - deeply nested calls. Arguments can contain nested expressions. - - Args: - cursor: Current position (should be at '-') - context: Parse context for nesting depth tracking - - Returns: - Success(ParseResult(TermReference, new_cursor)) on success - None on parse error or nesting depth exceeded - """ - # Create default context if not provided - if context is None: - context = ParseContext() - - # Capture start position for span - start_pos = cursor.pos - - # Expect '-' prefix - if cursor.is_eof or cursor.current != "-": - return None # "Expected '-' at start of term reference", cursor, expected=["-"] - - cursor = cursor.advance() # Skip '-' - id_start = cursor.pos # Start of identifier (after '-') - - # Parse identifier - id_result = parse_identifier(cursor) - if isinstance(id_result, ParseError): - return None - - id_parse = id_result - cursor = id_parse.cursor - - # Check for optional attribute access (.attribute) - attribute: Identifier | None = None - if not cursor.is_eof and cursor.current == ".": - cursor = cursor.advance() # Skip '.' - attr_start = cursor.pos # Start of attribute identifier - - attr_id_result = parse_identifier(cursor) - if isinstance(attr_id_result, ParseError): - return None - - attr_id_parse = attr_id_result - attribute = Identifier( - attr_id_parse.value, - span=Span(start=attr_start, end=attr_id_parse.cursor.pos), - ) - cursor = attr_id_parse.cursor - - # Check for optional arguments (case: "nominative") - # Per spec: TermReference uses blank? 
before "(" - cursor = skip_blank_inline(cursor) - - arguments: CallArguments | None = None - if not cursor.is_eof and cursor.current == "(": - # Check nesting depth limit (DoS prevention) before parsing arguments - if context.is_depth_exceeded(): - return None - - # Parse call arguments with incremented depth - cursor = cursor.advance() # Skip '(' - nested_context = context.enter_nesting() - args_result = parse_call_arguments(cursor, nested_context) - if args_result is None: - return args_result - - args_parse = args_result - cursor = skip_blank_inline(args_parse.cursor) - - # Expect closing parenthesis - if cursor.is_eof or cursor.current != ")": - return None # "Expected ')' after term arguments" - - cursor = cursor.advance() # Skip ')' - arguments = args_parse.value - - term_ref = TermReference( - id=Identifier(id_parse.value, span=Span(start=id_start, end=id_parse.cursor.pos)), - attribute=attribute, - arguments=arguments, - span=Span(start=start_pos, end=cursor.pos), - ) - - return ParseResult(term_ref, cursor) - - -def _parse_inline_string_literal(cursor: Cursor) -> ParseResult[InlineExpression] | None: - """Parse string literal inline expression.""" - str_result = parse_string_literal(cursor) - if isinstance(str_result, ParseError): - return None - return ParseResult(StringLiteral(value=str_result.value), str_result.cursor) - - -def _parse_inline_number_literal(cursor: Cursor) -> ParseResult[InlineExpression] | None: - """Parse number literal inline expression.""" - num_result = parse_number(cursor) - if isinstance(num_result, ParseError): - return None - num_str = num_result.value - num_value = parse_number_value(num_str) - return ParseResult(NumberLiteral(value=num_value, raw=num_str), num_result.cursor) - - -def _parse_inline_hyphen( - cursor: Cursor, - context: ParseContext | None = None, -) -> ParseResult[InlineExpression] | None: - """Parse hyphen-prefixed expression: term reference (-brand) or negative number (-123). 
- - Args: - cursor: Current position in source - context: Parse context for nested placeable depth tracking - """ - next_cursor = cursor.advance() - if not next_cursor.is_eof and is_identifier_start(next_cursor.current): - # Term reference: -brand (ASCII letter after hyphen) - term_result = parse_term_reference(cursor, context) - if term_result is None: - return None - return ParseResult(term_result.value, term_result.cursor) - # Negative number: -123 - return _parse_inline_number_literal(cursor) - - -def _parse_inline_identifier( - cursor: Cursor, - context: ParseContext | None = None, -) -> ParseResult[InlineExpression] | None: - """Parse identifier-based expression: function call or message reference. - - Per Fluent 1.0 spec, function names follow the standard Identifier grammar - which allows any case. The convention of uppercase function names (NUMBER, - DATETIME) is stylistic, not syntactic. - - Args: - cursor: Current position in source - context: Parse context for nested placeable depth tracking - """ - # Capture start position for span - start_pos = cursor.pos - - id_result = parse_identifier(cursor) - if isinstance(id_result, ParseError): - return None - - name = id_result.value - cursor_after_id = id_result.cursor - - # Check if identifier followed by '(' -> function call (any case per spec) - # Per Fluent spec: FunctionReference ::= Identifier CallArguments - # Identifier allows any case; uppercase is convention, not requirement - lookahead = skip_blank_inline(cursor_after_id) - if not lookahead.is_eof and lookahead.current == "(": - func_result = parse_function_reference(cursor, context) - if func_result is None: - return None - return ParseResult(func_result.value, func_result.cursor) - - # Message reference with optional attribute - attribute, final_cursor = _parse_message_attribute(cursor_after_id) - return ParseResult( - MessageReference( - id=Identifier(name, span=Span(start=start_pos, end=cursor_after_id.pos)), - attribute=attribute, - 
span=Span(start=start_pos, end=final_cursor.pos), - ), - final_cursor, - ) - - -def parse_inline_expression( - cursor: Cursor, - context: ParseContext | None = None, -) -> ParseResult[InlineExpression] | None: - """Parse inline expression per Fluent spec. - - Uses character-based dispatch for efficient parsing. Each expression type - has a dedicated handler function. - - Per Fluent EBNF: - InlineExpression ::= StringLiteral | NumberLiteral | FunctionReference - | MessageReference | TermReference | VariableReference - | inline_placeable - - Handles: - - Variable references: $var - - String literals: "text" - - Number literals: 42 or -123 - - Function calls: FUNC(args) or func(args) (any case per spec) - - Message references: identifier or identifier.attribute - - Term references: -term-id or -term-id.attribute - - Nested placeables: { expr } (inline_placeable per spec) - - Args: - cursor: Current position in source - context: Parse context for nested placeable depth tracking - - Returns: - ParseResult with InlineExpression on success, None on parse error - """ - if cursor.is_eof: - return None - - ch = cursor.current - - # Dispatch based on first character - match ch: - case "$": - var_result = parse_variable_reference(cursor) - if var_result is None: - return None - return ParseResult(var_result.value, var_result.cursor) - - case '"': - return _parse_inline_string_literal(cursor) - - case "-": - return _parse_inline_hyphen(cursor, context) - - case "{": - # Nested placeable: { expr } per spec (inline_placeable) - # Advance past opening brace and delegate to parse_placeable - placeable_result = parse_placeable(cursor.advance(), context) - if placeable_result is None: - return None - return ParseResult(placeable_result.value, placeable_result.cursor) - - case _ if ch in _ASCII_DIGITS: - return _parse_inline_number_literal(cursor) - - case _ if is_identifier_start(ch): - # ASCII letter [a-zA-Z] check per Fluent spec for identifier start - return 
_parse_inline_identifier(cursor, context) - - case _: - return None - - -def parse_placeable( - cursor: Cursor, - context: ParseContext | None = None, -) -> ParseResult[Placeable] | None: - """Parse placeable expression: {$var}, {"\\n"}, {$var -> [key] value}, or {FUNC()}. - - Parser combinator helper that reduces nesting in parse_pattern(). - - Handles: - - Variable references: {$var} - - String literals: {"\\n"} - - Number literals: {42} - - Select expressions: {$var -> [one] item *[other] items} - - Function calls: {NUMBER($value, minimumFractionDigits: 2)} - - Security: - Enforces maximum nesting depth to prevent DoS attacks via deeply - nested placeables. Configure via max_nesting_depth on FluentParserV1. - - Args: - cursor: Position AFTER the opening '{' - context: Parse context for depth tracking. If None, creates fresh context. - - Returns: - Success(ParseResult(Placeable, cursor_after_})) on success - None on parse error or nesting depth exceeded - - Example: - cursor at: "$var}" -> parses to Placeable(VariableReference("var")) - cursor at: "\"\\n\"}" -> parses to Placeable(StringLiteral("\\n")) - cursor at: "$n -> [one] 1 *[other] N}" -> parses to Placeable(SelectExpression(...)) - cursor at: "NUMBER($val)}" -> parses to Placeable(FunctionReference(...)) - """ - # Create default context if not provided - if context is None: - context = ParseContext() - - # Check nesting depth limit (DoS prevention) - if context.is_depth_exceeded(): - # Nesting depth exceeded - mark flag and return None to signal parse failure - # This prevents stack overflow from deeply nested constructs - # Flag persists through context unwinding so Junk creation sites can - # emit specific PARSE_NESTING_DEPTH_EXCEEDED diagnostic - context.mark_depth_exceeded() - return None - - # Create child context with incremented depth for nested parsing - nested_context = context.enter_nesting() - - # Per spec: inline_placeable ::= "{" blank? (SelectExpression | InlineExpression) blank? 
"}" - # blank ::= (blank_inline | line_end)+ ; allows newlines inside placeables - cursor = skip_blank(cursor) - - # Capture start position before parsing expression (for select expression span) - expr_start_pos = cursor.pos - - # Parse the inline expression with nested context for depth tracking - expr_result = parse_inline_expression(cursor, nested_context) - if expr_result is None: - return expr_result - - expr_parse = expr_result - expression = expr_parse.value - parse_result_cursor = expr_parse.cursor - - # Per spec: blank allows newlines after expression - cursor = skip_blank(parse_result_cursor) - - # Check for select expression (->) - # Per FTL 1.0 spec: SelectExpression ::= InlineExpression blank? "->" ... - # Valid selectors (any InlineExpression): - # - VariableReference: { $var -> ... } - # - StringLiteral: { "foo" -> ... } - # - NumberLiteral: { 42 -> ... } - # - FunctionReference: { NUMBER($x) -> ... } - # - MessageReference: { msg -> ... } or { msg.attr -> ... } - # - TermReference: { -term -> ... } or { -term.attr -> ... } - is_valid_selector = isinstance( - expression, - ( - VariableReference, - StringLiteral, - NumberLiteral, - FunctionReference, - MessageReference, - TermReference, - ), - ) - - if is_valid_selector and not cursor.is_eof and cursor.current == "-": - # Peek ahead for -> - next_cursor = cursor.advance() - if not next_cursor.is_eof and next_cursor.current == ">": - # It's a select expression! 
- cursor = next_cursor.advance() # Skip -> - - select_result = parse_select_expression( - cursor, - cast("SelectorExpression", expression), # Narrowed: is_valid_selector passed above - expr_start_pos, - nested_context, - ) - if select_result is None: - return select_result - - select_parse = select_result - # Per spec: blank allows newlines after select expression - cursor = skip_blank(select_parse.cursor) - - # Expect } - if cursor.is_eof or cursor.current != "}": - return None # "Expected '}' after select expression", cursor - - cursor = cursor.advance() # Skip } - return ParseResult(Placeable(expression=select_parse.value), cursor) - - # Just a simple inline expression {$var}, {"\n"}, or {42} - # Expect } - if cursor.is_eof or cursor.current != "}": - return None # "Expected '}'", cursor - - cursor = cursor.advance() # Skip } - return ParseResult(Placeable(expression=expression), cursor) - - -# ============================================================================= -# Entry Parsing -# ============================================================================= - - -def parse_message_header(cursor: Cursor) -> ParseResult[tuple[str, int]] | None: - """Parse message header: Identifier "=" - - Returns tuple of (identifier string, identifier end position) and cursor after '='. - The end position is needed for constructing Identifier spans. - """ - id_result = parse_identifier(cursor) - if isinstance(id_result, ParseError): - return None - - id_parse = id_result - id_end_pos = id_parse.cursor.pos # Capture end position before whitespace/equals - - # Per spec: Message ::= Identifier blank_inline? "=" ... 
- cursor = skip_blank_inline(id_parse.cursor) - - if cursor.is_eof or cursor.current != "=": - return None # "Expected '=' after message ID", cursor - - cursor = cursor.advance() # Skip = - return ParseResult((id_parse.value, id_end_pos), cursor) - - -def parse_message_attributes( - cursor: Cursor, - context: ParseContext | None = None, -) -> ParseResult[list[Attribute]] | None: - """Parse zero or more message attributes. - - Attributes must appear on new lines starting with '.'. - Per Fluent spec, blank lines (empty lines) are allowed between attributes: - Attribute ::= line_end blank? "." Identifier blank_inline? "=" ... - blank ::= (blank_inline | line_end)+ - - Args: - cursor: Current position in source - context: Parse context for depth tracking - """ - attributes: list[Attribute] = [] - - while not cursor.is_eof: - # Per spec: Attribute ::= line_end blank? "." ... - # We need at least one line_end to continue looking for attributes - # Note: Line endings are normalized to LF at parser entry. 
- if cursor.current != "\n": - break # No newline, done with attributes - - # Skip the required line_end - cursor = cursor.advance() - - # Skip optional blank lines (consecutive newlines) - # Per spec: blank ::= (blank_inline | line_end)+ - while not cursor.is_eof and cursor.current == "\n": - cursor = cursor.advance() - - # Now cursor is at the start of a non-blank line (or EOF) - # Save position for restore if not an attribute, and for parse_attribute - saved_cursor = cursor - - # Skip leading spaces on this line (NOT tabs per spec) - cursor = cursor.skip_spaces() - - if cursor.is_eof or cursor.current != ".": - cursor = saved_cursor - break # Not an attribute - - # Parse attribute - attr_result = parse_attribute(saved_cursor, context) - if attr_result is None: - cursor = saved_cursor - break # Invalid attribute syntax - - attr_parse = attr_result - attributes.append(attr_parse.value) - cursor = attr_parse.cursor - - return ParseResult(attributes, cursor) - - -def validate_message_content(pattern: Pattern | None, attributes: list[Attribute]) -> bool: - """Validate message has either pattern or attributes. - - Per Fluent spec: Message ::= ID "=" ((Pattern Attribute*) | (Attribute+)) - - Args: - pattern: Message value pattern (may be None) - attributes: List of message attributes - - Returns: - True if validation passed, False if validation failed - """ - has_pattern = pattern is not None and len(pattern.elements) > 0 - has_attributes = len(attributes) > 0 - - # Message must have either value or attributes - return has_pattern or has_attributes - - -def parse_message( - cursor: Cursor, - context: ParseContext | None = None, -) -> ParseResult[Message] | None: - """Parse message with full support for select expressions. - - Examples: - "hello = World" - "welcome = Hello, {$name}!" 
- "count = {$num -> [one] item *[other] items}" - - Args: - cursor: Current position in source - context: Parse context for depth tracking - - Returns: - Success(ParseResult(Message, new_cursor)) on success - Failure(ParseError(...)) on parse error - """ - start_pos = cursor.pos - - # Parse: Identifier "=" - id_result = parse_message_header(cursor) - if id_result is None: - return id_result - id_parse = id_result - id_name, id_end_pos = id_parse.value # Unpack (name, end_position) - cursor = id_parse.cursor - - # Parse pattern (message value) - cursor, initial_indent = skip_multiline_pattern_start(cursor) - pattern_result = parse_pattern(cursor, context, initial_common_indent=initial_indent) - if pattern_result is None: - return pattern_result - pattern_parse = pattern_result - cursor = pattern_parse.cursor - - # Parse: Attribute* (zero or more attributes) - attributes_result = parse_message_attributes(cursor, context) - if attributes_result is None: - return attributes_result - attributes_parse = attributes_result - cursor = attributes_parse.cursor - - # Validate: Per spec, Message must have Pattern OR Attribute - is_valid = validate_message_content(pattern_parse.value, attributes_parse.value) - if not is_valid: - return None # Validation failed - - # Construct Message node - message = Message( - id=Identifier(id_name, span=Span(start=start_pos, end=id_end_pos)), - value=pattern_parse.value, - attributes=tuple(attributes_parse.value), - span=Span(start=start_pos, end=cursor.pos), - ) - - return ParseResult(message, cursor) - - -def parse_attribute( - cursor: Cursor, - context: ParseContext | None = None, -) -> ParseResult[Attribute] | None: - """Parse message attribute (.attribute = pattern). - - FTL syntax: - button = Save - .tooltip = Click to save changes - .aria-label = Save button - - Attributes are indented and start with a dot followed by an identifier. 
- - Args: - cursor: Current position in source (should be at start of line with '.') - context: Parse context for depth tracking - - Returns: - Success(ParseResult(Attribute, new_cursor)) on success - Failure(ParseError(...)) on parse error - """ - # Skip leading whitespace (ONLY spaces per spec, NOT tabs or newlines) - # Per spec: Attribute ::= line_end blank? "." ... - # blank can contain spaces but NOT tabs - cursor = skip_blank_inline(cursor) - - # Check for '.' at start - if cursor.is_eof or cursor.current != ".": - return None # "Expected '.' at start of attribute", cursor, expected=["."] - - attr_start_pos = cursor.pos # Start of attribute (at '.') - cursor = cursor.advance() # Skip '.' - id_start_pos = cursor.pos # Start of identifier (after '.') - - # Parse identifier after '.' - id_result = parse_identifier(cursor) - if isinstance(id_result, ParseError): - return None - - id_parse = id_result - id_end_pos = id_parse.cursor.pos # End of identifier - - # Per spec: Attribute ::= line_end blank? "." Identifier blank_inline? "=" ... - cursor = skip_blank_inline(id_parse.cursor) - - # Expect '=' - if cursor.is_eof or cursor.current != "=": - return None # "Expected '=' after attribute identifier", cursor, expected=["="] - - cursor = cursor.advance() # Skip '=' - # After '=', handle multiline pattern start (same as messages) - # Per spec: Attribute ::= ... blank_inline? "=" blank_inline? 
Pattern - # Pattern can start on same line or next line with indentation - cursor, initial_indent = skip_multiline_pattern_start(cursor) - - # Parse pattern - pattern_result = parse_pattern(cursor, context, initial_common_indent=initial_indent) - if pattern_result is None: - return pattern_result - - pattern_parse = pattern_result - - attribute = Attribute( - id=Identifier(id_parse.value, span=Span(start=id_start_pos, end=id_end_pos)), - value=pattern_parse.value, - span=Span(start=attr_start_pos, end=pattern_parse.cursor.pos), - ) - - return ParseResult(attribute, pattern_parse.cursor) - - -def parse_term( - cursor: Cursor, - context: ParseContext | None = None, -) -> ParseResult[Term] | None: - """Parse term definition (-term-id = pattern). - - FTL syntax: - -brand = Firefox - -brand-version = 3.0 - .tooltip = Current version - - Terms are private definitions prefixed with '-' and can have attributes. - - Args: - cursor: Current position in source (should be at '-') - context: Parse context for depth tracking - - Returns: - Success(ParseResult(Term, new_cursor)) on success - Failure(ParseError(...)) on parse error - """ - # Capture start position for span - start_pos = cursor.pos - - # Expect '-' prefix - if cursor.is_eof or cursor.current != "-": - return None # "Expected '-' at start of term", cursor, expected=["-"] - - cursor = cursor.advance() # Skip '-' - id_start_pos = cursor.pos # Start of identifier (after '-') - - # Parse identifier - id_result = parse_identifier(cursor) - if isinstance(id_result, ParseError): - return None - - id_parse = id_result - id_end_pos = id_parse.cursor.pos # End of identifier - - # Per spec: Term ::= "-" Identifier blank_inline? "=" ... 
- cursor = skip_blank_inline(id_parse.cursor) - - # Expect '=' - if cursor.is_eof or cursor.current != "=": - return None # "Expected '=' after term ID", cursor, expected=["="] - - cursor = cursor.advance() # Skip '=' - - # After '=', handle multiline pattern start (same as messages) - # Use skip_multiline_pattern_start to properly track initial indent for common_indent - cursor, initial_indent = skip_multiline_pattern_start(cursor) - - # Parse pattern with initial common indent for proper multiline handling - pattern_result = parse_pattern(cursor, context, initial_common_indent=initial_indent) - if pattern_result is None: - return pattern_result - - pattern_parse = pattern_result - cursor = pattern_parse.cursor - - # Validate term has non-empty value (FTL spec requirement) - if not pattern_parse.value.elements: - return None # f'Expected term "-{id_parse.value}" to have a value' - - # Parse attributes using shared helper - attributes_result = parse_message_attributes(cursor, context) - if attributes_result is None: - return None # Should not happen, but handle defensively - attributes = attributes_result.value - cursor = attributes_result.cursor - - # Create span from start to current position - span = Span(start=start_pos, end=cursor.pos) - - term = Term( - id=Identifier(id_parse.value, span=Span(start=id_start_pos, end=id_end_pos)), - value=pattern_parse.value, - attributes=tuple(attributes), - span=span, - ) - - return ParseResult(term, cursor) - - -def parse_comment(cursor: Cursor) -> ParseResult[Comment] | None: - """Parse comment line per Fluent spec. - - Per spec, comments come in three types: - - # (single-line comment) - - ## (group comment) - - ### (resource comment) - - Adjacent comment lines of the same type are joined during AST construction. - - EBNF: - CommentLine ::= ("###" | "##" | "#") ("\u0020" comment_char*)? 
line_end - - Args: - cursor: Current parse position (must be at '#') - - Returns: - Success with Comment node or Failure with ParseError - """ - start_pos = cursor.pos - - # Determine comment type by counting '#' characters - hash_count = 0 - temp_cursor = cursor - while not temp_cursor.is_eof and temp_cursor.current == "#": - hash_count += 1 - temp_cursor = temp_cursor.advance() - - # Validate comment type (1, 2, or 3 hashes) - if hash_count > 3: - return None # f"Invalid comment: expected 1-3 '#' characters, found {hash_count}" - - # Map hash count to comment type using module-level lookup (no per-call allocation). - # hash_count is 1/2/3 at this point; the > 3 guard above eliminates other values. - comment_type = _COMMENT_TYPE_BY_HASH_COUNT[hash_count - 1] - - # Advance cursor past the '#' characters - cursor = temp_cursor - - # Per spec: optional space after '#' - if not cursor.is_eof and cursor.current == " ": - cursor = cursor.advance() - - # Collect comment content (everything until line end) - content_start = cursor.pos - cursor = cursor.skip_to_line_end() - - # Extract comment text - content = cursor.source[content_start : cursor.pos] - - # Advance past line ending (handles LF, CRLF, CR) - cursor = cursor.skip_line_end() - - # Create Comment node with span - comment_node = Comment( - content=content, - type=comment_type, - span=Span(start=start_pos, end=cursor.pos), - ) - - return ParseResult(comment_node, cursor) +from ftllexengine.syntax.parser.primitives import parse_identifier, parse_number + +__all__ = [ + "_MAX_LOOKAHEAD_CHARS", + "ParseContext", + "_is_valid_variant_key_char", + "_is_variant_marker", + "_parse_inline_hyphen", + "_parse_inline_identifier", + "_parse_inline_number_literal", + "_parse_inline_string_literal", + "_parse_message_attribute", + "_trim_pattern_blank_lines", + "parse_argument_expression", + "parse_attribute", + "parse_call_arguments", + "parse_comment", + "parse_function_reference", + "parse_identifier", + 
"parse_inline_expression", + "parse_message", + "parse_message_attributes", + "parse_message_header", + "parse_number", + "parse_pattern", + "parse_placeable", + "parse_select_expression", + "parse_simple_pattern", + "parse_term", + "parse_term_reference", + "parse_variable_reference", + "parse_variant", + "parse_variant_key", + "validate_message_content", +] diff --git a/src/ftllexengine/syntax/parser/rules.py,cover b/src/ftllexengine/syntax/parser/rules.py,cover deleted file mode 100644 index 084f2751..00000000 --- a/src/ftllexengine/syntax/parser/rules.py,cover +++ /dev/null @@ -1,1980 +0,0 @@ -> """Grammar rules for Fluent FTL parser. - -> This module provides all parsing rules for FTL grammar constructs: -> - Pattern parsing (variable references, text elements, placeables) -> - Expression parsing (inline expressions, select expressions, function calls) -> - Entry parsing (messages, terms, attributes, comments) - -> All grammar rules are co-located in a single module to: -> 1. Eliminate circular imports between interdependent parsing functions -> 2. Simplify the import graph -> 3. Allow direct function calls instead of function-local imports - -> Lookahead Patterns: -> The parser uses character-based lookahead for disambiguation: -> - `{` starts a Placeable -> - `$` starts a VariableReference -> - `-` followed by identifier starts a TermReference -> - `.` in specific contexts starts an attribute access -> - `*[` marks the default variant in SelectExpression - -> These single-character or two-character lookaheads are implemented inline -> using cursor.peek(n) rather than separate Lookahead helper classes. While -> this creates some code duplication, it keeps the parsing logic explicit -> and easy to trace. Future refactoring could extract common patterns into -> a Lookahead utility class if the grammar expands significantly. - -> Security: -> Includes configurable nesting depth limit to prevent DoS attacks via -> deeply nested placeables (e.g., { { { { ... 
} } } }). -> """ - -> from __future__ import annotations - -> from dataclasses import dataclass - -> from ftllexengine.constants import MAX_DEPTH, MAX_LOOKAHEAD_CHARS -> from ftllexengine.enums import CommentType -> from ftllexengine.syntax.ast import ( -> Attribute, -> CallArguments, -> Comment, -> FunctionReference, -> Identifier, -> InlineExpression, -> Message, -> MessageReference, -> NamedArgument, -> NumberLiteral, -> Pattern, -> Placeable, -> SelectExpression, -> Span, -> StringLiteral, -> Term, -> TermReference, -> TextElement, -> VariableReference, -> Variant, -> ) -> from ftllexengine.syntax.cursor import Cursor, ParseResult -> from ftllexengine.syntax.parser.primitives import ( -> _ASCII_DIGITS, -> is_identifier_char, -> is_identifier_start, -> parse_identifier, -> parse_number, -> parse_number_value, -> parse_string_literal, -> ) -> from ftllexengine.syntax.parser.whitespace import ( -> is_indented_continuation, -> skip_blank, -> skip_blank_inline, -> skip_multiline_pattern_start, -> ) - -> __all__ = ["ParseContext", "parse_comment", "parse_message", "parse_term"] - - -> @dataclass(slots=True) -> class ParseContext: -> """Explicit context for parsing operations. - -> Replaces thread-local state with explicit parameter passing for: -> - Thread safety without global state -> - Async framework compatibility -> - Easier testing (no state reset needed) -> - Clear dependency flow - -> Security: -> Tracks nesting depth for BOTH placeables and function calls to prevent -> stack overflow DoS attacks. Deeply nested constructs like: -> - { { { ... } } } (nested placeables) -> - { A(B(C(D(...)))) } (nested function calls) -> Both consume stack frames and must be bounded. 
    Attributes:
        max_nesting_depth: Maximum allowed nesting depth for placeables and calls
        current_depth: Current nesting depth (0 = top level)
    """

    max_nesting_depth: int = MAX_DEPTH
    current_depth: int = 0

    def is_depth_exceeded(self) -> bool:
        """Check if maximum nesting depth has been exceeded.

        Returns:
            True when current_depth has reached max_nesting_depth.
        """
        return self.current_depth >= self.max_nesting_depth

    def enter_nesting(self) -> ParseContext:
        """Create new context with incremented depth for entering nested construct.

        Used for both placeables and function/term calls with arguments.
        Each recursive descent into nested syntax increments depth.

        Returns:
            A new ParseContext one level deeper; self is left unmodified.
        """
        return ParseContext(
            max_nesting_depth=self.max_nesting_depth,
            current_depth=self.current_depth + 1,
        )


# =============================================================================
# Pattern Parsing
# =============================================================================


def parse_variable_reference(cursor: Cursor) -> ParseResult[VariableReference] | None:
    """Parse variable reference: $variable

    Variables start with $ followed by an identifier.

    Examples:
        $name -> VariableReference(Identifier("name"))
        $count -> VariableReference(Identifier("count"))

    Args:
        cursor: Current position in source

    Returns:
        ParseResult(VariableReference, new_cursor) on success,
        None if not a variable reference (no '$' or invalid identifier).
    """
    # Capture start position for span
    start_pos = cursor.pos

    # Expect $
    if cursor.is_eof or cursor.current != "$":
        return None  # Expected variable reference (starts with $)

    cursor = cursor.advance()  # Skip $

    # Parse identifier
    result = parse_identifier(cursor)
    if result is None:
        return result

    parse_result = result
    var_ref = VariableReference(
        id=Identifier(parse_result.value),
        span=Span(start=start_pos, end=parse_result.cursor.pos),
    )
    return ParseResult(var_ref, parse_result.cursor)


def _is_valid_variant_key_char(ch: str, is_first: bool) -> bool:
    """Check if character is valid in a variant key (identifier or number).

    Variant keys are either identifiers or number literals:
    - Identifiers: [a-zA-Z_][a-zA-Z0-9_-]*
    - Numbers: [0-9]+ or [0-9]+.[0-9]+

    Note:
        This helper permits '.' for number literals (e.g., "1.5") but identifiers
        cannot contain '.'. The caller (_is_variant_marker) uses this for lookahead
        scanning, not strict grammar validation. A key like "foo.bar" would pass
        this check but fail later grammar validation as an invalid identifier.

    Args:
        ch: Character to check
        is_first: True if this is the first character

    Returns:
        True if character is valid for variant key content
    """
    if is_first:
        # First char: ASCII letter (for identifiers), underscore, or digit (for numbers)
        # Note: Uses ASCII-only check per Fluent spec for cross-implementation compatibility
        return is_identifier_start(ch) or ch == "_" or ch in _ASCII_DIGITS
    # Subsequent chars: ASCII alphanumeric, underscore, hyphen, or dot (for decimals)
    # Note: '.' is only valid in number literals, not identifiers
    return is_identifier_char(ch) or ch == "."


def _is_variant_marker(cursor: Cursor) -> bool:
    """Check if cursor is at a variant marker using bounded lookahead.

    Distinguishes actual variant syntax from literal text:
    - '*' is a variant marker only if followed by '['
    - '[' is a variant marker only if:
        1. Content is valid identifier/number
        2. Ends with ']'
        3. After ']', no non-whitespace text before newline/variant/end

    Valid variant keys (stop parsing):
        [one]       (followed by newline, }, or another variant)
        *[other]    (default variant)

    NOT variant keys (literal text):
        [1, 2, 3]           - contains comma and spaces
        [INFO] message      - has text after ] on same line
        [matrix * vector]   - contains spaces and operators

    Security:
        Uses bounded lookahead (max 128 chars) to prevent O(N^2) parsing
        on adversarial input like `[[[[...` with many unclosed brackets.
        Variant keys are identifiers/numbers which are always short.

    Args:
        cursor: Current position in source

    Returns:
        True if at variant marker syntax, False if literal text

    Note:
        PLR0911 waiver: Multiple returns are intentional for early-exit
        pattern matching, which is clearer than nested conditionals.
    """
    # Use centralized lookahead limit - variant keys are short (identifiers/numbers)
    # This prevents O(N^2) worst-case on adversarial input like [[[[...
    max_lookahead = MAX_LOOKAHEAD_CHARS

    if cursor.is_eof:
        return False

    ch = cursor.current

    if ch == "*":
        # '*' is variant marker only if followed by '['
        next_cursor = cursor.advance()
        return not next_cursor.is_eof and next_cursor.current == "["

    if ch == "[":
        # '[' is variant marker only if:
        # 1. Content is valid identifier or number (no spaces, commas, etc.)
        # 2. Ends with ']'
        # 3.
        #    After ']', the next thing is whitespace leading to newline, }, [, or *[
        scan = cursor.advance()
        is_first = True
        has_content = False
        lookahead_count = 0

        # Find the closing ] with bounded lookahead
        while not scan.is_eof and lookahead_count < max_lookahead:
            c = scan.current
            lookahead_count += 1

            if c == "]":
                # Found closing bracket - now check what follows
                if not has_content:
                    return False  # Empty [] is not a variant key

                # Check what comes after ]
                after_bracket = scan.advance()

                # Skip inline whitespace (ONLY space per spec, NOT tab)
                # Per Fluent EBNF: blank_inline ::= "\u0020"+
                while (
                    not after_bracket.is_eof
                    and after_bracket.current == " "
                    and lookahead_count < max_lookahead
                ):
                    after_bracket = after_bracket.advance()
                    lookahead_count += 1

                if after_bracket.is_eof:
                    return True  # EOF after ] - valid variant

                # Valid if followed by: newline, }, [, or * (for *[other])
                # Note: Line endings are normalized to LF at parser entry.
                return after_bracket.current in ("\n", "}", "[", "*")

            if c in ("\n", "{", "}", " ", "\t", ",", ":", ";", "=", "+", "*", "/"):
                # Invalid char for variant key - this is literal text
                return False
            if not _is_valid_variant_key_char(c, is_first):
                # Character not valid for identifier/number
                return False
            has_content = True
            is_first = False
            scan = scan.advance()

        # Exceeded lookahead or EOF before ']' - treat as literal text
        return False

    return False


def _trim_pattern_blank_lines(
    elements: list[TextElement | Placeable],
) -> tuple[TextElement | Placeable, ...]:
    """Trim leading and trailing blank lines from pattern elements.

    Per Fluent spec, patterns should not include leading or trailing blank lines.
    A blank line is defined as a line containing only whitespace.

    This function:
    1. Strips leading whitespace/blank lines from the first TextElement
    2. Strips trailing blank lines from the last TextElement (but preserves
       trailing whitespace on content lines - only removes after last newline)
    3. Removes empty TextElements resulting from stripping

    Args:
        elements: List of pattern elements (TextElement or Placeable)

    Returns:
        Tuple of trimmed pattern elements
    """
    if not elements:
        return ()

    result = list(elements)

    # Trim leading whitespace from first element if it's a TextElement
    while result and isinstance(result[0], TextElement):
        first = result[0]
        stripped = first.value.lstrip()
        if stripped:
            # Keep non-empty content
            result[0] = TextElement(value=stripped)
            break
        # Element was all whitespace - remove it
        result.pop(0)

    # Trim trailing BLANK LINES from last element if it's a TextElement.
    # Per Fluent spec, only trailing blank lines should be removed,
    # NOT trailing whitespace on content lines.
    # Example: "Firefox " should preserve trailing spaces,
    # but "Firefox\n  \n" should become "Firefox".
    while result and isinstance(result[-1], TextElement):
        last = result[-1]
        text = last.value

        # Find the last newline in the text
        last_newline = text.rfind("\n")

        if last_newline == -1:
            # No newlines - this is a single-line text element.
            # Do NOT strip trailing whitespace (it's significant per Fluent spec).
            break

        # Check if everything after the last newline is whitespace (blank line)
        after_newline = text[last_newline + 1 :]
        if after_newline.strip():
            # Content after last newline - preserve it all (including trailing spaces)
            break

        # Everything after last newline is whitespace - trim this blank line
        trimmed = text[:last_newline]
        if trimmed:
            result[-1] = TextElement(value=trimmed)
            # Continue loop to check for more trailing blank lines
        else:
            # Element was all whitespace - remove it
            result.pop()

    return tuple(result)


class _TextAccumulator:
    """Accumulator for building TextElement with efficient string concatenation.

    Avoids O(N^2) behavior when processing continuation lines by collecting
    text fragments in a list and joining once.
    """

    __slots__ = ("fragments",)

    def __init__(self) -> None:
        """Initialize empty accumulator."""
        self.fragments: list[str] = []

    def add(self, text: str) -> None:
        """Add text fragment to accumulator.

        Args:
            text: Text fragment to add
        """
        self.fragments.append(text)

    def has_content(self) -> bool:
        """Check if accumulator has any content.

        Returns:
            True if accumulator has fragments, False otherwise
        """
        return len(self.fragments) > 0

    def finalize(self) -> TextElement:
        """Create TextElement from accumulated fragments.

        Returns:
            TextElement with joined content
        """
        return TextElement(value="".join(self.fragments))

    def clear(self) -> None:
        """Clear accumulated fragments."""
        self.fragments.clear()


def parse_simple_pattern(
    cursor: Cursor,
    context: ParseContext | None = None,
) -> ParseResult[Pattern] | None:
    """Parse simple pattern (text with optional placeables).

    Used for parsing variant value patterns within select expressions.
    Stops at variant delimiters to allow proper parsing of inline and
    multiline select expressions.
    Supports multiline continuation: variant values can span multiple lines when
    continuation lines are indented, matching the behavior of top-level patterns.

    Per Fluent spec, common indentation handling:
    - The first continuation line's indentation sets the "common indent" baseline
    - Subsequent continuation lines have only the common indent stripped
    - Extra indentation beyond the common baseline is preserved in the pattern

    Handles:
    - Plain text with multi-line continuation (indented lines)
    - All placeable types: {$var}, {-term}, {NUMBER(...)}, {"string"}, {42}

    Stop conditions:
    - Close brace (}): End of containing select expression
    - Open bracket ([): Start of next variant key (with lookahead)
    - Asterisk (*): Start of default variant marker (only if followed by '[')
    - Newline (\\n): End of variant value UNLESS followed by indented continuation

    Lookahead:
        '*' and '[' are only treated as variant markers when they form valid
        variant syntax. Standalone '*' or '[' without matching pattern are
        treated as literal text, enabling values like "[INFO]" or "3 * 5".

    Examples:
        "Hello" -> Pattern([TextElement("Hello")])
        "Hi {$name}" -> Pattern([TextElement("Hi "), Placeable(...)])
        "[INFO] msg" -> Pattern([TextElement("[INFO] msg")])  # [ is literal
        "3 * 5" -> Pattern([TextElement("3 * 5")])  # * is literal
        "Line 1\\n  Line 2" -> Pattern with multiline content

    Args:
        cursor: Current position in source
        context: Parse context for depth tracking

    Returns:
        ParseResult(Pattern, new_cursor) on success, None on parse error
    """
    elements: list[TextElement | Placeable] = []
    # Track common indentation (set on first continuation line)
    common_indent: int | None = None
    # Accumulate text fragments to avoid O(N^2) string concatenation
    text_acc = _TextAccumulator()

    while not cursor.is_eof:
        ch = cursor.current

        # Stop condition: end of select expression
        if ch == "}":
            break

        # Check variant markers with lookahead
        # - [: start of next variant key (only if followed by text and ])
        # - *: start of default variant marker (only if followed by [)
        if ch in ("[", "*") and _is_variant_marker(cursor):
            break

        # Handle newline - check for indented continuation.
        # Note: Line endings are normalized to LF at parser entry.
        if ch == "\n":
            if is_indented_continuation(cursor):
                # Skip newline
                cursor = cursor.advance()

                # Skip any blank lines (consecutive newlines) before measuring indent.
                # This matches is_indented_continuation() which looks past blank lines
                # to find indented content. Without this, blank lines before first
                # content would set common_indent to 0 (measuring at newline position).
                while not cursor.is_eof and cursor.current == "\n":
                    cursor = cursor.advance()

                # Track common indentation from first continuation line
                if common_indent is None:
                    common_indent = _count_leading_spaces(cursor)
                    # Skip the common indent
                    cursor = cursor.skip_spaces()
                    extra_spaces = ""
                else:
                    # Skip only common indent, preserve extra spaces
                    cursor, extra_spaces = _skip_common_indent(cursor, common_indent)

                # Per Fluent spec, continuation lines are joined with newlines.
                # IMPORTANT: The newline belongs to the END of the previous element,
                # but extra_spaces belong to the START of the next element.
                # Merge newline with previous element immediately.
                if elements and not isinstance(elements[-1], Placeable):
                    last_elem = elements[-1]
                    elements[-1] = TextElement(value=last_elem.value + "\n")
                else:
                    # No previous text element to merge with
                    elements.append(TextElement(value="\n"))

                # Store extra_spaces to prepend to next text element
                if extra_spaces:
                    text_acc.add(extra_spaces)
                continue  # Continue parsing on next line
            break  # Not a continuation, stop parsing pattern

        # Parse placeable expression
        if ch == "{":
            # Add accumulated extra_spaces as text element before placeable
            if text_acc.has_content():
                elements.append(text_acc.finalize())
                text_acc.clear()

            cursor = cursor.advance()  # Skip {

            # Use full placeable parser which handles all expression types
            # (variables, terms, functions, strings, numbers, select expressions)
            placeable_result = parse_placeable(cursor, context)
            if placeable_result is None:
                return placeable_result

            placeable_parse = placeable_result
            cursor = placeable_parse.cursor
            elements.append(placeable_parse.value)

        else:
            # Parse text until { or stop condition
            text_start = cursor.pos
            while not cursor.is_eof:  # pragma: no branch
                ch = cursor.current
                # Stop at: placeable start, newline, closing brace
                # Note: Line endings are normalized to LF at parser entry.
                if ch in ("{", "\n", "}"):
                    break
                # Check variant markers with lookahead
                if ch in ("[", "*") and _is_variant_marker(cursor):
                    break
                cursor = cursor.advance()

            if cursor.pos > text_start:  # pragma: no branch
                # Note: This condition is always True - entering this else branch
                # means ch was not a stop character, so the inner while loop
                # always advances at least once before breaking. The False branch
                # (cursor.pos == text_start) is structurally unreachable.
                text = Cursor(cursor.source, text_start).slice_to(cursor.pos)
                # Prepend extra_spaces from continuation to new text element
                if text_acc.has_content():
                    text = text_acc.finalize().value + text
                    text_acc.clear()
                elements.append(TextElement(value=text))

    # Finalize any remaining accumulated extra_spaces (trailing spaces at end of pattern)
    if text_acc.has_content():
        # These are just trailing extra_spaces; add as text element (may be trimmed)
        elements.append(text_acc.finalize())

    # Per Fluent spec, trim leading and trailing blank lines from patterns
    trimmed_elements = _trim_pattern_blank_lines(elements)
    pattern = Pattern(elements=trimmed_elements)
    return ParseResult(pattern, cursor)


def _count_leading_spaces(cursor: Cursor) -> int:
    """Count leading spaces at current position (for common indentation tracking).

    Args:
        cursor: Current position (at start of line content after newline)

    Returns:
        Number of leading space characters (U+0020 only, not tabs)
    """
    count = 0
    scan = cursor
    while not scan.is_eof and scan.current == " ":
        count += 1
        scan = scan.advance()
    return count


def _skip_common_indent(cursor: Cursor, common_indent: int) -> tuple[Cursor, str]:
    """Skip common indentation and return any extra spaces.

    Per Fluent spec, only the common indentation is stripped from continuation
    lines. Extra indentation beyond the common baseline is preserved.

    Args:
        cursor: Current position (at start of line content after newline)
        common_indent: Number of spaces to strip (common indentation)

    Returns:
        Tuple of (new cursor position, extra spaces to preserve)
    """
    # Skip common indent spaces
    skipped = 0
    while skipped < common_indent and not cursor.is_eof and cursor.current == " ":
        cursor = cursor.advance()
        skipped += 1

    # Collect extra spaces beyond common indent
    extra_spaces: list[str] = []
    while not cursor.is_eof and cursor.current == " ":
        extra_spaces.append(" ")
        cursor = cursor.advance()

    return cursor, "".join(extra_spaces)


def parse_pattern(
    cursor: Cursor,
    context: ParseContext | None = None,
    *,
    initial_common_indent: int | None = None,
) -> ParseResult[Pattern] | None:
    """Parse full pattern with multi-line continuation support.

    Use this for top-level message/attribute patterns. For variant patterns
    inside select expressions, use parse_simple_pattern() which has simpler
    stop conditions (no multi-line continuation).

    Per Fluent spec, common indentation handling:
    - The first continuation line's indentation sets the "common indent" baseline
    - Subsequent continuation lines have only the common indent stripped
    - Extra indentation beyond the common baseline is preserved in the pattern

    Handles:
    - Plain text with multi-line continuation (indented lines)
    - All placeable types: {$var}, {-term}, {NUMBER(...)}, {"string"}, {42}
    - Select expressions: {$var -> [key] value}

    Args:
        cursor: Current position in source
        context: Parse context for depth tracking
        initial_common_indent: Pre-computed common indent from skip_multiline_pattern_start.
            When provided, this is the indentation of the first line of a multiline
            pattern (already skipped by skip_multiline_pattern_start).

    Returns:
        ParseResult with Pattern on success, None on parse error
    """
    elements: list[TextElement | Placeable] = []
    # Track common indentation (set on first continuation line, or from initial_common_indent)
    # NOTE(review): the truthiness check means initial_common_indent == 0 is
    # treated the same as None (baseline re-measured on the first continuation
    # line) — confirm this is intended for zero-indent callers.
    common_indent: int | None = initial_common_indent if initial_common_indent else None
    # Accumulate text fragments to avoid O(N^2) string concatenation
    text_acc = _TextAccumulator()

    while not cursor.is_eof:
        ch = cursor.current

        # Handle newline - check for indented continuation.
        # Note: Line endings are normalized to LF at parser entry.
        if ch == "\n":
            if is_indented_continuation(cursor):
                # Skip newline
                cursor = cursor.advance()

                # Skip any blank lines (consecutive newlines) before measuring indent.
                # This matches is_indented_continuation() which looks past blank lines
                # to find indented content. Without this, blank lines before first
                # content would set common_indent to 0 (measuring at newline position).
                while not cursor.is_eof and cursor.current == "\n":
                    cursor = cursor.advance()

                # Track common indentation from first continuation line
                if common_indent is None:
                    common_indent = _count_leading_spaces(cursor)
                    # Skip the common indent
                    cursor = cursor.skip_spaces()
                    extra_spaces = ""
                else:
                    # Skip only common indent, preserve extra spaces
                    cursor, extra_spaces = _skip_common_indent(cursor, common_indent)

                # Per Fluent spec, continuation lines are joined with newlines.
                # IMPORTANT: The newline belongs to the END of the previous element,
                # but extra_spaces belong to the START of the next element.
                # Merge newline with previous element immediately.
                if elements and not isinstance(elements[-1], Placeable):
                    last_elem = elements[-1]
                    elements[-1] = TextElement(value=last_elem.value + "\n")
                else:
                    # No previous text element to merge with
                    elements.append(TextElement(value="\n"))

                # Store extra_spaces to prepend to next text element
                if extra_spaces:
                    text_acc.add(extra_spaces)
                continue  # Continue parsing on next line
            break  # Not a continuation, stop parsing pattern

        # Note: '.' is removed from stop conditions here.
        # Per Fluent spec, '.' only starts an attribute when it appears at the
        # beginning of a NEW LINE (after newline + optional indentation).
        # A '.' on the same line as '=' is valid text content.
        # Attributes are detected in message/term parsing after pattern completes.

        # Placeable: {$var} or {$var -> ...}
        if ch == "{":
            # Add accumulated extra_spaces as text element before placeable
            if text_acc.has_content():
                elements.append(text_acc.finalize())
                text_acc.clear()

            cursor = cursor.advance()  # Skip {

            # Use helper method to parse placeable (reduces nesting!)
            placeable_result = parse_placeable(cursor, context)
            if placeable_result is None:
                return placeable_result

            placeable_parse = placeable_result
            elements.append(placeable_parse.value)
            cursor = placeable_parse.cursor

        else:
            # Parse text until { or stop condition
            text_start = cursor.pos
            while not cursor.is_eof:
                ch = cursor.current
                # Stop at: placeable start or newline only.
                # Note: '}', '[', '*' are valid text in top-level patterns.
                # They only have special meaning inside select expressions (handled
                # by parse_simple_pattern). An unescaped '}' is technically invalid
                # FTL syntax, but treating it as text is more robust than skipping.
                # Note: Line endings are normalized to LF at parser entry.
                if ch in ("{", "\n"):
                    break
                cursor = cursor.advance()

            if cursor.pos > text_start:  # pragma: no branch
                # Note: False branch (cursor.pos == text_start) occurs when inner loop
                # breaks immediately without consuming text. This happens when cursor
                # starts on a stop char ('{', '\n'). However, outer loop checks for '\n'
                # before text parsing, and '{' enters placeable parsing, so this condition
                # is always True when reached.
                text = Cursor(cursor.source, text_start).slice_to(cursor.pos)
                # Prepend extra_spaces from continuation to new text element
                if text_acc.has_content():
                    text = text_acc.finalize().value + text
                    text_acc.clear()
                elements.append(TextElement(value=text))

    # Finalize any remaining accumulated extra_spaces (trailing spaces at end of pattern)
    if text_acc.has_content():
        # These are just trailing extra_spaces; add as text element (may be trimmed)
elements.append(text_acc.finalize()) - - # Per Fluent spec, trim leading and trailing blank lines from patterns -> trimmed_elements = _trim_pattern_blank_lines(elements) -> pattern = Pattern(elements=trimmed_elements) -> return ParseResult(pattern, cursor) - - - # ============================================================================= - # Expression Parsing - # ============================================================================= - - -> def parse_variant_key(cursor: Cursor) -> ParseResult[Identifier | NumberLiteral] | None: -> """Parse variant key (identifier or number). - -> Helper method extracted from parse_variant to reduce complexity. - -> Args: -> cursor: Current position in source - -> Returns: -> Success(ParseResult(Identifier | NumberLiteral, cursor)) on success -> Failure(ParseError(...)) on parse error -> """ - # Try number first (ASCII digits only, not Unicode like 2) -> if not cursor.is_eof and (cursor.current in _ASCII_DIGITS or cursor.current == "-"): -> num_result = parse_number(cursor) -> if num_result is not None: -> num_parse = num_result -> num_str = num_parse.value -> num_value = parse_number_value(num_str) -> return ParseResult( -> NumberLiteral(value=num_value, raw=num_str), num_parse.cursor -> ) - - # Failed to parse as number, try identifier -> id_result = parse_identifier(cursor) -> if id_result is None: - # Both failed - return parse error -> return None # "Expected variant key (identifier or number)", cursor - -> id_parse = id_result -> return ParseResult(Identifier(id_parse.value), id_parse.cursor) - - # Parse as identifier -> id_result = parse_identifier(cursor) -> if id_result is None: -> return id_result - -> id_parse = id_result -> return ParseResult(Identifier(id_parse.value), id_parse.cursor) - - -> def parse_variant( -> cursor: Cursor, -> context: ParseContext | None = None, -> ) -> ParseResult[Variant] | None: -> """Parse variant: [key] pattern or *[key] pattern - -> Variants are the cases in a select expression. 
    Examples:
        [zero] no items
        *[other] many items

    Args:
        cursor: Current position in source
        context: Parse context for depth tracking

    Returns:
        ParseResult(Variant, new_cursor) on success,
        None on parse error.
    """
    # Check for default marker *
    is_default = False
    if not cursor.is_eof and cursor.current == "*":
        is_default = True
        cursor = cursor.advance()

    # Expect [
    if cursor.is_eof or cursor.current != "[":
        return None  # Expected '[' at start of variant

    cursor = cursor.advance()  # Skip [

    # Parse variant key (identifier or number) using extracted helper
    # Per spec: VariantKey ::= "[" blank? (NumberLiteral | Identifier) blank? "]"
    cursor = skip_blank(cursor)
    key_result = parse_variant_key(cursor)
    if key_result is None:
        return key_result

    key_parse = key_result
    variant_key = key_parse.value
    cursor = skip_blank(key_parse.cursor)

    # Expect ]
    if cursor.is_eof or cursor.current != "]":
        return None  # Expected ']' after variant key

    cursor = cursor.advance()  # Skip ]
    # After ], before pattern: blank_inline (same line) or newline+indent
    cursor = skip_blank_inline(cursor)

    # Parse pattern (on same line or next line with indent)
    # Simplified: parse until newline that's not indented
    pattern_result = parse_simple_pattern(cursor, context)
    if pattern_result is None:
        return pattern_result

    pattern_parse = pattern_result

    # Don't skip trailing whitespace - let select expression parser handle it
    variant = Variant(key=variant_key, value=pattern_parse.value, default=is_default)
    return ParseResult(variant, pattern_parse.cursor)


def parse_select_expression(
    cursor: Cursor,
    selector: InlineExpression,
    start_pos: int,
    context: ParseContext | None = None,
) -> ParseResult[SelectExpression] | None:
    """Parse select expression after seeing selector and ->

    Format: {$var ->
        [key1] value1
       *[key2] value2}

    The selector has already been parsed.

    Example:
        After parsing {$count and seeing ->, we parse:
            [zero] {$count} items
            [one] {$count} item
           *[other] {$count} items
        }

    Args:
        cursor: Current position (should be after ->)
        selector: The selector expression (e.g., VariableReference($count))
        start_pos: Start position of the select expression (for span tracking)
        context: Parse context for depth tracking

    Returns:
        ParseResult(SelectExpression, new_cursor) on success,
        None on parse error.
    """
    # Per spec: SelectExpression ::= InlineExpression blank? "->" blank_inline? variant_list
    # After ->, we need blank_inline before variant list starts (could be on next line)
    # variant_list allows line_end, so use skip_blank to handle newlines
    cursor = skip_blank(cursor)

    # Parse variants
    variants: list[Variant] = []

    while not cursor.is_eof:
        # Within variant_list, allow blank (spaces and newlines)
        cursor = skip_blank(cursor)

        # Check for end of select }
        # NOTE(review): skip_blank may have advanced to EOF here; this relies on
        # Cursor.current being safe to read at EOF - confirm against Cursor.
        if cursor.current == "}":
            break

        # Parse variant (pass context for nested placeable depth tracking)
        variant_result = parse_variant(cursor, context)
        if variant_result is None:
            return variant_result

        variant_parse = variant_result
        variants.append(variant_parse.value)
        cursor = variant_parse.cursor

    if not variants:
        return None  # Select expression must have at least one variant

    # Validate exactly one default variant (FTL spec requirement)
    default_count = sum(1 for v in variants if v.default)
    if default_count == 0:
        return None  # Must have exactly one default variant (marked with *)
    if default_count > 1:
        return None  # Found multiple default variants

    # Create span from start position to current position (end of last variant)
    span = Span(start=start_pos, end=cursor.pos)
    select_expr = SelectExpression(selector=selector, variants=tuple(variants), span=span)
    return ParseResult(select_expr, cursor)


def parse_argument_expression(
    cursor: Cursor,
    context: ParseContext | None = None,
) -> ParseResult[InlineExpression] | None:
    """Parse a single argument expression per FTL spec.

    FTL Argument Grammar:
        InlineExpression ::= StringLiteral | NumberLiteral | FunctionReference
                           | MessageReference | TermReference | VariableReference
                           | inline_placeable

    This handles all valid positional argument types including:
    - Variable references: $var
    - String literals: "text"
    - Number literals: 42, -123
    - Term references: -brand
    - Function references: NUMBER($val)
    - Inline placeables: { expr }
    - Message references: identifier

    Args:
        cursor: Current position in source
        context: Parse context for nested placeable depth tracking

    Returns:
        ParseResult(InlineExpression, cursor) on success,
        None on parse error.
    """
    if cursor.is_eof:
        return None

    # Capture start position for span (used by identifier-based expressions)
    start_pos = cursor.pos
    ch = cursor.current

    # Variable reference: $var
    if ch == "$":
        var_result = parse_variable_reference(cursor)
        if var_result is None:
            return None
        return ParseResult(var_result.value, var_result.cursor)

    # String literal: "text"
    if ch == '"':
        str_result = parse_string_literal(cursor)
        if str_result is None:
            return None
        return ParseResult(StringLiteral(value=str_result.value), str_result.cursor)

    # Hyphen: could be TermReference (-brand) or negative number (-123)
    if ch == "-":
        next_cursor = cursor.advance()
        if not next_cursor.is_eof and is_identifier_start(next_cursor.current):
            # Term reference: -brand (ASCII letter after hyphen)
            term_result = parse_term_reference(cursor, context)
            if term_result is None:
                return None
            return ParseResult(term_result.value, term_result.cursor)
        # Negative number: -123
        num_result = parse_number(cursor)
        if num_result is None:
            return None
        num_value = parse_number_value(num_result.value)
        return ParseResult(
            NumberLiteral(value=num_value, raw=num_result.value), num_result.cursor
        )

    # Positive number: 42
    if ch in _ASCII_DIGITS:
        num_result = parse_number(cursor)
        if num_result is None:
            return None
        num_value = parse_number_value(num_result.value)
        return ParseResult(
            NumberLiteral(value=num_value, raw=num_result.value), num_result.cursor
        )

    # Inline placeable: { expr }
    if ch == "{":
        cursor = cursor.advance()  # Skip opening {
        placeable_result = parse_placeable(cursor, context)
        if placeable_result is None:
            return None
        return ParseResult(placeable_result.value, placeable_result.cursor)

    # Identifier: function call (any case per spec) or message reference
    # Note: ASCII letter check per Fluent spec for identifier start
    if is_identifier_start(ch) or ch == "_":
        id_result = parse_identifier(cursor)
        if id_result is None:
            return None

        name = id_result.value
        cursor_after_id = id_result.cursor

        # Check if identifier followed by '(' -> function call (any case per spec)
        lookahead = skip_blank_inline(cursor_after_id)
        if not lookahead.is_eof and lookahead.current == "(":
            func_result = parse_function_reference(cursor, context)
            if func_result is None:
                return None
            return ParseResult(func_result.value, func_result.cursor)

        # Message reference (or identifier for named argument name)
        return ParseResult(
            MessageReference(
                id=Identifier(name),
                span=Span(start=start_pos, end=cursor_after_id.pos),
            ),
            cursor_after_id,
        )

    return None  # Expected argument expression


def parse_call_arguments(
    cursor: Cursor,
    context: ParseContext | None = None,
) -> ParseResult[CallArguments] | None:
    """Parse function call arguments: (pos1, pos2, name1: val1, name2: val2)

    Arguments consist of positional arguments followed by named arguments.
    Positional arguments must come before named arguments.
    Named argument names must be unique.

    Examples:
        ($value) -> CallArguments(positional=[$value], named=[])
        ($value, minimumFractionDigits: 2) -> CallArguments with both types

    Args:
        cursor: Position AFTER the opening '('
        context: Parse context for nested placeable depth tracking

    Returns:
        ParseResult(CallArguments, cursor_after_args) on success,
        None on parse error.
    """
    # Per spec: CallArguments ::= blank? "(" blank? argument_list blank? ")"
    cursor = skip_blank_inline(cursor)

    positional: list[InlineExpression] = []
    named: list[NamedArgument] = []
    seen_named_arg_names: set[str] = set()
    seen_named = False  # Track if we've seen any named args

    # Parse comma-separated arguments
    while not cursor.is_eof:
        cursor = skip_blank_inline(cursor)

        # Check for end of arguments
        # NOTE(review): skip_blank_inline may have advanced to EOF here; this
        # relies on Cursor.current being safe to read at EOF - confirm.
        if cursor.current == ")":
            break

        # Parse the argument expression using extracted helper
        arg_result = parse_argument_expression(cursor, context)
        if arg_result is None:
            return arg_result

        arg_parse = arg_result
        arg_expr = arg_parse.value
        cursor = skip_blank_inline(arg_parse.cursor)

        # Check if this is a named argument (followed by :)
        if not cursor.is_eof and cursor.current == ":":
            # This is a named argument
            cursor = cursor.advance()  # Skip :
            cursor = skip_blank_inline(cursor)

            # The argument expression must be an identifier (MessageReference)
            if not isinstance(arg_expr, MessageReference):
                return None  # Named argument name must be an identifier

            arg_name = arg_expr.id.name

            # Check for duplicate named argument names
            if arg_name in seen_named_arg_names:
                return None  # Duplicate named argument
            seen_named_arg_names.add(arg_name)

            # Parse the value (must be inline expression)
            if cursor.is_eof:
                return None  # Expected value after ':'

            # Parse value expression using extracted helper
            value_result = parse_argument_expression(cursor, context)
            if value_result is None:
                return value_result

            value_parse = value_result
            value_expr = value_parse.value
            cursor = value_parse.cursor

            # Per FTL spec: NamedArgument ::= Identifier ":" (StringLiteral | NumberLiteral)
            # Named argument values MUST be literals, NOT references or variables
            if not isinstance(value_expr, (StringLiteral, NumberLiteral)):
                # Named argument values must be literals per FTL spec
                # This restriction enables static analysis by translation tools
                return None  # Named argument requires a literal value

            named.append(NamedArgument(name=Identifier(arg_name), value=value_expr))
            seen_named = True

        else:
            # This is a positional argument
            if seen_named:
                return None  # Positional arguments must come before named arguments
            positional.append(arg_expr)

        cursor = skip_blank_inline(cursor)

        # Check for comma (optional before closing paren)
        if not cursor.is_eof and cursor.current == ",":
            cursor = cursor.advance()  # Skip comma
            cursor = skip_blank_inline(cursor)

    call_args = CallArguments(positional=tuple(positional), named=tuple(named))
    return ParseResult(call_args, cursor)


def parse_function_reference(
    cursor: Cursor,
    context: ParseContext | None = None,
) -> ParseResult[FunctionReference] | None:
    """Parse function reference: identifier(args)

    Per Fluent 1.0 spec, function names follow the standard Identifier grammar
    which allows any case. The convention of uppercase function names (NUMBER,
    DATETIME) is stylistic, not syntactic.

    FTL EBNF: FunctionReference ::= Identifier CallArguments

    Security:
        Function calls increment nesting depth to prevent DoS via deeply nested
        calls like NUMBER(A(B(C(...)))). Each level consumes stack frames.
- -> Examples: -> NUMBER($value) -> number($value) -> DateTime($date, dateStyle: "full") - -> Args: -> cursor: Position at start of function name -> context: Parse context for nesting depth tracking - -> Returns: -> Success(ParseResult(FunctionReference, cursor_after_))) on success -> None on parse error or nesting depth exceeded -> """ - # Create default context if not provided -> if context is None: -> context = ParseContext() - - # Check nesting depth limit (DoS prevention) - # Function calls can nest arbitrarily: A(B(C(D(...)))) -> if context.is_depth_exceeded(): -> return None - - # Capture start position for span -> start_pos = cursor.pos - - # Parse function name (any case per spec) -> id_result = parse_identifier(cursor) -> if id_result is None: -> return id_result - -> id_parse = id_result -> func_name = id_parse.value - - # Per spec: FunctionReference uses blank? before "(" -> cursor = skip_blank_inline(id_parse.cursor) - - # Expect opening parenthesis -> if cursor.is_eof or cursor.current != "(": -> return None # "Expected '(' after function name", cursor - -> cursor = cursor.advance() # Skip ( - - # Create nested context with incremented depth for argument parsing -> nested_context = context.enter_nesting() - - # Parse arguments with nested context -> args_result = parse_call_arguments(cursor, nested_context) -> if args_result is None: -> return args_result - -> args_parse = args_result -> cursor = skip_blank_inline(args_parse.cursor) - - # Expect closing parenthesis -> if cursor.is_eof or cursor.current != ")": -> return None # "Expected ')' after function arguments" - -> cursor = cursor.advance() # Skip ) - -> func_ref = FunctionReference( -> id=Identifier(func_name), -> arguments=args_parse.value, -> span=Span(start=start_pos, end=cursor.pos), -> ) -> return ParseResult(func_ref, cursor) - - -> def parse_term_reference( -> cursor: Cursor, -> context: ParseContext | None = None, -> ) -> ParseResult[TermReference] | None: -> """Parse term reference in 
inline expression (-term-id or -term.attr). - -> FTL syntax: -> { -brand } -> { -brand.short } -> { -brand(case: "nominative") } - -> Term references can have optional attribute access and arguments. - -> Security: -> Term calls with arguments increment nesting depth to prevent DoS via -> deeply nested calls. Arguments can contain nested expressions. - -> Args: -> cursor: Current position (should be at '-') -> context: Parse context for nesting depth tracking - -> Returns: -> Success(ParseResult(TermReference, new_cursor)) on success -> None on parse error or nesting depth exceeded -> """ - # Create default context if not provided -> if context is None: -> context = ParseContext() - - # Capture start position for span -> start_pos = cursor.pos - - # Expect '-' prefix -> if cursor.is_eof or cursor.current != "-": -> return None # "Expected '-' at start of term reference", cursor, expected=["-"] - -> cursor = cursor.advance() # Skip '-' - - # Parse identifier -> id_result = parse_identifier(cursor) -> if id_result is None: -> return id_result - -> id_parse = id_result -> cursor = id_parse.cursor - - # Check for optional attribute access (.attribute) -> attribute: Identifier | None = None -> if not cursor.is_eof and cursor.current == ".": -> cursor = cursor.advance() # Skip '.' - -> attr_id_result = parse_identifier(cursor) -> if attr_id_result is None: -> return attr_id_result - -> attr_id_parse = attr_id_result -> attribute = Identifier(attr_id_parse.value) -> cursor = attr_id_parse.cursor - - # Check for optional arguments (case: "nominative") - # Per spec: TermReference uses blank? 
before "(" -> cursor = skip_blank_inline(cursor) - -> arguments: CallArguments | None = None -> if not cursor.is_eof and cursor.current == "(": - # Check nesting depth limit (DoS prevention) before parsing arguments -> if context.is_depth_exceeded(): -> return None - - # Parse call arguments with incremented depth -> cursor = cursor.advance() # Skip '(' -> nested_context = context.enter_nesting() -> args_result = parse_call_arguments(cursor, nested_context) -> if args_result is None: -> return args_result - -> args_parse = args_result -> cursor = skip_blank_inline(args_parse.cursor) - - # Expect closing parenthesis -> if cursor.is_eof or cursor.current != ")": -> return None # "Expected ')' after term arguments" - -> cursor = cursor.advance() # Skip ')' -> arguments = args_parse.value - -> term_ref = TermReference( -> id=Identifier(id_parse.value), -> attribute=attribute, -> arguments=arguments, -> span=Span(start=start_pos, end=cursor.pos), -> ) - -> return ParseResult(term_ref, cursor) - - -> def _parse_inline_string_literal(cursor: Cursor) -> ParseResult[InlineExpression] | None: -> """Parse string literal inline expression.""" -> str_result = parse_string_literal(cursor) -> if str_result is None: -> return None -> return ParseResult(StringLiteral(value=str_result.value), str_result.cursor) - - -> def _parse_inline_number_literal(cursor: Cursor) -> ParseResult[InlineExpression] | None: -> """Parse number literal inline expression.""" -> num_result = parse_number(cursor) -> if num_result is None: -> return None -> num_str = num_result.value -> num_value = parse_number_value(num_str) -> return ParseResult(NumberLiteral(value=num_value, raw=num_str), num_result.cursor) - - -> def _parse_inline_hyphen( -> cursor: Cursor, -> context: ParseContext | None = None, -> ) -> ParseResult[InlineExpression] | None: -> """Parse hyphen-prefixed expression: term reference (-brand) or negative number (-123). 
- -> Args: -> cursor: Current position in source -> context: Parse context for nested placeable depth tracking -> """ -> next_cursor = cursor.advance() -> if not next_cursor.is_eof and is_identifier_start(next_cursor.current): - # Term reference: -brand (ASCII letter after hyphen) -> term_result = parse_term_reference(cursor, context) -> if term_result is None: -> return None -> return ParseResult(term_result.value, term_result.cursor) - # Negative number: -123 -> return _parse_inline_number_literal(cursor) - - -> def _parse_message_attribute(cursor: Cursor) -> tuple[Identifier | None, Cursor]: -> """Parse optional .attribute suffix on message/function references.""" -> if cursor.is_eof or cursor.current != ".": -> return None, cursor -> cursor = cursor.advance() # Skip '.' -> attr_id_result = parse_identifier(cursor) -> if attr_id_result is None: -> return None, cursor -> return Identifier(attr_id_result.value), attr_id_result.cursor - - -> def _parse_inline_identifier( -> cursor: Cursor, -> context: ParseContext | None = None, -> ) -> ParseResult[InlineExpression] | None: -> """Parse identifier-based expression: function call or message reference. - -> Per Fluent 1.0 spec, function names follow the standard Identifier grammar -> which allows any case. The convention of uppercase function names (NUMBER, -> DATETIME) is stylistic, not syntactic. 
- -> Args: -> cursor: Current position in source -> context: Parse context for nested placeable depth tracking -> """ - # Capture start position for span -> start_pos = cursor.pos - -> id_result = parse_identifier(cursor) -> if id_result is None: -> return None - -> name = id_result.value -> cursor_after_id = id_result.cursor - - # Check if identifier followed by '(' -> function call (any case per spec) - # Per Fluent spec: FunctionReference ::= Identifier CallArguments - # Identifier allows any case; uppercase is convention, not requirement -> lookahead = skip_blank_inline(cursor_after_id) -> if not lookahead.is_eof and lookahead.current == "(": -> func_result = parse_function_reference(cursor, context) -> if func_result is None: -> return None -> return ParseResult(func_result.value, func_result.cursor) - - # Message reference with optional attribute -> attribute, final_cursor = _parse_message_attribute(cursor_after_id) -> return ParseResult( -> MessageReference( -> id=Identifier(name), -> attribute=attribute, -> span=Span(start=start_pos, end=final_cursor.pos), -> ), -> final_cursor, -> ) - - -> def parse_inline_expression( -> cursor: Cursor, -> context: ParseContext | None = None, -> ) -> ParseResult[InlineExpression] | None: -> """Parse inline expression per Fluent spec. - -> Uses character-based dispatch for efficient parsing. Each expression type -> has a dedicated handler function. 
- -> Per Fluent EBNF: -> InlineExpression ::= StringLiteral | NumberLiteral | FunctionReference -> | MessageReference | TermReference | VariableReference -> | inline_placeable - -> Handles: -> - Variable references: $var -> - String literals: "text" -> - Number literals: 42 or -123 -> - Function calls: FUNC(args) or func(args) (any case per spec) -> - Message references: identifier or identifier.attribute -> - Term references: -term-id or -term-id.attribute -> - Nested placeables: { expr } (inline_placeable per spec) - -> Args: -> cursor: Current position in source -> context: Parse context for nested placeable depth tracking - -> Returns: -> ParseResult with InlineExpression on success, None on parse error -> """ -> if cursor.is_eof: -> return None - -> ch = cursor.current - - # Dispatch based on first character -> match ch: -> case "$": -> var_result = parse_variable_reference(cursor) -> if var_result is None: -> return None -> return ParseResult(var_result.value, var_result.cursor) - -> case '"': -> return _parse_inline_string_literal(cursor) - -> case "-": -> return _parse_inline_hyphen(cursor, context) - -> case "{": - # Nested placeable: { expr } per spec (inline_placeable) - # Advance past opening brace and delegate to parse_placeable -> placeable_result = parse_placeable(cursor.advance(), context) -> if placeable_result is None: -> return None -> return ParseResult(placeable_result.value, placeable_result.cursor) - -> case _ if ch in _ASCII_DIGITS: -> return _parse_inline_number_literal(cursor) - -> case _ if is_identifier_start(ch): - # ASCII letter [a-zA-Z] check per Fluent spec for identifier start -> return _parse_inline_identifier(cursor, context) - -> case _: -> return None - - -> def parse_placeable( -> cursor: Cursor, -> context: ParseContext | None = None, -> ) -> ParseResult[Placeable] | None: -> """Parse placeable expression: {$var}, {"\\n"}, {$var -> [key] value}, or {FUNC()}. 
- -> Parser combinator helper that reduces nesting in parse_pattern(). - -> Handles: -> - Variable references: {$var} -> - String literals: {"\\n"} -> - Number literals: {42} -> - Select expressions: {$var -> [one] item *[other] items} -> - Function calls: {NUMBER($value, minimumFractionDigits: 2)} - -> Security: -> Enforces maximum nesting depth to prevent DoS attacks via deeply -> nested placeables. Configure via max_nesting_depth on FluentParserV1. - -> Args: -> cursor: Position AFTER the opening '{' -> context: Parse context for depth tracking. If None, creates fresh context. - -> Returns: -> Success(ParseResult(Placeable, cursor_after_})) on success -> None on parse error or nesting depth exceeded - -> Example: -> cursor at: "$var}" -> parses to Placeable(VariableReference("var")) -> cursor at: "\"\\n\"}" -> parses to Placeable(StringLiteral("\\n")) -> cursor at: "$n -> [one] 1 *[other] N}" -> parses to Placeable(SelectExpression(...)) -> cursor at: "NUMBER($val)}" -> parses to Placeable(FunctionReference(...)) -> """ - # Create default context if not provided -> if context is None: -> context = ParseContext() - - # Check nesting depth limit (DoS prevention) -> if context.is_depth_exceeded(): - # Nesting depth exceeded - return None to signal parse failure - # This prevents stack overflow from deeply nested constructs -> return None - - # Create child context with incremented depth for nested parsing -> nested_context = context.enter_nesting() - - # Per spec: inline_placeable ::= "{" blank? (SelectExpression | InlineExpression) blank? 
"}" - # blank ::= (blank_inline | line_end)+ ; allows newlines inside placeables -> cursor = skip_blank(cursor) - - # Capture start position before parsing expression (for select expression span) -> expr_start_pos = cursor.pos - - # Parse the inline expression with nested context for depth tracking -> expr_result = parse_inline_expression(cursor, nested_context) -> if expr_result is None: -> return expr_result - -> expr_parse = expr_result -> expression = expr_parse.value -> parse_result_cursor = expr_parse.cursor - - # Per spec: blank allows newlines after expression -> cursor = skip_blank(parse_result_cursor) - - # Check for select expression (->) - # Per FTL 1.0 spec: SelectExpression ::= InlineExpression blank? "->" ... - # Valid selectors (any InlineExpression): - # - VariableReference: { $var -> ... } - # - StringLiteral: { "foo" -> ... } - # - NumberLiteral: { 42 -> ... } - # - FunctionReference: { NUMBER($x) -> ... } - # - MessageReference: { msg -> ... } or { msg.attr -> ... } - # - TermReference: { -term -> ... } or { -term.attr -> ... } -> is_valid_selector = isinstance( -> expression, -> ( -> VariableReference, -> StringLiteral, -> NumberLiteral, -> FunctionReference, -> MessageReference, -> TermReference, -> ), -> ) - -> if is_valid_selector and not cursor.is_eof and cursor.current == "-": - # Peek ahead for -> -> next_cursor = cursor.advance() -> if not next_cursor.is_eof and next_cursor.current == ">": - # It's a select expression! 
-> cursor = next_cursor.advance() # Skip -> - -> select_result = parse_select_expression( -> cursor, expression, expr_start_pos, nested_context -> ) -> if select_result is None: -> return select_result - -> select_parse = select_result - # Per spec: blank allows newlines after select expression -> cursor = skip_blank(select_parse.cursor) - - # Expect } -> if cursor.is_eof or cursor.current != "}": -> return None # "Expected '}' after select expression", cursor - -> cursor = cursor.advance() # Skip } -> return ParseResult(Placeable(expression=select_parse.value), cursor) - - # Just a simple inline expression {$var}, {"\n"}, or {42} - # Expect } -> if cursor.is_eof or cursor.current != "}": -> return None # "Expected '}'", cursor - -> cursor = cursor.advance() # Skip } -> return ParseResult(Placeable(expression=expression), cursor) - - - # ============================================================================= - # Entry Parsing - # ============================================================================= - - -> def parse_message_header(cursor: Cursor) -> ParseResult[str] | None: -> """Parse message header: Identifier "=" - -> Returns identifier string and cursor after '='. -> """ -> id_result = parse_identifier(cursor) -> if id_result is None: -> return id_result - -> id_parse = id_result - # Per spec: Message ::= Identifier blank_inline? "=" ... -> cursor = skip_blank_inline(id_parse.cursor) - -> if cursor.is_eof or cursor.current != "=": -> return None # "Expected '=' after message ID", cursor - -> cursor = cursor.advance() # Skip = -> return ParseResult(id_parse.value, cursor) - - -> def parse_message_attributes( -> cursor: Cursor, -> context: ParseContext | None = None, -> ) -> ParseResult[list[Attribute]] | None: -> """Parse zero or more message attributes. - -> Attributes must appear on new lines starting with '.'. 
- -> Args: -> cursor: Current position in source -> context: Parse context for depth tracking -> """ -> attributes: list[Attribute] = [] - -> while not cursor.is_eof: - # Advance to next line. - # Note: Line endings are normalized to LF at parser entry. -> if cursor.current == "\n": -> cursor = cursor.advance() -> else: -> break # No newline, done with attributes - - # Check if line starts with '.' (attribute marker) - # Per spec: Attribute ::= line_end blank? "." ... - # blank allows spaces and newlines, but NOT tabs -> saved_cursor = cursor - # Skip leading spaces on this line (NOT tabs per spec) -> cursor = cursor.skip_spaces() - -> if cursor.is_eof or cursor.current != ".": -> cursor = saved_cursor -> break # Not an attribute - - # Parse attribute -> attr_result = parse_attribute(saved_cursor, context) -> if attr_result is None: -> cursor = saved_cursor -> break # Invalid attribute syntax - -> attr_parse = attr_result -> attributes.append(attr_parse.value) -> cursor = attr_parse.cursor - -> return ParseResult(attributes, cursor) - - -> def validate_message_content(pattern: Pattern | None, attributes: list[Attribute]) -> bool: -> """Validate message has either pattern or attributes. - -> Per Fluent spec: Message ::= ID "=" ((Pattern Attribute*) | (Attribute+)) - -> Args: -> pattern: Message value pattern (may be None) -> attributes: List of message attributes - -> Returns: -> True if validation passed, False if validation failed -> """ -> has_pattern = pattern is not None and len(pattern.elements) > 0 -> has_attributes = len(attributes) > 0 - - # Message must have either value or attributes -> return has_pattern or has_attributes - - -> def parse_message( -> cursor: Cursor, -> context: ParseContext | None = None, -> ) -> ParseResult[Message] | None: -> """Parse message with full support for select expressions. - -> Examples: -> "hello = World" -> "welcome = Hello, {$name}!" 
-> "count = {$num -> [one] item *[other] items}" - -> Args: -> cursor: Current position in source -> context: Parse context for depth tracking - -> Returns: -> Success(ParseResult(Message, new_cursor)) on success -> Failure(ParseError(...)) on parse error -> """ -> start_pos = cursor.pos - - # Parse: Identifier "=" -> id_result = parse_message_header(cursor) -> if id_result is None: -> return id_result -> id_parse = id_result -> cursor = id_parse.cursor - - # Parse pattern (message value) -> cursor, initial_indent = skip_multiline_pattern_start(cursor) -> pattern_result = parse_pattern(cursor, context, initial_common_indent=initial_indent) -> if pattern_result is None: -> return pattern_result -> pattern_parse = pattern_result -> cursor = pattern_parse.cursor - - # Parse: Attribute* (zero or more attributes) -> attributes_result = parse_message_attributes(cursor, context) -> if attributes_result is None: -> return attributes_result -> attributes_parse = attributes_result -> cursor = attributes_parse.cursor - - # Validate: Per spec, Message must have Pattern OR Attribute -> is_valid = validate_message_content(pattern_parse.value, attributes_parse.value) -> if not is_valid: -> return None # Validation failed - - # Construct Message node -> message = Message( -> id=Identifier(id_parse.value), -> value=pattern_parse.value, -> attributes=tuple(attributes_parse.value), -> span=Span(start=start_pos, end=cursor.pos), -> ) - -> return ParseResult(message, cursor) - - -> def parse_attribute( -> cursor: Cursor, -> context: ParseContext | None = None, -> ) -> ParseResult[Attribute] | None: -> """Parse message attribute (.attribute = pattern). - -> FTL syntax: -> button = Save -> .tooltip = Click to save changes -> .aria-label = Save button - -> Attributes are indented and start with a dot followed by an identifier. 
- -> Args: -> cursor: Current position in source (should be at start of line with '.') -> context: Parse context for depth tracking - -> Returns: -> Success(ParseResult(Attribute, new_cursor)) on success -> Failure(ParseError(...)) on parse error -> """ - # Skip leading whitespace (ONLY spaces per spec, NOT tabs or newlines) - # Per spec: Attribute ::= line_end blank? "." ... - # blank can contain spaces but NOT tabs -> cursor = skip_blank_inline(cursor) - - # Check for '.' at start -> if cursor.is_eof or cursor.current != ".": -> return None # "Expected '.' at start of attribute", cursor, expected=["."] - -> cursor = cursor.advance() # Skip '.' - - # Parse identifier after '.' -> id_result = parse_identifier(cursor) -> if id_result is None: -> return id_result - -> id_parse = id_result - # Per spec: Attribute ::= line_end blank? "." Identifier blank_inline? "=" ... -> cursor = skip_blank_inline(id_parse.cursor) - - # Expect '=' -> if cursor.is_eof or cursor.current != "=": -> return None # "Expected '=' after attribute identifier", cursor, expected=["="] - -> cursor = cursor.advance() # Skip '=' - # After '=', handle multiline pattern start (same as messages) - # Per spec: Attribute ::= ... blank_inline? "=" blank_inline? Pattern - # Pattern can start on same line or next line with indentation -> cursor, initial_indent = skip_multiline_pattern_start(cursor) - - # Parse pattern -> pattern_result = parse_pattern(cursor, context, initial_common_indent=initial_indent) -> if pattern_result is None: -> return pattern_result - -> pattern_parse = pattern_result - -> attribute = Attribute(id=Identifier(id_parse.value), value=pattern_parse.value) - -> return ParseResult(attribute, pattern_parse.cursor) - - -> def parse_term( -> cursor: Cursor, -> context: ParseContext | None = None, -> ) -> ParseResult[Term] | None: -> """Parse term definition (-term-id = pattern). 
- -> FTL syntax: -> -brand = Firefox -> -brand-version = 3.0 -> .tooltip = Current version - -> Terms are private definitions prefixed with '-' and can have attributes. - -> Args: -> cursor: Current position in source (should be at '-') -> context: Parse context for depth tracking - -> Returns: -> Success(ParseResult(Term, new_cursor)) on success -> Failure(ParseError(...)) on parse error -> """ - # Capture start position for span -> start_pos = cursor.pos - - # Expect '-' prefix -> if cursor.is_eof or cursor.current != "-": -> return None # "Expected '-' at start of term", cursor, expected=["-"] - -> cursor = cursor.advance() # Skip '-' - - # Parse identifier -> id_result = parse_identifier(cursor) -> if id_result is None: -> return id_result - -> id_parse = id_result - # Per spec: Term ::= "-" Identifier blank_inline? "=" ... -> cursor = skip_blank_inline(id_parse.cursor) - - # Expect '=' -> if cursor.is_eof or cursor.current != "=": -> return None # "Expected '=' after term ID", cursor, expected=["="] - -> cursor = cursor.advance() # Skip '=' - - # After '=', handle multiline pattern start (same as messages) - # Use skip_multiline_pattern_start to properly track initial indent for common_indent -> cursor, initial_indent = skip_multiline_pattern_start(cursor) - - # Parse pattern with initial common indent for proper multiline handling -> pattern_result = parse_pattern(cursor, context, initial_common_indent=initial_indent) -> if pattern_result is None: -> return pattern_result - -> pattern_parse = pattern_result -> cursor = pattern_parse.cursor - - # Validate term has non-empty value (FTL spec requirement) -> if not pattern_parse.value.elements: -> return None # f'Expected term "-{id_parse.value}" to have a value' - - # Parse attributes using shared helper -> attributes_result = parse_message_attributes(cursor, context) -> if attributes_result is None: -> return None # Should not happen, but handle defensively -> attributes = attributes_result.value -> cursor = 
attributes_result.cursor - - # Create span from start to current position -> span = Span(start=start_pos, end=cursor.pos) - -> term = Term( -> id=Identifier(id_parse.value), -> value=pattern_parse.value, -> attributes=tuple(attributes), -> span=span, -> ) - -> return ParseResult(term, cursor) - - -> def parse_comment(cursor: Cursor) -> ParseResult[Comment] | None: -> """Parse comment line per Fluent spec. - -> Per spec, comments come in three types: -> - # (single-line comment) -> - ## (group comment) -> - ### (resource comment) - -> Adjacent comment lines of the same type are joined during AST construction. - -> EBNF: -> CommentLine ::= ("###" | "##" | "#") ("\u0020" comment_char*)? line_end - -> Args: -> cursor: Current parse position (must be at '#') - -> Returns: -> Success with Comment node or Failure with ParseError -> """ -> start_pos = cursor.pos - - # Determine comment type by counting '#' characters -> hash_count = 0 -> temp_cursor = cursor -> while not temp_cursor.is_eof and temp_cursor.current == "#": -> hash_count += 1 -> temp_cursor = temp_cursor.advance() - - # Validate comment type (1, 2, or 3 hashes) -> if hash_count > 3: -> return None # f"Invalid comment: expected 1-3 '#' characters, found {hash_count}" - - # Map hash count to comment type -> comment_type = { -> 1: CommentType.COMMENT, -> 2: CommentType.GROUP, -> 3: CommentType.RESOURCE, -> }.get(hash_count, CommentType.COMMENT) - - # Advance cursor past the '#' characters -> cursor = temp_cursor - - # Per spec: optional space after '#' -> if not cursor.is_eof and cursor.current == " ": -> cursor = cursor.advance() - - # Collect comment content (everything until line end) -> content_start = cursor.pos -> cursor = cursor.skip_to_line_end() - - # Extract comment text -> content = cursor.source[content_start : cursor.pos] - - # Advance past line ending (handles LF, CRLF, CR) -> cursor = cursor.skip_line_end() - - # Create Comment node with span -> comment_node = Comment( -> content=content, -> 
type=comment_type, -> span=Span(start=start_pos, end=cursor.pos), -> ) - -> return ParseResult(comment_node, cursor) diff --git a/src/ftllexengine/syntax/position.py b/src/ftllexengine/syntax/position.py index 5c8854ac..b5db0cfb 100644 --- a/src/ftllexengine/syntax/position.py +++ b/src/ftllexengine/syntax/position.py @@ -34,12 +34,12 @@ def line_offset(source: str, pos: int) -> int: 0-based line number Example: - >>> source = "line1\\nline2\\nline3" - >>> line_offset(source, 0) # Start of file + >>> source = "line1\\nline2\\nline3" # doctest: +SKIP + >>> line_offset(source, 0) # Start of file # doctest: +SKIP 0 - >>> line_offset(source, 6) # Start of line2 + >>> line_offset(source, 6) # Start of line2 # doctest: +SKIP 1 - >>> line_offset(source, 12) # Start of line3 + >>> line_offset(source, 12) # Start of line3 # doctest: +SKIP 2 Note: @@ -69,14 +69,14 @@ def column_offset(source: str, pos: int) -> int: 0-based column number (characters from line start) Example: - >>> source = "hello\\nworld" - >>> column_offset(source, 0) # 'h' in "hello" + >>> source = "hello\\nworld" # doctest: +SKIP + >>> column_offset(source, 0) # 'h' in "hello" # doctest: +SKIP 0 - >>> column_offset(source, 2) # 'l' in "hello" + >>> column_offset(source, 2) # 'l' in "hello" # doctest: +SKIP 2 - >>> column_offset(source, 6) # 'w' in "world" + >>> column_offset(source, 6) # 'w' in "world" # doctest: +SKIP 0 - >>> column_offset(source, 10) # 'd' in "world" + >>> column_offset(source, 10) # 'd' in "world" # doctest: +SKIP 4 Note: @@ -112,10 +112,10 @@ def format_position(source: str, pos: int, *, zero_based: bool = True) -> str: Position string like "line:col" (e.g., "2:5" or "3:6") Example: - >>> source = "hello\\nworld\\ntest" - >>> format_position(source, 6, zero_based=True) + >>> source = "hello\\nworld\\ntest" # doctest: +SKIP + >>> format_position(source, 6, zero_based=True) # doctest: +SKIP '1:0' - >>> format_position(source, 6, zero_based=False) + >>> format_position(source, 6, 
zero_based=False) # doctest: +SKIP '2:1' """ line = line_offset(source, pos) @@ -142,10 +142,10 @@ def get_line_content(source: str, line_number: int, *, zero_based: bool = True) Content of the line (without trailing newline) Example: - >>> source = "hello\\nworld\\ntest" - >>> get_line_content(source, 0, zero_based=True) + >>> source = "hello\\nworld\\ntest" # doctest: +SKIP + >>> get_line_content(source, 0, zero_based=True) # doctest: +SKIP 'hello' - >>> get_line_content(source, 2, zero_based=False) + >>> get_line_content(source, 2, zero_based=False) # doctest: +SKIP 'world' """ if not zero_based: @@ -180,8 +180,8 @@ def get_error_context(source: str, pos: int, context_lines: int = 2, marker: str Formatted error context string Example: - >>> source = "line1\\nline2\\nerror here\\nline4\\nline5" - >>> print(get_error_context(source, 12, context_lines=1)) + >>> source = "line1\\nline2\\nerror here\\nline4\\nline5" # doctest: +SKIP + >>> print(get_error_context(source, 12, context_lines=1)) # doctest: +SKIP line2 error here ^ diff --git a/src/ftllexengine/syntax/reference_extraction.py b/src/ftllexengine/syntax/reference_extraction.py new file mode 100644 index 00000000..0d1c27d2 --- /dev/null +++ b/src/ftllexengine/syntax/reference_extraction.py @@ -0,0 +1,84 @@ +"""AST-only reference extraction helpers. + +These helpers operate purely on syntax nodes and therefore live in the syntax +layer, where validation and higher-level introspection code can both depend on +them without introducing upward imports. 
+""" + +from __future__ import annotations + +from ftllexengine.constants import MAX_DEPTH + +from .ast import Message, MessageReference, Term, TermReference +from .visitor import ASTVisitor + +__all__ = [ + "ReferenceExtractor", + "extract_references", + "extract_references_by_attribute", +] + + +class ReferenceExtractor(ASTVisitor[MessageReference | TermReference]): + """Extract message and term references from AST for dependency analysis.""" + + __slots__ = ("message_refs", "term_refs") + + def __init__(self, *, max_depth: int = MAX_DEPTH) -> None: + super().__init__(max_depth=max_depth) + self.message_refs: set[str] = set() + self.term_refs: set[str] = set() + + def visit_MessageReference( # noqa: N802 - AST visitor dispatch contract + self, node: MessageReference + ) -> MessageReference: + """Collect message reference ID with optional attribute qualification.""" + if node.attribute is not None: + self.message_refs.add(f"{node.id.name}.{node.attribute.name}") + else: + self.message_refs.add(node.id.name) + return node + + def visit_TermReference( # noqa: N802 - AST visitor dispatch contract + self, node: TermReference + ) -> TermReference: + """Collect term reference ID and traverse nested call arguments.""" + if node.attribute is not None: + self.term_refs.add(f"{node.id.name}.{node.attribute.name}") + else: + self.term_refs.add(node.id.name) + with self._depth_guard: + self.generic_visit(node) + return node + + +def extract_references(entry: Message | Term) -> tuple[frozenset[str], frozenset[str]]: + """Extract message and term references from an AST entry.""" + extractor = ReferenceExtractor() + + if entry.value is not None: + extractor.visit(entry.value) + + for attr in entry.attributes: + extractor.visit(attr.value) + + return frozenset(extractor.message_refs), frozenset(extractor.term_refs) + + +def extract_references_by_attribute( + entry: Message | Term, +) -> dict[str | None, tuple[frozenset[str], frozenset[str]]]: + """Extract references per source 
attribute for attribute-granular analysis.""" + result: dict[str | None, tuple[frozenset[str], frozenset[str]]] = {} + + if entry.value is not None: + extractor = ReferenceExtractor() + extractor.visit(entry.value) + result[None] = (frozenset(extractor.message_refs), frozenset(extractor.term_refs)) + + for attr in entry.attributes: + extractor = ReferenceExtractor() + extractor.visit(attr.value) + result[attr.id.name] = (frozenset(extractor.message_refs), frozenset(extractor.term_refs)) + + return result diff --git a/src/ftllexengine/syntax/serializer.py b/src/ftllexengine/syntax/serializer.py index f4ace5ab..c31b66f1 100644 --- a/src/ftllexengine/syntax/serializer.py +++ b/src/ftllexengine/syntax/serializer.py @@ -15,13 +15,11 @@ from __future__ import annotations -from enum import Enum, auto from typing import assert_never from ftllexengine.constants import MAX_DEPTH -from ftllexengine.core.depth_guard import DepthGuard -from ftllexengine.core.identifier_validation import is_valid_identifier -from ftllexengine.diagnostics import ErrorCategory, FrozenFluentError +from ftllexengine.core.depth_guard import DepthGuard, DepthLimitExceededError +from ftllexengine.diagnostics import FrozenFluentError from ftllexengine.enums import CommentType from .ast import ( @@ -46,6 +44,23 @@ TextElement, VariableReference, ) +from .serializer_lines import ( + _ATTR_INDENT, + _CHAR_PLACEABLE, + _CONT_INDENT, + _VARIANT_INDENT, + _classify_line, + _escape_text, + _LineKind, +) +from .serializer_validation import ( + SerializationDepthError, + SerializationValidationError, + _validate_pattern, +) +from .serializer_validation import ( + validate_resource as _validate_resource_impl, +) from .visitor import ASTVisitor __all__ = [ @@ -55,354 +70,13 @@ ] -class SerializationValidationError(ValueError): - """Raised when AST validation fails during serialization. - - This error indicates the AST structure would produce invalid FTL syntax. 
- Common causes: - - Duplicate named argument names in function or term calls - - Named argument values that are not StringLiteral or NumberLiteral - - Invalid identifiers in messages, terms, or attributes - """ - - -class SerializationDepthError(ValueError): - """Raised when AST nesting exceeds maximum serialization depth. - - This error indicates the AST is too deeply nested for safe serialization. - Prevents stack overflow from: - - Adversarially constructed ASTs with excessive Placeable nesting - - Malformed programmatic AST construction - - The default limit is 100, matching the parser's maximum nesting depth. - """ - - -def _validate_identifier(identifier: Identifier, context: str) -> None: - """Validate identifier follows FTL grammar rules. - - Uses unified validation module to ensure consistency between parser - and serializer. Validates both syntax and length constraints. - - Args: - identifier: Identifier to validate - context: Context string for error messages - - Raises: - SerializationValidationError: If identifier name is invalid - """ - if not is_valid_identifier(identifier.name): - msg = ( - f"Invalid identifier '{identifier.name}' in {context}. " - f"Identifiers must match [a-zA-Z][a-zA-Z0-9_-]* and be ≤256 characters" - ) - raise SerializationValidationError(msg) - - -def _validate_pattern(pattern: Pattern, context: str, depth_guard: DepthGuard) -> None: - """Validate all expressions within a Pattern. - - Args: - pattern: Pattern AST to validate - context: Context string for error messages - depth_guard: Depth guard for recursion protection - """ - for element in pattern.elements: - if isinstance(element, Placeable): - with depth_guard: - _validate_expression(element.expression, context, depth_guard) - - -def _assert_named_arg_value_is_literal( - value: object, arg_name: str, context: str -) -> None: - """Defense-in-depth check: named argument value must be StringLiteral or NumberLiteral. 
- - NamedArgument.value is typed FTLLiteral (StringLiteral | NumberLiteral), which - enforces the spec constraint at the type level. However, Python type annotations - are not enforced at runtime: a frozen dataclass field can be bypassed via - object.__setattr__, deserialization, or direct AST construction. - - This function accepts the value as ``object`` (not ``FTLLiteral``) so that the - isinstance check is not redundant from mypy's perspective — mypy cannot see - through the ``object`` parameter to know the value will always be FTLLiteral. - The check is a permanent last-line defense before invalid FTL is emitted. - - Args: - value: The named argument value to check. - arg_name: The argument name (for error messages). - context: The call context (for error messages). - - Raises: - SerializationValidationError: If value is not a literal. - """ - if not isinstance(value, (StringLiteral, NumberLiteral)): - value_type = type(value).__name__ - msg = ( - f"Named argument '{arg_name}' in {context} has invalid value type " - f"'{value_type}'. Named argument values must be StringLiteral or " - f"NumberLiteral per FTL specification " - f'(NamedArgument ::= Identifier ":" (StringLiteral | NumberLiteral)).' - ) - raise SerializationValidationError(msg) - - -def _validate_call_arguments( - args: CallArguments, context: str, depth_guard: DepthGuard -) -> None: - """Validate CallArguments per FTL specification. - - Per FTL EBNF: - NamedArgument ::= Identifier blank? ":" blank? (StringLiteral | NumberLiteral) - - Enforces: - 1. Positional argument expressions are valid - 2. Named argument names must be unique (no duplicates) - 3. Named argument identifiers are valid - 4. 
Named argument values are StringLiteral or NumberLiteral (defense-in-depth: - type annotation is FTLLiteral, but runtime bypass is possible via - object.__setattr__ on frozen dataclasses) - - The parser enforces these constraints during parsing, but programmatically - constructed ASTs may violate the uniqueness, identifier, and literal-value constraints. - This validation catches such errors before serialization produces invalid FTL. - - Args: - args: CallArguments to validate - context: Context string for error messages - depth_guard: Depth guard for recursion protection - - Raises: - SerializationValidationError: If constraints are violated - """ - # Validate positional arguments - for pos_arg in args.positional: - with depth_guard: - _validate_expression(pos_arg, context, depth_guard) - - # Validate named arguments with duplicate detection - seen_names: set[str] = set() - for named_arg in args.named: - arg_name = named_arg.name.name - - # Check for duplicate named argument names - if arg_name in seen_names: - msg = ( - f"Duplicate named argument '{arg_name}' in {context}. " - "Named argument names must be unique per FTL specification." - ) - raise SerializationValidationError(msg) - seen_names.add(arg_name) - - # Validate the identifier - _validate_identifier(named_arg.name, f"{context}, named argument") - - # Defense-in-depth: verify value is a literal (FTLLiteral type annotation is - # bypassable at runtime; this check is the serializer's last line of defense). - _assert_named_arg_value_is_literal(named_arg.value, arg_name, context) - - -def _validate_expression( # noqa: PLR0912 - validation dispatch over closed Expression union type - expr: Expression, context: str, depth_guard: DepthGuard -) -> None: - """Validate an Expression recursively. 
- - Args: - expr: Expression AST to validate - context: Context string for error messages - depth_guard: Depth guard for recursion protection - """ - match expr: - case SelectExpression(): - # Defense-in-depth: __post_init__ validates at construction time, but - # programmatically built ASTs (e.g., in tests or external tooling) can - # bypass __post_init__ via object.__new__. Verify the invariant here as - # a last guard before emitting invalid FTL. - n_defaults = sum(1 for v in expr.variants if v.default) - if n_defaults == 0: - msg = ( - f"SelectExpression in {context} has no default variant. " - "Exactly one variant must be marked as default." - ) - raise SerializationValidationError(msg) - if n_defaults > 1: - msg = ( - f"SelectExpression in {context} has {n_defaults} default variants. " - "Exactly one variant must be marked as default." - ) - raise SerializationValidationError(msg) - # Validate selector expression and variant keys - with depth_guard: - _validate_expression(expr.selector, context, depth_guard) - # Validate variant keys (if Identifier) and patterns - for variant in expr.variants: - if isinstance(variant.key, Identifier): - _validate_identifier(variant.key, f"{context}, variant key") - with depth_guard: - _validate_pattern(variant.value, context, depth_guard) - case Placeable(): - with depth_guard: - _validate_expression(expr.expression, context, depth_guard) - case VariableReference(): - _validate_identifier(expr.id, f"{context}, variable reference") - case MessageReference(): - _validate_identifier(expr.id, f"{context}, message reference") - if expr.attribute: - _validate_identifier(expr.attribute, f"{context}, message attribute") - case TermReference(): - _validate_identifier(expr.id, f"{context}, term reference") - if expr.attribute: - _validate_identifier(expr.attribute, f"{context}, term attribute") - if expr.arguments: - _validate_call_arguments(expr.arguments, context, depth_guard) - case FunctionReference(): - 
_validate_identifier(expr.id, f"{context}, function reference") - _validate_call_arguments(expr.arguments, context, depth_guard) - case _: - pass # Other expressions (NumberLiteral, StringLiteral) don't need validation - - def _validate_resource(resource: Resource, max_depth: int = MAX_DEPTH) -> None: - """Validate a Resource AST for serialization. - - Checks identifiers, call arguments, and nested expression structure. - Enforces depth limits to prevent stack overflow. - - Args: - resource: Resource AST to validate - max_depth: Maximum AST nesting depth (default: MAX_DEPTH) - - Raises: - SerializationValidationError: If validation fails - SerializationDepthError: If AST nesting exceeds max_depth - """ - depth_guard = DepthGuard(max_depth=max_depth) - - try: - for entry in resource.entries: - match entry: - case Message(): - _validate_identifier(entry.id, "message ID") - context = f"message '{entry.id.name}'" - if entry.value: - _validate_pattern(entry.value, context, depth_guard) - for attr in entry.attributes: - _validate_identifier(attr.id, f"{context}, attribute ID") - _validate_pattern(attr.value, f"{context}.{attr.id.name}", depth_guard) - case Term(): - _validate_identifier(entry.id, "term ID") - context = f"term '-{entry.id.name}'" - _validate_pattern(entry.value, context, depth_guard) - for attr in entry.attributes: - _validate_identifier(attr.id, f"{context}, attribute ID") - _validate_pattern(attr.value, f"{context}.{attr.id.name}", depth_guard) - case _: - pass # Comments and Junk don't need validation - except FrozenFluentError as e: - if e.category == ErrorCategory.RESOLUTION: - # Depth limit exceeded - wrap in SerializationDepthError - msg = f"Validation depth limit exceeded (max: {max_depth}): {e}" - raise SerializationDepthError(msg) from e - raise - -# FTL indentation constants per Fluent spec. -# Standard continuation indent: 4 spaces. -_CONT_INDENT: str = " " - -# Attributes use 4 spaces for standard indentation. 
-_ATTR_INDENT: str = "\n " - -# Select expression variants use 3 spaces to align with the `*[` marker. -# This produces: "\n *[key] value" where the `[` aligns with attribute `.`. -_VARIANT_INDENT: str = "\n " - -# Characters that are syntactically significant at the start of a continuation -# line in FTL: '[' (variant key), '*' (default variant), '.' (attribute). -# The FTL parser strips leading whitespace and checks the first non-whitespace -# character against these markers. Content containing these characters at -# structurally ambiguous positions must be wrapped in StringLiteral placeables. -_LINE_START_SYNTAX_CHARS: frozenset[str] = frozenset(".[*") - -# Precomputed StringLiteral placeable forms for special characters. -# Used by both continuation line dispatch and brace escaping. -_CHAR_PLACEABLE: dict[str, str] = { - "{": '{ "{" }', - "}": '{ "}" }', - "[": '{ "[" }', - "*": '{ "*" }', - ".": '{ "." }', -} - - -class _LineKind(Enum): - """Classification of a continuation line's content for serialization. - - The FTL parser interprets continuation lines structurally: leading - whitespace is syntactic indent, blank lines are stripped, and - characters '.', '*', '[' as the first non-whitespace trigger - attribute/variant parsing. Each kind maps to one unambiguous - emission strategy. - """ - - EMPTY = auto() - WHITESPACE_ONLY = auto() - SYNTAX_LEADING = auto() - NORMAL = auto() - - -def _classify_line(line: str) -> tuple[_LineKind, int]: - """Classify a continuation line for serialization dispatch. - - Returns the line kind and, for SYNTAX_LEADING, the number of - leading whitespace characters before the syntax character. - For all other kinds the second element is 0. - - Pure function with no side effects. - - Args: - line: Text content of a single continuation line (no newlines). - - Returns: - (kind, ws_prefix_len) tuple. - """ - if not line: - return (_LineKind.EMPTY, 0) - - # Scan to first non-space character. 
- ws_len = 0 - length = len(line) - while ws_len < length and line[ws_len] == " ": - ws_len += 1 - - if ws_len == length: - return (_LineKind.WHITESPACE_ONLY, 0) - - if line[ws_len] in _LINE_START_SYNTAX_CHARS: - return (_LineKind.SYNTAX_LEADING, ws_len) - - return (_LineKind.NORMAL, 0) - - -def _escape_text(text: str, output: list[str]) -> None: - """Escape brace characters in text content. - - Wraps { and } as StringLiteral placeables per Fluent spec. - Character-level escaping only; line-level concerns (whitespace - ambiguity, syntax chars) are handled by _emit_classified_line. - """ - pos = 0 - length = len(text) - while pos < length: - ch = text[pos] - if ch in ("{", "}"): - output.append(_CHAR_PLACEABLE[ch]) - pos += 1 - continue - run_start = pos - pos += 1 - while pos < length and text[pos] not in ("{", "}"): - pos += 1 - output.append(text[run_start:pos]) + """Validate a resource using the serializer module's patchable helpers.""" + _validate_resource_impl( + resource, + max_depth=max_depth, + validate_pattern=_validate_pattern, + ) class FluentSerializer(ASTVisitor): @@ -412,18 +86,18 @@ class FluentSerializer(ASTVisitor): All serialization state is local to the serialize() call. Usage: - >>> from ftllexengine.syntax import parse, serialize - >>> ast = parse("hello = Hello, world!") - >>> ftl = serialize(ast) - >>> print(ftl) + >>> from ftllexengine.syntax import parse, serialize # doctest: +SKIP + >>> ast = parse("hello = Hello, world!") # doctest: +SKIP + >>> ftl = serialize(ast) # doctest: +SKIP + >>> print(ftl) # doctest: +SKIP hello = Hello, world! 
Advanced usage (direct class instantiation): - >>> from ftllexengine.syntax import parse - >>> from ftllexengine.syntax.serializer import FluentSerializer - >>> ast = parse("hello = Hello, world!") - >>> serializer = FluentSerializer() - >>> ftl = serializer.serialize(ast) + >>> from ftllexengine.syntax import parse # doctest: +SKIP + >>> from ftllexengine.syntax.serializer import FluentSerializer # doctest: +SKIP + >>> ast = parse("hello = Hello, world!") # doctest: +SKIP + >>> serializer = FluentSerializer() # doctest: +SKIP + >>> ftl = serializer.serialize(ast) # doctest: +SKIP """ def serialize( @@ -461,11 +135,10 @@ def serialize( try: self._serialize_resource(resource, output, depth_guard) - except FrozenFluentError as e: - if e.category == ErrorCategory.RESOLUTION: - # Depth limit exceeded - wrap in SerializationDepthError - msg = f"AST nesting exceeds maximum depth ({max_depth})" - raise SerializationDepthError(msg) from e + except DepthLimitExceededError as exc: + msg = f"AST nesting exceeds maximum depth ({max_depth})" + raise SerializationDepthError(msg) from exc + except FrozenFluentError: raise return "".join(output) @@ -961,10 +634,10 @@ def serialize( SerializationDepthError: If AST nesting exceeds max_depth Example: - >>> from ftllexengine.syntax import parse, serialize - >>> ast = parse("hello = Hello, world!") - >>> ftl = serialize(ast) - >>> assert ftl == "hello = Hello, world!\\n" + >>> from ftllexengine.syntax import parse, serialize # doctest: +SKIP + >>> ast = parse("hello = Hello, world!") # doctest: +SKIP + >>> ftl = serialize(ast) # doctest: +SKIP + >>> assert ftl == "hello = Hello, world!\\n" # doctest: +SKIP """ serializer = FluentSerializer() return serializer.serialize(resource, validate=validate, max_depth=max_depth) diff --git a/src/ftllexengine/syntax/serializer.py,cover b/src/ftllexengine/syntax/serializer.py,cover deleted file mode 100644 index 97035dd2..00000000 --- a/src/ftllexengine/syntax/serializer.py,cover +++ /dev/null @@ 
-1,784 +0,0 @@ -> """Serialize Fluent AST back to FTL syntax. - -> Converts AST nodes to FTL source code. Useful for: -> - Formatters -> - Code generators -> - Property-based testing (roundtrip: parse → serialize → parse) - -> Security: -> - DepthGuard protects against stack overflow from deeply nested ASTs. -> - Maximum nesting depth defaults to 100 (matching parser limit). -> - Raises SerializationDepthError on overflow (not RecursionError). - -> Python 3.13+. -> """ - -> from __future__ import annotations - -> import re - -> from ftllexengine.constants import MAX_DEPTH -> from ftllexengine.core.depth_guard import DepthGuard -> from ftllexengine.core.identifier_validation import is_valid_identifier -> from ftllexengine.diagnostics import ErrorCategory, FrozenFluentError -> from ftllexengine.enums import CommentType - -> from .ast import ( -> Attribute, -> CallArguments, -> Comment, -> Expression, -> FunctionReference, -> Identifier, -> Junk, -> Message, -> MessageReference, -> NamedArgument, -> NumberLiteral, -> Pattern, -> Placeable, -> Resource, -> SelectExpression, -> StringLiteral, -> Term, -> TermReference, -> TextElement, -> VariableReference, -> ) -> from .validation_helpers import count_default_variants -> from .visitor import ASTVisitor - -> __all__ = [ -> "SerializationDepthError", -> "SerializationValidationError", -> "serialize", -> ] - - -> class SerializationValidationError(ValueError): -> """Raised when AST validation fails during serialization. - -> This error indicates the AST structure would produce invalid FTL syntax. -> Common causes: -> - SelectExpression without exactly one default variant -> - Malformed AST nodes from programmatic construction -> """ - - -> class SerializationDepthError(ValueError): -> """Raised when AST nesting exceeds maximum serialization depth. - -> This error indicates the AST is too deeply nested for safe serialization. 
-> Prevents stack overflow from: -> - Adversarially constructed ASTs with excessive Placeable nesting -> - Malformed programmatic AST construction - -> The default limit is 100, matching the parser's maximum nesting depth. -> """ - - -> def _validate_identifier(identifier: Identifier, context: str) -> None: -> """Validate identifier follows FTL grammar rules. - -> Uses unified validation module to ensure consistency between parser -> and serializer. Validates both syntax and length constraints. - -> Args: -> identifier: Identifier to validate -> context: Context string for error messages - -> Raises: -> SerializationValidationError: If identifier name is invalid -> """ -> if not is_valid_identifier(identifier.name): -> msg = ( -> f"Invalid identifier '{identifier.name}' in {context}. " -> f"Identifiers must match [a-zA-Z][a-zA-Z0-9_-]* and be ≤256 characters" -> ) -> raise SerializationValidationError(msg) - - -> def _validate_select_expression(expr: SelectExpression, context: str) -> None: -> """Validate SelectExpression has exactly one default variant. - -> Per FTL spec, every SelectExpression must have exactly one variant -> marked as default with the * prefix. - -> Args: -> expr: SelectExpression to validate -> context: Description of location for error message - -> Raises: -> SerializationValidationError: If validation fails -> """ -> default_count = count_default_variants(expr) - -> if default_count == 0: -> msg = f"SelectExpression in {context} has no default variant (requires exactly one *[key])" -> raise SerializationValidationError(msg) - -> if default_count > 1: -> msg = ( -> f"SelectExpression in {context} has {default_count} default variants " -> "(requires exactly one)" -> ) -> raise SerializationValidationError(msg) - - -> def _validate_pattern(pattern: Pattern, context: str, depth_guard: DepthGuard) -> None: -> """Validate all expressions within a Pattern. 
- -> Args: -> pattern: Pattern AST to validate -> context: Context string for error messages -> depth_guard: Depth guard for recursion protection -> """ -> for element in pattern.elements: -> if isinstance(element, Placeable): -> with depth_guard: -> _validate_expression(element.expression, context, depth_guard) - - -> def _validate_call_arguments( -> args: CallArguments, context: str, depth_guard: DepthGuard -> ) -> None: -> """Validate CallArguments per FTL specification. - -> Per FTL EBNF: -> NamedArgument ::= Identifier blank? ":" blank? (StringLiteral | NumberLiteral) - -> Enforces: -> 1. Named argument names must be unique (no duplicates) -> 2. Named argument values must be StringLiteral or NumberLiteral - -> The parser enforces these constraints during parsing, but programmatically -> constructed ASTs may violate them. This validation catches such errors -> before serialization produces invalid FTL. - -> Args: -> args: CallArguments to validate -> context: Context string for error messages -> depth_guard: Depth guard for recursion protection - -> Raises: -> SerializationValidationError: If constraints are violated -> """ - # Validate positional arguments -> for pos_arg in args.positional: -> with depth_guard: -> _validate_expression(pos_arg, context, depth_guard) - - # Validate named arguments with duplicate detection and type enforcement -> seen_names: set[str] = set() -> for named_arg in args.named: -> arg_name = named_arg.name.name - - # Check for duplicate named argument names -> if arg_name in seen_names: -> msg = ( -> f"Duplicate named argument '{arg_name}' in {context}. " -> "Named argument names must be unique per FTL specification." 
-> ) -> raise SerializationValidationError(msg) -> seen_names.add(arg_name) - - # Validate the identifier -> _validate_identifier(named_arg.name, f"{context}, named argument") - - # Per FTL spec, named argument values must be StringLiteral or NumberLiteral -> if not isinstance(named_arg.value, (StringLiteral, NumberLiteral)): -> value_type = type(named_arg.value).__name__ -> msg = ( -> f"Named argument '{arg_name}' in {context} has invalid value type " -> f"'{value_type}'. Per FTL specification, named argument values must be " -> "StringLiteral or NumberLiteral, not arbitrary expressions." -> ) -> raise SerializationValidationError(msg) - - # No need to recursively validate StringLiteral/NumberLiteral (they have no sub-expressions) - - -> def _validate_expression(expr: Expression, context: str, depth_guard: DepthGuard) -> None: # noqa: PLR0912 -> """Validate an Expression recursively. - -> Args: -> expr: Expression AST to validate -> context: Context string for error messages -> depth_guard: Depth guard for recursion protection -> """ -> match expr: -> case SelectExpression(): -> _validate_select_expression(expr, context) - # Validate selector expression and variant keys -> with depth_guard: -> _validate_expression(expr.selector, context, depth_guard) - # Validate variant keys (if Identifier) and patterns -> for variant in expr.variants: -> if isinstance(variant.key, Identifier): -> _validate_identifier(variant.key, f"{context}, variant key") -> with depth_guard: -> _validate_pattern(variant.value, context, depth_guard) -> case Placeable(): -> with depth_guard: -> _validate_expression(expr.expression, context, depth_guard) -> case VariableReference(): -> _validate_identifier(expr.id, f"{context}, variable reference") -> case MessageReference(): -> _validate_identifier(expr.id, f"{context}, message reference") -> if expr.attribute: -> _validate_identifier(expr.attribute, f"{context}, message attribute") -> case TermReference(): -> _validate_identifier(expr.id, 
f"{context}, term reference") -> if expr.attribute: -> _validate_identifier(expr.attribute, f"{context}, term attribute") -> if expr.arguments: -> _validate_call_arguments(expr.arguments, context, depth_guard) -> case FunctionReference(): -> _validate_identifier(expr.id, f"{context}, function reference") -> if expr.arguments: -> _validate_call_arguments(expr.arguments, context, depth_guard) -> case _: -> pass # Other expressions (NumberLiteral, StringLiteral) don't need validation - - -> def _validate_resource(resource: Resource, max_depth: int = MAX_DEPTH) -> None: -> """Validate a Resource AST for serialization. - -> Checks all SelectExpressions have exactly one default variant. -> Enforces depth limits to prevent stack overflow. - -> Args: -> resource: Resource AST to validate -> max_depth: Maximum AST nesting depth (default: MAX_DEPTH) - -> Raises: -> SerializationValidationError: If validation fails -> SerializationDepthError: If AST nesting exceeds max_depth -> """ -> depth_guard = DepthGuard(max_depth=max_depth) - -> try: -> for entry in resource.entries: -> match entry: -> case Message(): -> _validate_identifier(entry.id, "message ID") -> context = f"message '{entry.id.name}'" -> if entry.value: -> _validate_pattern(entry.value, context, depth_guard) -> for attr in entry.attributes: -> _validate_identifier(attr.id, f"{context}, attribute ID") -> _validate_pattern(attr.value, f"{context}.{attr.id.name}", depth_guard) -> case Term(): -> _validate_identifier(entry.id, "term ID") -> context = f"term '-{entry.id.name}'" -> _validate_pattern(entry.value, context, depth_guard) -> for attr in entry.attributes: -> _validate_identifier(attr.id, f"{context}, attribute ID") -> _validate_pattern(attr.value, f"{context}.{attr.id.name}", depth_guard) -> case _: -> pass # Comments and Junk don't need validation -> except FrozenFluentError as e: -! if e.category == ErrorCategory.RESOLUTION: - # Depth limit exceeded - wrap in SerializationDepthError -! 
msg = f"Validation depth limit exceeded (max: {max_depth}): {e}" -! raise SerializationDepthError(msg) from e -! raise - - # FTL indentation constants per Fluent spec. - # Standard continuation indent: 4 spaces. -> _CONT_INDENT: str = " " - - # Attributes use 4 spaces for standard indentation. -> _ATTR_INDENT: str = "\n " - - # Select expression variants use 3 spaces to align with the `*[` marker. - # This produces: "\n *[key] value" where the `[` aligns with attribute `.`. -> _VARIANT_INDENT: str = "\n " - - -> class FluentSerializer(ASTVisitor): -> """Converts AST back to FTL source string. - -> Thread-safe serializer with no mutable instance state. -> All serialization state is local to the serialize() call. - -> Usage: -> >>> from ftllexengine.syntax import parse, serialize -> >>> ast = parse("hello = Hello, world!") -> >>> ftl = serialize(ast) -> >>> print(ftl) -> hello = Hello, world! - -> Advanced usage (direct class instantiation): -> >>> from ftllexengine.syntax import parse -> >>> from ftllexengine.syntax.serializer import FluentSerializer -> >>> ast = parse("hello = Hello, world!") -> >>> serializer = FluentSerializer() -> >>> ftl = serializer.serialize(ast) -> """ - -> def serialize( -> self, -> resource: Resource, -> *, -> validate: bool = True, -> max_depth: int = MAX_DEPTH, -> ) -> str: -> """Serialize Resource to FTL string. - -> Pure function - builds output locally without mutating instance state. -> Thread-safe and reusable. - -> Args: -> resource: Resource AST node -> validate: If True, validate AST before serialization (default: True). -> Checks that SelectExpressions have exactly one default variant. -> Set to False only for trusted ASTs from the parser. -> max_depth: Maximum nesting depth (default: 100). Prevents stack -> overflow from adversarial or malformed ASTs. 
- -> Returns: -> FTL source code - -> Raises: -> SerializationValidationError: If validate=True and AST is invalid -> SerializationDepthError: If AST nesting exceeds max_depth -> """ -> if validate: -> _validate_resource(resource, max_depth=max_depth) - -> output: list[str] = [] -> depth_guard = DepthGuard(max_depth=max_depth) - -> try: -> self._serialize_resource(resource, output, depth_guard) -> except FrozenFluentError as e: -> if e.category == ErrorCategory.RESOLUTION: - # Depth limit exceeded - wrap in SerializationDepthError -> msg = f"AST nesting exceeds maximum depth ({max_depth})" -> raise SerializationDepthError(msg) from e -! raise - -> return "".join(output) - -> def _serialize_resource( -> self, node: Resource, output: list[str], depth_guard: DepthGuard -> ) -> None: -> """Serialize Resource to output list. - -> Handles blank line insertion between entries per Fluent spec: -> - Consecutive standalone comments of the same type require a blank -> line between them to prevent merging during re-parse. -> - Messages and terms get standard single newline separation. -> """ -> prev_entry: Message | Term | Comment | Junk | None = None - -> for entry in node.entries: -> if prev_entry is not None: - # Skip separator if Junk already contains leading whitespace. - # Parser includes preceding whitespace in Junk.content for containment, - # so adding another separator would duplicate newlines on roundtrip. -> if isinstance(entry, Junk) and entry.content and entry.content[0] in "\n ": -> pass # Junk content already has leading whitespace -> else: - # Determine if we need extra blank line to preserve roundtrip. - # Per Fluent spec: - # 1. Adjacent comments of the same type without a blank line - # between them are merged. Insert extra newline to preserve. - # 2. A comment followed by 0-1 blank lines then a message/term - # becomes an attached comment. 
If the Comment is a standalone - # entry (in entries[], not as entry.comment), we need 2 blank - # lines to prevent attachment during re-parse. -> needs_extra_blank = ( -> isinstance(prev_entry, Comment) -> and isinstance(entry, Comment) -> and prev_entry.type == entry.type -> ) or ( -> isinstance(prev_entry, Comment) -> and isinstance(entry, (Message, Term)) - # Standalone Comment followed by Message/Term needs extra blank - # to prevent the comment from becoming attached on re-parse -> ) -> if needs_extra_blank: -> output.append("\n\n") -> elif isinstance(prev_entry, (Message, Term)) and isinstance( -> entry, (Message, Term) -> ): - # Message/Term already end with \n; no extra separator for compact output -> pass -> else: -> output.append("\n") - -> self._serialize_entry(entry, output, depth_guard) -> prev_entry = entry - -> def _serialize_entry( -> self, -> entry: Message | Term | Comment | Junk, -> output: list[str], -> depth_guard: DepthGuard, -> ) -> None: -> """Serialize a top-level entry.""" -> match entry: -> case Message(): -> self._serialize_message(entry, output, depth_guard) -> case Term(): -> self._serialize_term(entry, output, depth_guard) -> case Comment(): -> self._serialize_comment(entry, output) -> case Junk(): -> self._serialize_junk(entry, output) - -> def _serialize_message( -> self, node: Message, output: list[str], depth_guard: DepthGuard -> ) -> None: -> """Serialize Message.""" - # Comment if present (attached comment, no blank line before message) - # Per Fluent spec, attached comments (#) should immediately precede their entry -> if node.comment: -> self._serialize_comment(node.comment, output) - - # Message ID -> output.append(node.id.name) - - # Value -> if node.value: -> output.append(" = ") -> self._serialize_pattern(node.value, output, depth_guard) - - # Attributes -> for attr in node.attributes: -> output.append(_ATTR_INDENT) -> self._serialize_attribute(attr, output, depth_guard) - -> output.append("\n") - -> def _serialize_term( 
-> self, node: Term, output: list[str], depth_guard: DepthGuard -> ) -> None: -> """Serialize Term.""" - # Comment if present (attached comment, no blank line before term) - # Per Fluent spec, attached comments (#) should immediately precede their entry -> if node.comment: -> self._serialize_comment(node.comment, output) - - # Term ID (with leading -) -> output.append(f"-{node.id.name} = ") - - # Value -> self._serialize_pattern(node.value, output, depth_guard) - - # Attributes -> for attr in node.attributes: -> output.append(_ATTR_INDENT) -> self._serialize_attribute(attr, output, depth_guard) - -> output.append("\n") - -> def _serialize_attribute( -> self, node: Attribute, output: list[str], depth_guard: DepthGuard -> ) -> None: -> """Serialize Attribute.""" -> output.append(f".{node.id.name} = ") -> self._serialize_pattern(node.value, output, depth_guard) - -> def _serialize_comment(self, node: Comment, output: list[str]) -> None: -> """Serialize Comment. - -> Note: Content should NOT have trailing newlines. The parser produces -> content without trailing newlines (e.g., "Line1\\nLine2", not "Line1\\nLine2\\n"). -> If manually constructed AST nodes include trailing newlines, they will -> produce extra empty comment lines, which is arguably the correct behavior -> for the content provided. -> """ -> if node.type is CommentType.COMMENT: -> prefix = "#" -> elif node.type is CommentType.GROUP: -> prefix = "##" -> else: # CommentType.RESOURCE -> prefix = "###" - -> lines = node.content.split("\n") -> for line in lines: -> if line: -> output.append(f"{prefix} {line}\n") -> else: -> output.append(f"{prefix}\n") - -> def _serialize_junk(self, node: Junk, output: list[str]) -> None: -> """Serialize Junk (keep as-is). - -> Only appends newline if content doesn't already end with one, -> preventing redundant blank lines in parse/serialize cycles. 
-> """ -> output.append(node.content) -> if not node.content.endswith("\n"): -> output.append("\n") - -> def _pattern_needs_separate_line(self, pattern: Pattern) -> bool: -> """Check if pattern needs separate-line serialization for roundtrip correctness. - -> Returns True if any TextElement starting with whitespace is preceded by -> an element ending with newline. This structure would lose the leading -> whitespace during roundtrip if serialized on the same line, because: - -> 1. Parser sets common_indent from first continuation line's FULL indentation -> 2. Serializer adds 4-space continuation indent after newlines -> 3. Content's leading whitespace becomes part of combined indentation -> 4. On re-parse, common_indent strips ALL indentation including content whitespace - -> By outputting on a separate line, we establish initial_common_indent before -> any content with embedded leading whitespace, so extra whitespace is preserved -> as extra_spaces on subsequent continuation lines. -> """ -> prev_ends_newline = False -> for elem in pattern.elements: -> if isinstance(elem, TextElement): - # Check if this element starts with whitespace and follows a newline -> if prev_ends_newline and elem.value and elem.value[0] == " ": -> return True -> prev_ends_newline = elem.value.endswith("\n") -> else: - # Placeable doesn't end with newline -> prev_ends_newline = False -> return False - -> def _serialize_pattern( -> self, pattern: Pattern, output: list[str], depth_guard: DepthGuard -> ) -> None: -> """Serialize Pattern elements. - -> Per Fluent Spec 1.0: Backslash has no escaping power in TextElements. -> Literal braces MUST be expressed as StringLiterals within Placeables: -> - { must be serialized as {"{"} (Placeable containing StringLiteral) -> - } must be serialized as {"}"} (Placeable containing StringLiteral) - -> Multi-line patterns: Newlines in text elements are followed by -> 4-space indentation to create valid continuation lines for roundtrip. 
- -> Roundtrip Whitespace Preservation: -> If the pattern has TextElements where leading whitespace follows a newline -> in a preceding element, the pattern is output on a separate line. This -> ensures the parser establishes initial_common_indent from a line without -> semantic whitespace, preserving extra whitespace on continuation lines. - -> This ensures output is valid FTL that compliant parsers accept. -> """ - # Check if pattern needs separate-line serialization for roundtrip correctness. - # This handles patterns where leading whitespace follows a newline in separate - # TextElements (e.g., "Line 1\n" followed by " Line 2"). -> if self._pattern_needs_separate_line(pattern): -> output.append("\n" + _CONT_INDENT) - -> for element in pattern.elements: -> if isinstance(element, TextElement): - # Per Fluent spec: no escape sequences in TextElements - # Literal braces must become Placeable(StringLiteral("{"/"}") -> text = element.value - - # Handle newlines: add indentation after each newline for continuation - # Only add indentation if not already present (prevents double-indentation - # in roundtrip scenarios where the parsed AST already contains indented text) -> if "\n" in text: - # Use regex to replace "\n" not followed by 4+ spaces -> text = re.sub(r"\n(?! )", "\n ", text) - -> if "{" in text or "}" in text: - # Split and emit braces as StringLiteral Placeables -> self._serialize_text_with_braces(text, output) -> else: - # No special characters - emit directly -> output.append(text) -> elif isinstance(element, Placeable): -> output.append("{ ") -> with depth_guard: -> self._serialize_expression(element.expression, output, depth_guard) -> output.append(" }") - -> def _serialize_text_with_braces(self, text: str, output: list[str]) -> None: -> """Serialize text containing literal braces per Fluent spec. - -> Converts literal { and } characters to Placeable(StringLiteral) form. 
-> Example: "a{b}c" becomes: a{"{"}b{"}"}c -> """ - # C-level str.find() outperforms Python-level character iteration. - # Scans for next brace, emits text run, then emits brace placeholder. -> pos = 0 -> length = len(text) - -> while pos < length: - # Find next brace (whichever comes first) -> open_pos = text.find("{", pos) -> close_pos = text.find("}", pos) - - # Determine which brace is next (or neither) -> if open_pos == -1 and close_pos == -1: - # No more braces - emit remaining text -> output.append(text[pos:]) -> break -> if open_pos == -1: -> next_brace_pos = close_pos -> brace_placeholder = '{ "}" }' -> elif close_pos == -1 or open_pos < close_pos: -> next_brace_pos = open_pos -> brace_placeholder = '{ "{" }' -> else: -> next_brace_pos = close_pos -> brace_placeholder = '{ "}" }' - - # Emit text before brace (if any) -> if next_brace_pos > pos: -> output.append(text[pos:next_brace_pos]) - - # Emit brace as StringLiteral Placeable -> output.append(brace_placeholder) -> pos = next_brace_pos + 1 - -> def _serialize_expression( # noqa: PLR0912 # Branches required by Expression union type -> self, expr: Expression, output: list[str], depth_guard: DepthGuard -> ) -> None: -> """Serialize Expression nodes using structural pattern matching. - -> Handles all Expression types including nested Placeables (valid per FTL spec). -> """ -> match expr: -> case StringLiteral(): - # Escape special characters per FTL spec - # Uses \uHHHH for ALL control characters (< 0x20 and 0x7F) - # to produce robust output that works in all editors and parsers -> result: list[str] = [] -> for char in expr.value: -> code = ord(char) -> if char == "\\": -> result.append("\\\\") -> elif char == '"': -> result.append('\\"') -> elif code < 0x20 or code == 0x7F: - # All control characters: NUL, BEL, BS, TAB, LF, VT, FF, CR, ESC, DEL, etc. 
-> result.append(f"\\u{code:04X}") -> else: -> result.append(char) -> output.append(f'"{"".join(result)}"') - -> case NumberLiteral(): -> output.append(expr.raw) - -> case VariableReference(): -> output.append(f"${expr.id.name}") - -> case MessageReference(): -> output.append(expr.id.name) -> if expr.attribute: -> output.append(f".{expr.attribute.name}") - -> case TermReference(): -> output.append(f"-{expr.id.name}") -> if expr.attribute: -> output.append(f".{expr.attribute.name}") -> if expr.arguments: -> self._serialize_call_arguments(expr.arguments, output, depth_guard) - -> case FunctionReference(): -> output.append(expr.id.name) -> self._serialize_call_arguments(expr.arguments, output, depth_guard) - -> case Placeable(): - # Nested Placeable - serialize inner expression with braces - # Valid per FTL spec: { { $var } } is a nested placeable -> output.append("{ ") -> with depth_guard: -> self._serialize_expression(expr.expression, output, depth_guard) -> output.append(" }") - -> case SelectExpression(): -> self._serialize_select_expression(expr, output, depth_guard) - -> def _serialize_call_arguments( -> self, args: CallArguments, output: list[str], depth_guard: DepthGuard -> ) -> None: -> """Serialize CallArguments.""" -> output.append("(") - - # Positional arguments -> for i, arg in enumerate(args.positional): -> if i > 0: -> output.append(", ") -> self._serialize_expression(arg, output, depth_guard) - - # Named arguments -> named_arg: NamedArgument -> for i, named_arg in enumerate(args.named): -> if i > 0 or args.positional: -> output.append(", ") -> output.append(f"{named_arg.name.name}: ") -> self._serialize_expression(named_arg.value, output, depth_guard) - -> output.append(")") - -> def _serialize_select_expression( -> self, -> expr: SelectExpression, -> output: list[str], -> depth_guard: DepthGuard, -> ) -> None: -> """Serialize SelectExpression.""" - # Wrap selector serialization in depth_guard to track depth for DoS protection. 
- # Without this, a deeply nested selector could bypass depth limits. -> with depth_guard: -> self._serialize_expression(expr.selector, output, depth_guard) -> output.append(" ->") - -> for variant in expr.variants: -> output.append(_VARIANT_INDENT) -> if variant.default: -> output.append("*") -> output.append("[") - - # Variant key (Identifier or NumberLiteral) - explicit match for exhaustiveness -> match variant.key: -> case Identifier(): -> output.append(variant.key.name) -> case NumberLiteral(): -> output.append(variant.key.raw) - -> output.append("] ") -> self._serialize_pattern(variant.value, output, depth_guard) - -> output.append("\n") - - -> def serialize( -> resource: Resource, -> *, -> validate: bool = True, -> max_depth: int = MAX_DEPTH, -> ) -> str: -> """Serialize Resource to FTL string. - -> Convenience function for FluentSerializer.serialize(). - -> Args: -> resource: Resource AST node -> validate: If True, validate AST before serialization (default: True). -> Checks that: -> - SelectExpressions have exactly one default variant -> - Identifiers follow FTL grammar ([a-zA-Z][a-zA-Z0-9_-]*) -> Set to False only for trusted ASTs from the parser. -> max_depth: Maximum nesting depth (default: 100). Prevents stack -> overflow from adversarial or malformed ASTs. 
- -> Returns: -> FTL source code - -> Raises: -> SerializationValidationError: If validate=True and AST is invalid -> SerializationDepthError: If AST nesting exceeds max_depth - -> Example: -> >>> from ftllexengine.syntax import parse, serialize -> >>> ast = parse("hello = Hello, world!") -> >>> ftl = serialize(ast) -> >>> assert ftl == "hello = Hello, world!\\n" -> """ -> serializer = FluentSerializer() -> return serializer.serialize(resource, validate=validate, max_depth=max_depth) diff --git a/src/ftllexengine/syntax/serializer_lines.py b/src/ftllexengine/syntax/serializer_lines.py new file mode 100644 index 00000000..dbcc7778 --- /dev/null +++ b/src/ftllexengine/syntax/serializer_lines.py @@ -0,0 +1,72 @@ +"""Low-level line and text emission helpers for the serializer.""" + +from __future__ import annotations + +from enum import Enum, auto + +__all__ = [ + "_ATTR_INDENT", + "_CHAR_PLACEABLE", + "_CONT_INDENT", + "_VARIANT_INDENT", + "_LineKind", + "_classify_line", + "_escape_text", +] + +_CONT_INDENT: str = " " +_ATTR_INDENT: str = "\n " +_VARIANT_INDENT: str = "\n " +_LINE_START_SYNTAX_CHARS: frozenset[str] = frozenset(".[*") +_CHAR_PLACEABLE: dict[str, str] = { + "{": '{ "{" }', + "}": '{ "}" }', + "[": '{ "[" }', + "*": '{ "*" }', + ".": '{ "." 
}', +} + + +class _LineKind(Enum): + """Classification of continuation-line content for serialization.""" + + EMPTY = auto() + WHITESPACE_ONLY = auto() + SYNTAX_LEADING = auto() + NORMAL = auto() + + +def _classify_line(line: str) -> tuple[_LineKind, int]: + """Classify a continuation line for serializer dispatch.""" + if not line: + return (_LineKind.EMPTY, 0) + + ws_len = 0 + length = len(line) + while ws_len < length and line[ws_len] == " ": + ws_len += 1 + + if ws_len == length: + return (_LineKind.WHITESPACE_ONLY, 0) + + if line[ws_len] in _LINE_START_SYNTAX_CHARS: + return (_LineKind.SYNTAX_LEADING, ws_len) + + return (_LineKind.NORMAL, 0) + + +def _escape_text(text: str, output: list[str]) -> None: + """Escape brace characters in text content.""" + pos = 0 + length = len(text) + while pos < length: + ch = text[pos] + if ch in ("{", "}"): + output.append(_CHAR_PLACEABLE[ch]) + pos += 1 + continue + run_start = pos + pos += 1 + while pos < length and text[pos] not in ("{", "}"): + pos += 1 + output.append(text[run_start:pos]) diff --git a/src/ftllexengine/syntax/serializer_validation.py b/src/ftllexengine/syntax/serializer_validation.py new file mode 100644 index 00000000..d0f387be --- /dev/null +++ b/src/ftllexengine/syntax/serializer_validation.py @@ -0,0 +1,210 @@ +"""Validation helpers for serializer-facing AST checks.""" + +from __future__ import annotations + +from typing import TYPE_CHECKING, assert_never + +from ftllexengine.constants import MAX_DEPTH +from ftllexengine.core.depth_guard import DepthGuard, DepthLimitExceededError +from ftllexengine.core.identifier_validation import is_valid_identifier +from ftllexengine.diagnostics import FrozenFluentError + +from .ast import ( + CallArguments, + Expression, + FunctionReference, + Identifier, + Message, + MessageReference, + NumberLiteral, + Pattern, + Placeable, + Resource, + SelectExpression, + StringLiteral, + Term, + TermReference, + VariableReference, +) + +if TYPE_CHECKING: + from collections.abc 
import Callable + +__all__ = ["SerializationDepthError", "SerializationValidationError", "validate_resource"] + + +class SerializationValidationError(ValueError): + """Raised when AST validation fails during serialization.""" + + +class SerializationDepthError(ValueError): + """Raised when AST nesting exceeds maximum serialization depth.""" + + +def _validate_pattern(pattern: Pattern, context: str, depth_guard: DepthGuard) -> None: + for element in pattern.elements: + if isinstance(element, Placeable): + with depth_guard: + _validate_expression(element.expression, context, depth_guard) + + +def _validate_identifier(identifier: Identifier, context: str) -> None: + if not is_valid_identifier(identifier.name): + msg = ( + f"Invalid identifier '{identifier.name}' in {context}. " + f"Identifiers must match [a-zA-Z][a-zA-Z0-9_-]* and be ≤256 characters" + ) + raise SerializationValidationError(msg) + + +def _require_single_default_variant( + expr: SelectExpression, context: str +) -> None: + n_defaults = sum(1 for variant in expr.variants if variant.default) + if n_defaults == 1: + return + if n_defaults == 0: + msg = ( + f"SelectExpression in {context} has no default variant. " + "Exactly one variant must be marked as default." + ) + else: + msg = ( + f"SelectExpression in {context} has {n_defaults} default variants. " + "Exactly one variant must be marked as default." 
+ ) + raise SerializationValidationError(msg) + + +def _validate_select_expression( + expr: SelectExpression, context: str, depth_guard: DepthGuard +) -> None: + _require_single_default_variant(expr, context) + with depth_guard: + _validate_expression(expr.selector, context, depth_guard) + for variant in expr.variants: + if isinstance(variant.key, Identifier): + _validate_identifier(variant.key, f"{context}, variant key") + with depth_guard: + _validate_pattern(variant.value, context, depth_guard) + + +def _validate_term_reference( + expr: TermReference, context: str, depth_guard: DepthGuard +) -> None: + _validate_identifier(expr.id, f"{context}, term reference") + if expr.attribute: + _validate_identifier(expr.attribute, f"{context}, term attribute") + if expr.arguments: + _validate_call_arguments(expr.arguments, context, depth_guard) + + +def _validate_message_reference(expr: MessageReference, context: str) -> None: + _validate_identifier(expr.id, f"{context}, message reference") + if expr.attribute: + _validate_identifier(expr.attribute, f"{context}, message attribute") + + +def _assert_named_arg_value_is_literal( + value: object, arg_name: str, context: str +) -> None: + if not isinstance(value, (StringLiteral, NumberLiteral)): + value_type = type(value).__name__ + msg = ( + f"Named argument '{arg_name}' in {context} has invalid value type " + f"'{value_type}'. Named argument values must be StringLiteral or " + f"NumberLiteral per FTL specification " + f'(NamedArgument ::= Identifier ":" (StringLiteral | NumberLiteral)).' 
+ ) + raise SerializationValidationError(msg) + + +def _validate_call_arguments( + args: CallArguments, context: str, depth_guard: DepthGuard +) -> None: + for pos_arg in args.positional: + with depth_guard: + _validate_expression(pos_arg, context, depth_guard) + + seen_names: set[str] = set() + for named_arg in args.named: + arg_name = named_arg.name.name + if arg_name in seen_names: + msg = ( + f"Duplicate named argument '{arg_name}' in {context}. " + "Named argument names must be unique per FTL specification." + ) + raise SerializationValidationError(msg) + seen_names.add(arg_name) + + _validate_identifier(named_arg.name, f"{context}, named argument") + _assert_named_arg_value_is_literal(named_arg.value, arg_name, context) + + +def _validate_expression( + expr: Expression, context: str, depth_guard: DepthGuard +) -> None: + match expr: + case SelectExpression(): + _validate_select_expression(expr, context, depth_guard) + case Placeable(): + with depth_guard: + _validate_expression(expr.expression, context, depth_guard) + case VariableReference(): + _validate_identifier(expr.id, f"{context}, variable reference") + case MessageReference(): + _validate_message_reference(expr, context) + case TermReference(): + _validate_term_reference(expr, context, depth_guard) + case FunctionReference(): + _validate_identifier(expr.id, f"{context}, function reference") + _validate_call_arguments(expr.arguments, context, depth_guard) + case StringLiteral() | NumberLiteral(): + return + case _ as unreachable: # pragma: no cover + assert_never(unreachable) + + +def validate_resource( + resource: Resource, + max_depth: int = MAX_DEPTH, + *, + validate_pattern: Callable[[Pattern, str, DepthGuard], None] | None = None, +) -> None: + """Validate a Resource AST for safe serialization.""" + depth_guard = DepthGuard(max_depth=max_depth) + pattern_validator = _validate_pattern if validate_pattern is None else validate_pattern + + try: + for entry in resource.entries: + match entry: + case 
Message(): + _validate_identifier(entry.id, "message ID") + context = f"message '{entry.id.name}'" + if entry.value: + pattern_validator(entry.value, context, depth_guard) + for attr in entry.attributes: + _validate_identifier(attr.id, f"{context}, attribute ID") + pattern_validator( + attr.value, + f"{context}.{attr.id.name}", + depth_guard, + ) + case Term(): + _validate_identifier(entry.id, "term ID") + context = f"term '-{entry.id.name}'" + pattern_validator(entry.value, context, depth_guard) + for attr in entry.attributes: + _validate_identifier(attr.id, f"{context}, attribute ID") + pattern_validator( + attr.value, + f"{context}.{attr.id.name}", + depth_guard, + ) + case _: + pass + except DepthLimitExceededError as exc: + msg = f"Validation depth limit exceeded (max: {max_depth}): {exc}" + raise SerializationDepthError(msg) from exc + except FrozenFluentError: + raise diff --git a/src/ftllexengine/syntax/validation_helpers.py b/src/ftllexengine/syntax/validation_helpers.py index 15643bd2..6ba3da00 100644 --- a/src/ftllexengine/syntax/validation_helpers.py +++ b/src/ftllexengine/syntax/validation_helpers.py @@ -29,9 +29,9 @@ def count_default_variants(select: SelectExpression) -> int: Number of default variants (should be exactly 1 for valid FTL) Example: - >>> expr = SelectExpression(...) - >>> count = count_default_variants(expr) - >>> if count != 1: + >>> expr = SelectExpression(...) # doctest: +SKIP + >>> count = count_default_variants(expr) # doctest: +SKIP + >>> if count != 1: # doctest: +SKIP ... 
raise ValidationError(f"Expected 1 default, found {count}") """ return sum(1 for v in select.variants if v.default) diff --git a/src/ftllexengine/syntax/validator.py b/src/ftllexengine/syntax/validator.py index 1d91fe36..41c22ab7 100644 --- a/src/ftllexengine/syntax/validator.py +++ b/src/ftllexengine/syntax/validator.py @@ -20,6 +20,7 @@ from ftllexengine.core.depth_guard import DepthGuard from ftllexengine.diagnostics import ValidationResult from ftllexengine.diagnostics.codes import DiagnosticCode +from ftllexengine.diagnostics.depth import resolution_depth_error from ftllexengine.syntax.ast import ( Annotation, Attribute, @@ -105,7 +106,10 @@ def validate(self, resource: Resource) -> ValidationResult: ValidationResult with errors (if any) """ errors: list[Annotation] = [] - depth_guard = DepthGuard(max_depth=MAX_DEPTH) + depth_guard = DepthGuard( + max_depth=MAX_DEPTH, + error_factory=resolution_depth_error, + ) for entry in resource.entries: self._validate_entry(entry, errors, depth_guard) @@ -501,13 +505,13 @@ def validate(resource: Resource) -> ValidationResult: ValidationResult with any errors found Example: - >>> from ftllexengine.syntax.parser import FluentParserV1 - >>> from ftllexengine.syntax.validator import validate - >>> - >>> parser = FluentParserV1() - >>> resource = parser.parse("msg = value") - >>> result = validate(resource) - >>> assert result.is_valid + >>> from ftllexengine.syntax.parser import FluentParserV1 # doctest: +SKIP + >>> from ftllexengine.syntax.validator import validate # doctest: +SKIP + + >>> parser = FluentParserV1() # doctest: +SKIP + >>> resource = parser.parse("msg = value") # doctest: +SKIP + >>> result = validate(resource) # doctest: +SKIP + >>> assert result.is_valid # doctest: +SKIP """ validator = SemanticValidator() return validator.validate(resource) diff --git a/src/ftllexengine/syntax/visitor.py b/src/ftllexengine/syntax/visitor.py index 3208abd8..93a5b069 100644 --- a/src/ftllexengine/syntax/visitor.py +++ 
b/src/ftllexengine/syntax/visitor.py @@ -20,6 +20,7 @@ from ftllexengine.constants import MAX_DEPTH from ftllexengine.core.depth_guard import DepthGuard +from ftllexengine.diagnostics.depth import resolution_depth_error from .ast import ( ASTNode, @@ -79,7 +80,7 @@ class ASTVisitor[T = ASTNode]: - Serializers Example: - >>> class CountMessagesVisitor(ASTVisitor): + >>> class CountMessagesVisitor(ASTVisitor): # doctest: +SKIP ... def __init__(self): ... super().__init__() ... self.count = 0 @@ -88,9 +89,9 @@ class ASTVisitor[T = ASTNode]: ... self.count += 1 ... return self.generic_visit(node) # Traverse children ... - >>> visitor = CountMessagesVisitor() - >>> visitor.visit(resource) - >>> print(visitor.count) + >>> visitor = CountMessagesVisitor() # doctest: +SKIP + >>> visitor.visit(resource) # doctest: +SKIP + >>> print(visitor.count) # doctest: +SKIP """ __slots__ = ("_depth_guard",) @@ -134,7 +135,10 @@ def __init__(self, *, max_depth: int | None = None) -> None: # Depth guard prevents stack overflow from adversarial/malformed ASTs. # Uses same MAX_DEPTH (100) as parser, resolver, serializer for consistency. effective_max_depth = max_depth if max_depth is not None else MAX_DEPTH - self._depth_guard = DepthGuard(max_depth=effective_max_depth) + self._depth_guard = DepthGuard( + max_depth=effective_max_depth, + error_factory=resolution_depth_error, + ) def visit(self, node: ASTNode) -> T: """Visit a node (dispatcher with depth protection). @@ -246,15 +250,15 @@ class ASTTransformer(ASTVisitor[TransformerResult]): Uses Python 3.13's pattern matching for elegant node type handling. Example - Remove all comments: - >>> class RemoveCommentsTransformer(ASTTransformer): + >>> class RemoveCommentsTransformer(ASTTransformer): # doctest: +SKIP ... def visit_Comment(self, node: Comment) -> None: ... return None # Remove comments ... 
- >>> transformer = RemoveCommentsTransformer() - >>> cleaned_resource = transformer.transform(resource) + >>> transformer = RemoveCommentsTransformer() # doctest: +SKIP + >>> cleaned_resource = transformer.transform(resource) # doctest: +SKIP Example - Rename all variables: - >>> class RenameVariablesTransformer(ASTTransformer): + >>> class RenameVariablesTransformer(ASTTransformer): # doctest: +SKIP ... def __init__(self, mapping: dict[str, str]): ... super().__init__() ... self.mapping = mapping @@ -266,17 +270,17 @@ class ASTTransformer(ASTVisitor[TransformerResult]): ... ) ... return node ... - >>> transformer = RenameVariablesTransformer({"old": "new"}) - >>> modified_resource = transformer.transform(resource) + >>> transformer = RenameVariablesTransformer({"old": "new"}) # doctest: +SKIP + >>> modified_resource = transformer.transform(resource) # doctest: +SKIP Example - Expand messages (1 → multiple): - >>> class ExpandPluralsTransformer(ASTTransformer): + >>> class ExpandPluralsTransformer(ASTTransformer): # doctest: +SKIP ... def visit_Message(self, node: Message) -> list[Message]: ... # Generate multiple messages from select expressions ... return [node, expanded_variant_1, expanded_variant_2] ... 
- >>> transformer = ExpandPluralsTransformer() - >>> expanded_resource = transformer.transform(resource) + >>> transformer = ExpandPluralsTransformer() # doctest: +SKIP + >>> expanded_resource = transformer.transform(resource) # doctest: +SKIP """ def transform(self, node: ASTNode) -> TransformerResult: diff --git a/src/ftllexengine/validation/resource.py b/src/ftllexengine/validation/resource.py index 5cb29650..f06edab3 100644 --- a/src/ftllexengine/validation/resource.py +++ b/src/ftllexengine/validation/resource.py @@ -22,11 +22,8 @@ import re from typing import TYPE_CHECKING -if TYPE_CHECKING: - from collections.abc import Mapping - -from ftllexengine.analysis.graph import detect_cycles, make_cycle_key from ftllexengine.constants import MAX_DEPTH +from ftllexengine.core.reference_graph import detect_cycles, make_cycle_key from ftllexengine.diagnostics import ( ValidationError, ValidationResult, @@ -34,16 +31,33 @@ WarningSeverity, ) from ftllexengine.diagnostics.codes import DiagnosticCode -from ftllexengine.introspection import extract_references, extract_references_by_attribute from ftllexengine.syntax import Attribute, Junk, Message, Resource, Term from ftllexengine.syntax.cursor import LineOffsetCache +from ftllexengine.syntax.reference_extraction import extract_references from ftllexengine.syntax.validator import SemanticValidator +from ftllexengine.validation.resource_graph import ( + _compute_longest_paths as _compute_longest_paths_impl, +) +from ftllexengine.validation.resource_graph import ( + build_dependency_graph, + detect_long_chains, +) +from ftllexengine.validation.resource_graph import ( + detect_circular_references as _detect_circular_references_impl, +) if TYPE_CHECKING: + from collections.abc import Mapping + from ftllexengine.syntax.parser import FluentParserV1 __all__ = ["validate_resource"] +# Backward-compatible private re-exports for existing tests and internal callers. 
+_build_dependency_graph = build_dependency_graph +_compute_longest_paths = _compute_longest_paths_impl +_detect_long_chains = detect_long_chains + logger = logging.getLogger(__name__) @@ -436,336 +450,13 @@ def _check_undefined_references( return warnings -def _detect_circular_references( - graph: dict[str, set[str]], -) -> list[ValidationWarning]: - """Detect circular dependencies in messages and terms. - - Uses iterative DFS via analysis.graph module to avoid stack overflow - on deep dependency chains. - - Accepts a unified dependency graph with type-prefixed nodes to detect: - - Message-only cycles (msg:A -> msg:B -> msg:A) - - Term-only cycles (term:A -> term:B -> term:A) - - Cross-type cycles (msg:A -> term:B -> msg:A) - - Cross-resource cycles (current resource -> known entry -> current resource) - - Args: - graph: Unified dependency graph with type-prefixed nodes (msg:name, term:name) - - Returns: - List of warnings for circular references - """ - warnings: list[ValidationWarning] = [] - seen_cycle_keys: set[str] = set() - - # Detect all cycles in the unified graph - for cycle in detect_cycles(graph): - cycle_key = make_cycle_key(cycle) - if cycle_key not in seen_cycle_keys: - seen_cycle_keys.add(cycle_key) - - # Format cycle for human-readable output - # Convert "msg:foo" -> "foo", "msg:foo.bar" -> "foo.bar", - # "term:baz" -> "-baz", "term:baz.attr" -> "-baz.attr" - formatted_parts: list[str] = [] - for node in cycle: - if node.startswith("msg:"): - formatted_parts.append(node[4:]) # Strip "msg:" prefix - elif node.startswith("term:"): - formatted_parts.append(f"-{node[5:]}") # Strip "term:", add "-" - - cycle_str = " -> ".join(formatted_parts) - - # Determine cycle type for appropriate message - has_messages = any(n.startswith("msg:") for n in cycle) - has_terms = any(n.startswith("term:") for n in cycle) - - if has_messages and has_terms: - msg = f"Circular cross-reference: {cycle_str}" - elif has_terms: - msg = f"Circular term reference: {cycle_str}" - 
else: - msg = f"Circular message reference: {cycle_str}" - - warnings.append( - ValidationWarning( - code=DiagnosticCode.VALIDATION_CIRCULAR_REFERENCE, - message=msg, - context=cycle_str, - severity=WarningSeverity.CRITICAL, - ) - ) - - return warnings - - -def _resolve_reference( - ref: str, - prefix: str, - local_entries: dict[str, Message] | dict[str, Term], - known_ids: frozenset[str] | None, -) -> str | None: - """Resolve a reference string to a graph node key. - - Shared logic for both message and term reference resolution. - References may be attribute-qualified ("name.attr") or bare ("name"). - - Args: - ref: Reference string (possibly attribute-qualified) - prefix: Graph node prefix ("msg" or "term") - local_entries: Local entries dict for this namespace - known_ids: Optional set of IDs already in bundle - - Returns: - Prefixed graph node key, or None if reference is unknown - """ - if "." in ref: - base, attr = ref.split(".", 1) - if base in local_entries or (known_ids and base in known_ids): - return f"{prefix}:{base}.{attr}" - elif ref in local_entries or (known_ids and ref in known_ids): - return f"{prefix}:{ref}" - return None - - -def _add_entry_nodes( - entries: dict[str, Message] | dict[str, Term], - prefix: str, - messages_dict: dict[str, Message], - terms_dict: dict[str, Term], - known_messages: frozenset[str] | None, - known_terms: frozenset[str] | None, - graph: dict[str, set[str]], -) -> None: - """Add nodes and edges for a set of entries to the dependency graph. - - Shared logic for both message and term node building. 
- - Args: - entries: The entries to process (messages or terms) - prefix: Graph node prefix ("msg" or "term") - messages_dict: All local messages (for reference resolution) - terms_dict: All local terms (for reference resolution) - known_messages: Optional set of message IDs already in bundle - known_terms: Optional set of term IDs already in bundle - graph: Mutable graph to add nodes to - """ - for name, entry in entries.items(): - refs_by_attr = extract_references_by_attribute(entry) - - for attr_name, (msg_refs, term_refs) in refs_by_attr.items(): - node_key = ( - f"{prefix}:{name}" - if attr_name is None - else f"{prefix}:{name}.{attr_name}" - ) - deps: set[str] = set() - for ref in msg_refs: - resolved = _resolve_reference( - ref, "msg", messages_dict, known_messages - ) - if resolved is not None: - deps.add(resolved) - for ref in term_refs: - resolved = _resolve_reference( - ref, "term", terms_dict, known_terms - ) - if resolved is not None: - deps.add(resolved) - graph[node_key] = deps - - -def _add_known_entries( - known_ids: frozenset[str] | None, - prefix: str, - known_deps: Mapping[str, frozenset[str]] | None, - graph: dict[str, set[str]], -) -> None: - """Add known (pre-existing) entries to the graph. 
- - Args: - known_ids: Set of known entry IDs - prefix: Graph node prefix ("msg" or "term") - known_deps: Optional dependency map for known entries - graph: Mutable graph to add nodes to - """ - if not known_ids: - return - for known_id in known_ids: - node_key = f"{prefix}:{known_id}" - if node_key not in graph: - if known_deps and known_id in known_deps: - graph[node_key] = set(known_deps[known_id]) - else: - graph[node_key] = set() - - -def _build_dependency_graph( - messages_dict: dict[str, Message], - terms_dict: dict[str, Term], - *, - known_messages: frozenset[str] | None = None, - known_terms: frozenset[str] | None = None, - known_msg_deps: Mapping[str, frozenset[str]] | None = None, - known_term_deps: Mapping[str, frozenset[str]] | None = None, -) -> dict[str, set[str]]: - """Build unified dependency graph for messages and terms. - - Creates a graph with type-prefixed nodes (msg:name, term:name) for - both cycle detection and chain depth analysis. - - Args: - messages_dict: Map of message IDs to Message nodes from current resource - terms_dict: Map of term IDs to Term nodes from current resource - known_messages: Optional set of message IDs already in bundle - known_terms: Optional set of term IDs already in bundle - known_msg_deps: Optional dependency map for known messages. Maps message ID - to frozenset of prefixed dependencies (e.g., {"msg:foo", "term:bar"}). - known_term_deps: Optional dependency map for known terms. - - Returns: - Graph as adjacency list (node -> set of dependencies) - """ - graph: dict[str, set[str]] = {} - - # Add entry nodes with attribute-granular dependencies. - # Each attribute gets its own node to avoid false positive cycles - # when msg.a references msg.b (non-cyclic intra-entry reference). 
- _add_entry_nodes( - messages_dict, "msg", - messages_dict, terms_dict, - known_messages, known_terms, graph, +def _detect_circular_references(graph: dict[str, set[str]]) -> list[ValidationWarning]: + """Compatibility wrapper preserving patch points for cycle tests.""" + return _detect_circular_references_impl( + graph, + detect_cycles_fn=detect_cycles, + make_cycle_key_fn=make_cycle_key, ) - _add_entry_nodes( - terms_dict, "term", - messages_dict, terms_dict, - known_messages, known_terms, graph, - ) - - # Add known entries with their dependencies for cross-resource - # cycle detection. - _add_known_entries( - known_messages, "msg", known_msg_deps, graph, - ) - _add_known_entries( - known_terms, "term", known_term_deps, graph, - ) - - return graph - - -def _compute_longest_paths( - graph: dict[str, set[str]], -) -> dict[str, tuple[int, list[str]]]: - """Compute longest path from each node using memoized iterative DFS. - - Args: - graph: Dependency graph as adjacency list - - Returns: - Map from node to (path_length, path_nodes) - """ - longest_path: dict[str, tuple[int, list[str]]] = {} - in_stack: set[str] = set() - - for start in graph: - if start in longest_path: - continue - - # Iterative DFS with two-phase processing - stack: list[tuple[str, int, list[str]]] = [(start, 0, list(graph.get(start, set())))] - - while stack: - node, phase, children = stack.pop() - - if phase == 0: - if node in longest_path: - continue - - in_stack.add(node) - stack.append((node, 1, children)) - - stack.extend( - (child, 0, list(graph.get(child, set()))) - for child in children - if child not in longest_path and child not in in_stack - ) - else: - in_stack.discard(node) - best_depth, best_path = 0, [] - for child in children: - if child in longest_path: - child_depth, child_path = longest_path[child] - if child_depth + 1 > best_depth: - best_depth = child_depth + 1 - best_path = child_path - longest_path[node] = (best_depth, [node, *best_path]) - - return longest_path - - -def 
_detect_long_chains( - graph: dict[str, set[str]], - max_depth: int = MAX_DEPTH, -) -> list[ValidationWarning]: - """Detect ALL reference chains that exceed maximum depth. - - Computes longest path from each node and reports ALL chains exceeding - max_depth. This allows users to see and fix all depth violations in a - single validation pass rather than iteratively discovering them. - - Args: - graph: Unified dependency graph with type-prefixed nodes (msg:name, term:name) - max_depth: Maximum allowed chain depth (default: MAX_DEPTH) - - Returns: - List of warnings for ALL chains exceeding max_depth, sorted by depth - (deepest first) for prioritized remediation - """ - if not graph: - return [] - - longest_paths = _compute_longest_paths(graph) - - # Collect ALL chains exceeding max_depth - exceeding_chains: list[tuple[int, list[str], str]] = [] - for node, (depth, path) in longest_paths.items(): - # Only report chains starting from their origin (first node in path) - # to avoid duplicate warnings for the same chain from different nodes - if depth > max_depth and path and path[0] == node: - exceeding_chains.append((depth, path, node)) - - if not exceeding_chains: - return [] - - # Sort by depth descending (deepest chains first) for prioritized remediation - exceeding_chains.sort(key=lambda x: x[0], reverse=True) - - warnings: list[ValidationWarning] = [] - for chain_depth, chain_path, _origin in exceeding_chains: - # Format path for human-readable output - formatted = [ - node[4:] if node.startswith("msg:") else f"-{node[5:]}" - for node in chain_path[:10] - ] - chain_str = " -> ".join(formatted) - if len(chain_path) > 10: - chain_str += f" -> ... 
({len(chain_path)} total)" - - warnings.append( - ValidationWarning( - code=DiagnosticCode.VALIDATION_CHAIN_DEPTH_EXCEEDED, - message=( - f"Reference chain depth ({chain_depth}) exceeds maximum ({max_depth}); " - f"will fail at runtime with MAX_DEPTH_EXCEEDED" - ), - context=chain_str, - severity=WarningSeverity.WARNING, - ) - ) - - return warnings def validate_resource( @@ -811,12 +502,12 @@ def validate_resource( TypeError: If source is not a string (e.g., bytes were passed). Example: - >>> from ftllexengine.validation import validate_resource - >>> result = validate_resource(ftl_source) - >>> if not result.is_valid: + >>> from ftllexengine.validation import validate_resource # doctest: +SKIP + >>> result = validate_resource(ftl_source) # doctest: +SKIP + >>> if not result.is_valid: # doctest: +SKIP ... for error in result.errors: ... print(f"Error [{error.code}]: {error.message}") - >>> for warning in result.warnings: + >>> for warning in result.warnings: # doctest: +SKIP ... print(f"Warning [{warning.code}]: {warning.message}") Thread Safety: @@ -873,7 +564,7 @@ def validate_resource( # Build unified dependency graph once for both cycle and chain detection # Avoids redundant graph construction (important for large resources) - dependency_graph = _build_dependency_graph( + dependency_graph = build_dependency_graph( messages_dict, terms_dict, known_messages=known_messages, @@ -886,7 +577,7 @@ def validate_resource( cycle_warnings = _detect_circular_references(dependency_graph) # Pass 5: Detect long reference chains (would fail at runtime) - chain_warnings = _detect_long_chains(dependency_graph) + chain_warnings = detect_long_chains(dependency_graph, max_depth=MAX_DEPTH) # Pass 6: Fluent spec compliance (E0001-E0013) semantic_validator = SemanticValidator() diff --git a/src/ftllexengine/validation/resource.py,cover b/src/ftllexengine/validation/resource.py,cover deleted file mode 100644 index f1dde731..00000000 --- a/src/ftllexengine/validation/resource.py,cover 
+++ /dev/null @@ -1,842 +0,0 @@ -> """FTL resource validation. - -> Provides standalone validation for FTL resources without requiring -> a FluentBundle instance. Useful for CI/CD pipelines, linters, and -> tooling that needs to validate FTL files without runtime resolution. - -> Architecture: -> - validate_resource(): Main entry point, orchestrates validation passes -> - _extract_syntax_errors(): Pass 1 - Convert Junk entries to ValidationError -> - _collect_entries(): Pass 2 - Collect messages/terms, check duplicates -> - _check_undefined_references(): Pass 3 - Validate message/term references -> - _detect_circular_references(): Pass 4 - Check for reference cycles -> - _detect_long_chains(): Pass 5 - Check for chains exceeding MAX_DEPTH -> - SemanticValidator: Pass 6 - Fluent spec compliance - -> Python 3.13+. -> """ - -> from __future__ import annotations - -> import logging -> import re -> from typing import TYPE_CHECKING - -> from ftllexengine.analysis.graph import detect_cycles, make_cycle_key -> from ftllexengine.constants import MAX_DEPTH -> from ftllexengine.diagnostics import ( -> ValidationError, -> ValidationResult, -> ValidationWarning, -> WarningSeverity, -> ) -> from ftllexengine.diagnostics.codes import DiagnosticCode -> from ftllexengine.introspection import extract_references, extract_references_by_attribute -> from ftllexengine.syntax import Junk, Message, Resource, Term -> from ftllexengine.syntax.cursor import LineOffsetCache -> from ftllexengine.syntax.validator import SemanticValidator - -- if TYPE_CHECKING: -- from ftllexengine.syntax.parser import FluentParserV1 - -> __all__ = ["validate_resource"] - -> logger = logging.getLogger(__name__) - - -> def _get_entry_position( -> entry: Message | Term, -> line_cache: LineOffsetCache, -> ) -> tuple[int | None, int | None]: -> """Get line/column from entry's span if available. 
- -> Args: -> entry: Message or Term with optional span -> line_cache: Line offset cache for position lookup - -> Returns: -> (line, column) tuple, or (None, None) if no span -> """ -> if entry.span: -> return line_cache.get_line_col(entry.span.start) -> return None, None - - -> def _extract_syntax_errors( -> resource: Resource, -> line_cache: LineOffsetCache, -> ) -> list[ValidationError]: -> """Extract syntax errors from Junk entries. - -> Converts Junk AST nodes (unparseable content) to structured -> ValidationError objects with line/column information. - -> Propagates annotations from Junk nodes to preserve specific error codes -> and messages from the parser. If a Junk entry has no annotations, falls -> back to a generic parse error. - -> Args: -> resource: Parsed Resource AST (may contain Junk entries) -> line_cache: Shared line offset cache for position lookups - -> Returns: -> List of ValidationError objects for each Junk entry -> """ -> errors: list[ValidationError] = [] - -> for entry in resource.entries: -> if isinstance(entry, Junk): - # Propagate annotations from Junk to preserve specific parser errors -> if entry.annotations: -> for annotation in entry.annotations: - # Use annotation's span if available, otherwise fall back to Junk span -> ann_line: int | None = None -> ann_column: int | None = None -> if annotation.span: -> ann_line, ann_column = line_cache.get_line_col( -> annotation.span.start -> ) -> elif entry.span: -> ann_line, ann_column = line_cache.get_line_col(entry.span.start) - -> errors.append( -> ValidationError( -> code=annotation.code, -> message=annotation.message, -> content=entry.content, -> line=ann_line, -> column=ann_column, -> ) -> ) -> else: - # Fallback for Junk without annotations (shouldn't happen normally) -> line: int | None = None -> column: int | None = None -> if entry.span: -> line, column = line_cache.get_line_col(entry.span.start) - -> errors.append( -> ValidationError( -> 
code=DiagnosticCode.VALIDATION_PARSE_ERROR.name, -> message="Failed to parse FTL content", -> content=entry.content, -> line=line, -> column=column, -> ) -> ) - -> return errors - - -> def _collect_entries( -> resource: Resource, -> line_cache: LineOffsetCache, -> *, -> known_messages: frozenset[str] | None = None, -> known_terms: frozenset[str] | None = None, -> ) -> tuple[dict[str, Message], dict[str, Term], list[ValidationWarning]]: -> """Collect message/term entries and check for structural issues. - -> Performs the following checks: -> - Duplicate message IDs (within message namespace) -> - Duplicate term IDs (within term namespace) -> - Messages without values or attributes -> - Duplicate attribute IDs within entries -> - Shadow warnings when entry ID conflicts with known entry - -> Note: Per Fluent spec, messages and terms have separate namespaces. -> A message named "foo" and a term named "foo" are NOT duplicates. - -> Args: -> resource: Parsed Resource AST -> line_cache: Shared line offset cache for position lookups -> known_messages: Optional set of message IDs already in bundle -> known_terms: Optional set of term IDs already in bundle - -> Returns: -> Tuple of (messages_dict, terms_dict, warnings) -> """ -> warnings: list[ValidationWarning] = [] - # Per Fluent spec, messages and terms have separate namespaces. - # A message "foo" and a term "-foo" can coexist without conflict. 
-> seen_message_ids: set[str] = set() -> seen_term_ids: set[str] = set() -> messages_dict: dict[str, Message] = {} -> terms_dict: dict[str, Term] = {} - -> for entry in resource.entries: -> match entry: -> case Message(id=msg_id, value=value, attributes=attributes): - # Check for duplicate message IDs within message namespace -> if msg_id.name in seen_message_ids: -> line, column = _get_entry_position(entry, line_cache) -> warnings.append( -> ValidationWarning( -> code=DiagnosticCode.VALIDATION_DUPLICATE_ID.name, -> message=( -> f"Duplicate message ID '{msg_id.name}' " -> f"(later definition will overwrite earlier)" -> ), -> context=msg_id.name, -> line=line, -> column=column, -> severity=WarningSeverity.WARNING, -> ) -> ) -> seen_message_ids.add(msg_id.name) -> messages_dict[msg_id.name] = entry - - # Check for shadow conflict with known messages -> if known_messages and msg_id.name in known_messages: -> line, column = _get_entry_position(entry, line_cache) -> warnings.append( -> ValidationWarning( -> code=DiagnosticCode.VALIDATION_SHADOW_WARNING.name, -> message=( -> f"Message '{msg_id.name}' shadows existing message " -> f"(this definition will override the earlier one)" -> ), -> context=msg_id.name, -> line=line, -> column=column, -> severity=WarningSeverity.WARNING, -> ) -> ) - - # Check for duplicate attribute IDs within this message -> seen_message_attr_ids: set[str] = set() -> for attr in attributes: -> if attr.id.name in seen_message_attr_ids: -> line, column = _get_entry_position(entry, line_cache) -> warnings.append( -> ValidationWarning( -> code=DiagnosticCode.VALIDATION_DUPLICATE_ATTRIBUTE.name, -> message=( -> f"Message '{msg_id.name}' has duplicate attribute " -> f"'{attr.id.name}' (later will override earlier)" -> ), -> context=f"{msg_id.name}.{attr.id.name}", -> line=line, -> column=column, -> severity=WarningSeverity.WARNING, -> ) -> ) -> seen_message_attr_ids.add(attr.id.name) - - # Check for messages without values (only attributes) - # NOTE: 
This check is unreachable due to defense-in-depth: - # 1. Parser validates in validate_message_content() and creates Junk instead - # 2. Message.__post_init__() raises ValueError if value=None and no attributes - # Kept as defensive programming for external AST construction scenarios. -- if value is None and len(attributes) == 0: # pragma: no cover -- line, column = _get_entry_position(entry, line_cache) # pragma: no cover -- warnings.append( # pragma: no cover -> ValidationWarning( -> code=DiagnosticCode.VALIDATION_NO_VALUE_OR_ATTRS.name, -> message=f"Message '{msg_id.name}' has neither value nor attributes", -> context=msg_id.name, -> line=line, -> column=column, -> severity=WarningSeverity.WARNING, -> ) -> ) - -> case Term(id=term_id, attributes=attributes): - # Check for duplicate term IDs within term namespace -> if term_id.name in seen_term_ids: -> line, column = _get_entry_position(entry, line_cache) -> warnings.append( -> ValidationWarning( -> code=DiagnosticCode.VALIDATION_DUPLICATE_ID.name, -> message=( -> f"Duplicate term ID '{term_id.name}' " -> f"(later definition will overwrite earlier)" -> ), -> context=term_id.name, -> line=line, -> column=column, -> severity=WarningSeverity.WARNING, -> ) -> ) -> seen_term_ids.add(term_id.name) -> terms_dict[term_id.name] = entry - - # Check for shadow conflict with known terms -> if known_terms and term_id.name in known_terms: -> line, column = _get_entry_position(entry, line_cache) -> warnings.append( -> ValidationWarning( -> code=DiagnosticCode.VALIDATION_SHADOW_WARNING.name, -> message=( -> f"Term '{term_id.name}' shadows existing term " -> f"(this definition will override the earlier one)" -> ), -> context=term_id.name, -> line=line, -> column=column, -> severity=WarningSeverity.WARNING, -> ) -> ) - - # Check for duplicate attribute IDs within this term -> seen_term_attr_ids: set[str] = set() -> for attr in attributes: -> if attr.id.name in seen_term_attr_ids: -> line, column = _get_entry_position(entry, 
line_cache) -> warnings.append( -> ValidationWarning( -> code=DiagnosticCode.VALIDATION_DUPLICATE_ATTRIBUTE.name, -> message=( -> f"Term '{term_id.name}' has duplicate attribute " -> f"'{attr.id.name}' (later will override earlier)" -> ), -> context=f"{term_id.name}.{attr.id.name}", -> line=line, -> column=column, -> severity=WarningSeverity.WARNING, -> ) -> ) -> seen_term_attr_ids.add(attr.id.name) - -> return messages_dict, terms_dict, warnings - - -> def _check_undefined_references( -> messages_dict: dict[str, Message], -> terms_dict: dict[str, Term], -> line_cache: LineOffsetCache, -> *, -> known_messages: frozenset[str] | None = None, -> known_terms: frozenset[str] | None = None, -> ) -> list[ValidationWarning]: -> """Check for undefined message and term references. - -> Validates that all message and term references in the resource -> point to defined entries. Optionally considers entries already -> present in a bundle for cross-resource reference validation. - -> Args: -> messages_dict: Map of message IDs to Message nodes from current resource -> terms_dict: Map of term IDs to Term nodes from current resource -> line_cache: Shared line offset cache for position lookups -> known_messages: Optional set of message IDs already in bundle -> known_terms: Optional set of term IDs already in bundle - -> Returns: -> List of warnings for undefined references -> """ -> warnings: list[ValidationWarning] = [] - - # Combine current resource entries with known bundle entries -> all_messages = set(messages_dict.keys()) -> all_terms = set(terms_dict.keys()) -> if known_messages is not None: -> all_messages |= known_messages -> if known_terms is not None: -> all_terms |= known_terms - - # Check message references -> for msg_name, message in messages_dict.items(): -> msg_refs, term_refs = extract_references(message) -> line, column = _get_entry_position(message, line_cache) - -> for ref in msg_refs: - # Strip attribute qualification for existence check - # "msg.tooltip" -> 
check if "msg" exists -> base_ref = ref.split(".", 1)[0] if "." in ref else ref -> if base_ref not in all_messages: -> warnings.append( -> ValidationWarning( -> code=DiagnosticCode.VALIDATION_UNDEFINED_REFERENCE.name, -> message=f"Message '{msg_name}' references undefined message '{base_ref}'", -> context=base_ref, -> line=line, -> column=column, -> severity=WarningSeverity.CRITICAL, -> ) -> ) - -> for ref in term_refs: - # Strip attribute qualification for existence check - # "term.attr" -> check if "term" exists -> base_ref = ref.split(".", 1)[0] if "." in ref else ref -> if base_ref not in all_terms: -> warnings.append( -> ValidationWarning( -> code=DiagnosticCode.VALIDATION_UNDEFINED_REFERENCE.name, -> message=f"Message '{msg_name}' references undefined term '-{base_ref}'", -> context=f"-{base_ref}", -> line=line, -> column=column, -> severity=WarningSeverity.CRITICAL, -> ) -> ) - - # Check term references -> for term_name, term in terms_dict.items(): -> msg_refs, term_refs = extract_references(term) -> line, column = _get_entry_position(term, line_cache) - -> for ref in msg_refs: - # Strip attribute qualification for existence check -> base_ref = ref.split(".", 1)[0] if "." in ref else ref -> if base_ref not in all_messages: -> warnings.append( -> ValidationWarning( -> code=DiagnosticCode.VALIDATION_UNDEFINED_REFERENCE.name, -> message=f"Term '-{term_name}' references undefined message '{base_ref}'", -> context=base_ref, -> line=line, -> column=column, -> severity=WarningSeverity.CRITICAL, -> ) -> ) - -> for ref in term_refs: - # Strip attribute qualification for existence check -> base_ref = ref.split(".", 1)[0] if "." 
in ref else ref -> if base_ref not in all_terms: -> warnings.append( -> ValidationWarning( -> code=DiagnosticCode.VALIDATION_UNDEFINED_REFERENCE.name, -> message=f"Term '-{term_name}' references undefined term '-{base_ref}'", -> context=f"-{base_ref}", -> line=line, -> column=column, -> severity=WarningSeverity.CRITICAL, -> ) -> ) - -> return warnings - - -> def _detect_circular_references( -> graph: dict[str, set[str]], -> ) -> list[ValidationWarning]: -> """Detect circular dependencies in messages and terms. - -> Uses iterative DFS via analysis.graph module to avoid stack overflow -> on deep dependency chains. - -> Accepts a unified dependency graph with type-prefixed nodes to detect: -> - Message-only cycles (msg:A -> msg:B -> msg:A) -> - Term-only cycles (term:A -> term:B -> term:A) -> - Cross-type cycles (msg:A -> term:B -> msg:A) -> - Cross-resource cycles (current resource -> known entry -> current resource) - -> Args: -> graph: Unified dependency graph with type-prefixed nodes (msg:name, term:name) - -> Returns: -> List of warnings for circular references -> """ -> warnings: list[ValidationWarning] = [] -> seen_cycle_keys: set[str] = set() - - # Detect all cycles in the unified graph -> for cycle in detect_cycles(graph): -> cycle_key = make_cycle_key(cycle) -> if cycle_key not in seen_cycle_keys: -> seen_cycle_keys.add(cycle_key) - - # Format cycle for human-readable output - # Convert "msg:foo" -> "foo", "msg:foo.bar" -> "foo.bar", - # "term:baz" -> "-baz", "term:baz.attr" -> "-baz.attr" -> formatted_parts: list[str] = [] -> for node in cycle: -> if node.startswith("msg:"): -> formatted_parts.append(node[4:]) # Strip "msg:" prefix -> elif node.startswith("term:"): -> formatted_parts.append(f"-{node[5:]}") # Strip "term:", add "-" - -> cycle_str = " -> ".join(formatted_parts) - - # Determine cycle type for appropriate message -> has_messages = any(n.startswith("msg:") for n in cycle) -> has_terms = any(n.startswith("term:") for n in cycle) - -> if 
has_messages and has_terms: -> msg = f"Circular cross-reference: {cycle_str}" -> elif has_terms: -> msg = f"Circular term reference: {cycle_str}" -> else: -> msg = f"Circular message reference: {cycle_str}" - -> warnings.append( -> ValidationWarning( -> code=DiagnosticCode.VALIDATION_CIRCULAR_REFERENCE.name, -> message=msg, -> context=cycle_str, -> severity=WarningSeverity.CRITICAL, -> ) -> ) - -> return warnings - - -> def _build_dependency_graph( -> messages_dict: dict[str, Message], -> terms_dict: dict[str, Term], -> *, -> known_messages: frozenset[str] | None = None, -> known_terms: frozenset[str] | None = None, -> known_msg_deps: dict[str, set[str]] | None = None, -> known_term_deps: dict[str, set[str]] | None = None, -> ) -> dict[str, set[str]]: -> """Build unified dependency graph for messages and terms. - -> Creates a graph with type-prefixed nodes (msg:name, term:name) for -> both cycle detection and chain depth analysis. - -> Args: -> messages_dict: Map of message IDs to Message nodes from current resource -> terms_dict: Map of term IDs to Term nodes from current resource -> known_messages: Optional set of message IDs already in bundle -> known_terms: Optional set of term IDs already in bundle -> known_msg_deps: Optional dependency map for known messages. Maps message ID -> to set of prefixed dependencies (e.g., {"msg:foo", "term:bar"}). -> known_term_deps: Optional dependency map for known terms. - -> Returns: -> Graph as adjacency list (node -> set of dependencies) -> """ -> graph: dict[str, set[str]] = {} - - # Helper to resolve a reference string to a graph node key. - # References may be attribute-qualified ("msg.attr") or bare ("msg"). -> def _resolve_msg_ref(ref: str) -> str | None: -> """Resolve a message reference to its graph node key, or None if unknown.""" -> if "." 
in ref: - # Attribute-qualified reference (e.g., "msg.tooltip") -> base, attr = ref.split(".", 1) -> if base in messages_dict or (known_messages and base in known_messages): -> return f"msg:{base}.{attr}" - # Bare message reference -> elif ref in messages_dict or (known_messages and ref in known_messages): -> return f"msg:{ref}" -> return None - -> def _resolve_term_ref(ref: str) -> str | None: -> """Resolve a term reference to its graph node key, or None if unknown.""" -> if "." in ref: - # Attribute-qualified reference (e.g., "-term.attr") -> base, attr = ref.split(".", 1) -> if base in terms_dict or (known_terms and base in known_terms): -> return f"term:{base}.{attr}" -> elif ref in terms_dict or (known_terms and ref in known_terms): -> return f"term:{ref}" -> return None - - # Add message nodes with attribute-granular dependencies. - # Each attribute gets its own node in the graph to avoid false positive - # cycles when msg.a references msg.b (non-cyclic intra-message reference). -> for msg_name, message in messages_dict.items(): -> refs_by_attr = extract_references_by_attribute(message) - -> for attr_name, (msg_refs, term_refs) in refs_by_attr.items(): -> node_key = f"msg:{msg_name}" if attr_name is None else f"msg:{msg_name}.{attr_name}" - -> deps: set[str] = set() -> for ref in msg_refs: -> resolved = _resolve_msg_ref(ref) -> if resolved is not None: -> deps.add(resolved) -> for ref in term_refs: -> resolved = _resolve_term_ref(ref) -> if resolved is not None: -> deps.add(resolved) -> graph[node_key] = deps - - # Add term nodes with all their dependencies (both message and term refs) -> for term_name, term in terms_dict.items(): -> refs_by_attr = extract_references_by_attribute(term) - -> for attr_name, (msg_refs, term_refs) in refs_by_attr.items(): -> node_key = f"term:{term_name}" if attr_name is None else f"term:{term_name}.{attr_name}" - -> term_deps: set[str] = set() -> for ref in msg_refs: -> resolved = _resolve_msg_ref(ref) -> if resolved is not 
None: -> term_deps.add(resolved) -> for ref in term_refs: -> resolved = _resolve_term_ref(ref) -> if resolved is not None: -> term_deps.add(resolved) -> graph[node_key] = term_deps - - # Add known entries as nodes WITH their actual dependencies if provided. - # This enables detection of cross-resource cycles involving dependencies OF known entries. -> if known_messages: -> for known_msg in known_messages: -> node_key = f"msg:{known_msg}" -> if node_key not in graph: - # Use provided dependencies if available, otherwise empty set -> if known_msg_deps and known_msg in known_msg_deps: -> graph[node_key] = known_msg_deps[known_msg].copy() -> else: -> graph[node_key] = set() - -> if known_terms: -> for known_term in known_terms: -> node_key = f"term:{known_term}" -> if node_key not in graph: - # Use provided dependencies if available, otherwise empty set -> if known_term_deps and known_term in known_term_deps: -> graph[node_key] = known_term_deps[known_term].copy() -> else: -> graph[node_key] = set() - -> return graph - - -> def _compute_longest_paths( -> graph: dict[str, set[str]], -> ) -> dict[str, tuple[int, list[str]]]: -> """Compute longest path from each node using memoized iterative DFS. 
- -> Args: -> graph: Dependency graph as adjacency list - -> Returns: -> Map from node to (path_length, path_nodes) -> """ -> longest_path: dict[str, tuple[int, list[str]]] = {} -> in_stack: set[str] = set() - -> for start in graph: -> if start in longest_path: -> continue - - # Iterative DFS with two-phase processing -> stack: list[tuple[str, int, list[str]]] = [(start, 0, list(graph.get(start, set())))] - -> while stack: -> node, phase, children = stack.pop() - -> if phase == 0: -> if node in longest_path or node in in_stack: -> continue - -> in_stack.add(node) -> stack.append((node, 1, children)) - -> for child in children: -> if child not in longest_path and child not in in_stack: -> stack.append((child, 0, list(graph.get(child, set())))) -> else: -> in_stack.discard(node) -> best_depth, best_path = 0, [] -> for child in children: -> if child in longest_path: -> child_depth, child_path = longest_path[child] -> if child_depth + 1 > best_depth: -> best_depth = child_depth + 1 -> best_path = child_path -> longest_path[node] = (best_depth, [node, *best_path]) - -> return longest_path - - -> def _detect_long_chains( -> graph: dict[str, set[str]], -> max_depth: int = MAX_DEPTH, -> ) -> list[ValidationWarning]: -> """Detect ALL reference chains that exceed maximum depth. - -> Computes longest path from each node and reports ALL chains exceeding -> max_depth. This allows users to see and fix all depth violations in a -> single validation pass rather than iteratively discovering them. 
- -> Args: -> graph: Unified dependency graph with type-prefixed nodes (msg:name, term:name) -> max_depth: Maximum allowed chain depth (default: MAX_DEPTH) - -> Returns: -> List of warnings for ALL chains exceeding max_depth, sorted by depth -> (deepest first) for prioritized remediation -> """ -> if not graph: -> return [] - -> longest_paths = _compute_longest_paths(graph) - - # Collect ALL chains exceeding max_depth -> exceeding_chains: list[tuple[int, list[str], str]] = [] -> for node, (depth, path) in longest_paths.items(): - # Only report chains starting from their origin (first node in path) - # to avoid duplicate warnings for the same chain from different nodes -> if depth > max_depth and path and path[0] == node: -> exceeding_chains.append((depth, path, node)) - -> if not exceeding_chains: -> return [] - - # Sort by depth descending (deepest chains first) for prioritized remediation -> exceeding_chains.sort(key=lambda x: x[0], reverse=True) - -> warnings: list[ValidationWarning] = [] -> for chain_depth, chain_path, _origin in exceeding_chains: - # Format path for human-readable output -> formatted = [ -> node[4:] if node.startswith("msg:") else f"-{node[5:]}" -> for node in chain_path[:10] -> ] -> chain_str = " -> ".join(formatted) -> if len(chain_path) > 10: -> chain_str += f" -> ... 
({len(chain_path)} total)" - -> warnings.append( -> ValidationWarning( -> code=DiagnosticCode.VALIDATION_CHAIN_DEPTH_EXCEEDED.name, -> message=( -> f"Reference chain depth ({chain_depth}) exceeds maximum ({max_depth}); " -> f"will fail at runtime with MAX_DEPTH_EXCEEDED" -> ), -> context=chain_str, -> severity=WarningSeverity.WARNING, -> ) -> ) - -> return warnings - - -> def validate_resource( -> source: str, -> *, -> parser: FluentParserV1 | None = None, -> known_messages: frozenset[str] | None = None, -> known_terms: frozenset[str] | None = None, -> known_msg_deps: dict[str, set[str]] | None = None, -> known_term_deps: dict[str, set[str]] | None = None, -> ) -> ValidationResult: -> """Validate FTL resource without adding to a bundle. - -> Standalone validation function for CI/CD pipelines and tooling. -> Performs syntax validation (errors) and semantic validation (warnings). - -> Validation passes: -> 1. Syntax errors: Parse failures (Junk entries) -> 2. Structural: Duplicate IDs, messages without values -> 3. References: Undefined message/term references -> 4. Cycles: Circular dependency detection -> 5. Chain depth: Reference chains exceeding MAX_DEPTH -> 6. Semantic: Fluent spec compliance (E0001-E0013) - -> Args: -> source: FTL file content -> parser: Optional parser instance (creates default if not provided) -> known_messages: Optional set of message IDs already in bundle (for -> cross-resource reference validation) -> known_terms: Optional set of term IDs already in bundle (for -> cross-resource reference validation) -> known_msg_deps: Optional dependency graph for known messages. Maps message -> ID to set of dependencies (prefixed: "msg:name", "term:name"). Enables -> detection of cross-resource cycles involving dependencies OF known entries. -> known_term_deps: Optional dependency graph for known terms. Maps term ID -> to set of dependencies (prefixed: "msg:name", "term:name"). 
- -> Returns: -> ValidationResult with parse errors and semantic warnings - -> Raises: -> TypeError: If source is not a string (e.g., bytes were passed). - -> Example: -> >>> from ftllexengine.validation import validate_resource -> >>> result = validate_resource(ftl_source) -> >>> if not result.is_valid: -> ... for error in result.errors: -> ... print(f"Error [{error.code}]: {error.message}") -> >>> for warning in result.warnings: -> ... print(f"Warning [{warning.code}]: {warning.message}") - -> Thread Safety: -> Thread-safe. Creates isolated parser if not provided. -> """ - # Type validation at API boundary - type hints are not enforced at runtime. - # Defensive check: users may pass bytes despite str annotation. -> if not isinstance(source, str): -! msg = ( # type: ignore[unreachable] -! f"source must be str, not {type(source).__name__}. " -! "Decode bytes to str (e.g., source.decode('utf-8')) before calling validate_resource()." -! ) -! raise TypeError(msg) - -> if parser is None: - # Local import to avoid import-time overhead for callers not providing parser -> from ftllexengine.syntax.parser import ( # noqa: PLC0415 -> FluentParserV1 as ParserClass, -> ) - -> parser = ParserClass() - - # Normalize line endings to match parser behavior (CRLF/CR -> LF). - # The parser normalizes internally before creating AST spans, so we must - # use the same normalized source for LineOffsetCache to ensure position - # lookups match AST span positions correctly. 
-> normalized_source = re.sub(r"\r\n?", "\n", source) - -> resource = parser.parse(source) - - # Build line offset cache once for all validation passes (O(n)) - # Uses normalized_source to match AST span positions -> line_cache = LineOffsetCache(normalized_source) - - # Pass 1: Extract syntax errors from Junk entries -> errors = _extract_syntax_errors(resource, line_cache) - - # Pass 2: Collect entries and check structural issues -> messages_dict, terms_dict, structure_warnings = _collect_entries( -> resource, -> line_cache, -> known_messages=known_messages, -> known_terms=known_terms, -> ) - - # Pass 3: Check undefined references (with bundle context if provided) -> ref_warnings = _check_undefined_references( -> messages_dict, -> terms_dict, -> line_cache, -> known_messages=known_messages, -> known_terms=known_terms, -> ) - - # Build unified dependency graph once for both cycle and chain detection - # Avoids redundant graph construction (important for large resources) -> dependency_graph = _build_dependency_graph( -> messages_dict, -> terms_dict, -> known_messages=known_messages, -> known_terms=known_terms, -> known_msg_deps=known_msg_deps, -> known_term_deps=known_term_deps, -> ) - - # Pass 4: Detect circular dependencies -> cycle_warnings = _detect_circular_references(dependency_graph) - - # Pass 5: Detect long reference chains (would fail at runtime) -> chain_warnings = _detect_long_chains(dependency_graph) - - # Pass 6: Fluent spec compliance (E0001-E0013) -> semantic_validator = SemanticValidator() -> semantic_result = semantic_validator.validate(resource) -> semantic_annotations = semantic_result.annotations - - # Combine all warnings -> all_warnings = structure_warnings + ref_warnings + cycle_warnings + chain_warnings - -> logger.debug( -> "Validated resource: %d errors, %d warnings, %d annotations", -> len(errors), -> len(all_warnings), -> len(semantic_annotations), -> ) - -> return ValidationResult( -> errors=tuple(errors), -> 
warnings=tuple(all_warnings), -> annotations=semantic_annotations, -> ) diff --git a/src/ftllexengine/validation/resource_graph.py b/src/ftllexengine/validation/resource_graph.py new file mode 100644 index 00000000..459c18b8 --- /dev/null +++ b/src/ftllexengine/validation/resource_graph.py @@ -0,0 +1,262 @@ +"""Dependency-graph validation helpers for resource validation. + +Split from ``validation.resource`` so the main validation entry point stays +focused on orchestration while graph construction and traversal remain in one +cohesive unit. +""" + +from __future__ import annotations + +from typing import TYPE_CHECKING + +from ftllexengine.constants import MAX_DEPTH +from ftllexengine.core.reference_graph import detect_cycles, make_cycle_key +from ftllexengine.diagnostics import ValidationWarning, WarningSeverity +from ftllexengine.diagnostics.codes import DiagnosticCode +from ftllexengine.syntax.reference_extraction import extract_references_by_attribute + +if TYPE_CHECKING: + from collections.abc import Callable, Mapping + + from ftllexengine.syntax import Message, Term + +__all__ = [ + "_compute_longest_paths", + "build_dependency_graph", + "detect_circular_references", + "detect_long_chains", +] + + +def detect_circular_references( + graph: dict[str, set[str]], + *, + detect_cycles_fn: Callable[[dict[str, set[str]]], list[list[str]]] = detect_cycles, + make_cycle_key_fn: Callable[[list[str]], str] = make_cycle_key, +) -> list[ValidationWarning]: + """Detect circular dependencies in a unified reference graph.""" + warnings: list[ValidationWarning] = [] + seen_cycle_keys: set[str] = set() + + for cycle in detect_cycles_fn(graph): + cycle_key = make_cycle_key_fn(cycle) + if cycle_key in seen_cycle_keys: + continue + seen_cycle_keys.add(cycle_key) + + formatted_parts: list[str] = [] + for node in cycle: + if node.startswith("msg:"): + formatted_parts.append(node[4:]) + elif node.startswith("term:"): + formatted_parts.append(f"-{node[5:]}") + + cycle_str = " -> 
".join(formatted_parts) + has_messages = any(node.startswith("msg:") for node in cycle) + has_terms = any(node.startswith("term:") for node in cycle) + + if has_messages and has_terms: + message = f"Circular cross-reference: {cycle_str}" + elif has_terms: + message = f"Circular term reference: {cycle_str}" + else: + message = f"Circular message reference: {cycle_str}" + + warnings.append( + ValidationWarning( + code=DiagnosticCode.VALIDATION_CIRCULAR_REFERENCE, + message=message, + context=cycle_str, + severity=WarningSeverity.CRITICAL, + ) + ) + + return warnings + + +def _resolve_reference( + ref: str, + prefix: str, + local_entries: dict[str, Message] | dict[str, Term], + known_ids: frozenset[str] | None, +) -> str | None: + """Resolve a reference string to a graph node key.""" + if "." in ref: + base, attr = ref.split(".", 1) + if base in local_entries or (known_ids and base in known_ids): + return f"{prefix}:{base}.{attr}" + elif ref in local_entries or (known_ids and ref in known_ids): + return f"{prefix}:{ref}" + return None + + +def _add_entry_nodes( + entries: dict[str, Message] | dict[str, Term], + prefix: str, + messages_dict: dict[str, Message], + terms_dict: dict[str, Term], + known_messages: frozenset[str] | None, + known_terms: frozenset[str] | None, + graph: dict[str, set[str]], +) -> None: + """Add nodes and edges for a set of entries to the dependency graph.""" + for name, entry in entries.items(): + refs_by_attr = extract_references_by_attribute(entry) + + for attr_name, (msg_refs, term_refs) in refs_by_attr.items(): + node_key = f"{prefix}:{name}" if attr_name is None else f"{prefix}:{name}.{attr_name}" + deps: set[str] = set() + + for ref in msg_refs: + resolved = _resolve_reference(ref, "msg", messages_dict, known_messages) + if resolved is not None: + deps.add(resolved) + + for ref in term_refs: + resolved = _resolve_reference(ref, "term", terms_dict, known_terms) + if resolved is not None: + deps.add(resolved) + + graph[node_key] = deps + + 
+def _add_known_entries( + known_ids: frozenset[str] | None, + prefix: str, + known_deps: Mapping[str, frozenset[str]] | None, + graph: dict[str, set[str]], +) -> None: + """Add pre-existing bundle entries to the graph.""" + if not known_ids: + return + + for known_id in known_ids: + node_key = f"{prefix}:{known_id}" + if node_key not in graph: + if known_deps and known_id in known_deps: + graph[node_key] = set(known_deps[known_id]) + else: + graph[node_key] = set() + + +def build_dependency_graph( + messages_dict: dict[str, Message], + terms_dict: dict[str, Term], + *, + known_messages: frozenset[str] | None = None, + known_terms: frozenset[str] | None = None, + known_msg_deps: Mapping[str, frozenset[str]] | None = None, + known_term_deps: Mapping[str, frozenset[str]] | None = None, +) -> dict[str, set[str]]: + """Build a unified dependency graph for messages and terms.""" + graph: dict[str, set[str]] = {} + + _add_entry_nodes( + messages_dict, + "msg", + messages_dict, + terms_dict, + known_messages, + known_terms, + graph, + ) + _add_entry_nodes( + terms_dict, + "term", + messages_dict, + terms_dict, + known_messages, + known_terms, + graph, + ) + + _add_known_entries(known_messages, "msg", known_msg_deps, graph) + _add_known_entries(known_terms, "term", known_term_deps, graph) + + return graph + + +def _compute_longest_paths( + graph: dict[str, set[str]], +) -> dict[str, tuple[int, list[str]]]: + """Compute the longest path from each node using memoized iterative DFS.""" + longest_path: dict[str, tuple[int, list[str]]] = {} + in_stack: set[str] = set() + + for start in graph: + if start in longest_path: + continue + + stack: list[tuple[str, int, list[str]]] = [(start, 0, list(graph.get(start, set())))] + + while stack: + node, phase, children = stack.pop() + + if phase == 0: + if node in longest_path: + continue + + in_stack.add(node) + stack.append((node, 1, children)) + stack.extend( + (child, 0, list(graph.get(child, set()))) + for child in children + if 
child not in longest_path and child not in in_stack + ) + else: + in_stack.discard(node) + best_depth, best_path = 0, [] + for child in children: + if child in longest_path: + child_depth, child_path = longest_path[child] + if child_depth + 1 > best_depth: + best_depth = child_depth + 1 + best_path = child_path + longest_path[node] = (best_depth, [node, *best_path]) + + return longest_path + + +def detect_long_chains( + graph: dict[str, set[str]], + max_depth: int = MAX_DEPTH, +) -> list[ValidationWarning]: + """Detect reference chains that exceed the maximum runtime depth.""" + if not graph: + return [] + + longest_paths = _compute_longest_paths(graph) + exceeding_chains: list[tuple[int, list[str], str]] = [] + + for node, (depth, path) in longest_paths.items(): + if depth > max_depth and path and path[0] == node: + exceeding_chains.append((depth, path, node)) + + if not exceeding_chains: + return [] + + exceeding_chains.sort(key=lambda item: item[0], reverse=True) + + warnings: list[ValidationWarning] = [] + for chain_depth, chain_path, _origin in exceeding_chains: + formatted = [ + node[4:] if node.startswith("msg:") else f"-{node[5:]}" + for node in chain_path[:10] + ] + chain_str = " -> ".join(formatted) + if len(chain_path) > 10: + chain_str += f" -> ... ({len(chain_path)} total)" + + warnings.append( + ValidationWarning( + code=DiagnosticCode.VALIDATION_CHAIN_DEPTH_EXCEEDED, + message=( + f"Reference chain depth ({chain_depth}) exceeds maximum ({max_depth}); " + f"will fail at runtime with MAX_DEPTH_EXCEEDED" + ), + context=chain_str, + severity=WarningSeverity.WARNING, + ) + ) + + return warnings diff --git a/tests/helpers/fluentjs_fixtures.py b/tests/helpers/fluentjs_fixtures.py new file mode 100644 index 00000000..241867e3 --- /dev/null +++ b/tests/helpers/fluentjs_fixtures.py @@ -0,0 +1,145 @@ +"""Vendored Fluent.js structure fixtures for offline spec-conformance tests. 
+ +The FTL payloads below are copied from the Fluent.js reference implementation's +``fluent-syntax/test/fixtures_structure`` directory. The expected structural +counts come from the corresponding upstream JSON AST fixtures, so the tests +remain deterministic without depending on live network fetches. +""" + +from __future__ import annotations + +from dataclasses import dataclass + +__all__ = ["SOURCE_BASE_URL", "STRUCTURE_FIXTURES", "StructureFixture"] + +SOURCE_BASE_URL = ( + "https://github.com/projectfluent/fluent.js/tree/main/" + "fluent-syntax/test/fixtures_structure" +) + + +@dataclass(frozen=True, slots=True) +class StructureFixture: + """Vendored Fluent.js structure fixture plus derived reference counts.""" + + description: str + ftl: str + expected_messages: int + expected_terms: int + + +STRUCTURE_FIXTURES: dict[str, StructureFixture] = { + "simple_message": StructureFixture( + description="Basic message", + ftl="foo = Foo\n", + expected_messages=1, + expected_terms=0, + ), + "multiline_pattern": StructureFixture( + description="Multiline pattern", + ftl=( + "key01 = Value\n" + " Continued here.\n\n" + "key02 =\n" + " Value\n" + " Continued here.\n\n" + '# ERROR "Continued" looks like a new message.\n' + '# key03 parses fine with just "Value".\n' + "key03 =\n" + " Value\n" + "Continued here\n" + " and here.\n\n" + '# ERROR "Continued" and "and" look like new messages\n' + '# key04 parses fine with just "Value".\n' + "key04 =\n" + " Value\n" + "Continued here\n" + "and even here.\n" + ), + expected_messages=4, + expected_terms=0, + ), + "multiline_with_placeables": StructureFixture( + description="Pattern with placeables", + ftl=( + "key =\n" + " Foo { bar }\n" + " Baz\n" + ), + expected_messages=1, + expected_terms=0, + ), + "select_expressions": StructureFixture( + description="Select expressions", + ftl=( + "# ERROR No blanks are allowed between * and [.\n" + "err01 = { $sel ->\n" + " * [key] Value\n" + "}\n\n" + "# ERROR Missing default variant.\n" + 
"err02 = { $sel ->\n" + " [key] Value\n" + "}\n" + ), + expected_messages=0, + expected_terms=0, + ), + "blank_lines": StructureFixture( + description="Blank lines handling", + ftl=( + "### NOTE: Disable final newline insertion and trimming when editing this file.\n\n" + "key01 = Value 01\n\n" + "key02 = Value 02\n\n\n" + "key03 =\n\n" + " Value 03\n\n" + " Continued\n\n" + '# There are four spaces on the line between "Value 04" and "Continued".\n' + "key04 =\n\n" + " Value 04\n" + " \n" + " Continued\n\n" + '# There are four spaces on the line following "Value 05".\n' + "key05 =\n" + " Value 05\n" + " \n" + '# There are four spaces on the line following "Value 06".\n' + "key06 = Value 06\n" + " " + ), + expected_messages=6, + expected_terms=0, + ), + "term": StructureFixture( + description="Simple term", + ftl=( + "-term =\n" + " { $case ->\n" + " *[uppercase] Term\n" + " [lowercase] term\n" + " }\n" + " .attr = a\n\n" + "key01 = {-term}\n" + "key02 = {-term()}\n" + 'key03 = {-term(case: "uppercase")}\n\n\n' + "key04 =\n" + " { -term.attr ->\n" + " [a] { -term } A\n" + " [b] { -term() } B\n" + " *[x] X\n" + " }\n\n" + "-err1 =\n" + "-err2 =\n" + " .attr = Attribute\n" + "--err3 = Error\n" + "err4 = { --err4 }\n" + ), + expected_messages=4, + expected_terms=1, + ), + "empty_resource": StructureFixture( + description="Empty FTL file", + ftl="", + expected_messages=0, + expected_terms=0, + ), +} diff --git a/tests/test_analysis_graph.py b/tests/test_analysis_graph.py index 50ec19d0..70fdfb8c 100644 --- a/tests/test_analysis_graph.py +++ b/tests/test_analysis_graph.py @@ -16,7 +16,9 @@ from hypothesis import strategies as st import ftllexengine.analysis.graph as _graph_mod +import ftllexengine.core.reference_graph as _core_graph_mod from ftllexengine.analysis.graph import ( + _canonicalize_cycle, detect_cycles, entry_dependency_set, make_cycle_key, @@ -134,6 +136,14 @@ def test_property_prefix_preserves_identity( class TestMakeCycleKey: """Tests for make_cycle_key 
canonical display format.""" + def test_private_canonicalize_cycle_wrapper(self) -> None: + """Compatibility wrapper delegates to the canonical tuple helper.""" + assert _canonicalize_cycle(["term:b", "term:a", "term:b"]) == ( + "term:a", + "term:b", + "term:a", + ) + def test_empty_cycle_yields_empty_string(self) -> None: """Empty input produces empty key.""" assert make_cycle_key([]) == "" @@ -559,3 +569,17 @@ def test_max_graph_dfs_stack_push_guard_fires( # Function must not raise and must return a valid list assert isinstance(cycles, list) assert all(isinstance(c, list) for c in cycles) + + def test_monkeypatched_limits_do_not_leak_into_core_module( + self, monkeypatch: pytest.MonkeyPatch, + ) -> None: + """Compatibility wrapper restores core graph limits after monkeypatch runs.""" + original_max_cycles = _core_graph_mod.MAX_DETECTED_CYCLES + original_max_stack = _core_graph_mod.MAX_GRAPH_DFS_STACK + monkeypatch.setattr(_graph_mod, "MAX_DETECTED_CYCLES", 1) + monkeypatch.setattr(_graph_mod, "MAX_GRAPH_DFS_STACK", 2) + + detect_cycles({"a": {"a"}}) + + assert original_max_cycles == _core_graph_mod.MAX_DETECTED_CYCLES + assert original_max_stack == _core_graph_mod.MAX_GRAPH_DFS_STACK diff --git a/tests/test_architecture_contract.py b/tests/test_architecture_contract.py new file mode 100644 index 00000000..702309d8 --- /dev/null +++ b/tests/test_architecture_contract.py @@ -0,0 +1,254 @@ +"""Architecture contract tests for import direction and workflow hygiene.""" + +from __future__ import annotations + +import ast +import re +import shutil +import subprocess +from pathlib import Path + +REPO_ROOT = Path(__file__).resolve().parent.parent +SRC_ROOT = REPO_ROOT / "src" / "ftllexengine" + +LAYER_ORDER = { + "core": 0, + "diagnostics": 1, + "syntax": 2, + "validation": 3, + "analysis": 4, + "introspection": 4, + "parsing": 5, + "runtime": 6, + "localization": 7, +} + +PATH_HACK_PATTERNS = ( + re.compile(r"\bsys\.path\.(?:insert|append)\("), + 
re.compile(r"\bPYTHONPATH=src\b"), + re.compile(r'PYTHONPATH"\]\s*='), + re.compile(r"\bexport\s+PYTHONPATH=.*\bsrc\b"), +) +LIVE_NETWORK_TEST_PATTERNS = ( + re.compile(r"\burllib\.request\b"), + re.compile(r"\burlopen\("), + re.compile(r"raw\.githubusercontent\.com"), +) + +VERSION_PROVENANCE_PATTERN = re.compile(r"\b(?:Added|Pre|Post|Prior to)\s+v\d+\.\d+\.\d+\b|v\d+\.\d+\.\d+\+") + +CODE_MODULE_LINE_BUDGETS = { + "src/ftllexengine/runtime/bundle.py": 900, + "src/ftllexengine/runtime/cache.py": 700, + "src/ftllexengine/runtime/locale_context.py": 500, + "src/ftllexengine/runtime/locale_formatting.py": 400, + "src/ftllexengine/runtime/resolver.py": 600, + "src/ftllexengine/introspection/iso.py": 700, + "src/ftllexengine/localization/orchestrator.py": 400, + "src/ftllexengine/parsing/currency.py": 650, + "src/ftllexengine/parsing/dates.py": 350, + "src/ftllexengine/syntax/serializer.py": 700, +} + + +def _module_name(path: Path) -> str: + relative = path.relative_to(REPO_ROOT / "src").with_suffix("") + return ".".join(relative.parts) + + +def _layer_name(module_name: str) -> str | None: + parts = module_name.split(".") + if len(parts) < 2 or parts[0] != "ftllexengine": + return None + return parts[1] if parts[1] in LAYER_ORDER else None + + +def _resolve_import(importer: str, node: ast.ImportFrom) -> str | None: + package_parts = importer.split(".")[:-1] + if node.level: + package_parts = package_parts[: len(package_parts) - node.level + 1] + if node.module: + return ".".join([*package_parts, node.module]) + return ".".join(package_parts) if package_parts else None + + +def _git_visible_repo_files() -> list[Path]: + """List tracked and unignored files that currently exist in the worktree.""" + git = shutil.which("git") + assert git is not None + result = subprocess.run( + [git, "ls-files", "--cached", "--others", "--exclude-standard", "-z"], + check=True, + capture_output=True, + cwd=REPO_ROOT, + ) + files: list[Path] = [] + for raw_path in 
result.stdout.split(b"\0"): + if not raw_path: + continue + path = REPO_ROOT / raw_path.decode("utf-8") + if path.is_file(): + files.append(path) + return files + + +def test_internal_modules_do_not_reverse_layer_dependencies() -> None: + """Non-facade modules should only import within or below their own layer.""" + violations: list[str] = [] + + for path in sorted(SRC_ROOT.rglob("*.py")): + if path.name == "__init__.py": + continue + + importer = _module_name(path) + importer_layer = _layer_name(importer) + if importer_layer is None: + continue + + tree = ast.parse(path.read_text(encoding="utf-8"), filename=str(path)) + for node in ast.walk(tree): + if isinstance(node, ast.Import): + for alias in node.names: + if not alias.name.startswith("ftllexengine."): + continue + imported_layer = _layer_name(alias.name) + if imported_layer and LAYER_ORDER[imported_layer] > LAYER_ORDER[importer_layer]: + violations.append( + f"{importer} ({importer_layer}) imports {alias.name} ({imported_layer})" + ) + elif isinstance(node, ast.ImportFrom): + imported = _resolve_import(importer, node) + if imported is None or not imported.startswith("ftllexengine."): + continue + imported_layer = _layer_name(imported) + if imported_layer and LAYER_ORDER[imported_layer] > LAYER_ORDER[importer_layer]: + violations.append( + f"{importer} ({importer_layer}) imports {imported} ({imported_layer})" + ) + + assert violations == [] + + +def test_repo_avoids_legacy_import_path_hacks() -> None: + """Code and docs should not rely on sys.path or PYTHONPATH src injection.""" + offenders: list[str] = [] + scan_roots = ( + REPO_ROOT / "src", + REPO_ROOT / "tests", + REPO_ROOT / "scripts", + REPO_ROOT / "docs", + REPO_ROOT / "examples", + REPO_ROOT / "README.md", + ) + + paths: list[Path] = [] + for root in scan_roots: + if root.is_file(): + paths.append(root) + elif root.exists(): + paths.extend(p for p in root.rglob("*") if p.suffix in {".py", ".sh", ".md"}) + + for path in sorted(paths): + if path == 
Path(__file__).resolve(): + continue + text = path.read_text(encoding="utf-8") + for pattern in PATH_HACK_PATTERNS: + if pattern.search(text): + offenders.append(f"{path.relative_to(REPO_ROOT)}: {pattern.pattern}") + + assert offenders == [] + + +def test_tests_do_not_depend_on_live_network_fixture_fetches() -> None: + """Test fixtures should be vendored instead of fetched over the live network.""" + offenders: list[str] = [] + + for path in sorted((REPO_ROOT / "tests").rglob("*.py")): + if path == Path(__file__).resolve(): + continue + text = path.read_text(encoding="utf-8") + for pattern in LIVE_NETWORK_TEST_PATTERNS: + if pattern.search(text): + offenders.append(f"{path.relative_to(REPO_ROOT)}: {pattern.pattern}") + + assert offenders == [] + + +def test_docs_avoid_deep_localization_types_imports() -> None: + """Public docs should reference stable facades, not helper submodules.""" + offenders: list[str] = [] + doc_paths = [REPO_ROOT / "README.md", *sorted((REPO_ROOT / "docs").glob("*.md"))] + + for path in doc_paths: + text = path.read_text(encoding="utf-8") + if "ftllexengine.localization.types" in text: + offenders.append(str(path.relative_to(REPO_ROOT))) + + assert offenders == [] + + +def test_parser_grammar_modules_stay_split() -> None: + """Parser grammar implementation should remain partitioned instead of collapsing back.""" + parser_root = SRC_ROOT / "syntax" / "parser" + expected_modules = ( + parser_root / "context.py", + parser_root / "patterns.py", + parser_root / "expressions.py", + parser_root / "entries.py", + ) + + missing = [str(path.relative_to(REPO_ROOT)) for path in expected_modules if not path.exists()] + assert missing == [] + + rules_path = parser_root / "rules.py" + assert rules_path.exists() + assert len(rules_path.read_text(encoding="utf-8").splitlines()) <= 80 + + +def test_repo_has_no_generated_cover_artifacts_in_tree() -> None: + """Generated coverage/cache artifacts should not live in the repository tree.""" + offenders = [ + 
str(path.relative_to(REPO_ROOT)) + for path in _git_visible_repo_files() + if re.search(r"(^|/)__pycache__/|\.pyc$|,cover$|\.cover$", str(path)) + ] + assert offenders == [] + + +def test_repo_avoids_version_provenance_annotations_outside_changelog() -> None: + """Historical version provenance belongs in CHANGELOG.md, not code or examples.""" + offenders: list[str] = [] + for root in (REPO_ROOT / "src", REPO_ROOT / "tests", REPO_ROOT / "examples"): + for path in sorted(root.rglob("*")): + if path.suffix not in {".py", ".md", ".ini", ".pyi"}: + continue + text = path.read_text(encoding="utf-8") + if VERSION_PROVENANCE_PATTERN.search(text): + offenders.append(str(path.relative_to(REPO_ROOT))) + + assert offenders == [] + + +def test_public_examples_avoid_thread_local_storage_patterns() -> None: + """Examples should model explicit ownership instead of threading.local().""" + offenders: list[str] = [] + for path in ( + REPO_ROOT / "examples" / "thread_safety.py", + REPO_ROOT / "examples" / "README_TYPE_CHECKING.md", + ): + if "threading.local" in path.read_text(encoding="utf-8"): + offenders.append(str(path.relative_to(REPO_ROOT))) + + assert offenders == [] + + +def test_core_runtime_modules_stay_under_line_budgets() -> None: + """Large internal modules should remain split by responsibility.""" + offenders: list[str] = [] + for relative_path, max_lines in CODE_MODULE_LINE_BUDGETS.items(): + path = REPO_ROOT / relative_path + line_count = len(path.read_text(encoding="utf-8").splitlines()) + if line_count > max_lines: + offenders.append(f"{relative_path}: {line_count} > {max_lines}") + + assert offenders == [] diff --git a/tests/test_coverage_policy.py b/tests/test_coverage_policy.py new file mode 100644 index 00000000..b08e48ff --- /dev/null +++ b/tests/test_coverage_policy.py @@ -0,0 +1,29 @@ +"""Tests enforcing the repository coverage policy configuration.""" + +from __future__ import annotations + +import re +import tomllib +from pathlib import Path + +REPO_ROOT = 
Path(__file__).resolve().parent.parent + + +def test_pyproject_enforces_full_line_and_branch_coverage() -> None: + """Coverage config should require 100% and track branches.""" + pyproject = tomllib.loads((REPO_ROOT / "pyproject.toml").read_text(encoding="utf-8")) + + coverage_run = pyproject["tool"]["coverage"]["run"] + coverage_report = pyproject["tool"]["coverage"]["report"] + + assert coverage_run["branch"] is True + assert coverage_report["fail_under"] == 100.0 + + +def test_scripts_test_sh_uses_same_coverage_threshold() -> None: + """The main test script should match the pyproject coverage policy.""" + content = (REPO_ROOT / "scripts" / "test.sh").read_text(encoding="utf-8") + + match = re.search(r"^DEFAULT_COV_LIMIT=(\d+)$", content, re.MULTILINE) + assert match is not None + assert int(match.group(1)) == 100 diff --git a/tests/test_documentation_tooling.py b/tests/test_documentation_tooling.py new file mode 100644 index 00000000..84342e18 --- /dev/null +++ b/tests/test_documentation_tooling.py @@ -0,0 +1,417 @@ +"""Regression tests for documentation tooling and source docstring policy.""" + +from __future__ import annotations + +import doctest +import importlib +import importlib.util +import inspect +import pkgutil +import re +import subprocess +import sys +import tomllib +from pathlib import Path +from tempfile import TemporaryDirectory +from types import ModuleType + +REPO_ROOT = Path(__file__).resolve().parent.parent +SRC_ROOT = REPO_ROOT / "src" +DOCUMENTED_MODULES = ( + "ftllexengine", + "ftllexengine.runtime", + "ftllexengine.localization", + "ftllexengine.syntax", + "ftllexengine.parsing", + "ftllexengine.diagnostics", + "ftllexengine.introspection", + "ftllexengine.validation", +) +DOCUMENTED_REPO_SCRIPTS = ( + "check.sh", + "scripts/validate_docs.py", + "scripts/validate_version.py", + "scripts/run_examples.py", + "scripts/lint.sh", + "scripts/test.sh", + "scripts/fuzz_hypofuzz.sh", + "scripts/fuzz_atheris.sh", +) +ROUTE_NAME_OVERRIDES: dict[str, 
dict[str, str]] = { + "ftllexengine.syntax": { + "ParseResult": "ftllexengine.syntax.ParseResult", + }, +} +UNDOCUMENTED_REFERENCE_ALIASES = ("InlineExpression", "VariantKey") +REFERENCE_DOC_LINE_BUDGET = 450 + + +def _load_script_module(name: str, path: Path) -> ModuleType: + """Load a repository script as an importable module for testing.""" + spec = importlib.util.spec_from_file_location(name, path) + assert spec is not None + assert spec.loader is not None + module = importlib.util.module_from_spec(spec) + + sys.modules[name] = module + spec.loader.exec_module(module) + return module + + +def _index_routes() -> dict[str, tuple[Path, str]]: + """Parse the API routing table from docs/DOC_00_Index.md.""" + index_path = REPO_ROOT / "docs" / "DOC_00_Index.md" + text = index_path.read_text(encoding="utf-8") + routes: dict[str, tuple[Path, str]] = {} + + row_pattern = re.compile( + r"^\| `([^`]+)` \| \[([^\]]+)\]\(([^)]+)\) \| `([^`]+)` \|$", + re.MULTILINE, + ) + for symbol, _label, rel_target, section in row_pattern.findall(text): + routes[symbol] = ((index_path.parent / rel_target).resolve(), section) + return routes + + +def _symbol_headings(md_path: Path) -> set[str]: + """Return the set of second-level symbol headings in a markdown file.""" + text = md_path.read_text(encoding="utf-8") + return set(re.findall(r"^## `([^`]+)`$", text, re.MULTILINE)) + + +def _extract_signature_block(md_path: Path, section: str) -> str | None: + """Return the python signature block for one AFAD reference entry.""" + text = md_path.read_text(encoding="utf-8") + pattern = re.compile( + rf"^## `{re.escape(section)}`\n\n.*?### Signature\n```python\n(.*?)\n```", + re.MULTILINE | re.DOTALL, + ) + match = pattern.search(text) + return match.group(1).strip() if match else None + + +def test_validate_docs_configuration_tracks_runnable_python_docs() -> None: + """validate_docs should know which markdown files contain runnable Python.""" + validate_docs = _load_script_module( + 
"validate_docs_script", REPO_ROOT / "scripts" / "validate_docs.py" + ) + + config = validate_docs.CheckConfig.from_pyproject(REPO_ROOT) + + assert "README.md" in config.scan_globs + assert "examples/**/*.md" in config.scan_globs + assert "fuzz_atheris/README.md" in config.scan_globs + assert "README.md" in config.python_exec_globs + assert "docs/CUSTOM_FUNCTIONS_GUIDE.md" in config.python_exec_globs + assert "docs/LOCALE_GUIDE.md" in config.python_exec_globs + assert "docs/MIGRATION.md" in config.python_exec_globs + assert "docs/PARSING_GUIDE.md" in config.python_exec_globs + assert "docs/QUICK_REFERENCE.md" in config.python_exec_globs + assert "docs/TYPE_HINTS_GUIDE.md" in config.python_exec_globs + assert "docs/VALIDATION_GUIDE.md" in config.python_exec_globs + assert ( + validate_docs.validate_python_code("from ftllexengine import __version__", REPO_ROOT) + is None + ) + assert validate_docs.validate_python_code("raise RuntimeError('boom')", REPO_ROOT) is not None + + +def test_validate_version_uses_afad_frontmatter_version_contract() -> None: + """validate_version should enforce the AFAD v3.5 `version:` frontmatter key.""" + pyproject = tomllib.loads((REPO_ROOT / "pyproject.toml").read_text(encoding="utf-8")) + + validate_version = _load_script_module( + "validate_version_script", REPO_ROOT / "scripts" / "validate_version.py" + ) + + assert pyproject["tool"]["validate-version"]["frontmatter_key"] == "version" + + with TemporaryDirectory() as td: + root = Path(td) + (root / "doc.md").write_text( + "---\nversion: 0.0.1\n---\n\nbody\n", + encoding="utf-8", + ) + result = validate_version.check_configurable_frontmatter( + {"project": {"version": "9.9.9"}}, + root, + ["doc.md"], + "version", + ) + + assert result.passed is False + assert result.severity == validate_version.SEVERITY_DOC + assert "(expected '9.9.9')" in result.message + + +def test_source_doctest_prompts_are_explicitly_non_executable() -> None: + """Raw doctest prompts in source docstrings must be 
explicitly skipped.""" + offenders: list[str] = [] + + for path in sorted((SRC_ROOT / "ftllexengine").rglob("*.py")): + for lineno, line in enumerate(path.read_text(encoding="utf-8").splitlines(), 1): + if ">>>" in line and "+SKIP" not in line: + offenders.append(f"{path}:{lineno}:{line}") + + assert offenders == [] + + +def test_doctest_sweep_is_clean_under_repo_docstring_policy() -> None: + """A package-wide doctest sweep should pass under the repository policy.""" + package = importlib.import_module("ftllexengine") + module_names = ["ftllexengine"] + [ + m.name for m in pkgutil.walk_packages(package.__path__, prefix="ftllexengine.") + ] + + failures: list[str] = [] + for name in module_names: + module = importlib.import_module(name) + result = doctest.testmod(module, optionflags=doctest.ELLIPSIS, report=False) + if result.failed: + failures.append(f"{name}: failed={result.failed} attempted={result.attempted}") + + assert failures == [] + + +def test_api_index_covers_public_root_exports_and_existing_sections() -> None: + """Public root exports should always be routed to a real API reference section.""" + package = importlib.import_module("ftllexengine") + routes = _index_routes() + public_exports = set(package.__all__) + + missing = sorted(public_exports - set(routes)) + assert missing == [] + + for symbol, (target_path, section) in routes.items(): + assert target_path.exists(), symbol + assert section in _symbol_headings(target_path), symbol + + +def test_api_index_covers_documented_module_exports() -> None: + """Reference index should cover the exported surfaces the docs claim to cover.""" + routes = _index_routes() + + expected_routes: set[str] = set() + for module_name in DOCUMENTED_MODULES: + module = importlib.import_module(module_name) + overrides = ROUTE_NAME_OVERRIDES.get(module_name, {}) + for symbol in getattr(module, "__all__", []): + expected_routes.add(overrides.get(symbol, symbol)) + + missing = sorted(expected_routes - set(routes)) + assert 
missing == [] + + for symbol in expected_routes: + target_path, section = routes[symbol] + assert target_path.exists(), symbol + assert section in _symbol_headings(target_path), symbol + + +def test_api_index_covers_documented_repo_scripts() -> None: + """Reference index should route the repo's supported operational scripts.""" + routes = _index_routes() + + missing = sorted(set(DOCUMENTED_REPO_SCRIPTS) - set(routes)) + assert missing == [] + + for symbol in DOCUMENTED_REPO_SCRIPTS: + target_path, section = routes[symbol] + assert target_path.exists(), symbol + assert section in _symbol_headings(target_path), symbol + + +def test_reference_doc_import_statements_resolve() -> None: + """Reference-doc import examples should stay copy-paste correct.""" + import_pattern = re.compile(r"- Import: `([^`]+)`") + doc_paths = sorted((REPO_ROOT / "docs").glob("DOC_*.md")) + + failures: list[str] = [] + for path in doc_paths: + for statement in import_pattern.findall(path.read_text(encoding="utf-8")): + result = subprocess.run( + [sys.executable, "-c", f"{statement}\nprint('OK')\n"], + cwd=SRC_ROOT, + capture_output=True, + text=True, + check=False, + ) + if result.returncode != 0: + stderr = result.stderr.strip() or result.stdout.strip() + failures.append(f"{path.name}: {statement} -> {stderr}") + + assert failures == [] + + +def test_reference_doc_signatures_avoid_undocumented_internal_aliases() -> None: + """Reference docs should not leak undocumented submodule-only alias names.""" + doc_paths = sorted((REPO_ROOT / "docs").glob("DOC_*.md")) + offenders: list[str] = [] + + for path in doc_paths: + text = path.read_text(encoding="utf-8") + for alias in UNDOCUMENTED_REFERENCE_ALIASES: + if alias in text: + offenders.append(f"{path.name}: {alias}") + + assert offenders == [] + + +def test_reference_docs_stay_split_under_line_budget() -> None: + """Reference docs should stay partitioned instead of regressing into god files.""" + offenders: list[str] = [] + + for path in 
sorted((REPO_ROOT / "docs").glob("DOC_*.md")): + line_count = len(path.read_text(encoding="utf-8").splitlines()) + if line_count > REFERENCE_DOC_LINE_BUDGET: + offenders.append(f"{path.name}: {line_count}") + + assert offenders == [] + + +def test_check_script_covers_full_quality_surface() -> None: + """Top-level check.sh should orchestrate the repo's supported validation gates.""" + text = (REPO_ROOT / "check.sh").read_text(encoding="utf-8") + + required_commands = ( + "scripts/validate_version.py", + "scripts/validate_docs.py", + "scripts/run_examples.py", + "./scripts/lint.sh", + "./scripts/test.sh", + "./scripts/fuzz_hypofuzz.sh --preflight", + "./scripts/fuzz_atheris.sh --corpus", + "./scripts/fuzz_atheris.sh graph --time", + "./scripts/fuzz_atheris.sh introspection --time", + ) + + for command in required_commands: + assert command in text + + +def test_atheris_corpus_health_bootstraps_its_venv() -> None: + """Atheris corpus health should create its dedicated venv before execution.""" + text = (REPO_ROOT / "scripts" / "fuzz_atheris.sh").read_text(encoding="utf-8") + marker = "run_corpus_health() {" + assert marker in text + body = text.split(marker, 1)[1].split("}", 1)[0] + + assert "ensure_atheris_venv" in body or "run_diagnostics" in body + + +def test_atheris_bootstrap_discovers_uv_managed_python_313() -> None: + """Atheris bootstrap should recognize uv-managed Python 3.13 interpreters.""" + text = (REPO_ROOT / "scripts" / "fuzz_atheris.sh").read_text(encoding="utf-8") + + assert "uv python find 3.13" in text + + +def test_atheris_bootstrap_recreates_broken_venv_dirs() -> None: + """Atheris bootstrap should discard stale venv directories with broken Python links.""" + text = (REPO_ROOT / "scripts" / "fuzz_atheris.sh").read_text(encoding="utf-8") + + assert '[[ -d "$ATHERIS_VENV" ]] && [[ ! 
-x "$ATHERIS_PYTHON" ]]' in text + + +def test_reference_signature_parameter_names_match_live_exports() -> None: + """AFAD reference signatures should keep parameter names aligned with live exports.""" + routes = _index_routes() + issues: list[str] = [] + + for module_name in DOCUMENTED_MODULES: + module = importlib.import_module(module_name) + overrides = ROUTE_NAME_OVERRIDES.get(module_name, {}) + for symbol in getattr(module, "__all__", []): + route_name = overrides.get(symbol, symbol) + if route_name not in routes: + continue + + target_path, section = routes[route_name] + signature_block = _extract_signature_block(target_path, section) + if signature_block is None or ( + "def " not in signature_block and "class " not in signature_block + ): + continue + + obj = getattr(module, symbol) + try: + signature = inspect.signature(obj) + except (TypeError, ValueError): + continue + + if "def __init__(" in signature_block: + params_source = signature_block.split("def __init__(", 1)[1].split(") ->", 1)[0] + elif signature_block.lstrip().startswith("def "): + params_source = signature_block.split("(", 1)[1].rsplit(")", 1)[0] + else: + continue + + doc_params = [ + name + for name in re.findall(r"\b([A-Za-z_][A-Za-z0-9_]*)\s*:", params_source) + if name != "self" + ] + live_params = [ + param.name + for param in signature.parameters.values() + if param.name != "self" + ] + if live_params != doc_params: + issues.append( + f"{route_name}: live={live_params!r} doc={doc_params!r}" + ) + + assert issues == [] + + +def test_sdist_includes_root_frontmatter_docs_and_readme() -> None: + """Root markdown docs with frontmatter should ship in the source distribution.""" + pyproject = tomllib.loads((REPO_ROOT / "pyproject.toml").read_text(encoding="utf-8")) + only_include = set(pyproject["tool"]["hatch"]["build"]["targets"]["sdist"]["only-include"]) + + expected = {"README.md"} + for path in REPO_ROOT.glob("*.md"): + if path.name == "README.md": + continue + text = 
path.read_text(encoding="utf-8") + if text.startswith("---\n") and "\nafad:" in text: + expected.add(path.name) + + missing = sorted(expected - only_include) + assert missing == [] + + +def test_release_protocol_lives_under_docs_and_repo_links_follow_it() -> None: + """Release protocol should live under docs/ and repo surfaces should link there.""" + release_doc = REPO_ROOT / "docs" / "RELEASE_PROTOCOL.md" + assert release_doc.exists() + assert not (REPO_ROOT / "RELEASE_PROTOCOL.md").exists() + + readme = (REPO_ROOT / "README.md").read_text(encoding="utf-8") + contributing = (REPO_ROOT / "CONTRIBUTING.md").read_text(encoding="utf-8") + pyproject = tomllib.loads((REPO_ROOT / "pyproject.toml").read_text(encoding="utf-8")) + + assert "(docs/RELEASE_PROTOCOL.md)" in readme + assert "(docs/RELEASE_PROTOCOL.md)" in contributing + + frontmatter_globs = set(pyproject["tool"]["validate-version"]["frontmatter_globs"]) + only_include = set(pyproject["tool"]["hatch"]["build"]["targets"]["sdist"]["only-include"]) + + assert "RELEASE_PROTOCOL.md" not in frontmatter_globs + assert "RELEASE_PROTOCOL.md" not in only_include + + +def test_public_docs_and_examples_avoid_fix_later_markers() -> None: + """Public-facing docs and examples should not ship TODO/FIXME/HACK markers.""" + offenders: list[str] = [] + scan_paths = [REPO_ROOT / "README.md", *sorted((REPO_ROOT / "docs").glob("*.md"))] + scan_paths.extend(sorted((REPO_ROOT / "examples").rglob("*.py"))) + scan_paths.extend(sorted((REPO_ROOT / "examples").glob("*.md"))) + + marker_re = re.compile(r"\b(TODO|FIXME|HACK)\b") + + for path in scan_paths: + for lineno, line in enumerate(path.read_text(encoding="utf-8").splitlines(), 1): + if marker_re.search(line): + offenders.append(f"{path.relative_to(REPO_ROOT)}:{lineno}:{line.strip()}") + + assert offenders == [] diff --git a/tests/test_init_module.py b/tests/test_init_module.py index 0bc78bbc..5fe1b6ce 100644 --- a/tests/test_init_module.py +++ b/tests/test_init_module.py @@ -395,7 
+395,7 @@ def test_all_exports_count(self) -> None: """ import ftllexengine - assert len(ftllexengine.__all__) == 57 + assert len(ftllexengine.__all__) == 60 def test_babel_optional_exports_are_in_all(self) -> None: """Babel-optional symbols (FluentBundle, etc.) are listed in __all__.""" diff --git a/tests/test_regression_currency_decimal.py b/tests/test_regression_currency_decimal.py index e46cae46..6841cbf4 100644 --- a/tests/test_regression_currency_decimal.py +++ b/tests/test_regression_currency_decimal.py @@ -215,7 +215,7 @@ def test_empty_graph_no_warnings(self) -> None: class TestIntegration: - """Integration tests for v0.94.0 fixes.""" + """Integration tests for currency decimal handling.""" def test_currency_in_fluent_bundle(self) -> None: """Currency formatting in FluentBundle uses ISO decimals.""" diff --git a/tests/test_runtime_bundle_property.py b/tests/test_runtime_bundle_property.py index 60e450f7..230cfe3c 100644 --- a/tests/test_runtime_bundle_property.py +++ b/tests/test_runtime_bundle_property.py @@ -45,6 +45,11 @@ "fr", "fr_FR", ]) +log_source_paths = st.from_regex( + r"[A-Za-z0-9_-][A-Za-z0-9_. 
/-]{0,31}", + fullmatch=True, +) + # ============================================================================ # PROPERTY TESTS - TERM ATTRIBUTES IN CYCLE DETECTION @@ -168,7 +173,7 @@ def test_parse_error_with_source_path_logging(self, caplog: pytest.LogCaptureFix if log_messages: assert any("error_file.ftl" in msg for msg in log_messages) - @given(locale=locale_codes, filename=st.text(min_size=1)) # Remove arbitrary max + @given(locale=locale_codes, filename=log_source_paths) @settings(suppress_health_check=[HealthCheck.function_scoped_fixture]) def test_source_path_appears_in_logs_property( self, @@ -177,9 +182,6 @@ def test_source_path_appears_in_logs_property( caplog: pytest.LogCaptureFixture, ) -> None: """Property: source_path always appears in error/warning logs when provided.""" - assume(filename.isprintable()) - assume(not filename.startswith(".")) - bundle = FluentBundle(locale) invalid_ftl = "invalid syntax $$$" diff --git a/tests/test_runtime_cache_integrity.py b/tests/test_runtime_cache_integrity.py index 591aed91..4efc9364 100644 --- a/tests/test_runtime_cache_integrity.py +++ b/tests/test_runtime_cache_integrity.py @@ -1023,7 +1023,7 @@ def put_different(i: int) -> None: # ============================================================================ -# CACHE KEY COLLISION PREVENTION TESTS (v0.93.0) +# CACHE KEY COLLISION PREVENTION TESTS # ============================================================================ diff --git a/tests/test_runtime_depth_guard.py b/tests/test_runtime_depth_guard.py index 52294b8a..1010dabd 100644 --- a/tests/test_runtime_depth_guard.py +++ b/tests/test_runtime_depth_guard.py @@ -16,9 +16,7 @@ from hypothesis import strategies as st from ftllexengine.constants import MAX_DEPTH -from ftllexengine.core.depth_guard import DepthGuard, depth_clamp -from ftllexengine.diagnostics import ErrorCategory, FrozenFluentError -from ftllexengine.diagnostics.templates import ErrorTemplate +from ftllexengine.core.depth_guard import 
DepthGuard, DepthLimitExceededError, depth_clamp # ============================================================================ # Construction @@ -95,15 +93,15 @@ def test_context_manager_nested(self) -> None: assert guard.current_depth == 0 def test_context_manager_raises_on_exceeded(self) -> None: - """Context manager raises FrozenFluentError when depth exceeded.""" + """Context manager raises DepthLimitExceededError when depth exceeded.""" guard = DepthGuard(max_depth=3) with guard, guard, guard: # noqa: SIM117 - nested with - with pytest.raises(FrozenFluentError) as exc_info: + with pytest.raises(DepthLimitExceededError) as exc_info: with guard: pass - assert exc_info.value.category == ErrorCategory.RESOLUTION + assert exc_info.value.max_depth == 3 assert "3" in str(exc_info.value) def test_context_manager_depth_restoration_on_error(self) -> None: @@ -140,7 +138,7 @@ def test_state_not_corrupted_on_enter_failure(self) -> None: with guard, guard: assert guard.current_depth == 2 - with pytest.raises(FrozenFluentError), guard: + with pytest.raises(DepthLimitExceededError), guard: pass # current_depth must still be 2, not 3 assert guard.current_depth == 2 @@ -164,13 +162,13 @@ def test_check_passes_below_limit(self) -> None: guard.check() # Should not raise def test_check_raises_at_limit(self) -> None: - """check() raises FrozenFluentError when depth >= max_depth.""" + """check() raises DepthLimitExceededError when depth >= max_depth.""" guard = DepthGuard(max_depth=2) - with guard, guard, pytest.raises(FrozenFluentError) as exc_info: + with guard, guard, pytest.raises(DepthLimitExceededError) as exc_info: guard.check() - assert exc_info.value.category == ErrorCategory.RESOLUTION + assert exc_info.value.max_depth == 2 assert "2" in str(exc_info.value) def test_check_raises_above_limit(self) -> None: @@ -178,45 +176,40 @@ def test_check_raises_above_limit(self) -> None: guard = DepthGuard(max_depth=2) guard.current_depth = 5 - with pytest.raises(FrozenFluentError) 
as exc_info: + with pytest.raises(DepthLimitExceededError) as exc_info: guard.check() - assert exc_info.value.category == ErrorCategory.RESOLUTION + assert exc_info.value.max_depth == 2 # ============================================================================ -# FrozenFluentError from DepthGuard +# DepthLimitExceededError from DepthGuard # ============================================================================ class TestDepthGuardError: - """Test FrozenFluentError raised by DepthGuard.""" + """Test DepthLimitExceededError raised by DepthGuard.""" - def test_error_is_frozen_fluent_error(self) -> None: - """DepthGuard raises FrozenFluentError with RESOLUTION category.""" + def test_error_is_depth_limit_error(self) -> None: + """DepthGuard raises DepthLimitExceededError with the configured limit.""" guard = DepthGuard(max_depth=1) - with guard, pytest.raises(FrozenFluentError) as exc_info, guard: + with guard, pytest.raises(DepthLimitExceededError) as exc_info, guard: pass - assert exc_info.value.category == ErrorCategory.RESOLUTION + assert exc_info.value.max_depth == 1 assert isinstance(exc_info.value, Exception) - def test_error_carries_diagnostic(self) -> None: - """DepthGuard error carries diagnostic template data.""" - diagnostic = ErrorTemplate.depth_exceeded(50) - error = FrozenFluentError( - str(diagnostic), - ErrorCategory.RESOLUTION, - diagnostic=diagnostic, - ) - + def test_error_records_max_depth(self) -> None: + """DepthLimitExceededError stores the configured limit.""" + error = DepthLimitExceededError(50) + assert error.max_depth == 50 assert "50" in str(error) def test_error_includes_max_depth_value(self) -> None: """Error message includes the configured max_depth.""" guard = DepthGuard(max_depth=2) - with guard, guard, pytest.raises(FrozenFluentError, match="2"), guard: + with guard, guard, pytest.raises(DepthLimitExceededError, match="2"), guard: pass @@ -276,7 +269,7 @@ def test_property_context_manager_enforces_limit(max_depth: int) -> 
None: For any max_depth in [1, 100]: - Nesting max_depth times succeeds - - Nesting max_depth + 1 times raises FrozenFluentError + - Nesting max_depth + 1 times raises DepthLimitExceededError """ event(f"max_depth={max_depth}") guard = DepthGuard(max_depth=max_depth) @@ -290,9 +283,9 @@ def nest(remaining: int) -> None: nest(max_depth) assert guard.current_depth == 0 - with pytest.raises(FrozenFluentError) as exc_info: + with pytest.raises(DepthLimitExceededError) as exc_info: nest(max_depth + 1) - assert exc_info.value.category == ErrorCategory.RESOLUTION + assert exc_info.value.max_depth == max_depth @given(max_depth=st.integers(min_value=1, max_value=100)) @@ -336,9 +329,9 @@ def test_property_check_consistent_with_context_manager( guard.current_depth = target_depth if target_depth >= max_depth: - with pytest.raises(FrozenFluentError): + with pytest.raises(DepthLimitExceededError): guard.check() - with pytest.raises(FrozenFluentError), guard: + with pytest.raises(DepthLimitExceededError), guard: pass else: guard.check() # Should not raise diff --git a/tests/test_runtime_locale_context_property.py b/tests/test_runtime_locale_context_property.py index 7072840e..1d751561 100644 --- a/tests/test_runtime_locale_context_property.py +++ b/tests/test_runtime_locale_context_property.py @@ -494,9 +494,8 @@ def test_format_number_min_max_clamping_never_truncates( ) -> None: """When min > max, output has at least min decimal places (clamping, not error). - Pre-v0.145.0: min=3, max=2 would quantize to 2 places but format with - 3 required digits, producing "1.230" — wrong and incoherent. - Post-v0.145.0: max is clamped to max(min, max), output is consistent. + When minimumFractionDigits exceeds maximumFractionDigits, the effective + maximum is clamped upward so quantization and formatting stay coherent. 
Events emitted: - relation=min_gt_max: min > max (clamping applies) diff --git a/tests/test_runtime_resolver_core.py b/tests/test_runtime_resolver_core.py index 27ea44f7..e494372a 100644 --- a/tests/test_runtime_resolver_core.py +++ b/tests/test_runtime_resolver_core.py @@ -10,6 +10,7 @@ from __future__ import annotations +import importlib from datetime import UTC, datetime from decimal import Decimal from unittest.mock import patch @@ -475,6 +476,15 @@ def _nest_guards(self, guards: list[GlobalDepthGuard]) -> None: class TestResolverModuleExports: """Test resolver module export boundaries.""" + @staticmethod + def _import_fluent_value_from_resolver() -> object: + module = importlib.import_module("ftllexengine.runtime.resolver") + try: + return module.__dict__["FluentValue"] + except KeyError as exc: + msg = "cannot import name 'FluentValue'" + raise ImportError(msg) from exc + def test_fluent_value_available_from_function_bridge(self) -> None: """FluentValue is available from function_bridge module.""" from ftllexengine.runtime.function_bridge import ( @@ -486,11 +496,7 @@ def test_fluent_value_available_from_function_bridge(self) -> None: def test_importing_fluent_value_from_resolver_fails(self) -> None: """FluentValue is not importable from resolver module.""" with pytest.raises(ImportError, match="cannot import name 'FluentValue'"): - # pylint: disable=unused-import - # Intentional ImportError test — FluentValue removed from resolver exports. 
- from ftllexengine.runtime.resolver import ( # noqa: F401 - lazy import and re-export - FluentValue, - ) + self._import_fluent_value_from_resolver() def test_fluent_resolver_still_exported_from_resolver(self) -> None: """FluentResolver is exported from resolver module.""" @@ -711,7 +717,7 @@ class CustomObj: def __str__(self) -> str: return "custom" - assert resolver._format_value(CustomObj()) == "custom" # type: ignore[arg-type] + assert resolver._format_value(CustomObj()) == "custom" # ============================================================================ diff --git a/tests/test_runtime_security_expansion.py b/tests/test_runtime_security_expansion.py index 4ecd00c7..b0bf4468 100644 --- a/tests/test_runtime_security_expansion.py +++ b/tests/test_runtime_security_expansion.py @@ -6,14 +6,10 @@ from __future__ import annotations -import sys - import pytest from hypothesis import event, given, settings from hypothesis import strategies as st -sys.path.insert(0, "src") - from ftllexengine.constants import DEFAULT_MAX_EXPANSION_SIZE from ftllexengine.diagnostics import DiagnosticCode from ftllexengine.runtime.bundle import FluentBundle diff --git a/tests/test_runtime_survivability.py b/tests/test_runtime_survivability.py index bef12e8b..3ae184f2 100644 --- a/tests/test_runtime_survivability.py +++ b/tests/test_runtime_survivability.py @@ -315,7 +315,7 @@ def test_concurrent(thread_count, operation_count): # ============================================================================= -# INTEGRITY AND CORRECTNESS TESTS (v0.80.0+) +# INTEGRITY AND CORRECTNESS TESTS # ============================================================================= class TestRuntimeIntegrityBehavior: diff --git a/tests/test_rwlock_coverage.py b/tests/test_rwlock_coverage.py index 4d1aab21..490843fb 100644 --- a/tests/test_rwlock_coverage.py +++ b/tests/test_rwlock_coverage.py @@ -9,6 +9,7 @@ import threading import time +from typing import cast import pytest @@ -374,7 +375,8 @@ def 
test_active_writer_consistency(self) -> None: assert lock._active_writer is None lock._acquire_write() - assert lock._active_writer == current_thread_id + active_writer = cast("int", lock._active_writer) + assert active_writer == current_thread_id lock._release_write() assert lock._active_writer is None diff --git a/tests/test_syntax_parser_attributes.py b/tests/test_syntax_parser_attributes.py index f558c8c3..37239fbc 100644 --- a/tests/test_syntax_parser_attributes.py +++ b/tests/test_syntax_parser_attributes.py @@ -1,7 +1,7 @@ """Parser tests for message attributes (.attribute = pattern). Phase 3B: Attribute Parsing Tests -Coverage Target: +60-80 lines of parser.py (lines 1148-1199 and 1145-1182) +Focus: attribute parsing, multiline patterns, and real-world examples Tests cover: - Basic attribute parsing diff --git a/tests/test_syntax_parser_entries.py b/tests/test_syntax_parser_entries.py index c4cd2a32..b345a8ee 100644 --- a/tests/test_syntax_parser_entries.py +++ b/tests/test_syntax_parser_entries.py @@ -23,18 +23,18 @@ TextElement, ) from ftllexengine.syntax.cursor import Cursor, ParseResult -from ftllexengine.syntax.parser import rules +from ftllexengine.syntax.parser import entries as entry_rules +from ftllexengine.syntax.parser.context import ParseContext from ftllexengine.syntax.parser.core import FluentParserV1 -from ftllexengine.syntax.parser.rules import ( - ParseContext, +from ftllexengine.syntax.parser.entries import ( parse_attribute, parse_comment, parse_message, parse_message_attributes, parse_message_header, - parse_pattern, parse_term, ) +from ftllexengine.syntax.parser.patterns import parse_pattern from ftllexengine.syntax.parser.whitespace import ( skip_multiline_pattern_start, ) @@ -159,7 +159,7 @@ def test_attributes_only(self) -> None: def test_pattern_mock_fails(self) -> None: """Returns None when parse_pattern returns None.""" with patch( - "ftllexengine.syntax.parser.rules.parse_pattern", + 
"ftllexengine.syntax.parser.entries.parse_pattern", return_value=None, ): assert parse_message( @@ -169,10 +169,10 @@ def test_pattern_mock_fails(self) -> None: def test_attributes_mock_fails(self) -> None: """Returns None when parse_message_attributes returns None.""" with patch.object( - rules, "parse_message_attributes", + entry_rules, "parse_message_attributes", return_value=None, ): - assert rules.parse_message( + assert entry_rules.parse_message( Cursor("msg = Value\n", 0) ) is None @@ -215,7 +215,7 @@ def test_valid_attribute(self) -> None: def test_pattern_mock_fails(self) -> None: """Returns None when parse_pattern returns None.""" with patch( - "ftllexengine.syntax.parser.rules.parse_pattern", + "ftllexengine.syntax.parser.entries.parse_pattern", return_value=None, ): assert parse_attribute( @@ -318,7 +318,7 @@ def test_trailing_space_breaks_attr_loop(self) -> None: def test_pattern_mock_fails(self) -> None: """Returns None when parse_pattern returns None.""" with patch( - "ftllexengine.syntax.parser.rules.parse_pattern", + "ftllexengine.syntax.parser.entries.parse_pattern", return_value=None, ): assert parse_term( @@ -337,7 +337,7 @@ def mock_parse_pattern( return ParseResult(empty_pattern, cursor) with patch( - "ftllexengine.syntax.parser.rules.parse_pattern", + "ftllexengine.syntax.parser.entries.parse_pattern", side_effect=mock_parse_pattern, ): assert parse_term(Cursor(source, 0)) is None @@ -354,7 +354,7 @@ def mock_parse_pattern( return ParseResult(pattern, Cursor(source, 14)) with patch( - "ftllexengine.syntax.parser.rules.parse_pattern", + "ftllexengine.syntax.parser.entries.parse_pattern", side_effect=mock_parse_pattern, ): result = parse_term(Cursor(source, 0)) @@ -364,7 +364,7 @@ def mock_parse_pattern( def test_invalid_attribute_syntax(self) -> None: """Invalid attribute syntax restores cursor and breaks.""" source = "-term = Value\n.invalid" - original = rules.parse_attribute + original = entry_rules.parse_attribute def mock_attr(cursor, 
context=None): if ".invalid" in cursor.source[cursor.pos:]: @@ -372,7 +372,7 @@ def mock_attr(cursor, context=None): return original(cursor, context) with patch.object( - rules, "parse_attribute", side_effect=mock_attr + entry_rules, "parse_attribute", side_effect=mock_attr ): result = parse_term(Cursor(source, 0)) assert result is not None @@ -428,7 +428,7 @@ def mock_parse_pattern( return ParseResult(Pattern(elements=()), cursor) with patch( - "ftllexengine.syntax.parser.rules.parse_pattern", + "ftllexengine.syntax.parser.entries.parse_pattern", side_effect=mock_parse_pattern, ): assert parse_term(Cursor(source, 0)) is None diff --git a/tests/test_syntax_parser_error_recovery.py b/tests/test_syntax_parser_error_recovery.py index a4bd6c47..0ce64c77 100644 --- a/tests/test_syntax_parser_error_recovery.py +++ b/tests/test_syntax_parser_error_recovery.py @@ -67,10 +67,10 @@ def test_negative_sign_identifier_fallback_via_mock(self) -> None: """ with ( patch( - "ftllexengine.syntax.parser.rules.parse_number" + "ftllexengine.syntax.parser.expressions.parse_number" ) as mock_num, patch( - "ftllexengine.syntax.parser.rules.parse_identifier" + "ftllexengine.syntax.parser.expressions.parse_identifier" ) as mock_id, ): mock_num.return_value = ParseError("forced failure", Cursor("-test", 0)) @@ -138,7 +138,7 @@ def test_number_fails_defensive_line_1120(self) -> None: Requires mocking because parse_number is robust for digit start. """ with patch( - "ftllexengine.syntax.parser.rules.parse_number" + "ftllexengine.syntax.parser.expressions.parse_number" ) as mock: mock.return_value = ParseError("forced failure", Cursor("9)", 0)) assert parse_argument_expression(Cursor("9)", 0)) is None @@ -149,7 +149,7 @@ def test_identifier_fails_defensive_line_1139(self) -> None: Requires mocking because is_identifier_start guarantees success. 
""" with patch( - "ftllexengine.syntax.parser.rules.parse_identifier" + "ftllexengine.syntax.parser.expressions.parse_identifier" ) as mock: mock.return_value = ParseError("forced failure", Cursor("x)", 0)) assert parse_argument_expression(Cursor("x)", 0)) is None @@ -463,7 +463,7 @@ class TestDefensiveMocking: def test_parse_message_attrs_returns_none(self) -> None: """parse_message_attributes returns None (defensive).""" with patch( - "ftllexengine.syntax.parser.rules" + "ftllexengine.syntax.parser.entries" ".parse_message_attributes" ) as mock: mock.return_value = None @@ -474,7 +474,7 @@ def test_parse_message_attrs_returns_none(self) -> None: def test_parse_attribute_pattern_returns_none(self) -> None: """parse_pattern returns None in parse_attribute (defensive).""" with patch( - "ftllexengine.syntax.parser.rules.parse_pattern" + "ftllexengine.syntax.parser.entries.parse_pattern" ) as mock: mock.return_value = None assert parse_attribute( @@ -484,7 +484,7 @@ def test_parse_attribute_pattern_returns_none(self) -> None: def test_parse_term_pattern_returns_none(self) -> None: """parse_pattern returns None in parse_term (defensive).""" with patch( - "ftllexengine.syntax.parser.rules.parse_pattern" + "ftllexengine.syntax.parser.entries.parse_pattern" ) as mock: mock.return_value = None assert parse_term( @@ -494,7 +494,7 @@ def test_parse_term_pattern_returns_none(self) -> None: def test_parse_term_attrs_returns_none_line_2038(self) -> None: """Line 2038: parse_message_attributes returns None in term.""" with patch( - "ftllexengine.syntax.parser.rules" + "ftllexengine.syntax.parser.entries" ".parse_message_attributes" ) as mock: mock.return_value = None @@ -505,7 +505,7 @@ def test_parse_term_attrs_returns_none_line_2038(self) -> None: def test_parse_message_pattern_returns_none(self) -> None: """parse_pattern returns None in parse_message (defensive).""" with patch( - "ftllexengine.syntax.parser.rules.parse_pattern" + 
"ftllexengine.syntax.parser.entries.parse_pattern" ) as mock: mock.return_value = None assert parse_message( diff --git a/tests/test_syntax_parser_expression_coverage.py b/tests/test_syntax_parser_expression_coverage.py index 3a268db3..2ed40978 100644 --- a/tests/test_syntax_parser_expression_coverage.py +++ b/tests/test_syntax_parser_expression_coverage.py @@ -1,10 +1,10 @@ -"""Targeted tests for specific uncovered lines in expressions.py. +"""Targeted tests for specific expression-parser edge branches. Focuses on: -- Lines 117-118: Variant key starting with - that becomes identifier -- Line 307: Identifier parsing failure in argument expression -- Line 627: Identifier parsing failure in inline expression -- Line 740: Nesting depth exceeded in parse_placeable +- Variant-key fallback from number parsing to identifier parsing +- Identifier parsing failure in argument expressions +- Identifier parsing failure in inline expressions +- Nesting-depth rejection in parse_placeable """ from __future__ import annotations @@ -32,15 +32,15 @@ # ============================================================================ -class TestLines117To118VariantKeyMinusIdentifier: - """Test lines 117-118: variant key with - prefix that parses as identifier.""" +class TestVariantKeyMinusIdentifierFallback: + """Variant keys can fall back from number parsing to identifier parsing.""" def test_variant_key_minus_then_alpha_becomes_identifier(self) -> None: - """Test parse_variant_key with '-' followed by alpha (lines 117-118). + """parse_variant_key can recover from numeric parse failure. When variant key starts with '-' and next char is alpha, parse_number - fails, so we fall through to parse_identifier (lines 112-118). - To hit lines 117-118, we need parse_number to fail but parse_identifier + fails, so we fall through to parse_identifier. + To hit that fallback, we need parse_number to fail but parse_identifier to succeed. This requires mocking both functions. 
""" source = "-abc" @@ -51,15 +51,15 @@ def test_variant_key_minus_then_alpha_becomes_identifier(self) -> None: mock_id_result = ParseResult("abc", Cursor(source, 4)) with patch( - "ftllexengine.syntax.parser.rules.parse_number", + "ftllexengine.syntax.parser.expressions.parse_number", return_value=ParseError("forced failure", Cursor("-abc", 0)), ), patch( - "ftllexengine.syntax.parser.rules.parse_identifier", + "ftllexengine.syntax.parser.expressions.parse_identifier", return_value=mock_id_result, ): result = parse_variant_key(cursor) - # Should return Identifier created from string (lines 117-118) + # Should return Identifier created from the fallback parse result assert result is not None assert isinstance(result.value, Identifier) assert result.value.name == "abc" @@ -70,14 +70,14 @@ def test_variant_key_minus_then_alpha_becomes_identifier(self) -> None: # ============================================================================ -class TestLine307ArgumentExpressionIdentifierFailure: - """Test line 307: parse_argument_expression when identifier parsing fails.""" +class TestArgumentExpressionIdentifierFailure: + """parse_argument_expression returns None when identifier parsing fails.""" def test_argument_expression_identifier_fails_line_307(self) -> None: - """Test parse_argument_expression returns None when parse_identifier fails (line 307).""" + """parse_argument_expression returns None when parse_identifier fails.""" # Start with alpha char but have invalid identifier # (e.g., just 'a' followed by invalid char) - # To hit line 307, we need cursor.current.isalpha() to be True + # To hit this branch, we need cursor.current.isalpha() to be True # but parse_identifier to return None # This is hard to trigger naturally since parse_identifier is quite permissive @@ -87,12 +87,12 @@ def test_argument_expression_identifier_fails_line_307(self) -> None: cursor = Cursor(source, 0) with patch( - "ftllexengine.syntax.parser.rules.parse_identifier", + 
"ftllexengine.syntax.parser.expressions.parse_identifier", return_value=ParseError("forced failure", Cursor("a999!!!", 0)), ): result = parse_argument_expression(cursor) - # Should return None when identifier parsing fails (line 307) + # Should return None when identifier parsing fails assert result is None @@ -101,22 +101,22 @@ def test_argument_expression_identifier_fails_line_307(self) -> None: # ============================================================================ -class TestLine627InlineExpressionIdentifierFailure: - """Test line 627: parse_inline_expression when identifier parsing fails.""" +class TestInlineExpressionIdentifierFailure: + """parse_inline_expression returns None when identifier parsing fails.""" def test_inline_expression_identifier_fails_line_627(self) -> None: - """Test parse_inline_expression returns None when parse_identifier fails (line 627).""" + """parse_inline_expression returns None when parse_identifier fails.""" source = "U999" # Starts with uppercase cursor = Cursor(source, 0) with patch( - "ftllexengine.syntax.parser.rules.parse_identifier", + "ftllexengine.syntax.parser.expressions.parse_identifier", return_value=ParseError("forced failure", Cursor("U999", 0)), ): result = parse_inline_expression(cursor) - # Should return None when identifier parsing fails (line 627) + # Should return None when identifier parsing fails assert result is None @@ -125,11 +125,11 @@ def test_inline_expression_identifier_fails_line_627(self) -> None: # ============================================================================ -class TestLine740NestingDepthExceeded: - """Test line 740: parse_placeable when nesting depth is exceeded.""" +class TestPlaceableNestingDepthExceeded: + """parse_placeable returns None when nesting depth is exceeded.""" def test_placeable_nesting_depth_exceeded_line_740(self) -> None: - """Test parse_placeable returns None when nesting depth exceeded (line 740).""" + """parse_placeable returns None when nesting depth is 
exceeded.""" source = "$var}" cursor = Cursor(source, 0) @@ -138,7 +138,7 @@ def test_placeable_nesting_depth_exceeded_line_740(self) -> None: result = parse_placeable(cursor, context) - # Should return None due to depth exceeded (line 740) + # Should return None due to depth exceeded assert result is None def test_bundle_with_excessive_nesting(self) -> None: @@ -167,34 +167,34 @@ class TestAdditionalUncoveredLines: """Tests for other uncovered lines in expressions.py.""" def test_variant_missing_closing_bracket_line_177(self) -> None: - """Test variant missing ] (line 177).""" + """Variant parsing fails when the closing bracket is missing.""" source = "[one text" # Missing ] cursor = Cursor(source, 0) result = parse_variant(cursor) - # Should return None due to missing ] (line 177) + # Should return None due to the missing closing bracket assert result is None def test_variant_pattern_parse_fails_line_187(self) -> None: - """Test variant when pattern parsing fails (line 187).""" + """Variant parsing fails when the variant pattern parser fails.""" source = "[one] pattern" cursor = Cursor(source, 0) with patch( - "ftllexengine.syntax.parser.rules.parse_simple_pattern", + "ftllexengine.syntax.parser.expressions.parse_simple_pattern", return_value=None, ): result = parse_variant(cursor) - # Should return None when pattern parsing fails (line 187) + # Should return None when pattern parsing fails assert result is None def test_string_literal_parse_fails_line_289(self) -> None: - """Test argument expression when string literal parsing fails (line 289).""" + """Argument parsing fails when string literal parsing fails.""" source = '"invalid' cursor = Cursor(source, 0) @@ -205,23 +205,23 @@ def test_string_literal_parse_fails_line_289(self) -> None: ): result = parse_argument_expression(cursor) - # Should return None when string parsing fails (line 289) + # Should return None when string parsing fails assert result is None def test_number_parse_fails_line_296(self) -> 
None: - """Test argument expression when number parsing fails (line 296).""" + """Argument parsing fails when number parsing fails.""" source = "123" cursor = Cursor(source, 0) # Patch in the module where it's used (expressions), not where it's defined with patch( - "ftllexengine.syntax.parser.rules.parse_number", + "ftllexengine.syntax.parser.expressions.parse_number", return_value=ParseError("forced failure", Cursor("123", 0)), ): result = parse_argument_expression(cursor) - # Should return None when number parsing fails (line 296) + # Should return None when number parsing fails assert result is None def test_function_name_lowercase_is_valid(self) -> None: @@ -240,7 +240,7 @@ def test_function_name_lowercase_is_valid(self) -> None: assert result.value.id.name == "lowercase" def test_named_argument_not_identifier_line_367(self) -> None: - """Test named argument when name is not identifier (line 367), soft recovery.""" + """Soft recovery when a named argument name is not an identifier.""" # This is hard to trigger directly, but we can test via function call bundle = FluentBundle("en_US", strict=False) # Try to use a non-identifier as named argument name @@ -265,44 +265,44 @@ def test_named_argument_value_not_literal_line_394(self) -> None: assert result is not None def test_term_reference_missing_dash_line_491(self) -> None: - """Test term reference without - prefix (line 491).""" + """Term references require the leading '-' prefix.""" source = "brand" # No - prefix cursor = Cursor(source, 0) result = parse_term_reference(cursor) - # Should return None without - prefix (line 491) + # Should return None without the required '-' prefix assert result is None def test_term_arguments_missing_closing_paren_line_533(self) -> None: - """Test term reference with arguments missing ) (line 533).""" + """Term-reference parsing fails when the closing ')' is missing.""" source = '-brand(case: "nom"' # Missing ) cursor = Cursor(source, 0) result = parse_term_reference(cursor) - 
# Should return None due to missing ) (line 533) + # Should return None due to the missing closing parenthesis assert result is None def test_inline_expression_variable_parse_fails_line_576(self) -> None: - """Test inline expression when variable parsing fails (line 576).""" + """Inline-expression parsing fails when variable parsing fails.""" source = "$var" cursor = Cursor(source, 0) with patch( - "ftllexengine.syntax.parser.rules.parse_variable_reference", + "ftllexengine.syntax.parser.expressions.parse_variable_reference", return_value=None, ): result = parse_inline_expression(cursor) - # Should return None when variable parsing fails (line 576) + # Should return None when variable parsing fails assert result is None def test_inline_expression_string_parse_fails_line_584(self) -> None: - """Test inline expression when string parsing fails (line 584).""" + """Inline-expression parsing fails when string parsing fails.""" source = '"invalid' cursor = Cursor(source, 0) @@ -313,52 +313,52 @@ def test_inline_expression_string_parse_fails_line_584(self) -> None: ): result = parse_inline_expression(cursor) - # Should return None when string parsing fails (line 584) + # Should return None when string parsing fails assert result is None def test_inline_expression_number_parse_fails_line_602(self) -> None: - """Test inline expression when negative number parsing fails (line 602).""" + """Inline-expression parsing fails when negative-number parsing fails.""" source = "-123" cursor = Cursor(source, 0) with patch( - "ftllexengine.syntax.parser.rules.parse_number", + "ftllexengine.syntax.parser.expressions.parse_number", return_value=ParseError("forced failure", Cursor("-123", 0)), ): result = parse_inline_expression(cursor) - # Should return None when number parsing fails (line 602) + # Should return None when number parsing fails assert result is None def test_inline_expression_digit_number_parse_fails_line_614(self) -> None: - """Test inline expression when positive number 
parsing fails (line 614).""" + """Inline-expression parsing fails when positive-number parsing fails.""" source = "123" cursor = Cursor(source, 0) with patch( - "ftllexengine.syntax.parser.rules.parse_number", + "ftllexengine.syntax.parser.expressions.parse_number", return_value=ParseError("forced failure", Cursor("123", 0)), ): result = parse_inline_expression(cursor) - # Should return None when number parsing fails (line 614) + # Should return None when number parsing fails assert result is None def test_inline_expression_lowercase_identifier_fails_line_672(self) -> None: - """Test inline expression when lowercase identifier parsing fails (line 672).""" + """Inline-expression parsing fails when lowercase identifier parsing fails.""" source = "msg" cursor = Cursor(source, 0) with patch( - "ftllexengine.syntax.parser.rules.parse_identifier", + "ftllexengine.syntax.parser.expressions.parse_identifier", return_value=ParseError("forced failure", Cursor("msg", 0)), ): result = parse_inline_expression(cursor) - # Should return None when identifier parsing fails (line 672) + # Should return None when identifier parsing fails assert result is None def test_uppercase_message_reference_with_trailing_dot(self) -> None: @@ -402,8 +402,8 @@ def test_lowercase_message_reference_with_trailing_dot(self) -> None: assert result.cursor.pos == 4 def test_select_expression_missing_closing_brace_line_801(self) -> None: - """Test select expression missing } (line 801).""" - # To hit line 801, parse_placeable needs to successfully parse a select expression + """Select-expression parsing fails when the closing '}' is missing.""" + # To hit this branch, parse_placeable needs to successfully parse a select expression # but then find that cursor is not at } (either EOF or wrong character) # The easiest way is to create FTL that has valid select but no closing } @@ -419,7 +419,7 @@ def test_select_expression_missing_closing_brace_line_801(self) -> None: assert len(resource.entries) >= 1 def 
test_positional_after_named_argument_line_402(self) -> None: - """Test positional argument after named argument (line 402), soft recovery.""" + """Soft recovery when a positional argument follows a named argument.""" bundle = FluentBundle("en_US", strict=False) # Positional args must come before named args ftl = """msg = {NUMBER(style: "percent", $val)}""" diff --git a/tests/test_syntax_parser_functions.py b/tests/test_syntax_parser_functions.py index 7877aceb..f1bd1f9b 100644 --- a/tests/test_syntax_parser_functions.py +++ b/tests/test_syntax_parser_functions.py @@ -1,7 +1,6 @@ """Parser tests for FunctionReference (NUMBER, DATETIME, custom functions). -These tests cover 193 lines of parser code (lines 689-881) that were previously -at ZERO coverage. Function parsing includes: +These tests exercise function-call parsing, including: - Basic function calls: NUMBER($value) - Positional arguments: CUSTOM($a, $b, $c) - Named arguments: NUMBER($val, minimumFractionDigits: 2) @@ -9,7 +8,7 @@ - String/Number literals as arguments - Error cases: invalid syntax, argument ordering -Phase 3A Coverage Target: +193 lines (54% → 68% parser coverage) +Phase 3A focus: function-call grammar and argument handling """ from __future__ import annotations diff --git a/tests/test_syntax_parser_patterns.py b/tests/test_syntax_parser_patterns.py index 08e0dbde..b616e333 100644 --- a/tests/test_syntax_parser_patterns.py +++ b/tests/test_syntax_parser_patterns.py @@ -386,7 +386,7 @@ def test_placeable_parse_fails(self) -> None: """Returns None when placeable parsing fails.""" cursor = Cursor("Text {invalid", 0) with patch( - "ftllexengine.syntax.parser.rules.parse_placeable", + "ftllexengine.syntax.parser.expressions.parse_placeable", return_value=None, ): result = parse_simple_pattern(cursor) @@ -672,7 +672,7 @@ def test_placeable_parse_fails(self) -> None: """Returns None when parse_placeable fails.""" cursor = Cursor("Text {invalid", 0) with patch( - 
"ftllexengine.syntax.parser.rules.parse_placeable", + "ftllexengine.syntax.parser.expressions.parse_placeable", return_value=None, ): result = parse_pattern(cursor) diff --git a/tests/test_syntax_parser_systematic.py b/tests/test_syntax_parser_systematic.py index 9a338d38..7c3383d9 100644 --- a/tests/test_syntax_parser_systematic.py +++ b/tests/test_syntax_parser_systematic.py @@ -1,8 +1,8 @@ -"""Systematic error path testing for parser.py. +"""Systematic error path testing for syntax parser primitives. -This module provides comprehensive coverage of all error paths in the parser, -organized by parser method. Each test targets a specific uncovered line that -returns a Failure(ParseError(...)). +This module provides comprehensive coverage of parser error paths, organized +by parser method. Each test targets a specific failure branch that returns +``ParseError``. Testing Philosophy: - Every error path should have at least one test @@ -30,12 +30,12 @@ class TestParseNumberErrorPaths: """Error path tests for _parse_number method. Target coverage: - - Line 222: No digits after minus sign - - Line 234: No digits after decimal point + - No digits after minus sign + - No digits after decimal point """ def test_number_no_digits_after_minus(self): - """Line 222: '-' not followed by digit. + """'-' not followed by a digit fails. Example: test = { - } Trigger: Minus sign followed by non-digit or EOF @@ -48,7 +48,7 @@ def test_number_no_digits_after_minus(self): assert isinstance(result, ParseError) def test_number_no_digits_after_minus_eof(self): - """Line 222: '-' at end of string. + """'-' at end of string fails. Example: test = { - Trigger: Minus sign at EOF @@ -60,7 +60,7 @@ def test_number_no_digits_after_minus_eof(self): assert isinstance(result, ParseError) def test_number_no_digits_after_minus_non_digit(self): - """Line 222: '-' followed by letter. + """'-' followed by a letter fails. 
Example: test = { -x } Trigger: Minus sign followed by alphabetic character @@ -72,7 +72,7 @@ def test_number_no_digits_after_minus_non_digit(self): assert isinstance(result, ParseError) def test_number_decimal_no_digits(self): - """Line 234: '3.' with no digits after decimal point. + """'3.' with no digits after the decimal point fails. Example: test = { 3. } Trigger: Decimal point followed by non-digit @@ -84,7 +84,7 @@ def test_number_decimal_no_digits(self): assert isinstance(result, ParseError) def test_number_decimal_no_digits_eof(self): - """Line 234: Number ending with decimal at EOF. + """A number ending with a decimal point at EOF fails. Example: test = { 3. Trigger: Decimal point at end of input @@ -96,11 +96,11 @@ def test_number_decimal_no_digits_eof(self): assert isinstance(result, ParseError) def test_number_just_decimal_point(self): - """Line 234: Just a decimal point with no integer part. + """A decimal point without an integer part fails. Example: test = { . } Trigger: Decimal point as first character - Note: This actually triggers line 222 first (no digits before decimal) + Note: This actually fails earlier because there are no digits before the decimal point """ cursor = Cursor("test = { . }", 9) # Position at '.' @@ -114,9 +114,9 @@ class TestParseEscapeSequenceErrorPaths: """Error path tests for _parse_escape_sequence method. Target coverage: - - Line 293: EOF after backslash in string - - Line 298: Escape sequence \" (actually SUCCESS, testing for coverage) - - Line 300: Escape sequence \\ (actually SUCCESS, testing for coverage) + - EOF after backslash in string + - Quote escape sequence + - Backslash escape sequence """ def test_escape_eof_after_backslash(self): @@ -578,7 +578,7 @@ def test_escape_sequence(self, escape, expected, description): """Test all supported escape sequences systematically. This parametrized test ensures all escape sequences work correctly. - Covers lines 298, 300, 302, 304, 307-321. 
+ It covers the supported escape-sequence branches. """ parser = FluentParserV1() ftl = f'test = {{ "text{escape}more" }}' diff --git a/tests/test_syntax_parser_term_references.py b/tests/test_syntax_parser_term_references.py index 92461b10..45f66445 100644 --- a/tests/test_syntax_parser_term_references.py +++ b/tests/test_syntax_parser_term_references.py @@ -1,7 +1,7 @@ """Parser tests for term references in inline expressions ({ -term }). Phase 3D: TermReference Parsing Tests -Coverage Target: +50-60 lines of parser.py (lines 1348-1409 and 937-953) +Focus: term references with attributes, arguments, and integration cases Tests cover: - Basic term reference: { -brand } diff --git a/tests/test_syntax_parser_terms.py b/tests/test_syntax_parser_terms.py index 92c2489c..3b031501 100644 --- a/tests/test_syntax_parser_terms.py +++ b/tests/test_syntax_parser_terms.py @@ -1,7 +1,7 @@ """Parser tests for term definitions (-term-id = pattern). Phase 3C: Term Parsing Tests -Coverage Target: +60-70 lines of parser.py (lines 1237-1318 and 100-108) +Focus: term definitions, attributes, and integration with messages Tests cover: - Basic term parsing diff --git a/tests/test_syntax_parser_validator_branches.py b/tests/test_syntax_parser_validator_branches.py index 46977633..1c10772a 100644 --- a/tests/test_syntax_parser_validator_branches.py +++ b/tests/test_syntax_parser_validator_branches.py @@ -1,7 +1,7 @@ -"""Targeted branch coverage for rules.py and validator.py. +"""Targeted branch coverage for parser grammar modules and validator.py. 
Addresses specific uncovered lines and branches: -- rules.py line 885: parse_term_reference returning None in parse_argument_expression +- parse_term_reference returning None in parse_argument_expression - NumberLiteral.__post_init__: invariant enforcement for raw/value consistency - validator.py branches 157->exit, 246->exit: Match case exits for Junk and TextElement @@ -40,7 +40,7 @@ class TestRulesLine885TermReferenceFailure: - """Test rules.py line 885: parse_term_reference returning None. + """Test parse_term_reference returning None inside parse_argument_expression. Line 885 is triggered when parse_term_reference fails after we've already verified the character after '-' is an identifier start. @@ -48,13 +48,13 @@ class TestRulesLine885TermReferenceFailure: """ def test_term_reference_with_invalid_attribute_name(self) -> None: - """Term reference with dot but invalid attribute triggers line 885. + """Term reference with dot but invalid attribute returns None. Input: -brand.123 - '-' followed by 'b' (identifier start) -> tries parse_term_reference - parse_term_reference parses '-brand', sees '.', tries attribute - Attribute identifier fails (starts with digit) -> returns None - - Back in parse_argument_expression, line 885 returns None + - Back in parse_argument_expression, the defensive branch returns None """ cursor = Cursor("-brand.123", 0) context = ParseContext() @@ -64,7 +64,7 @@ def test_term_reference_with_invalid_attribute_name(self) -> None: assert result is None def test_term_reference_with_dot_at_eof(self) -> None: - """Term reference with trailing dot at EOF triggers line 885. + """Term reference with trailing dot at EOF returns None. Input: -brand. - parse_term_reference tries to parse attribute after '.' @@ -78,7 +78,7 @@ def test_term_reference_with_dot_at_eof(self) -> None: assert result is None def test_term_reference_with_dot_followed_by_space(self) -> None: - """Term reference with dot followed by space triggers line 885. 
+ """Term reference with dot followed by space returns None. Input: -brand. x - parse_term_reference sees '.', tries to parse attribute @@ -92,7 +92,7 @@ def test_term_reference_with_dot_followed_by_space(self) -> None: assert result is None def test_term_reference_with_dot_followed_by_special_char(self) -> None: - """Term reference with dot followed by special char triggers line 885.""" + """Term reference with dot followed by special char returns None.""" cursor = Cursor("-brand.@", 0) context = ParseContext() result = parse_argument_expression(cursor, context) @@ -305,7 +305,7 @@ def test_empty_text_element(self) -> None: class TestValidatorCommentEntry: - """Comprehensive tests for Comment entry validation (line 156).""" + """Comprehensive tests for comment-entry validation.""" def test_single_line_comment(self) -> None: """Single-line comment passes validation.""" diff --git a/tests/test_syntax_parser_variant.py b/tests/test_syntax_parser_variant.py index 250157ad..c27561f7 100644 --- a/tests/test_syntax_parser_variant.py +++ b/tests/test_syntax_parser_variant.py @@ -246,11 +246,10 @@ def test_variant_keys_with_arbitrary_blank_count( class TestLongIdentifierVariantKeys: - """Tests for variant keys with long identifiers (v0.89.0 fix). + """Tests for variant keys with long identifiers. - Prior to v0.89.0, MAX_LOOKAHEAD_CHARS (128) was smaller than - _MAX_IDENTIFIER_LENGTH (256), causing variant keys with 129-256 char - identifiers to be misparsed as literal text. + A previous parser lookahead mismatch caused 129-256 character identifiers + to be misparsed as literal text instead of variant keys. """ def test_variant_key_with_max_length_identifier(self) -> None: diff --git a/tests/test_syntax_spec_conformance.py b/tests/test_syntax_spec_conformance.py index 20506164..a70cc82e 100644 --- a/tests/test_syntax_spec_conformance.py +++ b/tests/test_syntax_spec_conformance.py @@ -1,13 +1,14 @@ -"""Spec conformance tests using official Fluent.js test fixtures. 
+"""Spec conformance tests using vendored official Fluent.js test fixtures. This module implements SYSTEM 8 from the testing strategy: Spec Conformance Testing. -We import official test fixtures from the Fluent.js reference implementation to ensure -our parser conforms to the Fluent specification. +We vendor selected official test fixtures from the Fluent.js reference +implementation to ensure our parser conforms to the Fluent specification +without depending on live network fetches at test time. Strategy: -1. Fetch official .ftl/.json test fixtures from projectfluent/fluent.js +1. Load vendored official .ftl fixtures from projectfluent/fluent.js 2. Parse .ftl files with our parser -3. Compare structural properties with reference JSON AST +3. Compare structural properties with counts derived from the reference JSON AST 4. Focus on semantic equivalence (not exact AST match since schemas differ) Official fixtures: @@ -20,70 +21,21 @@ from __future__ import annotations -import json -from typing import Any -from urllib.error import URLError -from urllib.request import urlopen - import pytest from ftllexengine.syntax.ast import Comment, Junk, Message, Resource, Term from ftllexengine.syntax.parser import FluentParserV1 +from tests.helpers.fluentjs_fixtures import STRUCTURE_FIXTURES -# ============================================================================== -# FIXTURE FETCHING -# ============================================================================== - -FIXTURES_BASE_URL = "https://raw.githubusercontent.com/projectfluent/fluent.js/main/fluent-syntax/test/fixtures_structure" # pylint: disable=line-too-long - -# Selected fixtures covering core functionality -# Format: (name, description) - expected counts read from reference JSON -# Note: Only includes fixtures that exist in Fluent.js repository -CORE_FIXTURES = [ - ("simple_message", "Basic message"), - ("multiline_pattern", "Multiline pattern"), - ("multiline_with_placeables", "Pattern with placeables"), - 
("select_expressions", "Select expressions"), - ("blank_lines", "Blank lines handling"), - ("term", "Simple term"), -] - -# Error handling fixtures -ERROR_FIXTURES = [ - ("empty_resource", "Empty FTL file"), -] - - -def fetch_fixture(name: str, extension: str) -> str: - """Fetch fixture content from GitHub. - - Args: - name: Fixture name (without extension) - extension: File extension (.ftl or .json) - - Returns: - File content as string - - Raises: - Exception: If fetch fails (test will be skipped) - """ - url = f"{FIXTURES_BASE_URL}/{name}.{extension}" - try: - with urlopen(url, timeout=10) as response: - return response.read().decode("utf-8") - except (URLError, OSError, UnicodeDecodeError) as e: - pytest.skip(f"Could not fetch fixture from {url}: {e}") - - -def fetch_ftl_fixture(name: str) -> str: - """Fetch .ftl fixture.""" - return fetch_fixture(name, "ftl") - - -def fetch_json_fixture(name: str) -> dict[str, Any]: - """Fetch and parse .json fixture.""" - content = fetch_fixture(name, "json") - return json.loads(content) +CORE_FIXTURES = ( + "simple_message", + "multiline_pattern", + "multiline_with_placeables", + "select_expressions", + "blank_lines", + "term", +) +ERROR_FIXTURES = ("empty_resource",) # ============================================================================== @@ -120,31 +72,6 @@ def count_ast_nodes_by_type(resource: Resource) -> dict[str, int]: return counts -def count_reference_nodes_by_type(json_ast: dict[str, Any]) -> dict[str, int]: - """Count nodes by type in reference JSON AST. 
- - Args: - json_ast: Reference AST from Fluent.js - - Returns: - Dictionary mapping node type names to counts - """ - counts: dict[str, int] = { - "Message": 0, - "Term": 0, - "Comment": 0, - "Junk": 0, - } - - body = json_ast.get("body", ()) - for entry in body: - entry_type = entry.get("type", "") - if entry_type in counts: - counts[entry_type] += 1 - - return counts - - # ============================================================================== # SPEC CONFORMANCE TESTS # ============================================================================== @@ -154,14 +81,13 @@ class TestSpecConformanceCoreFeatures: """Test parser conformance with official Fluent.js fixtures for core features.""" @pytest.mark.parametrize( - ("fixture_name", "description"), + "fixture_name", CORE_FIXTURES, - ids=[f[0] for f in CORE_FIXTURES], + ids=CORE_FIXTURES, ) def test_core_fixture_structural_conformance( self, fixture_name: str, - description: str, ) -> None: """Test structural conformance with official fixtures. @@ -170,13 +96,11 @@ def test_core_fixture_structural_conformance( Note: Some fixtures show spec discrepancies where our parser behavior differs from Fluent.js reference. These are marked for investigation. 
""" - # Fetch fixtures - ftl_content = fetch_ftl_fixture(fixture_name) - reference_ast = fetch_json_fixture(fixture_name) + fixture = STRUCTURE_FIXTURES[fixture_name] # Parse with our parser parser = FluentParserV1() - resource = parser.parse(ftl_content) + resource = parser.parse(fixture.ftl) # Verify resource type assert isinstance(resource, Resource) @@ -184,32 +108,34 @@ def test_core_fixture_structural_conformance( # Count nodes in our AST our_counts = count_ast_nodes_by_type(resource) - # Count nodes in reference AST - ref_counts = count_reference_nodes_by_type(reference_ast) - # Verify structural equivalence assert ( - our_counts["Message"] == ref_counts["Message"] - ), f"Message count mismatch for {fixture_name}: expected {ref_counts['Message']}, got {our_counts['Message']}" + our_counts["Message"] == fixture.expected_messages + ), ( + f"Message count mismatch for {fixture_name}: expected " + f"{fixture.expected_messages}, got {our_counts['Message']}" + ) assert ( - our_counts["Term"] == ref_counts["Term"] - ), f"Term count mismatch for {fixture_name}: expected {ref_counts['Term']}, got {our_counts['Term']}" + our_counts["Term"] == fixture.expected_terms + ), ( + f"Term count mismatch for {fixture_name}: expected " + f"{fixture.expected_terms}, got {our_counts['Term']}" + ) @pytest.mark.parametrize( - ("fixture_name", "description"), + "fixture_name", CORE_FIXTURES[:5], # First 5 fixtures for detailed testing - ids=[f[0] for f in CORE_FIXTURES[:5]], + ids=CORE_FIXTURES[:5], ) def test_parse_determinism_on_fixtures( self, fixture_name: str, - description: str, ) -> None: """Test parser determinism on official fixtures. Property: Parsing same fixture twice yields identical results. 
""" - ftl_content = fetch_ftl_fixture(fixture_name) + ftl_content = STRUCTURE_FIXTURES[fixture_name].ftl parser = FluentParserV1() # Parse twice @@ -229,21 +155,19 @@ class TestSpecConformanceErrorHandling: """Test parser error handling conformance with official fixtures.""" @pytest.mark.parametrize( - ("fixture_name", "description"), + "fixture_name", ERROR_FIXTURES, - ids=[f[0] for f in ERROR_FIXTURES], + ids=ERROR_FIXTURES, ) def test_error_fixture_robustness( self, fixture_name: str, - description: str, ) -> None: """Test parser robustness on error fixtures. Property: Parser never crashes on invalid input. """ - # Fetch fixture - ftl_content = fetch_ftl_fixture(fixture_name) + ftl_content = STRUCTURE_FIXTURES[fixture_name].ftl # Parse (should not crash) parser = FluentParserV1() @@ -267,14 +191,13 @@ class TestSpecConformanceRoundtrip: """Test roundtrip property on official fixtures.""" @pytest.mark.parametrize( - ("fixture_name", "description"), + "fixture_name", CORE_FIXTURES[:5], # First 5 fixtures for roundtrip - ids=[f[0] for f in CORE_FIXTURES[:5]], + ids=CORE_FIXTURES[:5], ) def test_parse_serialize_parse_converges( self, fixture_name: str, - description: str, ) -> None: """Test parse→serialize→parse convergence on official fixtures. 
@@ -282,7 +205,7 @@ def test_parse_serialize_parse_converges( """ from ftllexengine.syntax.serializer import serialize - ftl_content = fetch_ftl_fixture(fixture_name) + ftl_content = STRUCTURE_FIXTURES[fixture_name].ftl parser = FluentParserV1() # Parse original @@ -308,9 +231,9 @@ def test_parse_serialize_parse_converges( class TestSpecConformanceEdgeCases: """Test parser behavior on edge cases from official fixtures.""" - def test_empty_resource(self): + def test_empty_resource(self) -> None: """Test empty FTL resource.""" - ftl = fetch_ftl_fixture("empty_resource") + ftl = STRUCTURE_FIXTURES["empty_resource"].ftl parser = FluentParserV1() resource = parser.parse(ftl) @@ -318,9 +241,9 @@ def test_empty_resource(self): # Empty resource should have no entries or only whitespace/comments assert len(resource.entries) >= 0 - def test_blank_lines_handling(self): + def test_blank_lines_handling(self) -> None: """Test blank lines don't affect parsing.""" - ftl = fetch_ftl_fixture("blank_lines") + ftl = STRUCTURE_FIXTURES["blank_lines"].ftl parser = FluentParserV1() resource = parser.parse(ftl) diff --git a/uv.lock b/uv.lock index 46ff028a..f7b32f4e 100644 --- a/uv.lock +++ b/uv.lock @@ -2,7 +2,8 @@ version = 1 revision = 3 requires-python = ">=3.13" resolution-markers = [ - "python_full_version >= '3.14'", + "python_full_version >= '3.15'", + "python_full_version == '3.14.*'", "python_full_version < '3.14'", ] @@ -74,16 +75,16 @@ wheels = [ [[package]] name = "build" -version = "1.4.0" +version = "1.4.3" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "colorama", marker = "os_name == 'nt'" }, { name = "packaging" }, { name = "pyproject-hooks" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/42/18/94eaffda7b329535d91f00fe605ab1f1e5cd68b2074d03f255c7d250687d/build-1.4.0.tar.gz", hash = "sha256:f1b91b925aa322be454f8330c6fb48b465da993d1e7e7e6fa35027ec49f3c936", size = 50054, upload-time = "2026-01-08T16:41:47.696Z" } +sdist = { url = 
"https://files.pythonhosted.org/packages/3f/16/4b272700dea44c1d2e8ca963ebb3c684efe22b3eba8cfa31c5fdb60de707/build-1.4.3.tar.gz", hash = "sha256:5aa4231ae0e807efdf1fd0623e07366eca2ab215921345a2e38acdd5d0fa0a74", size = 89314, upload-time = "2026-04-10T21:25:40.857Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/c5/0d/84a4380f930db0010168e0aa7b7a8fed9ba1835a8fbb1472bc6d0201d529/build-1.4.0-py3-none-any.whl", hash = "sha256:6a07c1b8eb6f2b311b96fcbdbce5dab5fe637ffda0fd83c9cac622e927501596", size = 24141, upload-time = "2026-01-08T16:41:46.453Z" }, + { url = "https://files.pythonhosted.org/packages/b2/30/f169e1d8b2071beaf8b97088787e30662b1d8fb82f8c0941d14678c0cbf1/build-1.4.3-py3-none-any.whl", hash = "sha256:1bc22b19b383303de8f2c8554c9a32894a58d3f185fe3756b0b20d255bee9a38", size = 26171, upload-time = "2026-04-10T21:25:39.671Z" }, ] [[package]] @@ -300,7 +301,7 @@ wheels = [ [[package]] name = "ftllexengine" -version = "0.162.0" +version = "0.163.0" source = { editable = "." 
} [package.optional-dependencies] @@ -314,7 +315,6 @@ atheris = [ ] dev = [ { name = "babel" }, - { name = "hypofuzz" }, { name = "hypothesis" }, { name = "mypy" }, { name = "psutil" }, @@ -324,6 +324,9 @@ dev = [ { name = "ruff" }, { name = "types-psutil" }, ] +fuzz = [ + { name = "hypofuzz" }, +] release = [ { name = "build" }, { name = "twine" }, @@ -337,18 +340,18 @@ provides-extras = ["babel"] atheris = [{ name = "atheris", marker = "python_full_version < '3.14'", specifier = ">=3.0.0" }] dev = [ { name = "babel", specifier = ">=2.18.0,<3.0.0" }, - { name = "hypofuzz", specifier = ">=25.11.1" }, - { name = "hypothesis", specifier = ">=6.151.9" }, - { name = "mypy", specifier = ">=1.19.1" }, + { name = "hypothesis", specifier = ">=6.152.1" }, + { name = "mypy", specifier = ">=1.20.2" }, { name = "psutil", specifier = ">=7.2.2" }, - { name = "pytest", specifier = ">=9.0.2" }, + { name = "pytest", specifier = ">=9.0.3" }, { name = "pytest-benchmark", specifier = ">=5.2.3" }, - { name = "pytest-cov", specifier = ">=7.0.0" }, - { name = "ruff", specifier = ">=0.15.7" }, - { name = "types-psutil", specifier = ">=7.2.2.20260130" }, + { name = "pytest-cov", specifier = ">=7.1.0" }, + { name = "ruff", specifier = ">=0.15.11" }, + { name = "types-psutil", specifier = ">=7.2.2.20260408" }, ] +fuzz = [{ name = "hypofuzz", specifier = ">=25.11.1" }] release = [ - { name = "build", specifier = ">=1.4.0" }, + { name = "build", specifier = ">=1.4.3" }, { name = "twine", specifier = ">=6.2.0" }, ] @@ -429,14 +432,14 @@ wheels = [ [[package]] name = "hypothesis" -version = "6.151.9" +version = "6.152.1" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "sortedcontainers" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/19/e1/ef365ff480903b929d28e057f57b76cae51a30375943e33374ec9a165d9c/hypothesis-6.151.9.tar.gz", hash = "sha256:2f284428dda6c3c48c580de0e18470ff9c7f5ef628a647ee8002f38c3f9097ca", size = 463534, upload-time = 
"2026-02-16T22:59:23.09Z" } +sdist = { url = "https://files.pythonhosted.org/packages/64/b1/c32bcddb9aab9e3abc700f1f56faf14e7655c64a16ca47701a57362276ea/hypothesis-6.152.1.tar.gz", hash = "sha256:4f4ed934eee295dd84ee97592477d23e8dc03e9f12ae0ee30a4e7c9ef3fca3b0", size = 465029, upload-time = "2026-04-14T22:29:24.062Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/c4/f7/5cc291d701094754a1d327b44d80a44971e13962881d9a400235726171da/hypothesis-6.151.9-py3-none-any.whl", hash = "sha256:7b7220585c67759b1b1ef839b1e6e9e3d82ed468cfc1ece43c67184848d7edd9", size = 529307, upload-time = "2026-02-16T22:59:20.443Z" }, + { url = "https://files.pythonhosted.org/packages/5d/83/860fb3075e00b0fc19a22a2301bc3c96f00437558c3911bdd0a3573a4a53/hypothesis-6.152.1-py3-none-any.whl", hash = "sha256:40a3619d9e0cb97b018857c7986f75cf5de2e5ec0fa8a0b172d00747758f749e", size = 530752, upload-time = "2026-04-14T22:29:20.893Z" }, ] [package.optional-dependencies] @@ -584,43 +587,49 @@ wheels = [ [[package]] name = "librt" -version = "0.7.4" -source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/93/e4/b59bdf1197fdf9888452ea4d2048cdad61aef85eb83e99dc52551d7fdc04/librt-0.7.4.tar.gz", hash = "sha256:3871af56c59864d5fd21d1ac001eb2fb3b140d52ba0454720f2e4a19812404ba", size = 145862, upload-time = "2025-12-15T16:52:43.862Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/fe/4d/46a53ccfbb39fd0b493fd4496eb76f3ebc15bb3e45d8c2e695a27587edf5/librt-0.7.4-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:d44a1b1ba44cbd2fc3cb77992bef6d6fdb1028849824e1dd5e4d746e1f7f7f0b", size = 55745, upload-time = "2025-12-15T16:51:46.636Z" }, - { url = "https://files.pythonhosted.org/packages/7f/2b/3ac7f5212b1828bf4f979cf87f547db948d3e28421d7a430d4db23346ce4/librt-0.7.4-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:c9cab4b3de1f55e6c30a84c8cee20e4d3b2476f4d547256694a1b0163da4fe32", size = 57166, upload-time = "2025-12-15T16:51:48.219Z" 
}, - { url = "https://files.pythonhosted.org/packages/e8/99/6523509097cbe25f363795f0c0d1c6a3746e30c2994e25b5aefdab119b21/librt-0.7.4-cp313-cp313-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:2857c875f1edd1feef3c371fbf830a61b632fb4d1e57160bb1e6a3206e6abe67", size = 165833, upload-time = "2025-12-15T16:51:49.443Z" }, - { url = "https://files.pythonhosted.org/packages/fe/35/323611e59f8fe032649b4fb7e77f746f96eb7588fcbb31af26bae9630571/librt-0.7.4-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:b370a77be0a16e1ad0270822c12c21462dc40496e891d3b0caf1617c8cc57e20", size = 174818, upload-time = "2025-12-15T16:51:51.015Z" }, - { url = "https://files.pythonhosted.org/packages/41/e6/40fb2bb21616c6e06b6a64022802228066e9a31618f493e03f6b9661548a/librt-0.7.4-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:d05acd46b9a52087bfc50c59dfdf96a2c480a601e8898a44821c7fd676598f74", size = 189607, upload-time = "2025-12-15T16:51:52.671Z" }, - { url = "https://files.pythonhosted.org/packages/32/48/1b47c7d5d28b775941e739ed2bfe564b091c49201b9503514d69e4ed96d7/librt-0.7.4-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:70969229cb23d9c1a80e14225838d56e464dc71fa34c8342c954fc50e7516dee", size = 184585, upload-time = "2025-12-15T16:51:54.027Z" }, - { url = "https://files.pythonhosted.org/packages/75/a6/ee135dfb5d3b54d5d9001dbe483806229c6beac3ee2ba1092582b7efeb1b/librt-0.7.4-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:4450c354b89dbb266730893862dbff06006c9ed5b06b6016d529b2bf644fc681", size = 178249, upload-time = "2025-12-15T16:51:55.248Z" }, - { url = "https://files.pythonhosted.org/packages/04/87/d5b84ec997338be26af982bcd6679be0c1db9a32faadab1cf4bb24f9e992/librt-0.7.4-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:adefe0d48ad35b90b6f361f6ff5a1bd95af80c17d18619c093c60a20e7a5b60c", size = 199851, upload-time = "2025-12-15T16:51:56.933Z" }, - { url = 
"https://files.pythonhosted.org/packages/86/63/ba1333bf48306fe398e3392a7427ce527f81b0b79d0d91618c4610ce9d15/librt-0.7.4-cp313-cp313-win32.whl", hash = "sha256:21ea710e96c1e050635700695095962a22ea420d4b3755a25e4909f2172b4ff2", size = 43249, upload-time = "2025-12-15T16:51:58.498Z" }, - { url = "https://files.pythonhosted.org/packages/f9/8a/de2c6df06cdfa9308c080e6b060fe192790b6a48a47320b215e860f0e98c/librt-0.7.4-cp313-cp313-win_amd64.whl", hash = "sha256:772e18696cf5a64afee908662fbcb1f907460ddc851336ee3a848ef7684c8e1e", size = 49417, upload-time = "2025-12-15T16:51:59.618Z" }, - { url = "https://files.pythonhosted.org/packages/31/66/8ee0949efc389691381ed686185e43536c20e7ad880c122dd1f31e65c658/librt-0.7.4-cp313-cp313-win_arm64.whl", hash = "sha256:52e34c6af84e12921748c8354aa6acf1912ca98ba60cdaa6920e34793f1a0788", size = 42824, upload-time = "2025-12-15T16:52:00.784Z" }, - { url = "https://files.pythonhosted.org/packages/74/81/6921e65c8708eb6636bbf383aa77e6c7dad33a598ed3b50c313306a2da9d/librt-0.7.4-cp314-cp314-macosx_10_13_x86_64.whl", hash = "sha256:4f1ee004942eaaed6e06c087d93ebc1c67e9a293e5f6b9b5da558df6bf23dc5d", size = 55191, upload-time = "2025-12-15T16:52:01.97Z" }, - { url = "https://files.pythonhosted.org/packages/0d/d6/3eb864af8a8de8b39cc8dd2e9ded1823979a27795d72c4eea0afa8c26c9f/librt-0.7.4-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:d854c6dc0f689bad7ed452d2a3ecff58029d80612d336a45b62c35e917f42d23", size = 56898, upload-time = "2025-12-15T16:52:03.356Z" }, - { url = "https://files.pythonhosted.org/packages/49/bc/b1d4c0711fdf79646225d576faee8747b8528a6ec1ceb6accfd89ade7102/librt-0.7.4-cp314-cp314-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:a4f7339d9e445280f23d63dea842c0c77379c4a47471c538fc8feedab9d8d063", size = 163725, upload-time = "2025-12-15T16:52:04.572Z" }, - { url = 
"https://files.pythonhosted.org/packages/2c/08/61c41cd8f0a6a41fc99ea78a2205b88187e45ba9800792410ed62f033584/librt-0.7.4-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:39003fc73f925e684f8521b2dbf34f61a5deb8a20a15dcf53e0d823190ce8848", size = 172469, upload-time = "2025-12-15T16:52:05.863Z" }, - { url = "https://files.pythonhosted.org/packages/8b/c7/4ee18b4d57f01444230bc18cf59103aeab8f8c0f45e84e0e540094df1df1/librt-0.7.4-cp314-cp314-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:6bb15ee29d95875ad697d449fe6071b67f730f15a6961913a2b0205015ca0843", size = 186804, upload-time = "2025-12-15T16:52:07.192Z" }, - { url = "https://files.pythonhosted.org/packages/a1/af/009e8ba3fbf830c936842da048eda1b34b99329f402e49d88fafff6525d1/librt-0.7.4-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:02a69369862099e37d00765583052a99d6a68af7e19b887e1b78fee0146b755a", size = 181807, upload-time = "2025-12-15T16:52:08.554Z" }, - { url = "https://files.pythonhosted.org/packages/85/26/51ae25f813656a8b117c27a974f25e8c1e90abcd5a791ac685bf5b489a1b/librt-0.7.4-cp314-cp314-musllinux_1_2_i686.whl", hash = "sha256:ec72342cc4d62f38b25a94e28b9efefce41839aecdecf5e9627473ed04b7be16", size = 175595, upload-time = "2025-12-15T16:52:10.186Z" }, - { url = "https://files.pythonhosted.org/packages/48/93/36d6c71f830305f88996b15c8e017aa8d1e03e2e947b40b55bbf1a34cf24/librt-0.7.4-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:776dbb9bfa0fc5ce64234b446995d8d9f04badf64f544ca036bd6cff6f0732ce", size = 196504, upload-time = "2025-12-15T16:52:11.472Z" }, - { url = "https://files.pythonhosted.org/packages/08/11/8299e70862bb9d704735bf132c6be09c17b00fbc7cda0429a9df222fdc1b/librt-0.7.4-cp314-cp314-win32.whl", hash = "sha256:0f8cac84196d0ffcadf8469d9ded4d4e3a8b1c666095c2a291e22bf58e1e8a9f", size = 39738, upload-time = "2025-12-15T16:52:12.962Z" }, - { url = 
"https://files.pythonhosted.org/packages/54/d5/656b0126e4e0f8e2725cd2d2a1ec40f71f37f6f03f135a26b663c0e1a737/librt-0.7.4-cp314-cp314-win_amd64.whl", hash = "sha256:037f5cb6fe5abe23f1dc058054d50e9699fcc90d0677eee4e4f74a8677636a1a", size = 45976, upload-time = "2025-12-15T16:52:14.441Z" }, - { url = "https://files.pythonhosted.org/packages/60/86/465ff07b75c1067da8fa7f02913c4ead096ef106cfac97a977f763783bfb/librt-0.7.4-cp314-cp314-win_arm64.whl", hash = "sha256:a5deebb53d7a4d7e2e758a96befcd8edaaca0633ae71857995a0f16033289e44", size = 39073, upload-time = "2025-12-15T16:52:15.621Z" }, - { url = "https://files.pythonhosted.org/packages/b3/a0/24941f85960774a80d4b3c2aec651d7d980466da8101cae89e8b032a3e21/librt-0.7.4-cp314-cp314t-macosx_10_13_x86_64.whl", hash = "sha256:b4c25312c7f4e6ab35ab16211bdf819e6e4eddcba3b2ea632fb51c9a2a97e105", size = 57369, upload-time = "2025-12-15T16:52:16.782Z" }, - { url = "https://files.pythonhosted.org/packages/77/a0/ddb259cae86ab415786c1547d0fe1b40f04a7b089f564fd5c0242a3fafb2/librt-0.7.4-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:618b7459bb392bdf373f2327e477597fff8f9e6a1878fffc1b711c013d1b0da4", size = 59230, upload-time = "2025-12-15T16:52:18.259Z" }, - { url = "https://files.pythonhosted.org/packages/31/11/77823cb530ab8a0c6fac848ac65b745be446f6f301753b8990e8809080c9/librt-0.7.4-cp314-cp314t-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:1437c3f72a30c7047f16fd3e972ea58b90172c3c6ca309645c1c68984f05526a", size = 183869, upload-time = "2025-12-15T16:52:19.457Z" }, - { url = "https://files.pythonhosted.org/packages/a4/ce/157db3614cf3034b3f702ae5ba4fefda4686f11eea4b7b96542324a7a0e7/librt-0.7.4-cp314-cp314t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:c96cb76f055b33308f6858b9b594618f1b46e147a4d03a4d7f0c449e304b9b95", size = 194606, upload-time = "2025-12-15T16:52:20.795Z" }, - { url = 
"https://files.pythonhosted.org/packages/30/ef/6ec4c7e3d6490f69a4fd2803516fa5334a848a4173eac26d8ee6507bff6e/librt-0.7.4-cp314-cp314t-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:28f990e6821204f516d09dc39966ef8b84556ffd648d5926c9a3f681e8de8906", size = 206776, upload-time = "2025-12-15T16:52:22.229Z" }, - { url = "https://files.pythonhosted.org/packages/ad/22/750b37bf549f60a4782ab80e9d1e9c44981374ab79a7ea68670159905918/librt-0.7.4-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:bc4aebecc79781a1b77d7d4e7d9fe080385a439e198d993b557b60f9117addaf", size = 203205, upload-time = "2025-12-15T16:52:23.603Z" }, - { url = "https://files.pythonhosted.org/packages/7a/87/2e8a0f584412a93df5faad46c5fa0a6825fdb5eba2ce482074b114877f44/librt-0.7.4-cp314-cp314t-musllinux_1_2_i686.whl", hash = "sha256:022cc673e69283a42621dd453e2407cf1647e77f8bd857d7ad7499901e62376f", size = 196696, upload-time = "2025-12-15T16:52:24.951Z" }, - { url = "https://files.pythonhosted.org/packages/e5/ca/7bf78fa950e43b564b7de52ceeb477fb211a11f5733227efa1591d05a307/librt-0.7.4-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:2b3ca211ae8ea540569e9c513da052699b7b06928dcda61247cb4f318122bdb5", size = 217191, upload-time = "2025-12-15T16:52:26.194Z" }, - { url = "https://files.pythonhosted.org/packages/d6/49/3732b0e8424ae35ad5c3166d9dd5bcdae43ce98775e0867a716ff5868064/librt-0.7.4-cp314-cp314t-win32.whl", hash = "sha256:8a461f6456981d8c8e971ff5a55f2e34f4e60871e665d2f5fde23ee74dea4eeb", size = 40276, upload-time = "2025-12-15T16:52:27.54Z" }, - { url = "https://files.pythonhosted.org/packages/35/d6/d8823e01bd069934525fddb343189c008b39828a429b473fb20d67d5cd36/librt-0.7.4-cp314-cp314t-win_amd64.whl", hash = "sha256:721a7b125a817d60bf4924e1eec2a7867bfcf64cfc333045de1df7a0629e4481", size = 46772, upload-time = "2025-12-15T16:52:28.653Z" }, - { url = 
"https://files.pythonhosted.org/packages/36/e9/a0aa60f5322814dd084a89614e9e31139702e342f8459ad8af1984a18168/librt-0.7.4-cp314-cp314t-win_arm64.whl", hash = "sha256:76b2ba71265c0102d11458879b4d53ccd0b32b0164d14deb8d2b598a018e502f", size = 39724, upload-time = "2025-12-15T16:52:29.836Z" }, +version = "0.9.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/eb/6b/3d5c13fb3e3c4f43206c8f9dfed13778c2ed4f000bacaa0b7ce3c402a265/librt-0.9.0.tar.gz", hash = "sha256:a0951822531e7aee6e0dfb556b30d5ee36bbe234faf60c20a16c01be3530869d", size = 184368, upload-time = "2026-04-09T16:06:26.173Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/5f/d7/1b3e26fffde1452d82f5666164858a81c26ebe808e7ae8c9c88628981540/librt-0.9.0-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:f29b68cd9714531672db62cc54f6e8ff981900f824d13fa0e00749189e13778e", size = 68367, upload-time = "2026-04-09T16:05:17.243Z" }, + { url = "https://files.pythonhosted.org/packages/a5/5b/c61b043ad2e091fbe1f2d35d14795e545d0b56b03edaa390fa1dcee3d160/librt-0.9.0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:7d5c8a5929ac325729f6119802070b561f4db793dffc45e9ac750992a4ed4d22", size = 70595, upload-time = "2026-04-09T16:05:18.471Z" }, + { url = "https://files.pythonhosted.org/packages/a3/22/2448471196d8a73370aa2f23445455dc42712c21404081fcd7a03b9e0749/librt-0.9.0-cp313-cp313-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:756775d25ec8345b837ab52effee3ad2f3b2dfd6bbee3e3f029c517bd5d8f05a", size = 204354, upload-time = "2026-04-09T16:05:19.593Z" }, + { url = "https://files.pythonhosted.org/packages/ac/5e/39fc4b153c78cfd2c8a2dcb32700f2d41d2312aa1050513183be4540930d/librt-0.9.0-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:2b8f5d00b49818f4e2b1667db994488b045835e0ac16fe2f924f3871bd2b8ac5", size = 216238, upload-time = "2026-04-09T16:05:20.868Z" }, + { url = 
"https://files.pythonhosted.org/packages/d7/42/bc2d02d0fa7badfa63aa8d6dcd8793a9f7ef5a94396801684a51ed8d8287/librt-0.9.0-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:c81aef782380f0f13ead670aae01825eb653b44b046aa0e5ebbb79f76ed4aa11", size = 230589, upload-time = "2026-04-09T16:05:22.305Z" }, + { url = "https://files.pythonhosted.org/packages/c8/7b/e2d95cc513866373692aa5edf98080d5602dd07cabfb9e5d2f70df2f25f7/librt-0.9.0-cp313-cp313-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:66b58fed90a545328e80d575467244de3741e088c1af928f0b489ebec3ef3858", size = 224610, upload-time = "2026-04-09T16:05:23.647Z" }, + { url = "https://files.pythonhosted.org/packages/31/d5/6cec4607e998eaba57564d06a1295c21b0a0c8de76e4e74d699e627bd98c/librt-0.9.0-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:e78fb7419e07d98c2af4b8567b72b3eaf8cb05caad642e9963465569c8b2d87e", size = 232558, upload-time = "2026-04-09T16:05:25.025Z" }, + { url = "https://files.pythonhosted.org/packages/95/8c/27f1d8d3aaf079d3eb26439bf0b32f1482340c3552e324f7db9dca858671/librt-0.9.0-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:2c3786f0f4490a5cd87f1ed6cefae833ad6b1060d52044ce0434a2e85893afd0", size = 225521, upload-time = "2026-04-09T16:05:26.311Z" }, + { url = "https://files.pythonhosted.org/packages/6b/d8/1e0d43b1c329b416017619469b3c3801a25a6a4ef4a1c68332aeaa6f72ca/librt-0.9.0-cp313-cp313-musllinux_1_2_riscv64.whl", hash = "sha256:8494cfc61e03542f2d381e71804990b3931175a29b9278fdb4a5459948778dc2", size = 227789, upload-time = "2026-04-09T16:05:27.624Z" }, + { url = "https://files.pythonhosted.org/packages/2c/b4/d3d842e88610fcd4c8eec7067b0c23ef2d7d3bff31496eded6a83b0f99be/librt-0.9.0-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:07cf11f769831186eeac424376e6189f20ace4f7263e2134bdb9757340d84d4d", size = 248616, upload-time = "2026-04-09T16:05:29.181Z" }, + { url = 
"https://files.pythonhosted.org/packages/ec/28/527df8ad0d1eb6c8bdfa82fc190f1f7c4cca5a1b6d7b36aeabf95b52d74d/librt-0.9.0-cp313-cp313-win32.whl", hash = "sha256:850d6d03177e52700af605fd60db7f37dcb89782049a149674d1a9649c2138fd", size = 56039, upload-time = "2026-04-09T16:05:30.709Z" }, + { url = "https://files.pythonhosted.org/packages/f3/a7/413652ad0d92273ee5e30c000fc494b361171177c83e57c060ecd3c21538/librt-0.9.0-cp313-cp313-win_amd64.whl", hash = "sha256:a5af136bfba820d592f86c67affcef9b3ff4d4360ac3255e341e964489b48519", size = 63264, upload-time = "2026-04-09T16:05:31.881Z" }, + { url = "https://files.pythonhosted.org/packages/a4/0a/92c244309b774e290ddb15e93363846ae7aa753d9586b8aad511c5e6145b/librt-0.9.0-cp313-cp313-win_arm64.whl", hash = "sha256:4c4d0440a3a8e31d962340c3e1cc3fc9ee7febd34c8d8f770d06adb947779ea5", size = 53728, upload-time = "2026-04-09T16:05:33.31Z" }, + { url = "https://files.pythonhosted.org/packages/cd/c1/184e539543f06ea2912f4b92a5ffaede4f9b392689e3f00acbf8134bee92/librt-0.9.0-cp314-cp314-macosx_10_13_x86_64.whl", hash = "sha256:3f05d145df35dca5056a8bc3838e940efebd893a54b3e19b2dda39ceaa299bcb", size = 67830, upload-time = "2026-04-09T16:05:34.517Z" }, + { url = "https://files.pythonhosted.org/packages/f3/ad/23399bdcb7afca819acacdef31b37ee59de261bd66b503a7995c03c4b0dc/librt-0.9.0-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:1c587494461ebd42229d0f1739f3aa34237dd9980623ecf1be8d3bcba79f4499", size = 70280, upload-time = "2026-04-09T16:05:35.649Z" }, + { url = "https://files.pythonhosted.org/packages/9f/0b/4542dc5a2b8772dbf92cafb9194701230157e73c14b017b6961a23598b03/librt-0.9.0-cp314-cp314-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:b0a2040f801406b93657a70b72fa12311063a319fee72ce98e1524da7200171f", size = 201925, upload-time = "2026-04-09T16:05:36.739Z" }, + { url = 
"https://files.pythonhosted.org/packages/31/d4/8ee7358b08fd0cfce051ef96695380f09b3c2c11b77c9bfbc367c921cce5/librt-0.9.0-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:f38bc489037eca88d6ebefc9c4d41a4e07c8e8b4de5188a9e6d290273ad7ebb1", size = 212381, upload-time = "2026-04-09T16:05:38.043Z" }, + { url = "https://files.pythonhosted.org/packages/f2/94/a2025fe442abedf8b038038dab3dba942009ad42b38ea064a1a9e6094241/librt-0.9.0-cp314-cp314-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:f3fd278f5e6bf7c75ccd6d12344eb686cc020712683363b66f46ac79d37c799f", size = 227065, upload-time = "2026-04-09T16:05:39.394Z" }, + { url = "https://files.pythonhosted.org/packages/7c/e9/b9fcf6afa909f957cfbbf918802f9dada1bd5d3c1da43d722fd6a310dc3f/librt-0.9.0-cp314-cp314-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:fcbdf2a9ca24e87bbebb47f1fe34e531ef06f104f98c9ccfc953a3f3344c567a", size = 221333, upload-time = "2026-04-09T16:05:40.999Z" }, + { url = "https://files.pythonhosted.org/packages/ac/7c/ba54cd6aa6a3c8cd12757a6870e0c79a64b1e6327f5248dcff98423f4d43/librt-0.9.0-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:e306d956cfa027fe041585f02a1602c32bfa6bb8ebea4899d373383295a6c62f", size = 229051, upload-time = "2026-04-09T16:05:42.605Z" }, + { url = "https://files.pythonhosted.org/packages/4b/4b/8cfdbad314c8677a0148bf0b70591d6d18587f9884d930276098a235461b/librt-0.9.0-cp314-cp314-musllinux_1_2_i686.whl", hash = "sha256:465814ab157986acb9dfa5ccd7df944be5eefc0d08d31ec6e8d88bc71251d845", size = 222492, upload-time = "2026-04-09T16:05:43.842Z" }, + { url = "https://files.pythonhosted.org/packages/1f/d1/2eda69563a1a88706808decdce035e4b32755dbfbb0d05e1a65db9547ed1/librt-0.9.0-cp314-cp314-musllinux_1_2_riscv64.whl", hash = "sha256:703f4ae36d6240bfe24f542bac784c7e4194ec49c3ba5a994d02891649e2d85b", size = 223849, upload-time = "2026-04-09T16:05:45.054Z" }, + { url = 
"https://files.pythonhosted.org/packages/04/44/b2ed37df6be5b3d42cfe36318e0598e80843d5c6308dd63d0bf4e0ce5028/librt-0.9.0-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:3be322a15ee5e70b93b7a59cfd074614f22cc8c9ff18bd27f474e79137ea8d3b", size = 245001, upload-time = "2026-04-09T16:05:46.34Z" }, + { url = "https://files.pythonhosted.org/packages/47/e7/617e412426df89169dd2a9ed0cc8752d5763336252c65dbf945199915119/librt-0.9.0-cp314-cp314-win32.whl", hash = "sha256:b8da9f8035bb417770b1e1610526d87ad4fc58a2804dc4d79c53f6d2cf5a6eb9", size = 51799, upload-time = "2026-04-09T16:05:47.738Z" }, + { url = "https://files.pythonhosted.org/packages/24/ed/c22ca4db0ca3cbc285e4d9206108746beda561a9792289c3c31281d7e9df/librt-0.9.0-cp314-cp314-win_amd64.whl", hash = "sha256:b8bd70d5d816566a580d193326912f4a76ec2d28a97dc4cd4cc831c0af8e330e", size = 59165, upload-time = "2026-04-09T16:05:49.198Z" }, + { url = "https://files.pythonhosted.org/packages/24/56/875398fafa4cbc8f15b89366fc3287304ddd3314d861f182a4b87595ace0/librt-0.9.0-cp314-cp314-win_arm64.whl", hash = "sha256:fc5758e2b7a56532dc33e3c544d78cbaa9ecf0a0f2a2da2df882c1d6b99a317f", size = 49292, upload-time = "2026-04-09T16:05:50.362Z" }, + { url = "https://files.pythonhosted.org/packages/4c/61/bc448ecbf9b2d69c5cff88fe41496b19ab2a1cbda0065e47d4d0d51c0867/librt-0.9.0-cp314-cp314t-macosx_10_13_x86_64.whl", hash = "sha256:f24b90b0e0c8cc9491fb1693ae91fe17cb7963153a1946395acdbdd5818429a4", size = 70175, upload-time = "2026-04-09T16:05:51.564Z" }, + { url = "https://files.pythonhosted.org/packages/60/f2/c47bb71069a73e2f04e70acbd196c1e5cc411578ac99039a224b98920fd4/librt-0.9.0-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:3fe56e80badb66fdcde06bef81bbaa5bfcf6fbd7aefb86222d9e369c38c6b228", size = 72951, upload-time = "2026-04-09T16:05:52.699Z" }, + { url = 
"https://files.pythonhosted.org/packages/29/19/0549df59060631732df758e8886d92088da5fdbedb35b80e4643664e8412/librt-0.9.0-cp314-cp314t-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:527b5b820b47a09e09829051452bb0d1dd2122261254e2a6f674d12f1d793d54", size = 225864, upload-time = "2026-04-09T16:05:53.895Z" }, + { url = "https://files.pythonhosted.org/packages/9d/f8/3b144396d302ac08e50f89e64452c38db84bc7b23f6c60479c5d3abd303c/librt-0.9.0-cp314-cp314t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:7d429bdd4ac0ab17c8e4a8af0ed2a7440b16eba474909ab357131018fe8c7e71", size = 241155, upload-time = "2026-04-09T16:05:55.191Z" }, + { url = "https://files.pythonhosted.org/packages/7a/ce/ee67ec14581de4043e61d05786d2aed6c9b5338816b7859bcf07455c6a9f/librt-0.9.0-cp314-cp314t-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:7202bdcac47d3a708271c4304a474a8605a4a9a4a709e954bf2d3241140aa938", size = 252235, upload-time = "2026-04-09T16:05:56.549Z" }, + { url = "https://files.pythonhosted.org/packages/8a/fa/0ead15daa2b293a54101550b08d4bafe387b7d4a9fc6d2b985602bae69b6/librt-0.9.0-cp314-cp314t-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:c0d620e74897f8c2613b3c4e2e9c1e422eb46d2ddd07df540784d44117836af3", size = 244963, upload-time = "2026-04-09T16:05:57.858Z" }, + { url = "https://files.pythonhosted.org/packages/29/68/9fbf9a9aa704ba87689e40017e720aced8d9a4d2b46b82451d8142f91ec9/librt-0.9.0-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:d69fc39e627908f4c03297d5a88d9284b73f4d90b424461e32e8c2485e21c283", size = 257364, upload-time = "2026-04-09T16:05:59.686Z" }, + { url = "https://files.pythonhosted.org/packages/1a/8d/9d60869f1b6716c762e45f66ed945b1e5dd649f7377684c3b176ae424648/librt-0.9.0-cp314-cp314t-musllinux_1_2_i686.whl", hash = "sha256:c2640e23d2b7c98796f123ffd95cf2022c7777aa8a4a3b98b36c570d37e85eee", size = 247661, upload-time = 
"2026-04-09T16:06:00.938Z" }, + { url = "https://files.pythonhosted.org/packages/70/ff/a5c365093962310bfdb4f6af256f191085078ffb529b3f0cbebb5b33ebe2/librt-0.9.0-cp314-cp314t-musllinux_1_2_riscv64.whl", hash = "sha256:451daa98463b7695b0a30aa56bf637831ea559e7b8101ac2ef6382e8eb15e29c", size = 248238, upload-time = "2026-04-09T16:06:02.537Z" }, + { url = "https://files.pythonhosted.org/packages/a0/3c/2d34365177f412c9e19c0a29f969d70f5343f27634b76b765a54d8b27705/librt-0.9.0-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:928bd06eca2c2bbf4349e5b817f837509b0604342e65a502de1d50a7570afd15", size = 269457, upload-time = "2026-04-09T16:06:03.833Z" }, + { url = "https://files.pythonhosted.org/packages/bc/cd/de45b239ea3bdf626f982a00c14bfcf2e12d261c510ba7db62c5969a27cd/librt-0.9.0-cp314-cp314t-win32.whl", hash = "sha256:a9c63e04d003bc0fb6a03b348018b9a3002f98268200e22cc80f146beac5dc40", size = 52453, upload-time = "2026-04-09T16:06:05.229Z" }, + { url = "https://files.pythonhosted.org/packages/7f/f9/bfb32ae428aa75c0c533915622176f0a17d6da7b72b5a3c6363685914f70/librt-0.9.0-cp314-cp314t-win_amd64.whl", hash = "sha256:f162af66a2ed3f7d1d161a82ca584efd15acd9c1cff190a373458c32f7d42118", size = 60044, upload-time = "2026-04-09T16:06:06.398Z" }, + { url = "https://files.pythonhosted.org/packages/aa/47/7d70414bcdbb3bc1f458a8d10558f00bbfdb24e5a11740fc8197e12c3255/librt-0.9.0-cp314-cp314t-win_arm64.whl", hash = "sha256:a4b25c6c25cac5d0d9d6d6da855195b254e0021e513e0249f0e3b444dc6e0e61", size = 50009, upload-time = "2026-04-09T16:06:07.995Z" }, ] [[package]] @@ -655,7 +664,7 @@ wheels = [ [[package]] name = "mypy" -version = "1.19.1" +version = "1.20.2" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "librt", marker = "platform_python_implementation != 'PyPy'" }, @@ -663,21 +672,30 @@ dependencies = [ { name = "pathspec" }, { name = "typing-extensions" }, ] -sdist = { url = 
"https://files.pythonhosted.org/packages/f5/db/4efed9504bc01309ab9c2da7e352cc223569f05478012b5d9ece38fd44d2/mypy-1.19.1.tar.gz", hash = "sha256:19d88bb05303fe63f71dd2c6270daca27cb9401c4ca8255fe50d1d920e0eb9ba", size = 3582404, upload-time = "2025-12-15T05:03:48.42Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/de/9f/a6abae693f7a0c697dbb435aac52e958dc8da44e92e08ba88d2e42326176/mypy-1.19.1-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:e3157c7594ff2ef1634ee058aafc56a82db665c9438fd41b390f3bde1ab12250", size = 13201927, upload-time = "2025-12-15T05:02:29.138Z" }, - { url = "https://files.pythonhosted.org/packages/9a/a4/45c35ccf6e1c65afc23a069f50e2c66f46bd3798cbe0d680c12d12935caa/mypy-1.19.1-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:bdb12f69bcc02700c2b47e070238f42cb87f18c0bc1fc4cdb4fb2bc5fd7a3b8b", size = 12206730, upload-time = "2025-12-15T05:03:01.325Z" }, - { url = "https://files.pythonhosted.org/packages/05/bb/cdcf89678e26b187650512620eec8368fded4cfd99cfcb431e4cdfd19dec/mypy-1.19.1-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:f859fb09d9583a985be9a493d5cfc5515b56b08f7447759a0c5deaf68d80506e", size = 12724581, upload-time = "2025-12-15T05:03:20.087Z" }, - { url = "https://files.pythonhosted.org/packages/d1/32/dd260d52babf67bad8e6770f8e1102021877ce0edea106e72df5626bb0ec/mypy-1.19.1-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:c9a6538e0415310aad77cb94004ca6482330fece18036b5f360b62c45814c4ef", size = 13616252, upload-time = "2025-12-15T05:02:49.036Z" }, - { url = "https://files.pythonhosted.org/packages/71/d0/5e60a9d2e3bd48432ae2b454b7ef2b62a960ab51292b1eda2a95edd78198/mypy-1.19.1-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:da4869fc5e7f62a88f3fe0b5c919d1d9f7ea3cef92d3689de2823fd27e40aa75", size = 13840848, upload-time = "2025-12-15T05:02:55.95Z" }, - { url = 
"https://files.pythonhosted.org/packages/98/76/d32051fa65ecf6cc8c6610956473abdc9b4c43301107476ac03559507843/mypy-1.19.1-cp313-cp313-win_amd64.whl", hash = "sha256:016f2246209095e8eda7538944daa1d60e1e8134d98983b9fc1e92c1fc0cb8dd", size = 10135510, upload-time = "2025-12-15T05:02:58.438Z" }, - { url = "https://files.pythonhosted.org/packages/de/eb/b83e75f4c820c4247a58580ef86fcd35165028f191e7e1ba57128c52782d/mypy-1.19.1-cp314-cp314-macosx_10_15_x86_64.whl", hash = "sha256:06e6170bd5836770e8104c8fdd58e5e725cfeb309f0a6c681a811f557e97eac1", size = 13199744, upload-time = "2025-12-15T05:03:30.823Z" }, - { url = "https://files.pythonhosted.org/packages/94/28/52785ab7bfa165f87fcbb61547a93f98bb20e7f82f90f165a1f69bce7b3d/mypy-1.19.1-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:804bd67b8054a85447c8954215a906d6eff9cabeabe493fb6334b24f4bfff718", size = 12215815, upload-time = "2025-12-15T05:02:42.323Z" }, - { url = "https://files.pythonhosted.org/packages/0a/c6/bdd60774a0dbfb05122e3e925f2e9e846c009e479dcec4821dad881f5b52/mypy-1.19.1-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:21761006a7f497cb0d4de3d8ef4ca70532256688b0523eee02baf9eec895e27b", size = 12740047, upload-time = "2025-12-15T05:03:33.168Z" }, - { url = "https://files.pythonhosted.org/packages/32/2a/66ba933fe6c76bd40d1fe916a83f04fed253152f451a877520b3c4a5e41e/mypy-1.19.1-cp314-cp314-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:28902ee51f12e0f19e1e16fbe2f8f06b6637f482c459dd393efddd0ec7f82045", size = 13601998, upload-time = "2025-12-15T05:03:13.056Z" }, - { url = "https://files.pythonhosted.org/packages/e3/da/5055c63e377c5c2418760411fd6a63ee2b96cf95397259038756c042574f/mypy-1.19.1-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:481daf36a4c443332e2ae9c137dfee878fcea781a2e3f895d54bd3002a900957", size = 13807476, upload-time = "2025-12-15T05:03:17.977Z" }, - { url = 
"https://files.pythonhosted.org/packages/cd/09/4ebd873390a063176f06b0dbf1f7783dd87bd120eae7727fa4ae4179b685/mypy-1.19.1-cp314-cp314-win_amd64.whl", hash = "sha256:8bb5c6f6d043655e055be9b542aa5f3bdd30e4f3589163e85f93f3640060509f", size = 10281872, upload-time = "2025-12-15T05:03:05.549Z" }, - { url = "https://files.pythonhosted.org/packages/8d/f4/4ce9a05ce5ded1de3ec1c1d96cf9f9504a04e54ce0ed55cfa38619a32b8d/mypy-1.19.1-py3-none-any.whl", hash = "sha256:f1235f5ea01b7db5468d53ece6aaddf1ad0b88d9e7462b86ef96fe04995d7247", size = 2471239, upload-time = "2025-12-15T05:03:07.248Z" }, +sdist = { url = "https://files.pythonhosted.org/packages/04/af/e3d4b3e9ec91a0ff9aabfdb38692952acf49bbb899c2e4c29acb3a6da3ae/mypy-1.20.2.tar.gz", hash = "sha256:e8222c26daaafd9e8626dec58ae36029f82585890589576f769a650dd20fd665", size = 3817349, upload-time = "2026-04-21T17:12:28.473Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/5b/c4/b93812d3a192c9bcf5df405bd2f30277cd0e48106a14d1023c7f6ed6e39b/mypy-1.20.2-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:edfbfca868cdd6bd8d974a60f8a3682f5565d3f5c99b327640cedd24c4264026", size = 14524670, upload-time = "2026-04-21T17:10:30.737Z" }, + { url = "https://files.pythonhosted.org/packages/f3/47/42c122501bff18eaf1e8f457f5c017933452d8acdc52918a9f59f6812955/mypy-1.20.2-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:e2877a02380adfcdbc69071a0f74d6e9dbbf593c0dc9d174e1f223ffd5281943", size = 13336218, upload-time = "2026-04-21T17:08:44.069Z" }, + { url = "https://files.pythonhosted.org/packages/92/8f/75bbc92f41725fbd585fb17b440b1119b576105df1013622983e18640a93/mypy-1.20.2-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:7488448de6007cd5177c6cea0517ac33b4c0f5ee9b5e9f2be51ce75511a85517", size = 13724906, upload-time = "2026-04-21T17:08:01.02Z" }, + { url = 
"https://files.pythonhosted.org/packages/a1/32/4c49da27a606167391ff0c39aa955707a00edc500572e562f7c36c08a71f/mypy-1.20.2-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:bb9c2fa06887e21d6a3a868762acb82aec34e2c6fd0174064f27c93ede68ad15", size = 14726046, upload-time = "2026-04-21T17:11:22.354Z" }, + { url = "https://files.pythonhosted.org/packages/7f/fc/4e354a1bd70216359deb0c9c54847ee6b32ef78dfb09f5131ff99b494078/mypy-1.20.2-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:9d56a78b646f2e3daa865bc70cd5ec5a46c50045801ca8ff17a0c43abc97e3ee", size = 14955587, upload-time = "2026-04-21T17:12:16.033Z" }, + { url = "https://files.pythonhosted.org/packages/62/b2/c0f2056e9eb8f08c62cafd9715e4584b89132bdc832fcf85d27d07b5f3e5/mypy-1.20.2-cp313-cp313-win_amd64.whl", hash = "sha256:2a4102b03bb7481d9a91a6da8d174740c9c8c4401024684b9ca3b7cc5e49852f", size = 10922681, upload-time = "2026-04-21T17:06:35.842Z" }, + { url = "https://files.pythonhosted.org/packages/e5/14/065e333721f05de8ef683d0aa804c23026bcc287446b61cac657b902ccac/mypy-1.20.2-cp313-cp313-win_arm64.whl", hash = "sha256:a95a9248b0c6fd933a442c03c3b113c3b61320086b88e2c444676d3fd1ca3330", size = 9830560, upload-time = "2026-04-21T17:07:51.023Z" }, + { url = "https://files.pythonhosted.org/packages/ae/d1/b4ec96b0ecc620a4443570c6e95c867903428cfcde4206518eafdd5880c3/mypy-1.20.2-cp314-cp314-macosx_10_15_x86_64.whl", hash = "sha256:419413398fe250aae057fd2fe50166b61077083c9b82754c341cf4fd73038f30", size = 14524561, upload-time = "2026-04-21T17:06:27.325Z" }, + { url = "https://files.pythonhosted.org/packages/3a/63/d2c2ff4fa66bc49477d32dfa26e8a167ba803ea6a69c5efb416036909d30/mypy-1.20.2-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:e73c07f23009962885c197ccb9b41356a30cc0e5a1d0c2ea8fd8fb1362d7f924", size = 13363883, upload-time = "2026-04-21T17:11:11.239Z" }, + { url = 
"https://files.pythonhosted.org/packages/2a/56/983916806bf4eddeaaa2c9230903c3669c6718552a921154e1c5182c701f/mypy-1.20.2-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:0c64e5973df366b747646fc98da921f9d6eba9716d57d1db94a83c026a08e0fb", size = 13742945, upload-time = "2026-04-21T17:08:34.181Z" }, + { url = "https://files.pythonhosted.org/packages/19/65/0cd9285ab010ee8214c83d67c6b49417c40d86ce46f1aa109457b5a9b8d7/mypy-1.20.2-cp314-cp314-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:5a65aa591af023864fd08a97da9974e919452cfe19cb146c8a5dc692626445dc", size = 14706163, upload-time = "2026-04-21T17:05:15.51Z" }, + { url = "https://files.pythonhosted.org/packages/94/97/48ff3b297cafcc94d185243a9190836fb1b01c1b0918fff64e941e973cc9/mypy-1.20.2-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:4fef51b01e638974a6e69885687e9bd40c8d1e09a6cd291cca0619625cf1f558", size = 14938677, upload-time = "2026-04-21T17:05:39.562Z" }, + { url = "https://files.pythonhosted.org/packages/fd/a1/1b4233d255bdd0b38a1f284feeb1c143ca508c19184964e22f8d837ec851/mypy-1.20.2-cp314-cp314-win_amd64.whl", hash = "sha256:913485a03f1bcf5d279409a9d2b9ed565c151f61c09f29991e5faa14033da4c8", size = 11089322, upload-time = "2026-04-21T17:06:44.29Z" }, + { url = "https://files.pythonhosted.org/packages/78/c2/ce7ee2ba36aeb954ba50f18fa25d9c1188578654b97d02a66a15b6f09531/mypy-1.20.2-cp314-cp314-win_arm64.whl", hash = "sha256:c3bae4f855d965b5453784300c12ffc63a548304ac7f99e55d4dc7c898673aa3", size = 10017775, upload-time = "2026-04-21T17:07:20.732Z" }, + { url = "https://files.pythonhosted.org/packages/4e/a1/9d93a7d0b5859af0ead82b4888b46df6c8797e1bc5e1e262a08518c6d48e/mypy-1.20.2-cp314-cp314t-macosx_10_15_x86_64.whl", hash = "sha256:2de3dcea53babc1c3237a19002bc3d228ce1833278f093b8d619e06e7cc79609", size = 15549002, upload-time = "2026-04-21T17:08:23.107Z" }, + { url = 
"https://files.pythonhosted.org/packages/00/d2/09a6a10ee1bf0008f6c144d9676f2ca6a12512151b4e0ad0ff6c4fac5337/mypy-1.20.2-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:52b176444e2e5054dfcbcb8c75b0b719865c96247b37407184bbfca5c353f2c2", size = 14401942, upload-time = "2026-04-21T17:07:31.837Z" }, + { url = "https://files.pythonhosted.org/packages/57/da/9594b75c3c019e805250bed3583bdf4443ff9e6ef08f97e39ae308cb06f2/mypy-1.20.2-cp314-cp314t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:688c3312e5dadb573a2c69c82af3a298d43ecf9e6d264e0f95df960b5f6ac19c", size = 15041649, upload-time = "2026-04-21T17:09:34.653Z" }, + { url = "https://files.pythonhosted.org/packages/97/77/f75a65c278e6e8eba2071f7f5a90481891053ecc39878cc444634d892abe/mypy-1.20.2-cp314-cp314t-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:29752dbbf8cc53f89f6ac096d363314333045c257c9c75cbd189ca2de0455744", size = 15864588, upload-time = "2026-04-21T17:11:44.936Z" }, + { url = "https://files.pythonhosted.org/packages/d7/46/1a4e1c66e96c1a3246ddf5403d122ac9b0a8d2b7e65730b9d6533ba7a6d3/mypy-1.20.2-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:803203d2b6ea644982c644895c2f78b28d0e208bba7b27d9b921e0ec5eb207c6", size = 16093956, upload-time = "2026-04-21T17:10:17.683Z" }, + { url = "https://files.pythonhosted.org/packages/5a/2c/78a8851264dec38cd736ca5b8bc9380674df0dd0be7792f538916157716c/mypy-1.20.2-cp314-cp314t-win_amd64.whl", hash = "sha256:9bcb8aa397ff0093c824182fd76a935a9ba7ad097fcbef80ae89bf6c1731d8ec", size = 12568661, upload-time = "2026-04-21T17:11:54.473Z" }, + { url = "https://files.pythonhosted.org/packages/83/01/cd7318aa03493322ce275a0e14f4f52b8896335e4e79d4fb8153a7ad2b77/mypy-1.20.2-cp314-cp314t-win_arm64.whl", hash = "sha256:e061b58443f1736f8a37c48978d7ab581636d6ab03e3d4f99e3fa90463bb9382", size = 10389240, upload-time = "2026-04-21T17:09:42.719Z" }, + { url = 
"https://files.pythonhosted.org/packages/28/9a/f23c163e25b11074188251b0b5a0342625fc1cdb6af604757174fa9acc9b/mypy-1.20.2-py3-none-any.whl", hash = "sha256:a94c5a76ab46c5e6257c7972b6c8cff0574201ca7dc05647e33e795d78680563", size = 2637314, upload-time = "2026-04-21T17:05:54.5Z" }, ] [[package]] @@ -745,11 +763,11 @@ wheels = [ [[package]] name = "pathspec" -version = "0.12.1" +version = "1.0.4" source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/ca/bc/f35b8446f4531a7cb215605d100cd88b7ac6f44ab3fc94870c120ab3adbf/pathspec-0.12.1.tar.gz", hash = "sha256:a482d51503a1ab33b1c67a6c3813a26953dbdc71c31dacaef9a838c4e29f5712", size = 51043, upload-time = "2023-12-10T22:30:45Z" } +sdist = { url = "https://files.pythonhosted.org/packages/fa/36/e27608899f9b8d4dff0617b2d9ab17ca5608956ca44461ac14ac48b44015/pathspec-1.0.4.tar.gz", hash = "sha256:0210e2ae8a21a9137c0d470578cb0e595af87edaa6ebf12ff176f14a02e0e645", size = 131200, upload-time = "2026-01-27T03:59:46.938Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/cc/20/ff623b09d963f88bfde16306a54e12ee5ea43e9b597108672ff3a408aad6/pathspec-0.12.1-py3-none-any.whl", hash = "sha256:a0d503e138a4c123b27490a4f7beda6a01c6f288df0e4a8b79c7eb0dc7b4cc08", size = 31191, upload-time = "2023-12-10T22:30:43.14Z" }, + { url = "https://files.pythonhosted.org/packages/ef/3c/2c197d226f9ea224a9ab8d197933f9da0ae0aac5b6e0f884e2b8d9c8e9f7/pathspec-1.0.4-py3-none-any.whl", hash = "sha256:fb6ae2fd4e7c921a165808a552060e722767cfa526f99ca5156ed2ce45a5c723", size = 55206, upload-time = "2026-01-27T03:59:45.137Z" }, ] [[package]] @@ -845,7 +863,7 @@ wheels = [ [[package]] name = "pytest" -version = "9.0.2" +version = "9.0.3" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "colorama", marker = "sys_platform == 'win32'" }, @@ -854,9 +872,9 @@ dependencies = [ { name = "pluggy" }, { name = "pygments" }, ] -sdist = { url = 
"https://files.pythonhosted.org/packages/d1/db/7ef3487e0fb0049ddb5ce41d3a49c235bf9ad299b6a25d5780a89f19230f/pytest-9.0.2.tar.gz", hash = "sha256:75186651a92bd89611d1d9fc20f0b4345fd827c41ccd5c299a868a05d70edf11", size = 1568901, upload-time = "2025-12-06T21:30:51.014Z" } +sdist = { url = "https://files.pythonhosted.org/packages/7d/0d/549bd94f1a0a402dc8cf64563a117c0f3765662e2e668477624baeec44d5/pytest-9.0.3.tar.gz", hash = "sha256:b86ada508af81d19edeb213c681b1d48246c1a91d304c6c81a427674c17eb91c", size = 1572165, upload-time = "2026-04-07T17:16:18.027Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/3b/ab/b3226f0bd7cdcf710fbede2b3548584366da3b19b5021e74f5bde2a8fa3f/pytest-9.0.2-py3-none-any.whl", hash = "sha256:711ffd45bf766d5264d487b917733b453d917afd2b0ad65223959f59089f875b", size = 374801, upload-time = "2025-12-06T21:30:49.154Z" }, + { url = "https://files.pythonhosted.org/packages/d4/24/a372aaf5c9b7208e7112038812994107bc65a84cd00e0354a88c2c77a617/pytest-9.0.3-py3-none-any.whl", hash = "sha256:2c5efc453d45394fdd706ade797c0a81091eccd1d6e4bccfcd476e2b8e0ab5d9", size = 375249, upload-time = "2026-04-07T17:16:16.13Z" }, ] [[package]] @@ -874,16 +892,16 @@ wheels = [ [[package]] name = "pytest-cov" -version = "7.0.0" +version = "7.1.0" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "coverage" }, { name = "pluggy" }, { name = "pytest" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/5e/f7/c933acc76f5208b3b00089573cf6a2bc26dc80a8aece8f52bb7d6b1855ca/pytest_cov-7.0.0.tar.gz", hash = "sha256:33c97eda2e049a0c5298e91f519302a1334c26ac65c1a483d6206fd458361af1", size = 54328, upload-time = "2025-09-09T10:57:02.113Z" } +sdist = { url = "https://files.pythonhosted.org/packages/b1/51/a849f96e117386044471c8ec2bd6cfebacda285da9525c9106aeb28da671/pytest_cov-7.1.0.tar.gz", hash = "sha256:30674f2b5f6351aa09702a9c8c364f6a01c27aae0c1366ae8016160d1efc56b2", size = 55592, upload-time = "2026-03-21T20:11:16.284Z" } wheels = [ - { 
url = "https://files.pythonhosted.org/packages/ee/49/1377b49de7d0c1ce41292161ea0f721913fa8722c19fb9c1e3aa0367eecb/pytest_cov-7.0.0-py3-none-any.whl", hash = "sha256:3b8e9558b16cc1479da72058bdecf8073661c7f57f7d3c5f22a1c23507f2d861", size = 22424, upload-time = "2025-09-09T10:57:00.695Z" }, + { url = "https://files.pythonhosted.org/packages/9d/7a/d968e294073affff457b041c2be9868a40c1c71f4a35fcc1e45e5493067b/pytest_cov-7.1.0-py3-none-any.whl", hash = "sha256:a0461110b7865f9a271aa1b51e516c9a95de9d696734a2f71e3e78f46e1d4678", size = 22876, upload-time = "2026-03-21T20:11:14.438Z" }, ] [[package]] @@ -1029,27 +1047,27 @@ wheels = [ [[package]] name = "ruff" -version = "0.15.7" -source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/a1/22/9e4f66ee588588dc6c9af6a994e12d26e19efbe874d1a909d09a6dac7a59/ruff-0.15.7.tar.gz", hash = "sha256:04f1ae61fc20fe0b148617c324d9d009b5f63412c0b16474f3d5f1a1a665f7ac", size = 4601277, upload-time = "2026-03-19T16:26:22.605Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/41/2f/0b08ced94412af091807b6119ca03755d651d3d93a242682bf020189db94/ruff-0.15.7-py3-none-linux_armv6l.whl", hash = "sha256:a81cc5b6910fb7dfc7c32d20652e50fa05963f6e13ead3c5915c41ac5d16668e", size = 10489037, upload-time = "2026-03-19T16:26:32.47Z" }, - { url = "https://files.pythonhosted.org/packages/91/4a/82e0fa632e5c8b1eba5ee86ecd929e8ff327bbdbfb3c6ac5d81631bef605/ruff-0.15.7-py3-none-macosx_10_12_x86_64.whl", hash = "sha256:722d165bd52403f3bdabc0ce9e41fc47070ac56d7a91b4e0d097b516a53a3477", size = 10955433, upload-time = "2026-03-19T16:27:00.205Z" }, - { url = "https://files.pythonhosted.org/packages/ab/10/12586735d0ff42526ad78c049bf51d7428618c8b5c467e72508c694119df/ruff-0.15.7-py3-none-macosx_11_0_arm64.whl", hash = "sha256:7fbc2448094262552146cbe1b9643a92f66559d3761f1ad0656d4991491af49e", size = 10269302, upload-time = "2026-03-19T16:26:26.183Z" }, - { url = 
"https://files.pythonhosted.org/packages/eb/5d/32b5c44ccf149a26623671df49cbfbd0a0ae511ff3df9d9d2426966a8d57/ruff-0.15.7-py3-none-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:6b39329b60eba44156d138275323cc726bbfbddcec3063da57caa8a8b1d50adf", size = 10607625, upload-time = "2026-03-19T16:27:03.263Z" }, - { url = "https://files.pythonhosted.org/packages/5d/f1/f0001cabe86173aaacb6eb9bb734aa0605f9a6aa6fa7d43cb49cbc4af9c9/ruff-0.15.7-py3-none-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:87768c151808505f2bfc93ae44e5f9e7c8518943e5074f76ac21558ef5627c85", size = 10324743, upload-time = "2026-03-19T16:27:09.791Z" }, - { url = "https://files.pythonhosted.org/packages/7a/87/b8a8f3d56b8d848008559e7c9d8bf367934d5367f6d932ba779456e2f73b/ruff-0.15.7-py3-none-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:fb0511670002c6c529ec66c0e30641c976c8963de26a113f3a30456b702468b0", size = 11138536, upload-time = "2026-03-19T16:27:06.101Z" }, - { url = "https://files.pythonhosted.org/packages/e4/f2/4fd0d05aab0c5934b2e1464784f85ba2eab9d54bffc53fb5430d1ed8b829/ruff-0.15.7-py3-none-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:e0d19644f801849229db8345180a71bee5407b429dd217f853ec515e968a6912", size = 11994292, upload-time = "2026-03-19T16:26:48.718Z" }, - { url = "https://files.pythonhosted.org/packages/64/22/fc4483871e767e5e95d1622ad83dad5ebb830f762ed0420fde7dfa9d9b08/ruff-0.15.7-py3-none-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:4806d8e09ef5e84eb19ba833d0442f7e300b23fe3f0981cae159a248a10f0036", size = 11398981, upload-time = "2026-03-19T16:26:54.513Z" }, - { url = "https://files.pythonhosted.org/packages/b0/99/66f0343176d5eab02c3f7fcd2de7a8e0dd7a41f0d982bee56cd1c24db62b/ruff-0.15.7-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:dce0896488562f09a27b9c91b1f58a097457143931f3c4d519690dea54e624c5", size = 11242422, upload-time = "2026-03-19T16:26:29.277Z" }, - { url = 
"https://files.pythonhosted.org/packages/5d/3a/a7060f145bfdcce4c987ea27788b30c60e2c81d6e9a65157ca8afe646328/ruff-0.15.7-py3-none-manylinux_2_31_riscv64.whl", hash = "sha256:1852ce241d2bc89e5dc823e03cff4ce73d816b5c6cdadd27dbfe7b03217d2a12", size = 11232158, upload-time = "2026-03-19T16:26:42.321Z" }, - { url = "https://files.pythonhosted.org/packages/a7/53/90fbb9e08b29c048c403558d3cdd0adf2668b02ce9d50602452e187cd4af/ruff-0.15.7-py3-none-musllinux_1_2_aarch64.whl", hash = "sha256:5f3e4b221fb4bd293f79912fc5e93a9063ebd6d0dcbd528f91b89172a9b8436c", size = 10577861, upload-time = "2026-03-19T16:26:57.459Z" }, - { url = "https://files.pythonhosted.org/packages/2f/aa/5f486226538fe4d0f0439e2da1716e1acf895e2a232b26f2459c55f8ddad/ruff-0.15.7-py3-none-musllinux_1_2_armv7l.whl", hash = "sha256:b15e48602c9c1d9bdc504b472e90b90c97dc7d46c7028011ae67f3861ceba7b4", size = 10327310, upload-time = "2026-03-19T16:26:35.909Z" }, - { url = "https://files.pythonhosted.org/packages/99/9e/271afdffb81fe7bfc8c43ba079e9d96238f674380099457a74ccb3863857/ruff-0.15.7-py3-none-musllinux_1_2_i686.whl", hash = "sha256:1b4705e0e85cedc74b0a23cf6a179dbb3df184cb227761979cc76c0440b5ab0d", size = 10840752, upload-time = "2026-03-19T16:26:45.723Z" }, - { url = "https://files.pythonhosted.org/packages/bf/29/a4ae78394f76c7759953c47884eb44de271b03a66634148d9f7d11e721bd/ruff-0.15.7-py3-none-musllinux_1_2_x86_64.whl", hash = "sha256:112c1fa316a558bb34319282c1200a8bf0495f1b735aeb78bfcb2991e6087580", size = 11336961, upload-time = "2026-03-19T16:26:39.076Z" }, - { url = "https://files.pythonhosted.org/packages/26/6b/8786ba5736562220d588a2f6653e6c17e90c59ced34a2d7b512ef8956103/ruff-0.15.7-py3-none-win32.whl", hash = "sha256:6d39e2d3505b082323352f733599f28169d12e891f7dd407f2d4f54b4c2886de", size = 10582538, upload-time = "2026-03-19T16:26:15.992Z" }, - { url = "https://files.pythonhosted.org/packages/2b/e9/346d4d3fffc6871125e877dae8d9a1966b254fbd92a50f8561078b88b099/ruff-0.15.7-py3-none-win_amd64.whl", hash = 
"sha256:4d53d712ddebcd7dace1bc395367aec12c057aacfe9adbb6d832302575f4d3a1", size = 11755839, upload-time = "2026-03-19T16:26:19.897Z" }, - { url = "https://files.pythonhosted.org/packages/8f/e8/726643a3ea68c727da31570bde48c7a10f1aa60eddd628d94078fec586ff/ruff-0.15.7-py3-none-win_arm64.whl", hash = "sha256:18e8d73f1c3fdf27931497972250340f92e8c861722161a9caeb89a58ead6ed2", size = 11023304, upload-time = "2026-03-19T16:26:51.669Z" }, +version = "0.15.11" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/e4/8d/192f3d7103816158dfd5ea50d098ef2aec19194e6cbccd4b3485bdb2eb2d/ruff-0.15.11.tar.gz", hash = "sha256:f092b21708bf0e7437ce9ada249dfe688ff9a0954fc94abab05dcea7dcd29c33", size = 4637264, upload-time = "2026-04-16T18:46:26.58Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/02/1e/6aca3427f751295ab011828e15e9bf452200ac74484f1db4be0197b8170b/ruff-0.15.11-py3-none-linux_armv6l.whl", hash = "sha256:e927cfff503135c558eb581a0c9792264aae9507904eb27809cdcff2f2c847b7", size = 10607943, upload-time = "2026-04-16T18:46:05.967Z" }, + { url = "https://files.pythonhosted.org/packages/e7/26/1341c262e74f36d4e84f3d6f4df0ac68cd53331a66bfc5080daa17c84c0b/ruff-0.15.11-py3-none-macosx_10_12_x86_64.whl", hash = "sha256:7a1b5b2938d8f890b76084d4fa843604d787a912541eae85fd7e233398bbb73e", size = 10988592, upload-time = "2026-04-16T18:46:00.742Z" }, + { url = "https://files.pythonhosted.org/packages/03/71/850b1d6ffa9564fbb6740429bad53df1094082fe515c8c1e74b6d8d05f18/ruff-0.15.11-py3-none-macosx_11_0_arm64.whl", hash = "sha256:d4176f3d194afbdaee6e41b9ccb1a2c287dba8700047df474abfbe773825d1cb", size = 10338501, upload-time = "2026-04-16T18:46:03.723Z" }, + { url = "https://files.pythonhosted.org/packages/f2/11/cc1284d3e298c45a817a6aadb6c3e1d70b45c9b36d8d9cce3387b495a03a/ruff-0.15.11-py3-none-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:3b17c886fb88203ced3afe7f14e8d5ae96e9d2f4ccc0ee66aa19f2c2675a27e4", 
size = 10670693, upload-time = "2026-04-16T18:46:41.941Z" }, + { url = "https://files.pythonhosted.org/packages/ce/9e/f8288b034ab72b371513c13f9a41d9ba3effac54e24bfb467b007daee2ca/ruff-0.15.11-py3-none-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:49fafa220220afe7758a487b048de4c8f9f767f37dfefad46b9dd06759d003eb", size = 10416177, upload-time = "2026-04-16T18:46:21.717Z" }, + { url = "https://files.pythonhosted.org/packages/85/71/504d79abfd3d92532ba6bbe3d1c19fada03e494332a59e37c7c2dabae427/ruff-0.15.11-py3-none-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:f2ab8427e74a00d93b8bda1307b1e60970d40f304af38bccb218e056c220120d", size = 11221886, upload-time = "2026-04-16T18:46:15.086Z" }, + { url = "https://files.pythonhosted.org/packages/43/5a/947e6ab7a5ad603d65b474be15a4cbc6d29832db5d762cd142e4e3a74164/ruff-0.15.11-py3-none-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:195072c0c8e1fc8f940652073df082e37a5d9cb43b4ab1e4d0566ab8977a13b7", size = 12075183, upload-time = "2026-04-16T18:46:07.944Z" }, + { url = "https://files.pythonhosted.org/packages/9f/a1/0b7bb6268775fdd3a0818aee8efd8f5b4e231d24dd4d528ced2534023182/ruff-0.15.11-py3-none-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:a3a0996d486af3920dec930a2e7daed4847dfc12649b537a9335585ada163e9e", size = 11516575, upload-time = "2026-04-16T18:46:31.687Z" }, + { url = "https://files.pythonhosted.org/packages/30/c3/bb5168fc4d233cc06e95f482770d0f3c87945a0cd9f614b90ea8dc2f2833/ruff-0.15.11-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:1bef2cb556d509259f1fe440bb9cd33c756222cf0a7afe90d15edf0866702431", size = 11306537, upload-time = "2026-04-16T18:46:36.988Z" }, + { url = "https://files.pythonhosted.org/packages/e4/92/4cfae6441f3967317946f3b788136eecf093729b94d6561f963ed810c82e/ruff-0.15.11-py3-none-manylinux_2_31_riscv64.whl", hash = "sha256:030d921a836d7d4a12cf6e8d984a88b66094ccb0e0f17ddd55067c331191bf19", size = 11296813, upload-time = 
"2026-04-16T18:46:24.182Z" }, + { url = "https://files.pythonhosted.org/packages/43/26/972784c5dde8313acde8ac71ba8ac65475b85db4a2352a76c9934361f9bc/ruff-0.15.11-py3-none-musllinux_1_2_aarch64.whl", hash = "sha256:0e783b599b4577788dbbb66b9addcef87e9a8832f4ce0c19e34bf55543a2f890", size = 10633136, upload-time = "2026-04-16T18:46:39.802Z" }, + { url = "https://files.pythonhosted.org/packages/5b/53/3985a4f185020c2f367f2e08a103032e12564829742a1b417980ce1514a0/ruff-0.15.11-py3-none-musllinux_1_2_armv7l.whl", hash = "sha256:ae90592246625ba4a34349d68ec28d4400d75182b71baa196ddb9f82db025ef5", size = 10424701, upload-time = "2026-04-16T18:46:10.381Z" }, + { url = "https://files.pythonhosted.org/packages/d3/57/bf0dfb32241b56c83bb663a826133da4bf17f682ba8c096973065f6e6a68/ruff-0.15.11-py3-none-musllinux_1_2_i686.whl", hash = "sha256:1f111d62e3c983ed20e0ca2e800f8d77433a5b1161947df99a5c2a3fb60514f0", size = 10873887, upload-time = "2026-04-16T18:46:29.157Z" }, + { url = "https://files.pythonhosted.org/packages/02/05/e48076b2a57dc33ee8c7a957296f97c744ca891a8ffb4ffb1aaa3b3f517d/ruff-0.15.11-py3-none-musllinux_1_2_x86_64.whl", hash = "sha256:06f483d6646f59eaffba9ae30956370d3a886625f511a3108994000480621d1c", size = 11404316, upload-time = "2026-04-16T18:46:19.462Z" }, + { url = "https://files.pythonhosted.org/packages/88/27/0195d15fe7a897cbcba0904792c4b7c9fdd958456c3a17d2ea6093716a9a/ruff-0.15.11-py3-none-win32.whl", hash = "sha256:476a2aa56b7da0b73a3ee80b6b2f0e19cce544245479adde7baa65466664d5f3", size = 10655535, upload-time = "2026-04-16T18:46:12.47Z" }, + { url = "https://files.pythonhosted.org/packages/3a/5e/c927b325bd4c1d3620211a4b96f47864633199feed60fa936025ab27e090/ruff-0.15.11-py3-none-win_amd64.whl", hash = "sha256:8b6756d88d7e234fb0c98c91511aae3cd519d5e3ed271cae31b20f39cb2a12a3", size = 11779692, upload-time = "2026-04-16T18:46:17.268Z" }, + { url = 
"https://files.pythonhosted.org/packages/63/b6/aeadee5443e49baa2facd51131159fd6301cc4ccfc1541e4df7b021c37dd/ruff-0.15.11-py3-none-win_arm64.whl", hash = "sha256:063fed18cc1bbe0ee7393957284a6fe8b588c6a406a285af3ee3f46da2391ee4", size = 11032614, upload-time = "2026-04-16T18:46:34.487Z" }, ] [[package]] @@ -1134,11 +1152,11 @@ wheels = [ [[package]] name = "types-psutil" -version = "7.2.2.20260130" +version = "7.2.2.20260408" source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/69/14/fc5fb0a6ddfadf68c27e254a02ececd4d5c7fdb0efcb7e7e917a183497fb/types_psutil-7.2.2.20260130.tar.gz", hash = "sha256:15b0ab69c52841cf9ce3c383e8480c620a4d13d6a8e22b16978ebddac5590950", size = 26535, upload-time = "2026-01-30T03:58:14.116Z" } +sdist = { url = "https://files.pythonhosted.org/packages/44/14/279fd5defebbd560ede04aecd38f7651cccee7336f2264d0889d8c9a9d43/types_psutil-7.2.2.20260408.tar.gz", hash = "sha256:e8053450685965b8cd52afb62569073d00ea9967ae78bb45dff5f606847f97f2", size = 26556, upload-time = "2026-04-08T04:27:44.349Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/17/d7/60974b7e31545d3768d1770c5fe6e093182c3bfd819429b33133ba6b3e89/types_psutil-7.2.2.20260130-py3-none-any.whl", hash = "sha256:15523a3caa7b3ff03ac7f9b78a6470a59f88f48df1d74a39e70e06d2a99107da", size = 32876, upload-time = "2026-01-30T03:58:13.172Z" }, + { url = "https://files.pythonhosted.org/packages/af/40/2fd92a4a1ee088c4dbcc44c977908d9869838d9cd2a2fa2e001352f56694/types_psutil-7.2.2.20260408-py3-none-any.whl", hash = "sha256:0c334f6f6bc9e9c24fca5c7d1f0b6971c961a0a2e3956dc5ce704722c01f9762", size = 32861, upload-time = "2026-04-08T04:27:42.929Z" }, ] [[package]]