diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 5b6932c..f669829 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -2,19 +2,22 @@ name: Tests on: push: - branches: [ lwt, test-suite ] + branches: [ master ] pull_request: - branches: [ lwt ] + branches: [ master ] + +permissions: + contents: read jobs: test: runs-on: ubuntu-latest steps: - name: Checkout code - uses: actions/checkout@v4 + uses: actions/checkout@34e114876b0b11c390a56381ad16ebd13914f8d5 # v4.3.1 - name: Set up OCaml - uses: ocaml/setup-ocaml@v3 + uses: ocaml/setup-ocaml@e32b06a3e831ff2fbc6f08cf35be2085e3918014 # v3.6.1 with: ocaml-compiler: 5.1.1 dune-cache: true @@ -28,16 +31,33 @@ jobs: opam install . --deps-only --update-invariant npm install --no-save typescript browserify pug-lexer pug-parser pug-walk + - name: Cache QuickJS + id: cache-quickjs + uses: actions/cache@0057852bfaa89a56745cba8c7296529d2fc39830 # v4.3.0 + with: + path: quickjs + key: quickjs-2021-03-27-${{ runner.os }} + - name: Install QuickJS + if: steps.cache-quickjs.outputs.cache-hit != 'true' run: | curl -fsSL https://bellard.org/quickjs/quickjs-2021-03-27.tar.xz -o quickjs.tar.xz tar xvf quickjs.tar.xz && rm quickjs.tar.xz mv quickjs-2021-03-27 quickjs cd quickjs && make + - name: Cache Flow + id: cache-flow + uses: actions/cache@0057852bfaa89a56745cba8c7296529d2fc39830 # v4.3.0 + with: + path: flow + key: flow-v0.183.1 + - name: Install Flow run: | - git clone --branch v0.183.1 --depth 1 https://github.com/facebook/flow.git flow + if [ ! 
-d flow ]; then + git clone --branch v0.183.1 --depth 1 https://github.com/facebook/flow.git flow + fi ln -s "$(pwd)/flow/src/parser" src/flow_parser ln -s "$(pwd)/flow/src/third-party/sedlex" src/sedlex ln -s "$(pwd)/flow/src/hack_forked/utils/collections" src/collections diff --git a/AGENTS.md b/AGENTS.md index 5228f22..0a963ee 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -1,6 +1,6 @@ # Agent Information - String Extractor -This repository contains an OCaml-based internationalization (i18n) string extraction tool. It parses source files (JS, TS, Vue, Pug, HTML) and extracts strings for translation management. +This repository contains an OCaml-based internationalization (i18n) string extraction tool. It parses source files (JS, TS, Vue, Pug, HTML, Astro) and extracts strings for translation management. ## Documentation @@ -10,69 +10,91 @@ This repository contains an OCaml-based internationalization (i18n) string extra - Looking for specific functionality or function definitions before searching. - **[DEVELOPMENT.md](DEVELOPMENT.md)**: Contains instructions for environment setup, build processes for various platforms, and release workflows. **Read this file first** when: - - Setting up the development environment or installing dependencies (OCaml, JS, QuickJS). + - Setting up the development environment or installing dependencies (OCaml, JS, QuickJS, Flow). - Building the project for development or release. - Executing the tool for manual verification or testing. - Managing version numbers or release artifacts. ## Project Overview -- **Language**: OCaml (5.1.1) with some C++ (QuickJS bridge) and JavaScript (parsers via Browserify). +- **Language**: OCaml (5.1.1 in CI) with some C++ (QuickJS bridge) and JavaScript (parsers via Browserify). - **Architecture**: - - `src/cli/`: Main entry point, command-line interface, and output generation logic. 
+ - `src/cli/`: Main entry point (`strings.ml`), command-line interface, output generation (`.strings`/`.json`), and Vue file splitting (`vue.ml`). - `src/parsing/`: OCaml parsers using `Angstrom` for custom formats and `Flow_parser` for JS. - `src/quickjs/`: Bridge to QuickJS to run JavaScript-based parsers (TypeScript/Pug) from OCaml. - `src/utils/`: Common utilities for collection, timing, and I/O. -- **Key Libraries**: `Core`, `Lwt` (concurrency), `Angstrom` (parsing), `Yojson`, `Ppx_jane`. +- **Key Libraries**: `Core`, `Lwt` (concurrency), `Angstrom`, `Yojson`, `Ppx_jane`. +- **Active branch context**: This codebase is the **Lwt** variant (an Eio port exists on other branches). CI runs on pushes and pull requests to `master`. Concurrency code uses `Lwt.Syntax`/`Lwt_io`, and `Strings.parse` returns `string Core.String.Table.t Lwt.t`. ## Essential Commands ### Build - **Development build**: `dune build src/cli/strings.exe` - **Watch mode**: `dune build src/cli/strings.exe -w` -- **Release build (MacOS)**: `DUNE_PROFILE=release dune build src/cli/strings.exe` -- **Full release cycle**: See `DEVELOPMENT.md` for `cp`, `strip`, and Docker commands. +- **Release build**: `DUNE_PROFILE=release dune build src/cli/strings.exe` +- **Full release cycle** (strip, Docker/Linux): see `DEVELOPMENT.md`. +- If `dune` is not on PATH, run `eval $(opam env)` first (or prefix with `opam exec --`). + +### Test +```sh +eval $(opam env) +dune runtest tests/ +``` +This runs both the inline unit tests (`tests/test_runner.ml`) and an integration test defined as a `runtest` rule in `tests/dune`, which builds the CLI, runs it against `tests/fixtures/` in a temp directory, and verifies that existing French translations are preserved and `MISSING TRANSLATION` markers are emitted. 
### Run - After building: `./_build/default/src/cli/strings.exe [directory-to-extract-from]` -- The CLI expects to be run from the root of a project containing a `strings/` directory (or it will create one if a `.git` folder is present). - -### Installation (Dev Setup) -Refer to `DEVELOPMENT.md` for specific `opam` and `npm` setup steps, as the project has several external dependencies (Flow, QuickJS, pug-lexer, etc.). +- The CLI **fails with "This program must be run from the root of your project"** unless the working directory contains either a `strings/` directory or a `.git` directory. +- Output directory defaults to `strings/`; override with `--output DIR` (`-o`). +- All long flags require the full `--` form (`~full_flag_required` is set everywhere). + +### CLI Flags (actual, from `src/cli/strings.ml`) +- `--output DIR` / `-o`: change output directory (default `strings`). +- `--ts`: treat scripts in HTML/Pug element attributes as TypeScript. +- `--slow-pug` / `--sp`: use the official Pug parser via QuickJS instead of the fast native OCaml one. +- `--debug-pug` / `--dp`, `--debug-html` / `--dh`, and `--debug-astro` / `--da`: debug template parsing (mutually exclusive). The first two target `.vue` files; `--debug-astro` targets `.astro` files. +- There is **no** `--show-debugging` flag. + +## Setup Gotchas (things that break builds) + +- **Flow symlinks**: `src/flow_parser`, `src/sedlex`, and `src/collections` are symlinks into a cloned `flow` repo (v0.183.1) at the project root. If they're missing or dangling, builds fail with module errors. Recreate per `DEVELOPMENT.md`. +- **QuickJS dependency**: Requires a compiled `quickjs` directory (quickjs-2021-03-27, `make` run) at the project root. `dune` rules in `src/quickjs/dune` copy `quickjs.h`, `libquickjs.a`, and invoke `quickjs/qjsc` from there. +- **Generated runtime**: `src/quickjs/runtime.h` is generated at build time from `src/quickjs/parsers.js` via `npx browserify` then `qjsc`. 
Requires `npm install --no-save typescript browserify pug-lexer pug-parser pug-walk` at the repo root. +- **libomp**: `src/quickjs/dune` searches a hardcoded list of paths for `libomp.a`/`libgomp.a` (Homebrew Cellar paths on macOS, `/usr/lib/...` on Linux). If your system has it elsewhere, the build fails with "Could not find libomp.a" — add your path to the list in `src/quickjs/dune`. +- **Link flags**: Platform/profile-specific link flags live in `src/cli/link_flags.{system}.{dev,release}.dune` (the Linux dev one is just `()`). A missing file for your platform/profile combination breaks the build. +- **Version number**: `let version = "x.y.z"` in `src/cli/strings.ml` must be bumped manually for releases. ## Code Conventions & Patterns ### Parsing Strategy 1. **Direct Parsers**: Simple formats like `.strings`, `HTML`, and basic `Vue` tags are parsed using `Angstrom` in `src/parsing/`. -2. **JS/TS Parsing**: - - Javascript uses `Flow_parser` and a custom AST walker in `src/parsing/js_ast.ml`. +2. **JS/TS Parsing**: + - JavaScript uses `Flow_parser` and a custom AST walker in `src/parsing/js_ast.ml`. - TypeScript uses the official TS parser running inside QuickJS (`src/quickjs/`). -3. **Pug Parsing**: Has a "fast" OCaml implementation (`src/parsing/pug.ml`) and a "slow" official Pug implementation via QuickJS (`src/quickjs/`). +3. **Pug Parsing**: Has a "fast" OCaml implementation (`src/parsing/pug.ml`) and a "slow" official Pug implementation via QuickJS (enabled with `--slow-pug`). +4. **Astro Parsing**: Native Angstrom scanner (`src/parsing/astro.ml`) segments `.astro` files into frontmatter, ``/`` blocks, `{...}` expressions, and ` +``` + +With a Pug template: + +```vue + +``` + +``` +/* Home.vue */ +"Good morning, {name}!" = "Good morning, {name}!"; + +/* Home.vue */ +"Logout" = "Logout"; + +/* Widget.vue */ +"You and {count} other members are contributing." 
= "You and {count} other members are contributing."; +``` + +Note how `{name}` and `{count}` placeholders are part of the extracted string: the translator moves them around freely, and the runtime `L()` / `i18n` implementation substitutes the values. + +Plain `.html` and `.pug` files are scanned the same way as the corresponding Vue template languages. + +### Astro + +The extractor also scans `.astro` files ([Astro framework](https://astro.build)). It extracts: + +- The text inside `...` (or `...`) components, used as the translation key — including components nested inside JSX expressions such as `{cond && ...}` or `{items.map(() => ...)}`. +- `L('...')` calls found in the frontmatter (`--- ... ---`), in `{...}` expressions (including JSX), in `args={...}`, and in `" in + [%test_eq: string list] (Queue.to_list collector.possible_scripts) [ "\nconsole.log(L('Hey'))\n" ] + +let%test_unit "astro: style block is ignored" = + let collector = test_collect "" in + [%test_eq: string list] (Queue.to_list collector.possible_scripts) [] + +let%test_unit "astro: html comments are ignored" = + let collector = test_collect "Yes" in + [%test_eq: string list] (Queue.to_list collector.strings) [ "Yes" ] + +let%test_unit "astro: self-closing I18n" = + let collector = test_collect "" in + [%test_eq: string list] (Queue.to_list collector.strings) []; + [%test_eq: string list] (Queue.to_list collector.possible_scripts) [ "({ a: 1 })" ] + +let%test_unit "astro: full document" = + let source = + {|--- +import I18n from '../components/I18n.astro' +const title = L('Create a group') +--- + +

{L('Welcome to Group Income')}

+ +Logout + + + Yes, I want to {strong_}delete {name} permanently{_strong}. + +|} + in + let collector = test_collect source in + [%test_eq: string list] + (List.sort ~compare:String.compare (Queue.to_list collector.strings)) + [ "Logout"; "Yes, I want to {strong_}delete {name} permanently{_strong}." ]; + [%test_eq: int] (Queue.length collector.warnings) 0; + [%test_eq: bool] + (List.exists (Queue.to_list collector.possible_scripts) ~f:(fun s -> + String.is_substring s ~substring:"L('Create a group')" ) ) + true + +let%test_unit "astro: I18n nested in a mapped JSX expression" = + let collector = test_collect "{items.map(item => Mapped string)}" in + [%test_eq: string list] (Queue.to_list collector.strings) [ "Mapped string" ]; + [%test_eq: int] (Queue.length collector.warnings) 0 + +let%test_unit "astro: I18n nested in a conditional JSX expression" = + let collector = test_collect "{cond &&

Hello {name}!

}" in + [%test_eq: string list] (Queue.to_list collector.strings) [ "Hello {name}!" ]; + [%test_eq: int] (Queue.length collector.warnings) 1 + +let%test_unit "astro: I18n nested two expression levels deep" = + let collector = test_collect "{cond &&
{items.map(() => Deep)}
}" in + [%test_eq: string list] (Queue.to_list collector.strings) [ "Deep" ] + +let%test_unit "astro: nested I18n args expression is captured" = + let collector = + test_collect "{cond && Nested {b_}x{_b}}" + in + [%test_eq: string list] (Queue.to_list collector.strings) [ "Nested {b_}x{_b}" ]; + [%test_eq: bool] + (List.exists (Queue.to_list collector.possible_scripts) ~f:(fun s -> + String.is_substring s ~substring:"LTags(\"b\")" ) ) + true + +let%test_unit "astro: expression without I18n is not rescanned into strings" = + let collector = test_collect "{cond &&

{L('Inner JSX string')}

}" in + [%test_eq: string list] (Queue.to_list collector.strings) []; + [%test_eq: string list] + (Queue.to_list collector.possible_scripts) + [ "(cond &&

{L('Inner JSX string')}

)" ] + +let%test_unit "astro: regex literal with unbalanced closing brace" = + let collector = test_collect "

{x.replace(/}/g, '')}

" in + [%test_eq: string list] (Queue.to_list collector.possible_scripts) [ "(x.replace(/}/g, ''))" ] + +let%test_unit "astro: regex literal with unbalanced opening brace" = + let collector = test_collect "{x.replace(/{/g, '')}

{L('After')}

" in + [%test_eq: string list] + (Queue.to_list collector.possible_scripts) + [ "(x.replace(/{/g, ''))"; "(L('After'))" ] + +let%test_unit "astro: regex character class containing slash and brace" = + let collector = test_collect "{x.split(/[/}]/)}" in + [%test_eq: string list] (Queue.to_list collector.possible_scripts) [ "(x.split(/[/}]/))" ] + +let%test_unit "astro: regex with escaped slash and brace" = + let collector = test_collect {|{x.replace(/\/\}/g, '')}|} in + [%test_eq: string list] (Queue.to_list collector.possible_scripts) [ {|(x.replace(/\/\}/g, ''))|} ] + +let%test_unit "astro: regex after assignment" = + let collector = test_collect "{(() => { const re = /}/; return x.replace(re, '') })()}" in + [%test_eq: string list] + (Queue.to_list collector.possible_scripts) + [ "((() => { const re = /}/; return x.replace(re, '') })())" ] + +let%test_unit "astro: division is not treated as a regex" = + let collector = test_collect "{a / b}{x.length / 2 + y.length / 2}" in + [%test_eq: string list] + (Queue.to_list collector.possible_scripts) + [ "(a / b)"; "(x.length / 2 + y.length / 2)" ] + +let%test_unit "astro: define:vars expression on a script tag" = + let collector = test_collect "" in + [%test_eq: string list] + (Queue.to_list collector.possible_scripts) + [ "alert(message)"; "({ message: L('Hello') })" ] + +let%test_unit "astro: define:vars expression on a style tag" = + let collector = + test_collect "" + in + [%test_eq: string list] (Queue.to_list collector.possible_scripts) [ "({ color: theme })" ] + +let%test_unit "astro: expression on a self-closing script tag" = + let collector = test_collect " diff --git a/tests/test_runner.ml b/tests/test_runner.ml index f03d8d8..c5d6333 100644 --- a/tests/test_runner.ml +++ b/tests/test_runner.ml @@ -6,14 +6,16 @@ let%test_unit "js_extraction_basic" = let source = "L('Hello World'); L('Foo Bar');" in Js.extract_to_collector collector source; let strings = Queue.to_list collector.strings in - [%test_eq: string 
list] (List.sort strings ~compare:String.compare) (List.sort ["Hello World"; "Foo Bar"] ~compare:String.compare) + [%test_eq: string list] + (List.sort strings ~compare:String.compare) + (List.sort [ "Hello World"; "Foo Bar" ] ~compare:String.compare) let%test_unit "js_extraction_nested" = let collector = Utils.Collector.create ~path:"test.js" in let source = "function test() { if (true) { return L('Nested'); } }" in Js.extract_to_collector collector source; let strings = Queue.to_list collector.strings in - [%test_eq: string list] strings ["Nested"] + [%test_eq: string list] strings [ "Nested" ] let%test_unit "js_extraction_no_match" = let collector = Utils.Collector.create ~path:"test.js" in @@ -23,7 +25,8 @@ let%test_unit "js_extraction_no_match" = [%test_eq: string list] strings [] let%test_unit "strings_parsing" = - Lwt_main.run @@ ( + Lwt_main.run + @@ let path = "test.strings" in let content = {| /* Comment */ @@ -35,23 +38,26 @@ let%test_unit "strings_parsing" = let+ table = Strings.parse ~path ic in [%test_eq: string option] (Hashtbl.find table "Hello") (Some "Bonjour"); [%test_eq: string option] (Hashtbl.find table "World") (Some "Monde"); - [%test_eq: string option] (Hashtbl.find table "Missing") None) + [%test_eq: string option] (Hashtbl.find table "Missing") None let%test_unit "french_strings_parsing" = - Lwt_main.run @@ ( + Lwt_main.run + @@ let path = "french.strings" in - let content = {| + let content = + {| /* Accented characters */ "Logout" = "Déconnexion"; "You and {count} others" = "Vous et {count} autres"; "Settings" = "Paramètres"; -|} in +|} + in let ic = Lwt_io.of_bytes ~mode:Lwt_io.input @@ Lwt_bytes.of_string content in let open Lwt.Syntax in let+ table = Strings.parse ~path ic in [%test_eq: string option] (Hashtbl.find table "Logout") (Some "Déconnexion"); [%test_eq: string option] (Hashtbl.find table "You and {count} others") (Some "Vous et {count} autres"); - [%test_eq: string option] (Hashtbl.find table "Settings") (Some 
"Paramètres")) + [%test_eq: string option] (Hashtbl.find table "Settings") (Some "Paramètres") let%test_unit "html_extraction" = let collector = Utils.Collector.create ~path:"test.html" in @@ -59,13 +65,53 @@ let%test_unit "html_extraction" = let on_ok parsed = Parsing.Html.collect collector parsed in Parsing.Basic.exec_parser ~on_ok Parsing.Html.parser ~path:"test.html" ~language_name:"HTML" source; let strings = Queue.to_list collector.strings in - [%test_eq: string list] strings ["Hello HTML"] + [%test_eq: string list] strings [ "Hello HTML" ] let%test_unit "pug_extraction" = let collector = Utils.Collector.create ~path:"test.pug" in let source = "i18n Hello Pug" in let string_parsers = Parsing.Basic.make_string_parsers () in let on_ok parsed = Parsing.Pug.collect collector parsed in - Parsing.Basic.exec_parser ~on_ok (Parsing.Pug.parser string_parsers) ~path:"test.pug" ~language_name:"Pug" source; + Parsing.Basic.exec_parser ~on_ok (Parsing.Pug.parser string_parsers) ~path:"test.pug" + ~language_name:"Pug" source; let strings = Queue.to_list collector.strings in - [%test_eq: string list] strings ["Hello Pug"] + [%test_eq: string list] strings [ "Hello Pug" ] + +let extract_astro source = + let collector = Utils.Collector.create ~path:"test.astro" in + let on_ok parsed = Parsing.Astro.collect collector parsed in + Parsing.Basic.exec_parser ~on_ok (Parsing.Astro.parser ()) ~path:"test.astro" ~language_name:"Astro" + source; + collector + +let%test_unit "astro_i18n_extraction" = + let collector = extract_astro "Hello Astro" in + [%test_eq: string list] (Queue.to_list collector.strings) [ "Hello Astro" ]; + [%test_eq: int] (Queue.length collector.warnings) 0 + +let%test_unit "astro_lowercase_i18n_extraction" = + let collector = extract_astro "Hello Astro" in + [%test_eq: string list] (Queue.to_list collector.strings) [ "Hello Astro" ] + +let%test_unit "astro_missing_is_raw_warning" = + let collector = extract_astro "Hello {name}!" 
in + [%test_eq: string list] (Queue.to_list collector.strings) [ "Hello {name}!" ]; + [%test_eq: int] (Queue.length collector.warnings) 1 + +let%test_unit "astro_frontmatter_and_expression_scripts" = + let collector = + extract_astro "---\nconst a = L('From Frontmatter')\n---\n

{L('From Expression')}

" + in + let scripts = Queue.to_list collector.possible_scripts in + [%test_eq: bool] (List.exists scripts ~f:(String.is_substring ~substring:"L('From Frontmatter')")) true; + [%test_eq: bool] (List.exists scripts ~f:(String.is_substring ~substring:"L('From Expression')")) true + +let%test_unit "astro_args_and_script_block" = + let collector = + extract_astro + "Hi" + in + [%test_eq: string list] (Queue.to_list collector.strings) [ "Hi" ]; + let scripts = Queue.to_list collector.possible_scripts in + [%test_eq: bool] (List.exists scripts ~f:(String.is_substring ~substring:"LTags(\"strong\")")) true; + [%test_eq: bool] (List.exists scripts ~f:(String.is_substring ~substring:"L('From Script')")) true