From 7d52b0aa1276fe17e2c1663d120e651e3b2d2acf Mon Sep 17 00:00:00 2001 From: "Marko K. S." Date: Fri, 1 May 2026 01:02:31 +0200 Subject: [PATCH] Refactor project into yini_parser package and implement validation policy - Reorganize source code into a unified `yini_parser` package structure - Implement `YiniValidator` to support strict and lenient parsing modes - Add detection and handling for duplicate keys, duplicate sections, and name collisions - Introduce custom `YiniParseError` and `YiniParseWarning` classes for better error reporting - Expand the test suite to cover value parsing, section nesting, and conflict resolution - Update `Taskfile.yml` with improved cleanup, typechecking, and ANTLR generation tasks --- .github/workflows/run-tests.yml | 1 + .gitignore | 12 +- README.md | 7 + Taskfile.yml | 54 +- src/api/load.py | 50 -- src/core/VisitorInterp.py | 41 -- src/core/parser_engine.py | 3 - src/core/yini_builder_visitor.py | 300 ---------- src/dev.py | 54 ++ src/grammar/__init__.py | 0 src/grammar/generated/__init__.py | 0 src/main.py | 2 +- src/yini_parser/__init__.py | 14 + src/yini_parser/api/__init__.py | 13 + src/{ => yini_parser}/api/errors.py | 2 +- src/yini_parser/api/load.py | 55 ++ src/yini_parser/api/warnings.py | 53 ++ src/yini_parser/core/__init__.py | 1 + src/yini_parser/core/validator.py | 157 ++++++ src/yini_parser/core/yini_builder_visitor.py | 522 ++++++++++++++++++ src/{ => yini_parser}/grammar/README.md | 0 src/yini_parser/grammar/__init__.py | 1 + .../grammar/generated/YiniLexer.interp | 4 +- .../grammar/generated/YiniLexer.py | 8 +- .../grammar/generated/YiniLexer.tokens | 2 +- .../grammar/generated/YiniParser.interp | 2 +- .../grammar/generated/YiniParser.py | 7 +- .../grammar/generated/YiniParser.tokens | 2 +- .../grammar/generated/YiniParserVisitor.py | 2 +- tests/test_conflicts.py | 227 ++++++++ ..._json.py => test_fixtures_against_json.py} | 2 +- tests/test_load.py | 2 +- tests/test_sections.py | 179 ++++++ .../__init__.py => tests/test_strict_mode.py | 0 tests/test_values.py | 193 +++++++ 35 files changed, 1543 insertions(+), 429 deletions(-) delete mode 100644 src/api/load.py delete mode 100644 src/core/VisitorInterp.py delete mode 100644 src/core/parser_engine.py delete mode 100644 src/core/yini_builder_visitor.py create mode 100644 src/dev.py delete mode 100644 src/grammar/__init__.py delete mode 100644 src/grammar/generated/__init__.py create mode 100644 src/yini_parser/__init__.py create mode 100644 src/yini_parser/api/__init__.py rename src/{ => yini_parser}/api/errors.py (94%) create mode 100644 src/yini_parser/api/load.py create mode 100644 src/yini_parser/api/warnings.py create mode 100644 src/yini_parser/core/__init__.py create mode 100644 src/yini_parser/core/validator.py create mode 100644 src/yini_parser/core/yini_builder_visitor.py rename src/{ => yini_parser}/grammar/README.md (100%) create mode 100644 src/yini_parser/grammar/__init__.py rename src/{ => yini_parser}/grammar/generated/YiniLexer.interp (99%) rename src/{ => yini_parser}/grammar/generated/YiniLexer.py (99%) rename src/{ => yini_parser}/grammar/generated/YiniLexer.tokens (96%) rename src/{ => yini_parser}/grammar/generated/YiniParser.interp (99%) rename src/{ => yini_parser}/grammar/generated/YiniParser.py (99%) rename src/{ => yini_parser}/grammar/generated/YiniParser.tokens (96%) rename src/{ => yini_parser}/grammar/generated/YiniParserVisitor.py (98%) create mode 100644 tests/test_conflicts.py rename tests/{test_samples_against_json.py => test_fixtures_against_json.py} (97%) create mode 100644 tests/test_sections.py rename src/core/__init__.py => tests/test_strict_mode.py (100%) create mode 100644 tests/test_values.py diff --git a/.github/workflows/run-tests.yml b/.github/workflows/run-tests.yml index f82e1f7..42a55b5 100644 --- a/.github/workflows/run-tests.yml +++ b/.github/workflows/run-tests.yml @@ -1,3 +1,4 @@ +# .github/workflows/run-tests.yml name: Run tests on: diff --git a/.gitignore b/.gitignore index fd0c54a..537234a 100644 --- a/.gitignore +++ b/.gitignore @@ -1,11 +1,19 @@ # Ignore ENV file. .env -# Python bytecode / caches +# Python bytecode / caches. __pycache__/ -*.py[cod] *$py.class +# "*.py[cod]"" covers "*.pyc" too. +*.py[cod] + +.pytest_cache/ +.mypy_cache/ + +# Ignore Python linting cache. +.ruff_cache/ + # Ignore compiled output (containing all emitted code and types). dist/ diff --git a/README.md b/README.md index be95f8d..38379e8 100644 --- a/README.md +++ b/README.md @@ -5,3 +5,10 @@ from yini_parser import load data = load("sample/basic.yini") print(data["App"]["name"]) +## Tests + +In the dir `tests` contains some small but focused implementation-local test suite, with: + +- Tests for the public API. +- Some few key semantic tests. +- Some smoke/golden tests. diff --git a/Taskfile.yml b/Taskfile.yml index 3e7da07..4da2747 100644 --- a/Taskfile.yml +++ b/Taskfile.yml @@ -7,13 +7,13 @@ version: '3' vars: - GRAMMAR_BASE: ./grammar/v1.0.0-rc.5 + GRAMMAR_BASE: ./grammar/v1.0.0-rc.5x # BASE_FILE: YiniLexerBase.py # Target/language specific base file. PARSER_FILE: "{{.GRAMMAR_BASE}}/YiniParser.g4" LEXER_FILE: "{{.GRAMMAR_BASE}}/YiniLexer.g4" ANTLR_JAR: ./libs/antlr4/antlr4-4.13.2-complete.jar - DIR_OUTPUT: ./src/grammar/generated + DIR_OUTPUT: ./src/yini_parser/grammar/generated START_RULE: yini tasks: @@ -32,6 +32,11 @@ tasks: # - python src/main.py ./sample/basic.yini - python src/main.py sample/basic.yini + dev: + desc: Run development entrypoint + cmds: + - python src/dev.py + install: desc: Install Python packages cmds: @@ -42,7 +47,7 @@ tasks: cmds: - python -m pip install -r requirements-dev.txt - generate-antlr: + antlr: desc: Generate sources for the grammar (Windows only) platforms: [windows] cmds: @@ -97,7 +102,7 @@ tasks: # -lib {{.DIR_OUTPUT}} # {{.PARSER_FILE}} - update-generate: + update-antlr: desc: Generate using antlr4 wrapper (Windows only) platforms: [windows] cmds: @@ -133,16 +138,35 @@ tasks: typecheck: desc: Run mypy type checking cmds: -#- python -m mypy src - - python -m mypy src/main.py src/core - - cls-gen: -# desc: Delete generated output directory contents (Windows only) -# platforms: [windows] -# cmds: -# - del "gen-output" - desc: Delete generated output directory contents (Windows only) + #- python -m mypy src + #- python -m mypy src/main.py src/yini-parser/core + #- python -m mypy src + # - python -m mypy -p src/yini_parser --explicit-package-bases --exclude "src[/\\\\]yini_parser[/\\\\]grammar[/\\\\]generated[/\\\\]" --ignore-missing-imports + - cmd /c "set MYPYPATH=src&& python -m mypy -p yini_parser --explicit-package-bases --ignore-missing-imports" + + clean-gen: + desc: Delete generated ANTLR output directory contents (Windows only) + platforms: [windows] + cmds: + #- rmdir /s /q "{{.DIR_OUTPUT}}" + #- cmd /c rmdir /s /q "{{.DIR_OUTPUT}}" + - powershell -NoProfile -Command "if (Test-Path '{{.DIR_OUTPUT}}') { Remove-Item -Recurse -Force '{{.DIR_OUTPUT}}' }" + + # Deletes Python caches under src: + # - __pycache__ directories + # - .pyc files + # - .pyo files, if any + clean-cache: + desc: Delete Python cache files and __pycache__ directories under src (Windows only) platforms: [windows] cmds: - - rmdir /s /q "{{.DIR_OUTPUT}}" - \ No newline at end of file + - powershell -NoProfile -Command "Get-ChildItem 'src' -Recurse -Directory -Filter '__pycache__' | Remove-Item -Recurse -Force" + - powershell -NoProfile -Command "Get-ChildItem 'src' -Recurse -File -Include *.pyc,*.pyo | Remove-Item -Force" + - powershell -NoProfile -Command "Get-ChildItem 'tests' -Recurse -Directory -Filter '__pycache__' | Remove-Item -Recurse -Force" + - powershell -NoProfile -Command "Get-ChildItem 'tests' -Recurse -File -Include *.pyc,*.pyo | Remove-Item -Force" + + clean: + desc: Run all cleanup tasks + deps: + - clean-gen + - clean-cache diff --git a/src/api/load.py b/src/api/load.py deleted file mode 100644 index 38af6cc..0000000 --- a/src/api/load.py +++ /dev/null @@ -1,50 +0,0 @@ -# src/api/load.py - -from __future__ import annotations - -from pathlib import Path -from typing import Any - -from antlr4 import CommonTokenStream, FileStream, InputStream - -from core.yini_builder_visitor import YiniBuilderVisitor -from grammar.generated.YiniLexer import YiniLexer -from grammar.generated.YiniParser import YiniParser - - -def loads(text: str, strict=False) -> dict[str, Any]: - """ - Parse YINI text and return the resulting Python dictionary. - """ - - input_stream = InputStream(text) - return _parse_input_stream(input_stream) - - -def load(path: str, strict=False) -> dict[str, Any]: - """ - Parse a YINI file from disk and return the resulting Python dictionary. - """ - - file_path = Path(path) - input_stream = FileStream(str(file_path), encoding="utf-8") - return _parse_input_stream(input_stream) - - -def _parse_input_stream(input_stream: InputStream | FileStream) -> dict[str, Any]: - lexer = YiniLexer(input_stream) - stream = CommonTokenStream(lexer) - parser = YiniParser(stream) - - tree = parser.yini() - - if parser.getNumberOfSyntaxErrors() > 0: - raise ValueError(f"Failed to parse YINI input: {parser.getNumberOfSyntaxErrors()} syntax error(s).") - - visitor = YiniBuilderVisitor() - result = visitor.visit(tree) - - if not isinstance(result, dict): - raise TypeError(f"Expected parsed result to be a dict, got {type(result).__name__}.") - - return result diff --git a/src/core/VisitorInterp.py b/src/core/VisitorInterp.py deleted file mode 100644 index b5fbead..0000000 --- a/src/core/VisitorInterp.py +++ /dev/null @@ -1,41 +0,0 @@ -import sys -from antlr4 import * -from grammar.generated import YiniParserVisitor -from grammar.generated.YiniLexer import YiniLexer -from grammar.generated.YiniParser import YiniParser - -class VisitorInterp(YiniParserVisitor): - def visitAtom(self, ctx:YiniParser.AtomContext): - return int(ctx.getText()) - - def visitExpr(self, ctx:YiniParser.ExprContext): - if ctx.getChildCount() == 3: - if ctx.getChild(0).getText() == "(": - return self.visit(ctx.getChild(1)) - op = ctx.getChild(1).getText() - v1 = self.visit(ctx.getChild(0)) - v2 = self.visit(ctx.getChild(2)) - if op == "+": - return v1 + v2 - if op == "-": - return v1 - v2 - if op == "*": - return v1 * v2 - if op == "/": - return v1 / v2 - return 0 - if ctx.getChildCount() == 2: - opc = ctx.getChild(0).getText() - if opc == "+": - return self.visit(ctx.getChild(1)) - if opc == "-": - return - self.visit(ctx.getChild(1)) - return 0 - if ctx.getChildCount() == 1: - return self.visit(ctx.getChild(0)) - return 0 - - def visitStart_(self, ctx:YiniParser.Start_Context): - for i in range(0, ctx.getChildCount(), 2): - print(self.visit(ctx.getChild(i))) - return 0 diff --git a/src/core/parser_engine.py b/src/core/parser_engine.py deleted file mode 100644 index 40398a2..0000000 --- a/src/core/parser_engine.py +++ /dev/null @@ -1,3 +0,0 @@ -""" -parse -> ANTLR lexer/parser creates tree -""" \ No newline at end of file diff --git a/src/core/yini_builder_visitor.py b/src/core/yini_builder_visitor.py deleted file mode 100644 index 547b712..0000000 --- a/src/core/yini_builder_visitor.py +++ /dev/null @@ -1,300 +0,0 @@ -""" -transform / build_model -> visitor converts tree to Python values -""" - -# src/core/VisitorInterp.py -from __future__ import annotations - -from decimal import Decimal -from typing import Any - -from grammar.generated.YiniParser import YiniParser -from grammar.generated.YiniParserVisitor import YiniParserVisitor - - -class YiniBuilderVisitor(YiniParserVisitor): - """ - Builds a Python dictionary from the parsed YINI tree. - - Current behavior: - - Top-level sections become nested dicts. - - Assignments go into the current section. - - Lists become Python lists. - - Inline objects become Python dicts. - - Booleans become True/False. - - Null becomes None. - - Strings become Python str. - - Numbers become int or float. - - Notes: - - This is a first practical version, not the final strict/lenient validator. - - Repeated keys currently overwrite earlier values. - """ - - def __init__(self) -> None: - super().__init__() - self._root: dict[str, Any] = {} - self._section_stack: list[dict[str, Any]] = [] - self._section_names: list[str] = [] - - # ------------------------------------------------------------ - # Public/root - # ------------------------------------------------------------ - - def visitYini(self, ctx: YiniParser.YiniContext) -> dict[str, Any]: - for stmt_ctx in ctx.stmt(): - self.visit(stmt_ctx) - return self._root - - # ------------------------------------------------------------ - # Statements - # ------------------------------------------------------------ - - def visitStmt(self, ctx: YiniParser.StmtContext) -> Any: - section_token = ctx.SECTION_HEAD() - if section_token is not None: - self._enter_section(section_token.getText()) - return None - - assignment_ctx = ctx.assignment() - if assignment_ctx is not None: - return self.visit(assignment_ctx) - - return None - - def visitAssignment(self, ctx: YiniParser.AssignmentContext) -> None: - key, value = self.visit(ctx.member()) - target = self._current_container() - target[key] = value - return None - - def visitMember(self, ctx: YiniParser.MemberContext) -> tuple[str, Any]: - key = ctx.KEY().getText() - value_ctx = ctx.value() - - # In your grammar, empty value is allowed and intended to mean null - value = self.visit(value_ctx) if value_ctx is not None else None - return key, value - - # ------------------------------------------------------------ - # Values - # ------------------------------------------------------------ - - def visitValue(self, ctx: YiniParser.ValueContext) -> Any: - return self.visitChildren(ctx) - - def visitNull_literal(self, ctx: YiniParser.Null_literalContext) -> None: - return None - - def visitBoolean_literal(self, ctx: YiniParser.Boolean_literalContext) -> bool: - text = ctx.getText().strip().lower() - return text in {"true", "on", "yes"} - - def visitNumber_literal(self, ctx: YiniParser.Number_literalContext) -> int | float: - text = ctx.getText().strip() - - lowered = text.lower() - if lowered.startswith(("0x", "#")): - cleaned = text[1:] if text.startswith("#") else text[2:] - return int(cleaned, 16) - - if lowered.startswith("0b"): - return int(text[2:], 2) - - if lowered.startswith("%"): - return int(text[1:], 2) - - if lowered.startswith("0o"): - return int(text[2:], 8) - - if lowered.startswith("0z"): - return self._parse_duodecimal(text[2:]) - - if any(ch in text for ch in ".eE"): - return float(text) - - return int(text, 10) - - def visitString_literal(self, ctx: YiniParser.String_literalContext) -> str: - first = self._decode_string_token(ctx.STRING().getText()) - suffix = "".join(self.visit(part) for part in ctx.string_concat()) - return first + suffix - - def visitString_concat(self, ctx: YiniParser.String_concatContext) -> str: - return self._decode_string_token(ctx.STRING().getText()) - - def visitList_literal(self, ctx: YiniParser.List_literalContext) -> list[Any]: - if ctx.EMPTY_LIST() is not None: - return [] - - elements_ctx = ctx.elements() - if elements_ctx is None: - return [] - - return self.visit(elements_ctx) - - def visitElements(self, ctx: YiniParser.ElementsContext) -> list[Any]: - return [self.visit(value_ctx) for value_ctx in ctx.value()] - - def visitObject_literal(self, ctx: YiniParser.Object_literalContext) -> dict[str, Any]: - if ctx.EMPTY_OBJECT() is not None: - return {} - - object_members_ctx = ctx.object_members() - if object_members_ctx is None: - return {} - - return self.visit(object_members_ctx) - - def visitObject_members(self, ctx: YiniParser.Object_membersContext) -> dict[str, Any]: - result: dict[str, Any] = {} - for member_ctx in ctx.object_member(): - key, value = self.visit(member_ctx) - result[key] = value - return result - - def visitObject_member(self, ctx: YiniParser.Object_memberContext) -> tuple[str, Any]: - key = ctx.KEY().getText() - value = self.visit(ctx.value()) - return key, value - - # ------------------------------------------------------------ - # Helpers - # ------------------------------------------------------------ - - def _current_container(self) -> dict[str, Any]: - if self._section_stack: - return self._section_stack[-1] - return self._root - - def _enter_section(self, raw_text: str) -> None: - level, name = self._parse_section_head(raw_text) - - # Root-level section is level 1. - # level N means nesting depth N. - while len(self._section_stack) >= level: - self._section_stack.pop() - self._section_names.pop() - - parent = self._section_stack[-1] if self._section_stack else self._root - - existing = parent.get(name) - if existing is None: - new_section: dict[str, Any] = {} - parent[name] = new_section - elif isinstance(existing, dict): - new_section = existing - else: - raise ValueError(f"Section name conflicts with non-object value: {name!r}") - - self._section_stack.append(new_section) - self._section_names.append(name) - - def _parse_section_head(self, raw_text: str) -> tuple[int, str]: - """ - Parses a SECTION_HEAD token text like: - "^ App\\n" - "^^ Server\\n" - "^7 DeepSection\\n" - - Returns: - (level, name) - """ - text = raw_text.strip() - if not text: - raise ValueError("Empty section header") - - marker = text[0] - - if marker not in {"^", "<", "§"}: - raise ValueError(f"Invalid section marker: {marker!r}") - - i = 0 - while i < len(text) and text[i] == marker: - i += 1 - - if i == 1 and i < len(text) and text[i].isdigit(): - j = i - while j < len(text) and text[j].isdigit(): - j += 1 - level = int(text[i:j]) - name = text[j:].strip() - else: - level = i - name = text[i:].strip() - - if not name: - raise ValueError(f"Missing section name in header: {raw_text!r}") - - return level, self._strip_backticks(name) - - def _strip_backticks(self, text: str) -> str: - if len(text) >= 2 and text[0] == "`" and text[-1] == "`": - return text[1:-1] - return text - - def _decode_string_token(self, token_text: str) -> str: - """ - Minimal first-pass string decoding. - - Handles: - - optional prefixes: r, c, h in either case - - single/double quoted strings - - triple-quoted strings - - simple quote stripping - - This is intentionally conservative for now. - """ - text = token_text - - if not text: - return "" - - prefix = "" - if len(text) >= 2 and text[0] in "RrCcHh" and text[1] in {'"', "'"}: - prefix = text[0] - text = text[1:] - elif len(text) >= 4 and text[0] in "RrCc" and text[1:4] == '"""': - prefix = text[0] - text = text[1:] - - # Triple-quoted - if text.startswith('"""') and text.endswith('"""') and len(text) >= 6: - inner = text[3:-3] - if prefix in {"C", "c"}: - return bytes(inner, "utf-8").decode("unicode_escape") - return inner - - # Single-quoted or double-quoted - if len(text) >= 2 and text[0] == text[-1] and text[0] in {"'", '"'}: - inner = text[1:-1] - - # Raw, hyper, and unprefixed strings: return as-is - if prefix in {"", "R", "r", "H", "h"}: - return inner - - # Classic strings: decode escapes - if prefix in {"C", "c"}: - return bytes(inner, "utf-8").decode("unicode_escape") - - return inner - - return text - - def _parse_duodecimal(self, text: str) -> int: - value = 0 - for ch in text: - if ch.isdigit(): - digit = int(ch) - else: - lowered = ch.lower() - if lowered == "a" or lowered == "x": - digit = 10 - elif lowered == "b" or lowered == "e": - digit = 11 - else: - raise ValueError(f"Invalid duodecimal digit: {ch!r}") - if digit >= 12: - raise ValueError(f"Invalid duodecimal digit: {ch!r}") - value = value * 12 + digit - return value diff --git a/src/dev.py b/src/dev.py new file mode 100644 index 0000000..6d6b428 --- /dev/null +++ b/src/dev.py @@ -0,0 +1,54 @@ +# src/dev.py +""" +For development, playground, and local manual testing/debugging. +""" + +from pprint import pprint + +from yini_parser.api import load, loads + +def main() -> None: + print("*** dev run ***\n") + + print("--- data: --------------------------------------------") + data = load("sample/basic.yini") + pprint(data) + print("------------------------------------------------------\n") + + print("--- text2: --------------------------------------------") + text2 = """ +^ App +name = "This is a test-name." +debug = true +isDebug = YES +object = { x: 3, y: 3, content: {env: "dev", log: ['aa', 'bb', 'cc']}} +""" + config2 = loads(text2) + pprint(config2) + print() + + print(f"isDebug = {config2['App']['isDebug']}") + print(f"xxx = {config2['App']['object']['x']}") + print(f"xxx = {config2['App']['object']['content']}") + print(f"xxx = {config2['App']['object']['content']['log'][1]}") + + print("------------------------------------------------------\n") + + print("--- text3: --------------------------------------------") + text3 = """ +^ Title +name = "This is a test-name." +debug = true +isDebug = YES +^ Title +name2 = 'name2data' +""" + config3 = loads(text3) + pprint(config3) + print() + + print("------------------------------------------------------\n") + + +if __name__ == "__main__": + main() diff --git a/src/grammar/__init__.py b/src/grammar/__init__.py deleted file mode 100644 index e69de29..0000000 diff --git a/src/grammar/generated/__init__.py b/src/grammar/generated/__init__.py deleted file mode 100644 index e69de29..0000000 diff --git a/src/main.py b/src/main.py index c372e99..8178618 100644 --- a/src/main.py +++ b/src/main.py @@ -11,7 +11,7 @@ import sys from pprint import pprint -from api.load import load +from yini_parser.api.load import load def main(argv): diff --git a/src/yini_parser/__init__.py b/src/yini_parser/__init__.py new file mode 100644 index 0000000..1d238b9 --- /dev/null +++ b/src/yini_parser/__init__.py @@ -0,0 +1,14 @@ +# src/yini_parser/__init__.py +"""Public API for the yini_parser package.""" + +from .api import YiniParseError, load, loads + +""" +So users can write: +from yini_parser import load, loads, YiniParseError + +Instead: +from yini_parser.api import load, loads, YiniParseError +""" + +__all__ = ["YiniParseError", "load", "loads"] diff --git a/src/yini_parser/api/__init__.py b/src/yini_parser/api/__init__.py new file mode 100644 index 0000000..31b546b --- /dev/null +++ b/src/yini_parser/api/__init__.py @@ -0,0 +1,13 @@ +# src/yini_parser/api/__init__.py +"""Public API helpers for parsing YINI documents.""" + +from .errors import YiniParseError +from .warnings import YiniParseWarning +from .load import load, loads + +__all__ = [ + "load", + "loads", + "YiniParseError", + "YiniParseWarning", +] diff --git a/src/api/errors.py b/src/yini_parser/api/errors.py similarity index 94% rename from src/api/errors.py rename to src/yini_parser/api/errors.py index efa889c..83b9e3f 100644 --- a/src/api/errors.py +++ b/src/yini_parser/api/errors.py @@ -1,4 +1,4 @@ -# src/api/errors.py +# src/yini_parser/api/errors.py class YiniParseError(Exception): def __init__(self, message: str, line: int | None = None, column: int | None = None): diff --git a/src/yini_parser/api/load.py b/src/yini_parser/api/load.py new file mode 100644 index 0000000..fc8fea8 --- /dev/null +++ b/src/yini_parser/api/load.py @@ -0,0 +1,55 @@ +# src/yini_parser/api/load.py +from __future__ import annotations + +from pathlib import Path +from typing import Any + +from antlr4 import CommonTokenStream, FileStream, InputStream + +from yini_parser.api.errors import YiniParseError + +from ..core.yini_builder_visitor import YiniBuilderVisitor +from ..grammar.generated.YiniLexer import YiniLexer +from ..grammar.generated.YiniParser import YiniParser + + +def loads(text: str, strict: bool=False) -> dict[str, Any]: + """ + Parse YINI text and return the resulting Python dictionary. + """ + + input_stream = InputStream(text) + return _parse_input_stream(input_stream, strict=strict) + + +def load(path: str, strict: bool=False) -> dict[str, Any]: + """ + Parse a YINI file from disk and return the resulting Python dictionary. + """ + + file_path = Path(path) + input_stream = FileStream(str(file_path), encoding="utf-8") + return _parse_input_stream(input_stream, strict=strict) + + +def _parse_input_stream( + input_stream: InputStream | FileStream, + strict: bool + ) -> dict[str, Any]: + lexer = YiniLexer(input_stream) + stream = CommonTokenStream(lexer) + parser = YiniParser(stream) + + tree = parser.yini() + + if parser.getNumberOfSyntaxErrors() > 0: +# raise ValueError(f"Failed to parse YINI input: {parser.getNumberOfSyntaxErrors()} syntax error(s).") + raise YiniParseError(f"Failed to parse YINI input: {parser.getNumberOfSyntaxErrors()} syntax error(s).") + + visitor = YiniBuilderVisitor(strict=strict) + result = visitor.visit(tree) + + if not isinstance(result, dict): + raise TypeError(f"Expected parsed result to be a dict, got {type(result).__name__}.") + + return result diff --git a/src/yini_parser/api/warnings.py b/src/yini_parser/api/warnings.py new file mode 100644 index 0000000..8da433b --- /dev/null +++ b/src/yini_parser/api/warnings.py @@ -0,0 +1,53 @@ +# src/yini_parser/api/warnings.py + +from __future__ import annotations + + +class YiniParseWarning(Warning): + """ + Warning raised for non-fatal YINI parse issues. + + A YiniParseWarning represents a problem that was detected while parsing, + but which does not prevent a result from being produced. + + Typical examples: + - Duplicate keys ignored in lenient mode. + - Duplicate sections ignored in lenient mode. + - Key/section name collisions handled by lenient-mode policy. + """ + + def __init__( + self, + message: str, + line: int | None = None, + column: int | None = None, + code: str | None = None, + ) -> None: + super().__init__(message) + self.message = message + self.line = line + self.column = column + self.code = code + + def __str__(self) -> str: + location = self._format_location() + + if self.code is not None and location is not None: + return f"{self.message} [{self.code}] {location}" + + if self.code is not None: + return f"{self.message} [{self.code}]" + + if location is not None: + return f"{self.message} {location}" + + return self.message + + def _format_location(self) -> str | None: + if self.line is not None and self.column is not None: + return f"(line {self.line}, column {self.column})" + + if self.line is not None: + return f"(line {self.line})" + + return None diff --git a/src/yini_parser/core/__init__.py b/src/yini_parser/core/__init__.py new file mode 100644 index 0000000..d2029b2 --- /dev/null +++ b/src/yini_parser/core/__init__.py @@ -0,0 +1 @@ +# This file marks this directory as a Python package so it can be imported (as a package/module). diff --git a/src/yini_parser/core/validator.py b/src/yini_parser/core/validator.py new file mode 100644 index 0000000..632eeaa --- /dev/null +++ b/src/yini_parser/core/validator.py @@ -0,0 +1,157 @@ +# src/yini_parser/core/validator.py + +from __future__ import annotations + +import warnings + +from ..api.errors import YiniParseError +from ..api.warnings import YiniParseWarning + + +class YiniValidator: + """ + Handles validation policy for strict and lenient parsing. + + In strict mode, conflicts are errors. + In lenient mode, conflicts are warnings and the first definition wins. + """ + + def __init__(self, strict: bool = False) -> None: + self.strict = strict + + def handle_duplicate_key( + self, + key: str, + *, + line: int | None = None, + column: int | None = None, + ) -> bool: + """ + Handles duplicate keys. + + Returns: + True -> caller may keep/replace the value + False -> caller should ignore the new value + """ + message = ( + f"Duplicate key {key!r} ignored. " + "The first value is kept." + ) + + if self.strict: + raise YiniParseError( + f"Duplicate key {key!r} is not allowed in strict mode.", + line=line, + column=column, + ) + + self._warn( + message, + line=line, + column=column, + code="duplicate-key", + ) + + return False + + def handle_duplicate_section( + self, + name: str, + *, + line: int | None = None, + column: int | None = None, + ) -> bool: + """ + Handles duplicate sections. + + Returns: + True -> caller may reuse/merge the existing section + False -> caller should ignore the new section block + """ + message = ( + f"Duplicate section {name!r} ignored. " + "The first section is kept." + ) + + if self.strict: + raise YiniParseError( + f"Duplicate section {name!r} is not allowed in strict mode.", + line=line, + column=column, + ) + + self._warn( + message, + line=line, + column=column, + code="duplicate-section", + ) + + return False + + def handle_key_section_collision( + self, + name: str, + existing_kind: str, + incoming_kind: str, + *, + line: int | None = None, + column: int | None = None, + ) -> bool: + """ + Handles name collisions between keys and sections. + + Example: + app = "demo" + ^ app + + or: + + ^ app + app = "demo" + + Returns: + True -> caller may accept the incoming definition + False -> caller should ignore the incoming definition + """ + message = ( + f"Name collision for {name!r} ignored. " + f"A {existing_kind} with this name already exists, so the incoming " + f"{incoming_kind} was ignored." + ) + + if self.strict: + raise YiniParseError( + f"Name collision for {name!r}. " + f"A {existing_kind} with this name already exists, so it cannot " + f"also be used as a {incoming_kind} in strict mode.", + line=line, + column=column, + ) + + self._warn( + message, + line=line, + column=column, + code="key-section-collision", + ) + + return False + + def _warn( + self, + message: str, + *, + line: int | None = None, + column: int | None = None, + code: str | None = None, + ) -> None: + warnings.warn( + YiniParseWarning( + message, + line=line, + column=column, + code=code, + ), + stacklevel=3, + ) diff --git a/src/yini_parser/core/yini_builder_visitor.py b/src/yini_parser/core/yini_builder_visitor.py new file mode 100644 index 0000000..22761d2 --- /dev/null +++ b/src/yini_parser/core/yini_builder_visitor.py @@ -0,0 +1,522 @@ +""" +Transform / build_model -> visitor converts tree to Python values +""" + +from __future__ import annotations + +from typing import Any + +from ..api.errors import YiniParseError +from ..grammar.generated.YiniParser import YiniParser +from ..grammar.generated.YiniParserVisitor import YiniParserVisitor +from .validator import YiniValidator + + +class YiniBuilderVisitor(YiniParserVisitor): + """ + Builds a Python dictionary from the parsed YINI tree. + + Current behavior: + - Top-level sections become nested dicts. + - Assignments go into the current section. + - Lists become Python lists. + - Inline objects become Python dicts. + - Booleans become True/False. + - Null becomes None. + - Strings become Python str. + - Numbers become int or float. + + Conflict behavior: + - lenient mode: first definition wins, later conflicting definitions warn + - strict mode: conflicting definitions raise YiniParseError + """ + + def __init__(self, strict: bool = False) -> None: + super().__init__() + self._root: dict[str, Any] = {} + self._section_stack: list[dict[str, Any]] = [] + self._section_names: list[str] = [] + self._ignored_section_level: int | None = None + self._validator = YiniValidator(strict=strict) + + # ------------------------------------------------------------ + # Public/root + # ------------------------------------------------------------ + + def visitYini(self, ctx: YiniParser.YiniContext) -> dict[str, Any]: + for stmt_ctx in ctx.stmt(): + self.visit(stmt_ctx) + return self._root + + # ------------------------------------------------------------ + # Statements + # ------------------------------------------------------------ + + def visitStmt(self, ctx: YiniParser.StmtContext) -> Any: + section_token = ctx.SECTION_HEAD() + if section_token is not None: + symbol = section_token.getSymbol() + line = symbol.line + column = symbol.column + 1 # ANTLR columns are zero-based. + + level, name = self._parse_section_head( + section_token.getText(), + line=line, + column=column, + ) + + # Clear ignore state when a new section at same or shallower level appears. + if self._ignored_section_level is not None and level <= self._ignored_section_level: + self._ignored_section_level = None + + self._enter_section_with_parsed( + level, + name, + line=line, + column=column, + ) + return None + + # Skip assignments while inside an ignored duplicate/conflicting section block. + if self._ignored_section_level is not None: + return None + + assignment_ctx = ctx.assignment() + if assignment_ctx is not None: + return self.visit(assignment_ctx) + + return None + + def visitAssignment(self, ctx: YiniParser.AssignmentContext) -> None: + key, value = self.visit(ctx.member()) + target = self._current_container() + + line = ctx.start.line if ctx.start is not None else None + column = ctx.start.column + 1 if ctx.start is not None else None + + # Assignment-level key/section collision: + # if an earlier section already exists with this name, the first definition wins. + existing = target.get(key) + if isinstance(existing, dict): + if not self._validator.handle_key_section_collision( + name=key, + existing_kind="section", + incoming_kind="key", + line=line, + column=column, + ): + return None + + # Duplicate key: + if key in target: + if not self._validator.handle_duplicate_key( + key, + line=line, + column=column, + ): + return None + + target[key] = value + return None + + def visitMember(self, ctx: YiniParser.MemberContext) -> tuple[str, Any]: + key = ctx.KEY().getText() + value_ctx = ctx.value() + + # In grammar, empty value is allowed and intended to mean null. + value = self.visit(value_ctx) if value_ctx is not None else None + return key, value + + # ------------------------------------------------------------ + # Values + # ------------------------------------------------------------ + + def visitValue(self, ctx: YiniParser.ValueContext) -> Any: + return self.visitChildren(ctx) + + def visitNull_literal(self, ctx: YiniParser.Null_literalContext) -> None: + return None + + def visitBoolean_literal(self, ctx: YiniParser.Boolean_literalContext) -> bool: + text = ctx.getText().strip().lower() + return text in {"true", "on", "yes"} + + def visitNumber_literal(self, ctx: YiniParser.Number_literalContext) -> int | float: + text = ctx.getText().strip() + line = ctx.start.line if ctx.start is not None else None + column = ctx.start.column + 1 if ctx.start is not None else None + + try: + lowered = text.lower() + + if lowered.startswith(("0x", "#")): + cleaned = text[1:] if text.startswith("#") else text[2:] + return int(cleaned, 16) + + if lowered.startswith("0b"): + return int(text[2:], 2) + + if lowered.startswith("%"): + return int(text[1:], 2) + + if lowered.startswith("0o"): + return int(text[2:], 8) + + if lowered.startswith("0z"): + return self._parse_duodecimal(text[2:], line=line, column=column) + + if any(ch in text for ch in ".eE"): + return float(text) + + return int(text, 10) + + except YiniParseError: + raise + + except ValueError: + raise YiniParseError( + f"Invalid number literal: {text!r}", + line=line, + column=column, + ) from None + + def visitString_literal(self, ctx: YiniParser.String_literalContext) -> str: + token = ctx.STRING().getSymbol() + line = token.line + column = token.column + 1 + + first = self._decode_string_token( + ctx.STRING().getText(), + line=line, + column=column, + ) + + suffix = "".join(self.visit(part) for part in ctx.string_concat()) + return first + suffix + + + def visitString_concat(self, ctx: YiniParser.String_concatContext) -> str: + token = ctx.STRING().getSymbol() + line = token.line + column = token.column + 1 + + return self._decode_string_token( + ctx.STRING().getText(), + line=line, + column=column, + ) + + def visitList_literal(self, ctx: YiniParser.List_literalContext) -> list[Any]: + if ctx.EMPTY_LIST() is not None: + return [] + + elements_ctx = ctx.elements() + if elements_ctx is None: + return [] + + return self.visit(elements_ctx) + + def visitElements(self, ctx: YiniParser.ElementsContext) -> list[Any]: + return [self.visit(value_ctx) for value_ctx in ctx.value()] + + def visitObject_literal(self, ctx: YiniParser.Object_literalContext) -> dict[str, Any]: + if ctx.EMPTY_OBJECT() is not None: + return {} + + object_members_ctx = ctx.object_members() + if object_members_ctx is None: + return {} + + return self.visit(object_members_ctx) + + def visitObject_members(self, ctx: YiniParser.Object_membersContext) -> dict[str, Any]: + result: dict[str, Any] = {} + + for member_ctx in ctx.object_member(): + key, value = self.visit(member_ctx) + + line = member_ctx.start.line if member_ctx.start is not None else None + column = member_ctx.start.column + 1 if member_ctx.start is not None else None + + if key in result: + if not self._validator.handle_duplicate_key( + key, + line=line, + column=column, + ): + continue + + result[key] = value + return result + + def visitObject_member(self, ctx: YiniParser.Object_memberContext) -> tuple[str, Any]: + key = ctx.KEY().getText() + value = self.visit(ctx.value()) + return key, value + + # ------------------------------------------------------------ + # Helpers + # ------------------------------------------------------------ + + def _current_container(self) -> dict[str, Any]: + if self._section_stack: + return self._section_stack[-1] + return self._root + + # def _enter_section_with_parsed( + # self, level: int, name: str + # ) -> None: + def _enter_section_with_parsed( + self, + level: int, + name: str, + *, + line: int | None = None, + column: int | None = None, + ) -> None: + + # Root-level section is level 1. + # level N means nesting depth N. + while len(self._section_stack) >= level: + self._section_stack.pop() + self._section_names.pop() + + parent = self._section_stack[-1] if self._section_stack else self._root + existing = parent.get(name) + + if existing is None: + new_section: dict[str, Any] = {} + parent[name] = new_section + self._section_stack.append(new_section) + self._section_names.append(name) + return + + # Duplicate section name: first definition wins. + if isinstance(existing, dict): + if not self._validator.handle_duplicate_section( + name, + line=line, + column=column, + ): + self._ignored_section_level = level + return + + # If validator policy ever changes to allow this. + self._section_stack.append(existing) + self._section_names.append(name) + return + + # Existing scalar/other value, incoming section -> collision. + if not self._validator.handle_key_section_collision( + name=name, + existing_kind="key", + incoming_kind="section", + line=line, + column=column, + ): + self._ignored_section_level = level + return + + def _parse_section_head( + self, + raw_text: str, + *, + line: int | None = None, + column: int | None = None, + ) -> tuple[int, str]: + """ + Parses a SECTION_HEAD token text like: + "^ App\\n" + "^^ Server\\n" + "^7 DeepSection\\n" + + Returns: + (level, name) + """ + text = raw_text.strip() + + if not text: + raise YiniParseError( + "Invalid section header: the header is empty.", + line=line, + column=column, + ) + + marker = text[0] + + if marker not in {"^", "<", "§"}: + raise YiniParseError( + f"Invalid section header: {marker!r} is not a valid section marker. " + "Use one of: '^', '<', or '§'.", + line=line, + column=column, + ) + + i = 0 + while i < len(text) and text[i] == marker: + i += 1 + + if i == 1 and i < len(text) and text[i].isdigit(): + j = i + while j < len(text) and text[j].isdigit(): + j += 1 + + level_text = text[i:j] + + try: + level = int(level_text) + except ValueError: + raise YiniParseError( + f"Invalid section level: {level_text!r} is not a valid number.", + line=line, + column=column, + ) from None + + name = text[j:].strip() + else: + level = i + name = text[i:].strip() + + if not name: + raise YiniParseError( + f"Missing section name after section marker {marker!r}.", + line=line, + column=column, + ) + + return level, self._strip_backticks(name) + + def _strip_backticks(self, text: str) -> str: + if len(text) >= 2 and text[0] == "`" and text[-1] == "`": + return text[1:-1] + return text + + def _decode_string_token( + self, + token_text: str, + *, + line: int | None = None, + column: int | None = None, + ) -> str: + """ + Minimal first-pass string decoding. + + Handles: + - Optional prefixes: r, c, h in either case. + - Single/double quoted strings. + - Triple-quoted strings. + - Simple quote stripping. + + This is intentionally conservative for now. + """ + text = token_text + + if not text: + return "" + + prefix = "" + + if len(text) >= 2 and text[0] in "RrCcHh" and text[1] in {'"', "'"}: + prefix = text[0] + text = text[1:] + elif len(text) >= 4 and text[0] in "RrCc" and text[1:4] == '"""': + prefix = text[0] + text = text[1:] + + # Triple-quoted string. + if text.startswith('"""') and text.endswith('"""') and len(text) >= 6: + inner = text[3:-3] + + if prefix in {"C", "c"}: + return self._decode_classic_string( + inner, + line=line, + column=column, + ) + + return inner + + # Single-quoted or double-quoted string. + if len(text) >= 2 and text[0] == text[-1] and text[0] in {"'", '"'}: + inner = text[1:-1] + + # Raw, hyper, and unprefixed strings: return as-is. + if prefix in {"", "R", "r", "H", "h"}: + return inner + + # Classic strings: decode escapes. + if prefix in {"C", "c"}: + return self._decode_classic_string( + inner, + line=line, + column=column, + ) + + return inner + + raise YiniParseError( + f"Invalid string literal: {token_text!r}", + line=line, + column=column, + ) + + def _parse_duodecimal( + self, + text: str, + *, + line: int | None = None, + column: int | None = None, + ) -> int: + value = 0 + + if not text: + raise YiniParseError( + "Invalid duodecimal number: missing digits after '0z'.", + line=line, + column=column, + ) + + for ch in text: + if ch.isdigit(): + digit = int(ch) + else: + lowered = ch.lower() + + if lowered in {"a", "x"}: + digit = 10 + elif lowered in {"b", "e"}: + digit = 11 + else: + raise YiniParseError( + f"Invalid duodecimal number: {ch!r} is not a valid base-12 digit.", + line=line, + column=column, + ) + + if digit >= 12: + raise YiniParseError( + f"Invalid duodecimal number: {ch!r} is not a valid base-12 digit.", + line=line, + column=column, + ) + + value = value * 12 + digit + + return value + + # Helper + def _decode_classic_string( + self, + inner: str, + *, + line: int | None = None, + column: int | None = None, + ) -> str: + try: + return bytes(inner, "utf-8").decode("unicode_escape") + except UnicodeDecodeError as exc: + raise YiniParseError( + f"Invalid string escape sequence: {exc.reason}.", + line=line, + column=column, + ) from None diff --git a/src/grammar/README.md b/src/yini_parser/grammar/README.md similarity index 100% rename from src/grammar/README.md rename to src/yini_parser/grammar/README.md diff --git a/src/yini_parser/grammar/__init__.py b/src/yini_parser/grammar/__init__.py new file mode 100644 index 0000000..d2029b2 --- /dev/null +++ b/src/yini_parser/grammar/__init__.py @@ -0,0 +1 @@ +# This file marks this directory as a Python package so it can be imported (as a package/module). diff --git a/src/grammar/generated/YiniLexer.interp b/src/yini_parser/grammar/generated/YiniLexer.interp similarity index 99% rename from src/grammar/generated/YiniLexer.interp rename to src/yini_parser/grammar/generated/YiniLexer.interp index c265e3a..2ee4dcd 100644 --- a/src/grammar/generated/YiniLexer.interp +++ b/src/yini_parser/grammar/generated/YiniLexer.interp @@ -85,7 +85,7 @@ SEMICOLON NL WS BLOCK_COMMENT -LINE_COMMENT +FULL_LINE_COMMENT INLINE_COMMENT KEY IDENT_INVALID @@ -162,7 +162,7 @@ SECTION_TAIL_COMMENT WS BLOCK_COMMENT DISABLE_LINE_MARKER -LINE_COMMENT +FULL_LINE_COMMENT INLINE_COMMENT KEY IDENT_INVALID diff --git a/src/grammar/generated/YiniLexer.py b/src/yini_parser/grammar/generated/YiniLexer.py similarity index 99% rename from src/grammar/generated/YiniLexer.py rename to src/yini_parser/grammar/generated/YiniLexer.py index 19890a9..25a6d21 100644 --- a/src/grammar/generated/YiniLexer.py +++ b/src/yini_parser/grammar/generated/YiniLexer.py @@ -1,4 +1,4 @@ -# Generated from ./grammar/v1.0.0-rc.5/YiniLexer.g4 by ANTLR 4.13.2 +# Generated from ./grammar/v1.0.0-rc.5x/YiniLexer.g4 by ANTLR 4.13.2 from antlr4 import * from io import StringIO import sys @@ -332,7 +332,7 @@ class YiniLexer(Lexer): NL = 36 WS = 37 BLOCK_COMMENT = 38 - LINE_COMMENT = 39 + FULL_LINE_COMMENT = 39 INLINE_COMMENT = 40 KEY = 41 IDENT_INVALID = 42 @@ -355,7 +355,7 @@ class YiniLexer(Lexer): "TRIPLE_QUOTED_STRING", "SINGLE_OR_DOUBLE", "R_AND_C_STRING", "HYPER_STRING", "NUMBER", "SS", "CARET", "GT", "LT", "EQ", "HASH", "COMMA", "COLON", "OB", "CB", "OC", "CC", "PLUS", "DOLLAR", - "PC", "AT", "SEMICOLON", "NL", "WS", "BLOCK_COMMENT", "LINE_COMMENT", + "PC", "AT", "SEMICOLON", "NL", "WS", "BLOCK_COMMENT", "FULL_LINE_COMMENT", "INLINE_COMMENT", "KEY", "IDENT_INVALID", "REST", "META_INVALID" ] ruleNames = [ "EBD", "HSPACE", "DIGIT", "SIGN", "IDENT_SIMPLE_START", @@ -373,7 +373,7 @@ class YiniLexer(Lexer): "NUMBER", "SS", "CARET", "GT", "LT", "EQ", "HASH", "COMMA", "COLON", "OB", "CB", "OC", "CC", "PLUS", "DOLLAR", "PC", "AT", "SEMICOLON", "EOL", "NL", "SECTION_TAIL_COMMENT", - "WS", "BLOCK_COMMENT", "DISABLE_LINE_MARKER", "LINE_COMMENT", + "WS", "BLOCK_COMMENT", "DISABLE_LINE_MARKER", "FULL_LINE_COMMENT", "INLINE_COMMENT", "KEY", "IDENT_INVALID", "REST", "META_INVALID" ] grammarFileName = "YiniLexer.g4" diff --git a/src/grammar/generated/YiniLexer.tokens b/src/yini_parser/grammar/generated/YiniLexer.tokens similarity index 96% rename from src/grammar/generated/YiniLexer.tokens rename to src/yini_parser/grammar/generated/YiniLexer.tokens index 520ce0c..aa9566b 100644 --- a/src/grammar/generated/YiniLexer.tokens +++ b/src/yini_parser/grammar/generated/YiniLexer.tokens @@ -36,7 +36,7 @@ SEMICOLON=35 NL=36 WS=37 BLOCK_COMMENT=38 -LINE_COMMENT=39 +FULL_LINE_COMMENT=39 INLINE_COMMENT=40 KEY=41 IDENT_INVALID=42 diff --git a/src/grammar/generated/YiniParser.interp b/src/yini_parser/grammar/generated/YiniParser.interp similarity index 99% rename from src/grammar/generated/YiniParser.interp rename to src/yini_parser/grammar/generated/YiniParser.interp index 41f85c8..0e243fb 100644 --- a/src/grammar/generated/YiniParser.interp +++ b/src/yini_parser/grammar/generated/YiniParser.interp @@ -85,7 +85,7 @@ SEMICOLON NL WS BLOCK_COMMENT -LINE_COMMENT +FULL_LINE_COMMENT INLINE_COMMENT KEY IDENT_INVALID diff --git a/src/grammar/generated/YiniParser.py b/src/yini_parser/grammar/generated/YiniParser.py similarity index 99% rename from src/grammar/generated/YiniParser.py rename to src/yini_parser/grammar/generated/YiniParser.py index 414cbc9..de005d5 100644 --- a/src/grammar/generated/YiniParser.py +++ b/src/yini_parser/grammar/generated/YiniParser.py @@ -1,6 +1,5 @@ -# Generated from ./grammar/v1.0.0-rc.5/YiniParser.g4 by ANTLR 4.13.2 +# Generated from ./grammar/v1.0.0-rc.5x/YiniParser.g4 by ANTLR 4.13.2 # encoding: utf-8 -# src\grammar\generated\YiniParser.py from antlr4 import * from io import StringIO import sys @@ -145,7 +144,7 @@ class YiniParser ( Parser ): "NUMBER", "SS", "CARET", "GT", "LT", "EQ", "HASH", "COMMA", "COLON", "OB", "CB", "OC", "CC", "PLUS", "DOLLAR", "PC", "AT", "SEMICOLON", "NL", "WS", "BLOCK_COMMENT", - "LINE_COMMENT", "INLINE_COMMENT", "KEY", "IDENT_INVALID", + "FULL_LINE_COMMENT", "INLINE_COMMENT", "KEY", "IDENT_INVALID", "REST", "META_INVALID" ] RULE_yini = 0 @@ -219,7 +218,7 @@ class YiniParser ( Parser ): NL=36 WS=37 BLOCK_COMMENT=38 - LINE_COMMENT=39 + FULL_LINE_COMMENT=39 INLINE_COMMENT=40 KEY=41 IDENT_INVALID=42 diff --git a/src/grammar/generated/YiniParser.tokens b/src/yini_parser/grammar/generated/YiniParser.tokens similarity index 96% rename from src/grammar/generated/YiniParser.tokens rename to src/yini_parser/grammar/generated/YiniParser.tokens index 520ce0c..aa9566b 100644 --- a/src/grammar/generated/YiniParser.tokens +++ b/src/yini_parser/grammar/generated/YiniParser.tokens @@ -36,7 +36,7 @@ SEMICOLON=35 NL=36 WS=37 BLOCK_COMMENT=38 -LINE_COMMENT=39 +FULL_LINE_COMMENT=39 INLINE_COMMENT=40 KEY=41 IDENT_INVALID=42 diff --git a/src/grammar/generated/YiniParserVisitor.py b/src/yini_parser/grammar/generated/YiniParserVisitor.py similarity index 98% rename from src/grammar/generated/YiniParserVisitor.py rename to src/yini_parser/grammar/generated/YiniParserVisitor.py index 241777e..be3175f 100644 --- a/src/grammar/generated/YiniParserVisitor.py +++ b/src/yini_parser/grammar/generated/YiniParserVisitor.py @@ -1,4 +1,4 @@ -# Generated from ./grammar/v1.0.0-rc.5/YiniParser.g4 by ANTLR 4.13.2 +# Generated from ./grammar/v1.0.0-rc.5x/YiniParser.g4 by ANTLR 4.13.2 from antlr4 import * if "." in __name__: from .YiniParser import YiniParser diff --git a/tests/test_conflicts.py b/tests/test_conflicts.py new file mode 100644 index 0000000..65a6ece --- /dev/null +++ b/tests/test_conflicts.py @@ -0,0 +1,227 @@ +# tests/test_conflicts.py +from __future__ import annotations + +import pytest + +from yini_parser.api.load import loads +from yini_parser.api.errors import YiniParseError +from yini_parser.api.warnings import YiniParseWarning + +""" +These rules apply: + +- In lenient mode, when duplicate or conflicting definitions occur, the FIRST definition always wins. +- Duplicate keys MUST NOT overwrite an earlier key. Later duplicate keys should emit warnings. +- Duplicate section names MUST NOT overwrite or merge with an earlier section. Later duplicate section definitions should emit warnings, and the duplicate section block, not only the header line, should be ignored. +- Key/section name collisions MUST NOT overwrite an earlier definition. In lenient mode, the first definition should win and a warning should be emitted. +- No merging of repeated sections is allowed. +- In strict mode, duplicate keys, duplicate section names, and key/section name collisions MUST result in an error. +""" + +def test_duplicate_key_first_value_wins_in_lenient_mode() -> None: + text = """ +^ App +name = "First" +name = "Second" +""".lstrip() + + result = loads(text) + + assert result == { + "App": { + "name": "First", + }, + } + + +def test_duplicate_scalar_key_first_value_wins_in_lenient_mode() -> None: + text = """ +^ App +pageSize = 10 +pageSize = 25 +""".lstrip() + + result = loads(text) + + assert result == { + "App": { + "pageSize": 10, + }, + } + + +def test_repeated_top_level_section_first_definition_wins_in_lenient_mode() -> None: + text = """ +^ App +name = "Demo" + +^ App +debug = true +""".lstrip() + + result = loads(text) + + assert result == { + "App": { + "name": "Demo", + }, + } + + +def test_repeated_nested_section_first_definition_wins_in_lenient_mode() -> None: + text = """ +^ App +^^ Server +host = "localhost" + +^^ Server +port = 8080 +""".lstrip() + + result = loads(text) + + assert result == { + "App": { + "Server": { + "host": "localhost", + }, + }, + } + + +def test_key_then_section_name_collision_first_definition_wins_in_lenient_mode() -> None: + text = """ +^ App +Server = "localhost" + +^^ Server +port = 8080 +""".lstrip() + + result = loads(text) + + assert result == { + "App": { + "Server": "localhost", + }, + } + + +def test_section_then_key_name_collision_first_definition_wins_in_lenient_mode() -> None: + text = """ +^ App +^^ Server +port = 8080 + +^ App +Server = "localhost" +""".lstrip() + + result = loads(text) + + assert result == { + "App": { + "Server": { + "port": 8080, + }, + }, + } + +def test_duplicate_key_should_warn_in_lenient_mode() -> None: + text = """ +^ App +name = "First" +name = "Second" +""".lstrip() + + with pytest.warns( + YiniParseWarning, + match=r"Duplicate key 'name' ignored\. The first value is kept\.", + ): + loads(text) + + +def test_repeated_section_should_warn_in_lenient_mode() -> None: + text = """ +^ App +name = "Demo" + +^ App +debug = true +""".lstrip() + + with pytest.warns( + YiniParseWarning, + match=r"Duplicate section 'App' ignored\. The first section is kept\.", + ): + loads(text) + + +def test_key_section_collision_should_warn_in_lenient_mode() -> None: + text = """ +^ App +Server = "localhost" + +^^ Server +port = 8080 +""".lstrip() + + with pytest.warns( + YiniParseWarning, + match=( + r"Name collision for 'Server' ignored\. " + r"A key with this name already exists, so the incoming section was ignored\." + ), + ): + loads(text) + + +def test_duplicate_key_should_error_in_strict_mode() -> None: + text = """ +^ App +name = "First" +name = "Second" +""".lstrip() + + with pytest.raises(YiniParseError): + loads(text, strict=True) + + +def test_repeated_section_should_error_in_strict_mode() -> None: + text = """ +^ App +name = "Demo" + +^ App +debug = true +""".lstrip() + + with pytest.raises(YiniParseError): + loads(text, strict=True) + + +def test_key_then_section_name_collision_should_error_in_strict_mode() -> None: + text = """ +^ App +Server = "localhost" + +^^ Server +port = 8080 +""".lstrip() + + with pytest.raises(YiniParseError): + loads(text, strict=True) + + +def test_section_then_key_name_collision_should_error_in_strict_mode() -> None: + text = """ +^ App +^^ Server +port = 8080 + +^ App +Server = "localhost" +""".lstrip() + + with pytest.raises(YiniParseError): + loads(text, strict=True) diff --git a/tests/test_samples_against_json.py b/tests/test_fixtures_against_json.py similarity index 97% rename from tests/test_samples_against_json.py rename to tests/test_fixtures_against_json.py index 9137cba..cda25ec 100644 --- a/tests/test_samples_against_json.py +++ b/tests/test_fixtures_against_json.py @@ -5,7 +5,7 @@ import pytest -from api.load import load +from yini_parser.api.load import load FIXTURES_DIR = Path("tests/fixtures/smoke-fixtures") diff --git a/tests/test_load.py b/tests/test_load.py index 06ba044..bb60b02 100644 --- a/tests/test_load.py +++ b/tests/test_load.py @@ -1,7 +1,7 @@ # tests/test_load.py from __future__ import annotations -from api.load import load, loads +from yini_parser.api.load import load, loads def test_loads_parses_basic_document() -> None: diff --git a/tests/test_sections.py b/tests/test_sections.py new file mode 100644 index 0000000..8997fbc --- /dev/null +++ b/tests/test_sections.py @@ -0,0 +1,179 @@ +# tests/test_sections.py +from __future__ import annotations + +from yini_parser.api.load import loads + + +def test_parses_single_top_level_section() -> None: + text = """ +^ App +name = "Demo App" +version = 1.0 +""".lstrip() + + result = loads(text) + + assert result == { + "App": { + "name": "Demo App", + "version": 1.0, + }, + } + + +def test_parses_nested_sections() -> None: + text = """ +^ App +name = "Demo App" + +^^ Server +host = "localhost" +port = 8080 +""".lstrip() + + result = loads(text) + + assert result == { + "App": { + "name": "Demo App", + "Server": { + "host": "localhost", + "port": 8080, + }, + }, + } + + +def test_parses_multiple_sibling_sections() -> None: + text = """ +^ App +name = "Demo App" + +^ User +name = "Marko" +""".lstrip() + + result = loads(text) + + assert result == { + "App": { + "name": "Demo App", + }, + "User": { + "name": "Marko", + }, + } + + +def test_returns_from_deeper_section_to_shallower_section() -> None: + text = """ +^ Root +name = "root" + +^^ Child +enabled = true + +^ Sibling +active = false +""".lstrip() + + result = loads(text) + + assert result == { + "Root": { + "name": "root", + "Child": { + "enabled": True, + }, + }, + "Sibling": { + "active": False, + }, + } + + +def test_parses_backticked_section_names() -> None: + text = """ +^ `DB Config` +host = "db.internal" + +^^ `Connection Pool` +size = 10 +""".lstrip() + + result = loads(text) + + assert result == { + "DB Config": { + "host": "db.internal", + "Connection Pool": { + "size": 10, + }, + }, + } + + +def test_parses_numeric_section_level_shorthand() -> None: + text = """ +^ Root +name = "root" + +^2 Child +enabled = true + +^3 GrandChild +value = 42 +""".lstrip() + + result = loads(text) + + assert result == { + "Root": { + "name": "root", + "Child": { + "enabled": True, + "GrandChild": { + "value": 42, + }, + }, + }, + } + + +def test_parses_mixed_section_depth_transitions() -> None: + text = """ +^ Root +rootValue = 1 + +^^ ChildA +childAValue = 2 + +^^ ChildB +childBValue = 3 + +^^^ GrandChild +grandChildValue = 4 + +^ AnotherRoot +anotherRootValue = 5 +""".lstrip() + + result = loads(text) + + assert result == { + "Root": { + "rootValue": 1, + "ChildA": { + "childAValue": 2, + }, + "ChildB": { + "childBValue": 3, + "GrandChild": { + "grandChildValue": 4, + }, + }, + }, + "AnotherRoot": { + "anotherRootValue": 5, + }, + } diff --git a/src/core/__init__.py b/tests/test_strict_mode.py similarity index 100% rename from src/core/__init__.py rename to tests/test_strict_mode.py diff --git a/tests/test_values.py b/tests/test_values.py new file mode 100644 index 0000000..d95bda0 --- /dev/null +++ b/tests/test_values.py @@ -0,0 +1,193 @@ +# tests/test_values.py +from __future__ import annotations + +from yini_parser.api.load import loads + + +""" +Keep: +- One responsibility per test. +- Readable assertions. +- Simple input documents. +""" + + +def test_parses_booleans() -> None: + text = """ +^ App +enabled = true +disabled = off +confirmed = yes +rejected = no +""".lstrip() + + result = loads(text) + + assert result == { + "App": { + "enabled": True, + "disabled": False, + "confirmed": True, + "rejected": False, + }, + } + + +def test_parses_null_and_empty_value_as_none() -> None: + text = """ +^ App +explicitNull = null +implicitNull = +""".lstrip() + + result = loads(text) + + assert result == { + "App": { + "explicitNull": None, + "implicitNull": None, + }, + } + + +def test_parses_numbers() -> None: + text = """ +^ App +intValue = 25 +floatValue = 1.25 +negativeInt = -7 +negativeFloat = -0.5 +""".lstrip() + + result = loads(text) + + assert result == { + "App": { + "intValue": 25, + "floatValue": 1.25, + "negativeInt": -7, + "negativeFloat": -0.5, + }, + } + + +def test_parses_basic_strings() -> None: + text = r''' +^ App +name = "Demo App" +single = 'hello' +path = "\home\user\docs\report.docx" +'''.lstrip() + + result = loads(text) + + assert result == { + "App": { + "name": "Demo App", + "single": "hello", + "path": r"\home\user\docs\report.docx", + }, + } + + +def test_parses_lists() -> None: + text = """ +^ App +items = ["search", "logs", "metrics"] +numbers = [1, 2, 3] +flags = [true, off, yes, no] +""".lstrip() + + result = loads(text) + + assert result == { + "App": { + "items": ["search", "logs", "metrics"], + "numbers": [1, 2, 3], + "flags": [True, False, True, False], + }, + } + + +def test_parses_empty_list_and_empty_object() -> None: + text = """ +^ App +items = [] +meta = {} +""".lstrip() + + result = loads(text) + + assert result == { + "App": { + "items": [], + "meta": {}, + }, + } + + +def test_parses_inline_objects() -> None: + text = """ +^ App +cache = { maxMb: 256, maxHours: 0.5 } +db = { host: "localhost", port: 5432, enabled: true } +""".lstrip() + + result = loads(text) + + assert result == { + "App": { + "cache": { + "maxMb": 256, + "maxHours": 0.5, + }, + "db": { + "host": "localhost", + "port": 5432, + "enabled": True, + }, + }, + } + + +def test_parses_nested_inline_objects_and_lists() -> None: + text = """ +^ App +config = { + name: "demo", + tags: ["alpha", "beta"], + limits: { maxUsers: 10, timeoutSec: 30 } +} +""".lstrip() + + result = loads(text) + + assert result == { + "App": { + "config": { + "name": "demo", + "tags": ["alpha", "beta"], + "limits": { + "maxUsers": 10, + "timeoutSec": 30, + }, + }, + }, + } + + +def test_parses_string_concatenation() -> None: + text = """ +^ App +message = "hello" + + " " + + "world" +""".lstrip() + + result = loads(text) + + assert result == { + "App": { + "message": "hello world", + }, + }