From 1170b933d09246d10f045b6a17d09a6bd5d0ae19 Mon Sep 17 00:00:00 2001 From: Maxim Ivanov Date: Mon, 25 Aug 2025 12:35:48 +0700 Subject: [PATCH 01/29] Add .editorconfig --- .editorconfig | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) create mode 100644 .editorconfig diff --git a/.editorconfig b/.editorconfig new file mode 100644 index 0000000..330beb2 --- /dev/null +++ b/.editorconfig @@ -0,0 +1,23 @@ +# .editorconfig +root = true + +[*] +indent_style = space +indent_size = 4 +end_of_line = lf +charset = utf-8 +trim_trailing_whitespace = true +insert_final_newline = true + +[*.py] +indent_size = 4 +max_line_length = 88 + +[*.{yml,yaml}] +indent_size = 2 + +[*.{md,toml,json}] +indent_size = 2 + +[Makefile] +indent_style = tab From d423bd601cb699b22190945ec1d3d896c32566ba Mon Sep 17 00:00:00 2001 From: Maxim Ivanov Date: Mon, 25 Aug 2025 12:35:57 +0700 Subject: [PATCH 02/29] Add pyproject.toml --- pyproject.toml | 136 +++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 136 insertions(+) create mode 100644 pyproject.toml diff --git a/pyproject.toml b/pyproject.toml new file mode 100644 index 0000000..93da482 --- /dev/null +++ b/pyproject.toml @@ -0,0 +1,136 @@ +[build-system] +requires = ['setuptools>=45', 'wheel'] +build-backend = 'setuptools.build_meta' + +[project] +name = 'xmlassert' +version = '0.1.0' +description = 'Human-readable XML comparison for testing with clean diff output' +readme = 'README.md' +requires-python = '>=3.8' +license = {text = 'MIT'} +authors = [ + {name = 'Maxim Ivanov', email = 'ivanovmg@gmail.com'}, +] +keywords = ['xml', 'testing', 'assert', 'diff', 'comparison'] +classifiers = [ + 'Development Status :: 4 - Beta', + 'Intended Audience :: Developers', + 'License :: OSI Approved :: MIT License', + 'Programming Language :: Python :: 3', + 'Programming Language :: Python :: 3.8', + 'Programming Language :: Python :: 3.9', + 'Programming Language :: Python :: 3.10', + 'Programming Language :: Python :: 3.11', + 
'Programming Language :: Python :: 3.12', + 'Topic :: Software Development :: Testing', + 'Topic :: Text Processing :: Markup :: XML', +] +dependencies = [ + 'defusedxml~=0.7.1', + 'lxml~=4.9.0', +] + +[project.urls] +Homepage = 'https://github.com/ivanovmg/xmlassert' +Changelog = 'https://github.com/ivanovmg/xmlassert/blob/main/CHANGELOG.md' +Issues = 'https://github.com/ivanovmg/xmlassert/issues' + +[tool.setuptools] +packages = {find = {where = ['src']}} +package-dir = {'' = 'src'} + +[tool.setuptools.package-data] +xmlassert = ['py.typed'] + +[project.optional-dependencies] +dev = [ + 'pytest>=7.0.0', + 'pytest-cov>=4.0.0', + 'ruff>=0.1.0', + 'mypy>=1.0.0', + 'types-setuptools', + 'build>=0.10.0', + 'twine>=4.0.0', +] +docs = [ + 'sphinx>=7.0.0', + 'sphinx-rtd-theme>=1.0.0', +] + +[tool.ruff] +line-length = 79 +target-version = 'py38' + +# Linting rules (updated to new format) +[tool.ruff.lint] +select = [ + 'E', # pycodestyle errors + 'W', # pycodestyle warnings + 'F', # pyflakes + 'I', # isort + 'B', # flake8-bugbear + 'C4', # flake8-comprehensions + 'UP', # pyupgrade + 'YTT', # flake8-2020 + 'RUF', # ruff-specific rules +] +ignore = [ + 'B008', # do not perform function calls in argument defaults + 'B905', # zip() without an explicit strict= parameter + 'E501', # line too long (handled by formatter) + 'RUF012', # mutable class defaults +] + +# Import sorting (updated to new format) +[tool.ruff.lint.isort] +known-first-party = ['xmlassert'] +lines-after-imports = 2 +combine-as-imports = true + +# Per-file ignores (updated to new format) +[tool.ruff.lint.per-file-ignores] +'__init__.py' = ['F401'] # allow unused imports in __init__.py + +# Formatting configuration +[tool.ruff.format] +quote-style = 'single' +skip-magic-trailing-comma = false + +[tool.coverage.run] +source = ['src/xmlassert'] +branch = true +parallel = true + +[tool.coverage.report] +show_missing = true +skip_covered = true +fail_under = 90 + +[tool.coverage.paths] +source = ['src/xmlassert', 
'*/site-packages'] + +[tool.coverage.exclude] +lines = [ + '# pragma: no cover', + 'if __name__ == .__main__.:', + 'raise NotImplementedError', + 'return NotImplemented', + '@abc.abstractmethod', + 'class.*Protocol', + 'class.*ABC', + 'pass', + 'if TYPE_CHECKING:', + 'from __future__ import', +] + +[tool.pytest.ini_options] +testpaths = ['tests'] +addopts = '--cov=src/xmlassert --cov-report=term-missing --cov-report=html' +python_files = ['test_*.py'] +python_classes = ['Test*'] +python_functions = ['test_*'] + +[tool.mypy] +python_version = '3.8' +strict = true From 5576c55e627e6ac9a683ab291d8a169e49ba03cf Mon Sep 17 00:00:00 2001 From: Maxim Ivanov Date: Mon, 25 Aug 2025 12:36:05 +0700 Subject: [PATCH 03/29] Add Makefile --- Makefile | 110 +++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 110 insertions(+) create mode 100644 Makefile diff --git a/Makefile b/Makefile new file mode 100644 index 0000000..aadec64 --- /dev/null +++ b/Makefile @@ -0,0 +1,110 @@ +.PHONY: help tests lint format typecheck build publish clean install-dev + +# Default target +help: + @echo "Available commands:" + @echo " make install-dev - Install development dependencies" + @echo " make tests - Run tests with coverage" + @echo " make lint - Run linting checks" + @echo " make format - Format code" + @echo " make typecheck - Run type checking" + @echo " make check - Run all checks (lint, format, typecheck, tests)" + @echo " make build - Build package" + @echo " make publish - Build and publish to PyPI" + @echo " make clean - Clean build artifacts" + +# Install development dependencies +install-dev: + pip install -e ".[dev]" + +# Run tests with coverage +tests: + python -m pytest --cov=src/xmlassert --cov-report=term-missing --cov-report=html -v + +# Run linting checks +lint: + ruff check src/xmlassert tests + +# Format code +format: + ruff check --select I --fix # fix imports + ruff format src/xmlassert tests + +# Check formatting without making changes +format-check: + 
ruff format --check src/xmlassert tests + +# Run type checking +typecheck: + mypy src/xmlassert tests + +# Run all checks: lint, format check, typecheck, and tests +check: lint format-check typecheck tests + +# Build package +build: + python -m build + +# Build and publish to PyPI (requires TWINE_USERNAME and TWINE_PASSWORD) +publish: build + python -m twine upload dist/* + +# Clean build artifacts +clean: + rm -rf build/ + rm -rf dist/ + rm -rf *.egg-info/ + rm -rf .ruff_cache/ + rm -rf .mypy_cache/ + rm -rf .pytest_cache/ + rm -rf htmlcov/ + rm -rf .coverage + rm -rf coverage.xml + +# Install package in development mode +develop: + pip install -e . + +# Run tests in watch mode (requires pytest-watch) +watch: + ptw --onpass "echo ? Tests passed" --onfail "echo ? Tests failed" + +# Generate coverage report +coverage: + python -m pytest --cov=src/xmlassert --cov-report=html + @echo "Coverage report generated at htmlcov/index.html" + +# Check for security vulnerabilities +safety: + pip install safety + safety check + +# Update dependencies +update-deps: + pip install --upgrade pip + pip install --upgrade -e ".[dev]" + +# Show dependency tree +deps-tree: + pip install pipdeptree + pipdeptree + +# Run benchmarks (if you add benchmarks later) +benchmark: + @echo "Benchmarks not yet implemented" + +# Helpers for CI +ci-install: + pip install -e ".[dev]" + +ci-test: + python -m pytest --cov=src/xmlassert --cov-report=xml + +ci-lint: + ruff check src/xmlassert tests + +ci-format: + ruff format --check src/xmlassert tests + +ci-typecheck: + mypy src/xmlassert tests From 412eea09343dcb02879984657ef035b1c5130fa3 Mon Sep 17 00:00:00 2001 From: Maxim Ivanov Date: Mon, 25 Aug 2025 12:36:27 +0700 Subject: [PATCH 04/29] Ignore venv* --- .gitignore | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/.gitignore b/.gitignore index b7faf40..e3f54bd 100644 --- a/.gitignore +++ b/.gitignore @@ -140,6 +140,7 @@ celerybeat.pid .venv env/ venv/ +venv*/ ENV/ env.bak/ 
venv.bak/ @@ -182,9 +183,9 @@ cython_debug/ .abstra/ # Visual Studio Code -# Visual Studio Code specific template is maintained in a separate VisualStudioCode.gitignore +# Visual Studio Code specific template is maintained in a separate VisualStudioCode.gitignore # that can be found at https://github.com/github/gitignore/blob/main/Global/VisualStudioCode.gitignore -# and can be added to the global gitignore or merged into this file. However, if you prefer, +# and can be added to the global gitignore or merged into this file. However, if you prefer, # you could uncomment the following to ignore the entire vscode folder # .vscode/ From 22e4a7c99aac8c9db3e969a84adb2b492096ab2b Mon Sep 17 00:00:00 2001 From: Maxim Ivanov Date: Mon, 25 Aug 2025 12:36:35 +0700 Subject: [PATCH 05/29] Add tests --- tests/test_assert_xml_equal.py | 344 +++++++++++++++++++++++++++++++++ 1 file changed, 344 insertions(+) create mode 100644 tests/test_assert_xml_equal.py diff --git a/tests/test_assert_xml_equal.py b/tests/test_assert_xml_equal.py new file mode 100644 index 0000000..45539e7 --- /dev/null +++ b/tests/test_assert_xml_equal.py @@ -0,0 +1,344 @@ +from xml.etree.ElementTree import ParseError + +import pytest + +from xmlassert import assert_xml_equal + + +def test_IdenticalXmlStrings_PassWithoutError() -> None: + xml_string = 'text' + assert_xml_equal(xml_string, xml_string) + + +@pytest.mark.parametrize( + 'actual,expected', + [ + # Different formatting + ( + 'text', + '\n text\n', + ), + # Different whitespace + ('text', ' text '), + ( + 'text', + ' text ', + ), + (' text ', 'text'), + # Comments ignored + ('', ''), + # Self-closing vs explicit + ('', ''), + ('', ''), + # Multiple children + ( + '', + '', + ), + # Mixed content + ( + 'textmore text', + 'textmore text', + ), + # Quote styles + ( + "", + '', + ), + # Processing instructions + ( + '', + '', + ), + ], +) +def test_EquivalentXmlWithDifferentFormatting_Pass( + actual: str, expected: str +) -> None: + 
assert_xml_equal(actual, expected) + + +@pytest.mark.parametrize( + 'actual,expected', + [ + # Different structure + ( + 'text', + 'text', + ), + # Different content + ( + 'actual text', + 'expected text', + ), + ('one', 'two'), + # Different attributes + ( + '', + '', + ), + ('', ''), + # Different namespaces + ( + '', + '', + ), + ], +) +def test_DifferentXmlContent_RaisesAssertionError( + actual: str, expected: str +) -> None: + with pytest.raises(AssertionError, match='XML documents differ'): + assert_xml_equal(actual, expected) + + +@pytest.mark.parametrize( + 'xml1,xml2', + [ + ( + '', + '', + ), + ('', ''), + ], +) +def test_AttributeOrder_DoesNotAffectEquality(xml1: str, xml2: str) -> None: + assert_xml_equal(xml1, xml2) + + +def test_XmlWithIdenticalNamespaces_Pass() -> None: + actual = '' + expected = '' + assert_xml_equal(actual, expected) + + +def test_NestedXmlStructure_EquivalentRegardlessOfFormatting() -> None: + actual = """ + + + text + + + """ + expected = 'text' + assert_xml_equal(actual, expected) + + +def test_ComplexXmlStructure_EquivalentRegardlessOfFormatting() -> None: + complex_xml = """ + + + + + + + + + + + + """ + + compact_xml = '' + + assert_xml_equal(complex_xml, compact_xml) + + +def test_DifferentXml_ErrorMessageContainsReadableDiff() -> None: + actual = 'actual' + expected = 'expected' + + with pytest.raises(AssertionError) as exc_info: + assert_xml_equal(actual, expected) + + error_message = str(exc_info.value) + assert 'XML documents differ' in error_message + assert 'expected' in error_message + assert 'actual' in error_message + assert '---' in error_message and '+++' in error_message + + +def test_XmlWithSpecialCharacters_HandledCorrectly() -> None: + actual = '& < >' + expected = '& < >' + assert_xml_equal(actual, expected) + + +def test_XmlWithCdataSections_Equivalent() -> None: + actual = 'content]]>' + expected = 'content]]>' + assert_xml_equal(actual, expected) + + +@pytest.mark.parametrize( + 'invalid_xml', + [ + '', + 
' ', + '', + '', + ], +) +def test_InvalidXml_RaisesParseError(invalid_xml: str) -> None: + valid_xml = '' + + with pytest.raises(ParseError): + assert_xml_equal(invalid_xml, valid_xml) + + with pytest.raises(ParseError): + assert_xml_equal(valid_xml, invalid_xml) + + +@pytest.mark.parametrize( + 'invalid_input', + [ + None, + 123, + [], + {}, + ], +) +def test_NonStringInput_RaisesError(invalid_input) -> None: + valid_xml = '' + + with pytest.raises((TypeError, ValueError)): + assert_xml_equal(invalid_input, valid_xml) + + with pytest.raises((TypeError, ValueError)): + assert_xml_equal(valid_xml, invalid_input) + + +def test_LargeXmlDocuments_HandledSuccessfully() -> None: + large_xml = '' + 'text' * 100 + '' + assert_xml_equal(large_xml, large_xml) + + +def test_XmlWithUnicodeCharacters_HandledCorrectly() -> None: + actual = 'caf\u00e9 na\u00efve' + expected = 'caf\u00e9 na\u00efve' + assert_xml_equal(actual, expected) + + +def test_MalformedXml_RaisesParseErrorWithMeaningfulMessage() -> None: + malformed_xml = '' + well_formed_xml = '' + + with pytest.raises(ParseError): + assert_xml_equal(malformed_xml, well_formed_xml) + + +def test_EmptyElementsWithDifferentSyntax_Equivalent() -> None: + test_cases = [ + ('', ''), + ('', ''), + ('', ''), + ] + + for actual, expected in test_cases: + assert_xml_equal(actual, expected) + + +def test_XmlWithAttributesInDifferentOrder_Equivalent() -> None: + actual = '' + expected = '' + assert_xml_equal(actual, expected) + + +def test_XmlWithNamespacePrefixes_HandledCorrectly() -> None: + actual = '' + expected = '' + assert_xml_equal(actual, expected) + + +def test_XmlWithDifferentNamespaceUris_RaisesAssertionError() -> None: + actual = '' + expected = '' + + with pytest.raises(AssertionError): + assert_xml_equal(actual, expected) + + +def test_XmlWithMixedContentAndFormatting_Equivalent() -> None: + actual = ' text value more text ' + expected = 'textvaluemore text' + assert_xml_equal(actual, expected) + + 
+@pytest.mark.parametrize( + 'xml_content', + [ + '', # Minimal self-closing + '', # Minimal with closing + 'text', # With text content + "", # With attribute + '', # With child + ], +) +def test_EdgeCaseXml_ThatTriggersBranchCoverage(xml_content: str) -> None: + """Test edge cases that might trigger different code paths""" + assert_xml_equal(xml_content, xml_content) + + +@pytest.mark.parametrize( + 'whitespace_xml', + [ + '\n\n\n', # Newlines only + ' ', # Spaces only + '\t\t\t', # Tabs only + '\n \t\n \t\n \t', # Mixed whitespace + ], +) +def test_XmlWithVariousWhitespacePatterns(whitespace_xml: str) -> None: + """Test XML with different whitespace patterns""" + assert_xml_equal(whitespace_xml, whitespace_xml) + + +@pytest.mark.parametrize( + 'empty_element_format', + [ + '', + '', + '\n\n', + '\n\n', + ], +) +def test_XmlWithEmptyElements_DifferentFormats( + empty_element_format: str, +) -> None: + """Test empty elements in various formats""" + canonical = '' + assert_xml_equal(empty_element_format, canonical) + + +def test_XmlWithDeepNesting_CoversRecursivePaths() -> None: + """Test deeply nested XML to cover recursive formatting paths""" + deep_xml = 'deep' + assert_xml_equal(deep_xml, deep_xml) + + +def test_XmlWithMixedContentAndFormatting_CoversAllBranches() -> None: + """Test XML with mixed content types""" + mixed_xml = """ + + Text content + child text + More text + + + + """ + assert_xml_equal(mixed_xml, mixed_xml) + + +def test_XmlWithSpecialCharacters_InAttributesAndText() -> None: + """Test XML with special characters in different contexts""" + special_chars_xml = ( + '<content>' + ) + assert_xml_equal(special_chars_xml, special_chars_xml) + + +def test_XmlWithNamespace_AndAttributes_CoversComplexPaths() -> None: + """Test XML with namespaces and attributes""" + ns_xml = '' + assert_xml_equal(ns_xml, ns_xml) From 964f28ba01ba58122b2f7091689307f1b8c4818c Mon Sep 17 00:00:00 2001 From: Maxim Ivanov Date: Mon, 25 Aug 2025 12:36:40 +0700 Subject: [PATCH 
06/29] Add production code --- src/xmlassert/__init__.py | 6 ++ src/xmlassert/equal.py | 119 ++++++++++++++++++++++++++++++++++++++ 2 files changed, 125 insertions(+) create mode 100644 src/xmlassert/__init__.py create mode 100644 src/xmlassert/equal.py diff --git a/src/xmlassert/__init__.py b/src/xmlassert/__init__.py new file mode 100644 index 0000000..892f522 --- /dev/null +++ b/src/xmlassert/__init__.py @@ -0,0 +1,6 @@ +from .equal import assert_xml_equal + + +__all__ = [ + 'assert_xml_equal', +] diff --git a/src/xmlassert/equal.py b/src/xmlassert/equal.py new file mode 100644 index 0000000..68fe8a6 --- /dev/null +++ b/src/xmlassert/equal.py @@ -0,0 +1,119 @@ +import difflib +from typing import Protocol +from xml.etree.ElementTree import ( + Element, # For type hints only + canonicalize, +) + +from defusedxml.ElementTree import fromstring, tostring + + +__all__ = [ + 'assert_xml_equal', +] + + +def assert_xml_equal(actual: str, expected: str) -> None: + """ + Securely compare XML strings with clean diff output. 
+ """ + actual_canonical = canonicalize( + actual, + strip_text=True, + with_comments=False, + ) + expected_canonical = canonicalize( + expected, + strip_text=True, + with_comments=False, + ) + + if actual_canonical == expected_canonical: + return + + actual_pretty = _pretty_format_xml(actual) + expected_pretty = _pretty_format_xml(expected) + + # Generate clean diff + diff = difflib.unified_diff( + expected_pretty.splitlines(), + actual_pretty.splitlines(), + fromfile='expected', + tofile='actual', + lineterm='', + ) + + diff_text = '\n'.join(diff) + raise AssertionError(f'XML documents differ:\n{diff_text}') + + +def _pretty_format_xml(xml_str: str) -> str: + """Securely format XML with clean, consistent indentation""" + try: + root = fromstring(xml_str) + _indent(root) + return str(tostring(root, encoding='unicode')) + except Exception: + return canonicalize(xml_str, strip_text=False) + + +class ElementVisitor(Protocol): + def visit(self, elem: Element, level: int) -> None: ... + + +class IndentVisitorFactory: + """Factory that provides appropriate visitors based on element type""" + + @staticmethod + def get_pre_visitor(elem: Element) -> ElementVisitor: + if len(elem): + return ParentElementVisitor() + return LeafElementVisitor() + + @staticmethod + def get_post_visitor(elem: Element) -> ElementVisitor: + return PostProcessingVisitor() + + +# Visitors remain the same as above +class ParentElementVisitor: + def visit(self, elem: Element, level: int) -> None: + indent_str = ' ' * level + if not elem.text or not elem.text.strip(): + elem.text = '\n' + indent_str + ' ' + if not elem.tail or not elem.tail.strip(): + elem.tail = '\n' + indent_str + + +class LeafElementVisitor: + def visit(self, elem: Element, level: int) -> None: + if level and (not elem.tail or not elem.tail.strip()): + elem.tail = '\n' + ' ' * level + + +class PostProcessingVisitor: + def visit(self, elem: Element, level: int) -> None: + if len(elem) and (not elem.tail or not elem.tail.strip()): + 
elem.tail = '\n' + ' ' * level + + +def _indent(elem: Element, level: int = 0) -> None: + """Recursively indent XML elements using visitor factory""" + factory = IndentVisitorFactory() + _traverse_with_factory(elem, factory, level) + + +def _traverse_with_factory( + elem: Element, factory: IndentVisitorFactory, level: int = 0 +) -> None: + # Pre-visit with appropriate visitor + pre_visitor = factory.get_pre_visitor(elem) + pre_visitor.visit(elem, level) + + # Process children + for child in elem: + _traverse_with_factory(child, factory, level + 1) + + # Post-visit + post_visitor = factory.get_post_visitor(elem) + post_visitor.visit(elem, level) From 4a6b42a6ed4393a5a97e7903f33e378aa1b12f95 Mon Sep 17 00:00:00 2001 From: Maxim Ivanov Date: Mon, 25 Aug 2025 13:38:13 +0700 Subject: [PATCH 07/29] REF: extract clean_diff into separate module --- src/xmlassert/diffs.py | 93 +++++++++++++++++++++++++++++++++++ src/xmlassert/equal.py | 108 ++++------------------------------------- 2 files changed, 102 insertions(+), 99 deletions(-) create mode 100644 src/xmlassert/diffs.py diff --git a/src/xmlassert/diffs.py b/src/xmlassert/diffs.py new file mode 100644 index 0000000..4e7ede5 --- /dev/null +++ b/src/xmlassert/diffs.py @@ -0,0 +1,93 @@ +import difflib +from typing import Protocol +from xml.etree.ElementTree import ( + Element, # For type hints only + canonicalize, +) + +from defusedxml.ElementTree import fromstring, tostring + + +def clean_diff(actual: str, expected: str) -> str: + actual_pretty = _pretty_format_xml(actual) + expected_pretty = _pretty_format_xml(expected) + diff = difflib.unified_diff( + expected_pretty.splitlines(), + actual_pretty.splitlines(), + fromfile='expected', + tofile='actual', + lineterm='', + ) + return '\n'.join(diff) + + +def _pretty_format_xml(xml_str: str) -> str: + """Securely format XML with clean, consistent indentation""" + try: + root = fromstring(xml_str) + _indent(root) + return str(tostring(root, encoding='unicode')) + except 
Exception: + return canonicalize(xml_str, strip_text=False) + + +class ElementVisitor(Protocol): + def visit(self, elem: Element, level: int) -> None: ... + + +class IndentVisitorFactory: + """Factory that provides appropriate visitors based on element type""" + + @staticmethod + def get_pre_visitor(elem: Element) -> ElementVisitor: + if len(elem): + return ParentElementVisitor() + return LeafElementVisitor() + + @staticmethod + def get_post_visitor(elem: Element) -> ElementVisitor: + return PostProcessingVisitor() + + +# Visitors remain the same as above +class ParentElementVisitor: + def visit(self, elem: Element, level: int) -> None: + indent_str = ' ' * level + if not elem.text or not elem.text.strip(): + elem.text = '\n' + indent_str + ' ' + if not elem.tail or not elem.tail.strip(): + elem.tail = '\n' + indent_str + + +class LeafElementVisitor: + def visit(self, elem: Element, level: int) -> None: + if level and (not elem.tail or not elem.tail.strip()): + elem.tail = '\n' + ' ' * level + + +class PostProcessingVisitor: + def visit(self, elem: Element, level: int) -> None: + if len(elem) and (not elem.tail or not elem.tail.strip()): + elem.tail = '\n' + ' ' * level + + +def _indent(elem: Element, level: int = 0) -> None: + """Recursively indent XML elements using visitor factory""" + factory = IndentVisitorFactory() + _traverse_with_factory(elem, factory, level) + + +def _traverse_with_factory( + elem: Element, factory: IndentVisitorFactory, level: int = 0 +) -> None: + # Pre-visit with appropriate visitor + pre_visitor = factory.get_pre_visitor(elem) + pre_visitor.visit(elem, level) + + # Process children + for child in elem: + _traverse_with_factory(child, factory, level + 1) + + # Post-visit + post_visitor = factory.get_post_visitor(elem) + post_visitor.visit(elem, level) diff --git a/src/xmlassert/equal.py b/src/xmlassert/equal.py index 68fe8a6..582b5a5 100644 --- a/src/xmlassert/equal.py +++ b/src/xmlassert/equal.py @@ -1,11 +1,8 @@ -import difflib -from 
typing import Protocol from xml.etree.ElementTree import ( - Element, # For type hints only canonicalize, ) -from defusedxml.ElementTree import fromstring, tostring +from .diffs import clean_diff __all__ = [ @@ -17,103 +14,16 @@ def assert_xml_equal(actual: str, expected: str) -> None: """ Securely compare XML strings with clean diff output. """ - actual_canonical = canonicalize( - actual, - strip_text=True, - with_comments=False, - ) - expected_canonical = canonicalize( - expected, - strip_text=True, - with_comments=False, - ) - - if actual_canonical == expected_canonical: + if _canonical(actual) == _canonical(expected): return - actual_pretty = _pretty_format_xml(actual) - expected_pretty = _pretty_format_xml(expected) - - # Generate clean diff - diff = difflib.unified_diff( - expected_pretty.splitlines(), - actual_pretty.splitlines(), - fromfile='expected', - tofile='actual', - lineterm='', - ) - - diff_text = '\n'.join(diff) + diff_text = clean_diff(actual, expected) raise AssertionError(f'XML documents differ:\n{diff_text}') -def _pretty_format_xml(xml_str: str) -> str: - """Securely format XML with clean, consistent indentation""" - try: - root = fromstring(xml_str) - _indent(root) - return str(tostring(root, encoding='unicode')) - except Exception: - return canonicalize(xml_str, strip_text=False) - - -class ElementVisitor(Protocol): - def visit(self, elem: Element, level: int) -> None: ... 
- - -class IndentVisitorFactory: - """Factory that provides appropriate visitors based on element type""" - - @staticmethod - def get_pre_visitor(elem: Element) -> ElementVisitor: - if len(elem): - return ParentElementVisitor() - return LeafElementVisitor() - - @staticmethod - def get_post_visitor(elem: Element) -> ElementVisitor: - return PostProcessingVisitor() - - -# Visitors remain the same as above -class ParentElementVisitor: - def visit(self, elem: Element, level: int) -> None: - indent_str = ' ' * level - if not elem.text or not elem.text.strip(): - elem.text = '\n' + indent_str + ' ' - if not elem.tail or not elem.tail.strip(): - elem.tail = '\n' + indent_str - - -class LeafElementVisitor: - def visit(self, elem: Element, level: int) -> None: - if level and (not elem.tail or not elem.tail.strip()): - elem.tail = '\n' + ' ' * level - - -class PostProcessingVisitor: - def visit(self, elem: Element, level: int) -> None: - if len(elem) and (not elem.tail or not elem.tail.strip()): - elem.tail = '\n' + ' ' * level - - -def _indent(elem: Element, level: int = 0) -> None: - """Recursively indent XML elements using visitor factory""" - factory = IndentVisitorFactory() - _traverse_with_factory(elem, factory, level) - - -def _traverse_with_factory( - elem: Element, factory: IndentVisitorFactory, level: int = 0 -) -> None: - # Pre-visit with appropriate visitor - pre_visitor = factory.get_pre_visitor(elem) - pre_visitor.visit(elem, level) - - # Process children - for child in elem: - _traverse_with_factory(child, factory, level + 1) - - # Post-visit - post_visitor = factory.get_post_visitor(elem) - post_visitor.visit(elem, level) +def _canonical(content: str) -> str: + return canonicalize( + content, + strip_text=True, + with_comments=False, + ) From f8611ec432c298fda695297c7b972cbc393d1df3 Mon Sep 17 00:00:00 2001 From: Maxim Ivanov Date: Mon, 25 Aug 2025 13:42:24 +0700 Subject: [PATCH 08/29] REF: extract pretty_format_xml into separate module --- 
src/xmlassert/diffs.py | 83 ++----------------------------------- src/xmlassert/formatting.py | 83 +++++++++++++++++++++++++++++++++++++ 2 files changed, 86 insertions(+), 80 deletions(-) create mode 100644 src/xmlassert/formatting.py diff --git a/src/xmlassert/diffs.py b/src/xmlassert/diffs.py index 4e7ede5..f580fee 100644 --- a/src/xmlassert/diffs.py +++ b/src/xmlassert/diffs.py @@ -1,16 +1,11 @@ import difflib -from typing import Protocol -from xml.etree.ElementTree import ( - Element, # For type hints only - canonicalize, -) -from defusedxml.ElementTree import fromstring, tostring +from .formatting import pretty_format_xml def clean_diff(actual: str, expected: str) -> str: - actual_pretty = _pretty_format_xml(actual) - expected_pretty = _pretty_format_xml(expected) + actual_pretty = pretty_format_xml(actual) + expected_pretty = pretty_format_xml(expected) diff = difflib.unified_diff( expected_pretty.splitlines(), actual_pretty.splitlines(), @@ -19,75 +14,3 @@ def clean_diff(actual: str, expected: str) -> str: lineterm='', ) return '\n'.join(diff) - - -def _pretty_format_xml(xml_str: str) -> str: - """Securely format XML with clean, consistent indentation""" - try: - root = fromstring(xml_str) - _indent(root) - return str(tostring(root, encoding='unicode')) - except Exception: - return canonicalize(xml_str, strip_text=False) - - -class ElementVisitor(Protocol): - def visit(self, elem: Element, level: int) -> None: ... 
- - -class IndentVisitorFactory: - """Factory that provides appropriate visitors based on element type""" - - @staticmethod - def get_pre_visitor(elem: Element) -> ElementVisitor: - if len(elem): - return ParentElementVisitor() - return LeafElementVisitor() - - @staticmethod - def get_post_visitor(elem: Element) -> ElementVisitor: - return PostProcessingVisitor() - - -# Visitors remain the same as above -class ParentElementVisitor: - def visit(self, elem: Element, level: int) -> None: - indent_str = ' ' * level - if not elem.text or not elem.text.strip(): - elem.text = '\n' + indent_str + ' ' - if not elem.tail or not elem.tail.strip(): - elem.tail = '\n' + indent_str - - -class LeafElementVisitor: - def visit(self, elem: Element, level: int) -> None: - if level and (not elem.tail or not elem.tail.strip()): - elem.tail = '\n' + ' ' * level - - -class PostProcessingVisitor: - def visit(self, elem: Element, level: int) -> None: - if len(elem) and (not elem.tail or not elem.tail.strip()): - elem.tail = '\n' + ' ' * level - - -def _indent(elem: Element, level: int = 0) -> None: - """Recursively indent XML elements using visitor factory""" - factory = IndentVisitorFactory() - _traverse_with_factory(elem, factory, level) - - -def _traverse_with_factory( - elem: Element, factory: IndentVisitorFactory, level: int = 0 -) -> None: - # Pre-visit with appropriate visitor - pre_visitor = factory.get_pre_visitor(elem) - pre_visitor.visit(elem, level) - - # Process children - for child in elem: - _traverse_with_factory(child, factory, level + 1) - - # Post-visit - post_visitor = factory.get_post_visitor(elem) - post_visitor.visit(elem, level) diff --git a/src/xmlassert/formatting.py b/src/xmlassert/formatting.py new file mode 100644 index 0000000..20051e0 --- /dev/null +++ b/src/xmlassert/formatting.py @@ -0,0 +1,83 @@ +from typing import Protocol +from xml.etree.ElementTree import ( + Element, # For type hints only + canonicalize, +) + +from defusedxml.ElementTree import 
fromstring, tostring + + +__all__ = [ + 'pretty_format_xml', +] + + +def pretty_format_xml(xml_str: str) -> str: + """Securely format XML with clean, consistent indentation""" + try: + root = fromstring(xml_str) + _indent(root) + return str(tostring(root, encoding='unicode')) + except Exception: + return canonicalize(xml_str, strip_text=False) + + +class ElementVisitor(Protocol): + def visit(self, elem: Element, level: int) -> None: ... + + +class IndentVisitorFactory: + """Factory that provides appropriate visitors based on element type""" + + @staticmethod + def get_pre_visitor(elem: Element) -> ElementVisitor: + if len(elem): + return ParentElementVisitor() + return LeafElementVisitor() + + @staticmethod + def get_post_visitor(elem: Element) -> ElementVisitor: + return PostProcessingVisitor() + + +class ParentElementVisitor: + def visit(self, elem: Element, level: int) -> None: + indent_str = ' ' * level + if not elem.text or not elem.text.strip(): + elem.text = '\n' + indent_str + ' ' + if not elem.tail or not elem.tail.strip(): + elem.tail = '\n' + indent_str + + +class LeafElementVisitor: + def visit(self, elem: Element, level: int) -> None: + if level and (not elem.tail or not elem.tail.strip()): + elem.tail = '\n' + ' ' * level + + +class PostProcessingVisitor: + def visit(self, elem: Element, level: int) -> None: + if len(elem) and (not elem.tail or not elem.tail.strip()): + elem.tail = '\n' + ' ' * level + + +def _indent(elem: Element, level: int = 0) -> None: + """Recursively indent XML elements using visitor factory""" + factory = IndentVisitorFactory() + _traverse_with_factory(elem, factory, level) + + +def _traverse_with_factory( + elem: Element, factory: IndentVisitorFactory, level: int = 0 +) -> None: + # Pre-visit with appropriate visitor + pre_visitor = factory.get_pre_visitor(elem) + pre_visitor.visit(elem, level) + + # Process children + for child in elem: + _traverse_with_factory(child, factory, level + 1) + + # Post-visit + post_visitor = 
factory.get_post_visitor(elem) + post_visitor.visit(elem, level) From c40b1f561a1368e49ff76365d5662a87e113bacb Mon Sep 17 00:00:00 2001 From: Maxim Ivanov Date: Mon, 25 Aug 2025 13:43:21 +0700 Subject: [PATCH 09/29] TYP: test function --- tests/test_assert_xml_equal.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tests/test_assert_xml_equal.py b/tests/test_assert_xml_equal.py index 45539e7..aa57e15 100644 --- a/tests/test_assert_xml_equal.py +++ b/tests/test_assert_xml_equal.py @@ -1,3 +1,4 @@ +from typing import Any from xml.etree.ElementTree import ParseError import pytest @@ -198,7 +199,7 @@ def test_InvalidXml_RaisesParseError(invalid_xml: str) -> None: {}, ], ) -def test_NonStringInput_RaisesError(invalid_input) -> None: +def test_NonStringInput_RaisesError(invalid_input: Any) -> None: valid_xml = '' with pytest.raises((TypeError, ValueError)): From 66c1a724dc11c00f7d77449775b917c714c34cec Mon Sep 17 00:00:00 2001 From: Maxim Ivanov Date: Mon, 25 Aug 2025 13:45:36 +0700 Subject: [PATCH 10/29] Add types-defusedxml as a dev dependency for mypy --- pyproject.toml | 1 + 1 file changed, 1 insertion(+) diff --git a/pyproject.toml b/pyproject.toml index 93da482..68828a7 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -52,6 +52,7 @@ dev = [ 'types-setuptools', 'build>=0.10.0', 'twine>=4.0.0', + 'types-defusedxml', ] docs = [ 'sphinx>=7.0.0', From fb636c31cf2840f98eff5381f7c851c45fac345c Mon Sep 17 00:00:00 2001 From: Maxim Ivanov Date: Mon, 25 Aug 2025 13:50:29 +0700 Subject: [PATCH 11/29] FIX: exclude lines in coverage --- pyproject.toml | 29 +++++++++++++++-------------- 1 file changed, 15 insertions(+), 14 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 68828a7..b5e193e 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -103,27 +103,28 @@ source = ['src/xmlassert'] branch = true parallel = true +[tool.coverage.paths] +source = ['src/xmlassert', '*/site-packages'] + [tool.coverage.report] show_missing = true skip_covered = 
true fail_under = 90 - -[tool.coverage.paths] -source = ['src/xmlassert', '*/site-packages'] - -[tool.coverage.exclude] -lines = [ - '# pragma: no cover', - 'if __name__ == .__main__.:', +exclude_lines = [ + 'pragma: no cover', + 'def __repr__', + 'if self\.debug', + 'raise AssertionError', 'raise NotImplementedError', - 'return NotImplemented', - '@abc.abstractmethod', - 'class.*Protocol', - 'class.*ABC', - 'pass', + 'if 0:', + 'if __name__ == .__main__.:', 'if TYPE_CHECKING:', - 'from __future__ import', + 'class .*\bProtocol\):', + 'abc.abstractmethod', + 'abstractmethod', + 'pass', ] +ignore_errors = true [tool.pytest.ini_options] testpaths = ['tests'] From 065bdf20bb26cb2d7c10f337e1beb86c159373e0 Mon Sep 17 00:00:00 2001 From: Maxim Ivanov Date: Mon, 25 Aug 2025 13:54:54 +0700 Subject: [PATCH 12/29] REF: extract indenting module --- src/xmlassert/formatting.py | 69 ++--------------------------------- src/xmlassert/indenting.py | 71 +++++++++++++++++++++++++++++++++++++ 2 files changed, 74 insertions(+), 66 deletions(-) create mode 100644 src/xmlassert/indenting.py diff --git a/src/xmlassert/formatting.py b/src/xmlassert/formatting.py index 20051e0..98924fe 100644 --- a/src/xmlassert/formatting.py +++ b/src/xmlassert/formatting.py @@ -1,11 +1,9 @@ from typing import Protocol -from xml.etree.ElementTree import ( - Element, # For type hints only - canonicalize, -) +from xml.etree.ElementTree import canonicalize from defusedxml.ElementTree import fromstring, tostring +from .indenting import indent __all__ = [ 'pretty_format_xml', @@ -16,68 +14,7 @@ def pretty_format_xml(xml_str: str) -> str: """Securely format XML with clean, consistent indentation""" try: root = fromstring(xml_str) - _indent(root) + indent(root) return str(tostring(root, encoding='unicode')) except Exception: return canonicalize(xml_str, strip_text=False) - - -class ElementVisitor(Protocol): - def visit(self, elem: Element, level: int) -> None: ... 
- - -class IndentVisitorFactory: - """Factory that provides appropriate visitors based on element type""" - - @staticmethod - def get_pre_visitor(elem: Element) -> ElementVisitor: - if len(elem): - return ParentElementVisitor() - return LeafElementVisitor() - - @staticmethod - def get_post_visitor(elem: Element) -> ElementVisitor: - return PostProcessingVisitor() - - -class ParentElementVisitor: - def visit(self, elem: Element, level: int) -> None: - indent_str = ' ' * level - if not elem.text or not elem.text.strip(): - elem.text = '\n' + indent_str + ' ' - if not elem.tail or not elem.tail.strip(): - elem.tail = '\n' + indent_str - - -class LeafElementVisitor: - def visit(self, elem: Element, level: int) -> None: - if level and (not elem.tail or not elem.tail.strip()): - elem.tail = '\n' + ' ' * level - - -class PostProcessingVisitor: - def visit(self, elem: Element, level: int) -> None: - if len(elem) and (not elem.tail or not elem.tail.strip()): - elem.tail = '\n' + ' ' * level - - -def _indent(elem: Element, level: int = 0) -> None: - """Recursively indent XML elements using visitor factory""" - factory = IndentVisitorFactory() - _traverse_with_factory(elem, factory, level) - - -def _traverse_with_factory( - elem: Element, factory: IndentVisitorFactory, level: int = 0 -) -> None: - # Pre-visit with appropriate visitor - pre_visitor = factory.get_pre_visitor(elem) - pre_visitor.visit(elem, level) - - # Process children - for child in elem: - _traverse_with_factory(child, factory, level + 1) - - # Post-visit - post_visitor = factory.get_post_visitor(elem) - post_visitor.visit(elem, level) diff --git a/src/xmlassert/indenting.py b/src/xmlassert/indenting.py new file mode 100644 index 0000000..55a5c9c --- /dev/null +++ b/src/xmlassert/indenting.py @@ -0,0 +1,71 @@ +from __future__ import annotations + +from typing import TYPE_CHECKING, Protocol + +if TYPE_CHECKING: + from xml.etree.ElementTree import Element + +__all__ = [ + 'indent', +] + + +def indent(elem: 
Element, level: int = 0) -> None: + """Recursively indent XML elements using visitor factory""" + factory = IndentVisitorFactory() + _traverse_with_factory(elem, factory, level) + + +class ElementVisitor(Protocol): + def visit(self, elem: Element, level: int) -> None: ... + + +class IndentVisitorFactory: + """Factory that provides appropriate visitors based on element type""" + + @staticmethod + def get_pre_visitor(elem: Element) -> ElementVisitor: + if len(elem): + return ParentElementVisitor() + return LeafElementVisitor() + + @staticmethod + def get_post_visitor(elem: Element) -> ElementVisitor: + return PostProcessingVisitor() + + +class ParentElementVisitor: + def visit(self, elem: Element, level: int) -> None: + indent_str = ' ' * level + if not elem.text or not elem.text.strip(): + elem.text = '\n' + indent_str + ' ' + if not elem.tail or not elem.tail.strip(): + elem.tail = '\n' + indent_str + + +class LeafElementVisitor: + def visit(self, elem: Element, level: int) -> None: + if level and (not elem.tail or not elem.tail.strip()): + elem.tail = '\n' + ' ' * level + + +class PostProcessingVisitor: + def visit(self, elem: Element, level: int) -> None: + if len(elem) and (not elem.tail or not elem.tail.strip()): + elem.tail = '\n' + ' ' * level + + +def _traverse_with_factory( + elem: Element, factory: IndentVisitorFactory, level: int = 0 +) -> None: + # Pre-visit with appropriate visitor + pre_visitor = factory.get_pre_visitor(elem) + pre_visitor.visit(elem, level) + + # Process children + for child in elem: + _traverse_with_factory(child, factory, level + 1) + + # Post-visit + post_visitor = factory.get_post_visitor(elem) + post_visitor.visit(elem, level) From 857d0069207927cefa12401d1952b456d998493a Mon Sep 17 00:00:00 2001 From: Maxim Ivanov Date: Mon, 25 Aug 2025 13:56:39 +0700 Subject: [PATCH 13/29] REF: move _canonical to formatting.py --- src/xmlassert/equal.py | 15 ++------------- src/xmlassert/formatting.py | 11 ++++++++++- src/xmlassert/indenting.py 
| 1 + 3 files changed, 13 insertions(+), 14 deletions(-) diff --git a/src/xmlassert/equal.py b/src/xmlassert/equal.py index 582b5a5..df6616c 100644 --- a/src/xmlassert/equal.py +++ b/src/xmlassert/equal.py @@ -1,8 +1,5 @@ -from xml.etree.ElementTree import ( - canonicalize, -) - from .diffs import clean_diff +from .formatting import canonical __all__ = [ @@ -14,16 +11,8 @@ def assert_xml_equal(actual: str, expected: str) -> None: """ Securely compare XML strings with clean diff output. """ - if _canonical(actual) == _canonical(expected): + if canonical(actual) == canonical(expected): return diff_text = clean_diff(actual, expected) raise AssertionError(f'XML documents differ:\n{diff_text}') - - -def _canonical(content: str) -> str: - return canonicalize( - content, - strip_text=True, - with_comments=False, - ) diff --git a/src/xmlassert/formatting.py b/src/xmlassert/formatting.py index 98924fe..f4612a7 100644 --- a/src/xmlassert/formatting.py +++ b/src/xmlassert/formatting.py @@ -1,11 +1,12 @@ -from typing import Protocol from xml.etree.ElementTree import canonicalize from defusedxml.ElementTree import fromstring, tostring from .indenting import indent + __all__ = [ + 'canonical', 'pretty_format_xml', ] @@ -18,3 +19,11 @@ def pretty_format_xml(xml_str: str) -> str: return str(tostring(root, encoding='unicode')) except Exception: return canonicalize(xml_str, strip_text=False) + + +def canonical(content: str) -> str: + return canonicalize( + content, + strip_text=True, + with_comments=False, + ) diff --git a/src/xmlassert/indenting.py b/src/xmlassert/indenting.py index 55a5c9c..42411d6 100644 --- a/src/xmlassert/indenting.py +++ b/src/xmlassert/indenting.py @@ -2,6 +2,7 @@ from typing import TYPE_CHECKING, Protocol + if TYPE_CHECKING: from xml.etree.ElementTree import Element From a7b8f69be4ed071c5e9ba4d539df06c5b9c0fdd9 Mon Sep 17 00:00:00 2001 From: Maxim Ivanov Date: Mon, 25 Aug 2025 14:14:37 +0700 Subject: [PATCH 14/29] Export clean_diff explicitly --- 
src/xmlassert/diffs.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/xmlassert/diffs.py b/src/xmlassert/diffs.py index f580fee..d5de66d 100644 --- a/src/xmlassert/diffs.py +++ b/src/xmlassert/diffs.py @@ -2,6 +2,10 @@ from .formatting import pretty_format_xml +__all__ = [ + 'clean_diff', +] + def clean_diff(actual: str, expected: str) -> str: actual_pretty = pretty_format_xml(actual) From d7abb565972e50de228e9ba4f6fec7b12ffe1eef Mon Sep 17 00:00:00 2001 From: Maxim Ivanov Date: Mon, 25 Aug 2025 14:14:49 +0700 Subject: [PATCH 15/29] Split parameters one per line --- src/xmlassert/indenting.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/xmlassert/indenting.py b/src/xmlassert/indenting.py index 42411d6..22a3f68 100644 --- a/src/xmlassert/indenting.py +++ b/src/xmlassert/indenting.py @@ -57,7 +57,9 @@ def visit(self, elem: Element, level: int) -> None: def _traverse_with_factory( - elem: Element, factory: IndentVisitorFactory, level: int = 0 + elem: Element, + factory: IndentVisitorFactory, + level: int = 0, ) -> None: # Pre-visit with appropriate visitor pre_visitor = factory.get_pre_visitor(elem) From b8061f438ed968199d913b61c57d20ddcffcfff4 Mon Sep 17 00:00:00 2001 From: Maxim Ivanov Date: Mon, 25 Aug 2025 14:15:06 +0700 Subject: [PATCH 16/29] Set minimal requirement on defusedxml>=0.6.0 --- pyproject.toml | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index b5e193e..01571fa 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -27,8 +27,7 @@ classifiers = [ 'Topic :: Text Processing :: Markup :: XML', ] dependencies = [ - 'defusedxml~=0.7.1', - 'lxml~=4.9.0', + 'defusedxml>=0.6.0', ] [project.urls] From 3fb03315f2bd2ff453e14ef5c9d1660221a26010 Mon Sep 17 00:00:00 2001 From: Maxim Ivanov Date: Mon, 25 Aug 2025 14:29:01 +0700 Subject: [PATCH 17/29] REF: simplify indent --- src/xmlassert/indenting.py | 76 +++++++++++--------------------------- 1 file changed, 21 insertions(+), 
55 deletions(-) diff --git a/src/xmlassert/indenting.py b/src/xmlassert/indenting.py index 22a3f68..4f7a0d4 100644 --- a/src/xmlassert/indenting.py +++ b/src/xmlassert/indenting.py @@ -12,63 +12,29 @@ def indent(elem: Element, level: int = 0) -> None: - """Recursively indent XML elements using visitor factory""" - factory = IndentVisitorFactory() - _traverse_with_factory(elem, factory, level) - - -class ElementVisitor(Protocol): - def visit(self, elem: Element, level: int) -> None: ... - - -class IndentVisitorFactory: - """Factory that provides appropriate visitors based on element type""" - - @staticmethod - def get_pre_visitor(elem: Element) -> ElementVisitor: - if len(elem): - return ParentElementVisitor() - return LeafElementVisitor() - - @staticmethod - def get_post_visitor(elem: Element) -> ElementVisitor: - return PostProcessingVisitor() - - -class ParentElementVisitor: - def visit(self, elem: Element, level: int) -> None: - indent_str = ' ' * level + """ + Recursively indent XML elements with consistent spacing. + Based on the standard ElementTree indentation approach. 
+ """ + # Set indentation for current element + indent_str = '\n' + ' ' * level + + if len(elem): + # If element has children if not elem.text or not elem.text.strip(): - elem.text = '\n' + indent_str + ' ' + elem.text = indent_str + ' ' if not elem.tail or not elem.tail.strip(): - elem.tail = '\n' + indent_str + elem.tail = indent_str + # Process children + for child in elem: + indent(child, level + 1) -class LeafElementVisitor: - def visit(self, elem: Element, level: int) -> None: - if level and (not elem.tail or not elem.tail.strip()): - elem.tail = '\n' + ' ' * level - - -class PostProcessingVisitor: - def visit(self, elem: Element, level: int) -> None: - if len(elem) and (not elem.tail or not elem.tail.strip()): - elem.tail = '\n' + ' ' * level - + # Set tail for the last child + if not elem[-1].tail or not elem[-1].tail.strip(): + elem[-1].tail = indent_str -def _traverse_with_factory( - elem: Element, - factory: IndentVisitorFactory, - level: int = 0, -) -> None: - # Pre-visit with appropriate visitor - pre_visitor = factory.get_pre_visitor(elem) - pre_visitor.visit(elem, level) - - # Process children - for child in elem: - _traverse_with_factory(child, factory, level + 1) - - # Post-visit - post_visitor = factory.get_post_visitor(elem) - post_visitor.visit(elem, level) + else: + # Leaf element + if level and (not elem.tail or not elem.tail.strip()): + elem.tail = indent_str From ab6573883a1d12487acfb4d11f0aeae770bc5f54 Mon Sep 17 00:00:00 2001 From: Maxim Ivanov Date: Mon, 25 Aug 2025 14:29:15 +0700 Subject: [PATCH 18/29] TST: make tests on assertion error more strict --- src/xmlassert/diffs.py | 1 + src/xmlassert/indenting.py | 2 +- tests/test_assert_xml_equal.py | 46 +++++++++++++++++++++++++++++++--- 3 files changed, 44 insertions(+), 5 deletions(-) diff --git a/src/xmlassert/diffs.py b/src/xmlassert/diffs.py index d5de66d..8ccfb80 100644 --- a/src/xmlassert/diffs.py +++ b/src/xmlassert/diffs.py @@ -2,6 +2,7 @@ from .formatting import pretty_format_xml + 
__all__ = [ 'clean_diff', ] diff --git a/src/xmlassert/indenting.py b/src/xmlassert/indenting.py index 4f7a0d4..0af1282 100644 --- a/src/xmlassert/indenting.py +++ b/src/xmlassert/indenting.py @@ -1,6 +1,6 @@ from __future__ import annotations -from typing import TYPE_CHECKING, Protocol +from typing import TYPE_CHECKING if TYPE_CHECKING: diff --git a/tests/test_assert_xml_equal.py b/tests/test_assert_xml_equal.py index aa57e15..9a98554 100644 --- a/tests/test_assert_xml_equal.py +++ b/tests/test_assert_xml_equal.py @@ -1,3 +1,4 @@ +from textwrap import dedent from typing import Any from xml.etree.ElementTree import ParseError @@ -153,10 +154,47 @@ def test_DifferentXml_ErrorMessageContainsReadableDiff() -> None: assert_xml_equal(actual, expected) error_message = str(exc_info.value) - assert 'XML documents differ' in error_message - assert 'expected' in error_message - assert 'actual' in error_message - assert '---' in error_message and '+++' in error_message + expected = dedent( + """\ + XML documents differ: + --- expected + +++ actual + @@ -1,3 +1,3 @@ + + - expected + + actual + + """ + ).strip() + assert error_message == expected + + +def test_DifferentXmlNested_ErrorMessageContainsReadableDiff() -> None: + actual = ( + ' one\r\n actual' + ) + expected = 'expected' + + with pytest.raises(AssertionError) as exc_info: + assert_xml_equal(actual, expected) + + error_message = str(exc_info.value) + expected = dedent( + """\ + XML documents differ: + --- expected + +++ actual + @@ -1,3 +1,6 @@ + + - expected + + one + + + + actual + + + + """ + ).strip() + assert error_message == expected def test_XmlWithSpecialCharacters_HandledCorrectly() -> None: From 523b76bf08dd198107f68ee7f0796ce90470dff6 Mon Sep 17 00:00:00 2001 From: Maxim Ivanov Date: Mon, 25 Aug 2025 14:45:16 +0700 Subject: [PATCH 19/29] Add copyright statement --- Makefile | 3 +++ src/xmlassert/__init__.py | 3 +++ src/xmlassert/diffs.py | 3 +++ src/xmlassert/equal.py | 3 +++ src/xmlassert/formatting.py | 3 
+++ src/xmlassert/indenting.py | 3 +++ tests/test_assert_xml_equal.py | 3 +++ 7 files changed, 21 insertions(+) diff --git a/Makefile b/Makefile index aadec64..8adb835 100644 --- a/Makefile +++ b/Makefile @@ -1,3 +1,6 @@ +# Copyright (c) 2025 Maxim Ivanov +# SPDX-License-Identifier: MIT + .PHONY: help tests lint format typecheck build publish clean install-dev # Default target diff --git a/src/xmlassert/__init__.py b/src/xmlassert/__init__.py index 892f522..03caa94 100644 --- a/src/xmlassert/__init__.py +++ b/src/xmlassert/__init__.py @@ -1,3 +1,6 @@ +# Copyright (c) 2025 Maxim Ivanov +# SPDX-License-Identifier: MIT + from .equal import assert_xml_equal diff --git a/src/xmlassert/diffs.py b/src/xmlassert/diffs.py index 8ccfb80..f03bdef 100644 --- a/src/xmlassert/diffs.py +++ b/src/xmlassert/diffs.py @@ -1,3 +1,6 @@ +# Copyright (c) 2025 Maxim Ivanov +# SPDX-License-Identifier: MIT + import difflib from .formatting import pretty_format_xml diff --git a/src/xmlassert/equal.py b/src/xmlassert/equal.py index df6616c..0073088 100644 --- a/src/xmlassert/equal.py +++ b/src/xmlassert/equal.py @@ -1,3 +1,6 @@ +# Copyright (c) 2025 Maxim Ivanov +# SPDX-License-Identifier: MIT + from .diffs import clean_diff from .formatting import canonical diff --git a/src/xmlassert/formatting.py b/src/xmlassert/formatting.py index f4612a7..1fa5452 100644 --- a/src/xmlassert/formatting.py +++ b/src/xmlassert/formatting.py @@ -1,3 +1,6 @@ +# Copyright (c) 2025 Maxim Ivanov +# SPDX-License-Identifier: MIT + from xml.etree.ElementTree import canonicalize from defusedxml.ElementTree import fromstring, tostring diff --git a/src/xmlassert/indenting.py b/src/xmlassert/indenting.py index 0af1282..b2e5667 100644 --- a/src/xmlassert/indenting.py +++ b/src/xmlassert/indenting.py @@ -1,3 +1,6 @@ +# Copyright (c) 2025 Maxim Ivanov +# SPDX-License-Identifier: MIT + from __future__ import annotations from typing import TYPE_CHECKING diff --git a/tests/test_assert_xml_equal.py 
b/tests/test_assert_xml_equal.py index 9a98554..af68e6b 100644 --- a/tests/test_assert_xml_equal.py +++ b/tests/test_assert_xml_equal.py @@ -1,3 +1,6 @@ +# Copyright (c) 2025 Maxim Ivanov +# SPDX-License-Identifier: MIT + from textwrap import dedent from typing import Any from xml.etree.ElementTree import ParseError From b07d2f7823de9c270092a28d7db281c076032457 Mon Sep 17 00:00:00 2001 From: Maxim Ivanov Date: Mon, 25 Aug 2025 14:46:15 +0700 Subject: [PATCH 20/29] Add SPDX identifier in LICENSE --- LICENSE | 2 ++ 1 file changed, 2 insertions(+) diff --git a/LICENSE b/LICENSE index d568108..7906bf7 100644 --- a/LICENSE +++ b/LICENSE @@ -1,5 +1,7 @@ MIT License +SPDX-License-Identifier: MIT + Copyright (c) 2025 Maxim Ivanov Permission is hereby granted, free of charge, to any person obtaining a copy From f97c547278416924249da47a509cc6cec291922d Mon Sep 17 00:00:00 2001 From: Maxim Ivanov Date: Mon, 25 Aug 2025 14:56:27 +0700 Subject: [PATCH 21/29] TYP: add empty py.typed --- src/xmlassert/py.typed | 0 1 file changed, 0 insertions(+), 0 deletions(-) create mode 100644 src/xmlassert/py.typed diff --git a/src/xmlassert/py.typed b/src/xmlassert/py.typed new file mode 100644 index 0000000..e69de29 From 61ba3ff0573cfbacee8098dc8fe1bf290db356be Mon Sep 17 00:00:00 2001 From: Maxim Ivanov Date: Mon, 25 Aug 2025 15:16:20 +0700 Subject: [PATCH 22/29] Add CHANGELOG --- CHANGELOG.md | 8 ++++++++ 1 file changed, 8 insertions(+) create mode 100644 CHANGELOG.md diff --git a/CHANGELOG.md b/CHANGELOG.md new file mode 100644 index 0000000..431180c --- /dev/null +++ b/CHANGELOG.md @@ -0,0 +1,8 @@ +# CHANGELOG + +## v0.1.0 (2025-08-25) + +**Initial Release** + +- First release of `xmlassert` +- Includes `assert_xml_equal` function From 78dc35b44d3b2a492e67ea8320438931537314d8 Mon Sep 17 00:00:00 2001 From: Maxim Ivanov Date: Mon, 25 Aug 2025 15:17:19 +0700 Subject: [PATCH 23/29] Make version 0.1.0rc1 --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git 
a/pyproject.toml b/pyproject.toml index 01571fa..fb9ecd2 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -4,7 +4,7 @@ build-backend = 'setuptools.build_meta' [project] name = 'xmlassert' -version = '0.1.0' +version = '0.1.0rc1' description = 'Human-readable XML comparison for testing with clean diff output' readme = 'README.md' requires-python = '>=3.8' From b40fcbc9d1d7ab321fb40f1cdbc0eeb1ef936d3f Mon Sep 17 00:00:00 2001 From: Maxim Ivanov Date: Mon, 25 Aug 2025 15:43:13 +0700 Subject: [PATCH 24/29] Add github actions --- .github/workflows/release.yml | 183 ++++++++++++++++++++++++++++++++++ 1 file changed, 183 insertions(+) create mode 100644 .github/workflows/release.yml diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml new file mode 100644 index 0000000..a65ef09 --- /dev/null +++ b/.github/workflows/release.yml @@ -0,0 +1,183 @@ +name: Release Pipeline + +on: + push: + tags: + - 'v*' # trigger on version tags + pull_request: + branches: [ master ] + +# Allow only one concurrent deployment to avoid conflicts +concurrency: + group: ${{ github.workflow }}-${{ github.ref }} + cancel-in-flight: true + +jobs: + test: + name: Test on Python ${{ matrix.python-version }} + runs-on: ubuntu-latest + strategy: + matrix: + python-version: ['3.8', '3.9', '3.10', '3.11', '3.12', '3.13'] + + steps: + - name: Checkout code + uses: actions/checkout@v4 + + - name: Set up Python ${{ matrix.python-version }} + uses: actions/setup-python@v4 + with: + python-version: ${{ matrix.python-version }} + cache: 'pip' + + - name: Install dependencies + run: | + python -m pip install --upgrade pip + pip install -e .[dev] + + - name: Run tests with pytest + run: | + pytest -v --cov=xmlassert --cov-report=xml + + build: + name: Build package + runs-on: ubuntu-latest + needs: test # Only build if tests pass + if: startsWith(github.ref, 'refs/tags/v') + + steps: + - name: Checkout code + uses: actions/checkout@v4 + + - name: Set up Python + uses: 
actions/setup-python@v4 + with: + python-version: '3.8' + + - name: Install build tools + run: | + python -m pip install --upgrade pip + pip install build twine + + - name: Build package + run: python -m build + + - name: Verify package + run: twine check dist/* + + - name: Inspect built packages (optional) + run: | + echo "Built packages:" + ls -la dist/ + echo "Wheel compatibility:" + for wheel in dist/*.whl; do + echo "$wheel:" + unzip -l "$wheel" | grep -E "(.py$|METADATA)" | head -5 + done + + - name: Upload build artifacts (tags only) + if: startsWith(github.ref, 'refs/tags/v') + uses: actions/upload-artifact@v3 + with: + name: distribution-packages + path: dist/ + + test-pypi: + name: Publish to TestPyPI + runs-on: ubuntu-latest + needs: build + if: | + startsWith(github.ref, 'refs/tags/v') && + (contains(github.ref, 'alpha') || + contains(github.ref, 'beta') || + contains(github.ref, 'rc') || + github.ref == 'refs/tags/v0.0.0-test') # Custom condition for test releases + + steps: + - name: Download build artifacts + uses: actions/download-artifact@v3 + with: + name: distribution-packages + + - name: Set up Python + uses: actions/setup-python@v4 + with: + python-version: '3.8' + + - name: Install twine + run: pip install twine + + - name: Publish to TestPyPI + env: + TWINE_USERNAME: __token__ + TWINE_PASSWORD: ${{ secrets.TEST_PYPI_API_TOKEN }} + run: twine upload --repository-url https://test.pypi.org/legacy/ dist/* + + - name: Verify TestPyPI installation + run: | + python -m pip install --index-url https://test.pypi.org/simple/ \ + --extra-index-url https://pypi.org/simple/ \ + xmlassert==${{ github.ref_name#v }} + + pypi: + name: Publish to PyPI + runs-on: ubuntu-latest + needs: build + if: | + startsWith(github.ref, 'refs/tags/v') && + !contains(github.ref, 'alpha') && + !contains(github.ref, 'beta') && + !contains(github.ref, 'rc') && + github.ref != 'refs/tags/v0.0.0-test' + + steps: + - name: Download build artifacts + uses: 
actions/download-artifact@v3 + with: + name: distribution-packages + + - name: Set up Python + uses: actions/setup-python@v4 + with: + python-version: '3.10' + + - name: Install twine + run: pip install twine + + - name: Publish to PyPI + env: + TWINE_USERNAME: __token__ + TWINE_PASSWORD: ${{ secrets.PYPI_API_TOKEN }} + run: twine upload dist/* + + - name: Verify PyPI installation + run: | + # Wait a moment for PyPI to update + sleep 30 + pip install xmlassert==${{ github.ref_name#v }} + + github-release: + name: Create GitHub Release + runs-on: ubuntu-latest + needs: [test-pypi, pypi] + if: always() && startsWith(github.ref, 'refs/tags/v') + + steps: + - name: Download build artifacts + uses: actions/download-artifact@v3 + with: + name: distribution-packages + + - name: Create GitHub Release + uses: softprops/action-gh-release@v1 + with: + files: | + dist/*.whl + dist/*.tar.gz + generate_release_notes: true + body: | + Automated release for ${{ github.ref_name }} + + See CHANGELOG.md for details. 
+ env: + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} From 3f9a358e1a0d6b8297c1668fedab0d92a59e52b7 Mon Sep 17 00:00:00 2001 From: Maxim Ivanov Date: Mon, 25 Aug 2025 15:48:58 +0700 Subject: [PATCH 25/29] Validate package version and git tag version --- .github/workflows/release.yml | 53 +++++++++++++++++++++++++++++++++-- 1 file changed, 51 insertions(+), 2 deletions(-) diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index a65ef09..77cf6c6 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -13,6 +13,55 @@ concurrency: cancel-in-flight: true jobs: + validate-version: + name: Validate tag version + runs-on: ubuntu-latest + if: startsWith(github.ref, 'refs/tags/v') + + steps: + - name: Checkout code + uses: actions/checkout@v4 + + - name: Extract package version + id: package-version + run: | + # Extract version from pyproject.toml + PACKAGE_VERSION=$(python -c " + import tomli + with open('pyproject.toml', 'rb') as f: + data = tomli.load(f) + print(data['project']['version']) + ") + echo "PACKAGE_VERSION=$PACKAGE_VERSION" >> $GITHUB_OUTPUT + + - name: Extract tag version + id: tag-version + run: | + TAG_VERSION="${GITHUB_REF#refs/tags/v}" + echo "TAG_VERSION=$TAG_VERSION" >> $GITHUB_OUTPUT + + - name: Validate versions match + run: | + PACKAGE_VERSION="${{ steps.package-version.outputs.PACKAGE_VERSION }}" + TAG_VERSION="${{ steps.tag-version.outputs.TAG_VERSION }}" + + if [ "$PACKAGE_VERSION" != "$TAG_VERSION" ]; then + echo "❌ CRITICAL: VERSION MISMATCH DETECTED!" + echo "::error::Git tag version does not match package version!" + echo "::error::Tag version: v$TAG_VERSION" + echo "::error::Package version: $PACKAGE_VERSION" + echo "::error::" + echo "::error::To fix this:" + echo "::error::1. Delete the wrong tag: git tag -d v$TAG_VERSION" + echo "::error::2. Delete remote tag: git push --delete origin v$TAG_VERSION" + echo "::error::3. Update pyproject.toml with correct version" + echo "::error::4.
Create correct tag: git tag -a v$PACKAGE_VERSION -m 'Release v$PACKAGE_VERSION'" + echo "::error::5. Push correct tag: git push origin v$PACKAGE_VERSION" + exit 1 + else + echo "✅ Versions match: v$TAG_VERSION" + fi + test: name: Test on Python ${{ matrix.python-version }} runs-on: ubuntu-latest @@ -85,7 +134,7 @@ jobs: test-pypi: name: Publish to TestPyPI runs-on: ubuntu-latest - needs: build + needs: validate-version build if: | startsWith(github.ref, 'refs/tags/v') && (contains(github.ref, 'alpha') || @@ -122,7 +171,7 @@ jobs: pypi: name: Publish to PyPI runs-on: ubuntu-latest - needs: build + needs: validate-version build if: | startsWith(github.ref, 'refs/tags/v') && !contains(github.ref, 'alpha') && From e2ccb4774bb6e4a7080577898c1a010329563eaf Mon Sep 17 00:00:00 2001 From: Maxim Ivanov Date: Mon, 25 Aug 2025 16:03:30 +0700 Subject: [PATCH 26/29] Fix version extraction from git tag name --- .github/workflows/release.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index 77cf6c6..9bc4f09 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -166,7 +166,7 @@ jobs: run: | python -m pip install --index-url https://test.pypi.org/simple/ \ --extra-index-url https://pypi.org/simple/ \ - xmlassert==${{ github.ref_name#v }} + xmlassert==${{ fromJSON('{"v": "' + github.ref_name + '"}').v.replace('v', '') }} pypi: name: Publish to PyPI @@ -203,7 +203,7 @@ jobs: run: | # Wait a moment for PyPI to update sleep 30 - pip install xmlassert==${{ github.ref_name#v }} + pip install xmlassert==${{ fromJSON('{"v": "' + github.ref_name + '"}').v.replace('v', '') }} github-release: name: Create GitHub Release From 7f156b3b6fdd3320cf178c1797fe1065f0e898c4 Mon Sep 17 00:00:00 2001 From: Maxim Ivanov Date: Mon, 25 Aug 2025 16:08:41 +0700 Subject: [PATCH 27/29] FIX: git tag version parsing maybe --- .github/workflows/release.yml | 11 +++++++---- 1 file changed, 7
insertions(+), 4 deletions(-) diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index 9bc4f09..76d8f27 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -17,6 +17,8 @@ jobs: name: Validate tag version runs-on: ubuntu-latest if: startsWith(github.ref, 'refs/tags/v') + outputs: + tag_version: ${{ steps.tag-version.outputs.TAG_VERSION }} steps: - name: Checkout code @@ -134,7 +136,7 @@ jobs: test-pypi: name: Publish to TestPyPI runs-on: ubuntu-latest - needs: validate-version build + needs: [validate-version, build] if: | startsWith(github.ref, 'refs/tags/v') && (contains(github.ref, 'alpha') || @@ -166,12 +168,12 @@ jobs: run: | python -m pip install --index-url https://test.pypi.org/simple/ \ --extra-index-url https://pypi.org/simple/ \ - xmlassert==${{ fromJSON('{"v": "' + github.ref_name + '"}').v.replace('v', '') }} + xmlassert==${{ needs.validate-version.outputs.tag_version }} pypi: name: Publish to PyPI runs-on: ubuntu-latest - needs: validate-version build + needs: [validate-version, build] if: | startsWith(github.ref, 'refs/tags/v') && !contains(github.ref, 'alpha') && @@ -203,7 +205,8 @@ jobs: run: | # Wait a moment for PyPI to update sleep 30 - pip install xmlassert==${{ fromJSON('{"v": "' + github.ref_name + '"}').v.replace('v', '') }} + pip install \ + xmlassert==${{ needs.validate-version.outputs.tag_version }} github-release: name: Create GitHub Release From ae0caed5fe547248bae77c7d3d8e93813d433765 Mon Sep 17 00:00:00 2001 From: Maxim Ivanov Date: Mon, 25 Aug 2025 16:10:09 +0700 Subject: [PATCH 28/29] cancel-in-flight -> cancel-in-progress --- .github/workflows/release.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index 76d8f27..bfc0c38 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -10,7 +10,7 @@ on: # Allow only one concurrent deployment to avoid conflicts concurrency: group: 
${{ github.workflow }}-${{ github.ref }} - cancel-in-flight: true + cancel-in-progress: true jobs: validate-version: From 759d03ba4ba39b703943732cc2088118f3d23f85 Mon Sep 17 00:00:00 2001 From: Maxim Ivanov Date: Mon, 25 Aug 2025 16:11:57 +0700 Subject: [PATCH 29/29] FIX: drop comment --- .github/workflows/release.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index bfc0c38..b67884c 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -142,7 +142,7 @@ jobs: (contains(github.ref, 'alpha') || contains(github.ref, 'beta') || contains(github.ref, 'rc') || - github.ref == 'refs/tags/v0.0.0-test') # Custom condition for test releases + github.ref == 'refs/tags/v0.0.0-test') steps: - name: Download build artifacts