diff --git a/.github/workflows/release-rust-python-package.yaml b/.github/workflows/release-rust-python-package.yaml index e524b02..fa2885f 100644 --- a/.github/workflows/release-rust-python-package.yaml +++ b/.github/workflows/release-rust-python-package.yaml @@ -54,9 +54,6 @@ jobs: with: python-version: "3.12" - - name: Validate plugin catalog - run: python3 tools/plugin_catalog.py validate . - - id: resolve shell: bash env: @@ -79,12 +76,15 @@ jobs: tag_ref="${GITHUB_REF}" fi - if git merge-base --is-ancestor "${tag_ref}" "refs/remotes/origin/main"; then + tag_commit="$(git rev-list -n 1 "${tag_ref}")" + if git merge-base --is-ancestor "${tag_commit}" "refs/remotes/origin/main"; then tag_on_main=true else tag_on_main=false fi + git checkout --detach "${checkout_ref}" + python3 tools/plugin_catalog.py validate . release_info="$(python3 tools/plugin_catalog.py release-info . "${tag}")" plugin="$(printf '%s' "${release_info}" | python3 -c 'import json, sys; print(json.load(sys.stdin)["slug"])')" plugin_path="$(printf '%s' "${release_info}" | python3 -c 'import json, sys; print(json.load(sys.stdin)["path"])')" @@ -255,7 +255,7 @@ jobs: fi publish: - if: ${{ (github.event_name != 'workflow_call' || inputs.publish_enabled) && (needs.resolve.outputs.publish_env != 'pypi' || needs.resolve.outputs.tag_on_main == 'true') }} + if: ${{ github.event_name != 'workflow_call' || inputs.publish_enabled }} needs: [resolve, build-wheel, build-sdist] runs-on: ubuntu-latest environment: ${{ needs.resolve.outputs.publish_env }} @@ -276,8 +276,14 @@ jobs: repository-url: https://test.pypi.org/legacy/ skip-existing: true + - name: Verify PyPI tag is on main + if: needs.resolve.outputs.publish_env == 'pypi' && needs.resolve.outputs.tag_on_main != 'true' + run: | + echo "Refusing to publish to PyPI because the release tag is not reachable from origin/main" >&2 + exit 1 + - name: Publish distributions to PyPI - if: needs.resolve.outputs.publish_env == 'pypi' + if: 
needs.resolve.outputs.publish_env == 'pypi' && needs.resolve.outputs.tag_on_main == 'true' uses: pypa/gh-action-pypi-publish@ed0c53931b1dc9bd32cbe73a98c7f6766f8a527e with: packages-dir: dist/ diff --git a/Cargo.lock b/Cargo.lock index ee01421..2c55cb1 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1405,6 +1405,19 @@ version = "0.8.10" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "dc897dd8d9e8bd1ed8cdad82b5966c3e0ecae09fb1907d58efaa013543185d0a" +[[package]] +name = "regex_filter" +version = "0.1.0" +dependencies = [ + "cpex_framework_bridge", + "criterion", + "log", + "pyo3", + "pyo3-log", + "pyo3-stub-gen", + "regex", +] + [[package]] name = "retry_with_backoff" version = "0.1.1" diff --git a/Cargo.toml b/Cargo.toml index a7786a4..55a762d 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -2,6 +2,7 @@ members = [ "plugins/rust/python-package/encoded_exfil_detection", "plugins/rust/python-package/pii_filter", + "plugins/rust/python-package/regex_filter", "plugins/rust/python-package/rate_limiter", "plugins/rust/python-package/retry_with_backoff", "plugins/rust/python-package/url_reputation", diff --git a/README.md b/README.md index 5cd497e..7dffd0d 100644 --- a/README.md +++ b/README.md @@ -10,6 +10,11 @@ Current plugins: - `rate_limiter` - `pii_filter` +- `encoded_exfil_detection` +- `regex_filter` +- `retry_with_backoff` +- `secrets_detection` +- `url_reputation` Each managed plugin must include: diff --git a/plugins/rust/python-package/regex_filter/Cargo.toml b/plugins/rust/python-package/regex_filter/Cargo.toml new file mode 100644 index 0000000..b72d5da --- /dev/null +++ b/plugins/rust/python-package/regex_filter/Cargo.toml @@ -0,0 +1,31 @@ +[package] +name = "regex_filter" +version = "0.1.0" +edition.workspace = true +authors.workspace = true +license.workspace = true +repository.workspace = true +description = "Rust-backed regex search and replace plugin for MCP Gateway" + +[lib] +name = "regex_filter_rust" +crate-type = ["cdylib", 
"rlib"] + +[[bin]] +name = "stub_gen" +path = "src/bin/stub_gen.rs" + +[dependencies] +cpex_framework_bridge = { workspace = true } +log = { workspace = true } +pyo3 = { workspace = true } +pyo3-log = { workspace = true } +pyo3-stub-gen = { workspace = true } +regex = { workspace = true } + +[dev-dependencies] +criterion = { workspace = true } + +[[bench]] +name = "regex_filter" +harness = false diff --git a/plugins/rust/python-package/regex_filter/Makefile b/plugins/rust/python-package/regex_filter/Makefile new file mode 100644 index 0000000..023d353 --- /dev/null +++ b/plugins/rust/python-package/regex_filter/Makefile @@ -0,0 +1,123 @@ +.PHONY: help +help: + @grep '^# help\:' $(firstword $(MAKEFILE_LIST)) | sed 's/^# help\: //' + +PACKAGE_NAME := cpex-regex-filter +WHEEL_PREFIX := cpex_regex_filter +CARGO := cargo +STUB_FILES := cpex_regex_filter/__init__.pyi cpex_regex_filter/regex_filter_rust/__init__.pyi +WHEEL_DIR := ../../../../target/wheels + +GREEN := \033[0;32m +YELLOW := \033[0;33m +NC := \033[0m + +# help: fmt - Format Rust code with rustfmt +# help: fmt-check - Check Rust code formatting (CI) +# help: clippy - Run clippy lints +.PHONY: fmt fmt-check clippy + +fmt: + $(CARGO) fmt + +fmt-check: + $(CARGO) fmt -- --check + +clippy: + $(CARGO) clippy -- -D warnings + +# help: sync - Install plugin development dependencies +# help: test - Run Rust unit tests +# help: test-verbose - Run Rust tests with verbose output +# help: test-python - Run Python plugin tests +# help: test-all - Run both Rust and Python tests +.PHONY: sync test test-verbose test-python test-all verify-stubs + +sync: + uv sync --dev + +test: + @echo "$(GREEN)Running regex_filter Rust tests...$(NC)" + $(CARGO) test + +test-verbose: + @echo "$(GREEN)Running regex_filter Rust tests (verbose)...$(NC)" + $(CARGO) test -- --nocapture + +test-python: + @echo "$(GREEN)Running Python tests...$(NC)" + uv run pytest tests/ -v + +test-all: test test-python + +verify-stubs: stub-gen + @git diff 
--exit-code -- $(STUB_FILES) + +# help: stub-gen - Generate Python type stubs (.pyi files) +# help: build - Build release wheel (no install) +# help: install - Build and install editable extension into project venv +# help: install-wheel - Install the previously built wheel into project venv +.PHONY: stub-gen build install install-wheel uninstall + +stub-gen: + @echo "$(GREEN)Generating Python type stubs...$(NC)" + $(CARGO) run --bin stub_gen + @echo "$(GREEN)Stubs generated$(NC)" + +build: stub-gen + @echo "$(GREEN)Building $(PACKAGE_NAME)...$(NC)" + uv run maturin build --release + @echo "$(GREEN)Build complete$(NC)" + +install: stub-gen + @echo "$(GREEN)Installing $(PACKAGE_NAME)...$(NC)" + uv run maturin develop --release + @echo "$(GREEN)Installation complete$(NC)" + +install-wheel: build + @echo "$(GREEN)Installing built wheel for $(PACKAGE_NAME)...$(NC)" + python3 ../../../../tools/install_built_wheel.py --wheel-dir "$(WHEEL_DIR)" --wheel-prefix "$(WHEEL_PREFIX)" --package-name "$(PACKAGE_NAME)" --venv-dir .venv + @echo "$(GREEN)Wheel installation complete$(NC)" + +uninstall: + @echo "$(YELLOW)Uninstalling $(PACKAGE_NAME)...$(NC)" + @uv pip uninstall -y $(PACKAGE_NAME) 2>/dev/null || true + +# help: bench - Run Criterion benchmarks +# help: bench-no-run - Compile Criterion benchmarks without running them +.PHONY: bench bench-no-run + +bench: + @echo "$(GREEN)Running benchmarks...$(NC)" + $(CARGO) bench + +bench-no-run: + @echo "$(GREEN)Compiling benchmarks without running them...$(NC)" + $(CARGO) bench --no-run + +.PHONY: clean clean-all + +clean: + $(CARGO) clean + rm -rf target/ coverage/ + find . 
-name "*.whl" -delete + +clean-all: clean + +# help: verify - Verify plugin installation +# help: check-all - Run fmt-check + clippy + Rust tests +# help: ci - Run the full CI-equivalent plugin verification flow +.PHONY: verify check-all ci pre-commit + +verify: + @uv run python -c "from cpex_regex_filter import regex_filter_rust; print('regex_filter_rust available')" || echo "regex_filter_rust not installed — run: make install" + +check-all: fmt-check clippy test + @echo "$(GREEN)All checks passed$(NC)" + +ci: check-all verify-stubs build bench-no-run install-wheel test-python + @echo "$(GREEN)CI verification passed$(NC)" + +pre-commit: check-all + +.DEFAULT_GOAL := help diff --git a/plugins/rust/python-package/regex_filter/README.md b/plugins/rust/python-package/regex_filter/README.md new file mode 100644 index 0000000..911522a --- /dev/null +++ b/plugins/rust/python-package/regex_filter/README.md @@ -0,0 +1,52 @@ +# Regex Filter Plugin for CPEX + +Rust-backed regex search and replace for prompt arguments, prompt messages, tool arguments, and tool results. +Patterns use Rust `regex` syntax, which does not support look-around or backreferences. +Replacement strings use Rust `regex` expansion syntax (`$0`, `$1`, `$name`, `${name}`, and `$$` for a literal dollar). +Recursive filtering covers strings inside dicts and lists, plus Python tuples and sets; custom object attributes are left unchanged. 
+ +This package follows the same layout as the other Rust+Python CPEX plugins in this repository: + +- Rust owns the matching and recursive data traversal +- Python keeps a minimal gateway-facing `Plugin` shim +- Tests cover both the Rust engine and the gateway hook surface + +## Configuration + +```yaml +plugins: + - name: regex_filter + kind: cpex_regex_filter.regex_filter.SearchReplacePlugin + hooks: + - prompt_pre_fetch + - prompt_post_fetch + - tool_pre_invoke + - tool_post_invoke + mode: enforce + config: + words: + - search: "\\bsecret\\b" + replace: "[REDACTED]" + - search: "\\d{3}-\\d{2}-\\d{4}" + replace: "XXX-XX-XXXX" + max_text_bytes: 10485760 + max_total_text_bytes: 10485760 + max_nested_depth: 64 + max_collection_items: 4096 + max_total_items: 65536 + max_patterns: 1024 + max_search_bytes: 1048576 + max_replace_bytes: 1048576 + max_output_bytes: 10485760 +``` + +## Development + +From this plugin directory: + +```bash +uv sync --dev +make install +make test-all +make check-all +``` diff --git a/plugins/rust/python-package/regex_filter/benches/regex_filter.rs b/plugins/rust/python-package/regex_filter/benches/regex_filter.rs new file mode 100644 index 0000000..3b05d30 --- /dev/null +++ b/plugins/rust/python-package/regex_filter/benches/regex_filter.rs @@ -0,0 +1,39 @@ +// Copyright 2026 +// SPDX-License-Identifier: Apache-2.0 + +use criterion::{Criterion, criterion_group, criterion_main}; +use regex::RegexSet; +use regex_filter_rust::{SearchReplace, SearchReplaceConfig, SearchReplacePluginRust}; + +fn bench_apply_patterns(c: &mut Criterion) { + let config = SearchReplaceConfig { + words: vec![ + SearchReplace { + search: r"\bsecret\b".to_string(), + replace: "[REDACTED]".to_string(), + compiled: regex::Regex::new(r"\bsecret\b").unwrap(), + }, + SearchReplace { + search: r"\d{3}-\d{2}-\d{4}".to_string(), + replace: "XXX-XX-XXXX".to_string(), + compiled: regex::Regex::new(r"\d{3}-\d{2}-\d{4}").unwrap(), + }, + ], + pattern_set: 
RegexSet::new([r"\bsecret\b", r"\d{3}-\d{2}-\d{4}"]).ok(), + max_text_bytes: 10 * 1024 * 1024, + max_total_text_bytes: 10 * 1024 * 1024, + max_nested_depth: 64, + max_collection_items: 4096, + max_total_items: 65_536, + max_output_bytes: 10 * 1024 * 1024, + }; + let plugin = SearchReplacePluginRust { config }; + let text = "The secret number is 123-45-6789"; + + c.bench_function("regex_filter_apply_patterns", |b| { + b.iter(|| plugin.apply_patterns(text).unwrap()) + }); +} + +criterion_group!(benches, bench_apply_patterns); +criterion_main!(benches); diff --git a/plugins/rust/python-package/regex_filter/cpex_regex_filter/__init__.py b/plugins/rust/python-package/regex_filter/cpex_regex_filter/__init__.py new file mode 100644 index 0000000..5026455 --- /dev/null +++ b/plugins/rust/python-package/regex_filter/cpex_regex_filter/__init__.py @@ -0,0 +1,19 @@ +# -*- coding: utf-8 -*- +"""Regex filter plugin package.""" + +from __future__ import annotations + + +def __getattr__(name: str): + if name == "SearchReplacePlugin": + from cpex_regex_filter.regex_filter import SearchReplacePlugin + + return SearchReplacePlugin + if name == "SearchReplacePluginRust": + from cpex_regex_filter.regex_filter_rust import SearchReplacePluginRust + + return SearchReplacePluginRust + raise AttributeError(f"module {__name__!r} has no attribute {name!r}") + + +__all__ = ["SearchReplacePlugin", "SearchReplacePluginRust"] diff --git a/plugins/rust/python-package/regex_filter/cpex_regex_filter/__init__.pyi b/plugins/rust/python-package/regex_filter/cpex_regex_filter/__init__.pyi new file mode 100644 index 0000000..1dafd4e --- /dev/null +++ b/plugins/rust/python-package/regex_filter/cpex_regex_filter/__init__.pyi @@ -0,0 +1,10 @@ +# This file is automatically generated by pyo3_stub_gen +# ruff: noqa: E501, F401, F403, F405 + +from .regex_filter import SearchReplacePlugin +from .regex_filter_rust import SearchReplacePluginRust + +__all__ = [ + "SearchReplacePlugin", + "SearchReplacePluginRust", 
+] diff --git a/plugins/rust/python-package/regex_filter/cpex_regex_filter/plugin-manifest.yaml b/plugins/rust/python-package/regex_filter/cpex_regex_filter/plugin-manifest.yaml new file mode 100644 index 0000000..5136335 --- /dev/null +++ b/plugins/rust/python-package/regex_filter/cpex_regex_filter/plugin-manifest.yaml @@ -0,0 +1,20 @@ +description: "Rust-backed regex search and replace for prompt arguments, prompt messages, tool inputs, and tool outputs" +author: "ContextForge Contributors" +kind: "cpex_regex_filter.regex_filter.SearchReplacePlugin" +version: "0.1.0" +available_hooks: + - "prompt_pre_fetch" + - "prompt_post_fetch" + - "tool_pre_invoke" + - "tool_post_invoke" +default_configs: + words: [] + max_text_bytes: 10485760 + max_total_text_bytes: 10485760 + max_nested_depth: 64 + max_collection_items: 4096 + max_total_items: 65536 + max_patterns: 1024 + max_search_bytes: 1048576 + max_replace_bytes: 1048576 + max_output_bytes: 10485760 diff --git a/plugins/rust/python-package/regex_filter/cpex_regex_filter/regex_filter.py b/plugins/rust/python-package/regex_filter/cpex_regex_filter/regex_filter.py new file mode 100644 index 0000000..4330f9b --- /dev/null +++ b/plugins/rust/python-package/regex_filter/cpex_regex_filter/regex_filter.py @@ -0,0 +1,50 @@ +# -*- coding: utf-8 -*- +"""Thin compatibility shim for the Rust-owned regex filter plugin.""" + +from __future__ import annotations + +try: + from mcpgateway.plugins.framework import Plugin +except ModuleNotFoundError: + class Plugin: # type: ignore[no-redef] + def __init__(self, config) -> None: + self._config = config + +from cpex_regex_filter.regex_filter_rust import RegexFilterPluginCore, SearchReplacePluginRust + +_RUST_AVAILABLE = True + + +class SearchReplacePlugin(Plugin): + """Gateway-facing Plugin subclass that delegates behavior to Rust.""" + + def __init__(self, config) -> None: + super().__init__(config) + self._core = RegexFilterPluginCore(config.config or {}) + + async def 
prompt_pre_fetch(self, payload, context): + result = self._core.prompt_pre_fetch(payload, context) + if hasattr(result, "__await__"): + return await result + return result + + async def prompt_post_fetch(self, payload, context): + result = self._core.prompt_post_fetch(payload, context) + if hasattr(result, "__await__"): + return await result + return result + + async def tool_pre_invoke(self, payload, context): + result = self._core.tool_pre_invoke(payload, context) + if hasattr(result, "__await__"): + return await result + return result + + async def tool_post_invoke(self, payload, context): + result = self._core.tool_post_invoke(payload, context) + if hasattr(result, "__await__"): + return await result + return result + + +__all__ = ["SearchReplacePlugin", "SearchReplacePluginRust", "_RUST_AVAILABLE"] diff --git a/plugins/rust/python-package/regex_filter/cpex_regex_filter/regex_filter_rust/__init__.pyi b/plugins/rust/python-package/regex_filter/cpex_regex_filter/regex_filter_rust/__init__.pyi new file mode 100644 index 0000000..e38ffb5 --- /dev/null +++ b/plugins/rust/python-package/regex_filter/cpex_regex_filter/regex_filter_rust/__init__.pyi @@ -0,0 +1,23 @@ +# This file is automatically generated by pyo3_stub_gen +# ruff: noqa: E501, F401, F403, F405 + +import builtins +import typing +__all__ = [ + "RegexFilterPluginCore", + "SearchReplacePluginRust", +] + +@typing.final +class RegexFilterPluginCore: + def __new__(cls, config: dict) -> RegexFilterPluginCore: ... + def prompt_pre_fetch(self, payload: typing.Any, _context: typing.Any) -> typing.Any: ... + def prompt_post_fetch(self, payload: typing.Any, _context: typing.Any) -> typing.Any: ... + def tool_pre_invoke(self, payload: typing.Any, _context: typing.Any) -> typing.Any: ... + def tool_post_invoke(self, payload: typing.Any, _context: typing.Any) -> typing.Any: ... + +@typing.final +class SearchReplacePluginRust: + def __new__(cls, config_dict: dict) -> SearchReplacePluginRust: ... 
+ def apply_patterns(self, text: builtins.str) -> builtins.str: ... + def process_nested(self, data: typing.Any) -> tuple[builtins.bool, typing.Any]: ... diff --git a/plugins/rust/python-package/regex_filter/pyproject.toml b/plugins/rust/python-package/regex_filter/pyproject.toml new file mode 100644 index 0000000..043f324 --- /dev/null +++ b/plugins/rust/python-package/regex_filter/pyproject.toml @@ -0,0 +1,39 @@ +[build-system] +requires = ["maturin>=1.4,<2.0"] +build-backend = "maturin" + +[project] +name = "cpex-regex-filter" +dynamic = ["version"] +description = "Rust-backed regex search and replace plugin for MCP Gateway" +authors = [{ name = "ContextForge Contributors" }] +license = { text = "Apache-2.0" } +readme = "README.md" +requires-python = ">=3.11" +classifiers = [ + "Programming Language :: Rust", + "Programming Language :: Python :: Implementation :: CPython", + "Programming Language :: Python :: 3.11", + "Programming Language :: Python :: 3.12", + "Programming Language :: Python :: 3.13", +] + +[project.entry-points."cpex.plugins"] +regex_filter = "cpex_regex_filter.regex_filter:SearchReplacePlugin" + +[tool.maturin] +module-name = "cpex_regex_filter.regex_filter_rust" +python-source = "." 
+features = ["pyo3/extension-module"] + +[tool.pytest.ini_options] +testpaths = ["tests"] +pythonpath = ["tests"] +asyncio_mode = "auto" + +[dependency-groups] +dev = [ + "maturin>=1.12.6", + "pytest>=8.0", + "pytest-asyncio>=0.23", +] diff --git a/plugins/rust/python-package/regex_filter/src/bin/stub_gen.rs b/plugins/rust/python-package/regex_filter/src/bin/stub_gen.rs new file mode 100644 index 0000000..d21ffd4 --- /dev/null +++ b/plugins/rust/python-package/regex_filter/src/bin/stub_gen.rs @@ -0,0 +1,98 @@ +// Copyright 2026 +// SPDX-License-Identifier: Apache-2.0 +// +// Stub file generator for regex_filter module + +use std::fs; +use std::path::Path; + +use regex_filter_rust::stub_info; + +const EXTENSION_STUB_PATH: &str = "cpex_regex_filter/regex_filter_rust/__init__.pyi"; +const ORPHAN_EXTENSION_STUB_PATH: &str = "python/regex_filter_rust/__init__.pyi"; +const GENERATED_ALL_MARKER: &str = "__all__ = [\n"; +const PLUGIN_CORE_CLASS_MARKER: &str = "class RegexFilterPluginCore:"; +const ENGINE_CLASS_MARKER: &str = "class SearchReplacePluginRust:"; +const CURATED_ALL_BLOCK: &str = + "__all__ = [\n \"RegexFilterPluginCore\",\n \"SearchReplacePluginRust\",\n]\n"; +const PLUGIN_CORE_CLASS_DEF: &str = "\n\n@typing.final\nclass RegexFilterPluginCore:\n def __new__(cls, config: dict) -> RegexFilterPluginCore: ...\n def prompt_pre_fetch(self, payload: typing.Any, context: typing.Any) -> typing.Any: ...\n def prompt_post_fetch(self, payload: typing.Any, context: typing.Any) -> typing.Any: ...\n def tool_pre_invoke(self, payload: typing.Any, context: typing.Any) -> typing.Any: ...\n def tool_post_invoke(self, payload: typing.Any, context: typing.Any) -> typing.Any: ...\n"; + +fn curate_extension_stub_content(content: &str) -> String { + let mut curated = content.to_string(); + if let Some(all_start) = curated.find(GENERATED_ALL_MARKER) + && let Some(relative_end) = curated[all_start..].find("]\n") + { + let all_end = all_start + relative_end + 2; + 
curated.replace_range(all_start..all_end, CURATED_ALL_BLOCK); + } + if !curated.contains(PLUGIN_CORE_CLASS_MARKER) { + curated.push_str(PLUGIN_CORE_CLASS_DEF); + } + + assert!( + curated.contains("\"RegexFilterPluginCore\""), + "curated extension stub is missing RegexFilterPluginCore in __all__", + ); + assert!( + curated.contains("\"SearchReplacePluginRust\""), + "curated extension stub is missing SearchReplacePluginRust in __all__", + ); + assert!( + curated.contains(PLUGIN_CORE_CLASS_MARKER), + "curated extension stub is missing RegexFilterPluginCore class definition", + ); + assert!( + curated.contains(ENGINE_CLASS_MARKER), + "curated extension stub is missing SearchReplacePluginRust class definition", + ); + + while curated.ends_with("\n\n") { + curated.pop(); + } + + curated +} + +fn curate_extension_stub() { + let stub_path = Path::new(EXTENSION_STUB_PATH); + let content = fs::read_to_string(stub_path).expect("Failed to read generated stub file"); + let curated = curate_extension_stub_content(&content); + fs::write(stub_path, curated).expect("Failed to write curated stub file"); +} + +fn remove_orphan_extension_stub() { + let orphan_stub_path = Path::new(ORPHAN_EXTENSION_STUB_PATH); + if orphan_stub_path.exists() { + fs::remove_file(orphan_stub_path).expect("Failed to remove orphan extension stub file"); + } +} + +fn curate_top_level_stub() { + let stub_path = Path::new("cpex_regex_filter/__init__.pyi"); + let content = "# This file is automatically generated by pyo3_stub_gen\n# ruff: noqa: E501, F401, F403, F405\n\nfrom .regex_filter import SearchReplacePlugin\nfrom .regex_filter_rust import SearchReplacePluginRust\n\n__all__ = [\n \"SearchReplacePlugin\",\n \"SearchReplacePluginRust\",\n]\n"; + fs::write(stub_path, content).expect("Failed to write curated top-level stub file"); +} + +fn main() { + let stub_info = stub_info().expect("Failed to get stub info"); + stub_info.generate().expect("Failed to generate stub file"); + curate_top_level_stub(); + 
curate_extension_stub(); + remove_orphan_extension_stub(); + println!("✓ Generated stub files successfully"); +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_curate_extension_stub_adds_required_exports_and_class() { + let generated = "# This file is automatically generated by pyo3_stub_gen\n# ruff: noqa: E501, F401, F403, F405\n\nimport builtins\nimport typing\n__all__ = [\n \"SearchReplacePluginRust\",\n]\n\n@typing.final\nclass SearchReplacePluginRust:\n def __new__(cls, config: typing.Any) -> SearchReplacePluginRust: ...\n"; + let curated = curate_extension_stub_content(generated); + assert!(curated.contains("\"RegexFilterPluginCore\"")); + assert!(curated.contains("\"SearchReplacePluginRust\"")); + assert!(curated.contains(PLUGIN_CORE_CLASS_MARKER)); + assert!(!curated.ends_with("\n\n")); + } +} diff --git a/plugins/rust/python-package/regex_filter/src/lib.rs b/plugins/rust/python-package/regex_filter/src/lib.rs new file mode 100644 index 0000000..f44b4a5 --- /dev/null +++ b/plugins/rust/python-package/regex_filter/src/lib.rs @@ -0,0 +1,1017 @@ +// Copyright 2026 +// SPDX-License-Identifier: Apache-2.0 +// +// Regex Filter Plugin - Rust Implementation + +use std::borrow::Cow; +use std::collections::HashSet; +use std::sync::Once; + +use log::debug; +use pyo3::prelude::*; +use pyo3::types::{PyDict, PyList, PyModule, PySet, PyString, PyTuple}; +use pyo3_stub_gen::define_stub_info_gatherer; +use pyo3_stub_gen::derive::*; +use regex::{Captures, Regex, RegexSet}; + +pub mod plugin; + +const DEFAULT_MAX_TEXT_BYTES: usize = 10 * 1024 * 1024; +const DEFAULT_MAX_TOTAL_TEXT_BYTES: usize = 10 * 1024 * 1024; +const DEFAULT_MAX_NESTED_DEPTH: usize = 64; +const DEFAULT_MAX_COLLECTION_ITEMS: usize = 4096; +const DEFAULT_MAX_TOTAL_ITEMS: usize = 65_536; +const DEFAULT_MAX_PATTERNS: usize = 1024; +const DEFAULT_MAX_SEARCH_BYTES: usize = 1024 * 1024; +const DEFAULT_MAX_REPLACE_BYTES: usize = 1024 * 1024; +const DEFAULT_MAX_OUTPUT_BYTES: usize = 10 * 1024 * 
1024; + +enum TraversalResult { + Unchanged(Py), + Modified(Py), +} + +#[derive(Debug, Clone)] +pub struct SearchReplace { + pub search: String, + pub replace: String, + pub compiled: Regex, +} + +#[derive(Debug, Clone)] +pub struct SearchReplaceConfig { + pub words: Vec, + pub pattern_set: Option, + pub max_text_bytes: usize, + pub max_total_text_bytes: usize, + pub max_nested_depth: usize, + pub max_collection_items: usize, + pub max_total_items: usize, + pub max_output_bytes: usize, +} + +impl SearchReplaceConfig { + pub fn from_py_dict(dict: &Bound<'_, PyDict>) -> PyResult { + let mut words = Vec::new(); + let mut patterns = Vec::new(); + let mut validation_errors = Vec::new(); + let max_text_bytes = get_usize(dict, "max_text_bytes", DEFAULT_MAX_TEXT_BYTES)?; + let max_total_text_bytes = + get_usize(dict, "max_total_text_bytes", DEFAULT_MAX_TOTAL_TEXT_BYTES)?; + let max_nested_depth = get_usize(dict, "max_nested_depth", DEFAULT_MAX_NESTED_DEPTH)?; + let max_collection_items = + get_usize(dict, "max_collection_items", DEFAULT_MAX_COLLECTION_ITEMS)?; + let max_total_items = get_usize(dict, "max_total_items", DEFAULT_MAX_TOTAL_ITEMS)?; + let max_patterns = get_usize(dict, "max_patterns", DEFAULT_MAX_PATTERNS)?; + let max_search_bytes = get_usize(dict, "max_search_bytes", DEFAULT_MAX_SEARCH_BYTES)?; + let max_replace_bytes = get_usize(dict, "max_replace_bytes", DEFAULT_MAX_REPLACE_BYTES)?; + let max_output_bytes = get_usize(dict, "max_output_bytes", DEFAULT_MAX_OUTPUT_BYTES)?; + + if let Some(words_value) = dict.get_item("words")? 
{ + let py_list = words_value + .cast::() + .map_err(|_| pyo3::exceptions::PyValueError::new_err("'words' must be a list"))?; + if py_list.len() > max_patterns { + return Err(pyo3::exceptions::PyValueError::new_err(format!( + "'words' contains {} patterns, maximum is {}", + py_list.len(), + max_patterns + ))); + } + for (idx, item) in py_list.iter().enumerate() { + let py_dict = item.cast::()?; + let search: String = py_dict + .get_item("search")? + .ok_or_else(|| { + pyo3::exceptions::PyValueError::new_err("Missing 'search' field") + })? + .extract()?; + let replace: String = py_dict + .get_item("replace")? + .ok_or_else(|| { + pyo3::exceptions::PyValueError::new_err("Missing 'replace' field") + })? + .extract()?; + if search.len() > max_search_bytes { + validation_errors.push(format!( + "Pattern {}: search exceeds max_search_bytes ({})", + idx, max_search_bytes + )); + continue; + } + if replace.len() > max_replace_bytes { + validation_errors.push(format!( + "Pattern {}: replacement exceeds max_replace_bytes ({})", + idx, max_replace_bytes + )); + continue; + } + + match Regex::new(&search) { + Ok(compiled) => { + patterns.push(search.clone()); + words.push(SearchReplace { + search, + replace, + compiled, + }); + } + Err(error) => validation_errors.push(format!( + "Pattern {}: Invalid regex pattern '{}': {}", + idx, search, error + )), + } + } + } + + if !validation_errors.is_empty() { + return Err(pyo3::exceptions::PyValueError::new_err(format!( + "Invalid regex patterns detected:\n{}", + validation_errors.join("\n") + ))); + } + + let pattern_set = if patterns.is_empty() { + None + } else { + Some(RegexSet::new(&patterns).map_err(|error| { + pyo3::exceptions::PyValueError::new_err(format!( + "Invalid regex set configuration: {}", + error + )) + })?) 
+ }; + + Ok(Self { + words, + pattern_set, + max_text_bytes, + max_total_text_bytes, + max_nested_depth, + max_collection_items, + max_total_items, + max_output_bytes, + }) + } +} + +fn get_usize(dict: &Bound<'_, PyDict>, key: &str, default: usize) -> PyResult { + match dict.get_item(key)? { + Some(value) => value.extract::(), + None => Ok(default), + } +} + +#[gen_stub_pyclass] +#[derive(Debug)] +#[pyclass] +pub struct SearchReplacePluginRust { + pub config: SearchReplaceConfig, +} + +fn output_limit_error(limit: usize) -> PyErr { + pyo3::exceptions::PyValueError::new_err(format!("Output exceeds max_output_bytes ({})", limit)) +} + +fn apply_patterns_impl<'a>( + config: &'a SearchReplaceConfig, + text: &'a str, +) -> PyResult> { + if let Some(ref pattern_set) = config.pattern_set + && !pattern_set.is_match(text) + { + return Ok(Cow::Borrowed(text)); + } + + let mut result = Cow::Borrowed(text); + + for pattern in &config.words { + let mut captures = pattern.compiled.captures_iter(&result).peekable(); + if captures.peek().is_none() { + continue; + } + + let mut replaced = String::new(); + let mut last_end = 0; + for caps in captures { + let matched = caps.get(0).expect("regex captures always include group 0"); + append_limited( + &mut replaced, + &result[last_end..matched.start()], + config.max_output_bytes, + )?; + append_replacement_limited( + &mut replaced, + &pattern.replace, + &caps, + config.max_output_bytes, + )?; + last_end = matched.end(); + } + append_limited(&mut replaced, &result[last_end..], config.max_output_bytes)?; + result = Cow::Owned(replaced); + } + + Ok(result) +} + +fn append_limited(target: &mut String, value: &str, limit: usize) -> PyResult<()> { + if target.len().saturating_add(value.len()) > limit { + return Err(output_limit_error(limit)); + } + target.push_str(value); + Ok(()) +} + +fn append_replacement_limited( + target: &mut String, + replacement: &str, + caps: &Captures<'_>, + limit: usize, +) -> PyResult<()> { + let mut chars = 
replacement.char_indices().peekable(); + while let Some((idx, ch)) = chars.next() { + if ch != '$' { + append_limited(target, &replacement[idx..idx + ch.len_utf8()], limit)?; + continue; + } + + let Some((next_idx, next_ch)) = chars.peek().copied() else { + append_limited(target, "$", limit)?; + continue; + }; + + if next_ch == '$' { + chars.next(); + append_limited(target, "$", limit)?; + continue; + } + + if next_ch == '{' { + chars.next(); + let name_start = next_idx + next_ch.len_utf8(); + let mut name_end = None; + for (candidate_idx, candidate_ch) in chars.by_ref() { + if candidate_ch == '}' { + name_end = Some(candidate_idx); + break; + } + } + let Some(name_end) = name_end else { + append_limited(target, "$", limit)?; + append_limited(target, &replacement[next_idx..], limit)?; + break; + }; + append_capture_limited(target, caps, &replacement[name_start..name_end], limit)?; + continue; + } + + if is_unbraced_capture_char(next_ch) { + let name_start = next_idx; + let mut name_end = next_idx + next_ch.len_utf8(); + chars.next(); + while let Some((name_idx, name_ch)) = chars.peek().copied() { + if !is_unbraced_capture_char(name_ch) { + break; + } + chars.next(); + name_end = name_idx + name_ch.len_utf8(); + } + append_capture_limited(target, caps, &replacement[name_start..name_end], limit)?; + continue; + } + + append_limited(target, "$", limit)?; + } + Ok(()) +} + +fn append_capture_limited( + target: &mut String, + caps: &Captures<'_>, + name: &str, + limit: usize, +) -> PyResult<()> { + let capture = name + .parse::() + .ok() + .and_then(|index| caps.get(index)) + .or_else(|| caps.name(name)); + if let Some(capture) = capture { + append_limited(target, capture.as_str(), limit)?; + } + Ok(()) +} + +fn is_unbraced_capture_char(ch: char) -> bool { + ch == '_' || ch.is_ascii_alphanumeric() +} + +pub(crate) fn apply_patterns_checked<'a>( + config: &'a SearchReplaceConfig, + text: &'a str, +) -> PyResult> { + if text.len() > config.max_text_bytes { + return 
Err(pyo3::exceptions::PyValueError::new_err(format!( + "Text exceeds max_text_bytes ({})", + config.max_text_bytes + ))); + } + let result = apply_patterns_impl(config, text)?; + if result.len() > config.max_output_bytes { + return Err(output_limit_error(config.max_output_bytes)); + } + Ok(result) +} + +struct TraversalBudget { + visited: usize, + max_total_items: usize, + output_bytes: usize, + max_output_bytes: usize, + input_bytes: usize, + max_total_text_bytes: usize, +} + +impl TraversalBudget { + fn new(max_total_items: usize, max_output_bytes: usize, max_total_text_bytes: usize) -> Self { + Self { + visited: 0, + max_total_items, + output_bytes: 0, + max_output_bytes, + input_bytes: 0, + max_total_text_bytes, + } + } + + fn visit(&mut self) -> PyResult<()> { + self.visited += 1; + if self.visited > self.max_total_items { + return Err(pyo3::exceptions::PyValueError::new_err(format!( + "Traversal exceeds max_total_items ({})", + self.max_total_items + ))); + } + Ok(()) + } + + fn add_output(&mut self, bytes: usize) -> PyResult<()> { + self.output_bytes = self.output_bytes.saturating_add(bytes); + if self.output_bytes > self.max_output_bytes { + return Err(output_limit_error(self.max_output_bytes)); + } + Ok(()) + } + + fn add_input(&mut self, bytes: usize) -> PyResult<()> { + self.input_bytes = self.input_bytes.saturating_add(bytes); + if self.input_bytes > self.max_total_text_bytes { + return Err(pyo3::exceptions::PyValueError::new_err(format!( + "Input exceeds max_total_text_bytes ({})", + self.max_total_text_bytes + ))); + } + Ok(()) + } +} + +fn process_nested_impl( + plugin: &SearchReplacePluginRust, + py: Python<'_>, + data: &Bound<'_, PyAny>, + depth: usize, + seen: &mut HashSet, + budget: &mut TraversalBudget, +) -> PyResult { + budget.visit()?; + + if depth >= plugin.config.max_nested_depth { + return Err(pyo3::exceptions::PyValueError::new_err(format!( + "Maximum nested depth of {} exceeded", + plugin.config.max_nested_depth + ))); + } + + if let 
Ok(text) = data.cast::() { + let text = text.to_str()?; + budget.add_input(text.len())?; + let modified_text = apply_patterns_checked(&plugin.config, text)?; + return match modified_text { + Cow::Borrowed(_) => { + budget.add_output(text.len())?; + Ok(TraversalResult::Unchanged(data.clone().unbind())) + } + Cow::Owned(value) => { + budget.add_output(value.len())?; + Ok(TraversalResult::Modified( + value.into_pyobject(py)?.into_any().unbind(), + )) + } + }; + } + + if let Ok(dict) = data.cast::() { + if dict.len() > plugin.config.max_collection_items { + return Err(pyo3::exceptions::PyValueError::new_err(format!( + "Collection exceeds max_collection_items ({})", + plugin.config.max_collection_items + ))); + } + let identity = dict.as_ptr() as usize; + if !seen.insert(identity) { + return Err(pyo3::exceptions::PyValueError::new_err( + "Cyclic containers are not supported", + )); + } + + let mut processed_items: Option, Py)>> = None; + for (index, (key, value)) in dict.iter().enumerate() { + match process_nested_impl(plugin, py, &value, depth + 1, seen, budget)? 
{ + TraversalResult::Unchanged(new_value) => { + if let Some(items) = processed_items.as_mut() { + items.push((key.clone().unbind(), new_value)); + } + } + TraversalResult::Modified(new_value) => { + let items = processed_items.get_or_insert_with(|| { + let mut items = Vec::with_capacity(dict.len()); + for (prior_key, prior_value) in dict.iter().take(index) { + items.push((prior_key.clone().unbind(), prior_value.clone().unbind())); + } + items + }); + items.push((key.clone().unbind(), new_value)); + } + } + } + seen.remove(&identity); + + let Some(processed_items) = processed_items else { + return Ok(TraversalResult::Unchanged(data.clone().unbind())); + }; + + let new_dict = PyDict::new(py); + for (key, value) in processed_items { + new_dict.set_item(key.bind(py), value.bind(py))?; + } + return Ok(TraversalResult::Modified(new_dict.into_any().unbind())); + } + + if let Ok(list) = data.cast::() { + if list.len() > plugin.config.max_collection_items { + return Err(pyo3::exceptions::PyValueError::new_err(format!( + "Collection exceeds max_collection_items ({})", + plugin.config.max_collection_items + ))); + } + let identity = list.as_ptr() as usize; + if !seen.insert(identity) { + return Err(pyo3::exceptions::PyValueError::new_err( + "Cyclic containers are not supported", + )); + } + + let mut new_items: Option>> = None; + for (index, item) in list.iter().enumerate() { + match process_nested_impl(plugin, py, &item, depth + 1, seen, budget)? 
{ + TraversalResult::Unchanged(new_item) => { + if let Some(items) = new_items.as_mut() { + items.push(new_item); + } + } + TraversalResult::Modified(new_item) => { + let items = new_items.get_or_insert_with(|| { + list.iter() + .take(index) + .map(|prior_item| prior_item.clone().unbind()) + .collect() + }); + items.push(new_item); + } + } + } + seen.remove(&identity); + + let Some(new_items) = new_items else { + return Ok(TraversalResult::Unchanged(data.clone().unbind())); + }; + + let new_list = PyList::empty(py); + for item in new_items { + new_list.append(item.bind(py))?; + } + return Ok(TraversalResult::Modified(new_list.into_any().unbind())); + } + + if let Ok(tuple) = data.cast::() { + if tuple.len() > plugin.config.max_collection_items { + return Err(pyo3::exceptions::PyValueError::new_err(format!( + "Collection exceeds max_collection_items ({})", + plugin.config.max_collection_items + ))); + } + let identity = tuple.as_ptr() as usize; + if !seen.insert(identity) { + return Err(pyo3::exceptions::PyValueError::new_err( + "Cyclic containers are not supported", + )); + } + + let mut new_items: Option>> = None; + for (index, item) in tuple.iter().enumerate() { + match process_nested_impl(plugin, py, &item, depth + 1, seen, budget)? 
{ + TraversalResult::Unchanged(new_item) => { + if let Some(items) = new_items.as_mut() { + items.push(new_item); + } + } + TraversalResult::Modified(new_item) => { + let items = new_items.get_or_insert_with(|| { + tuple + .iter() + .take(index) + .map(|prior_item| prior_item.clone().unbind()) + .collect() + }); + items.push(new_item); + } + } + } + seen.remove(&identity); + + let Some(new_items) = new_items else { + return Ok(TraversalResult::Unchanged(data.clone().unbind())); + }; + + let new_tuple = PyTuple::new(py, new_items.iter().map(|item| item.bind(py)))?; + return Ok(TraversalResult::Modified(new_tuple.into_any().unbind())); + } + + if let Ok(set) = data.cast::() { + if set.len() > plugin.config.max_collection_items { + return Err(pyo3::exceptions::PyValueError::new_err(format!( + "Collection exceeds max_collection_items ({})", + plugin.config.max_collection_items + ))); + } + let identity = set.as_ptr() as usize; + if !seen.insert(identity) { + return Err(pyo3::exceptions::PyValueError::new_err( + "Cyclic containers are not supported", + )); + } + + let mut new_items: Option>> = None; + for (index, item) in set.iter().enumerate() { + match process_nested_impl(plugin, py, &item, depth + 1, seen, budget)? 
{ + TraversalResult::Unchanged(new_item) => { + if let Some(items) = new_items.as_mut() { + items.push(new_item); + } + } + TraversalResult::Modified(new_item) => { + let items = new_items.get_or_insert_with(|| { + set.iter() + .take(index) + .map(|prior_item| prior_item.clone().unbind()) + .collect() + }); + items.push(new_item); + } + } + } + seen.remove(&identity); + + let Some(new_items) = new_items else { + return Ok(TraversalResult::Unchanged(data.clone().unbind())); + }; + + let new_set = PySet::new(py, new_items.iter().map(|item| item.bind(py)))?; + return Ok(TraversalResult::Modified(new_set.into_any().unbind())); + } + + Ok(TraversalResult::Unchanged(data.clone().unbind())) +} + +#[gen_stub_pymethods] +#[pymethods] +impl SearchReplacePluginRust { + #[new] + pub fn new(config_dict: &Bound<'_, PyDict>) -> PyResult { + let config = SearchReplaceConfig::from_py_dict(config_dict).map_err(|error| { + PyErr::new::(format!("Invalid config: {}", error)) + })?; + Ok(Self { config }) + } + + pub fn apply_patterns(&self, text: &str) -> PyResult { + Ok(apply_patterns_checked(&self.config, text)?.into_owned()) + } + + pub fn process_nested( + &self, + py: Python<'_>, + data: &Bound<'_, PyAny>, + ) -> PyResult<(bool, Py)> { + let mut seen = HashSet::new(); + let mut budget = TraversalBudget::new( + self.config.max_total_items, + self.config.max_output_bytes, + self.config.max_total_text_bytes, + ); + Ok( + match process_nested_impl(self, py, data, 0, &mut seen, &mut budget)? 
{ + TraversalResult::Unchanged(value) => (false, value), + TraversalResult::Modified(value) => (true, value), + }, + ) + } +} + +fn init_logging() { + static INIT: Once = Once::new(); + INIT.call_once(|| { + pyo3_log::init(); + }); +} + +#[pymodule] +fn regex_filter_rust(m: &Bound<'_, PyModule>) -> PyResult<()> { + init_logging(); + debug!("Initialized regex_filter Rust module"); + m.add_class::()?; + m.add_class::()?; + Ok(()) +} + +define_stub_info_gatherer!(stub_info); + +#[cfg(test)] +mod tests { + use super::*; + + fn plugin_with_words(words: Vec<(&str, &str)>) -> SearchReplacePluginRust { + let patterns = words + .iter() + .map(|(search, _)| search.to_string()) + .collect::>(); + let config = SearchReplaceConfig { + words: words + .into_iter() + .map(|(search, replace)| SearchReplace { + search: search.to_string(), + replace: replace.to_string(), + compiled: Regex::new(search).unwrap(), + }) + .collect(), + pattern_set: RegexSet::new(patterns).ok(), + max_text_bytes: DEFAULT_MAX_TEXT_BYTES, + max_total_text_bytes: DEFAULT_MAX_TOTAL_TEXT_BYTES, + max_nested_depth: DEFAULT_MAX_NESTED_DEPTH, + max_collection_items: DEFAULT_MAX_COLLECTION_ITEMS, + max_total_items: DEFAULT_MAX_TOTAL_ITEMS, + max_output_bytes: DEFAULT_MAX_OUTPUT_BYTES, + }; + SearchReplacePluginRust { config } + } + + #[test] + fn test_apply_patterns() { + let plugin = plugin_with_words(vec![ + (r"\bsecret\b", "[REDACTED]"), + (r"\bpassword\b", "[REDACTED]"), + ]); + assert_eq!( + plugin + .apply_patterns("The secret password is hidden") + .unwrap(), + "The [REDACTED] [REDACTED] is hidden" + ); + } + + #[test] + fn test_no_match() { + let plugin = plugin_with_words(vec![(r"\bsecret\b", "[REDACTED]")]); + assert_eq!( + plugin.apply_patterns("No sensitive data here").unwrap(), + "No sensitive data here" + ); + } + + #[test] + fn test_multiple_matches() { + let plugin = plugin_with_words(vec![(r"\d{3}-\d{2}-\d{4}", "XXX-XX-XXXX")]); + assert_eq!( + plugin + .apply_patterns("SSN: 123-45-6789 and 
987-65-4321") + .unwrap(), + "SSN: XXX-XX-XXXX and XXX-XX-XXXX" + ); + } + + #[test] + fn test_empty_config() { + let plugin = SearchReplacePluginRust { + config: SearchReplaceConfig { + words: vec![], + pattern_set: None, + max_text_bytes: DEFAULT_MAX_TEXT_BYTES, + max_total_text_bytes: DEFAULT_MAX_TOTAL_TEXT_BYTES, + max_nested_depth: DEFAULT_MAX_NESTED_DEPTH, + max_collection_items: DEFAULT_MAX_COLLECTION_ITEMS, + max_total_items: DEFAULT_MAX_TOTAL_ITEMS, + max_output_bytes: DEFAULT_MAX_OUTPUT_BYTES, + }, + }; + assert_eq!( + plugin + .apply_patterns("Any text should pass through unchanged") + .unwrap(), + "Any text should pass through unchanged" + ); + } + + #[test] + fn test_case_insensitive_matching() { + let plugin = plugin_with_words(vec![(r"(?i)\bsecret\b", "[REDACTED]")]); + assert_eq!( + plugin.apply_patterns("Secret data").unwrap(), + "[REDACTED] data" + ); + assert_eq!( + plugin.apply_patterns("secret data").unwrap(), + "[REDACTED] data" + ); + assert_eq!( + plugin.apply_patterns("SECRET data").unwrap(), + "[REDACTED] data" + ); + } + + #[test] + fn test_replacement_with_capture_groups() { + let plugin = plugin_with_words(vec![(r"(\d{3})-(\d{2})-(\d{4})", "***-**-$3")]); + assert_eq!( + plugin.apply_patterns("SSN: 123-45-6789").unwrap(), + "SSN: ***-**-6789" + ); + } + + #[test] + fn test_word_boundary_patterns() { + let plugin = plugin_with_words(vec![(r"\bcat\b", "dog")]); + assert_eq!(plugin.apply_patterns("the cat sat").unwrap(), "the dog sat"); + assert_eq!(plugin.apply_patterns("category").unwrap(), "category"); + assert_eq!(plugin.apply_patterns("scat").unwrap(), "scat"); + } + + #[test] + fn test_email_pattern() { + let plugin = plugin_with_words(vec![( + r"\b[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Z|a-z]{2,}\b", + "[EMAIL]", + )]); + assert_eq!( + plugin + .apply_patterns("Contact user@example.com for info") + .unwrap(), + "Contact [EMAIL] for info" + ); + } + + #[test] + fn test_url_pattern() { + let plugin = 
plugin_with_words(vec![(r"https?://[^\s]+", "[URL]")]); + assert_eq!( + plugin + .apply_patterns("Visit https://example.com for more") + .unwrap(), + "Visit [URL] for more" + ); + } + + #[test] + fn test_multiple_replacements_in_sequence() { + let plugin = plugin_with_words(vec![("a", "1"), ("b", "2"), ("c", "3")]); + assert_eq!(plugin.apply_patterns("abc").unwrap(), "123"); + } + + #[test] + fn test_newline_handling() { + let plugin = plugin_with_words(vec![("secret", "[REDACTED]")]); + assert_eq!( + plugin.apply_patterns("Line 1\nsecret\nLine 3").unwrap(), + "Line 1\n[REDACTED]\nLine 3" + ); + } + + #[test] + fn test_empty_replacement() { + let plugin = plugin_with_words(vec![(r"\bremove\b", "")]); + assert_eq!( + plugin.apply_patterns("Please remove this word").unwrap(), + "Please this word" + ); + } + + #[test] + fn test_invalid_config_reports_source_style_errors() { + Python::initialize(); + Python::attach(|py| { + let dict = PyDict::new(py); + let words = PyList::empty(py); + let word = PyDict::new(py); + word.set_item("search", "[invalid(").unwrap(); + word.set_item("replace", "x").unwrap(); + words.append(word).unwrap(); + dict.set_item("words", words).unwrap(); + + let error = SearchReplacePluginRust::new(&dict).unwrap_err(); + assert!( + error + .to_string() + .contains("Invalid regex patterns detected") + ); + assert!(error.to_string().contains("[invalid(")); + }); + } + + #[test] + fn test_config_limits_report_validation_errors() { + Python::initialize(); + Python::attach(|py| { + let dict = PyDict::new(py); + let words = PyList::empty(py); + let word = PyDict::new(py); + word.set_item("search", "secret").unwrap(); + word.set_item("replace", "redacted").unwrap(); + words.append(word).unwrap(); + dict.set_item("words", words).unwrap(); + dict.set_item("max_search_bytes", 2).unwrap(); + dict.set_item("max_replace_bytes", 3).unwrap(); + + let error = SearchReplacePluginRust::new(&dict).unwrap_err(); + assert!( + error + .to_string() + .contains("search 
exceeds max_search_bytes") + ); + }); + } + + #[test] + fn test_config_rejects_too_many_patterns_and_non_list_words() { + Python::initialize(); + Python::attach(|py| { + let dict = PyDict::new(py); + let words = PyList::empty(py); + for search in ["a", "b"] { + let word = PyDict::new(py); + word.set_item("search", search).unwrap(); + word.set_item("replace", "x").unwrap(); + words.append(word).unwrap(); + } + dict.set_item("words", words).unwrap(); + dict.set_item("max_patterns", 1).unwrap(); + let error = SearchReplacePluginRust::new(&dict).unwrap_err(); + assert!(error.to_string().contains("'words' contains 2 patterns")); + + let dict = PyDict::new(py); + dict.set_item("words", "not-a-list").unwrap(); + let error = SearchReplacePluginRust::new(&dict).unwrap_err(); + assert!(error.to_string().contains("'words' must be a list")); + }); + } + + #[test] + fn test_process_nested_rewrites_dict_list_tuple_and_set() { + let plugin = plugin_with_words(vec![("bad", "good")]); + Python::initialize(); + Python::attach(|py| { + let nested = PyDict::new(py); + nested.set_item("value", "bad").unwrap(); + let list = PyList::new(py, ["bad", "fine"]).unwrap(); + let tuple = PyTuple::new(py, ["bad"]).unwrap(); + let set = PySet::new(py, ["bad", "fine"]).unwrap(); + let payload = PyDict::new(py); + payload.set_item("nested", nested).unwrap(); + payload.set_item("list", list).unwrap(); + payload.set_item("tuple", tuple).unwrap(); + payload.set_item("set", set).unwrap(); + + let (modified, result) = plugin.process_nested(py, payload.as_any()).unwrap(); + assert!(modified); + let result = result.bind(py).cast::().unwrap(); + let nested_obj = result.get_item("nested").unwrap().unwrap(); + let nested = nested_obj.cast::().unwrap(); + let nested_value: String = nested + .get_item("value") + .unwrap() + .unwrap() + .extract() + .unwrap(); + assert_eq!(nested_value, "good"); + let list_obj = result.get_item("list").unwrap().unwrap(); + let list = list_obj.cast::().unwrap(); + let 
list_value: String = list.get_item(0).unwrap().extract().unwrap(); + assert_eq!(list_value, "good"); + let tuple_obj = result.get_item("tuple").unwrap().unwrap(); + let tuple = tuple_obj.cast::().unwrap(); + let tuple_value: String = tuple.get_item(0).unwrap().extract().unwrap(); + assert_eq!(tuple_value, "good"); + let set_obj = result.get_item("set").unwrap().unwrap(); + let set = set_obj.cast::().unwrap(); + assert!(set.contains("good").unwrap()); + }); + } + + #[test] + fn test_process_nested_returns_original_on_no_change() { + let plugin = plugin_with_words(vec![("missing", "found")]); + Python::initialize(); + Python::attach(|py| { + let payload = PyList::new(py, ["clean"]).unwrap(); + let original_ptr = payload.as_ptr(); + let (modified, result) = plugin.process_nested(py, payload.as_any()).unwrap(); + assert!(!modified); + assert_eq!(result.bind(py).as_ptr(), original_ptr); + }); + } + + #[test] + fn test_process_nested_enforces_runtime_budgets() { + let mut plugin = plugin_with_words(vec![("a", "bbbb")]); + plugin.config.max_text_bytes = 2; + Python::initialize(); + Python::attach(|py| { + let error = plugin + .process_nested(py, PyString::new(py, "aaa").as_any()) + .unwrap_err(); + assert!(error.to_string().contains("Text exceeds max_text_bytes")); + }); + + let mut plugin = plugin_with_words(vec![("missing", "found")]); + plugin.config.max_total_text_bytes = 5; + Python::attach(|py| { + let payload = PyList::new(py, ["aaa", "aaa"]).unwrap(); + let error = plugin.process_nested(py, payload.as_any()).unwrap_err(); + assert!( + error + .to_string() + .contains("Input exceeds max_total_text_bytes") + ); + }); + + let mut plugin = plugin_with_words(vec![("missing", "found")]); + plugin.config.max_output_bytes = 5; + Python::attach(|py| { + let payload = PyList::new(py, ["aaa", "aaa"]).unwrap(); + let error = plugin.process_nested(py, payload.as_any()).unwrap_err(); + assert!( + error + .to_string() + .contains("Output exceeds max_output_bytes") + ); + }); + } 
+ + #[test] + fn test_process_nested_enforces_shape_limits_and_cycles() { + let mut plugin = plugin_with_words(vec![("bad", "good")]); + plugin.config.max_collection_items = 1; + Python::initialize(); + Python::attach(|py| { + let payload = PyList::new(py, ["bad", "bad"]).unwrap(); + let error = plugin.process_nested(py, payload.as_any()).unwrap_err(); + assert!( + error + .to_string() + .contains("Collection exceeds max_collection_items") + ); + }); + + let mut plugin = plugin_with_words(vec![("bad", "good")]); + plugin.config.max_total_items = 2; + Python::attach(|py| { + let inner_one = PyList::new(py, ["bad"]).unwrap(); + let inner_two = PyList::new(py, ["bad"]).unwrap(); + let outer = PyList::new(py, [inner_one.as_any(), inner_two.as_any()]).unwrap(); + let error = plugin.process_nested(py, outer.as_any()).unwrap_err(); + assert!( + error + .to_string() + .contains("Traversal exceeds max_total_items") + ); + }); + + let mut plugin = plugin_with_words(vec![("bad", "good")]); + plugin.config.max_nested_depth = 1; + Python::attach(|py| { + let inner = PyList::new(py, ["bad"]).unwrap(); + let outer = PyList::new(py, [inner.as_any()]).unwrap(); + let error = plugin.process_nested(py, outer.as_any()).unwrap_err(); + assert!(error.to_string().contains("Maximum nested depth")); + }); + } +} diff --git a/plugins/rust/python-package/regex_filter/src/plugin.rs b/plugins/rust/python-package/regex_filter/src/plugin.rs new file mode 100644 index 0000000..98a765c --- /dev/null +++ b/plugins/rust/python-package/regex_filter/src/plugin.rs @@ -0,0 +1,221 @@ +// Copyright 2026 +// SPDX-License-Identifier: Apache-2.0 +// +// Rust-owned regex filter plugin core. 
+ +use cpex_framework_bridge::{build_framework_object, default_result}; +use pyo3::prelude::*; +use pyo3::types::{PyAny, PyDict, PyList, PyModule, PyString}; +use pyo3_stub_gen::derive::*; + +use crate::{SearchReplacePluginRust, apply_patterns_checked}; + +#[gen_stub_pyclass] +#[pyclass] +pub struct RegexFilterPluginCore { + engine: SearchReplacePluginRust, +} + +#[gen_stub_pymethods] +#[pymethods] +impl RegexFilterPluginCore { + #[new] + pub fn new(config: &Bound<'_, PyDict>) -> PyResult { + let engine = SearchReplacePluginRust::new(config)?; + Ok(Self { engine }) + } + + pub fn prompt_pre_fetch( + &self, + py: Python<'_>, + payload: &Bound<'_, PyAny>, + _context: &Bound<'_, PyAny>, + ) -> PyResult> { + self.process_payload_attr(py, payload, "args", "PromptPrehookResult") + } + + pub fn prompt_post_fetch( + &self, + py: Python<'_>, + payload: &Bound<'_, PyAny>, + _context: &Bound<'_, PyAny>, + ) -> PyResult> { + let result = payload.getattr("result")?; + let messages_value = result.getattr("messages")?; + let Ok(messages) = messages_value.cast::() else { + return default_result(py, "PromptPosthookResult"); + }; + if messages.len() > self.engine.config.max_collection_items { + return Err(pyo3::exceptions::PyValueError::new_err(format!( + "Collection exceeds max_collection_items ({})", + self.engine.config.max_collection_items + ))); + } + + let mut visited = 0usize; + let mut input_bytes = 0usize; + let mut output_bytes = 0usize; + let mut updated_messages: Option>> = None; + for (index, message) in messages.iter().enumerate() { + visited += 1; + if visited > self.engine.config.max_total_items { + return Err(pyo3::exceptions::PyValueError::new_err(format!( + "Traversal exceeds max_total_items ({})", + self.engine.config.max_total_items + ))); + } + let Ok(content) = message.getattr("content") else { + if let Some(items) = updated_messages.as_mut() { + items.push(message.clone().unbind()); + } + continue; + }; + let Ok(text_obj) = content.getattr("text") else { + if 
let Some(items) = updated_messages.as_mut() { + items.push(message.clone().unbind()); + } + continue; + }; + let Ok(text) = text_obj.cast::() else { + if let Some(items) = updated_messages.as_mut() { + items.push(message.clone().unbind()); + } + continue; + }; + input_bytes = input_bytes.saturating_add(text.to_str()?.len()); + if input_bytes > self.engine.config.max_total_text_bytes { + return Err(pyo3::exceptions::PyValueError::new_err(format!( + "Input exceeds max_total_text_bytes ({})", + self.engine.config.max_total_text_bytes + ))); + } + let replaced = apply_patterns_checked(&self.engine.config, text.to_str()?)?; + if let std::borrow::Cow::Owned(replaced) = replaced { + output_bytes = output_bytes.saturating_add(replaced.len()); + if output_bytes > self.engine.config.max_output_bytes { + return Err(pyo3::exceptions::PyValueError::new_err(format!( + "Output exceeds max_output_bytes ({})", + self.engine.config.max_output_bytes + ))); + } + let text_obj = replaced.into_pyobject(py)?.into_any().unbind(); + let cloned_content = clone_payload_with_attr(py, &content, "text", &text_obj)?; + let items = updated_messages.get_or_insert_with(|| { + messages + .iter() + .take(index) + .map(|prior_message| prior_message.clone().unbind()) + .collect() + }); + items.push(clone_payload_with_attr( + py, + &message, + "content", + &cloned_content, + )?); + } else { + output_bytes = output_bytes.saturating_add(text.to_str()?.len()); + if output_bytes > self.engine.config.max_output_bytes { + return Err(pyo3::exceptions::PyValueError::new_err(format!( + "Output exceeds max_output_bytes ({})", + self.engine.config.max_output_bytes + ))); + } + if let Some(items) = updated_messages.as_mut() { + items.push(message.clone().unbind()); + } + } + } + + if let Some(updated_messages) = updated_messages { + let new_messages = PyList::new(py, updated_messages.iter().map(|item| item.bind(py)))?; + let new_result = clone_payload_with_attr( + py, + &result, + "messages", + 
&new_messages.into_any().unbind(), + )?; + let new_payload = clone_payload_with_attr(py, payload, "result", &new_result)?; + return build_framework_object( + py, + "PromptPosthookResult", + [("modified_payload", new_payload)], + ); + } + default_result(py, "PromptPosthookResult") + } + + pub fn tool_pre_invoke( + &self, + py: Python<'_>, + payload: &Bound<'_, PyAny>, + _context: &Bound<'_, PyAny>, + ) -> PyResult> { + self.process_payload_attr(py, payload, "args", "ToolPreInvokeResult") + } + + pub fn tool_post_invoke( + &self, + py: Python<'_>, + payload: &Bound<'_, PyAny>, + _context: &Bound<'_, PyAny>, + ) -> PyResult> { + self.process_payload_attr(py, payload, "result", "ToolPostInvokeResult") + } +} + +impl RegexFilterPluginCore { + fn process_payload_attr( + &self, + py: Python<'_>, + payload: &Bound<'_, PyAny>, + attr: &str, + result_class: &str, + ) -> PyResult> { + let value = payload.getattr(attr)?; + if value.is_none() { + return default_result(py, result_class); + } + + let (modified, new_value) = self.engine.process_nested(py, &value)?; + if !modified { + return default_result(py, result_class); + } + + let new_payload = clone_payload_with_attr(py, payload, attr, &new_value)?; + build_framework_object(py, result_class, [("modified_payload", new_payload)]) + } +} + +fn clone_python_object<'py>( + py: Python<'py>, + object: &Bound<'py, PyAny>, +) -> PyResult> { + if object.hasattr("model_copy")? { + object.call_method0("model_copy") + } else { + let copy = PyModule::import(py, "copy")?; + copy.getattr("copy")?.call1((object,)) + } +} + +fn clone_payload_with_attr( + py: Python<'_>, + payload: &Bound<'_, PyAny>, + attr: &str, + new_value: &Py, +) -> PyResult> { + let cloned = if payload.hasattr("model_copy")? { + let kwargs = PyDict::new(py); + let update = PyDict::new(py); + update.set_item(attr, new_value.bind(py))?; + kwargs.set_item("update", update)?; + payload.call_method("model_copy", (), Some(&kwargs))? 
+ } else { + let cloned = clone_python_object(py, payload)?; + cloned.setattr(attr, new_value.bind(py))?; + cloned + }; + + Ok(cloned.unbind()) +} diff --git a/plugins/rust/python-package/regex_filter/tests/conftest.py b/plugins/rust/python-package/regex_filter/tests/conftest.py new file mode 100644 index 0000000..62fc3dc --- /dev/null +++ b/plugins/rust/python-package/regex_filter/tests/conftest.py @@ -0,0 +1,11 @@ +"""Pytest configuration — inject mcpgateway mock before any plugin imports.""" + +import sys + +import mcpgateway_mock +import mcpgateway_mock.plugins +import mcpgateway_mock.plugins.framework + +sys.modules.setdefault("mcpgateway", mcpgateway_mock) +sys.modules.setdefault("mcpgateway.plugins", mcpgateway_mock.plugins) +sys.modules.setdefault("mcpgateway.plugins.framework", mcpgateway_mock.plugins.framework) diff --git a/plugins/rust/python-package/regex_filter/tests/mcpgateway_mock/__init__.py b/plugins/rust/python-package/regex_filter/tests/mcpgateway_mock/__init__.py new file mode 100644 index 0000000..3cffc17 --- /dev/null +++ b/plugins/rust/python-package/regex_filter/tests/mcpgateway_mock/__init__.py @@ -0,0 +1 @@ +"""Mock mcpgateway package for plugin tests.""" diff --git a/plugins/rust/python-package/regex_filter/tests/mcpgateway_mock/plugins/__init__.py b/plugins/rust/python-package/regex_filter/tests/mcpgateway_mock/plugins/__init__.py new file mode 100644 index 0000000..e274787 --- /dev/null +++ b/plugins/rust/python-package/regex_filter/tests/mcpgateway_mock/plugins/__init__.py @@ -0,0 +1 @@ +"""Mock mcpgateway.plugins package for plugin tests.""" diff --git a/plugins/rust/python-package/regex_filter/tests/mcpgateway_mock/plugins/framework.py b/plugins/rust/python-package/regex_filter/tests/mcpgateway_mock/plugins/framework.py new file mode 100644 index 0000000..b687f9e --- /dev/null +++ b/plugins/rust/python-package/regex_filter/tests/mcpgateway_mock/plugins/framework.py @@ -0,0 +1,107 @@ +from __future__ import annotations + +from 
dataclasses import dataclass, field +from typing import Any + + +class Plugin: + def __init__(self, config: "PluginConfig") -> None: + self._config = config + + +@dataclass +class PluginConfig: + name: str = "" + kind: str = "" + version: str = "" + hooks: list[str] | None = None + config: dict[str, Any] | None = None + + +@dataclass +class GlobalContext: + user: Any = None + tenant_id: str | None = None + + +@dataclass +class PluginContext: + global_context: GlobalContext = field(default_factory=GlobalContext) + metadata: dict[str, Any] = field(default_factory=dict) + + +@dataclass +class PluginViolation: + reason: str = "" + description: str = "" + code: str = "" + details: dict[str, Any] | None = None + http_status_code: int = 400 + http_headers: dict[str, str] | None = None + + +@dataclass +class PromptPrehookPayload: + prompt_id: str = "" + args: dict[str, Any] | None = None + + +@dataclass +class PromptPrehookResult: + continue_processing: bool = True + violation: PluginViolation | None = None + modified_payload: PromptPrehookPayload | None = None + + +@dataclass +class TextContent: + text: str + + +@dataclass +class Message: + role: str + content: TextContent + + +@dataclass +class PromptResult: + messages: list[Message] + + +@dataclass +class PromptPosthookPayload: + result: PromptResult + + +@dataclass +class PromptPosthookResult: + continue_processing: bool = True + violation: PluginViolation | None = None + modified_payload: PromptPosthookPayload | None = None + + +@dataclass +class ToolPreInvokePayload: + name: str = "" + args: dict[str, Any] | None = None + + +@dataclass +class ToolPreInvokeResult: + continue_processing: bool = True + violation: PluginViolation | None = None + modified_payload: ToolPreInvokePayload | None = None + + +@dataclass +class ToolPostInvokePayload: + name: str = "" + result: Any = None + + +@dataclass +class ToolPostInvokeResult: + continue_processing: bool = True + violation: PluginViolation | None = None + modified_payload: 
ToolPostInvokePayload | None = None diff --git a/plugins/rust/python-package/regex_filter/tests/test_plugin.py b/plugins/rust/python-package/regex_filter/tests/test_plugin.py new file mode 100644 index 0000000..094ddf8 --- /dev/null +++ b/plugins/rust/python-package/regex_filter/tests/test_plugin.py @@ -0,0 +1,685 @@ +"""Ported regex filter plugin tests for the CPEX package layout.""" + +from __future__ import annotations + +import pytest + +from mcpgateway_mock.plugins.framework import ( + GlobalContext, + Message, + PluginConfig, + PluginContext, + PromptPosthookPayload, + PromptPrehookPayload, + PromptResult, + TextContent, + ToolPostInvokePayload, + ToolPreInvokePayload, +) + +from cpex_regex_filter.regex_filter import SearchReplacePlugin +from cpex_regex_filter.regex_filter_rust import SearchReplacePluginRust + + +def _make_config(words=None) -> PluginConfig: + return PluginConfig( + name="regex_filter", + kind="cpex_regex_filter.regex_filter.SearchReplacePlugin", + version="0.1.0", + hooks=[ + "prompt_pre_fetch", + "prompt_post_fetch", + "tool_pre_invoke", + "tool_post_invoke", + ], + config={"words": words or [{"search": "bad", "replace": "good"}]}, + ) + + +def _make_context() -> PluginContext: + return PluginContext(global_context=GlobalContext(user="user-1")) + + +class TestRustEngine: + def test_simple_replacement(self): + plugin = SearchReplacePluginRust({"words": [{"search": "bad", "replace": "good"}]}) + assert plugin.apply_patterns("This is bad") == "This is good" + + def test_regex_replacement(self): + plugin = SearchReplacePluginRust( + {"words": [{"search": r"\bsecret\b", "replace": "[REDACTED]"}]} + ) + assert ( + plugin.apply_patterns("The secret password is hidden") + == "The [REDACTED] password is hidden" + ) + + def test_ssn_replacement(self): + plugin = SearchReplacePluginRust( + {"words": [{"search": r"\d{3}-\d{2}-\d{4}", "replace": "XXX-XX-XXXX"}]} + ) + assert plugin.apply_patterns("SSN: 123-45-6789") == "SSN: XXX-XX-XXXX" + + def 
test_multiple_replacements(self): + plugin = SearchReplacePluginRust( + { + "words": [ + {"search": "bad", "replace": "good"}, + {"search": r"\bsecret\b", "replace": "[REDACTED]"}, + ] + } + ) + assert plugin.apply_patterns("This bad secret is bad") == "This good [REDACTED] is good" + + def test_nested_dict(self): + plugin = SearchReplacePluginRust({"words": [{"search": "bad", "replace": "good"}]}) + modified, new_args = plugin.process_nested({"outer": {"inner": "This is bad"}}) + assert modified is True + assert new_args["outer"]["inner"] == "This is good" + + def test_list_result(self): + plugin = SearchReplacePluginRust({"words": [{"search": "bad", "replace": "good"}]}) + modified, new_result = plugin.process_nested(["This is bad", "Another bad thing"]) + assert modified is True + assert new_result == ["This is good", "Another good thing"] + + def test_tuple_result(self): + plugin = SearchReplacePluginRust({"words": [{"search": "bad", "replace": "good"}]}) + modified, new_result = plugin.process_nested(("bad", {"nested": "bad"})) + assert modified is True + assert new_result == ("good", {"nested": "good"}) + + def test_cyclic_list_does_not_recurse_forever(self): + plugin = SearchReplacePluginRust({"words": [{"search": "bad", "replace": "good"}]}) + data = [] + data.append(data) + with pytest.raises(ValueError, match="Cyclic containers are not supported"): + plugin.process_nested(data) + + def test_cyclic_list_with_modified_sibling_raises(self): + plugin = SearchReplacePluginRust({"words": [{"search": "bad", "replace": "good"}]}) + data = ["bad"] + data.append(data) + with pytest.raises(ValueError, match="Cyclic containers are not supported"): + plugin.process_nested(data) + + def test_mixed_dict_list_cycle_raises(self): + plugin = SearchReplacePluginRust({"words": [{"search": "bad", "replace": "good"}]}) + data = {"items": ["bad"]} + data["items"].append(data) + with pytest.raises(ValueError, match="Cyclic containers are not supported"): + 
plugin.process_nested(data) + + def test_deeply_nested_values_stop_at_depth_limit(self): + plugin = SearchReplacePluginRust({"words": [{"search": "bad", "replace": "good"}]}) + data = "bad" + for _ in range(70): + data = [data] + with pytest.raises(ValueError, match="Maximum nested depth"): + plugin.process_nested(data) + + def test_large_text_still_filters(self): + plugin = SearchReplacePluginRust( + {"words": [{"search": "bad", "replace": "good"}], "max_text_bytes": 1024} + ) + text = "bad" * 100 + modified, new_result = plugin.process_nested(text) + assert modified is True + assert new_result == "good" * 100 + + def test_chained_replacements(self): + plugin = SearchReplacePluginRust( + { + "words": [ + {"search": "foo", "replace": "bar"}, + {"search": "bar", "replace": "baz"}, + ] + } + ) + assert plugin.apply_patterns("foo") == "baz" + + @pytest.mark.parametrize( + ("replace", "expected"), + [ + ("$0", "ab"), + ("$1", "a"), + ("$10", ""), + ("$1_a", ""), + ("$1.a", "a.a"), + ("$word.ext", "b.ext"), + ("${word}", "b"), + ("${word.ext}", ""), + ("$$", "$"), + ("$", "$"), + ("${word", "${word"), + ("[$missing]", "[]"), + ], + ) + def test_replacement_syntax_matches_rust_regex(self, replace, expected): + plugin = SearchReplacePluginRust( + {"words": [{"search": r"(a)(?P<word>b)?", "replace": replace}]} + ) + assert plugin.apply_patterns("ab") == expected + + def test_empty_string_input(self): + plugin = SearchReplacePluginRust({"words": [{"search": "test", "replace": "TEST"}]}) + modified, result = plugin.process_nested("") + assert modified is False + assert result == "" + + def test_unicode_emojis(self): + plugin = SearchReplacePluginRust({"words": [{"search": "bad", "replace": "good"}]}) + modified, result = plugin.process_nested("This is bad 😀 very bad 🎉") + assert modified is True + assert result == "This is good 😀 very good 🎉" + + def test_dict_with_none_values(self): + plugin = SearchReplacePluginRust({"words": [{"search": "bad", "replace": "good"}]}) + modified, 
result = plugin.process_nested({"key1": "bad", "key2": None}) + assert modified is True + assert result["key1"] == "good" + assert result["key2"] is None + + def test_list_with_mixed_types(self): + plugin = SearchReplacePluginRust({"words": [{"search": "bad", "replace": "good"}]}) + modified, result = plugin.process_nested(["bad", 123, None, {"nested": "bad"}]) + assert modified is True + assert result == ["good", 123, None, {"nested": "good"}] + + def test_character_class(self): + plugin = SearchReplacePluginRust({"words": [{"search": r"[0-9]+", "replace": "NUM"}]}) + assert plugin.apply_patterns("I have 123 apples and 456 oranges") == "I have NUM apples and NUM oranges" + + def test_word_boundary_pattern(self): + plugin = SearchReplacePluginRust({"words": [{"search": r"\bcat\b", "replace": "dog"}]}) + assert plugin.apply_patterns("The cat and the caterpillar") == "The dog and the caterpillar" + + def test_case_insensitive_pattern(self): + plugin = SearchReplacePluginRust({"words": [{"search": r"(?i)test", "replace": "EXAM"}]}) + assert plugin.apply_patterns("Test TEST test TeSt") == "EXAM EXAM EXAM EXAM" + + def test_email_redaction(self): + plugin = SearchReplacePluginRust( + { + "words": [ + { + "search": r"\b[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Z|a-z]{2,}\b", + "replace": "[EMAIL]", + } + ] + } + ) + assert ( + plugin.apply_patterns("Contact me at john.doe@example.com or jane@test.org") + == "Contact me at [EMAIL] or [EMAIL]" + ) + + def test_credit_card_redaction(self): + plugin = SearchReplacePluginRust( + {"words": [{"search": r"\b\d{4}[- ]?\d{4}[- ]?\d{4}[- ]?\d{4}\b", "replace": "[CARD]"}]} + ) + assert plugin.apply_patterns("Card: 1234-5678-9012-3456 or 1234567890123456") == "Card: [CARD] or [CARD]" + + def test_ipv4_address_redaction(self): + plugin = SearchReplacePluginRust( + {"words": [{"search": r"\b(?:\d{1,3}\.){3}\d{1,3}\b", "replace": "[IP]"}]} + ) + assert plugin.apply_patterns("Server at 192.168.1.1 and 10.0.0.1") == "Server at [IP] and [IP]" + 
+ def test_url_redaction(self): + plugin = SearchReplacePluginRust({"words": [{"search": r"https?://[^\s]+", "replace": "[URL]"}]}) + assert plugin.apply_patterns("Visit https://example.com or http://test.org/path") == "Visit [URL] or [URL]" + + def test_empty_config_no_words(self): + plugin = SearchReplacePluginRust({"words": []}) + assert plugin.apply_patterns("clean") == "clean" + + def test_invalid_regex_detected(self): + with pytest.raises(ValueError, match="Invalid regex patterns detected"): + SearchReplacePluginRust({"words": [{"search": "[invalid(", "replace": "test"}]}) + + def test_missing_search_field(self): + with pytest.raises(ValueError, match="Missing 'search' field"): + SearchReplacePluginRust({"words": [{"replace": "test"}]}) + + def test_missing_replace_field(self): + with pytest.raises(ValueError, match="Missing 'replace' field"): + SearchReplacePluginRust({"words": [{"search": "test"}]}) + + def test_words_must_be_a_list(self): + with pytest.raises(ValueError, match="'words' must be a list"): + SearchReplacePluginRust({"words": {"search": "bad", "replace": "good"}}) + + def test_text_limit_rejects_oversized_payload(self): + plugin = SearchReplacePluginRust( + {"words": [{"search": "bad", "replace": "good"}], "max_text_bytes": 2} + ) + with pytest.raises(ValueError, match="Text exceeds max_text_bytes"): + plugin.process_nested("bad") + + def test_apply_patterns_enforces_text_limit(self): + plugin = SearchReplacePluginRust( + {"words": [{"search": "bad", "replace": "good"}], "max_text_bytes": 2} + ) + with pytest.raises(ValueError, match="Text exceeds max_text_bytes"): + plugin.apply_patterns("bad") + + def test_output_limit_rejects_expanding_replacement(self): + plugin = SearchReplacePluginRust( + { + "words": [{"search": "a", "replace": "bbbb"}], + "max_text_bytes": 16, + "max_output_bytes": 3, + } + ) + with pytest.raises(ValueError, match="Output exceeds max_output_bytes"): + plugin.apply_patterns("a") + + def 
test_output_limit_stops_before_huge_replacement_finishes(self): + plugin = SearchReplacePluginRust( + { + "words": [{"search": "a", "replace": "bbbb"}], + "max_text_bytes": 16, + "max_output_bytes": 12, + } + ) + with pytest.raises(ValueError, match="Output exceeds max_output_bytes"): + plugin.apply_patterns("aaaa") + + def test_output_limit_bounds_capture_expansion(self): + plugin = SearchReplacePluginRust( + { + "words": [{"search": r"(a+)", "replace": "$1$1$1"}], + "max_text_bytes": 16, + "max_output_bytes": 5, + } + ) + with pytest.raises(ValueError, match="Output exceeds max_output_bytes"): + plugin.apply_patterns("aa") + + def test_pattern_limit_rejects_oversized_config(self): + with pytest.raises(ValueError, match="'words' contains 2 patterns, maximum is 1"): + SearchReplacePluginRust( + { + "words": [ + {"search": "bad", "replace": "good"}, + {"search": "secret", "replace": "safe"}, + ], + "max_patterns": 1, + } + ) + + def test_replacement_limit_rejects_oversized_config(self): + with pytest.raises(ValueError, match="replacement exceeds max_replace_bytes"): + SearchReplacePluginRust( + { + "words": [{"search": "bad", "replace": "good"}], + "max_replace_bytes": 2, + } + ) + + def test_search_limit_rejects_oversized_config(self): + with pytest.raises(ValueError, match="search exceeds max_search_bytes"): + SearchReplacePluginRust( + { + "words": [{"search": "bad", "replace": "good"}], + "max_search_bytes": 2, + } + ) + + def test_collection_limit_rejects_oversized_payload(self): + plugin = SearchReplacePluginRust( + {"words": [{"search": "bad", "replace": "good"}], "max_collection_items": 1} + ) + with pytest.raises(ValueError, match="Collection exceeds max_collection_items"): + plugin.process_nested(["bad", "bad"]) + + def test_total_item_limit_rejects_oversized_traversal(self): + plugin = SearchReplacePluginRust( + {"words": [{"search": "bad", "replace": "good"}], "max_total_items": 2} + ) + with pytest.raises(ValueError, match="Traversal exceeds 
max_total_items"): + plugin.process_nested([["bad"], ["bad"]]) + + def test_nested_output_limit_is_aggregate(self): + plugin = SearchReplacePluginRust( + { + "words": [{"search": "a", "replace": "bbb"}], + "max_text_bytes": 16, + "max_output_bytes": 5, + } + ) + with pytest.raises(ValueError, match="Output exceeds max_output_bytes"): + plugin.process_nested(["a", "a"]) + + def test_nested_output_limit_counts_unchanged_strings(self): + plugin = SearchReplacePluginRust( + { + "words": [{"search": "missing", "replace": "found"}], + "max_text_bytes": 16, + "max_output_bytes": 5, + } + ) + with pytest.raises(ValueError, match="Output exceeds max_output_bytes"): + plugin.process_nested(["aaa", "aaa"]) + + def test_nested_input_limit_is_aggregate(self): + plugin = SearchReplacePluginRust( + { + "words": [{"search": "missing", "replace": "found"}], + "max_text_bytes": 16, + "max_total_text_bytes": 5, + } + ) + with pytest.raises(ValueError, match="Input exceeds max_total_text_bytes"): + plugin.process_nested(["aaa", "aaa"]) + + def test_custom_objects_are_left_unchanged(self): + class CustomValue: + text = "bad" + + plugin = SearchReplacePluginRust({"words": [{"search": "bad", "replace": "good"}]}) + value = CustomValue() + modified, result = plugin.process_nested(value) + assert modified is False + assert result is value + + def test_depth_limit_boundary(self): + plugin = SearchReplacePluginRust( + {"words": [{"search": "bad", "replace": "good"}], "max_nested_depth": 2} + ) + modified, result = plugin.process_nested(["bad"]) + assert modified is True + assert result == ["good"] + with pytest.raises(ValueError, match="Maximum nested depth"): + plugin.process_nested([["bad"]]) + + def test_set_result(self): + plugin = SearchReplacePluginRust({"words": [{"search": "bad", "replace": "good"}]}) + modified, new_result = plugin.process_nested({"bad", "fine"}) + assert modified is True + assert new_result == {"good", "fine"} + + +class TestPluginHooks: + @pytest.fixture + def 
plugin(self): + return SearchReplacePlugin(_make_config()) + + class ModelCopyPayload: + def __init__(self, **attrs): + self.__dict__.update(attrs) + + def model_copy(self, *, update=None): + attrs = dict(self.__dict__) + if update: + attrs.update(update) + return TestPluginHooks.ModelCopyPayload(**attrs) + + async def test_prompt_pre_fetch_simple_replacement(self, plugin): + payload = PromptPrehookPayload(prompt_id="prompt-1", args={"message": "This is bad"}) + result = await plugin.prompt_pre_fetch(payload, _make_context()) + assert result.modified_payload is not None + assert result.modified_payload.args["message"] == "This is good" + + async def test_prompt_pre_fetch_no_change_returns_default_result(self, plugin): + payload = PromptPrehookPayload(prompt_id="prompt-1", args={"message": "This is fine"}) + result = await plugin.prompt_pre_fetch(payload, _make_context()) + assert result.modified_payload is None + assert result.continue_processing is True + + async def test_prompt_post_fetch_message_replacement(self, plugin): + original_content = TextContent(text="This is bad") + original_message = Message(role="assistant", content=original_content) + payload = PromptPosthookPayload( + result=PromptResult(messages=[original_message]) + ) + result = await plugin.prompt_post_fetch(payload, _make_context()) + assert result.modified_payload is not None + assert result.modified_payload.result.messages[0].content.text == "This is good" + assert payload.result.messages[0].content.text == "This is bad" + assert result.modified_payload is not payload + assert result.modified_payload.result is not payload.result + assert result.modified_payload.result.messages[0] is not original_message + assert result.modified_payload.result.messages[0].content is not original_content + + async def test_prompt_post_fetch_uses_model_copy_path(self, plugin): + original_content = self.ModelCopyPayload(text="This is bad") + original_message = self.ModelCopyPayload(role="assistant", 
content=original_content) + original_result = self.ModelCopyPayload(messages=[original_message]) + payload = self.ModelCopyPayload(result=original_result) + + result = await plugin.prompt_post_fetch(payload, _make_context()) + + assert result.modified_payload is not None + assert result.modified_payload.result.messages[0].content.text == "This is good" + assert payload.result.messages[0].content.text == "This is bad" + assert result.modified_payload is not payload + assert result.modified_payload.result is not original_result + assert result.modified_payload.result.messages[0] is not original_message + assert result.modified_payload.result.messages[0].content is not original_content + + async def test_prompt_post_fetch_error_does_not_partially_mutate(self): + plugin = SearchReplacePlugin( + PluginConfig( + name="regex_filter", + kind="cpex_regex_filter.regex_filter.SearchReplacePlugin", + version="0.1.0", + hooks=["prompt_post_fetch"], + config={ + "words": [{"search": "bad", "replace": "good"}], + "max_text_bytes": 4, + }, + ) + ) + payload = PromptPosthookPayload( + result=PromptResult( + messages=[ + Message(role="assistant", content=TextContent(text="bad")), + Message(role="assistant", content=TextContent(text="too long")), + ] + ) + ) + with pytest.raises(ValueError, match="Text exceeds max_text_bytes"): + await plugin.prompt_post_fetch(payload, _make_context()) + assert payload.result.messages[0].content.text == "bad" + assert payload.result.messages[1].content.text == "too long" + + async def test_prompt_post_fetch_model_copy_error_does_not_partially_mutate(self): + plugin = SearchReplacePlugin( + PluginConfig( + name="regex_filter", + kind="cpex_regex_filter.regex_filter.SearchReplacePlugin", + version="0.1.0", + hooks=["prompt_post_fetch"], + config={ + "words": [{"search": "bad", "replace": "good"}], + "max_text_bytes": 4, + }, + ) + ) + payload = self.ModelCopyPayload( + result=self.ModelCopyPayload( + messages=[ + self.ModelCopyPayload( + 
role="assistant", content=self.ModelCopyPayload(text="bad") + ), + self.ModelCopyPayload( + role="assistant", content=self.ModelCopyPayload(text="too long") + ), + ] + ) + ) + + with pytest.raises(ValueError, match="Text exceeds max_text_bytes"): + await plugin.prompt_post_fetch(payload, _make_context()) + assert payload.result.messages[0].content.text == "bad" + assert payload.result.messages[1].content.text == "too long" + + async def test_prompt_post_fetch_enforces_message_count_limit(self): + plugin = SearchReplacePlugin( + PluginConfig( + name="regex_filter", + kind="cpex_regex_filter.regex_filter.SearchReplacePlugin", + version="0.1.0", + hooks=["prompt_post_fetch"], + config={ + "words": [{"search": "bad", "replace": "good"}], + "max_collection_items": 1, + }, + ) + ) + payload = PromptPosthookPayload( + result=PromptResult( + messages=[ + Message(role="assistant", content=TextContent(text="bad")), + Message(role="assistant", content=TextContent(text="bad")), + ] + ) + ) + with pytest.raises(ValueError, match="Collection exceeds max_collection_items"): + await plugin.prompt_post_fetch(payload, _make_context()) + + async def test_prompt_post_fetch_enforces_total_item_limit(self): + plugin = SearchReplacePlugin( + PluginConfig( + name="regex_filter", + kind="cpex_regex_filter.regex_filter.SearchReplacePlugin", + version="0.1.0", + hooks=["prompt_post_fetch"], + config={ + "words": [{"search": "bad", "replace": "good"}], + "max_total_items": 1, + }, + ) + ) + payload = PromptPosthookPayload( + result=PromptResult( + messages=[ + Message(role="assistant", content=TextContent(text="bad")), + Message(role="assistant", content=TextContent(text="bad")), + ] + ) + ) + with pytest.raises(ValueError, match="Traversal exceeds max_total_items"): + await plugin.prompt_post_fetch(payload, _make_context()) + + async def test_prompt_post_fetch_enforces_aggregate_input_limit(self): + plugin = SearchReplacePlugin( + PluginConfig( + name="regex_filter", + 
kind="cpex_regex_filter.regex_filter.SearchReplacePlugin", + version="0.1.0", + hooks=["prompt_post_fetch"], + config={ + "words": [{"search": "missing", "replace": "found"}], + "max_text_bytes": 16, + "max_total_text_bytes": 5, + }, + ) + ) + payload = PromptPosthookPayload( + result=PromptResult( + messages=[ + Message(role="assistant", content=TextContent(text="aaa")), + Message(role="assistant", content=TextContent(text="aaa")), + ] + ) + ) + with pytest.raises(ValueError, match="Input exceeds max_total_text_bytes"): + await plugin.prompt_post_fetch(payload, _make_context()) + + async def test_prompt_post_fetch_enforces_aggregate_output_limit(self): + plugin = SearchReplacePlugin( + PluginConfig( + name="regex_filter", + kind="cpex_regex_filter.regex_filter.SearchReplacePlugin", + version="0.1.0", + hooks=["prompt_post_fetch"], + config={ + "words": [{"search": "bad", "replace": "good"}], + "max_output_bytes": 7, + }, + ) + ) + payload = PromptPosthookPayload( + result=PromptResult( + messages=[ + Message(role="assistant", content=TextContent(text="bad")), + Message(role="assistant", content=TextContent(text="bad")), + ] + ) + ) + with pytest.raises(ValueError, match="Output exceeds max_output_bytes"): + await plugin.prompt_post_fetch(payload, _make_context()) + + async def test_prompt_post_fetch_output_limit_counts_unchanged_messages(self): + plugin = SearchReplacePlugin( + PluginConfig( + name="regex_filter", + kind="cpex_regex_filter.regex_filter.SearchReplacePlugin", + version="0.1.0", + hooks=["prompt_post_fetch"], + config={ + "words": [{"search": "missing", "replace": "found"}], + "max_output_bytes": 5, + }, + ) + ) + payload = PromptPosthookPayload( + result=PromptResult( + messages=[ + Message(role="assistant", content=TextContent(text="aaa")), + Message(role="assistant", content=TextContent(text="aaa")), + ] + ) + ) + with pytest.raises(ValueError, match="Output exceeds max_output_bytes"): + await plugin.prompt_post_fetch(payload, _make_context()) + + 
async def test_prompt_post_fetch_no_change_returns_default_result(self, plugin): + payload = PromptPosthookPayload( + result=PromptResult( + messages=[Message(role="assistant", content=TextContent(text="This is fine"))] + ) + ) + result = await plugin.prompt_post_fetch(payload, _make_context()) + assert result.modified_payload is None + assert result.continue_processing is True + + async def test_prompt_post_fetch_ignores_messages_without_text(self, plugin): + class BadContent: + pass + + class BadMessage: + role = "assistant" + content = BadContent() + + payload = PromptPosthookPayload(result=PromptResult(messages=[BadMessage()])) + result = await plugin.prompt_post_fetch(payload, _make_context()) + assert result.modified_payload is None + + async def test_prompt_post_fetch_non_list_messages_returns_default_result(self, plugin): + class BadResult: + messages = "not-a-list" + + payload = PromptPosthookPayload(result=BadResult()) + result = await plugin.prompt_post_fetch(payload, _make_context()) + assert result.modified_payload is None + + async def test_tool_pre_invoke_nested_dict(self, plugin): + payload = ToolPreInvokePayload(name="search", args={"outer": {"inner": "bad"}}) + result = await plugin.tool_pre_invoke(payload, _make_context()) + assert result.modified_payload is not None + assert result.modified_payload.args["outer"]["inner"] == "good" + + async def test_tool_post_invoke_list_result(self, plugin): + payload = ToolPostInvokePayload(name="search", result=["bad", "still bad"]) + result = await plugin.tool_post_invoke(payload, _make_context()) + assert result.modified_payload is not None + assert result.modified_payload.result == ["good", "still good"] + + async def test_none_args_are_left_untouched(self, plugin): + payload = PromptPrehookPayload(prompt_id="prompt-1", args=None) + result = await plugin.prompt_pre_fetch(payload, _make_context()) + assert result.modified_payload is None diff --git a/plugins/rust/python-package/regex_filter/uv.lock 
b/plugins/rust/python-package/regex_filter/uv.lock new file mode 100644 index 0000000..7efd8bf --- /dev/null +++ b/plugins/rust/python-package/regex_filter/uv.lock @@ -0,0 +1,127 @@ +version = 1 +revision = 3 +requires-python = ">=3.11" + +[[package]] +name = "colorama" +version = "0.4.6" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/d8/53/6f443c9a4a8358a93a6792e2acffb9d9d5cb0a5cfd8802644b7b1c9a02e4/colorama-0.4.6.tar.gz", hash = "sha256:08695f5cb7ed6e0531a20572697297273c47b8cae5a63ffc6d6ed5c201be6e44", size = 27697, upload-time = "2022-10-25T02:36:22.414Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/d1/d6/3965ed04c63042e047cb6a3e6ed1a63a35087b6a609aa3a15ed8ac56c221/colorama-0.4.6-py2.py3-none-any.whl", hash = "sha256:4f1d9991f5acc0ca119f9d443620b77f9d6b33703e51011c16baf57afb285fc6", size = 25335, upload-time = "2022-10-25T02:36:20.889Z" }, +] + +[[package]] +name = "cpex-regex-filter" +source = { editable = "." 
} + +[package.dev-dependencies] +dev = [ + { name = "maturin" }, + { name = "pytest" }, + { name = "pytest-asyncio" }, +] + +[package.metadata] + +[package.metadata.requires-dev] +dev = [ + { name = "maturin", specifier = ">=1.12.6" }, + { name = "pytest", specifier = ">=8.0" }, + { name = "pytest-asyncio", specifier = ">=0.23" }, +] + +[[package]] +name = "iniconfig" +version = "2.3.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/72/34/14ca021ce8e5dfedc35312d08ba8bf51fdd999c576889fc2c24cb97f4f10/iniconfig-2.3.0.tar.gz", hash = "sha256:c76315c77db068650d49c5b56314774a7804df16fee4402c1f19d6d15d8c4730", size = 20503, upload-time = "2025-10-18T21:55:43.219Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/cb/b1/3846dd7f199d53cb17f49cba7e651e9ce294d8497c8c150530ed11865bb8/iniconfig-2.3.0-py3-none-any.whl", hash = "sha256:f631c04d2c48c52b84d0d0549c99ff3859c98df65b3101406327ecc7d53fbf12", size = 7484, upload-time = "2025-10-18T21:55:41.639Z" }, +] + +[[package]] +name = "maturin" +version = "1.12.6" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/0c/18/8b2eebd3ea086a5ec73d7081f95ec64918ceda1900075902fc296ea3ad55/maturin-1.12.6.tar.gz", hash = "sha256:d37be3a811a7f2ee28a0fa0964187efa50e90f21da0c6135c27787fa0b6a89db", size = 269165, upload-time = "2026-03-01T14:54:04.21Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/71/8b/9ddfde8a485489e3ebdc50ee3042ef1c854f00dfea776b951068f6ffe451/maturin-1.12.6-py3-none-linux_armv6l.whl", hash = "sha256:6892b4176992fcc143f9d1c1c874a816e9a041248eef46433db87b0f0aff4278", size = 9789847, upload-time = "2026-03-01T14:54:09.172Z" }, + { url = "https://files.pythonhosted.org/packages/ef/e8/5f7fd3763f214a77ac0388dbcc71cc30aec5490016bd0c8e6bd729fc7b0a/maturin-1.12.6-py3-none-macosx_10_12_x86_64.macosx_11_0_arm64.macosx_10_12_universal2.whl", hash = 
"sha256:c0c742beeeef7fb93b6a81bd53e75507887e396fd1003c45117658d063812dad", size = 19023833, upload-time = "2026-03-01T14:53:46.743Z" }, + { url = "https://files.pythonhosted.org/packages/e0/7f/706ff3839c8b2046436d4c2bc97596c558728264d18abc298a1ad862a4be/maturin-1.12.6-py3-none-macosx_10_12_x86_64.whl", hash = "sha256:2cb41139295eed6411d3cdafc7430738094c2721f34b7eeb44f33cac516115dc", size = 9821620, upload-time = "2026-03-01T14:54:12.04Z" }, + { url = "https://files.pythonhosted.org/packages/0e/9c/70917fb123c8dd6b595e913616c9c72d730cbf4a2b6cac8077dc02a12586/maturin-1.12.6-py3-none-manylinux_2_12_i686.manylinux2010_i686.musllinux_1_1_i686.whl", hash = "sha256:351f3af1488a7cbdcff3b6d8482c17164273ac981378a13a4a9937a49aec7d71", size = 9849107, upload-time = "2026-03-01T14:53:48.971Z" }, + { url = "https://files.pythonhosted.org/packages/59/ea/f1d6ad95c0a12fbe761a7c28a57540341f188564dbe8ad730a4d1788cd32/maturin-1.12.6-py3-none-manylinux_2_12_x86_64.manylinux2010_x86_64.musllinux_1_1_x86_64.whl", hash = "sha256:6dbddfe4dc7ddee60bbac854870bd7cfec660acb54d015d24597d59a1c828f61", size = 10242855, upload-time = "2026-03-01T14:53:44.605Z" }, + { url = "https://files.pythonhosted.org/packages/93/1b/2419843a4f1d2fb4747f3dc3d9c4a2881cd97a3274dd94738fcdf0835e79/maturin-1.12.6-py3-none-manylinux_2_17_aarch64.manylinux2014_aarch64.musllinux_1_1_aarch64.whl", hash = "sha256:8fdb0f63e77ee3df0f027a120e9af78dbc31edf0eb0f263d55783c250c33b728", size = 9674972, upload-time = "2026-03-01T14:53:52.763Z" }, + { url = "https://files.pythonhosted.org/packages/71/46/b60ab2fc996d904b40e55bd475599dcdccd8f7ad3e649bf95e87970df466/maturin-1.12.6-py3-none-manylinux_2_17_armv7l.manylinux2014_armv7l.musllinux_1_1_armv7l.whl", hash = "sha256:fa84b7493a2e80759cacc2e668fa5b444d55b9994e90707c42904f55d6322c1e", size = 9645755, upload-time = "2026-03-01T14:53:58.497Z" }, + { url = 
"https://files.pythonhosted.org/packages/a4/96/03f2b55a8c226805115232fc23c4a4f33f0c9d39e11efab8166dc440f80d/maturin-1.12.6-py3-none-manylinux_2_17_ppc64le.manylinux2014_ppc64le.musllinux_1_1_ppc64le.whl", hash = "sha256:e90dc12bc6a38e9495692a36c9e231c4d7e0c9bfde60719468ab7d8673db3c45", size = 12737612, upload-time = "2026-03-01T14:54:05.393Z" }, + { url = "https://files.pythonhosted.org/packages/2b/c2/648667022c5b53cdccefa67c245e8a984970f3045820f00c2e23bdb2aff4/maturin-1.12.6-py3-none-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:06fc8d089f98623ce924c669b70911dfed30f9a29956c362945f727f9abc546b", size = 10455028, upload-time = "2026-03-01T14:54:07.349Z" }, + { url = "https://files.pythonhosted.org/packages/63/d6/5b5efe3ca0c043357ed3f8d2b2d556169fdbf1ff75e50e8e597708a359d2/maturin-1.12.6-py3-none-manylinux_2_31_riscv64.musllinux_1_1_riscv64.whl", hash = "sha256:75133e56274d43b9227fd49dca9a86e32f1fd56a7b55544910c4ce978c2bb5aa", size = 10014531, upload-time = "2026-03-01T14:53:54.548Z" }, + { url = "https://files.pythonhosted.org/packages/68/d5/39c594c27b1a8b32a0cb95fff9ad60b888c4352d1d1c389ac1bd20dc1e16/maturin-1.12.6-py3-none-win32.whl", hash = "sha256:3f32e0a3720b81423c9d35c14e728cb1f954678124749776dc72d533ea1115e8", size = 8553012, upload-time = "2026-03-01T14:53:50.706Z" }, + { url = "https://files.pythonhosted.org/packages/94/66/b262832a91747e04051e21f986bd01a8af81fbffafacc7d66a11e79aab5f/maturin-1.12.6-py3-none-win_amd64.whl", hash = "sha256:977290159d252db946054a0555263c59b3d0c7957135c69e690f4b1558ee9983", size = 9890470, upload-time = "2026-03-01T14:53:56.659Z" }, + { url = "https://files.pythonhosted.org/packages/e3/47/76b8ca470ddc8d7d36aa8c15f5a6aed1841806bb93a0f4ead8ee61e9a088/maturin-1.12.6-py3-none-win_arm64.whl", hash = "sha256:bae91976cdc8148038e13c881e1e844e5c63e58e026e8b9945aa2d19b3b4ae89", size = 8606158, upload-time = "2026-03-01T14:54:02.423Z" }, +] + +[[package]] +name = "packaging" +version = "26.0" +source = { registry = 
"https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/65/ee/299d360cdc32edc7d2cf530f3accf79c4fca01e96ffc950d8a52213bd8e4/packaging-26.0.tar.gz", hash = "sha256:00243ae351a257117b6a241061796684b084ed1c516a08c48a3f7e147a9d80b4", size = 143416, upload-time = "2026-01-21T20:50:39.064Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/b7/b9/c538f279a4e237a006a2c98387d081e9eb060d203d8ed34467cc0f0b9b53/packaging-26.0-py3-none-any.whl", hash = "sha256:b36f1fef9334a5588b4166f8bcd26a14e521f2b55e6b9de3aaa80d3ff7a37529", size = 74366, upload-time = "2026-01-21T20:50:37.788Z" }, +] + +[[package]] +name = "pluggy" +version = "1.6.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/f9/e2/3e91f31a7d2b083fe6ef3fa267035b518369d9511ffab804f839851d2779/pluggy-1.6.0.tar.gz", hash = "sha256:7dcc130b76258d33b90f61b658791dede3486c3e6bfb003ee5c9bfb396dd22f3", size = 69412, upload-time = "2025-05-15T12:30:07.975Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/54/20/4d324d65cc6d9205fabedc306948156824eb9f0ee1633355a8f7ec5c66bf/pluggy-1.6.0-py3-none-any.whl", hash = "sha256:e920276dd6813095e9377c0bc5566d94c932c33b27a3e3945d8389c374dd4746", size = 20538, upload-time = "2025-05-15T12:30:06.134Z" }, +] + +[[package]] +name = "pygments" +version = "2.20.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/c3/b2/bc9c9196916376152d655522fdcebac55e66de6603a76a02bca1b6414f6c/pygments-2.20.0.tar.gz", hash = "sha256:6757cd03768053ff99f3039c1a36d6c0aa0b263438fcab17520b30a303a82b5f", size = 4955991, upload-time = "2026-03-29T13:29:33.898Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/f4/7e/a72dd26f3b0f4f2bf1dd8923c85f7ceb43172af56d63c7383eb62b332364/pygments-2.20.0-py3-none-any.whl", hash = "sha256:81a9e26dd42fd28a23a2d169d86d7ac03b46e2f8b59ed4698fb4785f946d0176", size = 1231151, upload-time = 
"2026-03-29T13:29:30.038Z" }, +] + +[[package]] +name = "pytest" +version = "9.0.3" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "colorama", marker = "sys_platform == 'win32'" }, + { name = "iniconfig" }, + { name = "packaging" }, + { name = "pluggy" }, + { name = "pygments" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/7d/0d/549bd94f1a0a402dc8cf64563a117c0f3765662e2e668477624baeec44d5/pytest-9.0.3.tar.gz", hash = "sha256:b86ada508af81d19edeb213c681b1d48246c1a91d304c6c81a427674c17eb91c", size = 1572165, upload-time = "2026-04-07T17:16:18.027Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/d4/24/a372aaf5c9b7208e7112038812994107bc65a84cd00e0354a88c2c77a617/pytest-9.0.3-py3-none-any.whl", hash = "sha256:2c5efc453d45394fdd706ade797c0a81091eccd1d6e4bccfcd476e2b8e0ab5d9", size = 375249, upload-time = "2026-04-07T17:16:16.13Z" }, +] + +[[package]] +name = "pytest-asyncio" +version = "1.3.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "pytest" }, + { name = "typing-extensions", marker = "python_full_version < '3.13'" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/90/2c/8af215c0f776415f3590cac4f9086ccefd6fd463befeae41cd4d3f193e5a/pytest_asyncio-1.3.0.tar.gz", hash = "sha256:d7f52f36d231b80ee124cd216ffb19369aa168fc10095013c6b014a34d3ee9e5", size = 50087, upload-time = "2025-11-10T16:07:47.256Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/e5/35/f8b19922b6a25bc0880171a2f1a003eaeb93657475193ab516fd87cac9da/pytest_asyncio-1.3.0-py3-none-any.whl", hash = "sha256:611e26147c7f77640e6d0a92a38ed17c3e9848063698d5c93d5aa7aa11cebff5", size = 15075, upload-time = "2025-11-10T16:07:45.537Z" }, +] + +[[package]] +name = "typing-extensions" +version = "4.15.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = 
"https://files.pythonhosted.org/packages/72/94/1a15dd82efb362ac84269196e94cf00f187f7ed21c242792a923cdb1c61f/typing_extensions-4.15.0.tar.gz", hash = "sha256:0cea48d173cc12fa28ecabc3b837ea3cf6f38c6d1136f85cbaaf598984861466", size = 109391, upload-time = "2025-08-25T13:49:26.313Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/18/67/36e9267722cc04a6b9f15c7f3441c2363321a3ea07da7ae0c0707beb2a9c/typing_extensions-4.15.0-py3-none-any.whl", hash = "sha256:f0fa19c6845758ab08074a0cfa8b7aecb71c999ca73d62883bc25cc018c4e548", size = 44614, upload-time = "2025-08-25T13:49:24.86Z" }, +] diff --git a/tests/test_plugin_catalog.py b/tests/test_plugin_catalog.py index 496993d..7b13c15 100644 --- a/tests/test_plugin_catalog.py +++ b/tests/test_plugin_catalog.py @@ -1,7 +1,9 @@ import json import os import ast +import importlib.util import subprocess +import sys import tempfile import textwrap import tomllib @@ -12,6 +14,12 @@ REPO_ROOT = Path(__file__).resolve().parents[1] SCRIPT = REPO_ROOT / "tools" / "plugin_catalog.py" +PLUGIN_CATALOG_SPEC = importlib.util.spec_from_file_location("plugin_catalog", SCRIPT) +assert PLUGIN_CATALOG_SPEC is not None +plugin_catalog = importlib.util.module_from_spec(PLUGIN_CATALOG_SPEC) +assert PLUGIN_CATALOG_SPEC.loader is not None +sys.modules[PLUGIN_CATALOG_SPEC.name] = plugin_catalog +PLUGIN_CATALOG_SPEC.loader.exec_module(plugin_catalog) def run_catalog(*args: str, cwd: Path | None = None) -> subprocess.CompletedProcess[str]: @@ -150,6 +158,18 @@ def _create_plugin(self, root: Path, slug: str) -> Path: ) return plugin_dir + def _plugin_record(self, slug: str) -> object: + return plugin_catalog.PluginRecord( + slug=slug, + path=f"plugins/rust/python-package/{slug}", + package_name=f"cpex-{slug.replace('_', '-')}", + cargo_package_name=slug, + module_name=f"cpex_{slug}", + kind=f"cpex_{slug}.{slug}:DemoPlugin", + version="0.0.1", + release_wheel_matrix=[], + ) + def _parse_manifest_defaults(self, manifest_path: Path) -> dict[str, 
object]: defaults: dict[str, object] = {} in_defaults = False @@ -173,6 +193,84 @@ def _parse_manifest_defaults(self, manifest_path: Path) -> dict[str, object]: defaults[key] = int(value) return defaults + def test_workspace_dependency_policy_fails_closed_for_known_plugin_subset(self) -> None: + with tempfile.TemporaryDirectory() as tmpdir: + root = Path(tmpdir) + (root / "Cargo.toml").write_text( + "[workspace]\n" + 'members = ["plugins/rust/python-package/rate_limiter"]\n' + "[workspace.dependencies]\n" + ) + with self.assertRaisesRegex( + plugin_catalog.CatalogError, + "Workspace dependency policy must list every managed plugin", + ): + plugin_catalog._validate_workspace_dependency_ownership( + root, [self._plugin_record("rate_limiter")] + ) + + def test_workspace_dependency_policy_requires_workspace_dependencies_for_known_plugin(self) -> None: + with tempfile.TemporaryDirectory() as tmpdir: + root = Path(tmpdir) + members = [ + f"plugins/rust/python-package/{slug}" + for slug in sorted(plugin_catalog.REQUIRED_PLUGIN_WORKSPACE_DEPENDENCIES) + ] + (root / "Cargo.toml").write_text( + "[workspace]\n" + f"members = {members!r}\n" + ) + with self.assertRaisesRegex( + plugin_catalog.CatalogError, + r"Workspace Cargo.toml must define \[workspace.dependencies\]", + ): + plugin_catalog._validate_workspace_dependency_ownership( + root, + [ + self._plugin_record(slug) + for slug in plugin_catalog.REQUIRED_PLUGIN_WORKSPACE_DEPENDENCIES + ], + ) + + def test_workspace_dependency_policy_rejects_direct_managed_dependency(self) -> None: + original_required = plugin_catalog.REQUIRED_WORKSPACE_DEPENDENCIES + original_plugin_required = plugin_catalog.REQUIRED_PLUGIN_WORKSPACE_DEPENDENCIES + plugin_catalog.REQUIRED_WORKSPACE_DEPENDENCIES = { + "regex": "1.12", + "serde_json": "1.0", + } + plugin_catalog.REQUIRED_PLUGIN_WORKSPACE_DEPENDENCIES = { + "demo_plugin": {"dependencies": ("regex",)} + } + try: + with tempfile.TemporaryDirectory() as tmpdir: + root = Path(tmpdir) + 
plugin_dir = root / "plugins" / "rust" / "python-package" / "demo_plugin" + plugin_dir.mkdir(parents=True) + (root / "Cargo.toml").write_text( + '[workspace]\n' + 'members = ["plugins/rust/python-package/demo_plugin"]\n' + '[workspace.dependencies]\n' + 'regex = "1.12"\n' + 'serde_json = "1.0"\n' + ) + (plugin_dir / "Cargo.toml").write_text( + "[dependencies]\n" + "regex = { workspace = true }\n" + 'serde_json = "1.0"\n' + ) + + with self.assertRaisesRegex( + plugin_catalog.CatalogError, + "unexpected workspace dependencies", + ): + plugin_catalog._validate_workspace_dependency_ownership( + root, [self._plugin_record("demo_plugin")] + ) + finally: + plugin_catalog.REQUIRED_WORKSPACE_DEPENDENCIES = original_required + plugin_catalog.REQUIRED_PLUGIN_WORKSPACE_DEPENDENCIES = original_plugin_required + def _extract_pii_runtime_defaults(self) -> dict[str, object]: config_text = ( REPO_ROOT @@ -318,6 +416,19 @@ def test_repo_centralizes_shared_cargo_dependencies(self) -> None: "criterion": {"workspace": True}, }, }, + "regex_filter": { + "dependencies": { + "cpex_framework_bridge": {"workspace": True}, + "log": {"workspace": True}, + "pyo3": {"workspace": True}, + "pyo3-log": {"workspace": True}, + "pyo3-stub-gen": {"workspace": True}, + "regex": {"workspace": True}, + }, + "dev-dependencies": { + "criterion": {"workspace": True}, + }, + }, "retry_with_backoff": { "dependencies": { "log": {"workspace": True}, @@ -369,6 +480,17 @@ def test_repo_centralizes_shared_cargo_dependencies(self) -> None: for section_name, expected_deps in expected_sections.items(): actual_section = plugin_cargo.get(section_name, {}) self.assertIsInstance(actual_section, dict) + self.assertEqual( + { + dependency_name + for dependency_name, value in actual_section.items() + if dependency_name in workspace_deps + and isinstance(value, dict) + and value.get("workspace") is True + }, + set(expected_deps), + f"{slug} should not add untracked workspace dependencies in {section_name}", + ) for 
dependency_name, expected_value in expected_deps.items(): self.assertEqual( actual_section.get(dependency_name), @@ -387,6 +509,7 @@ def test_repo_lists_all_managed_plugins(self) -> None: "encoded_exfil_detection", "pii_filter", "rate_limiter", + "regex_filter", "retry_with_backoff", "secrets_detection", "url_reputation", @@ -399,6 +522,7 @@ def test_repo_lists_all_managed_plugins(self) -> None: "encoded_exfil_detection": "cpex_encoded_exfil_detection", "pii_filter": "cpex_pii_filter", "rate_limiter": "cpex_rate_limiter", + "regex_filter": "cpex_regex_filter", "retry_with_backoff": "cpex_retry_with_backoff", "secrets_detection": "cpex_secrets_detection", "url_reputation": "cpex_url_reputation", @@ -410,6 +534,7 @@ def test_repo_lists_all_managed_plugins(self) -> None: "encoded_exfil_detection": "cpex_encoded_exfil_detection.encoded_exfil_detection.EncodedExfilDetectorPlugin", "pii_filter": "cpex_pii_filter.pii_filter.PIIFilterPlugin", "rate_limiter": "cpex_rate_limiter.rate_limiter.RateLimiterPlugin", + "regex_filter": "cpex_regex_filter.regex_filter.SearchReplacePlugin", "retry_with_backoff": "cpex_retry_with_backoff.retry_with_backoff.RetryWithBackoffPlugin", "secrets_detection": "cpex_secrets_detection.secrets_detection.SecretsDetectionPlugin", "url_reputation": "cpex_url_reputation.url_reputation.URLReputationPlugin", @@ -1832,6 +1957,7 @@ def test_ci_selection_field_prints_json_and_bool_scalars(self) -> None: "encoded_exfil_detection", "pii_filter", "rate_limiter", + "regex_filter", "retry_with_backoff", "secrets_detection", "url_reputation", @@ -1844,7 +1970,7 @@ def test_ci_selection_field_prints_json_and_bool_scalars(self) -> None: result = run_catalog("ci-selection-field", str(REPO_ROOT), "all", "", "", "plugin_count") self.assertEqual(result.returncode, 0, result.stderr) - self.assertEqual(result.stdout.strip(), "6") + self.assertEqual(result.stdout.strip(), "7") result = run_catalog("ci-selection-field", str(REPO_ROOT), "all", "", "", "cargo_packages") 
self.assertEqual(result.returncode, 0, result.stderr) @@ -1854,6 +1980,7 @@ def test_ci_selection_field_prints_json_and_bool_scalars(self) -> None: "encoded_exfil_detection", "pii_filter", "rate_limiter", + "regex_filter", "retry_with_backoff", "secrets_detection", "url_reputation", @@ -2026,6 +2153,7 @@ def test_plugin_makefiles_expose_ci_targets(self) -> None: "encoded_exfil_detection", "pii_filter", "rate_limiter", + "regex_filter", "retry_with_backoff", "secrets_detection", "url_reputation", @@ -2243,11 +2371,12 @@ def test_ci_workflow_dispatch_detect_step_selects_all_plugins(self) -> None: if "=" in line ) self.assertEqual(outputs["has_plugins"], "true") - self.assertEqual(outputs["plugin_count"], "6") + self.assertEqual(outputs["plugin_count"], "7") expected_plugins = [ "encoded_exfil_detection", "pii_filter", "rate_limiter", + "regex_filter", "retry_with_backoff", "secrets_detection", "url_reputation", @@ -2532,8 +2661,18 @@ def test_release_workflow_tests_artifacts_outside_source_tree(self) -> None: self.assertIn("publish_enabled:", workflow) self.assertIn('default: false', workflow) self.assertIn('git fetch --force origin "refs/heads/main:refs/remotes/origin/main"', workflow) - self.assertIn('if git merge-base --is-ancestor "${tag_ref}" "refs/remotes/origin/main"; then', workflow) + self.assertIn('tag_commit="$(git rev-list -n 1 "${tag_ref}")"', workflow) + self.assertIn('if git merge-base --is-ancestor "${tag_commit}" "refs/remotes/origin/main"; then', workflow) + self.assertIn('git checkout --detach "${checkout_ref}"', workflow) self.assertIn("tag_on_main: ${{ steps.resolve.outputs.tag_on_main }}", workflow) + self.assertLess( + workflow.index('git checkout --detach "${checkout_ref}"'), + workflow.index("python3 tools/plugin_catalog.py validate ."), + ) + self.assertLess( + workflow.index("python3 tools/plugin_catalog.py validate ."), + workflow.index('release_info="$(python3 tools/plugin_catalog.py release-info . 
"${tag}")'), + ) self.assertIn( 'wheel_matrix="$(python3 -c \'import json; print(json.dumps([{', workflow, @@ -2555,9 +2694,12 @@ def test_release_workflow_tests_artifacts_outside_source_tree(self) -> None: self.assertIn("runs-on: ${{ matrix.runner }}", workflow) self.assertIn("name: wheel-${{ matrix.platform }}", workflow) self.assertIn( - "if: ${{ (github.event_name != 'workflow_call' || inputs.publish_enabled) && (needs.resolve.outputs.publish_env != 'pypi' || needs.resolve.outputs.tag_on_main == 'true') }}", + "if: ${{ github.event_name != 'workflow_call' || inputs.publish_enabled }}", workflow, ) + self.assertIn("Verify PyPI tag is on main", workflow) + self.assertIn("needs.resolve.outputs.tag_on_main != 'true'", workflow) + self.assertIn("needs.resolve.outputs.tag_on_main == 'true'", workflow) self.assertNotIn("matrix.", preflight_section) self.assertIn( "matrix.runner != 'ubuntu-24.04-s390x' && matrix.runner != 'ubuntu-24.04-ppc64le'", diff --git a/tools/plugin_catalog.py b/tools/plugin_catalog.py index f2f5257..ca26271 100644 --- a/tools/plugin_catalog.py +++ b/tools/plugin_catalog.py @@ -89,6 +89,17 @@ ), "dev-dependencies": ("criterion",), }, + "regex_filter": { + "dependencies": ( + "cpex_framework_bridge", + "log", + "pyo3", + "pyo3-log", + "pyo3-stub-gen", + "regex", + ), + "dev-dependencies": ("criterion",), + }, "retry_with_backoff": { "dependencies": ("log", "pyo3", "pyo3-log", "pyo3-stub-gen", "rand"), "dev-dependencies": (), @@ -315,16 +326,27 @@ def _validate_workspace_dependency_ownership( root: Path, plugins: list[PluginRecord] ) -> None: plugin_records = {plugin.slug: plugin for plugin in plugins} - if set(plugin_records) != set(REQUIRED_PLUGIN_WORKSPACE_DEPENDENCIES): - return - cargo = _parse_cargo(root / "Cargo.toml") workspace = cargo.get("workspace", {}) if not isinstance(workspace, dict): raise CatalogError("Workspace Cargo.toml must define [workspace] metadata as a table") + discovered_slugs = set(plugin_records) + policy_slugs = 
set(REQUIRED_PLUGIN_WORKSPACE_DEPENDENCIES) workspace_dependencies = workspace.get("dependencies") if not isinstance(workspace_dependencies, dict): - raise CatalogError("Workspace Cargo.toml must define [workspace.dependencies]") + if discovered_slugs == policy_slugs: + raise CatalogError("Workspace Cargo.toml must define [workspace.dependencies]") + return + + if discovered_slugs != policy_slugs: + if not discovered_slugs.intersection(policy_slugs): + return + missing_policy = sorted(discovered_slugs - policy_slugs) + missing_plugins = sorted(policy_slugs - discovered_slugs) + raise CatalogError( + "Workspace dependency policy must list every managed plugin; " + f"missing policy entries: {missing_policy}; missing plugins: {missing_plugins}" + ) for dependency_name, expected_value in REQUIRED_WORKSPACE_DEPENDENCIES.items(): actual_value = workspace_dependencies.get(dependency_name) @@ -341,6 +363,16 @@ def _validate_workspace_dependency_ownership( section = plugin_cargo.get(section_name, {}) if not isinstance(section, dict): raise CatalogError(f"{cargo_path}: [{section_name}] must be a table") + unexpected_workspace_deps = sorted( + dependency_name + for dependency_name, value in section.items() + if dependency_name in REQUIRED_WORKSPACE_DEPENDENCIES + and dependency_name not in dependency_names + ) + if unexpected_workspace_deps: + raise CatalogError( + f"{cargo_path}: unexpected workspace dependencies in [{section_name}]: {unexpected_workspace_deps}" + ) for dependency_name in dependency_names: if section.get(dependency_name) != {"workspace": True}: raise CatalogError(