From 08628610a8ccb53e832f0e0dc70d8949b8610b9b Mon Sep 17 00:00:00 2001 From: huazhuang80-star <295584745+huazhuang80-star@users.noreply.github.com> Date: Mon, 22 Jun 2026 22:35:40 +0800 Subject: [PATCH] refactor: deprecate root PII scrubber --- scrubber.py | 20 +++++----------- tests/test_pii_scrubber.py | 48 ++++++++++++++++++-------------------- 2 files changed, 29 insertions(+), 39 deletions(-) diff --git a/scrubber.py b/scrubber.py index b93b6a43..b5ac7d0b 100644 --- a/scrubber.py +++ b/scrubber.py @@ -1,16 +1,8 @@ """ -Legacy PII scrubber — preserved for reference. -Active anonymization logic has moved to app/ai-service/services/pii_scrubber.py. -""" -import re - -def scrub_pii(text: str) -> str: - """Remove emails, phone numbers, and IDs from text using regex. - - This is a legacy module kept for documentation purposes. - """ - text = re.sub(r'[\w\.-]+@[\w\.-]+', '[REDACTED_EMAIL]', text) - text = re.sub(r'\+?\d[\d\s\-]{7,}\d', '[REDACTED_PHONE]', text) - text = re.sub(r'\b\d{4,}\b', '[REDACTED_ID]', text) - return text +Deprecated root PII scrubber module. +The active anonymization implementation lives in +app/ai-service/services/pii_scrubber.py as PIIScrubberService. +This module intentionally no longer exposes the legacy regex-only scrub_pii +function so new code does not depend on the deprecated implementation. +""" diff --git a/tests/test_pii_scrubber.py b/tests/test_pii_scrubber.py index 7240af7e..f163732c 100644 --- a/tests/test_pii_scrubber.py +++ b/tests/test_pii_scrubber.py @@ -1,36 +1,34 @@ -import json -import difflib -from scrubber import scrub_pii +import ast +from pathlib import Path -def load_json(path): - with open(path, "r") as f: - return json.load(f) +ROOT = Path(__file__).resolve().parents[1] +ROOT_SCRUBBER = ROOT / "scrubber.py" +AI_SERVICE_SCRUBBER = ROOT / "app" / "ai-service" / "services" / "pii_scrubber.py" -inputs = load_json("tests/fixtures/pii_inputs.json") -expected = load_json("tests/fixtures/expected_outputs.json") +def _function_names(path: Path) -> set[str]: + tree = ast.parse(path.read_text(encoding="utf-8")) + return {node.name for node in ast.walk(tree) if isinstance(node, ast.FunctionDef)} -def test_pii_scrubbing(): +def _class_method_names(path: Path, class_name: str) -> set[str]: + tree = ast.parse(path.read_text(encoding="utf-8")) + for node in ast.walk(tree): + if isinstance(node, ast.ClassDef) and node.name == class_name: + return { + child.name + for child in node.body + if isinstance(child, ast.FunctionDef) + } + return set() - for inp, exp in zip(inputs, expected): - result = scrub_pii(inp["input"]) +def test_root_scrubber_no_longer_exports_legacy_scrub_function(): + assert "scrub_pii" not in _function_names(ROOT_SCRUBBER) - if result != exp["expected"]: - diff = "\n".join( - difflib.unified_diff( - [exp["expected"]], - [result], - fromfile="expected", - tofile="actual", - lineterm="" - ) - ) +def test_ai_service_pii_scrubber_is_canonical_implementation(): + methods = _class_method_names(AI_SERVICE_SCRUBBER, "PIIScrubberService") - print("\nRegression Detected:") - print(diff) - - assert result == exp["expected"] + assert "anonymize" in methods