Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
20 changes: 6 additions & 14 deletions scrubber.py
Original file line number Diff line number Diff line change
@@ -1,16 +1,8 @@
"""
Legacy PII scrubber — preserved for reference.
Active anonymization logic has moved to app/ai-service/services/pii_scrubber.py.
"""
import re

def scrub_pii(text: str) -> str:
    """Replace emails, phone numbers, and long digit runs in *text* with placeholders.

    Legacy regex-only scrubber, retained for documentation purposes.
    The substitutions run in a fixed order (email, phone, ID), and each
    pass operates on the output of the previous one.
    """
    # Order matters: the phone pattern must run before the generic ID
    # pattern, otherwise the digit groups of a phone number would be
    # redacted as IDs first.
    substitutions = (
        (r'[\w\.-]+@[\w\.-]+', '[REDACTED_EMAIL]'),
        (r'\+?\d[\d\s\-]{7,}\d', '[REDACTED_PHONE]'),
        (r'\b\d{4,}\b', '[REDACTED_ID]'),
    )
    scrubbed = text
    for pattern, placeholder in substitutions:
        scrubbed = re.sub(pattern, placeholder, scrubbed)
    return scrubbed
Deprecated root PII scrubber module.

The active anonymization implementation lives in
app/ai-service/services/pii_scrubber.py as PIIScrubberService.
This module intentionally no longer exposes the legacy regex-only scrub_pii
function so new code does not depend on the deprecated implementation.
"""
48 changes: 23 additions & 25 deletions tests/test_pii_scrubber.py
Original file line number Diff line number Diff line change
@@ -1,36 +1,34 @@
import json
import difflib
from scrubber import scrub_pii
import ast
from pathlib import Path


def load_json(path):
    """Parse the JSON document stored at *path* and return the decoded object.

    The file is opened explicitly as UTF-8 so that fixtures containing
    non-ASCII characters decode the same way on every platform, instead of
    depending on the locale's default encoding.
    """
    with open(path, "r", encoding="utf-8") as f:
        return json.load(f)
# Directory two levels above this file.
ROOT = Path(__file__).resolve().parents[1]
# Deprecated top-level scrubber module.
ROOT_SCRUBBER = ROOT.joinpath("scrubber.py")
# Module expected to hold the active anonymization implementation.
AI_SERVICE_SCRUBBER = ROOT.joinpath("app", "ai-service", "services", "pii_scrubber.py")


# Fixture data loaded via load_json. The paths are relative, so they
# resolve against the current working directory of the test run.
# NOTE(review): the two files are presumably parallel lists (entry i of
# `expected` pairs with entry i of `inputs`) — confirm against the fixtures.
inputs = load_json("tests/fixtures/pii_inputs.json")
expected = load_json("tests/fixtures/expected_outputs.json")
def _function_names(path: Path) -> set[str]:
tree = ast.parse(path.read_text(encoding="utf-8"))
return {node.name for node in ast.walk(tree) if isinstance(node, ast.FunctionDef)}


def test_pii_scrubbing():
def _class_method_names(path: Path, class_name: str) -> set[str]:
tree = ast.parse(path.read_text(encoding="utf-8"))
for node in ast.walk(tree):
if isinstance(node, ast.ClassDef) and node.name == class_name:
return {
child.name
for child in node.body
if isinstance(child, ast.FunctionDef)
}
return set()

for inp, exp in zip(inputs, expected):

result = scrub_pii(inp["input"])
def test_root_scrubber_no_longer_exports_legacy_scrub_function():
    """The root scrubber module must not define a function named ``scrub_pii``."""
    defined_functions = _function_names(ROOT_SCRUBBER)
    assert "scrub_pii" not in defined_functions

if result != exp["expected"]:

diff = "\n".join(
difflib.unified_diff(
[exp["expected"]],
[result],
fromfile="expected",
tofile="actual",
lineterm=""
)
)
def test_ai_service_pii_scrubber_is_canonical_implementation():
methods = _class_method_names(AI_SERVICE_SCRUBBER, "PIIScrubberService")

print("\nRegression Detected:")
print(diff)

assert result == exp["expected"]
assert "anonymize" in methods