diff --git a/scrubber.py b/scrubber.py index b93b6a43..8055263f 100644 --- a/scrubber.py +++ b/scrubber.py @@ -1,16 +1,9 @@ -""" -Legacy PII scrubber — preserved for reference. -Active anonymization logic has moved to app/ai-service/services/pii_scrubber.py. -""" -import re +"""Deprecated legacy PII scrubber reference. -def scrub_pii(text: str) -> str: - """Remove emails, phone numbers, and IDs from text using regex. - - This is a legacy module kept for documentation purposes. - """ - text = re.sub(r'[\w\.-]+@[\w\.-]+', '[REDACTED_EMAIL]', text) - text = re.sub(r'\+?\d[\d\s\-]{7,}\d', '[REDACTED_PHONE]', text) - text = re.sub(r'\b\d{4,}\b', '[REDACTED_ID]', text) - return text +The active anonymization implementation is +``app/ai-service/services/pii_scrubber.py::PIIScrubberService``. +This module intentionally does not expose ``scrub_pii`` so new code and tests +use the canonical AI service implementation instead of the legacy regex-only +helper. +""" diff --git a/tests/fixtures/expected_outputs.json b/tests/fixtures/expected_outputs.json index 7a9888b8..efc5ba2c 100644 --- a/tests/fixtures/expected_outputs.json +++ b/tests/fixtures/expected_outputs.json @@ -1,15 +1,15 @@ [ { "name": "email test", - "expected": "Contact me at [REDACTED_EMAIL]" + "expected": "Contact me at [EMAIL_ADDRESS]" }, { "name": "phone test", - "expected": "Call me on [REDACTED_PHONE]" + "expected": "Call me on [PHONE_NUMBER]" }, { "name": "id test", - "expected": "My ID is [REDACTED_ID]" + "expected": "My ID is [ID_NUMBER]" }, { "name": "safe text", diff --git a/tests/fixtures/pii_inputs.json b/tests/fixtures/pii_inputs.json index b31fc830..21dcba58 100644 --- a/tests/fixtures/pii_inputs.json +++ b/tests/fixtures/pii_inputs.json @@ -9,7 +9,7 @@ }, { "name": "id test", - "input": "My ID is 12345678" + "input": "My ID is AB12345678" }, { "name": "safe text", diff --git a/tests/test_pii_scrubber.py b/tests/test_pii_scrubber.py index 7240af7e..d2246052 100644 --- a/tests/test_pii_scrubber.py +++ b/tests/test_pii_scrubber.py @@ -1,6 +1,13 @@ import json import difflib -from scrubber import scrub_pii +import sys +from pathlib import Path + +ROOT_DIR = Path(__file__).resolve().parents[1] +AI_SERVICE_DIR = ROOT_DIR / "app" / "ai-service" +sys.path.insert(0, str(AI_SERVICE_DIR)) + +from services.pii_scrubber import PIIScrubberService def load_json(path): @@ -13,10 +20,11 @@ def load_json(path): def test_pii_scrubbing(): + service = PIIScrubberService() for inp, exp in zip(inputs, expected): - result = scrub_pii(inp["input"]) + result = service.anonymize(inp["input"])["anonymized_text"] if result != exp["expected"]: