diff --git a/scrubber.py b/scrubber.py deleted file mode 100644 index b93b6a43..00000000 --- a/scrubber.py +++ /dev/null @@ -1,16 +0,0 @@ -""" -Legacy PII scrubber — preserved for reference. -Active anonymization logic has moved to app/ai-service/services/pii_scrubber.py. -""" -import re - -def scrub_pii(text: str) -> str: - """Remove emails, phone numbers, and IDs from text using regex. - - This is a legacy module kept for documentation purposes. - """ - text = re.sub(r'[\w\.-]+@[\w\.-]+', '[REDACTED_EMAIL]', text) - text = re.sub(r'\+?\d[\d\s\-]{7,}\d', '[REDACTED_PHONE]', text) - text = re.sub(r'\b\d{4,}\b', '[REDACTED_ID]', text) - return text - diff --git a/tests/fixtures/expected_outputs.json b/tests/fixtures/expected_outputs.json index 7a9888b8..efc5ba2c 100644 --- a/tests/fixtures/expected_outputs.json +++ b/tests/fixtures/expected_outputs.json @@ -1,15 +1,15 @@ [ { "name": "email test", - "expected": "Contact me at [REDACTED_EMAIL]" + "expected": "Contact me at [EMAIL_ADDRESS]" }, { "name": "phone test", - "expected": "Call me on [REDACTED_PHONE]" + "expected": "Call me on [PHONE_NUMBER]" }, { "name": "id test", - "expected": "My ID is [REDACTED_ID]" + "expected": "My ID is [ID_NUMBER]" }, { "name": "safe text", diff --git a/tests/fixtures/pii_inputs.json b/tests/fixtures/pii_inputs.json index b31fc830..1f96bd1c 100644 --- a/tests/fixtures/pii_inputs.json +++ b/tests/fixtures/pii_inputs.json @@ -9,7 +9,7 @@ }, { "name": "id test", - "input": "My ID is 12345678" + "input": "My ID is 12345678901" }, { "name": "safe text", diff --git a/tests/test_pii_scrubber.py b/tests/test_pii_scrubber.py index 7240af7e..3f7bcf4c 100644 --- a/tests/test_pii_scrubber.py +++ b/tests/test_pii_scrubber.py @@ -1,6 +1,12 @@ import json import difflib -from scrubber import scrub_pii +import sys +from pathlib import Path + +AI_SERVICE_PATH = Path(__file__).resolve().parents[1] / "app" / "ai-service" +sys.path.insert(0, str(AI_SERVICE_PATH)) + +from services.pii_scrubber import PIIScrubberService def load_json(path): @@ -10,13 +16,14 @@ def load_json(path): inputs = load_json("tests/fixtures/pii_inputs.json") expected = load_json("tests/fixtures/expected_outputs.json") +scrubber = PIIScrubberService() def test_pii_scrubbing(): for inp, exp in zip(inputs, expected): - result = scrub_pii(inp["input"]) + result = scrubber.anonymize(inp["input"])["anonymized_text"] if result != exp["expected"]: