Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
21 changes: 7 additions & 14 deletions scrubber.py
Original file line number Diff line number Diff line change
@@ -1,16 +1,9 @@
"""
Legacy PII scrubber — preserved for reference.
Active anonymization logic has moved to app/ai-service/services/pii_scrubber.py.
"""
import re
"""Deprecated legacy PII scrubber reference.

def scrub_pii(text: str) -> str:
"""Remove emails, phone numbers, and IDs from text using regex.

This is a legacy module kept for documentation purposes.
"""
text = re.sub(r'[\w\.-]+@[\w\.-]+', '[REDACTED_EMAIL]', text)
text = re.sub(r'\+?\d[\d\s\-]{7,}\d', '[REDACTED_PHONE]', text)
text = re.sub(r'\b\d{4,}\b', '[REDACTED_ID]', text)
return text
The active anonymization implementation is
``app/ai-service/services/pii_scrubber.py::PIIScrubberService``.

This module intentionally does not expose ``scrub_pii`` so new code and tests
use the canonical AI service implementation instead of the legacy regex-only
helper.
"""
6 changes: 3 additions & 3 deletions tests/fixtures/expected_outputs.json
Original file line number Diff line number Diff line change
@@ -1,15 +1,15 @@
[
{
"name": "email test",
"expected": "Contact me at [REDACTED_EMAIL]"
"expected": "Contact me at [EMAIL_ADDRESS]"
},
{
"name": "phone test",
"expected": "Call me on [REDACTED_PHONE]"
"expected": "Call me on [PHONE_NUMBER]"
},
{
"name": "id test",
"expected": "My ID is [REDACTED_ID]"
"expected": "My ID is [ID_NUMBER]"
},
{
"name": "safe text",
Expand Down
2 changes: 1 addition & 1 deletion tests/fixtures/pii_inputs.json
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
},
{
"name": "id test",
"input": "My ID is 12345678"
"input": "My ID is AB12345678"
},
{
"name": "safe text",
Expand Down
12 changes: 10 additions & 2 deletions tests/test_pii_scrubber.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,13 @@
import json
import difflib
from scrubber import scrub_pii
import sys
from pathlib import Path

ROOT_DIR = Path(__file__).resolve().parents[1]
AI_SERVICE_DIR = ROOT_DIR / "app" / "ai-service"
sys.path.insert(0, str(AI_SERVICE_DIR))

from services.pii_scrubber import PIIScrubberService


def load_json(path):
Expand All @@ -13,10 +20,11 @@ def load_json(path):


def test_pii_scrubbing():
service = PIIScrubberService()

for inp, exp in zip(inputs, expected):

result = scrub_pii(inp["input"])
result = service.anonymize(inp["input"])["anonymized_text"]

if result != exp["expected"]:

Expand Down