Skip to content

Instantly share code, notes, and snippets.

@ugochukwu95
Created November 7, 2025 21:08
Show Gist options
  • Select an option

  • Save ugochukwu95/6da2fc69e6e61f7cb30d70cddb410ef8 to your computer and use it in GitHub Desktop.

Select an option

Save ugochukwu95/6da2fc69e6e61f7cb30d70cddb410ef8 to your computer and use it in GitHub Desktop.
PII Detector tool
"""PII detection and redaction to prevent sensitive information disclosure."""
import re
from typing import Any, Dict, List
class PIIDetector:
    """Detects and redacts sensitive information in prompts and outputs.

    Detection is purely regex based: every entry in ``PATTERNS`` is scanned
    independently over the text, so one stretch of text may be reported by
    more than one pattern.
    """

    # Maps a PII type name to the regular expression that recognises it.
    # Iteration order of this dict is the order in which ``redact`` applies
    # the patterns.
    PATTERNS: Dict[str, str] = {
        # The TLD class is letters only; a "|" inside a character class is a
        # literal pipe, not alternation, and would let matches run past the
        # address into following text.
        "email": r'\b[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Za-z]{2,}\b',
        "phone": r'\b(?:\+?1[-.]?)?\(?([0-9]{3})\)?[-.]?([0-9]{3})[-.]?([0-9]{4})\b',
        "ssn": r'\b\d{3}-\d{2}-\d{4}\b',
        "credit_card": r'\b\d{4}[-\s]?\d{4}[-\s]?\d{4}[-\s]?\d{4}\b',
        "ip_address": r'\b(?:\d{1,3}\.){3}\d{1,3}\b',
        "api_key": r'\b[A-Za-z0-9]{32,}\b',
        "aws_key": r'AKIA[0-9A-Z]{16}',
    }

    @classmethod
    def detect(cls, text: str) -> Dict[str, Any]:
        """Detect PII in text.

        Args:
            text: The text to scan.

        Returns:
            A dict with:
              * ``has_pii``: True when at least one pattern matched.
              * ``findings``: one dict per match with ``type``, ``value``,
                ``start`` and ``end`` (offsets into ``text``), grouped by
                pattern in ``PATTERNS`` order.
              * ``risk_level``: ``"high"`` when anything matched, else
                ``"low"``.
        """
        findings = [
            {
                "type": pii_type,
                "value": match.group(),
                "start": match.start(),
                "end": match.end(),
            }
            for pii_type, pattern in cls.PATTERNS.items()
            for match in re.finditer(pattern, text)
        ]
        return {
            "has_pii": bool(findings),
            "findings": findings,
            "risk_level": "high" if findings else "low",
        }

    @classmethod
    def redact(cls, text: str) -> Dict[str, Any]:
        """Redact PII from text.

        Each pattern is applied in ``PATTERNS`` order to the output of the
        previous one, replacing every match with ``[REDACTED_<TYPE>]``.

        Args:
            text: The text to redact.

        Returns:
            A dict with:
              * ``original``: the input text, unchanged.
              * ``redacted``: the text with matches replaced.
              * ``findings``: one ``{"type": ..., "redacted": True}`` entry
                per replaced match.
              * ``has_pii``: True when at least one match was replaced.
        """
        redacted = text
        findings = []
        for pii_type, pattern in cls.PATTERNS.items():
            placeholder = f"[REDACTED_{pii_type.upper()}]"
            # Substitute by match span rather than str.replace() on the
            # matched value: a plain string replace would also rewrite the
            # same characters where they sit inside a larger token that the
            # pattern's word boundaries deliberately rejected.
            # A callable replacement keeps the placeholder literal (no
            # backslash/group-reference processing).
            redacted, count = re.subn(pattern, lambda _match: placeholder, redacted)
            findings.extend(
                {"type": pii_type, "redacted": True} for _ in range(count)
            )
        return {
            "original": text,
            "redacted": redacted,
            "findings": findings,
            "has_pii": bool(findings),
        }

    @classmethod
    def validate_safe(cls, text: str, allow_pii: bool = False) -> Dict[str, Any]:
        """Validate text is safe to process.

        Args:
            text: The text to check.
            allow_pii: When True, text containing PII is still reported as
                safe.

        Returns:
            ``{"safe": True, "detection": ...}`` when no PII was found or
            ``allow_pii`` is set; otherwise ``{"safe": False, "reason": ...,
            "detection": ...}`` where ``reason`` lists the detected types.
            ``detection`` is always the full result of ``detect``.
        """
        detection = cls.detect(text)
        if detection["has_pii"] and not allow_pii:
            return {
                "safe": False,
                "reason": f"PII detected: {[f['type'] for f in detection['findings']]}",
                "detection": detection,
            }
        return {"safe": True, "detection": detection}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment