Created
November 7, 2025 21:08
-
-
Save ugochukwu95/6da2fc69e6e61f7cb30d70cddb410ef8 to your computer and use it in GitHub Desktop.
PII Detector tool
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| """PII detection and redaction to prevent sensitive information disclosure.""" | |
import re
from typing import Any, Dict, List
class PIIDetector:
    """Detects and redacts sensitive information in prompts and outputs.

    Detection is purely regex-based: every entry of ``PATTERNS`` is scanned
    over the text.  All methods are classmethods, so no instance is needed.
    """

    # Regex source for each supported PII category, keyed by category name.
    # Dict order is the order in which categories are scanned and redacted.
    PATTERNS = {
        # TLD class is [A-Za-z]; a "|" inside a character class is a literal
        # pipe, not alternation, and must not be accepted as part of a TLD.
        "email": r'\b[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Za-z]{2,}\b',
        "phone": r'\b(?:\+?1[-.]?)?\(?([0-9]{3})\)?[-.]?([0-9]{3})[-.]?([0-9]{4})\b',
        "ssn": r'\b\d{3}-\d{2}-\d{4}\b',
        "credit_card": r'\b\d{4}[-\s]?\d{4}[-\s]?\d{4}[-\s]?\d{4}\b',
        # Shape check only: octets are not range-validated (999.1.1.1 matches).
        "ip_address": r'\b(?:\d{1,3}\.){3}\d{1,3}\b',
        # Any standalone run of 32+ alphanumerics is treated as a key.
        "api_key": r'\b[A-Za-z0-9]{32,}\b',
        "aws_key": r'AKIA[0-9A-Z]{16}',
    }

    @classmethod
    def detect(cls, text: str) -> Dict[str, Any]:
        """Detect PII in text.

        Every pattern is run against the unmodified ``text``, so overlapping
        matches from different categories are all reported.

        Args:
            text: The string to scan.

        Returns:
            A dict with:
              * ``has_pii`` -- True when at least one match was found.
              * ``findings`` -- one dict per match with ``type``, ``value``,
                ``start`` and ``end`` (offsets into ``text``), grouped by
                category in ``PATTERNS`` order.
              * ``risk_level`` -- ``"high"`` when anything matched, else
                ``"low"``.
        """
        findings: List[Dict[str, Any]] = []
        for pii_type, pattern in cls.PATTERNS.items():
            findings.extend(
                {
                    "type": pii_type,
                    "value": match.group(),
                    "start": match.start(),
                    "end": match.end(),
                }
                for match in re.finditer(pattern, text)
            )

        return {
            "has_pii": bool(findings),
            "findings": findings,
            "risk_level": "high" if findings else "low",
        }

    @classmethod
    def redact(cls, text: str) -> Dict[str, Any]:
        """Redact PII from text.

        Categories are processed in ``PATTERNS`` order; each one is applied
        to the output of the previous one, and every match is replaced with
        ``[REDACTED_<TYPE>]``.

        Args:
            text: The string to sanitize.

        Returns:
            A dict with:
              * ``original`` -- the input text, untouched.
              * ``redacted`` -- the text with every match replaced.
              * ``findings`` -- one ``{"type": ..., "redacted": True}`` entry
                per replaced match.
              * ``has_pii`` -- True when at least one match was replaced.
        """
        redacted = text
        findings: List[Dict[str, Any]] = []

        for pii_type, pattern in cls.PATTERNS.items():
            placeholder = f"[REDACTED_{pii_type.upper()}]"
            # Substitute only the matched spans.  A plain str.replace() on the
            # matched text would also rewrite identical substrings embedded in
            # other tokens (e.g. a phone number's digits inside an API key),
            # splitting that token so a later pattern can no longer match it.
            # A callable replacement keeps the placeholder from being parsed
            # as a regex replacement template.
            redacted, count = re.subn(
                pattern, lambda _match: placeholder, redacted
            )
            findings.extend(
                {"type": pii_type, "redacted": True} for _ in range(count)
            )

        return {
            "original": text,
            "redacted": redacted,
            "findings": findings,
            "has_pii": bool(findings),
        }

    @classmethod
    def validate_safe(cls, text: str, allow_pii: bool = False) -> Dict[str, Any]:
        """Validate text is safe to process.

        Args:
            text: The string to check.
            allow_pii: When True, text containing PII is still reported as
                safe; the detection result is returned either way.

        Returns:
            ``{"safe": True, "detection": ...}`` when no PII was found or PII
            is allowed; otherwise ``{"safe": False, "reason": ...,
            "detection": ...}`` where ``reason`` lists the detected types.
        """
        detection = cls.detect(text)

        if allow_pii or not detection["has_pii"]:
            return {"safe": True, "detection": detection}

        return {
            "safe": False,
            "reason": f"PII detected: {[f['type'] for f in detection['findings']]}",
            "detection": detection,
        }
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment