ugochukwu95/output_sanitizer.py

## output_sanitizer.py
"""Output sanitization for LLM responses to prevent injection attacks."""
import re
import html
from typing import Dict

class OutputSanitizer:
    """Sanitizes LLM outputs before downstream consumption.

    Every method is a classmethod; the class keeps no instance state.
    """

    # Regex sources checked by detect_injection (case-insensitive, DOTALL).
    DANGEROUS_PATTERNS = [
        r'<script[^>]*>.*?</script>',
        r'javascript:',
        r'on\w+\s*=',
        r'DROP\s+TABLE',
        r'DELETE\s+FROM',
        r'EXEC\s*\(',
        r'eval\s*\(',
        r'__import__',
    ]

    # Tokens stripped by sanitize_for_sql. Word keywords are anchored with \b
    # so that identifiers merely containing them (e.g. "backdrop") survive.
    # "--" and ";" are matched bare: they are non-word characters, so wrapping
    # them in \b would only match when a word character sits on both sides.
    _SQL_TOKEN_RE = re.compile(
        r'\b(?:DROP|DELETE|TRUNCATE|EXECUTE|EXEC)\b|--|;',
        re.IGNORECASE,
    )

    @classmethod
    def sanitize_for_web(cls, output: str) -> str:
        """Return ``output`` HTML-escaped via ``html.escape`` for web display."""
        return html.escape(output)

    @classmethod
    def sanitize_for_sql(cls, output: str) -> str:
        """Strip dangerous SQL tokens from ``output`` and double its quotes.

        Removes the keywords DROP, DELETE, TRUNCATE, EXEC and EXECUTE
        (whole words, case-insensitive) together with every ``--`` and ``;``,
        then doubles single and double quote characters.

        NOTE: this is a best-effort blacklist; prefer parameterized queries
        wherever the caller controls how the SQL statement is built.

        Args:
            output: Text to sanitize.

        Returns:
            The sanitized text.
        """
        sanitized = output
        previous = None
        # Repeat until nothing changes: deleting one token can splice a new
        # one together (e.g. "DR;OP" -> "DROP"), which a single pass misses.
        while sanitized != previous:
            previous = sanitized
            sanitized = cls._SQL_TOKEN_RE.sub('', sanitized)
        return sanitized.replace("'", "''").replace('"', '""')

    @classmethod
    def detect_injection(cls, output: str) -> Dict[str, object]:
        """Report which DANGEROUS_PATTERNS occur in ``output``.

        Args:
            output: Text to scan.

        Returns:
            A dict with ``is_safe`` (True when nothing matched),
            ``detected_patterns`` (the matching pattern sources, in
            DANGEROUS_PATTERNS order) and ``risk_level`` (``"high"`` when
            anything matched, otherwise ``"low"``).
        """
        flags = re.IGNORECASE | re.DOTALL
        detected = [
            pattern
            for pattern in cls.DANGEROUS_PATTERNS
            if re.search(pattern, output, flags)
        ]
        return {
            "is_safe": not detected,
            "detected_patterns": detected,
            "risk_level": "high" if detected else "low",
        }

    @classmethod
    def sanitize_generic(cls, output: str, context: str = "web") -> Dict[str, object]:
        """Run detection on ``output`` and sanitize it for ``context``.

        Args:
            output: Text to process.
            context: ``"web"`` selects sanitize_for_web and ``"sql"`` selects
                sanitize_for_sql. Any other value leaves the text unchanged.

        Returns:
            A dict with ``original`` (the input), ``sanitized`` (the processed
            text) and ``detection`` (the detect_injection result, computed on
            the unsanitized input).
        """
        detection = cls.detect_injection(output)

        if context == "web":
            sanitized = cls.sanitize_for_web(output)
        elif context == "sql":
            sanitized = cls.sanitize_for_sql(output)
        else:
            # Unknown contexts are passed through untouched.
            sanitized = output

        return {
            "original": output,
            "sanitized": sanitized,
            "detection": detection,
        }
	"""Output sanitization for LLM responses to prevent injection attacks."""
	import re
	import html
	from typing import Dict

	class OutputSanitizer:
	    """Sanitizes LLM outputs before downstream consumption.

	    Every method is a classmethod; the class keeps no instance state.
	    """

	    # Regex sources checked by detect_injection (case-insensitive, DOTALL).
	    DANGEROUS_PATTERNS = [
	        r'<script[^>]*>.*?</script>',
	        r'javascript:',
	        r'on\w+\s*=',
	        r'DROP\s+TABLE',
	        r'DELETE\s+FROM',
	        r'EXEC\s*\(',
	        r'eval\s*\(',
	        r'__import__',
	    ]

	    # Tokens stripped by sanitize_for_sql. Word keywords are anchored with \b
	    # so that identifiers merely containing them (e.g. "backdrop") survive.
	    # "--" and ";" are matched bare: they are non-word characters, so wrapping
	    # them in \b would only match when a word character sits on both sides.
	    _SQL_TOKEN_RE = re.compile(
	        r'\b(?:DROP|DELETE|TRUNCATE|EXECUTE|EXEC)\b|--|;',
	        re.IGNORECASE,
	    )

	    @classmethod
	    def sanitize_for_web(cls, output: str) -> str:
	        """Return ``output`` HTML-escaped via ``html.escape`` for web display."""
	        return html.escape(output)

	    @classmethod
	    def sanitize_for_sql(cls, output: str) -> str:
	        """Strip dangerous SQL tokens from ``output`` and double its quotes.

	        Removes the keywords DROP, DELETE, TRUNCATE, EXEC and EXECUTE
	        (whole words, case-insensitive) together with every ``--`` and ``;``,
	        then doubles single and double quote characters.

	        NOTE: this is a best-effort blacklist; prefer parameterized queries
	        wherever the caller controls how the SQL statement is built.

	        Args:
	            output: Text to sanitize.

	        Returns:
	            The sanitized text.
	        """
	        sanitized = output
	        previous = None
	        # Repeat until nothing changes: deleting one token can splice a new
	        # one together (e.g. "DR;OP" -> "DROP"), which a single pass misses.
	        while sanitized != previous:
	            previous = sanitized
	            sanitized = cls._SQL_TOKEN_RE.sub('', sanitized)
	        return sanitized.replace("'", "''").replace('"', '""')

	    @classmethod
	    def detect_injection(cls, output: str) -> Dict[str, object]:
	        """Report which DANGEROUS_PATTERNS occur in ``output``.

	        Args:
	            output: Text to scan.

	        Returns:
	            A dict with ``is_safe`` (True when nothing matched),
	            ``detected_patterns`` (the matching pattern sources, in
	            DANGEROUS_PATTERNS order) and ``risk_level`` (``"high"`` when
	            anything matched, otherwise ``"low"``).
	        """
	        flags = re.IGNORECASE | re.DOTALL
	        detected = [
	            pattern
	            for pattern in cls.DANGEROUS_PATTERNS
	            if re.search(pattern, output, flags)
	        ]
	        return {
	            "is_safe": not detected,
	            "detected_patterns": detected,
	            "risk_level": "high" if detected else "low",
	        }

	    @classmethod
	    def sanitize_generic(cls, output: str, context: str = "web") -> Dict[str, object]:
	        """Run detection on ``output`` and sanitize it for ``context``.

	        Args:
	            output: Text to process.
	            context: ``"web"`` selects sanitize_for_web and ``"sql"`` selects
	                sanitize_for_sql. Any other value leaves the text unchanged.

	        Returns:
	            A dict with ``original`` (the input), ``sanitized`` (the processed
	            text) and ``detection`` (the detect_injection result, computed on
	            the unsanitized input).
	        """
	        detection = cls.detect_injection(output)

	        if context == "web":
	            sanitized = cls.sanitize_for_web(output)
	        elif context == "sql":
	            sanitized = cls.sanitize_for_sql(output)
	        else:
	            # Unknown contexts are passed through untouched.
	            sanitized = output

	        return {
	            "original": output,
	            "sanitized": sanitized,
	            "detection": detection,
	        }
No results found