Created
January 13, 2026 07:46
-
-
Save hitswa/bc9e0a09c95bbf87ec9a982b810c1f4d to your computer and use it in GitHub Desktop.
This function performs normalization, validation, contextual escaping, and sanitization of input in PHP.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/**
 * sanitize()
 *
 * Unified sanitization & encoding pipeline for untrusted input.
 *
 * The value passes through three steps:
 *   1. String inputs are re-encoded as UTF-8, stripped of every character in
 *      the Unicode "Other" category (\p{C}: control, format, unassigned, ...)
 *      and trimmed. Note that this also removes tabs and line breaks.
 *   2. The value is validated / normalized according to $type.
 *   3. The value is encoded for the given output $context.
 *
 * IMPORTANT:
 *  - The 'db' context returns the value WITHOUT any escaping. It MUST be used
 *    only with prepared SQL statements.
 *  - Sanitization is context aware - pick the context that matches where the
 *    value is written; there is no universally safe output.
 *
 * @param mixed  $value   Raw untrusted input value
 * @param string $type    Expected data type:
 *                        string | int | float | bool | email | filename
 *                        (any other name skips step 2)
 * @param string $context Output context:
 *                        db | html | js | url | css | log | cmd
 *                        (any other name behaves like 'db')
 *
 * @return mixed Sanitized value. null stays null. For the int, float and email
 *               types a failed validation yields false; 'bool' yields false
 *               for anything that is not a recognised "true" value. In
 *               non-db contexts that false is encoded like any other value
 *               (e.g. '' for html/url, the literal false for js).
 *
 * @throws TypeError When a value that cannot be converted to a string (array,
 *                   non-stringable object) is used with the 'filename' type or
 *                   the html, url, css or cmd context.
 */
function sanitize($value, string $type = 'string', string $context = 'db')
{
    // NULL-safe: nothing to sanitize
    if ($value === null) {
        return null;
    }

    /* ----------------------------------------------------
     * Step-1 : Unicode normalization and control char purge
     * ---------------------------------------------------- */
    if (is_string($value)) {
        // Re-encoding UTF-8 -> UTF-8 replaces malformed byte sequences, so the
        // /u pattern below always receives valid input.
        $value = mb_convert_encoding($value, 'UTF-8', 'UTF-8');

        // Remove invisible characters (NUL, CR/LF, tabs, format characters ...)
        $stripped = preg_replace('/\p{C}+/u', '', $value);

        // preg_replace() returns null on a PCRE error; fail closed with an
        // empty string instead of passing null further down the pipeline.
        $value = trim($stripped === null ? '' : $stripped);
    }

    /* -----------------------------------------
     * Step-2 : Type validation & normalization
     * ----------------------------------------- */
    switch ($type) {
        case 'int':
            // Integer, or false when the input is not a valid integer
            $value = filter_var($value, FILTER_VALIDATE_INT);
            break;

        case 'float':
            // Float, or false when the input is not a valid float
            $value = filter_var($value, FILTER_VALIDATE_FLOAT);
            break;

        case 'email':
            // The e-mail address, or false when it does not validate
            $value = filter_var($value, FILTER_VALIDATE_EMAIL);
            break;

        case 'bool':
            // Normalize to true/false (unrecognised input becomes false)
            $value = filter_var($value, FILTER_VALIDATE_BOOLEAN);
            break;

        case 'filename':
            // Drop any directory part, then whitelist the remaining characters
            $name = (string) preg_replace(
                '/[^a-zA-Z0-9._-]/',
                '_',
                basename(sanitizeToString($value))
            );

            // basename() keeps "." and ".." and the whitelist allows dots, so a
            // name made only of dots would still be a traversal token.
            if ($name !== '' && trim($name, '.') === '') {
                $name = str_repeat('_', strlen($name));
            }

            $value = $name;
            break;
    }

    /* ----------------------------------------------------
     * Step-3 : Contextual output encoding
     * ---------------------------------------------------- */
    switch ($context) {
        // HTML text / attribute value
        case 'html':
            return htmlspecialchars(
                sanitizeToString($value),
                ENT_QUOTES | ENT_SUBSTITUTE | ENT_HTML5,
                'UTF-8'
            );

        // JavaScript literal, safe to embed inside a <script> element
        case 'js':
            $literal = json_encode(
                $value,
                JSON_HEX_TAG | JSON_HEX_AMP | JSON_HEX_APOS | JSON_HEX_QUOT
            );

            // json_encode() returns false on failure (e.g. INF/NAN); emit a
            // valid literal rather than an empty string that breaks the script.
            return $literal === false ? 'null' : $literal;

        // Single URL component (path segment or query value)
        case 'url':
            return rawurlencode(sanitizeToString($value));

        // CSS identifier / string content, escaped on the server
        case 'css':
            return sanitizeEscapeCss(sanitizeToString($value));

        // Log line - replace line breaks so an entry cannot be forged
        case 'log':
            return preg_replace('/[\r\n]/', ' ', $value);

        // Single OS command argument
        case 'cmd':
            return escapeshellarg(sanitizeToString($value));

        // Database (ONLY for prepared statements): returned as-is
        case 'db':
        default:
            return $value;
    }
}

/**
 * Converts a scalar or stringable value to a string for the string encoders.
 *
 * Uses an explicit cast so the result does not depend on whether the calling
 * file declares strict_types.
 *
 * @param mixed $value Value produced by the validation step
 *
 * @return string
 *
 * @throws TypeError When the value is an array, a resource, null or an object
 *                   without __toString()
 */
function sanitizeToString($value): string
{
    if (is_string($value)) {
        return $value;
    }

    if (is_scalar($value) || (is_object($value) && method_exists($value, '__toString'))) {
        return (string) $value;
    }

    throw new TypeError(
        sprintf('sanitize(): cannot encode a value of type %s as a string', gettype($value))
    );
}

/**
 * Escapes text for use as a CSS identifier or inside a CSS string.
 *
 * ASCII letters, digits, "-", "_" and all non-ASCII bytes are kept. Every
 * other byte becomes a hexadecimal escape followed by a space ("\20 ").
 * A leading digit, a digit after a leading hyphen, and a lone hyphen are
 * escaped as well because they cannot start an identifier.
 *
 * @param string $text Text to escape
 *
 * @return string
 */
function sanitizeEscapeCss(string $text): string
{
    // Hex escapes (instead of "\<char>") never leave a raw <, > or quote in the
    // output, so the result stays inert inside HTML style blocks/attributes.
    $toHexEscape = function (string $byte): string {
        return '\\' . dechex(ord($byte)) . ' ';
    };

    // Byte-mode pattern: \x80-\xff keeps the bytes of multibyte UTF-8
    // characters untouched.
    $escaped = (string) preg_replace_callback(
        '/[^a-zA-Z0-9_\x80-\xff-]/',
        function (array $match) use ($toHexEscape): string {
            return $toHexEscape($match[0]);
        },
        $text
    );

    if ($escaped === '-') {
        return $toHexEscape('-');
    }

    return (string) preg_replace_callback(
        '/^(-?)([0-9])/',
        function (array $match) use ($toHexEscape): string {
            return $match[1] . $toHexEscape($match[2]);
        },
        $escaped
    );
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment