Skip to content

Instantly share code, notes, and snippets.

@hitswa
Created January 13, 2026 07:46
Show Gist options
  • Select an option

  • Save hitswa/bc9e0a09c95bbf87ec9a982b810c1f4d to your computer and use it in GitHub Desktop.

Select an option

Save hitswa/bc9e0a09c95bbf87ec9a982b810c1f4d to your computer and use it in GitHub Desktop.
This function performs normalization, validation, contextual escaping, and sanitization of input in PHP.
/**
 * sanitize()
 *
 * Unified sanitization and encoding pipeline for a single untrusted value.
 * The value passes through three steps:
 *
 *   1. Strings are re-encoded as UTF-8, stripped of every character in the
 *      Unicode "Other" category (\p{C}: control, format, private-use,
 *      unassigned) and trimmed. This removes CR, LF and TAB as well, so
 *      multi-line text is collapsed onto one line.
 *   2. The value is validated / normalized according to $type.
 *   3. The value is encoded for the requested output $context.
 *
 * IMPORTANT:
 * - The 'db' context performs NO escaping; use it only with prepared
 *   statements / bound parameters.
 * - An unrecognised $context behaves like 'db' (value returned unencoded),
 *   and an unrecognised $type skips validation. Double-check both arguments.
 * - Sanitization is context aware; encode for the place the value is
 *   actually written to.
 *
 * @param mixed  $value   Raw untrusted input value.
 * @param string $type    Expected data type:
 *                        string | int | float | bool | email | filename
 * @param string $context Output context:
 *                        db | html | js | url | css | log | cmd
 *
 * @return mixed Null when $value is null. For 'db' (and unknown contexts) the
 *               validated value itself: int, float, bool or string, or false
 *               when int/float/email validation failed. For 'js' a JSON
 *               literal. For every other context an encoded string; a failed
 *               validation (false) is encoded as the empty string.
 *
 * @throws TypeError When a value that cannot be converted to a string is
 *                   used with the 'css' context.
 */
function sanitize($value, string $type = 'string', string $context = 'db')
{
    // NULL-safe: nothing to sanitize.
    if ($value === null) {
        return null;
    }

    /* ----------------------------------------------------
     * Step-1 : Unicode normalization and control char purge
     * ---------------------------------------------------- */
    if (is_string($value)) {
        // Replace malformed byte sequences so later /u regexes cannot fail on them.
        $value = mb_convert_encoding($value, 'UTF-8', 'UTF-8');
        // Remove invisible characters (NUL, CR/LF, zero-width marks, ...).
        // preg_replace() yields null on a PCRE error; fail closed to ''.
        $value = preg_replace('/\p{C}+/u', '', $value) ?? '';
        // Trim surrounding whitespace.
        $value = trim($value);
    }

    /* -----------------------------------------
     * Step-2 : Type validation & normalization
     * ----------------------------------------- */
    switch ($type) {
        case 'int':
            // int on success, false on failure.
            $value = filter_var($value, FILTER_VALIDATE_INT);
            break;
        case 'float':
            // float on success, false on failure.
            $value = filter_var($value, FILTER_VALIDATE_FLOAT);
            break;
        case 'email':
            // Validated address on success, false on failure.
            $value = filter_var($value, FILTER_VALIDATE_EMAIL);
            break;
        case 'bool':
            // Anything that is not a recognised "true" spelling becomes false.
            $value = filter_var($value, FILTER_VALIDATE_BOOLEAN);
            break;
        case 'filename':
            // Drop any directory part, then whitelist the remaining characters.
            // Scalars are cast explicitly so this also works under strict_types.
            $name = basename(is_scalar($value) ? (string) $value : $value);
            $name = (string) preg_replace('/[^a-zA-Z0-9._-]/', '_', $name);
            // '.' and '..' pass the whitelist but still address the current /
            // parent directory, so neutralise names made only of dots.
            if (preg_match('/^\.+$/', $name) === 1) {
                $name = str_repeat('_', strlen($name));
            }
            $value = $name;
            break;
    }

    /* ----------------------------------------------------
     * Step-3 : Contextual output encoding
     * ---------------------------------------------------- */
    // After Step-2 the value may be int/float/bool. The string encoders below
    // reject those under strict_types, so convert up front; the cast gives the
    // same result as PHP's implicit coercion (false => '', true => '1').
    if (is_scalar($value) || (is_object($value) && method_exists($value, '__toString'))) {
        $text = (string) $value;
    } else {
        $text = $value;
    }

    switch ($context) {
        // HTML text / attribute value.
        case 'html':
            return htmlspecialchars($text, ENT_QUOTES | ENT_HTML5, 'UTF-8');

        // JavaScript literal; keeps the native type (number, bool, string).
        case 'js':
            return json_encode($value, JSON_HEX_TAG | JSON_HEX_AMP | JSON_HEX_APOS | JSON_HEX_QUOT);

        // Single URL component (path segment or query value).
        case 'url':
            return rawurlencode($text);

        // CSS identifier / string content, escaped on the server.
        // Every character outside [a-zA-Z0-9], plus a leading digit, becomes a
        // hex escape terminated by a space ("\3c "), so the result cannot
        // close a string, rule or <style> element.
        case 'css':
            if (!is_string($text)) {
                throw new TypeError('sanitize(): the css context requires a string-convertible value');
            }
            return preg_replace_callback(
                '/^[0-9]|[^a-zA-Z0-9]/u',
                static function (array $match) {
                    return '\\' . dechex(mb_ord($match[0], 'UTF-8')) . ' ';
                },
                $text
            ) ?? '';

        // Log line; strings already lost CR/LF in Step-1, this covers the rest.
        case 'log':
            return preg_replace('/[\r\n]/', ' ', $text);

        // Single shell argument.
        case 'cmd':
            return escapeshellarg($text);

        // Database (ONLY for prepared statements): value is returned unencoded.
        // Unknown contexts deliberately share this branch.
        case 'db':
        default:
            return $value;
    }
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment