& are unsafe characters for HTML, but what about CSS, JSON, SQL, or
 * even shell scripts? Those have a completely different set of unsafe characters.
 *
 * Every so often developers talk about “sanitizing user input” to prevent
 * cross-site scripting attacks. This is well-intentioned, but leads to a
 * false sense of security, and sometimes mangles perfectly good input.
 */

namespace bs_tts;

/**
 * How a string is treated before it is written into an HTML page.
 * Consumed by safer_io::safer_html().
 */
enum HTML_FLAG
{
    case raw;    // Dangerous (XSS): safer_io::safer_html() refuses it with an exception
    case strip;  // strip_tags()
    case encode; // htmlentities()
    case purify; // Allow safe whitelisted HTML elements/tags
    case escape; // Safely escape HTML (htmlspecialchars)
}

/**
 * Where the value of a field is read from.
 * The backing values of post/get/cookie/env/server mirror PHP's INPUT_* constants.
 */
enum INPUTS: int
{
    case variable = 998;  // User defined variable (use_io::$input_var)
    case debugging = 999; // Check POST and then, if "debugging" is set, check GET
    case json = 1000;     // Uses JSON decoded from the raw POST body
    case post = 0;        // INPUT_POST
    case get = 1;         // INPUT_GET
    case cookie = 2;      // INPUT_COOKIE
    case env = 4;         // INPUT_ENV
    case server = 5;      // INPUT_SERVER

    /**
     * Map the case to the matching INPUT_* constant for filter_input().
     *
     * @throws \UnhandledMatchError for variable, debugging and json, which
     *                              have no INPUT_* equivalent.
     */
    public function resolve(): int
    {
        return match ($this) {
            self::post => INPUT_POST,
            self::get => INPUT_GET,
            self::cookie => INPUT_COOKIE,
            self::env => INPUT_ENV,
            self::server => INPUT_SERVER,
        };
    }
}

/**
 * Whether values headed for the database go through the SQL text filter.
 */
enum DB_FILTER
{
    case ON;  // Tries to filter out SQL from user input
    case OFF; // Normal pass through...
}

/**
 * The kind of data a field is expected to hold.
 */
enum FIELD_FILTER: string
{
    case raw_string = "string";
    case array_of_strings = "strings";
    case email = "email-address";
    case url = "site-url";
    case raw = "unfiltered-non-sanitized";
    case integer_number = "integer";
    case array_of_ints = "integers";
    case floating_point = "float";
    case array_of_floats = "floats";

    /**
     * Describe the PHP filter for this field type.
     *
     * @return int|array Either a FILTER_* id, or an array with the keys
     *                   'filter' (FILTER_* id) and 'flags' (FILTER_FLAG_* bitmask).
     */
    public function resolve(): int|array
    {
        return match ($this) {
            self::raw_string => FILTER_UNSAFE_RAW,
            self::array_of_strings => [
                'filter' => FILTER_UNSAFE_RAW,
                'flags' => FILTER_REQUIRE_ARRAY,
            ],
            self::email => FILTER_SANITIZE_EMAIL,
            self::url => FILTER_SANITIZE_URL,
            self::raw => FILTER_DEFAULT, // Unfiltered, non-sanitized!!!
            self::integer_number => [
                'filter' => FILTER_SANITIZE_NUMBER_INT,
                'flags' => FILTER_REQUIRE_SCALAR,
            ],
            self::array_of_ints => [
                'filter' => FILTER_SANITIZE_NUMBER_INT,
                'flags' => FILTER_REQUIRE_ARRAY,
            ],
            self::floating_point => [
                'filter' => FILTER_SANITIZE_NUMBER_FLOAT,
                'flags' => FILTER_FLAG_ALLOW_FRACTION,
            ],
            // Without ALLOW_FRACTION the float sanitizer removes the decimal
            // point, turning "1.5" into "15".
            self::array_of_floats => [
                'filter' => FILTER_SANITIZE_NUMBER_FLOAT,
                'flags' => FILTER_REQUIRE_ARRAY | FILTER_FLAG_ALLOW_FRACTION,
            ],
        };
    }
}

/**
 * Per-field settings read by the safer_io sanitizers.
 */
final class use_io
{
    /** When set (non-null), used as the field value instead of reading request input. */
    public $input_var;
    /** INPUTS case telling where the value is read from. */
    public $input_type;
    /** FIELD_FILTER case; the sanitizer's default filter is used when unset. */
    public $field_filter;
    /** HTML_FLAG case; HTML_FLAG::escape is used when unset. */
    public $escape_html;
    /** Rule handed to \bs_tts\validator::validate() for this field. */
    public $validation_rule;
    /** Message handed to the validator; only used when a rule is set. */
    public $validation_message;
    /** Copied into the result meta as 'skip_db' (false when unset). */
    public $skip_the_db;
    /** DB_FILTER::ON sends non-numeric DB values through \tts\safer_sql. */
    public $use_db_filter;
}

final class use_iol
{
    /**
     * Run the inputs -> logic -> outputs classes of the current project.
     *
     * Registers the DB service under 'db', then calls $method on
     * \prj\<project>\inputs\<root_folder>\<file>_in,
     * \prj\<project>\logic\<root_folder>\<file>_logic (with the input result) and
     * \prj\<project>\outputs\<root_folder>\<file>_out (with the input result).
     *
     * NOTE(review): class and method names are built from the arguments;
     * presumably callers only pass trusted route parts - confirm.
     *
     * @return mixed Whatever the outputs class method returns.
     */
    public static function auto_wire(
        string $root_folder,
        string $file,
        string $method = 'index',
        string $db_service = 'db_mocker'
    ) {
        $project = rtrim(\bs_tts\site_helper::get_project(), '/');

        \main_tts\registry::set(
            'db',
            \main_tts\registry::get('di')->get_service($db_service)
        );

        $class_name = "\\prj\\{$project}\\inputs\\{$root_folder}\\{$file}_in";
        $input = $class_name::$method();

        $class_name = "\\prj\\{$project}\\logic\\{$root_folder}\\{$file}_logic";
        $class_name::$method($input);

        $class_name = "\\prj\\{$project}\\outputs\\{$root_folder}\\{$file}_out";
        return $class_name::$method($input);
    }
}

/**
 * Static helpers for reading request input and escaping it per output context.
 */
final class safer_io
{
    /** Decoded JSON request body, filled by init_json(). Always an array. */
    public static $JSON_POST_DATA = [];

    // Static-only class: not meant to be instantiated.
    protected function __construct()
    {
    }

    /**
     * Return $in_str as UTF-8. A no-op when mbstring is not loaded or the
     * string already is valid UTF-8.
     */
    public static function convert_to_utf8(string $in_str): string
    {
        if (! extension_loaded('mbstring')) {
            return $in_str;
        }
        $cur_encoding = mb_detect_encoding($in_str);
        if ($cur_encoding === "UTF-8" && mb_check_encoding($in_str, "UTF-8")) {
            return $in_str;
        }
        // mb_detect_encoding() returns false when nothing matches; false is
        // not a usable source encoding, so convert UTF-8 to UTF-8 instead,
        // which substitutes the invalid byte sequences.
        $from_encoding = ($cur_encoding === false) ? 'UTF-8' : $cur_encoding;
        return mb_convert_encoding($in_str, 'UTF-8', $from_encoding);
    }

    // Escape HTML output
    public static function h(string $string): string
    {
        $utf8 = self::convert_to_utf8($string);
        return htmlspecialchars($utf8, ENT_QUOTES | ENT_SUBSTITUTE | ENT_HTML5, 'UTF-8');
    }

    // Reverse encode of HTML
    public static function html_decode(string $string): string
    {
        return htmlspecialchars_decode($string, ENT_QUOTES | ENT_SUBSTITUTE | ENT_HTML5);
    }

    // HTML Purify library (the 'html_filter' service of the DI container)
    public static function p(string $string): string
    {
        $purifer = \main_tts\registry::get('di')->get_service('html_filter');
        if (! $purifer->has_loaded()) {
            $purifer->set_defaults();
        }
        return $purifer->purify($string);
    }

    /**
     * Escape JavaScript output by JSON encoding it.
     *
     * @return mixed The JSON string, or the \JsonException on failure
     *               (test with has_json_error()).
     */
    public static function j($input, int $levels_deep = 512): mixed
    {
        try {
            return json_encode($input, JSON_PRETTY_PRINT | JSON_THROW_ON_ERROR, $levels_deep);
        } catch (\JsonException $ex) {
            return $ex;
        }
    }

    /**
     * Decode a JSON string.
     *
     * @return mixed The decoded value, or the \JsonException on failure
     *               (test with has_json_error()).
     */
    public static function json_decode(
        string $string,
        bool $return_as_an_array = true,
        int $levels_deep = 512
    ): mixed {
        try {
            return json_decode($string, $return_as_an_array, $levels_deep, JSON_THROW_ON_ERROR);
        } catch (\JsonException $ex) {
            return $ex;
        }
    }

    // True when j() / json_decode() handed back an exception instead of a value
    public static function has_json_error($object): bool
    {
        return ($object instanceof \JsonException);
    }

    // Escape URL output
    public static function u(string $string): string
    {
        return urlencode($string);
    }

    /*
     * Encode HTML kind of... htmlentities() on its own is not very powerful:
     * by default it leaves single quotes alone, it cannot detect the
     * character set and it does not validate HTML. Hence ENT_QUOTES and the
     * UTF-8 conversion below.
     */
    public static function e(string $string): string
    {
        $utf8 = self::convert_to_utf8($string);
        return htmlentities($utf8, ENT_QUOTES, 'UTF-8');
    }

    // Reverse of e()
    public static function de(string $data): string
    {
        return html_entity_decode($data);
    }

    /*
     * Note: Generally, "strip_tags" is just the wrong function.
     * Never use it. And if you do, absolutely never use the second parameter,
     * because sooner or later someone will abuse it.
     */

    // Read a $_SERVER entry through the filter extension, unmodified
    public static function get_clean_server_var(string $var): mixed
    {
        return filter_input(INPUT_SERVER, $var, FILTER_UNSAFE_RAW);
    }

    // Interpret "1", "true", "on", "yes" (and friends) as a boolean
    public static function get_bool($in): bool
    {
        return (filter_var($in, FILTER_VALIDATE_BOOLEAN));
    }

    /**
     * Purpose: To decode JQuery encoded objects, arrays, strings, int, bool types.
     * The content must be of application/json.
     *
     * @param bool $return_as_array (true) -> Array, (false) -> Object
     * @param int $levels_deep maximum JSON nesting depth
     * @return mixed The decoded body. false when the request is not
     *               application/json, or when the body is not valid JSON.
     */
    public static function get_json_post_data(bool $return_as_array = true, int $levels_deep = 512): mixed
    {
        $content_type = self::get_clean_server_var('CONTENT_TYPE');
        if (! is_string($content_type) || ! str_contains($content_type, "application/json")) {
            return false;
        }

        $post_body = file_get_contents("php://input"); // get raw POST data.
        if ($post_body === false) {
            return false;
        }

        $ret_json = self::json_decode(trim($post_body), $return_as_array, $levels_deep);
        return self::has_json_error($ret_json) ? false : $ret_json;
    }

    /**
     * Make a string safe(r) for HTML output according to $safety_level.
     *
     * @throws \Exception for HTML_FLAG::raw
     */
    public static function safer_html(string $input, HTML_FLAG $safety_level = HTML_FLAG::escape): string
    {
        return match ($safety_level) {
            HTML_FLAG::raw => throw new \Exception('Raw HTML not supported!'),
            HTML_FLAG::strip => strip_tags($input),
            HTML_FLAG::encode => self::e($input),
            HTML_FLAG::purify => self::p($input),
            HTML_FLAG::escape => self::h($input),
        };
    }

    /**
     * Trim a string, or every string of a countable list. Booleans and
     * integers in a list are kept as they are; any other element is dropped.
     * The returned list is re-indexed.
     */
    private static function t($item, bool $do_trim = true)
    {
        if (! $do_trim) {
            return $item;
        }
        if (is_string($item)) {
            return trim($item);
        }
        if (\bs_tts\common::get_count($item)) {
            $ret = [];
            foreach ($item as $text) {
                if (is_bool($text) || is_int($text)) {
                    $ret[] = $text;
                    continue;
                }
                if (! is_string($text)) {
                    continue; // Deny Arrays and Objects here!
                }
                $ret[] = trim($text);
            }
            return $ret;
        }
        return $item;
    }

    /**
     * Fetch the raw value of a field from the given input source.
     *
     * NOTE(review): filter_input() with the default filter yields false for
     * array-valued request fields, so the array field filters presumably only
     * ever see arrays coming from JSON or use_io::$input_var - confirm.
     *
     * @return mixed The value, or null when the field is not present.
     */
    private static function get_input_by_type(
        string $input_field_name,
        INPUTS $input_type,
    ): mixed {
        if ($input_type === INPUTS::variable) {
            // The value lives in use_io::$input_var; nothing to read here.
            return null;
        }

        if ($input_type === INPUTS::debugging) {
            if (isset(self::$JSON_POST_DATA[$input_field_name])) {
                return self::$JSON_POST_DATA[$input_field_name];
            }
            if (filter_has_var(INPUT_POST, $input_field_name)) {
                return filter_input(INPUT_POST, $input_field_name);
            }
            // GET is only consulted when the request carries ?debugging
            if (! filter_has_var(INPUT_GET, "debugging")) {
                return null;
            }
            if (filter_has_var(INPUT_GET, $input_field_name)) {
                return filter_input(INPUT_GET, $input_field_name);
            }
            return null;
        }

        if ($input_type === INPUTS::json) {
            return self::$JSON_POST_DATA[$input_field_name] ?? null;
        }

        $resolve_input = $input_type->resolve();
        if (filter_has_var($resolve_input, $input_field_name)) {
            return filter_input($resolve_input, $input_field_name);
        }
        return null;
    }

    /**
     * Escape one string using the HTML_FLAG of $a (HTML_FLAG::escape when unset).
     *
     * @param string $data
     * @param use_io $a field settings; escape_html is read
     * @return string|bool
     */
    private static function get_safer_string(string $data, use_io $a): string | bool
    {
        if (isset($a->escape_html) && $a->escape_html instanceof HTML_FLAG) {
            return self::safer_html($data, $a->escape_html);
        }
        return self::safer_html($data);
    }

    /**
     * Escape a string, or every string of a countable list. Booleans and
     * integers in a list are kept; any other element is dropped.
     */
    private static function get_safer_html($data, use_io $a)
    {
        if (is_string($data)) {
            return self::get_safer_string($data, $a);
        }
        if (\bs_tts\common::get_count($data)) {
            $ret = [];
            foreach ($data as $text) {
                if (is_bool($text) || is_int($text)) {
                    $ret[] = $text;
                    continue;
                }
                if (! is_string($text)) {
                    continue; // Deny Arrays and Objects here!
                }
                $ret[] = self::get_safer_string($text, $a);
            }
            return $ret;
        }
        return $data;
    }

    /**
     * Initialize JSON post data into static array, if used....
     * Anything that is not a decoded array (no JSON request, invalid JSON,
     * a scalar body) leaves the static array empty.
     *
     * @param int $levels_deep are JSON Levels to use
     */
    public static function init_json(int $levels_deep = 512): void
    {
        $decoded = self::get_json_post_data(true, $levels_deep);
        self::$JSON_POST_DATA = is_array($decoded) ? $decoded : [];
    }

    /**
     * True when a sanitizer result describes a field that has validation
     * rules but was submitted empty.
     */
    public static function required_fields_were_NOT_all_submitted(array $data): bool
    {
        $field = $data['name'] ?? false;
        $empty = $data['meta'][$field]['empty'] ?? true;
        $required = $data['meta'][$field]['validation_rules_set'] ?? false;
        return ($empty && $required);
    }

    /**
     * Run a value through the PHP filter declared by $field_type.
     *
     * filter_var() takes the filter id as its second argument and the flags
     * as its third; the 'filter' key of FIELD_FILTER::resolve() is therefore
     * unpacked here rather than passed along as an options array.
     *
     * @return mixed The filtered value, or false when the filter fails.
     */
    private static function apply_field_filter(mixed $value, FIELD_FILTER $field_type): mixed
    {
        $resolved = $field_type->resolve();
        if (is_array($resolved)) {
            return filter_var($value, $resolved['filter'], $resolved['flags'] ?? 0);
        }
        return filter_var($value, $resolved);
    }

    /**
     * Sanitize one field for the requested target.
     *
     * @param string $from "db", "logic" or "html"; selects which result key is filled
     * @param string $input_field_name
     * @param use_io $a field settings
     * @param FIELD_FILTER $default_filter used when $a has no field_filter
     * @param bool $trim trim strings
     * @return array Keys: 'name', 'meta', one of 'db' / 'logic' / 'html', and 'errors'.
     */
    private static function sanitize_helper(
        string $from,
        string $input_field_name,
        use_io $a,
        FIELD_FILTER $default_filter = FIELD_FILTER::raw_string,
        bool $trim = true,
    ): array {
        $field_type = (isset($a->field_filter) && $a->field_filter instanceof FIELD_FILTER)
            ? $a->field_filter
            : $default_filter;

        if (isset($a->input_var)) {
            $user_text = $a->input_var;
        } elseif (isset($a->input_type) && $a->input_type instanceof INPUTS) {
            $user_text = self::get_input_by_type($input_field_name, $a->input_type);
        } else {
            // Neither a value nor a source was configured for this field.
            return [
                'name' => $input_field_name,
                'meta' => ['missing' => [$input_field_name]],
                'errors' => [$input_field_name => "Missing Field $input_field_name"],
                'html' => null,
                'db' => false,
                'logic' => false,
            ];
        }

        $rules = [];
        $messages = [];
        if (isset($a->validation_rule)) {
            $rules[$input_field_name] = $a->validation_rule;
            if (isset($a->validation_message)) {
                $messages[$input_field_name] = $a->validation_message;
            }
        }

        $meta = ['missing' => []];
        $meta[$input_field_name]['validation_rules_set'] = ($rules !== []);
        $meta[$input_field_name]['type'] = $field_type->name;
        $meta[$input_field_name]['skip_db'] = $a->skip_the_db ?? false;

        $safer_data = null;
        $safer_db_data = null;
        $safer_html_data = null; // should be null for ?? operator to work with it....

        if ($user_text === null) {
            $meta[$input_field_name]['empty'] = true;
        } else {
            $meta[$input_field_name]['empty'] = false;
            $safer_data = $user_text;

            if ($field_type === FIELD_FILTER::email && is_string($safer_data)) {
                $safer_data = substr($safer_data, 0, 254);
            }

            // false here makes the validator fail the field
            $safer_data = self::apply_field_filter($safer_data, $field_type);

            // FallBack: These field types should never allow arrays anyways
            if ($field_type === FIELD_FILTER::raw_string || $field_type === FIELD_FILTER::raw) {
                if (\bs_tts\common::get_count($safer_data)) {
                    $safer_data = $safer_data[0];
                }
            }

            if ($from === "html") {
                $safer_html = self::get_safer_html($safer_data, $a);
                if ($safer_html !== false) {
                    $safer_html_data = $safer_html;
                }
                if (isset($safer_html_data)) {
                    $safer_html_data = self::t($safer_html_data, $trim);
                }
            } else {
                $safer_data = self::t($safer_data, $trim);
            }

            if ($field_type === FIELD_FILTER::integer_number) {
                $safer_data = intval($safer_data);
            }
            if ($field_type === FIELD_FILTER::floating_point) {
                $safer_data = floatval($safer_data);
            }

            if ($from === "db") {
                $is_numeric_type = $field_type === FIELD_FILTER::integer_number
                    || $field_type === FIELD_FILTER::floating_point;
                if ($is_numeric_type) {
                    $safer_db_data = $safer_data;
                } elseif (isset($a->use_db_filter) && $a->use_db_filter === DB_FILTER::ON) {
                    $safe_for_db = \tts\safer_sql::get_safer_sql_text($safer_data);
                    $safer_db_data = $safe_for_db["text"];
                    $meta[$input_field_name]['db_filter_status'] =
                        $safe_for_db["status"] ?? \tts\SQL_SAFETY_FLAG::filtered;
                } else {
                    $safer_db_data = $safer_data;
                }
            }
        }

        $ret = [
            'name' => $input_field_name,
            'meta' => $meta,
        ];
        $data = [];
        if ($from === "db") {
            $ret['db'] = $safer_db_data;
            $data[$input_field_name] = $safer_db_data;
        } elseif ($from === "logic") {
            $ret['logic'] = $safer_data;
            $data[$input_field_name] = $safer_data;
        } elseif ($from === "html") {
            $ret['html'] = $safer_html_data;
            $data[$input_field_name] = $safer_html_data;
        }

        $ret['errors'] = ($rules !== [])
            ? \bs_tts\validator::validate($data, $rules, $messages)
            : [];
        return $ret;
    }

    /**
     * Lazily sanitize every use_io entry of $inputs for one target; entries
     * that are not use_io instances are skipped.
     */
    private static function sanitize_each(
        string $from,
        array $inputs,
        FIELD_FILTER $default_filter,
        bool $trim,
    ): \Generator {
        foreach ($inputs as $input_field_name => $a) {
            if (! $a instanceof use_io) {
                continue;
            }
            yield self::sanitize_helper($from, (string) $input_field_name, $a, $default_filter, $trim);
        }
    }

    /**
     * Sanitize the inputs for database use.
     *
     * @param array $inputs field name => use_io
     * @param FIELD_FILTER $default_filter used for fields without their own filter
     * @param bool $trim
     * @return \Generator one result array per field, 'db' holding the value
     */
    public static function db_sanitize(
        array $inputs,
        FIELD_FILTER $default_filter = FIELD_FILTER::raw_string,
        bool $trim = true,
    ): \Generator {
        return self::sanitize_each("db", $inputs, $default_filter, $trim);
    }

    /**
     * Sanitize the inputs for use in program logic.
     *
     * @param array $inputs field name => use_io
     * @param FIELD_FILTER $default_filter used for fields without their own filter
     * @param bool $trim
     * @return \Generator one result array per field, 'logic' holding the value
     */
    public static function logic_sanitize(
        array $inputs,
        FIELD_FILTER $default_filter = FIELD_FILTER::raw_string,
        bool $trim = true,
    ): \Generator {
        return self::sanitize_each("logic", $inputs, $default_filter, $trim);
    }

    /**
     * Sanitize the inputs based on the rules, escape them for HTML output and
     * optionally trim the strings.
     *
     * @param array $inputs field name => use_io
     * @param FIELD_FILTER $default_filter used for fields without their own filter
     * @param bool $trim
     * @return \Generator one result array per field, 'html' holding the value
     */
    public static function html_escape_and_sanitize(
        array $inputs,
        FIELD_FILTER $default_filter = FIELD_FILTER::raw_string,
        bool $trim = true,
    ): \Generator {
        return self::sanitize_each("html", $inputs, $default_filter, $trim);
    }
}