 * @copyright Copyright (c) 2022, Robert Strutts.
 * @license https://mit-license.org/
 */

/* Sanitize Input, Validate data and Escape output.
 * 1) In web development, to sanitize means to remove unsafe
 * characters from the input. This makes DB inserts/selects, etc. safer.
 * 2) Validation is not sanitization: this step does not remove any bad data;
 * validation confirms that the info coming into your application meets
 * the criteria you want.
 * 3) Escape output - to escape is to get free from something, or to avoid
 * something. Take care not to escape the data more than once: escape it
 * either when you receive it or when you need to output it, never both.
 * I recommend escaping all output, and escaping on output only.
 *
 * Don’t try to sanitize input; escape output instead. Perhaps more importantly,
 * sanitizing input gives a false sense of security. What does “unsafe” mean?
 * In what context? Sure, <>& are unsafe characters for HTML, but what about
 * CSS, JSON, SQL, or even shell scripts? Those have a completely different
 * set of unsafe characters.
 *
 * Every so often developers talk about “sanitizing user input” to prevent
 * cross-site scripting attacks. This is well-intentioned, but it leads to a
 * false sense of security, and sometimes mangles perfectly good input.
*/

namespace bs_tts;

use \tts\enum\FIELD_FILTER; // Defined in enum\safer_io_enums
use \tts\enum\DB_FILTER;
use \tts\enum\HTML_FLAG;
use \tts\enum\INPUTS;

/**
 * use_io defines public members to be used on safer_io INPUTS.
 *
 * Each instance describes one input field; safer_io::sanitize_helper() reads
 * these members to decide where the value comes from and how to treat it.
 */
final class use_io {
    /** Literal value to use instead of looking the field up in the request. */
    public $input_var;
    /** INPUTS enum case naming the request source (used when $input_var is unset). */
    public $input_type;
    /** FIELD_FILTER enum case; falls back to the caller's default filter when unset. */
    public $field_filter;
    /** HTML_FLAG enum case controlling how HTML output is made safer. */
    public $escape_html;
    /** Validation rule handed to \tts\validator::validate(). */
    public $validation_rule;
    /** Validation message; only used when $validation_rule is also set. */
    public $validation_message;
    /** Copied into the field's meta as 'skip_db' (defaults to false). */
    public $skip_the_db;
    /** When equal to DB_FILTER::ON, "db" values go through \tts\extras\safer_sql. */
    public $use_db_filter;
}

/**
 * use_iol is to Auto-Wire Input Output Logic controllers
 * in standard paths defined below
 */
final class use_iol {

    /**
     * Run the input, logic and output classes of one controller in sequence.
     *
     * The classes are resolved as
     *   \prj\{project}\inputs\{root_folder}\{file}_in
     *   \prj\{project}\logic\{root_folder}\{file}_logic
     *   \prj\{project}\outputs\{root_folder}\{file}_out
     * and the static method $method is called on each of them. The value
     * returned by the input class is passed to the logic and output classes.
     *
     * NOTE(review): the class names are built directly from the arguments;
     * presumably callers only pass trusted, routed values - confirm.
     *
     * @param string $root_folder namespace segment below inputs/logic/outputs
     * @param string $file        class name prefix
     * @param string $method      static method to call on all three classes
     * @param string $db_service  DI service name registered as 'db'
     * @return mixed whatever the output class method returns
     */
    public static function auto_wire(
        string $root_folder,
        string $file,
        string $method = 'index',
        string $db_service= 'db_mocker'
    ) {
        new \tts\enum\safer_io_enums(); // Auto load

        $project = rtrim(\bs_tts\site_helper::get_project(), '/');

        \main_tts\registry::set(
            'db',
            \main_tts\registry::get('di')->get_service($db_service)
        );

        $input_class = "\\prj\\{$project}\\inputs\\{$root_folder}\\{$file}_in";
        $input = $input_class::$method();

        $logic_class = "\\prj\\{$project}\\logic\\{$root_folder}\\{$file}_logic";
        $logic_class::$method($input);

        $output_class = "\\prj\\{$project}\\outputs\\{$root_folder}\\{$file}_out";
        return $output_class::$method($input);
    }
}

/**
 * Static helpers to read request input, filter it, and escape it for
 * HTML / JSON / URL / DB contexts.
 */
final class safer_io {

    /** Raw request body captured by grab_all_post_data(). */
    private static string $string_of_POST_data = "";

    /** Values registered through set_data_input(), keyed by variable name. */
    private static array $DATA_INPUTS = [];

    /** Static-only class: no instances. */
    protected function __construct() {

    }

    /**
     * Register a value for INPUTS::variable look-ups.
     * Allow anything to set_data_inputs is desired here.
     * The first value registered under a name wins; later calls are ignored.
     */
    public static function set_data_input(string $var_name, mixed $data_in): void {
        if (! isset(self::$DATA_INPUTS[$var_name])) {
            self::$DATA_INPUTS[$var_name] = $data_in;
        }
    }

    /**
     * Fetch a value registered with set_data_input(), or null when absent.
     * Do not allow anyone out-side of this class to get this un-filtered input.
     */
    private static function get_data_input(string $var_name) {
        return self::$DATA_INPUTS[$var_name] ?? null;
    }

    /**
     * Read the request body from php://input into this class.
     *
     * @param int $bytes_limit maximum bytes to read; 0 reads everything
     * @param int $max_params  maximum number of "&" separators; 0 disables the check
     * @throws \Exception when the body reaches $bytes_limit bytes or holds
     *                    more than $max_params "&" separators
     */
    public static function grab_all_post_data(
        int $bytes_limit = 650000,
        int $max_params = 400
    ): void {
        $stream = fopen("php://input", 'r');
        if (! $stream) {
            return;
        }

        $post_data = ($bytes_limit === 0)
            ? stream_get_contents($stream)
            : stream_get_contents($stream, $bytes_limit);
        fclose($stream);

        if ($post_data === false) {
            $post_data = ""; // A failed read is treated as an empty body
        }

        // Reading exactly $bytes_limit bytes means the cap was hit.
        if ($bytes_limit > 0 && strlen($post_data) == $bytes_limit) {
            throw new \Exception("Too much input data!");
        }

        $count_params = substr_count($post_data, "&");
        if ($max_params > 0 && $count_params > $max_params) {
            throw new \Exception("Too many input parameters!");
        }

        self::$string_of_POST_data = $post_data;
    }

    /** Forget the request body captured by grab_all_post_data(). */
    public static function clear_post_data() {
        self::$string_of_POST_data = "";
    }

    /**
     * Convert a string to UTF-8 using mbstring's detected source encoding.
     *
     * The input is returned untouched when mbstring is not loaded, when the
     * encoding cannot be detected, or when it already is valid UTF-8.
     */
    public static function convert_to_utf8(string $in_str): string {
        if (! extension_loaded('mbstring')) {
            return $in_str;
        }

        $cur_encoding = mb_detect_encoding($in_str);
        if ($cur_encoding === false) {
            // Nothing sensible to convert from; mb_convert_encoding() would fail.
            return $in_str;
        }

        if ($cur_encoding == "UTF-8" && mb_check_encoding($in_str, "UTF-8")) {
            return $in_str;
        }

        return mb_convert_encoding($in_str, 'UTF-8', $cur_encoding);
    }

    /**
     * Escape HTML output (htmlspecialchars, both quote styles, HTML5 entities).
     */
    public static function h(string $string): string {
        $utf8 = self::convert_to_utf8($string);
        return htmlspecialchars($utf8, ENT_QUOTES | ENT_SUBSTITUTE | ENT_HTML5, 'UTF-8');
    }

    /**
     * Reverse encode of HTML done by h().
     */
    public static function html_decode(string $string): string {
        return htmlspecialchars_decode($string, ENT_QUOTES | ENT_SUBSTITUTE | ENT_HTML5);
    }

    /**
     * Run the string through the 'html_filter' DI service (HTML Purify library).
     * The service's defaults are applied on first use.
     */
    public static function p(string $string): string {
        $purifier = \main_tts\registry::get('di')->get_service('html_filter');
        if (! $purifier->has_loaded()) {
            $purifier->set_defaults();
        }
        return $purifier->purify($string);
    }

    /**
     * Escape JavaScript output by JSON-encoding the value (pretty printed).
     *
     * @param mixed $input       value to encode
     * @param int   $levels_deep maximum nesting depth
     * @return mixed the JSON string, or the \JsonException when encoding fails
     *               (check with has_json_error())
     */
    public static function j($input, int $levels_deep = 512): mixed {
        try {
            return json_encode($input, JSON_PRETTY_PRINT | JSON_THROW_ON_ERROR, $levels_deep);
        } catch (\JsonException $ex) {
            return $ex;
        }
    }

    /**
     * Decode a JSON string.
     *
     * @param string $string             JSON text
     * @param bool   $return_as_an_array decode objects as associative arrays
     * @param int    $levels_deep        maximum nesting depth
     * @return mixed the decoded value, or the \JsonException when decoding fails
     *               (check with has_json_error())
     */
    public static function json_decode(
        string $string,
        bool $return_as_an_array = true,
        int $levels_deep = 512
    ): mixed {
        try {
            return json_decode($string, $return_as_an_array, $levels_deep, JSON_THROW_ON_ERROR);
        } catch (\JsonException $ex) {
            return $ex;
        }
    }

    /** True when the value is the \JsonException returned by j() / json_decode(). */
    public static function has_json_error($object): bool {
        return ($object instanceof \JsonException);
    }

    /**
     * Escape URL output (urlencode).
     */
    public static function u(string $string): string {
        return urlencode($string);
    }

    /**
     * Encode HTML with htmlentities(): every character that has a named
     * entity is converted. ENT_QUOTES is passed, so both double and single
     * quotes are encoded. Prefer h() for plain escaping of output.
     */
    public static function e(string $string): string {
        $utf8 = self::convert_to_utf8($string);
        return htmlentities($utf8, ENT_QUOTES, 'UTF-8');
    }

    /** Decode HTML entities using html_entity_decode()'s default flags. */
    public static function de(string $data): string {
        return html_entity_decode($data);
    }

    /*
     * Note: Generally, "strip_tags" is just the wrong function.
     * Never use it. And if you do, absolutely never use the second parameter,
     * because sooner or later someone will abuse it.
     */

    /**
     * Read a $_SERVER variable through filter_input() without altering it.
     *
     * @return mixed the value, null when the variable is not set
     */
    public static function get_clean_server_var(string $var): mixed {
        return filter_input(INPUT_SERVER, $var, FILTER_UNSAFE_RAW);
    }

    /** Interpret a value as a boolean using FILTER_VALIDATE_BOOLEAN. */
    public static function get_bool($in): bool {
        return (filter_var($in, FILTER_VALIDATE_BOOLEAN));
    }

    /**
     * Purpose: To decode JQuery encoded objects, arrays, strings, int, bool types.
     * The content must be of application/json.
     *
     * @param string $input_field_name top-level key to fetch
     * @param bool   $return_as_array  decode objects as associative arrays
     * @param int    $levels_deep      maximum nesting depth
     * @return mixed the field's value, or false when the body is not valid
     *               JSON, is not an array/object, or the field is absent or null
     */
    private static function get_json_post_data(
        string $input_field_name,
        bool $return_as_array = true,
        int $levels_deep = 512
    ) {
        $decoded = self::json_decode(
            self::$string_of_POST_data,
            $return_as_array,
            $levels_deep
        );

        if (self::has_json_error($decoded)) {
            return false;
        }
        if (is_array($decoded)) {
            return $decoded[$input_field_name] ?? false;
        }
        if (is_object($decoded)) {
            // Reached when $return_as_array is false; array access would be fatal.
            return $decoded->{$input_field_name} ?? false;
        }
        return false; // Scalar JSON documents carry no named fields
    }

    /**
     * Lazily walk the url-encoded request body, yielding decoded name => value.
     *
     * Pairs are taken from the END of the body first. A consumer may
     * send("stop") into the generator to end the walk early. Pairs with an
     * empty() name are skipped.
     */
    private static function get_post_data(): \Generator {
        $pairs = explode("&", self::$string_of_POST_data);

        while (($pair = array_pop($pairs)) !== null) {
            // Limit of 2: only the first "=" separates name from value, so a
            // value that itself contains "=" is kept whole.
            $name_value = explode("=", $pair, 2);
            $name = $name_value[0] ?? false;
            $value = $name_value[1] ?? "";

            if ($name === false || empty($name)) {
                continue;
            }

            $cmd = (yield urldecode($name) => urldecode($value));
            if ($cmd === "stop") {
                break;
            }
        }
    }

    /**
     * Make a string safer for HTML output according to the given flag.
     *
     * raw throws, strip uses strip_tags(), encode uses e(), purify uses p(),
     * and escape (as well as anything else) uses h().
     *
     * @throws \Exception for HTML_FLAG::raw
     */
    private static function safer_html(
        string $input,
        HTML_FLAG $safety_level = HTML_FLAG::escape
    ): string {
        return match ($safety_level) {
            HTML_FLAG::raw => throw new \Exception('Raw HTML not supported!'),
            HTML_FLAG::strip => strip_tags($input),
            HTML_FLAG::encode => self::e($input),
            HTML_FLAG::purify => self::p($input),
            default => self::h($input),
        };
    }

    /**
     * Trim a string, or every string inside a non-empty array.
     *
     * For arrays: bool and int members are kept as they are, strings are
     * trimmed, everything else is dropped, and keys are not preserved.
     * Any other input, or $do_trim === false, returns $item unchanged.
     */
    private static function t($item, bool $do_trim = true) {
        if (! $do_trim) {
            return $item;
        }
        if (is_string($item)) {
            return trim($item);
        }
        if (\bs_tts\common::get_count($item)) {
            $trimmed = [];
            foreach ($item as $text) {
                if (is_bool($text) || is_int($text)) {
                    $trimmed[] = $text;
                    continue;
                }
                if (! is_string($text)) {
                    continue; // Deny Arrays and Objects here!
                }
                $trimmed[] = trim($text);
            }
            return $trimmed;
        }
        return $item;
    }

    /**
     * Look a field up in the captured request body, based on CONTENT_TYPE.
     *
     * @return mixed the value, or false when the content type is missing or
     *               unsupported, or the field was not found
     */
    private static function find_post_field(string $input_field_name): mixed {
        $content_type = self::get_clean_server_var('CONTENT_TYPE');
        if (! is_string($content_type)) {
            return false;
        }

        if (str_contains($content_type, "application/json")) {
            return self::get_json_post_data($input_field_name);
        }

        if (str_contains($content_type, "application/x-www-form-urlencoded")) {
            $post = self::get_post_data();
            foreach ($post as $key => $data) {
                if ($key === $input_field_name) {
                    $post->send("stop"); // Break loop in Generator
                    return $data;
                }
            }
        }

        return false;
    }

    /**
     * Look a field up in the parsed QUERY_STRING.
     *
     * @return mixed the value, or false when it is not present
     */
    private static function find_get_field(string $input_field_name): mixed {
        if (! isset($_SERVER['QUERY_STRING'])) {
            return false;
        }

        $query = self::get_clean_server_var('QUERY_STRING');
        if (! is_string($query)) {
            return false; // Nothing to parse
        }

        $get = [];
        parse_str($query, $get);

        return $get[$input_field_name] ?? false;
    }

    /**
     * Fetch the raw value of a field from the source named by $input_type.
     *
     * @return mixed the raw value, or null when the field was not supplied
     */
    private static function get_input_by_type(
        string $input_field_name,
        INPUTS $input_type,
    ): mixed {
        /* Must return here to avoid Failing Resolve later on in this FN
         * as input types variable, debugging, and json will not Resolve!
         */
        if ($input_type == INPUTS::variable) {
            return self::get_data_input($input_field_name);
        }

        if ($input_type == INPUTS::debugging) {
            $rd = self::find_post_field($input_field_name);
            if ($rd !== false) {
                return $rd;
            }
            if (filter_has_var(INPUT_POST, $input_field_name)) {
                return filter_input(INPUT_POST, $input_field_name);
            }
            // GET values are only consulted when ?debugging=... is truthy.
            if (! self::find_get_field("debugging")) {
                return null;
            }
            $get_var = self::find_get_field($input_field_name);
            if ($get_var !== false) {
                return $get_var;
            }
            if (filter_has_var(INPUT_GET, $input_field_name)) {
                return filter_input(INPUT_GET, $input_field_name);
            }
            return null;
        }

        if ($input_type === INPUTS::json) {
            $rd = self::find_post_field($input_field_name);
            return ($rd !== false) ? $rd : null;
        }

        if ($input_type === INPUTS::get) {
            $get_var = self::find_get_field($input_field_name);
            if ($get_var !== false) {
                return $get_var;
            }
        }

        $resolve_input = $input_type->resolve();
        if (filter_has_var($resolve_input, $input_field_name)) {
            return filter_input($resolve_input, $input_field_name);
        }
        return null;
    }

    /**
     * Apply safer_html() to one string, using the field's escape_html flag
     * when it is an enum case and the default flag otherwise.
     *
     * @param string $data text to make safer
     * @param use_io $a    field definition
     * @return string|bool
     */
    private static function get_safer_string(string $data, use_io $a): string | bool {
        if (isset($a->escape_html) && $a->escape_html instanceof \UnitEnum) {
            return self::safer_html($data, $a->escape_html);
        }
        return self::safer_html($data);
    }

    /**
     * Apply get_safer_string() to a string, or to every string of a
     * non-empty array (bool/int members are kept, other members are dropped,
     * keys are not preserved). Any other value is returned unchanged.
     */
    private static function get_safer_html($data, use_io $a) {
        if (is_string($data)) {
            return self::get_safer_string($data, $a);
        }

        if (\bs_tts\common::get_count($data)) {
            $ret = [];
            foreach ($data as $text) {
                if (is_bool($text) || is_int($text)) {
                    $ret[] = $text;
                    continue;
                }
                if (! is_string($text)) {
                    continue; // Deny Arrays and Objects here!
                }
                $ret[] = self::get_safer_string($text, $a);
            }
            return $ret;
        }

        return $data;
    }

    /**
     * True when a field produced by one of the sanitize generators has
     * validation rules set but was empty (or its meta is missing).
     *
     * @param array $data one result array yielded by a sanitize generator
     */
    public static function required_fields_were_NOT_all_submitted(array $data): bool {
        $field = $data['name'] ?? false;
        $empty = $data['meta'][$field]['empty'] ?? true;
        $required = $data['meta'][$field]['validation_rules_set'] ?? false;
        return ($empty && $required);
    }

    /**
     * Fetch, filter and validate ONE field for the requested target.
     *
     * @param string       $from             "db", "logic" or "html"
     * @param string       $input_field_name field name
     * @param use_io       $a                field definition
     * @param FIELD_FILTER $default_filter   filter used when $a has none
     * @param bool         $trim             trim string values
     * @return array keys: 'name', 'meta', 'errors' and the key named by $from.
     *               When $a defines neither input_var nor input_type, the
     *               result reports the field as missing and carries
     *               'html' => null, 'db' => false, 'logic' => false.
     */
    private static function sanitize_helper(
        string $from,
        string $input_field_name,
        use_io $a,
        FIELD_FILTER $default_filter = FIELD_FILTER::raw_string,
        bool $trim = true,
    ) : array {
        $ret = [];
        $data = [];
        $meta = [];
        $meta['missing'] = [];
        $rules = [];
        $messages = [];
        $safer_db_data = null;

        if (isset($a->field_filter) && $a->field_filter instanceof \UnitEnum) {
            $field_type = $a->field_filter;
        } else {
            $field_type = $default_filter;
        }

        if (isset($a->input_var)) {
            $user_text = $a->input_var;
        } elseif (isset($a->input_type) && $a->input_type instanceof \UnitEnum) {
            $user_text = self::get_input_by_type($input_field_name, $a->input_type);
        } else {
            $ret['name'] = $input_field_name;
            $ret['meta']['missing'][] = $input_field_name;
            $ret['errors'][$input_field_name] = "Missing Field $input_field_name";
            $ret['html'] = null;
            $ret['db'] = false;
            $ret['logic'] = false;
            return $ret;
        }

        $safer_data = false; // needs to be false to fail the validator
        $safer_html_data = null; // should be null for ?? operator to work with it....

        if (isset($a->validation_rule)) {
            $rules[$input_field_name] = $a->validation_rule;
        }
        if (isset($a->validation_message) && isset($a->validation_rule)) {
            $messages[$input_field_name] = $a->validation_message;
        }

        $meta[$input_field_name]['validation_rules_set'] = (count($rules)) ? true : false;
        $db = (isset($a->skip_the_db)) ? $a->skip_the_db : false;
        $meta[$input_field_name]['type'] = $field_type->name;
        $meta[$input_field_name]['skip_db'] = $db;

        if ($user_text === null) {
            $safer_data = null;
            $safer_db_data = null;
            $safer_html_data = null;
            $meta[$input_field_name]['empty'] = true;
        } else {
            $field_filter_resolved = $field_type->resolve();
            $meta[$input_field_name]['empty'] = false;
            $safer_data = $user_text;

            // Cap e-mail length; arrays are left for the filter to reject
            // because substr() would throw a TypeError on them.
            if ($field_type == FIELD_FILTER::email && is_scalar($safer_data)) {
                $safer_data = substr((string) $safer_data, 0, 254);
            }

            $safer_data = filter_var($safer_data, FILTER_DEFAULT, $field_filter_resolved);

            // FallBack: These field types should never allow arrays anyways
            if ($field_type == FIELD_FILTER::raw_string ||
                $field_type == FIELD_FILTER::raw
            ) {
                if (\bs_tts\common::get_count($safer_data)) {
                    $safer_data = $safer_data[0];
                }
            }

            if ($from === "html") {
                $safer_html = self::get_safer_html($safer_data, $a);
                if ($safer_html !== false) {
                    $safer_html_data = $safer_html;
                }
                if (isset($safer_html_data)) {
                    $safer_html_data = self::t($safer_html_data, $trim);
                }
            } else {
                $safer_data = self::t($safer_data, $trim);
            }

            if ($field_type == FIELD_FILTER::integer_number) {
                $safer_data = intval($safer_data);
            }
            if ($field_type == FIELD_FILTER::floating_point) {
                $safer_data = floatval($safer_data);
            }

            if ($from === "db") {
                if ($field_type == FIELD_FILTER::integer_number ||
                    $field_type == FIELD_FILTER::floating_point
                ) {
                    $safer_db_data = $safer_data;
                } else {
                    if (isset($a->use_db_filter) && $a->use_db_filter == DB_FILTER::ON) {
                        $safe_for_db = \tts\extras\safer_sql::get_safer_sql_text($safer_data);
                        $text = $safe_for_db["text"];
                        $meta[$input_field_name]['db_filter_status'] =
                            $safe_for_db["status"] ?? \tts\SQL_SAFETY_FLAG::filtered;
                    } else {
                        $text = $safer_data;
                    }
                    $safer_db_data = $text;
                }
            }
        }

        $ret['name'] = $input_field_name;
        $ret['meta'] = $meta;

        if ($from === "db") {
            $ret['db'] = $safer_db_data;
            $data[$input_field_name] = $safer_db_data;
        } elseif ($from === "logic") {
            $ret['logic'] = $safer_data;
            $data[$input_field_name] = $safer_data;
        } elseif ($from === "html") {
            $ret['html'] = $safer_html_data;
            $data[$input_field_name] = $safer_html_data;
        }

        $ret['errors'] = (count($rules))
            ? \tts\validator::validate($data, $rules, $messages)
            : [];

        return $ret;
    }

    /**
     * Shared body of the three public sanitize generators: yields one
     * sanitize_helper() result per use_io entry, skipping anything else.
     */
    private static function sanitize_each(
        string $from,
        array $inputs,
        FIELD_FILTER $default_filter,
        bool $trim,
    ) : \Generator {
        foreach ($inputs as $input_field_name => $a) {
            if (! $a instanceof use_io) {
                continue;
            }
            yield self::sanitize_helper(
                $from,
                $input_field_name,
                $a,
                $default_filter,
                $trim
            );
        }
    }

    /**
     * Sanitize the inputs for database use; each result carries a 'db' key.
     * @param array $inputs field name => use_io
     * @param FIELD_FILTER $default_filter filter for fields without their own
     * @param bool $trim
     * @return \Generator
     */
    public static function db_sanitize(
        array $inputs,
        FIELD_FILTER $default_filter = FIELD_FILTER::raw_string,
        bool $trim = true,
    ) : \Generator {
        yield from self::sanitize_each("db", $inputs, $default_filter, $trim);
    }

    /**
     * Sanitize the inputs for business logic; each result carries a 'logic' key.
     * @param array $inputs field name => use_io
     * @param FIELD_FILTER $default_filter filter for fields without their own
     * @param bool $trim
     * @return \Generator
     */
    public static function logic_sanitize(
        array $inputs,
        FIELD_FILTER $default_filter = FIELD_FILTER::raw_string,
        bool $trim = true,
    ) : \Generator {
        yield from self::sanitize_each("logic", $inputs, $default_filter, $trim);
    }

    /**
     * Sanitize the inputs based on the rules and optionally trim the string;
     * each result carries an HTML-safer 'html' key.
     * @param array $inputs field name => use_io
     * @param FIELD_FILTER $default_filter filter for fields without their own
     * @param bool $trim
     * @return \Generator
     */
    public static function html_escape_and_sanitize(
        array $inputs,
        FIELD_FILTER $default_filter = FIELD_FILTER::raw_string,
        bool $trim = true,
    ) : \Generator {
        yield from self::sanitize_each("html", $inputs, $default_filter, $trim);
    }
}