* @copyright (c) 2024, Robert Strutts
 * @license MIT
 */

namespace CodeHydrater;

/**
 * Byte-oriented string helpers plus UTF-8 detection utilities.
 *
 * The instance methods are thin wrappers around PHP's single-byte string
 * functions; they operate on bytes, not on multibyte characters.
 */
class String_fns
{
    /**
     * Tell whether a byte string is well-formed UTF-8.
     *
     * The check follows the well-formed byte sequence table of RFC 3629:
     * overlong encodings, UTF-16 surrogates (U+D800..U+DFFF), code points
     * above U+10FFFF, stray continuation bytes and truncated sequences are
     * all rejected.
     *
     * @param string $string Raw bytes to inspect.
     * @return bool True when every byte belongs to a well-formed sequence
     *              (the empty string is considered valid).
     */
    public static function isUTF8(string $string)
    {
        // Empty string is valid UTF-8
        if ($string === '') {
            return true;
        }

        // Number of continuation bytes still owed by the current sequence.
        $pending = 0;
        // Allowed range for the NEXT continuation byte. Only the first
        // continuation byte of some sequences is narrower than 0x80..0xBF.
        $low = 0x80;
        $high = 0xBF;

        foreach (unpack('C*', $string) as $byte) {
            if ($pending === 0) {
                // Single byte character (0xxxxxxx)
                if ($byte <= 0x7F) {
                    continue;
                }

                if ($byte >= 0xC2 && $byte <= 0xDF) {
                    // 2 bytes; 0xC0 and 0xC1 would be overlong encodings.
                    $pending = 1;
                } elseif ($byte >= 0xE0 && $byte <= 0xEF) {
                    // 3 bytes
                    $pending = 2;
                    if ($byte === 0xE0) {
                        $low = 0xA0; // below this is an overlong encoding
                    } elseif ($byte === 0xED) {
                        $high = 0x9F; // above this is a UTF-16 surrogate
                    }
                } elseif ($byte >= 0xF0 && $byte <= 0xF4) {
                    // 4 bytes; 0xF5 and up would exceed U+10FFFF.
                    $pending = 3;
                    if ($byte === 0xF0) {
                        $low = 0x90; // below this is an overlong encoding
                    } elseif ($byte === 0xF4) {
                        $high = 0x8F; // above this exceeds U+10FFFF
                    }
                } else {
                    // Stray continuation byte or invalid lead byte.
                    return false;
                }

                continue;
            }

            // Inside a sequence: anything outside the allowed range,
            // including a plain ASCII byte, means the sequence is broken.
            if ($byte < $low || $byte > $high) {
                return false;
            }

            // Later continuation bytes always use the full range.
            $low = 0x80;
            $high = 0xBF;
            $pending--;
        }

        // A non-zero count means the input ended in the middle of a sequence.
        return $pending === 0;
    }

    /**
     * Check if a string contains at least one well-formed multibyte UTF-8
     * sequence (2 to 4 bytes).
     *
     * @param string $string Raw bytes to inspect.
     * @return bool True when such a sequence is found; false otherwise,
     *              including when preg_match() reports an error.
     */
    public static function has_multibyte_chars(string $string)
    {
        return (bool) preg_match('/[\xC2-\xDF][\x80-\xBF]|\xE0[\xA0-\xBF][\x80-\xBF]|[\xE1-\xEC\xEE\xEF][\x80-\xBF]{2}|\xED[\x80-\x9F][\x80-\xBF]|\xF0[\x90-\xBF][\x80-\xBF]{2}|[\xF1-\xF3][\x80-\xBF]{3}|\xF4[\x80-\x8F][\x80-\xBF]{2}/', $string);
    }

    /**
     * Get the length of a string in bytes.
     *
     * @param string $string
     * @return int
     */
    public function strlen($string)
    {
        return strlen($string);
    }

    /**
     * Convert a string to lowercase using the byte-wise strtolower().
     *
     * @param string $string
     * @return string
     */
    public function strtolower($string)
    {
        return strtolower($string);
    }

    /**
     * Convert a string to uppercase using the byte-wise strtoupper().
     *
     * @param string $string
     * @return string
     */
    public function strtoupper($string)
    {
        return strtoupper($string);
    }

    /**
     * Get a substring from a string, with offsets counted in bytes.
     *
     * @param string   $string
     * @param int      $start  Byte offset, as accepted by substr().
     * @param int|null $length Byte count; null means "to the end of the string".
     * @return string|false Whatever substr() returns for these arguments.
     */
    public function substr($string, $start, $length = null)
    {
        // The two-argument form is used for null so the result does not
        // depend on how substr() itself treats a null length.
        return ($length !== null)
            ? substr($string, $start, $length)
            : substr($string, $start);
    }

    /**
     * Reverse a string byte by byte.
     *
     * @param string $string
     * @return string
     */
    public function strrev($string)
    {
        return strrev($string);
    }
}