/**
 * Returns a UTF-8 version of a string.
 *
 * Input that already is valid UTF-8 is returned unchanged. Otherwise the input
 * is converted from ISO-8859-1 if mbstring accepts it as such. Failing that,
 * the other encodings known to mbstring are checked in reverse listing order,
 * and the first one that matches the input and can be converted by iconv is used.
 * If no conversion succeeds, the input is re-encoded from UTF-8 to UTF-8, which
 * lets mbstring substitute ill-formed byte sequences, so that the returned
 * string is valid UTF-8 in every case.
 *
 * @param string $input Input string.
 *
 * @return string
 */
public static function ensureStringIsUtf8(string $input):string {

    // If the input is valid UTF-8 from the start (this includes the empty
    // string), it is simply returned in its original form.
    if (\mb_check_encoding($input, 'UTF-8')) {
        return $input;
    }

    // ISO-8859-1 is the preferred source encoding. It is checked on its own
    // first, so that the full list of encodings only needs to be scanned if
    // this conversion is not possible.
    if (\mb_detect_encoding($input, 'ISO-8859-1', true) !== false) {
        if (($converted = \iconv('ISO-8859-1', 'UTF-8//TRANSLIT', $input)) !== false) {
            return $converted;
        }
    }

    // If a conversion from ISO-8859-1 doesn't work, just take any of the other
    // encodings known to PHP's mbstring functions that match the input string.
    // The list is walked back to front.
    foreach (\array_reverse(\mb_list_encodings()) as $encoding) {

        // ISO-8859-1 has already been handled above; retrying cannot help.
        if ($encoding === 'ISO-8859-1') {
            continue;
        }
        if (\mb_detect_encoding($input, $encoding, true) === false) {
            continue;
        }

        // The encoding names come from mbstring and are not necessarily known
        // to iconv. A failing attempt is expected while probing, so the
        // notice / warning iconv raises on failure is swallowed for the
        // duration of this one call only.
        \set_error_handler(static function ():bool {
            return true;
        });
        try {
            $converted = \iconv($encoding, 'UTF-8//TRANSLIT', $input);
        }
        finally {
            \restore_error_handler();
        }

        if ($converted !== false) {
            return $converted;
        }

    }

    // Last resort: no conversion succeeded. Returning the input as is would
    // hand back a string that is known not to be valid UTF-8, so ill-formed
    // byte sequences are replaced by mbstring's substitute character instead.
    // The cast only narrows the declared string|false return type.
    return (string)\mb_convert_encoding($input, 'UTF-8', 'UTF-8');

}