Add function to ensure an input string is UTF-8 encoded

This commit is contained in:
Joshua Ramon Enslin 2021-07-01 15:34:46 +02:00
parent f030adba20
commit dbbdf4f230
Signed by: jrenslin
GPG Key ID: 46016F84501B70AE

View File

@ -266,4 +266,55 @@ final class MD_STD_IN {
throw new MDgenericInvalidInputsException("ISBNs must be either 10 or 13 characters long.");
}
/**
 * Returns a UTF-8 version of a string.
 *
 * Input that already is valid UTF-8 is returned unchanged. Any other input is
 * converted with iconv (using transliteration): ISO-8859-1 is preferred if
 * mbstring's strict detection accepts the input as such; only if that attempt
 * fails are the remaining encodings known to mbstring tried, last-listed first.
 *
 * This is a best-effort conversion: if no attempt succeeds, the input is
 * returned unmodified, so the result is not guaranteed to be valid UTF-8 in
 * that case.
 *
 * @param string $input Input string.
 *
 * @return string
 */
public static function ensureStringIsUtf8(string $input):string {

    // If the input is valid UTF-8 from the start, it is simply returned in its
    // original form.
    if (\mb_check_encoding($input, 'UTF-8')) {
        return $input;
    }

    // ISO-8859-1 is the preferred source encoding. It is probed on its own
    // first, so that the much more expensive scan over every encoding known to
    // mbstring is only performed if this attempt does not yield a result.
    if (\mb_detect_encoding($input, 'ISO-8859-1', true) !== false) {
        if (($converted = \iconv('ISO-8859-1', 'UTF-8//TRANSLIT', $input)) !== false) {
            return $converted;
        }
    }

    // Fallback: collect every other encoding whose strict detection accepts
    // the input. ISO-8859-1 is skipped, as it has been handled above and a
    // second identical conversion attempt could not succeed either.
    $suitableEncodings = [];
    foreach (\mb_list_encodings() as $encoding) {
        if ($encoding === 'ISO-8859-1') {
            continue;
        }
        if (\mb_detect_encoding($input, $encoding, true) !== false) {
            $suitableEncodings[] = $encoding;
        }
    }

    // Try the candidates in reverse listing order and return the first
    // conversion iconv is able to perform.
    foreach (\array_reverse($suitableEncodings) as $encoding) {
        if (($converted = \iconv($encoding, 'UTF-8//TRANSLIT', $input)) !== false) {
            return $converted;
        }
    }

    // No conversion was possible; hand the input back untouched.
    return $input;

}
}