Add function to ensure an input string is UTF-8 encoded
This commit is contained in:
parent
f030adba20
commit
dbbdf4f230
|
@ -266,4 +266,55 @@ final class MD_STD_IN {
|
|||
throw new MDgenericInvalidInputsException("ISBNs must be either 10 or 13 characters long.");
|
||||
|
||||
}
|
||||
|
||||
/**
 * Returns a UTF-8 version of a string.
 *
 * Input that already is valid UTF-8 (which includes plain ASCII and the
 * empty string) is returned unchanged. Any other input is interpreted as
 * ISO-8859-1 and converted to UTF-8.
 *
 * @param string $input Input string.
 *
 * @return string
 */
public static function ensureStringIsUtf8(string $input):string {

    // If the input is valid UTF-8 from the start, it is simply returned in its
    // original form.
    if (\mb_check_encoding($input, 'UTF-8')) {
        return $input;
    }

    // ISO-8859-1 assigns a character to every possible byte value, so any byte
    // sequence can be read as ISO-8859-1 and no per-encoding detection pass is
    // needed before converting. Each of those characters has a direct Unicode
    // counterpart, hence no transliteration is involved either. The conversion
    // is done with mbstring (rather than iconv) so that validation and
    // conversion rely on the same extension and the same encoding names.
    $converted = \mb_convert_encoding($input, 'UTF-8', 'ISO-8859-1');

    // mb_convert_encoding() is declared as possibly returning false. Falling
    // back to the untouched input keeps the declared string return type intact.
    return \is_string($converted) ? $converted : $input;

}
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue
Block a user