Add function to ensure an input string is UTF-8 encoded
This commit is contained in:
parent
f030adba20
commit
dbbdf4f230
|
@ -266,4 +266,55 @@ final class MD_STD_IN {
|
||||||
throw new MDgenericInvalidInputsException("ISBNs must be either 10 or 13 characters long.");
|
throw new MDgenericInvalidInputsException("ISBNs must be either 10 or 13 characters long.");
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Returns a UTF-8 version of a string.
 *
 * Input that already is valid UTF-8 is returned unchanged. Any other input
 * is converted to UTF-8, preferring ISO-8859-1 as the assumed source
 * encoding and falling back to the other encodings known to mbstring.
 * If no conversion succeeds at all, the input is returned as it was passed.
 *
 * @param string $input Input string.
 *
 * @return string
 */
public static function ensureStringIsUtf8(string $input):string {

    // If the input is valid UTF-8 from the start, it is simply returned in
    // its original form.
    if (\mb_check_encoding($input, 'UTF-8')) {
        return $input;
    }

    // ISO-8859-1 is the preferred source encoding. It is checked first, so
    // that the scan over all encodings below only runs if it is needed.
    if (\mb_detect_encoding($input, 'ISO-8859-1', true) !== false) {

        $converted = \iconv('ISO-8859-1', 'UTF-8//TRANSLIT', $input);
        if ($converted !== false) {
            return $converted;
        }

        // iconv() returns false if it cannot perform the conversion (e.g.
        // on builds where //TRANSLIT is unavailable). mbstring can do the
        // same conversion on its own.
        $converted = \mb_convert_encoding($input, 'UTF-8', 'ISO-8859-1');
        if (\is_string($converted) && \mb_check_encoding($converted, 'UTF-8')) {
            return $converted;
        }

    }

    // Entries of mb_list_encodings() that are transfer encodings rather
    // than character sets. They are never sensible source encodings here.
    // ISO-8859-1 has already been handled above.
    $skippedEncodings = [
        'ISO-8859-1',
        'BASE64',
        'UUENCODE',
        'HTML-ENTITIES',
        'Quoted-Printable',
        '7bit',
        '8bit',
    ];

    // If a conversion from ISO-8859-1 doesn't work, take any other encoding
    // matching the input. The list is walked in reverse order. As the
    // encoding names stem from mbstring, mbstring is also used to convert:
    // iconv() does not know many of these names and would raise a warning
    // for each of them.
    foreach (\array_reverse(\mb_list_encodings()) as $encoding) {

        if (\in_array($encoding, $skippedEncodings, true)) {
            continue;
        }
        if (\mb_detect_encoding($input, $encoding, true) === false) {
            continue;
        }

        $converted = \mb_convert_encoding($input, 'UTF-8', $encoding);
        if (\is_string($converted) && \mb_check_encoding($converted, 'UTF-8')) {
            return $converted;
        }

    }

    // Nothing matched: hand back the input unchanged.
    return $input;

}
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in New Issue
Block a user