Add tests for MD_STD_IN::sanitize_url() and ensure it supports rewriting

unencoded Cyrillic inputs

Close #7
This commit is contained in:
2023-11-05 23:29:14 +01:00
parent 2176e7312b
commit ae12cfdf0f
4 changed files with 99 additions and 4 deletions

View File

@ -197,13 +197,30 @@ final class MD_STD_IN {
return "";
}
$output = \filter_var($input, FILTER_SANITIZE_URL);
if (($output = \filter_var($output, FILTER_VALIDATE_URL)) === false) {
throw new MDInvalidUrl("Invalid input URL");
try {
if (($output = \filter_var($input, FILTER_VALIDATE_URL)) === false) {
throw new MDInvalidUrl("Invalid input URL");
}
}
catch (MDInvalidUrl $e) {
if (($parsed = parse_url($input)) === false || empty($parsed['scheme'])) {
throw new MDInvalidUrl("Invalid input URL");
}
$rewritten = $parsed['scheme'] . '://';
if (!empty($parsed['user']) && !empty($parsed['pass'])) {
$rewritten .= $parsed['user'] . ':' . $parsed['pass'] . '@';
}
$rewritten .= $parsed['host'];
if (!empty($parsed['port'])) $rewritten .= ':' . $parsed['port'];
$rewritten .= str_replace('%2F' , '/', urlencode($parsed['path']));
if (!empty($parsed['query'])) $rewritten .= '?' . urlencode($parsed['query']);
if (($output = \filter_var($rewritten, FILTER_VALIDATE_URL)) === false) {
throw new MDInvalidUrl("Invalid input URL" . \urlencode($input));
}
}
// Check for valid schemes
if (MD_STD::startsWithAny($input, ['https://', 'http://', 'ftp://']) === false) {
if (MD_STD::startsWithAny($output, ['https://', 'http://', 'ftp://']) === false) {
throw new MDInvalidUrl("Invalid input URL");
}