Extend tests for MD_STD_IN considerably, fix some edge cases

This commit is contained in:
2023-11-08 21:24:23 +01:00
parent a03f072a69
commit 66e704de47
4 changed files with 430 additions and 60 deletions

View File

@ -43,7 +43,7 @@ final class MD_STD_IN {
*/
public static function sanitize_id_or_zero(mixed $input):int {
if ($input === "") {
if ($input === "" || $input === 0) {
return 0;
}
@ -96,11 +96,14 @@ final class MD_STD_IN {
*/
public static function sanitize_rgb_color(mixed $input):string {
$output = \filter_var($input, FILTER_UNSAFE_RAW, FILTER_FLAG_STRIP_LOW | FILTER_FLAG_STRIP_HIGH);
if (($output = \filter_var($input, FILTER_UNSAFE_RAW, FILTER_FLAG_STRIP_LOW | FILTER_FLAG_STRIP_HIGH)) === false) {
throw new MDInvalidColorCode("Invalid color code provided: " . $output);
}
if ($output === false
|| !in_array(strlen($output), [3, 6], true)
|| (preg_match('/^[a-fA-F0-9]{3}$/', $output) === false && preg_match('/^[a-fA-F0-9]{6}$/', $output) === false)
$output = \strtoupper($output);
if (!in_array(strlen($output), [3, 6], true)
|| (MD_STD::preg_replace_str('/[A-F0-9]/', '', $output) !== '')
) {
throw new MDInvalidColorCode("Invalid color code provided: " . $output);
}
@ -214,19 +217,30 @@ final class MD_STD_IN {
$rewritten .= $parsed['host'];
if (!empty($parsed['port'])) $rewritten .= ':' . $parsed['port'];
$rewritten .= str_replace('%2F' , '/', urlencode($parsed['path']));
if (!empty($parsed['query'])) $rewritten .= '?' . urlencode($parsed['query']);
if (!empty($parsed['query'])) {
$rewritten .= '?' . str_replace('%3D', '=', urlencode($parsed['query']));
}
if (($output = \filter_var($rewritten, FILTER_VALIDATE_URL)) === false) {
throw new MDInvalidUrl("Invalid input URL" . \urlencode($input));
}
}
if (empty($output)) return '';
// As per the RFC, URLs should not exceed 2048. Enough real-world ones
// do. But they certainly should not exceed 10000 characters.
if (\strlen($output) > 10000) {
throw new MDInvalidUrl("The entered URL seems to be valid otherwise, but is overly long.");
}
// Check for valid schemes
if (MD_STD::startsWithAny($output, ['https://', 'http://', 'ftp://']) === false) {
throw new MDInvalidUrl("Invalid input URL");
}
if (\str_contains($output, '.') === false) {
throw new MDInvalidUrl("Invalid input URL");
}
return $output;
}
@ -245,7 +259,7 @@ final class MD_STD_IN {
}
if (($output = \filter_var($input, FILTER_VALIDATE_EMAIL)) === false) {
throw new MDInvalidEmail("Invalid input email address");
throw new MDInvalidEmail("Invalid input email address" . ' '. $input);
}
return $output;

View File

@ -0,0 +1,146 @@
<?PHP
/**
* Shared data providers (valid and invalid URLs and e-mail addresses) for tests of input sanitization.
*
* @author Joshua Ramon Enslin <joshua@museum-digital.de>
*/
declare(strict_types = 1);
use PHPUnit\Framework\TestCase;
/**
* Collection of static data providers for URL and e-mail address test cases.
*/
final class MD_STD_TEST_PROVIDERS {
    /**
     * Data provider listing URLs that are expected to be rejected.
     *
     * Each dataset is keyed by a short description and holds exactly one
     * element: the offending input string.
     *
     * @return array<string, array{0: string}>
     */
    public static function invalid_url_provider():array {

        // Real-world URLs frequently exceed 2048 characters, so the hard
        // limit under test is 10000 characters.
        $overlyLongUrl = 'https://www.museum-digital.org/' . str_repeat('a', 10000);

        return [
            'Space in protocol name'          => ['h ttps://www.museum-digital.org'],
            'Unwanted protocol'               => ['telegram://www.museum-digital.org'],
            'String without protocol'         => ['www.museum-digital.org'],
            'Localhost'                       => ['http://localhost'],
            'Overly long URL (> 10000 chars)' => [$overlyLongUrl],
        ];

    }

    /**
     * Data provider listing URLs that are expected to be accepted.
     *
     * Each dataset is keyed by a short description and holds two elements:
     * the input URL and the URL expected after sanitization.
     *
     * @return array<string, array{0: string, 1: string}>
     */
    public static function valid_url_provider():array {

        // Percent-encoded form of the path "/wiki/Београд".
        $encodedPath = '/wiki/%D0%91%D0%B5%D0%BE%D0%B3%D1%80%D0%B0%D0%B4';
        $encodedWikiUrl = 'https://sr.wikipedia.org' . $encodedPath;

        $datasets = [];

        $datasets['Regular URL without path or query'] = [
            'https://www.museum-digital.org',
            'https://www.museum-digital.org',
        ];

        // Already encoded input must pass through unchanged.
        $datasets['URL with cyrillic characters, HTML-encoded '] = [
            $encodedWikiUrl,
            $encodedWikiUrl,
        ];

        // Raw cyrillic input is expected to come back percent-encoded.
        $datasets['URL with cyrillic characters, not HTML-encoded '] = [
            'https://sr.wikipedia.org/wiki/Београд',
            $encodedWikiUrl,
        ];

        $datasets['URL with: scheme, user, pass, host, path, query'] = [
            'https://username:password@sr.wikipedia.org:9000/wiki/Београд?test=hi',
            'https://username:password@sr.wikipedia.org:9000' . $encodedPath . '?test=hi',
        ];

        return $datasets;

    }

    /**
     * Data provider listing mail addresses that are expected to be rejected.
     *
     * Each dataset is keyed by the address itself and holds exactly one
     * element: that same address.
     *
     * @return array<string, array{0: string}>
     */
    public static function invalid_email_provider():array {

        // Invalid addresses as per https://codefool.tumblr.com/post/15288874550/list-of-valid-and-invalid-email-addresses
        $addresses = [
            'plainaddress',
            '#@%^%#$@#$@#.com',
            '@example.com',
            'Joe Smith <email@example.com>',
            'email.example.com',
            'email@example@example.com',
            '.email@example.com',
            'email.@example.com',
            'email..email@example.com',
            'あいうえお@example.com',
            'email@example.com (Joe Smith)',
            'email@example',
            'email@-example.com',
            'email@111.222.333.44444',
            'email@example..com',
            'Abc..123@example.com',
            '“(),:;<>[\]@example.com',
            'just"not"right@example.com',
            'this\ is"really"not\allowed@example.com',
        ];

        // Use every address as the name of its own single-element dataset.
        return array_combine(
            $addresses,
            array_map(static fn (string $address): array => [$address], $addresses),
        );

    }

    /**
     * Data provider listing mail addresses that are expected to be accepted.
     *
     * Each dataset is keyed by the address itself and holds two elements:
     * the input address and the (identical) expected output.
     *
     * @return array<string, array{0: string, 1: string}>
     */
    public static function valid_email_provider():array {

        // Valid addresses as per https://codefool.tumblr.com/post/15288874550/list-of-valid-and-invalid-email-addresses
        //
        // The following entries of that list are left out, as per PHP's
        // FILTER_VALIDATE_EMAIL:
        //
        // 'email@123.123.123.123',
        // 'email@[123.123.123.123]',
        // '“email”@example.com',
        $addresses = [
            'email@example.com',
            'firstname.lastname@example.com',
            'email@subdomain.example.com',
            'firstname+lastname@example.com',
            '1234567890@example.com',
            'email@example-one.com',
            '_______@example.com',
            'email@example.name',
            'email@example.museum',
            'email@example.co.jp',
            'firstname-lastname@example.com',
        ];

        // Input and expected output are the same for every valid address.
        return array_combine(
            $addresses,
            array_map(static fn (string $address): array => [$address, $address], $addresses),
        );

    }
}