From ae12cfdf0fc1c117a901b0ef8957df94eed17b8b Mon Sep 17 00:00:00 2001 From: Joshua Ramon Enslin Date: Sun, 5 Nov 2023 23:29:14 +0100 Subject: [PATCH] Add tests for MD_STD_IN::sanitize_url() and ensure it supports rewriting unencoded cyrillic inputs Close #7 --- phpunit.xml | 16 ++++++++++++++++ src/MD_STD_IN.php | 25 +++++++++++++++++++++---- tests/MD_STD_IN_Test.php | 37 +++++++++++++++++++++++++++++++++++++ tests/bootstrap.php | 25 +++++++++++++++++++++++++ 4 files changed, 99 insertions(+), 4 deletions(-) create mode 100644 phpunit.xml create mode 100644 tests/MD_STD_IN_Test.php create mode 100644 tests/bootstrap.php diff --git a/phpunit.xml b/phpunit.xml new file mode 100644 index 0000000..5694643 --- /dev/null +++ b/phpunit.xml @@ -0,0 +1,16 @@ + + + diff --git a/src/MD_STD_IN.php b/src/MD_STD_IN.php index 272c916..1ced094 100644 --- a/src/MD_STD_IN.php +++ b/src/MD_STD_IN.php @@ -197,13 +197,30 @@ final class MD_STD_IN { return ""; } - $output = \filter_var($input, FILTER_SANITIZE_URL); - if (($output = \filter_var($output, FILTER_VALIDATE_URL)) === false) { - throw new MDInvalidUrl("Invalid input URL"); + try { + if (($output = \filter_var($input, FILTER_VALIDATE_URL)) === false) { + throw new MDInvalidUrl("Invalid input URL"); + } + } + catch (MDInvalidUrl $e) { + if (($parsed = parse_url($input)) === false || empty($parsed['scheme'])) { + throw new MDInvalidUrl("Invalid input URL"); + } + $rewritten = $parsed['scheme'] . '://'; + if (!empty($parsed['user']) && !empty($parsed['pass'])) { + $rewritten .= $parsed['user'] . ':' . $parsed['pass'] . '@'; + } + $rewritten .= $parsed['host']; + if (!empty($parsed['port'])) $rewritten .= ':' . $parsed['port']; + $rewritten .= str_replace('%2F' , '/', urlencode($parsed['path'])); + if (!empty($parsed['query'])) $rewritten .= '?' . urlencode($parsed['query']); + if (($output = \filter_var($rewritten, FILTER_VALIDATE_URL)) === false) { + throw new MDInvalidUrl("Invalid input URL" . \urlencode($input)); + } } // Check for valid schemes - if (MD_STD::startsWithAny($input, ['https://', 'http://', 'ftp://']) === false) { + if (MD_STD::startsWithAny($output, ['https://', 'http://', 'ftp://']) === false) { throw new MDInvalidUrl("Invalid input URL"); } diff --git a/tests/MD_STD_IN_Test.php b/tests/MD_STD_IN_Test.php new file mode 100644 index 0000000..88a02ac --- /dev/null +++ b/tests/MD_STD_IN_Test.php @@ -0,0 +1,37 @@ + + */ +declare(strict_types = 1); + +use PHPUnit\Framework\TestCase; + +/** + * Tests for MD_STD_IN. + */ +final class MD_STD_IN_Test extends TestCase { + /** + * Function for testing sanitize_url(). + * + * @return void + */ + public function testSanitizeUrlWorksBasically():void { + + // Ensure empty inputs return empty output + self::assertEquals("", MD_STD_IN::sanitize_url("")); + self::assertEquals("https://www.museum-digital.org", MD_STD_IN::sanitize_url("https://www.museum-digital.org")); + + // Ensure that cyrillic characters are accepted + self::assertEquals("https://sr.wikipedia.org/wiki/%D0%91%D0%B5%D0%BE%D0%B3%D1%80%D0%B0%D0%B4", MD_STD_IN::sanitize_url("https://sr.wikipedia.org/wiki/%D0%91%D0%B5%D0%BE%D0%B3%D1%80%D0%B0%D0%B4")); + self::assertEquals("https://sr.wikipedia.org/wiki/%D0%91%D0%B5%D0%BE%D0%B3%D1%80%D0%B0%D0%B4", MD_STD_IN::sanitize_url("https://sr.wikipedia.org/wiki/Београд")); + + self::assertEquals("https://username:password@sr.wikipedia.org:9000/wiki/%D0%91%D0%B5%D0%BE%D0%B3%D1%80%D0%B0%D0%B4", MD_STD_IN::sanitize_url("https://username:password@sr.wikipedia.org:9000/wiki/%D0%91%D0%B5%D0%BE%D0%B3%D1%80%D0%B0%D0%B4")); + self::assertEquals("https://username:password@sr.wikipedia.org:9000/wiki/%D0%91%D0%B5%D0%BE%D0%B3%D1%80%D0%B0%D0%B4", MD_STD_IN::sanitize_url("https://username:password@sr.wikipedia.org:9000/wiki/Београд")); + + self::expectException(MDInvalidUrl::class); + MD_STD_IN::sanitize_url("h ttps://www.museum-digital.org"); + + } +} diff --git a/tests/bootstrap.php b/tests/bootstrap.php new file mode 100644 index 0000000..3da3df3 --- /dev/null +++ b/tests/bootstrap.php @@ -0,0 +1,25 @@ +