From 1d1a690e0fd698a4ac7017198eaeea899024aed4 Mon Sep 17 00:00:00 2001 From: Joshua Ramon Enslin Date: Mon, 16 Oct 2023 16:44:33 +0200 Subject: [PATCH] Remove whitespaces from vocabulary links, add dedicated exception for lcsh links that are actually loc links --- .../MDInvalidNodaLinkLcshIdIsGeneralLoc.php | 20 +++++++++++++++++++ src/classes/MDNodaLink.php | 2 +- src/enums/MDNodaRepository.php | 10 +++++++++- tests/MDNodaRepositoryTest.php | 1 + 4 files changed, 31 insertions(+), 2 deletions(-) create mode 100644 exceptions/MDInvalidNodaLinkLcshIdIsGeneralLoc.php diff --git a/exceptions/MDInvalidNodaLinkLcshIdIsGeneralLoc.php b/exceptions/MDInvalidNodaLinkLcshIdIsGeneralLoc.php new file mode 100644 index 0000000..edcac82 --- /dev/null +++ b/exceptions/MDInvalidNodaLinkLcshIdIsGeneralLoc.php @@ -0,0 +1,20 @@ +source = $source; - if (($validatedId = $this->source->validateId(trim($id))) === false) { + if (($validatedId = $this->source->validateId(strtr(trim($id), [" " => "", "\t" => "", "\n" => ""]))) === false) { throw new MDInvalidNodaLink("Invalid noda ID / link: \"" . $id . "\" [Repository: " . $this->source->toDbName() . "]"); } $this->id = $validatedId; diff --git a/src/enums/MDNodaRepository.php b/src/enums/MDNodaRepository.php index e383e23..b5daa2b 100644 --- a/src/enums/MDNodaRepository.php +++ b/src/enums/MDNodaRepository.php @@ -351,6 +351,7 @@ enum MDNodaRepository implements MDValueEnumInterface, JsonSerializable { return false; } if (is_numeric(strtr($id, ['-' => '', 'X' => ''])) === false) { + throw new Exception($id); return false; } @@ -409,7 +410,10 @@ enum MDNodaRepository implements MDValueEnumInterface, JsonSerializable { } if (substr($id, 0, 2) !== 'sh') { - throw new MDInvalidNodaLinkException("LCSH IDs must start with sh"); + if (str_starts_with(substr($id, 0, 1), 'n')) { + throw new MDInvalidNodaLinkLcshIdIsGeneralLoc("LCSH IDs must start with sh. ID started with n. Use general LOC link instead."); + } + throw new MDInvalidNodaLinkException("LCSH IDs must start with sh. ID provided is: " . $id); } if (filter_var(ltrim(substr($id, 2), "0"), FILTER_VALIDATE_INT) === false) { @@ -538,6 +542,10 @@ enum MDNodaRepository implements MDValueEnumInterface, JsonSerializable { 'https://en.wikipedia.org/wiki/' => '', 'http://fr.wikipedia.org/wiki/' => '', 'https://fr.wikipedia.org/wiki/' => '', + 'http://id.wikipedia.org/wiki/' => '', + 'https://id.wikipedia.org/wiki/' => '', + 'http://it.wikipedia.org/wiki/' => '', + 'https://it.wikipedia.org/wiki/' => '', 'http://nl.wikipedia.org/wiki/' => '', 'https://nl.wikipedia.org/wiki/' => '', 'http://sv.wikipedia.org/wiki/' => '', diff --git a/tests/MDNodaRepositoryTest.php b/tests/MDNodaRepositoryTest.php index 05e4bd8..f23c728 100644 --- a/tests/MDNodaRepositoryTest.php +++ b/tests/MDNodaRepositoryTest.php @@ -27,6 +27,7 @@ final class MDNodaRepositoryTest extends TestCase { // GND (Germany) self::assertEquals("102423008", MDNodaRepository::gnd->validateId("https://d-nb.info/gnd/102423008")); self::assertEquals("102423008", MDNodaRepository::gnd->validateId("http://d-nb.info/gnd/102423008")); + self::assertEquals("102423008", MDNodaRepository::gnd->validateId("http://d-nb.info/gnd/ 102423008")); self::assertEquals("102423008", MDNodaRepository::gnd->validateId("102423008")); // NDL (Japan)