From 0e863619682a73625290a1092db4f218a24fe2d9 Mon Sep 17 00:00:00 2001 From: Joshua Ramon Enslin Date: Tue, 25 Jul 2023 22:33:57 +0200 Subject: [PATCH] Fix overly strict validation for npg, ndl --- src/enums/MDNodaRepository.php | 26 +++++++++++++----- tests/MDNodaRepositoryTest.php | 48 ++++++++++++++++++++++++++++++++++ 2 files changed, 68 insertions(+), 6 deletions(-) create mode 100644 tests/MDNodaRepositoryTest.php diff --git a/src/enums/MDNodaRepository.php b/src/enums/MDNodaRepository.php index 2507cbc..699c554 100644 --- a/src/enums/MDNodaRepository.php +++ b/src/enums/MDNodaRepository.php @@ -251,11 +251,16 @@ enum MDNodaRepository implements MDValueEnumInterface, JsonSerializable { if (filter_var($id, FILTER_VALIDATE_URL) !== false) { $toRemove = []; - foreach ($prefixes as $prefix) $toRemove[$prefix] = ""; + foreach ($prefixes as $prefix) { + $toRemove[$prefix] = ""; + } $id = strtr($id, $toRemove); } - if (filter_var($id, FILTER_VALIDATE_INT) === false) { + // Strings starting with 0 are quite often linked, notably with the NDL. + // PHP's FILTER_VALIDATE_INT does not accept a leading 0 however, so it + // is stripped before checking. 
+ if (filter_var(ltrim($id, '0'), FILTER_VALIDATE_INT) === false) { return false; } @@ -320,7 +325,10 @@ enum MDNodaRepository implements MDValueEnumInterface, JsonSerializable { private static function validateLocId(string $id):string|false { if (filter_var($id, FILTER_VALIDATE_URL) !== false) { - $id = strtr($id, ['http://id.loc.gov/authorities/names/' => '']); + $id = strtr($id, [ + 'http://id.loc.gov/authorities/names/' => '', + 'https://id.loc.gov/authorities/names/' => '', + ]); } if (substr($id, 0, 1) !== 'n') { @@ -399,7 +407,7 @@ enum MDNodaRepository implements MDValueEnumInterface, JsonSerializable { self::aat => self::validateNumericId($id, ['https://vocab.getty.edu/page/aat/']), self::ackerbau => self::validateNumericId($id, ['https://term.museum-digital.de/ackerbau/tag/']), self::bne => self::validateNumericId($id, ['http://datos.bne.es/persona/']), - self::bnf => self::validateNumericId($id, ["https://catalogue.bnf.fr/ark:/12148/cb"]), + self::bnf => self::validateNumericId(rtrim($id, 't'), ["https://catalogue.bnf.fr/ark:/12148/cb"]), // cb11960399t is a valid entry, too (general) self::cona => self::validateNumericId($id, ['https://vocab.getty.edu/page/cona/']), self::editionhumboldtdigital => self::validateGndId($id, ['https://edition-humboldt.de/register/personen/detail.xql?normid=http://d-nb.info/gnd/']), self::gnd => self::validateGndId($id, ['http://d-nb.info/gnd/', 'https://d-nb.info/gnd/']), @@ -411,11 +419,17 @@ enum MDNodaRepository implements MDValueEnumInterface, JsonSerializable { self::mindatorg => self::validateNumericId($id, ['https://www.mindat.org/min-', '.html']), self::moebeltypologie => self::validateNumericId($id, ['https://term.museum-digital.de/moebel/tag/']), self::ndb_adb => self::validateGndId($id, ['https://www.deutsche-biographie.de/pnd', '.html']), - self::ndl => self::validateNumericId($id, ['https://id.ndl.go.jp/auth/ndlna/']), + self::ndl => self::validateNumericId($id, [ + 'http://id.ndl.go.jp/auth/ndlna/', + 
'https://id.ndl.go.jp/auth/ndlna/', + ]), self::ndp_ikmk => self::validateNumericId($id, ['https://ikmk.smb.museum/ndp/land/']), self::ndp_ikmk_persons => self::validateNumericId($id, ['https://ikmk.smb.museum/ndp/person/']), self::nomisma => str_replace('http://nomisma.org/id/', '', $id), - self::npg => self::validateNumericId($id, ['https://www.npg.org.uk/collections/search/person/']), + self::npg => self::validateNumericId($id, [ + 'https://www.npg.org.uk/collections/search/person/', + 'https://www.npg.org.uk/collections/search/person/mp', + ]), self::oberbegriffsdatei => self::validateNumericId($id, ['https://term.museum-digital.de/oberbegriffsdatei/tag/']), self::orcid => preg_match('/^[0-9]{4}-[0-9]{4}-[0-9]{4}-[0-9]{4}$/', $id) ? $id : false, self::osm => self::validateNumericId($id, ['https://www.openstreetmap.org/relation/']), diff --git a/tests/MDNodaRepositoryTest.php b/tests/MDNodaRepositoryTest.php new file mode 100644 index 0000000..02d66f9 --- /dev/null +++ b/tests/MDNodaRepositoryTest.php @@ -0,0 +1,48 @@ + + */ +declare(strict_types = 1); +use PHPUnit\Framework\TestCase; + +require_once __DIR__ . '/../src/enums/MDValueEnumInterface.php'; +require_once __DIR__ . '/../src/enums/MDNodaRepository.php'; + +/** + * Tests for ID validation in the MDNodaRepository enum. + */ +final class MDNodaRepositoryTest extends TestCase { + /** + * Tests that valid IDs actually validate as valid. 
+ * + * @return void + */ + public function testValidIdsValidate():void { + + // GND (Germany) + self::assertNotFalse(MDNodaRepository::gnd->validateId("https://d-nb.info/gnd/102423008")); + self::assertNotFalse(MDNodaRepository::gnd->validateId("http://d-nb.info/gnd/102423008")); + self::assertNotFalse(MDNodaRepository::gnd->validateId("102423008")); + + // NDL (Japan) + self::assertNotFalse(MDNodaRepository::ndl->validateId("00967046")); + self::assertNotFalse(MDNodaRepository::ndl->validateId("https://id.ndl.go.jp/auth/ndlna/00967046")); + self::assertNotFalse(MDNodaRepository::ndl->validateId("http://id.ndl.go.jp/auth/ndlna/00967046")); + + // NPG: National Portrait Gallery + self::assertNotFalse(MDNodaRepository::npg->validateId("https://www.npg.org.uk/collections/search/person/mp01751")); + + // Library of Congress + self::assertNotFalse(MDNodaRepository::loc->validateId("https://id.loc.gov/authorities/names/n2022014604")); + self::assertNotFalse(MDNodaRepository::loc->validateId("http://id.loc.gov/authorities/names/n2022014604")); + self::assertNotFalse(MDNodaRepository::loc->validateId("n2022014604")); + + + self::assertNotFalse(MDNodaRepository::loc->validateId("https://id.loc.gov/authorities/names/n2022014604")); + self::assertNotFalse(MDNodaRepository::loc->validateId("http://id.loc.gov/authorities/names/n2022014604")); + self::assertNotFalse(MDNodaRepository::loc->validateId("n2022014604")); + + } +}