From 7fb7bb83c1bde0c88ebbcf13bfb83d58a7cfc6ee Mon Sep 17 00:00:00 2001 From: Joshua Ramon Enslin Date: Thu, 3 Aug 2023 17:20:03 +0200 Subject: [PATCH] Improve validation of noda repository links --- src/enums/MDNodaRepository.php | 48 ++++++++++++++++++++++++++++------ tests/MDNodaRepositoryTest.php | 35 ++++++++++++++----------- 2 files changed, 60 insertions(+), 23 deletions(-) diff --git a/src/enums/MDNodaRepository.php b/src/enums/MDNodaRepository.php index 290ed6a..b162b33 100644 --- a/src/enums/MDNodaRepository.php +++ b/src/enums/MDNodaRepository.php @@ -260,6 +260,15 @@ enum MDNodaRepository implements MDValueEnumInterface, JsonSerializable { $id = strtr($id, $toRemove); } + // FILTER_VALIDATE_INT fails on overly large IDs (e.g. VIAF IDs having + // more than 20 digits). + // In these cases, simply check for the existence of non-numeric characters. + if (strlen($id) > 9) { + if (empty(trim($id, '0123456789'))) { + return $id; + } + } + // Strings starting with 0 are quite often linked, notably with the NDL. // PHP's FILTER_VALIDATE_INT does not accept a leading 0 however, so it // is stripped before checking. @@ -349,15 +358,15 @@ enum MDNodaRepository implements MDValueEnumInterface, JsonSerializable { ]); } - if (substr($id, 0, 1) !== 'n') { - throw new MDgenericInvalidInputsException("LOC IDs must start with n"); + if (in_array(substr($id, 0, 2), ['nr', 'nb', 'no'], true)) { + if (filter_var(trim(substr($id, 2), '0'), FILTER_VALIDATE_INT) === false) return false; } - - if (filter_var(substr($id, 1), FILTER_VALIDATE_INT) === false) { - return false; + else if (substr($id, 0, 1) === 'n') { + if (filter_var(trim(substr($id, 1), '0'), FILTER_VALIDATE_INT) === false) return false; } + else throw new MDgenericInvalidInputsException("LOC IDs must start with n or nr or nb"); - return $id; + return (string)$id; } @@ -414,6 +423,26 @@ enum MDNodaRepository implements MDValueEnumInterface, JsonSerializable { } + /** + * Validates a BNF ID. BNF IDs are either fully numeric or end on a single non-numeric character. + * + * @param string $id ID to validate. + * + * @return string|false + */ + public static function validateBnfId(string $id):string|false { + + if (!is_numeric(substr($id, -1))) { + $validation = self::validateNumericId(substr($id, 0, -1), ["https://catalogue.bnf.fr/ark:/12148/cb"]); + } + else $validation = self::validateNumericId($id, ["https://catalogue.bnf.fr/ark:/12148/cb"]); + + if ($validation === false) return false; + + return $id; + + } + /** * Validates an ID. * @@ -428,7 +457,7 @@ enum MDNodaRepository implements MDValueEnumInterface, JsonSerializable { self::aat => self::validateNumericId($id, ['https://vocab.getty.edu/page/aat/']), self::ackerbau => self::validateNumericId($id, ['https://term.museum-digital.de/ackerbau/tag/']), self::bne => self::validateBneId($id, ['http://datos.bne.es/persona/']), - self::bnf => self::validateNumericId(rtrim($id, 't'), ["https://catalogue.bnf.fr/ark:/12148/cb"]), // cb11960399t is a valid entry, too (general) + self::bnf => self::validateBnfId($id), self::cona => self::validateNumericId($id, ['https://vocab.getty.edu/page/cona/']), self::editionhumboldtdigital => self::validateGndId($id, ['https://edition-humboldt.de/register/personen/detail.xql?normid=http://d-nb.info/gnd/']), self::gnd => self::validateGndId($id, ['http://d-nb.info/gnd/', 'https://d-nb.info/gnd/']), @@ -458,7 +487,10 @@ enum MDNodaRepository implements MDValueEnumInterface, JsonSerializable { self::pleiades => self::validateNumericId($id, ['https://pleiades.stoa.org/places/']), self::rkd => self::validateNumericId($id, ['http://rkd.nl/explore/artists/', 'https://rkd.nl/explore/artists/']), self::ulan => self::validateNumericId($id, ['http://vocab.getty.edu/ulan/', 'http://vocab.getty.edu/page/ulan/', 'https://vocab.getty.edu/page/ulan/']), - self::viaf => self::validateNumericId($id, ['https://viaf.org/viaf/']), + self::viaf => self::validateNumericId($id, [ + 'https://viaf.org/viaf/', + 'http://viaf.org/viaf/', + ]), self::wikidata => self::validateWikidataId($id), self::wikipedia => str_replace('https://de.wikipedia.org/wiki/', '', $id), }; diff --git a/tests/MDNodaRepositoryTest.php b/tests/MDNodaRepositoryTest.php index 9c00b14..0ae4c5c 100644 --- a/tests/MDNodaRepositoryTest.php +++ b/tests/MDNodaRepositoryTest.php @@ -22,29 +22,34 @@ final class MDNodaRepositoryTest extends TestCase { public function testValidIdsValidate():void { // GND (Germany) - self::assertNotFalse(MDNodaRepository::gnd->validateId("https://d-nb.info/gnd/102423008")); - self::assertNotFalse(MDNodaRepository::gnd->validateId("http://d-nb.info/gnd/102423008")); - self::assertNotFalse(MDNodaRepository::gnd->validateId("102423008")); + self::assertEquals("102423008", MDNodaRepository::gnd->validateId("https://d-nb.info/gnd/102423008")); + self::assertEquals("102423008", MDNodaRepository::gnd->validateId("http://d-nb.info/gnd/102423008")); + self::assertEquals("102423008", MDNodaRepository::gnd->validateId("102423008")); // NDL (Japan) - self::assertNotFalse(MDNodaRepository::ndl->validateId("00967046")); - self::assertNotFalse(MDNodaRepository::ndl->validateId("https://id.ndl.go.jp/auth/ndlna/00967046")); - self::assertNotFalse(MDNodaRepository::ndl->validateId("http://id.ndl.go.jp/auth/ndlna/00967046")); + self::assertEquals("00967046", MDNodaRepository::ndl->validateId("00967046")); + self::assertEquals("00967046", MDNodaRepository::ndl->validateId("https://id.ndl.go.jp/auth/ndlna/00967046")); + self::assertEquals("00967046", MDNodaRepository::ndl->validateId("http://id.ndl.go.jp/auth/ndlna/00967046")); // NPG: National Portrait Gallery - self::assertNotFalse(MDNodaRepository::npg->validateId("https://www.npg.org.uk/collections/search/person/mp01751")); + self::assertEquals('01751', MDNodaRepository::npg->validateId("https://www.npg.org.uk/collections/search/person/mp01751")); // Library of Congress - self::assertNotFalse(MDNodaRepository::loc->validateId("https://id.loc.gov/authorities/names/n2022014604")); - self::assertNotFalse(MDNodaRepository::loc->validateId("http://id.loc.gov/authorities/names/n2022014604")); - self::assertNotFalse(MDNodaRepository::loc->validateId("n2022014604")); + self::assertEquals("n2022014604", MDNodaRepository::loc->validateId("https://id.loc.gov/authorities/names/n2022014604")); + self::assertEquals("n2022014604", MDNodaRepository::loc->validateId("http://id.loc.gov/authorities/names/n2022014604")); + self::assertEquals("n2022014604", MDNodaRepository::loc->validateId("n2022014604")); - self::assertNotFalse(MDNodaRepository::lcsh->validateId("https://id.loc.gov/authorities/names/sh2022014604")); - self::assertNotFalse(MDNodaRepository::lcsh->validateId("http://id.loc.gov/authorities/names/sh2022014604")); - self::assertNotFalse(MDNodaRepository::lcsh->validateId("sh2022014604")); + self::assertEquals("sh2022014604", MDNodaRepository::lcsh->validateId("https://id.loc.gov/authorities/names/sh2022014604")); + self::assertEquals("sh2022014604", MDNodaRepository::lcsh->validateId("http://id.loc.gov/authorities/names/sh2022014604")); + self::assertEquals("sh2022014604", MDNodaRepository::lcsh->validateId("sh2022014604")); - self::assertNotFalse(MDNodaRepository::bne->validateId("http://datos.bne.es/persona/XX5034943")); - self::assertNotFalse(MDNodaRepository::bne->validateId("XX5034943")); + self::assertEquals("XX5034943", MDNodaRepository::bne->validateId("http://datos.bne.es/persona/XX5034943")); + self::assertEquals("XX5034943", MDNodaRepository::bne->validateId("XX5034943")); + + self::assertEquals("86145857811423020454", MDNodaRepository::viaf->validateId("86145857811423020454")); + self::assertEquals("2869150688328112660005", MDNodaRepository::viaf->validateId("2869150688328112660005")); + self::assertEquals("248941990", MDNodaRepository::viaf->validateId("https://viaf.org/viaf/248941990")); + self::assertEquals("248941990", MDNodaRepository::viaf->validateId("http://viaf.org/viaf/248941990")); } }