Improve validation of noda repository links

This commit is contained in:
Joshua Ramon Enslin 2023-08-03 17:20:03 +02:00
parent c082ca685f
commit 7fb7bb83c1
Signed by: jrenslin
GPG Key ID: 46016F84501B70AE
2 changed files with 60 additions and 23 deletions

View File

@ -260,6 +260,15 @@ enum MDNodaRepository implements MDValueEnumInterface, JsonSerializable {
$id = strtr($id, $toRemove);
}
// FILTER_VALIDATE_INT fails on IDs that exceed PHP's integer range
// (e.g. VIAF IDs having 20 or more digits).
// IDs longer than 9 characters are thus accepted if they consist solely of digits.
if (strlen($id) > 9) {
if (empty(trim($id, '0123456789'))) {
return $id;
}
}
// Strings starting with 0 are quite often linked, notably with the NDL.
// PHP's FILTER_VALIDATE_INT does not accept a leading 0 however, so it
// is stripped before checking.
@ -349,15 +358,15 @@ enum MDNodaRepository implements MDValueEnumInterface, JsonSerializable {
]);
}
if (substr($id, 0, 1) !== 'n') {
throw new MDgenericInvalidInputsException("LOC IDs must start with n");
if (in_array(substr($id, 0, 2), ['nr', 'nb', 'no'], true)) {
if (filter_var(trim(substr($id, 2), '0'), FILTER_VALIDATE_INT) === false) return false;
}
if (filter_var(substr($id, 1), FILTER_VALIDATE_INT) === false) {
return false;
else if (substr($id, 0, 1) === 'n') {
if (filter_var(trim(substr($id, 1), '0'), FILTER_VALIDATE_INT) === false) return false;
}
else throw new MDgenericInvalidInputsException("LOC IDs must start with n or nr or nb");
return $id;
return (string)$id;
}
@ -414,6 +423,26 @@ enum MDNodaRepository implements MDValueEnumInterface, JsonSerializable {
}
/**
 * Validates a BNF ID. BNF IDs are either fully numeric or end on a single
 * non-numeric character (e.g. "11960399t").
 *
 * The numeric part is checked using validateNumericId(), which is also given
 * the catalogue URL prefix to remove. The returned value is the normalized ID
 * (without the URL prefix), with the trailing character re-attached if there
 * was one.
 *
 * @param string $id ID to validate. May be a bare ID or a full catalogue URL.
 *
 * @return string|false Normalized ID, or false if the numeric part is invalid.
 */
public static function validateBnfId(string $id):string|false {

    $prefixes = ["https://catalogue.bnf.fr/ark:/12148/cb"];

    // Split off a single trailing non-numeric character, so that only the
    // numeric part is passed on to the numeric validation.
    $suffix = '';
    $lastChar = substr($id, -1);
    if ($lastChar !== '' && !is_numeric($lastChar)) {
        $suffix = $lastChar;
        $id = substr($id, 0, -1);
    }

    $validation = self::validateNumericId($id, $prefixes);
    if ($validation === false) return false;

    // Return the validated (prefix-free) ID rather than the raw input, so
    // that full URLs are normalized the same way as for other repositories.
    return $validation . $suffix;

}
/**
* Validates an ID.
*
@ -428,7 +457,7 @@ enum MDNodaRepository implements MDValueEnumInterface, JsonSerializable {
self::aat => self::validateNumericId($id, ['https://vocab.getty.edu/page/aat/']),
self::ackerbau => self::validateNumericId($id, ['https://term.museum-digital.de/ackerbau/tag/']),
self::bne => self::validateBneId($id, ['http://datos.bne.es/persona/']),
self::bnf => self::validateNumericId(rtrim($id, 't'), ["https://catalogue.bnf.fr/ark:/12148/cb"]), // cb11960399t is a valid entry, too (general)
self::bnf => self::validateBnfId($id),
self::cona => self::validateNumericId($id, ['https://vocab.getty.edu/page/cona/']),
self::editionhumboldtdigital => self::validateGndId($id, ['https://edition-humboldt.de/register/personen/detail.xql?normid=http://d-nb.info/gnd/']),
self::gnd => self::validateGndId($id, ['http://d-nb.info/gnd/', 'https://d-nb.info/gnd/']),
@ -458,7 +487,10 @@ enum MDNodaRepository implements MDValueEnumInterface, JsonSerializable {
self::pleiades => self::validateNumericId($id, ['https://pleiades.stoa.org/places/']),
self::rkd => self::validateNumericId($id, ['http://rkd.nl/explore/artists/', 'https://rkd.nl/explore/artists/']),
self::ulan => self::validateNumericId($id, ['http://vocab.getty.edu/ulan/', 'http://vocab.getty.edu/page/ulan/', 'https://vocab.getty.edu/page/ulan/']),
self::viaf => self::validateNumericId($id, ['https://viaf.org/viaf/']),
self::viaf => self::validateNumericId($id, [
'https://viaf.org/viaf/',
'http://viaf.org/viaf/',
]),
self::wikidata => self::validateWikidataId($id),
self::wikipedia => str_replace('https://de.wikipedia.org/wiki/', '', $id),
};

View File

@ -22,29 +22,34 @@ final class MDNodaRepositoryTest extends TestCase {
public function testValidIdsValidate():void {
// Each repository is checked with bare IDs and, where applicable, with full
// URLs. The expected result is always the ID without the URL prefix.
// GND (Germany)
self::assertNotFalse(MDNodaRepository::gnd->validateId("https://d-nb.info/gnd/102423008"));
self::assertNotFalse(MDNodaRepository::gnd->validateId("http://d-nb.info/gnd/102423008"));
self::assertNotFalse(MDNodaRepository::gnd->validateId("102423008"));
self::assertEquals("102423008", MDNodaRepository::gnd->validateId("https://d-nb.info/gnd/102423008"));
self::assertEquals("102423008", MDNodaRepository::gnd->validateId("http://d-nb.info/gnd/102423008"));
self::assertEquals("102423008", MDNodaRepository::gnd->validateId("102423008"));
// NDL (Japan)
// Leading zeros are expected to be preserved in the returned ID.
self::assertNotFalse(MDNodaRepository::ndl->validateId("00967046"));
self::assertNotFalse(MDNodaRepository::ndl->validateId("https://id.ndl.go.jp/auth/ndlna/00967046"));
self::assertNotFalse(MDNodaRepository::ndl->validateId("http://id.ndl.go.jp/auth/ndlna/00967046"));
self::assertEquals("00967046", MDNodaRepository::ndl->validateId("00967046"));
self::assertEquals("00967046", MDNodaRepository::ndl->validateId("https://id.ndl.go.jp/auth/ndlna/00967046"));
self::assertEquals("00967046", MDNodaRepository::ndl->validateId("http://id.ndl.go.jp/auth/ndlna/00967046"));
// NPG: National Portrait Gallery
// The "mp" in front of the number is expected to be removed along with the URL.
self::assertNotFalse(MDNodaRepository::npg->validateId("https://www.npg.org.uk/collections/search/person/mp01751"));
self::assertEquals('01751', MDNodaRepository::npg->validateId("https://www.npg.org.uk/collections/search/person/mp01751"));
// Library of Congress
self::assertNotFalse(MDNodaRepository::loc->validateId("https://id.loc.gov/authorities/names/n2022014604"));
self::assertNotFalse(MDNodaRepository::loc->validateId("http://id.loc.gov/authorities/names/n2022014604"));
self::assertNotFalse(MDNodaRepository::loc->validateId("n2022014604"));
self::assertEquals("n2022014604", MDNodaRepository::loc->validateId("https://id.loc.gov/authorities/names/n2022014604"));
self::assertEquals("n2022014604", MDNodaRepository::loc->validateId("http://id.loc.gov/authorities/names/n2022014604"));
self::assertEquals("n2022014604", MDNodaRepository::loc->validateId("n2022014604"));
// LCSH: IDs carry an "sh" prefix, which is kept in the returned ID.
self::assertNotFalse(MDNodaRepository::lcsh->validateId("https://id.loc.gov/authorities/names/sh2022014604"));
self::assertNotFalse(MDNodaRepository::lcsh->validateId("http://id.loc.gov/authorities/names/sh2022014604"));
self::assertNotFalse(MDNodaRepository::lcsh->validateId("sh2022014604"));
self::assertEquals("sh2022014604", MDNodaRepository::lcsh->validateId("https://id.loc.gov/authorities/names/sh2022014604"));
self::assertEquals("sh2022014604", MDNodaRepository::lcsh->validateId("http://id.loc.gov/authorities/names/sh2022014604"));
self::assertEquals("sh2022014604", MDNodaRepository::lcsh->validateId("sh2022014604"));
// BNE: IDs carry an "XX" prefix, which is kept in the returned ID.
self::assertNotFalse(MDNodaRepository::bne->validateId("http://datos.bne.es/persona/XX5034943"));
self::assertNotFalse(MDNodaRepository::bne->validateId("XX5034943"));
self::assertEquals("XX5034943", MDNodaRepository::bne->validateId("http://datos.bne.es/persona/XX5034943"));
self::assertEquals("XX5034943", MDNodaRepository::bne->validateId("XX5034943"));
// VIAF: very long IDs (20 and 22 digits here) must be returned unchanged;
// both the https and the http URL prefix are expected to be removed.
self::assertEquals("86145857811423020454", MDNodaRepository::viaf->validateId("86145857811423020454"));
self::assertEquals("2869150688328112660005", MDNodaRepository::viaf->validateId("2869150688328112660005"));
self::assertEquals("248941990", MDNodaRepository::viaf->validateId("https://viaf.org/viaf/248941990"));
self::assertEquals("248941990", MDNodaRepository::viaf->validateId("http://viaf.org/viaf/248941990"));
}
}