Fix overly strict validation for npg, ndl

This commit is contained in:
Joshua Ramon Enslin 2023-07-25 22:33:57 +02:00
parent 17f1e162f8
commit 0e86361968
Signed by: jrenslin
GPG Key ID: 46016F84501B70AE
2 changed files with 68 additions and 6 deletions

View File

@ -251,11 +251,16 @@ enum MDNodaRepository implements MDValueEnumInterface, JsonSerializable {
if (filter_var($id, FILTER_VALIDATE_URL) !== false) {
$toRemove = [];
foreach ($prefixes as $prefix) $toRemove[$prefix] = "";
foreach ($prefixes as $prefix) {
$toRemove[$prefix] = "";
}
$id = strtr($id, $toRemove);
}
if (filter_var($id, FILTER_VALIDATE_INT) === false) {
// Strings starting with 0 are quite often linked, notably with the NDL.
// PHP's FILTER_VALIDATE_INT does not accept a leading 0 however, so it
// is stripped before checking.
if (filter_var(ltrim($id, '0'), FILTER_VALIDATE_INT) === false) {
return false;
}
@ -320,7 +325,10 @@ enum MDNodaRepository implements MDValueEnumInterface, JsonSerializable {
private static function validateLocId(string $id):string|false {
if (filter_var($id, FILTER_VALIDATE_URL) !== false) {
$id = strtr($id, ['http://id.loc.gov/authorities/names/' => '']);
$id = strtr($id, [
'http://id.loc.gov/authorities/names/' => '',
'https://id.loc.gov/authorities/names/' => '',
]);
}
if (substr($id, 0, 1) !== 'n') {
@ -399,7 +407,7 @@ enum MDNodaRepository implements MDValueEnumInterface, JsonSerializable {
self::aat => self::validateNumericId($id, ['https://vocab.getty.edu/page/aat/']),
self::ackerbau => self::validateNumericId($id, ['https://term.museum-digital.de/ackerbau/tag/']),
self::bne => self::validateNumericId($id, ['http://datos.bne.es/persona/']),
self::bnf => self::validateNumericId($id, ["https://catalogue.bnf.fr/ark:/12148/cb"]),
self::bnf => self::validateNumericId(rtrim($id, 't'), ["https://catalogue.bnf.fr/ark:/12148/cb"]), // cb11960399t is a valid entry, too (general)
self::cona => self::validateNumericId($id, ['https://vocab.getty.edu/page/cona/']),
self::editionhumboldtdigital => self::validateGndId($id, ['https://edition-humboldt.de/register/personen/detail.xql?normid=http://d-nb.info/gnd/']),
self::gnd => self::validateGndId($id, ['http://d-nb.info/gnd/', 'https://d-nb.info/gnd/']),
@ -411,11 +419,17 @@ enum MDNodaRepository implements MDValueEnumInterface, JsonSerializable {
self::mindatorg => self::validateNumericId($id, ['https://www.mindat.org/min-', '.html']),
self::moebeltypologie => self::validateNumericId($id, ['https://term.museum-digital.de/moebel/tag/']),
self::ndb_adb => self::validateGndId($id, ['https://www.deutsche-biographie.de/pnd', '.html']),
self::ndl => self::validateNumericId($id, ['https://id.ndl.go.jp/auth/ndlna/']),
self::ndl => self::validateNumericId($id, [
'http://id.ndl.go.jp/auth/ndlna/',
'https://id.ndl.go.jp/auth/ndlna/',
]),
self::ndp_ikmk => self::validateNumericId($id, ['https://ikmk.smb.museum/ndp/land/']),
self::ndp_ikmk_persons => self::validateNumericId($id, ['https://ikmk.smb.museum/ndp/person/']),
self::nomisma => str_replace('http://nomisma.org/id/', '', $id),
self::npg => self::validateNumericId($id, ['https://www.npg.org.uk/collections/search/person/']),
self::npg => self::validateNumericId($id, [
'https://www.npg.org.uk/collections/search/person/',
'https://www.npg.org.uk/collections/search/person/mp',
]),
self::oberbegriffsdatei => self::validateNumericId($id, ['https://term.museum-digital.de/oberbegriffsdatei/tag/']),
self::orcid => preg_match('/^[0-9]{4}-[0-9]{4}-[0-9]{4}-[0-9]{4}$/', $id) ? $id : false,
self::osm => self::validateNumericId($id, ['https://www.openstreetmap.org/relation/']),

View File

@ -0,0 +1,48 @@
<?PHP
/**
* This script contains tests for the MDNodaRepository enum.
*
* @author Joshua Ramon Enslin <joshua@museum-digital.de>
*/
declare(strict_types = 1);
use PHPUnit\Framework\TestCase;
require_once __DIR__ . '/../src/enums/MDValueEnumInterface.php';
require_once __DIR__ . '/../src/enums/MDNodaRepository.php';
/**
 * Tests for ID validation in the MDNodaRepository enum.
 */
final class MDNodaRepositoryTest extends TestCase {
    /**
     * Tests that valid IDs actually validate as valid.
     *
     * Every case pairs a repository with an ID (either a full URL or a bare
     * ID) that validateId() must not reject, i.e. must not return false for.
     *
     * @return void
     */
    public function testValidIdsValidate():void {

        $validIds = [
            // GND (Germany)
            [MDNodaRepository::gnd, "https://d-nb.info/gnd/102423008"],
            [MDNodaRepository::gnd, "http://d-nb.info/gnd/102423008"],
            [MDNodaRepository::gnd, "102423008"],

            // NDL (Japan): IDs commonly carry leading zeros.
            [MDNodaRepository::ndl, "00967046"],
            [MDNodaRepository::ndl, "https://id.ndl.go.jp/auth/ndlna/00967046"],
            [MDNodaRepository::ndl, "http://id.ndl.go.jp/auth/ndlna/00967046"],

            // NPG: National Portrait Gallery ("mp" precedes the numeric part)
            [MDNodaRepository::npg, "https://www.npg.org.uk/collections/search/person/mp01751"],

            // Library of Congress
            [MDNodaRepository::loc, "https://id.loc.gov/authorities/names/n2022014604"],
            [MDNodaRepository::loc, "http://id.loc.gov/authorities/names/n2022014604"],
            [MDNodaRepository::loc, "n2022014604"],
        ];

        foreach ($validIds as [$repository, $id]) {
            // The message names the failing input, so a regression can be
            // pinned to a specific repository without re-running each case.
            self::assertNotFalse(
                $repository->validateId($id),
                "Expected '" . $id . "' to be accepted as a valid ID for " . $repository->name,
            );
        }

    }
}