Improve validation of noda repository links

This commit is contained in:
2023-08-03 17:20:03 +02:00
parent c082ca685f
commit 7fb7bb83c1
2 changed files with 60 additions and 23 deletions

View File

@ -260,6 +260,15 @@ enum MDNodaRepository implements MDValueEnumInterface, JsonSerializable {
$id = strtr($id, $toRemove);
}
// FILTER_VALIDATE_INT fails on overly large IDs (e.g. VIAF IDs having
// more than 20 digits).
// In these cases, simply check for the existence of non-numeric characters.
if (strlen($id) > 9) {
if (empty(trim($id, '0123456789'))) {
return $id;
}
}
// Strings starting with 0 are quite often linked, notably with the NDL.
// PHP's FILTER_VALIDATE_INT does not accept a leading 0 however, so it
// is stripped before checking.
@ -349,15 +358,15 @@ enum MDNodaRepository implements MDValueEnumInterface, JsonSerializable {
]);
}
if (substr($id, 0, 1) !== 'n') {
throw new MDgenericInvalidInputsException("LOC IDs must start with n");
if (in_array(substr($id, 0, 2), ['nr', 'nb', 'no'], true)) {
if (filter_var(trim(substr($id, 2), '0'), FILTER_VALIDATE_INT) === false) return false;
}
if (filter_var(substr($id, 1), FILTER_VALIDATE_INT) === false) {
return false;
else if (substr($id, 0, 1) === 'n') {
if (filter_var(trim(substr($id, 1), '0'), FILTER_VALIDATE_INT) === false) return false;
}
else throw new MDgenericInvalidInputsException("LOC IDs must start with n or nr or nb");
return $id;
return (string)$id;
}
@ -414,6 +423,26 @@ enum MDNodaRepository implements MDValueEnumInterface, JsonSerializable {
}
/**
 * Validates a BNF ID. BNF IDs are either fully numeric or end with a single
 * non-numeric character (e.g. "11960399t").
 *
 * The numeric part is checked using validateNumericId(). On success, the value
 * returned by that validator is used as the basis of the result, with the
 * trailing non-numeric character (if there was one) appended again. This way
 * any normalization done by validateNumericId() - presumably the removal of
 * the catalogue URL prefix; verify against that method - is kept instead of
 * being discarded by returning the raw input.
 *
 * @param string $id ID to validate.
 *
 * @return string|false The validated ID, or false if the numeric part is invalid.
 */
public static function validateBnfId(string $id):string|false {

    $prefixes = ["https://catalogue.bnf.fr/ark:/12148/cb"];

    // Split off a trailing non-numeric character so that only the numeric
    // part is passed to the numeric validator. For an empty input the last
    // character is an empty string, which is_numeric() rejects as well; the
    // numeric part then stays empty and is left to validateNumericId().
    $lastChar    = substr($id, -1);
    $suffix      = '';
    $numericPart = $id;
    if (!is_numeric($lastChar)) {
        $suffix      = $lastChar;
        $numericPart = substr($id, 0, -1);
    }

    $validation = self::validateNumericId($numericPart, $prefixes);
    if ($validation === false) return false;

    // Return the validator's output rather than the raw input, so that a
    // full URL is not handed back unchanged.
    return $validation . $suffix;

}
/**
* Validates an ID.
*
@ -428,7 +457,7 @@ enum MDNodaRepository implements MDValueEnumInterface, JsonSerializable {
self::aat => self::validateNumericId($id, ['https://vocab.getty.edu/page/aat/']),
self::ackerbau => self::validateNumericId($id, ['https://term.museum-digital.de/ackerbau/tag/']),
self::bne => self::validateBneId($id, ['http://datos.bne.es/persona/']),
self::bnf => self::validateNumericId(rtrim($id, 't'), ["https://catalogue.bnf.fr/ark:/12148/cb"]), // cb11960399t is a valid entry, too (general)
self::bnf => self::validateBnfId($id),
self::cona => self::validateNumericId($id, ['https://vocab.getty.edu/page/cona/']),
self::editionhumboldtdigital => self::validateGndId($id, ['https://edition-humboldt.de/register/personen/detail.xql?normid=http://d-nb.info/gnd/']),
self::gnd => self::validateGndId($id, ['http://d-nb.info/gnd/', 'https://d-nb.info/gnd/']),
@ -458,7 +487,10 @@ enum MDNodaRepository implements MDValueEnumInterface, JsonSerializable {
self::pleiades => self::validateNumericId($id, ['https://pleiades.stoa.org/places/']),
self::rkd => self::validateNumericId($id, ['http://rkd.nl/explore/artists/', 'https://rkd.nl/explore/artists/']),
self::ulan => self::validateNumericId($id, ['http://vocab.getty.edu/ulan/', 'http://vocab.getty.edu/page/ulan/', 'https://vocab.getty.edu/page/ulan/']),
self::viaf => self::validateNumericId($id, ['https://viaf.org/viaf/']),
self::viaf => self::validateNumericId($id, [
'https://viaf.org/viaf/',
'http://viaf.org/viaf/',
]),
self::wikidata => self::validateWikidataId($id),
self::wikipedia => str_replace('https://de.wikipedia.org/wiki/', '', $id),
};