Handle large numbers in GND IDs
This commit is contained in:
parent
c57d180aa1
commit
8f32c30fbd
|
@ -69,6 +69,7 @@ enum MDNodaRepository implements MDValueEnumInterface, JsonSerializable {
|
|||
'http://d-nb.info/gnd' => self::gnd,
|
||||
'http://d-nb.info/gnd/' => self::gnd,
|
||||
'd-nb.info' => self::gnd,
|
||||
'd-nb' => self::gnd,
|
||||
'https://portal.dnb.de' => self::gnd,
|
||||
'grobsystematik' => self::grobsystematik,
|
||||
'iconclass' => self::iconclass,
|
||||
|
@ -102,6 +103,9 @@ enum MDNodaRepository implements MDValueEnumInterface, JsonSerializable {
|
|||
'WIKIPEDIA' => self::wikidata,
|
||||
'wikipedia' => self::wikipedia,
|
||||
'Wikipedia' => self::wikipedia,
|
||||
'de.wikipedia.org' => self::wikipedia,
|
||||
'en.wikipedia.org' => self::wikipedia,
|
||||
'fr.wikipedia.org' => self::wikipedia,
|
||||
default => throw new MDpageParameterNotFromListException("Unknown norm data repository: '" . $input . "'"),
|
||||
};
|
||||
|
||||
|
@ -258,6 +262,35 @@ enum MDNodaRepository implements MDValueEnumInterface, JsonSerializable {
|
|||
|
||||
}
|
||||
|
||||
/**
 * Integer-only counterpart of PHP's built-in is_numeric() that also
 * supports numbers too large for a native integer.
 *
 * Accepted inputs are:
 *  - any non-empty string consisting solely of the digits 0-9, regardless
 *    of its length or of leading zeros (including all-zero strings such
 *    as "0" or "000");
 *  - anything FILTER_VALIDATE_INT accepts once leading zeros have been
 *    stripped.
 *
 * Unlike is_numeric(), decimal fractions and exponent notation are not
 * accepted.
 *
 * @param string $value Input to check.
 *
 * @return boolean
 */
private static function _is_numeric(string $value):bool {

    // Pure digit strings are numeric whatever their length. Checking the
    // characters directly covers IDs that overflow FILTER_VALIDATE_INT
    // (e.g. VIAF IDs having more than 20 digits) and strings that consist
    // of zeros only, which would be empty after stripping leading zeros.
    if ($value !== '' && trim($value, '0123456789') === '') {
        return true;
    }

    // Strings starting with 0 are quite often linked, notably with the NDL.
    // PHP's FILTER_VALIDATE_INT does not accept a leading 0 however, so it
    // is stripped before checking.
    return filter_var(ltrim($value, '0'), FILTER_VALIDATE_INT) !== false;

}
|
||||
|
||||
/**
|
||||
* Validates a numeric ID, returning a string or false.
|
||||
*
|
||||
|
@ -276,22 +309,9 @@ enum MDNodaRepository implements MDValueEnumInterface, JsonSerializable {
|
|||
$id = strtr($id, $toRemove);
|
||||
}
|
||||
|
||||
// FILTER_VALIDATE_INT fails on overly large IDs (e.g. VIAF IDs having
|
||||
// more than 20 digits).
|
||||
// In these cases, simply check for the existence of non-numeric characters.
|
||||
if (strlen($id) > 9) {
|
||||
if (empty(trim($id, '0123456789'))) {
|
||||
return $id;
|
||||
}
|
||||
}
|
||||
|
||||
// Strings starting with 0 are quite often linked, notably with the NDL.
|
||||
// PHP's FILTER_VALIDATE_INT does not accept a leading 0 however, so it
|
||||
// is stripped before checking.
|
||||
if (filter_var(ltrim($id, '0'), FILTER_VALIDATE_INT) === false) {
|
||||
if (!self::_is_numeric($id)) {
|
||||
return false;
|
||||
}
|
||||
|
||||
return $id;
|
||||
|
||||
}
|
||||
|
@ -340,6 +360,10 @@ enum MDNodaRepository implements MDValueEnumInterface, JsonSerializable {
|
|||
*/
|
||||
private static function validateGndId(string $id, array $prefixes):string|false {
|
||||
|
||||
if (str_contains($id, ' ')) {
|
||||
$id = strtr($id, [' ' => '', "\t" => '']);
|
||||
}
|
||||
|
||||
if (filter_var($id, FILTER_VALIDATE_URL) !== false) {
|
||||
$toRemove = [];
|
||||
foreach ($prefixes as $prefix) $toRemove[$prefix] = "";
|
||||
|
@ -350,7 +374,8 @@ enum MDNodaRepository implements MDValueEnumInterface, JsonSerializable {
|
|||
if (preg_match("/^[0-9-X]*$/", $id) === false) {
|
||||
return false;
|
||||
}
|
||||
if (is_numeric(strtr($id, ['-' => '', 'X' => ''])) === false) {
|
||||
if (self::_is_numeric(strtr($id, ['-' => '', 'X' => ''])) === false) {
|
||||
throw new Exception($id);
|
||||
return false;
|
||||
}
|
||||
|
||||
|
@ -447,7 +472,7 @@ enum MDNodaRepository implements MDValueEnumInterface, JsonSerializable {
|
|||
if (preg_match("/^[0-9-PIM]*$/", $id) === false) {
|
||||
return false;
|
||||
}
|
||||
if (is_numeric(substr($id, 3)) === false) {
|
||||
if (self::_is_numeric(substr($id, 3)) === false) {
|
||||
return false;
|
||||
}
|
||||
|
||||
|
@ -468,11 +493,11 @@ enum MDNodaRepository implements MDValueEnumInterface, JsonSerializable {
|
|||
$id = strtr($id, ['https://www.npg.org.uk/collections/search/person/' => '']);
|
||||
}
|
||||
|
||||
if (substr($id, 0, 2) === 'mp' && is_numeric(substr($id, 2))) {
|
||||
if (substr($id, 0, 2) === 'mp' && self::_is_numeric(substr($id, 2))) {
|
||||
return $id;
|
||||
}
|
||||
|
||||
if (filter_var($id, FILTER_VALIDATE_INT) === false) {
|
||||
if (self::_is_numeric($id) === false) {
|
||||
return false;
|
||||
}
|
||||
|
||||
|
@ -490,11 +515,14 @@ enum MDNodaRepository implements MDValueEnumInterface, JsonSerializable {
|
|||
private static function validateWikidataId(string $id):string|false {
|
||||
|
||||
if (filter_var($id, FILTER_VALIDATE_URL) !== false) {
|
||||
$id = strtr($id, ['https://www.wikidata.org/wiki/' => '']);
|
||||
$id = strtr($id, [
|
||||
'https://www.wikidata.org/wiki/' => '',
|
||||
'https://www.wikidata.org/w/index.php?search=&search=' => '',
|
||||
]);
|
||||
}
|
||||
|
||||
if (substr($id, 0, 1) !== 'Q') {
|
||||
throw new MDgenericInvalidInputsException("Wikidata IDs must be Q IDs - and start with that letter");
|
||||
throw new MDInvalidNodaLinkException("Wikidata IDs must be Q IDs - and start with that letter (provided: $id)");
|
||||
}
|
||||
|
||||
if (filter_var(substr($id, 1), FILTER_VALIDATE_INT) === false) {
|
||||
|
@ -537,10 +565,16 @@ enum MDNodaRepository implements MDValueEnumInterface, JsonSerializable {
|
|||
$validation = strtr($id, [
|
||||
'http://de.wikipedia.org/wiki/' => '',
|
||||
'https://de.wikipedia.org/wiki/' => '',
|
||||
'http://da.wikipedia.org/wiki/' => '',
|
||||
'https://da.wikipedia.org/wiki/' => '',
|
||||
'http://en.wikipedia.org/wiki/' => '',
|
||||
'https://en.wikipedia.org/wiki/' => '',
|
||||
'http://es.wikipedia.org/wiki/' => '',
|
||||
'https://es.wikipedia.org/wiki/' => '',
|
||||
'http://fr.wikipedia.org/wiki/' => '',
|
||||
'https://fr.wikipedia.org/wiki/' => '',
|
||||
'http://hu.wikipedia.org/wiki/' => '',
|
||||
'https://hu.wikipedia.org/wiki/' => '',
|
||||
'http://id.wikipedia.org/wiki/' => '',
|
||||
'https://id.wikipedia.org/wiki/' => '',
|
||||
'http://it.wikipedia.org/wiki/' => '',
|
||||
|
@ -590,7 +624,7 @@ enum MDNodaRepository implements MDValueEnumInterface, JsonSerializable {
|
|||
self::mbl => self::validateNumericId($id, ['http://www.uni-magdeburg.de/mbl/PHP_Skripte/mbl_verwaltung/mbl_verw_anzeige_biog.php?auswahl=3&liste_biog_name=']),
|
||||
self::mindatorg => self::validateNumericId($id, ['https://www.mindat.org/min-', '.html']),
|
||||
self::moebeltypologie => self::validateNumericId($id, ['https://term.museum-digital.de/moebel/tag/']),
|
||||
self::ndb_adb => self::validateGndId($id, ['https://www.deutsche-biographie.de/pnd', '.html', '#adbcontent', '#ndbcontent']),
|
||||
self::ndb_adb => self::validateGndId($id, ['https://www.deutsche-biographie.de/pnd', '.html', '#adbcontent', '#ndbcontent', '#indexcontent']),
|
||||
self::ndl => self::validateNumericId($id, [
|
||||
'http://id.ndl.go.jp/auth/ndlna/',
|
||||
'https://id.ndl.go.jp/auth/ndlna/',
|
||||
|
|
|
@ -28,6 +28,7 @@ final class MDNodaRepositoryTest extends TestCase {
|
|||
self::assertEquals("102423008", MDNodaRepository::gnd->validateId("https://d-nb.info/gnd/102423008"));
|
||||
self::assertEquals("102423008", MDNodaRepository::gnd->validateId("http://d-nb.info/gnd/102423008"));
|
||||
self::assertEquals("102423008", MDNodaRepository::gnd->validateId("http://d-nb.info/gnd/ 102423008"));
|
||||
self::assertEquals("1037602218", MDNodaRepository::gnd->validateId("http://d-nb.info/gnd/1037602218"));
|
||||
self::assertEquals("102423008", MDNodaRepository::gnd->validateId("102423008"));
|
||||
|
||||
// NDL (Japan)
|
||||
|
|
Loading…
Reference in New Issue
Block a user