Improve parsing of LOC / LCSH from Wikidata
This commit is contained in:
parent
0a18449e06
commit
9942c58b12
@ -22,27 +22,6 @@ final class NodaWikidataFetcher {
|
||||
|
||||
// List of two-letter language codes.
// NOTE(review): judging by the constant's name, these are presumably the
// languages for which fetched text gets capitalized; the code using this
// constant is outside this excerpt, so confirm against the caller.
const LANGUAGES_TO_CAPITALIZE = ["cs", "da", "de", "en", "es", "fr", "fi", "id", "it", "nl", "pl", "pt", "ru", "sv", "tl", "tr"];
|
||||
|
||||
// Maps the short name of an external vocabulary / authority file (e.g. "gnd",
// "loc", "lcsh", "wikidata") to the URL prefix under which its entries live.
// Note that "loc" and "lcsh" both point to id.loc.gov, but to different
// sections ("authorities/names/" vs. "authorities/subjects/").
// NOTE(review): presumably a full link is built by appending the entry's ID
// to the prefix; the consuming code is not visible here, so confirm.
const URL_PREFIXES_PLACES_NODA_SOURCE = [
|
||||
"gnd" => "https://d-nb.info/gnd/",
|
||||
"nomisma" => "http://nomisma.org/id/",
|
||||
"osm" => "https://www.openstreetmap.org/relation/",
|
||||
"loc" => "http://id.loc.gov/authorities/names/",
|
||||
"cona" => "http://vocab.getty.edu/page/cona/",
|
||||
"aat" => "http://vocab.getty.edu/page/aat/",
|
||||
"iconclass" => "http://iconclass.org/rkd/",
|
||||
"lcsh" => "http://id.loc.gov/authorities/subjects/",
|
||||
"wikidata" => "https://www.wikidata.org/wiki/",
|
||||
"bne" => "http://datos.bne.es/persona/",
|
||||
"viaf" => "https://viaf.org/viaf/",
|
||||
"bnf" => "https://catalogue.bnf.fr/ark:/12148/cb",
|
||||
"ulan" => "http://vocab.getty.edu/page/ulan/",
|
||||
"rkd" => "https://rkd.nl/explore/artists/",
|
||||
"pim" => "https://opac-nevter.pim.hu/en/record/-/record/",
|
||||
"ndl" => "https://id.ndl.go.jp/auth/ndlna/",
|
||||
"npg" => "https://www.npg.org.uk/collections/search/person/",
|
||||
"orcid" => "https://orcid.org/",
|
||||
];
|
||||
|
||||
const P_IDS_NODA_TAGS = [
|
||||
'gnd' => 'P227',
|
||||
'lcsh' => 'P244',
|
||||
@ -112,6 +91,40 @@ final class NodaWikidataFetcher {
|
||||
|
||||
}
|
||||
|
||||
/**
 * Classifies a Library of Congress identifier received from Wikidata.
 *
 * Wikidata only knows one type of link to the LOC authority files, while
 * museum-digital knows two ('loc' and 'lcsh'). The identifier is therefore
 * validated against the 'loc' repository first and against the 'lcsh'
 * repository second; the first repository accepting it determines the
 * result. A MDgenericInvalidInputsException thrown during validation is
 * treated as "no match" for the repository that threw it.
 *
 * @param string $url LOC ID to check.
 *
 * @return 'loc'|'lcsh'|'' Name of the matching repository, or an empty
 *                         string if neither repository accepts the ID.
 */
private function _determineLocRefMode(string $url):string {

    // Iteration order is significant: 'loc' takes precedence over 'lcsh'.
    $candidates = [
        'loc'  => MDNodaRepository::loc,
        'lcsh' => MDNodaRepository::lcsh,
    ];

    foreach ($candidates as $mode => $repository) {

        try {
            $validated = $repository->validateId($url);
        }
        catch (MDgenericInvalidInputsException) {
            // Invalid for this repository; try the next candidate.
            continue;
        }

        if ($validated !== false) {
            return $mode;
        }

    }

    return '';

}
|
||||
|
||||
/**
|
||||
* Cleans basic tags off Wikidata input.
|
||||
*
|
||||
@ -959,11 +972,19 @@ final class NodaWikidataFetcher {
|
||||
new MDNodaLink(MDNodaRepository::wikidata, $wikidata_id)
|
||||
];
|
||||
foreach (self::P_IDS_NODA_TAGS as $vocabName => $pId) {
|
||||
if (!in_array($vocabName, MDNodaRepositoriesSet::REPOSITORIES_ACTOR, true)) continue;
|
||||
if ($vocabName === 'lcsh') continue;
|
||||
|
||||
if (isset($data['claims'][$pId])) {
|
||||
if (empty($data['claims'][$pId][0]['mainsnak']['datavalue'])) continue;
|
||||
$nodaLinks[] = new MDNodaLink(MDNodaRepository::fromString($vocabName), $data['claims'][$pId][0]['mainsnak']['datavalue']['value']);
|
||||
$url = $data['claims'][$pId][0]['mainsnak']['datavalue']['value'];
|
||||
|
||||
if ($vocabName === 'loc' || ($vocabName === 'lcsh')) {
|
||||
$vocabName = $this->_determineLocRefMode($url);
|
||||
if (empty($vocabName)) continue;
|
||||
}
|
||||
|
||||
if (!in_array($vocabName, MDNodaRepositoriesSet::REPOSITORIES_ACTOR, true)) continue;
|
||||
|
||||
$nodaLinks[] = new MDNodaLink(MDNodaRepository::fromString($vocabName), $url);
|
||||
}
|
||||
}
|
||||
|
||||
@ -1258,11 +1279,18 @@ final class NodaWikidataFetcher {
|
||||
new MDNodaLink(MDNodaRepository::wikidata, $wikidata_id)
|
||||
];
|
||||
foreach (self::P_IDS_NODA_TAGS as $vocabName => $pId) {
|
||||
if (!in_array($vocabName, MDNodaRepositoriesSet::REPOSITORIES_PLACE, true)) continue;
|
||||
if ($vocabName === 'lcsh') continue;
|
||||
if (isset($data['claims'][$pId])) {
|
||||
if (empty($data['claims'][$pId][0]['mainsnak']['datavalue'])) continue;
|
||||
$nodaLinks[] = new MDNodaLink(MDNodaRepository::fromString($vocabName), $data['claims'][$pId][0]['mainsnak']['datavalue']['value']);
|
||||
$url = $data['claims'][$pId][0]['mainsnak']['datavalue']['value'];
|
||||
|
||||
if ($vocabName === 'loc' || ($vocabName === 'lcsh')) {
|
||||
$vocabName = $this->_determineLocRefMode($url);
|
||||
if (empty($vocabName)) continue;
|
||||
}
|
||||
|
||||
if (!in_array($vocabName, MDNodaRepositoriesSet::REPOSITORIES_PLACE, true)) continue;
|
||||
|
||||
$nodaLinks[] = new MDNodaLink(MDNodaRepository::fromString($vocabName), $url);
|
||||
}
|
||||
}
|
||||
|
||||
@ -1274,7 +1302,7 @@ final class NodaWikidataFetcher {
|
||||
NodaBatchInserter::linkNodaForPlace($this->_mysqli_noda, $onum, $nodaLinks, $erfasst_von);
|
||||
|
||||
$this->_mysqli_noda->autocommit(false);
|
||||
if (!empty($tgn_id) and is_numeric($tgn_id)) {
|
||||
if (!empty($tgn_id)) {
|
||||
|
||||
$updateStmt = $this->_mysqli_noda->do_prepare("UPDATE `orte`
|
||||
SET `ort_land` = ?
|
||||
@ -1285,7 +1313,7 @@ final class NodaWikidataFetcher {
|
||||
unset($updateStmt);
|
||||
|
||||
}
|
||||
if (!empty($geonames_id) and is_numeric($geonames_id)) {
|
||||
if (!empty($geonames_id)) {
|
||||
|
||||
$updateStmt = $this->_mysqli_noda->do_prepare("UPDATE `orte`
|
||||
SET `ort_geonames` = ?
|
||||
@ -1512,11 +1540,19 @@ final class NodaWikidataFetcher {
|
||||
new MDNodaLink(MDNodaRepository::wikidata, $wikidata_id)
|
||||
];
|
||||
foreach (self::P_IDS_NODA_TAGS as $vocabName => $pId) {
|
||||
if (!in_array($vocabName, MDNodaRepositoriesSet::REPOSITORIES_TAG, true)) continue;
|
||||
if ($vocabName === 'loc') continue;
|
||||
if (isset($data['claims'][$pId])) {
|
||||
|
||||
if (empty($data['claims'][$pId][0]['mainsnak']['datavalue'])) continue;
|
||||
$nodaLinks[] = new MDNodaLink(MDNodaRepository::fromString($vocabName), $data['claims'][$pId][0]['mainsnak']['datavalue']['value']);
|
||||
$url = $data['claims'][$pId][0]['mainsnak']['datavalue']['value'];
|
||||
|
||||
if ($vocabName === 'loc' || ($vocabName === 'lcsh')) {
|
||||
$vocabName = $this->_determineLocRefMode($url);
|
||||
if (empty($vocabName)) continue;
|
||||
}
|
||||
|
||||
if (!in_array($vocabName, MDNodaRepositoriesSet::REPOSITORIES_TAG, true)) continue;
|
||||
|
||||
$nodaLinks[] = new MDNodaLink(MDNodaRepository::fromString($vocabName), $url);
|
||||
}
|
||||
}
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user