Refactor parsing of norm data links from Wikidata into a dedicated function
This commit is contained in:
Joshua Ramon Enslin 2024-10-03 15:03:38 +02:00
parent 96ba020514
commit 9b63a4d95d
Signed by: jrenslin
GPG Key ID: 46016F84501B70AE

View File

@ -91,6 +91,52 @@ final class NodaWikidataFetcher {
} }
/**
 * Builds the list of norm data links contained in a Wikidata result.
 *
 * The returned list always starts with a link to the Wikidata entry itself.
 * For every property listed in self::P_IDS_NODA_TAGS only the first claim
 * is evaluated. Library of Congress entries ('loc' / 'lcsh') are
 * re-classified through _determineLocRefMode(). Vocabularies that are not
 * part of the repository set selected for the target type are skipped.
 *
 * @param 'tag'|'persinst'|'place' $target      Target vocabulary type.
 * @param string                   $wikidata_id Wikidata ID.
 * @param array<mixed>             $data        Wikidata result.
 *
 * @return list<MDNodaLink>
 */
public function _getNodaLinksFromWikidataResult(string $target, string $wikidata_id, array $data):array {

    // No default arm: any target other than the three listed ones makes
    // the match expression throw an UnhandledMatchError.
    $allowedRepositories = match($target) {
        'persinst' => MDNodaRepositoriesSet::REPOSITORIES_ACTOR,
        'place' => MDNodaRepositoriesSet::REPOSITORIES_PLACE,
        'tag' => MDNodaRepositoriesSet::REPOSITORIES_TAG,
    };

    // The link to the Wikidata entry itself is always part of the result.
    $links = [new MDNodaLink(MDNodaRepository::wikidata, $wikidata_id)];

    foreach (self::P_IDS_NODA_TAGS as $repositoryName => $propertyId) {

        // A missing claim and an empty data value are both skipped.
        $dataValue = $data['claims'][$propertyId][0]['mainsnak']['datavalue'] ?? null;
        if (empty($dataValue)) {
            continue;
        }
        $identifier = $dataValue['value'];

        // The Library of Congress properties need to be resolved to the
        // concrete vocabulary the identifier belongs to.
        if (in_array($repositoryName, ['loc', 'lcsh'], true)) {
            $repositoryName = $this->_determineLocRefMode($identifier);
            if (empty($repositoryName)) {
                continue;
            }
        }

        if (in_array($repositoryName, $allowedRepositories, true)) {
            $links[] = new MDNodaLink(MDNodaRepository::fromString($repositoryName), $identifier);
        }

    }

    return $links;

}
/** /**
* Checks if a vocabulary link to the library * Checks if a vocabulary link to the library
* of congress is a valid LCSH or LOC link or * of congress is a valid LCSH or LOC link or
@ -963,29 +1009,9 @@ final class NodaWikidataFetcher {
// Get links to other norm data sources // Get links to other norm data sources
$nodaLinks = [ if (!empty($nodaLinks = $this->_getNodaLinksFromWikidataResult('persinst', $wikidata_id, $data))) {
new MDNodaLink(MDNodaRepository::wikidata, $wikidata_id)
];
foreach (self::P_IDS_NODA_TAGS as $vocabName => $pId) {
if (isset($data['claims'][$pId])) {
if (empty($data['claims'][$pId][0]['mainsnak']['datavalue'])) continue;
$url = $data['claims'][$pId][0]['mainsnak']['datavalue']['value'];
if ($vocabName === 'loc' || ($vocabName === 'lcsh')) {
$vocabName = $this->_determineLocRefMode($url);
if (empty($vocabName)) continue;
}
if (!in_array($vocabName, MDNodaRepositoriesSet::REPOSITORIES_ACTOR, true)) continue;
$nodaLinks[] = new MDNodaLink(MDNodaRepository::fromString($vocabName), $url);
}
}
// GET links to other noda entries.
NodaBatchInserter::linkNodaForPersinst($this->_mysqli_noda, $persinst_id, $nodaLinks, $erfasst_von); NodaBatchInserter::linkNodaForPersinst($this->_mysqli_noda, $persinst_id, $nodaLinks, $erfasst_von);
}
$this->getWikidataTranslationsForPersinst($data, $persinst_id); $this->getWikidataTranslationsForPersinst($data, $persinst_id);
@ -1264,23 +1290,8 @@ final class NodaWikidataFetcher {
if (isset($data['claims']['P1566'])) $geonames_id = filter_var($data['claims']['P1566'][0]['mainsnak']['datavalue']['value'], FILTER_VALIDATE_INT); if (isset($data['claims']['P1566'])) $geonames_id = filter_var($data['claims']['P1566'][0]['mainsnak']['datavalue']['value'], FILTER_VALIDATE_INT);
if (isset($data['claims']['P1667'])) $tgn_id = filter_var($data['claims']['P1667'][0]['mainsnak']['datavalue']['value'], FILTER_VALIDATE_INT); if (isset($data['claims']['P1667'])) $tgn_id = filter_var($data['claims']['P1667'][0]['mainsnak']['datavalue']['value'], FILTER_VALIDATE_INT);
$nodaLinks = [ if (!empty($nodaLinks = $this->_getNodaLinksFromWikidataResult('place', $wikidata_id, $data))) {
new MDNodaLink(MDNodaRepository::wikidata, $wikidata_id) NodaBatchInserter::linkNodaForPlace($this->_mysqli_noda, $onum, $nodaLinks, $erfasst_von);
];
foreach (self::P_IDS_NODA_TAGS as $vocabName => $pId) {
if (isset($data['claims'][$pId])) {
if (empty($data['claims'][$pId][0]['mainsnak']['datavalue'])) continue;
$url = $data['claims'][$pId][0]['mainsnak']['datavalue']['value'];
if ($vocabName === 'loc' || ($vocabName === 'lcsh')) {
$vocabName = $this->_determineLocRefMode($url);
if (empty($vocabName)) continue;
}
if (!in_array($vocabName, MDNodaRepositoriesSet::REPOSITORIES_PLACE, true)) continue;
$nodaLinks[] = new MDNodaLink(MDNodaRepository::fromString($vocabName), $url);
}
} }
if (isset($data['claims']['P625'])) { if (isset($data['claims']['P625'])) {
@ -1288,8 +1299,6 @@ final class NodaWikidataFetcher {
$longitude_wd = \filter_var($data['claims']['P625'][0]['mainsnak']['datavalue']['value']['longitude'], FILTER_VALIDATE_FLOAT); $longitude_wd = \filter_var($data['claims']['P625'][0]['mainsnak']['datavalue']['value']['longitude'], FILTER_VALIDATE_FLOAT);
} }
NodaBatchInserter::linkNodaForPlace($this->_mysqli_noda, $onum, $nodaLinks, $erfasst_von);
$this->_mysqli_noda->autocommit(false); $this->_mysqli_noda->autocommit(false);
if (!empty($tgn_id)) { if (!empty($tgn_id)) {
@ -1521,27 +1530,9 @@ final class NodaWikidataFetcher {
} }
$nodaLinks = [ if (!empty($nodaLinks = $this->_getNodaLinksFromWikidataResult('tag', $wikidata_id, $data))) {
new MDNodaLink(MDNodaRepository::wikidata, $wikidata_id)
];
foreach (self::P_IDS_NODA_TAGS as $vocabName => $pId) {
if (isset($data['claims'][$pId])) {
if (empty($data['claims'][$pId][0]['mainsnak']['datavalue'])) continue;
$url = $data['claims'][$pId][0]['mainsnak']['datavalue']['value'];
if ($vocabName === 'loc' || ($vocabName === 'lcsh')) {
$vocabName = $this->_determineLocRefMode($url);
if (empty($vocabName)) continue;
}
if (!in_array($vocabName, MDNodaRepositoriesSet::REPOSITORIES_TAG, true)) continue;
$nodaLinks[] = new MDNodaLink(MDNodaRepository::fromString($vocabName), $url);
}
}
NodaBatchInserter::linkNodaForTag($this->_mysqli_noda, $tag_id, $nodaLinks, $erfasst_von); NodaBatchInserter::linkNodaForTag($this->_mysqli_noda, $tag_id, $nodaLinks, $erfasst_von);
}
// Get translations // Get translations
if (!empty($data)) $this->getWikidataTranslationsForTag($data, $tag_id); if (!empty($data)) $this->getWikidataTranslationsForTag($data, $tag_id);