Add new functions for linking norm data repositories in batch and use them in Wikidata fetcher
This commit is contained in:
Joshua Ramon Enslin 2023-08-29 17:32:22 +02:00
parent f27d0900ae
commit 67f7bf9fab
Signed by: jrenslin
GPG Key ID: 46016F84501B70AE
2 changed files with 145 additions and 132 deletions

View File

@ -115,4 +115,118 @@ final class NodaBatchInserter {
$mysqli_noda->autocommit(true); $mysqli_noda->autocommit(true);
} }
/**
 * Adds links to norm data repositories for an actor.
 *
 * Every entry is written to the `noda` table through one prepared statement
 * inside a single transaction. If a row with the same unique key already
 * exists, only its `noda_erfasst_am` timestamp is refreshed.
 *
 * Should preparing or executing the statement fail, the transaction is rolled
 * back, autocommit is switched back on and the error is rethrown, so that the
 * connection is never handed back to the caller in manual-commit mode.
 *
 * @param MDMysqli                    $mysqli_noda DB connection.
 * @param integer                     $persinst_id Actor ID.
 * @param non-empty-array<MDNodaLink> $noda_links  Entries to link.
 * @param string                      $user_name   Name of the current user.
 *
 * @throws \Throwable Rethrows whatever was raised while writing the links.
 *
 * @return void
 */
public static function linkNodaForPersinst(MDMysqli $mysqli_noda, int $persinst_id, array $noda_links, string $user_name):void {

    $mysqli_noda->autocommit(false);
    $insertNodaLinkStmt = null;

    try {

        $insertNodaLinkStmt = $mysqli_noda->do_prepare("INSERT INTO `noda`
(`persinst_id`, `noda_source`, `noda_nrinsource`, `noda_link`, `noda_erfasst_von`)
VALUES
(?, ?, ?, ?, ?)
ON DUPLICATE KEY UPDATE `noda_erfasst_am` = NOW()");

        foreach ($noda_links as $nodaLink) {

            $noda_source   = $nodaLink->source->toDbName();
            $noda_id       = $nodaLink->id;
            $noda_link_url = $nodaLink->getEntityLink();

            $insertNodaLinkStmt->bind_param("issss", $persinst_id, $noda_source, $noda_id, $noda_link_url, $user_name);
            $insertNodaLinkStmt->execute();

        }

        $mysqli_noda->commit();

    }
    catch (\Throwable $e) {
        // Discard the partial batch explicitly: re-enabling autocommit below
        // would otherwise commit whatever had been executed so far.
        // NOTE(review): assumes MDMysqli exposes rollback() like mysqli does - confirm.
        $mysqli_noda->rollback();
        throw $e;
    }
    finally {
        // The statement is null if do_prepare() itself failed.
        $insertNodaLinkStmt?->close();
        $mysqli_noda->autocommit(true);
    }

}
/**
 * Adds links to norm data repositories for a place.
 *
 * Every entry is written to the `noda_orte` table through one prepared
 * statement inside a single transaction. If a row with the same unique key
 * already exists, only its `noda_erfasst_am` timestamp is refreshed.
 *
 * Should preparing or executing the statement fail, the transaction is rolled
 * back, autocommit is switched back on and the error is rethrown, so that the
 * connection is never handed back to the caller in manual-commit mode.
 *
 * @param MDMysqli                    $mysqli_noda DB connection.
 * @param integer                     $ort_id      Place ID.
 * @param non-empty-array<MDNodaLink> $noda_links  Entries to link.
 * @param string                      $user_name   Name of the current user.
 *
 * @throws \Throwable Rethrows whatever was raised while writing the links.
 *
 * @return void
 */
public static function linkNodaForPlace(MDMysqli $mysqli_noda, int $ort_id, array $noda_links, string $user_name):void {

    $mysqli_noda->autocommit(false);
    $insertNodaLinkStmt = null;

    try {

        $insertNodaLinkStmt = $mysqli_noda->do_prepare("INSERT INTO `noda_orte`
(`ort_id`, `noda_source`, `noda_nrinsource`, `noda_link`, `noda_erfasst_von`)
VALUES
(?, ?, ?, ?, ?)
ON DUPLICATE KEY UPDATE `noda_erfasst_am` = NOW()");

        foreach ($noda_links as $nodaLink) {

            $noda_source   = $nodaLink->source->toDbName();
            $noda_id       = $nodaLink->id;
            $noda_link_url = $nodaLink->getEntityLink();

            $insertNodaLinkStmt->bind_param("issss", $ort_id, $noda_source, $noda_id, $noda_link_url, $user_name);
            $insertNodaLinkStmt->execute();

        }

        $mysqli_noda->commit();

    }
    catch (\Throwable $e) {
        // Discard the partial batch explicitly: re-enabling autocommit below
        // would otherwise commit whatever had been executed so far.
        // NOTE(review): assumes MDMysqli exposes rollback() like mysqli does - confirm.
        $mysqli_noda->rollback();
        throw $e;
    }
    finally {
        // The statement is null if do_prepare() itself failed.
        $insertNodaLinkStmt?->close();
        $mysqli_noda->autocommit(true);
    }

}
/**
 * Adds links to norm data repositories for a tag.
 *
 * Every entry is written to the `noda_tag` table through one prepared
 * statement inside a single transaction. If a row with the same unique key
 * already exists, only its `noda_erfasst_am` timestamp is refreshed.
 *
 * Should preparing or executing the statement fail, the transaction is rolled
 * back, autocommit is switched back on and the error is rethrown, so that the
 * connection is never handed back to the caller in manual-commit mode.
 *
 * @param MDMysqli                    $mysqli_noda DB connection.
 * @param integer                     $tag_id      Tag ID.
 * @param non-empty-array<MDNodaLink> $noda_links  Entries to link.
 * @param string                      $user_name   Name of the current user.
 *
 * @throws \Throwable Rethrows whatever was raised while writing the links.
 *
 * @return void
 */
public static function linkNodaForTag(MDMysqli $mysqli_noda, int $tag_id, array $noda_links, string $user_name):void {

    $mysqli_noda->autocommit(false);
    $insertNodaLinkStmt = null;

    try {

        $insertNodaLinkStmt = $mysqli_noda->do_prepare("INSERT INTO `noda_tag`
(`tag_id`, `noda_source`, `noda_nrinsource`, `noda_link`, `noda_erfasst_von`)
VALUES
(?, ?, ?, ?, ?)
ON DUPLICATE KEY UPDATE `noda_erfasst_am` = NOW()");

        foreach ($noda_links as $nodaLink) {

            $noda_source   = $nodaLink->source->toDbName();
            $noda_id       = $nodaLink->id;
            $noda_link_url = $nodaLink->getEntityLink();

            $insertNodaLinkStmt->bind_param("issss", $tag_id, $noda_source, $noda_id, $noda_link_url, $user_name);
            $insertNodaLinkStmt->execute();

        }

        $mysqli_noda->commit();

    }
    catch (\Throwable $e) {
        // Discard the partial batch explicitly: re-enabling autocommit below
        // would otherwise commit whatever had been executed so far.
        // NOTE(review): assumes MDMysqli exposes rollback() like mysqli does - confirm.
        $mysqli_noda->rollback();
        throw $e;
    }
    finally {
        // The statement is null if do_prepare() itself failed.
        $insertNodaLinkStmt?->close();
        $mysqli_noda->autocommit(true);
    }

}
} }

View File

@ -707,14 +707,9 @@ final class NodaWikidataFetcher {
// Set link to Wikipedia in noda table // Set link to Wikipedia in noda table
$insertNodaStmt = $this->_mysqli_noda->do_prepare("INSERT INTO `noda` NodaBatchInserter::linkNodaForPersinst($this->_mysqli_noda, $persinst_id, [
(`persinst_id`, `noda_source`, `noda_nrinsource`, `noda_link`, `noda_erfasst_von`) new MDNodaLink(MDNodaRepository::wikipedia, $wikilink)
VALUES ], $erfasst_von);
(?, 'Wikipedia', '', ?, ?)
ON DUPLICATE KEY UPDATE `noda_link` = ?");
$insertNodaStmt->bind_param("isss", $persinst_id, $wikilink, $erfasst_von, $wikilink);
$insertNodaStmt->execute();
$insertNodaStmt->close();
// Update edit metadata // Update edit metadata
$updatePersinstEditInfoStmt = $this->_mysqli_noda->do_prepare("UPDATE `persinst` $updatePersinstEditInfoStmt = $this->_mysqli_noda->do_prepare("UPDATE `persinst`
@ -890,44 +885,20 @@ final class NodaWikidataFetcher {
// Get links to other norm data sources // Get links to other norm data sources
$nodaLinks = [ $nodaLinks = [
"wikidata" => $wikidata_id, new MDNodaLink(MDNodaRepository::wikidata, $wikidata_id)
]; ];
foreach (self::P_IDS_NODA_TAGS as $vocabName => $pId) { foreach (self::P_IDS_NODA_TAGS as $vocabName => $pId) {
if (!in_array($vocabName, MDNodaRepositoriesSet::REPOSITORIES_ACTOR, true)) continue; if (!in_array($vocabName, MDNodaRepositoriesSet::REPOSITORIES_ACTOR, true)) continue;
if ($vocabName === 'lcsh') continue; if ($vocabName === 'lcsh') continue;
if (isset($data['claims'][$pId])) $nodaLinks[$vocabName] = $data['claims'][$pId][0]['mainsnak']['datavalue']['value']; if (isset($data['claims'][$pId])) {
if (empty($data['claims'][$pId][0]['mainsnak']['datavalue'])) continue;
$nodaLinks[] = new MDNodaLink(MDNodaRepository::fromString($vocabName), $data['claims'][$pId][0]['mainsnak']['datavalue']['value']);
}
} }
// GET links to other noda entries. // GET links to other noda entries.
$insertNodaLinkStmt = $this->_mysqli_noda->do_prepare("INSERT INTO `noda` NodaBatchInserter::linkNodaForPersinst($this->_mysqli_noda, $persinst_id, $nodaLinks, $erfasst_von);
(`persinst_id`, `noda_source`, `noda_nrinsource`, `noda_link`, `noda_erfasst_von`)
VALUES
(?, ?, ?, ?, ?)
ON DUPLICATE KEY UPDATE `noda_nrinsource` = ?,
`noda_link` = ?,
`noda_erfasst_von` = ?");
$this->_mysqli_noda->autocommit(false);
foreach ($nodaLinks as $noda_source => $nodaId) {
if ($nodaId === null) continue;
/*
if (!isset(self::URL_PREFIXES_PLACES_NODA_SOURCE[$noda_source])) {
throw new Exception("Unknown noda link: " . $noda_source);
}
*/
$noda_link_url = self::URL_PREFIXES_PLACES_NODA_SOURCE[$noda_source] . $nodaId;
$insertNodaLinkStmt->bind_param("isssssss", $persinst_id, $noda_source, $nodaId, $noda_link_url, $erfasst_von, $nodaId, $noda_link_url, $noda_link_url);
$insertNodaLinkStmt->execute();
}
$this->_mysqli_noda->commit();
$this->_mysqli_noda->autocommit(true);
$insertNodaLinkStmt->close();
$this->getWikidataTranslationsForPersinst($data, $persinst_id); $this->getWikidataTranslationsForPersinst($data, $persinst_id);
@ -1041,15 +1012,9 @@ final class NodaWikidataFetcher {
// Write link to wikipedia to relevant noda DB table // Write link to wikipedia to relevant noda DB table
$insertWikiStmt = $this->_mysqli_noda->do_prepare("INSERT INTO `noda_orte` NodaBatchInserter::linkNodaForPlace($this->_mysqli_noda, $placeID, [
(`ort_id`, `noda_source`, `noda_nrinsource`, `noda_link`, `noda_erfasst_am`, `noda_erfasst_von`) new MDNodaLink(MDNodaRepository::wikipedia, $wikilink[$lang])
VALUES ], $erfasst_von);
(?, 'Wikipedia', '', ?, NOW(), ?)
ON DUPLICATE KEY UPDATE `noda_link` = ?");
$insertWikiStmt->bind_param("isss", $placeID, $wikilink[$lang], $erfasst_von, $wikilink[$lang]);
$insertWikiStmt->execute();
$insertWikiStmt->close();
unset($insertWikiStmt);
return true; return true;
@ -1178,12 +1143,15 @@ final class NodaWikidataFetcher {
if (isset($data['claims']['P1667'])) $tgn_id = filter_var($data['claims']['P1667'][0]['mainsnak']['datavalue']['value'], FILTER_VALIDATE_INT); if (isset($data['claims']['P1667'])) $tgn_id = filter_var($data['claims']['P1667'][0]['mainsnak']['datavalue']['value'], FILTER_VALIDATE_INT);
$nodaLinks = [ $nodaLinks = [
"wikidata" => $wikidata_id, new MDNodaLink(MDNodaRepository::wikidata, $wikidata_id)
]; ];
foreach (self::P_IDS_NODA_TAGS as $vocabName => $pId) { foreach (self::P_IDS_NODA_TAGS as $vocabName => $pId) {
if (!in_array($vocabName, MDNodaRepositoriesSet::REPOSITORIES_PLACE, true)) continue; if (!in_array($vocabName, MDNodaRepositoriesSet::REPOSITORIES_PLACE, true)) continue;
if ($vocabName === 'lcsh') continue; if ($vocabName === 'lcsh') continue;
if (isset($data['claims'][$pId])) $nodaLinks[$vocabName] = $data['claims'][$pId][0]['mainsnak']['datavalue']['value']; if (isset($data['claims'][$pId])) {
if (empty($data['claims'][$pId][0]['mainsnak']['datavalue'])) continue;
$nodaLinks[] = new MDNodaLink(MDNodaRepository::fromString($vocabName), $data['claims'][$pId][0]['mainsnak']['datavalue']['value']);
}
} }
if (isset($data['claims']['P625'])) { if (isset($data['claims']['P625'])) {
@ -1191,31 +1159,9 @@ final class NodaWikidataFetcher {
$longitude_wd = $data['claims']['P625'][0]['mainsnak']['datavalue']['value']['longitude']; $longitude_wd = $data['claims']['P625'][0]['mainsnak']['datavalue']['value']['longitude'];
} }
NodaBatchInserter::linkNodaForPlace($this->_mysqli_noda, $onum, $nodaLinks, $erfasst_von);
$this->_mysqli_noda->autocommit(false); $this->_mysqli_noda->autocommit(false);
$insertNodaLinkStmt = $this->_mysqli_noda->do_prepare("INSERT INTO `noda_orte`
(`ort_id`, `noda_source`, `noda_nrinsource`, `noda_link`, `noda_erfasst_von`)
VALUES
(?, ?, ?, ?, ?)
ON DUPLICATE KEY UPDATE `noda_nrinsource` = ?,
`noda_link` = ?");
foreach ($nodaLinks as $noda_source => $nodaId) {
/*
if (!isset(self::URL_PREFIXES_PLACES_NODA_SOURCE[$noda_source])) {
throw new Exception("Unknown noda link: " . $noda_source);
}
*/
$noda_link_url = self::URL_PREFIXES_PLACES_NODA_SOURCE[$noda_source] . $nodaId;
$insertNodaLinkStmt->bind_param("issssss", $onum, $noda_source, $nodaId, $noda_link_url, $erfasst_von, $nodaId, $noda_link_url);
$insertNodaLinkStmt->execute();
}
$insertNodaLinkStmt->close();
unset($insertNodaLinkStmt);
if (!empty($tgn_id)) { if (!empty($tgn_id)) {
$updateStmt = $this->_mysqli_noda->do_prepare("UPDATE `orte` $updateStmt = $this->_mysqli_noda->do_prepare("UPDATE `orte`
@ -1383,16 +1329,12 @@ final class NodaWikidataFetcher {
$updateTagDescStmt->close(); $updateTagDescStmt->close();
$insertNodaTagStmt = $this->_mysqli_noda->do_prepare("INSERT INTO `noda_tag` $this->_mysqli_noda->commit();
(`tag_id`, `noda_source`, `noda_nrinsource`, `noda_link`, `noda_erfasst_von`) $this->_mysqli_noda->autocommit(true);
VALUES
(?, 'Wikipedia', '', ?, ?)
ON DUPLICATE KEY UPDATE `noda_link` = ?");
$insertNodaTagStmt->bind_param("isss", $tag_id, $wikilink, $erfasst_von, $wikilink);
$insertNodaTagStmt->execute();
$insertNodaTagStmt->close();
$output = true; NodaBatchInserter::linkNodaForTag($this->_mysqli_noda, $tag_id, [
new MDNodaLink(MDNodaRepository::wikipedia, $wikilink)
], $erfasst_von);
// Update tag editing metadata // Update tag editing metadata
$updateTagEditInfoStmt = $this->_mysqli_noda->do_prepare("UPDATE `tag` $updateTagEditInfoStmt = $this->_mysqli_noda->do_prepare("UPDATE `tag`
@ -1403,53 +1345,7 @@ final class NodaWikidataFetcher {
$updateTagEditInfoStmt->execute(); $updateTagEditInfoStmt->execute();
$updateTagEditInfoStmt->close(); $updateTagEditInfoStmt->close();
$this->_mysqli_noda->commit(); return true;
$this->_mysqli_noda->autocommit(true);
return $output;
}
/**
* Writes relations to norm data sources to DB.
*
* @param array<string> $nodaLinks Links to other noda sources.
* @param integer $tag_id Tag ID.
* @param string $erfasst_von Name of the user to edit this.
*
* @return void
*/
public function writeNodaLinksTag(array $nodaLinks, int $tag_id, string $erfasst_von):void {
$this->_mysqli_noda->autocommit(false);
$insertNodaTagStmt = $this->_mysqli_noda->do_prepare("INSERT INTO `noda_tag`
(`tag_id`, `noda_source`, `noda_nrinsource`, `noda_link`, `noda_erfasst_von`)
VALUES
(?, ?, ?, ?, ?)
ON DUPLICATE KEY UPDATE
`noda_nrinsource` = ?,
`noda_link` = ?,
`noda_erfasst_von` = ?");
foreach ($nodaLinks as $vocabName => $nodaId) {
if (empty(self::URL_PREFIXES_PLACES_NODA_SOURCE[$vocabName])) {
throw new Exception("Unknown URL prefix for: " . $vocabName);
}
if (empty($nodaId)) continue;
$noda_link = self::URL_PREFIXES_PLACES_NODA_SOURCE[$vocabName] . $nodaId;
$insertNodaTagStmt->bind_param("isssssss", $tag_id, $vocabName, $nodaId, $noda_link, $erfasst_von, $nodaId, $noda_link, $erfasst_von);
$insertNodaTagStmt->execute();
}
$insertNodaTagStmt->close();
$this->_mysqli_noda->commit();
$this->_mysqli_noda->autocommit(true);
} }
@ -1511,15 +1407,18 @@ final class NodaWikidataFetcher {
} }
$nodaLinks = [ $nodaLinks = [
"wikidata" => $wikidata_id, new MDNodaLink(MDNodaRepository::wikidata, $wikidata_id)
]; ];
foreach (self::P_IDS_NODA_TAGS as $vocabName => $pId) { foreach (self::P_IDS_NODA_TAGS as $vocabName => $pId) {
if (!in_array($vocabName, MDNodaRepositoriesSet::REPOSITORIES_TAG, true)) continue; if (!in_array($vocabName, MDNodaRepositoriesSet::REPOSITORIES_TAG, true)) continue;
if ($vocabName === 'loc') continue; if ($vocabName === 'loc') continue;
if (isset($data['claims'][$pId])) $nodaLinks[$vocabName] = $data['claims'][$pId][0]['mainsnak']['datavalue']['value']; if (isset($data['claims'][$pId])) {
if (empty($data['claims'][$pId][0]['mainsnak']['datavalue'])) continue;
$nodaLinks[] = new MDNodaLink(MDNodaRepository::fromString($vocabName), $data['claims'][$pId][0]['mainsnak']['datavalue']['value']);
}
} }
$this->writeNodaLinksTag($nodaLinks, $tag_id, $erfasst_von); NodaBatchInserter::linkNodaForTag($this->_mysqli_noda, $tag_id, $nodaLinks, $erfasst_von);
// Get translations // Get translations
if (!empty($data)) $this->getWikidataTranslationsForTag($data, $tag_id); if (!empty($data)) $this->getWikidataTranslationsForTag($data, $tag_id);