MDNodaHelpers/src/NodaWikidataFetcher.php

1918 lines
74 KiB
PHP
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

<?PHP
/**
* This file contains tools for fetching data from Wikidata.
*
* @file
* @author Joshua Ramon Enslin <joshua@museum-digital.de>
*/
declare(strict_types = 1);
/**
* Helps fetching information from Wikidata.
*/
final class NodaWikidataFetcher {
/**
 * HTTP headers passed along with requests to the Wikidata SPARQL endpoint
 * (see sparqlQuery()): a bot User-Agent that embeds the running PHP version
 * and an Accept header asking for SPARQL results as JSON.
 */
const WIKIDATA_FETCH_HEADERS = [
'User-Agent: museum-digital-bot GND-to-Wikidata PHP/' . PHP_VERSION,
'Accept: application/sparql-results+json',
];
/**
 * Language codes of the Wikipedia editions considered when fetching a main
 * description. Each code is combined with "wiki" to look up the matching
 * Wikidata sitelink (e.g. "dewiki").
 *
 * Japanese is listed as "ja" (ja.wikipedia.org / "jawiki"), matching
 * LANGUAGES_TO_CHECK; "jp" never matches any sitelink.
 */
const LANGUAGES_MAIN_DESC = ['de', 'da', 'en', 'es', 'fr', 'hu', 'it', 'ja', 'nl', 'pt', 'ru', 'sv', 'zh'];
/**
 * Language codes for which getWikidataTranslationsForPersinst() looks for
 * labels, descriptions and Wikipedia sitelinks in a Wikidata entity.
 */
const LANGUAGES_TO_CHECK = ['ar', 'bn', 'cs', 'da', 'de', 'el', 'en', 'es', 'fa', 'fi', 'fr', 'ha', 'he', 'hi', 'hu', 'id', 'it', 'ja', 'ka', 'ko', 'nl', 'pl', 'pt', 'ro', 'ru', 'sv', 'sw', 'ta', 'th', 'tl', 'tr', 'ur', 'vi', 'zh'];
/**
 * Language codes whose terms are to be capitalized.
 * NOTE(review): not referenced in the visible part of the file; usage presumably
 * lives further down - confirm.
 */
const LANGUAGES_TO_CAPITALIZE = ["cs", "da", "de", "en", "es", "fr", "fi", "id", "it", "nl", "pl", "pt", "ru", "sv", "tl", "tr"];
/**
 * Maps the name of an external authority / norm data source to the URL prefix
 * that, followed by the entry's ID in that source, forms the link stored as
 * `noda_link` (see retrievePersinstInfoFromWikidataID()).
 */
const URL_PREFIXES_PLACES_NODA_SOURCE = [
"gnd" => "https://d-nb.info/gnd/",
"nomisma" => "http://nomisma.org/id/",
"osm" => "https://www.openstreetmap.org/relation/",
"loc" => "http://id.loc.gov/authorities/names/",
"cona" => "http://vocab.getty.edu/page/cona/",
"aat" => "http://vocab.getty.edu/page/aat/",
"lcsh" => "http://id.loc.gov/authorities/subjects/",
"wikidata" => "https://www.wikidata.org/wiki/",
"bne" => "http://datos.bne.es/persona/",
"viaf" => "https://viaf.org/viaf/",
"bnf" => "https://catalogue.bnf.fr/ark:/12148/cb",
"ulan" => "http://vocab.getty.edu/page/ulan/",
"rkd" => "https://rkd.nl/explore/artists/",
"pim" => "https://opac-nevter.pim.hu/en/record/-/record/",
"ndl" => "https://id.ndl.go.jp/auth/ndlna/",
"npg" => "https://www.npg.org.uk/collections/search/person/",
"orcid" => "https://orcid.org/",
];
/**
 * Maps the name of an external repository to the Wikidata property (P-ID)
 * holding that repository's identifier. Used both for building SPARQL lookups
 * by external ID and for reading external IDs from an entity's claims.
 * Note that 'lcsh' and 'loc' share the same property (P244), and that
 * 'iconclass' has no counterpart in URL_PREFIXES_PLACES_NODA_SOURCE.
 */
const P_IDS_NODA_TAGS = [
'gnd' => 'P227',
'lcsh' => 'P244',
'aat' => 'P1014',
'iconclass' => 'P1256',
'osm' => 'P402',
'loc' => 'P244',
'nomisma' => 'P2950',
'cona' => 'P1669',
"rkd" => "P650",
"ulan" => "P245",
"viaf" => "P214",
"bnf" => "P268",
"pim" => "P3973",
"ndl" => "P349", // National Diet Library (Japan)
"npg" => "P1816", // "National" Portrait Gallery
"bne" => "P950", // National Library of Spain (Biblioteca Nacional de España)
"orcid" => "P496",
];
/**
 * Literal HTML fragments that _cleanWikidataInput() deletes from text fetched
 * from Wikipedia before extracting paragraphs. The entries are openings of
 * maintenance notices, coordinate / map boxes and empty paragraphs seen in
 * Wikipedia output; they are removed via plain str_replace(), i.e. only the
 * listed fragment itself is dropped.
 */
const WIKIPEDIA_REMOVE_LITERALS = [
"<p>Si vous disposez d'ouvrages ou d'articles de référence ou si vous ",
'<p><b>En pratique&#160;:</b> <a href="/wiki/Wikip%C3%A9dia:Citez_vos_sources#Qualité_des_sources" title="Wikipédia:Citez vos sources">Quelles sources sont attendu',
'<pVous pouvez partager vos connaissances en laméliorant (',
'<p class="mw-empty-elt">',
'<p><small>Géolocalisation sur la carte',
'<p><b>Koordinaatit:</b>',
'<p><span class="executeJS" data-gadgetname="ImgToggle"></span',
'<p><span class="imgtoggleboxTitle">',
//'<div class="mw-parser-output"><p>',
'<p><span style="font-size: small;"><span id="coordinates">',
'<p><span></span></p>',
'<p><a rel="nofollow" class="external text" href="https://maps.gs',
'<p><span class="plainlinks nourlexpansion"><a class="external text" href="//tools.wmflabs.org/geohack/geohack.php?langu',
'<p><span style="display:none">',
'<p>&#32;</p>',
'<p><span class="geo noexcerpt"',
];
/**
 * Database connection used by the instance methods of this class for reading
 * and writing actor (`persinst`), place (`orte`) and norm data link tables.
 *
 * @var MDMysqli
 */
private MDMysqli $_mysqli_noda;
/**
 * Resolves a Wikidata ID from a link that points either to a Wikidata page
 * or to a Wikipedia article.
 *
 * Wikidata links are tried first, Wikipedia links second. If neither resolver
 * yields a non-empty ID, an empty string is returned.
 *
 * @param non-empty-string $linkUrl Link to a page.
 *
 * @throws MDExpectedException If $linkUrl is not a valid URL.
 *
 * @return string
 */
public static function getWikidataIdFromLink(string $linkUrl):string {

    $validatedUrl = filter_var($linkUrl, FILTER_VALIDATE_URL);
    if (!$validatedUrl) {
        throw new MDExpectedException("Invalid URL");
    }

    $pointsToWikidata  = (strpos($linkUrl, "https://www.wikidata.org/wiki/") !== false);
    $pointsToWikipedia = (strpos($linkUrl, ".wikipedia.org/") !== false);

    if ($pointsToWikidata === true) {
        $fromWikidata = self::getWikidataIdFromWikidataLink($linkUrl);
        if (!empty($fromWikidata)) {
            return $fromWikidata;
        }
    }

    if ($pointsToWikipedia === true) {
        $fromWikipedia = self::getWikidataIdFromWikipedia($linkUrl);
        if (!empty($fromWikipedia)) {
            return $fromWikipedia;
        }
    }

    return '';

}
/**
 * Extracts the entity ID from a link to a Wikidata page.
 *
 * The ID is taken to be the last path segment following
 * "https://www.wikidata.org/wiki/". A query string ("?...") or fragment
 * ("#...") after the path is ignored, as are trailing slashes and spaces.
 *
 * @param string $linkUrl Link to a Wikidata page.
 *
 * @return string The last path segment, or an empty string if the link does
 *                not contain the Wikidata page prefix or names no page.
 */
public static function getWikidataIdFromWikidataLink(string $linkUrl):string {

    $prefix = "https://www.wikidata.org/wiki/";

    $prefixPos = strpos($linkUrl, $prefix);
    if ($prefixPos === false) {
        return '';
    }

    // Only look at what follows the prefix, so that neither the host nor the
    // "wiki" path segment can ever be returned as an ID.
    $pagePath = substr($linkUrl, $prefixPos + strlen($prefix));

    // Cut off query string and fragment, e.g. "Q42?uselang=de" or "Q42#P31".
    $pagePath = substr($pagePath, 0, strcspn($pagePath, '?#'));
    $pagePath = trim($pagePath, '/ ');

    $parts = explode('/', $pagePath);
    return (string)end($parts);

}
/**
 * Determines the Wikidata ID linked from a Wikipedia article.
 *
 * The article is downloaded and parsed as HTML; the ID is read from the link
 * inside the element with the ID "t-wikibase" (the "Wikidata item" entry of
 * the page's toolbox). Only IDs starting with "Q" are accepted.
 *
 * @param non-empty-string $linkUrl Link to a Wikipedia page.
 *
 * @return string Q-ID, or an empty string if none could be determined.
 */
public static function getWikidataIdFromWikipedia(string $linkUrl):string {

    if (strpos($linkUrl, ".wikipedia.org/") === false) {
        return '';
    }

    if (empty($wikipedia_cont = MD_STD::runCurl($linkUrl))) return '';

    // Real-world HTML regularly triggers libxml warnings. Collect them
    // internally while parsing and restore the previous setting on every
    // path, including a failed parse.
    $previousLibxmlSetting = libxml_use_internal_errors(true);
    try {
        $doc = new DOMDocument();
        $loaded = $doc->loadHTML($wikipedia_cont);
    }
    finally {
        libxml_clear_errors();
        libxml_use_internal_errors($previousLibxmlSetting);
    }

    if (!$loaded) {
        return '';
    }

    $wikidataLinkLi = $doc->getElementById("t-wikibase");
    if ($wikidataLinkLi === null) {
        return '';
    }

    // Pick the first actual <a> element: the first child node may just as
    // well be a whitespace text node, which has no attributes.
    $wikidataLink = $wikidataLinkLi->getElementsByTagName('a')->item(0);
    if (!($wikidataLink instanceof DOMElement)) {
        return '';
    }

    $t_wikibase = $wikidataLink->getAttribute('href');
    if (empty($t_wikibase)) {
        return '';
    }

    if (($wikidata_id_end = strrpos($t_wikibase, '/')) !== false) {
        $wikidata_id = trim(substr($t_wikibase, $wikidata_id_end + 1), '/');
        if (substr($wikidata_id, 0, 1) === 'Q') {
            return $wikidata_id;
        }
    }

    return '';

}
/**
 * Runs a SPARQL query against the Wikidata SPARQL endpoint.
 *
 * The query is sent URL-encoded as GET parameter together with
 * self::WIKIDATA_FETCH_HEADERS; the JSON response is decoded into an
 * associative array.
 *
 * @param string $sparqlQuery Query string.
 *
 * @throws MDhttpFailedException If the response is not a JSON object / array.
 *
 * @return array<mixed>
 */
public static function sparqlQuery(string $sparqlQuery):array {

    $url = 'https://query.wikidata.org/sparql?query=' . urlencode($sparqlQuery);
    // NOTE(review): the second argument is presumably a timeout - confirm
    // against MD_STD::runCurl().
    $result = MD_STD::runCurl($url, 100000000, self::WIKIDATA_FETCH_HEADERS);

    // An empty or non-JSON response (error page, rate limiting) decodes to
    // null. Report that explicitly instead of failing on the return type.
    $decoded = json_decode($result, true);
    if (!is_array($decoded)) {
        throw new MDhttpFailedException("Failed fetching from Wikidata. Try again later.");
    }

    return $decoded;

}
/**
 * Formulates a SPARQL query string for fetching from Wikidata based on an external ID.
 *
 * The external ID is embedded as a SPARQL string literal; backslashes, double
 * quotes and line breaks in it are escaped so that the ID cannot terminate the
 * literal and alter the query.
 *
 * @param string $repoName   Name of the repository.
 * @param string $externalId ID in the external repository.
 * @param string $repoPId    Optional P-ID of the external repository. Needed for
 *                           Geonames and TGN, obsolete otherwise.
 *
 * @throws MDmainEntityNotExistentException If no P-ID is passed and the
 *                                          repository name is unknown.
 *
 * @return string
 */
public static function formulateWikidataQueryByExtId(string $repoName, string $externalId, string $repoPId = ''):string {

    if (empty($repoPId)) {
        if (empty(self::P_IDS_NODA_TAGS[$repoName])) {
            throw new MDmainEntityNotExistentException("Unknown external repository. The following repositories are known with their Wikidata ID: " . implode(', ', array_keys(self::P_IDS_NODA_TAGS)));
        }
        $repoPId = self::P_IDS_NODA_TAGS[$repoName];
    }

    // strtr() with an array works in a single pass, so characters that were
    // already escaped are not escaped a second time.
    $escapedExternalId = strtr($externalId, [
        "\\" => "\\\\",
        "\"" => "\\\"",
        "\n" => "\\n",
        "\r" => "\\r",
    ]);

    $sparqlQueryString = 'SELECT ?id ?idLabel WHERE {
?id wdt:' . $repoPId . ' "' . $escapedExternalId . '".
SERVICE wikibase:label {
bd:serviceParam wikibase:language "en" .
}
}';

    return $sparqlQueryString;

}
/**
 * Gets the Wikidata ID based on a result from Wikidata's SPARQL endpoint.
 *
 * An ID is only returned if the result contains exactly one binding; zero or
 * several matches are treated as "no unambiguous match". Results lacking the
 * expected structure yield an empty string as well.
 *
 * @param array<mixed> $queryResult Query result.
 *
 * @return string Part of the entity URI after the last slash, or an empty string.
 */
public static function readWikidataIdFromSparqlResult(array $queryResult):string {

    $bindings = $queryResult['results']['bindings'] ?? null;
    if (!is_array($bindings) || count($bindings) !== 1) {
        return '';
    }

    $wikidataLink = $bindings[0]['id']['value'] ?? '';
    if (!is_string($wikidataLink) || empty($wikidataLink)) {
        return '';
    }

    // The value is the entity URI; the ID is its last path segment.
    $endSlashPos = strrpos($wikidataLink, '/');
    if ($endSlashPos === false) {
        return '';
    }

    return substr($wikidataLink, $endSlashPos + 1);

}
/**
 * Queries Wikidata by an external repository's ID and returns the matching Q-ID
 * if there is any.
 *
 * @param string $repoName   Name of the repository.
 * @param string $externalId ID in the external repository.
 * @param string $repoPId    Optional P-ID of the external repository. Needed for
 *                           Geonames and TGN, obsolete otherwise.
 *
 * @return string Q-ID, or an empty string if there is no unambiguous match.
 */
public static function getWikidataIdByExternalId(string $repoName, string $externalId, string $repoPId = ''):string {

    // Pass the caller's P-ID through unchanged; an empty value makes the
    // query builder fall back to the P-ID registered for $repoName.
    $sparqlQueryString = self::formulateWikidataQueryByExtId($repoName, $externalId, $repoPId);

    $queryResult = self::sparqlQuery($sparqlQueryString);
    return self::readWikidataIdFromSparqlResult($queryResult);

}
/**
 * Gets translation source Wikipedia pages from Wikidata.
 *
 * For every requested language that has both a label and a Wikipedia sitelink
 * (with URL and title) in the entity data, two entries are produced: the URL
 * of a Wikipedia API call that returns the parsed first section of the
 * article, and the human-facing link to the article.
 *
 * @param array<string> $checkagainstLanguage The language to check against.
 * @param array<mixed>  $data                 Data fetched from Wikidata.
 *
 * @return array{0: array<string, non-empty-string>, 1: array<string, string>}
 *         0: API URLs to fetch, 1: article links - both keyed by language.
 */
public static function getWikidataWikipediaTranslationSources(array $checkagainstLanguage, array $data):array {

    $languagesToFetch = $wikilinks = [];

    foreach ($checkagainstLanguage as $lang) {

        if (empty($data['labels'][$lang])) {
            continue;
        }

        // Check for URL and title before reading them: an incomplete
        // sitelink is skipped instead of triggering notices or type errors.
        $sitelink = $data['sitelinks'][$lang . 'wiki'] ?? null;
        if (!is_array($sitelink) || !isset($sitelink['url'], $sitelink['title'])) {
            continue;
        }

        $wikilinkterm = str_replace(' ', '_', (string)$sitelink['title']);

        $languagesToFetch[$lang] = "https://" . $lang . ".wikipedia.org/w/api.php?action=parse&page=" . urlencode($wikilinkterm) . "&prop=text&section=0&format=json";
        $wikilinks[$lang] = $sitelink['url'];

    }

    return [$languagesToFetch, $wikilinks];

}
/**
 * Cleans contents parsed from Wikipedia.
 *
 * Input starting with "<" is parsed as XML: style, table and ol elements and
 * all div elements but the first are dropped, a leading paragraph linking to
 * "geohack" is removed, and the remaining text content is returned, cut at
 * the first paragraph break after roughly 600 characters.
 *
 * Any other input is treated as text with optional markup: known boilerplate
 * fragments, leading tables / images, tags, inline CSS rules, footnote markers
 * and superfluous whitespace are removed, and the result is cut in the same way.
 *
 * @param string $input Input string.
 *
 * @return string
 */
private static function _cleanWikidataInput(string $input):string {

    // Footnote markers "[0]" ... "[99]" are dropped in both branches.
    $bracketsToRemove = [];
    for ($i = 0; $i < 100; $i++) {
        $bracketsToRemove["[$i]"] = "";
    }

    if (substr($input, 0, strlen('<')) === '<') {

        $doc = new DOMDocument();
        // NOTE(review): the result of loadXML() is not checked; input that is
        // not well-formed XML leaves the document empty.
        $doc->loadXML($input);

        // Tag name => number of leading elements of that name to keep.
        // The order matters, as removing an element also removes its children.
        $elementsToDrop = ["style" => 0, "table" => 0, "div" => 1, "ol" => 0];
        foreach ($elementsToDrop as $tagName => $keep) {
            $list = $doc->getElementsByTagName($tagName);
            while ($list->length > $keep) {
                $p = $list->item($keep);
                $p->parentNode->removeChild($p);
            }
        }

        // Without the null check, saveHTML(null) would dump the whole
        // document and the removal below would dereference null.
        $firstP = $doc->getElementsByTagName("p")->item(0);
        if ($firstP !== null && strpos($doc->saveHTML($firstP), 'geohack') !== false) {
            $firstP->parentNode->removeChild($firstP);
        }

        $input = str_replace(PHP_EOL, PHP_EOL . PHP_EOL, trim($doc->textContent));

        if (mb_strlen($input) > 600) {
            if (strpos($input, PHP_EOL . PHP_EOL, 600) !== false) {
                $input = substr($input, 0, strpos($input, PHP_EOL . PHP_EOL, 600));
            }
        }

        $input = strtr($input, $bracketsToRemove);
        $input = str_replace("\t", " ", $input);

        // Remove newlines with ensuing spaces
        while (strpos($input, PHP_EOL . " ") !== false) {
            $input = str_replace(PHP_EOL . " ", PHP_EOL, $input);
        }

        // Remove double newlines
        while (strpos($input, PHP_EOL . PHP_EOL . PHP_EOL) !== false) {
            $input = str_replace(PHP_EOL . PHP_EOL . PHP_EOL, PHP_EOL . PHP_EOL, $input);
        }

        return $input;

    }

    $input = str_replace(PHP_EOL, '', $input);

    foreach (self::WIKIPEDIA_REMOVE_LITERALS as $tToRemove) $input = str_replace($tToRemove, "", $input);

    // Restrict to the span from the first "<p>" to the last "</p>"
    $first_mention_of_paragraph = strpos($input, '<p>');
    if ($first_mention_of_paragraph !== false) $input = substr($input, $first_mention_of_paragraph, (strrpos($input, '</p>') ?: strlen($input)) - $first_mention_of_paragraph);

    if (empty($input)) return "";

    // Remove infobox tables specifically
    $firstParagraphPosition = strpos($input, '<p', 1);
    $currentSearchPos = strpos($input, "<table>");
    if ($currentSearchPos !== false && $currentSearchPos < $firstParagraphPosition) {
        if (($tableEndPos = strpos($input, "</table>")) !== false) {
            if (($pStartPos = strpos($input, '<p', $tableEndPos + 6)) !== false) {
                $input = substr($input, $pStartPos);
            }
        }
    }

    // Remove leftover unnecessary paragraphs before actual content
    $removeFirstParagraph = false;
    $firstParagraphPosition = strpos($input, '<p', 1);
    foreach (["</table>", "<img"] as $tagPart) {
        $currentSearchPos = strpos($input, $tagPart);
        if ($currentSearchPos !== false && $currentSearchPos < $firstParagraphPosition) {
            $removeFirstParagraph = true;
            break;
        }
    }
    if ($removeFirstParagraph === true) {
        $input = substr($input, $firstParagraphPosition ?: 0);
    }

    // Keep paragraph boundaries as blank lines once the tags are gone
    $input = str_replace('</p>', '</p>' . PHP_EOL . PHP_EOL . PHP_EOL, $input);
    $input = strip_tags($input);

    // Cut out inline CSS rules (".mw-parser-output ... }") that survive
    // strip_tags(); capped at 30 rounds.
    $i = 0;
    while (strpos($input, ".mw-parser-output") !== false and strpos($input, "}", strpos($input, ".mw-parser-output")) !== false) {
        $part1 = substr($input, 0, strpos($input, ".mw-parser-output"));
        $part2 = substr($input, strpos($input, "}", strpos($input, ".mw-parser-output")) + 1);
        $input = $part1 . $part2;
        ++$i;
        if ($i === 30) break;
    }

    $input = strtr($input, $bracketsToRemove);
    $input = str_replace("\t", " ", $input);

    // Remove double whitespaces. The needle has to be TWO spaces: searching
    // for a single space and replacing it with a single space never changes
    // the string, so the loop would not terminate.
    $doubleSpace = str_repeat(" ", 2);
    while (strpos($input, $doubleSpace) !== false) {
        $input = str_replace($doubleSpace, " ", $input);
    }

    // Remove newlines with ensuing spaces
    while (strpos($input, PHP_EOL . " ") !== false) {
        $input = str_replace(PHP_EOL . " ", PHP_EOL, $input);
    }

    // Remove double newlines
    while (strpos($input, PHP_EOL . PHP_EOL . PHP_EOL) !== false) {
        $input = str_replace(PHP_EOL . PHP_EOL . PHP_EOL, PHP_EOL . PHP_EOL, $input);
    }

    $stableToRemove = [
        "Vous pouvez partager vos connaissances en laméliorant (comment ?) selon les recommandations des projets correspondants.",
    ];
    foreach ($stableToRemove as $tToRemove) $input = str_replace($tToRemove, "", $input);

    // Everything from one of these stub notices onwards is cut off
    $endings = [
        "StubDenne artikel om et vandløb ",
    ];
    foreach ($endings as $ending) {
        if (strpos($input, $ending) !== false) $input = substr($input, 0, strpos($input, $ending));
    }

    $input = trim($input);

    // Cut off overly long articles
    if (mb_strlen($input) > 600) {
        if (strpos($input, PHP_EOL . PHP_EOL, 600) !== false) {
            $input = trim(substr($input, 0, strpos($input, PHP_EOL . PHP_EOL, 600)));
        }
    }

    if (empty($input)) return '';

    // Drop entity-encoded single-digit footnote markers ("&#91;1&#93;") and
    // replace apostrophes before decoding the remaining entities.
    $input = str_replace("'", "´", MD_STD::preg_replace_str("/\&\#91\;[0-9]\&\#93\;/", '', $input));
    $input = html_entity_decode($input);

    return $input;

}
/**
 * Stores a description fetched from Wikipedia as the short description of an
 * actor (`persinst`.`persinst_kurzinfo`).
 *
 * If the actor already has a description that does not start with "GND", the
 * decision what to do is read from $_GET['keep'] ("replace", "add"; any other
 * value leaves the description untouched). If that parameter is missing, an
 * HTML page offering the three options is printed and the script EXITS.
 *
 * Afterwards the link to the Wikipedia article is written to the `noda` table
 * and the actor's edit metadata is updated.
 *
 * NOTE(review): $datafromwiki, $wikidata_id and the existing description are
 * echoed into the HTML page without escaping - confirm that all of them are
 * trusted / sanitized by the callers.
 *
 * @param integer $persinst_id  Person ID.
 * @param string  $wikidata_id  Wikidata ID.
 * @param string  $datafromwiki Data fetched from Wikipedia.
 * @param string  $wikilink     Link to wikipedia entry.
 * @param string  $preflang     The user's currently used language.
 * @param string  $lang         Currently queried language.
 * @param string  $erfasst_von  User who adds the info.
 *
 * @throws Exception If there is no actor of the given ID.
 *
 * @return boolean True on every path that returns (the only alternative is
 *                 the script exiting after printing the selection page).
 */
public function retrievePersinstDescFromWikipedia(int $persinst_id, string $wikidata_id, string $datafromwiki, string $wikilink, string $preflang, string $lang, string $erfasst_von):bool {
$output = false;
// Wrap the text in quotes and append source and retrieval date
$datafromwiki = '"' . $datafromwiki . '" - (Wikipedia (' . $lang . ') ' . date('d.m.Y') . ')';
$cergebnis = $this->_mysqli_noda->query_by_stmt("SELECT `persinst_kurzinfo`, `persinst_anzeigename` AS `display_name`
FROM `persinst`
WHERE `persinst_id` = ?", "i", $persinst_id);
// Update persinst table
$updatePersinstStmt = $this->_mysqli_noda->do_prepare("UPDATE `persinst`
SET `persinst_kurzinfo` = ?
WHERE `persinst_id` = ?");
if ($cergebnis->num_rows === 0) {
throw new Exception("There is no actor of ID #" . $persinst_id);
}
$cinfo = $cergebnis->fetch_assoc();
// An existing description is only protected if it is not an automatically
// imported one starting with "GND".
if (!empty($cinfo['persinst_kurzinfo']) and substr($cinfo['persinst_kurzinfo'], 0, 3) !== 'GND') {
if (isset($_GET['keep'])) {
// Empty value or "replace": overwrite the existing description
if (!$_GET['keep'] || $_GET['keep'] === 'replace') {
$updatePersinstStmt->bind_param("si", $datafromwiki, $persinst_id);
$updatePersinstStmt->execute();
}
// "add": append the new text to the existing description
else if ($_GET['keep'] === 'add') {
$newDesc = $cinfo['persinst_kurzinfo'] . PHP_EOL . PHP_EOL . $datafromwiki;
$updatePersinstStmt->bind_param("si", $newDesc, $persinst_id);
$updatePersinstStmt->execute();
}
// Any other value (e.g. "keep") leaves the description as it is
$output = true;
}
else {
// No decision yet: print a page showing old and new text next to each
// other with links for keeping, replacing or appending, then stop.
$tlLoader = new MDTlLoader("wiki_getter_persinst", $preflang);
echo self::generateHTMLHeadForWikidataFetcher($lang);
echo self::generateWikidataFetcherHeader($tlLoader, "", $cinfo['display_name']);
echo '
<p class="alert icons iconsAlert">Es gibt schon einen Eintrag im Beschreibungsfeld</p>
<div class="wikiReplaceTTile">
<h3>Bisher vorhanden</h3>
<p>' . nl2br($cinfo['persinst_kurzinfo']) . '</p>
</div>
<div class="wikiReplaceTTile">
<h3>Jetzt gefunden</h3><p>' . $datafromwiki . '</p>
</div>
<a href="get_wikidata_for_persinst.php?wikidata_id=' . $wikidata_id . write_get_vars(['suchbegriff', 'lang', 'persinst_id']) . '&keep=keep" class="buttonLike icons iconsPin">Keep old entry</a>';
echo '<br><a href="get_wikidata_for_persinst.php?wikidata_id=' . $wikidata_id . write_get_vars(['suchbegriff', 'lang', 'persinst_id']) . '&keep=replace" class="buttonLike icons iconsPinOff">Replace with new entry</a>';
echo '<br><a href="get_wikidata_for_persinst.php?wikidata_id=' . $wikidata_id . write_get_vars(['suchbegriff', 'lang', 'persinst_id']) . '&keep=add" class="buttonLike icons iconsPlusOne">Keep old and add new entry</a><br><br><br>';
exit;
}
}
else {
// No (protected) description so far: simply write the new one
$updatePersinstStmt->bind_param("si", $datafromwiki, $persinst_id);
$updatePersinstStmt->execute();
$output = true;
}
$cergebnis->close();
$updatePersinstStmt->close();
// Set link to Wikipedia in noda table
$insertNodaStmt = $this->_mysqli_noda->do_prepare("INSERT INTO `noda`
(`persinst_id`, `noda_source`, `noda_nrinsource`, `noda_link`, `noda_erfasst_von`)
VALUES
(?, 'Wikipedia', '', ?, ?)
ON DUPLICATE KEY UPDATE `noda_link` = ?");
$insertNodaStmt->bind_param("isss", $persinst_id, $wikilink, $erfasst_von, $wikilink);
$insertNodaStmt->execute();
$insertNodaStmt->close();
// Update edit metadata
$updatePersinstEditInfoStmt = $this->_mysqli_noda->do_prepare("UPDATE `persinst`
SET `persinst_erfasst_am` = NOW(),
`persinst_erfasst_von` = ?
WHERE `persinst_id` = ?");
$updatePersinstEditInfoStmt->bind_param("si", $erfasst_von, $persinst_id);
$updatePersinstEditInfoStmt->execute();
$updatePersinstEditInfoStmt->close();
$updatePersinstEditInfoStmt = null;
return $output;
}
/**
 * Fills in an actor's year of birth, year of death and gender based on
 * Wikidata information.
 *
 * Each of the three fields is only written if its current value in the
 * `persinst` table is an empty string and the Wikidata entity carries the
 * corresponding claim (P569: date of birth, P570: date of death,
 * P21: sex or gender). Only the first statement of each claim is used.
 *
 * @param array<mixed> $data        Data loaded from Wikidata.
 * @param integer      $persinst_id Actor ID.
 *
 * @throws MDmainEntityNotExistentException If the actor cannot be loaded.
 * @throws Exception                        If the entity's gender is none of
 *                                          the Q-IDs handled below.
 *
 * @return void
 */
public function enterPersinstBirthDeathDatesFromWikidata(array $data, int $persinst_id):void {
$result = $this->_mysqli_noda->query_by_stmt("SELECT `persinst_geburtsjahr`,
`persinst_sterbejahr`, `persinst_gender`
FROM `persinst`
WHERE `persinst_id` = ?", "i", $persinst_id);
if (!($actor_dates = $result->fetch_assoc())) {
throw new MDmainEntityNotExistentException("Failed to fetch actor information");
}
$result->close();
$result = null;
if ($actor_dates['persinst_geburtsjahr'] === '') {
// Try to get birth date
if (!empty($data['claims']['P569']) and !empty($data['claims']['P569']['0']['mainsnak']['datavalue']['value']['time'])) {
$birth_date = self::wikidataBirthDeathToYear($data['claims']['P569']['0']['mainsnak']['datavalue']['value']['time']);
}
if (!empty($birth_date)) {
$updateStmt = $this->_mysqli_noda->do_prepare("UPDATE `persinst`
SET `persinst_geburtsjahr` = ?
WHERE `persinst_id` = ?
LIMIT 1");
$updateStmt->bind_param("ii", $birth_date, $persinst_id);
$updateStmt->execute();
$updateStmt->close();
$updateStmt = null;
}
}
if ($actor_dates['persinst_sterbejahr'] === '') {
// Try to get death date
if (!empty($data['claims']['P570']) and !empty($data['claims']['P570']['0']['mainsnak']['datavalue']['value']['time'])) {
$death_date = self::wikidataBirthDeathToYear($data['claims']['P570']['0']['mainsnak']['datavalue']['value']['time']);
}
if (!empty($death_date)) {
$updateStmt = $this->_mysqli_noda->do_prepare("UPDATE `persinst`
SET `persinst_sterbejahr` = ?
WHERE `persinst_id` = ?
LIMIT 1");
$updateStmt->bind_param("ii", $death_date, $persinst_id);
$updateStmt->execute();
$updateStmt->close();
$updateStmt = null;
}
}
if ($actor_dates['persinst_gender'] === '') {
// Try to get gender: map the Q-ID of the P21 value to the local values
// "male", "female" and "other"
if (!empty($data['claims']['P21']) and !empty($data['claims']['P21']['0']['mainsnak']['datavalue']['value']['id'])) {
$wikidata_gender_id = $data['claims']['P21']['0']['mainsnak']['datavalue']['value']['id'];
switch ($wikidata_gender_id) {
case "Q6581097": // male
case "Q44148": //male organism
$wikidata_gender = "male";
break;
case "Q6581072":
case "Q1052281": // transgender female
case "Q43445": // female organism
$wikidata_gender = "female";
break;
case "Q48270":
$wikidata_gender = "other";
break;
default:
// NOTE(review): an unlisted gender aborts the method (and thus the
// caller) after birth / death year may already have been written.
throw new Exception("Unknown gender: Q-ID is " . $wikidata_gender_id);
}
}
if (!empty($wikidata_gender)) {
$updateStmt = $this->_mysqli_noda->do_prepare("UPDATE `persinst`
SET `persinst_gender` = ?
WHERE `persinst_id` = ?
LIMIT 1");
$updateStmt->bind_param("si", $wikidata_gender, $persinst_id);
$updateStmt->execute();
$updateStmt->close();
$updateStmt = null;
}
}
}
/**
 * Retrieves information on an actor from Wikidata and stores it.
 *
 * Steps, in this order:
 * 1. Load the entity from Wikidata.
 * 2. Fetch the first section of the Wikipedia article in the user's language -
 *    or, failing that, in the first language of self::LANGUAGES_MAIN_DESC
 *    that yields text - and store it as description.
 * 3. Fill in birth / death year and gender.
 * 4. Store links to Wikidata and to the external repositories listed in
 *    self::P_IDS_NODA_TAGS (except 'lcsh').
 * 5. Store translations and log the edit.
 *
 * @param string  $lang        The user's selected used language.
 * @param string  $wikidata_id Wikidata ID.
 * @param integer $persinst_id Actor ID.
 * @param string  $erfasst_von User name who's currently editing.
 *
 * @throws MDhttpFailedException If Wikidata does not return valid JSON.
 * @throws Exception             If a link to an external repository without
 *                               known URL prefix is to be stored.
 *
 * @return void
 */
public function retrievePersinstInfoFromWikidataID(string $lang, string $wikidata_id, int $persinst_id, string $erfasst_von) {

    $data = json_decode(MD_STD::runCurl("https://www.wikidata.org/wiki/Special:EntityData/" . urlencode($wikidata_id) . ".json", 10000), true);
    if ($data === null) {
        throw new MDhttpFailedException("Failed fetching from Wikidata. Try again later.");
    }
    $data = $data['entities'][$wikidata_id];

    // Get links to wikipedia
    $wikilink = $wikilinkterm = [];
    foreach (self::LANGUAGES_MAIN_DESC as $tLang) {
        if (isset($data['sitelinks'][$tLang . 'wiki']['url'])) $wikilink[$tLang] = $data['sitelinks'][$tLang . 'wiki']['url'];
        if (isset($data['sitelinks'][$tLang . 'wiki']['title'])) $wikilinkterm[$tLang] = str_replace(' ', '_', $data['sitelinks'][$tLang . 'wiki']['title']);
    }

    $alreadyEntered = false;

    // First choice: the article in the user's language
    if (isset($wikilink[$lang]) and isset($wikilinkterm[$lang]) and is_string($wikilinkterm[$lang])) {

        $datafromwiki = MD_STD::runCurl("https://" . $lang . ".wikipedia.org/w/api.php?action=parse&page=" . urlencode($wikilinkterm[$lang]) . "&prop=text&section=0&format=json", 10000);
        // "??" covers both a response that is not valid JSON and one that
        // lacks the parsed text (e.g. an API error).
        $datafromwiki = json_decode($datafromwiki, true)['parse']['text']['*'] ?? '';

        # Process data retrieved from wikipedia
        if (!empty($datafromwiki = self::_cleanWikidataInput((string)$datafromwiki))) {
            $alreadyEntered = $this->retrievePersinstDescFromWikipedia($persinst_id, $wikidata_id, $datafromwiki, $wikilink[$lang], $lang, $lang, $erfasst_von);
        }

    }

    // Fallback: the first of the main languages that provides a description
    foreach (self::LANGUAGES_MAIN_DESC as $sprache) {

        if ($alreadyEntered === true) break;
        if (!isset($wikilink[$sprache]) || !isset($wikilinkterm[$sprache]) || !is_string($wikilinkterm[$sprache])) continue;

        $datafromwiki = MD_STD::runCurl("https://" . $sprache . ".wikipedia.org/w/api.php?action=parse&page=" . urlencode((string)$wikilinkterm[$sprache]) . "&prop=text&section=0&format=json", 10000);
        $datafromwiki = json_decode($datafromwiki, true)['parse']['text']['*'] ?? '';

        # Process data retrieved from wikipedia
        if ($datafromwiki = self::_cleanWikidataInput((string)$datafromwiki)) {
            $alreadyEntered = $this->retrievePersinstDescFromWikipedia($persinst_id, $wikidata_id, $datafromwiki, $wikilink[$sprache], $lang, "$sprache", $erfasst_von);
        }

    }

    $this->enterPersinstBirthDeathDatesFromWikidata($data, $persinst_id);

    // Get links to other norm data sources
    $nodaLinks = [
        "wikidata" => $wikidata_id,
    ];

    foreach (self::P_IDS_NODA_TAGS as $vocabName => $pId) {
        // 'lcsh' shares its P-ID with 'loc' and is not used for actors
        if ($vocabName === 'lcsh') continue;
        if (isset($data['claims'][$pId])) $nodaLinks[$vocabName] = $data['claims'][$pId][0]['mainsnak']['datavalue']['value'];
    }

    // GET links to other noda entries.
    $insertNodaLinkStmt = $this->_mysqli_noda->do_prepare("INSERT INTO `noda`
(`persinst_id`, `noda_source`, `noda_nrinsource`, `noda_link`, `noda_erfasst_von`)
VALUES
(?, ?, ?, ?, ?)
ON DUPLICATE KEY UPDATE `noda_nrinsource` = ?,
`noda_link` = ?,
`noda_erfasst_von` = ?");

    // All links are written within a single transaction
    $this->_mysqli_noda->autocommit(false);
    foreach ($nodaLinks as $noda_source => $nodaId) {

        if ($nodaId === null) continue;
        if (!isset(self::URL_PREFIXES_PLACES_NODA_SOURCE[$noda_source])) {
            throw new Exception("Unknown noda link: " . $noda_source);
        }

        $noda_link_url = self::URL_PREFIXES_PLACES_NODA_SOURCE[$noda_source] . $nodaId;

        // The last three parameters feed the ON DUPLICATE KEY UPDATE clause:
        // ID in source, link, and the editing user (not the link again).
        $insertNodaLinkStmt->bind_param("isssssss", $persinst_id, $noda_source, $nodaId, $noda_link_url, $erfasst_von, $nodaId, $noda_link_url, $erfasst_von);
        $insertNodaLinkStmt->execute();

    }
    $this->_mysqli_noda->commit();
    $this->_mysqli_noda->autocommit(true);
    $insertNodaLinkStmt->close();

    $this->getWikidataTranslationsForPersinst($data, $persinst_id);

    NodaLogEdit::logPersinstEdit($this->_mysqli_noda, $persinst_id, "wikidata-fetcher", $erfasst_von, 'update', 'synchronize');

}
/**
 * Stores translations (label, description, Wikipedia link) of an actor for
 * all languages of self::LANGUAGES_TO_CHECK, based on Wikidata information.
 *
 * For languages with a Wikipedia article, the first section of the article is
 * fetched (all languages in one multi-request) and used as description. For
 * languages without an article but with both label and description on
 * Wikidata, the Wikidata description is used instead. All entries are written
 * through the stored procedure `nodaInsertPersinstTranslation` within one
 * transaction.
 *
 * @param array<mixed> $data        Entity fetched from wikidata.
 * @param integer      $persinst_id Actor ID.
 *
 * @throws MDExpectedException If the multi-request raises a TypeError.
 *
 * @return void
 */
public function getWikidataTranslationsForPersinst(array $data, int $persinst_id):void {
$checkagainstLanguage = self::LANGUAGES_TO_CHECK;
// $languagesToFetch: API URLs by language; $wikilinks: article links by language
list($languagesToFetch, $wikilinks) = self::getWikidataWikipediaTranslationSources($checkagainstLanguage, $data);
if (empty($languagesToFetch)) {
return;
}
try {
$contents = MD_STD::runCurlMulti($languagesToFetch, 10000);
}
catch (TypeError $e) {
throw new MDExpectedException("Failed to initialize a request. Try pressing F5 to run the requests again.");
}
$insertStmt = $this->_mysqli_noda->do_prepare("CALL nodaInsertPersinstTranslation(?, ?, ?, ?, ?)");
$this->_mysqli_noda->autocommit(false);
foreach ($checkagainstLanguage as $lang) {
// Case 1: there is a Wikipedia article in this language
if (!empty($languagesToFetch[$lang]) && !empty($data['sitelinks'][$lang . 'wiki'])) {
$url = $languagesToFetch[$lang];
$wikilink = $wikilinks[$lang];
if (!empty($contents[$lang])) {
$descFromWiki = $contents[$lang];
$descFromWiki = json_decode($descFromWiki, true)['parse']['text']['*'];
# Process data retrieved from wikipedia
if ($descFromWiki !== null) $tDescription = self::_cleanWikidataInput((string)$descFromWiki);
else $tDescription = "";
// Strip a single trailing line feed
if (substr($tDescription, -1) == chr(10)) $tDescription = substr($tDescription, 0, strlen($tDescription) - 1);
// NOTE(review): quotes and source suffix are added even if the
// cleaned description is empty - confirm that this is intended.
$tDescription = '"' . $tDescription . '" - (' . $data['labels'][$lang]['language'] . '.wikipedia.org ' . date('d.m.Y') . ')';
// Content of the first paragraph of the respective Wikipedia: ' . $tDescription
// this contains the first paragraph of the respective Wikipedia
}
else {
$tDescription = "";
}
$tLang = self::_cleanWikidataInput((string)$data['labels'][$lang]['language']);
$tLabel = self::_cleanWikidataInput((string)$data['labels'][$lang]['value']);
try {
$insertStmt->bind_param("issss", $persinst_id, $tLang, $tLabel, $tDescription, $wikilink);
$insertStmt->execute();
}
catch (MDMysqliInvalidEncodingError $e) {
// Entries that cannot be stored due to their encoding are skipped silently
}
}
// echo '<br><b style="color: cc0000;">Wikipedia Links fehlen</b>';
// Case 2: no article, but label and description are available on Wikidata
else if (!empty($data['labels'][$lang]['value']) and !empty($data['descriptions'][$lang])) {
$wikilink = "";
$insertStmt->bind_param("issss", $persinst_id, $data['labels'][$lang]['language'], $data['labels'][$lang]['value'], $data['descriptions'][$lang]['value'], $wikilink);
$insertStmt->execute();
}
}
$this->_mysqli_noda->commit();
$this->_mysqli_noda->autocommit(true);
$insertStmt->close();
unset($insertStmt);
}
/**
 * Stores a description fetched from Wikipedia as the note of a place
 * (`orte`.`ort_anmerkung`) and records the link to the Wikipedia article.
 *
 * If the place already has a non-empty note that does not start with "GND",
 * the decision what to do is read from $_GET['keep']: "add" appends the new
 * text, "keep" re-writes the existing note, any other value replaces it. If
 * the parameter is missing, an HTML page offering the three options is
 * printed and the script EXITS.
 *
 * NOTE(review): $datafromwiki and the existing note are echoed into the HTML
 * page without escaping - confirm that both are trusted / sanitized.
 *
 * @param mysqli_result $currentPlaceResult Mysqli result pointing to the current place.
 * @param string        $datafromwiki       Data parsed from wikidata.
 * @param array<mixed>  $wikilink           Wikipedia links by language; the entry
 *                                          for $lang is the one stored.
 * @param string        $preflang           Language of the user interface in general.
 * @param string        $lang               Language of the main entry.
 * @param integer       $placeID            ID of the place.
 * @param string        $erfasst_von        User name.
 *
 * @return boolean False if no row can be read from $currentPlaceResult, true otherwise.
 */
public function enterPlaceDescFromWikidata(mysqli_result $currentPlaceResult, string $datafromwiki, array $wikilink, string $preflang, string $lang, int $placeID, string $erfasst_von) {
// Wrap the text in quotes and append source and retrieval date
$datafromwiki = '"' . $datafromwiki . '" - (Wikipedia (' . $lang . ') ' . date("d.m.Y") . ')';
if (!($curPlaceInfo = $currentPlaceResult->fetch_assoc())) return false;
if (!empty(trim($curPlaceInfo['ort_anmerkung'])) and substr($curPlaceInfo['ort_anmerkung'], 0, 3) !== 'GND') {
if (isset($_GET['keep'])) {
// The text to write is determined here; the UPDATE below runs in any case
if ($_GET['keep'] === 'add') {
$datafromwiki = $curPlaceInfo['ort_anmerkung'] . PHP_EOL . PHP_EOL . $datafromwiki;
}
else if ($_GET['keep'] === 'keep') {
$datafromwiki = $curPlaceInfo['ort_anmerkung'];
}
}
else {
// No decision yet: print a page showing old and new text next to each
// other with links for keeping, replacing or appending, then stop.
$tlLoader = new MDTlLoader("wiki_getter_place", $preflang);
echo self::generateHTMLHeadForWikidataFetcher($lang);
echo self::generateWikidataFetcherHeader($tlLoader);
echo '
<p class="alert icons iconsAlert">There is already an entry for description ...</p>
<div class="wikiReplaceTTile">
<h3>Actual entry</h3><p>' . nl2br($curPlaceInfo['ort_anmerkung']) . '</p>
</div>
<div class="wikiReplaceTTile">
<h3>Now found</h3>
<p>' . $datafromwiki . '</p>
</div>
<a href="get_wikidata_for_ort.php?keep=keep' . write_get_vars(['suchbegriff', 'lang', 'wikidata_id', 'ort_id']) . '" class="buttonLike icons iconsPin">Keep old entry</a>
<br><a href="get_wikidata_for_ort.php?keep=replace' . write_get_vars(['suchbegriff', 'lang', 'wikidata_id', 'ort_id']) . '" class="buttonLike icons iconsPinOff">Replace with new entry</a>
<br><a href="get_wikidata_for_ort.php?keep=add' . write_get_vars(['suchbegriff', 'lang', 'wikidata_id', 'ort_id']) . '" class="buttonLike icons iconsPlusOne">Keep old and add new entry</a><br><br><br>
';
exit;
}
}
// Write description to DB
$updateStmt = $this->_mysqli_noda->do_prepare("UPDATE `orte`
SET `ort_anmerkung` = ?,
`ort_erfasst_am` = NOW(),
`ort_erfasst_von` = ?
WHERE ort_id = ?");
try {
$updateStmt->bind_param("ssi", $datafromwiki, $erfasst_von, $placeID);
$updateStmt->execute();
}
catch (MDMysqliInvalidEncodingError $e) {
// An encoding problem does not abort the method; it is only noted in the session
$_SESSION["editHistory"] = ["changesStored", "Error adding base description"];
}
$updateStmt->close();
unset($updateStmt);
// Write link to wikipedia to relevant noda DB table
$wikiAlreadyResult = $this->_mysqli_noda->query_by_stmt("SELECT `noda_orte`.`noda_id`
FROM `noda_orte`
WHERE `noda_orte`.`ort_id` = ?
AND `noda_orte`.`noda_source` = 'Wikipedia'", "i", $placeID);
// Insert the link if there is none yet, update it if there is exactly one;
// with more than one existing link nothing is changed.
switch ($wikiAlreadyResult->num_rows) {
case 0:
$insertWikiStmt = $this->_mysqli_noda->do_prepare("INSERT INTO `noda_orte`
(`ort_id`, `noda_source`, `noda_nrinsource`, `noda_link`, `noda_erfasst_am`, `noda_erfasst_von`)
VALUES
(?, 'Wikipedia', '', ?, NOW(), ?)");
$insertWikiStmt->bind_param("iss", $placeID, $wikilink[$lang], $erfasst_von);
$insertWikiStmt->execute();
$insertWikiStmt->close();
unset($insertWikiStmt);
break;
case 1:
if ($wikiAlreadyData = $wikiAlreadyResult->fetch_assoc()) {
$wikischon_id = $wikiAlreadyData['noda_id'];
$updateStmt = $this->_mysqli_noda->do_prepare("UPDATE `noda_orte` SET `noda_link` = ? WHERE `noda_id` = ?");
$updateStmt->bind_param("si", $wikilink[$lang], $wikischon_id);
$updateStmt->execute();
$updateStmt->close();
unset($updateStmt);
}
break;
}
$wikiAlreadyResult->close();
unset($wikiAlreadyResult);
return true;
}
/**
 * Links a place to its superordinate administrative place as stated on
 * Wikidata.
 *
 * The relation is only entered if the place does not yet have any
 * superordinate place and if the Wikidata entity named in the first
 * statement of the claim is already known as a place in the noda DB.
 *
 * @param integer      $onum Place ID.
 * @param array<mixed> $data Wikidata information (P131 claim).
 *
 * @return void
 */
public function retrieveSuperordinateAdministrativePlace(int $onum, array $data):void {

    // Without a target entity in the first statement there is nothing to do
    if (empty($data[0]["mainsnak"]["datavalue"]["value"]["id"])) {
        return;
    }
    $parentQid = $data[0]["mainsnak"]["datavalue"]["value"]["id"];

    // Places that already have a superordinate place are left untouched
    $existingRelation = $this->_mysqli_noda->query_by_stmt("SELECT 1
FROM `ort_relation`
WHERE `ort_menor_id` = ?
LIMIT 1", "i", $onum);
    $hasSuperordinate = ($existingRelation->num_rows !== 0);
    $existingRelation->close();
    $existingRelation = null;

    if ($hasSuperordinate === true) {
        return;
    }

    // Look up the local place that carries the superordinate's Wikidata ID
    $parentLookup = $this->_mysqli_noda->query_by_stmt("SELECT `ort_id`
FROM `noda_orte`
WHERE `noda_source` = 'wikidata'
AND `noda_nrinsource` = ?", "s", $parentQid);
    $parentRow = $parentLookup->fetch_row();
    $parentLookup->close();
    $parentLookup = null;

    if (!$parentRow) {
        return;
    }
    $parentPlaceId = $parentRow[0];

    // Enter the relation: superordinate place => current place
    $relationStmt = $this->_mysqli_noda->do_prepare("INSERT INTO `ort_relation`
(`ort_mayor_id`, `ort_menor_id`, `ort_relation`)
VALUES
(?, ?, 1)");
    $relationStmt->bind_param("ii", $parentPlaceId, $onum);
    $relationStmt->execute();
    $relationStmt->close();
    $relationStmt = null;

}
/**
 * Function for retrieving place information based on a Wikidata ID.
 *
 * Fetches the entity from Wikidata and then
 * - links the place to its superordinate administrative unit (P131),
 * - tries to enter a description from Wikipedia, first in the requested
 *   language, then in each of the main description languages,
 * - writes links to the norm data sources named in the entity's claims,
 * - stores the TGN ID (P1667), the GeoNames ID (P1566) and the
 *   coordinates (P625),
 * - fetches translations and logs the edit.
 *
 * @param string  $lang        Language.
 * @param string  $wikidata_id Wikidata Q-ID.
 * @param integer $onum        Place ID.
 * @param string  $erfasst_von User name of the current user.
 *
 * @throws MDhttpFailedException If the response from Wikidata cannot be decoded.
 * @throws Exception             If a norm data source has no known URL prefix.
 *
 * @return void
 */
public function retrievePlaceInfoFromWikidataID(string $lang, string $wikidata_id, int $onum, string $erfasst_von):void {

    $data = MD_STD::runCurl("https://www.wikidata.org/wiki/Special:EntityData/" . urlencode($wikidata_id) . ".json", 10000);
    if (!$data = json_decode($data, true)) {
        throw new MDhttpFailedException("Failed fetching from Wikidata. Try again later.");
    }
    // A response that lacks the requested entity is handled as an empty entity.
    $data = $data['entities'][$wikidata_id] ?? [];

    $wikilink = $wikilinkterm = [];
    foreach (self::LANGUAGES_MAIN_DESC as $tLang) {
        $sitelink = $data['sitelinks'][$tLang . 'wiki'] ?? [];
        if (isset($sitelink['url'])) $wikilink[$tLang] = $sitelink['url'];
        if (isset($sitelink['title'])) $wikilinkterm[$tLang] = str_replace(' ', '_', $sitelink['title']);
    }

    $currentPlaceResult = $this->_mysqli_noda->query_by_stmt("SELECT `ort_anmerkung`
FROM `orte`
WHERE `ort_id` = ?", "i", $onum);

    // P131: Located in administrative unit
    if (isset($data['claims']['P131'])) {
        $this->retrieveSuperordinateAdministrativePlace($onum, $data['claims']['P131']);
    }

    // Try the requested language first, then every main description language
    // in order, until a description has been entered.
    $alreadyEntered = false;
    foreach (array_merge([$lang], self::LANGUAGES_MAIN_DESC) as $sprache) {

        if ($alreadyEntered === true) break;
        if (empty($wikilink[$sprache]) || !isset($wikilinkterm[$sprache])) continue;

        $datafromwiki = MD_STD::runCurl("https://" . urlencode($sprache) . ".wikipedia.org/w/api.php?action=parse&page=" . urlencode($wikilinkterm[$sprache]) . "&prop=text&section=0&format=json", 10000);
        // Error responses carry no parsed text; treat them like an empty article.
        $datafromwiki = json_decode($datafromwiki, true)['parse']['text']['*'] ?? '';
        if (empty($datafromwiki)) continue;

        $datafromwiki = self::_cleanWikidataInput((string)$datafromwiki);
        if (empty($datafromwiki)) continue;

        $alreadyEntered = $this->enterPlaceDescFromWikidata($currentPlaceResult, $datafromwiki, $wikilink, $lang, $sprache, $onum, $erfasst_von);

    }

    $currentPlaceResult->close();
    unset($currentPlaceResult);

    // Claims may lack a data value; such claims are ignored.
    $geonames_id  = $data['claims']['P1566'][0]['mainsnak']['datavalue']['value'] ?? null;
    $tgn_id       = $data['claims']['P1667'][0]['mainsnak']['datavalue']['value'] ?? null;
    $latitude_wd  = $data['claims']['P625'][0]['mainsnak']['datavalue']['value']['latitude'] ?? null;
    $longitude_wd = $data['claims']['P625'][0]['mainsnak']['datavalue']['value']['longitude'] ?? null;

    $nodaLinks = [
        "wikidata" => $wikidata_id,
    ];
    foreach (self::P_IDS_NODA_TAGS as $vocabName => $pId) {
        if ($vocabName === 'lcsh') continue;
        if (isset($data['claims'][$pId][0]['mainsnak']['datavalue']['value'])) {
            $nodaLinks[$vocabName] = $data['claims'][$pId][0]['mainsnak']['datavalue']['value'];
        }
    }

    // Validate all sources before the transaction is opened, so that an unknown
    // source cannot leave the connection with autocommit disabled.
    foreach (array_keys($nodaLinks) as $noda_source) {
        if (!isset(self::URL_PREFIXES_PLACES_NODA_SOURCE[$noda_source])) {
            throw new Exception("Unknown noda link: " . $noda_source);
        }
    }

    $this->_mysqli_noda->autocommit(false);

    $insertNodaLinkStmt = $this->_mysqli_noda->do_prepare("INSERT INTO `noda_orte`
(`ort_id`, `noda_source`, `noda_nrinsource`, `noda_link`, `noda_erfasst_von`)
VALUES
(?, ?, ?, ?, ?)
ON DUPLICATE KEY UPDATE `noda_nrinsource` = ?,
`noda_link` = ?");
    foreach ($nodaLinks as $noda_source => $nodaId) {
        $noda_link_url = self::URL_PREFIXES_PLACES_NODA_SOURCE[$noda_source] . $nodaId;
        $insertNodaLinkStmt->bind_param("issssss", $onum, $noda_source, $nodaId, $noda_link_url, $erfasst_von, $nodaId, $noda_link_url);
        $insertNodaLinkStmt->execute();
    }
    $insertNodaLinkStmt->close();
    unset($insertNodaLinkStmt);

    // NOTE(review): the TGN ID (P1667) is written to `ort_land` - confirm that
    // this is the intended column.
    if (!empty($tgn_id)) {
        $updateStmt = $this->_mysqli_noda->do_prepare("UPDATE `orte`
SET `ort_land` = ?
WHERE `ort_id` = ?");
        $updateStmt->bind_param("si", $tgn_id, $onum);
        $updateStmt->execute();
        $updateStmt->close();
        unset($updateStmt);
    }

    if (!empty($geonames_id)) {
        $updateStmt = $this->_mysqli_noda->do_prepare("UPDATE `orte`
SET `ort_geonames` = ?
WHERE `ort_id` = ?");
        $updateStmt->bind_param("si", $geonames_id, $onum);
        $updateStmt->execute();
        $updateStmt->close();
        unset($updateStmt);
    }

    // 0 is a valid latitude / longitude, hence the numeric check instead of empty().
    if (is_numeric($latitude_wd) and is_numeric($longitude_wd)) {
        $updateStmt = $this->_mysqli_noda->do_prepare("UPDATE `orte`
SET `ort_nord_sued` = ?, `ort_west_ost` = ?, ort_zoom = '9'
WHERE `ort_id` = ?");
        $updateStmt->bind_param("ddi", $latitude_wd, $longitude_wd, $onum);
        $updateStmt->execute();
        $updateStmt->close();
        unset($updateStmt);
    }

    $this->_mysqli_noda->commit();
    $this->_mysqli_noda->autocommit(true);

    // Translations can only be fetched if the entity was actually returned.
    if (!empty($data)) $this->getWikidataTranslationsForPlace($data, $onum);

    NodaLogEdit::logPlaceEdit($this->_mysqli_noda, $onum, "wikidata-fetcher", $erfasst_von, 'update', 'synchronize');

}
/**
 * Function for fetching translations from wikidata.
 *
 * For every language to check, a translation is stored either from the
 * linked Wikipedia article (article title plus cleaned first section) or,
 * if there is no article to fetch, from the label and description given in
 * the Wikidata entity itself.
 *
 * @param array<mixed> $data   Entity data fetched from wikidata.
 * @param integer      $ort_id Place ID.
 *
 * @throws MDExpectedException If the parallel requests cannot be initialized.
 *
 * @return void
 */
public function getWikidataTranslationsForPlace(array $data, int $ort_id):void {

    $checkagainstLanguage = self::LANGUAGES_TO_CHECK;

    [$languagesToFetch, $wikilinks] = self::getWikidataWikipediaTranslationSources($checkagainstLanguage, $data);
    if (empty($languagesToFetch)) {
        return;
    }

    try {
        $contents = MD_STD::runCurlMulti($languagesToFetch, 10000);
    }
    catch (TypeError $e) {
        throw new MDExpectedException("Failed to initialize a request. Try pressing F5 to run the requests again.");
    }

    $insertStmt = $this->_mysqli_noda->do_prepare("CALL `nodaInsertOrtTranslation`(?, ?, ?, ?, ?)");
    $this->_mysqli_noda->autocommit(false);

    foreach ($checkagainstLanguage as $lang) {

        if (!empty($languagesToFetch[$lang]) && !empty($data['sitelinks'][$lang . 'wiki'])) {

            $wikilink = $wikilinks[$lang];

            // Reset per language: without this, a language whose article could
            // not be fetched would inherit the title of the previous language.
            $tLabel       = "";
            $tDescription = "";

            // A sitelink may exist without a label in that language; fall back
            // to the language code currently being processed.
            $labelLang = (string)($data['labels'][$lang]['language'] ?? $lang);

            if (!empty($contents[$lang])) {

                if (!($wikiDataDecoded = json_decode($contents[$lang], true))) {
                    continue;
                }
                $tLabel       = (string)($wikiDataDecoded['parse']['title'] ?? '');
                $descFromWiki = $wikiDataDecoded['parse']['text']['*'] ?? '';

                # Process data retrieved from wikipedia
                if (!empty($descFromWiki)) {
                    $tDescription = self::_cleanWikidataInput((string)$descFromWiki);
                    if (substr($tDescription, -1) == chr(10)) $tDescription = substr($tDescription, 0, strlen($tDescription) - 1);
                    $tDescription = '"' . $tDescription . '" - (' . $labelLang . '.wikipedia.org ' . date('d.m.Y') . ')';
                    // Strip footnote markers of any length ([1], [12], ...).
                    $tDescription = str_replace("'", "´", MD_STD::preg_replace_str("/\&\#91\;[0-9]+\&\#93\;/", '', $tDescription));
                }

            }

            $tLang = self::_cleanWikidataInput($labelLang);
            if (empty($tLabel)) $tLabel = self::_cleanWikidataInput((string)($data['labels'][$lang]['value'] ?? ''));

            try {
                $insertStmt->bind_param("issss", $ort_id, $tLang, $tLabel, $tDescription, $wikilink);
                $insertStmt->execute();
            }
            catch (MDMysqliInvalidEncodingError $e) {
                $_SESSION["editHistory"] = ["changesStored", "Error adding translation for language $tLang"];
            }

        }
        else if (!empty($data['labels'][$lang]['value']) and !empty($data['descriptions'][$lang])) {

            // No Wikipedia article to fetch: use label and description from Wikidata.
            $wikilink     = "";
            $tLang        = $data['labels'][$lang]['language'] ?? $lang;
            $tLabel       = $data['labels'][$lang]['value'];
            $tDescription = (string)($data['descriptions'][$lang]['value'] ?? '');

            $insertStmt->bind_param("issss", $ort_id, $tLang, $tLabel, $tDescription, $wikilink);
            $insertStmt->execute();

        }

    }

    $this->_mysqli_noda->commit();
    $this->_mysqli_noda->autocommit(true);
    $insertStmt->close();
    unset($insertStmt);

}
/**
 * Function for fetching description from Wikipedia
 *
 * Writes the given Wikipedia text as the tag's description. If the tag
 * already has a description that does not start with "GND", the decision is
 * taken from $_GET['keep'] ("replace", "add", or any other value to keep the
 * old entry). If that parameter is missing, a selection page is printed and
 * the script exits. Afterwards the link to Wikipedia is stored and the tag's
 * editing metadata are updated.
 *
 * @param integer $tag_id       Tag ID.
 * @param string  $datafromwiki Data fetched from Wikipedia.
 * @param string  $wikilink     Link to wikipedia entry.
 * @param string  $preflang     The user's currently used language.
 * @param string  $lang         Currently queried language.
 * @param string  $erfasst_von  User who adds the info.
 *
 * @return boolean False if the tag does not exist, true otherwise.
 */
public function retrieveTagDescFromWikipedia(int $tag_id, string $datafromwiki, string $wikilink, string $preflang, string $lang, string $erfasst_von):bool {

    $output = false;

    $datafromwiki = '"' . $datafromwiki . '" - (Wikipedia (' . $lang . ') ' . date("d.m.Y") . ')';
    // Strip footnote markers of any length ([1], [12], ...).
    $datafromwiki = str_replace("'", "´", MD_STD::preg_replace_str("/\&\#91\;[0-9]+\&\#93\;/", '', $datafromwiki));

    $cergebnis = $this->_mysqli_noda->query_by_stmt("SELECT `tag_anmerkung`
FROM `tag`
WHERE `tag_id` = ?", "i", $tag_id);
    $cinfo = $cergebnis->fetch_assoc();
    $cergebnis->close();
    $cergebnis = null;

    // Unknown tag: nothing to update.
    if (!$cinfo) {
        return $output;
    }

    $this->_mysqli_noda->autocommit(false);

    $updateTagDescStmt = $this->_mysqli_noda->do_prepare("UPDATE `tag`
SET `tag_anmerkung` = ?
WHERE `tag_id` = ?");

    if (!empty($cinfo['tag_anmerkung']) and substr($cinfo['tag_anmerkung'], 0, 3) !== 'GND') {

        if (isset($_GET['keep'])) {

            if (!$_GET['keep'] || $_GET['keep'] === 'replace') {
                $updateTagDescStmt->bind_param("si", $datafromwiki, $tag_id);
                $updateTagDescStmt->execute();
            }
            else if ($_GET['keep'] === 'add') {
                $newDesc = $cinfo['tag_anmerkung'] . PHP_EOL . PHP_EOL . $datafromwiki;
                $updateTagDescStmt->bind_param("si", $newDesc, $tag_id);
                $updateTagDescStmt->execute();
            }
            // Any other value keeps the existing description as it is.
            $output = true;

        }
        else {

            // No decision yet: show both versions, let the user choose, and stop.
            $tlLoader = new MDTlLoader("wiki_getter_tag", $preflang);
            echo self::generateHTMLHeadForWikidataFetcher($lang);
            echo self::generateWikidataFetcherHeader($tlLoader);
            echo '
<p class="alert icons iconsAlert">Es gibt schon einen Eintrag im Beschreibungsfeld</p>
<div class="wikiReplaceTTile">
<h3>Bisher vorhanden</h3><p>' . nl2br($cinfo['tag_anmerkung']) . '</p>
</div>
<div class="wikiReplaceTTile">
<h3>Jetzt gefunden</h3><p>' . $datafromwiki . '</p>
</div>
<a href="get_wikidata_for_tag.php?keep=keep' . write_get_vars(['suchbegriff', 'lang', 'wikidata_id', 'tag_id']) . '" class="buttonLike icons iconsPin">Keep old entry</a>';
            echo '<br><a href="get_wikidata_for_tag.php?keep=replace' . write_get_vars(['suchbegriff', 'lang', 'wikidata_id', 'tag_id']) . '" class="buttonLike icons iconsPinOff">Replace with new entry</a>';
            echo '<br><a href="get_wikidata_for_tag.php?keep=add' . write_get_vars(['suchbegriff', 'lang', 'wikidata_id', 'tag_id']) . '" class="buttonLike icons iconsPlusOne">Keep old and add new entry</a><br><br><br>';
            exit;

        }

    }
    else {
        // No description worth keeping: simply write the new one.
        $updateTagDescStmt->bind_param("si", $datafromwiki, $tag_id);
        $updateTagDescStmt->execute();
    }

    $updateTagDescStmt->close();
    $updateTagDescStmt = null;

    $insertNodaTagStmt = $this->_mysqli_noda->do_prepare("INSERT INTO `noda_tag`
(`tag_id`, `noda_source`, `noda_nrinsource`, `noda_link`, `noda_erfasst_von`)
VALUES
(?, 'Wikipedia', '', ?, ?)
ON DUPLICATE KEY UPDATE `noda_link` = ?");
    $insertNodaTagStmt->bind_param("isss", $tag_id, $wikilink, $erfasst_von, $wikilink);
    $insertNodaTagStmt->execute();
    $insertNodaTagStmt->close();
    $output = true;

    // Update tag editing metadata
    $updateTagEditInfoStmt = $this->_mysqli_noda->do_prepare("UPDATE `tag`
SET `tag_erfasst_am` = NOW(),
`tag_erfasst_von` = ?
WHERE `tag_id` = ?");
    $updateTagEditInfoStmt->bind_param("si", $erfasst_von, $tag_id);
    $updateTagEditInfoStmt->execute();
    $updateTagEditInfoStmt->close();
    $updateTagEditInfoStmt = null;

    $this->_mysqli_noda->commit();
    $this->_mysqli_noda->autocommit(true);

    return $output;

}
/**
 * Writes relations to norm data sources to DB.
 *
 * Every vocabulary name is validated before anything is written, so that an
 * unknown vocabulary neither leaves a half-written set of links nor a
 * connection with autocommit disabled. Entries with an empty ID are skipped.
 *
 * @param array<string> $nodaLinks   Links to other noda sources, as
 *                                   vocabulary name => ID in that vocabulary.
 * @param integer       $tag_id      Tag ID.
 * @param string        $erfasst_von Name of the user to edit this.
 *
 * @throws Exception If no URL prefix is known for a vocabulary name.
 *
 * @return void
 */
public function writeNodaLinksTag(array $nodaLinks, int $tag_id, string $erfasst_von):void {

    // Validate up front; throwing inside the transaction would skip the
    // commit and the re-enabling of autocommit below.
    foreach (array_keys($nodaLinks) as $vocabName) {
        if (empty(self::URL_PREFIXES_PLACES_NODA_SOURCE[$vocabName])) {
            throw new Exception("Unknown URL prefix for: " . $vocabName);
        }
    }

    $this->_mysqli_noda->autocommit(false);

    $insertNodaTagStmt = $this->_mysqli_noda->do_prepare("INSERT INTO `noda_tag`
(`tag_id`, `noda_source`, `noda_nrinsource`, `noda_link`, `noda_erfasst_von`)
VALUES
(?, ?, ?, ?, ?)
ON DUPLICATE KEY UPDATE
`noda_nrinsource` = ?,
`noda_link` = ?,
`noda_erfasst_von` = ?");

    foreach ($nodaLinks as $vocabName => $nodaId) {

        if (empty($nodaId)) continue;

        $noda_link = self::URL_PREFIXES_PLACES_NODA_SOURCE[$vocabName] . $nodaId;
        $insertNodaTagStmt->bind_param("isssssss", $tag_id, $vocabName, $nodaId, $noda_link, $erfasst_von, $nodaId, $noda_link, $erfasst_von);
        $insertNodaTagStmt->execute();

    }

    $insertNodaTagStmt->close();
    $insertNodaTagStmt = null;

    $this->_mysqli_noda->commit();
    $this->_mysqli_noda->autocommit(true);

}
/**
 * Function for retrieving tag information based on a Wikidata ID.
 *
 * Fetches the entity from Wikidata, tries to enter a description from
 * Wikipedia (first in the requested language, then in each of the main
 * description languages), writes the links to the norm data sources named
 * in the entity's claims, fetches translations and logs the edit.
 *
 * @param string  $lang        The user's selected used language.
 * @param string  $wikidata_id Wikidata ID.
 * @param integer $tag_id      Tag ID.
 * @param string  $erfasst_von User name who's currently editing.
 *
 * @throws MDhttpFailedException If the response from Wikidata cannot be decoded.
 *
 * @return void
 */
public function retrieveTagInfoFromWikidataID(string $lang, string $wikidata_id, int $tag_id, string $erfasst_von):void {

    $data = MD_STD::runCurl("https://www.wikidata.org/wiki/Special:EntityData/" . urlencode($wikidata_id) . ".json", 10000);
    $data = json_decode($data, true);
    if (!is_array($data)) {
        throw new MDhttpFailedException("Failed fetching from Wikidata. Try again later.");
    }
    // A response that lacks the requested entity is handled as an empty entity.
    $data = $data['entities'][$wikidata_id] ?? [];

    $wikilink = $wikilinkterm = [];
    foreach (self::LANGUAGES_MAIN_DESC as $tLang) {
        $sitelink = $data['sitelinks'][$tLang . 'wiki'] ?? [];
        if (isset($sitelink['url'])) $wikilink[$tLang] = $sitelink['url'];
        if (isset($sitelink['title'])) $wikilinkterm[$tLang] = str_replace(' ', '_', $sitelink['title']);
    }

    // Try the requested language first, then every main description language
    // in order, until a description has been entered.
    $alreadyEntered = false;
    foreach (array_merge([$lang], self::LANGUAGES_MAIN_DESC) as $sprache) {

        if ($alreadyEntered === true) break;
        if (!isset($wikilink[$sprache]) || !isset($wikilinkterm[$sprache]) || !is_string($wikilinkterm[$sprache])) continue;

        $datafromwiki = MD_STD::runCurl("https://" . urlencode($sprache) . ".wikipedia.org/w/api.php?action=parse&page=" . urlencode($wikilinkterm[$sprache]) . "&prop=text&section=0&format=json", 10000);
        // Error responses carry no parsed text; treat them like an empty article.
        $datafromwiki = json_decode($datafromwiki, true)['parse']['text']['*'] ?? '';

        # Process data retrieved from wikipedia
        $datafromwiki = self::_cleanWikidataInput((string)$datafromwiki);
        if (empty($datafromwiki)) continue;

        $alreadyEntered = $this->retrieveTagDescFromWikipedia($tag_id, $datafromwiki, $wikilink[$sprache], $lang, $sprache, $erfasst_von);

    }

    $nodaLinks = [
        "wikidata" => $wikidata_id,
    ];
    foreach (self::P_IDS_NODA_TAGS as $vocabName => $pId) {
        if ($vocabName === 'loc') continue;
        // Claims may lack a data value; such claims are ignored.
        if (isset($data['claims'][$pId][0]['mainsnak']['datavalue']['value'])) {
            $nodaLinks[$vocabName] = $data['claims'][$pId][0]['mainsnak']['datavalue']['value'];
        }
    }

    $this->writeNodaLinksTag($nodaLinks, $tag_id, $erfasst_von);

    // Get translations
    if (!empty($data)) $this->getWikidataTranslationsForTag($data, $tag_id);

    NodaLogEdit::logTagEdit($this->_mysqli_noda, $tag_id, "wikidata-fetcher", $erfasst_von, 'update', 'synchronize');

}
/**
 * Function for fetching translations from wikidata.
 *
 * For every language to check, a translation is stored either from the
 * Wikidata label plus the cleaned first section of the linked Wikipedia
 * article or, if there is no article to fetch, from the label and
 * description given in the Wikidata entity itself. Labels and descriptions
 * in the languages listed in LANGUAGES_TO_CAPITALIZE are passed through
 * ucfirst().
 *
 * @param array<mixed> $data   Entity data fetched from wikidata.
 * @param integer      $tag_id Tag ID.
 *
 * @throws MDExpectedException If the parallel requests cannot be initialized.
 *
 * @return void
 */
public function getWikidataTranslationsForTag(array $data, int $tag_id):void {

    $checkagainstLanguage = self::LANGUAGES_TO_CHECK;

    [$languagesToFetch, $wikilinks] = self::getWikidataWikipediaTranslationSources($checkagainstLanguage, $data);
    if (empty($languagesToFetch)) {
        return;
    }

    try {
        $contents = MD_STD::runCurlMulti($languagesToFetch, 10000);
    }
    catch (TypeError $e) {
        throw new MDExpectedException("Failed to initialize a request. Try pressing F5 to run the requests again.");
    }

    $insertStmt = $this->_mysqli_noda->do_prepare("CALL nodaInsertTagTranslation(?, ?, ?, ?, ?)");
    $this->_mysqli_noda->autocommit(false);

    foreach ($checkagainstLanguage as $lang) {

        if (!empty($languagesToFetch[$lang]) && !empty($data['sitelinks'][$lang . 'wiki'])) {

            $wikilink     = $wikilinks[$lang];
            $tDescription = "";

            // A sitelink may exist without a label in that language; fall back
            // to the language code currently being processed.
            $labelLang = (string)($data['labels'][$lang]['language'] ?? $lang);

            if (!empty($contents[$lang])) {
                // Undecodable or error responses yield no text and thus no description.
                $descFromWiki = json_decode($contents[$lang], true)['parse']['text']['*'] ?? '';
                if (!empty($descFromWiki)) {
                    # Process data retrieved from wikipedia
                    $tDescription = self::_cleanWikidataInput((string)$descFromWiki);
                    if (substr($tDescription, -1) == chr(10)) {
                        $tDescription = substr($tDescription, 0, strlen($tDescription) - 1);
                    }
                    $tDescription = '"' . $tDescription . '" - (' . $labelLang . '.wikipedia.org ' . date('d.m.Y') . ')';
                    // Strip footnote markers of any length ([1], [12], ...).
                    $tDescription = str_replace("'", "´", MD_STD::preg_replace_str("/\&\#91\;[0-9]+\&\#93\;/", '', $tDescription));
                }
            }

            $tLang  = self::_cleanWikidataInput($labelLang);
            $tLabel = self::_cleanWikidataInput((string)($data['labels'][$lang]['value'] ?? ''));

            if (in_array($tLang, self::LANGUAGES_TO_CAPITALIZE, true)) {
                $tLabel       = ucfirst(trim($tLabel));
                $tDescription = ucfirst(trim($tDescription));
            }

            try {
                $insertStmt->bind_param("issss", $tag_id, $tLang, $tLabel, $tDescription, $wikilink);
                $insertStmt->execute();
            }
            catch (MDMysqliInvalidEncodingError $e) {
                // NOTE(review): translations with invalid encoding are skipped
                // silently here - confirm that no user feedback is wanted.
            }

        }
        else if (!empty($data['labels'][$lang]['value']) and !empty($data['descriptions'][$lang])) {

            // No Wikipedia article to fetch: use label and description from Wikidata.
            $wikilink     = "";
            $tLang        = $data['labels'][$lang]['language'] ?? $lang;
            $tLabel       = $data['labels'][$lang]['value'];
            $tDescription = (string)($data['descriptions'][$lang]['value'] ?? '');

            if (in_array($lang, self::LANGUAGES_TO_CAPITALIZE, true)) {
                $tLabel       = ucfirst(trim($tLabel));
                $tDescription = ucfirst(trim($tDescription));
            }

            $insertStmt->bind_param("issss", $tag_id, $tLang, $tLabel, $tDescription, $wikilink);
            $insertStmt->execute();

        }

    }

    $this->_mysqli_noda->commit();
    $this->_mysqli_noda->autocommit(true);
    $insertStmt->close();
    unset($insertStmt);

}
/**
 * Searches Wikidata for a string.
 *
 * Results without a label and disambiguation pages are left out.
 *
 * @param string $searchTerm Search string.
 * @param string $lang       Searched language. Defaults to German.
 *
 * @return array<mixed> List of results, each with the keys "id", "label",
 *                      "label_ext" and "description". Empty if nothing
 *                      usable was returned.
 */
public static function searchWikidataForString(string $searchTerm, string $lang = "de"):array {

    $response = MD_STD::runCurl("https://www.wikidata.org/w/api.php?action=wbsearchentities&format=json&search=" . urlencode($searchTerm) . "&language=" . urlencode($lang) . "&limit=20", 10000);

    // json_decode() reports failure with null, not false; anything that is
    // not a decoded array with search results counts as "nothing found".
    $wikidata_data = json_decode($response, true);
    if (!is_array($wikidata_data) || empty($wikidata_data['search']) || !is_array($wikidata_data['search'])) {
        return [];
    }

    $disambiguationDescriptions = [
        'Wikipedia disambiguation page',
        'Wikimedia disambiguation page',
    ];

    $output = [];
    foreach ($wikidata_data['search'] as $result) {

        if (empty($result['label'])) continue;
        if (!empty($result['description']) and in_array($result['description'], $disambiguationDescriptions, true)) continue;

        $cur = [
            'id'          => $result['id'],
            'label'       => $result['label'],
            'label_ext'   => '',
            'description' => '',
        ];

        // Name the language and text that actually matched the search term.
        if (isset($result['match']['language'], $result['match']['text'])) {
            $cur['label_ext'] = "{$result['match']['language']}: {$result['match']['text']}";
        }
        if (!empty($result['description'])) {
            $cur['description'] = $result['description'];
        }

        $output[] = $cur;

    }

    return $output;

}
/**
 * Generates the HTML for an entry in the general wikidata search results list.
 *
 * Search term, Wikidata ID and language are URL-encoded for the link; ID,
 * label and extended label are HTML-escaped for display. The description is
 * inserted as given, because callers in this class append their own markup
 * to it - they are responsible for escaping it.
 *
 * @param string       $link       Links.
 * @param string       $searchTerm Search term.
 * @param string       $lang       Language.
 * @param array<mixed> $result     Single result to display.
 *
 * @return string Empty string for results without a label and for
 *                disambiguation pages.
 */
public static function generateWikidataResultsListEntry(string $link, string $searchTerm, string $lang, array $result):string {

    if (!isset($result['label']) or $result['label'] === '') {
        return '';
    }
    if (isset($result['description'])
        and ($result['description'] === 'Wikipedia disambiguation page' or $result['description'] === 'Wikimedia disambiguation page')
    ) {
        return '';
    }

    $id      = (string)$result['id'];
    $idHtml  = htmlspecialchars($id, ENT_QUOTES);

    // Values inside the query string need URL encoding: HTML escaping alone
    // lets characters such as "&", "#" or "+" break the parameters.
    $output = '<div><a href="' . $link . 'suchbegriff=' . urlencode($searchTerm) . '&wikidata_id=' . urlencode($id) . '&lang=' . urlencode($lang) . '">
<h4 class="icons iconsTag">' . $idHtml . '</h4>';

    $output .= '<p class="wikidataSummary">' . htmlspecialchars((string)$result['label'], ENT_QUOTES);
    if (!empty($result['label_ext'])) {
        $output .= " (<span class='icons iconsTranslate'>" . htmlspecialchars((string)$result['label_ext'], ENT_QUOTES) . "</span>)";
    }
    $output .= '</p>';

    if (!empty($result['description'])) $output .= '<p>' . $result['description'] . '</p>';

    $output .= '</a><a class="icons iconsEye" target="_blank" href="https://www.wikidata.org/wiki/' . urlencode($id) . '">Wikidata page</a></div>';

    return $output;

}
/**
 * Function for generating a wikidata results list.
 *
 * The search term and the descriptions returned by Wikidata are HTML-escaped
 * before they are printed.
 *
 * @param string $link       Links.
 * @param string $searchTerm Search term.
 * @param string $lang       Language.
 *
 * @return string
 */
public static function generateWikidataResultsList(string $link, string $searchTerm, string $lang):string {

    // NOTE(review): the search runs with searchWikidataForString()'s default
    // language; $lang is only used for the generated links - confirm that
    // this is intended.
    if (empty($wikidata_data = self::searchWikidataForString($searchTerm))) {
        return '<p class="icons iconsAlert alert"><b>' . htmlspecialchars(ucfirst($searchTerm), ENT_QUOTES) . '</b> not found in Wikidata</p>';
    }

    $output = '
<main id="wikidataResultsList">';

    foreach ($wikidata_data as $result) {
        // The entry generator prints the description as given, so the text
        // coming from Wikidata is escaped here.
        if (isset($result['description'])) {
            $result['description'] = htmlspecialchars((string)$result['description'], ENT_QUOTES);
        }
        $output .= self::generateWikidataResultsListEntry($link, $searchTerm, $lang, $result);
    }

    $output .= '
</main>';

    return $output;

}
/**
 * Attempts to parse birth or death years from the data returned by wikidata.
 *
 * The year is read directly from the start of the time string (an optional
 * sign followed by at least four digits and a hyphen) instead of going
 * through strtotime(). strtotime() reads a bare four-digit string such as
 * "1920" as a time of day where possible, and it rolls values with month and
 * day "00" back into the previous year.
 *
 * @param string $inputTime Input time in the format delivered by wikidata,
 *                          e.g. "+1879-03-14T00:00:00Z".
 *
 * @return string The year as given in the input (with a leading "-" for
 *                negative years), or an empty string if none can be found.
 */
public static function wikidataBirthDeathToYear(string $inputTime):string {

    if (preg_match('/^\s*([+-]?)(\d{4,})-/', $inputTime, $matches) !== 1) {
        return '';
    }

    // Keep the sign only for negative years; positive years are returned bare.
    $sign = ($matches[1] === '-') ? '-' : '';

    return $sign . $matches[2];

}
/**
 * Function for generating a wikidata results list for actors, keeping track of life dates.
 *
 * For every search result the entity is fetched from Wikidata to read the
 * dates of birth (P569) and death (P570). The years are appended to the
 * description; a result whose years of birth and death both equal the given
 * ones is marked as a suggestion. The search term and the descriptions
 * returned by Wikidata are HTML-escaped before they are printed.
 *
 * @param string  $link        Links.
 * @param string  $searchTerm  Search term.
 * @param string  $lang        Language.
 * @param integer $yearOfBirth Year of birth.
 * @param integer $yearOfDeath Year of death.
 *
 * @return string
 */
public static function generateWikidataResultsListForActors(string $link, string $searchTerm, string $lang, int $yearOfBirth, int $yearOfDeath):string {

    // NOTE(review): the search runs with searchWikidataForString()'s default
    // language; $lang is only used for the generated links - confirm that
    // this is intended.
    if (empty($wikidata_data = self::searchWikidataForString($searchTerm))) {
        return '<p class="icons iconsAlert alert"><b>' . htmlspecialchars(ucfirst($searchTerm), ENT_QUOTES) . '</b> not found in Wikidata</p>';
    }

    // Fetch the full entities of all results in parallel.
    $qLinksToCheck = [];
    foreach ($wikidata_data as $entry) {
        $qLinksToCheck[$entry['id']] = "https://www.wikidata.org/wiki/Special:EntityData/" . $entry['id'] . ".json";
    }
    $fetched = MD_STD::runCurlMulti($qLinksToCheck, 10000);

    $yearsOfBirthList = $yearsOfDeathList = [];
    foreach ($fetched as $qId => $rawEntity) {

        $jsonData = json_decode($rawEntity, true);
        if (!$jsonData || empty($jsonData['entities'][$qId])) {
            continue;
        }
        $claims = $jsonData['entities'][$qId]['claims'] ?? [];

        if (!empty($claims['P569'][0]['mainsnak']['datavalue']['value']['time'])) {
            $yearsOfBirthList[$qId] = (int)self::wikidataBirthDeathToYear($claims['P569'][0]['mainsnak']['datavalue']['value']['time']);
        }
        if (!empty($claims['P570'][0]['mainsnak']['datavalue']['value']['time'])) {
            $yearsOfDeathList[$qId] = (int)self::wikidataBirthDeathToYear($claims['P570'][0]['mainsnak']['datavalue']['value']['time']);
        }

    }

    $output = '
<main id="wikidataResultsList">';

    foreach ($wikidata_data as $result) {

        if (empty($result['id'])) continue;

        $born = $yearsOfBirthList[$result['id']] ?? 0;
        $died = $yearsOfDeathList[$result['id']] ?? 0;

        // The entry generator prints the description as given, so the text
        // coming from Wikidata is escaped before markup is appended to it.
        $descriptionParts = [];
        if (!empty($result['description'])) {
            $descriptionParts[] = htmlspecialchars((string)$result['description'], ENT_QUOTES);
        }
        if (!empty($born)) $descriptionParts[] = 'Born: ' . $born;
        if (!empty($died)) $descriptionParts[] = 'Death: ' . $died;
        if (!empty($born) && !empty($died) && $born === $yearOfBirth && $died === $yearOfDeath) {
            $descriptionParts[] = '<span class="buttonLike">Suggestion!</span>';
        }

        if (!empty($descriptionParts)) {
            $result['description'] = implode('<br/>', $descriptionParts);
        }

        $output .= self::generateWikidataResultsListEntry($link, $searchTerm, $lang, $result);

    }

    $output .= '
</main>';

    return $output;

}
/**
 * Builds the opening HTML (doctype, head and opening body tag) for the
 * wikidata fetcher pages.
 *
 * @param string  $lang     User language.
 * @param boolean $implyEnd If set to true, the end string will be echoed at the end of the script execution.
 *
 * @return string
 */
public static function generateHTMLHeadForWikidataFetcher(string $lang, bool $implyEnd = true):string {

    $html = '<!DOCTYPE html><html class="getWikidata" lang="' . $lang . '">
<head>
<title>Get Wikidata</title>
<meta name="viewport" content="width=device-width, initial-scale=1" />
<link rel="manifest" href="../manifest.webmanifest" />
<meta name="theme-color" content="#0b1728" />
<link rel="shortcut icon" sizes="16x16 32x32" href="../img/mdlogo-nodac.svg.png" />
<link rel="apple-touch-icon" sizes="256x256" href="../img/mdterm-256px.png" />
<script type="text/javascript" src="../js/wikidataGetter.min.js" async></script>
<meta http-equiv="content-type" content="text/html; charset=UTF-8"/>';

    // The stylesheet is only linked if the including script defined one.
    if (defined("MAIN_CSS_FILE")) {
        $html .= '<link rel="stylesheet" type="text/css" href="' . htmlspecialchars(MAIN_CSS_FILE) . '">';
    }

    $html .= '
<meta name="description" content="Fetch information from Wikidata." />
</head>
<body>';

    // Have the closing HTML printed once the script ends.
    if ($implyEnd) {
        register_shutdown_function(static function ():void {
            echo printHTMLEnd();
        });
    }

    return MD_STD::minimizeHTMLString($html);

}
/**
 * Function generate header for wikidata fetcher pages.
 *
 * If no search term is passed, it is taken from $_GET['suchbegriff']. The
 * search term is HTML-escaped before it is printed; $additional is inserted
 * as given.
 *
 * @param MDTlLoader $tlLoader   Translation variable.
 * @param string     $additional Additional info.
 * @param string     $searchTerm Search term.
 *
 * @return string
 */
public static function generateWikidataFetcherHeader(MDTlLoader $tlLoader, string $additional = "", string $searchTerm = ""):string {

    // Request parameters may also arrive as arrays; only a string is usable here.
    if (empty($searchTerm) and !empty($_GET['suchbegriff']) and is_string($_GET['suchbegriff'])) {
        $searchTerm = $_GET['suchbegriff'];
    }

    $output = '
<header>
<h1><img src="../img/wikidata.png" alt="Logo: Wikidata" />' . $tlLoader->tl("wiki", "wiki", "fetch_from_wikidata");
    $output .= ': ' . htmlspecialchars($searchTerm, ENT_QUOTES);
    $output .= '</h1>';
    $output .= $additional;
    $output .= '</header>';

    return $output;

}
/**
 * Constructor. Stores the given DB connection on the instance, where the
 * non-static methods of this class read it as $this->_mysqli_noda.
 *
 * @param MDMysqli $mysqli_noda DB connection.
 *
 * @return void
 */
public function __construct(MDMysqli $mysqli_noda) {
$this->_mysqli_noda = $mysqli_noda;
}
}