*/
declare(strict_types = 1);

/**
 * Helps fetching information from Wikidata.
 */
final class NodaWikidataFetcher {

    /** HTTP headers sent with requests to the Wikidata SPARQL endpoint. */
    const WIKIDATA_FETCH_HEADERS = [
        'User-Agent: museum-digital-bot GND-to-Wikidata PHP/' . PHP_VERSION,
        'Accept: application/sparql-results+json',
    ];

    /**
     * Languages that are tried, in this order, when looking for a main description.
     * Each entry is suffixed with "wiki" to form a Wikidata sitelink key and is used
     * as the Wikipedia subdomain, so entries must be Wikipedia language codes.
     * Japanese is "ja"; the former "jp" could never match a sitelink.
     */
    const LANGUAGES_MAIN_DESC = ['de', 'da', 'en', 'es', 'fr', 'hu', 'it', 'ja', 'nl', 'pt', 'ru', 'sv', 'zh'];

    /** Languages for which translations (label + description) are collected. */
    const LANGUAGES_TO_CHECK = ['ar', 'bn', 'cs', 'da', 'de', 'el', 'en', 'es', 'fa', 'fi', 'fr', 'ha', 'he', 'hi', 'hu', 'id', 'it', 'ja', 'ka', 'ko', 'nl', 'pl', 'pt', 'ro', 'ru', 'sv', 'sw', 'ta', 'th', 'tl', 'tr', 'ur', 'vi', 'zh'];

    // NOTE(review): not referenced in the visible part of this class; presumably the
    // languages in which fetched labels are capitalized - confirm against the users.
    const LANGUAGES_TO_CAPITALIZE = ["cs", "da", "de", "en", "es", "fr", "fi", "id", "it", "nl", "pl", "pt", "ru", "sv", "tl", "tr"];

    /**
     * URL prefix per norm data source. The ID within the source is appended to the
     * prefix to build the link stored alongside the ID.
     */
    const URL_PREFIXES_PLACES_NODA_SOURCE = [
        "gnd"      => "https://d-nb.info/gnd/",
        "nomisma"  => "http://nomisma.org/id/",
        "osm"      => "https://www.openstreetmap.org/relation/",
        "loc"      => "http://id.loc.gov/authorities/names/",
        "cona"     => "http://vocab.getty.edu/page/cona/",
        "aat"      => "http://vocab.getty.edu/page/aat/",
        "lcsh"     => "http://id.loc.gov/authorities/subjects/",
        "wikidata" => "https://www.wikidata.org/wiki/",
        "bne"      => "http://datos.bne.es/persona/",
        "viaf"     => "https://viaf.org/viaf/",
        "bnf"      => "https://catalogue.bnf.fr/ark:/12148/cb",
        "ulan"     => "http://vocab.getty.edu/page/ulan/",
        "rkd"      => "https://rkd.nl/explore/artists/",
        "pim"      => "https://opac-nevter.pim.hu/en/record/-/record/",
        "ndl"      => "https://id.ndl.go.jp/auth/ndlna/",
        "npg"      => "https://www.npg.org.uk/collections/search/person/",
        "orcid"    => "https://orcid.org/",
    ];

    /**
     * Wikidata property IDs (P-IDs) holding the external ID of each vocabulary.
     * "lcsh" and "loc" share P244.
     */
    const P_IDS_NODA_TAGS = [
        'gnd'       => 'P227',
        'lcsh'      => 'P244',
        'aat'       => 'P1014',
        'iconclass' => 'P1256',
        'osm'       => 'P402',
        'loc'       => 'P244',
        'nomisma'   => 'P2950',
        'cona'      => 'P1669',
        "rkd"       => "P650",
        "ulan"      => "P245",
        "viaf"      => "P214",
        "bnf"       => "P268",
        "pim"       => "P3973",
        "ndl"       => "P349", // National Diet Library (Japan)
        "npg"       => "P1816", // "National" portrait gallery
        "bne"       => "P950", // Espana National Library
        "orcid"     => "P496",
    ];
const WIKIPEDIA_REMOVE_LITERALS = [ "

Si vous disposez d'ouvrages ou d'articles de référence ou si vous ", '

En pratique : Quelles sources sont attendu', '', '

Géolocalisation sur la carte', '

Koordinaatit:', '

', //'

', '

', '

', '

', '

', '

loadHTML($wikipedia_cont)) {
            return '';
        }
        libxml_use_internal_errors(false);

        if (!($wikidataLinkLi = $doc->getElementById("t-wikibase"))) {
            return '';
        }
        if (!($wikidataLink = $wikidataLinkLi->firstChild)) {
            return '';
        }
        if (!($t_wikibase_href = $wikidataLink->getAttribute('href'))) {
            return '';
        }
        $t_wikibase = (string)$t_wikibase_href;

        // if (!empty($t_wikibase)) {
        if (($wikidata_id_end = strrpos($t_wikibase, '/')) !== false) {
            $wikidata_id = trim(substr($t_wikibase, $wikidata_id_end + 1), '/');
            if (substr($wikidata_id, 0, 1) === 'Q') {
                return $wikidata_id;
            }
        }
        // }

        return '';

    }

    /**
     * Runs a SPARQL query against the Wikidata SPARQL endpoint and returns the
     * decoded JSON response.
     *
     * @param string $sparqlQuery Query string.
     *
     * @throws MDhttpFailedException If the response cannot be decoded into an array.
     *
     * @return array
     */
    public static function sparqlQuery(string $sparqlQuery):array {

        $url = 'https://query.wikidata.org/sparql?query=' . urlencode($sparqlQuery);
        $result = MD_STD::runCurl($url, 100000000, self::WIKIDATA_FETCH_HEADERS);

        // json_decode() returns null on invalid / empty input, which would
        // otherwise surface as a TypeError because of the array return type.
        $decoded = json_decode($result, true);
        if (!is_array($decoded)) {
            throw new MDhttpFailedException("Failed fetching from Wikidata. Try again later.");
        }

        return $decoded;

    }

    /**
     * Formulates a SPARQL query string for fetching from Wikidata based on an external ID.
     *
     * @param string $repoName   Name of the repository (key of P_IDS_NODA_TAGS).
     *                           Only used if no P-ID is passed explicitly.
     * @param string $externalId ID in the external repository.
     * @param string $repoPId    Optional P-ID of the external repository. Needed for
     *                           Geonames and TGN, obsolete otherwise.
     *
     * @throws MDmainEntityNotExistentException If no P-ID is given and the repository is unknown.
     *
     * @return string
     */
    public static function formulateWikidataQueryByExtId(string $repoName, string $externalId, string $repoPId = ''):string {

        if (empty($repoPId)) {
            if (empty(self::P_IDS_NODA_TAGS[$repoName])) {
                throw new MDmainEntityNotExistentException("Unknown external repository. The following repositories are known with their Wikidata ID: " . implode(', ', array_keys(self::P_IDS_NODA_TAGS)));
            }
            $repoPId = self::P_IDS_NODA_TAGS[$repoName];
        }

        // The ID ends up inside a double-quoted SPARQL literal: escape everything
        // that could terminate or break that literal.
        $escapedId = strtr($externalId, [
            '\\' => '\\\\',
            '"'  => '\\"',
            "\n" => '\\n',
            "\r" => '\\r',
        ]);

        return 'SELECT ?id ?idLabel WHERE { ?id wdt:' . $repoPId . ' "' . $escapedId . '". SERVICE wikibase:label { bd:serviceParam wikibase:language "en" . ' . "\n" . '} }';

    }

    /**
     * Gets the Wikidata ID based on a result from Wikidata's SPARQL endpoint.
     * Returns an empty string unless the result contains exactly one binding.
     *
     * @param array $queryResult Query result.
     *
     * @return string
     */
    public static function readWikidataIdFromSparqlResult(array $queryResult):string {

        $bindings = $queryResult['results']['bindings'] ?? [];
        if (!is_array($bindings) || count($bindings) !== 1) {
            return '';
        }

        $wikidataLink = $bindings[0]['id']['value'] ?? '';
        if (!is_string($wikidataLink) || empty($wikidataLink)) {
            return '';
        }

        // The value is the entity URI; the Q-ID is whatever follows the last slash.
        if (($endSlashPos = strrpos($wikidataLink, '/')) === false) {
            return '';
        }

        return (string)substr($wikidataLink, $endSlashPos + 1);

    }

    /**
     * Queries Wikidata by an external repository's ID and returns the matching Q-ID
     * if there is any.
     *
     * @param string $repoName   Name of the repository.
     * @param string $externalId ID in the external repository.
     * @param string $repoPId    Optional P-ID of the external repository. Needed for
     *                           Geonames and TGN, obsolete otherwise.
     *
     * @return string
     */
    public static function getWikidataIdByExternalId(string $repoName, string $externalId, string $repoPId = ''):string {

        // Pass the caller's P-ID through (it used to be overwritten with '').
        $sparqlQueryString = self::formulateWikidataQueryByExtId($repoName, $externalId, $repoPId);

        $queryResult = self::sparqlQuery($sparqlQueryString);
        return self::readWikidataIdFromSparqlResult($queryResult);

    }

    /**
     * Gets translation source Wikipedia pages from Wikidata.
     * A language is only considered if the entity has both a label and a
     * Wikipedia sitelink in that language.
     *
     * @param array $checkagainstLanguage The languages to check against.
     * @param array $data                 Data fetched from Wikidata.
     *
     * @return array{0: array, 1: array} API URLs to fetch and Wikipedia page links,
     *                                   both keyed by language.
     */
    public static function getWikidataWikipediaTranslationSources(array $checkagainstLanguage, array $data) {

        $languagesToFetch = $wikilinks = [];

        foreach ($checkagainstLanguage as $lang) {

            if (empty($data['labels'][$lang]) || empty($data['sitelinks'][$lang . 'wiki'])) {
                continue;
            }

            $wikilink = $data['sitelinks'][$lang . 'wiki']['url'] ?? null;
            $title    = $data['sitelinks'][$lang . 'wiki']['title'] ?? null;
            if (!isset($wikilink) || !is_string($title)) {
                continue;
            }

            $wikilinkterm = str_replace(' ', '_', $title);

            $languagesToFetch[$lang] = "https://" . $lang . ".wikipedia.org/w/api.php?action=parse&page=" . urlencode($wikilinkterm) . "&prop=text&section=0&format=json";
            $wikilinks[$lang] = $wikilink;

        }

        return [$languagesToFetch, $wikilinks];

    }

    /**
     * Cleans contents parsed from Wikipedia.
     *
     * @param string $input Input string.
     *
     * @return string
     */
    private static function _cleanWikidataInput(string $input):string {

        if (substr($input, 0, strlen('<')) === '<') {

            $doc = new DOMDocument();
            $doc->loadXML($input);

            $list = $doc->getElementsByTagName("style");
            while ($list->length > 0) {
                $p = $list->item(0);
                $p->parentNode->removeChild($p);
            }

            $list = $doc->getElementsByTagName("table");
            while ($list->length > 0) {
                $p = $list->item(0);
                $p->parentNode->removeChild($p);
            }

            $list = $doc->getElementsByTagName("div");
            while ($list->length > 1) {
                $p = $list->item(1);
                $p->parentNode->removeChild($p);
            }

            $list = $doc->getElementsByTagName("ol");
            while ($list->length > 0) {
                $p = $list->item(0);
                $p->parentNode->removeChild($p);
            }

            $firstP = $doc->getElementsByTagName("p")->item(0);
            if (strpos($doc->saveHTML($firstP), 'geohack') !== false) {
                $firstP->parentNode->removeChild($firstP);
            }

            /*
            if (strpos($doc->saveHTML(), 'Coordinates:') !== false) {
                echo $doc->saveHTML();
                exit;
            }
             */

            $input = str_replace(PHP_EOL, PHP_EOL . PHP_EOL, trim($doc->textContent));

            if (mb_strlen($input) > 600) {
                if (strpos($input, PHP_EOL . PHP_EOL, 600) !== false) {
                    $input = substr($input, 0, strpos($input, PHP_EOL . PHP_EOL, 600));
                }
            }

            $bracketsToRemove = [];
            for ($i = 0; $i < 100; $i++) {
                $bracketsToRemove["[$i]"] = "";
            }
            $input = strtr($input, $bracketsToRemove);

            $input = str_replace("\t", " ", $input);

            // Remove newlines with ensuing spaces
            while (strpos($input, PHP_EOL . " ") !== false) {
                $input = str_replace(PHP_EOL . " ", PHP_EOL, $input);
            }

            // Remove double newlines
            while (strpos($input, PHP_EOL . PHP_EOL . PHP_EOL) !== false) {
                $input = str_replace(PHP_EOL . PHP_EOL . PHP_EOL, PHP_EOL .
PHP_EOL, $input); } return $input; } $input = str_replace(PHP_EOL, '', $input); foreach (self::WIKIPEDIA_REMOVE_LITERALS as $tToRemove) $input = str_replace($tToRemove, "", $input); $first_mention_of_paragraph = strpos($input, '

'); if ($first_mention_of_paragraph !== false) $input = substr($input, $first_mention_of_paragraph, (strrpos($input, '

') ?: strlen($input)) - $first_mention_of_paragraph); // Remove infobox tables specifically $removeFirstParagraph = false; if (empty($input)) return ""; $firstParagraphPosition = strpos($input, '"); if ($currentSearchPos !== false && $currentSearchPos < $firstParagraphPosition) { if (($tableEndPos = strpos($input, "")) !== false) { if (($pStartPos = strpos($input, '", "', '

' . PHP_EOL . PHP_EOL . PHP_EOL, $input); # $input = str_replace('?/i', '', $input); $input = strip_tags($input); # for ($i = 150; $i < 1000; $i++) $input = str_replace("&#$i;", " ", $input); $i = 0; while (strpos($input, ".mw-parser-output") !== false and strpos($input, "}", strpos($input, ".mw-parser-output")) !== false) { $part1 = substr($input, 0, strpos($input, ".mw-parser-output")); $part2 = substr($input, strpos($input, "}", strpos($input, ".mw-parser-output")) + 1); $input = $part1 . $part2; ++$i; if ($i === 30) break; } $bracketsToRemove = []; for ($i = 0; $i < 100; $i++) { $bracketsToRemove["[$i]"] = ""; } $input = strtr($input, $bracketsToRemove); $input = str_replace("\t", " ", $input); // Remove double whitespaces while (strpos($input, " ") !== false) { $input = str_replace(" ", " ", $input); } // Remove newlines with ensuing spaces while (strpos($input, PHP_EOL . " ") !== false) { $input = str_replace(PHP_EOL . " ", PHP_EOL, $input); } // Remove double newlines while (strpos($input, PHP_EOL . PHP_EOL . PHP_EOL) !== false) { $input = str_replace(PHP_EOL . PHP_EOL . PHP_EOL, PHP_EOL . PHP_EOL, $input); } $stableToRemove = [ "Vous pouvez partager vos connaissances en l’améliorant (comment ?) selon les recommandations des projets correspondants.", ]; foreach ($stableToRemove as $tToRemove) $input = str_replace($tToRemove, "", $input); $endings = [ "StubDenne artikel om et vandløb ", ]; foreach ($endings as $ending) { if (strpos($input, $ending) !== false) $input = substr($input, 0, strpos($input, $ending)); } $input = trim($input); // Cut off overly long articles if (mb_strlen($input) > 600) { if (strpos($input, PHP_EOL . PHP_EOL, 600) !== false) { $input = trim(substr($input, 0, strpos($input, PHP_EOL . 
PHP_EOL, 600))); } } if (empty($input)) return ''; $input = str_replace("'", "´", MD_STD::preg_replace_str("/\&\#91\;[0-9]\&\#93\;/", '', $input)); $input = html_entity_decode($input); return $input; } /** * Function for fetching description from Wikipedia * * @param integer $persinst_id Person ID. * @param string $wikidata_id Wikidata ID. * @param string $datafromwiki Data fetched from Wikipedia. * @param string $wikilink Link to wikipedia entry. * @param string $preflang The user's currently used language. * @param string $lang Currently queried language. * @param string $erfasst_von User who adds the info. * * @return boolean */ public function retrievePersinstDescFromWikipedia(int $persinst_id, string $wikidata_id, string $datafromwiki, string $wikilink, string $preflang, string $lang, string $erfasst_von):bool { $output = false; $datafromwiki = '"' . $datafromwiki . '" - (Wikipedia (' . $lang . ') ' . date('d.m.Y') . ')'; $cergebnis = $this->_mysqli_noda->query_by_stmt("SELECT `persinst_kurzinfo`, `persinst_anzeigename` AS `display_name` FROM `persinst` WHERE `persinst_id` = ?", "i", $persinst_id); // Update persinst table $updatePersinstStmt = $this->_mysqli_noda->do_prepare("UPDATE `persinst` SET `persinst_kurzinfo` = ? WHERE `persinst_id` = ?"); if ($cergebnis->num_rows === 0) { throw new Exception("There is no actor of ID #" . $persinst_id); } $cinfo = $cergebnis->fetch_assoc(); if (!empty($cinfo['persinst_kurzinfo']) and substr($cinfo['persinst_kurzinfo'], 0, 3) !== 'GND') { if (isset($_GET['keep'])) { if (!$_GET['keep'] || $_GET['keep'] === 'replace') { $updatePersinstStmt->bind_param("si", $datafromwiki, $persinst_id); $updatePersinstStmt->execute(); } else if ($_GET['keep'] === 'add') { $newDesc = $cinfo['persinst_kurzinfo'] . PHP_EOL . PHP_EOL . 
$datafromwiki; $updatePersinstStmt->bind_param("si", $newDesc, $persinst_id); $updatePersinstStmt->execute(); } $output = true; } else { $tlLoader = new MDTlLoader("wiki_getter_persinst", $preflang); echo self::generateHTMLHeadForWikidataFetcher($lang); echo self::generateWikidataFetcherHeader($tlLoader, "", $cinfo['display_name']); echo '

Es gibt schon einen Eintrag im Beschreibungsfeld

Bisher vorhanden

' . nl2br($cinfo['persinst_kurzinfo']) . '

Jetzt gefunden

' . $datafromwiki . '

Keep old entry'; echo '
Replace with new entry'; echo '
Keep old and add new entry


'; exit; } } else { $updatePersinstStmt->bind_param("si", $datafromwiki, $persinst_id); $updatePersinstStmt->execute(); $output = true; } $cergebnis->close(); $updatePersinstStmt->close(); // Set link to Wikipedia in noda table $insertNodaStmt = $this->_mysqli_noda->do_prepare("INSERT INTO `noda` (`persinst_id`, `noda_source`, `noda_nrinsource`, `noda_link`, `noda_erfasst_von`) VALUES (?, 'Wikipedia', '', ?, ?) ON DUPLICATE KEY UPDATE `noda_link` = ?"); $insertNodaStmt->bind_param("isss", $persinst_id, $wikilink, $erfasst_von, $wikilink); $insertNodaStmt->execute(); $insertNodaStmt->close(); // Update edit metadata $updatePersinstEditInfoStmt = $this->_mysqli_noda->do_prepare("UPDATE `persinst` SET `persinst_erfasst_am` = NOW(), `persinst_erfasst_von` = ? WHERE `persinst_id` = ?"); $updatePersinstEditInfoStmt->bind_param("si", $erfasst_von, $persinst_id); $updatePersinstEditInfoStmt->execute(); $updatePersinstEditInfoStmt->close(); $updatePersinstEditInfoStmt = null; return $output; } /** * Function for updating birth and death times based on Wikidata information. * * @param array $data Data loaded from Wikidata. * @param integer $persinst_id Actor ID. 
*
     * @throws MDmainEntityNotExistentException If the actor cannot be loaded.
     * @throws Exception                        If Wikidata reports a gender that is not mapped.
     *
     * @return void
     */
    public function enterPersinstBirthDeathDatesFromWikidata(array $data, int $persinst_id):void {

        $result = $this->_mysqli_noda->query_by_stmt("SELECT `persinst_geburtsjahr`, `persinst_sterbejahr`, `persinst_gender`
            FROM `persinst`
            WHERE `persinst_id` = ?", "i", $persinst_id);

        if (!($actor_dates = $result->fetch_assoc())) {
            throw new MDmainEntityNotExistentException("Failed to fetch actor information");
        }
        $result->close();
        $result = null;

        // Only fields that are still empty in the DB are filled from Wikidata.
        if ($actor_dates['persinst_geburtsjahr'] === '') {

            // Try to get birth date (P569)
            if (!empty($data['claims']['P569']) and !empty($data['claims']['P569']['0']['mainsnak']['datavalue']['value']['time'])) {
                $birth_date = self::wikidataBirthDeathToYear($data['claims']['P569']['0']['mainsnak']['datavalue']['value']['time']);
            }

            if (!empty($birth_date)) {
                $updateStmt = $this->_mysqli_noda->do_prepare("UPDATE `persinst`
                    SET `persinst_geburtsjahr` = ?
                    WHERE `persinst_id` = ?
                    LIMIT 1");
                $updateStmt->bind_param("ii", $birth_date, $persinst_id);
                $updateStmt->execute();
                $updateStmt->close();
                $updateStmt = null;
            }

        }

        if ($actor_dates['persinst_sterbejahr'] === '') {

            // Try to get death date (P570)
            if (!empty($data['claims']['P570']) and !empty($data['claims']['P570']['0']['mainsnak']['datavalue']['value']['time'])) {
                $death_date = self::wikidataBirthDeathToYear($data['claims']['P570']['0']['mainsnak']['datavalue']['value']['time']);
            }

            if (!empty($death_date)) {
                $updateStmt = $this->_mysqli_noda->do_prepare("UPDATE `persinst`
                    SET `persinst_sterbejahr` = ?
                    WHERE `persinst_id` = ?
                    LIMIT 1");
                $updateStmt->bind_param("ii", $death_date, $persinst_id);
                $updateStmt->execute();
                $updateStmt->close();
                $updateStmt = null;
            }

        }

        if ($actor_dates['persinst_gender'] === '') {

            // Try to get gender (P21)
            if (!empty($data['claims']['P21']) and !empty($data['claims']['P21']['0']['mainsnak']['datavalue']['value']['id'])) {

                $wikidata_gender_id = $data['claims']['P21']['0']['mainsnak']['datavalue']['value']['id'];

                switch ($wikidata_gender_id) {
                    case "Q6581097": // male
                    case "Q44148": // male organism
                        $wikidata_gender = "male";
                        break;
                    case "Q6581072": // female
                    case "Q1052281": // transgender female
                    case "Q43445": // female organism
                        $wikidata_gender = "female";
                        break;
                    case "Q48270":
                        $wikidata_gender = "other";
                        break;
                    default:
                        throw new Exception("Unknown gender: Q-ID is " . $wikidata_gender_id);
                }

            }

            if (!empty($wikidata_gender)) {
                $updateStmt = $this->_mysqli_noda->do_prepare("UPDATE `persinst`
                    SET `persinst_gender` = ?
                    WHERE `persinst_id` = ?
                    LIMIT 1");
                $updateStmt->bind_param("si", $wikidata_gender, $persinst_id);
                $updateStmt->execute();
                $updateStmt->close();
                $updateStmt = null;
            }

        }

    }

    /**
     * Function for retrieving information on an actor from Wikidata: description
     * (from Wikipedia), life dates and gender, links to other norm data
     * repositories and translations.
     *
     * @param string  $lang        The user's selected used language.
     * @param string  $wikidata_id Wikidata ID.
     * @param integer $persinst_id Actor ID.
     * @param string  $erfasst_von User name who's currently editing.
     *
     * @throws MDhttpFailedException If the entity cannot be loaded from Wikidata.
     * @throws Exception             If a linked repository has no known URL prefix.
     *
     * @return void
     */
    public function retrievePersinstInfoFromWikidataID(string $lang, string $wikidata_id, int $persinst_id, string $erfasst_von) {

        $data = json_decode(MD_STD::runCurl("https://www.wikidata.org/wiki/Special:EntityData/" . urlencode($wikidata_id) . ".json", 10000), true);
        // Also covers responses that do not contain the requested entity.
        if (!is_array($data) || empty($data['entities'][$wikidata_id])) {
            throw new MDhttpFailedException("Failed fetching from Wikidata. Try again later.");
        }
        $data = $data['entities'][$wikidata_id];

        // Get links to wikipedia
        $wikilink = $wikilinkterm = [];
        foreach (self::LANGUAGES_MAIN_DESC as $tLang) {
            if (isset($data['sitelinks'][$tLang . 'wiki']['url'])) $wikilink[$tLang] = $data['sitelinks'][$tLang . 'wiki']['url'];
            if (isset($data['sitelinks'][$tLang . 'wiki']['title'])) $wikilinkterm[$tLang] = str_replace(' ', '_', $data['sitelinks'][$tLang . 'wiki']['title']);
        }

        // Try the user's language first, then the remaining main languages, and
        // stop as soon as one description has been entered.
        $alreadyEntered = false;
        $languagesToTry = array_unique(array_merge([$lang], self::LANGUAGES_MAIN_DESC));

        foreach ($languagesToTry as $sprache) {

            if ($alreadyEntered === true) break;
            if (!isset($wikilink[$sprache]) || !isset($wikilinkterm[$sprache]) || !is_string($wikilinkterm[$sprache])) continue;

            $response = MD_STD::runCurl("https://" . $sprache . ".wikipedia.org/w/api.php?action=parse&page=" . urlencode($wikilinkterm[$sprache]) . "&prop=text&section=0&format=json", 10000);
            $parsedText = json_decode($response, true)['parse']['text']['*'] ?? '';

            // Process data retrieved from wikipedia
            if (!empty($datafromwiki = self::_cleanWikidataInput((string)$parsedText))) {
                $alreadyEntered = $this->retrievePersinstDescFromWikipedia($persinst_id, $wikidata_id, $datafromwiki, $wikilink[$sprache], $lang, $sprache, $erfasst_von);
            }

        }

        $this->enterPersinstBirthDeathDatesFromWikidata($data, $persinst_id);

        // Get links to other norm data sources
        $nodaLinks = [
            "wikidata" => $wikidata_id,
        ];

        foreach (self::P_IDS_NODA_TAGS as $vocabName => $pId) {
            if ($vocabName === 'lcsh') continue;
            if (isset($data['claims'][$pId])) {
                // Claims without a concrete value yield null and are skipped below.
                $nodaLinks[$vocabName] = $data['claims'][$pId][0]['mainsnak']['datavalue']['value'] ?? null;
            }
        }

        // Resolve all links before touching the DB, so that an unknown source
        // does not abort in the middle of an open transaction.
        $rowsToWrite = [];
        foreach ($nodaLinks as $noda_source => $nodaId) {

            if ($nodaId === null) continue;
            if (!isset(self::URL_PREFIXES_PLACES_NODA_SOURCE[$noda_source])) {
                throw new Exception("Unknown noda link: " . $noda_source);
            }

            $rowsToWrite[] = [(string)$noda_source, $nodaId, self::URL_PREFIXES_PLACES_NODA_SOURCE[$noda_source] . $nodaId];

        }

        // Write links to other noda entries.
        $insertNodaLinkStmt = $this->_mysqli_noda->do_prepare("INSERT INTO `noda`
            (`persinst_id`, `noda_source`, `noda_nrinsource`, `noda_link`, `noda_erfasst_von`)
            VALUES
            (?, ?, ?, ?, ?)
            ON DUPLICATE KEY UPDATE `noda_nrinsource` = ?, `noda_link` = ?, `noda_erfasst_von` = ?");

        $this->_mysqli_noda->autocommit(false);
        foreach ($rowsToWrite as list($noda_source, $nodaId, $noda_link_url)) {

            // The last placeholder is `noda_erfasst_von`: bind the user, not the link.
            $insertNodaLinkStmt->bind_param("isssssss", $persinst_id, $noda_source, $nodaId, $noda_link_url, $erfasst_von, $nodaId, $noda_link_url, $erfasst_von);
            $insertNodaLinkStmt->execute();

        }
        $this->_mysqli_noda->commit();
        $this->_mysqli_noda->autocommit(true);
        $insertNodaLinkStmt->close();

        $this->getWikidataTranslationsForPersinst($data, $persinst_id);

        NodaLogEdit::logPersinstEdit($this->_mysqli_noda, $persinst_id, "wikidata-fetcher", $erfasst_von, 'update', 'synchronize');

    }

    /**
     * Function for fetching translations from Wikipedia, based on Wikidata information.
     * For languages with a Wikipedia article, label and cleaned article intro are
     * stored; otherwise Wikidata's own label and description are used if both exist.
     *
     * @param array   $data        Entity fetched from wikidata.
     * @param integer $persinst_id Actor ID.
     *
     * @throws MDExpectedException If the parallel requests cannot be initialized.
     *
     * @return void
     */
    public function getWikidataTranslationsForPersinst(array $data, int $persinst_id):void {

        $checkagainstLanguage = self::LANGUAGES_TO_CHECK;

        list($languagesToFetch, $wikilinks) = self::getWikidataWikipediaTranslationSources($checkagainstLanguage, $data);

        if (empty($languagesToFetch)) {
            return;
        }

        try {
            $contents = MD_STD::runCurlMulti($languagesToFetch, 10000);
        }
        catch (TypeError $e) {
            throw new MDExpectedException("Failed to initialize a request. Try pressing F5 to run the requests again.");
        }

        $insertStmt = $this->_mysqli_noda->do_prepare("CALL nodaInsertPersinstTranslation(?, ?, ?, ?, ?)");

        $this->_mysqli_noda->autocommit(false);
        foreach ($checkagainstLanguage as $lang) {

            if (!empty($languagesToFetch[$lang]) && !empty($data['sitelinks'][$lang . 'wiki'])) {

                $wikilink = $wikilinks[$lang];

                $tDescription = "";
                if (!empty($contents[$lang])) {

                    $descFromWiki = json_decode($contents[$lang], true)['parse']['text']['*'] ?? null;

                    // Process data retrieved from wikipedia
                    if ($descFromWiki !== null) {
                        $tDescription = self::_cleanWikidataInput((string)$descFromWiki);
                    }

                    if (substr($tDescription, -1) == chr(10)) $tDescription = substr($tDescription, 0, strlen($tDescription) - 1);

                    // Only attach the source note if there is text to attribute.
                    // $tDescription holds the first paragraph of the respective Wikipedia.
                    if ($tDescription !== '') {
                        $tDescription = '"' . $tDescription . '" - (' . $data['labels'][$lang]['language'] . '.wikipedia.org ' . date('d.m.Y') . ')';
                    }

                }

                $tLang = self::_cleanWikidataInput((string)$data['labels'][$lang]['language']);
                $tLabel = self::_cleanWikidataInput((string)$data['labels'][$lang]['value']);

                try {
                    $insertStmt->bind_param("issss", $persinst_id, $tLang, $tLabel, $tDescription, $wikilink);
                    $insertStmt->execute();
                }
                catch (MDMysqliInvalidEncodingError $e) {
                    // Best effort: skip this language, but leave a trace like the
                    // place translations do instead of failing silently.
                    $_SESSION["editHistory"] = ["changesStored", "Error adding translation for language $tLang"];
                }

            }
            // No Wikipedia link for this language: fall back to Wikidata's own
            // label and description.
            else if (!empty($data['labels'][$lang]['value']) and !empty($data['descriptions'][$lang])) {

                $wikilink = "";

                $insertStmt->bind_param("issss", $persinst_id, $data['labels'][$lang]['language'], $data['labels'][$lang]['value'], $data['descriptions'][$lang]['value'], $wikilink);
                $insertStmt->execute();

            }

        }
        $this->_mysqli_noda->commit();
        $this->_mysqli_noda->autocommit(true);

        $insertStmt->close();
        unset($insertStmt);

    }

    /**
     * Function for entering base information about a place from wikidata.
     *
     * @param mysqli_result $currentPlaceResult Mysqli result pointing to the current place.
     * @param string        $datafromwiki       Data parsed from wikidata.
     * @param array         $wikilink           Wikilink.
     * @param string        $preflang           Language of the user interface in general.
     * @param string        $lang               Language of the main entry.
     * @param integer       $placeID            ID of the place.
     * @param string        $erfasst_von        User name.
     *
     * @return boolean
     */
    public function enterPlaceDescFromWikidata(mysqli_result $currentPlaceResult, string $datafromwiki, array $wikilink, string $preflang, string $lang, int $placeID, string $erfasst_von) {

        $datafromwiki = '"' . $datafromwiki . '" - (Wikipedia (' . $lang . ') ' . date("d.m.Y") . ')';

        if (!($curPlaceInfo = $currentPlaceResult->fetch_assoc())) return false;

        if (!empty(trim($curPlaceInfo['ort_anmerkung'])) and substr($curPlaceInfo['ort_anmerkung'], 0, 3) !== 'GND') {

            if (isset($_GET['keep'])) {

                if ($_GET['keep'] === 'add') {
                    $datafromwiki = $curPlaceInfo['ort_anmerkung'] . PHP_EOL . PHP_EOL . $datafromwiki;
                }
                else if ($_GET['keep'] === 'keep') {
                    $datafromwiki = $curPlaceInfo['ort_anmerkung'];
                }

            }
            else {

                $tlLoader = new MDTlLoader("wiki_getter_place", $preflang);
                echo self::generateHTMLHeadForWikidataFetcher($lang);
                echo self::generateWikidataFetcherHeader($tlLoader);
                echo '

There is already an entry for description ...

Actual entry

' . nl2br($curPlaceInfo['ort_anmerkung']) . '

Now found

' . $datafromwiki . '

Keep old entry
Replace with new entry
Keep old and add new entry


'; exit; } } // Write description to DB $updateStmt = $this->_mysqli_noda->do_prepare("UPDATE `orte` SET `ort_anmerkung` = ?, `ort_erfasst_am` = NOW(), `ort_erfasst_von` = ? WHERE ort_id = ?"); try { $updateStmt->bind_param("ssi", $datafromwiki, $erfasst_von, $placeID); $updateStmt->execute(); } catch (MDMysqliInvalidEncodingError $e) { $_SESSION["editHistory"] = ["changesStored", "Error adding base description"]; } $updateStmt->close(); unset($updateStmt); // Write link to wikipedia to relevant noda DB table $wikiAlreadyResult = $this->_mysqli_noda->query_by_stmt("SELECT `noda_orte`.`noda_id` FROM `noda_orte` WHERE `noda_orte`.`ort_id` = ? AND `noda_orte`.`noda_source` = 'Wikipedia'", "i", $placeID); switch ($wikiAlreadyResult->num_rows) { case 0: $insertWikiStmt = $this->_mysqli_noda->do_prepare("INSERT INTO `noda_orte` (`ort_id`, `noda_source`, `noda_nrinsource`, `noda_link`, `noda_erfasst_am`, `noda_erfasst_von`) VALUES (?, 'Wikipedia', '', ?, NOW(), ?)"); $insertWikiStmt->bind_param("iss", $placeID, $wikilink[$lang], $erfasst_von); $insertWikiStmt->execute(); $insertWikiStmt->close(); unset($insertWikiStmt); break; case 1: if ($wikiAlreadyData = $wikiAlreadyResult->fetch_assoc()) { $wikischon_id = $wikiAlreadyData['noda_id']; $updateStmt = $this->_mysqli_noda->do_prepare("UPDATE `noda_orte` SET `noda_link` = ? WHERE `noda_id` = ?"); $updateStmt->bind_param("si", $wikilink[$lang], $wikischon_id); $updateStmt->execute(); $updateStmt->close(); unset($updateStmt); } break; } $wikiAlreadyResult->close(); unset($wikiAlreadyResult); return true; } /** * Function for retrieving a superordinate place relation from Wikidata information * for places * * @param integer $onum Place ID. * @param array $data Wikidata information (P131 claim). 
* * @return void */ public function retrieveSuperordinateAdministrativePlace(int $onum, array $data):void { if (!empty($data[0]["mainsnak"]["datavalue"]["value"]["id"])) { // Check if there already is a superordinate of the current place $result = $this->_mysqli_noda->query_by_stmt("SELECT 1 FROM `ort_relation` WHERE `ort_menor_id` = ? LIMIT 1", "i", $onum); if ($result->num_rows !== 0) { $result->close(); $result = null; return; } $result->close(); $result = null; // If there is no superordinate, check if the identified superordinate // is known in the noda DB. $superordinateId = $data[0]["mainsnak"]["datavalue"]["value"]["id"]; $result = $this->_mysqli_noda->query_by_stmt("SELECT `ort_id` FROM `noda_orte` WHERE `noda_source` = 'wikidata' AND `noda_nrinsource` = ?", "s", $superordinateId); if (!($superordinateData = $result->fetch_row())) { $result->close(); $result = null; return; } $result->close(); $result = null; $topPlaceId = $superordinateData[0]; // Enter superordinate place by Wikidata $insertStmt = $this->_mysqli_noda->do_prepare("INSERT INTO `ort_relation` (`ort_mayor_id`, `ort_menor_id`, `ort_relation`) VALUES (?, ?, 1)"); $insertStmt->bind_param("ii", $topPlaceId, $onum); $insertStmt->execute(); $insertStmt->close(); $insertStmt = null; } } /** * Function for retrieving place information based on a Wikidata ID. * * @param string $lang Language. * @param string $wikidata_id Wikidata Q-ID. * @param integer $onum Place ID. * @param string $erfasst_von User name of the current user. * * @return void */ public function retrievePlaceInfoFromWikidataID(string $lang, string $wikidata_id, int $onum, string $erfasst_von) { $data = MD_STD::runCurl("https://www.wikidata.org/wiki/Special:EntityData/" . urlencode($wikidata_id) . ".json", 10000); if (!$data = json_decode($data, true)) { throw new MDhttpFailedException("Failed fetching from Wikidata. 
Try again later."); } $data = $data['entities'][$wikidata_id]; $wikilink = $wikilinkterm = []; foreach (self::LANGUAGES_MAIN_DESC as $tLang) { if (isset($data['sitelinks'][$tLang . 'wiki']['url'])) $wikilink[$tLang] = $data['sitelinks'][$tLang . 'wiki']['url']; if (isset($data['sitelinks'][$tLang . 'wiki']['title'])) $wikilinkterm[$tLang] = str_replace(' ', '_', $data['sitelinks'][$tLang . 'wiki']['title']); } $currentPlaceResult = $this->_mysqli_noda->query_by_stmt("SELECT `ort_anmerkung` FROM `orte` WHERE `ort_id` = ?", "i", $onum); $alreadyEntered = false; // P131: Located in administrative unit if (isset($data['claims']['P131'])) { $this->retrieveSuperordinateAdministrativePlace($onum, $data['claims']['P131']); } if (!empty($wikilink[$lang])) { $datafromwiki = MD_STD::runCurl("https://" . urlencode($lang) . ".wikipedia.org/w/api.php?action=parse&page=" . urlencode($wikilinkterm[$lang]) . "&prop=text§ion=0&format=json", 10000); $datafromwiki = json_decode($datafromwiki, true)['parse']['text']['*']; if (!empty($datafromwiki) and $datafromwiki = self::_cleanWikidataInput((string)$datafromwiki)) { $alreadyEntered = $this->enterPlaceDescFromWikidata($currentPlaceResult, $datafromwiki, $wikilink, $lang, $lang, $onum, $erfasst_von); } } foreach (self::LANGUAGES_MAIN_DESC as $sprache) { //if ($alreadyEntered === true) break; if ($alreadyEntered === true) break; if (!isset($wikilink[$sprache])) continue; $datafromwiki = MD_STD::runCurl("https://" . urlencode($sprache) . ".wikipedia.org/w/api.php?action=parse&page=" . urlencode($wikilinkterm[$sprache]) . 
"&prop=text§ion=0&format=json", 10000); $datafromwiki = json_decode($datafromwiki, true)['parse']['text']['*']; if (!empty($datafromwiki) and $datafromwiki = self::_cleanWikidataInput((string)$datafromwiki)) { $alreadyEntered = $this->enterPlaceDescFromWikidata($currentPlaceResult, $datafromwiki, $wikilink, $lang, $sprache, $onum, $erfasst_von); } } $currentPlaceResult->close(); unset($currentPlaceResult); if (isset($data['claims']['P1566'])) $geonames_id = $data['claims']['P1566'][0]['mainsnak']['datavalue']['value']; if (isset($data['claims']['P1667'])) $tgn_id = $data['claims']['P1667'][0]['mainsnak']['datavalue']['value']; $nodaLinks = [ "wikidata" => $wikidata_id, ]; foreach (self::P_IDS_NODA_TAGS as $vocabName => $pId) { if ($vocabName === 'lcsh') continue; if (isset($data['claims'][$pId])) $nodaLinks[$vocabName] = $data['claims'][$pId][0]['mainsnak']['datavalue']['value']; } if (isset($data['claims']['P625'])) { $latitude_wd = $data['claims']['P625'][0]['mainsnak']['datavalue']['value']['latitude']; $longitude_wd = $data['claims']['P625'][0]['mainsnak']['datavalue']['value']['longitude']; } $this->_mysqli_noda->autocommit(false); $insertNodaLinkStmt = $this->_mysqli_noda->do_prepare("INSERT INTO `noda_orte` (`ort_id`, `noda_source`, `noda_nrinsource`, `noda_link`, `noda_erfasst_von`) VALUES (?, ?, ?, ?, ?) ON DUPLICATE KEY UPDATE `noda_nrinsource` = ?, `noda_link` = ?"); foreach ($nodaLinks as $noda_source => $nodaId) { if (!isset(self::URL_PREFIXES_PLACES_NODA_SOURCE[$noda_source])) { throw new Exception("Unknown noda link: " . $noda_source); } $noda_link_url = self::URL_PREFIXES_PLACES_NODA_SOURCE[$noda_source] . $nodaId; $insertNodaLinkStmt->bind_param("issssss", $onum, $noda_source, $nodaId, $noda_link_url, $erfasst_von, $nodaId, $noda_link_url); $insertNodaLinkStmt->execute(); } $insertNodaLinkStmt->close(); unset($insertNodaLinkStmt); if (!empty($tgn_id)) { $updateStmt = $this->_mysqli_noda->do_prepare("UPDATE `orte` SET `ort_land` = ? 
WHERE `ort_id` = ?"); $updateStmt->bind_param("si", $tgn_id, $onum); $updateStmt->execute(); $updateStmt->close(); unset($updateStmt); } if (!empty($geonames_id)) { $updateStmt = $this->_mysqli_noda->do_prepare("UPDATE `orte` SET `ort_geonames` = ? WHERE `ort_id` = ?"); $updateStmt->bind_param("si", $geonames_id, $onum); $updateStmt->execute(); $updateStmt->close(); unset($updateStmt); } if (!empty($latitude_wd) and !empty($longitude_wd)) { $updateStmt = $this->_mysqli_noda->do_prepare("UPDATE `orte` SET `ort_nord_sued` = ?, `ort_west_ost` = ?, ort_zoom = '9' WHERE `ort_id` = ?"); $updateStmt->bind_param("ddi", $latitude_wd, $longitude_wd, $onum); $updateStmt->execute(); $updateStmt->close(); unset($updateStmt); } $this->_mysqli_noda->commit(); $this->_mysqli_noda->autocommit(true); $this->getWikidataTranslationsForPlace($data, $onum); NodaLogEdit::logPlaceEdit($this->_mysqli_noda, $onum, "wikidata-fetcher", $erfasst_von, 'update', 'synchronize'); }

    /**
     * Stores translations (label, description, source link) of a place.
     *
     * For every language in LANGUAGES_TO_CHECK that has a Wikipedia sitelink,
     * the Wikipedia response is used for the label and the description. For
     * languages without a sitelink, the label and description provided by
     * Wikidata itself are stored, if both are available.
     *
     * @param array   $data   Entity data fetched from wikidata.
     * @param integer $ort_id Place ID.
     *
     * @return void
     */
    public function getWikidataTranslationsForPlace(array $data, int $ort_id) {

        $checkagainstLanguage = self::LANGUAGES_TO_CHECK;

        list($languagesToFetch, $wikilinks) = self::getWikidataWikipediaTranslationSources($checkagainstLanguage, $data);
        if (empty($languagesToFetch)) {
            return;
        }

        try {
            $contents = MD_STD::runCurlMulti($languagesToFetch, 10000);
        }
        catch (TypeError $e) {
            throw new MDExpectedException("Failed to initialize a request. Try pressing F5 to run the requests again.");
        }

        $insertStmt = $this->_mysqli_noda->do_prepare("CALL `nodaInsertOrtTranslation`(?, ?, ?, ?, ?)");
        $this->_mysqli_noda->autocommit(false);

        foreach ($checkagainstLanguage as $lang) {

            if (!empty($languagesToFetch[$lang]) && !empty($data['sitelinks'][$lang . 'wiki'])) {

                $wikilink = $wikilinks[$lang];

                // A sitelink may exist without a Wikidata label in the same
                // language; fall back to the language code being iterated.
                $labelLang = (string)($data['labels'][$lang]['language'] ?? $lang);

                // Reset per language. Otherwise the title fetched for the
                // previous language would be stored for this one whenever
                // no Wikipedia content could be retrieved.
                $tLabel       = '';
                $tDescription = '';

                if (!empty($contents[$lang])) {

                    $wikiDataDecoded = json_decode($contents[$lang], true);
                    if (!$wikiDataDecoded) {
                        continue;
                    }

                    // Error responses carry no "parse" key.
                    $tLabel       = (string)($wikiDataDecoded['parse']['title'] ?? '');
                    $descFromWiki = $wikiDataDecoded['parse']['text']['*'] ?? '';

                    if (!empty($descFromWiki)) {
                        // Process data retrieved from wikipedia
                        $tDescription = self::_cleanWikidataInput((string)$descFromWiki);
                        if (substr($tDescription, -1) == chr(10)) {
                            $tDescription = substr($tDescription, 0, strlen($tDescription) - 1);
                        }
                        $tDescription = '"' . $tDescription . '" - (' . $labelLang . '.wikipedia.org ' . date('d.m.Y') . ')';
                        // Strip footnote markers such as [1] or [12] and
                        // replace apostrophes.
                        $tDescription = str_replace("'", "´", MD_STD::preg_replace_str("/\&\#91\;[0-9]+\&\#93\;/", '', $tDescription));
                    }

                }

                $tLang = self::_cleanWikidataInput($labelLang);
                if (empty($tLabel)) {
                    $tLabel = self::_cleanWikidataInput((string)($data['labels'][$lang]['value'] ?? ''));
                }

                try {
                    $insertStmt->bind_param("issss", $ort_id, $tLang, $tLabel, $tDescription, $wikilink);
                    $insertStmt->execute();
                }
                catch (MDMysqliInvalidEncodingError $e) {
                    $_SESSION["editHistory"] = ["changesStored", "Error adding translation for language $tLang"];
                }

            }
            else if (!empty($data['labels'][$lang]['value']) and !empty($data['descriptions'][$lang])) {

                // No Wikipedia page: use what Wikidata itself provides.
                $wikilink = "";

                $insertStmt->bind_param("issss", $ort_id, $data['labels'][$lang]['language'], $data['labels'][$lang]['value'], $data['descriptions'][$lang]['value'], $wikilink);
                $insertStmt->execute();

            }

        }

        $this->_mysqli_noda->commit();
        $this->_mysqli_noda->autocommit(true);

        $insertStmt->close();
        unset($insertStmt);

    }

    /**
     * Stores a description fetched from Wikipedia for a tag.
     *
     * If the tag already has a description that does not start with "GND",
     * the request parameter "keep" decides what happens: "replace" (or an
     * empty value) overwrites it, "add" appends the new text, any other value
     * leaves it untouched. Without the parameter, a page asking the user to
     * choose is printed and the script exits.
     *
     * @param integer $tag_id       Tag ID.
     * @param string  $datafromwiki Data fetched from Wikipedia.
     * @param string  $wikilink     Link to wikipedia entry.
     * @param string  $preflang     The user's currently used language.
     * @param string  $lang         Currently queried language.
     * @param string  $erfasst_von  User who adds the info.
     *
     * @return boolean False if the tag does not exist, true otherwise.
     */
    public function retrieveTagDescFromWikipedia(int $tag_id, string $datafromwiki, string $wikilink, string $preflang, string $lang, string $erfasst_von):bool {

        $datafromwiki = '"' . $datafromwiki . '" - (Wikipedia (' . $lang . ') ' . date("d.m.Y") . ')';
        // Strip footnote markers such as [1] or [12] and replace apostrophes.
        $datafromwiki = str_replace("'", "´", MD_STD::preg_replace_str("/\&\#91\;[0-9]+\&\#93\;/", '', $datafromwiki));

        $cergebnis = $this->_mysqli_noda->query_by_stmt("SELECT `tag_anmerkung` FROM `tag` WHERE `tag_id` = ?", "i", $tag_id);
        $cinfo = $cergebnis->fetch_assoc();
        $cergebnis->close();
        $cergebnis = null;

        if (!$cinfo) {
            return false;
        }

        $hasOwnDescription = (!empty($cinfo['tag_anmerkung']) && substr($cinfo['tag_anmerkung'], 0, 3) !== 'GND');

        // Ask the user before touching an existing description. This is
        // decided before any transaction is opened, as the script ends here.
        if ($hasOwnDescription && !isset($_GET['keep'])) {

            $tlLoader = new MDTlLoader("wiki_getter_tag", $preflang);
            echo self::generateHTMLHeadForWikidataFetcher($lang);
            echo self::generateWikidataFetcherHeader($tlLoader);
            echo '

Es gibt schon einen Eintrag im Beschreibungsfeld

Bisher vorhanden

' . nl2br($cinfo['tag_anmerkung']) . '

Jetzt gefunden

' . $datafromwiki . '

Keep old entry';
            echo '
Replace with new entry';
            echo '
Keep old and add new entry


';
            exit;

        }

        // Determine the text to write; null means "leave the field as is".
        $descToStore = null;
        if (!$hasOwnDescription) {
            $descToStore = $datafromwiki;
        }
        else if (!$_GET['keep'] || $_GET['keep'] === 'replace') {
            $descToStore = $datafromwiki;
        }
        else if ($_GET['keep'] === 'add') {
            $descToStore = $cinfo['tag_anmerkung'] . PHP_EOL . PHP_EOL . $datafromwiki;
        }

        $this->_mysqli_noda->autocommit(false);

        if ($descToStore !== null) {
            $updateTagDescStmt = $this->_mysqli_noda->do_prepare("UPDATE `tag` SET `tag_anmerkung` = ? WHERE `tag_id` = ?");
            $updateTagDescStmt->bind_param("si", $descToStore, $tag_id);
            $updateTagDescStmt->execute();
            $updateTagDescStmt->close();
            $updateTagDescStmt = null;
        }

        // Remember the Wikipedia page as a source of the tag.
        $insertNodaTagStmt = $this->_mysqli_noda->do_prepare("INSERT INTO `noda_tag` (`tag_id`, `noda_source`, `noda_nrinsource`, `noda_link`, `noda_erfasst_von`) VALUES (?, 'Wikipedia', '', ?, ?) ON DUPLICATE KEY UPDATE `noda_link` = ?");
        $insertNodaTagStmt->bind_param("isss", $tag_id, $wikilink, $erfasst_von, $wikilink);
        $insertNodaTagStmt->execute();
        $insertNodaTagStmt->close();

        // Update tag editing metadata
        $updateTagEditInfoStmt = $this->_mysqli_noda->do_prepare("UPDATE `tag` SET `tag_erfasst_am` = NOW(), `tag_erfasst_von` = ? WHERE `tag_id` = ?");
        $updateTagEditInfoStmt->bind_param("si", $erfasst_von, $tag_id);
        $updateTagEditInfoStmt->execute();
        $updateTagEditInfoStmt->close();
        $updateTagEditInfoStmt = null;

        $this->_mysqli_noda->commit();
        $this->_mysqli_noda->autocommit(true);

        return true;

    }

    /**
     * Writes relations to norm data sources to DB.
     *
     * @param array   $nodaLinks   Links to other noda sources, as a map of
     *                             vocabulary name => ID in that vocabulary.
     * @param integer $tag_id      Tag ID.
     * @param string  $erfasst_von Name of the user to edit this.
     *
     * @throws Exception If a vocabulary has no known URL prefix.
     *
     * @return void
     */
    public function writeNodaLinksTag(array $nodaLinks, int $tag_id, string $erfasst_von):void {

        // Validate everything before switching off autocommit, so that an
        // unknown vocabulary cannot leave the connection in manual-commit
        // mode with only a part of the links executed.
        foreach (array_keys($nodaLinks) as $vocabName) {
            if (empty(self::URL_PREFIXES_PLACES_NODA_SOURCE[$vocabName])) {
                throw new Exception("Unknown URL prefix for: " . $vocabName);
            }
        }

        $this->_mysqli_noda->autocommit(false);

        $insertNodaTagStmt = $this->_mysqli_noda->do_prepare("INSERT INTO `noda_tag` (`tag_id`, `noda_source`, `noda_nrinsource`, `noda_link`, `noda_erfasst_von`) VALUES (?, ?, ?, ?, ?) ON DUPLICATE KEY UPDATE `noda_nrinsource` = ?, `noda_link` = ?, `noda_erfasst_von` = ?");

        foreach ($nodaLinks as $vocabName => $nodaId) {

            if (empty($nodaId)) {
                continue;
            }

            $noda_link = self::URL_PREFIXES_PLACES_NODA_SOURCE[$vocabName] . $nodaId;

            $insertNodaTagStmt->bind_param("isssssss", $tag_id, $vocabName, $nodaId, $noda_link, $erfasst_von, $nodaId, $noda_link, $erfasst_von);
            $insertNodaTagStmt->execute();

        }

        $insertNodaTagStmt->close();
        $insertNodaTagStmt = null;

        $this->_mysqli_noda->commit();
        $this->_mysqli_noda->autocommit(true);

    }

    /**
     * Fetches the cleaned text of the intro section of a Wikipedia page.
     *
     * @param string $wikiLang  Language code of the Wikipedia to query.
     * @param string $pageTitle Page title, with underscores for spaces.
     *
     * @return string Cleaned text; empty if nothing usable was returned.
     */
    private static function _fetchWikipediaIntroText(string $wikiLang, string $pageTitle):string {

        // section=0 restricts the response to the text before the first heading.
        $response = MD_STD::runCurl("https://" . $wikiLang . ".wikipedia.org/w/api.php?action=parse&page=" . urlencode($pageTitle) . "&prop=text&section=0&format=json", 10000);

        $decoded = json_decode($response, true);
        if (!is_array($decoded)) {
            return '';
        }

        // Error responses carry no "parse" key.
        return (string)self::_cleanWikidataInput((string)($decoded['parse']['text']['*'] ?? ''));

    }

    /**
     * Imports description, norm data links and translations for a tag from
     * the Wikidata entity with the given ID.
     *
     * The description is taken from the Wikipedia in the user's language if
     * possible, otherwise from the first Wikipedia of LANGUAGES_MAIN_DESC
     * that yields one.
     *
     * @param string  $lang        The user's selected used language.
     * @param string  $wikidata_id Wikidata ID.
     * @param integer $tag_id      Tag ID.
     * @param string  $erfasst_von User name who's currently editing.
     *
     * @throws MDhttpFailedException If the response of Wikidata cannot be decoded.
     *
     * @return void
     */
    public function retrieveTagInfoFromWikidataID(string $lang, string $wikidata_id, int $tag_id, string $erfasst_von) {

        $data = MD_STD::runCurl("https://www.wikidata.org/wiki/Special:EntityData/" . $wikidata_id . ".json", 10000);
        $data = json_decode($data, true);
        if ($data === null) {
            throw new MDhttpFailedException("Failed fetching from Wikidata. Try again later.");
        }

        // The entity may be missing under the requested ID; continue with
        // empty data then, so that at least the Wikidata link is written.
        $data = $data['entities'][$wikidata_id] ?? [];

        // NOTE(review): LANGUAGES_MAIN_DESC lists 'jp'; Japanese Wikipedia
        // sitelinks are presumably keyed 'jawiki' - confirm.
        $wikilink = $wikilinkterm = [];
        foreach (self::LANGUAGES_MAIN_DESC as $tLang) {
            if (isset($data['sitelinks'][$tLang . 'wiki']['url'])) {
                $wikilink[$tLang] = $data['sitelinks'][$tLang . 'wiki']['url'];
            }
            if (isset($data['sitelinks'][$tLang . 'wiki']['title'])) {
                $wikilinkterm[$tLang] = str_replace(' ', '_', $data['sitelinks'][$tLang . 'wiki']['title']);
            }
        }

        $alreadyEntered = false;

        // First choice: the Wikipedia in the user's own language.
        if (isset($wikilink[$lang]) and isset($wikilinkterm[$lang]) and is_string($wikilinkterm[$lang])) {

            $datafromwiki = self::_fetchWikipediaIntroText($lang, $wikilinkterm[$lang]);
            if (!empty($datafromwiki)) {
                $alreadyEntered = $this->retrieveTagDescFromWikipedia($tag_id, $datafromwiki, $wikilink[$lang], $lang, $lang, $erfasst_von);
            }

        }

        // Otherwise: the first language that provides a description.
        foreach (self::LANGUAGES_MAIN_DESC as $sprache) {

            if ($alreadyEntered === true) {
                break;
            }
            if (!isset($wikilink[$sprache]) || !isset($wikilinkterm[$sprache]) || !is_string($wikilinkterm[$sprache])) {
                continue;
            }

            $datafromwiki = self::_fetchWikipediaIntroText($sprache, $wikilinkterm[$sprache]);
            if (!empty($datafromwiki)) {
                $alreadyEntered = $this->retrieveTagDescFromWikipedia($tag_id, $datafromwiki, $wikilink[$sprache], $lang, "$sprache", $erfasst_von);
            }

        }

        $nodaLinks = [
            "wikidata" => $wikidata_id,
        ];

        // NOTE(review): P_IDS_NODA_TAGS contains 'iconclass', for which
        // URL_PREFIXES_PLACES_NODA_SOURCE has no entry, so writeNodaLinksTag()
        // throws for entities with a P1256 claim - confirm this is intended.
        foreach (self::P_IDS_NODA_TAGS as $vocabName => $pId) {
            // 'loc' and 'lcsh' share the property P244; tags use 'lcsh'.
            if ($vocabName === 'loc') {
                continue;
            }
            if (isset($data['claims'][$pId])) {
                $nodaLinks[$vocabName] = $data['claims'][$pId][0]['mainsnak']['datavalue']['value'];
            }
        }

        $this->writeNodaLinksTag($nodaLinks, $tag_id, $erfasst_von);

        // Get translations
        if (!empty($data)) {
            $this->getWikidataTranslationsForTag($data, $tag_id);
        }

        NodaLogEdit::logTagEdit($this->_mysqli_noda, $tag_id, "wikidata-fetcher", $erfasst_von, 'update', 'synchronize');

    }

    /**
     * Function for fetching translations from wikidata.
     *
     * @param array   $data   Entity data fetched from wikidata.
     * @param integer $tag_id Tag ID.
* * @return void */
    public function getWikidataTranslationsForTag(array $data, int $tag_id) {

        // For every language in LANGUAGES_TO_CHECK with a Wikipedia sitelink,
        // the label comes from Wikidata and the description from Wikipedia.
        // Languages without a sitelink use Wikidata's own label and
        // description, if both are available.
        $checkagainstLanguage = self::LANGUAGES_TO_CHECK;

        list($languagesToFetch, $wikilinks) = self::getWikidataWikipediaTranslationSources($checkagainstLanguage, $data);
        if (empty($languagesToFetch)) {
            return;
        }

        try {
            $contents = MD_STD::runCurlMulti($languagesToFetch, 10000);
        }
        catch (TypeError $e) {
            throw new MDExpectedException("Failed to initialize a request. Try pressing F5 to run the requests again.");
        }

        $insertStmt = $this->_mysqli_noda->do_prepare("CALL nodaInsertTagTranslation(?, ?, ?, ?, ?)");
        $this->_mysqli_noda->autocommit(false);

        foreach ($checkagainstLanguage as $lang) {

            if (!empty($languagesToFetch[$lang]) && !empty($data['sitelinks'][$lang . 'wiki'])) {

                $wikilink = $wikilinks[$lang];

                // A sitelink may exist without a Wikidata label in the same
                // language; fall back to the language code being iterated.
                $labelLang = (string)($data['labels'][$lang]['language'] ?? $lang);

                $tDescription = "";
                if (!empty($contents[$lang])) {

                    // Undecodable or error responses carry no "parse" key.
                    $decoded      = json_decode($contents[$lang], true);
                    $descFromWiki = is_array($decoded) ? ($decoded['parse']['text']['*'] ?? '') : '';

                    if (!empty($descFromWiki)) {
                        // Process data retrieved from wikipedia
                        $tDescription = self::_cleanWikidataInput((string)$descFromWiki);
                        if (substr($tDescription, -1) == chr(10)) {
                            $tDescription = substr($tDescription, 0, strlen($tDescription) - 1);
                        }
                        $tDescription = '"' . $tDescription . '" - (' . $labelLang . '.wikipedia.org ' . date('d.m.Y') . ')';
                        // Strip footnote markers such as [1] or [12] and
                        // replace apostrophes.
                        $tDescription = str_replace("'", "´", MD_STD::preg_replace_str("/\&\#91\;[0-9]+\&\#93\;/", '', $tDescription));
                    }

                }

                $tLang  = self::_cleanWikidataInput($labelLang);
                $tLabel = self::_cleanWikidataInput((string)($data['labels'][$lang]['value'] ?? ''));

                if (in_array($tLang, self::LANGUAGES_TO_CAPITALIZE, true)) {
                    $tLabel       = ucfirst(trim($tLabel));
                    $tDescription = ucfirst(trim($tDescription));
                }

                try {
                    $insertStmt->bind_param("issss", $tag_id, $tLang, $tLabel, $tDescription, $wikilink);
                    $insertStmt->execute();
                }
                catch (MDMysqliInvalidEncodingError $e) {
                    // Best effort: a translation that cannot be stored due
                    // to its encoding is skipped, the others are still written.
                }

            }
            else if (!empty($data['labels'][$lang]['value']) and !empty($data['descriptions'][$lang])) {

                // No Wikipedia page: use what Wikidata itself provides.
                $wikilink     = "";
                $tLang        = $data['labels'][$lang]['language'];
                $tLabel       = $data['labels'][$lang]['value'];
                $tDescription = $data['descriptions'][$lang]['value'];

                if (in_array($lang, self::LANGUAGES_TO_CAPITALIZE, true)) {
                    $tLabel       = ucfirst(trim($tLabel));
                    $tDescription = ucfirst(trim($tDescription));
                }

                $insertStmt->bind_param("issss", $tag_id, $tLang, $tLabel, $tDescription, $wikilink);
                $insertStmt->execute();

            }

        }

        $this->_mysqli_noda->commit();
        $this->_mysqli_noda->autocommit(true);

        $insertStmt->close();
        unset($insertStmt);

    }

    /**
     * Searches Wikidata for a string.
     *
     * Results without a label and disambiguation pages are left out.
     *
     * @param string $searchTerm Search string.
     * @param string $lang       Searched language. Defaults to German.
     *
     * @return array List of arrays with the keys "id", "label", "label_ext"
     *               and "description".
     */
    public static function searchWikidataForString(string $searchTerm, string $lang = "de"):array {

        $response = MD_STD::runCurl("https://www.wikidata.org/w/api.php?action=wbsearchentities&format=json&search=" . urlencode($searchTerm) . "&language=" . urlencode($lang) . "&limit=20", 10000);

        // json_decode() signals failure with null, not with false.
        $wikidata_data = json_decode($response, true);
        if (!is_array($wikidata_data) || empty($wikidata_data['search'])) {
            return [];
        }

        $disambiguationMarkers = ['Wikipedia disambiguation page', 'Wikimedia disambiguation page'];

        $output = [];
        foreach ($wikidata_data['search'] as $result) {

            if (empty($result['label'])) {
                continue;
            }
            if (!empty($result['description']) && in_array($result['description'], $disambiguationMarkers, true)) {
                continue;
            }

            $cur = [
                'id'          => $result['id'],
                'label'       => $result['label'],
                'label_ext'   => '',
                'description' => '',
            ];

            if (!empty($result['match'])) {
                $cur['label_ext'] = "{$result['match']['language']}: {$result['match']['text']}";
            }
            if (!empty($result['description'])) {
                $cur['description'] = $result['description'];
            }

            $output[] = $cur;

        }

        return $output;

    }

    /**
     * Generates the HTML for an entry in the general wikidata search results list.
     *
     * Results without a label and disambiguation pages yield an empty string.
     *
     * @param string $link       Links.
     * @param string $searchTerm Search term.
     * @param string $lang       Language.
     * @param array  $result     Single result to display.
     *
     * @return string
     */
    public static function generateWikidataResultsListEntry(string $link, string $searchTerm, string $lang, array $result):string {

        if (!isset($result['label']) || $result['label'] == '') {
            return '';
        }

        if (isset($result['description'])
            && in_array($result['description'], ['Wikipedia disambiguation page', 'Wikimedia disambiguation page'], true)
        ) {
            return '';
        }

        $output = '';

        return $output;

    }

    /**
     * Function for generating a wikidata results list.
     *
     * @param string $link       Links.
     * @param string $searchTerm Search term.
     * @param string $lang       Language.
     *
     * @return string
     */
    public static function generateWikidataResultsList(string $link, string $searchTerm, string $lang):string {

        // NOTE(review): $lang is not passed on to the search, which hence
        // uses its default language - confirm this is intended.
        $wikidata_data = self::searchWikidataForString($searchTerm);

        if (empty($wikidata_data)) {
            // The search term is raw user input and needs escaping for HTML.
            return '

' . htmlspecialchars(ucfirst($searchTerm), ENT_QUOTES, 'UTF-8') . ' not found in Wikidata

';
        }

        $output = '
';
        foreach ($wikidata_data as $result) {
            $output .= self::generateWikidataResultsListEntry($link, $searchTerm, $lang, $result);
        }
        $output .= '
';

        return $output;

    }

    /**
     * Extracts the year from a time string as delivered by wikidata
     * (e.g. "+1879-03-14T00:00:00Z").
     *
     * The year is read directly from the string. Passing a bare four-digit
     * string to strtotime() is unreliable, as such strings are read as a
     * time of day (HH:MM) whenever they form a valid one, and dates with a
     * month or day of "00" are shifted.
     *
     * @param string $inputTime Input time in the format delivered by wikidata.
     *
     * @return string Year with at least four digits, prefixed with "-" for
     *                years BCE; empty string if no year can be found.
     */
    public static function wikidataBirthDeathToYear(string $inputTime):string {

        $matches = [];
        if (!preg_match('/^\s*([+-]?)(\d{1,16})-/', $inputTime, $matches)) {
            return '';
        }

        $year = (int)$matches[2];
        $sign = ($matches[1] === '-' && $year !== 0) ? '-' : '';

        return $sign . sprintf('%04d', $year);

    }

    /**
     * Function for generating a wikidata results list for actors, keeping track of life dates.
     *
     * Years of birth (P569) and death (P570) are appended to the description
     * of each result. Results matching both given years are marked as a
     * suggestion.
     *
     * @param string  $link        Links.
     * @param string  $searchTerm  Search term.
     * @param string  $lang        Language.
     * @param integer $yearOfBirth Year of birth.
     * @param integer $yearOfDeath Year of death.
     *
     * @return string
     */
    public static function generateWikidataResultsListForActors(string $link, string $searchTerm, string $lang, int $yearOfBirth, int $yearOfDeath):string {

        // NOTE(review): $lang is not passed on to the search, which hence
        // uses its default language - confirm this is intended.
        $wikidata_data = self::searchWikidataForString($searchTerm);

        if (empty($wikidata_data)) {
            // The search term is raw user input and needs escaping for HTML.
            return '

' . htmlspecialchars(ucfirst($searchTerm), ENT_QUOTES, 'UTF-8') . ' not found in Wikidata

';
        }

        // Fetch the full entity data of all results at once.
        $qLinksToCheck = [];
        foreach ($wikidata_data as $entry) {
            $qLinksToCheck[$entry['id']] = "https://www.wikidata.org/wiki/Special:EntityData/" . $entry['id'] . ".json";
        }

        $fetched = MD_STD::runCurlMulti($qLinksToCheck, 10000);

        $yearsOfBirthList = $yearsOfDeathList = [];
        foreach ($fetched as $qId => $data) {

            if (!($jsonData = json_decode($data, true))) {
                continue;
            }
            if (empty($jsonData['entities'][$qId])) {
                continue;
            }

            $claims = $jsonData['entities'][$qId]['claims'] ?? [];

            if (!empty($claims['P569'][0]['mainsnak']['datavalue']['value']['time'])) {
                $yearsOfBirthList[$qId] = (int)self::wikidataBirthDeathToYear($claims['P569'][0]['mainsnak']['datavalue']['value']['time']);
            }
            if (!empty($claims['P570'][0]['mainsnak']['datavalue']['value']['time'])) {
                $yearsOfDeathList[$qId] = (int)self::wikidataBirthDeathToYear($claims['P570'][0]['mainsnak']['datavalue']['value']['time']);
            }

        }

        $output = '
';

        foreach ($wikidata_data as $result) {

            if (empty($result['id'])) {
                continue;
            }

            $qId = $result['id'];

            if (!empty($yearsOfBirthList[$qId])) {
                if (empty($result['description'])) {
                    $result['description'] = 'Born: ' . $yearsOfBirthList[$qId];
                }
                else {
                    $result['description'] .= '
Born: ' . $yearsOfBirthList[$qId];
                }
            }

            if (!empty($yearsOfDeathList[$qId])) {
                if (empty($result['description'])) {
                    $result['description'] = 'Death: ' . $yearsOfDeathList[$qId];
                }
                else {
                    $result['description'] .= '
Death: ' . $yearsOfDeathList[$qId];
                }
            }

            // Both life dates match those known for the actor.
            if (!empty($yearsOfBirthList[$qId]) && !empty($yearsOfDeathList[$qId])
                && $yearsOfBirthList[$qId] === $yearOfBirth
                && $yearsOfDeathList[$qId] === $yearOfDeath
            ) {
                $result['description'] .= '
Suggestion!';
            }

            $output .= self::generateWikidataResultsListEntry($link, $searchTerm, $lang, $result);

        }

        $output .= '
';

        return $output;

    }

    /**
     * Function generates HTML head for wikidata fetchers.
     *
     * @param string  $lang     User language.
     * @param boolean $implyEnd If set to true, the end string will be echoed at the end of the script execution.
     *
     * @return string
     */
    public static function generateHTMLHeadForWikidataFetcher(string $lang, bool $implyEnd = true):string {

        $output = " Get Wikidata ";
        if (defined("MAIN_CSS_FILE")) {
            $output .= "";
        }
        $output .= " ";

        if ($implyEnd === true) {
            // Print the end of the page once the script terminates.
            register_shutdown_function(function() :void {
                echo printHTMLEnd();
            });
        }

        return MD_STD::minimizeHTMLString($output);

    }

    /**
     * Function generate header for wikidata fetcher pages.
     *
     * If no search term is passed, the request parameter "suchbegriff" is
     * used instead. The search term is escaped for output in HTML.
     *
     * @param MDTlLoader $tlLoader   Translation variable.
     * @param string     $additional Additional info.
     * @param string     $searchTerm Search term.
     *
     * @return string
     */
    public static function generateWikidataFetcherHeader(MDTlLoader $tlLoader, string $additional = "", string $searchTerm = ""):string {

        if (empty($searchTerm) and !empty($_GET['suchbegriff']) and is_string($_GET['suchbegriff'])) {
            $searchTerm = $_GET['suchbegriff'];
        }

        $output = '

Logo: Wikidata' . $tlLoader->tl("wiki", "wiki", "fetch_from_wikidata");
        // The search term is raw user input and needs escaping for HTML.
        $output .= ': ' . htmlspecialchars($searchTerm, ENT_QUOTES, 'UTF-8');
        $output .= '

';
        $output .= $additional;
        $output .= '
';

        return $output;

    }

    /**
     * Constructor.
     *
     * @param MDMysqli $mysqli_noda DB connection.
     *
     * @return void
     */
    public function __construct(MDMysqli $mysqli_noda) {

        $this->_mysqli_noda = $mysqli_noda;

    }

}