From f0b5a08cdfa229c67ea39683782c8e59461063e5 Mon Sep 17 00:00:00 2001 From: Joshua Ramon Enslin Date: Wed, 17 Mar 2021 16:10:49 +0100 Subject: [PATCH] Move NodaWikidataFetcher to this repository --- src/NodaWikidataFetcher.php | 1539 +++++++++++++++++++++++++++++++++++ 1 file changed, 1539 insertions(+) create mode 100644 src/NodaWikidataFetcher.php diff --git a/src/NodaWikidataFetcher.php b/src/NodaWikidataFetcher.php new file mode 100644 index 0000000..490c345 --- /dev/null +++ b/src/NodaWikidataFetcher.php @@ -0,0 +1,1539 @@ + + */ +declare(strict_types = 1); + +/** + * Helps fetching information from Wikidata. + */ +class NodaWikidataFetcher { + + const LANGUAGES_MAIN_DESC = ['da', 'de', 'en', 'es', 'fr', 'hu', 'it', 'nl', 'sv', 'ru', 'zh', 'jp', 'pt']; + const LANGUAGES_TO_CHECK = ['ar', 'bn', 'cs', 'da', 'de', 'el', 'en', 'es', 'fa', 'fi', 'fr', 'ha', 'he', 'hi', 'hu', 'id', 'it', 'ja', 'ka', 'ko', 'nl', 'pl', 'pt', 'ro', 'ru', 'sv', 'sw', 'ta', 'th', 'tl', 'tr', 'ur', 'vi', 'zh']; + + const URL_PREFIXES_PERSINST_NODA_SOURCE = [ + "gnd_id" => "https://d-nb.info/gnd/", + "viaf_id" => "https://viaf.org/viaf/", + "bnf_id" => "https://catalogue.bnf.fr/ark:/12148/cb", + "ulan_id" => "http://vocab.getty.edu/page/ulan/", + "rkd_id" => "https://rkd.nl/explore/artists/", + "pim_id" => "https://opac-nevter.pim.hu/en/record/-/record/", + "ndl_id" => "https://id.ndl.go.jp/auth/ndlna/", + "npg_id" => "https://www.npg.org.uk/collections/search/person/", + "loc_id" => "http://id.loc.gov/authorities/names/", + "bne_id" => "http://datos.bne.es/persona/", + "nomisma_id" => "http://nomisma.org/id/", + ]; + + const URL_PREFIXES_PLACES_NODA_SOURCE = [ + "gnd" => "https://d-nb.info/gnd/", + "nomisma" => "http://nomisma.org/id/", + "osm" => "https://www.openstreetmap.org/relation/", + "loc" => "http://id.loc.gov/authorities/names/", + "cona" => "http://vocab.getty.edu/page/cona/", + ]; + + /** @var MDMysqli */ + private MDMysqli $_mysqli_noda; + + /** + * Gets translation source 
/**
 * Builds the Wikipedia API request URLs and article links used as translation sources.
 *
 * A language is only included if the Wikidata entity has a label in that language
 * and its "<lang>wiki" sitelink provides both a URL and a title.
 *
 * @param array<string>       $checkagainstLanguage Language codes to check against.
 * @param array<string,mixed> $data                 Data fetched from Wikidata.
 *
 * @return array{0: array<string, string>, 1: array<string, string>}
 *         First entry: Wikipedia API URLs to fetch, keyed by language code.
 *         Second entry: links to the Wikipedia articles, keyed by language code.
 */
public static function getWikidataWikipediaTranslationSources(array $checkagainstLanguage, array $data):array {

    $languagesToFetch = $wikilinks = [];

    foreach ($checkagainstLanguage as $lang) {

        if (empty($data['labels'][$lang])) {
            continue;
        }

        $sitelink = $data['sitelinks'][$lang . 'wiki'] ?? [];

        // Both parts are needed: the title builds the API request, the URL is
        // stored as the link. Checking them up front avoids undefined-index
        // notices and passing null to str_replace() under strict types.
        if (!isset($sitelink['url'], $sitelink['title'])) {
            continue;
        }

        $wikilinkterm = str_replace(' ', '_', (string)$sitelink['title']);

        // "section=0" restricts the parse result to the lead section of the article.
        $languagesToFetch[$lang] = "https://" . $lang . ".wikipedia.org/w/api.php?action=parse&page="
            . urlencode($wikilinkterm) . "&prop=text&section=0&format=json";
        $wikilinks[$lang] = $sitelink['url'];

    }

    return [$languagesToFetch, $wikilinks];

}
+ * + * @return string + */ + private static function _cleanWikidataInput(string $input):string { + + if (substr($input, 0, strlen('<')) === '<') { + $doc = new DOMDocument(); + $doc->loadXML($input); + + $list = $doc->getElementsByTagName("style"); + while ($list->length > 0) { + $p = $list->item(0); + $p->parentNode->removeChild($p); + } + + $list = $doc->getElementsByTagName("table"); + while ($list->length > 0) { + $p = $list->item(0); + $p->parentNode->removeChild($p); + } + + $list = $doc->getElementsByTagName("div"); + while ($list->length > 1) { + $p = $list->item(1); + $p->parentNode->removeChild($p); + } + + $list = $doc->getElementsByTagName("ol"); + while ($list->length > 0) { + $p = $list->item(0); + $p->parentNode->removeChild($p); + } + + $firstP = $doc->getElementsByTagName("p")->item(0); + if (strpos($doc->saveHTML($firstP), 'geohack') !== false) { + $firstP->parentNode->removeChild($firstP); + } + + /* + if (strpos($doc->saveHTML(), 'Coordinates:') !== false) { + echo $doc->saveHTML(); + exit; + } + */ + + $input = str_replace(PHP_EOL, PHP_EOL . PHP_EOL, trim($doc->textContent)); + + if (mb_strlen($input) > 600) { + if (strpos($input, PHP_EOL . PHP_EOL, 600) !== false) { + $input = substr($input, 0, strpos($input, PHP_EOL . PHP_EOL, 600)); + } + } + + $bracketsToRemove = []; + for ($i = 0; $i < 100; $i++) { + $bracketsToRemove["[$i]"] = ""; + } + $input = strtr($input, $bracketsToRemove); + + $input = str_replace("\t", " ", $input); + + // Remove newlines with ensuing spaces + while (strpos($input, PHP_EOL . " ") !== false) { + $input = str_replace(PHP_EOL . " ", PHP_EOL, $input); + } + + // Remove double newlines + while (strpos($input, PHP_EOL . PHP_EOL . PHP_EOL) !== false) { + $input = str_replace(PHP_EOL . PHP_EOL . PHP_EOL, PHP_EOL . PHP_EOL, $input); + } + return $input; + + } + + $input = str_replace(PHP_EOL, '', $input); + + $stableToRemoveWHTML = [ + "

Si vous disposez d'ouvrages ou d'articles de référence ou si vous ", + '

En pratique : Quelles sources sont attendu', + '', + '

Géolocalisation sur la carte', + '

Koordinaatit:', + '

', + //'

', + '

', + '

', + '

', + '

', + '

'); + if ($first_mention_of_paragraph !== false) $input = substr($input, $first_mention_of_paragraph, (strrpos($input, '

') ?: strlen($input)) - $first_mention_of_paragraph); + + // Remove infobox tables specifically + $removeFirstParagraph = false; + $firstParagraphPosition = strpos($input, '"); + if ($currentSearchPos !== false && $currentSearchPos < $firstParagraphPosition) { + if (($tableEndPos = strpos($input, "")) !== false) { + if (($pStartPos = strpos($input, '", "', '

' . PHP_EOL . PHP_EOL . PHP_EOL, $input); + # $input = str_replace('?/i', '', $input); + $input = strip_tags($input); + + # for ($i = 150; $i < 1000; $i++) $input = str_replace("&#$i;", " ", $input); + $i = 0; + while (strpos($input, ".mw-parser-output") !== false and strpos($input, "}", strpos($input, ".mw-parser-output")) !== false) { + $part1 = substr($input, 0, strpos($input, ".mw-parser-output")); + $part2 = substr($input, strpos($input, "}", strpos($input, ".mw-parser-output")) + 1); + $input = $part1 . $part2; + $i++; + if ($i === 30) break; + } + + $bracketsToRemove = []; + for ($i = 0; $i < 100; $i++) { + $bracketsToRemove["[$i]"] = ""; + } + $input = strtr($input, $bracketsToRemove); + + $input = str_replace("\t", " ", $input); + + // Remove double whitespaces + while (strpos($input, " ") !== false) { + $input = str_replace(" ", " ", $input); + } + + // Remove newlines with ensuing spaces + while (strpos($input, PHP_EOL . " ") !== false) { + $input = str_replace(PHP_EOL . " ", PHP_EOL, $input); + } + + // Remove double newlines + while (strpos($input, PHP_EOL . PHP_EOL . PHP_EOL) !== false) { + $input = str_replace(PHP_EOL . PHP_EOL . PHP_EOL, PHP_EOL . PHP_EOL, $input); + } + + $stableToRemove = [ + "Vous pouvez partager vos connaissances en l’améliorant (comment ?) selon les recommandations des projets correspondants.", + ]; + foreach ($stableToRemove as $tToRemove) $input = str_replace($tToRemove, "", $input); + + $endings = [ + "StubDenne artikel om et vandløb ", + ]; + foreach ($endings as $ending) { + if (strpos($input, $ending) !== false) $input = substr($input, 0, strpos($input, $ending)); + } + + $input = trim($input); + + // Cut off overly long articles + if (mb_strlen($input) > 600) { + if (strpos($input, PHP_EOL . PHP_EOL, 600) !== false) { + $input = substr($input, 0, strpos($input, PHP_EOL . 
PHP_EOL, 600)); + } + } + + // Trim again to make really, really no superfluous whitespaces remain + $input = trim($input); + + $input = str_replace("'", "´", MD_STD::preg_replace_str("/\&\#91\;[0-9]\&\#93\;/", '', $input)); + + /* + if (strpos($input, " ") !== false) { + echo html_entity_decode($input); + die(); + } + */ + $input = html_entity_decode($input); + + return $input; + + } + + /** + * Function for fetching description from Wikipedia + * + * @param integer $persinst_id Person ID. + * @param string $wikidata_id Wikidata ID. + * @param string $datafromwiki Data fetched from Wikipedia. + * @param string $wikilink Link to wikipedia entry. + * @param string $preflang The user's currently used language. + * @param string $lang Currently queried language. + * @param string $erfasst_von User who adds the info. + * + * @return boolean + */ + public function retrievePersinstDescFromWikipedia(int $persinst_id, string $wikidata_id, string $datafromwiki, string $wikilink, string $preflang, string $lang, string $erfasst_von):bool { + + $output = false; + + $datafromwiki = '"' . $datafromwiki . '" - (Wikipedia (' . $lang . ') ' . date('d.m.Y') . ')'; + + $cergebnis = $this->_mysqli_noda->query_by_stmt("SELECT `persinst_kurzinfo`, `persinst_anzeigename` AS `display_name` + FROM `persinst` + WHERE `persinst_id` = ?", "i", $persinst_id); + + // Update persinst table + $updatePersinstStmt = $this->_mysqli_noda->do_prepare("UPDATE `persinst` + SET `persinst_kurzinfo` = ? + WHERE `persinst_id` = ?"); + + if ($cergebnis->num_rows === 0) exit; + + $cinfo = $cergebnis->fetch_assoc(); + if (!empty($cinfo['persinst_kurzinfo']) and substr($cinfo['persinst_kurzinfo'], 0, 3) != 'GND') { + + if (isset($_GET['keep'])) { + if (!($_GET['keep']) || $_GET['keep'] === 'replace') { + + $updatePersinstStmt->bind_param("si", $datafromwiki, $persinst_id); + $updatePersinstStmt->execute(); + + } + else if ($_GET['keep'] === 'add') { + + $newDesc = $cinfo['persinst_kurzinfo'] . PHP_EOL . 
PHP_EOL . $datafromwiki; + + $updatePersinstStmt->bind_param("si", $newDesc, $persinst_id); + $updatePersinstStmt->execute(); + + } + + $output = true; + } + else { + + $tlLoader = new MDTlLoader("wiki_getter_persinst", $preflang); + echo self::generateHTMLHeadForWikidataFetcher($lang); + echo self::generateWikidataFetcherHeader($tlLoader, "", $cinfo['display_name']); + echo ' +

Es gibt schon einen Eintrag im Beschreibungsfeld

+
+

Bisher vorhanden

+

' . nl2br($cinfo['persinst_kurzinfo']) . '

+
+
+

Jetzt gefunden

' . $datafromwiki . '

+
+
Keep old entry'; + echo '
Replace with new entry'; + echo '
Keep old and add new entry


'; + + exit; + + } + + } + else { + + $updatePersinstStmt->bind_param("si", $datafromwiki, $persinst_id); + $updatePersinstStmt->execute(); + + $output = true; + + } + + $cergebnis->close(); + $updatePersinstStmt->close(); + + // Set link to Wikipedia in noda table + + $insertNodaStmt = $this->_mysqli_noda->do_prepare("INSERT INTO `noda` + (`persinst_id`, `noda_source`, `noda_nrinsource`, `noda_link`, `noda_erfasst_von`) + VALUES + (?, 'Wikipedia', '', ?, ?) + ON DUPLICATE KEY UPDATE `noda_link` = ?"); + $insertNodaStmt->bind_param("isss", $persinst_id, $wikilink, $erfasst_von, $wikilink); + $insertNodaStmt->execute(); + $insertNodaStmt->close(); + + // Update edit metadata + $updatePersinstEditInfoStmt = $this->_mysqli_noda->do_prepare("UPDATE `persinst` + SET `persinst_erfasst_am` = NOW(), + `persinst_erfasst_von` = ? + WHERE `persinst_id` = ?"); + $updatePersinstEditInfoStmt->bind_param("si", $erfasst_von, $persinst_id); + $updatePersinstEditInfoStmt->execute(); + $updatePersinstEditInfoStmt->close(); + $updatePersinstEditInfoStmt = null; + + return $output; + + } + + /** + * Function for updating birth and death times based on Wikidata information. + * + * @param array $data Data loaded from Wikidata. + * @param integer $persinst_id Actor ID. 
/**
 * Fills in an actor's missing birth year, death year and gender from Wikidata claims.
 *
 * The current values of `persinst_geburtsjahr`, `persinst_sterbejahr` and
 * `persinst_gender` are read first. A column is only written if its current value
 * is an empty string and the Wikidata entity provides a usable value
 * (claim P569 for the birth year, P570 for the death year, P21 for the gender).
 *
 * @param array   $data        Data loaded from Wikidata.
 * @param integer $persinst_id Actor ID.
 *
 * @throws MDmainEntityNotExistentException If no row exists for the given actor ID.
 *
 * @return void
 */
public function enterPersinstBirthDeathDatesFromWikidata(array $data, int $persinst_id):void {

    $result = $this->_mysqli_noda->query_by_stmt("SELECT `persinst_geburtsjahr`,
            `persinst_sterbejahr`, `persinst_gender`
        FROM `persinst`
        WHERE `persinst_id` = ?", "i", $persinst_id);

    // Release the result before a possible throw so the handle is not leaked.
    $actor_dates = $result->fetch_assoc();
    $result->close();
    $result = null;

    if (!$actor_dates) {
        throw new MDmainEntityNotExistentException("Failed to fetch actor information");
    }

    if ($actor_dates['persinst_geburtsjahr'] === '') {

        // Try to get the year of birth
        $birth_date = self::_getYearFromWikidataTimeClaim($data, 'P569');

        if ($birth_date !== null) {
            $updateStmt = $this->_mysqli_noda->do_prepare("UPDATE `persinst`
                SET `persinst_geburtsjahr` = ?
                WHERE `persinst_id` = ?
                LIMIT 1");
            $updateStmt->bind_param("ii", $birth_date, $persinst_id);
            $updateStmt->execute();
            $updateStmt->close();
            $updateStmt = null;
        }

    }

    if ($actor_dates['persinst_sterbejahr'] === '') {

        // Try to get the year of death
        $death_date = self::_getYearFromWikidataTimeClaim($data, 'P570');

        if ($death_date !== null) {
            $updateStmt = $this->_mysqli_noda->do_prepare("UPDATE `persinst`
                SET `persinst_sterbejahr` = ?
                WHERE `persinst_id` = ?
                LIMIT 1");
            $updateStmt->bind_param("ii", $death_date, $persinst_id);
            $updateStmt->execute();
            $updateStmt->close();
            $updateStmt = null;
        }

    }

    if ($actor_dates['persinst_gender'] === '') {

        // Try to get the gender
        $wikidata_gender_id = $data['claims']['P21'][0]['mainsnak']['datavalue']['value']['id'] ?? null;

        if (!empty($wikidata_gender_id)) {

            // Map the Wikidata item ID onto the values written to `persinst_gender`.
            switch ($wikidata_gender_id) {
                case "Q6581097":
                    $wikidata_gender = "male";
                    break;
                case "Q6581072":
                case "Q1052281":
                    $wikidata_gender = "female";
                    break;
                case "Q48270":
                    $wikidata_gender = "other";
                    break;
                default:
                    // NOTE(review): terminating the whole request from inside this
                    // method is kept as-is for backward compatibility; consider
                    // throwing an exception instead.
                    echo "Unknown gender: Q-ID is " . $wikidata_gender_id;
                    exit;
            }

            $updateStmt = $this->_mysqli_noda->do_prepare("UPDATE `persinst`
                SET `persinst_gender` = ?
                WHERE `persinst_id` = ?
                LIMIT 1");
            $updateStmt->bind_param("si", $wikidata_gender, $persinst_id);
            $updateStmt->execute();
            $updateStmt->close();
            $updateStmt = null;

        }

    }

}

/**
 * Extracts the year from the first time-valued claim of a Wikidata property.
 *
 * The year is read directly from the leading "<sign><year>-" part of the time
 * string rather than via strtotime(): strtotime() interprets a bare four-digit
 * string such as "1850" as a time of day (18:50), not as a year.
 *
 * NOTE(review): the sign is ignored, as in the previous substr($time, 1, 4)
 * based code, so years before the common era are returned by magnitude only.
 * Confirm how such years should be represented in the database.
 *
 * @param array  $data     Data loaded from Wikidata.
 * @param string $property Wikidata property ID, e.g. "P569".
 *
 * @return integer|null Year, or null if the claim is missing, malformed or zero.
 */
private static function _getYearFromWikidataTimeClaim(array $data, string $property):?int {

    $time = $data['claims'][$property][0]['mainsnak']['datavalue']['value']['time'] ?? null;

    if (!is_string($time) || preg_match('/^[+-]?(\d{1,16})-/', $time, $matches) !== 1) {
        return null;
    }

    $year = (int)$matches[1];

    return $year > 0 ? $year : null;

}
+ * + * @param string $lang The user's selected used language. + * @param string $wikidata_id Wikidata ID. + * @param integer $persinst_id Actor ID. + * @param string $erfasst_von User name who's currently editing. + * + * @return void + */ + public function retrievePersinstInfoFromWikidataID(string $lang, string $wikidata_id, int $persinst_id, string $erfasst_von) { + + $data = json_decode(MD_STD::runCurl("https://www.wikidata.org/wiki/Special:EntityData/" . $wikidata_id . ".json", 10000), true); + if ($data === null) { + throw new MDhttpFailedException("Failed fetching from Wikidata. Try again later."); + } + $data = $data['entities'][$wikidata_id]; + + // Get links to other norm data sources + $normDataSources = [ + "gnd_id" => "P227", + "rkd_id" => "P650", + "ulan_id" => "P245", + "viaf_id" => "P214", + "nomisma_id" => "P2950", + "bnf_id" => "P268", + "pim_id" => "P3973", + "ndl_id" => "P349", // National Diet Library (Japan) + "npg_id" => "P1816", // "National" portrait gallery + //"bmpi_id" => "P6077", // British Museum Person-Institution + "loc_id" => "P244", // National library of Brazil + "bne_id" => "P950", // Espana National Library + ]; + + foreach ($normDataSources as $sourceName => $sourceIDWikidata) { + if (isset($data['claims'][$sourceIDWikidata])) $$sourceName = $data['claims'][$sourceIDWikidata][0]['mainsnak']['datavalue']['value']; + } + + // Get links to wikipedia + $wikilink = $wikilinkterm = []; + if (isset($data['sitelinks']['dawiki']['url'])) $wikilink['da'] = $data['sitelinks']['dawiki']['url']; + if (isset($data['sitelinks']['dawiki']['title'])) $wikilinkterm['da'] = str_replace(' ', '_', $data['sitelinks']['dawiki']['title']); + if (isset($data['sitelinks']['dewiki']['url'])) $wikilink['de'] = $data['sitelinks']['dewiki']['url']; + if (isset($data['sitelinks']['dewiki']['title'])) $wikilinkterm['de'] = str_replace(' ', '_', $data['sitelinks']['dewiki']['title']); + if (isset($data['sitelinks']['enwiki']['url'])) $wikilink['en'] = 
$data['sitelinks']['enwiki']['url']; + if (isset($data['sitelinks']['enwiki']['title'])) $wikilinkterm['en'] = str_replace(' ', '_', $data['sitelinks']['enwiki']['title']); + if (isset($data['sitelinks']['eswiki']['url'])) $wikilink['es'] = $data['sitelinks']['eswiki']['url']; + if (isset($data['sitelinks']['eswiki']['title'])) $wikilinkterm['es'] = str_replace(' ', '_', $data['sitelinks']['eswiki']['title']); + if (isset($data['sitelinks']['frwiki']['url'])) $wikilink['fr'] = $data['sitelinks']['frwiki']['url']; + if (isset($data['sitelinks']['frwiki']['title'])) $wikilinkterm['fr'] = str_replace(' ', '_', $data['sitelinks']['frwiki']['title']); + if (isset($data['sitelinks']['huwiki']['url'])) $wikilink['hu'] = $data['sitelinks']['huwiki']['url']; + if (isset($data['sitelinks']['huwiki']['title'])) $wikilinkterm['hu'] = str_replace(' ', '_', $data['sitelinks']['huwiki']['title']); + if (isset($data['sitelinks']['itwiki']['url'])) $wikilink['it'] = $data['sitelinks']['itwiki']['url']; + if (isset($data['sitelinks']['itwiki']['title'])) $wikilinkterm['it'] = str_replace(' ', '_', $data['sitelinks']['itwiki']['title']); + if (isset($data['sitelinks']['nlwiki']['url'])) $wikilink['nl'] = $data['sitelinks']['nlwiki']['url']; + if (isset($data['sitelinks']['nlwiki']['title'])) $wikilinkterm['nl'] = str_replace(' ', '_', $data['sitelinks']['nlwiki']['title']); + if (isset($data['sitelinks']['ruwiki']['url'])) $wikilink['ru'] = $data['sitelinks']['ruwiki']['url']; + if (isset($data['sitelinks']['ruwiki']['title'])) $wikilinkterm['ru'] = str_replace(' ', '_', $data['sitelinks']['ruwiki']['title']); + if (isset($data['sitelinks']['svwiki']['url'])) $wikilink['sv'] = $data['sitelinks']['svwiki']['url']; + if (isset($data['sitelinks']['svwiki']['title'])) $wikilinkterm['sv'] = str_replace(' ', '_', $data['sitelinks']['svwiki']['title']); + if (isset($data['sitelinks']['zhwiki']['url'])) $wikilink['zh'] = $data['sitelinks']['zhwiki']['url']; + if 
(isset($data['sitelinks']['zhwiki']['title'])) $wikilinkterm['zh'] = str_replace(' ', '_', $data['sitelinks']['zhwiki']['title']); + + $alreadyEntered = false; + + if (isset($wikilink[$lang]) and isset($wikilinkterm[$lang]) and is_string($wikilinkterm[$lang])) { + + $datafromwiki = MD_STD::runCurl("https://" . $lang . ".wikipedia.org/w/api.php?action=parse&page=" . urlencode($wikilinkterm[$lang]) . "&prop=text§ion=0&format=json", 10000); + $datafromwiki = json_decode($datafromwiki, true)['parse']['text']['*']; + + # Process data retrieved from wikipedia + if (!empty($datafromwiki = self::_cleanWikidataInput((string)$datafromwiki))) { + $alreadyEntered = $this->retrievePersinstDescFromWikipedia($persinst_id, $wikidata_id, $datafromwiki, $wikilink[$lang], $lang, $lang, $erfasst_von); + } + + } + + foreach (self::LANGUAGES_MAIN_DESC as $sprache) { + + if ($alreadyEntered === true) break; + if (!isset($wikilink[$sprache]) || !isset($wikilinkterm[$sprache]) || !is_string($wikilinkterm[$sprache])) continue; + + $datafromwiki = MD_STD::runCurl("https://" . $sprache . ".wikipedia.org/w/api.php?action=parse&page=" . urlencode((string)$wikilinkterm[$sprache]) . "&prop=text§ion=0&format=json", 10000); + $datafromwiki = json_decode($datafromwiki, true)['parse']['text']['*']; + + # Process data retrieved from wikipedia + if ($datafromwiki = self::_cleanWikidataInput((string)$datafromwiki)) { + $alreadyEntered = $this->retrievePersinstDescFromWikipedia($persinst_id, $wikidata_id, $datafromwiki, $wikilink[$sprache], $lang, "$sprache", $erfasst_von); + } + + } + + $this->enterPersinstBirthDeathDatesFromWikidata($data, $persinst_id); + + // GET links to other noda entries. + + $insertNodaRelationsStmt = $this->_mysqli_noda->do_prepare("INSERT INTO `noda` + (`persinst_id`, `noda_source`, `noda_nrinsource`, `noda_link`, `noda_erfasst_von`) + VALUES + (?, ?, ?, ?, ?) 
+ ON DUPLICATE KEY UPDATE `noda_nrinsource` = ?, + `noda_link` = ?, + `noda_erfasst_von` = ?"); + + $this->_mysqli_noda->autocommit(false); + + foreach (self::URL_PREFIXES_PERSINST_NODA_SOURCE as $idVar => $urlStart) { + + if (empty($$idVar)) continue; + + $tNoda_id = $$idVar; + $nameInDB = str_replace("_id", "", $idVar); + + $tNodaurl = $urlStart . $tNoda_id; + + $insertNodaRelationsStmt->bind_param("isssssss", $persinst_id, $nameInDB, $tNoda_id, $tNodaurl, $erfasst_von, $tNoda_id, $tNodaurl, $erfasst_von); + $insertNodaRelationsStmt->execute(); + + } + + $nameInDB = 'wikidata'; + $wikidataurl = "https://www.wikidata.org/wiki/" . $wikidata_id; + + $insertNodaRelationsStmt->bind_param("isssssss", $persinst_id, $nameInDB, $wikidata_id, $wikidataurl, $erfasst_von, $wikidata_id, $wikidataurl, $erfasst_von); + $insertNodaRelationsStmt->execute(); + + $this->_mysqli_noda->commit(); + $this->_mysqli_noda->autocommit(true); + + $insertNodaRelationsStmt->close(); + + $this->getWikidataTranslationsForPersinst($data, $persinst_id); + + } + + /** + * Function for fetching translations from Wikipedia, based on Wikidata information. + * + * @param array $data Entity fetched from wikidata. + * @param integer $persinst_id Actor ID. + * + * @return void + */ + public function getWikidataTranslationsForPersinst(array $data, int $persinst_id) { + + $checkagainstLanguage = self::LANGUAGES_TO_CHECK; + + $insertStmt = $this->_mysqli_noda->do_prepare("CALL nodaInsertPersinstTranslation(?, ?, ?, ?, ?)"); + + list($languagesToFetch, $wikilinks) = self::getWikidataWikipediaTranslationSources($checkagainstLanguage, $data); + + try { + $contents = MD_STD::runCurlMulti($languagesToFetch, 10000); + } + catch (TypeError $e) { + throw new MDExpectedException("Failed to initialize a request. 
Try pressing F5 to run the requests again."); + } + + $this->_mysqli_noda->autocommit(false); + + foreach ($checkagainstLanguage as $lang) { + + if (!empty($languagesToFetch[$lang]) && !empty($data['sitelinks'][$lang . 'wiki'])) { + + $url = $languagesToFetch[$lang]; + + $wikilink = $wikilinks[$lang]; + if (!empty($contents[$lang])) { + + $descFromWiki = $contents[$lang]; + $descFromWiki = json_decode($descFromWiki, true)['parse']['text']['*']; + + # Process data retrieved from wikipedia + + if ($descFromWiki !== null) $tDescription = self::_cleanWikidataInput((string)$descFromWiki); + else $tDescription = ""; + + if (substr($tDescription, -1) == chr(10)) $tDescription = substr($tDescription, 0, strlen($tDescription) - 1); + + $tDescription = '"' . $tDescription . '" - (' . $data['labels'][$lang]['language'] . '.wikipedia.org ' . date('d.m.Y') . ')'; + // Inhalt erster Absatz jeweilige Wikipedia: ' . $tDescription + // dies enthält den ersten Absatz der jeweiligen Wikipedia + + } + else { + $tDescription = ""; + } + + $tLang = self::_cleanWikidataInput((string)$data['labels'][$lang]['language']); + $tLabel = self::_cleanWikidataInput((string)$data['labels'][$lang]['value']); + + try { + $insertStmt->bind_param("issss", $persinst_id, $tLang, $tLabel, $tDescription, $wikilink); + $insertStmt->execute(); + } + catch (MDMysqliInvalidEncodingError $e) { + } + + } + // echo '
Wikipedia Links fehlen'; + else if (!empty($data['labels'][$lang]['value']) and !empty($data['descriptions'][$lang])) { + + $wikilink = ""; + $insertStmt->bind_param("issss", $persinst_id, $data['labels'][$lang]['language'], $data['labels'][$lang]['value'], $data['descriptions'][$lang]['value'], $wikilink); + $insertStmt->execute(); + } + + } + + $this->_mysqli_noda->commit(); + $this->_mysqli_noda->autocommit(true); + + $insertStmt->close(); + unset($insertStmt); + + } + + /** + * Function for entering base information about a place from wikidata. + * + * @param mysqli_result $currentPlaceResult Mysqli result pointing to the current place. + * @param string $datafromwiki Data parsed from wikidata. + * @param array $wikilink Wikilink. + * @param string $preflang Language of the user interface in general. + * @param string $lang Language of the main entry. + * @param integer $placeID ID of the place. + * @param string $erfasst_von User name. + * + * @return boolean + */ + public function enterPlaceDescFromWikidata(mysqli_result $currentPlaceResult, string $datafromwiki, array $wikilink, string $preflang, string $lang, int $placeID, string $erfasst_von) { + + $datafromwiki = '"' . $datafromwiki . '" - (Wikipedia (' . $lang . ') ' . date("d.m.Y") . ')'; + + if (!($curPlaceInfo = $currentPlaceResult->fetch_assoc())) return false; + + if (!empty(trim($curPlaceInfo['ort_anmerkung'])) and substr($curPlaceInfo['ort_anmerkung'], 0, 3) !== 'GND') { + + if (isset($_GET['keep'])) { + + if ($_GET['keep'] === 'add') { + + $datafromwiki = $curPlaceInfo['ort_anmerkung'] . PHP_EOL . PHP_EOL . $datafromwiki; + + } + else if ($_GET['keep'] === 'keep') { + $datafromwiki = $curPlaceInfo['ort_anmerkung']; + } + + } + else { + + $tlLoader = new MDTlLoader("wiki_getter_place", $preflang); + echo self::generateHTMLHeadForWikidataFetcher($lang); + echo self::generateWikidataFetcherHeader($tlLoader); + echo ' +

There is already an entry for description ...

+
+

Actual entry

' . nl2br($curPlaceInfo['ort_anmerkung']) . '

+
+
+

Now found

+

' . $datafromwiki . '

+
+ Keep old entry +
Replace with new entry +
Keep old and add new entry


+ '; + + exit; + + } + + } + + // Write description to DB + $updateStmt = $this->_mysqli_noda->do_prepare("UPDATE `orte` + SET `ort_anmerkung` = ?, + `ort_erfasst_am` = NOW(), + `ort_erfasst_von` = ? + WHERE ort_id = ?"); + try { + $updateStmt->bind_param("ssi", $datafromwiki, $erfasst_von, $placeID); + $updateStmt->execute(); + } + catch (MDMysqliInvalidEncodingError $e) { + $_SESSION["editHistory"] = ["changesStored", "Error adding base description"]; + } + $updateStmt->close(); + unset($updateStmt); + + // Write link to wikipedia to relevant noda DB table + + $wikiAlreadyResult = $this->_mysqli_noda->query_by_stmt("SELECT `noda_orte`.`noda_id` + FROM `noda_orte` + WHERE `noda_orte`.`ort_id` = ? + AND `noda_orte`.`noda_source` = 'Wikipedia'", "i", $placeID); + + switch ($wikiAlreadyResult->num_rows) { + case 0: + + $insertWikiStmt = $this->_mysqli_noda->do_prepare("INSERT INTO `noda_orte` + (`ort_id`, `noda_source`, `noda_nrinsource`, `noda_link`, `noda_erfasst_am`, `noda_erfasst_von`) + VALUES + (?, 'Wikipedia', '', ?, NOW(), ?)"); + $insertWikiStmt->bind_param("iss", $placeID, $wikilink[$lang], $erfasst_von); + $insertWikiStmt->execute(); + $insertWikiStmt->close(); + unset($insertWikiStmt); + + break; + case 1: + + if ($wikiAlreadyData = $wikiAlreadyResult->fetch_assoc()) { + $wikischon_id = $wikiAlreadyData['noda_id']; + + $updateStmt = $this->_mysqli_noda->do_prepare("UPDATE `noda_orte` SET `noda_link` = ? WHERE `noda_id` = ?"); + $updateStmt->bind_param("si", $wikilink[$lang], $wikischon_id); + $updateStmt->execute(); + $updateStmt->close(); + unset($updateStmt); + } + break; + } + + $wikiAlreadyResult->close(); + unset($wikiAlreadyResult); + + return true; + + } + + /** + * Function for retrieving place information based on a Wikidata ID. + * + * @param string $lang Language. + * @param string $wikidata_id Wikidata Q-ID. + * @param integer $onum Place ID. + * @param string $erfasst_von User name of the current user. 
+ * + * @return void + */ + public function retrievePlaceInfoFromWikidataID(string $lang, string $wikidata_id, int $onum, string $erfasst_von) { + + $data = MD_STD::runCurl("https://www.wikidata.org/wiki/Special:EntityData/" . urlencode($wikidata_id) . ".json", 10000); + if (!$data = json_decode($data, true)) { + throw new MDhttpFailedException("Failed fetching from Wikidata. Try again later."); + } + $data = $data['entities'][$wikidata_id]; + + $wikilink = $wikilinkterm = []; + + foreach (self::LANGUAGES_MAIN_DESC as $tLang) { + if (isset($data['sitelinks'][$tLang . 'wiki']['url'])) $wikilink[$tLang] = $data['sitelinks'][$tLang . 'wiki']['url']; + if (isset($data['sitelinks'][$tLang . 'wiki']['title'])) $wikilinkterm[$tLang] = str_replace(' ', '_', $data['sitelinks'][$tLang . 'wiki']['title']); + } + + $currentPlaceResult = $this->_mysqli_noda->query_by_stmt("SELECT `ort_anmerkung` + FROM `orte` + WHERE `ort_id` = ?", "i", $onum); + + $alreadyEntered = false; + + if (!empty($wikilink[$lang])) { + + $datafromwiki = MD_STD::runCurl("https://" . urlencode($lang) . ".wikipedia.org/w/api.php?action=parse&page=" . urlencode($wikilinkterm[$lang]) . "&prop=text§ion=0&format=json", 10000); + $datafromwiki = json_decode($datafromwiki, true)['parse']['text']['*']; + + if (!empty($datafromwiki) and $datafromwiki = self::_cleanWikidataInput((string)$datafromwiki)) { + $alreadyEntered = $this->enterPlaceDescFromWikidata($currentPlaceResult, $datafromwiki, $wikilink, $lang, $lang, $onum, $erfasst_von); + } + } + + foreach (self::LANGUAGES_MAIN_DESC as $sprache) { + + //if ($alreadyEntered === true) break; + if ($alreadyEntered === true) break; + if (!isset($wikilink[$sprache])) continue; + + $datafromwiki = MD_STD::runCurl("https://" . urlencode($sprache) . ".wikipedia.org/w/api.php?action=parse&page=" . urlencode($wikilinkterm[$sprache]) . 
"&prop=text§ion=0&format=json", 10000); + $datafromwiki = json_decode($datafromwiki, true)['parse']['text']['*']; + if (!empty($datafromwiki) and $datafromwiki = self::_cleanWikidataInput((string)$datafromwiki)) { + $alreadyEntered = $this->enterPlaceDescFromWikidata($currentPlaceResult, $datafromwiki, $wikilink, $lang, $sprache, $onum, $erfasst_von); + } + + } + $currentPlaceResult->close(); + unset($currentPlaceResult); + + if (isset($data['claims']['P1566'])) $geonames_id = $data['claims']['P1566'][0]['mainsnak']['datavalue']['value']; + if (isset($data['claims']['P227'])) $gnd_id = $data['claims']['P227'][0]['mainsnak']['datavalue']['value']; + if (isset($data['claims']['P650'])) $rkd_id = $data['claims']['P650'][0]['mainsnak']['datavalue']['value']; + if (isset($data['claims']['P1667'])) $tgn_id = $data['claims']['P1667'][0]['mainsnak']['datavalue']['value']; + if (isset($data['claims']['P402'])) $osm_id = $data['claims']['P402'][0]['mainsnak']['datavalue']['value']; + if (isset($data['claims']['P244'])) $loc_id = $data['claims']['P244'][0]['mainsnak']['datavalue']['value']; + if (isset($data['claims']['P2950'])) $nomisma_id = $data['claims']['P2950'][0]['mainsnak']['datavalue']['value']; + if (isset($data['claims']['P1669'])) $cona_id = $data['claims']['P1669'][0]['mainsnak']['datavalue']['value']; + + if (isset($data['claims']['P625'])) { + $latitude_wd = $data['claims']['P625'][0]['mainsnak']['datavalue']['value']['latitude']; + $longitude_wd = $data['claims']['P625'][0]['mainsnak']['datavalue']['value']['longitude']; + } + + $this->_mysqli_noda->autocommit(false); + + $insertNodaLinkStmt = $this->_mysqli_noda->do_prepare("INSERT INTO `noda_orte` + (`ort_id`, `noda_source`, `noda_nrinsource`, `noda_link`, `noda_erfasst_von`) + VALUES + (?, ?, ?, ?, ?) 
+ ON DUPLICATE KEY UPDATE `noda_nrinsource` = ?, + `noda_link` = ?"); + + foreach (self::URL_PREFIXES_PLACES_NODA_SOURCE as $noda_source => $noda_url_base) { + + if (!empty(${"{$noda_source}_id"})) { + + $noda_link_url = $noda_url_base . ${"{$noda_source}_id"}; + + $insertNodaLinkStmt->bind_param("issssss", $onum, $noda_source, ${"{$noda_source}_id"}, $noda_link_url, $erfasst_von, ${"{$noda_source}_id"}, $noda_link_url); + $insertNodaLinkStmt->execute(); + + } + + } + + $insertNodaLinkStmt->close(); + unset($insertNodaLinkStmt); + + if (!empty($wikidata_id)) { + + $wikidataurl = "https://www.wikidata.org/wiki/" . $wikidata_id; + + $insertStmt = $this->_mysqli_noda->do_prepare("INSERT INTO `noda_orte` + (`ort_id`, `noda_source`, `noda_nrinsource`, `noda_link`, `noda_erfasst_von`) + VALUES + (?, 'wikidata', ?, ?, ?) + ON DUPLICATE KEY UPDATE `noda_nrinsource` = ?, + `noda_link` = ?"); + $insertStmt->bind_param("isssss", $onum, $wikidata_id, $wikidataurl, $erfasst_von, $wikidata_id, $wikidataurl); + $insertStmt->execute(); + $insertStmt->close(); + unset($insertStmt); + } + + if (!empty($tgn_id)) { + + $updateStmt = $this->_mysqli_noda->do_prepare("UPDATE `orte` + SET `ort_land` = ? + WHERE `ort_id` = ?"); + $updateStmt->bind_param("si", $tgn_id, $onum); + $updateStmt->execute(); + $updateStmt->close(); + unset($updateStmt); + + } + if (!empty($geonames_id)) { + + $updateStmt = $this->_mysqli_noda->do_prepare("UPDATE `orte` + SET `ort_geonames` = ? 
+ WHERE `ort_id` = ?"); + $updateStmt->bind_param("si", $geonames_id, $onum); + $updateStmt->execute(); + $updateStmt->close(); + unset($updateStmt); + + } + if (!empty($latitude_wd) and !empty($longitude_wd)) { + + $updateStmt = $this->_mysqli_noda->do_prepare("UPDATE `orte` + SET `ort_nord_sued` = ?, `ort_west_ost` = ?, ort_zoom = '9' + WHERE `ort_id` = ?"); + $updateStmt->bind_param("ssi", $latitude_wd, $longitude_wd, $onum); + $updateStmt->execute(); + $updateStmt->close(); + unset($updateStmt); + + } + + $this->_mysqli_noda->commit(); + $this->_mysqli_noda->autocommit(true); + + $this->getWikidataTranslationsForPlace($data, $onum); + + } + +
    /**
     * Stores translations of a place, based on Wikidata labels and on the
     * first section of the linked Wikipedia articles.
     *
     * Every language in self::LANGUAGES_TO_CHECK that has a label and a
     * Wikipedia sitelink gets the article title (falling back to the Wikidata
     * label) and a quoted, attributed excerpt. Languages without a sitelink
     * fall back to the Wikidata label and description, if both are present.
     *
     * @param array<mixed> $data   Entity data fetched from wikidata.
     * @param integer      $ort_id Place ID.
     *
     * @throws MDExpectedException If the parallel requests cannot be started.
     *
     * @return void
     */
    public function getWikidataTranslationsForPlace(array $data, int $ort_id) {

        $checkagainstLanguage = self::LANGUAGES_TO_CHECK;

        [$languagesToFetch, $wikilinks] = self::getWikidataWikipediaTranslationSources($checkagainstLanguage, $data);

        try {
            $contents = MD_STD::runCurlMulti($languagesToFetch, 10000);
        }
        catch (TypeError $e) {
            throw new MDExpectedException("Failed to initialize a request. Try pressing F5 to run the requests again.");
        }

        // Prepared only after the requests succeeded, so that a failed fetch
        // does not leave an open statement behind.
        $insertStmt = $this->_mysqli_noda->do_prepare("CALL `nodaInsertOrtTranslation`(?, ?, ?, ?, ?)");

        $this->_mysqli_noda->autocommit(false);

        foreach ($checkagainstLanguage as $lang) {

            if (!empty($languagesToFetch[$lang]) && !empty($data['sitelinks'][$lang . 'wiki'])) {

                $wikilink = $wikilinks[$lang];

                // Reset for every language: otherwise the title found for a
                // previous language would be stored again whenever the
                // current language returned no content.
                $tLabel       = "";
                $tDescription = "";

                if (!empty($contents[$lang])) {

                    if (!($wikiDataDecoded = json_decode($contents[$lang], true))) {
                        continue;
                    }

                    // An API error response decodes fine but has no "parse" key.
                    $tLabel       = (string)($wikiDataDecoded['parse']['title'] ?? "");
                    $descFromWiki = $wikiDataDecoded['parse']['text']['*'] ?? "";

                    if (!empty($descFromWiki)) {

                        $tDescription = self::_cleanWikidataInput((string)$descFromWiki);

                        if (substr($tDescription, -1) == chr(10)) {
                            $tDescription = substr($tDescription, 0, -1);
                        }

                        $tDescription = self::_attributeWikipediaExcerpt(
                            $tDescription,
                            $data['labels'][$lang]['language'] . '.wikipedia.org'
                        );

                    }

                }

                $tLang = self::_cleanWikidataInput((string)$data['labels'][$lang]['language']);
                if (empty($tLabel)) {
                    $tLabel = self::_cleanWikidataInput((string)$data['labels'][$lang]['value']);
                }

                try {
                    $insertStmt->bind_param("issss", $ort_id, $tLang, $tLabel, $tDescription, $wikilink);
                    $insertStmt->execute();
                }
                catch (MDMysqliInvalidEncodingError $e) {
                    $_SESSION["editHistory"] = ["changesStored", "Error adding translation for language $tLang"];
                }

            }
            else if (!empty($data['labels'][$lang]['value']) and !empty($data['descriptions'][$lang])) {

                // No Wikipedia article: store the plain Wikidata label and description.
                $wikilink = "";
                $insertStmt->bind_param("issss", $ort_id, $data['labels'][$lang]['language'], $data['labels'][$lang]['value'], $data['descriptions'][$lang]['value'], $wikilink);
                $insertStmt->execute();

            }

        }

        $this->_mysqli_noda->commit();
        $this->_mysqli_noda->autocommit(true);

        $insertStmt->close();
        unset($insertStmt);

    }

    /**
     * Writes a Wikipedia excerpt into a tag's description and links the tag
     * to the Wikipedia article.
     *
     * If the tag already has a description that does not start with "GND" and
     * $_GET['keep'] is not set, a page offering the choice between the old and
     * the new text is printed and the script exits. Otherwise $_GET['keep']
     * decides: empty or "replace" overwrites, "add" appends, any other value
     * keeps the existing text.
     *
     * @param integer $tag_id       Tag ID.
     * @param string  $datafromwiki Data fetched from Wikipedia.
     * @param string  $wikilink     Link to wikipedia entry.
     * @param string  $preflang     The user's currently used language.
     * @param string  $lang         Currently queried language.
     * @param string  $erfasst_von  User who adds the info.
     *
     * @return boolean False if the tag does not exist, true otherwise.
     */
    public function retrieveTagDescFromWikipedia(int $tag_id, string $datafromwiki, string $wikilink, string $preflang, string $lang, string $erfasst_von):bool {

        $datafromwiki = self::_attributeWikipediaExcerpt($datafromwiki, 'Wikipedia (' . $lang . ')');

        $cergebnis = $this->_mysqli_noda->query_by_stmt("SELECT `tag_anmerkung`
            FROM `tag`
            WHERE `tag_id` = ?", "i", $tag_id);
        $cinfo = $cergebnis->fetch_assoc();
        $cergebnis->close();
        $cergebnis = null;

        if (!$cinfo) {
            return false;
        }

        $hasOwnDescription = (!empty($cinfo['tag_anmerkung']) and substr($cinfo['tag_anmerkung'], 0, 3) != 'GND');

        if ($hasOwnDescription && !isset($_GET['keep'])) {

            // Ask the user before touching an existing description. Nothing
            // has been written or prepared at this point.
            // NOTE(review): the literals below contain no markup in this copy
            // of the file; they are reproduced as found - confirm against the
            // upstream template.
            $tlLoader = new MDTlLoader("wiki_getter_tag", $preflang);
            echo self::generateHTMLHeadForWikidataFetcher($lang);
            echo self::generateWikidataFetcherHeader($tlLoader);
            echo '

Es gibt schon einen Eintrag im Beschreibungsfeld



Bisher vorhanden

' . nl2br($cinfo['tag_anmerkung']) . '





Jetzt gefunden

' . $datafromwiki . '





Keep old entry';
            echo '
Replace with new entry';
            echo '
Keep old and add new entry


';

            exit;

        }

        // Decide which text to store; null means "leave the description as it is".
        $descToStore = $datafromwiki;
        if ($hasOwnDescription) {
            $keepMode = $_GET['keep'];
            if (!$keepMode || $keepMode === 'replace') {
                $descToStore = $datafromwiki;
            }
            else if ($keepMode === 'add') {
                $descToStore = $cinfo['tag_anmerkung'] . PHP_EOL . PHP_EOL . $datafromwiki;
            }
            else {
                $descToStore = null;
            }
        }

        $this->_mysqli_noda->autocommit(false);

        if ($descToStore !== null) {
            $updateTagDescStmt = $this->_mysqli_noda->do_prepare("UPDATE `tag`
                SET `tag_anmerkung` = ?
                WHERE `tag_id` = ?");
            $updateTagDescStmt->bind_param("si", $descToStore, $tag_id);
            $updateTagDescStmt->execute();
            $updateTagDescStmt->close();
            $updateTagDescStmt = null;
        }

        $insertNodaTagStmt = $this->_mysqli_noda->do_prepare("INSERT INTO `noda_tag`
            (`tag_id`, `noda_source`, `noda_nrinsource`, `noda_link`, `noda_erfasst_von`)
            VALUES
            (?, 'Wikipedia', '', ?, ?)
            ON DUPLICATE KEY UPDATE `noda_link` = ?");
        $insertNodaTagStmt->bind_param("isss", $tag_id, $wikilink, $erfasst_von, $wikilink);
        $insertNodaTagStmt->execute();
        $insertNodaTagStmt->close();

        // Update tag editing metadata
        $updateTagEditInfoStmt = $this->_mysqli_noda->do_prepare("UPDATE `tag`
            SET `tag_erfasst_am` = NOW(),
                `tag_erfasst_von` = ?
            WHERE `tag_id` = ?");
        $updateTagEditInfoStmt->bind_param("si", $erfasst_von, $tag_id);
        $updateTagEditInfoStmt->execute();
        $updateTagEditInfoStmt->close();
        $updateTagEditInfoStmt = null;

        $this->_mysqli_noda->commit();
        $this->_mysqli_noda->autocommit(true);

        return true;

    }

    /**
     * Imports a tag's description, authority links and translations from a
     * Wikidata entity.
     *
     * The description is taken from the first Wikipedia article that yields
     * text, trying the user's language first and then
     * self::LANGUAGES_MAIN_DESC in order.
     *
     * @param string  $lang        The user's selected used language.
     * @param string  $wikidata_id Wikidata ID.
     * @param integer $tag_id      Tag ID.
     * @param string  $erfasst_von User name who's currently editing.
     *
     * @throws MDhttpFailedException If the Wikidata response is not valid JSON.
     *
     * @return void
     */
    public function retrieveTagInfoFromWikidataID(string $lang, string $wikidata_id, int $tag_id, string $erfasst_von) {

        $data = MD_STD::runCurl("https://www.wikidata.org/wiki/Special:EntityData/" . $wikidata_id . ".json", 10000);
        $data = json_decode($data, true);
        if ($data === null) {
            throw new MDhttpFailedException("Failed fetching from Wikidata. Try again later.");
        }
        // The entity may be missing from the response; treat that as "no data".
        $data = $data['entities'][$wikidata_id] ?? [];

        // Collect article URLs and page names for every main language. Driving
        // this from the constant keeps it in sync with the fallback order below.
        $wikilink = $wikilinkterm = [];
        foreach (self::LANGUAGES_MAIN_DESC as $sprache) {
            $sitelink = $data['sitelinks'][$sprache . 'wiki'] ?? [];
            if (isset($sitelink['url'])) {
                $wikilink[$sprache] = $sitelink['url'];
            }
            if (isset($sitelink['title'])) {
                $wikilinkterm[$sprache] = str_replace(' ', '_', $sitelink['title']);
            }
        }

        $alreadyEntered = false;

        foreach (array_merge([$lang], self::LANGUAGES_MAIN_DESC) as $sprache) {

            if ($alreadyEntered === true) break;
            if (!isset($wikilink[$sprache], $wikilinkterm[$sprache]) || !is_string($wikilinkterm[$sprache])) continue;

            $datafromwiki = self::_fetchWikipediaLeadSection($sprache, $wikilinkterm[$sprache]);

            if (!empty($datafromwiki)) {
                $alreadyEntered = $this->retrieveTagDescFromWikipedia($tag_id, $datafromwiki, $wikilink[$sprache], $lang, (string)$sprache, $erfasst_von);
            }

        }

        // Authority files to link: source name => [Wikidata property, URL prefix, URL suffix].
        $linkedSources = [
            'gnd'       => ['P227', "https://d-nb.info/gnd/", ""],
            'aat'       => ['P1014', "http://vocab.getty.edu/page/aat/", ""],
            'iconclass' => ['P1256', "http://iconclass.org/rkd/", "/"],
            'lcsh'      => ['P244', "http://id.loc.gov/authorities/subjects/", ""],
        ];

        $this->_mysqli_noda->autocommit(false);

        $insertNodaTagStmt = $this->_mysqli_noda->do_prepare("INSERT INTO `noda_tag`
            (`tag_id`, `noda_source`, `noda_nrinsource`, `noda_link`, `noda_erfasst_von`)
            VALUES
            (?, ?, ?, ?, ?)
            ON DUPLICATE KEY UPDATE
                `noda_nrinsource` = ?,
                `noda_link` = ?,
                `noda_erfasst_von` = ?");

        foreach ($linkedSources as $noda_source => [$property, $urlPrefix, $urlSuffix]) {

            // Claims without a concrete value carry no "datavalue".
            $noda_nrinsource = $data['claims'][$property][0]['mainsnak']['datavalue']['value'] ?? null;
            if (empty($noda_nrinsource) || !is_string($noda_nrinsource)) continue;

            $noda_link = $urlPrefix . $noda_nrinsource . $urlSuffix;

            $insertNodaTagStmt->bind_param("isssssss", $tag_id, $noda_source, $noda_nrinsource, $noda_link, $erfasst_von, $noda_nrinsource, $noda_link, $erfasst_von);
            $insertNodaTagStmt->execute();

        }

        // The Wikidata entity itself is always linked.
        $noda_source = 'wikidata';
        $noda_link   = "https://www.wikidata.org/wiki/" . $wikidata_id;
        $insertNodaTagStmt->bind_param("isssssss", $tag_id, $noda_source, $wikidata_id, $noda_link, $erfasst_von, $wikidata_id, $noda_link, $erfasst_von);
        $insertNodaTagStmt->execute();

        $insertNodaTagStmt->close();

        $this->_mysqli_noda->commit();
        $this->_mysqli_noda->autocommit(true);

        if (!empty($data)) $this->getWikidataTranslationsForTag($data, $tag_id);

    }

    /**
     * Stores translations of a tag, based on Wikidata labels and on the first
     * section of the linked Wikipedia articles.
     *
     * Works like the place variant, but always uses the Wikidata label and
     * capitalizes label and description for a fixed list of languages.
     *
     * @param array<mixed> $data   Entity data fetched from wikidata.
     * @param integer      $tag_id Tag ID.
     *
     * @throws MDExpectedException If the parallel requests cannot be started.
     *
     * @return void
     */
    public function getWikidataTranslationsForTag(array $data, int $tag_id) {

        $checkagainstLanguage = self::LANGUAGES_TO_CHECK;

        // Languages whose entries get an upper-case first letter.
        $toCapitalize = ["cs", "da", "de", "en", "es", "fr", "fi", "id", "it", "nl", "pl", "pt", "ru", "sv", "tl", "tr"];

        [$languagesToFetch, $wikilinks] = self::getWikidataWikipediaTranslationSources($checkagainstLanguage, $data);

        try {
            $contents = MD_STD::runCurlMulti($languagesToFetch, 10000);
        }
        catch (TypeError $e) {
            throw new MDExpectedException("Failed to initialize a request. Try pressing F5 to run the requests again.");
        }

        // Prepared only after the requests succeeded, so that a failed fetch
        // does not leave an open statement behind.
        $insertStmt = $this->_mysqli_noda->do_prepare("CALL nodaInsertTagTranslation(?, ?, ?, ?, ?)");

        $this->_mysqli_noda->autocommit(false);

        foreach ($checkagainstLanguage as $lang) {

            if (!empty($languagesToFetch[$lang]) && !empty($data['sitelinks'][$lang . 'wiki'])) {

                $wikilink     = $wikilinks[$lang];
                $tDescription = "";

                if (!empty($contents[$lang])) {

                    $descFromWiki = json_decode($contents[$lang], true)['parse']['text']['*'] ?? "";

                    if (!empty($descFromWiki)) {

                        $tDescription = self::_cleanWikidataInput((string)$descFromWiki);

                        if (substr($tDescription, -1) == chr(10)) {
                            $tDescription = substr($tDescription, 0, -1);
                        }

                        $tDescription = self::_attributeWikipediaExcerpt(
                            $tDescription,
                            $data['labels'][$lang]['language'] . '.wikipedia.org'
                        );

                    }

                }

                $tLang  = self::_cleanWikidataInput((string)$data['labels'][$lang]['language']);
                $tLabel = self::_cleanWikidataInput((string)$data['labels'][$lang]['value']);

                if (in_array($tLang, $toCapitalize, true)) {
                    $tLabel       = self::_capitalizeFirstLetter($tLabel);
                    $tDescription = self::_capitalizeFirstLetter($tDescription);
                }

                try {
                    $insertStmt->bind_param("issss", $tag_id, $tLang, $tLabel, $tDescription, $wikilink);
                    $insertStmt->execute();
                }
                catch (MDMysqliInvalidEncodingError $e) {
                    // Best effort: a translation with invalid encoding is
                    // skipped so the remaining languages are still stored.
                }

            }
            else if (!empty($data['labels'][$lang]['value']) and !empty($data['descriptions'][$lang])) {

                // No Wikipedia article: store the plain Wikidata label and description.
                $wikilink = "";

                if (in_array($lang, $toCapitalize, true)) {
                    $data['labels'][$lang]['value']       = self::_capitalizeFirstLetter($data['labels'][$lang]['value']);
                    $data['descriptions'][$lang]['value'] = self::_capitalizeFirstLetter($data['descriptions'][$lang]['value']);
                }

                $insertStmt->bind_param("issss", $tag_id, $data['labels'][$lang]['language'], $data['labels'][$lang]['value'], $data['descriptions'][$lang]['value'], $wikilink);
                $insertStmt->execute();

            }

        }

        $this->_mysqli_noda->commit();
        $this->_mysqli_noda->autocommit(true);

        $insertStmt->close();
        unset($insertStmt);

    }

    /**
     * Fetches section 0 of a Wikipedia article and returns it cleaned.
     *
     * @param string $lang  Language code of the Wikipedia edition.
     * @param string $title Page name of the article.
     *
     * @return string Cleaned text; empty if the response held no parsed text.
     */
    private static function _fetchWikipediaLeadSection(string $lang, string $title):string {

        $response = MD_STD::runCurl("https://" . $lang . ".wikipedia.org/w/api.php?action=parse&page=" . urlencode($title) . "&prop=text&section=0&format=json", 10000);

        // Error responses and invalid JSON have no parse/text/* entry.
        $parsedText = json_decode($response, true)['parse']['text']['*'] ?? "";

        return self::_cleanWikidataInput((string)$parsedText);

    }

    /**
     * Wraps an excerpt in quotes, appends its source and today's date, and
     * removes bracketed reference numbers.
     *
     * @param string $excerpt     Cleaned excerpt text.
     * @param string $attribution Source label placed in front of the date.
     *
     * @return string
     */
    private static function _attributeWikipediaExcerpt(string $excerpt, string $attribution):string {

        $quoted = '"' . $excerpt . '" - (' . $attribution . ' ' . date('d.m.Y') . ')';

        // Removes entity-encoded bracketed numbers such as "&#91;12&#93;"
        // (presumably footnote markers); "+" also covers multi-digit ones.
        $withoutReferences = MD_STD::preg_replace_str("/\&\#91\;[0-9]+\&\#93\;/", '', $quoted);

        return str_replace("'", "´", $withoutReferences);

    }

    /**
     * Trims a string and upper-cases its first character.
     *
     * Uses the multibyte functions where available, because ucfirst() works
     * on single bytes and leaves non-ASCII first letters unchanged.
     *
     * @param string $text Input string.
     *
     * @return string
     */
    private static function _capitalizeFirstLetter(string $text):string {

        $text = trim($text);

        if ($text === '' || !function_exists('mb_substr')) {
            return ucfirst($text);
        }

        return mb_strtoupper(mb_substr($text, 0, 1, 'UTF-8'), 'UTF-8') . mb_substr($text, 1, null, 'UTF-8');

    }

    /**
     * 
Prints the list of Wikidata search results, or a notice if nothing was found.
     *
     * Results without a label, with an empty description, or describing a
     * disambiguation page are skipped.
     *
     * @param array<string, mixed> $wikidata_data Wikidata data; results are read from its "search" key.
     * @param string               $link          Links.
     * @param string               $searchTerm    Search term.
     * @param string               $lang          Language.
     *
     * @return void
     */
    public static function generateWikidataResultsList(array $wikidata_data, string $link, string $searchTerm, string $lang):void {

        // A response without a "search" key is treated like an empty result set.
        $results = $wikidata_data['search'] ?? [];

        if (!is_array($results) || count($results) === 0) {
            // The search term is user input; existing entities are not re-encoded.
            echo '

' . htmlspecialchars(ucfirst($searchTerm), ENT_QUOTES | ENT_SUBSTITUTE, 'UTF-8', false) . ' not found in Wikidata

';
            return;
        }

        echo '

';

        $disambiguationDescs = ['Wikipedia disambiguation page', 'Wikimedia disambiguation page'];

        foreach ($results as $result) {

            if (!isset($result['label']) || $result['label'] == '') continue;

            if (isset($result['description'])
                && ($result['description'] == '' || in_array($result['description'], $disambiguationDescs, true))
            ) continue;

            // NOTE(review): this literal is empty in this copy of the file, so
            // $result, $link and $lang are unused here - confirm against the
            // upstream template.
            echo '';

        }

        echo '

';

    }

    /**
     * Builds the HTML head for the Wikidata fetcher pages.
     *
     * @param string  $lang     User language.
     * @param boolean $implyEnd If set to true, the end string will be echoed at the end of the script execution.
     *
     * @return string
     */
    public static function generateHTMLHeadForWikidataFetcher(string $lang, bool $implyEnd = true):string {

        // NOTE(review): the literals below contain no markup in this copy of
        // the file; they are reproduced as found - confirm against the
        // upstream template.
        $output = "

 Get Wikidata





 ";
        if (defined("MAIN_CSS_FILE")) {
            $output .= "";
        }
        $output .= "


 ";

        if ($implyEnd === true) {
            // Prints the closing part of the page when the script ends.
            register_shutdown_function(static function() :void {
                echo printHTMLEnd();
            });
        }

        return MD_STD::minimizeHTMLString($output);

    }

    /**
     * Builds the header shown on top of the Wikidata fetcher pages.
     *
     * If no search term is passed, $_GET['suchbegriff'] is used instead.
     *
     * @param MDTlLoader $tlLoader   Translation variable.
     * @param string     $additional Additional info.
     * @param string     $searchTerm Search term.
     *
     * @return string
     */
    public static function generateWikidataFetcherHeader(MDTlLoader $tlLoader, string $additional = "", string $searchTerm = ""):string {

        // Only accept a plain string from the request; arrays are ignored.
        if (empty($searchTerm) and !empty($_GET['suchbegriff']) and is_string($_GET['suchbegriff'])) {
            $searchTerm = $_GET['suchbegriff'];
        }

        // The term may come straight from the request, so it is escaped before
        // output; existing entities are not re-encoded.
        $escapedTerm = htmlspecialchars($searchTerm, ENT_QUOTES | ENT_SUBSTITUTE, 'UTF-8', false);

        // NOTE(review): the literals below contain no markup in this copy of
        // the file; they are reproduced as found - confirm against the
        // upstream template.
        $output = '


Logo: Wikidata' . $tlLoader->tl("wiki", "wiki", "fetch_from_wikidata");
        $output .= ': ' . $escapedTerm;
        $output .= '

';
        $output .= $additional;
        $output .= '
';
        return $output;

    }

    /**
     * Constructor.
     *
     * @param MDMysqli $mysqli_noda DB connection.
     *
     * @return void
     */
    public function __construct(MDMysqli $mysqli_noda) {

        $this->_mysqli_noda = $mysqli_noda;

    }
}