Improve NodaWikidataFetcher's loading of descriptions

Close #15
2023-08-31 15:38:12 +02:00
parent 869e0f263d
commit 107a4cd640
2 changed files with 538 additions and 235 deletions


@@ -98,6 +98,256 @@ final class NodaWikidataFetcher {
/** @var MDMysqli */
private MDMysqli $_mysqli_noda;
/**
* Returns the link to Wikipedia's API for getting information on a page.
*
* @param string $lang Language / Wikipedia version to fetch.
* @param string $searchTerm Search term.
*
* @return non-empty-string
*/
private static function _getWikipediaApiLink(string $lang, string $searchTerm):string {
return "https://" . urlencode($lang) . ".wikipedia.org/w/api.php?action=parse&page=" . urlencode($searchTerm) . "&prop=text&section=0&format=json";
}
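For illustration, a minimal usage sketch (the language and search term are invented for this example; the method is private, so the call only works from inside the class):

// Illustrative only: builds the section-0 parse URL for the German Wikipedia.
$url = self::_getWikipediaApiLink('de', 'Johann Sebastian Bach');
// => https://de.wikipedia.org/w/api.php?action=parse&page=Johann+Sebastian+Bach&prop=text&section=0&format=json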
/**
* Cleans basic tags off Wikidata input.
*
* @param string $input Input string.
*
* @return string
*/
private static function _cleanWikidataInputHtml(string $input):string {
// Clean off anything before first <p>
if ($pStartPos = strpos($input, '<p')) {
$input = substr($input, $pStartPos);
}
if ($pEndPos = strrpos($input, '</p>')) {
$input = substr($input, 0, $pEndPos + 4);
}
$doc = new DOMDocument();
try {
$doc->loadXML('<section>' . trim($input) . '</section>');
}
catch (Exception $e) {
throw new Exception("Failed to load DOMDocument." . PHP_EOL . $e->getMessage() . PHP_EOL . PHP_EOL . $input);
}
$list = $doc->getElementsByTagName("style");
while ($list->length > 0) {
$p = $list->item(0);
if ($p === null || $p->parentNode === null) break;
$p->parentNode->removeChild($p);
}
$list = $doc->getElementsByTagName("table");
while ($list->length > 0) {
$p = $list->item(0);
if ($p === null || $p->parentNode === null) break;
$p->parentNode->removeChild($p);
}
$list = $doc->getElementsByTagName("ol");
while ($list->length > 0) {
$p = $list->item(0);
if ($p === null || $p->parentNode === null) break;
$p->parentNode->removeChild($p);
}
if (($firstP = $doc->getElementsByTagName("p")->item(0)) !== null) {
if (($firstPhtml = $doc->saveHTML($firstP)) !== false) {
if (strpos($firstPhtml, 'geohack') !== false) {
if ($firstP->parentNode !== null) $firstP->parentNode->removeChild($firstP);
}
}
}
$output = [];
foreach ($doc->getElementsByTagName("p") as $p) {
$output[] = trim($p->textContent);
}
/*
if (strpos($doc->saveHTML(), 'Coordinates:') !== false) {
echo $doc->saveHTML();
exit;
}
*/
return str_replace(PHP_EOL, PHP_EOL . PHP_EOL, trim(implode(PHP_EOL, $output)));
}
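A rough sketch of what this step does to a typical parse-API fragment (the HTML below is invented, not taken from the commit):

// Illustrative input: intro paragraph, infobox table, second paragraph.
$html = '<style>.infobox{}</style><p>First paragraph.</p><table><tr><td>Infobox</td></tr></table><p>Second paragraph.</p>';
$text = self::_cleanWikidataInputHtml($html);
// $text === "First paragraph." . PHP_EOL . PHP_EOL . "Second paragraph."
// style, table and ol nodes are dropped; only <p> text survives, separated by blank lines.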
/**
* Cleans brackets ([1], [2]) off description text.
*
* @param string $input Input string.
*
* @return string
*/
private static function _cleanSourceBracketsOffTranslation(string $input):string {
$bracketsToRemove = [];
for ($i = 0; $i < 100; $i++) {
$bracketsToRemove["[$i]"] = "";
}
return strtr($input, $bracketsToRemove);
}
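For example (illustrative input):

// Footnote markers [0] through [99] are mapped to empty strings via strtr().
$clean = self::_cleanSourceBracketsOffTranslation('Born in 1685[1], he later moved to Leipzig[23].');
// $clean === 'Born in 1685, he later moved to Leipzig.'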
/**
* Cleans contents parsed from Wikipedia.
*
* @param string $input Input string.
*
* @return string
*/
private static function _cleanWikidataInput(string $input):string {
$input = trim($input, '"');
foreach (self::WIKIPEDIA_REMOVE_LITERALS as $tToRemove) $input = str_replace($tToRemove, "", $input);
if (substr($input, 0, strlen('<')) === '<') {
$input = self::_cleanWikidataInputHtml($input);
if (mb_strlen($input) > 600) {
if (strpos($input, PHP_EOL . PHP_EOL, 600) !== false) {
$input = substr($input, 0, strpos($input, PHP_EOL . PHP_EOL, 600));
}
}
$input = self::_cleanSourceBracketsOffTranslation($input);
$input = str_replace("\t", " ", $input);
return $input;
// Remove newlines with ensuing spaces
while (strpos($input, PHP_EOL . " ") !== false) {
$input = str_replace(PHP_EOL . " ", PHP_EOL, $input);
}
// Remove double newlines
while (strpos($input, PHP_EOL . PHP_EOL . PHP_EOL) !== false) {
$input = str_replace(PHP_EOL . PHP_EOL . PHP_EOL, PHP_EOL . PHP_EOL, $input);
}
return MD_STD_IN::sanitize_text($input);
}
$input = str_replace(PHP_EOL, '', $input);
if (empty($input)) return "";
// Remove infobox tables specifically
$firstParagraphPosition = strpos($input, '<p', 1);
$currentSearchPos = strpos($input, "<table>");
if ($currentSearchPos !== false && $currentSearchPos < $firstParagraphPosition) {
if (($tableEndPos = strpos($input, "</table>")) !== false) {
if (($pStartPos = strpos($input, '<p', $tableEndPos + 6)) !== false) {
$input = substr($input, $pStartPos);
}
}
}
// Remove leftover unnecessary paragraphs before actual content
$removeFirstParagraph = false;
$firstParagraphPosition = strpos($input, '<p', 1);
foreach (["</table>", "<img"] as $tagPart) {
$currentSearchPos = strpos($input, $tagPart);
if ($currentSearchPos !== false && $currentSearchPos < $firstParagraphPosition) {
$removeFirstParagraph = true;
break;
}
}
if ($removeFirstParagraph === true) {
$input = substr($input, $firstParagraphPosition ?: 0);
}
$input = str_replace('</p>', '</p>' . PHP_EOL . PHP_EOL . PHP_EOL, $input);
# $input = str_replace('?/i', '', $input);
$input = strip_tags($input);
# for ($i = 150; $i < 1000; $i++) $input = str_replace("&#$i;", " ", $input);
$i = 0;
while (strpos($input, ".mw-parser-output") !== false and strpos($input, "}", strpos($input, ".mw-parser-output")) !== false) {
$part1 = substr($input, 0, strpos($input, ".mw-parser-output"));
$part2 = substr($input, strpos($input, "}", strpos($input, ".mw-parser-output")) + 1);
$input = $part1 . $part2;
++$i;
if ($i === 30) break;
}
$input = self::_cleanSourceBracketsOffTranslation($input);
$input = str_replace("\t", " ", $input);
// Remove double whitespaces
while (strpos($input, " ") !== false) {
$input = str_replace(" ", " ", $input);
}
// Remove newlines with ensuing spaces
while (strpos($input, PHP_EOL . " ") !== false) {
$input = str_replace(PHP_EOL . " ", PHP_EOL, $input);
}
// Remove double newlines
while (strpos($input, PHP_EOL . PHP_EOL . PHP_EOL) !== false) {
$input = str_replace(PHP_EOL . PHP_EOL . PHP_EOL, PHP_EOL . PHP_EOL, $input);
}
$stableToRemove = [
"Vous pouvez partager vos connaissances en laméliorant (comment ?) selon les recommandations des projets correspondants.",
];
foreach ($stableToRemove as $tToRemove) $input = str_replace($tToRemove, "", $input);
$endings = [
"StubDenne artikel om et vandløb ",
];
foreach ($endings as $ending) {
if (strpos($input, $ending) !== false) $input = substr($input, 0, strpos($input, $ending));
}
$input = trim($input);
// Cut off overly long articles
if (mb_strlen($input) > 600) {
if (strpos($input, PHP_EOL . PHP_EOL, 600) !== false) {
$input = trim(substr($input, 0, strpos($input, PHP_EOL . PHP_EOL, 600)));
}
}
if (empty($input)) return '';
$input = str_replace("'", "´", MD_STD::preg_replace_str("/\&\#91\;[0-9]\&\#93\;/", '', $input));
$input = html_entity_decode($input);
return MD_STD_IN::sanitize_text($input);
}
/**
* Wrapper around _cleanWikidataInput for testing.
*
* @param string $input Input string.
*
* @return string
*/
public static function cleanWikidataInput(string $input):string {
if (PHP_SAPI !== 'cli') throw new Exception("Use this function only for testing");
return self::_cleanWikidataInput($input);
}
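Since the wrapper only exists so the private cleaner can be exercised from the CLI, a test could look roughly like this (the test class and fixture are hypothetical, not part of this commit, and assume WIKIPEDIA_REMOVE_LITERALS does not match the fixture):

// Hypothetical PHPUnit sketch for the cleaning pipeline.
final class NodaWikidataFetcherCleanTest extends \PHPUnit\Framework\TestCase {
    public function testStripsMarkupAndFootnoteBrackets():void {
        $raw = '<p>Leipzig is a city in Saxony.[1]</p>';
        self::assertSame('Leipzig is a city in Saxony.', NodaWikidataFetcher::cleanWikidataInput($raw));
    }
}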
/**
* Sets the retrieval mode.
*
@@ -343,7 +593,7 @@ final class NodaWikidataFetcher {
if (isset($wikilink)) {
$languagesToFetch[$lang] = "https://" . $lang . ".wikipedia.org/w/api.php?action=parse&page=" . urlencode($wikilinkterm) . "&prop=text&section=0&format=json";
$languagesToFetch[$lang] = self::_getWikipediaApiLink($lang, $wikilinkterm);
$wikilinks[$lang] = $wikilink;
}
@@ -379,6 +629,7 @@ final class NodaWikidataFetcher {
$output = [];
$descs = [];
foreach ($checkagainstLanguage as $lang) {
if (!empty($languagesToFetch[$lang]) && !empty($data['sitelinks'][$lang . 'wiki']) && !empty($wikilinks[$lang])) {
@@ -393,18 +644,22 @@ final class NodaWikidataFetcher {
if ($descFromWiki !== null) $tDescription = (string)$descFromWiki;
else $tDescription = "";
$tDescription = '"' . $tDescription . '" - (' . $data['labels'][$lang]['language'] . '.wikipedia.org ' . date('d.m.Y') . ')';
}
else {
$tDescription = "";
}
$output[$lang] = [
'label' => self::_cleanWikidataInput((string)$data['labels'][$lang]['value']),
'description' => self::_cleanWikidataInput($tDescription),
'link' => $wikilink,
];
if ($tDescription !== '') {
$descs[$lang] = $tDescription;
$desc_cleaned = self::_cleanWikidataInput($tDescription);
if ($desc_cleaned !== '') {
$output[$lang] = [
'label' => self::_cleanWikidataInput((string)$data['labels'][$lang]['value']),
'description' => '"' . $desc_cleaned . '" - (' . $data['labels'][$lang]['language'] . '.wikipedia.org ' . date('d.m.Y') . ')',
'link' => $wikilink,
];
}
}
}
// echo '<br><b style="color: cc0000;">Wikipedia Links fehlen</b>';
@@ -417,6 +672,7 @@ final class NodaWikidataFetcher {
];
}
# print_r($descs);
}
@@ -424,196 +680,6 @@ final class NodaWikidataFetcher {
}
/**
* Cleans contents parsed from Wikipedia.
*
* @param string $input Input string.
*
* @return string
*/
private static function _cleanWikidataInput(string $input):string {
if (substr($input, 0, strlen('<')) === '<') {
$doc = new DOMDocument();
$doc->loadXML($input);
$list = $doc->getElementsByTagName("style");
while ($list->length > 0) {
$p = $list->item(0);
if ($p === null || $p->parentNode === null) break;
$p->parentNode->removeChild($p);
}
$list = $doc->getElementsByTagName("table");
while ($list->length > 0) {
$p = $list->item(0);
if ($p === null || $p->parentNode === null) break;
$p->parentNode->removeChild($p);
}
$list = $doc->getElementsByTagName("div");
while ($list->length > 1) {
$p = $list->item(1);
if ($p === null || $p->parentNode === null) break;
$p->parentNode->removeChild($p);
}
$list = $doc->getElementsByTagName("ol");
while ($list->length > 0) {
$p = $list->item(0);
if ($p === null || $p->parentNode === null) break;
$p->parentNode->removeChild($p);
}
if (($firstP = $doc->getElementsByTagName("p")->item(0)) !== null) {
if (($firstPhtml = $doc->saveHTML($firstP)) !== false) {
if (strpos($firstPhtml, 'geohack') !== false) {
if ($firstP->parentNode !== null) $firstP->parentNode->removeChild($firstP);
}
}
}
/*
if (strpos($doc->saveHTML(), 'Coordinates:') !== false) {
echo $doc->saveHTML();
exit;
}
*/
$input = str_replace(PHP_EOL, PHP_EOL . PHP_EOL, trim($doc->textContent));
if (mb_strlen($input) > 600) {
if (strpos($input, PHP_EOL . PHP_EOL, 600) !== false) {
$input = substr($input, 0, strpos($input, PHP_EOL . PHP_EOL, 600));
}
}
$bracketsToRemove = [];
for ($i = 0; $i < 100; $i++) {
$bracketsToRemove["[$i]"] = "";
}
$input = strtr($input, $bracketsToRemove);
$input = str_replace("\t", " ", $input);
// Remove newlines with ensuing spaces
while (strpos($input, PHP_EOL . " ") !== false) {
$input = str_replace(PHP_EOL . " ", PHP_EOL, $input);
}
// Remove double newlines
while (strpos($input, PHP_EOL . PHP_EOL . PHP_EOL) !== false) {
$input = str_replace(PHP_EOL . PHP_EOL . PHP_EOL, PHP_EOL . PHP_EOL, $input);
}
return $input;
}
$input = str_replace(PHP_EOL, '', $input);
foreach (self::WIKIPEDIA_REMOVE_LITERALS as $tToRemove) $input = str_replace($tToRemove, "", $input);
$first_mention_of_paragraph = strpos($input, '<p>');
if ($first_mention_of_paragraph !== false) $input = substr($input, $first_mention_of_paragraph, (strrpos($input, '</p>') ?: strlen($input)) - $first_mention_of_paragraph);
// Remove infobox tables specifically
$removeFirstParagraph = false;
if (empty($input)) return "";
$firstParagraphPosition = strpos($input, '<p', 1);
$currentSearchPos = strpos($input, "<table>");
if ($currentSearchPos !== false && $currentSearchPos < $firstParagraphPosition) {
if (($tableEndPos = strpos($input, "</table>")) !== false) {
if (($pStartPos = strpos($input, '<p', $tableEndPos + 6)) !== false) {
$input = substr($input, $pStartPos);
}
}
}
// Remove leftover unnecessary paragraphs before actual content
$removeFirstParagraph = false;
$firstParagraphPosition = strpos($input, '<p', 1);
foreach (["</table>", "<img"] as $tagPart) {
$currentSearchPos = strpos($input, $tagPart);
if ($currentSearchPos !== false && $currentSearchPos < $firstParagraphPosition) {
$removeFirstParagraph = true;
break;
}
}
if ($removeFirstParagraph === true) {
$input = substr($input, $firstParagraphPosition ?: 0);
}
$input = str_replace('</p>', '</p>' . PHP_EOL . PHP_EOL . PHP_EOL, $input);
# $input = str_replace('?/i', '', $input);
$input = strip_tags($input);
# for ($i = 150; $i < 1000; $i++) $input = str_replace("&#$i;", " ", $input);
$i = 0;
while (strpos($input, ".mw-parser-output") !== false and strpos($input, "}", strpos($input, ".mw-parser-output")) !== false) {
$part1 = substr($input, 0, strpos($input, ".mw-parser-output"));
$part2 = substr($input, strpos($input, "}", strpos($input, ".mw-parser-output")) + 1);
$input = $part1 . $part2;
++$i;
if ($i === 30) break;
}
$bracketsToRemove = [];
for ($i = 0; $i < 100; $i++) {
$bracketsToRemove["[$i]"] = "";
}
$input = strtr($input, $bracketsToRemove);
$input = str_replace("\t", " ", $input);
// Remove double whitespaces
while (strpos($input, " ") !== false) {
$input = str_replace(" ", " ", $input);
}
// Remove newlines with ensuing spaces
while (strpos($input, PHP_EOL . " ") !== false) {
$input = str_replace(PHP_EOL . " ", PHP_EOL, $input);
}
// Remove double newlines
while (strpos($input, PHP_EOL . PHP_EOL . PHP_EOL) !== false) {
$input = str_replace(PHP_EOL . PHP_EOL . PHP_EOL, PHP_EOL . PHP_EOL, $input);
}
$stableToRemove = [
"Vous pouvez partager vos connaissances en laméliorant (comment ?) selon les recommandations des projets correspondants.",
];
foreach ($stableToRemove as $tToRemove) $input = str_replace($tToRemove, "", $input);
$endings = [
"StubDenne artikel om et vandløb ",
];
foreach ($endings as $ending) {
if (strpos($input, $ending) !== false) $input = substr($input, 0, strpos($input, $ending));
}
$input = trim($input);
// Cut off overly long articles
if (mb_strlen($input) > 600) {
if (strpos($input, PHP_EOL . PHP_EOL, 600) !== false) {
$input = trim(substr($input, 0, strpos($input, PHP_EOL . PHP_EOL, 600)));
}
}
if (empty($input)) return '';
$input = str_replace("'", "´", MD_STD::preg_replace_str("/\&\#91\;[0-9]\&\#93\;/", '', $input));
$input = html_entity_decode($input);
return $input;
}
/**
* Function for fetching description from Wikipedia
*
@@ -855,7 +921,7 @@ final class NodaWikidataFetcher {
if (isset($wikilink[$lang]) and isset($wikilinkterm[$lang]) and is_string($wikilinkterm[$lang])) {
$datafromwiki = MD_STD::runCurl("https://" . $lang . ".wikipedia.org/w/api.php?action=parse&page=" . urlencode($wikilinkterm[$lang]) . "&prop=text&section=0&format=json", 10000);
$datafromwiki = MD_STD::runCurl(self::_getWikipediaApiLink($lang, $wikilinkterm[$lang]), 10000);
$datafromwiki = json_decode($datafromwiki, true)['parse']['text']['*'];
# Process data retrieved from wikipedia
@@ -865,17 +931,17 @@ final class NodaWikidataFetcher {
}
foreach (self::LANGUAGES_MAIN_DESC as $sprache) {
foreach (self::LANGUAGES_MAIN_DESC as $cur_lang) {
if ($alreadyEntered === true) break;
if (!isset($wikilink[$sprache]) || !isset($wikilinkterm[$sprache]) || !is_string($wikilinkterm[$sprache])) continue;
if (!isset($wikilink[$cur_lang]) || !isset($wikilinkterm[$cur_lang]) || !is_string($wikilinkterm[$cur_lang])) continue;
$datafromwiki = MD_STD::runCurl("https://" . $sprache . ".wikipedia.org/w/api.php?action=parse&page=" . urlencode((string)$wikilinkterm[$sprache]) . "&prop=text&section=0&format=json", 10000);
$datafromwiki = MD_STD::runCurl(self::_getWikipediaApiLink($cur_lang, $wikilinkterm[$cur_lang]), 10000);
$datafromwiki = json_decode($datafromwiki, true)['parse']['text']['*'];
# Process data retrieved from wikipedia
if ($datafromwiki = self::_cleanWikidataInput((string)$datafromwiki)) {
$alreadyEntered = $this->retrievePersinstDescFromWikipedia($persinst_id, $wikidata_id, $datafromwiki, $wikilink[$sprache], $lang, "$sprache", $erfasst_von);
$alreadyEntered = $this->retrievePersinstDescFromWikipedia($persinst_id, $wikidata_id, $datafromwiki, $wikilink[$cur_lang], $lang, "$cur_lang", $erfasst_von);
}
}
@@ -940,33 +1006,54 @@ final class NodaWikidataFetcher {
}
/**
* Gets the current description of a place.
*
* @param integer $onum Place ID.
*
* @return string
*/
private function getPlaceDescription(int $onum):string {
$currentPlaceResult = $this->_mysqli_noda->query_by_stmt("SELECT `ort_anmerkung`
FROM `orte`
WHERE `ort_id` = ?", "i", $onum);
if (!($curPlaceInfo = $currentPlaceResult->fetch_row())) {
$currentPlaceResult->close();
throw new Exception("This place does not exist");
}
$currentPlaceResult->close();
return $curPlaceInfo[0];
}
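This helper replaces the inline SELECT used further down: the caller fetches the description once and passes a plain string to enterPlaceDescFromWikidata(). A sketch of the call:

// Fetch the stored description once; throws if the place does not exist.
$cur_place_desc = $this->getPlaceDescription($onum);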
/**
* Function for entering base information about a place from wikidata.
*
* @param mysqli_result $currentPlaceResult Mysqli result pointing to the current place.
* @param string $datafromwiki Data parsed from wikidata.
* @param array<mixed> $wikilink Wikilink.
* @param string $preflang Language of the user interface in general.
* @param string $lang Language of the main entry.
* @param integer $placeID ID of the place.
* @param string $erfasst_von User name.
* @param string $cur_place_desc Current description of the place.
* @param string $datafromwiki Data parsed from wikidata.
* @param array<mixed> $wikilink Wikilink.
* @param string $preflang Language of the user interface in general.
* @param string $lang Language of the main entry.
* @param integer $placeID ID of the place.
* @param string $erfasst_von User name.
*
* @return boolean
*/
public function enterPlaceDescFromWikidata(mysqli_result $currentPlaceResult, string $datafromwiki, array $wikilink, string $preflang, string $lang, int $placeID, string $erfasst_von) {
public function enterPlaceDescFromWikidata(string $cur_place_desc, string $datafromwiki, array $wikilink, string $preflang, string $lang, int $placeID, string $erfasst_von):bool {
$datafromwiki = '"' . $datafromwiki . '" - (Wikipedia (' . $lang . ') ' . date("d.m.Y") . ')';
if (!($curPlaceInfo = $currentPlaceResult->fetch_assoc())) return false;
if (!empty(trim($curPlaceInfo['ort_anmerkung'])) and substr($curPlaceInfo['ort_anmerkung'], 0, 3) !== 'GND') {
if (!empty(trim($cur_place_desc)) and substr($cur_place_desc, 0, 3) !== 'GND') {
switch ($this->_retrievalMode) {
case "add":
$datafromwiki = $curPlaceInfo['ort_anmerkung'] . PHP_EOL . PHP_EOL . $datafromwiki;
$datafromwiki = $cur_place_desc . PHP_EOL . PHP_EOL . $datafromwiki;
break;
case "keep":
$datafromwiki = $curPlaceInfo['ort_anmerkung'];
$datafromwiki = $cur_place_desc;
break;
case "replace":
break;
@@ -977,7 +1064,7 @@ final class NodaWikidataFetcher {
echo '
<p class="alert icons iconsAlert">There is already an entry for description ...</p>
<div class="wikiReplaceTTile">
<h3>Actual entry</h3><p>' . nl2br($curPlaceInfo['ort_anmerkung']) . '</p>
<h3>Actual entry</h3><p>' . nl2br($cur_place_desc) . '</p>
</div>
<div class="wikiReplaceTTile">
<h3>Now found</h3>
@@ -1102,10 +1189,9 @@ final class NodaWikidataFetcher {
if (isset($data['sitelinks'][$tLang . 'wiki']['title'])) $wikilinkterm[$tLang] = str_replace(' ', '_', $data['sitelinks'][$tLang . 'wiki']['title']);
}
$currentPlaceResult = $this->_mysqli_noda->query_by_stmt("SELECT `ort_anmerkung`
FROM `orte`
WHERE `ort_id` = ?", "i", $onum);
// Get current description for overwriting
$cur_place_desc = $this->getPlaceDescription($onum);
$alreadyEntered = false;
// P131: Located in administrative unit
@@ -1115,29 +1201,27 @@ final class NodaWikidataFetcher {
if (!empty($wikilink[$lang])) {
$datafromwiki = MD_STD::runCurl("https://" . urlencode($lang) . ".wikipedia.org/w/api.php?action=parse&page=" . urlencode($wikilinkterm[$lang]) . "&prop=text&section=0&format=json", 10000);
$datafromwiki = MD_STD::runCurl(self::_getWikipediaApiLink($lang, $wikilinkterm[$lang]), 10000);
$datafromwiki = json_decode($datafromwiki, true)['parse']['text']['*'];
if (!empty($datafromwiki) and !empty($datafromwiki = self::_cleanWikidataInput((string)$datafromwiki))) {
$alreadyEntered = $this->enterPlaceDescFromWikidata($currentPlaceResult, $datafromwiki, $wikilink, $lang, $lang, $onum, $erfasst_von);
$alreadyEntered = $this->enterPlaceDescFromWikidata($cur_place_desc, $datafromwiki, $wikilink, $lang, $lang, $onum, $erfasst_von);
}
}
foreach (self::LANGUAGES_MAIN_DESC as $sprache) {
foreach (self::LANGUAGES_MAIN_DESC as $cur_lang) {
//if ($alreadyEntered === true) break;
if ($alreadyEntered === true) break;
if (!isset($wikilink[$sprache])) continue;
if (!isset($wikilink[$cur_lang])) continue;
$datafromwiki = MD_STD::runCurl("https://" . urlencode($sprache) . ".wikipedia.org/w/api.php?action=parse&page=" . urlencode($wikilinkterm[$sprache]) . "&prop=text&section=0&format=json", 10000);
$datafromwiki = MD_STD::runCurl(self::_getWikipediaApiLink($cur_lang, $wikilinkterm[$cur_lang]), 10000);
$datafromwiki = json_decode($datafromwiki, true)['parse']['text']['*'];
if (!empty($datafromwiki) and !empty($datafromwiki = self::_cleanWikidataInput((string)$datafromwiki))) {
$alreadyEntered = $this->enterPlaceDescFromWikidata($currentPlaceResult, $datafromwiki, $wikilink, $lang, $sprache, $onum, $erfasst_von);
$alreadyEntered = $this->enterPlaceDescFromWikidata($cur_place_desc, $datafromwiki, $wikilink, $lang, $cur_lang, $onum, $erfasst_von);
}
}
$currentPlaceResult->close();
unset($currentPlaceResult);
if (isset($data['claims']['P1566'])) $geonames_id = filter_var($data['claims']['P1566'][0]['mainsnak']['datavalue']['value'], FILTER_VALIDATE_INT);
if (isset($data['claims']['P1667'])) $tgn_id = filter_var($data['claims']['P1667'][0]['mainsnak']['datavalue']['value'], FILTER_VALIDATE_INT);
@@ -1381,7 +1465,7 @@ final class NodaWikidataFetcher {
if (isset($wikilink[$lang]) and isset($wikilinkterm[$lang]) and is_string($wikilinkterm[$lang])) {
$datafromwiki = MD_STD::runCurl("https://" . $lang . ".wikipedia.org/w/api.php?action=parse&page=" . urlencode($wikilinkterm[$lang]) . "&prop=text&section=0&format=json", 10000);
$datafromwiki = MD_STD::runCurl(self::_getWikipediaApiLink($lang, $wikilinkterm[$lang]), 10000);
$datafromwiki = json_decode($datafromwiki, true)['parse']['text']['*'];
# Process data retrieved from wikipedia
@@ -1391,17 +1475,17 @@ final class NodaWikidataFetcher {
}
foreach (self::LANGUAGES_MAIN_DESC as $sprache) {
foreach (self::LANGUAGES_MAIN_DESC as $cur_lang) {
if ($alreadyEntered === true) break;
if (!isset($wikilink[$sprache]) || !isset($wikilinkterm[$sprache]) || !is_string($wikilinkterm[$sprache])) continue;
if (!isset($wikilink[$cur_lang]) || !isset($wikilinkterm[$cur_lang]) || !is_string($wikilinkterm[$cur_lang])) continue;
$datafromwiki = MD_STD::runCurl("https://" . $sprache . ".wikipedia.org/w/api.php?action=parse&page=" . urlencode((string)$wikilinkterm[$sprache]) . "&prop=text&section=0&format=json", 10000);
$datafromwiki = MD_STD::runCurl(self::_getWikipediaApiLink($cur_lang, $wikilinkterm[$cur_lang]), 10000);
$datafromwiki = json_decode($datafromwiki, true)['parse']['text']['*'];
# Process data retrieved from wikipedia
if ($datafromwiki = self::_cleanWikidataInput((string)$datafromwiki)) {
$alreadyEntered = $this->retrieveTagDescFromWikipedia($tag_id, $datafromwiki, $wikilink[$sprache], $lang, "$sprache", $erfasst_von);
$alreadyEntered = $this->retrieveTagDescFromWikipedia($tag_id, $datafromwiki, $wikilink[$cur_lang], $lang, $cur_lang, $erfasst_von);
}
}
@@ -1724,13 +1808,13 @@ final class NodaWikidataFetcher {
public static function generateWikidataFetcherHeader(MDTlLoader $tlLoader, string $additional = "", string $searchTerm = ""):string {
if (empty($searchTerm) and !empty($_GET['suchbegriff'])) {
$searchTerm = $_GET['suchbegriff'];
$searchTerm = (string)$_GET['suchbegriff'];
}
$output = '
<header>
<h1><img src="../img/wikidata.png" alt="Logo: Wikidata" />' . $tlLoader->tl("wiki", "wiki", "fetch_from_wikidata");
$output .= ': ' . $searchTerm;
$output .= ': ' . htmlspecialchars($searchTerm);
$output .= '</h1>';
$output .= $additional;
$output .= '</header>';
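A minimal illustration of the escaping added above (the payload is invented):

// With htmlspecialchars(), a crafted search term is rendered as text instead of markup.
echo htmlspecialchars('<script>alert(1)</script>');
// Prints: &lt;script&gt;alert(1)&lt;/script&gt;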