Improve NodaWikidataFetcher's loading of descriptions

Close #15
2023-08-31 15:38:12 +02:00
parent 869e0f263d
commit 107a4cd640
2 changed files with 538 additions and 235 deletions
--- a/src/NodaWikidataFetcher.php
+++ b/src/NodaWikidataFetcher.php
@@ -98,6 +98,256 @@ final class NodaWikidataFetcher {
    /** @var MDMysqli */
    private MDMysqli $_mysqli_noda;

+    /**
+     * Builds the URL of the Wikipedia API call (action=parse) that returns the
+     * text of section 0 of a page as JSON.
+     *
+     * @param string $lang       Language code, used as the wikipedia.org subdomain.
+     * @param string $searchTerm Title of the page to fetch.
+     *
+     * @return non-empty-string
+     */
+    private static function _getWikipediaApiLink(string $lang, string $searchTerm):string {
+
+        $host  = urlencode($lang) . ".wikipedia.org";
+        $query = "action=parse&page=" . urlencode($searchTerm) . "&prop=text&section=0&format=json";
+
+        return "https://" . $host . "/w/api.php?" . $query;
+
+    }
+
+    /**
+     * Extracts the plain text of all paragraphs from an HTML snippet.
+     *
+     * The snippet is cut down to the span between the first "<p" and the last
+     * "</p>", parsed as XML, stripped of style, table and ol elements as well
+     * as of a leading paragraph containing "geohack", and finally reduced to
+     * the trimmed text contents of the remaining paragraphs, separated by
+     * blank lines.
+     *
+     * @param string $input Input string.
+     *
+     * @return string
+     */
+    private static function _cleanWikidataInputHtml(string $input):string {
+
+        // Drop anything before the first paragraph. An offset of 0 needs no cutting.
+        $firstParagraphStart = strpos($input, '<p');
+        if ($firstParagraphStart !== false && $firstParagraphStart !== 0) {
+            $input = substr($input, $firstParagraphStart);
+        }
+
+        // Drop anything after the last closing paragraph tag (4 = length of "</p>").
+        $lastParagraphEnd = strrpos($input, '</p>');
+        if ($lastParagraphEnd !== false && $lastParagraphEnd !== 0) {
+            $input = substr($input, 0, $lastParagraphEnd + 4);
+        }
+
+        // The paragraphs are wrapped in one root element so they can be parsed as XML.
+        // NOTE(review): loadXML() reports malformed input through warnings and a
+        // false return value; the catch below presumably relies on an error
+        // handler that converts warnings into exceptions - confirm.
+        $dom = new DOMDocument();
+        try {
+            $dom->loadXML('<section>' . trim($input) . '</section>');
+        }
+        catch (Exception $e) {
+            throw new Exception("Failed to load DOMDocument." . PHP_EOL . $e->getMessage() . PHP_EOL . PHP_EOL . $input);
+        }
+
+        // Remove all elements that never carry description text. The inner loop
+        // always takes the first remaining match and relies on the node list
+        // shrinking as nodes are removed.
+        foreach (["style", "table", "ol"] as $tagName) {
+            $nodes = $dom->getElementsByTagName($tagName);
+            while ($nodes->length > 0) {
+                $node = $nodes->item(0);
+                if ($node === null || $node->parentNode === null) break;
+                $node->parentNode->removeChild($node);
+            }
+        }
+
+        // A first paragraph mentioning "geohack" is removed entirely.
+        $leadParagraph = $dom->getElementsByTagName("p")->item(0);
+        if ($leadParagraph !== null) {
+            $leadParagraphHtml = $dom->saveHTML($leadParagraph);
+            if ($leadParagraphHtml !== false
+                && strpos($leadParagraphHtml, 'geohack') !== false
+                && $leadParagraph->parentNode !== null
+            ) {
+                $leadParagraph->parentNode->removeChild($leadParagraph);
+            }
+        }
+
+        // Collect the text of each remaining paragraph.
+        $paragraphs = [];
+        foreach ($dom->getElementsByTagName("p") as $paragraph) {
+            $paragraphs[] = trim($paragraph->textContent);
+        }
+
+        // Join with single newlines first, then double every newline so that
+        // paragraphs end up separated by a blank line.
+        $joined = trim(implode(PHP_EOL, $paragraphs));
+        return str_replace(PHP_EOL, PHP_EOL . PHP_EOL, $joined);
+
+    }
+
+    /**
+     * Strips numeric source markers ("[0]" up to "[99]") from a description text.
+     *
+     * @param string $input Input string.
+     *
+     * @return string
+     */
+    private static function _cleanSourceBracketsOffTranslation(string $input):string {
+
+        // Map every marker to the empty string and replace them all in one pass.
+        $replacements = [];
+        foreach (range(0, 99) as $number) {
+            $replacements['[' . $number . ']'] = "";
+        }
+
+        return strtr($input, $replacements);
+
+    }
+
+    /**
+     * Cleans contents parsed from Wikipedia.
+     *
+     * Surrounding double quotes and the literals listed in
+     * self::WIKIPEDIA_REMOVE_LITERALS are removed first. Input that then starts
+     * with "<" is treated as HTML and reduced to its paragraph texts via
+     * _cleanWikidataInputHtml(); anything else goes through the string-based
+     * cleanup below. In both cases texts longer than 600 characters are cut at
+     * the first paragraph break after offset 600, source markers are removed,
+     * whitespace is normalized and the result is passed through
+     * MD_STD_IN::sanitize_text().
+     *
+     * @param string $input Input string.
+     *
+     * @return string
+     */
+    private static function _cleanWikidataInput(string $input):string {
+
+        $input = trim($input, '"');
+        foreach (self::WIKIPEDIA_REMOVE_LITERALS as $tToRemove) $input = str_replace($tToRemove, "", $input);
+
+        if (substr($input, 0, strlen('<')) === '<') {
+
+            $input = self::_cleanWikidataInputHtml($input);
+
+            // Cut off overly long articles at the next paragraph break
+            if (mb_strlen($input) > 600) {
+                if (strpos($input, PHP_EOL . PHP_EOL, 600) !== false) {
+                    $input = substr($input, 0, strpos($input, PHP_EOL . PHP_EOL, 600));
+                }
+            }
+
+            $input = self::_cleanSourceBracketsOffTranslation($input);
+
+            $input = str_replace("\t", " ", $input);
+
+            // A stray early return used to sit here and made the cleanup and
+            // sanitization below unreachable for HTML input.
+
+            // Remove newlines with ensuing spaces
+            while (strpos($input, PHP_EOL . " ") !== false) {
+                $input = str_replace(PHP_EOL . " ", PHP_EOL, $input);
+            }
+
+            // Remove double newlines
+            while (strpos($input, PHP_EOL . PHP_EOL . PHP_EOL) !== false) {
+                $input = str_replace(PHP_EOL . PHP_EOL . PHP_EOL, PHP_EOL . PHP_EOL, $input);
+            }
+            return MD_STD_IN::sanitize_text($input);
+
+        }
+
+        $input = str_replace(PHP_EOL, '', $input);
+
+        if (empty($input)) return "";
+
+        // Remove infobox tables specifically
+        $firstParagraphPosition = strpos($input, '<p', 1);
+        $currentSearchPos = strpos($input, "<table>");
+        if ($currentSearchPos !== false && $currentSearchPos < $firstParagraphPosition) {
+            if (($tableEndPos = strpos($input, "</table>")) !== false) {
+                if (($pStartPos = strpos($input, '<p', $tableEndPos + 6)) !== false) {
+                    $input = substr($input, $pStartPos);
+                }
+            }
+        }
+
+        // Remove leftover unnecessary paragraphs before actual content
+
+        $removeFirstParagraph = false;
+        $firstParagraphPosition = strpos($input, '<p', 1);
+
+        foreach (["</table>", "<img"] as $tagPart) {
+            $currentSearchPos = strpos($input, $tagPart);
+            if ($currentSearchPos !== false && $currentSearchPos < $firstParagraphPosition) {
+                $removeFirstParagraph = true;
+                break;
+            }
+        }
+
+        if ($removeFirstParagraph === true) {
+            $input = substr($input, $firstParagraphPosition ?: 0);
+        }
+
+        // Mark paragraph ends with newlines before the tags are stripped
+        $input = str_replace('</p>', '</p>' . PHP_EOL . PHP_EOL . PHP_EOL, $input);
+        $input = strip_tags($input);
+
+        // Remove inline CSS rules (".mw-parser-output ... }") left over after
+        // stripping tags. Capped at 30 rounds.
+        $i = 0;
+        while (strpos($input, ".mw-parser-output") !== false and strpos($input, "}", strpos($input, ".mw-parser-output")) !== false) {
+            $part1 = substr($input, 0, strpos($input, ".mw-parser-output"));
+            $part2 = substr($input, strpos($input, "}", strpos($input, ".mw-parser-output")) + 1);
+            $input = $part1 . $part2;
+            ++$i;
+            if ($i === 30) break;
+        }
+
+        $input = self::_cleanSourceBracketsOffTranslation($input);
+
+        $input = str_replace("\t", " ", $input);
+
+        // Remove double whitespaces
+        while (strpos($input, "  ") !== false) {
+            $input = str_replace("  ", " ", $input);
+        }
+
+        // Remove newlines with ensuing spaces
+        while (strpos($input, PHP_EOL . " ") !== false) {
+            $input = str_replace(PHP_EOL . " ", PHP_EOL, $input);
+        }
+
+        // Remove double newlines
+        while (strpos($input, PHP_EOL . PHP_EOL . PHP_EOL) !== false) {
+            $input = str_replace(PHP_EOL . PHP_EOL . PHP_EOL, PHP_EOL . PHP_EOL, $input);
+        }
+
+        $stableToRemove = [
+            "Vous pouvez partager vos connaissances en l’améliorant (comment ?) selon les recommandations des projets correspondants.",
+        ];
+        foreach ($stableToRemove as $tToRemove) $input = str_replace($tToRemove, "", $input);
+
+        // Cut the text off where one of these markers starts
+        $endings = [
+            "StubDenne artikel om et vandløb ",
+        ];
+        foreach ($endings as $ending) {
+            if (strpos($input, $ending) !== false) $input = substr($input, 0, strpos($input, $ending));
+        }
+
+        $input = trim($input);
+
+        // Cut off overly long articles
+        if (mb_strlen($input) > 600) {
+            if (strpos($input, PHP_EOL . PHP_EOL, 600) !== false) {
+                $input = trim(substr($input, 0, strpos($input, PHP_EOL . PHP_EOL, 600)));
+            }
+        }
+
+        if (empty($input)) return '';
+
+        // Remove entity-encoded source markers (&#91;1&#93; = "[1]"). Multi-digit
+        // numbers are matched as well, in line with the plain-bracket cleanup above.
+        $input = str_replace("'", "´", MD_STD::preg_replace_str("/\&\#91\;[0-9]+\&\#93\;/", '', $input));
+
+        $input = html_entity_decode($input);
+
+        return MD_STD_IN::sanitize_text($input);
+
+    }
+
+    /**
+     * Public entry point to _cleanWikidataInput, available on the command line
+     * only so that tests can reach the private cleaning routine.
+     *
+     * @param string $input Input string.
+     *
+     * @return string
+     */
+    public static function cleanWikidataInput(string $input):string {
+
+        if (PHP_SAPI === 'cli') {
+            return self::_cleanWikidataInput($input);
+        }
+
+        throw new Exception("Use this function only for testing");
+
+    }
+
    /**
     * Sets the retrieval mode.
     *
@@ -343,7 +593,7 @@ final class NodaWikidataFetcher {

                if (isset($wikilink)) {

-                    $languagesToFetch[$lang] = "https://" . $lang . ".wikipedia.org/w/api.php?action=parse&page=" . urlencode($wikilinkterm) . "&prop=text&section=0&format=json";
+                    $languagesToFetch[$lang] = self::_getWikipediaApiLink($lang, $wikilinkterm);
                    $wikilinks[$lang] = $wikilink;

                }
@@ -379,6 +629,7 @@ final class NodaWikidataFetcher {

        $output = [];

+        $descs = [];
        foreach ($checkagainstLanguage as $lang) {

            if (!empty($languagesToFetch[$lang]) && !empty($data['sitelinks'][$lang . 'wiki']) && !empty($wikilinks[$lang])) {
@@ -393,18 +644,22 @@ final class NodaWikidataFetcher {
                    if ($descFromWiki !== null) $tDescription = (string)$descFromWiki;
                    else $tDescription = "";

-                    $tDescription = '"' . $tDescription . '" - (' . $data['labels'][$lang]['language'] . '.wikipedia.org ' . date('d.m.Y') . ')';
-
                }
                else {
                    $tDescription = "";
                }

-                $output[$lang] = [
-                    'label' => self::_cleanWikidataInput((string)$data['labels'][$lang]['value']),
-                    'description' => self::_cleanWikidataInput($tDescription),
-                    'link' => $wikilink,
-                ];
+                if ($tDescription !== '') {
+                    $descs[$lang] = $tDescription;
+                    $desc_cleaned = self::_cleanWikidataInput($tDescription);
+                    if ($desc_cleaned !== '') {
+                        $output[$lang] = [
+                            'label' => self::_cleanWikidataInput((string)$data['labels'][$lang]['value']),
+                            'description' => '"' . $desc_cleaned . '" - (' . $data['labels'][$lang]['language'] . '.wikipedia.org ' . date('d.m.Y') . ')',
+                            'link' => $wikilink,
+                        ];
+                    }
+                }

            }
                // echo '<br><b style="color: cc0000;">Wikipedia Links fehlen</b>';
@@ -417,6 +672,7 @@ final class NodaWikidataFetcher {
                ];

            }
+            # print_r($descs);

        }

@@ -424,196 +680,6 @@ final class NodaWikidataFetcher {

    }

-    /**
-     * Cleans contents parsed from Wikipedia.
-     *
-     * @param string $input Input string.
-     *
-     * @return string
-     */
-    private static function _cleanWikidataInput(string $input):string {
-
-        if (substr($input, 0, strlen('<')) === '<') {
-            $doc = new DOMDocument();
-            $doc->loadXML($input);
-
-            $list = $doc->getElementsByTagName("style");
-            while ($list->length > 0) {
-                $p = $list->item(0);
-                if ($p === null || $p->parentNode === null) break;
-                $p->parentNode->removeChild($p);
-            }
-
-            $list = $doc->getElementsByTagName("table");
-            while ($list->length > 0) {
-                $p = $list->item(0);
-                if ($p === null || $p->parentNode === null) break;
-                $p->parentNode->removeChild($p);
-            }
-
-            $list = $doc->getElementsByTagName("div");
-            while ($list->length > 1) {
-                $p = $list->item(1);
-                if ($p === null || $p->parentNode === null) break;
-                $p->parentNode->removeChild($p);
-            }
-
-            $list = $doc->getElementsByTagName("ol");
-            while ($list->length > 0) {
-                $p = $list->item(0);
-                if ($p === null || $p->parentNode === null) break;
-                $p->parentNode->removeChild($p);
-            }
-
-            if (($firstP = $doc->getElementsByTagName("p")->item(0)) !== null) {
-                if (($firstPhtml = $doc->saveHTML($firstP)) !== false) {
-                    if (strpos($firstPhtml, 'geohack') !== false) {
-                        if ($firstP->parentNode !== null) $firstP->parentNode->removeChild($firstP);
-                    }
-                }
-            }
-
-            /*
-            if (strpos($doc->saveHTML(), 'Coordinates:') !== false) {
-                echo $doc->saveHTML();
-                exit;
-            }
-             */
-
-            $input = str_replace(PHP_EOL, PHP_EOL . PHP_EOL, trim($doc->textContent));
-
-            if (mb_strlen($input) > 600) {
-                if (strpos($input, PHP_EOL . PHP_EOL, 600) !== false) {
-                    $input = substr($input, 0, strpos($input, PHP_EOL . PHP_EOL, 600));
-                }
-            }
-
-            $bracketsToRemove = [];
-            for ($i = 0; $i < 100; $i++) {
-                $bracketsToRemove["[$i]"] = "";
-            }
-            $input = strtr($input, $bracketsToRemove);
-
-            $input = str_replace("\t", " ", $input);
-
-            // Remove newlines with ensuing spaces
-            while (strpos($input, PHP_EOL . " ") !== false) {
-                $input = str_replace(PHP_EOL . " ", PHP_EOL, $input);
-            }
-
-            // Remove double newlines
-            while (strpos($input, PHP_EOL . PHP_EOL . PHP_EOL) !== false) {
-                $input = str_replace(PHP_EOL . PHP_EOL . PHP_EOL, PHP_EOL . PHP_EOL, $input);
-            }
-            return $input;
-
-        }
-
-        $input = str_replace(PHP_EOL, '', $input);
-
-        foreach (self::WIKIPEDIA_REMOVE_LITERALS as $tToRemove) $input = str_replace($tToRemove, "", $input);
-
-        $first_mention_of_paragraph = strpos($input, '<p>');
-        if ($first_mention_of_paragraph !== false) $input = substr($input, $first_mention_of_paragraph, (strrpos($input, '</p>') ?: strlen($input)) - $first_mention_of_paragraph);
-
-        // Remove infobox tables specifically
-        $removeFirstParagraph = false;
-        if (empty($input)) return "";
-        $firstParagraphPosition = strpos($input, '<p', 1);
-        $currentSearchPos = strpos($input, "<table>");
-        if ($currentSearchPos !== false && $currentSearchPos < $firstParagraphPosition) {
-            if (($tableEndPos = strpos($input, "</table>")) !== false) {
-                if (($pStartPos = strpos($input, '<p', $tableEndPos + 6)) !== false) {
-                    $input = substr($input, $pStartPos);
-                }
-            }
-        }
-
-        // Remove leftover unnecessary paragraphs before actual content
-
-        $removeFirstParagraph = false;
-        $firstParagraphPosition = strpos($input, '<p', 1);
-
-        foreach (["</table>", "<img"] as $tagPart) {
-            $currentSearchPos = strpos($input, $tagPart);
-            if ($currentSearchPos !== false && $currentSearchPos < $firstParagraphPosition) {
-                $removeFirstParagraph = true;
-                break;
-            }
-        }
-
-        if ($removeFirstParagraph === true) {
-            $input = substr($input, $firstParagraphPosition ?: 0);
-        }
-
-        $input = str_replace('</p>', '</p>' . PHP_EOL . PHP_EOL . PHP_EOL, $input);
-        # $input = str_replace('?/i', '', $input);
-        $input = strip_tags($input);
-
-        # for ($i = 150; $i < 1000; $i++) $input = str_replace("&#$i;", " ", $input);
-        $i = 0;
-        while (strpos($input, ".mw-parser-output") !== false and strpos($input, "}", strpos($input, ".mw-parser-output")) !== false) {
-            $part1 = substr($input, 0, strpos($input, ".mw-parser-output"));
-            $part2 = substr($input, strpos($input, "}", strpos($input, ".mw-parser-output")) + 1);
-            $input = $part1 . $part2;
-            ++$i;
-            if ($i === 30) break;
-        }
-
-        $bracketsToRemove = [];
-        for ($i = 0; $i < 100; $i++) {
-            $bracketsToRemove["[$i]"] = "";
-        }
-        $input = strtr($input, $bracketsToRemove);
-
-        $input = str_replace("\t", " ", $input);
-
-        // Remove double whitespaces
-        while (strpos($input, "  ") !== false) {
-            $input = str_replace("  ", " ", $input);
-        }
-
-        // Remove newlines with ensuing spaces
-        while (strpos($input, PHP_EOL . " ") !== false) {
-            $input = str_replace(PHP_EOL . " ", PHP_EOL, $input);
-        }
-
-        // Remove double newlines
-        while (strpos($input, PHP_EOL . PHP_EOL . PHP_EOL) !== false) {
-            $input = str_replace(PHP_EOL . PHP_EOL . PHP_EOL, PHP_EOL . PHP_EOL, $input);
-        }
-
-        $stableToRemove = [
-            "Vous pouvez partager vos connaissances en l’améliorant (comment ?) selon les recommandations des projets correspondants.",
-        ];
-        foreach ($stableToRemove as $tToRemove) $input = str_replace($tToRemove, "", $input);
-
-        $endings = [
-            "StubDenne artikel om et vandløb ",
-        ];
-        foreach ($endings as $ending) {
-            if (strpos($input, $ending) !== false) $input = substr($input, 0, strpos($input, $ending));
-        }
-
-        $input = trim($input);
-
-        // Cut off overly long articles
-        if (mb_strlen($input) > 600) {
-            if (strpos($input, PHP_EOL . PHP_EOL, 600) !== false) {
-                $input = trim(substr($input, 0, strpos($input, PHP_EOL . PHP_EOL, 600)));
-            }
-        }
-
-        if (empty($input)) return '';
-
-        $input = str_replace("'", "´", MD_STD::preg_replace_str("/\&\#91\;[0-9]\&\#93\;/", '', $input));
-
-        $input = html_entity_decode($input);
-
-        return $input;
-
-    }
-
    /**
     * Function for fetching description from Wikipedia
     *
@@ -855,7 +921,7 @@ final class NodaWikidataFetcher {

        if (isset($wikilink[$lang]) and isset($wikilinkterm[$lang]) and is_string($wikilinkterm[$lang])) {

-            $datafromwiki = MD_STD::runCurl("https://" . $lang . ".wikipedia.org/w/api.php?action=parse&page=" . urlencode($wikilinkterm[$lang]) . "&prop=text&section=0&format=json", 10000);
+            $datafromwiki = MD_STD::runCurl(self::_getWikipediaApiLink($lang, $wikilinkterm[$lang]), 10000);
            $datafromwiki = json_decode($datafromwiki, true)['parse']['text']['*'];

            # Process data retrieved from wikipedia
@@ -865,17 +931,17 @@ final class NodaWikidataFetcher {

        }

-        foreach (self::LANGUAGES_MAIN_DESC as $sprache) {
+        foreach (self::LANGUAGES_MAIN_DESC as $cur_lang) {

            if ($alreadyEntered === true) break;
-            if (!isset($wikilink[$sprache]) || !isset($wikilinkterm[$sprache]) || !is_string($wikilinkterm[$sprache])) continue;
+            if (!isset($wikilink[$cur_lang]) || !isset($wikilinkterm[$cur_lang]) || !is_string($wikilinkterm[$cur_lang])) continue;

-            $datafromwiki = MD_STD::runCurl("https://" . $sprache . ".wikipedia.org/w/api.php?action=parse&page=" . urlencode((string)$wikilinkterm[$sprache]) . "&prop=text&section=0&format=json", 10000);
+            $datafromwiki = MD_STD::runCurl(self::_getWikipediaApiLink($cur_lang, $wikilinkterm[$cur_lang]), 10000);
            $datafromwiki = json_decode($datafromwiki, true)['parse']['text']['*'];

            # Process data retrieved from wikipedia
            if ($datafromwiki = self::_cleanWikidataInput((string)$datafromwiki)) {
-                $alreadyEntered = $this->retrievePersinstDescFromWikipedia($persinst_id, $wikidata_id, $datafromwiki, $wikilink[$sprache], $lang, "$sprache", $erfasst_von);
+                $alreadyEntered = $this->retrievePersinstDescFromWikipedia($persinst_id, $wikidata_id, $datafromwiki, $wikilink[$cur_lang], $lang, "$cur_lang", $erfasst_von);
            }

        }
@@ -940,33 +1006,54 @@ final class NodaWikidataFetcher {

    }

+    /**
+     * Gets the current description of a place, i.e. the contents of the
+     * `ort_anmerkung` column of its row in `orte`.
+     *
+     * @param integer $onum Place ID.
+     *
+     * @return string
+     *
+     * @throws Exception If no row exists for the given place ID.
+     */
+    private function getPlaceDescription(int $onum):string {
+
+        $currentPlaceResult = $this->_mysqli_noda->query_by_stmt("SELECT `ort_anmerkung`
+            FROM `orte`
+            WHERE `ort_id` = ?", "i", $onum);
+
+        // Fetch first, then release the result on every path.
+        $curPlaceInfo = $currentPlaceResult->fetch_row();
+        $currentPlaceResult->close();
+
+        if (!$curPlaceInfo) {
+            throw new Exception("This place does not exist");
+        }
+
+        // fetch_row() delivers SQL NULL as PHP null. The cast keeps the declared
+        // string return type intact in that case (an empty description).
+        return (string)$curPlaceInfo[0];
+
+    }
+
    /**
     * Function for entering base information about a place from wikidata.
     *
-     * @param mysqli_result $currentPlaceResult Mysqli result pointing to the current place.
-     * @param string        $datafromwiki       Data parsed from wikidata.
-     * @param array<mixed>  $wikilink           Wikilink.
-     * @param string        $preflang           Language of the user interface in general.
-     * @param string        $lang               Language of the main entry.
-     * @param integer       $placeID            ID of the place.
-     * @param string        $erfasst_von        User name.
+     * @param string       $cur_place_desc Current description of the place (contents of `ort_anmerkung`).
+     * @param string       $datafromwiki   Data parsed from wikidata.
+     * @param array<mixed> $wikilink       Wikilink.
+     * @param string       $preflang       Language of the user interface in general.
+     * @param string       $lang           Language of the main entry.
+     * @param integer      $placeID        ID of the place.
+     * @param string       $erfasst_von    User name.
     *
     * @return boolean
     */
-    public function enterPlaceDescFromWikidata(mysqli_result $currentPlaceResult, string $datafromwiki, array $wikilink, string $preflang, string $lang, int $placeID, string $erfasst_von) {
+    public function enterPlaceDescFromWikidata(string $cur_place_desc, string $datafromwiki, array $wikilink, string $preflang, string $lang, int $placeID, string $erfasst_von):bool {

        $datafromwiki = '"' . $datafromwiki . '" - (Wikipedia (' . $lang . ') ' . date("d.m.Y") . ')';

-        if (!($curPlaceInfo = $currentPlaceResult->fetch_assoc())) return false;
-
-        if (!empty(trim($curPlaceInfo['ort_anmerkung'])) and substr($curPlaceInfo['ort_anmerkung'], 0, 3) !== 'GND') {
+        if (!empty(trim($cur_place_desc)) and substr($cur_place_desc, 0, 3) !== 'GND') {

            switch ($this->_retrievalMode) {
            case "add":
-                $datafromwiki = $curPlaceInfo['ort_anmerkung'] . PHP_EOL . PHP_EOL . $datafromwiki;
+                $datafromwiki = $cur_place_desc . PHP_EOL . PHP_EOL . $datafromwiki;
                break;
            case "keep":
-                $datafromwiki = $curPlaceInfo['ort_anmerkung'];
+                $datafromwiki = $cur_place_desc;
                break;
            case "replace":
                break;
@@ -977,7 +1064,7 @@ final class NodaWikidataFetcher {
                echo '
                <p class="alert icons iconsAlert">There is already an entry for description ...</p>
                <div class="wikiReplaceTTile">
-                    <h3>Actual entry</h3><p>' . nl2br($curPlaceInfo['ort_anmerkung']) . '</p>
+                    <h3>Actual entry</h3><p>' . nl2br($cur_place_desc) . '</p>
                </div>
                <div class="wikiReplaceTTile">
                    <h3>Now found</h3>
@@ -1102,10 +1189,9 @@ final class NodaWikidataFetcher {
            if (isset($data['sitelinks'][$tLang . 'wiki']['title'])) $wikilinkterm[$tLang] = str_replace(' ', '_', $data['sitelinks'][$tLang . 'wiki']['title']);
        }

-        $currentPlaceResult = $this->_mysqli_noda->query_by_stmt("SELECT `ort_anmerkung`
-            FROM `orte`
-            WHERE `ort_id` = ?", "i", $onum);
+        // Get current description for overwriting

+        $cur_place_desc = $this->getPlaceDescription($onum);
        $alreadyEntered = false;

        // P131: Located in administrative unit
@@ -1115,29 +1201,27 @@ final class NodaWikidataFetcher {

        if (!empty($wikilink[$lang])) {

-            $datafromwiki = MD_STD::runCurl("https://" . urlencode($lang) . ".wikipedia.org/w/api.php?action=parse&page=" . urlencode($wikilinkterm[$lang]) . "&prop=text&section=0&format=json", 10000);
+            $datafromwiki = MD_STD::runCurl(self::_getWikipediaApiLink($lang, $wikilinkterm[$lang]), 10000);
            $datafromwiki = json_decode($datafromwiki, true)['parse']['text']['*'];

            if (!empty($datafromwiki) and !empty($datafromwiki = self::_cleanWikidataInput((string)$datafromwiki))) {
-                $alreadyEntered = $this->enterPlaceDescFromWikidata($currentPlaceResult, $datafromwiki, $wikilink, $lang, $lang, $onum, $erfasst_von);
+                $alreadyEntered = $this->enterPlaceDescFromWikidata($cur_place_desc, $datafromwiki, $wikilink, $lang, $lang, $onum, $erfasst_von);
            }
        }

-        foreach (self::LANGUAGES_MAIN_DESC as $sprache) {
+        foreach (self::LANGUAGES_MAIN_DESC as $cur_lang) {

            //if ($alreadyEntered === true) break;
            if ($alreadyEntered === true) break;
-            if (!isset($wikilink[$sprache])) continue;
+            if (!isset($wikilink[$cur_lang])) continue;

-            $datafromwiki = MD_STD::runCurl("https://" . urlencode($sprache) . ".wikipedia.org/w/api.php?action=parse&page=" . urlencode($wikilinkterm[$sprache]) . "&prop=text&section=0&format=json", 10000);
+            $datafromwiki = MD_STD::runCurl(self::_getWikipediaApiLink($cur_lang, $wikilinkterm[$cur_lang]), 10000);
            $datafromwiki = json_decode($datafromwiki, true)['parse']['text']['*'];
            if (!empty($datafromwiki) and !empty($datafromwiki = self::_cleanWikidataInput((string)$datafromwiki))) {
-                $alreadyEntered = $this->enterPlaceDescFromWikidata($currentPlaceResult, $datafromwiki, $wikilink, $lang, $sprache, $onum, $erfasst_von);
+                $alreadyEntered = $this->enterPlaceDescFromWikidata($cur_place_desc, $datafromwiki, $wikilink, $lang, $cur_lang, $onum, $erfasst_von);
            }

        }
-        $currentPlaceResult->close();
-        unset($currentPlaceResult);

        if (isset($data['claims']['P1566'])) $geonames_id = filter_var($data['claims']['P1566'][0]['mainsnak']['datavalue']['value'], FILTER_VALIDATE_INT);
        if (isset($data['claims']['P1667'])) $tgn_id = filter_var($data['claims']['P1667'][0]['mainsnak']['datavalue']['value'], FILTER_VALIDATE_INT);
@@ -1381,7 +1465,7 @@ final class NodaWikidataFetcher {

        if (isset($wikilink[$lang]) and isset($wikilinkterm[$lang]) and is_string($wikilinkterm[$lang])) {

-            $datafromwiki = MD_STD::runCurl("https://" . $lang . ".wikipedia.org/w/api.php?action=parse&page=" . urlencode($wikilinkterm[$lang]) . "&prop=text&section=0&format=json", 10000);
+            $datafromwiki = MD_STD::runCurl(self::_getWikipediaApiLink($lang, $wikilinkterm[$lang]), 10000);
            $datafromwiki = json_decode($datafromwiki, true)['parse']['text']['*'];

            # Process data retrieved from wikipedia
@@ -1391,17 +1475,17 @@ final class NodaWikidataFetcher {

        }

-        foreach (self::LANGUAGES_MAIN_DESC as $sprache) {
+        foreach (self::LANGUAGES_MAIN_DESC as $cur_lang) {

            if ($alreadyEntered === true) break;
-            if (!isset($wikilink[$sprache]) || !isset($wikilinkterm[$sprache]) || !is_string($wikilinkterm[$sprache])) continue;
+            if (!isset($wikilink[$cur_lang]) || !isset($wikilinkterm[$cur_lang]) || !is_string($wikilinkterm[$cur_lang])) continue;

-            $datafromwiki = MD_STD::runCurl("https://" . $sprache . ".wikipedia.org/w/api.php?action=parse&page=" . urlencode((string)$wikilinkterm[$sprache]) . "&prop=text&section=0&format=json", 10000);
+            $datafromwiki = MD_STD::runCurl(self::_getWikipediaApiLink($cur_lang, $wikilinkterm[$cur_lang]), 10000);
            $datafromwiki = json_decode($datafromwiki, true)['parse']['text']['*'];

            # Process data retrieved from wikipedia
            if ($datafromwiki = self::_cleanWikidataInput((string)$datafromwiki)) {
-                $alreadyEntered = $this->retrieveTagDescFromWikipedia($tag_id, $datafromwiki, $wikilink[$sprache], $lang, "$sprache", $erfasst_von);
+                $alreadyEntered = $this->retrieveTagDescFromWikipedia($tag_id, $datafromwiki, $wikilink[$cur_lang], $lang, $cur_lang, $erfasst_von);
            }

        }
@@ -1724,13 +1808,13 @@ final class NodaWikidataFetcher {
    public static function generateWikidataFetcherHeader(MDTlLoader $tlLoader, string $additional = "", string $searchTerm = ""):string {

        if (empty($searchTerm) and !empty($_GET['suchbegriff'])) {
-            $searchTerm = $_GET['suchbegriff'];
+            $searchTerm = (string)$_GET['suchbegriff'];
        }

        $output = '
        <header>
            <h1><img src="../img/wikidata.png" alt="Logo: Wikidata" />' . $tlLoader->tl("wiki", "wiki", "fetch_from_wikidata");
-        $output .= ': ' . $searchTerm;
+        $output .= ': ' . htmlspecialchars($searchTerm);
        $output .= '</h1>';
        $output .= $additional;
        $output .= '</header>';