Use Wikipedia API for getting descriptions from Wikipedia rather than
parsing HTML in Wikidata fetcher. Thanks @awinkler
This commit is contained in:
		| @@ -43,25 +43,6 @@ final class NodaWikidataFetcher { | ||||
|         "orcid" => "P496", | ||||
|     ]; | ||||
|  | ||||
|     private const WIKIPEDIA_REMOVE_LITERALS = [ | ||||
|         "<p>Si vous disposez d'ouvrages ou d'articles de référence ou si vous ", | ||||
|         '<p><b>En pratique :</b> <a href="/wiki/Wikip%C3%A9dia:Citez_vos_sources#Qualité_des_sources" title="Wikipédia:Citez vos sources">Quelles sources sont attendu', | ||||
|         '<pVous pouvez partager vos connaissances en l’améliorant (', | ||||
|         '<p class="mw-empty-elt">', | ||||
|         '<p><small>Géolocalisation sur la carte', | ||||
|         '<p><b>Koordinaatit:</b>', | ||||
|         '<p><span class="executeJS" data-gadgetname="ImgToggle"></span', | ||||
|         '<p><span class="imgtoggleboxTitle">', | ||||
|         //'<div class="mw-parser-output"><p>', | ||||
|         '<p><span style="font-size: small;"><span id="coordinates">', | ||||
|         '<p><span></span></p>', | ||||
|         '<p><a rel="nofollow" class="external text" href="https://maps.gs', | ||||
|         '<p><span class="plainlinks nourlexpansion"><a class="external text" href="//tools.wmflabs.org/geohack/geohack.php?langu', | ||||
|         '<p><span style="display:none">', | ||||
|         '<p> </p>', | ||||
|         '<p><span class="geo noexcerpt"', | ||||
|     ]; | ||||
|  | ||||
|     public const RETRIEVAL_MODES_ACCEPTED = [ | ||||
|         'list', | ||||
|         'add', | ||||
| @@ -87,7 +68,8 @@ final class NodaWikidataFetcher { | ||||
|      */ | ||||
|     private static function _getWikipediaApiLink(string $lang, string $searchTerm):string { | ||||
|  | ||||
|         return "https://" . urlencode($lang) . ".wikipedia.org/w/api.php?action=parse&page=" . urlencode($searchTerm) . "&prop=text&section=0&format=json"; | ||||
|         return "https://" . urlencode($lang) . ".wikipedia.org/w/api.php?format=json&action=query&prop=extracts&exintro&explaintext&redirects=1&titles=" . urlencode($searchTerm); | ||||
|         # w/api.php?action=parse&page=" . urlencode($searchTerm) . "&prop=text&section=0&format=json"; | ||||
|  | ||||
|     } | ||||
|  | ||||
| @@ -152,12 +134,13 @@ final class NodaWikidataFetcher { | ||||
|  | ||||
|         $datafromwiki = MD_STD::runCurl(self::_getWikipediaApiLink($lang, $title), 10000); | ||||
|         $json_decoded = json_decode($datafromwiki, true); | ||||
|         if (empty($json_decoded) || !isset($json_decoded['parse'])) { | ||||
|         if (empty($json_decoded) || !isset($json_decoded['query']) || empty($json_decoded['query']['pages'])) { | ||||
|             return ''; | ||||
|         } | ||||
|         $datafromwiki = strval($json_decoded['parse']['text']['*']); | ||||
|         $firstPageId = array_keys($json_decoded['query']['pages'])[0]; | ||||
|         $datafromwiki = strval($json_decoded['query']['pages'][$firstPageId]['extract']); | ||||
|  | ||||
|         return self::_cleanWikidataInput($datafromwiki); | ||||
|         return self::_cleanInputSimple($datafromwiki); | ||||
|  | ||||
|     } | ||||
|  | ||||
| @@ -276,239 +259,21 @@ final class NodaWikidataFetcher { | ||||
|     } | ||||
|  | ||||
|     /** | ||||
|      * Cleans basic tags off Wikidata input. | ||||
|      * Cleans remaining HTML elements and leading, trailing whitespaces. | ||||
|      * | ||||
|      * @param string $input Input string. | ||||
|      * | ||||
|      * @return string | ||||
|      */ | ||||
|     private static function _cleanWikidataInputHtml(string $input):string { | ||||
|     private static function _cleanInputSimple(string $input):string { | ||||
|  | ||||
|         // Clean off anything before first <p> | ||||
|         if ($pStartPos = strpos($input, '<p')) { | ||||
|             $input = substr($input, $pStartPos); | ||||
|         } | ||||
|         if ($pEndPos = strrpos($input, '</p>')) { | ||||
|             $input = substr($input, 0, $pEndPos + 4); | ||||
|         } | ||||
|  | ||||
|         $doc = new DOMDocument(); | ||||
|         try { | ||||
|             libxml_use_internal_errors(true); | ||||
|             $doc->loadXML('<section>' . trim($input) . '</section>'); | ||||
|             libxml_use_internal_errors(false); | ||||
|         } | ||||
|         catch (Exception $e) { | ||||
|             throw new Exception("Failed to load DOMDocument." . PHP_EOL . $e->getMessage() . PHP_EOL . PHP_EOL . '---' . $input . '---'); | ||||
|         } | ||||
|  | ||||
|         $list = $doc->getElementsByTagName("style"); | ||||
|         while ($list->length > 0) { | ||||
|             $p = $list->item(0); | ||||
|             if ($p === null || $p->parentNode === null) break; | ||||
|             $p->parentNode->removeChild($p); | ||||
|         } | ||||
|  | ||||
|         $list = $doc->getElementsByTagName("table"); | ||||
|         while ($list->length > 0) { | ||||
|             $p = $list->item(0); | ||||
|             if ($p === null || $p->parentNode === null) break; | ||||
|             $p->parentNode->removeChild($p); | ||||
|         } | ||||
|  | ||||
|         $list = $doc->getElementsByTagName("ol"); | ||||
|         while ($list->length > 0) { | ||||
|             $p = $list->item(0); | ||||
|             if ($p === null || $p->parentNode === null) break; | ||||
|             $p->parentNode->removeChild($p); | ||||
|         } | ||||
|  | ||||
|         if (($firstP = $doc->getElementsByTagName("p")->item(0)) !== null) { | ||||
|             if (($firstPhtml = $doc->saveHTML($firstP)) !== false) { | ||||
|                 if (strpos($firstPhtml, 'geohack') !== false) { | ||||
|                     if ($firstP->parentNode !== null) $firstP->parentNode->removeChild($firstP); | ||||
|                 } | ||||
|             } | ||||
|         } | ||||
|  | ||||
|         $output = []; | ||||
|         foreach ($doc->getElementsByTagName("p") as $p) { | ||||
|             $output[] = trim($p->textContent); | ||||
|         } | ||||
|  | ||||
|         /* | ||||
|         if (strpos($doc->saveHTML(), 'Coordinates:') !== false) { | ||||
|             echo $doc->saveHTML(); | ||||
|             exit; | ||||
|         } | ||||
|          */ | ||||
|         return str_replace(PHP_EOL, PHP_EOL . PHP_EOL, trim(implode(PHP_EOL, $output))); | ||||
|  | ||||
|     } | ||||
|  | ||||
|     /** | ||||
|      * Cleans brackets ([1], [2]) off description text. | ||||
|      * | ||||
|      * @param string $input Input string. | ||||
|      * | ||||
|      * @return string | ||||
|      */ | ||||
|     private static function _cleanSourceBracketsOffTranslation(string $input):string { | ||||
|  | ||||
|         $bracketsToRemove = []; | ||||
|         for ($i = 0; $i < 100; $i++) { | ||||
|             $bracketsToRemove["[$i]"] = ""; | ||||
|         } | ||||
|         return strtr($input, $bracketsToRemove); | ||||
|  | ||||
|     } | ||||
|  | ||||
|     /** | ||||
|      * Cleans contents parsed from Wikipedia. | ||||
|      * | ||||
|      * @param string $input Input string. | ||||
|      * | ||||
|      * @return string | ||||
|      */ | ||||
|     private static function _cleanWikidataInput(string $input):string { | ||||
|  | ||||
|         $input = trim($input, '"'); | ||||
|         foreach (self::WIKIPEDIA_REMOVE_LITERALS as $tToRemove) $input = str_replace($tToRemove, "", $input); | ||||
|  | ||||
|         if (substr($input, 0, strlen('<')) === '<') { | ||||
|  | ||||
|             $input = self::_cleanWikidataInputHtml($input); | ||||
|  | ||||
|             if (mb_strlen($input) > 600) { | ||||
|                 if (strpos($input, PHP_EOL . PHP_EOL, 600) !== false) { | ||||
|                     $input = substr($input, 0, strpos($input, PHP_EOL . PHP_EOL, 600)); | ||||
|                 } | ||||
|             } | ||||
|  | ||||
|             $input = self::_cleanSourceBracketsOffTranslation($input); | ||||
|  | ||||
|             $input = str_replace("\t", " ", $input); | ||||
|  | ||||
|             // Remove newlines with ensuing spaces | ||||
|             while (strpos($input, PHP_EOL . " ") !== false) { | ||||
|                 $input = str_replace(PHP_EOL . " ", PHP_EOL, $input); | ||||
|             } | ||||
|  | ||||
|             // Remove double newlines | ||||
|             while (strpos($input, PHP_EOL . PHP_EOL . PHP_EOL) !== false) { | ||||
|                 $input = str_replace(PHP_EOL . PHP_EOL . PHP_EOL, PHP_EOL . PHP_EOL, $input); | ||||
|             } | ||||
|             return MD_STD_IN::sanitize_text($input); | ||||
|  | ||||
|         } | ||||
|  | ||||
|         $input = str_replace(PHP_EOL, '', $input); | ||||
|  | ||||
|         if (empty($input)) return ""; | ||||
|  | ||||
|         // Remove infobox tables specifically | ||||
|         $firstParagraphPosition = strpos($input, '<p', 1); | ||||
|         $currentSearchPos = strpos($input, "<table>"); | ||||
|         if ($currentSearchPos !== false && $currentSearchPos < $firstParagraphPosition) { | ||||
|             if (($tableEndPos = strpos($input, "</table>")) !== false) { | ||||
|                 if (($pStartPos = strpos($input, '<p', $tableEndPos + 6)) !== false) { | ||||
|                     $input = substr($input, $pStartPos); | ||||
|                 } | ||||
|             } | ||||
|         } | ||||
|  | ||||
|         // Remove leftover unnecessary paragraphs before actual content | ||||
|  | ||||
|         $removeFirstParagraph = false; | ||||
|         $firstParagraphPosition = strpos($input, '<p', 1); | ||||
|  | ||||
|         foreach (["</table>", "<img"] as $tagPart) { | ||||
|             $currentSearchPos = strpos($input, $tagPart); | ||||
|             if ($currentSearchPos !== false && $currentSearchPos < $firstParagraphPosition) { | ||||
|                 $removeFirstParagraph = true; | ||||
|                 break; | ||||
|             } | ||||
|         } | ||||
|  | ||||
|         if ($removeFirstParagraph === true) { | ||||
|             $input = substr($input, $firstParagraphPosition ?: 0); | ||||
|         } | ||||
|  | ||||
|         $input = str_replace('</p>', '</p>' . PHP_EOL . PHP_EOL . PHP_EOL, $input); | ||||
|         # $input = str_replace('?/i', '', $input); | ||||
|         $input = strip_tags($input); | ||||
|  | ||||
|         # for ($i = 150; $i < 1000; $i++) $input = str_replace("&#$i;", " ", $input); | ||||
|         $i = 0; | ||||
|         while (strpos($input, ".mw-parser-output") !== false and strpos($input, "}", strpos($input, ".mw-parser-output")) !== false) { | ||||
|             $part1 = substr($input, 0, strpos($input, ".mw-parser-output")); | ||||
|             $part2 = substr($input, strpos($input, "}", strpos($input, ".mw-parser-output")) + 1); | ||||
|             $input = $part1 . $part2; | ||||
|             ++$i; | ||||
|             if ($i === 30) break; | ||||
|         } | ||||
|  | ||||
|         $input = self::_cleanSourceBracketsOffTranslation($input); | ||||
|  | ||||
|         $input = str_replace("\t", " ", $input); | ||||
|  | ||||
|         // Remove double whitespaces | ||||
|         while (strpos($input, "  ") !== false) { | ||||
|             $input = str_replace("  ", " ", $input); | ||||
|         } | ||||
|  | ||||
|         // Remove newlines with ensuing spaces | ||||
|         while (strpos($input, PHP_EOL . " ") !== false) { | ||||
|             $input = str_replace(PHP_EOL . " ", PHP_EOL, $input); | ||||
|         } | ||||
|  | ||||
|         // Remove double newlines | ||||
|         while (strpos($input, PHP_EOL . PHP_EOL . PHP_EOL) !== false) { | ||||
|             $input = str_replace(PHP_EOL . PHP_EOL . PHP_EOL, PHP_EOL . PHP_EOL, $input); | ||||
|         } | ||||
|  | ||||
|         $stableToRemove = [ | ||||
|             "Vous pouvez partager vos connaissances en l’améliorant (comment ?) selon les recommandations des projets correspondants.", | ||||
|         ]; | ||||
|         foreach ($stableToRemove as $tToRemove) $input = str_replace($tToRemove, "", $input); | ||||
|  | ||||
|         $endings = [ | ||||
|             "StubDenne artikel om et vandløb ", | ||||
|         ]; | ||||
|         foreach ($endings as $ending) { | ||||
|             if (strpos($input, $ending) !== false) $input = substr($input, 0, strpos($input, $ending)); | ||||
|         } | ||||
|  | ||||
|         $input = trim($input); | ||||
|  | ||||
|         // Cut off overly long articles | ||||
|         if (mb_strlen($input) > 600) { | ||||
|             if (strpos($input, PHP_EOL . PHP_EOL, 600) !== false) { | ||||
|                 $input = trim(substr($input, 0, strpos($input, PHP_EOL . PHP_EOL, 600))); | ||||
|             } | ||||
|         } | ||||
|  | ||||
|         if (empty($input)) return ''; | ||||
|  | ||||
|         $input = str_replace("'", "´", MD_STD::preg_replace_str("/\&\#91\;[0-9]\&\#93\;/", '', $input)); | ||||
|  | ||||
|         $input = html_entity_decode($input); | ||||
|  | ||||
|         return MD_STD_IN::sanitize_text($input); | ||||
|  | ||||
|     } | ||||
|  | ||||
|     /** | ||||
|      * Wrapper around _cleanWikidataInput for testing. | ||||
|      * | ||||
|      * @param string $input Input string. | ||||
|      * | ||||
|      * @return string | ||||
|      */ | ||||
|     public static function cleanWikidataInput(string $input):string { | ||||
|  | ||||
|         if (PHP_SAPI !== 'cli') throw new Exception("Use this function only for testing"); | ||||
|         return self::_cleanWikidataInput($input); | ||||
|         return strtr( | ||||
|             trim(MD_STD_IN::sanitize_text($input)), | ||||
|             [ | ||||
|                 PHP_EOL => PHP_EOL . PHP_EOL, | ||||
|                 PHP_EOL . PHP_EOL . PHP_EOL => PHP_EOL . PHP_EOL, | ||||
|             ] | ||||
|         ); | ||||
|  | ||||
|     } | ||||
|  | ||||
| @@ -815,25 +580,20 @@ final class NodaWikidataFetcher { | ||||
|                 $wikilink = $wikilinks[$lang]; | ||||
|                 if (!empty($contents[$lang])) { | ||||
|  | ||||
|                     $fromWikipedia = json_decode($contents[$lang], true)['parse']; | ||||
|                     $titleFromWikipedia = $fromWikipedia['title']; | ||||
|                     $descFromWiki = $fromWikipedia['text']['*']; | ||||
|  | ||||
|                     # Process data retrieved from wikipedia | ||||
|  | ||||
|                     if ($descFromWiki !== null) $tDescription = (string)$descFromWiki; | ||||
|                     else $tDescription = ""; | ||||
|                     $titleFromWikipedia = $data['sitelinks'][$lang . 'wiki']['title']; | ||||
|                     $tDescription = self::_getCleanedWikipediaSnippet($lang, $titleFromWikipedia); | ||||
|  | ||||
|                 } | ||||
|                 else { | ||||
|                     $tDescription = ""; | ||||
|                 } | ||||
|  | ||||
|                 if (!empty($titleFromWikipedia) && !empty($tDescription) && !empty($desc_cleaned = self::_cleanWikidataInput($tDescription))) { | ||||
|                 if (!empty($titleFromWikipedia) && !empty($tDescription)) { | ||||
|  | ||||
|                     # $descs[$lang] = $tDescription; | ||||
|                     $output[$lang] = [ | ||||
|                         'label' => $titleFromWikipedia, | ||||
|                         'description' => '"' . $desc_cleaned . '" - (' . $data['labels'][$lang]['language'] . '.wikipedia.org ' . date('d.m.Y') . ')', | ||||
|                         'description' => '"' . $tDescription . '" - (' . $data['labels'][$lang]['language'] . '.wikipedia.org ' . date('d.m.Y') . ')', | ||||
|                         'link' => $wikilink, | ||||
|                     ]; | ||||
|                 } | ||||
| @@ -841,8 +601,8 @@ final class NodaWikidataFetcher { | ||||
|                 else if (!empty($data['labels'][$lang]['value']) and !empty($data['descriptions'][$lang])) { | ||||
|  | ||||
|                     $output[$lang] = [ | ||||
|                         'label' => self::_cleanWikidataInput($data['labels'][$lang]['value']), | ||||
|                         'description' => self::_cleanWikidataInput($data['descriptions'][$lang]['value']), | ||||
|                         'label' => self::_cleanInputSimple($data['labels'][$lang]['value']), | ||||
|                         'description' => self::_cleanInputSimple($data['descriptions'][$lang]['value']), | ||||
|                         'link' => "", | ||||
|                     ]; | ||||
|  | ||||
| @@ -853,8 +613,8 @@ final class NodaWikidataFetcher { | ||||
|             else if (!empty($data['labels'][$lang]['value']) and !empty($data['descriptions'][$lang])) { | ||||
|  | ||||
|                 $output[$lang] = [ | ||||
|                     'label' => self::_cleanWikidataInput($data['labels'][$lang]['value']), | ||||
|                     'description' => self::_cleanWikidataInput($data['descriptions'][$lang]['value']), | ||||
|                     'label' => self::_cleanInputSimple($data['labels'][$lang]['value']), | ||||
|                     'description' => self::_cleanInputSimple($data['descriptions'][$lang]['value']), | ||||
|                     'link' => "", | ||||
|                 ]; | ||||
|  | ||||
| @@ -1070,6 +830,51 @@ final class NodaWikidataFetcher { | ||||
|  | ||||
|     } | ||||
|  | ||||
|     /** | ||||
|      * Picks the best available description for an entity. | ||||
|      * | ||||
|      * Lookup order, the first non-empty result wins: | ||||
|      * 1. The Wikipedia article linked for the user's language ($lang). | ||||
|      * 2. The Wikipedia articles linked for each language listed in | ||||
|      *    self::LANGUAGES_MAIN_DESC, each requested in its own language. | ||||
|      * 3. The Wikidata description in $lang. | ||||
|      * 4. The first description listed in $data['descriptions']. | ||||
|      * | ||||
|      * @param string                                           $lang      The user's selected used language. | ||||
|      * @param array<mixed>                                     $data      Data fetched from wikidata. | ||||
|      * @param array<string, array{url: string, title: string}> $wikilinks Wikipedia links, indexed by language. | ||||
|      * | ||||
|      * @return array{}|array{lang: string, desc: string, source: 'wikidata'|'wikipedia'} | ||||
|      */ | ||||
|     private static function _getDescriptionFromWikidataAndWikipediaLinks(string $lang, array $data, array $wikilinks):array { | ||||
|  | ||||
|         // Try the current user language for retrieving wikipedia texts | ||||
|         if (isset($wikilinks[$lang])) { | ||||
|             # Process data retrieved from wikipedia | ||||
|             if (!empty($datafromwiki = self::_getCleanedWikipediaSnippet($lang, $wikilinks[$lang]['title']))) { | ||||
|                 return ['lang' => $lang, 'desc' => $datafromwiki, 'source' => 'wikipedia']; | ||||
|             } | ||||
|  | ||||
|         } | ||||
|  | ||||
|         // Try the alternative languages for retrieving wikipedia texts | ||||
|         foreach (self::LANGUAGES_MAIN_DESC as $cur_lang) { | ||||
|  | ||||
|             // The user's own language has already been tried above. | ||||
|             if ($lang === $cur_lang || !isset($wikilinks[$cur_lang])) continue; | ||||
|  | ||||
|             // The title is the one of the $cur_lang article, so it has to be | ||||
|             // looked up in the $cur_lang Wikipedia rather than in the user's | ||||
|             // language, where a page of that name may be missing or unrelated. | ||||
|             if (!empty($datafromwiki = self::_getCleanedWikipediaSnippet($cur_lang, $wikilinks[$cur_lang]['title']))) { | ||||
|                 return ['lang' => $cur_lang, 'desc' => $datafromwiki, 'source' => 'wikipedia']; | ||||
|             } | ||||
|  | ||||
|         } | ||||
|  | ||||
|         // If the description still has not been entered, try retrieving it from wikidata. | ||||
|         if (!empty($data['descriptions'][$lang])) { | ||||
|             return ['lang' => $lang, 'desc' => (string)$data['descriptions'][$lang]['value'], 'source' => 'wikidata']; | ||||
|         } | ||||
|         else if (!empty($data['descriptions'])) { | ||||
|             // No description in the user's language: fall back to the first one available. | ||||
|             $tLang = (string)array_keys($data['descriptions'])[0]; | ||||
|             $desc = $data['descriptions'][$tLang]; | ||||
|             return ['lang' => $tLang, 'desc' => (string)$desc['value'], 'source' => 'wikidata']; | ||||
|         } | ||||
|  | ||||
|         return []; | ||||
|  | ||||
|     } | ||||
|  | ||||
|     /** | ||||
|      * Function for retrieving information. | ||||
|      * | ||||
| @@ -1087,24 +892,8 @@ final class NodaWikidataFetcher { | ||||
|         // Get links to wikipedia | ||||
|  | ||||
|         $wikilinks = self::_getWikipediaLinksFromWikidataOutput($data); | ||||
|         $alreadyEntered = false; | ||||
|  | ||||
|         if (isset($wikilinks[$lang])) { | ||||
|             # Process data retrieved from wikipedia | ||||
|             if (!empty($datafromwiki = self::_getCleanedWikipediaSnippet($lang, $wikilinks[$lang]['title']))) { | ||||
|                 $alreadyEntered = $this->retrievePersinstDescFromWikipedia($persinst_id, $wikidata_id, $datafromwiki, $lang, $lang, $erfasst_von); | ||||
|             } | ||||
|  | ||||
|         } | ||||
|  | ||||
|         foreach (self::LANGUAGES_MAIN_DESC as $cur_lang) { | ||||
|  | ||||
|             if ($alreadyEntered === true || !isset($wikilinks[$cur_lang])) continue; | ||||
|  | ||||
|             if ($datafromwiki = self::_getCleanedWikipediaSnippet($lang, $wikilinks[$cur_lang]['title'])) { | ||||
|                 $alreadyEntered = $this->retrievePersinstDescFromWikipedia($persinst_id, $wikidata_id, $datafromwiki, $lang, "$cur_lang", $erfasst_von); | ||||
|             } | ||||
|  | ||||
|         if (!empty($desc = self::_getDescriptionFromWikidataAndWikipediaLinks($lang, $data, $wikilinks))) { | ||||
|             $alreadyEntered = $this->retrievePersinstDescFromWikipedia($persinst_id, $wikidata_id, $desc['desc'], $lang, $desc['lang'], $erfasst_von); | ||||
|         } | ||||
|  | ||||
|         $this->enterPersinstBirthDeathDatesFromWikidata($data, $persinst_id); | ||||
| @@ -1386,30 +1175,8 @@ final class NodaWikidataFetcher { | ||||
|         } | ||||
|  | ||||
|         $cur_place_desc = $this->getPlaceDescription($onum); | ||||
|         $alreadyEntered = false; | ||||
|  | ||||
|         if (!empty($wikilinks[$lang])) { | ||||
|  | ||||
|             $datafromwiki = MD_STD::runCurl(self::_getWikipediaApiLink($lang, $wikilinks[$lang]['title']), 10000); | ||||
|             $datafromwiki = json_decode($datafromwiki, true)['parse']['text']['*']; | ||||
|  | ||||
|             if (!empty($datafromwiki) and !empty($datafromwiki = self::_cleanWikidataInput((string)$datafromwiki))) { | ||||
|                 $alreadyEntered = $this->enterPlaceDescFromWikidata($cur_place_desc, $datafromwiki, $lang, $lang, $onum, $erfasst_von); | ||||
|             } | ||||
|         } | ||||
|  | ||||
|         foreach (self::LANGUAGES_MAIN_DESC as $cur_lang) { | ||||
|  | ||||
|             //if ($alreadyEntered === true) break; | ||||
|             if ($alreadyEntered === true) break; | ||||
|             if (!isset($wikilinks[$cur_lang]['url'])) continue; | ||||
|  | ||||
|             $datafromwiki = MD_STD::runCurl(self::_getWikipediaApiLink($cur_lang, $wikilinks[$cur_lang]['title']), 10000); | ||||
|             $datafromwiki = json_decode($datafromwiki, true)['parse']['text']['*']; | ||||
|             if (!empty($datafromwiki) and !empty($datafromwiki = self::_cleanWikidataInput((string)$datafromwiki))) { | ||||
|                 $alreadyEntered = $this->enterPlaceDescFromWikidata($cur_place_desc, $datafromwiki, $lang, $cur_lang, $onum, $erfasst_von); | ||||
|             } | ||||
|  | ||||
|         if (!empty($desc = self::_getDescriptionFromWikidataAndWikipediaLinks($lang, $data, $wikilinks))) { | ||||
|             $this->enterPlaceDescFromWikidata($cur_place_desc, $desc['desc'], $lang, $desc['lang'], $onum, $erfasst_von); | ||||
|         } | ||||
|  | ||||
|         if (isset($data['claims']['P1566'])) $geonames_id = filter_var($data['claims']['P1566'][0]['mainsnak']['datavalue']['value'], FILTER_VALIDATE_INT); | ||||
| @@ -1611,32 +1378,8 @@ final class NodaWikidataFetcher { | ||||
|  | ||||
|         $wikilinks = self::_getWikipediaLinksFromWikidataOutput($data); | ||||
|  | ||||
|         $alreadyEntered = false; | ||||
|  | ||||
|         if (isset($wikilinks[$lang])) { | ||||
|  | ||||
|             $datafromwiki = MD_STD::runCurl(self::_getWikipediaApiLink($lang, $wikilinks[$lang]['title']), 10000); | ||||
|             $datafromwiki = json_decode($datafromwiki, true)['parse']['text']['*']; | ||||
|  | ||||
|             # Process data retrieved from wikipedia | ||||
|             if (!empty($datafromwiki = self::_cleanWikidataInput((string)$datafromwiki))) { | ||||
|                 $alreadyEntered = $this->retrieveTagDescFromWikipedia($tag_id, $datafromwiki, $lang, $lang, $erfasst_von); | ||||
|             } | ||||
|  | ||||
|         } | ||||
|  | ||||
|         foreach (self::LANGUAGES_MAIN_DESC as $cur_lang) { | ||||
|  | ||||
|             if ($alreadyEntered === true || !isset($wikilinks[$cur_lang])) continue; | ||||
|  | ||||
|             $datafromwiki = MD_STD::runCurl(self::_getWikipediaApiLink($cur_lang, $wikilinks[$cur_lang]['title']), 10000); | ||||
|             $datafromwiki = json_decode($datafromwiki, true)['parse']['text']['*']; | ||||
|  | ||||
|             # Process data retrieved from wikipedia | ||||
|             if ($datafromwiki = self::_cleanWikidataInput((string)$datafromwiki)) { | ||||
|                 $alreadyEntered = $this->retrieveTagDescFromWikipedia($tag_id, $datafromwiki, $lang, $cur_lang, $erfasst_von); | ||||
|             } | ||||
|  | ||||
|         if (!empty($desc = self::_getDescriptionFromWikidataAndWikipediaLinks($lang, $data, $wikilinks))) { | ||||
|             $alreadyEntered = $this->retrieveTagDescFromWikipedia($tag_id, $desc['desc'], $lang, $desc['lang'], $erfasst_von); | ||||
|         } | ||||
|  | ||||
|         if (!empty($nodaLinks = $this->_getNodaLinksFromWikidataResult('tag', $wikidata_id, $data))) { | ||||
|   | ||||
| @@ -93,225 +93,6 @@ final class NodaWikidataFetcherTest extends TestCase { | ||||
|  | ||||
|     } | ||||
|  | ||||
|     /** | ||||
|      * Test for cleaning wikidata info. | ||||
|      * | ||||
|      * @group  ValidOutput | ||||
|      * | ||||
|      * @return void | ||||
|      */ | ||||
|     public function testCleanWikidataInput():void { | ||||
|  | ||||
|         $testStr = '"<div class="mw-parser-output"><table class="infobox float-right toccolours toptextcells" style="margin: 0 0 1em 1em; width: 300px;" id="Vorlage_Infobox_Ort_in_der_Ukraine" summary="Infobox Ort in der Ukraine"> | ||||
|  | ||||
| <tbody><tr> | ||||
| <td colspan="2" style="background-color:#AFD6FF; font-size:1.3em; font-weight:bold; text-align:center;">Werbowez (Kossiw) | ||||
| </td></tr> | ||||
| <tr> | ||||
| <td colspan="2" style="background-color:#FFC; font-size:1em; font-weight:bold; text-align:center;"><span lang="uk-Cyrl" class="Cyrl">Вербовець</span> | ||||
| </td></tr> | ||||
|  | ||||
|  | ||||
| <tr style="height:120px; background-color:#FFF;"> | ||||
| <td style="width: 130px; text-align:center;"><span typeof="mw:File"><a href="/wiki/Datei:Coats_of_arms_of_None.svg" class="mw-file-description" title="Wappen fehlt"><img alt="Wappen fehlt" src="//upload.wikimedia.org/wikipedia/commons/thumb/c/c1/Coats_of_arms_of_None.svg/100px-Coats_of_arms_of_None.svg.png" decoding="async" width="100" height="120" class="mw-file-element" srcset="//upload.wikimedia.org/wikipedia/commons/thumb/c/c1/Coats_of_arms_of_None.svg/150px-Coats_of_arms_of_None.svg.png 1.5x, //upload.wikimedia.org/wikipedia/commons/thumb/c/c1/Coats_of_arms_of_None.svg/200px-Coats_of_arms_of_None.svg.png 2x" data-file-width="125" data-file-height="150" /></a></span> | ||||
| </td> | ||||
| <td style="width: 170px; text-align:center;"><table class="centered" style="background-color: #f9f9f9; border: none; border-collapse: collapse; width: 1px;"> | ||||
| <tbody><tr><td style="border: none; padding: 0; text-align: center;"><div style="position: relative; z-index: 0; padding: 0; display: inline-block; width: -webkit-max-content; width: -moz-max-content; width: max-content; border: none;"><figure class="mw-halign-center noviewer notpageimage" typeof="mw:File"><a href="/wiki/Datei:Ukraine_adm_location_map.svg" class="mw-file-description" title="Werbowez (Kossiw) (Ukraine)"><img alt="Werbowez (Kossiw) (Ukraine)" src="//upload.wikimedia.org/wikipedia/commons/thumb/7/78/Ukraine_adm_location_map.svg/180px-Ukraine_adm_location_map.svg.png" decoding="async" width="180" height="121" class="mw-file-element" srcset="//upload.wikimedia.org/wikipedia/commons/thumb/7/78/Ukraine_adm_location_map.svg/270px-Ukraine_adm_location_map.svg.png 1.5x, //upload.wikimedia.org/wikipedia/commons/thumb/7/78/Ukraine_adm_location_map.svg/360px-Ukraine_adm_location_map.svg.png 2x" data-file-width="1546" data-file-height="1038" /></a><figcaption>Werbowez (Kossiw) (Ukraine)</figcaption></figure><div style="position:absolute; top:50.7%; left:18.9%; height:0; width:0;"><div style="position:relative;z-index:100;left:-4px;top:-4px;width:8px;height:8px;line-height:0px;"><span typeof="mw:File"><a href="https://geohack.toolforge.org/geohack.php?pagename=Werbowez_(Kossiw)&language=de&params=48.342222222222_N_25.133333333333_E_dim:10000_region:UA-26_type:city(3395)&title=Werbowez+%28Kossiw%29" title="Werbowez (Kossiw) (48° 20′ 32″ N, 25° 8′ 0″O)"><img alt="" src="//upload.wikimedia.org/wikipedia/commons/thumb/9/97/ButtonRed.svg/8px-ButtonRed.svg.png" decoding="async" width="8" height="8" class="mw-file-element" srcset="//upload.wikimedia.org/wikipedia/commons/thumb/9/97/ButtonRed.svg/12px-ButtonRed.svg.png 1.5x, //upload.wikimedia.org/wikipedia/commons/thumb/9/97/ButtonRed.svg/16px-ButtonRed.svg.png 2x" data-file-width="480" data-file-height="480" /></a></span></div> | ||||
| <table style="font-size:90%; border:none; background-color:transparent; border-collapse:collapse; line-height:1em; position:absolute; width:6em; margin: 0 .2em; text-align:left; left:1px; bottom:1px;"><tbody><tr><td style="border:none; vertical-align:middle;"><span style="position:relative; z-index:9; background-color:none;">Werbowez (Kossiw) </span></td></tr></tbody></table></div></div></td></tr> | ||||
| </tbody></table> | ||||
| </td></tr> | ||||
| <tr style="background-color:#AFD6FF;"> | ||||
| <th colspan="2">Basisdaten | ||||
| </th></tr> | ||||
| <tr> | ||||
| <td><a href="/wiki/Liste_der_Oblaste_der_Ukraine" title="Liste der Oblaste der Ukraine">Oblast</a>:</td> | ||||
| <td><a href="/wiki/Oblast_Iwano-Frankiwsk" title="Oblast Iwano-Frankiwsk">Oblast Iwano-Frankiwsk</a> | ||||
| </td></tr> | ||||
| <tr> | ||||
| <td><a href="/wiki/Liste_der_Rajone_der_Ukraine" title="Liste der Rajone der Ukraine">Rajon</a>:</td> | ||||
| <td><a href="/wiki/Rajon_Kossiw" title="Rajon Kossiw">Rajon Kossiw</a> | ||||
| </td></tr> | ||||
| <tr> | ||||
| <td><a href="/wiki/H%C3%B6he_%C3%BCber_dem_Meeresspiegel" title="Höhe über dem Meeresspiegel">Höhe</a>:</td> | ||||
| <td>369 m | ||||
| </td></tr> | ||||
| <tr> | ||||
| <td><a href="/wiki/Fl%C3%A4cheninhalt" title="Flächeninhalt">Fläche</a>:</td> | ||||
| <td>18,77 <a href="/wiki/Quadratmeter#Quadratkilometer" title="Quadratmeter">km²</a> | ||||
| </td></tr> | ||||
| <tr> | ||||
| <td><a href="/wiki/Einwohner" title="Einwohner">Einwohner</a>:</td> | ||||
| <td>3.395 <small><i>(2001)</i></small> | ||||
| </td></tr> | ||||
| <tr> | ||||
| <td><a href="/wiki/Bev%C3%B6lkerungsdichte" title="Bevölkerungsdichte">Bevölkerungsdichte</a>: | ||||
| </td> | ||||
| <td>181 Einwohner je km² | ||||
| </td></tr> | ||||
| <tr> | ||||
| <td><a href="/wiki/Postleitzahl" title="Postleitzahl">Postleitzahlen</a>:</td> | ||||
| <td>78605 | ||||
| </td></tr> | ||||
| <tr> | ||||
| <td><a href="/wiki/Telefonvorwahl" title="Telefonvorwahl">Vorwahl</a>:</td> | ||||
| <td>+380 3478 | ||||
| </td></tr> | ||||
| <tr> | ||||
| <td><a href="/wiki/Geographische_Koordinaten" title="Geographische Koordinaten">Geographische Lage</a>:</td> | ||||
| <td><span id="text_coordinates" class="coordinates plainlinks-print"><a class="external text" href="https://geohack.toolforge.org/geohack.php?pagename=Werbowez_(Kossiw)&language=de&params=48.342222222222_N_25.133333333333_E_dim:10000_region:UA-26_type:city(3395)"><span title="Breitengrad">48° 21′ <abbr title="Nord">N</abbr></span>, <span title="Längengrad">25° 8′ <abbr title="Ost">O</abbr></span></a></span><span class="geo noexcerpt" style="display:none"><span class="body"></span><span class="latitude">48.342222222222</span><span class="longitude">25.133333333333</span><span class="elevation"></span></span><span id="coordinates" class="coordinates noprint"><span title="Koordinatensystem WGS84">Koordinaten: </span><a class="external text" href="https://geohack.toolforge.org/geohack.php?pagename=Werbowez_(Kossiw)&language=de&params=48.342222222222_N_25.133333333333_E_dim:10000_region:UA-26_type:city(3395)"><span title="Breitengrad">48° 20′ 32″ <abbr title="Nord">N</abbr></span>, <span title="Längengrad">25° 8′ 0″ <abbr title="Ost">O</abbr></span></a></span> | ||||
| </td></tr> | ||||
| <tr> | ||||
| <td><a href="/wiki/KATOTTH" title="KATOTTH">KATOTTH</a>: | ||||
| </td> | ||||
| <td>UA26100010030094355 | ||||
| </td></tr> | ||||
| <tr> | ||||
| <td><a href="/wiki/KOATUU" title="KOATUU">KOATUU</a>: | ||||
| </td> | ||||
| <td>2623682401 | ||||
| </td></tr> | ||||
| <tr> | ||||
| <td><a href="/wiki/Verwaltungsgliederung_der_Ukraine" title="Verwaltungsgliederung der Ukraine">Verwaltungsgliederung</a>: | ||||
| </td> | ||||
| <td>1 Dorf | ||||
| </td></tr> | ||||
|  | ||||
|  | ||||
|  | ||||
|  | ||||
| <tr> | ||||
| <td>Adresse: | ||||
| </td> | ||||
| <td>вул. Миру, буд. 15<br />78605 с. Вербовець | ||||
| </td></tr> | ||||
| <tr> | ||||
| <td><a href="/wiki/Website" title="Website">Website</a>: | ||||
| </td> | ||||
| <td><a rel="nofollow" class="external text" href="http://verbovets.kosiv.net/">Offizielle Webseite</a> | ||||
| </td></tr> | ||||
| <tr> | ||||
| <td colspan="2" style="padding-bottom:3px; text-align:center; border-bottom:1px solid #bbb; border-top:1px solid #bbb;"><a rel="nofollow" class="external text" href="http://w1.c1.rada.gov.ua/pls/z7503/A005?rdat1=31.08.2023&rf7571=13801">Statistische Informationen</a> | ||||
| </td></tr> | ||||
| <tr> | ||||
| <td colspan="2" style="padding-bottom:3px; text-align:center; border-bottom:1px solid #bbb; border-top:1px solid #bbb;"> | ||||
| <table class="centered" style="background-color: #f9f9f9; border: none; border-collapse: collapse; width: 1px;"> | ||||
| <tbody><tr><td style="border: none; padding: 0; text-align: center;"><div style="position: relative; z-index: 0; padding: 0; display: inline-block; width: -webkit-max-content; width: -moz-max-content; width: max-content; border: none;"><figure class="mw-halign-center noviewer notpageimage" typeof="mw:File"><a href="/wiki/Datei:Ivano-Frankivsk_location_map.svg" class="mw-file-description" title="Werbowez (Kossiw) (Oblast Iwano-Frankiwsk)"><img alt="Werbowez (Kossiw) (Oblast Iwano-Frankiwsk)" src="//upload.wikimedia.org/wikipedia/commons/thumb/8/8e/Ivano-Frankivsk_location_map.svg/290px-Ivano-Frankivsk_location_map.svg.png" decoding="async" width="290" height="347" class="mw-file-element" srcset="//upload.wikimedia.org/wikipedia/commons/thumb/8/8e/Ivano-Frankivsk_location_map.svg/435px-Ivano-Frankivsk_location_map.svg.png 1.5x, //upload.wikimedia.org/wikipedia/commons/thumb/8/8e/Ivano-Frankivsk_location_map.svg/580px-Ivano-Frankivsk_location_map.svg.png 2x" data-file-width="533" data-file-height="637" /></a><figcaption>Werbowez (Kossiw) (Oblast Iwano-Frankiwsk)</figcaption></figure><div style="position:absolute; top:63.3%; left:74.4%; height:0; width:0;"><div style="position:relative;z-index:100;left:-4px;top:-4px;width:8px;height:8px;line-height:0px;"><span typeof="mw:File"><a href="https://geohack.toolforge.org/geohack.php?pagename=Werbowez_(Kossiw)&language=de&params=48.342222222222_N_25.133333333333_E_dim:10000_region:UA-26_type:city(3395)&title=Werbowez+%28Kossiw%29" title="Werbowez (Kossiw) (48° 20′ 32″ N, 25° 8′ 0″O)"><img alt="" src="//upload.wikimedia.org/wikipedia/commons/thumb/9/97/ButtonRed.svg/8px-ButtonRed.svg.png" decoding="async" width="8" height="8" class="mw-file-element" srcset="//upload.wikimedia.org/wikipedia/commons/thumb/9/97/ButtonRed.svg/12px-ButtonRed.svg.png 1.5x, //upload.wikimedia.org/wikipedia/commons/thumb/9/97/ButtonRed.svg/16px-ButtonRed.svg.png 2x" data-file-width="480" data-file-height="480" /></a></span></div> | ||||
| <table style="font-size:90%; border:none; background-color:transparent; border-collapse:collapse; line-height:1em; position:absolute; width:6em; margin: 0 .2em; text-align:right; right:1px; bottom:1px;"><tbody><tr><td style="border:none; vertical-align:middle;"><span style="position:relative; z-index:9; background-color:none;">Werbowez (Kossiw) </span></td></tr></tbody></table></div></div></td></tr> | ||||
| </tbody></table><span style="display:none;"><a href="/w/index.php?title=Vorlage:Positionskarte_ISO_3166-2/Wartung/noregion&action=edit&redlink=1" class="new" title="Vorlage:Positionskarte ISO 3166-2/Wartung/noregion (Seite nicht vorhanden)">i1</a></span> | ||||
| </td></tr></tbody></table> | ||||
| <p><b>Werbowez</b> (<b><span style="font-style:normal;font-weight:normal"><a href="/wiki/Ukrainische_Sprache" title="Ukrainische Sprache">ukrainisch</a></span> <span lang="uk-Cyrl" class="Cyrl" style="font-style:normal">Вербовець</span></b>; <span style="font-style:normal;font-weight:normal"><a href="/wiki/Russische_Sprache" title="Russische Sprache">russisch</a></span> <span lang="ru-Cyrl" class="Cyrl" style="font-style:normal">Вербовец</span>, <a href="/wiki/Polnische_Sprache" title="Polnische Sprache">polnisch</a> <span lang="pl" style="font-style:italic;font-weight:normal">Wierzbowiec</span>; <span style="font-style:normal;font-weight:normal"><a href="/wiki/Rum%C3%A4nische_Sprache" title="Rumänische Sprache">rumänisch</a></span> <span lang="ro-Latn" style="font-style:italic">Verboveț</span>) ist ein <a href="/wiki/Dorf" title="Dorf">Dorf</a> in der <a href="/wiki/Ukraine" title="Ukraine">ukrainischen</a> <a href="/wiki/Oblast_Iwano-Frankiwsk" title="Oblast Iwano-Frankiwsk">Oblast Iwano-Frankiwsk</a> mit etwa 3400 Einwohnern (2001).<sup id="cite_ref-1" class="reference"><a href="#cite_note-1">[1]</a></sup>  | ||||
| </p> | ||||
| <figure class="mw-default-size mw-halign-left" typeof="mw:File/Thumb"><a href="/wiki/Datei:%D0%92%D0%B5%D1%80%D0%B1%D0%BE%D0%B2%D0%B5%D1%86%D1%8C.JPG" class="mw-file-description"><img src="//upload.wikimedia.org/wikipedia/commons/thumb/9/9d/%D0%92%D0%B5%D1%80%D0%B1%D0%BE%D0%B2%D0%B5%D1%86%D1%8C.JPG/220px-%D0%92%D0%B5%D1%80%D0%B1%D0%BE%D0%B2%D0%B5%D1%86%D1%8C.JPG" decoding="async" width="220" height="147" class="mw-file-element" srcset="//upload.wikimedia.org/wikipedia/commons/thumb/9/9d/%D0%92%D0%B5%D1%80%D0%B1%D0%BE%D0%B2%D0%B5%D1%86%D1%8C.JPG/330px-%D0%92%D0%B5%D1%80%D0%B1%D0%BE%D0%B2%D0%B5%D1%86%D1%8C.JPG 1.5x, //upload.wikimedia.org/wikipedia/commons/thumb/9/9d/%D0%92%D0%B5%D1%80%D0%B1%D0%BE%D0%B2%D0%B5%D1%86%D1%8C.JPG/440px-%D0%92%D0%B5%D1%80%D0%B1%D0%BE%D0%B2%D0%B5%D1%86%D1%8C.JPG 2x" data-file-width="5184" data-file-height="3456" /></a><figcaption>Blick auf das Dorf</figcaption></figure> | ||||
| <p>Das um 1650 erstmals schriftlich erwähnte Dorf<sup id="cite_ref-2" class="reference"><a href="#cite_note-2">[2]</a></sup> liegt im Osten der <a href="/wiki/Historische_Landschaft" title="Historische Landschaft">historischen Landschaft</a> <a href="/wiki/Galizien" title="Galizien">Galizien</a> am Ufer der <a href="/w/index.php?title=Rybnyzja_(Fluss)&action=edit&redlink=1" class="new" title="Rybnyzja (Fluss) (Seite nicht vorhanden)">Rybnyzja</a> (<span lang="uk-Cyrl" class="Cyrl">Рибниця</span>), einem 56 km langen Nebenfluss des <a href="/wiki/Pruth" title="Pruth">Pruth</a> 7 km nordöstlich vom Rajonzentrum <a href="/wiki/Kossiw" title="Kossiw">Kossiw</a> und 95 km südlich vom Oblastzentrum <a href="/wiki/Iwano-Frankiwsk" title="Iwano-Frankiwsk">Iwano-Frankiwsk</a>. Südlich der Ortschaft verläuft die <a href="/wiki/Territorialstra%C3%9Fe" title="Territorialstraße">Territorialstraße</a> <i>T–09–09</i>. | ||||
| </p><p>Am 12. Juni 2020 wurde das Dorf ein Teil der neu gegründeten <i>Stadtgemeinde <a href="/wiki/Kossiw" title="Kossiw">Kossiw</a></i> im <a href="/wiki/Rajon_Kossiw" title="Rajon Kossiw">Rajon Kossiw</a><sup id="cite_ref-3" class="reference"><a href="#cite_note-3">[3]</a></sup>, bis dahin bildete es zusammen mit dem Dorf <a href="/w/index.php?title=Staryj_Kossiw&action=edit&redlink=1" class="new" title="Staryj Kossiw (Seite nicht vorhanden)">Staryj Kossiw</a> (<span lang="uk-Cyrl" class="Cyrl">Старий Косів</span>) die <i>Landratsgemeinde Werbowez</i> (Вербовецька сільська рада/<i>Werbowezka silska rada</i>) im Osten des Rajons. | ||||
| </p> | ||||
| <ol class="references"> | ||||
| <li id="cite_note-1"><span class="mw-cite-backlink"><a href="#cite_ref-1">↑</a></span> <span class="reference-text"><a rel="nofollow" class="external text" href="http://w1.c1.rada.gov.ua/pls/z7503/A005?rf7571=13801">Ortswebseite</a> auf der offiziellen Webpräsenz der <a href="/wiki/Werchowna_Rada" title="Werchowna Rada">Werchowna Rada</a>; abgerufen am 14. November 2017 (ukrainisch)</span> | ||||
| </li> | ||||
| <li id="cite_note-2"><span class="mw-cite-backlink"><a href="#cite_ref-2">↑</a></span> <span class="reference-text"><a rel="nofollow" class="external text" href="http://ukrssr.com.ua/ifrank/kosivskiy/verbovets-kosivskiy-rayon-ivano-frankivska-oblast">Ortsgeschichte Werbowez</a> in der <a href="/wiki/Geschichte_der_St%C3%A4dte_und_D%C3%B6rfer_der_Ukrainischen_SSR" title="Geschichte der Städte und Dörfer der Ukrainischen SSR">Geschichte der Städte und Dörfer der Ukrainischen SSR</a>; abgerufen am 14. November 2017 (ukrainisch)</span> | ||||
| </li> | ||||
| <li id="cite_note-3"><span class="mw-cite-backlink"><a href="#cite_ref-3">↑</a></span> <span class="reference-text"><a rel="nofollow" class="external text" href="https://zakon.rada.gov.ua/laws/show/714-2020-%D1%80#Text">Кабінет Міністрів України Розпорядження від 12 червня 2020 р. № 714-р "Про визначення адміністративних центрів та затвердження територій територіальних громад Івано-Франківської області"</a></span> | ||||
| </li> | ||||
| </ol> | ||||
| <!--  | ||||
| NewPP limit report | ||||
| Parsed by mw1396 | ||||
| Cached time: 20230831121013 | ||||
| Cache expiry: 42588 | ||||
| Reduced expiry: true | ||||
| Complications: [] | ||||
| CPU time usage: 0.219 seconds | ||||
| Real time usage: 0.274 seconds | ||||
| Preprocessor visited node count: 6414/1000000 | ||||
| Post‐expand include size: 33611/2097152 bytes | ||||
| Template argument size: 12317/2097152 bytes | ||||
| Highest expansion depth: 34/100 | ||||
| Expensive parser function count: 9/500 | ||||
| Unstrip recursion depth: 0/20 | ||||
| Unstrip post‐expand size: 1476/5000000 bytes | ||||
| Lua time usage: 0.080/10.000 seconds | ||||
| Lua memory usage: 3398800/52428800 bytes | ||||
| Number of Wikibase entities loaded: 0/400 | ||||
| --> | ||||
| <!-- | ||||
| Transclusion expansion time report (%,ms,calls,template) | ||||
| 100.00%  239.600      1 -total | ||||
|  93.55%  224.134      1 Vorlage:Infobox_Ort_in_der_Ukraine | ||||
|  50.81%  121.740      2 Vorlage:Positionskarte | ||||
|  49.72%  119.121      2 Vorlage:Positionskarte+ | ||||
|  44.41%  106.401      2 Vorlage:Positionskarte~ | ||||
|  33.28%   79.732      2 Vorlage:Positionskarte~* | ||||
|  25.69%   61.558      3 Vorlage:Lang | ||||
|  19.41%   46.499      1 Vorlage:Positionskarte_ISO_3166-2 | ||||
|  16.90%   40.486     12 Vorlage:CoordinateLONG | ||||
|  14.02%   33.586     10 Vorlage:CoordinateLAT | ||||
| --> | ||||
| </div>" - (de.wikipedia.org 31.08.2023)'; | ||||
|  | ||||
|         $output = NodaWikidataFetcher::cleanWikidataInput($testStr); | ||||
|         $expected = 'Werbowez (ukrainisch Вербовець; russisch Вербовец, polnisch Wierzbowiec; rumänisch Verboveț) ist ein Dorf in der ukrainischen Oblast Iwano-Frankiwsk mit etwa 3400 Einwohnern (2001).'; | ||||
|         self::assertTrue( | ||||
|             str_starts_with($output, $expected), | ||||
|             "Start of parsed Wikipedia text should be:" . PHP_EOL . PHP_EOL . $expected . PHP_EOL . PHP_EOL . 'Real start text is: ' . PHP_EOL . PHP_EOL . substr($output, 0, 250) | ||||
|         ); | ||||
|  | ||||
|         $output = NodaWikidataFetcher::cleanWikidataInput('<div class="mw-parser-output"><figure class="mw-default-size mw-halign-right" typeof="mw:File/Thumb"><a href="/wiki/File:%D0%92%D0%B5%D1%80%D0%B1%D0%BE%D0%B2%D0%B5%D1%86%D1%8C.JPG" class="mw-file-description"><img src="//upload.wikimedia.org/wikipedia/commons/thumb/9/9d/%D0%92%D0%B5%D1%80%D0%B1%D0%BE%D0%B2%D0%B5%D1%86%D1%8C.JPG/220px-%D0%92%D0%B5%D1%80%D0%B1%D0%BE%D0%B2%D0%B5%D1%86%D1%8C.JPG" decoding="async" width="220" height="147" class="mw-file-element" srcset="//upload.wikimedia.org/wikipedia/commons/thumb/9/9d/%D0%92%D0%B5%D1%80%D0%B1%D0%BE%D0%B2%D0%B5%D1%86%D1%8C.JPG/330px-%D0%92%D0%B5%D1%80%D0%B1%D0%BE%D0%B2%D0%B5%D1%86%D1%8C.JPG 1.5x, //upload.wikimedia.org/wikipedia/commons/thumb/9/9d/%D0%92%D0%B5%D1%80%D0%B1%D0%BE%D0%B2%D0%B5%D1%86%D1%8C.JPG/440px-%D0%92%D0%B5%D1%80%D0%B1%D0%BE%D0%B2%D0%B5%D1%86%D1%8C.JPG 2x" data-file-width="5184" data-file-height="3456" /></a><figcaption></figcaption></figure> | ||||
| <p><span style="font-size: small;"><span id="coordinates"><a href="/wiki/%E5%9C%B0%E7%90%86%E5%9D%90%E6%A0%87" class="mw-redirect" title="地理坐标">坐标</a>:<style data-mw-deduplicate="TemplateStyles:r65292569">.mw-parser-output .geo-default,.mw-parser-output .geo-dms,.mw-parser-output .geo-dec{display:inline}.mw-parser-output .geo-nondefault,.mw-parser-output .geo-multi-punct{display:none}.mw-parser-output .longitude,.mw-parser-output .latitude{white-space:nowrap}</style><span class="plainlinks nourlexpansion"><a class="external text" href="//geohack.toolforge.org/geohack.php?language=zh&pagename=%E9%9F%8B%E7%88%BE%E5%8D%9A%E9%9F%8B%E9%BD%8A_(%E7%A7%91%E7%B4%A2%E5%A4%AB%E5%8D%80)&params=48_20_32_N_25_8_0_E_scale:30000"><span class="geo-default"><span class="geo-dms" title="此地的地图、航拍照片和其他数据"><span class="latitude">48°20′32″N</span> <span class="longitude">25°8′0″E</span></span></span><span class="geo-multi-punct"> / </span><span class="geo-nondefault"><span class="geo-dec" title="此地的地图、航拍照片和其他数据">48.34222°N 25.13333°E</span><span style="display:none"> / <span class="geo">48.34222; 25.13333</span></span></span></a></span></span></span> | ||||
| </p><p><b>韋爾博韋齊</b>(<a href="/wiki/%E7%83%8F%E5%85%8B%E8%98%AD%E8%AA%9E" class="mw-redirect" title="烏克蘭語">烏克蘭語</a>:<span lang="uk">Вербовець</span>),是<a href="/wiki/%E7%83%8F%E5%85%8B%E8%98%AD" class="mw-redirect" title="烏克蘭">烏克蘭</a>的村落,位於該國西部<a href="/wiki/%E4%BC%8A%E4%B8%87%E8%AF%BA-%E5%BC%97%E5%85%B0%E7%A7%91%E5%A4%AB%E6%96%AF%E5%85%8B%E5%B7%9E" title="伊万诺-弗兰科夫斯克州">伊萬諾-弗蘭科夫斯克州</a>,由<a href="/wiki/%E7%A7%91%E7%B4%A2%E5%A4%AB%E5%8D%80" class="mw-redirect" title="科索夫區">科索夫區</a>負責管轄,始建於1456年,面積18.77平方公里,2001年人口3,395。 | ||||
| </p> | ||||
| <!--  | ||||
| NewPP limit report | ||||
| Parsed by mw1412 | ||||
| Cached time: 20230831132208 | ||||
| Cache expiry: 1814400 | ||||
| Reduced expiry: false | ||||
| Complications: [] | ||||
| CPU time usage: 0.147 seconds | ||||
| Real time usage: 0.186 seconds | ||||
| Preprocessor visited node count: 48/1000000 | ||||
| Post‐expand include size: 2084/2097152 bytes | ||||
| Template argument size: 0/2097152 bytes | ||||
| Highest expansion depth: 3/100 | ||||
| Expensive parser function count: 1/500 | ||||
| Unstrip recursion depth: 0/20 | ||||
| Unstrip post‐expand size: 362/5000000 bytes | ||||
| Lua time usage: 0.110/10.000 seconds | ||||
| Lua memory usage: 15402517/52428800 bytes | ||||
| Number of Wikibase entities loaded: 1/400 | ||||
| --> | ||||
| <!-- | ||||
| Transclusion expansion time report (%,ms,calls,template) | ||||
| 100.00%  152.989      1 -total | ||||
|  70.07%  107.204      1 Template:Lang-uk | ||||
|  29.62%   45.313      1 Template:Coord | ||||
| --> | ||||
| </div>'); | ||||
|         $expected = '韋爾博韋齊(烏克蘭語:Вербовець),是烏克蘭的村落,位於該國西部伊萬諾-弗蘭科夫斯克州,由科索夫區負責管轄,始建於1456年,面積18.77平方公里,2001年人口3,3'; | ||||
|         self::assertTrue( | ||||
|             str_starts_with($output, $expected), | ||||
|             "Start of parsed Wikipedia text should be:" . PHP_EOL . PHP_EOL . $expected . PHP_EOL . PHP_EOL . 'Real start text is: ' . PHP_EOL . PHP_EOL . substr($output, 0, 250) | ||||
|         ); | ||||
|  | ||||
|     } | ||||
|  | ||||
|     /** | ||||
|      * Test for cleaning wikidata info. | ||||
|      * | ||||
|      * @group  ValidOutput | ||||
|      * | ||||
|      * @return void | ||||
|      */ | ||||
|     public function testCleanWikidataInputWithoutHtml():void { | ||||
|  | ||||
|         $output = NodaWikidataFetcher::cleanWikidataInput('Werbowez (ukrainisch Вербовець; russisch Вербовец, polnisch Wierzbowiec; rumänisch Verboveț) ist ein Dorf in der ukrainischen Oblast Iwano-Frankiwsk mit etwa 3400 Einwohnern (2001).[1]'); | ||||
|         $expected = 'Werbowez (ukrainisch Вербовець; russisch Вербовец, polnisch Wierzbowiec; rumänisch Verboveț) ist ein Dorf in der ukrainischen Oblast Iwano-Frankiwsk mit etwa 3400 Einwohnern (2001).'; | ||||
|         self::assertTrue( | ||||
|             str_starts_with($output, $expected), | ||||
|             "Start of parsed Wikipedia text should be:" . PHP_EOL . PHP_EOL . $expected . PHP_EOL . PHP_EOL . 'Real start text is: ' . PHP_EOL . PHP_EOL . substr($output, 0, 250) | ||||
|         ); | ||||
|  | ||||
|     } | ||||
|  | ||||
|     /** | ||||
|      * Data provider for an actor that has a wikidata link and a Telugu translation. | ||||
|      * | ||||
|   | ||||
		Reference in New Issue
	
	Block a user