Improve NodaWikidataFetcher's loading of descriptions

Close #15
This commit is contained in:
Joshua Ramon Enslin 2023-08-31 15:38:12 +02:00
parent 869e0f263d
commit 107a4cd640
Signed by: jrenslin
GPG Key ID: 46016F84501B70AE
2 changed files with 538 additions and 235 deletions

View File

@ -98,6 +98,256 @@ final class NodaWikidataFetcher {
/** @var MDMysqli */ /** @var MDMysqli */
private MDMysqli $_mysqli_noda; private MDMysqli $_mysqli_noda;
/**
 * Builds the URL for the "parse" action of a Wikipedia edition's API,
 * requesting the text of section 0 of a page as JSON.
 *
 * @param string $lang       Language code; used (URL-encoded) as the subdomain.
 * @param string $searchTerm Page title; passed (URL-encoded) as the page parameter.
 *
 * @return non-empty-string
 */
private static function _getWikipediaApiLink(string $lang, string $searchTerm):string {

    $host = urlencode($lang) . ".wikipedia.org";
    $page = urlencode($searchTerm);

    return "https://" . $host . "/w/api.php?action=parse&page=" . $page . "&prop=text&section=0&format=json";

}
/**
 * Reduces a markup fragment to the plain text of its paragraphs.
 *
 * The fragment is narrowed to the span between the first "<p" and the last
 * "</p>", parsed as XML inside a wrapping <section>, stripped of style,
 * table and ol elements, and finally flattened to the trimmed text of each
 * remaining <p>, with paragraphs separated by a blank line.
 *
 * @param string $input Input string.
 *
 * @return string
 */
private static function _cleanWikidataInputHtml(string $input):string {

    // Drop everything in front of the first paragraph. A position of 0
    // means there is nothing to drop, so it is treated like "not found".
    $firstParagraphStart = strpos($input, '<p');
    if ($firstParagraphStart !== false && $firstParagraphStart !== 0) {
        $input = substr($input, $firstParagraphStart);
    }

    // Drop everything behind the last closing paragraph tag (4 = strlen('</p>')).
    $lastParagraphEnd = strrpos($input, '</p>');
    if ($lastParagraphEnd !== false && $lastParagraphEnd !== 0) {
        $input = substr($input, 0, $lastParagraphEnd + 4);
    }

    $doc = new DOMDocument();
    try {
        // The wrapper guarantees a single root element for the XML parser.
        $doc->loadXML('<section>' . trim($input) . '</section>');
    }
    catch (Exception $e) {
        throw new Exception("Failed to load DOMDocument." . PHP_EOL . $e->getMessage() . PHP_EOL . PHP_EOL . $input);
    }

    // Remove all elements that never carry descriptive text.
    foreach (["style", "table", "ol"] as $tagName) {
        $nodes = $doc->getElementsByTagName($tagName);
        // The node list is live: item(0) always is the next remaining match.
        while ($nodes->length > 0) {
            $node = $nodes->item(0);
            if ($node === null || $node->parentNode === null) break;
            $node->parentNode->removeChild($node);
        }
    }

    // A leading paragraph that mentions "geohack" is dropped as well.
    $leadParagraph = $doc->getElementsByTagName("p")->item(0);
    if ($leadParagraph !== null) {
        $leadHtml = $doc->saveHTML($leadParagraph);
        if ($leadHtml !== false
            && strpos($leadHtml, 'geohack') !== false
            && $leadParagraph->parentNode !== null
        ) {
            $leadParagraph->parentNode->removeChild($leadParagraph);
        }
    }

    // Collect the text of every remaining paragraph.
    $paragraphs = [];
    foreach ($doc->getElementsByTagName("p") as $paragraph) {
        $paragraphs[] = trim($paragraph->textContent);
    }

    // Every single newline in the joined text becomes a blank line.
    $joined = trim(implode(PHP_EOL, $paragraphs));
    return str_replace(PHP_EOL, PHP_EOL . PHP_EOL, $joined);

}
/**
 * Strips numeric source markers ("[0]" up to "[99]") from a description text.
 *
 * @param string $input Input string.
 *
 * @return string
 */
private static function _cleanSourceBracketsOffTranslation(string $input):string {

    // One marker per number; strtr() replaces all of them in a single pass,
    // so text left behind by one removal is never matched again.
    $markers = array_map(
        static fn(int $number):string => "[$number]",
        range(0, 99)
    );

    return strtr($input, array_fill_keys($markers, ""));

}
/**
 * Cleans contents parsed from Wikipedia.
 *
 * Surrounding double quotes and all literals listed in
 * self::WIKIPEDIA_REMOVE_LITERALS are removed first. If the remainder starts
 * with "<", it is treated as markup and handed to the DOM based cleaner;
 * otherwise a string based fallback strips tables, tags, inline CSS rules
 * and source markers. Both paths normalise whitespace, cut overly long texts
 * at a paragraph break and pass the result through MD_STD_IN::sanitize_text().
 *
 * @param string $input Input string.
 *
 * @return string
 */
private static function _cleanWikidataInput(string $input):string {

    $input = trim($input, '"');
    foreach (self::WIKIPEDIA_REMOVE_LITERALS as $tToRemove) $input = str_replace($tToRemove, "", $input);

    if (substr($input, 0, 1) === '<') {

        $input = self::_cleanWikidataInputHtml($input);

        // Cut off overly long articles at the first paragraph break found
        // from byte offset 600 on. The length check counts characters, the
        // offset counts bytes; bytes >= characters, so the offset is valid.
        if (mb_strlen($input) > 600) {
            $cutPos = strpos($input, PHP_EOL . PHP_EOL, 600);
            if ($cutPos !== false) $input = substr($input, 0, $cutPos);
        }

        $input = self::_cleanSourceBracketsOffTranslation($input);
        $input = str_replace("\t", " ", $input);

        // A stray "return $input;" used to sit here, which made the
        // normalisation and sanitisation below unreachable.

        // Remove newlines with ensuing spaces
        while (strpos($input, PHP_EOL . " ") !== false) {
            $input = str_replace(PHP_EOL . " ", PHP_EOL, $input);
        }

        // Collapse three or more consecutive newlines into two
        while (strpos($input, PHP_EOL . PHP_EOL . PHP_EOL) !== false) {
            $input = str_replace(PHP_EOL . PHP_EOL . PHP_EOL, PHP_EOL . PHP_EOL, $input);
        }

        return MD_STD_IN::sanitize_text($input);

    }

    $input = str_replace(PHP_EOL, '', $input);
    if (empty($input)) return "";

    // Remove infobox tables specifically: if a table opens before the first
    // paragraph, continue from the first paragraph after the table's end.
    $firstParagraphPosition = strpos($input, '<p', 1);
    $tableStartPos = strpos($input, "<table>");
    if ($firstParagraphPosition !== false && $tableStartPos !== false && $tableStartPos < $firstParagraphPosition) {
        if (($tableEndPos = strpos($input, "</table>")) !== false) {
            if (($pStartPos = strpos($input, '<p', $tableEndPos + 6)) !== false) {
                $input = substr($input, $pStartPos);
            }
        }
    }

    // Remove leftover unnecessary content before the actual text: if a table
    // end or an image appears before the first paragraph, start at that paragraph.
    $firstParagraphPosition = strpos($input, '<p', 1);
    if ($firstParagraphPosition !== false) {
        foreach (["</table>", "<img"] as $tagPart) {
            $tagPos = strpos($input, $tagPart);
            if ($tagPos !== false && $tagPos < $firstParagraphPosition) {
                $input = substr($input, $firstParagraphPosition);
                break;
            }
        }
    }

    // Keep paragraph boundaries visible as newlines once the tags are gone.
    $input = str_replace('</p>', '</p>' . PHP_EOL . PHP_EOL . PHP_EOL, $input);
    $input = strip_tags($input);

    // Remove inline CSS rules (".mw-parser-output ... }") that survive
    // strip_tags(); limited to 30 rounds.
    for ($round = 0; $round < 30; ++$round) {
        $ruleStart = strpos($input, ".mw-parser-output");
        if ($ruleStart === false) break;
        $ruleEnd = strpos($input, "}", $ruleStart);
        if ($ruleEnd === false) break;
        $input = substr($input, 0, $ruleStart) . substr($input, $ruleEnd + 1);
    }

    $input = self::_cleanSourceBracketsOffTranslation($input);
    $input = str_replace("\t", " ", $input);

    // Remove double whitespaces. The search string must be TWO spaces:
    // searching for a single space and replacing it with itself never ends.
    while (strpos($input, "  ") !== false) {
        $input = str_replace("  ", " ", $input);
    }

    // Remove newlines with ensuing spaces
    while (strpos($input, PHP_EOL . " ") !== false) {
        $input = str_replace(PHP_EOL . " ", PHP_EOL, $input);
    }

    // Collapse three or more consecutive newlines into two
    while (strpos($input, PHP_EOL . PHP_EOL . PHP_EOL) !== false) {
        $input = str_replace(PHP_EOL . PHP_EOL . PHP_EOL, PHP_EOL . PHP_EOL, $input);
    }

    // Boilerplate sentences that are removed wherever they occur.
    $stableToRemove = [
        "Vous pouvez partager vos connaissances en laméliorant (comment ?) selon les recommandations des projets correspondants.",
    ];
    foreach ($stableToRemove as $tToRemove) $input = str_replace($tToRemove, "", $input);

    // Markers after which the rest of the text is discarded.
    $endings = [
        "StubDenne artikel om et vandløb ",
    ];
    foreach ($endings as $ending) {
        $endingPos = strpos($input, $ending);
        if ($endingPos !== false) $input = substr($input, 0, $endingPos);
    }

    $input = trim($input);

    // Cut off overly long articles (see the note on offsets above)
    if (mb_strlen($input) > 600) {
        $cutPos = strpos($input, PHP_EOL . PHP_EOL, 600);
        if ($cutPos !== false) $input = trim(substr($input, 0, $cutPos));
    }

    if (empty($input)) return '';

    // Remove entity-encoded source markers ("&#91;12&#93;") before the
    // entities are decoded. Any number of digits is matched; a single-digit
    // pattern would leave "[12]" in the decoded text.
    $input = str_replace("'", "´", MD_STD::preg_replace_str("/\&\#91\;[0-9]+\&\#93\;/", '', $input));
    $input = html_entity_decode($input);

    return MD_STD_IN::sanitize_text($input);

}
/**
 * Exposes _cleanWikidataInput to tests.
 *
 * Only callable from the command line; under any other SAPI an exception
 * is thrown.
 *
 * @param string $input Input string.
 *
 * @return string
 */
public static function cleanWikidataInput(string $input):string {

    if (PHP_SAPI === 'cli') {
        return self::_cleanWikidataInput($input);
    }

    throw new Exception("Use this function only for testing");

}
/** /**
* Sets the retrieval mode. * Sets the retrieval mode.
* *
@ -343,7 +593,7 @@ final class NodaWikidataFetcher {
if (isset($wikilink)) { if (isset($wikilink)) {
$languagesToFetch[$lang] = "https://" . $lang . ".wikipedia.org/w/api.php?action=parse&page=" . urlencode($wikilinkterm) . "&prop=text&section=0&format=json"; $languagesToFetch[$lang] = self::_getWikipediaApiLink($lang, $wikilinkterm);
$wikilinks[$lang] = $wikilink; $wikilinks[$lang] = $wikilink;
} }
@ -379,6 +629,7 @@ final class NodaWikidataFetcher {
$output = []; $output = [];
$descs = [];
foreach ($checkagainstLanguage as $lang) { foreach ($checkagainstLanguage as $lang) {
if (!empty($languagesToFetch[$lang]) && !empty($data['sitelinks'][$lang . 'wiki']) && !empty($wikilinks[$lang])) { if (!empty($languagesToFetch[$lang]) && !empty($data['sitelinks'][$lang . 'wiki']) && !empty($wikilinks[$lang])) {
@ -393,18 +644,22 @@ final class NodaWikidataFetcher {
if ($descFromWiki !== null) $tDescription = (string)$descFromWiki; if ($descFromWiki !== null) $tDescription = (string)$descFromWiki;
else $tDescription = ""; else $tDescription = "";
$tDescription = '"' . $tDescription . '" - (' . $data['labels'][$lang]['language'] . '.wikipedia.org ' . date('d.m.Y') . ')';
} }
else { else {
$tDescription = ""; $tDescription = "";
} }
$output[$lang] = [ if ($tDescription !== '') {
'label' => self::_cleanWikidataInput((string)$data['labels'][$lang]['value']), $descs[$lang] = $tDescription;
'description' => self::_cleanWikidataInput($tDescription), $desc_cleaned = self::_cleanWikidataInput($tDescription);
'link' => $wikilink, if ($desc_cleaned !== '') {
]; $output[$lang] = [
'label' => self::_cleanWikidataInput((string)$data['labels'][$lang]['value']),
'description' => '"' . $desc_cleaned . '" - (' . $data['labels'][$lang]['language'] . '.wikipedia.org ' . date('d.m.Y') . ')',
'link' => $wikilink,
];
}
}
} }
// echo '<br><b style="color: cc0000;">Wikipedia Links fehlen</b>'; // echo '<br><b style="color: cc0000;">Wikipedia Links fehlen</b>';
@ -417,6 +672,7 @@ final class NodaWikidataFetcher {
]; ];
} }
# print_r($descs);
} }
@ -424,196 +680,6 @@ final class NodaWikidataFetcher {
} }
/**
 * Cleans contents parsed from Wikipedia.
 *
 * Input starting with "<" is parsed as XML; style, table, ol and all but the
 * first div element are removed and the remaining text content is returned
 * after whitespace clean-up. Any other input is cleaned with plain string
 * operations: literals and tables are cut away, tags are stripped and
 * whitespace is normalised. In both branches a text longer than 600
 * characters is cut at the first blank line found from byte offset 600 on.
 *
 * NOTE(review): this copy appears on the removed side of the diff and seems
 * to be superseded by the reworked method of the same name - confirm.
 *
 * @param string $input Input string.
 *
 * @return string
 */
private static function _cleanWikidataInput(string $input):string {
// Markup branch: anything beginning with "<" is loaded as XML.
if (substr($input, 0, strlen('<')) === '<') {
$doc = new DOMDocument();
// NOTE(review): the return value of loadXML() is not checked here.
$doc->loadXML($input);
// The node lists are live, so item(0) always is the next remaining match.
$list = $doc->getElementsByTagName("style");
while ($list->length > 0) {
$p = $list->item(0);
if ($p === null || $p->parentNode === null) break;
$p->parentNode->removeChild($p);
}
$list = $doc->getElementsByTagName("table");
while ($list->length > 0) {
$p = $list->item(0);
if ($p === null || $p->parentNode === null) break;
$p->parentNode->removeChild($p);
}
// Keep the first div (index 0), remove every further one.
$list = $doc->getElementsByTagName("div");
while ($list->length > 1) {
$p = $list->item(1);
if ($p === null || $p->parentNode === null) break;
$p->parentNode->removeChild($p);
}
$list = $doc->getElementsByTagName("ol");
while ($list->length > 0) {
$p = $list->item(0);
if ($p === null || $p->parentNode === null) break;
$p->parentNode->removeChild($p);
}
// Drop the first paragraph if its markup mentions "geohack".
if (($firstP = $doc->getElementsByTagName("p")->item(0)) !== null) {
if (($firstPhtml = $doc->saveHTML($firstP)) !== false) {
if (strpos($firstPhtml, 'geohack') !== false) {
if ($firstP->parentNode !== null) $firstP->parentNode->removeChild($firstP);
}
}
}
/*
if (strpos($doc->saveHTML(), 'Coordinates:') !== false) {
echo $doc->saveHTML();
exit;
}
*/
// Every single newline of the document's text becomes a blank line.
$input = str_replace(PHP_EOL, PHP_EOL . PHP_EOL, trim($doc->textContent));
// Cut overly long texts at the first blank line from byte offset 600 on.
if (mb_strlen($input) > 600) {
if (strpos($input, PHP_EOL . PHP_EOL, 600) !== false) {
$input = substr($input, 0, strpos($input, PHP_EOL . PHP_EOL, 600));
}
}
// Remove source markers "[0]" to "[99]" in a single strtr() pass.
$bracketsToRemove = [];
for ($i = 0; $i < 100; $i++) {
$bracketsToRemove["[$i]"] = "";
}
$input = strtr($input, $bracketsToRemove);
$input = str_replace("\t", " ", $input);
// Remove newlines with ensuing spaces
while (strpos($input, PHP_EOL . " ") !== false) {
$input = str_replace(PHP_EOL . " ", PHP_EOL, $input);
}
// Remove double newlines
while (strpos($input, PHP_EOL . PHP_EOL . PHP_EOL) !== false) {
$input = str_replace(PHP_EOL . PHP_EOL . PHP_EOL, PHP_EOL . PHP_EOL, $input);
}
return $input;
}
// String based branch for input that does not begin with "<".
$input = str_replace(PHP_EOL, '', $input);
foreach (self::WIKIPEDIA_REMOVE_LITERALS as $tToRemove) $input = str_replace($tToRemove, "", $input);
// Narrow the input to the span from the first "<p>" up to the last "</p>".
$first_mention_of_paragraph = strpos($input, '<p>');
if ($first_mention_of_paragraph !== false) $input = substr($input, $first_mention_of_paragraph, (strrpos($input, '</p>') ?: strlen($input)) - $first_mention_of_paragraph);
// Remove infobox tables specifically
$removeFirstParagraph = false;
if (empty($input)) return "";
// If a table opens before the first paragraph, continue from the first
// paragraph found after the table's end.
$firstParagraphPosition = strpos($input, '<p', 1);
$currentSearchPos = strpos($input, "<table>");
if ($currentSearchPos !== false && $currentSearchPos < $firstParagraphPosition) {
if (($tableEndPos = strpos($input, "</table>")) !== false) {
if (($pStartPos = strpos($input, '<p', $tableEndPos + 6)) !== false) {
$input = substr($input, $pStartPos);
}
}
}
// Remove leftover unnecessary paragraphs before actual content
$removeFirstParagraph = false;
$firstParagraphPosition = strpos($input, '<p', 1);
foreach (["</table>", "<img"] as $tagPart) {
$currentSearchPos = strpos($input, $tagPart);
if ($currentSearchPos !== false && $currentSearchPos < $firstParagraphPosition) {
$removeFirstParagraph = true;
break;
}
}
if ($removeFirstParagraph === true) {
$input = substr($input, $firstParagraphPosition ?: 0);
}
// Keep paragraph boundaries visible as newlines once the tags are stripped.
$input = str_replace('</p>', '</p>' . PHP_EOL . PHP_EOL . PHP_EOL, $input);
# $input = str_replace('?/i', '', $input);
$input = strip_tags($input);
# for ($i = 150; $i < 1000; $i++) $input = str_replace("&#$i;", " ", $input);
// Cut out text from ".mw-parser-output" up to the next "}", at most 30 times.
$i = 0;
while (strpos($input, ".mw-parser-output") !== false and strpos($input, "}", strpos($input, ".mw-parser-output")) !== false) {
$part1 = substr($input, 0, strpos($input, ".mw-parser-output"));
$part2 = substr($input, strpos($input, "}", strpos($input, ".mw-parser-output")) + 1);
$input = $part1 . $part2;
++$i;
if ($i === 30) break;
}
// Remove source markers "[0]" to "[99]" in a single strtr() pass.
$bracketsToRemove = [];
for ($i = 0; $i < 100; $i++) {
$bracketsToRemove["[$i]"] = "";
}
$input = strtr($input, $bracketsToRemove);
$input = str_replace("\t", " ", $input);
// Remove double whitespaces
// NOTE(review): as rendered here, search and replacement are the same single
// space, which would loop forever on any input containing a space; presumably
// the search string is two spaces in the actual file - confirm.
while (strpos($input, " ") !== false) {
$input = str_replace(" ", " ", $input);
}
// Remove newlines with ensuing spaces
while (strpos($input, PHP_EOL . " ") !== false) {
$input = str_replace(PHP_EOL . " ", PHP_EOL, $input);
}
// Remove double newlines
while (strpos($input, PHP_EOL . PHP_EOL . PHP_EOL) !== false) {
$input = str_replace(PHP_EOL . PHP_EOL . PHP_EOL, PHP_EOL . PHP_EOL, $input);
}
// Boilerplate sentences that are removed wherever they occur.
$stableToRemove = [
"Vous pouvez partager vos connaissances en laméliorant (comment ?) selon les recommandations des projets correspondants.",
];
foreach ($stableToRemove as $tToRemove) $input = str_replace($tToRemove, "", $input);
// Markers after which the rest of the text is discarded.
$endings = [
"StubDenne artikel om et vandløb ",
];
foreach ($endings as $ending) {
if (strpos($input, $ending) !== false) $input = substr($input, 0, strpos($input, $ending));
}
$input = trim($input);
// Cut off overly long articles
if (mb_strlen($input) > 600) {
if (strpos($input, PHP_EOL . PHP_EOL, 600) !== false) {
$input = trim(substr($input, 0, strpos($input, PHP_EOL . PHP_EOL, 600)));
}
}
if (empty($input)) return '';
// Strip entity-encoded single-digit source markers ("&#91;1&#93;"), swap
// straight apostrophes for "´", then decode the remaining HTML entities.
$input = str_replace("'", "´", MD_STD::preg_replace_str("/\&\#91\;[0-9]\&\#93\;/", '', $input));
$input = html_entity_decode($input);
return $input;
}
/** /**
* Function for fetching description from Wikipedia * Function for fetching description from Wikipedia
* *
@ -855,7 +921,7 @@ final class NodaWikidataFetcher {
if (isset($wikilink[$lang]) and isset($wikilinkterm[$lang]) and is_string($wikilinkterm[$lang])) { if (isset($wikilink[$lang]) and isset($wikilinkterm[$lang]) and is_string($wikilinkterm[$lang])) {
$datafromwiki = MD_STD::runCurl("https://" . $lang . ".wikipedia.org/w/api.php?action=parse&page=" . urlencode($wikilinkterm[$lang]) . "&prop=text&section=0&format=json", 10000); $datafromwiki = MD_STD::runCurl(self::_getWikipediaApiLink($lang, $wikilinkterm[$lang]), 10000);
$datafromwiki = json_decode($datafromwiki, true)['parse']['text']['*']; $datafromwiki = json_decode($datafromwiki, true)['parse']['text']['*'];
# Process data retrieved from wikipedia # Process data retrieved from wikipedia
@ -865,17 +931,17 @@ final class NodaWikidataFetcher {
} }
foreach (self::LANGUAGES_MAIN_DESC as $sprache) { foreach (self::LANGUAGES_MAIN_DESC as $cur_lang) {
if ($alreadyEntered === true) break; if ($alreadyEntered === true) break;
if (!isset($wikilink[$sprache]) || !isset($wikilinkterm[$sprache]) || !is_string($wikilinkterm[$sprache])) continue; if (!isset($wikilink[$cur_lang]) || !isset($wikilinkterm[$cur_lang]) || !is_string($wikilinkterm[$cur_lang])) continue;
$datafromwiki = MD_STD::runCurl("https://" . $sprache . ".wikipedia.org/w/api.php?action=parse&page=" . urlencode((string)$wikilinkterm[$sprache]) . "&prop=text&section=0&format=json", 10000); $datafromwiki = MD_STD::runCurl(self::_getWikipediaApiLink($cur_lang, $wikilinkterm[$cur_lang]), 10000);
$datafromwiki = json_decode($datafromwiki, true)['parse']['text']['*']; $datafromwiki = json_decode($datafromwiki, true)['parse']['text']['*'];
# Process data retrieved from wikipedia # Process data retrieved from wikipedia
if ($datafromwiki = self::_cleanWikidataInput((string)$datafromwiki)) { if ($datafromwiki = self::_cleanWikidataInput((string)$datafromwiki)) {
$alreadyEntered = $this->retrievePersinstDescFromWikipedia($persinst_id, $wikidata_id, $datafromwiki, $wikilink[$sprache], $lang, "$sprache", $erfasst_von); $alreadyEntered = $this->retrievePersinstDescFromWikipedia($persinst_id, $wikidata_id, $datafromwiki, $wikilink[$cur_lang], $lang, "$cur_lang", $erfasst_von);
} }
} }
@ -940,33 +1006,54 @@ final class NodaWikidataFetcher {
} }
/**
 * Gets the current description of a place.
 *
 * Reads the column `ort_anmerkung` of the row in `orte` whose `ort_id`
 * matches. A NULL value in that column is returned as an empty string.
 *
 * @param integer $onum Place ID.
 *
 * @return string
 *
 * @throws Exception If no row exists for the given ID.
 */
private function getPlaceDescription(int $onum):string {

    $currentPlaceResult = $this->_mysqli_noda->query_by_stmt("SELECT `ort_anmerkung`
        FROM `orte`
        WHERE `ort_id` = ?", "i", $onum);

    // Fetch first, then release the result exactly once for both outcomes.
    $curPlaceInfo = $currentPlaceResult->fetch_row();
    $currentPlaceResult->close();

    if (!$curPlaceInfo) {
        throw new Exception("This place does not exist");
    }

    // fetch_row() reports SQL NULL as PHP null; returning that unchanged
    // would violate the declared string return type.
    return (string)$curPlaceInfo[0];

}
/** /**
* Function for entering base information about a place from wikidata. * Function for entering base information about a place from wikidata.
* *
* @param mysqli_result $currentPlaceResult Mysqli result pointing to the current place. * @param string $cur_place_desc Current description of the place.
* @param string $datafromwiki Data parsed from wikidata. * @param string $datafromwiki Data parsed from wikidata.
* @param array<mixed> $wikilink Wikilink. * @param array<mixed> $wikilink Wikilink.
* @param string $preflang Language of the user interface in general. * @param string $preflang Language of the user interface in general.
* @param string $lang Language of the main entry. * @param string $lang Language of the main entry.
* @param integer $placeID ID of the place. * @param integer $placeID ID of the place.
* @param string $erfasst_von User name. * @param string $erfasst_von User name.
* *
* @return boolean * @return boolean
*/ */
public function enterPlaceDescFromWikidata(mysqli_result $currentPlaceResult, string $datafromwiki, array $wikilink, string $preflang, string $lang, int $placeID, string $erfasst_von) { public function enterPlaceDescFromWikidata(string $cur_place_desc, string $datafromwiki, array $wikilink, string $preflang, string $lang, int $placeID, string $erfasst_von):bool {
$datafromwiki = '"' . $datafromwiki . '" - (Wikipedia (' . $lang . ') ' . date("d.m.Y") . ')'; $datafromwiki = '"' . $datafromwiki . '" - (Wikipedia (' . $lang . ') ' . date("d.m.Y") . ')';
if (!($curPlaceInfo = $currentPlaceResult->fetch_assoc())) return false; if (!empty(trim($cur_place_desc)) and substr($cur_place_desc, 0, 3) !== 'GND') {
if (!empty(trim($curPlaceInfo['ort_anmerkung'])) and substr($curPlaceInfo['ort_anmerkung'], 0, 3) !== 'GND') {
switch ($this->_retrievalMode) { switch ($this->_retrievalMode) {
case "add": case "add":
$datafromwiki = $curPlaceInfo['ort_anmerkung'] . PHP_EOL . PHP_EOL . $datafromwiki; $datafromwiki = $cur_place_desc . PHP_EOL . PHP_EOL . $datafromwiki;
break; break;
case "keep": case "keep":
$datafromwiki = $curPlaceInfo['ort_anmerkung']; $datafromwiki = $cur_place_desc;
break; break;
case "replace": case "replace":
break; break;
@ -977,7 +1064,7 @@ final class NodaWikidataFetcher {
echo ' echo '
<p class="alert icons iconsAlert">There is already an entry for description ...</p> <p class="alert icons iconsAlert">There is already an entry for description ...</p>
<div class="wikiReplaceTTile"> <div class="wikiReplaceTTile">
<h3>Actual entry</h3><p>' . nl2br($curPlaceInfo['ort_anmerkung']) . '</p> <h3>Actual entry</h3><p>' . nl2br($cur_place_desc) . '</p>
</div> </div>
<div class="wikiReplaceTTile"> <div class="wikiReplaceTTile">
<h3>Now found</h3> <h3>Now found</h3>
@ -1102,10 +1189,9 @@ final class NodaWikidataFetcher {
if (isset($data['sitelinks'][$tLang . 'wiki']['title'])) $wikilinkterm[$tLang] = str_replace(' ', '_', $data['sitelinks'][$tLang . 'wiki']['title']); if (isset($data['sitelinks'][$tLang . 'wiki']['title'])) $wikilinkterm[$tLang] = str_replace(' ', '_', $data['sitelinks'][$tLang . 'wiki']['title']);
} }
$currentPlaceResult = $this->_mysqli_noda->query_by_stmt("SELECT `ort_anmerkung` // Get current description for overwriting
FROM `orte`
WHERE `ort_id` = ?", "i", $onum);
$cur_place_desc = $this->getPlaceDescription($onum);
$alreadyEntered = false; $alreadyEntered = false;
// P131: Located in administrative unit // P131: Located in administrative unit
@ -1115,29 +1201,27 @@ final class NodaWikidataFetcher {
if (!empty($wikilink[$lang])) { if (!empty($wikilink[$lang])) {
$datafromwiki = MD_STD::runCurl("https://" . urlencode($lang) . ".wikipedia.org/w/api.php?action=parse&page=" . urlencode($wikilinkterm[$lang]) . "&prop=text&section=0&format=json", 10000); $datafromwiki = MD_STD::runCurl(self::_getWikipediaApiLink($lang, $wikilinkterm[$lang]), 10000);
$datafromwiki = json_decode($datafromwiki, true)['parse']['text']['*']; $datafromwiki = json_decode($datafromwiki, true)['parse']['text']['*'];
if (!empty($datafromwiki) and !empty($datafromwiki = self::_cleanWikidataInput((string)$datafromwiki))) { if (!empty($datafromwiki) and !empty($datafromwiki = self::_cleanWikidataInput((string)$datafromwiki))) {
$alreadyEntered = $this->enterPlaceDescFromWikidata($currentPlaceResult, $datafromwiki, $wikilink, $lang, $lang, $onum, $erfasst_von); $alreadyEntered = $this->enterPlaceDescFromWikidata($cur_place_desc, $datafromwiki, $wikilink, $lang, $lang, $onum, $erfasst_von);
} }
} }
foreach (self::LANGUAGES_MAIN_DESC as $sprache) { foreach (self::LANGUAGES_MAIN_DESC as $cur_lang) {
//if ($alreadyEntered === true) break; //if ($alreadyEntered === true) break;
if ($alreadyEntered === true) break; if ($alreadyEntered === true) break;
if (!isset($wikilink[$sprache])) continue; if (!isset($wikilink[$cur_lang])) continue;
$datafromwiki = MD_STD::runCurl("https://" . urlencode($sprache) . ".wikipedia.org/w/api.php?action=parse&page=" . urlencode($wikilinkterm[$sprache]) . "&prop=text&section=0&format=json", 10000); $datafromwiki = MD_STD::runCurl(self::_getWikipediaApiLink($cur_lang, $wikilinkterm[$cur_lang]), 10000);
$datafromwiki = json_decode($datafromwiki, true)['parse']['text']['*']; $datafromwiki = json_decode($datafromwiki, true)['parse']['text']['*'];
if (!empty($datafromwiki) and !empty($datafromwiki = self::_cleanWikidataInput((string)$datafromwiki))) { if (!empty($datafromwiki) and !empty($datafromwiki = self::_cleanWikidataInput((string)$datafromwiki))) {
$alreadyEntered = $this->enterPlaceDescFromWikidata($currentPlaceResult, $datafromwiki, $wikilink, $lang, $sprache, $onum, $erfasst_von); $alreadyEntered = $this->enterPlaceDescFromWikidata($cur_place_desc, $datafromwiki, $wikilink, $lang, $cur_lang, $onum, $erfasst_von);
} }
} }
$currentPlaceResult->close();
unset($currentPlaceResult);
if (isset($data['claims']['P1566'])) $geonames_id = filter_var($data['claims']['P1566'][0]['mainsnak']['datavalue']['value'], FILTER_VALIDATE_INT); if (isset($data['claims']['P1566'])) $geonames_id = filter_var($data['claims']['P1566'][0]['mainsnak']['datavalue']['value'], FILTER_VALIDATE_INT);
if (isset($data['claims']['P1667'])) $tgn_id = filter_var($data['claims']['P1667'][0]['mainsnak']['datavalue']['value'], FILTER_VALIDATE_INT); if (isset($data['claims']['P1667'])) $tgn_id = filter_var($data['claims']['P1667'][0]['mainsnak']['datavalue']['value'], FILTER_VALIDATE_INT);
@ -1381,7 +1465,7 @@ final class NodaWikidataFetcher {
if (isset($wikilink[$lang]) and isset($wikilinkterm[$lang]) and is_string($wikilinkterm[$lang])) { if (isset($wikilink[$lang]) and isset($wikilinkterm[$lang]) and is_string($wikilinkterm[$lang])) {
$datafromwiki = MD_STD::runCurl("https://" . $lang . ".wikipedia.org/w/api.php?action=parse&page=" . urlencode($wikilinkterm[$lang]) . "&prop=text&section=0&format=json", 10000); $datafromwiki = MD_STD::runCurl(self::_getWikipediaApiLink($lang, $wikilinkterm[$lang]), 10000);
$datafromwiki = json_decode($datafromwiki, true)['parse']['text']['*']; $datafromwiki = json_decode($datafromwiki, true)['parse']['text']['*'];
# Process data retrieved from wikipedia # Process data retrieved from wikipedia
@ -1391,17 +1475,17 @@ final class NodaWikidataFetcher {
} }
foreach (self::LANGUAGES_MAIN_DESC as $sprache) { foreach (self::LANGUAGES_MAIN_DESC as $cur_lang) {
if ($alreadyEntered === true) break; if ($alreadyEntered === true) break;
if (!isset($wikilink[$sprache]) || !isset($wikilinkterm[$sprache]) || !is_string($wikilinkterm[$sprache])) continue; if (!isset($wikilink[$cur_lang]) || !isset($wikilinkterm[$cur_lang]) || !is_string($wikilinkterm[$cur_lang])) continue;
$datafromwiki = MD_STD::runCurl("https://" . $sprache . ".wikipedia.org/w/api.php?action=parse&page=" . urlencode((string)$wikilinkterm[$sprache]) . "&prop=text&section=0&format=json", 10000); $datafromwiki = MD_STD::runCurl(self::_getWikipediaApiLink($cur_lang, $wikilinkterm[$cur_lang]), 10000);
$datafromwiki = json_decode($datafromwiki, true)['parse']['text']['*']; $datafromwiki = json_decode($datafromwiki, true)['parse']['text']['*'];
# Process data retrieved from wikipedia # Process data retrieved from wikipedia
if ($datafromwiki = self::_cleanWikidataInput((string)$datafromwiki)) { if ($datafromwiki = self::_cleanWikidataInput((string)$datafromwiki)) {
$alreadyEntered = $this->retrieveTagDescFromWikipedia($tag_id, $datafromwiki, $wikilink[$sprache], $lang, "$sprache", $erfasst_von); $alreadyEntered = $this->retrieveTagDescFromWikipedia($tag_id, $datafromwiki, $wikilink[$cur_lang], $lang, $cur_lang, $erfasst_von);
} }
} }
@ -1724,13 +1808,13 @@ final class NodaWikidataFetcher {
public static function generateWikidataFetcherHeader(MDTlLoader $tlLoader, string $additional = "", string $searchTerm = ""):string { public static function generateWikidataFetcherHeader(MDTlLoader $tlLoader, string $additional = "", string $searchTerm = ""):string {
if (empty($searchTerm) and !empty($_GET['suchbegriff'])) { if (empty($searchTerm) and !empty($_GET['suchbegriff'])) {
$searchTerm = $_GET['suchbegriff']; $searchTerm = (string)$_GET['suchbegriff'];
} }
$output = ' $output = '
<header> <header>
<h1><img src="../img/wikidata.png" alt="Logo: Wikidata" />' . $tlLoader->tl("wiki", "wiki", "fetch_from_wikidata"); <h1><img src="../img/wikidata.png" alt="Logo: Wikidata" />' . $tlLoader->tl("wiki", "wiki", "fetch_from_wikidata");
$output .= ': ' . $searchTerm; $output .= ': ' . htmlspecialchars($searchTerm);
$output .= '</h1>'; $output .= '</h1>';
$output .= $additional; $output .= $additional;
$output .= '</header>'; $output .= '</header>';

View File

@ -9,6 +9,7 @@ use PHPUnit\Framework\TestCase;
require_once __DIR__ . "/../src/NodaWikidataFetcher.php"; require_once __DIR__ . "/../src/NodaWikidataFetcher.php";
require_once __DIR__ . "/../../MDErrorReporter/exceptions/generic/MDExpectedException.php"; require_once __DIR__ . "/../../MDErrorReporter/exceptions/generic/MDExpectedException.php";
require_once __DIR__ . "/../../MD_STD/src/MD_STD.php"; require_once __DIR__ . "/../../MD_STD/src/MD_STD.php";
require_once __DIR__ . "/../../MD_STD/src/MD_STD_IN.php";
/** /**
* This script contains tests for the Wikidata fetcher. * This script contains tests for the Wikidata fetcher.
@ -56,4 +57,222 @@ final class NodaWikidataFetcherTest extends TestCase {
self::assertEquals(NodaWikidataFetcher::getWikidataIdFromWikidataLink("https://www.wikidata.org/wiki/Q106697"), "Q106697"); self::assertEquals(NodaWikidataFetcher::getWikidataIdFromWikidataLink("https://www.wikidata.org/wiki/Q106697"), "Q106697");
} }
/**
 * Tests that cleanWikidataInput() reduces Wikipedia parse output (HTML) to
 * text that begins with the article's first descriptive sentence.
 *
 * Two fixtures are checked:
 *  - German-language markup that opens with a large infobox table before
 *    the first paragraph and ends with a reference list and parser comments.
 *  - Markup that opens with a figure and a coordinates-only paragraph
 *    (containing an inline style element) before the descriptive paragraph.
 *
 * Only the start of the cleaned output is compared, so whatever follows the
 * expected sentence is not covered by this test.
 *
 * @return void
 */
public function testCleanWikidataInput():void {

    // Fixture 1: the markup is wrapped in double quotes and followed by a
    // source note (" - (de.wikipedia.org 31.08.2023)").
    $testStr = '"<div class="mw-parser-output"><table class="infobox float-right toccolours toptextcells" style="margin: 0 0 1em 1em; width: 300px;" id="Vorlage_Infobox_Ort_in_der_Ukraine" summary="Infobox Ort in der Ukraine">
<tbody><tr>
<td colspan="2" style="background-color:#AFD6FF; font-size:1.3em; font-weight:bold; text-align:center;">Werbowez (Kossiw)
</td></tr>
<tr>
<td colspan="2" style="background-color:#FFC; font-size:1em; font-weight:bold; text-align:center;"><span lang="uk-Cyrl" class="Cyrl">Вербовець</span>
</td></tr>
<tr style="height:120px; background-color:#FFF;">
<td style="width: 130px; text-align:center;"><span typeof="mw:File"><a href="/wiki/Datei:Coats_of_arms_of_None.svg" class="mw-file-description" title="Wappen fehlt"><img alt="Wappen fehlt" src="//upload.wikimedia.org/wikipedia/commons/thumb/c/c1/Coats_of_arms_of_None.svg/100px-Coats_of_arms_of_None.svg.png" decoding="async" width="100" height="120" class="mw-file-element" srcset="//upload.wikimedia.org/wikipedia/commons/thumb/c/c1/Coats_of_arms_of_None.svg/150px-Coats_of_arms_of_None.svg.png 1.5x, //upload.wikimedia.org/wikipedia/commons/thumb/c/c1/Coats_of_arms_of_None.svg/200px-Coats_of_arms_of_None.svg.png 2x" data-file-width="125" data-file-height="150" /></a></span>
</td>
<td style="width: 170px; text-align:center;"><table class="centered" style="background-color: #f9f9f9; border: none; border-collapse: collapse; width: 1px;">
<tbody><tr><td style="border: none; padding: 0; text-align: center;"><div style="position: relative; z-index: 0; padding: 0; display: inline-block; width: -webkit-max-content; width: -moz-max-content; width: max-content; border: none;"><figure class="mw-halign-center noviewer notpageimage" typeof="mw:File"><a href="/wiki/Datei:Ukraine_adm_location_map.svg" class="mw-file-description" title="Werbowez (Kossiw) (Ukraine)"><img alt="Werbowez (Kossiw) (Ukraine)" src="//upload.wikimedia.org/wikipedia/commons/thumb/7/78/Ukraine_adm_location_map.svg/180px-Ukraine_adm_location_map.svg.png" decoding="async" width="180" height="121" class="mw-file-element" srcset="//upload.wikimedia.org/wikipedia/commons/thumb/7/78/Ukraine_adm_location_map.svg/270px-Ukraine_adm_location_map.svg.png 1.5x, //upload.wikimedia.org/wikipedia/commons/thumb/7/78/Ukraine_adm_location_map.svg/360px-Ukraine_adm_location_map.svg.png 2x" data-file-width="1546" data-file-height="1038" /></a><figcaption>Werbowez (Kossiw) (Ukraine)</figcaption></figure><div style="position:absolute; top:50.7%; left:18.9%; height:0; width:0;"><div style="position:relative;z-index:100;left:-4px;top:-4px;width:8px;height:8px;line-height:0px;"><span typeof="mw:File"><a href="https://geohack.toolforge.org/geohack.php?pagename=Werbowez_(Kossiw)&amp;language=de&amp;params=48.342222222222_N_25.133333333333_E_dim:10000_region:UA-26_type:city(3395)&amp;title=Werbowez+%28Kossiw%29" title="Werbowez (Kossiw) (48° 20 32″ N, 25° 8 0″O)"><img alt="" src="//upload.wikimedia.org/wikipedia/commons/thumb/9/97/ButtonRed.svg/8px-ButtonRed.svg.png" decoding="async" width="8" height="8" class="mw-file-element" srcset="//upload.wikimedia.org/wikipedia/commons/thumb/9/97/ButtonRed.svg/12px-ButtonRed.svg.png 1.5x, //upload.wikimedia.org/wikipedia/commons/thumb/9/97/ButtonRed.svg/16px-ButtonRed.svg.png 2x" data-file-width="480" data-file-height="480" /></a></span></div>
<table style="font-size:90%; border:none; background-color:transparent; border-collapse:collapse; line-height:1em; position:absolute; width:6em; margin: 0 .2em; text-align:left; left:1px; bottom:1px;"><tbody><tr><td style="border:none; vertical-align:middle;"><span style="position:relative; z-index:9; background-color:none;">Werbowez (Kossiw) </span></td></tr></tbody></table></div></div></td></tr>
</tbody></table>
</td></tr>
<tr style="background-color:#AFD6FF;">
<th colspan="2">Basisdaten
</th></tr>
<tr>
<td><a href="/wiki/Liste_der_Oblaste_der_Ukraine" title="Liste der Oblaste der Ukraine">Oblast</a>:</td>
<td><a href="/wiki/Oblast_Iwano-Frankiwsk" title="Oblast Iwano-Frankiwsk">Oblast Iwano-Frankiwsk</a>
</td></tr>
<tr>
<td><a href="/wiki/Liste_der_Rajone_der_Ukraine" title="Liste der Rajone der Ukraine">Rajon</a>:</td>
<td><a href="/wiki/Rajon_Kossiw" title="Rajon Kossiw">Rajon Kossiw</a>
</td></tr>
<tr>
<td><a href="/wiki/H%C3%B6he_%C3%BCber_dem_Meeresspiegel" title="Höhe über dem Meeresspiegel">Höhe</a>:</td>
<td>369 m
</td></tr>
<tr>
<td><a href="/wiki/Fl%C3%A4cheninhalt" title="Flächeninhalt">Fläche</a>:</td>
<td>18,77 <a href="/wiki/Quadratmeter#Quadratkilometer" title="Quadratmeter">km²</a>
</td></tr>
<tr>
<td><a href="/wiki/Einwohner" title="Einwohner">Einwohner</a>:</td>
<td>3.395 <small><i>(2001)</i></small>
</td></tr>
<tr>
<td><a href="/wiki/Bev%C3%B6lkerungsdichte" title="Bevölkerungsdichte">Bevölkerungsdichte</a>:
</td>
<td>181 Einwohner je km²
</td></tr>
<tr>
<td><a href="/wiki/Postleitzahl" title="Postleitzahl">Postleitzahlen</a>:</td>
<td>78605
</td></tr>
<tr>
<td><a href="/wiki/Telefonvorwahl" title="Telefonvorwahl">Vorwahl</a>:</td>
<td>+380 3478
</td></tr>
<tr>
<td><a href="/wiki/Geographische_Koordinaten" title="Geographische Koordinaten">Geographische Lage</a>:</td>
<td><span id="text_coordinates" class="coordinates plainlinks-print"><a class="external text" href="https://geohack.toolforge.org/geohack.php?pagename=Werbowez_(Kossiw)&amp;language=de&amp;params=48.342222222222_N_25.133333333333_E_dim:10000_region:UA-26_type:city(3395)"><span title="Breitengrad">48°&#160;21&#160;<abbr title="Nord">N</abbr></span>, <span title="Längengrad">25°&#160;8&#160;<abbr title="Ost">O</abbr></span></a></span><span class="geo noexcerpt" style="display:none"><span class="body"></span><span class="latitude">48.342222222222</span><span class="longitude">25.133333333333</span><span class="elevation"></span></span><span id="coordinates" class="coordinates noprint"><span title="Koordinatensystem WGS84">Koordinaten: </span><a class="external text" href="https://geohack.toolforge.org/geohack.php?pagename=Werbowez_(Kossiw)&amp;language=de&amp;params=48.342222222222_N_25.133333333333_E_dim:10000_region:UA-26_type:city(3395)"><span title="Breitengrad">48°&#160;20&#160;32″&#160;<abbr title="Nord">N</abbr></span>, <span title="Längengrad">25°&#160;8&#160;0″&#160;<abbr title="Ost">O</abbr></span></a></span>
</td></tr>
<tr>
<td><a href="/wiki/KATOTTH" title="KATOTTH">KATOTTH</a>:
</td>
<td>UA26100010030094355
</td></tr>
<tr>
<td><a href="/wiki/KOATUU" title="KOATUU">KOATUU</a>:
</td>
<td>2623682401
</td></tr>
<tr>
<td><a href="/wiki/Verwaltungsgliederung_der_Ukraine" title="Verwaltungsgliederung der Ukraine">Verwaltungsgliederung</a>:
</td>
<td>1 Dorf
</td></tr>
<tr>
<td>Adresse:
</td>
<td>вул. Миру, буд. 15<br />78605 с. Вербовець
</td></tr>
<tr>
<td><a href="/wiki/Website" title="Website">Website</a>:
</td>
<td><a rel="nofollow" class="external text" href="http://verbovets.kosiv.net/">Offizielle Webseite</a>
</td></tr>
<tr>
<td colspan="2" style="padding-bottom:3px; text-align:center; border-bottom:1px solid #bbb; border-top:1px solid #bbb;"><a rel="nofollow" class="external text" href="http://w1.c1.rada.gov.ua/pls/z7503/A005?rdat1=31.08.2023&amp;rf7571=13801">Statistische Informationen</a>
</td></tr>
<tr>
<td colspan="2" style="padding-bottom:3px; text-align:center; border-bottom:1px solid #bbb; border-top:1px solid #bbb;">
<table class="centered" style="background-color: #f9f9f9; border: none; border-collapse: collapse; width: 1px;">
<tbody><tr><td style="border: none; padding: 0; text-align: center;"><div style="position: relative; z-index: 0; padding: 0; display: inline-block; width: -webkit-max-content; width: -moz-max-content; width: max-content; border: none;"><figure class="mw-halign-center noviewer notpageimage" typeof="mw:File"><a href="/wiki/Datei:Ivano-Frankivsk_location_map.svg" class="mw-file-description" title="Werbowez (Kossiw) (Oblast Iwano-Frankiwsk)"><img alt="Werbowez (Kossiw) (Oblast Iwano-Frankiwsk)" src="//upload.wikimedia.org/wikipedia/commons/thumb/8/8e/Ivano-Frankivsk_location_map.svg/290px-Ivano-Frankivsk_location_map.svg.png" decoding="async" width="290" height="347" class="mw-file-element" srcset="//upload.wikimedia.org/wikipedia/commons/thumb/8/8e/Ivano-Frankivsk_location_map.svg/435px-Ivano-Frankivsk_location_map.svg.png 1.5x, //upload.wikimedia.org/wikipedia/commons/thumb/8/8e/Ivano-Frankivsk_location_map.svg/580px-Ivano-Frankivsk_location_map.svg.png 2x" data-file-width="533" data-file-height="637" /></a><figcaption>Werbowez (Kossiw) (Oblast Iwano-Frankiwsk)</figcaption></figure><div style="position:absolute; top:63.3%; left:74.4%; height:0; width:0;"><div style="position:relative;z-index:100;left:-4px;top:-4px;width:8px;height:8px;line-height:0px;"><span typeof="mw:File"><a href="https://geohack.toolforge.org/geohack.php?pagename=Werbowez_(Kossiw)&amp;language=de&amp;params=48.342222222222_N_25.133333333333_E_dim:10000_region:UA-26_type:city(3395)&amp;title=Werbowez+%28Kossiw%29" title="Werbowez (Kossiw) (48° 20 32″ N, 25° 8 0″O)"><img alt="" src="//upload.wikimedia.org/wikipedia/commons/thumb/9/97/ButtonRed.svg/8px-ButtonRed.svg.png" decoding="async" width="8" height="8" class="mw-file-element" srcset="//upload.wikimedia.org/wikipedia/commons/thumb/9/97/ButtonRed.svg/12px-ButtonRed.svg.png 1.5x, //upload.wikimedia.org/wikipedia/commons/thumb/9/97/ButtonRed.svg/16px-ButtonRed.svg.png 2x" data-file-width="480" data-file-height="480" /></a></span></div>
<table style="font-size:90%; border:none; background-color:transparent; border-collapse:collapse; line-height:1em; position:absolute; width:6em; margin: 0 .2em; text-align:right; right:1px; bottom:1px;"><tbody><tr><td style="border:none; vertical-align:middle;"><span style="position:relative; z-index:9; background-color:none;">Werbowez (Kossiw) </span></td></tr></tbody></table></div></div></td></tr>
</tbody></table><span style="display:none;"><a href="/w/index.php?title=Vorlage:Positionskarte_ISO_3166-2/Wartung/noregion&amp;action=edit&amp;redlink=1" class="new" title="Vorlage:Positionskarte ISO 3166-2/Wartung/noregion (Seite nicht vorhanden)">i1</a></span>
</td></tr></tbody></table>
<p><b>Werbowez</b> (<b><span style="font-style:normal;font-weight:normal"><a href="/wiki/Ukrainische_Sprache" title="Ukrainische Sprache">ukrainisch</a></span> <span lang="uk-Cyrl" class="Cyrl" style="font-style:normal">Вербовець</span></b>; <span style="font-style:normal;font-weight:normal"><a href="/wiki/Russische_Sprache" title="Russische Sprache">russisch</a></span> <span lang="ru-Cyrl" class="Cyrl" style="font-style:normal">Вербовец</span>, <a href="/wiki/Polnische_Sprache" title="Polnische Sprache">polnisch</a> <span lang="pl" style="font-style:italic;font-weight:normal">Wierzbowiec</span>; <span style="font-style:normal;font-weight:normal"><a href="/wiki/Rum%C3%A4nische_Sprache" title="Rumänische Sprache">rumänisch</a></span> <span lang="ro-Latn" style="font-style:italic">Verboveț</span>) ist ein <a href="/wiki/Dorf" title="Dorf">Dorf</a> in der <a href="/wiki/Ukraine" title="Ukraine">ukrainischen</a> <a href="/wiki/Oblast_Iwano-Frankiwsk" title="Oblast Iwano-Frankiwsk">Oblast Iwano-Frankiwsk</a> mit etwa 3400 Einwohnern (2001).<sup id="cite_ref-1" class="reference"><a href="#cite_note-1">&#91;1&#93;</a></sup>
</p>
<figure class="mw-default-size mw-halign-left" typeof="mw:File/Thumb"><a href="/wiki/Datei:%D0%92%D0%B5%D1%80%D0%B1%D0%BE%D0%B2%D0%B5%D1%86%D1%8C.JPG" class="mw-file-description"><img src="//upload.wikimedia.org/wikipedia/commons/thumb/9/9d/%D0%92%D0%B5%D1%80%D0%B1%D0%BE%D0%B2%D0%B5%D1%86%D1%8C.JPG/220px-%D0%92%D0%B5%D1%80%D0%B1%D0%BE%D0%B2%D0%B5%D1%86%D1%8C.JPG" decoding="async" width="220" height="147" class="mw-file-element" srcset="//upload.wikimedia.org/wikipedia/commons/thumb/9/9d/%D0%92%D0%B5%D1%80%D0%B1%D0%BE%D0%B2%D0%B5%D1%86%D1%8C.JPG/330px-%D0%92%D0%B5%D1%80%D0%B1%D0%BE%D0%B2%D0%B5%D1%86%D1%8C.JPG 1.5x, //upload.wikimedia.org/wikipedia/commons/thumb/9/9d/%D0%92%D0%B5%D1%80%D0%B1%D0%BE%D0%B2%D0%B5%D1%86%D1%8C.JPG/440px-%D0%92%D0%B5%D1%80%D0%B1%D0%BE%D0%B2%D0%B5%D1%86%D1%8C.JPG 2x" data-file-width="5184" data-file-height="3456" /></a><figcaption>Blick auf das Dorf</figcaption></figure>
<p>Das um 1650 erstmals schriftlich erwähnte Dorf<sup id="cite_ref-2" class="reference"><a href="#cite_note-2">&#91;2&#93;</a></sup> liegt im Osten der <a href="/wiki/Historische_Landschaft" title="Historische Landschaft">historischen Landschaft</a> <a href="/wiki/Galizien" title="Galizien">Galizien</a> am Ufer der <a href="/w/index.php?title=Rybnyzja_(Fluss)&amp;action=edit&amp;redlink=1" class="new" title="Rybnyzja (Fluss) (Seite nicht vorhanden)">Rybnyzja</a> (<span lang="uk-Cyrl" class="Cyrl">Рибниця</span>), einem 56&#160;km langen Nebenfluss des <a href="/wiki/Pruth" title="Pruth">Pruth</a> 7&#160;km nordöstlich vom Rajonzentrum <a href="/wiki/Kossiw" title="Kossiw">Kossiw</a> und 95&#160;km südlich vom Oblastzentrum <a href="/wiki/Iwano-Frankiwsk" title="Iwano-Frankiwsk">Iwano-Frankiwsk</a>. Südlich der Ortschaft verläuft die <a href="/wiki/Territorialstra%C3%9Fe" title="Territorialstraße">Territorialstraße</a> <i>T0909</i>.
</p><p>Am 12. Juni 2020 wurde das Dorf ein Teil der neu gegründeten <i>Stadtgemeinde <a href="/wiki/Kossiw" title="Kossiw">Kossiw</a></i> im <a href="/wiki/Rajon_Kossiw" title="Rajon Kossiw">Rajon Kossiw</a><sup id="cite_ref-3" class="reference"><a href="#cite_note-3">&#91;3&#93;</a></sup>, bis dahin bildete es zusammen mit dem Dorf <a href="/w/index.php?title=Staryj_Kossiw&amp;action=edit&amp;redlink=1" class="new" title="Staryj Kossiw (Seite nicht vorhanden)">Staryj Kossiw</a> (<span lang="uk-Cyrl" class="Cyrl">Старий Косів</span>) die <i>Landratsgemeinde Werbowez</i> (Вербовецька сільська рада/<i>Werbowezka silska rada</i>) im Osten des Rajons.
</p>
<ol class="references">
<li id="cite_note-1"><span class="mw-cite-backlink"><a href="#cite_ref-1"></a></span> <span class="reference-text"><a rel="nofollow" class="external text" href="http://w1.c1.rada.gov.ua/pls/z7503/A005?rf7571=13801">Ortswebseite</a> auf der offiziellen Webpräsenz der <a href="/wiki/Werchowna_Rada" title="Werchowna Rada">Werchowna Rada</a>; abgerufen am 14. November 2017 (ukrainisch)</span>
</li>
<li id="cite_note-2"><span class="mw-cite-backlink"><a href="#cite_ref-2"></a></span> <span class="reference-text"><a rel="nofollow" class="external text" href="http://ukrssr.com.ua/ifrank/kosivskiy/verbovets-kosivskiy-rayon-ivano-frankivska-oblast">Ortsgeschichte Werbowez</a> in der <a href="/wiki/Geschichte_der_St%C3%A4dte_und_D%C3%B6rfer_der_Ukrainischen_SSR" title="Geschichte der Städte und Dörfer der Ukrainischen SSR">Geschichte der Städte und Dörfer der Ukrainischen SSR</a>; abgerufen am 14. November 2017 (ukrainisch)</span>
</li>
<li id="cite_note-3"><span class="mw-cite-backlink"><a href="#cite_ref-3"></a></span> <span class="reference-text"><a rel="nofollow" class="external text" href="https://zakon.rada.gov.ua/laws/show/714-2020-%D1%80#Text">Кабінет Міністрів України Розпорядження від 12 червня 2020 р. 714-р "Про визначення адміністративних центрів та затвердження територій територіальних громад Івано-Франківської області"</a></span>
</li>
</ol>
<!--
NewPP limit report
Parsed by mw1396
Cached time: 20230831121013
Cache expiry: 42588
Reduced expiry: true
Complications: []
CPU time usage: 0.219 seconds
Real time usage: 0.274 seconds
Preprocessor visited node count: 6414/1000000
Postexpand include size: 33611/2097152 bytes
Template argument size: 12317/2097152 bytes
Highest expansion depth: 34/100
Expensive parser function count: 9/500
Unstrip recursion depth: 0/20
Unstrip postexpand size: 1476/5000000 bytes
Lua time usage: 0.080/10.000 seconds
Lua memory usage: 3398800/52428800 bytes
Number of Wikibase entities loaded: 0/400
-->
<!--
Transclusion expansion time report (%,ms,calls,template)
100.00% 239.600 1 -total
93.55% 224.134 1 Vorlage:Infobox_Ort_in_der_Ukraine
50.81% 121.740 2 Vorlage:Positionskarte
49.72% 119.121 2 Vorlage:Positionskarte+
44.41% 106.401 2 Vorlage:Positionskarte~
33.28% 79.732 2 Vorlage:Positionskarte~*
25.69% 61.558 3 Vorlage:Lang
19.41% 46.499 1 Vorlage:Positionskarte_ISO_3166-2
16.90% 40.486 12 Vorlage:CoordinateLONG
14.02% 33.586 10 Vorlage:CoordinateLAT
-->
</div>" - (de.wikipedia.org 31.08.2023)';
    $output = NodaWikidataFetcher::cleanWikidataInput($testStr);
    $expected = 'Werbowez (ukrainisch Вербовець; russisch Вербовец, polnisch Wierzbowiec; rumänisch Verboveț) ist ein Dorf in der ukrainischen Oblast Iwano-Frankiwsk mit etwa 3400 Einwohnern (2001).';
    // mb_substr() keeps the excerpt in the failure message valid UTF-8;
    // byte-wise substr() could cut a multibyte character in half.
    self::assertStringStartsWith(
        $expected,
        $output,
        "Start of parsed Wikipedia text should be:" . PHP_EOL . PHP_EOL . $expected . PHP_EOL . PHP_EOL . 'Real start text is: ' . PHP_EOL . PHP_EOL . mb_substr($output, 0, 250, 'UTF-8')
    );

    // Fixture 2: markup whose first <p> holds only coordinates and an inline
    // <style> element; the expected text starts at the second paragraph.
    $output = NodaWikidataFetcher::cleanWikidataInput('<div class="mw-parser-output"><figure class="mw-default-size mw-halign-right" typeof="mw:File/Thumb"><a href="/wiki/File:%D0%92%D0%B5%D1%80%D0%B1%D0%BE%D0%B2%D0%B5%D1%86%D1%8C.JPG" class="mw-file-description"><img src="//upload.wikimedia.org/wikipedia/commons/thumb/9/9d/%D0%92%D0%B5%D1%80%D0%B1%D0%BE%D0%B2%D0%B5%D1%86%D1%8C.JPG/220px-%D0%92%D0%B5%D1%80%D0%B1%D0%BE%D0%B2%D0%B5%D1%86%D1%8C.JPG" decoding="async" width="220" height="147" class="mw-file-element" srcset="//upload.wikimedia.org/wikipedia/commons/thumb/9/9d/%D0%92%D0%B5%D1%80%D0%B1%D0%BE%D0%B2%D0%B5%D1%86%D1%8C.JPG/330px-%D0%92%D0%B5%D1%80%D0%B1%D0%BE%D0%B2%D0%B5%D1%86%D1%8C.JPG 1.5x, //upload.wikimedia.org/wikipedia/commons/thumb/9/9d/%D0%92%D0%B5%D1%80%D0%B1%D0%BE%D0%B2%D0%B5%D1%86%D1%8C.JPG/440px-%D0%92%D0%B5%D1%80%D0%B1%D0%BE%D0%B2%D0%B5%D1%86%D1%8C.JPG 2x" data-file-width="5184" data-file-height="3456" /></a><figcaption></figcaption></figure>
<p><span style="font-size: small;"><span id="coordinates"><a href="/wiki/%E5%9C%B0%E7%90%86%E5%9D%90%E6%A0%87" class="mw-redirect" title="地理坐标">坐标</a><style data-mw-deduplicate="TemplateStyles:r65292569">.mw-parser-output .geo-default,.mw-parser-output .geo-dms,.mw-parser-output .geo-dec{display:inline}.mw-parser-output .geo-nondefault,.mw-parser-output .geo-multi-punct{display:none}.mw-parser-output .longitude,.mw-parser-output .latitude{white-space:nowrap}</style><span class="plainlinks nourlexpansion"><a class="external text" href="//geohack.toolforge.org/geohack.php?language=zh&amp;pagename=%E9%9F%8B%E7%88%BE%E5%8D%9A%E9%9F%8B%E9%BD%8A_(%E7%A7%91%E7%B4%A2%E5%A4%AB%E5%8D%80)&amp;params=48_20_32_N_25_8_0_E_scale:30000"><span class="geo-default"><span class="geo-dms" title="此地的地图、航拍照片和其他数据"><span class="latitude">48°2032″N</span> <span class="longitude">25°80″E</span></span></span><span class="geo-multi-punct">&#xfeff; / &#xfeff;</span><span class="geo-nondefault"><span class="geo-dec" title="此地的地图、航拍照片和其他数据">48.34222°N 25.13333°E</span><span style="display:none">&#xfeff; / <span class="geo">48.34222; 25.13333</span></span></span></a></span></span></span>
</p><p><b>韋爾博韋齊</b><a href="/wiki/%E7%83%8F%E5%85%8B%E8%98%AD%E8%AA%9E" class="mw-redirect" title="烏克蘭語">烏克蘭語</a><span lang="uk">Вербовець</span>),是<a href="/wiki/%E7%83%8F%E5%85%8B%E8%98%AD" class="mw-redirect" title="烏克蘭">烏克蘭</a>的村落,位於該國西部<a href="/wiki/%E4%BC%8A%E4%B8%87%E8%AF%BA-%E5%BC%97%E5%85%B0%E7%A7%91%E5%A4%AB%E6%96%AF%E5%85%8B%E5%B7%9E" title="伊万诺-弗兰科夫斯克州">伊萬諾-弗蘭科夫斯克州</a>,由<a href="/wiki/%E7%A7%91%E7%B4%A2%E5%A4%AB%E5%8D%80" class="mw-redirect" title="科索夫區">科索夫區</a>負責管轄始建於1456年面積18.77平方公里2001年人口3,395
</p>
<!--
NewPP limit report
Parsed by mw1412
Cached time: 20230831132208
Cache expiry: 1814400
Reduced expiry: false
Complications: []
CPU time usage: 0.147 seconds
Real time usage: 0.186 seconds
Preprocessor visited node count: 48/1000000
Postexpand include size: 2084/2097152 bytes
Template argument size: 0/2097152 bytes
Highest expansion depth: 3/100
Expensive parser function count: 1/500
Unstrip recursion depth: 0/20
Unstrip postexpand size: 362/5000000 bytes
Lua time usage: 0.110/10.000 seconds
Lua memory usage: 15402517/52428800 bytes
Number of Wikibase entities loaded: 1/400
-->
<!--
Transclusion expansion time report (%,ms,calls,template)
100.00% 152.989 1 -total
70.07% 107.204 1 Template:Lang-uk
29.62% 45.313 1 Template:Coord
-->
</div>');
    $expected = '韋爾博韋齊(烏克蘭語:Вербовець),是烏克蘭的村落,位於該國西部伊萬諾-弗蘭科夫斯克州由科索夫區負責管轄始建於1456年面積18.77平方公里2001年人口3,3';
    self::assertStringStartsWith(
        $expected,
        $output,
        "Start of parsed Wikipedia text should be:" . PHP_EOL . PHP_EOL . $expected . PHP_EOL . PHP_EOL . 'Real start text is: ' . PHP_EOL . PHP_EOL . mb_substr($output, 0, 250, 'UTF-8')
    );

}
/**
 * Tests that cleanWikidataInput() also accepts input containing no HTML.
 *
 * The input is a plain-text sentence followed by a "[1]" reference marker.
 * Only the start of the output is compared, so this test shows that the
 * sentence itself comes through unchanged; it does not assert what happens
 * to the trailing marker.
 *
 * @return void
 */
public function testCleanWikidataInputWithoutHtml():void {

    $output = NodaWikidataFetcher::cleanWikidataInput('Werbowez (ukrainisch Вербовець; russisch Вербовец, polnisch Wierzbowiec; rumänisch Verboveț) ist ein Dorf in der ukrainischen Oblast Iwano-Frankiwsk mit etwa 3400 Einwohnern (2001).[1]');
    $expected = 'Werbowez (ukrainisch Вербовець; russisch Вербовец, polnisch Wierzbowiec; rumänisch Verboveț) ist ein Dorf in der ukrainischen Oblast Iwano-Frankiwsk mit etwa 3400 Einwohnern (2001).';
    // mb_substr() keeps the excerpt in the failure message valid UTF-8;
    // byte-wise substr() could cut a multibyte character in half.
    self::assertStringStartsWith(
        $expected,
        $output,
        "Start of parsed Wikipedia text should be:" . PHP_EOL . PHP_EOL . $expected . PHP_EOL . PHP_EOL . 'Real start text is: ' . PHP_EOL . PHP_EOL . mb_substr($output, 0, 250, 'UTF-8')
    );

}
} }