Use Wikipedia API for getting descriptions from Wikipedia rather than
parsing HTML in Wikidata fetcher. Thanks @awinkler
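
In short: instead of downloading rendered article HTML via action=parse and scrubbing it with DOM traversal and literal string removal, the fetcher now asks the MediaWiki TextExtracts API (action=query&prop=extracts&exintro&explaintext) for a plain-text intro, so only light whitespace normalization remains. Below is a minimal standalone sketch of the new retrieval flow; file_get_contents stands in for the project's MD_STD::runCurl wrapper, and the sample title is illustrative only.

<?php
declare(strict_types=1);

// Sketch of the new retrieval path: fetch the plain-text intro of an article
// through the MediaWiki TextExtracts API, mirroring _getWikipediaApiLink()
// and the extraction logic introduced in this commit.
function getWikipediaExtract(string $lang, string $title): string {

    $url = "https://" . urlencode($lang) . ".wikipedia.org/w/api.php"
        . "?format=json&action=query&prop=extracts&exintro&explaintext&redirects=1"
        . "&titles=" . urlencode($title);

    // The real code uses MD_STD::runCurl($url, 10000); file_get_contents keeps
    // this sketch dependency-free (requires allow_url_fopen and openssl).
    $raw = file_get_contents($url);
    if ($raw === false) return '';

    $decoded = json_decode($raw, true);
    // Expected shape: {"query":{"pages":{"<pageid>":{"title":"...","extract":"..."}}}}
    if (empty($decoded) || !isset($decoded['query']) || empty($decoded['query']['pages'])) {
        return '';
    }

    $firstPageId = array_keys($decoded['query']['pages'])[0];
    return strval($decoded['query']['pages'][$firstPageId]['extract'] ?? '');

}

echo getWikipediaExtract('en', 'Albert Einstein'), PHP_EOL;
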
@@ -43,25 +43,6 @@ final class NodaWikidataFetcher {
         "orcid" => "P496",
     ];
 
-    private const WIKIPEDIA_REMOVE_LITERALS = [
-        "<p>Si vous disposez d'ouvrages ou d'articles de référence ou si vous ",
-        '<p><b>En pratique :</b> <a href="/wiki/Wikip%C3%A9dia:Citez_vos_sources#Qualité_des_sources" title="Wikipédia:Citez vos sources">Quelles sources sont attendu',
-        '<p>Vous pouvez partager vos connaissances en l’améliorant (',
-        '<p class="mw-empty-elt">',
-        '<p><small>Géolocalisation sur la carte',
-        '<p><b>Koordinaatit:</b>',
-        '<p><span class="executeJS" data-gadgetname="ImgToggle"></span',
-        '<p><span class="imgtoggleboxTitle">',
-        //'<div class="mw-parser-output"><p>',
-        '<p><span style="font-size: small;"><span id="coordinates">',
-        '<p><span></span></p>',
-        '<p><a rel="nofollow" class="external text" href="https://maps.gs',
-        '<p><span class="plainlinks nourlexpansion"><a class="external text" href="//tools.wmflabs.org/geohack/geohack.php?langu',
-        '<p><span style="display:none">',
-        '<p> </p>',
-        '<p><span class="geo noexcerpt"',
-    ];
-
     public const RETRIEVAL_MODES_ACCEPTED = [
         'list',
         'add',
@@ -87,7 +68,8 @@ final class NodaWikidataFetcher {
      */
     private static function _getWikipediaApiLink(string $lang, string $searchTerm):string {
 
-        return "https://" . urlencode($lang) . ".wikipedia.org/w/api.php?action=parse&page=" . urlencode($searchTerm) . "&prop=text&section=0&format=json";
+        return "https://" . urlencode($lang) . ".wikipedia.org/w/api.php?format=json&action=query&prop=extracts&exintro&explaintext&redirects=1&titles=" . urlencode($searchTerm);
+        # w/api.php?action=parse&page=" . urlencode($searchTerm) . "&prop=text&section=0&format=json";
 
     }
@@ -152,12 +134,13 @@ final class NodaWikidataFetcher {
 
         $datafromwiki = MD_STD::runCurl(self::_getWikipediaApiLink($lang, $title), 10000);
         $json_decoded = json_decode($datafromwiki, true);
-        if (empty($json_decoded) || !isset($json_decoded['parse'])) {
+        if (empty($json_decoded) || !isset($json_decoded['query']) || empty($json_decoded['query']['pages'])) {
             return '';
         }
-        $datafromwiki = strval($json_decoded['parse']['text']['*']);
+        $firstPageId = array_keys($json_decoded['query']['pages'])[0];
+        $datafromwiki = strval($json_decoded['query']['pages'][$firstPageId]['extract']);
 
-        return self::_cleanWikidataInput($datafromwiki);
+        return self::_cleanInputSimple($datafromwiki);
 
     }
@@ -276,239 +259,21 @@ final class NodaWikidataFetcher {
     }
 
     /**
-     * Cleans basic tags off Wikidata input.
+     * Cleans remaining HTML elements and leading, trailing whitespaces.
      *
      * @param string $input Input string.
      *
     * @return string
      */
-    private static function _cleanWikidataInputHtml(string $input):string {
+    private static function _cleanInputSimple(string $input):string {
 
-        // Clean off anything before first <p>
-        if ($pStartPos = strpos($input, '<p')) {
-            $input = substr($input, $pStartPos);
-        }
-        if ($pEndPos = strrpos($input, '</p>')) {
-            $input = substr($input, 0, $pEndPos + 4);
-        }
-
-        $doc = new DOMDocument();
-        try {
-            libxml_use_internal_errors(true);
-            $doc->loadXML('<section>' . trim($input) . '</section>');
-            libxml_use_internal_errors(false);
-        }
-        catch (Exception $e) {
-            throw new Exception("Failed to load DOMDocument." . PHP_EOL . $e->getMessage() . PHP_EOL . PHP_EOL . '---' . $input . '---');
-        }
-
-        $list = $doc->getElementsByTagName("style");
-        while ($list->length > 0) {
-            $p = $list->item(0);
-            if ($p === null || $p->parentNode === null) break;
-            $p->parentNode->removeChild($p);
-        }
-
-        $list = $doc->getElementsByTagName("table");
-        while ($list->length > 0) {
-            $p = $list->item(0);
-            if ($p === null || $p->parentNode === null) break;
-            $p->parentNode->removeChild($p);
-        }
-
-        $list = $doc->getElementsByTagName("ol");
-        while ($list->length > 0) {
-            $p = $list->item(0);
-            if ($p === null || $p->parentNode === null) break;
-            $p->parentNode->removeChild($p);
-        }
-
-        if (($firstP = $doc->getElementsByTagName("p")->item(0)) !== null) {
-            if (($firstPhtml = $doc->saveHTML($firstP)) !== false) {
-                if (strpos($firstPhtml, 'geohack') !== false) {
-                    if ($firstP->parentNode !== null) $firstP->parentNode->removeChild($firstP);
-                }
-            }
-        }
-
-        $output = [];
-        foreach ($doc->getElementsByTagName("p") as $p) {
-            $output[] = trim($p->textContent);
-        }
-
-        /*
-        if (strpos($doc->saveHTML(), 'Coordinates:') !== false) {
-            echo $doc->saveHTML();
-            exit;
-        }
-        */
-        return str_replace(PHP_EOL, PHP_EOL . PHP_EOL, trim(implode(PHP_EOL, $output)));
-
-    }
-
-    /**
-     * Cleans brackets ([1], [2]) off description text.
-     *
-     * @param string $input Input string.
-     *
-     * @return string
-     */
-    private static function _cleanSourceBracketsOffTranslation(string $input):string {
-
-        $bracketsToRemove = [];
-        for ($i = 0; $i < 100; $i++) {
-            $bracketsToRemove["[$i]"] = "";
-        }
-        return strtr($input, $bracketsToRemove);
-
-    }
-
-    /**
-     * Cleans contents parsed from Wikipedia.
-     *
-     * @param string $input Input string.
-     *
-     * @return string
-     */
-    private static function _cleanWikidataInput(string $input):string {
-
-        $input = trim($input, '"');
-        foreach (self::WIKIPEDIA_REMOVE_LITERALS as $tToRemove) $input = str_replace($tToRemove, "", $input);
-
-        if (substr($input, 0, strlen('<')) === '<') {
-
-            $input = self::_cleanWikidataInputHtml($input);
-
-            if (mb_strlen($input) > 600) {
-                if (strpos($input, PHP_EOL . PHP_EOL, 600) !== false) {
-                    $input = substr($input, 0, strpos($input, PHP_EOL . PHP_EOL, 600));
-                }
-            }
-
-            $input = self::_cleanSourceBracketsOffTranslation($input);
-
-            $input = str_replace("\t", " ", $input);
-
-            // Remove newlines with ensuing spaces
-            while (strpos($input, PHP_EOL . " ") !== false) {
-                $input = str_replace(PHP_EOL . " ", PHP_EOL, $input);
-            }
-
-            // Remove double newlines
-            while (strpos($input, PHP_EOL . PHP_EOL . PHP_EOL) !== false) {
-                $input = str_replace(PHP_EOL . PHP_EOL . PHP_EOL, PHP_EOL . PHP_EOL, $input);
-            }
-            return MD_STD_IN::sanitize_text($input);
-
-        }
-
-        $input = str_replace(PHP_EOL, '', $input);
-
-        if (empty($input)) return "";
-
-        // Remove infobox tables specifically
-        $firstParagraphPosition = strpos($input, '<p', 1);
-        $currentSearchPos = strpos($input, "<table>");
-        if ($currentSearchPos !== false && $currentSearchPos < $firstParagraphPosition) {
-            if (($tableEndPos = strpos($input, "</table>")) !== false) {
-                if (($pStartPos = strpos($input, '<p', $tableEndPos + 6)) !== false) {
-                    $input = substr($input, $pStartPos);
-                }
-            }
-        }
-
-        // Remove leftover unnecessary paragraphs before actual content
-
-        $removeFirstParagraph = false;
-        $firstParagraphPosition = strpos($input, '<p', 1);
-
-        foreach (["</table>", "<img"] as $tagPart) {
-            $currentSearchPos = strpos($input, $tagPart);
-            if ($currentSearchPos !== false && $currentSearchPos < $firstParagraphPosition) {
-                $removeFirstParagraph = true;
-                break;
-            }
-        }
-
-        if ($removeFirstParagraph === true) {
-            $input = substr($input, $firstParagraphPosition ?: 0);
-        }
-
-        $input = str_replace('</p>', '</p>' . PHP_EOL . PHP_EOL . PHP_EOL, $input);
-        # $input = str_replace('?/i', '', $input);
-        $input = strip_tags($input);
-
-        # for ($i = 150; $i < 1000; $i++) $input = str_replace("&#$i;", " ", $input);
-        $i = 0;
-        while (strpos($input, ".mw-parser-output") !== false and strpos($input, "}", strpos($input, ".mw-parser-output")) !== false) {
-            $part1 = substr($input, 0, strpos($input, ".mw-parser-output"));
-            $part2 = substr($input, strpos($input, "}", strpos($input, ".mw-parser-output")) + 1);
-            $input = $part1 . $part2;
-            ++$i;
-            if ($i === 30) break;
-        }
-
-        $input = self::_cleanSourceBracketsOffTranslation($input);
-
-        $input = str_replace("\t", " ", $input);
-
-        // Remove double whitespaces
-        while (strpos($input, "  ") !== false) {
-            $input = str_replace("  ", " ", $input);
-        }
-
-        // Remove newlines with ensuing spaces
-        while (strpos($input, PHP_EOL . " ") !== false) {
-            $input = str_replace(PHP_EOL . " ", PHP_EOL, $input);
-        }
-
-        // Remove double newlines
-        while (strpos($input, PHP_EOL . PHP_EOL . PHP_EOL) !== false) {
-            $input = str_replace(PHP_EOL . PHP_EOL . PHP_EOL, PHP_EOL . PHP_EOL, $input);
-        }
-
-        $stableToRemove = [
-            "Vous pouvez partager vos connaissances en l’améliorant (comment ?) selon les recommandations des projets correspondants.",
-        ];
-        foreach ($stableToRemove as $tToRemove) $input = str_replace($tToRemove, "", $input);
-
-        $endings = [
-            "StubDenne artikel om et vandløb ",
-        ];
-        foreach ($endings as $ending) {
-            if (strpos($input, $ending) !== false) $input = substr($input, 0, strpos($input, $ending));
-        }
-
-        $input = trim($input);
-
-        // Cut off overly long articles
-        if (mb_strlen($input) > 600) {
-            if (strpos($input, PHP_EOL . PHP_EOL, 600) !== false) {
-                $input = trim(substr($input, 0, strpos($input, PHP_EOL . PHP_EOL, 600)));
-            }
-        }
-
-        if (empty($input)) return '';
-
-        $input = str_replace("'", "´", MD_STD::preg_replace_str("/\&\#91\;[0-9]\&\#93\;/", '', $input));
-
-        $input = html_entity_decode($input);
-
-        return MD_STD_IN::sanitize_text($input);
-
-    }
-
-    /**
-     * Wrapper around _cleanWikidataInput for testing.
-     *
-     * @param string $input Input string.
-     *
-     * @return string
-     */
-    public static function cleanWikidataInput(string $input):string {
-
-        if (PHP_SAPI !== 'cli') throw new Exception("Use this function only for testing");
-        return self::_cleanWikidataInput($input);
+        return strtr(
+            trim(MD_STD_IN::sanitize_text($input)),
+            [
+                PHP_EOL => PHP_EOL . PHP_EOL,
+                PHP_EOL . PHP_EOL . PHP_EOL => PHP_EOL . PHP_EOL,
+            ]
+        );
 
     }
@@ -815,25 +580,20 @@ final class NodaWikidataFetcher {
             $wikilink = $wikilinks[$lang];
             if (!empty($contents[$lang])) {
 
-                $fromWikipedia = json_decode($contents[$lang], true)['parse'];
-                $titleFromWikipedia = $fromWikipedia['title'];
-                $descFromWiki = $fromWikipedia['text']['*'];
-
-                # Process data retrieved from wikipedia
-
-                if ($descFromWiki !== null) $tDescription = (string)$descFromWiki;
-                else $tDescription = "";
+                $titleFromWikipedia = $data['sitelinks'][$lang . 'wiki']['title'];
+                $tDescription = self::_getCleanedWikipediaSnippet($lang, $titleFromWikipedia);
 
             }
             else {
                 $tDescription = "";
             }
 
-            if (!empty($titleFromWikipedia) && !empty($tDescription) && !empty($desc_cleaned = self::_cleanWikidataInput($tDescription))) {
+            if (!empty($titleFromWikipedia) && !empty($tDescription)) {
 
                 # $descs[$lang] = $tDescription;
                 $output[$lang] = [
                     'label' => $titleFromWikipedia,
-                    'description' => '"' . $desc_cleaned . '" - (' . $data['labels'][$lang]['language'] . '.wikipedia.org ' . date('d.m.Y') . ')',
+                    'description' => '"' . $tDescription . '" - (' . $data['labels'][$lang]['language'] . '.wikipedia.org ' . date('d.m.Y') . ')',
                     'link' => $wikilink,
                 ];
             }
@@ -841,8 +601,8 @@ final class NodaWikidataFetcher {
         else if (!empty($data['labels'][$lang]['value']) and !empty($data['descriptions'][$lang])) {
 
             $output[$lang] = [
-                'label' => self::_cleanWikidataInput($data['labels'][$lang]['value']),
-                'description' => self::_cleanWikidataInput($data['descriptions'][$lang]['value']),
+                'label' => self::_cleanInputSimple($data['labels'][$lang]['value']),
+                'description' => self::_cleanInputSimple($data['descriptions'][$lang]['value']),
                 'link' => "",
             ];
 
@@ -853,8 +613,8 @@ final class NodaWikidataFetcher {
         else if (!empty($data['labels'][$lang]['value']) and !empty($data['descriptions'][$lang])) {
 
             $output[$lang] = [
-                'label' => self::_cleanWikidataInput($data['labels'][$lang]['value']),
-                'description' => self::_cleanWikidataInput($data['descriptions'][$lang]['value']),
+                'label' => self::_cleanInputSimple($data['labels'][$lang]['value']),
+                'description' => self::_cleanInputSimple($data['descriptions'][$lang]['value']),
                 'link' => "",
             ];
 
@@ -1070,6 +830,51 @@ final class NodaWikidataFetcher {
 
     }
 
+    /**
+     * Function for retrieving information.
+     *
+     * @param string       $lang      The user's selected language.
+     * @param array<mixed> $data      Data fetched from wikidata.
+     * @param array<string, array{url: string, title: string}> $wikilinks Links to wikipedia APIs.
+     *
+     * @return array{}|array{lang: string, desc: string, source: 'wikidata'|'wikipedia'}
+     */
+    private static function _getDescriptionFromWikidataAndWikipediaLinks(string $lang, array $data, array $wikilinks):array {
+
+        // Try the current user language for retrieving wikipedia texts
+        if (isset($wikilinks[$lang])) {
+            # Process data retrieved from wikipedia
+            if (!empty($datafromwiki = self::_getCleanedWikipediaSnippet($lang, $wikilinks[$lang]['title']))) {
+                return ['lang' => $lang, 'desc' => $datafromwiki, 'source' => 'wikipedia'];
+            }
+
+        }
+
+        // Try the alternative languages for retrieving wikipedia texts
+        foreach (self::LANGUAGES_MAIN_DESC as $cur_lang) {
+
+            if ($lang === $cur_lang || !isset($wikilinks[$cur_lang])) continue;
+
+            if ($datafromwiki = self::_getCleanedWikipediaSnippet($lang, $wikilinks[$cur_lang]['title'])) {
+                return ['lang' => $cur_lang, 'desc' => $datafromwiki, 'source' => 'wikipedia'];
+            }
+
+        }
+
+        // If the description still has not been entered, try retrieving it from wikidata.
+        if (!empty($data['descriptions'][$lang])) {
+            return ['lang' => $lang, 'desc' => $data['descriptions'][$lang]['value'], 'source' => 'wikidata'];
+        }
+        else if (!empty($data['descriptions'])) {
+            $tLang = (string)array_keys($data['descriptions'])[0];
+            $desc = $data['descriptions'][$tLang];
+            return ['lang' => $tLang, 'desc' => (string)$desc['value'], 'source' => 'wikidata'];
+        }
+
+        return [];
+
+    }
+
     /**
      * Function for retrieving information.
      *
@@ -1087,24 +892,8 @@ final class NodaWikidataFetcher {
         // Get links to wikipedia
 
         $wikilinks = self::_getWikipediaLinksFromWikidataOutput($data);
-        $alreadyEntered = false;
-
-        if (isset($wikilinks[$lang])) {
-            # Process data retrieved from wikipedia
-            if (!empty($datafromwiki = self::_getCleanedWikipediaSnippet($lang, $wikilinks[$lang]['title']))) {
-                $alreadyEntered = $this->retrievePersinstDescFromWikipedia($persinst_id, $wikidata_id, $datafromwiki, $lang, $lang, $erfasst_von);
-            }
-
-        }
-
-        foreach (self::LANGUAGES_MAIN_DESC as $cur_lang) {
-
-            if ($alreadyEntered === true || !isset($wikilinks[$cur_lang])) continue;
-
-            if ($datafromwiki = self::_getCleanedWikipediaSnippet($lang, $wikilinks[$cur_lang]['title'])) {
-                $alreadyEntered = $this->retrievePersinstDescFromWikipedia($persinst_id, $wikidata_id, $datafromwiki, $lang, "$cur_lang", $erfasst_von);
-            }
-
+        if (!empty($desc = self::_getDescriptionFromWikidataAndWikipediaLinks($lang, $data, $wikilinks))) {
+            $alreadyEntered = $this->retrievePersinstDescFromWikipedia($persinst_id, $wikidata_id, $desc['desc'], $lang, $desc['lang'], $erfasst_von);
         }
 
         $this->enterPersinstBirthDeathDatesFromWikidata($data, $persinst_id);
@@ -1386,30 +1175,8 @@ final class NodaWikidataFetcher {
         }
 
         $cur_place_desc = $this->getPlaceDescription($onum);
-        $alreadyEntered = false;
-
-        if (!empty($wikilinks[$lang])) {
-
-            $datafromwiki = MD_STD::runCurl(self::_getWikipediaApiLink($lang, $wikilinks[$lang]['title']), 10000);
-            $datafromwiki = json_decode($datafromwiki, true)['parse']['text']['*'];
-
-            if (!empty($datafromwiki) and !empty($datafromwiki = self::_cleanWikidataInput((string)$datafromwiki))) {
-                $alreadyEntered = $this->enterPlaceDescFromWikidata($cur_place_desc, $datafromwiki, $lang, $lang, $onum, $erfasst_von);
-            }
-        }
-
-        foreach (self::LANGUAGES_MAIN_DESC as $cur_lang) {
-
-            //if ($alreadyEntered === true) break;
-            if ($alreadyEntered === true) break;
-            if (!isset($wikilinks[$cur_lang]['url'])) continue;
-
-            $datafromwiki = MD_STD::runCurl(self::_getWikipediaApiLink($cur_lang, $wikilinks[$cur_lang]['title']), 10000);
-            $datafromwiki = json_decode($datafromwiki, true)['parse']['text']['*'];
-            if (!empty($datafromwiki) and !empty($datafromwiki = self::_cleanWikidataInput((string)$datafromwiki))) {
-                $alreadyEntered = $this->enterPlaceDescFromWikidata($cur_place_desc, $datafromwiki, $lang, $cur_lang, $onum, $erfasst_von);
-            }
-
+        if (!empty($desc = self::_getDescriptionFromWikidataAndWikipediaLinks($lang, $data, $wikilinks))) {
+            $this->enterPlaceDescFromWikidata($cur_place_desc, $desc['desc'], $lang, $desc['lang'], $onum, $erfasst_von);
         }
 
         if (isset($data['claims']['P1566'])) $geonames_id = filter_var($data['claims']['P1566'][0]['mainsnak']['datavalue']['value'], FILTER_VALIDATE_INT);
@@ -1611,32 +1378,8 @@ final class NodaWikidataFetcher {
 
         $wikilinks = self::_getWikipediaLinksFromWikidataOutput($data);
 
-        $alreadyEntered = false;
-
-        if (isset($wikilinks[$lang])) {
-
-            $datafromwiki = MD_STD::runCurl(self::_getWikipediaApiLink($lang, $wikilinks[$lang]['title']), 10000);
-            $datafromwiki = json_decode($datafromwiki, true)['parse']['text']['*'];
-
-            # Process data retrieved from wikipedia
-            if (!empty($datafromwiki = self::_cleanWikidataInput((string)$datafromwiki))) {
-                $alreadyEntered = $this->retrieveTagDescFromWikipedia($tag_id, $datafromwiki, $lang, $lang, $erfasst_von);
-            }
-
-        }
-
-        foreach (self::LANGUAGES_MAIN_DESC as $cur_lang) {
-
-            if ($alreadyEntered === true || !isset($wikilinks[$cur_lang])) continue;
-
-            $datafromwiki = MD_STD::runCurl(self::_getWikipediaApiLink($cur_lang, $wikilinks[$cur_lang]['title']), 10000);
-            $datafromwiki = json_decode($datafromwiki, true)['parse']['text']['*'];
-
-            # Process data retrieved from wikipedia
-            if ($datafromwiki = self::_cleanWikidataInput((string)$datafromwiki)) {
-                $alreadyEntered = $this->retrieveTagDescFromWikipedia($tag_id, $datafromwiki, $lang, $cur_lang, $erfasst_von);
-            }
-
+        if (!empty($desc = self::_getDescriptionFromWikidataAndWikipediaLinks($lang, $data, $wikilinks))) {
+            $alreadyEntered = $this->retrieveTagDescFromWikipedia($tag_id, $desc['desc'], $lang, $desc['lang'], $erfasst_von);
         }
 
         if (!empty($nodaLinks = $this->_getNodaLinksFromWikidataResult('tag', $wikidata_id, $data))) {
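
A note on the new cleaning step: since TextExtracts already returns plain text, _cleanInputSimple() only normalizes whitespace. Below is a runnable sketch of the same strtr() logic, with a trivial stand-in for the project's MD_STD_IN::sanitize_text (an assumption; the real helper lives in the MD_STD library).

<?php
declare(strict_types=1);

// Stand-in for MD_STD_IN::sanitize_text, used here only to keep the sketch self-contained.
function sanitize_text_stub(string $input): string {
    return trim(strip_tags($input));
}

// Mirrors _cleanInputSimple(): single newlines become paragraph breaks,
// while runs of three newlines collapse back to two.
function cleanInputSimple(string $input): string {
    return strtr(
        trim(sanitize_text_stub($input)),
        [
            PHP_EOL => PHP_EOL . PHP_EOL,
            PHP_EOL . PHP_EOL . PHP_EOL => PHP_EOL . PHP_EOL,
        ]
    );
}

echo cleanInputSimple("First paragraph." . PHP_EOL . "Second paragraph.");
// Prints the two sentences separated by a blank line.
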