1963 lines
70 KiB
PHP
1963 lines
70 KiB
PHP
<?PHP
|
||
/**
|
||
* This file contains tools for fetching data from Wikidata.
|
||
*
|
||
* @file
|
||
* @author Joshua Ramon Enslin <joshua@museum-digital.de>
|
||
*/
|
||
declare(strict_types = 1);
|
||
|
||
/**
|
||
* Helps fetching information from Wikidata.
|
||
*/
|
||
final class NodaWikidataFetcher {
|
||
|
||
/**
 * HTTP request headers passed to MD_STD::runCurl() by sparqlQuery():
 * a bot user agent carrying the running PHP version, and the SPARQL
 * JSON results media type.
 */
private const WIKIDATA_FETCH_HEADERS = [
    'User-Agent: museum-digital-bot GND-to-Wikidata PHP/' . PHP_VERSION,
    'Accept: application/sparql-results+json',
];
|
||
|
||
/**
 * Language codes whose "<code>wiki" sitelinks are collected by
 * _getWikipediaLinksFromWikidataOutput().
 *
 * NOTE(review): 'jp' is not the ISO 639-1 code for Japanese; 'ja' is, and
 * LANGUAGES_TO_CHECK uses 'ja'. Confirm whether 'jp' is intentional before
 * changing it, since other code may depend on this value.
 */
public const LANGUAGES_MAIN_DESC = [
    'de', 'da', 'en', 'es', 'fr', 'hu', 'it', 'jp', 'nl', 'pt', 'ru', 'sv', 'sk', 'uk', 'zh',
];

/**
 * Language codes to check (consumers are not visible in this part of the file).
 */
public const LANGUAGES_TO_CHECK = [
    'ar', 'bg', 'bn', 'cs', 'da', 'de', 'el', 'en', 'es', 'fa',
    'fi', 'fr', 'ha', 'he', 'hi', 'hu', 'id', 'it', 'ja', 'ka',
    'ko', 'nl', 'pl', 'pt', 'ro', 'ru', 'sv', 'sk', 'sw', 'ta',
    'th', 'tl', 'tr', 'uk', 'ur', 'vi', 'zh',
];

/**
 * Language codes to capitalize (consumers are not visible in this part of the file).
 */
public const LANGUAGES_TO_CAPITALIZE = [
    'cs', 'da', 'de', 'en', 'es', 'fr', 'fi', 'id', 'it',
    'nl', 'pl', 'pt', 'ru', 'sv', 'sk', 'tl', 'tr',
];
|
||
|
||
/**
 * Maps vocabulary names to the Wikidata property (P-ID) holding the
 * respective external identifier.
 *
 * 'lcsh' and 'loc' intentionally share P244: _determineLocRefMode() decides
 * which of the two a concrete ID belongs to. The order of the entries is the
 * order in which _getNodaLinksFromWikidataResult() evaluates them.
 */
public const P_IDS_NODA_TAGS = [
    'gnd'       => 'P227',
    'lcsh'      => 'P244',
    'aat'       => 'P1014',
    'iconclass' => 'P1256',
    'osm'       => 'P402',
    'loc'       => 'P244',
    'nomisma'   => 'P2950',
    'cona'      => 'P1669',

    'rkd'       => 'P650',
    'ulan'      => 'P245',
    'viaf'      => 'P214',
    'bnf'       => 'P268',
    'pim'       => 'P3973',
    'ndl'       => 'P349',  // National Diet Library (Japan)
    'npg'       => 'P1816', // "National" portrait gallery
    'bne'       => 'P950',  // National Library of Spain
    'orcid'     => 'P496',
];
|
||
|
||
/**
 * Literal snippets that _cleanWikidataInput() strips from Wikipedia markup
 * before any further processing. They are removed one after another, in the
 * order listed here.
 *
 * NOTE(review): the entry starting with '<pVous' lacks the '>' after '<p';
 * confirm whether that is intended. The entry '<p> </p>' may originally have
 * contained a non-breaking space - verify against a real API response.
 */
private const WIKIPEDIA_REMOVE_LITERALS = [
    // French maintenance notices
    "<p>Si vous disposez d'ouvrages ou d'articles de référence ou si vous ",
    '<p><b>En pratique :</b> <a href="/wiki/Wikip%C3%A9dia:Citez_vos_sources#Qualité_des_sources" title="Wikipédia:Citez vos sources">Quelles sources sont attendu',
    '<pVous pouvez partager vos connaissances en l’améliorant (',

    // Empty paragraphs, coordinates and map / image toggles
    '<p class="mw-empty-elt">',
    '<p><small>Géolocalisation sur la carte',
    '<p><b>Koordinaatit:</b>',
    '<p><span class="executeJS" data-gadgetname="ImgToggle"></span',
    '<p><span class="imgtoggleboxTitle">',
    '<p><span style="font-size: small;"><span id="coordinates">',
    '<p><span></span></p>',
    '<p><a rel="nofollow" class="external text" href="https://maps.gs',
    '<p><span class="plainlinks nourlexpansion"><a class="external text" href="//tools.wmflabs.org/geohack/geohack.php?langu',
    '<p><span style="display:none">',
    '<p> </p>',
    '<p><span class="geo noexcerpt"',
];
|
||
|
||
/**
 * Retrieval modes accepted by setRetrievalMode().
 */
public const RETRIEVAL_MODES_ACCEPTED = ['list', 'add', 'keep', 'replace'];

/**
 * Retrieval mode in effect until setRetrievalMode() is called.
 */
public const RETRIEVAL_MODES_DEFAULT = 'list';

/**
 * Currently active retrieval mode. It decides how
 * retrievePersinstDescFromWikipedia() treats an already existing description.
 *
 * @var 'list'|'add'|'keep'|'replace'
 */
private string $_retrievalMode = self::RETRIEVAL_MODES_DEFAULT;

/**
 * Database connection used for reading and updating actor entries.
 *
 * @var MDMysqli
 */
private MDMysqli $_mysqli_noda;
|
||
|
||
/**
 * Returns the link to Wikipedia's API (action=parse) for getting the rendered
 * text of section 0 of a page as JSON.
 *
 * @param string $lang       Language / wikipedia version to fetch.
 * @param string $searchTerm Search term (page title).
 *
 * @return non-empty-string
 */
private static function _getWikipediaApiLink(string $lang, string $searchTerm): string {

    // The parameter name must be spelled out as "&section": an HTML-decoded
    // "&sect" becomes "§", in which case no section parameter is sent at all.
    return 'https://' . urlencode($lang) . '.wikipedia.org/w/api.php?action=parse&page='
        . urlencode($searchTerm) . '&prop=text&section=0&format=json';

}
|
||
|
||
/**
 * Collects the Wikipedia sitelinks of an entity for all languages listed in
 * LANGUAGES_MAIN_DESC. Languages without a sitelink, or whose sitelink lacks
 * a string URL or title, are left out. Spaces in titles are replaced by
 * underscores.
 *
 * @param array<mixed> $data Wikidata API output.
 *
 * @return array<string, array{url: string, title: string}>
 */
private static function _getWikipediaLinksFromWikidataOutput(array $data): array {

    $linksByLanguage = [];

    foreach (self::LANGUAGES_MAIN_DESC as $language) {

        $sitelink = $data['sitelinks'][$language . 'wiki'] ?? null;

        if (!isset($sitelink['url'], $sitelink['title'])) {
            continue;
        }
        if (!is_string($sitelink['url']) || !is_string($sitelink['title'])) {
            continue;
        }

        $linksByLanguage[$language] = [
            'url'   => $sitelink['url'],
            'title' => str_replace(' ', '_', $sitelink['title']),
        ];

    }

    return $linksByLanguage;

}
|
||
|
||
/**
 * Parses coordinates (first value of P625) from Wikidata API output.
 *
 * Returns an empty array if the entity has no P625 claim, if the claim
 * carries no value (e.g. "unknown value" snaks have no datavalue), or if
 * latitude / longitude are not valid floats.
 *
 * @param array<mixed> $data Wikidata API output.
 *
 * @return array{}|array{longitude: float, latitude: float}
 */
private static function _getPlaceCoordinatesFromWikidata(array $data): array {

    // Null-coalescing guards the whole chain, so incomplete claims do not
    // raise "undefined array key" warnings.
    $coordinates = $data['claims']['P625'][0]['mainsnak']['datavalue']['value'] ?? null;
    if (!is_array($coordinates)) {
        return [];
    }

    $latitude  = \filter_var($coordinates['latitude'] ?? null, FILTER_VALIDATE_FLOAT);
    $longitude = \filter_var($coordinates['longitude'] ?? null, FILTER_VALIDATE_FLOAT);

    if ($latitude === false || $longitude === false) {
        return [];
    }

    return [
        'longitude' => $longitude,
        'latitude'  => $latitude,
    ];

}
|
||
|
||
/**
 * Fetches a page through Wikipedia's parse API and returns its text, cleaned
 * by _cleanWikidataInput(). Returns an empty string if the response cannot be
 * decoded or contains no "parse" entry.
 *
 * @param string $lang  Language to load in.
 * @param string $title Title to load from.
 *
 * @return string
 */
private static function _getCleanedWikipediaSnippet(string $lang, string $title): string {

    $response = MD_STD::runCurl(self::_getWikipediaApiLink($lang, $title), 10000);
    $decoded  = json_decode($response, true);

    // Also covers empty / undecodable responses: isset() is false for those.
    if (!isset($decoded['parse'])) {
        return '';
    }

    return self::_cleanWikidataInput(strval($decoded['parse']['text']['*']));

}
|
||
|
||
/**
 * Loads the data of a single entity from Wikidata's Special:EntityData
 * endpoint and returns the entry stored under the requested ID.
 *
 * @param string $wikidata_id Wikidata Q-ID.
 *
 * @throws MDhttpFailedException If the response cannot be decoded or does not
 *                               contain the requested entity.
 *
 * @return array<mixed>
 */
private static function _getWikidataEntity(string $wikidata_id): array {

    $entityUrl = "https://www.wikidata.org/wiki/Special:EntityData/" . urlencode($wikidata_id) . ".json";
    $response  = json_decode(MD_STD::runCurl($entityUrl, 10000), true);

    if ($response === null || empty($response['entities'][$wikidata_id])) {
        throw new MDhttpFailedException("Failed fetching from Wikidata. Try again later.");
    }

    return $response['entities'][$wikidata_id];

}
|
||
|
||
/**
 * Parses wikidata results to MDNodaLink entries.
 *
 * The returned list always starts with a link to Wikidata itself. For every
 * vocabulary in P_IDS_NODA_TAGS whose property has a value in the entity's
 * claims (only the first claim is read), a link is appended if the vocabulary
 * is linkable for the target type.
 *
 * NOTE(review): 'lcsh' and 'loc' both point to P244, so a P244 value is
 * evaluated twice and may yield two identical links - confirm that callers
 * expect this.
 *
 * @param 'tag'|'persinst'|'place' $target      Target vocabulary type.
 * @param string                   $wikidata_id Wikidata ID.
 * @param array<mixed>             $data        Wikidata result.
 *
 * @return list<MDNodaLink>
 */
public function _getNodaLinksFromWikidataResult(string $target, string $wikidata_id, array $data): array {

    $linkableVocabularies = match ($target) {
        'tag'      => MDNodaRepositoriesSet::REPOSITORIES_TAG,
        'persinst' => MDNodaRepositoriesSet::REPOSITORIES_ACTOR,
        'place'    => MDNodaRepositoriesSet::REPOSITORIES_PLACE,
    };

    $links = [new MDNodaLink(MDNodaRepository::wikidata, $wikidata_id)];

    foreach (self::P_IDS_NODA_TAGS as $vocabName => $pId) {

        $claims = $data['claims'][$pId] ?? null;
        if ($claims === null || empty($claims[0]['mainsnak']['datavalue'])) {
            continue;
        }

        $externalId     = $claims[0]['mainsnak']['datavalue']['value'];
        $repositoryName = $vocabName;

        // Wikidata has a single property for Library of Congress IDs;
        // find out which of the two local vocabularies the ID belongs to.
        if ($repositoryName === 'loc' || $repositoryName === 'lcsh') {
            $repositoryName = $this->_determineLocRefMode($externalId);
            if (empty($repositoryName)) {
                continue;
            }
        }

        if (in_array($repositoryName, $linkableVocabularies, true)) {
            $links[] = new MDNodaLink(MDNodaRepository::fromString($repositoryName), $externalId);
        }

    }

    return $links;

}
|
||
|
||
/**
 * Determines whether a Library of Congress ID is a valid LOC ID, a valid
 * LCSH ID, or neither.
 *
 * This is necessary since Wikidata only knows one type of link to the LOC
 * authority files, while museum-digital knows two. LOC is tried first, then
 * LCSH; validation exceptions count as "not valid for this repository".
 *
 * @param string $url LOC ID to check.
 *
 * @return 'loc'|'lcsh'|''
 */
private function _determineLocRefMode(string $url): string {

    $candidates = [
        'loc'  => MDNodaRepository::loc,
        'lcsh' => MDNodaRepository::lcsh,
    ];

    foreach ($candidates as $mode => $repository) {
        try {
            if ($repository->validateId($url) !== false) {
                return $mode;
            }
        }
        catch (MDgenericInvalidInputsException | MDInvalidNodaLinkException | MDInvalidNodaLink $e) {
            // Not a valid ID for this repository: try the next one.
            continue;
        }
    }

    return '';

}
|
||
|
||
/**
 * Extracts the paragraph texts from a snippet of Wikipedia markup.
 *
 * Everything before the first "<p" and after the last "</p>" is cut off, the
 * rest is parsed as XML. <style>, <table> and <ol> elements are dropped, as
 * is the first paragraph if its markup mentions "geohack". The text contents
 * of the remaining paragraphs are returned, separated by blank lines.
 *
 * @param string $input Input string.
 *
 * @return string
 */
private static function _cleanWikidataInputHtml(string $input): string {

    // Position 0 needs no cutting, hence the "> 0" comparisons.
    $firstParagraphAt = strpos($input, '<p');
    if ($firstParagraphAt !== false && $firstParagraphAt > 0) {
        $input = substr($input, $firstParagraphAt);
    }

    $lastParagraphEndAt = strrpos($input, '</p>');
    if ($lastParagraphEndAt !== false && $lastParagraphEndAt > 0) {
        $input = substr($input, 0, $lastParagraphEndAt + 4);
    }

    $doc = new DOMDocument();
    try {
        $doc->loadXML('<section>' . trim($input) . '</section>');
    }
    catch (Exception $e) {
        throw new Exception("Failed to load DOMDocument." . PHP_EOL . $e->getMessage() . PHP_EOL . PHP_EOL . '---' . $input . '---');
    }

    // The node lists are live: removing item 0 shifts the next match to
    // position 0 until none is left.
    foreach (['style', 'table', 'ol'] as $tagName) {
        $nodes = $doc->getElementsByTagName($tagName);
        while ($nodes->length > 0) {
            $node = $nodes->item(0);
            if ($node === null || $node->parentNode === null) {
                break;
            }
            $node->parentNode->removeChild($node);
        }
    }

    // A leading paragraph linking to geohack only holds coordinates.
    $leadParagraph = $doc->getElementsByTagName('p')->item(0);
    if ($leadParagraph !== null) {
        $leadParagraphHtml = $doc->saveHTML($leadParagraph);
        if ($leadParagraphHtml !== false
            && str_contains($leadParagraphHtml, 'geohack')
            && $leadParagraph->parentNode !== null
        ) {
            $leadParagraph->parentNode->removeChild($leadParagraph);
        }
    }

    $paragraphs = [];
    foreach ($doc->getElementsByTagName('p') as $paragraph) {
        $paragraphs[] = trim($paragraph->textContent);
    }

    $joined = trim(implode(PHP_EOL, $paragraphs));

    return str_replace(PHP_EOL, PHP_EOL . PHP_EOL, $joined);

}
|
||
|
||
/**
 * Removes footnote markers "[0]" to "[99]" from a description text.
 *
 * @param string $input Input string.
 *
 * @return string
 */
private static function _cleanSourceBracketsOffTranslation(string $input): string {

    $markers = array_map(
        static fn (int $number): string => '[' . $number . ']',
        range(0, 99),
    );

    // strtr() works in a single pass: text that only looks like a marker
    // after another marker has been removed is left alone.
    return strtr($input, array_fill_keys($markers, ''));

}
|
||
|
||
/**
 * Cleans contents parsed from Wikipedia (or plain label / description
 * strings) into plain text.
 *
 * Surrounding double quotes and the snippets listed in
 * WIKIPEDIA_REMOVE_LITERALS are removed first. Input that then starts with
 * "<" is treated as markup and handed to _cleanWikidataInputHtml(); all other
 * input is cleaned using plain string operations. In both cases, text longer
 * than 600 characters is cut at the first blank line found at or after byte
 * offset 600, and the result is passed through MD_STD_IN::sanitize_text().
 *
 * @param string $input Input string.
 *
 * @return string
 */
private static function _cleanWikidataInput(string $input): string {

    $input = trim($input, '"');
    $input = str_replace(self::WIKIPEDIA_REMOVE_LITERALS, '', $input);

    $paragraphBreak = PHP_EOL . PHP_EOL;

    if (str_starts_with($input, '<')) {

        $input = self::_cleanWikidataInputHtml($input);

        // Cut off overly long articles at a paragraph boundary
        if (mb_strlen($input) > 600) {
            $cutAt = strpos($input, $paragraphBreak, 600);
            if ($cutAt !== false) {
                $input = substr($input, 0, $cutAt);
            }
        }

        $input = self::_cleanSourceBracketsOffTranslation($input);
        $input = str_replace("\t", " ", $input);

        // Remove newlines with ensuing spaces
        while (str_contains($input, PHP_EOL . " ")) {
            $input = str_replace(PHP_EOL . " ", PHP_EOL, $input);
        }

        // Reduce three or more consecutive newlines to two
        while (str_contains($input, $paragraphBreak . PHP_EOL)) {
            $input = str_replace($paragraphBreak . PHP_EOL, $paragraphBreak, $input);
        }

        return MD_STD_IN::sanitize_text($input);

    }

    $input = str_replace(PHP_EOL, '', $input);

    if (empty($input)) return "";

    // Remove infobox tables specifically: if a table starts before the first
    // paragraph, continue with the first paragraph after the table.
    $firstParagraphPosition = strpos($input, '<p', 1);
    $tableStartPos = strpos($input, "<table>");
    if ($tableStartPos !== false && $firstParagraphPosition !== false && $tableStartPos < $firstParagraphPosition) {
        $tableEndPos = strpos($input, "</table>");
        if ($tableEndPos !== false) {
            $pStartPos = strpos($input, '<p', $tableEndPos + 6);
            if ($pStartPos !== false) {
                $input = substr($input, $pStartPos);
            }
        }
    }

    // Remove leftover unnecessary content before the actual text: if a table
    // end or an image precedes the first paragraph, start at that paragraph.
    $firstParagraphPosition = strpos($input, '<p', 1);
    if ($firstParagraphPosition !== false) {
        foreach (["</table>", "<img"] as $tagPart) {
            $tagPos = strpos($input, $tagPart);
            if ($tagPos !== false && $tagPos < $firstParagraphPosition) {
                $input = substr($input, $firstParagraphPosition);
                break;
            }
        }
    }

    $input = str_replace('</p>', '</p>' . PHP_EOL . PHP_EOL . PHP_EOL, $input);
    $input = strip_tags($input);

    // Drop inline CSS rules (".mw-parser-output ... }") left over after
    // stripping the <style> tags; capped at 30 rules.
    for ($i = 0; $i < 30; $i++) {
        $cssStart = strpos($input, ".mw-parser-output");
        if ($cssStart === false) break;
        $cssEnd = strpos($input, "}", $cssStart);
        if ($cssEnd === false) break;
        $input = substr($input, 0, $cssStart) . substr($input, $cssEnd + 1);
    }

    $input = self::_cleanSourceBracketsOffTranslation($input);
    $input = str_replace("\t", " ", $input);

    // Remove double whitespaces. Needle and replacement must differ: with a
    // single space on both sides this loop never terminates.
    while (str_contains($input, "  ")) {
        $input = str_replace("  ", " ", $input);
    }

    // Remove newlines with ensuing spaces
    while (str_contains($input, PHP_EOL . " ")) {
        $input = str_replace(PHP_EOL . " ", PHP_EOL, $input);
    }

    // Reduce three or more consecutive newlines to two
    while (str_contains($input, $paragraphBreak . PHP_EOL)) {
        $input = str_replace($paragraphBreak . PHP_EOL, $paragraphBreak, $input);
    }

    $stableToRemove = [
        "Vous pouvez partager vos connaissances en l’améliorant (comment ?) selon les recommandations des projets correspondants.",
    ];
    $input = str_replace($stableToRemove, "", $input);

    // Everything from one of these stub notices onwards is discarded
    $endings = [
        "StubDenne artikel om et vandløb ",
    ];
    foreach ($endings as $ending) {
        $endingPos = strpos($input, $ending);
        if ($endingPos !== false) {
            $input = substr($input, 0, $endingPos);
        }
    }

    $input = trim($input);

    // Cut off overly long articles at a paragraph boundary
    if (mb_strlen($input) > 600) {
        $cutAt = strpos($input, $paragraphBreak, 600);
        if ($cutAt !== false) {
            $input = trim(substr($input, 0, $cutAt));
        }
    }

    if (empty($input)) return '';

    // Footnote markers may also arrive entity-encoded ("&#91;12&#93;");
    // match any number of digits, not just one.
    $input = str_replace("'", "´", MD_STD::preg_replace_str("/&#91;[0-9]+&#93;/", '', $input));

    $input = html_entity_decode($input);

    return MD_STD_IN::sanitize_text($input);

}
|
||
|
||
/**
 * Public entry point to _cleanWikidataInput(), intended for tests only:
 * it refuses to run outside of the command line SAPI.
 *
 * @param string $input Input string.
 *
 * @throws Exception If not called from the command line.
 *
 * @return string
 */
public static function cleanWikidataInput(string $input): string {

    if (PHP_SAPI === 'cli') {
        return self::_cleanWikidataInput($input);
    }

    throw new Exception("Use this function only for testing");

}
|
||
|
||
/**
 * Sets the retrieval mode.
 *
 * @param string $retrievalMode New retrieval mode to set. Must be one of
 *                              RETRIEVAL_MODES_ACCEPTED.
 *
 * @throws Exception If the mode is not an accepted one.
 *
 * @return void
 */
public function setRetrievalMode(string $retrievalMode): void {

    if (in_array($retrievalMode, self::RETRIEVAL_MODES_ACCEPTED, true)) {
        $this->_retrievalMode = $retrievalMode;
        return;
    }

    throw new Exception("Retrieval mode not in list of accepted retrieval modes: " . implode(',', self::RETRIEVAL_MODES_ACCEPTED));

}
|
||
|
||
/**
 * Validates a Wikidata ID. A Wikidata ID must start with a capital Q,
 * followed by nothing but one or more digits.
 *
 * @param string $wikidata_id Input ID to validate.
 *
 * @throws MDgenericInvalidInputsException If the ID is malformed.
 *
 * @return void
 */
public static function validateWikidataId(string $wikidata_id): void {

    if (!str_starts_with($wikidata_id, 'Q')) {
        throw new MDgenericInvalidInputsException("Wikidata IDs start with Q");
    }

    // is_numeric() would also accept floats, exponents, signs and leading
    // whitespace ("Q1.5", "Q1e5", "Q-3", "Q 12"); only digits are valid.
    if (preg_match('/\A[0-9]+\z/', substr($wikidata_id, 1)) !== 1) {
        throw new MDgenericInvalidInputsException("Wikidata IDs are numeric following the Q");
    }

}
|
||
|
||
/**
 * Attempts to determine a Wikidata ID from a URL: links to Wikidata are
 * parsed directly, links to Wikipedia are resolved by loading the page.
 *
 * @param non-empty-string $linkUrl Link to a page.
 *
 * @throws MDExpectedException If the input is not a valid URL.
 *
 * @return string Wikidata ID, or an empty string if none could be found.
 */
public static function getWikidataIdFromLink(string $linkUrl): string {

    if (filter_var($linkUrl, FILTER_VALIDATE_URL) === false) {
        throw new MDExpectedException("Invalid URL");
    }

    $wikidataPrefixes = [
        "http://www.wikidata.org/entity/",
        "https://www.wikidata.org/entity/",
        "https://www.wikidata.org/wiki/",
    ];

    foreach ($wikidataPrefixes as $prefix) {
        if (!str_contains($linkUrl, $prefix)) {
            continue;
        }
        $idFromWikidata = self::getWikidataIdFromWikidataLink($linkUrl);
        if ($idFromWikidata) {
            return $idFromWikidata;
        }
    }

    if (str_contains($linkUrl, ".wikipedia.org/")) {
        $idFromWikipedia = self::getWikidataIdFromWikipedia($linkUrl);
        if ($idFromWikipedia) {
            return $idFromWikipedia;
        }
    }

    return '';

}
|
||
|
||
/**
 * Extracts the Wikidata ID from a link to a Wikidata page: the last path
 * segment of the URL, without a trailing query string or fragment.
 *
 * Only URLs containing "https://www.wikidata.org/wiki/",
 * "https://www.wikidata.org/entity/" or "http://www.wikidata.org/entity/"
 * are considered; for anything else an empty string is returned.
 *
 * @param string $linkUrl Link to a Wikidata page.
 *
 * @return string
 */
public static function getWikidataIdFromWikidataLink(string $linkUrl): string {

    $isWikidataLink = str_contains($linkUrl, "https://www.wikidata.org/wiki/")
        || str_contains($linkUrl, "https://www.wikidata.org/entity/")
        || str_contains($linkUrl, "http://www.wikidata.org/entity/");

    if ($isWikidataLink === false) {
        return '';
    }

    $segments    = explode('/', trim($linkUrl, '/ '));
    $lastSegment = $segments[array_key_last($segments)];

    // ".../Q42?uselang=de" and ".../Q42#P31" both refer to Q42.
    return substr($lastSegment, 0, strcspn($lastSegment, '?#'));

}
|
||
|
||
/**
 * Attempts to determine a Wikidata ID by loading a Wikipedia page and
 * reading the link inside the element with the ID "t-wikibase".
 *
 * NOTE(review): the URL is only checked for containing ".wikipedia.org/";
 * URLs with a different host that merely contain this string are fetched as
 * well. Confirm that callers only pass trusted links.
 *
 * @param non-empty-string $linkUrl Link to a Wikipedia page.
 *
 * @return string Wikidata ID, or an empty string if none could be found.
 */
public static function getWikidataIdFromWikipedia(string $linkUrl): string {

    if (!str_contains($linkUrl, ".wikipedia.org/")) {
        return '';
    }

    $wikipedia_cont = MD_STD::runCurl($linkUrl);
    if (empty($wikipedia_cont)) {
        return '';
    }

    $doc = new DOMDocument();

    // Real-world HTML triggers lots of libxml warnings; collect them
    // internally while parsing and restore the previous setting afterwards,
    // also if parsing fails.
    $previousErrorMode = libxml_use_internal_errors(true);
    try {
        $loaded = $doc->loadHTML($wikipedia_cont);
    }
    finally {
        libxml_clear_errors();
        libxml_use_internal_errors($previousErrorMode);
    }

    if ($loaded === false) {
        return '';
    }

    $wikidataLinkLi = $doc->getElementById("t-wikibase");
    if ($wikidataLinkLi === null) {
        return '';
    }

    // Look for the link element explicitly: the first child node may be a
    // whitespace text node, which has no attributes.
    $wikidataLink = $wikidataLinkLi->getElementsByTagName('a')->item(0);
    if ($wikidataLink === null) {
        return '';
    }

    $t_wikibase = $wikidataLink->getAttribute('href');
    if (empty($t_wikibase)) {
        return '';
    }

    $wikidata_id_end = strrpos($t_wikibase, '/');
    if ($wikidata_id_end === false) {
        return '';
    }

    $wikidata_id = trim(substr($t_wikibase, $wikidata_id_end + 1), '/');

    return str_starts_with($wikidata_id, 'Q') ? $wikidata_id : '';

}
|
||
|
||
/**
 * Runs a SPARQL query against the Wikidata SPARQL endpoint and returns the
 * decoded JSON result.
 *
 * @param string $sparqlQuery Query string.
 *
 * @throws MDhttpFailedException If the response is not a JSON object / array
 *                               (e.g. empty or an HTML error page).
 *
 * @return array<mixed>
 */
public static function sparqlQuery(string $sparqlQuery): array {

    $url = 'https://query.wikidata.org/sparql?query=' . urlencode($sparqlQuery);
    $result = MD_STD::runCurl($url, 100000000, self::WIKIDATA_FETCH_HEADERS);

    // json_decode() returns null for undecodable input, which must not be
    // returned from a function declared to return an array.
    $decoded = json_decode($result, true);
    if (!is_array($decoded)) {
        throw new MDhttpFailedException("Failed fetching from Wikidata. Try again later.");
    }

    return $decoded;

}
|
||
|
||
/**
 * Formulates a SPARQL query string for fetching from Wikidata based on an external ID.
 *
 * @param string $repoName   Name of the repository.
 * @param string $externalId ID in the external repository.
 * @param string $repoPId    Optional P-ID of the external repository. Needed for
 *                           Geonames and TGN, obsolete otherwise.
 *
 * @throws MDmainEntityNotExistentException If no P-ID is given and the
 *                                          repository name is unknown.
 * @throws MDgenericInvalidInputsException  If the P-ID is malformed.
 *
 * @return string
 */
public static function formulateWikidataQueryByExtId(string $repoName, string $externalId, string $repoPId = ''): string {

    if (empty($repoPId)) {

        if (empty(self::P_IDS_NODA_TAGS[$repoName])) {
            throw new MDmainEntityNotExistentException("Unknown external repository. The following repositories are known with their Wikidata ID: " . implode(', ', array_keys(self::P_IDS_NODA_TAGS)));
        }

        $repoPId = self::P_IDS_NODA_TAGS[$repoName];

    }

    // Both values end up in the query text: restrict the property ID to the
    // form P<digits> and escape the ID for use inside a SPARQL string literal.
    if (preg_match('/\AP[0-9]+\z/', $repoPId) !== 1) {
        throw new MDgenericInvalidInputsException("Wikidata property IDs start with P and are numeric otherwise");
    }

    $escapedExternalId = strtr($externalId, [
        '\\' => '\\\\',
        '"'  => '\\"',
        "\n" => '\\n',
        "\r" => '\\r',
    ]);

    return implode("\n", [
        'SELECT ?id ?idLabel WHERE {',
        '    ?id wdt:' . $repoPId . ' "' . $escapedExternalId . '".',
        '    SERVICE wikibase:label {',
        '        bd:serviceParam wikibase:language "en" .',
        '    }',
        '}',
    ]);

}
|
||
|
||
/**
 * Gets the Wikidata ID based on a result from Wikidata's SPARQL endpoint.
 *
 * An ID is only returned if the result contains exactly one binding; it is
 * the part of the binding's "id" value following the last slash. Missing or
 * malformed result structures yield an empty string.
 *
 * @param array<mixed> $queryResult Query result.
 *
 * @return string
 */
public static function readWikidataIdFromSparqlResult(array $queryResult): string {

    $bindings = $queryResult['results']['bindings'] ?? null;
    if (!is_array($bindings) || count($bindings) !== 1) {
        return '';
    }

    $wikidataLink = $bindings[0]['id']['value'] ?? '';
    if (!is_string($wikidataLink) || empty($wikidataLink)) {
        return '';
    }

    $endSlashPos = strrpos($wikidataLink, '/');
    if ($endSlashPos === false) {
        return '';
    }

    return substr($wikidataLink, $endSlashPos + 1);

}
|
||
|
||
/**
 * Queries Wikidata by an external repository's ID and returns the matching Q-ID
 * if there is any.
 *
 * @param string $repoName   Name of the repository.
 * @param string $externalId ID in the external repository.
 * @param string $repoPId    Optional P-ID of the external repository. Needed for
 *                           Geonames and TGN, obsolete otherwise.
 *
 * @return string
 */
public static function getWikidataIdByExternalId(string $repoName, string $externalId, string $repoPId = ''): string {

    // Pass the caller's P-ID through as is. (An assignment in the argument
    // list would reset it to '' and make the parameter useless.)
    $sparqlQueryString = self::formulateWikidataQueryByExtId($repoName, $externalId, $repoPId);

    return self::readWikidataIdFromSparqlResult(self::sparqlQuery($sparqlQueryString));

}
|
||
|
||
/**
 * Gets translation source Wikipedia pages from Wikidata.
 *
 * A language is included if the entity has a label in it and a Wikipedia
 * sitelink with both URL and title. The first returned array maps languages
 * to the Wikipedia API links to fetch, the second one maps languages to the
 * public URLs of the Wikipedia pages.
 *
 * @param array<string> $checkagainstLanguage The language to check against.
 * @param array<mixed>  $data                 Data fetched from Wikidata.
 *
 * @return array{0: array<string, non-empty-string>, 1: array<string, string>}
 */
public static function getWikidataWikipediaTranslationSources(array $checkagainstLanguage, array $data): array {

    $languagesToFetch = [];
    $wikilinks = [];

    foreach ($checkagainstLanguage as $lang) {

        if (empty($data['labels'][$lang])) {
            continue;
        }

        // Check URL and title before using them: an incomplete sitelink is
        // skipped instead of raising warnings or a TypeError.
        $sitelink = $data['sitelinks'][$lang . 'wiki'] ?? null;
        if (!isset($sitelink['url'], $sitelink['title'])) {
            continue;
        }
        if (!is_string($sitelink['url']) || !is_string($sitelink['title'])) {
            continue;
        }

        $wikilinkterm = str_replace(' ', '_', $sitelink['title']);

        $languagesToFetch[$lang] = self::_getWikipediaApiLink($lang, $wikilinkterm);
        $wikilinks[$lang] = $sitelink['url'];

    }

    return [$languagesToFetch, $wikilinks];

}
|
||
|
||
/**
 * Loads translations from Wikipedia pages through wikidata and then merges
 * them with Wikidata's own translations into a usable array.
 *
 * For each language, the cleaned Wikipedia text is preferred as description.
 * If there is none, the label and description stored in Wikidata are used,
 * provided both exist. Nothing is returned if no Wikipedia page is available
 * for any of the languages.
 *
 * @param array<string> $checkagainstLanguage The language to check against.
 * @param array<mixed>  $data                 Data fetched from Wikidata.
 *
 * @throws MDExpectedException If the requests could not be initialized.
 *
 * @return array<string, array{label: string, description: string, link: string}>
 */
public static function listTranslationsFromWikidataWikipedia(array $checkagainstLanguage, array $data): array {

    [$languagesToFetch, $wikilinks] = self::getWikidataWikipediaTranslationSources($checkagainstLanguage, $data);
    if (empty($languagesToFetch)) {
        return [];
    }

    try {
        $contents = MD_STD::runCurlMulti($languagesToFetch, 10000);
    }
    catch (TypeError $e) {
        throw new MDExpectedException("Failed to initialize a request. Try pressing F5 to run the requests again.");
    }

    $translations = [];

    foreach ($checkagainstLanguage as $lang) {

        $hasWikipediaSource = !empty($languagesToFetch[$lang])
            && !empty($data['sitelinks'][$lang . 'wiki'])
            && !empty($wikilinks[$lang]);

        if ($hasWikipediaSource) {

            // Extract the page text from the API response, if there is one
            $rawDescription = "";
            if (!empty($contents[$lang])) {
                $parsedText = json_decode($contents[$lang], true)['parse']['text']['*'];
                if ($parsedText !== null) {
                    $rawDescription = (string)$parsedText;
                }
            }

            if ($rawDescription !== '' && !empty($desc_cleaned = self::_cleanWikidataInput($rawDescription))) {
                $translations[$lang] = [
                    'label'       => self::_cleanWikidataInput((string)$data['labels'][$lang]['value']),
                    'description' => '"' . $desc_cleaned . '" - (' . $data['labels'][$lang]['language'] . '.wikipedia.org ' . date('d.m.Y') . ')',
                    'link'        => $wikilinks[$lang],
                ];
                continue;
            }

        }

        // Fallback: Use Wikidata's own label and description
        if (!empty($data['labels'][$lang]['value']) && !empty($data['descriptions'][$lang])) {
            $translations[$lang] = [
                'label'       => self::_cleanWikidataInput($data['labels'][$lang]['value']),
                'description' => self::_cleanWikidataInput($data['descriptions'][$lang]['value']),
                'link'        => "",
            ];
        }

    }

    return $translations;

}
|
||
|
||
/**
 * Stores a description fetched from Wikipedia for an actor.
 *
 * If the actor has no description yet (or one starting with "GND"), the new
 * description is written directly. Otherwise the retrieval mode decides:
 * "add" appends the new text, "replace" overwrites the old one, "keep" leaves
 * it untouched, and any other mode prints a page offering these choices and
 * ends the script. Except in that last case, the actor's edit metadata is
 * updated afterwards - also in "keep" mode.
 *
 * @param integer $persinst_id  Person ID.
 * @param string  $wikidata_id  Wikidata ID.
 * @param string  $datafromwiki Data fetched from Wikipedia.
 * @param string  $preflang     The user's currently used language.
 * @param string  $lang         Currently queried language.
 * @param string  $erfasst_von  User who adds the info.
 *
 * @throws Exception If there is no actor of the given ID.
 *
 * @return boolean True if a description has been written.
 */
public function retrievePersinstDescFromWikipedia(int $persinst_id, string $wikidata_id, string $datafromwiki, string $preflang, string $lang, string $erfasst_von): bool {

    $output = false;

    $datafromwiki = '"' . $datafromwiki . '" - (Wikipedia (' . $lang . ') ' . date('d.m.Y') . ')';

    $cergebnis = $this->_mysqli_noda->query_by_stmt("SELECT `persinst_kurzinfo`, `persinst_anzeigename` AS `display_name`
        FROM `persinst`
        WHERE `persinst_id` = ?", "i", $persinst_id);

    $cinfo = $cergebnis->fetch_row();
    $cergebnis->close();
    if (!$cinfo) {
        throw new Exception("There is no actor of ID #" . $persinst_id);
    }

    $persinst_kurzinfo = $cinfo[0];
    $display_name = $cinfo[1];

    // Update persinst table
    $updatePersinstStmt = $this->_mysqli_noda->do_prepare("UPDATE `persinst`
        SET `persinst_kurzinfo` = ?
        WHERE `persinst_id` = ?");

    if (!empty($persinst_kurzinfo) && substr($persinst_kurzinfo, 0, 3) !== 'GND') {

        switch ($this->_retrievalMode) {
            case "add":

                $newDesc = $persinst_kurzinfo . PHP_EOL . PHP_EOL . $datafromwiki;

                $updatePersinstStmt->bind_param("si", $newDesc, $persinst_id);
                $updatePersinstStmt->execute();
                $output = true;
                break;

            case "keep":
                break;

            case "replace":

                $updatePersinstStmt->bind_param("si", $datafromwiki, $persinst_id);
                $updatePersinstStmt->execute();
                $output = true;
                break;

            default:

                // Both descriptions stem from outside sources (database,
                // Wikipedia): escape them, and the ID, before printing.
                $escapeHtml = static fn (string $text): string => htmlspecialchars($text, ENT_QUOTES | ENT_SUBSTITUTE, 'UTF-8');
                $linkStart  = '<a href="get_wikidata_for_persinst.php?wikidata_id=' . urlencode($wikidata_id);

                $tlLoader = new MDTlLoader("wiki_getter_persinst", $preflang);
                echo self::generateHTMLHeadForWikidataFetcher($lang);
                echo self::generateWikidataFetcherHeader($tlLoader, "", $display_name);
                echo '
            <p class="alert icons iconsAlert">Es gibt schon einen Eintrag im Beschreibungsfeld</p>
            <div class="wikiReplaceTTile">
                <h3>Bisher vorhanden</h3>
                <p>' . nl2br($escapeHtml($persinst_kurzinfo)) . '</p>
            </div>
            <div class="wikiReplaceTTile">
                <h3>Jetzt gefunden</h3><p>' . $escapeHtml($datafromwiki) . '</p>
            </div>
            ' . $linkStart . write_get_vars(['suchbegriff', 'lang', 'persinst_id']) . '&keep=keep" class="buttonLike icons iconsPin">Keep old entry</a>';
                echo '<br>' . $linkStart . write_get_vars(['suchbegriff', 'lang', 'persinst_id']) . '&keep=replace" class="buttonLike icons iconsPinOff">Replace with new entry</a>';
                echo '<br>' . $linkStart . write_get_vars(['suchbegriff', 'lang', 'persinst_id']) . '&keep=add" class="buttonLike icons iconsPlusOne">Keep old and add new entry</a><br><br><br>';

                exit;

        }

    }
    else {

        $updatePersinstStmt->bind_param("si", $datafromwiki, $persinst_id);
        $updatePersinstStmt->execute();

        $output = true;

    }
    $updatePersinstStmt->close();

    // Update edit metadata
    $updatePersinstEditInfoStmt = $this->_mysqli_noda->do_prepare("UPDATE `persinst`
        SET `persinst_erfasst_am` = NOW(),
            `persinst_erfasst_von` = ?
        WHERE `persinst_id` = ?");
    $updatePersinstEditInfoStmt->bind_param("si", $erfasst_von, $persinst_id);
    $updatePersinstEditInfoStmt->execute();
    $updatePersinstEditInfoStmt->close();

    return $output;

}
|
||
|
||
/**
 * Fills in an actor's year of birth, year of death and gender from a
 * Wikidata entity. Each field is only written if it is still empty ('')
 * in the `persinst` table.
 *
 * Claims read: P569 (date of birth), P570 (date of death), P21 (sex or gender).
 *
 * @param array<mixed> $data        Data loaded from Wikidata.
 * @param integer      $persinst_id Actor ID.
 *
 * @throws MDmainEntityNotExistentException If no actor row is found for the ID.
 * @throws Exception                        If the P21 value is a Q-ID not mapped below.
 *
 * @return void
 */
public function enterPersinstBirthDeathDatesFromWikidata(array $data, int $persinst_id):void {

    $result = $this->_mysqli_noda->query_by_stmt("SELECT `persinst_geburtsjahr`,
            `persinst_sterbejahr`, `persinst_gender`
        FROM `persinst`
        WHERE `persinst_id` = ?", "i", $persinst_id);
    $actor_dates = $result->fetch_assoc();
    // Close before the possible throw, so the result set is released on every path.
    $result->close();

    if (!$actor_dates) {
        throw new MDmainEntityNotExistentException("Failed to fetch actor information");
    }

    if ($actor_dates['persinst_geburtsjahr'] === '') {

        // Try to get birth date
        if (!empty($data['claims']['P569'])
            and !empty($data['claims']['P569']['0']['mainsnak']['datavalue']['value']['time'])
            // Ignore entries with century or even coarser precision. Wikidata
            // precision: 7 = century, smaller numbers are less precise still.
            and (empty($data['claims']['P569']['0']['mainsnak']['datavalue']['value']['precision'])
                || (int)$data['claims']['P569']['0']['mainsnak']['datavalue']['value']['precision'] > 7)
        ) {
            $birth_date = self::wikidataBirthDeathToYear($data['claims']['P569']['0']['mainsnak']['datavalue']['value']['time']);
        }

        if (!empty($birth_date)) {
            $updateStmt = $this->_mysqli_noda->do_prepare("UPDATE `persinst`
                SET `persinst_geburtsjahr` = ?
                WHERE `persinst_id` = ?
                LIMIT 1");
            $updateStmt->bind_param("ii", $birth_date, $persinst_id);
            $updateStmt->execute();
            $updateStmt->close();
        }

    }

    if ($actor_dates['persinst_sterbejahr'] === '') {

        // Try to get death date
        if (!empty($data['claims']['P570']) and !empty($data['claims']['P570']['0']['mainsnak']['datavalue']['value']['time'])) {
            $death_date = self::wikidataBirthDeathToYear($data['claims']['P570']['0']['mainsnak']['datavalue']['value']['time']);
        }

        if (!empty($death_date)) {
            $updateStmt = $this->_mysqli_noda->do_prepare("UPDATE `persinst`
                SET `persinst_sterbejahr` = ?
                WHERE `persinst_id` = ?
                LIMIT 1");
            $updateStmt->bind_param("ii", $death_date, $persinst_id);
            $updateStmt->execute();
            $updateStmt->close();
        }

    }

    if ($actor_dates['persinst_gender'] === '') {

        // Try to get gender
        if (!empty($data['claims']['P21']) and !empty($data['claims']['P21']['0']['mainsnak']['datavalue']['value']['id'])) {
            $wikidata_gender_id = $data['claims']['P21']['0']['mainsnak']['datavalue']['value']['id'];

            switch ($wikidata_gender_id) {
                case "Q6581097": // male
                case "Q44148": // male organism
                case "Q2449503": // transgender man
                    $wikidata_gender = "male";
                    break;
                case "Q6581072": // female
                case "Q1052281": // transgender female
                case "Q43445": // female organism
                    $wikidata_gender = "female";
                    break;
                case "Q48270":
                    $wikidata_gender = "other";
                    break;
                default:
                    throw new Exception("Unknown gender: Q-ID is " . $wikidata_gender_id);
            }
        }

        if (!empty($wikidata_gender)) {
            $updateStmt = $this->_mysqli_noda->do_prepare("UPDATE `persinst`
                SET `persinst_gender` = ?
                WHERE `persinst_id` = ?
                LIMIT 1");
            $updateStmt->bind_param("si", $wikidata_gender, $persinst_id);
            $updateStmt->execute();
            $updateStmt->close();
        }

    }

}
|
||
|
||
/**
 * Synchronizes an actor with a Wikidata entity: enters a description taken
 * from Wikipedia, life dates and gender, links to other norm data sources
 * and translations, and finally logs the edit.
 *
 * The description is first looked up in the user's language; if nothing
 * was entered, the languages in self::LANGUAGES_MAIN_DESC are tried in order.
 *
 * @param string  $lang        The user's selected used language.
 * @param string  $wikidata_id Wikidata ID.
 * @param integer $persinst_id Actor ID.
 * @param string  $erfasst_von User name who's currently editing.
 *
 * @return void
 */
public function retrievePersinstInfoFromWikidataID(string $lang, string $wikidata_id, int $persinst_id, string $erfasst_von) {

    self::validateWikidataId($wikidata_id);
    $data = self::_getWikidataEntity($wikidata_id);

    // Get links to wikipedia
    $wikilinks = self::_getWikipediaLinksFromWikidataOutput($data);
    $alreadyEntered = false;

    if (isset($wikilinks[$lang])) {
        // Process data retrieved from wikipedia
        if (!empty($datafromwiki = self::_getCleanedWikipediaSnippet($lang, $wikilinks[$lang]['title']))) {
            $alreadyEntered = $this->retrievePersinstDescFromWikipedia($persinst_id, $wikidata_id, $datafromwiki, $lang, $lang, $erfasst_von);
        }
    }

    foreach (self::LANGUAGES_MAIN_DESC as $cur_lang) {

        if ($alreadyEntered === true) break;
        if (!isset($wikilinks[$cur_lang])) continue;

        // The title belongs to the $cur_lang edition of Wikipedia, so the
        // snippet has to be requested from that edition, not from $lang.
        if ($datafromwiki = self::_getCleanedWikipediaSnippet($cur_lang, $wikilinks[$cur_lang]['title'])) {
            $alreadyEntered = $this->retrievePersinstDescFromWikipedia($persinst_id, $wikidata_id, $datafromwiki, $lang, "$cur_lang", $erfasst_von);
        }

    }

    $this->enterPersinstBirthDeathDatesFromWikidata($data, $persinst_id);

    // Get links to other norm data sources
    if (!empty($nodaLinks = $this->_getNodaLinksFromWikidataResult('persinst', $wikidata_id, $data))) {
        NodaBatchInserter::linkNodaForPersinst($this->_mysqli_noda, $persinst_id, $nodaLinks, $erfasst_von);
    }

    $this->getWikidataTranslationsForPersinst($data, $persinst_id);

    NodaLogEdit::logPersinstEdit($this->_mysqli_noda, $persinst_id, "wikidata-fetcher", $erfasst_von, 'update', 'synchronize');

}
|
||
|
||
/**
 * Retrieves only the norm data links of an actor from Wikidata and stores
 * them; descriptions, dates and translations are left untouched.
 *
 * @param string  $wikidata_id Wikidata ID.
 * @param integer $persinst_id Actor ID.
 * @param string  $erfasst_von User name who's currently editing.
 *
 * @return void
 */
public function retrievePersinstNormDataLinksFromWikidataID(string $wikidata_id, int $persinst_id, string $erfasst_von) {

    self::validateWikidataId($wikidata_id);

    $entity = self::_getWikidataEntity($wikidata_id);
    $links  = $this->_getNodaLinksFromWikidataResult('persinst', $wikidata_id, $entity);

    // Nothing to link: leave the database untouched.
    if (empty($links)) {
        return;
    }

    NodaBatchInserter::linkNodaForPersinst($this->_mysqli_noda, $persinst_id, $links, $erfasst_von);

}
|
||
|
||
/**
 * Retrieves only the norm data links of a place from Wikidata and stores them.
 *
 * @param string  $wikidata_id Wikidata Q-ID.
 * @param integer $onum        Place ID.
 * @param string  $erfasst_von User name of the current user.
 *
 * @return void
 */
public function retrievePlaceNormDataLinksFromWikidataID(string $wikidata_id, int $onum, string $erfasst_von) {

    self::validateWikidataId($wikidata_id);

    $entity = self::_getWikidataEntity($wikidata_id);
    $links  = $this->_getNodaLinksFromWikidataResult('place', $wikidata_id, $entity);

    // Nothing to link: leave the database untouched.
    if (empty($links)) {
        return;
    }

    NodaBatchInserter::linkNodaForPlace($this->_mysqli_noda, $onum, $links, $erfasst_von);

}
|
||
|
||
/**
 * Collects the translations (label, description, link) available for an
 * actor in a Wikidata entity and hands them to the batch inserter.
 *
 * @param array<mixed> $data          Entity fetched from wikidata.
 * @param integer      $persinst_id   Actor ID.
 * @param string[]     $checkForLangs Languages to check for. Defaults to all
 *                                    languages generally loaded by the wikidata fetcher.
 *
 * @return void
 */
public function getWikidataTranslationsForPersinst(array $data, int $persinst_id, array $checkForLangs = self::LANGUAGES_TO_CHECK):void {

    $found = self::listTranslationsFromWikidataWikipedia($checkForLangs, $data);
    if (empty($found)) {
        return;
    }

    // One row per language, shaped for NodaBatchInserter::insertPersinstTranslations().
    $rows = [];
    foreach ($found as $language => $entry) {
        $rows[] = [
            'persinst_id' => $persinst_id,
            'lang'        => $language,
            'name'        => $entry['label'],
            'description' => $entry['description'],
            'link'        => $entry['link'],
        ];
    }

    NodaBatchInserter::insertPersinstTranslations($this->_mysqli_noda, $rows);

}
|
||
|
||
/**
 * Returns the description (`ort_anmerkung`) currently stored for a place.
 *
 * @param integer $onum Place ID.
 *
 * @throws Exception If no place with the given ID is found.
 *
 * @return string
 */
private function getPlaceDescription(int $onum):string {

    $lookup = $this->_mysqli_noda->query_by_stmt("SELECT `ort_anmerkung`
        FROM `orte`
        WHERE `ort_id` = ?", "i", $onum);
    $row = $lookup->fetch_row();
    $lookup->close();

    if (!$row) {
        throw new Exception("This place does not exist");
    }

    return $row[0];

}
|
||
|
||
/**
 * Returns the description (`tag_anmerkung`) currently stored for a tag,
 * or an empty string if no row is found for the ID.
 *
 * @param integer $tag_id Tag ID.
 *
 * @return string
 */
private function getTagDescription(int $tag_id):string {

    $lookup = $this->_mysqli_noda->query_by_stmt("SELECT `tag_anmerkung`
        FROM `tag`
        WHERE `tag_id` = ?", "i", $tag_id);
    $row = $lookup->fetch_row();
    $lookup->close();

    if (!$row) {
        return '';
    }

    return $row[0];

}
|
||
|
||
/**
 * Writes a description taken from Wikipedia into a place entry.
 *
 * If the place already has a description that does not start with "GND",
 * the retrieval mode decides: "add" appends the new text, "keep" writes the
 * old text back, "replace" overwrites it. In any other mode a page asking
 * the user to pick one of the three is printed and the script exits.
 *
 * @param string  $cur_place_desc Description currently stored for the place.
 * @param string  $datafromwiki   Cleaned text retrieved from Wikipedia.
 * @param string  $preflang       Language of the user interface in general.
 * @param string  $lang           Language of the main entry.
 * @param integer $placeID        ID of the place.
 * @param string  $erfasst_von    User name.
 *
 * @return boolean Always true when the method returns.
 */
public function enterPlaceDescFromWikidata(string $cur_place_desc, string $datafromwiki, string $preflang, string $lang, int $placeID, string $erfasst_von):bool {

    $datafromwiki = '"' . $datafromwiki . '" - (Wikipedia (' . $lang . ') ' . date("d.m.Y") . ')';

    $hasOwnDescription = !empty(trim($cur_place_desc)) && substr($cur_place_desc, 0, 3) !== 'GND';

    if ($hasOwnDescription) {

        $mode = $this->_retrievalMode;

        // Loose comparison on purpose: mirrors the semantics of a switch statement.
        if ($mode == "add") {
            $datafromwiki = $cur_place_desc . PHP_EOL . PHP_EOL . $datafromwiki;
        }
        else if ($mode == "keep") {
            $datafromwiki = $cur_place_desc;
        }
        else if ($mode == "replace") {
            // The new text is written as it is.
        }
        else {
            // No decision yet: show both versions, let the user choose, stop here.
            $tlLoader = new MDTlLoader("wiki_getter_place", $preflang);
            echo self::generateHTMLHeadForWikidataFetcher($lang);
            echo self::generateWikidataFetcherHeader($tlLoader);
            echo '
                <p class="alert icons iconsAlert">There is already an entry for description ...</p>
                <div class="wikiReplaceTTile">
                <h3>Actual entry</h3><p>' . nl2br($cur_place_desc) . '</p>
                </div>
                <div class="wikiReplaceTTile">
                <h3>Now found</h3>
                <p>' . $datafromwiki . '</p>
                </div>
                <a href="get_wikidata_for_ort.php?keep=keep' . write_get_vars(['suchbegriff', 'lang', 'wikidata_id', 'ort_id']) . '" class="buttonLike icons iconsPin">Keep old entry</a>
                <br><a href="get_wikidata_for_ort.php?keep=replace' . write_get_vars(['suchbegriff', 'lang', 'wikidata_id', 'ort_id']) . '" class="buttonLike icons iconsPinOff">Replace with new entry</a>
                <br><a href="get_wikidata_for_ort.php?keep=add' . write_get_vars(['suchbegriff', 'lang', 'wikidata_id', 'ort_id']) . '" class="buttonLike icons iconsPlusOne">Keep old and add new entry</a><br><br><br>
                ';

            exit;
        }

    }

    // Write description to DB
    $stmt = $this->_mysqli_noda->do_prepare("UPDATE `orte`
        SET `ort_anmerkung` = ?,
            `ort_erfasst_am` = NOW(),
            `ort_erfasst_von` = ?
        WHERE ort_id = ?");
    try {
        $stmt->bind_param("ssi", $datafromwiki, $erfasst_von, $placeID);
        $stmt->execute();
    }
    catch (MDMysqliInvalidEncodingError $e) {
        // Encoding errors are swallowed here; the method still returns true.
    }
    $stmt->close();

    return true;

}
|
||
|
||
/**
 * Links a place to its superordinate administrative place, based on the
 * first value of the Wikidata P131 claim.
 *
 * Nothing is written if the claim carries no entity ID, if the place
 * already has a superordinate in `ort_relation`, or if the Wikidata
 * entity is not known in `noda_orte`.
 *
 * @param integer      $onum Place ID.
 * @param array<mixed> $data Wikidata information (P131 claim).
 *
 * @return void
 */
public function retrieveSuperordinateAdministrativePlace(int $onum, array $data):void {

    if (empty($data[0]["mainsnak"]["datavalue"]["value"]["id"])) {
        return;
    }

    // Check if there already is a superordinate of the current place
    $existing = $this->_mysqli_noda->query_by_stmt("SELECT 1
        FROM `ort_relation`
        WHERE `ort_menor_id` = ?
        LIMIT 1", "i", $onum);
    $hasSuperordinate = ($existing->num_rows !== 0);
    $existing->close();

    if ($hasSuperordinate) {
        return;
    }

    // If there is no superordinate, check if the identified superordinate
    // is known in the noda DB.
    $parentQid = $data[0]["mainsnak"]["datavalue"]["value"]["id"];

    $known = $this->_mysqli_noda->query_by_stmt("SELECT `ort_id`
        FROM `noda_orte`
        WHERE `noda_source` = 'wikidata'
            AND `noda_nrinsource` = ?", "s", $parentQid);
    $parentRow = $known->fetch_row();
    $known->close();

    if (!$parentRow) {
        return;
    }

    $parentPlaceId = $parentRow[0];

    // Enter superordinate place by Wikidata
    $insert = $this->_mysqli_noda->do_prepare("INSERT INTO `ort_relation`
        (`ort_mayor_id`, `ort_menor_id`, `ort_relation`)
        VALUES
        (?, ?, 1)");
    $insert->bind_param("ii", $parentPlaceId, $onum);
    $insert->execute();
    $insert->close();

}
|
||
|
||
/**
 * Synchronizes a place with a Wikidata entity: superordinate place (P131),
 * description from Wikipedia, norm data links, TGN ID (P1667), GeoNames ID
 * (P1566), coordinates and translations. The edit is logged at the end.
 *
 * The description is first looked up in the user's language; if nothing
 * was entered, the languages in self::LANGUAGES_MAIN_DESC are tried in order.
 *
 * @param string  $lang        Language.
 * @param string  $wikidata_id Wikidata Q-ID.
 * @param integer $onum        Place ID.
 * @param string  $erfasst_von User name of the current user.
 *
 * @return void
 */
public function retrievePlaceInfoFromWikidataID(string $lang, string $wikidata_id, int $onum, string $erfasst_von) {

    self::validateWikidataId($wikidata_id);
    $data = self::_getWikidataEntity($wikidata_id);

    $wikilinks = self::_getWikipediaLinksFromWikidataOutput($data);

    // P131: Located in administrative unit
    if (isset($data['claims']['P131'])) {
        $this->retrieveSuperordinateAdministrativePlace($onum, $data['claims']['P131']);
    }

    // Get current description for overwriting
    $cur_place_desc = $this->getPlaceDescription($onum);
    $alreadyEntered = false;

    if (!empty($wikilinks[$lang])) {

        $datafromwiki = MD_STD::runCurl(self::_getWikipediaApiLink($lang, $wikilinks[$lang]['title']), 10000);
        // "?? ''" covers undecodable responses and responses without parse/text/*.
        $datafromwiki = json_decode($datafromwiki, true)['parse']['text']['*'] ?? '';

        if (!empty($datafromwiki) and !empty($datafromwiki = self::_cleanWikidataInput((string)$datafromwiki))) {
            $alreadyEntered = $this->enterPlaceDescFromWikidata($cur_place_desc, $datafromwiki, $lang, $lang, $onum, $erfasst_von);
        }

    }

    foreach (self::LANGUAGES_MAIN_DESC as $cur_lang) {

        if ($alreadyEntered === true) break;
        if (!isset($wikilinks[$cur_lang]['url'])) continue;

        $datafromwiki = MD_STD::runCurl(self::_getWikipediaApiLink($cur_lang, $wikilinks[$cur_lang]['title']), 10000);
        $datafromwiki = json_decode($datafromwiki, true)['parse']['text']['*'] ?? '';

        if (!empty($datafromwiki) and !empty($datafromwiki = self::_cleanWikidataInput((string)$datafromwiki))) {
            $alreadyEntered = $this->enterPlaceDescFromWikidata($cur_place_desc, $datafromwiki, $lang, $cur_lang, $onum, $erfasst_von);
        }

    }

    // A claim may exist without a usable value; filter_var() then yields
    // false and the corresponding update below is skipped.
    if (isset($data['claims']['P1566'])) {
        $geonames_id = filter_var($data['claims']['P1566'][0]['mainsnak']['datavalue']['value'] ?? null, FILTER_VALIDATE_INT);
    }
    if (isset($data['claims']['P1667'])) {
        $tgn_id = filter_var($data['claims']['P1667'][0]['mainsnak']['datavalue']['value'] ?? null, FILTER_VALIDATE_INT);
    }

    if (!empty($nodaLinks = $this->_getNodaLinksFromWikidataResult('place', $wikidata_id, $data))) {
        NodaBatchInserter::linkNodaForPlace($this->_mysqli_noda, $onum, $nodaLinks, $erfasst_von);
    }

    $coordinates_wd = self::_getPlaceCoordinatesFromWikidata($data);

    // The following updates are committed together.
    $this->_mysqli_noda->autocommit(false);

    if (!empty($tgn_id)) {
        $updateStmt = $this->_mysqli_noda->do_prepare("UPDATE `orte`
            SET `ort_land` = ?
            WHERE `ort_id` = ?");
        $updateStmt->bind_param("ii", $tgn_id, $onum);
        $updateStmt->execute();
        $updateStmt->close();
    }

    if (!empty($geonames_id)) {
        $updateStmt = $this->_mysqli_noda->do_prepare("UPDATE `orte`
            SET `ort_geonames` = ?
            WHERE `ort_id` = ?");
        $updateStmt->bind_param("ii", $geonames_id, $onum);
        $updateStmt->execute();
        $updateStmt->close();
    }

    if (!empty($coordinates_wd)) {
        $updateStmt = $this->_mysqli_noda->do_prepare("UPDATE `orte`
            SET `ort_nord_sued` = ?, `ort_west_ost` = ?
            WHERE `ort_id` = ?");
        $updateStmt->bind_param("ddi", $coordinates_wd['latitude'], $coordinates_wd['longitude'], $onum);
        $updateStmt->execute();
        $updateStmt->close();
    }

    $this->_mysqli_noda->commit();
    $this->_mysqli_noda->autocommit(true);

    $this->getWikidataTranslationsForPlace($data, $onum);

    NodaLogEdit::logPlaceEdit($this->_mysqli_noda, $onum, "wikidata-fetcher", $erfasst_von, 'update', 'synchronize');

}
|
||
|
||
/**
 * Collects the translations (label, description, link) available for a
 * place in a Wikidata entity and hands them to the batch inserter.
 *
 * @param array<mixed> $data          Entity data fetched from wikidata.
 * @param integer      $ort_id        Place ID.
 * @param string[]     $checkForLangs Languages to check for. Defaults to all
 *                                    languages generally loaded by the wikidata fetcher.
 *
 * @return void
 */
public function getWikidataTranslationsForPlace(array $data, int $ort_id, array $checkForLangs = self::LANGUAGES_TO_CHECK):void {

    $found = self::listTranslationsFromWikidataWikipedia($checkForLangs, $data);
    if (empty($found)) {
        return;
    }

    // One row per language, shaped for NodaBatchInserter::insertPlaceTranslations().
    $rows = [];
    foreach ($found as $language => $entry) {
        $rows[] = [
            'ort_id'      => $ort_id,
            'lang'        => $language,
            'name'        => $entry['label'],
            'description' => $entry['description'],
            'link'        => $entry['link'],
        ];
    }

    NodaBatchInserter::insertPlaceTranslations($this->_mysqli_noda, $rows);

}
|
||
|
||
/**
 * Writes a description taken from Wikipedia into a tag entry.
 *
 * If the tag already has a description that does not start with "GND",
 * the retrieval mode decides: "add" appends the new text, "keep" leaves the
 * description untouched, "replace" overwrites it. In any other mode a page
 * asking the user to pick one of the three is printed and the script exits.
 * The edit metadata of the tag is updated in every case that returns.
 *
 * @param integer $tag_id       Tag ID.
 * @param string  $datafromwiki Data fetched from Wikipedia.
 * @param string  $preflang     The user's currently used language.
 * @param string  $lang         Currently queried language.
 * @param string  $erfasst_von  User who adds the info.
 *
 * @return boolean Always true when the method returns.
 */
public function retrieveTagDescFromWikipedia(int $tag_id, string $datafromwiki, string $preflang, string $lang, string $erfasst_von):bool {

    $datafromwiki = '"' . $datafromwiki . '" - (Wikipedia (' . $lang . ') ' . date("d.m.Y") . ')';
    // Strip HTML-encoded footnote markers such as [1] or [12].
    $datafromwiki = str_replace("'", "´", MD_STD::preg_replace_str('/\&\#91\;[0-9]+\&\#93\;/', '', $datafromwiki));

    $tag_anmerkung = $this->getTagDescription($tag_id);

    $this->_mysqli_noda->autocommit(false);

    $updateTagDescStmt = $this->_mysqli_noda->do_prepare("UPDATE `tag`
        SET `tag_anmerkung` = ?
        WHERE `tag_id` = ?");

    if (!empty($tag_anmerkung) and substr($tag_anmerkung, 0, 3) !== 'GND') {

        switch ($this->_retrievalMode) {
            case "add":

                $newDesc = $tag_anmerkung . PHP_EOL . PHP_EOL . $datafromwiki;

                $updateTagDescStmt->bind_param("si", $newDesc, $tag_id);
                $updateTagDescStmt->execute();
                break;

            case "keep":
                break;

            case "replace":

                $updateTagDescStmt->bind_param("si", $datafromwiki, $tag_id);
                $updateTagDescStmt->execute();
                break;

            default:

                // No decision yet: show both versions, let the user choose, stop here.
                $tlLoader = new MDTlLoader("wiki_getter_tag", $preflang);
                echo self::generateHTMLHeadForWikidataFetcher($lang);
                echo self::generateWikidataFetcherHeader($tlLoader);
                echo '
                    <p class="alert icons iconsAlert">Es gibt schon einen Eintrag im Beschreibungsfeld</p>
                    <div class="wikiReplaceTTile">
                    <h3>Bisher vorhanden</h3><p>' . nl2br($tag_anmerkung) . '</p>
                    </div>
                    <div class="wikiReplaceTTile">
                    <h3>Jetzt gefunden</h3><p>' . $datafromwiki . '</p>
                    </div>

                    <a href="get_wikidata_for_tag.php?keep=keep' . write_get_vars(['suchbegriff', 'lang', 'wikidata_id', 'tag_id']) . '" class="buttonLike icons iconsPin">Keep old entry</a>';
                echo '<br><a href="get_wikidata_for_tag.php?keep=replace' . write_get_vars(['suchbegriff', 'lang', 'wikidata_id', 'tag_id']) . '" class="buttonLike icons iconsPinOff">Replace with new entry</a>';
                echo '<br><a href="get_wikidata_for_tag.php?keep=add' . write_get_vars(['suchbegriff', 'lang', 'wikidata_id', 'tag_id']) . '" class="buttonLike icons iconsPlusOne">Keep old and add new entry</a><br><br><br>';

                exit;

        }

    }
    else {

        $updateTagDescStmt->bind_param("si", $datafromwiki, $tag_id);
        $updateTagDescStmt->execute();

    }

    $updateTagDescStmt->close();

    $this->_mysqli_noda->commit();
    $this->_mysqli_noda->autocommit(true);

    // Update tag editing metadata
    $updateTagEditInfoStmt = $this->_mysqli_noda->do_prepare("UPDATE `tag`
        SET `tag_erfasst_am` = NOW(),
            `tag_erfasst_von` = ?
        WHERE `tag_id` = ?");
    $updateTagEditInfoStmt->bind_param("si", $erfasst_von, $tag_id);
    $updateTagEditInfoStmt->execute();
    $updateTagEditInfoStmt->close();

    return true;

}
|
||
|
||
/**
 * Synchronizes a tag with a Wikidata entity: enters a description taken
 * from Wikipedia, links to other norm data sources and translations, and
 * finally logs the edit.
 *
 * The description is first looked up in the user's language; if nothing
 * was entered, the languages in self::LANGUAGES_MAIN_DESC are tried in order.
 *
 * @param string  $lang        The user's selected used language.
 * @param string  $wikidata_id Wikidata ID.
 * @param integer $tag_id      Tag ID.
 * @param string  $erfasst_von User name who's currently editing.
 *
 * @return void
 */
public function retrieveTagInfoFromWikidataID(string $lang, string $wikidata_id, int $tag_id, string $erfasst_von) {

    self::validateWikidataId($wikidata_id);
    $data = self::_getWikidataEntity($wikidata_id);

    $wikilinks = self::_getWikipediaLinksFromWikidataOutput($data);

    $alreadyEntered = false;

    if (isset($wikilinks[$lang])) {

        $datafromwiki = MD_STD::runCurl(self::_getWikipediaApiLink($lang, $wikilinks[$lang]['title']), 10000);
        // "?? ''" covers undecodable responses and responses without parse/text/*.
        $datafromwiki = json_decode($datafromwiki, true)['parse']['text']['*'] ?? '';

        // Process data retrieved from wikipedia
        if (!empty($datafromwiki = self::_cleanWikidataInput((string)$datafromwiki))) {
            $alreadyEntered = $this->retrieveTagDescFromWikipedia($tag_id, $datafromwiki, $lang, $lang, $erfasst_von);
        }

    }

    foreach (self::LANGUAGES_MAIN_DESC as $cur_lang) {

        if ($alreadyEntered === true) break;
        if (!isset($wikilinks[$cur_lang])) continue;

        $datafromwiki = MD_STD::runCurl(self::_getWikipediaApiLink($cur_lang, $wikilinks[$cur_lang]['title']), 10000);
        $datafromwiki = json_decode($datafromwiki, true)['parse']['text']['*'] ?? '';

        // Process data retrieved from wikipedia
        if ($datafromwiki = self::_cleanWikidataInput((string)$datafromwiki)) {
            $alreadyEntered = $this->retrieveTagDescFromWikipedia($tag_id, $datafromwiki, $lang, $cur_lang, $erfasst_von);
        }

    }

    if (!empty($nodaLinks = $this->_getNodaLinksFromWikidataResult('tag', $wikidata_id, $data))) {
        NodaBatchInserter::linkNodaForTag($this->_mysqli_noda, $tag_id, $nodaLinks, $erfasst_von);
    }

    // Get translations
    if (!empty($data)) $this->getWikidataTranslationsForTag($data, $tag_id);

    NodaLogEdit::logTagEdit($this->_mysqli_noda, $tag_id, "wikidata-fetcher", $erfasst_von, 'update', 'synchronize');

}
|
||
|
||
/**
 * Collects the translations (label, description, link) available for a tag
 * in a Wikidata entity and hands them to the batch inserter.
 *
 * For languages listed in self::LANGUAGES_TO_CAPITALIZE the first letter of
 * label and description is upper-cased. This is done multibyte-aware, so
 * that it also works for text starting with a non-ASCII letter.
 *
 * @param array<mixed> $data          Entity data fetched from wikidata.
 * @param integer      $tag_id        Tag ID.
 * @param string[]     $checkForLangs Languages to check for. Defaults to all
 *                                    languages generally loaded by the wikidata fetcher.
 *
 * @return void
 */
public function getWikidataTranslationsForTag(array $data, int $tag_id, array $checkForLangs = self::LANGUAGES_TO_CHECK):void {

    if (empty($translations = self::listTranslationsFromWikidataWikipedia($checkForLangs, $data))) {
        return;
    }

    // ucfirst() only handles single-byte letters and would leave e.g.
    // Cyrillic or accented initials untouched.
    $capitalize = static function (string $text):string {
        return mb_strtoupper(mb_substr($text, 0, 1, 'UTF-8'), 'UTF-8') . mb_substr($text, 1, null, 'UTF-8');
    };

    $toInsert = [];

    foreach ($translations as $lang => $values) {

        if (in_array($lang, self::LANGUAGES_TO_CAPITALIZE, true)) {
            $label = $capitalize((string)$values['label']);
            $description = $capitalize((string)$values['description']);
        }
        else {
            $label = $values['label'];
            $description = $values['description'];
        }

        $toInsert[] = [
            'tag_id'      => $tag_id,
            'lang'        => $lang,
            'name'        => $label,
            'description' => $description,
            'link'        => $values['link'],
        ];

    }

    NodaBatchInserter::insertTagTranslations($this->_mysqli_noda, $toInsert);

}
|
||
|
||
/**
 * Searches Wikidata for a string, using the wbsearchentities API (up to
 * 20 hits). Hits without a label and disambiguation pages are dropped.
 *
 * Each returned entry has the keys 'id', 'label', 'label_ext' (language
 * and text of the match, if provided by the API) and 'description'.
 *
 * @param string $searchTerm Search string.
 * @param string $lang       Searched language. Defaults to German.
 *
 * @return array<mixed>
 */
public static function searchWikidataForString(string $searchTerm, string $lang = "de"):array {

    $response = MD_STD::runCurl("https://www.wikidata.org/w/api.php?action=wbsearchentities&format=json&search=" . urlencode($searchTerm) . "&language=" . urlencode($lang) . "&limit=20", 10000);

    // json_decode() returns null (not false) for undecodable input.
    $wikidata_data = json_decode($response, true);
    if (!is_array($wikidata_data) or empty($wikidata_data['search'])) {
        return [];
    }

    $output = [];

    foreach ($wikidata_data['search'] as $result) {

        if (empty($result['label'])
            or (!empty($result['description']) and in_array($result['description'], ['Wikipedia disambiguation page', 'Wikimedia disambiguation page'], true))
        ) continue;

        $cur = [
            'id'          => $result['id'],
            'label'       => $result['label'],
            'label_ext'   => '',
            'description' => '',
        ];

        if (!empty($result['match'])) {
            $cur['label_ext'] = "{$result['match']['language']}: {$result['match']['text']}";
        }
        if (!empty($result['description'])) {
            $cur['description'] = $result['description'];
        }

        $output[] = $cur;

    }

    return $output;

}
|
||
|
||
/**
 * Generates the HTML for an entry in the general wikidata search results list.
 *
 * Returns an empty string for results without a label and for
 * disambiguation pages.
 *
 * Search term, Q-ID and language are URL-encoded into the link target;
 * Q-ID, label and extended label are HTML-escaped for display.
 * $result['description'] is emitted verbatim, because
 * generateWikidataResultsListForActors() passes markup in it. Callers
 * must therefore escape any untrusted text in the description themselves.
 *
 * @param string       $link       Link target; the query parameters are appended to it as is.
 * @param string       $searchTerm Search term.
 * @param string       $lang       Language.
 * @param array<mixed> $result     Single result to display.
 *
 * @return string
 */
public static function generateWikidataResultsListEntry(string $link, string $searchTerm, string $lang, array $result):string {

    if (empty($result['label']) or (isset($result['description']) and in_array($result['description'], ['Wikipedia disambiguation page', 'Wikimedia disambiguation page'], true))) {
        return '';
    }

    $rawId  = (string)$result['id'];
    $htmlId = htmlspecialchars($rawId);

    // Query values are URL-encoded: otherwise "&", "+" or "#" in the search
    // term would corrupt the parameter list.
    $output = '<div><a href="' . $link . 'suchbegriff=' . urlencode($searchTerm) . '&wikidata_id=' . urlencode($rawId) . '&lang=' . urlencode($lang) . '">
        <h4 class="icons iconsTag">' . $htmlId . '</h4>';
    $output .= '<p class="wikidataSummary">' . htmlspecialchars((string)$result['label']);
    if (!empty($result['label_ext'])) {
        $output .= " (<span class='icons iconsTranslate'>" . htmlspecialchars((string)$result['label_ext']) . "</span>)";
    }
    $output .= '</p>';
    if (!empty($result['description'])) $output .= '<p>' . $result['description'] . '</p>';
    $output .= '</a><a class="icons iconsEye" target="_blank" href="https://www.wikidata.org/wiki/' . $htmlId . '">Wikidata page</a></div>';

    return $output;

}
|
||
|
||
/**
 * Function for generating a wikidata results list.
 *
 * Searches Wikidata for the search term and renders each hit via
 * generateWikidataResultsListEntry(). If nothing is found, an alert
 * paragraph is returned instead.
 *
 * @param string $link       Links.
 * @param string $searchTerm Search term.
 * @param string $lang       Language.
 *
 * @return string
 */
public static function generateWikidataResultsList(string $link, string $searchTerm, string $lang):string {

    // NOTE(review): $lang is not passed on, so the search always runs with
    // the default language of searchWikidataForString() - confirm this is intended.
    if (empty($wikidata_data = self::searchWikidataForString($searchTerm))) {
        // The search term is user input and must not reach the page unescaped.
        return '<p class="icons iconsAlert alert"><b>' . htmlspecialchars(ucfirst($searchTerm)) . '</b> not found in Wikidata</p>';
    }

    $output = '
        <main id="wikidataResultsList">';
    foreach ($wikidata_data as $result) {
        // The entry renderer prints the description verbatim, so text coming
        // from Wikidata is escaped here.
        if (!empty($result['description'])) {
            $result['description'] = htmlspecialchars((string)$result['description']);
        }
        $output .= self::generateWikidataResultsListEntry($link, $searchTerm, $lang, $result);
    }
    $output .= '
        </main>';

    return $output;

}
|
||
|
||
/**
 * Attempts to parse birth or death years from the data returned by wikidata.
 *
 * Wikidata delivers times as e.g. "+1879-03-14T00:00:00Z". Month and day
 * may be "00" for dates that are only known by year, and the year may be
 * preceded by "-" for dates BCE. The year is therefore read directly from
 * the string instead of going through strtotime(), which can interpret a
 * bare four-digit number as a time of day and cannot handle "-00-00".
 *
 * @param string $inputTime Input time in the format delivered by wikidata.
 *
 * @return string The year without leading zeros, prefixed with "-" for BCE
 *                years; an empty string if no year could be read.
 */
public static function wikidataBirthDeathToYear(string $inputTime):string {

    if (preg_match('/^([+-]?)(\d{1,16})-\d{2}-\d{2}T/', trim($inputTime), $matches) !== 1) {
        return '';
    }

    $year = (int)$matches[2];
    if ($year === 0) {
        return '';
    }

    // NOTE(review): BCE years are returned as negative numbers - confirm
    // that the callers and the DB columns written from this value accept them.
    return ($matches[1] === '-' ? '-' : '') . $year;

}
|
||
|
||
/**
 * Function for generating a wikidata results list for actors, keeping track of life dates.
 *
 * For every search hit the full entity is fetched, and the years of birth
 * (P569) and death (P570) are appended to the hit's description. A hit
 * whose years both match the given ones is marked as a suggestion.
 *
 * @param string  $link        Links.
 * @param string  $searchTerm  Search term.
 * @param string  $lang        Language.
 * @param integer $yearOfBirth Year of birth.
 * @param integer $yearOfDeath Year of death.
 *
 * @return string
 */
public static function generateWikidataResultsListForActors(string $link, string $searchTerm, string $lang, int $yearOfBirth, int $yearOfDeath):string {

    // NOTE(review): $lang is not passed on, so the search always runs with
    // the default language of searchWikidataForString() - confirm this is intended.
    if (empty($wikidata_data = self::searchWikidataForString($searchTerm))) {
        // The search term is user input and must not reach the page unescaped.
        return '<p class="icons iconsAlert alert"><b>' . htmlspecialchars(ucfirst($searchTerm)) . '</b> not found in Wikidata</p>';
    }

    $qLinksToCheck = [];
    foreach ($wikidata_data as $entry) {
        $qLinksToCheck[$entry['id']] = "https://www.wikidata.org/wiki/Special:EntityData/" . $entry['id'] . ".json";
    }
    $fetched = MD_STD::runCurlMulti($qLinksToCheck, 10000);

    $yearsOfBirthList = $yearsOfDeathList = [];
    foreach ($fetched as $qId => $data) {
        if (!($jsonData = json_decode($data, true))) {
            continue;
        }
        if (empty($jsonData['entities'][$qId])) {
            continue;
        }
        $data = $jsonData['entities'][$qId];

        if (!empty($data['claims']['P569']) and !empty($data['claims']['P569']['0']['mainsnak']['datavalue']['value']['time'])) {
            $yearsOfBirthList[$qId] = (int)self::wikidataBirthDeathToYear($data['claims']['P569']['0']['mainsnak']['datavalue']['value']['time']);
        }

        if (!empty($data['claims']['P570']) and !empty($data['claims']['P570']['0']['mainsnak']['datavalue']['value']['time'])) {
            $yearsOfDeathList[$qId] = (int)self::wikidataBirthDeathToYear($data['claims']['P570']['0']['mainsnak']['datavalue']['value']['time']);
        }
    }

    $output = '
        <main id="wikidataResultsList">';
    foreach ($wikidata_data as $result) {
        if (empty($result['id'])) continue;

        // The entry renderer prints the description verbatim (markup is
        // appended below), so the text coming from Wikidata is escaped first.
        $result['description'] = htmlspecialchars((string)($result['description'] ?? ''));

        if (!empty($yearsOfBirthList[$result['id']])) {
            if (empty($result['description'])) {
                $result['description'] = 'Born: ' . $yearsOfBirthList[$result['id']];
            }
            else $result['description'] .= '<br/>Born: ' . $yearsOfBirthList[$result['id']];
        }

        if (!empty($yearsOfDeathList[$result['id']])) {
            if (empty($result['description'])) {
                $result['description'] = 'Death: ' . $yearsOfDeathList[$result['id']];
            }
            else $result['description'] .= '<br/>Death: ' . $yearsOfDeathList[$result['id']];
        }

        if (!empty($yearsOfBirthList[$result['id']]) && !empty($yearsOfDeathList[$result['id']])) {
            if ($yearsOfBirthList[$result['id']] === $yearOfBirth
                && $yearsOfDeathList[$result['id']] === $yearOfDeath
            ) {
                $result['description'] .= '<br/><span class="buttonLike">Suggestion!</span>';
            }
        }

        $output .= self::generateWikidataResultsListEntry($link, $searchTerm, $lang, $result);
    }
    $output .= '
        </main>';

    return $output;

}
|
||
|
||
/**
 * Builds the opening part of a wikidata fetcher page: doctype, <head>
 * and the opening <body> tag, passed through MD_STD::minimizeHTMLString().
 *
 * The main stylesheet is only linked if the constant MAIN_CSS_FILE is defined.
 *
 * @param string  $lang     User language.
 * @param boolean $implyEnd If set to true, the end string will be echoed at the end of the script execution.
 *
 * @return string
 */
public static function generateHTMLHeadForWikidataFetcher(string $lang, bool $implyEnd = true):string {

    $html = '<!DOCTYPE html><html class="getWikidata" lang="' . $lang . '">
        <head>
        <title>Get Wikidata</title>
        <meta name="viewport" content="width=device-width, initial-scale=1" />
        <link rel="manifest" href="../manifest.webmanifest" />
        <meta name="theme-color" content="#0b1728" />
        <link rel="shortcut icon" sizes="16x16 32x32" href="../img/mdlogo-nodac.svg.png" />
        <link rel="apple-touch-icon" sizes="256x256" href="../img/mdterm-256px.png" />
        <script type="text/javascript" src="../js/wikidataGetter.min.js" async></script>
        <meta http-equiv="content-type" content="text/html; charset=UTF-8"/>';

    if (defined("MAIN_CSS_FILE")) {
        $html .= '<link rel="stylesheet" type="text/css" href="' . htmlspecialchars(MAIN_CSS_FILE) . '">';
    }

    $html .= '
        <meta name="description" content="Fetch information from Wikidata." />
        </head>
        <body>';

    // Have the closing markup printed when the script ends.
    if ($implyEnd === true) {
        register_shutdown_function(function() :void {
            echo printHTMLEnd();
        });
    }

    return MD_STD::minimizeHTMLString($html);

}
|
||
|
||
/**
 * Generates the <header> element for wikidata fetcher pages. It contains
 * the Wikidata logo, the translated page title and the HTML-escaped
 * search term, followed by optional additional markup.
 *
 * If no search term is passed, the GET parameter "suchbegriff" is used as
 * a fallback, provided it is a string. Array-valued parameters
 * (e.g. ?suchbegriff[]=x) are ignored rather than being cast to "Array".
 *
 * @param MDTlLoader $tlLoader   Translation variable.
 * @param string     $additional Additional info. Appended verbatim (without
 *                               escaping) after the headline.
 * @param string     $searchTerm Search term.
 *
 * @return string
 */
public static function generateWikidataFetcherHeader(MDTlLoader $tlLoader, string $additional = "", string $searchTerm = ""):string {

    // Compare against the empty string explicitly: empty() would also
    // discard the perfectly valid search term "0".
    if ($searchTerm === '' && isset($_GET['suchbegriff']) && is_string($_GET['suchbegriff'])) {
        $searchTerm = $_GET['suchbegriff'];
    }

    $output = "\n<header>\n"
        . '<h1><img src="../img/wikidata.png" alt="Logo: Wikidata" />'
        . $tlLoader->tl("wiki", "wiki", "fetch_from_wikidata")
        . ': ' . htmlspecialchars($searchTerm)
        . '</h1>'
        . $additional
        . '</header>';

    return $output;

}
|
||
|
||
/**
 * Sets up a new fetcher instance by storing the passed DB connection
 * on the object.
 *
 * @param MDMysqli $mysqli_noda DB connection.
 *
 * @return void
 */
public function __construct(MDMysqli $mysqli_noda) {

    // Keep the connection on the instance for later use.
    $this->_mysqli_noda = $mysqli_noda;

}
|
||
}
|