MDNodaHelpers/src/NodaWikidataFetcher.php

1963 lines
70 KiB
PHP
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

<?PHP
/**
* This file contains tools for fetching data from Wikidata.
*
* @file
* @author Joshua Ramon Enslin <joshua@museum-digital.de>
*/
declare(strict_types = 1);
/**
* Helps fetching information from Wikidata.
*/
final class NodaWikidataFetcher {
/**
 * HTTP headers sent along with requests to the Wikidata SPARQL endpoint
 * (see sparqlQuery()). Requests SPARQL results as JSON.
 */
private const WIKIDATA_FETCH_HEADERS = [
'User-Agent: museum-digital-bot GND-to-Wikidata PHP/' . PHP_VERSION,
'Accept: application/sparql-results+json',
];
/**
 * Languages whose Wikipedia articles are considered as sources for the main
 * description. Each entry is combined with the suffix "wiki" to look up the
 * sitelink in Wikidata's output.
 *
 * NOTE(review): this list contains 'jp' while LANGUAGES_TO_CHECK uses 'ja'.
 * As entries are turned into sitelink keys (e.g. "jpwiki"), confirm that
 * 'jp' is intended here.
 */
public const LANGUAGES_MAIN_DESC = ['de', 'da', 'en', 'es', 'fr', 'hu', 'it', 'jp', 'nl', 'pt', 'ru', 'sv', 'sk', 'uk', 'zh'];
/**
 * Default set of languages for which translations are fetched
 * (default value of getWikidataTranslationsForPersinst()'s $checkForLangs).
 */
public const LANGUAGES_TO_CHECK = ['ar', 'bg', 'bn', 'cs', 'da', 'de', 'el', 'en', 'es', 'fa', 'fi', 'fr', 'ha', 'he', 'hi', 'hu', 'id', 'it', 'ja', 'ka', 'ko', 'nl', 'pl', 'pt', 'ro', 'ru', 'sv', 'sk', 'sw', 'ta', 'th', 'tl', 'tr', 'uk', 'ur', 'vi', 'zh'];
/**
 * Presumably the languages in which fetched labels are to be capitalized;
 * not used in the part of the file visible here - verify against callers.
 */
public const LANGUAGES_TO_CAPITALIZE = ["cs", "da", "de", "en", "es", "fr", "fi", "id", "it", "nl", "pl", "pt", "ru", "sv", 'sk', "tl", "tr"];
/**
 * Maps names of external vocabularies / repositories to the Wikidata
 * property ID (P-ID) holding the respective external identifier.
 *
 * Note that 'lcsh' and 'loc' both point to P244: Wikidata only knows one
 * property for Library of Congress authority IDs, so the distinction is
 * made later on by validating the ID (see _determineLocRefMode()).
 */
public const P_IDS_NODA_TAGS = [
'gnd' => 'P227',
'lcsh' => 'P244',
'aat' => 'P1014',
'iconclass' => 'P1256',
'osm' => 'P402',
'loc' => 'P244',
'nomisma' => 'P2950',
'cona' => 'P1669',
"rkd" => "P650",
"ulan" => "P245",
"viaf" => "P214",
"bnf" => "P268",
"pim" => "P3973",
"ndl" => "P349", // National Diet Library (Japan)
"npg" => "P1816", // "National" portrait gallery
"bne" => "P950", // National Library of Spain
"orcid" => "P496",
];
/**
 * Literal HTML fragments that are removed (via str_replace) from text
 * fetched from Wikipedia before any further cleaning takes place
 * (see _cleanWikidataInput()). Only the listed fragment itself is removed,
 * not the remainder of the paragraph it opens.
 *
 * NOTE(review): the third entry starts with '<pVous' and contains
 * 'laméliorant' without an apostrophe. This may match the raw markup on
 * purpose or may be the result of lost characters - confirm.
 */
private const WIKIPEDIA_REMOVE_LITERALS = [
"<p>Si vous disposez d'ouvrages ou d'articles de référence ou si vous ",
'<p><b>En pratique&#160;:</b> <a href="/wiki/Wikip%C3%A9dia:Citez_vos_sources#Qualité_des_sources" title="Wikipédia:Citez vos sources">Quelles sources sont attendu',
'<pVous pouvez partager vos connaissances en laméliorant (',
'<p class="mw-empty-elt">',
'<p><small>Géolocalisation sur la carte',
'<p><b>Koordinaatit:</b>',
'<p><span class="executeJS" data-gadgetname="ImgToggle"></span',
'<p><span class="imgtoggleboxTitle">',
// Currently disabled:
//'<div class="mw-parser-output"><p>',
'<p><span style="font-size: small;"><span id="coordinates">',
'<p><span></span></p>',
'<p><a rel="nofollow" class="external text" href="https://maps.gs',
'<p><span class="plainlinks nourlexpansion"><a class="external text" href="//tools.wmflabs.org/geohack/geohack.php?langu',
'<p><span style="display:none">',
'<p>&#32;</p>',
'<p><span class="geo noexcerpt"',
];
/**
 * Accepted values for the retrieval mode (see setRetrievalMode()).
 * The mode decides what happens if an actor already has a description
 * when a new one is fetched (see retrievePersinstDescFromWikipedia()):
 * - 'add':     append the new description to the existing one
 * - 'keep':    leave the existing description untouched
 * - 'replace': overwrite the existing description
 * - 'list':    print a page offering the above options and exit
 */
public const RETRIEVAL_MODES_ACCEPTED = [
'list',
'add',
'keep',
'replace',
];
/** Retrieval mode used unless setRetrievalMode() has been called. */
public const RETRIEVAL_MODES_DEFAULT = 'list';
/**
 * Currently active retrieval mode.
 *
 * @var 'list'|'add'|'keep'|'replace'
 */
private string $_retrievalMode = self::RETRIEVAL_MODES_DEFAULT;
/**
 * Database connection used for all reads and writes of this class.
 *
 * @var MDMysqli
 */
private MDMysqli $_mysqli_noda;
/**
 * Builds the URL of the Wikipedia API call that returns the parsed text of
 * the first section (section 0) of a page as JSON.
 *
 * @param string $lang       Language code, used as the Wikipedia subdomain.
 * @param string $searchTerm Title of the page to request.
 *
 * @return non-empty-string
 */
private static function _getWikipediaApiLink(string $lang, string $searchTerm):string {

    $host = urlencode($lang) . ".wikipedia.org";
    $page = urlencode($searchTerm);

    return "https://{$host}/w/api.php?action=parse&page={$page}&prop=text&section=0&format=json";

}
/**
 * Collects the Wikipedia sitelinks of an entity for all languages listed
 * in LANGUAGES_MAIN_DESC.
 *
 * Languages for which the sitelink lacks a URL or a title (or for which
 * either is not a string) are left out. Spaces in titles are replaced by
 * underscores.
 *
 * @param array<mixed> $data Wikidata API output.
 *
 * @return array<string, array{url: string, title: string}>
 */
private static function _getWikipediaLinksFromWikidataOutput(array $data):array {

    $linksByLanguage = [];

    foreach (self::LANGUAGES_MAIN_DESC as $language) {

        $sitelinkKey = $language . 'wiki';
        $url         = $data['sitelinks'][$sitelinkKey]['url'] ?? null;
        $title       = $data['sitelinks'][$sitelinkKey]['title'] ?? null;

        if (is_string($url) && is_string($title)) {
            $linksByLanguage[$language] = [
                'url'   => $url,
                'title' => str_replace(' ', '_', $title),
            ];
        }

    }

    return $linksByLanguage;

}
/**
 * Parses coordinates (property P625) from Wikidata API output.
 *
 * Only the first P625 claim is evaluated. An empty array is returned if
 * there is no such claim, if the claim carries no value (e.g. snaks
 * without a "datavalue"), or if latitude / longitude are not valid floats.
 *
 * @param array<mixed> $data Wikidata API output.
 *
 * @return array{}|array{longitude: float, latitude: float}
 */
private static function _getPlaceCoordinatesFromWikidata(array $data):array {

    // Null-coalescing over the whole chain: any missing level yields null
    // instead of raising "undefined array key" warnings.
    $coordinates = $data['claims']['P625'][0]['mainsnak']['datavalue']['value'] ?? null;
    if (!is_array($coordinates)) {
        return [];
    }

    // filter_var() returns false for null as well as for non-numeric values
    $latitude_wd  = \filter_var($coordinates['latitude'] ?? null, FILTER_VALIDATE_FLOAT);
    $longitude_wd = \filter_var($coordinates['longitude'] ?? null, FILTER_VALIDATE_FLOAT);

    if ($latitude_wd === false || $longitude_wd === false) return [];

    return [
        'longitude' => $longitude_wd,
        'latitude'  => $latitude_wd,
    ];

}
/**
 * Loads the first section of a Wikipedia page through the Wikipedia API
 * and returns its cleaned text.
 *
 * An empty string is returned if the response cannot be decoded or does
 * not contain the parsed text.
 *
 * @param string $lang  Language (Wikipedia version) to load from.
 * @param string $title Title of the page to load.
 *
 * @return string
 */
private static function _getCleanedWikipediaSnippet(string $lang, string $title):string {

    $response = MD_STD::runCurl(self::_getWikipediaApiLink($lang, $title), 10000);
    $decoded  = json_decode($response, true);

    // The API nests the HTML below parse > text > *. Any missing level
    // (e.g. on error responses) means there is nothing to clean.
    $html = $decoded['parse']['text']['*'] ?? null;
    if (!is_scalar($html)) {
        return '';
    }

    return self::_cleanWikidataInput(strval($html));

}
/**
 * Fetches the JSON representation of a single Wikidata entity from
 * Special:EntityData and returns the entity's data.
 *
 * @param string $wikidata_id Wikidata Q-ID.
 *
 * @throws MDhttpFailedException If the response cannot be decoded or does
 *                               not contain the requested entity.
 *
 * @return array<mixed>
 */
private static function _getWikidataEntity(string $wikidata_id):array {

    $url     = "https://www.wikidata.org/wiki/Special:EntityData/" . urlencode($wikidata_id) . ".json";
    $decoded = json_decode(MD_STD::runCurl($url, 10000), true);

    // Undecodable response and missing entity are reported identically
    if ($decoded === null || empty($decoded['entities'][$wikidata_id])) {
        throw new MDhttpFailedException("Failed fetching from Wikidata. Try again later.");
    }

    return $decoded['entities'][$wikidata_id];

}
/**
 * Parses wikidata results to MDNodaLink entries.
 *
 * The returned list always starts with the link to Wikidata itself. For
 * each vocabulary in P_IDS_NODA_TAGS, the first claim of the respective
 * property is evaluated; it is added if the vocabulary may be linked to
 * the given target type.
 *
 * As 'lcsh' and 'loc' share one Wikidata property (P244), both entries
 * resolve to the same vocabulary and ID. Each vocabulary / ID combination
 * is therefore only returned once.
 *
 * @param 'tag'|'persinst'|'place' $target      Target vocabulary type.
 * @param string                   $wikidata_id Wikidata ID.
 * @param array<mixed>             $data        Wikidata result.
 *
 * @return list<MDNodaLink>
 */
public function _getNodaLinksFromWikidataResult(string $target, string $wikidata_id, array $data):array {

    $linkableVocabularies = match($target) {
        'tag' => MDNodaRepositoriesSet::REPOSITORIES_TAG,
        'persinst' => MDNodaRepositoriesSet::REPOSITORIES_ACTOR,
        'place' => MDNodaRepositoriesSet::REPOSITORIES_PLACE,
    };

    $output = [
        new MDNodaLink(MDNodaRepository::wikidata, $wikidata_id)
    ];
    $alreadyLinked = [];

    foreach (self::P_IDS_NODA_TAGS as $vocabName => $pId) {

        // Claims without a value (no "datavalue") and non-string values
        // cannot be turned into a link.
        $externalId = $data['claims'][$pId][0]['mainsnak']['datavalue']['value'] ?? null;
        if (!is_string($externalId) || $externalId === '') {
            continue;
        }

        // Wikidata does not distinguish between LOC and LCSH, so the
        // actual vocabulary is determined from the ID itself.
        $resolvedVocab = $vocabName;
        if ($vocabName === 'loc' || $vocabName === 'lcsh') {
            $resolvedVocab = $this->_determineLocRefMode($externalId);
            if ($resolvedVocab === '') continue;
        }

        if (!in_array($resolvedVocab, $linkableVocabularies, true)) continue;

        // Skip duplicates (happens for the shared P244 of loc / lcsh)
        $dedupKey = $resolvedVocab . "\0" . $externalId;
        if (isset($alreadyLinked[$dedupKey])) continue;
        $alreadyLinked[$dedupKey] = true;

        $output[] = new MDNodaLink(MDNodaRepository::fromString($resolvedVocab), $externalId);

    }

    return $output;

}
/**
 * Determines whether an ID from the Library of Congress authority files
 * is a valid LOC ID, a valid LCSH ID, or neither.
 *
 * This is necessary since Wikidata only knows one type of link to the
 * LOC authority files, while museum-digital knows two. The ID is checked
 * against LOC first and against LCSH second; validation exceptions are
 * treated as "not valid for this vocabulary".
 *
 * @param string $url LOC ID to check.
 *
 * @return 'loc'|'lcsh'|''
 */
private function _determineLocRefMode(string $url):string {

    $candidates = [
        'loc'  => MDNodaRepository::loc,
        'lcsh' => MDNodaRepository::lcsh,
    ];

    foreach ($candidates as $mode => $repository) {
        try {
            $isValid = $repository->validateId($url) !== false;
        }
        catch (MDgenericInvalidInputsException | MDInvalidNodaLinkException | MDInvalidNodaLink $e) {
            // Not a valid ID for this vocabulary: try the next one
            continue;
        }
        if ($isValid) {
            return $mode;
        }
    }

    return '';

}
/**
 * Reduces HTML from Wikipedia to the plain text of its paragraphs.
 *
 * Everything before the first "<p" and after the last "</p>" is cut off
 * (unless the respective position is 0), the rest is parsed as XML,
 * <style>, <table> and <ol> elements are removed, a first paragraph
 * containing "geohack" is dropped, and the text contents of the remaining
 * paragraphs are joined with blank lines between them.
 *
 * @param string $input Input string.
 *
 * @return string
 */
private static function _cleanWikidataInputHtml(string $input):string {

    // Cut off anything before the first <p> ...
    $paragraphStart = strpos($input, '<p');
    if ($paragraphStart) {
        $input = substr($input, $paragraphStart);
    }
    // ... and anything after the last </p> (4 = length of "</p>")
    $paragraphEnd = strrpos($input, '</p>');
    if ($paragraphEnd) {
        $input = substr($input, 0, $paragraphEnd + 4);
    }

    $doc = new DOMDocument();
    try {
        $doc->loadXML('<section>' . trim($input) . '</section>');
    }
    catch (Exception $e) {
        throw new Exception("Failed to load DOMDocument." . PHP_EOL . $e->getMessage() . PHP_EOL . PHP_EOL . '---' . $input . '---');
    }

    // Remove elements that do not contribute to the running text.
    // The node lists are live, so the first item is removed repeatedly
    // until the list is empty.
    foreach (["style", "table", "ol"] as $tagName) {
        $nodes = $doc->getElementsByTagName($tagName);
        while ($nodes->length > 0) {
            $node = $nodes->item(0);
            if ($node === null || $node->parentNode === null) break;
            $node->parentNode->removeChild($node);
        }
    }

    // A leading paragraph containing a geohack link only holds coordinates
    $firstP = $doc->getElementsByTagName("p")->item(0);
    if ($firstP !== null) {
        $firstPhtml = $doc->saveHTML($firstP);
        if ($firstPhtml !== false
            && strpos($firstPhtml, 'geohack') !== false
            && $firstP->parentNode !== null
        ) {
            $firstP->parentNode->removeChild($firstP);
        }
    }

    $paragraphTexts = [];
    foreach ($doc->getElementsByTagName("p") as $paragraph) {
        $paragraphTexts[] = trim($paragraph->textContent);
    }

    $joined = trim(implode(PHP_EOL, $paragraphTexts));
    return str_replace(PHP_EOL, PHP_EOL . PHP_EOL, $joined);

}
/**
 * Removes numeric source markers from "[0]" up to "[99]" from a text.
 *
 * @param string $input Input string.
 *
 * @return string
 */
private static function _cleanSourceBracketsOffTranslation(string $input):string {

    $markers = array_map(
        static fn(int $number): string => "[$number]",
        range(0, 99),
    );

    // strtr() replaces in a single pass, so removals cannot combine into
    // new markers.
    return strtr($input, array_fill_keys($markers, ""));

}
/**
 * Cleans contents parsed from Wikipedia (also used for plain labels and
 * descriptions from Wikidata).
 *
 * Surrounding double quotes and the fragments listed in
 * WIKIPEDIA_REMOVE_LITERALS are removed first. Input that then starts
 * with "<" is handled through the DOM based _cleanWikidataInputHtml();
 * any other input is cleaned using string operations. In both cases,
 * texts of more than 600 characters are cut at the first blank line found
 * from byte offset 600 onwards, and the result is passed through
 * MD_STD_IN::sanitize_text().
 *
 * @param string $input Input string.
 *
 * @return string
 */
private static function _cleanWikidataInput(string $input):string {

    $input = trim($input, '"');
    foreach (self::WIKIPEDIA_REMOVE_LITERALS as $tToRemove) $input = str_replace($tToRemove, "", $input);

    // HTML input: extract the paragraphs' text through the DOM
    if (substr($input, 0, strlen('<')) === '<') {

        $input = self::_cleanWikidataInputHtml($input);

        // Cut off overly long articles at the end of a paragraph.
        // mb_strlen() counts characters while strpos() / substr() work on
        // bytes; more than 600 characters imply at least 600 bytes, so
        // the offset is always within the string.
        if (mb_strlen($input) > 600) {
            if (strpos($input, PHP_EOL . PHP_EOL, 600) !== false) {
                $input = substr($input, 0, strpos($input, PHP_EOL . PHP_EOL, 600));
            }
        }

        $input = self::_cleanSourceBracketsOffTranslation($input);
        $input = str_replace("\t", " ", $input);

        // Remove spaces directly following a newline
        while (strpos($input, PHP_EOL . " ") !== false) {
            $input = str_replace(PHP_EOL . " ", PHP_EOL, $input);
        }

        // Reduce three or more consecutive newlines to two
        while (strpos($input, PHP_EOL . PHP_EOL . PHP_EOL) !== false) {
            $input = str_replace(PHP_EOL . PHP_EOL . PHP_EOL, PHP_EOL . PHP_EOL, $input);
        }

        return MD_STD_IN::sanitize_text($input);

    }

    $input = str_replace(PHP_EOL, '', $input);
    if (empty($input)) return "";

    // Remove infobox tables specifically: if a <table> starts before the
    // first paragraph, continue with the first paragraph after the table.
    $firstParagraphPosition = strpos($input, '<p', 1);
    $currentSearchPos = strpos($input, "<table>");
    if ($currentSearchPos !== false && $currentSearchPos < $firstParagraphPosition) {
        if (($tableEndPos = strpos($input, "</table>")) !== false) {
            if (($pStartPos = strpos($input, '<p', $tableEndPos + 6)) !== false) {
                $input = substr($input, $pStartPos);
            }
        }
    }

    // Remove leftover unnecessary paragraphs before actual content
    $removeFirstParagraph = false;
    $firstParagraphPosition = strpos($input, '<p', 1);
    foreach (["</table>", "<img"] as $tagPart) {
        $currentSearchPos = strpos($input, $tagPart);
        if ($currentSearchPos !== false && $currentSearchPos < $firstParagraphPosition) {
            $removeFirstParagraph = true;
            break;
        }
    }
    if ($removeFirstParagraph === true) {
        $input = substr($input, $firstParagraphPosition ?: 0);
    }

    // Mark paragraph ends with newlines before all tags are stripped
    $input = str_replace('</p>', '</p>' . PHP_EOL . PHP_EOL . PHP_EOL, $input);
    $input = strip_tags($input);

    // Remove inline CSS rules (".mw-parser-output ... }") left over after
    // stripping the tags. Limited to 30 rounds.
    $i = 0;
    while (strpos($input, ".mw-parser-output") !== false and strpos($input, "}", strpos($input, ".mw-parser-output")) !== false) {
        $part1 = substr($input, 0, strpos($input, ".mw-parser-output"));
        $part2 = substr($input, strpos($input, "}", strpos($input, ".mw-parser-output")) + 1);
        $input = $part1 . $part2;
        ++$i;
        if ($i === 30) break;
    }

    $input = self::_cleanSourceBracketsOffTranslation($input);
    $input = str_replace("\t", " ", $input);

    // Collapse runs of spaces into a single space. Searching for two
    // spaces guarantees that each round shortens the text, so the loop
    // terminates.
    while (strpos($input, "  ") !== false) {
        $input = str_replace("  ", " ", $input);
    }

    // Remove spaces directly following a newline
    while (strpos($input, PHP_EOL . " ") !== false) {
        $input = str_replace(PHP_EOL . " ", PHP_EOL, $input);
    }

    // Reduce three or more consecutive newlines to two
    while (strpos($input, PHP_EOL . PHP_EOL . PHP_EOL) !== false) {
        $input = str_replace(PHP_EOL . PHP_EOL . PHP_EOL, PHP_EOL . PHP_EOL, $input);
    }

    // Known boilerplate sentences to be removed wherever they appear
    $stableToRemove = [
        "Vous pouvez partager vos connaissances en laméliorant (comment ?) selon les recommandations des projets correspondants.",
    ];
    foreach ($stableToRemove as $tToRemove) $input = str_replace($tToRemove, "", $input);

    // Known boilerplate after which everything is cut off
    $endings = [
        "StubDenne artikel om et vandløb ",
    ];
    foreach ($endings as $ending) {
        if (strpos($input, $ending) !== false) $input = substr($input, 0, strpos($input, $ending));
    }

    $input = trim($input);

    // Cut off overly long articles at the end of a paragraph
    if (mb_strlen($input) > 600) {
        if (strpos($input, PHP_EOL . PHP_EOL, 600) !== false) {
            $input = trim(substr($input, 0, strpos($input, PHP_EOL . PHP_EOL, 600)));
        }
    }

    if (empty($input)) return '';

    // Remove entity-encoded single-digit source markers ("&#91;1&#93;")
    // and replace apostrophes
    $input = str_replace("'", "´", MD_STD::preg_replace_str("/\&\#91\;[0-9]\&\#93\;/", '', $input));
    $input = html_entity_decode($input);

    return MD_STD_IN::sanitize_text($input);

}
/**
 * Public entry point to _cleanWikidataInput(), available on the command
 * line only (intended for tests).
 *
 * @param string $input Input string.
 *
 * @throws Exception If called through any SAPI other than the CLI.
 *
 * @return string
 */
public static function cleanWikidataInput(string $input):string {

    if (PHP_SAPI === 'cli') {
        return self::_cleanWikidataInput($input);
    }

    throw new Exception("Use this function only for testing");

}
/**
 * Sets the retrieval mode. Only the modes listed in
 * RETRIEVAL_MODES_ACCEPTED are allowed.
 *
 * @param string $retrievalMode New retrieval mode to set.
 *
 * @throws Exception If the mode is not one of the accepted modes.
 *
 * @return void
 */
public function setRetrievalMode(string $retrievalMode):void {

    if (in_array($retrievalMode, self::RETRIEVAL_MODES_ACCEPTED, true)) {
        $this->_retrievalMode = $retrievalMode;
        return;
    }

    $acceptedModes = implode(',', self::RETRIEVAL_MODES_ACCEPTED);
    throw new Exception("Retrieval mode not in list of accepted retrieval modes: " . $acceptedModes);

}
/**
 * Validates a Wikidata ID. A Wikidata ID must start with a capital Q,
 * followed by digits only.
 *
 * Values that merely look numeric (e.g. "Q1.5", "Q-3", "Q1e5" or "Q 12")
 * are rejected.
 *
 * @param string $wikidata_id Input ID to validate.
 *
 * @throws MDgenericInvalidInputsException If the ID is not valid.
 *
 * @return void
 */
public static function validateWikidataId(string $wikidata_id):void {

    if (!str_starts_with($wikidata_id, 'Q')) {
        throw new MDgenericInvalidInputsException("Wikidata IDs start with Q");
    }

    // /D: "$" must only match at the very end, not before a trailing newline
    if (preg_match('/^[0-9]+$/D', substr($wikidata_id, 1)) !== 1) {
        throw new MDgenericInvalidInputsException("Wikidata IDs are numeric following the Q");
    }

}
/**
 * Attempts to determine a Wikidata ID from a URL. Links to Wikidata
 * itself are parsed directly; for links to Wikipedia, the page is loaded
 * to find its link to Wikidata.
 *
 * @param non-empty-string $linkUrl Link to a page.
 *
 * @throws MDExpectedException If the input is not a valid URL.
 *
 * @return string Wikidata ID, or an empty string if none could be found.
 */
public static function getWikidataIdFromLink(string $linkUrl):string {

    if (!filter_var($linkUrl, FILTER_VALIDATE_URL)) {
        throw new MDExpectedException("Invalid URL");
    }

    $wikidataPrefixes = [
        "http://www.wikidata.org/entity/",
        "https://www.wikidata.org/entity/",
        "https://www.wikidata.org/wiki/",
    ];

    foreach ($wikidataPrefixes as $prefix) {
        if (strpos($linkUrl, $prefix) === false) {
            continue;
        }
        $wikidataId = self::getWikidataIdFromWikidataLink($linkUrl);
        if ($wikidataId) {
            return $wikidataId;
        }
    }

    if (strpos($linkUrl, ".wikipedia.org/") !== false) {
        $wikidataId = self::getWikidataIdFromWikipedia($linkUrl);
        if ($wikidataId) {
            return $wikidataId;
        }
    }

    return '';

}
/**
 * Extracts the last path segment from a link to a Wikidata page or
 * entity. Slashes and spaces surrounding the link are ignored.
 *
 * @param string $linkUrl Link to a Wikidata page.
 *
 * @return string Last segment of the link, or an empty string if the
 *                input is not a link to Wikidata.
 */
public static function getWikidataIdFromWikidataLink(string $linkUrl):string {

    $isWikidataLink = false;
    foreach ([
        "https://www.wikidata.org/wiki/",
        "https://www.wikidata.org/entity/",
        "http://www.wikidata.org/entity/",
    ] as $prefix) {
        if (str_contains($linkUrl, $prefix)) {
            $isWikidataLink = true;
            break;
        }
    }
    if (!$isWikidataLink) {
        return '';
    }

    $trimmed   = trim($linkUrl, '/ ');
    $lastSlash = strrpos($trimmed, '/');

    return $lastSlash === false ? $trimmed : substr($trimmed, $lastSlash + 1);

}
/**
 * Attempts to determine a Wikidata ID by loading a Wikipedia page and
 * reading the link to Wikidata from the element with the ID "t-wikibase".
 *
 * @param non-empty-string $linkUrl Link to a Wikipedia page.
 *
 * @return string Wikidata ID, or an empty string if none could be found.
 */
public static function getWikidataIdFromWikipedia(string $linkUrl):string {

    if (strpos($linkUrl, ".wikipedia.org/") === false) {
        return '';
    }

    if (empty($wikipedia_cont = MD_STD::runCurl($linkUrl))) return '';

    // Real-world HTML regularly triggers libxml warnings; collect them
    // internally while parsing. The previous setting is restored in any
    // case, also if loading fails.
    $doc = new DOMDocument();
    $previousErrorMode = libxml_use_internal_errors(true);
    try {
        $loaded = $doc->loadHTML($wikipedia_cont);
    }
    finally {
        libxml_clear_errors();
        libxml_use_internal_errors($previousErrorMode);
    }
    if (!$loaded) {
        return '';
    }

    $wikidataLinkLi = $doc->getElementById("t-wikibase");
    if ($wikidataLinkLi === null) {
        return '';
    }

    // The link is expected to be the first child. If the first child is
    // not an element (e.g. whitespace text), use the first link within.
    $wikidataLink = $wikidataLinkLi->firstChild;
    if (!($wikidataLink instanceof DOMElement)) {
        $wikidataLink = $wikidataLinkLi->getElementsByTagName('a')->item(0);
    }
    if (!($wikidataLink instanceof DOMElement)) {
        return '';
    }

    $t_wikibase = $wikidataLink->getAttribute('href');
    if ($t_wikibase === '') {
        return '';
    }

    // The ID is the last path segment of the link
    $wikidata_id_end = strrpos($t_wikibase, '/');
    if ($wikidata_id_end === false) {
        return '';
    }

    $wikidata_id = trim(substr($t_wikibase, $wikidata_id_end + 1), '/');
    return str_starts_with($wikidata_id, 'Q') ? $wikidata_id : '';

}
/**
 * Runs a SPARQL query against the Wikidata SPARQL endpoint and returns
 * the decoded JSON result.
 *
 * @param string $sparqlQuery Query string.
 *
 * @throws MDhttpFailedException If the response is not a JSON document
 *                               (e.g. on timeouts or error pages).
 *
 * @return array<mixed>
 */
public static function sparqlQuery(string $sparqlQuery):array {

    $url = 'https://query.wikidata.org/sparql?query=' . urlencode($sparqlQuery);
    $result = MD_STD::runCurl($url, 100000000, self::WIKIDATA_FETCH_HEADERS);

    // json_decode() returns null for anything that is not valid JSON,
    // which must not be passed on as the (array) return value.
    $decoded = json_decode($result, true);
    if (!is_array($decoded)) {
        throw new MDhttpFailedException("Failed fetching from Wikidata. Try again later.");
    }

    return $decoded;

}
/**
 * Formulates a SPARQL query string for fetching from Wikidata based on an
 * external ID.
 *
 * The external ID is escaped for use within a SPARQL string literal, and
 * the property ID must have the form of a Wikidata P-ID, so that neither
 * value can alter the structure of the query.
 *
 * @param string $repoName   Name of the repository.
 * @param string $externalId ID in the external repository.
 * @param string $repoPId    Optional P-ID of the external repository. Needed
 *                           for repositories not listed in P_IDS_NODA_TAGS
 *                           (e.g. Geonames and TGN), obsolete otherwise.
 *
 * @throws MDmainEntityNotExistentException If no P-ID is given and the
 *                                          repository is unknown.
 * @throws MDgenericInvalidInputsException  If the P-ID is malformed.
 *
 * @return string
 */
public static function formulateWikidataQueryByExtId(string $repoName, string $externalId, string $repoPId = ''):string {

    if (empty($repoPId)) {
        if (empty(self::P_IDS_NODA_TAGS[$repoName])) {
            throw new MDmainEntityNotExistentException("Unknown external repository. The following repositories are known with their Wikidata ID: " . implode(', ', array_keys(self::P_IDS_NODA_TAGS)));
        }
        $repoPId = self::P_IDS_NODA_TAGS[$repoName];
    }

    if (preg_match('/^P[0-9]+$/D', $repoPId) !== 1) {
        throw new MDgenericInvalidInputsException("Wikidata property IDs consist of a P followed by digits");
    }

    // Escape characters that would end or break the string literal.
    // strtr() works in a single pass, so backslashes added here are not
    // escaped a second time.
    $escapedId = strtr($externalId, [
        '\\' => '\\\\',
        '"'  => '\\"',
        "\n" => '\\n',
        "\r" => '\\r',
    ]);

    return implode("\n", [
        'SELECT ?id ?idLabel WHERE {',
        '?id wdt:' . $repoPId . ' "' . $escapedId . '".',
        'SERVICE wikibase:label {',
        'bd:serviceParam wikibase:language "en" .',
        '}',
        '}',
    ]);

}
/**
 * Gets the Wikidata ID based on a result from Wikidata's SPARQL endpoint.
 *
 * An ID is only returned if the result contains exactly one binding, to
 * rule out ambiguous matches. The ID is the last path segment of the
 * binding's entity URL. Results lacking the expected structure lead to
 * an empty string.
 *
 * @param array<mixed> $queryResult Query result.
 *
 * @return string
 */
public static function readWikidataIdFromSparqlResult(array $queryResult):string {

    $bindings = $queryResult['results']['bindings'] ?? null;
    if (!is_array($bindings) || count($bindings) !== 1) {
        return '';
    }

    $wikidataLink = $bindings[0]['id']['value'] ?? null;
    if (!is_string($wikidataLink) || $wikidataLink === '') {
        return '';
    }

    $endSlashPos = strrpos($wikidataLink, '/');
    if ($endSlashPos === false) {
        return '';
    }

    return substr($wikidataLink, $endSlashPos + 1);

}
/**
 * Queries Wikidata by an external repository's ID and returns the matching
 * Q-ID if there is exactly one.
 *
 * @param string $repoName   Name of the repository.
 * @param string $externalId ID in the external repository.
 * @param string $repoPId    Optional P-ID of the external repository. Needed
 *                           for repositories not listed in P_IDS_NODA_TAGS
 *                           (e.g. Geonames and TGN), obsolete otherwise.
 *
 * @return string
 */
public static function getWikidataIdByExternalId(string $repoName, string $externalId, string $repoPId = ''):string {

    // The P-ID has to be passed on as given: it is the only way to query
    // repositories that are not listed in P_IDS_NODA_TAGS.
    $sparqlQueryString = self::formulateWikidataQueryByExtId($repoName, $externalId, $repoPId);

    $queryResult = self::sparqlQuery($sparqlQueryString);
    return self::readWikidataIdFromSparqlResult($queryResult);

}
/**
 * Gets translation source Wikipedia pages from Wikidata.
 *
 * A language is included if the entity has a label in that language and a
 * sitelink to the respective Wikipedia that provides both URL and title.
 *
 * @param array<string> $checkagainstLanguage Languages to check for.
 * @param array<mixed>  $data                 Data fetched from Wikidata.
 *
 * @return array{0: array<string, non-empty-string>, 1: array<string, string>}
 *         First entry: Wikipedia API URLs by language; second entry: links
 *         to the Wikipedia pages by language.
 */
public static function getWikidataWikipediaTranslationSources(array $checkagainstLanguage, array $data):array {

    $languagesToFetch = $wikilinks = [];

    foreach ($checkagainstLanguage as $lang) {

        if (empty($data['labels'][$lang])) {
            continue;
        }

        // Sitelinks without URL or title cannot be fetched
        $wikilink = $data['sitelinks'][$lang . 'wiki']['url'] ?? null;
        $title    = $data['sitelinks'][$lang . 'wiki']['title'] ?? null;
        if (!is_string($wikilink) || !is_string($title)) {
            continue;
        }

        $wikilinkterm = str_replace(' ', '_', $title);

        $languagesToFetch[$lang] = self::_getWikipediaApiLink($lang, $wikilinkterm);
        $wikilinks[$lang] = $wikilink;

    }

    return [$languagesToFetch, $wikilinks];

}
/**
 * Loads translations from Wikipedia pages through wikidata and then merges
 * them with Wikidata's own translations into a usable array.
 *
 * For each language, the cleaned start of the Wikipedia article is used as
 * description if available. Otherwise, Wikidata's own label and description
 * are used, with an empty link.
 *
 * NOTE(review): if no language at all has a Wikipedia page, an empty list
 * is returned and Wikidata's own descriptions are not used - confirm that
 * this is intended.
 *
 * @param array<string> $checkagainstLanguage Languages to check for.
 * @param array<mixed>  $data                 Data fetched from Wikidata.
 *
 * @throws MDExpectedException If the requests could not be initialized.
 *
 * @return array<string, array{label: string, description: string, link: string}>
 */
public static function listTranslationsFromWikidataWikipedia(array $checkagainstLanguage, array $data):array {

    [$languagesToFetch, $wikilinks] = self::getWikidataWikipediaTranslationSources($checkagainstLanguage, $data);

    if (empty($languagesToFetch)) {
        return [];
    }

    try {
        $contents = MD_STD::runCurlMulti($languagesToFetch, 10000);
    }
    catch (TypeError $e) {
        throw new MDExpectedException("Failed to initialize a request. Try pressing F5 to run the requests again.");
    }

    $output = [];

    foreach ($checkagainstLanguage as $lang) {

        $hasWikipediaSource = !empty($languagesToFetch[$lang])
            && !empty($data['sitelinks'][$lang . 'wiki'])
            && !empty($wikilinks[$lang]);

        if ($hasWikipediaSource && !empty($contents[$lang])) {

            // Responses that are not valid JSON or lack the parsed text
            // lead to the fallback below.
            $decoded      = json_decode($contents[$lang], true);
            $descFromWiki = $decoded['parse']['text']['*'] ?? null;
            $tDescription = is_scalar($descFromWiki) ? (string)$descFromWiki : '';

            if ($tDescription !== '' && !empty($desc_cleaned = self::_cleanWikidataInput($tDescription))) {
                $output[$lang] = [
                    'label' => self::_cleanWikidataInput((string)$data['labels'][$lang]['value']),
                    'description' => '"' . $desc_cleaned . '" - (' . $data['labels'][$lang]['language'] . '.wikipedia.org ' . date('d.m.Y') . ')',
                    'link' => $wikilinks[$lang],
                ];
                continue;
            }

        }

        // Fallback: Use Wikidata's own label and description
        if (!empty($data['labels'][$lang]['value']) && isset($data['descriptions'][$lang]['value'])) {
            $output[$lang] = [
                'label' => self::_cleanWikidataInput((string)$data['labels'][$lang]['value']),
                'description' => self::_cleanWikidataInput((string)$data['descriptions'][$lang]['value']),
                'link' => "",
            ];
        }

    }

    return $output;

}
/**
 * Writes a description fetched from Wikipedia to an actor's description
 * field (`persinst_kurzinfo`).
 *
 * The text is wrapped in quotes and followed by a source note stating the
 * language and the current date. If the actor has no description yet (or
 * one starting with "GND"), the new text is simply stored. Otherwise, the
 * retrieval mode decides: "add" appends the new text, "replace" overwrites
 * the existing one, "keep" changes nothing. In any other mode (i.e. the
 * default, "list"), a page offering these three options is printed and
 * the script exits.
 *
 * Unless the script exits, the actor's edit metadata (time, user) are
 * updated afterwards, regardless of whether the description was changed.
 *
 * NOTE(review): in the default mode, the existing description, the new
 * text and the Wikidata ID are printed without HTML escaping at this
 * point - confirm that they are sanitized before they get here.
 *
 * @param integer $persinst_id  Person ID.
 * @param string  $wikidata_id  Wikidata ID.
 * @param string  $datafromwiki Data fetched from Wikipedia.
 * @param string  $preflang     The user's currently used language.
 * @param string  $lang         Currently queried language.
 * @param string  $erfasst_von  User who adds the info.
 *
 * @throws Exception If there is no actor of the given ID.
 *
 * @return boolean True if the description has been written, false otherwise.
 */
public function retrievePersinstDescFromWikipedia(int $persinst_id, string $wikidata_id, string $datafromwiki, string $preflang, string $lang, string $erfasst_von):bool {
$output = false;
// Quote the text and append the source note
$datafromwiki = '"' . $datafromwiki . '" - (Wikipedia (' . $lang . ') ' . date('d.m.Y') . ')';
// Load the current description and display name
$cergebnis = $this->_mysqli_noda->query_by_stmt("SELECT `persinst_kurzinfo`, `persinst_anzeigename` AS `display_name`
FROM `persinst`
WHERE `persinst_id` = ?", "i", $persinst_id);
if (!($cinfo = $cergebnis->fetch_row())) {
throw new Exception("There is no actor of ID #" . $persinst_id);
}
$cergebnis->close();
$persinst_kurzinfo = $cinfo[0];
$display_name = $cinfo[1];
// Update persinst table
$updatePersinstStmt = $this->_mysqli_noda->do_prepare("UPDATE `persinst`
SET `persinst_kurzinfo` = ?
WHERE `persinst_id` = ?");
// There is a description already, and it does not start with "GND":
// act according to the retrieval mode
if (!empty($persinst_kurzinfo) and substr($persinst_kurzinfo, 0, 3) !== 'GND') {
switch ($this->_retrievalMode) {
case "add":
// Append the new text, separated by a blank line
$newDesc = $persinst_kurzinfo . PHP_EOL . PHP_EOL . $datafromwiki;
$updatePersinstStmt->bind_param("si", $newDesc, $persinst_id);
$updatePersinstStmt->execute();
$output = true;
break;
case "keep":
break;
case "replace":
$updatePersinstStmt->bind_param("si", $datafromwiki, $persinst_id);
$updatePersinstStmt->execute();
$output = true;
break;
default:
// Let the user decide: print old and new text along with links for
// keeping, replacing or adding, then stop. The prepared statement is
// not closed explicitly on this path.
$tlLoader = new MDTlLoader("wiki_getter_persinst", $preflang);
echo self::generateHTMLHeadForWikidataFetcher($lang);
echo self::generateWikidataFetcherHeader($tlLoader, "", $display_name);
echo '
<p class="alert icons iconsAlert">Es gibt schon einen Eintrag im Beschreibungsfeld</p>
<div class="wikiReplaceTTile">
<h3>Bisher vorhanden</h3>
<p>' . nl2br($persinst_kurzinfo) . '</p>
</div>
<div class="wikiReplaceTTile">
<h3>Jetzt gefunden</h3><p>' . $datafromwiki . '</p>
</div>
<a href="get_wikidata_for_persinst.php?wikidata_id=' . $wikidata_id . write_get_vars(['suchbegriff', 'lang', 'persinst_id']) . '&keep=keep" class="buttonLike icons iconsPin">Keep old entry</a>';
echo '<br><a href="get_wikidata_for_persinst.php?wikidata_id=' . $wikidata_id . write_get_vars(['suchbegriff', 'lang', 'persinst_id']) . '&keep=replace" class="buttonLike icons iconsPinOff">Replace with new entry</a>';
echo '<br><a href="get_wikidata_for_persinst.php?wikidata_id=' . $wikidata_id . write_get_vars(['suchbegriff', 'lang', 'persinst_id']) . '&keep=add" class="buttonLike icons iconsPlusOne">Keep old and add new entry</a><br><br><br>';
exit;
}
}
// No description so far (or one starting with "GND"): store the new text
else {
$updatePersinstStmt->bind_param("si", $datafromwiki, $persinst_id);
$updatePersinstStmt->execute();
$output = true;
}
$updatePersinstStmt->close();
// Update edit metadata
$updatePersinstEditInfoStmt = $this->_mysqli_noda->do_prepare("UPDATE `persinst`
SET `persinst_erfasst_am` = NOW(),
`persinst_erfasst_von` = ?
WHERE `persinst_id` = ?");
$updatePersinstEditInfoStmt->bind_param("si", $erfasst_von, $persinst_id);
$updatePersinstEditInfoStmt->execute();
$updatePersinstEditInfoStmt->close();
return $output;
}
/**
 * Fills in an actor's year of birth, year of death and gender based on
 * Wikidata information (claims P569, P570 and P21).
 *
 * Each of the three fields is only written if it is currently stored as
 * an empty string and Wikidata provides a value. Only the first claim of
 * each property is evaluated.
 *
 * Note that the fields are written one after another: if the gender from
 * Wikidata is unknown to this function, the exception is thrown after
 * the years of birth and death may already have been updated.
 *
 * @param array<mixed> $data        Data loaded from Wikidata.
 * @param integer      $persinst_id Actor ID.
 *
 * @throws MDmainEntityNotExistentException If the actor cannot be loaded.
 * @throws Exception                        If Wikidata states a gender that
 *                                          is not covered here.
 *
 * @return void
 */
public function enterPersinstBirthDeathDatesFromWikidata(array $data, int $persinst_id):void {
// Load the values currently stored for the actor
$result = $this->_mysqli_noda->query_by_stmt("SELECT `persinst_geburtsjahr`,
`persinst_sterbejahr`, `persinst_gender`
FROM `persinst`
WHERE `persinst_id` = ?", "i", $persinst_id);
if (!($actor_dates = $result->fetch_assoc())) {
throw new MDmainEntityNotExistentException("Failed to fetch actor information");
}
$result->close();
if ($actor_dates['persinst_geburtsjahr'] === '') {
// Try to get birth date
if (!empty($data['claims']['P569'])
and !empty($data['claims']['P569']['0']['mainsnak']['datavalue']['value']['time'])
// Ignore entries with century / very imprecise birth dates
// (only a precision of exactly 7 is excluded here)
and (empty($data['claims']['P569']['0']['mainsnak']['datavalue']['value']['precision']) || (int)$data['claims']['P569']['0']['mainsnak']['datavalue']['value']['precision'] !== 7)
) {
// wikidataBirthDeathToYear() is defined elsewhere in this class;
// presumably it reduces Wikidata's time string to the year - verify.
$birth_date = self::wikidataBirthDeathToYear($data['claims']['P569']['0']['mainsnak']['datavalue']['value']['time']);
}
if (!empty($birth_date)) {
$updateStmt = $this->_mysqli_noda->do_prepare("UPDATE `persinst`
SET `persinst_geburtsjahr` = ?
WHERE `persinst_id` = ?
LIMIT 1");
$updateStmt->bind_param("ii", $birth_date, $persinst_id);
$updateStmt->execute();
$updateStmt->close();
}
}
if ($actor_dates['persinst_sterbejahr'] === '') {
// Try to get death date (unlike for the birth date, the precision
// is not checked here)
if (!empty($data['claims']['P570']) and !empty($data['claims']['P570']['0']['mainsnak']['datavalue']['value']['time'])) {
$death_date = self::wikidataBirthDeathToYear($data['claims']['P570']['0']['mainsnak']['datavalue']['value']['time']);
}
if (!empty($death_date)) {
$updateStmt = $this->_mysqli_noda->do_prepare("UPDATE `persinst`
SET `persinst_sterbejahr` = ?
WHERE `persinst_id` = ?
LIMIT 1");
$updateStmt->bind_param("ii", $death_date, $persinst_id);
$updateStmt->execute();
$updateStmt->close();
}
}
if ($actor_dates['persinst_gender'] === '') {
// Try to get gender: map the Q-ID stated in Wikidata to the values
// stored in the database
if (!empty($data['claims']['P21']) and !empty($data['claims']['P21']['0']['mainsnak']['datavalue']['value']['id'])) {
$wikidata_gender_id = $data['claims']['P21']['0']['mainsnak']['datavalue']['value']['id'];
switch ($wikidata_gender_id) {
case "Q6581097": // male
case "Q44148": // male organism
case "Q2449503": // transgender man
$wikidata_gender = "male";
break;
case "Q6581072": // NOTE(review): presumably "female" - confirm
case "Q1052281": // transgender female
case "Q43445": // female organism
$wikidata_gender = "female";
break;
case "Q48270": // NOTE(review): Q-ID not explained in the code - confirm
$wikidata_gender = "other";
break;
default:
// Any other gender stated in Wikidata aborts the update
throw new Exception("Unknown gender: Q-ID is " . $wikidata_gender_id);
}
}
if (!empty($wikidata_gender)) {
$updateStmt = $this->_mysqli_noda->do_prepare("UPDATE `persinst`
SET `persinst_gender` = ?
WHERE `persinst_id` = ?
LIMIT 1");
$updateStmt->bind_param("si", $wikidata_gender, $persinst_id);
$updateStmt->execute();
$updateStmt->close();
}
}
}
/**
 * Retrieves information on an actor from Wikidata and Wikipedia and
 * stores it: description, years of birth and death, gender, links to
 * other norm data repositories, and translations. Finally, the edit is
 * logged.
 *
 * The description is taken from the Wikipedia in the user's language if
 * possible. Otherwise, the Wikipedias of the languages listed in
 * LANGUAGES_MAIN_DESC are tried in order, until a description has been
 * entered.
 *
 * @param string  $lang        The user's selected used language.
 * @param string  $wikidata_id Wikidata ID.
 * @param integer $persinst_id Actor ID.
 * @param string  $erfasst_von User name who's currently editing.
 *
 * @return void
 */
public function retrievePersinstInfoFromWikidataID(string $lang, string $wikidata_id, int $persinst_id, string $erfasst_von):void {

    self::validateWikidataId($wikidata_id);
    $data = self::_getWikidataEntity($wikidata_id);

    // Get links to wikipedia
    $wikilinks = self::_getWikipediaLinksFromWikidataOutput($data);

    // First choice: Wikipedia in the user's language
    $alreadyEntered = false;
    if (isset($wikilinks[$lang])) {
        if (!empty($datafromwiki = self::_getCleanedWikipediaSnippet($lang, $wikilinks[$lang]['title']))) {
            $alreadyEntered = $this->retrievePersinstDescFromWikipedia($persinst_id, $wikidata_id, $datafromwiki, $lang, $lang, $erfasst_von);
        }
    }

    // Fallback: Wikipedias in the other main languages
    foreach (self::LANGUAGES_MAIN_DESC as $cur_lang) {

        if ($alreadyEntered === true) break;
        if (!isset($wikilinks[$cur_lang])) continue;

        // The page title belongs to the Wikipedia of $cur_lang, so it has
        // to be requested from that Wikipedia (not from the user's one).
        if ($datafromwiki = self::_getCleanedWikipediaSnippet($cur_lang, $wikilinks[$cur_lang]['title'])) {
            $alreadyEntered = $this->retrievePersinstDescFromWikipedia($persinst_id, $wikidata_id, $datafromwiki, $lang, "$cur_lang", $erfasst_von);
        }

    }

    $this->enterPersinstBirthDeathDatesFromWikidata($data, $persinst_id);

    // Get links to other norm data sources
    if (!empty($nodaLinks = $this->_getNodaLinksFromWikidataResult('persinst', $wikidata_id, $data))) {
        NodaBatchInserter::linkNodaForPersinst($this->_mysqli_noda, $persinst_id, $nodaLinks, $erfasst_von);
    }

    $this->getWikidataTranslationsForPersinst($data, $persinst_id);

    NodaLogEdit::logPersinstEdit($this->_mysqli_noda, $persinst_id, "wikidata-fetcher", $erfasst_von, 'update', 'synchronize');

}
/**
 * Retrieves only the links to norm data repositories for an actor from
 * Wikidata and stores them.
 *
 * @param string  $wikidata_id Wikidata ID.
 * @param integer $persinst_id Actor ID.
 * @param string  $erfasst_von User name who's currently editing.
 *
 * @return void
 */
public function retrievePersinstNormDataLinksFromWikidataID(string $wikidata_id, int $persinst_id, string $erfasst_von) {

    self::validateWikidataId($wikidata_id);

    $entity    = self::_getWikidataEntity($wikidata_id);
    $nodaLinks = $this->_getNodaLinksFromWikidataResult('persinst', $wikidata_id, $entity);

    if (empty($nodaLinks)) {
        return;
    }

    NodaBatchInserter::linkNodaForPersinst($this->_mysqli_noda, $persinst_id, $nodaLinks, $erfasst_von);

}
/**
 * Links a place to the norm data sources referenced by a Wikidata entity.
 *
 * Only the norm data links are touched: the Q-ID is validated, the entity is
 * loaded and the links found for it are passed to
 * NodaBatchInserter::linkNodaForPlace(). Nothing is written if no links
 * were found.
 *
 * @param string  $wikidata_id Wikidata Q-ID.
 * @param integer $onum        Place ID.
 * @param string  $erfasst_von User name of the current user.
 *
 * @return void
 */
public function retrievePlaceNormDataLinksFromWikidataID(string $wikidata_id, int $onum, string $erfasst_von) {

    self::validateWikidataId($wikidata_id);

    $entity    = self::_getWikidataEntity($wikidata_id);
    $nodaLinks = $this->_getNodaLinksFromWikidataResult('place', $wikidata_id, $entity);

    if (empty($nodaLinks)) {
        return;
    }

    NodaBatchInserter::linkNodaForPlace($this->_mysqli_noda, $onum, $nodaLinks, $erfasst_von);

}
/**
 * Stores the translations available for an actor in a Wikidata entity.
 *
 * The translations are collected by listTranslationsFromWikidataWikipedia()
 * and turned into one row per language (actor ID, language, name,
 * description, link) for NodaBatchInserter::insertPersinstTranslations().
 *
 * @param array<mixed> $data          Entity fetched from wikidata.
 * @param integer      $persinst_id   Actor ID.
 * @param string[]     $checkForLangs Languages to check for. Defaults to all
 *                                    languages generally loaded by the wikidata fetcher.
 *
 * @return void
 */
public function getWikidataTranslationsForPersinst(array $data, int $persinst_id, array $checkForLangs = self::LANGUAGES_TO_CHECK):void {

    $translations = self::listTranslationsFromWikidataWikipedia($checkForLangs, $data);
    if (empty($translations)) {
        return;
    }

    $rows = [];
    foreach ($translations as $language => $translation) {
        $row = ['persinst_id' => $persinst_id, 'lang' => $language];
        $row['name']        = $translation['label'];
        $row['description'] = $translation['description'];
        $row['link']        = $translation['link'];
        $rows[] = $row;
    }

    NodaBatchInserter::insertPersinstTranslations($this->_mysqli_noda, $rows);

}
/**
 * Reads the description (`ort_anmerkung`) currently stored for a place.
 *
 * @param integer $onum Place ID.
 *
 * @throws Exception If no place with the given ID exists.
 *
 * @return string
 */
private function getPlaceDescription(int $onum):string {

    $result = $this->_mysqli_noda->query_by_stmt("SELECT `ort_anmerkung`
FROM `orte`
WHERE `ort_id` = ?", "i", $onum);

    // The result is released in every case before the row is evaluated
    $row = $result->fetch_row();
    $result->close();

    if (!$row) {
        throw new Exception("This place does not exist");
    }

    return $row[0];

}
/**
 * Reads the description (`tag_anmerkung`) currently stored for a tag.
 *
 * Unlike the place lookup, an unknown tag ID is not an error here: an empty
 * string is returned instead.
 *
 * @param integer $tag_id Tag ID.
 *
 * @return string
 */
private function getTagDescription(int $tag_id):string {

    $queryResult = $this->_mysqli_noda->query_by_stmt("SELECT `tag_anmerkung`
FROM `tag`
WHERE `tag_id` = ?", "i", $tag_id);

    $row = $queryResult->fetch_row();
    $queryResult->close();

    if ($row) {
        return $row[0];
    }

    return '';

}
/**
 * Writes a description taken from Wikipedia to a place entry.
 *
 * The text is wrapped in quotes and suffixed with its source
 * ("Wikipedia (<lang>) <date>"). If the place already has a description that
 * does not start with "GND", $this->_retrievalMode decides what happens:
 * "add" appends the new text to the old one, "keep" writes the old text
 * back, "replace" overwrites it. For any other mode an HTML page offering
 * these three choices is printed and the script exits.
 *
 * @param string  $cur_place_desc Description currently stored for the place.
 * @param string  $datafromwiki   Cleaned text fetched from Wikipedia.
 * @param string  $preflang       Language of the user interface in general.
 * @param string  $lang           Language of the main entry.
 * @param integer $placeID        ID of the place.
 * @param string  $erfasst_von    User name.
 *
 * @return boolean
 */
public function enterPlaceDescFromWikidata(string $cur_place_desc, string $datafromwiki, string $preflang, string $lang, int $placeID, string $erfasst_von):bool {

    $datafromwiki = '"' . $datafromwiki . '" - (Wikipedia (' . $lang . ') ' . date("d.m.Y") . ')';

    if (!empty(trim($cur_place_desc)) and substr($cur_place_desc, 0, 3) !== 'GND') {

        switch ($this->_retrievalMode) {
            case "add":
                $datafromwiki = $cur_place_desc . PHP_EOL . PHP_EOL . $datafromwiki;
                break;
            case "keep":
                $datafromwiki = $cur_place_desc;
                break;
            case "replace":
                break;
            default:
                // No decision has been made yet: show both texts and let the user choose.
                $tlLoader = new MDTlLoader("wiki_getter_place", $preflang);
                echo self::generateHTMLHeadForWikidataFetcher($lang);
                echo self::generateWikidataFetcherHeader($tlLoader);

                // Both texts are data, not markup: escape them before printing.
                // Existing entities (e.g. &#160;) are kept as they are instead of
                // being encoded a second time.
                $oldDescHtml = nl2br(htmlspecialchars($cur_place_desc, ENT_QUOTES | ENT_SUBSTITUTE, 'UTF-8', false));
                $newDescHtml = htmlspecialchars($datafromwiki, ENT_QUOTES | ENT_SUBSTITUTE, 'UTF-8', false);
                $getVars     = write_get_vars(['suchbegriff', 'lang', 'wikidata_id', 'ort_id']);

                echo '
<p class="alert icons iconsAlert">There is already an entry for description ...</p>
<div class="wikiReplaceTTile">
<h3>Actual entry</h3><p>' . $oldDescHtml . '</p>
</div>
<div class="wikiReplaceTTile">
<h3>Now found</h3>
<p>' . $newDescHtml . '</p>
</div>
<a href="get_wikidata_for_ort.php?keep=keep' . $getVars . '" class="buttonLike icons iconsPin">Keep old entry</a>
<br><a href="get_wikidata_for_ort.php?keep=replace' . $getVars . '" class="buttonLike icons iconsPinOff">Replace with new entry</a>
<br><a href="get_wikidata_for_ort.php?keep=add' . $getVars . '" class="buttonLike icons iconsPlusOne">Keep old and add new entry</a><br><br><br>
';
                exit;
        }

    }

    // Write description to DB
    $updateStmt = $this->_mysqli_noda->do_prepare("UPDATE `orte`
SET `ort_anmerkung` = ?,
`ort_erfasst_am` = NOW(),
`ort_erfasst_von` = ?
WHERE ort_id = ?");

    try {
        $updateStmt->bind_param("ssi", $datafromwiki, $erfasst_von, $placeID);
        $updateStmt->execute();
    }
    catch (MDMysqliInvalidEncodingError $e) {
        // Deliberately ignored: text with an invalid encoding is simply not stored.
        // NOTE(review): true is still returned in this case, so callers treat
        // the description as entered - confirm that this is intended.
    }

    $updateStmt->close();
    unset($updateStmt);

    return true;

}
/**
 * Enters the superordinate place named by a Wikidata P131 claim.
 *
 * A relation is only written if the first claim names an entity, the place
 * has no entry as subordinate (`ort_menor_id`) in `ort_relation` yet, and the
 * named entity is linked to a place via `noda_orte` (source "wikidata").
 *
 * @param integer      $onum Place ID.
 * @param array<mixed> $data Wikidata information (P131 claim).
 *
 * @return void
 */
public function retrieveSuperordinateAdministrativePlace(int $onum, array $data):void {

    // Nothing to do without a target entity in the first claim
    if (empty($data[0]["mainsnak"]["datavalue"]["value"]["id"])) {
        return;
    }
    $superordinateId = $data[0]["mainsnak"]["datavalue"]["value"]["id"];

    // Check if there already is a superordinate of the current place
    $existing = $this->_mysqli_noda->query_by_stmt("SELECT 1
FROM `ort_relation`
WHERE `ort_menor_id` = ?
LIMIT 1", "i", $onum);
    $hasSuperordinate = ($existing->num_rows !== 0);
    $existing->close();

    if ($hasSuperordinate) {
        return;
    }

    // If there is no superordinate, check if the identified superordinate
    // is known in the noda DB.
    $lookup = $this->_mysqli_noda->query_by_stmt("SELECT `ort_id`
FROM `noda_orte`
WHERE `noda_source` = 'wikidata'
AND `noda_nrinsource` = ?", "s", $superordinateId);
    $knownPlace = $lookup->fetch_row();
    $lookup->close();

    if (!$knownPlace) {
        return;
    }
    $topPlaceId = $knownPlace[0];

    // Enter superordinate place by Wikidata
    $insertStmt = $this->_mysqli_noda->do_prepare("INSERT INTO `ort_relation`
(`ort_mayor_id`, `ort_menor_id`, `ort_relation`)
VALUES
(?, ?, 1)");
    $insertStmt->bind_param("ii", $topPlaceId, $onum);
    $insertStmt->execute();
    $insertStmt->close();

}
/**
 * Synchronizes a place entry with the Wikidata entity identified by a Q-ID.
 *
 * In order: the superordinate place (P131) is entered, a description is
 * fetched from Wikipedia (user language first, then
 * self::LANGUAGES_MAIN_DESC until one was entered), norm data links are
 * stored, the values read from P1566 and P1667 as well as the coordinates
 * are written to `orte`, translations are stored and the edit is logged.
 *
 * @param string  $lang        Language.
 * @param string  $wikidata_id Wikidata Q-ID.
 * @param integer $onum        Place ID.
 * @param string  $erfasst_von User name of the current user.
 *
 * @return void
 */
public function retrievePlaceInfoFromWikidataID(string $lang, string $wikidata_id, int $onum, string $erfasst_von):void {

    self::validateWikidataId($wikidata_id);
    $data = self::_getWikidataEntity($wikidata_id);
    $wikilinks = self::_getWikipediaLinksFromWikidataOutput($data);

    // P131: Located in administrative unit
    if (isset($data['claims']['P131'])) {
        $this->retrieveSuperordinateAdministrativePlace($onum, $data['claims']['P131']);
    }

    // Get current description for overwriting
    $cur_place_desc = $this->getPlaceDescription($onum);

    // Requests the parsed article and returns the cleaned text, or '' if the
    // response carries no usable parse.text.* entry. Indexing the decoded
    // response unguarded raised warnings whenever the request failed or the
    // API answered with an error object.
    $fetchSnippet = static function (string $wikiLang, string $title) {
        $response = json_decode(MD_STD::runCurl(self::_getWikipediaApiLink($wikiLang, $title), 10000), true);
        $html = is_array($response) ? ($response['parse']['text']['*'] ?? null) : null;
        if (empty($html) || !is_string($html)) {
            return '';
        }
        return self::_cleanWikidataInput($html);
    };

    // First choice: the article in the user's own language
    $alreadyEntered = false;
    if (!empty($wikilinks[$lang])) {
        $datafromwiki = $fetchSnippet($lang, $wikilinks[$lang]['title']);
        if (!empty($datafromwiki)) {
            $alreadyEntered = $this->enterPlaceDescFromWikidata($cur_place_desc, $datafromwiki, $lang, $lang, $onum, $erfasst_von);
        }
    }

    // Fallback: the main description languages, in their listed order
    foreach (self::LANGUAGES_MAIN_DESC as $cur_lang) {

        if ($alreadyEntered === true) break;
        if (!isset($wikilinks[$cur_lang]['url'])) continue;

        $datafromwiki = $fetchSnippet($cur_lang, $wikilinks[$cur_lang]['title']);
        if (!empty($datafromwiki)) {
            $alreadyEntered = $this->enterPlaceDescFromWikidata($cur_place_desc, $datafromwiki, $lang, $cur_lang, $onum, $erfasst_von);
        }

    }

    // Claims without a value (no "datavalue" in the snak) yield false here
    // instead of an "undefined array key" warning.
    $geonames_id = filter_var($data['claims']['P1566'][0]['mainsnak']['datavalue']['value'] ?? null, FILTER_VALIDATE_INT);
    $tgn_id      = filter_var($data['claims']['P1667'][0]['mainsnak']['datavalue']['value'] ?? null, FILTER_VALIDATE_INT);

    if (!empty($nodaLinks = $this->_getNodaLinksFromWikidataResult('place', $wikidata_id, $data))) {
        NodaBatchInserter::linkNodaForPlace($this->_mysqli_noda, $onum, $nodaLinks, $erfasst_von);
    }

    $coordinates_wd = self::_getPlaceCoordinatesFromWikidata($data);

    // The single-column updates below are committed together
    $this->_mysqli_noda->autocommit(false);

    if (!empty($tgn_id)) {
        // NOTE(review): the value read from P1667 is stored in `ort_land` -
        // presumably that column holds the TGN ID; confirm against the schema.
        $updateStmt = $this->_mysqli_noda->do_prepare("UPDATE `orte`
SET `ort_land` = ?
WHERE `ort_id` = ?");
        $updateStmt->bind_param("ii", $tgn_id, $onum);
        $updateStmt->execute();
        $updateStmt->close();
        unset($updateStmt);
    }

    if (!empty($geonames_id)) {
        $updateStmt = $this->_mysqli_noda->do_prepare("UPDATE `orte`
SET `ort_geonames` = ?
WHERE `ort_id` = ?");
        $updateStmt->bind_param("ii", $geonames_id, $onum);
        $updateStmt->execute();
        $updateStmt->close();
        unset($updateStmt);
    }

    if (!empty($coordinates_wd)) {
        $updateStmt = $this->_mysqli_noda->do_prepare("UPDATE `orte`
SET `ort_nord_sued` = ?, `ort_west_ost` = ?
WHERE `ort_id` = ?");
        $updateStmt->bind_param("ddi", $coordinates_wd['latitude'], $coordinates_wd['longitude'], $onum);
        $updateStmt->execute();
        $updateStmt->close();
        unset($updateStmt);
    }

    $this->_mysqli_noda->commit();
    $this->_mysqli_noda->autocommit(true);

    $this->getWikidataTranslationsForPlace($data, $onum);

    NodaLogEdit::logPlaceEdit($this->_mysqli_noda, $onum, "wikidata-fetcher", $erfasst_von, 'update', 'synchronize');

}
/**
 * Stores the translations available for a place in a Wikidata entity.
 *
 * The translations are collected by listTranslationsFromWikidataWikipedia()
 * and turned into one row per language (place ID, language, name,
 * description, link) for NodaBatchInserter::insertPlaceTranslations().
 *
 * @param array<mixed> $data          Entity data fetched from wikidata.
 * @param integer      $ort_id        Place ID.
 * @param string[]     $checkForLangs Languages to check for. Defaults to all
 *                                    languages generally loaded by the wikidata fetcher.
 *
 * @return void
 */
public function getWikidataTranslationsForPlace(array $data, int $ort_id, array $checkForLangs = self::LANGUAGES_TO_CHECK):void {

    $translations = self::listTranslationsFromWikidataWikipedia($checkForLangs, $data);
    if (empty($translations)) {
        return;
    }

    $rows = [];
    foreach ($translations as $language => $translation) {
        $row = ['ort_id' => $ort_id, 'lang' => $language];
        $row['name']        = $translation['label'];
        $row['description'] = $translation['description'];
        $row['link']        = $translation['link'];
        $rows[] = $row;
    }

    NodaBatchInserter::insertPlaceTranslations($this->_mysqli_noda, $rows);

}
/**
 * Writes a description taken from Wikipedia to a tag entry.
 *
 * The text is wrapped in quotes, suffixed with its source
 * ("Wikipedia (<lang>) <date>"), stripped of encoded footnote markers
 * (&#91;n&#93;) and has its apostrophes replaced. If the tag already has a
 * description that does not start with "GND", $this->_retrievalMode decides
 * what happens: "add" appends the new text, "keep" leaves the description
 * untouched, "replace" overwrites it. For any other mode an HTML page
 * offering these three choices is printed and the script exits.
 *
 * The editing metadata of the tag is updated in every case that returns.
 *
 * @param integer $tag_id       Tag ID.
 * @param string  $datafromwiki Data fetched from Wikipedia.
 * @param string  $preflang     The user's currently used language.
 * @param string  $lang         Currently queried language.
 * @param string  $erfasst_von  User who adds the info.
 *
 * @return boolean Always true, also when the old description was kept.
 */
public function retrieveTagDescFromWikipedia(int $tag_id, string $datafromwiki, string $preflang, string $lang, string $erfasst_von):bool {

    $datafromwiki = '"' . $datafromwiki . '" - (Wikipedia (' . $lang . ') ' . date("d.m.Y") . ')';
    // Footnote markers may have more than one digit ([12]), hence the "+"
    $datafromwiki = str_replace("'", "´", MD_STD::preg_replace_str("/\&\#91\;[0-9]+\&\#93\;/", '', $datafromwiki));

    $tag_anmerkung = $this->getTagDescription($tag_id);

    // Text to be written; null means that the stored description stays as it is
    $newDesc = $datafromwiki;

    if (!empty($tag_anmerkung) and substr($tag_anmerkung, 0, 3) !== 'GND') {

        switch ($this->_retrievalMode) {
            case "add":
                $newDesc = $tag_anmerkung . PHP_EOL . PHP_EOL . $datafromwiki;
                break;
            case "keep":
                $newDesc = null;
                break;
            case "replace":
                break;
            default:
                // No decision has been made yet: show both texts and let the user choose.
                $tlLoader = new MDTlLoader("wiki_getter_tag", $preflang);
                echo self::generateHTMLHeadForWikidataFetcher($lang);
                echo self::generateWikidataFetcherHeader($tlLoader);

                // Both texts are data, not markup: escape them before printing.
                // Existing entities are kept instead of being encoded twice.
                $oldDescHtml = nl2br(htmlspecialchars($tag_anmerkung, ENT_QUOTES | ENT_SUBSTITUTE, 'UTF-8', false));
                $newDescHtml = htmlspecialchars($datafromwiki, ENT_QUOTES | ENT_SUBSTITUTE, 'UTF-8', false);
                $getVars     = write_get_vars(['suchbegriff', 'lang', 'wikidata_id', 'tag_id']);

                echo '
<p class="alert icons iconsAlert">Es gibt schon einen Eintrag im Beschreibungsfeld</p>
<div class="wikiReplaceTTile">
<h3>Bisher vorhanden</h3><p>' . $oldDescHtml . '</p>
</div>
<div class="wikiReplaceTTile">
<h3>Jetzt gefunden</h3><p>' . $newDescHtml . '</p>
</div>
<a href="get_wikidata_for_tag.php?keep=keep' . $getVars . '" class="buttonLike icons iconsPin">Keep old entry</a>';
                echo '<br><a href="get_wikidata_for_tag.php?keep=replace' . $getVars . '" class="buttonLike icons iconsPinOff">Replace with new entry</a>';
                echo '<br><a href="get_wikidata_for_tag.php?keep=add' . $getVars . '" class="buttonLike icons iconsPlusOne">Keep old and add new entry</a><br><br><br>';
                exit;
        }

    }

    if ($newDesc !== null) {

        $this->_mysqli_noda->autocommit(false);

        $updateTagDescStmt = $this->_mysqli_noda->do_prepare("UPDATE `tag`
SET `tag_anmerkung` = ?
WHERE `tag_id` = ?");
        $updateTagDescStmt->bind_param("si", $newDesc, $tag_id);
        $updateTagDescStmt->execute();
        $updateTagDescStmt->close();

        $this->_mysqli_noda->commit();
        $this->_mysqli_noda->autocommit(true);

    }

    // Update tag editing metadata
    $updateTagEditInfoStmt = $this->_mysqli_noda->do_prepare("UPDATE `tag`
SET `tag_erfasst_am` = NOW(),
`tag_erfasst_von` = ?
WHERE `tag_id` = ?");
    $updateTagEditInfoStmt->bind_param("si", $erfasst_von, $tag_id);
    $updateTagEditInfoStmt->execute();
    $updateTagEditInfoStmt->close();

    return true;

}
/**
 * Synchronizes a tag entry with the Wikidata entity identified by a Q-ID.
 *
 * In order: a description is fetched from Wikipedia (user language first,
 * then self::LANGUAGES_MAIN_DESC until retrieveTagDescFromWikipedia()
 * returns true), norm data links are stored, translations are stored and
 * the edit is logged.
 *
 * @param string  $lang        The user's selected used language.
 * @param string  $wikidata_id Wikidata ID.
 * @param integer $tag_id      Tag ID.
 * @param string  $erfasst_von User name who's currently editing.
 *
 * @return void
 */
public function retrieveTagInfoFromWikidataID(string $lang, string $wikidata_id, int $tag_id, string $erfasst_von):void {

    self::validateWikidataId($wikidata_id);
    $data = self::_getWikidataEntity($wikidata_id);
    $wikilinks = self::_getWikipediaLinksFromWikidataOutput($data);

    // Requests the parsed article and returns the cleaned text, or '' if the
    // response carries no usable parse.text.* entry. Indexing the decoded
    // response unguarded raised warnings whenever the request failed or the
    // API answered with an error object.
    $fetchSnippet = static function (string $wikiLang, string $title) {
        $response = json_decode(MD_STD::runCurl(self::_getWikipediaApiLink($wikiLang, $title), 10000), true);
        $html = is_array($response) ? ($response['parse']['text']['*'] ?? null) : null;
        if (empty($html) || !is_string($html)) {
            return '';
        }
        return self::_cleanWikidataInput($html);
    };

    // First choice: the article in the user's own language
    $alreadyEntered = false;
    if (isset($wikilinks[$lang])) {
        $datafromwiki = $fetchSnippet($lang, $wikilinks[$lang]['title']);
        if (!empty($datafromwiki)) {
            $alreadyEntered = $this->retrieveTagDescFromWikipedia($tag_id, $datafromwiki, $lang, $lang, $erfasst_von);
        }
    }

    // Fallback: the main description languages, in their listed order
    foreach (self::LANGUAGES_MAIN_DESC as $cur_lang) {

        if ($alreadyEntered === true) break;
        if (!isset($wikilinks[$cur_lang])) continue;

        $datafromwiki = $fetchSnippet($cur_lang, $wikilinks[$cur_lang]['title']);
        if (!empty($datafromwiki)) {
            $alreadyEntered = $this->retrieveTagDescFromWikipedia($tag_id, $datafromwiki, $lang, $cur_lang, $erfasst_von);
        }

    }

    if (!empty($nodaLinks = $this->_getNodaLinksFromWikidataResult('tag', $wikidata_id, $data))) {
        NodaBatchInserter::linkNodaForTag($this->_mysqli_noda, $tag_id, $nodaLinks, $erfasst_von);
    }

    // Get translations
    if (!empty($data)) $this->getWikidataTranslationsForTag($data, $tag_id);

    NodaLogEdit::logTagEdit($this->_mysqli_noda, $tag_id, "wikidata-fetcher", $erfasst_von, 'update', 'synchronize');

}
/**
 * Stores the translations available for a tag in a Wikidata entity.
 *
 * For languages listed in self::LANGUAGES_TO_CAPITALIZE the first letter of
 * name and description is upper-cased before the rows are handed to
 * NodaBatchInserter::insertTagTranslations().
 *
 * @param array<mixed> $data          Entity data fetched from wikidata.
 * @param integer      $tag_id        Tag ID.
 * @param string[]     $checkForLangs Languages to check for. Defaults to all
 *                                    languages generally loaded by the wikidata fetcher.
 *
 * @return void
 */
public function getWikidataTranslationsForTag(array $data, int $tag_id, array $checkForLangs = self::LANGUAGES_TO_CHECK):void {

    if (empty($translations = self::listTranslationsFromWikidataWikipedia($checkForLangs, $data))) {
        return;
    }

    // ucfirst() works on bytes and leaves a leading multi-byte letter
    // (Cyrillic, "é", "č", ...) untouched, so the first character is
    // upper-cased with mbstring where that extension is available.
    $capitalize = static function (string $text):string {
        if ($text === '' || !function_exists('mb_strtoupper')) {
            return ucfirst($text);
        }
        return mb_strtoupper(mb_substr($text, 0, 1, 'UTF-8'), 'UTF-8') . mb_substr($text, 1, null, 'UTF-8');
    };

    $toInsert = [];
    foreach ($translations as $lang => $values) {

        if (in_array($lang, self::LANGUAGES_TO_CAPITALIZE, true)) {
            $label = $capitalize($values['label']);
            $description = $capitalize($values['description']);
        }
        else {
            $label = $values['label'];
            $description = $values['description'];
        }

        $toInsert[] = [
            'tag_id' => $tag_id,
            'lang' => $lang,
            'name' => $label,
            'description' => $description,
            'link' => $values['link'],
        ];

    }

    NodaBatchInserter::insertTagTranslations($this->_mysqli_noda, $toInsert);

}
/**
 * Searches Wikidata for a string.
 *
 * Queries the wbsearchentities API (at most 20 hits) and returns one entry
 * per usable hit with the keys "id", "label", "label_ext" (language and text
 * of the matched term, if the API reported one) and "description". Hits
 * without a label and disambiguation pages are dropped. An empty array is
 * returned if the response cannot be decoded or contains no hits.
 *
 * @param string $searchTerm Search string.
 * @param string $lang       Searched language. Defaults to German.
 *
 * @return array<mixed>
 */
public static function searchWikidataForString(string $searchTerm, string $lang = "de"):array {

    $response = MD_STD::runCurl("https://www.wikidata.org/w/api.php?action=wbsearchentities&format=json&search=" . urlencode($searchTerm) . "&language=" . urlencode($lang) . "&limit=20", 10000);

    // json_decode() reports failure with null (not false); anything that is
    // not an array with a non-empty "search" list counts as "nothing found".
    $wikidata_data = json_decode($response, true);
    if (!is_array($wikidata_data) || empty($wikidata_data['search']) || !is_array($wikidata_data['search'])) {
        return [];
    }

    $output = [];
    foreach ($wikidata_data['search'] as $result) {

        if (!is_array($result) || empty($result['label'])) {
            continue;
        }

        $description = !empty($result['description']) ? $result['description'] : '';
        if (in_array($description, ['Wikipedia disambiguation page', 'Wikimedia disambiguation page'], true)) {
            continue;
        }

        $cur = [
            'id' => $result['id'],
            'label' => $result['label'],
            'label_ext' => '',
            'description' => $description,
        ];

        if (!empty($result['match'])) {
            // Not every match carries a language, so both parts are read defensively
            $matchLanguage = $result['match']['language'] ?? '';
            $matchText     = $result['match']['text'] ?? '';
            $cur['label_ext'] = "{$matchLanguage}: {$matchText}";
        }

        $output[] = $cur;

    }

    return $output;

}
/**
 * Generates the HTML for an entry in the general wikidata search results list.
 *
 * The entry links to $link with the parameters "suchbegriff", "wikidata_id"
 * and "lang" appended, followed by a link to the entity's Wikidata page.
 * Results without a label and disambiguation pages produce an empty string.
 *
 * ID, label and extended label are escaped. The description is printed as
 * HTML, because callers in this class append markup to it.
 *
 * @param string       $link       Link the parameters are appended to. It is used
 *                                 as passed, so it is expected to end in "?" or "&".
 * @param string       $searchTerm Search term.
 * @param string       $lang       Language.
 * @param array<mixed> $result     Single result to display.
 *
 * @return string
 */
public static function generateWikidataResultsListEntry(string $link, string $searchTerm, string $lang, array $result):string {

    if (empty($result['label']) or (isset($result['description']) and in_array($result['description'], ['Wikipedia disambiguation page', 'Wikimedia disambiguation page'], true))) {
        return '';
    }

    $id = (string)($result['id'] ?? '');

    // Query values are URL-encoded first (a term like "AT&T" would otherwise
    // be cut off at the "&") and HTML-escaped afterwards for the attribute.
    $output = '<div><a href="' . $link . 'suchbegriff=' . htmlspecialchars(urlencode($searchTerm)) . '&wikidata_id=' . htmlspecialchars(urlencode($id)) . '&lang=' . htmlspecialchars(urlencode($lang)) . '">
<h4 class="icons iconsTag">' . htmlspecialchars($id) . '</h4>';

    $output .= '<p class="wikidataSummary">' . htmlspecialchars((string)$result['label']);
    if (!empty($result['label_ext'])) $output .= " (<span class='icons iconsTranslate'>" . htmlspecialchars((string)$result['label_ext']) . "</span>)";
    $output .= '</p>';

    if (!empty($result['description'])) $output .= '<p>' . $result['description'] . '</p>';

    $output .= '</a><a class="icons iconsEye" target="_blank" href="https://www.wikidata.org/wiki/' . htmlspecialchars(urlencode($id)) . '">Wikidata page</a></div>';

    return $output;

}
/**
 * Function for generating a wikidata results list.
 *
 * Searches Wikidata for the term and renders every hit with
 * generateWikidataResultsListEntry() inside a <main> element. If nothing
 * was found, a single "not found" paragraph is returned instead.
 *
 * @param string $link       Link passed on to the list entries.
 * @param string $searchTerm Search term.
 * @param string $lang       Language (passed on to the list entries).
 *
 * @return string
 */
public static function generateWikidataResultsList(string $link, string $searchTerm, string $lang):string {

    // NOTE(review): $lang is not passed to the search, which therefore always
    // uses its default language - confirm whether that is intended.
    if (empty($wikidata_data = self::searchWikidataForString($searchTerm))) {
        // The search term is user input and must not reach the page unescaped
        return '<p class="icons iconsAlert alert"><b>' . htmlspecialchars(ucfirst($searchTerm)) . '</b> not found in Wikidata</p>';
    }

    $output = '
<main id="wikidataResultsList">';

    foreach ($wikidata_data as $result) {
        // The entry prints the description as HTML; what comes from the API
        // is plain text and is escaped here.
        if (!empty($result['description'])) {
            $result['description'] = htmlspecialchars((string)$result['description']);
        }
        $output .= self::generateWikidataResultsListEntry($link, $searchTerm, $lang, $result);
    }

    $output .= '
</main>';

    return $output;

}
/**
 * Extracts the year from a time value as delivered by wikidata
 * (e.g. "+1879-03-14T00:00:00Z").
 *
 * The year is read directly from the string. Parsing with strtotime() is
 * avoided on purpose: a bare "1945" is understood as the time 19:45 of the
 * current day, a year-precision value such as "+1945-00-00T00:00:00Z" rolls
 * over into the previous year, and the result depends on the default
 * timezone.
 *
 * @param string $inputTime Input time in the format delivered by wikidata.
 *
 * @return string Year with at least four digits, prefixed with "-" for years
 *                BCE; empty string if no year could be read.
 */
public static function wikidataBirthDeathToYear(string $inputTime):string {

    // Optional sign, the digits of the year, then either "-" or the end of the string
    if (preg_match('/^\s*([+-]?)(\d+)(?:-|$)/', $inputTime, $matches) !== 1) {
        return '';
    }

    // Leading zeros are dropped and re-added up to four digits
    $year = ltrim($matches[2], '0');
    if ($year === '') {
        return '';
    }

    $sign = ($matches[1] === '-') ? '-' : '';

    return $sign . str_pad($year, 4, '0', STR_PAD_LEFT);

}
/**
 * Function for generating a wikidata results list for actors, keeping track of life dates.
 *
 * For every search hit the entity data is loaded and the years of birth
 * (P569) and death (P570) are read from the first claim. The years are
 * appended to the description of the hit; hits whose years equal both
 * $yearOfBirth and $yearOfDeath are additionally marked as suggestion.
 *
 * @param string  $link        Link passed on to the list entries.
 * @param string  $searchTerm  Search term.
 * @param string  $lang        Language (passed on to the list entries).
 * @param integer $yearOfBirth Year of birth.
 * @param integer $yearOfDeath Year of death.
 *
 * @return string
 */
public static function generateWikidataResultsListForActors(string $link, string $searchTerm, string $lang, int $yearOfBirth, int $yearOfDeath):string {

    // NOTE(review): $lang is not passed to the search, which therefore always
    // uses its default language - confirm whether that is intended.
    if (empty($wikidata_data = self::searchWikidataForString($searchTerm))) {
        // The search term is user input and must not reach the page unescaped
        return '<p class="icons iconsAlert alert"><b>' . htmlspecialchars(ucfirst($searchTerm)) . '</b> not found in Wikidata</p>';
    }

    $qLinksToCheck = [];
    foreach ($wikidata_data as $entry) {
        $qLinksToCheck[$entry['id']] = "https://www.wikidata.org/wiki/Special:EntityData/" . $entry['id'] . ".json";
    }

    $fetched = MD_STD::runCurlMulti($qLinksToCheck, 10000);

    // Year of the first claim of a property, or 0 if there is none
    $yearFromClaim = static function (array $entity, string $property):int {
        $time = $entity['claims'][$property][0]['mainsnak']['datavalue']['value']['time'] ?? '';
        if (empty($time)) {
            return 0;
        }
        return (int)self::wikidataBirthDeathToYear((string)$time);
    };

    $yearsOfBirthList = $yearsOfDeathList = [];
    foreach ($fetched as $qId => $data) {

        if (!($jsonData = json_decode($data, true))) {
            continue;
        }
        if (empty($jsonData['entities'][$qId]) || !is_array($jsonData['entities'][$qId])) {
            continue;
        }

        $entity = $jsonData['entities'][$qId];
        $yearsOfBirthList[$qId] = $yearFromClaim($entity, 'P569');
        $yearsOfDeathList[$qId] = $yearFromClaim($entity, 'P570');

    }

    $output = '
<main id="wikidataResultsList">';

    foreach ($wikidata_data as $result) {

        if (empty($result['id'])) continue;

        $born = $yearsOfBirthList[$result['id']] ?? 0;
        $died = $yearsOfDeathList[$result['id']] ?? 0;

        // The description is printed as HTML by the entry: the text from the
        // API is escaped, the lines added here are joined with <br/>.
        $lines = [];
        if (!empty($result['description'])) {
            $lines[] = htmlspecialchars((string)$result['description']);
        }
        if (!empty($born)) {
            $lines[] = 'Born: ' . $born;
        }
        if (!empty($died)) {
            $lines[] = 'Death: ' . $died;
        }
        if (!empty($born) && !empty($died) && $born === $yearOfBirth && $died === $yearOfDeath) {
            $lines[] = '<span class="buttonLike">Suggestion!</span>';
        }
        $result['description'] = implode('<br/>', $lines);

        $output .= self::generateWikidataResultsListEntry($link, $searchTerm, $lang, $result);

    }

    $output .= '
</main>';

    return $output;

}
/**
 * Function generates HTML head for wikidata fetchers.
 *
 * Returns the document start up to and including the opening <body> tag,
 * passed through MD_STD::minimizeHTMLString(). The stylesheet link is only
 * included if the constant MAIN_CSS_FILE is defined.
 *
 * @param string  $lang     User language; used for the lang attribute of <html>.
 * @param boolean $implyEnd If set to true, a shutdown function is registered
 *                          that echoes the result of printHTMLEnd() at the
 *                          end of the script execution.
 *
 * @return string
 */
public static function generateHTMLHeadForWikidataFetcher(string $lang, bool $implyEnd = true):string {

    // The language ends up inside an attribute, so it is escaped like any other value
    $htmlLang = htmlspecialchars($lang);

    $output = "<!DOCTYPE html><html class=\"getWikidata\" lang=\"{$htmlLang}\">
<head>
<title>Get Wikidata</title>
<meta name=\"viewport\" content=\"width=device-width, initial-scale=1\" />
<link rel=\"manifest\" href=\"../manifest.webmanifest\" />
<meta name=\"theme-color\" content=\"#0b1728\" />
<link rel=\"shortcut icon\" sizes=\"16x16 32x32\" href=\"../img/mdlogo-nodac.svg.png\" />
<link rel=\"apple-touch-icon\" sizes=\"256x256\" href=\"../img/mdterm-256px.png\" />
<script type=\"text/javascript\" src=\"../js/wikidataGetter.min.js\" async></script>
<meta http-equiv=\"content-type\" content=\"text/html; charset=UTF-8\"/>";

    if (defined("MAIN_CSS_FILE")) {
        $output .= "<link rel=\"stylesheet\" type=\"text/css\" href=\"" . htmlspecialchars(MAIN_CSS_FILE) . "\">";
    }

    $output .= "
<meta name=\"description\" content=\"Fetch information from Wikidata.\" />
</head>
<body>";

    if ($implyEnd === true) {
        // Closes the document once the script ends, wherever that happens
        register_shutdown_function(function() :void {
            echo printHTMLEnd();
        });
    }

    return MD_STD::minimizeHTMLString($output);

}
/**
 * Builds the <header> element of the wikidata fetcher pages.
 *
 * The heading consists of the Wikidata logo, the translated
 * "fetch_from_wikidata" string and the escaped search term. If no search
 * term is passed, the GET parameter "suchbegriff" is used when it is set.
 * $additional is appended after the heading as it is.
 *
 * @param MDTlLoader $tlLoader   Translation variable.
 * @param string     $additional Additional info.
 * @param string     $searchTerm Search term.
 *
 * @return string
 */
public static function generateWikidataFetcherHeader(MDTlLoader $tlLoader, string $additional = "", string $searchTerm = ""):string {

    // Fall back to the request's search term if none was handed over
    $term = $searchTerm;
    if (empty($term) and !empty($_GET['suchbegriff'])) {
        $term = (string)$_GET['suchbegriff'];
    }

    $heading = '
<header>
<h1><img src="../img/wikidata.png" alt="Logo: Wikidata" />' . $tlLoader->tl("wiki", "wiki", "fetch_from_wikidata");

    return $heading . ': ' . htmlspecialchars($term) . '</h1>' . $additional . '</header>';

}
/**
 * Sets up the fetcher with the database connection it works on.
 *
 * @param MDMysqli $mysqli_noda DB connection; kept in $this->_mysqli_noda
 *                              and used for all queries of this class.
 *
 * @return void
 */
public function __construct(MDMysqli $mysqli_noda) {

    $this->_mysqli_noda = $mysqli_noda;

}
}