Add function for reading Wikidata ID from a Wikipedia page

This commit is contained in:
Joshua Ramon Enslin 2021-03-18 01:23:45 +01:00
parent 1fd87c7e6d
commit c964053c91
Signed by: jrenslin
GPG Key ID: 46016F84501B70AE

View File

@ -10,7 +10,7 @@ declare(strict_types = 1);
/** /**
* Helps fetching information from Wikidata. * Helps fetching information from Wikidata.
*/ */
class NodaWikidataFetcher { final class NodaWikidataFetcher {
const LANGUAGES_MAIN_DESC = ['de', 'da', 'en', 'es', 'fr', 'hu', 'it', 'jp', 'nl', 'pt', 'ru', 'sv', 'zh']; const LANGUAGES_MAIN_DESC = ['de', 'da', 'en', 'es', 'fr', 'hu', 'it', 'jp', 'nl', 'pt', 'ru', 'sv', 'zh'];
const LANGUAGES_TO_CHECK = ['ar', 'bn', 'cs', 'da', 'de', 'el', 'en', 'es', 'fa', 'fi', 'fr', 'ha', 'he', 'hi', 'hu', 'id', 'it', 'ja', 'ka', 'ko', 'nl', 'pl', 'pt', 'ro', 'ru', 'sv', 'sw', 'ta', 'th', 'tl', 'tr', 'ur', 'vi', 'zh']; const LANGUAGES_TO_CHECK = ['ar', 'bn', 'cs', 'da', 'de', 'el', 'en', 'es', 'fa', 'fi', 'fr', 'ha', 'he', 'hi', 'hu', 'id', 'it', 'ja', 'ka', 'ko', 'nl', 'pl', 'pt', 'ro', 'ru', 'sv', 'sw', 'ta', 'th', 'tl', 'tr', 'ur', 'vi', 'zh'];
@ -42,7 +42,6 @@ class NodaWikidataFetcher {
'lcsh' => 'P244', 'lcsh' => 'P244',
'aat' => 'P1014', 'aat' => 'P1014',
'iconclass' => 'P1256', 'iconclass' => 'P1256',
'rkd' => 'P650',
'osm' => 'P402', 'osm' => 'P402',
'loc' => 'P244', 'loc' => 'P244',
'nomisma' => 'P2950', 'nomisma' => 'P2950',
@ -81,6 +80,52 @@ class NodaWikidataFetcher {
/** @var MDMysqli */ /** @var MDMysqli */
private MDMysqli $_mysqli_noda; private MDMysqli $_mysqli_noda;
/**
 * Attempts to fetch a Wikidata ID from a provided URL.
 *
 * Downloads the linked page, looks up the element with the ID "t-wikibase"
 * (expected to contain the link to the page's Wikidata item) and returns
 * the last path segment of that link if it is a Wikidata Q-ID.
 *
 * @param string $linkUrl Link to a Wikipedia page.
 *
 * @throws MDExpectedException If $linkUrl is not a valid http(s) URL.
 *
 * @return string Wikidata ID (e.g. "Q42"), or an empty string if none could
 *                be determined.
 */
public static function getWikidataIdFromWikipedia(string $linkUrl):string {

    if (!filter_var($linkUrl, FILTER_VALIDATE_URL)) {
        throw new MDExpectedException("Invalid URL");
    }

    // FILTER_VALIDATE_URL also accepts schemes like file:// or ftp://.
    // Only web pages are meaningful here, and fetching anything else based
    // on user-provided input would be unsafe.
    $scheme = strtolower((string)parse_url($linkUrl, PHP_URL_SCHEME));
    if (!in_array($scheme, ['http', 'https'], true)) {
        throw new MDExpectedException("Invalid URL");
    }

    $wikipedia_cont = MD_STD::runCurl($linkUrl);

    // DOMDocument::loadHTML() does not accept empty input.
    if (!is_string($wikipedia_cont) || trim($wikipedia_cont) === '') {
        return '';
    }

    // Real-world HTML5 pages trigger plenty of libxml warnings. Collect them
    // internally instead of emitting PHP warnings, then restore the
    // previous setting.
    $previousLibxmlSetting = libxml_use_internal_errors(true);
    $doc = new DOMDocument();
    $loaded = $doc->loadHTML($wikipedia_cont);
    libxml_clear_errors();
    libxml_use_internal_errors($previousLibxmlSetting);

    if ($loaded === false) {
        return '';
    }

    $wikidataLinkLi = $doc->getElementById("t-wikibase");
    if ($wikidataLinkLi === null) {
        return '';
    }

    // The first child node may be a text node (e.g. whitespace), which has
    // no getAttribute(). Look for the first actual link element instead.
    $wikidataLink = $wikidataLinkLi->getElementsByTagName('a')->item(0);
    if (!($wikidataLink instanceof DOMElement)) {
        return '';
    }

    // Only look at the path of the link, so that a query string or fragment
    // cannot end up in the returned ID.
    $hrefPath = parse_url($wikidataLink->getAttribute('href'), PHP_URL_PATH);
    if (!is_string($hrefPath)) {
        return '';
    }

    // Strip trailing slashes first, so the last path segment is never empty
    // just because the link ends with a slash.
    $hrefPath = rtrim($hrefPath, '/');
    $lastSlashPos = strrpos($hrefPath, '/');
    $wikidata_id = $lastSlashPos === false
        ? $hrefPath
        : substr($hrefPath, $lastSlashPos + 1);

    // Wikidata item IDs consist of a "Q" followed by digits only.
    if (preg_match('/^Q[0-9]+$/D', $wikidata_id) === 1) {
        return $wikidata_id;
    }

    return '';

}
/** /**
* Gets translation source Wikipedia pages from Wikidata. * Gets translation source Wikipedia pages from Wikidata.
* *
@ -1162,9 +1207,9 @@ class NodaWikidataFetcher {
/** /**
* Writes relations to norm data sources to DB. * Writes relations to norm data sources to DB.
* *
* @param array<array<string>> $nodaLinks Links to other noda sources. * @param array<string> $nodaLinks Links to other noda sources.
* @param integer $tag_id Tag ID. * @param integer $tag_id Tag ID.
* @param string $erfasst_von Name of the user to edit this. * @param string $erfasst_von Name of the user to edit this.
* *
* @return void * @return void
*/ */