From c964053c91e0a0cc8343c06bd4b2c2ee32955bac Mon Sep 17 00:00:00 2001
From: Joshua Ramon Enslin
Date: Thu, 18 Mar 2021 01:23:45 +0100
Subject: [PATCH] Add function for reading Wikidata ID from a Wikipedia page

---
 src/NodaWikidataFetcher.php | 65 ++++++++++++++++++++++++++++++++++---
 1 file changed, 60 insertions(+), 5 deletions(-)

diff --git a/src/NodaWikidataFetcher.php b/src/NodaWikidataFetcher.php
index 8c7f6c9..aa6e853 100644
--- a/src/NodaWikidataFetcher.php
+++ b/src/NodaWikidataFetcher.php
@@ -10,7 +10,7 @@ declare(strict_types = 1);
 /**
  * Helps fetching information from Wikidata.
  */
-class NodaWikidataFetcher {
+final class NodaWikidataFetcher {
 
     const LANGUAGES_MAIN_DESC = ['de', 'da', 'en', 'es', 'fr', 'hu', 'it', 'jp', 'nl', 'pt', 'ru', 'sv', 'zh'];
     const LANGUAGES_TO_CHECK = ['ar', 'bn', 'cs', 'da', 'de', 'el', 'en', 'es', 'fa', 'fi', 'fr', 'ha', 'he', 'hi', 'hu', 'id', 'it', 'ja', 'ka', 'ko', 'nl', 'pl', 'pt', 'ro', 'ru', 'sv', 'sw', 'ta', 'th', 'tl', 'tr', 'ur', 'vi', 'zh'];
@@ -42,7 +42,6 @@ class NodaWikidataFetcher {
         'lcsh' => 'P244',
         'aat' => 'P1014',
         'iconclass' => 'P1256',
-        'rkd' => 'P650',
         'osm' => 'P402',
         'loc' => 'P244',
         'nomisma' => 'P2950',
@@ -81,6 +80,62 @@ class NodaWikidataFetcher {
     /** @var MDMysqli */
     private MDMysqli $_mysqli_noda;
 
+    /**
+     * Attempts to fetch a Wikidata ID from a provided URL.
+     *
+     * Downloads the linked page and reads the link target found inside the
+     * element with the ID "t-wikibase".
+     *
+     * @param string $linkUrl Link to a Wikipedia page.
+     *
+     * @throws MDExpectedException If $linkUrl is not a valid URL.
+     *
+     * @return string Wikidata ID (e.g. "Q42"), or an empty string if none was found.
+     */
+    public static function getWikidataIdFromWikipedia(string $linkUrl):string {
+
+        if (!filter_var($linkUrl, FILTER_VALIDATE_URL)) {
+            throw new MDExpectedException("Invalid URL");
+        }
+
+        $wikipedia_cont = MD_STD::runCurl($linkUrl);
+        if (trim($wikipedia_cont) === '') {
+            return '';
+        }
+
+        // libxml's HTML parser reports HTML5 markup as erroneous. Collect those
+        // messages internally instead of emitting a warning for each of them.
+        $doc = new DOMDocument();
+        $previousErrorSetting = libxml_use_internal_errors(true);
+        $loaded = $doc->loadHTML($wikipedia_cont);
+        libxml_clear_errors();
+        libxml_use_internal_errors($previousErrorSetting);
+        if ($loaded === false) {
+            return '';
+        }
+
+        if (!($wikidataLinkLi = $doc->getElementById("t-wikibase"))) {
+            return '';
+        }
+
+        // The first child node may be a whitespace text node, which has no
+        // attributes. Look up the contained anchor element explicitly.
+        $wikidataLink = $wikidataLinkLi->getElementsByTagName('a')->item(0);
+        if (!($wikidataLink instanceof DOMElement)) {
+            return '';
+        }
+
+        // The ID is the last path segment of the link target. A trailing
+        // slash, query string or fragment is not part of the ID.
+        $t_wikibase = $wikidataLink->getAttribute('href');
+        if (preg_match('~/(Q[0-9]+)/?(?:[?#].*)?$~', $t_wikibase, $matches) === 1) {
+            return $matches[1];
+        }
+
+        return '';
+
+    }
+
     /**
      * Gets translation source Wikipedia pages from Wikidata.
      *
@@ -1162,9 +1217,9 @@ class NodaWikidataFetcher {
     /**
      * Writes relations to norm data sources to DB.
      *
-     * @param array> $nodaLinks Links to other noda sources.
-     * @param integer $tag_id Tag ID.
-     * @param string $erfasst_von Name of the user to edit this.
+     * @param array $nodaLinks Links to other noda sources.
+     * @param integer $tag_id Tag ID.
+     * @param string $erfasst_von Name of the user to edit this.
      *
      * @return void
      */