diff --git a/src/NodaWikidataFetcher.php b/src/NodaWikidataFetcher.php
index 85cce49..d284bb2 100644
--- a/src/NodaWikidataFetcher.php
+++ b/src/NodaWikidataFetcher.php
@@ -12,6 +12,11 @@ declare(strict_types = 1);
  */
 final class NodaWikidataFetcher {
 
+    const WIKIDATA_FETCH_HEADERS = [
+        'User-Agent: museum-digital-bot GND-to-Wikidata PHP/' . PHP_VERSION,
+        'Accept: application/sparql-results+json',
+    ];
+
     const LANGUAGES_MAIN_DESC = ['de', 'da', 'en', 'es', 'fr', 'hu', 'it', 'jp', 'nl', 'pt', 'ru', 'sv', 'zh'];
 
     const LANGUAGES_TO_CHECK = ['ar', 'bn', 'cs', 'da', 'de', 'el', 'en', 'es', 'fa', 'fi', 'fr', 'ha', 'he', 'hi', 'hu', 'id', 'it', 'ja', 'ka', 'ko', 'nl', 'pl', 'pt', 'ro', 'ru', 'sv', 'sw', 'ta', 'th', 'tl', 'tr', 'ur', 'vi', 'zh'];
@@ -176,6 +181,124 @@ final class NodaWikidataFetcher {
 
     }
 
+    /**
+     * Runs a SPARQL query against the Wikidata SPARQL endpoint.
+     *
+     * @param string $sparqlQuery Query string.
+     *
+     * @throws \UnexpectedValueException If the response does not decode to an array.
+     *
+     * @return array
+     */
+    public static function sparqlQuery(string $sparqlQuery):array {
+
+        $url = 'https://query.wikidata.org/sparql?query=' . urlencode($sparqlQuery);
+        $result = MD_STD::runCurl($url, 10000, self::WIKIDATA_FETCH_HEADERS);
+
+        // json_decode() yields null for malformed input and scalars for scalar
+        // JSON; fail explicitly instead of violating the declared return type.
+        $decoded = json_decode($result, true);
+        if (!is_array($decoded)) {
+            throw new \UnexpectedValueException('Wikidata SPARQL endpoint returned a response that could not be decoded to an array');
+        }
+
+        return $decoded;
+
+    }
+
+    /**
+     * Formulates a SPARQL query string for fetching from Wikidata based on an external ID.
+     *
+     * @param string $repoName   Name of the repository.
+     * @param string $externalId ID in the external repository. Escaped before being
+     *                           embedded in the query's string literal.
+     * @param string $repoPId    Optional P-ID of the external repository. Needed for
+     *                           Geonames and TGN, obsolete otherwise.
+     *
+     * @throws MDmainEntityNotExistentException If no P-ID is given and the repository is unknown.
+     *
+     * @return string
+     */
+    public static function formulateWikidataQueryByExtId(string $repoName, string $externalId, string $repoPId = ''):string {
+
+        if (empty($repoPId)) {
+
+            if (empty(self::P_IDS_NODA_TAGS[$repoName])) {
+                throw new MDmainEntityNotExistentException("Unknown external repository. The following repositories are known with their Wikidata ID: " . implode(', ', array_keys(self::P_IDS_NODA_TAGS)));
+            }
+
+            $repoPId = self::P_IDS_NODA_TAGS[$repoName];
+
+        }
+
+        // Escape characters that would otherwise terminate or corrupt the
+        // double-quoted SPARQL string literal the ID is placed in.
+        $escapedId = strtr($externalId, [
+            '\\' => '\\\\',
+            '"'  => '\\"',
+            "\n" => '\\n',
+            "\r" => '\\r',
+        ]);
+
+        $sparqlQueryString = 'SELECT ?id ?idLabel WHERE {
+            ?id wdt:' . $repoPId . ' "' . $escapedId . '".
+            SERVICE wikibase:label {
+                bd:serviceParam wikibase:language "en" .
+            }
+        }';
+
+        return $sparqlQueryString;
+
+    }
+
+    /**
+     * Gets the Wikidata ID based on a result from Wikidata's SPARQL endpoint.
+     *
+     * Returns an empty string unless the result holds exactly one binding whose
+     * entity URI contains a slash; the Q-ID is the part after the last slash.
+     *
+     * @param array $queryResult Query result.
+     *
+     * @return string
+     */
+    public static function readWikidataIdFromSparqlResult(array $queryResult):string {
+
+        // Tolerate responses that lack the expected structure.
+        $bindings = $queryResult['results']['bindings'] ?? null;
+        if (!is_array($bindings) || count($bindings) !== 1) return '';
+
+        $wikidataLink = $bindings[0]['id']['value'] ?? '';
+        if (!is_string($wikidataLink) || $wikidataLink === '') return '';
+
+        $endSlashPos = strrpos($wikidataLink, '/');
+        if ($endSlashPos === false) return '';
+
+        return substr($wikidataLink, $endSlashPos + 1);
+
+    }
+
+    /**
+     * Queries Wikidata by an external repository's ID and returns the matching Q-ID
+     * if there is any.
+     *
+     * @param string $repoName   Name of the repository.
+     * @param string $externalId ID in the external repository.
+     * @param string $repoPId    Optional P-ID of the external repository. Needed for
+     *                           Geonames and TGN, obsolete otherwise.
+     *
+     * @return string
+     */
+    public static function getWikidataIdByExternalId(string $repoName, string $externalId, string $repoPId = ''):string {
+
+        // Pass the caller's P-ID through unchanged; writing `$repoPId = ''` in the
+        // argument list would reset it and discard an explicitly supplied P-ID.
+        $sparqlQueryString = self::formulateWikidataQueryByExtId($repoName, $externalId, $repoPId);
+
+        $queryResult = self::sparqlQuery($sparqlQueryString);
+        return self::readWikidataIdFromSparqlResult($queryResult);
+
+    }
+
     /**
      * Gets translation source Wikipedia pages from Wikidata.
      *