MDNodaHelpers/scripts/get_wikidata_country_names.php

102 lines
2.7 KiB
PHP

<?PHP
/**
* This script fetches lists of labels from Wikidata and writes them as JSON files to the static directory.
*
* @file
* @author Joshua Ramon Enslin <joshua@museum-digital.de>
*/
declare(strict_types = 1);
require_once __DIR__ . '/../src/NodaWikidataFetcher.php';
require_once __DIR__ . '/../../MD_STD/src/MD_STD.php';
/**
 * Fetches all Wikidata items that are an instance of the given Q-ID (or of
 * one of its subclasses, via the wdt:P31/wdt:P279* path) along with their labels.
 *
 * @param string $lang       Preferred language for the returned labels.
 * @param string $instanceOf Q-ID the items need to be an instance of.
 *
 * @return array<mixed>
 */
function query(string $lang, string $instanceOf):array {
    // The two placeholders are filled in order: first the Q-ID, then the language.
    $template = 'SELECT ?item ?itemLabel
WHERE
{
?item wdt:P31/wdt:P279* wd:%s.
SERVICE wikibase:label { bd:serviceParam wikibase:language "%s,[AUTO_LANGUAGE],en". } # Helps get the label in your language, if not, then en language
}';

    return NodaWikidataFetcher::sparqlQuery(sprintf($template, $instanceOf, $lang));
}
/**
 * Extracts the item labels from a SPARQL query result.
 *
 * Reads `$data['results']['bindings']` and collects each binding's
 * `itemLabel.value`. A result without bindings yields an empty list, and
 * bindings that carry no string label are skipped, so the returned list
 * only ever contains strings.
 *
 * @param array<mixed> $data Wikidata output values.
 *
 * @return array<string>
 */
function getNames(array $data):array {
    // Tolerate an empty or malformed response instead of raising warnings.
    $bindings = $data['results']['bindings'] ?? [];
    if (!is_array($bindings)) {
        return [];
    }

    $output = [];
    foreach ($bindings as $entry) {
        $label = $entry['itemLabel']['value'] ?? null;
        // Skip bindings without a usable label rather than appending null.
        if (is_string($label)) {
            $output[] = $label;
        }
    }
    return $output;
}
// Wikidata Q-IDs (e.g. Q6256 => country) mapped to the base name of the
// per-language output file. All entries are currently commented out, so the
// export loop below does not fetch or write anything.
$targets = [
    /*
    'Q6256' => 'countries',
    'Q3024240' => 'historical_countries',
    'Q10864048' => 'first_lvl_administrative_units',
    */
];

// Language codes the labels are requested in.
$langs = [
    'ar', 'bg', 'bn', 'cs', 'da', 'de', 'el', 'en', 'es',
    'fa', 'fi', 'fr', 'ha', 'he', 'hi', 'hu', 'id', 'it',
    'ja', 'ka', 'ko', 'nl', 'pl', 'pt', 'ro', 'ru', 'sv',
    'sw', 'ta', 'th', 'tl', 'tr', 'uk', 'ur', 'vi', 'zh',
];

// Write one JSON file per target and language: static/<filename>.<lang>.json
foreach ($langs as $lang) {
    foreach ($targets as $qid => $filename) {
        $labels     = getNames(query($lang, $qid));
        $outputFile = __DIR__ . '/../static/' . $filename . '.' . $lang . '.json';
        file_put_contents($outputFile, json_encode($labels));
        echo 'Fetched ' . $lang . ' : ' . $filename . ' (' . $qid . ')' . PHP_EOL;
    }
}
// Targets whose terms should be independent of language: the labels fetched
// for every language are collected into one single file per target.
$targetsForMerge = [
    'Q23718' => 'cardinal_directions',
];

$mergedValues = [];
foreach ($langs as $lang) {
    foreach ($targetsForMerge as $qid => $filename) {
        // Continue from what has been collected for this file so far.
        $collected = $mergedValues[$filename] ?? [];
        $mergedValues[$filename] = array_merge($collected, getNames(query($lang, $qid)));
        echo 'Fetched ' . $lang . ' : ' . $filename . ' (' . $qid . ')' . PHP_EOL;
    }
}

// Extra terms appended by hand (presumably German short forms of the directions).
foreach (['Nord', 'Ost', 'West', 'Süd'] as $extraTerm) {
    $mergedValues['cardinal_directions'][] = $extraTerm;
}

// Write the de-duplicated, re-indexed list of each target to static/<filename>.json
foreach ($mergedValues as $filename => $values) {
    $deduplicated = array_values(array_unique($values));
    $outputFile   = __DIR__ . '/../static/' . $filename . '.json';
    file_put_contents($outputFile, json_encode($deduplicated));
}