102 lines
2.7 KiB
PHP
102 lines
2.7 KiB
PHP
<?PHP
|
|
/**
|
|
* This file contains tools for fetching data from Wikidata.
|
|
*
|
|
* @file
|
|
* @author Joshua Ramon Enslin <joshua@museum-digital.de>
|
|
*/
|
|
declare(strict_types = 1);
|
|
|
|
require_once __DIR__ . '/../src/NodaWikidataFetcher.php';
|
|
require_once __DIR__ . '/../../MD_STD/src/MD_STD.php';
|
|
|
|
/**
 * Queries Wikidata for all items that are instances of a given Q-ID.
 *
 * The SPARQL property path matches direct instances (P31) as well as
 * instances of any transitive subclass (P279*) of the given entity.
 * Labels are requested in $lang, falling back to the label service's
 * automatic language and then to English.
 *
 * @param string $lang       Query language (preferred label language), e.g. "de".
 * @param string $instanceOf Q-ID of the class whose instances are listed, e.g. "Q6256".
 *
 * @throws InvalidArgumentException If $lang or $instanceOf is not well-formed.
 *
 * @return array<mixed> Result of NodaWikidataFetcher::sparqlQuery() for the query.
 */
function query(string $lang, string $instanceOf):array {

    // Both arguments are spliced verbatim into the SPARQL text below, so
    // anything that is not a plain Q-ID / language code is rejected up front
    // instead of being allowed to change the structure of the query.
    if (preg_match('/\AQ[1-9][0-9]*\z/', $instanceOf) !== 1) {
        throw new InvalidArgumentException('Invalid Wikidata Q-ID: ' . $instanceOf);
    }
    if (preg_match('/\A[a-z]{2,}(?:-[a-z0-9]+)*\z/i', $lang) !== 1) {
        throw new InvalidArgumentException('Invalid language code: ' . $lang);
    }

    $sparqlQueryString = 'SELECT ?item ?itemLabel
WHERE
{
  ?item wdt:P31/wdt:P279* wd:' . $instanceOf . '.
  SERVICE wikibase:label { bd:serviceParam wikibase:language "' . $lang . ',[AUTO_LANGUAGE],en". } # Helps get the label in your language, if not, then en language
}';

    return NodaWikidataFetcher::sparqlQuery($sparqlQueryString);

}
|
|
|
|
/**
 * Extracts the item labels from a query result.
 *
 * Reads $data['results']['bindings'] and collects each entry's
 * ['itemLabel']['value']. A result without bindings yields an empty list,
 * and entries without a string label are skipped, so the returned list
 * only ever contains strings.
 *
 * @param array<mixed> $data Wikidata output values.
 *
 * @return array<string> Labels in the order they appear in the bindings.
 */
function getNames(array $data):array {

    $bindings = $data['results']['bindings'] ?? [];
    if (!is_array($bindings)) {
        return [];
    }

    $output = [];

    foreach ($bindings as $entry) {
        // A binding may lack the label variable; do not emit null for it.
        $label = $entry['itemLabel']['value'] ?? null;
        if (is_string($label)) {
            $output[] = $label;
        }
    }

    return $output;

}
|
|
|
|
// Q6256 => country
|
|
|
|
// Lists exported once per language: Q-ID => file name stem.
// Each pair is written to static/<stem>.<lang>.json.
// All entries are currently commented out, so the export loop below is a no-op.
$targets = [
    /*
    'Q6256' => 'countries',
    'Q3024240' => 'historical_countries',
    'Q10864048' => 'first_lvl_administrative_units',
    */
];

// Language codes to fetch labels in.
$langs = ['ar', 'bg', 'bn', 'cs', 'da', 'de', 'el', 'en', 'es', 'fa', 'fi', 'fr', 'ha', 'he', 'hi', 'hu', 'id', 'it', 'ja', 'ka', 'ko', 'nl', 'pl', 'pt', 'ro', 'ru', 'sv', 'sw', 'ta', 'th', 'tl', 'tr', 'uk', 'ur', 'vi', 'zh'];

foreach ($langs as $lang) {
    foreach ($targets as $qid => $filename) {

        $regionNames = getNames(query($lang, $qid));

        // Fail loudly instead of reporting "Fetched" for a file that could
        // not be encoded or written.
        $outputFile = __DIR__ . '/../static/' . $filename . '.' . $lang . '.json';
        if (file_put_contents($outputFile, json_encode($regionNames, JSON_THROW_ON_ERROR)) === false) {
            throw new RuntimeException('Failed to write ' . $outputFile);
        }
        echo "Fetched $lang : $filename ($qid)" . PHP_EOL;

    }
}
|
|
|
|
// The following lists are language-independent: the labels fetched for every
// language are merged into a single file, static/<stem>.json.
$targetsForMerge = [
    'Q23718' => 'cardinal_directions',
];

// File name stem => labels collected across all languages (may contain duplicates).
$mergedValues = [];
foreach ($langs as $lang) {
    foreach ($targetsForMerge as $qid => $filename) {

        $mergedValues[$filename] = array_merge($mergedValues[$filename] ?? [], getNames(query($lang, $qid)));
        echo "Fetched $lang : $filename ($qid)" . PHP_EOL;

    }
}

// Manually added terms on top of what Wikidata returned
// (presumably German short forms missing from the labels - TODO confirm).
$mergedValues['cardinal_directions'][] = 'Nord';
$mergedValues['cardinal_directions'][] = 'Ost';
$mergedValues['cardinal_directions'][] = 'West';
$mergedValues['cardinal_directions'][] = 'Süd';

foreach ($mergedValues as $filename => $values) {

    // array_unique() keeps the original keys; array_values() re-indexes so the
    // result is encoded as a JSON array rather than a JSON object.
    $uniqueValues = array_values(array_unique($values));

    // Fail loudly if the list cannot be encoded or the file cannot be written.
    $outputFile = __DIR__ . '/../static/' . $filename . '.json';
    if (file_put_contents($outputFile, json_encode($uniqueValues, JSON_THROW_ON_ERROR)) === false) {
        throw new RuntimeException('Failed to write ' . $outputFile);
    }

}
|