2023-11-26 00:54:14 +01:00
< ? PHP
/**
 * This file contains tools for fetching data from Wikidata.
 *
 * @file
 * @author Joshua Ramon Enslin <joshua@museum-digital.de>
 */
declare ( strict_types = 1 );
require_once __DIR__ . '/../src/NodaWikidataFetcher.php' ;
require_once __DIR__ . '/../../MD_STD/src/MD_STD.php' ;
/**
 * Queries Wikidata for items that are instances of a Q-ID.
 *
 * The SPARQL query selects every item whose "instance of" (P31) value is the
 * given entity, or is connected to it through any number of "subclass of"
 * (P279) statements. Labels are requested from the label service in $lang,
 * with English as the fallback.
 *
 * @param string $lang       Query language.
 * @param string $instanceOf Q-ID.
 *
 * @throws InvalidArgumentException If $lang or $instanceOf is not well-formed.
 *
 * @return array<mixed> Whatever NodaWikidataFetcher::sparqlQuery() returns for
 *                      the query.
 */
function query(string $lang, string $instanceOf):array {

    // Both values are concatenated into the SPARQL text below, so refuse
    // anything that could escape its position in the query.
    if (preg_match('/\AQ[1-9][0-9]*\z/', $instanceOf) !== 1) {
        throw new InvalidArgumentException('Invalid Wikidata Q-ID: ' . $instanceOf);
    }
    if (preg_match('/\A[A-Za-z]+(?:-[A-Za-z0-9]+)*\z/', $lang) !== 1) {
        throw new InvalidArgumentException('Invalid language code: ' . $lang);
    }

    $sparqlQueryString = 'SELECT ?item ?itemLabel
WHERE
{
  ?item wdt:P31/wdt:P279* wd:' . $instanceOf . ' .
  SERVICE wikibase:label { bd:serviceParam wikibase:language "' . $lang . ',[AUTO_LANGUAGE],en". } # Helps get the label in your language, if not, then en language
}';

    return NodaWikidataFetcher::sparqlQuery($sparqlQueryString);

}
/**
 * Returns names from a query.
 *
 * Reads the list at $data['results']['bindings'] and collects the
 * ['itemLabel']['value'] string of each entry, in order. If the bindings list
 * is missing or is not an array, an empty list is returned. Entries without a
 * string label are skipped, so the result only ever contains strings.
 *
 * @param array<mixed> $data Wikidata output values.
 *
 * @return array<string>
 */
function getNames(array $data):array {

    $bindings = $data['results']['bindings'] ?? [];
    if (!is_array($bindings)) {
        return [];
    }

    $output = [];
    foreach ($bindings as $entry) {
        $label = $entry['itemLabel']['value'] ?? null;
        if (is_string($label)) {
            $output[] = $label;
        }
    }

    return $output;

}
// Wikidata classes to export, mapped to the base name of the output file.
// All entries are currently commented out, so the export loop below does
// nothing until one of them is re-enabled.
// Q6256 => country
$targets = [
    /*
    'Q6256'     => 'countries',
    'Q3024240'  => 'historical_countries',
    'Q10864048' => 'first_lvl_administrative_units',
    */
];

// Languages to fetch labels in.
$langs = ['ar', 'bg', 'bn', 'cs', 'da', 'de', 'el', 'en', 'es', 'fa', 'fi', 'fr', 'ha', 'he', 'hi', 'hu', 'id', 'it', 'ja', 'ka', 'ko', 'nl', 'pl', 'pt', 'ro', 'ru', 'sv', 'sw', 'ta', 'th', 'tl', 'tr', 'uk', 'ur', 'vi', 'zh'];

// Write one JSON list of labels per language and target to
// ../static/<filename>.<lang>.json.
foreach ($langs as $lang) {
    foreach ($targets as $qid => $filename) {

        $regionNames = getNames(query($lang, $qid));

        // Fail loudly instead of writing an empty file and reporting success.
        $json = json_encode($regionNames);
        if ($json === false) {
            throw new RuntimeException('Failed to encode ' . $filename . ' (' . $lang . '): ' . json_last_error_msg());
        }

        $outputFile = __DIR__ . '/../static/' . $filename . '.' . $lang . '.json';
        if (file_put_contents($outputFile, $json) === false) {
            throw new RuntimeException('Failed to write ' . $outputFile);
        }

        echo " Fetched $lang : $filename ( $qid ) " . PHP_EOL;

    }
}
// The following should be lists of terms that are independent of language
$targetsForMerge = [
    'Q23718' => 'cardinal_directions',
];

// Collect the labels of every language into one list per target.
$mergedValues = [];
foreach ($langs as $lang) {
    foreach ($targetsForMerge as $qid => $filename) {

        if (!isset($mergedValues[$filename])) {
            $mergedValues[$filename] = [];
        }

        $mergedValues[$filename] = array_merge($mergedValues[$filename], getNames(query($lang, $qid)));
        echo " Fetched $lang : $filename ( $qid ) " . PHP_EOL;

    }
}

// Terms added by hand on top of the fetched labels.
$mergedValues['cardinal_directions'][] = 'Nord';
$mergedValues['cardinal_directions'][] = 'Ost';
$mergedValues['cardinal_directions'][] = 'West';
$mergedValues['cardinal_directions'][] = 'Süd';

// Write one de-duplicated JSON list per target to ../static/<filename>.json.
foreach ($mergedValues as $filename => $values) {

    // array_unique() keeps the original keys, so the list is reindexed to
    // make json_encode() emit a JSON array rather than an object.
    $json = json_encode(array_values(array_unique($values)));
    if ($json === false) {
        throw new RuntimeException('Failed to encode ' . $filename . ': ' . json_last_error_msg());
    }

    $outputFile = __DIR__ . '/../static/' . $filename . '.json';
    if (file_put_contents($outputFile, $json) === false) {
        throw new RuntimeException('Failed to write ' . $outputFile);
    }

}