100 lines
		
	
	
		
			2.7 KiB
		
	
	
	
		
			PHP
		
	
	
	
	
	
			
		
		
	
	
			100 lines
		
	
	
		
			2.7 KiB
		
	
	
	
		
			PHP
		
	
	
	
	
	
| <?PHP
 | |
| /**
 | |
|  * This file contains tools for fetching data from Wikidata.
 | |
|  *
 | |
|  * @file
 | |
|  * @author Joshua Ramon Enslin <joshua@museum-digital.de>
 | |
|  */
 | |
| declare(strict_types = 1);
 | |
| 
 | |
| require_once __DIR__ . '/../src/NodaWikidataFetcher.php';
 | |
| require_once __DIR__ . '/../../MD_STD/src/MD_STD.php';
 | |
| 
 | |
/**
 * Asks the Wikidata SPARQL endpoint for every item that is an instance of the
 * given class or of one of its subclasses (wdt:P31/wdt:P279*), together with
 * the item's label.
 *
 * Labels are requested in $lang first, then [AUTO_LANGUAGE], then English.
 *
 * @param string $lang       Query language.
 * @param string $instanceOf Q-ID.
 *
 * @return array<mixed>
 */
function query(string $lang, string $instanceOf):array {

    // First placeholder: Q-ID of the class; second placeholder: preferred label language.
    $template = 'SELECT ?item ?itemLabel
    WHERE
    {
      ?item wdt:P31/wdt:P279* wd:%s.
      SERVICE wikibase:label { bd:serviceParam wikibase:language "%s,[AUTO_LANGUAGE],en". } # Helps get the label in your language, if not, then en language
    }';

    return NodaWikidataFetcher::sparqlQuery(sprintf($template, $instanceOf, $lang));

}
 | |
| 
 | |
/**
 * Extracts the item labels from a SPARQL JSON result.
 *
 * Reads `results.bindings[*].itemLabel.value`. A result without a bindings
 * list yields an empty array, and bindings that carry no string label are
 * skipped, so the returned list only ever contains strings.
 *
 * @param array<mixed> $data Wikidata output values.
 *
 * @return array<string>
 */
function getNames(array $data):array {

    // An empty or malformed response must not raise warnings or abort the run.
    $bindings = $data['results']['bindings'] ?? [];
    if (!is_array($bindings)) {
        return [];
    }

    $output = [];

    foreach ($bindings as $entry) {
        // Not every binding is guaranteed to carry an itemLabel; skip those
        // rather than pushing null into a list documented as strings.
        $label = $entry['itemLabel']['value'] ?? null;
        if (is_string($label)) {
            $output[] = $label;
        }
    }

    return $output;

}
 | |
| 
 | |
// Wikidata classes exported once per language: Q-ID => base name of the JSON file.
// Q6256 => country
$targets = [
    'Q6256'     => 'countries',
    'Q3024240'  => 'historical_countries',
    'Q10864048' => 'first_lvl_administrative_units',
];

// Languages in which the labels are requested.
$langs = [
    'ar', 'bg', 'bn', 'cs', 'da', 'de', 'el', 'en', 'es', 'fa', 'fi', 'fr',
    'ha', 'he', 'hi', 'hu', 'id', 'it', 'ja', 'ka', 'ko', 'nl', 'pl', 'pt',
    'ro', 'ru', 'sv', 'sw', 'ta', 'th', 'tl', 'tr', 'uk', 'ur', 'vi', 'zh',
];

// Write one JSON list per language and target class into ../static/.
foreach ($langs as $lang) {
    foreach ($targets as $qid => $filename) {

        $regionNames = getNames(query($lang, $qid));
        $outputFile  = __DIR__ . '/../static/' . $filename . '.' . $lang . '.json';
        file_put_contents($outputFile, MD_STD::json_encode($regionNames));
        echo "Fetched $lang : $filename ($qid)", PHP_EOL;

    }
}

// The following should be lists of terms that are independent of language
$targetsForMerge = [
    'Q23718' => 'cardinal_directions',
];

// Labels from all languages are collected into a single list per target.
$mergedValues = [];
foreach ($langs as $lang) {
    foreach ($targetsForMerge as $qid => $filename) {

        $collected = $mergedValues[$filename] ?? [];
        $mergedValues[$filename] = array_merge($collected, getNames(query($lang, $qid)));
        echo "Fetched $lang : $filename ($qid)", PHP_EOL;

    }
}

// German terms appended by hand in addition to the fetched labels.
foreach (['Nord', 'Ost', 'West', 'Süd'] as $germanDirection) {
    $mergedValues['cardinal_directions'][] = $germanDirection;
}

// Each merged list is de-duplicated and re-indexed before it is written out.
foreach ($mergedValues as $filename => $values) {

    $distinctValues = array_values(array_unique($values));
    file_put_contents(__DIR__ . '/../static/' . $filename . '.json', MD_STD::json_encode($distinctValues));

}
 |