Add functions for automatic rewriting of country names to brackets at
the end of place names based on lists
This commit is contained in:
parent
f6409322e5
commit
e610723107
71
scripts/get_wikidata_country_names.php
Normal file
71
scripts/get_wikidata_country_names.php
Normal file
|
@ -0,0 +1,71 @@
|
|||
<?PHP
|
||||
/**
|
||||
* This file contains tools for fetching data from Wikidata.
|
||||
*
|
||||
* @file
|
||||
* @author Joshua Ramon Enslin <joshua@museum-digital.de>
|
||||
*/
|
||||
declare(strict_types = 1);
|
||||
|
||||
require_once __DIR__ . '/../src/NodaWikidataFetcher.php';
|
||||
require_once __DIR__ . '/../../MD_STD/src/MD_STD.php';
|
||||
|
||||
/**
 * Runs a SPARQL query against Wikidata that selects every item which is an
 * instance (P31) of the given class or of any of its subclasses (P279*),
 * together with the item's label.
 *
 * Labels are requested through the wikibase label service with the language
 * preference "$lang, [AUTO_LANGUAGE], en", i.e. English is the last fallback.
 *
 * @param string $lang       Language code to prefer for the labels.
 * @param string $instanceOf Q-ID of the class whose instances are wanted.
 *
 * @return array<mixed> Whatever NodaWikidataFetcher::sparqlQuery() returns
 *                      for the query.
 */
function query(string $lang, string $instanceOf):array {

    // The trailing "#" comment inside the query runs to the end of its line,
    // so the line break before the closing brace must be preserved.
    $sparql = 'SELECT ?item ?itemLabel
WHERE
{
?item wdt:P31/wdt:P279* wd:' . $instanceOf . '.
SERVICE wikibase:label { bd:serviceParam wikibase:language "' . $lang . ',[AUTO_LANGUAGE],en". } # Helps get the label in your language, if not, then en language
}';

    $result = NodaWikidataFetcher::sparqlQuery($sparql);

    return $result;

}
|
||||
|
||||
/**
 * Extracts the item labels from a SPARQL result set.
 *
 * Reads the rows found at $data['results']['bindings'] and collects the
 * ['itemLabel']['value'] entry of each row, in order.
 *
 * @param array<mixed> $data Decoded Wikidata SPARQL response.
 *
 * @return array<string> Labels in the order the rows were returned.
 */
function getNames(array $data):array {

    $labels   = [];
    $bindings = $data['results']['bindings'];

    foreach ($bindings as $binding) {
        $label    = $binding['itemLabel']['value'];
        $labels[] = $label;
    }

    return $labels;

}
|
||||
|
||||
// Wikidata classes to export, keyed by Q-ID. The value is the base name of
// the JSON list file written below.
// Q6256 => country
$targets = [
    'Q6256'     => 'countries',
    'Q3024240'  => 'historical_countries',
    'Q10864048' => 'first_lvl_administrative_units',
];

// Languages for which a list is generated per target.
$langs = ['ar', 'bg', 'bn', 'cs', 'da', 'de', 'el', 'en', 'es', 'fa', 'fi', 'fr', 'ha', 'he', 'hi', 'hu', 'id', 'it', 'ja', 'ka', 'ko', 'nl', 'pl', 'pt', 'ro', 'ru', 'sv', 'sw', 'ta', 'th', 'tl', 'tr', 'uk', 'ur', 'vi', 'zh'];

foreach ($langs as $lang) {
    foreach ($targets as $qid => $filename) {

        $regionNames = getNames(query($lang, $qid));

        // One file per target and language: static/<target>.<lang>.json
        $targetFile = __DIR__ . '/../static/' . $filename . '.' . $lang . '.json';

        // Throw instead of receiving false, which would otherwise be written
        // out as an empty file.
        $json = json_encode($regionNames, JSON_THROW_ON_ERROR);

        // Do not report success if the list could not be written.
        if (file_put_contents($targetFile, $json) === false) {
            throw new RuntimeException("Failed to write $targetFile");
        }

        echo "Fetched $lang : $filename ($qid)" . PHP_EOL;

    }
}
|
|
@ -64,6 +64,11 @@ final class NodaConsolidatedNamesForPlaces extends NodaConsolidatedNamesAbstract
|
|||
'XX' => '20',
|
||||
];
|
||||
|
||||
/**
 * Decoded place name lists, keyed by the path of the JSON file they were
 * loaded from.
 *
 * @var array<string, list<string>>
 */
private static array $_placeNameListCaches = [];
|
||||
|
||||
/**
|
||||
* Rewrites indicators for narrower locations paired with a superordinate location
|
||||
* into the format "Narrower (Broader)".
|
||||
|
@ -181,6 +186,75 @@ final class NodaConsolidatedNamesForPlaces extends NodaConsolidatedNamesAbstract
|
|||
|
||||
}
|
||||
|
||||
/**
 * Loads a JSON file containing a list and returns the decoded array.
 *
 * When running on the CLI, where the same list is expected to be needed
 * repeatedly, the decoded list is kept in a private static cache keyed by
 * file name and served from there on subsequent calls.
 *
 * @param non-empty-string $filename File name to load.
 *
 * @return list<string>
 *
 * @throws Exception If the file contents do not decode to an array.
 */
private static function _loadJsonList(string $filename):array {

    $useCache = (PHP_SAPI === 'cli');

    if ($useCache && isset(self::$_placeNameListCaches[$filename])) {
        return self::$_placeNameListCaches[$filename];
    }

    // json_decode() reports failure by returning null, never false. Checking
    // for an array covers both invalid JSON and valid JSON of the wrong type
    // (e.g. a bare string), either of which would otherwise violate the
    // declared return type.
    $output = json_decode(MD_STD::file_get_contents($filename), true);
    if (!is_array($output)) {
        throw new Exception("Failed to get list");
    }

    if ($useCache) {
        self::$_placeNameListCaches[$filename] = $output;
    }

    return $output;

}
|
||||
|
||||
/**
 * Moves names of regions to brackets using pre-generated lists of countries
 * and historical country names.
 *
 * The name is split at "-" or ", " if that separator occurs exactly once.
 * If exactly one of the two trimmed parts is a known country name, the
 * result is "Other part (Country)". In every other case the name is
 * returned unchanged.
 *
 * @param string $lang Instance language; selects the list files to use.
 * @param string $name Input string to clean.
 *
 * @return string
 */
private static function _move_region_names_to_brackets(string $lang, string $name):string {

    $separators = ['-', ', '];

    // Loaded lazily, so names without a usable separator never touch the
    // list files, and names with one load them only once.
    $countryNames = null;

    foreach ($separators as $separator) {

        if (substr_count($name, $separator) !== 1) continue;

        if ($countryNames === null) {
            // array_merge() rather than "+": both files decode to lists with
            // numeric keys, and "+" would drop every historical entry whose
            // index already exists in the list of modern countries.
            $countryNames = array_merge(
                self::_loadJsonList(__DIR__ . "/../static/countries.$lang.json"),
                self::_loadJsonList(__DIR__ . "/../static/historical_countries.$lang.json"),
            );

            // A name that is a country name as a whole (e.g. one containing
            // a hyphen) must not be torn apart into "part (country)".
            if (in_array($name, $countryNames, true)) {
                return $name;
            }
        }

        // Get parts and trim them
        $parts = array_map('trim', explode($separator, $name));

        $part0IsCountry = in_array($parts[0], $countryNames, true);
        $part1IsCountry = in_array($parts[1], $countryNames, true);

        // Only rewrite if exactly one part is a country; otherwise it is
        // unclear which part is the broader one.
        if ($part0IsCountry === true && $part1IsCountry === false) {
            return $parts[1] . ' (' . $parts[0] . ')';
        }
        if ($part0IsCountry === false && $part1IsCountry === true) {
            return $parts[0] . ' (' . $parts[1] . ')';
        }

    }

    return $name;

}
|
||||
|
||||
/**
|
||||
* Cleans a place name by trimming etc. Also removes uncertainty indicators.
|
||||
*
|
||||
|
@ -203,6 +277,8 @@ final class NodaConsolidatedNamesForPlaces extends NodaConsolidatedNamesAbstract
|
|||
default => $ort_name,
|
||||
};
|
||||
|
||||
$ort_name = self::_move_region_names_to_brackets($lang, $ort_name);
|
||||
|
||||
return $ort_name;
|
||||
|
||||
}
|
||||
|
|
1
static/countries.ar.json
Normal file
1
static/countries.ar.json
Normal file
File diff suppressed because one or more lines are too long
1
static/countries.bg.json
Normal file
1
static/countries.bg.json
Normal file
File diff suppressed because one or more lines are too long
1
static/countries.bn.json
Normal file
1
static/countries.bn.json
Normal file
File diff suppressed because one or more lines are too long
1
static/countries.cs.json
Normal file
1
static/countries.cs.json
Normal file
File diff suppressed because one or more lines are too long
1
static/countries.da.json
Normal file
1
static/countries.da.json
Normal file
File diff suppressed because one or more lines are too long
1
static/countries.de.json
Normal file
1
static/countries.de.json
Normal file
File diff suppressed because one or more lines are too long
1
static/countries.el.json
Normal file
1
static/countries.el.json
Normal file
File diff suppressed because one or more lines are too long
1
static/countries.en.json
Normal file
1
static/countries.en.json
Normal file
File diff suppressed because one or more lines are too long
1
static/countries.es.json
Normal file
1
static/countries.es.json
Normal file
File diff suppressed because one or more lines are too long
1
static/countries.fa.json
Normal file
1
static/countries.fa.json
Normal file
File diff suppressed because one or more lines are too long
1
static/countries.fi.json
Normal file
1
static/countries.fi.json
Normal file
File diff suppressed because one or more lines are too long
1
static/countries.fr.json
Normal file
1
static/countries.fr.json
Normal file
File diff suppressed because one or more lines are too long
1
static/countries.ha.json
Normal file
1
static/countries.ha.json
Normal file
File diff suppressed because one or more lines are too long
1
static/countries.he.json
Normal file
1
static/countries.he.json
Normal file
File diff suppressed because one or more lines are too long
1
static/countries.hi.json
Normal file
1
static/countries.hi.json
Normal file
File diff suppressed because one or more lines are too long
1
static/countries.hu.json
Normal file
1
static/countries.hu.json
Normal file
File diff suppressed because one or more lines are too long
1
static/countries.id.json
Normal file
1
static/countries.id.json
Normal file
File diff suppressed because one or more lines are too long
1
static/countries.it.json
Normal file
1
static/countries.it.json
Normal file
File diff suppressed because one or more lines are too long
1
static/countries.ja.json
Normal file
1
static/countries.ja.json
Normal file
File diff suppressed because one or more lines are too long
1
static/countries.ka.json
Normal file
1
static/countries.ka.json
Normal file
File diff suppressed because one or more lines are too long
1
static/countries.ko.json
Normal file
1
static/countries.ko.json
Normal file
File diff suppressed because one or more lines are too long
1
static/countries.nl.json
Normal file
1
static/countries.nl.json
Normal file
File diff suppressed because one or more lines are too long
1
static/countries.pl.json
Normal file
1
static/countries.pl.json
Normal file
File diff suppressed because one or more lines are too long
1
static/countries.pt.json
Normal file
1
static/countries.pt.json
Normal file
File diff suppressed because one or more lines are too long
1
static/countries.ro.json
Normal file
1
static/countries.ro.json
Normal file
File diff suppressed because one or more lines are too long
1
static/countries.ru.json
Normal file
1
static/countries.ru.json
Normal file
File diff suppressed because one or more lines are too long
1
static/countries.sv.json
Normal file
1
static/countries.sv.json
Normal file
File diff suppressed because one or more lines are too long
1
static/countries.sw.json
Normal file
1
static/countries.sw.json
Normal file
File diff suppressed because one or more lines are too long
1
static/countries.ta.json
Normal file
1
static/countries.ta.json
Normal file
File diff suppressed because one or more lines are too long
1
static/countries.th.json
Normal file
1
static/countries.th.json
Normal file
File diff suppressed because one or more lines are too long
1
static/countries.tl.json
Normal file
1
static/countries.tl.json
Normal file
File diff suppressed because one or more lines are too long
1
static/countries.tr.json
Normal file
1
static/countries.tr.json
Normal file
File diff suppressed because one or more lines are too long
1
static/countries.uk.json
Normal file
1
static/countries.uk.json
Normal file
File diff suppressed because one or more lines are too long
1
static/countries.ur.json
Normal file
1
static/countries.ur.json
Normal file
File diff suppressed because one or more lines are too long
1
static/countries.vi.json
Normal file
1
static/countries.vi.json
Normal file
File diff suppressed because one or more lines are too long
1
static/countries.zh.json
Normal file
1
static/countries.zh.json
Normal file
File diff suppressed because one or more lines are too long
1
static/first_lvl_administrative_units.ar.json
Normal file
1
static/first_lvl_administrative_units.ar.json
Normal file
File diff suppressed because one or more lines are too long
1
static/first_lvl_administrative_units.bg.json
Normal file
1
static/first_lvl_administrative_units.bg.json
Normal file
File diff suppressed because one or more lines are too long
1
static/first_lvl_administrative_units.bn.json
Normal file
1
static/first_lvl_administrative_units.bn.json
Normal file
File diff suppressed because one or more lines are too long
1
static/first_lvl_administrative_units.cs.json
Normal file
1
static/first_lvl_administrative_units.cs.json
Normal file
File diff suppressed because one or more lines are too long
1
static/first_lvl_administrative_units.da.json
Normal file
1
static/first_lvl_administrative_units.da.json
Normal file
File diff suppressed because one or more lines are too long
1
static/first_lvl_administrative_units.de.json
Normal file
1
static/first_lvl_administrative_units.de.json
Normal file
File diff suppressed because one or more lines are too long
1
static/first_lvl_administrative_units.el.json
Normal file
1
static/first_lvl_administrative_units.el.json
Normal file
File diff suppressed because one or more lines are too long
1
static/first_lvl_administrative_units.en.json
Normal file
1
static/first_lvl_administrative_units.en.json
Normal file
File diff suppressed because one or more lines are too long
1
static/first_lvl_administrative_units.es.json
Normal file
1
static/first_lvl_administrative_units.es.json
Normal file
File diff suppressed because one or more lines are too long
1
static/first_lvl_administrative_units.fa.json
Normal file
1
static/first_lvl_administrative_units.fa.json
Normal file
File diff suppressed because one or more lines are too long
1
static/first_lvl_administrative_units.fi.json
Normal file
1
static/first_lvl_administrative_units.fi.json
Normal file
File diff suppressed because one or more lines are too long
1
static/first_lvl_administrative_units.fr.json
Normal file
1
static/first_lvl_administrative_units.fr.json
Normal file
File diff suppressed because one or more lines are too long
1
static/first_lvl_administrative_units.ha.json
Normal file
1
static/first_lvl_administrative_units.ha.json
Normal file
File diff suppressed because one or more lines are too long
1
static/first_lvl_administrative_units.he.json
Normal file
1
static/first_lvl_administrative_units.he.json
Normal file
File diff suppressed because one or more lines are too long
1
static/first_lvl_administrative_units.hi.json
Normal file
1
static/first_lvl_administrative_units.hi.json
Normal file
File diff suppressed because one or more lines are too long
1
static/first_lvl_administrative_units.hu.json
Normal file
1
static/first_lvl_administrative_units.hu.json
Normal file
File diff suppressed because one or more lines are too long
1
static/first_lvl_administrative_units.id.json
Normal file
1
static/first_lvl_administrative_units.id.json
Normal file
File diff suppressed because one or more lines are too long
1
static/first_lvl_administrative_units.it.json
Normal file
1
static/first_lvl_administrative_units.it.json
Normal file
File diff suppressed because one or more lines are too long
1
static/first_lvl_administrative_units.ja.json
Normal file
1
static/first_lvl_administrative_units.ja.json
Normal file
File diff suppressed because one or more lines are too long
1
static/first_lvl_administrative_units.ka.json
Normal file
1
static/first_lvl_administrative_units.ka.json
Normal file
File diff suppressed because one or more lines are too long
1
static/first_lvl_administrative_units.ko.json
Normal file
1
static/first_lvl_administrative_units.ko.json
Normal file
File diff suppressed because one or more lines are too long
1
static/first_lvl_administrative_units.nl.json
Normal file
1
static/first_lvl_administrative_units.nl.json
Normal file
File diff suppressed because one or more lines are too long
1
static/first_lvl_administrative_units.pl.json
Normal file
1
static/first_lvl_administrative_units.pl.json
Normal file
File diff suppressed because one or more lines are too long
1
static/first_lvl_administrative_units.pt.json
Normal file
1
static/first_lvl_administrative_units.pt.json
Normal file
File diff suppressed because one or more lines are too long
1
static/first_lvl_administrative_units.ro.json
Normal file
1
static/first_lvl_administrative_units.ro.json
Normal file
File diff suppressed because one or more lines are too long
1
static/first_lvl_administrative_units.ru.json
Normal file
1
static/first_lvl_administrative_units.ru.json
Normal file
File diff suppressed because one or more lines are too long
1
static/first_lvl_administrative_units.sv.json
Normal file
1
static/first_lvl_administrative_units.sv.json
Normal file
File diff suppressed because one or more lines are too long
1
static/first_lvl_administrative_units.sw.json
Normal file
1
static/first_lvl_administrative_units.sw.json
Normal file
File diff suppressed because one or more lines are too long
1
static/first_lvl_administrative_units.ta.json
Normal file
1
static/first_lvl_administrative_units.ta.json
Normal file
File diff suppressed because one or more lines are too long
1
static/first_lvl_administrative_units.th.json
Normal file
1
static/first_lvl_administrative_units.th.json
Normal file
File diff suppressed because one or more lines are too long
1
static/first_lvl_administrative_units.tl.json
Normal file
1
static/first_lvl_administrative_units.tl.json
Normal file
File diff suppressed because one or more lines are too long
1
static/first_lvl_administrative_units.tr.json
Normal file
1
static/first_lvl_administrative_units.tr.json
Normal file
File diff suppressed because one or more lines are too long
1
static/first_lvl_administrative_units.uk.json
Normal file
1
static/first_lvl_administrative_units.uk.json
Normal file
File diff suppressed because one or more lines are too long
1
static/first_lvl_administrative_units.ur.json
Normal file
1
static/first_lvl_administrative_units.ur.json
Normal file
File diff suppressed because one or more lines are too long
1
static/first_lvl_administrative_units.vi.json
Normal file
1
static/first_lvl_administrative_units.vi.json
Normal file
File diff suppressed because one or more lines are too long
1
static/first_lvl_administrative_units.zh.json
Normal file
1
static/first_lvl_administrative_units.zh.json
Normal file
File diff suppressed because one or more lines are too long
1
static/historical_countries.ar.json
Normal file
1
static/historical_countries.ar.json
Normal file
File diff suppressed because one or more lines are too long
1
static/historical_countries.bg.json
Normal file
1
static/historical_countries.bg.json
Normal file
File diff suppressed because one or more lines are too long
1
static/historical_countries.bn.json
Normal file
1
static/historical_countries.bn.json
Normal file
File diff suppressed because one or more lines are too long
1
static/historical_countries.cs.json
Normal file
1
static/historical_countries.cs.json
Normal file
File diff suppressed because one or more lines are too long
1
static/historical_countries.da.json
Normal file
1
static/historical_countries.da.json
Normal file
File diff suppressed because one or more lines are too long
1
static/historical_countries.de.json
Normal file
1
static/historical_countries.de.json
Normal file
File diff suppressed because one or more lines are too long
1
static/historical_countries.el.json
Normal file
1
static/historical_countries.el.json
Normal file
File diff suppressed because one or more lines are too long
1
static/historical_countries.en.json
Normal file
1
static/historical_countries.en.json
Normal file
File diff suppressed because one or more lines are too long
1
static/historical_countries.es.json
Normal file
1
static/historical_countries.es.json
Normal file
File diff suppressed because one or more lines are too long
1
static/historical_countries.fa.json
Normal file
1
static/historical_countries.fa.json
Normal file
File diff suppressed because one or more lines are too long
1
static/historical_countries.fi.json
Normal file
1
static/historical_countries.fi.json
Normal file
File diff suppressed because one or more lines are too long
1
static/historical_countries.fr.json
Normal file
1
static/historical_countries.fr.json
Normal file
File diff suppressed because one or more lines are too long
1
static/historical_countries.ha.json
Normal file
1
static/historical_countries.ha.json
Normal file
File diff suppressed because one or more lines are too long
1
static/historical_countries.he.json
Normal file
1
static/historical_countries.he.json
Normal file
File diff suppressed because one or more lines are too long
1
static/historical_countries.hi.json
Normal file
1
static/historical_countries.hi.json
Normal file
File diff suppressed because one or more lines are too long
1
static/historical_countries.hu.json
Normal file
1
static/historical_countries.hu.json
Normal file
File diff suppressed because one or more lines are too long
1
static/historical_countries.id.json
Normal file
1
static/historical_countries.id.json
Normal file
File diff suppressed because one or more lines are too long
1
static/historical_countries.it.json
Normal file
1
static/historical_countries.it.json
Normal file
File diff suppressed because one or more lines are too long
1
static/historical_countries.ja.json
Normal file
1
static/historical_countries.ja.json
Normal file
File diff suppressed because one or more lines are too long
1
static/historical_countries.ka.json
Normal file
1
static/historical_countries.ka.json
Normal file
File diff suppressed because one or more lines are too long
1
static/historical_countries.ko.json
Normal file
1
static/historical_countries.ko.json
Normal file
File diff suppressed because one or more lines are too long
1
static/historical_countries.nl.json
Normal file
1
static/historical_countries.nl.json
Normal file
File diff suppressed because one or more lines are too long
1
static/historical_countries.pl.json
Normal file
1
static/historical_countries.pl.json
Normal file
File diff suppressed because one or more lines are too long
1
static/historical_countries.pt.json
Normal file
1
static/historical_countries.pt.json
Normal file
File diff suppressed because one or more lines are too long
1
static/historical_countries.ro.json
Normal file
1
static/historical_countries.ro.json
Normal file
File diff suppressed because one or more lines are too long
1
static/historical_countries.ru.json
Normal file
1
static/historical_countries.ru.json
Normal file
File diff suppressed because one or more lines are too long
Some files were not shown because too many files have changed in this diff Show More
Loading…
Reference in New Issue
Block a user