Add functions that automatically move country names into brackets at
the end of place names, based on pre-generated lists of country names
This commit is contained in:
@ -64,6 +64,11 @@ final class NodaConsolidatedNamesForPlaces extends NodaConsolidatedNamesAbstract
|
||||
'XX' => '20',
|
||||
];
|
||||
|
||||
/**
 * Per-file cache of decoded place name lists, keyed by the file name
 * that was passed to _loadJsonList(). Only written to when running on the CLI.
 *
 * @var array<string, list<string>>
 */
private static array $_placeNameListCaches = [];
|
||||
|
||||
/**
|
||||
* Rewrites indicators for narrower locations paired with a superordinate location
|
||||
* into the format "Narrower (Broader)".
|
||||
@ -181,6 +186,75 @@ final class NodaConsolidatedNamesForPlaces extends NodaConsolidatedNamesAbstract
|
||||
|
||||
}
|
||||
|
||||
/**
 * Loads a JSON file containing a list of strings. When running on the CLI
 * (where the same list is expected to be needed repeatedly within one process),
 * the decoded list is cached in a private static variable and reused.
 *
 * @param non-empty-string $filename File name to load.
 *
 * @throws Exception If the file contents are not valid JSON or do not decode to an array.
 *
 * @return list<string>
 */
private static function _loadJsonList(string $filename):array {

    $useCache = (PHP_SAPI === 'cli');

    if ($useCache && isset(self::$_placeNameListCaches[$filename])) {
        return self::$_placeNameListCaches[$filename];
    }

    // NOTE(review): MD_STD::file_get_contents() presumably throws on unreadable
    // files itself - confirm against MD_STD.
    $decoded = json_decode(MD_STD::file_get_contents($filename), true);

    // json_decode() reports invalid JSON by returning null (never false), and a
    // valid document may still be a scalar. Anything but an array is unusable here.
    if (!is_array($decoded)) {
        throw new Exception("Failed to get list");
    }

    // Guarantee the documented list shape even if the file holds a JSON object.
    $output = array_values($decoded);

    if ($useCache) {
        self::$_placeNameListCaches[$filename] = $output;
    }

    return $output;

}
|
||||
|
||||
/**
 * Moves names of regions to brackets using pre-generated lists of countries,
 * historical country names, etc.
 *
 * A name is only rewritten if it consists of exactly two parts joined by one
 * of the supported separators ("-" or ", ") and exactly one of the two parts
 * is a known (current or historical) country name. The country name is then
 * moved to the end in brackets: "Country, Place" and "Place, Country" both
 * become "Place (Country)". Any other input is returned unchanged.
 *
 * @param string $lang Instance language. Used to select the list files
 *                     static/countries.$lang.json and
 *                     static/historical_countries.$lang.json.
 * @param string $name Input string to clean.
 *
 * @return string
 */
private static function _move_region_names_to_brackets(string $lang, string $name):string {

    $separators = ['-', ', '];

    // Loaded lazily: names without a usable separator never touch the list files.
    $countryNames = null;

    foreach ($separators as $separator) {

        // Only names with exactly one occurrence of the separator are candidates.
        if (substr_count($name, $separator) !== 1) continue;

        // Get parts and trim them
        $parts = array_map('trim', explode($separator, $name));

        // A leading or trailing separator leaves an empty part; rewriting
        // that would produce output like " (Country)".
        if ($parts[0] === '' || $parts[1] === '') continue;

        // Load place names. The lists must be concatenated with array_merge():
        // the + operator is a key-wise union and would drop every historical
        // name whose numeric index also exists in the list of current countries.
        $countryNames ??= array_merge(
            self::_loadJsonList(__DIR__ . "/../static/countries.$lang.json"),
            self::_loadJsonList(__DIR__ . "/../static/historical_countries.$lang.json"),
        );

        $part0IsCountry = in_array($parts[0], $countryNames, true);
        $part1IsCountry = in_array($parts[1], $countryNames, true);

        // If both or neither part is a country, the name is ambiguous: try the next separator.
        if ($part0IsCountry === $part1IsCountry) continue;

        if ($part0IsCountry === true) {
            return $parts[1] . ' (' . $parts[0] . ')';
        }

        return $parts[0] . ' (' . $parts[1] . ')';

    }

    return $name;

}
|
||||
|
||||
/**
|
||||
* Cleans a place name by trimming etc. Also removes uncertainty indicators.
|
||||
*
|
||||
@ -203,6 +277,8 @@ final class NodaConsolidatedNamesForPlaces extends NodaConsolidatedNamesAbstract
|
||||
default => $ort_name,
|
||||
};
|
||||
|
||||
$ort_name = self::_move_region_names_to_brackets($lang, $ort_name);
|
||||
|
||||
return $ort_name;
|
||||
|
||||
}
|
||||
|
Reference in New Issue
Block a user