Add functions for automatically moving country names into brackets at the end of place names, based on lists
This commit is contained in:
2023-11-26 00:54:14 +01:00
parent f6409322e5
commit e610723107
110 changed files with 255 additions and 0 deletions

View File

@ -64,6 +64,11 @@ final class NodaConsolidatedNamesForPlaces extends NodaConsolidatedNamesAbstract
'XX' => '20',
];
/**
 * Cache of decoded JSON lists, keyed by the name of the file they were
 * loaded from. It is only written to when running under the CLI SAPI.
 *
 * @var array<string, list<string>>
 */
private static $_placeNameListCaches = [];
/**
* Rewrites indicators for narrower locations paired with a superordinate location
* into the format "Narrower (Broader)".
@ -181,6 +186,75 @@ final class NodaConsolidatedNamesForPlaces extends NodaConsolidatedNamesAbstract
}
/**
 * Loads a JSON file and returns its decoded contents as an array.
 *
 * When running under the CLI SAPI, where reuse within one process is
 * expectable, the decoded list is kept in a private static cache keyed by
 * file name, so that each file is read and parsed only once.
 *
 * @param non-empty-string $filename File name to load.
 *
 * @return list<string>
 *
 * @throws Exception If the file contents do not decode to an array.
 */
private static function _loadJsonList(string $filename):array {

    $useCache = (PHP_SAPI === 'cli');

    if ($useCache && isset(self::$_placeNameListCaches[$filename])) {
        return self::$_placeNameListCaches[$filename];
    }

    $output = json_decode(MD_STD::file_get_contents($filename), true);

    // json_decode() reports failure by returning null (never false), and valid
    // JSON may just as well decode to a scalar. Anything that is not an array
    // cannot be returned from here, so reject it explicitly.
    if (!is_array($output)) {
        throw new Exception("Failed to get list");
    }

    if ($useCache) {
        self::$_placeNameListCaches[$filename] = $output;
    }

    return $output;

}
/**
 * Moves names of regions to brackets using pre-generated lists of countries,
 * historical country names, etc.
 *
 * The name is split at a hyphen or at ", ". If the separator occurs exactly
 * once and exactly one of the two parts is a listed country name, the result
 * is "Other part (Country)". For example, "Berlin, Germany" becomes
 * "Berlin (Germany)", provided "Germany" is listed and "Berlin" is not.
 * In every other case the name is returned unchanged.
 *
 * @param string $lang Instance language.
 * @param string $name Input string to clean.
 *
 * @return string
 */
private static function _move_region_names_to_brackets(string $lang, string $name):string {

    // Loaded lazily and only once: names without a usable separator never
    // cause the list files to be read.
    $countryNames = null;

    foreach (['-', ', '] as $separator) {

        // Only names consisting of exactly two parts can be rewritten unambiguously.
        if (substr_count($name, $separator) !== 1) continue;

        [$first, $second] = array_map('trim', explode($separator, $name));

        // A dangling separator (e.g. "Germany, ") leaves nothing to put in
        // front of the brackets.
        if ($first === '' || $second === '') continue;

        // array_merge() appends the second list. The + operator would drop
        // every historical name whose numeric index already exists in the
        // list of current countries.
        $countryNames ??= array_merge(
            self::_loadJsonList(__DIR__ . "/../static/countries.$lang.json"),
            self::_loadJsonList(__DIR__ . "/../static/historical_countries.$lang.json"),
        );

        // A name that is itself listed (presumably e.g. hyphenated country
        // names) must not be torn apart at its own separator.
        if (in_array($name, $countryNames, true)) return $name;

        $firstIsCountry  = in_array($first, $countryNames, true);
        $secondIsCountry = in_array($second, $countryNames, true);

        if ($firstIsCountry && !$secondIsCountry) {
            return $second . ' (' . $first . ')';
        }
        if ($secondIsCountry && !$firstIsCountry) {
            return $first . ' (' . $second . ')';
        }

    }

    return $name;

}
/**
* Cleans a place name by trimming etc. Also removes uncertainty indicators.
*
@ -203,6 +277,8 @@ final class NodaConsolidatedNamesForPlaces extends NodaConsolidatedNamesAbstract
default => $ort_name,
};
$ort_name = self::_move_region_names_to_brackets($lang, $ort_name);
return $ort_name;
}