Cover more levels of Ukrainian (current and historical) place hierarchies
This commit is contained in:
Joshua Ramon Enslin 2023-12-02 16:32:46 +01:00
parent 2badc67405
commit 55931ba3ef
Signed by: jrenslin
GPG Key ID: 46016F84501B70AE
2 changed files with 25 additions and 11 deletions

View File

@ -230,9 +230,12 @@ final class NodaConsolidatedNamesForPlaces extends NodaConsolidatedNamesAbstract
$identifiersByLevel = [
'state' => [' РСР', 'РСР ', ' губернія', 'губернія '],
'oblast' => ['обл.'],
'region' => ['р-н'],
'oblast' => ['обл.', 'округа', 'губернії'],
'region' => ['р-н', 'район'],
'county' => ['повіт'],
'city' => ['м.'],
'parish' => ['волость'],
'village' => ['смт', 'сільська', 'с. '],
'district' => [], // Is also р-н; which it is is determined based on position
'street' => ['вул. '],
];
@ -242,7 +245,10 @@ final class NodaConsolidatedNamesForPlaces extends NodaConsolidatedNamesAbstract
'state' => '',
'oblast' => '',
'region' => '',
'county' => '',
'city' => '',
'parish' => '',
'village' => '',
'district' => '',
'street' => '',
];
@ -259,12 +265,15 @@ final class NodaConsolidatedNamesForPlaces extends NodaConsolidatedNamesAbstract
// If both oblast and city are already known, the region will be a
// district within the city.
// Otherwise, it is to be assumed that it is a super-city region.
if ($level === 'region' && !empty($levels['oblast']) && !empty($levels['city'])) {
if ($level === 'region' && !empty($levels['oblast'])
&& (!empty($levels['city']) || !empty($levels['village']))
) {
$level = 'district';
}
if (!empty($levels[$level])) {
throw new Exception("Used the same level (" . $level . ") twice");
# throw new Exception("Used the same level (" . $level . ") twice");
return $name;
}
$levels[$level] = $part;
continue 3;
@ -273,6 +282,12 @@ final class NodaConsolidatedNamesForPlaces extends NodaConsolidatedNamesAbstract
}
}
// Special case: Abbreviated SSRs
if (in_array($part, ['УРСР', 'УССР'], true)) {
$levels['state'] = $part;
continue;
}
// Unspecified part level: Attempt identifying country
if (!isset($countryNames)) {
$countryNames = self::_loadJsonList(__DIR__ . "/../static/countries.uk.json") + self::_loadJsonList(__DIR__ . "/../static/historical_countries.uk.json");
@ -281,6 +296,7 @@ final class NodaConsolidatedNamesForPlaces extends NodaConsolidatedNamesAbstract
$countryNames[] = 'Російська імперія';
$countryNames[] = 'Рос.імперія';
$countryNames[] = 'Рос.имперія';
$countryNames[] = 'Російська імперія-УНР';
}
if (in_array($part, $countryNames, true)) {
$levels['country'] = $part;
@ -288,7 +304,6 @@ final class NodaConsolidatedNamesForPlaces extends NodaConsolidatedNamesAbstract
}
// Unspecified level; return
throw new Exception("Unknown " . $part);
return $name;
}
@ -298,7 +313,7 @@ final class NodaConsolidatedNamesForPlaces extends NodaConsolidatedNamesAbstract
foreach (array_reverse($levels) as $level => $partname) {
if (empty($partname)) continue;
if ($level === 'city') {
if ($level === 'city' || $level === 'village') {
$strtr = [];
foreach ($identifiersByLevel[$level] as $identifier) $strtr[$identifier] = '';
$partname = trim(strtr($partname, $strtr));
@ -329,11 +344,9 @@ final class NodaConsolidatedNamesForPlaces extends NodaConsolidatedNamesAbstract
*/
private static function _clean_ukrainian_abbreviations(string $name):string {
/*
if (str_contains($name, " krt. ") && \preg_match("/\ krt\.\ [0-9]/", $name)) {
$name = str_replace(" krt. ", " körut ", $name);
if (str_contains($name, " р-н,") || str_contains($name, " р") || str_ends_with($name, " р")) {
$name = str_replace(" р", " район", $name);
}
*/
if (str_contains($name, ',')) {
$name = self::_rewrite_ukrainian_names_by_hierarchy($name);

View File

@ -88,7 +88,8 @@ final class NodaConsolidatedNamesForPlacesTest extends TestCase {
// If both oblast and city are already known, the region will be a
// district within the city.
// Otherwise, it is to be assumed that it is a super-city region.
self::assertEquals("Приморський р-н (Запоріжжя, Запорізька обл., Україна)", NodaConsolidatedNamesForPlaces::consolidate_name("uk", "Україна, Запорізька обл., м. Запоріжжя, Приморський р"));
self::assertEquals("Приморський район (Запоріжжя, Запорізька обл., Україна)", NodaConsolidatedNamesForPlaces::consolidate_name("uk", "Україна, Запорізька обл., м. Запоріжжя, Приморський р"));
self::assertEquals("Кодимський район (Одеська обл., УССР)", NodaConsolidatedNamesForPlaces::consolidate_name("uk", "УССР, Одеська обл., Кодимський район"));
}
}