Cover more levels of Ukrainian (current and historical) place hierarchies
This commit is contained in:
Joshua Ramon Enslin 2023-12-02 16:32:46 +01:00
parent 2badc67405
commit 55931ba3ef
Signed by: jrenslin
GPG Key ID: 46016F84501B70AE
2 changed files with 25 additions and 11 deletions

View File

@ -230,9 +230,12 @@ final class NodaConsolidatedNamesForPlaces extends NodaConsolidatedNamesAbstract
$identifiersByLevel = [ $identifiersByLevel = [
'state' => [' РСР', 'РСР ', ' губернія', 'губернія '], 'state' => [' РСР', 'РСР ', ' губернія', 'губернія '],
'oblast' => ['обл.'], 'oblast' => ['обл.', 'округа', 'губернії'],
'region' => ['р-н'], 'region' => ['р-н', 'район'],
'county' => ['повіт'],
'city' => ['м.'], 'city' => ['м.'],
'parish' => ['волость'],
'village' => ['смт', 'сільська', 'с. '],
'district' => [], // Is also р-н; which it is is determined based on position 'district' => [], // Is also р-н; which it is is determined based on position
'street' => ['вул. '], 'street' => ['вул. '],
]; ];
@ -242,7 +245,10 @@ final class NodaConsolidatedNamesForPlaces extends NodaConsolidatedNamesAbstract
'state' => '', 'state' => '',
'oblast' => '', 'oblast' => '',
'region' => '', 'region' => '',
'county' => '',
'city' => '', 'city' => '',
'parish' => '',
'village' => '',
'district' => '', 'district' => '',
'street' => '', 'street' => '',
]; ];
@ -259,12 +265,15 @@ final class NodaConsolidatedNamesForPlaces extends NodaConsolidatedNamesAbstract
// If both oblast and city are already known, the region will be a // If both oblast and city are already known, the region will be a
// district within the city. // district within the city.
// Otherwise, it is to be assumed that it is a super-city region. // Otherwise, it is to be assumed that it is a super-city region.
if ($level === 'region' && !empty($levels['oblast']) && !empty($levels['city'])) { if ($level === 'region' && !empty($levels['oblast'])
&& (!empty($levels['city']) || !empty($levels['village']))
) {
$level = 'district'; $level = 'district';
} }
if (!empty($levels[$level])) { if (!empty($levels[$level])) {
throw new Exception("Used the same level (" . $level . ") twice"); # throw new Exception("Used the same level (" . $level . ") twice");
return $name;
} }
$levels[$level] = $part; $levels[$level] = $part;
continue 3; continue 3;
@ -273,6 +282,12 @@ final class NodaConsolidatedNamesForPlaces extends NodaConsolidatedNamesAbstract
} }
} }
// Special case: Abbreviated SSRs
if (in_array($part, ['УРСР', 'УССР'], true)) {
$levels['state'] = $part;
continue;
}
// Unspecified part level: Attempt identifying country // Unspecified part level: Attempt identifying country
if (!isset($countryNames)) { if (!isset($countryNames)) {
$countryNames = self::_loadJsonList(__DIR__ . "/../static/countries.uk.json") + self::_loadJsonList(__DIR__ . "/../static/historical_countries.uk.json"); $countryNames = self::_loadJsonList(__DIR__ . "/../static/countries.uk.json") + self::_loadJsonList(__DIR__ . "/../static/historical_countries.uk.json");
@ -281,6 +296,7 @@ final class NodaConsolidatedNamesForPlaces extends NodaConsolidatedNamesAbstract
$countryNames[] = 'Російська імперія'; $countryNames[] = 'Російська імперія';
$countryNames[] = 'Рос.імперія'; $countryNames[] = 'Рос.імперія';
$countryNames[] = 'Рос.имперія'; $countryNames[] = 'Рос.имперія';
$countryNames[] = 'Російська імперія-УНР';
} }
if (in_array($part, $countryNames, true)) { if (in_array($part, $countryNames, true)) {
$levels['country'] = $part; $levels['country'] = $part;
@ -288,7 +304,6 @@ final class NodaConsolidatedNamesForPlaces extends NodaConsolidatedNamesAbstract
} }
// Unspecified level; return // Unspecified level; return
throw new Exception("Unknown " . $part);
return $name; return $name;
} }
@ -298,7 +313,7 @@ final class NodaConsolidatedNamesForPlaces extends NodaConsolidatedNamesAbstract
foreach (array_reverse($levels) as $level => $partname) { foreach (array_reverse($levels) as $level => $partname) {
if (empty($partname)) continue; if (empty($partname)) continue;
if ($level === 'city') { if ($level === 'city' || $level === 'village') {
$strtr = []; $strtr = [];
foreach ($identifiersByLevel[$level] as $identifier) $strtr[$identifier] = ''; foreach ($identifiersByLevel[$level] as $identifier) $strtr[$identifier] = '';
$partname = trim(strtr($partname, $strtr)); $partname = trim(strtr($partname, $strtr));
@ -329,11 +344,9 @@ final class NodaConsolidatedNamesForPlaces extends NodaConsolidatedNamesAbstract
*/ */
private static function _clean_ukrainian_abbreviations(string $name):string { private static function _clean_ukrainian_abbreviations(string $name):string {
/* if (str_contains($name, " р-н,") || str_contains($name, " р") || str_ends_with($name, " р")) {
if (str_contains($name, " krt. ") && \preg_match("/\ krt\.\ [0-9]/", $name)) { $name = str_replace(" р", " район", $name);
$name = str_replace(" krt. ", " körut ", $name);
} }
*/
if (str_contains($name, ',')) { if (str_contains($name, ',')) {
$name = self::_rewrite_ukrainian_names_by_hierarchy($name); $name = self::_rewrite_ukrainian_names_by_hierarchy($name);

View File

@ -88,7 +88,8 @@ final class NodaConsolidatedNamesForPlacesTest extends TestCase {
// If both oblast and city are already known, the region will be a // If both oblast and city are already known, the region will be a
// district within the city. // district within the city.
// Otherwise, it is to be assumed that it is a super-city region. // Otherwise, it is to be assumed that it is a super-city region.
self::assertEquals("Приморський р-н (Запоріжжя, Запорізька обл., Україна)", NodaConsolidatedNamesForPlaces::consolidate_name("uk", "Україна, Запорізька обл., м. Запоріжжя, Приморський р")); self::assertEquals("Приморський район (Запоріжжя, Запорізька обл., Україна)", NodaConsolidatedNamesForPlaces::consolidate_name("uk", "Україна, Запорізька обл., м. Запоріжжя, Приморський р"));
self::assertEquals("Кодимський район (Одеська обл., УССР)", NodaConsolidatedNamesForPlaces::consolidate_name("uk", "УССР, Одеська обл., Кодимський район"));
} }
} }