diff --git a/src/NodaConsolidatedNamesForPlaces.php b/src/NodaConsolidatedNamesForPlaces.php index 8c8988a..9a6af65 100644 --- a/src/NodaConsolidatedNamesForPlaces.php +++ b/src/NodaConsolidatedNamesForPlaces.php @@ -230,9 +230,12 @@ final class NodaConsolidatedNamesForPlaces extends NodaConsolidatedNamesAbstract $identifiersByLevel = [ 'state' => [' РСР', 'РСР ', ' губернія', 'губернія '], - 'oblast' => ['обл.'], - 'region' => ['р-н'], + 'oblast' => ['обл.', 'округа', 'губернії'], + 'region' => ['р-н', 'район'], + 'county' => ['повіт'], 'city' => ['м.'], + 'parish' => ['волость'], + 'village' => ['смт', 'сільська', 'с. '], 'district' => [], // Is also р-н; which it is is determined based on position 'street' => ['вул. '], ]; @@ -242,7 +245,10 @@ final class NodaConsolidatedNamesForPlaces extends NodaConsolidatedNamesAbstract 'state' => '', 'oblast' => '', 'region' => '', + 'county' => '', 'city' => '', + 'parish' => '', + 'village' => '', 'district' => '', 'street' => '', ]; @@ -259,12 +265,15 @@ final class NodaConsolidatedNamesForPlaces extends NodaConsolidatedNamesAbstract // If both oblast and city are already known, the region will be a // district within the city. // Otherwise, it is to be assumed that it is a super-city region. - if ($level === 'region' && !empty($levels['oblast']) && !empty($levels['city'])) { + if ($level === 'region' && !empty($levels['oblast']) + && (!empty($levels['city']) || !empty($levels['village'])) + ) { $level = 'district'; } if (!empty($levels[$level])) { - throw new Exception("Used the same level (" . $level . ") twice"); + # throw new Exception("Used the same level (" . $level . ") twice"); + return $name; } $levels[$level] = $part; continue 3; @@ -273,6 +282,12 @@ final class NodaConsolidatedNamesForPlaces extends NodaConsolidatedNamesAbstract } } + // Special case: Abbreviated SSRs + if (in_array($part, ['УРСР', 'УССР'], true)) { + $levels['state'] = $part; + continue; + } + // Unspecified part level: Attempt identifying country if (!isset($countryNames)) { $countryNames = self::_loadJsonList(__DIR__ . "/../static/countries.uk.json") + self::_loadJsonList(__DIR__ . "/../static/historical_countries.uk.json"); @@ -281,6 +296,7 @@ final class NodaConsolidatedNamesForPlaces extends NodaConsolidatedNamesAbstract $countryNames[] = 'Російська імперія'; $countryNames[] = 'Рос.імперія'; $countryNames[] = 'Рос.имперія'; + $countryNames[] = 'Російська імперія-УНР'; } if (in_array($part, $countryNames, true)) { $levels['country'] = $part; @@ -288,7 +304,6 @@ final class NodaConsolidatedNamesForPlaces extends NodaConsolidatedNamesAbstract } // Unspecified level; return - throw new Exception("Unknown " . $part); return $name; } @@ -298,7 +313,7 @@ final class NodaConsolidatedNamesForPlaces extends NodaConsolidatedNamesAbstract foreach (array_reverse($levels) as $level => $partname) { if (empty($partname)) continue; - if ($level === 'city') { + if ($level === 'city' || $level === 'village') { $strtr = []; foreach ($identifiersByLevel[$level] as $identifier) $strtr[$identifier] = ''; $partname = trim(strtr($partname, $strtr)); @@ -329,11 +344,9 @@ final class NodaConsolidatedNamesForPlaces extends NodaConsolidatedNamesAbstract */ private static function _clean_ukrainian_abbreviations(string $name):string { - /* - if (str_contains($name, " krt. ") && \preg_match("/\ krt\.\ [0-9]/", $name)) { - $name = str_replace(" krt. ", " körut ", $name); + if (str_contains($name, " р-н,") || str_contains($name, " р-н ") || str_ends_with($name, " р-н")) { + $name = str_replace(" р-н", " район", $name); } - */ if (str_contains($name, ',')) { $name = self::_rewrite_ukrainian_names_by_hierarchy($name); diff --git a/tests/NodaConsolidatedNamesForPlacesTest.php b/tests/NodaConsolidatedNamesForPlacesTest.php index 57e3293..e034f4d 100644 --- a/tests/NodaConsolidatedNamesForPlacesTest.php +++ b/tests/NodaConsolidatedNamesForPlacesTest.php @@ -88,7 +88,8 @@ final class NodaConsolidatedNamesForPlacesTest extends TestCase { // If both oblast and city are already known, the region will be a // district within the city. // Otherwise, it is to be assumed that it is a super-city region. - self::assertEquals("Приморський р-н (Запоріжжя, Запорізька обл., Україна)", NodaConsolidatedNamesForPlaces::consolidate_name("uk", "Україна, Запорізька обл., м. Запоріжжя, Приморський р-н")); + self::assertEquals("Приморський район (Запоріжжя, Запорізька обл., Україна)", NodaConsolidatedNamesForPlaces::consolidate_name("uk", "Україна, Запорізька обл., м. Запоріжжя, Приморський р-н")); + self::assertEquals("Кодимський район (Одеська обл., УССР)", NodaConsolidatedNamesForPlaces::consolidate_name("uk", "УССР, Одеська обл., Кодимський район")); } }