Cover more levels of Ukrainian (current and historical) place
hierarchies
This commit is contained in:
parent
2badc67405
commit
55931ba3ef
@ -230,9 +230,12 @@ final class NodaConsolidatedNamesForPlaces extends NodaConsolidatedNamesAbstract
|
||||
|
||||
$identifiersByLevel = [
|
||||
'state' => [' РСР', 'РСР ', ' губернія', 'губернія '],
|
||||
'oblast' => ['обл.'],
|
||||
'region' => ['р-н'],
|
||||
'oblast' => ['обл.', 'округа', 'губернії'],
|
||||
'region' => ['р-н', 'район'],
|
||||
'county' => ['повіт'],
|
||||
'city' => ['м.'],
|
||||
'parish' => ['волость'],
|
||||
'village' => ['смт', 'сільська', 'с. '],
|
||||
'district' => [], // Shares the р-н marker with 'region'; which of the two applies is decided by the part's position
|
||||
'street' => ['вул. '],
|
||||
];
|
||||
@ -242,7 +245,10 @@ final class NodaConsolidatedNamesForPlaces extends NodaConsolidatedNamesAbstract
|
||||
'state' => '',
|
||||
'oblast' => '',
|
||||
'region' => '',
|
||||
'county' => '',
|
||||
'city' => '',
|
||||
'parish' => '',
|
||||
'village' => '',
|
||||
'district' => '',
|
||||
'street' => '',
|
||||
];
|
||||
@ -259,12 +265,15 @@ final class NodaConsolidatedNamesForPlaces extends NodaConsolidatedNamesAbstract
|
||||
// If both oblast and city are already known, the region will be a
|
||||
// district within the city (an already known village counts the same as a city).
|
||||
// Otherwise, it is to be assumed that it is a super-city region.
|
||||
if ($level === 'region' && !empty($levels['oblast']) && !empty($levels['city'])) {
|
||||
if ($level === 'region' && !empty($levels['oblast'])
|
||||
&& (!empty($levels['city']) || !empty($levels['village']))
|
||||
) {
|
||||
$level = 'district';
|
||||
}
|
||||
|
||||
if (!empty($levels[$level])) {
|
||||
throw new Exception("Used the same level (" . $level . ") twice");
|
||||
# throw new Exception("Used the same level (" . $level . ") twice");
|
||||
return $name;
|
||||
}
|
||||
$levels[$level] = $part;
|
||||
continue 3;
|
||||
@ -273,6 +282,12 @@ final class NodaConsolidatedNamesForPlaces extends NodaConsolidatedNamesAbstract
|
||||
}
|
||||
}
|
||||
|
||||
// Special case: Abbreviated SSRs
|
||||
if (in_array($part, ['УРСР', 'УССР'], true)) {
|
||||
$levels['state'] = $part;
|
||||
continue;
|
||||
}
|
||||
|
||||
// Unspecified part level: Attempt identifying country
|
||||
if (!isset($countryNames)) {
|
||||
$countryNames = self::_loadJsonList(__DIR__ . "/../static/countries.uk.json") + self::_loadJsonList(__DIR__ . "/../static/historical_countries.uk.json");
|
||||
@ -281,6 +296,7 @@ final class NodaConsolidatedNamesForPlaces extends NodaConsolidatedNamesAbstract
|
||||
$countryNames[] = 'Російська імперія';
|
||||
$countryNames[] = 'Рос.імперія';
|
||||
$countryNames[] = 'Рос.имперія';
|
||||
$countryNames[] = 'Російська імперія-УНР';
|
||||
}
|
||||
if (in_array($part, $countryNames, true)) {
|
||||
$levels['country'] = $part;
|
||||
@ -288,7 +304,6 @@ final class NodaConsolidatedNamesForPlaces extends NodaConsolidatedNamesAbstract
|
||||
}
|
||||
|
||||
// Unspecified level; return
|
||||
throw new Exception("Unknown " . $part);
|
||||
return $name;
|
||||
}
|
||||
|
||||
@ -298,7 +313,7 @@ final class NodaConsolidatedNamesForPlaces extends NodaConsolidatedNamesAbstract
|
||||
foreach (array_reverse($levels) as $level => $partname) {
|
||||
if (empty($partname)) continue;
|
||||
|
||||
if ($level === 'city') {
|
||||
if ($level === 'city' || $level === 'village') {
|
||||
$strtr = [];
|
||||
foreach ($identifiersByLevel[$level] as $identifier) $strtr[$identifier] = '';
|
||||
$partname = trim(strtr($partname, $strtr));
|
||||
@ -329,11 +344,9 @@ final class NodaConsolidatedNamesForPlaces extends NodaConsolidatedNamesAbstract
|
||||
*/
|
||||
private static function _clean_ukrainian_abbreviations(string $name):string {
|
||||
|
||||
/*
|
||||
if (str_contains($name, " krt. ") && \preg_match("/\ krt\.\ [0-9]/", $name)) {
|
||||
$name = str_replace(" krt. ", " körut ", $name);
|
||||
if (str_contains($name, " р-н,") || str_contains($name, " р-н ") || str_ends_with($name, " р-н")) {
|
||||
$name = str_replace(" р-н", " район", $name);
|
||||
}
|
||||
*/
|
||||
|
||||
if (str_contains($name, ',')) {
|
||||
$name = self::_rewrite_ukrainian_names_by_hierarchy($name);
|
||||
|
@ -88,7 +88,8 @@ final class NodaConsolidatedNamesForPlacesTest extends TestCase {
|
||||
// If both oblast and city are already known, the region will be a
|
||||
// district within the city.
|
||||
// Otherwise, it is to be assumed that it is a super-city region.
|
||||
self::assertEquals("Приморський р-н (Запоріжжя, Запорізька обл., Україна)", NodaConsolidatedNamesForPlaces::consolidate_name("uk", "Україна, Запорізька обл., м. Запоріжжя, Приморський р-н"));
|
||||
self::assertEquals("Приморський район (Запоріжжя, Запорізька обл., Україна)", NodaConsolidatedNamesForPlaces::consolidate_name("uk", "Україна, Запорізька обл., м. Запоріжжя, Приморський р-н"));
|
||||
self::assertEquals("Кодимський район (Одеська обл., УССР)", NodaConsolidatedNamesForPlaces::consolidate_name("uk", "УССР, Одеська обл., Кодимський район"));
|
||||
|
||||
}
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user