Cover more levels of Ukrainian (current and historical) place
hierarchies
This commit is contained in:
parent
2badc67405
commit
55931ba3ef
|
@ -230,9 +230,12 @@ final class NodaConsolidatedNamesForPlaces extends NodaConsolidatedNamesAbstract
|
||||||
|
|
||||||
$identifiersByLevel = [
|
$identifiersByLevel = [
|
||||||
'state' => [' РСР', 'РСР ', ' губернія', 'губернія '],
|
'state' => [' РСР', 'РСР ', ' губернія', 'губернія '],
|
||||||
'oblast' => ['обл.'],
|
'oblast' => ['обл.', 'округа', 'губернії'],
|
||||||
'region' => ['р-н'],
|
'region' => ['р-н', 'район'],
|
||||||
|
'county' => ['повіт'],
|
||||||
'city' => ['м.'],
|
'city' => ['м.'],
|
||||||
|
'parish' => ['волость'],
|
||||||
|
'village' => ['смт', 'сільська', 'с. '],
|
||||||
'district' => [], // Is also р-н; which it is is determined based on position
|
'district' => [], // Is also р-н; which it is is determined based on position
|
||||||
'street' => ['вул. '],
|
'street' => ['вул. '],
|
||||||
];
|
];
|
||||||
|
@ -242,7 +245,10 @@ final class NodaConsolidatedNamesForPlaces extends NodaConsolidatedNamesAbstract
|
||||||
'state' => '',
|
'state' => '',
|
||||||
'oblast' => '',
|
'oblast' => '',
|
||||||
'region' => '',
|
'region' => '',
|
||||||
|
'county' => '',
|
||||||
'city' => '',
|
'city' => '',
|
||||||
|
'parish' => '',
|
||||||
|
'village' => '',
|
||||||
'district' => '',
|
'district' => '',
|
||||||
'street' => '',
|
'street' => '',
|
||||||
];
|
];
|
||||||
|
@ -259,12 +265,15 @@ final class NodaConsolidatedNamesForPlaces extends NodaConsolidatedNamesAbstract
|
||||||
// If both oblast and city are already known, the region will be a
|
// If the oblast and a city or village are already known, the region will be a
|
||||||
// district within the city.
|
// district within that city or village.
|
||||||
// Otherwise, it is to be assumed that it is a super-city region.
|
// Otherwise, it is to be assumed that it is a super-city region.
|
||||||
if ($level === 'region' && !empty($levels['oblast']) && !empty($levels['city'])) {
|
if ($level === 'region' && !empty($levels['oblast'])
|
||||||
|
&& (!empty($levels['city']) || !empty($levels['village']))
|
||||||
|
) {
|
||||||
$level = 'district';
|
$level = 'district';
|
||||||
}
|
}
|
||||||
|
|
||||||
if (!empty($levels[$level])) {
|
if (!empty($levels[$level])) {
|
||||||
throw new Exception("Used the same level (" . $level . ") twice");
|
# throw new Exception("Used the same level (" . $level . ") twice");
|
||||||
|
return $name;
|
||||||
}
|
}
|
||||||
$levels[$level] = $part;
|
$levels[$level] = $part;
|
||||||
continue 3;
|
continue 3;
|
||||||
|
@ -273,6 +282,12 @@ final class NodaConsolidatedNamesForPlaces extends NodaConsolidatedNamesAbstract
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Special case: Abbreviated SSRs
|
||||||
|
if (in_array($part, ['УРСР', 'УССР'], true)) {
|
||||||
|
$levels['state'] = $part;
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
// Unspecified part level: Attempt identifying country
|
// Unspecified part level: Attempt identifying country
|
||||||
if (!isset($countryNames)) {
|
if (!isset($countryNames)) {
|
||||||
$countryNames = self::_loadJsonList(__DIR__ . "/../static/countries.uk.json") + self::_loadJsonList(__DIR__ . "/../static/historical_countries.uk.json");
|
$countryNames = self::_loadJsonList(__DIR__ . "/../static/countries.uk.json") + self::_loadJsonList(__DIR__ . "/../static/historical_countries.uk.json");
|
||||||
|
@ -281,6 +296,7 @@ final class NodaConsolidatedNamesForPlaces extends NodaConsolidatedNamesAbstract
|
||||||
$countryNames[] = 'Російська імперія';
|
$countryNames[] = 'Російська імперія';
|
||||||
$countryNames[] = 'Рос.імперія';
|
$countryNames[] = 'Рос.імперія';
|
||||||
$countryNames[] = 'Рос.имперія';
|
$countryNames[] = 'Рос.имперія';
|
||||||
|
$countryNames[] = 'Російська імперія-УНР';
|
||||||
}
|
}
|
||||||
if (in_array($part, $countryNames, true)) {
|
if (in_array($part, $countryNames, true)) {
|
||||||
$levels['country'] = $part;
|
$levels['country'] = $part;
|
||||||
|
@ -288,7 +304,6 @@ final class NodaConsolidatedNamesForPlaces extends NodaConsolidatedNamesAbstract
|
||||||
}
|
}
|
||||||
|
|
||||||
// Unspecified level; return
|
// Unspecified level; return
|
||||||
throw new Exception("Unknown " . $part);
|
|
||||||
return $name;
|
return $name;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -298,7 +313,7 @@ final class NodaConsolidatedNamesForPlaces extends NodaConsolidatedNamesAbstract
|
||||||
foreach (array_reverse($levels) as $level => $partname) {
|
foreach (array_reverse($levels) as $level => $partname) {
|
||||||
if (empty($partname)) continue;
|
if (empty($partname)) continue;
|
||||||
|
|
||||||
if ($level === 'city') {
|
if ($level === 'city' || $level === 'village') {
|
||||||
$strtr = [];
|
$strtr = [];
|
||||||
foreach ($identifiersByLevel[$level] as $identifier) $strtr[$identifier] = '';
|
foreach ($identifiersByLevel[$level] as $identifier) $strtr[$identifier] = '';
|
||||||
$partname = trim(strtr($partname, $strtr));
|
$partname = trim(strtr($partname, $strtr));
|
||||||
|
@ -329,11 +344,9 @@ final class NodaConsolidatedNamesForPlaces extends NodaConsolidatedNamesAbstract
|
||||||
*/
|
*/
|
||||||
private static function _clean_ukrainian_abbreviations(string $name):string {
|
private static function _clean_ukrainian_abbreviations(string $name):string {
|
||||||
|
|
||||||
/*
|
if (str_contains($name, " р-н,") || str_contains($name, " р-н ") || str_ends_with($name, " р-н")) {
|
||||||
if (str_contains($name, " krt. ") && \preg_match("/\ krt\.\ [0-9]/", $name)) {
|
$name = str_replace(" р-н", " район", $name);
|
||||||
$name = str_replace(" krt. ", " körut ", $name);
|
|
||||||
}
|
}
|
||||||
*/
|
|
||||||
|
|
||||||
if (str_contains($name, ',')) {
|
if (str_contains($name, ',')) {
|
||||||
$name = self::_rewrite_ukrainian_names_by_hierarchy($name);
|
$name = self::_rewrite_ukrainian_names_by_hierarchy($name);
|
||||||
|
|
|
@ -88,7 +88,8 @@ final class NodaConsolidatedNamesForPlacesTest extends TestCase {
|
||||||
// If both oblast and city are already known, the region will be a
|
// If both oblast and city are already known, the region will be a
|
||||||
// district within the city.
|
// district within the city.
|
||||||
// Otherwise, it is to be assumed that it is a super-city region.
|
// Otherwise, it is to be assumed that it is a super-city region.
|
||||||
self::assertEquals("Приморський р-н (Запоріжжя, Запорізька обл., Україна)", NodaConsolidatedNamesForPlaces::consolidate_name("uk", "Україна, Запорізька обл., м. Запоріжжя, Приморський р-н"));
|
self::assertEquals("Приморський район (Запоріжжя, Запорізька обл., Україна)", NodaConsolidatedNamesForPlaces::consolidate_name("uk", "Україна, Запорізька обл., м. Запоріжжя, Приморський р-н"));
|
||||||
|
self::assertEquals("Кодимський район (Одеська обл., УССР)", NodaConsolidatedNamesForPlaces::consolidate_name("uk", "УССР, Одеська обл., Кодимський район"));
|
||||||
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in New Issue
Block a user