"-", "unbekannt" => "", "Unbekannt" => "", "unknown" => "", "Unknown" => "", ]; private const _PLACE_TYPE_INDICATORS_GERMAN = [ 'Insel', 'Stadt', ]; // Indicators signifying that a place is likely subordinate to the other // if two places are provided in a comma-separated list private const _PLACE_NARROWER_LOCATION_INDICATORS_GERMAN = [ 'gasse', 'straße', ]; // Indicators signifying that a place is likely subordinate to the other // if two places are provided in a comma-separated list private const _PLACE_NARROWER_LOCATION_INDICATORS_HUNGARIAN = [ 'körut', 'utca', 'út', ]; private const _RELEVANT_ROMAN_NUMERALS = [ 'I' => '1', 'II' => '2', 'III' => '3', 'IV' => '4', 'V' => '5', 'VI' => '6', 'VII' => '7', 'VIII' => '8', 'IX' => '9', 'X' => '10', 'XI' => '11', 'XII' => '12', 'XIII' => '13', 'XIV' => '14', 'XV' => '15', 'XVI' => '16', 'XVII' => '17', 'XVIII' => '18', 'XIX' => '19', 'XX' => '20', ]; /** * Rewrites indicators for narrower locations paired with a superordinate location * into the format "Narrower (Broader)". * E.g.: "Adalbrechtstr. 12, Berlin" > Adalbrechtstraße 12 (Berlin). * * @param string $name Name in which to rewrite. * @param string $indicator Indicator for narrower place. E.g. "straße". * @param string $separator Separating character between narrower and broader, e.g. ', '. * * @return string */ private static function _rewrite_narrower_broader_pairs_to_brackets(string $name, string $indicator, $separator = ', '):string { if (str_contains($name, $indicator) && substr_count($name, $indicator) === 1 && substr_count($name, $separator) === 1 && !str_contains($name, "(") ) { $parts = explode(', ', $name); // Prevent errors in case of "Adalbrechtstraße 12, " if (!empty($parts[0]) && !empty($parts[1])) { if (str_contains($parts[0], $indicator)) { // Adalberthstraße 12, Berlin $street = $parts[0]; $town = $parts[1]; } else { // Berlin, Adalberthstraße 12 $street = $parts[1]; $town = $parts[0]; } // Prevent rewrites in cases like "Deák Ferenc utca 16-18. Budapest, V." if (str_contains($town, '.')) { return $name; } return $street . ' (' . $town . ')'; } } return $name; } /** * Cleans and consolidates name parts appearing regularly in German place names. * * @param string $name Name of an actor. * * @return string */ private static function _clean_german_abbreviations(string $name):string { // ABC, Inseln > ABC (Inseln) foreach (self::_PLACE_TYPE_INDICATORS_GERMAN as $indicator) { if (str_ends_with($name, ', ' . $indicator)) { $name = str_replace(', ' . $indicator, ' (' . $indicator . ')', $name); } } // Adalbrechtstr. 12 > Adalbrechtstraße 12 if (str_contains($name, "str. ") && \preg_match("/[a-zA-Z]str. [0-9]/", $name)) { $name = str_replace("str. ", "straße ", $name); } // "Adalbrechtstraße. 12, Berlin" > Adalbrechtstraße 12 (Berlin) foreach (self::_PLACE_NARROWER_LOCATION_INDICATORS_GERMAN as $indicator) { $name = self::_rewrite_narrower_broader_pairs_to_brackets($name, $indicator, ', '); } return $name; } /** * Cleans and consolidates name parts appearing regularly in Hungarian place names. * * @param string $name Name of an actor. * * @return string */ private static function _clean_hungarian_abbreviations(string $name):string { if (str_contains($name, " krt. ") && \preg_match("/\ krt\.\ [0-9]/", $name)) { $name = str_replace(" krt. ", " körut ", $name); } if (str_contains($name, " u. ") && \preg_match("/\ u\.\ [0-9]/", $name)) { $name = str_replace(" u. ", " utca ", $name); } // "Adalbrecht utca. 12, Berlin" > Adalbrecht utca 12 (Berlin) foreach (self::_PLACE_NARROWER_LOCATION_INDICATORS_HUNGARIAN as $indicator) { $name = self::_rewrite_narrower_broader_pairs_to_brackets($name, $indicator, ', '); } if (str_contains($name, 'Budapest') && substr_count($name, 'Budapest') === 1) { foreach(self::_RELEVANT_ROMAN_NUMERALS as $roman_numeral => $arabic) { $to_match = ' Budapest, ' . $roman_numeral . '.'; if (str_ends_with($name, $to_match)) { $name = str_replace($to_match, ' (Budapest, ' . $arabic . '. kerület)', $name); } } } return $name; } /** * Cleans a place name by trimming etc. Also removes uncertainty indicators. * * @param string $lang Instance language. * @param string $ort_name Input string to clean. * * @return string */ public static function consolidate_name(string $lang, string $ort_name):string { // Run basic replacements $nameSanitizations = self::_NAME_SANITIZATIONS; if (substr_count($ort_name, "/") === 1) $nameSanitizations["/"] = "-"; $ort_name = strtr(self::sanitizeInputString($ort_name), $nameSanitizations); $ort_name = self::sanitizeInputString(NodaUncertaintyHelper::cleanUncertaintyIndicatorsPlace($ort_name)); $ort_name = match ($lang) { 'de' => self::_clean_german_abbreviations($ort_name), 'hu' => self::_clean_hungarian_abbreviations($ort_name), default => $ort_name, }; return $ort_name; } }