MDNodaHelpers/src/NodaConsolidatedNamesForPlaces.php

<?PHP
/**
 * Gathers functions for setting uniform place names.
 */
declare(strict_types = 1);

/**
 * Gathers functions for setting uniform place names.
 */
final class NodaConsolidatedNamesForPlaces extends NodaConsolidatedNamesAbstract {

    /**
     * Substrings of a place name listed as a key in this array will be replaced
     * by the corresponding value.
     */
    private const _NAME_SANITIZATIONS = [
        " - " => "-",
        "unbekannt" => "",
        "Unbekannt" => "",
        "unknown" => "",
        "Unknown" => "",
    ];

    /** Blacklist for comparison with country names */
    private const _COUNTRY_REWRITE_BLACKLISTED_TERMS = [
        'District',
        'Distrikt',
        'India',
        'Indien',
        'Insel',
        'Inseln',
        'Tal',
        'Yue',
    ];

    private const _PLACE_TYPE_INDICATORS_GERMAN = [
        'Insel',
        'Stadt',
    ];

    // Indicators signifying that a place is likely subordinate to the other
    // if two places are provided in a comma-separated list
    private const _PLACE_NARROWER_LOCATION_INDICATORS_GERMAN = [
        'gasse',
        'straße',
        ' Straße',
    ];

    // Indicators signifying that a place is likely subordinate to the other
    // if two places are provided in a comma-separated list
    private const _PLACE_NARROWER_LOCATION_INDICATORS_HUNGARIAN = [
        ' körut ',
        ' utca ',
        ' út ',
    ];

    private const _RELEVANT_ROMAN_NUMERALS = [
        'I' => '1',
        'II' => '2',
        'III' => '3',
        'IV' => '4',
        'V' => '5',
        'VI' => '6',
        'VII' => '7',
        'VIII' => '8',
        'IX' => '9',
        'X' => '10',
        'XI' => '11',
        'XII' => '12',
        'XIII' => '13',
        'XIV' => '14',
        'XV' => '15',
        'XVI' => '16',
        'XVII' => '17',
        'XVIII' => '18',
        'XIX' => '19',
        'XX' => '20',
    ];

    /**
     * @var array<string, list<string>>
     */
    private static $_placeNameListCaches = [];

    /**
     * Rewrites a pair of a narrower and a superordinate location into the
     * format "Narrower (Broader)".
     * E.g.: "Adalbrechtstraße 12, Berlin" > "Adalbrechtstraße 12 (Berlin)".
     *
     * The name is only rewritten if the indicator and the separator each occur
     * exactly once and the name does not yet contain an opening bracket.
     * In every other case the name is returned unchanged.
     *
     * @param string $name      Name in which to rewrite.
     * @param string $indicator Indicator for narrower place. E.g. "straße".
     * @param string $separator Separating string between narrower and broader, e.g. ', '.
     *
     * @return string
     */
    private static function _rewrite_narrower_broader_pairs_to_brackets(string $name, string $indicator, string $separator = ', '):string {

        // substr_count() and explode() throw on empty needles; nothing to rewrite then
        if ($indicator === '' || $separator === '') {
            return $name;
        }

        if (substr_count($name, $indicator) !== 1
            || substr_count($name, $separator) !== 1
            || str_contains($name, "(")
        ) {
            return $name;
        }

        // Split at the separator that was counted above. As it occurs exactly
        // once, this always results in exactly two parts.
        [$first, $second] = explode($separator, $name);

        // Skip entries like "Vaci utca 12 Budapest, Vaci utca"
        $indicatorTrimmed = trim($indicator);
        if (
            (str_ends_with($first, $indicatorTrimmed) && str_contains($second, $indicatorTrimmed))
            || (str_ends_with($second, $indicatorTrimmed) && str_contains($first, $indicatorTrimmed))
        ) {
            return $name;
        }

        // Prevent rewrites in case of "Adalbrechtstraße 12, ".
        // Compared against '' explicitly, so that a part "0" is not mistaken
        // for a missing part.
        if ($first === '' || $second === '') {
            return $name;
        }

        if (str_contains($first, $indicator)) { // Adalberthstraße 12, Berlin
            $street = $first;
            $town = $second;
        }
        else { // Berlin, Adalberthstraße 12
            $street = $second;
            $town = $first;
        }

        // Prevent rewrites in cases like "Deák Ferenc utca 16-18. Budapest, V."
        if (str_contains($town, '.')) {
            return $name;
        }

        return $street . ' (' . $town . ')';

    }

    /**
     * Cleans and consolidates name parts appearing regularly in German place names.
     *
     * Three steps are run in order:
     * - A trailing place type ("ABC, Insel") is moved to brackets ("ABC (Insel)").
     * - The abbreviation "str. " is expanded to "straße " if it is directly
     *   preceded by a letter and followed by a digit somewhere in the name.
     * - Pairs of a street and a superordinate place are rewritten to the
     *   format "Street (Place)".
     *
     * @param string $name Name of a place.
     *
     * @return string
     */
    private static function _clean_german_abbreviations(string $name):string {

        // ABC, Insel > ABC (Insel)
        foreach (self::_PLACE_TYPE_INDICATORS_GERMAN as $indicator) {
            $suffix = ', ' . $indicator;
            if (str_ends_with($name, $suffix)) {
                // Only the trailing occurrence is rewritten. A plain str_replace()
                // would also hit earlier matches ("Foo, Stadtbezirk, Stadt").
                $name = substr($name, 0, -strlen($suffix)) . ' (' . $indicator . ')';
            }
        }

        // Adalbrechtstr. 12 > Adalbrechtstraße 12
        // \p{L} with the u modifier also covers non-ASCII letters ("Schloßstr. 5");
        // the dot is escaped so that it only matches the abbreviation's dot.
        if (\preg_match('/\p{L}str\. [0-9]/u', $name) === 1) {
            $name = str_replace("str. ", "straße ", $name);
        }

        // "Adalbrechtstraße 12, Berlin" > Adalbrechtstraße 12 (Berlin)
        foreach (self::_PLACE_NARROWER_LOCATION_INDICATORS_GERMAN as $indicator) {
            $name = self::_rewrite_narrower_broader_pairs_to_brackets($name, $indicator, ', ');
        }

        return $name;

    }

    /**
     * Cleans and consolidates name parts appearing regularly in Hungarian place names.
     *
     * Abbreviated or outdated street type designations are expanded if they are
     * followed by a number, pairs of a street and a superordinate place are
     * rewritten to "Street (Place)", and a trailing Budapest district given as
     * a roman numeral is rewritten to "(Budapest, <number>. kerület)".
     *
     * @param string $name Name of a place.
     *
     * @return string
     */
    private static function _clean_hungarian_abbreviations(string $name):string {

        // Variant to look for => [pattern requiring a following digit, replacement].
        // The entries are applied in the order listed here.
        $expansions = [
            " krt. "  => ["/\ krt\.\ [0-9]/", " körut "],
            " u. "    => ["/\ u\.\ [0-9]/", " utca "],
            " ucca "  => ["/\ ucca\ [0-9]/", " utca "],
            " utcza " => ["/\ utcza\ [0-9]/", " utca "],
            " rkp. "  => ["/\ rkp\.\ [0-9]/", " rakpart "],
        ];

        foreach ($expansions as $variant => [$pattern, $expanded]) {
            if (!str_contains($name, $variant) || !\preg_match($pattern, $name)) {
                continue;
            }
            $name = str_replace($variant, $expanded, $name);
        }

        // "Adalbrecht utca 12, Berlin" > Adalbrecht utca 12 (Berlin)
        foreach (self::_PLACE_NARROWER_LOCATION_INDICATORS_HUNGARIAN as $streetIndicator) {
            $name = self::_rewrite_narrower_broader_pairs_to_brackets($name, $streetIndicator, ', ');
        }

        // District rewriting only applies if Budapest is mentioned exactly once
        if (substr_count($name, 'Budapest') !== 1) {
            return $name;
        }

        // "... Budapest, V." > "... (Budapest, 5. kerület)"
        foreach (self::_RELEVANT_ROMAN_NUMERALS as $roman => $districtNo) {

            $ending = ' Budapest, ' . $roman . '.';
            if (!str_ends_with($name, $ending)) {
                continue;
            }

            $name = str_replace($ending, ' (Budapest, ' . $districtNo . '. kerület)', $name);

        }

        return $name;

    }

    /**
     * Loads a JSON file, optionally loading it cached through a private static variable
     * if reuse is expectable (= in the case of CLI usage).
     *
     * A file reported as missing by MD_STD::file_get_contents() (by throwing
     * MDFileDoesNotExist) is treated as an empty list.
     *
     * @param non-empty-string $filename File name to load.
     *
     * @throws Exception If the file contents cannot be decoded to an array.
     *
     * @return list<string>
     */
    private static function _loadJsonList(string $filename):array {

        $useCache = (PHP_SAPI === 'cli');

        if ($useCache && isset(self::$_placeNameListCaches[$filename])) {
            return self::$_placeNameListCaches[$filename];
        }

        try {
            $output = json_decode(MD_STD::file_get_contents($filename), true);
        }
        catch (MDFileDoesNotExist $e) {
            $output = [];
        }

        // json_decode() returns null (not false) on invalid input. Anything but
        // an array would additionally violate the return type of this function.
        if (!is_array($output)) {
            throw new Exception("Failed to get list");
        }

        if ($useCache) {
            self::$_placeNameListCaches[$filename] = $output;
        }

        return $output;

    }

    /**
     * Moves names of regions to brackets using pre-generated lists of countries,
     * historical country names, etc.
     * E.g. "Berlin, Deutschland" > "Berlin (Deutschland)", provided that
     * "Deutschland" is listed as a country name for the given language.
     *
     * The separators "-" and ", " are tried in this order. A separator is only
     * considered if it occurs exactly once in the name. The name is returned
     * unchanged if it is itself a listed country name, or if one of its two
     * parts is a blacklisted term or a cardinal direction.
     *
     * @param string $lang Instance language.
     * @param string $name Input string to clean.
     *
     * @return string
     */
    private static function _move_region_names_to_brackets(string $lang, string $name):string {

        // The lists are loaded at most once per call, and only if needed
        $countryNames = null;
        $cardinal_directions = null;

        foreach (['-', ', '] as $separator) {

            if (substr_count($name, $separator) !== 1) continue;

            // Get parts and trim them
            $parts = array_map('trim', explode($separator, $name));

            // Load place names.
            // array_merge() is required for combining the two lists: the array
            // union operator (+) is key-based and would drop every historical
            // country whose index also exists in the list of countries.
            $countryNames ??= array_merge(
                self::_loadJsonList(__DIR__ . "/../static/countries.$lang.json"),
                self::_loadJsonList(__DIR__ . "/../static/historical_countries.$lang.json"),
            );
            $cardinal_directions ??= self::_loadJsonList(__DIR__ . "/../static/cardinal_directions.json");

            // Skip if the full name is in the list of country names
            if (in_array($name, $countryNames, true)) {
                return $name;
            }

            // If one of the parts is a blacklisted term or a cardinal direction, skip this.
            // NOTE(review): strtolower() works byte-wise; confirm that the list of
            // cardinal directions needs no case folding of non-ASCII letters.
            foreach ($parts as $part) {
                if (in_array($part, self::_COUNTRY_REWRITE_BLACKLISTED_TERMS, true)
                    || in_array($part, $cardinal_directions, true)
                    || in_array(strtolower($part), $cardinal_directions, true)
                ) {
                    return $name;
                }
            }

            $part0IsCountry = in_array($parts[0], $countryNames, true);
            $part1IsCountry = in_array($parts[1], $countryNames, true);

            // Only rewrite if exactly one of the two parts is a country name
            if ($part0IsCountry === true && $part1IsCountry === false) {
                return $parts[1] . ' (' . $parts[0] . ')';
            }
            if ($part0IsCountry === false && $part1IsCountry === true) {
                return $parts[0] . ' (' . $parts[1] . ')';
            }

        }

        return $name;

    }

    /**
     * Drops repeated entries from a list of name parts separated by ", ".
     * The first occurrence of each part is kept, the order is preserved.
     * E.g.: "Västerdås, Schweden, Schweden" > "Västerdås, Schweden".
     *
     * @param string $ort_name Place name to clean.
     *
     * @return string
     */
    private static function _remove_duplicates_after_commas(string $ort_name):string {

        // Nothing to do for names without any comma
        if (!str_contains($ort_name, ',')) {
            return $ort_name;
        }

        $kept = [];
        foreach (explode(', ', $ort_name) as $segment) {
            if (!in_array($segment, $kept, true)) {
                $kept[] = $segment;
            }
        }

        return implode(', ', $kept);

    }

    /**
     * Consolidates the spelling of a place name.
     *
     * The input is sanitized, the replacements listed in _NAME_SANITIZATIONS
     * are applied and uncertainty indicators are removed. Afterwards, duplicate
     * parts after commas are dropped, language-specific rewrites are run for
     * German ("de") and Hungarian ("hu"), and finally names of regions are
     * moved to brackets.
     *
     * @param string $lang     Instance language.
     * @param string $ort_name Input string to clean.
     *
     * @return string
     */
    public static function consolidate_name(string $lang, string $ort_name):string {

        // Run basic replacements.
        // (A rule rewriting a single "/" to "-" in names without a dot is currently disabled.)
        $cleaned = strtr(self::sanitizeInputString($ort_name), self::_NAME_SANITIZATIONS);

        // Remove uncertainty indicators, then sanitize once more
        $cleaned = NodaUncertaintyHelper::cleanUncertaintyIndicatorsPlace($cleaned);
        $cleaned = self::sanitizeInputString($cleaned);

        // Remove duplicates after commas
        // Västerdås, Schweden, Schweden > Västerdås, Schweden
        $cleaned = self::_remove_duplicates_after_commas($cleaned);

        // Language-specific rewrites; names in other languages pass through as they are
        if ($lang === 'de') {
            $cleaned = self::_clean_german_abbreviations($cleaned);
        }
        elseif ($lang === 'hu') {
            $cleaned = self::_clean_hungarian_abbreviations($cleaned);
        }

        return self::_move_region_names_to_brackets($lang, $cleaned);

    }
}
Add classes for writing consolidating spellings of actor and place names 2023-11-25 22:42:07 +01:00			`<?PHP`
			`/**`
			`* Gathers functions for setting uniform place names.`
			`*/`
			`declare(strict_types = 1);`

			`/**`
			`* Gathers functions for setting uniform place names.`
			`*/`
			`final class NodaConsolidatedNamesForPlaces extends NodaConsolidatedNamesAbstract {`

			`/**`
			`* Substrings of an place name listed as a key in this array will be replaced`
			`* by the corresponding value.`
			`*/`
			`private const _NAME_SANITIZATIONS = [`
			`" - " => "-",`
			`"unbekannt" => "",`
			`"Unbekannt" => "",`
			`"unknown" => "",`
			`"Unknown" => "",`
			`];`

Add blacklist for unwanted rewrites in consolidating place names 2023-11-26 23:55:22 +01:00			`/** Blacklist for comparison with country names */`
			`private const _COUNTRY_REWRITE_BLACKLISTED_TERMS = [`
			`'District',`
			`'Distrikt',`
			`'India',`
			`'Indien',`
			`'Insel',`
			`'Inseln',`
			`'Tal',`
			`'Yue',`
			`];`

Add classes for writing consolidating spellings of actor and place names 2023-11-25 22:42:07 +01:00			`private const _PLACE_TYPE_INDICATORS_GERMAN = [`
			`'Insel',`
			`'Stadt',`
			`];`

			`// Indicators signifying that a place is likely subordinate to the other`
			`// if two places are provided in a comma-separated list`
			`private const _PLACE_NARROWER_LOCATION_INDICATORS_GERMAN = [`
			`'gasse',`
			`'straße',`
Add blacklist for unwanted rewrites in consolidating place names 2023-11-26 23:55:22 +01:00			`' Straße',`
Add classes for writing consolidating spellings of actor and place names 2023-11-25 22:42:07 +01:00			`];`

			`// Indicators signifying that a place is likely subordinate to the other`
			`// if two places are provided in a comma-separated list`
			`private const _PLACE_NARROWER_LOCATION_INDICATORS_HUNGARIAN = [`
Add blacklist for unwanted rewrites in consolidating place names 2023-11-26 23:55:22 +01:00			`' körut ',`
			`' utca ',`
			`' út ',`
Add classes for writing consolidating spellings of actor and place names 2023-11-25 22:42:07 +01:00			`];`

			`private const _RELEVANT_ROMAN_NUMERALS = [`
			`'I' => '1',`
			`'II' => '2',`
			`'III' => '3',`
			`'IV' => '4',`
			`'V' => '5',`
			`'VI' => '6',`
			`'VII' => '7',`
			`'VIII' => '8',`
			`'IX' => '9',`
			`'X' => '10',`
			`'XI' => '11',`
			`'XII' => '12',`
			`'XIII' => '13',`
			`'XIV' => '14',`
			`'XV' => '15',`
			`'XVI' => '16',`
			`'XVII' => '17',`
			`'XVIII' => '18',`
			`'XIX' => '19',`
			`'XX' => '20',`
			`];`

Add functions for automatic rewriting of country names to brackets at the end of place names based on lists 2023-11-26 00:54:14 +01:00			`/**`
			`* @var array<string, list<string>>`
			`*/`
			`private static $_placeNameListCaches = [];`

Add classes for writing consolidating spellings of actor and place names 2023-11-25 22:42:07 +01:00			`/**`
			`* Rewrites indicators for narrower locations paired with a superordinate location`
			`* into the format "Narrower (Broader)".`
			`* E.g.: "Adalbrechtstr. 12, Berlin" > Adalbrechtstraße 12 (Berlin).`
			`*`
			`* @param string $name Name in which to rewrite.`
			`* @param string $indicator Indicator for narrower place. E.g. "straße".`
			`* @param string $separator Separating character between narrower and broader, e.g. ', '.`
			`*`
			`* @return string`
			`*/`
			`private static function _rewrite_narrower_broader_pairs_to_brackets(string $name, string $indicator, $separator = ', '):string {`

			`if (str_contains($name, $indicator)`
			`&& substr_count($name, $indicator) === 1`
			`&& substr_count($name, $separator) === 1`
			`&& !str_contains($name, "(")`
			`) {`
Add blacklist for unwanted rewrites in consolidating place names 2023-11-26 23:55:22 +01:00
Add classes for writing consolidating spellings of actor and place names 2023-11-25 22:42:07 +01:00			`$parts = explode(', ', $name);`

Add blacklist for unwanted rewrites in consolidating place names 2023-11-26 23:55:22 +01:00			`// Skip entries like "Vaci utca 12 Budapest, Vaci utca"`
			`$indicatorTrimmed = trim($indicator);`
			`if (`
			`(str_ends_with($parts[0], $indicatorTrimmed) && str_contains($parts[1], $indicatorTrimmed))`
			`\|\| (str_ends_with($parts[1], $indicatorTrimmed) && str_contains($parts[0], $indicatorTrimmed))`
			`) {`
			`return $name;`
			`}`

Add classes for writing consolidating spellings of actor and place names 2023-11-25 22:42:07 +01:00			`// Prevent errors in case of "Adalbrechtstraße 12, "`
			`if (!empty($parts[0]) && !empty($parts[1])) {`

			`if (str_contains($parts[0], $indicator)) { // Adalberthstraße 12, Berlin`
			`$street = $parts[0];`
			`$town = $parts[1];`
			`}`
			`else { // Berlin, Adalberthstraße 12`
			`$street = $parts[1];`
			`$town = $parts[0];`
			`}`

			`// Prevent rewrites in cases like "Deák Ferenc utca 16-18. Budapest, V."`
			`if (str_contains($town, '.')) {`
			`return $name;`
			`}`

			`return $street . ' (' . $town . ')';`

			`}`


			`}`


			`return $name;`

			`}`

			`/**`
			`* Cleans and consolidates name parts appearing regularly in German place names.`
			`*`
			`* @param string $name Name of an actor.`
			`*`
			`* @return string`
			`*/`
			`private static function _clean_german_abbreviations(string $name):string {`

			`// ABC, Inseln > ABC (Inseln)`
			`foreach (self::_PLACE_TYPE_INDICATORS_GERMAN as $indicator) {`
			`if (str_ends_with($name, ', ' . $indicator)) {`
			`$name = str_replace(', ' . $indicator, ' (' . $indicator . ')', $name);`
			`}`
			`}`

			`// Adalbrechtstr. 12 > Adalbrechtstraße 12`
			`if (str_contains($name, "str. ") && \preg_match("/[a-zA-Z]str. [0-9]/", $name)) {`
			`$name = str_replace("str. ", "straße ", $name);`
			`}`

			`// "Adalbrechtstraße. 12, Berlin" > Adalbrechtstraße 12 (Berlin)`

			`foreach (self::_PLACE_NARROWER_LOCATION_INDICATORS_GERMAN as $indicator) {`
			`$name = self::_rewrite_narrower_broader_pairs_to_brackets($name, $indicator, ', ');`
			`}`

			`return $name;`

			`}`

			`/**`
			`* Cleans and consolidates name parts appearing regularly in Hungarian place names.`
			`*`
			`* @param string $name Name of an actor.`
			`*`
			`* @return string`
			`*/`
			`private static function _clean_hungarian_abbreviations(string $name):string {`

			`if (str_contains($name, " krt. ") && \preg_match("/\ krt\.\ [0-9]/", $name)) {`
			`$name = str_replace(" krt. ", " körut ", $name);`
			`}`
			`if (str_contains($name, " u. ") && \preg_match("/\ u\.\ [0-9]/", $name)) {`
			`$name = str_replace(" u. ", " utca ", $name);`
			`}`
Add blacklist for unwanted rewrites in consolidating place names 2023-11-26 23:55:22 +01:00			`if (str_contains($name, " ucca ") && \preg_match("/\ ucca\ [0-9]/", $name)) {`
			`$name = str_replace(" ucca ", " utca ", $name);`
			`}`
			`if (str_contains($name, " utcza ") && \preg_match("/\ utcza\ [0-9]/", $name)) {`
			`$name = str_replace(" utcza ", " utca ", $name);`
			`}`
			`if (str_contains($name, " rkp. ") && \preg_match("/\ rkp\.\ [0-9]/", $name)) {`
			`$name = str_replace(" rkp. ", " rakpart ", $name);`
			`}`
Add classes for writing consolidating spellings of actor and place names 2023-11-25 22:42:07 +01:00
			`// "Adalbrecht utca. 12, Berlin" > Adalbrecht utca 12 (Berlin)`

			`foreach (self::_PLACE_NARROWER_LOCATION_INDICATORS_HUNGARIAN as $indicator) {`
			`$name = self::_rewrite_narrower_broader_pairs_to_brackets($name, $indicator, ', ');`
			`}`

			`if (str_contains($name, 'Budapest') && substr_count($name, 'Budapest') === 1) {`
			`foreach(self::_RELEVANT_ROMAN_NUMERALS as $roman_numeral => $arabic) {`

			`$to_match = ' Budapest, ' . $roman_numeral . '.';`
			`if (str_ends_with($name, $to_match)) {`
			`$name = str_replace($to_match, ' (Budapest, ' . $arabic . '. kerület)', $name);`
			`}`

			`}`
			`}`

			`return $name;`

			`}`

Add functions for automatic rewriting of country names to brackets at the end of place names based on lists 2023-11-26 00:54:14 +01:00			`/**`
			`* Loads a JSON file, optionally loading it cached through a private static variable`
			`* if reuse is expectable (= in the case of CLI usage).`
			`*`
			`* @param non-empty-string $filename File name to load.`
			`*`
			`* @return list<string>`
			`*/`
			`private static function _loadJsonList(string $filename):array {`

			`if (PHP_SAPI === 'cli' && isset(self::$_placeNameListCaches[$filename])) {`
			`return self::$_placeNameListCaches[$filename];`
			`}`

Add blacklist for unwanted rewrites in consolidating place names 2023-11-26 23:55:22 +01:00			`try {`
			`$output = json_decode(MD_STD::file_get_contents($filename), true);`
			`}`
			`catch (MDFileDoesNotExist $e) {`
			`self::$_placeNameListCaches[$filename] = [];`
			`return [];`
			`}`

Add functions for automatic rewriting of country names to brackets at the end of place names based on lists 2023-11-26 00:54:14 +01:00			`if ($output === false) {`
			`throw new Exception("Failed to get list");`
			`}`

			`if (PHP_SAPI === 'cli') {`
			`self::$_placeNameListCaches[$filename] = $output;`
			`}`

			`return $output;`

			`}`

			`/**`
			`* Moves names of regions to brackets using pre-generated lists of countries,`
			`* historical country names, etc.`
			`*`
			`* @param string $lang Instance language.`
			`* @param string $name Input string to clean.`
			`*`
			`* @return string`
			`*/`
			`private static function _move_region_names_to_brackets(string $lang, string $name):string {`

			`$separators = ['-', ', '];`

			`foreach ($separators as $separator) {`

			`if (!str_contains($name, $separator) \|\| substr_count($name, $separator) !== 1) continue;`

			`// Get parts and trim them`
			`$parts = explode($separator, $name);`
			`foreach ($parts as $key => $value) {`
			`$parts[$key] = trim($value);`
			`}`

			`// Load place names`
			`$countryNames = self::_loadJsonList(__DIR__ . "/../static/countries.$lang.json") + self::_loadJsonList(__DIR__ . "/../static/historical_countries.$lang.json");`
Add blacklist for unwanted rewrites in consolidating place names 2023-11-26 23:55:22 +01:00			`$cardinal_directions = self::_loadJsonList(__DIR__ . "/../static/cardinal_directions.json");`
Add functions for automatic rewriting of country names to brackets at the end of place names based on lists 2023-11-26 00:54:14 +01:00
			`$part0IsCountry = in_array($parts[0], $countryNames, true);`
			`$part1IsCountry = in_array($parts[1], $countryNames, true);`

Add blacklist for unwanted rewrites in consolidating place names 2023-11-26 23:55:22 +01:00			`// Skip if the full name is in the list of country names`
			`if (in_array($name, $countryNames, true)) {`
			`return $name;`
			`}`

			`// If one of the parts is a blacklisted term or a cardinal directions, skip this`

			`if (`
			`(in_array($parts[0], self::_COUNTRY_REWRITE_BLACKLISTED_TERMS, true)`
			`\|\| in_array($parts[0], $cardinal_directions, true)`
			`\|\| in_array(strtolower($parts[0]), $cardinal_directions, true)`
			`)`
			`\|\| (in_array($parts[1], self::_COUNTRY_REWRITE_BLACKLISTED_TERMS, true)`
			`\|\| in_array($parts[1], $cardinal_directions, true)`
			`\|\| in_array(strtolower($parts[1]), $cardinal_directions, true)`
			`)`
			`) {`
			`return $name;`
			`}`

Add functions for automatic rewriting of country names to brackets at the end of place names based on lists 2023-11-26 00:54:14 +01:00			`if ($part0IsCountry === true && $part1IsCountry === false) {`
			`return $parts[1] . ' (' . $parts[0] . ')';`
			`}`
			`else if ($part0IsCountry === false && $part1IsCountry === true) {`
			`return $parts[0] . ' (' . $parts[1] . ')';`
			`}`

			`}`

			`return $name;`

			`}`

Add blacklist for unwanted rewrites in consolidating place names 2023-11-26 23:55:22 +01:00			`/**`
			`* Removes duplicates after commas.`
			`*`
			`* @param string $ort_name Place name to clean.`
			`*`
			`* @return string`
			`*/`
			`private static function _remove_duplicates_after_commas(string $ort_name):string {`

			`if (str_contains($ort_name, ',') === false) {`
			`return $ort_name;`
			`}`

			`$parts = explode(', ', $ort_name);`

			`return implode(', ', array_unique($parts));`

			`}`

Add classes for writing consolidating spellings of actor and place names 2023-11-25 22:42:07 +01:00			`/**`
			`* Cleans a place name by trimming etc. Also removes uncertainty indicators.`
			`*`
			`* @param string $lang Instance language.`
			`* @param string $ort_name Input string to clean.`
			`*`
			`* @return string`
			`*/`
			`public static function consolidate_name(string $lang, string $ort_name):string {`

			`// Run basic replacements`
			`$nameSanitizations = self::_NAME_SANITIZATIONS;`
Add blacklist for unwanted rewrites in consolidating place names 2023-11-26 23:55:22 +01:00			`/*`
			`if (substr_count($ort_name, "/") === 1 && !str_contains($ort_name, '.')) {`
			`$nameSanitizations["/"] = "-";`
			`}`
			`*/`
Add classes for writing consolidating spellings of actor and place names 2023-11-25 22:42:07 +01:00			`$ort_name = strtr(self::sanitizeInputString($ort_name), $nameSanitizations);`
			`$ort_name = self::sanitizeInputString(NodaUncertaintyHelper::cleanUncertaintyIndicatorsPlace($ort_name));`

Add blacklist for unwanted rewrites in consolidating place names 2023-11-26 23:55:22 +01:00			`// Remove duplicates after commas`
			`// Västerdås, Schweden, Schweden > Västerdås, Schweden`
			`$ort_name = self::_remove_duplicates_after_commas($ort_name);`

Add classes for writing consolidating spellings of actor and place names 2023-11-25 22:42:07 +01:00			`$ort_name = match ($lang) {`
			`'de' => self::_clean_german_abbreviations($ort_name),`
			`'hu' => self::_clean_hungarian_abbreviations($ort_name),`
			`default => $ort_name,`
			`};`

Add functions for automatic rewriting of country names to brackets at the end of place names based on lists 2023-11-26 00:54:14 +01:00			`$ort_name = self::_move_region_names_to_brackets($lang, $ort_name);`

Add classes for writing consolidating spellings of actor and place names 2023-11-25 22:42:07 +01:00			`return $ort_name;`

			`}`
			`}`