MDNodaHelpers/src/NodaConsolidatedNamesForPersinst.php

124 lines
3.9 KiB
PHP

<?PHP
/**
* Gathers functions for setting uniform actor names.
*/
declare(strict_types = 1);
/**
 * Gathers functions for setting uniform actor (person / institution) names.
 *
 * All methods are static. The public entry points are
 * {@see self::consolidate_name()} and {@see self::parse_life_dates_from_name()}.
 */
final class NodaConsolidatedNamesForPersinst extends NodaConsolidatedNamesAbstract {
    /**
     * Substrings of an actor name listed as a key in this array will be replaced
     * by the corresponding value.
     */
    private const _NAME_SANITIZATIONS = [
        "mythologische Figur" => "Mythologie",
        "Mythologische Figur" => "Mythologie",
        "Mythologische Gestalt" => "Mythologie",
        "()" => "",
    ];

    /**
     * Replaces the trailing part of a name if the name ends with $from.
     *
     * The replacement is skipped if the character immediately preceding the
     * matched suffix is a dot.
     *
     * @param string $from Suffix to look for at the end of the name.
     * @param string $to   Text to put in place of the suffix.
     * @param string $name Input name.
     *
     * @return string The name with the suffix replaced, or the unchanged input.
     */
    private static function _replaceFromEnd(string $from, string $to, string $name):string {

        if (str_ends_with($name, $from) === false) {
            return $name;
        }

        // str_ends_with() and substr() both operate on bytes, so the suffix
        // length has to be measured in bytes as well. Measuring it in
        // characters cuts suffixes containing multi-byte characters
        // (e.g. "Ä") one byte short and checks the wrong position for the dot.
        $length = strlen($from);

        // '.' is a single-byte ASCII character and never occurs inside a
        // multi-byte UTF-8 sequence, so a byte comparison is safe here.
        if (substr($name, -1 * $length - 1, 1) === '.') {
            return $name;
        }

        // Collapse a doubled space (two spaces => one) that may result from
        // joining the remaining name and the replacement.
        return str_replace("  ", " ", substr($name, 0, -1 * $length) . $to);

    }

    /**
     * Cleans and consolidates name parts appearing regularly in German names
     * that have a default writing in md.
     *
     * Currently this expands the trailing abbreviations for "der Ältere" and
     * "der Jüngere", with or without brackets and inner whitespace.
     *
     * @param string $name Name of an actor.
     *
     * @return string
     */
    private static function _clean_german_abbreviations(string $name):string {

        // Applied in order; each entry only matches at the very end of the name.
        $abbreviations = [
            " d.Ä."    => " (der Ältere)",
            " d. Ä."   => " (der Ältere)",
            " (d.Ä.)"  => " (der Ältere)",
            " (d. Ä.)" => " (der Ältere)",
            " d.J."    => " (der Jüngere)",
            " d. J."   => " (der Jüngere)",
            " (d.J.)"  => " (der Jüngere)",
            " (d. J.)" => " (der Jüngere)",
        ];

        foreach ($abbreviations as $from => $to) {
            $name = self::_replaceFromEnd($from, $to, $name);
        }

        return $name;

    }

    /**
     * Tries to make sense of life dates in brackets at the end of an actor's name.
     *
     * The name must contain exactly one opening bracket and end with a closing
     * bracket. The bracket's content is passed to NodaTimeSplitter::is_timespan().
     * Life dates are only returned if both years are known (not '?') and the
     * end year is less than 150 years after the start year.
     *
     * @param string $name Input name.
     *
     * @return array{name: string, birth: string, death: string}|array{}
     *         Empty array if no plausible life dates could be identified.
     */
    public static function parse_life_dates_from_name(string $name):array {

        if (str_contains($name, "(") === false || str_ends_with($name, ")") === false) {
            return [];
        }

        // Exactly one "(" is accepted: everything before it is the name,
        // everything after it the candidate date string.
        $parts = explode("(", $name);
        if (count($parts) !== 2) {
            return [];
        }

        $nameOnly   = trim($parts[0]);
        $dateString = rtrim($parts[1], ')');

        $dates = NodaTimeSplitter::is_timespan($dateString);
        if (empty($dates)) {
            return [];
        }

        if ($dates->start_year === '?' || $dates->end_year === '?') {
            return [];
        }

        // Spans of 150 years or more are rejected as implausible life dates.
        // NOTE(review): a negative span (end before start) passes this check
        // too - confirm whether is_timespan() guarantees the order of years.
        if (intval($dates->end_year) - intval($dates->start_year) >= 150) {
            return [];
        }

        return [
            'name'  => $nameOnly,
            'birth' => $dates->start_year,
            'death' => $dates->end_year,
        ];

    }

    /**
     * Cleans a persinst name by trimming etc. Also removes uncertainty indicators.
     *
     * Steps, in order: sanitize the input string, apply the substring
     * replacements from self::_NAME_SANITIZATIONS, remove uncertainty
     * indicators, expand German abbreviations (only for language 'de' and
     * names longer than 10 characters), and finally trim surrounding spaces,
     * semicolons, dots, tabs and line breaks.
     *
     * @param string $lang          Instance language.
     * @param string $persinst_name Input string to clean.
     *
     * @return string
     */
    public static function consolidate_name(string $lang, string $persinst_name):string {

        // Run basic replacements
        $name = \strtr(self::sanitizeInputString($persinst_name),
            self::_NAME_SANITIZATIONS);

        $name = NodaUncertaintyHelper::cleanUncertaintyIndicatorsPersinst($name);

        // Short names are left alone; the abbreviations are only expanded
        // for German-language instances.
        if (mb_strlen($name) > 10 && $lang === 'de') {
            $name = self::_clean_german_abbreviations($name);
        }

        // Strip leftover separators and whitespace from both ends.
        return \trim($name, " ;.\t" . PHP_EOL);

    }
}