2023-11-25 22:42:07 +01:00
|
|
|
<?PHP
|
|
|
|
/**
 * Gathers functions for setting uniform actor names.
 */
|
|
|
|
declare(strict_types = 1);
|
|
|
|
|
|
|
|
/**
 * Gathers functions for setting uniform actor names.
 *
 * All methods are static. The public entry points are
 * parse_life_dates_from_name() and consolidate_name().
 */
final class NodaConsolidatedNamesForPersinst extends NodaConsolidatedNamesAbstract {

    /**
     * Substrings of an actor name listed as a key in this array will be replaced
     * by the corresponding value.
     */
    private const _NAME_SANITIZATIONS = [
        "mythologische Figur" => "Mythologie",
        "Mythologische Figur" => "Mythologie",
        "Mythologische Gestalt" => "Mythologie",
        "()" => "",
    ];

    /**
     * German abbreviations at the end of a name (key) and the spelled-out
     * form they are replaced with (value). Entries are applied in the order
     * listed here.
     */
    private const _GERMAN_ABBREVIATIONS = [
        " d.Ä." => " (der Ältere)",
        " d. Ä." => " (der Ältere)",
        " (d.Ä.)" => " (der Ältere)",
        " (d. Ä.)" => " (der Ältere)",

        " d.J." => " (der Jüngere)",
        " d. J." => " (der Jüngere)",
        " (d.J.)" => " (der Jüngere)",
        " (d. J.)" => " (der Jüngere)",
    ];

    /**
     * Replaces the last characters of a string if $from matches the end of
     * the string, unless the character directly before the match is a dot.
     *
     * All offsets are computed in characters (mb_*), not bytes, so suffixes
     * containing multi-byte characters such as "Ä" are cut off exactly.
     *
     * @param string $from Replace from.
     * @param string $to   Replace to.
     * @param string $name Input name.
     *
     * @return string
     */
    private static function _replaceFromEnd(string $from, string $to, string $name):string {

        if (str_ends_with($name, $from) === false) {
            return $name;
        }

        // Everything in front of the matched suffix. Computing the length
        // explicitly (instead of passing a negative length) keeps an empty
        // $from from cutting the whole name away.
        $prefix = mb_substr($name, 0, mb_strlen($name) - mb_strlen($from));

        // A dot directly before the suffix means the suffix is part of a
        // longer abbreviation; leave the name untouched in that case.
        if (mb_substr($prefix, -1) === '.') {
            return $name;
        }

        // Collapse doubled spaces in the rebuilt name.
        return str_replace("  ", " ", $prefix . $to);

    }

    /**
     * Cleans and consolidates name parts appearing regularly in German names
     * that have a default writing in md.
     *
     * @param string $name Name of an actor.
     *
     * @return string
     */
    private static function _clean_german_abbreviations(string $name):string {

        foreach (self::_GERMAN_ABBREVIATIONS as $abbreviation => $writtenOut) {
            $name = self::_replaceFromEnd($abbreviation, $writtenOut, $name);
        }

        return $name;

    }

    /**
     * Tries to make sense of life dates in brackets at the end of an actor's name.
     *
     * The name must contain exactly one opening bracket and end on a closing
     * bracket. The bracket contents are handed to NodaTimeSplitter::is_timespan()
     * (defined outside this file). A result is only returned if that call
     * yields a non-empty value whose start_year and end_year are both known
     * (not '?') and less than 150 years apart.
     *
     * @param string $name Input name.
     *
     * @return array{name: string, birth: string, death: string}|array{}
     *         Empty array if no usable life dates could be identified.
     */
    public static function parse_life_dates_from_name(string $name):array {

        if (str_contains($name, "(") === false || str_ends_with($name, ")") === false) return [];

        // More than one opening bracket is ambiguous: give up.
        $parts = explode("(", $name);
        if (count($parts) !== 2) return [];

        $nameOnly = trim($parts[0]);
        // Drop the closing bracket(s) so only the bracket contents remain.
        $dateString = rtrim($parts[1], ')');

        $dates = NodaTimeSplitter::is_timespan($dateString);
        if (empty($dates)
            || $dates->start_year === '?'
            || $dates->end_year === '?'
            || intval($dates->end_year) - intval($dates->start_year) >= 150
        ) {
            return [];
        }

        return [
            'name'  => $nameOnly,
            'birth' => $dates->start_year,
            'death' => $dates->end_year,
        ];

    }

    /**
     * Cleans a persinst name by trimming etc. Also removes uncertainty indicators.
     *
     * Steps, in order: sanitize the raw input via sanitizeInputString() (not
     * defined in this class; presumably inherited from the parent), apply
     * the replacements from _NAME_SANITIZATIONS, strip uncertainty indicators
     * via NodaUncertaintyHelper, expand German abbreviations (only for
     * $lang 'de' and names longer than 10 characters), and finally trim
     * surrounding punctuation and whitespace.
     *
     * @param string $lang          Instance language.
     * @param string $persinst_name Input string to clean.
     *
     * @return string
     */
    public static function consolidate_name(string $lang, string $persinst_name):string {

        // Run basic replacements
        $name = \strtr(self::sanitizeInputString($persinst_name),
            self::_NAME_SANITIZATIONS);
        $name = NodaUncertaintyHelper::cleanUncertaintyIndicatorsPersinst($name);

        if (mb_strlen($name) > 10 && $lang === 'de') {
            $name = self::_clean_german_abbreviations($name);
        }

        // Strip surrounding spaces, semicolons, dots, tabs and the
        // platform's line-break characters.
        return \trim($name, " ;.\t" . PHP_EOL);

    }

}
|