*/
declare(strict_types = 1);

/**
 * Static helpers for recognizing and removing uncertainty markers
 * (e.g. "ca. ", "wohl ", "(?)") in names of times, places and actors
 * ("persinst"), and for rejecting placeholder values such as "unbekannt".
 */
final class NodaUncertaintyHelper {

    /**
     * Actor names that are placeholders for "unknown". A value that equals
     * one of these entries (after trimming) is cleaned to an empty string.
     */
    const PERSINST_INDICATORS_DISALLOWED = [
        "Unbekannt",
        "unbekannt",
        "n.n.",
        "N.N.",
        "Künstler, unbekannt",
        "ismeretlen",
        "Ismeretlen",
        "ismeretlen.",
        "Ismeretlen.",
    ];

    /**
     * Prefixes marking an actor name as uncertain. Cleaning matches them
     * case-sensitively; guessing lower-cases the input first, so only the
     * lower-case entries can match there.
     */
    const PERSINST_UNCERTAINTY_PREFIXES = [
        "wohl ",
        "wahrscheinlich ",
        "Wohl ",
        "Wahrscheinlich ",
    ];

    /**
     * Suffixes marking an actor name as uncertain.
     */
    const PERSINST_UNCERTAINTY_SUFFIXES = [
        "(?)",
        "?",
    ];

    /**
     * Time names that are placeholders for "undated" / "unknown". A value
     * that equals one of these entries (after trimming) is cleaned to an
     * empty string.
     */
    const TIME_INDICATORS_DISALLOWED = [
        "o.D.",
        "O.D.",
        "o.J.",
        "O.J.",
        "Ohne Datum",
        "ohne Datum",
        "Unbekannt",
        "unbekannt",
        "ismeretlen",
        "Ismeretlen",
    ];

    /**
     * Prefixes marking a time name as uncertain.
     */
    const TIME_UNCERTAINTY_PREFIXES = [
        "um ",
        "wohl um ",
        "circa ",
        "ca. ",
        "ca "
    ];

    /**
     * Suffixes marking a time name as uncertain.
     */
    const TIME_UNCERTAINTY_SUFFIXES = [
        "(?)",
        "?",
        " körül"
    ];

    /**
     * Place names that are placeholders for "unknown" / "not specified".
     * These are compared against the whole (trimmed) value, not searched
     * for as substrings; a match is cleaned to an empty string.
     */
    const PLACE_INDICATORS_DISALLOWED = [
        "Unbekannt",
        "unbekannt",
        "keine Angaben",
        "Keine Angaben",
        "ohne Angabe",
        "Ohne Angabe",
        "ismeretlen",
        "Ismeretlen",
    ];

    /**
     * Prefixes marking a place name as uncertain.
     */
    const PLACE_UNCERTAINTY_PREFIXES = [
        "vlt. ",
        "circa ",
        "ca. ",
        "ca ",
    ];

    /**
     * Suffixes marking a place name as uncertain.
     */
    const PLACE_UNCERTAINTY_SUFFIXES = [
        "(?)",
        "?",
    ];

    /**
     * Trims whitespace and common punctuation marks (comma, semicolon,
     * hyphen, colon) from both ends of a string.
     *
     * @param string $input Input text.
     *
     * @return string
     */
    public static function trim(string $input):string {

        // The "-" is a literal hyphen: trim() only treats ".." as a range.
        return \trim($input, ", \t\n\r;-:");

    }

    /**
     * Lower-cases a string for case-insensitive marker matching.
     *
     * Valid UTF-8 is lower-cased multibyte-aware so that non-ASCII markers
     * (e.g. " körül") match regardless of case. Anything else falls back to
     * the byte-wise strtolower(), because mb_strtolower() would replace
     * invalid bytes with "?" and thereby fake a "?" suffix.
     *
     * @param string $input Input text.
     *
     * @return string
     */
    private static function lowercase(string $input):string {

        if (\mb_check_encoding($input, 'UTF-8')) {
            return \mb_strtolower($input, 'UTF-8');
        }

        return \strtolower($input);

    }

    /**
     * Shared implementation of the cleanUncertaintyIndicators*() methods.
     *
     * Returns an empty string if the trimmed value is one of the disallowed
     * placeholders. Otherwise repeatedly removes matching prefixes and
     * suffixes (case-sensitively) and trims, until nothing changes any more.
     * Repeating is what removes stacked markers such as "ca. um 1900" or
     * "1900 körül (?)", which a single ordered pass would leave half-cleaned.
     *
     * @param string        $value      Input string.
     * @param array<string> $disallowed Placeholder values to reject entirely.
     * @param array<string> $prefixes   Uncertainty prefixes to remove.
     * @param array<string> $suffixes   Uncertainty suffixes to remove.
     *
     * @return string
     */
    private static function stripIndicators(string $value, array $disallowed, array $prefixes, array $suffixes):string {

        $value = self::trim($value);

        if (\in_array($value, $disallowed, true)) {
            return "";
        }

        // Every pass either shortens $value or leaves it untouched, so the
        // loop is guaranteed to terminate.
        do {
            $before = $value;

            foreach ($prefixes as $prefix) {
                $length = \strlen($prefix);
                if (\strncmp($value, $prefix, $length) === 0) {
                    // Cast: before PHP 8, substr() returns false instead of
                    // "" when nothing is left, which breaks strict typing.
                    $value = (string)\substr($value, $length);
                }
            }

            foreach ($suffixes as $suffix) {
                $length = \strlen($suffix);
                if (\substr($value, -1 * $length) === $suffix) {
                    $value = (string)\substr($value, 0, -1 * $length);
                }
            }

            // Trim between passes so that a marker hidden behind whitespace
            // left over by a previous removal is found in the next pass.
            $value = self::trim($value);
        } while ($value !== $before);

        return $value;

    }

    /**
     * Shared implementation of the guess*Certainty() methods.
     *
     * Lower-cases and whitespace-trims the name, then checks whether it
     * starts with one of the prefixes or ends with one of the suffixes.
     *
     * @param string        $name     Name to check.
     * @param array<string> $prefixes Uncertainty prefixes.
     * @param array<string> $suffixes Uncertainty suffixes.
     *
     * @return boolean True if no marker was found (certain), false otherwise.
     */
    private static function isFreeOfIndicators(string $name, array $prefixes, array $suffixes):bool {

        $name = \trim(self::lowercase($name));

        // Attempt to guess uncertainty based on prefixes.
        foreach ($prefixes as $prefix) {
            if (\strncmp($name, $prefix, \strlen($prefix)) === 0) {
                return false; // Uncertainty found
            }
        }

        // Attempt to guess uncertainty based on suffixes.
        foreach ($suffixes as $suffix) {
            if (\substr($name, -1 * \strlen($suffix)) === $suffix) {
                return false; // Uncertainty found
            }
        }

        return true; // No uncertainty found

    }

    /**
     * Removes uncertainty indicators from a time name.
     *
     * @param string $name Input string.
     *
     * @return string Cleaned name; empty if the input is a placeholder
     *                listed in TIME_INDICATORS_DISALLOWED.
     */
    public static function cleanUncertaintyIndicatorsTime(string $name):string {

        return self::stripIndicators(
            $name,
            self::TIME_INDICATORS_DISALLOWED,
            self::TIME_UNCERTAINTY_PREFIXES,
            self::TIME_UNCERTAINTY_SUFFIXES
        );

    }

    /**
     * Attempts guessing whether time is uncertain.
     *
     * @param string $zeit_name Time name.
     *
     * @return boolean True if the time appears certain, false if an
     *                 uncertainty prefix or suffix was found.
     */
    public static function guessTimeCertainty(string $zeit_name):bool {

        return self::isFreeOfIndicators(
            $zeit_name,
            self::TIME_UNCERTAINTY_PREFIXES,
            self::TIME_UNCERTAINTY_SUFFIXES
        );

    }

    /**
     * Removes uncertainty indicators from a place name.
     *
     * @param string $ort_name Input string.
     *
     * @return string Cleaned name; empty if the input is a placeholder
     *                listed in PLACE_INDICATORS_DISALLOWED.
     */
    public static function cleanUncertaintyIndicatorsPlace(string $ort_name):string {

        return self::stripIndicators(
            $ort_name,
            self::PLACE_INDICATORS_DISALLOWED,
            self::PLACE_UNCERTAINTY_PREFIXES,
            self::PLACE_UNCERTAINTY_SUFFIXES
        );

    }

    /**
     * Attempts guessing whether place is uncertain.
     *
     * @param string $ort_name Place name.
     *
     * @return boolean True if the place appears certain, false if an
     *                 uncertainty prefix or suffix was found.
     */
    public static function guessPlaceCertainty(string $ort_name):bool {

        return self::isFreeOfIndicators(
            $ort_name,
            self::PLACE_UNCERTAINTY_PREFIXES,
            self::PLACE_UNCERTAINTY_SUFFIXES
        );

    }

    /**
     * Removes uncertainty indicators from an actor name.
     *
     * @param string $value Input string.
     *
     * @return string Cleaned name; empty if the input is a placeholder
     *                listed in PERSINST_INDICATORS_DISALLOWED.
     */
    public static function cleanUncertaintyIndicatorsPersinst(string $value):string {

        return self::stripIndicators(
            $value,
            self::PERSINST_INDICATORS_DISALLOWED,
            self::PERSINST_UNCERTAINTY_PREFIXES,
            self::PERSINST_UNCERTAINTY_SUFFIXES
        );

    }

    /**
     * Attempts guessing whether persinst is uncertain.
     *
     * @param string $name Persinst name.
     *
     * @return boolean True if the actor appears certain, false if an
     *                 uncertainty prefix or suffix was found.
     */
    public static function guessPersinstCertainty(string $name):bool {

        return self::isFreeOfIndicators(
            $name,
            self::PERSINST_UNCERTAINTY_PREFIXES,
            self::PERSINST_UNCERTAINTY_SUFFIXES
        );

    }

}