*/
declare(strict_types = 1);

/**
 * Static helpers for detecting and removing textual uncertainty indicators
 * (prefixes such as "ca. " or "wohl ", suffixes such as "(?)") in the names
 * of times, places and actors ("persinst").
 *
 * The cleanUncertaintyIndicators*() methods compare case-sensitively against
 * the lists below. The guess*Certainty() methods run the input through
 * strtolower() first, so only the lowercase entries of a list can match there.
 */
final class NodaUncertaintyHelper {

    /**
     * Actor names that carry no usable information; cleaning them yields "".
     */
    public const PERSINST_INDICATORS_DISALLOWED = [
        "Unbekannt",
        "unbekannt",
    ];

    /**
     * Prefixes marking an actor name as uncertain. The capitalized variants
     * are needed because cleaning is case-sensitive.
     */
    public const PERSINST_UNCERTAINTY_PREFIXES = [
        "wohl ",
        "wahrscheinlich ",
        "Wohl ",
        "Wahrscheinlich ",
    ];

    /**
     * Suffixes marking an actor name as uncertain.
     */
    public const PERSINST_UNCERTAINTY_SUFFIXES = [
        "(?)",
        "?",
    ];

    /**
     * Time names that carry no usable information; cleaning them yields "".
     */
    public const TIME_INDICATORS_DISALLOWED = [
        "o.D.",
        "Unbekannt",
        "unbekannt",
    ];

    /**
     * Prefixes marking a time name as uncertain.
     */
    public const TIME_UNCERTAINTY_PREFIXES = [
        "um ",
        "wohl um ",
        "circa ",
        "ca. ",
        "ca ",
    ];

    /**
     * Suffixes marking a time name as uncertain.
     */
    public const TIME_UNCERTAINTY_SUFFIXES = [
        "(?)",
        "?",
        " körül",
    ];

    /**
     * Place names that carry no usable information; cleaning them yields "".
     */
    public const PLACE_INDICATORS_DISALLOWED = [
        "Unbekannt",
        "unbekannt",
    ];

    /**
     * Prefixes marking a place name as uncertain.
     */
    public const PLACE_UNCERTAINTY_PREFIXES = [
        "vlt. ",
        "circa ",
        "ca. ",
        "ca ",
    ];

    /**
     * Suffixes marking a place name as uncertain.
     */
    public const PLACE_UNCERTAINTY_SUFFIXES = [
        "(?)",
        "?",
    ];

    /**
     * Tells whether $value begins with $prefix.
     *
     * The comparison is byte-wise and case-sensitive. Byte-wise matching is
     * sufficient here: comparing whole byte sequences of two UTF-8 strings
     * can only succeed on character boundaries.
     *
     * @param string $value  String to inspect.
     * @param string $prefix Prefix to look for.
     *
     * @return boolean
     */
    private static function startsWith(string $value, string $prefix):bool {

        return \strncmp($value, $prefix, \strlen($prefix)) === 0;

    }

    /**
     * Tells whether $value ends with $suffix (byte-wise, case-sensitive).
     *
     * @param string $value  String to inspect.
     * @param string $suffix Suffix to look for.
     *
     * @return boolean
     */
    private static function endsWith(string $value, string $suffix):bool {

        // If $value is shorter than $suffix, substr() returns all of $value,
        // which then simply fails the comparison.
        return \substr($value, -\strlen($suffix)) === $suffix;

    }

    /**
     * Shared implementation of the cleanUncertaintyIndicators*() methods.
     *
     * Trims the input, returns "" if it is one of the disallowed values, and
     * otherwise strips each listed prefix and then each listed suffix (every
     * list entry is tried once, in list order), trimming after each removal.
     *
     * @param string        $value      Input string.
     * @param array<string> $disallowed Values that are to be blanked entirely.
     * @param array<string> $prefixes   Uncertainty prefixes to strip.
     * @param array<string> $suffixes   Uncertainty suffixes to strip.
     *
     * @return string
     */
    private static function stripIndicators(string $value, array $disallowed, array $prefixes, array $suffixes):string {

        // Trim first, so that padded placeholders like " o.D. " are caught too.
        $value = \trim($value);
        if (\in_array($value, $disallowed, true)) {
            return "";
        }

        foreach ($prefixes as $prefix) {
            if (self::startsWith($value, $prefix)) {
                $value = \trim(\substr($value, \strlen($prefix)));
            }
        }

        foreach ($suffixes as $suffix) {
            if (self::endsWith($value, $suffix)) {
                $value = \trim(\substr($value, 0, -\strlen($suffix)));
            }
        }

        return $value;

    }

    /**
     * Shared implementation of the guess*Certainty() methods.
     *
     * Lowercases (strtolower()) and trims the input, then checks whether it
     * starts with one of the prefixes or ends with one of the suffixes.
     *
     * @param string        $value    Input string.
     * @param array<string> $prefixes Uncertainty prefixes to look for.
     * @param array<string> $suffixes Uncertainty suffixes to look for.
     *
     * @return boolean True if an uncertainty indicator was found.
     */
    private static function hasIndicator(string $value, array $prefixes, array $suffixes):bool {

        $value = \trim(\strtolower($value));

        foreach ($prefixes as $prefix) {
            if (self::startsWith($value, $prefix)) {
                return true;
            }
        }

        foreach ($suffixes as $suffix) {
            if (self::endsWith($value, $suffix)) {
                return true;
            }
        }

        return false;

    }

    /**
     * Removes uncertainty indicators from a time name.
     *
     * Returns an empty string if the (trimmed) name is one of
     * TIME_INDICATORS_DISALLOWED.
     *
     * @param string $name Input string.
     *
     * @return string
     */
    public static function cleanUncertaintyIndicatorsTime(string $name):string {

        return self::stripIndicators(
            $name,
            self::TIME_INDICATORS_DISALLOWED,
            self::TIME_UNCERTAINTY_PREFIXES,
            self::TIME_UNCERTAINTY_SUFFIXES
        );

    }

    /**
     * Attempts guessing whether a time is certain.
     *
     * @param string $zeit_name Time name.
     *
     * @return boolean False if an uncertainty prefix or suffix was found,
     *                 true otherwise.
     */
    public static function guessTimeCertainty(string $zeit_name):bool {

        return !self::hasIndicator(
            $zeit_name,
            self::TIME_UNCERTAINTY_PREFIXES,
            self::TIME_UNCERTAINTY_SUFFIXES
        );

    }

    /**
     * Removes uncertainty indicators from a place name.
     *
     * Returns an empty string if the (trimmed) name is one of
     * PLACE_INDICATORS_DISALLOWED.
     *
     * @param string $ort_name Input string.
     *
     * @return string
     */
    public static function cleanUncertaintyIndicatorsPlace(string $ort_name):string {

        return self::stripIndicators(
            $ort_name,
            self::PLACE_INDICATORS_DISALLOWED,
            self::PLACE_UNCERTAINTY_PREFIXES,
            self::PLACE_UNCERTAINTY_SUFFIXES
        );

    }

    /**
     * Attempts guessing whether a place is certain.
     *
     * @param string $ort_name Place name.
     *
     * @return boolean False if an uncertainty prefix or suffix was found,
     *                 true otherwise.
     */
    public static function guessPlaceCertainty(string $ort_name):bool {

        return !self::hasIndicator(
            $ort_name,
            self::PLACE_UNCERTAINTY_PREFIXES,
            self::PLACE_UNCERTAINTY_SUFFIXES
        );

    }

    /**
     * Removes uncertainty indicators from an actor name.
     *
     * Returns an empty string if the (trimmed) name is one of
     * PERSINST_INDICATORS_DISALLOWED.
     *
     * @param string $value Input string.
     *
     * @return string
     */
    public static function cleanUncertaintyIndicatorsPersinst(string $value):string {

        return self::stripIndicators(
            $value,
            self::PERSINST_INDICATORS_DISALLOWED,
            self::PERSINST_UNCERTAINTY_PREFIXES,
            self::PERSINST_UNCERTAINTY_SUFFIXES
        );

    }

    /**
     * Attempts guessing whether an actor (persinst) name is certain.
     *
     * @param string $name Persinst name.
     *
     * @return boolean False if an uncertainty prefix or suffix was found,
     *                 true otherwise.
     */
    public static function guessPersinstCertainty(string $name):bool {

        return !self::hasIndicator(
            $name,
            self::PERSINST_UNCERTAINTY_PREFIXES,
            self::PERSINST_UNCERTAINTY_SUFFIXES
        );

    }
}