Add functions for cleaning uncertainty indicators to
NodaUncertaintyHelper
This commit is contained in:
		| @@ -11,6 +11,18 @@ declare(strict_types = 1); | ||||
|  */ | ||||
| final class NodaUncertaintyHelper { | ||||
|  | ||||
|     /** | ||||
|      * Leading markers that flag a persinst (actor) name as uncertain. | ||||
|      * | ||||
|      * German "wohl" / "wahrscheinlich" (roughly "probably"), each in a | ||||
|      * lower-case and a capitalised spelling. Every entry ends with a space, | ||||
|      * which is part of the exact prefix comparison. | ||||
|      * | ||||
|      * Read by cleanUncertaintyIndicatorsPersinst() and guessPersinstCertainty(). | ||||
|      */ | ||||
|     const PERSINST_UNCERTAINTY_PREFIXES = [ | ||||
|         "wohl ", | ||||
|         "wahrscheinlich ", | ||||
|         "Wohl ", | ||||
|         "Wahrscheinlich ", | ||||
|     ]; | ||||
|  | ||||
|     /** | ||||
|      * Trailing markers that flag a persinst (actor) name as uncertain: | ||||
|      * a bracketed or a bare question mark. | ||||
|      * | ||||
|      * Read by guessPersinstCertainty(). | ||||
|      */ | ||||
|     const PERSINST_UNCERTAINTY_SUFFIXES = [ | ||||
|         "(?)", | ||||
|         "?", | ||||
|     ]; | ||||
|  | ||||
|     const TIME_UNCERTAINTY_PREFIXES = [ | ||||
|         "um ", | ||||
|         "wohl um ", | ||||
| @@ -39,6 +51,33 @@ final class NodaUncertaintyHelper { | ||||
|         "?", | ||||
|     ]; | ||||
|  | ||||
|     /** | ||||
|      * Strips known uncertainty markers from the start and end of a time name. | ||||
|      * | ||||
|      * Each entry of TIME_UNCERTAINTY_PREFIXES is tried once, in list order, | ||||
|      * followed by each entry of TIME_UNCERTAINTY_SUFFIXES. The string is | ||||
|      * trimmed after every removal and once more before it is returned. | ||||
|      * Comparisons are byte-wise and case-sensitive. | ||||
|      * | ||||
|      * @param string $name Input string. | ||||
|      * | ||||
|      * @return string | ||||
|      */ | ||||
|     public static function cleanUncertaintyIndicatorsTime(string $name):string { | ||||
|  | ||||
|         // Leading markers: skip entries the string does not start with. | ||||
|         foreach (self::TIME_UNCERTAINTY_PREFIXES as $marker) { | ||||
|             $markerLength = \strlen($marker); | ||||
|             if (\strncmp($name, $marker, $markerLength) !== 0) { | ||||
|                 continue; | ||||
|             } | ||||
|             $name = \trim(\substr($name, $markerLength)); | ||||
|         } | ||||
|  | ||||
|         // Trailing markers: a negative offset addresses the end of the string. | ||||
|         foreach (self::TIME_UNCERTAINTY_SUFFIXES as $marker) { | ||||
|             $cutAt = -\strlen($marker); | ||||
|             if (\substr($name, $cutAt) !== $marker) { | ||||
|                 continue; | ||||
|             } | ||||
|             $name = \trim(\substr($name, 0, $cutAt)); | ||||
|         } | ||||
|  | ||||
|         return \trim($name); | ||||
|  | ||||
|     } | ||||
|  | ||||
|     /** | ||||
|      * Attempts guessing whether time is uncertain. | ||||
|      * | ||||
| @@ -68,6 +107,33 @@ final class NodaUncertaintyHelper { | ||||
|  | ||||
|     } | ||||
|  | ||||
|     /** | ||||
|      * Strips known uncertainty markers from the start and end of a place name. | ||||
|      * | ||||
|      * Each entry of PLACE_UNCERTAINTY_PREFIXES is tried once, in list order, | ||||
|      * followed by each entry of PLACE_UNCERTAINTY_SUFFIXES. The string is | ||||
|      * trimmed after every removal and once more before it is returned. | ||||
|      * Comparisons are byte-wise and case-sensitive. | ||||
|      * | ||||
|      * @param string $ort_name Input string. | ||||
|      * | ||||
|      * @return string | ||||
|      */ | ||||
|     public static function cleanUncertaintyIndicatorsPlace(string $ort_name):string { | ||||
|  | ||||
|         // Leading markers: skip entries the string does not start with. | ||||
|         foreach (self::PLACE_UNCERTAINTY_PREFIXES as $marker) { | ||||
|             $markerLength = \strlen($marker); | ||||
|             if (\strncmp($ort_name, $marker, $markerLength) !== 0) { | ||||
|                 continue; | ||||
|             } | ||||
|             $ort_name = \trim(\substr($ort_name, $markerLength)); | ||||
|         } | ||||
|  | ||||
|         // Trailing markers: a negative offset addresses the end of the string. | ||||
|         foreach (self::PLACE_UNCERTAINTY_SUFFIXES as $marker) { | ||||
|             $cutAt = -\strlen($marker); | ||||
|             if (\substr($ort_name, $cutAt) !== $marker) { | ||||
|                 continue; | ||||
|             } | ||||
|             $ort_name = \trim(\substr($ort_name, 0, $cutAt)); | ||||
|         } | ||||
|  | ||||
|         return \trim($ort_name); | ||||
|  | ||||
|     } | ||||
|  | ||||
|     /** | ||||
|      * Attempts guessing whether place is uncertain. | ||||
|      * | ||||
| @@ -97,4 +163,58 @@ final class NodaUncertaintyHelper { | ||||
|  | ||||
|     } | ||||
|  | ||||
|     /** | ||||
|      * Removes uncertainty indicators from an actor (persinst) name. | ||||
|      * | ||||
|      * Each entry of PERSINST_UNCERTAINTY_PREFIXES is tried once, in list | ||||
|      * order, followed by each entry of PERSINST_UNCERTAINTY_SUFFIXES - the | ||||
|      * same lists guessPersinstCertainty() checks. The string is trimmed | ||||
|      * after every removal, so stacked indicators separated by whitespace | ||||
|      * are all removed, and once more before it is returned. | ||||
|      * | ||||
|      * @param string $value Input string. | ||||
|      * | ||||
|      * @return string | ||||
|      */ | ||||
|     public static function cleanUncertaintyIndicatorsPersinst(string $value):string { | ||||
|  | ||||
|         // Remove uncertainty prefixes | ||||
|         foreach (self::PERSINST_UNCERTAINTY_PREFIXES as $prefix) { | ||||
|             $prefixLength = \mb_strlen($prefix); | ||||
|             if (\mb_substr($value, 0, $prefixLength) === $prefix) { | ||||
|                 // The length is counted in characters, so the cut has to | ||||
|                 // use mb_substr as well; byte-wise substr could split a | ||||
|                 // multibyte character. | ||||
|                 $value = \trim(\mb_substr($value, $prefixLength)); | ||||
|             } | ||||
|         } | ||||
|  | ||||
|         // Remove uncertainty suffixes (persinst list, not the place list) | ||||
|         foreach (self::PERSINST_UNCERTAINTY_SUFFIXES as $suffix) { | ||||
|             $suffixLength = \mb_strlen($suffix); | ||||
|             if (\mb_substr($value, -$suffixLength) === $suffix) { | ||||
|                 $value = \trim(\mb_substr($value, 0, -$suffixLength)); | ||||
|             } | ||||
|         } | ||||
|  | ||||
|         return \trim($value); | ||||
|  | ||||
|     } | ||||
|  | ||||
|     /** | ||||
|      * Guesses from its wording whether a persinst name is certain. | ||||
|      * | ||||
|      * The name is lower-cased, then compared against the entries of | ||||
|      * PERSINST_UNCERTAINTY_PREFIXES (at the start) and | ||||
|      * PERSINST_UNCERTAINTY_SUFFIXES (at the end). Because of the | ||||
|      * lower-casing, only the lower-case prefix entries can match here. | ||||
|      * The input is not trimmed before the comparison. | ||||
|      * | ||||
|      * @param string $name Persinst name. | ||||
|      * | ||||
|      * @return boolean False if an uncertainty marker was found, true otherwise. | ||||
|      */ | ||||
|     public static function guessPersinstCertainty(string $name):bool { | ||||
|  | ||||
|         $lowered = \strtolower($name); | ||||
|  | ||||
|         // A matching leading marker means: uncertain. | ||||
|         foreach (self::PERSINST_UNCERTAINTY_PREFIXES as $leading) { | ||||
|             if (\strncmp($lowered, $leading, \strlen($leading)) === 0) { | ||||
|                 return false; | ||||
|             } | ||||
|         } | ||||
|  | ||||
|         // A matching trailing marker means: uncertain. | ||||
|         foreach (self::PERSINST_UNCERTAINTY_SUFFIXES as $trailing) { | ||||
|             $tail = \substr($lowered, -\strlen($trailing)); | ||||
|             if ($tail === $trailing) { | ||||
|                 return false; | ||||
|             } | ||||
|         } | ||||
|  | ||||
|         // No marker found: treat the name as certain. | ||||
|         return true; | ||||
|  | ||||
|     } | ||||
|  | ||||
| } | ||||
|   | ||||
		Reference in New Issue
	
	Block a user