From e53eec84e6ba8cbb3a4c00d52afe1f745f085e6e Mon Sep 17 00:00:00 2001
From: Joshua Ramon Enslin
Date: Mon, 21 Sep 2020 01:57:21 +0200
Subject: [PATCH] Add functions cleaning of uncertainty indicators to NodaUncertaintyHelper

---
 src/NodaUncertaintyHelper.php | 120 ++++++++++++++++++++++++++++++++++
 1 file changed, 120 insertions(+)

diff --git a/src/NodaUncertaintyHelper.php b/src/NodaUncertaintyHelper.php
index b9ee265..aa0d2c8 100644
--- a/src/NodaUncertaintyHelper.php
+++ b/src/NodaUncertaintyHelper.php
@@ -11,6 +11,18 @@ declare(strict_types = 1);
  */
 final class NodaUncertaintyHelper {
 
+    const PERSINST_UNCERTAINTY_PREFIXES = [
+        "wohl ",
+        "wahrscheinlich ",
+        "Wohl ",
+        "Wahrscheinlich ",
+    ];
+
+    const PERSINST_UNCERTAINTY_SUFFIXES = [
+        "(?)",
+        "?",
+    ];
+
     const TIME_UNCERTAINTY_PREFIXES = [
         "um ",
         "wohl um ",
@@ -39,6 +51,33 @@ final class NodaUncertaintyHelper {
         "?",
     ];
 
+    /**
+     * Removes known uncertainty prefixes and suffixes from a time name.
+     *
+     * @param string $name Input string.
+     *
+     * @return string
+     */
+    public static function cleanUncertaintyIndicatorsTime(string $name):string {
+
+        // Remove uncertainty prefixes
+        foreach (NodaUncertaintyHelper::TIME_UNCERTAINTY_PREFIXES as $prefix) {
+            if (\substr($name, 0, \strlen($prefix)) === $prefix) {
+                $name = \trim(\substr($name, \strlen($prefix)));
+            }
+        }
+
+        // Remove uncertainty suffixes
+        foreach (NodaUncertaintyHelper::TIME_UNCERTAINTY_SUFFIXES as $suffix) {
+            if (\substr($name, \strlen($suffix) * -1) === $suffix) {
+                $name = \trim(\substr($name, 0, \strlen($suffix) * -1));
+            }
+        }
+
+        return \trim($name);
+
+    }
+
     /**
      * Attempts guessing whether time is uncertain.
      *
@@ -68,6 +107,33 @@ final class NodaUncertaintyHelper {
 
     }
 
+    /**
+     * Removes known uncertainty prefixes and suffixes from a place name.
+     *
+     * @param string $ort_name Input string.
+     *
+     * @return string
+     */
+    public static function cleanUncertaintyIndicatorsPlace(string $ort_name):string {
+
+        // Remove uncertainty prefixes
+        foreach (NodaUncertaintyHelper::PLACE_UNCERTAINTY_PREFIXES as $prefix) {
+            if (\substr($ort_name, 0, \strlen($prefix)) === $prefix) {
+                $ort_name = \trim(\substr($ort_name, \strlen($prefix)));
+            }
+        }
+
+        // Remove uncertainty suffixes
+        foreach (NodaUncertaintyHelper::PLACE_UNCERTAINTY_SUFFIXES as $suffix) {
+            if (\substr($ort_name, \strlen($suffix) * -1) === $suffix) {
+                $ort_name = \trim(\substr($ort_name, 0, \strlen($suffix) * -1));
+            }
+        }
+
+        return \trim($ort_name);
+
+    }
+
     /**
      * Attempts guessing whether place is uncertain.
      *
@@ -97,4 +163,58 @@ final class NodaUncertaintyHelper {
 
     }
 
+    /**
+     * Removes known uncertainty prefixes and suffixes from an actor (persinst) name.
+     *
+     * @param string $value Input string.
+     *
+     * @return string
+     */
+    public static function cleanUncertaintyIndicatorsPersinst(string $value):string {
+
+        foreach (self::PERSINST_UNCERTAINTY_PREFIXES as $toRemove) {
+            if (\mb_substr($value, 0, \mb_strlen($toRemove)) === $toRemove) {
+                $value = \mb_substr($value, \mb_strlen($toRemove));
+            }
+        }
+
+        foreach (self::PERSINST_UNCERTAINTY_SUFFIXES as $suffix) {
+            if (\mb_substr($value, \mb_strlen($suffix) * -1) === $suffix) {
+                $value = \mb_substr($value, 0, \mb_strlen($suffix) * -1);
+            }
+        }
+
+        return \trim($value);
+
+    }
+
+    /**
+     * Guesses persinst certainty: false if an uncertainty prefix or suffix is found, else true.
+     *
+     * @param string $name Persinst name.
+     *
+     * @return boolean
+     */
+    public static function guessPersinstCertainty(string $name):bool {
+
+        $name = \strtolower($name);
+
+        // Attempt to guess uncertainty based on prefixes.
+        foreach (NodaUncertaintyHelper::PERSINST_UNCERTAINTY_PREFIXES as $prefix) {
+            if (\substr($name, 0, \strlen($prefix)) === $prefix) {
+                return false; // Uncertain
+            }
+        }
+
+        // Attempt to guess uncertainty based on suffixes.
+        foreach (NodaUncertaintyHelper::PERSINST_UNCERTAINTY_SUFFIXES as $suffix) {
+            if (\substr($name, -1 * \strlen($suffix)) === $suffix) {
+                return false; // Uncertain
+            }
+        }
+
+        return true; // Certain / no uncertainty found
+
+    }
+
 }