diff --git a/src/NodaUncertaintyHelper.php b/src/NodaUncertaintyHelper.php index 8964875..1c33a65 100644 --- a/src/NodaUncertaintyHelper.php +++ b/src/NodaUncertaintyHelper.php @@ -11,6 +11,11 @@ declare(strict_types = 1); */ final class NodaUncertaintyHelper { + const PERSINST_INDICATORS_DISALLOWED = [ + "Unbekannt", + "unbekannt", + ]; + const PERSINST_UNCERTAINTY_PREFIXES = [ "wohl ", "wahrscheinlich ", @@ -23,6 +28,12 @@ final class NodaUncertaintyHelper { "?", ]; + const TIME_INDICATORS_DISALLOWED = [ + "o.D.", + "Unbekannt", + "unbekannt", + ]; + const TIME_UNCERTAINTY_PREFIXES = [ "um ", "wohl um ", @@ -40,6 +51,11 @@ final class NodaUncertaintyHelper { /** * Substrings used to express uncertainty about the validity of a place name. */ + const PLACE_INDICATORS_DISALLOWED = [ + "Unbekannt", + "unbekannt", + ]; + const PLACE_UNCERTAINTY_PREFIXES = [ "vlt. ", "circa ", @@ -61,6 +77,10 @@ final class NodaUncertaintyHelper { */ public static function cleanUncertaintyIndicatorsTime(string $name):string { + if (\in_array($name, self::TIME_INDICATORS_DISALLOWED, true)) { + return ""; + } + // Remove uncertainty prefixes foreach (NodaUncertaintyHelper::TIME_UNCERTAINTY_PREFIXES as $prefix) { if (\substr($name, 0, \strlen($prefix)) === "$prefix") { @@ -119,6 +139,10 @@ final class NodaUncertaintyHelper { $ort_name = \trim($ort_name); + if (\in_array($ort_name, self::PLACE_INDICATORS_DISALLOWED, true)) { + return ""; + } + // Remove uncertainty prefixes foreach (NodaUncertaintyHelper::PLACE_UNCERTAINTY_PREFIXES as $prefix) { if (\substr($ort_name, 0, \strlen($prefix)) === "$prefix") { @@ -177,6 +201,10 @@ final class NodaUncertaintyHelper { $value = \trim($value); + if (\in_array($value, self::PERSINST_INDICATORS_DISALLOWED, true)) { + return ""; + } + foreach (self::PERSINST_UNCERTAINTY_PREFIXES as $toRemove) { if (\mb_substr($value, 0, \mb_strlen($toRemove)) === $toRemove) { $value = substr($value, \mb_strlen($toRemove)); @@ -202,7 +230,7 @@ final class NodaUncertaintyHelper { */ public static function guessPersinstCertainty(string $name):bool { - $name = \strtolower($name); + $name = \trim(\strtolower($name)); // Attempt to guess uncertainty based on prefixes. foreach (NodaUncertaintyHelper::PERSINST_UNCERTAINTY_PREFIXES as $prefix) {