Remove / disallow certain input names in NodaUncertaintyHelper

This commit is contained in:
Joshua Ramon Enslin 2020-10-04 02:40:21 +02:00 committed by Stefan Rohde-Enslin
parent 4e934e380c
commit 36d27e0f73

View File

@ -11,6 +11,11 @@ declare(strict_types = 1);
*/
final class NodaUncertaintyHelper {
/**
 * Inputs that are rejected outright as a person / institution name
 * ("Unbekannt" is German for "unknown").
 *
 * The persinst cleaning code in this class compares the trimmed input
 * against this list with a strict in_array() check and returns an empty
 * string on an exact match. The comparison is case-sensitive, which is
 * why both capitalizations are listed.
 */
const PERSINST_INDICATORS_DISALLOWED = [
"Unbekannt",
"unbekannt",
];
const PERSINST_UNCERTAINTY_PREFIXES = [
"wohl ",
"wahrscheinlich ",
@ -23,6 +28,12 @@ final class NodaUncertaintyHelper {
"?",
];
/**
 * Inputs that are rejected outright as a time name
 * ("Unbekannt" is German for "unknown").
 *
 * cleanUncertaintyIndicatorsTime() checks its $name argument against this
 * list with a strict in_array() call and returns an empty string on an
 * exact match. The comparison is case-sensitive, which is why both
 * capitalizations of "unbekannt" are listed.
 */
const TIME_INDICATORS_DISALLOWED = [
// NOTE(review): presumably the abbreviation of "ohne Datum" ("no date") - confirm.
"o.D.",
"Unbekannt",
"unbekannt",
];
const TIME_UNCERTAINTY_PREFIXES = [
"um ",
"wohl um ",
@ -40,6 +51,11 @@ final class NodaUncertaintyHelper {
/**
 * Inputs that are rejected outright as a place name
 * ("Unbekannt" is German for "unknown").
 *
 * The place cleaning code in this class compares the trimmed input
 * against this list with a strict in_array() check and returns an empty
 * string on an exact match. The comparison is case-sensitive, which is
 * why both capitalizations are listed.
 */
const PLACE_INDICATORS_DISALLOWED = [
"Unbekannt",
"unbekannt",
];
/**
 * Substrings used to express uncertainty about the validity of a place name.
 */
const PLACE_UNCERTAINTY_PREFIXES = [
"vlt. ",
"circa ",
@ -61,6 +77,10 @@ final class NodaUncertaintyHelper {
*/
public static function cleanUncertaintyIndicatorsTime(string $name):string {
if (\in_array($name, self::TIME_INDICATORS_DISALLOWED, true)) {
return "";
}
// Remove uncertainty prefixes
foreach (NodaUncertaintyHelper::TIME_UNCERTAINTY_PREFIXES as $prefix) {
if (\substr($name, 0, \strlen($prefix)) === "$prefix") {
@ -119,6 +139,10 @@ final class NodaUncertaintyHelper {
$ort_name = \trim($ort_name);
if (\in_array($ort_name, self::PLACE_INDICATORS_DISALLOWED, true)) {
return "";
}
// Remove uncertainty prefixes
foreach (NodaUncertaintyHelper::PLACE_UNCERTAINTY_PREFIXES as $prefix) {
if (\substr($ort_name, 0, \strlen($prefix)) === "$prefix") {
@ -177,6 +201,10 @@ final class NodaUncertaintyHelper {
$value = \trim($value);
if (\in_array($value, self::PERSINST_INDICATORS_DISALLOWED, true)) {
return "";
}
foreach (self::PERSINST_UNCERTAINTY_PREFIXES as $toRemove) {
if (\mb_substr($value, 0, \mb_strlen($toRemove)) === $toRemove) {
$value = substr($value, \mb_strlen($toRemove));
@ -202,7 +230,7 @@ final class NodaUncertaintyHelper {
*/
public static function guessPersinstCertainty(string $name):bool {
$name = \strtolower($name);
$name = \trim(\strtolower($name));
// Attempt to guess uncertainty based on prefixes.
foreach (NodaUncertaintyHelper::PERSINST_UNCERTAINTY_PREFIXES as $prefix) {