Remove / disallow certain input names in NodaUncertaintyHelper

This commit is contained in:
Joshua Ramon Enslin 2020-10-04 02:40:21 +02:00 committed by Stefan Rohde-Enslin
parent 4e934e380c
commit 36d27e0f73

View File

@ -11,6 +11,11 @@ declare(strict_types = 1);
*/ */
final class NodaUncertaintyHelper { final class NodaUncertaintyHelper {
/**
 * Person / institution input values that are rejected outright.
 *
 * The person/institution cleaning code compares the trimmed input against
 * this list with a strict in_array() and returns an empty string on a match.
 * The comparison is exact and case-sensitive, which is why both
 * capitalisations of "unbekannt" (German for "unknown") are listed.
 */
const PERSINST_INDICATORS_DISALLOWED = [
"Unbekannt",
"unbekannt",
];
const PERSINST_UNCERTAINTY_PREFIXES = [ const PERSINST_UNCERTAINTY_PREFIXES = [
"wohl ", "wohl ",
"wahrscheinlich ", "wahrscheinlich ",
@ -23,6 +28,12 @@ final class NodaUncertaintyHelper {
"?", "?",
]; ];
/**
 * Time input values that are rejected outright.
 *
 * cleanUncertaintyIndicatorsTime() checks its argument against this list
 * with a strict in_array() before any prefix removal and returns an empty
 * string on a match. The comparison is exact and case-sensitive, so each
 * accepted spelling must be listed separately.
 *
 * "Unbekannt" is German for "unknown"; "o.D." is presumably the
 * abbreviation of "ohne Datum" (undated) - TODO confirm.
 */
const TIME_INDICATORS_DISALLOWED = [
"o.D.",
"Unbekannt",
"unbekannt",
];
const TIME_UNCERTAINTY_PREFIXES = [ const TIME_UNCERTAINTY_PREFIXES = [
"um ", "um ",
"wohl um ", "wohl um ",
@ -40,6 +51,11 @@ final class NodaUncertaintyHelper {
/** /**
* Substrings used to express uncertainty about the validity of a place name. * Substrings used to express uncertainty about the validity of a place name.
*/ */
/**
 * Place input values that are rejected outright.
 *
 * The place-name cleaning code compares the trimmed input against this list
 * with a strict in_array() and returns an empty string on a match. The
 * comparison is exact and case-sensitive, which is why both capitalisations
 * of "unbekannt" (German for "unknown") are listed.
 */
const PLACE_INDICATORS_DISALLOWED = [
"Unbekannt",
"unbekannt",
];
const PLACE_UNCERTAINTY_PREFIXES = [ const PLACE_UNCERTAINTY_PREFIXES = [
"vlt. ", "vlt. ",
"circa ", "circa ",
@ -61,6 +77,10 @@ final class NodaUncertaintyHelper {
*/ */
public static function cleanUncertaintyIndicatorsTime(string $name):string { public static function cleanUncertaintyIndicatorsTime(string $name):string {
if (\in_array($name, self::TIME_INDICATORS_DISALLOWED, true)) {
return "";
}
// Remove uncertainty prefixes // Remove uncertainty prefixes
foreach (NodaUncertaintyHelper::TIME_UNCERTAINTY_PREFIXES as $prefix) { foreach (NodaUncertaintyHelper::TIME_UNCERTAINTY_PREFIXES as $prefix) {
if (\substr($name, 0, \strlen($prefix)) === "$prefix") { if (\substr($name, 0, \strlen($prefix)) === "$prefix") {
@ -119,6 +139,10 @@ final class NodaUncertaintyHelper {
$ort_name = \trim($ort_name); $ort_name = \trim($ort_name);
if (\in_array($ort_name, self::PLACE_INDICATORS_DISALLOWED, true)) {
return "";
}
// Remove uncertainty prefixes // Remove uncertainty prefixes
foreach (NodaUncertaintyHelper::PLACE_UNCERTAINTY_PREFIXES as $prefix) { foreach (NodaUncertaintyHelper::PLACE_UNCERTAINTY_PREFIXES as $prefix) {
if (\substr($ort_name, 0, \strlen($prefix)) === "$prefix") { if (\substr($ort_name, 0, \strlen($prefix)) === "$prefix") {
@ -177,6 +201,10 @@ final class NodaUncertaintyHelper {
$value = \trim($value); $value = \trim($value);
if (\in_array($value, self::PERSINST_INDICATORS_DISALLOWED, true)) {
return "";
}
foreach (self::PERSINST_UNCERTAINTY_PREFIXES as $toRemove) { foreach (self::PERSINST_UNCERTAINTY_PREFIXES as $toRemove) {
if (\mb_substr($value, 0, \mb_strlen($toRemove)) === $toRemove) { if (\mb_substr($value, 0, \mb_strlen($toRemove)) === $toRemove) {
$value = substr($value, \mb_strlen($toRemove)); $value = substr($value, \mb_strlen($toRemove));
@ -202,7 +230,7 @@ final class NodaUncertaintyHelper {
*/ */
public static function guessPersinstCertainty(string $name):bool { public static function guessPersinstCertainty(string $name):bool {
$name = \strtolower($name); $name = \trim(\strtolower($name));
// Attempt to guess uncertainty based on prefixes. // Attempt to guess uncertainty based on prefixes.
foreach (NodaUncertaintyHelper::PERSINST_UNCERTAINTY_PREFIXES as $prefix) { foreach (NodaUncertaintyHelper::PERSINST_UNCERTAINTY_PREFIXES as $prefix) {