Add functions for cleaning uncertainty indicators to

NodaUncertaintyHelper
This commit is contained in:
Joshua Ramon Enslin 2020-09-21 01:57:21 +02:00 committed by Stefan Rohde-Enslin
parent 923505f146
commit e53eec84e6

View File

@ -11,6 +11,18 @@ declare(strict_types = 1);
*/ */
final class NodaUncertaintyHelper { final class NodaUncertaintyHelper {
/**
 * Prefixes treated as uncertainty indicators at the start of an actor
 * (persinst) name: German "wohl" / "wahrscheinlich" (roughly "probably"),
 * each in lowercase and capitalized form. Every entry includes the trailing
 * space, so a match always ends at a word boundary.
 *
 * Read by cleanUncertaintyIndicatorsPersinst() and guessPersinstCertainty().
 */
const PERSINST_UNCERTAINTY_PREFIXES = [
"wohl ",
"wahrscheinlich ",
"Wohl ",
"Wahrscheinlich ",
];
/**
 * Suffixes treated as uncertainty indicators at the end of an actor
 * (persinst) name. The longer "(?)" is listed before the bare "?", so it is
 * the first one tested when the list is iterated in order.
 *
 * Read by guessPersinstCertainty().
 */
const PERSINST_UNCERTAINTY_SUFFIXES = [
"(?)",
"?",
];
const TIME_UNCERTAINTY_PREFIXES = [ const TIME_UNCERTAINTY_PREFIXES = [
"um ", "um ",
"wohl um ", "wohl um ",
@ -39,6 +51,33 @@ final class NodaUncertaintyHelper {
"?", "?",
]; ];
/**
 * Strips known uncertainty markers from the start and end of a time name.
 *
 * Every entry of TIME_UNCERTAINTY_PREFIXES is tested once, in list order,
 * against the beginning of the string; afterwards every entry of
 * TIME_UNCERTAINTY_SUFFIXES is tested once against its end. Whenever a
 * marker matches, it is cut off and the remainder is trimmed before the
 * next entry is tested.
 *
 * @param string $name Input string.
 *
 * @return string
 */
public static function cleanUncertaintyIndicatorsTime(string $name):string {

    // Cut off leading markers.
    foreach (self::TIME_UNCERTAINTY_PREFIXES as $marker) {
        $markerLength = \strlen($marker);
        if (\substr($name, 0, $markerLength) !== $marker) {
            continue;
        }
        $name = \trim(\substr($name, $markerLength));
    }

    // Cut off trailing markers.
    foreach (self::TIME_UNCERTAINTY_SUFFIXES as $marker) {
        $markerLength = \strlen($marker);
        if (\substr($name, -$markerLength) !== $marker) {
            continue;
        }
        $name = \trim(\substr($name, 0, -$markerLength));
    }

    return \trim($name);

}
/** /**
* Attempts guessing whether time is uncertain. * Attempts guessing whether time is uncertain.
* *
@ -68,6 +107,33 @@ final class NodaUncertaintyHelper {
} }
/**
 * Strips known uncertainty markers from the start and end of a place name.
 *
 * Leading markers come from PLACE_UNCERTAINTY_PREFIXES, trailing ones from
 * PLACE_UNCERTAINTY_SUFFIXES. Each list is walked once in order; after a
 * marker has been removed, the remaining string is trimmed before the next
 * entry is compared.
 *
 * @param string $ort_name Input string.
 *
 * @return string
 */
public static function cleanUncertaintyIndicatorsPlace(string $ort_name):string {

    // Leading indicators.
    foreach (self::PLACE_UNCERTAINTY_PREFIXES as $indicator) {
        $length = \strlen($indicator);
        $head   = \substr($ort_name, 0, $length);
        if ($head === $indicator) {
            $rest     = \substr($ort_name, $length);
            $ort_name = \trim($rest);
        }
    }

    // Trailing indicators.
    foreach (self::PLACE_UNCERTAINTY_SUFFIXES as $indicator) {
        $length = \strlen($indicator);
        $tail   = \substr($ort_name, -$length);
        if ($tail === $indicator) {
            $rest     = \substr($ort_name, 0, -$length);
            $ort_name = \trim($rest);
        }
    }

    return \trim($ort_name);

}
/** /**
* Attempts guessing whether place is uncertain. * Attempts guessing whether place is uncertain.
* *
@ -97,4 +163,58 @@ final class NodaUncertaintyHelper {
} }
/**
 * Removes uncertainty indicators from an actor name.
 *
 * Every entry of PERSINST_UNCERTAINTY_PREFIXES is tested once, in list
 * order, against the start of the value, then every entry of
 * PERSINST_UNCERTAINTY_SUFFIXES is tested once against its end. Matching
 * indicators are cut off; surrounding whitespace is trimmed from the final
 * result.
 *
 * @param string $value Input string.
 *
 * @return string
 */
public static function cleanUncertaintyIndicatorsPersinst(string $value):string {

    foreach (self::PERSINST_UNCERTAINTY_PREFIXES as $prefix) {
        $prefixLength = \mb_strlen($prefix);
        if (\mb_substr($value, 0, $prefixLength) === $prefix) {
            // The length is counted in characters, so the cut has to be
            // character-based as well; a byte-wise substr() would split a
            // multibyte sequence for any non-ASCII prefix.
            $value = \mb_substr($value, $prefixLength);
        }
    }

    // Use the actor-specific suffix list, i.e. the same one that
    // guessPersinstCertainty() checks, rather than the place suffixes.
    foreach (self::PERSINST_UNCERTAINTY_SUFFIXES as $suffix) {
        $suffixLength = \mb_strlen($suffix);
        if (\mb_substr($value, -$suffixLength) === $suffix) {
            $value = \mb_substr($value, 0, -$suffixLength);
        }
    }

    return \trim($value);

}
/**
 * Attempts guessing whether persinst is uncertain.
 *
 * The name is lowercased with strtolower() and then compared against every
 * entry of PERSINST_UNCERTAINTY_PREFIXES (at its start) and every entry of
 * PERSINST_UNCERTAINTY_SUFFIXES (at its end).
 *
 * @param string $name Persinst name.
 *
 * @return boolean False if an uncertainty indicator was found, true otherwise.
 */
public static function guessPersinstCertainty(string $name):bool {

    $lowered = \strtolower($name);

    // A leading indicator marks the name as uncertain.
    foreach (self::PERSINST_UNCERTAINTY_PREFIXES as $indicator) {
        $head = \substr($lowered, 0, \strlen($indicator));
        if ($head === $indicator) {
            return false;
        }
    }

    // A trailing indicator marks the name as uncertain, too.
    foreach (self::PERSINST_UNCERTAINTY_SUFFIXES as $indicator) {
        $tail = \substr($lowered, -1 * \strlen($indicator));
        if ($tail === $indicator) {
            return false;
        }
    }

    // No indicator found: treat the name as certain.
    return true;

}
} }