Add functions for cleaning uncertainty indicators to
NodaUncertaintyHelper
This commit is contained in:
parent
923505f146
commit
e53eec84e6
|
@ -11,6 +11,18 @@ declare(strict_types = 1);
|
||||||
*/
|
*/
|
||||||
final class NodaUncertaintyHelper {
|
final class NodaUncertaintyHelper {
|
||||||
|
|
||||||
|
/**
 * Leading markers that flag an actor (persinst) name as uncertain.
 * Each entry ends with a space; lower-case and capitalized spellings
 * are both listed.
 */
const PERSINST_UNCERTAINTY_PREFIXES = ['wohl ', 'wahrscheinlich ', 'Wohl ', 'Wahrscheinlich '];

/**
 * Trailing markers that flag an actor (persinst) name as uncertain.
 * The longer "(?)" is listed before the bare "?".
 */
const PERSINST_UNCERTAINTY_SUFFIXES = ['(?)', '?'];
|
||||||
|
|
||||||
const TIME_UNCERTAINTY_PREFIXES = [
|
const TIME_UNCERTAINTY_PREFIXES = [
|
||||||
"um ",
|
"um ",
|
||||||
"wohl um ",
|
"wohl um ",
|
||||||
|
@ -39,6 +51,33 @@ final class NodaUncertaintyHelper {
|
||||||
"?",
|
"?",
|
||||||
];
|
];
|
||||||
|
|
||||||
|
/**
 * Strips known uncertainty markers from the start and end of a time name.
 *
 * Every entry of TIME_UNCERTAINTY_PREFIXES is tested once, in list order,
 * against the start of the string, then every entry of
 * TIME_UNCERTAINTY_SUFFIXES against its end. The string is trimmed after
 * each removal and once more before returning.
 *
 * @param string $name Input string.
 *
 * @return string
 */
public static function cleanUncertaintyIndicatorsTime(string $name):string {

    // Cut off any matching leading marker.
    foreach (self::TIME_UNCERTAINTY_PREFIXES as $leading) {
        $leadingLength = \strlen($leading);
        if (\substr($name, 0, $leadingLength) !== $leading) {
            continue;
        }
        $name = \trim(\substr($name, $leadingLength));
    }

    // Cut off any matching trailing marker.
    foreach (self::TIME_UNCERTAINTY_SUFFIXES as $trailing) {
        $fromEnd = -\strlen($trailing);
        if (\substr($name, $fromEnd) !== $trailing) {
            continue;
        }
        $name = \trim(\substr($name, 0, $fromEnd));
    }

    return \trim($name);

}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Attempts guessing whether time is uncertain.
|
* Attempts guessing whether time is uncertain.
|
||||||
*
|
*
|
||||||
|
@ -68,6 +107,33 @@ final class NodaUncertaintyHelper {
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Strips known uncertainty markers from the start and end of a place name.
 *
 * Every entry of PLACE_UNCERTAINTY_PREFIXES is tested once, in list order,
 * against the start of the string, then every entry of
 * PLACE_UNCERTAINTY_SUFFIXES against its end. The string is trimmed after
 * each removal and once more before returning.
 *
 * @param string $ort_name Input string.
 *
 * @return string
 */
public static function cleanUncertaintyIndicatorsPlace(string $ort_name):string {

    // Cut off any matching leading marker.
    foreach (self::PLACE_UNCERTAINTY_PREFIXES as $leading) {
        $leadingLength = \strlen($leading);
        if (\substr($ort_name, 0, $leadingLength) !== $leading) {
            continue;
        }
        $ort_name = \trim(\substr($ort_name, $leadingLength));
    }

    // Cut off any matching trailing marker.
    foreach (self::PLACE_UNCERTAINTY_SUFFIXES as $trailing) {
        $fromEnd = -\strlen($trailing);
        if (\substr($ort_name, $fromEnd) !== $trailing) {
            continue;
        }
        $ort_name = \trim(\substr($ort_name, 0, $fromEnd));
    }

    return \trim($ort_name);

}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Attempts guessing whether place is uncertain.
|
* Attempts guessing whether place is uncertain.
|
||||||
*
|
*
|
||||||
|
@ -97,4 +163,58 @@ final class NodaUncertaintyHelper {
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Removes uncertainty indicators from an actor (persinst) name.
 *
 * Each entry of PERSINST_UNCERTAINTY_PREFIXES is tested once, in list
 * order, against the start of the value, then each entry of
 * PERSINST_UNCERTAINTY_SUFFIXES against its end. Matching markers are
 * cut off, and the result is trimmed once before returning.
 *
 * @param string $value Input string.
 *
 * @return string
 */
public static function cleanUncertaintyIndicatorsPersinst(string $value):string {

    foreach (self::PERSINST_UNCERTAINTY_PREFIXES as $prefix) {
        $prefixLength = \mb_strlen($prefix);
        if (\mb_substr($value, 0, $prefixLength) === $prefix) {
            // The length is a character count, so cut with mb_substr()
            // rather than the byte-based substr().
            $value = \mb_substr($value, $prefixLength);
        }
    }

    // Use the persinst-specific suffix list (not the place one), so that
    // the markers removed here are the same ones guessPersinstCertainty()
    // looks for.
    foreach (self::PERSINST_UNCERTAINTY_SUFFIXES as $suffix) {
        $suffixLength = \mb_strlen($suffix);
        if (\mb_substr($value, -$suffixLength) === $suffix) {
            $value = \mb_substr($value, 0, -$suffixLength);
        }
    }

    return \trim($value);

}
|
||||||
|
|
||||||
|
/**
 * Attempts guessing whether a persinst (actor) name is certain.
 *
 * The name is lower-cased first, then compared against the entries of
 * PERSINST_UNCERTAINTY_PREFIXES (at the start of the name) and
 * PERSINST_UNCERTAINTY_SUFFIXES (at the end of the name).
 *
 * @param string $name Persinst name.
 *
 * @return boolean False if an uncertainty marker was found, true otherwise.
 */
public static function guessPersinstCertainty(string $name):bool {

    $lowered = \strtolower($name);

    // A known marker at the start of the name means "uncertain".
    foreach (self::PERSINST_UNCERTAINTY_PREFIXES as $marker) {
        $head = \substr($lowered, 0, \strlen($marker));
        if ($head === $marker) {
            return false; // Uncertain
        }
    }

    // A known marker at the end of the name means "uncertain" as well.
    foreach (self::PERSINST_UNCERTAINTY_SUFFIXES as $marker) {
        $tail = \substr($lowered, -1 * \strlen($marker));
        if ($tail === $marker) {
            return false; // Uncertain
        }
    }

    // No marker matched: treat the name as certain.
    return true;

}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in New Issue
Block a user