Merge branch 'master' of gitea:museum-digital/MDNodaHelpers
This commit is contained in:
commit
c72ad51dda
54
src/NodaTagRelationIdentifier.php
Normal file
54
src/NodaTagRelationIdentifier.php
Normal file
|
@ -0,0 +1,54 @@
|
||||||
|
<?PHP
|
||||||
|
/**
|
||||||
|
* Identifies the type of tag relation to an object based on known suffixes.
|
||||||
|
*
|
||||||
|
* @author Joshua Ramon Enslin <joshua@museum-digital.de>
|
||||||
|
*/
|
||||||
|
declare(strict_types = 1);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Identifies the relation type a tag has to an object based on known,
|
||||||
|
* language-specific name suffixes, and exposes the tag name without the suffix.
|
||||||
|
*/
|
||||||
|
final class NodaTagRelationIdentifier {

    /**
     * Known name suffixes per language code, each mapped to the relation
     * type it signals.
     */
    private const SUFFIXES = [
        'de' => [
            ' (Motiv)' => MDTagRelationType::display_subject,
        ]
    ];

    /** Tag name with every recognized suffix removed. */
    public readonly string $name;

    /** Relation type of the last suffix that was removed, or false if none matched. */
    public readonly MDTagRelationType|false $relation;

    /**
     * Constructor: Strips suffixes that mark well-known tag relations and
     * stores both the cleaned name and the relation type that was found.
     *
     * @param string $lang         Current language.
     * @param string $input_string Input string to clean.
     *
     * @return void
     */
    public function __construct(string $lang, string $input_string) {

        $cleaned  = $input_string;
        $detected = false;

        // Languages without any registered suffixes simply iterate over nothing,
        // leaving the input unchanged and the relation set to false.
        foreach (self::SUFFIXES[$lang] ?? [] as $ending => $type) {

            $cut = -\mb_strlen($ending);
            if (\mb_substr($cleaned, $cut) !== "$ending") {
                continue;
            }

            $cleaned  = \mb_substr($cleaned, 0, $cut);
            $detected = $type;

        }

        $this->name     = $cleaned;
        $this->relation = $detected;

    }
}
|
|
@ -12,7 +12,7 @@ declare(strict_types = 1);
|
||||||
*/
|
*/
|
||||||
final class NodaUncertaintyHelper {
|
final class NodaUncertaintyHelper {
|
||||||
|
|
||||||
const PERSINST_INDICATORS_DISALLOWED = [
|
public const PERSINST_INDICATORS_DISALLOWED = [
|
||||||
"Unbekannt",
|
"Unbekannt",
|
||||||
"unbekannt",
|
"unbekannt",
|
||||||
"Anonymus",
|
"Anonymus",
|
||||||
|
@ -41,7 +41,7 @@ final class NodaUncertaintyHelper {
|
||||||
"Невідомий артист", // Unknown artist
|
"Невідомий артист", // Unknown artist
|
||||||
];
|
];
|
||||||
|
|
||||||
const PERSINST_UNCERTAINTY_PREFIXES = [
|
public const PERSINST_UNCERTAINTY_PREFIXES = [
|
||||||
"verm. ",
|
"verm. ",
|
||||||
"Verm. ",
|
"Verm. ",
|
||||||
"vermtl. ",
|
"vermtl. ",
|
||||||
|
@ -57,7 +57,7 @@ final class NodaUncertaintyHelper {
|
||||||
"?",
|
"?",
|
||||||
];
|
];
|
||||||
|
|
||||||
const PERSINST_UNCERTAINTY_SUFFIXES = [
|
public const PERSINST_UNCERTAINTY_SUFFIXES = [
|
||||||
"(?)",
|
"(?)",
|
||||||
"?",
|
"?",
|
||||||
" [vermutlich]",
|
" [vermutlich]",
|
||||||
|
@ -65,7 +65,7 @@ final class NodaUncertaintyHelper {
|
||||||
" [wahrscheinlich]",
|
" [wahrscheinlich]",
|
||||||
];
|
];
|
||||||
|
|
||||||
const TIME_INDICATORS_DISALLOWED = [
|
public const TIME_INDICATORS_DISALLOWED = [
|
||||||
"Nachgewiesen",
|
"Nachgewiesen",
|
||||||
"nachgewiesen",
|
"nachgewiesen",
|
||||||
"o.D.",
|
"o.D.",
|
||||||
|
@ -94,9 +94,9 @@ final class NodaUncertaintyHelper {
|
||||||
"Без датування", // No dating
|
"Без датування", // No dating
|
||||||
"б.р.", // No dating
|
"б.р.", // No dating
|
||||||
"б.д.", // No dating
|
"б.д.", // No dating
|
||||||
];
|
];
|
||||||
|
|
||||||
const TIME_UNCERTAINTY_PREFIXES = [
|
public const TIME_UNCERTAINTY_PREFIXES = [
|
||||||
"c. ",
|
"c. ",
|
||||||
"ca ",
|
"ca ",
|
||||||
"ca. ",
|
"ca. ",
|
||||||
|
@ -130,9 +130,9 @@ final class NodaUncertaintyHelper {
|
||||||
"майже", // UK: Almost / nearly / about
|
"майже", // UK: Almost / nearly / about
|
||||||
"орієнтовно", // UK: approximately
|
"орієнтовно", // UK: approximately
|
||||||
"Прибл.", // UK: approximately
|
"Прибл.", // UK: approximately
|
||||||
];
|
];
|
||||||
|
|
||||||
const TIME_UNCERTAINTY_SUFFIXES = [
|
public const TIME_UNCERTAINTY_SUFFIXES = [
|
||||||
"(?)",
|
"(?)",
|
||||||
"?",
|
"?",
|
||||||
" (ca.)",
|
" (ca.)",
|
||||||
|
@ -145,12 +145,12 @@ final class NodaUncertaintyHelper {
|
||||||
", um",
|
", um",
|
||||||
" (um)",
|
" (um)",
|
||||||
" (ок.)",
|
" (ок.)",
|
||||||
];
|
];
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Substrings used to express uncertainty about the validity of a place name.
|
* Substrings used to express uncertainty about the validity of a place name.
|
||||||
*/
|
*/
|
||||||
const PLACE_INDICATORS_DISALLOWED = [
|
public const PLACE_INDICATORS_DISALLOWED = [
|
||||||
"Unbekannt",
|
"Unbekannt",
|
||||||
"unbekannt",
|
"unbekannt",
|
||||||
"Unknown",
|
"Unknown",
|
||||||
|
@ -173,9 +173,9 @@ final class NodaUncertaintyHelper {
|
||||||
"не вказано", // No place
|
"не вказано", // No place
|
||||||
"не вказане", // No place
|
"не вказане", // No place
|
||||||
"невідоме", // No place
|
"невідоме", // No place
|
||||||
];
|
];
|
||||||
|
|
||||||
const PLACE_UNCERTAINTY_PREFIXES = [
|
public const PLACE_UNCERTAINTY_PREFIXES = [
|
||||||
"ca ",
|
"ca ",
|
||||||
"Ca ",
|
"Ca ",
|
||||||
"ca. ",
|
"ca. ",
|
||||||
|
@ -210,9 +210,9 @@ final class NodaUncertaintyHelper {
|
||||||
"Wahrscheinlich ",
|
"Wahrscheinlich ",
|
||||||
"можливо",
|
"можливо",
|
||||||
"?",
|
"?",
|
||||||
];
|
];
|
||||||
|
|
||||||
const PLACE_UNCERTAINTY_SUFFIXES = [
|
public const PLACE_UNCERTAINTY_SUFFIXES = [
|
||||||
"(?)",
|
"(?)",
|
||||||
"(vermutl.)",
|
"(vermutl.)",
|
||||||
"[vermutl.]",
|
"[vermutl.]",
|
||||||
|
@ -221,206 +221,220 @@ final class NodaUncertaintyHelper {
|
||||||
"(wohl)",
|
"(wohl)",
|
||||||
"[wohl]",
|
"[wohl]",
|
||||||
"?",
|
"?",
|
||||||
];
|
];
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Trims common characters and character marks.
|
* Trims common characters and character marks.
|
||||||
*
|
*
|
||||||
* @param string $input Input text.
|
* @param string $input Input text.
|
||||||
*
|
*
|
||||||
* @return string
|
* @return string
|
||||||
*/
|
*/
|
||||||
public static function trim(string $input):string {
|
public static function trim(string $input):string {
|
||||||
|
|
||||||
$input = \trim($input, ", \t\n\r\n;-:");
|
return \trim($input, ", \t\n\r\n;-:");
|
||||||
return $input;
|
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Removes uncertainty indicators from a time name.
|
||||||
|
*
|
||||||
|
* @param string $name Input string.
|
||||||
|
*
|
||||||
|
* @return string
|
||||||
|
*/
|
||||||
|
public static function cleanUncertaintyIndicatorsTime(string $name):string {
|
||||||
|
|
||||||
|
$name = self::trim($name);
|
||||||
|
|
||||||
|
if (\in_array($name, self::TIME_INDICATORS_DISALLOWED, true)) {
|
||||||
|
return "";
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
// Remove uncertainty prefixes
|
||||||
* Removes uncertainty indicators from an time name.
|
foreach (NodaUncertaintyHelper::TIME_UNCERTAINTY_PREFIXES as $prefix) {
|
||||||
*
|
if (\substr($name, 0, \strlen($prefix)) === "$prefix") {
|
||||||
* @param string $name Input string.
|
$name = substr($name, \strlen($prefix));
|
||||||
*
|
|
||||||
* @return string
|
|
||||||
*/
|
|
||||||
public static function cleanUncertaintyIndicatorsTime(string $name):string {
|
|
||||||
|
|
||||||
$name = self::trim($name);
|
|
||||||
|
|
||||||
if (\in_array($name, self::TIME_INDICATORS_DISALLOWED, true)) {
|
|
||||||
return "";
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// Remove uncertainty prefixes
|
|
||||||
foreach (NodaUncertaintyHelper::TIME_UNCERTAINTY_PREFIXES as $prefix) {
|
|
||||||
if (\substr($name, 0, \strlen($prefix)) === "$prefix") {
|
|
||||||
$name = substr($name, \strlen($prefix));
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// Remove uncertainty sufixes
|
|
||||||
foreach (NodaUncertaintyHelper::TIME_UNCERTAINTY_SUFFIXES as $suffix) {
|
|
||||||
if (\substr($name, \strlen($suffix) * -1) === "$suffix") {
|
|
||||||
$name = \substr($name, 0, \strlen($suffix) * -1);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
return self::trim($name);
|
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
// Remove uncertainty sufixes
|
||||||
* Attempts guessing whether time is uncertain. Returns true if the name
|
foreach (NodaUncertaintyHelper::TIME_UNCERTAINTY_SUFFIXES as $suffix) {
|
||||||
* indicates certainty, false if it indicates uncertainty.
|
if (\substr($name, \strlen($suffix) * -1) === "$suffix") {
|
||||||
*
|
$name = \substr($name, 0, \strlen($suffix) * -1);
|
||||||
* @param string $zeit_name Time name.
|
|
||||||
*
|
|
||||||
* @return boolean
|
|
||||||
*/
|
|
||||||
public static function guessTimeCertainty(string $zeit_name):bool {
|
|
||||||
|
|
||||||
$zeit_name = \strtolower($zeit_name);
|
|
||||||
|
|
||||||
// Attempt to guess uncertainty based on prefixes.
|
|
||||||
foreach (self::TIME_UNCERTAINTY_PREFIXES as $prefix) {
|
|
||||||
if (\substr($zeit_name, 0, \strlen($prefix)) === $prefix) {
|
|
||||||
return false; // Uncertainty found
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// Attempt to guess uncertainty based on prefixes.
|
|
||||||
foreach (self::TIME_UNCERTAINTY_SUFFIXES as $prefix) {
|
|
||||||
if (\substr($zeit_name, -1 * \strlen($prefix)) === $prefix) {
|
|
||||||
return false; // Uncertainty found
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
return true; // No uncertainty found
|
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
return self::trim($name);
|
||||||
* Removes uncertainty indicators from an place name.
|
|
||||||
*
|
|
||||||
* @param string $ort_name Input string.
|
|
||||||
*
|
|
||||||
* @return string
|
|
||||||
*/
|
|
||||||
public static function cleanUncertaintyIndicatorsPlace(string $ort_name):string {
|
|
||||||
|
|
||||||
$ort_name = self::trim($ort_name);
|
}
|
||||||
|
|
||||||
if (\in_array($ort_name, self::PLACE_INDICATORS_DISALLOWED, true)) {
|
/**
|
||||||
return "";
|
* Attempts guessing whether time is uncertain. Returns true if the name
|
||||||
|
* indicates certainty, false if it indicates uncertainty.
|
||||||
|
*
|
||||||
|
* @param string $zeit_name Time name.
|
||||||
|
*
|
||||||
|
* @return boolean
|
||||||
|
*/
|
||||||
|
public static function guessTimeCertainty(string $zeit_name):bool {
|
||||||
|
|
||||||
|
$zeit_name = self::trim(strtolower($zeit_name));
|
||||||
|
|
||||||
|
// Attempt to guess uncertainty based on prefixes.
|
||||||
|
foreach (self::TIME_UNCERTAINTY_PREFIXES as $prefix) {
|
||||||
|
if (\substr($zeit_name, 0, \strlen($prefix)) === $prefix) {
|
||||||
|
return false; // Uncertainty found
|
||||||
}
|
}
|
||||||
|
|
||||||
// Remove uncertainty prefixes
|
|
||||||
foreach (NodaUncertaintyHelper::PLACE_UNCERTAINTY_PREFIXES as $prefix) {
|
|
||||||
if (\substr($ort_name, 0, \strlen($prefix)) === "$prefix") {
|
|
||||||
$ort_name = substr($ort_name, \strlen($prefix));
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// Remove uncertainty sufixes
|
|
||||||
foreach (NodaUncertaintyHelper::PLACE_UNCERTAINTY_SUFFIXES as $suffix) {
|
|
||||||
if (\substr($ort_name, \strlen($suffix) * -1) === "$suffix") {
|
|
||||||
$ort_name = \substr($ort_name, 0, \strlen($suffix) * -1);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
return self::trim($ort_name);
|
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
// Attempt to guess uncertainty based on prefixes.
|
||||||
* Attempts guessing whether place is uncertain. Returns true if the name
|
foreach (self::TIME_UNCERTAINTY_SUFFIXES as $prefix) {
|
||||||
* indicates certainty, false if it indicates uncertainty.
|
if (\substr($zeit_name, -1 * \strlen($prefix)) === $prefix) {
|
||||||
*
|
return false; // Uncertainty found
|
||||||
* @param string $ort_name Place name.
|
|
||||||
*
|
|
||||||
* @return boolean
|
|
||||||
*/
|
|
||||||
public static function guessPlaceCertainty(string $ort_name):bool {
|
|
||||||
|
|
||||||
$ort_name = \strtolower($ort_name);
|
|
||||||
|
|
||||||
// Attempt to guess uncertainty based on prefixes.
|
|
||||||
foreach (NodaUncertaintyHelper::PLACE_UNCERTAINTY_PREFIXES as $prefix) {
|
|
||||||
if (\substr($ort_name, 0, \strlen($prefix)) === $prefix) {
|
|
||||||
return false; // Uncertain
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// Attempt to guess uncertainty based on prefixes.
|
|
||||||
foreach (NodaUncertaintyHelper::PLACE_UNCERTAINTY_SUFFIXES as $prefix) {
|
|
||||||
if (\substr($ort_name, -1 * \strlen($prefix)) === $prefix) {
|
|
||||||
return false; // Uncertain
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
return true; // Certain / no uncertainty found
|
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
return true; // No uncertainty found
|
||||||
* Removes uncertainty indicators from an actor name.
|
|
||||||
*
|
|
||||||
* @param string $value Input string.
|
|
||||||
*
|
|
||||||
* @return string
|
|
||||||
*/
|
|
||||||
public static function cleanUncertaintyIndicatorsPersinst(string $value):string {
|
|
||||||
|
|
||||||
$value = self::trim($value);
|
}
|
||||||
|
|
||||||
if (\in_array(trim($value, ";. "), self::PERSINST_INDICATORS_DISALLOWED, true)) {
|
/**
|
||||||
return "";
|
* Removes uncertainty indicators from a place name.
|
||||||
}
|
*
|
||||||
|
* @param string $ort_name Input string.
|
||||||
|
*
|
||||||
|
* @return string
|
||||||
|
*/
|
||||||
|
public static function cleanUncertaintyIndicatorsPlace(string $ort_name):string {
|
||||||
|
|
||||||
foreach (self::PERSINST_UNCERTAINTY_PREFIXES as $toRemove) {
|
$ort_name = self::trim($ort_name);
|
||||||
if (\mb_substr($value, 0, \mb_strlen($toRemove)) === $toRemove) {
|
|
||||||
$value = substr($value, \mb_strlen($toRemove));
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
foreach (self::PLACE_UNCERTAINTY_SUFFIXES as $suffix) {
|
|
||||||
if (\mb_substr($value, \mb_strlen($suffix) * -1) === "$suffix") {
|
|
||||||
$value = \mb_substr($value, 0, \mb_strlen($suffix) * -1);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
return self::trim($value);
|
|
||||||
|
|
||||||
|
if (\in_array($ort_name, self::PLACE_INDICATORS_DISALLOWED, true)) {
|
||||||
|
return "";
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
// Remove uncertainty prefixes
|
||||||
* Attempts guessing whether persinst is uncertain. Returns true if the name
|
foreach (NodaUncertaintyHelper::PLACE_UNCERTAINTY_PREFIXES as $prefix) {
|
||||||
* indicates certainty, false if it indicates uncertainty.
|
if (\substr($ort_name, 0, \strlen($prefix)) === "$prefix") {
|
||||||
*
|
$ort_name = substr($ort_name, \strlen($prefix));
|
||||||
* @param string $name Persinst name.
|
|
||||||
*
|
|
||||||
* @return boolean
|
|
||||||
*/
|
|
||||||
public static function guessPersinstCertainty(string $name):bool {
|
|
||||||
|
|
||||||
$name = \trim(\strtolower($name));
|
|
||||||
|
|
||||||
// Attempt to guess uncertainty based on prefixes.
|
|
||||||
foreach (NodaUncertaintyHelper::PERSINST_UNCERTAINTY_PREFIXES as $prefix) {
|
|
||||||
if (\substr($name, 0, \strlen($prefix)) === $prefix) {
|
|
||||||
return false; // Uncertain
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// Attempt to guess uncertainty based on prefixes.
|
|
||||||
foreach (NodaUncertaintyHelper::PERSINST_UNCERTAINTY_SUFFIXES as $prefix) {
|
|
||||||
if (\substr($name, -1 * \strlen($prefix)) === $prefix) {
|
|
||||||
return false; // Uncertain
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
return true; // Certain / no uncertainty found
|
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Remove uncertainty sufixes
|
||||||
|
foreach (NodaUncertaintyHelper::PLACE_UNCERTAINTY_SUFFIXES as $suffix) {
|
||||||
|
if (\substr($ort_name, \strlen($suffix) * -1) === "$suffix") {
|
||||||
|
$ort_name = \substr($ort_name, 0, \strlen($suffix) * -1);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// If brackets are included in the name, try removing prefixes and suffixes
|
||||||
|
// from the beginning.
|
||||||
|
if (($bracketPos = strpos($ort_name, "(")) !== false) {
|
||||||
|
$start = substr($ort_name, 0, $bracketPos);
|
||||||
|
$end = substr($ort_name, $bracketPos);
|
||||||
|
$ort_name = self::cleanUncertaintyIndicatorsPlace($start) . ' ' . $end;
|
||||||
|
}
|
||||||
|
|
||||||
|
return self::trim($ort_name);
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Attempts guessing whether place is uncertain. Returns true if the name
|
||||||
|
* indicates certainty, false if it indicates uncertainty.
|
||||||
|
*
|
||||||
|
* @param string $ort_name Place name.
|
||||||
|
*
|
||||||
|
* @return boolean
|
||||||
|
*/
|
||||||
|
public static function guessPlaceCertainty(string $ort_name):bool {

    $normalized = self::trim(\strtolower($ort_name));

    // A known uncertainty prefix at the very start marks the place as uncertain.
    foreach (self::PLACE_UNCERTAINTY_PREFIXES as $marker) {
        if (\str_starts_with($normalized, $marker)) {
            return false; // Uncertain
        }
    }

    // The same applies to a known uncertainty suffix at the very end.
    foreach (self::PLACE_UNCERTAINTY_SUFFIXES as $marker) {
        if (\substr($normalized, -\strlen($marker)) === $marker) {
            return false; // Uncertain
        }
    }

    // Without an opening bracket there is nothing further to inspect.
    $beforeBracket = \strstr($normalized, "(", true);
    if ($beforeBracket === false) {
        return true; // Certain / no uncertainty found
    }

    // Otherwise repeat the check for everything up to the first bracket.
    return self::guessPlaceCertainty($beforeBracket);

}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Removes uncertainty indicators from an actor name.
|
||||||
|
*
|
||||||
|
* @param string $value Input string.
|
||||||
|
*
|
||||||
|
* @return string
|
||||||
|
*/
|
||||||
|
public static function cleanUncertaintyIndicatorsPersinst(string $value):string {

    $value = self::trim($value);

    // A value that, apart from surrounding ";", "." and spaces, is exactly one
    // of the disallowed indicators is reduced to an empty string.
    if (\in_array(\trim($value, ";. "), self::PERSINST_INDICATORS_DISALLOWED, true)) {
        return "";
    }

    // Strip leading uncertainty markers. The prefix length is measured in
    // characters, so the cut has to be character-based as well: a byte-wise
    // substr() at a character offset would split multibyte characters.
    foreach (self::PERSINST_UNCERTAINTY_PREFIXES as $prefix) {
        $prefixLength = \mb_strlen($prefix);
        if (\mb_substr($value, 0, $prefixLength) === $prefix) {
            $value = \mb_substr($value, $prefixLength);
        }
    }

    // Strip trailing uncertainty markers, using the actor-specific suffix list
    // (the same list guessPersinstCertainty() checks) rather than the place list.
    foreach (self::PERSINST_UNCERTAINTY_SUFFIXES as $suffix) {
        $suffixLength = \mb_strlen($suffix);
        if (\mb_substr($value, -$suffixLength) === "$suffix") {
            $value = \mb_substr($value, 0, -$suffixLength);
        }
    }

    // Removing markers can expose separators or whitespace; trim once more.
    return self::trim($value);

}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Attempts guessing whether persinst is uncertain. Returns true if the name
|
||||||
|
* indicates certainty, false if it indicates uncertainty.
|
||||||
|
*
|
||||||
|
* @param string $name Persinst name.
|
||||||
|
*
|
||||||
|
* @return boolean
|
||||||
|
*/
|
||||||
|
public static function guessPersinstCertainty(string $name):bool {

    $normalized = self::trim(\strtolower($name));

    // Any known uncertainty prefix at the start means: uncertain.
    foreach (self::PERSINST_UNCERTAINTY_PREFIXES as $marker) {
        if (\str_starts_with($normalized, $marker)) {
            return false; // Uncertain
        }
    }

    // Any known uncertainty suffix at the end means: uncertain.
    foreach (self::PERSINST_UNCERTAINTY_SUFFIXES as $marker) {
        $tail = \substr($normalized, -\strlen($marker));
        if ($tail === $marker) {
            return false; // Uncertain
        }
    }

    return true; // Certain / no uncertainty found

}
|
||||||
}
|
}
|
||||||
|
|
50
tests/NodaTagRelationIdentifierTest.php
Normal file
50
tests/NodaTagRelationIdentifierTest.php
Normal file
|
@ -0,0 +1,50 @@
|
||||||
|
<?PHP
|
||||||
|
/**
|
||||||
|
* Tests for the identification of tag relation types to objects based on input tag names.
|
||||||
|
*
|
||||||
|
* @author Joshua Ramon Enslin <joshua@museum-digital.de>
|
||||||
|
*/
|
||||||
|
declare(strict_types = 1);
|
||||||
|
use PHPUnit\Framework\TestCase;
|
||||||
|
use PHPUnit\Framework\Attributes\CoversClass;
|
||||||
|
use PHPUnit\Framework\Attributes\Small;
|
||||||
|
use PHPUnit\Framework\Attributes\DataProvider;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* This script contains tests for the tag relation identifier.
|
||||||
|
*/
|
||||||
|
#[Small]
#[CoversClass(\NodaTagRelationIdentifier::class)]
final class NodaTagRelationIdentifierTest extends TestCase {
    /**
     * Returns input tag names with and without known suffixes signaling the relation type.
     *
     * Each dataset is: input term, expected cleaned name, expected relation type
     * (false if no relation-signaling suffix is present).
     *
     * @return array<string, array{0: string, 1: string, 2: false|MDTagRelationType}>
     */
    public static function tagNameAndRelationTypeProvider():array {

        return [
            'Delfin' => ["Delfin", "Delfin", false],
            'Delfin (Motiv)' => ["Delfin (Motiv)", "Delfin", MDTagRelationType::display_subject],
        ];

    }

    /**
     * Test to ensure known suffixes are removed from tag names and mapped to
     * the expected relation type.
     *
     * @param string                  $term          Term to check.
     * @param string                  $target_output Expected output name.
     * @param false|MDTagRelationType $target_type   Expected identified relation type (or false for lack thereof).
     *
     * @return void
     */
    #[DataProvider('tagNameAndRelationTypeProvider')]
    public function testIdentificationWorksCorrectly(string $term, string $target_output, false|MDTagRelationType $target_type):void {

        $identification = new NodaTagRelationIdentifier("de", $term);

        // Strict comparisons: the name must be the identical string, and the
        // relation must be exactly false or the very same enum case.
        self::assertSame($target_output, $identification->name);
        self::assertSame($target_type, $identification->relation);

    }
}
|
|
@ -6,110 +6,124 @@
|
||||||
*/
|
*/
|
||||||
declare(strict_types = 1);
|
declare(strict_types = 1);
|
||||||
use PHPUnit\Framework\TestCase;
|
use PHPUnit\Framework\TestCase;
|
||||||
|
use PHPUnit\Framework\Attributes\CoversClass;
|
||||||
|
use PHPUnit\Framework\Attributes\Small;
|
||||||
|
use PHPUnit\Framework\Attributes\DataProvider;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* This script contains tests for the uncertainty helper.
|
* This script contains tests for the uncertainty helper.
|
||||||
*
|
|
||||||
* @covers \NodaUncertaintyHelper
|
|
||||||
*/
|
*/
|
||||||
|
#[small]
|
||||||
|
#[CoversClass(\NodaUncertaintyHelper::class)]
|
||||||
final class NodaUncertaintyHelperTest extends TestCase {
|
final class NodaUncertaintyHelperTest extends TestCase {
|
||||||
/**
|
/**
|
||||||
* Removes uncertainty indicators from an time name.
|
* Returns time names with expected cleaned version and expected parsed certainty.
|
||||||
*
|
*
|
||||||
* @group ValidOutput
|
* @return array<array{0: string, 1: string, 2: boolean}>
|
||||||
* @small
|
|
||||||
*
|
|
||||||
* @return void
|
|
||||||
*/
|
*/
|
||||||
public function testCleanUncertaintyIndicatorsTime():void {
|
public static function uncertainTimesProvider():array {
|
||||||
|
|
||||||
self::assertEquals("1950", NodaUncertaintyHelper::cleanUncertaintyIndicatorsTime("wohl 1950"));
|
return [
|
||||||
self::assertEquals("1950", NodaUncertaintyHelper::cleanUncertaintyIndicatorsTime("1950"));
|
'uncertainty prefix: "wohl 1950"' => ["wohl 1950", "1950", false],
|
||||||
|
'uncertainty suffix: "1950?"' => ["1950?", "1950", false],
|
||||||
|
'uncertainty suffix and superfluous chars: "1950 ?,"' => ["1950 ?,", "1950", false],
|
||||||
|
'certain term with superfluous chars: "1950 ,"' => ["1950 ,", "1950", true],
|
||||||
|
'certain term: 1950' => ["1950", "1950", true],
|
||||||
|
];
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Attempts guessing whether time is uncertain.
|
* Returns place names with expected cleaned version and expected parsed certainty.
|
||||||
*
|
*
|
||||||
* @group ValidOutput
|
* @return array<array{0: string, 1: string, 2: boolean}>
|
||||||
* @small
|
|
||||||
*
|
|
||||||
* @return void
|
|
||||||
*/
|
*/
|
||||||
public function testGuessTimeCertainty():void {
|
public static function uncertainPlacesProvider():array {
|
||||||
|
|
||||||
self::assertFalse(NodaUncertaintyHelper::guessTimeCertainty("wohl 1950"));
|
return [
|
||||||
self::assertTrue(NodaUncertaintyHelper::guessTimeCertainty("1950"));
|
|
||||||
|
'uncertainty prefix: "wohl Berlin"' => ["wohl Berlin", "Berlin", false],
|
||||||
|
'uncertainty prefix: "vermutl. Berlin"' => ["vermutl. Berlin", "Berlin", false],
|
||||||
|
'uncertainty prefix and superfluous chars: "?-Berlin"' => ["?-Berlin", "Berlin", false],
|
||||||
|
'uncertainty suffix: "Berlin?"' => ["Berlin?", "Berlin", false],
|
||||||
|
'uncertainty suffix: "Berlin (?)"' => ["Berlin (?)", "Berlin", false],
|
||||||
|
'uncertainty suffix and superfluous chars: "Berlin ?,"' => ["Berlin ?,", "Berlin", false],
|
||||||
|
'certain term with superfluous chars: "Berlin ,"' => ["Berlin ,", "Berlin", true],
|
||||||
|
'certain term: Berlin' => ["Berlin", "Berlin", true],
|
||||||
|
'Berlin ? (Deutschland)' => ["Berlin ? (Deutschland)", "Berlin (Deutschland)", false],
|
||||||
|
'Berli?n (Deutschland)' => ["Berl?n (Deutschland)", "Berl?n (Deutschland)", true],
|
||||||
|
|
||||||
|
];
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Removes uncertainty indicators from an place name.
|
* Returns actor names with expected cleaned version and expected parsed certainty.
|
||||||
*
|
*
|
||||||
* @group ValidOutput
|
* @return array<array{0: string, 1: string, 2: boolean}>
|
||||||
* @small
|
|
||||||
*
|
|
||||||
* @return void
|
|
||||||
*/
|
*/
|
||||||
public static function testCleanUncertaintyIndicatorsPlace():void {
|
public static function uncertainPersinstProvider():array {
|
||||||
|
|
||||||
self::assertEquals("Berlin", NodaUncertaintyHelper::cleanUncertaintyIndicatorsPlace("wohl Berlin"));
|
return [
|
||||||
self::assertEquals("Berlin", NodaUncertaintyHelper::cleanUncertaintyIndicatorsPlace("Berlin"));
|
'uncertainty prefix: "wohl Barbarossa"' => ["wohl Barbarossa", "Barbarossa", false],
|
||||||
|
'uncertainty prefix: "vermutl. Barbarossa"' => ["vermutl. Barbarossa", "Barbarossa", false],
|
||||||
// Real-life examples that previously passed unencumbered
|
'uncertainty prefix and superfluous chars: "?-Barbarossa"' => ["?-Barbarossa", "Barbarossa", false],
|
||||||
self::assertEquals("Augsburg", NodaUncertaintyHelper::cleanUncertaintyIndicatorsPlace("vermutlich: Augsburg"));
|
'uncertainty suffix: "Barbarossa?"' => ["Barbarossa?", "Barbarossa", false],
|
||||||
self::assertEquals("Augsburg", NodaUncertaintyHelper::cleanUncertaintyIndicatorsPlace("vermutl. Augsburg"));
|
'uncertainty suffix and superfluous chars: "Barbarossa ?,"' => ["Barbarossa ?,", "Barbarossa", false],
|
||||||
|
'certain term with superfluous chars: "Barbarossa ,"' => ["Barbarossa ,", "Barbarossa", true],
|
||||||
|
'certain term: Barbarossa' => ["Barbarossa", "Barbarossa", true],
|
||||||
|
];
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Attempts guessing whether place is uncertain.
|
* Test to ensure times are correctly cleaned and parsed.
|
||||||
*
|
*
|
||||||
* @group ValidOutput
|
* @param string $term Term to check.
|
||||||
* @small
|
* @param string $target_output Expected output name.
|
||||||
|
* @param boolean $target_certainty Expected output certainty.
|
||||||
*
|
*
|
||||||
* @return void
|
* @return void
|
||||||
*/
|
*/
|
||||||
public static function testGuessPlaceCertainty():void {
|
#[DataProvider('uncertainTimesProvider')]
|
||||||
|
public function testParsingUncertaintyFromTimes(string $term, string $target_output, bool $target_certainty):void {
|
||||||
|
|
||||||
self::assertFalse(NodaUncertaintyHelper::guessPlaceCertainty("vermutlich: Augsburg"));
|
self::assertEquals($target_output, NodaUncertaintyHelper::cleanUncertaintyIndicatorsTime($term));
|
||||||
self::assertFalse(NodaUncertaintyHelper::guessPlaceCertainty("vermutl. Augsburg"));
|
self::assertEquals($target_certainty, NodaUncertaintyHelper::guessTimeCertainty($term));
|
||||||
|
|
||||||
self::assertFalse(NodaUncertaintyHelper::guessPlaceCertainty("wohl Berlin"));
|
|
||||||
self::assertTrue(NodaUncertaintyHelper::guessPlaceCertainty("Berlin"));
|
|
||||||
|
|
||||||
self::assertFalse(NodaUncertaintyHelper::guessPlaceCertainty("?-Italien"));
|
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Removes uncertainty indicators from an actor name.
|
* Test to ensure places are correctly cleaned and parsed.
|
||||||
*
|
*
|
||||||
* @group ValidOutput
|
* @param string $term Term to check.
|
||||||
* @small
|
* @param string $target_output Expected output name.
|
||||||
|
* @param boolean $target_certainty Expected output certainty.
|
||||||
*
|
*
|
||||||
* @return void
|
* @return void
|
||||||
*/
|
*/
|
||||||
public static function testCleanUncertaintyIndicatorsPersinst():void {
|
#[DataProvider('uncertainPlacesProvider')]
|
||||||
|
public function testParsingUncertaintyFromPlaces(string $term, string $target_output, bool $target_certainty):void {
|
||||||
|
|
||||||
self::assertEquals("Barbarossa", NodaUncertaintyHelper::cleanUncertaintyIndicatorsPersinst("wohl Barbarossa"));
|
self::assertEquals($target_output, NodaUncertaintyHelper::cleanUncertaintyIndicatorsPlace($term));
|
||||||
self::assertEquals("Barbarossa", NodaUncertaintyHelper::cleanUncertaintyIndicatorsPersinst("Barbarossa"));
|
self::assertEquals($target_certainty, NodaUncertaintyHelper::guessPlaceCertainty($term));
|
||||||
self::assertEquals("Barbarossa", NodaUncertaintyHelper::cleanUncertaintyIndicatorsPersinst("?-Barbarossa"));
|
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Attempts guessing whether persinst is uncertain.
|
* Test to ensure actor names are correctly cleaned and parsed.
|
||||||
*
|
*
|
||||||
* @group ValidOutput
|
* @param string $term Term to check.
|
||||||
* @small
|
* @param string $target_output Expected output name.
|
||||||
|
* @param boolean $target_certainty Expected output certainty.
|
||||||
*
|
*
|
||||||
* @return void
|
* @return void
|
||||||
*/
|
*/
|
||||||
public static function testGuessPersinstCertainty():void {
|
#[DataProvider('uncertainPersinstProvider')]
|
||||||
|
public function testParsingUncertaintyFromPersinst(string $term, string $target_output, bool $target_certainty):void {
|
||||||
|
|
||||||
self::assertFalse(NodaUncertaintyHelper::guessPlaceCertainty("wohl Barbarossa"));
|
self::assertEquals($target_output, NodaUncertaintyHelper::cleanUncertaintyIndicatorsPersinst($term));
|
||||||
self::assertTrue(NodaUncertaintyHelper::guessPlaceCertainty("Barbarossa"));
|
self::assertEquals($target_certainty, NodaUncertaintyHelper::guessPersinstCertainty($term));
|
||||||
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in New Issue
Block a user