Properly handle commas at the end of names when guessing certainty

This commit is contained in:
Joshua Ramon Enslin 2024-11-09 15:33:49 +01:00
parent eb371d4270
commit 29ca05f552
Signed by: jrenslin
GPG Key ID: 46016F84501B70AE
2 changed files with 202 additions and 194 deletions

View File

@ -12,7 +12,7 @@ declare(strict_types = 1);
*/
final class NodaUncertaintyHelper {
const PERSINST_INDICATORS_DISALLOWED = [
public const PERSINST_INDICATORS_DISALLOWED = [
"Unbekannt",
"unbekannt",
"Anonymus",
@ -41,7 +41,7 @@ final class NodaUncertaintyHelper {
"Невідомий артист", // Unknown artist
];
const PERSINST_UNCERTAINTY_PREFIXES = [
public const PERSINST_UNCERTAINTY_PREFIXES = [
"verm. ",
"Verm. ",
"vermtl. ",
@ -57,7 +57,7 @@ final class NodaUncertaintyHelper {
"?",
];
const PERSINST_UNCERTAINTY_SUFFIXES = [
public const PERSINST_UNCERTAINTY_SUFFIXES = [
"(?)",
"?",
" [vermutlich]",
@ -65,7 +65,7 @@ final class NodaUncertaintyHelper {
" [wahrscheinlich]",
];
const TIME_INDICATORS_DISALLOWED = [
public const TIME_INDICATORS_DISALLOWED = [
"Nachgewiesen",
"nachgewiesen",
"o.D.",
@ -96,7 +96,7 @@ final class NodaUncertaintyHelper {
"б.д.", // No dating
];
const TIME_UNCERTAINTY_PREFIXES = [
public const TIME_UNCERTAINTY_PREFIXES = [
"c. ",
"ca ",
"ca. ",
@ -132,7 +132,7 @@ final class NodaUncertaintyHelper {
"Прибл.", // UK: approximately
];
const TIME_UNCERTAINTY_SUFFIXES = [
public const TIME_UNCERTAINTY_SUFFIXES = [
"(?)",
"?",
" (ca.)",
@ -150,7 +150,7 @@ final class NodaUncertaintyHelper {
/**
* Substrings used to express uncertainty about the validity of a place name.
*/
const PLACE_INDICATORS_DISALLOWED = [
public const PLACE_INDICATORS_DISALLOWED = [
"Unbekannt",
"unbekannt",
"Unknown",
@ -175,7 +175,7 @@ final class NodaUncertaintyHelper {
"невідоме", // No place
];
const PLACE_UNCERTAINTY_PREFIXES = [
public const PLACE_UNCERTAINTY_PREFIXES = [
"ca ",
"Ca ",
"ca. ",
@ -212,7 +212,7 @@ final class NodaUncertaintyHelper {
"?",
];
const PLACE_UNCERTAINTY_SUFFIXES = [
public const PLACE_UNCERTAINTY_SUFFIXES = [
"(?)",
"(vermutl.)",
"[vermutl.]",
@ -343,7 +343,7 @@ final class NodaUncertaintyHelper {
*/
public static function guessPlaceCertainty(string $ort_name):bool {
$ort_name = \strtolower($ort_name);
$ort_name = \trim(\strtolower($ort_name), ', ;-_');
// Attempt to guess uncertainty based on prefixes.
foreach (NodaUncertaintyHelper::PLACE_UNCERTAINTY_PREFIXES as $prefix) {
@ -404,7 +404,7 @@ final class NodaUncertaintyHelper {
*/
public static function guessPersinstCertainty(string $name):bool {
$name = \trim(\strtolower($name));
$name = \trim(\strtolower($name), ', ;-_');
// Attempt to guess uncertainty based on prefixes.
foreach (NodaUncertaintyHelper::PERSINST_UNCERTAINTY_PREFIXES as $prefix) {

View File

@ -6,12 +6,14 @@
*/
declare(strict_types = 1);
use PHPUnit\Framework\TestCase;
use PHPUnit\Framework\Attributes\CoversClass;
use PHPUnit\Framework\Attributes\Small;
/**
* This script contains tests for the uncertainty helper.
*
* @covers \NodaUncertaintyHelper
*/
#[small]
#[CoversClass(\NodaUncertaintyHelper::class)]
final class NodaUncertaintyHelperTest extends TestCase {
/**
* Removes uncertainty indicators from a time name.
@ -76,6 +78,9 @@ final class NodaUncertaintyHelperTest extends TestCase {
self::assertFalse(NodaUncertaintyHelper::guessPlaceCertainty("vermutl. Augsburg"));
self::assertFalse(NodaUncertaintyHelper::guessPlaceCertainty("wohl Berlin"));
self::assertFalse(NodaUncertaintyHelper::guessPlaceCertainty("Berlin?"));
self::assertFalse(NodaUncertaintyHelper::guessPlaceCertainty("Berlin?,"));
self::assertTrue(NodaUncertaintyHelper::guessPlaceCertainty("Berlin,"));
self::assertTrue(NodaUncertaintyHelper::guessPlaceCertainty("Berlin"));
self::assertFalse(NodaUncertaintyHelper::guessPlaceCertainty("?-Italien"));
@ -109,6 +114,9 @@ final class NodaUncertaintyHelperTest extends TestCase {
public static function testGuessPersinstCertainty():void {

    // Exercise the person/institution guesser itself. The assertions below
    // previously called guessPlaceCertainty(), so guessPersinstCertainty()
    // was never actually covered by this test.

    // Names carrying an uncertainty prefix or suffix are expected to be
    // reported as uncertain (false).
    self::assertFalse(NodaUncertaintyHelper::guessPersinstCertainty("wohl Barbarossa"));
    self::assertFalse(NodaUncertaintyHelper::guessPersinstCertainty("Barbarossa?"));

    // A trailing comma must not hide the uncertainty suffix ...
    self::assertFalse(NodaUncertaintyHelper::guessPersinstCertainty("Barbarossa?,"));

    // ... and must not make an otherwise certain name look uncertain.
    self::assertTrue(NodaUncertaintyHelper::guessPersinstCertainty("Barbarossa,"));
    self::assertTrue(NodaUncertaintyHelper::guessPersinstCertainty("Barbarossa"));

}