Properly handle commas at the end of names when guessing certainty
This commit is contained in:
parent
eb371d4270
commit
29ca05f552
|
@ -12,7 +12,7 @@ declare(strict_types = 1);
|
||||||
*/
|
*/
|
||||||
final class NodaUncertaintyHelper {
|
final class NodaUncertaintyHelper {
|
||||||
|
|
||||||
const PERSINST_INDICATORS_DISALLOWED = [
|
public const PERSINST_INDICATORS_DISALLOWED = [
|
||||||
"Unbekannt",
|
"Unbekannt",
|
||||||
"unbekannt",
|
"unbekannt",
|
||||||
"Anonymus",
|
"Anonymus",
|
||||||
|
@ -41,7 +41,7 @@ final class NodaUncertaintyHelper {
|
||||||
"Невідомий артист", // Unknown artist
|
"Невідомий артист", // Unknown artist
|
||||||
];
|
];
|
||||||
|
|
||||||
const PERSINST_UNCERTAINTY_PREFIXES = [
|
public const PERSINST_UNCERTAINTY_PREFIXES = [
|
||||||
"verm. ",
|
"verm. ",
|
||||||
"Verm. ",
|
"Verm. ",
|
||||||
"vermtl. ",
|
"vermtl. ",
|
||||||
|
@ -57,7 +57,7 @@ final class NodaUncertaintyHelper {
|
||||||
"?",
|
"?",
|
||||||
];
|
];
|
||||||
|
|
||||||
const PERSINST_UNCERTAINTY_SUFFIXES = [
|
public const PERSINST_UNCERTAINTY_SUFFIXES = [
|
||||||
"(?)",
|
"(?)",
|
||||||
"?",
|
"?",
|
||||||
" [vermutlich]",
|
" [vermutlich]",
|
||||||
|
@ -65,7 +65,7 @@ final class NodaUncertaintyHelper {
|
||||||
" [wahrscheinlich]",
|
" [wahrscheinlich]",
|
||||||
];
|
];
|
||||||
|
|
||||||
const TIME_INDICATORS_DISALLOWED = [
|
public const TIME_INDICATORS_DISALLOWED = [
|
||||||
"Nachgewiesen",
|
"Nachgewiesen",
|
||||||
"nachgewiesen",
|
"nachgewiesen",
|
||||||
"o.D.",
|
"o.D.",
|
||||||
|
@ -96,7 +96,7 @@ final class NodaUncertaintyHelper {
|
||||||
"б.д.", // No dating
|
"б.д.", // No dating
|
||||||
];
|
];
|
||||||
|
|
||||||
const TIME_UNCERTAINTY_PREFIXES = [
|
public const TIME_UNCERTAINTY_PREFIXES = [
|
||||||
"c. ",
|
"c. ",
|
||||||
"ca ",
|
"ca ",
|
||||||
"ca. ",
|
"ca. ",
|
||||||
|
@ -132,7 +132,7 @@ final class NodaUncertaintyHelper {
|
||||||
"Прибл.", // UK: approximately
|
"Прибл.", // UK: approximately
|
||||||
];
|
];
|
||||||
|
|
||||||
const TIME_UNCERTAINTY_SUFFIXES = [
|
public const TIME_UNCERTAINTY_SUFFIXES = [
|
||||||
"(?)",
|
"(?)",
|
||||||
"?",
|
"?",
|
||||||
" (ca.)",
|
" (ca.)",
|
||||||
|
@ -150,7 +150,7 @@ final class NodaUncertaintyHelper {
|
||||||
/**
|
/**
|
||||||
* Substrings used to express uncertainty about the validity of a place name.
|
* Substrings used to express uncertainty about the validity of a place name.
|
||||||
*/
|
*/
|
||||||
const PLACE_INDICATORS_DISALLOWED = [
|
public const PLACE_INDICATORS_DISALLOWED = [
|
||||||
"Unbekannt",
|
"Unbekannt",
|
||||||
"unbekannt",
|
"unbekannt",
|
||||||
"Unknown",
|
"Unknown",
|
||||||
|
@ -175,7 +175,7 @@ final class NodaUncertaintyHelper {
|
||||||
"невідоме", // No place
|
"невідоме", // No place
|
||||||
];
|
];
|
||||||
|
|
||||||
const PLACE_UNCERTAINTY_PREFIXES = [
|
public const PLACE_UNCERTAINTY_PREFIXES = [
|
||||||
"ca ",
|
"ca ",
|
||||||
"Ca ",
|
"Ca ",
|
||||||
"ca. ",
|
"ca. ",
|
||||||
|
@ -212,7 +212,7 @@ final class NodaUncertaintyHelper {
|
||||||
"?",
|
"?",
|
||||||
];
|
];
|
||||||
|
|
||||||
const PLACE_UNCERTAINTY_SUFFIXES = [
|
public const PLACE_UNCERTAINTY_SUFFIXES = [
|
||||||
"(?)",
|
"(?)",
|
||||||
"(vermutl.)",
|
"(vermutl.)",
|
||||||
"[vermutl.]",
|
"[vermutl.]",
|
||||||
|
@ -343,7 +343,7 @@ final class NodaUncertaintyHelper {
|
||||||
*/
|
*/
|
||||||
public static function guessPlaceCertainty(string $ort_name):bool {
|
public static function guessPlaceCertainty(string $ort_name):bool {
|
||||||
|
|
||||||
$ort_name = \strtolower($ort_name);
|
$ort_name = \trim(\strtolower($ort_name), ', ;-_');
|
||||||
|
|
||||||
// Attempt to guess uncertainty based on prefixes.
|
// Attempt to guess uncertainty based on prefixes.
|
||||||
foreach (NodaUncertaintyHelper::PLACE_UNCERTAINTY_PREFIXES as $prefix) {
|
foreach (NodaUncertaintyHelper::PLACE_UNCERTAINTY_PREFIXES as $prefix) {
|
||||||
|
@ -404,7 +404,7 @@ final class NodaUncertaintyHelper {
|
||||||
*/
|
*/
|
||||||
public static function guessPersinstCertainty(string $name):bool {
|
public static function guessPersinstCertainty(string $name):bool {
|
||||||
|
|
||||||
$name = \trim(\strtolower($name));
|
$name = \trim(\strtolower($name), ', ;-_');
|
||||||
|
|
||||||
// Attempt to guess uncertainty based on prefixes.
|
// Attempt to guess uncertainty based on prefixes.
|
||||||
foreach (NodaUncertaintyHelper::PERSINST_UNCERTAINTY_PREFIXES as $prefix) {
|
foreach (NodaUncertaintyHelper::PERSINST_UNCERTAINTY_PREFIXES as $prefix) {
|
||||||
|
|
|
@ -6,12 +6,14 @@
|
||||||
*/
|
*/
|
||||||
declare(strict_types = 1);
|
declare(strict_types = 1);
|
||||||
use PHPUnit\Framework\TestCase;
|
use PHPUnit\Framework\TestCase;
|
||||||
|
use PHPUnit\Framework\Attributes\CoversClass;
|
||||||
|
use PHPUnit\Framework\Attributes\Small;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* This script contains tests for the uncertainty helper.
|
* This script contains tests for the uncertainty helper.
|
||||||
*
|
|
||||||
* @covers \NodaUncertaintyHelper
|
|
||||||
*/
|
*/
|
||||||
|
#[small]
|
||||||
|
#[CoversClass(\NodaUncertaintyHelper::class)]
|
||||||
final class NodaUncertaintyHelperTest extends TestCase {
|
final class NodaUncertaintyHelperTest extends TestCase {
|
||||||
/**
|
/**
|
||||||
* Removes uncertainty indicators from a time name.
|
* Removes uncertainty indicators from a time name.
|
||||||
|
@ -76,6 +78,9 @@ final class NodaUncertaintyHelperTest extends TestCase {
|
||||||
self::assertFalse(NodaUncertaintyHelper::guessPlaceCertainty("vermutl. Augsburg"));
|
self::assertFalse(NodaUncertaintyHelper::guessPlaceCertainty("vermutl. Augsburg"));
|
||||||
|
|
||||||
self::assertFalse(NodaUncertaintyHelper::guessPlaceCertainty("wohl Berlin"));
|
self::assertFalse(NodaUncertaintyHelper::guessPlaceCertainty("wohl Berlin"));
|
||||||
|
self::assertFalse(NodaUncertaintyHelper::guessPlaceCertainty("Berlin?"));
|
||||||
|
self::assertFalse(NodaUncertaintyHelper::guessPlaceCertainty("Berlin?,"));
|
||||||
|
self::assertTrue(NodaUncertaintyHelper::guessPlaceCertainty("Berlin,"));
|
||||||
self::assertTrue(NodaUncertaintyHelper::guessPlaceCertainty("Berlin"));
|
self::assertTrue(NodaUncertaintyHelper::guessPlaceCertainty("Berlin"));
|
||||||
|
|
||||||
self::assertFalse(NodaUncertaintyHelper::guessPlaceCertainty("?-Italien"));
|
self::assertFalse(NodaUncertaintyHelper::guessPlaceCertainty("?-Italien"));
|
||||||
|
@ -109,6 +114,9 @@ final class NodaUncertaintyHelperTest extends TestCase {
|
||||||
public static function testGuessPersinstCertainty():void {
|
public static function testGuessPersinstCertainty():void {
|
||||||
|
|
||||||
self::assertFalse(NodaUncertaintyHelper::guessPlaceCertainty("wohl Barbarossa"));
|
self::assertFalse(NodaUncertaintyHelper::guessPlaceCertainty("wohl Barbarossa"));
|
||||||
|
self::assertFalse(NodaUncertaintyHelper::guessPlaceCertainty("Barbarossa?"));
|
||||||
|
self::assertFalse(NodaUncertaintyHelper::guessPlaceCertainty("Barbarossa?,"));
|
||||||
|
self::assertTrue(NodaUncertaintyHelper::guessPlaceCertainty("Barbarossa,"));
|
||||||
self::assertTrue(NodaUncertaintyHelper::guessPlaceCertainty("Barbarossa"));
|
self::assertTrue(NodaUncertaintyHelper::guessPlaceCertainty("Barbarossa"));
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in New Issue
Block a user