Properly handle commas at the end of names when guessing certainty
This commit is contained in:
		| @@ -12,7 +12,7 @@ declare(strict_types = 1); | |||||||
|  */ |  */ | ||||||
| final class NodaUncertaintyHelper { | final class NodaUncertaintyHelper { | ||||||
|  |  | ||||||
|     const PERSINST_INDICATORS_DISALLOWED = [ |     public const PERSINST_INDICATORS_DISALLOWED = [ | ||||||
|         "Unbekannt", |         "Unbekannt", | ||||||
|         "unbekannt", |         "unbekannt", | ||||||
|         "Anonymus", |         "Anonymus", | ||||||
| @@ -41,7 +41,7 @@ final class NodaUncertaintyHelper { | |||||||
|         "Невідомий артист",     // Unknown artist |         "Невідомий артист",     // Unknown artist | ||||||
|     ]; |     ]; | ||||||
|  |  | ||||||
|     const PERSINST_UNCERTAINTY_PREFIXES = [ |     public const PERSINST_UNCERTAINTY_PREFIXES = [ | ||||||
|         "verm. ", |         "verm. ", | ||||||
|         "Verm. ", |         "Verm. ", | ||||||
|         "vermtl. ", |         "vermtl. ", | ||||||
| @@ -57,7 +57,7 @@ final class NodaUncertaintyHelper { | |||||||
|         "?", |         "?", | ||||||
|     ]; |     ]; | ||||||
|  |  | ||||||
|     const PERSINST_UNCERTAINTY_SUFFIXES = [ |     public const PERSINST_UNCERTAINTY_SUFFIXES = [ | ||||||
|         "(?)", |         "(?)", | ||||||
|         "?", |         "?", | ||||||
|         " [vermutlich]", |         " [vermutlich]", | ||||||
| @@ -65,7 +65,7 @@ final class NodaUncertaintyHelper { | |||||||
|         " [wahrscheinlich]", |         " [wahrscheinlich]", | ||||||
|     ]; |     ]; | ||||||
|  |  | ||||||
|         const TIME_INDICATORS_DISALLOWED = [ |     public const TIME_INDICATORS_DISALLOWED = [ | ||||||
|         "Nachgewiesen", |         "Nachgewiesen", | ||||||
|         "nachgewiesen", |         "nachgewiesen", | ||||||
|         "o.D.", |         "o.D.", | ||||||
| @@ -96,7 +96,7 @@ final class NodaUncertaintyHelper { | |||||||
|         "б.д.", // No dating |         "б.д.", // No dating | ||||||
|     ]; |     ]; | ||||||
|  |  | ||||||
|         const TIME_UNCERTAINTY_PREFIXES = [ |     public const TIME_UNCERTAINTY_PREFIXES = [ | ||||||
|         "c. ", |         "c. ", | ||||||
|         "ca ", |         "ca ", | ||||||
|         "ca. ", |         "ca. ", | ||||||
| @@ -132,7 +132,7 @@ final class NodaUncertaintyHelper { | |||||||
|         "Прибл.",     // UK: approximately |         "Прибл.",     // UK: approximately | ||||||
|     ]; |     ]; | ||||||
|  |  | ||||||
|         const TIME_UNCERTAINTY_SUFFIXES = [ |     public const TIME_UNCERTAINTY_SUFFIXES = [ | ||||||
|         "(?)", |         "(?)", | ||||||
|         "?", |         "?", | ||||||
|         " (ca.)", |         " (ca.)", | ||||||
| @@ -150,7 +150,7 @@ final class NodaUncertaintyHelper { | |||||||
|     /** |     /** | ||||||
|      * Substrings used to express uncertainty about the validity of a place name. |      * Substrings used to express uncertainty about the validity of a place name. | ||||||
|      */ |      */ | ||||||
|         const PLACE_INDICATORS_DISALLOWED = [ |     public const PLACE_INDICATORS_DISALLOWED = [ | ||||||
|         "Unbekannt", |         "Unbekannt", | ||||||
|         "unbekannt", |         "unbekannt", | ||||||
|         "Unknown", |         "Unknown", | ||||||
| @@ -175,7 +175,7 @@ final class NodaUncertaintyHelper { | |||||||
|         "невідоме", // No place |         "невідоме", // No place | ||||||
|     ]; |     ]; | ||||||
|  |  | ||||||
|         const PLACE_UNCERTAINTY_PREFIXES = [ |     public const PLACE_UNCERTAINTY_PREFIXES = [ | ||||||
|         "ca ", |         "ca ", | ||||||
|         "Ca ", |         "Ca ", | ||||||
|         "ca. ", |         "ca. ", | ||||||
| @@ -212,7 +212,7 @@ final class NodaUncertaintyHelper { | |||||||
|         "?", |         "?", | ||||||
|     ]; |     ]; | ||||||
|  |  | ||||||
|         const PLACE_UNCERTAINTY_SUFFIXES = [ |     public const PLACE_UNCERTAINTY_SUFFIXES = [ | ||||||
|         "(?)", |         "(?)", | ||||||
|         "(vermutl.)", |         "(vermutl.)", | ||||||
|         "[vermutl.]", |         "[vermutl.]", | ||||||
| @@ -343,7 +343,7 @@ final class NodaUncertaintyHelper { | |||||||
|      */ |      */ | ||||||
|     public static function guessPlaceCertainty(string $ort_name):bool { |     public static function guessPlaceCertainty(string $ort_name):bool { | ||||||
|  |  | ||||||
|             $ort_name = \strtolower($ort_name); |         $ort_name = \trim(\strtolower($ort_name), ', ;-_'); | ||||||
|  |  | ||||||
|         // Attempt to guess uncertainty based on prefixes. |         // Attempt to guess uncertainty based on prefixes. | ||||||
|         foreach (NodaUncertaintyHelper::PLACE_UNCERTAINTY_PREFIXES as $prefix) { |         foreach (NodaUncertaintyHelper::PLACE_UNCERTAINTY_PREFIXES as $prefix) { | ||||||
| @@ -404,7 +404,7 @@ final class NodaUncertaintyHelper { | |||||||
|      */ |      */ | ||||||
|     public static function guessPersinstCertainty(string $name):bool { |     public static function guessPersinstCertainty(string $name):bool { | ||||||
|  |  | ||||||
|             $name = \trim(\strtolower($name)); |         $name = \trim(\strtolower($name), ', ;-_'); | ||||||
|  |  | ||||||
|         // Attempt to guess uncertainty based on prefixes. |         // Attempt to guess uncertainty based on prefixes. | ||||||
|         foreach (NodaUncertaintyHelper::PERSINST_UNCERTAINTY_PREFIXES as $prefix) { |         foreach (NodaUncertaintyHelper::PERSINST_UNCERTAINTY_PREFIXES as $prefix) { | ||||||
|   | |||||||
| @@ -6,12 +6,14 @@ | |||||||
|  */ |  */ | ||||||
| declare(strict_types = 1); | declare(strict_types = 1); | ||||||
| use PHPUnit\Framework\TestCase; | use PHPUnit\Framework\TestCase; | ||||||
|  | use PHPUnit\Framework\Attributes\CoversClass; | ||||||
|  | use PHPUnit\Framework\Attributes\Small; | ||||||
|  |  | ||||||
| /** | /** | ||||||
|  * This script contains tests for the uncertainty helper. |  * This script contains tests for the uncertainty helper. | ||||||
|  * |  | ||||||
|  * @covers \NodaUncertaintyHelper |  | ||||||
|  */ |  */ | ||||||
|  | #[small] | ||||||
|  | #[CoversClass(\NodaUncertaintyHelper::class)] | ||||||
| final class NodaUncertaintyHelperTest extends TestCase { | final class NodaUncertaintyHelperTest extends TestCase { | ||||||
|     /** |     /** | ||||||
|      * Removes uncertainty indicators from a time name. |      * Removes uncertainty indicators from a time name. | ||||||
| @@ -76,6 +78,9 @@ final class NodaUncertaintyHelperTest extends TestCase { | |||||||
|         self::assertFalse(NodaUncertaintyHelper::guessPlaceCertainty("vermutl. Augsburg")); |         self::assertFalse(NodaUncertaintyHelper::guessPlaceCertainty("vermutl. Augsburg")); | ||||||
|  |  | ||||||
|         self::assertFalse(NodaUncertaintyHelper::guessPlaceCertainty("wohl Berlin")); |         self::assertFalse(NodaUncertaintyHelper::guessPlaceCertainty("wohl Berlin")); | ||||||
|  |         self::assertFalse(NodaUncertaintyHelper::guessPlaceCertainty("Berlin?")); | ||||||
|  |         self::assertFalse(NodaUncertaintyHelper::guessPlaceCertainty("Berlin?,")); | ||||||
|  |         self::assertTrue(NodaUncertaintyHelper::guessPlaceCertainty("Berlin,")); | ||||||
|         self::assertTrue(NodaUncertaintyHelper::guessPlaceCertainty("Berlin")); |         self::assertTrue(NodaUncertaintyHelper::guessPlaceCertainty("Berlin")); | ||||||
|  |  | ||||||
|         self::assertFalse(NodaUncertaintyHelper::guessPlaceCertainty("?-Italien")); |         self::assertFalse(NodaUncertaintyHelper::guessPlaceCertainty("?-Italien")); | ||||||
| @@ -109,6 +114,9 @@ final class NodaUncertaintyHelperTest extends TestCase { | |||||||
|     public static function testGuessPersinstCertainty():void { |     public static function testGuessPersinstCertainty():void { | ||||||
|  |  | ||||||
|         self::assertFalse(NodaUncertaintyHelper::guessPlaceCertainty("wohl Barbarossa")); |         self::assertFalse(NodaUncertaintyHelper::guessPlaceCertainty("wohl Barbarossa")); | ||||||
|  |         self::assertFalse(NodaUncertaintyHelper::guessPlaceCertainty("Barbarossa?")); | ||||||
|  |         self::assertFalse(NodaUncertaintyHelper::guessPlaceCertainty("Barbarossa?,")); | ||||||
|  |         self::assertTrue(NodaUncertaintyHelper::guessPlaceCertainty("Barbarossa,")); | ||||||
|         self::assertTrue(NodaUncertaintyHelper::guessPlaceCertainty("Barbarossa")); |         self::assertTrue(NodaUncertaintyHelper::guessPlaceCertainty("Barbarossa")); | ||||||
|  |  | ||||||
|     } |     } | ||||||
|   | |||||||
		Reference in New Issue
	
	Block a user