Identify uncertainty before brackets ("Berlin ? (Germany)" > "Berlin
(Germany)" + Uncertain)
This commit is contained in:
		| @@ -328,6 +328,14 @@ final class NodaUncertaintyHelper { | |||||||
|             } |             } | ||||||
|         } |         } | ||||||
|  |  | ||||||
|  |         // If the name contains brackets, strip uncertainty prefixes and suffixes | ||||||
|  |         // from the part preceding the first opening bracket and keep the rest as is. | ||||||
|  |         if (($bracketPos = strpos($ort_name, "(")) !== false) { | ||||||
|  |             $start = substr($ort_name, 0, $bracketPos); | ||||||
|  |             $end = substr($ort_name, $bracketPos); | ||||||
|  |             $ort_name = self::cleanUncertaintyIndicatorsPlace($start) . ' ' . $end; | ||||||
|  |         } | ||||||
|  |  | ||||||
|         return self::trim($ort_name); |         return self::trim($ort_name); | ||||||
|  |  | ||||||
|     } |     } | ||||||
| @@ -358,6 +366,13 @@ final class NodaUncertaintyHelper { | |||||||
|             } |             } | ||||||
|         } |         } | ||||||
|  |  | ||||||
|  |         // If the name contains brackets, run the certainty check on everything | ||||||
|  |         // before the first opening bracket. | ||||||
|  |         if (($bracketPos = strpos($ort_name, "(")) !== false) { | ||||||
|  |             $name = substr($ort_name, 0, $bracketPos); | ||||||
|  |             return self::guessPlaceCertainty($name); | ||||||
|  |         } | ||||||
|  |  | ||||||
|         return true; // Certain / no uncertainty found |         return true; // Certain / no uncertainty found | ||||||
|  |  | ||||||
|     } |     } | ||||||
|   | |||||||
| @@ -8,6 +8,7 @@ declare(strict_types = 1); | |||||||
| use PHPUnit\Framework\TestCase; | use PHPUnit\Framework\TestCase; | ||||||
| use PHPUnit\Framework\Attributes\CoversClass; | use PHPUnit\Framework\Attributes\CoversClass; | ||||||
| use PHPUnit\Framework\Attributes\Small; | use PHPUnit\Framework\Attributes\Small; | ||||||
|  | use PHPUnit\Framework\Attributes\DataProvider; | ||||||
|  |  | ||||||
| /** | /** | ||||||
|  * This script contains tests for the uncertainty helper. |  * This script contains tests for the uncertainty helper. | ||||||
| @@ -16,111 +17,113 @@ use PHPUnit\Framework\Attributes\Small; | |||||||
| #[CoversClass(\NodaUncertaintyHelper::class)] | #[CoversClass(\NodaUncertaintyHelper::class)] | ||||||
| final class NodaUncertaintyHelperTest extends TestCase { | final class NodaUncertaintyHelperTest extends TestCase { | ||||||
|     /** |     /** | ||||||
|      * Removes uncertainty indicators from an time name. |      * Returns time names with expected cleaned version and expected parsed certainty. | ||||||
|      * |      * | ||||||
|      * @group ValidOutput |      * @return array<array{0: string, 1: string, 2: boolean}> | ||||||
|      * @small |  | ||||||
|      * |  | ||||||
|      * @return void |  | ||||||
|      */ |      */ | ||||||
|     public function testCleanUncertaintyIndicatorsTime():void { |     public static function uncertainTimesProvider():array { | ||||||
|  |  | ||||||
|         self::assertEquals("1950", NodaUncertaintyHelper::cleanUncertaintyIndicatorsTime("wohl 1950")); |         return [ | ||||||
|         self::assertEquals("1950", NodaUncertaintyHelper::cleanUncertaintyIndicatorsTime("1950?")); |             'uncertainty prefix: "wohl 1950"' => ["wohl 1950", "1950", false], | ||||||
|         self::assertEquals("1950", NodaUncertaintyHelper::cleanUncertaintyIndicatorsTime("1950?,")); |             'uncertainty suffix: "1950?"' => ["1950?", "1950", false], | ||||||
|         self::assertEquals("1950", NodaUncertaintyHelper::cleanUncertaintyIndicatorsTime("1950,")); |             'uncertainty suffix and superfluous chars: "1950 ?,"' => ["1950 ?,", "1950", false], | ||||||
|         self::assertEquals("1950", NodaUncertaintyHelper::cleanUncertaintyIndicatorsTime("1950")); |             'certain term with superfluous chars: "1950 ,"' => ["1950 ,", "1950", true], | ||||||
|  |             'certain term: 1950' => ["1950", "1950", true], | ||||||
|  |         ]; | ||||||
|  |  | ||||||
|     } |     } | ||||||
|  |  | ||||||
|     /** |     /** | ||||||
|      * Attempts guessing whether time is uncertain. |      * Returns place names with expected cleaned version and expected parsed certainty. | ||||||
|      * |      * | ||||||
|      * @group ValidOutput |      * @return array<array{0: string, 1: string, 2: boolean}> | ||||||
|      * @small |  | ||||||
|      * |  | ||||||
|      * @return void |  | ||||||
|      */ |      */ | ||||||
|     public function testGuessTimeCertainty():void { |     public static function uncertainPlacesProvider():array { | ||||||
|  |  | ||||||
|         self::assertFalse(NodaUncertaintyHelper::guessTimeCertainty("wohl 1950")); |         return [ | ||||||
|         self::assertTrue(NodaUncertaintyHelper::guessTimeCertainty("1950")); |  | ||||||
|  |             'uncertainty prefix: "wohl Berlin"' => ["wohl Berlin", "Berlin", false], | ||||||
|  |             'uncertainty prefix: "vermutl. Berlin"' => ["vermutl. Berlin", "Berlin", false], | ||||||
|  |             'uncertainty prefix and superfluous chars: "?-Berlin"' => ["?-Berlin", "Berlin", false], | ||||||
|  |             'uncertainty suffix: "Berlin?"' => ["Berlin?", "Berlin", false], | ||||||
|  |             'uncertainty suffix: "Berlin (?)"' => ["Berlin (?)", "Berlin", false], | ||||||
|  |             'uncertainty suffix and superfluous chars: "Berlin ?,"' => ["Berlin ?,", "Berlin", false], | ||||||
|  |             'certain term with superfluous chars: "Berlin ,"' => ["Berlin ,", "Berlin", true], | ||||||
|  |             'certain term: Berlin' => ["Berlin", "Berlin", true], | ||||||
|  |             'Berlin ? (Deutschland)' => ["Berlin ? (Deutschland)", "Berlin (Deutschland)", false], | ||||||
|  |             'Berli?n (Deutschland)' => ["Berl?n (Deutschland)", "Berl?n (Deutschland)", true], | ||||||
|  |  | ||||||
|  |         ]; | ||||||
|  |  | ||||||
|     } |     } | ||||||
|  |  | ||||||
|     /** |     /** | ||||||
|      * Removes uncertainty indicators from an place name. |      * Returns actor names with expected cleaned version and expected parsed certainty. | ||||||
|      * |      * | ||||||
|      * @group ValidOutput |      * @return array<array{0: string, 1: string, 2: boolean}> | ||||||
|      * @small |  | ||||||
|      * |  | ||||||
|      * @return void |  | ||||||
|      */ |      */ | ||||||
|     public static function testCleanUncertaintyIndicatorsPlace():void { |     public static function uncertainPersinstProvider():array { | ||||||
|  |  | ||||||
|         self::assertEquals("Berlin", NodaUncertaintyHelper::cleanUncertaintyIndicatorsPlace("wohl Berlin")); |         return [ | ||||||
|         self::assertEquals("Berlin", NodaUncertaintyHelper::cleanUncertaintyIndicatorsPlace("Berlin")); |             'uncertainty prefix: "wohl Barbarossa"' => ["wohl Barbarossa", "Barbarossa", false], | ||||||
|  |             'uncertainty prefix: "vermutl. Barbarossa"' => ["vermutl. Barbarossa", "Barbarossa", false], | ||||||
|         // Real-life examples that previously passed unencumbered |             'uncertainty prefix and superfluous chars: "?-Barbarossa"' => ["?-Barbarossa", "Barbarossa", false], | ||||||
|         self::assertEquals("Augsburg", NodaUncertaintyHelper::cleanUncertaintyIndicatorsPlace("vermutlich: Augsburg")); |             'uncertainty suffix: "Barbarossa?"' => ["Barbarossa?", "Barbarossa", false], | ||||||
|         self::assertEquals("Augsburg", NodaUncertaintyHelper::cleanUncertaintyIndicatorsPlace("vermutl. Augsburg")); |             'uncertainty suffix and superfluous chars: "Barbarossa ?,"' => ["Barbarossa ?,", "Barbarossa", false], | ||||||
|  |             'certain term with superfluous chars: "Barbarossa ,"' => ["Barbarossa ,", "Barbarossa", true], | ||||||
|  |             'certain term: Barbarossa' => ["Barbarossa", "Barbarossa", true], | ||||||
|  |         ]; | ||||||
|  |  | ||||||
|     } |     } | ||||||
|  |  | ||||||
|     /** |     /** | ||||||
|      * Attempts guessing whether place is uncertain. |      * Test to ensure times are correctly cleaned and parsed. | ||||||
|      * |      * | ||||||
|      * @group ValidOutput |      * @param string  $term             Term to check. | ||||||
|      * @small |      * @param string  $target_output    Expected output name. | ||||||
|  |      * @param boolean $target_certainty Expected output certainty. | ||||||
|      * |      * | ||||||
|      * @return void |      * @return void | ||||||
|      */ |      */ | ||||||
|     public static function testGuessPlaceCertainty():void { |     #[DataProvider('uncertainTimesProvider')] | ||||||
|  |     public function testParsingUncertaintyFromTimes(string $term, string $target_output, bool $target_certainty):void { | ||||||
|  |  | ||||||
|         self::assertFalse(NodaUncertaintyHelper::guessPlaceCertainty("vermutlich: Augsburg")); |         self::assertEquals($target_output, NodaUncertaintyHelper::cleanUncertaintyIndicatorsTime($term)); | ||||||
|         self::assertFalse(NodaUncertaintyHelper::guessPlaceCertainty("vermutl. Augsburg")); |         self::assertEquals($target_certainty, NodaUncertaintyHelper::guessTimeCertainty($term)); | ||||||
|  |  | ||||||
|         self::assertFalse(NodaUncertaintyHelper::guessPlaceCertainty("wohl Berlin")); |  | ||||||
|         self::assertFalse(NodaUncertaintyHelper::guessPlaceCertainty("Berlin?")); |  | ||||||
|         self::assertFalse(NodaUncertaintyHelper::guessPlaceCertainty("Berlin?,")); |  | ||||||
|         self::assertTrue(NodaUncertaintyHelper::guessPlaceCertainty("Berlin,")); |  | ||||||
|         self::assertTrue(NodaUncertaintyHelper::guessPlaceCertainty("Berlin")); |  | ||||||
|  |  | ||||||
|         self::assertFalse(NodaUncertaintyHelper::guessPlaceCertainty("?-Italien")); |  | ||||||
|  |  | ||||||
|     } |     } | ||||||
|  |  | ||||||
|     /** |     /** | ||||||
|      * Removes uncertainty indicators from an actor name. |      * Test to ensure places are correctly cleaned and parsed. | ||||||
|      * |      * | ||||||
|      * @group ValidOutput |      * @param string  $term             Term to check. | ||||||
|      * @small |      * @param string  $target_output    Expected output name. | ||||||
|  |      * @param boolean $target_certainty Expected output certainty. | ||||||
|      * |      * | ||||||
|      * @return void |      * @return void | ||||||
|      */ |      */ | ||||||
|     public static function testCleanUncertaintyIndicatorsPersinst():void { |     #[DataProvider('uncertainPlacesProvider')] | ||||||
|  |     public function testParsingUncertaintyFromPlaces(string $term, string $target_output, bool $target_certainty):void { | ||||||
|  |  | ||||||
|         self::assertEquals("Barbarossa", NodaUncertaintyHelper::cleanUncertaintyIndicatorsPersinst("wohl Barbarossa")); |         self::assertEquals($target_output, NodaUncertaintyHelper::cleanUncertaintyIndicatorsPlace($term)); | ||||||
|         self::assertEquals("Barbarossa", NodaUncertaintyHelper::cleanUncertaintyIndicatorsPersinst("Barbarossa")); |         self::assertEquals($target_certainty, NodaUncertaintyHelper::guessPlaceCertainty($term)); | ||||||
|         self::assertEquals("Barbarossa", NodaUncertaintyHelper::cleanUncertaintyIndicatorsPersinst("?-Barbarossa")); |  | ||||||
|  |  | ||||||
|     } |     } | ||||||
|  |  | ||||||
|     /** |     /** | ||||||
|      * Attempts guessing whether persinst is uncertain. |      * Test to ensure actor names are correctly cleaned and parsed. | ||||||
|      * |      * | ||||||
|      * @group ValidOutput |      * @param string  $term             Term to check. | ||||||
|      * @small |      * @param string  $target_output    Expected output name. | ||||||
|  |      * @param boolean $target_certainty Expected output certainty. | ||||||
|      * |      * | ||||||
|      * @return void |      * @return void | ||||||
|      */ |      */ | ||||||
|     public static function testGuessPersinstCertainty():void { |     #[DataProvider('uncertainPersinstProvider')] | ||||||
|  |     public function testParsingUncertaintyFromPersinst(string $term, string $target_output, bool $target_certainty):void { | ||||||
|  |  | ||||||
|         self::assertFalse(NodaUncertaintyHelper::guessPlaceCertainty("wohl Barbarossa")); |         self::assertEquals($target_output, NodaUncertaintyHelper::cleanUncertaintyIndicatorsPersinst($term)); | ||||||
|         self::assertFalse(NodaUncertaintyHelper::guessPlaceCertainty("Barbarossa?")); |         self::assertEquals($target_certainty, NodaUncertaintyHelper::guessPersinstCertainty($term)); | ||||||
|         self::assertFalse(NodaUncertaintyHelper::guessPlaceCertainty("Barbarossa?,")); |  | ||||||
|         self::assertTrue(NodaUncertaintyHelper::guessPlaceCertainty("Barbarossa,")); |  | ||||||
|         self::assertTrue(NodaUncertaintyHelper::guessPlaceCertainty("Barbarossa")); |  | ||||||
|  |  | ||||||
|     } |     } | ||||||
| } | } | ||||||
|   | |||||||
		Reference in New Issue
	
	Block a user