Identify uncertainty before brackets ("Berlin ? (Germany)" > "Berlin
(Germany)" + Uncertain)
This commit is contained in:
parent
7cfe752c94
commit
48355a6a36
|
@ -328,6 +328,14 @@ final class NodaUncertaintyHelper {
|
|||
}
|
||||
}
|
||||
|
||||
// If brackets are included in the name, try removing prefixes and suffixes
|
||||
// from the part preceding the first opening bracket only.
|
||||
if (($bracketPos = strpos($ort_name, "(")) !== false) {
|
||||
$start = substr($ort_name, 0, $bracketPos);
|
||||
$end = substr($ort_name, $bracketPos);
|
||||
$ort_name = self::cleanUncertaintyIndicatorsPlace($start) . ' ' . $end;
|
||||
}
|
||||
|
||||
return self::trim($ort_name);
|
||||
|
||||
}
|
||||
|
@ -358,6 +366,13 @@ final class NodaUncertaintyHelper {
|
|||
}
|
||||
}
|
||||
|
||||
// If brackets are included in the name, try the same for everything up to the
|
||||
// first brackets.
|
||||
if (($bracketPos = strpos($ort_name, "(")) !== false) {
|
||||
$name = substr($ort_name, 0, $bracketPos);
|
||||
return self::guessPlaceCertainty($name);
|
||||
}
|
||||
|
||||
return true; // Certain / no uncertainty found
|
||||
|
||||
}
|
||||
|
|
|
@ -8,6 +8,7 @@ declare(strict_types = 1);
|
|||
use PHPUnit\Framework\TestCase;
|
||||
use PHPUnit\Framework\Attributes\CoversClass;
|
||||
use PHPUnit\Framework\Attributes\Small;
|
||||
use PHPUnit\Framework\Attributes\DataProvider;
|
||||
|
||||
/**
|
||||
* This script contains tests for the uncertainty helper.
|
||||
|
@ -16,111 +17,113 @@ use PHPUnit\Framework\Attributes\Small;
|
|||
#[CoversClass(\NodaUncertaintyHelper::class)]
|
||||
final class NodaUncertaintyHelperTest extends TestCase {
|
||||
/**
|
||||
* Removes uncertainty indicators from a time name.
|
||||
* Returns time names with expected cleaned version and expected parsed certainty.
|
||||
*
|
||||
* @group ValidOutput
|
||||
* @small
|
||||
*
|
||||
* @return void
|
||||
* @return array<array{0: string, 1: string, 2: boolean}>
|
||||
*/
|
||||
public function testCleanUncertaintyIndicatorsTime():void {
|
||||
public static function uncertainTimesProvider():array {
|
||||
|
||||
self::assertEquals("1950", NodaUncertaintyHelper::cleanUncertaintyIndicatorsTime("wohl 1950"));
|
||||
self::assertEquals("1950", NodaUncertaintyHelper::cleanUncertaintyIndicatorsTime("1950?"));
|
||||
self::assertEquals("1950", NodaUncertaintyHelper::cleanUncertaintyIndicatorsTime("1950?,"));
|
||||
self::assertEquals("1950", NodaUncertaintyHelper::cleanUncertaintyIndicatorsTime("1950,"));
|
||||
self::assertEquals("1950", NodaUncertaintyHelper::cleanUncertaintyIndicatorsTime("1950"));
|
||||
return [
|
||||
'uncertainty prefix: "wohl 1950"' => ["wohl 1950", "1950", false],
|
||||
'uncertainty suffix: "1950?"' => ["1950?", "1950", false],
|
||||
'uncertainty suffix and superfluous chars: "1950 ?,"' => ["1950 ?,", "1950", false],
|
||||
'certain term with superfluous chars: "1950 ,"' => ["1950 ,", "1950", true],
|
||||
'certain term: 1950' => ["1950", "1950", true],
|
||||
];
|
||||
|
||||
}
|
||||
|
||||
/**
|
||||
* Attempts guessing whether time is uncertain.
|
||||
* Returns place names with expected cleaned version and expected parsed certainty.
|
||||
*
|
||||
* @group ValidOutput
|
||||
* @small
|
||||
*
|
||||
* @return void
|
||||
* @return array<array{0: string, 1: string, 2: boolean}>
|
||||
*/
|
||||
public function testGuessTimeCertainty():void {
|
||||
public static function uncertainPlacesProvider():array {
|
||||
|
||||
self::assertFalse(NodaUncertaintyHelper::guessTimeCertainty("wohl 1950"));
|
||||
self::assertTrue(NodaUncertaintyHelper::guessTimeCertainty("1950"));
|
||||
return [
|
||||
|
||||
'uncertainty prefix: "wohl Berlin"' => ["wohl Berlin", "Berlin", false],
|
||||
'uncertainty prefix: "vermutl. Berlin"' => ["vermutl. Berlin", "Berlin", false],
|
||||
'uncertainty prefix and superfluous chars: "?-Berlin"' => ["?-Berlin", "Berlin", false],
|
||||
'uncertainty suffix: "Berlin?"' => ["Berlin?", "Berlin", false],
|
||||
'uncertainty suffix: "Berlin (?)"' => ["Berlin (?)", "Berlin", false],
|
||||
'uncertainty suffix and superfluous chars: "Berlin ?,"' => ["Berlin ?,", "Berlin", false],
|
||||
'certain term with superfluous chars: "Berlin ,"' => ["Berlin ,", "Berlin", true],
|
||||
'certain term: Berlin' => ["Berlin", "Berlin", true],
|
||||
'Berlin ? (Deutschland)' => ["Berlin ? (Deutschland)", "Berlin (Deutschland)", false],
|
||||
'Berli?n (Deutschland)' => ["Berl?n (Deutschland)", "Berl?n (Deutschland)", true],
|
||||
|
||||
];
|
||||
|
||||
}
|
||||
|
||||
/**
|
||||
* Removes uncertainty indicators from a place name.
|
||||
* Returns actor names with expected cleaned version and expected parsed certainty.
|
||||
*
|
||||
* @group ValidOutput
|
||||
* @small
|
||||
*
|
||||
* @return void
|
||||
* @return array<array{0: string, 1: string, 2: boolean}>
|
||||
*/
|
||||
public static function testCleanUncertaintyIndicatorsPlace():void {
|
||||
public static function uncertainPersinstProvider():array {
|
||||
|
||||
self::assertEquals("Berlin", NodaUncertaintyHelper::cleanUncertaintyIndicatorsPlace("wohl Berlin"));
|
||||
self::assertEquals("Berlin", NodaUncertaintyHelper::cleanUncertaintyIndicatorsPlace("Berlin"));
|
||||
|
||||
// Real-life examples that previously passed unencumbered
|
||||
self::assertEquals("Augsburg", NodaUncertaintyHelper::cleanUncertaintyIndicatorsPlace("vermutlich: Augsburg"));
|
||||
self::assertEquals("Augsburg", NodaUncertaintyHelper::cleanUncertaintyIndicatorsPlace("vermutl. Augsburg"));
|
||||
return [
|
||||
'uncertainty prefix: "wohl Barbarossa"' => ["wohl Barbarossa", "Barbarossa", false],
|
||||
'uncertainty prefix: "vermutl. Barbarossa"' => ["vermutl. Barbarossa", "Barbarossa", false],
|
||||
'uncertainty prefix and superfluous chars: "?-Barbarossa"' => ["?-Barbarossa", "Barbarossa", false],
|
||||
'uncertainty suffix: "Barbarossa?"' => ["Barbarossa?", "Barbarossa", false],
|
||||
'uncertainty suffix and superfluous chars: "Barbarossa ?,"' => ["Barbarossa ?,", "Barbarossa", false],
|
||||
'certain term with superfluous chars: "Barbarossa ,"' => ["Barbarossa ,", "Barbarossa", true],
|
||||
'certain term: Barbarossa' => ["Barbarossa", "Barbarossa", true],
|
||||
];
|
||||
|
||||
}
|
||||
|
||||
/**
|
||||
* Attempts guessing whether place is uncertain.
|
||||
* Test to ensure times are correctly cleaned and parsed.
|
||||
*
|
||||
* @group ValidOutput
|
||||
* @small
|
||||
* @param string $term Term to check.
|
||||
* @param string $target_output Expected output name.
|
||||
* @param boolean $target_certainty Expected output certainty.
|
||||
*
|
||||
* @return void
|
||||
*/
|
||||
public static function testGuessPlaceCertainty():void {
|
||||
#[DataProvider('uncertainTimesProvider')]
|
||||
public function testParsingUncertaintyFromTimes(string $term, string $target_output, bool $target_certainty):void {
|
||||
|
||||
self::assertFalse(NodaUncertaintyHelper::guessPlaceCertainty("vermutlich: Augsburg"));
|
||||
self::assertFalse(NodaUncertaintyHelper::guessPlaceCertainty("vermutl. Augsburg"));
|
||||
|
||||
self::assertFalse(NodaUncertaintyHelper::guessPlaceCertainty("wohl Berlin"));
|
||||
self::assertFalse(NodaUncertaintyHelper::guessPlaceCertainty("Berlin?"));
|
||||
self::assertFalse(NodaUncertaintyHelper::guessPlaceCertainty("Berlin?,"));
|
||||
self::assertTrue(NodaUncertaintyHelper::guessPlaceCertainty("Berlin,"));
|
||||
self::assertTrue(NodaUncertaintyHelper::guessPlaceCertainty("Berlin"));
|
||||
|
||||
self::assertFalse(NodaUncertaintyHelper::guessPlaceCertainty("?-Italien"));
|
||||
self::assertEquals($target_output, NodaUncertaintyHelper::cleanUncertaintyIndicatorsTime($term));
|
||||
self::assertEquals($target_certainty, NodaUncertaintyHelper::guessTimeCertainty($term));
|
||||
|
||||
}
|
||||
|
||||
/**
|
||||
* Removes uncertainty indicators from an actor name.
|
||||
* Test to ensure places are correctly cleaned and parsed.
|
||||
*
|
||||
* @group ValidOutput
|
||||
* @small
|
||||
* @param string $term Term to check.
|
||||
* @param string $target_output Expected output name.
|
||||
* @param boolean $target_certainty Expected output certainty.
|
||||
*
|
||||
* @return void
|
||||
*/
|
||||
public static function testCleanUncertaintyIndicatorsPersinst():void {
|
||||
#[DataProvider('uncertainPlacesProvider')]
|
||||
public function testParsingUncertaintyFromPlaces(string $term, string $target_output, bool $target_certainty):void {
|
||||
|
||||
self::assertEquals("Barbarossa", NodaUncertaintyHelper::cleanUncertaintyIndicatorsPersinst("wohl Barbarossa"));
|
||||
self::assertEquals("Barbarossa", NodaUncertaintyHelper::cleanUncertaintyIndicatorsPersinst("Barbarossa"));
|
||||
self::assertEquals("Barbarossa", NodaUncertaintyHelper::cleanUncertaintyIndicatorsPersinst("?-Barbarossa"));
|
||||
self::assertEquals($target_output, NodaUncertaintyHelper::cleanUncertaintyIndicatorsPlace($term));
|
||||
self::assertEquals($target_certainty, NodaUncertaintyHelper::guessPlaceCertainty($term));
|
||||
|
||||
}
|
||||
|
||||
/**
|
||||
* Attempts guessing whether persinst is uncertain.
|
||||
* Test to ensure actor names are correctly cleaned and parsed.
|
||||
*
|
||||
* @group ValidOutput
|
||||
* @small
|
||||
* @param string $term Term to check.
|
||||
* @param string $target_output Expected output name.
|
||||
* @param boolean $target_certainty Expected output certainty.
|
||||
*
|
||||
* @return void
|
||||
*/
|
||||
public static function testGuessPersinstCertainty():void {
|
||||
#[DataProvider('uncertainPersinstProvider')]
|
||||
public function testParsingUncertaintyFromPersinst(string $term, string $target_output, bool $target_certainty):void {
|
||||
|
||||
self::assertFalse(NodaUncertaintyHelper::guessPlaceCertainty("wohl Barbarossa"));
|
||||
self::assertFalse(NodaUncertaintyHelper::guessPlaceCertainty("Barbarossa?"));
|
||||
self::assertFalse(NodaUncertaintyHelper::guessPlaceCertainty("Barbarossa?,"));
|
||||
self::assertTrue(NodaUncertaintyHelper::guessPlaceCertainty("Barbarossa,"));
|
||||
self::assertTrue(NodaUncertaintyHelper::guessPlaceCertainty("Barbarossa"));
|
||||
self::assertEquals($target_output, NodaUncertaintyHelper::cleanUncertaintyIndicatorsPersinst($term));
|
||||
self::assertEquals($target_certainty, NodaUncertaintyHelper::guessPersinstCertainty($term));
|
||||
|
||||
}
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue
Block a user