diff --git a/src/NodaUncertaintyHelper.php b/src/NodaUncertaintyHelper.php index 0030a47..23a6b58 100644 --- a/src/NodaUncertaintyHelper.php +++ b/src/NodaUncertaintyHelper.php @@ -12,7 +12,7 @@ declare(strict_types = 1); */ final class NodaUncertaintyHelper { - const PERSINST_INDICATORS_DISALLOWED = [ + public const PERSINST_INDICATORS_DISALLOWED = [ "Unbekannt", "unbekannt", "Anonymus", @@ -41,7 +41,7 @@ final class NodaUncertaintyHelper { "Невідомий артист", // Unknown artist ]; - const PERSINST_UNCERTAINTY_PREFIXES = [ + public const PERSINST_UNCERTAINTY_PREFIXES = [ "verm. ", "Verm. ", "vermtl. ", @@ -57,7 +57,7 @@ final class NodaUncertaintyHelper { "?", ]; - const PERSINST_UNCERTAINTY_SUFFIXES = [ + public const PERSINST_UNCERTAINTY_SUFFIXES = [ "(?)", "?", " [vermutlich]", @@ -65,7 +65,7 @@ final class NodaUncertaintyHelper { " [wahrscheinlich]", ]; - const TIME_INDICATORS_DISALLOWED = [ + public const TIME_INDICATORS_DISALLOWED = [ "Nachgewiesen", "nachgewiesen", "o.D.", @@ -94,9 +94,9 @@ final class NodaUncertaintyHelper { "Без датування", // No dating "б.р.", // No dating "б.д.", // No dating - ]; + ]; - const TIME_UNCERTAINTY_PREFIXES = [ + public const TIME_UNCERTAINTY_PREFIXES = [ "c. ", "ca ", "ca. ", @@ -130,9 +130,9 @@ final class NodaUncertaintyHelper { "майже", // UK: Almost / nearly / about "орієнтовно", // UK: approximately "Прибл.", // UK: approximately - ]; + ]; - const TIME_UNCERTAINTY_SUFFIXES = [ + public const TIME_UNCERTAINTY_SUFFIXES = [ "(?)", "?", " (ca.)", @@ -145,12 +145,12 @@ final class NodaUncertaintyHelper { ", um", " (um)", " (ок.)", - ]; + ]; - /** - * Substrings used to express uncertainty about the validity of a place name. - */ - const PLACE_INDICATORS_DISALLOWED = [ + /** + * Substrings used to express uncertainty about the validity of a place name. + */ + public const PLACE_INDICATORS_DISALLOWED = [ "Unbekannt", "unbekannt", "Unknown", @@ -173,9 +173,9 @@ final class NodaUncertaintyHelper { "не вказано", // No place "не вказане", // No place "невідоме", // No place - ]; + ]; - const PLACE_UNCERTAINTY_PREFIXES = [ + public const PLACE_UNCERTAINTY_PREFIXES = [ "ca ", "Ca ", "ca. ", @@ -210,9 +210,9 @@ final class NodaUncertaintyHelper { "Wahrscheinlich ", "можливо", "?", - ]; + ]; - const PLACE_UNCERTAINTY_SUFFIXES = [ + public const PLACE_UNCERTAINTY_SUFFIXES = [ "(?)", "(vermutl.)", "[vermutl.]", @@ -221,206 +221,206 @@ final class NodaUncertaintyHelper { "(wohl)", "[wohl]", "?", - ]; + ]; - /** - * Trims common characters and charater marks. - * - * @param string $input Input text. - * - * @return string - */ - public static function trim(string $input):string { + /** + * Trims common characters and charater marks. + * + * @param string $input Input text. + * + * @return string + */ + public static function trim(string $input):string { - $input = \trim($input, ", \t\n\r\n;-:"); - return $input; + $input = \trim($input, ", \t\n\r\n;-:"); + return $input; + } + + /** + * Removes uncertainty indicators from an time name. + * + * @param string $name Input string. + * + * @return string + */ + public static function cleanUncertaintyIndicatorsTime(string $name):string { + + $name = self::trim($name); + + if (\in_array($name, self::TIME_INDICATORS_DISALLOWED, true)) { + return ""; } - /** - * Removes uncertainty indicators from an time name. - * - * @param string $name Input string. - * - * @return string - */ - public static function cleanUncertaintyIndicatorsTime(string $name):string { - - $name = self::trim($name); - - if (\in_array($name, self::TIME_INDICATORS_DISALLOWED, true)) { - return ""; + // Remove uncertainty prefixes + foreach (NodaUncertaintyHelper::TIME_UNCERTAINTY_PREFIXES as $prefix) { + if (\substr($name, 0, \strlen($prefix)) === "$prefix") { + $name = substr($name, \strlen($prefix)); } - - // Remove uncertainty prefixes - foreach (NodaUncertaintyHelper::TIME_UNCERTAINTY_PREFIXES as $prefix) { - if (\substr($name, 0, \strlen($prefix)) === "$prefix") { - $name = substr($name, \strlen($prefix)); - } - } - - // Remove uncertainty sufixes - foreach (NodaUncertaintyHelper::TIME_UNCERTAINTY_SUFFIXES as $suffix) { - if (\substr($name, \strlen($suffix) * -1) === "$suffix") { - $name = \substr($name, 0, \strlen($suffix) * -1); - } - } - - return self::trim($name); - } - /** - * Attempts guessing whether time is uncertain. Returns true if the name - * indicates certainty, false if it indicates uncertainty. - * - * @param string $zeit_name Time name. - * - * @return boolean - */ - public static function guessTimeCertainty(string $zeit_name):bool { - - $zeit_name = \strtolower($zeit_name); - - // Attempt to guess uncertainty based on prefixes. - foreach (self::TIME_UNCERTAINTY_PREFIXES as $prefix) { - if (\substr($zeit_name, 0, \strlen($prefix)) === $prefix) { - return false; // Uncertainty found - } + // Remove uncertainty sufixes + foreach (NodaUncertaintyHelper::TIME_UNCERTAINTY_SUFFIXES as $suffix) { + if (\substr($name, \strlen($suffix) * -1) === "$suffix") { + $name = \substr($name, 0, \strlen($suffix) * -1); } - - // Attempt to guess uncertainty based on prefixes. - foreach (self::TIME_UNCERTAINTY_SUFFIXES as $prefix) { - if (\substr($zeit_name, -1 * \strlen($prefix)) === $prefix) { - return false; // Uncertainty found - } - } - - return true; // No uncertainty found - } - /** - * Removes uncertainty indicators from an place name. - * - * @param string $ort_name Input string. - * - * @return string - */ - public static function cleanUncertaintyIndicatorsPlace(string $ort_name):string { + return self::trim($name); - $ort_name = self::trim($ort_name); + } - if (\in_array($ort_name, self::PLACE_INDICATORS_DISALLOWED, true)) { - return ""; + /** + * Attempts guessing whether time is uncertain. Returns true if the name + * indicates certainty, false if it indicates uncertainty. + * + * @param string $zeit_name Time name. + * + * @return boolean + */ + public static function guessTimeCertainty(string $zeit_name):bool { + + $zeit_name = \strtolower($zeit_name); + + // Attempt to guess uncertainty based on prefixes. + foreach (self::TIME_UNCERTAINTY_PREFIXES as $prefix) { + if (\substr($zeit_name, 0, \strlen($prefix)) === $prefix) { + return false; // Uncertainty found } - - // Remove uncertainty prefixes - foreach (NodaUncertaintyHelper::PLACE_UNCERTAINTY_PREFIXES as $prefix) { - if (\substr($ort_name, 0, \strlen($prefix)) === "$prefix") { - $ort_name = substr($ort_name, \strlen($prefix)); - } - } - - // Remove uncertainty sufixes - foreach (NodaUncertaintyHelper::PLACE_UNCERTAINTY_SUFFIXES as $suffix) { - if (\substr($ort_name, \strlen($suffix) * -1) === "$suffix") { - $ort_name = \substr($ort_name, 0, \strlen($suffix) * -1); - } - } - - return self::trim($ort_name); - } - /** - * Attempts guessing whether place is uncertain. Returns true if the name - * indicates certainty, false if it indicates uncertainty. - * - * @param string $ort_name Place name. - * - * @return boolean - */ - public static function guessPlaceCertainty(string $ort_name):bool { - - $ort_name = \strtolower($ort_name); - - // Attempt to guess uncertainty based on prefixes. - foreach (NodaUncertaintyHelper::PLACE_UNCERTAINTY_PREFIXES as $prefix) { - if (\substr($ort_name, 0, \strlen($prefix)) === $prefix) { - return false; // Uncertain - } + // Attempt to guess uncertainty based on prefixes. + foreach (self::TIME_UNCERTAINTY_SUFFIXES as $prefix) { + if (\substr($zeit_name, -1 * \strlen($prefix)) === $prefix) { + return false; // Uncertainty found } - - // Attempt to guess uncertainty based on prefixes. - foreach (NodaUncertaintyHelper::PLACE_UNCERTAINTY_SUFFIXES as $prefix) { - if (\substr($ort_name, -1 * \strlen($prefix)) === $prefix) { - return false; // Uncertain - } - } - - return true; // Certain / no uncertainty found - } - /** - * Removes uncertainty indicators from an actor name. - * - * @param string $value Input string. - * - * @return string - */ - public static function cleanUncertaintyIndicatorsPersinst(string $value):string { + return true; // No uncertainty found - $value = self::trim($value); + } - if (\in_array(trim($value, ";. "), self::PERSINST_INDICATORS_DISALLOWED, true)) { - return ""; - } + /** + * Removes uncertainty indicators from an place name. + * + * @param string $ort_name Input string. + * + * @return string + */ + public static function cleanUncertaintyIndicatorsPlace(string $ort_name):string { - foreach (self::PERSINST_UNCERTAINTY_PREFIXES as $toRemove) { - if (\mb_substr($value, 0, \mb_strlen($toRemove)) === $toRemove) { - $value = substr($value, \mb_strlen($toRemove)); - } - } - - foreach (self::PLACE_UNCERTAINTY_SUFFIXES as $suffix) { - if (\mb_substr($value, \mb_strlen($suffix) * -1) === "$suffix") { - $value = \mb_substr($value, 0, \mb_strlen($suffix) * -1); - } - } - - return self::trim($value); + $ort_name = self::trim($ort_name); + if (\in_array($ort_name, self::PLACE_INDICATORS_DISALLOWED, true)) { + return ""; } - /** - * Attempts guessing whether persinst is uncertain. Returns true if the name - * indicates certainty, false if it indicates uncertainty. - * - * @param string $name Persinst name. - * - * @return boolean - */ - public static function guessPersinstCertainty(string $name):bool { - - $name = \trim(\strtolower($name)); - - // Attempt to guess uncertainty based on prefixes. - foreach (NodaUncertaintyHelper::PERSINST_UNCERTAINTY_PREFIXES as $prefix) { - if (\substr($name, 0, \strlen($prefix)) === $prefix) { - return false; // Uncertain - } + // Remove uncertainty prefixes + foreach (NodaUncertaintyHelper::PLACE_UNCERTAINTY_PREFIXES as $prefix) { + if (\substr($ort_name, 0, \strlen($prefix)) === "$prefix") { + $ort_name = substr($ort_name, \strlen($prefix)); } - - // Attempt to guess uncertainty based on prefixes. - foreach (NodaUncertaintyHelper::PERSINST_UNCERTAINTY_SUFFIXES as $prefix) { - if (\substr($name, -1 * \strlen($prefix)) === $prefix) { - return false; // Uncertain - } - } - - return true; // Certain / no uncertainty found - } + + // Remove uncertainty sufixes + foreach (NodaUncertaintyHelper::PLACE_UNCERTAINTY_SUFFIXES as $suffix) { + if (\substr($ort_name, \strlen($suffix) * -1) === "$suffix") { + $ort_name = \substr($ort_name, 0, \strlen($suffix) * -1); + } + } + + return self::trim($ort_name); + + } + + /** + * Attempts guessing whether place is uncertain. Returns true if the name + * indicates certainty, false if it indicates uncertainty. + * + * @param string $ort_name Place name. + * + * @return boolean + */ + public static function guessPlaceCertainty(string $ort_name):bool { + + $ort_name = \trim(\strtolower($ort_name), ', ;-_'); + + // Attempt to guess uncertainty based on prefixes. + foreach (NodaUncertaintyHelper::PLACE_UNCERTAINTY_PREFIXES as $prefix) { + if (\substr($ort_name, 0, \strlen($prefix)) === $prefix) { + return false; // Uncertain + } + } + + // Attempt to guess uncertainty based on prefixes. + foreach (NodaUncertaintyHelper::PLACE_UNCERTAINTY_SUFFIXES as $prefix) { + if (\substr($ort_name, -1 * \strlen($prefix)) === $prefix) { + return false; // Uncertain + } + } + + return true; // Certain / no uncertainty found + + } + + /** + * Removes uncertainty indicators from an actor name. + * + * @param string $value Input string. + * + * @return string + */ + public static function cleanUncertaintyIndicatorsPersinst(string $value):string { + + $value = self::trim($value); + + if (\in_array(trim($value, ";. "), self::PERSINST_INDICATORS_DISALLOWED, true)) { + return ""; + } + + foreach (self::PERSINST_UNCERTAINTY_PREFIXES as $toRemove) { + if (\mb_substr($value, 0, \mb_strlen($toRemove)) === $toRemove) { + $value = substr($value, \mb_strlen($toRemove)); + } + } + + foreach (self::PLACE_UNCERTAINTY_SUFFIXES as $suffix) { + if (\mb_substr($value, \mb_strlen($suffix) * -1) === "$suffix") { + $value = \mb_substr($value, 0, \mb_strlen($suffix) * -1); + } + } + + return self::trim($value); + + } + + /** + * Attempts guessing whether persinst is uncertain. Returns true if the name + * indicates certainty, false if it indicates uncertainty. + * + * @param string $name Persinst name. + * + * @return boolean + */ + public static function guessPersinstCertainty(string $name):bool { + + $name = \trim(\strtolower($name), ', ;-_'); + + // Attempt to guess uncertainty based on prefixes. + foreach (NodaUncertaintyHelper::PERSINST_UNCERTAINTY_PREFIXES as $prefix) { + if (\substr($name, 0, \strlen($prefix)) === $prefix) { + return false; // Uncertain + } + } + + // Attempt to guess uncertainty based on prefixes. + foreach (NodaUncertaintyHelper::PERSINST_UNCERTAINTY_SUFFIXES as $prefix) { + if (\substr($name, -1 * \strlen($prefix)) === $prefix) { + return false; // Uncertain + } + } + + return true; // Certain / no uncertainty found + + } } diff --git a/tests/NodaUncertaintyHelperTest.php b/tests/NodaUncertaintyHelperTest.php index ecafed6..14c0594 100644 --- a/tests/NodaUncertaintyHelperTest.php +++ b/tests/NodaUncertaintyHelperTest.php @@ -6,12 +6,14 @@ */ declare(strict_types = 1); use PHPUnit\Framework\TestCase; +use PHPUnit\Framework\Attributes\CoversClass; +use PHPUnit\Framework\Attributes\Small; /** * This script contains tests for the uncertainty helper. - * - * @covers \NodaUncertaintyHelper */ +#[small] +#[CoversClass(\NodaUncertaintyHelper::class)] final class NodaUncertaintyHelperTest extends TestCase { /** * Removes uncertainty indicators from an time name. @@ -76,6 +78,9 @@ final class NodaUncertaintyHelperTest extends TestCase { self::assertFalse(NodaUncertaintyHelper::guessPlaceCertainty("vermutl. Augsburg")); self::assertFalse(NodaUncertaintyHelper::guessPlaceCertainty("wohl Berlin")); + self::assertFalse(NodaUncertaintyHelper::guessPlaceCertainty("Berlin?")); + self::assertFalse(NodaUncertaintyHelper::guessPlaceCertainty("Berlin?,")); + self::assertTrue(NodaUncertaintyHelper::guessPlaceCertainty("Berlin,")); self::assertTrue(NodaUncertaintyHelper::guessPlaceCertainty("Berlin")); self::assertFalse(NodaUncertaintyHelper::guessPlaceCertainty("?-Italien")); @@ -109,6 +114,9 @@ final class NodaUncertaintyHelperTest extends TestCase { public static function testGuessPersinstCertainty():void { self::assertFalse(NodaUncertaintyHelper::guessPlaceCertainty("wohl Barbarossa")); + self::assertFalse(NodaUncertaintyHelper::guessPlaceCertainty("Barbarossa?")); + self::assertFalse(NodaUncertaintyHelper::guessPlaceCertainty("Barbarossa?,")); + self::assertTrue(NodaUncertaintyHelper::guessPlaceCertainty("Barbarossa,")); self::assertTrue(NodaUncertaintyHelper::guessPlaceCertainty("Barbarossa")); }