From c00cb6b6297b920b92690d45181d51b0658b6edb Mon Sep 17 00:00:00 2001 From: Joshua Ramon Enslin Date: Tue, 17 May 2022 23:27:40 +0200 Subject: [PATCH] Add class NodaValidationHelper, for now for validating actor descriptions --- src/NodaUncertaintyHelper.php | 3 +- src/NodaValidationHelper.php | 64 +++++++++++++++++++++++++ tests/NodaNameSplitterTest.php | 4 +- tests/NodaTimeAutotranslaterTest.php | 4 +- tests/NodaTimeSplitterTest.php | 4 +- tests/NodaUncertaintyHelperTest.php | 6 +-- tests/NodaValidationHelperTest.php | 71 ++++++++++++++++++++++++++++ tests/NodaWikidataFetcherTest.php | 4 +- 8 files changed, 148 insertions(+), 12 deletions(-) create mode 100644 src/NodaValidationHelper.php create mode 100644 tests/NodaValidationHelperTest.php diff --git a/src/NodaUncertaintyHelper.php b/src/NodaUncertaintyHelper.php index c3ee9d3..346e662 100644 --- a/src/NodaUncertaintyHelper.php +++ b/src/NodaUncertaintyHelper.php @@ -7,7 +7,8 @@ declare(strict_types = 1); /** - * Contains static functions for getting IDs for noda entries by various means. + * Contains static functions for identifying uncertainty or blocking + * completely uncertain inputs for actors, times, and places. */ final class NodaUncertaintyHelper { diff --git a/src/NodaValidationHelper.php b/src/NodaValidationHelper.php new file mode 100644 index 0000000..9a88ba5 --- /dev/null +++ b/src/NodaValidationHelper.php @@ -0,0 +1,64 @@ + + */ +declare(strict_types = 1); + +/** + * A class of static functions for validating single fields of noda entities. + */ +final class NodaValidationHelper { + + const ACTOR_DESCRIPTION_REQUIRED_DISTINCT_CHARS = 2; + + /** + * Validates an actor description for completeness. Of course, only an informed + * guess based on the length and character composition of the description can be + * made. + * + * @param string $description Input description. + * @param string[] $names Names of the actor. Optional. Setting this enables + * checks e.g. 
to prevent duplicating the actor name + * as a description. + * + * @return void + */ + public static function validateActorDescription(string $description, array $names = []):void { + + // Throw error on descriptions that are too short + if (\mb_strlen($description) < 10) { + throw new MDgenericInvalidInputsException("Author description is too short"); + } + + // Validate actor description based on character composition. + + $chars = \str_split($description); + + $uniqueChars = array_unique($chars); + if (count($uniqueChars) <= self::ACTOR_DESCRIPTION_REQUIRED_DISTINCT_CHARS) { + throw new MDgenericInvalidInputsException("There need to be more than " . self::ACTOR_DESCRIPTION_REQUIRED_DISTINCT_CHARS . " distinct characters."); + } + + if (!empty($names)) { + + $clearedChars = [' ' => ' ', ',' => ' ', ';' => ' ', '.' => ' ']; + + $namesMerged = implode(' ', $names); + $namesMerged = strtr($namesMerged, $clearedChars); + $uniqueNames = array_unique(array_diff(explode(' ', $namesMerged), [''])); + sort($uniqueNames); + + $descCleared = strtr($description, $clearedChars); + $descWords = array_unique(array_diff(explode(' ', $descCleared), [''])); + sort($descWords); + + if ($uniqueNames === $descWords) { + throw new MDgenericInvalidInputsException("The actor name was simply repeated in the description. This is not enough."); + } + + } + + } +} diff --git a/tests/NodaNameSplitterTest.php b/tests/NodaNameSplitterTest.php index 7e2008b..e02c520 100644 --- a/tests/NodaNameSplitterTest.php +++ b/tests/NodaNameSplitterTest.php @@ -1,6 +1,6 @@ */ @@ -10,7 +10,7 @@ require_once __DIR__ . "/../src/NodaNameSplitter.php"; require_once __DIR__ . "/../../MD_STD/src/MD_STD.php"; /** - * Tests for home page. + * This script contains tests for the actor name splitter. 
*/ final class NodaNameSplitterTest extends TestCase { /** diff --git a/tests/NodaTimeAutotranslaterTest.php b/tests/NodaTimeAutotranslaterTest.php index 3ad7fc4..268d971 100644 --- a/tests/NodaTimeAutotranslaterTest.php +++ b/tests/NodaTimeAutotranslaterTest.php @@ -1,6 +1,6 @@ */ @@ -9,7 +9,7 @@ use PHPUnit\Framework\TestCase; require __DIR__ . "/../src/NodaTimeAutotranslater.php"; /** - * Tests for home page. + * This script contains tests for the automatic translation class for time names. */ final class NodaTimeAutotranslaterTest extends TestCase { /** diff --git a/tests/NodaTimeSplitterTest.php b/tests/NodaTimeSplitterTest.php index 8098127..11de382 100644 --- a/tests/NodaTimeSplitterTest.php +++ b/tests/NodaTimeSplitterTest.php @@ -1,6 +1,6 @@ */ @@ -11,7 +11,7 @@ require_once __DIR__ . "/../src/NodaTimeAutotranslater.php"; require_once __DIR__ . "/../../MD_STD/src/MD_STD.php"; /** - * Tests for home page. + * This script contains tests for the time name splitter. */ final class NodaTimeSplitterTest extends TestCase { /** diff --git a/tests/NodaUncertaintyHelperTest.php b/tests/NodaUncertaintyHelperTest.php index 1ad2366..fa0e048 100644 --- a/tests/NodaUncertaintyHelperTest.php +++ b/tests/NodaUncertaintyHelperTest.php @@ -1,15 +1,15 @@ */ declare(strict_types = 1); use PHPUnit\Framework\TestCase; -require __DIR__ . "/../src/NodaUncertaintyHelper.php"; +require_once __DIR__ . "/../src/NodaUncertaintyHelper.php"; /** - * Tests for home page. + * This script contains tests for the uncertainty helper. */ final class NodaUncertaintyHelperTest extends TestCase { /** diff --git a/tests/NodaValidationHelperTest.php b/tests/NodaValidationHelperTest.php new file mode 100644 index 0000000..5a42835 --- /dev/null +++ b/tests/NodaValidationHelperTest.php @@ -0,0 +1,71 @@ + + */ +declare(strict_types = 1); +use PHPUnit\Framework\TestCase; +require_once __DIR__ . "/../src/NodaValidationHelper.php"; +require_once __DIR__ . 
"/../../MDErrorReporter/exceptions/generic/MDgenericInvalidInputsException.php"; + +/** + * This script contains tests for the validation of single field contents. + */ +final class NodaValidationHelperTest extends TestCase { + /** + * Test successfully refusing too short actor descriptions. + * + * @return void + */ + public function testActorDescriptionValidationFailsOnTooShortInput():void { + + $this->expectException(MDgenericInvalidInputsException::class); + NodaValidationHelper::validateActorDescription("abc"); + + } + + /** + * Test successfully refusing actor descriptions that have too few distinct characters. + * + * @return void + */ + public function testActorDescriptionValidationFailsOnTooFewDistinctCharacters():void { + + $this->expectException(MDgenericInvalidInputsException::class); + NodaValidationHelper::validateActorDescription("aaaaaaaaaaaa"); + + } + + /** + * Test successfully refusing actor descriptions that simply duplicate the actor name. + * + * @return void + */ + public function testActorDescriptionValidationFailsOnDuplicatedActorNames():void { + + $this->expectException(MDgenericInvalidInputsException::class); + NodaValidationHelper::validateActorDescription("Richard Lepsius", ["Lepsius, Richard"]); + $this->expectException(MDgenericInvalidInputsException::class); + NodaValidationHelper::validateActorDescription("Richard Lepsius", ["Lepsius", "Richard"]); + $this->expectException(MDgenericInvalidInputsException::class); + NodaValidationHelper::validateActorDescription("Lepsius, Richard", ["Lepsius", "Richard"]); + $this->expectException(MDgenericInvalidInputsException::class); + NodaValidationHelper::validateActorDescription("Richard Lepsius, ", ["Lepsius", "Richard"]); + $this->expectException(MDgenericInvalidInputsException::class); + NodaValidationHelper::validateActorDescription("Helmut Testtest", ["Helmut Testtest", "Helmut Testtest", "Testtest, Helmut", "Helmut", "Testtest"]); + + } + + /** + * Test that a valid description is 
accepted. + * + * @return void + */ + public function testActorDescriptionValidationAcceptsValidDescription():void { + + NodaValidationHelper::validateActorDescription("Richard Lepsius war ein Forscher", ["Lepsius", "Richard"]); + self::assertTrue(true); + + } +} diff --git a/tests/NodaWikidataFetcherTest.php b/tests/NodaWikidataFetcherTest.php index 0694f99..dd0ffbb 100644 --- a/tests/NodaWikidataFetcherTest.php +++ b/tests/NodaWikidataFetcherTest.php @@ -1,6 +1,6 @@ */ @@ -11,7 +11,7 @@ require_once __DIR__ . "/../../MDErrorReporter/exceptions/generic/MDExpectedExce require_once __DIR__ . "/../../MD_STD/src/MD_STD.php"; /** - * Tests for home page. + * This script contains tests for the Wikidata fetcher. */ final class NodaWikidataFetcherTest extends TestCase { /**