Add class NodaValidationHelper, for now for validating actor descriptions
This commit is contained in:
Joshua Ramon Enslin 2022-05-17 23:27:40 +02:00
parent ac79f421ff
commit c00cb6b629
Signed by: jrenslin
GPG Key ID: 46016F84501B70AE
8 changed files with 148 additions and 12 deletions

View File

@ -7,7 +7,8 @@
declare(strict_types = 1);
/**
* Contains static functions for getting IDs for noda entries by various means.
* Contains static functions for identifying uncertainty or blocking
* completely uncertain inputs for actors, times, and places.
*/
final class NodaUncertaintyHelper {

View File

@ -0,0 +1,64 @@
<?PHP
/**
* Contains class NodaValidationHelper.
*
* @author Joshua Ramon Enslin <joshua@museum-digital.de>
*/
declare(strict_types = 1);
/**
 * A class of static functions for validating single fields of noda entities.
 */
final class NodaValidationHelper {

    /**
     * A description is rejected unless it contains MORE than this many
     * distinct characters.
     */
    public const ACTOR_DESCRIPTION_REQUIRED_DISTINCT_CHARS = 2;

    /**
     * Minimum length (in characters, not bytes) of an actor description.
     */
    public const ACTOR_DESCRIPTION_MIN_LENGTH = 10;

    /**
     * Characters that are treated as word separators when comparing the
     * description with the actor names. Each is mapped to a single space.
     */
    private const WORD_SEPARATORS = [' ' => ' ', ',' => ' ', ';' => ' ', '.' => ' '];

    /**
     * Counts the distinct characters of a string.
     *
     * The string is split by UTF-8 code point, so that a multibyte character
     * counts as one character (consistent with the mb_strlen() based length
     * check). If the input is not valid UTF-8, splitting by code point is
     * impossible and the count falls back to distinct bytes.
     *
     * @param string $input Input string.
     *
     * @return integer
     */
    private static function countDistinctCharacters(string $input):int {

        $chars = \preg_split('//u', $input, -1, PREG_SPLIT_NO_EMPTY);
        if ($chars === false) {
            // preg_split() returns false on malformed UTF-8 when using /u.
            $chars = \str_split($input);
        }

        return \count(\array_unique($chars));

    }

    /**
     * Splits a string at the separators listed in WORD_SEPARATORS and returns
     * the distinct, non-empty words as a sorted list.
     *
     * @param string $input Input string.
     *
     * @return string[]
     */
    private static function listDistinctWords(string $input):array {

        $cleared = \strtr($input, self::WORD_SEPARATORS);
        $words = \array_unique(\array_diff(\explode(' ', $cleared), ['']));
        // sort() also re-indexes the array, so two word lists with the same
        // members compare as identical with ===.
        \sort($words);

        return $words;

    }

    /**
     * Validates an actor description for completeness. Of course, only an informed
     * guess based on the length and character composition of the description can be
     * made.
     *
     * The description is rejected if it is shorter than
     * ACTOR_DESCRIPTION_MIN_LENGTH characters, if it does not contain more
     * than ACTOR_DESCRIPTION_REQUIRED_DISTINCT_CHARS distinct characters, or -
     * when names are passed - if it consists of exactly the same set of words
     * as the names (in any order and ignoring repetitions).
     *
     * @param string   $description Input description.
     * @param string[] $names       Names of the actor. Optional. Setting this enables
     *                              checks e.g. to prevent duplicating the actor name
     *                              as a description.
     *
     * @throws MDgenericInvalidInputsException If the description fails one of the checks.
     *
     * @return void
     */
    public static function validateActorDescription(string $description, array $names = []):void {

        // Throw error on descriptions that are too short
        if (\mb_strlen($description) < self::ACTOR_DESCRIPTION_MIN_LENGTH) {
            throw new MDgenericInvalidInputsException("Author description is too short");
        }

        // Validate actor description based on character composition.
        if (self::countDistinctCharacters($description) <= self::ACTOR_DESCRIPTION_REQUIRED_DISTINCT_CHARS) {
            throw new MDgenericInvalidInputsException("There need to be more than " . self::ACTOR_DESCRIPTION_REQUIRED_DISTINCT_CHARS . " distinct characters.");
        }

        // Without names there is nothing to compare the description against.
        if (empty($names)) {
            return;
        }

        $uniqueNames = self::listDistinctWords(\implode(' ', $names));
        $descWords   = self::listDistinctWords($description);

        if ($uniqueNames === $descWords) {
            throw new MDgenericInvalidInputsException("The actor name was simply repeated in the description. This is not enough.");
        }

    }

}

View File

@ -1,6 +1,6 @@
<?PHP
/**
* This script contains tests for the home page.
* This script contains tests for the actor name splitter.
*
* @author Joshua Ramon Enslin <joshua@museum-digital.de>
*/
@ -10,7 +10,7 @@ require_once __DIR__ . "/../src/NodaNameSplitter.php";
require_once __DIR__ . "/../../MD_STD/src/MD_STD.php";
/**
* Tests for home page.
* This script contains tests for the actor name splitter.
*/
final class NodaNameSplitterTest extends TestCase {
/**

View File

@ -1,6 +1,6 @@
<?PHP
/**
* This script contains tests for the home page.
* This script contains tests for the automatic translation class for time names.
*
* @author Joshua Ramon Enslin <joshua@museum-digital.de>
*/
@ -9,7 +9,7 @@ use PHPUnit\Framework\TestCase;
require __DIR__ . "/../src/NodaTimeAutotranslater.php";
/**
* Tests for home page.
* This script contains tests for the automatic translation class for time names.
*/
final class NodaTimeAutotranslaterTest extends TestCase {
/**

View File

@ -1,6 +1,6 @@
<?PHP
/**
* This script contains tests for the home page.
* This script contains tests for the time name splitter.
*
* @author Joshua Ramon Enslin <joshua@museum-digital.de>
*/
@ -11,7 +11,7 @@ require_once __DIR__ . "/../src/NodaTimeAutotranslater.php";
require_once __DIR__ . "/../../MD_STD/src/MD_STD.php";
/**
* Tests for home page.
* This script contains tests for the time name splitter.
*/
final class NodaTimeSplitterTest extends TestCase {
/**

View File

@ -1,15 +1,15 @@
<?PHP
/**
* This script contains tests for the home page.
* This script contains tests for the uncertainty helper.
*
* @author Joshua Ramon Enslin <joshua@museum-digital.de>
*/
declare(strict_types = 1);
use PHPUnit\Framework\TestCase;
require __DIR__ . "/../src/NodaUncertaintyHelper.php";
require_once __DIR__ . "/../src/NodaUncertaintyHelper.php";
/**
* Tests for home page.
* This script contains tests for the uncertainty helper.
*/
final class NodaUncertaintyHelperTest extends TestCase {
/**

View File

@ -0,0 +1,71 @@
<?PHP
/**
* This script contains tests for the validation of single field contents.
*
* @author Joshua Ramon Enslin <joshua@museum-digital.de>
*/
declare(strict_types = 1);
use PHPUnit\Framework\TestCase;
require_once __DIR__ . "/../src/NodaValidationHelper.php";
require_once __DIR__ . "/../../MDErrorReporter/exceptions/generic/MDgenericInvalidInputsException.php";
/**
 * Tests for NodaValidationHelper, which validates single field contents.
 */
final class NodaValidationHelperTest extends TestCase {

    /**
     * Test successfully refusing too short actor descriptions.
     *
     * @return void
     */
    public function testActorDescriptionValidationFailsOnTooShortInput():void {

        $this->expectException(MDgenericInvalidInputsException::class);
        NodaValidationHelper::validateActorDescription("abc");

    }

    /**
     * Test successfully refusing actor descriptions that have too few distinct characters.
     *
     * @return void
     */
    public function testActorDescriptionValidationFailsOnTooFewDistinctCharacters():void {

        $this->expectException(MDgenericInvalidInputsException::class);
        NodaValidationHelper::validateActorDescription("aaaaaaaaaaaa");

    }

    /**
     * Test successfully refusing actor descriptions that simply duplicate the actor name.
     *
     * Each case is checked on its own with try/catch: a thrown exception ends
     * the test method, so chaining several expectException() + call pairs
     * would only ever execute the first case.
     *
     * @return void
     */
    public function testActorDescriptionValidationFailsOnDuplicatedActorNames():void {

        // Pairs of [description, names of the actor].
        $cases = [
            ["Richard Lepsius", ["Lepsius, Richard"]],
            ["Richard Lepsius", ["Lepsius", "Richard"]],
            ["Lepsius, Richard", ["Lepsius", "Richard"]],
            ["Richard Lepsius, ", ["Lepsius", "Richard"]],
            ["Helmut Testtest", ["Helmut Testtest", "Helmut Testtest", "Testtest, Helmut", "Helmut", "Testtest"]],
        ];

        foreach ($cases as [$description, $names]) {

            try {
                NodaValidationHelper::validateActorDescription($description, $names);
            }
            catch (MDgenericInvalidInputsException $e) {
                // Expected outcome; count it as an assertion and go on with the next case.
                self::assertInstanceOf(MDgenericInvalidInputsException::class, $e);
                continue;
            }

            self::fail("Description '" . $description . "' only repeats the actor name, but was accepted.");

        }

    }

    /**
     * Test that a valid description is accepted.
     *
     * @return void
     */
    public function testActorDescriptionValidationAcceptsValidDescription():void {

        NodaValidationHelper::validateActorDescription("Richard Lepsius war ein Forscher", ["Lepsius", "Richard"]);
        // Reaching this line means no exception was thrown.
        self::assertTrue(true);

    }

}

View File

@ -1,6 +1,6 @@
<?PHP
/**
* This script contains tests for the home page.
* This script contains tests for the Wikidata fetcher.
*
* @author Joshua Ramon Enslin <joshua@museum-digital.de>
*/
@ -11,7 +11,7 @@ require_once __DIR__ . "/../../MDErrorReporter/exceptions/generic/MDExpectedExce
require_once __DIR__ . "/../../MD_STD/src/MD_STD.php";
/**
* Tests for home page.
* This script contains tests for the Wikidata fetcher.
*/
final class NodaWikidataFetcherTest extends TestCase {
/**