Add classes for consolidating the spellings of actor and place names
This commit is contained in:
parent
61e83022ae
commit
f6409322e5
68
src/NodaConsolidatedNamesAbstract.php
Normal file
68
src/NodaConsolidatedNamesAbstract.php
Normal file
|
@ -0,0 +1,68 @@
|
||||||
|
<?PHP
|
||||||
|
/**
|
||||||
|
* Abstract class to be inherited by classes for writing consolidated vocabulary names.
|
||||||
|
*
|
||||||
|
* @author Joshua Ramon Enslin <joshua@museum-digital.de>
|
||||||
|
*/
|
||||||
|
declare(strict_types = 1);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Abstract class to be inherited by classes for writing consolidated vocabulary names.
|
||||||
|
*/
|
||||||
|
abstract class NodaConsolidatedNamesAbstract {

    /**
     * Performs the low-level cleanup of a raw vocabulary name.
     *
     * Steps, in order: trims semicolons, spaces, tabs and line breaks from both
     * ends; rewrites angle brackets to square brackets; replaces tabs, literal
     * "\n" sequences and <br> variants with spaces; replaces line breaks with
     * spaces; collapses runs of spaces; strips HTML tags; and finally trims
     * whitespace, commas and pipes from both ends.
     *
     * @param string $inputString Input string.
     *
     * @return string
     */
    final protected static function _sanitizeInputStringStatic(string $inputString):string {

        $string = trim($inputString, "; \t" . PHP_EOL);

        // strtr() with an array always tries the longest key first, so the
        // <br> variants are turned into spaces before "<" / ">" are rewritten.
        // '\n' is single-quoted on purpose: it matches the two-character
        // sequence backslash + n; real line breaks are handled below.
        // NOTE(review): mapping '§' to a double quote looks unusual - confirm.
        $string = strtr($string, [
            "<"      => "[",
            ">"      => "]",
            "\t"     => " ",
            '\n'     => ' ',
            '<br />' => ' ',
            '<br/>'  => ' ',
            '<br>'   => ' ',
            '§'      => '"',
        ]);

        $string = str_replace(PHP_EOL, ' ', $string);

        // Collapse any run of consecutive spaces into a single space.
        while (str_contains($string, "  ")) {
            $string = str_replace("  ", " ", $string);
        }

        $string = strip_tags($string);

        return trim(trim($string), ',| ');

    }

    /**
     * Checks whether all round brackets in a string are properly paired:
     * no closing bracket appears before its opening one and none is left open.
     *
     * Works byte-wise, which is safe because "(" and ")" are ASCII characters.
     *
     * @param string $name String to check.
     *
     * @return boolean
     */
    private static function _hasBalancedBrackets(string $name):bool {

        $depth = 0;
        $length = strlen($name);

        for ($i = 0; $i < $length; $i++) {
            if ($name[$i] === '(') {
                $depth++;
            }
            else if ($name[$i] === ')' && --$depth < 0) {
                return false;
            }
        }

        return $depth === 0;

    }

    /**
     * Does general cleanup for vocabulary entries.
     *
     * Runs the basic sanitization, rewrites angle and square brackets to round
     * brackets, removes the substring "unbekannt" and finally removes round
     * brackets that wrap the whole name.
     *
     * @param string $input Input string.
     *
     * @return string
     */
    final public static function sanitizeInputString(string $input):string {

        $output = strtr(
            self::_sanitizeInputStringStatic($input),
            [
                '<' => '(',
                '>' => ')',
                '[' => '(',
                ']' => ')',
                "unbekannt" => "",
            ],
        );

        // If the first and last character of the name are brackets, remove those.
        // Stripping must not tear apart independent bracket pairs, e.g. in
        // "(A) und (B)" or "(Foo (Bar))", so the result is only accepted if its
        // remaining brackets are still balanced.
        if (str_starts_with($output, '(') && str_ends_with($output, ')')) {

            $stripped = trim($output, '()');
            $inner    = substr($output, 1, -1);

            if (self::_hasBalancedBrackets($stripped)) {
                $output = $stripped;
            }
            else if (self::_hasBalancedBrackets($inner)) {
                $output = $inner;
            }

        }

        return $output;

    }
}
|
124
src/NodaConsolidatedNamesForPersinst.php
Normal file
124
src/NodaConsolidatedNamesForPersinst.php
Normal file
|
@ -0,0 +1,124 @@
|
||||||
|
<?PHP
|
||||||
|
/**
|
||||||
|
* Gathers functions for setting uniform actor names.
|
||||||
|
*/
|
||||||
|
declare(strict_types = 1);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Gathers functions for setting uniform actor names.
|
||||||
|
*/
|
||||||
|
final class NodaConsolidatedNamesForPersinst extends NodaConsolidatedNamesAbstract {

    /**
     * Substrings of an actor name listed as a key in this array will be replaced
     * by the corresponding value.
     */
    private const _NAME_SANITIZATIONS = [
        "mythologische Figur" => "Mythologie",
        "Mythologische Figur" => "Mythologie",
        "Mythologische Gestalt" => "Mythologie",
        "()" => "",
    ];

    /**
     * Replaces the last characters of a string if $from matches the end of the string.
     * No replacement takes place if the character directly preceding $from is a
     * full stop (presumably an abbreviated name part, as in "Raffaelli, C. d. J.").
     *
     * @param string $from Replace from.
     * @param string $to   Replace to.
     * @param string $name Input name.
     *
     * @return string
     */
    private static function _replaceFromEnd(string $from, string $to, string $name):string {

        if ($from === '' || str_ends_with($name, $from) === false) {
            return $name;
        }

        // Byte length on purpose: substr() and str_ends_with() are byte-based,
        // so a character count would be off for suffixes containing e.g. "Ä".
        $length = strlen($from);

        if (substr($name, -1 * $length - 1, 1) === '.') {
            return $name;
        }

        // Collapse a double space that may arise where the name and $to are joined.
        return str_replace("  ", " ", substr($name, 0, -1 * $length) . $to);

    }

    /**
     * Cleans and consolidates name parts appearing regularly in German names
     * that have a default writing in md.
     *
     * Currently rewrites trailing "d.Ä." / "d.J." (with or without brackets and
     * inner space) to " (der Ältere)" / " (der Jüngere)".
     *
     * @param string $name Name of an actor.
     *
     * @return string
     */
    private static function _clean_german_abbreviations(string $name):string {

        $name = self::_replaceFromEnd(" d.Ä.", " (der Ältere)", $name);
        $name = self::_replaceFromEnd(" d. Ä.", " (der Ältere)", $name);
        $name = self::_replaceFromEnd(" (d.Ä.)", " (der Ältere)", $name);
        $name = self::_replaceFromEnd(" (d. Ä.)", " (der Ältere)", $name);

        $name = self::_replaceFromEnd(" d.J.", " (der Jüngere)", $name);
        $name = self::_replaceFromEnd(" d. J.", " (der Jüngere)", $name);
        $name = self::_replaceFromEnd(" (d.J.)", " (der Jüngere)", $name);
        $name = self::_replaceFromEnd(" (d. J.)", " (der Jüngere)", $name);

        return $name;

    }

    /**
     * Tries to make sense of life dates in brackets at the end of an actor's name.
     *
     * Only names containing exactly one opening bracket and ending on a closing
     * bracket are considered. The bracket contents are handed to
     * NodaTimeSplitter::is_timespan(); results with an unknown ("?") start or
     * end, or spanning 150 or more years, are rejected.
     *
     * @param string $name Input name.
     *
     * @return array{name: string, birth: string, death: string}|array{}
     */
    public static function parse_life_dates_from_name(string $name):array {

        if (str_contains($name, "(") === false || str_ends_with($name, ")") === false) return [];

        $parts = explode("(", $name);
        if (count($parts) !== 2) return [];

        $nameOnly = trim($parts[0]);
        $dateString = rtrim($parts[1], ')');

        $dates = NodaTimeSplitter::is_timespan($dateString);
        if (empty($dates) || $dates[0] === '?' || $dates[1] === '?') {
            return [];
        }

        // Reject implausibly long life spans.
        if (intval($dates[1]) - intval($dates[0]) >= 150) {
            return [];
        }

        return [
            'name' => $nameOnly,
            'birth' => $dates[0],
            'death' => $dates[1],
        ];

    }

    /**
     * Cleans a persinst name by trimming etc. Also removes uncertainty indicators.
     *
     * German abbreviations are only expanded for instance language "de" and
     * names longer than 10 characters.
     *
     * @param string $lang          Instance language.
     * @param string $persinst_name Input string to clean.
     *
     * @return string
     */
    public static function consolidate_name(string $lang, string $persinst_name):string {

        // Run basic replacements
        $name = \strtr(self::sanitizeInputString($persinst_name),
            self::_NAME_SANITIZATIONS);
        $name = NodaUncertaintyHelper::cleanUncertaintyIndicatorsPersinst($name);

        if (mb_strlen($name) > 10 && $lang === 'de') {
            $name = self::_clean_german_abbreviations($name);
        }

        // Remove leftover separators and full stops at both ends.
        return \trim($name, " ;.\t" . PHP_EOL);

    }

}
|
209
src/NodaConsolidatedNamesForPlaces.php
Normal file
209
src/NodaConsolidatedNamesForPlaces.php
Normal file
|
@ -0,0 +1,209 @@
|
||||||
|
<?PHP
|
||||||
|
/**
|
||||||
|
* Gathers functions for setting uniform place names.
|
||||||
|
*/
|
||||||
|
declare(strict_types = 1);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Gathers functions for setting uniform place names.
|
||||||
|
*/
|
||||||
|
final class NodaConsolidatedNamesForPlaces extends NodaConsolidatedNamesAbstract {

    /**
     * Substrings of a place name listed as a key in this array will be replaced
     * by the corresponding value.
     */
    private const _NAME_SANITIZATIONS = [
        " - " => "-",
        "unbekannt" => "",
        "Unbekannt" => "",
        "unknown" => "",
        "Unknown" => "",
    ];

    /**
     * Place type terms that are moved into brackets if a German place name
     * ends with ", <term>".
     */
    private const _PLACE_TYPE_INDICATORS_GERMAN = [
        'Insel',
        'Stadt',
    ];

    // Indicators signifying that a place is likely subordinate to the other
    // if two places are provided in a comma-separated list
    private const _PLACE_NARROWER_LOCATION_INDICATORS_GERMAN = [
        'gasse',
        'straße',
    ];

    // Indicators signifying that a place is likely subordinate to the other
    // if two places are provided in a comma-separated list
    private const _PLACE_NARROWER_LOCATION_INDICATORS_HUNGARIAN = [
        'körut',
        'utca',
        'út',
    ];

    /**
     * Roman numerals (I to XX) and their Arabic counterparts. Used for
     * rewriting district numbers following "Budapest, ".
     */
    private const _RELEVANT_ROMAN_NUMERALS = [
        'I' => '1',
        'II' => '2',
        'III' => '3',
        'IV' => '4',
        'V' => '5',
        'VI' => '6',
        'VII' => '7',
        'VIII' => '8',
        'IX' => '9',
        'X' => '10',
        'XI' => '11',
        'XII' => '12',
        'XIII' => '13',
        'XIV' => '14',
        'XV' => '15',
        'XVI' => '16',
        'XVII' => '17',
        'XVIII' => '18',
        'XIX' => '19',
        'XX' => '20',
    ];

    /**
     * Rewrites indicators for narrower locations paired with a superordinate location
     * into the format "Narrower (Broader)".
     * E.g.: "Adalbrechtstraße 12, Berlin" > "Adalbrechtstraße 12 (Berlin)".
     *
     * The name is returned unchanged unless it contains the indicator and the
     * separator exactly once each and no opening bracket.
     *
     * @param string $name      Name in which to rewrite.
     * @param string $indicator Indicator for narrower place. E.g. "straße".
     * @param string $separator Separating character between narrower and broader, e.g. ', '.
     *
     * @return string
     */
    private static function _rewrite_narrower_broader_pairs_to_brackets(string $name, string $indicator, string $separator = ', '):string {

        if (substr_count($name, $indicator) !== 1
            || substr_count($name, $separator) !== 1
            || str_contains($name, "(")
        ) {
            return $name;
        }

        // Exactly one separator is present, so this yields exactly two parts.
        [$first, $second] = explode($separator, $name);

        // Prevent errors in case of "Adalbrechtstraße 12, "
        if ($first === '' || $second === '') {
            return $name;
        }

        if (str_contains($first, $indicator)) { // Adalbrechtstraße 12, Berlin
            $street = $first;
            $town   = $second;
        }
        else { // Berlin, Adalbrechtstraße 12
            $street = $second;
            $town   = $first;
        }

        // Prevent rewrites in cases like "Deák Ferenc utca 16-18. Budapest, V."
        if (str_contains($town, '.')) {
            return $name;
        }

        return $street . ' (' . $town . ')';

    }

    /**
     * Cleans and consolidates name parts appearing regularly in German place names.
     *
     * @param string $name Name of a place.
     *
     * @return string
     */
    private static function _clean_german_abbreviations(string $name):string {

        // ABC, Insel > ABC (Insel)
        // Only the matched suffix is rewritten; earlier occurrences stay untouched.
        foreach (self::_PLACE_TYPE_INDICATORS_GERMAN as $indicator) {
            $suffix = ', ' . $indicator;
            if (str_ends_with($name, $suffix)) {
                $name = substr($name, 0, -1 * strlen($suffix)) . ' (' . $indicator . ')';
            }
        }

        // Adalbrechtstr. 12 > Adalbrechtstraße 12
        // \p{L} with the u modifier also accepts non-ASCII letters before
        // "str." (e.g. "Schloßstr. 12"); the full stop is matched literally.
        if (str_contains($name, "str. ") && \preg_match('/\p{L}str\. [0-9]/u', $name) === 1) {
            $name = str_replace("str. ", "straße ", $name);
        }

        // "Adalbrechtstraße 12, Berlin" > Adalbrechtstraße 12 (Berlin)
        foreach (self::_PLACE_NARROWER_LOCATION_INDICATORS_GERMAN as $indicator) {
            $name = self::_rewrite_narrower_broader_pairs_to_brackets($name, $indicator, ', ');
        }

        return $name;

    }

    /**
     * Cleans and consolidates name parts appearing regularly in Hungarian place names.
     *
     * @param string $name Name of a place.
     *
     * @return string
     */
    private static function _clean_hungarian_abbreviations(string $name):string {

        // Expand street type abbreviations, but only if followed by a number.
        if (str_contains($name, " krt. ") && \preg_match("/\ krt\.\ [0-9]/", $name)) {
            $name = str_replace(" krt. ", " körut ", $name);
        }
        if (str_contains($name, " u. ") && \preg_match("/\ u\.\ [0-9]/", $name)) {
            $name = str_replace(" u. ", " utca ", $name);
        }

        // "Adalbrecht utca 12, Berlin" > Adalbrecht utca 12 (Berlin)
        foreach (self::_PLACE_NARROWER_LOCATION_INDICATORS_HUNGARIAN as $indicator) {
            $name = self::_rewrite_narrower_broader_pairs_to_brackets($name, $indicator, ', ');
        }

        // "... Budapest, V." > "... (Budapest, 5. kerület)"
        if (substr_count($name, 'Budapest') === 1) {
            foreach (self::_RELEVANT_ROMAN_NUMERALS as $roman_numeral => $arabic) {

                $to_match = ' Budapest, ' . $roman_numeral . '.';
                if (str_ends_with($name, $to_match)) {
                    $name = str_replace($to_match, ' (Budapest, ' . $arabic . '. kerület)', $name);
                    // The name now ends on "kerület)", no other numeral can match.
                    break;
                }

            }
        }

        return $name;

    }

    /**
     * Cleans a place name by trimming etc. Also removes uncertainty indicators.
     *
     * Language-specific cleanup exists for "de" and "hu"; names in other
     * languages only receive the general cleanup.
     *
     * @param string $lang     Instance language.
     * @param string $ort_name Input string to clean.
     *
     * @return string
     */
    public static function consolidate_name(string $lang, string $ort_name):string {

        // Run basic replacements
        $nameSanitizations = self::_NAME_SANITIZATIONS;
        // A single slash is rewritten to a hyphen; names with several slashes keep them.
        if (substr_count($ort_name, "/") === 1) $nameSanitizations["/"] = "-";

        $ort_name = strtr(self::sanitizeInputString($ort_name), $nameSanitizations);
        $ort_name = self::sanitizeInputString(NodaUncertaintyHelper::cleanUncertaintyIndicatorsPlace($ort_name));

        return match ($lang) {
            'de' => self::_clean_german_abbreviations($ort_name),
            'hu' => self::_clean_hungarian_abbreviations($ort_name),
            default => $ort_name,
        };

    }
}
|
50
tests/NodaConsolidatedNamesForPersinstTest.php
Normal file
50
tests/NodaConsolidatedNamesForPersinstTest.php
Normal file
|
@ -0,0 +1,50 @@
|
||||||
|
<?PHP
|
||||||
|
/**
|
||||||
|
* Tests for setting uniform actor names.
|
||||||
|
*
|
||||||
|
* @author Joshua Ramon Enslin <joshua@museum-digital.de>
|
||||||
|
*/
|
||||||
|
declare(strict_types = 1);
|
||||||
|
use PHPUnit\Framework\TestCase;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Tests for setting uniform actor names.
|
||||||
|
*
|
||||||
|
* @covers \NodaConsolidatedNamesForPersinst
|
||||||
|
*/
|
||||||
|
final class NodaConsolidatedNamesForPersinstTest extends TestCase {
    /**
     * Asserts that consolidating $input for instance language $lang yields $expected.
     *
     * @param string $expected Expected consolidated name.
     * @param string $lang     Instance language.
     * @param string $input    Raw actor name.
     *
     * @return void
     */
    private static function assertConsolidatesTo(string $expected, string $lang, string $input):void {

        self::assertEquals($expected, NodaConsolidatedNamesForPersinst::consolidate_name($lang, $input));

    }

    /**
     * Checks that actor names are consolidated into their expected canonical forms.
     *
     * @small
     *
     * @return void
     */
    public function testCleaningNamesWithCanonicalForms():void {

        // Bracket rewriting and trimming of trailing separators.
        self::assertConsolidatesTo("Friedrich Barbarossa (Kaiser)", "de", "Friedrich Barbarossa <Kaiser>");
        self::assertConsolidatesTo("Friedrich Barbarossa (Kaiser)", "de", "Friedrich Barbarossa <Kaiser>, ");
        self::assertConsolidatesTo("Friedrich Barbarossa (Kaiser)", "de", "Friedrich Barbarossa <Kaiser>, ||");
        self::assertConsolidatesTo("Friedrich Barbarossa", "de", "(Friedrich Barbarossa)");
        self::assertConsolidatesTo("Friedrich Barbarossa", "de", "Friedrich Barbarossa.");

        // Cases in which the abbreviation must not be expanded: short names,
        // abbreviations preceded by a full stop, non-German instances.
        self::assertConsolidatesTo("Fr d.Ä", "de", "Fr d.Ä.");
        self::assertConsolidatesTo("Raffaelli, C. d. J", "de", "Raffaelli, C. d. J.");
        self::assertConsolidatesTo("Friedrich Barbarossa d.Ä", "en", "Friedrich Barbarossa d.Ä.");

        // All spellings of "the elder" end up in the same canonical form.
        $elderSpellings = [
            "Friedrich Barbarossa d.Ä.",
            "Friedrich Barbarossa d. Ä.",
            "Friedrich Barbarossa (d.Ä.)",
            "Friedrich Barbarossa (d. Ä.)",
            "Friedrich Barbarossa [d.Ä.]",
            "Friedrich Barbarossa [d. Ä.]",
        ];
        foreach ($elderSpellings as $spelling) {
            self::assertConsolidatesTo("Friedrich Barbarossa (der Ältere)", "de", $spelling);
        }

        // The same goes for "the younger".
        $youngerSpellings = [
            "Friedrich Barbarossa d.J.",
            "Friedrich Barbarossa d. J.",
            "Friedrich Barbarossa (d.J.)",
            "Friedrich Barbarossa (d. J.)",
            "Friedrich Barbarossa [d.J.]",
            "Friedrich Barbarossa [d. J.]",
        ];
        foreach ($youngerSpellings as $spelling) {
            self::assertConsolidatesTo("Friedrich Barbarossa (der Jüngere)", "de", $spelling);
        }

    }
}
|
63
tests/NodaConsolidatedNamesForPlacesTest.php
Normal file
63
tests/NodaConsolidatedNamesForPlacesTest.php
Normal file
|
@ -0,0 +1,63 @@
|
||||||
|
<?PHP
|
||||||
|
/**
|
||||||
|
* Tests for setting uniform place names.
|
||||||
|
*
|
||||||
|
* @author Joshua Ramon Enslin <joshua@museum-digital.de>
|
||||||
|
*/
|
||||||
|
declare(strict_types = 1);
|
||||||
|
use PHPUnit\Framework\TestCase;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Tests for setting uniform place names.
|
||||||
|
*
|
||||||
|
* @covers \NodaConsolidatedNamesForPlaces
|
||||||
|
*/
|
||||||
|
final class NodaConsolidatedNamesForPlacesTest extends TestCase {
    /**
     * Asserts that consolidating $input for instance language $lang yields $expected.
     *
     * @param string $expected Expected consolidated name.
     * @param string $lang     Instance language.
     * @param string $input    Raw place name.
     *
     * @return void
     */
    private static function assertConsolidatesTo(string $expected, string $lang, string $input):void {

        self::assertEquals($expected, NodaConsolidatedNamesForPlaces::consolidate_name($lang, $input));

    }

    /**
     * Checks that place names are consolidated into their expected canonical forms.
     *
     * @small
     *
     * @return void
     */
    public function testCleaningNamesWithCanonicalForms():void {

        // Hungarian: "u." followed by a number is expanded to "utca".
        self::assertConsolidatesTo("Test u. 12", "de", "Test u. 12");
        self::assertConsolidatesTo("Test utca 12", "hu", "Test u. 12");
        self::assertConsolidatesTo("Test u. Test", "hu", "Test u. Test");

        // German: trailing place type terms are moved into brackets.
        self::assertConsolidatesTo("Berlin, Insel Borneo", "de", "Berlin, Insel Borneo");
        self::assertConsolidatesTo("Berlin (Insel)", "de", "Berlin, Insel");
        self::assertConsolidatesTo("Berlin, Insel", "hu", "Berlin, Insel");

        // German: "str." followed by a number is expanded to "straße".
        self::assertConsolidatesTo("Adalbrechtstraße 12", "de", "Adalbrechtstr. 12");
        self::assertConsolidatesTo("Adalbrechtstr. 12", "hu", "Adalbrechtstr. 12");

        // German: street and town pairs are rewritten to "Street (Town)".
        self::assertConsolidatesTo("Adalbrechtstraße 12 (Berlin)", "de", "Berlin, Adalbrechtstr. 12");
        self::assertConsolidatesTo("Adalbrechtstraße 12 (Berlin)", "de", "Adalbrechtstr. 12, Berlin");
        self::assertConsolidatesTo("Berlin, Adalbrechtstr. 12", "hu", "Berlin, Adalbrechtstr. 12");

        // Same in Hungarian
        self::assertConsolidatesTo("Adalbrecht utca 12 (Berlin)", "hu", "Berlin, Adalbrecht utca 12");
        self::assertConsolidatesTo("Adalbrecht utca 12 (Berlin)", "hu", "Adalbrecht utca 12, Berlin");
        self::assertConsolidatesTo("Berlin, Adalbrecht utca 12", "de", "Berlin, Adalbrecht utca 12");

        // Budapest districts: trailing Roman numerals become "(Budapest, N. kerület)".
        self::assertConsolidatesTo("Deák Ferenc utca 16-18. (Budapest, 5. kerület)", "hu", "Deák Ferenc utca 16-18. Budapest, V.,");
        self::assertConsolidatesTo("Deák Ferenc utca 16-18. (Budapest, 5. kerület)", "hu", "Deák Ferenc utca 16-18. Budapest, V.");
        self::assertConsolidatesTo("Deák Ferenc utca 16-18. Budapest, V. abc", "hu", "Deák Ferenc utca 16-18. Budapest, V. abc");

        // Rewriting country names in brackets
        // NOTE(review): NodaConsolidatedNamesForPlaces itself does not appear to
        // implement this rewrite; presumably it is expected from
        // NodaUncertaintyHelper or still to be added - confirm.
        self::assertConsolidatesTo("Köln (Deutschland)", "de", "Deutschland-Köln");
        self::assertConsolidatesTo("Köln (Deutschland)", "de", "Deutschland, Köln");
        self::assertConsolidatesTo("Köln (Deutschland)", "de", "Köln, Deutschland");

    }
}
|
Loading…
Reference in New Issue
Block a user