Merge branch 'master' of gitea:museum-digital/MDNodaHelpers

Ping and reconnect DB in fulltext sync for actors fulltext index
Add shortened 100x A to list of blacklisted tags
2025-06-08 17:20:24 +02:00 · 2025-06-08 17:19:47 +02:00 · 2025-05-22 16:25:27 +02:00 · 2025-05-08 16:18:05 +02:00 · 2025-05-06 22:32:00 +02:00 · 2025-05-05 17:05:47 +02:00
21 changed files with 1806 additions and 1092 deletions
--- a/src/NodaBlacklistedTerms.php
+++ b/src/NodaBlacklistedTerms.php
@ -13,7 +13,7 @@ final class NodaBlacklistedTerms {
    /**
     * A blacklist of disallowed tags. All entries are listed in full lowercase.
     */
-    const TAG_BLACKLIST = [
+    public const TAG_BLACKLIST = [
        'de' => [
            'andere',
            'anderes',
@ -33,16 +33,35 @@ final class NodaBlacklistedTerms {
            'ding',
            'dinge',
            'aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa',
+            'Aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa',
+            'aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa',
+            'nichtmünzliches',
+            'unbestimmt',
+            'AA',
+            'BB',
+            'CC',
+            'DD',
+            'EE',
+            'FF',
+            'GG',
+            'HH',
+            'LL',
+            '-',
+            '?',
        ],
        'en' => [
            'other',
            'others',
            'unknown',
            'various',
+            '-',
+            '?',
        ],
        'hu' => [
            'ism.',
            'ismeretlen',
+            '-',
+            '?',
        ],
    ];

--- a/src/NodaConsolidatedNamesForPersinst.php
+++ b/src/NodaConsolidatedNamesForPersinst.php
@ -79,11 +79,12 @@ final class NodaConsolidatedNamesForPersinst extends NodaConsolidatedNamesAbstra
        if (count($parts) !== 2) return [];

        $nameOnly = trim($parts[0]);
-        $dateString = rtrim($parts[1], ')'); //
+        $dateString = trim(rtrim($parts[1], ')')); //

        if (!empty($dates = NodaTimeSplitter::is_timespan($dateString))
            && $dates->start_year !== '?'
            && $dates->end_year !== '?'
+            && $dates->start_year !== $dates->end_year
            && intval($dates->end_year) - intval($dates->start_year) < 150
        ) {
            return [
--- a/src/NodaConsolidatedNamesForPlaces.php
+++ b/src/NodaConsolidatedNamesForPlaces.php
@ -93,7 +93,7 @@ final class NodaConsolidatedNamesForPlaces extends NodaConsolidatedNamesAbstract
     *
     * @return string
     */
-    private static function _rewrite_narrower_broader_pairs_to_brackets(string $name, string $indicator, $separator = ', '):string {
+    private static function _rewrite_narrower_broader_pairs_to_brackets(string $name, string $indicator, string $separator = ', '):string {

        if (str_contains($name, $indicator)
            && substr_count($name, $indicator) === 1
@ -223,7 +223,7 @@ final class NodaConsolidatedNamesForPlaces extends NodaConsolidatedNamesAbstract
     *
     * @return string
     */
-    private static function _rewrite_ukrainian_names_by_hierarchy($name):string {
+    private static function _rewrite_ukrainian_names_by_hierarchy(string $name):string {

        $identifiersByLevel = [
            'state' => [' РСР', 'РСР ', ' АРСР', 'АРСР ', ' губернія', 'губернія '],
@ -325,7 +325,9 @@ final class NodaConsolidatedNamesForPlaces extends NodaConsolidatedNamesAbstract
        }

        $output = $main_name;
-        if (!empty($specifiers)) $output .= ' (' . implode(', ', $specifiers) . ')';
+        if (!empty($specifiers)) {
+            $output .= ' (' . implode(', ', $specifiers) . ')';
+        }

        return $output;

--- a/src/NodaGroup.php
+++ b/src/NodaGroup.php
@ -101,7 +101,11 @@ final class NodaGroup {
    /**
     * Updates a group.
     *
-     * @retun void
+     * @param integer $group_id ID of the group to update.
+     * @param string  $name     Name of the group.
+     * @param string  $comment  Optional: Comment for the group.
+     *
+     * @return void
     */
    public function update(int $group_id, string $name, string $comment = ''):void {

@ -123,7 +127,9 @@ final class NodaGroup {
    /**
     * Deletes a group.
     *
-     * @retun void
+     * @param integer $group_id ID of the group to delete.
+     *
+     * @return void
     */
    public function delete(int $group_id):void {

--- a/src/NodaIDGetter.php
+++ b/src/NodaIDGetter.php
@ -155,6 +155,37 @@ final class NodaIDGetter {

    }

+    /**
+     * Looks up a persinst ID by a name in the persinst translations table,
+     * irrespective of language.
+     *
+     * @param MDMysqli $mysqli_noda Database connection.
+     * @param string   $name        Name of the persinst to search for.
+     *
+     * @return integer ID of the first accepted row; 0 if $name is empty or nothing matches.
+     */
+    public static function getPersinstIDByAnyTransName(MDMysqli $mysqli_noda, string $name):int {
+
+        if (empty($name)) return 0; // Nothing to look up. Note: empty() is also true for the string "0".
+
+        $result = $mysqli_noda->query_by_stmt("
+            SELECT `persinst_id`, `trans_name`
+            FROM `persinst_translation`
+            WHERE `trans_name` = ?
+            LIMIT 2", "s", $name); // No language filter; at most two candidate rows are fetched.
+
+        while ($cur = $result->fetch_row()) {
+            if (self::_stri_matches($cur[1], $name)) { // Re-check the stored name with the class's own comparison helper.
+                $result->close();
+                return (int)$cur[0];
+            }
+        }
+        $result->close();
+
+        return 0;
+
+    }
+
    /**
     * Returns persinst ID by entry in persinst translations table
     * plus birth and death.
@ -456,6 +487,37 @@ final class NodaIDGetter {

    }

+    /**
+     * Looks up a place ID by a name in the place translations table,
+     * irrespective of language.
+     *
+     * @param MDMysqli $mysqli_noda Database connection.
+     * @param string   $name        Name of the place to search for.
+     *
+     * @return integer ID of the first accepted row; 0 if $name is empty or nothing matches.
+     */
+    public static function getPlaceIDByAnyTransName(MDMysqli $mysqli_noda, string $name):int {
+
+        if (empty($name)) return 0; // Nothing to look up. Note: empty() is also true for the string "0".
+
+        $result = $mysqli_noda->query_by_stmt("
+            SELECT `ort_id`, `trans_name`
+            FROM `ort_translation`
+            WHERE `trans_name` = ?
+            LIMIT 2", "s", $name); // No language filter; at most two candidate rows are fetched.
+
+        while ($cur = $result->fetch_row()) {
+            if (self::_stri_matches($cur[1], $name)) { // Re-check the stored name with the class's own comparison helper.
+                $result->close();
+                return (int)$cur[0];
+            }
+        }
+        $result->close();
+
+        return 0;
+
+    }
+
    /**
     * Returns place ID by entry in place noda table.
     *
@ -647,6 +709,37 @@ final class NodaIDGetter {

    }

+    /**
+     * Looks up a tag ID by a name in the tag translations table,
+     * irrespective of language.
+     *
+     * @param MDMysqli $mysqli_noda Database connection.
+     * @param string   $name        Name of the tag to search for.
+     *
+     * @return integer ID of the first accepted row; 0 if $name is empty or nothing matches.
+     */
+    public static function getTagIDByAnyTransName(MDMysqli $mysqli_noda, string $name):int {
+
+        if (empty($name)) return 0; // Nothing to look up. Note: empty() is also true for the string "0".
+
+        $result = $mysqli_noda->query_by_stmt("
+            SELECT `tag_id`, `trans_name`
+            FROM `tag_translation`
+            WHERE `trans_name` = ?
+            LIMIT 2", "s", $name); // No language filter; at most two candidate rows are fetched.
+
+        while ($cur = $result->fetch_row()) {
+            if (self::_stri_matches($name, $cur[1])) { // NOTE(review): passes (input, stored); the persinst/place variants pass (stored, input) - confirm _stri_matches() is symmetric.
+                $result->close();
+                return (int)$cur[0];
+            }
+        }
+        $result->close();
+
+        return 0;
+
+    }
+
    /**
     * Returns tag ID by entry in tag noda table.
     *
@ -838,6 +931,36 @@ final class NodaIDGetter {

    }

+    /**
+     * Looks up a time ID by a name in the time translations table, irrespective of language.
+     *
+     * @param MDMysqli $mysqli_noda Database connection.
+     * @param string   $name        Name of the time to search for.
+     *
+     * @return integer ID of the first accepted row; 0 if $name is empty or nothing matches.
+     */
+    public static function getTimeIDByAnyTransName(MDMysqli $mysqli_noda, string $name):int {
+
+        if (empty($name)) return 0; // Nothing to look up. Note: empty() is also true for the string "0".
+
+        $result = $mysqli_noda->query_by_stmt("
+            SELECT `zeit_id`, `trans_name`
+            FROM `zeit_translation`
+            WHERE `trans_name` = ?
+            LIMIT 2", "s", $name); // No language filter; at most two candidate rows are fetched.
+
+        while ($cur = $result->fetch_row()) {
+            if (self::_stri_matches($name, $cur[1])) { // NOTE(review): passes (input, stored); the persinst/place variants pass (stored, input) - confirm _stri_matches() is symmetric.
+                $result->close();
+                return (int)$cur[0];
+            }
+        }
+        $result->close();
+
+        return 0;
+
+    }
+
    /**
     * Returns time ID by entry in time translations table.
     *
@ -999,4 +1122,79 @@ final class NodaIDGetter {
        return 0;

    }
+
+    /**
+     * Resolves each phrase of a list to the ID of a tag, place, actor or time entry.
+     *
+     * @param MDMysqli                $mysqli_noda Database connection.
+     * @param string                  $lang        Language to check in.
+     * @param non-empty-array<string> $phrases     List of phrases to check.
+     *
+     * @return array{count: int, tag: integer[], actor: integer[], time: integer[], place: integer[]} Sorted ID lists; all empty with count 0 unless every phrase contributed a new ID.
+     */
+    public static function searchEntryNamesByList(MDMysqli $mysqli_noda, string $lang, array $phrases):array {
+
+        $output = [
+            'count' => 0,
+            'tag' => [],
+            'actor' => [],
+            'time' => [],
+            'place' => [],
+        ];
+
+        foreach ($phrases as $phrase) { // Lookup order per phrase: tag, place, actor, time - each first by names/rewrites in $lang, then by any translation.
+
+            if (($tag_id = NodaIDGetter::getTagIDByNamesAndRewrites($mysqli_noda, $lang, $phrase)) !== 0 && !in_array($tag_id, $output['tag'], true)) { // An ID that is already collected does not count; the chain then falls through to the next lookup.
+                $output['tag'][] = $tag_id;
+                ++$output['count'];
+            }
+            else if (($tag_id_by_tl = NodaIDGetter::getTagIDByAnyTransName($mysqli_noda, $phrase)) !== 0 && !in_array($tag_id_by_tl, $output['tag'], true)) {
+                $output['tag'][] = $tag_id_by_tl;
+                ++$output['count'];
+            }
+            else if (($place_id = NodaIDGetter::getPlaceIDByNamesAndRewrites($mysqli_noda, $lang, $phrase)) !== 0 && !in_array($place_id, $output['place'], true)) {
+                $output['place'][] = $place_id;
+                ++$output['count'];
+            }
+            else if (($place_id = NodaIDGetter::getPlaceIDByAnyTransName($mysqli_noda, $phrase)) !== 0 && !in_array($place_id, $output['place'], true)) {
+                $output['place'][] = $place_id;
+                ++$output['count'];
+            }
+            else if (($persinst_id = NodaIDGetter::getPersinstIDByNamesAndRewrites($mysqli_noda, $lang, $phrase, '', '')) !== 0 && !in_array($persinst_id, $output['actor'], true)) {
+                $output['actor'][] = $persinst_id;
+                ++$output['count'];
+            }
+            else if (($persinst_id = NodaIDGetter::getPersinstIDByAnyTransName($mysqli_noda, $phrase)) !== 0 && !in_array($persinst_id, $output['actor'], true)) {
+                $output['actor'][] = $persinst_id;
+                ++$output['count'];
+            }
+            else if (($time_id = NodaIDGetter::getTimeIDByNamesAndRewrites($mysqli_noda, $lang, $phrase)) !== 0 && !in_array($time_id, $output['time'], true)) {
+                $output['time'][] = $time_id;
+                ++$output['count'];
+            }
+            else if (($time_id = NodaIDGetter::getTimeIDByAnyTransName($mysqli_noda, $phrase)) !== 0 && !in_array($time_id, $output['time'], true)) {
+                $output['time'][] = $time_id;
+                ++$output['count'];
+            }
+
+        }
+
+        if (count($phrases) !== $output['count']) { // All-or-nothing: a single phrase that added no new ID voids the whole result.
+            return [
+                'count' => 0,
+                'tag' => [],
+                'actor' => [],
+                'time' => [],
+                'place' => [],
+            ];
+        }
+
+        if (!empty($output['tag'])) sort($output['tag']);
+        if (!empty($output['actor'])) sort($output['actor']);
+        if (!empty($output['time'])) sort($output['time']);
+        if (!empty($output['place'])) sort($output['place']);
+
+        return $output;
+
+    }
 }
--- a/src/NodaImportLogger.php
+++ b/src/NodaImportLogger.php
@ -32,8 +32,12 @@ final class NodaImportLogger {
        $logStmt = $mysqli_noda->do_prepare("INSERT INTO `persinst_logged_imports`
            (`instance`, `institution_id`, `input_string`, `persinst_id`)
            VALUES (?, ?, ?, ?)");
-        $logStmt->bind_param("sisi", $instance, $institution_id, $loggedName, $persinst_id);
-        $logStmt->execute();
+        try {
+            $logStmt->bind_param("sisi", $instance, $institution_id, $loggedName, $persinst_id);
+            $logStmt->execute();
+        }
+        catch (MDMysqliDuplicateKeysError $e) {
+        }
        $logStmt->close();

    }
@ -54,8 +58,12 @@ final class NodaImportLogger {
        $logStmt = $mysqli_noda->do_prepare("INSERT INTO `orte_logged_imports`
            (`instance`, `institution_id`, `input_string`, `ort_id`)
            VALUES (?, ?, ?, ?)");
-        $logStmt->bind_param("sisi", $instance, $institution_id, $name, $ort_id);
-        $logStmt->execute();
+        try {
+            $logStmt->bind_param("sisi", $instance, $institution_id, $name, $ort_id);
+            $logStmt->execute();
+        }
+        catch (MDMysqliDuplicateKeysError $e) {
+        }
        $logStmt->close();

    }
@ -76,8 +84,12 @@ final class NodaImportLogger {
        $logStmt = $mysqli_noda->do_prepare("INSERT INTO `zeiten_logged_imports`
            (`instance`, `institution_id`, `input_string`, `zeit_id`)
            VALUES (?, ?, ?, ?)");
-        $logStmt->bind_param("sisi", $instance, $institution_id, $name, $zeit_id);
-        $logStmt->execute();
+        try {
+            $logStmt->bind_param("sisi", $instance, $institution_id, $name, $zeit_id);
+            $logStmt->execute();
+        }
+        catch (MDMysqliDuplicateKeysError $e) {
+        }
        $logStmt->close();

    }
@ -98,8 +110,12 @@ final class NodaImportLogger {
        $logStmt = $mysqli_noda->do_prepare("INSERT INTO `tag_logged_imports`
            (`instance`, `institution_id`, `input_string`, `tag_id`)
            VALUES (?, ?, ?, ?)");
-        $logStmt->bind_param("sisi", $instance, $institution_id, $name, $tag_id);
-        $logStmt->execute();
+        try {
+            $logStmt->bind_param("sisi", $instance, $institution_id, $name, $tag_id);
+            $logStmt->execute();
+        }
+        catch (MDMysqliDuplicateKeysError $e) {
+        }
        $logStmt->close();

    }
--- a/src/NodaSplitTime.php
+++ b/src/NodaSplitTime.php
@ -28,9 +28,10 @@ final class NodaSplitTime {
    /**
     * Returns a single, exact date.
     *
-     * @param string $year  Year.
-     * @param string $month Month.
-     * @param string $day   Day.
+     * @param string                       $year                   Year.
+     * @param string                       $month                  Month.
+     * @param string                       $day                    Day.
+     * @param NodaTimeBeforeAfterIndicator $before_after_indicator Determines if the time is exact or before / after.
     *
     * @return NodaSplitTime
     */
@ -296,6 +297,15 @@ final class NodaSplitTime {
    /**
     * Constructor.
     *
+     * @param string                       $start_year              Year.
+     * @param string                       $end_year                Year.
+     * @param string                       $counting_time_month     Month.
+     * @param string                       $counting_time_day       Day.
+     * @param NodaCountingTimeIndicator    $counting_time_indicator Determines if the time is BCE or CCE.
+     * @param NodaTimeBeforeAfterIndicator $before_after_indicator  Determines if the time is inexact to one direction.
+     * @param false|string                 $start_date              Start date.
+     * @param false|string                 $end_date                End date.
+     *
     * @return void
     */
    public function __construct(string $start_year, string $end_year,
@ -379,5 +389,19 @@ final class NodaSplitTime {
            }
        }

+        // Validate
+        $startDateTime = MD_STD::strtotime("2000-" . substr($this->start_date, -5));
+        if (checkdate((int)date('m', $startDateTime), (int)date('d', $startDateTime), (int)date('Y', $startDateTime)) === false) {
+            throw new MDgenericInvalidInputsException("Invalid start date: " . $this->start_date);
+        }
+
+        if (!empty((int)$this->counting_time_day)) {
+            // The year 2000 is used here as it is a leap year and lots of years accepted in md are not accepted
+            // by checkdate.
+            if (checkdate((int)$this->counting_time_month, (int)$this->counting_time_day, 2000) === false) {
+                throw new MDgenericInvalidInputsException("Invalid date formed by counting time: " . $this->counting_time_month . ' -- ' . $this->counting_time_day);
+            }
+        }
+
    }
 }
--- a/src/NodaTagRelationIdentifier.php
+++ b/src/NodaTagRelationIdentifier.php
@ -15,6 +15,16 @@ final class NodaTagRelationIdentifier {
    private const SUFFIXES = [
        'de' => [
            ' (Motiv)' => MDTagRelationType::display_subject,
+            ' [Motiv]' => MDTagRelationType::display_subject,
+            ' <Motiv>' => MDTagRelationType::display_subject,
+
+            ' (Material)' => MDTagRelationType::material,
+            ' [Material]' => MDTagRelationType::material,
+            ' <Material>' => MDTagRelationType::material,
+
+            ' (Technik)' => MDTagRelationType::technique,
+            ' [Technik]' => MDTagRelationType::technique,
+            ' <Technik>' => MDTagRelationType::technique,
        ]
    ];

--- a/src/NodaTimeAutotranslater.php
+++ b/src/NodaTimeAutotranslater.php
@ -13,7 +13,7 @@ final class NodaTimeAutotranslater {

    // TODO: Move these to NodaTimeAutotranslaterLocales

-    const LANGS_SYLLABLE_CLEANING = [
+    public const LANGS_SYLLABLE_CLEANING = [
        "hu" => [
            "10-as évek" => "10-es évek",
            "40-as évek" => "40-es évek",
@ -463,13 +463,13 @@ final class NodaTimeAutotranslater {
    }

    /**
-     * Gets translations for a given entry type.
+     * Prepares translations for each available language.
     *
     * @param array<integer|string> $timeInfo Time information.
     *
     * @return array<string>
     */
-    public static function getTranslations(array $timeInfo):array {
+    public static function prepareTranslations(array $timeInfo):array {

        if (!empty($timeInfo['zeit_name']) and strlen((string)$timeInfo['zeit_name']) > 10 and !empty($timespanDates = NodaTimeSplitter::attempt_splitting_from_till((string)$timeInfo['zeit_name']))) {

@ -504,8 +504,11 @@ final class NodaTimeAutotranslater {
            $output = [];
            $cases = NodaTimeAutotranslaterLocales::cases();
            foreach ($cases as $tLang) {
-                $start_term = self::getTranslations($startTimeInfo)[$tLang->name];
-                $end_term = self::getTranslations($endTimeInfo)[$tLang->name];
+                $startTls = self::getTranslations($startTimeInfo);
+                $endTls = self::getTranslations($endTimeInfo);
+                if (empty($startTls) || empty($endTls)) return [];
+                $start_term = $startTls[$tLang->name];
+                $end_term = $endTls[$tLang->name];

                $output[$tLang->name] = \sprintf($tLang->formatYearspanForSprintf(), $start_term, $end_term);
            }
@ -604,6 +607,78 @@ final class NodaTimeAutotranslater {

    }

+    /**
+     * Checks that every translation contains the start and end year as a substring.
+     *
+     * @param string|integer        $start        Start year.
+     * @param string|integer        $end          End year.
+     * @param array<string, string> $translations Translations.
+     *
+     * @return boolean False if a checked translation lacks a year; true otherwise, including cases that are not validated.
+     */
+    public static function validateTranslations(string|int $start, string|int $end, array $translations):bool {
+
+        $start = ltrim((string)$start, ' 0-'); // Strips leading spaces, zeroes and minus signs.
+        $end = ltrim((string)$end, ' 0-');
+
+        // Edge cases: Centuries and decades have special translations
+        // and can thus not be validated properly
+        // Century BCE
+        if (substr($start, -1) === "0" && substr($end, -1) === '1' && $start > $end) {
+            return true;
+        }
+        // Century CE
+        if (substr($start, -1) === "1" && substr($end, -1) === '0' && $start < $end) {
+            return true;
+        }
+        // Decade
+        if (substr($start, -1) === "0" && substr($end, -1) === '9' && $start < $end) {
+            return true;
+        }
+
+        // 1920 + ? can be both Since 1920 and After 1919, so validation
+        // is impossible there, too
+        if ($start === '?' || $end === '?') return true;
+
+        // Unset unvalidatable languages
+        unset($translations['ar'], $translations['fa']);
+
+        if ($start !== '?') { // Always true at this point: a '?' start has already returned above.
+            foreach ($translations as $t) {
+                if (!str_contains($t, $start)) {
+                    return false;
+                }
+            }
+        }
+        if ($end !== '?' && $start !== $end) { // The '?' test is always true here; identical years were already covered by the start check.
+            foreach ($translations as $t) {
+                if (!str_contains($t, $end)) {
+                    return false;
+                }
+            }
+        }
+
+        return true;
+
+    }
+
+    /**
+     * Prepares translations for a time entry and returns them only if they pass validation.
+     *
+     * @param array<integer|string> $timeInfo Time information.
+     *
+     * @return array<string> Output of prepareTranslations(); an empty array if validateTranslations() rejects it.
+     */
+    public static function getTranslations(array $timeInfo):array {
+
+        $output = self::prepareTranslations($timeInfo);
+
+        if (self::validateTranslations($timeInfo['zeit_beginn'], $timeInfo['zeit_ende'], $output) === false) return []; // Reads zeit_beginn / zeit_ende without checking that the keys exist.
+
+        return $output;
+
+    }
+
    /**
     * Runs autotranslater.
     *
@ -613,7 +688,9 @@ final class NodaTimeAutotranslater {
     */
    public function translate(array $timeInfo):void {

-        $translations = self::getTranslations($timeInfo);
+        if (empty($translations = self::getTranslations($timeInfo))) {
+            return;
+        }

        $this->_mysqli_noda->autocommit(false);

--- a/src/NodaTimeSplitter.php
+++ b/src/NodaTimeSplitter.php
@ -140,7 +140,7 @@ final class NodaTimeSplitter {
        "decemberig",
    ];

-    private const REGEX_CENTURIES = '(\ |)(Jh\.|Jhd(|\.)|Jhdt(|\.)|Jahrhundert|sz|század|th century|ст|ст\.)';
+    private const REGEX_CENTURIES = '(\ |)(Jh|Jh\.|Jhd(|\.)|Jhdt(|\.)|Jahrhundert|sz|század|th century|ст|ст\.)';
    private const REGEX_DECADES = '(s|er|er\ Jahre|(\-|\ )es\ évek|(\-|\ )as\ \évek|\ évek|\-es\ években|\-ті)';

    /**
@ -345,16 +345,29 @@ final class NodaTimeSplitter {
                    $start_date = $output->start_date;
                    $end_date = $output->end_date;
                }
+                else if ($start === $end && (int)str_replace('-', '', $start_date) > (int)str_replace('-', '', $end_date)) {
+                    $start_date = $output->start_date;
+                    $end_date = $output->end_date;
+                }
                return new NodaSplitTime($start, $end, $output->counting_time_month, $output->counting_time_day,
                    NodaCountingTimeIndicator::bce, $output->before_after_indicator, '-' . $start_date, '-' . $end_date);
            }
        }

-        if (\preg_match("/^[0-9][0-9][0-9][0-9]\ bis [0-9][0-9][0-9][0-9]$/", $datum)) {
+        if (\preg_match("/^[0-9]{4}\ bis\ [0-9]{4}$/", $datum)) {
            $start = \substr($datum, 0, 4);
            $end = \substr($datum, -4);
            return new NodaSplitTime($start, $end);
        }
+        // "YYYY und YYYY" / "YYYY oder YYYY" / "YYYY od. YYYY": accepted as a timespan only if the two years directly follow each other. The dot of "od." is escaped so that it matches a literal period only.
+        if (\preg_match("/^[0-9]{4}\ (und|oder|od\.)\ [0-9]{4}$/", $datum)) {
+            $start = \substr($datum, 0, 4);
+            $end = \substr($datum, -4);
+            $startInt = (int)$start;
+            $endInt = (int)$end;
+            if ($startInt === $endInt - 1) {
+                return new NodaSplitTime($start, $end);
+            }
+        }

        $datum = \str_replace(". ", ".", $datum);

@ -536,22 +549,22 @@ final class NodaTimeSplitter {

        // 10000-20000
        if (!empty(\preg_match("/^[0-9]{5}(\-|\/)[0-9]{5}$/", $datum))) {
-            return new NodaSplitTime(start_year: \substr($datum, 0, 5), end_year: \substr($datum, 6, 5));
+            return new NodaSplitTime(start_year: \substr($datum, 0, 5), end_year: \substr($datum, -5));
        }

        // 0000-0000
-        if (\preg_match("/^[0-9]{4}(\-|\/)[0-9]{4}(\.|)$/", $datum)) {
-            return new NodaSplitTime(start_year: \substr($datum, 0, 4), end_year: \substr($datum, 5, 4));
+        if (\preg_match("/^[0-9]{4}(\-|\/|\–)[0-9]{4}(\.|)$/", $datum)) {
+            return new NodaSplitTime(start_year: \substr($datum, 0, 4), end_year: \substr($datum, -4));
        }

        // 1.900-2.000
-        if (\preg_match("/^[0-9]\.[0-9][0-9][0-9](\-|\/)[0-9]\.[0-9][0-9][0-9]$/", $datum)) {
+        if (\preg_match("/^[0-9]\.[0-9][0-9][0-9](\-|\/|\–)[0-9]\.[0-9][0-9][0-9]$/", $datum)) {
            $datum = \str_replace(".", "", $datum);
-            return new NodaSplitTime(start_year: \substr($datum, 0, 4), end_year: \substr($datum, 5, 4));
+            return new NodaSplitTime(start_year: \substr($datum, 0, 4), end_year: \substr($datum, -4));
        }

        // German TT.MM.JJJJ  /  TT.MM.JJJ  /  TT.MM.JJ  /  TT.MM.J
-        if (\preg_match("/^[0-9][0-9]\.[0-9][0-9]\.([0-9][0-9][0-9][0-9]|[0-9][0-9][0-9]|[0-9][0-9]|[0-9])$/", $datum)) { // German T.MM.JJJJ
+        if (\preg_match("/^[0-9]{2}\.[0-9]{2}\.([0-9]{4}|[0-9]{3}|[0-9]{2}|[0-9])$/", $datum)) { // German T.MM.JJJJ
            $year = \substr($datum, 6, 4);
            $month = \substr($datum, 3, 2);
            $day   = \substr($datum, 0, 2);
@ -559,7 +572,7 @@ final class NodaTimeSplitter {
        }

        // German TT.M.JJJJ  /  TT.M.JJJ  /  TT.M.JJ  /  TT.M.J
-        if (\preg_match("/^[0-9][0-9]\.[0-9]\.([0-9][0-9][0-9][0-9]|[0-9][0-9][0-9]|[0-9][0-9]|[0-9])$/", $datum)) { // German T.MM.JJJJ
+        if (\preg_match("/^[0-9]{2}\.[0-9]\.([0-9]{4}|[0-9]{3}|[0-9]{2}|[0-9])$/", $datum)) { // German T.MM.JJJJ
            $year = \substr($datum, 5, 4);
            $month = "0" . \substr($datum, 3, 1);
            $day   = \substr($datum, 0, 2);
@ -590,10 +603,24 @@ final class NodaTimeSplitter {
            return NodaSplitTime::genExactDate($year, $month, $day);
        }
        // Intl': 2020-12
-        if (\preg_match("/^[0-9]{4}\-[0-9]{2}$/", $datum)) { // German Y-m
+        if (\preg_match("/^[0-9]{4}\-[0-9]{2}$/", $datum)) { // German Y-m or 1912-15
            $year = \substr($datum, 0, 4);
-            $month = \substr($datum, 5, 2);
-            return new NodaSplitTime($year, $year, $month);
+            $month = \substr($datum, -2);
+
+            // Assume the end is a month. 12 (December) is a valid month, so the
+            // bound is inclusive; otherwise "2020-12" would be read as 2020-2012.
+            if (intval($month) <= 12) {
+                // If the last two digits of the year are below 12 as well, the
+                // second number may be either a month or an end year: do not split.
+                // Example: 1903-04
+                if (substr($datum, 2, 2) < 12) {
+                    return false;
+                }
+                return new NodaSplitTime($year, $year, $month);
+            }
+            else {
+                // Otherwise the two digits are an abbreviated end year in the
+                // same century as the start year (1912-15 => 1912 to 1915).
+                $end = \substr($year, 0, 2) . $month;
+                return new NodaSplitTime($year, $end);
+            }
        }

        // German MM.JJJJ
@ -648,7 +675,27 @@ final class NodaTimeSplitter {
        if (\preg_match("/^[0-9]{4}\-[0-9]{3}$/", $datum)) { // Hungarian Y-m
            $start = \substr($datum, 0, 4);
            $end = \substr($datum, -3);
-            return new NodaSplitTime("0" . $start, "0" . $end);
+            return new NodaSplitTime($start, "0" . $end);
+        }
+
+        // 2-3 (n. Chr.)
+        if (\preg_match("/^[0-9]{1}\-[0-9]{1}$/", $datum)) {
+            return new NodaSplitTime("000" . \substr($datum, 0, 1), "000" . \substr($datum, -1));
+        }
+
+        // 300-2 (v. Chr.)
+        if (\preg_match("/^[0-9]{3}\-[0-9]{2}$/", $datum)) {
+            return new NodaSplitTime("0" . \substr($datum, 0, 3), "00" . \substr($datum, -2));
+        }
+
+        // 30-2 (v. Chr.)
+        if (\preg_match("/^[0-9]{2}\-[0-9]{1}$/", $datum)) {
+            return new NodaSplitTime("00" . \substr($datum, 0, 2), "000" . \substr($datum, -1));
+        }
+
+        // 2-300 (n. Chr.)
+        if (\preg_match("/^[0-9]{1}\-[0-9]{3}$/", $datum)) {
+            return new NodaSplitTime("000" . \substr($datum, 0, 1), "0" . \substr($datum, -3));
        }

        // 20-30 (n. Chr.)
@ -658,6 +705,18 @@ final class NodaTimeSplitter {
            return new NodaSplitTime("00" . $start, "00" . $end);
        }

+        // 20-130 (n. Chr.)
+        if (\preg_match("/^[0-9]{2}\-[0-9]{3}$/", $datum)) { // 20-40 (n. Chr.)
+            $start = \substr($datum, 0, 2);
+            $end = \substr($datum, -3);
+            return new NodaSplitTime("00" . $start, "0" . $end);
+        }
+
+        // 120-1130 (n. Chr.)
+        if (\preg_match("/^[0-9]{3}\-[0-9]{4}$/", $datum)) { // 20-40 (n. Chr.)
+            return new NodaSplitTime("0" . \substr($datum, 0, 3), \substr($datum, -4));
+        }
+
        // 1920
        if (\preg_match("/^[0-9]{4}(\.|)$/", $datum)) {
            $start = \substr($datum, 0, 4);
@ -699,35 +758,67 @@ final class NodaTimeSplitter {

        $datum = self::clean_input($datum);

-        if (\preg_match("/^[0-9]{4}\.[0-9]{2}\.[0-9]{2}(\.|)\-$/", $datum)) { // YYYY.MM.DD.
-            $year = \substr($datum, 0, 4);
-            $month = \substr($datum, 5, 2);
-            $day = \substr($datum, 8, 2);
+        $inpDateWoSpaces = str_replace(" ", "", $datum);
+
+        if (\preg_match("/^[0-9]{4}\.[0-9]{2}\.[0-9]{2}(\.|)\-$/", $inpDateWoSpaces)) { // YYYY.MM.DD.
+            $year = \substr($inpDateWoSpaces, 0, 4);
+            $month = \substr($inpDateWoSpaces, 5, 2);
+            $day = \substr($inpDateWoSpaces, 8, 2);
            return NodaSplitTime::genExactDate($year, $month, $day, NodaTimeBeforeAfterIndicator::since);
        }
-        if (\preg_match("/^[0-9]{4}\.[0-9]{2}(\.|)\-$/", $datum)) { // YYYY.MM.-
-            $start = \substr($datum, 0, 4);
-            $month = \substr($datum, 5, 2);
+        if (\preg_match("/^[0-9]{4}\.[0-9]{2}(\.|)\-$/", $inpDateWoSpaces)) { // YYYY.MM.-
+            $start = \substr($inpDateWoSpaces, 0, 4);
+            $month = \substr($inpDateWoSpaces, 5, 2);
            return new NodaSplitTime($start, '?', $month, before_after_indicator: NodaTimeBeforeAfterIndicator::since);
        }
-        if (\preg_match("/^[0-9]{4}\-$/", $datum)) { // YYYY-
-            $start = \substr($datum, 0, 4);
+        if (\preg_match("/^[0-9]{4}\-$/", $inpDateWoSpaces)) { // YYYY-
+            $start = \substr($inpDateWoSpaces, 0, 4);
            return new NodaSplitTime($start, '?', before_after_indicator: NodaTimeBeforeAfterIndicator::since);
        }

-        if (\preg_match("/^\-[0-9]{4}\.[0-9]{2}\.[0-9]{2}$/", $datum)) { // Hungarian Y-m
-            $year = \substr($datum, 1, 4);
-            $month = \substr($datum, 6, 2);
-            $day = \substr($datum, 9, 2);
+        // ?.6.2024
+        if (\preg_match("/^\?\.([0-9]|[0-9]{2})\.[0-9]{4}$/", $inpDateWoSpaces)) { // German Y-m
+            $year = \substr($inpDateWoSpaces, -4);
+            $month = trim(\substr($inpDateWoSpaces, 2, 2), '. ');
+            return new NodaSplitTime($year, $year, $month);
+        }
+
+        // ?.?.2024
+        if (\preg_match("/^\?\.\?\.[0-9]{4}$/", $inpDateWoSpaces)) { // German Y-m
+            $year = \substr($inpDateWoSpaces, -4);
+            return new NodaSplitTime($year, $year);
+        }
+
+        if (\preg_match("/^[0-9]{4}$/", \trim($inpDateWoSpaces, '. ?!()[]X'))) { // German Y-m
+            $year = \trim($inpDateWoSpaces, '. ?!()[]X');
+            return new NodaSplitTime($year, $year);
+        }
+
+        if ((str_starts_with($inpDateWoSpaces, '0-') || str_ends_with($inpDateWoSpaces, '-0')) && \preg_match("/^[0-9]{4}$/", \strtr($inpDateWoSpaces, ['-0' => '', '0-' => ''])) && !str_ends_with($inpDateWoSpaces, '0-0')) {
+            $year = \strtr($inpDateWoSpaces, ['-0' => '', '0-' => '']);
+            if (strlen($year) === 4) {
+                return new NodaSplitTime($year, $year);
+            }
+        }
+
+        if (\preg_match("/^[0-9]{4}$/", \strtr($inpDateWoSpaces, ['o' => '0']))) { // German Y-m
+            $year = \strtr($inpDateWoSpaces, ['o' => '0']);
+            return new NodaSplitTime($year, $year);
+        }
+
+        if (\preg_match("/^\-[0-9]{4}\.[0-9]{2}\.[0-9]{2}$/", $inpDateWoSpaces)) { // Hungarian Y-m
+            $year = \substr($inpDateWoSpaces, 1, 4);
+            $month = \substr($inpDateWoSpaces, 6, 2);
+            $day = \substr($inpDateWoSpaces, 9, 2);
            return NodaSplitTime::genExactDate($year, $month, $day, NodaTimeBeforeAfterIndicator::until);
        }
-        if (\preg_match("/^\-[0-9]{4}\.[0-9]{2}$/", $datum)) { // Hungarian Y-m
-            $year = \substr($datum, 1, 4);
-            $month = \substr($datum, 6, 2);
+        if (\preg_match("/^\-[0-9]{4}\.[0-9]{2}$/", $inpDateWoSpaces)) { // Hungarian Y-m
+            $year = \substr($inpDateWoSpaces, 1, 4);
+            $month = \substr($inpDateWoSpaces, 6, 2);
            return new NodaSplitTime('?', $year, $month, before_after_indicator: NodaTimeBeforeAfterIndicator::until);
        }
-        if (\preg_match("/^\-[0-9]{4}$/", $datum)) { // Hungarian -Y
-            $year = \substr($datum, 1, 4);
+        if (\preg_match("/^\-[0-9]{4}$/", $inpDateWoSpaces)) { // Hungarian -Y
+            $year = \substr($inpDateWoSpaces, 1, 4);
            return new NodaSplitTime('?', $year, before_after_indicator: NodaTimeBeforeAfterIndicator::until);
        }

@ -828,7 +919,7 @@ final class NodaTimeSplitter {
                    $output->counting_time_indicator, NodaTimeBeforeAfterIndicator::until, '?', $output->end_date);
            }
        }
-        if (str_ends_with($datum, '-as évekig') || str_ends_with($datum, '-es évekig')) {
+        if (str_ends_with($datum, ' as évekig') || str_ends_with($datum, ' es évekig') || str_ends_with($datum, '-as évekig') || str_ends_with($datum, '-es évekig')) {
            if ($output = self::attempt_splitting(\substr($datum, 0, -2))) {
                return new NodaSplitTime('?', $output->end_year, $output->counting_time_month, $output->counting_time_day,
                    $output->counting_time_indicator, NodaTimeBeforeAfterIndicator::until, '?', $output->end_date);
@ -899,7 +990,7 @@ final class NodaTimeSplitter {
        }

        // 1. Jahrhundert
-        if (\preg_match("/^[0-9]\.\ (Jh\.|Jahrhundert|sz|század)$/", $datum)) {
+        if (\preg_match("/^[0-9]\.\ (Jh\|Jh\.|Jahrhundert|sz|század)$/", $datum)) {
            if ($centuryNo = \intval(\substr($datum, 0, 1))) {
                $centuryNo--;
                return new NodaSplitTime((string)$centuryNo . "01", \strval($centuryNo + 1) . '00');
@ -907,7 +998,7 @@ final class NodaTimeSplitter {
        }

        // 17.-18. Jahrhundert
-        if (\preg_match("/^[0-9]{2}(\.|)(|\ Jh\.||\ Jahrhundert||\ sz||\ század)(\-|\/)[0-9]{2}\.\ (Jh\.|Jahrhundert|sz|század)$/", $datum)) {
+        if (\preg_match("/^[0-9]{2}(\.|)(|\ Jh|\ Jh\.|\ Jahrhundert|\ sz|\ század)(\-|\/)[0-9]{2}\.\ (Jh\.|Jahrhundert|sz|század)$/", $datum)) {
            if (\strpos($datum, '/') !== false) {
                $datum = str_replace('/', '-', $datum);
            }
@ -1040,6 +1131,33 @@ final class NodaTimeSplitter {

    }

+    /**
+     * Rewrites "N. Hälfte (des) NN. Jahrhundert(s)" (N = 1 or 2) into a year span like "1800-1850".
+     *
+     * @param string $datum Date.
+     *
+     * @return string|false Year span "YYYY-YYYY", or false if the wording is not recognized.
+     */
+    private static function _rewrite_special_cases_regular(string $datum):string|false {
+
+        if (!\preg_match("/^(1|2)\.\ Hälfte(|\ des)\ [0-9]{2}\.\ Jahrhundert(|s)$/", $datum)) {
+            return false;
+        }
+
+        // Offset 10 skips "N. Hälfte" (10 bytes when "ä" is two bytes, as in UTF-8); ltrim() drops " des ".
+        $centuryDigits = substr(ltrim(substr($datum, 10), "des Hälfte"), 0, 2);
+        if (!is_numeric($centuryDigits)) {
+            return false;
+        }
+
+        $prefix = (int)$centuryDigits - 1;
+        return match((int)substr($datum, 0, 1)) {
+            1 => $prefix . "00-" . $prefix . "50",
+            2 => $prefix . "50-" . $prefix . "99",
+        };
+
+    }
+
    /**
     * Contains special rules for incorrectly or incompletely spelled out timespan names.
     * To be called by self::attempt_splitting_from_till().
@ -1052,6 +1170,15 @@ final class NodaTimeSplitter {

        if (empty($datum)) return '';

+        if (\preg_match("/^1\.\ (Halbjahr|Hälfte)\ [0-9]{4}$/", $datum)) {
+            $year = substr($datum, -4);
+            return "Januar $year-Juni $year";
+        }
+        if (\preg_match("/^2\.\ (Halbjahr|Hälfte)\ [0-9]{4}$/", $datum)) {
+            $year = substr($datum, -4);
+            return "Juli $year-Dezember $year";
+        }
+
        $inputLength = strlen($datum);

        // Hungarian year and month until month
@ -1091,6 +1218,33 @@ final class NodaTimeSplitter {
            return $reconstituted;
        }

+        // German T.-T.MM.JJJJ / T.-T.MM.JJJ / T.-T.MM.JJ / T.-T.MM.J
+        if (\preg_match("/^[0-9].\-[0-9]\.([0-9]|[0-9]{2})\.([0-9]{4}|[0-9]{3}|[0-9]{2}|[0-9])$/", $datum)) { // German T.MM.JJJJ
+            $year = \substr($datum, -4);
+            $month = trim(\substr($datum, -7, 2), '.');
+            $day   = '0' . \substr($datum, 3, 1);
+            $firstday   = '0' . \substr($datum, 0, 1);
+            return "$firstday.$month.$year-$day.$month.$year";
+        }
+
+        // German T.-TT.MM.JJJJ / T.-TT.MM.JJJ / T.-TT.MM.JJ / T.-TT.MM.J
+        if (\preg_match("/^[0-9].\-[0-9]{2}\.([0-9]|[0-9]{2})\.([0-9]{4}|[0-9]{3}|[0-9]{2}|[0-9])$/", $datum)) { // German T.MM.JJJJ
+            $year = \substr($datum, -4);
+            $month = trim(\substr($datum, -7, 2), '.');
+            $day   = \substr($datum, 3, 2);
+            $firstday   = '0' . \substr($datum, 0, 1);
+            return "$firstday.$month.$year-$day.$month.$year";
+        }
+
+        // German TT.-TT.MM.JJJJ  /  TT.-TT.MM.JJJ  /  TT.-TT.MM.JJ  /  TT.-TT.MM.J
+        if (\preg_match("/^[0-9]{2}.\-[0-9]{2}\.([0-9]|[0-9]{2})\.([0-9]{4}|[0-9]{3}|[0-9]{2}|[0-9])$/", $datum)) { // German T.MM.JJJJ
+            $year = \substr($datum, -4);
+            $month = trim(\substr($datum, -7, 2), '.');
+            $day   = \substr($datum, 4, 2);
+            $firstday   = \substr($datum, 0, 2);
+            return "$firstday.$month.$year-$day.$month.$year";
+        }
+
        // 17-19. Jahrhundert
        if (\preg_match("/^[0-9]{2}(\.|)\-[0-9]{2}(\.|)" . self::REGEX_CENTURIES . "$/", $datum)) {
            $parts = explode('-', $datum);
@ -1208,6 +1362,30 @@ final class NodaTimeSplitter {

    }

+    /**
+     * Strips surrounding clutter from a date string and collapses "X-X" into "X".
+     *
+     * @param string $input Input string.
+     *
+     * @return string Cleaned string.
+     */
+    private static function _runBasicNameCleanup(string $input):string {
+
+        // Outer whitespace first, then outer commas / semicolons, then leading spaces and dots.
+        $withoutWhitespace = trim($input);
+        $withoutSeparators = trim($withoutWhitespace, ',;');
+        $cleaned = ltrim($withoutSeparators, ' .');
+
+        // A span whose start equals its end (e.g. "1440-1440") is reduced to that single value.
+        $halves = explode('-', $cleaned);
+        if (count($halves) === 2 && $halves[0] === $halves[1]) {
+            return $halves[0];
+        }
+
+        return $cleaned;
+
+    }
+
    /**
     * Wrapper to check if any splitting command works.
     *
@ -1217,6 +1395,8 @@ final class NodaTimeSplitter {
     */
    public static function attempt_splitting(string $datum):NodaSplitTime|false {

+        $datum = self::_runBasicNameCleanup($datum);
+
        try {
            if (!empty($moda = self::is_timespan($datum))) {
                return $moda;
@ -1254,6 +1434,10 @@ final class NodaTimeSplitter {
            }
        }

+        if ($rewrite = self::_rewrite_special_cases_regular($datum)) {
+            return self::attempt_splitting($rewrite);
+        }
+
        return false;

    }
--- a/src/NodaUncertaintyHelper.php
+++ b/src/NodaUncertaintyHelper.php
@ -61,6 +61,7 @@ final class NodaUncertaintyHelper {
        "(?)",
        "?",
        " [vermutlich]",
+        " vermutlich",
        " [verm.]",
        " [wahrscheinlich]",
    ];
@ -100,6 +101,7 @@ final class NodaUncertaintyHelper {
        "c. ",
        "ca ",
        "ca. ",
+        "ca.",
        "Ca ",
        "Ca. ",
        "za. ",
@ -141,8 +143,11 @@ final class NodaUncertaintyHelper {
        " [circa]",
        " (verm.)",
        " (vermutl.)",
+        " vermutlich",
        " körül",
        ", um",
+        ", ca.",
+        ", ca",
        " (um)",
        " (ок.)",
    ];
--- a/src/NodaWikidataFetcher.php
+++ b/src/NodaWikidataFetcher.php
@ -18,7 +18,7 @@ final class NodaWikidataFetcher {
    ];

    public const LANGUAGES_MAIN_DESC = ['de', 'da', 'en', 'es', 'fr', 'hu', 'it', 'jp', 'nl', 'pt', 'ru', 'sv', 'sk', 'uk', 'zh'];
-    public const LANGUAGES_TO_CHECK = ['ar', 'bg', 'bn', 'cs', 'da', 'de', 'el', 'en', 'es', 'fa', 'fi', 'fr', 'ha', 'he', 'hi', 'hu', 'id', 'it', 'ja', 'ka', 'ko', 'nl', 'pl', 'pt', 'ro', 'ru', 'sv', 'sk', 'sw', 'ta', 'th', 'tl', 'tr', 'uk', 'ur', 'vi', 'zh'];
+    public const LANGUAGES_TO_CHECK = ['ar', 'bg', 'bn', 'cs', 'da', 'de', 'el', 'en', 'es', 'fa', 'fi', 'fr', 'ha', 'he', 'hi', 'hu', 'id', 'it', 'ja', 'ka', 'kn', 'ko', 'nl', 'pl', 'pt', 'ro', 'ru', 'sv', 'sk', 'sw', 'ta', 'te', 'th', 'tl', 'tr', 'uk', 'ur', 'vi', 'zh'];

    public const LANGUAGES_TO_CAPITALIZE = ["cs", "da", "de", "en", "es", "fr", "fi", "id", "it", "nl", "pl", "pt", "ru", "sv", 'sk', "tl", "tr"];

@ -43,25 +43,6 @@ final class NodaWikidataFetcher {
        "orcid" => "P496",
    ];

-    private const WIKIPEDIA_REMOVE_LITERALS = [
-        "<p>Si vous disposez d'ouvrages ou d'articles de référence ou si vous ",
-        '<p><b>En pratique&#160;:</b> <a href="/wiki/Wikip%C3%A9dia:Citez_vos_sources#Qualité_des_sources" title="Wikipédia:Citez vos sources">Quelles sources sont attendu',
-        '<pVous pouvez partager vos connaissances en l’améliorant (',
-        '<p class="mw-empty-elt">',
-        '<p><small>Géolocalisation sur la carte',
-        '<p><b>Koordinaatit:</b>',
-        '<p><span class="executeJS" data-gadgetname="ImgToggle"></span',
-        '<p><span class="imgtoggleboxTitle">',
-        //'<div class="mw-parser-output"><p>',
-        '<p><span style="font-size: small;"><span id="coordinates">',
-        '<p><span></span></p>',
-        '<p><a rel="nofollow" class="external text" href="https://maps.gs',
-        '<p><span class="plainlinks nourlexpansion"><a class="external text" href="//tools.wmflabs.org/geohack/geohack.php?langu',
-        '<p><span style="display:none">',
-        '<p>&#32;</p>',
-        '<p><span class="geo noexcerpt"',
-    ];
-
    public const RETRIEVAL_MODES_ACCEPTED = [
        'list',
        'add',
@ -87,7 +68,8 @@ final class NodaWikidataFetcher {
     */
    private static function _getWikipediaApiLink(string $lang, string $searchTerm):string {

-        return "https://" . urlencode($lang) . ".wikipedia.org/w/api.php?action=parse&page=" . urlencode($searchTerm) . "&prop=text&section=0&format=json";
+        return "https://" . urlencode($lang) . ".wikipedia.org/w/api.php?format=json&action=query&prop=extracts&exintro&explaintext&redirects=1&titles=" . urlencode($searchTerm);
+        # w/api.php?action=parse&page=" . urlencode($searchTerm) . "&prop=text&section=0&format=json";

    }

@ -151,9 +133,14 @@ final class NodaWikidataFetcher {
    private static function _getCleanedWikipediaSnippet(string $lang, string $title):string {

        $datafromwiki = MD_STD::runCurl(self::_getWikipediaApiLink($lang, $title), 10000);
-        $datafromwiki = strval(json_decode($datafromwiki, true)['parse']['text']['*']);
+        $json_decoded = json_decode($datafromwiki, true);
+        if (!is_array($json_decoded) || empty($json_decoded['query']['pages'])) {
+            return '';
+        }
+        // A page entry may lack the 'extract' key (e.g. nonexistent title), so fall back to ''.
+        $datafromwiki = strval(current($json_decoded['query']['pages'])['extract'] ?? '');

-        return self::_cleanWikidataInput($datafromwiki);
+        return self::_cleanInputSimple($datafromwiki);

    }

@ -164,8 +151,9 @@ final class NodaWikidataFetcher {
     *
     * @return array<mixed>
     */
-    private static function _getWikidataEntity(string $wikidata_id):array {
+    public static function getWikidataEntity(string $wikidata_id):array {

+        self::validateWikidataId($wikidata_id);
        $data = json_decode(MD_STD::runCurl("https://www.wikidata.org/wiki/Special:EntityData/" . urlencode($wikidata_id) . ".json", 10000), true);
        if ($data === null) {
            throw new MDhttpFailedException("Failed fetching from Wikidata. Try again later.");
@ -173,7 +161,20 @@ final class NodaWikidataFetcher {
        if (empty($data['entities'][$wikidata_id])) {
            throw new MDhttpFailedException("Failed fetching from Wikidata. Try again later.");
        }
-        return $data['entities'][$wikidata_id];
+
+        $output = $data['entities'][$wikidata_id];
+
+        // Throw exception if this page is a dedicated disambiguation item.
+        // P31: Instance of; Q4167410: Wikimedia disambiguation page
+        if (isset($output['claims']) && isset($output['claims']['P31'])) {
+            foreach ($output['claims']['P31'] as $is_instance_of) {
+                if (isset($is_instance_of['mainsnak']['datavalue']['value']['id']) && $is_instance_of['mainsnak']['datavalue']['value']['id'] === 'Q4167410') {
+                    throw new NodaWikidataFetcherDisambiguationIsDisallowedException("Loading wikidata disambiguation pages is disallowed");
+                }
+            }
+        }
+
+        return $output;

    }

@ -258,237 +259,21 @@ final class NodaWikidataFetcher {
    }

    /**
-     * Cleans basic tags off Wikidata input.
+     * Cleans remaining HTML elements and leading, trailing whitespaces.
     *
     * @param string $input Input string.
     *
     * @return string
     */
-    private static function _cleanWikidataInputHtml(string $input):string {
+    private static function _cleanInputSimple(string $input):string {

-        // Clean off anything before first <p>
-        if ($pStartPos = strpos($input, '<p')) {
-            $input = substr($input, $pStartPos);
-        }
-        if ($pEndPos = strrpos($input, '</p>')) {
-            $input = substr($input, 0, $pEndPos + 4);
-        }
-
-        $doc = new DOMDocument();
-        try {
-            $doc->loadXML('<section>' . trim($input) . '</section>');
-        }
-        catch (Exception $e) {
-            throw new Exception("Failed to load DOMDocument." . PHP_EOL . $e->getMessage() . PHP_EOL . PHP_EOL . '---' . $input . '---');
-        }
-
-        $list = $doc->getElementsByTagName("style");
-        while ($list->length > 0) {
-            $p = $list->item(0);
-            if ($p === null || $p->parentNode === null) break;
-            $p->parentNode->removeChild($p);
-        }
-
-        $list = $doc->getElementsByTagName("table");
-        while ($list->length > 0) {
-            $p = $list->item(0);
-            if ($p === null || $p->parentNode === null) break;
-            $p->parentNode->removeChild($p);
-        }
-
-        $list = $doc->getElementsByTagName("ol");
-        while ($list->length > 0) {
-            $p = $list->item(0);
-            if ($p === null || $p->parentNode === null) break;
-            $p->parentNode->removeChild($p);
-        }
-
-        if (($firstP = $doc->getElementsByTagName("p")->item(0)) !== null) {
-            if (($firstPhtml = $doc->saveHTML($firstP)) !== false) {
-                if (strpos($firstPhtml, 'geohack') !== false) {
-                    if ($firstP->parentNode !== null) $firstP->parentNode->removeChild($firstP);
-                }
-            }
-        }
-
-        $output = [];
-        foreach ($doc->getElementsByTagName("p") as $p) {
-            $output[] = trim($p->textContent);
-        }
-
-        /*
-        if (strpos($doc->saveHTML(), 'Coordinates:') !== false) {
-            echo $doc->saveHTML();
-            exit;
-        }
-         */
-        return str_replace(PHP_EOL, PHP_EOL . PHP_EOL, trim(implode(PHP_EOL, $output)));
-
-    }
-
-    /**
-     * Cleans brackets ([1], [2]) off description text.
-     *
-     * @param string $input Input string.
-     *
-     * @return string
-     */
-    private static function _cleanSourceBracketsOffTranslation(string $input):string {
-
-        $bracketsToRemove = [];
-        for ($i = 0; $i < 100; $i++) {
-            $bracketsToRemove["[$i]"] = "";
-        }
-        return strtr($input, $bracketsToRemove);
-
-    }
-
-    /**
-     * Cleans contents parsed from Wikipedia.
-     *
-     * @param string $input Input string.
-     *
-     * @return string
-     */
-    private static function _cleanWikidataInput(string $input):string {
-
-        $input = trim($input, '"');
-        foreach (self::WIKIPEDIA_REMOVE_LITERALS as $tToRemove) $input = str_replace($tToRemove, "", $input);
-
-        if (substr($input, 0, strlen('<')) === '<') {
-
-            $input = self::_cleanWikidataInputHtml($input);
-
-            if (mb_strlen($input) > 600) {
-                if (strpos($input, PHP_EOL . PHP_EOL, 600) !== false) {
-                    $input = substr($input, 0, strpos($input, PHP_EOL . PHP_EOL, 600));
-                }
-            }
-
-            $input = self::_cleanSourceBracketsOffTranslation($input);
-
-            $input = str_replace("\t", " ", $input);
-
-            // Remove newlines with ensuing spaces
-            while (strpos($input, PHP_EOL . " ") !== false) {
-                $input = str_replace(PHP_EOL . " ", PHP_EOL, $input);
-            }
-
-            // Remove double newlines
-            while (strpos($input, PHP_EOL . PHP_EOL . PHP_EOL) !== false) {
-                $input = str_replace(PHP_EOL . PHP_EOL . PHP_EOL, PHP_EOL . PHP_EOL, $input);
-            }
-            return MD_STD_IN::sanitize_text($input);
-
-        }
-
-        $input = str_replace(PHP_EOL, '', $input);
-
-        if (empty($input)) return "";
-
-        // Remove infobox tables specifically
-        $firstParagraphPosition = strpos($input, '<p', 1);
-        $currentSearchPos = strpos($input, "<table>");
-        if ($currentSearchPos !== false && $currentSearchPos < $firstParagraphPosition) {
-            if (($tableEndPos = strpos($input, "</table>")) !== false) {
-                if (($pStartPos = strpos($input, '<p', $tableEndPos + 6)) !== false) {
-                    $input = substr($input, $pStartPos);
-                }
-            }
-        }
-
-        // Remove leftover unnecessary paragraphs before actual content
-
-        $removeFirstParagraph = false;
-        $firstParagraphPosition = strpos($input, '<p', 1);
-
-        foreach (["</table>", "<img"] as $tagPart) {
-            $currentSearchPos = strpos($input, $tagPart);
-            if ($currentSearchPos !== false && $currentSearchPos < $firstParagraphPosition) {
-                $removeFirstParagraph = true;
-                break;
-            }
-        }
-
-        if ($removeFirstParagraph === true) {
-            $input = substr($input, $firstParagraphPosition ?: 0);
-        }
-
-        $input = str_replace('</p>', '</p>' . PHP_EOL . PHP_EOL . PHP_EOL, $input);
-        # $input = str_replace('?/i', '', $input);
-        $input = strip_tags($input);
-
-        # for ($i = 150; $i < 1000; $i++) $input = str_replace("&#$i;", " ", $input);
-        $i = 0;
-        while (strpos($input, ".mw-parser-output") !== false and strpos($input, "}", strpos($input, ".mw-parser-output")) !== false) {
-            $part1 = substr($input, 0, strpos($input, ".mw-parser-output"));
-            $part2 = substr($input, strpos($input, "}", strpos($input, ".mw-parser-output")) + 1);
-            $input = $part1 . $part2;
-            ++$i;
-            if ($i === 30) break;
-        }
-
-        $input = self::_cleanSourceBracketsOffTranslation($input);
-
-        $input = str_replace("\t", " ", $input);
-
-        // Remove double whitespaces
-        while (strpos($input, "  ") !== false) {
-            $input = str_replace("  ", " ", $input);
-        }
-
-        // Remove newlines with ensuing spaces
-        while (strpos($input, PHP_EOL . " ") !== false) {
-            $input = str_replace(PHP_EOL . " ", PHP_EOL, $input);
-        }
-
-        // Remove double newlines
-        while (strpos($input, PHP_EOL . PHP_EOL . PHP_EOL) !== false) {
-            $input = str_replace(PHP_EOL . PHP_EOL . PHP_EOL, PHP_EOL . PHP_EOL, $input);
-        }
-
-        $stableToRemove = [
-            "Vous pouvez partager vos connaissances en l’améliorant (comment ?) selon les recommandations des projets correspondants.",
-        ];
-        foreach ($stableToRemove as $tToRemove) $input = str_replace($tToRemove, "", $input);
-
-        $endings = [
-            "StubDenne artikel om et vandløb ",
-        ];
-        foreach ($endings as $ending) {
-            if (strpos($input, $ending) !== false) $input = substr($input, 0, strpos($input, $ending));
-        }
-
-        $input = trim($input);
-
-        // Cut off overly long articles
-        if (mb_strlen($input) > 600) {
-            if (strpos($input, PHP_EOL . PHP_EOL, 600) !== false) {
-                $input = trim(substr($input, 0, strpos($input, PHP_EOL . PHP_EOL, 600)));
-            }
-        }
-
-        if (empty($input)) return '';
-
-        $input = str_replace("'", "´", MD_STD::preg_replace_str("/\&\#91\;[0-9]\&\#93\;/", '', $input));
-
-        $input = html_entity_decode($input);
-
-        return MD_STD_IN::sanitize_text($input);
-
-    }
-
-    /**
-     * Wrapper around _cleanWikidataInput for testing.
-     *
-     * @param string $input Input string.
-     *
-     * @return string
-     */
-    public static function cleanWikidataInput(string $input):string {
-
-        if (PHP_SAPI !== 'cli') throw new Exception("Use this function only for testing");
-        return self::_cleanWikidataInput($input);
+        return strtr(
+            trim(MD_STD_IN::sanitize_text($input)),
+            [
+                PHP_EOL => PHP_EOL . PHP_EOL,
+                PHP_EOL . PHP_EOL . PHP_EOL => PHP_EOL . PHP_EOL,
+            ]
+        );

    }

@ -740,7 +525,7 @@ final class NodaWikidataFetcher {
        $languagesToFetch = $wikilinks = [];
        foreach ($checkagainstLanguage as $lang) {

-            if (empty($data['labels'][$lang])) {
+            if (empty($data['labels']) || empty($data['labels'][$lang])) {
                continue;
            }

@ -795,23 +580,20 @@ final class NodaWikidataFetcher {
                $wikilink = $wikilinks[$lang];
                if (!empty($contents[$lang])) {

-                    $descFromWiki = json_decode($contents[$lang], true)['parse']['text']['*'];
-
-                    # Process data retrieved from wikipedia
-
-                    if ($descFromWiki !== null) $tDescription = (string)$descFromWiki;
-                    else $tDescription = "";
+                    $titleFromWikipedia = $data['sitelinks'][$lang . 'wiki']['title'];
+                    $tDescription = self::_getCleanedWikipediaSnippet($lang, $titleFromWikipedia);

                }
                else {
                    $tDescription = "";
                }

-                if ($tDescription !== '' && !empty($desc_cleaned = self::_cleanWikidataInput($tDescription))) {
+                if (!empty($titleFromWikipedia) && !empty($tDescription)) {
+
                    # $descs[$lang] = $tDescription;
                    $output[$lang] = [
-                        'label' => self::_cleanWikidataInput((string)$data['labels'][$lang]['value']),
-                        'description' => '"' . $desc_cleaned . '" - (' . $data['labels'][$lang]['language'] . '.wikipedia.org ' . date('d.m.Y') . ')',
+                        'label' => $titleFromWikipedia,
+                        'description' => '"' . $tDescription . '" - (' . $data['labels'][$lang]['language'] . '.wikipedia.org ' . date('d.m.Y') . ')',
                        'link' => $wikilink,
                    ];
                }
@ -819,8 +601,8 @@ final class NodaWikidataFetcher {
                else if (!empty($data['labels'][$lang]['value']) and !empty($data['descriptions'][$lang])) {

                    $output[$lang] = [
-                        'label' => self::_cleanWikidataInput($data['labels'][$lang]['value']),
-                        'description' => self::_cleanWikidataInput($data['descriptions'][$lang]['value']),
+                        'label' => self::_cleanInputSimple($data['labels'][$lang]['value']),
+                        'description' => self::_cleanInputSimple($data['descriptions'][$lang]['value']),
                        'link' => "",
                    ];

@ -831,8 +613,8 @@ final class NodaWikidataFetcher {
            else if (!empty($data['labels'][$lang]['value']) and !empty($data['descriptions'][$lang])) {

                $output[$lang] = [
-                    'label' => self::_cleanWikidataInput($data['labels'][$lang]['value']),
-                    'description' => self::_cleanWikidataInput($data['descriptions'][$lang]['value']),
+                    'label' => self::_cleanInputSimple($data['labels'][$lang]['value']),
+                    'description' => self::_cleanInputSimple($data['descriptions'][$lang]['value']),
                    'link' => "",
                ];

@ -1026,6 +808,7 @@ final class NodaWikidataFetcher {
                    $wikidata_gender = "female";
                    break;
                case "Q48270":
+                case "Q207959": // Androgyny
                    $wikidata_gender = "other";
                    break;
                default:
@ -1047,6 +830,51 @@ final class NodaWikidataFetcher {

    }

+    /**
+     * Picks a description: Wikipedia snippet in $lang, else in another main language, else Wikidata.
+     *
+     * @param string                                           $lang      The user's selected used language.
+     * @param array<mixed>                                     $data      Data fetched from wikidata.
+     * @param array<string, array{url: string, title: string}> $wikilinks Links to wikipedia APIs.
+     *
+     * @return array{}|array{lang: string, desc: string, source: 'wikidata'|'wikipedia'}
+     */
+    private static function _getDescriptionFromWikidataAndWikipediaLinks(string $lang, array $data, array $wikilinks):array {
+
+        // Try the current user language for retrieving wikipedia texts
+        if (isset($wikilinks[$lang])) {
+            # Process data retrieved from wikipedia
+            if (!empty($datafromwiki = self::_getCleanedWikipediaSnippet($lang, $wikilinks[$lang]['title']))) {
+                return ['lang' => $lang, 'desc' => $datafromwiki, 'source' => 'wikipedia'];
+            }
+
+        }
+
+        // Try the alternative languages for retrieving wikipedia texts
+        foreach (self::LANGUAGES_MAIN_DESC as $cur_lang) {
+
+            if ($lang === $cur_lang || !isset($wikilinks[$cur_lang])) continue;
+            // The title belongs to the $cur_lang Wikipedia, so that edition (not $lang) must be queried.
+            if ($datafromwiki = self::_getCleanedWikipediaSnippet($cur_lang, $wikilinks[$cur_lang]['title'])) {
+                return ['lang' => $cur_lang, 'desc' => $datafromwiki, 'source' => 'wikipedia'];
+            }
+
+        }
+
+        // If the description still has not been entered, try retrieving it from wikidata.
+        if (!empty($data['descriptions'][$lang])) {
+            return ['lang' => $lang, 'desc' => (string)$data['descriptions'][$lang]['value'], 'source' => 'wikidata'];
+        }
+        else if (!empty($data['descriptions'])) {
+            $tLang = (string)array_keys($data['descriptions'])[0];
+            $desc = $data['descriptions'][$tLang];
+            return ['lang' => $tLang, 'desc' => (string)$desc['value'], 'source' => 'wikidata'];
+        }
+
+        return [];
+
+    }
+
    /**
     * Function for retrieving information.
     *
@ -1059,30 +887,13 @@ final class NodaWikidataFetcher {
     */
    public function retrievePersinstInfoFromWikidataID(string $lang, string $wikidata_id, int $persinst_id, string $erfasst_von) {

-        self::validateWikidataId($wikidata_id);
-        $data = self::_getWikidataEntity($wikidata_id);
+        $data = self::getWikidataEntity($wikidata_id);

        // Get links to wikipedia

        $wikilinks = self::_getWikipediaLinksFromWikidataOutput($data);
-        $alreadyEntered = false;
-
-        if (isset($wikilinks[$lang])) {
-            # Process data retrieved from wikipedia
-            if (!empty($datafromwiki = self::_getCleanedWikipediaSnippet($lang, $wikilinks[$lang]['title']))) {
-                $alreadyEntered = $this->retrievePersinstDescFromWikipedia($persinst_id, $wikidata_id, $datafromwiki, $lang, $lang, $erfasst_von);
-            }
-
-        }
-
-        foreach (self::LANGUAGES_MAIN_DESC as $cur_lang) {
-
-            if ($alreadyEntered === true || !isset($wikilinks[$cur_lang])) continue;
-
-            if ($datafromwiki = self::_getCleanedWikipediaSnippet($lang, $wikilinks[$cur_lang]['title'])) {
-                $alreadyEntered = $this->retrievePersinstDescFromWikipedia($persinst_id, $wikidata_id, $datafromwiki, $lang, "$cur_lang", $erfasst_von);
-            }
-
+        if (!empty($desc = self::_getDescriptionFromWikidataAndWikipediaLinks($lang, $data, $wikilinks))) {
+            $alreadyEntered = $this->retrievePersinstDescFromWikipedia($persinst_id, $wikidata_id, $desc['desc'], $lang, $desc['lang'], $erfasst_von);
        }

        $this->enterPersinstBirthDeathDatesFromWikidata($data, $persinst_id);
@ -1110,8 +921,7 @@ final class NodaWikidataFetcher {
     */
    public function retrievePersinstNormDataLinksFromWikidataID(string $wikidata_id, int $persinst_id, string $erfasst_von) {

-        self::validateWikidataId($wikidata_id);
-        $data = self::_getWikidataEntity($wikidata_id);
+        $data = self::getWikidataEntity($wikidata_id);
        if (!empty($nodaLinks = $this->_getNodaLinksFromWikidataResult('persinst', $wikidata_id, $data))) {
            NodaBatchInserter::linkNodaForPersinst($this->_mysqli_noda, $persinst_id, $nodaLinks, $erfasst_von);
        }
@ -1129,8 +939,7 @@ final class NodaWikidataFetcher {
     */
    public function retrievePlaceNormDataLinksFromWikidataID(string $wikidata_id, int $onum, string $erfasst_von) {

-        self::validateWikidataId($wikidata_id);
-        $data = self::_getWikidataEntity($wikidata_id);
+        $data = self::getWikidataEntity($wikidata_id);
        if (!empty($nodaLinks = $this->_getNodaLinksFromWikidataResult('place', $wikidata_id, $data))) {
            NodaBatchInserter::linkNodaForPlace($this->_mysqli_noda, $onum, $nodaLinks, $erfasst_von);
        }
@ -1279,7 +1088,6 @@ final class NodaWikidataFetcher {
            $updateStmt->execute();
        }
        catch (MDMysqliInvalidEncodingError $e) {
-            $_SESSION["editHistory"] = ["changesStored", "Error adding base description"];
        }
        $updateStmt->close();
        unset($updateStmt);
@ -1355,8 +1163,7 @@ final class NodaWikidataFetcher {
     */
    public function retrievePlaceInfoFromWikidataID(string $lang, string $wikidata_id, int $onum, string $erfasst_von) {

-        self::validateWikidataId($wikidata_id);
-        $data = self::_getWikidataEntity($wikidata_id);
+        $data = self::getWikidataEntity($wikidata_id);

        $wikilinks = self::_getWikipediaLinksFromWikidataOutput($data);

@ -1368,30 +1175,8 @@ final class NodaWikidataFetcher {
        }

        $cur_place_desc = $this->getPlaceDescription($onum);
-        $alreadyEntered = false;
-
-        if (!empty($wikilinks[$lang])) {
-
-            $datafromwiki = MD_STD::runCurl(self::_getWikipediaApiLink($lang, $wikilinks[$lang]['title']), 10000);
-            $datafromwiki = json_decode($datafromwiki, true)['parse']['text']['*'];
-
-            if (!empty($datafromwiki) and !empty($datafromwiki = self::_cleanWikidataInput((string)$datafromwiki))) {
-                $alreadyEntered = $this->enterPlaceDescFromWikidata($cur_place_desc, $datafromwiki, $lang, $lang, $onum, $erfasst_von);
-            }
-        }
-
-        foreach (self::LANGUAGES_MAIN_DESC as $cur_lang) {
-
-            //if ($alreadyEntered === true) break;
-            if ($alreadyEntered === true) break;
-            if (!isset($wikilinks[$cur_lang]['url'])) continue;
-
-            $datafromwiki = MD_STD::runCurl(self::_getWikipediaApiLink($cur_lang, $wikilinks[$cur_lang]['title']), 10000);
-            $datafromwiki = json_decode($datafromwiki, true)['parse']['text']['*'];
-            if (!empty($datafromwiki) and !empty($datafromwiki = self::_cleanWikidataInput((string)$datafromwiki))) {
-                $alreadyEntered = $this->enterPlaceDescFromWikidata($cur_place_desc, $datafromwiki, $lang, $cur_lang, $onum, $erfasst_von);
-            }
-
+        if (!empty($desc = self::_getDescriptionFromWikidataAndWikipediaLinks($lang, $data, $wikilinks))) {
+            $this->enterPlaceDescFromWikidata($cur_place_desc, $desc['desc'], $lang, $desc['lang'], $onum, $erfasst_von);
        }

        if (isset($data['claims']['P1566'])) $geonames_id = filter_var($data['claims']['P1566'][0]['mainsnak']['datavalue']['value'], FILTER_VALIDATE_INT);
@ -1589,37 +1374,12 @@ final class NodaWikidataFetcher {
     */
    public function retrieveTagInfoFromWikidataID(string $lang, string $wikidata_id, int $tag_id, string $erfasst_von) {

-        self::validateWikidataId($wikidata_id);
-        $data = self::_getWikidataEntity($wikidata_id);
+        $data = self::getWikidataEntity($wikidata_id);

        $wikilinks = self::_getWikipediaLinksFromWikidataOutput($data);

-        $alreadyEntered = false;
-
-        if (isset($wikilinks[$lang])) {
-
-            $datafromwiki = MD_STD::runCurl(self::_getWikipediaApiLink($lang, $wikilinks[$lang]['title']), 10000);
-            $datafromwiki = json_decode($datafromwiki, true)['parse']['text']['*'];
-
-            # Process data retrieved from wikipedia
-            if (!empty($datafromwiki = self::_cleanWikidataInput((string)$datafromwiki))) {
-                $alreadyEntered = $this->retrieveTagDescFromWikipedia($tag_id, $datafromwiki, $lang, $lang, $erfasst_von);
-            }
-
-        }
-
-        foreach (self::LANGUAGES_MAIN_DESC as $cur_lang) {
-
-            if ($alreadyEntered === true || !isset($wikilinks[$cur_lang])) continue;
-
-            $datafromwiki = MD_STD::runCurl(self::_getWikipediaApiLink($cur_lang, $wikilinks[$cur_lang]['title']), 10000);
-            $datafromwiki = json_decode($datafromwiki, true)['parse']['text']['*'];
-
-            # Process data retrieved from wikipedia
-            if ($datafromwiki = self::_cleanWikidataInput((string)$datafromwiki)) {
-                $alreadyEntered = $this->retrieveTagDescFromWikipedia($tag_id, $datafromwiki, $lang, $cur_lang, $erfasst_von);
-            }
-
+        if (!empty($desc = self::_getDescriptionFromWikidataAndWikipediaLinks($lang, $data, $wikilinks))) {
+            $alreadyEntered = $this->retrieveTagDescFromWikipedia($tag_id, $desc['desc'], $lang, $desc['lang'], $erfasst_von);
        }

        if (!empty($nodaLinks = $this->_getNodaLinksFromWikidataResult('tag', $wikidata_id, $data))) {
--- a/src/NodaWikidataFetcherDisambiguationIsDisallowedException.php
+++ b/src/NodaWikidataFetcherDisambiguationIsDisallowedException.php
@ -0,0 +1,27 @@
+<?PHP
+/**
+ * This file contains an exception class to be thrown if a user attempts to load
+ * data from a Wikidata item specifically established for a disambiguation page.
+ *
+ * @file
+ *
+ * @author Joshua Ramon Enslin <joshua@museum-digital.de>
+ */
+declare(strict_types = 1);
+
+/**
+ * Exception class to be thrown if a user attempts to load
+ * data from a Wikidata item specifically established for a disambiguation page.
+ */
+final class NodaWikidataFetcherDisambiguationIsDisallowedException extends MDgenericInvalidInputsException {
+    /**
+     * Returns a fixed explanation followed by this exception's own message.
+     *
+     * @return string
+     */
+    public function errorMessage():string {
+
+        return 'Attempted to load a disambiguation page. Please select the specific item you want to fetch to enrich the given entry: ' . $this->getMessage();
+
+    }
+}
--- a/src/Sync/NodaPersinstFulltextSyncManticore.php
+++ b/src/Sync/NodaPersinstFulltextSyncManticore.php
@ -11,7 +11,7 @@ declare(strict_types = 1);
 */
 final class NodaPersinstFulltextSyncManticore {

-    const FULL_SYNC_COMMIT_AFTER = 30000;
+    private const FULL_SYNC_COMMIT_AFTER = 30000;

    /**
     * Returns all names and descriptions in the different languages of a actor.
@ -188,6 +188,10 @@ final class NodaPersinstFulltextSyncManticore {

        $mysqli_manticore->commit();

+        if (PHP_SAPI === 'cli' && $mysqli_noda->ping() === false) {
+            $mysqli_noda->reconnect();
+        }
+
        // Sync translations

        $result = $mysqli_noda->do_read_query("SELECT `persinst`.`persinst_id`, `trans_language`,
--- a/src/Sync/NodaTagFulltextSyncManticore.php
+++ b/src/Sync/NodaTagFulltextSyncManticore.php
@ -11,7 +11,7 @@ declare(strict_types = 1);
 */
 final class NodaTagFulltextSyncManticore {

-    const FULL_SYNC_COMMIT_AFTER = 30000;
+    private const FULL_SYNC_COMMIT_AFTER = 30000;

    /**
     * Returns all names and descriptions in the different languages of a tag.
@ -139,6 +139,10 @@ final class NodaTagFulltextSyncManticore {
    /**
     * Synchronizes base entries.
     *
+     * @param MDMysqli $mysqli_noda      Connection to MySQL DB.
+     * @param MDMysqli $mysqli_manticore Connection to Manticore DB.
+     * @param string   $databasename     Name of the main noda database.
+     *
     * @return void
     */
    public static function runFullSyncForBaseEntries(MDMysqli $mysqli_noda, MDMysqli $mysqli_manticore, string $databasename):void {
@ -189,6 +193,10 @@ final class NodaTagFulltextSyncManticore {
    /**
     * Synchronizes translated entries.
     *
+     * @param MDMysqli $mysqli_noda      Connection to MySQL DB.
+     * @param MDMysqli $mysqli_manticore Connection to Manticore DB.
+     * @param string   $databasename     Name of the main noda database.
+     *
     * @return void
     */
    public static function runFullSyncForTranslatedEntries(MDMysqli $mysqli_noda, MDMysqli $mysqli_manticore, string $databasename):void {
--- a/src/enums/NodaTimeAutotranslaterLocales.php
+++ b/src/enums/NodaTimeAutotranslaterLocales.php
@ -8,6 +8,7 @@

 enum NodaTimeAutotranslaterLocales {
    case ar;
+    case crh;
    case de;
    case en;
    case es;
@ -40,6 +41,7 @@ enum NodaTimeAutotranslaterLocales {

        return match($lang) {
            'ar' => static::ar,
+            'crh' => static::crh,
            'de' => static::de,
            'en' => static::en,
            'es' => static::es,
@ -73,6 +75,7 @@ enum NodaTimeAutotranslaterLocales {

        return match($this) {
            self::ar => 'ar_SY.utf8',
+            self::crh => 'uk_UA.utf8',
            self::de => 'de_DE.utf8',
            self::en => 'en_US.utf8',
            self::es => 'es_ES.utf8',
@ -108,6 +111,7 @@ enum NodaTimeAutotranslaterLocales {

        return match($this) {
            self::ar => 'ar-SY',
+            self::crh => 'uk-UA',
            self::de => 'de-DE',
            self::en => 'en-US',
            self::es => 'es-ES',
@ -143,6 +147,7 @@ enum NodaTimeAutotranslaterLocales {

        return match($this) {
            self::ar => '%s',
+            self::crh => '%s',
            self::de => '%s n. Chr.',
            self::en => '%s CE',
            self::es => '%s d.C.',
@ -176,6 +181,7 @@ enum NodaTimeAutotranslaterLocales {

        return match($this) {
            self::ar => '-%s',
+            self::crh => '%s рік до нашої ери',
            self::de => '%s v. Chr.',
            self::en => '%s BC',
            self::es => '%s a.C.',
@ -211,6 +217,7 @@ enum NodaTimeAutotranslaterLocales {

        return match($this) {
            self::ar => '%s',
+            self::crh => '%s',
            self::de => '%s',
            self::en => '%s',
            self::es => '%s',
@ -244,6 +251,7 @@ enum NodaTimeAutotranslaterLocales {

        return match($this) {
            self::ar => '%s-%s',
+            self::crh => '%s-%s',
            self::de => '%s-%s',
            self::en => '%s-%s',
            self::es => '%s-%s',
@ -279,6 +287,7 @@ enum NodaTimeAutotranslaterLocales {

        return match($this) {
            self::ar => '%s-',
+            self::crh => 'з %s року',
            self::de => 'Seit %s',
            self::en => 'Since %s',
            self::es => 'Desde %s',
@ -315,6 +324,7 @@ enum NodaTimeAutotranslaterLocales {

        return match($this) {
            self::ar => '%s-',
+            self::crh => 'після %s року',
            self::de => 'Nach %s',
            self::en => 'After %s',
            self::es => 'Despues de %s',
@ -350,6 +360,7 @@ enum NodaTimeAutotranslaterLocales {

        return match($this) {
            self::ar => '-%s',
+            self::crh => 'до %s року',
            self::de => 'Bis %s',
            self::en => 'Until %s',
            self::es => 'Hasta %s',
@ -384,6 +395,7 @@ enum NodaTimeAutotranslaterLocales {

        return match($this) {
            self::ar => 'القرن ال %s',
+            self::crh => '%s століття',
            self::de => '%s. Jahrhundert',
            self::en => '%s. century',
            self::es => 'Siglo %s',
@ -418,6 +430,7 @@ enum NodaTimeAutotranslaterLocales {

        return match($this) {
            self::ar => 'القرن ال %s-%s',
+            self::crh => '%s-%s століття',
            self::de => '%s.-%s. Jahrhundert',
            self::en => '%s.-%s. century',
            self::es => 'Siglo %s-%s',
@ -452,6 +465,7 @@ enum NodaTimeAutotranslaterLocales {

        return match($this) {
            self::ar => '%s-%s',
+            self::crh => '%s-ті роки',
            self::de => '%ser Jahre',
            self::en => '%ss',
            self::es => '%s-%s',
@ -486,6 +500,7 @@ enum NodaTimeAutotranslaterLocales {

        return match($this) {
            self::ar => '%s-%s',
+            self::crh => '%s-%s-ті роки',
            self::de => '%s-%ser Jahre',
            self::en => '%s-%ss',
            self::es => '%s-%s',
@ -521,6 +536,7 @@ enum NodaTimeAutotranslaterLocales {

        return match($this) {
            self::ar => '-%s',
+            self::crh => 'до %s року',
            self::de => 'Vor %s',
            self::en => 'Before %s',
            self::es => 'Antes de %s',
@ -558,6 +574,7 @@ enum NodaTimeAutotranslaterLocales {
            # self::be => '%d.%B.%Y',
            # self::bg => '%Y-%B-%d',
            # self::ca => '%d/%m/%Y',
+            self::crh => '%d.%m.%Y',
            # self::cs => '%d.%B.%Y',
            # self::da => '%d-%m-%Y',
            self::de => '%d.%m.%Y',
@ -618,6 +635,7 @@ enum NodaTimeAutotranslaterLocales {
            # self::be => '%d.%B.%Y',
            # self::bg => '%Y-%B-%d',
            # self::ca => '%d/%m/%Y',
+            self::crh => 'dd.MM.Y',
            # self::cs => '%d.%B.%Y',
            # self::da => '%d-%m-%Y',
            self::de => 'dd.MM.Y',
@ -679,6 +697,7 @@ enum NodaTimeAutotranslaterLocales {
            # self::bg => '%Y-%B',
            # self::ca => '%m/%Y',
            # self::cs => '%B.%Y',
+            self::crh => '%B %Y',
            # self::da => '%m-%Y',
            self::de => '%B %Y',
            # self::el => '%B %Y',
@ -735,6 +754,7 @@ enum NodaTimeAutotranslaterLocales {
            # self::bg => 'Y-MMMM',
            # self::ca => 'MM/Y',
            # self::cs => 'MMMM.Y',
+            self::crh => 'MMMM Y',
            # self::da => 'MM-Y',
            self::de => 'MMMM Y',
            # self::el => 'MMMM Y',
--- a/tests/NodaIDGetterTest.php
+++ b/tests/NodaIDGetterTest.php
@ -141,6 +141,35 @@ final class NodaIDGetterTest extends TestCase {

    }

+    /**
+     * Provides the names and life dates of one actor as two datasets: one using
+     * the value of `persinst_name_en`, one using the value of `persinst_name`.
+     * Both datasets expect the same ID and the same birth and death years.
+     *
+     * @return array<string, array{0: string, 1: integer, 2: string, 3: string}>
+     */
+    public static function persinstByNameAndLifeDatesProvider():array {
+
+        $mysqli = md_noda_mysqli_connect();
+        $result = $mysqli->do_read_query("SELECT `persinst_name_en`, `persinst_name`, `persinst_id`, `persinst_geburtsjahr`, `persinst_sterbejahr`
+            FROM `persinst`
+            WHERE INSTR(`persinst_name_en`, 'i')
+                AND `persinst_geburtsjahr` != ''
+                AND `persinst_sterbejahr` != ''
+            LIMIT 1");
+        $cur = $result->fetch_row();
+
+        // Release the result and the connection before a possible failure, too.
+        $result->close();
+        $mysqli->close();
+
+        if (empty($cur)) {
+            throw new Exception("Failed to identify an actor that has a value in persinst_name_en, a birth year and a death year");
+        }
+
+        $label = implode(' - ', $cur);
+
+        // One dataset per name column. Each dataset carries exactly the four
+        // arguments the consuming test method accepts.
+        return [
+            'Persinst ID by persinst_name_en: ' . $label => [
+                (string)$cur[0], (int)$cur[2], (string)$cur[3], (string)$cur[4],
+            ],
+            'Persinst ID by persinst_name: ' . $label => [
+                (string)$cur[1], (int)$cur[2], (string)$cur[3], (string)$cur[4],
+            ],
+        ];
+
+    }
+
    /**
     * Test getting persinst by name works.
     *
@ -156,6 +185,25 @@ final class NodaIDGetterTest extends TestCase {

    }

+    /**
+     * Checks that looking up an actor by name, birth year and death year
+     * returns the expected ID.
+     *
+     * @param string  $name        Name of the entry.
+     * @param integer $expected_id Expected target ID.
+     * @param string  $birth_year  Birth year.
+     * @param string  $death_year  Death year.
+     *
+     * @return void
+     */
+    #[DataProvider('persinstByNameAndLifeDatesProvider')]
+    public function testGetPersinstIdByNameAndLifeDatesWorks(string $name, int $expected_id, string $birth_year, string $death_year):void {
+
+        $foundId        = NodaIDGetter::getPersinstIDByNamePlusYears($this->_mysqli, "de", $name, $birth_year, $death_year);
+        $failureMessage = "Entry " . $name . " is not matched in exact lookup. Expected ID: " . $expected_id;
+
+        self::assertEquals($expected_id, $foundId, $failureMessage);
+
+    }
+
    // PersinstIDByRewrite

    /**
@ -411,7 +459,8 @@ final class NodaIDGetterTest extends TestCase {
        $mysqli = md_noda_mysqli_connect();
        $timeByRewriteSimple = self::_getNameAndIdFromDbQuery($mysqli, "SELECT `input_name`, `zeit_id`
            FROM `zeit_rewriting`
-            WHERE INSTR(`input_name`, 'i')");
+            WHERE INSTR(`input_name`, 'i')
+                AND `language` = 'de'");
        $mysqli->close();

        return [
--- a/tests/NodaNameSplitterTest.php
+++ b/tests/NodaNameSplitterTest.php
@ -6,12 +6,14 @@
 */
 declare(strict_types = 1);
 use PHPUnit\Framework\TestCase;
+use PHPUnit\Framework\Attributes\CoversClass;
+use PHPUnit\Framework\Attributes\Small;

 /**
 * This script contains tests for the actor name splitter.
- *
- * @covers \NodaNameSplitter
 */
+#[Small]
+#[CoversClass(\NodaNameSplitter::class)]
 final class NodaNameSplitterTest extends TestCase {
    /**
     * Test to check whether the HTML page is correctly generated.
--- a/tests/NodaTimeAutotranslaterTest.php
+++ b/tests/NodaTimeAutotranslaterTest.php
@ -6,12 +6,14 @@
 */
 declare(strict_types = 1);
 use PHPUnit\Framework\TestCase;
+use PHPUnit\Framework\Attributes\CoversClass;
+use PHPUnit\Framework\Attributes\Medium;

 /**
 * This script contains tests for the automatic translation class for time names.
- *
- * @covers \NodaTimeAutotranslater
 */
+#[Medium]
+#[CoversClass(\NodaTimeAutotranslater::class)]
 final class NodaTimeAutotranslaterTest extends TestCase {
    /**
     * Test to check whether the HTML page is correctly generated.
@ -32,7 +34,7 @@ final class NodaTimeAutotranslaterTest extends TestCase {
            "zeit_zaehlzeit_tag"   => "01",
        ];
        $output = NodaTimeAutotranslater::getTranslations($timeInfo);
-        self::assertEquals($output["de"], "01.05.1920");
+        self::assertEquals("01.05.1920", $output["de"]);

    }

@ -671,4 +673,23 @@ final class NodaTimeAutotranslaterTest extends TestCase {
        self::assertEquals($output["de"], "Vor 01.12.1919");

    }
+
+    /**
+     * Checks that validateTranslations() returns false for the sample input below.
+     *
+     * @author Joshua Ramon Enslin <joshua@museum-digital.de>
+     * @group  ValidOutput
+     * @small
+     *
+     * @return void
+     */
+    public function testValidation():void {
+
+        $translations = ['de' => '1.12.1920'];
+
+        $isValid = NodaTimeAutotranslater::validateTranslations("1919", "1919", $translations);
+
+        self::assertFalse($isValid);
+
+    }
 }
--- a/tests/NodaTimeSplitterTest.php
+++ b/tests/NodaTimeSplitterTest.php
--- a/tests/NodaWikidataFetcherTest.php
+++ b/tests/NodaWikidataFetcherTest.php
@ -6,19 +6,28 @@
 */
 declare(strict_types = 1);
 use PHPUnit\Framework\TestCase;
+use PHPUnit\Framework\Attributes\CoversClass;
+use PHPUnit\Framework\Attributes\Medium;
+use PHPUnit\Framework\Attributes\DataProvider;
+
+require_once __DIR__ . '/../../MDMysqli/test_connections.conf.php';
+require_once __DIR__ . '/../src/NodaWikidataFetcherDisambiguationIsDisallowedException.php';

 /**
 * This script contains tests for the Wikidata fetcher.
- *
- * @covers \NodaWikidataFetcher
 */
+#[Medium]
+#[CoversClass(\NodaWikidataFetcher::class)]
 final class NodaWikidataFetcherTest extends TestCase {
+
+    // Test for getting translations: Telugu
+    public const TEST_LANG = 'te';
+
    /**
     * Test to check whether the HTML page is correctly generated.
     *
     * @author Joshua Ramon Enslin <joshua@museum-digital.de>
     * @group  ValidOutput
-     * @small
     *
     * @return void
     */
@ -29,12 +38,39 @@ final class NodaWikidataFetcherTest extends TestCase {

    }

+    /**
+     * Supplies the Wikidata ID of an item labelled below as the disambiguation page for "Mochi".
+     *
+     * @return array<string, array{0: string}>
+     */
+    public static function disambiguationPageProvider():array {
+
+        $mochiDisambiguation = ['Q6916210'];
+
+        return ['Disambiguation page for "Mochi" - Q6916210' => $mochiDisambiguation];
+
+    }
+
+    /**
+     * Expects getWikidataEntity() to throw the disambiguation exception for the given Wikidata ID.
+     *
+     * @param string $wikidata_id Wikidata ID.
+     *
+     * @return void
+     */
+    #[DataProvider('disambiguationPageProvider')]
+    public function testWikidataIdFromLinkFailsForDisambiguationPages(string $wikidata_id):void {
+
+        // The expectation must be registered before the call that throws.
+        $this->expectException(NodaWikidataFetcherDisambiguationIsDisallowedException::class);
+
+        NodaWikidataFetcher::getWikidataEntity($wikidata_id);
+
+    }
+
    /**
     * Test to check whether the HTML page is correctly generated.
     *
     * @author Joshua Ramon Enslin <joshua@museum-digital.de>
     * @group  ValidOutput
-     * @small
     *
     * @return void
     */
@ -47,9 +83,7 @@ final class NodaWikidataFetcherTest extends TestCase {
    /**
     * Test to check whether the HTML page is correctly generated.
     *
-     * @author Joshua Ramon Enslin <joshua@museum-digital.de>
     * @group  ValidOutput
-     * @small
     *
     * @return void
     */
@ -60,225 +94,208 @@ final class NodaWikidataFetcherTest extends TestCase {
    }

    /**
-     * Test for cleaning wikidata info.
+     * Data provider for an actor that has a wikidata link and a Telugu translation.
     *
-     * @author Joshua Ramon Enslin <joshua@museum-digital.de>
-     * @group  ValidOutput
-     * @small
-     *
-     * @return void
+     * @return array<string, array{0: int, 1: string}>
     */
-    public function testCleanWikidataInput():void {
+    public static function actorWithTlAndWikidataLinkProvider():array {

-        $testStr = '"<div class="mw-parser-output"><table class="infobox float-right toccolours toptextcells" style="margin: 0 0 1em 1em; width: 300px;" id="Vorlage_Infobox_Ort_in_der_Ukraine" summary="Infobox Ort in der Ukraine">
+        $mysqli = md_main_mysqli_connect();

-<tbody><tr>
-<td colspan="2" style="background-color:#AFD6FF; font-size:1.3em; font-weight:bold; text-align:center;">Werbowez (Kossiw)
-</td></tr>
-<tr>
-<td colspan="2" style="background-color:#FFC; font-size:1em; font-weight:bold; text-align:center;"><span lang="uk-Cyrl" class="Cyrl">Вербовець</span>
-</td></tr>
+        $result = $mysqli->do_read_query("SELECT `persinst_id`, `noda_nrinsource`
+            FROM `" . DATABASENAME_NODA . "`.`noda`
+            WHERE `noda_source` = 'Wikidata'
+                AND EXISTS (SELECT 1 FROM `" . DATABASENAME_NODA . "`.`persinst_translation`
+                    WHERE `persinst_translation`.`persinst_id` = `noda`.`persinst_id`
+                        AND `trans_language` = '" . $mysqli->escape_string(self::TEST_LANG) . "')");

+        if (!$cur = $result->fetch_row()) {
+            throw new Exception("Failed to identify an entry that has a wikidata entry and a translation for language " . self::TEST_LANG);
+        }
+        $result->close();
+        $mysqli->close();

-<tr style="height:120px; background-color:#FFF;">
-<td style="width: 130px; text-align:center;"><span typeof="mw:File"><a href="/wiki/Datei:Coats_of_arms_of_None.svg" class="mw-file-description" title="Wappen fehlt"><img alt="Wappen fehlt" src="//upload.wikimedia.org/wikipedia/commons/thumb/c/c1/Coats_of_arms_of_None.svg/100px-Coats_of_arms_of_None.svg.png" decoding="async" width="100" height="120" class="mw-file-element" srcset="//upload.wikimedia.org/wikipedia/commons/thumb/c/c1/Coats_of_arms_of_None.svg/150px-Coats_of_arms_of_None.svg.png 1.5x, //upload.wikimedia.org/wikipedia/commons/thumb/c/c1/Coats_of_arms_of_None.svg/200px-Coats_of_arms_of_None.svg.png 2x" data-file-width="125" data-file-height="150" /></a></span>
-</td>
-<td style="width: 170px; text-align:center;"><table class="centered" style="background-color: #f9f9f9; border: none; border-collapse: collapse; width: 1px;">
-<tbody><tr><td style="border: none; padding: 0; text-align: center;"><div style="position: relative; z-index: 0; padding: 0; display: inline-block; width: -webkit-max-content; width: -moz-max-content; width: max-content; border: none;"><figure class="mw-halign-center noviewer notpageimage" typeof="mw:File"><a href="/wiki/Datei:Ukraine_adm_location_map.svg" class="mw-file-description" title="Werbowez (Kossiw) (Ukraine)"><img alt="Werbowez (Kossiw) (Ukraine)" src="//upload.wikimedia.org/wikipedia/commons/thumb/7/78/Ukraine_adm_location_map.svg/180px-Ukraine_adm_location_map.svg.png" decoding="async" width="180" height="121" class="mw-file-element" srcset="//upload.wikimedia.org/wikipedia/commons/thumb/7/78/Ukraine_adm_location_map.svg/270px-Ukraine_adm_location_map.svg.png 1.5x, //upload.wikimedia.org/wikipedia/commons/thumb/7/78/Ukraine_adm_location_map.svg/360px-Ukraine_adm_location_map.svg.png 2x" data-file-width="1546" data-file-height="1038" /></a><figcaption>Werbowez (Kossiw) (Ukraine)</figcaption></figure><div style="position:absolute; top:50.7%; left:18.9%; height:0; width:0;"><div style="position:relative;z-index:100;left:-4px;top:-4px;width:8px;height:8px;line-height:0px;"><span typeof="mw:File"><a href="https://geohack.toolforge.org/geohack.php?pagename=Werbowez_(Kossiw)&amp;language=de&amp;params=48.342222222222_N_25.133333333333_E_dim:10000_region:UA-26_type:city(3395)&amp;title=Werbowez+%28Kossiw%29" title="Werbowez (Kossiw) (48° 20′ 32″ N, 25° 8′ 0″O)"><img alt="" src="//upload.wikimedia.org/wikipedia/commons/thumb/9/97/ButtonRed.svg/8px-ButtonRed.svg.png" decoding="async" width="8" height="8" class="mw-file-element" srcset="//upload.wikimedia.org/wikipedia/commons/thumb/9/97/ButtonRed.svg/12px-ButtonRed.svg.png 1.5x, //upload.wikimedia.org/wikipedia/commons/thumb/9/97/ButtonRed.svg/16px-ButtonRed.svg.png 2x" data-file-width="480" data-file-height="480" /></a></span></div>
-<table style="font-size:90%; border:none; background-color:transparent; border-collapse:collapse; line-height:1em; position:absolute; width:6em; margin: 0 .2em; text-align:left; left:1px; bottom:1px;"><tbody><tr><td style="border:none; vertical-align:middle;"><span style="position:relative; z-index:9; background-color:none;">Werbowez (Kossiw) </span></td></tr></tbody></table></div></div></td></tr>
-</tbody></table>
-</td></tr>
-<tr style="background-color:#AFD6FF;">
-<th colspan="2">Basisdaten
-</th></tr>
-<tr>
-<td><a href="/wiki/Liste_der_Oblaste_der_Ukraine" title="Liste der Oblaste der Ukraine">Oblast</a>:</td>
-<td><a href="/wiki/Oblast_Iwano-Frankiwsk" title="Oblast Iwano-Frankiwsk">Oblast Iwano-Frankiwsk</a>
-</td></tr>
-<tr>
-<td><a href="/wiki/Liste_der_Rajone_der_Ukraine" title="Liste der Rajone der Ukraine">Rajon</a>:</td>
-<td><a href="/wiki/Rajon_Kossiw" title="Rajon Kossiw">Rajon Kossiw</a>
-</td></tr>
-<tr>
-<td><a href="/wiki/H%C3%B6he_%C3%BCber_dem_Meeresspiegel" title="Höhe über dem Meeresspiegel">Höhe</a>:</td>
-<td>369 m
-</td></tr>
-<tr>
-<td><a href="/wiki/Fl%C3%A4cheninhalt" title="Flächeninhalt">Fläche</a>:</td>
-<td>18,77 <a href="/wiki/Quadratmeter#Quadratkilometer" title="Quadratmeter">km²</a>
-</td></tr>
-<tr>
-<td><a href="/wiki/Einwohner" title="Einwohner">Einwohner</a>:</td>
-<td>3.395 <small><i>(2001)</i></small>
-</td></tr>
-<tr>
-<td><a href="/wiki/Bev%C3%B6lkerungsdichte" title="Bevölkerungsdichte">Bevölkerungsdichte</a>:
-</td>
-<td>181 Einwohner je km²
-</td></tr>
-<tr>
-<td><a href="/wiki/Postleitzahl" title="Postleitzahl">Postleitzahlen</a>:</td>
-<td>78605
-</td></tr>
-<tr>
-<td><a href="/wiki/Telefonvorwahl" title="Telefonvorwahl">Vorwahl</a>:</td>
-<td>+380 3478
-</td></tr>
-<tr>
-<td><a href="/wiki/Geographische_Koordinaten" title="Geographische Koordinaten">Geographische Lage</a>:</td>
-<td><span id="text_coordinates" class="coordinates plainlinks-print"><a class="external text" href="https://geohack.toolforge.org/geohack.php?pagename=Werbowez_(Kossiw)&amp;language=de&amp;params=48.342222222222_N_25.133333333333_E_dim:10000_region:UA-26_type:city(3395)"><span title="Breitengrad">48°&#160;21′&#160;<abbr title="Nord">N</abbr></span>, <span title="Längengrad">25°&#160;8′&#160;<abbr title="Ost">O</abbr></span></a></span><span class="geo noexcerpt" style="display:none"><span class="body"></span><span class="latitude">48.342222222222</span><span class="longitude">25.133333333333</span><span class="elevation"></span></span><span id="coordinates" class="coordinates noprint"><span title="Koordinatensystem WGS84">Koordinaten: </span><a class="external text" href="https://geohack.toolforge.org/geohack.php?pagename=Werbowez_(Kossiw)&amp;language=de&amp;params=48.342222222222_N_25.133333333333_E_dim:10000_region:UA-26_type:city(3395)"><span title="Breitengrad">48°&#160;20′&#160;32″&#160;<abbr title="Nord">N</abbr></span>, <span title="Längengrad">25°&#160;8′&#160;0″&#160;<abbr title="Ost">O</abbr></span></a></span>
-</td></tr>
-<tr>
-<td><a href="/wiki/KATOTTH" title="KATOTTH">KATOTTH</a>:
-</td>
-<td>UA26100010030094355
-</td></tr>
-<tr>
-<td><a href="/wiki/KOATUU" title="KOATUU">KOATUU</a>:
-</td>
-<td>2623682401
-</td></tr>
-<tr>
-<td><a href="/wiki/Verwaltungsgliederung_der_Ukraine" title="Verwaltungsgliederung der Ukraine">Verwaltungsgliederung</a>:
-</td>
-<td>1 Dorf
-</td></tr>
-
-
-
-
-<tr>
-<td>Adresse:
-</td>
-<td>вул. Миру, буд. 15<br />78605 с. Вербовець
-</td></tr>
-<tr>
-<td><a href="/wiki/Website" title="Website">Website</a>:
-</td>
-<td><a rel="nofollow" class="external text" href="http://verbovets.kosiv.net/">Offizielle Webseite</a>
-</td></tr>
-<tr>
-<td colspan="2" style="padding-bottom:3px; text-align:center; border-bottom:1px solid #bbb; border-top:1px solid #bbb;"><a rel="nofollow" class="external text" href="http://w1.c1.rada.gov.ua/pls/z7503/A005?rdat1=31.08.2023&amp;rf7571=13801">Statistische Informationen</a>
-</td></tr>
-<tr>
-<td colspan="2" style="padding-bottom:3px; text-align:center; border-bottom:1px solid #bbb; border-top:1px solid #bbb;">
-<table class="centered" style="background-color: #f9f9f9; border: none; border-collapse: collapse; width: 1px;">
-<tbody><tr><td style="border: none; padding: 0; text-align: center;"><div style="position: relative; z-index: 0; padding: 0; display: inline-block; width: -webkit-max-content; width: -moz-max-content; width: max-content; border: none;"><figure class="mw-halign-center noviewer notpageimage" typeof="mw:File"><a href="/wiki/Datei:Ivano-Frankivsk_location_map.svg" class="mw-file-description" title="Werbowez (Kossiw) (Oblast Iwano-Frankiwsk)"><img alt="Werbowez (Kossiw) (Oblast Iwano-Frankiwsk)" src="//upload.wikimedia.org/wikipedia/commons/thumb/8/8e/Ivano-Frankivsk_location_map.svg/290px-Ivano-Frankivsk_location_map.svg.png" decoding="async" width="290" height="347" class="mw-file-element" srcset="//upload.wikimedia.org/wikipedia/commons/thumb/8/8e/Ivano-Frankivsk_location_map.svg/435px-Ivano-Frankivsk_location_map.svg.png 1.5x, //upload.wikimedia.org/wikipedia/commons/thumb/8/8e/Ivano-Frankivsk_location_map.svg/580px-Ivano-Frankivsk_location_map.svg.png 2x" data-file-width="533" data-file-height="637" /></a><figcaption>Werbowez (Kossiw) (Oblast Iwano-Frankiwsk)</figcaption></figure><div style="position:absolute; top:63.3%; left:74.4%; height:0; width:0;"><div style="position:relative;z-index:100;left:-4px;top:-4px;width:8px;height:8px;line-height:0px;"><span typeof="mw:File"><a href="https://geohack.toolforge.org/geohack.php?pagename=Werbowez_(Kossiw)&amp;language=de&amp;params=48.342222222222_N_25.133333333333_E_dim:10000_region:UA-26_type:city(3395)&amp;title=Werbowez+%28Kossiw%29" title="Werbowez (Kossiw) (48° 20′ 32″ N, 25° 8′ 0″O)"><img alt="" src="//upload.wikimedia.org/wikipedia/commons/thumb/9/97/ButtonRed.svg/8px-ButtonRed.svg.png" decoding="async" width="8" height="8" class="mw-file-element" srcset="//upload.wikimedia.org/wikipedia/commons/thumb/9/97/ButtonRed.svg/12px-ButtonRed.svg.png 1.5x, //upload.wikimedia.org/wikipedia/commons/thumb/9/97/ButtonRed.svg/16px-ButtonRed.svg.png 2x" data-file-width="480" data-file-height="480" /></a></span></div>
-<table style="font-size:90%; border:none; background-color:transparent; border-collapse:collapse; line-height:1em; position:absolute; width:6em; margin: 0 .2em; text-align:right; right:1px; bottom:1px;"><tbody><tr><td style="border:none; vertical-align:middle;"><span style="position:relative; z-index:9; background-color:none;">Werbowez (Kossiw) </span></td></tr></tbody></table></div></div></td></tr>
-</tbody></table><span style="display:none;"><a href="/w/index.php?title=Vorlage:Positionskarte_ISO_3166-2/Wartung/noregion&amp;action=edit&amp;redlink=1" class="new" title="Vorlage:Positionskarte ISO 3166-2/Wartung/noregion (Seite nicht vorhanden)">i1</a></span>
-</td></tr></tbody></table>
-<p><b>Werbowez</b> (<b><span style="font-style:normal;font-weight:normal"><a href="/wiki/Ukrainische_Sprache" title="Ukrainische Sprache">ukrainisch</a></span> <span lang="uk-Cyrl" class="Cyrl" style="font-style:normal">Вербовець</span></b>; <span style="font-style:normal;font-weight:normal"><a href="/wiki/Russische_Sprache" title="Russische Sprache">russisch</a></span> <span lang="ru-Cyrl" class="Cyrl" style="font-style:normal">Вербовец</span>, <a href="/wiki/Polnische_Sprache" title="Polnische Sprache">polnisch</a> <span lang="pl" style="font-style:italic;font-weight:normal">Wierzbowiec</span>; <span style="font-style:normal;font-weight:normal"><a href="/wiki/Rum%C3%A4nische_Sprache" title="Rumänische Sprache">rumänisch</a></span> <span lang="ro-Latn" style="font-style:italic">Verboveț</span>) ist ein <a href="/wiki/Dorf" title="Dorf">Dorf</a> in der <a href="/wiki/Ukraine" title="Ukraine">ukrainischen</a> <a href="/wiki/Oblast_Iwano-Frankiwsk" title="Oblast Iwano-Frankiwsk">Oblast Iwano-Frankiwsk</a> mit etwa 3400 Einwohnern (2001).<sup id="cite_ref-1" class="reference"><a href="#cite_note-1">&#91;1&#93;</a></sup> 
-</p>
-<figure class="mw-default-size mw-halign-left" typeof="mw:File/Thumb"><a href="/wiki/Datei:%D0%92%D0%B5%D1%80%D0%B1%D0%BE%D0%B2%D0%B5%D1%86%D1%8C.JPG" class="mw-file-description"><img src="//upload.wikimedia.org/wikipedia/commons/thumb/9/9d/%D0%92%D0%B5%D1%80%D0%B1%D0%BE%D0%B2%D0%B5%D1%86%D1%8C.JPG/220px-%D0%92%D0%B5%D1%80%D0%B1%D0%BE%D0%B2%D0%B5%D1%86%D1%8C.JPG" decoding="async" width="220" height="147" class="mw-file-element" srcset="//upload.wikimedia.org/wikipedia/commons/thumb/9/9d/%D0%92%D0%B5%D1%80%D0%B1%D0%BE%D0%B2%D0%B5%D1%86%D1%8C.JPG/330px-%D0%92%D0%B5%D1%80%D0%B1%D0%BE%D0%B2%D0%B5%D1%86%D1%8C.JPG 1.5x, //upload.wikimedia.org/wikipedia/commons/thumb/9/9d/%D0%92%D0%B5%D1%80%D0%B1%D0%BE%D0%B2%D0%B5%D1%86%D1%8C.JPG/440px-%D0%92%D0%B5%D1%80%D0%B1%D0%BE%D0%B2%D0%B5%D1%86%D1%8C.JPG 2x" data-file-width="5184" data-file-height="3456" /></a><figcaption>Blick auf das Dorf</figcaption></figure>
-<p>Das um 1650 erstmals schriftlich erwähnte Dorf<sup id="cite_ref-2" class="reference"><a href="#cite_note-2">&#91;2&#93;</a></sup> liegt im Osten der <a href="/wiki/Historische_Landschaft" title="Historische Landschaft">historischen Landschaft</a> <a href="/wiki/Galizien" title="Galizien">Galizien</a> am Ufer der <a href="/w/index.php?title=Rybnyzja_(Fluss)&amp;action=edit&amp;redlink=1" class="new" title="Rybnyzja (Fluss) (Seite nicht vorhanden)">Rybnyzja</a> (<span lang="uk-Cyrl" class="Cyrl">Рибниця</span>), einem 56&#160;km langen Nebenfluss des <a href="/wiki/Pruth" title="Pruth">Pruth</a> 7&#160;km nordöstlich vom Rajonzentrum <a href="/wiki/Kossiw" title="Kossiw">Kossiw</a> und 95&#160;km südlich vom Oblastzentrum <a href="/wiki/Iwano-Frankiwsk" title="Iwano-Frankiwsk">Iwano-Frankiwsk</a>. Südlich der Ortschaft verläuft die <a href="/wiki/Territorialstra%C3%9Fe" title="Territorialstraße">Territorialstraße</a> <i>T–09–09</i>.
-</p><p>Am 12. Juni 2020 wurde das Dorf ein Teil der neu gegründeten <i>Stadtgemeinde <a href="/wiki/Kossiw" title="Kossiw">Kossiw</a></i> im <a href="/wiki/Rajon_Kossiw" title="Rajon Kossiw">Rajon Kossiw</a><sup id="cite_ref-3" class="reference"><a href="#cite_note-3">&#91;3&#93;</a></sup>, bis dahin bildete es zusammen mit dem Dorf <a href="/w/index.php?title=Staryj_Kossiw&amp;action=edit&amp;redlink=1" class="new" title="Staryj Kossiw (Seite nicht vorhanden)">Staryj Kossiw</a> (<span lang="uk-Cyrl" class="Cyrl">Старий Косів</span>) die <i>Landratsgemeinde Werbowez</i> (Вербовецька сільська рада/<i>Werbowezka silska rada</i>) im Osten des Rajons.
-</p>
-<ol class="references">
-<li id="cite_note-1"><span class="mw-cite-backlink"><a href="#cite_ref-1">↑</a></span> <span class="reference-text"><a rel="nofollow" class="external text" href="http://w1.c1.rada.gov.ua/pls/z7503/A005?rf7571=13801">Ortswebseite</a> auf der offiziellen Webpräsenz der <a href="/wiki/Werchowna_Rada" title="Werchowna Rada">Werchowna Rada</a>; abgerufen am 14. November 2017 (ukrainisch)</span>
-</li>
-<li id="cite_note-2"><span class="mw-cite-backlink"><a href="#cite_ref-2">↑</a></span> <span class="reference-text"><a rel="nofollow" class="external text" href="http://ukrssr.com.ua/ifrank/kosivskiy/verbovets-kosivskiy-rayon-ivano-frankivska-oblast">Ortsgeschichte Werbowez</a> in der <a href="/wiki/Geschichte_der_St%C3%A4dte_und_D%C3%B6rfer_der_Ukrainischen_SSR" title="Geschichte der Städte und Dörfer der Ukrainischen SSR">Geschichte der Städte und Dörfer der Ukrainischen SSR</a>; abgerufen am 14. November 2017 (ukrainisch)</span>
-</li>
-<li id="cite_note-3"><span class="mw-cite-backlink"><a href="#cite_ref-3">↑</a></span> <span class="reference-text"><a rel="nofollow" class="external text" href="https://zakon.rada.gov.ua/laws/show/714-2020-%D1%80#Text">Кабінет Міністрів України Розпорядження від 12 червня 2020 р. № 714-р "Про визначення адміністративних центрів та затвердження територій територіальних громад Івано-Франківської області"</a></span>
-</li>
-</ol>
-<!-- 
-NewPP limit report
-Parsed by mw1396
-Cached time: 20230831121013
-Cache expiry: 42588
-Reduced expiry: true
-Complications: []
-CPU time usage: 0.219 seconds
-Real time usage: 0.274 seconds
-Preprocessor visited node count: 6414/1000000
-Post‐expand include size: 33611/2097152 bytes
-Template argument size: 12317/2097152 bytes
-Highest expansion depth: 34/100
-Expensive parser function count: 9/500
-Unstrip recursion depth: 0/20
-Unstrip post‐expand size: 1476/5000000 bytes
-Lua time usage: 0.080/10.000 seconds
-Lua memory usage: 3398800/52428800 bytes
-Number of Wikibase entities loaded: 0/400
-->
-<!--
-Transclusion expansion time report (%,ms,calls,template)
-100.00%  239.600      1 -total
- 93.55%  224.134      1 Vorlage:Infobox_Ort_in_der_Ukraine
- 50.81%  121.740      2 Vorlage:Positionskarte
- 49.72%  119.121      2 Vorlage:Positionskarte+
- 44.41%  106.401      2 Vorlage:Positionskarte~
- 33.28%   79.732      2 Vorlage:Positionskarte~*
- 25.69%   61.558      3 Vorlage:Lang
- 19.41%   46.499      1 Vorlage:Positionskarte_ISO_3166-2
- 16.90%   40.486     12 Vorlage:CoordinateLONG
- 14.02%   33.586     10 Vorlage:CoordinateLAT
-->
-</div>" - (de.wikipedia.org 31.08.2023)';
-
-        $output = NodaWikidataFetcher::cleanWikidataInput($testStr);
-        $expected = 'Werbowez (ukrainisch Вербовець; russisch Вербовец, polnisch Wierzbowiec; rumänisch Verboveț) ist ein Dorf in der ukrainischen Oblast Iwano-Frankiwsk mit etwa 3400 Einwohnern (2001).';
-        self::assertTrue(
-            str_starts_with($output, $expected),
-            "Start of parsed Wikipedia text should be:" . PHP_EOL . PHP_EOL . $expected . PHP_EOL . PHP_EOL . 'Real start text is: ' . PHP_EOL . PHP_EOL . substr($output, 0, 250)
-        );
-
-        $output = NodaWikidataFetcher::cleanWikidataInput('<div class="mw-parser-output"><figure class="mw-default-size mw-halign-right" typeof="mw:File/Thumb"><a href="/wiki/File:%D0%92%D0%B5%D1%80%D0%B1%D0%BE%D0%B2%D0%B5%D1%86%D1%8C.JPG" class="mw-file-description"><img src="//upload.wikimedia.org/wikipedia/commons/thumb/9/9d/%D0%92%D0%B5%D1%80%D0%B1%D0%BE%D0%B2%D0%B5%D1%86%D1%8C.JPG/220px-%D0%92%D0%B5%D1%80%D0%B1%D0%BE%D0%B2%D0%B5%D1%86%D1%8C.JPG" decoding="async" width="220" height="147" class="mw-file-element" srcset="//upload.wikimedia.org/wikipedia/commons/thumb/9/9d/%D0%92%D0%B5%D1%80%D0%B1%D0%BE%D0%B2%D0%B5%D1%86%D1%8C.JPG/330px-%D0%92%D0%B5%D1%80%D0%B1%D0%BE%D0%B2%D0%B5%D1%86%D1%8C.JPG 1.5x, //upload.wikimedia.org/wikipedia/commons/thumb/9/9d/%D0%92%D0%B5%D1%80%D0%B1%D0%BE%D0%B2%D0%B5%D1%86%D1%8C.JPG/440px-%D0%92%D0%B5%D1%80%D0%B1%D0%BE%D0%B2%D0%B5%D1%86%D1%8C.JPG 2x" data-file-width="5184" data-file-height="3456" /></a><figcaption></figcaption></figure>
-<p><span style="font-size: small;"><span id="coordinates"><a href="/wiki/%E5%9C%B0%E7%90%86%E5%9D%90%E6%A0%87" class="mw-redirect" title="地理坐标">坐标</a>：<style data-mw-deduplicate="TemplateStyles:r65292569">.mw-parser-output .geo-default,.mw-parser-output .geo-dms,.mw-parser-output .geo-dec{display:inline}.mw-parser-output .geo-nondefault,.mw-parser-output .geo-multi-punct{display:none}.mw-parser-output .longitude,.mw-parser-output .latitude{white-space:nowrap}</style><span class="plainlinks nourlexpansion"><a class="external text" href="//geohack.toolforge.org/geohack.php?language=zh&amp;pagename=%E9%9F%8B%E7%88%BE%E5%8D%9A%E9%9F%8B%E9%BD%8A_(%E7%A7%91%E7%B4%A2%E5%A4%AB%E5%8D%80)&amp;params=48_20_32_N_25_8_0_E_scale:30000"><span class="geo-default"><span class="geo-dms" title="此地的地图、航拍照片和其他数据"><span class="latitude">48°20′32″N</span> <span class="longitude">25°8′0″E</span></span></span><span class="geo-multi-punct">&#xfeff; / &#xfeff;</span><span class="geo-nondefault"><span class="geo-dec" title="此地的地图、航拍照片和其他数据">48.34222°N 25.13333°E</span><span style="display:none">&#xfeff; / <span class="geo">48.34222; 25.13333</span></span></span></a></span></span></span>
-</p><p><b>韋爾博韋齊</b>（<a href="/wiki/%E7%83%8F%E5%85%8B%E8%98%AD%E8%AA%9E" class="mw-redirect" title="烏克蘭語">烏克蘭語</a>：<span lang="uk">Вербовець</span>），是<a href="/wiki/%E7%83%8F%E5%85%8B%E8%98%AD" class="mw-redirect" title="烏克蘭">烏克蘭</a>的村落，位於該國西部<a href="/wiki/%E4%BC%8A%E4%B8%87%E8%AF%BA-%E5%BC%97%E5%85%B0%E7%A7%91%E5%A4%AB%E6%96%AF%E5%85%8B%E5%B7%9E" title="伊万诺-弗兰科夫斯克州">伊萬諾-弗蘭科夫斯克州</a>，由<a href="/wiki/%E7%A7%91%E7%B4%A2%E5%A4%AB%E5%8D%80" class="mw-redirect" title="科索夫區">科索夫區</a>負責管轄，始建於1456年，面積18.77平方公里，2001年人口3,395。
-</p>
-<!-- 
-NewPP limit report
-Parsed by mw1412
-Cached time: 20230831132208
-Cache expiry: 1814400
-Reduced expiry: false
-Complications: []
-CPU time usage: 0.147 seconds
-Real time usage: 0.186 seconds
-Preprocessor visited node count: 48/1000000
-Post‐expand include size: 2084/2097152 bytes
-Template argument size: 0/2097152 bytes
-Highest expansion depth: 3/100
-Expensive parser function count: 1/500
-Unstrip recursion depth: 0/20
-Unstrip post‐expand size: 362/5000000 bytes
-Lua time usage: 0.110/10.000 seconds
-Lua memory usage: 15402517/52428800 bytes
-Number of Wikibase entities loaded: 1/400
-->
-<!--
-Transclusion expansion time report (%,ms,calls,template)
-100.00%  152.989      1 -total
- 70.07%  107.204      1 Template:Lang-uk
- 29.62%   45.313      1 Template:Coord
-->
-</div>');
-        $expected = '韋爾博韋齊（烏克蘭語：Вербовець），是烏克蘭的村落，位於該國西部伊萬諾-弗蘭科夫斯克州，由科索夫區負責管轄，始建於1456年，面積18.77平方公里，2001年人口3,3';
-        self::assertTrue(
-            str_starts_with($output, $expected),
-            "Start of parsed Wikipedia text should be:" . PHP_EOL . PHP_EOL . $expected . PHP_EOL . PHP_EOL . 'Real start text is: ' . PHP_EOL . PHP_EOL . substr($output, 0, 250)
-        );
+        return [
+            'Actor with wikidata and translation' => [$cur[0], $cur[1]],
+        ];

    }

    /**
-     * Test for cleaning wikidata info.
+     * Data provider for a place that has a wikidata link and a Telugu translation.
     *
-     * @author Joshua Ramon Enslin <joshua@museum-digital.de>
-     * @group  ValidOutput
-     * @small
+     * @return array<string, array{0: int, 1: string}>
+     */
+    public static function placeWithTlAndWikidataLinkProvider():array {
+
+        $mysqli = md_main_mysqli_connect();
+
+        // Only the first matching row is used, so the query is limited to one row.
+        $result = $mysqli->do_read_query("SELECT `ort_id`, `noda_nrinsource`
+            FROM `" . DATABASENAME_NODA . "`.`noda_orte`
+            WHERE `noda_source` = 'Wikidata'
+                AND EXISTS (SELECT 1 FROM `" . DATABASENAME_NODA . "`.`ort_translation`
+                    WHERE `ort_translation`.`ort_id` = `noda_orte`.`ort_id`
+                        AND `trans_language` = '" . $mysqli->escape_string(self::TEST_LANG) . "')
+            LIMIT 1");
+
+        // Free the result set and the connection before evaluating the row, so
+        // both are released as well when no suitable entry exists.
+        $cur = $result->fetch_row();
+        $result->close();
+        $mysqli->close();
+
+        if (!$cur) {
+            throw new Exception("Failed to identify an entry that has a wikidata entry and a translation for language " . self::TEST_LANG);
+        }
+
+        // The cast keeps the dataset in the documented array{0: int, 1: string}
+        // shape even if the driver hands back the ID column as a numeric string.
+        return [
+            'Place with wikidata and translation' => [(int)$cur[0], $cur[1]],
+        ];
+
+    }
+
+    /**
+     * Data provider for a tag that has a wikidata link and a Telugu translation.
+     *
+     * @return array<string, array{0: int, 1: string}>
+     */
+    public static function tagWithTlAndWikidataLinkProvider():array {
+
+        $mysqli = md_main_mysqli_connect();
+
+        // Only the first matching row is used, so the query is limited to one row.
+        $result = $mysqli->do_read_query("SELECT `tag_id`, `noda_nrinsource`
+            FROM `" . DATABASENAME_NODA . "`.`noda_tag`
+            WHERE `noda_source` = 'Wikidata'
+                AND EXISTS (SELECT 1 FROM `" . DATABASENAME_NODA . "`.`tag_translation`
+                    WHERE `tag_translation`.`tag_id` = `noda_tag`.`tag_id`
+                        AND `trans_language` = '" . $mysqli->escape_string(self::TEST_LANG) . "')
+            LIMIT 1");
+
+        // Free the result set and the connection before evaluating the row, so
+        // both are released as well when no suitable entry exists.
+        $cur = $result->fetch_row();
+        $result->close();
+        $mysqli->close();
+
+        if (!$cur) {
+            throw new Exception("Failed to identify an entry that has a wikidata entry and a translation for language " . self::TEST_LANG);
+        }
+
+        // The cast keeps the dataset in the documented array{0: int, 1: string}
+        // shape even if the driver hands back the ID column as a numeric string.
+        return [
+            'Tag with wikidata and translation' => [(int)$cur[0], $cur[1]],
+        ];
+
+    }
+
+    /**
+     * Test for fetching and recording translations for an actor.
+     *
+     * @param integer $actor_id    Actor ID.
+     * @param string  $wikidata_id Wikidata ID.
     *
     * @return void
     */
-    public function testCleanWikidataInputWithoutHtml():void {
+    #[DataProvider('actorWithTlAndWikidataLinkProvider')]
+    public function testFetchingTranslationForPersinst(int $actor_id, string $wikidata_id):void {

-        $output = NodaWikidataFetcher::cleanWikidataInput('Werbowez (ukrainisch Вербовець; russisch Вербовец, polnisch Wierzbowiec; rumänisch Verboveț) ist ein Dorf in der ukrainischen Oblast Iwano-Frankiwsk mit etwa 3400 Einwohnern (2001).[1]');
-        $expected = 'Werbowez (ukrainisch Вербовець; russisch Вербовец, polnisch Wierzbowiec; rumänisch Verboveț) ist ein Dorf in der ukrainischen Oblast Iwano-Frankiwsk mit etwa 3400 Einwohnern (2001).';
-        self::assertTrue(
-            str_starts_with($output, $expected),
-            "Start of parsed Wikipedia text should be:" . PHP_EOL . PHP_EOL . $expected . PHP_EOL . PHP_EOL . 'Real start text is: ' . PHP_EOL . PHP_EOL . substr($output, 0, 250)
-        );
+        $mysqli = md_main_mysqli_connect();
+
+        // try/finally: the connection is closed even if an assertion fails or the
+        // fetcher throws.
+        try {
+
+            // Query tail passed to queryNumRows() before and after the fetch.
+            $countQuery = "
+            FROM `" . DATABASENAME_NODA . "`.`persinst_translation`
+            WHERE `persinst_id` = " . $actor_id . "
+                AND `trans_language` = '" . $mysqli->escape_string(self::TEST_LANG) . "'";
+
+            // Remove the existing translation so the fetcher has to write it anew.
+            $mysqli->do_update_query("DELETE FROM `" . DATABASENAME_NODA . "`.`persinst_translation`
+            WHERE `persinst_id` = " . $actor_id . "
+                AND `trans_language` = '" . $mysqli->escape_string(self::TEST_LANG) . "'");
+
+            self::assertEquals(0, MDMysqliTesting::queryNumRows($mysqli, $countQuery));
+
+            $data = NodaWikidataFetcher::getWikidataEntity($wikidata_id);
+            $fetcher = new NodaWikidataFetcher($mysqli);
+            $fetcher->getWikidataTranslationsForPersinst($data, $actor_id, [self::TEST_LANG]);
+
+            self::assertEquals(1, MDMysqliTesting::queryNumRows($mysqli, $countQuery));
+
+        }
+        finally {
+            $mysqli->close();
+        }
+
+    }
+
+    /**
+     * Test for fetching and recording translations for a place.
+     *
+     * @param integer $place_id    Place ID.
+     * @param string  $wikidata_id Wikidata ID.
+     *
+     * @return void
+     */
+    #[DataProvider('placeWithTlAndWikidataLinkProvider')]
+    public function testFetchingTranslationForPlace(int $place_id, string $wikidata_id):void {
+
+        $mysqli = md_main_mysqli_connect();
+
+        // try/finally: the connection is closed even if an assertion fails or the
+        // fetcher throws.
+        try {
+
+            // Query tail passed to queryNumRows() before and after the fetch.
+            $countQuery = "
+            FROM `" . DATABASENAME_NODA . "`.`ort_translation`
+            WHERE `ort_id` = " . $place_id . "
+                AND `trans_language` = '" . $mysqli->escape_string(self::TEST_LANG) . "'";
+
+            // Remove the existing translation so the fetcher has to write it anew.
+            $mysqli->do_update_query("DELETE FROM `" . DATABASENAME_NODA . "`.`ort_translation`
+            WHERE `ort_id` = " . $place_id . "
+                AND `trans_language` = '" . $mysqli->escape_string(self::TEST_LANG) . "'");
+
+            self::assertEquals(0, MDMysqliTesting::queryNumRows($mysqli, $countQuery));
+
+            $data = NodaWikidataFetcher::getWikidataEntity($wikidata_id);
+            $fetcher = new NodaWikidataFetcher($mysqli);
+            $fetcher->getWikidataTranslationsForPlace($data, $place_id, [self::TEST_LANG]);
+
+            self::assertEquals(1, MDMysqliTesting::queryNumRows($mysqli, $countQuery));
+
+        }
+        finally {
+            $mysqli->close();
+        }
+
+    }
+
+    /**
+     * Test for fetching and recording translations for a tag.
+     *
+     * @param integer $tag_id      Tag ID.
+     * @param string  $wikidata_id Wikidata ID.
+     *
+     * @return void
+     */
+    #[DataProvider('tagWithTlAndWikidataLinkProvider')]
+    public function testFetchingTranslationForTag(int $tag_id, string $wikidata_id):void {
+
+        $mysqli = md_main_mysqli_connect();
+
+        // try/finally: the connection is closed even if an assertion fails or the
+        // fetcher throws.
+        try {
+
+            // Query tail passed to queryNumRows() before and after the fetch.
+            $countQuery = "
+            FROM `" . DATABASENAME_NODA . "`.`tag_translation`
+            WHERE `tag_id` = " . $tag_id . "
+                AND `trans_language` = '" . $mysqli->escape_string(self::TEST_LANG) . "'";
+
+            // Remove the existing translation so the fetcher has to write it anew.
+            $mysqli->do_update_query("DELETE FROM `" . DATABASENAME_NODA . "`.`tag_translation`
+            WHERE `tag_id` = " . $tag_id . "
+                AND `trans_language` = '" . $mysqli->escape_string(self::TEST_LANG) . "'");
+
+            self::assertEquals(0, MDMysqliTesting::queryNumRows($mysqli, $countQuery));
+
+            $data = NodaWikidataFetcher::getWikidataEntity($wikidata_id);
+            $fetcher = new NodaWikidataFetcher($mysqli);
+            $fetcher->getWikidataTranslationsForTag($data, $tag_id, [self::TEST_LANG]);
+
+            self::assertEquals(1, MDMysqliTesting::queryNumRows($mysqli, $countQuery));
+
+        }
+        finally {
+            $mysqli->close();
+        }
+
+    }
+
+    /**
+     * Test that fetching translation from Wikidata returns the title of the wikipedia page,
+     * not the wikidata title.
+     *
+     * @return void
+     */
+    public function testListTranslationsFromWikidataWikipediaReturnsWikipediaTitle():void {
+
+        $data = NodaWikidataFetcher::getWikidataEntity("Q33550");
+
+        $output = NodaWikidataFetcher::listTranslationsFromWikidataWikipedia(["de"], $data);
+
+        // Check for the key first, so a missing translation is reported as a test
+        // failure instead of an "undefined array key" warning.
+        self::assertArrayHasKey('de', $output);
+        self::assertNotEmpty($output['de']);
+
+        // Strict comparison of the label against the expected page title.
+        self::assertSame("Friedrich II. (Preußen)", $output['de']['label']);

    }
 }
Author	SHA1	Message	Date
Joshua Ramon Enslin	119f216907	Merge branch 'master' of gitea:museum-digital/MDNodaHelpers	2025-06-08 17:20:24 +02:00
Joshua Ramon Enslin	25668b7b16	Ping and reconnect DB in fulltext sync for actors fulltext index	2025-06-08 17:19:47 +02:00
Joshua Ramon Enslin	8a31cf216e	Add shortened 100x A to list of blacklisted tags	2025-05-22 16:25:27 +02:00
Joshua Ramon Enslin	ff474341ed	Add iconclass terms BB, CC, DD, to blacklist	2025-05-08 16:18:05 +02:00
Joshua Ramon Enslin	1051e10732	Prevent ambigious splitting of [0-9]{4}-[0-9]{2}	2025-05-06 22:32:00 +02:00
Joshua Ramon Enslin	057cac0f1b	Ensure 1903/1904 cannot be split	2025-05-05 17:05:47 +02:00
Joshua Ramon Enslin	0053fbe030	Support splitting times like "1. Hälfte des 19. Jahrhunderts"	2025-04-28 17:00:32 +02:00
Joshua Ramon Enslin	7a2856ffad	Split times in more cases (300-20 BC, 300-4000 CE)	2025-04-08 15:18:32 +02:00
Joshua Ramon Enslin	00638152cf	Prevent splitting of non-existing exact dates (e.g. 31.04.XXXX) Close #35	2025-04-08 03:48:04 +02:00
Joshua Ramon Enslin	dba60dbce6	Fix order of split days and months within a single year BCE Close #32	2025-04-07 18:32:14 +02:00
Joshua Ramon Enslin	f84fe1bca5	Fix type error / reference to values now not consistently existing anymore	2025-04-06 22:56:36 +02:00
Joshua Ramon Enslin	423959ac94	Stop early if autotranslation cannot proceed after validation	2025-04-05 00:11:03 +02:00
Joshua Ramon Enslin	e8edb4a459	Time splitter: Handle first/second half Close #31	2025-04-05 00:09:39 +02:00
Joshua Ramon Enslin	8491b62a83	Validate against time errors in autogenerating translations for times Close #30	2025-04-04 20:03:59 +02:00
Joshua Ramon Enslin	bb2b1c2c32	Update NodaGroup	2025-03-13 00:30:33 +01:00
Joshua Ramon Enslin	5054d3c62f	Use more rigurous trimming in NodaConsolidatedNamesForPersinst	2025-03-10 04:18:00 +01:00
Joshua Ramon Enslin	beba838c0d	Correctly handle multibype hyphens in XXXX-XXXX	2025-03-10 04:13:59 +01:00
Joshua Ramon Enslin	54dd958073	See before	2025-03-10 04:05:00 +01:00
Joshua Ramon Enslin	5b99304b5c	Accept an additional type of hyphen / dash in time splitting	2025-03-10 03:59:44 +01:00
Joshua Ramon Enslin	5cce98f15b	Extend tests	2025-03-10 03:20:46 +01:00
Joshua Ramon Enslin	5036c77f32	Extend test for getting actor ID by life dates + name	2025-03-10 02:18:28 +01:00
Joshua Ramon Enslin	e95415be8f	Add test for getting actor ID by name with life dates	2025-03-10 01:48:09 +01:00
Joshua Ramon Enslin	5192781494	Use Wikipedia API for getting descriptions from Wikipedia rather than parsing HTML in Wikidata fetcher Thanks @awinkler	2025-03-09 02:08:26 +01:00
Joshua Ramon Enslin	d9d9f7fcdc	Continue refactoring tests for time splitter to run provider-based	2025-02-24 14:02:42 +01:00
Joshua Ramon Enslin	dbfa0df17f	Begin restructuring NodaTimeSplitterTest to use data providers	2025-02-21 10:32:07 +01:00
Joshua Ramon Enslin	3409ec7afe	Begin adding autotranslation language CRH / Crimean Tatar Some formatting is still unclear. See https://forum.museum-digital.info/d/52-additional-languages-for-translations-crimean-tatar/9	2025-02-18 17:51:36 +01:00
Joshua Ramon Enslin	27ac3f255a	Minor typing improvements	2025-02-15 13:36:50 +01:00
Joshua Ramon Enslin	9d7d53a858	Disallow fetching from Wikidata disambiguation pages Close #23	2025-02-13 22:37:17 +01:00
Joshua Ramon Enslin	28f6db67ff	Disable XML error warnings when parsing unclean inputs from Wikidata	2025-02-13 21:48:07 +01:00
Joshua Ramon Enslin	2f3bc5f2fa	Prefer wikipedia page titles over wikidata labels Close #28	2025-02-13 21:38:13 +01:00
Joshua Ramon Enslin	39362f537a	Merge branch 'master' of gitea:museum-digital/MDNodaHelpers	2025-02-13 17:19:43 +01:00
Joshua Ramon Enslin	de0357473a	Make constant for test language in NodaWikidataFetcherTest public, allowing reuse	2025-02-13 17:19:06 +01:00
Joshua Ramon Enslin	ef43270fb2	Map suffixes material and technique to their respective tag relation types	2025-02-13 14:04:38 +01:00
Joshua Ramon Enslin	338e09f001	Add kannada to list of languages fetched from wikidata	2025-02-13 13:10:45 +01:00
Joshua Ramon Enslin	4cf9eaf4fa	Remove superfluous params passed to function	2025-02-13 13:10:30 +01:00
Joshua Ramon Enslin	18438251a7	Add functions for getting IDs by any translated entry irrespective of the language	2025-02-12 17:15:19 +01:00
Joshua Ramon Enslin	1cf0f9858a	Add tests for loading translations in NodaWikidataFetcher	2025-02-12 16:02:04 +01:00
Joshua Ramon Enslin	1d50027809	Make function getWikidataEntity public	2025-02-12 15:48:52 +01:00
Joshua Ramon Enslin	d1cee17ef5	Add Telugu to list of languages to fetch in Wikidata fetcher Close #24	2025-02-12 12:47:02 +01:00
Joshua Ramon Enslin	baf7905e0b	Map gender Q207959 Q207959 is androgyny, mapping is a preliminary solution	2025-02-03 09:41:16 +01:00
Joshua Ramon Enslin	9bf14d7d91	Add search function for getting entries in NodaIDGetter across vocabs	2025-01-31 23:25:40 +01:00
Joshua Ramon Enslin	a621534136	Update NodaBlacklistedTerms	2025-01-24 13:45:28 +01:00
Joshua Ramon Enslin	51fe9a5e45	Cover more edge cases for splitting time names	2025-01-15 11:49:20 +01:00
Joshua Ramon Enslin	9c2eaa2929	Allow splitting 1945-48	2025-01-15 10:35:35 +01:00
Joshua Ramon Enslin	546c17031a	Make NodaImportLogger more resilient, prevent error in case of duplicate import names	2024-12-12 12:43:11 +01:00
Joshua Ramon Enslin	bf22f5541d	Retrieve "displayed subject" relationship from suffix "<Motiv>", "[Motiv]"	2024-12-03 16:07:41 +01:00
Joshua Ramon Enslin	e036d7881a	Add missing strict typing in function params	2024-12-01 22:11:17 +01:00
Joshua Ramon Enslin	d8db941485	Disallow tags of name "Nichtmünzliches" (de)	2024-11-24 16:08:14 +01:00
Joshua Ramon Enslin	b7bb7364d4	Ensure duplicate time names can be parsed in NodaTimeSplitter (e.g. 1.1.2024-1.1.2024)	2024-11-20 10:02:10 +01:00
Joshua Ramon Enslin	4dcd93b947	Better validate input JSON fetched from Wikipedia	2024-11-12 15:36:32 +01:00
Joshua Ramon Enslin	c72ad51dda	Merge branch 'master' of gitea:museum-digital/MDNodaHelpers	2024-11-11 09:11:35 +01:00
Joshua Ramon Enslin	d6dea3e280	Remove use of SESSION in NodaWikidataFetcher	2024-11-11 09:11:15 +01:00