Compare commits

...

52 Commits

Author SHA1 Message Date
119f216907 Merge branch 'master' of gitea:museum-digital/MDNodaHelpers 2025-06-08 17:20:24 +02:00
25668b7b16 Ping and reconnect DB in fulltext sync for actors fulltext index 2025-06-08 17:19:47 +02:00
8a31cf216e Add shortened 100x A to list of blacklisted tags 2025-05-22 16:25:27 +02:00
ff474341ed Add iconclass terms BB, CC, DD, to blacklist 2025-05-08 16:18:05 +02:00
1051e10732 Prevent ambiguous splitting of [0-9]{4}-[0-9]{2} 2025-05-06 22:32:00 +02:00
057cac0f1b Ensure 1903/1904 cannot be split 2025-05-05 17:05:47 +02:00
0053fbe030 Support splitting times like "1. Hälfte des 19. Jahrhunderts" 2025-04-28 17:00:32 +02:00
7a2856ffad Split times in more cases (300-20 BC, 300-4000 CE) 2025-04-08 15:18:32 +02:00
00638152cf Prevent splitting of non-existing exact dates (e.g. 31.04.XXXX)
Close #35
2025-04-08 03:48:04 +02:00
dba60dbce6 Fix order of split days and months within a single year BCE
Close #32
2025-04-07 18:32:14 +02:00
f84fe1bca5 Fix type error / reference to values now not consistently existing
anymore
2025-04-06 22:56:36 +02:00
423959ac94 Stop early if autotranslation cannot proceed after validation 2025-04-05 00:11:03 +02:00
e8edb4a459 Time splitter: Handle first/second half
Close #31
2025-04-05 00:09:39 +02:00
8491b62a83 Validate against time errors in autogenerating translations for times
Close #30
2025-04-04 20:03:59 +02:00
bb2b1c2c32 Update NodaGroup 2025-03-13 00:30:33 +01:00
5054d3c62f Use more rigorous trimming in NodaConsolidatedNamesForPersinst 2025-03-10 04:18:00 +01:00
beba838c0d Correctly handle multibyte hyphens in XXXX-XXXX 2025-03-10 04:13:59 +01:00
54dd958073 See before 2025-03-10 04:05:00 +01:00
5b99304b5c Accept an additional type of hyphen / dash in time splitting 2025-03-10 03:59:44 +01:00
5cce98f15b Extend tests 2025-03-10 03:20:46 +01:00
5036c77f32 Extend test for getting actor ID by life dates + name 2025-03-10 02:18:28 +01:00
e95415be8f Add test for getting actor ID by name with life dates 2025-03-10 01:48:09 +01:00
5192781494 Use Wikipedia API for getting descriptions from Wikipedia rather than
parsing HTML in Wikidata fetcher

Thanks @awinkler
2025-03-09 02:08:26 +01:00
d9d9f7fcdc Continue refactoring tests for time splitter to run provider-based 2025-02-24 14:02:42 +01:00
dbfa0df17f Begin restructuring NodaTimeSplitterTest to use data providers 2025-02-21 10:32:07 +01:00
3409ec7afe Begin adding autotranslation language CRH / Crimean Tatar
Some formatting is still unclear. See https://forum.museum-digital.info/d/52-additional-languages-for-translations-crimean-tatar/9
2025-02-18 17:51:36 +01:00
27ac3f255a Minor typing improvements 2025-02-15 13:36:50 +01:00
9d7d53a858 Disallow fetching from Wikidata disambiguation pages
Close #23
2025-02-13 22:37:17 +01:00
28f6db67ff Disable XML error warnings when parsing unclean inputs from Wikidata 2025-02-13 21:48:07 +01:00
2f3bc5f2fa Prefer wikipedia page titles over wikidata labels
Close #28
2025-02-13 21:38:13 +01:00
39362f537a Merge branch 'master' of gitea:museum-digital/MDNodaHelpers 2025-02-13 17:19:43 +01:00
de0357473a Make constant for test language in NodaWikidataFetcherTest public, allowing reuse 2025-02-13 17:19:06 +01:00
ef43270fb2 Map suffixes material and technique to their respective tag relation
types
2025-02-13 14:04:38 +01:00
338e09f001 Add kannada to list of languages fetched from wikidata 2025-02-13 13:10:45 +01:00
4cf9eaf4fa Remove superfluous params passed to function 2025-02-13 13:10:30 +01:00
18438251a7 Add functions for getting IDs by any translated entry irrespective of
the language
2025-02-12 17:15:19 +01:00
1cf0f9858a Add tests for loading translations in NodaWikidataFetcher 2025-02-12 16:02:04 +01:00
1d50027809 Make function getWikidataEntity public 2025-02-12 15:48:52 +01:00
d1cee17ef5 Add Telugu to list of languages to fetch in Wikidata fetcher
Close #24
2025-02-12 12:47:02 +01:00
baf7905e0b Map gender Q207959
Q207959 is androgyny, mapping is a preliminary solution
2025-02-03 09:41:16 +01:00
9bf14d7d91 Add search function for getting entries in NodaIDGetter across vocabs 2025-01-31 23:25:40 +01:00
a621534136 Update NodaBlacklistedTerms 2025-01-24 13:45:28 +01:00
51fe9a5e45 Cover more edge cases for splitting time names 2025-01-15 11:49:20 +01:00
9c2eaa2929 Allow splitting 1945-48 2025-01-15 10:35:35 +01:00
546c17031a Make NodaImportLogger more resilient, prevent error in case of duplicate import names 2024-12-12 12:43:11 +01:00
bf22f5541d Retrieve "displayed subject" relationship from suffix "<Motiv>", "[Motiv]" 2024-12-03 16:07:41 +01:00
e036d7881a Add missing strict typing in function params 2024-12-01 22:11:17 +01:00
d8db941485 Disallow tags of name "Nichtmünzliches" (de) 2024-11-24 16:08:14 +01:00
b7bb7364d4 Ensure duplicate time names can be parsed in NodaTimeSplitter (e.g.
1.1.2024-1.1.2024)
2024-11-20 10:02:10 +01:00
4dcd93b947 Better validate input JSON fetched from Wikipedia 2024-11-12 15:36:32 +01:00
c72ad51dda Merge branch 'master' of gitea:museum-digital/MDNodaHelpers 2024-11-11 09:11:35 +01:00
d6dea3e280 Remove use of SESSION in NodaWikidataFetcher 2024-11-11 09:11:15 +01:00
21 changed files with 1806 additions and 1092 deletions

View File

@ -13,7 +13,7 @@ final class NodaBlacklistedTerms {
/**
* A blacklist of disallowed tags. All entries are listed in full lowercase.
*/
const TAG_BLACKLIST = [
public const TAG_BLACKLIST = [
'de' => [
'andere',
'anderes',
@ -33,16 +33,35 @@ final class NodaBlacklistedTerms {
'ding',
'dinge',
'aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa',
'Aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa',
'aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa',
'nichtmünzliches',
'unbestimmt',
'AA',
'BB',
'CC',
'DD',
'EE',
'FF',
'GG',
'HH',
'LL',
'-',
'?',
],
'en' => [
'other',
'others',
'unknown',
'various',
'-',
'?',
],
'hu' => [
'ism.',
'ismeretlen',
'-',
'?',
],
];

View File

@ -79,11 +79,12 @@ final class NodaConsolidatedNamesForPersinst extends NodaConsolidatedNamesAbstra
if (count($parts) !== 2) return [];
$nameOnly = trim($parts[0]);
$dateString = rtrim($parts[1], ')'); //
$dateString = trim(rtrim($parts[1], ')')); //
if (!empty($dates = NodaTimeSplitter::is_timespan($dateString))
&& $dates->start_year !== '?'
&& $dates->end_year !== '?'
&& $dates->start_year !== $dates->end_year
&& intval($dates->end_year) - intval($dates->start_year) < 150
) {
return [

View File

@ -93,7 +93,7 @@ final class NodaConsolidatedNamesForPlaces extends NodaConsolidatedNamesAbstract
*
* @return string
*/
private static function _rewrite_narrower_broader_pairs_to_brackets(string $name, string $indicator, $separator = ', '):string {
private static function _rewrite_narrower_broader_pairs_to_brackets(string $name, string $indicator, string $separator = ', '):string {
if (str_contains($name, $indicator)
&& substr_count($name, $indicator) === 1
@ -223,7 +223,7 @@ final class NodaConsolidatedNamesForPlaces extends NodaConsolidatedNamesAbstract
*
* @return string
*/
private static function _rewrite_ukrainian_names_by_hierarchy($name):string {
private static function _rewrite_ukrainian_names_by_hierarchy(string $name):string {
$identifiersByLevel = [
'state' => [' РСР', 'РСР ', ' АРСР', 'АРСР ', ' губернія', 'губернія '],
@ -325,7 +325,9 @@ final class NodaConsolidatedNamesForPlaces extends NodaConsolidatedNamesAbstract
}
$output = $main_name;
if (!empty($specifiers)) $output .= ' (' . implode(', ', $specifiers) . ')';
if (!empty($specifiers)) {
$output .= ' (' . implode(', ', $specifiers) . ')';
}
return $output;

View File

@ -101,7 +101,11 @@ final class NodaGroup {
/**
* Updates a group.
*
* @retun void
* @param integer $group_id ID of the group to update.
* @param string $name Name of the group.
* @param string $comment Optional: Comment for the group.
*
* @return void
*/
public function update(int $group_id, string $name, string $comment = ''):void {
@ -123,7 +127,9 @@ final class NodaGroup {
/**
* Deletes a group.
*
* @retun void
* @param integer $group_id ID of the group to delete.
*
* @return void
*/
public function delete(int $group_id):void {

View File

@ -155,6 +155,37 @@ final class NodaIDGetter {
}
/**
 * Looks up an actor (persinst) ID via the persinst translations table.
 * The query does not filter by language, so a translation in any
 * language may match.
 *
 * At most two rows are fetched; the first one whose translated name is
 * confirmed by self::_stri_matches() determines the result.
 *
 * @param MDMysqli $mysqli_noda Database connection.
 * @param string   $name        Name of the persinst to search for.
 *
 * @return integer ID of the matching persinst, 0 if there is none.
 */
public static function getPersinstIDByAnyTransName(MDMysqli $mysqli_noda, string $name):int {

    if (empty($name)) {
        return 0;
    }

    $rows = $mysqli_noda->query_by_stmt("
SELECT `persinst_id`, `trans_name`
FROM `persinst_translation`
WHERE `trans_name` = ?
LIMIT 2", "s", $name);

    $persinstId = 0;
    while ($row = $rows->fetch_row()) {
        if (self::_stri_matches($row[1], $name)) {
            $persinstId = (int)$row[0];
            break;
        }
    }

    // The result set is released on every path, match or not.
    $rows->close();

    return $persinstId;

}
/**
* Returns persinst ID by entry in persinst translations table
* plus birth and death.
@ -456,6 +487,37 @@ final class NodaIDGetter {
}
/**
 * Looks up a place ID via the place translations table. The query does
 * not filter by language, so a translation in any language may match.
 *
 * At most two rows are fetched; the first one whose translated name is
 * confirmed by self::_stri_matches() determines the result.
 *
 * @param MDMysqli $mysqli_noda Database connection.
 * @param string   $name        Name of the place to search for.
 *
 * @return integer ID of the matching place, 0 if there is none.
 */
public static function getPlaceIDByAnyTransName(MDMysqli $mysqli_noda, string $name):int {

    if (empty($name)) {
        return 0;
    }

    $rows = $mysqli_noda->query_by_stmt("
SELECT `ort_id`, `trans_name`
FROM `ort_translation`
WHERE `trans_name` = ?
LIMIT 2", "s", $name);

    $placeId = 0;
    while ($row = $rows->fetch_row()) {
        if (self::_stri_matches($row[1], $name)) {
            $placeId = (int)$row[0];
            break;
        }
    }

    // The result set is released on every path, match or not.
    $rows->close();

    return $placeId;

}
/**
* Returns place ID by entry in place noda table.
*
@ -647,6 +709,37 @@ final class NodaIDGetter {
}
/**
 * Looks up a tag ID via the tag translations table. The query does not
 * filter by language, so a translation in any language may match.
 *
 * At most two rows are fetched; the first one whose translated name is
 * confirmed by self::_stri_matches() determines the result.
 *
 * @param MDMysqli $mysqli_noda Database connection.
 * @param string   $name        Name of the tag to search for.
 *
 * @return integer ID of the matching tag, 0 if there is none.
 */
public static function getTagIDByAnyTransName(MDMysqli $mysqli_noda, string $name):int {

    if (empty($name)) {
        return 0;
    }

    $rows = $mysqli_noda->query_by_stmt("
SELECT `tag_id`, `trans_name`
FROM `tag_translation`
WHERE `trans_name` = ?
LIMIT 2", "s", $name);

    $tagId = 0;
    while ($row = $rows->fetch_row()) {
        if (self::_stri_matches($name, $row[1])) {
            $tagId = (int)$row[0];
            break;
        }
    }

    // The result set is released on every path, match or not.
    $rows->close();

    return $tagId;

}
/**
* Returns tag ID by entry in tag noda table.
*
@ -838,6 +931,36 @@ final class NodaIDGetter {
}
/**
 * Looks up a time ID via the time translations table. The query does not
 * filter by language, so a translation in any language may match.
 *
 * At most two rows are fetched; the first one whose translated name is
 * confirmed by self::_stri_matches() determines the result.
 *
 * @param MDMysqli $mysqli_noda Database connection.
 * @param string   $name        Name of the time to search for.
 *
 * @return integer ID of the matching time, 0 if there is none.
 */
public static function getTimeIDByAnyTransName(MDMysqli $mysqli_noda, string $name):int {

    if (empty($name)) {
        return 0;
    }

    $rows = $mysqli_noda->query_by_stmt("
SELECT `zeit_id`, `trans_name`
FROM `zeit_translation`
WHERE `trans_name` = ?
LIMIT 2", "s", $name);

    $timeId = 0;
    while ($row = $rows->fetch_row()) {
        if (self::_stri_matches($name, $row[1])) {
            $timeId = (int)$row[0];
            break;
        }
    }

    // The result set is released on every path, match or not.
    $rows->close();

    return $timeId;

}
/**
* Returns time ID by entry in time translations table.
*
@ -999,4 +1122,79 @@ final class NodaIDGetter {
return 0;
}
/**
 * Tries to resolve every phrase of a list to a vocabulary entry.
 *
 * For each phrase the lookups are attempted in a fixed order: tag (by
 * names and rewrites, then by any translation), place, actor and finally
 * time. The first lookup that yields a non-zero ID not yet collected for
 * its category wins; later lookups are not run for that phrase.
 *
 * The result is all-or-nothing: unless every phrase contributed exactly
 * one new ID, an empty result (count 0, no IDs) is returned. On success
 * the ID lists of each category are sorted ascending.
 *
 * @param MDMysqli                $mysqli_noda Database connection.
 * @param string                  $lang        Language to check in.
 * @param non-empty-array<string> $phrases     List of phrases to check.
 *
 * @return array{count: int, tag: integer[], actor: integer[], time: integer[], place: integer[]}
 */
public static function searchEntryNamesByList(MDMysqli $mysqli_noda, string $lang, array $phrases):array {

    $found = [
        'count' => 0,
        'tag' => [],
        'actor' => [],
        'time' => [],
        'place' => [],
    ];

    foreach ($phrases as $phrase) {

        // Ordered list of lazy lookups. Each one is only executed if all
        // previous ones failed to deliver a new ID for this phrase.
        $lookups = [
            ['tag', static fn() => self::getTagIDByNamesAndRewrites($mysqli_noda, $lang, $phrase)],
            ['tag', static fn() => self::getTagIDByAnyTransName($mysqli_noda, $phrase)],
            ['place', static fn() => self::getPlaceIDByNamesAndRewrites($mysqli_noda, $lang, $phrase)],
            ['place', static fn() => self::getPlaceIDByAnyTransName($mysqli_noda, $phrase)],
            ['actor', static fn() => self::getPersinstIDByNamesAndRewrites($mysqli_noda, $lang, $phrase, '', '')],
            ['actor', static fn() => self::getPersinstIDByAnyTransName($mysqli_noda, $phrase)],
            ['time', static fn() => self::getTimeIDByNamesAndRewrites($mysqli_noda, $lang, $phrase)],
            ['time', static fn() => self::getTimeIDByAnyTransName($mysqli_noda, $phrase)],
        ];

        foreach ($lookups as [$category, $lookup]) {
            $id = $lookup();
            if ($id !== 0 && !in_array($id, $found[$category], true)) {
                $found[$category][] = $id;
                ++$found['count'];
                break;
            }
        }

    }

    // Any phrase without a (new) hit invalidates the whole search.
    if (count($phrases) !== $found['count']) {
        return [
            'count' => 0,
            'tag' => [],
            'actor' => [],
            'time' => [],
            'place' => [],
        ];
    }

    foreach (['tag', 'actor', 'time', 'place'] as $category) {
        sort($found[$category]);
    }

    return $found;

}
}

View File

@ -32,8 +32,12 @@ final class NodaImportLogger {
$logStmt = $mysqli_noda->do_prepare("INSERT INTO `persinst_logged_imports`
(`instance`, `institution_id`, `input_string`, `persinst_id`)
VALUES (?, ?, ?, ?)");
$logStmt->bind_param("sisi", $instance, $institution_id, $loggedName, $persinst_id);
$logStmt->execute();
try {
$logStmt->bind_param("sisi", $instance, $institution_id, $loggedName, $persinst_id);
$logStmt->execute();
}
catch (MDMysqliDuplicateKeysError $e) {
}
$logStmt->close();
}
@ -54,8 +58,12 @@ final class NodaImportLogger {
$logStmt = $mysqli_noda->do_prepare("INSERT INTO `orte_logged_imports`
(`instance`, `institution_id`, `input_string`, `ort_id`)
VALUES (?, ?, ?, ?)");
$logStmt->bind_param("sisi", $instance, $institution_id, $name, $ort_id);
$logStmt->execute();
try {
$logStmt->bind_param("sisi", $instance, $institution_id, $name, $ort_id);
$logStmt->execute();
}
catch (MDMysqliDuplicateKeysError $e) {
}
$logStmt->close();
}
@ -76,8 +84,12 @@ final class NodaImportLogger {
$logStmt = $mysqli_noda->do_prepare("INSERT INTO `zeiten_logged_imports`
(`instance`, `institution_id`, `input_string`, `zeit_id`)
VALUES (?, ?, ?, ?)");
$logStmt->bind_param("sisi", $instance, $institution_id, $name, $zeit_id);
$logStmt->execute();
try {
$logStmt->bind_param("sisi", $instance, $institution_id, $name, $zeit_id);
$logStmt->execute();
}
catch (MDMysqliDuplicateKeysError $e) {
}
$logStmt->close();
}
@ -98,8 +110,12 @@ final class NodaImportLogger {
$logStmt = $mysqli_noda->do_prepare("INSERT INTO `tag_logged_imports`
(`instance`, `institution_id`, `input_string`, `tag_id`)
VALUES (?, ?, ?, ?)");
$logStmt->bind_param("sisi", $instance, $institution_id, $name, $tag_id);
$logStmt->execute();
try {
$logStmt->bind_param("sisi", $instance, $institution_id, $name, $tag_id);
$logStmt->execute();
}
catch (MDMysqliDuplicateKeysError $e) {
}
$logStmt->close();
}

View File

@ -28,9 +28,10 @@ final class NodaSplitTime {
/**
* Returns a single, exact date.
*
* @param string $year Year.
* @param string $month Month.
* @param string $day Day.
* @param string $year Year.
* @param string $month Month.
* @param string $day Day.
* @param NodaTimeBeforeAfterIndicator $before_after_indicator Determines if the time is exact or before / after.
*
* @return NodaSplitTime
*/
@ -296,6 +297,15 @@ final class NodaSplitTime {
/**
* Constructor.
*
* @param string $start_year Year.
* @param string $end_year Year.
* @param string $counting_time_month Month.
* @param string $counting_time_day Day.
* @param NodaCountingTimeIndicator $counting_time_indicator Determines if the time is BCE or CCE.
* @param NodaTimeBeforeAfterIndicator $before_after_indicator Determines if the time is inexact to one direction.
* @param false|string $start_date Start date.
* @param false|string $end_date End date.
*
* @return void
*/
public function __construct(string $start_year, string $end_year,
@ -379,5 +389,19 @@ final class NodaSplitTime {
}
}
// Validate
$startDateTime = MD_STD::strtotime("2000-" . substr($this->start_date, -5));
if (checkdate((int)date('m', $startDateTime), (int)date('d', $startDateTime), (int)date('Y', $startDateTime)) === false) {
throw new MDgenericInvalidInputsException("Invalid start date: " . $this->start_date);
}
if (!empty((int)$this->counting_time_day)) {
// The year 2000 is used here as it is a leap year and lots of years accepted in md are not accepted
// by checkdate.
if (checkdate((int)$this->counting_time_month, (int)$this->counting_time_day, 2000) === false) {
throw new MDgenericInvalidInputsException("Invalid date formed by counting time: " . $this->counting_time_month . ' -- ' . $this->counting_time_day);
}
}
}
}

View File

@ -15,6 +15,16 @@ final class NodaTagRelationIdentifier {
private const SUFFIXES = [
'de' => [
' (Motiv)' => MDTagRelationType::display_subject,
' [Motiv]' => MDTagRelationType::display_subject,
' <Motiv>' => MDTagRelationType::display_subject,
' (Material)' => MDTagRelationType::material,
' [Material]' => MDTagRelationType::material,
' <Material>' => MDTagRelationType::material,
' (Technik)' => MDTagRelationType::technique,
' [Technik]' => MDTagRelationType::technique,
' <Technik>' => MDTagRelationType::technique,
]
];

View File

@ -13,7 +13,7 @@ final class NodaTimeAutotranslater {
// TODO: Move these to NodaTimeAutotranslaterLocales
const LANGS_SYLLABLE_CLEANING = [
public const LANGS_SYLLABLE_CLEANING = [
"hu" => [
"10-as évek" => "10-es évek",
"40-as évek" => "40-es évek",
@ -463,13 +463,13 @@ final class NodaTimeAutotranslater {
}
/**
* Gets translations for a given entry type.
* Prepares translations for each available language.
*
* @param array<integer|string> $timeInfo Time information.
*
* @return array<string>
*/
public static function getTranslations(array $timeInfo):array {
public static function prepareTranslations(array $timeInfo):array {
if (!empty($timeInfo['zeit_name']) and strlen((string)$timeInfo['zeit_name']) > 10 and !empty($timespanDates = NodaTimeSplitter::attempt_splitting_from_till((string)$timeInfo['zeit_name']))) {
@ -504,8 +504,11 @@ final class NodaTimeAutotranslater {
$output = [];
$cases = NodaTimeAutotranslaterLocales::cases();
foreach ($cases as $tLang) {
$start_term = self::getTranslations($startTimeInfo)[$tLang->name];
$end_term = self::getTranslations($endTimeInfo)[$tLang->name];
$startTls = self::getTranslations($startTimeInfo);
$endTls = self::getTranslations($endTimeInfo);
if (empty($startTls) || empty($endTls)) return [];
$start_term = $startTls[$tLang->name];
$end_term = $endTls[$tLang->name];
$output[$tLang->name] = \sprintf($tLang->formatYearspanForSprintf(), $start_term, $end_term);
}
@ -604,6 +607,78 @@ final class NodaTimeAutotranslater {
}
/**
 * Checks that generated translations actually mention the years they
 * are supposed to describe.
 *
 * Leading spaces, zeros and hyphens are stripped from both years first.
 * Year pairs that look like a century (BCE or CE) or a decade, as well
 * as pairs with an unknown ('?') side, are accepted without inspection
 * because their translations need not contain the literal years.
 * Translations for 'ar' and 'fa' are excluded from the check.
 *
 * @param string|integer        $start        Start year.
 * @param string|integer        $end          End year.
 * @param array<string, string> $translations Translations.
 *
 * @return boolean False if a checked translation lacks the start year
 *                 or (where it differs from the start) the end year.
 */
public static function validateTranslations(string|int $start, string|int $end, array $translations):bool {

    $startYear = ltrim((string)$start, ' 0-');
    $endYear   = ltrim((string)$end, ' 0-');

    // Edge cases: centuries and decades have special translations
    // and can thus not be validated properly.
    $isCenturyBce = str_ends_with($startYear, "0") && str_ends_with($endYear, '1') && $startYear > $endYear;
    $isCenturyCe  = str_ends_with($startYear, "1") && str_ends_with($endYear, '0') && $startYear < $endYear;
    $isDecade     = str_ends_with($startYear, "0") && str_ends_with($endYear, '9') && $startYear < $endYear;
    if ($isCenturyBce || $isCenturyCe || $isDecade) {
        return true;
    }

    // 1920 + ? can be both "Since 1920" and "After 1919", so validation
    // is impossible there, too.
    if ($startYear === '?' || $endYear === '?') {
        return true;
    }

    // Languages that cannot be validated are left out of the check.
    $checkable = array_diff_key($translations, ['ar' => true, 'fa' => true]);

    $allContain = static function(string $needle) use ($checkable):bool {
        foreach ($checkable as $translation) {
            if (!str_contains($translation, $needle)) {
                return false;
            }
        }
        return true;
    };

    if (!$allContain($startYear)) {
        return false;
    }
    if ($startYear !== $endYear && !$allContain($endYear)) {
        return false;
    }

    return true;

}
/**
 * Builds the translations for a time entry and returns them only if
 * they pass validation against the entry's start and end year.
 *
 * @param array<integer|string> $timeInfo Time information. The keys
 *                                        zeit_beginn and zeit_ende are read here.
 *
 * @return array<string> Validated translations, or an empty array if
 *                       validation failed.
 */
public static function getTranslations(array $timeInfo):array {

    $translations = self::prepareTranslations($timeInfo);
    $isValid = self::validateTranslations($timeInfo['zeit_beginn'], $timeInfo['zeit_ende'], $translations);

    return $isValid === false ? [] : $translations;

}
/**
* Runs autotranslater.
*
@ -613,7 +688,9 @@ final class NodaTimeAutotranslater {
*/
public function translate(array $timeInfo):void {
$translations = self::getTranslations($timeInfo);
if (empty($translations = self::getTranslations($timeInfo))) {
return;
}
$this->_mysqli_noda->autocommit(false);

View File

@ -140,7 +140,7 @@ final class NodaTimeSplitter {
"decemberig",
];
private const REGEX_CENTURIES = '(\ |)(Jh\.|Jhd(|\.)|Jhdt(|\.)|Jahrhundert|sz|század|th century|ст|ст\.)';
private const REGEX_CENTURIES = '(\ |)(Jh|Jh\.|Jhd(|\.)|Jhdt(|\.)|Jahrhundert|sz|század|th century|ст|ст\.)';
private const REGEX_DECADES = '(s|er|er\ Jahre|(\-|\ )es\ évek|(\-|\ )as\ \évek|\ évek|\-es\ években|\-ті)';
/**
@ -345,16 +345,29 @@ final class NodaTimeSplitter {
$start_date = $output->start_date;
$end_date = $output->end_date;
}
else if ($start === $end && (int)str_replace('-', '', $start_date) > (int)str_replace('-', '', $end_date)) {
$start_date = $output->start_date;
$end_date = $output->end_date;
}
return new NodaSplitTime($start, $end, $output->counting_time_month, $output->counting_time_day,
NodaCountingTimeIndicator::bce, $output->before_after_indicator, '-' . $start_date, '-' . $end_date);
}
}
if (\preg_match("/^[0-9][0-9][0-9][0-9]\ bis [0-9][0-9][0-9][0-9]$/", $datum)) {
if (\preg_match("/^[0-9]{4}\ bis\ [0-9]{4}$/", $datum)) {
$start = \substr($datum, 0, 4);
$end = \substr($datum, -4);
return new NodaSplitTime($start, $end);
}
if (\preg_match("/^[0-9]{4}\ (und|oder|od.)\ [0-9]{4}$/", $datum)) {
$start = \substr($datum, 0, 4);
$end = \substr($datum, -4);
$startInt = (int)$start;
$endInt = (int)$end;
if ($startInt === $endInt - 1) {
return new NodaSplitTime($start, $end);
}
}
$datum = \str_replace(". ", ".", $datum);
@ -536,22 +549,22 @@ final class NodaTimeSplitter {
// 10000-20000
if (!empty(\preg_match("/^[0-9]{5}(\-|\/)[0-9]{5}$/", $datum))) {
return new NodaSplitTime(start_year: \substr($datum, 0, 5), end_year: \substr($datum, 6, 5));
return new NodaSplitTime(start_year: \substr($datum, 0, 5), end_year: \substr($datum, -5));
}
// 0000-0000
if (\preg_match("/^[0-9]{4}(\-|\/)[0-9]{4}(\.|)$/", $datum)) {
return new NodaSplitTime(start_year: \substr($datum, 0, 4), end_year: \substr($datum, 5, 4));
if (\preg_match("/^[0-9]{4}(\-|\/|\)[0-9]{4}(\.|)$/", $datum)) {
return new NodaSplitTime(start_year: \substr($datum, 0, 4), end_year: \substr($datum, -4));
}
// 1.900-2.000
if (\preg_match("/^[0-9]\.[0-9][0-9][0-9](\-|\/)[0-9]\.[0-9][0-9][0-9]$/", $datum)) {
if (\preg_match("/^[0-9]\.[0-9][0-9][0-9](\-|\/|\)[0-9]\.[0-9][0-9][0-9]$/", $datum)) {
$datum = \str_replace(".", "", $datum);
return new NodaSplitTime(start_year: \substr($datum, 0, 4), end_year: \substr($datum, 5, 4));
return new NodaSplitTime(start_year: \substr($datum, 0, 4), end_year: \substr($datum, -4));
}
// German TT.MM.JJJJ / TT.MM.JJJ / TT.MM.JJ / TT.MM.J
if (\preg_match("/^[0-9][0-9]\.[0-9][0-9]\.([0-9][0-9][0-9][0-9]|[0-9][0-9][0-9]|[0-9][0-9]|[0-9])$/", $datum)) { // German T.MM.JJJJ
if (\preg_match("/^[0-9]{2}\.[0-9]{2}\.([0-9]{4}|[0-9]{3}|[0-9]{2}|[0-9])$/", $datum)) { // German T.MM.JJJJ
$year = \substr($datum, 6, 4);
$month = \substr($datum, 3, 2);
$day = \substr($datum, 0, 2);
@ -559,7 +572,7 @@ final class NodaTimeSplitter {
}
// German TT.M.JJJJ / TT.M.JJJ / TT.M.JJ / TT.M.J
if (\preg_match("/^[0-9][0-9]\.[0-9]\.([0-9][0-9][0-9][0-9]|[0-9][0-9][0-9]|[0-9][0-9]|[0-9])$/", $datum)) { // German T.MM.JJJJ
if (\preg_match("/^[0-9]{2}\.[0-9]\.([0-9]{4}|[0-9]{3}|[0-9]{2}|[0-9])$/", $datum)) { // German T.MM.JJJJ
$year = \substr($datum, 5, 4);
$month = "0" . \substr($datum, 3, 1);
$day = \substr($datum, 0, 2);
@ -590,10 +603,24 @@ final class NodaTimeSplitter {
return NodaSplitTime::genExactDate($year, $month, $day);
}
// Intl': 2020-12
if (\preg_match("/^[0-9]{4}\-[0-9]{2}$/", $datum)) { // German Y-m
if (\preg_match("/^[0-9]{4}\-[0-9]{2}$/", $datum)) { // German Y-m or 1912-15
$year = \substr($datum, 0, 4);
$month = \substr($datum, 5, 2);
return new NodaSplitTime($year, $year, $month);
$month = \substr($datum, -2);
// Assume the end is a month
if (intval($month) < 12) {
// If the year is smaller than the second number, do not split, as either month
// or year may be meant
// Example: 1903-04
if (substr($datum, 2, 2) < 12) {
return false;
}
return new NodaSplitTime($year, $year, $month);
}
else {
$end = \substr($year, 0, 2) . $month;
return new NodaSplitTime($year, $end);
}
}
// German MM.JJJJ
@ -648,7 +675,27 @@ final class NodaTimeSplitter {
if (\preg_match("/^[0-9]{4}\-[0-9]{3}$/", $datum)) { // Hungarian Y-m
$start = \substr($datum, 0, 4);
$end = \substr($datum, -3);
return new NodaSplitTime("0" . $start, "0" . $end);
return new NodaSplitTime($start, "0" . $end);
}
// 2-3 (n. Chr.)
if (\preg_match("/^[0-9]{1}\-[0-9]{1}$/", $datum)) {
return new NodaSplitTime("000" . \substr($datum, 0, 1), "000" . \substr($datum, -1));
}
// 300-2 (v. Chr.)
if (\preg_match("/^[0-9]{3}\-[0-9]{2}$/", $datum)) {
return new NodaSplitTime("0" . \substr($datum, 0, 3), "00" . \substr($datum, -2));
}
// 30-2 (v. Chr.)
if (\preg_match("/^[0-9]{2}\-[0-9]{1}$/", $datum)) {
return new NodaSplitTime("00" . \substr($datum, 0, 2), "000" . \substr($datum, -1));
}
// 2-300 (n. Chr.)
if (\preg_match("/^[0-9]{1}\-[0-9]{3}$/", $datum)) {
return new NodaSplitTime("000" . \substr($datum, 0, 1), "0" . \substr($datum, -3));
}
// 20-30 (n. Chr.)
@ -658,6 +705,18 @@ final class NodaTimeSplitter {
return new NodaSplitTime("00" . $start, "00" . $end);
}
// 20-130 (n. Chr.)
if (\preg_match("/^[0-9]{2}\-[0-9]{3}$/", $datum)) { // 20-40 (n. Chr.)
$start = \substr($datum, 0, 2);
$end = \substr($datum, -3);
return new NodaSplitTime("00" . $start, "0" . $end);
}
// 120-1130 (n. Chr.)
if (\preg_match("/^[0-9]{3}\-[0-9]{4}$/", $datum)) { // 20-40 (n. Chr.)
return new NodaSplitTime("0" . \substr($datum, 0, 3), \substr($datum, -4));
}
// 1920
if (\preg_match("/^[0-9]{4}(\.|)$/", $datum)) {
$start = \substr($datum, 0, 4);
@ -699,35 +758,67 @@ final class NodaTimeSplitter {
$datum = self::clean_input($datum);
if (\preg_match("/^[0-9]{4}\.[0-9]{2}\.[0-9]{2}(\.|)\-$/", $datum)) { // YYYY.MM.DD.
$year = \substr($datum, 0, 4);
$month = \substr($datum, 5, 2);
$day = \substr($datum, 8, 2);
$inpDateWoSpaces = str_replace(" ", "", $datum);
if (\preg_match("/^[0-9]{4}\.[0-9]{2}\.[0-9]{2}(\.|)\-$/", $inpDateWoSpaces)) { // YYYY.MM.DD.
$year = \substr($inpDateWoSpaces, 0, 4);
$month = \substr($inpDateWoSpaces, 5, 2);
$day = \substr($inpDateWoSpaces, 8, 2);
return NodaSplitTime::genExactDate($year, $month, $day, NodaTimeBeforeAfterIndicator::since);
}
if (\preg_match("/^[0-9]{4}\.[0-9]{2}(\.|)\-$/", $datum)) { // YYYY.MM.-
$start = \substr($datum, 0, 4);
$month = \substr($datum, 5, 2);
if (\preg_match("/^[0-9]{4}\.[0-9]{2}(\.|)\-$/", $inpDateWoSpaces)) { // YYYY.MM.-
$start = \substr($inpDateWoSpaces, 0, 4);
$month = \substr($inpDateWoSpaces, 5, 2);
return new NodaSplitTime($start, '?', $month, before_after_indicator: NodaTimeBeforeAfterIndicator::since);
}
if (\preg_match("/^[0-9]{4}\-$/", $datum)) { // YYYY-
$start = \substr($datum, 0, 4);
if (\preg_match("/^[0-9]{4}\-$/", $inpDateWoSpaces)) { // YYYY-
$start = \substr($inpDateWoSpaces, 0, 4);
return new NodaSplitTime($start, '?', before_after_indicator: NodaTimeBeforeAfterIndicator::since);
}
if (\preg_match("/^\-[0-9]{4}\.[0-9]{2}\.[0-9]{2}$/", $datum)) { // Hungarian Y-m
$year = \substr($datum, 1, 4);
$month = \substr($datum, 6, 2);
$day = \substr($datum, 9, 2);
// ?.6.2024
if (\preg_match("/^\?\.([0-9]|[0-9]{2})\.[0-9]{4}$/", $inpDateWoSpaces)) { // German Y-m
$year = \substr($inpDateWoSpaces, -4);
$month = trim(\substr($inpDateWoSpaces, 2, 2), '. ');
return new NodaSplitTime($year, $year, $month);
}
// ?.?.2024
if (\preg_match("/^\?\.\?\.[0-9]{4}$/", $inpDateWoSpaces)) { // German Y-m
$year = \substr($inpDateWoSpaces, -4);
return new NodaSplitTime($year, $year);
}
if (\preg_match("/^[0-9]{4}$/", \trim($inpDateWoSpaces, '. ?!()[]X'))) { // German Y-m
$year = \trim($inpDateWoSpaces, '. ?!()[]X');
return new NodaSplitTime($year, $year);
}
if ((str_starts_with($inpDateWoSpaces, '0-') || str_ends_with($inpDateWoSpaces, '-0')) && \preg_match("/^[0-9]{4}$/", \strtr($inpDateWoSpaces, ['-0' => '', '0-' => ''])) && !str_ends_with($inpDateWoSpaces, '0-0')) {
$year = \strtr($inpDateWoSpaces, ['-0' => '', '0-' => '']);
if (strlen($year) === 4) {
return new NodaSplitTime($year, $year);
}
}
if (\preg_match("/^[0-9]{4}$/", \strtr($inpDateWoSpaces, ['o' => '0']))) { // German Y-m
$year = \strtr($inpDateWoSpaces, ['o' => '0']);
return new NodaSplitTime($year, $year);
}
if (\preg_match("/^\-[0-9]{4}\.[0-9]{2}\.[0-9]{2}$/", $inpDateWoSpaces)) { // Hungarian Y-m
$year = \substr($inpDateWoSpaces, 1, 4);
$month = \substr($inpDateWoSpaces, 6, 2);
$day = \substr($inpDateWoSpaces, 9, 2);
return NodaSplitTime::genExactDate($year, $month, $day, NodaTimeBeforeAfterIndicator::until);
}
if (\preg_match("/^\-[0-9]{4}\.[0-9]{2}$/", $datum)) { // Hungarian Y-m
$year = \substr($datum, 1, 4);
$month = \substr($datum, 6, 2);
if (\preg_match("/^\-[0-9]{4}\.[0-9]{2}$/", $inpDateWoSpaces)) { // Hungarian Y-m
$year = \substr($inpDateWoSpaces, 1, 4);
$month = \substr($inpDateWoSpaces, 6, 2);
return new NodaSplitTime('?', $year, $month, before_after_indicator: NodaTimeBeforeAfterIndicator::until);
}
if (\preg_match("/^\-[0-9]{4}$/", $datum)) { // Hungarian -Y
$year = \substr($datum, 1, 4);
if (\preg_match("/^\-[0-9]{4}$/", $inpDateWoSpaces)) { // Hungarian -Y
$year = \substr($inpDateWoSpaces, 1, 4);
return new NodaSplitTime('?', $year, before_after_indicator: NodaTimeBeforeAfterIndicator::until);
}
@ -828,7 +919,7 @@ final class NodaTimeSplitter {
$output->counting_time_indicator, NodaTimeBeforeAfterIndicator::until, '?', $output->end_date);
}
}
if (str_ends_with($datum, '-as évekig') || str_ends_with($datum, '-es évekig')) {
if (str_ends_with($datum, ' as évekig') || str_ends_with($datum, ' es évekig') || str_ends_with($datum, '-as évekig') || str_ends_with($datum, '-es évekig')) {
if ($output = self::attempt_splitting(\substr($datum, 0, -2))) {
return new NodaSplitTime('?', $output->end_year, $output->counting_time_month, $output->counting_time_day,
$output->counting_time_indicator, NodaTimeBeforeAfterIndicator::until, '?', $output->end_date);
@ -899,7 +990,7 @@ final class NodaTimeSplitter {
}
// 1. Jahrhundert
if (\preg_match("/^[0-9]\.\ (Jh\.|Jahrhundert|sz|század)$/", $datum)) {
if (\preg_match("/^[0-9]\.\ (Jh\|Jh\.|Jahrhundert|sz|század)$/", $datum)) {
if ($centuryNo = \intval(\substr($datum, 0, 1))) {
$centuryNo--;
return new NodaSplitTime((string)$centuryNo . "01", \strval($centuryNo + 1) . '00');
@ -907,7 +998,7 @@ final class NodaTimeSplitter {
}
// 17.-18. Jahrhundert
if (\preg_match("/^[0-9]{2}(\.|)(|\ Jh\.||\ Jahrhundert||\ sz||\ század)(\-|\/)[0-9]{2}\.\ (Jh\.|Jahrhundert|sz|század)$/", $datum)) {
if (\preg_match("/^[0-9]{2}(\.|)(|\ Jh|\ Jh\.|\ Jahrhundert|\ sz|\ század)(\-|\/)[0-9]{2}\.\ (Jh\.|Jahrhundert|sz|század)$/", $datum)) {
if (\strpos($datum, '/') !== false) {
$datum = str_replace('/', '-', $datum);
}
@ -1040,6 +1131,33 @@ final class NodaTimeSplitter {
}
/**
 * Rewrites special formulations of a date into a form the regular
 * splitting functions can handle.
 *
 * Currently covers German "first / second half of a century" phrasings,
 * e.g. "1. Hälfte des 19. Jahrhunderts" or "2. Hälfte 19. Jahrhundert",
 * which are turned into a plain year range ("1800-1850" / "1850-1899").
 *
 * @param string $datum Date.
 *
 * @return string|false Rewritten time span, or false if no rule applies.
 */
private static function _rewrite_special_cases_regular(string $datum):string|false {

    if (!\preg_match("/^(1|2)\.\ Hälfte(|\ des)\ [0-9]{2}\.\ Jahrhundert(|s)$/", $datum)) {
        return false;
    }

    // The pattern guarantees that the last dot of the string directly
    // follows the two-digit century number.
    $centuryDigits = substr($datum, (int)strrpos($datum, '.') - 2, 2);

    // The years of the n-th century start with n - 1 (19th century => 18xx).
    $yearPrefix = (string)((int)$centuryDigits - 1);

    // The first character is "1" or "2", selecting the half of the century.
    $isFirstHalf = ($datum[0] === '1');

    return $isFirstHalf
        ? $yearPrefix . "00-" . $yearPrefix . "50"
        : $yearPrefix . "50-" . $yearPrefix . "99";

}
/**
* Contains special rules for incorrectly or incompletely spelled out timespan names.
* To be called by self::attempt_splitting_from_till().
@ -1052,6 +1170,15 @@ final class NodaTimeSplitter {
if (empty($datum)) return '';
if (\preg_match("/^1\.\ (Halbjahr|Hälfte)\ [0-9]{4}$/", $datum)) {
$year = substr($datum, -4);
return "Januar $year-Juni $year";
}
if (\preg_match("/^2\.\ (Halbjahr|Hälfte)\ [0-9]{4}$/", $datum)) {
$year = substr($datum, -4);
return "Juli $year-Dezember $year";
}
$inputLength = strlen($datum);
// Hungarian year and month until month
@ -1091,6 +1218,33 @@ final class NodaTimeSplitter {
return $reconstituted;
}
// German T.-T.MM.JJJJ / T.-T.MM.JJJ / T.-T.MM.JJ / T.-T.MM.J
if (\preg_match("/^[0-9].\-[0-9]\.([0-9]|[0-9]{2})\.([0-9]{4}|[0-9]{3}|[0-9]{2}|[0-9])$/", $datum)) { // German T.MM.JJJJ
$year = \substr($datum, -4);
$month = trim(\substr($datum, -7, 2), '.');
$day = '0' . \substr($datum, 3, 1);
$firstday = '0' . \substr($datum, 0, 1);
return "$firstday.$month.$year-$day.$month.$year";
}
// German T.-TT.MM.JJJJ / T.-TT.MM.JJJ / T.-TT.MM.JJ / T.-TT.MM.J
if (\preg_match("/^[0-9].\-[0-9]{2}\.([0-9]|[0-9]{2})\.([0-9]{4}|[0-9]{3}|[0-9]{2}|[0-9])$/", $datum)) { // German T.MM.JJJJ
$year = \substr($datum, -4);
$month = trim(\substr($datum, -7, 2), '.');
$day = \substr($datum, 3, 2);
$firstday = '0' . \substr($datum, 0, 1);
return "$firstday.$month.$year-$day.$month.$year";
}
// German TT.-TT.MM.JJJJ / TT.-TT.MM.JJJ / TT.-TT.MM.JJ / TT.-TT.MM.J
if (\preg_match("/^[0-9]{2}.\-[0-9]{2}\.([0-9]|[0-9]{2})\.([0-9]{4}|[0-9]{3}|[0-9]{2}|[0-9])$/", $datum)) { // German T.MM.JJJJ
$year = \substr($datum, -4);
$month = trim(\substr($datum, -7, 2), '.');
$day = \substr($datum, 4, 2);
$firstday = \substr($datum, 0, 2);
return "$firstday.$month.$year-$day.$month.$year";
}
// 17-19. Jahrhundert
if (\preg_match("/^[0-9]{2}(\.|)\-[0-9]{2}(\.|)" . self::REGEX_CENTURIES . "$/", $datum)) {
$parts = explode('-', $datum);
@ -1208,6 +1362,30 @@ final class NodaTimeSplitter {
}
/**
 * Removes superfluous characters and makes an input string roughly parsable.
 *
 * - Surrounding whitespace, commas and semicolons are stripped in one pass,
 *   so whitespace uncovered by removing a separator ("1440 ;") does not survive.
 * - Leading dots are stripped as well; trailing dots are kept, as they are
 *   significant in abbreviated time names such as "19. Jh.".
 * - A span whose start and end are identical ("1440-1440", "1440 - 1440")
 *   is reduced to the single value.
 *
 * @param string $input Input string.
 *
 * @return string
 */
private static function _runBasicNameCleanup(string $input):string {

    // Default character mask of trim() plus the separators to drop.
    $whitespaceAndSeparators = " \n\r\t\v\x00,;";

    $input = rtrim($input, $whitespaceAndSeparators);
    $input = ltrim($input, $whitespaceAndSeparators . '.');

    // Clean away duplicate inputs
    // 1440-1440
    if (str_contains($input, '-')) {
        $parts = explode('-', $input);
        if (count($parts) === 2) {
            $start = trim($parts[0]);
            $end   = trim($parts[1]);
            if ($start === $end) {
                $input = $start;
            }
        }
    }

    return $input;

}
/**
* Wrapper to check if any splitting command works.
*
@ -1217,6 +1395,8 @@ final class NodaTimeSplitter {
*/
public static function attempt_splitting(string $datum):NodaSplitTime|false {
$datum = self::_runBasicNameCleanup($datum);
try {
if (!empty($moda = self::is_timespan($datum))) {
return $moda;
@ -1254,6 +1434,10 @@ final class NodaTimeSplitter {
}
}
if ($rewrite = self::_rewrite_special_cases_regular($datum)) {
return self::attempt_splitting($rewrite);
}
return false;
}

View File

@ -61,6 +61,7 @@ final class NodaUncertaintyHelper {
"(?)",
"?",
" [vermutlich]",
" vermutlich",
" [verm.]",
" [wahrscheinlich]",
];
@ -100,6 +101,7 @@ final class NodaUncertaintyHelper {
"c. ",
"ca ",
"ca. ",
"ca.",
"Ca ",
"Ca. ",
"za. ",
@ -141,8 +143,11 @@ final class NodaUncertaintyHelper {
" [circa]",
" (verm.)",
" (vermutl.)",
" vermutlich",
" körül",
", um",
", ca.",
", ca",
" (um)",
" (ок.)",
];

View File

@ -18,7 +18,7 @@ final class NodaWikidataFetcher {
];
public const LANGUAGES_MAIN_DESC = ['de', 'da', 'en', 'es', 'fr', 'hu', 'it', 'jp', 'nl', 'pt', 'ru', 'sv', 'sk', 'uk', 'zh'];
public const LANGUAGES_TO_CHECK = ['ar', 'bg', 'bn', 'cs', 'da', 'de', 'el', 'en', 'es', 'fa', 'fi', 'fr', 'ha', 'he', 'hi', 'hu', 'id', 'it', 'ja', 'ka', 'ko', 'nl', 'pl', 'pt', 'ro', 'ru', 'sv', 'sk', 'sw', 'ta', 'th', 'tl', 'tr', 'uk', 'ur', 'vi', 'zh'];
public const LANGUAGES_TO_CHECK = ['ar', 'bg', 'bn', 'cs', 'da', 'de', 'el', 'en', 'es', 'fa', 'fi', 'fr', 'ha', 'he', 'hi', 'hu', 'id', 'it', 'ja', 'ka', 'kn', 'ko', 'nl', 'pl', 'pt', 'ro', 'ru', 'sv', 'sk', 'sw', 'ta', 'te', 'th', 'tl', 'tr', 'uk', 'ur', 'vi', 'zh'];
public const LANGUAGES_TO_CAPITALIZE = ["cs", "da", "de", "en", "es", "fr", "fi", "id", "it", "nl", "pl", "pt", "ru", "sv", 'sk', "tl", "tr"];
@ -43,25 +43,6 @@ final class NodaWikidataFetcher {
"orcid" => "P496",
];
private const WIKIPEDIA_REMOVE_LITERALS = [
"<p>Si vous disposez d'ouvrages ou d'articles de référence ou si vous ",
'<p><b>En pratique&#160;:</b> <a href="/wiki/Wikip%C3%A9dia:Citez_vos_sources#Qualité_des_sources" title="Wikipédia:Citez vos sources">Quelles sources sont attendu',
'<pVous pouvez partager vos connaissances en laméliorant (',
'<p class="mw-empty-elt">',
'<p><small>Géolocalisation sur la carte',
'<p><b>Koordinaatit:</b>',
'<p><span class="executeJS" data-gadgetname="ImgToggle"></span',
'<p><span class="imgtoggleboxTitle">',
//'<div class="mw-parser-output"><p>',
'<p><span style="font-size: small;"><span id="coordinates">',
'<p><span></span></p>',
'<p><a rel="nofollow" class="external text" href="https://maps.gs',
'<p><span class="plainlinks nourlexpansion"><a class="external text" href="//tools.wmflabs.org/geohack/geohack.php?langu',
'<p><span style="display:none">',
'<p>&#32;</p>',
'<p><span class="geo noexcerpt"',
];
public const RETRIEVAL_MODES_ACCEPTED = [
'list',
'add',
@ -87,7 +68,8 @@ final class NodaWikidataFetcher {
*/
private static function _getWikipediaApiLink(string $lang, string $searchTerm):string {
return "https://" . urlencode($lang) . ".wikipedia.org/w/api.php?action=parse&page=" . urlencode($searchTerm) . "&prop=text&section=0&format=json";
return "https://" . urlencode($lang) . ".wikipedia.org/w/api.php?format=json&action=query&prop=extracts&exintro&explaintext&redirects=1&titles=" . urlencode($searchTerm);
# w/api.php?action=parse&page=" . urlencode($searchTerm) . "&prop=text&section=0&format=json";
}
@ -151,9 +133,14 @@ final class NodaWikidataFetcher {
private static function _getCleanedWikipediaSnippet(string $lang, string $title):string {
$datafromwiki = MD_STD::runCurl(self::_getWikipediaApiLink($lang, $title), 10000);
$datafromwiki = strval(json_decode($datafromwiki, true)['parse']['text']['*']);
$json_decoded = json_decode($datafromwiki, true);
if (empty($json_decoded) || !isset($json_decoded['query']) || empty($json_decoded['query']['pages'])) {
return '';
}
$firstPageId = array_keys($json_decoded['query']['pages'])[0];
$datafromwiki = strval($json_decoded['query']['pages'][$firstPageId]['extract']);
return self::_cleanWikidataInput($datafromwiki);
return self::_cleanInputSimple($datafromwiki);
}
@ -164,8 +151,9 @@ final class NodaWikidataFetcher {
*
* @return array<mixed>
*/
private static function _getWikidataEntity(string $wikidata_id):array {
public static function getWikidataEntity(string $wikidata_id):array {
self::validateWikidataId($wikidata_id);
$data = json_decode(MD_STD::runCurl("https://www.wikidata.org/wiki/Special:EntityData/" . urlencode($wikidata_id) . ".json", 10000), true);
if ($data === null) {
throw new MDhttpFailedException("Failed fetching from Wikidata. Try again later.");
@ -173,7 +161,20 @@ final class NodaWikidataFetcher {
if (empty($data['entities'][$wikidata_id])) {
throw new MDhttpFailedException("Failed fetching from Wikidata. Try again later.");
}
return $data['entities'][$wikidata_id];
$output = $data['entities'][$wikidata_id];
// Throw exception if this page is a dedicated disambiguation item.
// P31: Instance of; Q4167410: Wikimedia disambiguation page
if (isset($output['claims']) && isset($output['claims']['P31'])) {
foreach ($output['claims']['P31'] as $is_instance_of) {
if (isset($is_instance_of['mainsnak']['datavalue']['value']['id']) && $is_instance_of['mainsnak']['datavalue']['value']['id'] === 'Q4167410') {
throw new NodaWikidataFetcherDisambiguationIsDisallowedException("Loading wikidata disambiguation pages is disallowed");
}
}
}
return $output;
}
@ -258,237 +259,21 @@ final class NodaWikidataFetcher {
}
/**
* Cleans basic tags off Wikidata input.
* Cleans remaining HTML elements and leading, trailing whitespaces.
*
* @param string $input Input string.
*
* @return string
*/
private static function _cleanWikidataInputHtml(string $input):string {
private static function _cleanInputSimple(string $input):string {
// Clean off anything before first <p>
if ($pStartPos = strpos($input, '<p')) {
$input = substr($input, $pStartPos);
}
if ($pEndPos = strrpos($input, '</p>')) {
$input = substr($input, 0, $pEndPos + 4);
}
$doc = new DOMDocument();
try {
$doc->loadXML('<section>' . trim($input) . '</section>');
}
catch (Exception $e) {
throw new Exception("Failed to load DOMDocument." . PHP_EOL . $e->getMessage() . PHP_EOL . PHP_EOL . '---' . $input . '---');
}
$list = $doc->getElementsByTagName("style");
while ($list->length > 0) {
$p = $list->item(0);
if ($p === null || $p->parentNode === null) break;
$p->parentNode->removeChild($p);
}
$list = $doc->getElementsByTagName("table");
while ($list->length > 0) {
$p = $list->item(0);
if ($p === null || $p->parentNode === null) break;
$p->parentNode->removeChild($p);
}
$list = $doc->getElementsByTagName("ol");
while ($list->length > 0) {
$p = $list->item(0);
if ($p === null || $p->parentNode === null) break;
$p->parentNode->removeChild($p);
}
if (($firstP = $doc->getElementsByTagName("p")->item(0)) !== null) {
if (($firstPhtml = $doc->saveHTML($firstP)) !== false) {
if (strpos($firstPhtml, 'geohack') !== false) {
if ($firstP->parentNode !== null) $firstP->parentNode->removeChild($firstP);
}
}
}
$output = [];
foreach ($doc->getElementsByTagName("p") as $p) {
$output[] = trim($p->textContent);
}
/*
if (strpos($doc->saveHTML(), 'Coordinates:') !== false) {
echo $doc->saveHTML();
exit;
}
*/
return str_replace(PHP_EOL, PHP_EOL . PHP_EOL, trim(implode(PHP_EOL, $output)));
}
/**
* Cleans brackets ([1], [2]) off description text.
*
* @param string $input Input string.
*
* @return string
*/
private static function _cleanSourceBracketsOffTranslation(string $input):string {
$bracketsToRemove = [];
for ($i = 0; $i < 100; $i++) {
$bracketsToRemove["[$i]"] = "";
}
return strtr($input, $bracketsToRemove);
}
/**
* Cleans contents parsed from Wikipedia.
*
* @param string $input Input string.
*
* @return string
*/
private static function _cleanWikidataInput(string $input):string {
$input = trim($input, '"');
foreach (self::WIKIPEDIA_REMOVE_LITERALS as $tToRemove) $input = str_replace($tToRemove, "", $input);
if (substr($input, 0, strlen('<')) === '<') {
$input = self::_cleanWikidataInputHtml($input);
if (mb_strlen($input) > 600) {
if (strpos($input, PHP_EOL . PHP_EOL, 600) !== false) {
$input = substr($input, 0, strpos($input, PHP_EOL . PHP_EOL, 600));
}
}
$input = self::_cleanSourceBracketsOffTranslation($input);
$input = str_replace("\t", " ", $input);
// Remove newlines with ensuing spaces
while (strpos($input, PHP_EOL . " ") !== false) {
$input = str_replace(PHP_EOL . " ", PHP_EOL, $input);
}
// Remove double newlines
while (strpos($input, PHP_EOL . PHP_EOL . PHP_EOL) !== false) {
$input = str_replace(PHP_EOL . PHP_EOL . PHP_EOL, PHP_EOL . PHP_EOL, $input);
}
return MD_STD_IN::sanitize_text($input);
}
$input = str_replace(PHP_EOL, '', $input);
if (empty($input)) return "";
// Remove infobox tables specifically
$firstParagraphPosition = strpos($input, '<p', 1);
$currentSearchPos = strpos($input, "<table>");
if ($currentSearchPos !== false && $currentSearchPos < $firstParagraphPosition) {
if (($tableEndPos = strpos($input, "</table>")) !== false) {
if (($pStartPos = strpos($input, '<p', $tableEndPos + 6)) !== false) {
$input = substr($input, $pStartPos);
}
}
}
// Remove leftover unnecessary paragraphs before actual content
$removeFirstParagraph = false;
$firstParagraphPosition = strpos($input, '<p', 1);
foreach (["</table>", "<img"] as $tagPart) {
$currentSearchPos = strpos($input, $tagPart);
if ($currentSearchPos !== false && $currentSearchPos < $firstParagraphPosition) {
$removeFirstParagraph = true;
break;
}
}
if ($removeFirstParagraph === true) {
$input = substr($input, $firstParagraphPosition ?: 0);
}
$input = str_replace('</p>', '</p>' . PHP_EOL . PHP_EOL . PHP_EOL, $input);
# $input = str_replace('?/i', '', $input);
$input = strip_tags($input);
# for ($i = 150; $i < 1000; $i++) $input = str_replace("&#$i;", " ", $input);
$i = 0;
while (strpos($input, ".mw-parser-output") !== false and strpos($input, "}", strpos($input, ".mw-parser-output")) !== false) {
$part1 = substr($input, 0, strpos($input, ".mw-parser-output"));
$part2 = substr($input, strpos($input, "}", strpos($input, ".mw-parser-output")) + 1);
$input = $part1 . $part2;
++$i;
if ($i === 30) break;
}
$input = self::_cleanSourceBracketsOffTranslation($input);
$input = str_replace("\t", " ", $input);
// Remove double whitespaces
while (strpos($input, " ") !== false) {
$input = str_replace(" ", " ", $input);
}
// Remove newlines with ensuing spaces
while (strpos($input, PHP_EOL . " ") !== false) {
$input = str_replace(PHP_EOL . " ", PHP_EOL, $input);
}
// Remove double newlines
while (strpos($input, PHP_EOL . PHP_EOL . PHP_EOL) !== false) {
$input = str_replace(PHP_EOL . PHP_EOL . PHP_EOL, PHP_EOL . PHP_EOL, $input);
}
$stableToRemove = [
"Vous pouvez partager vos connaissances en laméliorant (comment ?) selon les recommandations des projets correspondants.",
];
foreach ($stableToRemove as $tToRemove) $input = str_replace($tToRemove, "", $input);
$endings = [
"StubDenne artikel om et vandløb ",
];
foreach ($endings as $ending) {
if (strpos($input, $ending) !== false) $input = substr($input, 0, strpos($input, $ending));
}
$input = trim($input);
// Cut off overly long articles
if (mb_strlen($input) > 600) {
if (strpos($input, PHP_EOL . PHP_EOL, 600) !== false) {
$input = trim(substr($input, 0, strpos($input, PHP_EOL . PHP_EOL, 600)));
}
}
if (empty($input)) return '';
$input = str_replace("'", "´", MD_STD::preg_replace_str("/\&\#91\;[0-9]\&\#93\;/", '', $input));
$input = html_entity_decode($input);
return MD_STD_IN::sanitize_text($input);
}
/**
* Wrapper around _cleanWikidataInput for testing.
*
* @param string $input Input string.
*
* @return string
*/
public static function cleanWikidataInput(string $input):string {
if (PHP_SAPI !== 'cli') throw new Exception("Use this function only for testing");
return self::_cleanWikidataInput($input);
return strtr(
trim(MD_STD_IN::sanitize_text($input)),
[
PHP_EOL => PHP_EOL . PHP_EOL,
PHP_EOL . PHP_EOL . PHP_EOL => PHP_EOL . PHP_EOL,
]
);
}
@ -740,7 +525,7 @@ final class NodaWikidataFetcher {
$languagesToFetch = $wikilinks = [];
foreach ($checkagainstLanguage as $lang) {
if (empty($data['labels'][$lang])) {
if (empty($data['labels']) || empty($data['labels'][$lang])) {
continue;
}
@ -795,23 +580,20 @@ final class NodaWikidataFetcher {
$wikilink = $wikilinks[$lang];
if (!empty($contents[$lang])) {
$descFromWiki = json_decode($contents[$lang], true)['parse']['text']['*'];
# Process data retrieved from wikipedia
if ($descFromWiki !== null) $tDescription = (string)$descFromWiki;
else $tDescription = "";
$titleFromWikipedia = $data['sitelinks'][$lang . 'wiki']['title'];
$tDescription = self::_getCleanedWikipediaSnippet($lang, $titleFromWikipedia);
}
else {
$tDescription = "";
}
if ($tDescription !== '' && !empty($desc_cleaned = self::_cleanWikidataInput($tDescription))) {
if (!empty($titleFromWikipedia) && !empty($tDescription)) {
# $descs[$lang] = $tDescription;
$output[$lang] = [
'label' => self::_cleanWikidataInput((string)$data['labels'][$lang]['value']),
'description' => '"' . $desc_cleaned . '" - (' . $data['labels'][$lang]['language'] . '.wikipedia.org ' . date('d.m.Y') . ')',
'label' => $titleFromWikipedia,
'description' => '"' . $tDescription . '" - (' . $data['labels'][$lang]['language'] . '.wikipedia.org ' . date('d.m.Y') . ')',
'link' => $wikilink,
];
}
@ -819,8 +601,8 @@ final class NodaWikidataFetcher {
else if (!empty($data['labels'][$lang]['value']) and !empty($data['descriptions'][$lang])) {
$output[$lang] = [
'label' => self::_cleanWikidataInput($data['labels'][$lang]['value']),
'description' => self::_cleanWikidataInput($data['descriptions'][$lang]['value']),
'label' => self::_cleanInputSimple($data['labels'][$lang]['value']),
'description' => self::_cleanInputSimple($data['descriptions'][$lang]['value']),
'link' => "",
];
@ -831,8 +613,8 @@ final class NodaWikidataFetcher {
else if (!empty($data['labels'][$lang]['value']) and !empty($data['descriptions'][$lang])) {
$output[$lang] = [
'label' => self::_cleanWikidataInput($data['labels'][$lang]['value']),
'description' => self::_cleanWikidataInput($data['descriptions'][$lang]['value']),
'label' => self::_cleanInputSimple($data['labels'][$lang]['value']),
'description' => self::_cleanInputSimple($data['descriptions'][$lang]['value']),
'link' => "",
];
@ -1026,6 +808,7 @@ final class NodaWikidataFetcher {
$wikidata_gender = "female";
break;
case "Q48270":
case "Q207959": // Androgyny
$wikidata_gender = "other";
break;
default:
@ -1047,6 +830,51 @@ final class NodaWikidataFetcher {
}
/**
 * Determines a description for an entity, preferring Wikipedia texts over
 * the short descriptions stored in Wikidata.
 *
 * Order of attempts:
 * 1. The Wikipedia edition in the user's language.
 * 2. The Wikipedia editions listed in self::LANGUAGES_MAIN_DESC.
 * 3. The Wikidata description in the user's language.
 * 4. The first Wikidata description available in any language.
 *
 * @param string                                           $lang      The user's selected used language.
 * @param array<mixed>                                     $data      Data fetched from wikidata.
 * @param array<string, array{url: string, title: string}> $wikilinks Links to wikipedia APIs.
 *
 * @return array{}|array{lang: string, desc: string, source: 'wikidata'|'wikipedia'}
 *         Empty array if no description could be found at all.
 */
private static function _getDescriptionFromWikidataAndWikipediaLinks(string $lang, array $data, array $wikilinks):array {

    // Try the current user language for retrieving wikipedia texts
    if (isset($wikilinks[$lang])) {
        # Process data retrieved from wikipedia
        if (!empty($datafromwiki = self::_getCleanedWikipediaSnippet($lang, $wikilinks[$lang]['title']))) {
            return ['lang' => $lang, 'desc' => $datafromwiki, 'source' => 'wikipedia'];
        }
    }

    // Try the alternative languages for retrieving wikipedia texts
    foreach (self::LANGUAGES_MAIN_DESC as $cur_lang) {

        if ($lang === $cur_lang || !isset($wikilinks[$cur_lang])) continue;

        // The page title belongs to the $cur_lang edition of Wikipedia, so
        // that edition (not the one of the user's language) must be queried.
        if (!empty($datafromwiki = self::_getCleanedWikipediaSnippet($cur_lang, $wikilinks[$cur_lang]['title']))) {
            return ['lang' => $cur_lang, 'desc' => $datafromwiki, 'source' => 'wikipedia'];
        }

    }

    // If the description still has not been entered, try retrieving it from wikidata.
    if (!empty($data['descriptions'][$lang])) {
        return ['lang' => $lang, 'desc' => (string)$data['descriptions'][$lang]['value'], 'source' => 'wikidata'];
    }

    if (!empty($data['descriptions'])) {
        // Fall back to whichever language is listed first.
        $tLang = (string)array_keys($data['descriptions'])[0];
        $desc = $data['descriptions'][$tLang];
        return ['lang' => $tLang, 'desc' => (string)$desc['value'], 'source' => 'wikidata'];
    }

    return [];

}
/**
* Function for retrieving information.
*
@ -1059,30 +887,13 @@ final class NodaWikidataFetcher {
*/
public function retrievePersinstInfoFromWikidataID(string $lang, string $wikidata_id, int $persinst_id, string $erfasst_von) {
self::validateWikidataId($wikidata_id);
$data = self::_getWikidataEntity($wikidata_id);
$data = self::getWikidataEntity($wikidata_id);
// Get links to wikipedia
$wikilinks = self::_getWikipediaLinksFromWikidataOutput($data);
$alreadyEntered = false;
if (isset($wikilinks[$lang])) {
# Process data retrieved from wikipedia
if (!empty($datafromwiki = self::_getCleanedWikipediaSnippet($lang, $wikilinks[$lang]['title']))) {
$alreadyEntered = $this->retrievePersinstDescFromWikipedia($persinst_id, $wikidata_id, $datafromwiki, $lang, $lang, $erfasst_von);
}
}
foreach (self::LANGUAGES_MAIN_DESC as $cur_lang) {
if ($alreadyEntered === true || !isset($wikilinks[$cur_lang])) continue;
if ($datafromwiki = self::_getCleanedWikipediaSnippet($lang, $wikilinks[$cur_lang]['title'])) {
$alreadyEntered = $this->retrievePersinstDescFromWikipedia($persinst_id, $wikidata_id, $datafromwiki, $lang, "$cur_lang", $erfasst_von);
}
if (!empty($desc = self::_getDescriptionFromWikidataAndWikipediaLinks($lang, $data, $wikilinks))) {
$alreadyEntered = $this->retrievePersinstDescFromWikipedia($persinst_id, $wikidata_id, $desc['desc'], $lang, $desc['lang'], $erfasst_von);
}
$this->enterPersinstBirthDeathDatesFromWikidata($data, $persinst_id);
@ -1110,8 +921,7 @@ final class NodaWikidataFetcher {
*/
public function retrievePersinstNormDataLinksFromWikidataID(string $wikidata_id, int $persinst_id, string $erfasst_von) {
self::validateWikidataId($wikidata_id);
$data = self::_getWikidataEntity($wikidata_id);
$data = self::getWikidataEntity($wikidata_id);
if (!empty($nodaLinks = $this->_getNodaLinksFromWikidataResult('persinst', $wikidata_id, $data))) {
NodaBatchInserter::linkNodaForPersinst($this->_mysqli_noda, $persinst_id, $nodaLinks, $erfasst_von);
}
@ -1129,8 +939,7 @@ final class NodaWikidataFetcher {
*/
public function retrievePlaceNormDataLinksFromWikidataID(string $wikidata_id, int $onum, string $erfasst_von) {
self::validateWikidataId($wikidata_id);
$data = self::_getWikidataEntity($wikidata_id);
$data = self::getWikidataEntity($wikidata_id);
if (!empty($nodaLinks = $this->_getNodaLinksFromWikidataResult('place', $wikidata_id, $data))) {
NodaBatchInserter::linkNodaForPlace($this->_mysqli_noda, $onum, $nodaLinks, $erfasst_von);
}
@ -1279,7 +1088,6 @@ final class NodaWikidataFetcher {
$updateStmt->execute();
}
catch (MDMysqliInvalidEncodingError $e) {
$_SESSION["editHistory"] = ["changesStored", "Error adding base description"];
}
$updateStmt->close();
unset($updateStmt);
@ -1355,8 +1163,7 @@ final class NodaWikidataFetcher {
*/
public function retrievePlaceInfoFromWikidataID(string $lang, string $wikidata_id, int $onum, string $erfasst_von) {
self::validateWikidataId($wikidata_id);
$data = self::_getWikidataEntity($wikidata_id);
$data = self::getWikidataEntity($wikidata_id);
$wikilinks = self::_getWikipediaLinksFromWikidataOutput($data);
@ -1368,30 +1175,8 @@ final class NodaWikidataFetcher {
}
$cur_place_desc = $this->getPlaceDescription($onum);
$alreadyEntered = false;
if (!empty($wikilinks[$lang])) {
$datafromwiki = MD_STD::runCurl(self::_getWikipediaApiLink($lang, $wikilinks[$lang]['title']), 10000);
$datafromwiki = json_decode($datafromwiki, true)['parse']['text']['*'];
if (!empty($datafromwiki) and !empty($datafromwiki = self::_cleanWikidataInput((string)$datafromwiki))) {
$alreadyEntered = $this->enterPlaceDescFromWikidata($cur_place_desc, $datafromwiki, $lang, $lang, $onum, $erfasst_von);
}
}
foreach (self::LANGUAGES_MAIN_DESC as $cur_lang) {
//if ($alreadyEntered === true) break;
if ($alreadyEntered === true) break;
if (!isset($wikilinks[$cur_lang]['url'])) continue;
$datafromwiki = MD_STD::runCurl(self::_getWikipediaApiLink($cur_lang, $wikilinks[$cur_lang]['title']), 10000);
$datafromwiki = json_decode($datafromwiki, true)['parse']['text']['*'];
if (!empty($datafromwiki) and !empty($datafromwiki = self::_cleanWikidataInput((string)$datafromwiki))) {
$alreadyEntered = $this->enterPlaceDescFromWikidata($cur_place_desc, $datafromwiki, $lang, $cur_lang, $onum, $erfasst_von);
}
if (!empty($desc = self::_getDescriptionFromWikidataAndWikipediaLinks($lang, $data, $wikilinks))) {
$this->enterPlaceDescFromWikidata($cur_place_desc, $desc['desc'], $lang, $desc['lang'], $onum, $erfasst_von);
}
if (isset($data['claims']['P1566'])) $geonames_id = filter_var($data['claims']['P1566'][0]['mainsnak']['datavalue']['value'], FILTER_VALIDATE_INT);
@ -1589,37 +1374,12 @@ final class NodaWikidataFetcher {
*/
public function retrieveTagInfoFromWikidataID(string $lang, string $wikidata_id, int $tag_id, string $erfasst_von) {
self::validateWikidataId($wikidata_id);
$data = self::_getWikidataEntity($wikidata_id);
$data = self::getWikidataEntity($wikidata_id);
$wikilinks = self::_getWikipediaLinksFromWikidataOutput($data);
$alreadyEntered = false;
if (isset($wikilinks[$lang])) {
$datafromwiki = MD_STD::runCurl(self::_getWikipediaApiLink($lang, $wikilinks[$lang]['title']), 10000);
$datafromwiki = json_decode($datafromwiki, true)['parse']['text']['*'];
# Process data retrieved from wikipedia
if (!empty($datafromwiki = self::_cleanWikidataInput((string)$datafromwiki))) {
$alreadyEntered = $this->retrieveTagDescFromWikipedia($tag_id, $datafromwiki, $lang, $lang, $erfasst_von);
}
}
foreach (self::LANGUAGES_MAIN_DESC as $cur_lang) {
if ($alreadyEntered === true || !isset($wikilinks[$cur_lang])) continue;
$datafromwiki = MD_STD::runCurl(self::_getWikipediaApiLink($cur_lang, $wikilinks[$cur_lang]['title']), 10000);
$datafromwiki = json_decode($datafromwiki, true)['parse']['text']['*'];
# Process data retrieved from wikipedia
if ($datafromwiki = self::_cleanWikidataInput((string)$datafromwiki)) {
$alreadyEntered = $this->retrieveTagDescFromWikipedia($tag_id, $datafromwiki, $lang, $cur_lang, $erfasst_von);
}
if (!empty($desc = self::_getDescriptionFromWikidataAndWikipediaLinks($lang, $data, $wikilinks))) {
$alreadyEntered = $this->retrieveTagDescFromWikipedia($tag_id, $desc['desc'], $lang, $desc['lang'], $erfasst_von);
}
if (!empty($nodaLinks = $this->_getNodaLinksFromWikidataResult('tag', $wikidata_id, $data))) {

View File

@ -0,0 +1,27 @@
<?PHP
/**
* This file contains an exception class to be thrown if a user attempts to load
* data from a Wikidata item specifically established for a disambiguation page.
*
* @file
*
* @author Joshua Ramon Enslin <joshua@museum-digital.de>
*/
declare(strict_types = 1);
/**
 * Signals that the requested Wikidata item is a dedicated disambiguation
 * page, which must not be used for enriching an entry.
 */
final class NodaWikidataFetcherDisambiguationIsDisallowedException extends MDgenericInvalidInputsException {
    /**
     * Builds the error message: a fixed explanation followed by the
     * message the exception was thrown with.
     *
     * @return string
     */
    public function errorMessage() {

        $explanation = 'Attempted to load a disambiguation page. Please select the specific item you want to fetch to enrich the given entry: ';
        return $explanation . $this->getMessage();

    }
}

View File

@ -11,7 +11,7 @@ declare(strict_types = 1);
*/
final class NodaPersinstFulltextSyncManticore {
const FULL_SYNC_COMMIT_AFTER = 30000;
private const FULL_SYNC_COMMIT_AFTER = 30000;
/**
* Returns all names and descriptions in the different languages of a actor.
@ -188,6 +188,10 @@ final class NodaPersinstFulltextSyncManticore {
$mysqli_manticore->commit();
if (PHP_SAPI === 'cli' && $mysqli_noda->ping() === false) {
$mysqli_noda->reconnect();
}
// Sync translations
$result = $mysqli_noda->do_read_query("SELECT `persinst`.`persinst_id`, `trans_language`,

View File

@ -11,7 +11,7 @@ declare(strict_types = 1);
*/
final class NodaTagFulltextSyncManticore {
const FULL_SYNC_COMMIT_AFTER = 30000;
private const FULL_SYNC_COMMIT_AFTER = 30000;
/**
* Returns all names and descriptions in the different languages of a tag.
@ -139,6 +139,10 @@ final class NodaTagFulltextSyncManticore {
/**
* Synchronizes base entries.
*
* @param MDMysqli $mysqli_noda Connection to MySQL DB.
* @param MDMysqli $mysqli_manticore Connection to Manticore DB.
* @param string $databasename Name of the main noda database.
*
* @return void
*/
public static function runFullSyncForBaseEntries(MDMysqli $mysqli_noda, MDMysqli $mysqli_manticore, string $databasename):void {
@ -189,6 +193,10 @@ final class NodaTagFulltextSyncManticore {
/**
* Synchronizes translated entries.
*
* @param MDMysqli $mysqli_noda Connection to MySQL DB.
* @param MDMysqli $mysqli_manticore Connection to Manticore DB.
* @param string $databasename Name of the main noda database.
*
* @return void
*/
public static function runFullSyncForTranslatedEntries(MDMysqli $mysqli_noda, MDMysqli $mysqli_manticore, string $databasename):void {

View File

@ -8,6 +8,7 @@
enum NodaTimeAutotranslaterLocales {
case ar;
case crh;
case de;
case en;
case es;
@ -40,6 +41,7 @@ enum NodaTimeAutotranslaterLocales {
return match($lang) {
'ar' => static::ar,
'crh' => static::crh,
'de' => static::de,
'en' => static::en,
'es' => static::es,
@ -73,6 +75,7 @@ enum NodaTimeAutotranslaterLocales {
return match($this) {
self::ar => 'ar_SY.utf8',
self::crh => 'uk_UA.utf8',
self::de => 'de_DE.utf8',
self::en => 'en_US.utf8',
self::es => 'es_ES.utf8',
@ -108,6 +111,7 @@ enum NodaTimeAutotranslaterLocales {
return match($this) {
self::ar => 'ar-SY',
self::crh => 'uk-UA',
self::de => 'de-DE',
self::en => 'en-US',
self::es => 'es-ES',
@ -143,6 +147,7 @@ enum NodaTimeAutotranslaterLocales {
return match($this) {
self::ar => '%s',
self::crh => '%s',
self::de => '%s n. Chr.',
self::en => '%s CE',
self::es => '%s d.C.',
@ -176,6 +181,7 @@ enum NodaTimeAutotranslaterLocales {
return match($this) {
self::ar => '-%s',
self::crh => '%s рік до нашої ери',
self::de => '%s v. Chr.',
self::en => '%s BC',
self::es => '%s a.C.',
@ -211,6 +217,7 @@ enum NodaTimeAutotranslaterLocales {
return match($this) {
self::ar => '%s',
self::crh => '%s',
self::de => '%s',
self::en => '%s',
self::es => '%s',
@ -244,6 +251,7 @@ enum NodaTimeAutotranslaterLocales {
return match($this) {
self::ar => '%s-%s',
self::crh => '%s-%s',
self::de => '%s-%s',
self::en => '%s-%s',
self::es => '%s-%s',
@ -279,6 +287,7 @@ enum NodaTimeAutotranslaterLocales {
return match($this) {
self::ar => '%s-',
self::crh => 'з %s року',
self::de => 'Seit %s',
self::en => 'Since %s',
self::es => 'Desde %s',
@ -315,6 +324,7 @@ enum NodaTimeAutotranslaterLocales {
return match($this) {
self::ar => '%s-',
self::crh => 'після %s року',
self::de => 'Nach %s',
self::en => 'After %s',
self::es => 'Despues de %s',
@ -350,6 +360,7 @@ enum NodaTimeAutotranslaterLocales {
return match($this) {
self::ar => '-%s',
self::crh => 'до %s року',
self::de => 'Bis %s',
self::en => 'Until %s',
self::es => 'Hasta %s',
@ -384,6 +395,7 @@ enum NodaTimeAutotranslaterLocales {
return match($this) {
self::ar => 'القرن ال %s',
self::crh => '%s століття',
self::de => '%s. Jahrhundert',
self::en => '%s. century',
self::es => 'Siglo %s',
@ -418,6 +430,7 @@ enum NodaTimeAutotranslaterLocales {
return match($this) {
self::ar => 'القرن ال %s-%s',
self::crh => '%s-%s століття',
self::de => '%s.-%s. Jahrhundert',
self::en => '%s.-%s. century',
self::es => 'Siglo %s-%s',
@ -452,6 +465,7 @@ enum NodaTimeAutotranslaterLocales {
return match($this) {
self::ar => '%s-%s',
self::crh => '%s-ті роки',
self::de => '%ser Jahre',
self::en => '%ss',
self::es => '%s-%s',
@ -486,6 +500,7 @@ enum NodaTimeAutotranslaterLocales {
return match($this) {
self::ar => '%s-%s',
self::crh => '%s-%s-ті роки',
self::de => '%s-%ser Jahre',
self::en => '%s-%ss',
self::es => '%s-%s',
@ -521,6 +536,7 @@ enum NodaTimeAutotranslaterLocales {
return match($this) {
self::ar => '-%s',
self::crh => 'до %s року',
self::de => 'Vor %s',
self::en => 'Before %s',
self::es => 'Antes de %s',
@ -558,6 +574,7 @@ enum NodaTimeAutotranslaterLocales {
# self::be => '%d.%B.%Y',
# self::bg => '%Y-%B-%d',
# self::ca => '%d/%m/%Y',
self::crh => '%d.%m.%Y',
# self::cs => '%d.%B.%Y',
# self::da => '%d-%m-%Y',
self::de => '%d.%m.%Y',
@ -618,6 +635,7 @@ enum NodaTimeAutotranslaterLocales {
# self::be => '%d.%B.%Y',
# self::bg => '%Y-%B-%d',
# self::ca => '%d/%m/%Y',
self::crh => 'dd.MM.Y',
# self::cs => '%d.%B.%Y',
# self::da => '%d-%m-%Y',
self::de => 'dd.MM.Y',
@ -679,6 +697,7 @@ enum NodaTimeAutotranslaterLocales {
# self::bg => '%Y-%B',
# self::ca => '%m/%Y',
# self::cs => '%B.%Y',
// Full month name (%B), matching the ICU month/year pattern 'MMMM Y' that is declared for crh.
self::crh => '%B %Y',
# self::da => '%m-%Y',
self::de => '%B %Y',
# self::el => '%B %Y',
@ -735,6 +754,7 @@ enum NodaTimeAutotranslaterLocales {
# self::bg => 'Y-MMMM',
# self::ca => 'MM/Y',
# self::cs => 'MMMM.Y',
self::crh => 'MMMM Y',
# self::da => 'MM-Y',
self::de => 'MMMM Y',
# self::el => 'MMMM Y',

View File

@ -141,6 +141,35 @@ final class NodaIDGetterTest extends TestCase {
}
/**
 * Returns a test actor's names together with the actor's ID and life dates.
 *
 * One data set is provided per name column (`persinst_name_en` and `persinst_name`),
 * so that each name is passed to the test method and reported on its own.
 *
 * @return array<string, array{0: string, 1: integer, 2: string, 3: string}>
 */
public static function persinstByNameAndLifeDatesProvider():array {

    $mysqli = md_noda_mysqli_connect();

    $result = $mysqli->do_read_query("SELECT `persinst_name_en`, `persinst_name`, `persinst_id`, `persinst_geburtsjahr`, `persinst_sterbejahr`
        FROM `persinst`
        WHERE INSTR(`persinst_name_en`, 'i')
            AND `persinst_geburtsjahr` != ''
            AND `persinst_sterbejahr` != ''
        LIMIT 1");

    // Release the result and the connection before evaluating the row, so that
    // neither is left open when no suitable entry exists.
    $cur = $result->fetch_row();
    $result->close();
    $mysqli->close();

    if (!$cur) {
        throw new Exception("Failed to identify an actor with an English name containing 'i' and both life dates set");
    }

    $label      = implode(' - ', $cur);
    $persinstId = (int)$cur[2];
    $birthYear  = (string)$cur[3];
    $deathYear  = (string)$cur[4];

    // The test method accepts exactly four arguments. Each name therefore needs
    // its own data set - values appended to a single set are never checked.
    return [
        'Persinst ID by English name: ' . $label => [(string)$cur[0], $persinstId, $birthYear, $deathYear],
        'Persinst ID by name: ' . $label         => [(string)$cur[1], $persinstId, $birthYear, $deathYear],
    ];

}
/**
* Test getting persinst by name works.
*
@ -156,6 +185,25 @@ final class NodaIDGetterTest extends TestCase {
}
/**
 * Checks that NodaIDGetter::getPersinstIDByNamePlusYears() returns the expected
 * actor ID for a name combined with a birth and a death year.
 *
 * @param string  $name        Name of the entry.
 * @param integer $expected_id Expected target ID.
 * @param string  $birth_year  Birth year.
 * @param string  $death_year  Death year.
 *
 * @return void
 */
#[DataProvider('persinstByNameAndLifeDatesProvider')]
public function testGetPersinstIdByNameAndLifeDatesWorks(string $name, int $expected_id, string $birth_year, string $death_year):void {

    // The lookup is always performed for the language "de".
    $foundId = NodaIDGetter::getPersinstIDByNamePlusYears($this->_mysqli, "de", $name, $birth_year, $death_year);

    $failureMessage = "Entry {$name} is not matched in exact lookup. Expected ID: {$expected_id}";

    self::assertEquals($expected_id, $foundId, $failureMessage);

}
// PersinstIDByRewrite
/**
@ -411,7 +459,8 @@ final class NodaIDGetterTest extends TestCase {
$mysqli = md_noda_mysqli_connect();
$timeByRewriteSimple = self::_getNameAndIdFromDbQuery($mysqli, "SELECT `input_name`, `zeit_id`
FROM `zeit_rewriting`
WHERE INSTR(`input_name`, 'i')");
WHERE INSTR(`input_name`, 'i')
AND `language` = 'de'");
$mysqli->close();
return [

View File

@ -6,12 +6,14 @@
*/
declare(strict_types = 1);
use PHPUnit\Framework\TestCase;
use PHPUnit\Framework\Attributes\CoversClass;
use PHPUnit\Framework\Attributes\Small;
/**
 * This script contains tests for the actor name splitter.
 *
 * The CoversClass attribute names the same class as the covers annotation,
 * so that code coverage is attributed to the class that is actually tested.
 *
 * @covers \NodaNameSplitter
 */
#[Small]
#[CoversClass(\NodaNameSplitter::class)]
final class NodaNameSplitterTest extends TestCase {
/**
* Test to check whether the HTML page is correctly generated.

View File

@ -6,12 +6,14 @@
*/
declare(strict_types = 1);
use PHPUnit\Framework\TestCase;
use PHPUnit\Framework\Attributes\CoversClass;
use PHPUnit\Framework\Attributes\Medium;
/**
 * This script contains tests for the automatic translation class for time names.
 *
 * The CoversClass attribute names the same class as the covers annotation,
 * so that code coverage is attributed to the class that is actually tested.
 *
 * @covers \NodaTimeAutotranslater
 */
#[Medium]
#[CoversClass(\NodaTimeAutotranslater::class)]
final class NodaTimeAutotranslaterTest extends TestCase {
/**
* Test to check whether the HTML page is correctly generated.
@ -32,7 +34,7 @@ final class NodaTimeAutotranslaterTest extends TestCase {
"zeit_zaehlzeit_tag" => "01",
];
$output = NodaTimeAutotranslater::getTranslations($timeInfo);
self::assertEquals($output["de"], "01.05.1920");
self::assertEquals("01.05.1920", $output["de"]);
}
@ -671,4 +673,23 @@ final class NodaTimeAutotranslaterTest extends TestCase {
self::assertEquals($output["de"], "Vor 01.12.1919");
}
/**
 * Checks that NodaTimeAutotranslater::validateTranslations() returns false when
 * it is called with "1919" twice and a German translation reading '1.12.1920'.
 *
 * @author Joshua Ramon Enslin <joshua@museum-digital.de>
 * @group ValidOutput
 * @small
 *
 * @return void
 */
public function testValidation():void {

    // Kept in a variable, as the validator's signature is not visible from this test.
    $translations = ['de' => '1.12.1920'];

    $isValid = NodaTimeAutotranslater::validateTranslations("1919", "1919", $translations);

    self::assertFalse($isValid);

}
}

File diff suppressed because it is too large Load Diff

View File

@ -6,19 +6,28 @@
*/
declare(strict_types = 1);
use PHPUnit\Framework\TestCase;
use PHPUnit\Framework\Attributes\CoversClass;
use PHPUnit\Framework\Attributes\Medium;
use PHPUnit\Framework\Attributes\DataProvider;
require_once __DIR__ . '/../../MDMysqli/test_connections.conf.php';
require_once __DIR__ . '/../src/NodaWikidataFetcherDisambiguationIsDisallowedException.php';
/**
 * This script contains tests for the Wikidata fetcher.
 *
 * @covers \NodaWikidataFetcher
 */
#[Medium]
#[CoversClass(\NodaWikidataFetcher::class)]
final class NodaWikidataFetcherTest extends TestCase {
// Test for getting translations: Telugu
public const TEST_LANG = 'te';
/**
* Test to check whether the HTML page is correctly generated.
*
* @author Joshua Ramon Enslin <joshua@museum-digital.de>
* @group ValidOutput
* @small
*
* @return void
*/
@ -29,12 +38,39 @@ final class NodaWikidataFetcherTest extends TestCase {
}
/**
 * Supplies the Wikidata ID of an item that is itself dedicated to a disambiguation page.
 *
 * @return array<string, array{0: string}>
 */
public static function disambiguationPageProvider():array {

    $wikidataId = 'Q6916210';

    // The data set is labelled with the ID it carries.
    return ['Disambiguation page for "Mochi" - ' . $wikidataId => [$wikidataId]];

}
/**
 * Expects NodaWikidataFetcher::getWikidataEntity() to throw a
 * NodaWikidataFetcherDisambiguationIsDisallowedException when it is asked to
 * load a Wikidata entry dedicated to a disambiguation page.
 *
 * @param string $wikidata_id Wikidata ID.
 *
 * @return void
 */
#[DataProvider('disambiguationPageProvider')]
public function testWikidataIdFromLinkFailsForDisambiguationPages(string $wikidata_id):void {

    // expectException() is an instance method of the test case and has to be
    // registered before the call that is expected to throw.
    $this->expectException(NodaWikidataFetcherDisambiguationIsDisallowedException::class);

    NodaWikidataFetcher::getWikidataEntity($wikidata_id);

}
/**
* Test to check whether the HTML page is correctly generated.
*
* @author Joshua Ramon Enslin <joshua@museum-digital.de>
* @group ValidOutput
* @small
*
* @return void
*/
@ -47,9 +83,7 @@ final class NodaWikidataFetcherTest extends TestCase {
/**
* Test to check whether the HTML page is correctly generated.
*
* @author Joshua Ramon Enslin <joshua@museum-digital.de>
* @group ValidOutput
* @small
*
* @return void
*/
@ -60,225 +94,208 @@ final class NodaWikidataFetcherTest extends TestCase {
}
/**
* Test for cleaning wikidata info.
* Data provider for an actor that has a wikidata link and a Telugu translation.
*
* @author Joshua Ramon Enslin <joshua@museum-digital.de>
* @group ValidOutput
* @small
*
* @return void
* @return array<string, array{0: int, 1: string}>
*/
public function testCleanWikidataInput():void {
public static function actorWithTlAndWikidataLinkProvider():array {
$testStr = '"<div class="mw-parser-output"><table class="infobox float-right toccolours toptextcells" style="margin: 0 0 1em 1em; width: 300px;" id="Vorlage_Infobox_Ort_in_der_Ukraine" summary="Infobox Ort in der Ukraine">
$mysqli = md_main_mysqli_connect();
<tbody><tr>
<td colspan="2" style="background-color:#AFD6FF; font-size:1.3em; font-weight:bold; text-align:center;">Werbowez (Kossiw)
</td></tr>
<tr>
<td colspan="2" style="background-color:#FFC; font-size:1em; font-weight:bold; text-align:center;"><span lang="uk-Cyrl" class="Cyrl">Вербовець</span>
</td></tr>
$result = $mysqli->do_read_query("SELECT `persinst_id`, `noda_nrinsource`
FROM `" . DATABASENAME_NODA . "`.`noda`
WHERE `noda_source` = 'Wikidata'
AND EXISTS (SELECT 1 FROM `" . DATABASENAME_NODA . "`.`persinst_translation`
WHERE `persinst_translation`.`persinst_id` = `noda`.`persinst_id`
AND `trans_language` = '" . $mysqli->escape_string(self::TEST_LANG) . "')");
if (!$cur = $result->fetch_row()) {
throw new Exception("Failed to identify an entry that has a wikidata entry and a translation for language " . self::TEST_LANG);
}
$result->close();
$mysqli->close();
<tr style="height:120px; background-color:#FFF;">
<td style="width: 130px; text-align:center;"><span typeof="mw:File"><a href="/wiki/Datei:Coats_of_arms_of_None.svg" class="mw-file-description" title="Wappen fehlt"><img alt="Wappen fehlt" src="//upload.wikimedia.org/wikipedia/commons/thumb/c/c1/Coats_of_arms_of_None.svg/100px-Coats_of_arms_of_None.svg.png" decoding="async" width="100" height="120" class="mw-file-element" srcset="//upload.wikimedia.org/wikipedia/commons/thumb/c/c1/Coats_of_arms_of_None.svg/150px-Coats_of_arms_of_None.svg.png 1.5x, //upload.wikimedia.org/wikipedia/commons/thumb/c/c1/Coats_of_arms_of_None.svg/200px-Coats_of_arms_of_None.svg.png 2x" data-file-width="125" data-file-height="150" /></a></span>
</td>
<td style="width: 170px; text-align:center;"><table class="centered" style="background-color: #f9f9f9; border: none; border-collapse: collapse; width: 1px;">
<tbody><tr><td style="border: none; padding: 0; text-align: center;"><div style="position: relative; z-index: 0; padding: 0; display: inline-block; width: -webkit-max-content; width: -moz-max-content; width: max-content; border: none;"><figure class="mw-halign-center noviewer notpageimage" typeof="mw:File"><a href="/wiki/Datei:Ukraine_adm_location_map.svg" class="mw-file-description" title="Werbowez (Kossiw) (Ukraine)"><img alt="Werbowez (Kossiw) (Ukraine)" src="//upload.wikimedia.org/wikipedia/commons/thumb/7/78/Ukraine_adm_location_map.svg/180px-Ukraine_adm_location_map.svg.png" decoding="async" width="180" height="121" class="mw-file-element" srcset="//upload.wikimedia.org/wikipedia/commons/thumb/7/78/Ukraine_adm_location_map.svg/270px-Ukraine_adm_location_map.svg.png 1.5x, //upload.wikimedia.org/wikipedia/commons/thumb/7/78/Ukraine_adm_location_map.svg/360px-Ukraine_adm_location_map.svg.png 2x" data-file-width="1546" data-file-height="1038" /></a><figcaption>Werbowez (Kossiw) (Ukraine)</figcaption></figure><div style="position:absolute; top:50.7%; left:18.9%; height:0; width:0;"><div style="position:relative;z-index:100;left:-4px;top:-4px;width:8px;height:8px;line-height:0px;"><span typeof="mw:File"><a href="https://geohack.toolforge.org/geohack.php?pagename=Werbowez_(Kossiw)&amp;language=de&amp;params=48.342222222222_N_25.133333333333_E_dim:10000_region:UA-26_type:city(3395)&amp;title=Werbowez+%28Kossiw%29" title="Werbowez (Kossiw) (48° 20 32″ N, 25° 8 0″O)"><img alt="" src="//upload.wikimedia.org/wikipedia/commons/thumb/9/97/ButtonRed.svg/8px-ButtonRed.svg.png" decoding="async" width="8" height="8" class="mw-file-element" srcset="//upload.wikimedia.org/wikipedia/commons/thumb/9/97/ButtonRed.svg/12px-ButtonRed.svg.png 1.5x, //upload.wikimedia.org/wikipedia/commons/thumb/9/97/ButtonRed.svg/16px-ButtonRed.svg.png 2x" data-file-width="480" data-file-height="480" /></a></span></div>
<table style="font-size:90%; border:none; background-color:transparent; border-collapse:collapse; line-height:1em; position:absolute; width:6em; margin: 0 .2em; text-align:left; left:1px; bottom:1px;"><tbody><tr><td style="border:none; vertical-align:middle;"><span style="position:relative; z-index:9; background-color:none;">Werbowez (Kossiw) </span></td></tr></tbody></table></div></div></td></tr>
</tbody></table>
</td></tr>
<tr style="background-color:#AFD6FF;">
<th colspan="2">Basisdaten
</th></tr>
<tr>
<td><a href="/wiki/Liste_der_Oblaste_der_Ukraine" title="Liste der Oblaste der Ukraine">Oblast</a>:</td>
<td><a href="/wiki/Oblast_Iwano-Frankiwsk" title="Oblast Iwano-Frankiwsk">Oblast Iwano-Frankiwsk</a>
</td></tr>
<tr>
<td><a href="/wiki/Liste_der_Rajone_der_Ukraine" title="Liste der Rajone der Ukraine">Rajon</a>:</td>
<td><a href="/wiki/Rajon_Kossiw" title="Rajon Kossiw">Rajon Kossiw</a>
</td></tr>
<tr>
<td><a href="/wiki/H%C3%B6he_%C3%BCber_dem_Meeresspiegel" title="Höhe über dem Meeresspiegel">Höhe</a>:</td>
<td>369 m
</td></tr>
<tr>
<td><a href="/wiki/Fl%C3%A4cheninhalt" title="Flächeninhalt">Fläche</a>:</td>
<td>18,77 <a href="/wiki/Quadratmeter#Quadratkilometer" title="Quadratmeter">km²</a>
</td></tr>
<tr>
<td><a href="/wiki/Einwohner" title="Einwohner">Einwohner</a>:</td>
<td>3.395 <small><i>(2001)</i></small>
</td></tr>
<tr>
<td><a href="/wiki/Bev%C3%B6lkerungsdichte" title="Bevölkerungsdichte">Bevölkerungsdichte</a>:
</td>
<td>181 Einwohner je km²
</td></tr>
<tr>
<td><a href="/wiki/Postleitzahl" title="Postleitzahl">Postleitzahlen</a>:</td>
<td>78605
</td></tr>
<tr>
<td><a href="/wiki/Telefonvorwahl" title="Telefonvorwahl">Vorwahl</a>:</td>
<td>+380 3478
</td></tr>
<tr>
<td><a href="/wiki/Geographische_Koordinaten" title="Geographische Koordinaten">Geographische Lage</a>:</td>
<td><span id="text_coordinates" class="coordinates plainlinks-print"><a class="external text" href="https://geohack.toolforge.org/geohack.php?pagename=Werbowez_(Kossiw)&amp;language=de&amp;params=48.342222222222_N_25.133333333333_E_dim:10000_region:UA-26_type:city(3395)"><span title="Breitengrad">48°&#160;21&#160;<abbr title="Nord">N</abbr></span>, <span title="Längengrad">25°&#160;8&#160;<abbr title="Ost">O</abbr></span></a></span><span class="geo noexcerpt" style="display:none"><span class="body"></span><span class="latitude">48.342222222222</span><span class="longitude">25.133333333333</span><span class="elevation"></span></span><span id="coordinates" class="coordinates noprint"><span title="Koordinatensystem WGS84">Koordinaten: </span><a class="external text" href="https://geohack.toolforge.org/geohack.php?pagename=Werbowez_(Kossiw)&amp;language=de&amp;params=48.342222222222_N_25.133333333333_E_dim:10000_region:UA-26_type:city(3395)"><span title="Breitengrad">48°&#160;20&#160;32″&#160;<abbr title="Nord">N</abbr></span>, <span title="Längengrad">25°&#160;8&#160;0″&#160;<abbr title="Ost">O</abbr></span></a></span>
</td></tr>
<tr>
<td><a href="/wiki/KATOTTH" title="KATOTTH">KATOTTH</a>:
</td>
<td>UA26100010030094355
</td></tr>
<tr>
<td><a href="/wiki/KOATUU" title="KOATUU">KOATUU</a>:
</td>
<td>2623682401
</td></tr>
<tr>
<td><a href="/wiki/Verwaltungsgliederung_der_Ukraine" title="Verwaltungsgliederung der Ukraine">Verwaltungsgliederung</a>:
</td>
<td>1 Dorf
</td></tr>
<tr>
<td>Adresse:
</td>
<td>вул. Миру, буд. 15<br />78605 с. Вербовець
</td></tr>
<tr>
<td><a href="/wiki/Website" title="Website">Website</a>:
</td>
<td><a rel="nofollow" class="external text" href="http://verbovets.kosiv.net/">Offizielle Webseite</a>
</td></tr>
<tr>
<td colspan="2" style="padding-bottom:3px; text-align:center; border-bottom:1px solid #bbb; border-top:1px solid #bbb;"><a rel="nofollow" class="external text" href="http://w1.c1.rada.gov.ua/pls/z7503/A005?rdat1=31.08.2023&amp;rf7571=13801">Statistische Informationen</a>
</td></tr>
<tr>
<td colspan="2" style="padding-bottom:3px; text-align:center; border-bottom:1px solid #bbb; border-top:1px solid #bbb;">
<table class="centered" style="background-color: #f9f9f9; border: none; border-collapse: collapse; width: 1px;">
<tbody><tr><td style="border: none; padding: 0; text-align: center;"><div style="position: relative; z-index: 0; padding: 0; display: inline-block; width: -webkit-max-content; width: -moz-max-content; width: max-content; border: none;"><figure class="mw-halign-center noviewer notpageimage" typeof="mw:File"><a href="/wiki/Datei:Ivano-Frankivsk_location_map.svg" class="mw-file-description" title="Werbowez (Kossiw) (Oblast Iwano-Frankiwsk)"><img alt="Werbowez (Kossiw) (Oblast Iwano-Frankiwsk)" src="//upload.wikimedia.org/wikipedia/commons/thumb/8/8e/Ivano-Frankivsk_location_map.svg/290px-Ivano-Frankivsk_location_map.svg.png" decoding="async" width="290" height="347" class="mw-file-element" srcset="//upload.wikimedia.org/wikipedia/commons/thumb/8/8e/Ivano-Frankivsk_location_map.svg/435px-Ivano-Frankivsk_location_map.svg.png 1.5x, //upload.wikimedia.org/wikipedia/commons/thumb/8/8e/Ivano-Frankivsk_location_map.svg/580px-Ivano-Frankivsk_location_map.svg.png 2x" data-file-width="533" data-file-height="637" /></a><figcaption>Werbowez (Kossiw) (Oblast Iwano-Frankiwsk)</figcaption></figure><div style="position:absolute; top:63.3%; left:74.4%; height:0; width:0;"><div style="position:relative;z-index:100;left:-4px;top:-4px;width:8px;height:8px;line-height:0px;"><span typeof="mw:File"><a href="https://geohack.toolforge.org/geohack.php?pagename=Werbowez_(Kossiw)&amp;language=de&amp;params=48.342222222222_N_25.133333333333_E_dim:10000_region:UA-26_type:city(3395)&amp;title=Werbowez+%28Kossiw%29" title="Werbowez (Kossiw) (48° 20 32″ N, 25° 8 0″O)"><img alt="" src="//upload.wikimedia.org/wikipedia/commons/thumb/9/97/ButtonRed.svg/8px-ButtonRed.svg.png" decoding="async" width="8" height="8" class="mw-file-element" srcset="//upload.wikimedia.org/wikipedia/commons/thumb/9/97/ButtonRed.svg/12px-ButtonRed.svg.png 1.5x, //upload.wikimedia.org/wikipedia/commons/thumb/9/97/ButtonRed.svg/16px-ButtonRed.svg.png 2x" data-file-width="480" data-file-height="480" /></a></span></div>
<table style="font-size:90%; border:none; background-color:transparent; border-collapse:collapse; line-height:1em; position:absolute; width:6em; margin: 0 .2em; text-align:right; right:1px; bottom:1px;"><tbody><tr><td style="border:none; vertical-align:middle;"><span style="position:relative; z-index:9; background-color:none;">Werbowez (Kossiw) </span></td></tr></tbody></table></div></div></td></tr>
</tbody></table><span style="display:none;"><a href="/w/index.php?title=Vorlage:Positionskarte_ISO_3166-2/Wartung/noregion&amp;action=edit&amp;redlink=1" class="new" title="Vorlage:Positionskarte ISO 3166-2/Wartung/noregion (Seite nicht vorhanden)">i1</a></span>
</td></tr></tbody></table>
<p><b>Werbowez</b> (<b><span style="font-style:normal;font-weight:normal"><a href="/wiki/Ukrainische_Sprache" title="Ukrainische Sprache">ukrainisch</a></span> <span lang="uk-Cyrl" class="Cyrl" style="font-style:normal">Вербовець</span></b>; <span style="font-style:normal;font-weight:normal"><a href="/wiki/Russische_Sprache" title="Russische Sprache">russisch</a></span> <span lang="ru-Cyrl" class="Cyrl" style="font-style:normal">Вербовец</span>, <a href="/wiki/Polnische_Sprache" title="Polnische Sprache">polnisch</a> <span lang="pl" style="font-style:italic;font-weight:normal">Wierzbowiec</span>; <span style="font-style:normal;font-weight:normal"><a href="/wiki/Rum%C3%A4nische_Sprache" title="Rumänische Sprache">rumänisch</a></span> <span lang="ro-Latn" style="font-style:italic">Verboveț</span>) ist ein <a href="/wiki/Dorf" title="Dorf">Dorf</a> in der <a href="/wiki/Ukraine" title="Ukraine">ukrainischen</a> <a href="/wiki/Oblast_Iwano-Frankiwsk" title="Oblast Iwano-Frankiwsk">Oblast Iwano-Frankiwsk</a> mit etwa 3400 Einwohnern (2001).<sup id="cite_ref-1" class="reference"><a href="#cite_note-1">&#91;1&#93;</a></sup>
</p>
<figure class="mw-default-size mw-halign-left" typeof="mw:File/Thumb"><a href="/wiki/Datei:%D0%92%D0%B5%D1%80%D0%B1%D0%BE%D0%B2%D0%B5%D1%86%D1%8C.JPG" class="mw-file-description"><img src="//upload.wikimedia.org/wikipedia/commons/thumb/9/9d/%D0%92%D0%B5%D1%80%D0%B1%D0%BE%D0%B2%D0%B5%D1%86%D1%8C.JPG/220px-%D0%92%D0%B5%D1%80%D0%B1%D0%BE%D0%B2%D0%B5%D1%86%D1%8C.JPG" decoding="async" width="220" height="147" class="mw-file-element" srcset="//upload.wikimedia.org/wikipedia/commons/thumb/9/9d/%D0%92%D0%B5%D1%80%D0%B1%D0%BE%D0%B2%D0%B5%D1%86%D1%8C.JPG/330px-%D0%92%D0%B5%D1%80%D0%B1%D0%BE%D0%B2%D0%B5%D1%86%D1%8C.JPG 1.5x, //upload.wikimedia.org/wikipedia/commons/thumb/9/9d/%D0%92%D0%B5%D1%80%D0%B1%D0%BE%D0%B2%D0%B5%D1%86%D1%8C.JPG/440px-%D0%92%D0%B5%D1%80%D0%B1%D0%BE%D0%B2%D0%B5%D1%86%D1%8C.JPG 2x" data-file-width="5184" data-file-height="3456" /></a><figcaption>Blick auf das Dorf</figcaption></figure>
<p>Das um 1650 erstmals schriftlich erwähnte Dorf<sup id="cite_ref-2" class="reference"><a href="#cite_note-2">&#91;2&#93;</a></sup> liegt im Osten der <a href="/wiki/Historische_Landschaft" title="Historische Landschaft">historischen Landschaft</a> <a href="/wiki/Galizien" title="Galizien">Galizien</a> am Ufer der <a href="/w/index.php?title=Rybnyzja_(Fluss)&amp;action=edit&amp;redlink=1" class="new" title="Rybnyzja (Fluss) (Seite nicht vorhanden)">Rybnyzja</a> (<span lang="uk-Cyrl" class="Cyrl">Рибниця</span>), einem 56&#160;km langen Nebenfluss des <a href="/wiki/Pruth" title="Pruth">Pruth</a> 7&#160;km nordöstlich vom Rajonzentrum <a href="/wiki/Kossiw" title="Kossiw">Kossiw</a> und 95&#160;km südlich vom Oblastzentrum <a href="/wiki/Iwano-Frankiwsk" title="Iwano-Frankiwsk">Iwano-Frankiwsk</a>. Südlich der Ortschaft verläuft die <a href="/wiki/Territorialstra%C3%9Fe" title="Territorialstraße">Territorialstraße</a> <i>T0909</i>.
</p><p>Am 12. Juni 2020 wurde das Dorf ein Teil der neu gegründeten <i>Stadtgemeinde <a href="/wiki/Kossiw" title="Kossiw">Kossiw</a></i> im <a href="/wiki/Rajon_Kossiw" title="Rajon Kossiw">Rajon Kossiw</a><sup id="cite_ref-3" class="reference"><a href="#cite_note-3">&#91;3&#93;</a></sup>, bis dahin bildete es zusammen mit dem Dorf <a href="/w/index.php?title=Staryj_Kossiw&amp;action=edit&amp;redlink=1" class="new" title="Staryj Kossiw (Seite nicht vorhanden)">Staryj Kossiw</a> (<span lang="uk-Cyrl" class="Cyrl">Старий Косів</span>) die <i>Landratsgemeinde Werbowez</i> (Вербовецька сільська рада/<i>Werbowezka silska rada</i>) im Osten des Rajons.
</p>
<ol class="references">
<li id="cite_note-1"><span class="mw-cite-backlink"><a href="#cite_ref-1">↑</a></span> <span class="reference-text"><a rel="nofollow" class="external text" href="http://w1.c1.rada.gov.ua/pls/z7503/A005?rf7571=13801">Ortswebseite</a> auf der offiziellen Webpräsenz der <a href="/wiki/Werchowna_Rada" title="Werchowna Rada">Werchowna Rada</a>; abgerufen am 14. November 2017 (ukrainisch)</span>
</li>
<li id="cite_note-2"><span class="mw-cite-backlink"><a href="#cite_ref-2">↑</a></span> <span class="reference-text"><a rel="nofollow" class="external text" href="http://ukrssr.com.ua/ifrank/kosivskiy/verbovets-kosivskiy-rayon-ivano-frankivska-oblast">Ortsgeschichte Werbowez</a> in der <a href="/wiki/Geschichte_der_St%C3%A4dte_und_D%C3%B6rfer_der_Ukrainischen_SSR" title="Geschichte der Städte und Dörfer der Ukrainischen SSR">Geschichte der Städte und Dörfer der Ukrainischen SSR</a>; abgerufen am 14. November 2017 (ukrainisch)</span>
</li>
<li id="cite_note-3"><span class="mw-cite-backlink"><a href="#cite_ref-3">↑</a></span> <span class="reference-text"><a rel="nofollow" class="external text" href="https://zakon.rada.gov.ua/laws/show/714-2020-%D1%80#Text">Кабінет Міністрів України Розпорядження від 12 червня 2020 р. № 714-р "Про визначення адміністративних центрів та затвердження територій територіальних громад Івано-Франківської області"</a></span>
</li>
</ol>
<!--
NewPP limit report
Parsed by mw1396
Cached time: 20230831121013
Cache expiry: 42588
Reduced expiry: true
Complications: []
CPU time usage: 0.219 seconds
Real time usage: 0.274 seconds
Preprocessor visited node count: 6414/1000000
Postexpand include size: 33611/2097152 bytes
Template argument size: 12317/2097152 bytes
Highest expansion depth: 34/100
Expensive parser function count: 9/500
Unstrip recursion depth: 0/20
Unstrip postexpand size: 1476/5000000 bytes
Lua time usage: 0.080/10.000 seconds
Lua memory usage: 3398800/52428800 bytes
Number of Wikibase entities loaded: 0/400
-->
<!--
Transclusion expansion time report (%,ms,calls,template)
100.00% 239.600 1 -total
93.55% 224.134 1 Vorlage:Infobox_Ort_in_der_Ukraine
50.81% 121.740 2 Vorlage:Positionskarte
49.72% 119.121 2 Vorlage:Positionskarte+
44.41% 106.401 2 Vorlage:Positionskarte~
33.28% 79.732 2 Vorlage:Positionskarte~*
25.69% 61.558 3 Vorlage:Lang
19.41% 46.499 1 Vorlage:Positionskarte_ISO_3166-2
16.90% 40.486 12 Vorlage:CoordinateLONG
14.02% 33.586 10 Vorlage:CoordinateLAT
-->
</div>" - (de.wikipedia.org 31.08.2023)';
$output = NodaWikidataFetcher::cleanWikidataInput($testStr);
$expected = 'Werbowez (ukrainisch Вербовець; russisch Вербовец, polnisch Wierzbowiec; rumänisch Verboveț) ist ein Dorf in der ukrainischen Oblast Iwano-Frankiwsk mit etwa 3400 Einwohnern (2001).';
self::assertTrue(
str_starts_with($output, $expected),
"Start of parsed Wikipedia text should be:" . PHP_EOL . PHP_EOL . $expected . PHP_EOL . PHP_EOL . 'Real start text is: ' . PHP_EOL . PHP_EOL . substr($output, 0, 250)
);
$output = NodaWikidataFetcher::cleanWikidataInput('<div class="mw-parser-output"><figure class="mw-default-size mw-halign-right" typeof="mw:File/Thumb"><a href="/wiki/File:%D0%92%D0%B5%D1%80%D0%B1%D0%BE%D0%B2%D0%B5%D1%86%D1%8C.JPG" class="mw-file-description"><img src="//upload.wikimedia.org/wikipedia/commons/thumb/9/9d/%D0%92%D0%B5%D1%80%D0%B1%D0%BE%D0%B2%D0%B5%D1%86%D1%8C.JPG/220px-%D0%92%D0%B5%D1%80%D0%B1%D0%BE%D0%B2%D0%B5%D1%86%D1%8C.JPG" decoding="async" width="220" height="147" class="mw-file-element" srcset="//upload.wikimedia.org/wikipedia/commons/thumb/9/9d/%D0%92%D0%B5%D1%80%D0%B1%D0%BE%D0%B2%D0%B5%D1%86%D1%8C.JPG/330px-%D0%92%D0%B5%D1%80%D0%B1%D0%BE%D0%B2%D0%B5%D1%86%D1%8C.JPG 1.5x, //upload.wikimedia.org/wikipedia/commons/thumb/9/9d/%D0%92%D0%B5%D1%80%D0%B1%D0%BE%D0%B2%D0%B5%D1%86%D1%8C.JPG/440px-%D0%92%D0%B5%D1%80%D0%B1%D0%BE%D0%B2%D0%B5%D1%86%D1%8C.JPG 2x" data-file-width="5184" data-file-height="3456" /></a><figcaption></figcaption></figure>
<p><span style="font-size: small;"><span id="coordinates"><a href="/wiki/%E5%9C%B0%E7%90%86%E5%9D%90%E6%A0%87" class="mw-redirect" title="地理坐标">坐标</a><style data-mw-deduplicate="TemplateStyles:r65292569">.mw-parser-output .geo-default,.mw-parser-output .geo-dms,.mw-parser-output .geo-dec{display:inline}.mw-parser-output .geo-nondefault,.mw-parser-output .geo-multi-punct{display:none}.mw-parser-output .longitude,.mw-parser-output .latitude{white-space:nowrap}</style><span class="plainlinks nourlexpansion"><a class="external text" href="//geohack.toolforge.org/geohack.php?language=zh&amp;pagename=%E9%9F%8B%E7%88%BE%E5%8D%9A%E9%9F%8B%E9%BD%8A_(%E7%A7%91%E7%B4%A2%E5%A4%AB%E5%8D%80)&amp;params=48_20_32_N_25_8_0_E_scale:30000"><span class="geo-default"><span class="geo-dms" title="此地的地图、航拍照片和其他数据"><span class="latitude">48°2032″N</span> <span class="longitude">25°80″E</span></span></span><span class="geo-multi-punct">&#xfeff; / &#xfeff;</span><span class="geo-nondefault"><span class="geo-dec" title="此地的地图、航拍照片和其他数据">48.34222°N 25.13333°E</span><span style="display:none">&#xfeff; / <span class="geo">48.34222; 25.13333</span></span></span></a></span></span></span>
</p><p><b>韋爾博韋齊</b><a href="/wiki/%E7%83%8F%E5%85%8B%E8%98%AD%E8%AA%9E" class="mw-redirect" title="烏克蘭語">烏克蘭語</a><span lang="uk">Вербовець</span>),是<a href="/wiki/%E7%83%8F%E5%85%8B%E8%98%AD" class="mw-redirect" title="烏克蘭">烏克蘭</a>的村落,位於該國西部<a href="/wiki/%E4%BC%8A%E4%B8%87%E8%AF%BA-%E5%BC%97%E5%85%B0%E7%A7%91%E5%A4%AB%E6%96%AF%E5%85%8B%E5%B7%9E" title="伊万诺-弗兰科夫斯克州">伊萬諾-弗蘭科夫斯克州</a>,由<a href="/wiki/%E7%A7%91%E7%B4%A2%E5%A4%AB%E5%8D%80" class="mw-redirect" title="科索夫區">科索夫區</a>負責管轄始建於1456年面積18.77平方公里2001年人口3,395。
</p>
<!--
NewPP limit report
Parsed by mw1412
Cached time: 20230831132208
Cache expiry: 1814400
Reduced expiry: false
Complications: []
CPU time usage: 0.147 seconds
Real time usage: 0.186 seconds
Preprocessor visited node count: 48/1000000
Postexpand include size: 2084/2097152 bytes
Template argument size: 0/2097152 bytes
Highest expansion depth: 3/100
Expensive parser function count: 1/500
Unstrip recursion depth: 0/20
Unstrip postexpand size: 362/5000000 bytes
Lua time usage: 0.110/10.000 seconds
Lua memory usage: 15402517/52428800 bytes
Number of Wikibase entities loaded: 1/400
-->
<!--
Transclusion expansion time report (%,ms,calls,template)
100.00% 152.989 1 -total
70.07% 107.204 1 Template:Lang-uk
29.62% 45.313 1 Template:Coord
-->
</div>');
$expected = '韋爾博韋齊(烏克蘭語:Вербовець),是烏克蘭的村落,位於該國西部伊萬諾-弗蘭科夫斯克州由科索夫區負責管轄始建於1456年面積18.77平方公里2001年人口3,3';
self::assertTrue(
str_starts_with($output, $expected),
"Start of parsed Wikipedia text should be:" . PHP_EOL . PHP_EOL . $expected . PHP_EOL . PHP_EOL . 'Real start text is: ' . PHP_EOL . PHP_EOL . substr($output, 0, 250)
);
return [
'Actor with wikidata and translation' => [$cur[0], $cur[1]],
];
}
/**
* Test for cleaning wikidata info.
* Data provider for a place that has a wikidata link and a Telugu translation.
*
* @author Joshua Ramon Enslin <joshua@museum-digital.de>
* @group ValidOutput
* @small
* @return array<string, array{0: int, 1: string}>
*/
public static function placeWithTlAndWikidataLinkProvider():array {

    $mysqli = md_main_mysqli_connect();

    // Select a place that is linked to Wikidata and has a translation in the test language.
    $result = $mysqli->do_read_query("SELECT `ort_id`, `noda_nrinsource`
        FROM `" . DATABASENAME_NODA . "`.`noda_orte`
        WHERE `noda_source` = 'Wikidata'
            AND EXISTS (SELECT 1 FROM `" . DATABASENAME_NODA . "`.`ort_translation`
                WHERE `ort_translation`.`ort_id` = `noda_orte`.`ort_id`
                    AND `trans_language` = '" . $mysqli->escape_string(self::TEST_LANG) . "')");

    // Only the first row is used. Release the result and the connection before
    // evaluating it, so that neither is left open when no suitable entry exists.
    $cur = $result->fetch_row();
    $result->close();
    $mysqli->close();

    if (!$cur) {
        throw new Exception("Failed to identify an entry that has a wikidata entry and a translation for language " . self::TEST_LANG);
    }

    // Cast explicitly: the consuming test declares (int, string) parameters under
    // strict types, and the values may arrive as strings depending on the driver settings.
    return [
        'Place with wikidata and translation' => [(int)$cur[0], (string)$cur[1]],
    ];

}
/**
 * Data provider for a tag that has a wikidata link and a translation in the
 * test language (self::TEST_LANG).
 *
 * @return array<string, array{0: int, 1: string}>
 */
public static function tagWithTlAndWikidataLinkProvider():array {

    $mysqli = md_main_mysqli_connect();

    // Select a tag that is linked to Wikidata and has a translation in the test language.
    $result = $mysqli->do_read_query("SELECT `tag_id`, `noda_nrinsource`
        FROM `" . DATABASENAME_NODA . "`.`noda_tag`
        WHERE `noda_source` = 'Wikidata'
            AND EXISTS (SELECT 1 FROM `" . DATABASENAME_NODA . "`.`tag_translation`
                WHERE `tag_translation`.`tag_id` = `noda_tag`.`tag_id`
                    AND `trans_language` = '" . $mysqli->escape_string(self::TEST_LANG) . "')");

    // Only the first row is used. Release the result and the connection before
    // evaluating it, so that neither is left open when no suitable entry exists.
    $cur = $result->fetch_row();
    $result->close();
    $mysqli->close();

    if (!$cur) {
        throw new Exception("Failed to identify an entry that has a wikidata entry and a translation for language " . self::TEST_LANG);
    }

    // Cast explicitly: the consuming test declares (int, string) parameters under
    // strict types, and the values may arrive as strings depending on the driver settings.
    return [
        'Tag with wikidata and translation' => [(int)$cur[0], (string)$cur[1]],
    ];

}
/**
* Test for fetching and recording translations for an actor.
*
* @param integer $actor_id Actor ID.
* @param string $wikidata_id Wikidata ID.
*
* @return void
*/
public function testCleanWikidataInputWithoutHtml():void {
#[DataProvider('actorWithTlAndWikidataLinkProvider')]
public function testFetchingTranslationForPersinst(int $actor_id, string $wikidata_id):void {
$output = NodaWikidataFetcher::cleanWikidataInput('Werbowez (ukrainisch Вербовець; russisch Вербовец, polnisch Wierzbowiec; rumänisch Verboveț) ist ein Dorf in der ukrainischen Oblast Iwano-Frankiwsk mit etwa 3400 Einwohnern (2001).[1]');
$expected = 'Werbowez (ukrainisch Вербовець; russisch Вербовец, polnisch Wierzbowiec; rumänisch Verboveț) ist ein Dorf in der ukrainischen Oblast Iwano-Frankiwsk mit etwa 3400 Einwohnern (2001).';
self::assertTrue(
str_starts_with($output, $expected),
"Start of parsed Wikipedia text should be:" . PHP_EOL . PHP_EOL . $expected . PHP_EOL . PHP_EOL . 'Real start text is: ' . PHP_EOL . PHP_EOL . substr($output, 0, 250)
);
$mysqli = md_main_mysqli_connect();
$mysqli->do_update_query("DELETE FROM `" . DATABASENAME_NODA . "`.`persinst_translation`
WHERE `persinst_id` = " . $actor_id . "
AND `trans_language` = '" . $mysqli->escape_string(self::TEST_LANG) . "'");
self::assertEquals(0, MDMysqliTesting::queryNumRows($mysqli, "
FROM `" . DATABASENAME_NODA . "`.`persinst_translation`
WHERE `persinst_id` = " . $actor_id . "
AND `trans_language` = '" . $mysqli->escape_string(self::TEST_LANG) . "'"));
$data = NodaWikidataFetcher::getWikidataEntity($wikidata_id);
$fetcher = new NodaWikidataFetcher($mysqli);
$fetcher->getWikidataTranslationsForPersinst($data, $actor_id, [self::TEST_LANG]);
self::assertEquals(1, MDMysqliTesting::queryNumRows($mysqli, "
FROM `" . DATABASENAME_NODA . "`.`persinst_translation`
WHERE `persinst_id` = " . $actor_id . "
AND `trans_language` = '" . $mysqli->escape_string(self::TEST_LANG) . "'"));
$mysqli->close();
}
/**
 * Test for fetching and recording translations for a place.
 *
 * Deletes the place's translation in the test language, asserts that it is gone,
 * lets the fetcher process the Wikidata entity and asserts that exactly one
 * translation row exists afterwards.
 *
 * @param integer $place_id    Place ID.
 * @param string  $wikidata_id Wikidata ID.
 *
 * @return void
 */
#[DataProvider('placeWithTlAndWikidataLinkProvider')]
public function testFetchingTranslationForPlace(int $place_id, string $wikidata_id):void {

    $mysqli = md_main_mysqli_connect();

    try {
        // Identifies the translation row of this place in the test language.
        // Shared by the DELETE statement and both row counts.
        $translationRows = "
            FROM `" . DATABASENAME_NODA . "`.`ort_translation`
            WHERE `ort_id` = " . $place_id . "
                AND `trans_language` = '" . $mysqli->escape_string(self::TEST_LANG) . "'";

        $mysqli->do_update_query("DELETE" . $translationRows);
        self::assertEquals(0, MDMysqliTesting::queryNumRows($mysqli, $translationRows));

        $data = NodaWikidataFetcher::getWikidataEntity($wikidata_id);
        $fetcher = new NodaWikidataFetcher($mysqli);
        $fetcher->getWikidataTranslationsForPlace($data, $place_id, [self::TEST_LANG]);

        self::assertEquals(1, MDMysqliTesting::queryNumRows($mysqli, $translationRows));
    }
    finally {
        // Close the connection even if an assertion fails or fetching throws.
        $mysqli->close();
    }

}
/**
 * Test for fetching and recording translations for a tag.
 *
 * Removes any existing translation of the tag in the test language,
 * verifies that none is left, fetches translations from Wikidata and then
 * expects exactly one translation row to exist.
 *
 * @param integer $tag_id      Tag ID.
 * @param string  $wikidata_id Wikidata ID.
 *
 * @return void
 */
#[DataProvider('tagWithTlAndWikidataLinkProvider')]
public function testFetchingTranslationForTag(int $tag_id, string $wikidata_id):void {

    $mysqli = md_main_mysqli_connect();

    try {
        // Table plus filter shared by the DELETE and both row counts.
        $target = "`" . DATABASENAME_NODA . "`.`tag_translation`\n"
            . "WHERE `tag_id` = " . $tag_id . "\n"
            . "AND `trans_language` = '" . $mysqli->escape_string(self::TEST_LANG) . "'";

        // Start from a clean slate: no translation in the test language.
        $mysqli->do_update_query("DELETE FROM " . $target);
        self::assertEquals(0, MDMysqliTesting::queryNumRows($mysqli, "\nFROM " . $target));

        $data = NodaWikidataFetcher::getWikidataEntity($wikidata_id);
        $fetcher = new NodaWikidataFetcher($mysqli);
        $fetcher->getWikidataTranslationsForTag($data, $tag_id, [self::TEST_LANG]);

        // Fetching must have recorded exactly one translation.
        self::assertEquals(1, MDMysqliTesting::queryNumRows($mysqli, "\nFROM " . $target));
    }
    finally {
        // Close the connection even if an assertion or the fetch throws.
        $mysqli->close();
    }

}
/**
 * Ensures that listing translations via Wikidata / Wikipedia yields the
 * title of the Wikipedia page as label, not the Wikidata title.
 *
 * Checked using the German entry of Wikidata entity Q33550.
 *
 * @return void
 */
public function testListTranslationsFromWikidataWikipediaReturnsWikipediaTitle():void {

    $entity       = NodaWikidataFetcher::getWikidataEntity("Q33550");
    $translations = NodaWikidataFetcher::listTranslationsFromWikidataWikipedia(["de"], $entity);

    // The German entry must be present and non-empty before its label is compared.
    $german = $translations['de'];
    self::assertNotEmpty($german);
    self::assertEquals("Friedrich II. (Preußen)", $german['label']);

}
}