Compare commits
52 Commits
6f7ad13c4e
...
master
Author | SHA1 | Date | |
---|---|---|---|
119f216907 | |||
25668b7b16
|
|||
8a31cf216e
|
|||
ff474341ed
|
|||
1051e10732
|
|||
057cac0f1b
|
|||
0053fbe030
|
|||
7a2856ffad
|
|||
00638152cf
|
|||
dba60dbce6
|
|||
f84fe1bca5
|
|||
423959ac94
|
|||
e8edb4a459
|
|||
8491b62a83
|
|||
bb2b1c2c32
|
|||
5054d3c62f
|
|||
beba838c0d
|
|||
54dd958073
|
|||
5b99304b5c
|
|||
5cce98f15b
|
|||
5036c77f32
|
|||
e95415be8f
|
|||
5192781494
|
|||
d9d9f7fcdc
|
|||
dbfa0df17f
|
|||
3409ec7afe
|
|||
27ac3f255a
|
|||
9d7d53a858
|
|||
28f6db67ff
|
|||
2f3bc5f2fa
|
|||
39362f537a | |||
de0357473a
|
|||
ef43270fb2
|
|||
338e09f001
|
|||
4cf9eaf4fa
|
|||
18438251a7
|
|||
1cf0f9858a
|
|||
1d50027809
|
|||
d1cee17ef5
|
|||
baf7905e0b
|
|||
9bf14d7d91
|
|||
a621534136
|
|||
51fe9a5e45
|
|||
9c2eaa2929
|
|||
546c17031a
|
|||
bf22f5541d
|
|||
e036d7881a
|
|||
d8db941485
|
|||
b7bb7364d4
|
|||
4dcd93b947
|
|||
c72ad51dda | |||
d6dea3e280
|
@ -13,7 +13,7 @@ final class NodaBlacklistedTerms {
|
||||
/**
|
||||
* A blacklist of disallowed tags. All entries are listed in full lowercase.
|
||||
*/
|
||||
const TAG_BLACKLIST = [
|
||||
public const TAG_BLACKLIST = [
|
||||
'de' => [
|
||||
'andere',
|
||||
'anderes',
|
||||
@ -33,16 +33,35 @@ final class NodaBlacklistedTerms {
|
||||
'ding',
|
||||
'dinge',
|
||||
'aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa',
|
||||
'Aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa',
|
||||
'aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa',
|
||||
'nichtmünzliches',
|
||||
'unbestimmt',
|
||||
'AA',
|
||||
'BB',
|
||||
'CC',
|
||||
'DD',
|
||||
'EE',
|
||||
'FF',
|
||||
'GG',
|
||||
'HH',
|
||||
'LL',
|
||||
'-',
|
||||
'?',
|
||||
],
|
||||
'en' => [
|
||||
'other',
|
||||
'others',
|
||||
'unknown',
|
||||
'various',
|
||||
'-',
|
||||
'?',
|
||||
],
|
||||
'hu' => [
|
||||
'ism.',
|
||||
'ismeretlen',
|
||||
'-',
|
||||
'?',
|
||||
],
|
||||
];
|
||||
|
||||
|
@ -79,11 +79,12 @@ final class NodaConsolidatedNamesForPersinst extends NodaConsolidatedNamesAbstra
|
||||
if (count($parts) !== 2) return [];
|
||||
|
||||
$nameOnly = trim($parts[0]);
|
||||
$dateString = rtrim($parts[1], ')'); //
|
||||
$dateString = trim(rtrim($parts[1], ')')); //
|
||||
|
||||
if (!empty($dates = NodaTimeSplitter::is_timespan($dateString))
|
||||
&& $dates->start_year !== '?'
|
||||
&& $dates->end_year !== '?'
|
||||
&& $dates->start_year !== $dates->end_year
|
||||
&& intval($dates->end_year) - intval($dates->start_year) < 150
|
||||
) {
|
||||
return [
|
||||
|
@ -93,7 +93,7 @@ final class NodaConsolidatedNamesForPlaces extends NodaConsolidatedNamesAbstract
|
||||
*
|
||||
* @return string
|
||||
*/
|
||||
private static function _rewrite_narrower_broader_pairs_to_brackets(string $name, string $indicator, $separator = ', '):string {
|
||||
private static function _rewrite_narrower_broader_pairs_to_brackets(string $name, string $indicator, string $separator = ', '):string {
|
||||
|
||||
if (str_contains($name, $indicator)
|
||||
&& substr_count($name, $indicator) === 1
|
||||
@ -223,7 +223,7 @@ final class NodaConsolidatedNamesForPlaces extends NodaConsolidatedNamesAbstract
|
||||
*
|
||||
* @return string
|
||||
*/
|
||||
private static function _rewrite_ukrainian_names_by_hierarchy($name):string {
|
||||
private static function _rewrite_ukrainian_names_by_hierarchy(string $name):string {
|
||||
|
||||
$identifiersByLevel = [
|
||||
'state' => [' РСР', 'РСР ', ' АРСР', 'АРСР ', ' губернія', 'губернія '],
|
||||
@ -325,7 +325,9 @@ final class NodaConsolidatedNamesForPlaces extends NodaConsolidatedNamesAbstract
|
||||
}
|
||||
|
||||
$output = $main_name;
|
||||
if (!empty($specifiers)) $output .= ' (' . implode(', ', $specifiers) . ')';
|
||||
if (!empty($specifiers)) {
|
||||
$output .= ' (' . implode(', ', $specifiers) . ')';
|
||||
}
|
||||
|
||||
return $output;
|
||||
|
||||
|
@ -101,7 +101,11 @@ final class NodaGroup {
|
||||
/**
|
||||
* Updates a group.
|
||||
*
|
||||
* @retun void
|
||||
* @param integer $group_id ID of the group to update.
|
||||
* @param string $name Name of the group.
|
||||
* @param string $comment Optional: Comment for the group.
|
||||
*
|
||||
* @return void
|
||||
*/
|
||||
public function update(int $group_id, string $name, string $comment = ''):void {
|
||||
|
||||
@ -123,7 +127,9 @@ final class NodaGroup {
|
||||
/**
|
||||
* Deletes a group.
|
||||
*
|
||||
* @retun void
|
||||
* @param integer $group_id ID of the group to delete.
|
||||
*
|
||||
* @return void
|
||||
*/
|
||||
public function delete(int $group_id):void {
|
||||
|
||||
|
@ -155,6 +155,37 @@ final class NodaIDGetter {
|
||||
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns persinst ID by entry in persinst translations table,
|
||||
* irrespective of language.
|
||||
*
|
||||
* @param MDMysqli $mysqli_noda Database connection.
|
||||
* @param string $name Name of the persinst to search for.
|
||||
*
|
||||
* @return integer
|
||||
*/
|
||||
public static function getPersinstIDByAnyTransName(MDMysqli $mysqli_noda, string $name):int {
|
||||
|
||||
if (empty($name)) return 0;
|
||||
|
||||
$result = $mysqli_noda->query_by_stmt("
|
||||
SELECT `persinst_id`, `trans_name`
|
||||
FROM `persinst_translation`
|
||||
WHERE `trans_name` = ?
|
||||
LIMIT 2", "s", $name);
|
||||
|
||||
while ($cur = $result->fetch_row()) {
|
||||
if (self::_stri_matches($cur[1], $name)) {
|
||||
$result->close();
|
||||
return (int)$cur[0];
|
||||
}
|
||||
}
|
||||
$result->close();
|
||||
|
||||
return 0;
|
||||
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns persinst ID by entry in persinst translations table
|
||||
* plus birth and death.
|
||||
@ -456,6 +487,37 @@ final class NodaIDGetter {
|
||||
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns place ID by entry in place translations table, irrespective of
|
||||
* language.
|
||||
*
|
||||
* @param MDMysqli $mysqli_noda Database connection.
|
||||
* @param string $name Name of the place to search for.
|
||||
*
|
||||
* @return integer
|
||||
*/
|
||||
public static function getPlaceIDByAnyTransName(MDMysqli $mysqli_noda, string $name):int {
|
||||
|
||||
if (empty($name)) return 0;
|
||||
|
||||
$result = $mysqli_noda->query_by_stmt("
|
||||
SELECT `ort_id`, `trans_name`
|
||||
FROM `ort_translation`
|
||||
WHERE `trans_name` = ?
|
||||
LIMIT 2", "s", $name);
|
||||
|
||||
while ($cur = $result->fetch_row()) {
|
||||
if (self::_stri_matches($cur[1], $name)) {
|
||||
$result->close();
|
||||
return (int)$cur[0];
|
||||
}
|
||||
}
|
||||
$result->close();
|
||||
|
||||
return 0;
|
||||
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns place ID by entry in place noda table.
|
||||
*
|
||||
@ -647,6 +709,37 @@ final class NodaIDGetter {
|
||||
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns tag ID by entry in tag translations table,
|
||||
* irrespective of language.
|
||||
*
|
||||
* @param MDMysqli $mysqli_noda Database connection.
|
||||
* @param string $name Name of the tag to search for.
|
||||
*
|
||||
* @return integer
|
||||
*/
|
||||
public static function getTagIDByAnyTransName(MDMysqli $mysqli_noda, string $name):int {
|
||||
|
||||
if (empty($name)) return 0;
|
||||
|
||||
$result = $mysqli_noda->query_by_stmt("
|
||||
SELECT `tag_id`, `trans_name`
|
||||
FROM `tag_translation`
|
||||
WHERE `trans_name` = ?
|
||||
LIMIT 2", "s", $name);
|
||||
|
||||
while ($cur = $result->fetch_row()) {
|
||||
if (self::_stri_matches($name, $cur[1])) {
|
||||
$result->close();
|
||||
return (int)$cur[0];
|
||||
}
|
||||
}
|
||||
$result->close();
|
||||
|
||||
return 0;
|
||||
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns tag ID by entry in tag noda table.
|
||||
*
|
||||
@ -838,6 +931,36 @@ final class NodaIDGetter {
|
||||
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns time ID by entry in time translations table.
|
||||
*
|
||||
* @param MDMysqli $mysqli_noda Database connection.
|
||||
* @param string $name Name of the time to search for.
|
||||
*
|
||||
* @return integer
|
||||
*/
|
||||
public static function getTimeIDByAnyTransName(MDMysqli $mysqli_noda, string $name):int {
|
||||
|
||||
if (empty($name)) return 0;
|
||||
|
||||
$result = $mysqli_noda->query_by_stmt("
|
||||
SELECT `zeit_id`, `trans_name`
|
||||
FROM `zeit_translation`
|
||||
WHERE `trans_name` = ?
|
||||
LIMIT 2", "s", $name);
|
||||
|
||||
while ($cur = $result->fetch_row()) {
|
||||
if (self::_stri_matches($name, $cur[1])) {
|
||||
$result->close();
|
||||
return (int)$cur[0];
|
||||
}
|
||||
}
|
||||
$result->close();
|
||||
|
||||
return 0;
|
||||
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns time ID by entry in time translations table.
|
||||
*
|
||||
@ -999,4 +1122,79 @@ final class NodaIDGetter {
|
||||
return 0;
|
||||
|
||||
}
|
||||
|
||||
/**
|
||||
* Checks each string in a list of strings for its existence as a tag name.
|
||||
*
|
||||
* @param MDMysqli $mysqli_noda Database connection.
|
||||
* @param string $lang Language to check in.
|
||||
* @param non-empty-array<string> $phrases List of phrases to check.
|
||||
*
|
||||
* @return array{count: int, tag: integer[], actor: integer[], time: integer[], place: integer[]}
|
||||
*/
|
||||
public static function searchEntryNamesByList(MDMysqli $mysqli_noda, string $lang, array $phrases):array {
|
||||
|
||||
$output = [
|
||||
'count' => 0,
|
||||
'tag' => [],
|
||||
'actor' => [],
|
||||
'time' => [],
|
||||
'place' => [],
|
||||
];
|
||||
|
||||
foreach ($phrases as $phrase) {
|
||||
|
||||
if (($tag_id = NodaIDGetter::getTagIDByNamesAndRewrites($mysqli_noda, $lang, $phrase)) !== 0 && !in_array($tag_id, $output['tag'], true)) {
|
||||
$output['tag'][] = $tag_id;
|
||||
++$output['count'];
|
||||
}
|
||||
else if (($tag_id_by_tl = NodaIDGetter::getTagIDByAnyTransName($mysqli_noda, $phrase)) !== 0 && !in_array($tag_id_by_tl, $output['tag'], true)) {
|
||||
$output['tag'][] = $tag_id_by_tl;
|
||||
++$output['count'];
|
||||
}
|
||||
else if (($place_id = NodaIDGetter::getPlaceIDByNamesAndRewrites($mysqli_noda, $lang, $phrase)) !== 0 && !in_array($place_id, $output['place'], true)) {
|
||||
$output['place'][] = $place_id;
|
||||
++$output['count'];
|
||||
}
|
||||
else if (($place_id = NodaIDGetter::getPlaceIDByAnyTransName($mysqli_noda, $phrase)) !== 0 && !in_array($place_id, $output['place'], true)) {
|
||||
$output['place'][] = $place_id;
|
||||
++$output['count'];
|
||||
}
|
||||
else if (($persinst_id = NodaIDGetter::getPersinstIDByNamesAndRewrites($mysqli_noda, $lang, $phrase, '', '')) !== 0 && !in_array($persinst_id, $output['actor'], true)) {
|
||||
$output['actor'][] = $persinst_id;
|
||||
++$output['count'];
|
||||
}
|
||||
else if (($persinst_id = NodaIDGetter::getPersinstIDByAnyTransName($mysqli_noda, $phrase)) !== 0 && !in_array($persinst_id, $output['actor'], true)) {
|
||||
$output['actor'][] = $persinst_id;
|
||||
++$output['count'];
|
||||
}
|
||||
else if (($time_id = NodaIDGetter::getTimeIDByNamesAndRewrites($mysqli_noda, $lang, $phrase)) !== 0 && !in_array($time_id, $output['time'], true)) {
|
||||
$output['time'][] = $time_id;
|
||||
++$output['count'];
|
||||
}
|
||||
else if (($time_id = NodaIDGetter::getTimeIDByAnyTransName($mysqli_noda, $phrase)) !== 0 && !in_array($time_id, $output['time'], true)) {
|
||||
$output['time'][] = $time_id;
|
||||
++$output['count'];
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
if (count($phrases) !== $output['count']) {
|
||||
return [
|
||||
'count' => 0,
|
||||
'tag' => [],
|
||||
'actor' => [],
|
||||
'time' => [],
|
||||
'place' => [],
|
||||
];
|
||||
}
|
||||
|
||||
if (!empty($output['tag'])) sort($output['tag']);
|
||||
if (!empty($output['actor'])) sort($output['actor']);
|
||||
if (!empty($output['time'])) sort($output['time']);
|
||||
if (!empty($output['place'])) sort($output['place']);
|
||||
|
||||
return $output;
|
||||
|
||||
}
|
||||
}
|
||||
|
@ -32,8 +32,12 @@ final class NodaImportLogger {
|
||||
$logStmt = $mysqli_noda->do_prepare("INSERT INTO `persinst_logged_imports`
|
||||
(`instance`, `institution_id`, `input_string`, `persinst_id`)
|
||||
VALUES (?, ?, ?, ?)");
|
||||
$logStmt->bind_param("sisi", $instance, $institution_id, $loggedName, $persinst_id);
|
||||
$logStmt->execute();
|
||||
try {
|
||||
$logStmt->bind_param("sisi", $instance, $institution_id, $loggedName, $persinst_id);
|
||||
$logStmt->execute();
|
||||
}
|
||||
catch (MDMysqliDuplicateKeysError $e) {
|
||||
}
|
||||
$logStmt->close();
|
||||
|
||||
}
|
||||
@ -54,8 +58,12 @@ final class NodaImportLogger {
|
||||
$logStmt = $mysqli_noda->do_prepare("INSERT INTO `orte_logged_imports`
|
||||
(`instance`, `institution_id`, `input_string`, `ort_id`)
|
||||
VALUES (?, ?, ?, ?)");
|
||||
$logStmt->bind_param("sisi", $instance, $institution_id, $name, $ort_id);
|
||||
$logStmt->execute();
|
||||
try {
|
||||
$logStmt->bind_param("sisi", $instance, $institution_id, $name, $ort_id);
|
||||
$logStmt->execute();
|
||||
}
|
||||
catch (MDMysqliDuplicateKeysError $e) {
|
||||
}
|
||||
$logStmt->close();
|
||||
|
||||
}
|
||||
@ -76,8 +84,12 @@ final class NodaImportLogger {
|
||||
$logStmt = $mysqli_noda->do_prepare("INSERT INTO `zeiten_logged_imports`
|
||||
(`instance`, `institution_id`, `input_string`, `zeit_id`)
|
||||
VALUES (?, ?, ?, ?)");
|
||||
$logStmt->bind_param("sisi", $instance, $institution_id, $name, $zeit_id);
|
||||
$logStmt->execute();
|
||||
try {
|
||||
$logStmt->bind_param("sisi", $instance, $institution_id, $name, $zeit_id);
|
||||
$logStmt->execute();
|
||||
}
|
||||
catch (MDMysqliDuplicateKeysError $e) {
|
||||
}
|
||||
$logStmt->close();
|
||||
|
||||
}
|
||||
@ -98,8 +110,12 @@ final class NodaImportLogger {
|
||||
$logStmt = $mysqli_noda->do_prepare("INSERT INTO `tag_logged_imports`
|
||||
(`instance`, `institution_id`, `input_string`, `tag_id`)
|
||||
VALUES (?, ?, ?, ?)");
|
||||
$logStmt->bind_param("sisi", $instance, $institution_id, $name, $tag_id);
|
||||
$logStmt->execute();
|
||||
try {
|
||||
$logStmt->bind_param("sisi", $instance, $institution_id, $name, $tag_id);
|
||||
$logStmt->execute();
|
||||
}
|
||||
catch (MDMysqliDuplicateKeysError $e) {
|
||||
}
|
||||
$logStmt->close();
|
||||
|
||||
}
|
||||
|
@ -28,9 +28,10 @@ final class NodaSplitTime {
|
||||
/**
|
||||
* Returns a single, exact date.
|
||||
*
|
||||
* @param string $year Year.
|
||||
* @param string $month Month.
|
||||
* @param string $day Day.
|
||||
* @param string $year Year.
|
||||
* @param string $month Month.
|
||||
* @param string $day Day.
|
||||
* @param NodaTimeBeforeAfterIndicator $before_after_indicator Determines if the time is exact or before / after.
|
||||
*
|
||||
* @return NodaSplitTime
|
||||
*/
|
||||
@ -296,6 +297,15 @@ final class NodaSplitTime {
|
||||
/**
|
||||
* Constructor.
|
||||
*
|
||||
* @param string $start_year Year.
|
||||
* @param string $end_year Year.
|
||||
* @param string $counting_time_month Month.
|
||||
* @param string $counting_time_day Day.
|
||||
* @param NodaCountingTimeIndicator $counting_time_indicator Determines if the time is BCE or CCE.
|
||||
* @param NodaTimeBeforeAfterIndicator $before_after_indicator Determines if the time is inexact to one direction.
|
||||
* @param false|string $start_date Start date.
|
||||
* @param false|string $end_date End date.
|
||||
*
|
||||
* @return void
|
||||
*/
|
||||
public function __construct(string $start_year, string $end_year,
|
||||
@ -379,5 +389,19 @@ final class NodaSplitTime {
|
||||
}
|
||||
}
|
||||
|
||||
// Validate
|
||||
$startDateTime = MD_STD::strtotime("2000-" . substr($this->start_date, -5));
|
||||
if (checkdate((int)date('m', $startDateTime), (int)date('d', $startDateTime), (int)date('Y', $startDateTime)) === false) {
|
||||
throw new MDgenericInvalidInputsException("Invalid start date: " . $this->start_date);
|
||||
}
|
||||
|
||||
if (!empty((int)$this->counting_time_day)) {
|
||||
// The year 2000 is used here as it is a leap year and lots of years accepted in md are not accepted
|
||||
// by checkdate.
|
||||
if (checkdate((int)$this->counting_time_month, (int)$this->counting_time_day, 2000) === false) {
|
||||
throw new MDgenericInvalidInputsException("Invalid date formed by counting time: " . $this->counting_time_month . ' -- ' . $this->counting_time_day);
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
}
|
||||
|
@ -15,6 +15,16 @@ final class NodaTagRelationIdentifier {
|
||||
private const SUFFIXES = [
|
||||
'de' => [
|
||||
' (Motiv)' => MDTagRelationType::display_subject,
|
||||
' [Motiv]' => MDTagRelationType::display_subject,
|
||||
' <Motiv>' => MDTagRelationType::display_subject,
|
||||
|
||||
' (Material)' => MDTagRelationType::material,
|
||||
' [Material]' => MDTagRelationType::material,
|
||||
' <Material>' => MDTagRelationType::material,
|
||||
|
||||
' (Technik)' => MDTagRelationType::technique,
|
||||
' [Technik]' => MDTagRelationType::technique,
|
||||
' <Technik>' => MDTagRelationType::technique,
|
||||
]
|
||||
];
|
||||
|
||||
|
@ -13,7 +13,7 @@ final class NodaTimeAutotranslater {
|
||||
|
||||
// TODO: Move these to NodaTimeAutotranslaterLocales
|
||||
|
||||
const LANGS_SYLLABLE_CLEANING = [
|
||||
public const LANGS_SYLLABLE_CLEANING = [
|
||||
"hu" => [
|
||||
"10-as évek" => "10-es évek",
|
||||
"40-as évek" => "40-es évek",
|
||||
@ -463,13 +463,13 @@ final class NodaTimeAutotranslater {
|
||||
}
|
||||
|
||||
/**
|
||||
* Gets translations for a given entry type.
|
||||
* Prepares translations for each available language.
|
||||
*
|
||||
* @param array<integer|string> $timeInfo Time information.
|
||||
*
|
||||
* @return array<string>
|
||||
*/
|
||||
public static function getTranslations(array $timeInfo):array {
|
||||
public static function prepareTranslations(array $timeInfo):array {
|
||||
|
||||
if (!empty($timeInfo['zeit_name']) and strlen((string)$timeInfo['zeit_name']) > 10 and !empty($timespanDates = NodaTimeSplitter::attempt_splitting_from_till((string)$timeInfo['zeit_name']))) {
|
||||
|
||||
@ -504,8 +504,11 @@ final class NodaTimeAutotranslater {
|
||||
$output = [];
|
||||
$cases = NodaTimeAutotranslaterLocales::cases();
|
||||
foreach ($cases as $tLang) {
|
||||
$start_term = self::getTranslations($startTimeInfo)[$tLang->name];
|
||||
$end_term = self::getTranslations($endTimeInfo)[$tLang->name];
|
||||
$startTls = self::getTranslations($startTimeInfo);
|
||||
$endTls = self::getTranslations($endTimeInfo);
|
||||
if (empty($startTls) || empty($endTls)) return [];
|
||||
$start_term = $startTls[$tLang->name];
|
||||
$end_term = $endTls[$tLang->name];
|
||||
|
||||
$output[$tLang->name] = \sprintf($tLang->formatYearspanForSprintf(), $start_term, $end_term);
|
||||
}
|
||||
@ -604,6 +607,78 @@ final class NodaTimeAutotranslater {
|
||||
|
||||
}
|
||||
|
||||
/**
|
||||
* Validates correctness of years in translation strings.
|
||||
*
|
||||
* @param string|integer $start Start year.
|
||||
* @param string|integer $end End year.
|
||||
* @param array<string, string> $translations Translations.
|
||||
*
|
||||
* @return boolean
|
||||
*/
|
||||
public static function validateTranslations(string|int $start, string|int $end, array $translations):bool {
|
||||
|
||||
$start = ltrim((string)$start, ' 0-');
|
||||
$end = ltrim((string)$end, ' 0-');
|
||||
|
||||
// Edge cases: Centuries and decades have special translations
|
||||
// and can thus not be validated properly
|
||||
// Century BCE
|
||||
if (substr($start, -1) === "0" && substr($end, -1) === '1' && $start > $end) {
|
||||
return true;
|
||||
}
|
||||
// Century CE
|
||||
if (substr($start, -1) === "1" && substr($end, -1) === '0' && $start < $end) {
|
||||
return true;
|
||||
}
|
||||
// Decade
|
||||
if (substr($start, -1) === "0" && substr($end, -1) === '9' && $start < $end) {
|
||||
return true;
|
||||
}
|
||||
|
||||
// 1920 + ? can be both Since 1920 and After 1919, so validation
|
||||
// is impossible there, too
|
||||
if ($start === '?' || $end === '?') return true;
|
||||
|
||||
// Unset unvalidatable languages
|
||||
unset($translations['ar'], $translations['fa']);
|
||||
|
||||
if ($start !== '?') {
|
||||
foreach ($translations as $t) {
|
||||
if (!str_contains($t, $start)) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
}
|
||||
if ($end !== '?' && $start !== $end) {
|
||||
foreach ($translations as $t) {
|
||||
if (!str_contains($t, $end)) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return true;
|
||||
|
||||
}
|
||||
|
||||
/**
|
||||
* Gets translations for a given entry type.
|
||||
*
|
||||
* @param array<integer|string> $timeInfo Time information.
|
||||
*
|
||||
* @return array<string>
|
||||
*/
|
||||
public static function getTranslations(array $timeInfo):array {
|
||||
|
||||
$output = self::prepareTranslations($timeInfo);
|
||||
|
||||
if (self::validateTranslations($timeInfo['zeit_beginn'], $timeInfo['zeit_ende'], $output) === false) return [];
|
||||
|
||||
return $output;
|
||||
|
||||
}
|
||||
|
||||
/**
|
||||
* Runs autotranslater.
|
||||
*
|
||||
@ -613,7 +688,9 @@ final class NodaTimeAutotranslater {
|
||||
*/
|
||||
public function translate(array $timeInfo):void {
|
||||
|
||||
$translations = self::getTranslations($timeInfo);
|
||||
if (empty($translations = self::getTranslations($timeInfo))) {
|
||||
return;
|
||||
}
|
||||
|
||||
$this->_mysqli_noda->autocommit(false);
|
||||
|
||||
|
@ -140,7 +140,7 @@ final class NodaTimeSplitter {
|
||||
"decemberig",
|
||||
];
|
||||
|
||||
private const REGEX_CENTURIES = '(\ |)(Jh\.|Jhd(|\.)|Jhdt(|\.)|Jahrhundert|sz|század|th century|ст|ст\.)';
|
||||
private const REGEX_CENTURIES = '(\ |)(Jh|Jh\.|Jhd(|\.)|Jhdt(|\.)|Jahrhundert|sz|század|th century|ст|ст\.)';
|
||||
private const REGEX_DECADES = '(s|er|er\ Jahre|(\-|\ )es\ évek|(\-|\ )as\ \évek|\ évek|\-es\ években|\-ті)';
|
||||
|
||||
/**
|
||||
@ -345,16 +345,29 @@ final class NodaTimeSplitter {
|
||||
$start_date = $output->start_date;
|
||||
$end_date = $output->end_date;
|
||||
}
|
||||
else if ($start === $end && (int)str_replace('-', '', $start_date) > (int)str_replace('-', '', $end_date)) {
|
||||
$start_date = $output->start_date;
|
||||
$end_date = $output->end_date;
|
||||
}
|
||||
return new NodaSplitTime($start, $end, $output->counting_time_month, $output->counting_time_day,
|
||||
NodaCountingTimeIndicator::bce, $output->before_after_indicator, '-' . $start_date, '-' . $end_date);
|
||||
}
|
||||
}
|
||||
|
||||
if (\preg_match("/^[0-9][0-9][0-9][0-9]\ bis [0-9][0-9][0-9][0-9]$/", $datum)) {
|
||||
if (\preg_match("/^[0-9]{4}\ bis\ [0-9]{4}$/", $datum)) {
|
||||
$start = \substr($datum, 0, 4);
|
||||
$end = \substr($datum, -4);
|
||||
return new NodaSplitTime($start, $end);
|
||||
}
|
||||
if (\preg_match("/^[0-9]{4}\ (und|oder|od.)\ [0-9]{4}$/", $datum)) {
|
||||
$start = \substr($datum, 0, 4);
|
||||
$end = \substr($datum, -4);
|
||||
$startInt = (int)$start;
|
||||
$endInt = (int)$end;
|
||||
if ($startInt === $endInt - 1) {
|
||||
return new NodaSplitTime($start, $end);
|
||||
}
|
||||
}
|
||||
|
||||
$datum = \str_replace(". ", ".", $datum);
|
||||
|
||||
@ -536,22 +549,22 @@ final class NodaTimeSplitter {
|
||||
|
||||
// 10000-20000
|
||||
if (!empty(\preg_match("/^[0-9]{5}(\-|\/)[0-9]{5}$/", $datum))) {
|
||||
return new NodaSplitTime(start_year: \substr($datum, 0, 5), end_year: \substr($datum, 6, 5));
|
||||
return new NodaSplitTime(start_year: \substr($datum, 0, 5), end_year: \substr($datum, -5));
|
||||
}
|
||||
|
||||
// 0000-0000
|
||||
if (\preg_match("/^[0-9]{4}(\-|\/)[0-9]{4}(\.|)$/", $datum)) {
|
||||
return new NodaSplitTime(start_year: \substr($datum, 0, 4), end_year: \substr($datum, 5, 4));
|
||||
if (\preg_match("/^[0-9]{4}(\-|\/|\–)[0-9]{4}(\.|)$/", $datum)) {
|
||||
return new NodaSplitTime(start_year: \substr($datum, 0, 4), end_year: \substr($datum, -4));
|
||||
}
|
||||
|
||||
// 1.900-2.000
|
||||
if (\preg_match("/^[0-9]\.[0-9][0-9][0-9](\-|\/)[0-9]\.[0-9][0-9][0-9]$/", $datum)) {
|
||||
if (\preg_match("/^[0-9]\.[0-9][0-9][0-9](\-|\/|\–)[0-9]\.[0-9][0-9][0-9]$/", $datum)) {
|
||||
$datum = \str_replace(".", "", $datum);
|
||||
return new NodaSplitTime(start_year: \substr($datum, 0, 4), end_year: \substr($datum, 5, 4));
|
||||
return new NodaSplitTime(start_year: \substr($datum, 0, 4), end_year: \substr($datum, -4));
|
||||
}
|
||||
|
||||
// German TT.MM.JJJJ / TT.MM.JJJ / TT.MM.JJ / TT.MM.J
|
||||
if (\preg_match("/^[0-9][0-9]\.[0-9][0-9]\.([0-9][0-9][0-9][0-9]|[0-9][0-9][0-9]|[0-9][0-9]|[0-9])$/", $datum)) { // German T.MM.JJJJ
|
||||
if (\preg_match("/^[0-9]{2}\.[0-9]{2}\.([0-9]{4}|[0-9]{3}|[0-9]{2}|[0-9])$/", $datum)) { // German T.MM.JJJJ
|
||||
$year = \substr($datum, 6, 4);
|
||||
$month = \substr($datum, 3, 2);
|
||||
$day = \substr($datum, 0, 2);
|
||||
@ -559,7 +572,7 @@ final class NodaTimeSplitter {
|
||||
}
|
||||
|
||||
// German TT.M.JJJJ / TT.M.JJJ / TT.M.JJ / TT.M.J
|
||||
if (\preg_match("/^[0-9][0-9]\.[0-9]\.([0-9][0-9][0-9][0-9]|[0-9][0-9][0-9]|[0-9][0-9]|[0-9])$/", $datum)) { // German T.MM.JJJJ
|
||||
if (\preg_match("/^[0-9]{2}\.[0-9]\.([0-9]{4}|[0-9]{3}|[0-9]{2}|[0-9])$/", $datum)) { // German T.MM.JJJJ
|
||||
$year = \substr($datum, 5, 4);
|
||||
$month = "0" . \substr($datum, 3, 1);
|
||||
$day = \substr($datum, 0, 2);
|
||||
@ -590,10 +603,24 @@ final class NodaTimeSplitter {
|
||||
return NodaSplitTime::genExactDate($year, $month, $day);
|
||||
}
|
||||
// Intl': 2020-12
|
||||
if (\preg_match("/^[0-9]{4}\-[0-9]{2}$/", $datum)) { // German Y-m
|
||||
if (\preg_match("/^[0-9]{4}\-[0-9]{2}$/", $datum)) { // German Y-m or 1912-15
|
||||
$year = \substr($datum, 0, 4);
|
||||
$month = \substr($datum, 5, 2);
|
||||
return new NodaSplitTime($year, $year, $month);
|
||||
$month = \substr($datum, -2);
|
||||
|
||||
// Assume the end is a month
|
||||
if (intval($month) < 12) {
|
||||
// If the year is smaller than the second number, do not split, as either month
|
||||
// or year may be meant
|
||||
// Example: 1903-04
|
||||
if (substr($datum, 2, 2) < 12) {
|
||||
return false;
|
||||
}
|
||||
return new NodaSplitTime($year, $year, $month);
|
||||
}
|
||||
else {
|
||||
$end = \substr($year, 0, 2) . $month;
|
||||
return new NodaSplitTime($year, $end);
|
||||
}
|
||||
}
|
||||
|
||||
// German MM.JJJJ
|
||||
@ -648,7 +675,27 @@ final class NodaTimeSplitter {
|
||||
if (\preg_match("/^[0-9]{4}\-[0-9]{3}$/", $datum)) { // Hungarian Y-m
|
||||
$start = \substr($datum, 0, 4);
|
||||
$end = \substr($datum, -3);
|
||||
return new NodaSplitTime("0" . $start, "0" . $end);
|
||||
return new NodaSplitTime($start, "0" . $end);
|
||||
}
|
||||
|
||||
// 2-3 (n. Chr.)
|
||||
if (\preg_match("/^[0-9]{1}\-[0-9]{1}$/", $datum)) {
|
||||
return new NodaSplitTime("000" . \substr($datum, 0, 1), "000" . \substr($datum, -1));
|
||||
}
|
||||
|
||||
// 300-2 (v. Chr.)
|
||||
if (\preg_match("/^[0-9]{3}\-[0-9]{2}$/", $datum)) {
|
||||
return new NodaSplitTime("0" . \substr($datum, 0, 3), "00" . \substr($datum, -2));
|
||||
}
|
||||
|
||||
// 30-2 (v. Chr.)
|
||||
if (\preg_match("/^[0-9]{2}\-[0-9]{1}$/", $datum)) {
|
||||
return new NodaSplitTime("00" . \substr($datum, 0, 2), "000" . \substr($datum, -1));
|
||||
}
|
||||
|
||||
// 2-300 (n. Chr.)
|
||||
if (\preg_match("/^[0-9]{1}\-[0-9]{3}$/", $datum)) {
|
||||
return new NodaSplitTime("000" . \substr($datum, 0, 1), "0" . \substr($datum, -3));
|
||||
}
|
||||
|
||||
// 20-30 (n. Chr.)
|
||||
@ -658,6 +705,18 @@ final class NodaTimeSplitter {
|
||||
return new NodaSplitTime("00" . $start, "00" . $end);
|
||||
}
|
||||
|
||||
// 20-130 (n. Chr.)
|
||||
if (\preg_match("/^[0-9]{2}\-[0-9]{3}$/", $datum)) { // 20-40 (n. Chr.)
|
||||
$start = \substr($datum, 0, 2);
|
||||
$end = \substr($datum, -3);
|
||||
return new NodaSplitTime("00" . $start, "0" . $end);
|
||||
}
|
||||
|
||||
// 120-1130 (n. Chr.)
|
||||
if (\preg_match("/^[0-9]{3}\-[0-9]{4}$/", $datum)) { // 20-40 (n. Chr.)
|
||||
return new NodaSplitTime("0" . \substr($datum, 0, 3), \substr($datum, -4));
|
||||
}
|
||||
|
||||
// 1920
|
||||
if (\preg_match("/^[0-9]{4}(\.|)$/", $datum)) {
|
||||
$start = \substr($datum, 0, 4);
|
||||
@ -699,35 +758,67 @@ final class NodaTimeSplitter {
|
||||
|
||||
$datum = self::clean_input($datum);
|
||||
|
||||
if (\preg_match("/^[0-9]{4}\.[0-9]{2}\.[0-9]{2}(\.|)\-$/", $datum)) { // YYYY.MM.DD.
|
||||
$year = \substr($datum, 0, 4);
|
||||
$month = \substr($datum, 5, 2);
|
||||
$day = \substr($datum, 8, 2);
|
||||
$inpDateWoSpaces = str_replace(" ", "", $datum);
|
||||
|
||||
if (\preg_match("/^[0-9]{4}\.[0-9]{2}\.[0-9]{2}(\.|)\-$/", $inpDateWoSpaces)) { // YYYY.MM.DD.
|
||||
$year = \substr($inpDateWoSpaces, 0, 4);
|
||||
$month = \substr($inpDateWoSpaces, 5, 2);
|
||||
$day = \substr($inpDateWoSpaces, 8, 2);
|
||||
return NodaSplitTime::genExactDate($year, $month, $day, NodaTimeBeforeAfterIndicator::since);
|
||||
}
|
||||
if (\preg_match("/^[0-9]{4}\.[0-9]{2}(\.|)\-$/", $datum)) { // YYYY.MM.-
|
||||
$start = \substr($datum, 0, 4);
|
||||
$month = \substr($datum, 5, 2);
|
||||
if (\preg_match("/^[0-9]{4}\.[0-9]{2}(\.|)\-$/", $inpDateWoSpaces)) { // YYYY.MM.-
|
||||
$start = \substr($inpDateWoSpaces, 0, 4);
|
||||
$month = \substr($inpDateWoSpaces, 5, 2);
|
||||
return new NodaSplitTime($start, '?', $month, before_after_indicator: NodaTimeBeforeAfterIndicator::since);
|
||||
}
|
||||
if (\preg_match("/^[0-9]{4}\-$/", $datum)) { // YYYY-
|
||||
$start = \substr($datum, 0, 4);
|
||||
if (\preg_match("/^[0-9]{4}\-$/", $inpDateWoSpaces)) { // YYYY-
|
||||
$start = \substr($inpDateWoSpaces, 0, 4);
|
||||
return new NodaSplitTime($start, '?', before_after_indicator: NodaTimeBeforeAfterIndicator::since);
|
||||
}
|
||||
|
||||
if (\preg_match("/^\-[0-9]{4}\.[0-9]{2}\.[0-9]{2}$/", $datum)) { // Hungarian Y-m
|
||||
$year = \substr($datum, 1, 4);
|
||||
$month = \substr($datum, 6, 2);
|
||||
$day = \substr($datum, 9, 2);
|
||||
// ?.6.2024
|
||||
if (\preg_match("/^\?\.([0-9]|[0-9]{2})\.[0-9]{4}$/", $inpDateWoSpaces)) { // German Y-m
|
||||
$year = \substr($inpDateWoSpaces, -4);
|
||||
$month = trim(\substr($inpDateWoSpaces, 2, 2), '. ');
|
||||
return new NodaSplitTime($year, $year, $month);
|
||||
}
|
||||
|
||||
// ?.?.2024
|
||||
if (\preg_match("/^\?\.\?\.[0-9]{4}$/", $inpDateWoSpaces)) { // German Y-m
|
||||
$year = \substr($inpDateWoSpaces, -4);
|
||||
return new NodaSplitTime($year, $year);
|
||||
}
|
||||
|
||||
if (\preg_match("/^[0-9]{4}$/", \trim($inpDateWoSpaces, '. ?!()[]X'))) { // German Y-m
|
||||
$year = \trim($inpDateWoSpaces, '. ?!()[]X');
|
||||
return new NodaSplitTime($year, $year);
|
||||
}
|
||||
|
||||
if ((str_starts_with($inpDateWoSpaces, '0-') || str_ends_with($inpDateWoSpaces, '-0')) && \preg_match("/^[0-9]{4}$/", \strtr($inpDateWoSpaces, ['-0' => '', '0-' => ''])) && !str_ends_with($inpDateWoSpaces, '0-0')) {
|
||||
$year = \strtr($inpDateWoSpaces, ['-0' => '', '0-' => '']);
|
||||
if (strlen($year) === 4) {
|
||||
return new NodaSplitTime($year, $year);
|
||||
}
|
||||
}
|
||||
|
||||
if (\preg_match("/^[0-9]{4}$/", \strtr($inpDateWoSpaces, ['o' => '0']))) { // German Y-m
|
||||
$year = \strtr($inpDateWoSpaces, ['o' => '0']);
|
||||
return new NodaSplitTime($year, $year);
|
||||
}
|
||||
|
||||
if (\preg_match("/^\-[0-9]{4}\.[0-9]{2}\.[0-9]{2}$/", $inpDateWoSpaces)) { // Hungarian Y-m
|
||||
$year = \substr($inpDateWoSpaces, 1, 4);
|
||||
$month = \substr($inpDateWoSpaces, 6, 2);
|
||||
$day = \substr($inpDateWoSpaces, 9, 2);
|
||||
return NodaSplitTime::genExactDate($year, $month, $day, NodaTimeBeforeAfterIndicator::until);
|
||||
}
|
||||
if (\preg_match("/^\-[0-9]{4}\.[0-9]{2}$/", $datum)) { // Hungarian Y-m
|
||||
$year = \substr($datum, 1, 4);
|
||||
$month = \substr($datum, 6, 2);
|
||||
if (\preg_match("/^\-[0-9]{4}\.[0-9]{2}$/", $inpDateWoSpaces)) { // Hungarian Y-m
|
||||
$year = \substr($inpDateWoSpaces, 1, 4);
|
||||
$month = \substr($inpDateWoSpaces, 6, 2);
|
||||
return new NodaSplitTime('?', $year, $month, before_after_indicator: NodaTimeBeforeAfterIndicator::until);
|
||||
}
|
||||
if (\preg_match("/^\-[0-9]{4}$/", $datum)) { // Hungarian -Y
|
||||
$year = \substr($datum, 1, 4);
|
||||
if (\preg_match("/^\-[0-9]{4}$/", $inpDateWoSpaces)) { // Hungarian -Y
|
||||
$year = \substr($inpDateWoSpaces, 1, 4);
|
||||
return new NodaSplitTime('?', $year, before_after_indicator: NodaTimeBeforeAfterIndicator::until);
|
||||
}
|
||||
|
||||
@ -828,7 +919,7 @@ final class NodaTimeSplitter {
|
||||
$output->counting_time_indicator, NodaTimeBeforeAfterIndicator::until, '?', $output->end_date);
|
||||
}
|
||||
}
|
||||
if (str_ends_with($datum, '-as évekig') || str_ends_with($datum, '-es évekig')) {
|
||||
if (str_ends_with($datum, ' as évekig') || str_ends_with($datum, ' es évekig') || str_ends_with($datum, '-as évekig') || str_ends_with($datum, '-es évekig')) {
|
||||
if ($output = self::attempt_splitting(\substr($datum, 0, -2))) {
|
||||
return new NodaSplitTime('?', $output->end_year, $output->counting_time_month, $output->counting_time_day,
|
||||
$output->counting_time_indicator, NodaTimeBeforeAfterIndicator::until, '?', $output->end_date);
|
||||
@ -899,7 +990,7 @@ final class NodaTimeSplitter {
|
||||
}
|
||||
|
||||
// 1. Jahrhundert
|
||||
if (\preg_match("/^[0-9]\.\ (Jh\.|Jahrhundert|sz|század)$/", $datum)) {
|
||||
if (\preg_match("/^[0-9]\.\ (Jh\|Jh\.|Jahrhundert|sz|század)$/", $datum)) {
|
||||
if ($centuryNo = \intval(\substr($datum, 0, 1))) {
|
||||
$centuryNo--;
|
||||
return new NodaSplitTime((string)$centuryNo . "01", \strval($centuryNo + 1) . '00');
|
||||
@ -907,7 +998,7 @@ final class NodaTimeSplitter {
|
||||
}
|
||||
|
||||
// 17.-18. Jahrhundert
|
||||
if (\preg_match("/^[0-9]{2}(\.|)(|\ Jh\.||\ Jahrhundert||\ sz||\ század)(\-|\/)[0-9]{2}\.\ (Jh\.|Jahrhundert|sz|század)$/", $datum)) {
|
||||
if (\preg_match("/^[0-9]{2}(\.|)(|\ Jh|\ Jh\.|\ Jahrhundert|\ sz|\ század)(\-|\/)[0-9]{2}\.\ (Jh\.|Jahrhundert|sz|század)$/", $datum)) {
|
||||
if (\strpos($datum, '/') !== false) {
|
||||
$datum = str_replace('/', '-', $datum);
|
||||
}
|
||||
@ -1040,6 +1131,33 @@ final class NodaTimeSplitter {
|
||||
|
||||
}
|
||||
|
||||
/**
 * Translates specially worded date descriptions into a plain year range.
 *
 * The only wording handled is the German "1. Hälfte [des] NN. Jahrhundert[s]"
 * and "2. Hälfte [des] NN. Jahrhundert[s]" with a two-digit century number.
 * The first half is returned as "<NN-1>00-<NN-1>50", the second half as
 * "<NN-1>50-<NN-1>99".
 *
 * @param string $datum Date.
 *
 * @return string|false Rewritten year range, or false if no rule applies.
 */
private static function _rewrite_special_cases_regular(string $datum):string|false {

    // Anything that is not a "half of a century" expression is left alone.
    if (!\preg_match("/^(1|2)\.\ Hälfte(|\ des)\ [0-9]{2}\.\ Jahrhundert(|s)$/", $datum)) {
        return false;
    }

    // The century number ("19.") is always the second-to-last token,
    // regardless of whether the optional "des" is present.
    $tokens = explode(' ', $datum);
    $centuryToken = rtrim($tokens[count($tokens) - 2], '.');
    if (!is_numeric($centuryToken)) {
        return false;
    }

    // The years of the 19th century start with "18".
    $prefix = (int)$centuryToken - 1;

    // The pattern above guarantees that the first character is "1" or "2".
    if ($datum[0] === '1') {
        return $prefix . "00-" . $prefix . "50";
    }

    return $prefix . "50-" . $prefix . "99";

}
|
||||
|
||||
/**
|
||||
* Contains special rules for incorrectly or incompletely spelled out timespan names.
|
||||
* To be called by self::attempt_splitting_from_till().
|
||||
@ -1052,6 +1170,15 @@ final class NodaTimeSplitter {
|
||||
|
||||
if (empty($datum)) return '';
|
||||
|
||||
if (\preg_match("/^1\.\ (Halbjahr|Hälfte)\ [0-9]{4}$/", $datum)) {
|
||||
$year = substr($datum, -4);
|
||||
return "Januar $year-Juni $year";
|
||||
}
|
||||
if (\preg_match("/^2\.\ (Halbjahr|Hälfte)\ [0-9]{4}$/", $datum)) {
|
||||
$year = substr($datum, -4);
|
||||
return "Juli $year-Dezember $year";
|
||||
}
|
||||
|
||||
$inputLength = strlen($datum);
|
||||
|
||||
// Hungarian year and month until month
|
||||
@ -1091,6 +1218,33 @@ final class NodaTimeSplitter {
|
||||
return $reconstituted;
|
||||
}
|
||||
|
||||
// German T.-T.MM.JJJJ / T.-T.MM.JJJ / T.-T.MM.JJ / T.-T.MM.J
|
||||
if (\preg_match("/^[0-9].\-[0-9]\.([0-9]|[0-9]{2})\.([0-9]{4}|[0-9]{3}|[0-9]{2}|[0-9])$/", $datum)) { // German T.MM.JJJJ
|
||||
$year = \substr($datum, -4);
|
||||
$month = trim(\substr($datum, -7, 2), '.');
|
||||
$day = '0' . \substr($datum, 3, 1);
|
||||
$firstday = '0' . \substr($datum, 0, 1);
|
||||
return "$firstday.$month.$year-$day.$month.$year";
|
||||
}
|
||||
|
||||
// German T.-TT.MM.JJJJ / T.-TT.MM.JJJ / T.-TT.MM.JJ / T.-TT.MM.J
|
||||
if (\preg_match("/^[0-9].\-[0-9]{2}\.([0-9]|[0-9]{2})\.([0-9]{4}|[0-9]{3}|[0-9]{2}|[0-9])$/", $datum)) { // German T.MM.JJJJ
|
||||
$year = \substr($datum, -4);
|
||||
$month = trim(\substr($datum, -7, 2), '.');
|
||||
$day = \substr($datum, 3, 2);
|
||||
$firstday = '0' . \substr($datum, 0, 1);
|
||||
return "$firstday.$month.$year-$day.$month.$year";
|
||||
}
|
||||
|
||||
// German TT.-TT.MM.JJJJ / TT.-TT.MM.JJJ / TT.-TT.MM.JJ / TT.-TT.MM.J
|
||||
if (\preg_match("/^[0-9]{2}.\-[0-9]{2}\.([0-9]|[0-9]{2})\.([0-9]{4}|[0-9]{3}|[0-9]{2}|[0-9])$/", $datum)) { // German T.MM.JJJJ
|
||||
$year = \substr($datum, -4);
|
||||
$month = trim(\substr($datum, -7, 2), '.');
|
||||
$day = \substr($datum, 4, 2);
|
||||
$firstday = \substr($datum, 0, 2);
|
||||
return "$firstday.$month.$year-$day.$month.$year";
|
||||
}
|
||||
|
||||
// 17-19. Jahrhundert
|
||||
if (\preg_match("/^[0-9]{2}(\.|)\-[0-9]{2}(\.|)" . self::REGEX_CENTURIES . "$/", $datum)) {
|
||||
$parts = explode('-', $datum);
|
||||
@ -1208,6 +1362,30 @@ final class NodaTimeSplitter {
|
||||
|
||||
}
|
||||
|
||||
/**
 * Removes superfluous characters and makes an input string roughly parsable.
 *
 * Whitespace and the separators "," and ";" are stripped from both ends,
 * leading spaces and dots are dropped, and a range whose two sides are
 * identical ("1440-1440") is collapsed to the single value. Trailing dots
 * are kept.
 *
 * @param string $input Input string.
 *
 * @return string
 */
private static function _runBasicNameCleanup(string $input):string {

    // Whitespace and separators are stripped in a single pass. Trimming them
    // one after the other left whitespace behind that was hidden by a
    // separator, e.g. "1440 ;" became "1440 ".
    $input = ltrim(trim($input, " \n\r\t\v\0,;"), ' .');

    // Clean away duplicate inputs
    // 1440-1440
    $parts = explode('-', $input);
    if (count($parts) === 2 && $parts[0] === $parts[1]) {
        return $parts[0];
    }

    return $input;

}
|
||||
|
||||
/**
|
||||
* Wrapper to check if any splitting command works.
|
||||
*
|
||||
@ -1217,6 +1395,8 @@ final class NodaTimeSplitter {
|
||||
*/
|
||||
public static function attempt_splitting(string $datum):NodaSplitTime|false {
|
||||
|
||||
$datum = self::_runBasicNameCleanup($datum);
|
||||
|
||||
try {
|
||||
if (!empty($moda = self::is_timespan($datum))) {
|
||||
return $moda;
|
||||
@ -1254,6 +1434,10 @@ final class NodaTimeSplitter {
|
||||
}
|
||||
}
|
||||
|
||||
if ($rewrite = self::_rewrite_special_cases_regular($datum)) {
|
||||
return self::attempt_splitting($rewrite);
|
||||
}
|
||||
|
||||
return false;
|
||||
|
||||
}
|
||||
|
@ -61,6 +61,7 @@ final class NodaUncertaintyHelper {
|
||||
"(?)",
|
||||
"?",
|
||||
" [vermutlich]",
|
||||
" vermutlich",
|
||||
" [verm.]",
|
||||
" [wahrscheinlich]",
|
||||
];
|
||||
@ -100,6 +101,7 @@ final class NodaUncertaintyHelper {
|
||||
"c. ",
|
||||
"ca ",
|
||||
"ca. ",
|
||||
"ca.",
|
||||
"Ca ",
|
||||
"Ca. ",
|
||||
"za. ",
|
||||
@ -141,8 +143,11 @@ final class NodaUncertaintyHelper {
|
||||
" [circa]",
|
||||
" (verm.)",
|
||||
" (vermutl.)",
|
||||
" vermutlich",
|
||||
" körül",
|
||||
", um",
|
||||
", ca.",
|
||||
", ca",
|
||||
" (um)",
|
||||
" (ок.)",
|
||||
];
|
||||
|
@ -18,7 +18,7 @@ final class NodaWikidataFetcher {
|
||||
];
|
||||
|
||||
public const LANGUAGES_MAIN_DESC = ['de', 'da', 'en', 'es', 'fr', 'hu', 'it', 'jp', 'nl', 'pt', 'ru', 'sv', 'sk', 'uk', 'zh'];
|
||||
public const LANGUAGES_TO_CHECK = ['ar', 'bg', 'bn', 'cs', 'da', 'de', 'el', 'en', 'es', 'fa', 'fi', 'fr', 'ha', 'he', 'hi', 'hu', 'id', 'it', 'ja', 'ka', 'ko', 'nl', 'pl', 'pt', 'ro', 'ru', 'sv', 'sk', 'sw', 'ta', 'th', 'tl', 'tr', 'uk', 'ur', 'vi', 'zh'];
|
||||
public const LANGUAGES_TO_CHECK = ['ar', 'bg', 'bn', 'cs', 'da', 'de', 'el', 'en', 'es', 'fa', 'fi', 'fr', 'ha', 'he', 'hi', 'hu', 'id', 'it', 'ja', 'ka', 'kn', 'ko', 'nl', 'pl', 'pt', 'ro', 'ru', 'sv', 'sk', 'sw', 'ta', 'te', 'th', 'tl', 'tr', 'uk', 'ur', 'vi', 'zh'];
|
||||
|
||||
public const LANGUAGES_TO_CAPITALIZE = ["cs", "da", "de", "en", "es", "fr", "fi", "id", "it", "nl", "pl", "pt", "ru", "sv", 'sk', "tl", "tr"];
|
||||
|
||||
@ -43,25 +43,6 @@ final class NodaWikidataFetcher {
|
||||
"orcid" => "P496",
|
||||
];
|
||||
|
||||
private const WIKIPEDIA_REMOVE_LITERALS = [
|
||||
"<p>Si vous disposez d'ouvrages ou d'articles de référence ou si vous ",
|
||||
'<p><b>En pratique :</b> <a href="/wiki/Wikip%C3%A9dia:Citez_vos_sources#Qualité_des_sources" title="Wikipédia:Citez vos sources">Quelles sources sont attendu',
|
||||
'<pVous pouvez partager vos connaissances en l’améliorant (',
|
||||
'<p class="mw-empty-elt">',
|
||||
'<p><small>Géolocalisation sur la carte',
|
||||
'<p><b>Koordinaatit:</b>',
|
||||
'<p><span class="executeJS" data-gadgetname="ImgToggle"></span',
|
||||
'<p><span class="imgtoggleboxTitle">',
|
||||
//'<div class="mw-parser-output"><p>',
|
||||
'<p><span style="font-size: small;"><span id="coordinates">',
|
||||
'<p><span></span></p>',
|
||||
'<p><a rel="nofollow" class="external text" href="https://maps.gs',
|
||||
'<p><span class="plainlinks nourlexpansion"><a class="external text" href="//tools.wmflabs.org/geohack/geohack.php?langu',
|
||||
'<p><span style="display:none">',
|
||||
'<p> </p>',
|
||||
'<p><span class="geo noexcerpt"',
|
||||
];
|
||||
|
||||
public const RETRIEVAL_MODES_ACCEPTED = [
|
||||
'list',
|
||||
'add',
|
||||
@ -87,7 +68,8 @@ final class NodaWikidataFetcher {
|
||||
*/
|
||||
private static function _getWikipediaApiLink(string $lang, string $searchTerm):string {
|
||||
|
||||
return "https://" . urlencode($lang) . ".wikipedia.org/w/api.php?action=parse&page=" . urlencode($searchTerm) . "&prop=text§ion=0&format=json";
|
||||
return "https://" . urlencode($lang) . ".wikipedia.org/w/api.php?format=json&action=query&prop=extracts&exintro&explaintext&redirects=1&titles=" . urlencode($searchTerm);
|
||||
# w/api.php?action=parse&page=" . urlencode($searchTerm) . "&prop=text§ion=0&format=json";
|
||||
|
||||
}
|
||||
|
||||
@ -151,9 +133,14 @@ final class NodaWikidataFetcher {
|
||||
private static function _getCleanedWikipediaSnippet(string $lang, string $title):string {
|
||||
|
||||
$datafromwiki = MD_STD::runCurl(self::_getWikipediaApiLink($lang, $title), 10000);
|
||||
$datafromwiki = strval(json_decode($datafromwiki, true)['parse']['text']['*']);
|
||||
$json_decoded = json_decode($datafromwiki, true);
|
||||
if (empty($json_decoded) || !isset($json_decoded['query']) || empty($json_decoded['query']['pages'])) {
|
||||
return '';
|
||||
}
|
||||
$firstPageId = array_keys($json_decoded['query']['pages'])[0];
|
||||
$datafromwiki = strval($json_decoded['query']['pages'][$firstPageId]['extract']);
|
||||
|
||||
return self::_cleanWikidataInput($datafromwiki);
|
||||
return self::_cleanInputSimple($datafromwiki);
|
||||
|
||||
}
|
||||
|
||||
@ -164,8 +151,9 @@ final class NodaWikidataFetcher {
|
||||
*
|
||||
* @return array<mixed>
|
||||
*/
|
||||
private static function _getWikidataEntity(string $wikidata_id):array {
|
||||
public static function getWikidataEntity(string $wikidata_id):array {
|
||||
|
||||
self::validateWikidataId($wikidata_id);
|
||||
$data = json_decode(MD_STD::runCurl("https://www.wikidata.org/wiki/Special:EntityData/" . urlencode($wikidata_id) . ".json", 10000), true);
|
||||
if ($data === null) {
|
||||
throw new MDhttpFailedException("Failed fetching from Wikidata. Try again later.");
|
||||
@ -173,7 +161,20 @@ final class NodaWikidataFetcher {
|
||||
if (empty($data['entities'][$wikidata_id])) {
|
||||
throw new MDhttpFailedException("Failed fetching from Wikidata. Try again later.");
|
||||
}
|
||||
return $data['entities'][$wikidata_id];
|
||||
|
||||
$output = $data['entities'][$wikidata_id];
|
||||
|
||||
// Throw exception if this page is a dedicated disambiguation item.
|
||||
// P31: Instance of; Q4167410: Wikimedia disambiguation page
|
||||
if (isset($output['claims']) && isset($output['claims']['P31'])) {
|
||||
foreach ($output['claims']['P31'] as $is_instance_of) {
|
||||
if (isset($is_instance_of['mainsnak']['datavalue']['value']['id']) && $is_instance_of['mainsnak']['datavalue']['value']['id'] === 'Q4167410') {
|
||||
throw new NodaWikidataFetcherDisambiguationIsDisallowedException("Loading wikidata disambiguation pages is disallowed");
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return $output;
|
||||
|
||||
}
|
||||
|
||||
@ -258,237 +259,21 @@ final class NodaWikidataFetcher {
|
||||
}
|
||||
|
||||
/**
|
||||
* Cleans basic tags off Wikidata input.
|
||||
* Cleans remaining HTML elements and leading, trailing whitespaces.
|
||||
*
|
||||
* @param string $input Input string.
|
||||
*
|
||||
* @return string
|
||||
*/
|
||||
private static function _cleanWikidataInputHtml(string $input):string {
|
||||
private static function _cleanInputSimple(string $input):string {
|
||||
|
||||
// Clean off anything before first <p>
|
||||
if ($pStartPos = strpos($input, '<p')) {
|
||||
$input = substr($input, $pStartPos);
|
||||
}
|
||||
if ($pEndPos = strrpos($input, '</p>')) {
|
||||
$input = substr($input, 0, $pEndPos + 4);
|
||||
}
|
||||
|
||||
$doc = new DOMDocument();
|
||||
try {
|
||||
$doc->loadXML('<section>' . trim($input) . '</section>');
|
||||
}
|
||||
catch (Exception $e) {
|
||||
throw new Exception("Failed to load DOMDocument." . PHP_EOL . $e->getMessage() . PHP_EOL . PHP_EOL . '---' . $input . '---');
|
||||
}
|
||||
|
||||
$list = $doc->getElementsByTagName("style");
|
||||
while ($list->length > 0) {
|
||||
$p = $list->item(0);
|
||||
if ($p === null || $p->parentNode === null) break;
|
||||
$p->parentNode->removeChild($p);
|
||||
}
|
||||
|
||||
$list = $doc->getElementsByTagName("table");
|
||||
while ($list->length > 0) {
|
||||
$p = $list->item(0);
|
||||
if ($p === null || $p->parentNode === null) break;
|
||||
$p->parentNode->removeChild($p);
|
||||
}
|
||||
|
||||
$list = $doc->getElementsByTagName("ol");
|
||||
while ($list->length > 0) {
|
||||
$p = $list->item(0);
|
||||
if ($p === null || $p->parentNode === null) break;
|
||||
$p->parentNode->removeChild($p);
|
||||
}
|
||||
|
||||
if (($firstP = $doc->getElementsByTagName("p")->item(0)) !== null) {
|
||||
if (($firstPhtml = $doc->saveHTML($firstP)) !== false) {
|
||||
if (strpos($firstPhtml, 'geohack') !== false) {
|
||||
if ($firstP->parentNode !== null) $firstP->parentNode->removeChild($firstP);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
$output = [];
|
||||
foreach ($doc->getElementsByTagName("p") as $p) {
|
||||
$output[] = trim($p->textContent);
|
||||
}
|
||||
|
||||
/*
|
||||
if (strpos($doc->saveHTML(), 'Coordinates:') !== false) {
|
||||
echo $doc->saveHTML();
|
||||
exit;
|
||||
}
|
||||
*/
|
||||
return str_replace(PHP_EOL, PHP_EOL . PHP_EOL, trim(implode(PHP_EOL, $output)));
|
||||
|
||||
}
|
||||
|
||||
/**
|
||||
* Cleans brackets ([1], [2]) off description text.
|
||||
*
|
||||
* @param string $input Input string.
|
||||
*
|
||||
* @return string
|
||||
*/
|
||||
private static function _cleanSourceBracketsOffTranslation(string $input):string {
|
||||
|
||||
$bracketsToRemove = [];
|
||||
for ($i = 0; $i < 100; $i++) {
|
||||
$bracketsToRemove["[$i]"] = "";
|
||||
}
|
||||
return strtr($input, $bracketsToRemove);
|
||||
|
||||
}
|
||||
|
||||
/**
|
||||
* Cleans contents parsed from Wikipedia.
|
||||
*
|
||||
* @param string $input Input string.
|
||||
*
|
||||
* @return string
|
||||
*/
|
||||
private static function _cleanWikidataInput(string $input):string {
|
||||
|
||||
$input = trim($input, '"');
|
||||
foreach (self::WIKIPEDIA_REMOVE_LITERALS as $tToRemove) $input = str_replace($tToRemove, "", $input);
|
||||
|
||||
if (substr($input, 0, strlen('<')) === '<') {
|
||||
|
||||
$input = self::_cleanWikidataInputHtml($input);
|
||||
|
||||
if (mb_strlen($input) > 600) {
|
||||
if (strpos($input, PHP_EOL . PHP_EOL, 600) !== false) {
|
||||
$input = substr($input, 0, strpos($input, PHP_EOL . PHP_EOL, 600));
|
||||
}
|
||||
}
|
||||
|
||||
$input = self::_cleanSourceBracketsOffTranslation($input);
|
||||
|
||||
$input = str_replace("\t", " ", $input);
|
||||
|
||||
// Remove newlines with ensuing spaces
|
||||
while (strpos($input, PHP_EOL . " ") !== false) {
|
||||
$input = str_replace(PHP_EOL . " ", PHP_EOL, $input);
|
||||
}
|
||||
|
||||
// Remove double newlines
|
||||
while (strpos($input, PHP_EOL . PHP_EOL . PHP_EOL) !== false) {
|
||||
$input = str_replace(PHP_EOL . PHP_EOL . PHP_EOL, PHP_EOL . PHP_EOL, $input);
|
||||
}
|
||||
return MD_STD_IN::sanitize_text($input);
|
||||
|
||||
}
|
||||
|
||||
$input = str_replace(PHP_EOL, '', $input);
|
||||
|
||||
if (empty($input)) return "";
|
||||
|
||||
// Remove infobox tables specifically
|
||||
$firstParagraphPosition = strpos($input, '<p', 1);
|
||||
$currentSearchPos = strpos($input, "<table>");
|
||||
if ($currentSearchPos !== false && $currentSearchPos < $firstParagraphPosition) {
|
||||
if (($tableEndPos = strpos($input, "</table>")) !== false) {
|
||||
if (($pStartPos = strpos($input, '<p', $tableEndPos + 6)) !== false) {
|
||||
$input = substr($input, $pStartPos);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Remove leftover unnecessary paragraphs before actual content
|
||||
|
||||
$removeFirstParagraph = false;
|
||||
$firstParagraphPosition = strpos($input, '<p', 1);
|
||||
|
||||
foreach (["</table>", "<img"] as $tagPart) {
|
||||
$currentSearchPos = strpos($input, $tagPart);
|
||||
if ($currentSearchPos !== false && $currentSearchPos < $firstParagraphPosition) {
|
||||
$removeFirstParagraph = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if ($removeFirstParagraph === true) {
|
||||
$input = substr($input, $firstParagraphPosition ?: 0);
|
||||
}
|
||||
|
||||
$input = str_replace('</p>', '</p>' . PHP_EOL . PHP_EOL . PHP_EOL, $input);
|
||||
# $input = str_replace('?/i', '', $input);
|
||||
$input = strip_tags($input);
|
||||
|
||||
# for ($i = 150; $i < 1000; $i++) $input = str_replace("&#$i;", " ", $input);
|
||||
$i = 0;
|
||||
while (strpos($input, ".mw-parser-output") !== false and strpos($input, "}", strpos($input, ".mw-parser-output")) !== false) {
|
||||
$part1 = substr($input, 0, strpos($input, ".mw-parser-output"));
|
||||
$part2 = substr($input, strpos($input, "}", strpos($input, ".mw-parser-output")) + 1);
|
||||
$input = $part1 . $part2;
|
||||
++$i;
|
||||
if ($i === 30) break;
|
||||
}
|
||||
|
||||
$input = self::_cleanSourceBracketsOffTranslation($input);
|
||||
|
||||
$input = str_replace("\t", " ", $input);
|
||||
|
||||
// Remove double whitespaces
|
||||
while (strpos($input, " ") !== false) {
|
||||
$input = str_replace(" ", " ", $input);
|
||||
}
|
||||
|
||||
// Remove newlines with ensuing spaces
|
||||
while (strpos($input, PHP_EOL . " ") !== false) {
|
||||
$input = str_replace(PHP_EOL . " ", PHP_EOL, $input);
|
||||
}
|
||||
|
||||
// Remove double newlines
|
||||
while (strpos($input, PHP_EOL . PHP_EOL . PHP_EOL) !== false) {
|
||||
$input = str_replace(PHP_EOL . PHP_EOL . PHP_EOL, PHP_EOL . PHP_EOL, $input);
|
||||
}
|
||||
|
||||
$stableToRemove = [
|
||||
"Vous pouvez partager vos connaissances en l’améliorant (comment ?) selon les recommandations des projets correspondants.",
|
||||
];
|
||||
foreach ($stableToRemove as $tToRemove) $input = str_replace($tToRemove, "", $input);
|
||||
|
||||
$endings = [
|
||||
"StubDenne artikel om et vandløb ",
|
||||
];
|
||||
foreach ($endings as $ending) {
|
||||
if (strpos($input, $ending) !== false) $input = substr($input, 0, strpos($input, $ending));
|
||||
}
|
||||
|
||||
$input = trim($input);
|
||||
|
||||
// Cut off overly long articles
|
||||
if (mb_strlen($input) > 600) {
|
||||
if (strpos($input, PHP_EOL . PHP_EOL, 600) !== false) {
|
||||
$input = trim(substr($input, 0, strpos($input, PHP_EOL . PHP_EOL, 600)));
|
||||
}
|
||||
}
|
||||
|
||||
if (empty($input)) return '';
|
||||
|
||||
$input = str_replace("'", "´", MD_STD::preg_replace_str("/\&\#91\;[0-9]\&\#93\;/", '', $input));
|
||||
|
||||
$input = html_entity_decode($input);
|
||||
|
||||
return MD_STD_IN::sanitize_text($input);
|
||||
|
||||
}
|
||||
|
||||
/**
|
||||
* Wrapper around _cleanWikidataInput for testing.
|
||||
*
|
||||
* @param string $input Input string.
|
||||
*
|
||||
* @return string
|
||||
*/
|
||||
public static function cleanWikidataInput(string $input):string {
|
||||
|
||||
if (PHP_SAPI !== 'cli') throw new Exception("Use this function only for testing");
|
||||
return self::_cleanWikidataInput($input);
|
||||
return strtr(
|
||||
trim(MD_STD_IN::sanitize_text($input)),
|
||||
[
|
||||
PHP_EOL => PHP_EOL . PHP_EOL,
|
||||
PHP_EOL . PHP_EOL . PHP_EOL => PHP_EOL . PHP_EOL,
|
||||
]
|
||||
);
|
||||
|
||||
}
|
||||
|
||||
@ -740,7 +525,7 @@ final class NodaWikidataFetcher {
|
||||
$languagesToFetch = $wikilinks = [];
|
||||
foreach ($checkagainstLanguage as $lang) {
|
||||
|
||||
if (empty($data['labels'][$lang])) {
|
||||
if (empty($data['labels']) || empty($data['labels'][$lang])) {
|
||||
continue;
|
||||
}
|
||||
|
||||
@ -795,23 +580,20 @@ final class NodaWikidataFetcher {
|
||||
$wikilink = $wikilinks[$lang];
|
||||
if (!empty($contents[$lang])) {
|
||||
|
||||
$descFromWiki = json_decode($contents[$lang], true)['parse']['text']['*'];
|
||||
|
||||
# Process data retrieved from wikipedia
|
||||
|
||||
if ($descFromWiki !== null) $tDescription = (string)$descFromWiki;
|
||||
else $tDescription = "";
|
||||
$titleFromWikipedia = $data['sitelinks'][$lang . 'wiki']['title'];
|
||||
$tDescription = self::_getCleanedWikipediaSnippet($lang, $titleFromWikipedia);
|
||||
|
||||
}
|
||||
else {
|
||||
$tDescription = "";
|
||||
}
|
||||
|
||||
if ($tDescription !== '' && !empty($desc_cleaned = self::_cleanWikidataInput($tDescription))) {
|
||||
if (!empty($titleFromWikipedia) && !empty($tDescription)) {
|
||||
|
||||
# $descs[$lang] = $tDescription;
|
||||
$output[$lang] = [
|
||||
'label' => self::_cleanWikidataInput((string)$data['labels'][$lang]['value']),
|
||||
'description' => '"' . $desc_cleaned . '" - (' . $data['labels'][$lang]['language'] . '.wikipedia.org ' . date('d.m.Y') . ')',
|
||||
'label' => $titleFromWikipedia,
|
||||
'description' => '"' . $tDescription . '" - (' . $data['labels'][$lang]['language'] . '.wikipedia.org ' . date('d.m.Y') . ')',
|
||||
'link' => $wikilink,
|
||||
];
|
||||
}
|
||||
@ -819,8 +601,8 @@ final class NodaWikidataFetcher {
|
||||
else if (!empty($data['labels'][$lang]['value']) and !empty($data['descriptions'][$lang])) {
|
||||
|
||||
$output[$lang] = [
|
||||
'label' => self::_cleanWikidataInput($data['labels'][$lang]['value']),
|
||||
'description' => self::_cleanWikidataInput($data['descriptions'][$lang]['value']),
|
||||
'label' => self::_cleanInputSimple($data['labels'][$lang]['value']),
|
||||
'description' => self::_cleanInputSimple($data['descriptions'][$lang]['value']),
|
||||
'link' => "",
|
||||
];
|
||||
|
||||
@ -831,8 +613,8 @@ final class NodaWikidataFetcher {
|
||||
else if (!empty($data['labels'][$lang]['value']) and !empty($data['descriptions'][$lang])) {
|
||||
|
||||
$output[$lang] = [
|
||||
'label' => self::_cleanWikidataInput($data['labels'][$lang]['value']),
|
||||
'description' => self::_cleanWikidataInput($data['descriptions'][$lang]['value']),
|
||||
'label' => self::_cleanInputSimple($data['labels'][$lang]['value']),
|
||||
'description' => self::_cleanInputSimple($data['descriptions'][$lang]['value']),
|
||||
'link' => "",
|
||||
];
|
||||
|
||||
@ -1026,6 +808,7 @@ final class NodaWikidataFetcher {
|
||||
$wikidata_gender = "female";
|
||||
break;
|
||||
case "Q48270":
|
||||
case "Q207959": // Androgyny
|
||||
$wikidata_gender = "other";
|
||||
break;
|
||||
default:
|
||||
@ -1047,6 +830,51 @@ final class NodaWikidataFetcher {
|
||||
|
||||
}
|
||||
|
||||
/**
 * Selects a description for an entity from Wikipedia or Wikidata.
 *
 * Sources are tried in this order, and the first non-empty one is returned:
 *  1. the Wikipedia article linked for the user's language,
 *  2. the Wikipedia article linked for any other language listed in
 *     self::LANGUAGES_MAIN_DESC,
 *  3. the Wikidata description in the user's language,
 *  4. the first Wikidata description contained in $data.
 *
 * @param string                                           $lang      The user's selected used language.
 * @param array<mixed>                                     $data      Data fetched from wikidata.
 * @param array<string, array{url: string, title: string}> $wikilinks Links to wikipedia APIs.
 *
 * @return array{}|array{lang: string, desc: string, source: 'wikidata'|'wikipedia'}
 */
private static function _getDescriptionFromWikidataAndWikipediaLinks(string $lang, array $data, array $wikilinks):array {

    // Try the current user language for retrieving Wikipedia texts
    if (isset($wikilinks[$lang])) {
        $snippet = self::_getCleanedWikipediaSnippet($lang, $wikilinks[$lang]['title']);
        if (!empty($snippet)) {
            return ['lang' => $lang, 'desc' => $snippet, 'source' => 'wikipedia'];
        }
    }

    // Try the alternative languages for retrieving Wikipedia texts
    foreach (self::LANGUAGES_MAIN_DESC as $cur_lang) {

        if ($lang === $cur_lang || !isset($wikilinks[$cur_lang])) continue;

        // The title belongs to the Wikipedia of $cur_lang, so the snippet has
        // to be requested from that language edition (not from $lang).
        $snippet = self::_getCleanedWikipediaSnippet($cur_lang, $wikilinks[$cur_lang]['title']);
        if (!empty($snippet)) {
            return ['lang' => $cur_lang, 'desc' => $snippet, 'source' => 'wikipedia'];
        }

    }

    // If no Wikipedia text could be found, fall back to the Wikidata descriptions.
    if (!empty($data['descriptions'][$lang])) {
        return ['lang' => $lang, 'desc' => $data['descriptions'][$lang]['value'], 'source' => 'wikidata'];
    }

    if (!empty($data['descriptions'])) {
        // Any language is better than nothing: take the first one listed.
        $tLang = (string)array_key_first($data['descriptions']);
        $desc = $data['descriptions'][$tLang];
        return ['lang' => $tLang, 'desc' => (string)$desc['value'], 'source' => 'wikidata'];
    }

    return [];

}
|
||||
|
||||
/**
|
||||
* Function for retrieving information.
|
||||
*
|
||||
@ -1059,30 +887,13 @@ final class NodaWikidataFetcher {
|
||||
*/
|
||||
public function retrievePersinstInfoFromWikidataID(string $lang, string $wikidata_id, int $persinst_id, string $erfasst_von) {
|
||||
|
||||
self::validateWikidataId($wikidata_id);
|
||||
$data = self::_getWikidataEntity($wikidata_id);
|
||||
$data = self::getWikidataEntity($wikidata_id);
|
||||
|
||||
// Get links to wikipedia
|
||||
|
||||
$wikilinks = self::_getWikipediaLinksFromWikidataOutput($data);
|
||||
$alreadyEntered = false;
|
||||
|
||||
if (isset($wikilinks[$lang])) {
|
||||
# Process data retrieved from wikipedia
|
||||
if (!empty($datafromwiki = self::_getCleanedWikipediaSnippet($lang, $wikilinks[$lang]['title']))) {
|
||||
$alreadyEntered = $this->retrievePersinstDescFromWikipedia($persinst_id, $wikidata_id, $datafromwiki, $lang, $lang, $erfasst_von);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
foreach (self::LANGUAGES_MAIN_DESC as $cur_lang) {
|
||||
|
||||
if ($alreadyEntered === true || !isset($wikilinks[$cur_lang])) continue;
|
||||
|
||||
if ($datafromwiki = self::_getCleanedWikipediaSnippet($lang, $wikilinks[$cur_lang]['title'])) {
|
||||
$alreadyEntered = $this->retrievePersinstDescFromWikipedia($persinst_id, $wikidata_id, $datafromwiki, $lang, "$cur_lang", $erfasst_von);
|
||||
}
|
||||
|
||||
if (!empty($desc = self::_getDescriptionFromWikidataAndWikipediaLinks($lang, $data, $wikilinks))) {
|
||||
$alreadyEntered = $this->retrievePersinstDescFromWikipedia($persinst_id, $wikidata_id, $desc['desc'], $lang, $desc['lang'], $erfasst_von);
|
||||
}
|
||||
|
||||
$this->enterPersinstBirthDeathDatesFromWikidata($data, $persinst_id);
|
||||
@ -1110,8 +921,7 @@ final class NodaWikidataFetcher {
|
||||
*/
|
||||
public function retrievePersinstNormDataLinksFromWikidataID(string $wikidata_id, int $persinst_id, string $erfasst_von) {
|
||||
|
||||
self::validateWikidataId($wikidata_id);
|
||||
$data = self::_getWikidataEntity($wikidata_id);
|
||||
$data = self::getWikidataEntity($wikidata_id);
|
||||
if (!empty($nodaLinks = $this->_getNodaLinksFromWikidataResult('persinst', $wikidata_id, $data))) {
|
||||
NodaBatchInserter::linkNodaForPersinst($this->_mysqli_noda, $persinst_id, $nodaLinks, $erfasst_von);
|
||||
}
|
||||
@ -1129,8 +939,7 @@ final class NodaWikidataFetcher {
|
||||
*/
|
||||
public function retrievePlaceNormDataLinksFromWikidataID(string $wikidata_id, int $onum, string $erfasst_von) {
|
||||
|
||||
self::validateWikidataId($wikidata_id);
|
||||
$data = self::_getWikidataEntity($wikidata_id);
|
||||
$data = self::getWikidataEntity($wikidata_id);
|
||||
if (!empty($nodaLinks = $this->_getNodaLinksFromWikidataResult('place', $wikidata_id, $data))) {
|
||||
NodaBatchInserter::linkNodaForPlace($this->_mysqli_noda, $onum, $nodaLinks, $erfasst_von);
|
||||
}
|
||||
@ -1279,7 +1088,6 @@ final class NodaWikidataFetcher {
|
||||
$updateStmt->execute();
|
||||
}
|
||||
catch (MDMysqliInvalidEncodingError $e) {
|
||||
$_SESSION["editHistory"] = ["changesStored", "Error adding base description"];
|
||||
}
|
||||
$updateStmt->close();
|
||||
unset($updateStmt);
|
||||
@ -1355,8 +1163,7 @@ final class NodaWikidataFetcher {
|
||||
*/
|
||||
public function retrievePlaceInfoFromWikidataID(string $lang, string $wikidata_id, int $onum, string $erfasst_von) {
|
||||
|
||||
self::validateWikidataId($wikidata_id);
|
||||
$data = self::_getWikidataEntity($wikidata_id);
|
||||
$data = self::getWikidataEntity($wikidata_id);
|
||||
|
||||
$wikilinks = self::_getWikipediaLinksFromWikidataOutput($data);
|
||||
|
||||
@ -1368,30 +1175,8 @@ final class NodaWikidataFetcher {
|
||||
}
|
||||
|
||||
$cur_place_desc = $this->getPlaceDescription($onum);
|
||||
$alreadyEntered = false;
|
||||
|
||||
if (!empty($wikilinks[$lang])) {
|
||||
|
||||
$datafromwiki = MD_STD::runCurl(self::_getWikipediaApiLink($lang, $wikilinks[$lang]['title']), 10000);
|
||||
$datafromwiki = json_decode($datafromwiki, true)['parse']['text']['*'];
|
||||
|
||||
if (!empty($datafromwiki) and !empty($datafromwiki = self::_cleanWikidataInput((string)$datafromwiki))) {
|
||||
$alreadyEntered = $this->enterPlaceDescFromWikidata($cur_place_desc, $datafromwiki, $lang, $lang, $onum, $erfasst_von);
|
||||
}
|
||||
}
|
||||
|
||||
foreach (self::LANGUAGES_MAIN_DESC as $cur_lang) {
|
||||
|
||||
//if ($alreadyEntered === true) break;
|
||||
if ($alreadyEntered === true) break;
|
||||
if (!isset($wikilinks[$cur_lang]['url'])) continue;
|
||||
|
||||
$datafromwiki = MD_STD::runCurl(self::_getWikipediaApiLink($cur_lang, $wikilinks[$cur_lang]['title']), 10000);
|
||||
$datafromwiki = json_decode($datafromwiki, true)['parse']['text']['*'];
|
||||
if (!empty($datafromwiki) and !empty($datafromwiki = self::_cleanWikidataInput((string)$datafromwiki))) {
|
||||
$alreadyEntered = $this->enterPlaceDescFromWikidata($cur_place_desc, $datafromwiki, $lang, $cur_lang, $onum, $erfasst_von);
|
||||
}
|
||||
|
||||
if (!empty($desc = self::_getDescriptionFromWikidataAndWikipediaLinks($lang, $data, $wikilinks))) {
|
||||
$this->enterPlaceDescFromWikidata($cur_place_desc, $desc['desc'], $lang, $desc['lang'], $onum, $erfasst_von);
|
||||
}
|
||||
|
||||
if (isset($data['claims']['P1566'])) $geonames_id = filter_var($data['claims']['P1566'][0]['mainsnak']['datavalue']['value'], FILTER_VALIDATE_INT);
|
||||
@ -1589,37 +1374,12 @@ final class NodaWikidataFetcher {
|
||||
*/
|
||||
public function retrieveTagInfoFromWikidataID(string $lang, string $wikidata_id, int $tag_id, string $erfasst_von) {
|
||||
|
||||
self::validateWikidataId($wikidata_id);
|
||||
$data = self::_getWikidataEntity($wikidata_id);
|
||||
$data = self::getWikidataEntity($wikidata_id);
|
||||
|
||||
$wikilinks = self::_getWikipediaLinksFromWikidataOutput($data);
|
||||
|
||||
$alreadyEntered = false;
|
||||
|
||||
if (isset($wikilinks[$lang])) {
|
||||
|
||||
$datafromwiki = MD_STD::runCurl(self::_getWikipediaApiLink($lang, $wikilinks[$lang]['title']), 10000);
|
||||
$datafromwiki = json_decode($datafromwiki, true)['parse']['text']['*'];
|
||||
|
||||
# Process data retrieved from wikipedia
|
||||
if (!empty($datafromwiki = self::_cleanWikidataInput((string)$datafromwiki))) {
|
||||
$alreadyEntered = $this->retrieveTagDescFromWikipedia($tag_id, $datafromwiki, $lang, $lang, $erfasst_von);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
foreach (self::LANGUAGES_MAIN_DESC as $cur_lang) {
|
||||
|
||||
if ($alreadyEntered === true || !isset($wikilinks[$cur_lang])) continue;
|
||||
|
||||
$datafromwiki = MD_STD::runCurl(self::_getWikipediaApiLink($cur_lang, $wikilinks[$cur_lang]['title']), 10000);
|
||||
$datafromwiki = json_decode($datafromwiki, true)['parse']['text']['*'];
|
||||
|
||||
# Process data retrieved from wikipedia
|
||||
if ($datafromwiki = self::_cleanWikidataInput((string)$datafromwiki)) {
|
||||
$alreadyEntered = $this->retrieveTagDescFromWikipedia($tag_id, $datafromwiki, $lang, $cur_lang, $erfasst_von);
|
||||
}
|
||||
|
||||
if (!empty($desc = self::_getDescriptionFromWikidataAndWikipediaLinks($lang, $data, $wikilinks))) {
|
||||
$alreadyEntered = $this->retrieveTagDescFromWikipedia($tag_id, $desc['desc'], $lang, $desc['lang'], $erfasst_von);
|
||||
}
|
||||
|
||||
if (!empty($nodaLinks = $this->_getNodaLinksFromWikidataResult('tag', $wikidata_id, $data))) {
|
||||
|
@ -0,0 +1,27 @@
|
||||
<?PHP
|
||||
/**
|
||||
* This file contains an exception class to be thrown if a user attempts to load
|
||||
* data from a Wikidata item specifically established for a disambiguation page.
|
||||
*
|
||||
* @file
|
||||
*
|
||||
* @author Joshua Ramon Enslin <joshua@museum-digital.de>
|
||||
*/
|
||||
declare(strict_types = 1);
|
||||
|
||||
/**
|
||||
* Exception class to be thrown if a user attempts to load
|
||||
* data from a Wikidata item specifically established for a disambiguation page.
|
||||
*/
|
||||
final class NodaWikidataFetcherDisambiguationIsDisallowedException extends MDgenericInvalidInputsException {
|
||||
/**
|
||||
* Error message.
|
||||
*
|
||||
* @return string
|
||||
*/
|
||||
public function errorMessage() {
|
||||
//error message
|
||||
return 'Attempted to load a disambiguation page. Please select the specific item you want to fetch to enrich the given entry: ' . $this->getMessage();
|
||||
|
||||
}
|
||||
}
|
@ -11,7 +11,7 @@ declare(strict_types = 1);
|
||||
*/
|
||||
final class NodaPersinstFulltextSyncManticore {
|
||||
|
||||
const FULL_SYNC_COMMIT_AFTER = 30000;
|
||||
private const FULL_SYNC_COMMIT_AFTER = 30000;
|
||||
|
||||
/**
|
||||
* Returns all names and descriptions in the different languages of a actor.
|
||||
@ -188,6 +188,10 @@ final class NodaPersinstFulltextSyncManticore {
|
||||
|
||||
$mysqli_manticore->commit();
|
||||
|
||||
if (PHP_SAPI === 'cli' && $mysqli_noda->ping() === false) {
|
||||
$mysqli_noda->reconnect();
|
||||
}
|
||||
|
||||
// Sync translations
|
||||
|
||||
$result = $mysqli_noda->do_read_query("SELECT `persinst`.`persinst_id`, `trans_language`,
|
||||
|
@ -11,7 +11,7 @@ declare(strict_types = 1);
|
||||
*/
|
||||
final class NodaTagFulltextSyncManticore {
|
||||
|
||||
const FULL_SYNC_COMMIT_AFTER = 30000;
|
||||
private const FULL_SYNC_COMMIT_AFTER = 30000;
|
||||
|
||||
/**
|
||||
* Returns all names and descriptions in the different languages of a tag.
|
||||
@ -139,6 +139,10 @@ final class NodaTagFulltextSyncManticore {
|
||||
/**
|
||||
* Synchronizes base entries.
|
||||
*
|
||||
* @param MDMysqli $mysqli_noda Connection to MySQL DB.
|
||||
* @param MDMysqli $mysqli_manticore Connection to Manticore DB.
|
||||
* @param string $databasename Name of the main noda database.
|
||||
*
|
||||
* @return void
|
||||
*/
|
||||
public static function runFullSyncForBaseEntries(MDMysqli $mysqli_noda, MDMysqli $mysqli_manticore, string $databasename):void {
|
||||
@ -189,6 +193,10 @@ final class NodaTagFulltextSyncManticore {
|
||||
/**
|
||||
* Synchronizes translated entries.
|
||||
*
|
||||
* @param MDMysqli $mysqli_noda Connection to MySQL DB.
|
||||
* @param MDMysqli $mysqli_manticore Connection to Manticore DB.
|
||||
* @param string $databasename Name of the main noda database.
|
||||
*
|
||||
* @return void
|
||||
*/
|
||||
public static function runFullSyncForTranslatedEntries(MDMysqli $mysqli_noda, MDMysqli $mysqli_manticore, string $databasename):void {
|
||||
|
@ -8,6 +8,7 @@
|
||||
|
||||
enum NodaTimeAutotranslaterLocales {
|
||||
case ar;
|
||||
case crh;
|
||||
case de;
|
||||
case en;
|
||||
case es;
|
||||
@ -40,6 +41,7 @@ enum NodaTimeAutotranslaterLocales {
|
||||
|
||||
return match($lang) {
|
||||
'ar' => static::ar,
|
||||
'crh' => static::crh,
|
||||
'de' => static::de,
|
||||
'en' => static::en,
|
||||
'es' => static::es,
|
||||
@ -73,6 +75,7 @@ enum NodaTimeAutotranslaterLocales {
|
||||
|
||||
return match($this) {
|
||||
self::ar => 'ar_SY.utf8',
|
||||
self::crh => 'uk_UA.utf8',
|
||||
self::de => 'de_DE.utf8',
|
||||
self::en => 'en_US.utf8',
|
||||
self::es => 'es_ES.utf8',
|
||||
@ -108,6 +111,7 @@ enum NodaTimeAutotranslaterLocales {
|
||||
|
||||
return match($this) {
|
||||
self::ar => 'ar-SY',
|
||||
self::crh => 'uk-UA',
|
||||
self::de => 'de-DE',
|
||||
self::en => 'en-US',
|
||||
self::es => 'es-ES',
|
||||
@ -143,6 +147,7 @@ enum NodaTimeAutotranslaterLocales {
|
||||
|
||||
return match($this) {
|
||||
self::ar => '%s',
|
||||
self::crh => '%s',
|
||||
self::de => '%s n. Chr.',
|
||||
self::en => '%s CE',
|
||||
self::es => '%s d.C.',
|
||||
@ -176,6 +181,7 @@ enum NodaTimeAutotranslaterLocales {
|
||||
|
||||
return match($this) {
|
||||
self::ar => '-%s',
|
||||
self::crh => '%s рік до нашої ери',
|
||||
self::de => '%s v. Chr.',
|
||||
self::en => '%s BC',
|
||||
self::es => '%s a.C.',
|
||||
@ -211,6 +217,7 @@ enum NodaTimeAutotranslaterLocales {
|
||||
|
||||
return match($this) {
|
||||
self::ar => '%s',
|
||||
self::crh => '%s',
|
||||
self::de => '%s',
|
||||
self::en => '%s',
|
||||
self::es => '%s',
|
||||
@ -244,6 +251,7 @@ enum NodaTimeAutotranslaterLocales {
|
||||
|
||||
return match($this) {
|
||||
self::ar => '%s-%s',
|
||||
self::crh => '%s-%s',
|
||||
self::de => '%s-%s',
|
||||
self::en => '%s-%s',
|
||||
self::es => '%s-%s',
|
||||
@ -279,6 +287,7 @@ enum NodaTimeAutotranslaterLocales {
|
||||
|
||||
return match($this) {
|
||||
self::ar => '%s-',
|
||||
self::crh => 'з %s року',
|
||||
self::de => 'Seit %s',
|
||||
self::en => 'Since %s',
|
||||
self::es => 'Desde %s',
|
||||
@ -315,6 +324,7 @@ enum NodaTimeAutotranslaterLocales {
|
||||
|
||||
return match($this) {
|
||||
self::ar => '%s-',
|
||||
self::crh => 'після %s року',
|
||||
self::de => 'Nach %s',
|
||||
self::en => 'After %s',
|
||||
self::es => 'Despues de %s',
|
||||
@ -350,6 +360,7 @@ enum NodaTimeAutotranslaterLocales {
|
||||
|
||||
return match($this) {
|
||||
self::ar => '-%s',
|
||||
self::crh => 'до %s року',
|
||||
self::de => 'Bis %s',
|
||||
self::en => 'Until %s',
|
||||
self::es => 'Hasta %s',
|
||||
@ -384,6 +395,7 @@ enum NodaTimeAutotranslaterLocales {
|
||||
|
||||
return match($this) {
|
||||
self::ar => 'القرن ال %s',
|
||||
self::crh => '%s століття',
|
||||
self::de => '%s. Jahrhundert',
|
||||
self::en => '%s. century',
|
||||
self::es => 'Siglo %s',
|
||||
@ -418,6 +430,7 @@ enum NodaTimeAutotranslaterLocales {
|
||||
|
||||
return match($this) {
|
||||
self::ar => 'القرن ال %s-%s',
|
||||
self::crh => '%s-%s століття',
|
||||
self::de => '%s.-%s. Jahrhundert',
|
||||
self::en => '%s.-%s. century',
|
||||
self::es => 'Siglo %s-%s',
|
||||
@ -452,6 +465,7 @@ enum NodaTimeAutotranslaterLocales {
|
||||
|
||||
return match($this) {
|
||||
self::ar => '%s-%s',
|
||||
self::crh => '%s-ті роки',
|
||||
self::de => '%ser Jahre',
|
||||
self::en => '%ss',
|
||||
self::es => '%s-%s',
|
||||
@ -486,6 +500,7 @@ enum NodaTimeAutotranslaterLocales {
|
||||
|
||||
return match($this) {
|
||||
self::ar => '%s-%s',
|
||||
self::crh => '%s-%s-ті роки',
|
||||
self::de => '%s-%ser Jahre',
|
||||
self::en => '%s-%ss',
|
||||
self::es => '%s-%s',
|
||||
@ -521,6 +536,7 @@ enum NodaTimeAutotranslaterLocales {
|
||||
|
||||
return match($this) {
|
||||
self::ar => '-%s',
|
||||
self::crh => 'до %s року',
|
||||
self::de => 'Vor %s',
|
||||
self::en => 'Before %s',
|
||||
self::es => 'Antes de %s',
|
||||
@ -558,6 +574,7 @@ enum NodaTimeAutotranslaterLocales {
|
||||
# self::be => '%d.%B.%Y',
|
||||
# self::bg => '%Y-%B-%d',
|
||||
# self::ca => '%d/%m/%Y',
|
||||
self::crh => '%d.%m.%Y',
|
||||
# self::cs => '%d.%B.%Y',
|
||||
# self::da => '%d-%m-%Y',
|
||||
self::de => '%d.%m.%Y',
|
||||
@ -618,6 +635,7 @@ enum NodaTimeAutotranslaterLocales {
|
||||
# self::be => '%d.%B.%Y',
|
||||
# self::bg => '%Y-%B-%d',
|
||||
# self::ca => '%d/%m/%Y',
|
||||
self::crh => 'dd.MM.Y',
|
||||
# self::cs => '%d.%B.%Y',
|
||||
# self::da => '%d-%m-%Y',
|
||||
self::de => 'dd.MM.Y',
|
||||
@ -679,6 +697,7 @@ enum NodaTimeAutotranslaterLocales {
|
||||
# self::bg => '%Y-%B',
|
||||
# self::ca => '%m/%Y',
|
||||
# self::cs => '%B.%Y',
|
||||
self::crh => '%m %Y',
|
||||
# self::da => '%m-%Y',
|
||||
self::de => '%B %Y',
|
||||
# self::el => '%B %Y',
|
||||
@ -735,6 +754,7 @@ enum NodaTimeAutotranslaterLocales {
|
||||
# self::bg => 'Y-MMMM',
|
||||
# self::ca => 'MM/Y',
|
||||
# self::cs => 'MMMM.Y',
|
||||
self::crh => 'MMMM Y',
|
||||
# self::da => 'MM-Y',
|
||||
self::de => 'MMMM Y',
|
||||
# self::el => 'MMMM Y',
|
||||
|
@ -141,6 +141,35 @@ final class NodaIDGetterTest extends TestCase {
|
||||
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns a test actor name and life dates.
|
||||
*
|
||||
* @return array<array{0: string, 1: integer, 2: string, 3: string}>
|
||||
*/
|
||||
public static function persinstByNameAndLifeDatesProvider():array {
|
||||
|
||||
$mysqli = md_noda_mysqli_connect();
|
||||
$result = $mysqli->do_read_query("SELECT `persinst_name_en`, `persinst_name`, `persinst_id`, `persinst_geburtsjahr`, `persinst_sterbejahr`
|
||||
FROM `persinst`
|
||||
WHERE INSTR(`persinst_name_en`, 'i')
|
||||
AND `persinst_geburtsjahr` != ''
|
||||
AND `persinst_sterbejahr` != ''
|
||||
LIMIT 1");
|
||||
if (!$cur = $result->fetch_row()) {
|
||||
throw new Exception("Error");
|
||||
}
|
||||
$result->close();
|
||||
$mysqli->close();
|
||||
|
||||
return [
|
||||
'Persinst ID by name: ' . implode(' - ', $cur) => [
|
||||
(string)$cur[0], (int)$cur[2], (string)$cur[3], (string)$cur[4],
|
||||
(string)$cur[1], (int)$cur[2], (string)$cur[3], (string)$cur[4],
|
||||
]
|
||||
];
|
||||
|
||||
}
|
||||
|
||||
/**
|
||||
* Test getting persinst by name works.
|
||||
*
|
||||
@ -156,6 +185,25 @@ final class NodaIDGetterTest extends TestCase {
|
||||
|
||||
}
|
||||
|
||||
/**
|
||||
* Test getting persinst by name and life dates works.
|
||||
*
|
||||
* @param string $name Name of the entry.
|
||||
* @param integer $expected_id Expected target ID.
|
||||
* @param string $birth_year Birth year.
|
||||
* @param string $death_year Death year.
|
||||
*
|
||||
* @return void
|
||||
*/
|
||||
#[DataProvider('persinstByNameAndLifeDatesProvider')]
|
||||
public function testGetPersinstIdByNameAndLifeDatesWorks(string $name, int $expected_id, string $birth_year, string $death_year):void {
|
||||
|
||||
self::assertEquals($expected_id,
|
||||
NodaIDGetter::getPersinstIDByNamePlusYears($this->_mysqli, "de", $name, $birth_year, $death_year),
|
||||
"Entry " . $name . " is not matched in exact lookup. Expected ID: " . $expected_id);
|
||||
|
||||
}
|
||||
|
||||
// PersinstIDByRewrite
|
||||
|
||||
/**
|
||||
@ -411,7 +459,8 @@ final class NodaIDGetterTest extends TestCase {
|
||||
$mysqli = md_noda_mysqli_connect();
|
||||
$timeByRewriteSimple = self::_getNameAndIdFromDbQuery($mysqli, "SELECT `input_name`, `zeit_id`
|
||||
FROM `zeit_rewriting`
|
||||
WHERE INSTR(`input_name`, 'i')");
|
||||
WHERE INSTR(`input_name`, 'i')
|
||||
AND `language` = 'de'");
|
||||
$mysqli->close();
|
||||
|
||||
return [
|
||||
|
@ -6,12 +6,14 @@
|
||||
*/
|
||||
declare(strict_types = 1);
|
||||
use PHPUnit\Framework\TestCase;
|
||||
use PHPUnit\Framework\Attributes\CoversClass;
|
||||
use PHPUnit\Framework\Attributes\Small;
|
||||
|
||||
/**
|
||||
* This script contains tests for the actor name splitter.
|
||||
*
|
||||
* @covers \NodaNameSplitter
|
||||
*/
|
||||
#[Small]
|
||||
#[CoversClass(\NodaIDGetter::class)]
|
||||
final class NodaNameSplitterTest extends TestCase {
|
||||
/**
|
||||
* Test to check whether the HTML page is correctly generated.
|
||||
|
@ -6,12 +6,14 @@
|
||||
*/
|
||||
declare(strict_types = 1);
|
||||
use PHPUnit\Framework\TestCase;
|
||||
use PHPUnit\Framework\Attributes\CoversClass;
|
||||
use PHPUnit\Framework\Attributes\Medium;
|
||||
|
||||
/**
|
||||
* This script contains tests for the automatic translation class for time names.
|
||||
*
|
||||
* @covers \NodaTimeAutotranslater
|
||||
*/
|
||||
#[Medium]
|
||||
#[CoversClass(\NodaIDGetter::class)]
|
||||
final class NodaTimeAutotranslaterTest extends TestCase {
|
||||
/**
|
||||
* Test to check whether the HTML page is correctly generated.
|
||||
@ -32,7 +34,7 @@ final class NodaTimeAutotranslaterTest extends TestCase {
|
||||
"zeit_zaehlzeit_tag" => "01",
|
||||
];
|
||||
$output = NodaTimeAutotranslater::getTranslations($timeInfo);
|
||||
self::assertEquals($output["de"], "01.05.1920");
|
||||
self::assertEquals("01.05.1920", $output["de"]);
|
||||
|
||||
}
|
||||
|
||||
@ -671,4 +673,23 @@ final class NodaTimeAutotranslaterTest extends TestCase {
|
||||
self::assertEquals($output["de"], "Vor 01.12.1919");
|
||||
|
||||
}
|
||||
|
||||
/**
|
||||
* Test to check whether validating works.
|
||||
*
|
||||
* @author Joshua Ramon Enslin <joshua@museum-digital.de>
|
||||
* @group ValidOutput
|
||||
* @small
|
||||
*
|
||||
* @return void
|
||||
*/
|
||||
public function testValidation():void {
|
||||
|
||||
$output = [
|
||||
'de' => '1.12.1920',
|
||||
];
|
||||
|
||||
self::assertFalse(NodaTimeAutotranslater::validateTranslations("1919", "1919", $output));
|
||||
|
||||
}
|
||||
}
|
||||
|
File diff suppressed because it is too large
Load Diff
@ -6,19 +6,28 @@
|
||||
*/
|
||||
declare(strict_types = 1);
|
||||
use PHPUnit\Framework\TestCase;
|
||||
use PHPUnit\Framework\Attributes\CoversClass;
|
||||
use PHPUnit\Framework\Attributes\Medium;
|
||||
use PHPUnit\Framework\Attributes\DataProvider;
|
||||
|
||||
require_once __DIR__ . '/../../MDMysqli/test_connections.conf.php';
|
||||
require_once __DIR__ . '/../src/NodaWikidataFetcherDisambiguationIsDisallowedException.php';
|
||||
|
||||
/**
|
||||
* This script contains tests for the Wikidata fetcher.
|
||||
*
|
||||
* @covers \NodaWikidataFetcher
|
||||
*/
|
||||
#[medium]
|
||||
#[CoversClass(\NodaWikidataFetcher::class)]
|
||||
final class NodaWikidataFetcherTest extends TestCase {
|
||||
|
||||
// Test for getting translations: Telugu
|
||||
public const TEST_LANG = 'te';
|
||||
|
||||
/**
|
||||
* Test to check whether the HTML page is correctly generated.
|
||||
*
|
||||
* @author Joshua Ramon Enslin <joshua@museum-digital.de>
|
||||
* @group ValidOutput
|
||||
* @small
|
||||
*
|
||||
* @return void
|
||||
*/
|
||||
@ -29,12 +38,39 @@ final class NodaWikidataFetcherTest extends TestCase {
|
||||
|
||||
}
|
||||
|
||||
/**
|
||||
* Data provider providing a Wikidata ID for a dedicated wikidata item for disambiguation pages.
|
||||
*
|
||||
* @return array<string, array{0: string}>
|
||||
*/
|
||||
public static function disambiguationPageProvider():array {
|
||||
|
||||
return [
|
||||
'Disambiguation page for "Mochi" - Q6916210' => ['Q6916210'],
|
||||
];
|
||||
|
||||
}
|
||||
|
||||
/**
|
||||
* Throw error when attempting to load a dedicated wikidata entry for a disambiguation page.
|
||||
*
|
||||
* @param string $wikidata_id Wikidata ID.
|
||||
*
|
||||
* @return void
|
||||
*/
|
||||
#[DataProvider('disambiguationPageProvider')]
|
||||
public function testWikidataIdFromLinkFailsForDisambiguationPages(string $wikidata_id):void {
|
||||
|
||||
self::expectException(NodaWikidataFetcherDisambiguationIsDisallowedException::class);
|
||||
NodaWikidataFetcher::getWikidataEntity($wikidata_id);
|
||||
|
||||
}
|
||||
|
||||
/**
|
||||
* Test to check whether the HTML page is correctly generated.
|
||||
*
|
||||
* @author Joshua Ramon Enslin <joshua@museum-digital.de>
|
||||
* @group ValidOutput
|
||||
* @small
|
||||
*
|
||||
* @return void
|
||||
*/
|
||||
@ -47,9 +83,7 @@ final class NodaWikidataFetcherTest extends TestCase {
|
||||
/**
|
||||
* Test to check whether the HTML page is correctly generated.
|
||||
*
|
||||
* @author Joshua Ramon Enslin <joshua@museum-digital.de>
|
||||
* @group ValidOutput
|
||||
* @small
|
||||
*
|
||||
* @return void
|
||||
*/
|
||||
@ -60,225 +94,208 @@ final class NodaWikidataFetcherTest extends TestCase {
|
||||
}
|
||||
|
||||
/**
|
||||
* Test for cleaning wikidata info.
|
||||
* Data provider for an actor that has a wikidata link and a Telugu translation.
|
||||
*
|
||||
* @author Joshua Ramon Enslin <joshua@museum-digital.de>
|
||||
* @group ValidOutput
|
||||
* @small
|
||||
*
|
||||
* @return void
|
||||
* @return array<string, array{0: int, 1: string}>
|
||||
*/
|
||||
public function testCleanWikidataInput():void {
|
||||
public static function actorWithTlAndWikidataLinkProvider():array {
|
||||
|
||||
$testStr = '"<div class="mw-parser-output"><table class="infobox float-right toccolours toptextcells" style="margin: 0 0 1em 1em; width: 300px;" id="Vorlage_Infobox_Ort_in_der_Ukraine" summary="Infobox Ort in der Ukraine">
|
||||
$mysqli = md_main_mysqli_connect();
|
||||
|
||||
<tbody><tr>
|
||||
<td colspan="2" style="background-color:#AFD6FF; font-size:1.3em; font-weight:bold; text-align:center;">Werbowez (Kossiw)
|
||||
</td></tr>
|
||||
<tr>
|
||||
<td colspan="2" style="background-color:#FFC; font-size:1em; font-weight:bold; text-align:center;"><span lang="uk-Cyrl" class="Cyrl">Вербовець</span>
|
||||
</td></tr>
|
||||
$result = $mysqli->do_read_query("SELECT `persinst_id`, `noda_nrinsource`
|
||||
FROM `" . DATABASENAME_NODA . "`.`noda`
|
||||
WHERE `noda_source` = 'Wikidata'
|
||||
AND EXISTS (SELECT 1 FROM `" . DATABASENAME_NODA . "`.`persinst_translation`
|
||||
WHERE `persinst_translation`.`persinst_id` = `noda`.`persinst_id`
|
||||
AND `trans_language` = '" . $mysqli->escape_string(self::TEST_LANG) . "')");
|
||||
|
||||
if (!$cur = $result->fetch_row()) {
|
||||
throw new Exception("Failed to identify an entry that has a wikidata entry and a translation for language " . self::TEST_LANG);
|
||||
}
|
||||
$result->close();
|
||||
$mysqli->close();
|
||||
|
||||
<tr style="height:120px; background-color:#FFF;">
|
||||
<td style="width: 130px; text-align:center;"><span typeof="mw:File"><a href="/wiki/Datei:Coats_of_arms_of_None.svg" class="mw-file-description" title="Wappen fehlt"><img alt="Wappen fehlt" src="//upload.wikimedia.org/wikipedia/commons/thumb/c/c1/Coats_of_arms_of_None.svg/100px-Coats_of_arms_of_None.svg.png" decoding="async" width="100" height="120" class="mw-file-element" srcset="//upload.wikimedia.org/wikipedia/commons/thumb/c/c1/Coats_of_arms_of_None.svg/150px-Coats_of_arms_of_None.svg.png 1.5x, //upload.wikimedia.org/wikipedia/commons/thumb/c/c1/Coats_of_arms_of_None.svg/200px-Coats_of_arms_of_None.svg.png 2x" data-file-width="125" data-file-height="150" /></a></span>
|
||||
</td>
|
||||
<td style="width: 170px; text-align:center;"><table class="centered" style="background-color: #f9f9f9; border: none; border-collapse: collapse; width: 1px;">
|
||||
<tbody><tr><td style="border: none; padding: 0; text-align: center;"><div style="position: relative; z-index: 0; padding: 0; display: inline-block; width: -webkit-max-content; width: -moz-max-content; width: max-content; border: none;"><figure class="mw-halign-center noviewer notpageimage" typeof="mw:File"><a href="/wiki/Datei:Ukraine_adm_location_map.svg" class="mw-file-description" title="Werbowez (Kossiw) (Ukraine)"><img alt="Werbowez (Kossiw) (Ukraine)" src="//upload.wikimedia.org/wikipedia/commons/thumb/7/78/Ukraine_adm_location_map.svg/180px-Ukraine_adm_location_map.svg.png" decoding="async" width="180" height="121" class="mw-file-element" srcset="//upload.wikimedia.org/wikipedia/commons/thumb/7/78/Ukraine_adm_location_map.svg/270px-Ukraine_adm_location_map.svg.png 1.5x, //upload.wikimedia.org/wikipedia/commons/thumb/7/78/Ukraine_adm_location_map.svg/360px-Ukraine_adm_location_map.svg.png 2x" data-file-width="1546" data-file-height="1038" /></a><figcaption>Werbowez (Kossiw) (Ukraine)</figcaption></figure><div style="position:absolute; top:50.7%; left:18.9%; height:0; width:0;"><div style="position:relative;z-index:100;left:-4px;top:-4px;width:8px;height:8px;line-height:0px;"><span typeof="mw:File"><a href="https://geohack.toolforge.org/geohack.php?pagename=Werbowez_(Kossiw)&language=de&params=48.342222222222_N_25.133333333333_E_dim:10000_region:UA-26_type:city(3395)&title=Werbowez+%28Kossiw%29" title="Werbowez (Kossiw) (48° 20′ 32″ N, 25° 8′ 0″O)"><img alt="" src="//upload.wikimedia.org/wikipedia/commons/thumb/9/97/ButtonRed.svg/8px-ButtonRed.svg.png" decoding="async" width="8" height="8" class="mw-file-element" srcset="//upload.wikimedia.org/wikipedia/commons/thumb/9/97/ButtonRed.svg/12px-ButtonRed.svg.png 1.5x, //upload.wikimedia.org/wikipedia/commons/thumb/9/97/ButtonRed.svg/16px-ButtonRed.svg.png 2x" data-file-width="480" data-file-height="480" /></a></span></div>
|
||||
<table style="font-size:90%; border:none; background-color:transparent; border-collapse:collapse; line-height:1em; position:absolute; width:6em; margin: 0 .2em; text-align:left; left:1px; bottom:1px;"><tbody><tr><td style="border:none; vertical-align:middle;"><span style="position:relative; z-index:9; background-color:none;">Werbowez (Kossiw) </span></td></tr></tbody></table></div></div></td></tr>
|
||||
</tbody></table>
|
||||
</td></tr>
|
||||
<tr style="background-color:#AFD6FF;">
|
||||
<th colspan="2">Basisdaten
|
||||
</th></tr>
|
||||
<tr>
|
||||
<td><a href="/wiki/Liste_der_Oblaste_der_Ukraine" title="Liste der Oblaste der Ukraine">Oblast</a>:</td>
|
||||
<td><a href="/wiki/Oblast_Iwano-Frankiwsk" title="Oblast Iwano-Frankiwsk">Oblast Iwano-Frankiwsk</a>
|
||||
</td></tr>
|
||||
<tr>
|
||||
<td><a href="/wiki/Liste_der_Rajone_der_Ukraine" title="Liste der Rajone der Ukraine">Rajon</a>:</td>
|
||||
<td><a href="/wiki/Rajon_Kossiw" title="Rajon Kossiw">Rajon Kossiw</a>
|
||||
</td></tr>
|
||||
<tr>
|
||||
<td><a href="/wiki/H%C3%B6he_%C3%BCber_dem_Meeresspiegel" title="Höhe über dem Meeresspiegel">Höhe</a>:</td>
|
||||
<td>369 m
|
||||
</td></tr>
|
||||
<tr>
|
||||
<td><a href="/wiki/Fl%C3%A4cheninhalt" title="Flächeninhalt">Fläche</a>:</td>
|
||||
<td>18,77 <a href="/wiki/Quadratmeter#Quadratkilometer" title="Quadratmeter">km²</a>
|
||||
</td></tr>
|
||||
<tr>
|
||||
<td><a href="/wiki/Einwohner" title="Einwohner">Einwohner</a>:</td>
|
||||
<td>3.395 <small><i>(2001)</i></small>
|
||||
</td></tr>
|
||||
<tr>
|
||||
<td><a href="/wiki/Bev%C3%B6lkerungsdichte" title="Bevölkerungsdichte">Bevölkerungsdichte</a>:
|
||||
</td>
|
||||
<td>181 Einwohner je km²
|
||||
</td></tr>
|
||||
<tr>
|
||||
<td><a href="/wiki/Postleitzahl" title="Postleitzahl">Postleitzahlen</a>:</td>
|
||||
<td>78605
|
||||
</td></tr>
|
||||
<tr>
|
||||
<td><a href="/wiki/Telefonvorwahl" title="Telefonvorwahl">Vorwahl</a>:</td>
|
||||
<td>+380 3478
|
||||
</td></tr>
|
||||
<tr>
|
||||
<td><a href="/wiki/Geographische_Koordinaten" title="Geographische Koordinaten">Geographische Lage</a>:</td>
|
||||
<td><span id="text_coordinates" class="coordinates plainlinks-print"><a class="external text" href="https://geohack.toolforge.org/geohack.php?pagename=Werbowez_(Kossiw)&language=de&params=48.342222222222_N_25.133333333333_E_dim:10000_region:UA-26_type:city(3395)"><span title="Breitengrad">48° 21′ <abbr title="Nord">N</abbr></span>, <span title="Längengrad">25° 8′ <abbr title="Ost">O</abbr></span></a></span><span class="geo noexcerpt" style="display:none"><span class="body"></span><span class="latitude">48.342222222222</span><span class="longitude">25.133333333333</span><span class="elevation"></span></span><span id="coordinates" class="coordinates noprint"><span title="Koordinatensystem WGS84">Koordinaten: </span><a class="external text" href="https://geohack.toolforge.org/geohack.php?pagename=Werbowez_(Kossiw)&language=de&params=48.342222222222_N_25.133333333333_E_dim:10000_region:UA-26_type:city(3395)"><span title="Breitengrad">48° 20′ 32″ <abbr title="Nord">N</abbr></span>, <span title="Längengrad">25° 8′ 0″ <abbr title="Ost">O</abbr></span></a></span>
|
||||
</td></tr>
|
||||
<tr>
|
||||
<td><a href="/wiki/KATOTTH" title="KATOTTH">KATOTTH</a>:
|
||||
</td>
|
||||
<td>UA26100010030094355
|
||||
</td></tr>
|
||||
<tr>
|
||||
<td><a href="/wiki/KOATUU" title="KOATUU">KOATUU</a>:
|
||||
</td>
|
||||
<td>2623682401
|
||||
</td></tr>
|
||||
<tr>
|
||||
<td><a href="/wiki/Verwaltungsgliederung_der_Ukraine" title="Verwaltungsgliederung der Ukraine">Verwaltungsgliederung</a>:
|
||||
</td>
|
||||
<td>1 Dorf
|
||||
</td></tr>
|
||||
|
||||
|
||||
|
||||
|
||||
<tr>
|
||||
<td>Adresse:
|
||||
</td>
|
||||
<td>вул. Миру, буд. 15<br />78605 с. Вербовець
|
||||
</td></tr>
|
||||
<tr>
|
||||
<td><a href="/wiki/Website" title="Website">Website</a>:
|
||||
</td>
|
||||
<td><a rel="nofollow" class="external text" href="http://verbovets.kosiv.net/">Offizielle Webseite</a>
|
||||
</td></tr>
|
||||
<tr>
|
||||
<td colspan="2" style="padding-bottom:3px; text-align:center; border-bottom:1px solid #bbb; border-top:1px solid #bbb;"><a rel="nofollow" class="external text" href="http://w1.c1.rada.gov.ua/pls/z7503/A005?rdat1=31.08.2023&rf7571=13801">Statistische Informationen</a>
|
||||
</td></tr>
|
||||
<tr>
|
||||
<td colspan="2" style="padding-bottom:3px; text-align:center; border-bottom:1px solid #bbb; border-top:1px solid #bbb;">
|
||||
<table class="centered" style="background-color: #f9f9f9; border: none; border-collapse: collapse; width: 1px;">
|
||||
<tbody><tr><td style="border: none; padding: 0; text-align: center;"><div style="position: relative; z-index: 0; padding: 0; display: inline-block; width: -webkit-max-content; width: -moz-max-content; width: max-content; border: none;"><figure class="mw-halign-center noviewer notpageimage" typeof="mw:File"><a href="/wiki/Datei:Ivano-Frankivsk_location_map.svg" class="mw-file-description" title="Werbowez (Kossiw) (Oblast Iwano-Frankiwsk)"><img alt="Werbowez (Kossiw) (Oblast Iwano-Frankiwsk)" src="//upload.wikimedia.org/wikipedia/commons/thumb/8/8e/Ivano-Frankivsk_location_map.svg/290px-Ivano-Frankivsk_location_map.svg.png" decoding="async" width="290" height="347" class="mw-file-element" srcset="//upload.wikimedia.org/wikipedia/commons/thumb/8/8e/Ivano-Frankivsk_location_map.svg/435px-Ivano-Frankivsk_location_map.svg.png 1.5x, //upload.wikimedia.org/wikipedia/commons/thumb/8/8e/Ivano-Frankivsk_location_map.svg/580px-Ivano-Frankivsk_location_map.svg.png 2x" data-file-width="533" data-file-height="637" /></a><figcaption>Werbowez (Kossiw) (Oblast Iwano-Frankiwsk)</figcaption></figure><div style="position:absolute; top:63.3%; left:74.4%; height:0; width:0;"><div style="position:relative;z-index:100;left:-4px;top:-4px;width:8px;height:8px;line-height:0px;"><span typeof="mw:File"><a href="https://geohack.toolforge.org/geohack.php?pagename=Werbowez_(Kossiw)&language=de&params=48.342222222222_N_25.133333333333_E_dim:10000_region:UA-26_type:city(3395)&title=Werbowez+%28Kossiw%29" title="Werbowez (Kossiw) (48° 20′ 32″ N, 25° 8′ 0″O)"><img alt="" src="//upload.wikimedia.org/wikipedia/commons/thumb/9/97/ButtonRed.svg/8px-ButtonRed.svg.png" decoding="async" width="8" height="8" class="mw-file-element" srcset="//upload.wikimedia.org/wikipedia/commons/thumb/9/97/ButtonRed.svg/12px-ButtonRed.svg.png 1.5x, //upload.wikimedia.org/wikipedia/commons/thumb/9/97/ButtonRed.svg/16px-ButtonRed.svg.png 2x" data-file-width="480" data-file-height="480" /></a></span></div>
|
||||
<table style="font-size:90%; border:none; background-color:transparent; border-collapse:collapse; line-height:1em; position:absolute; width:6em; margin: 0 .2em; text-align:right; right:1px; bottom:1px;"><tbody><tr><td style="border:none; vertical-align:middle;"><span style="position:relative; z-index:9; background-color:none;">Werbowez (Kossiw) </span></td></tr></tbody></table></div></div></td></tr>
|
||||
</tbody></table><span style="display:none;"><a href="/w/index.php?title=Vorlage:Positionskarte_ISO_3166-2/Wartung/noregion&action=edit&redlink=1" class="new" title="Vorlage:Positionskarte ISO 3166-2/Wartung/noregion (Seite nicht vorhanden)">i1</a></span>
|
||||
</td></tr></tbody></table>
|
||||
<p><b>Werbowez</b> (<b><span style="font-style:normal;font-weight:normal"><a href="/wiki/Ukrainische_Sprache" title="Ukrainische Sprache">ukrainisch</a></span> <span lang="uk-Cyrl" class="Cyrl" style="font-style:normal">Вербовець</span></b>; <span style="font-style:normal;font-weight:normal"><a href="/wiki/Russische_Sprache" title="Russische Sprache">russisch</a></span> <span lang="ru-Cyrl" class="Cyrl" style="font-style:normal">Вербовец</span>, <a href="/wiki/Polnische_Sprache" title="Polnische Sprache">polnisch</a> <span lang="pl" style="font-style:italic;font-weight:normal">Wierzbowiec</span>; <span style="font-style:normal;font-weight:normal"><a href="/wiki/Rum%C3%A4nische_Sprache" title="Rumänische Sprache">rumänisch</a></span> <span lang="ro-Latn" style="font-style:italic">Verboveț</span>) ist ein <a href="/wiki/Dorf" title="Dorf">Dorf</a> in der <a href="/wiki/Ukraine" title="Ukraine">ukrainischen</a> <a href="/wiki/Oblast_Iwano-Frankiwsk" title="Oblast Iwano-Frankiwsk">Oblast Iwano-Frankiwsk</a> mit etwa 3400 Einwohnern (2001).<sup id="cite_ref-1" class="reference"><a href="#cite_note-1">[1]</a></sup>
|
||||
</p>
|
||||
<figure class="mw-default-size mw-halign-left" typeof="mw:File/Thumb"><a href="/wiki/Datei:%D0%92%D0%B5%D1%80%D0%B1%D0%BE%D0%B2%D0%B5%D1%86%D1%8C.JPG" class="mw-file-description"><img src="//upload.wikimedia.org/wikipedia/commons/thumb/9/9d/%D0%92%D0%B5%D1%80%D0%B1%D0%BE%D0%B2%D0%B5%D1%86%D1%8C.JPG/220px-%D0%92%D0%B5%D1%80%D0%B1%D0%BE%D0%B2%D0%B5%D1%86%D1%8C.JPG" decoding="async" width="220" height="147" class="mw-file-element" srcset="//upload.wikimedia.org/wikipedia/commons/thumb/9/9d/%D0%92%D0%B5%D1%80%D0%B1%D0%BE%D0%B2%D0%B5%D1%86%D1%8C.JPG/330px-%D0%92%D0%B5%D1%80%D0%B1%D0%BE%D0%B2%D0%B5%D1%86%D1%8C.JPG 1.5x, //upload.wikimedia.org/wikipedia/commons/thumb/9/9d/%D0%92%D0%B5%D1%80%D0%B1%D0%BE%D0%B2%D0%B5%D1%86%D1%8C.JPG/440px-%D0%92%D0%B5%D1%80%D0%B1%D0%BE%D0%B2%D0%B5%D1%86%D1%8C.JPG 2x" data-file-width="5184" data-file-height="3456" /></a><figcaption>Blick auf das Dorf</figcaption></figure>
|
||||
<p>Das um 1650 erstmals schriftlich erwähnte Dorf<sup id="cite_ref-2" class="reference"><a href="#cite_note-2">[2]</a></sup> liegt im Osten der <a href="/wiki/Historische_Landschaft" title="Historische Landschaft">historischen Landschaft</a> <a href="/wiki/Galizien" title="Galizien">Galizien</a> am Ufer der <a href="/w/index.php?title=Rybnyzja_(Fluss)&action=edit&redlink=1" class="new" title="Rybnyzja (Fluss) (Seite nicht vorhanden)">Rybnyzja</a> (<span lang="uk-Cyrl" class="Cyrl">Рибниця</span>), einem 56 km langen Nebenfluss des <a href="/wiki/Pruth" title="Pruth">Pruth</a> 7 km nordöstlich vom Rajonzentrum <a href="/wiki/Kossiw" title="Kossiw">Kossiw</a> und 95 km südlich vom Oblastzentrum <a href="/wiki/Iwano-Frankiwsk" title="Iwano-Frankiwsk">Iwano-Frankiwsk</a>. Südlich der Ortschaft verläuft die <a href="/wiki/Territorialstra%C3%9Fe" title="Territorialstraße">Territorialstraße</a> <i>T–09–09</i>.
|
||||
</p><p>Am 12. Juni 2020 wurde das Dorf ein Teil der neu gegründeten <i>Stadtgemeinde <a href="/wiki/Kossiw" title="Kossiw">Kossiw</a></i> im <a href="/wiki/Rajon_Kossiw" title="Rajon Kossiw">Rajon Kossiw</a><sup id="cite_ref-3" class="reference"><a href="#cite_note-3">[3]</a></sup>, bis dahin bildete es zusammen mit dem Dorf <a href="/w/index.php?title=Staryj_Kossiw&action=edit&redlink=1" class="new" title="Staryj Kossiw (Seite nicht vorhanden)">Staryj Kossiw</a> (<span lang="uk-Cyrl" class="Cyrl">Старий Косів</span>) die <i>Landratsgemeinde Werbowez</i> (Вербовецька сільська рада/<i>Werbowezka silska rada</i>) im Osten des Rajons.
|
||||
</p>
|
||||
<ol class="references">
|
||||
<li id="cite_note-1"><span class="mw-cite-backlink"><a href="#cite_ref-1">↑</a></span> <span class="reference-text"><a rel="nofollow" class="external text" href="http://w1.c1.rada.gov.ua/pls/z7503/A005?rf7571=13801">Ortswebseite</a> auf der offiziellen Webpräsenz der <a href="/wiki/Werchowna_Rada" title="Werchowna Rada">Werchowna Rada</a>; abgerufen am 14. November 2017 (ukrainisch)</span>
|
||||
</li>
|
||||
<li id="cite_note-2"><span class="mw-cite-backlink"><a href="#cite_ref-2">↑</a></span> <span class="reference-text"><a rel="nofollow" class="external text" href="http://ukrssr.com.ua/ifrank/kosivskiy/verbovets-kosivskiy-rayon-ivano-frankivska-oblast">Ortsgeschichte Werbowez</a> in der <a href="/wiki/Geschichte_der_St%C3%A4dte_und_D%C3%B6rfer_der_Ukrainischen_SSR" title="Geschichte der Städte und Dörfer der Ukrainischen SSR">Geschichte der Städte und Dörfer der Ukrainischen SSR</a>; abgerufen am 14. November 2017 (ukrainisch)</span>
|
||||
</li>
|
||||
<li id="cite_note-3"><span class="mw-cite-backlink"><a href="#cite_ref-3">↑</a></span> <span class="reference-text"><a rel="nofollow" class="external text" href="https://zakon.rada.gov.ua/laws/show/714-2020-%D1%80#Text">Кабінет Міністрів України Розпорядження від 12 червня 2020 р. № 714-р "Про визначення адміністративних центрів та затвердження територій територіальних громад Івано-Франківської області"</a></span>
|
||||
</li>
|
||||
</ol>
|
||||
<!--
|
||||
NewPP limit report
|
||||
Parsed by mw1396
|
||||
Cached time: 20230831121013
|
||||
Cache expiry: 42588
|
||||
Reduced expiry: true
|
||||
Complications: []
|
||||
CPU time usage: 0.219 seconds
|
||||
Real time usage: 0.274 seconds
|
||||
Preprocessor visited node count: 6414/1000000
|
||||
Post‐expand include size: 33611/2097152 bytes
|
||||
Template argument size: 12317/2097152 bytes
|
||||
Highest expansion depth: 34/100
|
||||
Expensive parser function count: 9/500
|
||||
Unstrip recursion depth: 0/20
|
||||
Unstrip post‐expand size: 1476/5000000 bytes
|
||||
Lua time usage: 0.080/10.000 seconds
|
||||
Lua memory usage: 3398800/52428800 bytes
|
||||
Number of Wikibase entities loaded: 0/400
|
||||
-->
|
||||
<!--
|
||||
Transclusion expansion time report (%,ms,calls,template)
|
||||
100.00% 239.600 1 -total
|
||||
93.55% 224.134 1 Vorlage:Infobox_Ort_in_der_Ukraine
|
||||
50.81% 121.740 2 Vorlage:Positionskarte
|
||||
49.72% 119.121 2 Vorlage:Positionskarte+
|
||||
44.41% 106.401 2 Vorlage:Positionskarte~
|
||||
33.28% 79.732 2 Vorlage:Positionskarte~*
|
||||
25.69% 61.558 3 Vorlage:Lang
|
||||
19.41% 46.499 1 Vorlage:Positionskarte_ISO_3166-2
|
||||
16.90% 40.486 12 Vorlage:CoordinateLONG
|
||||
14.02% 33.586 10 Vorlage:CoordinateLAT
|
||||
-->
|
||||
</div>" - (de.wikipedia.org 31.08.2023)';
|
||||
|
||||
$output = NodaWikidataFetcher::cleanWikidataInput($testStr);
|
||||
$expected = 'Werbowez (ukrainisch Вербовець; russisch Вербовец, polnisch Wierzbowiec; rumänisch Verboveț) ist ein Dorf in der ukrainischen Oblast Iwano-Frankiwsk mit etwa 3400 Einwohnern (2001).';
|
||||
self::assertTrue(
|
||||
str_starts_with($output, $expected),
|
||||
"Start of parsed Wikipedia text should be:" . PHP_EOL . PHP_EOL . $expected . PHP_EOL . PHP_EOL . 'Real start text is: ' . PHP_EOL . PHP_EOL . substr($output, 0, 250)
|
||||
);
|
||||
|
||||
$output = NodaWikidataFetcher::cleanWikidataInput('<div class="mw-parser-output"><figure class="mw-default-size mw-halign-right" typeof="mw:File/Thumb"><a href="/wiki/File:%D0%92%D0%B5%D1%80%D0%B1%D0%BE%D0%B2%D0%B5%D1%86%D1%8C.JPG" class="mw-file-description"><img src="//upload.wikimedia.org/wikipedia/commons/thumb/9/9d/%D0%92%D0%B5%D1%80%D0%B1%D0%BE%D0%B2%D0%B5%D1%86%D1%8C.JPG/220px-%D0%92%D0%B5%D1%80%D0%B1%D0%BE%D0%B2%D0%B5%D1%86%D1%8C.JPG" decoding="async" width="220" height="147" class="mw-file-element" srcset="//upload.wikimedia.org/wikipedia/commons/thumb/9/9d/%D0%92%D0%B5%D1%80%D0%B1%D0%BE%D0%B2%D0%B5%D1%86%D1%8C.JPG/330px-%D0%92%D0%B5%D1%80%D0%B1%D0%BE%D0%B2%D0%B5%D1%86%D1%8C.JPG 1.5x, //upload.wikimedia.org/wikipedia/commons/thumb/9/9d/%D0%92%D0%B5%D1%80%D0%B1%D0%BE%D0%B2%D0%B5%D1%86%D1%8C.JPG/440px-%D0%92%D0%B5%D1%80%D0%B1%D0%BE%D0%B2%D0%B5%D1%86%D1%8C.JPG 2x" data-file-width="5184" data-file-height="3456" /></a><figcaption></figcaption></figure>
|
||||
<p><span style="font-size: small;"><span id="coordinates"><a href="/wiki/%E5%9C%B0%E7%90%86%E5%9D%90%E6%A0%87" class="mw-redirect" title="地理坐标">坐标</a>:<style data-mw-deduplicate="TemplateStyles:r65292569">.mw-parser-output .geo-default,.mw-parser-output .geo-dms,.mw-parser-output .geo-dec{display:inline}.mw-parser-output .geo-nondefault,.mw-parser-output .geo-multi-punct{display:none}.mw-parser-output .longitude,.mw-parser-output .latitude{white-space:nowrap}</style><span class="plainlinks nourlexpansion"><a class="external text" href="//geohack.toolforge.org/geohack.php?language=zh&pagename=%E9%9F%8B%E7%88%BE%E5%8D%9A%E9%9F%8B%E9%BD%8A_(%E7%A7%91%E7%B4%A2%E5%A4%AB%E5%8D%80)&params=48_20_32_N_25_8_0_E_scale:30000"><span class="geo-default"><span class="geo-dms" title="此地的地图、航拍照片和其他数据"><span class="latitude">48°20′32″N</span> <span class="longitude">25°8′0″E</span></span></span><span class="geo-multi-punct"> / </span><span class="geo-nondefault"><span class="geo-dec" title="此地的地图、航拍照片和其他数据">48.34222°N 25.13333°E</span><span style="display:none"> / <span class="geo">48.34222; 25.13333</span></span></span></a></span></span></span>
|
||||
</p><p><b>韋爾博韋齊</b>(<a href="/wiki/%E7%83%8F%E5%85%8B%E8%98%AD%E8%AA%9E" class="mw-redirect" title="烏克蘭語">烏克蘭語</a>:<span lang="uk">Вербовець</span>),是<a href="/wiki/%E7%83%8F%E5%85%8B%E8%98%AD" class="mw-redirect" title="烏克蘭">烏克蘭</a>的村落,位於該國西部<a href="/wiki/%E4%BC%8A%E4%B8%87%E8%AF%BA-%E5%BC%97%E5%85%B0%E7%A7%91%E5%A4%AB%E6%96%AF%E5%85%8B%E5%B7%9E" title="伊万诺-弗兰科夫斯克州">伊萬諾-弗蘭科夫斯克州</a>,由<a href="/wiki/%E7%A7%91%E7%B4%A2%E5%A4%AB%E5%8D%80" class="mw-redirect" title="科索夫區">科索夫區</a>負責管轄,始建於1456年,面積18.77平方公里,2001年人口3,395。
|
||||
</p>
|
||||
<!--
|
||||
NewPP limit report
|
||||
Parsed by mw1412
|
||||
Cached time: 20230831132208
|
||||
Cache expiry: 1814400
|
||||
Reduced expiry: false
|
||||
Complications: []
|
||||
CPU time usage: 0.147 seconds
|
||||
Real time usage: 0.186 seconds
|
||||
Preprocessor visited node count: 48/1000000
|
||||
Post‐expand include size: 2084/2097152 bytes
|
||||
Template argument size: 0/2097152 bytes
|
||||
Highest expansion depth: 3/100
|
||||
Expensive parser function count: 1/500
|
||||
Unstrip recursion depth: 0/20
|
||||
Unstrip post‐expand size: 362/5000000 bytes
|
||||
Lua time usage: 0.110/10.000 seconds
|
||||
Lua memory usage: 15402517/52428800 bytes
|
||||
Number of Wikibase entities loaded: 1/400
|
||||
-->
|
||||
<!--
|
||||
Transclusion expansion time report (%,ms,calls,template)
|
||||
100.00% 152.989 1 -total
|
||||
70.07% 107.204 1 Template:Lang-uk
|
||||
29.62% 45.313 1 Template:Coord
|
||||
-->
|
||||
</div>');
|
||||
$expected = '韋爾博韋齊(烏克蘭語:Вербовець),是烏克蘭的村落,位於該國西部伊萬諾-弗蘭科夫斯克州,由科索夫區負責管轄,始建於1456年,面積18.77平方公里,2001年人口3,3';
|
||||
self::assertTrue(
|
||||
str_starts_with($output, $expected),
|
||||
"Start of parsed Wikipedia text should be:" . PHP_EOL . PHP_EOL . $expected . PHP_EOL . PHP_EOL . 'Real start text is: ' . PHP_EOL . PHP_EOL . substr($output, 0, 250)
|
||||
);
|
||||
return [
|
||||
'Actor with wikidata and translation' => [$cur[0], $cur[1]],
|
||||
];
|
||||
|
||||
}
|
||||
|
||||
/**
|
||||
* Test for cleaning wikidata info.
|
||||
* Data provider for a place that has a wikidata link and a Telugu translation.
|
||||
*
|
||||
* @author Joshua Ramon Enslin <joshua@museum-digital.de>
|
||||
* @group ValidOutput
|
||||
* @small
|
||||
* @return array<string, array{0: int, 1: string}>
|
||||
*/
|
||||
public static function placeWithTlAndWikidataLinkProvider():array {

    $mysqli = md_main_mysqli_connect();

    try {
        // Only the first matching row is read below, so a single row is requested.
        $result = $mysqli->do_read_query("SELECT `ort_id`, `noda_nrinsource`
            FROM `" . DATABASENAME_NODA . "`.`noda_orte`
            WHERE `noda_source` = 'Wikidata'
                AND EXISTS (SELECT 1 FROM `" . DATABASENAME_NODA . "`.`ort_translation`
                    WHERE `ort_translation`.`ort_id` = `noda_orte`.`ort_id`
                        AND `trans_language` = '" . $mysqli->escape_string(self::TEST_LANG) . "')
            LIMIT 1");

        try {
            $cur = $result->fetch_row();
        } finally {
            // Release the result set even if fetching throws.
            $result->close();
        }
    } finally {
        // Close the connection on every path, including the failure paths above.
        $mysqli->close();
    }

    // Checked only after both handles have been released, so that throwing
    // here can no longer leave the result set or the connection open.
    if (!$cur) {
        throw new Exception("Failed to identify an entry that has a wikidata entry and a translation for language " . self::TEST_LANG);
    }

    // The ID is cast so the dataset matches the documented array{0: int, 1: string} shape.
    return [
        'Place with wikidata and translation' => [(int)$cur[0], $cur[1]],
    ];

}
|
||||
|
||||
/**
|
||||
* Data provider for a tag that has a wikidata link and a Telugu translation.
|
||||
*
|
||||
* @return array<string, array{0: int, 1: string}>
|
||||
*/
|
||||
public static function tagWithTlAndWikidataLinkProvider():array {

    $mysqli = md_main_mysqli_connect();

    try {
        // Only the first matching row is read below, so a single row is requested.
        $result = $mysqli->do_read_query("SELECT `tag_id`, `noda_nrinsource`
            FROM `" . DATABASENAME_NODA . "`.`noda_tag`
            WHERE `noda_source` = 'Wikidata'
                AND EXISTS (SELECT 1 FROM `" . DATABASENAME_NODA . "`.`tag_translation`
                    WHERE `tag_translation`.`tag_id` = `noda_tag`.`tag_id`
                        AND `trans_language` = '" . $mysqli->escape_string(self::TEST_LANG) . "')
            LIMIT 1");

        try {
            $cur = $result->fetch_row();
        } finally {
            // Release the result set even if fetching throws.
            $result->close();
        }
    } finally {
        // Close the connection on every path, including the failure paths above.
        $mysqli->close();
    }

    // Checked only after both handles have been released, so that throwing
    // here can no longer leave the result set or the connection open.
    if (!$cur) {
        throw new Exception("Failed to identify an entry that has a wikidata entry and a translation for language " . self::TEST_LANG);
    }

    // The ID is cast so the dataset matches the documented array{0: int, 1: string} shape.
    return [
        'Tag with wikidata and translation' => [(int)$cur[0], $cur[1]],
    ];

}
|
||||
|
||||
/**
|
||||
* Test for fetching and recording translations for an actor.
|
||||
*
|
||||
* @param integer $actor_id Actor ID.
|
||||
* @param string $wikidata_id Wikidata ID.
|
||||
*
|
||||
* @return void
|
||||
*/
|
||||
#[DataProvider('actorWithTlAndWikidataLinkProvider')]
public function testFetchingTranslationForPersinst(int $actor_id, string $wikidata_id):void {

    $mysqli = md_main_mysqli_connect();

    try {
        // FROM/WHERE fragment shared by the cleanup DELETE and both row counts.
        // It keeps its leading newline, as in the original queryNumRows() calls;
        // presumably the helper prepends the SELECT part - confirm before trimming.
        $fromWhere = "
            FROM `" . DATABASENAME_NODA . "`.`persinst_translation`
            WHERE `persinst_id` = " . $actor_id . "
                AND `trans_language` = '" . $mysqli->escape_string(self::TEST_LANG) . "'";

        // Start from a clean slate: remove any existing translation in the test language.
        $mysqli->do_update_query("DELETE" . $fromWhere);
        self::assertEquals(0, MDMysqliTesting::queryNumRows($mysqli, $fromWhere));

        // Fetch the entity and let the fetcher record the translation again.
        $data = NodaWikidataFetcher::getWikidataEntity($wikidata_id);
        $fetcher = new NodaWikidataFetcher($mysqli);
        $fetcher->getWikidataTranslationsForPersinst($data, $actor_id, [self::TEST_LANG]);

        self::assertEquals(1, MDMysqliTesting::queryNumRows($mysqli, $fromWhere));
    } finally {
        // Failed assertions throw, so the connection is closed here on every path.
        $mysqli->close();
    }

}

/**
 * Test that cleaning plain-text (non-HTML) input keeps the text and
 * begins with the sentence without its trailing reference marker "[1]".
 *
 * NOTE(review): in the reviewed source this test's lines were interleaved
 * with testFetchingTranslationForPersinst(); it is kept here as a separate
 * method. Verify it is not also defined elsewhere in this class.
 *
 * @return void
 */
public function testCleanWikidataInputWithoutHtml():void {

    $output = NodaWikidataFetcher::cleanWikidataInput('Werbowez (ukrainisch Вербовець; russisch Вербовец, polnisch Wierzbowiec; rumänisch Verboveț) ist ein Dorf in der ukrainischen Oblast Iwano-Frankiwsk mit etwa 3400 Einwohnern (2001).[1]');
    $expected = 'Werbowez (ukrainisch Вербовець; russisch Вербовец, polnisch Wierzbowiec; rumänisch Verboveț) ist ein Dorf in der ukrainischen Oblast Iwano-Frankiwsk mit etwa 3400 Einwohnern (2001).';

    self::assertTrue(
        str_starts_with($output, $expected),
        "Start of parsed Wikipedia text should be:" . PHP_EOL . PHP_EOL . $expected . PHP_EOL . PHP_EOL . 'Real start text is: ' . PHP_EOL . PHP_EOL . substr($output, 0, 250)
    );

}
|
||||
|
||||
/**
|
||||
* Test for fetching and recording translations for a place.
|
||||
*
|
||||
* @param integer $place_id Place ID.
|
||||
* @param string $wikidata_id Wikidata ID.
|
||||
*
|
||||
* @return void
|
||||
*/
|
||||
#[DataProvider('placeWithTlAndWikidataLinkProvider')]
public function testFetchingTranslationForPlace(int $place_id, string $wikidata_id):void {

    $mysqli = md_main_mysqli_connect();

    try {
        // FROM/WHERE fragment shared by the cleanup DELETE and both row counts.
        // It keeps its leading newline, as in the original queryNumRows() calls;
        // presumably the helper prepends the SELECT part - confirm before trimming.
        $fromWhere = "
            FROM `" . DATABASENAME_NODA . "`.`ort_translation`
            WHERE `ort_id` = " . $place_id . "
                AND `trans_language` = '" . $mysqli->escape_string(self::TEST_LANG) . "'";

        // Start from a clean slate: remove any existing translation in the test language.
        $mysqli->do_update_query("DELETE" . $fromWhere);
        self::assertEquals(0, MDMysqliTesting::queryNumRows($mysqli, $fromWhere));

        // Fetch the entity and let the fetcher record the translation again.
        $data = NodaWikidataFetcher::getWikidataEntity($wikidata_id);
        $fetcher = new NodaWikidataFetcher($mysqli);
        $fetcher->getWikidataTranslationsForPlace($data, $place_id, [self::TEST_LANG]);

        self::assertEquals(1, MDMysqliTesting::queryNumRows($mysqli, $fromWhere));
    } finally {
        // Failed assertions throw, so the connection is closed here on every path.
        $mysqli->close();
    }

}
|
||||
|
||||
/**
|
||||
* Test for fetching and recording translations for a tag.
|
||||
*
|
||||
* @param integer $tag_id Tag ID.
|
||||
* @param string $wikidata_id Wikidata ID.
|
||||
*
|
||||
* @return void
|
||||
*/
|
||||
#[DataProvider('tagWithTlAndWikidataLinkProvider')]
public function testFetchingTranslationForTag(int $tag_id, string $wikidata_id):void {

    $mysqli = md_main_mysqli_connect();

    try {
        // FROM/WHERE fragment shared by the cleanup DELETE and both row counts.
        // It keeps its leading newline, as in the original queryNumRows() calls;
        // presumably the helper prepends the SELECT part - confirm before trimming.
        $fromWhere = "
            FROM `" . DATABASENAME_NODA . "`.`tag_translation`
            WHERE `tag_id` = " . $tag_id . "
                AND `trans_language` = '" . $mysqli->escape_string(self::TEST_LANG) . "'";

        // Start from a clean slate: remove any existing translation in the test language.
        $mysqli->do_update_query("DELETE" . $fromWhere);
        self::assertEquals(0, MDMysqliTesting::queryNumRows($mysqli, $fromWhere));

        // Fetch the entity and let the fetcher record the translation again.
        $data = NodaWikidataFetcher::getWikidataEntity($wikidata_id);
        $fetcher = new NodaWikidataFetcher($mysqli);
        $fetcher->getWikidataTranslationsForTag($data, $tag_id, [self::TEST_LANG]);

        self::assertEquals(1, MDMysqliTesting::queryNumRows($mysqli, $fromWhere));
    } finally {
        // Failed assertions throw, so the connection is closed here on every path.
        $mysqli->close();
    }

}
|
||||
|
||||
/**
|
||||
* Test that fetching translation from Wikidata returns the title of the wikipedia page,
|
||||
* not the wikidata title.
|
||||
*
|
||||
* @return void
|
||||
*/
|
||||
public function testListTranslationsFromWikidataWikipediaReturnsWikipediaTitle():void {

    // Entity Q33550 is looked up and its German entry is inspected.
    $entity = NodaWikidataFetcher::getWikidataEntity("Q33550");
    $translations = NodaWikidataFetcher::listTranslationsFromWikidataWikipedia(["de"], $entity);

    $german = $translations['de'];
    self::assertNotEmpty($german);

    // The label must be the Wikipedia page title, including its disambiguation suffix.
    self::assertEquals("Friedrich II. (Preußen)", $german['label']);

}
|
||||
}
|
||||
|
Reference in New Issue
Block a user