Add automatic splitting and translation of centuries (CE)

This commit is contained in:
Joshua Ramon Enslin 2020-09-23 10:28:04 +02:00 committed by Stefan Rohde-Enslin
parent 308e11b4f8
commit 0f6a6ebc84
2 changed files with 165 additions and 1 deletions

View File

@ -21,6 +21,8 @@ final class NodaTimeAutotranslater {
const TRANSLATABLE_SINCE_START = 3;
const TRANSLATABLE_UNTIL_END = 4;
const TRANSLATABLE_ONLY_YEAR = 5;
const TRANSLATABLE_CENTURY = 6;
const TRANSLATABLE_DECADE = 7;
const LANGS_TO_LOCALES = [
'ar' => 'ar_SY.utf8',
@ -197,6 +199,52 @@ final class NodaTimeAutotranslater {
'zh' => '直到%s',
];
/**
 * sprintf() patterns for naming a single century, keyed by language code.
 *
 * Each pattern contains exactly one %s placeholder; translateYearsAsCentury()
 * fills it with the century number when start and end fall in the same century.
 */
const LANGS_CENTURY_FORMAT = [
'ar' => 'القرن ال %s',
'de' => '%s. Jahrhundert',
'en' => '%s. century',
'es' => 'Siglo %s',
'fa' => 'قرن %s',
'fr' => '%sème siècle',
'hu' => '%s. század',
'id' => 'abad ke-%s',
'it' => '%sesimo secolo',
'ka' => 'მე -%s საუკუნე',
'ko' => '%s 세기',
'pl' => '%s wiek',
'pt' => 'século %s',
'ro' => 'secolul al %s-lea',
'ru' => '%s век',
'ta' => '%s ஆம் நூற்றாண்டு',
'tl' => 'Ika-%s na siglo',
'tr' => '%s. yüzyıl',
'ja' => '%s世紀',
'zh' => '%s世紀',
];
/**
 * sprintf() patterns for naming a span of several centuries, keyed by language code.
 *
 * Each pattern contains exactly two %s placeholders; translateYearsAsCentury()
 * fills them with the first and the last century number, in that order.
 */
const LANGS_CENTURIES_FORMAT = [
'ar' => 'القرن ال %s-%s',
'de' => '%s.-%s. Jahrhundert',
'en' => '%s.-%s. century',
'es' => 'Siglo %s-%s',
'fa' => 'قرن %s-%s',
'fr' => '%s-%sème siècle',
'hu' => '%s.-%s. század',
'id' => 'abad ke-%s-%s',
'it' => '%s-%sesimo secolo',
'ka' => 'მე -%s-%s საუკუნე',
'ko' => '%s-%s 세기',
'pl' => '%s-%s wiek',
'pt' => 'século %s-%s',
'ro' => 'secolul al %s-%s-lea',
'ru' => '%s-%s век',
'ta' => '%s-%s ஆம் நூற்றாண்டு',
'tl' => 'Ika-%s hanggang ika-%s na siglo',
'tr' => '%s.-%s. yüzyıl',
'ja' => '%s世紀-%s世紀',
'zh' => '%s-%s世紀',
];
/** @var MDMysqli */
private MDMysqli $_mysqli_noda;
/** @var integer */
@ -222,9 +270,15 @@ final class NodaTimeAutotranslater {
if (intval($zeit_ende) > 1000 and $zeit_beginn === "?") {
return self::TRANSLATABLE_UNTIL_END;
}
if (intval($zeit_ende) >= 0 && intval($zeit_beginn) < 0) {
return self::TRANSLATABLE_NOT;
}
if (intval($zeit_ende) % 100 === 0 and (intval($zeit_beginn) - 1) % 100 === 0) {
return self::TRANSLATABLE_CENTURY;
}
if (intval($zeit_ende) < 0 && intval($zeit_beginn) < 0 || intval($zeit_ende) < 1000) {
return self::TRANSLATABLE_AS_YEAR_WITH_SUFFIX;
}
@ -334,6 +388,40 @@ final class NodaTimeAutotranslater {
}
/**
 * Translates century names: 19. Jahrhundert.
 *
 * Reads 'zeit_beginn' and 'zeit_ende' from the time information, derives the
 * century number of each and formats them for every language listed in
 * LANGS_CENTURY_FORMAT (start and end in the same century) or
 * LANGS_CENTURIES_FORMAT (span of several centuries).
 *
 * @param array<integer|string> $timeInfo Time information. Must contain the
 *                                        keys 'zeit_beginn' and 'zeit_ende'.
 *
 * @return array<string> Translations keyed by language code.
 */
public static function translateYearsAsCentury(array $timeInfo):array {

    $startYear = intval($timeInfo['zeit_beginn']);
    $endYear   = intval($timeInfo['zeit_ende']);

    // Integer division keeps the century numbers integers even if a caller
    // passes years that are not aligned to century boundaries. With "/" such
    // input produced floats, which broke the strict comparison below and
    // leaked values like "15.5" into the translated strings.

    // Start: 1501. intdiv(1501 - 1, 100) + 1 = 16. 16th century is the time.
    $start_cen = intdiv($startYear - 1, 100) + 1;
    // End: 1600. intdiv(1600, 100) = 16. 16th century is the time.
    $end_cen = intdiv($endYear, 100);

    $suffixMode = self::getSuffixModeForYearsWSuffix($startYear - 1, $endYear);

    // A single century needs one placeholder value, a span needs two.
    if ($start_cen === $end_cen) {
        $formats    = self::LANGS_CENTURY_FORMAT;
        $formatArgs = [(string)$start_cen];
    }
    else {
        $formats    = self::LANGS_CENTURIES_FORMAT;
        $formatArgs = [(string)$start_cen, (string)$end_cen];
    }

    $output = [];
    foreach ($formats as $tLang => $format) {
        $tLangValue = sprintf($format, ...$formatArgs);
        $output[$tLang] = self::applyBcBceFormat($tLang, $tLangValue, $suffixMode);
    }

    return $output;

}
/**
* Translated years or timespans below 1000 CE.
*
@ -412,6 +500,10 @@ final class NodaTimeAutotranslater {
return self::translateYearsUntilEnd($timeInfo);
}
if ($translation_type === self::TRANSLATABLE_CENTURY) {
return self::translateYearsAsCentury($timeInfo);
}
if ($translation_type === self::TRANSLATABLE_AS_YEAR_WITH_SUFFIX) {
return self::translateYearsWithSuffix($timeInfo);
}

View File

@ -525,6 +525,32 @@ final class NodaTimeSplitter {
*/
public static function is_incomplete_date(string $datum):array {
$datum = self::clean_input($datum);
if (preg_match("/^[0-9][0-9][0-9][0-9]\.[0-9][0-9]\.[0-9][0-9]\-$/", $datum)) { // Hungarian Y-m
$start = substr($datum, 0, 4);
$month = substr($datum, 5, 2);
$day = substr($datum, 8, 2);
return [$start, "?", $month, $day, "+", ""];
}
if (preg_match("/^[0-9][0-9][0-9][0-9]\.[0-9][0-9]\-$/", $datum)) { // Hungarian Y-m
$start = substr($datum, 0, 4);
$month = substr($datum, 5, 2);
return [$start, "?", $month, "00", "+", ""];
}
if (preg_match("/^\-[0-9][0-9][0-9][0-9]\.[0-9][0-9]\.[0-9][0-9]$/", $datum)) { // Hungarian Y-m
$start = substr($datum, 1, 4);
$month = substr($datum, 6, 2);
$day = substr($datum, 9, 2);
return ["?", $start, $month, $day, "+", ""];
}
if (preg_match("/^\-[0-9][0-9][0-9][0-9]\.[0-9][0-9]$/", $datum)) { // Hungarian Y-m
$start = substr($datum, 1, 4);
$month = substr($datum, 6, 2);
return ["?", $start, $month, "00", "+", ""];
}
if (preg_match("/^(Ab|Seit|seit)\ /", $datum)) {
if (($spacePos = strpos($datum, " ")) === false) {
return [];
@ -546,7 +572,7 @@ final class NodaTimeSplitter {
}
// Endings beginning with a space
if (preg_match("/ (\(bis\))$/", $datum)) {
if (preg_match("/ (\(bis)$/", $datum)) {
if (($spacePos = strrpos($datum, " ")) === false) {
return [];
}
@ -569,6 +595,51 @@ final class NodaTimeSplitter {
}
/**
 * Checks if an input date is a century.
 *
 * Recognised inputs (after clean_input()) are a century number followed by
 * "Jh.", "Jahrhundert" or "század", either single ("17. Jahrhundert",
 * "1. Jh.") or as a range ("17.-18. Jahrhundert", "17-18. század",
 * "9.-10. Jahrhundert"). Century numbers may have one or two digits.
 *
 * @param string $datum Input date.
 *
 * @return array<string> Empty array if the input is not a century. Otherwise
 *                       a list of start year, end year, month ("00"),
 *                       day ("00"), the BC/CE indicator (always "+" here) and
 *                       an empty string as last entry (its meaning is not
 *                       visible from this method; it mirrors the list shape
 *                       returned by is_incomplete_date()).
 */
public static function is_century(string $datum):array {

    $datum = self::clean_input($datum);
    $bcBceIndicator = '+';

    // Single century: 17. Jahrhundert / 1. Jahrhundert
    if (preg_match('/^([0-9]{1,2})\.\ (Jh\.|Jahrhundert|század)$/', $datum, $matches)) {
        $centuryNo = intval($matches[1]);
        // There is no "0th century"; such input is not a valid century.
        if ($centuryNo >= 1) {
            // The n-th century runs from year (n-1)01 to year n00.
            return [strval($centuryNo - 1) . "01", strval($centuryNo) . "00", "00", "00", $bcBceIndicator, ""];
        }
    }

    // Range of centuries: 17.-18. Jahrhundert / 17-18. Jahrhundert / 1.-2. Jahrhundert
    if (preg_match('/^([0-9]{1,2})\.?\-([0-9]{1,2})\.\ (Jh\.|Jahrhundert|század)$/', $datum, $matches)) {
        $startCentury = intval($matches[1]);
        // Reject century 0 on either side; it would otherwise yield a
        // nonsensical start year such as "-101".
        if ($startCentury >= 1 && intval($matches[2]) >= 1) {
            // The end century is kept as written in the input.
            return [strval($startCentury - 1) . "01", $matches[2] . "00", "00", "00", $bcBceIndicator, ""];
        }
    }

    return [];

}
/**
* Wrapper to check if any splitting command works.
*
@ -582,6 +653,7 @@ final class NodaTimeSplitter {
if (!$moda) $moda = NodaTimeSplitter::is_incomplete_date($datum);
if (!$moda) $moda = NodaTimeSplitter::is_valid_date($datum);
if (!$moda) $moda = NodaTimeSplitter::is_valid_date_hungarian($datum);
if (!$moda) $moda = NodaTimeSplitter::is_century($datum);
return $moda;