MDNodaHelpers/src/NodaTimeAutotranslater.php

718 lines
21 KiB
PHP
Raw Normal View History

2020-09-18 18:48:40 +02:00
<?PHP
/**
* Controls automatic translation of times.
*
* @author Joshua Ramon Enslin <joshua@museum-digital.de>
*/
declare(strict_types = 1);
require_once __DIR__ . '/inc/datesByCountry.php';
/**
* Autotranslater class for times.
*/
2020-09-18 21:38:49 +02:00
final class NodaTimeAutotranslater {
2020-09-18 18:48:40 +02:00
// Precision with which a counting date is formatted in getTranslations():
// month only, or a full day.
const USECASE_MONTH = 1;
const USECASE_DAY = 2;
// Translation types as returned by check_translatability().
// TRANSLATABLE_NOT is 0 and thus the only falsy value; getTranslations()
// relies on that to detect non-translatable times.
const TRANSLATABLE_NOT = 0;
// A counting time month is set: the time is formatted as a month or day.
const TRANSLATABLE_AS_MONTH_DAY = 1;
// Years ending before 1000 (or before year 0): rendered with a BCE / CE suffix.
const TRANSLATABLE_AS_YEAR_WITH_SUFFIX = 2;
// End is "?": rendered as "since <start>".
const TRANSLATABLE_SINCE_START = 3;
// Start is "?": rendered as "until <end>".
const TRANSLATABLE_UNTIL_END = 4;
// Start equals end and no month is set: rendered as a plain year.
const TRANSLATABLE_ONLY_YEAR = 5;
// Start is of the form xx01 and end of the form xx00: one or more centuries.
const TRANSLATABLE_CENTURY = 6;
// Start is of the form xxx0 and end of the form xxx9: one or more decades.
const TRANSLATABLE_DECADE = 7;
// Start differs from end and no month is set: rendered as a span of years.
const TRANSLATABLE_TIMESPAN_YEARS = 8;
2020-09-18 18:48:40 +02:00
// Maps each supported language code to the locale name that getTranslations()
// passes to setlocale(LC_TIME, ...) before formatting a month or day.
// Languages whose locale cannot be set at runtime are skipped there.
const LANGS_TO_LOCALES = [
'ar' => 'ar_SY.utf8',
'de' => 'de_DE.utf8',
'en' => 'en_US.utf8',
'es' => 'es_ES.utf8',
'fa' => 'fa_IR.UTF-8',
'fr' => 'fr_FR.utf8',
'hu' => 'hu_HU.utf8',
'id' => 'id_ID.utf8',
'it' => 'it_IT.utf8',
'ka' => 'ka_GE.UTF-8',
'ko' => 'ko_KR.UTF-8',
'pl' => 'pl_PL.utf8',
'pt' => 'pt_BR.utf8',
'ro' => 'ro_RO.UTF-8',
'ru' => 'ru_RU.UTF-8',
'ta' => 'ta_IN.UTF-8',
'tl' => 'tl_PH.utf8',
'tr' => 'tr_TR.utf8',
// Languages that don't really need a specific locale
// (they are formatted using the en_US locale).
'ja' => 'en_US.utf8',
'zh' => 'en_US.utf8',
];
// sprintf() formats for a single year; %s receives the (absolute) year.
// Only ko, ja and zh append a year marker, all others print the bare number.
const LANGS_SINGLE_YEAR_FORMAT = [
'ar' => '%s',
'de' => '%s',
'en' => '%s',
'es' => '%s',
'fa' => '%s',
'fr' => '%s',
'hu' => '%s',
'id' => '%s',
'it' => '%s',
'ka' => '%s',
'ko' => '%s년',
'pl' => '%s',
'pt' => '%s',
'ro' => '%s',
'ru' => '%s',
'ta' => '%s',
'tl' => '%s',
'tr' => '%s',
// Languages that are formatted using the en_US locale (see LANGS_TO_LOCALES)
'ja' => '%s年',
'zh' => '%s年',
];
// sprintf() formats for a span of years; the first %s receives the start
// year, the second %s the end year (both as absolute values).
const LANGS_YEARSPAN_FORMAT = [
'ar' => '%s-%s',
'de' => '%s-%s',
'en' => '%s-%s',
'es' => '%s-%s',
'fa' => '%s-%s',
'fr' => '%s-%s',
'hu' => '%s-%s',
'id' => '%s-%s',
'it' => '%s-%s',
'ka' => '%s-%s',
'ko' => '%s년-%s년',
'pl' => '%s-%s',
'pt' => '%s-%s',
'ro' => '%s-%s',
'ru' => '%s-%s',
'ta' => '%s-%s',
'tl' => '%s-%s',
'tr' => '%s-%s',
// Languages that are formatted using the en_US locale (see LANGS_TO_LOCALES)
'ja' => '%s年から%s年',
'zh' => '%s年至%s年',
];
// sprintf() formats marking a time as "before the common era".
// %s receives the already formatted time name; used by applyBcBceFormat()
// for suffix mode 2.
const LANGS_TO_BCE_FORMAT = [
'ar' => '-%s',
'de' => '%s v. Chr.',
'en' => '%s BC',
'es' => '%s a.C.',
'fa' => '-%s',
'fr' => '%s av. J.-C.',
'hu' => 'Kr. e. %s',
'id' => '%s SM',
'it' => '%s a.C.',
'ka' => 'ძვ. წ. %s წ',
'ko' => '기원전%s',
'pl' => '%s pne',
'pt' => '%s AC',
'ro' => '%s î.Hr.',
'ru' => '%s г. до н.э.',
'ta' => 'கிமு %s',
'tl' => '%s BC',
'tr' => 'MÖ %s',
// Languages that are formatted using the en_US locale (see LANGS_TO_LOCALES)
'ja' => '紀元前%s',
'zh' => '公元前%s',
];
// sprintf() formats marking a time as "common era".
// %s receives the already formatted time name; used by applyBcBceFormat()
// for suffix mode 1. The keys of this table also drive the language loop
// in translateYearsWithSuffix().
const LANGS_TO_CE_FORMAT = [
'ar' => '%s',
'de' => '%s n. Chr.',
'en' => '%s CE',
'es' => '%s d.C.',
'fa' => '%s',
'fr' => '%s ap. J.-C.',
'hu' => '%s',
'id' => '%s M.',
'it' => '%s d.C.',
'ka' => '%s წ',
'ko' => '%s',
'pl' => '%s n.e.',
'pt' => '%s dC',
'ro' => '%s',
'ru' => '%s',
'ta' => '%s பொ.ச.',
'tl' => '%s AD',
'tr' => '%s',
// Languages that are formatted using the en_US locale (see LANGS_TO_LOCALES)
'ja' => '西暦%s',
'zh' => '%s',
];
// sprintf() formats for open-ended times with a known start ("since 1950").
// %s receives the already translated start date (see translateYearsSinceStart()).
// Languages without a wording use the dash notation "<start>-".
const LANGS_SINCE_START_FORMAT_YEAR = [
'ar' => '%s-',
'de' => 'Seit %s',
'en' => 'Since %s',
'es' => 'Desde %s',
'fa' => '%s-',
'fr' => 'Depuis %s',
'hu' => '%s-től',
'id' => 'Sejak %s',
'it' => 'Dal %s',
'ka' => '%s წლიდან',
'ko' => '%s부터',
// Polish; the previous value ('Desde %s') was the Spanish wording.
'pl' => 'Od %s',
// Portuguese; the previous value ('%s dC') was the CE suffix, not "since".
'pt' => 'Desde %s',
'ro' => 'Din %s',
'ru' => 'С %s г.',
'ta' => '%s முதல்',
'tl' => 'Mula noong %s',
'tr' => '%s-',
// Languages that are formatted using the en_US locale (see LANGS_TO_LOCALES)
'ja' => '%s以来',
'zh' => '自%s以來',
];
// sprintf() formats for times with an unknown start and a known end
// ("until 1950"). %s receives the already translated end date
// (see translateYearsUntilEnd()).
// Languages without a wording use the dash notation "-<end>".
const LANGS_UNTIL_START_FORMAT_YEAR = [
'ar' => '-%s',
'de' => 'Bis %s',
'en' => 'Until %s',
'es' => 'Hasta %s',
'fa' => '-%s',
'fr' => 'Jusqu\'en %s',
'hu' => '%s-ig',
'id' => 'Sampai %s',
'it' => 'Fino al %s',
'ka' => '%s წლამდე',
'ko' => '%s까지',
'pl' => 'do %s roku',
'pt' => 'até %s',
'ro' => 'până în %s',
'ru' => 'до %s г.',
'ta' => '%s வரை',
'tl' => 'Hanggang %s',
// Leading dash like ar / fa. The previous value ('%s-') was identical to
// the Turkish "since" format, making both cases indistinguishable.
'tr' => '-%s',
// Languages that are formatted using the en_US locale (see LANGS_TO_LOCALES)
'ja' => '%sまで',
'zh' => '直到%s',
];
// sprintf() formats for a single century; %s receives the century number
// (see translateYearsAsCentury()).
const LANGS_CENTURY_FORMAT = [
'ar' => 'القرن ال %s',
'de' => '%s. Jahrhundert',
'en' => '%s. century',
'es' => 'Siglo %s',
'fa' => 'قرن %s',
'fr' => '%sème siècle',
'hu' => '%s. század',
'id' => 'abad ke-%s',
'it' => '%sesimo secolo',
'ka' => 'მე -%s საუკუნე',
'ko' => '%s 세기',
'pl' => '%s wiek',
'pt' => 'século %s',
'ro' => 'secolul al %s-lea',
'ru' => '%s век',
'ta' => '%s ஆம் நூற்றாண்டு',
'tl' => 'Ika-%s na siglo',
'tr' => '%s. yüzyıl',
'ja' => '%s世紀',
'zh' => '%s世紀',
];
// sprintf() formats for a span of centuries; the first %s receives the
// first century number, the second %s the last one.
const LANGS_CENTURIES_FORMAT = [
'ar' => 'القرن ال %s-%s',
'de' => '%s.-%s. Jahrhundert',
'en' => '%s.-%s. century',
'es' => 'Siglo %s-%s',
'fa' => 'قرن %s-%s',
'fr' => '%s-%sème siècle',
'hu' => '%s.-%s. század',
'id' => 'abad ke-%s-%s',
'it' => '%s-%sesimo secolo',
'ka' => 'მე -%s-%s საუკუნე',
'ko' => '%s-%s 세기',
'pl' => '%s-%s wiek',
'pt' => 'século %s-%s',
'ro' => 'secolul al %s-%s-lea',
'ru' => '%s-%s век',
'ta' => '%s-%s ஆம் நூற்றாண்டு',
'tl' => 'Ika-%s hanggang ika-%s na siglo',
'tr' => '%s.-%s. yüzyıl',
'ja' => '%s世紀-%s世紀',
'zh' => '%s-%s世紀',
];
// sprintf() formats for a single decade. translateYearsAsDecade() passes two
// arguments: the first year and the last year of the decade. Formats with a
// single %s only use the first year; ar and es print both ("1920-1929").
const LANGS_DECADE_FORMAT = [
'ar' => '%s-%s',
'de' => '%ser Jahre',
'en' => '%ss',
'es' => '%s-%s',
'fa' => 'دهه %s',
'fr' => 'Années %s',
'hu' => '%s-as évek',
'id' => 'Tahun %s-an',
'it' => '%ss',
'ka' => '%s-იანი წლები',
'ko' => '%s 년대',
'pl' => '%s roku',
'pt' => 'Década de %s',
'ro' => 'Anii %s',
'ru' => '%s-е годы',
'ta' => '%s கள்',
'tl' => '%ss',
'tr' => '%s\'ler',
'ja' => '%s年代',
'zh' => '%s年代',
];
// sprintf() formats for a span of decades. translateYearsAsDecade() passes
// the first year of the first decade and the first year of the last decade
// (end year minus 9).
const LANGS_DECADES_FORMAT = [
'ar' => '%s-%s',
'de' => '%s-%ser Jahre',
'en' => '%s-%ss',
'es' => '%s-%s',
'fa' => 'دهه %s-%s',
'fr' => 'Années %s-%s',
'hu' => '%s-%s-as évek',
'id' => 'Tahun %s-an sampai tahun %s-an',
'it' => '%s-%ss',
'ka' => '%s-%s-იანი წლები',
'ko' => '%s-%s 년대',
'pl' => '%s-%s roku',
'pt' => 'Décadas de %s-%s',
'ro' => 'Anii %s-%s',
'ru' => '%s-%s-е годы',
'ta' => '%s-%s கள்',
'tl' => '%s-%ss',
'tr' => '%s-%s\'ler',
'ja' => '%s年代から%s年代',
'zh' => '%s年代-%s年代',
];
// Per-language substring replacements applied (via strtr()) to generated
// decade names. For Hungarian, the suffix "-as" is replaced by "-es" after
// decades ending in 10, 40, 50, 70 and 90.
const LANGS_SYLLABLE_CLEANING = [
"hu" => [
"10-as évek" => "10-es évek",
"40-as évek" => "40-es évek",
"50-as évek" => "50-es évek",
"70-as évek" => "70-es évek",
"90-as évek" => "90-es évek",
],
];
2020-09-18 18:48:40 +02:00
/**
 * Database connection handed to the constructor. It is stored there; the
 * methods of this class only use the prepared statement below.
 *
 * @var MDMysqli
 */
private MDMysqli $_mysqli_noda;
/**
 * ID of the time entry the translations are written for
 * (bound to `zeit_id` in translate()).
 *
 * @var integer
 */
private int $_znum;
/**
 * Prepared INSERT statement for `zeit_translation`; prepared in the
 * constructor, executed in translate(), closed in the destructor.
 *
 * @var MDMysqliStmt
 */
private MDMysqliStmt $_insertStmt;
/**
 * Determines how (and whether) a time can be translated automatically.
 *
 * The checks are applied in a fixed order; the first match wins:
 * unknown start / end ("?"), mixed BCE-to-CE spans (not translatable),
 * centuries, decades, years needing a BCE / CE suffix, plain years and
 * year spans (no month set), and finally times with a counting time month.
 *
 * A time whose start AND end are unknown is reported as not translatable:
 * treating it as "since <start>" would make the since / until translators
 * call getTranslations() on the same unknown date over and over again.
 *
 * @param string $zeit_beginn          Begin year, or "?" if unknown.
 * @param string $zeit_ende            End year, or "?" if unknown.
 * @param string $zeit_zaehlzeit_monat Counting time month.
 *
 * @return integer One of the TRANSLATABLE_* constants.
 */
public static function check_translatability(string $zeit_beginn, string $zeit_ende, string $zeit_zaehlzeit_monat):int {

    // Nothing to build a translation from if neither end is known.
    if ($zeit_beginn === "?" && $zeit_ende === "?") {
        return self::TRANSLATABLE_NOT;
    }
    if ($zeit_ende === "?") {
        return self::TRANSLATABLE_SINCE_START;
    }
    if ($zeit_beginn === "?") {
        return self::TRANSLATABLE_UNTIL_END;
    }

    $start = intval($zeit_beginn);
    $end   = intval($zeit_ende);

    // Spans crossing from BCE into CE are not supported.
    if ($end >= 0 && $start < 0) {
        return self::TRANSLATABLE_NOT;
    }

    // E.g. 1901-2000: start is the first, end the last year of a century.
    if ($end % 100 === 0 && ($start - 1) % 100 === 0) {
        return self::TRANSLATABLE_CENTURY;
    }

    // E.g. 1920-1929: start is the first, end the last year of a decade.
    if (($end + 1) % 10 === 0 && $start % 10 === 0) {
        return self::TRANSLATABLE_DECADE;
    }

    // Years before 1000 CE (including all BCE times) get an era suffix.
    if (($end < 0 && $start < 0) || $end < 1000) {
        return self::TRANSLATABLE_AS_YEAR_WITH_SUFFIX;
    }

    // A month consisting only of zeros, spaces, commas or dots counts as unset.
    $monthIsSet = trim($zeit_zaehlzeit_monat, ", .0") !== "";
    if ($monthIsSet === false) {
        return $zeit_ende === $zeit_beginn
            ? self::TRANSLATABLE_ONLY_YEAR
            : self::TRANSLATABLE_TIMESPAN_YEARS;
    }

    return self::TRANSLATABLE_AS_MONTH_DAY;

}
/**
 * Decides which era suffix a year or span of years needs.
 *
 * @param integer $start Start year.
 * @param integer $end   End year.
 *
 * @return integer 2 if both years are negative (BCE suffix), 1 if the end
 *                 year is below 1000 (CE suffix), 0 otherwise (no suffix).
 */
public static function getSuffixModeForYearsWSuffix(int $start, int $end):int {

    // Entirely before the common era: " v. Chr.".
    if ($start < 0 && $end < 0) {
        return 2;
    }

    // Ends before the year 1000: " n. Chr.".
    if ($end < 1000) {
        return 1;
    }

    // Times from 1000 CE onwards carry no suffix.
    return 0;

}
/**
 * Wraps a time name into the language's BCE or CE format, if required.
 *
 * @param string  $tLang      Language code (a key of LANGS_TO_CE_FORMAT /
 *                            LANGS_TO_BCE_FORMAT).
 * @param string  $timeName   Time name.
 * @param integer $suffixMode Suffix mode: 0 = none, 1 = CE, 2 = BCE.
 *
 * @throws Exception If the suffix mode is none of 0, 1 and 2.
 *
 * @return string
 */
public static function applyBcBceFormat(string $tLang, string $timeName, int $suffixMode):string {

    // Mode 0: the name is used as is.
    if ($suffixMode === 0) {
        return $timeName;
    }

    if ($suffixMode === 1) {
        $eraFormat = self::LANGS_TO_CE_FORMAT[$tLang];
    }
    else if ($suffixMode === 2) {
        $eraFormat = self::LANGS_TO_BCE_FORMAT[$tLang];
    }
    else {
        throw new Exception("Unknown case encountered for time translations.");
    }

    return sprintf($eraFormat, $timeName);

}
/**
 * Translates a year or a span of years that needs an era suffix
 * (e.g. "800 n. Chr.", "500-400 v. Chr.").
 *
 * Years are printed as absolute values; the era is expressed through the
 * suffix chosen by getSuffixModeForYearsWSuffix().
 *
 * @param array<integer|string> $timeInfo Time information; reads
 *                                        `zeit_beginn` and `zeit_ende`.
 *
 * @return array<string> Translations, keyed by language code.
 */
public static function translateYearsWithSuffix(array $timeInfo):array {

    $start      = intval($timeInfo['zeit_beginn']);
    $end        = intval($timeInfo['zeit_ende']);
    $suffixMode = self::getSuffixModeForYearsWSuffix($start, $end);

    $isSingleYear = ($start === $end);
    $startYear    = (string)abs($start);
    $endYear      = (string)abs($end);

    $output = [];
    foreach (array_keys(self::LANGS_TO_CE_FORMAT) as $tLang) {

        if ($isSingleYear) {
            $yearName = sprintf(self::LANGS_SINGLE_YEAR_FORMAT[$tLang], $startYear);
        }
        else {
            $yearName = sprintf(self::LANGS_YEARSPAN_FORMAT[$tLang], $startYear, $endYear);
        }

        $output[$tLang] = self::applyBcBceFormat($tLang, $yearName, $suffixMode);

    }

    return $output;

}
/**
 * Translates a single year without suffix: 1994.
 *
 * @param array<integer|string> $timeInfo Time information; reads `zeit_beginn`.
 *
 * @return array<string> Translations, keyed by language code.
 */
public static function translateYearOnly(array $timeInfo):array {

    $year = (string)abs(intval($timeInfo['zeit_beginn']));

    // array_map() keeps the language codes as keys.
    return array_map(
        static function (string $format) use ($year):string {
            return sprintf($format, $year);
        },
        self::LANGS_SINGLE_YEAR_FORMAT
    );

}
/**
 * Translates a span of years without suffix: 1994-1996.
 *
 * @param array<integer|string> $timeInfo Time information; reads
 *                                        `zeit_beginn` and `zeit_ende`.
 *
 * @return array<string> Translations, keyed by language code.
 */
public static function translateTimespanYears(array $timeInfo):array {

    $firstYear = (string)abs(intval($timeInfo['zeit_beginn']));
    $lastYear  = (string)abs(intval($timeInfo['zeit_ende']));

    // array_map() keeps the language codes as keys.
    return array_map(
        static function (string $format) use ($firstYear, $lastYear):string {
            return sprintf($format, $firstYear, $lastYear);
        },
        self::LANGS_YEARSPAN_FORMAT
    );

}
/**
 * Translates century names: 19. Jahrhundert.
 *
 * Expects a start year of the form xx01 and an end year of the form xx00,
 * as ensured by check_translatability() for TRANSLATABLE_CENTURY.
 *
 * @param array<integer|string> $timeInfo Time information; reads
 *                                        `zeit_beginn` and `zeit_ende`.
 *
 * @return array<string> Translations, keyed by language code.
 */
public static function translateYearsAsCentury(array $timeInfo):array {

    $beginYear = intval($timeInfo['zeit_beginn']);
    $endYear   = intval($timeInfo['zeit_ende']);

    // Start 1501: (1501 - 1) / 100 + 1 = 16, i.e. the 16th century.
    $firstCentury = (($beginYear - 1) / 100) + 1;
    // End 1600: 1600 / 100 = 16, i.e. the 16th century.
    $lastCentury = ($endYear / 100);

    $suffixMode = self::getSuffixModeForYearsWSuffix($beginYear - 1, $endYear);

    // One century uses the singular formats, several the span formats.
    // The singular formats contain a single %s, so the additional
    // argument passed below is simply ignored by sprintf().
    $formats = ($firstCentury === $lastCentury)
        ? self::LANGS_CENTURY_FORMAT
        : self::LANGS_CENTURIES_FORMAT;

    $output = [];
    foreach ($formats as $tLang => $format) {
        $centuryName    = sprintf($format, (string)$firstCentury, (string)$lastCentury);
        $output[$tLang] = self::applyBcBceFormat($tLang, $centuryName, $suffixMode);
    }

    return $output;

}
/**
 * Translates decade names: 1920er Jahre.
 *
 * Expects a start year of the form xxx0 and an end year of the form xxx9,
 * as ensured by check_translatability() for TRANSLATABLE_DECADE.
 *
 * @param array<integer|string> $timeInfo Time information; reads
 *                                        `zeit_beginn` and `zeit_ende`.
 *
 * @return array<string> Translations, keyed by language code.
 */
public static function translateYearsAsDecade(array $timeInfo):array {

    $firstYear = intval($timeInfo['zeit_beginn']);
    $lastYear  = intval($timeInfo['zeit_ende']);

    $suffixMode = self::getSuffixModeForYearsWSuffix($firstYear - 1, $lastYear);

    // First year of the decade the time ends in (1929 => 1920).
    $lastDecadeStart = $lastYear - 9;

    if ($firstYear === $lastDecadeStart) {
        // A single decade: formats get the first and the last year of it.
        $formats      = self::LANGS_DECADE_FORMAT;
        $secondNumber = (string)$lastYear;
    }
    else {
        // Several decades: formats get the first year of the first and of
        // the last decade.
        $formats      = self::LANGS_DECADES_FORMAT;
        $secondNumber = (string)$lastDecadeStart;
    }

    $output = [];
    foreach ($formats as $tLang => $format) {

        $decadeName = sprintf($format, (string)$firstYear, $secondNumber);

        // Language-specific corrections of the generated suffix.
        $cleanups = self::LANGS_SYLLABLE_CLEANING[$tLang] ?? [];
        if ($cleanups !== []) {
            $decadeName = strtr($decadeName, $cleanups);
        }

        $output[$tLang] = self::applyBcBceFormat($tLang, $decadeName, $suffixMode);

    }

    return $output;

}
2020-09-18 18:48:40 +02:00
/**
 * Translates open-ended times with a known start: "Since 1994".
 *
 * The start date is first translated on its own (as if start and end were
 * identical) and then wrapped into the language's "since" format.
 * Languages for which the start date could not be translated are left out.
 *
 * @param array<integer|string> $timeInfo Time information.
 *
 * @throws MDgenericInvalidInputsException If the start is unknown ("?") or
 *                                         cannot be translated on its own.
 *
 * @return array<string> Translations, keyed by language code.
 */
public static function translateYearsSinceStart(array $timeInfo):array {

    // Without a start there is nothing to translate. Going on would end in
    // an endless recursion between this method and getTranslations().
    if ((string)$timeInfo['zeit_beginn'] === "?") {
        throw new MDgenericInvalidInputsException("Cannot translate a time without a known start as 'since start'");
    }

    $innerTimeInfo = $timeInfo;
    $innerTimeInfo['zeit_ende'] = $timeInfo['zeit_beginn'];

    // The translations of the bare start date are the same for every
    // language of the loop below, so they are generated only once.
    $datesAlone = self::getTranslations($innerTimeInfo);

    $output = [];
    foreach (self::LANGS_SINCE_START_FORMAT_YEAR as $tLang => $format) {

        // getTranslations() skips languages whose locale is unavailable.
        if (!isset($datesAlone[$tLang])) {
            continue;
        }

        $output[$tLang] = sprintf($format, $datesAlone[$tLang]);

    }

    return $output;

}
/**
 * Translates times with an unknown start and a known end: "Until 1994".
 *
 * The end date is first translated on its own (as if start and end were
 * identical) and then wrapped into the language's "until" format.
 * Languages for which the end date could not be translated are left out.
 *
 * @param array<integer|string> $timeInfo Time information.
 *
 * @throws MDgenericInvalidInputsException If the end is unknown ("?") or
 *                                         cannot be translated on its own.
 *
 * @return array<string> Translations, keyed by language code.
 */
public static function translateYearsUntilEnd(array $timeInfo):array {

    // Without an end there is nothing to translate. Going on would end in
    // an endless recursion via getTranslations().
    if ((string)$timeInfo['zeit_ende'] === "?") {
        throw new MDgenericInvalidInputsException("Cannot translate a time without a known end as 'until end'");
    }

    $innerTimeInfo = $timeInfo;
    $innerTimeInfo['zeit_beginn'] = $timeInfo['zeit_ende'];

    // The translations of the bare end date are the same for every
    // language of the loop below, so they are generated only once.
    $datesAlone = self::getTranslations($innerTimeInfo);

    $output = [];
    foreach (self::LANGS_UNTIL_START_FORMAT_YEAR as $tLang => $format) {

        // getTranslations() skips languages whose locale is unavailable.
        if (!isset($datesAlone[$tLang])) {
            continue;
        }

        $output[$tLang] = sprintf($format, $datesAlone[$tLang]);

    }

    return $output;

}
/**
 * Gets the translations of a time in all supported languages.
 *
 * The time is classified using check_translatability() and handed to the
 * translator matching its type. Times with a counting time month are
 * formatted as a month or a day using the locale of each language.
 *
 * @param array<integer|string> $timeInfo Time information. Reads
 *                                        `zeit_beginn`, `zeit_ende` and
 *                                        `zeit_zaehlzeit_monat`; for months
 *                                        and days also `zeit_zaehlzeit_jahr`
 *                                        and `zeit_zaehlzeit_tag`.
 *
 * @throws MDgenericInvalidInputsException If the time is not translatable
 *                                         or its counting date is invalid.
 *
 * @return array<string> Translations, keyed by language code.
 */
public static function getTranslations(array $timeInfo):array {

    $translation_type = self::check_translatability(
        (string)$timeInfo['zeit_beginn'],
        (string)$timeInfo['zeit_ende'],
        (string)$timeInfo['zeit_zaehlzeit_monat']
    );

    switch ($translation_type) {
        case self::TRANSLATABLE_NOT:
            throw new MDgenericInvalidInputsException("Non-translatable date");
        case self::TRANSLATABLE_ONLY_YEAR:
            return self::translateYearOnly($timeInfo);
        case self::TRANSLATABLE_SINCE_START:
            return self::translateYearsSinceStart($timeInfo);
        case self::TRANSLATABLE_UNTIL_END:
            return self::translateYearsUntilEnd($timeInfo);
        case self::TRANSLATABLE_CENTURY:
            return self::translateYearsAsCentury($timeInfo);
        case self::TRANSLATABLE_DECADE:
            return self::translateYearsAsDecade($timeInfo);
        case self::TRANSLATABLE_TIMESPAN_YEARS:
            return self::translateTimespanYears($timeInfo);
        case self::TRANSLATABLE_AS_YEAR_WITH_SUFFIX:
            return self::translateYearsWithSuffix($timeInfo);
        default:
            // TRANSLATABLE_AS_MONTH_DAY
            return self::translateCountingDate($timeInfo);
    }

}
/**
 * Translates a time with a counting time month as a month ("May 1994") or,
 * if a counting time day is set as well, as a full date.
 *
 * The LC_TIME locale is switched once per language for formatting and is
 * reset to its previous value before returning. Languages whose locale is
 * not available on the system are left out of the result.
 *
 * NOTE(review): strftime() is deprecated as of PHP 8.1. It is kept because
 * the formats returned by getMonthFormatByLang() / getDateFormatByLang()
 * are passed to it directly.
 *
 * @param array<integer|string> $timeInfo Time information.
 *
 * @throws MDgenericInvalidInputsException If the counting date cannot be parsed.
 *
 * @return array<string> Translations, keyed by language code.
 */
private static function translateCountingDate(array $timeInfo):array {

    // A day consisting only of zeros, spaces, commas or dots counts as unset.
    if (trim((string)$timeInfo['zeit_zaehlzeit_tag'], ", .0") === "") {
        // Only the month is known: any day within the month will do.
        $dateStr = "{$timeInfo['zeit_zaehlzeit_jahr']}-{$timeInfo['zeit_zaehlzeit_monat']}-05 00:00:01";
        $usecase = self::USECASE_MONTH;
    }
    else {
        $dateStr = "{$timeInfo['zeit_zaehlzeit_jahr']}-{$timeInfo['zeit_zaehlzeit_monat']}-{$timeInfo['zeit_zaehlzeit_tag']} 00:00:01";
        $usecase = self::USECASE_DAY;
    }

    // An unparsable date must not silently be translated as January 1970.
    $dateGeneral = strtotime($dateStr);
    if ($dateGeneral === false) {
        throw new MDgenericInvalidInputsException("Invalid date: " . $dateStr);
    }

    // setlocale() affects the whole process: remember the current setting.
    $previousLocale = setlocale(LC_TIME, "0");

    $output = [];
    try {
        foreach (self::LANGS_TO_LOCALES as $tLang => $locale) {

            setlocale(LC_TIME, $locale);
            // Skip the language if the locale could not be activated.
            if ($locale !== setlocale(LC_TIME, "0")) {
                continue;
            }

            if ($usecase === self::USECASE_MONTH) {
                $format = getMonthFormatByLang($tLang);
            }
            else {
                $format = getDateFormatByLang($tLang);
            }

            $output[$tLang] = strftime($format, $dateGeneral);

        }
    }
    finally {
        if ($previousLocale !== false) {
            setlocale(LC_TIME, $previousLocale);
        }
    }

    return $output;

}
/**
 * Runs the autotranslater: generates all translations of the given time
 * and inserts one row per language into `zeit_translation`, using the
 * statement prepared in the constructor.
 *
 * @param array<integer|string> $timeInfo Time information.
 *
 * @return void
 */
public function translate(array $timeInfo):void {

    foreach (self::getTranslations($timeInfo) as $language => $translatedName) {
        $this->_insertStmt->bind_param("iss", $this->_znum, $language, $translatedName);
        $this->_insertStmt->execute();
    }

}
/**
 * Constructor. Stores the database connection and the time ID and prepares
 * the INSERT statement that translate() executes once per language.
 *
 * @param MDMysqli $mysqli_noda Database connection.
 * @param integer  $znum        Time ID.
 *
 * @return void
 */
public function __construct(MDMysqli $mysqli_noda, int $znum) {
$this->_mysqli_noda = $mysqli_noda;
$this->_znum = $znum;
// Placeholders, in order: time ID, language code, translated name
// (bound as "iss" in translate()).
$this->_insertStmt = $mysqli_noda->do_prepare("INSERT INTO `zeit_translation`
(`zeit_id`, `trans_language`, `trans_name`)
VALUES
(?, ?, ?)");
}
/**
 * Destructor. Closes the INSERT statement prepared in the constructor.
 *
 * @return void
 */
public function __destruct() {
$this->_insertStmt->close();
}
}