MDNodaHelpers/src/NodaTimeAutotranslater.php

749 lines
22 KiB
PHP
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

<?PHP
/**
* Controls automatic translation of times.
*
* @author Joshua Ramon Enslin <joshua@museum-digital.de>
*/
declare(strict_types = 1);
require_once __DIR__ . '/inc/datesByCountry.php';
/**
* Autotranslater class for times.
*/
final class NodaTimeAutotranslater {
// Granularity of a date translated in getTranslations(): month only, or month and day.
const USECASE_MONTH = 1;
const USECASE_DAY = 2;
// Translation types as returned by check_translatability().
// TRANSLATABLE_NOT is 0, so getTranslations() can reject it with a plain falsy check.
const TRANSLATABLE_NOT = 0;
// Handled at the end of getTranslations() using strftime() with a month or date format.
const TRANSLATABLE_AS_MONTH_DAY = 1;
// Handled by translateYearsWithSuffix().
const TRANSLATABLE_AS_YEAR_WITH_SUFFIX = 2;
// Handled by translateYearsSinceStart(): the end of the time is "?".
const TRANSLATABLE_SINCE_START = 3;
// Handled by translateYearsUntilEnd(): the start of the time is "?".
const TRANSLATABLE_UNTIL_END = 4;
// Handled by translateYearOnly().
const TRANSLATABLE_ONLY_YEAR = 5;
// Handled by translateYearsAsCentury().
const TRANSLATABLE_CENTURY = 6;
// Handled by translateYearsAsDecade().
const TRANSLATABLE_DECADE = 7;
// Handled by translateTimespanYears().
const TRANSLATABLE_TIMESPAN_YEARS = 8;
/**
 * Maps language codes to the locale names passed to setlocale(LC_TIME, ...)
 * in getTranslations() before month / day names are formatted with strftime().
 * Languages whose locale cannot be activated are skipped there.
 */
const LANGS_TO_LOCALES = [
'ar' => 'ar_SY.utf8',
'de' => 'de_DE.utf8',
'en' => 'en_US.utf8',
'es' => 'es_ES.utf8',
'fa' => 'fa_IR.UTF-8',
'fr' => 'fr_FR.utf8',
'hu' => 'hu_HU.utf8',
'id' => 'id_ID.utf8',
'it' => 'it_IT.utf8',
'ka' => 'ka_GE.UTF-8',
'ko' => 'ko_KR.UTF-8',
'pl' => 'pl_PL.utf8',
'pt' => 'pt_BR.utf8',
'ro' => 'ro_RO.UTF-8',
'ru' => 'ru_RU.UTF-8',
'ta' => 'ta_IN.UTF-8',
'tl' => 'tl_PH.utf8',
'tr' => 'tr_TR.utf8',
// Languages that don't really need a specific locale (they fall back to en_US)
'ja' => 'en_US.utf8',
'zh' => 'en_US.utf8',
];
/**
 * sprintf() formats (one placeholder) for a single year, per language.
 * Used by translateYearOnly() and, for times where start and end year are equal,
 * by translateYearsWithSuffix().
 */
const LANGS_SINGLE_YEAR_FORMAT = [
'ar' => '%s',
'de' => '%s',
'en' => '%s',
'es' => '%s',
'fa' => '%s',
'fr' => '%s',
'hu' => '%s',
'id' => '%s',
'it' => '%s',
'ka' => '%s',
'ko' => '%s년',
'pl' => '%s',
'pt' => '%s',
'ro' => '%s',
'ru' => '%s',
'ta' => '%s',
'tl' => '%s',
'tr' => '%s',
// Languages mapped to the en_US locale in LANGS_TO_LOCALES
'ja' => '%s年',
'zh' => '%s年',
];
/**
 * sprintf() formats (two placeholders: start year, end year) for a span of years,
 * per language. Used by translateTimespanYears() and translateYearsWithSuffix().
 */
const LANGS_YEARSPAN_FORMAT = [
'ar' => '%s-%s',
'de' => '%s-%s',
'en' => '%s-%s',
'es' => '%s-%s',
'fa' => '%s-%s',
'fr' => '%s-%s',
'hu' => '%s-%s',
'id' => '%s-%s',
'it' => '%s-%s',
'ka' => '%s-%s',
'ko' => '%s년-%s년',
'pl' => '%s-%s',
'pt' => '%s-%s',
'ro' => '%s-%s',
'ru' => '%s-%s',
'ta' => '%s-%s',
'tl' => '%s-%s',
'tr' => '%s-%s',
// Languages mapped to the en_US locale in LANGS_TO_LOCALES
'ja' => '%s年から%s年',
'zh' => '%s年至%s年',
];
/**
 * sprintf() formats (one placeholder) marking an already formatted time name
 * as lying before the common era, per language.
 * Applied by applyBcBceFormat() for suffix mode 2.
 */
const LANGS_TO_BCE_FORMAT = [
'ar' => '-%s',
'de' => '%s v. Chr.',
'en' => '%s BC',
'es' => '%s a.C.',
'fa' => '-%s',
'fr' => '%s av. J.-C.',
'hu' => 'Kr. e. %s',
'id' => '%s SM',
'it' => '%s a.C.',
'ka' => 'ძვ. წ. %s წ',
'ko' => '기원전%s',
'pl' => '%s pne',
'pt' => '%s AC',
'ro' => '%s î.Hr.',
'ru' => '%s г. до н.э.',
'ta' => 'கிமு %s',
'tl' => '%s BC',
'tr' => 'MÖ %s',
// Languages mapped to the en_US locale in LANGS_TO_LOCALES
'ja' => '紀元前%s',
'zh' => '公元前%s',
];
/**
 * sprintf() formats (one placeholder) marking an already formatted time name
 * as lying in the common era, per language.
 * Applied by applyBcBceFormat() for suffix mode 1. translateYearsWithSuffix()
 * also iterates over this list and strips the text around the placeholder
 * from a translation before the BCE format is applied.
 */
const LANGS_TO_CE_FORMAT = [
'ar' => '%s',
'de' => '%s n. Chr.',
'en' => '%s CE',
'es' => '%s d.C.',
'fa' => '%s',
'fr' => '%s ap. J.-C.',
'hu' => '%s',
'id' => '%s M.',
'it' => '%s d.C.',
'ka' => '%s წ',
'ko' => '%s',
'pl' => '%s n.e.',
'pt' => '%s dC',
'ro' => '%s',
'ru' => '%s',
'ta' => '%s பொ.ச.',
'tl' => '%s AD',
'tr' => '%s',
// Languages mapped to the en_US locale in LANGS_TO_LOCALES
'ja' => '西暦%s',
'zh' => '%s',
];
/**
 * sprintf() formats (one placeholder) for times of which only the start is known
 * ("Since 1950"), per language. The placeholder receives the already translated
 * start of the time. Used by translateYearsSinceStart().
 *
 * The Polish and Portuguese entries were corrected: Polish previously carried the
 * Spanish / Portuguese wording ("Desde %s") and Portuguese a copy of its CE format ("%s dC").
 */
const LANGS_SINCE_START_FORMAT_YEAR = [
    'ar' => '%s-',
    'de' => 'Seit %s',
    'en' => 'Since %s',
    'es' => 'Desde %s',
    'fa' => '%s-',
    'fr' => 'Depuis %s',
    'hu' => '%s-től',
    'id' => 'Sejak %s',
    'it' => 'Dal %s',
    'ka' => '%s წლიდან',
    'ko' => '%s부터',
    'pl' => 'Od %s roku',
    'pt' => 'Desde %s',
    'ro' => 'Din %s',
    'ru' => 'С %s г.',
    'ta' => '%s முதல்',
    'tl' => 'Mula noong %s',
    'tr' => '%s-',
    // Languages mapped to the en_US locale in LANGS_TO_LOCALES
    'ja' => '%s以来',
    'zh' => '自%s以來',
];
/**
 * sprintf() formats (one placeholder) for times of which only the end is known
 * ("Until 1950"), per language. The placeholder receives the already translated
 * end of the time. Used by translateYearsUntilEnd().
 *
 * The Turkish entry was corrected from '%s-' (identical to the "since" format, so both
 * kinds of open-ended times rendered the same) to '-%s', following the symbolic
 * notation already used for Arabic and Persian in this list.
 */
const LANGS_UNTIL_START_FORMAT_YEAR = [
    'ar' => '-%s',
    'de' => 'Bis %s',
    'en' => 'Until %s',
    'es' => 'Hasta %s',
    'fa' => '-%s',
    'fr' => 'Jusqu\'en %s',
    'hu' => '%s-ig',
    'id' => 'Sampai %s',
    'it' => 'Fino al %s',
    'ka' => '%s წლამდე',
    'ko' => '%s까지',
    'pl' => 'do %s roku',
    'pt' => 'até %s',
    'ro' => 'până în %s',
    'ru' => 'до %s г.',
    'ta' => '%s வரை',
    'tl' => 'Hanggang %s',
    'tr' => '-%s',
    // Languages mapped to the en_US locale in LANGS_TO_LOCALES
    'ja' => '%sまで',
    'zh' => '直到%s',
];
/**
 * sprintf() formats (one placeholder: century number) for a single century,
 * per language. Used by translateYearsAsCentury() when the time starts and
 * ends in the same century.
 */
const LANGS_CENTURY_FORMAT = [
'ar' => 'القرن ال %s',
'de' => '%s. Jahrhundert',
'en' => '%s. century',
'es' => 'Siglo %s',
'fa' => 'قرن %s',
'fr' => '%sème siècle',
'hu' => '%s. század',
'id' => 'Abad ke-%s',
'it' => '%sesimo secolo',
'ka' => 'მე -%s საუკუნე',
'ko' => '%s 세기',
'pl' => '%s wiek',
'pt' => 'Século %s',
'ro' => 'Secolul al %s-lea',
'ru' => '%s век',
'ta' => '%s ஆம் நூற்றாண்டு',
'tl' => 'Ika-%s na siglo',
'tr' => '%s. yüzyıl',
'ja' => '%s世紀',
'zh' => '%s世紀',
];
/**
 * sprintf() formats (two placeholders: first and last century number) for a span
 * of several centuries, per language. Used by translateYearsAsCentury() when the
 * time starts and ends in different centuries.
 */
const LANGS_CENTURIES_FORMAT = [
'ar' => 'القرن ال %s-%s',
'de' => '%s.-%s. Jahrhundert',
'en' => '%s.-%s. century',
'es' => 'Siglo %s-%s',
'fa' => 'قرن %s-%s',
'fr' => '%s-%sème siècle',
'hu' => '%s.-%s. század',
'id' => 'Abad ke-%s-%s',
'it' => '%s-%sesimo secolo',
'ka' => 'მე -%s-%s საუკუნე',
'ko' => '%s-%s 세기',
'pl' => '%s-%s wiek',
'pt' => 'Século %s-%s',
'ro' => 'Secolul al %s-%s-lea',
'ru' => '%s-%s век',
'ta' => '%s-%s ஆம் நூற்றாண்டு',
'tl' => 'Ika-%s hanggang ika-%s na siglo',
'tr' => '%s.-%s. yüzyıl',
'ja' => '%s世紀-%s世紀',
'zh' => '%s-%s世紀',
];
/**
 * sprintf() formats for a single decade, per language.
 * translateYearsAsDecade() passes both the first and the last year of the decade:
 * most formats only consume the first year, while the entries with two
 * placeholders ('ar', 'es') render the decade as a plain span of years.
 */
const LANGS_DECADE_FORMAT = [
'ar' => '%s-%s',
'de' => '%ser Jahre',
'en' => '%ss',
'es' => '%s-%s',
'fa' => 'دهه %s',
'fr' => 'Années %s',
'hu' => '%s-as évek',
'id' => 'Tahun %s-an',
'it' => '%ss',
'ka' => '%s-იანი წლები',
'ko' => '%s 년대',
'pl' => '%s roku',
'pt' => 'Década de %s',
'ro' => 'Anii %s',
'ru' => '%s-е годы',
'ta' => '%s கள்',
'tl' => '%ss',
'tr' => '%s\'ler',
'ja' => '%s年代',
'zh' => '%s年代',
];
/**
 * sprintf() formats (two placeholders) for a span of several decades, per language.
 * translateYearsAsDecade() passes the first year of the first decade and the
 * first year of the last decade (end year minus 9).
 */
const LANGS_DECADES_FORMAT = [
'ar' => '%s-%s',
'de' => '%s-%ser Jahre',
'en' => '%s-%ss',
'es' => '%s-%s',
'fa' => 'دهه %s-%s',
'fr' => 'Années %s-%s',
'hu' => '%s-%s-as évek',
'id' => 'Tahun %s-an sampai tahun %s-an',
'it' => '%s-%ss',
'ka' => '%s-%s-იანი წლები',
'ko' => '%s-%s 년대',
'pl' => '%s-%s roku',
'pt' => 'Décadas de %s-%s',
'ro' => 'Anii %s-%s',
'ru' => '%s-%s-е годы',
'ta' => '%s-%s கள்',
'tl' => '%s-%ss',
'tr' => '%s-%s\'ler',
'ja' => '%s年代から%s年代',
'zh' => '%s年代-%s年代',
];
/**
 * Per-language string replacements that translateYearsAsDecade() applies with
 * strtr() to a decade name after the decade format has been filled in.
 * Languages without an entry are left untouched.
 *
 * NOTE(review): the Hungarian entries presumably switch the suffix from "-as" to "-es"
 * where the spoken number requires it — confirm with a speaker, also whether
 * further decades need an entry.
 */
const LANGS_SYLLABLE_CLEANING = [
"hu" => [
"10-as évek" => "10-es évek",
"40-as évek" => "40-es évek",
"50-as évek" => "50-es évek",
"70-as évek" => "70-es évek",
"90-as évek" => "90-es évek",
],
];
/** @var MDMysqli Database connection, as passed to the constructor. */
private MDMysqli $_mysqli_noda;
/** @var integer ID of the time; bound as `zeit_id` when translations are inserted. */
private int $_znum;
/** @var MDMysqliStmt Prepared INSERT statement for `zeit_translation`; closed in the destructor. */
private MDMysqliStmt $_insertStmt;
/**
 * Determines whether and how a time can be translated automatically.
 *
 * The checks run in the following order, the first match wins:
 * - open-ended times ("?" as start or end year),
 * - times reaching from BCE into CE (not translatable),
 * - full centuries,
 * - full decades starting after the year 1000,
 * - times before the common era or ending before the year 1000 (year with era suffix),
 * - single years / spans of years without a counting time month,
 * - times with a counting time month.
 *
 * @param string $zeit_beginn          Begin year, "?" if unknown.
 * @param string $zeit_ende            End year, "?" if unknown.
 * @param string $zeit_zaehlzeit_monat Counting time month.
 *
 * @return integer One of the TRANSLATABLE_* constants.
 */
public static function check_translatability(string $zeit_beginn, string $zeit_ende, string $zeit_zaehlzeit_monat):int {

    $startUnknown = ($zeit_beginn === "?");
    $endUnknown   = ($zeit_ende === "?");

    // A time without any known boundary cannot be named. Reporting it as
    // "since start" would make translateYearsSinceStart() and getTranslations()
    // call each other endlessly, as the inner time would again be "?" - "?".
    if ($startUnknown && $endUnknown) {
        return self::TRANSLATABLE_NOT;
    }
    if ($endUnknown) {
        return self::TRANSLATABLE_SINCE_START;
    }
    if ($startUnknown) {
        return self::TRANSLATABLE_UNTIL_END;
    }

    // All numeric checks work on the integer values, so strings are never
    // compared with numbers directly.
    $start = intval($zeit_beginn);
    $end   = intval($zeit_ende);

    // Spans crossing from BCE to CE are not supported.
    if ($end >= 0 && $start < 0) {
        return self::TRANSLATABLE_NOT;
    }

    // Centuries: 1501-1600 in CE, -1600 - -1501 in BCE.
    $isCenturyCe  = ($end > 0 && $end % 100 === 0 && ($start - 1) % 100 === 0);
    $isCenturyBce = ($start < 0 && $start % 100 === 0 && ($end + 1) % 100 === 0);
    if ($isCenturyCe || $isCenturyBce) {
        return self::TRANSLATABLE_CENTURY;
    }

    // Decades: 1920-1929; only recognized after the year 1000.
    if (($end + 1) % 10 === 0 && $start % 10 === 0 && $start > 1000) {
        return self::TRANSLATABLE_DECADE;
    }

    // BCE times and times ending before 1000 CE get an era suffix.
    if (($end < 0 && $start < 0) || $end < 1000) {
        return self::TRANSLATABLE_AS_YEAR_WITH_SUFFIX;
    }

    // Without a counting time month, only the years can be named.
    if (trim($zeit_zaehlzeit_monat, ", .0") === "") {
        return ($zeit_ende === $zeit_beginn)
            ? self::TRANSLATABLE_ONLY_YEAR
            : self::TRANSLATABLE_TIMESPAN_YEARS;
    }

    return self::TRANSLATABLE_AS_MONTH_DAY;

}
/**
 * Returns the era suffix mode for a span of years.
 *
 * The returned mode is understood by applyBcBceFormat():
 * 2 if both years are negative (BCE), 1 if the end year is below 1000 (CE suffix),
 * 0 otherwise (no suffix).
 *
 * @param integer $start Start year.
 * @param integer $end   End year.
 *
 * @return integer
 */
public static function getSuffixModeForYearsWSuffix(int $start, int $end):int {

    // Both boundaries lie before the common era: " v. Chr."
    if ($start < 0 && $end < 0) {
        return 2;
    }

    // Ends before 1000: " n. Chr."; later times carry no suffix.
    return ($end < 1000) ? 1 : 0;

}
/**
 * Wraps a time name in the era format belonging to a suffix mode.
 *
 * Mode 0 returns the name unchanged, mode 1 applies the language's entry of
 * LANGS_TO_CE_FORMAT, mode 2 the language's entry of LANGS_TO_BCE_FORMAT.
 *
 * @param string  $tLang      Language code, a key of the format lists.
 * @param string  $timeName   Time name.
 * @param integer $suffixMode Suffix mode.
 *
 * @return string
 */
public static function applyBcBceFormat(string $tLang, string $timeName, int $suffixMode):string {

    if ($suffixMode === 0) {
        return $timeName;
    }
    if ($suffixMode === 1) {
        return sprintf(self::LANGS_TO_CE_FORMAT[$tLang], $timeName);
    }
    if ($suffixMode === 2) {
        return sprintf(self::LANGS_TO_BCE_FORMAT[$tLang], $timeName);
    }

    throw new Exception("Unknown case encountered for time translations.");

}
/**
 * Translates years or timespans that need an era suffix: times before the
 * common era and times ending before 1000 CE.
 *
 * For BCE times, the time is first translated with its years turned positive.
 * Whatever the language's CE format adds around the placeholder is then removed
 * again before the BCE format is applied.
 *
 * @param array<integer|string> $timeInfo Time information.
 *
 * @return array<string>
 */
public static function translateYearsWithSuffix(array $timeInfo):array {

    $start = intval($timeInfo['zeit_beginn']);
    $end   = intval($timeInfo['zeit_ende']);

    $suffixMode = self::getSuffixModeForYearsWSuffix($start, $end);

    // The translation of the unsigned years is the same for every language
    // iteration below, so it is computed once instead of once per language.
    $unsignedTranslations = [];
    if ($suffixMode === 2) {
        $unsignedTimeInfo = $timeInfo;
        $unsignedTimeInfo["zeit_beginn"] = abs($start);
        $unsignedTimeInfo["zeit_ende"]   = abs($end);
        $unsignedTranslations = self::getTranslations($unsignedTimeInfo);
    }

    $output = [];
    foreach (self::LANGS_TO_CE_FORMAT as $tLang => $ceFormat) {

        if ($suffixMode === 2) {
            // Strip the CE indicators (the text before and after the placeholder).
            // Empty search strings are ignored by str_replace().
            $year = str_replace(explode("%s", $ceFormat), "", $unsignedTranslations[$tLang]);
        }
        else if ($start === $end) {
            $year = sprintf(self::LANGS_SINGLE_YEAR_FORMAT[$tLang], (string)abs($start));
        }
        else {
            $year = sprintf(self::LANGS_YEARSPAN_FORMAT[$tLang], (string)abs($start), (string)abs($end));
        }

        $output[$tLang] = self::applyBcBceFormat($tLang, $year, $suffixMode);

    }

    return $output;

}
/**
 * Translates times consisting of a single year: 1994.
 *
 * Only the begin year is read; its absolute value is put into each
 * language's entry of LANGS_SINGLE_YEAR_FORMAT.
 *
 * @param array<integer|string> $timeInfo Time information.
 *
 * @return array<string>
 */
public static function translateYearOnly(array $timeInfo):array {

    $year = (string)abs(intval($timeInfo['zeit_beginn']));

    // array_map() keeps the language codes as keys, as only one array is passed.
    return array_map(
        static function (string $format) use ($year):string {
            return sprintf($format, $year);
        },
        self::LANGS_SINGLE_YEAR_FORMAT
    );

}
/**
 * Translates spans of years: 1994-1998.
 *
 * The absolute values of begin and end year are put into each language's
 * entry of LANGS_YEARSPAN_FORMAT.
 *
 * @param array<integer|string> $timeInfo Time information.
 *
 * @return array<string>
 */
public static function translateTimespanYears(array $timeInfo):array {

    $start = (string)abs(intval($timeInfo['zeit_beginn']));
    $end   = (string)abs(intval($timeInfo['zeit_ende']));

    // A leftover debugging statement that threw an exception for every span
    // starting in 1102 has been removed here.
    $output = [];
    foreach (self::LANGS_YEARSPAN_FORMAT as $tLang => $format) {
        $output[$tLang] = sprintf($format, $start, $end);
    }

    return $output;

}
/**
 * Translates times covering one or more full centuries: 19. Jahrhundert.
 *
 * The century numbers are derived from the begin and end year. If both fall into
 * the same century, LANGS_CENTURY_FORMAT is used, otherwise LANGS_CENTURIES_FORMAT.
 * The result is passed through applyBcBceFormat().
 *
 * @param array<integer|string> $timeInfo Time information.
 *
 * @return array<string>
 */
public static function translateYearsAsCentury(array $timeInfo):array {

    $firstYear = intval($timeInfo['zeit_beginn']);
    $lastYear  = intval($timeInfo['zeit_ende']);

    // check_translatability() only reports centuries for years aligned to
    // century boundaries, for which the divisions below leave no remainder.
    if ($firstYear > 0) {
        // Begin 1501: (1501 - 1) / 100 + 1 = 16. End 1600: 1600 / 100 = 16.
        $firstCentury = (($firstYear - 1) / 100) + 1;
        $lastCentury  = ($lastYear / 100);
    }
    else {
        // Begin -1600: -1600 / 100 = -16. End -1501: (-1501 + 1) / 100 - 1 = -16.
        $firstCentury = ($firstYear / 100);
        $lastCentury  = (($lastYear + 1) / 100) - 1;
    }

    $suffixMode = self::getSuffixModeForYearsWSuffix($firstYear - 1, $lastYear);

    $isSingleCentury = ($firstCentury === $lastCentury);
    $formats = $isSingleCentury ? self::LANGS_CENTURY_FORMAT : self::LANGS_CENTURIES_FORMAT;

    $output = [];
    foreach ($formats as $tLang => $format) {

        if ($isSingleCentury) {
            $centuryName = sprintf($format, (string)abs($firstCentury));
        }
        else {
            $centuryName = sprintf($format, (string)abs($firstCentury), (string)abs($lastCentury));
        }

        $output[$tLang] = self::applyBcBceFormat($tLang, $centuryName, $suffixMode);

    }

    return $output;

}
/**
 * Translates times covering one or more full decades: 1920er Jahre.
 *
 * A time whose end year is nine years after its begin year is a single decade
 * and uses LANGS_DECADE_FORMAT; anything else uses LANGS_DECADES_FORMAT.
 * Language-specific replacements from LANGS_SYLLABLE_CLEANING are applied to
 * the formatted name, which is then passed through applyBcBceFormat().
 *
 * @param array<integer|string> $timeInfo Time information.
 *
 * @return array<string>
 */
public static function translateYearsAsDecade(array $timeInfo):array {

    $firstYear = intval($timeInfo['zeit_beginn']);
    $lastYear  = intval($timeInfo['zeit_ende']);

    $suffixMode = self::getSuffixModeForYearsWSuffix($firstYear - 1, $lastYear);

    if ($firstYear === $lastYear - 9) {
        // Single decade: the formats get the first and the last year of the decade.
        $formats     = self::LANGS_DECADE_FORMAT;
        $secondValue = (string)$lastYear;
    }
    else {
        // Several decades: the formats get the first year of the first
        // and the first year of the last decade.
        $formats     = self::LANGS_DECADES_FORMAT;
        $secondValue = (string)($lastYear - 9);
    }

    $output = [];
    foreach ($formats as $tLang => $format) {

        $decadeName = sprintf($format, (string)$firstYear, $secondValue);

        if (!empty(self::LANGS_SYLLABLE_CLEANING[$tLang])) {
            $decadeName = strtr($decadeName, self::LANGS_SYLLABLE_CLEANING[$tLang]);
        }

        $output[$tLang] = self::applyBcBceFormat($tLang, $decadeName, $suffixMode);

    }

    return $output;

}
/**
 * Translates times of which only the start is known: Since 1950.
 *
 * The start is translated on its own by treating it as both begin and end of
 * the time. Each language's translation is then put into its entry of
 * LANGS_SINCE_START_FORMAT_YEAR.
 *
 * @param array<integer|string> $timeInfo Time information.
 *
 * @return array<string>
 */
public static function translateYearsSinceStart(array $timeInfo):array {

    $innerTimeInfo = $timeInfo;
    $innerTimeInfo['zeit_ende'] = $timeInfo['zeit_beginn'];

    // The translations of the start are identical for every language iteration
    // below, so they are fetched once instead of once per language.
    $startTranslations = self::getTranslations($innerTimeInfo);

    $output = [];
    foreach (self::LANGS_SINCE_START_FORMAT_YEAR as $tLang => $format) {
        $output[$tLang] = sprintf($format, $startTranslations[$tLang]);
    }

    return $output;

}
/**
 * Translates times of which only the end is known: Until 1950.
 *
 * The end is translated on its own by treating it as both begin and end of
 * the time. Each language's translation is then put into its entry of
 * LANGS_UNTIL_START_FORMAT_YEAR.
 *
 * @param array<integer|string> $timeInfo Time information.
 *
 * @return array<string>
 */
public static function translateYearsUntilEnd(array $timeInfo):array {

    $innerTimeInfo = $timeInfo;
    $innerTimeInfo['zeit_beginn'] = $timeInfo['zeit_ende'];

    // The translations of the end are identical for every language iteration
    // below, so they are fetched once instead of once per language.
    $endTranslations = self::getTranslations($innerTimeInfo);

    $output = [];
    foreach (self::LANGS_UNTIL_START_FORMAT_YEAR as $tLang => $format) {
        $output[$tLang] = sprintf($format, $endTranslations[$tLang]);
    }

    return $output;

}
/**
 * Gets the translations of a time in all supported languages.
 *
 * The kind of translation is determined using check_translatability() and
 * delegated to the matching translate* method. Times with a counting time month
 * (and possibly day) are formatted here using strftime() in each language's locale;
 * languages whose locale cannot be activated are missing from the result.
 *
 * The LC_TIME locale active before the call is restored afterwards, so that
 * translating a time does not change date formatting elsewhere in the process.
 *
 * @param array<integer|string> $timeInfo Time information.
 *
 * @throws MDgenericInvalidInputsException If the time is not translatable.
 *
 * @return array<string>
 */
public static function getTranslations(array $timeInfo):array {

    $translationType = self::check_translatability(
        (string)$timeInfo['zeit_beginn'],
        (string)$timeInfo['zeit_ende'],
        (string)$timeInfo['zeit_zaehlzeit_monat']
    );

    switch ($translationType) {
        case self::TRANSLATABLE_NOT:
            throw new MDgenericInvalidInputsException("Non-translatable date: {$timeInfo['zeit_beginn']} - {$timeInfo['zeit_ende']}");
        case self::TRANSLATABLE_ONLY_YEAR:
            return self::translateYearOnly($timeInfo);
        case self::TRANSLATABLE_SINCE_START:
            return self::translateYearsSinceStart($timeInfo);
        case self::TRANSLATABLE_UNTIL_END:
            return self::translateYearsUntilEnd($timeInfo);
        case self::TRANSLATABLE_CENTURY:
            return self::translateYearsAsCentury($timeInfo);
        case self::TRANSLATABLE_DECADE:
            return self::translateYearsAsDecade($timeInfo);
        case self::TRANSLATABLE_TIMESPAN_YEARS:
            return self::translateTimespanYears($timeInfo);
        case self::TRANSLATABLE_AS_YEAR_WITH_SUFFIX:
            return self::translateYearsWithSuffix($timeInfo);
    }

    // Remaining case: the time has a counting time month and possibly a day.
    if (trim((string)$timeInfo['zeit_zaehlzeit_tag'], ", .0") === "") {
        // No day given: any day within the month will do for printing the month.
        $dateStr = "{$timeInfo['zeit_zaehlzeit_jahr']}-{$timeInfo['zeit_zaehlzeit_monat']}-05 00:00:01";
        $usecase = self::USECASE_MONTH;
    }
    else {
        $dateStr = "{$timeInfo['zeit_zaehlzeit_jahr']}-{$timeInfo['zeit_zaehlzeit_monat']}-{$timeInfo['zeit_zaehlzeit_tag']} 00:00:01";
        $usecase = self::USECASE_DAY;
    }

    // NOTE(review): an unparsable date silently falls back to timestamp 0 here.
    $timestamp = strtotime($dateStr) ?: 0;

    // setlocale() changes the locale of the whole process; remember the
    // current one to restore it once all languages have been formatted.
    $previousLocale = setlocale(LC_TIME, "0");

    $output = [];
    try {
        foreach (self::LANGS_TO_LOCALES as $tLang => $locale) {

            setlocale(LC_TIME, $locale);
            // Skip languages whose locale is not available on this system.
            if ($locale !== setlocale(LC_TIME, "0")) continue;

            if ($usecase === self::USECASE_MONTH) {
                $dateFormat = getMonthFormatByLang($tLang);
            }
            else { # if ($usecase === self::USECASE_DAY)
                $dateFormat = getDateFormatByLang($tLang);
            }

            $output[$tLang] = strftime($dateFormat, $timestamp);

        }
    }
    finally {
        if ($previousLocale !== false) {
            setlocale(LC_TIME, $previousLocale);
        }
    }

    return $output;

}
/**
 * Runs the autotranslater: fetches all translations of the given time and
 * executes the prepared insert statement once per language.
 *
 * @param array<integer|string> $timeInfo Time information.
 *
 * @return void
 */
public function translate(array $timeInfo):void {

    foreach (self::getTranslations($timeInfo) as $language => $translatedName) {
        $this->_insertStmt->bind_param("iss", $this->_znum, $language, $translatedName);
        $this->_insertStmt->execute();
    }

}
/**
 * Constructor. Stores the database connection and the ID of the time and
 * prepares the statement used by translate() to insert translations into
 * `zeit_translation`.
 *
 * @param MDMysqli $mysqli_noda Database connection.
 * @param integer  $znum        Time ID.
 *
 * @return void
 */
public function __construct(MDMysqli $mysqli_noda, int $znum) {
$this->_mysqli_noda = $mysqli_noda;
$this->_znum = $znum;
// Placeholders, in order: time ID, language code, translated name.
$this->_insertStmt = $this->_mysqli_noda->do_prepare("INSERT INTO `zeit_translation`
(`zeit_id`, `trans_language`, `trans_name`)
VALUES
(?, ?, ?)");
}
/**
 * Destructor. Closes the insert statement prepared in the constructor.
 *
 * @return void
 */
public function __destruct() {
$this->_insertStmt->close();
}
}