MDNodaHelpers/src/NodaTimeSplitter.php

1356 lines
49 KiB
PHP
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

<?PHP
/**
* Splits nodac times.
*
* @author Joshua Ramon Enslin <joshua@museum-digital.de>
*/
declare(strict_types = 1);
/**
* Class for splitting times.
*/
final class NodaTimeSplitter {
/**
 * German month names and abbreviations, keyed by two-digit month number.
 * is_valid_date() looks them up as case-insensitive substrings (via stri_occurs()).
 */
private const MONTH_NAMES_GERMAN = [
"01" => ['Januar', 'Jan.'],
"02" => ['Februar', 'Feb'],
"03" => ['März', 'Mrz.'],
"04" => ['April', 'Apr.'],
"05" => ['Mai'],
"06" => ['Juni', 'Jun.'],
"07" => ['Juli', 'Jul.'],
"08" => ['August', 'Aug.'],
"09" => ['September', 'Sep.', 'Sept.'],
"10" => ['Oktober', 'Okt.'],
"11" => ['November', 'Nov.'],
"12" => ['Dezember', 'Dez.'],
];
/**
 * English month names and abbreviations, keyed by two-digit month number.
 * is_valid_date() checks this list before the German one.
 */
private const MONTH_NAMES_ENGLISH = [
"01" => ['January', 'Jan.'],
"02" => ['February', 'Feb'],
"03" => ['March', 'Mar.'],
"04" => ['April', 'Apr.'],
"05" => ['May'],
"06" => ['June', 'Jun.'],
"07" => ['July', 'Jul.'],
"08" => ['August', 'Aug.'],
"09" => ['September', 'Sep.', 'Sept.'],
"10" => ['October', 'Oct.'],
"11" => ['November', 'Nov.'],
"12" => ['December', 'Dec.'],
];
/**
 * Hungarian month names, keyed by two-digit month number. Besides the
 * correctly accented spelling, unaccented variants and abbreviations are listed.
 * Used by is_valid_date_hungarian().
 */
private const MONTH_NAMES_HUNGARIAN = [
"01" => ['január', 'januar', 'jan'],
"02" => ['február', 'feb'],
"03" => ['március', 'mar.', 'már.'],
"04" => ['április', 'apr.', 'ápr.'],
"05" => ['május', 'maj.', 'máj.'],
"06" => ['június', 'jun.', 'jún'],
"07" => ['július', 'julius', 'jul.', 'júl.'],
"08" => ['augusztus', 'aug.'],
"09" => ['szeptember', 'szp.'],
"10" => ['október', 'okt.'],
"11" => ['november', 'nov.'],
"12" => ['december', 'dec.'],
];
/**
 * Replacement map applied to every input by clean_input() through strtr().
 * Keys mapped to "" are removed outright; the second group rewrites the
 * various spellings of "before Christ" to the canonical "v. Chr.".
 *
 * Some keys look identical but use different alphabets (Latin "p" versus
 * Cyrillic "р"); both spellings are needed.
 */
private const STRINGS_TO_CLEAN = [
"között" => "",
" рр." => "", // Cyrillic
" рр" => "", // Cyrillic
"nach Christus" => "",
"n. Christus" => "",
"nach Chr." => "",
"n. Chr." => "",
"n.Chr." => "",
" pp" => "", // Latin
" p" => "", // Latin
" р" => "", // Cyrillic
// Spellings of BCE markers that are normalised to "v. Chr."
// ("CE" alone is simply removed).
"v.Chr." => "v. Chr.",
"v.C." => "v. Chr.",
"v. C." => "v. Chr.",
// Adds the missing trailing dot. An input that already reads "v. Chr."
// temporarily becomes "v. Chr..", which clean_input() collapses again.
"v. Chr" => "v. Chr.",
"BCE" => "v. Chr.",
"CE" => "",
"vor Christus" => "v. Chr.",
" до н. е." => "v. Chr.",
];
/**
 * Roman ordinals that clean_input() rewrites to Arabic ordinals, but only
 * if the input begins with them.
 */
private const STRINGS_TO_CLEAN_START = [
"V. " => "5. ",
"IV. " => "4. ",
"III. " => "3. ",
"II. " => "2. ",
"I. " => "1. ",
];
/**
 * Substrings that make is_valid_date() give up: if any of them occurs
 * (case-insensitively) in the cleaned input, the input is not treated as a
 * single spelled-out date.
 */
private const STOP_STRINGS_GERMAN = [
"-",
",",
";",
":",
"/",
"(", ")",
"[", "]",
", ",
" und ",
"nach ",
"um ",
"ca.",
"ab ",
"seit ",
"bis ",
"vor ",
"anfang ",
"ende ",
];
/**
 * Substrings that make is_valid_date_hungarian() give up: punctuation plus
 * Hungarian words and suffixes (e.g. "-ig", "tól", "vége").
 *
 * NOTE(review): "között" is also a key of STRINGS_TO_CLEAN and is therefore
 * already removed by clean_input() before this list is consulted.
 */
private const STOP_STRINGS_HUNGARIAN = [
"-",
",",
";",
":",
"/",
"(", ")",
"[", "]",
"ca.",
", ",
"-ig",
"és",
"eleje",
"között",
"töl",
"tól",
"januárig",
"februárig",
"márciusig",
"vége",
"végén",
"áprilisig",
"májusig",
"júniusig",
"júliusig",
"augusztusig",
"szeptemberig",
"októberig",
"novemberig",
"decemberig",
];
/**
 * Regex fragment (no delimiters, no anchors) matching an optional space
 * followed by a word for "century" in German, Hungarian, English or Cyrillic
 * script. Embedded into a full pattern by is_century().
 */
private const REGEX_CENTURIES = '(\ |)(Jh|Jh\.|Jhd(|\.)|Jhdt(|\.)|Jahrhundert|sz|század|th century|ст|ст\.)';
/**
 * Regex fragment (no delimiters, no anchors) matching the suffix that marks a
 * number as a decade, e.g. "s", "er Jahre", "-es évek". Embedded into full
 * patterns by is_decade().
 */
private const REGEX_DECADES = '(s|er|er\ Jahre|(\-|\ )es\ évek|(\-|\ )as\ \évek|\ évek|\-es\ években|\-ті)';
/**
 * Normalises a raw time name before any pattern matching is attempted.
 *
 * Steps, in this order:
 *  1. Spaces directly before or after a dash are removed ("1920 - 1930" => "1920-1930").
 *  2. The replacements of self::STRINGS_TO_CLEAN are applied.
 *  3. A leading Roman ordinal from self::STRINGS_TO_CLEAN_START is rewritten
 *     to its Arabic form. Only the prefix is touched; the same character
 *     sequence further into the string is left alone.
 *  4. Runs of dots are collapsed into a single dot.
 *  5. Commas, spaces and brackets are trimmed from both ends.
 *
 * @param string $input Input date name.
 *
 * @return string
 */
private static function clean_input(string $input):string {
    // str_replace() works in a single pass, so "  -" needs several rounds.
    while (\strpos($input, " -") !== false) $input = \str_replace(" -", "-", $input);
    while (\strpos($input, "- ") !== false) $input = \str_replace("- ", "-", $input);

    $input = \strtr($input, self::STRINGS_TO_CLEAN);

    foreach (self::STRINGS_TO_CLEAN_START as $prefix => $replacement) {
        if (\strpos($input, $prefix) === 0) {
            // Swap the prefix only. A global str_replace() would also rewrite
            // later occurrences, e.g. the "I. " that is part of a later "II. ".
            $input = $replacement . \substr($input, \strlen($prefix));
        }
    }

    // The replacements above may leave doubled dots ("v. Chr." => "v. Chr..").
    while (\strpos($input, "..") !== false) $input = \str_replace("..", ".", $input);

    return \trim($input, ", [](){}");
}
/**
 * Stricter variant of PHP's \is_numeric(): strings containing a space or a
 * dot are rejected before the built-in check runs.
 *
 * @param string $input Input string.
 *
 * @return boolean
 */
private static function is_numeric(string $input):bool {
    if (\strpos($input, " ") !== false || \strpos($input, ".") !== false) {
        return false;
    }
    return \is_numeric($input);
}
/**
 * Extracts a purely numeric part of a date string.
 *
 * Unless the part starts at offset 0, the character directly before it has
 * to be a dash, a space or a dot. If that is not the case, or if the
 * extracted part is not numeric, an empty string is returned.
 *
 * @param string  $datum Date.
 * @param integer $start Offset of the part (negative values count from the end).
 * @param integer $end   Passed to substr() as its length argument, i.e. the
 *                       maximum length of the part rather than an end offset.
 *
 * @return string
 */
private static function validateDateSubstr(string $datum, int $start, int $end = 10000):string {
    if ($start !== 0) {
        $precedingChar = \substr($datum, $start - 1, 1);
        if (!\in_array($precedingChar, ["-", " ", "."], true)) {
            return "";
        }
    }

    $candidate = \substr($datum, $start, $end);
    return self::is_numeric($candidate) ? $candidate : "";
}
/**
 * Determines the counting year of a split time.
 *
 * - Unknown start ("?"): the absolute end year; one more if the time is
 *   marked as "before" and has no counting month.
 * - Unknown end ("?"): the absolute start year; one less if the time is
 *   marked as "after" and has no counting month.
 * - Otherwise: the absolute value of the (rounded up) middle between start
 *   and end year.
 *
 * @param NodaSplitTime $moda Date strings.
 *
 * @return integer
 */
public static function timePartsToCountingYear(NodaSplitTime $moda):int {
    if ($moda->start_year === "?") {
        $endYear = \abs(\intval($moda->end_year));
        $isBefore = $moda->before_after_indicator === NodaTimeBeforeAfterIndicator::before;
        if ($isBefore && empty(trim($moda->counting_time_month, " 0"))) {
            return $endYear + 1;
        }
        return $endYear;
    }

    if ($moda->end_year === "?") {
        $startYear = \abs(\intval($moda->start_year));
        $isAfter = $moda->before_after_indicator === NodaTimeBeforeAfterIndicator::after;
        if ($isAfter && empty(trim($moda->counting_time_month, " 0"))) {
            return $startYear - 1;
        }
        return $startYear;
    }

    $from = \intval($moda->start_year);
    $to = \intval($moda->end_year);
    $middle = $to - (($to - $from) / 2);

    return \abs((int)\ceil($middle));
}
/**
 * Builds the HTML snippet (rule plus one-row table) that offers to split a
 * time: a link to tempi_md/zeit_cha.php carrying all parts of the split
 * time as GET parameters.
 *
 * The link text is the suggested new time name if there is one. Otherwise
 * it is derived from the parts: "DD.MM.YYYY", "YYYY-YYYY", "MM.YYYY" or "YYYY".
 *
 * @param integer       $znum     Time ID.
 * @param NodaSplitTime $moda     Date strings.
 * @param MDTlLoader    $tlLoader Translation loader.
 *
 * @return string
 */
public static function generateDisassemblyForDay(int $znum, NodaSplitTime $moda, MDTlLoader $tlLoader):string {
    $countingYear = self::pad_to_four((string)self::timePartsToCountingYear($moda));
    $toolLabel = $tlLoader->tl("tempi", "tempi", "time_tool");

    // Query string of the link target.
    $query = 'znum=' . $znum . '&kontrolle=todo';
    $suggestedName = $moda->toTimeName();
    if ($suggestedName !== "") {
        $query .= "&zeit_name_neu={$suggestedName}";
    }
    $query .= '&zeit_beginn_neu=' . $moda->start_year
        . '&zeit_ende_neu=' . $moda->end_year
        . '&zeit_zaehlzeit_vorzeichen_neu=' . urlencode($moda->counting_time_indicator->toString())
        . '&zeit_zaehlzeit_jahr_neu=' . $countingYear
        . '&zeit_zaehlzeit_monat_neu=' . $moda->counting_time_month
        . '&zeit_zaehlzeit_tag_neu=' . $moda->counting_time_day
        . '&zeit_status_neu=%2B'
        . '&zeit_beginn_datum_neu=' . $moda->start_date
        . '&zeit_ende_datum_neu=' . $moda->end_date;

    // Visible text of the link; the first matching rule wins.
    $linkText = match (true) {
        !empty($suggestedName)
            => $suggestedName,
        !empty(trim($moda->counting_time_day, " 0")) && !empty(trim($moda->counting_time_month, " 0"))
            => $moda->counting_time_day . '.' . $moda->counting_time_month . '.' . $moda->start_year,
        $moda->start_year !== $moda->end_year
            => $moda->start_year . "-" . $moda->end_year,
        !empty(trim($moda->counting_time_month, " 0"))
            => "{$moda->counting_time_month}.{$moda->start_year}",
        default
            => $moda->start_year,
    };

    return '<hr><table>'
        . '<tr><td width="250px">' . $toolLabel . '</td>'
        . '<td><a href="tempi_md/zeit_cha.php?' . $query
        . '" class="icons iconsBell buttonLike" id="splitTimeLink">+'
        . $linkText
        . ' - ' . $tlLoader->tl("tempi", "tempi", "time_disassemble") . '</a></td>'
        . '</tr></table>';
}
/**
 * Tells whether at least one of the needles is contained in the haystack.
 * The comparison is done with stripos(), i.e. case-insensitively.
 *
 * @param string        $haystack Haystack.
 * @param array<string> $needles  Needles.
 *
 * @return boolean
 */
private static function stri_occurs(string $haystack, array $needles):bool {
    foreach ($needles as $candidate) {
        if (\stripos($haystack, $candidate) === false) {
            continue;
        }
        return true;
    }
    return false;
}
/**
 * Left-pads a string with zeros to at least four characters. E.g. 20 > 0020.
 *
 * Inputs that are already four characters or longer are returned unchanged.
 * In particular, five-digit years (as accepted by is_timespan()) are not
 * cut down to their last four digits.
 *
 * @param string $input Input string.
 *
 * @return string
 */
public static function pad_to_four(string $input):string {
    return \str_pad($input, 4, "0", \STR_PAD_LEFT);
}
/**
 * Left-pads a string with zeros to at least two characters. E.g. 2 > 02.
 *
 * Inputs that are already two characters or longer are returned unchanged
 * rather than being cut down to their last two characters.
 *
 * @param string $input Input string.
 *
 * @return string
 */
public static function pad_to_two(string $input):string {
    return \str_pad($input, 2, "0", \STR_PAD_LEFT);
}
/**
 * Tries to read the input as a single date with a spelled-out or numeric
 * month (German / English), e.g. "12. Januar 1920" or "Januar 1920".
 *
 * Also handled here:
 *  - a trailing " v. Chr.": the rest is passed to attempt_splitting() and
 *    the result is mirrored into negative years,
 *  - "YYYY bis YYYY",
 *  - "YYYY und|oder|od. YYYY" if the two years are consecutive.
 *
 * @param string $datum Date.
 *
 * @return NodaSplitTime|false
 */
public static function is_valid_date(string $datum):NodaSplitTime|false {
    $datum = self::clean_input($datum);

    if (\str_ends_with($datum, ' v. Chr.')) {
        // Cut the 8 bytes of " v. Chr." and parse the remainder.
        if ($output = self::attempt_splitting(\substr($datum, 0, -8))) {
            // Negating the years reverses their order, so start and end swap roles.
            $start = \strval(-1 * \intval($output->end_year));
            $end = \strval(-1 * \intval($output->start_year));
            $start_date = $output->end_date;
            $end_date = $output->start_date;
            if (\intval($start) > \intval($end)) {
                $startToSet = $end;
                $end = $start;
                $start = $startToSet;
                $start_date = $output->start_date;
                $end_date = $output->end_date;
            }
            return new NodaSplitTime($start, $end, $output->counting_time_month, $output->counting_time_day,
                NodaCountingTimeIndicator::bce, $output->before_after_indicator, '-' . $start_date, '-' . $end_date);
        }
    }

    // 1920 bis 1930
    if (\preg_match("/^[0-9]{4}\ bis\ [0-9]{4}$/", $datum)) {
        return new NodaSplitTime(\substr($datum, 0, 4), \substr($datum, -4));
    }

    // 1920 und 1921 / 1920 oder 1921 / 1920 od. 1921 - consecutive years only.
    // The dot of "od." is escaped; unescaped it would match any character.
    if (\preg_match("/^[0-9]{4}\ (und|oder|od\.)\ [0-9]{4}$/", $datum)) {
        $start = \substr($datum, 0, 4);
        $end = \substr($datum, -4);
        if ((int)$start === (int)$end - 1) {
            return new NodaSplitTime($start, $end);
        }
    }

    $datum = \str_replace(". ", ".", $datum);

    if (self::stri_occurs($datum, self::STOP_STRINGS_GERMAN)) {
        return false;
    }

    $length = \strlen($datum);
    if ($length <= 6) return false;
    // Up to 9 bytes there is no room for a day next to month and year.
    $use_day = $length > 9;

    // Further code requires a four-digit year at the end, skip if there is none.
    $year = \substr($datum, -4);
    if (!self::is_numeric($year) || empty($year)) return false;

    // Month by name: the first English match wins, otherwise the first German one.
    // The key is cast because PHP turns the keys "10" to "12" into integers.
    $monat = '';
    foreach ([self::MONTH_NAMES_ENGLISH, self::MONTH_NAMES_GERMAN] as $monthNames) {
        foreach ($monthNames as $monthVal => $monthValidNames) {
            if (self::stri_occurs($datum, $monthValidNames)) {
                $monat = (string)$monthVal;
                break 2;
            }
        }
    }
    // No month name: expect a numeric month at offset 3 (DD.MM.YYYY).
    if (empty($monat) and self::is_numeric((string)\substr($datum, 3, 2))) {
        $monat = \substr($datum, 3, 2);
    }

    // Day: two leading digits, or one leading digit followed by dot / space.
    $day = '';
    if (self::is_numeric((string)\substr($datum, 0, 2))) {
        $day = \substr($datum, 0, 2);
    }
    else if (\in_array(\substr($datum, 1, 1), [".", " "], true) && self::is_numeric((string)\substr($datum, 0, 1))) {
        $day = "0" . \substr($datum, 0, 1);
    }

    if (!empty($monat) and !empty($day) and $use_day) {
        return NodaSplitTime::genExactDate($year, $monat, $day);
    }
    else if (!empty($monat)) {
        return new NodaSplitTime($year, $year, $monat);
    }

    return false;
}
/**
 * Tries to read the input as a single Hungarian date that starts with a
 * four-digit year followed by a dot, e.g. "2005. január 3." or "2005. január".
 *
 * Also handled here: a leading "Kr. e. " (the rest is passed to
 * attempt_splitting() and mirrored into negative years) and spans of the
 * form "YYYY-tól YYYY-ig".
 *
 * @param string $datum Date.
 *
 * @return NodaSplitTime|false
 */
public static function is_valid_date_hungarian(string $datum):NodaSplitTime|false {
$datum = self::clean_input($datum);
if (\preg_match("/^Kr\.\ e\.\ /", $datum)) {
// "Kr. e. " is 7 bytes long; parse what follows it.
if ($output = self::attempt_splitting(\substr($datum, 7))) {
// Negating the years reverses their order, so start and end swap roles.
$start = \strval(-1 * \intval($output->end_year));
$end = \strval(-1 * \intval($output->start_year));
if (\intval($start) > \intval($end)) {
$startToSet = $end;
$end = $start;
$start = $startToSet;
}
// NOTE(review): unlike is_valid_date(), the dates are not swapped back
// when the years are - confirm that this is intended.
return new NodaSplitTime($start, $end, $output->counting_time_month, $output->counting_time_day,
NodaCountingTimeIndicator::bce, $output->before_after_indicator, '-' . $output->end_date, '-' . $output->start_date);
}
}
// Example: 2009-től 2010-ig
// From 2009 to 2010
if (\preg_match("/^[0-9][0-9][0-9][0-9]\-t(ő|ó)l(\ |\-)[0-9][0-9][0-9][0-9]\-ig$/", $datum)) {
$start = \substr($datum, 0, 4);
// The trailing "YYYY-ig" is 7 bytes long.
$end = \substr($datum, -7, 4);
return new NodaSplitTime($start, $end);
}
if (self::stri_occurs($datum, self::STOP_STRINGS_HUNGARIAN)) {
return false;
}
//
// Rest: Only those entries, where there are spelled out months
//
if (strlen($datum) <= 9) return false;
// The year is only parse-able if it is a four digit year at the start
if (self::is_numeric((string)\substr($datum, 0, 4)) && substr($datum, 4, 1) === '.') {
$year = \substr($datum, 0, 4);
}
// Further code requires a year to be present, skip if none is set
if (empty($year)) return false;
// Skip, if dates are too long and do not contain spaces (= no translatable names)
if (str_contains($datum, " ") === false && strlen($datum) > 12) return false;
// What remains of the (lowercased) input once year and month name are
// removed; used below to reject inputs with too much unexplained text.
$unparsed = trim(strtolower(str_replace($year, '', $datum)), ' ,.');
foreach (self::MONTH_NAMES_HUNGARIAN as $monthVal => $monthValidNames) {
if (self::stri_occurs($datum, $monthValidNames)) {
// Cast needed: PHP stores the keys "10" to "12" as integers.
$monat = (string)$monthVal;
foreach ($monthValidNames as $name) {
$unparsed = str_replace($name, '', $unparsed);
}
break;
}
}
// More than 5 bytes left over: too much text that is neither year nor month.
if (strlen($unparsed) > 5) {
return false;
}
// No month name found: look for a numeric month right after "YYYY." or "YYYY. ".
if (empty($monat) and self::is_numeric((string)\substr($datum, 5, 2))) $monat = \substr($datum, 5, 2);
else if (empty($monat) and self::is_numeric((string)\substr($datum, 6, 2))) $monat = \substr($datum, 6, 2);
// Look for a numeric day near the end of the string, moving the window
// further to the left on each attempt.
$day = self::validateDateSubstr($datum, -2);
if (empty($day)) $day = self::validateDateSubstr($datum, -3, 2);
if (empty($day)) $day = self::validateDateSubstr($datum, -4, 2);
if (empty($day)) $day = self::validateDateSubstr($datum, -5, 2);
if (empty($day)) $day = self::validateDateSubstr($datum, -6, 2);
if (empty($day)) {
// Single-digit day preceded by a space, optionally followed by one more character.
if (\substr($datum, -2, 1) === " " and self::is_numeric((string)\substr($datum, -1, 1))) {
$day = "0" . \substr($datum, -1, 1);
}
else if (\substr($datum, -3, 1) === " " and self::is_numeric((string)\substr($datum, -2, 1))) {
$day = "0" . \substr($datum, -2, 1);
}
}
// There are digits at the end that could not be read as a day: give up
// instead of returning the month only.
if (!empty($monat) && empty($day) && preg_match('~[0-9]+~', substr($datum, -3))) {
return false;
}
if (!empty($monat) and !empty($day)) {
return NodaSplitTime::genExactDate($year, $monat, $day);
}
else if (!empty($monat)) {
return new NodaSplitTime($year, $year, $monat);
}
return false;
}
/**
 * Tries to read the input as an exact date using PHP's own date parser
 * (strtotime()).
 *
 * @param string $datum Date.
 *
 * @return NodaSplitTime|false
 */
public static function is_valid_date_by_php(string $datum):NodaSplitTime|false {
    $datum = self::clean_input($datum);

    // strtotime() signals failure with false. The timestamp 0 is a valid
    // result (the Unix epoch) and must not be mistaken for a failure.
    $timeInt = \strtotime($datum);
    if ($timeInt === false) {
        return false;
    }

    return NodaSplitTime::genExactDate(\date("Y", $timeInt), \date("m", $timeInt), \date("d", $timeInt));
}
/**
 * Checks if an input date is a purely numeric date or timespan and splits it.
 *
 * The cleaned input is compared against a list of fixed patterns (year
 * spans, German D.M.Y dates, ISO dates, Hungarian Y.M.D dates, single
 * years, ...). The first matching pattern decides the result.
 *
 * @param string $datum Input date.
 *
 * @return NodaSplitTime|false
 */
public static function is_timespan(string $datum):NodaSplitTime|false {
    $datum = self::clean_input($datum);

    // 10000-20000
    if (\preg_match("/^[0-9]{5}(\-|\/)[0-9]{5}$/", $datum)) {
        return new NodaSplitTime(start_year: \substr($datum, 0, 5), end_year: \substr($datum, 6, 5));
    }
    // 1920-1930 / 1920/1930, optionally with a trailing dot
    if (\preg_match("/^[0-9]{4}(\-|\/)[0-9]{4}(\.|)$/", $datum)) {
        return new NodaSplitTime(start_year: \substr($datum, 0, 4), end_year: \substr($datum, 5, 4));
    }
    // 1.900-2.000 (thousands separator)
    if (\preg_match("/^[0-9]\.[0-9][0-9][0-9](\-|\/)[0-9]\.[0-9][0-9][0-9]$/", $datum)) {
        $datum = \str_replace(".", "", $datum);
        return new NodaSplitTime(start_year: \substr($datum, 0, 4), end_year: \substr($datum, 5, 4));
    }

    // German TT.MM.JJJJ / TT.MM.JJJ / TT.MM.JJ / TT.MM.J
    if (\preg_match("/^[0-9]{2}\.[0-9]{2}\.([0-9]{4}|[0-9]{3}|[0-9]{2}|[0-9])$/", $datum)) {
        $year = \substr($datum, 6, 4);
        $month = \substr($datum, 3, 2);
        $day = \substr($datum, 0, 2);
        return NodaSplitTime::genExactDate($year, $month, $day);
    }
    // German TT.M.JJJJ / TT.M.JJJ / TT.M.JJ / TT.M.J
    if (\preg_match("/^[0-9]{2}\.[0-9]\.([0-9]{4}|[0-9]{3}|[0-9]{2}|[0-9])$/", $datum)) {
        $year = \substr($datum, 5, 4);
        $month = "0" . \substr($datum, 3, 1);
        $day = \substr($datum, 0, 2);
        return NodaSplitTime::genExactDate($year, $month, $day);
    }
    // German T.MM.JJJJ / T.MM.JJJ / T.MM.JJ / T.MM.J
    if (\preg_match("/^[0-9]\.[0-9][0-9]\.([0-9][0-9][0-9][0-9]|[0-9][0-9][0-9]|[0-9][0-9]|[0-9])$/", $datum)) {
        $year = \substr($datum, 5, 4);
        $month = \substr($datum, 2, 2);
        $day = "0" . \substr($datum, 0, 1);
        return NodaSplitTime::genExactDate($year, $month, $day);
    }
    // German T.M.JJJJ / T.M.JJJ / T.M.JJ / T.M.J
    if (\preg_match("/^[0-9]\.[0-9]\.([0-9][0-9][0-9][0-9]|[0-9][0-9][0-9]|[0-9][0-9]|[0-9])$/", $datum)) {
        $year = \substr($datum, 4, 4);
        $month = "0" . \substr($datum, 2, 1);
        $day = "0" . \substr($datum, 0, 1);
        return NodaSplitTime::genExactDate($year, $month, $day);
    }

    // Intl': 2020-12-20
    if (\preg_match("/^[0-9]{4}\-[0-9]{2}\-[0-9]{2}$/", $datum)) {
        $year = \substr($datum, 0, 4);
        $month = \substr($datum, 5, 2);
        $day = \substr($datum, 8, 2);
        return NodaSplitTime::genExactDate($year, $month, $day);
    }
    // Intl': 2020-12 (year and month) or abbreviated span: 1912-15, 1945-46
    if (\preg_match("/^[0-9]{4}\-[0-9]{2}$/", $datum)) {
        $year = \substr($datum, 0, 4);
        $month = \substr($datum, 5, 2);
        // Anything up to and including 12 is assumed to be a month. December
        // must be included, else "2020-12" becomes the span 2020-2012.
        if (\intval($month) <= 12) {
            return new NodaSplitTime($year, $year, $month);
        }
        // Otherwise the two digits abbreviate an end year of the same century.
        return new NodaSplitTime($year, \substr($year, 0, 2) . $month);
    }

    // German MM.JJJJ
    if (\preg_match("/^[0-9]{2}\.[0-9]{4}$/", $datum)) {
        $year = \substr($datum, 3, 4);
        $month = \substr($datum, 0, 2);
        return new NodaSplitTime($year, $year, $month);
    }
    // German M.JJJJ
    if (\preg_match("/^[0-9]\.[0-9]{4}$/", $datum)) {
        $year = \substr($datum, 2, 4);
        $month = "0" . \substr($datum, 0, 1);
        return new NodaSplitTime($year, $year, $month);
    }

    // Hungarian YYYY.MM.D / YYYY.MM.DD, optionally with a trailing dot
    if (\preg_match("/^[0-9]{4}\.[0-3][0-9]\.[0-9]{1,2}(\.|)$/", $datum)) {
        $year = \substr($datum, 0, 4);
        $month = \substr($datum, 5, 2);
        $day = self::pad_to_two(\rtrim(\substr($datum, 8, 2), '.'));
        return NodaSplitTime::genExactDate($year, $month, $day);
    }
    // Hungarian YYYY.M.D. / YYYY.M.DD. > 2005.1.1.
    if (\preg_match("/^[0-9]{4}\.[0-9]\.[0-9]{1,2}\.$/", $datum)) {
        $year = \substr($datum, 0, 4);
        $month = self::pad_to_two(\substr($datum, 5, 1));
        $day = self::pad_to_two(\rtrim(\substr($datum, 7, 2), '.'));
        return NodaSplitTime::genExactDate($year, $month, $day);
    }
    // Hungarian YYYY.MM, optionally with a trailing dot
    if (\preg_match("/^[0-9]{4}\.[0-3][0-9](\.|)$/", $datum)) {
        $year = \substr($datum, 0, 4);
        $month = \substr($datum, 5, 2);
        return new NodaSplitTime($year, $year, $month);
    }

    // (Abbreviated spans such as 1945-46 are covered by the YYYY-MM check
    // above. A second check with the same pattern used to follow here but
    // could never be reached.)

    // 01.01.1920-31.12.1930 > whole years
    if (\preg_match("/^01\.01\.[0-9]{4}\-31\.12\.[0-9]{4}$/", $datum)) {
        $start = \substr($datum, 6, 4);
        $end = \substr($datum, -4);
        return new NodaSplitTime($start, $end);
    }
    // 303-305 (n. Chr.)
    if (\preg_match("/^[0-9]{3}\-[0-9]{3}$/", $datum)) {
        $start = \substr($datum, 0, 3);
        $end = \substr($datum, -3);
        return new NodaSplitTime("0" . $start, "0" . $end);
    }
    // 1720-120
    if (\preg_match("/^[0-9]{4}\-[0-9]{3}$/", $datum)) {
        $start = \substr($datum, 0, 4);
        $end = \substr($datum, -3);
        // Only the three-digit end needs padding. The start already has four digits.
        return new NodaSplitTime($start, "0" . $end);
    }
    // 20-30 (n. Chr.)
    if (\preg_match("/^[0-9]{2}\-[0-9]{2}$/", $datum)) {
        $start = \substr($datum, 0, 2);
        $end = \substr($datum, -2);
        return new NodaSplitTime("00" . $start, "00" . $end);
    }

    // 1920, optionally with a trailing dot
    if (\preg_match("/^[0-9]{4}(\.|)$/", $datum)) {
        $start = \substr($datum, 0, 4);
        return new NodaSplitTime($start, $start);
    }
    // 920
    if (\preg_match("/^[0-9]{3}$/", $datum)) {
        $start = "0" . \substr($datum, 0, 3);
        return new NodaSplitTime($start, $start);
    }
    // 20
    if (\preg_match("/^[0-9]{2}$/", $datum)) {
        $start = "00" . \substr($datum, 0, 2);
        return new NodaSplitTime($start, $start);
    }

    // Special case for SMB: YYYY, MM. DD and YYYY, MM.
    if (\preg_match("/^[0-9]{4}\,\ [0-9]{2}\.(|\ [0-9]{2})$/", $datum)) {
        $start = \substr($datum, 0, 4);
        $month = \substr($datum, 6, 2);
        $day = \substr($datum, 10, 2);
        // "YYYY, MM." carries no day: return year and month only instead of
        // asking for an exact date with an empty day.
        if ($day === '') {
            return new NodaSplitTime($start, $start, $month);
        }
        return NodaSplitTime::genExactDate($start, $month, $day);
    }

    return false;
}
/**
 * Checks if an input date is an incomplete or open-ended date
 * (e.g. "vor 1920", "nach 1930", "seit 1950", "1920-", "-1920", "?.6.2024")
 * and splits it.
 *
 * Numeric forms are matched directly. For forms with a keyword (German
 * prefixes, German / Hungarian suffixes) the keyword is cut off, the rest is
 * passed to attempt_splitting() and the result is turned into an open-ended time.
 *
 * @param string $datum Input date.
 *
 * @return NodaSplitTime|false
 */
public static function is_incomplete_date(string $datum):NodaSplitTime|false {
    $datum = self::clean_input($datum);
    $inpDateWoSpaces = str_replace(" ", "", $datum);

    // Open-ended "since": the end is unknown.
    $toSince = static fn (NodaSplitTime $time):NodaSplitTime => new NodaSplitTime(
        $time->start_year, '?', $time->counting_time_month, $time->counting_time_day,
        $time->counting_time_indicator, NodaTimeBeforeAfterIndicator::since, $time->start_date, '?');
    // Open-ended "until": the start is unknown.
    $toUntil = static fn (NodaSplitTime $time):NodaSplitTime => new NodaSplitTime(
        '?', $time->end_year, $time->counting_time_month, $time->counting_time_day,
        $time->counting_time_indicator, NodaTimeBeforeAfterIndicator::until, '?', $time->end_date);

    //
    // Purely numeric forms
    //

    // YYYY.MM.DD.- / YYYY.MM.DD-
    if (\preg_match("/^[0-9]{4}\.[0-9]{2}\.[0-9]{2}(\.|)\-$/", $inpDateWoSpaces)) {
        $year = \substr($inpDateWoSpaces, 0, 4);
        $month = \substr($inpDateWoSpaces, 5, 2);
        $day = \substr($inpDateWoSpaces, 8, 2);
        return NodaSplitTime::genExactDate($year, $month, $day, NodaTimeBeforeAfterIndicator::since);
    }
    // YYYY.MM.- / YYYY.MM-
    if (\preg_match("/^[0-9]{4}\.[0-9]{2}(\.|)\-$/", $inpDateWoSpaces)) {
        $start = \substr($inpDateWoSpaces, 0, 4);
        $month = \substr($inpDateWoSpaces, 5, 2);
        return new NodaSplitTime($start, '?', $month, before_after_indicator: NodaTimeBeforeAfterIndicator::since);
    }
    // YYYY-
    if (\preg_match("/^[0-9]{4}\-$/", $inpDateWoSpaces)) {
        $start = \substr($inpDateWoSpaces, 0, 4);
        return new NodaSplitTime($start, '?', before_after_indicator: NodaTimeBeforeAfterIndicator::since);
    }
    // ?.6.2024 / ?.06.2024 (unknown day)
    if (\preg_match("/^\?\.([0-9]|[0-9]{2})\.[0-9]{4}$/", $inpDateWoSpaces)) {
        $year = \substr($inpDateWoSpaces, -4);
        // Single-digit months are padded so that the month always has two digits.
        $month = self::pad_to_two(trim(\substr($inpDateWoSpaces, 2, 2), '. '));
        return new NodaSplitTime($year, $year, $month);
    }
    // ?.?.2024 (unknown day and month)
    if (\preg_match("/^\?\.\?\.[0-9]{4}$/", $inpDateWoSpaces)) {
        $year = \substr($inpDateWoSpaces, -4);
        return new NodaSplitTime($year, $year);
    }
    // A year wrapped in dots, question marks, brackets etc.
    $strippedYear = \trim($inpDateWoSpaces, '. ?!()[]X');
    if (\preg_match("/^[0-9]{4}$/", $strippedYear)) {
        return new NodaSplitTime($strippedYear, $strippedYear);
    }
    // A year with "-0" / "0-" residue or with the letter "o" in place of a zero
    $repairedYear = \strtr($inpDateWoSpaces, ['-0' => '', '0-' => '', 'o' => '0']);
    if (\preg_match("/^[0-9]{4}$/", $repairedYear)) {
        return new NodaSplitTime($repairedYear, $repairedYear);
    }
    // -YYYY.MM.DD
    if (\preg_match("/^\-[0-9]{4}\.[0-9]{2}\.[0-9]{2}$/", $inpDateWoSpaces)) {
        $year = \substr($inpDateWoSpaces, 1, 4);
        $month = \substr($inpDateWoSpaces, 6, 2);
        $day = \substr($inpDateWoSpaces, 9, 2);
        return NodaSplitTime::genExactDate($year, $month, $day, NodaTimeBeforeAfterIndicator::until);
    }
    // -YYYY.MM
    if (\preg_match("/^\-[0-9]{4}\.[0-9]{2}$/", $inpDateWoSpaces)) {
        $year = \substr($inpDateWoSpaces, 1, 4);
        $month = \substr($inpDateWoSpaces, 6, 2);
        return new NodaSplitTime('?', $year, $month, before_after_indicator: NodaTimeBeforeAfterIndicator::until);
    }
    // -YYYY
    if (\preg_match("/^\-[0-9]{4}$/", $inpDateWoSpaces)) {
        $year = \substr($inpDateWoSpaces, 1, 4);
        return new NodaSplitTime('?', $year, before_after_indicator: NodaTimeBeforeAfterIndicator::until);
    }

    //
    // After
    //

    // Prefix: "nach 1920" - everything from the first space on is the date.
    if (\preg_match("/^(Nach|nach)\ /", $datum)) {
        if (($spacePos = \strpos($datum, " ")) === false) {
            return false;
        }
        if ($output = self::attempt_splitting(\substr($datum, $spacePos))) {
            return self::_turn_noda_split_time_to_after($output);
        }
    }
    // Suffix: "1920 nach" / "1920 (nach" - everything before the LAST space
    // is the date. Cutting at the first space would truncate dates that
    // contain spaces themselves.
    if (\preg_match("/\ (\(nach|nach)$/", $datum)) {
        if (($spacePos = \strrpos($datum, " ")) === false) {
            return false;
        }
        if ($output = self::attempt_splitting(\substr($datum, 0, $spacePos))) {
            return self::_turn_noda_split_time_to_after($output);
        }
    }

    //
    // Before
    //

    // Prefix: "vor 1920"
    if (\preg_match("/^(Vor|vor)\ /", $datum)) {
        if (($spacePos = \strpos($datum, " ")) === false) {
            return false;
        }
        if ($output = self::attempt_splitting(\substr($datum, $spacePos))) {
            return self::_turn_noda_split_time_to_before($output);
        }
    }
    // Suffix: "1920 vor" / "1920 (vor" / "1920 előtt"
    if (\preg_match("/\ (\(vor|\(Vor|vor|előtt)$/", $datum)) {
        if (($spacePos = \strrpos($datum, " ")) === false) {
            return false;
        }
        if ($output = self::attempt_splitting(\substr($datum, 0, $spacePos))) {
            return self::_turn_noda_split_time_to_before($output);
        }
    }

    //
    // Since
    //

    // Prefix: "ab 1920" / "seit 1920"
    if (\preg_match("/^(Ab|ab|Seit|seit)\ /", $datum)) {
        if (($spacePos = \strpos($datum, " ")) === false) {
            return false;
        }
        if ($output = self::attempt_splitting(\substr($datum, $spacePos))) {
            return $toSince($output);
        }
    }
    // Endings beginning with a dash: "1920-tól" / "1920-től"
    if (\preg_match("/(\-től|\-tól)$/", $datum)) {
        if (($dashPos = strrpos($datum, "-")) === false) {
            return false;
        }
        if ($output = self::attempt_splitting(\substr($datum, 0, $dashPos))) {
            return $toSince($output);
        }
    }

    //
    // Until
    //

    // Prefix: "bis 1920"
    if (\preg_match("/^(Bis|bis)\ /", $datum)) {
        if (($spacePos = \strpos($datum, " ")) === false) {
            return false;
        }
        if ($output = self::attempt_splitting(\substr($datum, $spacePos))) {
            return $toUntil($output);
        }
    }
    // Endings beginning with a space (until): "1920 (bis"
    if (\preg_match("/ (\(bis)$/", $datum)) {
        if (($spacePos = strrpos($datum, " ")) === false) {
            return false;
        }
        if ($output = self::attempt_splitting(\substr($datum, 0, $spacePos))) {
            return $toUntil($output);
        }
    }
    // Endings beginning with a hyphen: "1920-ig" / "1920-ig."
    if (\preg_match("/\-ig(\.|)$/", $datum)) {
        if (($dashPos = strrpos($datum, "-")) === false) {
            return false;
        }
        if ($output = self::attempt_splitting(\substr($datum, 0, $dashPos))) {
            return $toUntil($output);
        }
    }
    // "1920ig"
    if (\preg_match("/^[0-9]{4}ig$/", $datum)) {
        if ($output = self::attempt_splitting(\substr($datum, 0, 4))) {
            return $toUntil($output);
        }
    }
    // "1920-as évekig" / "1920-es évekig": drop the trailing "ig"
    if (str_ends_with($datum, '-as évekig') || str_ends_with($datum, '-es évekig')) {
        if ($output = self::attempt_splitting(\substr($datum, 0, -2))) {
            return $toUntil($output);
        }
    }

    //
    // Hungarian endings for "from ... on"
    //

    // Endings that are extensions of an existing word: "... évektől".
    // The suffix "től" takes 4 bytes in UTF-8.
    if (\preg_match("/évektől$/", $datum)) {
        if ($output = self::attempt_splitting(\substr($datum, 0, -4))) {
            return self::_turn_noda_split_time_to_after($output);
        }
    }
    // Endings beginning with a space (after): "1920 utántól"
    if (\preg_match("/ (utantól|utántól)$/", $datum)) {
        if (($spacePos = strrpos($datum, " ")) === false) {
            return false;
        }
        if ($output = self::attempt_splitting(\substr($datum, 0, $spacePos))) {
            return $toSince($output);
        }
    }

    return false;
}
/**
 * Translates a span of two century numbers into a span of years.
 *
 * If the first century is smaller than the second (ascending order), the
 * span runs from the first year of the first century to the last year of
 * the second (17, 18 > 1601 - 1800). Otherwise the order is taken to be
 * descending and the span runs from "<start>00" to "<end - 1>01".
 *
 * @param string $start Begin time.
 * @param string $end   End time.
 *
 * @return NodaSplitTime
 */
public static function negotiate_century_span_bce_ce(string $start, string $end):NodaSplitTime {
    $firstCentury = \intval($start);
    $secondCentury = \intval($end);

    $isAscending = $firstCentury < $secondCentury;
    if (!$isAscending) {
        return new NodaSplitTime($firstCentury . "00", ($secondCentury - 1) . "01");
    }

    return new NodaSplitTime(($firstCentury - 1) . "01", $secondCentury . "00");
}
/**
 * Checks if an input date is a century ("17. Jahrhundert") or a span of
 * centuries ("17.-18. Jahrhundert") and translates it into a span of years.
 * Always returns supposedly positive settings (bc / ce).
 *
 * @param string $datum Input date.
 *
 * @return NodaSplitTime|false
 */
public static function is_century(string $datum):NodaSplitTime|false {
    $datum = self::clean_input($datum);

    // TODO: Check if this is duplicate
    // 17. Jahrhundert
    if (\preg_match("/^[0-9]{2}(\.|)" . self::REGEX_CENTURIES . "$/", $datum)) {
        if ($centuryNo = \intval(\substr($datum, 0, 2))) {
            $centuryNo--;
            return new NodaSplitTime((string)$centuryNo . "01", \strval($centuryNo + 1) . "00");
        }
    }

    // 1. Jahrhundert
    // "Jh" and "Jh." are alternatives. The pipe between them must not be
    // escaped, else only the literal text "Jh|Jh." would match.
    if (\preg_match("/^[0-9]\.\ (Jh|Jh\.|Jahrhundert|sz|század)$/", $datum)) {
        if ($centuryNo = \intval(\substr($datum, 0, 1))) {
            $centuryNo--;
            return new NodaSplitTime((string)$centuryNo . "01", \strval($centuryNo + 1) . '00');
        }
    }

    // 17.-18. Jahrhundert / 17./18. Jahrhundert
    if (\preg_match("/^[0-9]{2}(\.|)(|\ Jh|\ Jh\.|\ Jahrhundert|\ sz|\ század)(\-|\/)[0-9]{2}\.\ (Jh\.|Jahrhundert|sz|század)$/", $datum)) {
        if (\strpos($datum, '/') !== false) {
            $datum = str_replace('/', '-', $datum);
        }
        if (($dashPos = \strpos($datum, "-")) !== false) {
            return self::negotiate_century_span_bce_ce(\substr($datum, 0, 2), \substr($datum, $dashPos + 1, 2));
        }
    }

    // 1.-12. Jahrhundert
    if (\preg_match("/^[0-9](\.|)(|\ Jh\.||\ Jahrhundert||\ sz||\ század)\-[0-9]{2}\.\ (Jh\.|Jahrhundert|sz|század)$/", $datum)) {
        if (($dashPos = \strpos($datum, "-")) !== false) {
            return self::negotiate_century_span_bce_ce(\substr($datum, 0, 1), \substr($datum, $dashPos + 1, 2));
        }
    }

    // 1.-2. Jahrhundert / 1-2. Jahrhundert (the dot after the first number is optional)
    if (\preg_match("/^[0-9](\.|)(|\ Jh\.||\ Jahrhundert||\ sz||\ század)\-[0-9]\.\ (Jh\.|Jahrhundert|sz|század)$/", $datum)) {
        if (($dashPos = \strpos($datum, "-")) !== false) {
            return self::negotiate_century_span_bce_ce(\substr($datum, 0, 1), \substr($datum, $dashPos + 1, 1));
        }
    }

    return false;
}
/**
 * Checks if an input date is a decade ("1920er Jahre", "20er Jahre",
 * "1920-as évek", ...) and returns the ten years it covers.
 *
 * Two-digit decades are prefixed with "19" (20er > 1920-1929).
 *
 * @param string $datum Input date.
 *
 * @return NodaSplitTime|false
 */
public static function is_decade(string $datum):NodaSplitTime|false {
    $datum = self::clean_input($datum);
    $patternTail = self::REGEX_DECADES . "$/";

    if (\preg_match("/^[0-9]0" . $patternTail, $datum)) {
        // 20er Jahre
        $firstYear = "19" . \substr($datum, 0, 2);
    }
    elseif (\preg_match("/^[0-9]{3}0" . $patternTail, $datum)) {
        // 1920er Jahre
        $firstYear = \substr($datum, 0, 4);
    }
    else {
        return false;
    }

    $lastYear = (string)(\intval($firstYear) + 9);
    return new NodaSplitTime($firstYear, $lastYear);
}
/**
 * Derives a "before X" time from an existing split time: the start becomes
 * unknown ("?") and the start of the given time becomes the end.
 *
 * If the given time has no counting month, the end year is set to the
 * year before its start year.
 *
 * @param NodaSplitTime $output Time to translate.
 *
 * @return NodaSplitTime
 */
private static function _turn_noda_split_time_to_before(NodaSplitTime $output):NodaSplitTime {
    $hasNoMonth = empty(trim($output->counting_time_month, "0 .,"));
    $lastYear = $hasNoMonth
        ? strval((int)$output->start_year - 1)
        : $output->start_year;

    return new NodaSplitTime(
        '?',
        $lastYear,
        $output->counting_time_month,
        $output->counting_time_day,
        $output->counting_time_indicator,
        NodaTimeBeforeAfterIndicator::before,
        '?',
        $output->start_date
    );
}
/**
 * Derives an "after X" time from an existing split time: the end becomes
 * unknown ("?") and the end of the given time becomes the start.
 *
 * If the given time has no counting month, the start year is set to the
 * year after its end year.
 *
 * @param NodaSplitTime $output Time to translate.
 *
 * @return NodaSplitTime
 */
private static function _turn_noda_split_time_to_after(NodaSplitTime $output):NodaSplitTime {
    $hasNoMonth = empty(trim($output->counting_time_month, "0 .,"));
    $firstYear = $hasNoMonth
        ? strval((int)$output->end_year + 1)
        : $output->end_year;

    return new NodaSplitTime(
        $firstYear,
        '?',
        $output->counting_time_month,
        $output->counting_time_day,
        $output->counting_time_indicator,
        NodaTimeBeforeAfterIndicator::after,
        $output->end_date,
        '?'
    );
}
/**
 * Checks if the string consists of two times joined by exactly one dash
 * and, if so, splits both of them.
 *
 * A side that is shorter than four bytes and also shorter than the other
 * side is rejected. An empty array is returned whenever the input does not
 * qualify or one of the two sides cannot be split.
 *
 * @param string $datum Date.
 *
 * @return array{}|array{0: NodaSplitTime, 1: NodaSplitTime}
 */
public static function check_is_timespan_from_till(string $datum):array {
    if (substr_count($datum, '-') !== 1) {
        return [];
    }

    [$fromPart, $tillPart] = explode('-', $datum);
    $fromLength = strlen($fromPart);
    $tillLength = strlen($tillPart);

    if ($tillLength < 4 && $tillLength < $fromLength) {
        return [];
    }
    if ($fromLength < 4 && $fromLength < $tillLength) {
        return [];
    }

    $from = self::attempt_splitting($fromPart);
    if (empty($from)) {
        return [];
    }

    $till = self::attempt_splitting($tillPart);
    if (empty($till)) {
        return [];
    }

    return [$from, $till];
}
/**
 * Contains special rules for incorrectly or incompletely spelled out timespan names.
 * Each rule rewrites the input into a form with a spelled-out start and end.
 * To be called by self::attempt_splitting_from_till().
 *
 * @param string $datum Date.
 *
 * @return string Rewritten timespan, or an empty string if no rule applies.
 */
public static function _attempt_rewriting_special_cases_from_till(string $datum):string {

    if (empty($datum)) return '';

    $inputLength = strlen($datum);

    // Hungarian year and month until month
    // 2005.01.-02. => 2005.01.-2005.02.
    if ($inputLength === 12 && (\preg_match("/^[0-9]{4}\.[0-1][0-9]\.\-[0-1][0-9]\.$/", $datum))) {
        $reconstituted = substr($datum, 0, 8) . '-';
        $reconstituted .= substr($datum, 0, 4) . '.' . substr($datum, -3);
        return $reconstituted;
    }

    // Hungarian year and month until month without a dot after the first YYYY-MM
    // 2005.01-02. => 2005.01.-2005.02.
    if (in_array($inputLength, [10, 11], true) && (\preg_match("/^[0-9]{4}\.[0-1][0-9]\-[0-1][0-9](\.|)$/", $datum))) {
        $reconstituted = substr($datum, 0, 7) . '.-';
        $reconstituted .= substr($datum, 0, 4) . '.' . substr(rtrim($datum, '.'), -2) . '.';
        return $reconstituted;
    }

    // Hungarian year, month and day until month and day
    // 2005.01.01.-02.02. => 2005.01.01.-2005.02.02.
    // 2005.01.01-02.02 => 2005.01.01.-2005.02.02.
    if ($inputLength >= 16 && $inputLength <= 18 && (\preg_match("/^[0-9]{4}\.[0-1][0-9]\.[0-3][0-9](\.|)\-[0-1][0-9]\.[0-3][0-9](\.|)$/", $datum))) {
        $parts = explode('-', $datum);
        if (count($parts) !== 2) return '';
        $reconstituted = substr($datum, 0, 10) . '.-';
        $reconstituted .= substr($datum, 0, 4) . '.' . rtrim($parts[1], '.') . '.';
        return $reconstituted;
    }

    // Hungarian year, month and day until day: YYYY.MM.DD-DD (dots after the days optional)
    // 2005.01.01-02 => 2005.01.01.-2005.01.02
    if ($inputLength >= 13 && $inputLength <= 15 && \preg_match("/^[0-9]{4}\.[0-1][0-9]\.[0-3][0-9](\.|)\-[0-3][0-9](\.|)$/", $datum)) {
        $parts = explode('-', $datum);
        if (count($parts) !== 2) return '';
        $reconstituted = substr($datum, 0, 10) . '.-';
        $reconstituted .= substr($datum, 0, 7) . '.' . substr(rtrim($parts[1], '.'), -2);
        return $reconstituted;
    }

    // German day ranges within one month:
    // T.-T.MM.JJJJ / T.-TT.MM.JJJJ / TT.-TT.MM.JJJJ
    // The month may have one or two digits, the year one to four digits.
    // Days, month and year are taken from capture groups, so that years with
    // fewer than four digits are read correctly as well. The dot after the
    // first day is matched literally.
    // A two-digit first day followed by a one-digit second day (TT.-T.) is
    // deliberately not handled here.
    if (\preg_match('/^([0-9]{1,2})\.\-([0-9]{1,2})\.([0-9]{1,2})\.([0-9]{1,4})$/', $datum, $matches)
        && strlen($matches[1]) <= strlen($matches[2])
    ) {
        $firstday = str_pad($matches[1], 2, '0', STR_PAD_LEFT);
        $day      = str_pad($matches[2], 2, '0', STR_PAD_LEFT);
        $month    = $matches[3];
        $year     = $matches[4];
        return "$firstday.$month.$year-$day.$month.$year";
    }

    // 17-19. Jahrhundert => 1601-19. Jahrhundert
    if (\preg_match("/^[0-9]{2}(\.|)\-[0-9]{2}(\.|)" . self::REGEX_CENTURIES . "$/", $datum)) {
        $parts = explode('-', $datum);
        $reconstituted = ((int)substr($parts[0] ?? "", 0, 2) - 1) . '01-';
        $reconstituted .= substr($parts[1] ?? "", 0, 2) . '. Jahrhundert';
        return $reconstituted;
    }

    // 1950-60 as évek => 1950-60er Jahre
    // (1950-60-as évek reaches this form through the rule below)
    if (\preg_match("/^[0-9]{4}\-[0-9]{2} (a|e)s évek$/", $datum)) {
        $reconstituted = substr($datum, 0, 4) . '-';
        $reconstituted .= substr($datum, 5, 2) . 'er Jahre';
        return $reconstituted;
    }

    // If es évek / as évek is contained in the string (e.g. 1880-1990-es évek), there
    // will be more than one hyphen
    if (MD_STD::stri_contains_any($datum, ['-as évek', '-es-évek', '-es évek'])) {
        // NOTE(review): stri_contains_any() is presumably case-insensitive, hence
        // the replacement is done case-insensitively as well.
        $rewritten = str_ireplace(
            ['-as évek', '-es-évek', '-es évek'],
            [' as évek', ' es évek', ' es évek'],
            $datum
        );
        // Never hand back the unchanged input: the caller retries with the
        // returned string and would otherwise recurse without end.
        if ($rewritten !== $datum) {
            return $rewritten;
        }
    }

    // 1981. július-augusztus > 1981.07-08
    if (is_numeric(substr($datum, 0, 4)) && substr($datum, 4, 2) === '. ') {

        // Map each known month name and abbreviation to its month number
        $monthNames = [];
        foreach ([self::MONTH_NAMES_ENGLISH, self::MONTH_NAMES_GERMAN, self::MONTH_NAMES_HUNGARIAN] as $monthList) {
            foreach ($monthList as $month => $names) {
                foreach ($names as $name) $monthNames[$name] = $month;
            }
        }

        $rewrite = strtr($datum, $monthNames);
        if ($rewrite !== $datum) {
            return str_replace('..', '.', str_replace(" ", ".", $rewrite));
        }
    }

    // Comma-separated start and end: 1950,1960 => 1950-1960
    if (str_contains($datum, ',')) {
        return str_replace(',', '-', $datum);
    }

    return '';

}
/**
 * Attempts to split a timespan written as "start-end" into the names, years
 * and dates of its start and end. The day in the middle of the span is
 * returned as the counting time.
 *
 * @param string $datum Date.
 *
 * @return array<string> Empty array if the input cannot be handled as a
 *         from-till timespan.
 *
 * @throws Exception If start and end are zero days apart.
 */
public static function attempt_splitting_from_till(string $datum):array {

    // Skip inputs that can already be split as a single time (e.g. 1200-1300)
    if (!empty(self::attempt_splitting($datum))) return [];
    if (strlen($datum) === 9 and substr($datum, 4, 1) !== '-') return [];

    if (empty($startEnd = self::check_is_timespan_from_till($datum))) {
        $rewritten = self::_attempt_rewriting_special_cases_from_till($datum);
        // Retry only if the rewrite actually changed the input. Retrying with
        // an identical string would recurse without end.
        if (!empty($rewritten) && $rewritten !== $datum) {
            return self::attempt_splitting_from_till($rewritten);
        }
        return [];
    }

    [$start, $end] = $startEnd;

    // Timespans starting before the common era are not handled here
    if ($start->counting_time_indicator === NodaCountingTimeIndicator::bce) return [];

    try {
        $startDate = $start->startToDateTime();
        $endDate = $end->endToDateTime();
    }
    catch (Exception $e) {
        return [];
    }

    // Number of days between the start and the end of the span
    $interval = $startDate->diff($endDate);
    $days_diff = (int)$interval->format('%a');
    if ($days_diff === 0) {
        throw new Exception("Missing interval, failed to calculate (" . $days_diff . ")");
    }

    // The counting time is the start date plus half of the span (rounded)
    $middle_substraction = round($days_diff / 2);
    if (!($startDateTimestamp = strtotime($startDate->format('Y-m-d')))) return [];
    if (!($middleDayTimestamp = strtotime('+' . $middle_substraction . ' days', $startDateTimestamp))) return [];

    $middle_year = date('Y', $middleDayTimestamp);
    $middle_month = date('m', $middleDayTimestamp);
    $middle_day = date('d', $middleDayTimestamp);

    // Names of the form YYYY-YYYY are reduced to the relevant year:
    // the first one for the start, the second one for the end.
    $start_name = $start->toTimeName();
    $end_name = $end->toTimeName();
    if (strlen($start_name) === 9 and substr($start_name, 4, 1) === '-') $start_name = substr($start_name, 0, 4);
    if (strlen($end_name) === 9 and substr($end_name, 4, 1) === '-') $end_name = substr($end_name, 5, 4);

    return [
        "start_name" => $start_name,
        "end_name" => $end_name,
        "start_year" => $start->start_year,
        "end_year" => $end->end_year,
        "start_date" => $start->start_date,
        "end_date" => $end->end_date,
        "counting_time_year" => $middle_year,
        "counting_time_month" => $middle_month,
        "counting_time_day" => $middle_day,
        "counting_time_bcce" => "+",
    ];

}
/**
 * Strips superfluous leading and trailing characters from an input string and
 * collapses a duplicated "X-X" input into a single "X".
 *
 * @param string $input Input string.
 *
 * @return string
 */
private static function _runBasicNameCleanup(string $input):string {

    // Surrounding whitespace first, then surrounding commas and semicolons,
    // finally leading spaces and dots.
    $cleaned = trim($input);
    $cleaned = trim($cleaned, ',;');
    $cleaned = ltrim($cleaned, ' .');

    if (!str_contains($cleaned, '-')) {
        return $cleaned;
    }

    // Clean away duplicate inputs: 1440-1440 => 1440
    $segments = explode('-', $cleaned);
    $isDuplicate = count($segments) === 2 && $segments[0] === $segments[1];

    return $isDuplicate ? $segments[0] : $cleaned;

}
/**
 * Wrapper to check if any splitting command works. The input is cleaned up
 * first; the available checks are then tried in a fixed order and the first
 * non-empty result is returned.
 *
 * @param string $datum Input date.
 *
 * @return NodaSplitTime|false False if no check succeeds or if a check
 *         rejects the input with an MDgenericInvalidInputsException.
 */
public static function attempt_splitting(string $datum):NodaSplitTime|false {

    $datum = self::_runBasicNameCleanup($datum);

    try {
        if (!empty($moda = self::is_timespan($datum))) {
            return $moda;
        }
        if (!empty($moda = self::is_incomplete_date($datum))) {
            return $moda;
        }
        if (!empty($moda = self::is_valid_date($datum))) {
            return $moda;
        }
        if (!empty($moda = self::is_valid_date_hungarian($datum))) {
            return $moda;
        }
        if (!empty($moda = self::is_century($datum))) {
            return $moda;
        }
        if (!empty($moda = self::is_decade($datum))) {
            return $moda;
        }
    }
    catch (MDgenericInvalidInputsException $e) {
        return false;
    }

    // Numeric dates written with spaces, e.g. "2015. 05."
    if (str_contains($datum, ' ')) {
        $rewrite = str_replace(' ', '', $datum);
        // The numeric check has to run on the space-free variant: with the
        // space still in place, a string like "2015 05" is never numeric.
        // The retry cannot loop, as $rewrite contains no spaces anymore.
        if (is_numeric(str_replace('.', '', $rewrite))) {
            return self::attempt_splitting($rewrite);
        }
    }

    return false;

}
}