<?PHP /** * Splits nodac times. * * @author Joshua Ramon Enslin <joshua@museum-digital.de> */ declare(strict_types = 1); /** * Class for splitting times. */ final class NodaTimeSplitter { private const MONTH_NAMES_GERMAN = [ "01" => ['Januar', 'Jan.'], "02" => ['Februar', 'Feb'], "03" => ['März', 'Mrz.'], "04" => ['April', 'Apr.'], "05" => ['Mai'], "06" => ['Juni', 'Jun.'], "07" => ['Juli', 'Jul.'], "08" => ['August', 'Aug.'], "09" => ['September', 'Sep.', 'Sept.'], "10" => ['Oktober', 'Okt.'], "11" => ['November', 'Nov.'], "12" => ['Dezember', 'Dez.'], ]; private const MONTH_NAMES_ENGLISH = [ "01" => ['January', 'Jan.'], "02" => ['February', 'Feb'], "03" => ['March', 'Mar.'], "04" => ['April', 'Apr.'], "05" => ['May'], "06" => ['June', 'Jun.'], "07" => ['July', 'Jul.'], "08" => ['August', 'Aug.'], "09" => ['September', 'Sep.', 'Sept.'], "10" => ['October', 'Oct.'], "11" => ['November', 'Nov.'], "12" => ['December', 'Dec.'], ]; private const MONTH_NAMES_HUNGARIAN = [ "01" => ['január', 'januar', 'jan'], "02" => ['február', 'feb'], "03" => ['március', 'mar.', 'már.'], "04" => ['április', 'apr.', 'ápr.'], "05" => ['május', 'maj.', 'máj.'], "06" => ['június', 'jun.', 'jún'], "07" => ['július', 'julius', 'jul.', 'júl.'], "08" => ['augusztus', 'aug.'], "09" => ['szeptember', 'szp.'], "10" => ['október', 'okt.'], "11" => ['november', 'nov.'], "12" => ['december', 'dec.'], ]; private const STRINGS_TO_CLEAN = [ "között" => "", " рр." => "", " рр" => "", "nach Christus" => "", "n. Christus" => "", "nach Chr." => "", "n. Chr." => "", "n.Chr." => "", " pp" => "", " p" => "", " р" => "", // Cyrillic // To clean "v.Chr." => "v. Chr.", "v.C." => "v. Chr.", "v. C." => "v. Chr.", "v. Chr" => "v. Chr.", "BCE" => "v. Chr.", "CE" => "", "vor Christus" => "v. Chr.", " до н. е." => "v. Chr.", ]; private const STRINGS_TO_CLEAN_START = [ "V. " => "5. ", "IV. " => "4. ", "III. " => "3. ", "II. " => "2. ", "I. " => "1. ", ]; private const STOP_STRINGS_GERMAN = [ "-", ",", ";", ":", "/", "(", ")", "[", "]", ", ", " und ", "nach ", "um ", "ca.", "ab ", "seit ", "bis ", "vor ", "anfang ", "ende ", ]; private const STOP_STRINGS_HUNGARIAN = [ "-", ",", ";", ":", "/", "(", ")", "[", "]", "ca.", ", ", "-ig", "és", "eleje", "között", "töl", "tól", "januárig", "februárig", "márciusig", "vége", "végén", "áprilisig", "májusig", "júniusig", "júliusig", "augusztusig", "szeptemberig", "októberig", "novemberig", "decemberig", ]; private const REGEX_CENTURIES = '(\ |)(Jh|Jh\.|Jhd(|\.)|Jhdt(|\.)|Jahrhundert|sz|század|th century|ст|ст\.)'; private const REGEX_DECADES = '(s|er|er\ Jahre|(\-|\ )es\ évek|(\-|\ )as\ \évek|\ évek|\-es\ években|\-ті)'; /** * Cleans input strings by trimming obsolete stuff. * * @param string $input Input date name. * * @return string */ private static function clean_input(string $input):string { while (\strpos($input, " -") !== false) $input = \str_replace(" -", "-", $input); while (\strpos($input, "- ") !== false) $input = \str_replace("- ", "-", $input); $input = \strtr($input, self::STRINGS_TO_CLEAN); foreach (self::STRINGS_TO_CLEAN_START as $toCleanFrom => $toCleanTo) { if (strpos($input, $toCleanFrom) === 0) { $input = \str_replace($toCleanFrom, $toCleanTo, $input); } } while (strpos($input, "..") !== false) $input = \str_replace("..", ".", $input); return trim($input, ", [](){}"); } /** * Checks if a string is really numeric, not numeric + space, dot. * * @param string $input Input string. * * @return boolean */ private static function is_numeric(string $input):bool { if (\is_numeric($input) and \strpos($input, " ") === false and \strpos($input, ".") === false ) { return true; } return false; } /** * Validates a time substr. * * @param string $datum Date. * @param integer $start Start of substr. * @param integer $end End of substr. * * @return string */ private static function validateDateSubstr(string $datum, int $start, int $end = 10000):string { if ($start !== 0 && !\in_array(\substr($datum, $start - 1, 1), ["-", " ", "."], true) ) { return ""; } $output = \substr($datum, $start, $end); if (self::is_numeric($output)) return $output; return ""; } /** * Generates counting year - the middle between start and end year. * * @param NodaSplitTime $moda Date strings. * * @return integer */ public static function timePartsToCountingYear(NodaSplitTime $moda):int { if ($moda->start_year === "?") { if ($moda->before_after_indicator === NodaTimeBeforeAfterIndicator::before and empty(trim($moda->counting_time_month, " 0"))) { return \abs(\intval($moda->end_year)) + 1; } return \abs(\intval($moda->end_year)); } if ($moda->end_year === "?") { if ($moda->before_after_indicator === NodaTimeBeforeAfterIndicator::after and empty(trim($moda->counting_time_month, " 0"))) { return \abs(\intval($moda->start_year)) - 1; } return \abs(\intval($moda->start_year)); } return \abs((int)\ceil(\intval($moda->end_year) - ((\intval($moda->end_year) - \intval($moda->start_year)) / 2))); } /** * Generates HTML for linking disassembly of times for a single day. * * @param integer $znum Time ID. * @param NodaSplitTime $moda Date strings. * @param MDTlLoader $tlLoader Translation loader. * * @return string */ public static function generateDisassemblyForDay(int $znum, NodaSplitTime $moda, MDTlLoader $tlLoader):string { $zaehlzeit_jahr = self::pad_to_four((string)self::timePartsToCountingYear($moda)); // Wenn Datum in Form von tt.mm.jjjj, dann biete zerlegen an $output = '<hr>'; $output .= '<table>'; $output .= '<tr><td width="250px">' . $tlLoader->tl("tempi", "tempi", "time_tool") . '</td>'; $output .= '<td><a href="tempi_md/zeit_cha.php?znum=' . $znum . '&kontrolle=todo'; if (($newTimeName = $moda->toTimeName()) !== "") { $output .= "&zeit_name_neu={$newTimeName}"; } $output .= '&zeit_beginn_neu=' . $moda->start_year . '&zeit_ende_neu=' . $moda->end_year . '&zeit_zaehlzeit_vorzeichen_neu=' . urlencode($moda->counting_time_indicator->toString()) . '&zeit_zaehlzeit_jahr_neu=' . $zaehlzeit_jahr . '&zeit_zaehlzeit_monat_neu=' . $moda->counting_time_month . '&zeit_zaehlzeit_tag_neu=' . $moda->counting_time_day . '&zeit_status_neu=%2B&zeit_beginn_datum_neu=' . $moda->start_date . '&zeit_ende_datum_neu=' . $moda->end_date . '" class="icons iconsBell buttonLike" id="splitTimeLink">+'; if (!empty($newTimeName)) { $output .= $newTimeName; } else { if (!empty(trim($moda->counting_time_day, " 0")) and !empty(trim($moda->counting_time_month, " 0"))) $output .= $moda->counting_time_day . '.' . $moda->counting_time_month . '.' . $moda->start_year; else if ($moda->start_year !== $moda->end_year) $output .= $moda->start_year . "-" . $moda->end_year; else if (!empty(trim($moda->counting_time_month, " 0"))) $output .= "{$moda->counting_time_month}.{$moda->start_year}"; else $output .= $moda->start_year; } $output .= ' - ' . $tlLoader->tl("tempi", "tempi", "time_disassemble") . '</a></td>'; $output .= '</tr>'; $output .= '</table>'; return $output; } /** * Checks if any string of a list occurs in the haystack input string. * * @param string $haystack Haystack. * @param array<string> $needles Needles. * * @return boolean */ private static function stri_occurs(string $haystack, array $needles):bool { foreach ($needles as $needle) { if (stripos($haystack, $needle) !== false) return true; } return false; } /** * Pads to four digits. E.g. 20 > 0020. * * @param string $input Input string. * * @return string */ public static function pad_to_four(string $input):string { return \substr("0000" . $input, -4); } /** * Pads to four digits. E.g. 2 > 02. * * @param string $input Input string. * * @return string */ public static function pad_to_two(string $input):string { return \substr("00" . $input, -2); } /** * Translate German month to two digits number. * * @param string $datum Date. * * @return NodaSplitTime|false */ public static function is_valid_date(string $datum):NodaSplitTime|false { $datum = self::clean_input($datum); if (\str_ends_with($datum, ' v. Chr.')) { if ($output = self::attempt_splitting(\substr($datum, 0, -8))) { $start = \strval(-1 * \intval($output->end_year)); $end = \strval(-1 * \intval($output->start_year)); $start_date = $output->end_date; $end_date = $output->start_date; if (\intval($start) > \intval($end)) { $startToSet = $end; $end = $start; $start = $startToSet; $start_date = $output->start_date; $end_date = $output->end_date; } return new NodaSplitTime($start, $end, $output->counting_time_month, $output->counting_time_day, NodaCountingTimeIndicator::bce, $output->before_after_indicator, '-' . $start_date, '-' . $end_date); } } if (\preg_match("/^[0-9]{4}\ bis\ [0-9]{4}$/", $datum)) { $start = \substr($datum, 0, 4); $end = \substr($datum, -4); return new NodaSplitTime($start, $end); } if (\preg_match("/^[0-9]{4}\ (und|oder|od.)\ [0-9]{4}$/", $datum)) { $start = \substr($datum, 0, 4); $end = \substr($datum, -4); $startInt = (int)$start; $endInt = (int)$end; if ($startInt === $endInt - 1) { return new NodaSplitTime($start, $end); } } $datum = \str_replace(". ", ".", $datum); if (self::stri_occurs($datum, self::STOP_STRINGS_GERMAN)) { return false; } if (strlen($datum) <= 6) return false; if (strlen($datum) <= 9) $use_day = false; else $use_day = true; if (self::is_numeric((string)\substr($datum, -4))) $year = \substr($datum, -4); // Further code requires a year to be present, skip if none is set if (empty($year)) return false; foreach (self::MONTH_NAMES_ENGLISH as $monthVal => $monthValidNames) { if (self::stri_occurs($datum, $monthValidNames)) { if (!empty($monat)) break; $monat = (string)$monthVal; } } foreach (self::MONTH_NAMES_GERMAN as $monthVal => $monthValidNames) { if (self::stri_occurs($datum, $monthValidNames)) { if (!empty($monat)) break; $monat = (string)$monthVal; } } if (empty($monat) and self::is_numeric((string)\substr($datum, 3, 2))) $monat = \substr($datum, 3, 2); if (self::is_numeric((string)\substr($datum, 0, 2))) { $day = \substr($datum, 0, 2); } else if (\in_array(\substr($datum, 1, 1), [".", " "], true) && self::is_numeric((string)\substr($datum, 0, 1))) { $day = "0" . \substr($datum, 0, 1); } if (!empty($monat) and !empty($day) and $use_day) { return NodaSplitTime::genExactDate($year, $monat, $day); } else if (!empty($monat)) { return new NodaSplitTime($year, $year, $monat); } return false; } /** * Translate Hungarian month to two digits number. * * @param string $datum Date. * * @return NodaSplitTime|false */ public static function is_valid_date_hungarian(string $datum):NodaSplitTime|false { $datum = self::clean_input($datum); if (\preg_match("/^Kr\.\ e\.\ /", $datum)) { if ($output = self::attempt_splitting(\substr($datum, 7))) { $start = \strval(-1 * \intval($output->end_year)); $end = \strval(-1 * \intval($output->start_year)); if (\intval($start) > \intval($end)) { $startToSet = $end; $end = $start; $start = $startToSet; } return new NodaSplitTime($start, $end, $output->counting_time_month, $output->counting_time_day, NodaCountingTimeIndicator::bce, $output->before_after_indicator, '-' . $output->end_date, '-' . $output->start_date); } } // Example: 2009-tol 2010-ig // From 2009 to 2010 if (\preg_match("/^[0-9][0-9][0-9][0-9]\-t(ő|ó)l(\ |\-)[0-9][0-9][0-9][0-9]\-ig$/", $datum)) { $start = \substr($datum, 0, 4); $end = \substr($datum, -7, 4); return new NodaSplitTime($start, $end); } if (self::stri_occurs($datum, self::STOP_STRINGS_HUNGARIAN)) { return false; } // // Rest: Only those entries, where there are spelled out months // if (strlen($datum) <= 9) return false; // The year is only parse-able if it is a four digit year at the start if (self::is_numeric((string)\substr($datum, 0, 4)) && substr($datum, 4, 1) === '.') { $year = \substr($datum, 0, 4); } // Further code requires a year to be present, skip if none is set if (empty($year)) return false; // Skip, if dates are too long and do not contain spaces (= no translatable names) if (str_contains($datum, " ") === false && strlen($datum) > 12) return false; $unparsed = trim(strtolower(str_replace($year, '', $datum)), ' ,.'); foreach (self::MONTH_NAMES_HUNGARIAN as $monthVal => $monthValidNames) { if (self::stri_occurs($datum, $monthValidNames)) { $monat = (string)$monthVal; foreach ($monthValidNames as $name) { $unparsed = str_replace($name, '', $unparsed); } break; } } if (strlen($unparsed) > 5) { return false; } if (empty($monat) and self::is_numeric((string)\substr($datum, 5, 2))) $monat = \substr($datum, 5, 2); else if (empty($monat) and self::is_numeric((string)\substr($datum, 6, 2))) $monat = \substr($datum, 6, 2); // Last four characters must contain at least one space or one dot $day = self::validateDateSubstr($datum, -2); if (empty($day)) $day = self::validateDateSubstr($datum, -3, 2); if (empty($day)) $day = self::validateDateSubstr($datum, -4, 2); if (empty($day)) $day = self::validateDateSubstr($datum, -5, 2); if (empty($day)) $day = self::validateDateSubstr($datum, -6, 2); if (empty($day)) { if (\substr($datum, -2, 1) === " " and self::is_numeric((string)\substr($datum, -1, 1))) { $day = "0" . \substr($datum, -1, 1); } else if (\substr($datum, -3, 1) === " " and self::is_numeric((string)\substr($datum, -2, 1))) { $day = "0" . \substr($datum, -2, 1); } } if (!empty($monat) && empty($day) && preg_match('~[0-9]+~', substr($datum, -3))) { return false; } if (!empty($monat) and !empty($day)) { return NodaSplitTime::genExactDate($year, $monat, $day); } else if (!empty($monat)) { return new NodaSplitTime($year, $year, $monat); } return false; } /** * Translate German month to two digits number. * * @param string $datum Date. * * @return NodaSplitTime|false */ public static function is_valid_date_by_php(string $datum):NodaSplitTime|false { $datum = self::clean_input($datum); if (!($timeInt = \strtotime($datum))) { return false; } return NodaSplitTime::genExactDate(\date("Y", $timeInt), \date("m", $timeInt), \date("d", $timeInt)); } /** * Checks if an input date is a timespan. * * @param string $datum Input date. * * @return NodaSplitTime|false */ public static function is_timespan(string $datum):NodaSplitTime|false { $datum = self::clean_input($datum); // 10000-20000 if (!empty(\preg_match("/^[0-9]{5}(\-|\/)[0-9]{5}$/", $datum))) { return new NodaSplitTime(start_year: \substr($datum, 0, 5), end_year: \substr($datum, 6, 5)); } // 0000-0000 if (\preg_match("/^[0-9]{4}(\-|\/)[0-9]{4}(\.|)$/", $datum)) { return new NodaSplitTime(start_year: \substr($datum, 0, 4), end_year: \substr($datum, 5, 4)); } // 1.900-2.000 if (\preg_match("/^[0-9]\.[0-9][0-9][0-9](\-|\/)[0-9]\.[0-9][0-9][0-9]$/", $datum)) { $datum = \str_replace(".", "", $datum); return new NodaSplitTime(start_year: \substr($datum, 0, 4), end_year: \substr($datum, 5, 4)); } // German TT.MM.JJJJ / TT.MM.JJJ / TT.MM.JJ / TT.MM.J if (\preg_match("/^[0-9]{2}\.[0-9]{2}\.([0-9]{4}|[0-9]{3}|[0-9]{2}|[0-9])$/", $datum)) { // German T.MM.JJJJ $year = \substr($datum, 6, 4); $month = \substr($datum, 3, 2); $day = \substr($datum, 0, 2); return NodaSplitTime::genExactDate($year, $month, $day); } // German TT.M.JJJJ / TT.M.JJJ / TT.M.JJ / TT.M.J if (\preg_match("/^[0-9]{2}\.[0-9]\.([0-9]{4}|[0-9]{3}|[0-9]{2}|[0-9])$/", $datum)) { // German T.MM.JJJJ $year = \substr($datum, 5, 4); $month = "0" . \substr($datum, 3, 1); $day = \substr($datum, 0, 2); return NodaSplitTime::genExactDate($year, $month, $day); } // German T.MM.JJJJ / T.MM.JJJ / T.MM.JJ / T.MM.J if (\preg_match("/^[0-9]\.[0-9][0-9]\.([0-9][0-9][0-9][0-9]|[0-9][0-9][0-9]|[0-9][0-9]|[0-9])$/", $datum)) { $year = \substr($datum, 5, 4); $month = \substr($datum, 2, 2); $day = "0" . \substr($datum, 0, 1); return NodaSplitTime::genExactDate($year, $month, $day); } // German T.M.JJJJ / T.M.JJJ / T.M.JJ / T.M.J if (\preg_match("/^[0-9]\.[0-9]\.([0-9][0-9][0-9][0-9]|[0-9][0-9][0-9]|[0-9][0-9]|[0-9])$/", $datum)) { $year = \substr($datum, 4, 4); $month = "0" . \substr($datum, 2, 1); $day = "0" . \substr($datum, 0, 1); return NodaSplitTime::genExactDate($year, $month, $day); } // Intl': 2020-12-20 if (\preg_match("/^[0-9]{4}\-[0-9]{2}\-[0-9]{2}$/", $datum)) { // German Y-m $year = \substr($datum, 0, 4); $month = \substr($datum, 5, 2); $day = \substr($datum, 8, 2); return NodaSplitTime::genExactDate($year, $month, $day); } // Intl': 2020-12 if (\preg_match("/^[0-9]{4}\-[0-9]{2}$/", $datum)) { // German Y-m or 1912-15 $year = \substr($datum, 0, 4); $month = \substr($datum, 5, 2); // Assume the end is a month if (intval($month) < 12) { return new NodaSplitTime($year, $year, $month); } else { $end = \substr($year, 0, 2) . $month; return new NodaSplitTime($year, $end); } } // German MM.JJJJ if (\preg_match("/^[0-9]{2}\.[0-9]{4}$/", $datum)) { // German Y-m $year = \substr($datum, 3, 4); $month = \substr($datum, 0, 2); return new NodaSplitTime($year, $year, $month); } if (\preg_match("/^[0-9]\.[0-9]{4}$/", $datum)) { // German Y-m $year = \substr($datum, 2, 4); $month = "0" . \substr($datum, 0, 1); return new NodaSplitTime($year, $year, $month); } if (\preg_match("/^[0-9]{4}\.[0-3][0-9]\.[0-9]{1,2}(\.|)$/", $datum)) { // Hungarian Y-m-d $year = \substr($datum, 0, 4); $month = \substr($datum, 5, 2); $day = self::pad_to_two(\rtrim(\substr($datum, 8, 2), '.')); return NodaSplitTime::genExactDate($year, $month, $day); } if (\preg_match("/^[0-9]{4}\.[0-9]\.[0-9]{1,2}\.$/", $datum)) { // Hungarian Y-m-d > 2005.1.1. $year = \substr($datum, 0, 4); $month = self::pad_to_two(\substr($datum, 5, 1)); $day = self::pad_to_two(\rtrim(\substr($datum, 7, 2), '.')); return NodaSplitTime::genExactDate($year, $month, $day); } if (\preg_match("/^[0-9]{4}\.[0-3][0-9](\.|)$/", $datum)) { // Hungarian Y-m $year = \substr($datum, 0, 4); $month = \substr($datum, 5, 2); return new NodaSplitTime($year, $year, $month); } if (\preg_match("/^[0-9]{4}\-[0-9]{2}$/", $datum)) { // Time spans: 1945-46 $start = \substr($datum, 0, 4); $endDigits = \substr($datum, 5, 2); return new NodaSplitTime($start, \substr($datum, 0, 2) . $endDigits); } // 01.01.1920-31.12.1930 if (\preg_match("/^01\.01\.[0-9]{4}\-31\.12\.[0-9]{4}$/", $datum)) { // Hungarian Y-m $start = \substr($datum, 6, 4); $end = \substr($datum, -4); return new NodaSplitTime($start, $end); } // 303-305 (n. Chr.) if (\preg_match("/^[0-9]{3}\-[0-9]{3}$/", $datum)) { // Hungarian Y-m $start = \substr($datum, 0, 3); $end = \substr($datum, -3); return new NodaSplitTime("0" . $start, "0" . $end); } // 1720-120 if (\preg_match("/^[0-9]{4}\-[0-9]{3}$/", $datum)) { // Hungarian Y-m $start = \substr($datum, 0, 4); $end = \substr($datum, -3); return new NodaSplitTime("0" . $start, "0" . $end); } // 20-30 (n. Chr.) if (\preg_match("/^[0-9]{2}\-[0-9]{2}$/", $datum)) { // 20-40 (n. Chr.) $start = \substr($datum, 0, 2); $end = \substr($datum, -2); return new NodaSplitTime("00" . $start, "00" . $end); } // 1920 if (\preg_match("/^[0-9]{4}(\.|)$/", $datum)) { $start = \substr($datum, 0, 4); return new NodaSplitTime($start, $start); } // 1920 if (\preg_match("/^[0-9]{3}$/", $datum)) { $start = "0" . \substr($datum, 0, 3); return new NodaSplitTime($start, $start); } if (\preg_match("/^[0-9]{2}$/", $datum)) { $start = "00" . \substr($datum, 0, 2); return new NodaSplitTime($start, $start); } // Special case for SMB: YYYY, MM. DD and YYYY, MM. if (\preg_match("/^[0-9]{4}\,\ [0-9]{2}\.(|\ [0-9]{2})$/", $datum)) { $start = \substr($datum, 0, 4); $month = \substr($datum, 6, 2); $day = \substr($datum, 10, 2); return NodaSplitTime::genExactDate($start, $month, $day); } return false; } /** * Checks if an input date is an incomplete date: Before 1920, after 1930. * * @param string $datum Input date. * * @return NodaSplitTime|false */ public static function is_incomplete_date(string $datum):NodaSplitTime|false { $datum = self::clean_input($datum); $inpDateWoSpaces = str_replace(" ", "", $datum); if (\preg_match("/^[0-9]{4}\.[0-9]{2}\.[0-9]{2}(\.|)\-$/", $inpDateWoSpaces)) { // YYYY.MM.DD. $year = \substr($inpDateWoSpaces, 0, 4); $month = \substr($inpDateWoSpaces, 5, 2); $day = \substr($inpDateWoSpaces, 8, 2); return NodaSplitTime::genExactDate($year, $month, $day, NodaTimeBeforeAfterIndicator::since); } if (\preg_match("/^[0-9]{4}\.[0-9]{2}(\.|)\-$/", $inpDateWoSpaces)) { // YYYY.MM.- $start = \substr($inpDateWoSpaces, 0, 4); $month = \substr($inpDateWoSpaces, 5, 2); return new NodaSplitTime($start, '?', $month, before_after_indicator: NodaTimeBeforeAfterIndicator::since); } if (\preg_match("/^[0-9]{4}\-$/", $inpDateWoSpaces)) { // YYYY- $start = \substr($inpDateWoSpaces, 0, 4); return new NodaSplitTime($start, '?', before_after_indicator: NodaTimeBeforeAfterIndicator::since); } // ?.6.2024 if (\preg_match("/^\?\.([0-9]|[0-9]{2})\.[0-9]{4}$/", $inpDateWoSpaces)) { // German Y-m $year = \substr($inpDateWoSpaces, -4); $month = trim(\substr($inpDateWoSpaces, 2, 2), '. '); return new NodaSplitTime($year, $year, $month); } // ?.?.2024 if (\preg_match("/^\?\.\?\.[0-9]{4}$/", $inpDateWoSpaces)) { // German Y-m $year = \substr($inpDateWoSpaces, -4); return new NodaSplitTime($year, $year); } if (\preg_match("/^[0-9]{4}$/", \trim($inpDateWoSpaces, '. ?!()[]X'))) { // German Y-m $year = \trim($inpDateWoSpaces, '. ?!()[]X'); return new NodaSplitTime($year, $year); } if (\preg_match("/^[0-9]{4}$/", \strtr($inpDateWoSpaces, ['-0' => '', '0-' => '', 'o' => '0']))) { // German Y-m $year = \strtr($inpDateWoSpaces, ['-0' => '', '0-' => '', 'o' => '0']); return new NodaSplitTime($year, $year); } if (\preg_match("/^\-[0-9]{4}\.[0-9]{2}\.[0-9]{2}$/", $inpDateWoSpaces)) { // Hungarian Y-m $year = \substr($inpDateWoSpaces, 1, 4); $month = \substr($inpDateWoSpaces, 6, 2); $day = \substr($inpDateWoSpaces, 9, 2); return NodaSplitTime::genExactDate($year, $month, $day, NodaTimeBeforeAfterIndicator::until); } if (\preg_match("/^\-[0-9]{4}\.[0-9]{2}$/", $inpDateWoSpaces)) { // Hungarian Y-m $year = \substr($inpDateWoSpaces, 1, 4); $month = \substr($inpDateWoSpaces, 6, 2); return new NodaSplitTime('?', $year, $month, before_after_indicator: NodaTimeBeforeAfterIndicator::until); } if (\preg_match("/^\-[0-9]{4}$/", $inpDateWoSpaces)) { // Hungarian -Y $year = \substr($inpDateWoSpaces, 1, 4); return new NodaSplitTime('?', $year, before_after_indicator: NodaTimeBeforeAfterIndicator::until); } if (\preg_match("/^(Nach|nach)\ /", $datum)) { if (($spacePos = \strpos($datum, " ")) === false) { return false; } if ($output = self::attempt_splitting(\substr($datum, $spacePos))) { return self::_turn_noda_split_time_to_after($output); } } if (\preg_match("/\ (\(nach|nach)$/", $datum)) { if (($spacePos = \strpos($datum, " ")) === false) { return false; } if ($output = self::attempt_splitting(\substr($datum, 0, $spacePos))) { return self::_turn_noda_split_time_to_after($output); } } if (\preg_match("/^(Vor|vor)\ /", $datum)) { if (($spacePos = \strpos($datum, " ")) === false) { return false; } if ($output = self::attempt_splitting(\substr($datum, $spacePos))) { return self::_turn_noda_split_time_to_before($output); } } if (\preg_match("/\ (\(vor|\(Vor|vor|előtt)$/", $datum)) { if (($spacePos = \strrpos($datum, " ")) === false) { return false; } if ($output = self::attempt_splitting(\substr($datum, 0, $spacePos))) { return self::_turn_noda_split_time_to_before($output); } } if (\preg_match("/^(Ab|ab|Seit|seit)\ /", $datum)) { if (($spacePos = \strpos($datum, " ")) === false) { return false; } if ($output = self::attempt_splitting(\substr($datum, $spacePos))) { return new NodaSplitTime($output->start_year, '?', $output->counting_time_month, $output->counting_time_day, $output->counting_time_indicator, NodaTimeBeforeAfterIndicator::since, $output->start_date, '?'); } } // Endings beginning with a dash if (\preg_match("/(\-től|\-tól)$/", $datum)) { if (($spacePos = strrpos($datum, "-")) === false) { return false; } if ($output = self::attempt_splitting(\substr($datum, 0, $spacePos))) { return new NodaSplitTime($output->start_year, '?', $output->counting_time_month, $output->counting_time_day, $output->counting_time_indicator, NodaTimeBeforeAfterIndicator::since, $output->start_date, '?'); } } if (\preg_match("/^(Bis|bis)\ /", $datum)) { if (($spacePos = \strpos($datum, " ")) === false) { return false; } if ($output = self::attempt_splitting(\substr($datum, $spacePos))) { return new NodaSplitTime('?', $output->end_year, $output->counting_time_month, $output->counting_time_day, $output->counting_time_indicator, NodaTimeBeforeAfterIndicator::until, '?', $output->end_date); } } // Endings beginning with a space (until) if (\preg_match("/ (\(bis)$/", $datum)) { if (($spacePos = strrpos($datum, " ")) === false) { return false; } if ($output = self::attempt_splitting(\substr($datum, 0, $spacePos))) { return new NodaSplitTime('?', $output->end_year, $output->counting_time_month, $output->counting_time_day, $output->counting_time_indicator, NodaTimeBeforeAfterIndicator::until, '?', $output->end_date); } } // Ends beginning with a hyphen if (\preg_match("/\-ig(\.|)$/", $datum)) { if (($spacePos = strrpos($datum, "-")) === false) { return false; } if ($output = self::attempt_splitting(\substr($datum, 0, $spacePos))) { return new NodaSplitTime('?', $output->end_year, $output->counting_time_month, $output->counting_time_day, $output->counting_time_indicator, NodaTimeBeforeAfterIndicator::until, '?', $output->end_date); } } if (!empty(\preg_match("/^[0-9]{4}ig$/", $datum))) { if ($output = self::attempt_splitting(\substr($datum, 0, 4))) { return new NodaSplitTime('?', $output->end_year, $output->counting_time_month, $output->counting_time_day, $output->counting_time_indicator, NodaTimeBeforeAfterIndicator::until, '?', $output->end_date); } } if (str_ends_with($datum, '-as évekig') || str_ends_with($datum, '-es évekig')) { if ($output = self::attempt_splitting(\substr($datum, 0, -2))) { return new NodaSplitTime('?', $output->end_year, $output->counting_time_month, $output->counting_time_day, $output->counting_time_indicator, NodaTimeBeforeAfterIndicator::until, '?', $output->end_date); } } // Endings that are extensions of an existing word if (\preg_match("/évektől$/", $datum)) { if ($output = self::attempt_splitting(\substr($datum, 0, -4))) { return self::_turn_noda_split_time_to_after($output); } } // Endings beginning with a space (after) if (\preg_match("/ (utantól|utántól)$/", $datum)) { if (($spacePos = strrpos($datum, " ")) === false) { return false; } if ($output = self::attempt_splitting(\substr($datum, 0, $spacePos))) { return new NodaSplitTime($output->start_year, '?', $output->counting_time_month, $output->counting_time_day, $output->counting_time_indicator, NodaTimeBeforeAfterIndicator::since, $output->start_date, '?'); } } return false; } /** * Negotiates century spans before times. * * @param string $start Begin time. * @param string $end End time. * * @return NodaSplitTime */ public static function negotiate_century_span_bce_ce(string $start, string $end):NodaSplitTime { $start = \intval($start); $end = \intval($end); if ($start < $end) { return new NodaSplitTime((string)($start - 1) . "01", $end . "00"); } return new NodaSplitTime((string)($start) . "00", ($end - 1) . "01"); } /** * Checks if an input date is a century. Always returns supposedly positive settings (bc / ce). * * @param string $datum Input date. * * @return NodaSplitTime|false */ public static function is_century(string $datum):NodaSplitTime|false { $datum = self::clean_input($datum); // TODO: Check if this is duplicate // 17. Jahrhundert if (\preg_match("/^[0-9]{2}(\.|)" . self::REGEX_CENTURIES . "$/", $datum)) { if ($centuryNo = \intval(\substr($datum, 0, 2))) { $centuryNo--; return new NodaSplitTime((string)$centuryNo . "01", \strval($centuryNo + 1) . "00"); } } // 1. Jahrhundert if (\preg_match("/^[0-9]\.\ (Jh\|Jh\.|Jahrhundert|sz|század)$/", $datum)) { if ($centuryNo = \intval(\substr($datum, 0, 1))) { $centuryNo--; return new NodaSplitTime((string)$centuryNo . "01", \strval($centuryNo + 1) . '00'); } } // 17.-18. Jahrhundert if (\preg_match("/^[0-9]{2}(\.|)(|\ Jh|\ Jh\.|\ Jahrhundert|\ sz|\ század)(\-|\/)[0-9]{2}\.\ (Jh\.|Jahrhundert|sz|század)$/", $datum)) { if (\strpos($datum, '/') !== false) { $datum = str_replace('/', '-', $datum); } if (($dashPos = \strpos($datum, "-")) !== false) { return self::negotiate_century_span_bce_ce(\substr($datum, 0, 2), \substr($datum, $dashPos + 1, 2)); } } // 1.-12. Jahrhundert if (\preg_match("/^[0-9](\.|)(|\ Jh\.||\ Jahrhundert||\ sz||\ század)\-[0-9]{2}\.\ (Jh\.|Jahrhundert|sz|század)$/", $datum)) { if (($dashPos = \strpos($datum, "-")) !== false) { return self::negotiate_century_span_bce_ce(\substr($datum, 0, 1), \substr($datum, $dashPos + 1, 2)); } } // 1.-2. Jahrhundert if (\preg_match("/^[0-9](\.|)(|\ Jh\.||\ Jahrhundert||\ sz||\ század)\-[0-9]\.\ (Jh\.|Jahrhundert|sz|század)$/", $datum)) { if (($dashPos = \strpos($datum, "-")) !== false) { return self::negotiate_century_span_bce_ce(\substr($datum, 0, 1), \substr($datum, $dashPos + 1, 1)); } } // 1-2. Jahrhundert if (\preg_match("/^[0-9](\.|)(|\ Jh\.||\ Jahrhundert||\ sz||\ század)\-[0-9]\.\ (Jh\.|Jahrhundert|sz|század)$/", $datum)) { if (($dashPos = \strpos($datum, "-")) !== false) { return self::negotiate_century_span_bce_ce(\substr($datum, 0, 1), \substr($datum, $dashPos + 1, 1)); } } return false; } /** * Checks if an input date is a decade. * * @param string $datum Input date. * * @return NodaSplitTime|false */ public static function is_decade(string $datum):NodaSplitTime|false { $datum = self::clean_input($datum); // 20er Jahre if (\preg_match("/^[0-9]0" . self::REGEX_DECADES . "$/", $datum)) { $start = "19" . \substr($datum, 0, 2); $ende = (string)(\intval($start) + 9); return new NodaSplitTime($start, $ende); } // 1920er Jahre if (\preg_match("/^[0-9]{3}0" . self::REGEX_DECADES . "$/", $datum)) { $start = \substr($datum, 0, 4); $ende = (string)(\intval($start) + 9); return new NodaSplitTime($start, $ende); } return false; } /** * Creates a NodaSplitTime based on an existing one. * * @param NodaSplitTime $output Time to translate. * * @return NodaSplitTime */ private static function _turn_noda_split_time_to_before(NodaSplitTime $output):NodaSplitTime { if (empty(trim($output->counting_time_month, "0 .,"))) { $start_year = strval((int)$output->start_year - 1); } else $start_year = $output->start_year; return new NodaSplitTime('?', $start_year, $output->counting_time_month, $output->counting_time_day, $output->counting_time_indicator, NodaTimeBeforeAfterIndicator::before, '?', $output->start_date); } /** * Creates a NodaSplitTime based on an existing one. * * @param NodaSplitTime $output Time to translate. * * @return NodaSplitTime */ private static function _turn_noda_split_time_to_after(NodaSplitTime $output):NodaSplitTime { if (empty(trim($output->counting_time_month, "0 .,"))) { $end_year = strval((int)$output->end_year + 1); } else $end_year = $output->end_year; return new NodaSplitTime($end_year, '?', $output->counting_time_month, $output->counting_time_day, $output->counting_time_indicator, NodaTimeBeforeAfterIndicator::after, $output->end_date, '?'); } /** * Checks if the string is a time span with given start and end dates. * * @param string $datum Date. * * @return array{}|array{0: NodaSplitTime, 1: NodaSplitTime} */ public static function check_is_timespan_from_till(string $datum):array { if (substr_count($datum, '-') !== 1) { return []; } list($start_str, $end_str) = explode('-', $datum); if (strlen($end_str) < 4 && strlen($end_str) < strlen($start_str)) { return []; } if (strlen($start_str) < 4 && strlen($start_str) < strlen($end_str)) { return []; } if (empty($start = self::attempt_splitting($start_str))) { return []; } if (empty($end = self::attempt_splitting($end_str))) { return []; } return [$start, $end]; } /** * Contains special rules for incorrectly or incompletely spelled out timespan names. * To be called by self::attempt_splitting_from_till(). * * @param string $datum Date. * * @return string */ public static function _attempt_rewriting_special_cases_from_till(string $datum):string { if (empty($datum)) return ''; $inputLength = strlen($datum); // Hungarian year and month until month // 2005.01.-02. => 2005.01.-2005.02. if ($inputLength === 12 && (\preg_match("/^[0-9]{4}\.[0-1][0-9]\.\-[0-1][0-9]\.$/", $datum))) { $reconstituted = substr($datum, 0, 8) . '-'; $reconstituted .= substr($datum, 0, 4) . '.' . substr($datum, -3); return $reconstituted; } // Hungarian year and month until month without a dot after the first YYYY-MM // 2005.01-02. => 2005.01.-2005.02. if (in_array($inputLength, [10, 11], true) && (\preg_match("/^[0-9]{4}\.[0-1][0-9]\-[0-1][0-9](\.|)$/", $datum))) { $reconstituted = substr($datum, 0, 7) . '.-'; $reconstituted .= substr($datum, 0, 4) . '.' . substr(rtrim($datum, '.'), -2) . '.'; return $reconstituted; } // Hungarian year and month until month // 2005.01.01.-02.02. => 2005.01.01-2005.02.02. // 2005.01.01-02.02 => 2005.01.01-2005.02.02. if ($inputLength >= 16 && $inputLength <= 18 && (\preg_match("/^[0-9]{4}\.[0-1][0-9]\.[0-3][0-9](\.|)\-[0-1][0-9]\.[0-3][0-9](\.|)$/", $datum))) { $parts = explode('-', $datum); if (count($parts) !== 2) return ''; $reconstituted = substr($datum, 0, 10) . '.-'; $reconstituted .= substr($datum, 0, 4) . '.' . rtrim($parts[1], '.') . '.'; return $reconstituted; } // Hungarian; without trailing dots: YYYY.MM.DD-DD if ($inputLength >= 13 && $inputLength <= 15 && \preg_match("/^[0-9]{4}\.[0-1][0-9]\.[0-3][0-9](\.|)\-[0-3][0-9](\.|)$/", $datum)) { $parts = explode('-', $datum); if (count($parts) !== 2) return ''; $reconstituted = substr($datum, 0, 10) . '.-'; $reconstituted .= substr($datum, 0, 7) . '.' . substr(rtrim($parts[1], '.'), -2); return $reconstituted; } // German T.-T.MM.JJJJ / T.-T.MM.JJJ / T.-T.MM.JJ / T.-T.MM.J if (\preg_match("/^[0-9].\-[0-9]\.([0-9]|[0-9]{2})\.([0-9]{4}|[0-9]{3}|[0-9]{2}|[0-9])$/", $datum)) { // German T.MM.JJJJ $year = \substr($datum, -4); $month = trim(\substr($datum, -7, 2), '.'); $day = '0' . \substr($datum, 3, 1); $firstday = '0' . \substr($datum, 0, 1); return "$firstday.$month.$year-$day.$month.$year"; } // German T.-TT.MM.JJJJ / T.-TT.MM.JJJ / T.-TT.MM.JJ / T.-TT.MM.J if (\preg_match("/^[0-9].\-[0-9]{2}\.([0-9]|[0-9]{2})\.([0-9]{4}|[0-9]{3}|[0-9]{2}|[0-9])$/", $datum)) { // German T.MM.JJJJ $year = \substr($datum, -4); $month = trim(\substr($datum, -7, 2), '.'); $day = \substr($datum, 3, 2); $firstday = '0' . \substr($datum, 0, 1); return "$firstday.$month.$year-$day.$month.$year"; } // German TT.-TT.MM.JJJJ / TT.-TT.MM.JJJ / TT.-TT.MM.JJ / TT.-TT.MM.J if (\preg_match("/^[0-9]{2}.\-[0-9]{2}\.([0-9]|[0-9]{2})\.([0-9]{4}|[0-9]{3}|[0-9]{2}|[0-9])$/", $datum)) { // German T.MM.JJJJ $year = \substr($datum, -4); $month = trim(\substr($datum, -7, 2), '.'); $day = \substr($datum, 4, 2); $firstday = \substr($datum, 0, 2); return "$firstday.$month.$year-$day.$month.$year"; } // 17-19. Jahrhundert if (\preg_match("/^[0-9]{2}(\.|)\-[0-9]{2}(\.|)" . self::REGEX_CENTURIES . "$/", $datum)) { $parts = explode('-', $datum); $reconstituted = ((int)substr($parts[0] ?? "", 0, 2) - 1) . '01-'; $reconstituted .= substr($parts[1] ?? "", 0, 2) . '. Jahrhundert'; return $reconstituted; } // 1950-60-as évek if (\preg_match("/^[0-9]{4}\-[0-9]{2} (a|e)s évek$/", $datum)) { $reconstituted = substr($datum, 0, 4) . '-'; $reconstituted .= substr($datum, 5, 2) . 'er Jahre'; return $reconstituted; } // If es évek / as évek is contained in the string (e.g. 1880-1990-es évek), there // will be more than one hyphens if (MD_STD::stri_contains_any($datum, ['-as évek', '-es-évek', '-es évek'])) { return strtr($datum, ['-as évek' => ' as évek', '-es-évek' => ' es évek', '-es évek' => ' es évek']); } // 1981. július-augusztus > 1981.07-08 if (is_numeric(substr($datum, 0, 4)) && substr($datum, 4, 2) === '. ') { $monthNames = []; foreach (self::MONTH_NAMES_ENGLISH as $month => $names) { foreach ($names as $name) $monthNames[$name] = $month; } foreach (self::MONTH_NAMES_GERMAN as $month => $names) { foreach ($names as $name) $monthNames[$name] = $month; } foreach (self::MONTH_NAMES_HUNGARIAN as $month => $names) { foreach ($names as $name) $monthNames[$name] = $month; } $rewrite = strtr($datum, $monthNames); if ($rewrite !== $datum) { return str_replace('..', '.', str_replace(" ", ".", $rewrite)); } } if (str_contains($datum, ',')) { return str_replace(',', '-', $datum); } return ''; } /** * Checks if the string is a time span with given start and end dates. * * @param string $datum Date. * * @return array<string> */ public static function attempt_splitting_from_till(string $datum):array { // Skip 1200-1300 if (!empty(self::attempt_splitting($datum))) return []; if (strlen($datum) === 9 and substr($datum, 4, 1) !== '-') return []; if (empty($startEnd = self::check_is_timespan_from_till($datum))) { if ($rewritten = self::_attempt_rewriting_special_cases_from_till($datum)) { return self::attempt_splitting_from_till($rewritten); } return []; } list($start, $end) = $startEnd; if ($start->counting_time_indicator === NodaCountingTimeIndicator::bce) return []; try { $startDate = $start->startToDateTime(); $endDate = $end->endToDateTime(); } catch (Exception $e) { return []; } $interval = $startDate->diff($endDate); $days_diff = (int)$interval->format('%a'); if ($days_diff === 0) { throw new Exception("Missing interval, failed to calculate (" . $days_diff . ")"); } $middle_substraction = round($days_diff / 2); if (!($startDateTimestamp = strtotime($startDate->format('Y-m-d')))) return []; if (!($middleDayTimestamp = strtotime('+' . $middle_substraction . ' days', $startDateTimestamp))) return []; $middle_year = date('Y', $middleDayTimestamp); $middle_month = date('m', $middleDayTimestamp); $middle_day = date('d', $middleDayTimestamp); $start_name = $start->toTimeName(); $end_name = $end->toTimeName(); if (strlen($start_name) === 9 and substr($start_name, 4, 1) === '-') $start_name = substr($start_name, 0, 4); if (strlen($end_name) === 9 and substr($end_name, 4, 1) === '-') $end_name = substr($end_name, 5, 4); return [ "start_name" => $start_name, "end_name" => $end_name, "start_year" => $start->start_year, "end_year" => $end->end_year, "start_date" => $start->start_date, "end_date" => $end->end_date, "counting_time_year" => $middle_year, "counting_time_month" => $middle_month, "counting_time_day" => $middle_day, "counting_time_bcce" => "+", ]; } /** * Removes superfluous characters and makes an input string roughly parsable. * * @param string $input Input string. * * @return string */ private static function _runBasicNameCleanup(string $input):string { $input = ltrim(trim(trim($input), ',;'), ' .'); // Clean away duplicate inputs // 1440-1440 if (str_contains($input, '-')) { $parts = explode('-', $input); if (count($parts) === 2 && $parts[0] === $parts[1]) { $input = $parts[0]; } } return $input; } /** * Wrapper to check if any splitting command works. * * @param string $datum Input date. * * @return NodaSplitTime|false */ public static function attempt_splitting(string $datum):NodaSplitTime|false { $datum = self::_runBasicNameCleanup($datum); try { if (!empty($moda = self::is_timespan($datum))) { return $moda; } if (!empty($moda = self::is_incomplete_date($datum))) { return $moda; } if (!empty($moda = self::is_valid_date($datum))) { return $moda; } if (!empty($moda = self::is_valid_date_hungarian($datum))) { return $moda; } if (!empty($moda = self::is_century($datum))) { return $moda; } if (!empty($moda = self::is_decade($datum))) { return $moda; } } catch (MDgenericInvalidInputsException $e) { return false; } // 2015. 05. if (str_contains($datum, ' ')) { $rewrite = str_replace(' ', '', $datum); if (is_numeric(str_replace('.', '', $datum))) { return self::attempt_splitting($rewrite); } } return false; } }