1356 lines
49 KiB
PHP
1356 lines
49 KiB
PHP
<?PHP
|
||
/**
|
||
* Splits nodac times.
|
||
*
|
||
* @author Joshua Ramon Enslin <joshua@museum-digital.de>
|
||
*/
|
||
declare(strict_types = 1);
|
||
|
||
/**
|
||
* Class for splitting times.
|
||
*/
|
||
final class NodaTimeSplitter {
|
||
|
||
private const MONTH_NAMES_GERMAN = [
|
||
"01" => ['Januar', 'Jan.'],
|
||
"02" => ['Februar', 'Feb'],
|
||
"03" => ['März', 'Mrz.'],
|
||
"04" => ['April', 'Apr.'],
|
||
"05" => ['Mai'],
|
||
"06" => ['Juni', 'Jun.'],
|
||
"07" => ['Juli', 'Jul.'],
|
||
"08" => ['August', 'Aug.'],
|
||
"09" => ['September', 'Sep.', 'Sept.'],
|
||
"10" => ['Oktober', 'Okt.'],
|
||
"11" => ['November', 'Nov.'],
|
||
"12" => ['Dezember', 'Dez.'],
|
||
];
|
||
|
||
private const MONTH_NAMES_ENGLISH = [
|
||
"01" => ['January', 'Jan.'],
|
||
"02" => ['February', 'Feb'],
|
||
"03" => ['March', 'Mar.'],
|
||
"04" => ['April', 'Apr.'],
|
||
"05" => ['May'],
|
||
"06" => ['June', 'Jun.'],
|
||
"07" => ['July', 'Jul.'],
|
||
"08" => ['August', 'Aug.'],
|
||
"09" => ['September', 'Sep.', 'Sept.'],
|
||
"10" => ['October', 'Oct.'],
|
||
"11" => ['November', 'Nov.'],
|
||
"12" => ['December', 'Dec.'],
|
||
];
|
||
|
||
private const MONTH_NAMES_HUNGARIAN = [
|
||
"01" => ['január', 'januar', 'jan'],
|
||
"02" => ['február', 'feb'],
|
||
"03" => ['március', 'mar.', 'már.'],
|
||
"04" => ['április', 'apr.', 'ápr.'],
|
||
"05" => ['május', 'maj.', 'máj.'],
|
||
"06" => ['június', 'jun.', 'jún'],
|
||
"07" => ['július', 'julius', 'jul.', 'júl.'],
|
||
"08" => ['augusztus', 'aug.'],
|
||
"09" => ['szeptember', 'szp.'],
|
||
"10" => ['október', 'okt.'],
|
||
"11" => ['november', 'nov.'],
|
||
"12" => ['december', 'dec.'],
|
||
];
|
||
|
||
private const STRINGS_TO_CLEAN = [
|
||
"között" => "",
|
||
" рр." => "",
|
||
" рр" => "",
|
||
"nach Christus" => "",
|
||
"n. Christus" => "",
|
||
"nach Chr." => "",
|
||
"n. Chr." => "",
|
||
"n.Chr." => "",
|
||
" pp" => "",
|
||
" p" => "",
|
||
" р" => "", // Cyrillic
|
||
// To clean
|
||
"v.Chr." => "v. Chr.",
|
||
"v.C." => "v. Chr.",
|
||
"v. C." => "v. Chr.",
|
||
"v. Chr" => "v. Chr.",
|
||
"BCE" => "v. Chr.",
|
||
"CE" => "",
|
||
"vor Christus" => "v. Chr.",
|
||
" до н. е." => "v. Chr.",
|
||
];
|
||
|
||
private const STRINGS_TO_CLEAN_START = [
|
||
"V. " => "5. ",
|
||
"IV. " => "4. ",
|
||
"III. " => "3. ",
|
||
"II. " => "2. ",
|
||
"I. " => "1. ",
|
||
];
|
||
|
||
private const STOP_STRINGS_GERMAN = [
|
||
"-",
|
||
",",
|
||
";",
|
||
":",
|
||
"/",
|
||
"(", ")",
|
||
"[", "]",
|
||
", ",
|
||
" und ",
|
||
"nach ",
|
||
"um ",
|
||
"ca.",
|
||
"ab ",
|
||
"seit ",
|
||
"bis ",
|
||
"vor ",
|
||
"anfang ",
|
||
"ende ",
|
||
];
|
||
|
||
private const STOP_STRINGS_HUNGARIAN = [
|
||
"-",
|
||
",",
|
||
";",
|
||
":",
|
||
"/",
|
||
"(", ")",
|
||
"[", "]",
|
||
"ca.",
|
||
", ",
|
||
"-ig",
|
||
"és",
|
||
"eleje",
|
||
"között",
|
||
"töl",
|
||
"tól",
|
||
"januárig",
|
||
"februárig",
|
||
"márciusig",
|
||
"vége",
|
||
"végén",
|
||
"áprilisig",
|
||
"májusig",
|
||
"júniusig",
|
||
"júliusig",
|
||
"augusztusig",
|
||
"szeptemberig",
|
||
"októberig",
|
||
"novemberig",
|
||
"decemberig",
|
||
];
|
||
|
||
private const REGEX_CENTURIES = '(\ |)(Jh|Jh\.|Jhd(|\.)|Jhdt(|\.)|Jahrhundert|sz|század|th century|ст|ст\.)';
|
||
private const REGEX_DECADES = '(s|er|er\ Jahre|(\-|\ )es\ évek|(\-|\ )as\ \évek|\ évek|\-es\ években|\-ті)';
|
||
|
||
/**
 * Normalizes a raw time name before any parsing is attempted.
 *
 * In order: spaces directly before or after a dash are removed, the phrases
 * listed in STRINGS_TO_CLEAN are dropped or unified, a leading Roman numeral
 * listed in STRINGS_TO_CLEAN_START is replaced by its Arabic counterpart,
 * runs of dots are collapsed to a single dot, and surrounding commas, spaces
 * and brackets are trimmed.
 *
 * @param string $input Input date name.
 *
 * @return string
 */
private static function clean_input(string $input):string {

    // A single pass can leave new matches behind (e.g. "a  -b"), hence the loops.
    while (\str_contains($input, " -")) $input = \str_replace(" -", "-", $input);
    while (\str_contains($input, "- ")) $input = \str_replace("- ", "-", $input);

    $input = \strtr($input, self::STRINGS_TO_CLEAN);

    foreach (self::STRINGS_TO_CLEAN_START as $toCleanFrom => $toCleanTo) {
        if (\str_starts_with($input, $toCleanFrom)) {
            // Only the prefix is rewritten. A plain str_replace() would also
            // rewrite later occurrences (e.g. the "I. " inside a later "II. ").
            $input = $toCleanTo . \substr($input, \strlen($toCleanFrom));
            // The string now starts with a digit, so no other prefix can match.
            break;
        }
    }

    while (\str_contains($input, "..")) $input = \str_replace("..", ".", $input);

    return \trim($input, ", [](){}");

}
|
||
|
||
/**
 * Stricter variant of PHP's is_numeric(): the string must be numeric and
 * must contain neither a space nor a dot.
 *
 * @param string $input Input string.
 *
 * @return boolean
 */
private static function is_numeric(string $input):bool {

    // Reject anything carrying a space or a dot before asking PHP.
    if (\str_contains($input, " ") || \str_contains($input, ".")) {
        return false;
    }

    return \is_numeric($input);

}
|
||
|
||
/**
 * Extracts a numeric part of a date string, provided it is preceded by a separator.
 *
 * Unless $start is 0, the character directly in front of the requested part
 * must be a dash, a space or a dot. The extracted part must pass
 * self::is_numeric(). In every other case an empty string is returned.
 *
 * @param string  $datum Date.
 * @param integer $start Offset of the part, as understood by substr() (may be negative).
 * @param integer $end   Passed to substr() as the length of the part.
 *
 * @return string
 */
private static function validateDateSubstr(string $datum, int $start, int $end = 10000):string {

    if ($start !== 0) {
        $preceding = \substr($datum, $start - 1, 1);
        if ($preceding !== "-" && $preceding !== " " && $preceding !== ".") {
            return "";
        }
    }

    $candidate = \substr($datum, $start, $end);

    return self::is_numeric($candidate) ? $candidate : "";

}
|
||
|
||
/**
 * Determines the counting year of a split time.
 *
 * With an unknown ("?") start the absolute end year is used, and vice versa.
 * If no counting month is set, a "before" time with unknown start is moved
 * one year up and an "after" time with unknown end one year down. With both
 * years known, the result is the rounded-up midpoint, as an absolute value.
 *
 * @param NodaSplitTime $moda Date strings.
 *
 * @return integer
 */
public static function timePartsToCountingYear(NodaSplitTime $moda):int {

    // Open start: only the end year is known.
    if ($moda->start_year === "?") {
        $knownYear = \abs(\intval($moda->end_year));
        $shift = $moda->before_after_indicator === NodaTimeBeforeAfterIndicator::before
            && empty(trim($moda->counting_time_month, " 0"));
        return $shift ? $knownYear + 1 : $knownYear;
    }

    // Open end: only the start year is known.
    if ($moda->end_year === "?") {
        $knownYear = \abs(\intval($moda->start_year));
        $shift = $moda->before_after_indicator === NodaTimeBeforeAfterIndicator::after
            && empty(trim($moda->counting_time_month, " 0"));
        return $shift ? $knownYear - 1 : $knownYear;
    }

    $first = \intval($moda->start_year);
    $last = \intval($moda->end_year);
    $midpoint = $last - (($last - $first) / 2);

    return \abs((int)\ceil($midpoint));

}
|
||
|
||
/**
 * Generates the HTML offering to disassemble a time into its parts.
 *
 * The result is a one-row table whose link points to tempi_md/zeit_cha.php
 * and carries the parsed years, counting time and start / end dates as query
 * parameters. Every query value is URL-encoded, and both the URL and the
 * link label are HTML-escaped before being written into the markup.
 *
 * @param integer       $znum     Time ID.
 * @param NodaSplitTime $moda     Date strings.
 * @param MDTlLoader    $tlLoader Translation loader.
 *
 * @return string
 */
public static function generateDisassemblyForDay(int $znum, NodaSplitTime $moda, MDTlLoader $tlLoader):string {

    $zaehlzeit_jahr = self::pad_to_four((string)self::timePartsToCountingYear($moda));
    $newTimeName = $moda->toTimeName();

    // Query parameters, kept in the order the link has always used.
    $params = [
        'znum'      => (string)$znum,
        'kontrolle' => 'todo',
    ];
    if ($newTimeName !== "") {
        $params['zeit_name_neu'] = $newTimeName;
    }
    $params += [
        'zeit_beginn_neu'               => $moda->start_year,
        'zeit_ende_neu'                 => $moda->end_year,
        'zeit_zaehlzeit_vorzeichen_neu' => $moda->counting_time_indicator->toString(),
        'zeit_zaehlzeit_jahr_neu'       => $zaehlzeit_jahr,
        'zeit_zaehlzeit_monat_neu'      => $moda->counting_time_month,
        'zeit_zaehlzeit_tag_neu'        => $moda->counting_time_day,
        'zeit_status_neu'               => '+',
        'zeit_beginn_datum_neu'         => $moda->start_date,
        'zeit_ende_datum_neu'           => $moda->end_date,
    ];

    // Encode each value: time names may contain spaces, "&" or "?" which
    // would otherwise corrupt the query string.
    $pairs = [];
    foreach ($params as $key => $value) {
        $pairs[] = $key . '=' . \urlencode((string)$value);
    }
    $href = 'tempi_md/zeit_cha.php?' . \implode('&', $pairs);

    // Link label: the new time name if there is one, else the most specific
    // representation the parsed parts allow.
    if (!empty($newTimeName)) {
        $label = $newTimeName;
    }
    else if (!empty(trim($moda->counting_time_day, " 0")) and !empty(trim($moda->counting_time_month, " 0"))) {
        $label = $moda->counting_time_day . '.' . $moda->counting_time_month . '.' . $moda->start_year;
    }
    else if ($moda->start_year !== $moda->end_year) {
        $label = $moda->start_year . "-" . $moda->end_year;
    }
    else if (!empty(trim($moda->counting_time_month, " 0"))) {
        $label = "{$moda->counting_time_month}.{$moda->start_year}";
    }
    else {
        $label = $moda->start_year;
    }

    // NOTE(review): assumes toTimeName() returns plain text, not pre-escaped HTML - confirm.
    $escapeFlags = ENT_QUOTES | ENT_SUBSTITUTE;

    // Offer disassembly of the parsed date.
    $output = '<hr>';
    $output .= '<table>';
    $output .= '<tr><td width="250px">' . $tlLoader->tl("tempi", "tempi", "time_tool") . '</td>';
    $output .= '<td><a href="' . \htmlspecialchars($href, $escapeFlags, 'UTF-8')
        . '" class="icons iconsBell buttonLike" id="splitTimeLink">+';
    $output .= \htmlspecialchars((string)$label, $escapeFlags, 'UTF-8');
    $output .= ' - ' . $tlLoader->tl("tempi", "tempi", "time_disassemble") . '</a></td>';
    $output .= '</tr>';
    $output .= '</table>';

    return $output;

}
|
||
|
||
/**
 * Tells whether at least one of the given needles is contained in the
 * haystack, compared case-insensitively via stripos().
 *
 * @param string        $haystack Haystack.
 * @param array<string> $needles  Needles.
 *
 * @return boolean
 */
private static function stri_occurs(string $haystack, array $needles):bool {

    foreach ($needles as $candidate) {
        if (stripos($haystack, $candidate) === false) {
            continue;
        }
        return true;
    }

    return false;

}
|
||
|
||
/**
 * Left-pads a value with zeros to at least four characters. E.g. 20 > 0020.
 *
 * Values that are already four characters or longer are returned unchanged,
 * so five-digit years are not cut down to their last four digits.
 *
 * @param string $input Input string.
 *
 * @return string
 */
public static function pad_to_four(string $input):string {

    return \str_pad($input, 4, "0", STR_PAD_LEFT);

}
|
||
|
||
/**
 * Left-pads a value with zeros to at least two characters. E.g. 2 > 02.
 *
 * Values that are already two characters or longer are returned unchanged
 * rather than being cut down to their last two characters.
 *
 * @param string $input Input string.
 *
 * @return string
 */
public static function pad_to_two(string $input):string {

    return \str_pad($input, 2, "0", STR_PAD_LEFT);

}
|
||
|
||
/**
 * Attempts to read a date written in German or English notation.
 *
 * Handled, in this order:
 * - times ending in " v. Chr.": the part in front is split via
 *   attempt_splitting() and mirrored into negative years,
 * - "YYYY bis YYYY",
 * - "YYYY und|oder|od. YYYY" if the two years are consecutive,
 * - dates ending in a four-digit year that contain a month, either as an
 *   English / German month name or as two digits at offset 3, optionally
 *   preceded by a day at the very start.
 *
 * @param string $datum Date.
 *
 * @return NodaSplitTime|false
 */
public static function is_valid_date(string $datum):NodaSplitTime|false {

    $datum = self::clean_input($datum);

    if (\str_ends_with($datum, ' v. Chr.')) {
        if ($output = self::attempt_splitting(\substr($datum, 0, -8))) {

            // Negating the years reverses their order, so start and end trade places.
            $start = \strval(-1 * \intval($output->end_year));
            $end = \strval(-1 * \intval($output->start_year));
            $start_date = $output->end_date;
            $end_date = $output->start_date;
            if (\intval($start) > \intval($end)) {
                [$start, $end] = [$end, $start];
                $start_date = $output->start_date;
                $end_date = $output->end_date;
            }
            return new NodaSplitTime($start, $end, $output->counting_time_month, $output->counting_time_day,
                NodaCountingTimeIndicator::bce, $output->before_after_indicator, '-' . $start_date, '-' . $end_date);
        }
    }

    if (\preg_match("/^[0-9]{4}\ bis\ [0-9]{4}$/", $datum)) {
        return new NodaSplitTime(\substr($datum, 0, 4), \substr($datum, -4));
    }

    // The dot of "od." is escaped so that only the abbreviation itself matches.
    if (\preg_match("/^[0-9]{4}\ (und|oder|od\.)\ [0-9]{4}$/", $datum)) {
        $start = \substr($datum, 0, 4);
        $end = \substr($datum, -4);
        // Only two directly consecutive years are read as a span.
        if ((int)$start === (int)$end - 1) {
            return new NodaSplitTime($start, $end);
        }
    }

    $datum = \str_replace(". ", ".", $datum);

    if (self::stri_occurs($datum, self::STOP_STRINGS_GERMAN)) {
        return false;
    }

    if (strlen($datum) <= 6) return false;
    // Up to nine characters the input is read as month and year only.
    $use_day = strlen($datum) > 9;

    // Further code requires a four-digit year at the end, skip if there is none.
    $year = \substr($datum, -4);
    if (!self::is_numeric($year)) return false;

    // First matching month name wins; English names are tried before German ones.
    $monat = '';
    foreach ([self::MONTH_NAMES_ENGLISH, self::MONTH_NAMES_GERMAN] as $monthNames) {
        foreach ($monthNames as $monthVal => $monthValidNames) {
            if (self::stri_occurs($datum, $monthValidNames)) {
                $monat = (string)$monthVal;
                break 2;
            }
        }
    }

    // No month name: expect a numeric month at the position used in DD.MM.YYYY.
    if ($monat === '' && self::is_numeric(\substr($datum, 3, 2))) {
        $monat = \substr($datum, 3, 2);
    }
    if ($monat === '') return false;

    $day = '';
    if (self::is_numeric(\substr($datum, 0, 2))) {
        $day = \substr($datum, 0, 2);
    }
    else if (\in_array(\substr($datum, 1, 1), [".", " "], true) && self::is_numeric(\substr($datum, 0, 1))) {
        $day = "0" . \substr($datum, 0, 1);
    }

    if ($day !== '' && $use_day) {
        return NodaSplitTime::genExactDate($year, $monat, $day);
    }

    return new NodaSplitTime($year, $year, $monat);

}
|
||
|
||
/**
 * Attempts to read a date written in Hungarian notation.
 *
 * Handled, in this order:
 * - times starting with "Kr. e. ": the remainder is split via
 *   attempt_splitting() and mirrored into negative years,
 * - "YYYY-től YYYY-ig" / "YYYY-tól YYYY-ig",
 * - dates starting with "YYYY." followed by a month (Hungarian month name
 *   or digits) and, optionally, a day at the end.
 *
 * @param string $datum Date.
 *
 * @return NodaSplitTime|false
 */
public static function is_valid_date_hungarian(string $datum):NodaSplitTime|false {

    $datum = self::clean_input($datum);

    if (\preg_match("/^Kr\.\ e\.\ /", $datum)) {
        if ($output = self::attempt_splitting(\substr($datum, 7))) {

            // Negating the years reverses their order, so start and end trade places.
            $start = \strval(-1 * \intval($output->end_year));
            $end = \strval(-1 * \intval($output->start_year));
            $start_date = $output->end_date;
            $end_date = $output->start_date;
            if (\intval($start) > \intval($end)) {
                // The years are swapped back here, so the dates must follow them.
                [$start, $end] = [$end, $start];
                $start_date = $output->start_date;
                $end_date = $output->end_date;
            }
            return new NodaSplitTime($start, $end, $output->counting_time_month, $output->counting_time_day,
                NodaCountingTimeIndicator::bce, $output->before_after_indicator, '-' . $start_date, '-' . $end_date);
        }
    }

    // Example: 2009-től 2010-ig (from 2009 to 2010)
    if (\preg_match("/^[0-9][0-9][0-9][0-9]\-t(ő|ó)l(\ |\-)[0-9][0-9][0-9][0-9]\-ig$/", $datum)) {
        // The end year sits directly in front of the trailing "-ig".
        return new NodaSplitTime(\substr($datum, 0, 4), \substr($datum, -7, 4));
    }

    if (self::stri_occurs($datum, self::STOP_STRINGS_HUNGARIAN)) {
        return false;
    }

    //
    // Rest: Only those entries, where there are spelled out months
    //
    if (strlen($datum) <= 9) return false;

    // The year is only parse-able if it is a four digit year at the start,
    // directly followed by a dot. Further code requires it.
    $year = \substr($datum, 0, 4);
    if (!self::is_numeric($year) || \substr($datum, 4, 1) !== '.') {
        return false;
    }

    // Skip, if dates are too long and do not contain spaces (= no translatable names)
    if (str_contains($datum, " ") === false && strlen($datum) > 12) return false;

    // $unparsed tracks what is left once year and month name are taken out.
    $unparsed = trim(strtolower(str_replace($year, '', $datum)), ' ,.');
    $monat = '';
    foreach (self::MONTH_NAMES_HUNGARIAN as $monthVal => $monthValidNames) {
        if (!self::stri_occurs($datum, $monthValidNames)) {
            continue;
        }
        $monat = (string)$monthVal;
        foreach ($monthValidNames as $name) {
            $unparsed = str_replace($name, '', $unparsed);
        }
        break;
    }

    // Too much unexplained text left: do not guess.
    if (strlen($unparsed) > 5) {
        return false;
    }

    // No month name: look for a numeric month right behind the year.
    if (empty($monat) and self::is_numeric(\substr($datum, 5, 2))) $monat = \substr($datum, 5, 2);
    else if (empty($monat) and self::is_numeric(\substr($datum, 6, 2))) $monat = \substr($datum, 6, 2);

    // The day is searched for from the end of the string backwards.
    $day = self::validateDateSubstr($datum, -2);
    foreach ([-3, -4, -5, -6] as $offset) {
        if (!empty($day)) break;
        $day = self::validateDateSubstr($datum, $offset, 2);
    }

    // Single-digit day behind a space, optionally followed by one more character.
    if (empty($day)) {
        if (\substr($datum, -2, 1) === " " and self::is_numeric(\substr($datum, -1, 1))) {
            $day = "0" . \substr($datum, -1, 1);
        }
        else if (\substr($datum, -3, 1) === " " and self::is_numeric(\substr($datum, -2, 1))) {
            $day = "0" . \substr($datum, -2, 1);
        }
    }

    // Digits at the end that could not be read as a day: refuse to guess.
    if (!empty($monat) && empty($day) && preg_match('~[0-9]+~', substr($datum, -3))) {
        return false;
    }

    if (!empty($monat) and !empty($day)) {
        return NodaSplitTime::genExactDate($year, $monat, $day);
    }
    else if (!empty($monat)) {
        return new NodaSplitTime($year, $year, $monat);
    }

    return false;

}
|
||
|
||
/**
 * Attempts to read a date using PHP's own strtotime().
 *
 * @param string $datum Date.
 *
 * @return NodaSplitTime|false
 */
public static function is_valid_date_by_php(string $datum):NodaSplitTime|false {

    $datum = self::clean_input($datum);

    // Compare against false explicitly: a timestamp of 0 is a valid
    // result and must not be mistaken for a parse failure.
    $timeInt = \strtotime($datum);
    if ($timeInt === false) {
        return false;
    }

    return NodaSplitTime::genExactDate(\date("Y", $timeInt), \date("m", $timeInt), \date("d", $timeInt));

}
|
||
|
||
/**
 * Checks if an input date is written in one of the known purely numeric
 * notations: year spans, single years, and exact or month-precise dates in
 * German, international and Hungarian order.
 *
 * The patterns are tried from top to bottom; the first match decides.
 *
 * @param string $datum Input date.
 *
 * @return NodaSplitTime|false
 */
public static function is_timespan(string $datum):NodaSplitTime|false {

    $datum = self::clean_input($datum);

    // 10000-20000
    if (\preg_match("/^[0-9]{5}(\-|\/)[0-9]{5}$/", $datum)) {
        return new NodaSplitTime(start_year: \substr($datum, 0, 5), end_year: \substr($datum, 6, 5));
    }

    // 0000-0000
    if (\preg_match("/^[0-9]{4}(\-|\/)[0-9]{4}(\.|)$/", $datum)) {
        return new NodaSplitTime(start_year: \substr($datum, 0, 4), end_year: \substr($datum, 5, 4));
    }

    // 1.900-2.000
    if (\preg_match("/^[0-9]\.[0-9][0-9][0-9](\-|\/)[0-9]\.[0-9][0-9][0-9]$/", $datum)) {
        $datum = \str_replace(".", "", $datum);
        return new NodaSplitTime(start_year: \substr($datum, 0, 4), end_year: \substr($datum, 5, 4));
    }

    // German TT.MM.JJJJ / TT.MM.JJJ / TT.MM.JJ / TT.MM.J
    if (\preg_match("/^[0-9]{2}\.[0-9]{2}\.([0-9]{4}|[0-9]{3}|[0-9]{2}|[0-9])$/", $datum)) {
        $year = \substr($datum, 6, 4);
        $month = \substr($datum, 3, 2);
        $day = \substr($datum, 0, 2);
        return NodaSplitTime::genExactDate($year, $month, $day);
    }

    // German TT.M.JJJJ / TT.M.JJJ / TT.M.JJ / TT.M.J
    if (\preg_match("/^[0-9]{2}\.[0-9]\.([0-9]{4}|[0-9]{3}|[0-9]{2}|[0-9])$/", $datum)) {
        $year = \substr($datum, 5, 4);
        $month = "0" . \substr($datum, 3, 1);
        $day = \substr($datum, 0, 2);
        return NodaSplitTime::genExactDate($year, $month, $day);
    }

    // German T.MM.JJJJ / T.MM.JJJ / T.MM.JJ / T.MM.J
    if (\preg_match("/^[0-9]\.[0-9][0-9]\.([0-9][0-9][0-9][0-9]|[0-9][0-9][0-9]|[0-9][0-9]|[0-9])$/", $datum)) {
        $year = \substr($datum, 5, 4);
        $month = \substr($datum, 2, 2);
        $day = "0" . \substr($datum, 0, 1);
        return NodaSplitTime::genExactDate($year, $month, $day);
    }

    // German T.M.JJJJ / T.M.JJJ / T.M.JJ / T.M.J
    if (\preg_match("/^[0-9]\.[0-9]\.([0-9][0-9][0-9][0-9]|[0-9][0-9][0-9]|[0-9][0-9]|[0-9])$/", $datum)) {
        $year = \substr($datum, 4, 4);
        $month = "0" . \substr($datum, 2, 1);
        $day = "0" . \substr($datum, 0, 1);
        return NodaSplitTime::genExactDate($year, $month, $day);
    }

    // Intl': 2020-12-20
    if (\preg_match("/^[0-9]{4}\-[0-9]{2}\-[0-9]{2}$/", $datum)) {
        $year = \substr($datum, 0, 4);
        $month = \substr($datum, 5, 2);
        $day = \substr($datum, 8, 2);
        return NodaSplitTime::genExactDate($year, $month, $day);
    }

    // Intl': 2020-12, or an abbreviated year span such as 1912-15
    if (\preg_match("/^[0-9]{4}\-[0-9]{2}$/", $datum)) {
        $year = \substr($datum, 0, 4);
        $suffix = \substr($datum, 5, 2);

        // Anything up to and including 12 is taken to be a month ...
        if (intval($suffix) <= 12) {
            return new NodaSplitTime($year, $year, $suffix);
        }

        // ... everything above is the end year within the same century.
        return new NodaSplitTime($year, \substr($year, 0, 2) . $suffix);
    }

    // German MM.JJJJ
    if (\preg_match("/^[0-9]{2}\.[0-9]{4}$/", $datum)) {
        $year = \substr($datum, 3, 4);
        $month = \substr($datum, 0, 2);
        return new NodaSplitTime($year, $year, $month);
    }

    // German M.JJJJ
    if (\preg_match("/^[0-9]\.[0-9]{4}$/", $datum)) {
        $year = \substr($datum, 2, 4);
        $month = "0" . \substr($datum, 0, 1);
        return new NodaSplitTime($year, $year, $month);
    }

    // Hungarian YYYY.MM.DD, day with one or two digits, trailing dot optional
    if (\preg_match("/^[0-9]{4}\.[0-3][0-9]\.[0-9]{1,2}(\.|)$/", $datum)) {
        $year = \substr($datum, 0, 4);
        $month = \substr($datum, 5, 2);
        $day = self::pad_to_two(\rtrim(\substr($datum, 8, 2), '.'));
        return NodaSplitTime::genExactDate($year, $month, $day);
    }

    // Hungarian YYYY.M.D. > 2005.1.1.
    if (\preg_match("/^[0-9]{4}\.[0-9]\.[0-9]{1,2}\.$/", $datum)) {
        $year = \substr($datum, 0, 4);
        $month = self::pad_to_two(\substr($datum, 5, 1));
        $day = self::pad_to_two(\rtrim(\substr($datum, 7, 2), '.'));
        return NodaSplitTime::genExactDate($year, $month, $day);
    }

    // Hungarian YYYY.MM, trailing dot optional
    if (\preg_match("/^[0-9]{4}\.[0-3][0-9](\.|)$/", $datum)) {
        $year = \substr($datum, 0, 4);
        $month = \substr($datum, 5, 2);
        return new NodaSplitTime($year, $year, $month);
    }

    // 01.01.1920-31.12.1930: full years written out as exact dates
    if (\preg_match("/^01\.01\.[0-9]{4}\-31\.12\.[0-9]{4}$/", $datum)) {
        return new NodaSplitTime(\substr($datum, 6, 4), \substr($datum, -4));
    }

    // 303-305 (n. Chr.)
    if (\preg_match("/^[0-9]{3}\-[0-9]{3}$/", $datum)) {
        return new NodaSplitTime("0" . \substr($datum, 0, 3), "0" . \substr($datum, -3));
    }

    // 1720-120: the start already has four digits, only the end needs padding
    if (\preg_match("/^[0-9]{4}\-[0-9]{3}$/", $datum)) {
        return new NodaSplitTime(\substr($datum, 0, 4), "0" . \substr($datum, -3));
    }

    // 20-30 (n. Chr.)
    if (\preg_match("/^[0-9]{2}\-[0-9]{2}$/", $datum)) {
        return new NodaSplitTime("00" . \substr($datum, 0, 2), "00" . \substr($datum, -2));
    }

    // 1920
    if (\preg_match("/^[0-9]{4}(\.|)$/", $datum)) {
        $start = \substr($datum, 0, 4);
        return new NodaSplitTime($start, $start);
    }

    // 920
    if (\preg_match("/^[0-9]{3}$/", $datum)) {
        $start = "0" . \substr($datum, 0, 3);
        return new NodaSplitTime($start, $start);
    }

    // 20
    if (\preg_match("/^[0-9]{2}$/", $datum)) {
        $start = "00" . \substr($datum, 0, 2);
        return new NodaSplitTime($start, $start);
    }

    // Special case for SMB: YYYY, MM. DD and YYYY, MM.
    if (\preg_match("/^[0-9]{4}\,\ [0-9]{2}\.(|\ [0-9]{2})$/", $datum)) {
        $start = \substr($datum, 0, 4);
        $month = \substr($datum, 6, 2);
        $day = \substr($datum, 10, 2);

        // Without a day this is a month, as in every other month-only notation above.
        if ($day === '') {
            return new NodaSplitTime($start, $start, $month);
        }
        return NodaSplitTime::genExactDate($start, $month, $day);
    }

    return false;

}
|
||
|
||
/**
 * Checks if an input date is an incomplete date: Before 1920, after 1930,
 * since / until a date, or a date with unknown ("?") parts.
 *
 * Purely numeric notations are matched against the input with all spaces
 * removed. Notations marked by a word or suffix (nach, vor, ab, seit, bis,
 * -tól / -től, -ig, előtt, ...) have the marker cut off, and the remainder
 * is handed to attempt_splitting().
 *
 * @param string $datum Input date.
 *
 * @return NodaSplitTime|false
 */
public static function is_incomplete_date(string $datum):NodaSplitTime|false {

    $datum = self::clean_input($datum);

    $inpDateWoSpaces = str_replace(" ", "", $datum);

    // Turns a fully parsed time into an open-ended one starting at its start.
    $toSince = static fn(NodaSplitTime $parsed):NodaSplitTime => new NodaSplitTime(
        $parsed->start_year, '?', $parsed->counting_time_month, $parsed->counting_time_day,
        $parsed->counting_time_indicator, NodaTimeBeforeAfterIndicator::since, $parsed->start_date, '?');

    // Turns a fully parsed time into an open-ended one finishing at its end.
    $toUntil = static fn(NodaSplitTime $parsed):NodaSplitTime => new NodaSplitTime(
        '?', $parsed->end_year, $parsed->counting_time_month, $parsed->counting_time_day,
        $parsed->counting_time_indicator, NodaTimeBeforeAfterIndicator::until, '?', $parsed->end_date);

    // YYYY.MM.DD.- (trailing dot optional)
    if (\preg_match("/^[0-9]{4}\.[0-9]{2}\.[0-9]{2}(\.|)\-$/", $inpDateWoSpaces)) {
        $year = \substr($inpDateWoSpaces, 0, 4);
        $month = \substr($inpDateWoSpaces, 5, 2);
        $day = \substr($inpDateWoSpaces, 8, 2);
        return NodaSplitTime::genExactDate($year, $month, $day, NodaTimeBeforeAfterIndicator::since);
    }

    // YYYY.MM.-
    if (\preg_match("/^[0-9]{4}\.[0-9]{2}(\.|)\-$/", $inpDateWoSpaces)) {
        $start = \substr($inpDateWoSpaces, 0, 4);
        $month = \substr($inpDateWoSpaces, 5, 2);
        return new NodaSplitTime($start, '?', $month, before_after_indicator: NodaTimeBeforeAfterIndicator::since);
    }

    // YYYY-
    if (\preg_match("/^[0-9]{4}\-$/", $inpDateWoSpaces)) {
        $start = \substr($inpDateWoSpaces, 0, 4);
        return new NodaSplitTime($start, '?', before_after_indicator: NodaTimeBeforeAfterIndicator::since);
    }

    // ?.6.2024
    if (\preg_match("/^\?\.([0-9]|[0-9]{2})\.[0-9]{4}$/", $inpDateWoSpaces)) {
        $year = \substr($inpDateWoSpaces, -4);
        // The month has one or two digits; pad it like every other month value.
        $month = self::pad_to_two(trim(\substr($inpDateWoSpaces, 2, 2), '. '));
        return new NodaSplitTime($year, $year, $month);
    }

    // ?.?.2024
    if (\preg_match("/^\?\.\?\.[0-9]{4}$/", $inpDateWoSpaces)) {
        $year = \substr($inpDateWoSpaces, -4);
        return new NodaSplitTime($year, $year);
    }

    // A year wrapped in question marks, brackets, dots or X placeholders
    $bareYear = \trim($inpDateWoSpaces, '. ?!()[]X');
    if (\preg_match("/^[0-9]{4}$/", $bareYear)) {
        return new NodaSplitTime($bareYear, $bareYear);
    }

    // A year with a "-0" / "0-" attached or with the letter "o" in place of a zero
    $repairedYear = \strtr($inpDateWoSpaces, ['-0' => '', '0-' => '', 'o' => '0']);
    if (\preg_match("/^[0-9]{4}$/", $repairedYear)) {
        return new NodaSplitTime($repairedYear, $repairedYear);
    }

    // -YYYY.MM.DD
    if (\preg_match("/^\-[0-9]{4}\.[0-9]{2}\.[0-9]{2}$/", $inpDateWoSpaces)) {
        $year = \substr($inpDateWoSpaces, 1, 4);
        $month = \substr($inpDateWoSpaces, 6, 2);
        $day = \substr($inpDateWoSpaces, 9, 2);
        return NodaSplitTime::genExactDate($year, $month, $day, NodaTimeBeforeAfterIndicator::until);
    }

    // -YYYY.MM
    if (\preg_match("/^\-[0-9]{4}\.[0-9]{2}$/", $inpDateWoSpaces)) {
        $year = \substr($inpDateWoSpaces, 1, 4);
        $month = \substr($inpDateWoSpaces, 6, 2);
        return new NodaSplitTime('?', $year, $month, before_after_indicator: NodaTimeBeforeAfterIndicator::until);
    }

    // -YYYY
    if (\preg_match("/^\-[0-9]{4}$/", $inpDateWoSpaces)) {
        $year = \substr($inpDateWoSpaces, 1, 4);
        return new NodaSplitTime('?', $year, before_after_indicator: NodaTimeBeforeAfterIndicator::until);
    }

    // Leading "nach": after
    if (\preg_match("/^(Nach|nach)\ /", $datum)) {
        if (($spacePos = \strpos($datum, " ")) === false) {
            return false;
        }
        if ($output = self::attempt_splitting(\substr($datum, $spacePos))) {
            return self::_turn_noda_split_time_to_after($output);
        }
    }

    // Trailing "nach" / "(nach": after
    if (\preg_match("/\ (\(nach|nach)$/", $datum)) {
        // The marker starts at the LAST space; the date in front may itself
        // contain spaces (e.g. "12. Mai 1920 nach").
        if (($spacePos = \strrpos($datum, " ")) === false) {
            return false;
        }
        if ($output = self::attempt_splitting(\substr($datum, 0, $spacePos))) {
            return self::_turn_noda_split_time_to_after($output);
        }
    }

    // Leading "vor": before
    if (\preg_match("/^(Vor|vor)\ /", $datum)) {
        if (($spacePos = \strpos($datum, " ")) === false) {
            return false;
        }
        if ($output = self::attempt_splitting(\substr($datum, $spacePos))) {
            return self::_turn_noda_split_time_to_before($output);
        }
    }

    // Trailing "vor" / "(vor" / "előtt": before
    if (\preg_match("/\ (\(vor|\(Vor|vor|előtt)$/", $datum)) {
        if (($spacePos = \strrpos($datum, " ")) === false) {
            return false;
        }
        if ($output = self::attempt_splitting(\substr($datum, 0, $spacePos))) {
            return self::_turn_noda_split_time_to_before($output);
        }
    }

    // Leading "ab" / "seit": since
    if (\preg_match("/^(Ab|ab|Seit|seit)\ /", $datum)) {
        if (($spacePos = \strpos($datum, " ")) === false) {
            return false;
        }
        if ($output = self::attempt_splitting(\substr($datum, $spacePos))) {
            return $toSince($output);
        }
    }

    // Endings beginning with a dash (since)
    if (\preg_match("/(\-től|\-tól)$/", $datum)) {
        if (($dashPos = strrpos($datum, "-")) === false) {
            return false;
        }
        if ($output = self::attempt_splitting(\substr($datum, 0, $dashPos))) {
            return $toSince($output);
        }
    }

    // Leading "bis": until
    if (\preg_match("/^(Bis|bis)\ /", $datum)) {
        if (($spacePos = \strpos($datum, " ")) === false) {
            return false;
        }
        if ($output = self::attempt_splitting(\substr($datum, $spacePos))) {
            return $toUntil($output);
        }
    }

    // Endings beginning with a space (until)
    if (\preg_match("/ (\(bis)$/", $datum)) {
        if (($spacePos = strrpos($datum, " ")) === false) {
            return false;
        }
        if ($output = self::attempt_splitting(\substr($datum, 0, $spacePos))) {
            return $toUntil($output);
        }
    }

    // Endings beginning with a hyphen (until)
    if (\preg_match("/\-ig(\.|)$/", $datum)) {
        if (($dashPos = strrpos($datum, "-")) === false) {
            return false;
        }
        if ($output = self::attempt_splitting(\substr($datum, 0, $dashPos))) {
            return $toUntil($output);
        }
    }

    // YYYYig
    if (\preg_match("/^[0-9]{4}ig$/", $datum)) {
        if ($output = self::attempt_splitting(\substr($datum, 0, 4))) {
            return $toUntil($output);
        }
    }

    // Decades with "-ig" attached: only the two-character suffix is cut off
    if (str_ends_with($datum, '-as évekig') || str_ends_with($datum, '-es évekig')) {
        if ($output = self::attempt_splitting(\substr($datum, 0, -2))) {
            return $toUntil($output);
        }
    }

    // Endings that are extensions of an existing word ("től" takes four bytes)
    if (\preg_match("/évektől$/", $datum)) {
        if ($output = self::attempt_splitting(\substr($datum, 0, -4))) {
            return self::_turn_noda_split_time_to_after($output);
        }
    }

    // Endings beginning with a space (after)
    if (\preg_match("/ (utantól|utántól)$/", $datum)) {
        if (($spacePos = strrpos($datum, " ")) === false) {
            return false;
        }
        if ($output = self::attempt_splitting(\substr($datum, 0, $spacePos))) {
            return $toSince($output);
        }
    }

    return false;

}
|
||
|
||
/**
 * Builds the span of years covered by a range of centuries.
 *
 * If the first century number is smaller than the second, the span runs
 * from year "01" of the first century to year "00" closing the second one
 * (e.g. 17, 18 > 1601-1800). Otherwise the order of the two is kept as
 * given, which yields a descending pair (presumably used for spans counted
 * backwards, i.e. BCE - confirm against the callers).
 *
 * @param string $start Begin time.
 * @param string $end   End time.
 *
 * @return NodaSplitTime
 */
public static function negotiate_century_span_bce_ce(string $start, string $end):NodaSplitTime {

    $firstCentury = \intval($start);
    $secondCentury = \intval($end);

    if ($firstCentury >= $secondCentury) {
        return new NodaSplitTime($firstCentury . "00", ($secondCentury - 1) . "01");
    }

    return new NodaSplitTime(($firstCentury - 1) . "01", $secondCentury . "00");

}
|
||
|
||
/**
 * Checks if an input date is a century or a span of centuries.
 *
 * The input is first passed through self::clean_input(). The following
 * spellings are then recognized (German and Hungarian century names):
 * - two-digit century followed by self::REGEX_CENTURIES ("17. Jahrhundert")
 * - single-digit century ("1. Jahrhundert", "1. Jh", "1. Jh.")
 * - spans of centuries ("17.-18. Jahrhundert", "17./18. Jahrhundert",
 *   "1.-12. Jahrhundert", "1.-2. Jahrhundert", "1-2. Jahrhundert")
 *
 * Only start and end year are passed to the returned object; no BCE / CE
 * indicator is set here.
 *
 * @param string $datum Input date.
 *
 * @return NodaSplitTime|false The split century, or false if no pattern matched.
 */
public static function is_century(string $datum):NodaSplitTime|false {

    $datum = self::clean_input($datum);

    // TODO: Check if this is duplicate

    // 17. Jahrhundert
    if (\preg_match("/^[0-9]{2}(\.|)" . self::REGEX_CENTURIES . "$/", $datum)) {
        // A century number of 0 is not usable; fall through to the other checks.
        if ($centuryNo = \intval(\substr($datum, 0, 2))) {
            $centuryNo--;
            return new NodaSplitTime((string)$centuryNo . "01", \strval($centuryNo + 1) . "00");
        }
    }

    // 1. Jahrhundert
    // The alternation must use an unescaped "|": an escaped pipe would only
    // match the literal text "Jh|Jh." and never "1. Jh" or "1. Jh.".
    if (\preg_match("/^[0-9]\.\ (Jh|Jh\.|Jahrhundert|sz|század)$/", $datum)) {
        if ($centuryNo = \intval(\substr($datum, 0, 1))) {
            $centuryNo--;
            return new NodaSplitTime((string)$centuryNo . "01", \strval($centuryNo + 1) . '00');
        }
    }

    // 17.-18. Jahrhundert / 17./18. Jahrhundert
    if (\preg_match("/^[0-9]{2}(\.|)(|\ Jh|\ Jh\.|\ Jahrhundert|\ sz|\ század)(\-|\/)[0-9]{2}\.\ (Jh\.|Jahrhundert|sz|század)$/", $datum)) {
        // Normalize the separator so that both spellings can be split at "-".
        $datum = str_replace('/', '-', $datum);
        if (($dashPos = \strpos($datum, "-")) !== false) {
            return self::negotiate_century_span_bce_ce(\substr($datum, 0, 2), \substr($datum, $dashPos + 1, 2));
        }
    }

    // 1.-12. Jahrhundert
    if (\preg_match("/^[0-9](\.|)(|\ Jh\.|\ Jahrhundert|\ sz|\ század)\-[0-9]{2}\.\ (Jh\.|Jahrhundert|sz|század)$/", $datum)) {
        if (($dashPos = \strpos($datum, "-")) !== false) {
            return self::negotiate_century_span_bce_ce(\substr($datum, 0, 1), \substr($datum, $dashPos + 1, 2));
        }
    }

    // 1.-2. Jahrhundert and 1-2. Jahrhundert
    // (the dot after the first number is optional, so one pattern covers both)
    if (\preg_match("/^[0-9](\.|)(|\ Jh\.|\ Jahrhundert|\ sz|\ század)\-[0-9]\.\ (Jh\.|Jahrhundert|sz|század)$/", $datum)) {
        if (($dashPos = \strpos($datum, "-")) !== false) {
            return self::negotiate_century_span_bce_ce(\substr($datum, 0, 1), \substr($datum, $dashPos + 1, 1));
        }
    }

    return false;

}
|
||
|
||
/**
 * Checks if an input date is a decade.
 *
 * The input is passed through self::clean_input() and then compared with
 * two patterns, both ending in self::REGEX_DECADES:
 * - a two-digit decade ("20er Jahre"), which is prefixed with "19"
 * - a four-digit decade ("1920er Jahre"), which is used as written
 *
 * The returned span runs from the first year of the decade to that year
 * plus nine.
 *
 * @param string $datum Input date.
 *
 * @return NodaSplitTime|false The split decade, or false if no pattern matched.
 */
public static function is_decade(string $datum):NodaSplitTime|false {

    $datum = self::clean_input($datum);

    if (\preg_match("/^[0-9]0" . self::REGEX_DECADES . "$/", $datum)) {
        // 20er Jahre
        $firstYear = "19" . \substr($datum, 0, 2);
    }
    else if (\preg_match("/^[0-9]{3}0" . self::REGEX_DECADES . "$/", $datum)) {
        // 1920er Jahre
        $firstYear = \substr($datum, 0, 4);
    }
    else {
        return false;
    }

    $lastYear = (string)((int)$firstYear + 9);

    return new NodaSplitTime($firstYear, $lastYear);

}
|
||
|
||
/**
 * Derives a "before" time from an already split time.
 *
 * The new time has '?' as its first year and first date. Its second year
 * is the start year of the given time; if the given time carries no month
 * (nothing is left of counting_time_month after stripping zeros, spaces,
 * dots and commas), the start year is reduced by one. Month, day and
 * counting time indicator are carried over, and the start date of the
 * given time is passed on as the last argument.
 *
 * @param NodaSplitTime $output Time to translate.
 *
 * @return NodaSplitTime
 */
private static function _turn_noda_split_time_to_before(NodaSplitTime $output):NodaSplitTime {

    $monthIsKnown = trim($output->counting_time_month, "0 .,") !== '';

    // Year-only precision: "before 1900" ends with the preceding year.
    $closingYear = $monthIsKnown
        ? $output->start_year
        : strval((int)$output->start_year - 1);

    return new NodaSplitTime(
        '?',
        $closingYear,
        $output->counting_time_month,
        $output->counting_time_day,
        $output->counting_time_indicator,
        NodaTimeBeforeAfterIndicator::before,
        '?',
        $output->start_date
    );

}
|
||
|
||
/**
 * Derives an "after" time from an already split time.
 *
 * The new time has '?' as its second year and last date. Its first year
 * is the end year of the given time; if the given time carries no month
 * (nothing is left of counting_time_month after stripping zeros, spaces,
 * dots and commas), the end year is increased by one. Month, day and
 * counting time indicator are carried over, and the end date of the given
 * time is passed on as the second to last argument.
 *
 * @param NodaSplitTime $output Time to translate.
 *
 * @return NodaSplitTime
 */
private static function _turn_noda_split_time_to_after(NodaSplitTime $output):NodaSplitTime {

    $monthIsKnown = trim($output->counting_time_month, "0 .,") !== '';

    // Year-only precision: "after 1900" begins with the following year.
    $openingYear = $monthIsKnown
        ? $output->end_year
        : strval((int)$output->end_year + 1);

    return new NodaSplitTime(
        $openingYear,
        '?',
        $output->counting_time_month,
        $output->counting_time_day,
        $output->counting_time_indicator,
        NodaTimeBeforeAfterIndicator::after,
        $output->end_date,
        '?'
    );

}
|
||
|
||
/**
 * Checks if the string is a time span with given start and end dates.
 *
 * The input must contain exactly one hyphen. It is split at the hyphen and
 * both halves are run through self::attempt_splitting(). Inputs are
 * rejected early if one half is shorter than the other and has fewer than
 * four characters (e.g. "2005.01.01-02").
 *
 * @param string $datum Date.
 *
 * @return array{}|array{0: NodaSplitTime, 1: NodaSplitTime} Start and end
 *                                                          time, or an empty
 *                                                          array on failure.
 */
public static function check_is_timespan_from_till(string $datum):array {

    if (substr_count($datum, '-') !== 1) {
        return [];
    }

    [$start_str, $end_str] = explode('-', $datum);

    $startLength = strlen($start_str);
    $endLength   = strlen($end_str);
    $shorter     = $startLength < $endLength ? $startLength : $endLength;

    // A half of less than four characters is only accepted if the other
    // half is equally long.
    if ($startLength !== $endLength && $shorter < 4) {
        return [];
    }

    $start = self::attempt_splitting($start_str);
    if (empty($start)) {
        return [];
    }

    $end = self::attempt_splitting($end_str);
    if (empty($end)) {
        return [];
    }

    return [$start, $end];

}
|
||
|
||
/**
 * Contains special rules for incorrectly or incompletely spelled out timespan names.
 * To be called by self::attempt_splitting_from_till().
 *
 * Each rule rewrites the input into a span that names both ends in full.
 * The rules are tried in order and the first matching one wins:
 * - Hungarian spans that name the year (and month) only once
 *   ("2005.01.-02.", "2005.01-02", "2005.01.01.-02.02.", "2005.01.01-02")
 * - German spans of days within one month ("1.-2.05.2005", "1.-12.5.2005",
 *   "10.-12.05.2005"), including years of one to three digits
 * - spans of centuries ("17-19. Jahrhundert")
 * - Hungarian spans of decades ("1950-60 as évek")
 * - Hungarian decade suffixes attached by a hyphen ("1880-1990-es évek")
 * - year followed by a span of month names ("1981. július-augusztus")
 * - commas used as separator, which are replaced by hyphens
 *
 * @param string $datum Date.
 *
 * @return string Rewritten time span, or an empty string if no rule applies.
 */
public static function _attempt_rewriting_special_cases_from_till(string $datum):string {

    if (empty($datum)) return '';

    $inputLength = strlen($datum);

    // Hungarian year and month until month
    // 2005.01.-02. => 2005.01.-2005.02.
    if ($inputLength === 12 && (\preg_match("/^[0-9]{4}\.[0-1][0-9]\.\-[0-1][0-9]\.$/", $datum))) {
        $reconstituted = substr($datum, 0, 8) . '-';
        $reconstituted .= substr($datum, 0, 4) . '.' . substr($datum, -3);
        return $reconstituted;
    }

    // Hungarian year and month until month without a dot after the first YYYY-MM
    // 2005.01-02. => 2005.01.-2005.02.
    if (in_array($inputLength, [10, 11], true) && (\preg_match("/^[0-9]{4}\.[0-1][0-9]\-[0-1][0-9](\.|)$/", $datum))) {
        $reconstituted = substr($datum, 0, 7) . '.-';
        $reconstituted .= substr($datum, 0, 4) . '.' . substr(rtrim($datum, '.'), -2) . '.';
        return $reconstituted;
    }

    // Hungarian year, month and day until month and day
    // 2005.01.01.-02.02. => 2005.01.01.-2005.02.02.
    // 2005.01.01-02.02 => 2005.01.01.-2005.02.02.
    if ($inputLength >= 16 && $inputLength <= 18 && (\preg_match("/^[0-9]{4}\.[0-1][0-9]\.[0-3][0-9](\.|)\-[0-1][0-9]\.[0-3][0-9](\.|)$/", $datum))) {
        $parts = explode('-', $datum);
        if (count($parts) !== 2) return '';
        $reconstituted = substr($datum, 0, 10) . '.-';
        $reconstituted .= substr($datum, 0, 4) . '.' . rtrim($parts[1], '.') . '.';
        return $reconstituted;
    }

    // Hungarian; without trailing dots: YYYY.MM.DD-DD
    if ($inputLength >= 13 && $inputLength <= 15 && \preg_match("/^[0-9]{4}\.[0-1][0-9]\.[0-3][0-9](\.|)\-[0-3][0-9](\.|)$/", $datum)) {
        $parts = explode('-', $datum);
        if (count($parts) !== 2) return '';
        $reconstituted = substr($datum, 0, 10) . '.-';
        $reconstituted .= substr($datum, 0, 7) . '.' . substr(rtrim($parts[1], '.'), -2);
        return $reconstituted;
    }

    // German T.-T.MM.JJJJ, T.-TT.MM.JJJJ and TT.-TT.MM.JJJJ, each also with
    // a one-digit month and with years of one to three digits.
    // Day, month and year are taken from capture groups instead of fixed
    // string offsets, as the offsets are only correct for four-digit years.
    // A two-digit first day followed by a one-digit second day (TT.-T.) is
    // deliberately not rewritten.
    if (\preg_match('/^([0-9]{1,2})\.\-([0-9]{1,2})\.([0-9]{1,2})\.([0-9]{1,4})$/', $datum, $matches)
        && strlen($matches[1]) <= strlen($matches[2])
    ) {
        [, $firstday, $day, $month, $year] = $matches;
        if (strlen($firstday) === 1) $firstday = '0' . $firstday;
        if (strlen($day) === 1) $day = '0' . $day;
        return "$firstday.$month.$year-$day.$month.$year";
    }

    // 17-19. Jahrhundert => 1601-19. Jahrhundert
    if (\preg_match("/^[0-9]{2}(\.|)\-[0-9]{2}(\.|)" . self::REGEX_CENTURIES . "$/", $datum)) {
        $parts = explode('-', $datum);
        $reconstituted = ((int)substr($parts[0] ?? "", 0, 2) - 1) . '01-';
        $reconstituted .= substr($parts[1] ?? "", 0, 2) . '. Jahrhundert';
        return $reconstituted;
    }

    // 1950-60 as évek => 1950-60er Jahre
    // The spelling with a hyphen before the suffix (1950-60-as évek) is
    // turned into this form by the next rule.
    if (\preg_match("/^[0-9]{4}\-[0-9]{2} (a|e)s évek$/", $datum)) {
        $reconstituted = substr($datum, 0, 4) . '-';
        $reconstituted .= substr($datum, 5, 2) . 'er Jahre';
        return $reconstituted;
    }

    // If es évek / as évek is contained in the string (e.g. 1880-1990-es évek), there
    // will be more than one hyphen
    if (MD_STD::stri_contains_any($datum, ['-as évek', '-es-évek', '-es évek'])) {
        return strtr($datum, ['-as évek' => ' as évek', '-es-évek' => ' es évek', '-es évek' => ' es évek']);
    }

    // 1981. július-augusztus > 1981.07-08
    if (is_numeric(substr($datum, 0, 4)) && substr($datum, 4, 2) === '. ') {

        // Map every known month name to its number. Later lists overwrite
        // identical names of earlier ones.
        $monthNames = [];
        foreach ([self::MONTH_NAMES_ENGLISH, self::MONTH_NAMES_GERMAN, self::MONTH_NAMES_HUNGARIAN] as $namesByMonth) {
            foreach ($namesByMonth as $month => $names) {
                foreach ($names as $name) $monthNames[$name] = $month;
            }
        }

        $rewrite = strtr($datum, $monthNames);
        if ($rewrite !== $datum) {
            return str_replace('..', '.', str_replace(" ", ".", $rewrite));
        }

    }

    if (str_contains($datum, ',')) {
        return str_replace(',', '-', $datum);
    }

    return '';

}
|
||
|
||
/**
 * Splits a time span with given start and end dates into its two ends
 * and the day in the middle between them.
 *
 * Inputs that can already be split as a single time by
 * self::attempt_splitting() are skipped. If the input is not directly
 * usable as a span, self::_attempt_rewriting_special_cases_from_till() is
 * consulted and the rewritten input is processed recursively. Spans whose
 * start is marked as BCE are not handled.
 *
 * On success the result contains the keys start_name, end_name,
 * start_year, end_year, start_date, end_date, counting_time_year,
 * counting_time_month, counting_time_day and counting_time_bcce (always
 * "+"). The counting time is the start date plus half of the number of
 * days between start and end (rounded).
 *
 * @param string $datum Date.
 *
 * @return array<string> Split time span, or an empty array on failure.
 *
 * @throws Exception If start and end of the span are zero days apart.
 */
public static function attempt_splitting_from_till(string $datum):array {

    // Skip 1200-1300
    if (!empty(self::attempt_splitting($datum))) return [];
    if (strlen($datum) === 9 and substr($datum, 4, 1) !== '-') return [];

    if (empty($startEnd = self::check_is_timespan_from_till($datum))) {
        // Not a span as written: retry with a rewritten spelling, if there is one.
        if ($rewritten = self::_attempt_rewriting_special_cases_from_till($datum)) {
            return self::attempt_splitting_from_till($rewritten);
        }
        return [];
    }
    [$start, $end] = $startEnd;

    if ($start->counting_time_indicator === NodaCountingTimeIndicator::bce) return [];

    try {
        $startDate = $start->startToDateTime();
        $endDate = $end->endToDateTime();
    }
    catch (Exception $e) {
        return [];
    }

    $interval = $startDate->diff($endDate);
    $days_diff = (int)$interval->format('%a');

    if ($days_diff === 0) {
        throw new Exception("Missing interval, failed to calculate (" . $days_diff . ")");
    }

    $middle_substraction = (int)round($days_diff / 2);

    // strtotime() signals failure with false. The results must be compared
    // strictly, as 0 is the valid timestamp of the Unix epoch.
    if (($startDateTimestamp = strtotime($startDate->format('Y-m-d'))) === false) return [];
    if (($middleDayTimestamp = strtotime('+' . $middle_substraction . ' days', $startDateTimestamp)) === false) return [];
    $middle_year = date('Y', $middleDayTimestamp);
    $middle_month = date('m', $middleDayTimestamp);
    $middle_day = date('d', $middleDayTimestamp);

    $start_name = $start->toTimeName();
    $end_name = $end->toTimeName();

    // Names of the form "YYYY-YYYY" are reduced to the relevant year:
    // the first one for the start, the second one for the end.
    if (strlen($start_name) === 9 and substr($start_name, 4, 1) === '-') $start_name = substr($start_name, 0, 4);
    if (strlen($end_name) === 9 and substr($end_name, 4, 1) === '-') $end_name = substr($end_name, 5, 4);

    return [
        "start_name" => $start_name,
        "end_name" => $end_name,
        "start_year" => $start->start_year,
        "end_year" => $end->end_year,
        "start_date" => $start->start_date,
        "end_date" => $end->end_date,
        "counting_time_year" => $middle_year,
        "counting_time_month" => $middle_month,
        "counting_time_day" => $middle_day,
        "counting_time_bcce" => "+",
    ];

}
|
||
|
||
/**
 * Removes superfluous characters and makes an input string roughly parsable.
 *
 * Surrounding whitespace is removed first, then surrounding commas and
 * semicolons, then leading spaces and dots. If the remaining string
 * consists of two identical halves joined by a single hyphen
 * ("1440-1440"), only one half is returned.
 *
 * @param string $input Input string.
 *
 * @return string
 */
private static function _runBasicNameCleanup(string $input):string {

    $cleaned = trim($input);
    $cleaned = trim($cleaned, ',;');
    $cleaned = ltrim($cleaned, ' .');

    // Clean away duplicate inputs
    // 1440-1440
    $halves = explode('-', $cleaned);
    if (count($halves) === 2 && $halves[0] === $halves[1]) {
        return $halves[0];
    }

    return $cleaned;

}
|
||
|
||
/**
 * Wrapper to check if any splitting command works.
 *
 * The input is cleaned with self::_runBasicNameCleanup() and then handed
 * to the specific checks in a fixed order: time span, incomplete date,
 * valid date, valid Hungarian date, century, decade. The first non-empty
 * result is returned. If none of them succeeds and the input is made up
 * of digits, dots and spaces only ("2015. 05."), the spaces are removed
 * and splitting is attempted once more.
 *
 * @param string $datum Input date.
 *
 * @return NodaSplitTime|false The split time, or false if the input could
 *                             not be split or a check raised an
 *                             MDgenericInvalidInputsException.
 */
public static function attempt_splitting(string $datum):NodaSplitTime|false {

    $datum = self::_runBasicNameCleanup($datum);

    try {
        if (!empty($moda = self::is_timespan($datum))) {
            return $moda;
        }

        if (!empty($moda = self::is_incomplete_date($datum))) {
            return $moda;
        }

        if (!empty($moda = self::is_valid_date($datum))) {
            return $moda;
        }

        if (!empty($moda = self::is_valid_date_hungarian($datum))) {
            return $moda;
        }

        if (!empty($moda = self::is_century($datum))) {
            return $moda;
        }

        if (!empty($moda = self::is_decade($datum))) {
            return $moda;
        }
    }
    catch (MDgenericInvalidInputsException $e) {
        return false;
    }

    // 2015. 05.
    if (str_contains($datum, ' ')) {
        $rewrite = str_replace(' ', '', $datum);
        // The numeric check has to run on the string without spaces:
        // is_numeric() rejects any string with a space between digits.
        // The rewritten string contains no spaces, so this recursion
        // cannot enter this branch again.
        if (is_numeric(str_replace('.', '', $rewrite))) {
            return self::attempt_splitting($rewrite);
        }
    }

    return false;

}
|
||
}
|