Enable NodaTimeSplitter to split dates with uncertain end or start

(seit, bis)
This commit is contained in:
Joshua Ramon Enslin 2020-09-22 17:58:26 +02:00 committed by Stefan Rohde-Enslin
parent 707f781f1e
commit 4f1e65934a

View File

@ -175,6 +175,16 @@ final class NodaTimeSplitter {
*/ */
public static function timePartsToTimeName(array $moda):string { public static function timePartsToTimeName(array $moda):string {
if ($moda[0] === "?") {
$prefix = "Bis ";
$moda[0] = $moda[1];
}
else if ($moda[1] === "?") {
$prefix = "Seit ";
$moda[1] = $moda[0];
}
else $prefix = "";
$moda[0] = intval($moda[0]); $moda[0] = intval($moda[0]);
$moda[1] = intval($moda[1]); $moda[1] = intval($moda[1]);
@ -190,17 +200,17 @@ final class NodaTimeSplitter {
$moda[1] = abs($moda[1]); $moda[1] = abs($moda[1]);
if ($moda[0] !== $moda[1]) { if ($moda[0] !== $moda[1]) {
return "{$moda[0]}-{$moda[1]}{$suffix}"; return "{$prefix}{$moda[0]}-{$moda[1]}{$suffix}";
} }
else if (intval($moda[2]) !== 0 and intval($moda[3]) !== 0) { else if (intval($moda[2]) !== 0 and intval($moda[3]) !== 0) {
return "{$moda[3]}.{$moda[2]}.{$moda[0]}{$suffix}"; return "{$prefix}{$moda[3]}.{$moda[2]}.{$moda[0]}{$suffix}";
} }
else if ($moda[0] === $moda[1] && trim((string)$moda[2], " 0") === "" && trim((string)$moda[3], " 0") === "") { else if ($moda[0] === $moda[1] && trim((string)$moda[2], " 0") === "" && trim((string)$moda[3], " 0") === "") {
return "{$moda[0]}{$suffix}"; return "{$prefix}{$moda[0]}{$suffix}";
} }
else if ($moda[0] === $moda[1] && trim((string)$moda[2], " 0") !== "" && trim((string)$moda[3], " 0") === "") { else if ($moda[0] === $moda[1] && trim((string)$moda[2], " 0") !== "" && trim((string)$moda[3], " 0") === "") {
setlocale(LC_TIME, NodaTimeAutotranslater::LANGS_TO_LOCALES['de']); setlocale(LC_TIME, NodaTimeAutotranslater::LANGS_TO_LOCALES['de']);
return strftime(getMonthFormatByLang("de"), MD_STD::strtotime("{$moda[0]}-{$moda[2]}-01 01:01:01")) . $suffix; return $prefix . strftime(getMonthFormatByLang("de"), MD_STD::strtotime("{$moda[0]}-{$moda[2]}-01 01:01:01")) . $suffix;
} }
return ""; return "";
@ -216,6 +226,14 @@ final class NodaTimeSplitter {
*/ */
public static function timePartsToCountingYear(array $moda):int {

    // Open-ended dates carry "?" in place of the unknown boundary
    // (index 0 = start, index 1 = end). The known boundary is then the
    // counting year. abs() is applied so these branches agree with the
    // regular branch below, which always returns an absolute value.
    if ($moda[0] === "?") {
        return abs(intval($moda[1]));
    }
    if ($moda[1] === "?") {
        return abs(intval($moda[0]));
    }

    $start = intval($moda[0]);
    $end   = intval($moda[1]);

    // Midpoint between start and end year, rounded up, as an absolute value.
    return abs((int)ceil($end - (($end - $start) / 2)));

}
@ -292,22 +310,22 @@ final class NodaTimeSplitter {
if (preg_match("/^[0-9][0-9][0-9][0-9](\-|\/)[0-9][0-9][0-9][0-9] v\. Chr\.$/", $datum)) { if (preg_match("/^[0-9][0-9][0-9][0-9](\-|\/)[0-9][0-9][0-9][0-9] v\. Chr\.$/", $datum)) {
$start = "-" . substr($datum, 0, 4); $start = "-" . substr($datum, 0, 4);
$end = "-" . substr($datum, 5, 4); $end = "-" . substr($datum, 5, 4);
return [$start, $end, "00", "00", "-"]; return [$start, $end, "00", "00", "-", ""];
} }
if (preg_match("/^[0-9][0-9][0-9][0-9](\-|\/)[0-9][0-9][0-9] v\. Chr\.$/", $datum)) { if (preg_match("/^[0-9][0-9][0-9][0-9](\-|\/)[0-9][0-9][0-9] v\. Chr\.$/", $datum)) {
$start = "-" . substr($datum, 0, 4); $start = "-" . substr($datum, 0, 4);
$end = "-" . substr($datum, 5, 3); $end = "-" . substr($datum, 5, 3);
return [$start, $end, "00", "00", "-"]; return [$start, $end, "00", "00", "-", ""];
} }
if (preg_match("/^[0-9][0-9][0-9](\-|\/)[0-9][0-9][0-9] v\. Chr\.$/", $datum)) { if (preg_match("/^[0-9][0-9][0-9](\-|\/)[0-9][0-9][0-9] v\. Chr\.$/", $datum)) {
$start = "-" . substr($datum, 0, 3); $start = "-" . substr($datum, 0, 3);
$end = "-" . substr($datum, 4, 3); $end = "-" . substr($datum, 4, 3);
return [$start, $end, "00", "00", "-"]; return [$start, $end, "00", "00", "-", ""];
} }
if (preg_match("/^[0-9][0-9](\-|\/)[0-9][0-9] v\. Chr\.$/", $datum)) { if (preg_match("/^[0-9][0-9](\-|\/)[0-9][0-9] v\. Chr\.$/", $datum)) {
$start = "-00" . substr($datum, 0, 2); $start = "-00" . substr($datum, 0, 2);
$end = "-00" . substr($datum, 3, 2); $end = "-00" . substr($datum, 3, 2);
return [$start, $end, "00", "00", "-"]; return [$start, $end, "00", "00", "-", ""];
} }
if (self::stri_occurs($datum, self::STOP_STRINGS_GERMAN)) { if (self::stri_occurs($datum, self::STOP_STRINGS_GERMAN)) {
@ -333,10 +351,10 @@ final class NodaTimeSplitter {
if (self::is_numeric((string)substr($datum, -4))) $year = substr($datum, -4); if (self::is_numeric((string)substr($datum, -4))) $year = substr($datum, -4);
if (!empty($year) and !empty($monat) and !empty($day) and $use_day) { if (!empty($year) and !empty($monat) and !empty($day) and $use_day) {
return [$year, $year, $monat, $day, '+']; return [$year, $year, $monat, $day, '+', ""];
} }
else if (!empty($year) and !empty($monat)) { else if (!empty($year) and !empty($monat)) {
return [$year, $year, $monat, "00", '+']; return [$year, $year, $monat, "00", '+', ""];
} }
return []; return [];
@ -390,10 +408,10 @@ final class NodaTimeSplitter {
if (self::is_numeric((string)substr($datum, 0, 4))) $year = substr($datum, 0, 4); if (self::is_numeric((string)substr($datum, 0, 4))) $year = substr($datum, 0, 4);
if (!empty($year) and !empty($monat) and !empty($day)) { if (!empty($year) and !empty($monat) and !empty($day)) {
return [$year, $year, $monat, $day, '+']; return [$year, $year, $monat, $day, '+', ""];
} }
else if (!empty($year) and !empty($monat)) { else if (!empty($year) and !empty($monat)) {
return [$year, $year, $monat, "00", '+']; return [$year, $year, $monat, "00", '+', ""];
} }
return []; return [];
@ -414,7 +432,7 @@ final class NodaTimeSplitter {
return []; return [];
} }
return [date("Y", $timeInt), date("m", $timeInt), date("d", $timeInt), '+']; return [date("Y", $timeInt), date("m", $timeInt), date("d", $timeInt), '+', ""];
} }
@ -432,72 +450,105 @@ final class NodaTimeSplitter {
if (preg_match("/^[0-9][0-9][0-9][0-9](\-|\/)[0-9][0-9][0-9][0-9]$/", $datum)) { if (preg_match("/^[0-9][0-9][0-9][0-9](\-|\/)[0-9][0-9][0-9][0-9]$/", $datum)) {
$start = substr($datum, 0, 4); $start = substr($datum, 0, 4);
$end = substr($datum, -4); $end = substr($datum, -4);
return [$start, $end, "00", "00", "+"]; return [$start, $end, "00", "00", "+", ""];
} }
if (preg_match("/^[0-9][0-9]\.[0-9]\.[0-9][0-9][0-9][0-9]$/", $datum)) { // German T.MM.JJJJ if (preg_match("/^[0-9][0-9]\.[0-9]\.[0-9][0-9][0-9][0-9]$/", $datum)) { // German T.MM.JJJJ
$start = substr($datum, 5, 4); $start = substr($datum, 5, 4);
$month = "0" . substr($datum, 3, 1); $month = "0" . substr($datum, 3, 1);
$day = substr($datum, 0, 2); $day = substr($datum, 0, 2);
return [$start, $start, $month, $day, "+"]; return [$start, $start, $month, $day, "+", ""];
} }
if (preg_match("/^[0-9]\.[0-9][0-9]\.[0-9][0-9][0-9][0-9]$/", $datum)) { // German T.MM.JJJJ if (preg_match("/^[0-9]\.[0-9][0-9]\.[0-9][0-9][0-9][0-9]$/", $datum)) { // German T.MM.JJJJ
$start = substr($datum, 5, 4); $start = substr($datum, 5, 4);
$month = substr($datum, 2, 2); $month = substr($datum, 2, 2);
$day = "0" . substr($datum, 0, 1); $day = "0" . substr($datum, 0, 1);
return [$start, $start, $month, $day, "+"]; return [$start, $start, $month, $day, "+", ""];
} }
if (preg_match("/^[0-9]\.[0-9]\.[0-9][0-9][0-9][0-9]$/", $datum)) { // German T.M.JJJJ if (preg_match("/^[0-9]\.[0-9]\.[0-9][0-9][0-9][0-9]$/", $datum)) { // German T.M.JJJJ
$start = substr($datum, 4, 4); $start = substr($datum, 4, 4);
$month = "0" . substr($datum, 2, 1); $month = "0" . substr($datum, 2, 1);
$day = "0" . substr($datum, 0, 1); $day = "0" . substr($datum, 0, 1);
return [$start, $start, $month, $day, "+"]; return [$start, $start, $month, $day, "+", ""];
} }
if (preg_match("/^[0-9][0-9]\.[0-9][0-9][0-9][0-9]$/", $datum)) { // German Y-m if (preg_match("/^[0-9][0-9]\.[0-9][0-9][0-9][0-9]$/", $datum)) { // German Y-m
$start = substr($datum, 3, 4); $start = substr($datum, 3, 4);
$month = substr($datum, 0, 2); $month = substr($datum, 0, 2);
return [$start, $start, $month, "00", "+"]; return [$start, $start, $month, "00", "+", ""];
} }
if (preg_match("/^[0-9]\.[0-9][0-9][0-9][0-9]$/", $datum)) { // German Y-m if (preg_match("/^[0-9]\.[0-9][0-9][0-9][0-9]$/", $datum)) { // German Y-m
$start = substr($datum, 2, 4); $start = substr($datum, 2, 4);
$month = "0" . substr($datum, 0, 1); $month = "0" . substr($datum, 0, 1);
return [$start, $start, $month, "00", "+"]; return [$start, $start, $month, "00", "+", ""];
} }
if (preg_match("/^[0-9][0-9][0-9][0-9]\.[0-9][0-9]$/", $datum)) { // Hungarian Y-m if (preg_match("/^[0-9][0-9][0-9][0-9]\.[0-9][0-9]$/", $datum)) { // Hungarian Y-m
$start = substr($datum, 0, 4); $start = substr($datum, 0, 4);
$month = substr($datum, 5, 2); $month = substr($datum, 5, 2);
return [$start, $start, $month, "00", "+"]; return [$start, $start, $month, "00", "+", ""];
} }
if (preg_match("/^[0-9][0-9][0-9][0-9]\-[0-9][0-9]$/", $datum)) { // Hungarian Y-m if (preg_match("/^[0-9][0-9][0-9][0-9]\-[0-9][0-9]$/", $datum)) { // Time spans: 1945-46
$start = substr($datum, 0, 4); $start = substr($datum, 0, 4);
$endDigits = substr($datum, 5, 2); $endDigits = substr($datum, 5, 2);
if (intval($endDigits) > 12) return [$start, substr($datum, 0, 2) . $endDigits, "00", "00", "+"]; if (intval($endDigits) > 12) return [$start, substr($datum, 0, 2) . $endDigits, "00", "00", "+", ""];
} }
if (preg_match("/^01\.01\.[0-9][0-9][0-9][0-9]\-31\.12\.[0-9][0-9][0-9][0-9]$/", $datum)) { // Hungarian Y-m if (preg_match("/^01\.01\.[0-9][0-9][0-9][0-9]\-31\.12\.[0-9][0-9][0-9][0-9]$/", $datum)) { // Hungarian Y-m
$start = substr($datum, 6, 4); $start = substr($datum, 6, 4);
$end = substr($datum, -4); $end = substr($datum, -4);
return [$start, $end, "00", "00", "+"]; return [$start, $end, "00", "00", "+", ""];
} }
if (preg_match("/^[0-9][0-9][0-9]\-[0-9][0-9][0-9]$/", $datum)) { // Hungarian Y-m if (preg_match("/^[0-9][0-9][0-9]\-[0-9][0-9][0-9]$/", $datum)) { // Hungarian Y-m
$start = substr($datum, 0, 3); $start = substr($datum, 0, 3);
$end = substr($datum, -3); $end = substr($datum, -3);
if ($end > $start) return ["0" . $start, "0" . $end, "00", "00", "+"]; if ($end > $start) return ["0" . $start, "0" . $end, "00", "00", "+", ""];
} }
if (preg_match("/^[0-9][0-9]\-[0-9][0-9]$/", $datum)) { // Hungarian Y-m if (preg_match("/^[0-9][0-9]\-[0-9][0-9]$/", $datum)) { // 20-40 (n. Chr.)
$start = substr($datum, 0, 2); $start = substr($datum, 0, 2);
$end = substr($datum, -2); $end = substr($datum, -2);
if ($end > $start) return ["00" . $start, "00" . $end, "00", "00", "+"]; if ($end > $start) return ["00" . $start, "00" . $end, "00", "00", "+", ""];
} }
if (preg_match("/^[0-9][0-9][0-9][0-9]$/", $datum)) { if (preg_match("/^[0-9][0-9][0-9][0-9]$/", $datum)) {
$start = substr($datum, 0, 4); $start = substr($datum, 0, 4);
return [$start, $start, "00", "00", "+"]; return [$start, $start, "00", "00", "+", ""];
} }
return []; return [];
} }
/**
 * Checks whether an input date is open-ended, i.e. only its start or only
 * its end is given: "Seit 1920" / "Ab 1920" (since) or "Bis 1930" (until).
 *
 * The leading keyword is stripped and the remainder is passed to
 * self::attempt_splitting(). In the returned array the unknown boundary is
 * replaced by "?": index 1 (end) for "Ab"/"Seit", index 0 (start) for "Bis".
 *
 * Note that self::attempt_splitting() itself calls this function, so the two
 * are mutually recursive; results that already contain an unknown boundary
 * are rejected to keep nested keywords ("Seit Bis 1920") from producing a
 * date with both boundaries unknown.
 *
 * @param string $datum Input date.
 *
 * @return array<string> Split date with "?" at the unknown boundary, or an
 *                       empty array if the input is not an open-ended date
 *                       or its remainder cannot be split.
 */
public static function is_incomplete_date(string $datum):array {

    // Keywords are matched case-insensitively so that "ab 1920" is handled
    // the same way as "Ab 1920" / "seit 1920" / "Seit 1920".
    if (preg_match("/^(ab|seit|bis)\ +/i", $datum, $matches) !== 1) {
        return [];
    }

    // Strip keyword and separating whitespace: the splitting patterns are
    // anchored at the start of the string and must not see a leading space.
    $remainder = ltrim(substr($datum, strlen($matches[0])));
    if ($remainder === "") {
        return [];
    }

    $output = self::attempt_splitting($remainder);
    if (!$output) {
        return [];
    }

    // The remainder was itself an open-ended date; marking the second
    // boundary as unknown as well would leave no usable information.
    if (($output[0] ?? "") === "?" || ($output[1] ?? "") === "?") {
        return [];
    }

    // "Bis" (until): start unknown. "Ab" / "Seit" (since): end unknown.
    $unknownIndex = strtolower($matches[1]) === "bis" ? 0 : 1;
    $output[$unknownIndex] = "?";

    return $output;

}
/** /**
* Wrapper to check if any splitting command works. * Wrapper to check if any splitting command works.
* *
@ -508,6 +559,7 @@ final class NodaTimeSplitter {
public static function attempt_splitting(string $datum):array { public static function attempt_splitting(string $datum):array {
$moda = NodaTimeSplitter::is_timespan($datum); $moda = NodaTimeSplitter::is_timespan($datum);
if (!$moda) $moda = NodaTimeSplitter::is_incomplete_date($datum);
if (!$moda) $moda = NodaTimeSplitter::is_valid_date($datum); if (!$moda) $moda = NodaTimeSplitter::is_valid_date($datum);
if (!$moda) $moda = NodaTimeSplitter::is_valid_date_hungarian($datum); if (!$moda) $moda = NodaTimeSplitter::is_valid_date_hungarian($datum);