Fix bug in German dates like "1 November 1921"

This commit is contained in:
Joshua Ramon Enslin 2022-09-08 16:28:27 +02:00
parent 6f41ffeb9f
commit c8d0292ca8
Signed by: jrenslin
GPG Key ID: 46016F84501B70AE
2 changed files with 43 additions and 11 deletions

View File

@ -428,6 +428,10 @@ final class NodaTimeSplitter {
if (strlen($datum) <= 9) $use_day = false; if (strlen($datum) <= 9) $use_day = false;
else $use_day = true; else $use_day = true;
if (self::is_numeric((string)\substr($datum, -4))) $year = \substr($datum, -4);
// Further code requires a year to be present, skip if none is set
if (empty($year)) return [];
foreach (self::MONTH_NAMES_ENGLISH as $monthVal => $monthValidNames) { foreach (self::MONTH_NAMES_ENGLISH as $monthVal => $monthValidNames) {
if (self::stri_occurs($datum, $monthValidNames)) { if (self::stri_occurs($datum, $monthValidNames)) {
if (!empty($monat)) break; if (!empty($monat)) break;
@ -444,10 +448,12 @@ final class NodaTimeSplitter {
if (empty($monat) and self::is_numeric((string)\substr($datum, 3, 2))) $monat = \substr($datum, 3, 2); if (empty($monat) and self::is_numeric((string)\substr($datum, 3, 2))) $monat = \substr($datum, 3, 2);
if (self::is_numeric((string)\substr($datum, 0, 2))) $day = \substr($datum, 0, 2); if (self::is_numeric((string)\substr($datum, 0, 2))) {
else if (\substr($datum, 1, 1) === "." and self::is_numeric((string)\substr($datum, 0, 1))) $day = "0" . \substr($datum, 0, 1); $day = \substr($datum, 0, 2);
}
if (self::is_numeric((string)\substr($datum, -4))) $year = \substr($datum, -4); else if (\in_array(\substr($datum, 1, 1), [".", " "]) && self::is_numeric((string)\substr($datum, 0, 1))) {
$day = "0" . \substr($datum, 0, 1);
}
if (!empty($year) and !empty($monat) and !empty($day) and $use_day) { if (!empty($year) and !empty($monat) and !empty($day) and $use_day) {
return [$year, $year, $monat, $day, '+', ""]; return [$year, $year, $monat, $day, '+', ""];
@ -493,8 +499,19 @@ final class NodaTimeSplitter {
return []; return [];
} }
//
// Rest: Only those entries, where there are spelled out months
//
if (strlen($datum) <= 9) return []; if (strlen($datum) <= 9) return [];
// The year is only parse-able if it is a four digit year at the start
if (self::is_numeric((string)\substr($datum, 0, 4))) {
$year = \substr($datum, 0, 4);
}
// Further code requires a year to be present, skip if none is set
if (empty($year)) return [];
foreach (self::MONTH_NAMES_HUNGARIAN as $monthVal => $monthValidNames) { foreach (self::MONTH_NAMES_HUNGARIAN as $monthVal => $monthValidNames) {
if (self::stri_occurs($datum, $monthValidNames)) { if (self::stri_occurs($datum, $monthValidNames)) {
if (!empty($monat)) return []; if (!empty($monat)) return [];
@ -511,14 +528,14 @@ final class NodaTimeSplitter {
if (empty($day)) $day = self::validateDateSubstr($datum, -5, 2); if (empty($day)) $day = self::validateDateSubstr($datum, -5, 2);
if (empty($day)) $day = self::validateDateSubstr($datum, -6, 2); if (empty($day)) $day = self::validateDateSubstr($datum, -6, 2);
if (empty($day)) {
if (\substr($datum, -2, 1) === " " and self::is_numeric((string)\substr($datum, -1, 1))) { if (\substr($datum, -2, 1) === " " and self::is_numeric((string)\substr($datum, -1, 1))) {
$day = "0" . \substr($datum, -1, 1); $day = "0" . \substr($datum, -1, 1);
} }
else if (empty($day) and \substr($datum, -3, 1) === " " and self::is_numeric((string)\substr($datum, -2, 1))) { else if (\substr($datum, -3, 1) === " " and self::is_numeric((string)\substr($datum, -2, 1))) {
$day = "0" . \substr($datum, -2, 1); $day = "0" . \substr($datum, -2, 1);
} }
}
if (self::is_numeric((string)\substr($datum, 0, 4))) $year = \substr($datum, 0, 4);
if (!empty($year) and !empty($monat) and !empty($day)) { if (!empty($year) and !empty($monat) and !empty($day)) {
return [$year, $year, $monat, $day, '+', ""]; return [$year, $year, $monat, $day, '+', ""];

View File

@ -374,6 +374,18 @@ final class NodaTimeSplitterTest extends TestCase {
self::assertEquals(NodaTimeSplitter::timePartsToTimeName($output), "Bis 1801 v. Chr."); self::assertEquals(NodaTimeSplitter::timePartsToTimeName($output), "Bis 1801 v. Chr.");
self::assertEquals(NodaTimeSplitter::timePartsToCountingYear($output), 1801); self::assertEquals(NodaTimeSplitter::timePartsToCountingYear($output), 1801);
$output = NodaTimeSplitter::attempt_splitting("6 November 1978");
self::assertEquals($output, [
0 => "1978",
1 => "1978",
2 => "11",
3 => "06",
4 => "+",
5 => "",
]);
self::assertEquals(NodaTimeSplitter::timePartsToTimeName($output), "06.11.1978");
self::assertEquals(NodaTimeSplitter::timePartsToCountingYear($output), 1978);
} }
/** /**
@ -603,6 +615,9 @@ final class NodaTimeSplitterTest extends TestCase {
$output = NodaTimeSplitter::attempt_splitting("2022-10-40"); $output = NodaTimeSplitter::attempt_splitting("2022-10-40");
self::assertEquals($output, []); self::assertEquals($output, []);
$output = NodaTimeSplitter::attempt_splitting("6;November 1978");
self::assertEquals($output, []);
# $output = NodaTimeSplitter::attempt_splitting("Nach 1944-1964"); # $output = NodaTimeSplitter::attempt_splitting("Nach 1944-1964");
# self::assertEquals($output, []); # self::assertEquals($output, []);