From 4a49c7a4e71250ed88f7a8673675afae2bd0a791 Mon Sep 17 00:00:00 2001 From: Joshua Ramon Enslin Date: Sat, 9 Dec 2023 11:56:15 +0100 Subject: [PATCH] Fix erroneous splitting of Hungarian time names with additions --- src/NodaTimeSplitter.php | 39 ++++++++++++++++++++-------- tests/NodaTimeSplitterTest.php | 46 ++++++++++++++++++++++++++++++++-- 2 files changed, 73 insertions(+), 12 deletions(-) diff --git a/src/NodaTimeSplitter.php b/src/NodaTimeSplitter.php index 32ec0e6..09628a4 100644 --- a/src/NodaTimeSplitter.php +++ b/src/NodaTimeSplitter.php @@ -42,7 +42,7 @@ final class NodaTimeSplitter { ]; const MONTH_NAMES_HUNGARIAN = [ - "01" => ['január', 'jan'], + "01" => ['január', 'januar', 'jan'], "02" => ['február', 'feb'], "03" => ['március', 'mar.', 'már.'], "04" => ['április', 'apr.', 'ápr.'], @@ -521,20 +521,25 @@ final class NodaTimeSplitter { // Skip, if dates are too long and do not contain spaces (= no translatable names) if (str_contains($datum, " ") === false && strlen($datum) > 12) return []; + $unparsed = trim(strtolower(str_replace($year, '', $datum)), ' ,.'); foreach (self::MONTH_NAMES_HUNGARIAN as $monthVal => $monthValidNames) { if (self::stri_occurs($datum, $monthValidNames)) { $monat = (string)$monthVal; + foreach ($monthValidNames as $name) { + $unparsed = str_replace($name, '', $unparsed); + } break; } } + if (strlen($unparsed) > 5) { + return []; + } + if (empty($monat) and self::is_numeric((string)\substr($datum, 5, 2))) $monat = \substr($datum, 5, 2); else if (empty($monat) and self::is_numeric((string)\substr($datum, 6, 2))) $monat = \substr($datum, 6, 2); // Last four characters must contain at least one space or one dot - $lastChars = substr($datum, -4); - if (str_contains($lastChars, '.') === false && str_contains($lastChars, ' ') === false) return []; - $day = self::validateDateSubstr($datum, -2); if (empty($day)) $day = self::validateDateSubstr($datum, -3, 2); if (empty($day)) $day = self::validateDateSubstr($datum, -4, 2); @@ -550,6 +555,14 @@ final class NodaTimeSplitter { } } + if ($datum === '1978. július 7 elött') { + throw new Exception(var_export($monat, true)); + } + + if (!empty($monat) && empty($day) && preg_match('~[0-9]+~', substr($datum, -3))) { + return []; + } + if (!empty($monat) and !empty($day)) { return [$year, $year, $monat, $day, '+', ""]; } @@ -932,6 +945,12 @@ final class NodaTimeSplitter { return $output; } } + if (!empty(\preg_match("/^[0-9]{4}ig$/", $datum))) { + if ($output = self::attempt_splitting(\substr($datum, 0, 4))) { + $output[0] = "?"; + return $output; + } + } return []; @@ -1102,7 +1121,7 @@ final class NodaTimeSplitter { // Hungarian year and month until month // 2005.01.-02. => 2005.01.-2005.02. - if ($inputLength === 12 && (\preg_match("/^[0-9]{4}\.[0-1][0-9]\.\-[0-1][0-9]\.$/", $datum)) !== false) { + if ($inputLength === 12 && (\preg_match("/^[0-9]{4}\.[0-1][0-9]\.\-[0-1][0-9]\.$/", $datum))) { $reconstituted = substr($datum, 0, 8) . '-'; $reconstituted .= substr($datum, 0, 4) . '.' . substr($datum, -3); return $reconstituted; @@ -1111,7 +1130,7 @@ final class NodaTimeSplitter { // Hungarian year and month until month without a dot after the first YYYY-MM // 2005.01-02. => 2005.01.-2005.02. - if (in_array($inputLength, [10, 11], true) && (\preg_match("/^[0-9]{4}\.[0-1][0-9]\-[0-1][0-9](\.|)$/", $datum)) !== false) { + if (in_array($inputLength, [10, 11], true) && (\preg_match("/^[0-9]{4}\.[0-1][0-9]\-[0-1][0-9](\.|)$/", $datum))) { $reconstituted = substr($datum, 0, 7) . '.-'; $reconstituted .= substr($datum, 0, 4) . '.' . substr(rtrim($datum, '.'), -2) . '.'; return $reconstituted; @@ -1120,7 +1139,7 @@ final class NodaTimeSplitter { // Hungarian year and month until month // 2005.01.01.-02.02. => 2005.01.01-2005.02.02. // 2005.01.01-02.02 => 2005.01.01-2005.02.02. - if ($inputLength >= 16 && $inputLength <= 18 && (\preg_match("/^[0-9]{4}\.[0-1][0-9]\.[0-3][0-9](\.|)\-[0-1][0-9]\.[0-3][0-9](\.|)$/", $datum)) !== false) { + if ($inputLength >= 16 && $inputLength <= 18 && (\preg_match("/^[0-9]{4}\.[0-1][0-9]\.[0-3][0-9](\.|)\-[0-1][0-9]\.[0-3][0-9](\.|)$/", $datum))) { $parts = explode('-', $datum); if (count($parts) !== 2) return ''; $reconstituted = substr($datum, 0, 10) . '.-'; @@ -1129,7 +1148,7 @@ final class NodaTimeSplitter { } // Hungarian; without trailing dots: YYYY.MM.DD-DD - if ($inputLength >= 13 && $inputLength <= 15 && (\preg_match("/^[0-9]{4}\.[0-1][0-9]\.[0-3][0-9](\.|)\-[0-3][0-9](\.|)$/", $datum)) !== false) { + if ($inputLength >= 13 && $inputLength <= 15 && \preg_match("/^[0-9]{4}\.[0-1][0-9]\.[0-3][0-9](\.|)\-[0-3][0-9](\.|)$/", $datum)) { $parts = explode('-', $datum); if (count($parts) !== 2) return ''; $reconstituted = substr($datum, 0, 10) . '.-'; @@ -1154,8 +1173,8 @@ final class NodaTimeSplitter { // If es évek / as évek is contained in the string (e.g. 1880-1990-es évek), there // will be more than one hyphens - if (MD_STD::stri_contains_any($datum, ['-as évek', '-es évek'])) { - return strtr($datum, ['-as évek' => ' as évek', '-es évek' => ' es évek']); + if (MD_STD::stri_contains_any($datum, ['-as évek', '-es-évek', '-es évek'])) { + return strtr($datum, ['-as évek' => ' as évek', '-es-évek' => ' es évek', '-es évek' => ' es évek']); } // 1981. július-augusztus > 1981.07-08 diff --git a/tests/NodaTimeSplitterTest.php b/tests/NodaTimeSplitterTest.php index 7711c28..1ae5577 100644 --- a/tests/NodaTimeSplitterTest.php +++ b/tests/NodaTimeSplitterTest.php @@ -610,14 +610,14 @@ final class NodaTimeSplitterTest extends TestCase { self::assertEquals(NodaTimeSplitter::timePartsToCountingYear($output), 1925); $output = NodaTimeSplitter::attempt_splitting("2020. Januar"); - self::assertEquals($output, [ + self::assertEquals([ 0 => "2020", 1 => "2020", 2 => "01", 3 => "00", 4 => "+", 5 => "", - ]); + ], $output); self::assertEquals(NodaTimeSplitter::timePartsToTimeName($output), "Januar 2020"); self::assertEquals(NodaTimeSplitter::timePartsToCountingYear($output), 2020); @@ -729,6 +729,45 @@ final class NodaTimeSplitterTest extends TestCase { self::assertEquals(NodaTimeSplitter::timePartsToTimeName($output), "Bis 1801 v. Chr."); self::assertEquals(NodaTimeSplitter::timePartsToCountingYear($output), 1801); + /* + * TODO + $output = NodaTimeSplitter::attempt_splitting("1900 előtt"); + self::assertEquals($output, [ + 0 => "?", + 1 => "1899", + 2 => "00", + 3 => "00", + 4 => "+", + 5 => "", + ]); + self::assertEquals(NodaTimeSplitter::timePartsToTimeName($output), "Vor 1900"); + self::assertEquals(NodaTimeSplitter::timePartsToCountingYear($output), 1899); + */ + + $output = NodaTimeSplitter::attempt_splitting("1900-ig"); + self::assertEquals($output, [ + 0 => "?", + 1 => "1900", + 2 => "00", + 3 => "00", + 4 => "+", + 5 => "", + ]); + self::assertEquals(NodaTimeSplitter::timePartsToTimeName($output), "Bis 1900"); + self::assertEquals(NodaTimeSplitter::timePartsToCountingYear($output), 1900); + + $output = NodaTimeSplitter::attempt_splitting("1900ig"); + self::assertEquals($output, [ + 0 => "?", + 1 => "1900", + 2 => "00", + 3 => "00", + 4 => "+", + 5 => "", + ]); + self::assertEquals(NodaTimeSplitter::timePartsToTimeName($output), "Bis 1900"); + self::assertEquals(NodaTimeSplitter::timePartsToCountingYear($output), 1900); + } /** @@ -881,6 +920,9 @@ final class NodaTimeSplitterTest extends TestCase { $output = NodaTimeSplitter::attempt_splitting("1978. július7"); self::assertEmpty($output); + $output = NodaTimeSplitter::attempt_splitting("1978. július 7 elött"); + self::assertEmpty($output); + $output = NodaTimeSplitter::attempt_splitting("Anfang September 1903"); self::assertEmpty($output);