From f3831965a3c4f3a8478fd037b3b6700227b647e1 Mon Sep 17 00:00:00 2001 From: Joshua Ramon Enslin Date: Sat, 9 Dec 2023 23:54:45 +0100 Subject: [PATCH] Extend NodaTimeSplitter --- src/NodaTimeSplitter.php | 27 ++++++++++++++++--------- tests/NodaTimeSplitterTest.php | 36 ++++++++++++++++++++++++++++++++++ 2 files changed, 54 insertions(+), 9 deletions(-) diff --git a/src/NodaTimeSplitter.php b/src/NodaTimeSplitter.php index 09628a4..d48b8ed 100644 --- a/src/NodaTimeSplitter.php +++ b/src/NodaTimeSplitter.php @@ -58,6 +58,8 @@ final class NodaTimeSplitter { const STRINGS_TO_CLEAN = [ "között" => "", + " рр." => "", + " рр" => "", "nach Christus" => "", "n. Christus" => "", "nach Chr." => "", @@ -69,6 +71,7 @@ final class NodaTimeSplitter { "BCE" => "v. Chr.", "CE" => "", "vor Christus" => "v. Chr.", + " до н. е." => "v. Chr.", ]; const STRINGS_TO_CLEAN_START = [ @@ -133,6 +136,7 @@ final class NodaTimeSplitter { ]; private const REGEX_CENTURIES = '(\ |)(Jh\.|Jhd(|\.)|Jhdt(|\.)|Jahrhundert|sz|század|th century|ст|ст\.)'; + private const REGEX_DECADES = '(s|er|er\ Jahre|(\-|\ )es\ évek|(\-|\ )as\ \évek|\ évek|\-es\ években|\-ті)'; /** * Cleans input strings by trimming obsolete stuff. @@ -555,10 +559,6 @@ final class NodaTimeSplitter { } } - if ($datum === '1978. július 7 elött') { - throw new Exception(var_export($monat, true)); - } - if (!empty($monat) && empty($day) && preg_match('~[0-9]+~', substr($datum, -3))) { return []; } @@ -864,14 +864,17 @@ final class NodaTimeSplitter { } } - if (\preg_match("/\ (\(vor|\(Vor|vor)$/", $datum)) { - if (($spacePos = \strpos($datum, " ")) === false) { + if (\preg_match("/\ (\(vor|\(Vor|vor|előtt)$/", $datum)) { + if (($spacePos = \strrpos($datum, " ")) === false) { return []; } if ($output = self::attempt_splitting(\substr($datum, 0, $spacePos))) { $output[0] = "?"; - if (empty(trim($output[2], "0 .,"))) $output[1] = \strval(\intval($output[1]) - 1); + // If month and day are unknown, lower end year by one + if (empty(trim($output[2], "0 .,"))) { + $output[1] = \strval(\intval($output[1]) - 1); + } $output[5] = "Vor"; return $output; } @@ -951,6 +954,12 @@ final class NodaTimeSplitter { return $output; } } + if (str_ends_with($datum, '-as évekig') || str_ends_with($datum, '-es évekig')) { + if ($output = self::attempt_splitting(\substr($datum, 0, -2))) { + $output[0] = "?"; + return $output; + } + } return []; @@ -1054,14 +1063,14 @@ final class NodaTimeSplitter { $bcBceIndicator = '+'; // 20er Jahre - if (\preg_match("/^[0-9]0(er|er\ Jahre|\-es\ évek|\-as\ \évek)$/", $datum)) { + if (\preg_match("/^[0-9]0" . self::REGEX_DECADES . "$/", $datum)) { $start = "19" . \substr($datum, 0, 2); $ende = (string)(\intval($start) + 9); return [$start, $ende, "00", "00", $bcBceIndicator, ""]; } // 1920er Jahre - if (\preg_match("/^[0-9]{3}0(s|er|er\ Jahre|(\-|\ )es\ évek|(\-|\ )as\ \évek)$/", $datum)) { + if (\preg_match("/^[0-9]{3}0" . self::REGEX_DECADES . "$/", $datum)) { $start = \substr($datum, 0, 4); $ende = (string)(\intval($start) + 9); return [$start, $ende, "00", "00", $bcBceIndicator, ""]; diff --git a/tests/NodaTimeSplitterTest.php b/tests/NodaTimeSplitterTest.php index 1ae5577..a9b4583 100644 --- a/tests/NodaTimeSplitterTest.php +++ b/tests/NodaTimeSplitterTest.php @@ -500,6 +500,18 @@ final class NodaTimeSplitterTest extends TestCase { self::assertEquals(NodaTimeSplitter::timePartsToTimeName($output), "02.01.2020"); self::assertEquals(NodaTimeSplitter::timePartsToCountingYear($output), 2020); + $output = NodaTimeSplitter::attempt_splitting("2020. januar. 2."); + self::assertEquals($output, [ + 0 => "2020", + 1 => "2020", + 2 => "01", + 3 => "02", + 4 => "+", + 5 => "", + ]); + self::assertEquals(NodaTimeSplitter::timePartsToTimeName($output), "02.01.2020"); + self::assertEquals(NodaTimeSplitter::timePartsToCountingYear($output), 2020); + $output = NodaTimeSplitter::attempt_splitting("2020.01.2."); self::assertEquals($output, [ 0 => "2020", @@ -609,6 +621,18 @@ final class NodaTimeSplitterTest extends TestCase { self::assertEquals(NodaTimeSplitter::timePartsToTimeName($output), "1920-1929"); self::assertEquals(NodaTimeSplitter::timePartsToCountingYear($output), 1925); + $output = NodaTimeSplitter::attempt_splitting("1920-es évekig"); + self::assertEquals($output, [ + 0 => "?", + 1 => "1929", + 2 => "00", + 3 => "00", + 4 => "+", + 5 => "", + ]); + self::assertEquals(NodaTimeSplitter::timePartsToTimeName($output), "Bis 1929"); + self::assertEquals(NodaTimeSplitter::timePartsToCountingYear($output), 1929); + $output = NodaTimeSplitter::attempt_splitting("2020. Januar"); self::assertEquals([ 0 => "2020", @@ -768,6 +792,18 @@ final class NodaTimeSplitterTest extends TestCase { self::assertEquals(NodaTimeSplitter::timePartsToTimeName($output), "Bis 1900"); self::assertEquals(NodaTimeSplitter::timePartsToCountingYear($output), 1900); + $output = NodaTimeSplitter::attempt_splitting("2020. december előtt"); + self::assertEquals($output, [ + 0 => "?", + 1 => "2020", + 2 => "12", + 3 => "00", + 4 => "+", + 5 => "Vor", + ]); + self::assertEquals(NodaTimeSplitter::timePartsToTimeName($output), "Vor Dezember 2020"); + self::assertEquals(NodaTimeSplitter::timePartsToCountingYear($output), 2020); + } /**