From 8a4a8f7ed8489f68eb6871210746d3e84d539d0c Mon Sep 17 00:00:00 2001 From: Joshua Ramon Enslin Date: Sun, 4 Oct 2020 23:20:58 +0200 Subject: [PATCH] Split more variations of dots in dates, century ranges --- src/NodaTimeSplitter.php | 35 ++++++++++++++++++++++++----------- 1 file changed, 24 insertions(+), 11 deletions(-) diff --git a/src/NodaTimeSplitter.php b/src/NodaTimeSplitter.php index e9f2aeb..a87bd57 100644 --- a/src/NodaTimeSplitter.php +++ b/src/NodaTimeSplitter.php @@ -657,13 +657,13 @@ final class NodaTimeSplitter { $datum = self::clean_input($datum); - if (preg_match("/^[0-9]{4}\.[0-9]{2}\.[0-9]{2}\-$/", $datum)) { // Hungarian Y-m + if (preg_match("/^[0-9]{4}\.[0-9]{2}\.[0-9]{2}(\.|)\-$/", $datum)) { // Hungarian Y-m $start = substr($datum, 0, 4); $month = substr($datum, 5, 2); $day = substr($datum, 8, 2); return [$start, "?", $month, $day, "+", ""]; } - if (preg_match("/^[0-9]{4}\.[0-9]{2}\-$/", $datum)) { // Hungarian Y-m + if (preg_match("/^[0-9]{4}\.[0-9]{2}(\.|)\-$/", $datum)) { // Hungarian Y-m $start = substr($datum, 0, 4); $month = substr($datum, 5, 2); return [$start, "?", $month, "00", "+", ""]; @@ -707,7 +707,7 @@ final class NodaTimeSplitter { } } - if (preg_match("/^[0-9]{4}\ (\(nach)/", $datum)) { + if (preg_match("/\ (\(nach|nach)$/", $datum)) { if (($spacePos = strpos($datum, " ")) === false) { return []; } @@ -738,6 +738,19 @@ final class NodaTimeSplitter { } } + if (preg_match("/\ (\(vor|Vor)$/", $datum)) { + if (($spacePos = strpos($datum, " ")) === false) { + return []; + } + if ($output = self::attempt_splitting(substr($datum, 0, $spacePos))) { + + $output[0] = "?"; + if (empty(trim($output[2], "0 .,"))) $output[1] = strval((intval($output[1]) - 1)); + $output[5] = "Vor"; + return $output; + } + } + if (preg_match("/^(Ab|Seit|seit)\ /", $datum)) { if (($spacePos = strpos($datum, " ")) === false) { return []; @@ -843,12 +856,10 @@ final class NodaTimeSplitter { } // 17.-18. Jahrhundert - if (preg_match("/^[0-9]{2}\.\-[0-9]{2}\.\ (Jh\.|Jahrhundert|sz|század)$/", $datum)) { - return self::negotiate_century_span_bce_ce(substr($datum, 0, 2), substr($datum, 4, 2)); - } - // 17-18. Jahrhundert - if (preg_match("/^[0-9]{2}\-[0-9]{2}\.\ (Jh\.|Jahrhundert|sz|század)$/", $datum)) { - return self::negotiate_century_span_bce_ce(substr($datum, 0, 2), substr($datum, 3, 2)); + if (preg_match("/^[0-9]{2}(\.|)(|\ Jh\.||\ Jahrhundert||\ sz||\ század)\-[0-9]{2}\.\ (Jh\.|Jahrhundert|sz|század)$/", $datum)) { + if (($dashPos = strpos($datum, "-")) !== false) { + return self::negotiate_century_span_bce_ce(substr($datum, 0, 2), substr($datum, $dashPos + 1, 2)); + } } // 1.-12. Jahrhundert @@ -860,8 +871,10 @@ final class NodaTimeSplitter { return self::negotiate_century_span_bce_ce(substr($datum, 0, 1), substr($datum, 3, 1)); } // 1-2. Jahrhundert - if (preg_match("/^[0-9]\-[0-9]\.\ (Jh\.|Jahrhundert|sz|század)$/", $datum)) { - return self::negotiate_century_span_bce_ce(substr($datum, 0, 1), substr($datum, 2, 1)); + if (preg_match("/^[0-9](\.|)(|\ Jh\.||\ Jahrhundert||\ sz||\ század)\-[0-9]\.\ (Jh\.|Jahrhundert|sz|század)$/", $datum)) { + if (($dashPos = strpos($datum, "-")) !== false) { + return self::negotiate_century_span_bce_ce(substr($datum, 0, 1), substr($datum, $dashPos + 1, 1)); + } } return [];