Cover more edge cases for splitting time names
This commit is contained in:
		| @@ -140,7 +140,7 @@ final class NodaTimeSplitter { | ||||
|         "decemberig", | ||||
|     ]; | ||||
|  | ||||
|     private const REGEX_CENTURIES = '(\ |)(Jh\.|Jhd(|\.)|Jhdt(|\.)|Jahrhundert|sz|század|th century|ст|ст\.)'; | ||||
|     private const REGEX_CENTURIES = '(\ |)(Jh|Jh\.|Jhd(|\.)|Jhdt(|\.)|Jahrhundert|sz|század|th century|ст|ст\.)'; | ||||
|     private const REGEX_DECADES = '(s|er|er\ Jahre|(\-|\ )es\ évek|(\-|\ )as\ \évek|\ évek|\-es\ években|\-ті)'; | ||||
|  | ||||
|     /** | ||||
| @@ -350,11 +350,20 @@ final class NodaTimeSplitter { | ||||
|             } | ||||
|         } | ||||
|  | ||||
|         if (\preg_match("/^[0-9][0-9][0-9][0-9]\ bis [0-9][0-9][0-9][0-9]$/", $datum)) { | ||||
|         if (\preg_match("/^[0-9]{4}\ bis\ [0-9]{4}$/", $datum)) { | ||||
|             $start = \substr($datum, 0, 4); | ||||
|             $end = \substr($datum, -4); | ||||
|             return new NodaSplitTime($start, $end); | ||||
|         } | ||||
|         if (\preg_match("/^[0-9]{4}\ (und|oder|od.)\ [0-9]{4}$/", $datum)) { | ||||
|             $start = \substr($datum, 0, 4); | ||||
|             $end = \substr($datum, -4); | ||||
|             $startInt = (int)$start; | ||||
|             $endInt = (int)$end; | ||||
|             if ($startInt === $endInt - 1) { | ||||
|                 return new NodaSplitTime($start, $end); | ||||
|             } | ||||
|         } | ||||
|  | ||||
|         $datum = \str_replace(". ", ".", $datum); | ||||
|  | ||||
| @@ -551,7 +560,7 @@ final class NodaTimeSplitter { | ||||
|         } | ||||
|  | ||||
|         // German TT.MM.JJJJ  /  TT.MM.JJJ  /  TT.MM.JJ  /  TT.MM.J | ||||
|         if (\preg_match("/^[0-9][0-9]\.[0-9][0-9]\.([0-9][0-9][0-9][0-9]|[0-9][0-9][0-9]|[0-9][0-9]|[0-9])$/", $datum)) { // German T.MM.JJJJ | ||||
|         if (\preg_match("/^[0-9]{2}\.[0-9]{2}\.([0-9]{4}|[0-9]{3}|[0-9]{2}|[0-9])$/", $datum)) { // German T.MM.JJJJ | ||||
|             $year = \substr($datum, 6, 4); | ||||
|             $month = \substr($datum, 3, 2); | ||||
|             $day   = \substr($datum, 0, 2); | ||||
| @@ -559,7 +568,7 @@ final class NodaTimeSplitter { | ||||
|         } | ||||
|  | ||||
|         // German TT.M.JJJJ  /  TT.M.JJJ  /  TT.M.JJ  /  TT.M.J | ||||
|         if (\preg_match("/^[0-9][0-9]\.[0-9]\.([0-9][0-9][0-9][0-9]|[0-9][0-9][0-9]|[0-9][0-9]|[0-9])$/", $datum)) { // German T.MM.JJJJ | ||||
|         if (\preg_match("/^[0-9]{2}\.[0-9]\.([0-9]{4}|[0-9]{3}|[0-9]{2}|[0-9])$/", $datum)) { // German T.MM.JJJJ | ||||
|             $year = \substr($datum, 5, 4); | ||||
|             $month = "0" . \substr($datum, 3, 1); | ||||
|             $day   = \substr($datum, 0, 2); | ||||
| @@ -707,35 +716,60 @@ final class NodaTimeSplitter { | ||||
|  | ||||
|         $datum = self::clean_input($datum); | ||||
|  | ||||
|         if (\preg_match("/^[0-9]{4}\.[0-9]{2}\.[0-9]{2}(\.|)\-$/", $datum)) { // YYYY.MM.DD. | ||||
|             $year = \substr($datum, 0, 4); | ||||
|             $month = \substr($datum, 5, 2); | ||||
|             $day = \substr($datum, 8, 2); | ||||
|         $inpDateWoSpaces = str_replace(" ", "", $datum); | ||||
|  | ||||
|         if (\preg_match("/^[0-9]{4}\.[0-9]{2}\.[0-9]{2}(\.|)\-$/", $inpDateWoSpaces)) { // YYYY.MM.DD. | ||||
|             $year = \substr($inpDateWoSpaces, 0, 4); | ||||
|             $month = \substr($inpDateWoSpaces, 5, 2); | ||||
|             $day = \substr($inpDateWoSpaces, 8, 2); | ||||
|             return NodaSplitTime::genExactDate($year, $month, $day, NodaTimeBeforeAfterIndicator::since); | ||||
|         } | ||||
|         if (\preg_match("/^[0-9]{4}\.[0-9]{2}(\.|)\-$/", $datum)) { // YYYY.MM.- | ||||
|             $start = \substr($datum, 0, 4); | ||||
|             $month = \substr($datum, 5, 2); | ||||
|         if (\preg_match("/^[0-9]{4}\.[0-9]{2}(\.|)\-$/", $inpDateWoSpaces)) { // YYYY.MM.- | ||||
|             $start = \substr($inpDateWoSpaces, 0, 4); | ||||
|             $month = \substr($inpDateWoSpaces, 5, 2); | ||||
|             return new NodaSplitTime($start, '?', $month, before_after_indicator: NodaTimeBeforeAfterIndicator::since); | ||||
|         } | ||||
|         if (\preg_match("/^[0-9]{4}\-$/", $datum)) { // YYYY- | ||||
|             $start = \substr($datum, 0, 4); | ||||
|         if (\preg_match("/^[0-9]{4}\-$/", $inpDateWoSpaces)) { // YYYY- | ||||
|             $start = \substr($inpDateWoSpaces, 0, 4); | ||||
|             return new NodaSplitTime($start, '?', before_after_indicator: NodaTimeBeforeAfterIndicator::since); | ||||
|         } | ||||
|  | ||||
|         if (\preg_match("/^\-[0-9]{4}\.[0-9]{2}\.[0-9]{2}$/", $datum)) { // Hungarian Y-m | ||||
|             $year = \substr($datum, 1, 4); | ||||
|             $month = \substr($datum, 6, 2); | ||||
|             $day = \substr($datum, 9, 2); | ||||
|         // ?.6.2024 | ||||
|         if (\preg_match("/^\?\.([0-9]|[0-9]{2})\.[0-9]{4}$/", $inpDateWoSpaces)) { // ?.M.YYYY / ?.MM.YYYY (day given as "?") | ||||
|             $year = \substr($inpDateWoSpaces, -4); | ||||
|             $month = trim(\substr($inpDateWoSpaces, 2, 2), '. '); | ||||
|             return new NodaSplitTime($year, $year, $month); | ||||
|         } | ||||
|  | ||||
|         // ?.?.2024 | ||||
|         if (\preg_match("/^\?\.\?\.[0-9]{4}$/", $inpDateWoSpaces)) { // ?.?.YYYY (day and month given as "?") | ||||
|             $year = \substr($inpDateWoSpaces, -4); | ||||
|             return new NodaSplitTime($year, $year); | ||||
|         } | ||||
|  | ||||
|         if (\preg_match("/^[0-9]{4}$/", \trim($inpDateWoSpaces, '. ?!()[]X'))) { // Four-digit year left after trimming surrounding ". ?!()[]X" characters | ||||
|             $year = \trim($inpDateWoSpaces, '. ?!()[]X'); | ||||
|             return new NodaSplitTime($year, $year); | ||||
|         } | ||||
|  | ||||
|         if (\preg_match("/^[0-9]{4}$/", \strtr($inpDateWoSpaces, ['-0' => '', '0-' => '', 'o' => '0']))) { // German Y-m | ||||
|             $year = \strtr($inpDateWoSpaces, ['-0' => '', '0-' => '', 'o' => '0']); | ||||
|             return new NodaSplitTime($year, $year); | ||||
|         } | ||||
|  | ||||
|         if (\preg_match("/^\-[0-9]{4}\.[0-9]{2}\.[0-9]{2}$/", $inpDateWoSpaces)) { // Hungarian -YYYY.MM.DD | ||||
|             $year = \substr($inpDateWoSpaces, 1, 4); | ||||
|             $month = \substr($inpDateWoSpaces, 6, 2); | ||||
|             $day = \substr($inpDateWoSpaces, 9, 2); | ||||
|             return NodaSplitTime::genExactDate($year, $month, $day, NodaTimeBeforeAfterIndicator::until); | ||||
|         } | ||||
|         if (\preg_match("/^\-[0-9]{4}\.[0-9]{2}$/", $datum)) { // Hungarian Y-m | ||||
|             $year = \substr($datum, 1, 4); | ||||
|             $month = \substr($datum, 6, 2); | ||||
|         if (\preg_match("/^\-[0-9]{4}\.[0-9]{2}$/", $inpDateWoSpaces)) { // Hungarian -YYYY.MM | ||||
|             $year = \substr($inpDateWoSpaces, 1, 4); | ||||
|             $month = \substr($inpDateWoSpaces, 6, 2); | ||||
|             return new NodaSplitTime('?', $year, $month, before_after_indicator: NodaTimeBeforeAfterIndicator::until); | ||||
|         } | ||||
|         if (\preg_match("/^\-[0-9]{4}$/", $datum)) { // Hungarian -Y | ||||
|             $year = \substr($datum, 1, 4); | ||||
|         if (\preg_match("/^\-[0-9]{4}$/", $inpDateWoSpaces)) { // Hungarian -Y | ||||
|             $year = \substr($inpDateWoSpaces, 1, 4); | ||||
|             return new NodaSplitTime('?', $year, before_after_indicator: NodaTimeBeforeAfterIndicator::until); | ||||
|         } | ||||
|  | ||||
| @@ -907,7 +941,7 @@ final class NodaTimeSplitter { | ||||
|         } | ||||
|  | ||||
|         // 1. Jahrhundert | ||||
|         if (\preg_match("/^[0-9]\.\ (Jh\.|Jahrhundert|sz|század)$/", $datum)) { | ||||
|         if (\preg_match("/^[0-9]\.\ (Jh\|Jh\.|Jahrhundert|sz|század)$/", $datum)) { | ||||
|             if ($centuryNo = \intval(\substr($datum, 0, 1))) { | ||||
|                 $centuryNo--; | ||||
|                 return new NodaSplitTime((string)$centuryNo . "01", \strval($centuryNo + 1) . '00'); | ||||
| @@ -915,7 +949,7 @@ final class NodaTimeSplitter { | ||||
|         } | ||||
|  | ||||
|         // 17.-18. Jahrhundert | ||||
|         if (\preg_match("/^[0-9]{2}(\.|)(|\ Jh\.||\ Jahrhundert||\ sz||\ század)(\-|\/)[0-9]{2}\.\ (Jh\.|Jahrhundert|sz|század)$/", $datum)) { | ||||
|         if (\preg_match("/^[0-9]{2}(\.|)(|\ Jh|\ Jh\.|\ Jahrhundert|\ sz|\ század)(\-|\/)[0-9]{2}\.\ (Jh\.|Jahrhundert|sz|század)$/", $datum)) { | ||||
|             if (\strpos($datum, '/') !== false) { | ||||
|                 $datum = str_replace('/', '-', $datum); | ||||
|             } | ||||
| @@ -1099,6 +1133,33 @@ final class NodaTimeSplitter { | ||||
|             return $reconstituted; | ||||
|         } | ||||
|  | ||||
|         // German T.-T.MM.JJJJ / T.-T.MM.JJJ / T.-T.MM.JJ / T.-T.MM.J | ||||
|         if (\preg_match("/^[0-9].\-[0-9]\.([0-9]|[0-9]{2})\.([0-9]{4}|[0-9]{3}|[0-9]{2}|[0-9])$/", $datum)) { // German T.MM.JJJJ | ||||
|             $year = \substr($datum, -4); | ||||
|             $month = trim(\substr($datum, -7, 2), '.'); | ||||
|             $day   = '0' . \substr($datum, 3, 1); | ||||
|             $firstday   = '0' . \substr($datum, 0, 1); | ||||
|             return "$firstday.$month.$year-$day.$month.$year"; | ||||
|         } | ||||
|  | ||||
|         // German T.-TT.MM.JJJJ / T.-TT.MM.JJJ / T.-TT.MM.JJ / T.-TT.MM.J | ||||
|         if (\preg_match("/^[0-9].\-[0-9]{2}\.([0-9]|[0-9]{2})\.([0-9]{4}|[0-9]{3}|[0-9]{2}|[0-9])$/", $datum)) { // German T.MM.JJJJ | ||||
|             $year = \substr($datum, -4); | ||||
|             $month = trim(\substr($datum, -7, 2), '.'); | ||||
|             $day   = \substr($datum, 3, 2); | ||||
|             $firstday   = '0' . \substr($datum, 0, 1); | ||||
|             return "$firstday.$month.$year-$day.$month.$year"; | ||||
|         } | ||||
|  | ||||
|         // German TT.-TT.MM.JJJJ  /  TT.-TT.MM.JJJ  /  TT.-TT.MM.JJ  /  TT.-TT.MM.J | ||||
|         if (\preg_match("/^[0-9]{2}.\-[0-9]{2}\.([0-9]|[0-9]{2})\.([0-9]{4}|[0-9]{3}|[0-9]{2}|[0-9])$/", $datum)) { // German T.MM.JJJJ | ||||
|             $year = \substr($datum, -4); | ||||
|             $month = trim(\substr($datum, -7, 2), '.'); | ||||
|             $day   = \substr($datum, 4, 2); | ||||
|             $firstday   = \substr($datum, 0, 2); | ||||
|             return "$firstday.$month.$year-$day.$month.$year"; | ||||
|         } | ||||
|  | ||||
|         // 17-19. Jahrhundert | ||||
|         if (\preg_match("/^[0-9]{2}(\.|)\-[0-9]{2}(\.|)" . self::REGEX_CENTURIES . "$/", $datum)) { | ||||
|             $parts = explode('-', $datum); | ||||
| @@ -1225,7 +1286,7 @@ final class NodaTimeSplitter { | ||||
|      */ | ||||
|     private static function _runBasicNameCleanup(string $input):string { | ||||
|  | ||||
|         $input = trim(trim($input), ',;'); | ||||
|         $input = ltrim(trim(trim($input), ',;'), ' .'); | ||||
|  | ||||
|         // Clean away duplicate inputs | ||||
|         // 1440-1440 | ||||
|   | ||||
		Reference in New Issue
	
	Block a user