diff --git a/src/NodaTimeSplitter.php b/src/NodaTimeSplitter.php index c946744..15ae72e 100644 --- a/src/NodaTimeSplitter.php +++ b/src/NodaTimeSplitter.php @@ -140,7 +140,7 @@ final class NodaTimeSplitter { "decemberig", ]; - private const REGEX_CENTURIES = '(\ |)(Jh\.|Jhd(|\.)|Jhdt(|\.)|Jahrhundert|sz|század|th century|ст|ст\.)'; + private const REGEX_CENTURIES = '(\ |)(Jh|Jh\.|Jhd(|\.)|Jhdt(|\.)|Jahrhundert|sz|század|th century|ст|ст\.)'; private const REGEX_DECADES = '(s|er|er\ Jahre|(\-|\ )es\ évek|(\-|\ )as\ \évek|\ évek|\-es\ években|\-ті)'; /** @@ -350,11 +350,20 @@ final class NodaTimeSplitter { } } - if (\preg_match("/^[0-9][0-9][0-9][0-9]\ bis [0-9][0-9][0-9][0-9]$/", $datum)) { + if (\preg_match("/^[0-9]{4}\ bis\ [0-9]{4}$/", $datum)) { $start = \substr($datum, 0, 4); $end = \substr($datum, -4); return new NodaSplitTime($start, $end); } + if (\preg_match("/^[0-9]{4}\ (und|oder|od.)\ [0-9]{4}$/", $datum)) { + $start = \substr($datum, 0, 4); + $end = \substr($datum, -4); + $startInt = (int)$start; + $endInt = (int)$end; + if ($startInt === $endInt - 1) { + return new NodaSplitTime($start, $end); + } + } $datum = \str_replace(". ", ".", $datum); @@ -551,7 +560,7 @@ final class NodaTimeSplitter { } // German TT.MM.JJJJ / TT.MM.JJJ / TT.MM.JJ / TT.MM.J - if (\preg_match("/^[0-9][0-9]\.[0-9][0-9]\.([0-9][0-9][0-9][0-9]|[0-9][0-9][0-9]|[0-9][0-9]|[0-9])$/", $datum)) { // German T.MM.JJJJ + if (\preg_match("/^[0-9]{2}\.[0-9]{2}\.([0-9]{4}|[0-9]{3}|[0-9]{2}|[0-9])$/", $datum)) { // German T.MM.JJJJ $year = \substr($datum, 6, 4); $month = \substr($datum, 3, 2); $day = \substr($datum, 0, 2); @@ -559,7 +568,7 @@ final class NodaTimeSplitter { } // German TT.M.JJJJ / TT.M.JJJ / TT.M.JJ / TT.M.J - if (\preg_match("/^[0-9][0-9]\.[0-9]\.([0-9][0-9][0-9][0-9]|[0-9][0-9][0-9]|[0-9][0-9]|[0-9])$/", $datum)) { // German T.MM.JJJJ + if (\preg_match("/^[0-9]{2}\.[0-9]\.([0-9]{4}|[0-9]{3}|[0-9]{2}|[0-9])$/", $datum)) { // German T.MM.JJJJ $year = \substr($datum, 5, 4); $month = "0" . \substr($datum, 3, 1); $day = \substr($datum, 0, 2); @@ -707,35 +716,60 @@ final class NodaTimeSplitter { $datum = self::clean_input($datum); - if (\preg_match("/^[0-9]{4}\.[0-9]{2}\.[0-9]{2}(\.|)\-$/", $datum)) { // YYYY.MM.DD. - $year = \substr($datum, 0, 4); - $month = \substr($datum, 5, 2); - $day = \substr($datum, 8, 2); + $inpDateWoSpaces = str_replace(" ", "", $datum); + + if (\preg_match("/^[0-9]{4}\.[0-9]{2}\.[0-9]{2}(\.|)\-$/", $inpDateWoSpaces)) { // YYYY.MM.DD. + $year = \substr($inpDateWoSpaces, 0, 4); + $month = \substr($inpDateWoSpaces, 5, 2); + $day = \substr($inpDateWoSpaces, 8, 2); return NodaSplitTime::genExactDate($year, $month, $day, NodaTimeBeforeAfterIndicator::since); } - if (\preg_match("/^[0-9]{4}\.[0-9]{2}(\.|)\-$/", $datum)) { // YYYY.MM.- - $start = \substr($datum, 0, 4); - $month = \substr($datum, 5, 2); + if (\preg_match("/^[0-9]{4}\.[0-9]{2}(\.|)\-$/", $inpDateWoSpaces)) { // YYYY.MM.- + $start = \substr($inpDateWoSpaces, 0, 4); + $month = \substr($inpDateWoSpaces, 5, 2); return new NodaSplitTime($start, '?', $month, before_after_indicator: NodaTimeBeforeAfterIndicator::since); } - if (\preg_match("/^[0-9]{4}\-$/", $datum)) { // YYYY- - $start = \substr($datum, 0, 4); + if (\preg_match("/^[0-9]{4}\-$/", $inpDateWoSpaces)) { // YYYY- + $start = \substr($inpDateWoSpaces, 0, 4); return new NodaSplitTime($start, '?', before_after_indicator: NodaTimeBeforeAfterIndicator::since); } - if (\preg_match("/^\-[0-9]{4}\.[0-9]{2}\.[0-9]{2}$/", $datum)) { // Hungarian Y-m - $year = \substr($datum, 1, 4); - $month = \substr($datum, 6, 2); - $day = \substr($datum, 9, 2); + // ?.6.2024 + if (\preg_match("/^\?\.([0-9]|[0-9]{2})\.[0-9]{4}$/", $inpDateWoSpaces)) { // German Y-m + $year = \substr($inpDateWoSpaces, -4); + $month = trim(\substr($inpDateWoSpaces, 2, 2), '. '); + return new NodaSplitTime($year, $year, $month); + } + + // ?.?.2024 + if (\preg_match("/^\?\.\?\.[0-9]{4}$/", $inpDateWoSpaces)) { // German Y-m + $year = \substr($inpDateWoSpaces, -4); + return new NodaSplitTime($year, $year); + } + + if (\preg_match("/^[0-9]{4}$/", \trim($inpDateWoSpaces, '. ?!()[]X'))) { // German Y-m + $year = \trim($inpDateWoSpaces, '. ?!()[]X'); + return new NodaSplitTime($year, $year); + } + + if (\preg_match("/^[0-9]{4}$/", \strtr($inpDateWoSpaces, ['-0' => '', '0-' => '', 'o' => '0']))) { // German Y-m + $year = \strtr($inpDateWoSpaces, ['-0' => '', '0-' => '', 'o' => '0']); + return new NodaSplitTime($year, $year); + } + + if (\preg_match("/^\-[0-9]{4}\.[0-9]{2}\.[0-9]{2}$/", $inpDateWoSpaces)) { // Hungarian Y-m + $year = \substr($inpDateWoSpaces, 1, 4); + $month = \substr($inpDateWoSpaces, 6, 2); + $day = \substr($inpDateWoSpaces, 9, 2); return NodaSplitTime::genExactDate($year, $month, $day, NodaTimeBeforeAfterIndicator::until); } - if (\preg_match("/^\-[0-9]{4}\.[0-9]{2}$/", $datum)) { // Hungarian Y-m - $year = \substr($datum, 1, 4); - $month = \substr($datum, 6, 2); + if (\preg_match("/^\-[0-9]{4}\.[0-9]{2}$/", $inpDateWoSpaces)) { // Hungarian Y-m + $year = \substr($inpDateWoSpaces, 1, 4); + $month = \substr($inpDateWoSpaces, 6, 2); return new NodaSplitTime('?', $year, $month, before_after_indicator: NodaTimeBeforeAfterIndicator::until); } - if (\preg_match("/^\-[0-9]{4}$/", $datum)) { // Hungarian -Y - $year = \substr($datum, 1, 4); + if (\preg_match("/^\-[0-9]{4}$/", $inpDateWoSpaces)) { // Hungarian -Y + $year = \substr($inpDateWoSpaces, 1, 4); return new NodaSplitTime('?', $year, before_after_indicator: NodaTimeBeforeAfterIndicator::until); } @@ -907,7 +941,7 @@ final class NodaTimeSplitter { } // 1. Jahrhundert - if (\preg_match("/^[0-9]\.\ (Jh\.|Jahrhundert|sz|század)$/", $datum)) { + if (\preg_match("/^[0-9]\.\ (Jh\|Jh\.|Jahrhundert|sz|század)$/", $datum)) { if ($centuryNo = \intval(\substr($datum, 0, 1))) { $centuryNo--; return new NodaSplitTime((string)$centuryNo . "01", \strval($centuryNo + 1) . '00'); @@ -915,7 +949,7 @@ final class NodaTimeSplitter { } // 17.-18. Jahrhundert - if (\preg_match("/^[0-9]{2}(\.|)(|\ Jh\.||\ Jahrhundert||\ sz||\ század)(\-|\/)[0-9]{2}\.\ (Jh\.|Jahrhundert|sz|század)$/", $datum)) { + if (\preg_match("/^[0-9]{2}(\.|)(|\ Jh|\ Jh\.|\ Jahrhundert|\ sz|\ század)(\-|\/)[0-9]{2}\.\ (Jh\.|Jahrhundert|sz|század)$/", $datum)) { if (\strpos($datum, '/') !== false) { $datum = str_replace('/', '-', $datum); } @@ -1099,6 +1133,33 @@ final class NodaTimeSplitter { return $reconstituted; } + // German T.-T.MM.JJJJ / T.-T.MM.JJJ / T.-T.MM.JJ / T.-T.MM.J + if (\preg_match("/^[0-9].\-[0-9]\.([0-9]|[0-9]{2})\.([0-9]{4}|[0-9]{3}|[0-9]{2}|[0-9])$/", $datum)) { // German T.MM.JJJJ + $year = \substr($datum, -4); + $month = trim(\substr($datum, -7, 2), '.'); + $day = '0' . \substr($datum, 3, 1); + $firstday = '0' . \substr($datum, 0, 1); + return "$firstday.$month.$year-$day.$month.$year"; + } + + // German T.-TT.MM.JJJJ / T.-TT.MM.JJJ / T.-TT.MM.JJ / T.-TT.MM.J + if (\preg_match("/^[0-9].\-[0-9]{2}\.([0-9]|[0-9]{2})\.([0-9]{4}|[0-9]{3}|[0-9]{2}|[0-9])$/", $datum)) { // German T.MM.JJJJ + $year = \substr($datum, -4); + $month = trim(\substr($datum, -7, 2), '.'); + $day = \substr($datum, 3, 2); + $firstday = '0' . \substr($datum, 0, 1); + return "$firstday.$month.$year-$day.$month.$year"; + } + + // German TT.-TT.MM.JJJJ / TT.-TT.MM.JJJ / TT.-TT.MM.JJ / TT.-TT.MM.J + if (\preg_match("/^[0-9]{2}.\-[0-9]{2}\.([0-9]|[0-9]{2})\.([0-9]{4}|[0-9]{3}|[0-9]{2}|[0-9])$/", $datum)) { // German T.MM.JJJJ + $year = \substr($datum, -4); + $month = trim(\substr($datum, -7, 2), '.'); + $day = \substr($datum, 4, 2); + $firstday = \substr($datum, 0, 2); + return "$firstday.$month.$year-$day.$month.$year"; + } + // 17-19. Jahrhundert if (\preg_match("/^[0-9]{2}(\.|)\-[0-9]{2}(\.|)" . self::REGEX_CENTURIES . "$/", $datum)) { $parts = explode('-', $datum); @@ -1225,7 +1286,7 @@ final class NodaTimeSplitter { */ private static function _runBasicNameCleanup(string $input):string { - $input = trim(trim($input), ',;'); + $input = ltrim(trim(trim($input), ',;'), ' .'); // Clean away duplicate inputs // 1440-1440 diff --git a/src/NodaUncertaintyHelper.php b/src/NodaUncertaintyHelper.php index fd378ef..bb52e11 100644 --- a/src/NodaUncertaintyHelper.php +++ b/src/NodaUncertaintyHelper.php @@ -61,6 +61,7 @@ final class NodaUncertaintyHelper { "(?)", "?", " [vermutlich]", + " vermutlich", " [verm.]", " [wahrscheinlich]", ]; @@ -100,6 +101,7 @@ final class NodaUncertaintyHelper { "c. ", "ca ", "ca. ", + "ca.", "Ca ", "Ca. ", "za. ", @@ -141,6 +143,7 @@ final class NodaUncertaintyHelper { " [circa]", " (verm.)", " (vermutl.)", + " vermutlich", " körül", ", um", " (um)", diff --git a/tests/NodaTimeSplitterTest.php b/tests/NodaTimeSplitterTest.php index 0b3dc22..261746b 100644 --- a/tests/NodaTimeSplitterTest.php +++ b/tests/NodaTimeSplitterTest.php @@ -118,6 +118,97 @@ final class NodaTimeSplitterTest extends TestCase { self::assertEquals($output->toTimeName(), "02.01.2020"); self::assertEquals(NodaTimeSplitter::timePartsToCountingYear($output), 2020); + $output = NodaTimeSplitter::attempt_splitting("?.1.2020"); + self::assertNotEmpty($output); + self::assertEquals($output->toOldFormat(), [ + 0 => "2020", + 1 => "2020", + 2 => "01", + 3 => "00", + 4 => "+", + 5 => "", + ]); + self::assertEquals($output->toTimeName(), "Januar 2020"); + self::assertEquals(NodaTimeSplitter::timePartsToCountingYear($output), 2020); + + $output = NodaTimeSplitter::attempt_splitting("?.11.2020"); + self::assertNotEmpty($output); + self::assertEquals($output->toOldFormat(), [ + 0 => "2020", + 1 => "2020", + 2 => "11", + 3 => "00", + 4 => "+", + 5 => "", + ]); + self::assertEquals($output->toTimeName(), "November 2020"); + self::assertEquals(NodaTimeSplitter::timePartsToCountingYear($output), 2020); + + $output = NodaTimeSplitter::attempt_splitting(".2020"); + self::assertNotEmpty($output); + self::assertEquals($output->toOldFormat(), [ + 0 => "2020", + 1 => "2020", + 2 => "00", + 3 => "00", + 4 => "+", + 5 => "", + ]); + self::assertEquals($output->toTimeName(), "2020"); + self::assertEquals(NodaTimeSplitter::timePartsToCountingYear($output), 2020); + + $output = NodaTimeSplitter::attempt_splitting("0-2020"); + self::assertNotEmpty($output); + self::assertEquals($output->toOldFormat(), [ + 0 => "2020", + 1 => "2020", + 2 => "00", + 3 => "00", + 4 => "+", + 5 => "", + ]); + self::assertEquals($output->toTimeName(), "2020"); + self::assertEquals(NodaTimeSplitter::timePartsToCountingYear($output), 2020); + + $output = NodaTimeSplitter::attempt_splitting("2020-0"); + self::assertNotEmpty($output); + self::assertEquals($output->toOldFormat(), [ + 0 => "2020", + 1 => "2020", + 2 => "00", + 3 => "00", + 4 => "+", + 5 => "", + ]); + self::assertEquals($output->toTimeName(), "2020"); + self::assertEquals(NodaTimeSplitter::timePartsToCountingYear($output), 2020); + + $output = NodaTimeSplitter::attempt_splitting("? 2020"); + self::assertNotEmpty($output); + self::assertEquals($output->toOldFormat(), [ + 0 => "2020", + 1 => "2020", + 2 => "00", + 3 => "00", + 4 => "+", + 5 => "", + ]); + self::assertEquals($output->toTimeName(), "2020"); + self::assertEquals(NodaTimeSplitter::timePartsToCountingYear($output), 2020); + + $output = NodaTimeSplitter::attempt_splitting("?.?.2020"); + self::assertNotEmpty($output); + self::assertEquals($output->toOldFormat(), [ + 0 => "2020", + 1 => "2020", + 2 => "00", + 3 => "00", + 4 => "+", + 5 => "", + ]); + self::assertEquals($output->toTimeName(), "2020"); + self::assertEquals(NodaTimeSplitter::timePartsToCountingYear($output), 2020); + $output = NodaTimeSplitter::attempt_splitting("1920-1929"); self::assertNotEmpty($output); self::assertEquals($output->toOldFormat(), [ @@ -463,6 +554,32 @@ final class NodaTimeSplitterTest extends TestCase { self::assertEquals($output->toTimeName(), "1945-1948"); self::assertEquals(NodaTimeSplitter::timePartsToCountingYear($output), 1947); + $output = NodaTimeSplitter::attempt_splitting("1945 und 1946"); + self::assertNotEmpty($output); + self::assertEquals($output->toOldFormat(), [ + 0 => "1945", + 1 => "1946", + 2 => "00", + 3 => "00", + 4 => "+", + 5 => "", + ]); + self::assertEquals($output->toTimeName(), "1945-1946"); + self::assertEquals(NodaTimeSplitter::timePartsToCountingYear($output), 1946); + + $output = NodaTimeSplitter::attempt_splitting("1945 oder 1946"); + self::assertNotEmpty($output); + self::assertEquals($output->toOldFormat(), [ + 0 => "1945", + 1 => "1946", + 2 => "00", + 3 => "00", + 4 => "+", + 5 => "", + ]); + self::assertEquals($output->toTimeName(), "1945-1946"); + self::assertEquals(NodaTimeSplitter::timePartsToCountingYear($output), 1946); + $output = NodaTimeSplitter::attempt_splitting("20.-19. Jahrhundert v. Chr."); self::assertNotEmpty($output); self::assertEquals($output->toOldFormat(), [ @@ -1016,6 +1133,81 @@ final class NodaTimeSplitterTest extends TestCase { "counting_time_bcce" => "+", ]); + $output = NodaTimeSplitter::attempt_splitting_from_till("14.-15.03.2019"); + self::assertNotEmpty($output); + self::assertEquals($output, [ + 'start_name' => "14.03.2019", + 'end_name' => "15.03.2019", + "start_year" => '2019', + "end_year" => '2019', + 'start_date' => '2019-03-14', + 'end_date' => '2019-03-15', + "counting_time_year" => "2019", + "counting_time_month" => "03", + "counting_time_day" => "15", + "counting_time_bcce" => "+", + ]); + + $output = NodaTimeSplitter::attempt_splitting_from_till("3.-7.9.1819"); + self::assertNotEmpty($output); + self::assertEquals($output, [ + 'start_name' => "03.09.1819", + 'end_name' => "07.09.1819", + "start_year" => '1819', + "end_year" => '1819', + 'start_date' => '1819-09-03', + 'end_date' => '1819-09-07', + "counting_time_year" => "1819", + "counting_time_month" => "09", + "counting_time_day" => "05", + "counting_time_bcce" => "+", + ]); + + $output = NodaTimeSplitter::attempt_splitting_from_till("3.-15.9.1819"); + self::assertNotEmpty($output); + self::assertEquals($output, [ + 'start_name' => "03.09.1819", + 'end_name' => "15.09.1819", + "start_year" => '1819', + "end_year" => '1819', + 'start_date' => '1819-09-03', + 'end_date' => '1819-09-15', + "counting_time_year" => "1819", + "counting_time_month" => "09", + "counting_time_day" => "09", + "counting_time_bcce" => "+", + ]); + + $output = NodaTimeSplitter::attempt_splitting_from_till("14.-15.9.1819"); + self::assertNotEmpty($output); + self::assertEquals($output, [ + 'start_name' => "14.09.1819", + 'end_name' => "15.09.1819", + "start_year" => '1819', + "end_year" => '1819', + 'start_date' => '1819-09-14', + 'end_date' => '1819-09-15', + "counting_time_year" => "1819", + "counting_time_month" => "09", + "counting_time_day" => "15", + "counting_time_bcce" => "+", + ]); + + $output = NodaTimeSplitter::attempt_splitting_from_till("14.-15.11.1819"); + self::assertNotEmpty($output); + self::assertEquals($output, [ + 'start_name' => "14.11.1819", + 'end_name' => "15.11.1819", + "start_year" => '1819', + "end_year" => '1819', + 'start_date' => '1819-11-14', + 'end_date' => '1819-11-15', + "counting_time_year" => "1819", + "counting_time_month" => "11", + "counting_time_day" => "15", + "counting_time_bcce" => "+", + ]); + $output = NodaTimeSplitter::attempt_splitting_from_till("2019.03.14-15"); self::assertNotEmpty($output); self::assertEquals($output, [