From 4496a35f5c14e770f4e1eaf01e4258ddf697f57e Mon Sep 17 00:00:00 2001 From: Joshua Ramon Enslin Date: Mon, 20 Nov 2023 03:18:02 +0100 Subject: [PATCH] Rewrite incomplete time span spellings to extend parsable and splittable time names --- phpstan-baseline.neon | 36 +++++ phpstan.neon | 2 + src/NodaTimeAutotranslater.php | 8 +- src/NodaTimeSplitter.php | 268 +++++++++++++++++++++++++++------ tests/NodaTimeSplitterTest.php | 205 +++++++++++++++++++++++-- 5 files changed, 458 insertions(+), 61 deletions(-) create mode 100644 phpstan-baseline.neon diff --git a/phpstan-baseline.neon b/phpstan-baseline.neon new file mode 100644 index 0000000..eeb4176 --- /dev/null +++ b/phpstan-baseline.neon @@ -0,0 +1,36 @@ +parameters: + ignoreErrors: + - + message: "#^Constant DATABASENAME_NODA not found\\.$#" + count: 3 + path: src/NodaBlacklistedTerms.php + + - + message: "#^Constant DATABASENAME_NODA not found\\.$#" + count: 2 + path: src/NodaMailChecker.php + + - + message: "#^Variable \\$timeInfoToCopy in empty\\(\\) always exists and is not falsy\\.$#" + count: 1 + path: src/NodaTimeAutotranslater.php + + - + message: "#^Call to an undefined method DOMNode\\:\\:getAttribute\\(\\)\\.$#" + count: 1 + path: src/NodaWikidataFetcher.php + + - + message: "#^Function printHTMLEnd not found\\.$#" + count: 1 + path: src/NodaWikidataFetcher.php + + - + message: "#^Function write_get_vars not found\\.$#" + count: 9 + path: src/NodaWikidataFetcher.php + + - + message: "#^Match expression does not handle remaining value\\: string$#" + count: 1 + path: src/enums/NodaTimeAutotranslaterLocales.php diff --git a/phpstan.neon b/phpstan.neon index d0c60f7..3fe3c79 100644 --- a/phpstan.neon +++ b/phpstan.neon @@ -8,3 +8,5 @@ parameters: - ../ ignoreErrors: excludePaths: +includes: + - phpstan-baseline.neon diff --git a/src/NodaTimeAutotranslater.php b/src/NodaTimeAutotranslater.php index 425f82c..0dcdea9 100644 --- a/src/NodaTimeAutotranslater.php +++ b/src/NodaTimeAutotranslater.php @@ -475,7 +475,9 @@ final class NodaTimeAutotranslater { $output = []; - $start = NodaTimeSplitter::attempt_splitting($timespanDates['start_name']); + if (empty($start = NodaTimeSplitter::attempt_splitting($timespanDates['start_name']))) { + return []; + } $startTimeInfo = [ "zeit_name" => $timespanDates['start_name'], "zeit_beginn" => $start[0], @@ -486,7 +488,9 @@ final class NodaTimeAutotranslater { "zeit_zaehlzeit_vorzeichen" => $start[4], ]; - $end = NodaTimeSplitter::attempt_splitting($timespanDates['end_name']); + if (empty($end = NodaTimeSplitter::attempt_splitting($timespanDates['end_name']))) { + return []; + } $endTimeInfo = [ "zeit_name" => $timespanDates['end_name'], "zeit_beginn" => $end[0], diff --git a/src/NodaTimeSplitter.php b/src/NodaTimeSplitter.php index b9593b6..2d7151b 100644 --- a/src/NodaTimeSplitter.php +++ b/src/NodaTimeSplitter.php @@ -48,7 +48,7 @@ final class NodaTimeSplitter { "04" => ['április', 'apr.', 'ápr.'], "05" => ['május', 'maj.', 'máj.'], "06" => ['június', 'jun.', 'jún'], - "07" => ['július', 'jul.', 'júl.'], + "07" => ['július', 'julius', 'jul.', 'júl.'], "08" => ['augusztus', 'aug.'], "09" => ['szeptember', 'szp.'], "10" => ['október', 'okt.'], @@ -132,6 +132,8 @@ final class NodaTimeSplitter { "decemberig", ]; + private const REGEX_CENTURIES = '(\ |)(Jh\.|Jhd(|\.)|Jhdt(|\.)|Jahrhundert|sz|század|th century|ст|ст\.)'; + /** * Cleans input strings by trimming obsolete stuff. * @@ -395,7 +397,7 @@ final class NodaTimeSplitter { * * @param string $datum Date. * - * @return array + * @return array{0: string, 1: string, 2: string, 3: string, 4: '+'|'-'|'', 5: string}|array{} */ public static function is_valid_date(string $datum):array { @@ -472,7 +474,7 @@ final class NodaTimeSplitter { * * @param string $datum Date. * - * @return array + * @return array{0: string, 1: string, 2: string, 3: string, 4: '+'|'-'|'', 5: string}|array{} */ public static function is_valid_date_hungarian(string $datum):array { @@ -491,6 +493,8 @@ final class NodaTimeSplitter { } } + // Example: 2009-tol 2010-ig + // From 2009 to 2010 if (\preg_match("/^[0-9][0-9][0-9][0-9]\-t(ő|ó)l(\ |\-)[0-9][0-9][0-9][0-9]\-ig$/", $datum)) { $start = \substr($datum, 0, 4); $end = \substr($datum, -7, 4); @@ -576,7 +580,7 @@ final class NodaTimeSplitter { * * @param string $datum Input date. * - * @return array + * @return array{0: string, 1: string, 2: string, 3: string, 4: '+'|'-'|'', 5: string}|array{} */ public static function is_timespan(string $datum):array { @@ -670,13 +674,19 @@ final class NodaTimeSplitter { $month = "0" . \substr($datum, 0, 1); return [$start, $start, $month, "00", "+", ""]; } - if (\preg_match("/^[0-9]{4}\.[0-9]{2}\.[0-9]{1,2}(\.|)$/", $datum)) { // Hungarian Y-m-d + if (\preg_match("/^[0-9]{4}\.[0-3][0-9]\.[0-9]{1,2}(\.|)$/", $datum)) { // Hungarian Y-m-d $start = \substr($datum, 0, 4); $month = \substr($datum, 5, 2); - $day = self::pad_to_two(\substr($datum, 8, 2)); + $day = self::pad_to_two(\rtrim(\substr($datum, 8, 2), '.')); if (\intval($month) < 13) return [$start, $start, $month, $day, "+", ""]; } - if (\preg_match("/^[0-9]{4}\.[0-9]{2}(\.|)$/", $datum)) { // Hungarian Y-m + if (\preg_match("/^[0-9]{4}\.[0-9]\.[0-9]{1,2}\.$/", $datum)) { // Hungarian Y-m-d > 2005.1.1. + $start = \substr($datum, 0, 4); + $month = self::pad_to_two(\substr($datum, 5, 1)); + $day = self::pad_to_two(\rtrim(\substr($datum, 7, 2), '.')); + if (\intval($month) < 13) return [$start, $start, $month, $day, "+", ""]; + } + if (\preg_match("/^[0-9]{4}\.[0-3][0-9](\.|)$/", $datum)) { // Hungarian Y-m $start = \substr($datum, 0, 4); $month = \substr($datum, 5, 2); if (\intval($month) < 13) return [$start, $start, $month, "00", "+", ""]; @@ -750,7 +760,7 @@ final class NodaTimeSplitter { * * @param string $datum Input date. * - * @return array + * @return array{0: string, 1: string, 2: string, 3: string, 4: '+'|'-'|'', 5: string}|array{} */ public static function is_incomplete_date(string $datum):array { @@ -860,7 +870,7 @@ final class NodaTimeSplitter { } } - // Endings beginning with a space + // Endings beginning with a dash if (\preg_match("/(\-től|\-tól)$/", $datum)) { if (($spacePos = strrpos($datum, "-")) === false) { return []; @@ -880,8 +890,25 @@ final class NodaTimeSplitter { return $output; } } + // Endings that are extensions of an existing word + if (\preg_match("/évektől$/", $datum)) { + if ($output = self::attempt_splitting(\substr($datum, 0, -4))) { + $output[1] = "?"; + return $output; + } + } - // Endings beginning with a space + // Endings beginning with a space (after) + if (\preg_match("/ (utantól|utántól)$/", $datum)) { + if (($spacePos = strrpos($datum, " ")) === false) { + return []; + } + if ($output = self::attempt_splitting(\substr($datum, 0, $spacePos))) { + $output[1] = "?"; + return $output; + } + } + // Endings beginning with a space (until) if (\preg_match("/ (\(bis)$/", $datum)) { if (($spacePos = strrpos($datum, " ")) === false) { return []; @@ -891,6 +918,7 @@ final class NodaTimeSplitter { return $output; } } + // Ends beginning with a hyphen if (\preg_match("/\-ig(\.|)$/", $datum)) { if (($spacePos = strrpos($datum, "-")) === false) { return []; @@ -931,23 +959,18 @@ final class NodaTimeSplitter { * * @param string $datum Input date. * - * @return array + * @return array{0: string, 1: string, 2: string, 3: string, 4: '+'|'-'|'', 5: string}|array{} */ public static function is_century(string $datum):array { $datum = self::clean_input($datum); $bcBceIndicator = '+'; - // 17. Jahrhundert - if (\preg_match("/^[0-9]{2}\.\ (Jh\.|Jhd(|\.)|Jhdt(|\.)|Jahrhundert|sz|század)$/", $datum)) { - if ($centuryNo = \intval(\substr($datum, 0, 2))) { - $centuryNo--; - return [(string)$centuryNo . "01", \strval($centuryNo + 1) . "00", "00", "00", $bcBceIndicator, ""]; - } - } - // 17th century - if (\preg_match("/^[0-9]{2}th century$/", $datum)) { + // TODO: Check if this is duplicate + + // 17. Jahrhundert + if (\preg_match("/^[0-9]{2}(\.|)" . self::REGEX_CENTURIES ."$/", $datum)) { if ($centuryNo = \intval(\substr($datum, 0, 2))) { $centuryNo--; return [(string)$centuryNo . "01", \strval($centuryNo + 1) . "00", "00", "00", $bcBceIndicator, ""]; @@ -1000,20 +1023,22 @@ final class NodaTimeSplitter { * * @param string $datum Input date. * - * @return array + * @return array{0: string, 1: string, 2: string, 3: string, 4: '+'|'-'|'', 5: string}|array{} */ public static function is_decade(string $datum):array { $datum = self::clean_input($datum); $bcBceIndicator = '+'; + // 20er Jahre if (\preg_match("/^[0-9]0(er|er\ Jahre|\-es\ évek|\-as\ \évek)$/", $datum)) { $start = "19" . \substr($datum, 0, 2); $ende = (string)(\intval($start) + 9); return [$start, $ende, "00", "00", $bcBceIndicator, ""]; } - if (\preg_match("/^[0-9]{3}0(s|er|er\ Jahre|\-es\ évek|\-as\ \évek)$/", $datum)) { + // 1920er Jahre + if (\preg_match("/^[0-9]{3}0(s|er|er\ Jahre|(\-|\ )es\ évek|(\-|\ )as\ \évek)$/", $datum)) { $start = \substr($datum, 0, 4); $ende = (string)(\intval($start) + 9); return [$start, $ende, "00", "00", $bcBceIndicator, ""]; @@ -1032,10 +1057,19 @@ final class NodaTimeSplitter { */ public static function check_is_timespan_from_till(string $datum):array { - if (substr_count($datum, '-') !== 1) return []; + if (substr_count($datum, '-') !== 1) { + return []; + } list($start_str, $end_str) = explode('-', $datum); + if (strlen($end_str) < 4 && strlen($end_str) < strlen($start_str)) { + return []; + } + if (strlen($start_str) < 4 && strlen($start_str) < strlen($end_str)) { + return []; + } + if (empty($start = self::attempt_splitting($start_str))) { return []; } @@ -1048,6 +1082,107 @@ final class NodaTimeSplitter { } + /** + * Contains special rules for incorrectly or incompletely spelled out timespan names. + * To be called by self::attempt_splitting_from_till(). + * + * @param string $datum Date. + * + * @return string + */ + public static function _attempt_rewriting_special_cases_from_till(string $datum):string { + + if (empty($datum)) return ''; + + $inputLength = strlen($datum); + + // Hungarian year and month until month + // 2005.01.-02. => 2005.01.-2005.02. + if ($inputLength === 12 && \preg_match("/^[0-9]{4}\.[0-1][0-9]\.\-[0-1][0-9]\.$/", $datum)) { + $reconstituted = substr($datum, 0, 8) . '-'; + $reconstituted .= substr($datum, 0, 4) . '.' . substr($datum, -3); + return $reconstituted; + } + + // Hungarian year and month until month without a dot after the first YYYY-MM + // 2005.01-02. => 2005.01.-2005.02. + + if (in_array($inputLength, [10, 11], true) && \preg_match("/^[0-9]{4}\.[0-1][0-9]\-[0-1][0-9](\.|)$/", $datum)) { + $reconstituted = substr($datum, 0, 7) . '.-'; + $reconstituted .= substr($datum, 0, 4) . '.' . substr(rtrim($datum, '.'), -2) . '.'; + return $reconstituted; + } + + // Hungarian year and month until month + // 2005.01.01.-02.02. => 2005.01.01-2005.02.02. + // 2005.01.01-02.02 => 2005.01.01-2005.02.02. + if ($inputLength >= 16 && $inputLength <= 18 && \preg_match("/^[0-9]{4}\.[0-1][0-9]\.[0-3][0-9](\.|)\-[0-1][0-9]\.[0-3][0-9](\.|)$/", $datum)) { + $parts = explode('-', $datum); + if (count($parts) !== 2) return ''; + $reconstituted = substr($datum, 0, 10) . '.-'; + $reconstituted .= substr($datum, 0, 4) . '.' . rtrim($parts[1], '.') . '.'; + return $reconstituted; + } + + // Hungarian; without trailing dots: YYYY.MM.DD-DD + if ($inputLength >= 13 && $inputLength <= 15 && \preg_match("/^[0-9]{4}\.[0-1][0-9]\.[0-3][0-9](\.|)\-[0-3][0-9](\.|)$/", $datum)) { + $parts = explode('-', $datum); + if (count($parts) !== 2) return ''; + $reconstituted = substr($datum, 0, 10) . '.-'; + $reconstituted .= substr($datum, 0, 7) . '.' . substr(rtrim($parts[1], '.'), -2); + return $reconstituted; + } + + // 17-19. Jahrhundert + if (\preg_match("/^[0-9]{2}(\.|)\-[0-9]{2}(\.|)" . self::REGEX_CENTURIES . "$/", $datum)) { + $parts = explode('-', $datum); + $reconstituted = ((int)substr($parts[0] ?? "", 0, 2) - 1) . '01-'; + $reconstituted .= substr($parts[1] ?? "", 0, 2) . '. Jahrhundert'; + return $reconstituted; + } + + // 1950-60-as évek + if (\preg_match("/^[0-9]{4}\-[0-9]{2} (a|e)s évek$/", $datum)) { + $reconstituted = substr($datum, 0, 4) . '-'; + $reconstituted .= substr($datum, 5, 2) . 'er Jahre'; + return $reconstituted; + } + + // If es évek / as évek is contained in the string (e.g. 1880-1990-es évek), there + // will be more than one hyphens + if (MD_STD::stri_contains_any($datum, ['-as évek', '-es évek'])) { + return strtr($datum, ['-as évek' => ' as évek', '-es évek' => ' es évek']); + } + + // 1981. július-augusztus > 1981.07-08 + if (is_numeric(substr($datum, 0, 4)) && substr($datum, 4, 2) === '. ') { + + $monthNames = []; + foreach (self::MONTH_NAMES_ENGLISH as $month => $names) { + foreach ($names as $name) $monthNames[$name] = $month; + } + foreach (self::MONTH_NAMES_GERMAN as $month => $names) { + foreach ($names as $name) $monthNames[$name] = $month; + } + foreach (self::MONTH_NAMES_HUNGARIAN as $month => $names) { + foreach ($names as $name) $monthNames[$name] = $month; + } + + $rewrite = strtr($datum, $monthNames); + if ($rewrite !== $datum) { + return str_replace('..', '.', str_replace(" ", ".", $rewrite)); + } + + } + + if (str_contains($datum, ',')) { + return str_replace(',', '-', $datum); + } + + return ''; + + } + /** * Checks if the string is a time span with given start and end dates. * @@ -1062,6 +1197,9 @@ final class NodaTimeSplitter { if (strlen($datum) === 9 and substr($datum, 4, 1) !== '-') return []; if (empty($startEnd = self::check_is_timespan_from_till($datum))) { + if ($rewritten = self::_attempt_rewriting_special_cases_from_till($datum)) { + return self::attempt_splitting_from_till($rewritten); + } return []; } list($start, $end) = $startEnd; @@ -1105,38 +1243,76 @@ final class NodaTimeSplitter { } /** - * Wrapper to check if any splitting command works. + * Cleans invalid outputs from splitting. * - * @param string $datum Input date. + * @param array{0: string, 1: string, 2: string, 3: string, 4: '+'|'-'|'', 5: string}|array{} $moda Split time to check. * - * @return array + * @return array{0: string, 1: string, 2: string, 3: string, 4: '+'|'-'|'', 5: string}|array{} */ - public static function attempt_splitting(string $datum):array { + private static function validate_split_time(array $moda):array { - $moda = NodaTimeSplitter::is_timespan($datum); - if (!$moda) { - $moda = NodaTimeSplitter::is_incomplete_date($datum); - } - if (!$moda) { - $moda = NodaTimeSplitter::is_valid_date($datum); - } - if (!$moda) { - $moda = NodaTimeSplitter::is_valid_date_hungarian($datum); - } - if (!$moda) { - $moda = NodaTimeSplitter::is_century($datum); - } - if (!$moda) { - $moda = NodaTimeSplitter::is_decade($datum); + if (empty($moda)) return []; + + if ((int)$moda[2] > 12 || (int)$moda[3] > 31) { + return []; } - if (!empty($moda)) { - if ((int)$moda[2] > 12 || (int)$moda[3] > 31) { - return []; - } + + $month_no_zero = strtr($moda[2], ["0" => "", "1" => "", "2" => "", "3" => "", "4" => "", "5" => "", "6" => "", "7" => "", "8" => "", "9" => ""]); + $day_no_zero = strtr($moda[3], ["0" => "", "1" => "", "2" => "", "3" => "", "4" => "", "5" => "", "6" => "", "7" => "", "8" => "", "9" => ""]); + if (!empty($month_no_zero)) { + throw new Exception("Invalid split month: " . var_export($moda, true)); + } + if (!empty($day_no_zero)) { + throw new Exception("Invalid split day: " . var_export($moda, true)); } return $moda; } + + /** + * Wrapper to check if any splitting command works. + * + * @param string $datum Input date. + * + * @return array{0: string, 1: string, 2: string, 3: string, 4: '+'|'-'|'', 5: string}|array{} + */ + public static function attempt_splitting(string $datum):array { + + if (!empty($moda = self::is_timespan($datum))) { + return self::validate_split_time($moda); + } + + if (!empty($moda = self::is_incomplete_date($datum))) { + return self::validate_split_time($moda); + } + + if (!empty($moda = self::is_valid_date($datum))) { + return self::validate_split_time($moda); + } + + if (!empty($moda = self::is_valid_date_hungarian($datum))) { + return self::validate_split_time($moda); + } + + if (!empty($moda = self::is_century($datum))) { + return self::validate_split_time($moda); + } + + if (!empty($moda = self::is_decade($datum))) { + return self::validate_split_time($moda); + } + + // 2015. 05. + if (str_contains($datum, ' ')) { + $rewrite = str_replace(' ', '', $datum); + if (is_numeric(str_replace('.', '', $datum))) { + return self::attempt_splitting($rewrite); + } + } + + return []; + + } } diff --git a/tests/NodaTimeSplitterTest.php b/tests/NodaTimeSplitterTest.php index 81dbe1c..e11233e 100644 --- a/tests/NodaTimeSplitterTest.php +++ b/tests/NodaTimeSplitterTest.php @@ -132,6 +132,18 @@ final class NodaTimeSplitterTest extends TestCase { self::assertEquals(NodaTimeSplitter::timePartsToTimeName($output), "1920-1929"); self::assertEquals(NodaTimeSplitter::timePartsToCountingYear($output), 1925); + $output = NodaTimeSplitter::attempt_splitting("1920-1929"); + self::assertEquals($output, [ + 0 => "1920", + 1 => "1929", + 2 => "00", + 3 => "00", + 4 => "+", + 5 => "", + ]); + self::assertEquals(NodaTimeSplitter::timePartsToTimeName($output), "1920-1929"); + self::assertEquals(NodaTimeSplitter::timePartsToCountingYear($output), 1925); + $output = NodaTimeSplitter::attempt_splitting("1920er Jahre"); self::assertEquals($output, [ 0 => "1920", @@ -488,6 +500,30 @@ final class NodaTimeSplitterTest extends TestCase { self::assertEquals(NodaTimeSplitter::timePartsToTimeName($output), "02.01.2020"); self::assertEquals(NodaTimeSplitter::timePartsToCountingYear($output), 2020); + $output = NodaTimeSplitter::attempt_splitting("2020.01.2."); + self::assertEquals($output, [ + 0 => "2020", + 1 => "2020", + 2 => "01", + 3 => "02", + 4 => "+", + 5 => "", + ]); + self::assertEquals(NodaTimeSplitter::timePartsToTimeName($output), "02.01.2020"); + self::assertEquals(NodaTimeSplitter::timePartsToCountingYear($output), 2020); + + $output = NodaTimeSplitter::attempt_splitting("2020.1.2."); + self::assertEquals($output, [ + 0 => "2020", + 1 => "2020", + 2 => "01", + 3 => "02", + 4 => "+", + 5 => "", + ]); + self::assertEquals(NodaTimeSplitter::timePartsToTimeName($output), "02.01.2020"); + self::assertEquals(NodaTimeSplitter::timePartsToCountingYear($output), 2020); + $output = NodaTimeSplitter::attempt_splitting("2020. Januar 2."); self::assertEquals($output, [ 0 => "2020", @@ -548,6 +584,19 @@ final class NodaTimeSplitterTest extends TestCase { self::assertEquals(NodaTimeSplitter::timePartsToTimeName($output), "1920-1929"); self::assertEquals(NodaTimeSplitter::timePartsToCountingYear($output), 1925); + // From 1920 onwards + $output = NodaTimeSplitter::attempt_splitting("1920 utántól"); + self::assertEquals($output, [ + 0 => "1920", + 1 => "?", + 2 => "00", + 3 => "00", + 4 => "+", + 5 => "", + ]); + self::assertEquals(NodaTimeSplitter::timePartsToTimeName($output), "Seit 1920"); + self::assertEquals(NodaTimeSplitter::timePartsToCountingYear($output), 1920); + $output = NodaTimeSplitter::attempt_splitting("1920-es évek"); self::assertEquals($output, [ 0 => "1920", @@ -608,6 +657,18 @@ final class NodaTimeSplitterTest extends TestCase { self::assertEquals(NodaTimeSplitter::timePartsToTimeName($output), "201-300 n. Chr."); self::assertEquals(NodaTimeSplitter::timePartsToCountingYear($output), 251); + $output = NodaTimeSplitter::attempt_splitting("20th century"); + self::assertEquals($output, [ + 0 => "1901", + 1 => "2000", + 2 => "00", + 3 => "00", + 4 => "+", + 5 => "", + ]); + self::assertEquals(NodaTimeSplitter::timePartsToTimeName($output), "1901-2000"); + self::assertEquals(NodaTimeSplitter::timePartsToCountingYear($output), 1951); + $output = NodaTimeSplitter::attempt_splitting("20. század"); self::assertEquals($output, [ 0 => "1901", @@ -620,6 +681,18 @@ final class NodaTimeSplitterTest extends TestCase { self::assertEquals(NodaTimeSplitter::timePartsToTimeName($output), "1901-2000"); self::assertEquals(NodaTimeSplitter::timePartsToCountingYear($output), 1951); + $output = NodaTimeSplitter::attempt_splitting("20.század"); + self::assertEquals($output, [ + 0 => "1901", + 1 => "2000", + 2 => "00", + 3 => "00", + 4 => "+", + 5 => "", + ]); + self::assertEquals(NodaTimeSplitter::timePartsToTimeName($output), "1901-2000"); + self::assertEquals(NodaTimeSplitter::timePartsToCountingYear($output), 1951); + $output = NodaTimeSplitter::attempt_splitting("Kr. e. 20. század"); self::assertEquals($output, [ 0 => "-2000", @@ -658,6 +731,109 @@ final class NodaTimeSplitterTest extends TestCase { } + /** + * Test to check special cases can be parsed. + * + * @author Joshua Ramon Enslin + * @group ValidOutput + * @small + * + * @return void + */ + public function testSplittingFromTill():void { + + // Regular + $output = NodaTimeSplitter::attempt_splitting_from_till("2004.01.-2004.02."); + self::assertNotEmpty($output); + self::assertEquals($output, [ + 'start_name' => "Januar 2004", + 'end_name' => "Februar 2004", + "start_year" => '2004', + "end_year" => '2004', + "counting_time_year" => "2004", + "counting_time_month" => "01", + "counting_time_day" => "16", + "counting_time_bcce" => "+", + ]); + + // Rewritten / Hungarian YYYY.MM.-MM. + $output = NodaTimeSplitter::attempt_splitting_from_till("2004.01.-02."); + self::assertNotEmpty($output); + self::assertEquals($output, [ + 'start_name' => "Januar 2004", + 'end_name' => "Februar 2004", + "start_year" => '2004', + "end_year" => '2004', + "counting_time_year" => "2004", + "counting_time_month" => "01", + "counting_time_day" => "16", + "counting_time_bcce" => "+", + ]); + + // Rewritten / Hungarian YYYY.MM.-MM. + $output = NodaTimeSplitter::attempt_splitting_from_till("2003.04-05."); + self::assertNotEmpty($output); + + // Rewritten / Hungarian YYYY.MM.-MM. + $output = NodaTimeSplitter::attempt_splitting_from_till("1981. július-augusztus"); + self::assertNotEmpty($output); + + $output = NodaTimeSplitter::attempt_splitting_from_till("2019.03.14.,04.15."); + self::assertNotEmpty($output); + self::assertEquals($output, [ + 'start_name' => "14.03.2019", + 'end_name' => "15.04.2019", + "start_year" => '2019', + "end_year" => '2019', + "counting_time_year" => "2019", + "counting_time_month" => "03", + "counting_time_day" => "30", + "counting_time_bcce" => "+", + ]); + + $output = NodaTimeSplitter::attempt_splitting_from_till("2019.03.14-15"); + self::assertNotEmpty($output); + self::assertEquals($output, [ + 'start_name' => "14.03.2019", + 'end_name' => "15.03.2019", + "start_year" => '2019', + "end_year" => '2019', + "counting_time_year" => "2019", + "counting_time_month" => "03", + "counting_time_day" => "15", + "counting_time_bcce" => "+", + ]); + + // Rewritten / Hungarian YYYY.MM.-MM. + $output = NodaTimeSplitter::attempt_splitting_from_till("17-19.század"); + self::assertNotEmpty($output); + self::assertEquals($output, [ + 'start_name' => "1601", + 'end_name' => "1900", + "start_year" => '1601', + "end_year" => '1900', + "counting_time_year" => "1750", + "counting_time_month" => "06", + "counting_time_day" => "01", + "counting_time_bcce" => "+", + ]); + + // Rewritten / 1950-60-as évek + $output = NodaTimeSplitter::attempt_splitting_from_till("1950-60-as évek"); + self::assertNotEmpty($output); + self::assertEquals($output, [ + 'start_name' => "1950", + 'end_name' => "1969", + "start_year" => '1950', + "end_year" => '1969', + "counting_time_year" => "1959", + "counting_time_month" => "06", + "counting_time_day" => "01", + "counting_time_bcce" => "+", + ]); + + } + /** * Test to check whether the HTML page is correctly generated. * @@ -670,40 +846,43 @@ final class NodaTimeSplitterTest extends TestCase { public function testSplitDoesNotWorkWhenItShouldNot():void { $output = NodaTimeSplitter::attempt_splitting(""); - self::assertEquals($output, []); + self::assertEmpty($output); $output = NodaTimeSplitter::attempt_splitting("1.2.2020-2.2.2020"); - self::assertEquals($output, []); + self::assertEmpty($output); $output = NodaTimeSplitter::attempt_splitting("2020 Januar 2-2020 Februar 2"); - self::assertEquals($output, []); + self::assertEmpty($output); $output = NodaTimeSplitter::attempt_splitting("2020 Januar-2020 Februar"); - self::assertEquals($output, []); + self::assertEmpty($output); $output = NodaTimeSplitter::attempt_splitting("Januar-Februar"); - self::assertEquals($output, []); + self::assertEmpty($output); $output = NodaTimeSplitter::attempt_splitting("13.13.2022"); - self::assertEquals($output, []); + self::assertEmpty($output); $output = NodaTimeSplitter::attempt_splitting("2022-13-13"); - self::assertEquals($output, []); + self::assertEmpty($output); $output = NodaTimeSplitter::attempt_splitting("40.10.2022"); - self::assertEquals($output, []); + self::assertEmpty($output); $output = NodaTimeSplitter::attempt_splitting("2022-10-40"); - self::assertEquals($output, []); + self::assertEmpty($output); $output = NodaTimeSplitter::attempt_splitting("6;November 1978"); - self::assertEquals($output, []); + self::assertEmpty($output); $output = NodaTimeSplitter::attempt_splitting("65497028c51eb"); - self::assertEquals($output, []); + self::assertEmpty($output); $output = NodaTimeSplitter::attempt_splitting("6552cf08b0196 test tag"); - self::assertEquals($output, []); + self::assertEmpty($output); + + $output = NodaTimeSplitter::attempt_splitting("Anfang September 1903"); + self::assertEmpty($output); # $output = NodaTimeSplitter::attempt_splitting("Nach 1944-1964"); - # self::assertEquals($output, []); + # self::assertEmpty($output); } }