Rewrite incomplete time span spellings to extend parsable and splittable time names
This commit is contained in:
		
							
								
								
									
										36
									
								
								phpstan-baseline.neon
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										36
									
								
								phpstan-baseline.neon
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,36 @@
 | 
			
		||||
parameters:
 | 
			
		||||
	ignoreErrors:
 | 
			
		||||
		-
 | 
			
		||||
			message: "#^Constant DATABASENAME_NODA not found\\.$#"
 | 
			
		||||
			count: 3
 | 
			
		||||
			path: src/NodaBlacklistedTerms.php
 | 
			
		||||
 | 
			
		||||
		-
 | 
			
		||||
			message: "#^Constant DATABASENAME_NODA not found\\.$#"
 | 
			
		||||
			count: 2
 | 
			
		||||
			path: src/NodaMailChecker.php
 | 
			
		||||
 | 
			
		||||
		-
 | 
			
		||||
			message: "#^Variable \\$timeInfoToCopy in empty\\(\\) always exists and is not falsy\\.$#"
 | 
			
		||||
			count: 1
 | 
			
		||||
			path: src/NodaTimeAutotranslater.php
 | 
			
		||||
 | 
			
		||||
		-
 | 
			
		||||
			message: "#^Call to an undefined method DOMNode\\:\\:getAttribute\\(\\)\\.$#"
 | 
			
		||||
			count: 1
 | 
			
		||||
			path: src/NodaWikidataFetcher.php
 | 
			
		||||
 | 
			
		||||
		-
 | 
			
		||||
			message: "#^Function printHTMLEnd not found\\.$#"
 | 
			
		||||
			count: 1
 | 
			
		||||
			path: src/NodaWikidataFetcher.php
 | 
			
		||||
 | 
			
		||||
		-
 | 
			
		||||
			message: "#^Function write_get_vars not found\\.$#"
 | 
			
		||||
			count: 9
 | 
			
		||||
			path: src/NodaWikidataFetcher.php
 | 
			
		||||
 | 
			
		||||
		-
 | 
			
		||||
			message: "#^Match expression does not handle remaining value\\: string$#"
 | 
			
		||||
			count: 1
 | 
			
		||||
			path: src/enums/NodaTimeAutotranslaterLocales.php
 | 
			
		||||
@@ -8,3 +8,5 @@ parameters:
 | 
			
		||||
      - ../
 | 
			
		||||
    ignoreErrors:
 | 
			
		||||
    excludePaths:
 | 
			
		||||
includes:
 | 
			
		||||
    - phpstan-baseline.neon
 | 
			
		||||
 
 | 
			
		||||
@@ -475,7 +475,9 @@ final class NodaTimeAutotranslater {
 | 
			
		||||
 | 
			
		||||
            $output = [];
 | 
			
		||||
 | 
			
		||||
            $start = NodaTimeSplitter::attempt_splitting($timespanDates['start_name']);
 | 
			
		||||
            if (empty($start = NodaTimeSplitter::attempt_splitting($timespanDates['start_name']))) {
 | 
			
		||||
                return [];
 | 
			
		||||
            }
 | 
			
		||||
            $startTimeInfo = [
 | 
			
		||||
                "zeit_name" => $timespanDates['start_name'],
 | 
			
		||||
                "zeit_beginn" => $start[0],
 | 
			
		||||
@@ -486,7 +488,9 @@ final class NodaTimeAutotranslater {
 | 
			
		||||
                "zeit_zaehlzeit_vorzeichen" => $start[4],
 | 
			
		||||
            ];
 | 
			
		||||
 | 
			
		||||
            $end = NodaTimeSplitter::attempt_splitting($timespanDates['end_name']);
 | 
			
		||||
            if (empty($end = NodaTimeSplitter::attempt_splitting($timespanDates['end_name']))) {
 | 
			
		||||
                return [];
 | 
			
		||||
            }
 | 
			
		||||
            $endTimeInfo = [
 | 
			
		||||
                "zeit_name" => $timespanDates['end_name'],
 | 
			
		||||
                "zeit_beginn" => $end[0],
 | 
			
		||||
 
 | 
			
		||||
@@ -48,7 +48,7 @@ final class NodaTimeSplitter {
 | 
			
		||||
        "04" => ['április', 'apr.', 'ápr.'],
 | 
			
		||||
        "05" => ['május', 'maj.', 'máj.'],
 | 
			
		||||
        "06" => ['június', 'jun.', 'jún'],
 | 
			
		||||
        "07" => ['július', 'jul.', 'júl.'],
 | 
			
		||||
        "07" => ['július', 'julius', 'jul.', 'júl.'],
 | 
			
		||||
        "08" => ['augusztus', 'aug.'],
 | 
			
		||||
        "09" => ['szeptember', 'szp.'],
 | 
			
		||||
        "10" => ['október', 'okt.'],
 | 
			
		||||
@@ -132,6 +132,8 @@ final class NodaTimeSplitter {
 | 
			
		||||
        "decemberig",
 | 
			
		||||
    ];
 | 
			
		||||
 | 
			
		||||
    private const REGEX_CENTURIES = '(\ |)(Jh\.|Jhd(|\.)|Jhdt(|\.)|Jahrhundert|sz|század|th century|ст|ст\.)';
 | 
			
		||||
 | 
			
		||||
    /**
 | 
			
		||||
     * Cleans input strings by trimming obsolete stuff.
 | 
			
		||||
     *
 | 
			
		||||
@@ -395,7 +397,7 @@ final class NodaTimeSplitter {
 | 
			
		||||
     *
 | 
			
		||||
     * @param string $datum Date.
 | 
			
		||||
     *
 | 
			
		||||
     * @return array<string>
 | 
			
		||||
     * @return array{0: string, 1: string, 2: string, 3: string, 4: '+'|'-'|'', 5: string}|array{}
 | 
			
		||||
     */
 | 
			
		||||
    public static function is_valid_date(string $datum):array {
 | 
			
		||||
 | 
			
		||||
@@ -472,7 +474,7 @@ final class NodaTimeSplitter {
 | 
			
		||||
     *
 | 
			
		||||
     * @param string $datum Date.
 | 
			
		||||
     *
 | 
			
		||||
     * @return array<string>
 | 
			
		||||
     * @return array{0: string, 1: string, 2: string, 3: string, 4: '+'|'-'|'', 5: string}|array{}
 | 
			
		||||
     */
 | 
			
		||||
    public static function is_valid_date_hungarian(string $datum):array {
 | 
			
		||||
 | 
			
		||||
@@ -491,6 +493,8 @@ final class NodaTimeSplitter {
 | 
			
		||||
            }
 | 
			
		||||
        }
 | 
			
		||||
 | 
			
		||||
        // Example: 2009-tol 2010-ig
 | 
			
		||||
        // From 2009 to 2010
 | 
			
		||||
        if (\preg_match("/^[0-9][0-9][0-9][0-9]\-t(ő|ó)l(\ |\-)[0-9][0-9][0-9][0-9]\-ig$/", $datum)) {
 | 
			
		||||
            $start = \substr($datum, 0, 4);
 | 
			
		||||
            $end = \substr($datum, -7, 4);
 | 
			
		||||
@@ -576,7 +580,7 @@ final class NodaTimeSplitter {
 | 
			
		||||
     *
 | 
			
		||||
     * @param string $datum Input date.
 | 
			
		||||
     *
 | 
			
		||||
     * @return array<string>
 | 
			
		||||
     * @return array{0: string, 1: string, 2: string, 3: string, 4: '+'|'-'|'', 5: string}|array{}
 | 
			
		||||
     */
 | 
			
		||||
    public static function is_timespan(string $datum):array {
 | 
			
		||||
 | 
			
		||||
@@ -670,13 +674,19 @@ final class NodaTimeSplitter {
 | 
			
		||||
            $month = "0" . \substr($datum, 0, 1);
 | 
			
		||||
            return [$start, $start, $month, "00", "+", ""];
 | 
			
		||||
        }
 | 
			
		||||
        if (\preg_match("/^[0-9]{4}\.[0-9]{2}\.[0-9]{1,2}(\.|)$/", $datum)) { // Hungarian Y-m-d
 | 
			
		||||
        if (\preg_match("/^[0-9]{4}\.[0-3][0-9]\.[0-9]{1,2}(\.|)$/", $datum)) { // Hungarian Y-m-d
 | 
			
		||||
            $start = \substr($datum, 0, 4);
 | 
			
		||||
            $month = \substr($datum, 5, 2);
 | 
			
		||||
            $day = self::pad_to_two(\substr($datum, 8, 2));
 | 
			
		||||
            $day = self::pad_to_two(\rtrim(\substr($datum, 8, 2), '.'));
 | 
			
		||||
            if (\intval($month) < 13) return [$start, $start, $month, $day, "+", ""];
 | 
			
		||||
        }
 | 
			
		||||
        if (\preg_match("/^[0-9]{4}\.[0-9]{2}(\.|)$/", $datum)) { // Hungarian Y-m
 | 
			
		||||
        if (\preg_match("/^[0-9]{4}\.[0-9]\.[0-9]{1,2}\.$/", $datum)) { // Hungarian Y-m-d > 2005.1.1.
 | 
			
		||||
            $start = \substr($datum, 0, 4);
 | 
			
		||||
            $month = self::pad_to_two(\substr($datum, 5, 1));
 | 
			
		||||
            $day = self::pad_to_two(\rtrim(\substr($datum, 7, 2), '.'));
 | 
			
		||||
            if (\intval($month) < 13) return [$start, $start, $month, $day, "+", ""];
 | 
			
		||||
        }
 | 
			
		||||
        if (\preg_match("/^[0-9]{4}\.[0-3][0-9](\.|)$/", $datum)) { // Hungarian Y-m
 | 
			
		||||
            $start = \substr($datum, 0, 4);
 | 
			
		||||
            $month = \substr($datum, 5, 2);
 | 
			
		||||
            if (\intval($month) < 13) return [$start, $start, $month, "00", "+", ""];
 | 
			
		||||
@@ -750,7 +760,7 @@ final class NodaTimeSplitter {
 | 
			
		||||
     *
 | 
			
		||||
     * @param string $datum Input date.
 | 
			
		||||
     *
 | 
			
		||||
     * @return array<string>
 | 
			
		||||
     * @return array{0: string, 1: string, 2: string, 3: string, 4: '+'|'-'|'', 5: string}|array{}
 | 
			
		||||
     */
 | 
			
		||||
    public static function is_incomplete_date(string $datum):array {
 | 
			
		||||
 | 
			
		||||
@@ -860,7 +870,7 @@ final class NodaTimeSplitter {
 | 
			
		||||
            }
 | 
			
		||||
        }
 | 
			
		||||
 | 
			
		||||
        // Endings beginning with a space
 | 
			
		||||
        // Endings beginning with a dash
 | 
			
		||||
        if (\preg_match("/(\-től|\-tól)$/", $datum)) {
 | 
			
		||||
            if (($spacePos = strrpos($datum, "-")) === false) {
 | 
			
		||||
                return [];
 | 
			
		||||
@@ -880,8 +890,25 @@ final class NodaTimeSplitter {
 | 
			
		||||
                return $output;
 | 
			
		||||
            }
 | 
			
		||||
        }
 | 
			
		||||
        // Endings that are extensions of an existing word
 | 
			
		||||
        if (\preg_match("/évektől$/", $datum)) {
 | 
			
		||||
            if ($output = self::attempt_splitting(\substr($datum, 0, -4))) {
 | 
			
		||||
                $output[1] = "?";
 | 
			
		||||
                return $output;
 | 
			
		||||
            }
 | 
			
		||||
        }
 | 
			
		||||
 | 
			
		||||
        // Endings beginning with a space
 | 
			
		||||
        // Endings beginning with a space (after)
 | 
			
		||||
        if (\preg_match("/ (utantól|utántól)$/", $datum)) {
 | 
			
		||||
            if (($spacePos = strrpos($datum, " ")) === false) {
 | 
			
		||||
                return [];
 | 
			
		||||
            }
 | 
			
		||||
            if ($output = self::attempt_splitting(\substr($datum, 0, $spacePos))) {
 | 
			
		||||
                $output[1] = "?";
 | 
			
		||||
                return $output;
 | 
			
		||||
            }
 | 
			
		||||
        }
 | 
			
		||||
        // Endings beginning with a space (until)
 | 
			
		||||
        if (\preg_match("/ (\(bis)$/", $datum)) {
 | 
			
		||||
            if (($spacePos = strrpos($datum, " ")) === false) {
 | 
			
		||||
                return [];
 | 
			
		||||
@@ -891,6 +918,7 @@ final class NodaTimeSplitter {
 | 
			
		||||
                return $output;
 | 
			
		||||
            }
 | 
			
		||||
        }
 | 
			
		||||
        // Ends beginning with a hyphen
 | 
			
		||||
        if (\preg_match("/\-ig(\.|)$/", $datum)) {
 | 
			
		||||
            if (($spacePos = strrpos($datum, "-")) === false) {
 | 
			
		||||
                return [];
 | 
			
		||||
@@ -931,23 +959,18 @@ final class NodaTimeSplitter {
 | 
			
		||||
     *
 | 
			
		||||
     * @param string $datum Input date.
 | 
			
		||||
     *
 | 
			
		||||
     * @return array<string>
 | 
			
		||||
     * @return array{0: string, 1: string, 2: string, 3: string, 4: '+'|'-'|'', 5: string}|array{}
 | 
			
		||||
     */
 | 
			
		||||
    public static function is_century(string $datum):array {
 | 
			
		||||
 | 
			
		||||
        $datum = self::clean_input($datum);
 | 
			
		||||
        $bcBceIndicator = '+';
 | 
			
		||||
 | 
			
		||||
        // 17. Jahrhundert
 | 
			
		||||
        if (\preg_match("/^[0-9]{2}\.\ (Jh\.|Jhd(|\.)|Jhdt(|\.)|Jahrhundert|sz|század)$/", $datum)) {
 | 
			
		||||
            if ($centuryNo = \intval(\substr($datum, 0, 2))) {
 | 
			
		||||
                $centuryNo--;
 | 
			
		||||
                return [(string)$centuryNo . "01", \strval($centuryNo + 1) . "00", "00", "00", $bcBceIndicator, ""];
 | 
			
		||||
            }
 | 
			
		||||
        }
 | 
			
		||||
 | 
			
		||||
        // 17th century
 | 
			
		||||
        if (\preg_match("/^[0-9]{2}th century$/", $datum)) {
 | 
			
		||||
        // TODO: Check if this is duplicate
 | 
			
		||||
 | 
			
		||||
        // 17. Jahrhundert
 | 
			
		||||
        if (\preg_match("/^[0-9]{2}(\.|)" . self::REGEX_CENTURIES ."$/", $datum)) {
 | 
			
		||||
            if ($centuryNo = \intval(\substr($datum, 0, 2))) {
 | 
			
		||||
                $centuryNo--;
 | 
			
		||||
                return [(string)$centuryNo . "01", \strval($centuryNo + 1) . "00", "00", "00", $bcBceIndicator, ""];
 | 
			
		||||
@@ -1000,20 +1023,22 @@ final class NodaTimeSplitter {
 | 
			
		||||
     *
 | 
			
		||||
     * @param string $datum Input date.
 | 
			
		||||
     *
 | 
			
		||||
     * @return array<string>
 | 
			
		||||
     * @return array{0: string, 1: string, 2: string, 3: string, 4: '+'|'-'|'', 5: string}|array{}
 | 
			
		||||
     */
 | 
			
		||||
    public static function is_decade(string $datum):array {
 | 
			
		||||
 | 
			
		||||
        $datum = self::clean_input($datum);
 | 
			
		||||
        $bcBceIndicator = '+';
 | 
			
		||||
 | 
			
		||||
        // 20er Jahre
 | 
			
		||||
        if (\preg_match("/^[0-9]0(er|er\ Jahre|\-es\ évek|\-as\ \évek)$/", $datum)) {
 | 
			
		||||
            $start = "19" . \substr($datum, 0, 2);
 | 
			
		||||
            $ende = (string)(\intval($start) + 9);
 | 
			
		||||
            return [$start, $ende, "00", "00", $bcBceIndicator, ""];
 | 
			
		||||
        }
 | 
			
		||||
 | 
			
		||||
        if (\preg_match("/^[0-9]{3}0(s|er|er\ Jahre|\-es\ évek|\-as\ \évek)$/", $datum)) {
 | 
			
		||||
        // 1920er Jahre
 | 
			
		||||
        if (\preg_match("/^[0-9]{3}0(s|er|er\ Jahre|(\-|\ )es\ évek|(\-|\ )as\ \évek)$/", $datum)) {
 | 
			
		||||
            $start = \substr($datum, 0, 4);
 | 
			
		||||
            $ende = (string)(\intval($start) + 9);
 | 
			
		||||
            return [$start, $ende, "00", "00", $bcBceIndicator, ""];
 | 
			
		||||
@@ -1032,10 +1057,19 @@ final class NodaTimeSplitter {
 | 
			
		||||
     */
 | 
			
		||||
    public static function check_is_timespan_from_till(string $datum):array {
 | 
			
		||||
 | 
			
		||||
        if (substr_count($datum, '-') !== 1) return [];
 | 
			
		||||
        if (substr_count($datum, '-') !== 1) {
 | 
			
		||||
            return [];
 | 
			
		||||
        }
 | 
			
		||||
 | 
			
		||||
        list($start_str, $end_str) = explode('-', $datum);
 | 
			
		||||
 | 
			
		||||
        if (strlen($end_str) < 4 && strlen($end_str) < strlen($start_str)) {
 | 
			
		||||
            return [];
 | 
			
		||||
        }
 | 
			
		||||
        if (strlen($start_str) < 4 && strlen($start_str) < strlen($end_str)) {
 | 
			
		||||
            return [];
 | 
			
		||||
        }
 | 
			
		||||
 | 
			
		||||
        if (empty($start = self::attempt_splitting($start_str))) {
 | 
			
		||||
            return [];
 | 
			
		||||
        }
 | 
			
		||||
@@ -1048,6 +1082,107 @@ final class NodaTimeSplitter {
 | 
			
		||||
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    /**
     * Contains special rules for incorrectly or incompletely spelled out timespan names.
     * Rewrites such a name into a spelling with a fully written out start and end,
     * so that it can be split at its single hyphen.
     * To be called by self::attempt_splitting_from_till(), which feeds any non-empty
     * return value back into itself. Every rule must therefore either return a string
     * that differs from the input or fall through; returning the input unchanged
     * would let the caller recurse on the same string indefinitely.
     *
     * @param string $datum Date.
     *
     * @return string Rewritten time name, or an empty string if no rule applies.
     */
    public static function _attempt_rewriting_special_cases_from_till(string $datum):string {

        if (empty($datum)) return '';

        $inputLength = strlen($datum);

        // Hungarian year and month until month
        // 2005.01.-02. => 2005.01.-2005.02.
        if ($inputLength === 12 && \preg_match("/^[0-9]{4}\.[0-1][0-9]\.\-[0-1][0-9]\.$/", $datum)) {
            $reconstituted = substr($datum, 0, 8) . '-';
            $reconstituted .= substr($datum, 0, 4) . '.' . substr($datum, -3);
            return $reconstituted;
        }

        // Hungarian year and month until month without a dot after the first YYYY-MM
        // 2005.01-02. => 2005.01.-2005.02.
        // 2005.01-02  => 2005.01.-2005.02.
        if (in_array($inputLength, [10, 11], true) && \preg_match("/^[0-9]{4}\.[0-1][0-9]\-[0-1][0-9](\.|)$/", $datum)) {
            $reconstituted = substr($datum, 0, 7) . '.-';
            $reconstituted .= substr($datum, 0, 4) . '.' . substr(rtrim($datum, '.'), -2) . '.';
            return $reconstituted;
        }

        // Hungarian year, month and day until month and day (year given only once)
        // 2005.01.01.-02.02. => 2005.01.01.-2005.02.02.
        // 2005.01.01-02.02   => 2005.01.01.-2005.02.02.
        if ($inputLength >= 16 && $inputLength <= 18 && \preg_match("/^[0-9]{4}\.[0-1][0-9]\.[0-3][0-9](\.|)\-[0-1][0-9]\.[0-3][0-9](\.|)$/", $datum)) {
            $parts = explode('-', $datum);
            if (count($parts) !== 2) return '';
            $reconstituted = substr($datum, 0, 10) . '.-';
            $reconstituted .= substr($datum, 0, 4) . '.' . rtrim($parts[1], '.') . '.';
            return $reconstituted;
        }

        // Hungarian year, month and day until day (year and month given only once)
        // 2005.01.01-02   => 2005.01.01.-2005.01.02
        // 2005.01.01.-02. => 2005.01.01.-2005.01.02
        // Note: unlike the rules above, the rewritten end date carries no trailing dot.
        if ($inputLength >= 13 && $inputLength <= 15 && \preg_match("/^[0-9]{4}\.[0-1][0-9]\.[0-3][0-9](\.|)\-[0-3][0-9](\.|)$/", $datum)) {
            $parts = explode('-', $datum);
            if (count($parts) !== 2) return '';
            $reconstituted = substr($datum, 0, 10) . '.-';
            $reconstituted .= substr($datum, 0, 7) . '.' . substr(rtrim($parts[1], '.'), -2);
            return $reconstituted;
        }

        // 17-19. Jahrhundert => 1601-19. Jahrhundert
        // The start century is replaced by its first year; the end century is
        // normalized to the German spelling regardless of the input language.
        if (\preg_match("/^[0-9]{2}(\.|)\-[0-9]{2}(\.|)" . self::REGEX_CENTURIES . "$/", $datum)) {
            $parts = explode('-', $datum);
            $reconstituted  = ((int)substr($parts[0] ?? "", 0, 2) - 1) . '01-';
            $reconstituted .= substr($parts[1] ?? "", 0, 2) . '. Jahrhundert';
            return $reconstituted;
        }

        // 1950-60 as évek => 1950-60er Jahre
        // This spelling (space instead of hyphen before "as"/"es") is what the
        // following rule produces from "1950-60-as évek".
        if (\preg_match("/^[0-9]{4}\-[0-9]{2} (a|e)s évek$/", $datum)) {
            $reconstituted  = substr($datum, 0, 4) . '-';
            $reconstituted .= substr($datum, 5, 2) . 'er Jahre';
            return $reconstituted;
        }

        // If es évek / as évek is contained in the string (e.g. 1880-1990-es évek), there
        // will be more than one hyphen. Replace the hyphen of the suffix by a space.
        if (MD_STD::stri_contains_any($datum, ['-as évek', '-es évek'])) {
            // NOTE(review): MD_STD::stri_contains_any() is defined outside of this
            // view. If it matches case-insensitively (as its name suggests - TODO
            // confirm), the case-sensitive strtr() below may not change anything.
            // Only return an actual change, so the caller cannot recurse endlessly
            // on an identical string.
            $rewrite = strtr($datum, ['-as évek' => ' as évek', '-es évek' => ' es évek']);
            if ($rewrite !== $datum) {
                return $rewrite;
            }
        }

        // 1981. július-augusztus > 1981.07-08
        if (is_numeric(substr($datum, 0, 4)) && substr($datum, 4, 2) === '. ') {

            // Map every known month name to its two-digit month number
            $monthNames = [];
            foreach (self::MONTH_NAMES_ENGLISH as $month => $names) {
                foreach ($names as $name) $monthNames[$name] = $month;
            }
            foreach (self::MONTH_NAMES_GERMAN as $month => $names) {
                foreach ($names as $name) $monthNames[$name] = $month;
            }
            foreach (self::MONTH_NAMES_HUNGARIAN as $month => $names) {
                foreach ($names as $name) $monthNames[$name] = $month;
            }

            $rewrite = strtr($datum, $monthNames);
            if ($rewrite !== $datum) {
                // Turn spaces into dots and collapse the resulting double dots
                return str_replace('..', '.', str_replace(" ", ".", $rewrite));
            }

        }

        // Commas used as separators between start and end
        if (str_contains($datum, ',')) {
            return str_replace(',', '-', $datum);
        }

        return '';

    }
 | 
			
		||||
 | 
			
		||||
    /**
 | 
			
		||||
     * Checks if the string is a time span with given start and end dates.
 | 
			
		||||
     *
 | 
			
		||||
@@ -1062,6 +1197,9 @@ final class NodaTimeSplitter {
 | 
			
		||||
        if (strlen($datum) === 9 and substr($datum, 4, 1) !== '-') return [];
 | 
			
		||||
 | 
			
		||||
        if (empty($startEnd = self::check_is_timespan_from_till($datum))) {
 | 
			
		||||
            if ($rewritten = self::_attempt_rewriting_special_cases_from_till($datum)) {
 | 
			
		||||
                return self::attempt_splitting_from_till($rewritten);
 | 
			
		||||
            }
 | 
			
		||||
            return [];
 | 
			
		||||
        }
 | 
			
		||||
        list($start, $end) = $startEnd;
 | 
			
		||||
@@ -1105,38 +1243,76 @@ final class NodaTimeSplitter {
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    /**
 | 
			
		||||
     * Wrapper to check if any splitting command works.
 | 
			
		||||
     * Cleans invalid outputs from splitting.
 | 
			
		||||
     *
 | 
			
		||||
     * @param string $datum Input date.
 | 
			
		||||
     * @param array{0: string, 1: string, 2: string, 3: string, 4: '+'|'-'|'', 5: string}|array{} $moda Split time to check.
 | 
			
		||||
     *
 | 
			
		||||
     * @return array<string>
 | 
			
		||||
     * @return array{0: string, 1: string, 2: string, 3: string, 4: '+'|'-'|'', 5: string}|array{}
 | 
			
		||||
     */
 | 
			
		||||
    public static function attempt_splitting(string $datum):array {
 | 
			
		||||
    private static function validate_split_time(array $moda):array {
 | 
			
		||||
 | 
			
		||||
        $moda = NodaTimeSplitter::is_timespan($datum);
 | 
			
		||||
        if (!$moda) {
 | 
			
		||||
            $moda = NodaTimeSplitter::is_incomplete_date($datum);
 | 
			
		||||
        }
 | 
			
		||||
        if (!$moda) {
 | 
			
		||||
            $moda = NodaTimeSplitter::is_valid_date($datum);
 | 
			
		||||
        }
 | 
			
		||||
        if (!$moda) {
 | 
			
		||||
            $moda = NodaTimeSplitter::is_valid_date_hungarian($datum);
 | 
			
		||||
        }
 | 
			
		||||
        if (!$moda) {
 | 
			
		||||
            $moda = NodaTimeSplitter::is_century($datum);
 | 
			
		||||
        }
 | 
			
		||||
        if (!$moda) {
 | 
			
		||||
            $moda = NodaTimeSplitter::is_decade($datum);
 | 
			
		||||
        if (empty($moda)) return [];
 | 
			
		||||
 | 
			
		||||
        if ((int)$moda[2] > 12 || (int)$moda[3] > 31) {
 | 
			
		||||
            return [];
 | 
			
		||||
        }
 | 
			
		||||
 | 
			
		||||
        if (!empty($moda)) {
 | 
			
		||||
            if ((int)$moda[2] > 12 || (int)$moda[3] > 31) {
 | 
			
		||||
                return [];
 | 
			
		||||
            }
 | 
			
		||||
 | 
			
		||||
        $month_no_zero = strtr($moda[2], ["0" => "", "1" => "", "2" => "", "3" => "", "4" => "", "5" => "", "6" => "", "7" => "", "8" => "", "9" => ""]);
 | 
			
		||||
        $day_no_zero = strtr($moda[3], ["0" => "", "1" => "", "2" => "", "3" => "", "4" => "", "5" => "", "6" => "", "7" => "", "8" => "", "9" => ""]);
 | 
			
		||||
        if (!empty($month_no_zero)) {
 | 
			
		||||
            throw new Exception("Invalid split month: " . var_export($moda, true));
 | 
			
		||||
        }
 | 
			
		||||
        if (!empty($day_no_zero)) {
 | 
			
		||||
            throw new Exception("Invalid split day: " . var_export($moda, true));
 | 
			
		||||
        }
 | 
			
		||||
 | 
			
		||||
        return $moda;
 | 
			
		||||
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    /**
     * Wrapper to check if any splitting command works.
     * Tries the available parsers in a fixed order (time span, incomplete date,
     * valid date, Hungarian date, century, decade). The result of the first parser
     * that returns a non-empty value is passed through self::validate_split_time()
     * and returned; later parsers are not tried, even if validation empties the result.
     * If no parser matches and the input contains spaces, the spaces are removed
     * and, if the remainder consists of digits and dots only, splitting is retried.
     *
     * @param string $datum Input date.
     *
     * @return array{0: string, 1: string, 2: string, 3: string, 4: '+'|'-'|'', 5: string}|array{}
     */
    public static function attempt_splitting(string $datum):array {

        if (!empty($moda = self::is_timespan($datum))) {
            return self::validate_split_time($moda);
        }

        if (!empty($moda = self::is_incomplete_date($datum))) {
            return self::validate_split_time($moda);
        }

        if (!empty($moda = self::is_valid_date($datum))) {
            return self::validate_split_time($moda);
        }

        if (!empty($moda = self::is_valid_date_hungarian($datum))) {
            return self::validate_split_time($moda);
        }

        if (!empty($moda = self::is_century($datum))) {
            return self::validate_split_time($moda);
        }

        if (!empty($moda = self::is_decade($datum))) {
            return self::validate_split_time($moda);
        }

        // 2015. 05.
        if (str_contains($datum, ' ')) {
            $rewrite = str_replace(' ', '', $datum);
            // The numeric check has to run on the version without spaces:
            // is_numeric() rejects strings with inner whitespace ("2015 05"), so
            // checking the original input would never let "2015. 05." through.
            // $rewrite contains no spaces, hence the recursion ends after one step.
            if (is_numeric(str_replace('.', '', $rewrite))) {
                return self::attempt_splitting($rewrite);
            }
        }

        return [];

    }
 | 
			
		||||
}
 | 
			
		||||
 
 | 
			
		||||
@@ -132,6 +132,18 @@ final class NodaTimeSplitterTest extends TestCase {
 | 
			
		||||
        self::assertEquals(NodaTimeSplitter::timePartsToTimeName($output), "1920-1929");
 | 
			
		||||
        self::assertEquals(NodaTimeSplitter::timePartsToCountingYear($output), 1925);
 | 
			
		||||
 | 
			
		||||
        $output = NodaTimeSplitter::attempt_splitting("1920-1929");
 | 
			
		||||
        self::assertEquals($output, [
 | 
			
		||||
            0 => "1920",
 | 
			
		||||
            1 => "1929",
 | 
			
		||||
            2 => "00",
 | 
			
		||||
            3 => "00",
 | 
			
		||||
            4 => "+",
 | 
			
		||||
            5 => "",
 | 
			
		||||
        ]);
 | 
			
		||||
        self::assertEquals(NodaTimeSplitter::timePartsToTimeName($output), "1920-1929");
 | 
			
		||||
        self::assertEquals(NodaTimeSplitter::timePartsToCountingYear($output), 1925);
 | 
			
		||||
 | 
			
		||||
        $output = NodaTimeSplitter::attempt_splitting("1920er Jahre");
 | 
			
		||||
        self::assertEquals($output, [
 | 
			
		||||
            0 => "1920",
 | 
			
		||||
@@ -488,6 +500,30 @@ final class NodaTimeSplitterTest extends TestCase {
 | 
			
		||||
        self::assertEquals(NodaTimeSplitter::timePartsToTimeName($output), "02.01.2020");
 | 
			
		||||
        self::assertEquals(NodaTimeSplitter::timePartsToCountingYear($output), 2020);
 | 
			
		||||
 | 
			
		||||
        $output = NodaTimeSplitter::attempt_splitting("2020.01.2.");
 | 
			
		||||
        self::assertEquals($output, [
 | 
			
		||||
            0 => "2020",
 | 
			
		||||
            1 => "2020",
 | 
			
		||||
            2 => "01",
 | 
			
		||||
            3 => "02",
 | 
			
		||||
            4 => "+",
 | 
			
		||||
            5 => "",
 | 
			
		||||
        ]);
 | 
			
		||||
        self::assertEquals(NodaTimeSplitter::timePartsToTimeName($output), "02.01.2020");
 | 
			
		||||
        self::assertEquals(NodaTimeSplitter::timePartsToCountingYear($output), 2020);
 | 
			
		||||
 | 
			
		||||
        $output = NodaTimeSplitter::attempt_splitting("2020.1.2.");
 | 
			
		||||
        self::assertEquals($output, [
 | 
			
		||||
            0 => "2020",
 | 
			
		||||
            1 => "2020",
 | 
			
		||||
            2 => "01",
 | 
			
		||||
            3 => "02",
 | 
			
		||||
            4 => "+",
 | 
			
		||||
            5 => "",
 | 
			
		||||
        ]);
 | 
			
		||||
        self::assertEquals(NodaTimeSplitter::timePartsToTimeName($output), "02.01.2020");
 | 
			
		||||
        self::assertEquals(NodaTimeSplitter::timePartsToCountingYear($output), 2020);
 | 
			
		||||
 | 
			
		||||
        $output = NodaTimeSplitter::attempt_splitting("2020. Januar 2.");
 | 
			
		||||
        self::assertEquals($output, [
 | 
			
		||||
            0 => "2020",
 | 
			
		||||
@@ -548,6 +584,19 @@ final class NodaTimeSplitterTest extends TestCase {
 | 
			
		||||
        self::assertEquals(NodaTimeSplitter::timePartsToTimeName($output), "1920-1929");
 | 
			
		||||
        self::assertEquals(NodaTimeSplitter::timePartsToCountingYear($output), 1925);
 | 
			
		||||
 | 
			
		||||
        // From 1920 onwards
 | 
			
		||||
        $output = NodaTimeSplitter::attempt_splitting("1920 utántól");
 | 
			
		||||
        self::assertEquals($output, [
 | 
			
		||||
            0 => "1920",
 | 
			
		||||
            1 => "?",
 | 
			
		||||
            2 => "00",
 | 
			
		||||
            3 => "00",
 | 
			
		||||
            4 => "+",
 | 
			
		||||
            5 => "",
 | 
			
		||||
        ]);
 | 
			
		||||
        self::assertEquals(NodaTimeSplitter::timePartsToTimeName($output), "Seit 1920");
 | 
			
		||||
        self::assertEquals(NodaTimeSplitter::timePartsToCountingYear($output), 1920);
 | 
			
		||||
 | 
			
		||||
        $output = NodaTimeSplitter::attempt_splitting("1920-es évek");
 | 
			
		||||
        self::assertEquals($output, [
 | 
			
		||||
            0 => "1920",
 | 
			
		||||
@@ -608,6 +657,18 @@ final class NodaTimeSplitterTest extends TestCase {
 | 
			
		||||
        self::assertEquals(NodaTimeSplitter::timePartsToTimeName($output), "201-300 n. Chr.");
 | 
			
		||||
        self::assertEquals(NodaTimeSplitter::timePartsToCountingYear($output), 251);
 | 
			
		||||
 | 
			
		||||
        $output = NodaTimeSplitter::attempt_splitting("20th century");
 | 
			
		||||
        self::assertEquals($output, [
 | 
			
		||||
            0 => "1901",
 | 
			
		||||
            1 => "2000",
 | 
			
		||||
            2 => "00",
 | 
			
		||||
            3 => "00",
 | 
			
		||||
            4 => "+",
 | 
			
		||||
            5 => "",
 | 
			
		||||
        ]);
 | 
			
		||||
        self::assertEquals(NodaTimeSplitter::timePartsToTimeName($output), "1901-2000");
 | 
			
		||||
        self::assertEquals(NodaTimeSplitter::timePartsToCountingYear($output), 1951);
 | 
			
		||||
 | 
			
		||||
        $output = NodaTimeSplitter::attempt_splitting("20. század");
 | 
			
		||||
        self::assertEquals($output, [
 | 
			
		||||
            0 => "1901",
 | 
			
		||||
@@ -620,6 +681,18 @@ final class NodaTimeSplitterTest extends TestCase {
 | 
			
		||||
        self::assertEquals(NodaTimeSplitter::timePartsToTimeName($output), "1901-2000");
 | 
			
		||||
        self::assertEquals(NodaTimeSplitter::timePartsToCountingYear($output), 1951);
 | 
			
		||||
 | 
			
		||||
        $output = NodaTimeSplitter::attempt_splitting("20.század");
 | 
			
		||||
        self::assertEquals($output, [
 | 
			
		||||
            0 => "1901",
 | 
			
		||||
            1 => "2000",
 | 
			
		||||
            2 => "00",
 | 
			
		||||
            3 => "00",
 | 
			
		||||
            4 => "+",
 | 
			
		||||
            5 => "",
 | 
			
		||||
        ]);
 | 
			
		||||
        self::assertEquals(NodaTimeSplitter::timePartsToTimeName($output), "1901-2000");
 | 
			
		||||
        self::assertEquals(NodaTimeSplitter::timePartsToCountingYear($output), 1951);
 | 
			
		||||
 | 
			
		||||
        $output = NodaTimeSplitter::attempt_splitting("Kr. e. 20. század");
 | 
			
		||||
        self::assertEquals($output, [
 | 
			
		||||
            0 => "-2000",
 | 
			
		||||
@@ -658,6 +731,109 @@ final class NodaTimeSplitterTest extends TestCase {
 | 
			
		||||
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    /**
 | 
			
		||||
     * Test to check that from-till time spans can be split into start and end.
 | 
			
		||||
     *
 | 
			
		||||
     * @author Joshua Ramon Enslin <joshua@museum-digital.de>
 | 
			
		||||
     * @group  ValidOutput
 | 
			
		||||
     * @small
 | 
			
		||||
     *
 | 
			
		||||
     * @return void
 | 
			
		||||
     */
 | 
			
		||||
    public function testSplittingFromTill():void {
 | 
			
		||||
 | 
			
		||||
        // Regular
 | 
			
		||||
        $output = NodaTimeSplitter::attempt_splitting_from_till("2004.01.-2004.02.");
 | 
			
		||||
        self::assertNotEmpty($output);
 | 
			
		||||
        self::assertEquals($output, [
 | 
			
		||||
            'start_name' => "Januar 2004",
 | 
			
		||||
            'end_name' => "Februar 2004",
 | 
			
		||||
            "start_year" => '2004',
 | 
			
		||||
            "end_year" => '2004',
 | 
			
		||||
            "counting_time_year" => "2004",
 | 
			
		||||
            "counting_time_month" => "01",
 | 
			
		||||
            "counting_time_day" => "16",
 | 
			
		||||
            "counting_time_bcce" => "+",
 | 
			
		||||
        ]);
 | 
			
		||||
 | 
			
		||||
        // Rewritten / Hungarian YYYY.MM.-MM.
 | 
			
		||||
        $output = NodaTimeSplitter::attempt_splitting_from_till("2004.01.-02.");
 | 
			
		||||
        self::assertNotEmpty($output);
 | 
			
		||||
        self::assertEquals($output, [
 | 
			
		||||
            'start_name' => "Januar 2004",
 | 
			
		||||
            'end_name' => "Februar 2004",
 | 
			
		||||
            "start_year" => '2004',
 | 
			
		||||
            "end_year" => '2004',
 | 
			
		||||
            "counting_time_year" => "2004",
 | 
			
		||||
            "counting_time_month" => "01",
 | 
			
		||||
            "counting_time_day" => "16",
 | 
			
		||||
            "counting_time_bcce" => "+",
 | 
			
		||||
        ]);
 | 
			
		||||
 | 
			
		||||
        // Rewritten / Hungarian YYYY.MM-MM. (no dot after the first month)
 | 
			
		||||
        $output = NodaTimeSplitter::attempt_splitting_from_till("2003.04-05.");
 | 
			
		||||
        self::assertNotEmpty($output);
 | 
			
		||||
 | 
			
		||||
        // Rewritten / Hungarian YYYY. month name-month name
 | 
			
		||||
        $output = NodaTimeSplitter::attempt_splitting_from_till("1981. július-augusztus");
 | 
			
		||||
        self::assertNotEmpty($output);
 | 
			
		||||
 | 
			
		||||
        $output = NodaTimeSplitter::attempt_splitting_from_till("2019.03.14.,04.15.");
 | 
			
		||||
        self::assertNotEmpty($output);
 | 
			
		||||
        self::assertEquals($output, [
 | 
			
		||||
            'start_name' => "14.03.2019",
 | 
			
		||||
            'end_name' => "15.04.2019",
 | 
			
		||||
            "start_year" => '2019',
 | 
			
		||||
            "end_year" => '2019',
 | 
			
		||||
            "counting_time_year" => "2019",
 | 
			
		||||
            "counting_time_month" => "03",
 | 
			
		||||
            "counting_time_day" => "30",
 | 
			
		||||
            "counting_time_bcce" => "+",
 | 
			
		||||
        ]);
 | 
			
		||||
 | 
			
		||||
        $output = NodaTimeSplitter::attempt_splitting_from_till("2019.03.14-15");
 | 
			
		||||
        self::assertNotEmpty($output);
 | 
			
		||||
        self::assertEquals($output, [
 | 
			
		||||
            'start_name' => "14.03.2019",
 | 
			
		||||
            'end_name' => "15.03.2019",
 | 
			
		||||
            "start_year" => '2019',
 | 
			
		||||
            "end_year" => '2019',
 | 
			
		||||
            "counting_time_year" => "2019",
 | 
			
		||||
            "counting_time_month" => "03",
 | 
			
		||||
            "counting_time_day" => "15",
 | 
			
		||||
            "counting_time_bcce" => "+",
 | 
			
		||||
        ]);
 | 
			
		||||
 | 
			
		||||
        // Rewritten / Hungarian century span (CC-CC. század)
 | 
			
		||||
        $output = NodaTimeSplitter::attempt_splitting_from_till("17-19.század");
 | 
			
		||||
        self::assertNotEmpty($output);
 | 
			
		||||
        self::assertEquals($output, [
 | 
			
		||||
            'start_name' => "1601",
 | 
			
		||||
            'end_name' => "1900",
 | 
			
		||||
            "start_year" => '1601',
 | 
			
		||||
            "end_year" => '1900',
 | 
			
		||||
            "counting_time_year" => "1750",
 | 
			
		||||
            "counting_time_month" => "06",
 | 
			
		||||
            "counting_time_day" => "01",
 | 
			
		||||
            "counting_time_bcce" => "+",
 | 
			
		||||
        ]);
 | 
			
		||||
 | 
			
		||||
        // Rewritten / 1950-60-as évek
 | 
			
		||||
        $output = NodaTimeSplitter::attempt_splitting_from_till("1950-60-as évek");
 | 
			
		||||
        self::assertNotEmpty($output);
 | 
			
		||||
        self::assertEquals($output, [
 | 
			
		||||
            'start_name' => "1950",
 | 
			
		||||
            'end_name' => "1969",
 | 
			
		||||
            "start_year" => '1950',
 | 
			
		||||
            "end_year" => '1969',
 | 
			
		||||
            "counting_time_year" => "1959",
 | 
			
		||||
            "counting_time_month" => "06",
 | 
			
		||||
            "counting_time_day" => "01",
 | 
			
		||||
            "counting_time_bcce" => "+",
 | 
			
		||||
        ]);
 | 
			
		||||
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    /**
 | 
			
		||||
     * Test to check that inputs which should not be splittable return an empty result.
 | 
			
		||||
     *
 | 
			
		||||
@@ -670,40 +846,43 @@ final class NodaTimeSplitterTest extends TestCase {
 | 
			
		||||
    public function testSplitDoesNotWorkWhenItShouldNot():void {
 | 
			
		||||
 | 
			
		||||
        $output = NodaTimeSplitter::attempt_splitting("");
 | 
			
		||||
        self::assertEquals($output, []);
 | 
			
		||||
        self::assertEmpty($output);
 | 
			
		||||
 | 
			
		||||
        $output = NodaTimeSplitter::attempt_splitting("1.2.2020-2.2.2020");
 | 
			
		||||
        self::assertEquals($output, []);
 | 
			
		||||
        self::assertEmpty($output);
 | 
			
		||||
 | 
			
		||||
        $output = NodaTimeSplitter::attempt_splitting("2020 Januar 2-2020 Februar 2");
 | 
			
		||||
        self::assertEquals($output, []);
 | 
			
		||||
        self::assertEmpty($output);
 | 
			
		||||
 | 
			
		||||
        $output = NodaTimeSplitter::attempt_splitting("2020 Januar-2020 Februar");
 | 
			
		||||
        self::assertEquals($output, []);
 | 
			
		||||
        self::assertEmpty($output);
 | 
			
		||||
 | 
			
		||||
        $output = NodaTimeSplitter::attempt_splitting("Januar-Februar");
 | 
			
		||||
        self::assertEquals($output, []);
 | 
			
		||||
        self::assertEmpty($output);
 | 
			
		||||
 | 
			
		||||
        $output = NodaTimeSplitter::attempt_splitting("13.13.2022");
 | 
			
		||||
        self::assertEquals($output, []);
 | 
			
		||||
        self::assertEmpty($output);
 | 
			
		||||
        $output = NodaTimeSplitter::attempt_splitting("2022-13-13");
 | 
			
		||||
        self::assertEquals($output, []);
 | 
			
		||||
        self::assertEmpty($output);
 | 
			
		||||
 | 
			
		||||
        $output = NodaTimeSplitter::attempt_splitting("40.10.2022");
 | 
			
		||||
        self::assertEquals($output, []);
 | 
			
		||||
        self::assertEmpty($output);
 | 
			
		||||
        $output = NodaTimeSplitter::attempt_splitting("2022-10-40");
 | 
			
		||||
        self::assertEquals($output, []);
 | 
			
		||||
        self::assertEmpty($output);
 | 
			
		||||
 | 
			
		||||
        $output = NodaTimeSplitter::attempt_splitting("6;November 1978");
 | 
			
		||||
        self::assertEquals($output, []);
 | 
			
		||||
        self::assertEmpty($output);
 | 
			
		||||
 | 
			
		||||
        $output = NodaTimeSplitter::attempt_splitting("65497028c51eb");
 | 
			
		||||
        self::assertEquals($output, []);
 | 
			
		||||
        self::assertEmpty($output);
 | 
			
		||||
        $output = NodaTimeSplitter::attempt_splitting("6552cf08b0196 test tag");
 | 
			
		||||
        self::assertEquals($output, []);
 | 
			
		||||
        self::assertEmpty($output);
 | 
			
		||||
 | 
			
		||||
        $output = NodaTimeSplitter::attempt_splitting("Anfang September 1903");
 | 
			
		||||
        self::assertEmpty($output);
 | 
			
		||||
 | 
			
		||||
        # $output = NodaTimeSplitter::attempt_splitting("Nach 1944-1964");
 | 
			
		||||
        # self::assertEquals($output, []);
 | 
			
		||||
        # self::assertEmpty($output);
 | 
			
		||||
 | 
			
		||||
    }
 | 
			
		||||
}
 | 
			
		||||
 
 | 
			
		||||
		Reference in New Issue
	
	Block a user