Fix erroneous splitting of Hungarian time names with additions
This commit is contained in:
		| @@ -42,7 +42,7 @@ final class NodaTimeSplitter { | ||||
|     ]; | ||||
|  | ||||
|     const MONTH_NAMES_HUNGARIAN = [ | ||||
|         "01" => ['január', 'jan'], | ||||
|         "01" => ['január', 'januar', 'jan'], | ||||
|         "02" => ['február', 'feb'], | ||||
|         "03" => ['március', 'mar.', 'már.'], | ||||
|         "04" => ['április', 'apr.', 'ápr.'], | ||||
| @@ -521,20 +521,25 @@ final class NodaTimeSplitter { | ||||
|         // Skip, if dates are too long and do not contain spaces (= no translatable names) | ||||
|         if (str_contains($datum, " ") === false && strlen($datum) > 12) return []; | ||||
|  | ||||
|         $unparsed = trim(strtolower(str_replace($year, '', $datum)), ' ,.'); | ||||
|         foreach (self::MONTH_NAMES_HUNGARIAN as $monthVal => $monthValidNames) { | ||||
|             if (self::stri_occurs($datum, $monthValidNames)) { | ||||
|                 $monat = (string)$monthVal; | ||||
|                 foreach ($monthValidNames as $name) { | ||||
|                     $unparsed = str_replace($name, '', $unparsed); | ||||
|                 } | ||||
|                 break; | ||||
|             } | ||||
|         } | ||||
|  | ||||
|         if (strlen($unparsed) > 5) { | ||||
|             return []; | ||||
|         } | ||||
|  | ||||
|         if (empty($monat) and self::is_numeric((string)\substr($datum, 5, 2))) $monat = \substr($datum, 5, 2); | ||||
|         else if (empty($monat) and self::is_numeric((string)\substr($datum, 6, 2))) $monat = \substr($datum, 6, 2); | ||||
|  | ||||
|         // Last four characters must contain at least one space or one dot | ||||
|         $lastChars = substr($datum, -4); | ||||
|         if (str_contains($lastChars, '.') === false && str_contains($lastChars, ' ') === false)  return []; | ||||
|  | ||||
|         $day = self::validateDateSubstr($datum, -2); | ||||
|         if (empty($day)) $day = self::validateDateSubstr($datum, -3, 2); | ||||
|         if (empty($day)) $day = self::validateDateSubstr($datum, -4, 2); | ||||
| @@ -550,6 +555,14 @@ final class NodaTimeSplitter { | ||||
|             } | ||||
|         } | ||||
|  | ||||
|         if ($datum === '1978. július 7 elött') { | ||||
|             throw new Exception(var_export($monat, true)); | ||||
|         } | ||||
|  | ||||
|         if (!empty($monat) && empty($day) && preg_match('~[0-9]+~', substr($datum, -3))) { | ||||
|             return []; | ||||
|         } | ||||
|  | ||||
|         if (!empty($monat) and !empty($day)) { | ||||
|             return [$year, $year, $monat, $day, '+', ""]; | ||||
|         } | ||||
| @@ -932,6 +945,12 @@ final class NodaTimeSplitter { | ||||
|                 return $output; | ||||
|             } | ||||
|         } | ||||
|         if (!empty(\preg_match("/^[0-9]{4}ig$/", $datum))) { | ||||
|             if ($output = self::attempt_splitting(\substr($datum, 0, 4))) { | ||||
|                 $output[0] = "?"; | ||||
|                 return $output; | ||||
|             } | ||||
|         } | ||||
|  | ||||
|         return []; | ||||
|  | ||||
| @@ -1102,7 +1121,7 @@ final class NodaTimeSplitter { | ||||
|  | ||||
|         // Hungarian year and month until month | ||||
|         // 2005.01.-02. => 2005.01.-2005.02. | ||||
|         if ($inputLength === 12 && (\preg_match("/^[0-9]{4}\.[0-1][0-9]\.\-[0-1][0-9]\.$/", $datum)) !== false) { | ||||
|         if ($inputLength === 12 && (\preg_match("/^[0-9]{4}\.[0-1][0-9]\.\-[0-1][0-9]\.$/", $datum))) { | ||||
|             $reconstituted = substr($datum, 0, 8) . '-'; | ||||
|             $reconstituted .= substr($datum, 0, 4) . '.' . substr($datum, -3); | ||||
|             return $reconstituted; | ||||
| @@ -1111,7 +1130,7 @@ final class NodaTimeSplitter { | ||||
|         // Hungarian year and month until month, without a dot after the first YYYY.MM | ||||
|         // 2005.01-02. => 2005.01.-2005.02. | ||||
|  | ||||
|         if (in_array($inputLength, [10, 11], true) && (\preg_match("/^[0-9]{4}\.[0-1][0-9]\-[0-1][0-9](\.|)$/", $datum)) !== false) { | ||||
|         if (in_array($inputLength, [10, 11], true) && (\preg_match("/^[0-9]{4}\.[0-1][0-9]\-[0-1][0-9](\.|)$/", $datum))) { | ||||
|             $reconstituted = substr($datum, 0, 7) . '.-'; | ||||
|             $reconstituted .= substr($datum, 0, 4) . '.' . substr(rtrim($datum, '.'), -2) . '.'; | ||||
|             return $reconstituted; | ||||
| @@ -1120,7 +1139,7 @@ final class NodaTimeSplitter { | ||||
|         // Hungarian year, month and day until month and day | ||||
|         // 2005.01.01.-02.02. => 2005.01.01-2005.02.02. | ||||
|         // 2005.01.01-02.02 => 2005.01.01-2005.02.02. | ||||
|         if ($inputLength >= 16 && $inputLength <= 18 && (\preg_match("/^[0-9]{4}\.[0-1][0-9]\.[0-3][0-9](\.|)\-[0-1][0-9]\.[0-3][0-9](\.|)$/", $datum)) !== false) { | ||||
|         if ($inputLength >= 16 && $inputLength <= 18 && (\preg_match("/^[0-9]{4}\.[0-1][0-9]\.[0-3][0-9](\.|)\-[0-1][0-9]\.[0-3][0-9](\.|)$/", $datum))) { | ||||
|             $parts = explode('-', $datum); | ||||
|             if (count($parts) !== 2) return ''; | ||||
|             $reconstituted = substr($datum, 0, 10) . '.-'; | ||||
| @@ -1129,7 +1148,7 @@ final class NodaTimeSplitter { | ||||
|         } | ||||
|  | ||||
|         // Hungarian; without trailing dots: YYYY.MM.DD-DD | ||||
|         if ($inputLength >= 13 && $inputLength <= 15 && (\preg_match("/^[0-9]{4}\.[0-1][0-9]\.[0-3][0-9](\.|)\-[0-3][0-9](\.|)$/", $datum)) !== false) { | ||||
|         if ($inputLength >= 13 && $inputLength <= 15 && \preg_match("/^[0-9]{4}\.[0-1][0-9]\.[0-3][0-9](\.|)\-[0-3][0-9](\.|)$/", $datum)) { | ||||
|             $parts = explode('-', $datum); | ||||
|             if (count($parts) !== 2) return ''; | ||||
|             $reconstituted = substr($datum, 0, 10) . '.-'; | ||||
| @@ -1154,8 +1173,8 @@ final class NodaTimeSplitter { | ||||
|  | ||||
|         // If es évek / as évek is contained in the string (e.g. 1880-1990-es évek), there | ||||
|         // will be more than one hyphen | ||||
|         if (MD_STD::stri_contains_any($datum, ['-as évek', '-es évek'])) { | ||||
|             return strtr($datum, ['-as évek' => ' as évek', '-es évek' => ' es évek']); | ||||
|         if (MD_STD::stri_contains_any($datum, ['-as évek', '-es-évek', '-es évek'])) { | ||||
|             return strtr($datum, ['-as évek' => ' as évek', '-es-évek' => ' es évek', '-es évek' => ' es évek']); | ||||
|         } | ||||
|  | ||||
|         // 1981. július-augusztus > 1981.07-08 | ||||
|   | ||||
		Reference in New Issue
	
	Block a user