Compare commits
No commits in common. "f3831965a3c4f3a8478fd037b3b6700227b647e1" and "3e9f675fdcb937053af7ac538419f9d2683f5966" have entirely different histories.
f3831965a3
...
3e9f675fdc
|
@ -42,7 +42,7 @@ final class NodaTimeSplitter {
|
||||||
];
|
];
|
||||||
|
|
||||||
const MONTH_NAMES_HUNGARIAN = [
|
const MONTH_NAMES_HUNGARIAN = [
|
||||||
"01" => ['január', 'januar', 'jan'],
|
"01" => ['január', 'jan'],
|
||||||
"02" => ['február', 'feb'],
|
"02" => ['február', 'feb'],
|
||||||
"03" => ['március', 'mar.', 'már.'],
|
"03" => ['március', 'mar.', 'már.'],
|
||||||
"04" => ['április', 'apr.', 'ápr.'],
|
"04" => ['április', 'apr.', 'ápr.'],
|
||||||
|
@ -58,8 +58,6 @@ final class NodaTimeSplitter {
|
||||||
|
|
||||||
const STRINGS_TO_CLEAN = [
|
const STRINGS_TO_CLEAN = [
|
||||||
"között" => "",
|
"között" => "",
|
||||||
" рр." => "",
|
|
||||||
" рр" => "",
|
|
||||||
"nach Christus" => "",
|
"nach Christus" => "",
|
||||||
"n. Christus" => "",
|
"n. Christus" => "",
|
||||||
"nach Chr." => "",
|
"nach Chr." => "",
|
||||||
|
@ -71,7 +69,6 @@ final class NodaTimeSplitter {
|
||||||
"BCE" => "v. Chr.",
|
"BCE" => "v. Chr.",
|
||||||
"CE" => "",
|
"CE" => "",
|
||||||
"vor Christus" => "v. Chr.",
|
"vor Christus" => "v. Chr.",
|
||||||
" до н. е." => "v. Chr.",
|
|
||||||
];
|
];
|
||||||
|
|
||||||
const STRINGS_TO_CLEAN_START = [
|
const STRINGS_TO_CLEAN_START = [
|
||||||
|
@ -136,7 +133,6 @@ final class NodaTimeSplitter {
|
||||||
];
|
];
|
||||||
|
|
||||||
private const REGEX_CENTURIES = '(\ |)(Jh\.|Jhd(|\.)|Jhdt(|\.)|Jahrhundert|sz|század|th century|ст|ст\.)';
|
private const REGEX_CENTURIES = '(\ |)(Jh\.|Jhd(|\.)|Jhdt(|\.)|Jahrhundert|sz|század|th century|ст|ст\.)';
|
||||||
private const REGEX_DECADES = '(s|er|er\ Jahre|(\-|\ )es\ évek|(\-|\ )as\ \évek|\ évek|\-es\ években|\-ті)';
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Cleans input strings by trimming obsolete stuff.
|
* Cleans input strings by trimming obsolete stuff.
|
||||||
|
@ -525,25 +521,20 @@ final class NodaTimeSplitter {
|
||||||
// Skip, if dates are too long and do not contain spaces (= no translatable names)
|
// Skip, if dates are too long and do not contain spaces (= no translatable names)
|
||||||
if (str_contains($datum, " ") === false && strlen($datum) > 12) return [];
|
if (str_contains($datum, " ") === false && strlen($datum) > 12) return [];
|
||||||
|
|
||||||
$unparsed = trim(strtolower(str_replace($year, '', $datum)), ' ,.');
|
|
||||||
foreach (self::MONTH_NAMES_HUNGARIAN as $monthVal => $monthValidNames) {
|
foreach (self::MONTH_NAMES_HUNGARIAN as $monthVal => $monthValidNames) {
|
||||||
if (self::stri_occurs($datum, $monthValidNames)) {
|
if (self::stri_occurs($datum, $monthValidNames)) {
|
||||||
$monat = (string)$monthVal;
|
$monat = (string)$monthVal;
|
||||||
foreach ($monthValidNames as $name) {
|
|
||||||
$unparsed = str_replace($name, '', $unparsed);
|
|
||||||
}
|
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if (strlen($unparsed) > 5) {
|
|
||||||
return [];
|
|
||||||
}
|
|
||||||
|
|
||||||
if (empty($monat) and self::is_numeric((string)\substr($datum, 5, 2))) $monat = \substr($datum, 5, 2);
|
if (empty($monat) and self::is_numeric((string)\substr($datum, 5, 2))) $monat = \substr($datum, 5, 2);
|
||||||
else if (empty($monat) and self::is_numeric((string)\substr($datum, 6, 2))) $monat = \substr($datum, 6, 2);
|
else if (empty($monat) and self::is_numeric((string)\substr($datum, 6, 2))) $monat = \substr($datum, 6, 2);
|
||||||
|
|
||||||
// Last four characters must contain at least one space or one dot
|
// Last four characters must contain at least one space or one dot
|
||||||
|
$lastChars = substr($datum, -4);
|
||||||
|
if (str_contains($lastChars, '.') === false && str_contains($lastChars, ' ') === false) return [];
|
||||||
|
|
||||||
$day = self::validateDateSubstr($datum, -2);
|
$day = self::validateDateSubstr($datum, -2);
|
||||||
if (empty($day)) $day = self::validateDateSubstr($datum, -3, 2);
|
if (empty($day)) $day = self::validateDateSubstr($datum, -3, 2);
|
||||||
if (empty($day)) $day = self::validateDateSubstr($datum, -4, 2);
|
if (empty($day)) $day = self::validateDateSubstr($datum, -4, 2);
|
||||||
|
@ -559,10 +550,6 @@ final class NodaTimeSplitter {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if (!empty($monat) && empty($day) && preg_match('~[0-9]+~', substr($datum, -3))) {
|
|
||||||
return [];
|
|
||||||
}
|
|
||||||
|
|
||||||
if (!empty($monat) and !empty($day)) {
|
if (!empty($monat) and !empty($day)) {
|
||||||
return [$year, $year, $monat, $day, '+', ""];
|
return [$year, $year, $monat, $day, '+', ""];
|
||||||
}
|
}
|
||||||
|
@ -864,17 +851,14 @@ final class NodaTimeSplitter {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if (\preg_match("/\ (\(vor|\(Vor|vor|előtt)$/", $datum)) {
|
if (\preg_match("/\ (\(vor|\(Vor|vor)$/", $datum)) {
|
||||||
if (($spacePos = \strrpos($datum, " ")) === false) {
|
if (($spacePos = \strpos($datum, " ")) === false) {
|
||||||
return [];
|
return [];
|
||||||
}
|
}
|
||||||
if ($output = self::attempt_splitting(\substr($datum, 0, $spacePos))) {
|
if ($output = self::attempt_splitting(\substr($datum, 0, $spacePos))) {
|
||||||
|
|
||||||
$output[0] = "?";
|
$output[0] = "?";
|
||||||
// If month and day are unknown, lower end year by one
|
if (empty(trim($output[2], "0 .,"))) $output[1] = \strval(\intval($output[1]) - 1);
|
||||||
if (empty(trim($output[2], "0 .,"))) {
|
|
||||||
$output[1] = \strval(\intval($output[1]) - 1);
|
|
||||||
}
|
|
||||||
$output[5] = "Vor";
|
$output[5] = "Vor";
|
||||||
return $output;
|
return $output;
|
||||||
}
|
}
|
||||||
|
@ -948,18 +932,6 @@ final class NodaTimeSplitter {
|
||||||
return $output;
|
return $output;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
if (!empty(\preg_match("/^[0-9]{4}ig$/", $datum))) {
|
|
||||||
if ($output = self::attempt_splitting(\substr($datum, 0, 4))) {
|
|
||||||
$output[0] = "?";
|
|
||||||
return $output;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
if (str_ends_with($datum, '-as évekig') || str_ends_with($datum, '-es évekig')) {
|
|
||||||
if ($output = self::attempt_splitting(\substr($datum, 0, -2))) {
|
|
||||||
$output[0] = "?";
|
|
||||||
return $output;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
return [];
|
return [];
|
||||||
|
|
||||||
|
@ -1063,14 +1035,14 @@ final class NodaTimeSplitter {
|
||||||
$bcBceIndicator = '+';
|
$bcBceIndicator = '+';
|
||||||
|
|
||||||
// 20er Jahre
|
// 20er Jahre
|
||||||
if (\preg_match("/^[0-9]0" . self::REGEX_DECADES . "$/", $datum)) {
|
if (\preg_match("/^[0-9]0(er|er\ Jahre|\-es\ évek|\-as\ \évek)$/", $datum)) {
|
||||||
$start = "19" . \substr($datum, 0, 2);
|
$start = "19" . \substr($datum, 0, 2);
|
||||||
$ende = (string)(\intval($start) + 9);
|
$ende = (string)(\intval($start) + 9);
|
||||||
return [$start, $ende, "00", "00", $bcBceIndicator, ""];
|
return [$start, $ende, "00", "00", $bcBceIndicator, ""];
|
||||||
}
|
}
|
||||||
|
|
||||||
// 1920er Jahre
|
// 1920er Jahre
|
||||||
if (\preg_match("/^[0-9]{3}0" . self::REGEX_DECADES . "$/", $datum)) {
|
if (\preg_match("/^[0-9]{3}0(s|er|er\ Jahre|(\-|\ )es\ évek|(\-|\ )as\ \évek)$/", $datum)) {
|
||||||
$start = \substr($datum, 0, 4);
|
$start = \substr($datum, 0, 4);
|
||||||
$ende = (string)(\intval($start) + 9);
|
$ende = (string)(\intval($start) + 9);
|
||||||
return [$start, $ende, "00", "00", $bcBceIndicator, ""];
|
return [$start, $ende, "00", "00", $bcBceIndicator, ""];
|
||||||
|
@ -1130,7 +1102,7 @@ final class NodaTimeSplitter {
|
||||||
|
|
||||||
// Hungarian year and month until month
|
// Hungarian year and month until month
|
||||||
// 2005.01.-02. => 2005.01.-2005.02.
|
// 2005.01.-02. => 2005.01.-2005.02.
|
||||||
if ($inputLength === 12 && (\preg_match("/^[0-9]{4}\.[0-1][0-9]\.\-[0-1][0-9]\.$/", $datum))) {
|
if ($inputLength === 12 && (\preg_match("/^[0-9]{4}\.[0-1][0-9]\.\-[0-1][0-9]\.$/", $datum)) !== false) {
|
||||||
$reconstituted = substr($datum, 0, 8) . '-';
|
$reconstituted = substr($datum, 0, 8) . '-';
|
||||||
$reconstituted .= substr($datum, 0, 4) . '.' . substr($datum, -3);
|
$reconstituted .= substr($datum, 0, 4) . '.' . substr($datum, -3);
|
||||||
return $reconstituted;
|
return $reconstituted;
|
||||||
|
@ -1139,7 +1111,7 @@ final class NodaTimeSplitter {
|
||||||
// Hungarian year and month until month without a dot after the first YYYY-MM
|
// Hungarian year and month until month without a dot after the first YYYY-MM
|
||||||
// 2005.01-02. => 2005.01.-2005.02.
|
// 2005.01-02. => 2005.01.-2005.02.
|
||||||
|
|
||||||
if (in_array($inputLength, [10, 11], true) && (\preg_match("/^[0-9]{4}\.[0-1][0-9]\-[0-1][0-9](\.|)$/", $datum))) {
|
if (in_array($inputLength, [10, 11], true) && (\preg_match("/^[0-9]{4}\.[0-1][0-9]\-[0-1][0-9](\.|)$/", $datum)) !== false) {
|
||||||
$reconstituted = substr($datum, 0, 7) . '.-';
|
$reconstituted = substr($datum, 0, 7) . '.-';
|
||||||
$reconstituted .= substr($datum, 0, 4) . '.' . substr(rtrim($datum, '.'), -2) . '.';
|
$reconstituted .= substr($datum, 0, 4) . '.' . substr(rtrim($datum, '.'), -2) . '.';
|
||||||
return $reconstituted;
|
return $reconstituted;
|
||||||
|
@ -1148,7 +1120,7 @@ final class NodaTimeSplitter {
|
||||||
// Hungarian year and month until month
|
// Hungarian year and month until month
|
||||||
// 2005.01.01.-02.02. => 2005.01.01-2005.02.02.
|
// 2005.01.01.-02.02. => 2005.01.01-2005.02.02.
|
||||||
// 2005.01.01-02.02 => 2005.01.01-2005.02.02.
|
// 2005.01.01-02.02 => 2005.01.01-2005.02.02.
|
||||||
if ($inputLength >= 16 && $inputLength <= 18 && (\preg_match("/^[0-9]{4}\.[0-1][0-9]\.[0-3][0-9](\.|)\-[0-1][0-9]\.[0-3][0-9](\.|)$/", $datum))) {
|
if ($inputLength >= 16 && $inputLength <= 18 && (\preg_match("/^[0-9]{4}\.[0-1][0-9]\.[0-3][0-9](\.|)\-[0-1][0-9]\.[0-3][0-9](\.|)$/", $datum)) !== false) {
|
||||||
$parts = explode('-', $datum);
|
$parts = explode('-', $datum);
|
||||||
if (count($parts) !== 2) return '';
|
if (count($parts) !== 2) return '';
|
||||||
$reconstituted = substr($datum, 0, 10) . '.-';
|
$reconstituted = substr($datum, 0, 10) . '.-';
|
||||||
|
@ -1157,7 +1129,7 @@ final class NodaTimeSplitter {
|
||||||
}
|
}
|
||||||
|
|
||||||
// Hungarian; without trailing dots: YYYY.MM.DD-DD
|
// Hungarian; without trailing dots: YYYY.MM.DD-DD
|
||||||
if ($inputLength >= 13 && $inputLength <= 15 && \preg_match("/^[0-9]{4}\.[0-1][0-9]\.[0-3][0-9](\.|)\-[0-3][0-9](\.|)$/", $datum)) {
|
if ($inputLength >= 13 && $inputLength <= 15 && (\preg_match("/^[0-9]{4}\.[0-1][0-9]\.[0-3][0-9](\.|)\-[0-3][0-9](\.|)$/", $datum)) !== false) {
|
||||||
$parts = explode('-', $datum);
|
$parts = explode('-', $datum);
|
||||||
if (count($parts) !== 2) return '';
|
if (count($parts) !== 2) return '';
|
||||||
$reconstituted = substr($datum, 0, 10) . '.-';
|
$reconstituted = substr($datum, 0, 10) . '.-';
|
||||||
|
@ -1182,8 +1154,8 @@ final class NodaTimeSplitter {
|
||||||
|
|
||||||
// If es évek / as évek is contained in the string (e.g. 1880-1990-es évek), there
|
// If es évek / as évek is contained in the string (e.g. 1880-1990-es évek), there
|
||||||
// will be more than one hyphens
|
// will be more than one hyphens
|
||||||
if (MD_STD::stri_contains_any($datum, ['-as évek', '-es-évek', '-es évek'])) {
|
if (MD_STD::stri_contains_any($datum, ['-as évek', '-es évek'])) {
|
||||||
return strtr($datum, ['-as évek' => ' as évek', '-es-évek' => ' es évek', '-es évek' => ' es évek']);
|
return strtr($datum, ['-as évek' => ' as évek', '-es évek' => ' es évek']);
|
||||||
}
|
}
|
||||||
|
|
||||||
// 1981. július-augusztus > 1981.07-08
|
// 1981. július-augusztus > 1981.07-08
|
||||||
|
|
|
@ -500,18 +500,6 @@ final class NodaTimeSplitterTest extends TestCase {
|
||||||
self::assertEquals(NodaTimeSplitter::timePartsToTimeName($output), "02.01.2020");
|
self::assertEquals(NodaTimeSplitter::timePartsToTimeName($output), "02.01.2020");
|
||||||
self::assertEquals(NodaTimeSplitter::timePartsToCountingYear($output), 2020);
|
self::assertEquals(NodaTimeSplitter::timePartsToCountingYear($output), 2020);
|
||||||
|
|
||||||
$output = NodaTimeSplitter::attempt_splitting("2020. januar. 2.");
|
|
||||||
self::assertEquals($output, [
|
|
||||||
0 => "2020",
|
|
||||||
1 => "2020",
|
|
||||||
2 => "01",
|
|
||||||
3 => "02",
|
|
||||||
4 => "+",
|
|
||||||
5 => "",
|
|
||||||
]);
|
|
||||||
self::assertEquals(NodaTimeSplitter::timePartsToTimeName($output), "02.01.2020");
|
|
||||||
self::assertEquals(NodaTimeSplitter::timePartsToCountingYear($output), 2020);
|
|
||||||
|
|
||||||
$output = NodaTimeSplitter::attempt_splitting("2020.01.2.");
|
$output = NodaTimeSplitter::attempt_splitting("2020.01.2.");
|
||||||
self::assertEquals($output, [
|
self::assertEquals($output, [
|
||||||
0 => "2020",
|
0 => "2020",
|
||||||
|
@ -621,27 +609,15 @@ final class NodaTimeSplitterTest extends TestCase {
|
||||||
self::assertEquals(NodaTimeSplitter::timePartsToTimeName($output), "1920-1929");
|
self::assertEquals(NodaTimeSplitter::timePartsToTimeName($output), "1920-1929");
|
||||||
self::assertEquals(NodaTimeSplitter::timePartsToCountingYear($output), 1925);
|
self::assertEquals(NodaTimeSplitter::timePartsToCountingYear($output), 1925);
|
||||||
|
|
||||||
$output = NodaTimeSplitter::attempt_splitting("1920-es évekig");
|
|
||||||
self::assertEquals($output, [
|
|
||||||
0 => "?",
|
|
||||||
1 => "1929",
|
|
||||||
2 => "00",
|
|
||||||
3 => "00",
|
|
||||||
4 => "+",
|
|
||||||
5 => "",
|
|
||||||
]);
|
|
||||||
self::assertEquals(NodaTimeSplitter::timePartsToTimeName($output), "Bis 1929");
|
|
||||||
self::assertEquals(NodaTimeSplitter::timePartsToCountingYear($output), 1929);
|
|
||||||
|
|
||||||
$output = NodaTimeSplitter::attempt_splitting("2020. Januar");
|
$output = NodaTimeSplitter::attempt_splitting("2020. Januar");
|
||||||
self::assertEquals([
|
self::assertEquals($output, [
|
||||||
0 => "2020",
|
0 => "2020",
|
||||||
1 => "2020",
|
1 => "2020",
|
||||||
2 => "01",
|
2 => "01",
|
||||||
3 => "00",
|
3 => "00",
|
||||||
4 => "+",
|
4 => "+",
|
||||||
5 => "",
|
5 => "",
|
||||||
], $output);
|
]);
|
||||||
self::assertEquals(NodaTimeSplitter::timePartsToTimeName($output), "Januar 2020");
|
self::assertEquals(NodaTimeSplitter::timePartsToTimeName($output), "Januar 2020");
|
||||||
self::assertEquals(NodaTimeSplitter::timePartsToCountingYear($output), 2020);
|
self::assertEquals(NodaTimeSplitter::timePartsToCountingYear($output), 2020);
|
||||||
|
|
||||||
|
@ -753,57 +729,6 @@ final class NodaTimeSplitterTest extends TestCase {
|
||||||
self::assertEquals(NodaTimeSplitter::timePartsToTimeName($output), "Bis 1801 v. Chr.");
|
self::assertEquals(NodaTimeSplitter::timePartsToTimeName($output), "Bis 1801 v. Chr.");
|
||||||
self::assertEquals(NodaTimeSplitter::timePartsToCountingYear($output), 1801);
|
self::assertEquals(NodaTimeSplitter::timePartsToCountingYear($output), 1801);
|
||||||
|
|
||||||
/*
|
|
||||||
* TODO
|
|
||||||
$output = NodaTimeSplitter::attempt_splitting("1900 előtt");
|
|
||||||
self::assertEquals($output, [
|
|
||||||
0 => "?",
|
|
||||||
1 => "1899",
|
|
||||||
2 => "00",
|
|
||||||
3 => "00",
|
|
||||||
4 => "+",
|
|
||||||
5 => "",
|
|
||||||
]);
|
|
||||||
self::assertEquals(NodaTimeSplitter::timePartsToTimeName($output), "Vor 1900");
|
|
||||||
self::assertEquals(NodaTimeSplitter::timePartsToCountingYear($output), 1899);
|
|
||||||
*/
|
|
||||||
|
|
||||||
$output = NodaTimeSplitter::attempt_splitting("1900-ig");
|
|
||||||
self::assertEquals($output, [
|
|
||||||
0 => "?",
|
|
||||||
1 => "1900",
|
|
||||||
2 => "00",
|
|
||||||
3 => "00",
|
|
||||||
4 => "+",
|
|
||||||
5 => "",
|
|
||||||
]);
|
|
||||||
self::assertEquals(NodaTimeSplitter::timePartsToTimeName($output), "Bis 1900");
|
|
||||||
self::assertEquals(NodaTimeSplitter::timePartsToCountingYear($output), 1900);
|
|
||||||
|
|
||||||
$output = NodaTimeSplitter::attempt_splitting("1900ig");
|
|
||||||
self::assertEquals($output, [
|
|
||||||
0 => "?",
|
|
||||||
1 => "1900",
|
|
||||||
2 => "00",
|
|
||||||
3 => "00",
|
|
||||||
4 => "+",
|
|
||||||
5 => "",
|
|
||||||
]);
|
|
||||||
self::assertEquals(NodaTimeSplitter::timePartsToTimeName($output), "Bis 1900");
|
|
||||||
self::assertEquals(NodaTimeSplitter::timePartsToCountingYear($output), 1900);
|
|
||||||
|
|
||||||
$output = NodaTimeSplitter::attempt_splitting("2020. december előtt");
|
|
||||||
self::assertEquals($output, [
|
|
||||||
0 => "?",
|
|
||||||
1 => "2020",
|
|
||||||
2 => "12",
|
|
||||||
3 => "00",
|
|
||||||
4 => "+",
|
|
||||||
5 => "Vor",
|
|
||||||
]);
|
|
||||||
self::assertEquals(NodaTimeSplitter::timePartsToTimeName($output), "Vor Dezember 2020");
|
|
||||||
self::assertEquals(NodaTimeSplitter::timePartsToCountingYear($output), 2020);
|
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
@ -956,9 +881,6 @@ final class NodaTimeSplitterTest extends TestCase {
|
||||||
$output = NodaTimeSplitter::attempt_splitting("1978. július7");
|
$output = NodaTimeSplitter::attempt_splitting("1978. július7");
|
||||||
self::assertEmpty($output);
|
self::assertEmpty($output);
|
||||||
|
|
||||||
$output = NodaTimeSplitter::attempt_splitting("1978. július 7 elött");
|
|
||||||
self::assertEmpty($output);
|
|
||||||
|
|
||||||
$output = NodaTimeSplitter::attempt_splitting("Anfang September 1903");
|
$output = NodaTimeSplitter::attempt_splitting("Anfang September 1903");
|
||||||
self::assertEmpty($output);
|
self::assertEmpty($output);
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue
Block a user