Extend NodaTimeSplitter
This commit is contained in:
parent
4a49c7a4e7
commit
f3831965a3
|
@ -58,6 +58,8 @@ final class NodaTimeSplitter {
|
||||||
|
|
||||||
const STRINGS_TO_CLEAN = [
|
const STRINGS_TO_CLEAN = [
|
||||||
"között" => "",
|
"között" => "",
|
||||||
|
" рр." => "",
|
||||||
|
" рр" => "",
|
||||||
"nach Christus" => "",
|
"nach Christus" => "",
|
||||||
"n. Christus" => "",
|
"n. Christus" => "",
|
||||||
"nach Chr." => "",
|
"nach Chr." => "",
|
||||||
|
@ -69,6 +71,7 @@ final class NodaTimeSplitter {
|
||||||
"BCE" => "v. Chr.",
|
"BCE" => "v. Chr.",
|
||||||
"CE" => "",
|
"CE" => "",
|
||||||
"vor Christus" => "v. Chr.",
|
"vor Christus" => "v. Chr.",
|
||||||
|
" до н. е." => "v. Chr.",
|
||||||
];
|
];
|
||||||
|
|
||||||
const STRINGS_TO_CLEAN_START = [
|
const STRINGS_TO_CLEAN_START = [
|
||||||
|
@ -133,6 +136,7 @@ final class NodaTimeSplitter {
|
||||||
];
|
];
|
||||||
|
|
||||||
private const REGEX_CENTURIES = '(\ |)(Jh\.|Jhd(|\.)|Jhdt(|\.)|Jahrhundert|sz|század|th century|ст|ст\.)';
|
private const REGEX_CENTURIES = '(\ |)(Jh\.|Jhd(|\.)|Jhdt(|\.)|Jahrhundert|sz|század|th century|ст|ст\.)';
|
||||||
|
private const REGEX_DECADES = '(s|er|er\ Jahre|(\-|\ )es\ évek|(\-|\ )as\ \évek|\ évek|\-es\ években|\-ті)';
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Cleans input strings by trimming obsolete stuff.
|
* Cleans input strings by trimming obsolete stuff.
|
||||||
|
@ -555,10 +559,6 @@ final class NodaTimeSplitter {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if ($datum === '1978. július 7 elött') {
|
|
||||||
throw new Exception(var_export($monat, true));
|
|
||||||
}
|
|
||||||
|
|
||||||
if (!empty($monat) && empty($day) && preg_match('~[0-9]+~', substr($datum, -3))) {
|
if (!empty($monat) && empty($day) && preg_match('~[0-9]+~', substr($datum, -3))) {
|
||||||
return [];
|
return [];
|
||||||
}
|
}
|
||||||
|
@ -864,14 +864,17 @@ final class NodaTimeSplitter {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if (\preg_match("/\ (\(vor|\(Vor|vor)$/", $datum)) {
|
if (\preg_match("/\ (\(vor|\(Vor|vor|előtt)$/", $datum)) {
|
||||||
if (($spacePos = \strpos($datum, " ")) === false) {
|
if (($spacePos = \strrpos($datum, " ")) === false) {
|
||||||
return [];
|
return [];
|
||||||
}
|
}
|
||||||
if ($output = self::attempt_splitting(\substr($datum, 0, $spacePos))) {
|
if ($output = self::attempt_splitting(\substr($datum, 0, $spacePos))) {
|
||||||
|
|
||||||
$output[0] = "?";
|
$output[0] = "?";
|
||||||
if (empty(trim($output[2], "0 .,"))) $output[1] = \strval(\intval($output[1]) - 1);
|
// If month and day are unknown, lower end year by one
|
||||||
|
if (empty(trim($output[2], "0 .,"))) {
|
||||||
|
$output[1] = \strval(\intval($output[1]) - 1);
|
||||||
|
}
|
||||||
$output[5] = "Vor";
|
$output[5] = "Vor";
|
||||||
return $output;
|
return $output;
|
||||||
}
|
}
|
||||||
|
@ -951,6 +954,12 @@ final class NodaTimeSplitter {
|
||||||
return $output;
|
return $output;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
if (str_ends_with($datum, '-as évekig') || str_ends_with($datum, '-es évekig')) {
|
||||||
|
if ($output = self::attempt_splitting(\substr($datum, 0, -2))) {
|
||||||
|
$output[0] = "?";
|
||||||
|
return $output;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
return [];
|
return [];
|
||||||
|
|
||||||
|
@ -1054,14 +1063,14 @@ final class NodaTimeSplitter {
|
||||||
$bcBceIndicator = '+';
|
$bcBceIndicator = '+';
|
||||||
|
|
||||||
// 20er Jahre
|
// 20er Jahre
|
||||||
if (\preg_match("/^[0-9]0(er|er\ Jahre|\-es\ évek|\-as\ \évek)$/", $datum)) {
|
if (\preg_match("/^[0-9]0" . self::REGEX_DECADES . "$/", $datum)) {
|
||||||
$start = "19" . \substr($datum, 0, 2);
|
$start = "19" . \substr($datum, 0, 2);
|
||||||
$ende = (string)(\intval($start) + 9);
|
$ende = (string)(\intval($start) + 9);
|
||||||
return [$start, $ende, "00", "00", $bcBceIndicator, ""];
|
return [$start, $ende, "00", "00", $bcBceIndicator, ""];
|
||||||
}
|
}
|
||||||
|
|
||||||
// 1920er Jahre
|
// 1920er Jahre
|
||||||
if (\preg_match("/^[0-9]{3}0(s|er|er\ Jahre|(\-|\ )es\ évek|(\-|\ )as\ \évek)$/", $datum)) {
|
if (\preg_match("/^[0-9]{3}0" . self::REGEX_DECADES . "$/", $datum)) {
|
||||||
$start = \substr($datum, 0, 4);
|
$start = \substr($datum, 0, 4);
|
||||||
$ende = (string)(\intval($start) + 9);
|
$ende = (string)(\intval($start) + 9);
|
||||||
return [$start, $ende, "00", "00", $bcBceIndicator, ""];
|
return [$start, $ende, "00", "00", $bcBceIndicator, ""];
|
||||||
|
|
|
@ -500,6 +500,18 @@ final class NodaTimeSplitterTest extends TestCase {
|
||||||
self::assertEquals(NodaTimeSplitter::timePartsToTimeName($output), "02.01.2020");
|
self::assertEquals(NodaTimeSplitter::timePartsToTimeName($output), "02.01.2020");
|
||||||
self::assertEquals(NodaTimeSplitter::timePartsToCountingYear($output), 2020);
|
self::assertEquals(NodaTimeSplitter::timePartsToCountingYear($output), 2020);
|
||||||
|
|
||||||
|
$output = NodaTimeSplitter::attempt_splitting("2020. januar. 2.");
|
||||||
|
self::assertEquals($output, [
|
||||||
|
0 => "2020",
|
||||||
|
1 => "2020",
|
||||||
|
2 => "01",
|
||||||
|
3 => "02",
|
||||||
|
4 => "+",
|
||||||
|
5 => "",
|
||||||
|
]);
|
||||||
|
self::assertEquals(NodaTimeSplitter::timePartsToTimeName($output), "02.01.2020");
|
||||||
|
self::assertEquals(NodaTimeSplitter::timePartsToCountingYear($output), 2020);
|
||||||
|
|
||||||
$output = NodaTimeSplitter::attempt_splitting("2020.01.2.");
|
$output = NodaTimeSplitter::attempt_splitting("2020.01.2.");
|
||||||
self::assertEquals($output, [
|
self::assertEquals($output, [
|
||||||
0 => "2020",
|
0 => "2020",
|
||||||
|
@ -609,6 +621,18 @@ final class NodaTimeSplitterTest extends TestCase {
|
||||||
self::assertEquals(NodaTimeSplitter::timePartsToTimeName($output), "1920-1929");
|
self::assertEquals(NodaTimeSplitter::timePartsToTimeName($output), "1920-1929");
|
||||||
self::assertEquals(NodaTimeSplitter::timePartsToCountingYear($output), 1925);
|
self::assertEquals(NodaTimeSplitter::timePartsToCountingYear($output), 1925);
|
||||||
|
|
||||||
|
$output = NodaTimeSplitter::attempt_splitting("1920-es évekig");
|
||||||
|
self::assertEquals($output, [
|
||||||
|
0 => "?",
|
||||||
|
1 => "1929",
|
||||||
|
2 => "00",
|
||||||
|
3 => "00",
|
||||||
|
4 => "+",
|
||||||
|
5 => "",
|
||||||
|
]);
|
||||||
|
self::assertEquals(NodaTimeSplitter::timePartsToTimeName($output), "Bis 1929");
|
||||||
|
self::assertEquals(NodaTimeSplitter::timePartsToCountingYear($output), 1929);
|
||||||
|
|
||||||
$output = NodaTimeSplitter::attempt_splitting("2020. Januar");
|
$output = NodaTimeSplitter::attempt_splitting("2020. Januar");
|
||||||
self::assertEquals([
|
self::assertEquals([
|
||||||
0 => "2020",
|
0 => "2020",
|
||||||
|
@ -768,6 +792,18 @@ final class NodaTimeSplitterTest extends TestCase {
|
||||||
self::assertEquals(NodaTimeSplitter::timePartsToTimeName($output), "Bis 1900");
|
self::assertEquals(NodaTimeSplitter::timePartsToTimeName($output), "Bis 1900");
|
||||||
self::assertEquals(NodaTimeSplitter::timePartsToCountingYear($output), 1900);
|
self::assertEquals(NodaTimeSplitter::timePartsToCountingYear($output), 1900);
|
||||||
|
|
||||||
|
$output = NodaTimeSplitter::attempt_splitting("2020. december előtt");
|
||||||
|
self::assertEquals($output, [
|
||||||
|
0 => "?",
|
||||||
|
1 => "2020",
|
||||||
|
2 => "12",
|
||||||
|
3 => "00",
|
||||||
|
4 => "+",
|
||||||
|
5 => "Vor",
|
||||||
|
]);
|
||||||
|
self::assertEquals(NodaTimeSplitter::timePartsToTimeName($output), "Vor Dezember 2020");
|
||||||
|
self::assertEquals(NodaTimeSplitter::timePartsToCountingYear($output), 2020);
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|
Loading…
Reference in New Issue
Block a user