Identify time uncertainty for "etwa" and parse "nach dem" as an "after" prefix

Close #37
This commit is contained in:
2025-11-01 14:02:40 +01:00
parent 3fb6c591d7
commit a0de048915
4 changed files with 26 additions and 0 deletions

View File

@@ -98,6 +98,7 @@ final class NodaTimeSplitter {
", ",
" und ",
"nach ",
"nach dem ",
"um ",
"ca.",
"ab ",
@@ -831,6 +832,15 @@ final class NodaTimeSplitter {
}
}
if (\preg_match("/^(Nach dem|nach dem)\ /", $datum)) {
if (($spacePos = \strpos($datum, " ", 6)) === false) {
return false;
}
if ($output = self::attempt_splitting(\substr($datum, $spacePos))) {
return self::_turn_noda_split_time_to_after($output);
}
}
if (\preg_match("/\ (\(nach|nach)$/", $datum)) {
if (($spacePos = \strpos($datum, " ")) === false) {
return false;

View File

@@ -107,6 +107,8 @@ final class NodaUncertaintyHelper {
"za. ",
"~",
"circa ",
"etwa ",
"Etwa ",
"gegen ",
"um ",
"Um ",

View File

@@ -776,6 +776,19 @@ final class NodaTimeSplitterTest extends TestCase {
self::assertEquals($output->toTimeName(), "Vor 2020");
self::assertEquals(NodaTimeSplitter::timePartsToCountingYear($output), 2020);
$output = NodaTimeSplitter::attempt_splitting("Nach dem 02.1.25 v. Chr");
self::assertNotEmpty($output);
self::assertEquals($output->toOldFormat(), [
0 => "-25",
1 => "?",
2 => "01",
3 => "02",
4 => "-",
5 => "Nach",
]);
self::assertEquals($output->toTimeName(), "Nach 02.01.25 v. Chr.");
self::assertEquals(NodaTimeSplitter::timePartsToCountingYear($output), 25);
$output = NodaTimeSplitter::attempt_splitting("Nach Januar 2020");
self::assertNotEmpty($output);
self::assertEquals($output->toOldFormat(), [

View File

@@ -25,6 +25,7 @@ final class NodaUncertaintyHelperTest extends TestCase {
return [
'uncertainty prefix: "wohl 1950"' => ["wohl 1950", "1950", false],
'uncertainty prefix: "etwa 1950"' => ["etwa 1950", "1950", false],
'uncertainty suffix: "1950?"' => ["1950?", "1950", false],
'uncertainty suffix and superfluous chars: "1950 ?,"' => ["1950 ?,", "1950", false],
'certain term with superfluous chars: "1950 ,"' => ["1950 ,", "1950", true],