From aa2ed3f39128b06bfc4e97c02b08988d0ac36ef9 Mon Sep 17 00:00:00 2001 From: Francisco Date: Wed, 4 Feb 2026 13:23:30 +0700 Subject: [PATCH 1/9] feature: rentangin time parser --- rentangin/time_parser.go | 577 ++++++++++++++++++++++++++++++++++ rentangin/time_parser_test.go | 342 ++++++++++++++++++++ 2 files changed, 919 insertions(+) create mode 100644 rentangin/time_parser.go create mode 100644 rentangin/time_parser_test.go diff --git a/rentangin/time_parser.go b/rentangin/time_parser.go new file mode 100644 index 0000000..053ab81 --- /dev/null +++ b/rentangin/time_parser.go @@ -0,0 +1,577 @@ +package rentangin + +import ( + "regexp" + "strconv" + "strings" + "time" +) + +// Range is [Start, End) (End exclusive). +type Range struct { + Start time.Time + End time.Time +} + +// IsZero tell if both start and end are zero/undefined +func (r Range) IsZero() bool { return r.Start.IsZero() && r.End.IsZero() } + +// Parse scans query and returns the BEST date range it can extract. +// ok=false means "no range found" (not an error). +// +// Timezone is taken from now.Location(). So pass now in the timezone you want. +func Parse(query string, now time.Time) (r Range, ok bool, err error) { + s := normalizeID(strings.TrimSpace(query)) + if s == "" { + return Range{}, false, nil + } + + bestScore := -1 + var best Range + + // Scan by token boundary (space). + for i := 0; i < len(s); i++ { + if i != 0 && s[i-1] != ' ' { + continue + } + sub := s[i:] + + cand, score, found := parseBestAtStart(sub, now) + if !found { + continue + } + if score > bestScore { + bestScore = score + best = cand + } + } + + if bestScore < 0 { + return Range{}, false, nil + } + return best, true, nil +} + +/* ------------------------- + Core parsing at start +--------------------------*/ + +func parseBestAtStart(s string, now time.Time) (Range, int, bool) { + // Highest: explicit "dari ... sampai ..." + if r, ok := parseFromRangeAtStart(s, now); ok { + return r, scoreFromRange, true + } + // Next: inline "A sampai B" / "A - B" / "A sd B" + if r, ok := parseInlineRangeAtStart(s, now); ok { + return r, scoreInlineRange, true + } + // Single expression with specificity scoring + if r, score, ok := parseOneExprFromStart(s, now); ok { + return r, score, true + } + return Range{}, -1, false +} + +func parseFromRangeAtStart(s string, now time.Time) (Range, bool) { + m := rxFromRange.FindStringSubmatchIndex(s) + if m == nil || m[0] != 0 { + return Range{}, false + } + startExpr := strings.TrimSpace(s[m[2]:m[3]]) + endExpr := strings.TrimSpace(s[m[6]:m[7]]) + + rs, _, ok := parseOneExprAny(startExpr, now) + if !ok { + return Range{}, false + } + re, _, ok := parseOneExprAny(endExpr, now) + if !ok { + return Range{}, false + } + + r := Range{Start: rs.Start, End: re.Start} + if !r.End.After(r.Start) { + // Fallback to start only if user input is weird. + return rs, true + } + return r, true +} + +func parseInlineRangeAtStart(s string, now time.Time) (Range, bool) { + m := rxInlineRange.FindStringSubmatchIndex(s) + if m == nil || m[0] != 0 { + return Range{}, false + } + left := strings.TrimSpace(s[m[2]:m[3]]) + right := strings.TrimSpace(s[m[6]:m[7]]) + + rl, _, ok := parseOneExprAny(left, now) + if !ok { + return Range{}, false + } + rr, _, ok := parseOneExprAny(right, now) + if !ok { + return Range{}, false + } + + r := Range{Start: rl.Start, End: rr.Start} + if !r.End.After(r.Start) { + return rl, true + } + return r, true +} + +/* ------------------------- + Normalization +--------------------------*/ + +func normalizeID(s string) string { + ls := strings.ToLower(s) + + // connectors + ls = strings.ReplaceAll(ls, "s.d.", "sd") + ls = strings.ReplaceAll(ls, "s.d", "sd") + ls = strings.ReplaceAll(ls, "s / d", "sd") + ls = strings.ReplaceAll(ls, "s/d", "sd") + + // multi-token phrases + ls = strings.ReplaceAll(ls, "hari ini", "hariini") + ls = strings.ReplaceAll(ls, "yang lalu", "yanglalu") + ls = strings.ReplaceAll(ls, "ke depan", "kedepan") + + // relative anchors + ls = strings.ReplaceAll(ls, "dari sekarang", "darisekarang") + ls = strings.ReplaceAll(ls, "dari hariini", "darihariini") + + ls = strings.Join(strings.Fields(ls), " ") + return ls +} + +/* ------------------------- + One expression parsing + scoring +--------------------------*/ + +const ( + scoreFromRange = 100 + scoreInlineRange = 90 + + scoreLastNRange = 85 // "7 hari terakhir" + + scoreDay = 80 // day-specific (ymd, dmy, dm past-biased, edge month day) + scoreMonth = 70 // month-specific (my/ym/month-only) + scoreYear = 60 // "tahun 2024" + scoreRelNumber = 50 // "3 hari lalu" (single day) + scoreRelative = 40 // hariini/kemarin/besok, unit modifiers +) + +func parseOneExprAny(expr string, now time.Time) (Range, int, bool) { + e := normalizeID(strings.TrimSpace(expr)) + return parseOneExprFromStart(e, now) +} + +// parseOneExprFromStart parses if expression begins at start of s. +// Ambiguous expressions default to PAST (latest date <= now). +func parseOneExprFromStart(s string, now time.Time) (Range, int, bool) { + // 0) "awal bulan ini" / "akhir bulan ini" (day-range) + if m := rxEdgeMonthThis.FindStringSubmatchIndex(s); m != nil && m[0] == 0 { + edge := s[m[2]:m[3]] // "awal"|"akhir" + if r, ok := edgeMonthThis(edge, now); ok { + return r, scoreDay, true + } + } + + // 1) "7 hari terakhir" / "N hari terakhir" (range) + if m := rxLastNUnit.FindStringSubmatchIndex(s); m != nil && m[0] == 0 { + n, _ := strconv.Atoi(s[m[2]:m[3]]) + unit := s[m[4]:m[5]] + if r, ok := lastN(unit, n, now); ok { + return r, scoreLastNRange, true + } + } + + // 2) One-word relative + if m := rxOneWord.FindStringSubmatchIndex(s); m != nil && m[0] == 0 { + w := s[m[2]:m[3]] + if r, ok := oneWord(w, now); ok { + return r, scoreRelative, true + } + } + + // 3) Unit modifier: minggu/bulan/tahun ini/lalu/depan + if m := rxUnitModifier.FindStringSubmatchIndex(s); m != nil && m[0] == 0 { + unit := s[m[2]:m[3]] + mod := s[m[4]:m[5]] + if r, ok := unitModifier(unit, mod, now); ok { + return r, scoreRelative, true + } + } + + // 4) Relative numeric (single day/week/month/year anchor) + if m := rxRelNUnit.FindStringSubmatchIndex(s); m != nil && m[0] == 0 { + n, _ := strconv.Atoi(s[m[2]:m[3]]) + unit := s[m[4]:m[5]] + rel := s[m[6]:m[7]] + if r, ok := relN(n, unit, rel, now); ok { + return r, scoreRelNumber, true + } + } + + // 5) "tahun 2024" + if m := rxTahunYYYY.FindStringSubmatchIndex(s); m != nil && m[0] == 0 { + y, _ := strconv.Atoi(s[m[2]:m[3]]) + if okYear(y) { + return yearRange(y, now.Location()), scoreYear, true + } + return Range{}, -1, false + } + + // 6) Numeric YMD: 2026-02-04 / 2026/02/04 + if m := rxYMDNumeric.FindStringSubmatchIndex(s); m != nil && m[0] == 0 { + yr, _ := strconv.Atoi(s[m[2]:m[3]]) + moN, _ := strconv.Atoi(s[m[4]:m[5]]) + day, _ := strconv.Atoi(s[m[6]:m[7]]) + if okYear(yr) && 1 <= moN && moN <= 12 && okDay(day) { + start := time.Date(yr, time.Month(moN), day, 0, 0, 0, 0, now.Location()) + return dayRange(start), scoreDay, true + } + return Range{}, -1, false + } + + // 7) D M Y (month name): "4 feb 2026" + if m := rxDMY.FindStringSubmatchIndex(s); m != nil && m[0] == 0 { + day, _ := strconv.Atoi(s[m[2]:m[3]]) + mon := s[m[4]:m[5]] + yr, _ := strconv.Atoi(s[m[6]:m[7]]) + mo, ok := monthID(mon) + if ok && okDay(day) && okYear(yr) { + start := time.Date(yr, mo, day, 0, 0, 0, 0, now.Location()) + return dayRange(start), scoreDay, true + } + return Range{}, -1, false + } + + // 8) M Y: "februari 2026" + if m := rxMY.FindStringSubmatchIndex(s); m != nil && m[0] == 0 { + mon := s[m[2]:m[3]] + yr, _ := strconv.Atoi(s[m[4]:m[5]]) + mo, ok := monthID(mon) + if ok && okYear(yr) { + start := time.Date(yr, mo, 1, 0, 0, 0, 0, now.Location()) + return monthRange(start), scoreMonth, true + } + return Range{}, -1, false + } + + // 9) Y M: "2026 feb" + if m := rxYM.FindStringSubmatchIndex(s); m != nil && m[0] == 0 { + yr, _ := strconv.Atoi(s[m[2]:m[3]]) + mon := s[m[4]:m[5]] + mo, ok := monthID(mon) + if ok && okYear(yr) { + start := time.Date(yr, mo, 1, 0, 0, 0, 0, now.Location()) + return monthRange(start), scoreMonth, true + } + return Range{}, -1, false + } + + // 10) D M (no year): "15 maret" => past-biased (latest <= now) + if m := rxDM.FindStringSubmatchIndex(s); m != nil && m[0] == 0 { + day, _ := strconv.Atoi(s[m[2]:m[3]]) + mon := s[m[4]:m[5]] + if mo, ok := monthID(mon); ok && okDay(day) { + if r, ok := dayMonthPast(day, mo, now); ok { + return r, scoreDay, true + } + } + } + + // 11) Month only: "februari" => past-biased (latest <= now) + if m := rxMonthOnly.FindStringSubmatchIndex(s); m != nil && m[0] == 0 { + mon := s[m[2]:m[3]] + if mo, ok := monthID(mon); ok { + return monthOnlyPast(mo, now), scoreMonth, true + } + } + + return Range{}, -1, false +} + +/* ------------------------- + Semantics +--------------------------*/ + +// "awal bulan ini" -> day-range for first day of current month +// "akhir bulan ini" -> day-range for last day of current month +func edgeMonthThis(edge string, now time.Time) (Range, bool) { + loc := now.Location() + y, m, _ := now.Date() + first := time.Date(y, m, 1, 0, 0, 0, 0, loc) + + switch edge { + case "awal": + return dayRange(first), true + case "akhir": + nextMonth := first.AddDate(0, 1, 0) + lastDay := nextMonth.AddDate(0, 0, -1) + return dayRange(lastDay), true + default: + return Range{}, false + } +} + +// "N hari terakhir" => range including today, ending tomorrow 00:00 +// Start = today 00:00 - (n-1) days +// End = tomorrow 00:00 +func lastN(unit string, n int, now time.Time) (Range, bool) { + if n <= 0 { + return Range{}, false + } + end := truncateDay(now).AddDate(0, 0, 1) + + switch unit { + case "hari": + start := truncateDay(now).AddDate(0, 0, -(n - 1)) + return Range{Start: start, End: end}, true + case "minggu", "pekan": + start := weekRange(now).Start.AddDate(0, 0, -7*(n-1)) + return Range{Start: start, End: end}, true + case "bulan": + start := monthRange(now).Start.AddDate(0, -(n - 1), 0) + return Range{Start: start, End: end}, true + case "tahun": + start := yearRange(now.Year(), now.Location()).Start.AddDate(-(n - 1), 0, 0) + return Range{Start: start, End: end}, true + default: + return Range{}, false + } +} + +func oneWord(w string, now time.Time) (Range, bool) { + switch w { + case "sekarang": + return Range{Start: now, End: now.Add(time.Second)}, true + case "hariini": + return dayRange(now), true + case "kemarin": + return dayRange(now.AddDate(0, 0, -1)), true + case "besok": + return dayRange(now.AddDate(0, 0, 1)), true + default: + return Range{}, false + } +} + +func unitModifier(unit, mod string, now time.Time) (Range, bool) { + switch unit { + case "minggu", "pekan": + switch mod { + case "ini": + return weekRange(now), true + case "lalu": + return weekRange(now.AddDate(0, 0, -7)), true + case "depan": + return weekRange(now.AddDate(0, 0, 7)), true + } + case "bulan": + switch mod { + case "ini": + return monthRange(now), true + case "lalu": + return monthRange(now.AddDate(0, -1, 0)), true + case "depan": + return monthRange(now.AddDate(0, 1, 0)), true + } + case "tahun": + switch mod { + case "ini": + return yearRange(now.Year(), now.Location()), true + case "lalu": + return yearRange(now.Year()-1, now.Location()), true + case "depan": + return yearRange(now.Year()+1, now.Location()), true + } + } + return Range{}, false +} + +func relN(n int, unit, rel string, now time.Time) (Range, bool) { + if n <= 0 { + return Range{}, false + } + switch unit { + case "hari": + switch rel { + case "lalu", "yanglalu": + return dayRange(now.AddDate(0, 0, -n)), true + case "lagi", "kedepan", "darisekarang", "darihariini": + return dayRange(now.AddDate(0, 0, n)), true + } + case "minggu", "pekan": + switch rel { + case "lalu", "yanglalu": + return weekRange(now.AddDate(0, 0, -7*n)), true + case "lagi", "kedepan", "darisekarang", "darihariini": + return weekRange(now.AddDate(0, 0, 7*n)), true + } + case "bulan": + switch rel { + case "lalu", "yanglalu": + return monthRange(now.AddDate(0, -n, 0)), true + case "lagi", "kedepan", "darisekarang", "darihariini": + return monthRange(now.AddDate(0, n, 0)), true + } + case "tahun": + switch rel { + case "lalu", "yanglalu": + return yearRange(now.Year()-n, now.Location()), true + case "lagi", "kedepan", "darisekarang", "darihariini": + return yearRange(now.Year()+n, now.Location()), true + } + } + return Range{}, false +} + +/* ------------------------- + Past-biased ambiguity helpers +--------------------------*/ + +// Month only like "februari" => latest month start <= now (past-biased) +func monthOnlyPast(month time.Month, now time.Time) Range { + loc := now.Location() + y := now.Year() + candidate := time.Date(y, month, 1, 0, 0, 0, 0, loc) + if candidate.After(truncateDay(now)) { + candidate = candidate.AddDate(-1, 0, 0) + } + return monthRange(candidate) +} + +// Day+month without year like "15 maret" => latest date <= now (past-biased) +func dayMonthPast(day int, month time.Month, now time.Time) (Range, bool) { + if !okDay(day) { + return Range{}, false + } + + loc := now.Location() + y := now.Year() + candidate := time.Date(y, month, day, 0, 0, 0, 0, loc) + + // If invalid date (e.g. 31 februari), time.Date rolls over; detect by checking Month/Day. + if candidate.Month() != month || candidate.Day() != day { + return Range{}, false + } + + if candidate.After(truncateDay(now)) { + candidate = candidate.AddDate(-1, 0, 0) + if candidate.Month() != month || candidate.Day() != day { + return Range{}, false + } + } + + return dayRange(candidate), true +} + +/* ------------------------- + Range constructors +--------------------------*/ + +func truncateDay(t time.Time) time.Time { + return time.Date(t.Year(), t.Month(), t.Day(), 0, 0, 0, 0, t.Location()) +} + +func dayRange(ref time.Time) Range { + s := truncateDay(ref) + return Range{Start: s, End: s.AddDate(0, 0, 1)} +} + +func monthRange(ref time.Time) Range { + s := time.Date(ref.Year(), ref.Month(), 1, 0, 0, 0, 0, ref.Location()) + return Range{Start: s, End: s.AddDate(0, 1, 0)} +} + +func yearRange(y int, loc *time.Location) Range { + s := time.Date(y, 1, 1, 0, 0, 0, 0, loc) + return Range{Start: s, End: s.AddDate(1, 0, 0)} +} + +func weekRange(ref time.Time) Range { + // Week starts Monday 00:00 + d := truncateDay(ref) + wd := int(d.Weekday()) + if wd == 0 { + wd = 7 // Sunday -> 7 + } + start := d.AddDate(0, 0, -(wd - 1)) + return Range{Start: start, End: start.AddDate(0, 0, 7)} +} + +/* ------------------------- + Validation & months +--------------------------*/ + +func okYear(y int) bool { return 1000 <= y && y <= 9999 } +func okDay(d int) bool { return 1 <= d && d <= 31 } + +func monthID(s string) (time.Month, bool) { + switch s { + case "jan", "januari": + return time.January, true + case "feb", "februari": + return time.February, true + case "mar", "maret": + return time.March, true + case "apr", "april": + return time.April, true + case "mei": + return time.May, true + case "jun", "juni": + return time.June, true + case "jul", "juli": + return time.July, true + case "agu", "agt", "agustus": + return time.August, true + case "sep", "september": + return time.September, true + case "okt", "oktober": + return time.October, true + case "nov", "november": + return time.November, true + case "des", "desember": + return time.December, true + default: + return 0, false + } +} + +/* ------------------------- + Regex +--------------------------*/ + +var ( + // Make end greedy to allow "10 feb 2026 foo" and still parse prefix. + rxFromRange = regexp.MustCompile(`^dari\s+(.+?)\s+(sampai|hingga|sd|-)\s+(.+)$`) + rxInlineRange = regexp.MustCompile(`^(.+?)\s+(sampai|hingga|sd|-)\s+(.+)$`) + + rxEdgeMonthThis = regexp.MustCompile(`^(awal|akhir)\s+bulan\s+ini(?:\s|$)`) + + rxLastNUnit = regexp.MustCompile(`^(\d+)\s+(hari|minggu|pekan|bulan|tahun)\s+(terakhir|belakangan)(?:\s|$)`) + + rxOneWord = regexp.MustCompile(`^(sekarang|hariini|kemarin|besok)(?:\s|$)`) + rxUnitModifier = regexp.MustCompile(`^(minggu|pekan|bulan|tahun)\s+(ini|lalu|depan)(?:\s|$)`) + + rxRelNUnit = regexp.MustCompile(`^(\d+)\s+(hari|minggu|pekan|bulan|tahun)\s+(lalu|lagi|yanglalu|kedepan|darisekarang|darihariini)(?:\s|$)`) + + rxTahunYYYY = regexp.MustCompile(`^tahun\s+(\d{4})(?:\s|$)`) + + rxYMDNumeric = regexp.MustCompile(`^(\d{4})[-/](\d{1,2})[-/](\d{1,2})(?:\s|$)`) + + rxDMY = regexp.MustCompile(`^(\d{1,2})\s+([a-z]+)\s+(\d{4})(?:\s|$)`) + rxMY = regexp.MustCompile(`^([a-z]+)\s+(\d{4})(?:\s|$)`) + rxYM = regexp.MustCompile(`^(\d{4})\s+([a-z]+)(?:\s|$)`) + + // Ambiguous: + rxDM = regexp.MustCompile(`^(\d{1,2})\s+([a-z]+)(?:\s|$)`) + + // Month only (limit to known tokens to avoid matching random words) + rxMonthOnly = regexp.MustCompile(`^(jan|januari|feb|februari|mar|maret|apr|april|mei|jun|juni|jul|juli|agu|agt|agustus|sep|september|okt|oktober|nov|november|des|desember)(?:\s|$)`) +) diff --git a/rentangin/time_parser_test.go b/rentangin/time_parser_test.go new file mode 100644 index 0000000..0d2b5aa --- /dev/null +++ b/rentangin/time_parser_test.go @@ -0,0 +1,342 @@ +package rentangin + +import ( + "testing" + "time" +) + +func wibLoc() *time.Location { + return time.FixedZone("WIB", 7*3600) +} + +func nowWIB() time.Time { + return time.Date(2026, 2, 4, 10, 0, 0, 0, wibLoc()) // Wed +} + +func assertRangeEq(t *testing.T, got, want Range) { + t.Helper() + if !got.Start.Equal(want.Start) || !got.End.Equal(want.End) { + t.Fatalf("got [%s..%s) want [%s..%s)", + got.Start.Format(time.RFC3339), got.End.Format(time.RFC3339), + want.Start.Format(time.RFC3339), want.End.Format(time.RFC3339), + ) + } +} + +func TestParse_NoRange(t *testing.T) { + now := nowWIB() + + cases := []string{ + "", + " ", + "a", + "berita politik", + "not a date or a time", + "Message me in 2 minutes", // english not supported + "10", + "17", + "10:am", + "uu 24/2024", // ambiguous id-style, we don't parse + } + for _, s := range cases { + t.Run(s, func(t *testing.T) { + _, ok, err := Parse(s, now) + if err != nil { + t.Fatalf("unexpected err: %v", err) + } + if ok { + t.Fatalf("expected ok=false") + } + }) + } +} + +func TestParse_Words(t *testing.T) { + now := nowWIB() + + r, ok, err := Parse("gempa hari ini jakarta", now) + if err != nil || !ok { + t.Fatalf("err=%v ok=%v", err, ok) + } + assertRangeEq(t, r, Range{ + Start: time.Date(2026, 2, 4, 0, 0, 0, 0, wibLoc()), + End: time.Date(2026, 2, 5, 0, 0, 0, 0, wibLoc()), + }) + + r, ok, err = Parse("banjir kemarin bandung", now) + if err != nil || !ok { + t.Fatalf("err=%v ok=%v", err, ok) + } + assertRangeEq(t, r, Range{ + Start: time.Date(2026, 2, 3, 0, 0, 0, 0, wibLoc()), + End: time.Date(2026, 2, 4, 0, 0, 0, 0, wibLoc()), + }) + + r, ok, err = Parse("promo besok", now) + if err != nil || !ok { + t.Fatalf("err=%v ok=%v", err, ok) + } + assertRangeEq(t, r, Range{ + Start: time.Date(2026, 2, 5, 0, 0, 0, 0, wibLoc()), + End: time.Date(2026, 2, 6, 0, 0, 0, 0, wibLoc()), + }) +} + +func TestParse_UnitModifier(t *testing.T) { + now := nowWIB() + + r, ok, err := Parse("minggu ini pilkada", now) + if err != nil || !ok { + t.Fatalf("err=%v ok=%v", err, ok) + } + // Week starts Monday; 2026-02-04 is Wed => week starts 2026-02-02 + assertRangeEq(t, r, Range{ + Start: time.Date(2026, 2, 2, 0, 0, 0, 0, wibLoc()), + End: time.Date(2026, 2, 9, 0, 0, 0, 0, wibLoc()), + }) + + r, ok, err = Parse("bulan lalu saham", now) + if err != nil || !ok { + t.Fatalf("err=%v ok=%v", err, ok) + } + assertRangeEq(t, r, Range{ + Start: time.Date(2026, 1, 1, 0, 0, 0, 0, wibLoc()), + End: time.Date(2026, 2, 1, 0, 0, 0, 0, wibLoc()), + }) + + r, ok, err = Parse("tahun depan event", now) + if err != nil || !ok { + t.Fatalf("err=%v ok=%v", err, ok) + } + assertRangeEq(t, r, Range{ + Start: time.Date(2027, 1, 1, 0, 0, 0, 0, wibLoc()), + End: time.Date(2028, 1, 1, 0, 0, 0, 0, wibLoc()), + }) +} + +func TestParse_RelativeN_SingleDay(t *testing.T) { + now := nowWIB() + + r, ok, err := Parse("3 hari lalu demo", now) + if err != nil || !ok { + t.Fatalf("err=%v ok=%v", err, ok) + } + assertRangeEq(t, r, Range{ + Start: time.Date(2026, 2, 1, 0, 0, 0, 0, wibLoc()), + End: time.Date(2026, 2, 2, 0, 0, 0, 0, wibLoc()), + }) + + r, ok, err = Parse("10 hari ke depan konser", now) + if err != nil || !ok { + t.Fatalf("err=%v ok=%v", err, ok) + } + assertRangeEq(t, r, Range{ + Start: time.Date(2026, 2, 14, 0, 0, 0, 0, wibLoc()), + End: time.Date(2026, 2, 15, 0, 0, 0, 0, wibLoc()), + }) +} + +func TestParse_TahunYYYY(t *testing.T) { + now := nowWIB() + + r, ok, err := Parse("berita tahun 2024 pemilu", now) + if err != nil || !ok { + t.Fatalf("err=%v ok=%v", err, ok) + } + assertRangeEq(t, r, Range{ + Start: time.Date(2024, 1, 1, 0, 0, 0, 0, wibLoc()), + End: time.Date(2025, 1, 1, 0, 0, 0, 0, wibLoc()), + }) + + // bare year should NOT parse (safe mode) + _, ok, err = Parse("2024 pemilu", now) + if err != nil { + t.Fatalf("unexpected err: %v", err) + } + if ok { + t.Fatalf("expected ok=false for bare year") + } +} + +func TestParse_ExplicitDates(t *testing.T) { + now := nowWIB() + + r, ok, err := Parse("banjir 4 feb 2026 jakarta", now) + if err != nil || !ok { + t.Fatalf("err=%v ok=%v", err, ok) + } + assertRangeEq(t, r, Range{ + Start: time.Date(2026, 2, 4, 0, 0, 0, 0, wibLoc()), + End: time.Date(2026, 2, 5, 0, 0, 0, 0, wibLoc()), + }) + + r, ok, err = Parse("laporan februari 2026 ekonomi", now) + if err != nil || !ok { + t.Fatalf("err=%v ok=%v", err, ok) + } + assertRangeEq(t, r, Range{ + Start: time.Date(2026, 2, 1, 0, 0, 0, 0, wibLoc()), + End: time.Date(2026, 3, 1, 0, 0, 0, 0, wibLoc()), + }) + + r, ok, err = Parse("2026 feb inflasi", now) + if err != nil || !ok { + t.Fatalf("err=%v ok=%v", err, ok) + } + assertRangeEq(t, r, Range{ + Start: time.Date(2026, 2, 1, 0, 0, 0, 0, wibLoc()), + End: time.Date(2026, 3, 1, 0, 0, 0, 0, wibLoc()), + }) + + r, ok, err = Parse("update 2026-02-04", now) + if err != nil || !ok { + t.Fatalf("err=%v ok=%v", err, ok) + } + assertRangeEq(t, r, Range{ + Start: time.Date(2026, 2, 4, 0, 0, 0, 0, wibLoc()), + End: time.Date(2026, 2, 5, 0, 0, 0, 0, wibLoc()), + }) +} + +func TestParse_RangeFormsAnywhere(t *testing.T) { + now := nowWIB() + + r, ok, err := Parse("data dari 1 feb 2026 sampai 10 feb 2026 foo", now) + if err != nil || !ok { + t.Fatalf("err=%v ok=%v", err, ok) + } + assertRangeEq(t, r, Range{ + Start: time.Date(2026, 2, 1, 0, 0, 0, 0, wibLoc()), + End: time.Date(2026, 2, 10, 0, 0, 0, 0, wibLoc()), + }) + + r, ok, err = Parse("laporan 1 feb 2026 - 10 feb 2026 foo", now) + if err != nil || !ok { + t.Fatalf("err=%v ok=%v", err, ok) + } + assertRangeEq(t, r, Range{ + Start: time.Date(2026, 2, 1, 0, 0, 0, 0, wibLoc()), + End: time.Date(2026, 2, 10, 0, 0, 0, 0, wibLoc()), + }) + + r, ok, err = Parse("laporan 1 feb 2026 s.d. 10 feb 2026", now) + if err != nil || !ok { + t.Fatalf("err=%v ok=%v", err, ok) + } + assertRangeEq(t, r, Range{ + Start: time.Date(2026, 2, 1, 0, 0, 0, 0, wibLoc()), + End: time.Date(2026, 2, 10, 0, 0, 0, 0, wibLoc()), + }) +} + +func TestParse_Last7HariTerakhir(t *testing.T) { + now := nowWIB() + + r, ok, err := Parse("gempa 7 hari terakhir jakarta", now) + if err != nil || !ok { + t.Fatalf("err=%v ok=%v", err, ok) + } + + // inclusive today: Feb 4 -> start Jan 29, end Feb 5 + assertRangeEq(t, r, Range{ + Start: time.Date(2026, 1, 29, 0, 0, 0, 0, wibLoc()), + End: time.Date(2026, 2, 5, 0, 0, 0, 0, wibLoc()), + }) +} + +func TestParse_AwalAkhirBulanIni(t *testing.T) { + now := nowWIB() + + r, ok, err := Parse("awal bulan ini", now) + if err != nil || !ok { + t.Fatalf("err=%v ok=%v", err, ok) + } + assertRangeEq(t, r, Range{ + Start: time.Date(2026, 2, 1, 0, 0, 0, 0, wibLoc()), + End: time.Date(2026, 2, 2, 0, 0, 0, 0, wibLoc()), + }) + + r, ok, err = Parse("akhir bulan ini", now) + if err != nil || !ok { + t.Fatalf("err=%v ok=%v", err, ok) + } + // 2026 bukan kabisat => Feb 28 + assertRangeEq(t, r, Range{ + Start: time.Date(2026, 2, 28, 0, 0, 0, 0, wibLoc()), + End: time.Date(2026, 3, 1, 0, 0, 0, 0, wibLoc()), + }) +} + +func TestParse_AmbiguousPastBias(t *testing.T) { + loc := wibLoc() + + // now = 2026-02-04 + now := nowWIB() + + // "februari" => Feb 2026 (<= now) + r, ok, err := Parse("laporan februari", now) + if err != nil || !ok { + t.Fatalf("err=%v ok=%v", err, ok) + } + assertRangeEq(t, r, Range{ + Start: time.Date(2026, 2, 1, 0, 0, 0, 0, loc), + End: time.Date(2026, 3, 1, 0, 0, 0, 0, loc), + }) + + // If now is January, "februari" should resolve to last year. + nowJan := time.Date(2026, 1, 10, 10, 0, 0, 0, loc) + r, ok, err = Parse("februari", nowJan) + if err != nil || !ok { + t.Fatalf("err=%v ok=%v", err, ok) + } + assertRangeEq(t, r, Range{ + Start: time.Date(2025, 2, 1, 0, 0, 0, 0, loc), + End: time.Date(2025, 3, 1, 0, 0, 0, 0, loc), + }) + + // "15 maret" with now April 2026 => 15 Mar 2026 + nowApr := time.Date(2026, 4, 10, 10, 0, 0, 0, loc) + r, ok, err = Parse("15 maret", nowApr) + if err != nil || !ok { + t.Fatalf("err=%v ok=%v", err, ok) + } + assertRangeEq(t, r, Range{ + Start: time.Date(2026, 3, 15, 0, 0, 0, 0, loc), + End: time.Date(2026, 3, 16, 0, 0, 0, 0, loc), + }) + + // "15 maret" with now Feb 2026 => 15 Mar 2025 (past-biased) + nowFeb := time.Date(2026, 2, 10, 10, 0, 0, 0, loc) + r, ok, err = Parse("15 maret", nowFeb) + if err != nil || !ok { + t.Fatalf("err=%v ok=%v", err, ok) + } + assertRangeEq(t, r, Range{ + Start: time.Date(2025, 3, 15, 0, 0, 0, 0, loc), + End: time.Date(2025, 3, 16, 0, 0, 0, 0, loc), + }) +} + +func TestParse_BestMatchWins(t *testing.T) { + now := nowWIB() + + // Has both "bulan ini" and "7 hari terakhir" -> prefer 7 hari terakhir (score higher). + r, ok, err := Parse("bulan ini 7 hari terakhir", now) + if err != nil || !ok { + t.Fatalf("err=%v ok=%v", err, ok) + } + assertRangeEq(t, r, Range{ + Start: time.Date(2026, 1, 29, 0, 0, 0, 0, wibLoc()), + End: time.Date(2026, 2, 5, 0, 0, 0, 0, wibLoc()), + }) + + // Explicit range should beat everything else. + r, ok, err = Parse("7 hari terakhir dari 1 feb 2026 sampai 10 feb 2026", now) + if err != nil || !ok { + t.Fatalf("err=%v ok=%v", err, ok) + } + assertRangeEq(t, r, Range{ + Start: time.Date(2026, 2, 1, 0, 0, 0, 0, wibLoc()), + End: time.Date(2026, 2, 10, 0, 0, 0, 0, wibLoc()), + }) +} From 7d37e7d094e810e50f6b374259b27f632b9f4714 Mon Sep 17 00:00:00 2001 From: Agung Hariadi Tedja Date: Wed, 4 Feb 2026 14:51:28 +0700 Subject: [PATCH 2/9] add context awareness for event, some topics and entity --- rentangin/time_parser.go | 149 +++++++++++++++-- rentangin/time_parser_test.go | 302 ++++++++++++++++------------------ 2 files changed, 275 insertions(+), 176 deletions(-) diff --git a/rentangin/time_parser.go b/rentangin/time_parser.go index 053ab81..53e9117 100644 --- a/rentangin/time_parser.go +++ b/rentangin/time_parser.go @@ -13,7 +13,6 @@ type Range struct { End time.Time } -// IsZero tell if both start and end are zero/undefined func (r Range) IsZero() bool { return r.Start.IsZero() && r.End.IsZero() } // Parse scans query and returns the BEST date range it can extract. @@ -26,6 +25,10 @@ func Parse(query string, now time.Time) (r Range, ok bool, err error) { return Range{}, false, nil } + hasEventHint := containsEventHint(s) + topicIntent := containsTopicIntent(s) + nowYear := now.Year() + bestScore := -1 var best Range @@ -35,8 +38,9 @@ func Parse(query string, now time.Time) (r Range, ok bool, err error) { continue } sub := s[i:] + prev := prevToken(s, i) - cand, score, found := parseBestAtStart(sub, now) + cand, score, found := parseBestAtStart(sub, now, prev, hasEventHint, topicIntent, nowYear) if !found { continue } @@ -56,23 +60,23 @@ func Parse(query string, now time.Time) (r Range, ok bool, err error) { Core parsing at start --------------------------*/ -func parseBestAtStart(s string, now time.Time) (Range, int, bool) { +func parseBestAtStart(s string, now time.Time, prev string, hasEventHint bool, topicIntent bool, nowYear int) (Range, int, bool) { // Highest: explicit "dari ... sampai ..." - if r, ok := parseFromRangeAtStart(s, now); ok { + if r, ok := parseFromRangeAtStart(s, now, hasEventHint, topicIntent, nowYear); ok { return r, scoreFromRange, true } // Next: inline "A sampai B" / "A - B" / "A sd B" - if r, ok := parseInlineRangeAtStart(s, now); ok { + if r, ok := parseInlineRangeAtStart(s, now, hasEventHint, topicIntent, nowYear); ok { return r, scoreInlineRange, true } // Single expression with specificity scoring - if r, score, ok := parseOneExprFromStart(s, now); ok { + if r, score, ok := parseOneExprFromStart(s, now, prev, hasEventHint, topicIntent, nowYear); ok { return r, score, true } return Range{}, -1, false } -func parseFromRangeAtStart(s string, now time.Time) (Range, bool) { +func parseFromRangeAtStart(s string, now time.Time, hasEventHint bool, topicIntent bool, nowYear int) (Range, bool) { m := rxFromRange.FindStringSubmatchIndex(s) if m == nil || m[0] != 0 { return Range{}, false @@ -80,11 +84,11 @@ func parseFromRangeAtStart(s string, now time.Time) (Range, bool) { startExpr := strings.TrimSpace(s[m[2]:m[3]]) endExpr := strings.TrimSpace(s[m[6]:m[7]]) - rs, _, ok := parseOneExprAny(startExpr, now) + rs, _, ok := parseOneExprAny(startExpr, now, "", hasEventHint, topicIntent, nowYear) if !ok { return Range{}, false } - re, _, ok := parseOneExprAny(endExpr, now) + re, _, ok := parseOneExprAny(endExpr, now, "", hasEventHint, topicIntent, nowYear) if !ok { return Range{}, false } @@ -97,7 +101,7 @@ func parseFromRangeAtStart(s string, now time.Time) (Range, bool) { return r, true } -func parseInlineRangeAtStart(s string, now time.Time) (Range, bool) { +func parseInlineRangeAtStart(s string, now time.Time, hasEventHint bool, topicIntent bool, nowYear int) (Range, bool) { m := rxInlineRange.FindStringSubmatchIndex(s) if m == nil || m[0] != 0 { return Range{}, false @@ -105,11 +109,11 @@ func parseInlineRangeAtStart(s string, now time.Time) (Range, bool) { left := strings.TrimSpace(s[m[2]:m[3]]) right := strings.TrimSpace(s[m[6]:m[7]]) - rl, _, ok := parseOneExprAny(left, now) + rl, _, ok := parseOneExprAny(left, now, "", hasEventHint, topicIntent, nowYear) if !ok { return Range{}, false } - rr, _, ok := parseOneExprAny(right, now) + rr, _, ok := parseOneExprAny(right, now, "", hasEventHint, topicIntent, nowYear) if !ok { return Range{}, false } @@ -121,6 +125,25 @@ func parseInlineRangeAtStart(s string, now time.Time) (Range, bool) { return r, true } +func prevToken(s string, start int) string { + // start is token boundary index in s + if start <= 0 { + return "" + } + j := start - 1 + for j >= 0 && s[j] == ' ' { + j-- + } + if j < 0 { + return "" + } + k := j + for k >= 0 && s[k] != ' ' { + k-- + } + return s[k+1 : j+1] +} + /* ------------------------- Normalization --------------------------*/ @@ -164,14 +187,14 @@ const ( scoreRelative = 40 // hariini/kemarin/besok, unit modifiers ) -func parseOneExprAny(expr string, now time.Time) (Range, int, bool) { +func parseOneExprAny(expr string, now time.Time, prev string, hasEventHint bool, topicIntent bool, nowYear int) (Range, int, bool) { e := normalizeID(strings.TrimSpace(expr)) - return parseOneExprFromStart(e, now) + return parseOneExprFromStart(e, now, prev, hasEventHint, topicIntent, nowYear) } // parseOneExprFromStart parses if expression begins at start of s. // Ambiguous expressions default to PAST (latest date <= now). -func parseOneExprFromStart(s string, now time.Time) (Range, int, bool) { +func parseOneExprFromStart(s string, now time.Time, prev string, hasEventHint bool, topicIntent bool, nowYear int) (Range, int, bool) { // 0) "awal bulan ini" / "akhir bulan ini" (day-range) if m := rxEdgeMonthThis.FindStringSubmatchIndex(s); m != nil && m[0] == 0 { edge := s[m[2]:m[3]] // "awal"|"akhir" @@ -225,6 +248,15 @@ func parseOneExprFromStart(s string, now time.Time) (Range, int, bool) { return Range{}, -1, false } + // 5.5) Bare year: "1998" (only if event-hint and not topic/title context and not future-year) + if m := rxBareYear.FindStringSubmatchIndex(s); m != nil && m[0] == 0 { + y, _ := strconv.Atoi(s[m[2]:m[3]]) + next := nextTokenAfterPrefix(s[m[1]:]) + if okYear(y) && allowBareYear(prev, next, hasEventHint, topicIntent, y, nowYear) { + return yearRange(y, now.Location()), scoreYear - 1, true + } + } + // 6) Numeric YMD: 2026-02-04 / 2026/02/04 if m := rxYMDNumeric.FindStringSubmatchIndex(s); m != nil && m[0] == 0 { yr, _ := strconv.Atoi(s[m[2]:m[3]]) @@ -296,6 +328,91 @@ func parseOneExprFromStart(s string, now time.Time) (Range, int, bool) { return Range{}, -1, false } +func nextTokenAfterPrefix(rest string) string { + rest = strings.TrimSpace(rest) + if rest == "" { + return "" + } + if i := strings.IndexByte(rest, ' '); i >= 0 { + return rest[:i] + } + return rest +} + +func allowBareYear(prev, next string, hasEventHint bool, topicIntent bool, year int, nowYear int) bool { + // Topic-intent: year is likely a theme, not a published_at filter. + if topicIntent { + return false + } + // Future year: often a target topic/prediction, not document time. + if year > nowYear { + return false + } + // Must have event/time hint (to avoid false positives). + if !hasEventHint { + return false + } + // Title/entity contexts (film, review, trailer, etc). + if isEntityBlocker(prev) || isEntityBlocker(next) { + return false + } + return true +} + +func isEntityBlocker(tok string) bool { + switch tok { + case "film", "movie", "series", "serial", "drama", "anime", + "album", "lagu", "song", "buku", "novel", "game", + "review", "ulasan", "sinopsis", "trailer", "subtitle": + return true + default: + return false + } +} + +func containsEventHint(q string) bool { + // q already normalized & lowercase + eventPhrases := []string{ + "piala dunia", + "world cup", + } + for _, ph := range eventPhrases { + if strings.Contains(q, ph) { + return true + } + } + eventTokens := []string{ + "pemilu", "pilpres", "pilkada", + "oscar", "grammy", + "olimpiade", "olympic", "olympics", + "liga", "turnamen", "juara", "final", "pemenang", + "gempa", "banjir", "krisis", "inflasi", "resesi", + "piala", "musim", + } + for _, t := range eventTokens { + if strings.Contains(q, t) { + return true + } + } + return false +} + +func containsTopicIntent(q string) bool { + // q already normalized & lowercase + tokens := []string{ + "prediksi", "ramalan", "forecast", "proyeksi", "outlook", + "tren", "trend", "gaya", "model", "inspirasi", + "rekomendasi", "tips", "tip", "cara", "panduan", "tutorial", + "review", "ulasan", "sinopsis", "trailer", + } + for _, t := range tokens { + if strings.Contains(q, t) { + return true + } + } + return false +} + /* ------------------------- Semantics --------------------------*/ @@ -548,7 +665,6 @@ func monthID(s string) (time.Month, bool) { --------------------------*/ var ( - // Make end greedy to allow "10 feb 2026 foo" and still parse prefix. rxFromRange = regexp.MustCompile(`^dari\s+(.+?)\s+(sampai|hingga|sd|-)\s+(.+)$`) rxInlineRange = regexp.MustCompile(`^(.+?)\s+(sampai|hingga|sd|-)\s+(.+)$`) @@ -562,6 +678,7 @@ var ( rxRelNUnit = regexp.MustCompile(`^(\d+)\s+(hari|minggu|pekan|bulan|tahun)\s+(lalu|lagi|yanglalu|kedepan|darisekarang|darihariini)(?:\s|$)`) rxTahunYYYY = regexp.MustCompile(`^tahun\s+(\d{4})(?:\s|$)`) + rxBareYear = regexp.MustCompile(`^((?:19|20)\d{2})(?:\s|$)`) rxYMDNumeric = regexp.MustCompile(`^(\d{4})[-/](\d{1,2})[-/](\d{1,2})(?:\s|$)`) diff --git a/rentangin/time_parser_test.go b/rentangin/time_parser_test.go index 0d2b5aa..db84f22 100644 --- a/rentangin/time_parser_test.go +++ b/rentangin/time_parser_test.go @@ -9,11 +9,30 @@ func wibLoc() *time.Location { return time.FixedZone("WIB", 7*3600) } -func nowWIB() time.Time { - return time.Date(2026, 2, 4, 10, 0, 0, 0, wibLoc()) // Wed +func mustRange(t *testing.T, q string, now time.Time) Range { + t.Helper() + r, ok, err := Parse(q, now) + if err != nil { + t.Fatalf("Parse(%q) unexpected err: %v", q, err) + } + if !ok { + t.Fatalf("Parse(%q) expected ok=true, got ok=false", q) + } + return r +} + +func mustNoRange(t *testing.T, q string, now time.Time) { + t.Helper() + _, ok, err := Parse(q, now) + if err != nil { + t.Fatalf("Parse(%q) unexpected err: %v", q, err) + } + if ok { + t.Fatalf("Parse(%q) expected ok=false, got ok=true", q) + } } -func assertRangeEq(t *testing.T, got, want Range) { +func assertRangeEq(t *testing.T, got Range, want Range) { t.Helper() if !got.Start.Equal(want.Start) || !got.End.Equal(want.End) { t.Fatalf("got [%s..%s) want [%s..%s)", @@ -24,7 +43,7 @@ func assertRangeEq(t *testing.T, got, want Range) { } func TestParse_NoRange(t *testing.T) { - now := nowWIB() + now := time.Date(2026, 2, 4, 10, 0, 0, 0, wibLoc()) cases := []string{ "", @@ -32,50 +51,37 @@ func TestParse_NoRange(t *testing.T) { "a", "berita politik", "not a date or a time", - "Message me in 2 minutes", // english not supported + "Message me in 2 minutes", // english not supported here "10", "17", "10:am", - "uu 24/2024", // ambiguous id-style, we don't parse + "uu 24/2024", // should not accidentally become year filter + "pp 12/2019", // ditto + "iphone 12 2020", // no event hint } - for _, s := range cases { - t.Run(s, func(t *testing.T) { - _, ok, err := Parse(s, now) - if err != nil { - t.Fatalf("unexpected err: %v", err) - } - if ok { - t.Fatalf("expected ok=false") - } + for _, q := range cases { + t.Run(q, func(t *testing.T) { + mustNoRange(t, q, now) }) } } func TestParse_Words(t *testing.T) { - now := nowWIB() + now := time.Date(2026, 2, 4, 10, 0, 0, 0, wibLoc()) - r, ok, err := Parse("gempa hari ini jakarta", now) - if err != nil || !ok { - t.Fatalf("err=%v ok=%v", err, ok) - } + r := mustRange(t, "gempa hari ini jakarta", now) assertRangeEq(t, r, Range{ Start: time.Date(2026, 2, 4, 0, 0, 0, 0, wibLoc()), End: time.Date(2026, 2, 5, 0, 0, 0, 0, wibLoc()), }) - r, ok, err = Parse("banjir kemarin bandung", now) - if err != nil || !ok { - t.Fatalf("err=%v ok=%v", err, ok) - } + r = mustRange(t, "banjir kemarin bandung", now) assertRangeEq(t, r, Range{ Start: time.Date(2026, 2, 3, 0, 0, 0, 0, wibLoc()), End: time.Date(2026, 2, 4, 0, 0, 0, 0, wibLoc()), }) - r, ok, err = Parse("promo besok", now) - if err != nil || !ok { - t.Fatalf("err=%v ok=%v", err, ok) - } + r = mustRange(t, "promo besok", now) assertRangeEq(t, r, Range{ Start: time.Date(2026, 2, 5, 0, 0, 0, 0, wibLoc()), End: time.Date(2026, 2, 6, 0, 0, 0, 0, wibLoc()), @@ -83,31 +89,22 @@ func TestParse_Words(t *testing.T) { } func TestParse_UnitModifier(t *testing.T) { - now := nowWIB() + now := time.Date(2026, 2, 4, 10, 0, 0, 0, wibLoc()) // Wed - r, ok, err := Parse("minggu ini pilkada", now) - if err != nil || !ok { - t.Fatalf("err=%v ok=%v", err, ok) - } - // Week starts Monday; 2026-02-04 is Wed => week starts 2026-02-02 + r := mustRange(t, "minggu ini pilkada", now) + // Week starts Monday; 2026-02-04 (Wed) => starts 2026-02-02 assertRangeEq(t, r, Range{ Start: time.Date(2026, 2, 2, 0, 0, 0, 0, wibLoc()), End: time.Date(2026, 2, 9, 0, 0, 0, 0, wibLoc()), }) - r, ok, err = Parse("bulan lalu saham", now) - if err != nil || !ok { - t.Fatalf("err=%v ok=%v", err, ok) - } + r = mustRange(t, "bulan lalu saham", now) assertRangeEq(t, r, Range{ Start: time.Date(2026, 1, 1, 0, 0, 0, 0, wibLoc()), End: time.Date(2026, 2, 1, 0, 0, 0, 0, wibLoc()), }) - r, ok, err = Parse("tahun depan event", now) - if err != nil || !ok { - t.Fatalf("err=%v ok=%v", err, ok) - } + r = mustRange(t, "tahun depan event", now) assertRangeEq(t, r, Range{ Start: time.Date(2027, 1, 1, 0, 0, 0, 0, wibLoc()), End: time.Date(2028, 1, 1, 0, 0, 0, 0, wibLoc()), @@ -115,83 +112,87 @@ func TestParse_UnitModifier(t *testing.T) { } func TestParse_RelativeN_SingleDay(t *testing.T) { - now := nowWIB() + now := time.Date(2026, 2, 4, 10, 0, 0, 0, wibLoc()) - r, ok, err := Parse("3 hari lalu demo", now) - if err != nil || !ok { - t.Fatalf("err=%v ok=%v", err, ok) - } + r := mustRange(t, "3 hari lalu demo", now) assertRangeEq(t, r, Range{ Start: time.Date(2026, 2, 1, 0, 0, 0, 0, wibLoc()), End: time.Date(2026, 2, 2, 0, 0, 0, 0, wibLoc()), }) - r, ok, err = Parse("10 hari ke depan konser", now) - if err != nil || !ok { - t.Fatalf("err=%v ok=%v", err, ok) - } + r = mustRange(t, "10 hari ke depan konser", now) assertRangeEq(t, r, Range{ Start: time.Date(2026, 2, 14, 0, 0, 0, 0, wibLoc()), End: time.Date(2026, 2, 15, 0, 0, 0, 0, wibLoc()), }) } -func TestParse_TahunYYYY(t *testing.T) { - now := nowWIB() +func TestParse_LastN_Range(t *testing.T) { + now := time.Date(2026, 2, 4, 10, 0, 0, 0, wibLoc()) - r, ok, err := Parse("berita tahun 2024 pemilu", now) - if err != nil || !ok { - t.Fatalf("err=%v ok=%v", err, ok) - } + r := mustRange(t, "gempa 7 hari terakhir jakarta", now) + // inclusive today: Feb 4 => start Jan 29; end Feb 5 + assertRangeEq(t, r, Range{ + Start: time.Date(2026, 1, 29, 0, 0, 0, 0, wibLoc()), + End: time.Date(2026, 2, 5, 0, 0, 0, 0, wibLoc()), + }) + + r = mustRange(t, "2 bulan terakhir", now) + assertRangeEq(t, r, Range{ + Start: time.Date(2026, 1, 1, 0, 0, 0, 0, wibLoc()), // monthRange(now) start is Feb 1; - (2-1) months => Jan 1 + End: time.Date(2026, 2, 5, 0, 0, 0, 0, wibLoc()), + }) +} + +func TestParse_AwalAkhirBulanIni(t *testing.T) { + now := time.Date(2026, 2, 4, 10, 0, 0, 0, wibLoc()) + + r := mustRange(t, "awal bulan ini", now) + assertRangeEq(t, r, Range{ + Start: time.Date(2026, 2, 1, 0, 0, 0, 0, wibLoc()), + End: time.Date(2026, 2, 2, 0, 0, 0, 0, wibLoc()), + }) + + r = mustRange(t, "akhir bulan ini", now) + // 2026 bukan kabisat => Feb 28 + assertRangeEq(t, r, Range{ + Start: time.Date(2026, 2, 28, 0, 0, 0, 0, wibLoc()), + End: time.Date(2026, 3, 1, 0, 0, 0, 0, wibLoc()), + }) +} + +func TestParse_Tahun_ExplicitKeyword(t *testing.T) { + now := time.Date(2026, 2, 4, 10, 0, 0, 0, wibLoc()) + + r := mustRange(t, "berita tahun 2024 pemilu", now) assertRangeEq(t, r, Range{ Start: time.Date(2024, 1, 1, 0, 0, 0, 0, wibLoc()), End: time.Date(2025, 1, 1, 0, 0, 0, 0, wibLoc()), }) - - // bare year should NOT parse (safe mode) - _, ok, err = Parse("2024 pemilu", now) - if err != nil { - t.Fatalf("unexpected err: %v", err) - } - if ok { - t.Fatalf("expected ok=false for bare year") - } } func TestParse_ExplicitDates(t *testing.T) { - now := nowWIB() + now := time.Date(2026, 2, 4, 10, 0, 0, 0, wibLoc()) - r, ok, err := Parse("banjir 4 feb 2026 jakarta", now) - if err != nil || !ok { - t.Fatalf("err=%v ok=%v", err, ok) - } + r := mustRange(t, "banjir 4 feb 2026 jakarta", now) assertRangeEq(t, r, Range{ Start: time.Date(2026, 2, 4, 0, 0, 0, 0, wibLoc()), End: time.Date(2026, 2, 5, 0, 0, 0, 0, wibLoc()), }) - r, ok, err = Parse("laporan februari 2026 ekonomi", now) - if err != nil || !ok { - t.Fatalf("err=%v ok=%v", err, ok) - } + r = mustRange(t, "laporan februari 2026 ekonomi", now) assertRangeEq(t, r, Range{ Start: time.Date(2026, 2, 1, 0, 0, 0, 0, wibLoc()), End: time.Date(2026, 3, 1, 0, 0, 0, 0, wibLoc()), }) - r, ok, err = Parse("2026 feb inflasi", now) - if err != nil || !ok { - t.Fatalf("err=%v ok=%v", err, ok) - } + r = mustRange(t, "2026 feb inflasi", now) assertRangeEq(t, r, Range{ Start: time.Date(2026, 2, 1, 0, 0, 0, 0, wibLoc()), End: time.Date(2026, 3, 1, 0, 0, 0, 0, wibLoc()), }) - r, ok, err = Parse("update 2026-02-04", now) - if err != nil || !ok { - t.Fatalf("err=%v ok=%v", err, ok) - } + r = mustRange(t, "update 2026-02-04", now) assertRangeEq(t, r, Range{ Start: time.Date(2026, 2, 4, 0, 0, 0, 0, wibLoc()), End: time.Date(2026, 2, 5, 0, 0, 0, 0, wibLoc()), @@ -199,85 +200,35 @@ func TestParse_ExplicitDates(t *testing.T) { } func TestParse_RangeFormsAnywhere(t *testing.T) { - now := nowWIB() + now := time.Date(2026, 2, 4, 10, 0, 0, 0, wibLoc()) - r, ok, err := Parse("data dari 1 feb 2026 sampai 10 feb 2026 foo", now) - if err != nil || !ok { - t.Fatalf("err=%v ok=%v", err, ok) - } + r := mustRange(t, "data dari 1 feb 2026 sampai 10 feb 2026 foo", now) assertRangeEq(t, r, Range{ Start: time.Date(2026, 2, 1, 0, 0, 0, 0, wibLoc()), End: time.Date(2026, 2, 10, 0, 0, 0, 0, wibLoc()), }) - r, ok, err = Parse("laporan 1 feb 2026 - 10 feb 2026 foo", now) - if err != nil || !ok { - t.Fatalf("err=%v ok=%v", err, ok) - } + r = mustRange(t, "laporan 1 feb 2026 - 10 feb 2026 foo", now) assertRangeEq(t, r, Range{ Start: time.Date(2026, 2, 1, 0, 0, 0, 0, wibLoc()), End: time.Date(2026, 2, 10, 0, 0, 0, 0, wibLoc()), }) - r, ok, err = Parse("laporan 1 feb 2026 s.d. 10 feb 2026", now) - if err != nil || !ok { - t.Fatalf("err=%v ok=%v", err, ok) - } + r = mustRange(t, "laporan 1 feb 2026 s.d. 10 feb 2026", now) assertRangeEq(t, r, Range{ Start: time.Date(2026, 2, 1, 0, 0, 0, 0, wibLoc()), End: time.Date(2026, 2, 10, 0, 0, 0, 0, wibLoc()), }) } -func TestParse_Last7HariTerakhir(t *testing.T) { - now := nowWIB() - - r, ok, err := Parse("gempa 7 hari terakhir jakarta", now) - if err != nil || !ok { - t.Fatalf("err=%v ok=%v", err, ok) - } - - // inclusive today: Feb 4 -> start Jan 29, end Feb 5 - assertRangeEq(t, r, Range{ - Start: time.Date(2026, 1, 29, 0, 0, 0, 0, wibLoc()), - End: time.Date(2026, 2, 5, 0, 0, 0, 0, wibLoc()), - }) -} - -func TestParse_AwalAkhirBulanIni(t *testing.T) { - now := nowWIB() - - r, ok, err := Parse("awal bulan ini", now) - if err != nil || !ok { - t.Fatalf("err=%v ok=%v", err, ok) - } - assertRangeEq(t, r, Range{ - Start: time.Date(2026, 2, 1, 0, 0, 0, 0, wibLoc()), - End: time.Date(2026, 2, 2, 0, 0, 0, 0, wibLoc()), - }) - - r, ok, err = Parse("akhir bulan ini", now) - if err != nil || !ok { - t.Fatalf("err=%v ok=%v", err, ok) - } - // 2026 bukan kabisat => Feb 28 - assertRangeEq(t, r, Range{ - Start: time.Date(2026, 2, 28, 0, 0, 0, 0, wibLoc()), - End: time.Date(2026, 3, 1, 0, 0, 0, 0, wibLoc()), - }) -} - func TestParse_AmbiguousPastBias(t *testing.T) { loc := wibLoc() // now = 2026-02-04 - now := nowWIB() + now := time.Date(2026, 2, 4, 10, 0, 0, 0, loc) // "februari" => Feb 2026 (<= now) - r, ok, err := Parse("laporan februari", now) - if err != nil || !ok { - t.Fatalf("err=%v ok=%v", err, ok) - } + r := mustRange(t, "laporan februari", now) assertRangeEq(t, r, Range{ Start: time.Date(2026, 2, 1, 0, 0, 0, 0, loc), End: time.Date(2026, 3, 1, 0, 0, 0, 0, loc), @@ -285,10 +236,7 @@ func TestParse_AmbiguousPastBias(t *testing.T) { // If now is January, "februari" should resolve to last year. nowJan := time.Date(2026, 1, 10, 10, 0, 0, 0, loc) - r, ok, err = Parse("februari", nowJan) - if err != nil || !ok { - t.Fatalf("err=%v ok=%v", err, ok) - } + r = mustRange(t, "februari", nowJan) assertRangeEq(t, r, Range{ Start: time.Date(2025, 2, 1, 0, 0, 0, 0, loc), End: time.Date(2025, 3, 1, 0, 0, 0, 0, loc), @@ -296,10 +244,7 @@ func TestParse_AmbiguousPastBias(t *testing.T) { // "15 maret" with now April 2026 => 15 Mar 2026 nowApr := time.Date(2026, 4, 10, 10, 0, 0, 0, loc) - r, ok, err = Parse("15 maret", nowApr) - if err != nil || !ok { - t.Fatalf("err=%v ok=%v", err, ok) - } + r = mustRange(t, "15 maret", nowApr) assertRangeEq(t, r, Range{ Start: time.Date(2026, 3, 15, 0, 0, 0, 0, loc), End: time.Date(2026, 3, 16, 0, 0, 0, 0, loc), @@ -307,34 +252,71 @@ func TestParse_AmbiguousPastBias(t *testing.T) { // "15 maret" with now Feb 2026 => 15 Mar 2025 (past-biased) nowFeb := time.Date(2026, 2, 10, 10, 0, 0, 0, loc) - r, ok, err = Parse("15 maret", nowFeb) - if err != nil || !ok { - t.Fatalf("err=%v ok=%v", err, ok) - } + r = mustRange(t, "15 maret", nowFeb) assertRangeEq(t, r, Range{ Start: time.Date(2025, 3, 15, 0, 0, 0, 0, loc), End: time.Date(2025, 3, 16, 0, 0, 0, 0, loc), }) } +func TestParse_BareYear_EventHint_Allows(t *testing.T) { + loc := wibLoc() + now := time.Date(2026, 2, 4, 10, 0, 0, 0, loc) + + r := mustRange(t, "pemenang piala dunia 1998", now) + assertRangeEq(t, r, Range{ + Start: time.Date(1998, 1, 1, 0, 0, 0, 0, loc), + End: time.Date(1999, 1, 1, 0, 0, 0, 0, loc), + }) + + r = mustRange(t, "pemilu 2024", now) + assertRangeEq(t, r, Range{ + Start: time.Date(2024, 1, 1, 0, 0, 0, 0, loc), + End: time.Date(2025, 1, 1, 0, 0, 0, 0, loc), + }) + + // Without event hint should not parse bare year. + mustNoRange(t, "angka 1998", now) +} + +func TestParse_BareYear_TitleContext_Blocked(t *testing.T) { + loc := wibLoc() + now := time.Date(2026, 2, 4, 10, 0, 0, 0, loc) + + // "film" + "review" => treat 2012 as title, not published_at filter + mustNoRange(t, "review film 2012", now) + mustNoRange(t, "film 2012 trailer", now) +} + +func TestParse_BareYear_TopicIntent_Blocked(t *testing.T) { + loc := wibLoc() + now := time.Date(2025, 12, 1, 10, 0, 0, 0, loc) + + // topic intent + future year => do NOT prefilter by year + mustNoRange(t, "prediksi gaya rambut 2026", now) + mustNoRange(t, "tren 2026", now) +} + +func TestParse_BareYear_FutureYear_BlockedEvenWithEventHint(t *testing.T) { + loc := wibLoc() + now := time.Date(2025, 12, 1, 10, 0, 0, 0, loc) + + // future year relative to nowYear(2025) => do NOT prefilter + mustNoRange(t, "piala dunia 2026", now) +} + func TestParse_BestMatchWins(t *testing.T) { - now := nowWIB() + now := time.Date(2026, 2, 4, 10, 0, 0, 0, wibLoc()) // Has both "bulan ini" and "7 hari terakhir" -> prefer 7 hari terakhir (score higher). - r, ok, err := Parse("bulan ini 7 hari terakhir", now) - if err != nil || !ok { - t.Fatalf("err=%v ok=%v", err, ok) - } + r := mustRange(t, "bulan ini 7 hari terakhir", now) assertRangeEq(t, r, Range{ Start: time.Date(2026, 1, 29, 0, 0, 0, 0, wibLoc()), End: time.Date(2026, 2, 5, 0, 0, 0, 0, wibLoc()), }) // Explicit range should beat everything else. - r, ok, err = Parse("7 hari terakhir dari 1 feb 2026 sampai 10 feb 2026", now) - if err != nil || !ok { - t.Fatalf("err=%v ok=%v", err, ok) - } + r = mustRange(t, "7 hari terakhir dari 1 feb 2026 sampai 10 feb 2026", now) assertRangeEq(t, r, Range{ Start: time.Date(2026, 2, 1, 0, 0, 0, 0, wibLoc()), End: time.Date(2026, 2, 10, 0, 0, 0, 0, wibLoc()), From 3981908ad22b081bd3a934470c1603c7d71d12b8 Mon Sep 17 00:00:00 2001 From: Agung Hariadi Tedja Date: Wed, 4 Feb 2026 14:55:01 +0700 Subject: [PATCH 3/9] fix lint --- rentangin/time_parser.go | 1 + 1 file changed, 1 insertion(+) diff --git a/rentangin/time_parser.go b/rentangin/time_parser.go index 53e9117..3622e53 100644 --- a/rentangin/time_parser.go +++ b/rentangin/time_parser.go @@ -13,6 +13,7 @@ type Range struct { End time.Time } +// IsZero returns true if empty func (r Range) IsZero() bool { return r.Start.IsZero() && r.End.IsZero() } // Parse scans query and returns the BEST date range it can extract. From 261e6c5c5a668c43f0fb102b5acd3b33ee2b7d9b Mon Sep 17 00:00:00 2001 From: Agung Hariadi Tedja Date: Wed, 4 Feb 2026 15:32:37 +0700 Subject: [PATCH 4/9] rename out --- rentangin/time_parser.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/rentangin/time_parser.go b/rentangin/time_parser.go index 3622e53..d65a048 100644 --- a/rentangin/time_parser.go +++ b/rentangin/time_parser.go @@ -20,7 +20,7 @@ func (r Range) IsZero() bool { return r.Start.IsZero() && r.End.IsZero() } // ok=false means "no range found" (not an error). // // Timezone is taken from now.Location(). So pass now in the timezone you want. -func Parse(query string, now time.Time) (r Range, ok bool, err error) { +func Parse(query string, now time.Time) (r Range, isTimeRage bool, err error) { s := normalizeID(strings.TrimSpace(query)) if s == "" { return Range{}, false, nil From 3fcb1607bf3cc440e972d3605a095609cfaa61f1 Mon Sep 17 00:00:00 2001 From: Agung Hariadi Tedja Date: Wed, 4 Feb 2026 16:20:36 +0700 Subject: [PATCH 5/9] fix unparam --- rentangin/time_parser.go | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/rentangin/time_parser.go b/rentangin/time_parser.go index d65a048..c7f1d76 100644 --- a/rentangin/time_parser.go +++ b/rentangin/time_parser.go @@ -85,11 +85,11 @@ func parseFromRangeAtStart(s string, now time.Time, hasEventHint bool, topicInte startExpr := strings.TrimSpace(s[m[2]:m[3]]) endExpr := strings.TrimSpace(s[m[6]:m[7]]) - rs, _, ok := parseOneExprAny(startExpr, now, "", hasEventHint, topicIntent, nowYear) + rs, _, ok := parseOneExprAny(startExpr, now, hasEventHint, topicIntent, nowYear) if !ok { return Range{}, false } - re, _, ok := parseOneExprAny(endExpr, now, "", hasEventHint, topicIntent, nowYear) + re, _, ok := parseOneExprAny(endExpr, now, hasEventHint, topicIntent, nowYear) if !ok { return Range{}, false } @@ -110,11 +110,11 @@ func parseInlineRangeAtStart(s string, now time.Time, hasEventHint bool, topicIn left := strings.TrimSpace(s[m[2]:m[3]]) right := strings.TrimSpace(s[m[6]:m[7]]) - rl, _, ok := parseOneExprAny(left, now, "", hasEventHint, topicIntent, nowYear) + rl, _, ok := parseOneExprAny(left, now, hasEventHint, topicIntent, nowYear) if !ok { return Range{}, false } - rr, _, ok := parseOneExprAny(right, now, "", hasEventHint, topicIntent, nowYear) + rr, _, ok := parseOneExprAny(right, now, hasEventHint, topicIntent, nowYear) if !ok { return Range{}, false } @@ -188,9 +188,9 @@ const ( scoreRelative = 40 // hariini/kemarin/besok, unit modifiers ) -func parseOneExprAny(expr string, now time.Time, prev string, hasEventHint bool, topicIntent bool, nowYear int) (Range, int, bool) { +func parseOneExprAny(expr string, now time.Time, hasEventHint bool, topicIntent bool, nowYear int) (Range, int, bool) { e := normalizeID(strings.TrimSpace(expr)) - return parseOneExprFromStart(e, now, prev, hasEventHint, topicIntent, nowYear) + return parseOneExprFromStart(e, now, "", hasEventHint, topicIntent, nowYear) } // parseOneExprFromStart parses if expression begins at start of s. From ac6fb7417b82dc35c33e74b083dbd3983835f09d Mon Sep 17 00:00:00 2001 From: Agung Hariadi Tedja Date: Mon, 9 Feb 2026 11:10:17 +0700 Subject: [PATCH 6/9] add more cases --- rentangin/time_parser.go | 117 +++++++++++++++++++++++++++++++--- rentangin/time_parser_test.go | 98 +++++++++++++++++++++++++++- 2 files changed, 204 insertions(+), 11 deletions(-) diff --git a/rentangin/time_parser.go b/rentangin/time_parser.go index c7f1d76..131ed0a 100644 --- a/rentangin/time_parser.go +++ b/rentangin/time_parser.go @@ -1,4 +1,4 @@ -package rentangin +package daterange import ( "regexp" @@ -20,7 +20,7 @@ func (r Range) IsZero() bool { return r.Start.IsZero() && r.End.IsZero() } // ok=false means "no range found" (not an error). // // Timezone is taken from now.Location(). So pass now in the timezone you want. -func Parse(query string, now time.Time) (r Range, isTimeRage bool, err error) { +func Parse(query string, now time.Time) (r Range, isTimeRange bool, err error) { s := normalizeID(strings.TrimSpace(query)) if s == "" { return Range{}, false, nil @@ -66,6 +66,10 @@ func parseBestAtStart(s string, now time.Time, prev string, hasEventHint bool, t if r, ok := parseFromRangeAtStart(s, now, hasEventHint, topicIntent, nowYear); ok { return r, scoreFromRange, true } + // sejak... / ... sampai sekarang/hari ini / awal pekan ini / akhir pekan lalu / sejak awal tahun + if r, ok := parseSinceOrUntilNowAtStart(s, now, hasEventHint, topicIntent, nowYear); ok { + return r, scoreSinceRange, true + } // Next: inline "A sampai B" / "A - B" / "A sd B" if r, ok := parseInlineRangeAtStart(s, now, hasEventHint, topicIntent, nowYear); ok { return r, scoreInlineRange, true @@ -85,11 +89,21 @@ func parseFromRangeAtStart(s string, now time.Time, hasEventHint bool, topicInte startExpr := strings.TrimSpace(s[m[2]:m[3]]) endExpr := strings.TrimSpace(s[m[6]:m[7]]) - rs, _, ok := parseOneExprAny(startExpr, now, hasEventHint, topicIntent, nowYear) + rs, _, ok := parseOneExprAny(startExpr, now, "", hasEventHint, topicIntent, nowYear) if !ok { return Range{}, false } - re, _, ok := parseOneExprAny(endExpr, now, hasEventHint, topicIntent, nowYear) + + // handle "dari X sampai sekarang/hari ini" as open-ended end of today + if isNowEndExpr(endExpr) { + r := Range{Start: rs.Start, End: endOfTodayPlus1(now)} + if !r.End.After(r.Start) { + return rs, true + } + return r, true + } + + re, _, ok := parseOneExprAny(endExpr, now, "", hasEventHint, topicIntent, nowYear) if !ok { return Range{}, false } @@ -110,11 +124,11 @@ func parseInlineRangeAtStart(s string, now time.Time, hasEventHint bool, topicIn left := strings.TrimSpace(s[m[2]:m[3]]) right := strings.TrimSpace(s[m[6]:m[7]]) - rl, _, ok := parseOneExprAny(left, now, hasEventHint, topicIntent, nowYear) + rl, _, ok := parseOneExprAny(left, now, "", hasEventHint, topicIntent, nowYear) if !ok { return Range{}, false } - rr, _, ok := parseOneExprAny(right, now, hasEventHint, topicIntent, nowYear) + rr, _, ok := parseOneExprAny(right, now, "", hasEventHint, topicIntent, nowYear) if !ok { return Range{}, false } @@ -126,6 +140,50 @@ func parseInlineRangeAtStart(s string, now time.Time, hasEventHint bool, topicIn return r, true } +func parseSinceOrUntilNowAtStart(s string, now time.Time, hasEventHint bool, topicIntent bool, nowYear int) (Range, bool) { + // A) "sejak awal tahun" / "sejak awal tahun ini" + if m := rxSejakAwalTahun.FindStringSubmatchIndex(s); m != nil && m[0] == 0 { + loc := now.Location() + start := time.Date(now.Year(), 1, 1, 0, 0, 0, 0, loc) + return Range{Start: start, End: endOfTodayPlus1(now)}, true + } + + // B) "awal pekan ini" (Senin saja) + if m := rxAwalPekanIni.FindStringSubmatchIndex(s); m != nil && m[0] == 0 { + start := weekRange(now).Start + return dayRange(start), true + } + + // C) "akhir pekan lalu" (Sabtu+Minggu minggu lalu) + if m := rxAkhirPekanLalu.FindStringSubmatchIndex(s); m != nil && m[0] == 0 { + weekStart := weekRange(now).Start // Monday this week + start := weekStart.AddDate(0, 0, -2) // Saturday of last week + return Range{Start: start, End: weekStart}, true + } + + // D) "sejak " + if m := rxSejakExpr.FindStringSubmatchIndex(s); m != nil && m[0] == 0 { + expr := strings.TrimSpace(s[m[2]:m[3]]) + rs, _, ok := parseOneExprAny(expr, now, "", hasEventHint, topicIntent, nowYear) + if !ok { + return Range{}, false + } + return Range{Start: rs.Start, End: endOfTodayPlus1(now)}, true + } + + // E) " sampai|hingga sekarang|hari ini" + if m := rxUntilNow.FindStringSubmatchIndex(s); m != nil && m[0] == 0 { + expr := strings.TrimSpace(s[m[2]:m[3]]) + rs, _, ok := parseOneExprAny(expr, now, "", hasEventHint, topicIntent, nowYear) + if !ok { + return Range{}, false + } + return Range{Start: rs.Start, End: endOfTodayPlus1(now)}, true + } + + return Range{}, false +} + func prevToken(s string, start int) string { // start is token boundary index in s if start <= 0 { @@ -177,6 +235,7 @@ func normalizeID(s string) string { const ( scoreFromRange = 100 + scoreSinceRange = 89 // sejak X / X sampai sekarang scoreInlineRange = 90 scoreLastNRange = 85 // "7 hari terakhir" @@ -188,9 +247,9 @@ const ( scoreRelative = 40 // hariini/kemarin/besok, unit modifiers ) -func parseOneExprAny(expr string, now time.Time, hasEventHint bool, topicIntent bool, nowYear int) (Range, int, bool) { +func parseOneExprAny(expr string, now time.Time, prev string, hasEventHint bool, topicIntent bool, nowYear int) (Range, int, bool) { e := normalizeID(strings.TrimSpace(expr)) - return parseOneExprFromStart(e, now, "", hasEventHint, topicIntent, nowYear) + return parseOneExprFromStart(e, now, prev, hasEventHint, topicIntent, nowYear) } // parseOneExprFromStart parses if expression begins at start of s. @@ -216,6 +275,11 @@ func parseOneExprFromStart(s string, now time.Time, prev string, hasEventHint bo // 2) One-word relative if m := rxOneWord.FindStringSubmatchIndex(s); m != nil && m[0] == 0 { w := s[m[2]:m[3]] + // Guard: jangan biarkan "sampai sekarang/hari ini" tanpa start expr yang valid + // jatuhnya diparse dari token "sekarang/hariini" saja. + if (w == "sekarang" || w == "hariini") && isEndConnector(prev) { + return Range{}, -1, false + } if r, ok := oneWord(w, now); ok { return r, scoreRelative, true } @@ -444,7 +508,7 @@ func lastN(unit string, n int, now time.Time) (Range, bool) { if n <= 0 { return Range{}, false } - end := truncateDay(now).AddDate(0, 0, 1) + end := endOfTodayPlus1(now) switch unit { case "hari": @@ -597,6 +661,10 @@ func truncateDay(t time.Time) time.Time { return time.Date(t.Year(), t.Month(), t.Day(), 0, 0, 0, 0, t.Location()) } +func endOfTodayPlus1(now time.Time) time.Time { + return truncateDay(now).AddDate(0, 0, 1) +} + func dayRange(ref time.Time) Range { s := truncateDay(ref) return Range{Start: s, End: s.AddDate(0, 0, 1)} @@ -623,6 +691,31 @@ func weekRange(ref time.Time) Range { return Range{Start: start, End: start.AddDate(0, 0, 7)} } +/* ------------------------- + Special helpers +--------------------------*/ + +func isNowEndExpr(expr string) bool { + expr = strings.TrimSpace(normalizeID(expr)) + if expr == "" { + return false + } + tok := expr + if i := strings.IndexByte(tok, ' '); i >= 0 { + tok = tok[:i] + } + return tok == "sekarang" || tok == "hariini" +} + +func isEndConnector(prev string) bool { + switch prev { + case "sampai", "hingga", "sd", "-": + return true + default: + return false + } +} + /* ------------------------- Validation & months --------------------------*/ @@ -669,6 +762,12 @@ var ( rxFromRange = regexp.MustCompile(`^dari\s+(.+?)\s+(sampai|hingga|sd|-)\s+(.+)$`) rxInlineRange = regexp.MustCompile(`^(.+?)\s+(sampai|hingga|sd|-)\s+(.+)$`) + rxSejakAwalTahun = regexp.MustCompile(`^sejak\s+awal\s+tahun(?:\s+ini)?(?:\s|$)`) + rxSejakExpr = regexp.MustCompile(`^sejak\s+(.+)$`) + rxUntilNow = regexp.MustCompile(`^(.+?)\s+(sampai|hingga)\s+(sekarang|hariini)(?:\s|$)`) + rxAwalPekanIni = regexp.MustCompile(`^awal\s+(pekan|minggu)\s+ini(?:\s|$)`) + rxAkhirPekanLalu = regexp.MustCompile(`^akhir\s+(pekan|minggu)\s+lalu(?:\s|$)`) + rxEdgeMonthThis = regexp.MustCompile(`^(awal|akhir)\s+bulan\s+ini(?:\s|$)`) rxLastNUnit = regexp.MustCompile(`^(\d+)\s+(hari|minggu|pekan|bulan|tahun)\s+(terakhir|belakangan)(?:\s|$)`) diff --git a/rentangin/time_parser_test.go b/rentangin/time_parser_test.go index db84f22..e9802cb 100644 --- a/rentangin/time_parser_test.go +++ b/rentangin/time_parser_test.go @@ -1,4 +1,4 @@ -package rentangin +package daterange import ( "testing" @@ -139,7 +139,7 @@ func TestParse_LastN_Range(t *testing.T) { r = mustRange(t, "2 bulan terakhir", now) assertRangeEq(t, r, Range{ - Start: time.Date(2026, 1, 1, 0, 0, 0, 0, wibLoc()), // monthRange(now) start is Feb 1; - (2-1) months => Jan 1 + Start: time.Date(2026, 1, 1, 0, 0, 0, 0, wibLoc()), End: time.Date(2026, 2, 5, 0, 0, 0, 0, wibLoc()), }) } @@ -305,6 +305,100 @@ func TestParse_BareYear_FutureYear_BlockedEvenWithEventHint(t *testing.T) { mustNoRange(t, "piala dunia 2026", now) } +func TestParse_SejakKemarin_OpenEnded(t *testing.T) { + loc := wibLoc() + now := time.Date(2026, 2, 4, 10, 0, 0, 0, loc) + + r := mustRange(t, "banjir sejak kemarin", now) + assertRangeEq(t, r, Range{ + Start: time.Date(2026, 2, 3, 0, 0, 0, 0, loc), + End: time.Date(2026, 2, 5, 0, 0, 0, 0, loc), + }) +} + +func TestParse_SejakAwalTahun(t *testing.T) { + loc := wibLoc() + now := time.Date(2026, 2, 4, 10, 0, 0, 0, loc) + + r := mustRange(t, "sejak awal tahun", now) + assertRangeEq(t, r, Range{ + Start: time.Date(2026, 1, 1, 0, 0, 0, 0, loc), + End: time.Date(2026, 2, 5, 0, 0, 0, 0, loc), + }) + + r = mustRange(t, "sejak awal tahun ini", now) + assertRangeEq(t, r, Range{ + Start: time.Date(2026, 1, 1, 0, 0, 0, 0, loc), + End: time.Date(2026, 2, 5, 0, 0, 0, 0, loc), + }) +} + +func TestParse_UntilNow_Form(t *testing.T) { + loc := wibLoc() + now := time.Date(2026, 2, 4, 10, 0, 0, 0, loc) + + // direct form: " sampai sekarang" + r := mustRange(t, "1 feb 2026 sampai sekarang", now) + assertRangeEq(t, r, Range{ + Start: time.Date(2026, 2, 1, 0, 0, 0, 0, loc), + End: time.Date(2026, 2, 5, 0, 0, 0, 0, loc), + }) + + // If expr cannot be parsed -> no range + mustNoRange(t, "kasus covid sampai sekarang", now) +} + +func TestParse_FromX_UntilNow_OpenEnded(t *testing.T) { + loc := wibLoc() + now := time.Date(2026, 2, 4, 10, 0, 0, 0, loc) + + r := mustRange(t, "dari 1 feb 2026 sampai sekarang", now) + assertRangeEq(t, r, Range{ + Start: time.Date(2026, 2, 1, 0, 0, 0, 0, loc), + End: time.Date(2026, 2, 5, 0, 0, 0, 0, loc), + }) + + r = mustRange(t, "dari 1 feb 2026 sampai hari ini", now) + assertRangeEq(t, r, Range{ + Start: time.Date(2026, 2, 1, 0, 0, 0, 0, loc), + End: time.Date(2026, 2, 5, 0, 0, 0, 0, loc), + }) + + r = mustRange(t, "dari kemarin sampai sekarang", now) + assertRangeEq(t, r, Range{ + Start: time.Date(2026, 2, 3, 0, 0, 0, 0, loc), + End: time.Date(2026, 2, 5, 0, 0, 0, 0, loc), + }) +} + +func TestParse_AwalAkhirPekan(t *testing.T) { + loc := wibLoc() + // 2026-02-04 is Wednesday; week starts Monday 2026-02-02 + now := time.Date(2026, 2, 4, 10, 0, 0, 0, loc) + + r := mustRange(t, "awal pekan ini", now) + assertRangeEq(t, r, Range{ + Start: time.Date(2026, 2, 2, 0, 0, 0, 0, loc), + End: time.Date(2026, 2, 3, 0, 0, 0, 0, loc), + }) + + r = mustRange(t, "akhir pekan lalu", now) + // Saturday+Sunday last week => 2026-01-31 to 2026-02-02 + assertRangeEq(t, r, Range{ + Start: time.Date(2026, 1, 31, 0, 0, 0, 0, loc), + End: time.Date(2026, 2, 2, 0, 0, 0, 0, loc), + }) +} + +func TestParse_TerkiniTerbaru_NoRange(t *testing.T) { + loc := wibLoc() + now := time.Date(2026, 2, 4, 10, 0, 0, 0, loc) + + mustNoRange(t, "update terbaru", now) + mustNoRange(t, "update terkini", now) + mustNoRange(t, "hasil terakhir mu", now) +} + func TestParse_BestMatchWins(t *testing.T) { now := time.Date(2026, 2, 4, 10, 0, 0, 0, wibLoc()) From 64a7582a508baf76af9c73978f2b3022a82f27fc Mon Sep 17 00:00:00 2001 From: Agung Hariadi Tedja Date: Mon, 9 Feb 2026 11:12:30 +0700 Subject: [PATCH 7/9] fix unparam --- rentangin/time_parser.go | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/rentangin/time_parser.go b/rentangin/time_parser.go index 131ed0a..1494a2d 100644 --- a/rentangin/time_parser.go +++ b/rentangin/time_parser.go @@ -89,7 +89,7 @@ func parseFromRangeAtStart(s string, now time.Time, hasEventHint bool, topicInte startExpr := strings.TrimSpace(s[m[2]:m[3]]) endExpr := strings.TrimSpace(s[m[6]:m[7]]) - rs, _, ok := parseOneExprAny(startExpr, now, "", hasEventHint, topicIntent, nowYear) + rs, _, ok := parseOneExprAny(startExpr, now, hasEventHint, topicIntent, nowYear) if !ok { return Range{}, false } @@ -103,7 +103,7 @@ func parseFromRangeAtStart(s string, now time.Time, hasEventHint bool, topicInte return r, true } - re, _, ok := parseOneExprAny(endExpr, now, "", hasEventHint, topicIntent, nowYear) + re, _, ok := parseOneExprAny(endExpr, now, hasEventHint, topicIntent, nowYear) if !ok { return Range{}, false } @@ -124,11 +124,11 @@ func parseInlineRangeAtStart(s string, now time.Time, hasEventHint bool, topicIn left := strings.TrimSpace(s[m[2]:m[3]]) right := strings.TrimSpace(s[m[6]:m[7]]) - rl, _, ok := parseOneExprAny(left, now, "", hasEventHint, topicIntent, nowYear) + rl, _, ok := parseOneExprAny(left, now, hasEventHint, topicIntent, nowYear) if !ok { return Range{}, false } - rr, _, ok := parseOneExprAny(right, now, "", hasEventHint, topicIntent, nowYear) + rr, _, ok := parseOneExprAny(right, now, hasEventHint, topicIntent, nowYear) if !ok { return Range{}, false } @@ -164,7 +164,7 @@ func parseSinceOrUntilNowAtStart(s string, now time.Time, hasEventHint bool, top // D) "sejak " if m := rxSejakExpr.FindStringSubmatchIndex(s); m != nil && m[0] == 0 { expr := strings.TrimSpace(s[m[2]:m[3]]) - rs, _, ok := parseOneExprAny(expr, now, "", hasEventHint, topicIntent, nowYear) + rs, _, ok := parseOneExprAny(expr, now, hasEventHint, topicIntent, nowYear) if !ok { return Range{}, false } @@ -174,7 +174,7 @@ func parseSinceOrUntilNowAtStart(s string, now time.Time, hasEventHint bool, top // E) " sampai|hingga sekarang|hari ini" if m := rxUntilNow.FindStringSubmatchIndex(s); m != nil && m[0] == 0 { expr := strings.TrimSpace(s[m[2]:m[3]]) - rs, _, ok := parseOneExprAny(expr, now, "", hasEventHint, topicIntent, nowYear) + rs, _, ok := parseOneExprAny(expr, now, hasEventHint, topicIntent, nowYear) if !ok { return Range{}, false } @@ -247,9 +247,9 @@ const ( scoreRelative = 40 // hariini/kemarin/besok, unit modifiers ) -func parseOneExprAny(expr string, now time.Time, prev string, hasEventHint bool, topicIntent bool, nowYear int) (Range, int, bool) { +func parseOneExprAny(expr string, now time.Time, hasEventHint bool, topicIntent bool, nowYear int) (Range, int, bool) { e := normalizeID(strings.TrimSpace(expr)) - return parseOneExprFromStart(e, now, prev, hasEventHint, topicIntent, nowYear) + return parseOneExprFromStart(e, now, "", hasEventHint, topicIntent, nowYear) } // parseOneExprFromStart parses if expression begins at start of s. From cf9c8f15597d77447046455812f711bb5d518565 Mon Sep 17 00:00:00 2001 From: Agung Hariadi Tedja Date: Mon, 9 Feb 2026 14:34:48 +0700 Subject: [PATCH 8/9] ignore question mark --- rentangin/time_parser.go | 10 ++++++++++ rentangin/time_parser_test.go | 10 ++++++++++ 2 files changed, 20 insertions(+) diff --git a/rentangin/time_parser.go b/rentangin/time_parser.go index 1494a2d..bad01e8 100644 --- a/rentangin/time_parser.go +++ b/rentangin/time_parser.go @@ -225,6 +225,16 @@ func normalizeID(s string) string { ls = strings.ReplaceAll(ls, "dari sekarang", "darisekarang") ls = strings.ReplaceAll(ls, "dari hariini", "darihariini") + // punctuation -> space (so "ini?" becomes "ini") + ls = strings.Map(func(r rune) rune { + switch r { + case '?', '!', '.', ',', ';', ':', '(', ')', '[', ']', '{', '}', '"', '\'', '“', '”', '‘', '’': + return ' ' + default: + return r + } + }, ls) + ls = strings.Join(strings.Fields(ls), " ") return ls } diff --git a/rentangin/time_parser_test.go b/rentangin/time_parser_test.go index e9802cb..3bf849c 100644 --- a/rentangin/time_parser_test.go +++ b/rentangin/time_parser_test.go @@ -416,3 +416,13 @@ func TestParse_BestMatchWins(t *testing.T) { End: time.Date(2026, 2, 10, 0, 0, 0, 0, wibLoc()), }) } + +func TestParse_IgnoresQuestionMark(t *testing.T) { + now := time.Date(2026, 2, 4, 10, 0, 0, 0, wibLoc()) + + r := mustRange(t, "apa kebijakan baru pemerintah bulan ini?", now) + assertRangeEq(t, r, Range{ + Start: time.Date(2026, 2, 1, 0, 0, 0, 0, wibLoc()), + End: time.Date(2026, 3, 1, 0, 0, 0, 0, wibLoc()), + }) +} From f51aaa9e261d30042902ac29e843231fdc5500cc Mon Sep 17 00:00:00 2001 From: Agung Hariadi Tedja Date: Thu, 12 Feb 2026 11:07:39 +0700 Subject: [PATCH 9/9] add intent on mana saja --- tanya/specs.go | 1 + 1 file changed, 1 insertion(+) diff --git a/tanya/specs.go b/tanya/specs.go index 8d294bb..5e5aac5 100644 --- a/tanya/specs.go +++ b/tanya/specs.go @@ -105,6 +105,7 @@ var intentTable = []IntentSpec{ {terms("apa", "apakah", "bagaimana", "gimana", "kapan", "siapa", "dimana", "di mana", "kemana", "ke mana", "berapa"), 2, MatchTypeContains, 0}, {terms(" vs ", " versus "), 1, MatchTypeContains, 0}, {terms(" yang mana "), 2, MatchTypeContains, 0}, + {terms(" mana saja "), 2, MatchTypeContains, 0}, {terms("yang mana "), 2, MatchTypeStarts, 0}, {terms(" mana"), 2, MatchTypeEnds, 0}, {terms("kah"), 1, MatchTypeTokenSuffix, 5},