diff --git a/CHANGELOG.md b/CHANGELOG.md
index d231b46..407d907 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,9 +1,15 @@
# go-utils
+
+## [v1.42.0] - 2025-10-22
+### New Features
+- add tanya package: query intent classifier and question detector
+
+
-## [v1.41.0] - 2025-09-11
+## [v1.41.0] - 2025-09-17
### New Features
-- reverse slice
+- reverse slice ([#71](https://github.com/kumparan/go-utils/issues/71))
@@ -207,6 +213,9 @@
- fix marshal issue on gorm.DeletedAt empty value ([#32](https://github.com/kumparan/go-utils/issues/32))
+
+## [v.1.20.0] - 2022-03-11
+
## [v1.20.0] - 2022-03-11
### New Features
@@ -306,11 +315,11 @@
- add money formatter for multiple currencies ([#13](https://github.com/kumparan/go-utils/issues/13))
-
-## [v1.8.0] - 2020-12-10
-
## [v1.7.1] - 2020-12-10
+
+
+## [v1.8.0] - 2020-12-10
### New Features
- add formatter for indonesian money and date
@@ -375,7 +384,8 @@
- init go-utils
-[Unreleased]: https://github.com/kumparan/go-utils/compare/v1.41.0...HEAD
+[Unreleased]: https://github.com/kumparan/go-utils/compare/v1.42.0...HEAD
+[v1.42.0]: https://github.com/kumparan/go-utils/compare/v1.41.0...v1.42.0
[v1.41.0]: https://github.com/kumparan/go-utils/compare/v1.40.2...v1.41.0
[v1.40.2]: https://github.com/kumparan/go-utils/compare/v1.40.1...v1.40.2
[v1.40.1]: https://github.com/kumparan/go-utils/compare/v1.40.0...v1.40.1
@@ -408,7 +418,8 @@
[v1.23.0]: https://github.com/kumparan/go-utils/compare/v1.22.0...v1.23.0
[v1.22.0]: https://github.com/kumparan/go-utils/compare/v1.21.0...v1.22.0
[v1.21.0]: https://github.com/kumparan/go-utils/compare/v1.20.1...v1.21.0
-[v1.20.1]: https://github.com/kumparan/go-utils/compare/v1.20.0...v1.20.1
+[v1.20.1]: https://github.com/kumparan/go-utils/compare/v.1.20.0...v1.20.1
+[v.1.20.0]: https://github.com/kumparan/go-utils/compare/v1.20.0...v.1.20.0
[v1.20.0]: https://github.com/kumparan/go-utils/compare/v1.19.3...v1.20.0
[v1.19.3]: https://github.com/kumparan/go-utils/compare/v1.19.2...v1.19.3
[v1.19.2]: https://github.com/kumparan/go-utils/compare/v1.19.1...v1.19.2
@@ -425,9 +436,9 @@
[v1.12.0]: https://github.com/kumparan/go-utils/compare/v1.11.0...v1.12.0
[v1.11.0]: https://github.com/kumparan/go-utils/compare/v1.10.0...v1.11.0
[v1.10.0]: https://github.com/kumparan/go-utils/compare/v1.9.0...v1.10.0
-[v1.9.0]: https://github.com/kumparan/go-utils/compare/v1.8.0...v1.9.0
-[v1.8.0]: https://github.com/kumparan/go-utils/compare/v1.7.1...v1.8.0
-[v1.7.1]: https://github.com/kumparan/go-utils/compare/v1.7.0...v1.7.1
+[v1.9.0]: https://github.com/kumparan/go-utils/compare/v1.7.1...v1.9.0
+[v1.7.1]: https://github.com/kumparan/go-utils/compare/v1.8.0...v1.7.1
+[v1.8.0]: https://github.com/kumparan/go-utils/compare/v1.7.0...v1.8.0
[v1.7.0]: https://github.com/kumparan/go-utils/compare/v1.6.0...v1.7.0
[v1.6.0]: https://github.com/kumparan/go-utils/compare/v1.5.0...v1.6.0
[v1.5.0]: https://github.com/kumparan/go-utils/compare/v1.4.0...v1.5.0
diff --git a/tanya/specs.go b/tanya/specs.go
new file mode 100644
index 0000000..8d294bb
--- /dev/null
+++ b/tanya/specs.go
@@ -0,0 +1,138 @@
+package tanya
+
+// Intent names the category a query is classified into.
+type Intent string
+
+// MatchType names the strategy used to compare a rule's terms with a query.
+type MatchType string
+
+// Intent values that a query can be classified as.
+const (
+	IntentUpdate         Intent = "update"
+	IntentExplain        Intent = "explain"
+	IntentHowTo          Intent = "how_to"
+	IntentDefinition     Intent = "definition"
+	IntentComparison     Intent = "comparison"
+	IntentRecommendation Intent = "recommendation"
+	IntentTroubleshoot   Intent = "troubleshoot"
+	IntentLocation       Intent = "location"
+	IntentTime           Intent = "time"
+	IntentPrice          Intent = "price"
+	IntentContact        Intent = "contact"
+	IntentQuestion       Intent = "question" // generic question fallback
+	IntentOther          Intent = "other"
+)
+
+// MatchType values a Rule can use to compare its terms with a query.
+const (
+	MatchTypeContains    MatchType = "contains"
+	MatchTypeStarts      MatchType = "starts"
+	MatchTypeEnds        MatchType = "ends"
+	MatchTypeTokenSuffix MatchType = "token_suffix" // nolint:gosec
+)
+
+// Rule is one weighted pattern that contributes to an intent's score.
+type Rule struct {
+	Terms       []string  // candidate strings; one hit is enough for the rule to match
+	Weight      int       // added to the intent's score when the rule matches
+	MatchType   MatchType // how Terms are compared with the query
+	MinTokenLen int       // token_suffix only: shortest token considered; <=0 means 4
+}
+
+// IntentSpec bundles the rules that score a single intent.
+type IntentSpec struct {
+	Intent   Intent // intent reported when this spec wins
+	Priority int    // decides between equal scores; the higher value wins
+	Rules    []Rule // rules whose weights are summed into the score
+}
+
+// terms returns its arguments as a slice; the rule table uses it as a
+// compact way to spell a list of strings.
+func terms(ss ...string) []string {
+	return ss
+}
+
+// intentTable lists every scored intent together with its priority and rules.
+// Entries are evaluated in the order written here.
+var intentTable = []IntentSpec{
+	{
+		Intent:   IntentUpdate,
+		Priority: 95,
+		Rules: []Rule{
+			{Terms: terms("update", "perkembangan", "terbaru", "terkini", "progress", "lanjutan", "pembaruan"), Weight: 3, MatchType: MatchTypeContains},
+			{Terms: terms("hari ini", "sekarang", "terkini banget"), Weight: 1, MatchType: MatchTypeContains},
+		},
+	},
+	{
+		Intent:   IntentExplain,
+		Priority: 90,
+		Rules: []Rule{
+			{Terms: terms("jelaskan", "jelasin", "penjelasan", "uraikan", "explain"), Weight: 3, MatchType: MatchTypeContains},
+			{Terms: terms("arti", "artinya", "maksud", "makna", "definisi"), Weight: 2, MatchType: MatchTypeContains},
+		},
+	},
+	{
+		Intent:   IntentHowTo,
+		Priority: 80,
+		Rules: []Rule{
+			{Terms: terms("bagaimana cara ", "gimana cara "), Weight: 3, MatchType: MatchTypeStarts},
+			{Terms: terms("cara "), Weight: 3, MatchType: MatchTypeStarts},
+			{Terms: terms(" cara ", " langkah ", " step "), Weight: 1, MatchType: MatchTypeContains},
+			{Terms: terms("resep "), Weight: 3, MatchType: MatchTypeStarts},
+			{Terms: terms(" panduan ", "panduan "), Weight: 2, MatchType: MatchTypeContains},
+			{Terms: terms(" tutorial ", "tutorial "), Weight: 2, MatchType: MatchTypeContains},
+		},
+	},
+	{
+		Intent:   IntentDefinition,
+		Priority: 75,
+		Rules: []Rule{
+			{Terms: terms("apa itu "), Weight: 3, MatchType: MatchTypeStarts},
+			{Terms: terms("apa arti", "apa maksud"), Weight: 2, MatchType: MatchTypeContains},
+		},
+	},
+	{
+		Intent:   IntentComparison,
+		Priority: 70,
+		Rules: []Rule{
+			{Terms: terms(" vs ", " versus "), Weight: 2, MatchType: MatchTypeContains},
+			{Terms: terms("perbedaan ", "beda "), Weight: 2, MatchType: MatchTypeContains},
+			{Terms: terms("bagusan mana", "lebih bagus mana", "pilih mana"), Weight: 2, MatchType: MatchTypeContains},
+		},
+	},
+	{
+		Intent:   IntentRecommendation,
+		Priority: 65,
+		Rules: []Rule{
+			{Terms: terms("rekomendasi", "rekom", "saran"), Weight: 2, MatchType: MatchTypeContains},
+			{Terms: terms("bagusan mana", "pilih mana", "cocok yang mana"), Weight: 2, MatchType: MatchTypeContains},
+			{Terms: terms("menu "), Weight: 2, MatchType: MatchTypeStarts},
+			{Terms: terms(" ide ", "ide "), Weight: 1, MatchType: MatchTypeContains},
+		},
+	},
+	{
+		Intent:   IntentTroubleshoot,
+		Priority: 60,
+		Rules: []Rule{
+			{Terms: terms("kenapa", "mengapa"), Weight: 2, MatchType: MatchTypeContains},
+			{Terms: terms("kok "), Weight: 2, MatchType: MatchTypeStarts},
+			{Terms: terms("error", "gagal", "bug", "crash", "macet", "hang"), Weight: 2, MatchType: MatchTypeContains},
+			{Terms: terms("solusi ", "fix ", "gimana sih", "kenapa sih"), Weight: 1, MatchType: MatchTypeContains},
+		},
+	},
+	{
+		Intent:   IntentLocation,
+		Priority: 55,
+		Rules: []Rule{
+			{Terms: terms("dimana", "di mana", "kemana", "ke mana", "lokasi", "alamat"), Weight: 2, MatchType: MatchTypeContains},
+			{Terms: terms(" kemana", " dimana", "di mana", " alamat", " lokasi"), Weight: 2, MatchType: MatchTypeEnds},
+		},
+	},
+	{
+		Intent:   IntentTime,
+		Priority: 55,
+		Rules: []Rule{
+			{Terms: terms("kapan", "jadwal", "jam berapa", "pukul berapa"), Weight: 2, MatchType: MatchTypeContains},
+			{Terms: terms("hari ini", "minggu ini", "sekarang", "besok", "nanti sore", "malam ini"), Weight: 1, MatchType: MatchTypeContains},
+		},
+	},
+	{
+		Intent:   IntentPrice,
+		Priority: 50,
+		Rules: []Rule{
+			{Terms: terms("harga", "biaya", "tarif", "fee", "ongkir"), Weight: 2, MatchType: MatchTypeContains},
+		},
+	},
+	{
+		Intent:   IntentContact,
+		Priority: 50,
+		Rules: []Rule{
+			{Terms: terms("kontak", "contact", "telepon", "telp", "nomor", "email", "whatsapp", "wa"), Weight: 2, MatchType: MatchTypeContains},
+		},
+	},
+	// Fallback for general questions.
+	{
+		Intent:   IntentQuestion,
+		Priority: 10,
+		Rules: []Rule{
+			{Terms: terms("apa", "apakah", "bagaimana", "gimana", "kapan", "siapa", "dimana", "di mana", "kemana", "ke mana", "berapa"), Weight: 2, MatchType: MatchTypeContains},
+			{Terms: terms(" vs ", " versus "), Weight: 1, MatchType: MatchTypeContains},
+			{Terms: terms(" yang mana "), Weight: 2, MatchType: MatchTypeContains},
+			{Terms: terms("yang mana "), Weight: 2, MatchType: MatchTypeStarts},
+			{Terms: terms(" mana"), Weight: 2, MatchType: MatchTypeEnds},
+			{Terms: terms("kah"), Weight: 1, MatchType: MatchTypeTokenSuffix, MinTokenLen: 5},
+			{Terms: terms("ya ga sih", "ya gak sih", "ya nggak sih", "ya kan", "apa sih", "gimana sih", "kenapa sih"), Weight: 2, MatchType: MatchTypeContains},
+			{Terms: terms(" kok "), Weight: 2, MatchType: MatchTypeContains},
+			{Terms: terms("?"), Weight: 3, MatchType: MatchTypeContains},
+		},
+	},
+}
+
+// abbrevMap maps shorthand spellings to the full words they are expanded to
+// before a query is matched. Entries are grouped by expansion.
+var abbrevMap = map[string]string{
+	// gimana / bagaimana
+	"gmn":  "gimana",
+	"gmna": "gimana",
+	"bgmn": "bagaimana",
+
+	// kenapa
+	"knp":  "kenapa",
+	"knpa": "kenapa",
+
+	// di mana
+	"dmn":  "di mana",
+	"dmna": "di mana",
+	"dimn": "di mana",
+
+	// ke mana
+	"kmn":  "ke mana",
+	"kmna": "ke mana",
+
+	// berapa
+	"brp":  "berapa",
+	"brpa": "berapa",
+
+	// kapan
+	"kpn":  "kapan",
+	"kpan": "kapan",
+
+	// siapa
+	"sapa": "siapa",
+	"sp":   "siapa",
+	"syp":  "siapa",
+	"sypa": "siapa",
+
+	// apakah
+	"apkh":  "apakah",
+	"apakh": "apakah",
+}
diff --git a/tanya/tanya.go b/tanya/tanya.go
new file mode 100644
index 0000000..e635af0
--- /dev/null
+++ b/tanya/tanya.go
@@ -0,0 +1,173 @@
+package tanya
+
+import (
+ "sort"
+ "strings"
+ "unicode"
+ "unicode/utf8"
+)
+
+// IsQuestion reports whether q is classified as a question-like query.
+// Queries classified as price, contact or other are not questions; every
+// other intent is.
+func IsQuestion(q string) bool {
+	switch ClassifyIntent(q) { // nolint:exhaustive
+	case IntentPrice, IntentContact, IntentOther:
+		return false
+	}
+	return true
+}
+
+// ClassifyIntent returns the most likely intent for the given query.
+//
+// The query is normalized first; an empty result yields IntentOther. An
+// intent's score is the sum of the weights of its matching rules, and intents
+// scoring zero are ignored. The highest score wins, equal scores are decided
+// by the higher priority, and intents equal on both keep their intentTable
+// order. IntentOther is returned when no intent scores.
+func ClassifyIntent(q string) Intent {
+	q = normalize(q)
+	if q == "" {
+		return IntentOther
+	}
+	type scored struct {
+		intent      Intent
+		score, prio int
+	}
+	candidates := make([]scored, 0, len(intentTable))
+
+	for _, spec := range intentTable {
+		score := 0
+		for _, r := range spec.Rules {
+			if matchByType(q, r) {
+				score += r.Weight
+			}
+		}
+		if score != 0 {
+			candidates = append(candidates, scored{spec.Intent, score, spec.Priority})
+		}
+	}
+	if len(candidates) == 0 {
+		return IntentOther
+	}
+
+	// A stable sort is required: several intents share a priority, and
+	// sort.Slice gives no guarantee about the order of equal elements.
+	sort.SliceStable(candidates, func(i, j int) bool {
+		if candidates[i].score == candidates[j].score {
+			return candidates[i].prio > candidates[j].prio
+		}
+		return candidates[i].score > candidates[j].score
+	})
+	return candidates[0].intent
+}
+
+// matchByType reports whether rule r matches the normalized query q.
+// A rule matches as soon as one of its terms satisfies the rule's match type;
+// an unknown match type never matches.
+func matchByType(q string, r Rule) bool {
+	// anyTerm reports whether check(s, term) holds for at least one term of r.
+	anyTerm := func(s string, check func(s, term string) bool) bool {
+		for _, term := range r.Terms {
+			if check(s, term) {
+				return true
+			}
+		}
+		return false
+	}
+
+	switch r.MatchType {
+	case MatchTypeContains:
+		return anyTerm(q, strings.Contains)
+	case MatchTypeStarts:
+		return anyTerm(q, strings.HasPrefix)
+	case MatchTypeEnds:
+		return anyTerm(q, strings.HasSuffix)
+	case MatchTypeTokenSuffix:
+		// Tokens shorter than the minimum (in bytes) are skipped so that very
+		// short words do not count as suffix matches.
+		shortest := r.MinTokenLen
+		if shortest <= 0 {
+			shortest = 4
+		}
+		for _, token := range tokenize(q) {
+			if len(token) >= shortest && anyTerm(token, strings.HasSuffix) {
+				return true
+			}
+		}
+		return false
+	default:
+		return false
+	}
+}
+
+// normalize prepares a raw query for matching: whitespace runs are collapsed,
+// the text is lower-cased, known abbreviations are expanded, and the result
+// is collapsed and trimmed once more.
+func normalize(s string) string {
+	lowered := strings.ToLower(strings.TrimSpace(collapseSpaces(s)))
+	expanded := expandAbbreviations(" " + lowered + " ")
+	return strings.TrimSpace(collapseSpaces(expanded))
+}
+
+// collapseSpaces replaces every run of Unicode whitespace in s with a single
+// ASCII space and trims the result.
+func collapseSpaces(s string) string {
+	var out strings.Builder
+	prevSpace := false
+	for _, ch := range s {
+		isSpace := unicode.IsSpace(ch)
+		switch {
+		case !isSpace:
+			out.WriteRune(ch)
+		case !prevSpace:
+			// First whitespace rune of a run: emit the single separator.
+			out.WriteByte(' ')
+		}
+		prevSpace = isSpace
+	}
+	return strings.TrimSpace(out.String())
+}
+
+// expandAbbreviations replaces every whitespace-separated word of s that is a
+// key of abbrevMap with its expansion, wherever the word appears. Trailing
+// "?", ".", "!" and "," are set aside for the lookup and re-attached, so
+// "knp?" and "dmn," are expanded too. Words are re-joined with single spaces.
+func expandAbbreviations(s string) string {
+	fields := strings.Fields(s)
+	for idx := range fields {
+		word := fields[idx]
+		if full, found := abbrevMap[word]; found {
+			fields[idx] = full
+			continue
+		}
+		stem := strings.TrimRight(word, "?.!,")
+		if full, found := abbrevMap[stem]; found {
+			fields[idx] = full + word[len(stem):]
+		}
+	}
+	return strings.Join(fields, " ")
+}
+
+// tokenize splits s on whitespace and strips leading/trailing characters that
+// are neither letters nor digits from each piece. Pieces left empty by the
+// stripping are dropped. No regular expressions are involved.
+func tokenize(s string) []string {
+	fields := strings.Fields(s)
+	tokens := make([]string, 0, len(fields))
+	for _, field := range fields {
+		if trimmed := trimNonAlphaNum(field); trimmed != "" {
+			tokens = append(tokens, trimmed)
+		}
+	}
+	return tokens
+}
+
+// trimNonAlphaNum returns s without its leading and trailing runes that are
+// neither letters nor digits. Runes inside the remaining text are untouched,
+// and the result is empty when s contains no letter or digit.
+func trimNonAlphaNum(s string) string {
+	start, end := 0, len(s)
+	for start < end {
+		// Decode the whole rune: classifying only the first byte would misread
+		// the lead byte of multi-byte punctuation or emoji as a letter.
+		r, w := utf8.DecodeRuneInString(s[start:])
+		if isAlphaNum(r) {
+			break
+		}
+		start += w
+	}
+	for end > start {
+		r, w := utf8.DecodeLastRuneInString(s[:end])
+		if isAlphaNum(r) {
+			break
+		}
+		end -= w
+	}
+	return s[start:end]
+}
+
+// isAlphaNum reports whether r is a Unicode letter or digit.
+func isAlphaNum(r rune) bool { return unicode.IsLetter(r) || unicode.IsDigit(r) }
diff --git a/tanya/tanya_test.go b/tanya/tanya_test.go
new file mode 100644
index 0000000..59a6eb3
--- /dev/null
+++ b/tanya/tanya_test.go
@@ -0,0 +1,190 @@
+package tanya
+
+import (
+ "strings"
+ "testing"
+)
+
+// TestIsQuestion runs IsQuestion over a table of queries, one parallel
+// subtest per query, and compares the result with the expected verdict.
+func TestIsQuestion(t *testing.T) {
+	t.Parallel()
+
+	table := []struct {
+		query      string
+		isQuestion bool
+	}{
+		// --- explicit / canonical questions
+		{"apa itu knowledge graph", true},
+		{"bagaimana cara reset password gmail", true},
+		{"perbedaan redux vs zustand", true},
+		{"kapan sidang mk hari ini", true},
+		{"si andi pergi kemana ya", true},
+		{"kok servernya error pas deploy", true},
+		{"ya nggak sih performanya drop", true},
+		{"jelasin cara mukbang", true},
+		{"update kematian mahasiswa unud", true},
+
+		// --- abbreviations / slang normalization (start/mid/end + punct)
+		{"gmn cara scrape instagram", true}, // gmn -> gimana
+		{"gmna cara beli tiket", true}, // gmna -> gimana
+		{"bgmn cara install docker", true}, // bgmn -> bagaimana
+		{"knp server down semalem", true}, // knp -> kenapa
+		{"knpa servernya lambat", true}, // knpa -> kenapa
+		{"dmn lokasi konser", true}, // dmn -> dimana
+		{"dmna lokasi vaksin", true}, // dmna -> dimana
+		{"ini ada di dimn", true}, // dimn -> dimana
+		{"kmn mau makan siang?", true}, // kmn -> kemana
+		{"kita kmna abis ini", true}, // kmna -> kemana
+		{"brp harga langganan", false}, // brp -> berapa -> price => non-question
+		{"kpn rilis update?", true}, // kpn -> kapan
+		{"kpan meetingnya", true}, // kpan -> kapan
+		{"sapa yang ikut", true}, // sapa -> siapa
+		{"sp aja yang hadir", true}, // sp -> siapa
+		{"knp?", true}, // knp at start + punctuation
+		{"server down dmn,", true}, // trailing punctuation handled
+
+		// --- particles at the end (colloquial endings)
+		{"mau makan kemana siang ini", true},
+		{"dia tadi ke kantor dimana", true},
+		{"ini kenapa ya", true},
+		{"ini apa sih", true},
+		{"performanya turun ya kan", true},
+
+		// --- -kah suffix (via token_suffix) including punctuation
+		{"bisakah presiden diganti", true},
+		{"mungkinkah ini berhasil", true},
+		{"adakah solusi cepatnya", true},
+		{"mungkinkah ini berhasil!!!", true},
+		{"akah", false}, // too short to be meaningful (guard by MinTokenLen)
+
+		// --- how-to variants
+		{"cara deploy ke production docker", true},
+		{"bagaimana cara memperbaiki error 500", true},
+		{"cara cepat push ke github ", true}, // extra spaces
+		{"cara setting oauth di https://example.com/docs", true},
+		{"resep bubur bayi 6 bulan", true},
+		{"resep mpasi tanpa gula garam", true},
+		{"menu mpasi 6 bulan", true},
+		{"ide mpasi murah meriah", true},
+		{"tutorial docker", true},
+		{"panduan upgrade postgres", true},
+
+		// --- comparison signals
+		{"bagusan mana mirrorless atau dslr", true},
+		{"A vs B untuk data pipeline", true},
+		{"versus airflow vs dagster", true},
+		{"pilih mana A atau B", true},
+		{"lebih bagus mana iphone atau pixel", true},
+
+		// --- definition / explain variants
+		{"apa arti resilien", true},
+		{"apa maksud zero copy", true},
+		{"explain RAG pls", true},
+		{"penjelasan implementasi RAG", true},
+
+		// --- update / time / location intent
+		{"terkini erupsi bromo", true},
+		{"perkembangan kasus x sekarang", true},
+		{"lokasi kantor jakarta selatan", true}, // location -> question-like
+		{"jadwal konser jakarta", true}, // time -> question-like
+
+		// --- “yang mana” (keep as question), but “mana store” should not
+		{"yang mana yang benar", true},
+		{"ini pilih yang mana", true},
+		{"mana store", false}, // 'mana' as noun chunk; intended info/browse
+
+		// --- punctuation / emoji / casing
+		{"KENAPA SERVER LEMOT", true},
+		{"Kenapa server lemot?", true},
+		{"kenapa server lemot 🤔", true},
+		{" Bagaimana Cara Reset Password ", true},
+
+		// --- tricky “vs” that is not comparison (product name)
+		{"vs code extensions", false}, // treat 'vs' here as product word, not comparison
+
+		// --- obvious non-questions
+		{"toyota", false},
+		{"jakarta", false},
+		{"harga paket premium", false},
+		{"kontak cs kumparan", false},
+		{"download aplikasi android", false},
+		{"grab promo kupon", false},
+		{"", false},
+		{" \t ", false},
+	}
+
+	for _, c := range table {
+		c := c // per-iteration copy for the parallel closure
+		// Whitespace-only queries all run under the empty subtest name.
+		label := ""
+		if strings.TrimSpace(c.query) != "" {
+			label = c.query
+		}
+		t.Run(label, func(t *testing.T) {
+			t.Parallel()
+			if got := IsQuestion(c.query); got != c.isQuestion {
+				t.Fatalf("IsQuestion(%q) = %v, want %v", c.query, got, c.isQuestion)
+			}
+		})
+	}
+}
+
+// TestClassifyIntent checks that ClassifyIntent returns the expected intent
+// for representative queries, including queries that trigger several intents.
+// Each query runs as its own parallel subtest.
+func TestClassifyIntent(t *testing.T) {
+	t.Parallel()
+
+	cases := []struct {
+		q    string
+		want Intent
+	}{
+		{"update kematian mahasiswa unud", IntentUpdate},
+		{"jelasin cara mukbang", IntentExplain},
+		{"arti overfitting", IntentExplain},
+		{"bagaimana cara reset password gmail", IntentHowTo},
+		{"apa itu knowledge graph", IntentDefinition},
+		{"perbedaan redux vs zustand", IntentComparison},
+		{"rekomendasi laptop 10 jutaan untuk desain", IntentRecommendation},
+		{"kok servernya error pas deploy", IntentTroubleshoot},
+		{"alamat kantor kumparan dimana ya", IntentLocation},
+		{"mau makan kemana siang ini", IntentLocation},
+		{"kapan jadwal konser hari ini", IntentTime},
+		{"berapa harga paket premium", IntentPrice},
+		{"kontak cs atau nomor wa resmi", IntentContact},
+		{"toyota", IntentOther},
+		{"download aplikasi android", IntentOther},
+
+		// mixed signals
+		{"update berita gempa vs banjir hari ini", IntentUpdate},
+		{"apa sih lebih bagus mana A vs B", IntentQuestion},
+		{"gmn cara beli tiket konser", IntentHowTo},
+	}
+
+	for _, tc := range cases {
+		tc := tc // per-iteration copy for the parallel closure
+		t.Run(tc.q, func(t *testing.T) {
+			t.Parallel()
+			got := ClassifyIntent(tc.q)
+			if got != tc.want {
+				// Exactly one intent is expected per query.
+				t.Fatalf("ClassifyIntent(%q) = %s, want %s", tc.q, got, tc.want)
+			}
+		})
+	}
+}
+
+// BenchmarkIsQuestion measures IsQuestion over a fixed mix of question and
+// non-question queries; every iteration classifies the whole set once.
+func BenchmarkIsQuestion(b *testing.B) {
+	samples := []string{
+		"apa itu knowledge graph",
+		"bagaimana cara reset password gmail",
+		"update kematian mahasiswa unud",
+		"jelasin cara mukbang",
+		"perbedaan redux vs zustand",
+		"toyota",
+		"harga paket premium",
+		"kontak cs kumparan",
+		"download aplikasi android",
+	}
+	b.ResetTimer()
+	for iter := 0; iter < b.N; iter++ {
+		for idx := range samples {
+			_ = IsQuestion(samples[idx])
+		}
+	}
+}