Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
31 changes: 21 additions & 10 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,9 +1,15 @@
# go-utils

<a name="v1.42.0"></a>
## [v1.42.0] - 2025-10-22
### New Features
- identifier tanya


<a name="v1.41.0"></a>
## [v1.41.0] - 2025-09-11
## [v1.41.0] - 2025-09-17
### New Features
- reverse slice
- reverse slice ([#71](https://github.com/kumparan/go-utils/issues/71))


<a name="v1.40.2"></a>
Expand Down Expand Up @@ -207,6 +213,9 @@
- fix marshal issue on gorm.DeletedAt empty value ([#32](https://github.com/kumparan/go-utils/issues/32))


<a name="v.1.20.0"></a>
## [v.1.20.0] - 2022-03-11

<a name="v1.20.0"></a>
## [v1.20.0] - 2022-03-11
### New Features
Expand Down Expand Up @@ -306,11 +315,11 @@
- add money formatter for multiple currencies ([#13](https://github.com/kumparan/go-utils/issues/13))


<a name="v1.8.0"></a>
## [v1.8.0] - 2020-12-10

<a name="v1.7.1"></a>
## [v1.7.1] - 2020-12-10

<a name="v1.8.0"></a>
## [v1.8.0] - 2020-12-10
### New Features
- add formatter for indonesian money and date

Expand Down Expand Up @@ -375,7 +384,8 @@
- init go-utils


[Unreleased]: https://github.com/kumparan/go-utils/compare/v1.41.0...HEAD
[Unreleased]: https://github.com/kumparan/go-utils/compare/v1.42.0...HEAD
[v1.42.0]: https://github.com/kumparan/go-utils/compare/v1.41.0...v1.42.0
[v1.41.0]: https://github.com/kumparan/go-utils/compare/v1.40.2...v1.41.0
[v1.40.2]: https://github.com/kumparan/go-utils/compare/v1.40.1...v1.40.2
[v1.40.1]: https://github.com/kumparan/go-utils/compare/v1.40.0...v1.40.1
Expand Down Expand Up @@ -408,7 +418,8 @@
[v1.23.0]: https://github.com/kumparan/go-utils/compare/v1.22.0...v1.23.0
[v1.22.0]: https://github.com/kumparan/go-utils/compare/v1.21.0...v1.22.0
[v1.21.0]: https://github.com/kumparan/go-utils/compare/v1.20.1...v1.21.0
[v1.20.1]: https://github.com/kumparan/go-utils/compare/v1.20.0...v1.20.1
[v1.20.1]: https://github.com/kumparan/go-utils/compare/v.1.20.0...v1.20.1
[v.1.20.0]: https://github.com/kumparan/go-utils/compare/v1.20.0...v.1.20.0
[v1.20.0]: https://github.com/kumparan/go-utils/compare/v1.19.3...v1.20.0
[v1.19.3]: https://github.com/kumparan/go-utils/compare/v1.19.2...v1.19.3
[v1.19.2]: https://github.com/kumparan/go-utils/compare/v1.19.1...v1.19.2
Expand All @@ -425,9 +436,9 @@
[v1.12.0]: https://github.com/kumparan/go-utils/compare/v1.11.0...v1.12.0
[v1.11.0]: https://github.com/kumparan/go-utils/compare/v1.10.0...v1.11.0
[v1.10.0]: https://github.com/kumparan/go-utils/compare/v1.9.0...v1.10.0
[v1.9.0]: https://github.com/kumparan/go-utils/compare/v1.8.0...v1.9.0
[v1.8.0]: https://github.com/kumparan/go-utils/compare/v1.7.1...v1.8.0
[v1.7.1]: https://github.com/kumparan/go-utils/compare/v1.7.0...v1.7.1
[v1.9.0]: https://github.com/kumparan/go-utils/compare/v1.7.1...v1.9.0
[v1.7.1]: https://github.com/kumparan/go-utils/compare/v1.8.0...v1.7.1
[v1.8.0]: https://github.com/kumparan/go-utils/compare/v1.7.0...v1.8.0
[v1.7.0]: https://github.com/kumparan/go-utils/compare/v1.6.0...v1.7.0
[v1.6.0]: https://github.com/kumparan/go-utils/compare/v1.5.0...v1.6.0
[v1.5.0]: https://github.com/kumparan/go-utils/compare/v1.4.0...v1.5.0
Expand Down
138 changes: 138 additions & 0 deletions tanya/specs.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,138 @@
package tanya

// Intent names the category that a query is classified into.
type Intent string

// MatchType selects how the terms of a rule are compared against a query.
type MatchType string

// Intent values produced by the classifier.
const (
	IntentUpdate         Intent = "update"
	IntentExplain        Intent = "explain"
	IntentHowTo          Intent = "how_to"
	IntentDefinition     Intent = "definition"
	IntentComparison     Intent = "comparison"
	IntentRecommendation Intent = "recommendation"
	IntentTroubleshoot   Intent = "troubleshoot"
	IntentLocation       Intent = "location"
	IntentTime           Intent = "time"
	IntentPrice          Intent = "price"
	IntentContact        Intent = "contact"
	IntentQuestion       Intent = "question" // general fallback
	IntentOther          Intent = "other"
)

// MatchType values understood by the rule matcher.
const (
	// MatchTypeContains matches when a term occurs anywhere in the query.
	MatchTypeContains MatchType = "contains"
	// MatchTypeStarts matches when the query begins with a term.
	MatchTypeStarts MatchType = "starts"
	// MatchTypeEnds matches when the query ends with a term.
	MatchTypeEnds MatchType = "ends"
	// MatchTypeTokenSuffix matches when a sufficiently long token of the
	// query ends with a term.
	MatchTypeTokenSuffix MatchType = "token_suffix" // nolint:gosec
)

// Rule is a weighted pattern used to score a query for an intent.
//
// Field order is significant: the intent table builds Rule values with
// positional (unkeyed) literals.
type Rule struct {
	// Terms are the candidate strings; the rule matches as soon as any one
	// of them matches.
	Terms []string
	// Weight is added to the intent's score when the rule matches.
	Weight int
	// MatchType selects how Terms are compared against the query.
	MatchType MatchType
	// MinTokenLen is optional and only used for token_suffix rules: shorter
	// tokens are skipped. Values <= 0 fall back to the default of 4.
	MinTokenLen int
}

// IntentSpec is the specification of one intent: its tie-break priority and
// the rules that score a query for it.
//
// Field order is significant: the intent table builds IntentSpec values with
// positional (unkeyed) literals.
type IntentSpec struct {
	// Intent is the intent these rules vote for.
	Intent Intent
	// Priority decides between intents with equal scores; higher wins.
	Priority int
	// Rules are evaluated independently and their weights are summed.
	Rules []Rule
}

// terms returns its arguments as a slice. The intent table uses it as a
// shorthand for writing []string literals.
func terms(ss ...string) []string {
	return ss
}

// intentTable lists, for every intent, the rules used to score a query.
//
// Entries are written positionally: an IntentSpec is {Intent, Priority, Rules}
// and a Rule is {Terms, Weight, MatchType, MinTokenLen}. A query's score for an
// intent is the sum of the weights of its matching rules; the highest score
// wins and Priority breaks score ties (see ClassifyIntent).
//
// Queries are lowercased, whitespace-collapsed and trimmed before matching, so
// a term with a leading space cannot match at the very start of a query and a
// term with a trailing space cannot match at the very end.
var intentTable = []IntentSpec{
{IntentUpdate, 95, []Rule{
{terms("update", "perkembangan", "terbaru", "terkini", "progress", "lanjutan", "pembaruan"), 3, MatchTypeContains, 0},
{terms("hari ini", "sekarang", "terkini banget"), 1, MatchTypeContains, 0},
}},
{IntentExplain, 90, []Rule{
{terms("jelaskan", "jelasin", "penjelasan", "uraikan", "explain"), 3, MatchTypeContains, 0},
{terms("arti", "artinya", "maksud", "makna", "definisi"), 2, MatchTypeContains, 0},
}},
{IntentHowTo, 80, []Rule{
{terms("bagaimana cara ", "gimana cara "), 3, MatchTypeStarts, 0},
{terms("cara "), 3, MatchTypeStarts, 0},
{terms(" cara ", " langkah ", " step "), 1, MatchTypeContains, 0},
{terms("resep "), 3, MatchTypeStarts, 0},
{terms(" panduan ", "panduan "), 2, MatchTypeContains, 0},
{terms(" tutorial ", "tutorial "), 2, MatchTypeContains, 0},
}},
{IntentDefinition, 75, []Rule{
{terms("apa itu "), 3, MatchTypeStarts, 0},
{terms("apa arti", "apa maksud"), 2, MatchTypeContains, 0},
}},
{IntentComparison, 70, []Rule{
{terms(" vs ", " versus "), 2, MatchTypeContains, 0},
{terms("perbedaan ", "beda "), 2, MatchTypeContains, 0},
{terms("bagusan mana", "lebih bagus mana", "pilih mana"), 2, MatchTypeContains, 0},
}},
{IntentRecommendation, 65, []Rule{
{terms("rekomendasi", "rekom", "saran"), 2, MatchTypeContains, 0},
// "bagusan mana" and "pilih mana" also appear under IntentComparison, which
// has the higher priority.
{terms("bagusan mana", "pilih mana", "cocok yang mana"), 2, MatchTypeContains, 0},
{terms("menu "), 2, MatchTypeStarts, 0},
{terms(" ide ", "ide "), 1, MatchTypeContains, 0},
}},
{IntentTroubleshoot, 60, []Rule{
{terms("kenapa", "mengapa"), 2, MatchTypeContains, 0},
{terms("kok "), 2, MatchTypeStarts, 0},
// NOTE(review): these are substring matches, so short terms such as "bug"
// or "hang" also hit inside longer words — confirm that is intended.
{terms("error", "gagal", "bug", "crash", "macet", "hang"), 2, MatchTypeContains, 0},
{terms("solusi ", "fix ", "gimana sih", "kenapa sih"), 1, MatchTypeContains, 0},
}},
// IntentLocation shares priority 55 with IntentTime, so Priority alone cannot
// separate the two when their scores tie.
{IntentLocation, 55, []Rule{
{terms("dimana", "di mana", "kemana", "ke mana", "lokasi", "alamat"), 2, MatchTypeContains, 0},
{terms(" kemana", " dimana", "di mana", " alamat", " lokasi"), 2, MatchTypeEnds, 0},
}},
{IntentTime, 55, []Rule{
{terms("kapan", "jadwal", "jam berapa", "pukul berapa"), 2, MatchTypeContains, 0},
{terms("hari ini", "minggu ini", "sekarang", "besok", "nanti sore", "malam ini"), 1, MatchTypeContains, 0},
}},
// IntentPrice shares priority 50 with IntentContact, so Priority alone cannot
// separate the two when their scores tie.
{IntentPrice, 50, []Rule{
// NOTE(review): "fee" is a substring match and therefore also hits words
// that merely contain it — confirm that is intended.
{terms("harga", "biaya", "tarif", "fee", "ongkir"), 2, MatchTypeContains, 0},
}},
{IntentContact, 50, []Rule{
// NOTE(review): "wa" is a substring match, so any query containing the
// letters "wa" (for example inside "warna") scores as contact — confirm
// that is intended.
{terms("kontak", "contact", "telepon", "telp", "nomor", "email", "whatsapp", "wa"), 2, MatchTypeContains, 0},
}},
// general-question fallback
{IntentQuestion, 10, []Rule{
// Substring matches: "apa" is also found inside longer words such as "siapa".
{terms("apa", "apakah", "bagaimana", "gimana", "kapan", "siapa", "dimana", "di mana", "kemana", "ke mana", "berapa"), 2, MatchTypeContains, 0},
{terms(" vs ", " versus "), 1, MatchTypeContains, 0},
{terms(" yang mana "), 2, MatchTypeContains, 0},
{terms("yang mana "), 2, MatchTypeStarts, 0},
{terms(" mana"), 2, MatchTypeEnds, 0},
// Token-suffix rule: only tokens of at least 5 bytes ending in "kah" count.
{terms("kah"), 1, MatchTypeTokenSuffix, 5},
{terms("ya ga sih", "ya gak sih", "ya nggak sih", "ya kan", "apa sih", "gimana sih", "kenapa sih"), 2, MatchTypeContains, 0},
{terms(" kok "), 2, MatchTypeContains, 0},
// A literal question mark anywhere in the query.
{terms("?"), 3, MatchTypeContains, 0},
}},
}

// abbrevMap maps chat-style abbreviations to the full word(s) they are
// expanded to before rule matching. Keys are lowercase and contain no
// punctuation; entries are grouped by their expansion.
var abbrevMap = map[string]string{
	// apakah
	"apkh":  "apakah",
	"apakh": "apakah",

	// bagaimana
	"bgmn": "bagaimana",

	// berapa
	"brp":  "berapa",
	"brpa": "berapa",

	// di mana
	"dmn":  "di mana",
	"dmna": "di mana",
	"dimn": "di mana",

	// gimana
	"gmn":  "gimana",
	"gmna": "gimana",

	// kapan
	"kpn":  "kapan",
	"kpan": "kapan",

	// ke mana
	"kmn":  "ke mana",
	"kmna": "ke mana",

	// kenapa
	"knp":  "kenapa",
	"knpa": "kenapa",

	// siapa
	"sapa": "siapa",
	"sp":   "siapa",
	"syp":  "siapa",
	"sypa": "siapa",
}
173 changes: 173 additions & 0 deletions tanya/tanya.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,173 @@
package tanya

import (
"sort"
"strings"
"unicode"
"unicode/utf8"
)

// IsQuestion reports whether q is classified as a question. Every intent
// counts as a question except price, contact and other.
func IsQuestion(q string) bool {
	intent := ClassifyIntent(q)
	nonQuestion := intent == IntentPrice ||
		intent == IntentContact ||
		intent == IntentOther
	return !nonQuestion
}

// ClassifyIntent returns the most likely intent for the given query.
//
// The query is normalized first. Every intent in intentTable is then scored
// by summing the weights of its matching rules. The highest score wins; equal
// scores are decided by the higher Priority, and intents that tie on both
// keep their intentTable order. IntentOther is returned for an empty query
// and when no rule matches.
func ClassifyIntent(q string) Intent {
	q = normalize(q)
	if q == "" {
		return IntentOther
	}

	type scored struct {
		intent      Intent
		score, prio int
	}
	// At most one candidate per table entry.
	candidates := make([]scored, 0, len(intentTable))

	for _, spec := range intentTable {
		score := 0
		for _, r := range spec.Rules {
			if matchByType(q, r) {
				score += r.Weight
			}
		}
		if score != 0 {
			candidates = append(candidates, scored{spec.Intent, score, spec.Priority})
		}
	}
	if len(candidates) == 0 {
		return IntentOther
	}

	// SliceStable rather than Slice: several intents share a priority, and
	// sort.Slice gives no guarantee about the relative order of elements that
	// compare equal. A stable sort pins such ties to table order.
	sort.SliceStable(candidates, func(i, j int) bool {
		if candidates[i].score != candidates[j].score {
			return candidates[i].score > candidates[j].score
		}
		return candidates[i].prio > candidates[j].prio
	})
	return candidates[0].intent
}

// matchByType reports whether rule r matches the query q. q is expected to be
// already normalized by the caller.
//
// Contains, starts and ends rules test q as a whole against each term. A
// token_suffix rule instead tests every token of q whose byte length is at
// least r.MinTokenLen (4 when that is <= 0) for ending in one of the terms.
// An unrecognized match type never matches.
func matchByType(q string, r Rule) bool {
	// anyTerm reports whether pred(text, term) holds for at least one term.
	anyTerm := func(text string, pred func(s, sub string) bool) bool {
		for _, term := range r.Terms {
			if pred(text, term) {
				return true
			}
		}
		return false
	}

	switch r.MatchType {
	case MatchTypeContains:
		return anyTerm(q, strings.Contains)
	case MatchTypeStarts:
		return anyTerm(q, strings.HasPrefix)
	case MatchTypeEnds:
		return anyTerm(q, strings.HasSuffix)
	case MatchTypeTokenSuffix:
		threshold := r.MinTokenLen
		if threshold <= 0 {
			threshold = 4
		}
		for _, tok := range tokenize(q) {
			// len counts bytes, not runes.
			if len(tok) >= threshold && anyTerm(tok, strings.HasSuffix) {
				return true
			}
		}
	}

	return false
}

// normalize prepares a raw query for rule matching: whitespace runs are
// collapsed to single spaces, the text is lowercased, known abbreviations are
// expanded, and the result is returned without surrounding spaces.
func normalize(s string) string {
	lowered := strings.ToLower(strings.TrimSpace(collapseSpaces(s)))
	expanded := expandAbbreviations(" " + lowered + " ")
	return strings.TrimSpace(collapseSpaces(expanded))
}

// collapseSpaces replaces every run of Unicode whitespace in s with a single
// ASCII space and strips whitespace from both ends of the result.
func collapseSpaces(s string) string {
	var out strings.Builder
	inRun := false // true while inside a whitespace run that was already emitted
	for _, ch := range s {
		if !unicode.IsSpace(ch) {
			out.WriteRune(ch)
			inRun = false
			continue
		}
		if inRun {
			continue
		}
		out.WriteByte(' ')
		inRun = true
	}
	return strings.TrimSpace(out.String())
}

// expandAbbreviations replaces each whitespace-separated word of s that is a
// key of abbrevMap with its expansion, wherever the word appears (start,
// middle or end). Words are re-joined with single spaces.
func expandAbbreviations(s string) string {
	const trailingPunct = "?.!,"

	fields := strings.Fields(s)
	expanded := make([]string, len(fields))
	for i, word := range fields {
		expanded[i] = word
		if full, found := abbrevMap[word]; found {
			expanded[i] = full
			continue
		}
		// Handle punctuation such as "knp?" or "dmn,": look the word up
		// without its trailing punctuation, then re-attach the punctuation.
		stem := strings.TrimRight(word, trailingPunct)
		if full, found := abbrevMap[stem]; found {
			expanded[i] = full + word[len(stem):]
		}
	}
	return strings.Join(expanded, " ")
}

// tokenize splits s on whitespace and strips leading and trailing characters
// that are neither letters nor digits from every piece. Pieces that end up
// empty are dropped. It deliberately avoids regular expressions to stay
// simple and fast.
func tokenize(s string) []string {
	fields := strings.Fields(s)
	tokens := make([]string, 0, len(fields))
	for _, field := range fields {
		if trimmed := trimNonAlphaNum(field); trimmed != "" {
			tokens = append(tokens, trimmed)
		}
	}
	return tokens
}

// trimNonAlphaNum returns s without its leading and trailing runes that are
// neither letters nor digits. Runes in the middle of s are left untouched.
// Bytes that are not valid UTF-8 decode to utf8.RuneError, which is not
// alphanumeric, so they are trimmed one byte at a time.
func trimNonAlphaNum(s string) string {
	start, end := 0, len(s)
	for start < end {
		// Decode the whole rune: classifying only the first byte would treat
		// the lead byte of a multi-byte symbol (e.g. a curly quote) as a
		// Latin-1 letter and stop trimming too early.
		r, w := utf8.DecodeRuneInString(s[start:end])
		if isAlphaNum(r) {
			break
		}
		start += w
	}
	for end > start {
		r, w := utf8.DecodeLastRuneInString(s[start:end])
		if isAlphaNum(r) {
			break
		}
		end -= w
	}
	return s[start:end]
}

// isAlphaNum reports whether r is a Unicode letter or digit.
func isAlphaNum(r rune) bool { return unicode.IsLetter(r) || unicode.IsDigit(r) }
Loading