diff --git a/internal/search/blevestore/store.go b/internal/search/blevestore/store.go index 480900d..bf436d3 100644 --- a/internal/search/blevestore/store.go +++ b/internal/search/blevestore/store.go @@ -189,17 +189,22 @@ func (s *Store) Close() error { // docToMap converts a search.Document to a flat map for Bleve indexing. func docToMap(d search.Document) map[string]interface{} { m := map[string]interface{}{ - "type": d.Type, - "path": d.Path, - "page_id": d.PageID, - "title": d.Title, - "space_key": d.SpaceKey, - "content": d.Content, - "heading_text": d.HeadingText, - "heading_level": float64(d.HeadingLevel), - "language": d.Language, - "line": float64(d.Line), - "mod_time": d.ModTime, + "type": d.Type, + "path": d.Path, + "page_id": d.PageID, + "title": d.Title, + "space_key": d.SpaceKey, + "content": d.Content, + "heading_text": d.HeadingText, + "heading_level": float64(d.HeadingLevel), + "language": d.Language, + "line": float64(d.Line), + "mod_time": func() interface{} { + if d.ModTime != nil { + return *d.ModTime + } + return nil + }(), "heading_path_text": strings.Join(d.HeadingPath, " / "), } @@ -251,7 +256,7 @@ func mapToDoc(id string, fields map[string]interface{}) (search.Document, error) } if v, ok := fields["mod_time"]; ok { if t, err := parseTimeField(v); err == nil { - d.ModTime = t + d.ModTime = &t } } if v, ok := fields["labels"]; ok { diff --git a/internal/search/blevestore/store_test.go b/internal/search/blevestore/store_test.go index 725e91c..6f1cff9 100644 --- a/internal/search/blevestore/store_test.go +++ b/internal/search/blevestore/store_test.go @@ -25,6 +25,7 @@ func openTestStore(t *testing.T) *Store { } func pageDoc(id, path, space, title, content string, labels ...string) search.Document { + t := time.Now().Truncate(time.Second) return search.Document{ ID: id, Type: search.DocTypePage, @@ -33,11 +34,12 @@ func pageDoc(id, path, space, title, content string, labels ...string) search.Do Title: title, Content: content, Labels: labels, - ModTime: time.Now().Truncate(time.Second), + ModTime: &t, } } func sectionDoc(id, path, space, title, headingText, content string, headingLevel, line int) search.Document { + t := time.Now().Truncate(time.Second) return search.Document{ ID: id, Type: search.DocTypeSection, @@ -48,7 +50,7 @@ func sectionDoc(id, path, space, title, headingText, content string, headingLeve Content: content, HeadingLevel: headingLevel, Line: line, - ModTime: time.Now().Truncate(time.Second), + ModTime: &t, } } @@ -142,7 +144,7 @@ func TestSearchReturnsAllFieldsRoundTrip(t *testing.T) { HeadingLevel: 0, Language: "", Line: 0, - ModTime: modTime, + ModTime: &modTime, } mustIndex(t, s, doc) diff --git a/internal/search/document.go b/internal/search/document.go index 25bc231..01391a1 100644 --- a/internal/search/document.go +++ b/internal/search/document.go @@ -21,48 +21,48 @@ const ( // so that filtering never requires a join. type Document struct { // ID is a composite, globally unique key. - ID string + ID string `json:"id,omitempty"` // Type is DocTypePage, DocTypeSection, or DocTypeCode. - Type string + Type string `json:"type,omitempty"` // Path is the repository-relative path with forward slashes, e.g. "DEV/overview.md". - Path string + Path string `json:"path,omitempty"` // PageID is the Confluence page identifier from frontmatter (may be empty for new files). - PageID string + PageID string `json:"page_id,omitempty"` // Title is the Confluence page title from frontmatter. - Title string + Title string `json:"title,omitempty"` // SpaceKey is the Confluence space key from frontmatter. - SpaceKey string + SpaceKey string `json:"space_key,omitempty"` // Labels are Confluence page labels, normalised (lowercase, trimmed, deduplicated). - Labels []string + Labels []string `json:"labels,omitempty"` // Content holds the searchable text: full body for page docs, heading-section text for // section docs, and raw code content for code docs. - Content string + Content string `json:"content,omitempty"` // HeadingPath is the ordered heading hierarchy from root to the section/code block, // e.g. ["# Overview", "## Auth Flow", "### Token Refresh"]. - HeadingPath []string + HeadingPath []string `json:"heading_path,omitempty"` // HeadingText is the innermost heading label (for section/code docs). - HeadingText string + HeadingText string `json:"heading_text,omitempty"` // HeadingLevel is the Markdown heading level (1–6) of HeadingText; 0 for page docs. - HeadingLevel int + HeadingLevel int `json:"heading_level,omitempty"` // Language is the fenced code block info string (e.g. "go", "sql"); empty for non-code docs. - Language string + Language string `json:"language,omitempty"` // Line is the 1-based start line in the source file (0 for page docs). - Line int + Line int `json:"line,omitempty"` // ModTime is the last modification time of the source file. - ModTime time.Time + ModTime *time.Time `json:"mod_time,omitempty"` } // SearchOptions controls a full-text search query. @@ -90,11 +90,11 @@ type SearchOptions struct { // SearchResult is a single match returned by Store.Search. type SearchResult struct { // Document is the full indexed document. - Document Document + Document Document `json:"document"` // Score is a backend-specific relevance score (higher = more relevant). - Score float64 + Score float64 `json:"score,omitempty"` // Snippet is a short contextual excerpt with the matched terms highlighted. - Snippet string + Snippet string `json:"snippet,omitempty"` } diff --git a/internal/search/indexer.go b/internal/search/indexer.go index 8948071..d5c2ec9 100644 --- a/internal/search/indexer.go +++ b/internal/search/indexer.go @@ -177,7 +177,7 @@ func (ix *Indexer) indexFile(absPath, docPath, spaceKey string) (int, error) { SpaceKey: spaceKey, Labels: labels, Content: mdDoc.Body, - ModTime: modTime, + ModTime: &modTime, }) // 2. Section and code-block documents. @@ -197,7 +197,7 @@ func (ix *Indexer) indexFile(absPath, docPath, spaceKey string) (int, error) { HeadingText: sec.HeadingText, HeadingLevel: sec.HeadingLevel, Line: sec.Line, - ModTime: modTime, + ModTime: &modTime, }) } @@ -216,7 +216,7 @@ func (ix *Indexer) indexFile(absPath, docPath, spaceKey string) (int, error) { HeadingLevel: cb.HeadingLevel, Language: cb.Language, Line: cb.Line, - ModTime: modTime, + ModTime: &modTime, }) } diff --git a/internal/search/sqlitestore/store.go b/internal/search/sqlitestore/store.go index bfe21ab..be63d3a 100644 --- a/internal/search/sqlitestore/store.go +++ b/internal/search/sqlitestore/store.go @@ -8,6 +8,7 @@ import ( "path/filepath" "strings" "time" + "unicode" "github.com/rgonek/confluence-markdown-sync/internal/search" _ "modernc.org/sqlite" // SQLite driver registration @@ -99,7 +100,7 @@ ON CONFLICT(id) DO UPDATE SET return fmt.Errorf("sqlitestore.Index marshal heading_path: %w", err) } modTimeStr := "" - if !d.ModTime.IsZero() { + if d.ModTime != nil { modTimeStr = d.ModTime.UTC().Format(time.RFC3339) } _, err = stmt.Exec( @@ -141,8 +142,12 @@ func (s *Store) Search(opts search.SearchOptions) ([]search.SearchResult, error) ) if opts.Query != "" { + safeQuery, err := normalizeFTSQuery(opts.Query) + if err != nil { + return nil, fmt.Errorf("sqlitestore.Search query normalize: %w", err) + } whereClauses = append(whereClauses, "documents_fts MATCH ?") - args = append(args, opts.Query) + args = append(args, safeQuery) } if opts.SpaceKey != "" { @@ -237,7 +242,7 @@ LIMIT ?`, whereExpr) } if modTimeStr != "" { if t, err := time.Parse(time.RFC3339, modTimeStr); err == nil { - doc.ModTime = t + doc.ModTime = &t } } @@ -349,3 +354,17 @@ func marshalJSON(v any) (string, error) { } return string(b), nil } + +func normalizeFTSQuery(raw string) (string, error) { + sanitized := strings.Map(func(r rune) rune { + if unicode.IsLetter(r) || unicode.IsNumber(r) { + return r + } + return ' ' + }, raw) + tokens := strings.Fields(sanitized) + if len(tokens) == 0 { + return "", fmt.Errorf("query contains no searchable tokens") + } + return strings.Join(tokens, " "), nil +} diff --git a/internal/search/sqlitestore/store_test.go b/internal/search/sqlitestore/store_test.go index 3528753..4f8597c 100644 --- a/internal/search/sqlitestore/store_test.go +++ b/internal/search/sqlitestore/store_test.go @@ -33,7 +33,7 @@ func sampleDocs() []search.Document { SpaceKey: "DEV", Labels: []string{"architecture", "security"}, Content: "This page covers the security architecture and OAuth2 flows.", - ModTime: now, + ModTime: &now, }, { ID: "section:DEV/overview.md:5", @@ -48,7 +48,7 @@ func sampleDocs() []search.Document { HeadingLevel: 2, HeadingPath: []string{"# Security Overview", "## OAuth2 Flow"}, Line: 5, - ModTime: now, + ModTime: &now, }, { ID: "code:DEV/overview.md:12", @@ -64,7 +64,7 @@ func sampleDocs() []search.Document { HeadingPath: []string{"# Security Overview", "## OAuth2 Flow", "### Token Refresh"}, Language: "go", Line: 12, - ModTime: now, + ModTime: &now, }, { ID: "page:OPS/deploy.md", @@ -75,7 +75,7 @@ func sampleDocs() []search.Document { SpaceKey: "OPS", Labels: []string{"ops", "deployment"}, Content: "How to deploy the application to production.", - ModTime: now, + ModTime: &now, }, } } @@ -97,6 +97,94 @@ func TestStore_IndexAndSearch(t *testing.T) { } } +func TestStore_SearchStripsSpecialCharacters(t *testing.T) { + s := newTestStore(t) + docs := sampleDocs() + + docs = append(docs, search.Document{ + ID: "page:OPS/events.md", + Type: search.DocTypePage, + Path: "OPS/events.md", + PageID: "777777", + Title: "Events API", + SpaceKey: "OPS", + Content: "POST /v2/events endpoint details and payloads.", + }) + + if err := s.Index(docs); err != nil { + t.Fatalf("Index: %v", err) + } + + results, err := s.Search(search.SearchOptions{Query: "POST /v2/events"}) + if err != nil { + t.Fatalf("Search: %v", err) + } + if len(results) == 0 { + t.Fatal("expected results for query with special characters") + } +} + +func TestNormalizeFTSQuery(t *testing.T) { + t.Parallel() + + tests := []struct { + name string + input string + want string + wantErr bool + }{ + { + name: "slashes", + input: "POST /v2/events", + want: "POST v2 events", + }, + { + name: "hyphen", + input: "Onboarding to On-Call guide", + want: "Onboarding to On Call guide", + }, + { + name: "punctuation", + input: "auth:token (refresh)", + want: "auth token refresh", + }, + { + name: "dots and quotes", + input: `"v2.0" endpoint`, + want: "v2 0 endpoint", + }, + { + name: "underscore", + input: "api_events_v2", + want: "api events v2", + }, + { + name: "only symbols", + input: "/-()", + wantErr: true, + }, + } + + for _, tt := range tests { + tt := tt + t.Run(tt.name, func(t *testing.T) { + got, err := normalizeFTSQuery(tt.input) + if tt.wantErr { + if err == nil { + t.Fatalf("expected error, got nil") + } + return + } + if err != nil { + t.Fatalf("normalizeFTSQuery: %v", err) + } + if got != tt.want { + t.Fatalf("normalizeFTSQuery(%q) = %q, want %q", tt.input, got, tt.want) + } + }) + } +} + func TestStore_DeleteByPath(t *testing.T) { s := newTestStore(t) docs := sampleDocs()