Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
29 changes: 17 additions & 12 deletions internal/search/blevestore/store.go
Original file line number Diff line number Diff line change
Expand Up @@ -189,17 +189,22 @@ func (s *Store) Close() error {
// docToMap converts a search.Document to a flat map for Bleve indexing.
func docToMap(d search.Document) map[string]interface{} {
m := map[string]interface{}{
"type": d.Type,
"path": d.Path,
"page_id": d.PageID,
"title": d.Title,
"space_key": d.SpaceKey,
"content": d.Content,
"heading_text": d.HeadingText,
"heading_level": float64(d.HeadingLevel),
"language": d.Language,
"line": float64(d.Line),
"mod_time": d.ModTime,
"type": d.Type,
"path": d.Path,
"page_id": d.PageID,
"title": d.Title,
"space_key": d.SpaceKey,
"content": d.Content,
"heading_text": d.HeadingText,
"heading_level": float64(d.HeadingLevel),
"language": d.Language,
"line": float64(d.Line),
"mod_time": func() interface{} {
if d.ModTime != nil {
return *d.ModTime
}
return nil
}(),
"heading_path_text": strings.Join(d.HeadingPath, " / "),
}

Expand Down Expand Up @@ -251,7 +256,7 @@ func mapToDoc(id string, fields map[string]interface{}) (search.Document, error)
}
if v, ok := fields["mod_time"]; ok {
if t, err := parseTimeField(v); err == nil {
d.ModTime = t
d.ModTime = &t
}
}
if v, ok := fields["labels"]; ok {
Expand Down
8 changes: 5 additions & 3 deletions internal/search/blevestore/store_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@ func openTestStore(t *testing.T) *Store {
}

func pageDoc(id, path, space, title, content string, labels ...string) search.Document {
t := time.Now().Truncate(time.Second)
return search.Document{
ID: id,
Type: search.DocTypePage,
Expand All @@ -33,11 +34,12 @@ func pageDoc(id, path, space, title, content string, labels ...string) search.Do
Title: title,
Content: content,
Labels: labels,
ModTime: time.Now().Truncate(time.Second),
ModTime: &t,
}
}

func sectionDoc(id, path, space, title, headingText, content string, headingLevel, line int) search.Document {
t := time.Now().Truncate(time.Second)
return search.Document{
ID: id,
Type: search.DocTypeSection,
Expand All @@ -48,7 +50,7 @@ func sectionDoc(id, path, space, title, headingText, content string, headingLeve
Content: content,
HeadingLevel: headingLevel,
Line: line,
ModTime: time.Now().Truncate(time.Second),
ModTime: &t,
}
}

Expand Down Expand Up @@ -142,7 +144,7 @@ func TestSearchReturnsAllFieldsRoundTrip(t *testing.T) {
HeadingLevel: 0,
Language: "",
Line: 0,
ModTime: modTime,
ModTime: &modTime,
}
mustIndex(t, s, doc)

Expand Down
34 changes: 17 additions & 17 deletions internal/search/document.go
Original file line number Diff line number Diff line change
Expand Up @@ -21,48 +21,48 @@ const (
// so that filtering never requires a join.
type Document struct {
// ID is a composite, globally unique key.
ID string
ID string `json:"id,omitempty"`

// Type is DocTypePage, DocTypeSection, or DocTypeCode.
Type string
Type string `json:"type,omitempty"`

// Path is the repository-relative path with forward slashes, e.g. "DEV/overview.md".
Path string
Path string `json:"path,omitempty"`

// PageID is the Confluence page identifier from frontmatter (may be empty for new files).
PageID string
PageID string `json:"page_id,omitempty"`

// Title is the Confluence page title from frontmatter.
Title string
Title string `json:"title,omitempty"`

// SpaceKey is the Confluence space key from frontmatter.
SpaceKey string
SpaceKey string `json:"space_key,omitempty"`

// Labels are Confluence page labels, normalised (lowercase, trimmed, deduplicated).
Labels []string
Labels []string `json:"labels,omitempty"`

// Content holds the searchable text: full body for page docs, heading-section text for
// section docs, and raw code content for code docs.
Content string
Content string `json:"content,omitempty"`

// HeadingPath is the ordered heading hierarchy from root to the section/code block,
// e.g. ["# Overview", "## Auth Flow", "### Token Refresh"].
HeadingPath []string
HeadingPath []string `json:"heading_path,omitempty"`

// HeadingText is the innermost heading label (for section/code docs).
HeadingText string
HeadingText string `json:"heading_text,omitempty"`

// HeadingLevel is the Markdown heading level (1–6) of HeadingText; 0 for page docs.
HeadingLevel int
HeadingLevel int `json:"heading_level,omitempty"`

// Language is the fenced code block info string (e.g. "go", "sql"); empty for non-code docs.
Language string
Language string `json:"language,omitempty"`

// Line is the 1-based start line in the source file (0 for page docs).
Line int
Line int `json:"line,omitempty"`

// ModTime is the last modification time of the source file.
ModTime time.Time
ModTime *time.Time `json:"mod_time,omitempty"`
}

// SearchOptions controls a full-text search query.
Expand Down Expand Up @@ -90,11 +90,11 @@ type SearchOptions struct {
// SearchResult is a single match returned by Store.Search.
type SearchResult struct {
// Document is the full indexed document.
Document Document
Document Document `json:"document"`

// Score is a backend-specific relevance score (higher = more relevant).
Score float64
Score float64 `json:"score,omitempty"`

// Snippet is a short contextual excerpt with the matched terms highlighted.
Snippet string
Snippet string `json:"snippet,omitempty"`
}
6 changes: 3 additions & 3 deletions internal/search/indexer.go
Original file line number Diff line number Diff line change
Expand Up @@ -177,7 +177,7 @@ func (ix *Indexer) indexFile(absPath, docPath, spaceKey string) (int, error) {
SpaceKey: spaceKey,
Labels: labels,
Content: mdDoc.Body,
ModTime: modTime,
ModTime: &modTime,
})

// 2. Section and code-block documents.
Expand All @@ -197,7 +197,7 @@ func (ix *Indexer) indexFile(absPath, docPath, spaceKey string) (int, error) {
HeadingText: sec.HeadingText,
HeadingLevel: sec.HeadingLevel,
Line: sec.Line,
ModTime: modTime,
ModTime: &modTime,
})
}

Expand All @@ -216,7 +216,7 @@ func (ix *Indexer) indexFile(absPath, docPath, spaceKey string) (int, error) {
HeadingLevel: cb.HeadingLevel,
Language: cb.Language,
Line: cb.Line,
ModTime: modTime,
ModTime: &modTime,
})
}

Expand Down
25 changes: 22 additions & 3 deletions internal/search/sqlitestore/store.go
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ import (
"path/filepath"
"strings"
"time"
"unicode"

"github.com/rgonek/confluence-markdown-sync/internal/search"
_ "modernc.org/sqlite" // SQLite driver registration
Expand Down Expand Up @@ -99,7 +100,7 @@ ON CONFLICT(id) DO UPDATE SET
return fmt.Errorf("sqlitestore.Index marshal heading_path: %w", err)
}
modTimeStr := ""
if !d.ModTime.IsZero() {
if d.ModTime != nil {
modTimeStr = d.ModTime.UTC().Format(time.RFC3339)
}
_, err = stmt.Exec(
Expand Down Expand Up @@ -141,8 +142,12 @@ func (s *Store) Search(opts search.SearchOptions) ([]search.SearchResult, error)
)

if opts.Query != "" {
safeQuery, err := normalizeFTSQuery(opts.Query)
if err != nil {
return nil, fmt.Errorf("sqlitestore.Search query normalize: %w", err)
}
whereClauses = append(whereClauses, "documents_fts MATCH ?")
args = append(args, opts.Query)
args = append(args, safeQuery)
}

if opts.SpaceKey != "" {
Expand Down Expand Up @@ -237,7 +242,7 @@ LIMIT ?`, whereExpr)
}
if modTimeStr != "" {
if t, err := time.Parse(time.RFC3339, modTimeStr); err == nil {
doc.ModTime = t
doc.ModTime = &t
}
}

Expand Down Expand Up @@ -349,3 +354,17 @@ func marshalJSON(v any) (string, error) {
}
return string(b), nil
}

// normalizeFTSQuery reduces a free-text query to plain search terms that are
// safe to bind to a full-text MATCH expression.
//
// Every rune that is not a Unicode letter or number acts as a separator, so
// punctuation such as quotes, parentheses, slashes, colons and hyphens never
// reaches the query parser. The surviving terms keep their original order and
// are joined with single spaces.
//
// The bare upper-case words AND, OR and NOT are wrapped in double quotes so
// they are searched as ordinary terms. The FTS5 query grammar reserves exactly
// those spellings (case-sensitively) as boolean operators, so a search such as
// "NOT NULL" would otherwise be rejected as a syntax error.
// NOTE(review): assumes documents_fts is an FTS5 table — confirm against the schema.
//
// An error is returned when raw contains no letters or numbers at all.
func normalizeFTSQuery(raw string) (string, error) {
	// Splitting directly on "not a letter or number" is equivalent to mapping
	// those runes to spaces and then splitting on whitespace, in a single pass.
	tokens := strings.FieldsFunc(raw, func(r rune) bool {
		return !unicode.IsLetter(r) && !unicode.IsNumber(r)
	})
	if len(tokens) == 0 {
		return "", fmt.Errorf("query contains no searchable tokens")
	}

	for i, tok := range tokens {
		switch tok {
		case "AND", "OR", "NOT":
			// Quoting turns the reserved word into a one-word phrase.
			tokens[i] = `"` + tok + `"`
		}
	}
	return strings.Join(tokens, " "), nil
}
96 changes: 92 additions & 4 deletions internal/search/sqlitestore/store_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@ func sampleDocs() []search.Document {
SpaceKey: "DEV",
Labels: []string{"architecture", "security"},
Content: "This page covers the security architecture and OAuth2 flows.",
ModTime: now,
ModTime: &now,
},
{
ID: "section:DEV/overview.md:5",
Expand All @@ -48,7 +48,7 @@ func sampleDocs() []search.Document {
HeadingLevel: 2,
HeadingPath: []string{"# Security Overview", "## OAuth2 Flow"},
Line: 5,
ModTime: now,
ModTime: &now,
},
{
ID: "code:DEV/overview.md:12",
Expand All @@ -64,7 +64,7 @@ func sampleDocs() []search.Document {
HeadingPath: []string{"# Security Overview", "## OAuth2 Flow", "### Token Refresh"},
Language: "go",
Line: 12,
ModTime: now,
ModTime: &now,
},
{
ID: "page:OPS/deploy.md",
Expand All @@ -75,7 +75,7 @@ func sampleDocs() []search.Document {
SpaceKey: "OPS",
Labels: []string{"ops", "deployment"},
Content: "How to deploy the application to production.",
ModTime: now,
ModTime: &now,
},
}
}
Expand All @@ -97,6 +97,94 @@ func TestStore_IndexAndSearch(t *testing.T) {
}
}

// TestStore_SearchStripsSpecialCharacters indexes the sample corpus plus one
// extra page whose content mentions "POST /v2/events", then verifies that
// searching for that slash-laden string succeeds and yields at least one hit.
func TestStore_SearchStripsSpecialCharacters(t *testing.T) {
	store := newTestStore(t)

	// The extra page is the document the query below is meant to find.
	eventsPage := search.Document{
		ID:       "page:OPS/events.md",
		Type:     search.DocTypePage,
		Path:     "OPS/events.md",
		PageID:   "777777",
		Title:    "Events API",
		SpaceKey: "OPS",
		Content:  "POST /v2/events endpoint details and payloads.",
	}
	corpus := append(sampleDocs(), eventsPage)

	if err := store.Index(corpus); err != nil {
		t.Fatalf("Index: %v", err)
	}

	opts := search.SearchOptions{Query: "POST /v2/events"}
	hits, err := store.Search(opts)
	if err != nil {
		t.Fatalf("Search: %v", err)
	}
	if len(hits) == 0 {
		t.Fatal("expected results for query with special characters")
	}
}

// TestNormalizeFTSQuery is a table-driven check of normalizeFTSQuery: each
// case supplies a raw query and either the exact normalized string expected
// back or a flag saying that an error is expected instead.
func TestNormalizeFTSQuery(t *testing.T) {
	t.Parallel()

	type testCase struct {
		name    string
		input   string
		want    string
		wantErr bool
	}

	cases := []testCase{
		{name: "slashes", input: "POST /v2/events", want: "POST v2 events"},
		{name: "hyphen", input: "Onboarding to On-Call guide", want: "Onboarding to On Call guide"},
		{name: "punctuation", input: "auth:token (refresh)", want: "auth token refresh"},
		{name: "dots and quotes", input: `"v2.0" endpoint`, want: "v2 0 endpoint"},
		{name: "underscore", input: "api_events_v2", want: "api events v2"},
		{name: "only symbols", input: "/-()", wantErr: true},
	}

	for _, tc := range cases {
		// Subtests run synchronously, so the closure can use tc directly.
		t.Run(tc.name, func(t *testing.T) {
			got, err := normalizeFTSQuery(tc.input)
			switch {
			case tc.wantErr && err == nil:
				t.Fatalf("expected error, got nil")
			case tc.wantErr:
				// The error was expected; the returned string is not inspected.
				return
			case err != nil:
				t.Fatalf("normalizeFTSQuery: %v", err)
			case got != tc.want:
				t.Fatalf("normalizeFTSQuery(%q) = %q, want %q", tc.input, got, tc.want)
			}
		})
	}
}

func TestStore_DeleteByPath(t *testing.T) {
s := newTestStore(t)
docs := sampleDocs()
Expand Down