Skip to content

Commit dd34d1a

Browse files
committed
feat: allow to fetch entry from collection
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
1 parent 947a2fa commit dd34d1a

3 files changed

Lines changed: 121 additions & 5 deletions

File tree

pkg/client/client.go

Lines changed: 51 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@ import (
88
"io"
99
"mime/multipart"
1010
"net/http"
11+
"net/url"
1112
"os"
1213

1314
"github.com/mudler/localrecall/rag/types"
@@ -74,7 +75,7 @@ func (c *Client) ListCollections() ([]string, error) {
7475
return collections, nil
7576
}
7677

77-
// ListCollections lists all collections
78+
// ListEntries lists all entries in a collection
7879
func (c *Client) ListEntries(collection string) ([]string, error) {
7980
url := fmt.Sprintf("%s/api/collections/%s/entries", c.BaseURL, collection)
8081

@@ -85,16 +86,61 @@ func (c *Client) ListEntries(collection string) ([]string, error) {
8586
defer resp.Body.Close()
8687

8788
if resp.StatusCode != http.StatusOK {
88-
return nil, errors.New("failed to list collections")
89+
return nil, errors.New("failed to list entries")
90+
}
91+
92+
var result struct {
93+
Data struct {
94+
Entries []string `json:"entries"`
95+
} `json:"data"`
96+
}
97+
err = json.NewDecoder(resp.Body).Decode(&result)
98+
if err != nil {
99+
return nil, err
100+
}
101+
102+
return result.Data.Entries, nil
103+
}
104+
105+
// EntryChunk describes one chunk of an entry as returned by the entry
// content endpoint: its identifier, its text, and its metadata.
type EntryChunk struct {
	// ID is decoded from the "id" JSON key.
	ID string `json:"id"`
	// Content is decoded from the "content" JSON key.
	Content string `json:"content"`
	// Metadata is decoded from the "metadata" JSON key.
	Metadata map[string]string `json:"metadata"`
}
111+
112+
// GetEntryContent returns the chunks (id, content, metadata) for a specific entry in a collection.
113+
func (c *Client) GetEntryContent(collection, entry string) ([]EntryChunk, error) {
114+
apiURL := fmt.Sprintf("%s/api/collections/%s/entries/%s", c.BaseURL, collection, url.PathEscape(entry))
115+
116+
resp, err := http.Get(apiURL)
117+
if err != nil {
118+
return nil, err
89119
}
120+
defer resp.Body.Close()
90121

91-
var entries []string
92-
err = json.NewDecoder(resp.Body).Decode(&entries)
122+
switch resp.StatusCode {
123+
case http.StatusOK:
124+
break
125+
case http.StatusNotFound:
126+
return nil, errors.New("collection or entry not found")
127+
case http.StatusNotImplemented:
128+
return nil, errors.New("this collection backend does not support listing entry content")
129+
default:
130+
return nil, fmt.Errorf("failed to get entry content: status %d", resp.StatusCode)
131+
}
132+
133+
var result struct {
134+
Data struct {
135+
Chunks []EntryChunk `json:"chunks"`
136+
} `json:"data"`
137+
}
138+
err = json.NewDecoder(resp.Body).Decode(&result)
93139
if err != nil {
94140
return nil, err
95141
}
96142

97-
return entries, nil
143+
return result.Data.Chunks, nil
98144
}
99145

100146
// DeleteEntry deletes an entry in a collection and returns the entries left

rag/persistency.go

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -208,6 +208,29 @@ func (db *PersistentKB) EntryExists(entry string) bool {
208208
return false
209209
}
210210

211+
// GetEntryContent returns all chunks (content, id, metadata) for the given entry.
212+
// It uses the in-memory index and Engine.GetByID to resolve full chunk data.
213+
func (db *PersistentKB) GetEntryContent(entry string) ([]types.Result, error) {
214+
db.Lock()
215+
defer db.Unlock()
216+
217+
entry = filepath.Base(entry)
218+
chunkResults, ok := db.index[entry]
219+
if !ok {
220+
return nil, fmt.Errorf("entry not found: %s", entry)
221+
}
222+
223+
results := make([]types.Result, 0, len(chunkResults))
224+
for _, r := range chunkResults {
225+
full, err := db.Engine.GetByID(r.ID)
226+
if err != nil {
227+
return nil, fmt.Errorf("failed to get chunk %s: %w", r.ID, err)
228+
}
229+
results = append(results, full)
230+
}
231+
return results, nil
232+
}
233+
211234
// Store stores an entry in the persistent knowledge base.
212235
func (db *PersistentKB) Store(entry string, metadata map[string]string) error {
213236
db.Lock()

routes.go

Lines changed: 47 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@ import (
55
"fmt"
66
"io"
77
"net/http"
8+
"net/url"
89
"os"
910
"path/filepath"
1011
"strings"
@@ -125,6 +126,7 @@ func registerAPIRoutes(e *echo.Echo, openAIClient *openai.Client, maxChunkingSiz
125126
e.POST("/api/collections/:name/upload", uploadFile(collections, fileAssets))
126127
e.GET("/api/collections", listCollections)
127128
e.GET("/api/collections/:name/entries", listFiles(collections))
129+
e.GET("/api/collections/:name/entries/:entry", getEntryContent(collections))
128130
e.POST("/api/collections/:name/search", search(collections))
129131
e.POST("/api/collections/:name/reset", reset(collections))
130132
e.DELETE("/api/collections/:name/entry/delete", deleteEntryFromCollection(collections))
@@ -271,6 +273,51 @@ func listFiles(collections collectionList) func(c echo.Context) error {
271273
}
272274
}
273275

276+
// getEntryContent returns the chunks (id, content, metadata) for a specific entry in a collection.
277+
func getEntryContent(collections collectionList) func(c echo.Context) error {
278+
return func(c echo.Context) error {
279+
name := c.Param("name")
280+
collection, exists := collections[name]
281+
if !exists {
282+
return c.JSON(http.StatusNotFound, errorResponse(ErrCodeNotFound, "Collection not found", fmt.Sprintf("Collection '%s' does not exist", name)))
283+
}
284+
285+
entryParam := c.Param("entry")
286+
entry, err := url.PathUnescape(entryParam)
287+
if err != nil {
288+
entry = entryParam
289+
}
290+
291+
results, err := collection.GetEntryContent(entry)
292+
if err != nil {
293+
if strings.Contains(err.Error(), "entry not found") {
294+
return c.JSON(http.StatusNotFound, errorResponse(ErrCodeNotFound, "Entry not found", fmt.Sprintf("Entry '%s' does not exist in collection '%s'", entry, name)))
295+
}
296+
if strings.Contains(err.Error(), "not implemented") {
297+
return c.JSON(http.StatusNotImplemented, errorResponse(ErrCodeInternalError, "Not supported", "This collection backend does not support listing entry content"))
298+
}
299+
return c.JSON(http.StatusInternalServerError, errorResponse(ErrCodeInternalError, "Failed to get entry content", err.Error()))
300+
}
301+
302+
chunks := make([]map[string]interface{}, 0, len(results))
303+
for _, r := range results {
304+
chunks = append(chunks, map[string]interface{}{
305+
"id": r.ID,
306+
"content": r.Content,
307+
"metadata": r.Metadata,
308+
})
309+
}
310+
311+
response := successResponse("Entry content retrieved successfully", map[string]interface{}{
312+
"collection": name,
313+
"entry": entry,
314+
"chunks": chunks,
315+
"count": len(chunks),
316+
})
317+
return c.JSON(http.StatusOK, response)
318+
}
319+
}
320+
274321
// uploadFile handles uploading files to a collection
275322
func uploadFile(collections collectionList, fileAssets string) func(c echo.Context) error {
276323
return func(c echo.Context) error {

0 commit comments

Comments
 (0)