Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,10 @@ Tag naming: `0.y.z` (no `v` prefix). Align `cmd/version.go` with the tag before

## [Unreleased]

### Fixed

- **code_files RAG**: populate `rag_for_content` when `fields.content.rag` is enabled so the control plane indexes GitLab file text (schema `usable_in_rag` expects `rag_for_<field>`); omit raw `content` from ingest to keep payload size bounded.

## [0.1.0] - 2026-05-23

### Added
Expand Down
2 changes: 1 addition & 1 deletion config/schema.yml
Original file line number Diff line number Diff line change
Expand Up @@ -170,7 +170,7 @@ entities:
description: "Path relative to the repository root"
content:
type: "string"
description: "Text content (RAG when fields.content.rag is enabled on the code_files entity)"
description: "Raw file text in snapshots (RAG corpus uses rag_for_content when fields.content.rag is enabled)"
usable_in_rag: true
title:
type: "string"
Expand Down
14 changes: 8 additions & 6 deletions internal/models/code_file.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,10 +2,12 @@ package models

// CodeFile is a text file under a cloned repository, pushed for RAG indexing on the control plane.
//
// NOTE(review): this listing is a rendered diff hunk (8 additions, 6 deletions), so the
// field list appears twice: the first six fields look like the pre-change definition and
// the eight lines after them (which introduce RagForContent) the post-change definition.
type CodeFile struct {
ID string `json:"id"`
RepoURL string `json:"repo_url"`
FilePath string `json:"file_path"`
Content string `json:"content"`
Title string `json:"title,omitempty"`
SourceURL string `json:"source_url,omitempty"`
ID string `json:"id"`
RepoURL string `json:"repo_url"`
FilePath string `json:"file_path"`
// Content is serialized under "content" without omitempty, so an empty value is still
// emitted as "content": "" rather than dropped from the JSON object.
// NOTE(review): confirm this is what "omit content from ingest" is meant to produce.
Content string `json:"content"`
// RagForContent is indexed for RAG (control plane: usable_in_rag on schema field content).
// It is serialized under "rag_for_content" and left out of the JSON when empty.
RagForContent string `json:"rag_for_content,omitempty"`
Title string `json:"title,omitempty"`
SourceURL string `json:"source_url,omitempty"`
}
9 changes: 7 additions & 2 deletions internal/probe/entities/code_files_entity.go
Original file line number Diff line number Diff line change
Expand Up @@ -60,9 +60,14 @@ func (e *CodeFilesEntity) Refresh(client core.Client) (interface{}, error) {
log.Printf("code_files: scan %s: %v", r.URL, err)
continue
}
if !ragEnabled {
for i := range files {
for i := range files {
if ragEnabled {
files[i].RagForContent = files[i].Content
// RAG text is sent once as rag_for_content; omit content to keep ingest payload bounded.
files[i].Content = ""
} else {
files[i].Content = ""
files[i].RagForContent = ""
}
}
all = append(all, files...)
Expand Down
55 changes: 55 additions & 0 deletions internal/probe/entities/code_files_entity_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,55 @@
package entities

import (
"testing"

"fluid/probes/core"
"fluid/probes/gitlab/internal/config"
"fluid/probes/gitlab/internal/models"
)

// stubConfig is a test double that hands back a caller-supplied entity list
// and fixed values for every other accessor.
type stubConfig struct {
	entities []core.EntityConfig
}

// GetEntities returns the entity configurations stored on the stub.
func (sc *stubConfig) GetEntities() []core.EntityConfig {
	return sc.entities
}

// GetProbeName returns the fixed probe name "test".
func (sc *stubConfig) GetProbeName() string {
	return "test"
}

// GetProbeVersion returns the fixed probe version "0.0.0".
func (sc *stubConfig) GetProbeVersion() string {
	return "0.0.0"
}

// GetStateDir returns the fixed state directory "state".
func (sc *stubConfig) GetStateDir() string {
	return "state"
}

// GetCleanupInterval returns the fixed cleanup interval 60.
func (sc *stubConfig) GetCleanupInterval() int {
	return 60
}

func TestCodeFilesRAGPayload(t *testing.T) {
t.Parallel()

cfg := &config.Config{
Data: config.DataConfig{
Entities: []core.EntityConfig{
{
Name: "code_files",
Fields: map[string]core.EntityFieldConfig{
"content": {RAG: true},
},
},
},
},
}

files := []models.CodeFile{{Content: "resource \"x\" {}"}}
ragEnabled := codeFilesRAGEnabled(cfg)
for i := range files {
if ragEnabled {
files[i].RagForContent = files[i].Content
files[i].Content = ""
} else {
files[i].Content = ""
files[i].RagForContent = ""
}
}

if files[0].RagForContent != `resource "x" {}` {
t.Fatalf("expected rag_for_content payload, got %q", files[0].RagForContent)
}
if files[0].Content != "" {
t.Fatal("expected content omitted when RAG is enabled")
}
}
Loading