Commit 14468ff

feat: Implement prompt selection based on Gemini mode and batch processing of document embeddings.
1 parent 8a67547

4 files changed

Lines changed: 83 additions & 14 deletions


sfcore-ai/crates/rag-api-server/config/settings.toml

Lines changed: 34 additions & 0 deletions
@@ -77,6 +77,40 @@ acquire_timeout_ms = 60000
 embedding_batch_size = 10
 
 [prompts]
+
+[prompts.local]
+main_system_prompt = """
+Anda adalah asisten AI cerdas untuk Sistem Manajemen Dokumen.
+Waktu Server: {{CURRENT_DATETIME}}
+Dokumen Aktif:
+{{DOC_LIST}}
+
+Instruksi:
+- Jawab pertanyaan berdasarkan konteks dokumen.
+- Gunakan Bahasa Indonesia yang natural dan jelas.
+- Jika tidak ada info di dokumen, katakan jujur.
+
+Referensi:
+- [Judul Dokumen]
+"""
+
+context_extraction_system_prompt = """
+Konteks Dokumen:
+{{CHUNKS}}
+"""
+
+rag_query_system_prompt = """
+Anda adalah asisten AI RAG. Jawab berdasarkan konteks berikut:
+{{CONTEXT}}
+"""
+
+deep_scan_system_prompt = """
+User Query: "{{QUERY}}"
+Pilih chunk ID yang relevan dari daftar di bawah.
+Output JSON: {"relevant_chunk_ids": [1, 2, ...]}
+"""
+
+[prompts.gemini]
 main_system_prompt = """
 Anda adalah asisten AI cerdas untuk Sistem Manajemen Dokumen.
 Waktu Server: {{CURRENT_DATETIME}}
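
Note on the layout: the old flat [prompts] table now holds two parallel sub-tables, [prompts.local] and [prompts.gemini], one complete prompt set per backend. A minimal sketch of how serde maps that nesting onto structs, with PromptSet trimmed to a single field and the serde and toml crates assumed as dependencies (the server itself may well load settings through a different config loader):

use serde::Deserialize;

#[derive(Debug, Deserialize)]
struct PromptSet {
    main_system_prompt: String,
}

#[derive(Debug, Deserialize)]
struct PromptsConfig {
    local: PromptSet,
    gemini: PromptSet,
}

fn main() {
    // Each [prompts.<name>] sub-table becomes one field of PromptsConfig;
    // the outer "prompts." prefix is absent here because we parse the
    // table contents directly.
    let raw = r#"
        [local]
        main_system_prompt = "local prompt"

        [gemini]
        main_system_prompt = "gemini prompt"
    "#;
    let prompts: PromptsConfig = toml::from_str(raw).expect("valid TOML");
    assert_eq!(prompts.local.main_system_prompt, "local prompt");
    assert_eq!(prompts.gemini.main_system_prompt, "gemini prompt");
}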

sfcore-ai/crates/rag-api-server/src/config/settings.rs

Lines changed: 8 additions & 2 deletions
@@ -95,10 +95,16 @@ pub struct RagConfig {
 
 #[derive(Debug, Deserialize, Serialize, Clone)]
 pub struct PromptsConfig {
+    pub local: PromptSet,
+    pub gemini: PromptSet,
+}
+
+#[derive(Debug, Deserialize, Serialize, Clone)]
+pub struct PromptSet {
     pub main_system_prompt: String,
     pub context_extraction_system_prompt: String,
-    pub rag_query_system_prompt: String, // NEW
-    pub deep_scan_system_prompt: String, // NEW
+    pub rag_query_system_prompt: String,
+    pub deep_scan_system_prompt: String,
 }
 
 impl Settings {
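
Both branches of the new selection in main.rs simply clone one of the two sets, so the choice could live next to the data. A hypothetical accessor on PromptsConfig, not part of this commit, building on the structs above:

impl PromptsConfig {
    /// Pick the prompt set for the active backend; `use_gemini` would be
    /// derived from the optional [gemini] config section.
    pub fn active(&self, use_gemini: bool) -> &PromptSet {
        if use_gemini { &self.gemini } else { &self.local }
    }
}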

sfcore-ai/crates/rag-api-server/src/main.rs

Lines changed: 14 additions & 3 deletions
@@ -117,6 +117,17 @@ async fn main() -> Result<()> {
         }
     }
 
+    // Select Prompts based on Mode
+    let active_prompts = if let Some(gemini) = &settings.gemini {
+        if gemini.enabled {
+            settings.prompts.gemini.clone()
+        } else {
+            settings.prompts.local.clone()
+        }
+    } else {
+        settings.prompts.local.clone()
+    };
+
     // Initialize services
     let embedding_service = Arc::new(EmbeddingService::new(
         final_embedding_config.base_url.clone(),
@@ -127,7 +138,7 @@ async fn main() -> Result<()> {
 
     let llm_service = Arc::new(LlmService::new(
         final_llm_config.clone(),
-        settings.prompts.context_extraction_system_prompt.clone(),
+        active_prompts.context_extraction_system_prompt.clone(),
         limiters.clone(),
     ));
 
@@ -154,8 +165,8 @@ async fn main() -> Result<()> {
         Box::new((*llm_service).clone()),
         logger.clone(),
         settings.llm.stream_response,
-        settings.prompts.main_system_prompt.clone(),
-        settings.prompts.deep_scan_system_prompt.clone(),
+        active_prompts.main_system_prompt.clone(),
+        active_prompts.deep_scan_system_prompt.clone(),
         settings.rag.clone(),
     ));
     info!("✅ Conversation manager initialized");
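
The nested if let reduces to "a [gemini] section exists and is enabled". An equivalent, flatter sketch, assuming settings.gemini is an Option whose payload has an enabled: bool field, as the diff implies:

// true only when a [gemini] section is present and enabled
let use_gemini = settings.gemini.as_ref().map_or(false, |g| g.enabled);
let active_prompts = if use_gemini {
    settings.prompts.gemini.clone()
} else {
    settings.prompts.local.clone()
};

Either way the prompt set is chosen once at startup, so flipping gemini.enabled takes effect only after a restart.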

sfcore-ai/crates/rag-api-server/src/services/document_service.rs

Lines changed: 27 additions & 9 deletions
@@ -229,19 +229,37 @@ impl DocumentService {
             return Err(ApiError::BadRequest("Failed to create chunks".to_string()));
         }
 
+        // 4. Generate embeddings (batch) or Fallback
         // 4. Generate embeddings (batch) or Fallback
         report_progress(0.6, "Generating embeddings (this might take a while)...".to_string(), "embedding-inprogress".to_string());
         let texts: Vec<String> = chunks.clone();
 
-        let embeddings = match self.embedding_service.embed_batch(texts.clone()).await {
-            Ok(e) => e,
-            Err(err) => {
-                warn!("Embedding failed for document {} (falling back to zerovec): {}", document_id, err);
-                // Fallback to zero vectors so Deep Scan can still work
-                let dim = self.embedding_service.dimension;
-                vec![vec![0.0; dim]; texts.len()]
-            }
-        };
+        // Use configured batch size
+        let batch_size = self.embedding_batch_size.max(1);
+        let mut embeddings = Vec::with_capacity(texts.len());
+        let total_batches = (texts.len() + batch_size - 1) / batch_size;
+
+        for (i, batch_texts) in texts.chunks(batch_size).enumerate() {
+            // Report progress for this batch
+            report_progress(
+                0.6 + (0.2 * (i as f64 / total_batches as f64)),
+                format!("Embedding batch {}/{}...", i + 1, total_batches),
+                "embedding-inprogress".to_string()
+            );
+
+            // Embed batch
+            match self.embedding_service.embed_batch(batch_texts.to_vec()).await {
+                Ok(batch_embs) => {
+                    embeddings.extend(batch_embs);
+                },
+                Err(err) => {
+                    warn!("Embedding failed for batch {}/{} of document {} (falling back to zerovec): {}", i + 1, total_batches, document_id, err);
+                    // Fallback to zero vectors so Deep Scan can still work for this batch
+                    let dim = self.embedding_service.dimension;
+                    embeddings.extend(vec![vec![0.0; dim]; batch_texts.len()]);
+                }
+            }
+        }
         // debug!("Generated {} embeddings", embeddings.len());
 
         // 5. Build chunk data
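
Two things worth noting in the new loop: the zero-vector fallback now applies per batch, so one failed request no longer zeroes out the whole document, and the batch count is plain ceiling division. With embedding_batch_size = 10 from settings.toml, 25 chunks give (25 + 10 - 1) / 10 = 3 batches, and slice::chunks simply makes the last batch shorter. A standalone sketch of the loop's bookkeeping:

fn main() {
    let texts: Vec<String> = (0..25).map(|i| format!("chunk {i}")).collect();
    let batch_size = 10usize.max(1); // guard against a zero batch size, as the service does
    let total_batches = (texts.len() + batch_size - 1) / batch_size; // ceiling division -> 3

    for (i, batch) in texts.chunks(batch_size).enumerate() {
        // Progress ramps from 0.6 toward 0.8 across batches, mirroring report_progress.
        let progress = 0.6 + 0.2 * (i as f64 / total_batches as f64);
        println!("batch {}/{}: {} texts, progress {:.2}", i + 1, total_batches, batch.len(), progress);
    }
}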
