2 changes: 1 addition & 1 deletion src-tauri/Cargo.toml
@@ -29,7 +29,7 @@ chrono = { version = "0.4.42", features = ["serde"] }
anyhow = "1.0.100"
ignore = "0.4.25"
dashmap = "6.1.0"
-async-openai = { version = "0.31.1", features = ["responses", "embedding", "chat-completion", "chat-completion-types"] }
+async-openai = { version = "0.31.1", features = ["responses", "embedding", "chat-completion", "chat-completion-types", "byot"] }
specta = { version = "=2.0.0-rc.22", features = ["derive", "chrono", "serde"] }
specta-typescript = "0.0.9"
tauri-specta = { version = "=2.0.0-rc.21", features = ["derive", "typescript"] }
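Enabling async-openai's "byot" ("bring your own types") feature is what makes the custom request type below possible: it adds *_byot variants of the client methods that accept any serde-serializable payload and deserialize the response into a caller-chosen type. A minimal sketch of the pattern, mirroring the create_byot call added in src-tauri/src/ai.rs:

    // Sketch: with "byot" enabled, send a custom payload and choose the
    // response type yourself. `custom_request` is any value implementing
    // serde::Serialize (hypothetical here).
    let response: CreateEmbeddingResponse = client
        .embeddings()
        .create_byot(&custom_request)
        .await?;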
2 changes: 1 addition & 1 deletion src-tauri/migrations/20251205113555_init_schema.sql
@@ -59,7 +59,7 @@ CREATE TABLE IF NOT EXISTS file_chunks (
    file_id INTEGER NOT NULL,

    chunk_index INTEGER NOT NULL, -- index of chunk in file
-    content TEXT NOT NULL, -- text part
+    content TEXT, -- text part

    -- optional for syntax highlighting
    start_char_idx INTEGER,
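Dropping the NOT NULL constraint lets image-only chunks, which carry no extractable text, be stored with a NULL content column; the Rust models below (AddFileChunk, FileChunk, VectorSearchResult) switch content from String to Option<String> to match.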
25 changes: 25 additions & 0 deletions src-tauri/src/ai.rs
@@ -17,6 +17,8 @@ use async_openai::{
};
use base64::Engine;

use crate::ai::{self};

pub mod embedding;
pub mod llm;

@@ -78,6 +80,29 @@ impl AI {
        Ok(response)
    }

    pub async fn create_embedding_vecbox(
        &self,
        input: ai::embedding::vecbox::VecBoxEmbeddingInput,
        model: String,
    ) -> Result<CreateEmbeddingResponse> {
        let content_parts = input.to_content_parts();

        let request = ai::embedding::vecbox::VecBoxEmbeddingRequest {
            model: Some(model),
            input: content_parts,
            instruction: input.instruction,
        };

        let response: CreateEmbeddingResponse = self
            .client
            .embeddings()
            .create_byot(&request)
            .await
            .context("failed to generate vecBox embedding")?;

        Ok(response)
    }

    pub async fn request_llm(&self, input: InputParam, model: String) -> Result<Response> {
        let args = CreateResponseArgs::default()
            .input(input)
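A minimal call-site sketch for the new method (the ai instance and the model name "vecbox-embed-v1" are assumptions, not part of the diff):

    // Sketch: embed one text part and one image part in a single VecBox request.
    let input = ai::embedding::vecbox::VecBoxEmbeddingInput {
        text: Some("a photo of a cat".to_string()),
        instruction: None,
        image_url: Some("https://example.com/cat.png".to_string()),
    };
    let response = ai
        .create_embedding_vecbox(input, "vecbox-embed-v1".to_string())
        .await?;
    // `response` is the standard async-openai CreateEmbeddingResponse.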
1 change: 1 addition & 0 deletions src-tauri/src/ai/embedding.rs
@@ -0,0 +1 @@
pub mod vecbox;
46 changes: 46 additions & 0 deletions src-tauri/src/ai/embedding/vecbox.rs
@@ -0,0 +1,46 @@
use serde::{Deserialize, Serialize};

#[derive(Debug, Serialize, Deserialize, Clone)]
#[serde(tag = "type", rename_all = "snake_case")]
pub enum EmbeddingContentPart {
    Text { text: String },
    ImageUrl { image_url: EmbeddingImageUrl },
}

#[derive(Debug, Serialize, Deserialize, Clone)]
pub struct EmbeddingImageUrl {
    pub url: String,
}

#[derive(Debug, Serialize)]
pub struct VecBoxEmbeddingRequest {
    pub model: Option<String>,
    pub input: Vec<EmbeddingContentPart>,
    #[serde(skip_serializing_if = "Option::is_none")]
    pub instruction: Option<String>,
}

#[derive(Debug, Clone)]
pub struct VecBoxEmbeddingInput {
    pub text: Option<String>,
    pub instruction: Option<String>,
    pub image_url: Option<String>,
}

impl VecBoxEmbeddingInput {
    pub fn to_content_parts(&self) -> Vec<EmbeddingContentPart> {
        let mut parts = Vec::new();

        if let Some(ref text) = self.text {
            parts.push(EmbeddingContentPart::Text { text: text.clone() });
        }

        if let Some(ref url) = self.image_url {
            parts.push(EmbeddingContentPart::ImageUrl {
                image_url: EmbeddingImageUrl { url: url.clone() },
            });
        }

        parts
    }
}
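For reference, the tag = "type" and rename_all = "snake_case" attributes mean a mixed-content request serializes to roughly the following shape (a sketch assuming serde_json, reusing the values from the call-site example above):

    // Sketch: JSON produced for a VecBoxEmbeddingRequest (shown as comments).
    // {
    //   "model": "vecbox-embed-v1",
    //   "input": [
    //     { "type": "text", "text": "a photo of a cat" },
    //     { "type": "image_url", "image_url": { "url": "https://example.com/cat.png" } }
    //   ]
    // }
    // The "instruction" key is omitted entirely when None, per
    // #[serde(skip_serializing_if = "Option::is_none")].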
2 changes: 1 addition & 1 deletion src-tauri/src/database/chunks.rs
@@ -7,7 +7,7 @@ pub struct AddFileChunk {
    pub file_id: i32,

    pub chunk_index: i32,
-    pub content: String,
+    pub content: Option<String>,

    pub start_char_idx: Option<i32>,
    pub end_char_idx: Option<i32>,
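Since content is now optional, read paths have to handle the NULL case; one minimal way (a sketch, not from the diff):

    // Sketch: fall back to empty text when a chunk has no content.
    let text: &str = chunk.content.as_deref().unwrap_or("");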
50 changes: 38 additions & 12 deletions src-tauri/src/database/models.rs
@@ -1,9 +1,20 @@
use specta::Type;

-use serde::{Deserialize, Serialize};
-use sqlx::FromRow;
use chrono::{DateTime, Utc};
+use serde::{Deserialize, Serialize};
+use sqlx::types::Json;
+use sqlx::FromRow;

// EMBEDDING BACKEND TYPE

#[derive(Debug, Serialize, Deserialize, Clone, Type, PartialEq, Default)]
pub enum EmbeddingBackendType {
    #[default]
    #[serde(rename = "openai_compat")]
    OpenAICompat,
    #[serde(rename = "vecbox")]
    VecBox,
}

// APP CONFIG
#[derive(Debug, Serialize, Deserialize, Clone, Type)]
@@ -16,21 +27,25 @@ pub struct AppConfig {
#[serde(default = "default_parallelism")]
#[specta(type = i32)]
pub indexer_parallelism: usize, // How many files vecDir needs to index in parallel (2-4)

// AI Settings (Global defaults)
pub default_openai_url: Option<String>,
}

// Default values
-fn default_theme() -> String { "system".to_string() }
-fn default_parallelism() -> usize { 2 }
+fn default_theme() -> String {
+    "system".to_string()
+}
+fn default_parallelism() -> usize {
+    2
+}

impl Default for AppConfig {
    fn default() -> Self {
        Self {
            theme: default_theme(),
            indexer_parallelism: default_parallelism(),
-            default_openai_url: None
+            default_openai_url: None,
        }
    }
}
Expand All @@ -49,20 +64,31 @@ pub struct LLMConfig {
    pub api_key: String,

    pub model: String,

    pub text_processing_prompt: AIPrompt,
    pub image_processing_prompt: AIPrompt,
    pub default_processing_prompt: AIPrompt,
}

#[derive(Debug, Serialize, Deserialize, Clone, Type)]
pub struct EmbeddingConfig {
    #[serde(default)]
    pub backend_type: EmbeddingBackendType,

    pub api_base_url: String,
    pub api_key: String,

    pub model: String,

    pub dimensions: i32,

    pub text_processing_prompt: AIPrompt,
    pub image_processing_prompt: AIPrompt,
    pub default_processing_prompt: AIPrompt,

    pub search_prompt: AIPrompt,

    pub multimodal: bool,
}

// SPACES
@@ -77,7 +103,7 @@ pub struct Space {
    #[specta(type = EmbeddingConfig)]
    pub embedding_config: Json<EmbeddingConfig>,

    #[specta(type = LLMConfig)]
    pub llm_config: Json<LLMConfig>,

    pub created_at: DateTime<Utc>,
@@ -112,7 +138,7 @@ pub struct FileMetadata {
    pub modified_at_fs: DateTime<Utc>,
    pub last_indexed_at: Option<DateTime<Utc>>,
    pub content_hash: Option<String>,

    pub indexing_status: String,
    pub indexing_error_message: Option<String>,
}
@@ -124,17 +150,17 @@ pub struct FileChunk {
    pub file_id: i32,

    pub chunk_index: i32,
-    pub content: String,
+    pub content: Option<String>,

    pub start_char_idx: Option<i32>,
-    pub end_char_idx: Option<i32>
+    pub end_char_idx: Option<i32>,
}

// VECTOR SEARCH RESULT
#[derive(Debug, FromRow, Serialize, Type)]
pub struct VectorSearchResult {
    pub chunk_id: i32,
-    pub content: String,
+    pub content: Option<String>,

    pub file_id: i32,
    pub absolute_path: String,
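One behavioral note on the config changes above: EmbeddingBackendType round-trips through the JSON config columns as lowercase strings, and the #[serde(default)] on backend_type keeps existing EmbeddingConfig rows deserializable (a sketch, assuming serde_json):

    // Sketch: rename and default behavior of EmbeddingBackendType.
    // serde_json::to_string(&EmbeddingBackendType::VecBox)   == "\"vecbox\""
    // serde_json::from_str::<EmbeddingBackendType>("\"openai_compat\"")
    //     yields EmbeddingBackendType::OpenAICompat.
    // An EmbeddingConfig JSON with no "backend_type" field deserializes with
    // backend_type == EmbeddingBackendType::OpenAICompat (the #[default] variant).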