Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,7 @@ uuid = { version = "1.0", features = ["v4", "serde"] }
chrono = { version = "0.4", features = ["serde", "clock"] }
regex = "1.10"
base64 = "0.21"
image = { version = "0.25", default-features = false, features = ["png", "jpeg", "gif", "webp", "bmp"] }
md5 = "0.7"
once_cell = "1.19.0"
lazy_static = "1.4"
Expand Down
152 changes: 142 additions & 10 deletions src/apps/desktop/src/api/agentic_api.rs
Original file line number Diff line number Diff line change
Expand Up @@ -6,8 +6,10 @@ use std::sync::Arc;
use tauri::{AppHandle, State};

use crate::api::app_state::AppState;
use crate::api::context_upload_api::get_image_context;
use bitfun_core::agentic::coordination::ConversationCoordinator;
use bitfun_core::agentic::core::*;
use bitfun_core::agentic::image_analysis::ImageContextData;

#[derive(Debug, Deserialize)]
#[serde(rename_all = "camelCase")]
Expand Down Expand Up @@ -45,6 +47,8 @@ pub struct StartDialogTurnRequest {
pub user_input: String,
pub agent_type: String,
pub turn_id: Option<String>,
#[serde(default)]
pub image_contexts: Option<Vec<ImageContextData>>,
}

#[derive(Debug, Serialize)]
Expand Down Expand Up @@ -179,23 +183,131 @@ pub async fn start_dialog_turn(
coordinator: State<'_, Arc<ConversationCoordinator>>,
request: StartDialogTurnRequest,
) -> Result<StartDialogTurnResponse, String> {
let _stream = coordinator
.start_dialog_turn(
request.session_id,
request.user_input,
request.turn_id,
request.agent_type,
false,
)
.await
.map_err(|e| format!("Failed to start dialog turn: {}", e))?;
let StartDialogTurnRequest {
session_id,
user_input,
agent_type,
turn_id,
image_contexts,
} = request;

if let Some(image_contexts) = image_contexts
.as_ref()
.filter(|images| !images.is_empty())
.cloned()
{
let resolved_image_contexts = resolve_missing_image_payloads(image_contexts)?;
coordinator
.start_dialog_turn_with_image_contexts(
session_id,
user_input,
resolved_image_contexts,
turn_id,
agent_type,
)
.await
.map_err(|e| format!("Failed to start dialog turn: {}", e))?;
} else {
coordinator
.start_dialog_turn(
session_id,
user_input,
turn_id,
agent_type,
false,
)
.await
.map_err(|e| format!("Failed to start dialog turn: {}", e))?;
}

Ok(StartDialogTurnResponse {
success: true,
message: "Dialog turn started".to_string(),
})
}

/// Returns `true` when the optional string is absent or contains nothing but
/// whitespace — i.e. it carries no usable text.
fn is_blank_text(value: Option<&String>) -> bool {
    match value {
        Some(text) => text.trim().is_empty(),
        None => true,
    }
}

/// Completes image contexts whose payload the frontend omitted.
///
/// An entry that already has an `image_path` or `data_url` passes through
/// untouched. For the rest, the upload cache is consulted (keyed by the
/// entry's `id`): blank payload fields and an empty `mime_type` are filled
/// from the cached record, and metadata defaults (`name`, `width`, `height`,
/// `file_size`, `source`) are back-filled without overwriting caller-supplied
/// keys. Resolved entries are tagged `resolved_from_upload_cache: true`.
///
/// Errors when the cache has no record for an entry, or when an entry still
/// has neither payload field after resolution.
fn resolve_missing_image_payloads(
    image_contexts: Vec<ImageContextData>,
) -> Result<Vec<ImageContextData>, String> {
    let mut output = Vec::with_capacity(image_contexts.len());

    for mut ctx in image_contexts {
        // Only entries with BOTH payload fields blank need cache resolution.
        let needs_resolution =
            is_blank_text(ctx.image_path.as_ref()) && is_blank_text(ctx.data_url.as_ref());
        if !needs_resolution {
            output.push(ctx);
            continue;
        }

        let cached = match get_image_context(&ctx.id) {
            Some(record) => record,
            None => {
                return Err(format!(
                    "Image context not found for image_id={}. It may have expired. Please re-attach the image and retry.",
                    ctx.id
                ))
            }
        };

        // Copy over whichever payload fields are still blank; a cached value
        // that is itself blank is treated as absent.
        if is_blank_text(ctx.image_path.as_ref()) {
            ctx.image_path = cached.image_path.clone().filter(|p| !p.trim().is_empty());
        }
        if is_blank_text(ctx.data_url.as_ref()) {
            ctx.data_url = cached.data_url.clone().filter(|u| !u.trim().is_empty());
        }
        if ctx.mime_type.trim().is_empty() {
            ctx.mime_type = cached.mime_type.clone();
        }

        // Normalize metadata into a JSON object; a non-object value is
        // preserved under "raw_metadata" rather than discarded.
        let mut meta = ctx.metadata.take().unwrap_or_else(|| serde_json::json!({}));
        if !meta.is_object() {
            meta = serde_json::json!({ "raw_metadata": meta });
        }
        if let Some(map) = meta.as_object_mut() {
            // Back-fill cached defaults only where the caller left a gap.
            let defaults: [(&str, serde_json::Value); 5] = [
                ("name", serde_json::json!(cached.image_name)),
                ("width", serde_json::json!(cached.width)),
                ("height", serde_json::json!(cached.height)),
                ("file_size", serde_json::json!(cached.file_size)),
                ("source", serde_json::json!(cached.source)),
            ];
            for (key, value) in defaults {
                if !map.contains_key(key) {
                    map.insert(key.to_string(), value);
                }
            }
            // Audit marker: this entry was completed from the upload cache.
            map.insert(
                "resolved_from_upload_cache".to_string(),
                serde_json::json!(true),
            );
        }
        ctx.metadata = Some(meta);

        // The cache must have produced at least one payload field.
        if is_blank_text(ctx.image_path.as_ref()) && is_blank_text(ctx.data_url.as_ref()) {
            return Err(format!(
                "Image context {} is missing image_path/data_url after cache resolution",
                ctx.id
            ));
        }

        output.push(ctx);
    }

    Ok(output)
}

#[tauri::command]
pub async fn cancel_dialog_turn(
coordinator: State<'_, Arc<ConversationCoordinator>>,
Expand Down Expand Up @@ -394,6 +506,26 @@ fn message_to_dto(message: Message) -> MessageDTO {

let content = match message.content {
MessageContent::Text(text) => serde_json::json!({ "type": "text", "text": text }),
MessageContent::Multimodal { text, images } => {
let images: Vec<serde_json::Value> = images
.into_iter()
.map(|img| {
serde_json::json!({
"id": img.id,
"image_path": img.image_path,
"mime_type": img.mime_type,
"metadata": img.metadata,
"has_data_url": img.data_url.as_ref().is_some_and(|s| !s.is_empty()),
})
})
.collect();

serde_json::json!({
"type": "multimodal",
"text": text,
"images": images,
})
}
MessageContent::ToolResult {
tool_id,
tool_name,
Expand Down
73 changes: 73 additions & 0 deletions src/apps/desktop/src/api/commands.rs
Original file line number Diff line number Diff line change
Expand Up @@ -197,6 +197,21 @@ pub async fn test_ai_config_connection(
request: TestAIConfigConnectionRequest,
) -> Result<bitfun_core::util::types::ConnectionTestResult, String> {
let model_name = request.config.name.clone();
let supports_image_input = request
.config
.capabilities
.iter()
.any(|cap| {
matches!(
cap,
bitfun_core::service::config::types::ModelCapability::ImageUnderstanding
)
})
|| matches!(
request.config.category,
bitfun_core::service::config::types::ModelCategory::Multimodal
);

let ai_config = match request.config.try_into() {
Ok(config) => config,
Err(e) => {
Expand All @@ -209,6 +224,64 @@ pub async fn test_ai_config_connection(

match ai_client.test_connection().await {
Ok(result) => {
if !result.success {
info!(
"AI config connection test completed: model={}, success={}, response_time={}ms",
model_name, result.success, result.response_time_ms
);
return Ok(result);
}

if supports_image_input {
match ai_client.test_image_input_connection().await {
Ok(image_result) => {
let response_time_ms =
result.response_time_ms + image_result.response_time_ms;

if !image_result.success {
let image_error = image_result
.error_details
.unwrap_or_else(|| "Unknown image input test error".to_string());
let merged = bitfun_core::util::types::ConnectionTestResult {
success: false,
response_time_ms,
model_response: image_result.model_response.or(result.model_response),
error_details: Some(format!(
"Basic connection passed, but multimodal image input test failed: {}",
image_error
)),
};
info!(
"AI config connection test completed: model={}, success={}, response_time={}ms",
model_name, merged.success, merged.response_time_ms
);
return Ok(merged);
}

let merged = bitfun_core::util::types::ConnectionTestResult {
success: true,
response_time_ms,
model_response: image_result
.model_response
.or(result.model_response),
error_details: None,
};
info!(
"AI config connection test completed: model={}, success={}, response_time={}ms",
model_name, merged.success, merged.response_time_ms
);
return Ok(merged);
}
Err(e) => {
error!(
"AI config multimodal image input test failed unexpectedly: model={}, error={}",
model_name, e
);
return Err(format!("Connection test failed: {}", e));
}
}
}

info!(
"AI config connection test completed: model={}, success={}, response_time={}ms",
model_name, result.success, result.response_time_ms
Expand Down
73 changes: 18 additions & 55 deletions src/apps/desktop/src/api/image_analysis_api.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,10 @@

use crate::api::app_state::AppState;
use bitfun_core::agentic::coordination::ConversationCoordinator;
use bitfun_core::agentic::image_analysis::*;
use bitfun_core::agentic::image_analysis::{
resolve_vision_model_from_ai_config, AnalyzeImagesRequest, ImageAnalysisResult, ImageAnalyzer,
MessageEnhancer, SendEnhancedMessageRequest,
};
use log::error;
use std::sync::Arc;
use tauri::State;
Expand All @@ -21,65 +24,25 @@ pub async fn analyze_images(
format!("Failed to get AI config: {}", e)
})?;

let image_model_id = ai_config
.default_models
.image_understanding
.ok_or_else(|| {
error!("Image understanding model not configured");
"Image understanding model not configured".to_string()
})?;

let image_model_id = if image_model_id.is_empty() {
let vision_model = ai_config
.models
.iter()
.find(|m| {
m.enabled
&& m.capabilities.iter().any(|cap| {
matches!(
cap,
bitfun_core::service::config::types::ModelCapability::ImageUnderstanding
)
})
})
.map(|m| m.id.as_str());

match vision_model {
Some(model_id) => model_id,
None => {
error!("No image understanding model found");
return Err(
"Image understanding model not configured and no compatible model found.\n\n\
Please add a model that supports image understanding\
in [Settings → AI Model Config], enable 'image_understanding' capability, \
and assign it in [Settings → Super Agent]."
.to_string(),
);
}
}
} else {
&image_model_id
};

let image_model = ai_config
.models
.iter()
.find(|m| &m.id == image_model_id)
.ok_or_else(|| {
error!(
"Model not found: model_id={}, available_models={:?}",
image_model_id,
ai_config.models.iter().map(|m| &m.id).collect::<Vec<_>>()
);
format!("Model not found: {}", image_model_id)
})?
.clone();
let image_model = resolve_vision_model_from_ai_config(&ai_config).map_err(|e| {
error!(
"Image understanding model resolution failed: available_models={:?}, error={}",
ai_config.models.iter().map(|m| &m.id).collect::<Vec<_>>(),
e
);
format!(
"Image understanding model is not configured.\n\n\
Please select a model for [Settings → Default Model Config → Image Understanding Model].\n\n\
Details: {}",
e
)
})?;

let workspace_path = state.workspace_path.read().await.clone();

let ai_client = state
.ai_client_factory
.get_client_by_id(image_model_id)
.get_client_by_id(&image_model.id)
.await
.map_err(|e| format!("Failed to create AI client: {}", e))?;

Expand Down
Loading
Loading