Skip to content

Commit a534de4

Browse files
authored
Merge pull request #80 from wgqqqqq/feature/layout-redesign
feat: add end-to-end multimodal image turn flow with view_image fallback and persistence redaction
2 parents e126187 + b1ce49d commit a534de4

38 files changed

Lines changed: 2617 additions & 1131 deletions

File tree

Cargo.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -51,6 +51,7 @@ uuid = { version = "1.0", features = ["v4", "serde"] }
5151
chrono = { version = "0.4", features = ["serde", "clock"] }
5252
regex = "1.10"
5353
base64 = "0.21"
54+
image = { version = "0.25", default-features = false, features = ["png", "jpeg", "gif", "webp", "bmp"] }
5455
md5 = "0.7"
5556
once_cell = "1.19.0"
5657
lazy_static = "1.4"

src/apps/desktop/src/api/agentic_api.rs

Lines changed: 142 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -6,8 +6,10 @@ use std::sync::Arc;
66
use tauri::{AppHandle, State};
77

88
use crate::api::app_state::AppState;
9+
use crate::api::context_upload_api::get_image_context;
910
use bitfun_core::agentic::coordination::ConversationCoordinator;
1011
use bitfun_core::agentic::core::*;
12+
use bitfun_core::agentic::image_analysis::ImageContextData;
1113

1214
#[derive(Debug, Deserialize)]
1315
#[serde(rename_all = "camelCase")]
@@ -45,6 +47,8 @@ pub struct StartDialogTurnRequest {
4547
pub user_input: String,
4648
pub agent_type: String,
4749
pub turn_id: Option<String>,
50+
#[serde(default)]
51+
pub image_contexts: Option<Vec<ImageContextData>>,
4852
}
4953

5054
#[derive(Debug, Serialize)]
@@ -179,23 +183,131 @@ pub async fn start_dialog_turn(
179183
coordinator: State<'_, Arc<ConversationCoordinator>>,
180184
request: StartDialogTurnRequest,
181185
) -> Result<StartDialogTurnResponse, String> {
182-
let _stream = coordinator
183-
.start_dialog_turn(
184-
request.session_id,
185-
request.user_input,
186-
request.turn_id,
187-
request.agent_type,
188-
false,
189-
)
190-
.await
191-
.map_err(|e| format!("Failed to start dialog turn: {}", e))?;
186+
let StartDialogTurnRequest {
187+
session_id,
188+
user_input,
189+
agent_type,
190+
turn_id,
191+
image_contexts,
192+
} = request;
193+
194+
if let Some(image_contexts) = image_contexts
195+
.as_ref()
196+
.filter(|images| !images.is_empty())
197+
.cloned()
198+
{
199+
let resolved_image_contexts = resolve_missing_image_payloads(image_contexts)?;
200+
coordinator
201+
.start_dialog_turn_with_image_contexts(
202+
session_id,
203+
user_input,
204+
resolved_image_contexts,
205+
turn_id,
206+
agent_type,
207+
)
208+
.await
209+
.map_err(|e| format!("Failed to start dialog turn: {}", e))?;
210+
} else {
211+
coordinator
212+
.start_dialog_turn(
213+
session_id,
214+
user_input,
215+
turn_id,
216+
agent_type,
217+
false,
218+
)
219+
.await
220+
.map_err(|e| format!("Failed to start dialog turn: {}", e))?;
221+
}
192222

193223
Ok(StartDialogTurnResponse {
194224
success: true,
195225
message: "Dialog turn started".to_string(),
196226
})
197227
}
198228

229+
/// Returns `true` when `value` carries no usable text.
///
/// A value is considered blank when it is `None`, or when the contained
/// string is empty or consists only of whitespace.
fn is_blank_text(value: Option<&String>) -> bool {
    match value {
        Some(text) => text.trim().is_empty(),
        // An absent value is treated the same as an empty one.
        None => true,
    }
}
232+
233+
fn resolve_missing_image_payloads(
234+
image_contexts: Vec<ImageContextData>,
235+
) -> Result<Vec<ImageContextData>, String> {
236+
let mut resolved = Vec::with_capacity(image_contexts.len());
237+
238+
for mut image in image_contexts {
239+
let missing_payload =
240+
is_blank_text(image.image_path.as_ref()) && is_blank_text(image.data_url.as_ref());
241+
if !missing_payload {
242+
resolved.push(image);
243+
continue;
244+
}
245+
246+
let stored = get_image_context(&image.id).ok_or_else(|| {
247+
format!(
248+
"Image context not found for image_id={}. It may have expired. Please re-attach the image and retry.",
249+
image.id
250+
)
251+
})?;
252+
253+
if is_blank_text(image.image_path.as_ref()) {
254+
image.image_path = stored
255+
.image_path
256+
.clone()
257+
.filter(|s| !s.trim().is_empty());
258+
}
259+
if is_blank_text(image.data_url.as_ref()) {
260+
image.data_url = stored
261+
.data_url
262+
.clone()
263+
.filter(|s| !s.trim().is_empty());
264+
}
265+
if image.mime_type.trim().is_empty() {
266+
image.mime_type = stored.mime_type.clone();
267+
}
268+
269+
let mut metadata = image.metadata.take().unwrap_or_else(|| serde_json::json!({}));
270+
if !metadata.is_object() {
271+
metadata = serde_json::json!({ "raw_metadata": metadata });
272+
}
273+
if let Some(obj) = metadata.as_object_mut() {
274+
if !obj.contains_key("name") {
275+
obj.insert("name".to_string(), serde_json::json!(stored.image_name));
276+
}
277+
if !obj.contains_key("width") {
278+
obj.insert("width".to_string(), serde_json::json!(stored.width));
279+
}
280+
if !obj.contains_key("height") {
281+
obj.insert("height".to_string(), serde_json::json!(stored.height));
282+
}
283+
if !obj.contains_key("file_size") {
284+
obj.insert("file_size".to_string(), serde_json::json!(stored.file_size));
285+
}
286+
if !obj.contains_key("source") {
287+
obj.insert("source".to_string(), serde_json::json!(stored.source));
288+
}
289+
obj.insert(
290+
"resolved_from_upload_cache".to_string(),
291+
serde_json::json!(true),
292+
);
293+
}
294+
image.metadata = Some(metadata);
295+
296+
let still_missing =
297+
is_blank_text(image.image_path.as_ref()) && is_blank_text(image.data_url.as_ref());
298+
if still_missing {
299+
return Err(format!(
300+
"Image context {} is missing image_path/data_url after cache resolution",
301+
image.id
302+
));
303+
}
304+
305+
resolved.push(image);
306+
}
307+
308+
Ok(resolved)
309+
}
310+
199311
#[tauri::command]
200312
pub async fn cancel_dialog_turn(
201313
coordinator: State<'_, Arc<ConversationCoordinator>>,
@@ -394,6 +506,26 @@ fn message_to_dto(message: Message) -> MessageDTO {
394506

395507
let content = match message.content {
396508
MessageContent::Text(text) => serde_json::json!({ "type": "text", "text": text }),
509+
MessageContent::Multimodal { text, images } => {
510+
let images: Vec<serde_json::Value> = images
511+
.into_iter()
512+
.map(|img| {
513+
serde_json::json!({
514+
"id": img.id,
515+
"image_path": img.image_path,
516+
"mime_type": img.mime_type,
517+
"metadata": img.metadata,
518+
"has_data_url": img.data_url.as_ref().is_some_and(|s| !s.is_empty()),
519+
})
520+
})
521+
.collect();
522+
523+
serde_json::json!({
524+
"type": "multimodal",
525+
"text": text,
526+
"images": images,
527+
})
528+
}
397529
MessageContent::ToolResult {
398530
tool_id,
399531
tool_name,

src/apps/desktop/src/api/commands.rs

Lines changed: 73 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -197,6 +197,21 @@ pub async fn test_ai_config_connection(
197197
request: TestAIConfigConnectionRequest,
198198
) -> Result<bitfun_core::util::types::ConnectionTestResult, String> {
199199
let model_name = request.config.name.clone();
200+
let supports_image_input = request
201+
.config
202+
.capabilities
203+
.iter()
204+
.any(|cap| {
205+
matches!(
206+
cap,
207+
bitfun_core::service::config::types::ModelCapability::ImageUnderstanding
208+
)
209+
})
210+
|| matches!(
211+
request.config.category,
212+
bitfun_core::service::config::types::ModelCategory::Multimodal
213+
);
214+
200215
let ai_config = match request.config.try_into() {
201216
Ok(config) => config,
202217
Err(e) => {
@@ -209,6 +224,64 @@ pub async fn test_ai_config_connection(
209224

210225
match ai_client.test_connection().await {
211226
Ok(result) => {
227+
if !result.success {
228+
info!(
229+
"AI config connection test completed: model={}, success={}, response_time={}ms",
230+
model_name, result.success, result.response_time_ms
231+
);
232+
return Ok(result);
233+
}
234+
235+
if supports_image_input {
236+
match ai_client.test_image_input_connection().await {
237+
Ok(image_result) => {
238+
let response_time_ms =
239+
result.response_time_ms + image_result.response_time_ms;
240+
241+
if !image_result.success {
242+
let image_error = image_result
243+
.error_details
244+
.unwrap_or_else(|| "Unknown image input test error".to_string());
245+
let merged = bitfun_core::util::types::ConnectionTestResult {
246+
success: false,
247+
response_time_ms,
248+
model_response: image_result.model_response.or(result.model_response),
249+
error_details: Some(format!(
250+
"Basic connection passed, but multimodal image input test failed: {}",
251+
image_error
252+
)),
253+
};
254+
info!(
255+
"AI config connection test completed: model={}, success={}, response_time={}ms",
256+
model_name, merged.success, merged.response_time_ms
257+
);
258+
return Ok(merged);
259+
}
260+
261+
let merged = bitfun_core::util::types::ConnectionTestResult {
262+
success: true,
263+
response_time_ms,
264+
model_response: image_result
265+
.model_response
266+
.or(result.model_response),
267+
error_details: None,
268+
};
269+
info!(
270+
"AI config connection test completed: model={}, success={}, response_time={}ms",
271+
model_name, merged.success, merged.response_time_ms
272+
);
273+
return Ok(merged);
274+
}
275+
Err(e) => {
276+
error!(
277+
"AI config multimodal image input test failed unexpectedly: model={}, error={}",
278+
model_name, e
279+
);
280+
return Err(format!("Connection test failed: {}", e));
281+
}
282+
}
283+
}
284+
212285
info!(
213286
"AI config connection test completed: model={}, success={}, response_time={}ms",
214287
model_name, result.success, result.response_time_ms

src/apps/desktop/src/api/image_analysis_api.rs

Lines changed: 18 additions & 55 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,10 @@
22
33
use crate::api::app_state::AppState;
44
use bitfun_core::agentic::coordination::ConversationCoordinator;
5-
use bitfun_core::agentic::image_analysis::*;
5+
use bitfun_core::agentic::image_analysis::{
6+
resolve_vision_model_from_ai_config, AnalyzeImagesRequest, ImageAnalysisResult, ImageAnalyzer,
7+
MessageEnhancer, SendEnhancedMessageRequest,
8+
};
69
use log::error;
710
use std::sync::Arc;
811
use tauri::State;
@@ -21,65 +24,25 @@ pub async fn analyze_images(
2124
format!("Failed to get AI config: {}", e)
2225
})?;
2326

24-
let image_model_id = ai_config
25-
.default_models
26-
.image_understanding
27-
.ok_or_else(|| {
28-
error!("Image understanding model not configured");
29-
"Image understanding model not configured".to_string()
30-
})?;
31-
32-
let image_model_id = if image_model_id.is_empty() {
33-
let vision_model = ai_config
34-
.models
35-
.iter()
36-
.find(|m| {
37-
m.enabled
38-
&& m.capabilities.iter().any(|cap| {
39-
matches!(
40-
cap,
41-
bitfun_core::service::config::types::ModelCapability::ImageUnderstanding
42-
)
43-
})
44-
})
45-
.map(|m| m.id.as_str());
46-
47-
match vision_model {
48-
Some(model_id) => model_id,
49-
None => {
50-
error!("No image understanding model found");
51-
return Err(
52-
"Image understanding model not configured and no compatible model found.\n\n\
53-
Please add a model that supports image understanding\
54-
in [Settings → AI Model Config], enable 'image_understanding' capability, \
55-
and assign it in [Settings → Super Agent]."
56-
.to_string(),
57-
);
58-
}
59-
}
60-
} else {
61-
&image_model_id
62-
};
63-
64-
let image_model = ai_config
65-
.models
66-
.iter()
67-
.find(|m| &m.id == image_model_id)
68-
.ok_or_else(|| {
69-
error!(
70-
"Model not found: model_id={}, available_models={:?}",
71-
image_model_id,
72-
ai_config.models.iter().map(|m| &m.id).collect::<Vec<_>>()
73-
);
74-
format!("Model not found: {}", image_model_id)
75-
})?
76-
.clone();
27+
let image_model = resolve_vision_model_from_ai_config(&ai_config).map_err(|e| {
28+
error!(
29+
"Image understanding model resolution failed: available_models={:?}, error={}",
30+
ai_config.models.iter().map(|m| &m.id).collect::<Vec<_>>(),
31+
e
32+
);
33+
format!(
34+
"Image understanding model is not configured.\n\n\
35+
Please select a model for [Settings → Default Model Config → Image Understanding Model].\n\n\
36+
Details: {}",
37+
e
38+
)
39+
})?;
7740

7841
let workspace_path = state.workspace_path.read().await.clone();
7942

8043
let ai_client = state
8144
.ai_client_factory
82-
.get_client_by_id(image_model_id)
45+
.get_client_by_id(&image_model.id)
8346
.await
8447
.map_err(|e| format!("Failed to create AI client: {}", e))?;
8548

0 commit comments

Comments
 (0)