Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -19,12 +19,18 @@
import io.agentscope.core.agui.model.AguiFunctionCall;
import io.agentscope.core.agui.model.AguiMessage;
import io.agentscope.core.agui.model.AguiToolCall;
import io.agentscope.core.message.AudioBlock;
import io.agentscope.core.message.Base64Source;
import io.agentscope.core.message.ContentBlock;
import io.agentscope.core.message.ImageBlock;
import io.agentscope.core.message.Msg;
import io.agentscope.core.message.MsgRole;
import io.agentscope.core.message.Source;
import io.agentscope.core.message.TextBlock;
import io.agentscope.core.message.ToolResultBlock;
import io.agentscope.core.message.ToolUseBlock;
import io.agentscope.core.message.URLSource;
import io.agentscope.core.message.VideoBlock;
import io.agentscope.core.util.JsonException;
import io.agentscope.core.util.JsonUtils;
import java.util.ArrayList;
Expand All @@ -37,6 +43,17 @@
*
* <p>This class handles the bidirectional conversion between the AG-UI protocol's
* message format and AgentScope's internal message format.
*
* <p>Supports multimodal input per AG-UI protocol:
* <ul>
* <li>{@code text} → {@link TextBlock}</li>
* <li>{@code image} → {@link ImageBlock}</li>
* <li>{@code video} → {@link VideoBlock}</li>
* <li>{@code audio} → {@link AudioBlock}</li>
* <li>{@code document} → {@link TextBlock} placeholder naming the document's MIME type
* (the document payload itself is not converted)</li>
* </ul>
*
* <p>See https://docs.ag-ui.com/concepts/messages.md for AG-UI InputContent spec.
*/
public class AguiMessageConverter {
/**
Expand All @@ -54,8 +71,20 @@ public Msg toMsg(AguiMessage aguiMessage) {
MsgRole role = convertRole(aguiMessage.getRole());
List<ContentBlock> blocks = new ArrayList<>();

// Add text content if present
if (aguiMessage.getContent() != null && !aguiMessage.getContent().isEmpty()) {
// Handle multimodal content (InputContent array per AG-UI protocol)
if (aguiMessage.isMultimodalContent()) {
List<Map<String, Object>> parts = aguiMessage.getMultimodalContent();
if (parts != null) {
for (Map<String, Object> part : parts) {
ContentBlock block = convertInputContent(part);
if (block != null) {
blocks.add(block);
}
}
}
}
// Handle simple text content (backward compatible)
else if (aguiMessage.getContent() != null && !aguiMessage.getContent().isEmpty()) {
if (aguiMessage.isToolMessage() && aguiMessage.getToolCallId() != null) {
// For tool messages, wrap content in ToolResultBlock
blocks.add(
Expand All @@ -78,6 +107,91 @@ public Msg toMsg(AguiMessage aguiMessage) {
return Msg.builder().id(aguiMessage.getId()).role(role).content(blocks).build();
}

/**
 * Convert a single AG-UI InputContent part to an AgentScope ContentBlock.
 *
 * <p>The part's {@code type} field selects the target block:
 * {@code text} → {@link TextBlock}, {@code image} → {@link ImageBlock},
 * {@code video} → {@link VideoBlock}, {@code audio} → {@link AudioBlock},
 * {@code document} → {@link TextBlock} placeholder naming the MIME type.
 *
 * <p>The map originates from client-supplied JSON, so every field is type-checked
 * before use; a malformed part is skipped (null) rather than raising a
 * {@link ClassCastException} that would abort conversion of the whole message.
 *
 * @param part The InputContent map from AG-UI protocol (may be null)
 * @return The converted ContentBlock, or null if the part is null, its type is
 *     missing, not a string or unrecognized, or its payload is missing or malformed
 */
private ContentBlock convertInputContent(Map<String, Object> part) {
    if (part == null) {
        return null;
    }
    Object rawType = part.get("type");
    if (!(rawType instanceof String)) {
        return null;
    }

    switch ((String) rawType) {
        case "text": {
            Object text = part.get("text");
            return text instanceof String
                    ? TextBlock.builder().text((String) text).build()
                    : null;
        }

        case "image": {
            Source imageSource = extractSource(part);
            return imageSource != null
                    ? ImageBlock.builder().source(imageSource).build()
                    : null;
        }

        case "video": {
            Source videoSource = extractSource(part);
            return videoSource != null
                    ? VideoBlock.builder().source(videoSource).build()
                    : null;
        }

        case "audio": {
            Source audioSource = extractSource(part);
            return audioSource != null
                    ? AudioBlock.builder().source(audioSource).build()
                    : null;
        }

        case "document": {
            // The source is only validated here: a document becomes a text placeholder
            // naming its MIME type, and the payload itself is not forwarded.
            if (extractSource(part) == null) {
                return null;
            }
            // URL sources are accepted without a mimeType; avoid rendering "null".
            String mimeType = extractMimeType(part);
            String docDesc = "[Document: " + (mimeType != null ? mimeType : "unknown") + "]";
            return TextBlock.builder().text(docDesc).build();
        }

        default:
            return null;
    }
}

/**
 * Extract a {@link Source} from an InputContent part.
 *
 * <p>Reads the nested {@code source} object and supports two source types:
 * {@code url} (→ {@link URLSource}, built from {@code value}) and {@code data}
 * (→ {@link Base64Source}, built from {@code value} and {@code mimeType}).
 *
 * <p>All fields are type-checked instead of cast blindly, because the map comes
 * from client-supplied JSON and may hold values of any JSON type.
 *
 * @param part The InputContent map (may be null)
 * @return The extracted Source, or null if {@code source} is absent or not an object,
 *     the source type is unsupported, {@code value} is missing or not a string, or a
 *     {@code data} source lacks a string {@code mimeType}
 */
private Source extractSource(Map<String, Object> part) {
    Object rawSource = part != null ? part.get("source") : null;
    if (!(rawSource instanceof Map)) {
        return null;
    }
    Map<?, ?> sourceMap = (Map<?, ?>) rawSource;

    // Both supported source types carry their payload in "value".
    Object value = sourceMap.get("value");
    if (!(value instanceof String)) {
        return null;
    }

    Object sourceType = sourceMap.get("type");
    if ("url".equals(sourceType)) {
        return new URLSource((String) value);
    }
    if ("data".equals(sourceType)) {
        Object mimeType = sourceMap.get("mimeType");
        if (mimeType instanceof String) {
            return new Base64Source((String) value, (String) mimeType);
        }
    }
    return null;
}

/**
 * Extract mimeType from an InputContent part (for document type).
 *
 * <p>Looks up {@code source.mimeType}. Values are type-checked rather than cast,
 * so a malformed part yields null instead of a {@link ClassCastException}.
 *
 * @param part The InputContent map (may be null)
 * @return The MIME type string, or null if {@code source} is absent or not an object,
 *     or {@code mimeType} is absent or not a string
 */
private String extractMimeType(Map<String, Object> part) {
    Object rawSource = part != null ? part.get("source") : null;
    if (!(rawSource instanceof Map)) {
        return null;
    }
    Object mimeType = ((Map<?, ?>) rawSource).get("mimeType");
    return mimeType instanceof String ? (String) mimeType : null;
}

/**
* Convert an AgentScope message to an AG-UI message.
*
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
import com.fasterxml.jackson.annotation.JsonProperty;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
import java.util.Map;
import java.util.Objects;

/**
Expand All @@ -34,12 +35,24 @@
* <li>system - System instructions</li>
* <li>tool - Tool execution results</li>
* </ul>
*
* <p>Content can be a simple string or a multimodal array of
* {@code InputContent} objects (per AG-UI protocol).
* See https://docs.ag-ui.com/concepts/messages.md for details.
*
* <p>InputContent array element structure:
* <pre>{@code
* { "type": "text", "text": "Hello" }
* { "type": "image", "source": { "type": "url", "value": "https://...", "mimeType": "image/png" } }
* { "type": "video", "source": { "type": "url", "value": "https://...", "mimeType": "video/mp4" } }
* { "type": "audio", "source": { "type": "url", "value": "https://...", "mimeType": "audio/wav" } }
* }</pre>
*/
public class AguiMessage {

private final String id;
private final String role;
private final String content;
private final Object content; // String or List<map<string,object>> for multimodal
private final List<AguiToolCall> toolCalls;
private final String toolCallId;

Expand All @@ -48,15 +61,16 @@ public class AguiMessage {
*
* @param id The unique message ID
* @param role The message role (user, assistant, system, tool)
* @param content The message content
* @param content The message content - may be a String or a List of InputContent objects
* (multimodal input per AG-UI protocol)
* @param toolCalls Tool calls for assistant messages (optional)
* @param toolCallId Tool call ID for tool messages (optional)
*/
@JsonCreator
public AguiMessage(
@JsonProperty("id") String id,
@JsonProperty("role") String role,
@JsonProperty("content") String content,
@JsonProperty("content") Object content,
@JsonProperty("toolCalls") List<AguiToolCall> toolCalls,
@JsonProperty("toolCallId") String toolCallId) {
this.id = Objects.requireNonNull(id, "id cannot be null");
Expand Down Expand Up @@ -135,12 +149,44 @@ public String getRole() {
/**
 * Get the message content as plain text.
 *
 * @return The content when it is a String; null when the content is absent or is
 *     not a String (for example a multimodal InputContent array). Use
 *     {@link #getContentObject()} to access the content regardless of its type.
 */
public String getContent() {
    if (content instanceof String) {
        return (String) content;
    }
    return null;
}

/**
 * Get the content exactly as it was supplied to the constructor.
 *
 * @return The untyped content: a String for simple text messages, a List of
 *     InputContent maps for multimodal messages, or null when no content was given.
 */
public Object getContentObject() {
    return this.content;
}

/**
 * Tell whether the content is a multimodal InputContent array.
 *
 * @return true when the content is a List; false when it is a String, null,
 *     or any other type
 */
public boolean isMultimodalContent() {
    return this.content instanceof List<?>;
}

/**
 * Get the multimodal content as a list of InputContent objects.
 * Each item is a Map with keys: type, text/source/etc.
 *
 * <p>The content is untyped, so the array may hold elements that are not objects
 * (for example strings or nulls). Such elements are dropped here so that callers
 * iterating the result never hit a {@link ClassCastException} or a null element.
 * The returned list is a new copy; modifying it does not affect this message.
 *
 * @return A list of the Map elements of the content if it is multimodal (possibly
 *     empty), or null if the content is not a List
 */
@SuppressWarnings("unchecked")
public List<Map<String, Object>> getMultimodalContent() {
    if (!(content instanceof List)) {
        return null;
    }
    List<?> rawParts = (List<?>) content;
    List<Map<String, Object>> parts = new ArrayList<>(rawParts.size());
    for (Object rawPart : rawParts) {
        if (rawPart instanceof Map) {
            // Assumes string keys, as produced by JSON deserialization —
            // TODO confirm for callers that construct messages programmatically.
            parts.add((Map<String, Object>) rawPart);
        }
    }
    return parts;
}

/**
* Get the tool calls (for assistant messages).
*
Expand Down
Loading
Loading