Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
62 changes: 62 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -89,6 +89,68 @@ messages = [
]
```

**Sending Images:**

For vision-capable agents, you can include images in your messages:

```ruby
# Read image from file
image_data = File.binread('path/to/image.png')

# Create a message with text and image
message = Ai.user_message_with_image(
"What objects are in this image?",
image_data,
"image/png"
)

messages = [message]
```

To send multiple images in one message, build the `Ai::Message` manually from `Ai::TextPart` and `Ai::ImagePart` objects:

```ruby
image1 = File.binread('photo1.jpg')
image2 = File.binread('photo2.png')

message = Ai::Message.new(
role: Ai::MessageRole::User,
content: [
Ai::TextPart.new(text: "Compare these images:"),
Ai::ImagePart.new(image_data: image1, media_type: "image/jpeg"),
Ai::ImagePart.new(image_data: image2, media_type: "image/png")
]
)
```

**Using Image URLs:**

Instead of embedding the image bytes in the request, you can pass a URL and let the agent fetch the image itself:

```ruby
# Create a message with text and image URL
message = Ai.user_message_with_image_url(
"What objects are in this image?",
"https://example.com/photo.jpg",
"image/jpeg"
)

messages = [message]
```

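To send multiple image URLs in one message, build the message manually (each `Ai::ImagePart` accepts either `image_data` or `image_url`, never both):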

```ruby
message = Ai::Message.new(
role: Ai::MessageRole::User,
content: [
Ai::TextPart.new(text: "Compare these images from the web:"),
Ai::ImagePart.new(image_url: "https://example.com/image1.jpg", media_type: "image/jpeg"),
Ai::ImagePart.new(image_url: "https://example.com/image2.png", media_type: "image/png")
]
)
```

#### Step 3.3: Call the Agent

```ruby
Expand Down
18 changes: 18 additions & 0 deletions lib/ai.rb
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,8 @@ class Error < StandardError
# Type constants are registered with autoload, so each file is only required
# the first time its constant is referenced.
autoload :LanguageModelUsage, 'ai/types/language_model_usage'
autoload :MessageRole, 'ai/types/message_role'
autoload :Message, 'ai/types/message'
autoload :TextPart, 'ai/types/text_part'
autoload :ImagePart, 'ai/types/image_part'
autoload :ReasoningDetail, 'ai/types/reasoning_detail'
autoload :ResponseMessage, 'ai/types/response_message'
autoload :ResponseMetadata, 'ai/types/response_metadata'
Expand Down Expand Up @@ -57,6 +59,22 @@ def self.system_message(content)
Ai::Message.new(role: Ai::MessageRole::System, content: content)
end

# Builds a user message whose content is a text part followed by one image
# part carrying raw image bytes.
#
# @param text [String] the prompt text
# @param image_data [String] the image bytes
# @param media_type [String] the image MIME type, e.g. "image/png"
# @return [Ai::Message] a user-role message with two content parts
sig { params(text: String, image_data: String, media_type: String).returns(Ai::Message) }
def self.user_message_with_image(text, image_data, media_type)
  text_part = Ai::TextPart.new(text: text)
  image_part = Ai::ImagePart.new(image_data: image_data, media_type: media_type)

  Ai::Message.new(role: Ai::MessageRole::User, content: [text_part, image_part])
end

# Builds a user message whose content is a text part followed by one image
# part that references the image by URL.
#
# @param text [String] the prompt text
# @param image_url [String] the URL of the image
# @param media_type [String] the image MIME type, e.g. "image/jpeg"
# @return [Ai::Message] a user-role message with two content parts
sig { params(text: String, image_url: String, media_type: String).returns(Ai::Message) }
def self.user_message_with_image_url(text, image_url, media_type)
  text_part = Ai::TextPart.new(text: text)
  image_part = Ai::ImagePart.new(image_url: image_url, media_type: media_type)

  Ai::Message.new(role: Ai::MessageRole::User, content: [text_part, image_part])
end

sig { returns(Ai::Client) }
def self.client
@client ||=
Expand Down
3 changes: 2 additions & 1 deletion lib/ai/clients/mastra.rb
Original file line number Diff line number Diff line change
Expand Up @@ -244,7 +244,8 @@ def response(url:, messages:, options:)

# Convert option keys to camelCase and splat them into the request body for API compatibility
camelized_options = deep_camelize_keys(options)
request.body = { messages: messages, **camelized_options }.to_json
serialized_messages = messages.map(&:as_json)
request.body = { messages: serialized_messages, **camelized_options }.to_json

response = http.request(request)

Expand Down
53 changes: 53 additions & 0 deletions lib/ai/types/image_part.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
# typed: strict

require 'base64'

module Ai
  # An image segment of a multi-part message. The image is supplied either as
  # raw bytes (+image_data+) or as a URL (+image_url+); exactly one of the two
  # must be given.
  class ImagePart < T::Struct
    extend T::Sig

    const :type, String, default: 'image'
    const :image_data, T.nilable(String), default: nil
    const :image_url, T.nilable(String), default: nil
    const :media_type, String

    # Populates the struct, then rejects it unless exactly one image source
    # was provided.
    #
    # @raise [ArgumentError] when neither or both of image_data / image_url are set
    sig do
      params(
        media_type: String,
        type: String,
        image_data: T.nilable(String),
        image_url: T.nilable(String)
      ).void
    end
    def initialize(media_type:, type: 'image', image_data: nil, image_url: nil)
      super
      validate!
    end

    # Hash form of this part. The :image value is the URL when one was given,
    # otherwise a base64 data URI built from the raw bytes.
    sig { returns(T::Hash[Symbol, String]) }
    def as_json
      { type: type, image: image_url || inline_data_uri, mediaType: media_type }
    end

    private

    # Encodes image_data as a "data:<media_type>;base64,<payload>" URI.
    sig { returns(String) }
    def inline_data_uri
      payload = Base64.strict_encode64(T.must(image_data).b)
      "data:#{media_type};base64,#{payload}"
    end

    # Enforces that exactly one of image_data / image_url is present.
    sig { void }
    def validate!
      has_data = !image_data.nil?
      has_url = !image_url.nil?

      unless has_data || has_url
        raise ArgumentError, "Either image_data or image_url must be provided"
      end

      raise ArgumentError, "Cannot provide both image_data and image_url" if has_data && has_url
    end
  end
end
25 changes: 24 additions & 1 deletion lib/ai/types/message.rb
Original file line number Diff line number Diff line change
@@ -1,8 +1,31 @@
# typed: strict

module Ai
  # Content a message can carry: a plain string, or an ordered list of
  # text and image parts.
  MessageContent = T.type_alias { T.any(String, T::Array[T.any(Ai::TextPart, Ai::ImagePart)]) }

  # A single chat message: a role plus its content.
  class Message < T::Struct
    extend T::Sig

    const :role, Ai::MessageRole
    # Declared exactly once: the earlier String-only declaration is superseded
    # by MessageContent so that multi-part content is accepted.
    const :content, MessageContent

    # Hash form of the message. String content is passed through unchanged;
    # part arrays are converted by calling as_json on each part.
    #
    # @return [Hash] with :role (the serialized role) and :content
    sig do
      returns(
        T::Hash[
          Symbol,
          T.any(String, T::Array[T::Hash[Symbol, String]])
        ]
      )
    end
    def as_json
      # Bind to a local so the type check below narrows the union.
      value = content
      serialized_content = value.is_a?(String) ? value : value.map(&:as_json)

      { role: role.serialize, content: serialized_content }
    end
  end
end
15 changes: 15 additions & 0 deletions lib/ai/types/text_part.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
# typed: strict

module Ai
  # A plain-text segment of a multi-part message.
  class TextPart < T::Struct
    extend T::Sig

    const :type, String, default: 'text'
    const :text, String

    # Hash form of this part, with :type and :text keys.
    sig { returns(T::Hash[Symbol, String]) }
    def as_json
      {
        type: type,
        text: text
      }
    end
  end
end
155 changes: 155 additions & 0 deletions spec/lib/ai/ai_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -70,4 +70,159 @@
expect(message.role).to eq(Ai::MessageRole::System)
end
end

# Ai.user_message_with_image: message construction, the parts it contains,
# and the serialized form for inline image data.
describe '.user_message_with_image' do
  it 'creates a user message with text and image parts' do
    message = Ai.user_message_with_image('What is in this image?', 'binary image data', 'image/png')

    expect(message).to be_a(Ai::Message)
    expect(message.role).to eq(Ai::MessageRole::User)
    expect(message.content).to be_an(Array)
    expect(message.content.length).to eq(2)
  end

  it 'creates proper content parts' do
    prompt = 'Describe this'
    bytes = 'image bytes'
    mime = 'image/jpeg'

    text_part, image_part = Ai.user_message_with_image(prompt, bytes, mime).content

    expect(text_part).to be_a(Ai::TextPart)
    expect(text_part.text).to eq(prompt)

    expect(image_part).to be_a(Ai::ImagePart)
    expect(image_part.image_data).to eq(bytes)
    expect(image_part.media_type).to eq(mime)
  end

  it 'serializes correctly for API calls' do
    prompt = 'Analyze this'

    payload = Ai.user_message_with_image(prompt, 'test image', 'image/png').as_json
    parts = payload[:content]

    expect(payload[:role]).to eq('user')
    expect(parts).to be_an(Array)

    text_json, image_json = parts
    expect(text_json[:type]).to eq('text')
    expect(text_json[:text]).to eq(prompt)
    expect(image_json[:type]).to eq('image')
    expect(image_json[:image]).to start_with('data:image/png;base64,')
  end

  it 'handles different image formats' do
    %w[image/png image/jpeg image/gif image/webp].each do |mime|
      image_json = Ai.user_message_with_image('Test', 'data', mime).as_json[:content][1]

      expect(image_json[:mediaType]).to eq(mime)
      expect(image_json[:image]).to start_with("data:#{mime};base64,")
    end
  end

  it 'properly encodes binary data to base64' do
    # The eight-byte PNG signature stands in for real binary image content.
    png_signature = [137, 80, 78, 71, 13, 10, 26, 10].pack('C*')

    payload = Ai.user_message_with_image('What is this?', png_signature, 'image/png').as_json

    # Strip the data-URI header, then round-trip the remaining base64 text.
    encoded = payload[:content][1][:image].delete_prefix('data:image/png;base64,')
    round_tripped = Base64.strict_decode64(encoded)

    expect(round_tripped).to eq(png_signature)
  end
end

# Ai.user_message_with_image_url: message construction, the parts it contains,
# and the serialized form when the image is referenced by URL.
describe '.user_message_with_image_url' do
  it 'creates a user message with text and image URL parts' do
    message = Ai.user_message_with_image_url(
      'What is in this image?',
      'https://example.com/photo.jpg',
      'image/jpeg'
    )

    expect(message).to be_a(Ai::Message)
    expect(message.role).to eq(Ai::MessageRole::User)
    expect(message.content).to be_an(Array)
    expect(message.content.length).to eq(2)
  end

  it 'creates proper content parts with URL' do
    prompt = 'Describe this'
    url = 'https://cdn.example.com/image.png'
    mime = 'image/png'

    text_part, image_part = Ai.user_message_with_image_url(prompt, url, mime).content

    expect(text_part).to be_a(Ai::TextPart)
    expect(text_part.text).to eq(prompt)

    expect(image_part).to be_a(Ai::ImagePart)
    expect(image_part.image_url).to eq(url)
    expect(image_part.image_data).to be_nil
    expect(image_part.media_type).to eq(mime)
  end

  it 'serializes correctly for API calls with URL' do
    prompt = 'Analyze this'
    url = 'https://example.com/test.jpg'
    mime = 'image/jpeg'

    payload = Ai.user_message_with_image_url(prompt, url, mime).as_json
    parts = payload[:content]

    expect(payload[:role]).to eq('user')
    expect(parts).to be_an(Array)
    expect(parts.length).to eq(2)

    text_json, image_json = parts

    # Text part
    expect(text_json[:type]).to eq('text')
    expect(text_json[:text]).to eq(prompt)

    # Image part: the URL is passed through, not base64-encoded
    expect(image_json[:type]).to eq('image')
    expect(image_json[:image]).to eq(url)
    expect(image_json[:image]).not_to include('base64')
    expect(image_json[:mediaType]).to eq(mime)
  end

  it 'handles different image URL formats' do
    samples = {
      'https://example.com/image.png' => 'image/png',
      'http://test.org/photo.jpg' => 'image/jpeg',
      'https://cdn.example.com/images/12345.webp' => 'image/webp'
    }

    samples.each do |url, mime|
      image_json = Ai.user_message_with_image_url('Test', url, mime).as_json[:content][1]

      expect(image_json[:image]).to eq(url)
      expect(image_json[:mediaType]).to eq(mime)
    end
  end

  it 'works with HTTPS URLs' do
    payload = Ai.user_message_with_image_url(
      'Analyze',
      'https://secure.example.com/image.jpg',
      'image/jpeg'
    ).as_json

    expect(payload[:content][1][:image]).to start_with('https://')
  end
end
end
Loading