Skip to content

Commit a40e461

Browse files
fix: convert storage image source url to base64 (#1989)
1 parent c9fd702 commit a40e461

File tree

3 files changed

+66
-30
lines changed

3 files changed

+66
-30
lines changed

code/backend/batch/utilities/helpers/embedders/push_embedder.py

Lines changed: 25 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@
33
import logging
44
from typing import List
55
from urllib.parse import urlparse
6-
6+
import urllib.request
77
from ...helpers.llm_helper import LLMHelper
88
from ...helpers.env_helper import EnvHelper
99
from ..azure_computer_vision_client import AzureComputerVisionClient
@@ -18,6 +18,8 @@
1818
from ..document_loading_helper import DocumentLoading
1919
from ..document_chunking_helper import DocumentChunking
2020
from ...common.source_document import SourceDocument
21+
import base64
22+
from mimetypes import guess_type
2123

2224
logger = logging.getLogger(__name__)
2325

@@ -101,6 +103,27 @@ def __embed(
101103
else:
102104
logger.warning("No documents to upload.")
103105

106+
def __local_image_to_data_url(self, image_path, timeout=30):
    """Convert a local image file or a remote image URL to a base64 data URL.

    Args:
        image_path: Path to a local image file, or an http(s) URL pointing
            at the image (e.g. a blob-storage source URL).
        timeout: Seconds to wait for the remote download before raising
            (default 30). Previously the download had no timeout, so an
            unresponsive storage endpoint could hang this call forever.

    Returns:
        A ``data:<mime-type>;base64,<payload>`` string suitable for use as
        an ``image_url`` in a vision chat-completion message.

    Raises:
        urllib.error.URLError: if the remote download fails or times out.
        OSError: if the local file cannot be read.
    """
    # Guess the MIME type from the path/URL extension; fall back to a
    # generic binary type when the extension is unknown.
    mime_type, _ = guess_type(image_path)
    if mime_type is None:
        mime_type = 'application/octet-stream'

    # Distinguish a remote URL from a local file path by its scheme.
    parsed_url = urlparse(image_path)
    if parsed_url.scheme in ('http', 'https'):
        # Download the image from the URL. The timeout guards against an
        # unresponsive endpoint (see docstring).
        logger.info(f"Downloading image from URL: {image_path}")
        with urllib.request.urlopen(image_path, timeout=timeout) as response:
            image_data = response.read()
        base64_encoded_data = base64.b64encode(image_data).decode('utf-8')
    else:
        # Read the image bytes from the local filesystem.
        with open(image_path, "rb") as image_file:
            base64_encoded_data = base64.b64encode(image_file.read()).decode('utf-8')

    return f"data:{mime_type};base64,{base64_encoded_data}"
126+
104127
def __generate_image_caption(self, source_url):
105128
logger.info(f"Generating image caption for URL: {source_url}")
106129
model = self.env_helper.AZURE_OPENAI_VISION_MODEL
@@ -119,7 +142,7 @@ def __generate_image_caption(self, source_url):
119142
"text": "Describe this image in detail. Limit the response to 500 words.",
120143
"type": "text",
121144
},
122-
{"image_url": {"url": source_url}, "type": "image_url"},
145+
{"image_url": {"url": self.__local_image_to_data_url(source_url)}, "type": "image_url"},
123146
],
124147
},
125148
]

code/tests/functional/tests/functions/advanced_image_processing/test_advanced_image_processing.py

Lines changed: 9 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -63,6 +63,12 @@ def setup_blob_metadata_mocking(httpserver: HTTPServer, app_config: AppConfig):
6363
method="PUT",
6464
).respond_with_data()
6565

66+
# Mock GET request for image download (base64 conversion)
67+
httpserver.expect_request(
68+
f"/{app_config.get_from_json('AZURE_BLOB_STORAGE_INFO','containerName')}/{FILE_NAME}",
69+
method="GET",
70+
).respond_with_data(b"fake_image_data", content_type="image/jpeg")
71+
6672

6773
@pytest.fixture(autouse=True)
6874
def setup_caption_response(httpserver: HTTPServer, app_config: AppConfig):
@@ -192,11 +198,9 @@ def test_image_passed_to_llm_to_generate_caption(
192198
),
193199
)[0]
194200

195-
assert request.get_json()["messages"][1]["content"][1]["image_url"][
196-
"url"
197-
].startswith(
198-
f"{app_config.get('AZURE_STORAGE_ACCOUNT_ENDPOINT')}{app_config.get_from_json('AZURE_BLOB_STORAGE_INFO','containerName')}/{FILE_NAME}"
199-
)
201+
# The URL should be converted to base64 data URL
202+
image_url = request.get_json()["messages"][1]["content"][1]["image_url"]["url"]
203+
assert image_url.startswith("data:image/"), f"Expected base64 data URL, got {image_url[:100]}"
200204

201205

202206
def test_embeddings_generated_for_caption(

code/tests/utilities/helpers/test_push_embedder.py

Lines changed: 32 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -171,6 +171,20 @@ def azure_computer_vision_mock():
171171
yield mock
172172

173173

174+
@pytest.fixture(autouse=True)
175+
def urllib_request_mock():
176+
with patch(
177+
"backend.batch.utilities.helpers.embedders.push_embedder.urllib.request.urlopen"
178+
) as mock:
179+
# Create a mock response object
180+
mock_response = MagicMock()
181+
mock_response.read.return_value = b"fake_image_data"
182+
mock_response.__enter__.return_value = mock_response
183+
mock_response.__exit__.return_value = None
184+
mock.return_value = mock_response
185+
yield mock
186+
187+
174188
def test_embed_file_advanced_image_processing_vectorizes_image(
175189
azure_computer_vision_mock,
176190
):
@@ -200,29 +214,24 @@ def test_embed_file_advanced_image_processing_uses_vision_model_for_captioning(
200214
push_embedder.embed_file(source_url, "some-file-name.jpg")
201215

202216
# then
203-
llm_helper_mock.get_chat_completion.assert_called_once_with(
204-
[
205-
{
206-
"role": "system",
207-
"content": """You are an assistant that generates rich descriptions of images.
208-
You need to be accurate in the information you extract and detailed in the descriptons you generate.
209-
Do not abbreviate anything and do not shorten sentances. Explain the image completely.
210-
If you are provided with an image of a flow chart, describe the flow chart in detail.
211-
If the image is mostly text, use OCR to extract the text as it is displayed in the image.""",
212-
},
213-
{
214-
"role": "user",
215-
"content": [
216-
{
217-
"text": "Describe this image in detail. Limit the response to 500 words.",
218-
"type": "text",
219-
},
220-
{"image_url": {"url": source_url}, "type": "image_url"},
221-
],
222-
},
223-
],
224-
env_helper_mock.AZURE_OPENAI_VISION_MODEL,
225-
)
217+
# Verify the vision model is called with direct URL (not base64) for token efficiency
218+
llm_helper_mock.get_chat_completion.assert_called_once()
219+
call_args = llm_helper_mock.get_chat_completion.call_args
220+
messages = call_args[0][0]
221+
model = call_args[0][1]
222+
223+
assert model == env_helper_mock.AZURE_OPENAI_VISION_MODEL
224+
assert len(messages) == 2
225+
assert messages[0]["role"] == "system"
226+
assert "You are an assistant that generates rich descriptions of images" in messages[0]["content"]
227+
assert messages[1]["role"] == "user"
228+
assert len(messages[1]["content"]) == 2
229+
assert messages[1]["content"][0]["type"] == "text"
230+
assert "Describe this image in detail" in messages[1]["content"][0]["text"]
231+
assert messages[1]["content"][1]["type"] == "image_url"
232+
# Image should be converted to base64 data URL
233+
image_url = messages[1]["content"][1]["image_url"]["url"]
234+
assert image_url.startswith("data:image/"), f"Expected base64 data URL, got {image_url[:100]}"
226235

227236

228237
def test_embed_file_advanced_image_processing_stores_embeddings_in_search_index(

0 commit comments

Comments
 (0)