@@ -171,6 +171,20 @@ def azure_computer_vision_mock():
         yield mock


+@pytest.fixture(autouse=True)
+def urllib_request_mock():
+    with patch(
+        "backend.batch.utilities.helpers.embedders.push_embedder.urllib.request.urlopen"
+    ) as mock:
+        # Create a mock response object
+        mock_response = MagicMock()
+        mock_response.read.return_value = b"fake_image_data"
+        mock_response.__enter__.return_value = mock_response
+        mock_response.__exit__.return_value = None
+        mock.return_value = mock_response
+        yield mock
+
+
 def test_embed_file_advanced_image_processing_vectorizes_image(
     azure_computer_vision_mock,
 ):
@@ -200,29 +214,24 @@ def test_embed_file_advanced_image_processing_uses_vision_model_for_captioning(
     push_embedder.embed_file(source_url, "some-file-name.jpg")

     # then
-    llm_helper_mock.get_chat_completion.assert_called_once_with(
-        [
-            {
-                "role": "system",
-                "content": """You are an assistant that generates rich descriptions of images.
-You need to be accurate in the information you extract and detailed in the descriptons you generate.
-Do not abbreviate anything and do not shorten sentances. Explain the image completely.
-If you are provided with an image of a flow chart, describe the flow chart in detail.
-If the image is mostly text, use OCR to extract the text as it is displayed in the image.""",
-            },
-            {
-                "role": "user",
-                "content": [
-                    {
-                        "text": "Describe this image in detail. Limit the response to 500 words.",
-                        "type": "text",
-                    },
-                    {"image_url": {"url": source_url}, "type": "image_url"},
-                ],
-            },
-        ],
-        env_helper_mock.AZURE_OPENAI_VISION_MODEL,
-    )
+    # Verify the vision model receives the image as a base64 data URL (fetched via
+    # the mocked urlopen) rather than the raw source URL
+    llm_helper_mock.get_chat_completion.assert_called_once()
+    call_args = llm_helper_mock.get_chat_completion.call_args
+    messages = call_args[0][0]
+    model = call_args[0][1]
+
+    assert model == env_helper_mock.AZURE_OPENAI_VISION_MODEL
+    assert len(messages) == 2
+    assert messages[0]["role"] == "system"
+    assert "You are an assistant that generates rich descriptions of images" in messages[0]["content"]
+    assert messages[1]["role"] == "user"
+    assert len(messages[1]["content"]) == 2
+    assert messages[1]["content"][0]["type"] == "text"
+    assert "Describe this image in detail" in messages[1]["content"][0]["text"]
+    assert messages[1]["content"][1]["type"] == "image_url"
+    # Image should be converted to a base64 data URL
+    image_url = messages[1]["content"][1]["image_url"]["url"]
+    assert image_url.startswith("data:image/"), f"Expected base64 data URL, got {image_url[:100]}"


 def test_embed_file_advanced_image_processing_stores_embeddings_in_search_index(
0 commit comments