Commit f604655
feat: Set limit for advanced image processing images (#978)
1 parent 671da33 commit f604655

9 files changed: +117 -8 lines changed

.vscode/settings.json

Lines changed: 1 addition & 0 deletions

@@ -21,4 +21,5 @@
   "python.testing.cwd": "${workspaceFolder}/code",
   "python.testing.unittestEnabled": false,
   "python.testing.pytestEnabled": true,
+  "pylint.cwd": "${workspaceFolder}/code",
 }

code/backend/batch/utilities/helpers/env_helper.py

Lines changed: 8 additions & 2 deletions

@@ -48,7 +48,7 @@ def __load_config(self, **kwargs) -> None:
             "AZURE_SEARCH_INDEX_IS_PRECHUNKED", ""
         )
         self.AZURE_SEARCH_FILTER = os.getenv("AZURE_SEARCH_FILTER", "")
-        self.AZURE_SEARCH_TOP_K = int(os.getenv("AZURE_SEARCH_TOP_K", "5"))
+        self.AZURE_SEARCH_TOP_K = self.get_env_var_int("AZURE_SEARCH_TOP_K", 5)
         self.AZURE_SEARCH_ENABLE_IN_DOMAIN = (
             os.getenv("AZURE_SEARCH_ENABLE_IN_DOMAIN", "true").lower() == "true"
         )
@@ -114,6 +114,9 @@ def __load_config(self, **kwargs) -> None:
         self.USE_ADVANCED_IMAGE_PROCESSING = self.get_env_var_bool(
             "USE_ADVANCED_IMAGE_PROCESSING", "False"
         )
+        self.ADVANCED_IMAGE_PROCESSING_MAX_IMAGES = self.get_env_var_int(
+            "ADVANCED_IMAGE_PROCESSING_MAX_IMAGES", 1
+        )
         self.AZURE_COMPUTER_VISION_ENDPOINT = os.getenv(
             "AZURE_COMPUTER_VISION_ENDPOINT"
         )
@@ -244,7 +247,10 @@ def get_env_var_bool(self, var_name: str, default: str = "True") -> bool:
     def get_env_var_array(self, var_name: str, default: str = ""):
         return os.getenv(var_name, default).split(",")

-    def get_env_var_float(self, var_name: str, default: int):
+    def get_env_var_int(self, var_name: str, default: int):
+        return int(os.getenv(var_name, default))
+
+    def get_env_var_float(self, var_name: str, default: float):
         return float(os.getenv(var_name, default))

     def is_auth_type_keys(self):
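
As a sanity check on the new helper, here is a minimal, self-contained sketch of the `get_env_var_int` pattern introduced above; the `EnvHelperSketch` class and the sample values are illustrative only and not part of the repository.

```python
import os


class EnvHelperSketch:
    """Simplified stand-in for the repository's EnvHelper (illustration only)."""

    def get_env_var_int(self, var_name: str, default: int) -> int:
        # os.getenv returns a string when the variable is set, otherwise the
        # default; casting with int() covers both cases.
        return int(os.getenv(var_name, default))


helper = EnvHelperSketch()

# Unset: falls back to the default of 1 image per request.
print(helper.get_env_var_int("ADVANCED_IMAGE_PROCESSING_MAX_IMAGES", 1))  # 1

# Set via the environment: parsed from its string form.
os.environ["ADVANCED_IMAGE_PROCESSING_MAX_IMAGES"] = "3"
print(helper.get_env_var_int("ADVANCED_IMAGE_PROCESSING_MAX_IMAGES", 1))  # 3
```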

code/backend/batch/utilities/tools/question_answer_tool.py

Lines changed: 1 addition & 1 deletion

@@ -186,7 +186,7 @@ def create_image_url_list(self, source_documents):
             doc.source.replace("_SAS_TOKEN_PLACEHOLDER_", container_sas)
             for doc in source_documents
             if doc.title is not None and doc.title.split(".")[-1] in image_types
-        ]
+        ][: self.env_helper.ADVANCED_IMAGE_PROCESSING_MAX_IMAGES]

         return image_urls
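
The limit itself is applied with an ordinary list slice on the filtered image URLs. A minimal sketch of that pattern, using made-up document titles and a hypothetical `max_images` value in place of `self.env_helper.ADVANCED_IMAGE_PROCESSING_MAX_IMAGES`:

```python
# Hypothetical inputs for illustration; the real code builds SAS-signed URLs
# from SourceDocument instances.
image_types = {"jpg", "jpeg", "png"}
titles = ["report.pdf", "diagram.jpg", "photo.png"]
max_images = 1  # mirrors the ADVANCED_IMAGE_PROCESSING_MAX_IMAGES default

# Filter to image documents, then cap the list length with a slice.
image_urls = [t for t in titles if t.split(".")[-1] in image_types][:max_images]

print(image_urls)  # ['diagram.jpg'] -- only the first image survives the cap
```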

code/tests/functional/app_config.py

Lines changed: 1 addition & 0 deletions

@@ -74,6 +74,7 @@ class AppConfig:
         "AZURE_SPEECH_RECOGNIZER_LANGUAGES": "en-US,es-ES",
         "TIKTOKEN_CACHE_DIR": f"{os.path.dirname(os.path.realpath(__file__))}/resources",
         "USE_ADVANCED_IMAGE_PROCESSING": "False",
+        "ADVANCED_IMAGE_PROCESSING_MAX_IMAGES": "1",
         "USE_KEY_VAULT": "False",
         # These values are set directly within EnvHelper, adding them here ensures
         # that they are removed from the environment when remove_from_environment() runs
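
The test config stores every value as a string because environment variables are strings. A rough sketch of how such a dictionary might be pushed into and cleared from the process environment for a test run, assuming hypothetical helper names (`apply_to_environment`, `remove_from_environment`) modelled on the comment above:

```python
import os

# Hypothetical subset of the AppConfig defaults used in the functional tests.
config = {
    "USE_ADVANCED_IMAGE_PROCESSING": "False",
    "ADVANCED_IMAGE_PROCESSING_MAX_IMAGES": "1",
}


def apply_to_environment(cfg: dict[str, str]) -> None:
    # Set each configured variable for the duration of the test.
    for name, value in cfg.items():
        os.environ[name] = value


def remove_from_environment(cfg: dict[str, str]) -> None:
    # Clean up so later tests see an unpolluted environment.
    for name in cfg:
        os.environ.pop(name, None)


apply_to_environment(config)
assert os.environ["ADVANCED_IMAGE_PROCESSING_MAX_IMAGES"] == "1"
remove_from_environment(config)
```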

code/tests/utilities/tools/test_question_answer_tool.py

Lines changed: 73 additions & 4 deletions

@@ -42,6 +42,7 @@ def env_helper_mock():
     env_helper.AZURE_SEARCH_USE_INTEGRATED_VECTORIZATION = False
     env_helper.USE_ADVANCED_IMAGE_PROCESSING = False
    env_helper.AZURE_OPENAI_VISION_MODEL = "mock vision model"
+    env_helper.ADVANCED_IMAGE_PROCESSING_MAX_IMAGES = 1

     yield env_helper

@@ -83,7 +84,7 @@ def search_handler_mock():


 @pytest.fixture(autouse=True)
-def source_documents_mock():
+def get_source_documents_mock():
     with patch(
         "backend.batch.utilities.tools.question_answer_tool.Search.get_source_documents"
     ) as mock:
@@ -106,11 +107,11 @@ def source_documents_mock():
             ),
         ]
         mock.return_value = documents
-        yield documents
+        yield mock


 def test_answer_question_returns_source_documents(
-    source_documents_mock: list[SourceDocument],
+    get_source_documents_mock: MagicMock,
 ):
     # given
     tool = QuestionAnswerTool()
@@ -121,7 +122,7 @@ def test_answer_question_returns_source_documents(
     # then
     assert len(answer.source_documents) == 2
     assert isinstance(answer.source_documents[0], SourceDocument)
-    assert answer.source_documents == source_documents_mock
+    assert answer.source_documents == get_source_documents_mock.return_value


 def test_answer_question_returns_answer():
@@ -350,3 +351,71 @@ def test_use_advanced_vision_processing(env_helper_mock, llm_helper_mock):
     assert isinstance(answer, Answer)
     assert answer.question == "mock question"
     assert answer.answer == "mock content"
+
+
+def test_limit_number_of_images_passed_to_llm(
+    get_source_documents_mock: MagicMock,
+    env_helper_mock: MagicMock,
+    llm_helper_mock: MagicMock,
+):
+    # given
+    get_source_documents_mock.return_value = [
+        SourceDocument(
+            id="mock id",
+            content="mock content",
+            title="mock title",
+            source="mock source",
+            chunk=123,
+            offset=123,
+            page_number=123,
+        ),
+        SourceDocument(
+            id="mock id 2",
+            content="mock content 2",
+            title="mock title 2.jpg",
+            source="mock source 2_SAS_TOKEN_PLACEHOLDER_",
+            chunk_id="mock chunk id 2",
+        ),
+        SourceDocument(
+            id="mock id 3",
+            content="mock content 3",
+            title="mock title 3.jpg",
+            source="mock source 3_SAS_TOKEN_PLACEHOLDER_",
+            chunk_id="mock chunk id 3",
+        ),
+    ]
+    env_helper_mock.USE_ADVANCED_IMAGE_PROCESSING = True
+    tool = QuestionAnswerTool()
+
+    # when
+    tool.answer_question("mock question", [])
+
+    # then
+    llm_helper_mock.get_chat_completion.assert_called_once_with(
+        [
+            {"content": "mock answering system prompt", "role": "system"},
+            {
+                "content": 'Sources: {"retrieved_documents":[{"[doc1]":{"content":"mock example content"}}]}, Question: mock example user question',
+                "name": "example_user",
+                "role": "system",
+            },
+            {
+                "content": "mock example answer",
+                "name": "example_assistant",
+                "role": "system",
+            },
+            {"content": "mock azure openai system message", "role": "system"},
+            {
+                "content": [
+                    {
+                        "type": "text",
+                        "text": 'Sources: {"retrieved_documents":[{"[doc1]":{"content":"mock content"}},{"[doc2]":{"content":"mock content 2"}},{"[doc3]":{"content":"mock content 3"}}]}, Question: mock question',
+                    },
+                    {"type": "image_url", "image_url": "mock source 2mock sas"},
+                ],
+                "role": "user",
+            },
+        ],
+        model="mock vision model",
+        temperature=0,
+    )

docs/advanced_image_processing.md

Lines changed: 7 additions & 0 deletions

@@ -38,4 +38,11 @@ Once enabled, advanced image processing will be enabled for all supported image

 ![image](./images/enable_advanced_image_processing.png)

+The `ADVANCED_IMAGE_PROCESSING_MAX_IMAGES` environment variable can be used to control the maximum number of images passed to GPT-4 vision in a single request (default is `1`).
+Increasing the number of images consumes more tokens and may result in throttled requests.
+
+```bash
+azd env set ADVANCED_IMAGE_PROCESSING_MAX_IMAGES 2
+```
+
 Advanced image processing is only used in the `custom` conversation flow and not the `byod` flow, as Azure OpenAI On Your Data only supports Ada embeddings. It is currently not possible to use advanced image processing when integrated vectorization is enabled.

infra/main.bicep

Lines changed: 7 additions & 0 deletions

@@ -110,6 +110,9 @@ param azureOpenAIModelCapacity int = 30
 @description('Enables the use of a vision LLM and Computer Vision for embedding images')
 param useAdvancedImageProcessing bool = false

+@description('The maximum number of images to pass to the vision model in a single request')
+param advancedImageProcessingMaxImages int = 1
+
 @description('Azure OpenAI Vision Model Deployment Name')
 param azureOpenAIVisionModel string = 'gpt-4'

@@ -554,6 +557,7 @@ module web './app/web.bicep' = if (hostingModel == 'code') {
       AZURE_SPEECH_SERVICE_REGION: location
       AZURE_SPEECH_RECOGNIZER_LANGUAGES: recognizedLanguages
       USE_ADVANCED_IMAGE_PROCESSING: useAdvancedImageProcessing
+      ADVANCED_IMAGE_PROCESSING_MAX_IMAGES: advancedImageProcessingMaxImages
       ORCHESTRATION_STRATEGY: orchestrationStrategy
       CONVERSATION_FLOW: conversationFlow
       LOGLEVEL: logLevel
@@ -627,6 +631,7 @@ module web_docker './app/web.bicep' = if (hostingModel == 'container') {
       AZURE_SPEECH_SERVICE_REGION: location
       AZURE_SPEECH_RECOGNIZER_LANGUAGES: recognizedLanguages
       USE_ADVANCED_IMAGE_PROCESSING: useAdvancedImageProcessing
+      ADVANCED_IMAGE_PROCESSING_MAX_IMAGES: advancedImageProcessingMaxImages
       ORCHESTRATION_STRATEGY: orchestrationStrategy
       CONVERSATION_FLOW: conversationFlow
       LOGLEVEL: logLevel
@@ -1097,3 +1102,5 @@ output ADMIN_WEBSITE_NAME string = hostingModel == 'code'
   : adminweb_docker.outputs.WEBSITE_ADMIN_URI
 output LOGLEVEL string = logLevel
 output CONVERSATION_FLOW string = conversationFlow
+output USE_ADVANCED_IMAGE_PROCESSING bool = useAdvancedImageProcessing
+output ADVANCED_IMAGE_PROCESSING_MAX_IMAGES int = advancedImageProcessingMaxImages

infra/main.bicepparam

Lines changed: 1 addition & 0 deletions

@@ -26,6 +26,7 @@ param azureOpenAIModelName = readEnvironmentVariable('AZURE_OPENAI_MODEL_NAME',
 param azureOpenAIModelVersion = readEnvironmentVariable('AZURE_OPENAI_MODEL_VERSION', '0613')
 param azureOpenAIModelCapacity = int(readEnvironmentVariable('AZURE_OPENAI_MODEL_CAPACITY', '30'))
 param useAdvancedImageProcessing = bool(readEnvironmentVariable('USE_ADVANCED_IMAGE_PROCESSING', 'false'))
+param advancedImageProcessingMaxImages = int(readEnvironmentVariable('ADVANCED_IMAGE_PROCESSING_MAX_IMAGES', '1'))
 param azureOpenAIVisionModel = readEnvironmentVariable('AZURE_OPENAI_VISION_MODEL', 'gpt-4')
 param azureOpenAIVisionModelName = readEnvironmentVariable('AZURE_OPENAI_VISION_MODEL_NAME', 'gpt-4')
 param azureOpenAIVisionModelVersion = readEnvironmentVariable('AZURE_OPENAI_VISION_MODEL_VERSION', 'vision-preview')

infra/main.json

Lines changed: 18 additions & 1 deletion

@@ -5,7 +5,7 @@
     "_generator": {
       "name": "bicep",
       "version": "0.27.1.19265",
-      "templateHash": "6027201902589320671"
+      "templateHash": "10484197901623589764"
     }
   },
   "parameters": {
@@ -229,6 +229,13 @@
         "description": "Enables the use of a vision LLM and Computer Vision for embedding images"
       }
     },
+    "advancedImageProcessingMaxImages": {
+      "type": "int",
+      "defaultValue": 1,
+      "metadata": {
+        "description": "The maximum number of images to pass to the vision model in a single request"
+      }
+    },
     "azureOpenAIVisionModel": {
       "type": "string",
       "defaultValue": "gpt-4",
@@ -2031,6 +2038,7 @@
           "AZURE_SPEECH_SERVICE_REGION": "[parameters('location')]",
           "AZURE_SPEECH_RECOGNIZER_LANGUAGES": "[parameters('recognizedLanguages')]",
           "USE_ADVANCED_IMAGE_PROCESSING": "[parameters('useAdvancedImageProcessing')]",
+          "ADVANCED_IMAGE_PROCESSING_MAX_IMAGES": "[parameters('advancedImageProcessingMaxImages')]",
           "ORCHESTRATION_STRATEGY": "[parameters('orchestrationStrategy')]",
           "CONVERSATION_FLOW": "[parameters('conversationFlow')]",
           "LOGLEVEL": "[parameters('logLevel')]"
@@ -2984,6 +2992,7 @@
           "AZURE_SPEECH_SERVICE_REGION": "[parameters('location')]",
           "AZURE_SPEECH_RECOGNIZER_LANGUAGES": "[parameters('recognizedLanguages')]",
           "USE_ADVANCED_IMAGE_PROCESSING": "[parameters('useAdvancedImageProcessing')]",
+          "ADVANCED_IMAGE_PROCESSING_MAX_IMAGES": "[parameters('advancedImageProcessingMaxImages')]",
           "ORCHESTRATION_STRATEGY": "[parameters('orchestrationStrategy')]",
           "CONVERSATION_FLOW": "[parameters('conversationFlow')]",
           "LOGLEVEL": "[parameters('logLevel')]"
@@ -11102,6 +11111,14 @@
     "CONVERSATION_FLOW": {
       "type": "string",
       "value": "[parameters('conversationFlow')]"
+    },
+    "USE_ADVANCED_IMAGE_PROCESSING": {
+      "type": "bool",
+      "value": "[parameters('useAdvancedImageProcessing')]"
+    },
+    "ADVANCED_IMAGE_PROCESSING_MAX_IMAGES": {
+      "type": "int",
+      "value": "[parameters('advancedImageProcessingMaxImages')]"
     }
   }
 }

0 commit comments