Skip to content

Commit c7ab2e2

Browse files
author
Evan Lin
committed
Revert "fix(image): handle business card JSON response and parsing errors"
This reverts commit 81c1f81.
1 parent 81c1f81 commit c7ab2e2

1 file changed

Lines changed: 8 additions & 52 deletions

File tree

main.py

Lines changed: 8 additions & 52 deletions
Original file line number | Diff line number | Diff line change
@@ -15,7 +15,6 @@
1515
import aiohttp
1616
import uuid
1717
from google.cloud import storage
18-
import json
1918

2019

2120
# Import LangChain components with Vertex AI
@@ -27,11 +26,8 @@
2726
# get channel_secret and channel_access_token from your environment variable
2827
channel_secret = os.getenv('ChannelSecret', None)
2928
channel_access_token = os.getenv('ChannelAccessToken', None)
30-
image_prompt = '''
31-
Analyze the image and reply in zh-TW.
32-
If the image is a business card, extract the information and format it as a JSON object with keys: "name", "company", "title", "phone", "email", and "address". If a field is not available, use "N/A".
33-
If it\'s not a business card, describe the image with scientific detail.
34-
Do not include any explanatory text in your response if it is a business card, only the JSON object.
29+
imgage_prompt = '''
30+
Describe this image with scientific detail, reply in zh-TW:
3531
'''
3632

3733
# Vertex AI needs a project ID and possibly authentication
@@ -66,7 +62,7 @@
6662
# Using a single, powerful multimodal model for both text and images.
6763
# gemini-2.0-flash is a powerful, cost-effective model for multimodal tasks.
6864
model = ChatVertexAI(
69-
model_name="gemini-1.5-flash-001",
65+
model_name="gemini-2.0-flash",
7066
project=google_project_id,
7167
location=google_location,
7268
# Increased token limit for detailed image descriptions
@@ -147,7 +143,7 @@ async def handle_callback(request: Request):
147143

148144
# Create an in-memory binary stream from the bytes
149145
image_stream = BytesIO(image_bytes)
150-
# Reset the stream\'s pointer to the beginning for the upload
146+
# Reset the stream's pointer to the beginning for the upload
151147
image_stream.seek(0)
152148

153149
file_name = f"{uuid.uuid4()}.jpg"
@@ -160,47 +156,7 @@ async def handle_callback(request: Request):
160156
image_stream, file_name, google_storage_bucket)
161157
if gcs_uri:
162158
print(f"Image uploaded to {gcs_uri}")
163-
response_text = generate_image_description(gcs_uri)
164-
try:
165-
# The model might wrap the JSON in markdown ```json ... ```
166-
if '```json' in response_text:
167-
response_text = response_text.split('```json')[1].split('```')[0].strip()
168-
169-
card_data = json.loads(response_text)
170-
171-
# FIX: Handle case where JSON is a list containing one object
172-
if isinstance(card_data, list) and card_data:
173-
card_data = card_data[0]
174-
175-
if isinstance(card_data, dict):
176-
# Lowercase keys for consistency
177-
card_data = {k.lower(): v for k, v in card_data.items()}
178-
179-
# Format the business card data for display
180-
name = card_data.get('name', 'N/A')
181-
company = card_data.get('company', 'N/A')
182-
title = card_data.get('title', 'N/A')
183-
phone = card_data.get('phone', 'N/A')
184-
email = card_data.get('email', 'N/A')
185-
address = card_data.get('address', 'N/A')
186-
187-
if isinstance(phone, list):
188-
phone = ', '.join(phone)
189-
190-
response = (
191-
f"名片資訊:\n"
192-
f"姓名:{name}\n"
193-
f"公司:{company}\n"
194-
f"職稱:{title}\n"
195-
f"電話:{phone}\n"
196-
f"Email:{email}\n"
197-
f"地址:{address}"
198-
)
199-
else:
200-
response = response_text
201-
except (json.JSONDecodeError, AttributeError):
202-
# Not a business card JSON, so it\'s a description
203-
response = response_text
159+
response = generate_image_description(gcs_uri)
204160
finally:
205161
# Clean up the GCS file if it was uploaded
206162
if gcs_uri:
@@ -242,12 +198,12 @@ def generate_image_description(image_uri):
242198
243199
Generate a description for an image using LangChain with Vertex AI.
244200
"""
245-
# The prompt is already defined globally as image_prompt
201+
# The prompt is already defined globally as imgage_prompt
246202
message = HumanMessage(
247203
content=[
248204
{
249205
"type": "text",
250-
"text": image_prompt
206+
"text": imgage_prompt
251207
},
252208
{
253209
"type": "image_url",
@@ -257,4 +213,4 @@ def generate_image_description(image_uri):
257213
)
258214

259215
response = model.invoke([message])
260-
return response.content
216+
return response.content

0 commit comments

Comments
 (0)