1212from transformers import pipeline
1313from dotenv import load_dotenv
1414
15+
# Pull environment variables from a local .env file so the Hugging Face
# token never has to be hard-coded in source.
load_dotenv()

# May be None when HF_TOKEN is unset; the inference-API request would then
# go out unauthenticated and likely be rejected.
hf_token = os.environ.get("HF_TOKEN")
1718
# Summarization pipeline used later to condense the image caption plus any
# OCR-extracted text into a short summary.
# device=-1 pins inference to the CPU — presumably to avoid the
# meta-tensor/GPU-dispatch error mentioned in the project history; confirm
# before enabling GPU.
summarizer = pipeline(
    task="summarization",
    model="facebook/bart-large-cnn",
    device=-1,
)
2324
# Basic Streamlit page chrome: tab title, layout, heading, and intro blurb.
st.set_page_config(page_title="Modern Edge Agent", layout="centered")
st.title("Modern Edge Agent: Image Captioning & Generation")
st.write(
    "Upload an image to get a caption, or enter a prompt to generate an "
    "image. Runs on edge devices!"
)
2728
2829
29- # --- Image Captioning (BLIP) ---
3030@st .cache_resource
3131def load_blip ():
3232 processor = BlipProcessor .from_pretrained (
@@ -39,21 +39,18 @@ def load_blip():
3939 return processor , model
4040
4141
42- # --- Text-to-Image (Stable Diffusion) ---
4342def generate_image_via_api (prompt ):
44- api_url = "https://api-inference.huggingface.co/models/black-forest-labs/FLUX.1-dev"
43+ api_url = ("https://api-inference.huggingface.co/models/"
44+ "black-forest-labs/FLUX.1-dev" )
4545 headers = {"Authorization" : f"Bearer { hf_token } " }
4646 payload = {"inputs" : prompt }
4747 response = requests .post (api_url , headers = headers , json = payload )
48-
48+
4949 if response .status_code == 200 :
50- # Try to handle both raw image and base64 JSON
5150 try :
52- # Try raw image
5351 img = Image .open (BytesIO (response .content ))
5452 return img
5553 except Exception :
56- # Try base64 JSON
5754 result = response .json ()
5855 if isinstance (result , dict ) and "image" in result :
5956 img_data = base64 .b64decode (result ["image" ])
@@ -74,40 +71,37 @@ def generate_image_via_api(prompt):
7471with tab1 :
7572 st .header ("Image Captioning" )
7673 uploaded_file = st .file_uploader (
77- "Upload an image" ,
74+ "Upload an image" ,
7875 type = ["jpg" , "jpeg" , "png" ]
7976 )
80-
77+
8178 if uploaded_file :
8279 image = Image .open (uploaded_file ).convert ("RGB" )
8380 st .image (image , caption = "Uploaded Image" , use_column_width = True )
8481 processor , model = load_blip ()
85-
86- # Short caption
82+
8783 inputs = processor (image , return_tensors = "pt" )
8884 with torch .no_grad ():
8985 out = model .generate (** inputs )
9086 caption = processor .decode (out [0 ], skip_special_tokens = True )
9187 st .success (f"**Caption:** { caption } " )
9288
93- # OCR for text extraction
9489 st .write ("Extracting text from image..." )
9590 reader = easyocr .Reader (['en' ], gpu = False )
9691 image_np = np .array (image )
9792 ocr_result = reader .readtext (image_np )
98-
99- # Only keep text with high confidence and filter out short/meaningless results
93+
10094 filtered_text = [
101- item [1 ] for item in ocr_result
102- if item [2 ] > 0.5 and len (item [1 ].strip ()) > 2 and item [1 ].isalpha ()
95+ item [1 ] for item in ocr_result
96+ if (item [2 ] > 0.5 and len (item [1 ].strip ()) > 2 and
97+ item [1 ].isalpha ())
10398 ]
10499 extracted_text = " " .join (filtered_text )
105-
100+
106101 if extracted_text :
107102 st .info (f"**Extracted Text:** { extracted_text } " )
108103
109- # Generate dynamic summary using Hugging Face Transformers (small LLM)
110- st .write ("Generating dynamic summary with open-source LLM (Transformers)..." )
104+ st .write ("Generating dynamic summary with open-source LLM..." )
111105 try :
112106 if extracted_text :
113107 summary_input = f"{ caption } . { extracted_text } "
@@ -117,24 +111,25 @@ def generate_image_via_api(prompt):
117111 st .info (f"**Summary:** { output } " )
118112 except Exception as e :
119113 st .warning (f"Summary generation failed: { e } " )
120- # Fallback to previous logic
121114 if extracted_text :
122- summary = f"This image contains: { caption } . The following text is present: { extracted_text } "
115+ summary = (f"This image contains: { caption } . "
116+ f"The following text is present: { extracted_text } " )
123117 else :
124- summary = f"This image contains: { caption } . No readable text was detected."
118+ summary = (f"This image contains: { caption } . "
119+ f"No readable text was detected." )
125120 st .info (f"**Summary:** { summary } " )
126121
127122with tab2 :
128123 st .header ("Text-to-Image Generation" )
129124 prompt = st .text_area ("Enter a prompt to generate an image:" )
130-
125+
131126 if st .button ("Generate Image" ) and prompt :
132127 with st .spinner ("Generating image via Hugging Face API..." ):
133128 try :
134129 image = generate_image_via_api (prompt )
135130 st .image (
136- image ,
137- caption = "Generated Image" ,
131+ image ,
132+ caption = "Generated Image" ,
138133 use_column_width = True
139134 )
140135 except Exception as e :
0 commit comments