Skip to content

Allow for multimodal LLMs via LiteLLM Provider #17

@isayahc

Description

@isayahc

Example code

import os
import base64
from litellm import completion

# ==========================================
# SETUP: API KEYS
# ==========================================
# os.environ["OPENAI_API_KEY"] = "sk-..."
# os.environ["ANTHROPIC_API_KEY"] = "sk-ant-..."
# os.environ["GEMINI_API_KEY"] = "AIza..."

# ==========================================
# METHOD 1: Online Image (Webpage/URL)
# ==========================================
def analyze_online_image(image_url: str, prompt: str, model="anthropic/claude-3-haiku-20240307"):
    """Ask a vision model about an image that is referenced by URL.

    Args:
        image_url: Address of the image; it is passed through unchanged as
            the ``image_url`` part of the user message.
        prompt: Text instruction sent alongside the image.
        model: Model identifier forwarded to ``completion``.

    Returns:
        The ``content`` of the first choice's message in the response.
    """
    # A single user turn carrying two content parts: the text, then the image.
    text_part = {"type": "text", "text": prompt}
    image_part = {"type": "image_url", "image_url": {"url": image_url}}
    user_turn = {"role": "user", "content": [text_part, image_part]}

    reply = completion(model=model, messages=[user_turn])
    first_choice = reply.choices[0]
    return first_choice.message.content


# ==========================================
# METHOD 2: Local Image (Base64 Encoded)
# ==========================================
def encode_local_image(image_path: str) -> str:
    """Return the contents of the file at ``image_path`` as base64 text."""
    # Binary mode: the bytes are encoded exactly as stored on disk.
    with open(image_path, "rb") as handle:
        raw_bytes = handle.read()
    encoded = base64.b64encode(raw_bytes)
    return encoded.decode("utf-8")

def analyze_local_image(image_path: str, prompt: str, model: str = "anthropic/claude-3-haiku-20240307"):
    """Send a local image file, inlined as a base64 data URI, to a vision model.

    The media type written into the data URI is chosen from the file name's
    suffix, compared case-insensitively. JPEG, PNG, GIF and WebP suffixes are
    recognised; any other suffix falls back to ``image/png``.

    Args:
        image_path: Path of the image file to read and encode.
        prompt: Text instruction sent alongside the image.
        model: Model identifier forwarded to ``completion``.

    Returns:
        The ``content`` of the first choice's message in the response.
    """
    # Suffix groups paired with the media type to declare for them. Previously
    # everything that was not .jpg/.jpeg was labelled image/png, which
    # mislabelled GIF and WebP files.
    mime_by_suffix = (
        ((".jpg", ".jpeg"), "image/jpeg"),
        ((".png",), "image/png"),
        ((".gif",), "image/gif"),
        ((".webp",), "image/webp"),
    )

    lowered_path = image_path.lower()
    mime_type = "image/png"  # fallback for unrecognised suffixes
    for suffixes, candidate in mime_by_suffix:
        if lowered_path.endswith(suffixes):
            mime_type = candidate
            break

    base64_image = encode_local_image(image_path)

    messages = [
        {
            "role": "user",
            "content": [
                {"type": "text", "text": prompt},
                {
                    "type": "image_url",
                    "image_url": {
                        # Data URI embedding the encoded file contents
                        "url": f"data:{mime_type};base64,{base64_image}"
                    },
                },
            ],
        }
    ]

    response = completion(model=model, messages=messages)
    return response.choices[0].message.content


# ==========================================
# USAGE EXAMPLES
# ==========================================
if __name__ == "__main__":
    # The same question is used for every example below.
    question = "Describe the architecture in this image and what materials might have been used."
    sample_url = "https://tse1.mm.bing.net/th/id/OIP.NiXvEkf0ufIPdecZe_Y58wHaEP?rs=1&pid=ImgDetMain&o=7&rm=3"

    # Example 1: image referenced by URL.
    print("--- ONLINE IMAGE RESULT ---")
    online_answer = analyze_online_image(image_url=sample_url, prompt=question)
    print(online_answer)

    # Example 2: image read from disk. Uncomment and set sample_path to run it.
    # print("\n--- LOCAL IMAGE RESULT ---")
    # sample_path = "path/to/your/architecture_photo.jpg"
    # print(analyze_local_image(image_path=sample_path, prompt=question))

Metadata

Metadata

Assignees

No one assigned

    Labels

    enhancement: New feature or request

    Type

    No type

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions