diff --git a/.gitignore b/.gitignore index 96eaa4a..03044ca 100644 --- a/.gitignore +++ b/.gitignore @@ -164,3 +164,4 @@ u2net_segm.pth data outputs +veo_test.py \ No newline at end of file diff --git a/README.md b/README.md index dc92873..93dc20f 100644 --- a/README.md +++ b/README.md @@ -29,6 +29,7 @@ OpenTryOn is an open-source AI toolkit designed for fashion technology and virtu - GPT-Image-1 & GPT-Image-1.5 (OpenAI): High-quality image generation with strong prompt understanding, consistent composition, and reliable visual accuracy. GPT-Image-1.5 offers enhanced quality and better consistency - **Video Generation**: - Luma AI Video Generation Model (Dream Machine): High-quality video generation with text-to-image and image-to-video modes. + - Google Veo 3 Video Generation Model: Generate high-quality, cinematic videos from text or images with realistic motion, temporal consistency, and fine-grained control over style and camera dynamics. - **Datasets Module**: - Fashion-MNIST dataset loader with automatic download - VITON-HD dataset loader with lazy loading via PyTorch DataLoader @@ -61,6 +62,7 @@ OpenTryOn is an open-source AI toolkit designed for fashion technology and virtu - [Image Generation with Luma AI](#luma-ai-image-generation) - [Image Generation with OpenAI](#image-generation-with-gpt-image-1) - [Video Generation with Luma AI](#video-generation-with-luma-ai) + - [Video Generation with Google Veo 3](#video-generation-with-google-veo-3) - [Preprocessing Functions](#preprocessing-functions) - [Demos](#demos) - [Project Structure](#project-structure) @@ -138,13 +140,13 @@ KLING_AI_BASE_URL=https://api-singapore.klingai.com # Optional, defaults to Sin # Segmind Credentials (required for Segmind virtual try-on) SEGMIND_API_KEY=your_segmind_api_key -# Google Gemini Credentials (required for Nano Banana image generation) +# Google Gemini Credentials (required for Nano Banana image generation and Google Veo 3 Video generation) 
GEMINI_API_KEY=your_gemini_api_key # BFL API Credentials (required for FLUX.2 image generation) BFL_API_KEY=your_bfl_api_key -# Luma AI Credentials (required for Luma AI image generation) +# Luma AI Credentials (required for Luma AI image generation and Luma AI Video generation) LUMA_AI_API_KEY=your_luma_ai_api_key # OpenAI Credentials (required for OpenAI GPT-Image-1 image generation) @@ -164,7 +166,7 @@ GOOGLE_API_KEY=your_google_api_key # For Google Gemini - For Kling AI, obtain your API key and secret key from the [Kling AI Developer Portal](https://app.klingai.com/global/dev/document-api/apiReference/model/functionalityTry) - For Segmind, obtain your API key from the [Segmind API Portal](https://www.segmind.com/models/try-on-diffusion/api) -- For Nano Banana, obtain your API key from the [Google AI Studio](https://aistudio.google.com/app/apikey) +- For Nano Banana and Google Veo 3, obtain your API key from the [Google AI Studio](https://aistudio.google.com/app/apikey) - For FLUX.2 models, obtain your API key from [BFL AI](https://docs.bfl.ai/) - For FLUX.2 models, obtain your API key from [BFL AI](https://docs.bfl.ai/) @@ -1621,7 +1623,7 @@ for idx, vid_bytes in enumerate(video_list): #### Supported Features -- **Text to Video**: Generate videos using test descriptions. +- **Text to Video**: Generate videos using text descriptions. - **Image to Video**: Generate videos using keyframes. - **Keyframe Generation**: Generate videos using a start keyframe or an end keyframe or both. - **Duration**: Durations in seconds (5s, 9s, 10s) @@ -1639,6 +1641,138 @@ for idx, vid_bytes in enumerate(video_list): **Reference**: [Luma AI Video Generation Documentation](https://docs.lumalabs.ai/docs/video-generation) +### Video Generation with Google Veo 3 + +Generate high-quality, cinematic videos using Google’s Veo 3 models (Veo 3.0 and Veo 3.1), including (veo-3.1-generate-preview, veo-3.1-fast-generate-preview, veo-3.0-generate-001, and veo-3.0-fast-generate-001). 
These models support text-to-video, image-to-video, reference-images-to-video, and frames-to-video generation for controlled motion, realistic dynamics, and consistent visual quality. + +#### Prerequisites + +1. **Google Gemini Account Setup**: + - Sign up for a Google AI Studio account at [Google AI Studio](https://aistudio.google.com/) + - Obtain your API key from the [API Keys page](https://aistudio.google.com/app/apikey) + - Configure credentials in your `.env` file (see Environment Variables section) + +2. **Model Selection**: + - **veo-3.1-generate-preview**: Generate high-quality cinematic videos with enhanced motion realism and temporal consistency using the latest Veo 3.1 model. + - **veo-3.1-fast-generate-preview**: Create videos quickly with optimized inference speed while retaining strong visual quality and motion coherence. + - **veo-3.0-generate-001**: Produce stable, high-fidelity videos using the proven Veo 3.0 generation model with reliable motion and style control. + - **veo-3.0-fast-generate-001**: Generate videos faster with the Veo 3.0 fast variant, balancing speed and visual quality for rapid iteration. 
+ +#### Command Line Usage + +```bash +# Text to Video with Google Veo 3 +python veo_video.py --provider veo-3.1-generate-preview --mode text --prompt "model at a fashion show" --aspect 16:9 --duration 8 --resolution 1080p --output_dir outputs + +# Video generation with negative prompt +python veo_video.py --provider veo-3.1-generate-preview --mode text --prompt "person with a hat" --resolution 1080p --negative_prompt "cartoon, anime, kids" + +# Image to Video +python veo_video.py --provider veo-3.1-generate-preview --mode image --prompt "model at a fashion show" --images person.jpg --aspect 16:9 --duration 8 --resolution 1080p + +# Video generation with reference images (up to 3) +python veo_video.py --provider veo-3.1-generate-preview --mode reference --prompt "create a fashion week video" --images person1.jpg person2.jpg person3.jpg --resolution 1080p + +# Video generation with frames +python veo_video.py --provider veo-3.1-generate-preview --mode frames --prompt "create a cinematic video" --start_image person1.jpg --end_image person2.jpg --aspect 16:9 --resolution 720p +``` + +#### Python API Usage + +**Google Veo 3** + +```python + +from dotenv import load_dotenv +load_dotenv() + +from pathlib import Path +from tryon.api.veo import VeoAdapter + +adapter = VeoAdapter() +video_list = [] + + +def save_video(video_bytes: bytes, idx: int): + Path("outputs").mkdir(exist_ok=True) + out_path = Path("outputs") / f"generated_{idx}.mp4" + with open(out_path, "wb") as f: + f.write(video_bytes) + print(f"[SAVED] {out_path}") + + +# TEXT → VIDEO +video = adapter.generate_text_to_video( + prompt="A cinematic neon city with cars moving at night", + duration_seconds="4", + aspect_ratio="16:9", + resolution="720p", + model="veo-3.1-generate-preview", +) +video_list.append(video) + + +# IMAGE → VIDEO +video = adapter.generate_image_to_video( + image="model.jpg", + prompt="Two monsters fighting with each other", + duration_seconds="4", + aspect_ratio="16:9", + resolution="720p", 
+ model="veo-3.1-generate-preview", + negative_prompt="cartoon, anime, for kids", +) +video_list.append(video) + + +# REFERENCE IMAGES → VIDEO +video = adapter.generate_video_with_references( + prompt="A fashion model walking on a runway", + reference_images=[ + "test_assets/ref1.jpg", + "test_assets/ref2.jpg", + ], + duration_seconds="8", + aspect_ratio="16:9", + resolution="720p", + model="veo-3.1-generate-preview", +) +video_list.append(video) + + +# FIRST + LAST FRAME → VIDEO +video = adapter.generate_video_with_frames( + prompt="Smooth cinematic transition from grizzly bear to polar bear", + first_image="person1.jpg", + last_image="person2.jpg", + duration_seconds="8", + aspect_ratio="16:9", + resolution="720p", + model="veo-3.1-generate-preview", + negative_prompt="cartoon, anime, kids", +) +video_list.append(video) + + +# SAVE ALL RESULTS +for idx, vid_bytes in enumerate(video_list): + save_video(vid_bytes, idx) + +``` + +#### Supported Features + +- **Text to Video**: Generate Video using text descriptions. +- **Image to Video**: Generate Video using a single image. +- **Video Generation with Reference Images**: Generate Video using reference Images (up to 3). +- **Video Generation with Frames**: Video Generation with first frame and last frame. +- **Duration**: Durations in seconds (4s, 6s, 8s) +- **Resolution**: Quality of the video (720p, 1080p) +- **Aspect Ratio**: Aspect Ratio of videos (16:9, 9:16) +- **Negative Prompt**: Negative Prompt tells the Veo model what to avoid generating in the video. 
+ +**Reference**: [Google Veo 3 Video Generation Documentation](https://ai.google.dev/gemini-api/docs/video) + ### Preprocessing Functions #### Segment Garment diff --git a/tryon/api/__init__.py b/tryon/api/__init__.py index c3eeed2..f731845 100644 --- a/tryon/api/__init__.py +++ b/tryon/api/__init__.py @@ -5,6 +5,7 @@ from .lumaAI import LumaAIAdapter from .flux2 import Flux2ProAdapter, Flux2FlexAdapter from .lumaAI.luma_video_adapter import LumaAIVideoAdapter +from .openAI.image_adapter import GPTImageAdapter __all__ = [ "AmazonNovaCanvasVTONAdapter", @@ -16,4 +17,5 @@ "Flux2ProAdapter", "Flux2FlexAdapter", "LumaAIVideoAdapter", + "GPTImageAdapter", ] \ No newline at end of file diff --git a/tryon/api/veo.py b/tryon/api/veo.py new file mode 100644 index 0000000..43b0c2c --- /dev/null +++ b/tryon/api/veo.py @@ -0,0 +1,873 @@ +""" +Google Veo Video Generation API Adapter + +Adapter for Google's Veo video generation models, providing structured +interfaces for multiple video creation workflows including text-driven +generation, image-conditioned generation, and frame-controlled synthesis. + +Supported Models: +- veo-3.1-generate-preview +- veo-3.1-fast-generate-preview +- veo-3.0-generate-001 +- veo-3.0-fast-generate-001 + +Capabilities: +1) Text-to-Video + Generate high-quality cinematic videos purely from a natural language prompt. + +2) Image-to-Video + Use a single reference image to establish style, composition, and scene + context while the model animates motion forward in time. + +3) Video Generation with Reference Images (up to 3) + Provide one or more guiding images (maximum three). These images help + influence scene structure, visual continuity, and thematic consistency. + +4) First Frame + Last Frame Controlled Generation + Supply both an initial frame and a final target frame. Veo interpolates + motion and visual development between the two frames to create a smooth + evolving sequence. 
+ (Full frame-to-frame control is primarily supported in Veo 3.1 models.) + +Behavior Notes: +- Not all control modes are equally supported across Veo 3.0 vs 3.1. + Veo 3.1 provides better guided control and stability. +- Generation is asynchronous; polling is required until completion. +- Output is returned as raw video bytes for downstream streaming/storage. +- Duration, resolution, and aspect-ratio constraints depend on model config. + +Typical Workflow: +1) Submit generation request with prompt (+ optional frames / references) +2) Poll until operation finishes +3) Download or extract MP4 video bytes + +Reference: +https://ai.google.dev/gemini-api/docs/video + +Usage Examples: + + Text-to-Video: + >>> adapter.generate_text_video( + ... prompt="A cinematic aerial shot of a futuristic neon city at night", + ... model="veo-3.1-generate-preview" + ... ) + + Image-to-Video: + >>> adapter.generate_image_to_video( + ... prompt="Continue motion through a windy canyon", + ... image="start.png", + ... model="veo-3.1-generate-preview" + ... ) + + Reference Images (up to 3): + >>> adapter.generate_video_with_references( + ... prompt="Epic fantasy battlefield reveal", + ... reference_images=["a.png", "b.png", "c.png"], + ... model="veo-3.1-fast-generate-preview" + ... ) + + First + Last Frame Controlled Generation: + >>> adapter.generate_video_with_frames( + ... prompt="Smooth cinematic transition through cyberpunk streets", + ... first_image="start.png", + ... last_image="end.png", + ... model="veo-3.1-generate-preview" + ... 
) +""" + +import time +import os +import io +import base64 +from PIL import Image +from typing import Optional, Union + +try: + from google import genai + from google.genai import types + GEMINI_API_KEY = True +except ImportError: + GEMINI_API_KEY = False + genai = None + +DURATION = {"4", "6", "8"} +ASPECT_RATIO = {"16:9", "9:16"} +RESOLUTION = {"720p", "1080p"} +MODELS = {"veo-3.1-generate-preview", "veo-3.1-fast-generate-preview", "veo-3.0-generate-001", "veo-3.0-fast-generate-001"} + +class VeoAdapter: + + def __init__(self, api_key: Optional[str] = None): + + if not GEMINI_API_KEY: + raise ImportError( + "google-genai library is not available. " \ + "Please install it with 'pip install google-genai'." + ) + + self.api_key = api_key or os.getenv("GEMINI_API_KEY") + if not self.api_key: + raise ValueError("GEMINI API key must be provided either as a parameter or through the GEMINI_API_KEY environment variable.") + + self.client = genai.Client(api_key=self.api_key) + + + def _prepare_image_input(self, image_input: Union[str, io.BytesIO, Image.Image]) -> types.Image: + + """ + Converts various image inputs into a Veo-compatible `types.Image`. + + Supports: + - PIL Image + - file-like objects (BytesIO / file handles) + - strings (URL, local path, or Base64) + + Ensures: + - image is valid + - converted to RGB + - encoded as PNG + - returned as `types.Image(image_bytes, mime_type="image/png")` + + Raises: + ValueError: If the input type is unsupported or the image cannot be decoded. 
+ """ + + # A PIL Image + if isinstance(image_input, Image.Image): + pil = image_input.convert("RGB") + + # File-like object (BytesIO, file handle) + elif hasattr(image_input, "read"): + image_input.seek(0) + image_bytes = image_input.read() + try: + pil = Image.open(io.BytesIO(image_bytes)).convert("RGB") + except Exception as e: + raise ValueError(f"Invalid image data in file-like object: {e}") + + # String (URL, file path, or base64) + elif isinstance(image_input, str): + # URL + if image_input.startswith(("http://", "https://")): + try: + import requests + r = requests.get(image_input, timeout=10) + r.raise_for_status() + image_bytes = r.content + except Exception as e: + raise ValueError(f"Failed to download image from URL: {e}") + + # File path + elif os.path.exists(image_input): + try: + with open(image_input, "rb") as f: + image_bytes = f.read() + except Exception as e: + raise ValueError(f"Failed to read image file: {e}") + + # Base64 string + else: + try: + image_bytes = base64.b64decode(image_input, validate=True) + except Exception as e: + raise ValueError(f"Invalid base64 image string: {e}") + + # Convert bytes to PIL Image + try: + pil = Image.open(io.BytesIO(image_bytes)).convert("RGB") + except Exception as e: + raise ValueError(f"Invalid image data: {e}") + + else: + raise ValueError(f"Unsupported image input type: {type(image_input)}") + + # Convert to PNG bytes + buffer = io.BytesIO() + pil.save(buffer, format="PNG") + png_bytes = buffer.getvalue() + + # Return raw bytes + return types.Image( + image_bytes=png_bytes, + mime_type="image/png", + ) + + + def generate_text_to_video( + self, + prompt: str, + duration_seconds: str = "4", + aspect_ratio: str = "16:9", + resolution: str = "720p", + negative_prompt: Optional[str] = None, + model: str = "veo-3.1-generate-preview", + ) -> bytes: + + """ + Generate a video from a text prompt using Google Veo. 
+ + This function sends a text prompt to a Veo video generation model, polls the + operation until the video is ready, then downloads and returns the raw MP4 bytes. + Supports duration, aspect-ratio, resolution constraints and optional + negative prompts. Automatically validates parameters and ensures only valid + Veo model + configuration combinations are used. + + Args: + prompt: + The primary text prompt describing what the video should depict. + duration_seconds: + Length of the generated clip in seconds. + Supported values depend on the model (commonly: "4", "6", "8"). + aspect_ratio: + Output aspect ratio (e.g., "16:9" or "9:16"). + resolution: + Output resolution preset ("720p" or "1080p", depending on model limits). + negative_prompt: + Optional text describing content to avoid in the generation. + model: + Veo model identifier. + Examples: + - "veo-3.1-generate-preview" + - "veo-3.1-fast-generate-preview" + - "veo-3.0-generate-001" + - "veo-3.0-fast-generate-001" + + Returns: + bytes: + Raw MP4 video bytes + + Raises: + ValueError: + If prompt is missing, configuration is invalid, + or no video was generated. + RuntimeError: + If Veo returns an unexpected structure and video bytes + cannot be extracted. + + Example: + >>> video_bytes = adapter.generate_text_to_video( + ... prompt="A cinematic shot of a dragon flying over a medieval city", + ... duration_seconds="8", + ... aspect_ratio="16:9", + ... resolution="1080p", + ... model="veo-3.1-generate-preview" + ... ) + >>> with open("dragon_city.mp4", "wb") as f: + ... f.write(video_bytes) + """ + + # Validation Check + if not prompt: + raise ValueError("prompt is required") + + if model not in MODELS: + raise ValueError(f"{model} is not a recognized model. 
Available models are {MODELS}") + + if duration_seconds not in DURATION: + raise ValueError("duration_seconds must be one of: 4, 6, 8") + + if aspect_ratio not in ASPECT_RATIO: + raise ValueError("aspect_ratio must be '16:9' or '9:16'") + + if resolution not in RESOLUTION: + raise ValueError("resolution must be '720p' or '1080p'") + + if model in {"veo-3.1-generate-preview", "veo-3.1-fast-generate-preview"}: + if resolution == "1080p" and duration_seconds != "8": + raise ValueError("1080p resolution only supports 8s duration for veo 3.1 models.") + + if model in {"veo-3.0-generate-001", "veo-3.0-fast-generate-001"}: + if resolution == "1080p" and aspect_ratio != "16:9": + raise ValueError("1080p resolution only supportes 16:9 aspect ratio for veo 3 models.") + + # Create Configurations + kwargs = { + "duration_seconds": duration_seconds, + "aspect_ratio": aspect_ratio, + "resolution": resolution, + } + + # Negative Prompt + if negative_prompt: + kwargs["negative_prompt"] = negative_prompt + + # Create a generation object + operation = self.client.models.generate_videos( + model=model, + prompt=prompt, + config=types.GenerateVideosConfig(**kwargs), + ) + + # Polling + while not operation.done: + operation = self.client.operations.get(operation) + time.sleep(1) + + # Check for video error after polling completes + if getattr(operation, "error", None): + raise RuntimeError(f"Video generation failed: {operation.error}") + + if operation.response is None: + raise RuntimeError("Video generation completed but no response was returned") + + generated = getattr(operation.response, "generated_videos", None) + if not generated: + raise RuntimeError( + f"No videos were generated. 
operation.error={getattr(operation, 'error', None)}; " + f"operation_name={getattr(operation, 'name', None)}; response={operation.response}" + ) + + # Extract the generated video + video = operation.response.generated_videos[0].video + if not video: + raise ValueError("No video was generated.") + + + # If Video bytes are returned + if hasattr(video, "video_bytes") and video.video_bytes: + return video.video_bytes + + # Else download the file via the files API + downloaded = self.client.files.download(file=video) + + if isinstance(downloaded, (bytes, bytearray)): + return bytes(downloaded) + + if hasattr(downloaded, "read"): + try: + downloaded.seek(0) + except Exception: + pass + return downloaded.read() + + if hasattr(downloaded, "bytes"): + return downloaded.bytes + + if hasattr(downloaded, "data"): + return downloaded.data + + if hasattr(downloaded, "content"): + return downloaded.content + + raise RuntimeError(f"Error occured. Video cannot be generated...") + + + def generate_image_to_video( + self, + image: Union[str, io.BytesIO, Image.Image], + prompt: str, + duration_seconds: str = "4", + aspect_ratio: str = "16:9", + resolution: str = "720p", + negative_prompt: Optional[str] = None, + model: str = "veo-3.1-generate-preview", + ) -> bytes: + + """ + Animate a still image into a video using Google Veo. + + This function takes a single reference image and a guiding text prompt, sends + them to a Veo image-to-video model, polls until generation completes, and + returns the resulting MP4 video as raw bytes. The animation respects the + specified duration, aspect ratio, and resolution constraints. Supports optional + negative prompts and enforces model capability rules to prevent invalid requests. + + Args: + image: + The input image to animate. Can be one of: + - str: Path to an image file. + - io.BytesIO: In-memory binary image stream. + - PIL.Image.Image: Loaded PIL image object. + prompt: + Text description guiding how the image should animate. 
+ duration_seconds: + Length of the generated clip in seconds. + Supported values depend on the model (commonly: "4", "6", "8"). + aspect_ratio: + Output aspect ratio (e.g., "16:9" or "9:16"). + resolution: + Output video resolution preset ("720p" or "1080p", depending on model limits). + negative_prompt: + Optional text describing what should be avoided in the generated video. + model: + Veo model identifier to use for generation, such as: + - "veo-3.1-generate-preview" + - "veo-3.1-fast-generate-preview" + - "veo-3.0-generate-001" + - "veo-3.0-fast-generate-001" + + Returns: + bytes: + Raw MP4 video bytes + + Raises: + ValueError: + If required parameters are missing, invalid combinations are used, + or no video is returned from Veo. + RuntimeError: + If Veo produces an unexpected response structure and + video bytes cannot be extracted. + + Example: + >>> with open("person.png", "rb") as img: + ... video = adapter.generate_image_to_video( + ... image=img, + ... prompt="The person begins walking through a snowy forest", + ... duration_seconds="8", + ... aspect_ratio="16:9", + ... resolution="1080p", + ... model="veo-3.1-generate-preview", + ... ) + >>> with open("animated_person.mp4", "wb") as f: + ... f.write(video) + """ + + # Validation Check + if not image: + raise ValueError("image is required") + + if not prompt: + raise ValueError("prompt is required") + + if model not in MODELS: + raise ValueError(f"{model} is not a recognized model. 
Available models are {MODELS}") + + if duration_seconds not in DURATION: + raise ValueError("duration_seconds must be one of: 4, 6, 8") + + if aspect_ratio not in ASPECT_RATIO: + raise ValueError("aspect_ratio must be '16:9' or '9:16'") + + if resolution not in RESOLUTION: + raise ValueError("resolution must be '720p' or '1080p'") + + if model in {"veo-3.1-generate-preview", "veo-3.1-fast-generate-preview"}: + if resolution == "1080p" and duration_seconds != "8": + raise ValueError("1080p resolution only supports 8s duration for veo 3.1 models.") + + if model in {"veo-3.0-generate-001", "veo-3.0-fast-generate-001"}: + if resolution == "1080p" and aspect_ratio != "16:9": + raise ValueError("1080p resolution only supportes 16:9 aspect ratio for veo 3 models.") + + + # Create Configurations + kwargs = { + "duration_seconds": duration_seconds, + "aspect_ratio": aspect_ratio, + "resolution": resolution, + } + + # Negative Prompt + if negative_prompt: + kwargs["negative_prompt"] = negative_prompt + + # Create a generation object + operation = self.client.models.generate_videos( + model=model, + prompt=prompt, + image=self._prepare_image_input(image), + config=types.GenerateVideosConfig(**kwargs), + ) + + # Polling + while not operation.done: + operation = self.client.operations.get(operation) + time.sleep(1) + + # Check for video error after polling completes + if getattr(operation, "error", None): + raise RuntimeError(f"Video generation failed: {operation.error}") + + if operation.response is None: + raise RuntimeError("Video generation completed but no response was returned") + + generated = getattr(operation.response, "generated_videos", None) + if not generated: + raise RuntimeError( + f"No videos were generated. 
operation.error={getattr(operation, 'error', None)}; " + f"operation_name={getattr(operation, 'name', None)}; response={operation.response}" + ) + + # Extract the generated video + video = operation.response.generated_videos[0].video + if not video: + raise ValueError("Video was not generated.") + + # If Video bytes are returned + if hasattr(video, 'video_bytes') and video.video_bytes: + return video.video_bytes + + # Else download the file + downloaded = self.client.files.download(file=video) + + if isinstance(downloaded, (bytes, bytearray)): + return bytes(downloaded) + + if hasattr(downloaded, "read"): + try: + downloaded.seek(0) + except Exception: + pass + return downloaded.read() + + if hasattr(downloaded, "bytes"): + return downloaded.bytes + + if hasattr(downloaded, "data"): + return downloaded.data + + if hasattr(downloaded, "content"): + return downloaded.content + + raise ValueError("Error occured. Video cannot be generated...") + + + def generate_video_with_references( + self, + prompt: str, + reference_images: list[Union[str, io.BytesIO, Image.Image]], + duration_seconds: str = "8", + aspect_ratio: str = "16:9", + resolution: str = "720p", + model: str = "veo-3.1-generate-preview", + negative_prompt: Optional[str] = None + ) -> bytes: + + """ + Generate a video using reference images as visual/style guidance with Google Veo. + + This function generates a video guided by one or more reference images. The + images help Veo preserve identity, visual style, composition, or scene + continuity depending on how they are interpreted by the model. Supports up + to three reference images and automatically enforces Veo 3.1 model constraints + (8 seconds, 16:9, supported resolutions). The function polls the generation + operation until completion and returns the final MP4 video bytes. + + Args: + prompt: + Text description guiding how the scene should animate and what should happen. + reference_images: + List of up to three images used as visual guidance. 
+ Each element may be: + - str: Path to an image file + - io.BytesIO: In-memory binary stream + - PIL.Image.Image: Loaded PIL image object + duration_seconds: + Length of the generated clip in seconds. + For reference-guided generation, Veo requires `"8"`. + aspect_ratio: + Output aspect ratio. Reference-guided video currently supports `"16:9"` only. + resolution: + Output resolution preset, typically `"720p"` or `"1080p"` depending on model limits. + model: + Veo model identifier. Reference-guided generation is only supported + on Veo 3.1 preview / fast-preview models, e.g.: + - "veo-3.1-generate-preview" + - "veo-3.1-fast-generate-preview" + negative_prompt: + Optional text describing what should be avoided in the generated video. + + Returns: + bytes: + Raw MP4 video bytes + + Raises: + ValueError: + If required parameters are missing, invalid configurations are used, + unsupported models are selected, or no video is returned. + RuntimeError: + If Veo returns an unexpected response structure and the video + file bytes cannot be extracted. + + Example: + >>> video_bytes = adapter.generate_video_with_references( + ... prompt="A heroic knight walking through a ruined castle courtyard", + ... reference_images=[ + ... "face_ref.png", + ... "armor_style.jpg" + ... ], + ... resolution="1080p", + ... model="veo-3.1-generate-preview" + ... ) + >>> with open("knight_scene.mp4", "wb") as f: + ... f.write(video_bytes) + """ + + # Valdiation Check + if not prompt: + raise ValueError("prompt is required") + + if model not in MODELS: + raise ValueError(f"{model} is not a recognized model. 
Available models are {MODELS}") + + if model in {"veo-3.0-generate-001", "veo-3.0-fast-generate-001"}: + raise ValueError("Video generation using reference images is only supported for veo 3.1 models.") + + if not reference_images: + raise ValueError("At least one reference image is required") + + if len(reference_images) > 3: + raise ValueError("Veo 3.1 supports a maximum of 3 reference images") + + if resolution not in RESOLUTION: + raise ValueError("resolution must be '720p' or '1080p'") + + if duration_seconds != "8": + raise ValueError("Video generation using reference images require duration_seconds='8'") + + if aspect_ratio != "16:9": + raise ValueError("Video generation using reference images only support aspect_ratio='16:9'") + + + # Prepare reference images + refs = [] + + for img in reference_images: + refs.append( + types.VideoGenerationReferenceImage( + image=self._prepare_image_input(img), + reference_type="asset" + ) + ) + + # Create Configurations + kwargs = { + "duration_seconds": duration_seconds, + "aspect_ratio": aspect_ratio, + "resolution": resolution, + "reference_images": refs + } + + # Negative prompt + if negative_prompt: + kwargs["negative_prompt"] = negative_prompt + + # Create a generation object + operation = self.client.models.generate_videos( + model=model, + prompt=prompt, + config=types.GenerateVideosConfig(**kwargs), + ) + + # Polling + while not operation.done: + operation = self.client.operations.get(operation) + time.sleep(1) + + # Check for video error after polling completes + if getattr(operation, "error", None): + raise RuntimeError(f"Video generation failed: {operation.error}") + + if operation.response is None: + raise RuntimeError("Video generation completed but no response was returned") + + generated = getattr(operation.response, "generated_videos", None) + if not generated: + raise RuntimeError( + f"No videos were generated. 
operation.error={getattr(operation, 'error', None)}; " + f"operation_name={getattr(operation, 'name', None)}; response={operation.response}" + ) + + # Extract the generated video + video = operation.response.generated_videos[0].video + if not video: + raise ValueError("No video was generated") + + # If Video bytes are returned + if hasattr(video, "video_bytes") and video.video_bytes: + return video.video_bytes + + # Else download the file + downloaded = self.client.files.download(file=video) + + if isinstance(downloaded, (bytes, bytearray)): + return bytes(downloaded) + + if hasattr(downloaded, "read"): + try: + downloaded.seek(0) + except Exception: + pass + return downloaded.read() + + if hasattr(downloaded, "bytes"): + return downloaded.bytes + + if hasattr(downloaded, "data"): + return downloaded.data + + if hasattr(downloaded, "content"): + return downloaded.content + + raise RuntimeError(f"Error occured. Video cannot be generated...") + + + def generate_video_with_frames( + self, + prompt: str, + first_image: Union[str, io.BytesIO, Image.Image], + last_image: Union[str, io.BytesIO, Image.Image], + duration_seconds: str = "8", + aspect_ratio: str = "16:9", + resolution: str = "720p", + model: str = "veo-3.1-generate-preview", + negative_prompt: Optional[str] = None + ) -> bytes: + + """ + Generate a video using a starting frame and ending frame as guidance with Google Veo. + + This function generates a video using a specified first frame and last frame to + guide motion, composition, and visual consistency throughout the clip. The + first frame determines how the video begins and the last frame influences how + the animation resolves. The generation respects Veo 3.1 constraints + (8-second clips, supported aspect ratios, supported resolutions) and polls the + operation until the output is ready, finally returning raw MP4 bytes. + + Args: + prompt: + Text description guiding what happens between the first and last frames. 
+ first_image: + The starting frame of the video. May be: + - str: Path to an image file + - io.BytesIO: Binary image stream + - PIL.Image.Image: Loaded PIL image + last_image: + The ending frame of the video. Must be provided in the same supported + formats as `first_image`. + duration_seconds: + Duration of the generated clip. Frame-guided generation currently + requires `"8"`. + aspect_ratio: + Output aspect ratio (e.g., `"16:9"` or `"9:16"`, depending on model support). + resolution: + Output video resolution preset such as `"720p"` or `"1080p"`. + model: + Veo model identifier. Frame-guided generation is only supported on + Veo 3.1 preview / fast-preview models, such as: + - "veo-3.1-generate-preview" + - "veo-3.1-fast-generate-preview" + negative_prompt: + Optional text describing what should be avoided in the generation. + + Returns: + bytes: + Raw MP4 video bytes + + Raises: + ValueError: + If required parameters are missing, unsupported models are used, + invalid configurations are passed, or the API does not return a video. + RuntimeError: + If the API response structure is unexpected and the video bytes + cannot be extracted. + + Example: + >>> video = adapter.generate_video_with_frames( + ... prompt="A dramatic camera move through a futuristic city at night", + ... first_image="start_frame.png", + ... last_image="end_frame.png", + ... resolution="1080p", + ... model="veo-3.1-generate-preview" + ... ) + >>> with open("city_transition.mp4", "wb") as f: + ... f.write(video) + """ + + # Validation Check + if not prompt: + raise ValueError("Prompt is required for video generation.") + + if model not in MODELS: + raise ValueError(f"{model} is not a recognized model. 
Available models are {MODELS}") + + if model in {"veo-3.0-generate-001", "veo-3.0-fast-generate-001"}: + raise ValueError("Video generation using first frame and last frame is only supported for veo 3.1 models.") + + if aspect_ratio not in ASPECT_RATIO: + raise ValueError("aspect_ratio must be '16:9' or '9:16'") + + if resolution not in RESOLUTION: + raise ValueError("resolution must be '720p' or '1080p'") + + if duration_seconds != "8": + raise ValueError("Video generation using frames require duration_seconds='8'") + + if not first_image and not last_image: + raise ValueError("Both first frame and last frame are required for video generation.") + + # Preparing images for input + first_frame = self._prepare_image_input(first_image) + last_frame = self._prepare_image_input(last_image) + + # Create Configurations + kwargs = { + "duration_seconds": duration_seconds, + "aspect_ratio": aspect_ratio, + "resolution": resolution, + "last_frame": last_frame, + } + + # Negative prompt + if negative_prompt: + kwargs["negative_prompt"] = negative_prompt + + # Create a generation object + operation = self.client.models.generate_videos( + model=model, + prompt=prompt, + image=first_frame, + config=types.GenerateVideosConfig(**kwargs), + ) + + # Polling + while not operation.done: + operation = self.client.operations.get(operation) + time.sleep(1) + + # Check for video error after polling completes + if getattr(operation, "error", None): + raise RuntimeError(f"Video generation failed: {operation.error}") + + if operation.response is None: + raise RuntimeError("Video generation completed but no response was returned") + + generated = getattr(operation.response, "generated_videos", None) + if not generated: + raise RuntimeError( + f"No videos were generated. 
def build_parser():
    """Build and return the argparse CLI for the Veo video-generation script."""
    p = argparse.ArgumentParser(
        description="Generate Videos using Google Veo Video Generation API",
        formatter_class=argparse.RawDescriptionHelpFormatter,
    )

    # Backing Veo model.
    p.add_argument(
        "--provider",
        type=str,
        default="veo-3.1-generate-preview",
        choices=[
            "veo-3.1-generate-preview",
            "veo-3.1-fast-generate-preview",
            "veo-3.0-generate-001",
            "veo-3.0-fast-generate-001",
        ],
        help="Veo model to use",
    )

    # Generation mode selector.
    p.add_argument(
        "--mode",
        type=str,
        default="text",
        choices=["text", "image", "reference", "frames"],
        help="Generation Mode: "
             "'text' = text-to-video, "
             "'image' = image-to-video, "
             "'reference' = reference image guided video, "
             "'frames' = first+last frame guided video",
    )

    # Prompt and generation knobs.
    p.add_argument("-p", "--prompt", type=str,
                   help="Prompt (required for all modes)")
    p.add_argument("--duration", type=str, default="8", choices=["4", "6", "8"],
                   help="Video duration (seconds)")
    p.add_argument("--aspect", type=str, default="16:9", choices=["16:9", "9:16"],
                   help="Aspect Ratio")
    p.add_argument("--resolution", type=str, default="720p", choices=["720p", "1080p"],
                   help="Resolution")
    p.add_argument("--negative_prompt", type=str, default=None,
                   help="Optional negative prompt")

    # Image inputs (single image, references, or first/last frames).
    p.add_argument("--images", type=str, nargs="+",
                   help="Images for video generation")
    p.add_argument("--start_image", type=str,
                   help="Start image for frames mode")
    p.add_argument("--end_image", type=str,
                   help="End image for frames mode")

    # Where generated clips are written.
    p.add_argument("--output_dir", type=str, default="outputs",
                   help="Folder to save generated videos")

    return p
def _require_file(path_or_url, label):
    """Raise ValueError unless *path_or_url* is an http(s) URL or an existing local file."""
    if not (path_or_url.startswith("http") or os.path.exists(path_or_url)):
        raise ValueError(f"{label} not found: {path_or_url}")


def _validate_args(args):
    """Validate parsed CLI arguments for the selected mode.

    Raises:
        ValueError: On any missing/inconsistent argument combination.
    """
    if not args.prompt:
        raise ValueError("--prompt is required for all modes")

    # Veo 3.1: 1080p output is only available for full-length 8s clips.
    if args.provider in {"veo-3.1-generate-preview", "veo-3.1-fast-generate-preview"}:
        if args.resolution == "1080p" and args.duration != "8":
            raise ValueError("1080p supports ONLY 8s duration in Veo 3.1")

    if args.mode == "text":
        return  # text-to-video needs only the prompt

    if args.mode == "image":
        if not args.images:
            raise ValueError("image mode requires --images")
        if len(args.images) != 1:
            raise ValueError("image mode requires exactly ONE image")
        _require_file(args.images[0], "Image")

    elif args.mode == "reference":
        if args.provider in {"veo-3.0-generate-001", "veo-3.0-fast-generate-001"}:
            raise ValueError("Reference images supported ONLY in Veo 3.1 models")
        if not args.images:
            raise ValueError("reference mode requires --images")
        if len(args.images) > 3:
            raise ValueError("Maximum 3 reference images supported")
        if args.duration != "8":
            raise ValueError("Reference video requires duration = 8s")
        if args.aspect != "16:9":
            raise ValueError("Reference videos only support 16:9")
        for ref in args.images:
            _require_file(ref, "Reference image")

    elif args.mode == "frames":
        if args.provider in {"veo-3.0-generate-001", "veo-3.0-fast-generate-001"}:
            raise ValueError("Frames mode supported ONLY in Veo 3.1 models")
        if args.images:
            raise ValueError("--images is not supported for frames mode. "
                             "Use --start_image and --end_image")
        if args.duration != "8":
            raise ValueError("Frames mode requires duration = 8s")
        if not args.start_image or not args.end_image:
            raise ValueError("frames mode requires BOTH --start_image AND --end_image")
        _require_file(args.start_image, "Start image")
        _require_file(args.end_image, "End image")


def main():
    """CLI entry point: parse arguments, validate, dispatch to the Veo
    adapter for the chosen mode, and save the resulting MP4.

    Returns:
        int: Process exit status (0 on success).

    Raises:
        ValueError: If GEMINI_API_KEY is unset or arguments are invalid.
    """
    args = build_parser().parse_args()

    output_dir = Path(args.output_dir)
    output_dir.mkdir(parents=True, exist_ok=True)

    api_key = os.environ.get("GEMINI_API_KEY")
    if not api_key:
        raise ValueError("GEMINI_API_KEY must be set in environment")

    # All mode-specific argument checks live in one helper so the dispatch
    # below can assume a consistent argument set.
    _validate_args(args)

    adapter = VeoAdapter(api_key=api_key)

    # Dispatch to the adapter method matching the requested mode.
    if args.mode == "text":
        video_bytes = adapter.generate_text_to_video(
            prompt=args.prompt,
            duration_seconds=args.duration,
            aspect_ratio=args.aspect,
            resolution=args.resolution,
            model=args.provider,
            negative_prompt=args.negative_prompt,
        )
    elif args.mode == "image":
        video_bytes = adapter.generate_image_to_video(
            image=args.images[0],
            prompt=args.prompt,
            duration_seconds=args.duration,
            aspect_ratio=args.aspect,
            resolution=args.resolution,
            model=args.provider,
            negative_prompt=args.negative_prompt,
        )
    elif args.mode == "reference":
        video_bytes = adapter.generate_video_with_references(
            prompt=args.prompt,
            reference_images=args.images,
            duration_seconds=args.duration,
            aspect_ratio=args.aspect,
            resolution=args.resolution,
            model=args.provider,
            negative_prompt=args.negative_prompt,
        )
    elif args.mode == "frames":
        video_bytes = adapter.generate_video_with_frames(
            prompt=args.prompt,
            first_image=args.start_image,
            last_image=args.end_image,
            duration_seconds=args.duration,
            aspect_ratio=args.aspect,
            resolution=args.resolution,
            model=args.provider,
            negative_prompt=args.negative_prompt,
        )
    else:
        # Unreachable with argparse `choices`, kept as a defensive guard.
        raise ValueError("Invalid mode")

    # Save output with a timestamped, mode-tagged filename.
    out_path = output_dir / f"{args.mode}_{int(time.time())}.mp4"
    out_path.write_bytes(video_bytes)

    print(f"\n✓ Saved: {out_path}\n")
    return 0


if __name__ == "__main__":
    # BUG FIX: propagate main()'s return value as the process exit status;
    # the original called main() bare and discarded the returned 0.
    raise SystemExit(main())