From c3c68ae63511f2dcd5c9721831e2b61398046e8f Mon Sep 17 00:00:00 2001 From: Lee Penkman Date: Sat, 30 Aug 2025 13:53:20 +1200 Subject: [PATCH 1/2] windows hdm --- main.py | 573 ++++++++++++++++++++++++++++++-------------------------- 1 file changed, 309 insertions(+), 264 deletions(-) diff --git a/main.py b/main.py index c5f68ee..849a460 100644 --- a/main.py +++ b/main.py @@ -19,18 +19,16 @@ from PIL import Image from diffusers import ( DiffusionPipeline, - StableDiffusionXLInpaintPipeline, + # StableDiffusionXLInpaintPipeline, # Commented out - using HDM only UNet2DConditionModel, LCMScheduler, - StableDiffusionInpaintPipeline, - StableDiffusionImg2ImgPipeline, + # StableDiffusionInpaintPipeline, # Commented out - using HDM only + # StableDiffusionImg2ImgPipeline, # Commented out - using HDM only KDPM2AncestralDiscreteScheduler, - StableDiffusionXLImg2ImgPipeline, - ControlNetModel, - StableDiffusionXLControlNetPipeline, - AutoPipelineForImage2Image, - FluxPipeline, - FluxControlNetPipeline, + # StableDiffusionXLImg2ImgPipeline, # Commented out - using HDM only + # ControlNetModel, # Commented out - style transfer disabled + # StableDiffusionXLControlNetPipeline, # Commented out - style transfer disabled + # AutoPipelineForImage2Image, # Commented out - using HDM only ) from diffusers.utils import load_image from fastapi import FastAPI @@ -59,9 +57,11 @@ except Exception as e: logger.error(f"Error importing pillow_avif: {e}") +# All SDXL models commented out - using HDM only # model_name = "models/SSD-1B" -model_name = "models/ProteusV0.2" +# model_name = "models/ProteusV0.2" # model_name = "dataautogpt3/ProteusV0.2" +USE_HDM = True # Flag to use HDM pipeline # try: # unet = UNet2DConditionModel.from_pretrained( # "models/lcm-ssd-1b", torch_dtype=torch.float16, variant="fp16" @@ -71,68 +71,87 @@ # "latent-consistency/lcm-ssd-1b", torch_dtype=torch.float16, variant="fp16" # ) +# Legacy SDXL pipeline - commented out for HDM +# try: +# pipe = DiffusionPipeline.from_pretrained( +# model_name, torch_dtype=torch.float16, variant="fp16" +# ) +# except OSError as e: +# pipe = DiffusionPipeline.from_pretrained( +# "dataautogpt3/ProteusV0.2", torch_dtype=torch.float16, variant="fp16" +# ) +# +# old_scheduler = pipe.scheduler +# pipe.scheduler = LCMScheduler.from_config(pipe.scheduler.config) + +# Create dummy objects for compatibility +pipe = None +old_scheduler = None + +# LCM LoRA loading commented out - not needed for HDM +# if os.getenv("LOAD_LCM_LORA", "0") == "1": +# if os.path.exists("models/lcm-lora-sdxl"): +# pipe.load_lora_weights("models/lcm-lora-sdxl", adapter_name="lcm") +# else: +# pipe.load_lora_weights( +# "latent-consistency/lcm-lora-sdxl", adapter_name="lcm" +# ) +# pipe.set_adapters(["lcm"], adapter_weights=[1.0]) + +# Load HDM pipeline for efficient text-to-image try: - # pipe = DiffusionPipeline.from_pretrained( - # "models/SSD-1B", unet=unet, torch_dtype=torch.float16, variant="fp16" - # ) - pipe = DiffusionPipeline.from_pretrained( - model_name, torch_dtype=torch.float16, variant="fp16" - ) -except OSError as e: - # pipe = DiffusionPipeline.from_pretrained( - # "segmind/SSD-1B", unet=unet, torch_dtype=torch.float16, variant="fp16" - # ) - pipe = DiffusionPipeline.from_pretrained( - "dataautogpt3/ProteusV0.2", torch_dtype=torch.float16, variant="fp16" - ) - -old_scheduler = pipe.scheduler -pipe.scheduler = LCMScheduler.from_config(pipe.scheduler.config) - -if os.getenv("LOAD_LCM_LORA", "0") == "1": - if os.path.exists("models/lcm-lora-sdxl"): - 
pipe.load_lora_weights("models/lcm-lora-sdxl", adapter_name="lcm") - else: - pipe.load_lora_weights( - "latent-consistency/lcm-lora-sdxl", adapter_name="lcm" + import sys + sys.path.append('../HDM/src') + import xut.env + + # Configure XUT optimizations + xut.env.TORCH_COMPILE = True + xut.env.USE_LIGER = False + xut.env.USE_VANILLA = False + xut.env.USE_XFORMERS = True + xut.env.USE_XFORMERS_LAYERS = True + + from hdm.pipeline import HDMXUTPipeline + + torch.set_float32_matmul_precision("high") + hdm_pipe = ( + HDMXUTPipeline.from_pretrained( + "KBlueLeaf/HDM-xut-340M-anime", + trust_remote_code=True ) - pipe.set_adapters(["lcm"], adapter_weights=[1.0]) - -# Load Flux Schnell pipeline for efficient text-to-image -flux_pipe = FluxPipeline.from_pretrained( - "black-forest-labs/FLUX.1-schnell", torch_dtype=torch.bfloat16 -) -flux_pipe.enable_model_cpu_offload() -try: - from dfloat11 import DFloat11Model - dfloat_path = os.getenv("DF11_MODEL_PATH", "DFloat11/FLUX.1-schnell-DF11") - DFloat11Model.from_pretrained( - dfloat_path, - device="cpu", - bfloat16_model=flux_pipe.transformer, + .to("cuda:0" if torch.cuda.is_available() else "cpu") + .to(torch.float16 if torch.cuda.is_available() else torch.float32) ) + logger.info("HDM pipeline loaded successfully") except Exception as e: - logger.error(f"Failed to load DFloat11 weights: {e}") - -try: - flux_controlnet = ControlNetModel.from_pretrained( - "black-forest-labs/flux-controlnet-canny", torch_dtype=torch.bfloat16 + logger.error(f"Failed to load HDM pipeline: {e}") + # Fallback to original Flux implementation + from diffusers import FluxPipeline + hdm_pipe = FluxPipeline.from_pretrained( + "black-forest-labs/FLUX.1-schnell", torch_dtype=torch.bfloat16 ) - flux_controlnetpipe = FluxControlNetPipeline( - controlnet=flux_controlnet, **flux_pipe.components - ) - flux_controlnetpipe.enable_model_cpu_offload() - try: - lora_path = os.getenv( - "CONTROLNET_LORA", "black-forest-labs/flux-controlnet-line-lora" - ) - flux_controlnetpipe.load_lora_weights(lora_path, adapter_name="line") - flux_controlnetpipe.set_adapters(["line"], adapter_weights=[1.0]) - except Exception as e: - logger.error(f"Failed to load ControlNet LoRA: {e}") -except Exception as e: - logger.error(f"Failed to load Flux ControlNet: {e}") - flux_controlnetpipe = None + hdm_pipe.enable_model_cpu_offload() + +# Comment out ControlNet for now as requested +# try: +# flux_controlnet = ControlNetModel.from_pretrained( +# "black-forest-labs/flux-controlnet-canny", torch_dtype=torch.bfloat16 +# ) +# flux_controlnetpipe = FluxControlNetPipeline( +# controlnet=flux_controlnet, **flux_pipe.components +# ) +# flux_controlnetpipe.enable_model_cpu_offload() +# try: +# lora_path = os.getenv( +# "CONTROLNET_LORA", "black-forest-labs/flux-controlnet-line-lora" +# ) +# flux_controlnetpipe.load_lora_weights(lora_path, adapter_name="line") +# flux_controlnetpipe.set_adapters(["line"], adapter_weights=[1.0]) +# except Exception as e: +# logger.error(f"Failed to load ControlNet LoRA: {e}") +# except Exception as e: +# logger.error(f"Failed to load Flux ControlNet: {e}") +flux_controlnetpipe = None # quantizing @@ -166,33 +185,36 @@ # freeze(unet) # pipe.unet = unet -pipe.enable_model_cpu_offload() -pipe.enable_sequential_cpu_offload() - -# mem efficient -pipe.enable_attention_slicing() -pipe.enable_vae_slicing() - -# pipe.to("cuda") - -all_components = pipe.components +# Pipe optimizations commented out - using HDM instead +# pipe.enable_model_cpu_offload() +# pipe.enable_sequential_cpu_offload() +# +# # 
mem efficient +# pipe.enable_attention_slicing() +# pipe.enable_vae_slicing() +# +# # pipe.to("cuda") +# +# all_components = pipe.components +all_components = None # all_components.pop("scheduler") # all_components.pop("text_encoder") # all_components.pop("text_encoder_2") # all_components.pop("tokenizer") # all_components.pop("tokenizer_2") -img2img = AutoPipelineForImage2Image.from_pipe(pipe) -img2img.watermark = None - - -# mem efficient -img2img.enable_attention_slicing() -img2img.enable_vae_slicing() -# img2img.to("cuda") -# img2img.enable_xformers_memory_efficient_attention() -img2img.enable_model_cpu_offload() -img2img.enable_sequential_cpu_offload() +# img2img pipeline commented out - using HDM instead +# img2img = AutoPipelineForImage2Image.from_pipe(pipe) +# img2img.watermark = None +# +# # mem efficient +# img2img.enable_attention_slicing() +# img2img.enable_vae_slicing() +# # img2img.to("cuda") +# # img2img.enable_xformers_memory_efficient_attention() +# img2img.enable_model_cpu_offload() +# img2img.enable_sequential_cpu_offload() +img2img = None # # Quantize and freeze the text_encoder # text_encoder = img2img.text_encoder @@ -213,7 +235,7 @@ # variant="fp16", # # safety_checker=None, # ) # todo try torch_dtype=float16 -pipe.watermark = None +# pipe.watermark = None # Commented out since pipe is now None # deepcache @@ -229,18 +251,17 @@ # tomesd.apply_patch(pipe, ratio=0.2) # light speedup -refiner = DiffusionPipeline.from_pretrained( - # "stabilityai/stable-diffusion-xl-refiner-1.0", - # "dataautogpt3/OpenDalle", - model_name, - # "models/SSD-1B", - unet=pipe.unet, - text_encoder_2=pipe.text_encoder_2, - vae=pipe.vae, - torch_dtype=torch.float16, # safer to use bfloat? - use_safetensors=True, - variant="fp16", # remember not to download the big model -) +# Comment out refiner pipeline +# refiner = DiffusionPipeline.from_pretrained( +# model_name, +# unet=pipe.unet, +# text_encoder_2=pipe.text_encoder_2, +# vae=pipe.vae, +# torch_dtype=torch.float16, +# use_safetensors=True, +# variant="fp16", +# ) +refiner = None # refiner = pipe # same model in this case # refiner.scheduler = old_scheduler @@ -248,54 +269,51 @@ # refiner.schedu -refiner.watermark = None -# refiner.to("cuda") -refiner.enable_model_cpu_offload() -refiner.enable_sequential_cpu_offload() - -# {'scheduler', 'text_encoder', 'text_encoder_2', 'tokenizer', 'tokenizer_2', 'unet', 'vae'} can be passed in from existing model -# inpaintpipe = StableDiffusionInpaintPipeline(**pipe.components) -inpaintpipe = StableDiffusionXLInpaintPipeline.from_pretrained( - # "models/stable-diffusion-xl-base-1.0", - model_name, - torch_dtype=torch.float16, - variant="fp16", - use_safetensors=True, - scheduler=pipe.scheduler, - text_encoder=pipe.text_encoder, - text_encoder_2=pipe.text_encoder_2, - tokenizer=pipe.tokenizer, - tokenizer_2=pipe.tokenizer_2, - unet=pipe.unet, - vae=pipe.vae, - # load_connected_pipeline= -) -inpaintpipe.watermark = None -# inpaintpipe.enable_model_cpu_offload() - -controlnet_conditioning_scale = 0.5 # recommended for good generalization -controlnet = ControlNetModel.from_pretrained( - "diffusers/controlnet-canny-sdxl-1.0", - torch_dtype=torch.float16, - variant="fp16", -) -# controlnet.to("cuda") +# Comment out refiner optimizations +# refiner.watermark = None +# refiner.enable_model_cpu_offload() +# refiner.enable_sequential_cpu_offload() -controlnetpipe = StableDiffusionXLControlNetPipeline.from_pretrained( - # "stabilityai/stable-diffusion-xl-base-1.0", - model_name, - controlnet=controlnet, - 
**pipe.components, -) -# controlnetpipe.to("cuda") -controlnetpipe.watermark = None +# Comment out inpainting pipeline +# inpaintpipe = StableDiffusionXLInpaintPipeline.from_pretrained( +# model_name, +# torch_dtype=torch.float16, +# variant="fp16", +# use_safetensors=True, +# scheduler=pipe.scheduler, +# text_encoder=pipe.text_encoder, +# text_encoder_2=pipe.text_encoder_2, +# tokenizer=pipe.tokenizer, +# tokenizer_2=pipe.tokenizer_2, +# unet=pipe.unet, +# vae=pipe.vae, +# ) +# # inpaintpipe.watermark = None # Commented since inpaintpipe is None +inpaintpipe = None -# efficiency -controlnetpipe.enable_model_cpu_offload() -controlnetpipe.enable_sequential_cpu_offload() +# Comment out ControlNet pipeline +# controlnet_conditioning_scale = 0.5 +# controlnet = ControlNetModel.from_pretrained( +# "diffusers/controlnet-canny-sdxl-1.0", +# torch_dtype=torch.float16, +# variant="fp16", +# ) +# +# controlnetpipe = StableDiffusionXLControlNetPipeline.from_pretrained( +# model_name, +# controlnet=controlnet, +# **pipe.components, +# ) +# controlnetpipe.watermark = None +controlnet_conditioning_scale = None +controlnet = None +controlnetpipe = None -controlnetpipe.enable_attention_slicing() -controlnetpipe.enable_vae_slicing() +# Comment out ControlNet optimizations +# controlnetpipe.enable_model_cpu_offload() +# controlnetpipe.enable_sequential_cpu_offload() +# controlnetpipe.enable_attention_slicing() +# controlnetpipe.enable_vae_slicing() # # Quantize and freeze the text_encoder # text_encoderz = controlnetpipe.text_encoder @@ -342,24 +360,24 @@ # requires_aesthetics_score=False, # ) # inpaintpipe.to("cuda") -inpaintpipe.watermark = None +# inpaintpipe.watermark = None # Commented since inpaintpipe is None # inpaintpipe.register_to_config(requires_aesthetics_score=False) -# todo do we need this? 
-inpaint_refiner = StableDiffusionXLInpaintPipeline.from_pretrained( - # "stabilityai/stable-diffusion-xl-refiner-1.0", - model_name, - text_encoder_2=inpaintpipe.text_encoder_2, - vae=inpaintpipe.vae, - torch_dtype=torch.float16, - use_safetensors=True, - variant="fp16", - tokenizer_2=refiner.tokenizer_2, - tokenizer=refiner.tokenizer, - scheduler=refiner.scheduler, - text_encoder=refiner.text_encoder, - unet=refiner.unet, -) +# Comment out inpaint refiner +# inpaint_refiner = StableDiffusionXLInpaintPipeline.from_pretrained( +# model_name, +# text_encoder_2=inpaintpipe.text_encoder_2, +# vae=inpaintpipe.vae, +# torch_dtype=torch.float16, +# use_safetensors=True, +# variant="fp16", +# tokenizer_2=refiner.tokenizer_2, +# tokenizer=refiner.tokenizer, +# scheduler=refiner.scheduler, +# text_encoder=refiner.text_encoder, +# unet=refiner.unet, +# ) +inpaint_refiner = None # del inpaint_refiner.vae # del inpaint_refiner.text_encoder_2 # del inpaint_refiner.text_encoder @@ -388,7 +406,8 @@ # requires_aesthetics_score=False, # ) # inpaint_refiner.to("cuda") -inpaint_refiner.watermark = None +# Comment out inpaint refiner watermark +# inpaint_refiner.watermark = None # inpaint_refiner.register_to_config(requires_aesthetics_score=False) n_steps = 5 @@ -397,14 +416,9 @@ use_refiner = False -# efficiency - -# inpaintpipe.enable_model_cpu_offload() -inpaint_refiner.enable_model_cpu_offload() -inpaint_refiner.enable_sequential_cpu_offload() -# pipe.enable_model_cpu_offload() -# refiner.enable_model_cpu_offload() -# img2img.enable_model_cpu_offload() +# Comment out efficiency optimizations since we're using HDM +# inpaint_refiner.enable_model_cpu_offload() +# inpaint_refiner.enable_sequential_cpu_offload() # pipe.enable_xformers_memory_efficient_attention() @@ -439,9 +453,9 @@ # this can cause errors on some inputs so consider disabling it # pipe.unet = torch.compile(pipe.unet) # refiner.unet = torch.compile(refiner.unet)#, mode="reduce-overhead", fullgraph=True) -# compile the inpainters - todo reuse the other unets? 
swap out the models for others/del them so they share models and can be swapped efficiently -inpaintpipe.unet = pipe.unet -inpaint_refiner.unet = refiner.unet +# Comment out UNet sharing and compilation since we're using HDM +# inpaintpipe.unet = pipe.unet +# inpaint_refiner.unet = refiner.unet # inpaintpipe.unet = torch.compile(inpaintpipe.unet) # inpaint_refiner.unet = torch.compile(inpaint_refiner.unet) @@ -490,9 +504,23 @@ def make_image(prompt: str, save_path: str = ""): if Path(save_path).exists(): return FileResponse(save_path, media_type="image/png") with torch.inference_mode(): - image = pipe( - prompt=prompt, num_inference_steps=n_steps, **extra_pipe_args - ).images[0] + # Use HDM instead of SDXL pipe + result = hdm_pipe( + prompts=[prompt], + negative_prompts="low quality, worst quality, blurry, bad anatomy", + width=1024, + height=1024, + cfg_scale=3.0, + num_inference_steps=24, + camera_param={ + "zoom": 1.0, + "x_shift": 0.0, + "y_shift": 0.0, + }, + tread_gamma1=0.0, + tread_gamma2=0.5, + ) + image = result.images[0] if not save_path: save_path = f"images/{prompt}.png" image.save(save_path) @@ -693,25 +721,25 @@ def style_transfer_image_from_prompt( generator = torch.Generator("cpu").manual_seed(0) for attempt in range(retries + 1): try: - if canny and flux_controlnetpipe: - image = flux_controlnetpipe( - prompt=prompt, - image=canny_image, - num_inference_steps=n_steps, - guidance_scale=0.0, - generator=generator, - max_sequence_length=256, - ).images[0] - else: - image = flux_pipe( - prompt=prompt, + # Use HDM for style transfer instead of Flux + # Note: HDM doesn't have direct ControlNet support, so we'll use basic generation + with torch.inference_mode(): + result = hdm_pipe( + prompts=[prompt], + negative_prompts="low quality, worst quality, blurry, bad anatomy", width=input_pil.width, height=input_pil.height, - guidance_scale=0.0, + cfg_scale=3.0, num_inference_steps=n_steps, - generator=generator, - max_sequence_length=256, - ).images[0] + camera_param={ + "zoom": 1.0, + "x_shift": 0.0, + "y_shift": 0.0, + }, + tread_gamma1=0.0, + tread_gamma2=0.5, + ) + image = result.images[0] break except Exception as err: if attempt >= retries: @@ -743,19 +771,20 @@ def style_transfer_image_from_prompt( # # gc.collect() # add a refinement pass because the image is not always perfect/depending on the model if its not well tuned for LCM it might need more passes - if use_refiner: - lcm_scheduler = img2img.scheduler - img2img.scheduler = old_scheduler - - image = img2img( - prompt=prompt, - image=image, - num_inference_steps=n_refiner_steps, - strength=strength, - **extra_refiner_pipe_args, - ).images[0] - # revert scheduler - img2img.scheduler = lcm_scheduler + # Comment out refiner since we're using HDM only + # if use_refiner: + # lcm_scheduler = img2img.scheduler + # img2img.scheduler = old_scheduler + # + # image = img2img( + # prompt=prompt, + # image=image, + # num_inference_steps=n_refiner_steps, + # strength=strength, + # **extra_refiner_pipe_args, + # ).images[0] + # # revert scheduler + # img2img.scheduler = lcm_scheduler if detect_too_bumpy(image): if retries <= 0: raise Exception( @@ -775,34 +804,46 @@ def style_transfer_image_from_prompt( def create_image_from_prompt( - prompt, width, height, n_steps=5, extra_args=None, retries=3 + prompt, width, height, n_steps=24, extra_args=None, retries=3 ): - """Generate an image using the Flux Schnell pipeline with retries.""" + """Generate an image using the HDM pipeline with retries.""" if extra_args is None: extra_args = {} + # 
HDM works best with multiples of 64 block_width = width - (width % 64) block_height = height - (height % 64) prompt = shorten_too_long_text(prompt) - generator = torch.Generator("cpu").manual_seed(extra_args.get("seed", 0)) + + # HDM doesn't use the same generator setup as Flux + # seed = extra_args.get("seed", 0) for attempt in range(retries + 1): try: - image = flux_pipe( - prompt=prompt, - width=block_width, - height=block_height, - guidance_scale=0.0, - num_inference_steps=n_steps, - generator=generator, - max_sequence_length=256, - ).images[0] + # Use HDM pipeline instead of Flux + with torch.inference_mode(): + result = hdm_pipe( + prompts=[prompt], + negative_prompts="low quality, worst quality, blurry, bad anatomy", + width=block_width, + height=block_height, + cfg_scale=3.0, + num_inference_steps=n_steps, + camera_param={ + "zoom": 1.0, + "x_shift": 0.0, + "y_shift": 0.0, + }, + tread_gamma1=0.0, + tread_gamma2=0.5, + ) + image = result.images[0] break except Exception as err: # pragma: no cover - hardware/oom errors if attempt >= retries: raise logger.warning( - f"Flux generation failed on attempt {attempt + 1}/{retries}: {err}" + f"HDM generation failed on attempt {attempt + 1}/{retries}: {err}" ) if attempt == 0: prompt = remove_stopwords(prompt) @@ -860,45 +901,55 @@ def image_to_bytes(image): def inpaint_image_from_prompt(prompt, image_url: str, mask_url: str, retries=3): - prompt = shorten_too_long_text(prompt) - # image = pipe(guidance_scale=7,prompt=prompt).images[0] - - init_image = load_image(image_url).convert("RGB") - mask_image = load_image(mask_url).convert("RGB") # why rgb for a 1 channel mask? - # num_inference_steps = 75 # causes weird error ValueError: The combination of `original_steps x strength`: 50 x 1.0 is smaller than `num_inference_steps`: 75. Make sure to either reduce `num_inference_steps` to a value smaller than 50 or increase `strength` to a value higher than 1.5. 
- num_inference_steps = 40 - high_noise_frac = 0.7 - - generator = torch.Generator("cpu").manual_seed(0) - for attempt in range(retries + 1): - try: - image = inpaintpipe( - prompt=prompt, - image=init_image, - mask_image=mask_image, - num_inference_steps=num_inference_steps, - denoising_start=high_noise_frac, - output_type="latent", - ).images[0] - break - except Exception as e: - if attempt >= retries: - traceback.print_exc() - raise - logger.warning( - f"Inpainting failed on attempt {attempt + 1}/{retries}: {e}" - ) - prompt = remove_stopwords(prompt) if attempt == 0 else shorten_prompt_for_retry(prompt) - if not prompt: - raise e - if image != None: - image = inpaint_refiner( - prompt=prompt, - image=image, - mask_image=mask_image, - num_inference_steps=num_inference_steps, - denoising_start=high_noise_frac, - ).images[0] + """Inpainting function - currently disabled for HDM-only setup.""" + # TODO: Implement HDM-based inpainting or use alternative approach + logger.warning("Inpainting is currently disabled in HDM-only mode") + + # touch progress.txt file - if we dont do this we get restarted by supervisor/other processes for reliability + with open("progress.txt", "w") as f: + current_time = datetime.now().strftime("%H:%M:%S") + f.write(f"{current_time}") + + # Return None for now since inpainting is not implemented + return None + + # # Original SDXL inpainting code commented out + # prompt = shorten_too_long_text(prompt) + # init_image = load_image(image_url).convert("RGB") + # mask_image = load_image(mask_url).convert("RGB") + # num_inference_steps = 40 + # high_noise_frac = 0.7 + # + # generator = torch.Generator("cpu").manual_seed(0) + # for attempt in range(retries + 1): + # try: + # image = inpaintpipe( + # prompt=prompt, + # image=init_image, + # mask_image=mask_image, + # num_inference_steps=num_inference_steps, + # denoising_start=high_noise_frac, + # output_type="latent", + # ).images[0] + # break + # except Exception as e: + # if attempt >= retries: + # traceback.print_exc() + # raise + # logger.warning( + # f"Inpainting failed on attempt {attempt + 1}/{retries}: {e}" + # ) + # prompt = remove_stopwords(prompt) if attempt == 0 else shorten_prompt_for_retry(prompt) + # if not prompt: + # raise e + # if image != None: + # image = inpaint_refiner( + # prompt=prompt, + # image=image, + # mask_image=mask_image, + # num_inference_steps=num_inference_steps, + # denoising_start=high_noise_frac, + # ).images[0] # try: # # gc.collect() # torch.cuda.empty_cache() @@ -911,10 +962,4 @@ def inpaint_image_from_prompt(prompt, image_url: str, mask_url: str, retries=3): # os.system("/usr/bin/bash kill -SIGHUP `pgrep gunicorn`") # os.system("kill -1 `pgrep gunicorn`") - # touch progress.txt file - if we dont do this we get restarted by supervisor/other processes for reliability - with open("progress.txt", "w") as f: - current_time = datetime.now().strftime("%H:%M:%S") - f.write(f"{current_time}") - return image_to_bytes(image) - From 6b4cd17990e34e5969a246c98974a727766a2611 Mon Sep 17 00:00:00 2001 From: Lee Penkman Date: Sat, 30 Aug 2025 14:18:31 +1200 Subject: [PATCH 2/2] fx --- create_scheduled_task.bat | 27 + requirements-windows.txt | 1001 +++++++++++++++++++++++++++++++++++ start_server.bat | 12 + start_server_production.bat | 23 + test_api.py | 110 ++++ test_fast.py | 109 ++++ test_generation.py | 58 ++ test_hdm.py | 52 ++ test_sdxl.py | 64 +++ test_simple.py | 59 +++ 10 files changed, 1515 insertions(+) create mode 100644 create_scheduled_task.bat create mode 100644 
requirements-windows.txt create mode 100644 start_server.bat create mode 100644 start_server_production.bat create mode 100644 test_api.py create mode 100644 test_fast.py create mode 100644 test_generation.py create mode 100644 test_hdm.py create mode 100644 test_sdxl.py create mode 100644 test_simple.py diff --git a/create_scheduled_task.bat b/create_scheduled_task.bat new file mode 100644 index 0000000..68be4bf --- /dev/null +++ b/create_scheduled_task.bat @@ -0,0 +1,27 @@ +@echo off +echo Creating Windows Task Scheduler task for Stable Diffusion Server... + +:: Create the task to run at system startup +schtasks /create /tn "StableDiffusionServer" ^ + /tr "D:\code\stable-diffusion-server\start_server.bat" ^ + /sc onstart ^ + /ru SYSTEM ^ + /rl highest ^ + /f + +:: Alternative: Run as current user at logon +:: schtasks /create /tn "StableDiffusionServer" ^ +:: /tr "D:\code\stable-diffusion-server\start_server.bat" ^ +:: /sc onlogon ^ +:: /rl highest ^ +:: /f + +echo Task created successfully! +echo. +echo To manage the task: +echo Start: schtasks /run /tn "StableDiffusionServer" +echo Stop: schtasks /end /tn "StableDiffusionServer" +echo Delete: schtasks /delete /tn "StableDiffusionServer" /f +echo Query: schtasks /query /tn "StableDiffusionServer" +echo. +pause \ No newline at end of file diff --git a/requirements-windows.txt b/requirements-windows.txt new file mode 100644 index 0000000..62456ea --- /dev/null +++ b/requirements-windows.txt @@ -0,0 +1,1001 @@ +# This file was autogenerated by uv via the following command: +# uv pip compile requirements.in -o requirements.txt +absl-py==2.3.1 + # via + # fiddle + # tensorboard +accelerate==1.1.1 + # via + # -r requirements.in + # dfloat11 + # peft +aiofiles==24.1.0 + # via gradio +aiohappyeyeballs==2.4.3 + # via aiohttp +aiohttp==3.11.8 + # via + # fsspec + # gradio +aiosignal==1.3.1 + # via aiohttp +alembic==1.16.3 + # via optuna +altair==5.5.0 + # via + # gradio + # streamlit +annotated-types==0.7.0 + # via + # -r requirements.in + # pydantic +antlr4-python3-runtime==4.9.3 + # via + # hydra-core + # omegaconf +anyio==4.6.2.post1 + # via + # -r requirements.in + # httpx + # starlette +asttokens==3.0.0 + # via stack-data +attrs==24.2.0 + # via + # aiohttp + # jsonschema + # referencing +audioread==3.0.1 + # via + # lhotse + # librosa +bitsandbytes==0.45.3 + # via nemo-toolkit +blinker==1.9.0 + # via streamlit +boto3==1.39.3 + # via -r requirements.in +botocore==1.39.3 + # via + # boto3 + # s3transfer +braceexpand==0.1.7 + # via + # nemo-toolkit + # webdataset +cachetools==5.5.0 + # via + # -r requirements.in + # google-auth + # streamlit +certifi==2024.8.30 + # via + # -r requirements.in + # httpcore + # httpx + # requests + # sentry-sdk +cffi==1.17.1 + # via soundfile +charset-normalizer==3.4.0 + # via + # -r requirements.in + # requests +click==8.1.7 + # via + # -r requirements.in + # jiwer + # lhotse + # nltk + # sacremoses + # streamlit + # typer + # uvicorn + # wandb +cloudpickle==3.1.1 + # via nemo-toolkit +cmake==3.31.1 + # via -r requirements.in +colorlog==6.9.0 + # via optuna +contourpy==1.3.1 + # via matplotlib +cupy-cuda12x==13.4.1 + # via dfloat11 +cycler==0.12.1 + # via matplotlib +cytoolz==1.0.1 + # via lhotse +datasets==3.6.0 + # via nemo-toolkit +decorator==5.2.1 + # via + # ipython + # librosa +deepcache==0.1.1 + # via -r requirements.in +dfloat11==0.2.0 + # via -r requirements.in +diffusers==0.31.0 + # via + # -r requirements.in + # deepcache +dill==0.3.8 + # via + # datasets + # multiprocess +diskcache==5.6.3 + # via -r 
requirements.in +distance==0.1.3 + # via g2p-en +docopt==0.6.2 + # via + # num2words + # pyannote-metrics +docstring-parser==0.16 + # via google-cloud-aiplatform +editdistance==0.8.1 + # via nemo-toolkit +einops==0.8.1 + # via nemo-toolkit +exceptiongroup==1.2.2 + # via -r requirements.in +executing==2.2.0 + # via stack-data +fastapi==0.115.5 + # via + # -r requirements.in + # gradio +fastrlock==0.8.3 + # via cupy-cuda12x +ffmpy==0.4.0 + # via gradio +fiddle==0.3.0 + # via nemo-toolkit +filelock==3.16.1 + # via + # -r requirements.in + # datasets + # diffusers + # huggingface-hub + # torch + # transformers +fonttools==4.55.0 + # via matplotlib +frozenlist==1.5.0 + # via + # aiohttp + # aiosignal +fsspec==2024.12.0 + # via + # -r requirements.in + # datasets + # gradio-client + # huggingface-hub + # lightning + # nemo-toolkit + # pytorch-lightning + # torch +future==1.0.0 + # via pyloudnorm +g2p-en==2.1.0 + # via nemo-toolkit +gitdb==4.0.11 + # via gitpython +gitpython==3.1.43 + # via + # streamlit + # wandb +google-api-core==2.23.0 + # via + # -r requirements.in + # google-api-python-client + # google-cloud-aiplatform + # google-cloud-bigquery + # google-cloud-core + # google-cloud-datastore + # google-cloud-ndb + # google-cloud-resource-manager + # google-cloud-storage +google-api-python-client==2.154.0 + # via -r requirements.in +google-auth==2.36.0 + # via + # google-api-core + # google-api-python-client + # google-auth-httplib2 + # google-cloud-aiplatform + # google-cloud-bigquery + # google-cloud-core + # google-cloud-datastore + # google-cloud-resource-manager + # google-cloud-storage +google-auth-httplib2==0.2.0 + # via google-api-python-client +google-cloud-aiplatform==1.73.0 + # via -r requirements.in +google-cloud-bigquery==3.27.0 + # via google-cloud-aiplatform +google-cloud-core==2.4.1 + # via + # google-cloud-bigquery + # google-cloud-datastore + # google-cloud-storage +google-cloud-datastore==2.20.1 + # via google-cloud-ndb +google-cloud-ndb==2.3.2 + # via -r requirements.in +google-cloud-resource-manager==1.13.1 + # via google-cloud-aiplatform +google-cloud-storage==2.18.2 + # via + # -r requirements.in + # google-cloud-aiplatform +google-crc32c==1.6.0 + # via + # google-cloud-storage + # google-resumable-media +google-resumable-media==2.7.2 + # via + # google-cloud-bigquery + # google-cloud-storage +googleapis-common-protos==1.66.0 + # via + # google-api-core + # grpc-google-iam-v1 + # grpcio-status +gradio==3.36.1 + # via -r requirements.in +gradio-client==1.5.0 + # via gradio +graphviz==0.21 + # via fiddle +greenlet==3.2.3 + # via sqlalchemy +grpc-google-iam-v1==0.13.1 + # via google-cloud-resource-manager +grpcio==1.68.0 + # via + # google-api-core + # googleapis-common-protos + # grpc-google-iam-v1 + # grpcio-status + # tensorboard +grpcio-status==1.62.3 + # via google-api-core +gunicorn==23.0.0 + # via -r requirements.in +h11==0.14.0 + # via + # -r requirements.in + # httpcore + # uvicorn +hf-xet==1.1.5 + # via huggingface-hub +httpcore==1.0.7 + # via httpx +httplib2==0.22.0 + # via + # google-api-python-client + # google-auth-httplib2 +httpx==0.28.0 + # via + # gradio + # gradio-client +huggingface-hub==0.33.2 + # via + # -r requirements.in + # accelerate + # datasets + # diffusers + # gradio + # gradio-client + # nemo-toolkit + # optimum-quanto + # peft + # tokenizers + # transformers +hydra-core==1.3.2 + # via nemo-toolkit +idna==3.10 + # via + # -r requirements.in + # anyio + # httpx + # requests + # yarl +importlib-metadata==8.5.0 + # via + # -r requirements.in + 
# diffusers +inflect==7.5.0 + # via + # g2p-en + # nemo-toolkit +intervaltree==3.1.0 + # via lhotse +invisible-watermark==0.2.0 + # via -r requirements.in +ipython==9.4.0 + # via mediapy +ipython-pygments-lexers==1.1.1 + # via ipython +jedi==0.19.2 + # via ipython +jinja2==3.1.4 + # via + # -r requirements.in + # altair + # gradio + # pydeck + # torch +jiwer==3.0.5 + # via nemo-toolkit +jmespath==1.0.1 + # via + # boto3 + # botocore +joblib==1.4.2 + # via + # librosa + # nltk + # sacremoses + # scikit-learn +jsonschema==4.23.0 + # via altair +jsonschema-specifications==2024.10.1 + # via jsonschema +kaldi-python-io==1.2.2 + # via nemo-toolkit +kaldiio==2.18.1 + # via nemo-toolkit +kiwisolver==1.4.7 + # via matplotlib +lazy-loader==0.4 + # via librosa +levenshtein==0.27.1 +lhotse==1.30.3 + # via nemo-toolkit +libcst==1.8.2 + # via fiddle +librosa==0.11.0 + # via nemo-toolkit +lightning==2.4.0 + # via nemo-toolkit +lightning-utilities==0.14.3 + # via + # lightning + # pytorch-lightning + # torchmetrics +lilcom==1.8.1 + # via lhotse +linkify-it-py==2.0.3 + # via markdown-it-py +lit==18.1.8 + # via -r requirements.in +llvmlite==0.44.0 + # via numba +loguru==0.7.2 + # via + # -r requirements.in +mako==1.3.10 + # via alembic +markdown==3.8.2 + # via tensorboard +markdown-it-py==2.2.0 + # via + # gradio + # mdit-py-plugins + # rich +markupsafe==3.0.2 + # via + # -r requirements.in + # gradio + # jinja2 + # mako + # werkzeug +marshmallow==4.0.0 + # via nemo-toolkit +matplotlib==3.9.2 + # via + # gradio + # mediapy + # pyannote-metrics +matplotlib-inline==0.1.7 + # via ipython +mdit-py-plugins==0.3.3 + # via gradio +mdurl==0.1.2 + # via markdown-it-py +mediapy==1.1.6 + # via nemo-toolkit +more-itertools==10.7.0 + # via inflect +mpmath==1.3.0 + # via + # -r requirements.in + # sympy +msgpack==1.1.1 + # via librosa +multidict==6.1.0 + # via + # aiohttp + # yarl +multiprocess==0.70.16 + # via datasets +narwhals==1.14.3 + # via altair +nemo-toolkit==2.3.0 + # via -r requirements.in +networkx==3.4.2 + # via + # -r requirements.in + # torch +ninja==1.11.1.2 + # via optimum-quanto +nltk==3.9.1 + # via + # -r requirements.in + # g2p-en +num2words==0.5.14 + # via nemo-toolkit +numba==0.61.0 + # via + # librosa + # nemo-toolkit + # resampy +numpy==1.26.4 + # via + # -r requirements.in + # accelerate + # bitsandbytes + # contourpy + # cupy-cuda12x + # datasets + # diffusers + # g2p-en + # gradio + # invisible-watermark + # kaldi-python-io + # kaldiio + # lhotse + # librosa + # lilcom + # matplotlib + # mediapy + # nemo-toolkit + # numba + # onnx + # opencv-python + # optimum-quanto + # optuna + # pandas + # peft + # pyannote-core + # pyannote-metrics + # pydeck + # pyloudnorm + # pywavelets + # resampy + # scikit-learn + # scipy + # shapely + # soundfile + # sox + # soxr + # streamlit + # tensorboard + # torchmetrics + # transformers + # webdataset +nvidia-cublas-cu12==12.1.3.1 + # via + # nvidia-cudnn-cu12 + # nvidia-cusolver-cu12 + # torch +nvidia-cuda-cupti-cu12==12.1.105 + # via torch +nvidia-cuda-nvrtc-cu12==12.1.105 + # via torch +nvidia-cuda-runtime-cu12==12.1.105 + # via torch +nvidia-cudnn-cu12==9.1.0.70 + # via torch +nvidia-cufft-cu12==11.0.2.54 + # via torch +nvidia-curand-cu12==10.3.2.106 + # via torch +nvidia-cusolver-cu12==11.4.5.107 + # via torch +nvidia-cusparse-cu12==12.1.0.106 + # via + # nvidia-cusolver-cu12 + # torch + # via torch +nvidia-nvjitlink-cu12==12.6.85 + # via + # nvidia-cusolver-cu12 + # nvidia-cusparse-cu12 +nvidia-nvtx-cu12==12.1.105 + # via torch +omegaconf==2.3.0 + # via + # 
hydra-core + # nemo-toolkit +onnx==1.17.0 + # via nemo-toolkit +opencv-python==4.10.0.84 + # via + # -r requirements.in + # invisible-watermark +optimum-quanto==0.2.6 + # via -r requirements.in +optuna==4.4.0 + # via nemo-toolkit +orjson==3.10.12 + # via gradio +packaging==24.2 + # via + # -r requirements.in + # accelerate + # altair + # datasets + # google-cloud-aiplatform + # google-cloud-bigquery + # gradio-client + # gunicorn + # huggingface-hub + # hydra-core + # lazy-loader + # lhotse + # lightning + # lightning-utilities + # matplotlib + # nemo-toolkit + # optuna + # peft + # pooch + # pytorch-lightning + # streamlit + # tensorboard + # torchmetrics + # transformers + # wandb +pandas==2.2.3 + # via + # datasets + # gradio + # nemo-toolkit + # pyannote-database + # pyannote-metrics + # streamlit +parso==0.8.4 + # via jedi +peft==0.13.2 + # via + # -r requirements.in + # nemo-toolkit +pexpect==4.9.0 + # via ipython +pillow==11.0.0 + # via + # -r requirements.in + # diffusers + # gradio + # invisible-watermark + # matplotlib + # mediapy + # streamlit +pillow-avif-plugin==1.4.6 + # via -r requirements.in +plac==1.4.5 +platformdirs==4.3.8 + # via + # pooch + # wandb +pooch==1.8.2 + # via librosa +prompt-toolkit==3.0.51 + # via ipython +propcache==0.2.0 + # via + # aiohttp + # yarl +proto-plus==1.25.0 + # via + # google-api-core + # google-cloud-aiplatform + # google-cloud-datastore + # google-cloud-resource-manager +protobuf==4.24.4 + # via + # -r requirements.in + # google-api-core + # google-cloud-aiplatform + # google-cloud-datastore + # google-cloud-ndb + # google-cloud-resource-manager + # googleapis-common-protos + # grpc-google-iam-v1 + # grpcio-status + # nemo-toolkit + # onnx + # proto-plus + # streamlit + # tensorboard + # wandb +psutil==6.1.0 + # via + # -r requirements.in + # accelerate + # peft +ptyprocess==0.7.0 + # via pexpect +pure-eval==0.2.3 + # via stack-data +pyannote-core==5.0.0 + # via + # nemo-toolkit + # pyannote-database + # pyannote-metrics +pyannote-database==5.1.3 + # via pyannote-metrics +pyannote-metrics==3.2.1 + # via nemo-toolkit +pyarrow==18.1.0 + # via + # datasets + # streamlit +pyasn1==0.6.1 + # via + # pyasn1-modules + # rsa +pyasn1-modules==0.4.1 + # via google-auth +pybind11==2.13.6 +pycparser==2.22 + # via cffi +pydantic==2.10.2 + # via + # -r requirements.in + # fastapi + # google-cloud-aiplatform + # gradio + # wandb +pydantic-core==2.27.1 + # via + # -r requirements.in + # pydantic +pydeck==0.9.1 + # via streamlit +pydub==0.25.1 + # via + # gradio + # nemo-toolkit +pygments==2.18.0 + # via + # gradio + # ipython + # ipython-pygments-lexers + # rich +pyloudnorm==0.1.1 + # via nemo-toolkit +pymemcache==4.0.0 + # via google-cloud-ndb +pyparsing==3.2.0 + # via + # httplib2 + # matplotlib +python-dateutil==2.9.0.post0 + # via + # botocore + # google-cloud-bigquery + # matplotlib + # nemo-toolkit + # pandas +python-multipart==0.0.18 + # via + # -r requirements.in + # gradio +pytorch-lightning==2.5.2 + # via lightning +pytz==2024.2 + # via + # google-cloud-ndb + # pandas +pywavelets==1.7.0 + # via + # -r requirements.in + # invisible-watermark +pyyaml==6.0.2 + # via + # -r requirements.in + # accelerate + # datasets + # gradio + # huggingface-hub + # lhotse + # libcst + # lightning + # omegaconf + # optuna + # peft + # pyannote-database + # pytorch-lightning + # transformers + # wandb + # webdataset +rapidfuzz==3.13.0 + # via + # jiwer + # levenshtein +redis==5.2.0 + # via google-cloud-ndb +referencing==0.35.1 + # via + # jsonschema + # 
jsonschema-specifications +regex==2024.11.6 + # via + # -r requirements.in + # diffusers + # nltk + # sacremoses + # transformers +requests==2.32.3 + # via + # -r requirements.in + # datasets + # diffusers + # google-api-core + # google-cloud-bigquery + # google-cloud-storage + # gradio + # huggingface-hub + # pooch + # streamlit + # transformers + # wandb +resampy==0.4.3 + # via nemo-toolkit +rich==13.9.4 + # via + # streamlit + # typer +rpds-py==0.21.0 + # via + # jsonschema + # referencing +rsa==4.9 + # via google-auth +ruamel-yaml==0.18.14 + # via nemo-toolkit +ruamel-yaml-clib==0.2.12 + # via ruamel-yaml +s3transfer==0.13.0 + # via boto3 +sacremoses==0.1.1 + # via nemo-toolkit +safetensors==0.4.5 + # via + # -r requirements.in + # accelerate + # dfloat11 + # diffusers + # optimum-quanto + # peft + # transformers +scikit-learn==1.7.0 + # via + # librosa + # nemo-toolkit + # pyannote-metrics +scipy==1.16.0 + # via + # librosa + # nemo-toolkit + # pyannote-core + # pyannote-metrics + # pyloudnorm + # scikit-learn +semantic-version==2.10.0 + # via gradio +sentencepiece==0.2.0 + # via nemo-toolkit +sentry-sdk==2.32.0 + # via wandb +setuptools==75.6.0 + # via + # lightning-utilities + # nemo-toolkit + # tensorboard + # torch +shapely==2.0.6 + # via google-cloud-aiplatform +shellingham==1.5.4 + # via typer +six==1.16.0 + # via + # python-dateutil + # tensorboard +smmap==5.0.1 + # via gitdb +sniffio==1.3.1 + # via + # -r requirements.in + # anyio +sortedcontainers==2.4.0 + # via + # intervaltree + # pyannote-core +soundfile==0.13.1 + # via + # lhotse + # librosa + # nemo-toolkit +sox==1.5.0 + # via nemo-toolkit +soxr==0.5.0.post1 + # via librosa +sqlalchemy==2.0.41 + # via + # alembic + # optuna +stack-data==0.6.3 + # via ipython +starlette==0.41.3 + # via + # -r requirements.in + # fastapi +streamlit==1.40.2 + # via -r requirements.in +sympy==1.13.3 + # via + # -r requirements.in + # pyannote-metrics + # torch +tabulate==0.9.0 + # via + # lhotse + # pyannote-metrics +tenacity==9.0.0 + # via streamlit +tensorboard==2.19.0 + # via nemo-toolkit +tensorboard-data-server==0.7.2 + # via tensorboard +termcolor==3.1.0 +text-unidecode==1.3 + # via nemo-toolkit + # via nemo-toolkit +threadpoolctl==3.6.0 + # via scikit-learn +tokenizers==0.21.2 + # via + # -r requirements.in + # transformers +tomesd==0.1.3 + # via -r requirements.in +toml==0.10.2 + # via streamlit +toolz==1.0.0 + # via cytoolz +torch==2.4.1 + # via + # -r requirements.in + # accelerate + # bitsandbytes + # deepcache + # invisible-watermark + # lhotse + # lightning + # nemo-toolkit + # optimum-quanto + # peft + # pytorch-lightning + # tomesd + # torchmetrics +torchmetrics==1.7.4 + # via + # lightning + # nemo-toolkit + # pytorch-lightning +tornado==6.4.2 + # via streamlit +tqdm==4.67.1 + # via + # -r requirements.in + # datasets + # dfloat11 + # huggingface-hub + # lhotse + # lightning + # nemo-toolkit + # nltk + # optuna + # peft + # pytorch-lightning + # sacremoses + # transformers +traitlets==5.14.3 + # via + # ipython + # matplotlib-inline +transformers==4.53.1 + # via + # -r requirements.in + # deepcache + # dfloat11 + # nemo-toolkit + # peft + # via torch +typeguard==4.4.2 + # via inflect +typer==0.16.0 + # via pyannote-database +typing-extensions==4.12.2 + # via + # -r requirements.in + # alembic + # altair + # fastapi + # fiddle + # gradio-client + # huggingface-hub + # librosa + # lightning + # lightning-utilities + # pyannote-core + # pydantic + # pydantic-core + # pytorch-lightning + # sox + # sqlalchemy + # streamlit + # 
torch + # typeguard + # typer + # wandb +tzdata==2024.2 + # via pandas +uc-micro-py==1.0.3 + # via linkify-it-py +uritemplate==4.1.1 + # via google-api-python-client +urllib3==2.2.3 + # via + # -r requirements.in + # botocore + # requests + # sentry-sdk +uvicorn==0.32.1 + # via + # -r requirements.in + # gradio +wandb==0.21.0 + # via nemo-toolkit +watchdog==6.0.0 + # via streamlit +wcwidth==0.2.13 + # via prompt-toolkit +webdataset==1.0.2 + # via nemo-toolkit +websockets==12.0 + # via + # gradio + # gradio-client +werkzeug==3.1.3 + # via tensorboard +wget==3.2 + # via nemo-toolkit +wrapt==1.17.2 + # via nemo-toolkit + # via -r requirements.in +xxhash==3.5.0 + # via datasets +yarl==1.18.0 + # via aiohttp +zipp==3.21.0 + # via + # -r requirements.in + # importlib-metadata diff --git a/start_server.bat b/start_server.bat new file mode 100644 index 0000000..6a0de63 --- /dev/null +++ b/start_server.bat @@ -0,0 +1,12 @@ +@echo off +echo Starting Stable Diffusion Server... +cd /d D:\code\stable-diffusion-server + +echo Activating virtual environment... +call .venv\Scripts\activate.bat + +echo Starting server with uvicorn... +uvicorn main:app --port 8000 --timeout-keep-alive 600 --workers 1 --backlog 1 --limit-concurrency 4 + +echo Server stopped. +pause \ No newline at end of file diff --git a/start_server_production.bat b/start_server_production.bat new file mode 100644 index 0000000..aa03be1 --- /dev/null +++ b/start_server_production.bat @@ -0,0 +1,23 @@ +@echo off +echo Starting Stable Diffusion Server (Production)... +cd /d D:\code\stable-diffusion-server + +:: Set environment variables +if exist "secrets\google-credentials.json" ( + set GOOGLE_APPLICATION_CREDENTIALS=secrets\google-credentials.json + echo Google Cloud credentials set. +) + +:: Optional: Set model paths +:: set DF11_MODEL_PATH=DFloat11/FLUX.1-schnell-DF11 +:: set CONTROLNET_LORA=black-forest-labs/flux-controlnet-line-lora +:: set LOAD_LCM_LORA=1 + +echo Activating virtual environment... +call .venv\Scripts\activate.bat + +echo Starting production server with gunicorn... +gunicorn -k uvicorn.workers.UvicornWorker -b :8000 main:app --timeout 600 -w 1 + +echo Server stopped. 
+pause \ No newline at end of file diff --git a/test_api.py b/test_api.py new file mode 100644 index 0000000..46e3789 --- /dev/null +++ b/test_api.py @@ -0,0 +1,110 @@ +#!/usr/bin/env python +"""Test the API endpoints with multiple images.""" + +import requests +import time +import json +from urllib.parse import urlencode + +def test_make_image(prompt, save_name): + """Test the /make_image endpoint.""" + print(f"\n[IMAGE] Generating: {prompt}") + + # URL encode the parameters + params = { + "prompt": prompt, + "width": 512, + "height": 512 + } + + url = f"http://localhost:8000/make_image?{urlencode(params)}" + + start_time = time.time() + try: + response = requests.get(url, timeout=300) + elapsed = time.time() - start_time + + if response.status_code == 200: + # Save the image + with open(save_name, 'wb') as f: + f.write(response.content) + print(f"[OK] Saved to {save_name} ({elapsed:.1f}s)") + return True + else: + print(f"[ERROR] Status {response.status_code}: {response.text}") + return False + except Exception as e: + print(f"[FAILED] {e}") + return False + +def test_create_and_upload(prompt, save_path): + """Test the /create_and_upload_image endpoint.""" + print(f"\n[UPLOAD] Creating and uploading: {prompt}") + + params = { + "prompt": prompt, + "save_path": save_path, + "width": 512, + "height": 512 + } + + url = f"http://localhost:8000/create_and_upload_image?{urlencode(params)}" + + start_time = time.time() + try: + response = requests.get(url, timeout=300) + elapsed = time.time() - start_time + + if response.status_code == 200: + result = response.json() + print(f"[OK] Response: {result} ({elapsed:.1f}s)") + return True + else: + print(f"[WARNING] Status {response.status_code}: {response.text}") + # Try to save locally anyway if it's a cloud storage error + return False + except Exception as e: + print(f"[FAILED] {e}") + return False + +def main(): + """Test multiple images with different prompts.""" + + # Test prompts + test_cases = [ + ("a cute robot playing guitar in a cyberpunk city", "robot_guitar.png"), + ("magical forest with glowing mushrooms at night", "magic_forest.png"), + ("steampunk airship flying above victorian london", "steampunk_airship.png"), + ("astronaut riding a horse on mars", "astronaut_mars.png"), + ("japanese temple in cherry blossom season, anime style", "temple_sakura.png") + ] + + print("=" * 60) + print("Testing Stable Diffusion Server API") + print("=" * 60) + + # Test /make_image endpoint + print("\n[TEST] Testing /make_image endpoint...") + successful = 0 + for prompt, filename in test_cases[:3]: # Test first 3 with make_image + if test_make_image(prompt, filename): + successful += 1 + + print(f"\n[RESULT] /make_image: {successful}/3 successful") + + # Test /create_and_upload_image endpoint + print("\n[TEST] Testing /create_and_upload_image endpoint...") + successful_upload = 0 + for prompt, filename in test_cases[3:]: # Test last 2 with create_and_upload + save_path = f"test_uploads/{filename.replace('.png', '.webp')}" + if test_create_and_upload(prompt, save_path): + successful_upload += 1 + + print(f"\n[RESULT] /create_and_upload_image: {successful_upload}/2 attempted") + + print("\n" + "=" * 60) + print("Testing complete! 
Check the generated images.") + print("=" * 60) + +if __name__ == "__main__": + main() \ No newline at end of file diff --git a/test_fast.py b/test_fast.py new file mode 100644 index 0000000..9f3dcb7 --- /dev/null +++ b/test_fast.py @@ -0,0 +1,109 @@ +#!/usr/bin/env python +"""Fast generation test with optimizations for RTX 3070.""" + +import torch +from diffusers import DiffusionPipeline, DPMSolverMultistepScheduler +import time +from PIL import Image +import os + +def generate_fast_image(): + print("Loading optimized pipeline for speed...") + + # Use SDXL Turbo or Lightning for faster generation + model_id = "stabilityai/sdxl-turbo" # Much faster variant + + pipe = DiffusionPipeline.from_pretrained( + model_id, + torch_dtype=torch.float16, + variant="fp16", + use_safetensors=True + ) + + # Move to GPU + pipe = pipe.to("cuda") + + # Enable optimizations + pipe.enable_xformers_memory_efficient_attention() # Memory efficient attention + pipe.enable_vae_slicing() # VAE slicing for memory + pipe.enable_vae_tiling() # VAE tiling for large images + + # Compile with torch.compile for speed (PyTorch 2.0+) + # pipe.unet = torch.compile(pipe.unet, mode="reduce-overhead", fullgraph=True) + + print(f"Using GPU: {torch.cuda.get_device_name(0)}") + print(f"GPU Memory: {torch.cuda.get_device_properties(0).total_memory / 1024**3:.1f} GB") + + # Generate with turbo settings (1-4 steps only!) + prompt = "a majestic dragon flying over a medieval castle, fantasy art, highly detailed" + + print(f"\nGenerating image with TURBO settings...") + print(f"Prompt: {prompt}") + + # Warm up the model + print("Warming up GPU...") + with torch.no_grad(): + _ = pipe(prompt="test", num_inference_steps=1, guidance_scale=0.0, height=512, width=512).images[0] + torch.cuda.synchronize() + + # Time the actual generation + start_time = time.time() + + with torch.no_grad(): + # SDXL Turbo uses 1-4 steps with no CFG + image = pipe( + prompt=prompt, + num_inference_steps=1, # Turbo mode: 1-4 steps only + guidance_scale=0.0, # No CFG for turbo + height=512, + width=512 + ).images[0] + + torch.cuda.synchronize() + generation_time = time.time() - start_time + + # Save the image + output_path = "test_fast.png" + image.save(output_path) + print(f"\n✅ Image saved to: {output_path}") + print(f"⚡ Generation time: {generation_time:.2f} seconds") + + # Also test with slightly more steps for quality + print("\nGenerating higher quality version (4 steps)...") + start_time = time.time() + + with torch.no_grad(): + image_hq = pipe( + prompt=prompt, + num_inference_steps=4, # Still very fast + guidance_scale=0.0, + height=768, + width=768 + ).images[0] + + torch.cuda.synchronize() + generation_time_hq = time.time() - start_time + + output_hq = "test_fast_hq.png" + image_hq.save(output_hq) + print(f"✅ HQ Image saved to: {output_hq}") + print(f"⚡ HQ Generation time: {generation_time_hq:.2f} seconds") + + return output_path + +if __name__ == "__main__": + try: + # Set memory fraction to avoid OOM + torch.cuda.set_per_process_memory_fraction(0.95) + + output_file = generate_fast_image() + print(f"\n🎯 Success! 
Check the generated images") + + # Print memory usage + print(f"\nGPU Memory Used: {torch.cuda.memory_allocated() / 1024**3:.2f} GB") + print(f"GPU Memory Reserved: {torch.cuda.memory_reserved() / 1024**3:.2f} GB") + + except Exception as e: + print(f"\nError: {e}") + import traceback + traceback.print_exc() \ No newline at end of file diff --git a/test_generation.py b/test_generation.py new file mode 100644 index 0000000..ee502bb --- /dev/null +++ b/test_generation.py @@ -0,0 +1,58 @@ +#!/usr/bin/env python +"""Simple test script to generate an image using the HDM pipeline.""" + +import torch +from hdmx import HDMXUTPipeline +from PIL import Image +import os + +def generate_test_image(): + print("Loading HDM pipeline...") + + # Load the pipeline + pipe = HDMXUTPipeline.from_pretrained( + "hdmx/hdmx_composite3", + torch_dtype=torch.float16, + trust_remote_code=True + ) + + # Move to GPU if available + if torch.cuda.is_available(): + pipe = pipe.to("cuda") + print(f"Using GPU: {torch.cuda.get_device_name(0)}") + else: + print("Using CPU") + + # Generate image + prompt = "a beautiful fantasy landscape with mountains and a crystal clear lake at sunset, highly detailed, 4k" + print(f"\nGenerating image with prompt: {prompt}") + + with torch.no_grad(): + image = pipe( + prompt=prompt, + num_inference_steps=20, + guidance_scale=7.5, + height=512, + width=512 + ).images[0] + + # Save the image + output_path = "test_output.png" + image.save(output_path) + print(f"\nImage saved to: {output_path}") + + # Also save as webp for smaller size + output_webp = "test_output.webp" + image.save(output_webp, "WEBP", quality=90) + print(f"WebP version saved to: {output_webp}") + + return output_path + +if __name__ == "__main__": + try: + output_file = generate_test_image() + print(f"\n✅ Success! 
Generated image at: {os.path.abspath(output_file)}") + except Exception as e: + print(f"\n❌ Error: {e}") + import traceback + traceback.print_exc() \ No newline at end of file diff --git a/test_hdm.py b/test_hdm.py new file mode 100644 index 0000000..539ea96 --- /dev/null +++ b/test_hdm.py @@ -0,0 +1,52 @@ +import torch +import sys +import os + +# Add HDM to path +sys.path.insert(0, "D:\\code\\HDM\\src") + +print(f"Torch version: {torch.__version__}") +print(f"CUDA available: {torch.cuda.is_available()}") +if torch.cuda.is_available(): + print(f"CUDA device: {torch.cuda.get_device_name(0)}") + +try: + # Import HDM + import xut + xut.env.USE_XFORMERS_LAYERS = True + from hdm.pipeline import HDMXUTPipeline + + print("Loading HDM pipeline...") + hdm_pipe = HDMXUTPipeline.from_pretrained( + "KBlueLeaf/HDM-xut-340M-anime", + trust_remote_code=True + ).to("cuda:0" if torch.cuda.is_available() else "cpu") + + print("HDM pipeline loaded successfully!") + print(f"Pipeline device: {hdm_pipe.device}") + + # Try to generate an image + print("\nGenerating test image...") + with torch.inference_mode(): + result = hdm_pipe( + prompts=["a simple test art"], + negative_prompts="low quality", + width=512, + height=512, + cfg_scale=3.0, + num_inference_steps=4, + camera_param={ + "zoom": 1.0, + "x_shift": 0.0, + "y_shift": 0.0, + }, + ) + + print("Image generation successful!") + if result and len(result) > 0: + print(f"Generated {len(result)} images") + +except Exception as e: + print(f"Error: {e}") + import traceback + traceback.print_exc() \ No newline at end of file diff --git a/test_sdxl.py b/test_sdxl.py new file mode 100644 index 0000000..dc87528 --- /dev/null +++ b/test_sdxl.py @@ -0,0 +1,64 @@ +#!/usr/bin/env python +"""Test script using Stable Diffusion XL model.""" + +import torch +from diffusers import DiffusionPipeline +from PIL import Image +import os + +def generate_test_image(): + print("Loading Stable Diffusion XL pipeline...") + + # Load SDXL base model (public, no auth needed) + pipe = DiffusionPipeline.from_pretrained( + "stabilityai/stable-diffusion-xl-base-1.0", + torch_dtype=torch.float16, + variant="fp16", + use_safetensors=True + ) + + # Move to GPU + if torch.cuda.is_available(): + pipe = pipe.to("cuda") + print(f"Using GPU: {torch.cuda.get_device_name(0)}") + else: + print("Using CPU") + pipe.enable_model_cpu_offload() + + # Generate image + prompt = "a beautiful fantasy landscape with mountains and a crystal clear lake at sunset, highly detailed, masterpiece, 4k" + negative_prompt = "ugly, blurry, low quality, distorted" + + print(f"\nGenerating image with prompt: {prompt}") + print("This may take a minute...") + + with torch.no_grad(): + image = pipe( + prompt=prompt, + negative_prompt=negative_prompt, + num_inference_steps=30, + guidance_scale=7.5, + height=768, + width=768 + ).images[0] + + # Save the image + output_path = "test_output.png" + image.save(output_path) + print(f"\nImage saved to: {output_path}") + + # Also save as webp for smaller size + output_webp = "test_output.webp" + image.save(output_webp, "WEBP", quality=90) + print(f"WebP version saved to: {output_webp}") + + return output_path + +if __name__ == "__main__": + try: + output_file = generate_test_image() + print(f"\nSuccess! 
Generated image at: {os.path.abspath(output_file)}") + except Exception as e: + print(f"\nError: {e}") + import traceback + traceback.print_exc() \ No newline at end of file diff --git a/test_simple.py b/test_simple.py new file mode 100644 index 0000000..cb233c1 --- /dev/null +++ b/test_simple.py @@ -0,0 +1,59 @@ +#!/usr/bin/env python +"""Simple test script to generate an image using available pipelines.""" + +import torch +from diffusers import FluxPipeline +from PIL import Image +import os + +def generate_test_image(): + print("Loading Flux pipeline...") + + # Load Flux Schnell pipeline (fast variant) + pipe = FluxPipeline.from_pretrained( + "black-forest-labs/FLUX.1-schnell", + torch_dtype=torch.bfloat16 + ) + + # Enable CPU offloading to manage memory + pipe.enable_model_cpu_offload() + + if torch.cuda.is_available(): + print(f"Using GPU: {torch.cuda.get_device_name(0)}") + else: + print("Using CPU") + + # Generate image + prompt = "a beautiful fantasy landscape with mountains and a crystal clear lake at sunset, highly detailed, 4k" + print(f"\nGenerating image with prompt: {prompt}") + print("This may take a few minutes on first run...") + + with torch.no_grad(): + image = pipe( + prompt=prompt, + num_inference_steps=4, # Schnell is optimized for 4 steps + guidance_scale=0.0, # Schnell doesn't use guidance + height=512, + width=512 + ).images[0] + + # Save the image + output_path = "test_output.png" + image.save(output_path) + print(f"\nImage saved to: {output_path}") + + # Also save as webp for smaller size + output_webp = "test_output.webp" + image.save(output_webp, "WEBP", quality=90) + print(f"WebP version saved to: {output_webp}") + + return output_path + +if __name__ == "__main__": + try: + output_file = generate_test_image() + print(f"\n✅ Success! Generated image at: {os.path.abspath(output_file)}") + except Exception as e: + print(f"\n❌ Error: {e}") + import traceback + traceback.print_exc() \ No newline at end of file
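
One caveat with the fallback path in PATCH 1/2: when the HDM import fails, main.py rebinds hdm_pipe to a FluxPipeline, but the call sites (make_image, create_image_from_prompt, style_transfer_image_from_prompt) still pass HDM-only keyword arguments (prompts, negative_prompts, cfg_scale, camera_param, tread_gamma1/tread_gamma2), which FluxPipeline does not accept. The sketch below is a hypothetical dispatch helper, not part of the patch; the function name and structure are assumptions, and it only reuses the two call signatures already shown in the diff (the HDM call from make_image and the Flux Schnell call from the removed create_image_from_prompt).

import torch

def generate_image(pipe, prompt: str, width: int = 1024, height: int = 1024,
                   n_steps: int = 24):
    """Return a PIL image from either the HDM pipeline or the Flux fallback."""
    with torch.inference_mode():
        if pipe.__class__.__name__ == "HDMXUTPipeline":
            # HDM path, mirroring the kwargs used in make_image() above.
            result = pipe(
                prompts=[prompt],
                negative_prompts="low quality, worst quality, blurry, bad anatomy",
                width=width,
                height=height,
                cfg_scale=3.0,
                num_inference_steps=n_steps,
                camera_param={"zoom": 1.0, "x_shift": 0.0, "y_shift": 0.0},
                tread_gamma1=0.0,
                tread_gamma2=0.5,
            )
        else:
            # Flux Schnell fallback, mirroring the removed flux_pipe call
            # (few steps, no classifier-free guidance).
            result = pipe(
                prompt=prompt,
                width=width,
                height=height,
                guidance_scale=0.0,
                num_inference_steps=4,
                max_sequence_length=256,
            )
    return result.images[0]

Routing through one helper like this would let the endpoints stay agnostic about which pipeline actually loaded, instead of assuming HDM keyword arguments everywhere.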