From c3c68ae63511f2dcd5c9721831e2b61398046e8f Mon Sep 17 00:00:00 2001 From: Lee Penkman Date: Sat, 30 Aug 2025 13:53:20 +1200 Subject: [PATCH 1/2] windows hdm --- main.py | 573 ++++++++++++++++++++++++++++++-------------------------- 1 file changed, 309 insertions(+), 264 deletions(-) diff --git a/main.py b/main.py index c5f68ee..849a460 100644 --- a/main.py +++ b/main.py @@ -19,18 +19,16 @@ from PIL import Image from diffusers import ( DiffusionPipeline, - StableDiffusionXLInpaintPipeline, + # StableDiffusionXLInpaintPipeline, # Commented out - using HDM only UNet2DConditionModel, LCMScheduler, - StableDiffusionInpaintPipeline, - StableDiffusionImg2ImgPipeline, + # StableDiffusionInpaintPipeline, # Commented out - using HDM only + # StableDiffusionImg2ImgPipeline, # Commented out - using HDM only KDPM2AncestralDiscreteScheduler, - StableDiffusionXLImg2ImgPipeline, - ControlNetModel, - StableDiffusionXLControlNetPipeline, - AutoPipelineForImage2Image, - FluxPipeline, - FluxControlNetPipeline, + # StableDiffusionXLImg2ImgPipeline, # Commented out - using HDM only + # ControlNetModel, # Commented out - style transfer disabled + # StableDiffusionXLControlNetPipeline, # Commented out - style transfer disabled + # AutoPipelineForImage2Image, # Commented out - using HDM only ) from diffusers.utils import load_image from fastapi import FastAPI @@ -59,9 +57,11 @@ except Exception as e: logger.error(f"Error importing pillow_avif: {e}") +# All SDXL models commented out - using HDM only # model_name = "models/SSD-1B" -model_name = "models/ProteusV0.2" +# model_name = "models/ProteusV0.2" # model_name = "dataautogpt3/ProteusV0.2" +USE_HDM = True # Flag to use HDM pipeline # try: # unet = UNet2DConditionModel.from_pretrained( # "models/lcm-ssd-1b", torch_dtype=torch.float16, variant="fp16" @@ -71,68 +71,87 @@ # "latent-consistency/lcm-ssd-1b", torch_dtype=torch.float16, variant="fp16" # ) +# Legacy SDXL pipeline - commented out for HDM +# try: +# pipe = DiffusionPipeline.from_pretrained( +# model_name, torch_dtype=torch.float16, variant="fp16" +# ) +# except OSError as e: +# pipe = DiffusionPipeline.from_pretrained( +# "dataautogpt3/ProteusV0.2", torch_dtype=torch.float16, variant="fp16" +# ) +# +# old_scheduler = pipe.scheduler +# pipe.scheduler = LCMScheduler.from_config(pipe.scheduler.config) + +# Create dummy objects for compatibility +pipe = None +old_scheduler = None + +# LCM LoRA loading commented out - not needed for HDM +# if os.getenv("LOAD_LCM_LORA", "0") == "1": +# if os.path.exists("models/lcm-lora-sdxl"): +# pipe.load_lora_weights("models/lcm-lora-sdxl", adapter_name="lcm") +# else: +# pipe.load_lora_weights( +# "latent-consistency/lcm-lora-sdxl", adapter_name="lcm" +# ) +# pipe.set_adapters(["lcm"], adapter_weights=[1.0]) + +# Load HDM pipeline for efficient text-to-image try: - # pipe = DiffusionPipeline.from_pretrained( - # "models/SSD-1B", unet=unet, torch_dtype=torch.float16, variant="fp16" - # ) - pipe = DiffusionPipeline.from_pretrained( - model_name, torch_dtype=torch.float16, variant="fp16" - ) -except OSError as e: - # pipe = DiffusionPipeline.from_pretrained( - # "segmind/SSD-1B", unet=unet, torch_dtype=torch.float16, variant="fp16" - # ) - pipe = DiffusionPipeline.from_pretrained( - "dataautogpt3/ProteusV0.2", torch_dtype=torch.float16, variant="fp16" - ) - -old_scheduler = pipe.scheduler -pipe.scheduler = LCMScheduler.from_config(pipe.scheduler.config) - -if os.getenv("LOAD_LCM_LORA", "0") == "1": - if os.path.exists("models/lcm-lora-sdxl"): - 
pipe.load_lora_weights("models/lcm-lora-sdxl", adapter_name="lcm") - else: - pipe.load_lora_weights( - "latent-consistency/lcm-lora-sdxl", adapter_name="lcm" + import sys + sys.path.append('../HDM/src') + import xut.env + + # Configure XUT optimizations + xut.env.TORCH_COMPILE = True + xut.env.USE_LIGER = False + xut.env.USE_VANILLA = False + xut.env.USE_XFORMERS = True + xut.env.USE_XFORMERS_LAYERS = True + + from hdm.pipeline import HDMXUTPipeline + + torch.set_float32_matmul_precision("high") + hdm_pipe = ( + HDMXUTPipeline.from_pretrained( + "KBlueLeaf/HDM-xut-340M-anime", + trust_remote_code=True ) - pipe.set_adapters(["lcm"], adapter_weights=[1.0]) - -# Load Flux Schnell pipeline for efficient text-to-image -flux_pipe = FluxPipeline.from_pretrained( - "black-forest-labs/FLUX.1-schnell", torch_dtype=torch.bfloat16 -) -flux_pipe.enable_model_cpu_offload() -try: - from dfloat11 import DFloat11Model - dfloat_path = os.getenv("DF11_MODEL_PATH", "DFloat11/FLUX.1-schnell-DF11") - DFloat11Model.from_pretrained( - dfloat_path, - device="cpu", - bfloat16_model=flux_pipe.transformer, + .to("cuda:0" if torch.cuda.is_available() else "cpu") + .to(torch.float16 if torch.cuda.is_available() else torch.float32) ) + logger.info("HDM pipeline loaded successfully") except Exception as e: - logger.error(f"Failed to load DFloat11 weights: {e}") - -try: - flux_controlnet = ControlNetModel.from_pretrained( - "black-forest-labs/flux-controlnet-canny", torch_dtype=torch.bfloat16 + logger.error(f"Failed to load HDM pipeline: {e}") + # Fallback to original Flux implementation + from diffusers import FluxPipeline + hdm_pipe = FluxPipeline.from_pretrained( + "black-forest-labs/FLUX.1-schnell", torch_dtype=torch.bfloat16 ) - flux_controlnetpipe = FluxControlNetPipeline( - controlnet=flux_controlnet, **flux_pipe.components - ) - flux_controlnetpipe.enable_model_cpu_offload() - try: - lora_path = os.getenv( - "CONTROLNET_LORA", "black-forest-labs/flux-controlnet-line-lora" - ) - flux_controlnetpipe.load_lora_weights(lora_path, adapter_name="line") - flux_controlnetpipe.set_adapters(["line"], adapter_weights=[1.0]) - except Exception as e: - logger.error(f"Failed to load ControlNet LoRA: {e}") -except Exception as e: - logger.error(f"Failed to load Flux ControlNet: {e}") - flux_controlnetpipe = None + hdm_pipe.enable_model_cpu_offload() + +# Comment out ControlNet for now as requested +# try: +# flux_controlnet = ControlNetModel.from_pretrained( +# "black-forest-labs/flux-controlnet-canny", torch_dtype=torch.bfloat16 +# ) +# flux_controlnetpipe = FluxControlNetPipeline( +# controlnet=flux_controlnet, **flux_pipe.components +# ) +# flux_controlnetpipe.enable_model_cpu_offload() +# try: +# lora_path = os.getenv( +# "CONTROLNET_LORA", "black-forest-labs/flux-controlnet-line-lora" +# ) +# flux_controlnetpipe.load_lora_weights(lora_path, adapter_name="line") +# flux_controlnetpipe.set_adapters(["line"], adapter_weights=[1.0]) +# except Exception as e: +# logger.error(f"Failed to load ControlNet LoRA: {e}") +# except Exception as e: +# logger.error(f"Failed to load Flux ControlNet: {e}") +flux_controlnetpipe = None # quantizing @@ -166,33 +185,36 @@ # freeze(unet) # pipe.unet = unet -pipe.enable_model_cpu_offload() -pipe.enable_sequential_cpu_offload() - -# mem efficient -pipe.enable_attention_slicing() -pipe.enable_vae_slicing() - -# pipe.to("cuda") - -all_components = pipe.components +# Pipe optimizations commented out - using HDM instead +# pipe.enable_model_cpu_offload() +# pipe.enable_sequential_cpu_offload() +# +# # 
mem efficient +# pipe.enable_attention_slicing() +# pipe.enable_vae_slicing() +# +# # pipe.to("cuda") +# +# all_components = pipe.components +all_components = None # all_components.pop("scheduler") # all_components.pop("text_encoder") # all_components.pop("text_encoder_2") # all_components.pop("tokenizer") # all_components.pop("tokenizer_2") -img2img = AutoPipelineForImage2Image.from_pipe(pipe) -img2img.watermark = None - - -# mem efficient -img2img.enable_attention_slicing() -img2img.enable_vae_slicing() -# img2img.to("cuda") -# img2img.enable_xformers_memory_efficient_attention() -img2img.enable_model_cpu_offload() -img2img.enable_sequential_cpu_offload() +# img2img pipeline commented out - using HDM instead +# img2img = AutoPipelineForImage2Image.from_pipe(pipe) +# img2img.watermark = None +# +# # mem efficient +# img2img.enable_attention_slicing() +# img2img.enable_vae_slicing() +# # img2img.to("cuda") +# # img2img.enable_xformers_memory_efficient_attention() +# img2img.enable_model_cpu_offload() +# img2img.enable_sequential_cpu_offload() +img2img = None # # Quantize and freeze the text_encoder # text_encoder = img2img.text_encoder @@ -213,7 +235,7 @@ # variant="fp16", # # safety_checker=None, # ) # todo try torch_dtype=float16 -pipe.watermark = None +# pipe.watermark = None # Commented out since pipe is now None # deepcache @@ -229,18 +251,17 @@ # tomesd.apply_patch(pipe, ratio=0.2) # light speedup -refiner = DiffusionPipeline.from_pretrained( - # "stabilityai/stable-diffusion-xl-refiner-1.0", - # "dataautogpt3/OpenDalle", - model_name, - # "models/SSD-1B", - unet=pipe.unet, - text_encoder_2=pipe.text_encoder_2, - vae=pipe.vae, - torch_dtype=torch.float16, # safer to use bfloat? - use_safetensors=True, - variant="fp16", # remember not to download the big model -) +# Comment out refiner pipeline +# refiner = DiffusionPipeline.from_pretrained( +# model_name, +# unet=pipe.unet, +# text_encoder_2=pipe.text_encoder_2, +# vae=pipe.vae, +# torch_dtype=torch.float16, +# use_safetensors=True, +# variant="fp16", +# ) +refiner = None # refiner = pipe # same model in this case # refiner.scheduler = old_scheduler @@ -248,54 +269,51 @@ # refiner.schedu -refiner.watermark = None -# refiner.to("cuda") -refiner.enable_model_cpu_offload() -refiner.enable_sequential_cpu_offload() - -# {'scheduler', 'text_encoder', 'text_encoder_2', 'tokenizer', 'tokenizer_2', 'unet', 'vae'} can be passed in from existing model -# inpaintpipe = StableDiffusionInpaintPipeline(**pipe.components) -inpaintpipe = StableDiffusionXLInpaintPipeline.from_pretrained( - # "models/stable-diffusion-xl-base-1.0", - model_name, - torch_dtype=torch.float16, - variant="fp16", - use_safetensors=True, - scheduler=pipe.scheduler, - text_encoder=pipe.text_encoder, - text_encoder_2=pipe.text_encoder_2, - tokenizer=pipe.tokenizer, - tokenizer_2=pipe.tokenizer_2, - unet=pipe.unet, - vae=pipe.vae, - # load_connected_pipeline= -) -inpaintpipe.watermark = None -# inpaintpipe.enable_model_cpu_offload() - -controlnet_conditioning_scale = 0.5 # recommended for good generalization -controlnet = ControlNetModel.from_pretrained( - "diffusers/controlnet-canny-sdxl-1.0", - torch_dtype=torch.float16, - variant="fp16", -) -# controlnet.to("cuda") +# Comment out refiner optimizations +# refiner.watermark = None +# refiner.enable_model_cpu_offload() +# refiner.enable_sequential_cpu_offload() -controlnetpipe = StableDiffusionXLControlNetPipeline.from_pretrained( - # "stabilityai/stable-diffusion-xl-base-1.0", - model_name, - controlnet=controlnet, - 
**pipe.components, -) -# controlnetpipe.to("cuda") -controlnetpipe.watermark = None +# Comment out inpainting pipeline +# inpaintpipe = StableDiffusionXLInpaintPipeline.from_pretrained( +# model_name, +# torch_dtype=torch.float16, +# variant="fp16", +# use_safetensors=True, +# scheduler=pipe.scheduler, +# text_encoder=pipe.text_encoder, +# text_encoder_2=pipe.text_encoder_2, +# tokenizer=pipe.tokenizer, +# tokenizer_2=pipe.tokenizer_2, +# unet=pipe.unet, +# vae=pipe.vae, +# ) +# # inpaintpipe.watermark = None # Commented since inpaintpipe is None +inpaintpipe = None -# efficiency -controlnetpipe.enable_model_cpu_offload() -controlnetpipe.enable_sequential_cpu_offload() +# Comment out ControlNet pipeline +# controlnet_conditioning_scale = 0.5 +# controlnet = ControlNetModel.from_pretrained( +# "diffusers/controlnet-canny-sdxl-1.0", +# torch_dtype=torch.float16, +# variant="fp16", +# ) +# +# controlnetpipe = StableDiffusionXLControlNetPipeline.from_pretrained( +# model_name, +# controlnet=controlnet, +# **pipe.components, +# ) +# controlnetpipe.watermark = None +controlnet_conditioning_scale = None +controlnet = None +controlnetpipe = None -controlnetpipe.enable_attention_slicing() -controlnetpipe.enable_vae_slicing() +# Comment out ControlNet optimizations +# controlnetpipe.enable_model_cpu_offload() +# controlnetpipe.enable_sequential_cpu_offload() +# controlnetpipe.enable_attention_slicing() +# controlnetpipe.enable_vae_slicing() # # Quantize and freeze the text_encoder # text_encoderz = controlnetpipe.text_encoder @@ -342,24 +360,24 @@ # requires_aesthetics_score=False, # ) # inpaintpipe.to("cuda") -inpaintpipe.watermark = None +# inpaintpipe.watermark = None # Commented since inpaintpipe is None # inpaintpipe.register_to_config(requires_aesthetics_score=False) -# todo do we need this? 
-inpaint_refiner = StableDiffusionXLInpaintPipeline.from_pretrained( - # "stabilityai/stable-diffusion-xl-refiner-1.0", - model_name, - text_encoder_2=inpaintpipe.text_encoder_2, - vae=inpaintpipe.vae, - torch_dtype=torch.float16, - use_safetensors=True, - variant="fp16", - tokenizer_2=refiner.tokenizer_2, - tokenizer=refiner.tokenizer, - scheduler=refiner.scheduler, - text_encoder=refiner.text_encoder, - unet=refiner.unet, -) +# Comment out inpaint refiner +# inpaint_refiner = StableDiffusionXLInpaintPipeline.from_pretrained( +# model_name, +# text_encoder_2=inpaintpipe.text_encoder_2, +# vae=inpaintpipe.vae, +# torch_dtype=torch.float16, +# use_safetensors=True, +# variant="fp16", +# tokenizer_2=refiner.tokenizer_2, +# tokenizer=refiner.tokenizer, +# scheduler=refiner.scheduler, +# text_encoder=refiner.text_encoder, +# unet=refiner.unet, +# ) +inpaint_refiner = None # del inpaint_refiner.vae # del inpaint_refiner.text_encoder_2 # del inpaint_refiner.text_encoder @@ -388,7 +406,8 @@ # requires_aesthetics_score=False, # ) # inpaint_refiner.to("cuda") -inpaint_refiner.watermark = None +# Comment out inpaint refiner watermark +# inpaint_refiner.watermark = None # inpaint_refiner.register_to_config(requires_aesthetics_score=False) n_steps = 5 @@ -397,14 +416,9 @@ use_refiner = False -# efficiency - -# inpaintpipe.enable_model_cpu_offload() -inpaint_refiner.enable_model_cpu_offload() -inpaint_refiner.enable_sequential_cpu_offload() -# pipe.enable_model_cpu_offload() -# refiner.enable_model_cpu_offload() -# img2img.enable_model_cpu_offload() +# Comment out efficiency optimizations since we're using HDM +# inpaint_refiner.enable_model_cpu_offload() +# inpaint_refiner.enable_sequential_cpu_offload() # pipe.enable_xformers_memory_efficient_attention() @@ -439,9 +453,9 @@ # this can cause errors on some inputs so consider disabling it # pipe.unet = torch.compile(pipe.unet) # refiner.unet = torch.compile(refiner.unet)#, mode="reduce-overhead", fullgraph=True) -# compile the inpainters - todo reuse the other unets? 
swap out the models for others/del them so they share models and can be swapped efficiently -inpaintpipe.unet = pipe.unet -inpaint_refiner.unet = refiner.unet +# Comment out UNet sharing and compilation since we're using HDM +# inpaintpipe.unet = pipe.unet +# inpaint_refiner.unet = refiner.unet # inpaintpipe.unet = torch.compile(inpaintpipe.unet) # inpaint_refiner.unet = torch.compile(inpaint_refiner.unet) @@ -490,9 +504,23 @@ def make_image(prompt: str, save_path: str = ""): if Path(save_path).exists(): return FileResponse(save_path, media_type="image/png") with torch.inference_mode(): - image = pipe( - prompt=prompt, num_inference_steps=n_steps, **extra_pipe_args - ).images[0] + # Use HDM instead of SDXL pipe + result = hdm_pipe( + prompts=[prompt], + negative_prompts="low quality, worst quality, blurry, bad anatomy", + width=1024, + height=1024, + cfg_scale=3.0, + num_inference_steps=24, + camera_param={ + "zoom": 1.0, + "x_shift": 0.0, + "y_shift": 0.0, + }, + tread_gamma1=0.0, + tread_gamma2=0.5, + ) + image = result.images[0] if not save_path: save_path = f"images/{prompt}.png" image.save(save_path) @@ -693,25 +721,25 @@ def style_transfer_image_from_prompt( generator = torch.Generator("cpu").manual_seed(0) for attempt in range(retries + 1): try: - if canny and flux_controlnetpipe: - image = flux_controlnetpipe( - prompt=prompt, - image=canny_image, - num_inference_steps=n_steps, - guidance_scale=0.0, - generator=generator, - max_sequence_length=256, - ).images[0] - else: - image = flux_pipe( - prompt=prompt, + # Use HDM for style transfer instead of Flux + # Note: HDM doesn't have direct ControlNet support, so we'll use basic generation + with torch.inference_mode(): + result = hdm_pipe( + prompts=[prompt], + negative_prompts="low quality, worst quality, blurry, bad anatomy", width=input_pil.width, height=input_pil.height, - guidance_scale=0.0, + cfg_scale=3.0, num_inference_steps=n_steps, - generator=generator, - max_sequence_length=256, - ).images[0] + camera_param={ + "zoom": 1.0, + "x_shift": 0.0, + "y_shift": 0.0, + }, + tread_gamma1=0.0, + tread_gamma2=0.5, + ) + image = result.images[0] break except Exception as err: if attempt >= retries: @@ -743,19 +771,20 @@ def style_transfer_image_from_prompt( # # gc.collect() # add a refinement pass because the image is not always perfect/depending on the model if its not well tuned for LCM it might need more passes - if use_refiner: - lcm_scheduler = img2img.scheduler - img2img.scheduler = old_scheduler - - image = img2img( - prompt=prompt, - image=image, - num_inference_steps=n_refiner_steps, - strength=strength, - **extra_refiner_pipe_args, - ).images[0] - # revert scheduler - img2img.scheduler = lcm_scheduler + # Comment out refiner since we're using HDM only + # if use_refiner: + # lcm_scheduler = img2img.scheduler + # img2img.scheduler = old_scheduler + # + # image = img2img( + # prompt=prompt, + # image=image, + # num_inference_steps=n_refiner_steps, + # strength=strength, + # **extra_refiner_pipe_args, + # ).images[0] + # # revert scheduler + # img2img.scheduler = lcm_scheduler if detect_too_bumpy(image): if retries <= 0: raise Exception( @@ -775,34 +804,46 @@ def style_transfer_image_from_prompt( def create_image_from_prompt( - prompt, width, height, n_steps=5, extra_args=None, retries=3 + prompt, width, height, n_steps=24, extra_args=None, retries=3 ): - """Generate an image using the Flux Schnell pipeline with retries.""" + """Generate an image using the HDM pipeline with retries.""" if extra_args is None: extra_args = {} + # 
HDM works best with multiples of 64 block_width = width - (width % 64) block_height = height - (height % 64) prompt = shorten_too_long_text(prompt) - generator = torch.Generator("cpu").manual_seed(extra_args.get("seed", 0)) + + # HDM doesn't use the same generator setup as Flux + # seed = extra_args.get("seed", 0) for attempt in range(retries + 1): try: - image = flux_pipe( - prompt=prompt, - width=block_width, - height=block_height, - guidance_scale=0.0, - num_inference_steps=n_steps, - generator=generator, - max_sequence_length=256, - ).images[0] + # Use HDM pipeline instead of Flux + with torch.inference_mode(): + result = hdm_pipe( + prompts=[prompt], + negative_prompts="low quality, worst quality, blurry, bad anatomy", + width=block_width, + height=block_height, + cfg_scale=3.0, + num_inference_steps=n_steps, + camera_param={ + "zoom": 1.0, + "x_shift": 0.0, + "y_shift": 0.0, + }, + tread_gamma1=0.0, + tread_gamma2=0.5, + ) + image = result.images[0] break except Exception as err: # pragma: no cover - hardware/oom errors if attempt >= retries: raise logger.warning( - f"Flux generation failed on attempt {attempt + 1}/{retries}: {err}" + f"HDM generation failed on attempt {attempt + 1}/{retries}: {err}" ) if attempt == 0: prompt = remove_stopwords(prompt) @@ -860,45 +901,55 @@ def image_to_bytes(image): def inpaint_image_from_prompt(prompt, image_url: str, mask_url: str, retries=3): - prompt = shorten_too_long_text(prompt) - # image = pipe(guidance_scale=7,prompt=prompt).images[0] - - init_image = load_image(image_url).convert("RGB") - mask_image = load_image(mask_url).convert("RGB") # why rgb for a 1 channel mask? - # num_inference_steps = 75 # causes weird error ValueError: The combination of `original_steps x strength`: 50 x 1.0 is smaller than `num_inference_steps`: 75. Make sure to either reduce `num_inference_steps` to a value smaller than 50 or increase `strength` to a value higher than 1.5. 
- num_inference_steps = 40 - high_noise_frac = 0.7 - - generator = torch.Generator("cpu").manual_seed(0) - for attempt in range(retries + 1): - try: - image = inpaintpipe( - prompt=prompt, - image=init_image, - mask_image=mask_image, - num_inference_steps=num_inference_steps, - denoising_start=high_noise_frac, - output_type="latent", - ).images[0] - break - except Exception as e: - if attempt >= retries: - traceback.print_exc() - raise - logger.warning( - f"Inpainting failed on attempt {attempt + 1}/{retries}: {e}" - ) - prompt = remove_stopwords(prompt) if attempt == 0 else shorten_prompt_for_retry(prompt) - if not prompt: - raise e - if image != None: - image = inpaint_refiner( - prompt=prompt, - image=image, - mask_image=mask_image, - num_inference_steps=num_inference_steps, - denoising_start=high_noise_frac, - ).images[0] + """Inpainting function - currently disabled for HDM-only setup.""" + # TODO: Implement HDM-based inpainting or use alternative approach + logger.warning("Inpainting is currently disabled in HDM-only mode") + + # touch progress.txt file - if we dont do this we get restarted by supervisor/other processes for reliability + with open("progress.txt", "w") as f: + current_time = datetime.now().strftime("%H:%M:%S") + f.write(f"{current_time}") + + # Return None for now since inpainting is not implemented + return None + + # # Original SDXL inpainting code commented out + # prompt = shorten_too_long_text(prompt) + # init_image = load_image(image_url).convert("RGB") + # mask_image = load_image(mask_url).convert("RGB") + # num_inference_steps = 40 + # high_noise_frac = 0.7 + # + # generator = torch.Generator("cpu").manual_seed(0) + # for attempt in range(retries + 1): + # try: + # image = inpaintpipe( + # prompt=prompt, + # image=init_image, + # mask_image=mask_image, + # num_inference_steps=num_inference_steps, + # denoising_start=high_noise_frac, + # output_type="latent", + # ).images[0] + # break + # except Exception as e: + # if attempt >= retries: + # traceback.print_exc() + # raise + # logger.warning( + # f"Inpainting failed on attempt {attempt + 1}/{retries}: {e}" + # ) + # prompt = remove_stopwords(prompt) if attempt == 0 else shorten_prompt_for_retry(prompt) + # if not prompt: + # raise e + # if image != None: + # image = inpaint_refiner( + # prompt=prompt, + # image=image, + # mask_image=mask_image, + # num_inference_steps=num_inference_steps, + # denoising_start=high_noise_frac, + # ).images[0] # try: # # gc.collect() # torch.cuda.empty_cache() @@ -911,10 +962,4 @@ def inpaint_image_from_prompt(prompt, image_url: str, mask_url: str, retries=3): # os.system("/usr/bin/bash kill -SIGHUP `pgrep gunicorn`") # os.system("kill -1 `pgrep gunicorn`") - # touch progress.txt file - if we dont do this we get restarted by supervisor/other processes for reliability - with open("progress.txt", "w") as f: - current_time = datetime.now().strftime("%H:%M:%S") - f.write(f"{current_time}") - return image_to_bytes(image) - From 6b4cd17990e34e5969a246c98974a727766a2611 Mon Sep 17 00:00:00 2001 From: Lee Penkman Date: Sat, 30 Aug 2025 14:18:31 +1200 Subject: [PATCH 2/2] fx --- create_scheduled_task.bat | 27 + requirements-windows.txt | 1001 +++++++++++++++++++++++++++++++++++ start_server.bat | 12 + start_server_production.bat | 23 + test_api.py | 110 ++++ test_fast.py | 109 ++++ test_generation.py | 58 ++ test_hdm.py | 52 ++ test_sdxl.py | 64 +++ test_simple.py | 59 +++ 10 files changed, 1515 insertions(+) create mode 100644 create_scheduled_task.bat create mode 100644 
requirements-windows.txt create mode 100644 start_server.bat create mode 100644 start_server_production.bat create mode 100644 test_api.py create mode 100644 test_fast.py create mode 100644 test_generation.py create mode 100644 test_hdm.py create mode 100644 test_sdxl.py create mode 100644 test_simple.py diff --git a/create_scheduled_task.bat b/create_scheduled_task.bat new file mode 100644 index 0000000..68be4bf --- /dev/null +++ b/create_scheduled_task.bat @@ -0,0 +1,27 @@ +@echo off +echo Creating Windows Task Scheduler task for Stable Diffusion Server... + +:: Create the task to run at system startup +schtasks /create /tn "StableDiffusionServer" ^ + /tr "D:\code\stable-diffusion-server\start_server.bat" ^ + /sc onstart ^ + /ru SYSTEM ^ + /rl highest ^ + /f + +:: Alternative: Run as current user at logon +:: schtasks /create /tn "StableDiffusionServer" ^ +:: /tr "D:\code\stable-diffusion-server\start_server.bat" ^ +:: /sc onlogon ^ +:: /rl highest ^ +:: /f + +echo Task created successfully! +echo. +echo To manage the task: +echo Start: schtasks /run /tn "StableDiffusionServer" +echo Stop: schtasks /end /tn "StableDiffusionServer" +echo Delete: schtasks /delete /tn "StableDiffusionServer" /f +echo Query: schtasks /query /tn "StableDiffusionServer" +echo. +pause \ No newline at end of file diff --git a/requirements-windows.txt b/requirements-windows.txt new file mode 100644 index 0000000..62456ea --- /dev/null +++ b/requirements-windows.txt @@ -0,0 +1,1001 @@ +# This file was autogenerated by uv via the following command: +# uv pip compile requirements.in -o requirements.txt +absl-py==2.3.1 + # via + # fiddle + # tensorboard +accelerate==1.1.1 + # via + # -r requirements.in + # dfloat11 + # peft +aiofiles==24.1.0 + # via gradio +aiohappyeyeballs==2.4.3 + # via aiohttp +aiohttp==3.11.8 + # via + # fsspec + # gradio +aiosignal==1.3.1 + # via aiohttp +alembic==1.16.3 + # via optuna +altair==5.5.0 + # via + # gradio + # streamlit +annotated-types==0.7.0 + # via + # -r requirements.in + # pydantic +antlr4-python3-runtime==4.9.3 + # via + # hydra-core + # omegaconf +anyio==4.6.2.post1 + # via + # -r requirements.in + # httpx + # starlette +asttokens==3.0.0 + # via stack-data +attrs==24.2.0 + # via + # aiohttp + # jsonschema + # referencing +audioread==3.0.1 + # via + # lhotse + # librosa +bitsandbytes==0.45.3 + # via nemo-toolkit +blinker==1.9.0 + # via streamlit +boto3==1.39.3 + # via -r requirements.in +botocore==1.39.3 + # via + # boto3 + # s3transfer +braceexpand==0.1.7 + # via + # nemo-toolkit + # webdataset +cachetools==5.5.0 + # via + # -r requirements.in + # google-auth + # streamlit +certifi==2024.8.30 + # via + # -r requirements.in + # httpcore + # httpx + # requests + # sentry-sdk +cffi==1.17.1 + # via soundfile +charset-normalizer==3.4.0 + # via + # -r requirements.in + # requests +click==8.1.7 + # via + # -r requirements.in + # jiwer + # lhotse + # nltk + # sacremoses + # streamlit + # typer + # uvicorn + # wandb +cloudpickle==3.1.1 + # via nemo-toolkit +cmake==3.31.1 + # via -r requirements.in +colorlog==6.9.0 + # via optuna +contourpy==1.3.1 + # via matplotlib +cupy-cuda12x==13.4.1 + # via dfloat11 +cycler==0.12.1 + # via matplotlib +cytoolz==1.0.1 + # via lhotse +datasets==3.6.0 + # via nemo-toolkit +decorator==5.2.1 + # via + # ipython + # librosa +deepcache==0.1.1 + # via -r requirements.in +dfloat11==0.2.0 + # via -r requirements.in +diffusers==0.31.0 + # via + # -r requirements.in + # deepcache +dill==0.3.8 + # via + # datasets + # multiprocess +diskcache==5.6.3 + # via -r 
requirements.in +distance==0.1.3 + # via g2p-en +docopt==0.6.2 + # via + # num2words + # pyannote-metrics +docstring-parser==0.16 + # via google-cloud-aiplatform +editdistance==0.8.1 + # via nemo-toolkit +einops==0.8.1 + # via nemo-toolkit +exceptiongroup==1.2.2 + # via -r requirements.in +executing==2.2.0 + # via stack-data +fastapi==0.115.5 + # via + # -r requirements.in + # gradio +fastrlock==0.8.3 + # via cupy-cuda12x +ffmpy==0.4.0 + # via gradio +fiddle==0.3.0 + # via nemo-toolkit +filelock==3.16.1 + # via + # -r requirements.in + # datasets + # diffusers + # huggingface-hub + # torch + # transformers +fonttools==4.55.0 + # via matplotlib +frozenlist==1.5.0 + # via + # aiohttp + # aiosignal +fsspec==2024.12.0 + # via + # -r requirements.in + # datasets + # gradio-client + # huggingface-hub + # lightning + # nemo-toolkit + # pytorch-lightning + # torch +future==1.0.0 + # via pyloudnorm +g2p-en==2.1.0 + # via nemo-toolkit +gitdb==4.0.11 + # via gitpython +gitpython==3.1.43 + # via + # streamlit + # wandb +google-api-core==2.23.0 + # via + # -r requirements.in + # google-api-python-client + # google-cloud-aiplatform + # google-cloud-bigquery + # google-cloud-core + # google-cloud-datastore + # google-cloud-ndb + # google-cloud-resource-manager + # google-cloud-storage +google-api-python-client==2.154.0 + # via -r requirements.in +google-auth==2.36.0 + # via + # google-api-core + # google-api-python-client + # google-auth-httplib2 + # google-cloud-aiplatform + # google-cloud-bigquery + # google-cloud-core + # google-cloud-datastore + # google-cloud-resource-manager + # google-cloud-storage +google-auth-httplib2==0.2.0 + # via google-api-python-client +google-cloud-aiplatform==1.73.0 + # via -r requirements.in +google-cloud-bigquery==3.27.0 + # via google-cloud-aiplatform +google-cloud-core==2.4.1 + # via + # google-cloud-bigquery + # google-cloud-datastore + # google-cloud-storage +google-cloud-datastore==2.20.1 + # via google-cloud-ndb +google-cloud-ndb==2.3.2 + # via -r requirements.in +google-cloud-resource-manager==1.13.1 + # via google-cloud-aiplatform +google-cloud-storage==2.18.2 + # via + # -r requirements.in + # google-cloud-aiplatform +google-crc32c==1.6.0 + # via + # google-cloud-storage + # google-resumable-media +google-resumable-media==2.7.2 + # via + # google-cloud-bigquery + # google-cloud-storage +googleapis-common-protos==1.66.0 + # via + # google-api-core + # grpc-google-iam-v1 + # grpcio-status +gradio==3.36.1 + # via -r requirements.in +gradio-client==1.5.0 + # via gradio +graphviz==0.21 + # via fiddle +greenlet==3.2.3 + # via sqlalchemy +grpc-google-iam-v1==0.13.1 + # via google-cloud-resource-manager +grpcio==1.68.0 + # via + # google-api-core + # googleapis-common-protos + # grpc-google-iam-v1 + # grpcio-status + # tensorboard +grpcio-status==1.62.3 + # via google-api-core +gunicorn==23.0.0 + # via -r requirements.in +h11==0.14.0 + # via + # -r requirements.in + # httpcore + # uvicorn +hf-xet==1.1.5 + # via huggingface-hub +httpcore==1.0.7 + # via httpx +httplib2==0.22.0 + # via + # google-api-python-client + # google-auth-httplib2 +httpx==0.28.0 + # via + # gradio + # gradio-client +huggingface-hub==0.33.2 + # via + # -r requirements.in + # accelerate + # datasets + # diffusers + # gradio + # gradio-client + # nemo-toolkit + # optimum-quanto + # peft + # tokenizers + # transformers +hydra-core==1.3.2 + # via nemo-toolkit +idna==3.10 + # via + # -r requirements.in + # anyio + # httpx + # requests + # yarl +importlib-metadata==8.5.0 + # via + # -r requirements.in + 
# diffusers +inflect==7.5.0 + # via + # g2p-en + # nemo-toolkit +intervaltree==3.1.0 + # via lhotse +invisible-watermark==0.2.0 + # via -r requirements.in +ipython==9.4.0 + # via mediapy +ipython-pygments-lexers==1.1.1 + # via ipython +jedi==0.19.2 + # via ipython +jinja2==3.1.4 + # via + # -r requirements.in + # altair + # gradio + # pydeck + # torch +jiwer==3.0.5 + # via nemo-toolkit +jmespath==1.0.1 + # via + # boto3 + # botocore +joblib==1.4.2 + # via + # librosa + # nltk + # sacremoses + # scikit-learn +jsonschema==4.23.0 + # via altair +jsonschema-specifications==2024.10.1 + # via jsonschema +kaldi-python-io==1.2.2 + # via nemo-toolkit +kaldiio==2.18.1 + # via nemo-toolkit +kiwisolver==1.4.7 + # via matplotlib +lazy-loader==0.4 + # via librosa +levenshtein==0.27.1 +lhotse==1.30.3 + # via nemo-toolkit +libcst==1.8.2 + # via fiddle +librosa==0.11.0 + # via nemo-toolkit +lightning==2.4.0 + # via nemo-toolkit +lightning-utilities==0.14.3 + # via + # lightning + # pytorch-lightning + # torchmetrics +lilcom==1.8.1 + # via lhotse +linkify-it-py==2.0.3 + # via markdown-it-py +lit==18.1.8 + # via -r requirements.in +llvmlite==0.44.0 + # via numba +loguru==0.7.2 + # via + # -r requirements.in +mako==1.3.10 + # via alembic +markdown==3.8.2 + # via tensorboard +markdown-it-py==2.2.0 + # via + # gradio + # mdit-py-plugins + # rich +markupsafe==3.0.2 + # via + # -r requirements.in + # gradio + # jinja2 + # mako + # werkzeug +marshmallow==4.0.0 + # via nemo-toolkit +matplotlib==3.9.2 + # via + # gradio + # mediapy + # pyannote-metrics +matplotlib-inline==0.1.7 + # via ipython +mdit-py-plugins==0.3.3 + # via gradio +mdurl==0.1.2 + # via markdown-it-py +mediapy==1.1.6 + # via nemo-toolkit +more-itertools==10.7.0 + # via inflect +mpmath==1.3.0 + # via + # -r requirements.in + # sympy +msgpack==1.1.1 + # via librosa +multidict==6.1.0 + # via + # aiohttp + # yarl +multiprocess==0.70.16 + # via datasets +narwhals==1.14.3 + # via altair +nemo-toolkit==2.3.0 + # via -r requirements.in +networkx==3.4.2 + # via + # -r requirements.in + # torch +ninja==1.11.1.2 + # via optimum-quanto +nltk==3.9.1 + # via + # -r requirements.in + # g2p-en +num2words==0.5.14 + # via nemo-toolkit +numba==0.61.0 + # via + # librosa + # nemo-toolkit + # resampy +numpy==1.26.4 + # via + # -r requirements.in + # accelerate + # bitsandbytes + # contourpy + # cupy-cuda12x + # datasets + # diffusers + # g2p-en + # gradio + # invisible-watermark + # kaldi-python-io + # kaldiio + # lhotse + # librosa + # lilcom + # matplotlib + # mediapy + # nemo-toolkit + # numba + # onnx + # opencv-python + # optimum-quanto + # optuna + # pandas + # peft + # pyannote-core + # pyannote-metrics + # pydeck + # pyloudnorm + # pywavelets + # resampy + # scikit-learn + # scipy + # shapely + # soundfile + # sox + # soxr + # streamlit + # tensorboard + # torchmetrics + # transformers + # webdataset +nvidia-cublas-cu12==12.1.3.1 + # via + # nvidia-cudnn-cu12 + # nvidia-cusolver-cu12 + # torch +nvidia-cuda-cupti-cu12==12.1.105 + # via torch +nvidia-cuda-nvrtc-cu12==12.1.105 + # via torch +nvidia-cuda-runtime-cu12==12.1.105 + # via torch +nvidia-cudnn-cu12==9.1.0.70 + # via torch +nvidia-cufft-cu12==11.0.2.54 + # via torch +nvidia-curand-cu12==10.3.2.106 + # via torch +nvidia-cusolver-cu12==11.4.5.107 + # via torch +nvidia-cusparse-cu12==12.1.0.106 + # via + # nvidia-cusolver-cu12 + # torch + # via torch +nvidia-nvjitlink-cu12==12.6.85 + # via + # nvidia-cusolver-cu12 + # nvidia-cusparse-cu12 +nvidia-nvtx-cu12==12.1.105 + # via torch +omegaconf==2.3.0 + # via + # 
hydra-core + # nemo-toolkit +onnx==1.17.0 + # via nemo-toolkit +opencv-python==4.10.0.84 + # via + # -r requirements.in + # invisible-watermark +optimum-quanto==0.2.6 + # via -r requirements.in +optuna==4.4.0 + # via nemo-toolkit +orjson==3.10.12 + # via gradio +packaging==24.2 + # via + # -r requirements.in + # accelerate + # altair + # datasets + # google-cloud-aiplatform + # google-cloud-bigquery + # gradio-client + # gunicorn + # huggingface-hub + # hydra-core + # lazy-loader + # lhotse + # lightning + # lightning-utilities + # matplotlib + # nemo-toolkit + # optuna + # peft + # pooch + # pytorch-lightning + # streamlit + # tensorboard + # torchmetrics + # transformers + # wandb +pandas==2.2.3 + # via + # datasets + # gradio + # nemo-toolkit + # pyannote-database + # pyannote-metrics + # streamlit +parso==0.8.4 + # via jedi +peft==0.13.2 + # via + # -r requirements.in + # nemo-toolkit +pexpect==4.9.0 + # via ipython +pillow==11.0.0 + # via + # -r requirements.in + # diffusers + # gradio + # invisible-watermark + # matplotlib + # mediapy + # streamlit +pillow-avif-plugin==1.4.6 + # via -r requirements.in +plac==1.4.5 +platformdirs==4.3.8 + # via + # pooch + # wandb +pooch==1.8.2 + # via librosa +prompt-toolkit==3.0.51 + # via ipython +propcache==0.2.0 + # via + # aiohttp + # yarl +proto-plus==1.25.0 + # via + # google-api-core + # google-cloud-aiplatform + # google-cloud-datastore + # google-cloud-resource-manager +protobuf==4.24.4 + # via + # -r requirements.in + # google-api-core + # google-cloud-aiplatform + # google-cloud-datastore + # google-cloud-ndb + # google-cloud-resource-manager + # googleapis-common-protos + # grpc-google-iam-v1 + # grpcio-status + # nemo-toolkit + # onnx + # proto-plus + # streamlit + # tensorboard + # wandb +psutil==6.1.0 + # via + # -r requirements.in + # accelerate + # peft +ptyprocess==0.7.0 + # via pexpect +pure-eval==0.2.3 + # via stack-data +pyannote-core==5.0.0 + # via + # nemo-toolkit + # pyannote-database + # pyannote-metrics +pyannote-database==5.1.3 + # via pyannote-metrics +pyannote-metrics==3.2.1 + # via nemo-toolkit +pyarrow==18.1.0 + # via + # datasets + # streamlit +pyasn1==0.6.1 + # via + # pyasn1-modules + # rsa +pyasn1-modules==0.4.1 + # via google-auth +pybind11==2.13.6 +pycparser==2.22 + # via cffi +pydantic==2.10.2 + # via + # -r requirements.in + # fastapi + # google-cloud-aiplatform + # gradio + # wandb +pydantic-core==2.27.1 + # via + # -r requirements.in + # pydantic +pydeck==0.9.1 + # via streamlit +pydub==0.25.1 + # via + # gradio + # nemo-toolkit +pygments==2.18.0 + # via + # gradio + # ipython + # ipython-pygments-lexers + # rich +pyloudnorm==0.1.1 + # via nemo-toolkit +pymemcache==4.0.0 + # via google-cloud-ndb +pyparsing==3.2.0 + # via + # httplib2 + # matplotlib +python-dateutil==2.9.0.post0 + # via + # botocore + # google-cloud-bigquery + # matplotlib + # nemo-toolkit + # pandas +python-multipart==0.0.18 + # via + # -r requirements.in + # gradio +pytorch-lightning==2.5.2 + # via lightning +pytz==2024.2 + # via + # google-cloud-ndb + # pandas +pywavelets==1.7.0 + # via + # -r requirements.in + # invisible-watermark +pyyaml==6.0.2 + # via + # -r requirements.in + # accelerate + # datasets + # gradio + # huggingface-hub + # lhotse + # libcst + # lightning + # omegaconf + # optuna + # peft + # pyannote-database + # pytorch-lightning + # transformers + # wandb + # webdataset +rapidfuzz==3.13.0 + # via + # jiwer + # levenshtein +redis==5.2.0 + # via google-cloud-ndb +referencing==0.35.1 + # via + # jsonschema + # 
jsonschema-specifications +regex==2024.11.6 + # via + # -r requirements.in + # diffusers + # nltk + # sacremoses + # transformers +requests==2.32.3 + # via + # -r requirements.in + # datasets + # diffusers + # google-api-core + # google-cloud-bigquery + # google-cloud-storage + # gradio + # huggingface-hub + # pooch + # streamlit + # transformers + # wandb +resampy==0.4.3 + # via nemo-toolkit +rich==13.9.4 + # via + # streamlit + # typer +rpds-py==0.21.0 + # via + # jsonschema + # referencing +rsa==4.9 + # via google-auth +ruamel-yaml==0.18.14 + # via nemo-toolkit +ruamel-yaml-clib==0.2.12 + # via ruamel-yaml +s3transfer==0.13.0 + # via boto3 +sacremoses==0.1.1 + # via nemo-toolkit +safetensors==0.4.5 + # via + # -r requirements.in + # accelerate + # dfloat11 + # diffusers + # optimum-quanto + # peft + # transformers +scikit-learn==1.7.0 + # via + # librosa + # nemo-toolkit + # pyannote-metrics +scipy==1.16.0 + # via + # librosa + # nemo-toolkit + # pyannote-core + # pyannote-metrics + # pyloudnorm + # scikit-learn +semantic-version==2.10.0 + # via gradio +sentencepiece==0.2.0 + # via nemo-toolkit +sentry-sdk==2.32.0 + # via wandb +setuptools==75.6.0 + # via + # lightning-utilities + # nemo-toolkit + # tensorboard + # torch +shapely==2.0.6 + # via google-cloud-aiplatform +shellingham==1.5.4 + # via typer +six==1.16.0 + # via + # python-dateutil + # tensorboard +smmap==5.0.1 + # via gitdb +sniffio==1.3.1 + # via + # -r requirements.in + # anyio +sortedcontainers==2.4.0 + # via + # intervaltree + # pyannote-core +soundfile==0.13.1 + # via + # lhotse + # librosa + # nemo-toolkit +sox==1.5.0 + # via nemo-toolkit +soxr==0.5.0.post1 + # via librosa +sqlalchemy==2.0.41 + # via + # alembic + # optuna +stack-data==0.6.3 + # via ipython +starlette==0.41.3 + # via + # -r requirements.in + # fastapi +streamlit==1.40.2 + # via -r requirements.in +sympy==1.13.3 + # via + # -r requirements.in + # pyannote-metrics + # torch +tabulate==0.9.0 + # via + # lhotse + # pyannote-metrics +tenacity==9.0.0 + # via streamlit +tensorboard==2.19.0 + # via nemo-toolkit +tensorboard-data-server==0.7.2 + # via tensorboard +termcolor==3.1.0 +text-unidecode==1.3 + # via nemo-toolkit + # via nemo-toolkit +threadpoolctl==3.6.0 + # via scikit-learn +tokenizers==0.21.2 + # via + # -r requirements.in + # transformers +tomesd==0.1.3 + # via -r requirements.in +toml==0.10.2 + # via streamlit +toolz==1.0.0 + # via cytoolz +torch==2.4.1 + # via + # -r requirements.in + # accelerate + # bitsandbytes + # deepcache + # invisible-watermark + # lhotse + # lightning + # nemo-toolkit + # optimum-quanto + # peft + # pytorch-lightning + # tomesd + # torchmetrics +torchmetrics==1.7.4 + # via + # lightning + # nemo-toolkit + # pytorch-lightning +tornado==6.4.2 + # via streamlit +tqdm==4.67.1 + # via + # -r requirements.in + # datasets + # dfloat11 + # huggingface-hub + # lhotse + # lightning + # nemo-toolkit + # nltk + # optuna + # peft + # pytorch-lightning + # sacremoses + # transformers +traitlets==5.14.3 + # via + # ipython + # matplotlib-inline +transformers==4.53.1 + # via + # -r requirements.in + # deepcache + # dfloat11 + # nemo-toolkit + # peft + # via torch +typeguard==4.4.2 + # via inflect +typer==0.16.0 + # via pyannote-database +typing-extensions==4.12.2 + # via + # -r requirements.in + # alembic + # altair + # fastapi + # fiddle + # gradio-client + # huggingface-hub + # librosa + # lightning + # lightning-utilities + # pyannote-core + # pydantic + # pydantic-core + # pytorch-lightning + # sox + # sqlalchemy + # streamlit + # 
torch + # typeguard + # typer + # wandb +tzdata==2024.2 + # via pandas +uc-micro-py==1.0.3 + # via linkify-it-py +uritemplate==4.1.1 + # via google-api-python-client +urllib3==2.2.3 + # via + # -r requirements.in + # botocore + # requests + # sentry-sdk +uvicorn==0.32.1 + # via + # -r requirements.in + # gradio +wandb==0.21.0 + # via nemo-toolkit +watchdog==6.0.0 + # via streamlit +wcwidth==0.2.13 + # via prompt-toolkit +webdataset==1.0.2 + # via nemo-toolkit +websockets==12.0 + # via + # gradio + # gradio-client +werkzeug==3.1.3 + # via tensorboard +wget==3.2 + # via nemo-toolkit +wrapt==1.17.2 + # via nemo-toolkit + # via -r requirements.in +xxhash==3.5.0 + # via datasets +yarl==1.18.0 + # via aiohttp +zipp==3.21.0 + # via + # -r requirements.in + # importlib-metadata diff --git a/start_server.bat b/start_server.bat new file mode 100644 index 0000000..6a0de63 --- /dev/null +++ b/start_server.bat @@ -0,0 +1,12 @@ +@echo off +echo Starting Stable Diffusion Server... +cd /d D:\code\stable-diffusion-server + +echo Activating virtual environment... +call .venv\Scripts\activate.bat + +echo Starting server with uvicorn... +uvicorn main:app --port 8000 --timeout-keep-alive 600 --workers 1 --backlog 1 --limit-concurrency 4 + +echo Server stopped. +pause \ No newline at end of file diff --git a/start_server_production.bat b/start_server_production.bat new file mode 100644 index 0000000..aa03be1 --- /dev/null +++ b/start_server_production.bat @@ -0,0 +1,23 @@ +@echo off +echo Starting Stable Diffusion Server (Production)... +cd /d D:\code\stable-diffusion-server + +:: Set environment variables +if exist "secrets\google-credentials.json" ( + set GOOGLE_APPLICATION_CREDENTIALS=secrets\google-credentials.json + echo Google Cloud credentials set. +) + +:: Optional: Set model paths +:: set DF11_MODEL_PATH=DFloat11/FLUX.1-schnell-DF11 +:: set CONTROLNET_LORA=black-forest-labs/flux-controlnet-line-lora +:: set LOAD_LCM_LORA=1 + +echo Activating virtual environment... +call .venv\Scripts\activate.bat + +echo Starting production server with gunicorn... +gunicorn -k uvicorn.workers.UvicornWorker -b :8000 main:app --timeout 600 -w 1 + +echo Server stopped. 
+pause \ No newline at end of file diff --git a/test_api.py b/test_api.py new file mode 100644 index 0000000..46e3789 --- /dev/null +++ b/test_api.py @@ -0,0 +1,110 @@ +#!/usr/bin/env python +"""Test the API endpoints with multiple images.""" + +import requests +import time +import json +from urllib.parse import urlencode + +def test_make_image(prompt, save_name): + """Test the /make_image endpoint.""" + print(f"\n[IMAGE] Generating: {prompt}") + + # URL encode the parameters + params = { + "prompt": prompt, + "width": 512, + "height": 512 + } + + url = f"http://localhost:8000/make_image?{urlencode(params)}" + + start_time = time.time() + try: + response = requests.get(url, timeout=300) + elapsed = time.time() - start_time + + if response.status_code == 200: + # Save the image + with open(save_name, 'wb') as f: + f.write(response.content) + print(f"[OK] Saved to {save_name} ({elapsed:.1f}s)") + return True + else: + print(f"[ERROR] Status {response.status_code}: {response.text}") + return False + except Exception as e: + print(f"[FAILED] {e}") + return False + +def test_create_and_upload(prompt, save_path): + """Test the /create_and_upload_image endpoint.""" + print(f"\n[UPLOAD] Creating and uploading: {prompt}") + + params = { + "prompt": prompt, + "save_path": save_path, + "width": 512, + "height": 512 + } + + url = f"http://localhost:8000/create_and_upload_image?{urlencode(params)}" + + start_time = time.time() + try: + response = requests.get(url, timeout=300) + elapsed = time.time() - start_time + + if response.status_code == 200: + result = response.json() + print(f"[OK] Response: {result} ({elapsed:.1f}s)") + return True + else: + print(f"[WARNING] Status {response.status_code}: {response.text}") + # Try to save locally anyway if it's a cloud storage error + return False + except Exception as e: + print(f"[FAILED] {e}") + return False + +def main(): + """Test multiple images with different prompts.""" + + # Test prompts + test_cases = [ + ("a cute robot playing guitar in a cyberpunk city", "robot_guitar.png"), + ("magical forest with glowing mushrooms at night", "magic_forest.png"), + ("steampunk airship flying above victorian london", "steampunk_airship.png"), + ("astronaut riding a horse on mars", "astronaut_mars.png"), + ("japanese temple in cherry blossom season, anime style", "temple_sakura.png") + ] + + print("=" * 60) + print("Testing Stable Diffusion Server API") + print("=" * 60) + + # Test /make_image endpoint + print("\n[TEST] Testing /make_image endpoint...") + successful = 0 + for prompt, filename in test_cases[:3]: # Test first 3 with make_image + if test_make_image(prompt, filename): + successful += 1 + + print(f"\n[RESULT] /make_image: {successful}/3 successful") + + # Test /create_and_upload_image endpoint + print("\n[TEST] Testing /create_and_upload_image endpoint...") + successful_upload = 0 + for prompt, filename in test_cases[3:]: # Test last 2 with create_and_upload + save_path = f"test_uploads/{filename.replace('.png', '.webp')}" + if test_create_and_upload(prompt, save_path): + successful_upload += 1 + + print(f"\n[RESULT] /create_and_upload_image: {successful_upload}/2 attempted") + + print("\n" + "=" * 60) + print("Testing complete! 
Check the generated images.") + print("=" * 60) + +if __name__ == "__main__": + main() \ No newline at end of file diff --git a/test_fast.py b/test_fast.py new file mode 100644 index 0000000..9f3dcb7 --- /dev/null +++ b/test_fast.py @@ -0,0 +1,109 @@ +#!/usr/bin/env python +"""Fast generation test with optimizations for RTX 3070.""" + +import torch +from diffusers import DiffusionPipeline, DPMSolverMultistepScheduler +import time +from PIL import Image +import os + +def generate_fast_image(): + print("Loading optimized pipeline for speed...") + + # Use SDXL Turbo or Lightning for faster generation + model_id = "stabilityai/sdxl-turbo" # Much faster variant + + pipe = DiffusionPipeline.from_pretrained( + model_id, + torch_dtype=torch.float16, + variant="fp16", + use_safetensors=True + ) + + # Move to GPU + pipe = pipe.to("cuda") + + # Enable optimizations + pipe.enable_xformers_memory_efficient_attention() # Memory efficient attention + pipe.enable_vae_slicing() # VAE slicing for memory + pipe.enable_vae_tiling() # VAE tiling for large images + + # Compile with torch.compile for speed (PyTorch 2.0+) + # pipe.unet = torch.compile(pipe.unet, mode="reduce-overhead", fullgraph=True) + + print(f"Using GPU: {torch.cuda.get_device_name(0)}") + print(f"GPU Memory: {torch.cuda.get_device_properties(0).total_memory / 1024**3:.1f} GB") + + # Generate with turbo settings (1-4 steps only!) + prompt = "a majestic dragon flying over a medieval castle, fantasy art, highly detailed" + + print(f"\nGenerating image with TURBO settings...") + print(f"Prompt: {prompt}") + + # Warm up the model + print("Warming up GPU...") + with torch.no_grad(): + _ = pipe(prompt="test", num_inference_steps=1, guidance_scale=0.0, height=512, width=512).images[0] + torch.cuda.synchronize() + + # Time the actual generation + start_time = time.time() + + with torch.no_grad(): + # SDXL Turbo uses 1-4 steps with no CFG + image = pipe( + prompt=prompt, + num_inference_steps=1, # Turbo mode: 1-4 steps only + guidance_scale=0.0, # No CFG for turbo + height=512, + width=512 + ).images[0] + + torch.cuda.synchronize() + generation_time = time.time() - start_time + + # Save the image + output_path = "test_fast.png" + image.save(output_path) + print(f"\n✅ Image saved to: {output_path}") + print(f"⚡ Generation time: {generation_time:.2f} seconds") + + # Also test with slightly more steps for quality + print("\nGenerating higher quality version (4 steps)...") + start_time = time.time() + + with torch.no_grad(): + image_hq = pipe( + prompt=prompt, + num_inference_steps=4, # Still very fast + guidance_scale=0.0, + height=768, + width=768 + ).images[0] + + torch.cuda.synchronize() + generation_time_hq = time.time() - start_time + + output_hq = "test_fast_hq.png" + image_hq.save(output_hq) + print(f"✅ HQ Image saved to: {output_hq}") + print(f"⚡ HQ Generation time: {generation_time_hq:.2f} seconds") + + return output_path + +if __name__ == "__main__": + try: + # Set memory fraction to avoid OOM + torch.cuda.set_per_process_memory_fraction(0.95) + + output_file = generate_fast_image() + print(f"\n🎯 Success! 
Check the generated images") + + # Print memory usage + print(f"\nGPU Memory Used: {torch.cuda.memory_allocated() / 1024**3:.2f} GB") + print(f"GPU Memory Reserved: {torch.cuda.memory_reserved() / 1024**3:.2f} GB") + + except Exception as e: + print(f"\nError: {e}") + import traceback + traceback.print_exc() \ No newline at end of file diff --git a/test_generation.py b/test_generation.py new file mode 100644 index 0000000..ee502bb --- /dev/null +++ b/test_generation.py @@ -0,0 +1,58 @@ +#!/usr/bin/env python +"""Simple test script to generate an image using the HDM pipeline.""" + +import torch +from hdmx import HDMXUTPipeline +from PIL import Image +import os + +def generate_test_image(): + print("Loading HDM pipeline...") + + # Load the pipeline + pipe = HDMXUTPipeline.from_pretrained( + "hdmx/hdmx_composite3", + torch_dtype=torch.float16, + trust_remote_code=True + ) + + # Move to GPU if available + if torch.cuda.is_available(): + pipe = pipe.to("cuda") + print(f"Using GPU: {torch.cuda.get_device_name(0)}") + else: + print("Using CPU") + + # Generate image + prompt = "a beautiful fantasy landscape with mountains and a crystal clear lake at sunset, highly detailed, 4k" + print(f"\nGenerating image with prompt: {prompt}") + + with torch.no_grad(): + image = pipe( + prompt=prompt, + num_inference_steps=20, + guidance_scale=7.5, + height=512, + width=512 + ).images[0] + + # Save the image + output_path = "test_output.png" + image.save(output_path) + print(f"\nImage saved to: {output_path}") + + # Also save as webp for smaller size + output_webp = "test_output.webp" + image.save(output_webp, "WEBP", quality=90) + print(f"WebP version saved to: {output_webp}") + + return output_path + +if __name__ == "__main__": + try: + output_file = generate_test_image() + print(f"\n✅ Success! 
Generated image at: {os.path.abspath(output_file)}") + except Exception as e: + print(f"\n❌ Error: {e}") + import traceback + traceback.print_exc() \ No newline at end of file diff --git a/test_hdm.py b/test_hdm.py new file mode 100644 index 0000000..539ea96 --- /dev/null +++ b/test_hdm.py @@ -0,0 +1,52 @@ +import torch +import sys +import os + +# Add HDM to path +sys.path.insert(0, "D:\\code\\HDM\\src") + +print(f"Torch version: {torch.__version__}") +print(f"CUDA available: {torch.cuda.is_available()}") +if torch.cuda.is_available(): + print(f"CUDA device: {torch.cuda.get_device_name(0)}") + +try: + # Import HDM + import xut + xut.env.USE_XFORMERS_LAYERS = True + from hdm.pipeline import HDMXUTPipeline + + print("Loading HDM pipeline...") + hdm_pipe = HDMXUTPipeline.from_pretrained( + "KBlueLeaf/HDM-xut-340M-anime", + trust_remote_code=True + ).to("cuda:0" if torch.cuda.is_available() else "cpu") + + print("HDM pipeline loaded successfully!") + print(f"Pipeline device: {hdm_pipe.device}") + + # Try to generate an image + print("\nGenerating test image...") + with torch.inference_mode(): + result = hdm_pipe( + prompts=["a simple test art"], + negative_prompts="low quality", + width=512, + height=512, + cfg_scale=3.0, + num_inference_steps=4, + camera_param={ + "zoom": 1.0, + "x_shift": 0.0, + "y_shift": 0.0, + }, + ) + + print("Image generation successful!") + if result and len(result) > 0: + print(f"Generated {len(result)} images") + +except Exception as e: + print(f"Error: {e}") + import traceback + traceback.print_exc() \ No newline at end of file diff --git a/test_sdxl.py b/test_sdxl.py new file mode 100644 index 0000000..dc87528 --- /dev/null +++ b/test_sdxl.py @@ -0,0 +1,64 @@ +#!/usr/bin/env python +"""Test script using Stable Diffusion XL model.""" + +import torch +from diffusers import DiffusionPipeline +from PIL import Image +import os + +def generate_test_image(): + print("Loading Stable Diffusion XL pipeline...") + + # Load SDXL base model (public, no auth needed) + pipe = DiffusionPipeline.from_pretrained( + "stabilityai/stable-diffusion-xl-base-1.0", + torch_dtype=torch.float16, + variant="fp16", + use_safetensors=True + ) + + # Move to GPU + if torch.cuda.is_available(): + pipe = pipe.to("cuda") + print(f"Using GPU: {torch.cuda.get_device_name(0)}") + else: + print("Using CPU") + pipe.enable_model_cpu_offload() + + # Generate image + prompt = "a beautiful fantasy landscape with mountains and a crystal clear lake at sunset, highly detailed, masterpiece, 4k" + negative_prompt = "ugly, blurry, low quality, distorted" + + print(f"\nGenerating image with prompt: {prompt}") + print("This may take a minute...") + + with torch.no_grad(): + image = pipe( + prompt=prompt, + negative_prompt=negative_prompt, + num_inference_steps=30, + guidance_scale=7.5, + height=768, + width=768 + ).images[0] + + # Save the image + output_path = "test_output.png" + image.save(output_path) + print(f"\nImage saved to: {output_path}") + + # Also save as webp for smaller size + output_webp = "test_output.webp" + image.save(output_webp, "WEBP", quality=90) + print(f"WebP version saved to: {output_webp}") + + return output_path + +if __name__ == "__main__": + try: + output_file = generate_test_image() + print(f"\nSuccess! 
Generated image at: {os.path.abspath(output_file)}") + except Exception as e: + print(f"\nError: {e}") + import traceback + traceback.print_exc() \ No newline at end of file diff --git a/test_simple.py b/test_simple.py new file mode 100644 index 0000000..cb233c1 --- /dev/null +++ b/test_simple.py @@ -0,0 +1,59 @@ +#!/usr/bin/env python +"""Simple test script to generate an image using available pipelines.""" + +import torch +from diffusers import FluxPipeline +from PIL import Image +import os + +def generate_test_image(): + print("Loading Flux pipeline...") + + # Load Flux Schnell pipeline (fast variant) + pipe = FluxPipeline.from_pretrained( + "black-forest-labs/FLUX.1-schnell", + torch_dtype=torch.bfloat16 + ) + + # Enable CPU offloading to manage memory + pipe.enable_model_cpu_offload() + + if torch.cuda.is_available(): + print(f"Using GPU: {torch.cuda.get_device_name(0)}") + else: + print("Using CPU") + + # Generate image + prompt = "a beautiful fantasy landscape with mountains and a crystal clear lake at sunset, highly detailed, 4k" + print(f"\nGenerating image with prompt: {prompt}") + print("This may take a few minutes on first run...") + + with torch.no_grad(): + image = pipe( + prompt=prompt, + num_inference_steps=4, # Schnell is optimized for 4 steps + guidance_scale=0.0, # Schnell doesn't use guidance + height=512, + width=512 + ).images[0] + + # Save the image + output_path = "test_output.png" + image.save(output_path) + print(f"\nImage saved to: {output_path}") + + # Also save as webp for smaller size + output_webp = "test_output.webp" + image.save(output_webp, "WEBP", quality=90) + print(f"WebP version saved to: {output_webp}") + + return output_path + +if __name__ == "__main__": + try: + output_file = generate_test_image() + print(f"\n✅ Success! Generated image at: {os.path.abspath(output_file)}") + except Exception as e: + print(f"\n❌ Error: {e}") + import traceback + traceback.print_exc() \ No newline at end of file
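
One caveat with the fallback path in PATCH 1/2: when the HDM import fails, main.py rebinds hdm_pipe to a FluxPipeline, but the call sites (make_image, create_image_from_prompt, style_transfer_image_from_prompt) still pass HDM-only keyword arguments (prompts, negative_prompts, cfg_scale, camera_param, tread_gamma1/tread_gamma2), which FluxPipeline does not accept. The sketch below is a hypothetical dispatch helper, not part of the patch; the function name and structure are assumptions, and it only reuses the two call signatures already shown in the diff (the HDM call from make_image and the Flux Schnell call from the removed create_image_from_prompt).

import torch

def generate_image(pipe, prompt: str, width: int = 1024, height: int = 1024,
                   n_steps: int = 24):
    """Return a PIL image from either the HDM pipeline or the Flux fallback."""
    with torch.inference_mode():
        if pipe.__class__.__name__ == "HDMXUTPipeline":
            # HDM path, mirroring the kwargs used in make_image() above.
            result = pipe(
                prompts=[prompt],
                negative_prompts="low quality, worst quality, blurry, bad anatomy",
                width=width,
                height=height,
                cfg_scale=3.0,
                num_inference_steps=n_steps,
                camera_param={"zoom": 1.0, "x_shift": 0.0, "y_shift": 0.0},
                tread_gamma1=0.0,
                tread_gamma2=0.5,
            )
        else:
            # Flux Schnell fallback, mirroring the removed flux_pipe call
            # (few steps, no classifier-free guidance).
            result = pipe(
                prompt=prompt,
                width=width,
                height=height,
                guidance_scale=0.0,
                num_inference_steps=4,
                max_sequence_length=256,
            )
    return result.images[0]

Routing through one helper like this would let the endpoints stay agnostic about which pipeline actually loaded, instead of assuming HDM keyword arguments everywhere.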