# model_image.py
# See https://huggingface.co/docs/diffusers/en/api/pipelines/flux for more info
# also https://huggingface.co/docs/diffusers/v0.18.2/en/api/pipelines/stable_diffusion/stable_diffusion_xl
# https://huggingface.co/docs/diffusers/main/en/api/pipelines/overview#diffusers.DiffusionPipeline
import settings  # imported first in case it configures the environment

import gc  # used by the commented-out fallback in free_memory()
from typing import List, Optional, Union

import matplotlib.pyplot as plt
import torch
from diffusers import FluxPipeline
from PIL import Image

# Only needed for the optional qfloat8 quantization sketched below
from optimum.quanto import freeze, qfloat8, quantize
# Load the Flux model
pipe_image = FluxPipeline.from_pretrained(settings.IMAGE_GENERATOR_MODEL, torch_dtype=torch.bfloat16)
# Get the list of available GPUs and their memory
# torch_device_map = {i: f"{torch.cuda.get_device_properties(i).total_memory // (1024 ** 3)}GiB" for i in range(torch.cuda.device_count())}
# torch_device_map["cpu"] = "32GiB"
# pipe_image = FluxPipeline.from_pretrained(
# settings.IMAGE_GENERATOR_MODEL,
# torch_dtype=torch.bfloat16,
# device_map="balanced",
# max_memory=torch_device_map,
# )
# pipe_image = pipe_image.to(settings.DEVICE)
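# Optional (a sketch, not enabled by default): optimum.quanto can quantize the
# transformer and the T5 text encoder to qfloat8 before offloading, trading a
# little quality for a large memory saving. Uncomment to try it:
# quantize(pipe_image.transformer, weights=qfloat8)
# freeze(pipe_image.transformer)
# quantize(pipe_image.text_encoder_2, weights=qfloat8)
# freeze(pipe_image.text_encoder_2)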
# Offloads all models to CPU using accelerate, reducing memory usage with a low impact on performance.
pipe_image.enable_model_cpu_offload()
# Memory savings are higher than with `enable_model_cpu_offload`, but performance is lower
# pipe_image.enable_sequential_cpu_offload()
# When this option is enabled, the VAE will split the input tensor in slices to compute decoding in several steps. This is useful to save some memory and allow larger batch sizes.
pipe_image.vae.enable_slicing()
# When this option is enabled, the VAE will split the input tensor into tiles to compute decoding and encoding in several steps. This is useful to save a large amount of memory and to allow the processing of larger images.
pipe_image.vae.enable_tiling()
def generate_image(
    prompt: Optional[Union[str, List[str]]] = None,
    prompt2: Optional[Union[str, List[str]]] = None,
    height: Optional[int] = None,
    width: Optional[int] = None,
    guidance_scale: float = 0.0,
) -> Union[Image.Image, List[Image.Image]]:
    """Generate an image (or a list of images, for a list of prompts) from a free-text prompt."""
    # Defaults for the timestep-distilled model (e.g. FLUX.1-schnell):
    # guidance is ignored, so few steps and a short sequence length suffice.
    pipe_kwargs = {
        "guidance_scale": 0.0,
        "num_inference_steps": 4,
        "max_sequence_length": 256,
    }
    # Overrides for the guidance-distilled model
    if "FLUX.1-dev" in settings.IMAGE_GENERATOR_MODEL:
        pipe_kwargs = {
            "guidance_scale": 3.5,
            "num_inference_steps": 50,
        }
    if guidance_scale:
        pipe_kwargs["guidance_scale"] = guidance_scale
    if prompt2:
        # Keep the longer of the two prompts rather than passing both
        prompt = prompt if len(prompt) > len(prompt2) else prompt2
    pipe_kwargs["prompt"] = prompt
    # pipe_kwargs["prompt_2"] = prompt2 or prompt  # alternative: feed the second text encoder directly
    pipe_kwargs["height"] = height
    pipe_kwargs["width"] = width
    if settings.TEMPERATURE == 0.0:
        # Fix the seed so deterministic runs are reproducible
        pipe_kwargs["generator"] = torch.Generator("cpu").manual_seed(0)
    images = pipe_image(**pipe_kwargs).images
    if isinstance(prompt, str):
        return images[0]
    return images
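# Example usage (a sketch; prompt text and file name are illustrative):
# image = generate_image("a watercolor fox in a snowy forest", height=512, width=512)
# image.save("fox.png")
# batch = generate_image(["a red panda", "a blue heron"])  # returns a list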
def show_image_grid(images, main_title=None, titles=None):
    """Display a list of images as a grid with an optional main title.

    If images is a dictionary, its keys are used as per-image titles."""
    if isinstance(images, dict):
        titles = list(images.keys())
        images = list(images.values())
    # Split images into rows of at most 4 images each
    image_grid = [images[i:i + 4] for i in range(0, len(images), 4)]
    num_rows = len(image_grid)
    num_cols = 4
    # Create the figure and axes grid
    fig, axes = plt.subplots(num_rows, num_cols, figsize=(20, 5 * num_rows))
    # Set the main title if provided
    if main_title:
        fig.suptitle(main_title, fontsize=16)
    # Iterate over the grid of images
    for i, row in enumerate(image_grid):
        for j in range(num_cols):
            # plt.subplots returns a 1-D array when there is a single row
            ax = axes[i, j] if num_rows > 1 else axes[j]
            if j < len(row):
                # Show the image in the appropriate subplot
                ax.imshow(row[j])
                if titles:
                    ax.set_title(titles[i * num_cols + j], fontsize=12)
            # Turn off the axis for all subplots, including empty trailing cells
            ax.axis('off')
    # Adjust the layout to prevent overlap
    plt.tight_layout()
    plt.subplots_adjust(top=0.9)  # leave room for the main title
    plt.show()
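# Example usage (a sketch): dictionary keys become the per-image titles.
# show_image_grid(
#     {"schnell default": generate_image("a red panda"),
#      "higher guidance": generate_image("a red panda", guidance_scale=3.5)},
#     main_title="Guidance comparison",
# )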
def free_memory():
    """Free GPU memory after running the text model, before image generation."""
    print('Max mem allocated (GB) while doing text model:', torch.cuda.max_memory_allocated() / (1024 ** 3))
    # Hard reset of the current CUDA context via numba; this invalidates all
    # live CUDA tensors, so only call it between model runs.
    from numba import cuda
    device = cuda.get_current_device()
    device.reset()
    # Gentler alternative, kept for reference:
    # # Step 1: Delete all references to the models and pipelines
    # try:
    #     del pipe_image
    # except NameError:
    #     pass
    # # Step 2: Run garbage collection to free up Python memory references
    # gc.collect()
    # # Step 3: Empty the CUDA cache to free memory back to PyTorch
    # torch.cuda.empty_cache()
    # # Step 4 (optional): Synchronize CUDA to ensure all operations are complete
    # torch.cuda.synchronize()
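# Example usage (a sketch): call between stages, e.g. after the text model has
# finished and before this image pipeline starts generating, since
# device.reset() invalidates all live CUDA tensors:
# free_memory()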