diff --git a/finetuning/flux/README.md b/finetuning/flux/README.md new file mode 100644 index 0000000..55064bd --- /dev/null +++ b/finetuning/flux/README.md @@ -0,0 +1,287 @@ +# FLUX LoRA Fine-tuning on Beam + +Fast and efficient FLUX LoRA training with optimal parameters for small datasets (10-30 images). + +## Quick Start + +```bash +# 1. Create volume (one-time setup) +beam volume create flux-lora-data + +# 2. Deploy training endpoint +beam deploy finetune.py:train_flux_lora + +# 3. Deploy inference endpoint +beam deploy inference.py:generate_image + +# 4. Update your tokens in upload.py +# 5. Train your model +python upload.py train ./your_images yourTriggerWord + +# 6. Generate images +python upload.py generate "photo of yourTriggerWord woman" --lora yourTriggerWord +``` + +## Volume Storage + +The system uses persistent volume `flux-lora-data`: + +``` +flux-lora-data/ +├── flux_lora_yourTriggerWord/ # Trained LoRA models +│ └── flux_lora_yourTriggerWord.safetensors +├── hf_cache/ # Cached base models +└── generated_*.png # Generated images +``` + +## File Structure + +``` +flux/ +├── finetune.py # Training function (@function) +├── inference.py # Generation function (@function) +├── upload.py # Client interface (like client.py) +└── README.md # This guide +``` + +## Training + +### Optimal Training Parameters + +**Perfect for small datasets (10-30 images):** +``` +Images: 10-30 high-quality photos +Steps: Auto-calculated (images × 100 + 350) +Training time: ~10 minutes for 13 images +VRAM: ~32GB (H100 GPU) +Resolution: 768px + 1024px mixed +``` + +### Deploy Training Endpoint + +```bash +beam deploy finetune.py:train_flux_lora +``` + +### Start Training + +```bash +python upload.py train ./your_images yourTriggerWord +``` + +**Example:** +```bash +python upload.py train ./ira_photos irunTok +``` + +### Advanced Training Options + +```bash +python upload.py train ./photos myTrigger --steps 1650 --lr 4e-4 --rank 32 +``` + +**Training Parameters:** +``` +--steps: Training steps (default: auto-calculated) +--lr: Learning rate (default: 4e-4) +--rank: LoRA rank (default: 32) +``` + +## Dataset Preparation + +### Image Requirements + +``` +Format: JPG, JPEG, PNG, WebP +Count: 10-30 images (LoRA works great with just 10-13 images, no need for more than 30) +Quality: High-resolution, varied poses +Consistency: Similar lighting/style +``` + +### Caption Handling + +The system automatically creates captions for your images: + +```python +# Auto-generated captions include: +"portrait of yourTriggerWord woman with long brown hair, looking at camera" +"photo of yourTriggerWord woman, natural lighting" +"yourTriggerWord woman with long brown hair, outdoor setting" +``` + +**Custom captions:** Create `.txt` files with same name as images: +``` +your_folder/ +├── photo1.jpg +├── photo1.txt # "portrait of myTrigger woman smiling" +├── photo2.jpg +└── photo2.txt # "myTrigger woman in professional attire" +``` + +## Image Generation + +### Deploy Inference Endpoint + +```bash +beam deploy inference.py:generate_image +``` + +### Generate Images + +```bash +python upload.py generate "your prompt here" --lora yourTriggerWord +``` + +**Example:** +```bash +python upload.py generate "photo of irunTok woman with brown hair in Paris, on the background Eiffel Tower, high quality" --lora irunTok +``` + +### Generation Parameters + +```bash +python upload.py generate "prompt" --lora triggerWord \ + --width 1024 --height 1024 --steps 35 --guidance 3.0 --seed 42 --lora-scale 0.9 +``` + +**Parameters:** +``` 
+--lora: Your trigger word (LoRA name) +--width/height: Image dimensions (default: 1024x1024) +--steps: Inference steps (default: 35) +--guidance: Prompt adherence (default: 3.0) +--seed: Random seed for reproducibility +--lora-scale: LoRA strength 0.0-1.0 (default: 0.9) +``` + +## Command Reference + +### Training Commands + +```bash +# Basic training +python upload.py train ./photos triggerWord + +# Custom parameters +python upload.py train ./photos triggerWord --steps 1650 --lr 4e-4 + +# Check training status +python upload.py wait <task_id> +``` + +### Generation Commands + +```bash +# Generate with LoRA +python upload.py generate "prompt" --lora triggerWord + +# Generate without LoRA (base model) +python upload.py generate "prompt" + +# Custom generation settings +python upload.py generate "prompt" --lora triggerWord --width 512 --height 768 --steps 20 +``` + +### File Management + +```bash +# List volume contents +beam ls flux-lora-data + +# Download generated image +beam cp beam://flux-lora-data/generated_random_9384.png ./ + +# Download specific file +python upload.py download --filename generated_random_9384.png +``` + +## Configuration + +**Update `upload.py` with your credentials:** + +```python +# Replace with your actual tokens +BEAM_TOKEN = "your_beam_token_here" +TRAIN_FUNCTION = "https://your-train-endpoint.app.beam.cloud" +GENERATE_FUNCTION = "https://your-generate-endpoint.app.beam.cloud" +``` + +## Step Calculation Formula + +**Automatic step calculation:** +```python +optimal_steps = (image_count * 100) + 350 + +# Examples: +# 5 images = 850 steps (~5 minutes) +# 10 images = 1,350 steps (~8 minutes) +# 13 images = 1,650 steps (~10 minutes) +# 20 images = 2,350 steps (~15 minutes) +# 30 images = 3,350 steps (~20 minutes) +``` + +## Tips & Best Practices + +### Training Tips + +``` +Use 10-30 images for best results +Varied poses and angles improve quality +Consistent lighting helps training +Training completes in ~10 minutes for 13 images +H100 GPU provides optimal performance +``` + +### Generation Tips + +``` +Always include your trigger word in prompts +Start with guidance_scale 3.0-4.0 +Use 28-35 inference steps for quality +Experiment with lora_scale 0.7-1.0 +Try different seeds for variety +``` + +### Prompt Examples + +```bash +# Portrait style +"portrait of yourTrigger woman, professional lighting, high quality" + +# Specific scenes +"photo of yourTrigger woman in Paris, Eiffel Tower background" + +# Artistic styles +"yourTrigger woman in the style of Renaissance painting" + +# Different settings +"yourTrigger woman at sunset, golden hour lighting" +``` + +## Troubleshooting + +### Training Issues + +``` +Check HF_TOKEN is set correctly +Verify images are valid formats (JPG, PNG) +Ensure 10-30 images in folder +Monitor training via Beam dashboard +``` + +### Generation Issues + +``` +Verify LoRA name matches training trigger word +Check if training completed successfully +Try different prompt variations +Adjust lora_scale if results are too strong/weak +``` + +### File Access + +``` +Use 'beam ls flux-lora-data' to list files +Download with 'beam cp beam://flux-lora-data/file.png ./' +Check Beam dashboard for task status +``` \ No newline at end of file diff --git a/finetuning/flux/finetune.py b/finetuning/flux/finetune.py new file mode 100644 index 0000000..b1814d2 --- /dev/null +++ b/finetuning/flux/finetune.py @@ -0,0 +1,237 @@ +from beam import function, Volume, Image +import os +import requests +import zipfile +import tempfile +import yaml +from PIL import Image as 
PILImage + +VOLUME_PATH = "./flux-lora-data" + +@function( + name="simple-flux-train", + gpu="H100", + cpu=4, + memory="32Gi", + timeout=3600, + volumes=[Volume(name="flux-lora-data", mount_path=VOLUME_PATH)], + image=Image(python_version="python3.12") + .add_commands([ + "apt-get update && apt-get install -y libgl1-mesa-glx libglib2.0-0 libsm6 libxext6 libxrender-dev libgomp1", + "git clone https://github.com/ostris/ai-toolkit.git /ai-toolkit", + "cd /ai-toolkit && git submodule update --init --recursive", + "pip3.12 install --no-cache-dir torch==2.6.0 torchvision==0.21.0 --index-url https://download.pytorch.org/whl/cu126", + "pip3.12 install pyyaml requests pillow opencv-python-headless", + "cd /ai-toolkit && pip3.12 install -r requirements.txt" + ]), + secrets=["HF_TOKEN"] +) + +def train_flux_lora( + image_zip: str, + trigger_word: str = "TOK", + steps: int = 1500, + learning_rate: float = 4e-4, + rank: int = 32 +): + """ + Train a FLUX.1-dev LoRA with ai-toolkit and save it to the flux-lora-data volume. + """ + print(f"Training '{trigger_word}' LoRA") + + dataset_dir, image_count = setup_dataset(image_zip, trigger_word) + + optimal_steps = (image_count * 100) + 350 + print(f"Found {image_count} images") + print(f"Adjusted steps: {optimal_steps}") + steps = optimal_steps  # the auto-calculated value takes precedence over the requested step count + + config = { + "job": "extension", + "config": { + "name": f"flux_lora_{trigger_word}", + "process": [{ + "type": "sd_trainer", + "training_folder": VOLUME_PATH, + "device": "cuda:0", + "trigger_word": trigger_word, + "network": { + "type": "lora", + "linear": rank, + "linear_alpha": rank, + "network_kwargs": { + "only_if_contains": [ + "transformer.single_transformer_blocks.7", + "transformer.single_transformer_blocks.12", + "transformer.single_transformer_blocks.16", + "transformer.single_transformer_blocks.20" + ] + } + }, + "save": { + "dtype": "float16", + "save_every": 10000, + "max_step_saves_to_keep": 4, + "push_to_hub": False + }, + "datasets": [{ + "folder_path": "/ai-toolkit/input", + "caption_ext": "txt", + "caption_dropout_rate": 0.05, + "shuffle_tokens": False, + "cache_latents_to_disk": True, + "resolution": [768, 1024] + }], + "train": { + "batch_size": 1, + "steps": steps, + "gradient_accumulation_steps": 1, + "train_unet": True, + "train_text_encoder": False, + "gradient_checkpointing": False, + "noise_scheduler": "flowmatch", + "optimizer": "adamw8bit", + "lr": learning_rate, + "lr_scheduler": "cosine", + "skip_first_sample": True, + "disable_sampling": True, + "ema_config": { + "use_ema": True, + "ema_decay": 0.99 + }, + "dtype": "bf16" + }, + "model": { + "name_or_path": "black-forest-labs/FLUX.1-dev", + "is_flux": True, + "quantize": False, + "low_vram": False + }, + "sample": { + "sampler": "flowmatch", + "sample_every": 10000, + "width": 1024, + "height": 1024, + "prompts": [f"portrait of {trigger_word} woman"], + "neg": "", + "seed": 42, + "walk_seed": True, + "guidance_scale": 3.5, + "sample_steps": 28 + } + }] + }, + "meta": { + "name": f"flux_lora_{trigger_word}", + "version": "1.0" + } + } + + config_path = "/ai-toolkit/train_config.yaml" + with open(config_path, 'w') as f: + yaml.dump(config, f, default_flow_style=False) + + print(f"Config saved to: {config_path}") + + import subprocess + + env = os.environ.copy() + env.update({ + 'HF_TOKEN': os.getenv('HF_TOKEN', ''), + 'CUDA_VISIBLE_DEVICES': '0' + }) + + print("Starting training...") + process = subprocess.Popen([ + "python3.12", "/ai-toolkit/run.py", "/ai-toolkit/train_config.yaml" + ], env=env, stdout=subprocess.PIPE, stderr=subprocess.STDOUT, text=True, bufsize=1, universal_newlines=True) + + for line in 
process.stdout: + print(line.rstrip()) + + return_code = process.wait() + + print("Ensuring models are saved to persistent volume...") + copy_models_to_volume() + + if return_code == 0: + return { + "status": "success", + "message": f"Training completed for {trigger_word}", + "output": "Training completed successfully" + } + else: + return { + "status": "error", + "message": f"Training failed with return code {return_code}" + } + +def copy_models_to_volume(): + """Copy any models from ai-toolkit output to persistent volume""" + import shutil + + source_dir = "/ai-toolkit/output" + dest_dir = VOLUME_PATH + + if os.path.exists(source_dir): + print(f"Copying models from {source_dir} to {dest_dir}") + + for root, dirs, files in os.walk(source_dir): + for file in files: + if file.endswith('.safetensors') or file.endswith('.yaml') or file.endswith('.json'): + source_file = os.path.join(root, file) + # Create relative path structure in destination + rel_path = os.path.relpath(root, source_dir) + dest_folder = os.path.join(dest_dir, rel_path) if rel_path != '.' else dest_dir + os.makedirs(dest_folder, exist_ok=True) + + dest_file = os.path.join(dest_folder, file) + shutil.copy2(source_file, dest_file) + print(f"Copied: {file}") + else: + print(f"No source directory {source_dir} found") + +def setup_dataset(image_zip, trigger_word): + """Download and setup dataset in the format they expect""" + dataset_dir = "/ai-toolkit/input" + os.makedirs(dataset_dir, exist_ok=True) + + response = requests.get(image_zip) + with tempfile.NamedTemporaryFile(suffix='.zip', delete=False) as f: + f.write(response.content) + zip_path = f.name + + # Extract images and create captions + count = 0 + with zipfile.ZipFile(zip_path, 'r') as zip_ref: + for file in zip_ref.namelist(): + if file.lower().endswith(('.jpg', '.jpeg', '.png')): + zip_ref.extract(file, dataset_dir) + + base_name = os.path.splitext(file)[0] + caption_path = os.path.join(dataset_dir, f"{base_name}.txt") + + # Only generate caption if .txt file doesn't exist + if not os.path.exists(caption_path): + captions = [ + f"portrait of {trigger_word} woman with long brown hair, looking at camera", + f"photo of {trigger_word} woman, long brown hair, natural lighting", + f"{trigger_word} woman with long brown hair, outdoor setting", + f"close-up portrait of {trigger_word}, long brown hair, detailed face", + f"{trigger_word} woman sitting, long brown hair, realistic photo", + f"portrait photo of {trigger_word} with long brown hair", + f"{trigger_word} woman, long brown hair, professional lighting", + f"photo of {trigger_word} woman, detailed facial features", + f"{trigger_word} with long brown hair, natural expression", + f"portrait of {trigger_word} woman, high quality photo" + ] + + caption = captions[count % len(captions)] + + with open(caption_path, 'w') as caption_file: + caption_file.write(caption) + count += 1 + + os.unlink(zip_path) + print(f"Setup {count} training images in {dataset_dir}") + return dataset_dir, count diff --git a/finetuning/flux/inference.py b/finetuning/flux/inference.py new file mode 100644 index 0000000..7081238 --- /dev/null +++ b/finetuning/flux/inference.py @@ -0,0 +1,131 @@ +from beam import function, Volume, Output, Image +import os +import base64 +from io import BytesIO + +VOLUME_PATH = "./flux-lora-data" + +def get_pipeline(lora_name: str = None): + """ + Loads the base FLUX pipeline and optionally attaches a LoRA adapter. + The base model is cached on the persistent volume to speed up subsequent loads. 
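+    The adapter is looked up on the volume at flux_lora_<lora_name>/flux_lora_<lora_name>.safetensors (the layout shown in the README); if no such file exists, the base model is returned unchanged.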
+ """ + print("Loading FLUX pipeline...") + + from diffusers import FluxPipeline + import torch + + cache_path = os.path.join(VOLUME_PATH, "hf_cache") + os.makedirs(cache_path, exist_ok=True) + + pipeline = FluxPipeline.from_pretrained( + "black-forest-labs/FLUX.1-dev", + torch_dtype=torch.float16, + token=os.getenv("HF_TOKEN"), + cache_dir=cache_path + ).to("cuda") + + if lora_name: + lora_folder_name = f"flux_lora_{lora_name}" + lora_filename = f"{lora_folder_name}.safetensors" + lora_full_path = os.path.join(VOLUME_PATH, lora_folder_name, lora_filename) + + if os.path.exists(lora_full_path): + print(f"Loading LoRA adapter: {lora_filename}") + try: + lora_dir = os.path.dirname(lora_full_path) + pipeline.load_lora_weights(lora_dir, weight_name=lora_filename) + print(f"Successfully attached LoRA: {lora_name}") + return pipeline, lora_full_path + except Exception as e: + print(f"Failed to load LoRA adapter '{lora_filename}': {e}") + else: + print(f"LoRA adapter not found for '{lora_name}'. Using the base model.") + else: + print("No LoRA name provided, using the base model.") + + return pipeline, None + +@function( + name="simple-flux-generate", + gpu="A100-40", + cpu=4, + memory="32Gi", + volumes=[Volume(name="flux-lora-data", mount_path=VOLUME_PATH)], + image=Image(python_version="python3.12") + .add_commands([ + "apt-get update && apt-get install -y libgl1-mesa-glx libglib2.0-0 libsm6 libxext6 libxrender-dev libgomp1", + "git clone https://github.com/ostris/ai-toolkit.git /ai-toolkit", + "cd /ai-toolkit && git submodule update --init --recursive", + "pip3.12 install --no-cache-dir torch==2.6.0 torchvision==0.21.0 --index-url https://download.pytorch.org/whl/cu126", + "pip3.12 install pyyaml requests pillow opencv-python-headless", + "cd /ai-toolkit && pip3.12 install -r requirements.txt" + ]), + secrets=["HF_TOKEN"] +) +def generate_image( + prompt: str, + lora_name: str = None, + width: int = 1024, + height: int = 1024, + steps: int = 35, + guidance: float = 3.0, + seed: int = None, + lora_scale: float = 0.9 +): + """ + Generate image with a dynamically loaded LoRA adapter. 
+ """ + pipeline, lora_path = get_pipeline(lora_name=lora_name) + + import torch + + print(f"Generating: '{prompt}'") + print(f"{width}x{height}, steps: {steps}, guidance: {guidance}, lora: {lora_name or 'None'}, scale: {lora_scale}") + + generator = None + if seed: + generator = torch.Generator(device="cuda").manual_seed(seed) + print(f"Using seed: {seed}") + + with torch.no_grad(): + result = pipeline( + prompt=prompt, + width=width, + height=height, + num_inference_steps=steps, + guidance_scale=guidance, + generator=generator + ) + + image = result.images[0] + + buffered = BytesIO() + image.save(buffered, format="PNG") + img_base64 = base64.b64encode(buffered.getvalue()).decode() + + filename = f"generated_{seed or 'random'}_{hash(prompt) % 10000}.png" + output = Output.from_pil_image(image) + output.save() + + local_path = os.path.join(VOLUME_PATH, filename) + image.save(local_path) + + print(f"Generated and saved: {filename}") + print(f"Local path: {local_path}") + print(f"Public URL: {output.public_url}") + + return { + "status": "success", + "image": img_base64, + "url": output.public_url, + "prompt": prompt, + "settings": { + "width": width, + "height": height, + "steps": steps, + "guidance": guidance, + "seed": seed, + "lora": lora_name + } + } \ No newline at end of file diff --git a/finetuning/flux/upload.py b/finetuning/flux/upload.py new file mode 100644 index 0000000..1c0a343 --- /dev/null +++ b/finetuning/flux/upload.py @@ -0,0 +1,282 @@ +from beam import Client +import os +import zipfile +import base64 +from PIL import Image +from io import BytesIO +import time +import requests +import argparse + +# Your Beam configuration +BEAM_TOKEN = "your_beam_token_here" +TRAIN_FUNCTION = "https://your-train-endpoint.app.beam.cloud" +GENERATE_FUNCTION = "https://your-generate-endpoint.app.beam.cloud" + +def train(folder_path: str, trigger_word: str, steps: int = 1000, lr: float = 1e-4, rank: int = 16): + """Train a LoRA using Beam SDK""" + + print(f"Training '{trigger_word}'") + print(f"Using images from: {folder_path}") + + if not os.path.exists(folder_path): + raise ValueError(f"Folder not found: {folder_path}") + + image_files = [f for f in os.listdir(folder_path) + if f.lower().endswith(('.jpg', '.jpeg', '.png', '.webp'))] + + if len(image_files) == 0: + raise ValueError(f"No images found in {folder_path}") + + print(f"Found {len(image_files)} images") + + zip_path = f"{trigger_word}_training.zip" + with zipfile.ZipFile(zip_path, 'w') as zipf: + for file in image_files: + file_path = os.path.join(folder_path, file) + zipf.write(file_path, file) + print(f" Added: {file}") + + print("Uploading with Beam...") + client = Client(token=BEAM_TOKEN) + + try: + zip_url = client.upload_file(zip_path) + print(f"Uploaded: {zip_url}") + + print("Starting training...") + response = requests.post(TRAIN_FUNCTION, + headers={'Authorization': f'Bearer {BEAM_TOKEN}', 'Content-Type': 'application/json'}, + json={ + 'image_zip': zip_url, + 'trigger_word': trigger_word, + 'steps': steps, + 'learning_rate': lr, + 'rank': rank + }) + + os.remove(zip_path) + + if response.status_code == 200: + result = response.json() + print(f"Training started!") + return {"status": "success", "result": result, "trigger_word": trigger_word} + else: + print(f"Failed: {response.status_code} - {response.text}") + return {"status": "error", "message": response.text} + + except Exception as e: + if os.path.exists(zip_path): + os.remove(zip_path) + raise Exception(f"Training failed: {e}") + +def generate(prompt: str, lora_name: 
str = None, width: int = 1024, height: int = 1024, steps: int = 20, guidance: float = 4.0, seed: int = None, lora_scale: float = 0.8): + """Generate image using requests like training""" + + print(f"Generating: '{prompt}'") + + params = { + 'prompt': prompt, + 'lora_name': lora_name, + 'width': width, + 'height': height, + 'steps': steps, + 'guidance': guidance, + 'seed': seed, + 'lora_scale': lora_scale + } + params = {k: v for k, v in params.items() if v is not None} + + try: + print(f"Sending request (lora: {lora_name or 'None'})...") + response = requests.post( + GENERATE_FUNCTION, + headers={'Authorization': f'Bearer {BEAM_TOKEN}', 'Content-Type': 'application/json'}, + json=params, + timeout=600 + ) + + if response.status_code == 200: + result = response.json() + + if 'task_id' in result and 'image' not in result: + print(f"Task started: {result['task_id']}") + print("Waiting for task to complete...") + print("Image will be saved to Beam volume: ./flux-lora-data/") + print("Check your Beam dashboard for progress") + print("Task submitted successfully!") + print("Check 'beam ls volume-name' to see generated images") + return result + + if result.get('image'): + print("Saving image locally...") + img_data = base64.b64decode(result['image']) + img = Image.open(BytesIO(img_data)) + + filename = f"generated_{lora_name or 'base'}_{int(time.time())}.png" + full_path = os.path.abspath(filename) + img.save(full_path) + + if os.path.exists(full_path): + print(f"Successfully saved locally: {full_path}") + + if result.get('url'): + print(f"Public URL: {result['url']}") + if result.get('settings'): + settings = result['settings'] + print(f"Settings: {settings['width']}x{settings['height']}, steps: {settings['steps']}") + else: + print(f"Failed to save file: {full_path}") + else: + print("No image data in response!") + print(f"Full response: {result}") + + return result + else: + print(f"Failed: {response.status_code} - {response.text}") + return {"status": "error", "message": response.text} + + except requests.exceptions.Timeout: + print("Request timed out after 10 minutes") + print("The generation might still be running on Beam") + print("Check your Beam dashboard for task status") + return {"status": "error", "message": "Request timed out"} + except Exception as e: + print(f"Generation failed: {e}") + return {"status": "error", "message": str(e)} + +def wait_for_task_completion(task_id: str, max_wait: int = 600, check_interval: int = 10): + """Wait for a task to complete using Beam SDK""" + + print(f"Polling task {task_id} every {check_interval} seconds...") + start_time = time.time() + + client = Client(token=BEAM_TOKEN) + + while time.time() - start_time < max_wait: + try: + task_result = client.get_task_result(task_id) + + if task_result is not None: + print("Task completed successfully!") + return task_result + else: + print("Task still running...") + + except Exception as e: + print(f"Task running (checking via dashboard)... 
{int(time.time() - start_time)}s elapsed") + + time.sleep(check_interval) + + print(f"Timeout waiting for task completion after {max_wait} seconds") + print("Check your Beam dashboard - the task may still be running") + return {"status": "timeout", "task_id": task_id} + +def wait_for_training(task_id: str, check_interval: int = 60): + """Check training status by polling task_id""" + + print(f"Training task: {task_id}") + print("Training typically takes 10-20 minutes depending on the number of images and steps") + print("Check your Beam dashboard for real-time progress:") + print("https://dashboard.beam.cloud/") + print("Once training completes, the models will be saved to your volume") + + return {"status": "info", "message": "Check Beam dashboard for progress", "task_id": task_id} + +def download_image(filename: str = None): + """Download the latest generated image from Beam volume""" + + if filename: + try: + print(f"Downloading {filename}...") + local_path = f"./downloaded_{filename}" + + import subprocess + result = subprocess.run([ + 'beam', 'cp', f'beam://flux-lora-data/{filename}', local_path + ], capture_output=True, text=True) + + if result.returncode == 0: + print(f"Downloaded: {local_path}") + return {"status": "success", "path": local_path} + else: + print(f"Download failed: {result.stderr}") + return {"status": "error", "message": result.stderr} + + except Exception as e: + print(f"Download failed: {e}") + return {"status": "error", "message": str(e)} + else: + print("Finding latest generated image...") + print("Use: beam ls flux-lora-data") + print("Then: python upload.py download --filename generated_xxx.png") + return {"status": "info", "message": "Specify filename to download"} + +if __name__ == "__main__": + parser = argparse.ArgumentParser(description="FLUX LoRA Training & Generation (Beam SDK)") + subparsers = parser.add_subparsers(dest="command", required=True) + + # --- Train Command --- + train_parser = subparsers.add_parser("train", help="Train a LoRA model.") + train_parser.add_argument("folder", help="Path to the folder with training images.") + train_parser.add_argument("trigger_word", help="The trigger word for the LoRA (e.g., 'iraTok').") + train_parser.add_argument("--steps", type=int, default=1650, help="Number of training steps (formula: n*100+350, where n=images).") + train_parser.add_argument("--lr", type=float, default=4e-4, help="Learning rate.") + train_parser.add_argument("--rank", type=int, default=32, help="LoRA rank.") + + # --- Generate Command --- + gen_parser = subparsers.add_parser("generate", help="Generate an image.") + gen_parser.add_argument("prompt", help="The text prompt for generation.") + gen_parser.add_argument("--lora", dest="lora_name", help="Name of the LoRA to use (your trigger_word).") + gen_parser.add_argument("--width", type=int, default=1024) + gen_parser.add_argument("--height", type=int, default=1024) + gen_parser.add_argument("--steps", type=int, default=35, help="Number of inference steps.") + gen_parser.add_argument("--guidance", type=float, default=3) + gen_parser.add_argument("--seed", type=int) + gen_parser.add_argument("--lora-scale", type=float, default=0.9, help="LoRA adapter strength (0.0-1.0)") + + + # --- Wait Command --- + wait_parser = subparsers.add_parser("wait", help="Check the status of a training task.") + wait_parser.add_argument("task_id", help="The task ID to check.") + + # --- Download Command --- + download_parser = subparsers.add_parser("download", help="Download the latest generated image from volume.") + 
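+    # Note: the download command shells out to the Beam CLI ("beam cp beam://flux-lora-data/<file> ."), so the CLI must be installed and authenticated.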
download_parser.add_argument("--filename", help="Specific filename to download (optional)") + + args = parser.parse_args() + + if args.command == "train": + result = train( + folder_path=args.folder, + trigger_word=args.trigger_word, + steps=args.steps, + lr=args.lr, + rank=args.rank + ) + print(f"\nResult: {result}") + + elif args.command == "generate": + result = generate( + prompt=args.prompt, + lora_name=args.lora_name, + width=args.width, + height=args.height, + steps=args.steps, + guidance=args.guidance, + seed=args.seed, + lora_scale=args.lora_scale + ) + print(f"\nResult: {result}") + + elif args.command == "wait": + result = wait_for_training(args.task_id) + print(f"\nResult: {result}") + + elif args.command == "download": + result = download_image(args.filename) + print(f"\nResult: {result}") + + else: + print("Invalid command. Use --help to see available commands.") + sys.exit(1) diff --git a/language_models/llama3_8b/app.py b/language_models/llama3_8b/app.py index 6f91f32..24acc2d 100644 --- a/language_models/llama3_8b/app.py +++ b/language_models/llama3_8b/app.py @@ -6,25 +6,37 @@ from transformers import AutoModelForCausalLM, AutoTokenizer # Model parameters -MODEL_NAME = "meta-llama/Meta-Llama-3-8B-Instruct" +MODEL_NAME = "meta-llama/Meta-Llama-3.1-8B-Instruct" MAX_LENGTH = 512 -TEMPERATURE = 1.0 -TOP_P = 0.95 -TOP_K = 40 -REPETITION_PENALTY = 1.0 -NO_REPEAT_NGRAM_SIZE = 0 -DO_SAMPLE = True +TEMPERATURE = 0.7 +TOP_P = 0.9 +TOP_K = 50 +REPETITION_PENALTY = 1.05 +NO_REPEAT_NGRAM_SIZE = 2 +DO_SAMPLE = True +NUM_BEAMS = 1 +EARLY_STOPPING = True -CACHE_PATH = "./cached_models" +BEAM_VOLUME_PATH = "./cached_models" # This runs once when the container first starts def load_models(): - tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME, cache_dir=CACHE_PATH) + tokenizer = AutoTokenizer.from_pretrained( + MODEL_NAME, + cache_dir=BEAM_VOLUME_PATH, + padding_side='left' + ) tokenizer.pad_token = tokenizer.eos_token model = AutoModelForCausalLM.from_pretrained( - MODEL_NAME, device_map="auto", torch_dtype=torch.float16, cache_dir=CACHE_PATH + MODEL_NAME, + device_map="auto", + torch_dtype=torch.float16, + cache_dir=BEAM_VOLUME_PATH, + use_cache=True, + low_cpu_mem_usage=True ) + model.eval() return model, tokenizer @@ -38,22 +50,25 @@ def load_models(): "huggingface_hub[hf-transfer]", ] ) - .with_envs("HF_HUB_ENABLE_HF_TRANSFER=1") + .with_envs({ + "HF_HUB_ENABLE_HF_TRANSFER": "1", + "TOKENIZERS_PARALLELISM": "false", + "CUDA_VISIBLE_DEVICES": "0", + }) ) @endpoint( secrets=["HF_TOKEN"], on_start=load_models, - name="meta-llama-3-8b-instruct", + name="meta-llama-3.1-8b-instruct", cpu=2, - memory="32Gi", - gpu_count=2, + memory="16Gi", gpu="A10G", volumes=[ Volume( name="cached_models", - mount_path=CACHE_PATH, + mount_path=BEAM_VOLUME_PATH, ) ], image=image, @@ -68,7 +83,7 @@ def generate_text(context, **inputs): return {"error": "Please provide messages for text generation."} generate_args = { - "max_length": inputs.get("max_tokens", MAX_LENGTH), + "max_new_tokens": inputs.get("max_tokens", MAX_LENGTH), "temperature": inputs.get("temperature", TEMPERATURE), "top_p": inputs.get("top_p", TOP_P), "top_k": inputs.get("top_k", TOP_K), @@ -76,22 +91,35 @@ def generate_text(context, **inputs): "no_repeat_ngram_size": inputs.get( "no_repeat_ngram_size", NO_REPEAT_NGRAM_SIZE ), + "num_beams": inputs.get("num_beams", NUM_BEAMS), + "early_stopping": inputs.get("early_stopping", EARLY_STOPPING), "do_sample": inputs.get("do_sample", DO_SAMPLE), "use_cache": True, "eos_token_id": 
tokenizer.eos_token_id, "pad_token_id": tokenizer.pad_token_id, } - model_inputs = tokenizer.apply_chat_template( + model_inputs_str = tokenizer.apply_chat_template( messages, tokenize=False, add_generation_prompt=True ) - inputs = tokenizer(model_inputs, return_tensors="pt", padding=True) - input_ids = inputs["input_ids"].to("cuda") - attention_mask = inputs["attention_mask"].to("cuda") + + tokenized_inputs = tokenizer( + model_inputs_str, + return_tensors="pt", + padding=True, + truncation=True, + max_length=2048 + ) + input_ids = tokenized_inputs["input_ids"].to("cuda") + attention_mask = tokenized_inputs["attention_mask"].to("cuda") + input_ids_length = input_ids.shape[-1] with torch.no_grad(): outputs = model.generate( - input_ids=input_ids, attention_mask=attention_mask, **generate_args + input_ids=input_ids, + attention_mask=attention_mask, + **generate_args ) - output_text = tokenizer.decode(outputs[0], skip_special_tokens=True) + new_tokens = outputs[0][input_ids_length:] + output_text = tokenizer.decode(new_tokens, skip_special_tokens=True) return {"output": output_text} diff --git a/vllm/chat.py b/vllm/chat.py index 28345b5..d697c57 100644 --- a/vllm/chat.py +++ b/vllm/chat.py @@ -86,7 +86,7 @@ def process_user_input( self, user_input: str, img_link: Optional[str] = None, stream: bool = False ) -> str: """Process user input and return assistant's response.""" - if self.model == "OpenGVLab/InternVL2_5-8B" and img_link: + if self.model == "OpenGVLab/InternVL3-8B-AWQ" and img_link: self.conversation_history.append( { "role": "user", @@ -178,7 +178,7 @@ def chat() -> None: # Handle image input for vision models img_link = None - if model == "OpenGVLab/InternVL2_5-8B": + if model == "OpenGVLab/InternVL3-8B-AWQ": img_link = Prompt.ask( "[bold yellow]Image link (press enter to skip)[/bold yellow]" ) diff --git a/vllm/models.py b/vllm/models.py index 1e95552..ea2e3d8 100644 --- a/vllm/models.py +++ b/vllm/models.py @@ -1,27 +1,28 @@ from beam.integrations import VLLM, VLLMArgs from beam import Image -INTERNVL2_5 = "OpenGVLab/InternVL2_5-8B" +INTERNVL3_AWQ = "OpenGVLab/InternVL3-8B-AWQ" YI_CODER_CHAT = "01-ai/Yi-Coder-9B-Chat" MISTRAL_INSTRUCT = "mistralai/Mistral-7B-Instruct-v0.3" DEEPSEEK_R1 = "deepseek-ai/DeepSeek-R1-Distill-Qwen-7B" internvl = VLLM( - name=INTERNVL2_5.split("/")[-1], - cpu=8, - memory="32Gi", + name=INTERNVL3_AWQ.split("/")[-1], + cpu=4, + memory="16Gi", gpu="A10G", - gpu_count=2, + gpu_count=1, image=(Image(python_version="python3.12")).add_python_packages( ["vllm==0.6.4.post1"] ), vllm_args=VLLMArgs( - model=INTERNVL2_5, - served_model_name=[INTERNVL2_5], + model=INTERNVL3_AWQ, + served_model_name=[INTERNVL3_AWQ], trust_remote_code=True, max_model_len=4096, - gpu_memory_utilization=0.95, + gpu_memory_utilization=0.90, limit_mm_per_prompt={"image": 2}, + quantization="awq", ), )
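A minimal sketch of calling the updated `meta-llama-3.1-8b-instruct` endpoint from `language_models/llama3_8b/app.py` once deployed. The URL and token below are placeholders for your own deployment values; the payload keys mirror the `inputs.get(...)` lookups in `generate_text`, and `max_tokens` is now forwarded as `max_new_tokens`, so the returned `output` contains only the newly generated reply.

```python
# Hypothetical request to the deployed endpoint; URL and token are placeholders.
import requests

resp = requests.post(
    "https://your-llama-endpoint.app.beam.cloud",  # placeholder deployment URL
    headers={
        "Authorization": "Bearer YOUR_BEAM_TOKEN",  # placeholder token
        "Content-Type": "application/json",
    },
    json={
        "messages": [{"role": "user", "content": "Explain LoRA fine-tuning in two sentences."}],
        "max_tokens": 256,   # mapped to max_new_tokens by the handler
        "temperature": 0.7,
    },
    timeout=120,
)
print(resp.json()["output"])  # only the tokens generated after the prompt are decoded
```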
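For the vLLM changes, a rough sketch of querying the renamed `OpenGVLab/InternVL3-8B-AWQ` deployment with an image, assuming the Beam VLLM integration exposes vLLM's usual OpenAI-compatible API and the standard `image_url` message format that `chat.py` builds when an image link is supplied. The base URL, API key, and image link are placeholders.

```python
# Hypothetical OpenAI-compatible request; base_url, api_key, and the image URL are placeholders.
from openai import OpenAI

client = OpenAI(
    base_url="https://your-internvl-endpoint.app.beam.cloud/v1",  # placeholder
    api_key="YOUR_BEAM_TOKEN",                                    # placeholder
)

response = client.chat.completions.create(
    model="OpenGVLab/InternVL3-8B-AWQ",
    messages=[
        {
            "role": "user",
            "content": [
                {"type": "text", "text": "Describe this image."},
                {"type": "image_url", "image_url": {"url": "https://example.com/sample.jpg"}},
            ],
        }
    ],
    max_tokens=256,
)
print(response.choices[0].message.content)
```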