From e15d07fe11dcb8a5bc8ba54b0baaf4b9a895bdf2 Mon Sep 17 00:00:00 2001 From: shinbehavior Date: Thu, 5 Jun 2025 20:28:56 +0200 Subject: [PATCH 1/5] llama3.1 and intervl optimal configuration --- language_models/llama3_8b/app.py | 72 ++++++++++++++++++++++---------- vllm/chat.py | 4 +- vllm/models.py | 17 ++++---- 3 files changed, 61 insertions(+), 32 deletions(-) diff --git a/language_models/llama3_8b/app.py b/language_models/llama3_8b/app.py index 6f91f32..24acc2d 100644 --- a/language_models/llama3_8b/app.py +++ b/language_models/llama3_8b/app.py @@ -6,25 +6,37 @@ from transformers import AutoModelForCausalLM, AutoTokenizer # Model parameters -MODEL_NAME = "meta-llama/Meta-Llama-3-8B-Instruct" +MODEL_NAME = "meta-llama/Meta-Llama-3.1-8B-Instruct" MAX_LENGTH = 512 -TEMPERATURE = 1.0 -TOP_P = 0.95 -TOP_K = 40 -REPETITION_PENALTY = 1.0 -NO_REPEAT_NGRAM_SIZE = 0 -DO_SAMPLE = True +TEMPERATURE = 0.7 +TOP_P = 0.9 +TOP_K = 50 +REPETITION_PENALTY = 1.05 +NO_REPEAT_NGRAM_SIZE = 2 +DO_SAMPLE = True +NUM_BEAMS = 1 +EARLY_STOPPING = True -CACHE_PATH = "./cached_models" +BEAM_VOLUME_PATH = "./cached_models" # This runs once when the container first starts def load_models(): - tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME, cache_dir=CACHE_PATH) + tokenizer = AutoTokenizer.from_pretrained( + MODEL_NAME, + cache_dir=BEAM_VOLUME_PATH, + padding_side='left' + ) tokenizer.pad_token = tokenizer.eos_token model = AutoModelForCausalLM.from_pretrained( - MODEL_NAME, device_map="auto", torch_dtype=torch.float16, cache_dir=CACHE_PATH + MODEL_NAME, + device_map="auto", + torch_dtype=torch.float16, + cache_dir=BEAM_VOLUME_PATH, + use_cache=True, + low_cpu_mem_usage=True ) + model.eval() return model, tokenizer @@ -38,22 +50,25 @@ def load_models(): "huggingface_hub[hf-transfer]", ] ) - .with_envs("HF_HUB_ENABLE_HF_TRANSFER=1") + .with_envs({ + "HF_HUB_ENABLE_HF_TRANSFER": "1", + "TOKENIZERS_PARALLELISM": "false", + "CUDA_VISIBLE_DEVICES": "0", + }) ) @endpoint( secrets=["HF_TOKEN"], on_start=load_models, - name="meta-llama-3-8b-instruct", + name="meta-llama-3.1-8b-instruct", cpu=2, - memory="32Gi", - gpu_count=2, + memory="16Gi", gpu="A10G", volumes=[ Volume( name="cached_models", - mount_path=CACHE_PATH, + mount_path=BEAM_VOLUME_PATH, ) ], image=image, @@ -68,7 +83,7 @@ def generate_text(context, **inputs): return {"error": "Please provide messages for text generation."} generate_args = { - "max_length": inputs.get("max_tokens", MAX_LENGTH), + "max_new_tokens": inputs.get("max_tokens", MAX_LENGTH), "temperature": inputs.get("temperature", TEMPERATURE), "top_p": inputs.get("top_p", TOP_P), "top_k": inputs.get("top_k", TOP_K), @@ -76,22 +91,35 @@ def generate_text(context, **inputs): "no_repeat_ngram_size": inputs.get( "no_repeat_ngram_size", NO_REPEAT_NGRAM_SIZE ), + "num_beams": inputs.get("num_beams", NUM_BEAMS), + "early_stopping": inputs.get("early_stopping", EARLY_STOPPING), "do_sample": inputs.get("do_sample", DO_SAMPLE), "use_cache": True, "eos_token_id": tokenizer.eos_token_id, "pad_token_id": tokenizer.pad_token_id, } - model_inputs = tokenizer.apply_chat_template( + model_inputs_str = tokenizer.apply_chat_template( messages, tokenize=False, add_generation_prompt=True ) - inputs = tokenizer(model_inputs, return_tensors="pt", padding=True) - input_ids = inputs["input_ids"].to("cuda") - attention_mask = inputs["attention_mask"].to("cuda") + + tokenized_inputs = tokenizer( + model_inputs_str, + return_tensors="pt", + padding=True, + truncation=True, + max_length=2048 + ) + input_ids = 
tokenized_inputs["input_ids"].to("cuda") + attention_mask = tokenized_inputs["attention_mask"].to("cuda") + input_ids_length = input_ids.shape[-1] with torch.no_grad(): outputs = model.generate( - input_ids=input_ids, attention_mask=attention_mask, **generate_args + input_ids=input_ids, + attention_mask=attention_mask, + **generate_args ) - output_text = tokenizer.decode(outputs[0], skip_special_tokens=True) + new_tokens = outputs[0][input_ids_length:] + output_text = tokenizer.decode(new_tokens, skip_special_tokens=True) return {"output": output_text} diff --git a/vllm/chat.py b/vllm/chat.py index 28345b5..d697c57 100644 --- a/vllm/chat.py +++ b/vllm/chat.py @@ -86,7 +86,7 @@ def process_user_input( self, user_input: str, img_link: Optional[str] = None, stream: bool = False ) -> str: """Process user input and return assistant's response.""" - if self.model == "OpenGVLab/InternVL2_5-8B" and img_link: + if self.model == "OpenGVLab/InternVL3-8B-AWQ" and img_link: self.conversation_history.append( { "role": "user", @@ -178,7 +178,7 @@ def chat() -> None: # Handle image input for vision models img_link = None - if model == "OpenGVLab/InternVL2_5-8B": + if model == "OpenGVLab/InternVL3-8B-AWQ": img_link = Prompt.ask( "[bold yellow]Image link (press enter to skip)[/bold yellow]" ) diff --git a/vllm/models.py b/vllm/models.py index 1e95552..ea2e3d8 100644 --- a/vllm/models.py +++ b/vllm/models.py @@ -1,27 +1,28 @@ from beam.integrations import VLLM, VLLMArgs from beam import Image -INTERNVL2_5 = "OpenGVLab/InternVL2_5-8B" +INTERNVL3_AWQ = "OpenGVLab/InternVL3-8B-AWQ" YI_CODER_CHAT = "01-ai/Yi-Coder-9B-Chat" MISTRAL_INSTRUCT = "mistralai/Mistral-7B-Instruct-v0.3" DEEPSEEK_R1 = "deepseek-ai/DeepSeek-R1-Distill-Qwen-7B" internvl = VLLM( - name=INTERNVL2_5.split("/")[-1], - cpu=8, - memory="32Gi", + name=INTERNVL3_AWQ.split("/")[-1], + cpu=4, + memory="16Gi", gpu="A10G", - gpu_count=2, + gpu_count=1, image=(Image(python_version="python3.12")).add_python_packages( ["vllm==0.6.4.post1"] ), vllm_args=VLLMArgs( - model=INTERNVL2_5, - served_model_name=[INTERNVL2_5], + model=INTERNVL3_AWQ, + served_model_name=[INTERNVL3_AWQ], trust_remote_code=True, max_model_len=4096, - gpu_memory_utilization=0.95, + gpu_memory_utilization=0.90, limit_mm_per_prompt={"image": 2}, + quantization="awq", ), ) From 418334fbd625790540464ad956d2590d40c023e0 Mon Sep 17 00:00:00 2001 From: shinbehavior Date: Thu, 19 Jun 2025 01:26:27 +0200 Subject: [PATCH 2/5] flux lora example init --- finetuning/flux/README.md | 219 +++++++++++++++++++++ finetuning/flux/finetune.py | 363 +++++++++++++++++++++++++++++++++++ finetuning/flux/inference.py | 208 ++++++++++++++++++++ finetuning/flux/upload.py | 307 +++++++++++++++++++++++++++++ 4 files changed, 1097 insertions(+) create mode 100644 finetuning/flux/README.md create mode 100644 finetuning/flux/finetune.py create mode 100644 finetuning/flux/inference.py create mode 100644 finetuning/flux/upload.py diff --git a/finetuning/flux/README.md b/finetuning/flux/README.md new file mode 100644 index 0000000..f6b85c8 --- /dev/null +++ b/finetuning/flux/README.md @@ -0,0 +1,219 @@ +# FLUX LoRA Fine-tuning on Beam + +Implementation for fine-tuning FLUX models with LoRA using your own image datasets. + +## Quick Start + +1. **Create volume** (one-time setup): + ```bash + # Beam automatically creates volumes when first used, but you can pre-create: + # This happens automatically when you first deploy + ``` + +2. 
**Deploy endpoints**: + ```bash + beam deploy finetune.py:train_lora --name train-lora + beam deploy inference.py:generate --name generate-image + ``` + +3. **Update configuration** in `upload.py`: + - Set your `BEAM_TOKEN` + - Update `ENDPOINTS` with your deployed URLs + +4. **Train a model**: + ```bash + python upload.py train ./your_images my_concept + ``` + +5. **Generate images**: + ```bash + python upload.py generate "a photo of my_concept" my_concept + ``` + +## Volume Storage + +The system uses a persistent Beam volume named `flux-lora` that automatically stores: +- Training datasets in `./flux-lora/dataset/` +- Trained models in `./flux-lora/output/` + +**Volume is created automatically** when you first deploy - no manual setup needed! + +## File Structure + +``` +clean-version/ +├── finetune.py # Training endpoint +├── inference.py # Image generation endpoint +├── upload.py # Local dataset handling +└── README.md # This file +``` + +## Detailed Usage + +### 1. Training + +**Deploy training endpoint:** +```bash +beam deploy finetune.py:train_lora --name train-lora +``` + +**Start training with local images:** +```python +from upload import upload_dataset, start_training + +# Upload your dataset +result = upload_dataset("./training_images", "my_concept") + +# Start training +training = start_training( + result["zip_url"], + "my_concept", + steps=1000, + resolution=1024 +) +``` + +**Training parameters:** +- `trigger_word`: Token to associate with your concept (e.g., "my_dog", "abstract_art") +- `steps`: Training steps (default: 1000) +- `learning_rate`: Learning rate (default: 4e-4) +- `rank`: LoRA rank - higher = more capacity (default: 32) +- `alpha`: LoRA alpha scaling (default: 32) +- `resolution`: Image resolution (default: 1024) + +### 2. Image Generation + +**Deploy inference endpoint:** +```bash +beam deploy inference.py:generate --name generate-image +``` + +**Generate images:** +```python +from upload import generate_image + +result = generate_image( + "a sample of my_concept in sunlight", + "my_concept", + width=1024, + height=1024, + steps=20, + seed=42 +) +``` + +**Generation parameters:** +- `prompt`: Text description of desired image +- `trigger_word`: Token used during training +- `width/height`: Image dimensions (256-1024) +- `num_inference_steps`: Denoising steps (1-50) +- `guidance_scale`: Prompt following strength (1.0-20.0) +- `seed`: Random seed for reproducibility +- `negative_prompt`: What to avoid +- `num_images`: Number to generate (1-4) + +### 3. Complete Workflow + +```python +from upload import full_workflow + +# Upload, train, and optionally test +result = full_workflow( + local_folder="./my_images", + trigger_word="my_style", + test_prompt="a painting in my_style", + steps=1000, + resolution=1024 +) +``` + +## Command Line Usage + +```bash +# Upload dataset only +python upload.py upload ./images my_concept + +# Upload and start training +python upload.py train ./images my_concept + +# Generate image (after training completes) +python upload.py generate "prompt text" my_concept + +# Complete workflow +python upload.py workflow ./images my_concept "test prompt" +``` + +## Dataset Preparation + +**Supported formats:** JPG, JPEG, PNG, WebP, BMP + +**Recommendations:** +- 10-50 high-quality images work well +- Images will be resized to training resolution +- Varied poses/angles improve results +- Consistent lighting/style helps + +**Example folder structure:** +``` +training_images/ +├── image1.jpg +├── image2.png +├── image3.jpg +└── ... 
+``` + +## Configuration + +**Before using, update `upload.py`:** + +```python +# Your Beam authentication token +BEAM_TOKEN = "" + +# Your deployed endpoint URLs +ENDPOINTS = { + "train": "https://train-lora-.app.beam.cloud", + "inference": "https://generate-image-.app.beam.cloud" +} +``` + +## Monitoring + +- **Beam Dashboard**: https://app.beam.cloud/ +- **Training logs**: Available in Beam dashboard +- **Model files**: Stored in persistent volume `flux-lora` + +## Tips + +**For better results:** +- Use descriptive trigger words (e.g., "vintage_car" vs "car") +- Include trigger word in generation prompts +- Experiment with guidance_scale (3-15 range) +- Try different seeds for variety + +**Training tips:** +- More steps = better quality but longer training +- Higher rank = more capacity but larger files +- 1024 resolution gives best quality on H100 + +**Generation tips:** +- Start with 20 inference steps +- Use guidance_scale 7-10 for most prompts +- Add negative prompts to avoid unwanted elements + +## Troubleshooting + +**Training fails:** +- Check HF_TOKEN is set correctly +- Verify images are valid formats +- Monitor GPU memory usage + +**Generation quality poor:** +- Try different prompts including trigger word +- Adjust guidance_scale +- Check if LoRA loaded correctly + +**Slow generation:** +- Reduce inference steps +- Use smaller image sizes +- Check autoscaler settings \ No newline at end of file diff --git a/finetuning/flux/finetune.py b/finetuning/flux/finetune.py new file mode 100644 index 0000000..abae9ee --- /dev/null +++ b/finetuning/flux/finetune.py @@ -0,0 +1,363 @@ +from beam import endpoint, Volume, Image, QueueDepthAutoscaler +import torch +import os +import shutil +import yaml +import zipfile +import tempfile +import subprocess +import requests +from PIL import Image as PILImage + +VOLUME_PATH = "./flux-lora-clean" + +@endpoint( + name="train-lora", + gpu="H100", + cpu=8, + memory="32Gi", + timeout=3600, + keep_warm_seconds=60, + volumes=[Volume(name="flux-lora-clean", mount_path=VOLUME_PATH)], + image=Image(python_version="python3.11") + .add_python_packages([ + "torch==2.6.0", + "torchvision==0.21.0", + "torchao==0.9.0", + "safetensors", + "transformers==4.52.4", + "lycoris-lora==1.8.3", + "flatten_json", + "pyyaml", + "oyaml", + "tensorboard", + "kornia", + "invisible-watermark", + "einops", + "accelerate", + "toml", + "albumentations==1.4.15", + "albucore==0.0.16", + "pydantic", + "omegaconf", + "k-diffusion", + "open_clip_torch", + "timm", + "prodigyopt", + "controlnet_aux==0.0.10", + "python-dotenv", + "bitsandbytes", + "hf_transfer", + "lpips", + "pytorch_fid", + "optimum-quanto==0.2.4", + "sentencepiece", + "huggingface_hub", + "peft", + "gradio", + "python-slugify", + "opencv-python-headless", + "pytorch-wavelets==1.3.0", + "matplotlib==3.10.1", + "diffusers", + "packaging", + "setuptools<70.0.0", + "requests", + "pillow" + ]) + .add_commands([ + "apt-get update && apt-get install -y git libgl1-mesa-glx libglib2.0-0 libsm6 libxext6 libxrender-dev libgomp1", + "pip install git+https://github.com/jaretburkett/easy_dwpose.git", + "pip install git+https://github.com/huggingface/diffusers@363d1ab7e24c5ed6c190abb00df66d9edb74383b" + ]) + .with_envs("HF_HUB_ENABLE_HF_TRANSFER=1"), + secrets=["HF_TOKEN"], + autoscaler=QueueDepthAutoscaler(min_containers=1, max_containers=3, tasks_per_container=1) +) +def train_lora( + image_zip: str = None, + trigger_word: str = "TOK", + steps: int = 1000, + learning_rate: float = 4e-4, + rank: int = 32, + alpha: int = 32, + 
resolution: int = 1024 +): + """ + Fine-tune FLUX model with LoRA using uploaded dataset + + Args: + image_zip: URL to zip file containing training images + trigger_word: Token to associate with your concept + steps: Number of training steps + learning_rate: Learning rate for training + rank: LoRA rank (higher = more capacity) + alpha: LoRA alpha (scaling factor) + resolution: Training image resolution + """ + print(f"Starting LoRA fine-tuning for '{trigger_word}'") + print(f"Training: {steps} steps at {resolution}x{resolution}") + + # Setup environment + setup_environment() + + # Process dataset + image_count = process_dataset(image_zip, trigger_word, resolution) + + if image_count == 0: + return {"error": "No training images found"} + + # Configure training + config = create_training_config( + trigger_word=trigger_word, + steps=steps, + learning_rate=learning_rate, + rank=rank, + alpha=alpha, + resolution=resolution + ) + + # Run training + result = run_training(config) + + if result["status"] == "success": + print("Fine-tuning completed successfully!") + return { + "status": "success", + "message": f"LoRA training completed for '{trigger_word}'", + "models": result["models"], + "image_count": image_count, + "trigger_word": trigger_word, + "steps": steps + } + else: + return result + +def setup_environment(): + """Setup training environment and dependencies""" + print("Setting up training environment...") + + # Clone ai-toolkit if needed + toolkit_path = "/tmp/ai-toolkit" + if not os.path.exists(toolkit_path): + print("Downloading ai-toolkit...") + subprocess.run([ + "git", "clone", "https://github.com/ostris/ai-toolkit.git", toolkit_path + ], check=True) + subprocess.run([ + "git", "submodule", "update", "--init", "--recursive" + ], cwd=toolkit_path, check=True) + + # Configure environment variables + os.environ.update({ + 'DISABLE_TELEMETRY': 'YES', + 'HF_TOKEN': os.getenv("HF_TOKEN"), + 'PYTORCH_CUDA_ALLOC_CONF': 'expandable_segments:True,max_split_size_mb:512', + 'TORCH_CUDNN_V8_API_ENABLED': '1', + 'NVIDIA_TF32_OVERRIDE': '1', + 'TORCH_ALLOW_TF32_CUBLAS_OVERRIDE': '1', + 'TOKENIZERS_PARALLELISM': 'false', + 'HF_HUB_ENABLE_HF_TRANSFER': '1' + }) + + import sys + sys.path.insert(0, toolkit_path) + +def process_dataset(image_zip, trigger_word, resolution): + """Process uploaded dataset for training""" + print("Processing training dataset...") + + # Setup directories + base_dir = VOLUME_PATH + dataset_dir = os.path.join(base_dir, "dataset") + + # Clean dataset directory + if os.path.exists(dataset_dir): + shutil.rmtree(dataset_dir) + os.makedirs(dataset_dir, exist_ok=True) + + if not image_zip: + print("No dataset provided, creating dummy data") + return create_dummy_dataset(dataset_dir, trigger_word, resolution) + + # Download and extract dataset + try: + print(f"Downloading dataset: {image_zip}") + zip_response = requests.get(image_zip, timeout=60) + zip_response.raise_for_status() + + with tempfile.NamedTemporaryFile(suffix='.zip', delete=False) as temp_zip: + temp_zip.write(zip_response.content) + temp_zip_path = temp_zip.name + + # Extract and process images + image_count = 0 + with zipfile.ZipFile(temp_zip_path, 'r') as zip_ref: + with tempfile.TemporaryDirectory() as temp_extract_dir: + zip_ref.extractall(temp_extract_dir) + + for root, dirs, files in os.walk(temp_extract_dir): + for file in files: + if file.lower().endswith(('.jpg', '.jpeg', '.png', '.webp', '.bmp')): + if process_image(root, file, dataset_dir, trigger_word, resolution, image_count): + image_count += 1 + + 
os.remove(temp_zip_path) + print(f"Processed {image_count} training images") + return image_count + + except Exception as e: + print(f"Dataset processing failed: {e}") + return 0 + +def process_image(root, filename, dataset_dir, trigger_word, resolution, image_count): + """Process individual training image""" + try: + old_path = os.path.join(root, filename) + with PILImage.open(old_path) as img: + img = img.convert('RGB') + img = img.resize((resolution, resolution), PILImage.Resampling.LANCZOS) + + new_filename = f"training_image_{image_count + 1}.jpg" + new_path = os.path.join(dataset_dir, new_filename) + img.save(new_path, 'JPEG', quality=95) + + # Create caption file + caption_path = os.path.join(dataset_dir, f"training_image_{image_count + 1}.txt") + with open(caption_path, 'w') as f: + f.write(f"a photo of {trigger_word}") + + return True + except Exception as e: + print(f"Failed to process {filename}: {e}") + return False + +def create_dummy_dataset(dataset_dir, trigger_word, resolution): + """Create dummy dataset for testing""" + dummy_img = PILImage.new('RGB', (resolution, resolution), color='red') + dummy_img.save(os.path.join(dataset_dir, "dummy.jpg")) + + with open(os.path.join(dataset_dir, "dummy.txt"), 'w') as f: + f.write(f"a photo of {trigger_word}") + + return 1 + +def create_training_config(trigger_word, steps, learning_rate, rank, alpha, resolution): + """Create training configuration""" + base_dir = VOLUME_PATH + dataset_dir = os.path.join(base_dir, "dataset") + output_dir = os.path.join(base_dir, "output") + os.makedirs(output_dir, exist_ok=True) + + config = { + "job": "extension", + "config": { + "name": f"flux_lora_{trigger_word}_{resolution}", + "process": [{ + "type": "sd_trainer", + "training_folder": output_dir, + "device": "cuda:0", + "trigger_word": trigger_word, + "network": { + "type": "lora", + "linear": rank, + "linear_alpha": alpha + }, + "save": { + "dtype": "float16", + "save_every": steps // 2, + "max_step_saves_to_keep": 2 + }, + "datasets": [{ + "folder_path": os.path.abspath(dataset_dir), + "caption_ext": "txt", + "caption_dropout_rate": 0.05, + "cache_latents": True, + "skip_cache_check": True, + "shuffle_tokens": False, + "cache_latents_to_disk": True, + "resolution": [resolution] + }], + "train": { + "batch_size": 1, + "steps": steps, + "gradient_accumulation_steps": 8, + "train_unet": True, + "train_text_encoder": False, + "gradient_checkpointing": False, + "noise_scheduler": "flowmatch", + "optimizer": "adamw8bit", + "lr": learning_rate, + "ema_config": { + "use_ema": True, + "ema_decay": 0.99 + }, + "dtype": "fp16" + }, + "model": { + "name_or_path": "black-forest-labs/FLUX.1-dev", + "is_flux": True, + "quantize": True + }, + "sample": { + "sampler": "flowmatch", + "sample_every": steps // 2, + "width": resolution, + "height": resolution, + "prompts": [ + f"a photo of {trigger_word}", + f"{trigger_word} in professional lighting", + f"portrait of {trigger_word}, high quality" + ], + "neg": "", + "seed": 42, + "walk_seed": False, + "guidance_scale": 4, + "sample_steps": 10 + } + }] + } + } + + return config + +def run_training(config): + """Execute the training process""" + print("Starting LoRA training...") + + try: + # Save config + toolkit_path = "/tmp/ai-toolkit" + config_path = os.path.join(toolkit_path, "config", "train_config.yaml") + os.makedirs(os.path.dirname(config_path), exist_ok=True) + + with open(config_path, 'w') as f: + yaml.dump(config, f, default_flow_style=False) + + # Run training + os.chdir(toolkit_path) + result = 
subprocess.run( + ["python", "run.py", "config/train_config.yaml"], + capture_output=False, + text=True, + check=True + ) + + # Find trained models + output_dir = os.path.join(VOLUME_PATH, "output") + trained_models = [] + for root, dirs, files in os.walk(output_dir): + for file in files: + if file.endswith('.safetensors'): + trained_models.append(os.path.join(root, file)) + + return { + "status": "success", + "models": trained_models + } + + except subprocess.CalledProcessError as e: + return { + "status": "error", + "message": f"Training failed: {str(e)}" + } \ No newline at end of file diff --git a/finetuning/flux/inference.py b/finetuning/flux/inference.py new file mode 100644 index 0000000..1c5b094 --- /dev/null +++ b/finetuning/flux/inference.py @@ -0,0 +1,208 @@ +from beam import endpoint, Volume, Image, QueueDepthAutoscaler, Output +import torch +from diffusers import FluxPipeline +import os +from io import BytesIO +import base64 + +VOLUME_PATH = "./flux-lora-clean" + +# Global pipeline variable +pipeline = None + +def load_pipeline(): + """Load FLUX pipeline with trained LoRA""" + global pipeline + + if pipeline is not None: + return pipeline + + print("Loading FLUX pipeline...") + + try: + # Load base FLUX model with memory optimizations + pipeline = FluxPipeline.from_pretrained( + "black-forest-labs/FLUX.1-dev", + torch_dtype=torch.float16, + token=os.getenv("HF_TOKEN"), + use_safetensors=True + ).to("cuda") + + # Load trained LoRA if available + output_dir = os.path.join(VOLUME_PATH, "output") + if os.path.exists(output_dir): + lora_files = find_lora_models(output_dir) + + if lora_files: + # Prefer final models over checkpoints + final_models = [f for f in lora_files if not any(x in os.path.basename(f) for x in ['_0', 'checkpoint'])] + if final_models: + latest_lora = max(final_models, key=os.path.getctime) + else: + latest_lora = max(lora_files, key=os.path.getctime) + + load_lora_weights(pipeline, latest_lora) + print(f"Model loaded from: {os.path.relpath(latest_lora, VOLUME_PATH)}") + else: + print("No LoRA models found - using base FLUX model") + + print("Pipeline loaded successfully!") + return pipeline + + except Exception as e: + print(f"Failed to load pipeline: {e}") + raise + +def find_lora_models(output_dir): + """Find available LoRA model files""" + lora_files = [] + for root, dirs, files in os.walk(output_dir): + for file in files: + if file.endswith('.safetensors') and not '_000000' in file: # Skip checkpoints + lora_files.append(os.path.join(root, file)) + return lora_files + +def load_lora_weights(pipeline, lora_path): + """Load LoRA weights into pipeline""" + try: + pipeline.load_lora_weights( + os.path.dirname(lora_path), + weight_name=os.path.basename(lora_path) + ) + print(f"Loaded LoRA: {os.path.basename(lora_path)}") + except Exception as e: + print(f"Failed to load LoRA: {e}") + print(f" Attempted to load from: {lora_path}") + +@endpoint( + name="generate-image", + on_start=load_pipeline, + gpu="A100-40", + cpu=4, + memory="32Gi", + image=Image(python_version="python3.11") + .add_python_packages([ + "torch==2.6.0", + "diffusers", + "transformers==4.52.4", + "safetensors", + "accelerate", + "pillow", + "hf_transfer", + "protobuf", + "sentencepiece" + ]) + .with_envs("HF_HUB_ENABLE_HF_TRANSFER=1"), + volumes=[Volume(name="flux-lora-clean", mount_path=VOLUME_PATH)], + secrets=["HF_TOKEN"], + autoscaler=QueueDepthAutoscaler(max_containers=1, tasks_per_container=1), + keep_warm_seconds=300 +) +def generate( + prompt: str, + trigger_word: str = "TOK", + width: 
int = 512, + height: int = 512, + num_inference_steps: int = 20, + guidance_scale: float = 7.5, + seed: int = None, + negative_prompt: str = "", + num_images: int = 1 +): + """ + Generate images using fine-tuned FLUX LoRA model + + Args: + prompt: Text description of desired image + trigger_word: Token used during training + width: Image width (256-1024) + height: Image height (256-1024) + num_inference_steps: Number of denoising steps (1-50) + guidance_scale: How closely to follow prompt (1.0-20.0) + seed: Random seed for reproducibility + negative_prompt: What to avoid in generation + num_images: Number of images to generate (1-4) + """ + global pipeline + + try: + # Ensure pipeline is loaded + if pipeline is None: + pipeline = load_pipeline() + + # Validate parameters + num_images = max(1, min(num_images, 4)) + width = max(256, min(width, 1024)) + height = max(256, min(height, 1024)) + num_inference_steps = max(1, min(num_inference_steps, 50)) + guidance_scale = max(1.0, min(guidance_scale, 20.0)) + + print(f"Generating {num_images} image(s)") + print(f"Prompt: '{prompt}'") + + # Set random seed if provided + if seed is not None: + torch.manual_seed(seed) + if torch.cuda.is_available(): + torch.cuda.manual_seed(seed) + + # Generate images + with torch.no_grad(): + result = pipeline( + prompt=prompt, + negative_prompt=negative_prompt if negative_prompt else None, + width=width, + height=height, + num_inference_steps=num_inference_steps, + guidance_scale=guidance_scale, + num_images_per_prompt=num_images, + generator=torch.Generator(device="cuda").manual_seed(seed) if seed is not None else None + ) + + # Convert images to base64 and create shareable URLs + encoded_images = [] + image_urls = [] + + for i, image in enumerate(result.images): + # Base64 encoding for immediate use + buffered = BytesIO() + image.save(buffered, format="PNG") + img_str = base64.b64encode(buffered.getvalue()).decode() + encoded_images.append(img_str) + + # Also create shareable URL using Beam Output + try: + filename = f"generated_image_{i+1}.png" + output = Output(path=filename) + output.save(image) + image_urls.append(output.public_url) + except Exception as e: + print(f"Failed to create public URL for image {i+1}: {e}") + image_urls.append(None) + + print(f"Generated {len(encoded_images)} image(s)") + + return { + "status": "success", + "images": encoded_images, # Base64 for immediate use + "image_urls": image_urls, # Public URLs for sharing + "prompt": prompt, + "trigger_word": trigger_word, + "seed": seed, + "settings": { + "width": width, + "height": height, + "num_inference_steps": num_inference_steps, + "guidance_scale": guidance_scale, + "negative_prompt": negative_prompt + }, + "num_images": len(encoded_images) + } + + except Exception as e: + print(f"Generation failed: {e}") + return { + "status": "error", + "message": f"Failed to generate image: {str(e)}", + "prompt": prompt + } \ No newline at end of file diff --git a/finetuning/flux/upload.py b/finetuning/flux/upload.py new file mode 100644 index 0000000..17e6097 --- /dev/null +++ b/finetuning/flux/upload.py @@ -0,0 +1,307 @@ +from beam import Client +import os +import zipfile +import time +from datetime import datetime + +# Your Beam authentication token +BEAM_TOKEN = "" + +# Your deployed endpoint URLs +ENDPOINTS = { + "train": "https://train-lora-.app.beam.cloud", + "inference": "https://generate-image-.app.beam.cloud" +} + +def upload_dataset(local_folder: str, trigger_word: str): + """ + Upload local image dataset to Beam for training + + Args: 
+ local_folder: Path to folder containing training images + trigger_word: Token to associate with your concept + + Returns: + dict: Upload result with zip URL + """ + print(f"Preparing dataset from: {local_folder}") + print(f"Trigger word: '{trigger_word}'") + + # Validate inputs + if not os.path.exists(local_folder): + raise ValueError(f"❌ Folder not found: {local_folder}") + + if not trigger_word or trigger_word.strip() == "": + raise ValueError("❌ Trigger word cannot be empty!") + + # Initialize Beam client + client = Client(token=BEAM_TOKEN) + + # Create zip file from local images + zip_filename = f"{trigger_word.replace(' ', '_')}_dataset.zip" + image_count = create_dataset_zip(local_folder, zip_filename) + + if image_count == 0: + raise ValueError(f"❌ No images found in {local_folder}") + + try: + print("Uploading dataset to Beam...") + zip_url = client.upload_file(zip_filename) + print(f"Dataset uploaded successfully!") + print(f"{image_count} images ready for training") + + # Clean up local zip + os.remove(zip_filename) + + return { + "status": "success", + "zip_url": zip_url, + "image_count": image_count, + "trigger_word": trigger_word + } + + except Exception as e: + # Clean up on failure + if os.path.exists(zip_filename): + os.remove(zip_filename) + raise Exception(f"❌ Upload failed: {e}") + +def create_dataset_zip(local_folder: str, zip_filename: str): + """Create zip file from local image folder""" + image_count = 0 + supported_formats = ('.jpg', '.jpeg', '.png', '.webp', '.bmp') + + with zipfile.ZipFile(zip_filename, 'w', zipfile.ZIP_DEFLATED) as zipf: + for filename in os.listdir(local_folder): + if filename.lower().endswith(supported_formats): + file_path = os.path.join(local_folder, filename) + if os.path.isfile(file_path): + zipf.write(file_path, filename) + image_count += 1 + print(f" Added: {filename}") + + print(f"Created dataset zip with {image_count} images") + return image_count + +def start_training(zip_url: str, trigger_word: str, **kwargs): + """ + Start LoRA training with uploaded dataset + + Args: + zip_url: URL to uploaded dataset zip + trigger_word: Token used during training + **kwargs: Additional training parameters + + Returns: + dict: Training request result + """ + import requests + + # Default training parameters + training_params = { + "image_zip": zip_url, + "trigger_word": trigger_word, + "steps": kwargs.get("steps", 1000), + "learning_rate": kwargs.get("learning_rate", 4e-4), + "rank": kwargs.get("rank", 32), + "alpha": kwargs.get("alpha", 32), + "resolution": kwargs.get("resolution", 1024) + } + + print("Starting LoRA training...") + print(f"Steps: {training_params['steps']}") + print(f"Resolution: {training_params['resolution']}x{training_params['resolution']}") + + try: + response = requests.post( + ENDPOINTS["train"], + headers={ + 'Authorization': f'Bearer {BEAM_TOKEN}', + 'Content-Type': 'application/json' + }, + json=training_params, + timeout=300 + ) + + if response.status_code == 200: + print("Training started successfully!") + return {"status": "training_started", "params": training_params} + else: + print(f"HTTP {response.status_code} - Check Beam dashboard") + return {"status": "request_sent", "params": training_params} + + except requests.exceptions.Timeout: + print("Request timed out - Training likely started") + return {"status": "timeout", "params": training_params} + except Exception as e: + print(f"Request error: {e}") + return {"status": "error", "message": str(e)} + +def generate_image(prompt: str, trigger_word: str, **kwargs): + 
""" + Generate image using trained model + + Args: + prompt: Text description of desired image + trigger_word: Token used during training + **kwargs: Additional generation parameters + + Returns: + dict: Generation result with base64 images + """ + import requests + import base64 + from PIL import Image + from io import BytesIO + + # Default generation parameters + gen_params = { + "prompt": prompt, + "trigger_word": trigger_word, + "width": kwargs.get("width", 512), + "height": kwargs.get("height", 512), + "num_inference_steps": kwargs.get("steps", 20), + "guidance_scale": kwargs.get("guidance_scale", 7.5), + "seed": kwargs.get("seed"), + "negative_prompt": kwargs.get("negative_prompt", ""), + "num_images": kwargs.get("num_images", 1) + } + + print(f"Generating image...") + print(f"Prompt: '{prompt}'") + + try: + response = requests.post( + ENDPOINTS["inference"], + headers={ + 'Authorization': f'Bearer {BEAM_TOKEN}', + 'Content-Type': 'application/json' + }, + json=gen_params, + timeout=120 + ) + + if response.status_code == 200: + result = response.json() + if result.get("status") == "success": + print("Image generated successfully!") + + # Save first image locally + if result.get("images"): + image_data = base64.b64decode(result["images"][0]) + image = Image.open(BytesIO(image_data)) + + filename = f"generated_{int(time.time())}.png" + image.save(filename) + print(f"Saved: {filename}") + + return result + else: + print(f"Generation failed: {result.get('message', 'Unknown error')}") + return result + else: + print(f"HTTP {response.status_code}: {response.text}") + return {"status": "error", "message": f"HTTP {response.status_code}"} + + except Exception as e: + print(f"Generation failed: {e}") + return {"status": "error", "message": str(e)} + +def full_workflow(local_folder: str, trigger_word: str, test_prompt: str = None, **training_kwargs): + """ + Complete workflow: upload dataset, train model, and test generation + + Args: + local_folder: Path to training images + trigger_word: Token for your concept + test_prompt: Optional test prompt after training + **training_kwargs: Training parameters + + Returns: + dict: Complete workflow results + """ + workflow_start = datetime.now() + + print("Starting complete LoRA workflow...") + print("="*50) + + try: + # Step 1: Upload dataset + print("STEP 1: Uploading dataset") + upload_result = upload_dataset(local_folder, trigger_word) + + # Step 2: Start training + print("\nSTEP 2: Starting training") + training_result = start_training( + upload_result["zip_url"], + trigger_word, + **training_kwargs + ) + + # Step 3: Optional test generation + if test_prompt: + print(f"\nSTEP 3: Testing with prompt: '{test_prompt}'") + # Note: In real usage, you'd wait for training to complete first + print("(Wait for training to complete before testing)") + + total_time = datetime.now() - workflow_start + + print("\nWorkflow completed!") + print(f"Total time: {str(total_time).split('.')[0]}") + + return { + "status": "success", + "upload": upload_result, + "training": training_result, + "total_time": str(total_time).split('.')[0] + } + + except Exception as e: + print(f"\nWorkflow failed: {e}") + return {"status": "error", "message": str(e)} + +# Usage examples +if __name__ == "__main__": + import sys + + # Update these before running + print("Remember to update BEAM_TOKEN and ENDPOINTS in this file!") + + if len(sys.argv) < 2: + print("Usage examples:") + print(" python upload.py upload ./images my_concept") + print(" python upload.py train ./images my_concept") 
+ print(" python upload.py generate 'a photo of my_concept' my_concept") + print(" python upload.py workflow ./images my_concept 'test prompt'") + sys.exit(1) + + command = sys.argv[1] + + if command == "upload" and len(sys.argv) >= 4: + folder = sys.argv[2] + trigger = sys.argv[3] + result = upload_dataset(folder, trigger) + print("Result:", result) + + elif command == "train" and len(sys.argv) >= 4: + folder = sys.argv[2] + trigger = sys.argv[3] + upload_result = upload_dataset(folder, trigger) + training_result = start_training(upload_result["zip_url"], trigger) + print("Training result:", training_result) + + elif command == "generate" and len(sys.argv) >= 4: + prompt = sys.argv[2] + trigger = sys.argv[3] + result = generate_image(prompt, trigger) + print("Generation result:", result) + + elif command == "workflow" and len(sys.argv) >= 4: + folder = sys.argv[2] + trigger = sys.argv[3] + test_prompt = sys.argv[4] if len(sys.argv) > 4 else None + result = full_workflow(folder, trigger, test_prompt) + print("Workflow result:", result) + + else: + print("Invalid command or missing arguments") \ No newline at end of file From 8b2ab2c8f57ff8594a02b6d4601ea8e9585f9f57 Mon Sep 17 00:00:00 2001 From: shinbehavior Date: Sun, 22 Jun 2025 12:45:39 +0200 Subject: [PATCH 3/5] min_container fix --- finetuning/flux/finetune.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/finetuning/flux/finetune.py b/finetuning/flux/finetune.py index abae9ee..5bf1dfb 100644 --- a/finetuning/flux/finetune.py +++ b/finetuning/flux/finetune.py @@ -72,7 +72,7 @@ ]) .with_envs("HF_HUB_ENABLE_HF_TRANSFER=1"), secrets=["HF_TOKEN"], - autoscaler=QueueDepthAutoscaler(min_containers=1, max_containers=3, tasks_per_container=1) + autoscaler=QueueDepthAutoscaler(max_containers=3, tasks_per_container=1) ) def train_lora( image_zip: str = None, From a721c454af5083b941fb8714b26436345ee5a2d7 Mon Sep 17 00:00:00 2001 From: shinbehavior Date: Mon, 23 Jun 2025 21:31:38 +0200 Subject: [PATCH 4/5] folder structure fix, readme fix --- finetuning/flux/README.md | 66 ++++++++++++++++-------------------- finetuning/flux/finetune.py | 61 ++++++++++++++++++++++++++++----- finetuning/flux/inference.py | 61 ++++++++++++++++++++++++++++++--- 3 files changed, 138 insertions(+), 50 deletions(-) diff --git a/finetuning/flux/README.md b/finetuning/flux/README.md index f6b85c8..8f005d1 100644 --- a/finetuning/flux/README.md +++ b/finetuning/flux/README.md @@ -6,8 +6,7 @@ Implementation for fine-tuning FLUX models with LoRA using your own image datase 1. **Create volume** (one-time setup): ```bash - # Beam automatically creates volumes when first used, but you can pre-create: - # This happens automatically when you first deploy + beam volume create flux-lora-finetune ``` 2. **Deploy endpoints**: @@ -32,32 +31,34 @@ Implementation for fine-tuning FLUX models with LoRA using your own image datase ## Volume Storage -The system uses a persistent Beam volume named `flux-lora` that automatically stores: -- Training datasets in `./flux-lora/dataset/` -- Trained models in `./flux-lora/output/` +The system uses a persistent Beam volume named `flux-lora-finetune` that automatically stores: -**Volume is created automatically** when you first deploy - no manual setup needed! 
+``` +flux-lora-finetune/ +├── dataset/ # Training datasets +└── output/ # Trained models +``` ## File Structure ``` -clean-version/ +flux/ ├── finetune.py # Training endpoint ├── inference.py # Image generation endpoint ├── upload.py # Local dataset handling └── README.md # This file ``` -## Detailed Usage +## Training -### 1. Training +### Deploy Training Endpoint -**Deploy training endpoint:** ```bash beam deploy finetune.py:train_lora --name train-lora ``` -**Start training with local images:** +### Start Training with Local Images + ```python from upload import upload_dataset, start_training @@ -73,7 +74,8 @@ training = start_training( ) ``` -**Training parameters:** +### Training Parameters + - `trigger_word`: Token to associate with your concept (e.g., "my_dog", "abstract_art") - `steps`: Training steps (default: 1000) - `learning_rate`: Learning rate (default: 4e-4) @@ -81,14 +83,16 @@ training = start_training( - `alpha`: LoRA alpha scaling (default: 32) - `resolution`: Image resolution (default: 1024) -### 2. Image Generation +## Image Generation + +### Deploy Inference Endpoint -**Deploy inference endpoint:** ```bash beam deploy inference.py:generate --name generate-image ``` -**Generate images:** +### Generate Images + ```python from upload import generate_image @@ -102,7 +106,8 @@ result = generate_image( ) ``` -**Generation parameters:** +### Generation Parameters + - `prompt`: Text description of desired image - `trigger_word`: Token used during training - `width/height`: Image dimensions (256-1024) @@ -112,7 +117,7 @@ result = generate_image( - `negative_prompt`: What to avoid - `num_images`: Number to generate (1-4) -### 3. Complete Workflow +## Complete Workflow ```python from upload import full_workflow @@ -181,39 +186,28 @@ ENDPOINTS = { - **Beam Dashboard**: https://app.beam.cloud/ - **Training logs**: Available in Beam dashboard -- **Model files**: Stored in persistent volume `flux-lora` +- **Model files**: Stored in persistent volume `flux-lora-finetune` ## Tips **For better results:** +``` - Use descriptive trigger words (e.g., "vintage_car" vs "car") - Include trigger word in generation prompts - Experiment with guidance_scale (3-15 range) - Try different seeds for variety +``` **Training tips:** +``` - More steps = better quality but longer training - Higher rank = more capacity but larger files -- 1024 resolution gives best quality on H100 +- Start with 512px for testing, use 768px or 1024px for final training (better quality) +``` **Generation tips:** +``` - Start with 20 inference steps - Use guidance_scale 7-10 for most prompts - Add negative prompts to avoid unwanted elements - -## Troubleshooting - -**Training fails:** -- Check HF_TOKEN is set correctly -- Verify images are valid formats -- Monitor GPU memory usage - -**Generation quality poor:** -- Try different prompts including trigger word -- Adjust guidance_scale -- Check if LoRA loaded correctly - -**Slow generation:** -- Reduce inference steps -- Use smaller image sizes -- Check autoscaler settings \ No newline at end of file +``` \ No newline at end of file diff --git a/finetuning/flux/finetune.py b/finetuning/flux/finetune.py index 5bf1dfb..bd8ae48 100644 --- a/finetuning/flux/finetune.py +++ b/finetuning/flux/finetune.py @@ -9,7 +9,7 @@ import requests from PIL import Image as PILImage -VOLUME_PATH = "./flux-lora-clean" +VOLUME_PATH = "./flux-lora-finetune" @endpoint( name="train-lora", @@ -18,7 +18,7 @@ memory="32Gi", timeout=3600, keep_warm_seconds=60, - volumes=[Volume(name="flux-lora-clean", 
mount_path=VOLUME_PATH)],
+    volumes=[Volume(name="flux-lora-finetune", mount_path=VOLUME_PATH)],
     image=Image(python_version="python3.11")
@@ -242,13 +242,36 @@ def create_dummy_dataset(dataset_dir, trigger_word, resolution):
 
     return 1
 
+def clean_old_models(trigger_word, resolution):
+    """Clean up old models saved for the same trigger word"""
+    # Resolve against the mounted volume rather than a hardcoded legacy path
+    output_dir = os.path.abspath(os.path.join(VOLUME_PATH, "output"))
+    if not os.path.exists(output_dir):
+        return
+
+    # Find old model directories for this trigger word
+    old_dirs = []
+    for item in os.listdir(output_dir):
+        item_path = os.path.join(output_dir, item)
+        if os.path.isdir(item_path) and trigger_word in item:
+            old_dirs.append(item_path)
+
+    if old_dirs:
+        print(f"Cleaning up {len(old_dirs)} old model directories:")
+        for old_dir in old_dirs:
+            print(f"  Removing: {os.path.basename(old_dir)}")
+            shutil.rmtree(old_dir)
+
 def create_training_config(trigger_word, steps, learning_rate, rank, alpha, resolution):
     """Create training configuration"""
     base_dir = VOLUME_PATH
     dataset_dir = os.path.join(base_dir, "dataset")
-    output_dir = os.path.join(base_dir, "output")
+    # Make the output path absolute before it goes into the YAML
+    output_dir = os.path.abspath(os.path.join(base_dir, "output"))
     os.makedirs(output_dir, exist_ok=True)
 
+    # Clean up old models for this trigger word
+    clean_old_models(trigger_word, resolution)
+
     config = {
         "job": "extension",
         "config": {
@@ -343,13 +366,32 @@ def run_training(config):
             check=True
         )
 
-        # Find trained models
-        output_dir = os.path.join(VOLUME_PATH, "output")
+        # Same volume-relative output path as create_training_config, made absolute
+        output_dir = os.path.abspath(os.path.join(VOLUME_PATH, "output"))
+        print(f"Looking for trained models in: {output_dir}")
+        print(f"Output directory exists: {os.path.exists(output_dir)}")
+
+        model_name = config["config"]["name"]
+        print(f"Looking for newly trained model: {model_name}")
+
         trained_models = []
-        for root, dirs, files in os.walk(output_dir):
-            for file in files:
-                if file.endswith('.safetensors'):
-                    trained_models.append(os.path.join(root, file))
+        if os.path.exists(output_dir):
+            print("Contents of output directory:")
+            for root, dirs, files in os.walk(output_dir):
+                print(f"  Directory: {root}")
+                for file in files:
+                    print(f"    File: {file}")
+                    if file.endswith('.safetensors'):
+                        model_path = os.path.join(root, file)
+                        # Only include models from the current training session
+                        if model_name in model_path:
+                            trained_models.append(model_path)
+                            print(f"    Found LoRA model: {file}")
+                        else:
+                            print(f"    Skipped old model: {file}")
+
+        print(f"Training completed! Found {len(trained_models)} new model(s)")
+        for i, model in enumerate(trained_models):
+            print(f"  {i+1}. {os.path.relpath(model, VOLUME_PATH)}")
 
         return {
             "status": "success",
@@ -357,6 +399,7 @@ def run_training(config):
         }
 
     except subprocess.CalledProcessError as e:
+        print(f"Training failed: {str(e)}")
         return {
             "status": "error",
             "message": f"Training failed: {str(e)}"
diff --git a/finetuning/flux/inference.py b/finetuning/flux/inference.py
index 1c5b094..1fdf4f3 100644
--- a/finetuning/flux/inference.py
+++ b/finetuning/flux/inference.py
@@ -5,7 +5,7 @@
 from io import BytesIO
 import base64
 
-VOLUME_PATH = "./flux-lora-clean"
+VOLUME_PATH = "./flux-lora-finetune"
 
 # Global pipeline variable
 pipeline = None
@@ -30,21 +30,54 @@ def load_pipeline():
 
         # Load trained LoRA if available
         output_dir = os.path.join(VOLUME_PATH, "output")
+        print(f"Looking for LoRA models in: {output_dir}")
+        print(f"Volume path: {VOLUME_PATH}")
+        print(f"Volume exists: {os.path.exists(VOLUME_PATH)}")
+
         if os.path.exists(output_dir):
+            print(f"Output directory exists: {output_dir}")
+            print("Output directory contents:")
+            for item in os.listdir(output_dir):
+                item_path = os.path.join(output_dir, item)
+                if os.path.isdir(item_path):
+                    print(f"  DIR: {item}/")
+                else:
+                    print(f"  FILE: {item}")
+
             lora_files = find_lora_models(output_dir)
+            print(f"Found {len(lora_files)} LoRA files")
 
             if lora_files:
-                # Prefer final models over checkpoints
+                # Print all found files for debugging
+                for i, f in enumerate(lora_files):
+                    print(f"  {i+1}. {os.path.basename(f)}")
+
                 final_models = [f for f in lora_files if not any(x in os.path.basename(f) for x in ['_0', 'checkpoint'])]
                 if final_models:
                     latest_lora = max(final_models, key=os.path.getctime)
+                    print(f"Selected final model: {os.path.basename(latest_lora)}")
+                    print(f"Model creation time: {os.path.getctime(latest_lora)}")
                 else:
                     latest_lora = max(lora_files, key=os.path.getctime)
+                    print(f"Selected checkpoint model: {os.path.basename(latest_lora)}")
+                    print(f"Model creation time: {os.path.getctime(latest_lora)}")
 
                 load_lora_weights(pipeline, latest_lora)
                 print(f"Model loaded from: {os.path.relpath(latest_lora, VOLUME_PATH)}")
             else:
                 print("No LoRA models found - using base FLUX model")
+                if os.path.exists(output_dir):
+                    print("Contents of output directory:")
+                    for root, dirs, files in os.walk(output_dir):
+                        for file in files:
+                            print(f"  - {os.path.join(root, file)}")
+        else:
+            print(f"Output directory does not exist: {output_dir}")
+            print("Volume contents:")
+            if os.path.exists(VOLUME_PATH):
+                for root, dirs, files in os.walk(VOLUME_PATH):
+                    for file in files:
+                        print(f"  - {os.path.join(root, file)}")
 
         print("Pipeline loaded successfully!")
         return pipeline
@@ -56,10 +89,28 @@ def load_pipeline():
 def find_lora_models(output_dir):
     """Find available LoRA model files"""
     lora_files = []
+    print(f"Searching for LoRA files in: {output_dir}")
+
+    if not os.path.exists(output_dir):
+        print(f"Output directory does not exist: {output_dir}")
+        return lora_files
+
     for root, dirs, files in os.walk(output_dir):
+        print(f"Checking directory: {root}")
         for file in files:
-            if file.endswith('.safetensors') and not '_000000' in file:  # Skip checkpoints
+            print(f"  Found file: {file}")
+            if file.endswith('.safetensors'):
+                if not '_000000' in file and not file.endswith('_0.safetensors'):
+                    lora_files.append(os.path.join(root, file))
+                    print(f"  Added LoRA file: {file}")
+                else:
+                    print(f"  Skipped checkpoint: {file}")
+            elif file.endswith('.bin') or file.endswith('.pt'):
+                # Also check for other common LoRA formats
                 lora_files.append(os.path.join(root, file))
+                print(f"  Added LoRA file: {file}")
+
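+    # Selection happens later, in load_pipeline(), which picks the newest file
+    # by os.path.getctime, so the order produced by os.walk() here is not significant.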
+ print(f"Total LoRA files found: {len(lora_files)}") return lora_files def load_lora_weights(pipeline, lora_path): @@ -93,10 +144,10 @@ def load_lora_weights(pipeline, lora_path): "sentencepiece" ]) .with_envs("HF_HUB_ENABLE_HF_TRANSFER=1"), - volumes=[Volume(name="flux-lora-clean", mount_path=VOLUME_PATH)], + volumes=[Volume(name="flux-lora-finetune", mount_path=VOLUME_PATH)], secrets=["HF_TOKEN"], autoscaler=QueueDepthAutoscaler(max_containers=1, tasks_per_container=1), - keep_warm_seconds=300 + keep_warm_seconds=60 ) def generate( prompt: str, From 04a714f2a123744ada24fc82c0e0b29c538a683b Mon Sep 17 00:00:00 2001 From: shinbehavior Date: Fri, 4 Jul 2025 19:11:51 +0200 Subject: [PATCH 5/5] LoRA update --- finetuning/flux/README.md | 332 ++++++++++++++--------- finetuning/flux/finetune.py | 505 ++++++++++++----------------------- finetuning/flux/inference.py | 344 ++++++++---------------- finetuning/flux/upload.py | 479 ++++++++++++++++----------------- 4 files changed, 706 insertions(+), 954 deletions(-) diff --git a/finetuning/flux/README.md b/finetuning/flux/README.md index 8f005d1..55064bd 100644 --- a/finetuning/flux/README.md +++ b/finetuning/flux/README.md @@ -1,213 +1,287 @@ # FLUX LoRA Fine-tuning on Beam -Implementation for fine-tuning FLUX models with LoRA using your own image datasets. +Fast and efficient FLUX LoRA training with optimal parameters for small datasets (10-30 images). ## Quick Start -1. **Create volume** (one-time setup): - ```bash - beam volume create flux-lora-finetune - ``` +```bash +# 1. Create volume (one-time setup) +beam volume create flux-lora-data -2. **Deploy endpoints**: - ```bash - beam deploy finetune.py:train_lora --name train-lora - beam deploy inference.py:generate --name generate-image - ``` +# 2. Deploy training endpoint +beam deploy finetune.py:train_flux_lora -3. **Update configuration** in `upload.py`: - - Set your `BEAM_TOKEN` - - Update `ENDPOINTS` with your deployed URLs +# 3. Deploy inference endpoint +beam deploy inference.py:generate_image -4. **Train a model**: - ```bash - python upload.py train ./your_images my_concept - ``` +# 4. Update your tokens in upload.py +# 5. Train your model +python upload.py train ./your_images yourTriggerWord -5. **Generate images**: - ```bash - python upload.py generate "a photo of my_concept" my_concept - ``` +# 6. 
Generate images +python upload.py generate "photo of yourTriggerWord woman" --lora yourTriggerWord +``` ## Volume Storage -The system uses a persistent Beam volume named `flux-lora-finetune` that automatically stores: +The system uses persistent volume `flux-lora-data`: ``` -flux-lora-finetune/ -├── dataset/ # Training datasets -└── output/ # Trained models +flux-lora-data/ +├── flux_lora_yourTriggerWord/ # Trained LoRA models +│ └── flux_lora_yourTriggerWord.safetensors +├── hf_cache/ # Cached base models +└── generated_*.png # Generated images ``` ## File Structure ``` flux/ -├── finetune.py # Training endpoint -├── inference.py # Image generation endpoint -├── upload.py # Local dataset handling -└── README.md # This file +├── finetune.py # Training function (@function) +├── inference.py # Generation function (@function) +├── upload.py # Client interface (like client.py) +└── README.md # This guide ``` ## Training +### Optimal Training Parameters + +**Perfect for small datasets (10-30 images):** +``` +Images: 10-30 high-quality photos +Steps: Auto-calculated (images × 100 + 350) +Training time: ~10 minutes for 13 images +VRAM: ~32GB (H100 GPU) +Resolution: 768px + 1024px mixed +``` + ### Deploy Training Endpoint ```bash -beam deploy finetune.py:train_lora --name train-lora +beam deploy finetune.py:train_flux_lora ``` -### Start Training with Local Images +### Start Training -```python -from upload import upload_dataset, start_training +```bash +python upload.py train ./your_images yourTriggerWord +``` -# Upload your dataset -result = upload_dataset("./training_images", "my_concept") +**Example:** +```bash +python upload.py train ./ira_photos irunTok +``` + +### Advanced Training Options + +```bash +python upload.py train ./photos myTrigger --steps 1650 --lr 4e-4 --rank 32 +``` -# Start training -training = start_training( - result["zip_url"], - "my_concept", - steps=1000, - resolution=1024 -) +**Training Parameters:** +``` +--steps: Training steps (default: auto-calculated) +--lr: Learning rate (default: 4e-4) +--rank: LoRA rank (default: 32) ``` -### Training Parameters +## Dataset Preparation -- `trigger_word`: Token to associate with your concept (e.g., "my_dog", "abstract_art") -- `steps`: Training steps (default: 1000) -- `learning_rate`: Learning rate (default: 4e-4) -- `rank`: LoRA rank - higher = more capacity (default: 32) -- `alpha`: LoRA alpha scaling (default: 32) -- `resolution`: Image resolution (default: 1024) +### Image Requirements + +``` +Format: JPG, JPEG, PNG, WebP +Count: 10-30 images (LoRA works great with just 10-13 images, no need for more than 30) +Quality: High-resolution, varied poses +Consistency: Similar lighting/style +``` + +### Caption Handling + +The system automatically creates captions for your images: + +```python +# Auto-generated captions include: +"portrait of yourTriggerWord woman with long brown hair, looking at camera" +"photo of yourTriggerWord woman, natural lighting" +"yourTriggerWord woman with long brown hair, outdoor setting" +``` + +**Custom captions:** Create `.txt` files with same name as images: +``` +your_folder/ +├── photo1.jpg +├── photo1.txt # "portrait of myTrigger woman smiling" +├── photo2.jpg +└── photo2.txt # "myTrigger woman in professional attire" +``` ## Image Generation ### Deploy Inference Endpoint ```bash -beam deploy inference.py:generate --name generate-image +beam deploy inference.py:generate_image ``` ### Generate Images -```python -from upload import generate_image +```bash +python upload.py generate "your prompt 
here" --lora yourTriggerWord +``` -result = generate_image( - "a sample of my_concept in sunlight", - "my_concept", - width=1024, - height=1024, - steps=20, - seed=42 -) +**Example:** +```bash +python upload.py generate "photo of irunTok woman with brown hair in Paris, on the background Eiffel Tower, high quality" --lora irunTok ``` ### Generation Parameters -- `prompt`: Text description of desired image -- `trigger_word`: Token used during training -- `width/height`: Image dimensions (256-1024) -- `num_inference_steps`: Denoising steps (1-50) -- `guidance_scale`: Prompt following strength (1.0-20.0) -- `seed`: Random seed for reproducibility -- `negative_prompt`: What to avoid -- `num_images`: Number to generate (1-4) +```bash +python upload.py generate "prompt" --lora triggerWord \ + --width 1024 --height 1024 --steps 35 --guidance 3.0 --seed 42 --lora-scale 0.9 +``` -## Complete Workflow +**Parameters:** +``` +--lora: Your trigger word (LoRA name) +--width/height: Image dimensions (default: 1024x1024) +--steps: Inference steps (default: 35) +--guidance: Prompt adherence (default: 3.0) +--seed: Random seed for reproducibility +--lora-scale: LoRA strength 0.0-1.0 (default: 0.9) +``` -```python -from upload import full_workflow +## Command Reference + +### Training Commands + +```bash +# Basic training +python upload.py train ./photos triggerWord + +# Custom parameters +python upload.py train ./photos triggerWord --steps 1650 --lr 4e-4 -# Upload, train, and optionally test -result = full_workflow( - local_folder="./my_images", - trigger_word="my_style", - test_prompt="a painting in my_style", - steps=1000, - resolution=1024 -) +# Check training status +python upload.py wait ``` -## Command Line Usage +### Generation Commands ```bash -# Upload dataset only -python upload.py upload ./images my_concept +# Generate with LoRA +python upload.py generate "prompt" --lora triggerWord -# Upload and start training -python upload.py train ./images my_concept +# Generate without LoRA (base model) +python upload.py generate "prompt" -# Generate image (after training completes) -python upload.py generate "prompt text" my_concept - -# Complete workflow -python upload.py workflow ./images my_concept "test prompt" +# Custom generation settings +python upload.py generate "prompt" --lora triggerWord --width 512 --height 768 --steps 20 ``` -## Dataset Preparation +### File Management -**Supported formats:** JPG, JPEG, PNG, WebP, BMP +```bash +# List volume contents +beam ls flux-lora-data -**Recommendations:** -- 10-50 high-quality images work well -- Images will be resized to training resolution -- Varied poses/angles improve results -- Consistent lighting/style helps +# Download generated image +beam cp beam://flux-lora-data/generated_random_9384.png ./ -**Example folder structure:** -``` -training_images/ -├── image1.jpg -├── image2.png -├── image3.jpg -└── ... 
+# Download specific file +python upload.py download --filename generated_random_9384.png ``` ## Configuration -**Before using, update `upload.py`:** +**Update `upload.py` with your credentials:** + +```python +# Replace with your actual tokens +BEAM_TOKEN = "your_beam_token_here" +TRAIN_FUNCTION = "https://your-train-endpoint.app.beam.cloud" +GENERATE_FUNCTION = "https://your-generate-endpoint.app.beam.cloud" +``` + +## Step Calculation Formula +**Automatic step calculation:** ```python -# Your Beam authentication token -BEAM_TOKEN = "" +optimal_steps = (image_count × 100) + 350 + +# Examples: +# 5 images = 850 steps (~5 minutes) +# 10 images = 1,350 steps (~8 minutes) +# 13 images = 1,650 steps (~10 minutes) +# 20 images = 2,350 steps (~15 minutes) +# 30 images = 3,350 steps (~20 minutes) +``` + +## Tips & Best Practices + +### Training Tips + +``` +Use 10-30 images for best results +Varied poses and angles improve quality +Consistent lighting helps training +Training completes in ~10 minutes for 13 images +H100 GPU provides optimal performance +``` -# Your deployed endpoint URLs -ENDPOINTS = { - "train": "https://train-lora-.app.beam.cloud", - "inference": "https://generate-image-.app.beam.cloud" -} +### Generation Tips + +``` +Always include your trigger word in prompts +Start with guidance_scale 3.0-4.0 +Use 28-35 inference steps for quality +Experiment with lora_scale 0.7-1.0 +Try different seeds for variety ``` -## Monitoring +### Prompt Examples -- **Beam Dashboard**: https://app.beam.cloud/ -- **Training logs**: Available in Beam dashboard -- **Model files**: Stored in persistent volume `flux-lora-finetune` +```bash +# Portrait style +"portrait of yourTrigger woman, professional lighting, high quality" + +# Specific scenes +"photo of yourTrigger woman in Paris, Eiffel Tower background" + +# Artistic styles +"yourTrigger woman in the style of Renaissance painting" + +# Different settings +"yourTrigger woman at sunset, golden hour lighting" +``` + +## Troubleshooting -## Tips +### Training Issues -**For better results:** ``` -- Use descriptive trigger words (e.g., "vintage_car" vs "car") -- Include trigger word in generation prompts -- Experiment with guidance_scale (3-15 range) -- Try different seeds for variety +Check HF_TOKEN is set correctly +Verify images are valid formats (JPG, PNG) +Ensure 10-30 images in folder +Monitor training via Beam dashboard ``` -**Training tips:** +### Generation Issues + ``` -- More steps = better quality but longer training -- Higher rank = more capacity but larger files -- Start with 512px for testing, use 768px or 1024px for final training (better quality) +Verify LoRA name matches training trigger word +Check if training completed successfully +Try different prompt variations +Adjust lora_scale if results are too strong/weak ``` -**Generation tips:** +### File Access + ``` -- Start with 20 inference steps -- Use guidance_scale 7-10 for most prompts -- Add negative prompts to avoid unwanted elements +Use 'beam ls flux-lora-data' to list files +Download with 'beam cp beam://flux-lora-data/file.png ./' +Check Beam dashboard for task status ``` \ No newline at end of file diff --git a/finetuning/flux/finetune.py b/finetuning/flux/finetune.py index bd8ae48..b1814d2 100644 --- a/finetuning/flux/finetune.py +++ b/finetuning/flux/finetune.py @@ -1,406 +1,237 @@ -from beam import endpoint, Volume, Image, QueueDepthAutoscaler -import torch +from beam import function, Volume, Image import os -import shutil -import yaml +import requests import zipfile import 
tempfile -import subprocess -import requests +import yaml from PIL import Image as PILImage -VOLUME_PATH = "./flux-lora-finetune" +VOLUME_PATH = "./flux-lora-data" -@endpoint( - name="train-lora", +@function( + name="simple-flux-train", gpu="H100", - cpu=8, + cpu=4, memory="32Gi", timeout=3600, - keep_warm_seconds=60, - volumes=[Volume(name="flux-lora-finetune", mount_path=VOLUME_PATH)], - image=Image(python_version="python3.11") - .add_python_packages([ - "torch==2.6.0", - "torchvision==0.21.0", - "torchao==0.9.0", - "safetensors", - "transformers==4.52.4", - "lycoris-lora==1.8.3", - "flatten_json", - "pyyaml", - "oyaml", - "tensorboard", - "kornia", - "invisible-watermark", - "einops", - "accelerate", - "toml", - "albumentations==1.4.15", - "albucore==0.0.16", - "pydantic", - "omegaconf", - "k-diffusion", - "open_clip_torch", - "timm", - "prodigyopt", - "controlnet_aux==0.0.10", - "python-dotenv", - "bitsandbytes", - "hf_transfer", - "lpips", - "pytorch_fid", - "optimum-quanto==0.2.4", - "sentencepiece", - "huggingface_hub", - "peft", - "gradio", - "python-slugify", - "opencv-python-headless", - "pytorch-wavelets==1.3.0", - "matplotlib==3.10.1", - "diffusers", - "packaging", - "setuptools<70.0.0", - "requests", - "pillow" - ]) + volumes=[Volume(name="flux-lora-data", mount_path=VOLUME_PATH)], + image=Image(python_version="python3.12") .add_commands([ - "apt-get update && apt-get install -y git libgl1-mesa-glx libglib2.0-0 libsm6 libxext6 libxrender-dev libgomp1", - "pip install git+https://github.com/jaretburkett/easy_dwpose.git", - "pip install git+https://github.com/huggingface/diffusers@363d1ab7e24c5ed6c190abb00df66d9edb74383b" - ]) - .with_envs("HF_HUB_ENABLE_HF_TRANSFER=1"), - secrets=["HF_TOKEN"], - autoscaler=QueueDepthAutoscaler(max_containers=3, tasks_per_container=1) + "apt-get update && apt-get install -y libgl1-mesa-glx libglib2.0-0 libsm6 libxext6 libxrender-dev libgomp1", + "git clone https://github.com/ostris/ai-toolkit.git /ai-toolkit", + "cd /ai-toolkit && git submodule update --init --recursive", + "pip3.12 install --no-cache-dir torch==2.6.0 torchvision==0.21.0 --index-url https://download.pytorch.org/whl/cu126", + "pip3.12 install pyyaml requests pillow opencv-python-headless", + "cd /ai-toolkit && pip3.12 install -r requirements.txt" + ]), + secrets=["HF_TOKEN"] ) -def train_lora( - image_zip: str = None, + +def train_flux_lora( + image_zip: str, trigger_word: str = "TOK", - steps: int = 1000, + steps: int = 1500, learning_rate: float = 4e-4, - rank: int = 32, - alpha: int = 32, - resolution: int = 1024 + rank: int = 32 ): """ - Fine-tune FLUX model with LoRA using uploaded dataset - - Args: - image_zip: URL to zip file containing training images - trigger_word: Token to associate with your concept - steps: Number of training steps - learning_rate: Learning rate for training - rank: LoRA rank (higher = more capacity) - alpha: LoRA alpha (scaling factor) - resolution: Training image resolution + Train FLUX LoRA """ - print(f"Starting LoRA fine-tuning for '{trigger_word}'") - print(f"Training: {steps} steps at {resolution}x{resolution}") - - # Setup environment - setup_environment() - - # Process dataset - image_count = process_dataset(image_zip, trigger_word, resolution) - - if image_count == 0: - return {"error": "No training images found"} - - # Configure training - config = create_training_config( - trigger_word=trigger_word, - steps=steps, - learning_rate=learning_rate, - rank=rank, - alpha=alpha, - resolution=resolution - ) - - # Run training - result = 
run_training(config) - - if result["status"] == "success": - print("Fine-tuning completed successfully!") - return { - "status": "success", - "message": f"LoRA training completed for '{trigger_word}'", - "models": result["models"], - "image_count": image_count, - "trigger_word": trigger_word, - "steps": steps - } - else: - return result - -def setup_environment(): - """Setup training environment and dependencies""" - print("Setting up training environment...") - - # Clone ai-toolkit if needed - toolkit_path = "/tmp/ai-toolkit" - if not os.path.exists(toolkit_path): - print("Downloading ai-toolkit...") - subprocess.run([ - "git", "clone", "https://github.com/ostris/ai-toolkit.git", toolkit_path - ], check=True) - subprocess.run([ - "git", "submodule", "update", "--init", "--recursive" - ], cwd=toolkit_path, check=True) - - # Configure environment variables - os.environ.update({ - 'DISABLE_TELEMETRY': 'YES', - 'HF_TOKEN': os.getenv("HF_TOKEN"), - 'PYTORCH_CUDA_ALLOC_CONF': 'expandable_segments:True,max_split_size_mb:512', - 'TORCH_CUDNN_V8_API_ENABLED': '1', - 'NVIDIA_TF32_OVERRIDE': '1', - 'TORCH_ALLOW_TF32_CUBLAS_OVERRIDE': '1', - 'TOKENIZERS_PARALLELISM': 'false', - 'HF_HUB_ENABLE_HF_TRANSFER': '1' - }) - - import sys - sys.path.insert(0, toolkit_path) - -def process_dataset(image_zip, trigger_word, resolution): - """Process uploaded dataset for training""" - print("Processing training dataset...") - - # Setup directories - base_dir = VOLUME_PATH - dataset_dir = os.path.join(base_dir, "dataset") - - # Clean dataset directory - if os.path.exists(dataset_dir): - shutil.rmtree(dataset_dir) - os.makedirs(dataset_dir, exist_ok=True) - - if not image_zip: - print("No dataset provided, creating dummy data") - return create_dummy_dataset(dataset_dir, trigger_word, resolution) - - # Download and extract dataset - try: - print(f"Downloading dataset: {image_zip}") - zip_response = requests.get(image_zip, timeout=60) - zip_response.raise_for_status() - - with tempfile.NamedTemporaryFile(suffix='.zip', delete=False) as temp_zip: - temp_zip.write(zip_response.content) - temp_zip_path = temp_zip.name - - # Extract and process images - image_count = 0 - with zipfile.ZipFile(temp_zip_path, 'r') as zip_ref: - with tempfile.TemporaryDirectory() as temp_extract_dir: - zip_ref.extractall(temp_extract_dir) - - for root, dirs, files in os.walk(temp_extract_dir): - for file in files: - if file.lower().endswith(('.jpg', '.jpeg', '.png', '.webp', '.bmp')): - if process_image(root, file, dataset_dir, trigger_word, resolution, image_count): - image_count += 1 - - os.remove(temp_zip_path) - print(f"Processed {image_count} training images") - return image_count - - except Exception as e: - print(f"Dataset processing failed: {e}") - return 0 - -def process_image(root, filename, dataset_dir, trigger_word, resolution, image_count): - """Process individual training image""" - try: - old_path = os.path.join(root, filename) - with PILImage.open(old_path) as img: - img = img.convert('RGB') - img = img.resize((resolution, resolution), PILImage.Resampling.LANCZOS) - - new_filename = f"training_image_{image_count + 1}.jpg" - new_path = os.path.join(dataset_dir, new_filename) - img.save(new_path, 'JPEG', quality=95) - - # Create caption file - caption_path = os.path.join(dataset_dir, f"training_image_{image_count + 1}.txt") - with open(caption_path, 'w') as f: - f.write(f"a photo of {trigger_word}") - - return True - except Exception as e: - print(f"Failed to process {filename}: {e}") - return False - -def 
create_dummy_dataset(dataset_dir, trigger_word, resolution): - """Create dummy dataset for testing""" - dummy_img = PILImage.new('RGB', (resolution, resolution), color='red') - dummy_img.save(os.path.join(dataset_dir, "dummy.jpg")) + print(f"Training '{trigger_word}' LoRA") - with open(os.path.join(dataset_dir, "dummy.txt"), 'w') as f: - f.write(f"a photo of {trigger_word}") + dataset_dir, image_count = setup_dataset(image_zip, trigger_word) - return 1 - -def clean_old_models(trigger_word, resolution): - """Clean up old models for the same trigger word""" - output_dir = "/mnt/code/flux-lora-clean/output" - if not os.path.exists(output_dir): - return - - # Find old model directories for this trigger word - old_dirs = [] - for item in os.listdir(output_dir): - item_path = os.path.join(output_dir, item) - if os.path.isdir(item_path) and trigger_word in item: - old_dirs.append(item_path) - - if old_dirs: - print(f"Cleaning up {len(old_dirs)} old model directories:") - for old_dir in old_dirs: - print(f" Removing: {os.path.basename(old_dir)}") - shutil.rmtree(old_dir) - -def create_training_config(trigger_word, steps, learning_rate, rank, alpha, resolution): - """Create training configuration""" - base_dir = VOLUME_PATH - dataset_dir = os.path.join(base_dir, "dataset") - # Make the output path absolute before it goes into the YAML - output_dir = os.path.abspath(os.path.join(base_dir, "output")) - os.makedirs(output_dir, exist_ok=True) - - # Clean up old models for this trigger word - clean_old_models(trigger_word, resolution) + optimal_steps = (image_count * 100) + 350 + print(f"Found {image_count} images") + print(f"Adjusted steps: {optimal_steps}") + steps = optimal_steps config = { "job": "extension", "config": { - "name": f"flux_lora_{trigger_word}_{resolution}", + "name": f"flux_lora_{trigger_word}", "process": [{ "type": "sd_trainer", - "training_folder": output_dir, + "training_folder": VOLUME_PATH, "device": "cuda:0", "trigger_word": trigger_word, "network": { "type": "lora", - "linear": rank, - "linear_alpha": alpha + "linear": 32, + "linear_alpha": 32, + "network_kwargs": { + "only_if_contains": [ + "transformer.single_transformer_blocks.7", + "transformer.single_transformer_blocks.12", + "transformer.single_transformer_blocks.16", + "transformer.single_transformer_blocks.20" + ] + } }, "save": { "dtype": "float16", - "save_every": steps // 2, - "max_step_saves_to_keep": 2 + "save_every": 10000, + "max_step_saves_to_keep": 4, + "push_to_hub": False }, "datasets": [{ - "folder_path": os.path.abspath(dataset_dir), + "folder_path": "/ai-toolkit/input", "caption_ext": "txt", "caption_dropout_rate": 0.05, - "cache_latents": True, - "skip_cache_check": True, "shuffle_tokens": False, "cache_latents_to_disk": True, - "resolution": [resolution] + "resolution": [768, 1024] }], "train": { "batch_size": 1, "steps": steps, - "gradient_accumulation_steps": 8, + "gradient_accumulation_steps": 1, "train_unet": True, "train_text_encoder": False, "gradient_checkpointing": False, "noise_scheduler": "flowmatch", "optimizer": "adamw8bit", - "lr": learning_rate, + "lr": 4e-4, + "lr_scheduler": "cosine", + "skip_first_sample": True, + "disable_sampling": True, "ema_config": { "use_ema": True, "ema_decay": 0.99 }, - "dtype": "fp16" + "dtype": "bf16" }, "model": { "name_or_path": "black-forest-labs/FLUX.1-dev", "is_flux": True, - "quantize": True + "quantize": False, + "low_vram": False }, "sample": { "sampler": "flowmatch", - "sample_every": steps // 2, - "width": resolution, - "height": resolution, - 
"prompts": [ - f"a photo of {trigger_word}", - f"{trigger_word} in professional lighting", - f"portrait of {trigger_word}, high quality" - ], + "sample_every": 10000, + "width": 1024, + "height": 1024, + "prompts": [f"portrait of {trigger_word} woman"], "neg": "", "seed": 42, - "walk_seed": False, - "guidance_scale": 4, - "sample_steps": 10 + "walk_seed": True, + "guidance_scale": 3.5, + "sample_steps": 28 } }] + }, + "meta": { + "name": f"flux_lora_{trigger_word}", + "version": "1.0" } } - return config - -def run_training(config): - """Execute the training process""" - print("Starting LoRA training...") + config_path = "/ai-toolkit/train_config.yaml" + with open(config_path, 'w') as f: + yaml.dump(config, f, default_flow_style=False) - try: - # Save config - toolkit_path = "/tmp/ai-toolkit" - config_path = os.path.join(toolkit_path, "config", "train_config.yaml") - os.makedirs(os.path.dirname(config_path), exist_ok=True) - - with open(config_path, 'w') as f: - yaml.dump(config, f, default_flow_style=False) - - # Run training - os.chdir(toolkit_path) - result = subprocess.run( - ["python", "run.py", "config/train_config.yaml"], - capture_output=False, - text=True, - check=True - ) - - output_dir = "/mnt/code/flux-lora-clean/output" - print(f"Looking for trained models in: {output_dir}") - print(f"Output directory exists: {os.path.exists(output_dir)}") - - model_name = config["config"]["name"] - print(f"Looking for newly trained model: {model_name}") - - trained_models = [] - if os.path.exists(output_dir): - print("Contents of output directory:") - for root, dirs, files in os.walk(output_dir): - print(f" Directory: {root}") - for file in files: - print(f" File: {file}") - if file.endswith('.safetensors'): - model_path = os.path.join(root, file) - # Only include models from the current training session - if model_name in model_path: - trained_models.append(model_path) - print(f" Found LoRA model: {file}") - else: - print(f" Skipped old model: {file}") - - print(f"Training completed! Found {len(trained_models)} new model(s)") - for i, model in enumerate(trained_models): - print(f" {i+1}. 
{os.path.relpath(model, '/mnt/code/flux-lora-clean')}") - + print(f"Config saved to: {config_path}") + + import subprocess + + env = os.environ.copy() + env.update({ + 'HF_TOKEN': os.getenv('HF_TOKEN'), + 'CUDA_VISIBLE_DEVICES': '0' + }) + + print("Starting training...") + process = subprocess.Popen([ + "python3.12", "/ai-toolkit/run.py", "/ai-toolkit/train_config.yaml" + ], env=env, stdout=subprocess.PIPE, stderr=subprocess.STDOUT, text=True, bufsize=1, universal_newlines=True) + + for line in process.stdout: + print(line.rstrip()) + + return_code = process.wait() + + print("Ensuring models are saved to persistent volume...") + copy_models_to_volume() + + if return_code == 0: return { "status": "success", - "models": trained_models + "message": f"Training completed for {trigger_word}", + "output": "Training completed successfully" } - - except subprocess.CalledProcessError as e: - print(f"Training failed: {str(e)}") + else: return { - "status": "error", - "message": f"Training failed: {str(e)}" - } \ No newline at end of file + "status": "error", + "message": f"Training failed with return code {return_code}" + } + +def copy_models_to_volume(): + """Copy any models from ai-toolkit output to persistent volume""" + import shutil + + source_dir = "/ai-toolkit/output" + dest_dir = VOLUME_PATH + + if os.path.exists(source_dir): + print(f"Copying models from {source_dir} to {dest_dir}") + + for root, dirs, files in os.walk(source_dir): + for file in files: + if file.endswith('.safetensors') or file.endswith('.yaml') or file.endswith('.json'): + source_file = os.path.join(root, file) + # Create relative path structure in destination + rel_path = os.path.relpath(root, source_dir) + dest_folder = os.path.join(dest_dir, rel_path) if rel_path != '.' else dest_dir + os.makedirs(dest_folder, exist_ok=True) + + dest_file = os.path.join(dest_folder, file) + shutil.copy2(source_file, dest_file) + print(f"Copied: {file}") + else: + print(f"No source directory {source_dir} found") + +def setup_dataset(image_zip, trigger_word): + """Download and setup dataset in the format they expect""" + dataset_dir = "/ai-toolkit/input" + os.makedirs(dataset_dir, exist_ok=True) + + response = requests.get(image_zip) + with tempfile.NamedTemporaryFile(suffix='.zip', delete=False) as f: + f.write(response.content) + zip_path = f.name + + # Extract images and create captions + count = 0 + with zipfile.ZipFile(zip_path, 'r') as zip_ref: + for file in zip_ref.namelist(): + if file.lower().endswith(('.jpg', '.jpeg', '.png')): + zip_ref.extract(file, dataset_dir) + + base_name = os.path.splitext(file)[0] + caption_path = os.path.join(dataset_dir, f"{base_name}.txt") + + # Only generate caption if .txt file doesn't exist + if not os.path.exists(caption_path): + captions = [ + f"portrait of {trigger_word} woman with long brown hair, looking at camera", + f"photo of {trigger_word} woman, long brown hair, natural lighting", + f"{trigger_word} woman with long brown hair, outdoor setting", + f"close-up portrait of {trigger_word}, long brown hair, detailed face", + f"{trigger_word} woman sitting, long brown hair, realistic photo", + f"portrait photo of {trigger_word} with long brown hair", + f"{trigger_word} woman, long brown hair, professional lighting", + f"photo of {trigger_word} woman, detailed facial features", + f"{trigger_word} with long brown hair, natural expression", + f"portrait of {trigger_word} woman, high quality photo" + ] + + caption = captions[count % len(captions)] + + with open(caption_path, 'w') as caption_file: + 
caption_file.write(caption) + count += 1 + + os.unlink(zip_path) + print(f"Setup {count} training images in {dataset_dir}") + return dataset_dir, count diff --git a/finetuning/flux/inference.py b/finetuning/flux/inference.py index 1fdf4f3..7081238 100644 --- a/finetuning/flux/inference.py +++ b/finetuning/flux/inference.py @@ -1,259 +1,131 @@ -from beam import endpoint, Volume, Image, QueueDepthAutoscaler, Output -import torch -from diffusers import FluxPipeline +from beam import function, Volume, Output, Image import os -from io import BytesIO import base64 +from io import BytesIO -VOLUME_PATH = "./flux-lora-finetune" - -# Global pipeline variable -pipeline = None +VOLUME_PATH = "./flux-lora-data" -def load_pipeline(): - """Load FLUX pipeline with trained LoRA""" - global pipeline - - if pipeline is not None: - return pipeline - +def get_pipeline(lora_name: str = None): + """ + Loads the base FLUX pipeline and optionally attaches a LoRA adapter. + The base model is cached on the persistent volume to speed up subsequent loads. + """ print("Loading FLUX pipeline...") - try: - # Load base FLUX model with memory optimizations - pipeline = FluxPipeline.from_pretrained( - "black-forest-labs/FLUX.1-dev", - torch_dtype=torch.float16, - token=os.getenv("HF_TOKEN"), - use_safetensors=True - ).to("cuda") - - # Load trained LoRA if available - output_dir = os.path.join(VOLUME_PATH, "output") - print(f"Looking for LoRA models in: {output_dir}") - print(f"Volume path: {VOLUME_PATH}") - print(f"Volume exists: {os.path.exists(VOLUME_PATH)}") - - if os.path.exists(output_dir): - print(f"Output directory exists: {output_dir}") - print(f"Output directory contents:") - for item in os.listdir(output_dir): - item_path = os.path.join(output_dir, item) - if os.path.isdir(item_path): - print(f" DIR: {item}/") - else: - print(f" FILE: {item}") - - lora_files = find_lora_models(output_dir) - print(f"Found {len(lora_files)} LoRA files") - - if lora_files: - # Print all found files for debugging - for i, f in enumerate(lora_files): - print(f" {i+1}. 
{os.path.basename(f)}") - - final_models = [f for f in lora_files if not any(x in os.path.basename(f) for x in ['_0', 'checkpoint'])] - if final_models: - latest_lora = max(final_models, key=os.path.getctime) - print(f"Selected final model: {os.path.basename(latest_lora)}") - print(f"Model creation time: {os.path.getctime(latest_lora)}") - else: - latest_lora = max(lora_files, key=os.path.getctime) - print(f"Selected checkpoint model: {os.path.basename(latest_lora)}") - print(f"Model creation time: {os.path.getctime(latest_lora)}") - - load_lora_weights(pipeline, latest_lora) - print(f"Model loaded from: {os.path.relpath(latest_lora, VOLUME_PATH)}") - else: - print("No LoRA models found - using base FLUX model") - if os.path.exists(output_dir): - print("Contents of output directory:") - for root, dirs, files in os.walk(output_dir): - for file in files: - print(f" - {os.path.join(root, file)}") - else: - print(f"Output directory does not exist: {output_dir}") - print(f"Volume contents:") - if os.path.exists(VOLUME_PATH): - for root, dirs, files in os.walk(VOLUME_PATH): - for file in files: - print(f" - {os.path.join(root, file)}") - - print("Pipeline loaded successfully!") - return pipeline - - except Exception as e: - print(f"Failed to load pipeline: {e}") - raise - -def find_lora_models(output_dir): - """Find available LoRA model files""" - lora_files = [] - print(f"Searching for LoRA files in: {output_dir}") + from diffusers import FluxPipeline + import torch - if not os.path.exists(output_dir): - print(f"Output directory does not exist: {output_dir}") - return lora_files + cache_path = os.path.join(VOLUME_PATH, "hf_cache") + os.makedirs(cache_path, exist_ok=True) - for root, dirs, files in os.walk(output_dir): - print(f"Checking directory: {root}") - for file in files: - print(f" Found file: {file}") - if file.endswith('.safetensors'): - if not '_000000' in file and not file.endswith('_0.safetensors'): - lora_files.append(os.path.join(root, file)) - print(f" Added LoRA file: {file}") - else: - print(f" Skipped checkpoint: {file}") - elif file.endswith('.bin') or file.endswith('.pt'): - # Also check for other common LoRA formats - lora_files.append(os.path.join(root, file)) - print(f" Added LoRA file: {file}") + pipeline = FluxPipeline.from_pretrained( + "black-forest-labs/FLUX.1-dev", + torch_dtype=torch.float16, + token=os.getenv("HF_TOKEN"), + cache_dir=cache_path + ).to("cuda") - print(f"Total LoRA files found: {len(lora_files)}") - return lora_files + if lora_name: + lora_folder_name = f"flux_lora_{lora_name}" + lora_filename = f"{lora_folder_name}.safetensors" + lora_full_path = os.path.join(VOLUME_PATH, lora_folder_name, lora_filename) -def load_lora_weights(pipeline, lora_path): - """Load LoRA weights into pipeline""" - try: - pipeline.load_lora_weights( - os.path.dirname(lora_path), - weight_name=os.path.basename(lora_path) - ) - print(f"Loaded LoRA: {os.path.basename(lora_path)}") - except Exception as e: - print(f"Failed to load LoRA: {e}") - print(f" Attempted to load from: {lora_path}") + if os.path.exists(lora_full_path): + print(f"Loading LoRA adapter: {lora_filename}") + try: + lora_dir = os.path.dirname(lora_full_path) + pipeline.load_lora_weights(lora_dir, weight_name=lora_filename) + print(f"Successfully attached LoRA: {lora_name}") + return pipeline, lora_full_path + except Exception as e: + print(f"Failed to load LoRA adapter '{lora_filename}': {e}") + else: + print(f"LoRA adapter not found for '{lora_name}'. 
Using the base model.") + else: + print("No LoRA name provided, using the base model.") + + return pipeline, None -@endpoint( - name="generate-image", - on_start=load_pipeline, +@function( + name="simple-flux-generate", gpu="A100-40", cpu=4, memory="32Gi", - image=Image(python_version="python3.11") - .add_python_packages([ - "torch==2.6.0", - "diffusers", - "transformers==4.52.4", - "safetensors", - "accelerate", - "pillow", - "hf_transfer", - "protobuf", - "sentencepiece" - ]) - .with_envs("HF_HUB_ENABLE_HF_TRANSFER=1"), - volumes=[Volume(name="flux-lora-finetune", mount_path=VOLUME_PATH)], - secrets=["HF_TOKEN"], - autoscaler=QueueDepthAutoscaler(max_containers=1, tasks_per_container=1), - keep_warm_seconds=60 + volumes=[Volume(name="flux-lora-data", mount_path=VOLUME_PATH)], + image=Image(python_version="python3.12") + .add_commands([ + "apt-get update && apt-get install -y libgl1-mesa-glx libglib2.0-0 libsm6 libxext6 libxrender-dev libgomp1", + "git clone https://github.com/ostris/ai-toolkit.git /ai-toolkit", + "cd /ai-toolkit && git submodule update --init --recursive", + "pip3.12 install --no-cache-dir torch==2.6.0 torchvision==0.21.0 --index-url https://download.pytorch.org/whl/cu126", + "pip3.12 install pyyaml requests pillow opencv-python-headless", + "cd /ai-toolkit && pip3.12 install -r requirements.txt" + ]), + secrets=["HF_TOKEN"] ) -def generate( +def generate_image( prompt: str, - trigger_word: str = "TOK", - width: int = 512, - height: int = 512, - num_inference_steps: int = 20, - guidance_scale: float = 7.5, + lora_name: str = None, + width: int = 1024, + height: int = 1024, + steps: int = 35, + guidance: float = 3.0, seed: int = None, - negative_prompt: str = "", - num_images: int = 1 + lora_scale: float = 0.9 ): """ - Generate images using fine-tuned FLUX LoRA model - - Args: - prompt: Text description of desired image - trigger_word: Token used during training - width: Image width (256-1024) - height: Image height (256-1024) - num_inference_steps: Number of denoising steps (1-50) - guidance_scale: How closely to follow prompt (1.0-20.0) - seed: Random seed for reproducibility - negative_prompt: What to avoid in generation - num_images: Number of images to generate (1-4) + Generate image with a dynamically loaded LoRA adapter. 
""" - global pipeline + pipeline, lora_path = get_pipeline(lora_name=lora_name) - try: - # Ensure pipeline is loaded - if pipeline is None: - pipeline = load_pipeline() - - # Validate parameters - num_images = max(1, min(num_images, 4)) - width = max(256, min(width, 1024)) - height = max(256, min(height, 1024)) - num_inference_steps = max(1, min(num_inference_steps, 50)) - guidance_scale = max(1.0, min(guidance_scale, 20.0)) - - print(f"Generating {num_images} image(s)") - print(f"Prompt: '{prompt}'") - - # Set random seed if provided - if seed is not None: - torch.manual_seed(seed) - if torch.cuda.is_available(): - torch.cuda.manual_seed(seed) - - # Generate images - with torch.no_grad(): - result = pipeline( - prompt=prompt, - negative_prompt=negative_prompt if negative_prompt else None, - width=width, - height=height, - num_inference_steps=num_inference_steps, - guidance_scale=guidance_scale, - num_images_per_prompt=num_images, - generator=torch.Generator(device="cuda").manual_seed(seed) if seed is not None else None - ) - - # Convert images to base64 and create shareable URLs - encoded_images = [] - image_urls = [] - - for i, image in enumerate(result.images): - # Base64 encoding for immediate use - buffered = BytesIO() - image.save(buffered, format="PNG") - img_str = base64.b64encode(buffered.getvalue()).decode() - encoded_images.append(img_str) - - # Also create shareable URL using Beam Output - try: - filename = f"generated_image_{i+1}.png" - output = Output(path=filename) - output.save(image) - image_urls.append(output.public_url) - except Exception as e: - print(f"Failed to create public URL for image {i+1}: {e}") - image_urls.append(None) - - print(f"Generated {len(encoded_images)} image(s)") - - return { - "status": "success", - "images": encoded_images, # Base64 for immediate use - "image_urls": image_urls, # Public URLs for sharing - "prompt": prompt, - "trigger_word": trigger_word, + import torch + + print(f"Generating: '{prompt}'") + print(f"{width}x{height}, steps: {steps}, guidance: {guidance}, lora: {lora_name or 'None'}, scale: {lora_scale}") + + generator = None + if seed: + generator = torch.Generator(device="cuda").manual_seed(seed) + print(f"Using seed: {seed}") + + with torch.no_grad(): + result = pipeline( + prompt=prompt, + width=width, + height=height, + num_inference_steps=steps, + guidance_scale=guidance, + generator=generator + ) + + image = result.images[0] + + buffered = BytesIO() + image.save(buffered, format="PNG") + img_base64 = base64.b64encode(buffered.getvalue()).decode() + + filename = f"generated_{seed or 'random'}_{hash(prompt) % 10000}.png" + output = Output.from_pil_image(image) + output.save() + + local_path = os.path.join(VOLUME_PATH, filename) + image.save(local_path) + + print(f"Generated and saved: {filename}") + print(f"Local path: {local_path}") + print(f"Public URL: {output.public_url}") + + return { + "status": "success", + "image": img_base64, + "url": output.public_url, + "prompt": prompt, + "settings": { + "width": width, + "height": height, + "steps": steps, + "guidance": guidance, "seed": seed, - "settings": { - "width": width, - "height": height, - "num_inference_steps": num_inference_steps, - "guidance_scale": guidance_scale, - "negative_prompt": negative_prompt - }, - "num_images": len(encoded_images) + "lora": lora_name } - - except Exception as e: - print(f"Generation failed: {e}") - return { - "status": "error", - "message": f"Failed to generate image: {str(e)}", - "prompt": prompt - } \ No newline at end of file + } \ No 
newline at end of file diff --git a/finetuning/flux/upload.py b/finetuning/flux/upload.py index 17e6097..1c0a343 100644 --- a/finetuning/flux/upload.py +++ b/finetuning/flux/upload.py @@ -1,307 +1,282 @@ from beam import Client import os import zipfile +import base64 +from PIL import Image +from io import BytesIO import time -from datetime import datetime +import requests +import argparse -# Your Beam authentication token -BEAM_TOKEN = "" +# Your Beam configuration +BEAM_TOKEN = "your_beam_token_here" +TRAIN_FUNCTION = "https://your-train-endpoint.app.beam.cloud" +GENERATE_FUNCTION = "https://your-generate-endpoint.app.beam.cloud" -# Your deployed endpoint URLs -ENDPOINTS = { - "train": "https://train-lora-.app.beam.cloud", - "inference": "https://generate-image-.app.beam.cloud" -} - -def upload_dataset(local_folder: str, trigger_word: str): - """ - Upload local image dataset to Beam for training +def train(folder_path: str, trigger_word: str, steps: int = 1000, lr: float = 1e-4, rank: int = 16): + """Train a LoRA using Beam SDK""" - Args: - local_folder: Path to folder containing training images - trigger_word: Token to associate with your concept + print(f"Training '{trigger_word}'") + print(f"Using images from: {folder_path}") - Returns: - dict: Upload result with zip URL - """ - print(f"Preparing dataset from: {local_folder}") - print(f"Trigger word: '{trigger_word}'") + if not os.path.exists(folder_path): + raise ValueError(f"Folder not found: {folder_path}") - # Validate inputs - if not os.path.exists(local_folder): - raise ValueError(f"❌ Folder not found: {local_folder}") + image_files = [f for f in os.listdir(folder_path) + if f.lower().endswith(('.jpg', '.jpeg', '.png', '.webp'))] - if not trigger_word or trigger_word.strip() == "": - raise ValueError("❌ Trigger word cannot be empty!") + if len(image_files) == 0: + raise ValueError(f"No images found in {folder_path}") - # Initialize Beam client - client = Client(token=BEAM_TOKEN) + print(f"Found {len(image_files)} images") - # Create zip file from local images - zip_filename = f"{trigger_word.replace(' ', '_')}_dataset.zip" - image_count = create_dataset_zip(local_folder, zip_filename) + zip_path = f"{trigger_word}_training.zip" + with zipfile.ZipFile(zip_path, 'w') as zipf: + for file in image_files: + file_path = os.path.join(folder_path, file) + zipf.write(file_path, file) + print(f" Added: {file}") - if image_count == 0: - raise ValueError(f"❌ No images found in {local_folder}") + print("Uploading with Beam...") + client = Client(token=BEAM_TOKEN) try: - print("Uploading dataset to Beam...") - zip_url = client.upload_file(zip_filename) - print(f"Dataset uploaded successfully!") - print(f"{image_count} images ready for training") + zip_url = client.upload_file(zip_path) + print(f"Uploaded: {zip_url}") - # Clean up local zip - os.remove(zip_filename) + print("Starting training...") + response = requests.post(TRAIN_FUNCTION, + headers={'Authorization': f'Bearer {BEAM_TOKEN}', 'Content-Type': 'application/json'}, + json={ + 'image_zip': zip_url, + 'trigger_word': trigger_word, + 'steps': steps, + 'learning_rate': lr, + 'rank': rank + }) - return { - "status": "success", - "zip_url": zip_url, - "image_count": image_count, - "trigger_word": trigger_word - } - - except Exception as e: - # Clean up on failure - if os.path.exists(zip_filename): - os.remove(zip_filename) - raise Exception(f"❌ Upload failed: {e}") - -def create_dataset_zip(local_folder: str, zip_filename: str): - """Create zip file from local image folder""" - 
image_count = 0 - supported_formats = ('.jpg', '.jpeg', '.png', '.webp', '.bmp') - - with zipfile.ZipFile(zip_filename, 'w', zipfile.ZIP_DEFLATED) as zipf: - for filename in os.listdir(local_folder): - if filename.lower().endswith(supported_formats): - file_path = os.path.join(local_folder, filename) - if os.path.isfile(file_path): - zipf.write(file_path, filename) - image_count += 1 - print(f" Added: {filename}") - - print(f"Created dataset zip with {image_count} images") - return image_count - -def start_training(zip_url: str, trigger_word: str, **kwargs): - """ - Start LoRA training with uploaded dataset - - Args: - zip_url: URL to uploaded dataset zip - trigger_word: Token used during training - **kwargs: Additional training parameters - - Returns: - dict: Training request result - """ - import requests - - # Default training parameters - training_params = { - "image_zip": zip_url, - "trigger_word": trigger_word, - "steps": kwargs.get("steps", 1000), - "learning_rate": kwargs.get("learning_rate", 4e-4), - "rank": kwargs.get("rank", 32), - "alpha": kwargs.get("alpha", 32), - "resolution": kwargs.get("resolution", 1024) - } - - print("Starting LoRA training...") - print(f"Steps: {training_params['steps']}") - print(f"Resolution: {training_params['resolution']}x{training_params['resolution']}") - - try: - response = requests.post( - ENDPOINTS["train"], - headers={ - 'Authorization': f'Bearer {BEAM_TOKEN}', - 'Content-Type': 'application/json' - }, - json=training_params, - timeout=300 - ) + os.remove(zip_path) if response.status_code == 200: - print("Training started successfully!") - return {"status": "training_started", "params": training_params} + result = response.json() + print(f"Training started!") + return {"status": "success", "result": result, "trigger_word": trigger_word} else: - print(f"HTTP {response.status_code} - Check Beam dashboard") - return {"status": "request_sent", "params": training_params} - - except requests.exceptions.Timeout: - print("Request timed out - Training likely started") - return {"status": "timeout", "params": training_params} + print(f"Failed: {response.status_code} - {response.text}") + return {"status": "error", "message": response.text} + except Exception as e: - print(f"Request error: {e}") - return {"status": "error", "message": str(e)} + if os.path.exists(zip_path): + os.remove(zip_path) + raise Exception(f"Training failed: {e}") -def generate_image(prompt: str, trigger_word: str, **kwargs): - """ - Generate image using trained model - - Args: - prompt: Text description of desired image - trigger_word: Token used during training - **kwargs: Additional generation parameters +def generate(prompt: str, lora_name: str = None, width: int = 1024, height: int = 1024, steps: int = 20, guidance: float = 4.0, seed: int = None, lora_scale: float = 0.8): + """Generate image using requests like training""" - Returns: - dict: Generation result with base64 images - """ - import requests - import base64 - from PIL import Image - from io import BytesIO + print(f"Generating: '{prompt}'") - # Default generation parameters - gen_params = { - "prompt": prompt, - "trigger_word": trigger_word, - "width": kwargs.get("width", 512), - "height": kwargs.get("height", 512), - "num_inference_steps": kwargs.get("steps", 20), - "guidance_scale": kwargs.get("guidance_scale", 7.5), - "seed": kwargs.get("seed"), - "negative_prompt": kwargs.get("negative_prompt", ""), - "num_images": kwargs.get("num_images", 1) + params = { + 'prompt': prompt, + 'lora_name': lora_name, + 'width': 
width, + 'height': height, + 'steps': steps, + 'guidance': guidance, + 'seed': seed, + 'lora_scale': lora_scale } - - print(f"Generating image...") - print(f"Prompt: '{prompt}'") + params = {k: v for k, v in params.items() if v is not None} try: + print(f"Sending request (lora: {lora_name or 'None'})...") response = requests.post( - ENDPOINTS["inference"], - headers={ - 'Authorization': f'Bearer {BEAM_TOKEN}', - 'Content-Type': 'application/json' - }, - json=gen_params, - timeout=120 + GENERATE_FUNCTION, + headers={'Authorization': f'Bearer {BEAM_TOKEN}', 'Content-Type': 'application/json'}, + json=params, + timeout=600 ) if response.status_code == 200: result = response.json() - if result.get("status") == "success": - print("Image generated successfully!") + + if 'task_id' in result and 'image' not in result: + print(f"Task started: {result['task_id']}") + print("Waiting for task to complete...") + print("Image will be saved to Beam volume: ./flux-lora-data/") + print("Check your Beam dashboard for progress") + print("Task submitted successfully!") + print("Check 'beam ls volume-name' to see generated images") + return result + + if result.get('image'): + print("Saving image locally...") + img_data = base64.b64decode(result['image']) + img = Image.open(BytesIO(img_data)) - # Save first image locally - if result.get("images"): - image_data = base64.b64decode(result["images"][0]) - image = Image.open(BytesIO(image_data)) - - filename = f"generated_{int(time.time())}.png" - image.save(filename) - print(f"Saved: {filename}") + filename = f"generated_{lora_name or 'base'}_{int(time.time())}.png" + full_path = os.path.abspath(filename) + img.save(full_path) - return result + if os.path.exists(full_path): + print(f"Successfully saved locally: {full_path}") + + if result.get('url'): + print(f"Public URL: {result['url']}") + if result.get('settings'): + settings = result['settings'] + print(f"Settings: {settings['width']}x{settings['height']}, steps: {settings['steps']}") + else: + print(f"Failed to save file: {full_path}") else: - print(f"Generation failed: {result.get('message', 'Unknown error')}") - return result + print("No image data in response!") + print(f"Full response: {result}") + + return result else: - print(f"HTTP {response.status_code}: {response.text}") - return {"status": "error", "message": f"HTTP {response.status_code}"} + print(f"Failed: {response.status_code} - {response.text}") + return {"status": "error", "message": response.text} + except requests.exceptions.Timeout: + print("Request timed out after 10 minutes") + print("The generation might still be running on Beam") + print("Check your Beam dashboard for task status") + return {"status": "error", "message": "Request timed out"} except Exception as e: print(f"Generation failed: {e}") return {"status": "error", "message": str(e)} -def full_workflow(local_folder: str, trigger_word: str, test_prompt: str = None, **training_kwargs): - """ - Complete workflow: upload dataset, train model, and test generation - - Args: - local_folder: Path to training images - trigger_word: Token for your concept - test_prompt: Optional test prompt after training - **training_kwargs: Training parameters +def wait_for_task_completion(task_id: str, max_wait: int = 600, check_interval: int = 10): + """Wait for a task to complete using Beam SDK""" - Returns: - dict: Complete workflow results - """ - workflow_start = datetime.now() + print(f"Polling task {task_id} every {check_interval} seconds...") + start_time = time.time() - print("Starting 
complete LoRA workflow...")
-    print("="*50)
+    client = Client(token=BEAM_TOKEN)
 
-    try:
-        # Step 1: Upload dataset
-        print("STEP 1: Uploading dataset")
-        upload_result = upload_dataset(local_folder, trigger_word)
-        
-        # Step 2: Start training
-        print("\nSTEP 2: Starting training")
-        training_result = start_training(
-            upload_result["zip_url"],
-            trigger_word,
-            **training_kwargs
-        )
-        
-        # Step 3: Optional test generation
-        if test_prompt:
-            print(f"\nSTEP 3: Testing with prompt: '{test_prompt}'")
-            # Note: In real usage, you'd wait for training to complete first
-            print("(Wait for training to complete before testing)")
-        
-        total_time = datetime.now() - workflow_start
-        
-        print("\nWorkflow completed!")
-        print(f"Total time: {str(total_time).split('.')[0]}")
-        
-        return {
-            "status": "success",
-            "upload": upload_result,
-            "training": training_result,
-            "total_time": str(total_time).split('.')[0]
-        }
+    while time.time() - start_time < max_wait:
+        try:
+            task_result = client.get_task_result(task_id)
+            
+            if task_result is not None:
+                print("Task completed successfully!")
+                return task_result
+            else:
+                print("Task still running...")
+                
+        except Exception:
+            print(f"Task running (checking via dashboard)... {int(time.time() - start_time)}s elapsed")
-    except Exception as e:
-        print(f"\nWorkflow failed: {e}")
-        return {"status": "error", "message": str(e)}
+        time.sleep(check_interval)
+    
+    print(f"Timeout waiting for task completion after {max_wait} seconds")
+    print("Check your Beam dashboard - the task may still be running")
+    return {"status": "timeout", "task_id": task_id}
 
-# Usage examples
-if __name__ == "__main__":
-    import sys
+def wait_for_training(task_id: str, check_interval: int = 60):
+    """Check training status by polling task_id"""
     
-    # Update these before running
-    print("Remember to update BEAM_TOKEN and ENDPOINTS in this file!")
+    print(f"Training task: {task_id}")
+    print("Training typically takes 30-60 minutes depending on steps")
+    print("Check your Beam dashboard for real-time progress:")
+    print("https://dashboard.beam.cloud/")
+    print("Once training completes, the models will be saved to your volume")
     
-    if len(sys.argv) < 2:
-        print("Usage examples:")
-        print("  python upload.py upload ./images my_concept")
-        print("  python upload.py train ./images my_concept")
-        print("  python upload.py generate 'a photo of my_concept' my_concept")
-        print("  python upload.py workflow ./images my_concept 'test prompt'")
-        sys.exit(1)
+    return {"status": "info", "message": "Check Beam dashboard for progress", "task_id": task_id}
+
+def download_image(filename: str = None):
+    """Download the latest generated image from Beam volume"""
     
-    command = sys.argv[1]
+    if filename:
+        try:
+            print(f"Downloading {filename}...")
+            local_path = f"./downloaded_{filename}"
+            
+            import subprocess
+            result = subprocess.run([
+                'beam', 'cp', f'beam://flux-lora-data/{filename}', local_path
+            ], capture_output=True, text=True)
+            
+            if result.returncode == 0:
+                print(f"Downloaded: {local_path}")
+                return {"status": "success", "path": local_path}
+            else:
+                print(f"Download failed: {result.stderr}")
+                return {"status": "error", "message": result.stderr}
+                
+        except Exception as e:
+            print(f"Download failed: {e}")
+            return {"status": "error", "message": str(e)}
+    else:
+        print("Finding latest generated image...")
+        print("Use: beam ls flux-lora-data")
+        print("Then: python upload.py download --filename generated_xxx.png")
+        return {"status": "info", "message": "Specify filename to download"}
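+
+# Example (sketch): poll a queued training task until it finishes. The task ID
+# printed when training starts is a placeholder here:
+#
+#   result = wait_for_task_completion("task-abc123", max_wait=3600)
+#   if isinstance(result, dict) and result.get("status") == "timeout":
+#       print("Still running - check the Beam dashboard")
+
+if __name__ == "__main__":
+    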
parser = argparse.ArgumentParser(description="FLUX LoRA Training & Generation (Beam SDK)") + subparsers = parser.add_subparsers(dest="command", required=True) + + # --- Train Command --- + train_parser = subparsers.add_parser("train", help="Train a LoRA model.") + train_parser.add_argument("folder", help="Path to the folder with training images.") + train_parser.add_argument("trigger_word", help="The trigger word for the LoRA (e.g., 'iraTok').") + train_parser.add_argument("--steps", type=int, default=1650, help="Number of training steps (formula: n*100+350, where n=images).") + train_parser.add_argument("--lr", type=float, default=4e-4, help="Learning rate.") + train_parser.add_argument("--rank", type=int, default=32, help="LoRA rank.") + + # --- Generate Command --- + gen_parser = subparsers.add_parser("generate", help="Generate an image.") + gen_parser.add_argument("prompt", help="The text prompt for generation.") + gen_parser.add_argument("--lora", dest="lora_name", help="Name of the LoRA to use (your trigger_word).") + gen_parser.add_argument("--width", type=int, default=1024) + gen_parser.add_argument("--height", type=int, default=1024) + gen_parser.add_argument("--steps", type=int, default=35, help="Number of inference steps.") + gen_parser.add_argument("--guidance", type=float, default=3) + gen_parser.add_argument("--seed", type=int) + gen_parser.add_argument("--lora-scale", type=float, default=0.9, help="LoRA adapter strength (0.0-1.0)") + + + # --- Wait Command --- + wait_parser = subparsers.add_parser("wait", help="Check the status of a training task.") + wait_parser.add_argument("task_id", help="The task ID to check.") + + # --- Download Command --- + download_parser = subparsers.add_parser("download", help="Download the latest generated image from volume.") + download_parser.add_argument("--filename", help="Specific filename to download (optional)") + + args = parser.parse_args() - if command == "upload" and len(sys.argv) >= 4: - folder = sys.argv[2] - trigger = sys.argv[3] - result = upload_dataset(folder, trigger) - print("Result:", result) + if args.command == "train": + result = train( + folder_path=args.folder, + trigger_word=args.trigger_word, + steps=args.steps, + lr=args.lr, + rank=args.rank + ) + print(f"\nResult: {result}") - elif command == "train" and len(sys.argv) >= 4: - folder = sys.argv[2] - trigger = sys.argv[3] - upload_result = upload_dataset(folder, trigger) - training_result = start_training(upload_result["zip_url"], trigger) - print("Training result:", training_result) + elif args.command == "generate": + result = generate( + prompt=args.prompt, + lora_name=args.lora_name, + width=args.width, + height=args.height, + steps=args.steps, + guidance=args.guidance, + seed=args.seed, + lora_scale=args.lora_scale + ) + print(f"\nResult: {result}") - elif command == "generate" and len(sys.argv) >= 4: - prompt = sys.argv[2] - trigger = sys.argv[3] - result = generate_image(prompt, trigger) - print("Generation result:", result) + elif args.command == "wait": + result = wait_for_training(args.task_id) + print(f"\nResult: {result}") - elif command == "workflow" and len(sys.argv) >= 4: - folder = sys.argv[2] - trigger = sys.argv[3] - test_prompt = sys.argv[4] if len(sys.argv) > 4 else None - result = full_workflow(folder, trigger, test_prompt) - print("Workflow result:", result) + elif args.command == "download": + result = download_image(args.filename) + print(f"\nResult: {result}") else: - print("Invalid command or missing arguments") \ No newline at end of file + 
print("Invalid command. Use --help to see available commands.") + sys.exit(1)