3 changes: 3 additions & 0 deletions .idea/.gitignore


4 changes: 4 additions & 0 deletions .idea/vcs.xml


42 changes: 40 additions & 2 deletions F2LLM/README.md
@@ -1,6 +1,44 @@
## F2LLM
# F2LLM

F2LLMs (Foundation-to-Feature Large Language Models) are foundation models directly finetuned on 6 million high-quality query-document pairs, striking a strong balance between model size, training cost, and embedding performance:
F2LLM is a framework for converting decoder-only LLMs to embedding models.

## LoRA Support

F2LLM now supports Low-Rank Adaptation (LoRA) for efficient fine-tuning. This allows you to adapt base models with minimal parameter updates, significantly reducing computational costs and memory requirements.

### Features

- Support for LoRA with configurable rank (r), alpha, and dropout
- Target module selection for LoRA adaptation
- Full compatibility with existing training and inference pipelines
- Easy model merging capabilities

### Configuration

To enable LoRA, set `use_lora: true` in your configuration file and specify the LoRA parameters:

- `use_lora`: Enable LoRA (boolean)
- `lora_r`: LoRA attention dimension (int, default: 8)
- `lora_alpha`: LoRA scaling factor (int, default: 16)
- `lora_dropout`: Dropout probability for LoRA layers (float, default: 0.05)
- `lora_target_modules`: Target modules for LoRA (string, default: "all-linear")

### Example Configuration

See `config_lora_example.json` for a complete example of using LoRA with F2LLM.

### Usage

1. Install the required dependencies: `pip install peft`
2. Update your config file to enable LoRA
3. Run training as usual: `python run.py --config your_config.json`
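
After training, the resulting adapter can be attached back onto the base model with the `lora_utils` helpers added in this PR. A minimal sketch, assuming the run produced a PEFT-format adapter directory (both paths below are placeholders):

```python
from lora_utils import load_model_with_lora

# Placeholder paths: point these at your own base model and saved adapter.
model, tokenizer = load_model_with_lora(
    base_model_path="models/qwen3-0.6b",
    lora_adapter_path="output/f2llm_lora_example/adapter",
)
model.eval()
```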

### Benefits

- **Memory Efficiency**: Only train a small subset of parameters
- **Computational Efficiency**: Faster training and lower GPU memory usage
- **Modularity**: Multiple adapters can be applied to the same base model
- **Compatibility**: Seamless integration with existing F2LLM pipeline

<p align="center">
<img src="imgs/overview.png" width="700"/>
6 changes: 6 additions & 0 deletions F2LLM/arguments.py
@@ -27,6 +27,12 @@ class Args:
log_interval: int = 20
checkpointing_steps: int = 100
validation_steps: int = 100
# LoRA-specific arguments
use_lora: bool = False
lora_r: int = 8
lora_alpha: int = 16
lora_dropout: float = 0.05
lora_target_modules: str = "all-linear" # Comma-separated list or "all-linear"
# just placeholder, for logging purpose
num_processes: int=0

25 changes: 25 additions & 0 deletions F2LLM/config_lora_example.json
@@ -0,0 +1,25 @@
{
"model_path": "models/qwen3-0.6b",
"experiment_id": "f2llm_lora_example",
"output_dir": "output",
"tb_dir": "tb_logs",
"cache_dir": "cache",
"train_data_path": "data_tokenized_qwen",
"train_batch_size": 4,
"max_seq_length": 1024,
"learning_rate": 1e-4,
"min_lr": 1e-6,
"weight_decay": 1e-2,
"warmup_steps": 100,
"num_hard_neg": 7,
"train_steps": 1000,
"train_epochs": 3,
"log_interval": 20,
"checkpointing_steps": 100,
"validation_steps": 100,
"use_lora": true,
"lora_r": 8,
"lora_alpha": 16,
"lora_dropout": 0.05,
"lora_target_modules": "all-linear"
}
157 changes: 157 additions & 0 deletions F2LLM/docs/lora_support.md
@@ -0,0 +1,157 @@
# LoRA Support in F2LLM

## Overview

Low-Rank Adaptation (LoRA) is a parameter-efficient fine-tuning technique that significantly reduces the number of trainable parameters while maintaining model performance. F2LLM provides built-in support for LoRA, allowing users to fine-tune large language models efficiently without requiring full model updates.

## Key Benefits

- **Memory Efficiency**: Dramatically reduces memory requirements during training
- **Computational Efficiency**: Faster training with fewer parameters to update
- **Storage Efficiency**: Smaller adapter files compared to full model checkpoints
- **Modularity**: Easy to switch between different LoRA adapters for various tasks

## Configuration

LoRA can be enabled by setting the appropriate parameters in your configuration file or through command line arguments.

### Configuration Parameters

| Parameter | Type | Default | Description |
|-----------|------|---------|-------------|
| `use_lora` | bool | `false` | Enable or disable LoRA |
| `lora_r` | int | `8` | The rank of the LoRA decomposition |
| `lora_alpha` | int | `16` | Scaling factor for LoRA |
| `lora_dropout` | float | `0.05` | Dropout rate applied to LoRA layers |
| `lora_target_modules` | str | `"all-linear"` | Target modules to apply LoRA to |

### Target Modules

The `lora_target_modules` parameter specifies which layers to apply LoRA to:

- **"all-linear"** (default): Applies LoRA to all linear projection layers including:
- `q_proj`: Query projections
- `v_proj`: Value projections
- `k_proj`: Key projections
- `o_proj`: Output projections
- `gate_proj`: Gate projections (in feed-forward networks)
- `up_proj`: Up projections (in feed-forward networks)
- `down_proj`: Down projections (in feed-forward networks)
- `lm_head`: Language model head

- **Custom list**: Comma-separated module names (e.g., `"q_proj,v_proj"`)
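
A minimal sketch of how this string is resolved, mirroring the expansion performed in `lora_utils.py`:

```python
def resolve_target_modules(spec: str) -> list[str]:
    """Turn a `lora_target_modules` value into an explicit module list."""
    if spec == "all-linear":
        # Same expansion that lora_utils.py uses for the "all-linear" shortcut.
        return ["q_proj", "v_proj", "k_proj", "o_proj",
                "gate_proj", "up_proj", "down_proj", "lm_head"]
    return [name.strip() for name in spec.split(",")]

print(resolve_target_modules("q_proj, v_proj"))  # ['q_proj', 'v_proj']
print(resolve_target_modules("all-linear"))      # the eight module names above
```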

## Example Configuration

```json
{
"model_path": "models/qwen3-0.6b",
"experiment_id": "f2llm_lora_example",
"output_dir": "output",
"tb_dir": "tb_logs",
"cache_dir": "cache",
"train_data_path": "data_tokenized_qwen",
"train_batch_size": 4,
"max_seq_length": 1024,
"learning_rate": 1e-4,
"min_lr": 1e-6,
"weight_decay": 1e-2,
"warmup_steps": 100,
"num_hard_neg": 7,
"train_steps": 1000,
"train_epochs": 3,
"log_interval": 20,
"checkpointing_steps": 100,
"validation_steps": 100,
"use_lora": true,
"lora_r": 8,
"lora_alpha": 16,
"lora_dropout": 0.05,
"lora_target_modules": "all-linear"
}
```

## Implementation Details

### Model Initialization

When `use_lora` is set to `true`, the model automatically applies LoRA during initialization in the `F2LLM.__init__()` method:

1. The base model is loaded from the specified `model_path`
2. LoRA configuration is created with the provided parameters
3. The PEFT (Parameter-Efficient Fine-Tuning) library applies the LoRA adapters
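
The wiring inside `F2LLM.__init__()` is not shown in this diff; the following is only an illustrative sketch of the three steps above, reusing the same PEFT calls as `lora_utils.py` (the helper name `build_backbone` and the argument object are assumptions):

```python
import torch
from transformers import AutoModel
from peft import LoraConfig, TaskType, get_peft_model

def build_backbone(args):
    """Hypothetical helper; the real logic lives in F2LLM.__init__()."""
    # 1. Load the base model from `model_path`.
    model = AutoModel.from_pretrained(
        args.model_path, torch_dtype=torch.bfloat16, trust_remote_code=True
    )

    if args.use_lora:
        # Recent PEFT releases accept the literal "all-linear" shortcut;
        # lora_utils.py instead expands it into an explicit module list.
        targets = (args.lora_target_modules
                   if args.lora_target_modules == "all-linear"
                   else [m.strip() for m in args.lora_target_modules.split(",")])

        # 2. Build the LoRA configuration from the parsed arguments.
        lora_config = LoraConfig(
            task_type=TaskType.FEATURE_EXTRACTION,
            r=args.lora_r,
            lora_alpha=args.lora_alpha,
            lora_dropout=args.lora_dropout,
            target_modules=targets,
            bias="none",
        )
        # 3. Let PEFT wrap the frozen base model with the adapters.
        model = get_peft_model(model, lora_config)

    return model
```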

### Parameter Efficiency

With LoRA enabled, only a fraction of the model's parameters are trainable:

- **Full model parameters**: All model weights
- **Trainable parameters**: Only the LoRA adapter weights (with the default `bias="none"` setting, base-model biases stay frozen)
- **Parameter savings**: Often a 90%+ reduction in trainable parameters, which also shrinks gradient and optimizer-state memory
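
The trainable/total ratio can be checked directly with the `count_parameters()` helper from `lora_utils.py` (here `model` stands for an already-wrapped PEFT model, e.g. the return value of `load_model_with_lora`):

```python
from lora_utils import count_parameters

trainable = count_parameters(model, only_trainable=True)
total = count_parameters(model)
print(f"trainable: {trainable:,} / {total:,} ({100 * trainable / total:.2f}%)")

# PEFT models also provide an equivalent built-in summary:
model.print_trainable_parameters()
```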

## Usage Examples

### Training with LoRA

1. Create a configuration file with LoRA enabled
2. Run the training script:

```bash
python run.py --config config_lora_example.json
```

### Loading Models with LoRA Adapters

Use the `lora_utils.py` module to load models with previously trained adapters:

```python
from lora_utils import load_model_with_lora

model, tokenizer = load_model_with_lora(
base_model_path="path/to/base/model",
lora_adapter_path="path/to/lora/adapter"
)
```

### Merging LoRA Weights

To permanently merge LoRA weights with the base model:

```python
from lora_utils import merge_lora_weights

merged_model = merge_lora_weights(model, save_path="path/to/merged/model")
```

## Utilities

### lora_utils.py

This module provides several utility functions for LoRA operations:

- `load_model_with_lora()`: Load a base model with optional LoRA adapter
- `merge_lora_weights()`: Merge LoRA weights with the base model
- `get_lora_model_info()`: Get information about a LoRA model configuration
- `count_parameters()`: Count model parameters (trainable vs total)
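
A short end-to-end sketch of these helpers with placeholder paths (note that `load_model_with_lora` hard-codes bfloat16 and `attn_implementation='flash_attention_2'`, so the host environment must support both):

```python
from lora_utils import (load_model_with_lora, get_lora_model_info,
                        merge_lora_weights, count_parameters)

# Attach a fresh LoRA adapter to the base model (no pre-trained adapter yet);
# the keyword names follow the defaults documented above.
model, tokenizer = load_model_with_lora(
    "models/qwen3-0.6b",              # placeholder base-model path
    lora_r=8,
    lora_alpha=16,
    lora_dropout=0.05,
    target_modules="q_proj,v_proj",   # custom comma-separated list
)

print(get_lora_model_info(model))                    # adapter r/alpha/dropout/targets
print(count_parameters(model, only_trainable=True))  # trainable parameter count

# Once training is done, fold the adapter into the base weights.
merged = merge_lora_weights(model, save_path="output/merged-model")  # placeholder path
```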

## Best Practices

1. **Start with default parameters**: Use r=8, alpha=16, dropout=0.05 as a starting point
2. **Adjust r value**: Higher r values (16, 32) may improve performance but increase memory
3. **Tune alpha**: an alpha/r ratio of about 2 is often effective (e.g., r=8, alpha=16)
4. **Monitor parameter count**: Check the trainable vs total parameter ratio during initialization
5. **Use appropriate target modules**: "all-linear" covers most important layers, but task-specific modules might be more efficient

## Troubleshooting

### Common Issues

- **PEFT library not found**: Install with `pip install peft`
- **Memory issues**: Reduce LoRA rank (`lora_r`) to further decrease memory usage
- **Performance degradation**: Try increasing `lora_r` or `lora_alpha` values

### Performance Considerations

- Lower ranks (r=4, 8) use less memory but may underperform
- Higher ranks (r=32, 64) approach full fine-tuning performance but use more memory
- The alpha/r ratio is often kept around 2 for good performance
123 changes: 123 additions & 0 deletions F2LLM/lora_utils.py
@@ -0,0 +1,123 @@
"""
Utilities for LoRA (Low-Rank Adaptation) support in F2LLM.
This module provides functions for loading models with LoRA adapters, merging adapters back into the base model, and inspecting LoRA configurations.
"""

from transformers import AutoModel, AutoTokenizer
from peft import PeftModel, LoraConfig, get_peft_model, TaskType
import torch


def load_model_with_lora(base_model_path, lora_adapter_path=None, **lora_kwargs):
"""
Load a base model with optional LoRA adapter.

Args:
base_model_path (str): Path to the base model
lora_adapter_path (str, optional): Path to the LoRA adapter
**lora_kwargs: Additional LoRA configuration arguments

Returns:
tuple: (model, tokenizer)
"""
# Load the base model
model = AutoModel.from_pretrained(
base_model_path,
trust_remote_code=True,
torch_dtype=torch.bfloat16,
attn_implementation='flash_attention_2'
)
model.config.use_cache = False

tokenizer = AutoTokenizer.from_pretrained(base_model_path)

# Apply LoRA if adapter path is provided
if lora_adapter_path:
model = PeftModel.from_pretrained(model, lora_adapter_path)
print(f"Loaded LoRA adapter from {lora_adapter_path}")
elif lora_kwargs: # Apply new LoRA if configuration is provided
target_modules = lora_kwargs.get("target_modules", "all-linear")
if target_modules == "all-linear":
target_modules = [
"q_proj", "v_proj", "k_proj", "o_proj",
"gate_proj", "up_proj", "down_proj",
"lm_head"
]
elif isinstance(target_modules, str):
target_modules = [module.strip() for module in target_modules.split(",")]

lora_config = LoraConfig(
task_type=TaskType.FEATURE_EXTRACTION,
r=lora_kwargs.get("lora_r", 8),
lora_alpha=lora_kwargs.get("lora_alpha", 16),
target_modules=target_modules,
lora_dropout=lora_kwargs.get("lora_dropout", 0.05),
bias="none",
)

model = get_peft_model(model, lora_config)
print(f"Applied LoRA with config: {lora_config}")

return model, tokenizer


def merge_lora_weights(model, save_path=None):
"""
Merge LoRA weights with the base model.

Args:
model: PEFT model with LoRA
save_path (str, optional): Path to save the merged model

Returns:
Merged model
"""
if hasattr(model, 'merge_and_unload'):
merged_model = model.merge_and_unload()
if save_path:
merged_model.save_pretrained(save_path)
return merged_model
else:
raise ValueError("Model does not support merging. Make sure it's a PEFT model.")


def get_lora_model_info(model):
"""
Get information about a LoRA model.

Args:
model: PEFT model with LoRA

Returns:
dict: Information about the model's LoRA configuration
"""
if hasattr(model, 'peft_config'):
info = {}
for adapter_name, config in model.peft_config.items():
info[adapter_name] = {
'r': config.r,
'alpha': config.lora_alpha,
'dropout': config.lora_dropout,
'target_modules': config.target_modules,
'bias': config.bias,
}
return info
else:
return {"message": "Model does not have LoRA configuration"}


def count_parameters(model, only_trainable=False):
"""
Count the number of parameters in the model.

Args:
model: PyTorch model
only_trainable (bool): Whether to count only trainable parameters

Returns:
int: Number of parameters
"""
if only_trainable:
return sum(p.numel() for p in model.parameters() if p.requires_grad)
else:
return sum(p.numel() for p in model.parameters())