diff --git a/tools-py/.gitignore b/tools-py/.gitignore new file mode 100644 index 0000000..5b6f473 --- /dev/null +++ b/tools-py/.gitignore @@ -0,0 +1,52 @@ +# Python +__pycache__/ +*.py[cod] +*$py.class +*.so +.Python +env/ +venv/ +ENV/ +build/ +develop-eggs/ +dist/ +downloads/ +eggs/ +.eggs/ +lib/ +lib64/ +parts/ +sdist/ +var/ +wheels/ +*.egg-info/ +.installed.cfg +*.egg + +# Virtual environments +venv/ +ENV/ +env/ + +# IDE +.vscode/ +.idea/ +*.swp +*.swo +*~ + +# Output files +drafts/ +*.yml.bak +*.yaml.bak + +# Generated reports +missing_models_report.txt +*_report.txt + +# Logs +*.log + +# OS +.DS_Store +Thumbs.db \ No newline at end of file diff --git a/tools-py/README.md b/tools-py/README.md new file mode 100644 index 0000000..b08cb92 --- /dev/null +++ b/tools-py/README.md @@ -0,0 +1,444 @@ +# Model Openness Tool - Python Tools + +Automated tools for working with the Model Openness Framework (MOF), including scraping model information from HuggingFace and identifying missing models. + +## Tools Included + +### 1. Model Scraper (`model_scraper.py`) +Generates draft YAML files for individual models by scraping HuggingFace. + +### 2. Missing Models Finder (`find_missing_models.py`) +Identifies popular HuggingFace models that are not yet in the MOT database. + +## Overview + +These tools automate data collection and gap analysis for model evaluation by: +- Fetching model metadata from HuggingFace API +- Analyzing repository contents to detect available MOF components +- Identifying license information +- Generating MOF-compliant YAML files with confidence scores +- Finding missing models that should be added to MOT +- Flagging areas requiring manual review + +**⚠️ Important**: Generated YAML files are **DRAFTS** that require manual review and validation before submission to the MOT database. + +## Installation + +### Prerequisites +- Python 3.8 or higher +- pip package manager + +### Setup + +1. Navigate to the tools-py directory: +```bash +cd tools-py +``` + +2. 
Install dependencies:
+```bash
+pip install -r requirements.txt
+```
+
+## Usage
+
+### Tool 1: Model Scraper
+
+#### Basic Usage
+
+Scrape a model from HuggingFace:
+```bash
+python model_scraper.py <model_id>
+```
+
+Example:
+```bash
+python model_scraper.py meta-llama/Llama-3-8B
+```
+
+#### Advanced Options
+
+Specify output directory:
+```bash
+python model_scraper.py meta-llama/Llama-3-8B --output-dir ../models
+```
+
+Use HuggingFace token for gated models:
+```bash
+python model_scraper.py meta-llama/Llama-3-8B --hf-token YOUR_TOKEN
+```
+
+### Tool 2: Missing Models Finder
+
+#### Basic Usage
+
+Find missing models with default settings (min 1000 downloads):
+```bash
+python find_missing_models.py
+```
+
+#### Advanced Options
+
+Set minimum download threshold:
+```bash
+python find_missing_models.py --min-downloads 10000
+```
+
+Limit number of models to check:
+```bash
+python find_missing_models.py --limit 500
+```
+
+Save report to file:
+```bash
+python find_missing_models.py --output missing_models_report.txt
+```
+
+Filter by model type:
+```bash
+python find_missing_models.py --model-type text-generation
+```
+
+#### Command-Line Arguments
+
+- `--min-downloads`: Minimum number of downloads to consider (default: 1000)
+- `--limit`: Maximum number of models to fetch from HuggingFace (default: 1000)
+- `--models-dir`: Path to MOT models directory (default: ../models)
+- `--output`: Output file for report (default: print to console)
+- `--model-type`: Filter by model type (e.g., text-generation, image-to-text)
+
+#### Example Workflow
+
+**Option A: Manual Workflow (Selective)**
+
+1. Find missing high-priority models:
+```bash
+python find_missing_models.py --min-downloads 50000 --output report.txt
+```
+
+2. Review the report and identify models to add
+
+3. Scrape the identified models:
+```bash
+python model_scraper.py meta-llama/Llama-3-8B
+python model_scraper.py mistralai/Mistral-7B-v0.1
+```
+
+4. Review and validate the generated YAML files
+
+5. Submit PRs to add the models to MOT
+
+**Option B: Batch Processing (Automated)**
+
+Use the batch script to automatically find and scrape missing models:
+
+```bash
+./batch_scrape_missing.sh
+```
+
+This will:
+1. Find missing models (default: 50k+ downloads)
+2. Extract model IDs from the report
+3. Prompt for confirmation
+4. Scrape the first 20 models automatically
+5. Generate a summary with success/failure counts
+
+**Customize batch processing:**
+
+```bash
+# Set minimum downloads threshold
+MIN_DOWNLOADS=100000 ./batch_scrape_missing.sh
+
+# Process more models (default is 20)
+MAX_MODELS=50 ./batch_scrape_missing.sh
+
+# Process ALL missing models (use with caution!)
+MAX_MODELS=999 ./batch_scrape_missing.sh
+
+# Use HuggingFace token for gated models
+HF_TOKEN=your_token ./batch_scrape_missing.sh
+
+# Change output directory
+OUTPUT_DIR=./draft_models ./batch_scrape_missing.sh
+
+# Combine multiple options
+MIN_DOWNLOADS=100000 MAX_MODELS=50 HF_TOKEN=your_token ./batch_scrape_missing.sh
+```
+
+**Environment Variables:**
+- `MIN_DOWNLOADS`: Minimum download threshold (default: 50000)
+- `LIMIT`: Max models to check on HuggingFace (default: 1000)
+- `MAX_MODELS`: Max models to scrape in one batch (default: 20)
+- `OUTPUT_DIR`: Output directory for YAML files (default: ../models)
+- `HF_TOKEN`: HuggingFace API token for gated models (optional)
+
+**Note:** The default limit of 20 models is a safety measure. Each model takes roughly 2-5 seconds to scrape, so a batch of 20 finishes in about 1-2 minutes. Increase MAX_MODELS carefully based on your needs.
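+
+**Programmatic use (optional):** Both tools expose plain Python classes, so the find-then-scrape workflow can also be driven from a short script instead of `batch_scrape_missing.sh`. The sketch below is illustrative only: it relies on the class and method names defined in `find_missing_models.py` and `model_scraper.py` in this directory, and the download threshold, batch size, and output path are placeholder values to adjust.
+
+```python
+from pathlib import Path
+
+from find_missing_models import MissingModelsFinder
+from model_scraper import ModelScraper
+
+# Compare popular HuggingFace models against the local MOT database.
+finder = MissingModelsFinder(models_dir="../models")
+mot_models = finder.get_mot_models()
+hf_models = finder.get_huggingface_models(min_downloads=50000, limit=200)
+missing = [m for m in hf_models if not finder.is_model_in_mot(m, mot_models)[0]]
+
+# Scrape a small batch of the missing models into draft YAML files.
+scraper = ModelScraper(hf_token=None)  # pass a token string for gated models
+for model in missing[:5]:              # keep batches small, as with MAX_MODELS
+    model_id = model["id"]
+    data = scraper.scrape_huggingface_model(model_id)
+    if data:
+        out = Path("../models") / f"{model_id.split('/')[-1]}.yml"
+        scraper.generate_yaml(data, str(out))
+```
+
+As with the batch script, every file this produces is a draft that still needs manual review.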
+ +### Advanced Options + +Specify output directory: +```bash +python model_scraper.py meta-llama/Llama-3-8B --output-dir ../models +``` + +Use HuggingFace token for gated models: +```bash +python model_scraper.py meta-llama/Llama-3-8B --hf-token YOUR_TOKEN +``` + +### Command-Line Arguments + +- `model_id` (required): HuggingFace model ID (e.g., `meta-llama/Llama-3-8B`) +- `--output-dir`: Output directory for YAML files (default: `../models`) +- `--hf-token`: HuggingFace API token for accessing gated models + +## What the Scraper Does + +### 1. Data Collection +- Fetches model metadata from HuggingFace API +- Downloads and parses model card (README.md) +- Lists repository files to detect available components +- Extracts license information + +### 2. Component Detection + +The scraper automatically detects the following MOF components: + +**Code Components:** +- Model parameters (Final) - Detects `.bin`, `.safetensors`, `.pt`, `.pth`, `.ckpt` files +- Model metadata - Detects `config.json`, `model_config.json` +- Model architecture - Detects Python files with modeling code +- Inference code - Detects files with inference/generation keywords + +**Data Components:** +- Training dataset - Detects references in model card + +**Documentation Components:** +- Model card - Detects README.md +- Technical report - Detects references in model card +- Research paper - Detects paper/arxiv references +- Evaluation results - Detects benchmark/performance mentions + +### 3. License Detection +- Extracts license from HuggingFace model metadata +- Checks for LICENSE files in repository +- Defaults to "unlicensed" when uncertain (requires manual review) + +### 4. Repository Detection + +The scraper automatically detects GitHub repositories using multiple strategies with confidence scoring: + +**Detection Methods (in priority order):** + +1. **Model Card Parsing** (70-90% confidence) + - Searches the model card (README.md) for GitHub URLs + - Filters for the most relevant repository (matching model/organization name) + - Higher confidence when repository name closely matches model name + - Example: Found `https://github.com/bigscience-workshop/bigscience` for BLOOM model + +2. **Pattern-Based Inference** (60-65% confidence) + - Attempts direct mapping: `organization/model` → `github.com/organization/model` + - Validates repository existence via HTTP HEAD request + - Example: `mistralai/Mistral-7B-v0.1` → `github.com/mistralai/Mistral-7B-v0.1` + +3. **Name Variations** (60% confidence) + - Tries base model names without version suffixes + - Tests multiple naming patterns (e.g., `Mistral-7B-v0.1` → `Mistral`) + - Validates each attempt before accepting + +**Output Format:** +```yaml +# Repository detected: 70% confidence +release: + repository: https://github.com/bigscience-workshop/bigscience +``` + +**When No Repository Found:** +- Repository field left empty (requires manual addition) +- Common for models without public code repositories +- May indicate closed-source or proprietary models + +**Manual Review Required:** +- Verify detected repository is correct and official +- Check if repository contains actual model code/weights +- Some models may have multiple repositories (training vs. inference) + +### 5. 
Confidence Scoring + +Each detected component includes a confidence score: +- **95%**: High confidence (e.g., model parameters detected via file extensions) +- **80-90%**: Good confidence (e.g., config files, model card) +- **60-75%**: Medium confidence (e.g., references in documentation) +- **50%**: Low confidence (requires verification) + +## Output Format + +Generated YAML files include: +- MOF framework metadata +- Model release information (name, version, date, producer) +- Detected components with descriptions and licenses +- Confidence scores in comments +- Links to HuggingFace repository + +Example output structure: +```yaml +# AUTO-GENERATED DRAFT - REQUIRES MANUAL REVIEW +# Generated by Model Openness Tool scraper +# Source: HuggingFace model meta-llama/Llama-3-8B +# +# Component confidence scores: +# - Model parameters (Final): 95% confidence +# - Model card: 95% confidence +# +framework: + name: Model Openness Framework + version: '1.0' + date: '2024-12-15' +release: + name: Llama-3-8B + version: 8B + date: '2024-10-03' + license: {} + type: language + architecture: transformer decoder + origin: llama-3-8b + producer: Meta + contact: '' + huggingface: https://huggingface.co/meta-llama/Llama-3-8B + components: + - name: Model parameters (Final) + description: Trained model parameters, weights and biases + license: llama-3 +``` + +## Manual Review Checklist + +After generating a YAML file, you **must** review and verify: + +### 1. Model Metadata +- [ ] Verify model name and version +- [ ] Confirm producer/organization +- [ ] Check release date accuracy +- [ ] Validate model type (language, vision, multimodal, etc.) +- [ ] Verify architecture classification + +### 2. Components +- [ ] Confirm all detected components are actually available +- [ ] Add any missing components not detected by scraper +- [ ] Verify component descriptions are accurate +- [ ] Update component locations/URLs if needed + +### 3. Licenses +- [ ] **Critical**: Verify all license information +- [ ] Check if licenses are correctly identified as open/closed +- [ ] Add license file paths where available +- [ ] Replace "unlicensed" with actual license names +- [ ] Ensure license compatibility across components + +### 4. Additional Information +- [ ] Add contact information if available +- [ ] Add GitHub repository URL if different from HuggingFace +- [ ] Include paper URLs (arXiv, conference proceedings) +- [ ] Add any special notes or caveats + +## Validation + +After manual review, validate the YAML file: + +```bash +cd .. +php scripts/validate-model.php models/Your-Model.yml +``` + +The validation script checks: +- Schema compliance with `schema/mof_schema.json` +- Required fields are present +- Data types are correct +- Enum values are valid + +## Submission Workflow + +1. **Generate draft**: Run the scraper +2. **Manual review**: Edit the generated YAML file +3. **Validate**: Run validation script +4. **Test locally**: Import into local MOT instance +5. **Submit PR**: Create pull request to add model to MOT + +See [CONTRIBUTING.md](../CONTRIBUTING.md) for detailed submission instructions. 
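+
+**Optional: Python schema pre-check.** If a PHP environment is not available, a rough schema check can be run from Python before step 3 of the workflow above. This is a minimal sketch, not part of the toolset: it assumes the `jsonschema` package (not listed in `requirements.txt`), the schema path `../schema/mof_schema.json` relative to `tools-py`, and an arbitrary file name such as `check_draft.py`. The PHP validator remains the authoritative check.
+
+```python
+import json
+import sys
+
+import yaml
+from jsonschema import ValidationError, validate  # pip install jsonschema
+
+# Usage: python check_draft.py ../models/Your-Model.yml
+with open("../schema/mof_schema.json", encoding="utf-8") as f:
+    schema = json.load(f)
+with open(sys.argv[1], encoding="utf-8") as f:
+    draft = yaml.safe_load(f)
+
+try:
+    validate(instance=draft, schema=schema)
+    print("Draft passes basic schema validation")
+except ValidationError as err:
+    print(f"Schema violation: {err.message}")
+```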
+ +## Limitations + +### Current Limitations +- Only supports HuggingFace as a data source +- Cannot access gated models without API token +- License detection is basic (requires manual verification) +- Cannot determine if training code/data is actually available +- May miss components not clearly documented +- Cannot validate license openness automatically + +### Known Issues +- Some model cards use non-standard formats +- License information may be incomplete or ambiguous +- Component availability may be overstated +- Confidence scores are heuristic-based + +## Future Enhancements + +Potential improvements: +- [ ] Support for additional sources (GitHub, Papers with Code, etc.) +- [ ] LLM-powered content analysis for better component detection +- [ ] Automated license file parsing and classification +- [ ] Integration with license databases (SPDX, OSI) +- [ ] Batch processing for multiple models +- [ ] Interactive mode for guided review +- [ ] Comparison with existing MOT entries +- [ ] GitHub API integration for repository analysis + +## Troubleshooting + +### Common Issues + +**"Failed to scrape model data"** +- Check model ID is correct (format: `organization/model-name`) +- Verify internet connection +- For gated models, provide HuggingFace token with `--hf-token` + +**"Model not found"** +- Ensure model exists on HuggingFace +- Check for typos in model ID +- Some models may be private or deleted + +**"Permission denied"** +- Gated models require authentication +- Get HuggingFace token from https://huggingface.co/settings/tokens +- Use `--hf-token` argument + +**Low confidence scores** +- Normal for models with minimal documentation +- Requires more thorough manual review +- Consider contacting model producer for clarification + +## Contributing + +To improve the scraper: +1. Fork the repository +2. Make your changes +3. Test with various models +4. Submit a pull request + +See [CONTRIBUTING.md](../CONTRIBUTING.md) for guidelines. + +## License + +This scraper is part of the Model Openness Tool, licensed under MIT. + +## Support + +For issues or questions: +- File an issue on GitHub +- Check existing issues for solutions +- Consult the main MOT documentation + +## Acknowledgments + +This scraper was developed to accelerate the MOT model evaluation process while maintaining the quality and accuracy standards of the Model Openness Framework. 
\ No newline at end of file diff --git a/tools-py/batch_scrape_missing.sh b/tools-py/batch_scrape_missing.sh new file mode 100755 index 0000000..f0445eb --- /dev/null +++ b/tools-py/batch_scrape_missing.sh @@ -0,0 +1,140 @@ +#!/bin/bash + +# Batch Scrape Missing Models +# This script finds missing models and generates draft YAML files for each + +set -e # Exit on error + +# Configuration +MIN_DOWNLOADS=${MIN_DOWNLOADS:-50000} # Default: 50k downloads +LIMIT=${LIMIT:-1000} # Default: check 1000 models +MAX_MODELS=${MAX_MODELS:-20} # Default: process 20 models (safety limit) +OUTPUT_DIR=${OUTPUT_DIR:-../models} # Default: ../models directory +REPORT_FILE="missing_models_report.txt" +HF_TOKEN=${HF_TOKEN:-""} # Optional HuggingFace token + +# Colors for output +RED='\033[0;31m' +GREEN='\033[0;32m' +YELLOW='\033[1;33m' +BLUE='\033[0;34m' +NC='\033[0m' # No Color + +echo -e "${BLUE}========================================${NC}" +echo -e "${BLUE}Batch Scrape Missing Models${NC}" +echo -e "${BLUE}========================================${NC}" +echo "" + +# Step 1: Find missing models +echo -e "${YELLOW}Step 1: Finding missing models...${NC}" +echo " Min downloads: $MIN_DOWNLOADS" +echo " Limit: $LIMIT models" +echo "" + +python find_missing_models.py \ + --min-downloads "$MIN_DOWNLOADS" \ + --limit "$LIMIT" \ + --output "$REPORT_FILE" + +if [ ! -f "$REPORT_FILE" ]; then + echo -e "${RED}Error: Report file not generated${NC}" + exit 1 +fi + +# Step 2: Extract model IDs from report +echo "" +echo -e "${YELLOW}Step 2: Extracting model IDs from report...${NC}" + +# Extract lines with model IDs (format: " org/model-name downloads") +# Look for lines starting with spaces followed by org/model pattern +MODEL_IDS=$(grep -E "^ [a-zA-Z0-9_-]+/[a-zA-Z0-9_.-]+" "$REPORT_FILE" | \ + awk '{print $1}' | \ + head -n "$MAX_MODELS") + +MODEL_COUNT=$(echo "$MODEL_IDS" | wc -l | tr -d ' ') + +if [ -z "$MODEL_IDS" ]; then + echo -e "${RED}No missing models found in report${NC}" + exit 0 +fi + +echo " Found $MODEL_COUNT models to scrape" +echo "" + +# Step 3: Confirm with user +echo -e "${YELLOW}Models to scrape:${NC}" +echo "$MODEL_IDS" | nl +echo "" +echo -e "${YELLOW}This will generate $MODEL_COUNT YAML files in: $OUTPUT_DIR${NC}" +read -p "Continue? (y/n) " -n 1 -r +echo "" + +if [[ ! 
$REPLY =~ ^[Yy]$ ]]; then + echo "Aborted by user" + exit 0 +fi + +# Step 4: Scrape each model +echo "" +echo -e "${YELLOW}Step 3: Scraping models...${NC}" +echo "" + +SUCCESS_COUNT=0 +FAIL_COUNT=0 +FAILED_MODELS=() + +# Create output directory if it doesn't exist +mkdir -p "$OUTPUT_DIR" + +# Loop through each model +CURRENT=0 +while IFS= read -r MODEL_ID; do + CURRENT=$((CURRENT + 1)) + echo -e "${BLUE}[$CURRENT/$MODEL_COUNT] Scraping: $MODEL_ID${NC}" + + # Build command with optional token + CMD="python model_scraper.py \"$MODEL_ID\" --output-dir \"$OUTPUT_DIR\"" + if [ -n "$HF_TOKEN" ]; then + CMD="$CMD --hf-token \"$HF_TOKEN\"" + fi + + # Run scraper + if eval "$CMD"; then + echo -e "${GREEN} ✓ Success${NC}" + SUCCESS_COUNT=$((SUCCESS_COUNT + 1)) + else + echo -e "${RED} ✗ Failed${NC}" + FAIL_COUNT=$((FAIL_COUNT + 1)) + FAILED_MODELS+=("$MODEL_ID") + fi + + echo "" + + # Small delay to avoid rate limiting + sleep 1 +done <<< "$MODEL_IDS" + +# Step 5: Summary +echo "" +echo -e "${BLUE}========================================${NC}" +echo -e "${BLUE}Summary${NC}" +echo -e "${BLUE}========================================${NC}" +echo -e "${GREEN}Successful: $SUCCESS_COUNT${NC}" +echo -e "${RED}Failed: $FAIL_COUNT${NC}" +echo "" + +if [ $FAIL_COUNT -gt 0 ]; then + echo -e "${RED}Failed models:${NC}" + for MODEL in "${FAILED_MODELS[@]}"; do + echo " - $MODEL" + done + echo "" +fi + +echo -e "${YELLOW}Next steps:${NC}" +echo "1. Review generated YAML files in: $OUTPUT_DIR" +echo "2. Manually verify and edit each file" +echo "3. Validate with: php ../scripts/validate-model.php models/Your-Model.yml" +echo "4. Submit PRs for reviewed models" +echo "" +echo "Report saved to: $REPORT_FILE" diff --git a/tools-py/example_usage.sh b/tools-py/example_usage.sh new file mode 100755 index 0000000..b3907be --- /dev/null +++ b/tools-py/example_usage.sh @@ -0,0 +1,109 @@ +#!/bin/bash +# Example usage of the Model Openness Tool Python scripts + +echo "Model Openness Tool - Python Scripts Examples" +echo "==============================================" +echo "" + +# Example 1: Find missing models +echo "Example 1: Find missing models from HuggingFace" +echo "------------------------------------------------" +echo "Command: python find_missing_models.py --min-downloads 10000" +echo "" +echo "This will:" +echo " - Compare HuggingFace models with MOT database" +echo " - Identify popular models not yet in MOT" +echo " - Generate a prioritized report" +echo "" +# Uncomment to run: +# python find_missing_models.py --min-downloads 10000 + +# Example 2: Save missing models report to file +echo "Example 2: Save missing models report" +echo "--------------------------------------" +echo "Command: python find_missing_models.py --output missing_report.txt" +echo "" +# Uncomment to run: +# python find_missing_models.py --output missing_report.txt + +# Example 3: Basic model scraping +echo "Example 3: Scraping a single model" +echo "-----------------------------------" +echo "Command: python model_scraper.py google/gemma-2b" +echo "" +# Uncomment to run: +# python model_scraper.py google/gemma-2b + +# Example 4: Scraping with custom output directory +echo "Example 4: Custom output directory" +echo "-----------------------------------" +echo "Command: python model_scraper.py microsoft/phi-2 --output-dir ./drafts" +echo "" +# Uncomment to run: +# python model_scraper.py microsoft/phi-2 --output-dir ./drafts + +# Example 5: Scraping a gated model (requires token) +echo "Example 5: Gated model (requires HuggingFace token)" 
+echo "----------------------------------------------------" +echo "Command: python model_scraper.py meta-llama/Llama-3-8B --hf-token YOUR_TOKEN" +echo "" +echo "Get your token from: https://huggingface.co/settings/tokens" +echo "" +# Uncomment and add your token to run: +# python model_scraper.py meta-llama/Llama-3-8B --hf-token YOUR_TOKEN + +# Example 6: Complete workflow +echo "Example 6: Complete workflow (find missing + scrape)" +echo "-----------------------------------------------------" +echo "Step 1: Find missing models" +echo " python find_missing_models.py --min-downloads 50000 --output report.txt" +echo "" +echo "Step 2: Review report.txt and identify models to add" +echo "" +echo "Step 3: Scrape identified models" +echo " python model_scraper.py meta-llama/Llama-3-8B" +echo " python model_scraper.py mistralai/Mistral-7B-v0.1" +echo "" +echo "Step 4: Review and validate generated YAML files" +echo " php ../scripts/validate-model.php ../models/Llama-3-8B.yml" +echo "" +echo "Step 5: Submit PR to add models to MOT" +echo "" + +# Example 7: Batch processing multiple models +echo "Example 7: Batch processing" +echo "----------------------------" +echo "Processing multiple models in sequence:" +echo "" + +models=( + "google/gemma-2b" + "microsoft/phi-2" + "mistralai/Mistral-7B-v0.1" +) + +for model in "${models[@]}"; do + echo " - $model" +done +echo "" +echo "To run batch processing, uncomment the loop below in this script" +echo "" + +# Uncomment to run batch processing: +# for model in "${models[@]}"; do +# echo "Processing: $model" +# python model_scraper.py "$model" +# echo "" +# done + +echo "==============================================" +echo "Summary of available tools:" +echo " 1. find_missing_models.py - Identify models to add" +echo " 2. model_scraper.py - Generate draft YAML files" +echo "" +echo "Next steps after scraping:" +echo " 1. Review generated YAML files in ../models/" +echo " 2. Manually verify and edit the files" +echo " 3. Validate: php ../scripts/validate-model.php ../models/YourModel.yml" +echo " 4. Submit PR to add to MOT database" +echo "" diff --git a/tools-py/find_missing_models.py b/tools-py/find_missing_models.py new file mode 100755 index 0000000..1a76871 --- /dev/null +++ b/tools-py/find_missing_models.py @@ -0,0 +1,450 @@ +#!/usr/bin/env python3 +""" +Model Openness Tool - Missing Models Finder + +This script identifies models on HuggingFace that are not yet in the MOT database. +It compares popular/trending models from HuggingFace against existing MOT YAML files. + +Usage: + python find_missing_models.py [--min-downloads MIN] [--limit LIMIT] [--output OUTPUT] + +Example: + python find_missing_models.py --min-downloads 10000 --limit 500 +""" + +import argparse +import json +import os +import re +import sys +from pathlib import Path +from typing import Dict, List, Optional, Set, Tuple +from urllib.parse import quote + +import requests +import yaml + + +class MissingModelsFinder: + """Finds models on HuggingFace that are missing from MOT.""" + + def __init__(self, models_dir: str = "../models"): + """Initialize the finder. + + Args: + models_dir: Path to MOT models directory + """ + self.models_dir = Path(models_dir) + self.session = requests.Session() + self.session.headers.update({ + 'User-Agent': 'MOT-Missing-Models-Finder/1.0' + }) + + def get_mot_models(self) -> Dict[str, Dict]: + """Get all models currently in MOT database. 
+ + Returns: + Dictionary mapping model names/IDs to their metadata + """ + print("Loading existing MOT models...") + mot_models = {} + + if not self.models_dir.exists(): + print(f"Warning: Models directory not found: {self.models_dir}") + return mot_models + + yaml_files = list(self.models_dir.glob("*.yml")) + print(f"Found {len(yaml_files)} YAML files in MOT database") + + for yaml_file in yaml_files: + try: + with open(yaml_file, 'r', encoding='utf-8') as f: + data = yaml.safe_load(f) + + if not data or 'release' not in data: + continue + + release = data['release'] + model_name = release.get('name', '') + origin = release.get('origin', '') + huggingface = release.get('huggingface', '') + + # Store multiple identifiers for matching + identifiers = set() + if model_name: + identifiers.add(model_name.lower()) + identifiers.add(model_name.lower().replace('-', '_')) + identifiers.add(model_name.lower().replace('_', '-')) + if origin: + identifiers.add(origin.lower()) + if huggingface: + # Extract model ID from HuggingFace URL + hf_id = huggingface.replace('https://huggingface.co/', '') + identifiers.add(hf_id.lower()) + + mot_models[yaml_file.stem] = { + 'name': model_name, + 'origin': origin, + 'huggingface': huggingface, + 'identifiers': identifiers, + 'file': yaml_file.name + } + + except Exception as e: + print(f"Warning: Error reading {yaml_file.name}: {e}") + continue + + print(f"Loaded {len(mot_models)} models from MOT database\n") + return mot_models + + def get_huggingface_models( + self, + min_downloads: int = 1000, + limit: int = 1000, + model_type: Optional[str] = None + ) -> List[Dict]: + """Get popular models from HuggingFace. + + Args: + min_downloads: Minimum number of downloads to consider + limit: Maximum number of models to fetch + model_type: Filter by model type (e.g., 'text-generation') + + Returns: + List of model dictionaries + """ + print(f"Fetching models from HuggingFace (min downloads: {min_downloads:,})...") + + models = [] + page = 0 + + while len(models) < limit: + # HuggingFace API endpoint for models + url = "https://huggingface.co/api/models" + params = { + 'sort': 'downloads', + 'direction': -1, + 'limit': 100, + 'skip': page * 100, + 'full': True + } + + if model_type: + params['filter'] = model_type + + try: + response = self.session.get(url, params=params, timeout=30) + response.raise_for_status() + batch = response.json() + + if not batch: + break + + for model in batch: + downloads = model.get('downloads', 0) + if downloads >= min_downloads: + models.append(model) + + if len(models) >= limit: + break + + page += 1 + print(f" Fetched {len(models)} models so far...", end='\r') + + except requests.exceptions.RequestException as e: + print(f"\nError fetching models: {e}") + break + + print(f"\nFetched {len(models)} models from HuggingFace\n") + return models + + def normalize_model_id(self, model_id: str) -> Set[str]: + """Generate normalized variations of a model ID for matching. 
+ + Args: + model_id: Model ID (e.g., 'meta-llama/Llama-3-8B') + + Returns: + Set of normalized variations + """ + variations = set() + + # Original + variations.add(model_id.lower()) + + # Without organization prefix + if '/' in model_id: + model_name = model_id.split('/')[-1] + variations.add(model_name.lower()) + variations.add(model_name.lower().replace('-', '_')) + variations.add(model_name.lower().replace('_', '-')) + + # With underscores/hyphens swapped + variations.add(model_id.lower().replace('-', '_')) + variations.add(model_id.lower().replace('_', '-')) + + return variations + + def is_model_in_mot(self, hf_model: Dict, mot_models: Dict) -> Tuple[bool, str]: + """Check if a HuggingFace model is already in MOT. + + Args: + hf_model: HuggingFace model dictionary + mot_models: Dictionary of MOT models + + Returns: + Tuple of (is_present, matched_file) + """ + model_id = hf_model.get('id', '') + variations = self.normalize_model_id(model_id) + + for mot_file, mot_data in mot_models.items(): + mot_identifiers = mot_data.get('identifiers', set()) + + # Check if any variation matches + if variations & mot_identifiers: + return True, mot_data['file'] + + return False, '' + + def categorize_missing_models( + self, + missing_models: List[Dict] + ) -> Dict[str, List[Dict]]: + """Categorize missing models by type and popularity. + + Args: + missing_models: List of missing model dictionaries + + Returns: + Dictionary of categorized models + """ + categories = { + 'high_priority': [], # >100k downloads + 'medium_priority': [], # 10k-100k downloads + 'low_priority': [], # <10k downloads + 'by_type': {} + } + + for model in missing_models: + downloads = model.get('downloads', 0) + tags = model.get('tags', []) + + # Priority by downloads + if downloads >= 100000: + categories['high_priority'].append(model) + elif downloads >= 10000: + categories['medium_priority'].append(model) + else: + categories['low_priority'].append(model) + + # By type + model_type = 'other' + type_tags = [ + 'text-generation', 'text2text-generation', + 'image-to-text', 'text-to-image', + 'automatic-speech-recognition', 'audio-classification', + 'image-classification', 'object-detection' + ] + + for tag in tags: + if tag in type_tags: + model_type = tag + break + + if model_type not in categories['by_type']: + categories['by_type'][model_type] = [] + categories['by_type'][model_type].append(model) + + return categories + + def generate_report( + self, + missing_models: List[Dict], + mot_models: Dict, + output_file: Optional[str] = None + ) -> str: + """Generate a report of missing models. 
+ + Args: + missing_models: List of missing model dictionaries + mot_models: Dictionary of MOT models + output_file: Optional file to save report + + Returns: + Report text + """ + categories = self.categorize_missing_models(missing_models) + + report_lines = [] + report_lines.append("=" * 80) + report_lines.append("MODEL OPENNESS TOOL - MISSING MODELS REPORT") + report_lines.append("=" * 80) + report_lines.append("") + + # Summary + report_lines.append("SUMMARY") + report_lines.append("-" * 80) + report_lines.append(f"Models in MOT database: {len(mot_models):,}") + report_lines.append(f"Missing models found: {len(missing_models):,}") + report_lines.append(f" - High priority (>100k): {len(categories['high_priority']):,}") + report_lines.append(f" - Medium priority (10k+): {len(categories['medium_priority']):,}") + report_lines.append(f" - Low priority (<10k): {len(categories['low_priority']):,}") + report_lines.append("") + + # By type + report_lines.append("MISSING MODELS BY TYPE") + report_lines.append("-" * 80) + for model_type, models in sorted(categories['by_type'].items(), + key=lambda x: len(x[1]), + reverse=True): + report_lines.append(f" {model_type:30s} {len(models):5,} models") + report_lines.append("") + + # High priority models + if categories['high_priority']: + report_lines.append("HIGH PRIORITY MODELS (>100,000 downloads)") + report_lines.append("-" * 80) + for model in sorted(categories['high_priority'], + key=lambda x: x.get('downloads', 0), + reverse=True)[:50]: # Top 50 + model_id = model.get('id', 'unknown') + downloads = model.get('downloads', 0) + tags = ', '.join(model.get('tags', [])[:3]) + report_lines.append(f" {model_id:50s} {downloads:>10,} downloads") + if tags: + report_lines.append(f" Tags: {tags}") + report_lines.append(f" URL: https://huggingface.co/{model_id}") + report_lines.append("") + + # Medium priority models + if categories['medium_priority']: + report_lines.append("MEDIUM PRIORITY MODELS (10,000-100,000 downloads)") + report_lines.append("-" * 80) + report_lines.append(f"Total: {len(categories['medium_priority'])} models") + report_lines.append("Top 20:") + for model in sorted(categories['medium_priority'], + key=lambda x: x.get('downloads', 0), + reverse=True)[:20]: + model_id = model.get('id', 'unknown') + downloads = model.get('downloads', 0) + report_lines.append(f" {model_id:50s} {downloads:>10,} downloads") + report_lines.append("") + + # Commands to scrape + report_lines.append("SUGGESTED SCRAPING COMMANDS") + report_lines.append("-" * 80) + report_lines.append("High priority models (copy and run):") + report_lines.append("") + for model in sorted(categories['high_priority'], + key=lambda x: x.get('downloads', 0), + reverse=True)[:10]: + model_id = model.get('id', 'unknown') + report_lines.append(f"python model_scraper.py {model_id}") + report_lines.append("") + + report_lines.append("=" * 80) + + report_text = '\n'.join(report_lines) + + # Save to file if specified + if output_file: + with open(output_file, 'w', encoding='utf-8') as f: + f.write(report_text) + print(f"\nReport saved to: {output_file}") + + return report_text + + +def main(): + """Main entry point.""" + parser = argparse.ArgumentParser( + description='Find models on HuggingFace that are missing from MOT' + ) + parser.add_argument( + '--min-downloads', + type=int, + default=1000, + help='Minimum number of downloads to consider (default: 1000)' + ) + parser.add_argument( + '--limit', + type=int, + default=1000, + help='Maximum number of models to fetch from HuggingFace 
(default: 1000)' + ) + parser.add_argument( + '--models-dir', + default='../models', + help='Path to MOT models directory (default: ../models)' + ) + parser.add_argument( + '--output', + help='Output file for report (default: print to console)' + ) + parser.add_argument( + '--model-type', + help='Filter by model type (e.g., text-generation, image-to-text)' + ) + + args = parser.parse_args() + + # Initialize finder + finder = MissingModelsFinder(models_dir=args.models_dir) + + print("=" * 80) + print("MODEL OPENNESS TOOL - MISSING MODELS FINDER") + print("=" * 80) + print() + + # Get MOT models + mot_models = finder.get_mot_models() + + # Get HuggingFace models + hf_models = finder.get_huggingface_models( + min_downloads=args.min_downloads, + limit=args.limit, + model_type=args.model_type + ) + + # Find missing models + print("Comparing models...") + missing_models = [] + + for hf_model in hf_models: + is_present, matched_file = finder.is_model_in_mot(hf_model, mot_models) + if not is_present: + missing_models.append(hf_model) + + print(f"Found {len(missing_models)} missing models\n") + + # Generate report + report = finder.generate_report( + missing_models, + mot_models, + output_file=args.output + ) + + # Print report + print(report) + + # Summary + print("\n" + "=" * 80) + print("NEXT STEPS") + print("=" * 80) + print("1. Review the high priority models above") + print("2. Use model_scraper.py to generate draft YAML files") + print("3. Manually review and validate the generated files") + print("4. Submit PRs to add models to MOT database") + print() + print("Example workflow:") + print(" python model_scraper.py meta-llama/Llama-3-8B") + print(" # Review and edit ../models/Llama-3-8B.yml") + print(" php ../scripts/validate-model.php ../models/Llama-3-8B.yml") + print(" # Submit PR") + print() + + +if __name__ == '__main__': + main() + diff --git a/tools-py/model_scraper.py b/tools-py/model_scraper.py new file mode 100755 index 0000000..6358ce5 --- /dev/null +++ b/tools-py/model_scraper.py @@ -0,0 +1,557 @@ +#!/usr/bin/env python3 +""" +Model Openness Tool - Automated Model Data Scraper + +This script scrapes model information from HuggingFace and other sources +to generate draft YAML files for the Model Openness Framework (MOF). 
+ +Usage: + python model_scraper.py [--output-dir OUTPUT_DIR] + +Example: + python model_scraper.py meta-llama/Llama-3-8B --output-dir ../models +""" + +import argparse +import json +import os +import re +import sys +from datetime import datetime +from pathlib import Path +from typing import Dict, List, Optional, Tuple +from urllib.parse import urlparse + +import requests +import yaml + + +class ModelScraper: + """Scrapes model information from various sources.""" + + # MOF Component names as defined in the framework + MOF_COMPONENTS = { + 'code': [ + 'Training code', + 'Inference code', + 'Model architecture', + 'Supporting libraries and tools', + ], + 'data': [ + 'Training dataset', + 'Training data preprocessing', + 'Evaluation dataset', + 'Evaluation data preprocessing', + 'Sample model outputs', + 'Data card', + ], + 'document': [ + 'Technical report', + 'Research paper', + 'Model card', + 'Evaluation results', + 'Evaluation methodology', + ], + 'parameters': [ + 'Model parameters (Final)', + 'Model metadata', + ] + } + + # Known open licenses + OPEN_LICENSES = { + 'apache-2.0', 'mit', 'bsd', 'gpl', 'lgpl', 'mpl-2.0', + 'cc-by-4.0', 'cc-by-sa-4.0', 'openrail', 'bigscience-openrail-m', + 'bigscience-bloom-rail-1.0', 'creativeml-openrail-m' + } + + def __init__(self, hf_token: Optional[str] = None): + """Initialize the scraper. + + Args: + hf_token: Optional HuggingFace API token for accessing gated models + """ + self.hf_token = hf_token + self.session = requests.Session() + if hf_token: + self.session.headers.update({'Authorization': f'Bearer {hf_token}'}) + + def scrape_huggingface_model(self, model_id: str) -> Dict: + """Scrape model information from HuggingFace. + + Args: + model_id: HuggingFace model ID (e.g., 'meta-llama/Llama-3-8B') + + Returns: + Dictionary containing scraped model information + """ + print(f"Scraping HuggingFace model: {model_id}") + + # Get model info from HuggingFace API + api_url = f"https://huggingface.co/api/models/{model_id}" + + try: + response = self.session.get(api_url, timeout=30) + response.raise_for_status() + model_info = response.json() + except requests.exceptions.RequestException as e: + print(f"Error fetching model info: {e}") + return {} + + # Get model card content + card_url = f"https://huggingface.co/{model_id}/raw/main/README.md" + model_card_content = "" + try: + card_response = self.session.get(card_url, timeout=30) + if card_response.status_code == 200: + model_card_content = card_response.text + except requests.exceptions.RequestException: + pass + + # Get repository files list + files_url = f"https://huggingface.co/api/models/{model_id}/tree/main" + repo_files = [] + try: + files_response = self.session.get(files_url, timeout=30) + if files_response.status_code == 200: + repo_files = [f['path'] for f in files_response.json()] + except requests.exceptions.RequestException: + pass + + # Extract information + scraped_data = { + 'model_id': model_id, + 'model_info': model_info, + 'model_card': model_card_content, + 'repo_files': repo_files, + 'confidence': {} + } + + return scraped_data + + def detect_components(self, scraped_data: Dict) -> List[Dict]: + """Detect which MOF components are available. 
+ + Args: + scraped_data: Dictionary containing scraped model information + + Returns: + List of component dictionaries with name, description, license, and confidence + """ + components = [] + repo_files = scraped_data.get('repo_files', []) + model_card = scraped_data.get('model_card', '').lower() + model_info = scraped_data.get('model_info', {}) + + # Detect Model parameters (Final) + if any(f.endswith(('.bin', '.safetensors', '.pt', '.pth', '.ckpt')) for f in repo_files): + components.append({ + 'name': 'Model parameters (Final)', + 'description': 'Trained model parameters, weights and biases', + 'license': self._detect_license(scraped_data), + 'confidence': 0.95, + 'location': 'HuggingFace repository' + }) + + # Detect Model metadata + if any(f in repo_files for f in ['config.json', 'model_config.json', 'configuration.json']): + components.append({ + 'name': 'Model metadata', + 'description': 'Any model metadata including training configuration and optimizer states', + 'license': self._detect_license(scraped_data), + 'confidence': 0.90, + 'location': 'HuggingFace repository' + }) + + # Detect Model architecture + if any(f.endswith('.py') for f in repo_files) or 'modeling' in ' '.join(repo_files): + components.append({ + 'name': 'Model architecture', + 'description': "Well commented code for the model's architecture", + 'license': self._detect_license(scraped_data), + 'confidence': 0.85, + 'location': 'HuggingFace repository' + }) + + # Detect Inference code + if any('inference' in f.lower() or 'generate' in f.lower() for f in repo_files): + components.append({ + 'name': 'Inference code', + 'description': 'Code used for running the model to make predictions', + 'license': self._detect_license(scraped_data), + 'confidence': 0.80, + 'location': 'HuggingFace repository' + }) + + # Detect Model card + if 'README.md' in repo_files or model_card: + components.append({ + 'name': 'Model card', + 'description': 'Model details including performance metrics, intended use, and limitations', + 'license': self._detect_license(scraped_data), + 'confidence': 0.95, + 'location': 'HuggingFace repository' + }) + + # Detect Technical report (check model card for links) + if any(keyword in model_card for keyword in ['technical report', 'tech report', 'documentation']): + components.append({ + 'name': 'Technical report', + 'description': 'Technical report detailing capabilities and usage instructions for the model', + 'license': 'unlicensed', + 'confidence': 0.60, + 'location': 'Referenced in model card' + }) + + # Detect Research paper + if any(keyword in model_card for keyword in ['paper', 'arxiv', 'publication']): + components.append({ + 'name': 'Research paper', + 'description': 'Research paper detailing the development and capabilities of the model', + 'license': 'unlicensed', + 'confidence': 0.70, + 'location': 'Referenced in model card' + }) + + # Detect Evaluation results + if any(keyword in model_card for keyword in ['evaluation', 'benchmark', 'performance', 'results']): + components.append({ + 'name': 'Evaluation results', + 'description': 'The results from evaluating the model', + 'license': 'unlicensed', + 'confidence': 0.75, + 'location': 'Model card' + }) + + # Detect Training dataset (check for dataset references) + if any(keyword in model_card for keyword in ['training data', 'trained on', 'dataset']): + components.append({ + 'name': 'Training dataset', + 'description': 'The dataset used to train the model', + 'license': 'unlicensed', + 'confidence': 0.50, + 'location': 'Referenced in model 
card' + }) + + return components + + def _detect_license(self, scraped_data: Dict) -> str: + """Detect the license for the model. + + Args: + scraped_data: Dictionary containing scraped model information + + Returns: + License name or 'unlicensed' + """ + model_info = scraped_data.get('model_info', {}) + + # Check for license in model info + if 'cardData' in model_info and 'license' in model_info['cardData']: + license_name = model_info['cardData']['license'] + if license_name and license_name != 'other': + return license_name + + # Check for LICENSE file in repo + repo_files = scraped_data.get('repo_files', []) + if any(f.upper() == 'LICENSE' or f.upper() == 'LICENSE.md' for f in repo_files): + # Would need to fetch and parse the LICENSE file + return 'unlicensed' # Placeholder - needs manual review + + return 'unlicensed' + + def _detect_repository(self, scraped_data: Dict) -> Tuple[str, float]: + """Detect GitHub repository with confidence score. + + Args: + scraped_data: Dictionary containing scraped model information + + Returns: + Tuple of (repository_url, confidence_score) + """ + model_id = scraped_data.get('model_id', '') + model_card = scraped_data.get('model_card', '') + + # Method 1: Parse model card for GitHub links + github_pattern = r'https://github\.com/[^/\s"<>]+/[^/\s"<>]+' + github_urls = re.findall(github_pattern, model_card) + + if github_urls: + # Filter for most relevant (matching model name) + model_name = model_id.split('/')[-1].lower() + for url in github_urls: + # Clean up URL (remove trailing punctuation/markdown) + url = url.rstrip(')') + if model_name in url.lower(): + return url, 0.90 + # Return first GitHub URL found + return github_urls[0].rstrip(')'), 0.70 + + # Method 2: Try pattern-based inference + inferred_repo = f"https://github.com/{model_id}" + if self._check_repo_exists(inferred_repo): + return inferred_repo, 0.65 + + # Method 3: Try organization/model-base-name + if '/' in model_id: + org, name = model_id.split('/', 1) + # Try various name variations + base_names = [ + name.split('-')[0], # e.g., Mistral-7B-v0.1 → Mistral + name.lower(), + name, + ] + for base_name in base_names: + inferred_repo = f"https://github.com/{org}/{base_name}" + if self._check_repo_exists(inferred_repo): + return inferred_repo, 0.60 + + return '', 0.0 + + def _check_repo_exists(self, repo_url: str) -> bool: + """Check if GitHub repo exists. + + Args: + repo_url: GitHub repository URL + + Returns: + True if repo exists, False otherwise + """ + try: + response = self.session.head(repo_url, timeout=5, allow_redirects=True) + return response.status_code == 200 + except: + return False + + def _extract_model_metadata(self, scraped_data: Dict) -> Dict: + """Extract model metadata from scraped data. 
+ + Args: + scraped_data: Dictionary containing scraped model information + + Returns: + Dictionary with model metadata + """ + model_info = scraped_data.get('model_info', {}) + model_id = scraped_data.get('model_id', '') + + # Extract producer from model_id + producer = model_id.split('/')[0] if '/' in model_id else 'Unknown' + producer = producer.replace('-', ' ').replace('_', ' ').title() + + # Extract model name + model_name = model_id.split('/')[-1] if '/' in model_id else model_id + + # Detect model type from tags + tags = model_info.get('tags', []) + model_type = '' + type_mapping = { + 'text-generation': 'language', + 'text2text-generation': 'language', + 'image-to-text': 'multimodal', + 'text-to-image': 'image', + 'image-classification': 'vision', + 'object-detection': 'vision', + 'automatic-speech-recognition': 'audio', + } + + for tag in tags: + if tag in type_mapping: + model_type = type_mapping[tag] + break + + # Detect architecture + architecture = '' + model_card = scraped_data.get('model_card', '').lower() + if 'transformer' in model_card or 'transformer' in str(tags).lower(): + if 'decoder' in model_card: + architecture = 'transformer decoder' + elif 'encoder' in model_card: + architecture = 'transformer encoder-decoder' + else: + architecture = 'transformer' + elif 'diffusion' in model_card or 'diffusion' in str(tags).lower(): + architecture = 'diffusion' + + # Extract version (often in model name) + version_match = re.search(r'(\d+\.?\d*[BMK]?)', model_name) + version = version_match.group(1) if version_match else '1.0' + + # Get last modified date + last_modified = model_info.get('lastModified', '') + if last_modified: + date = last_modified.split('T')[0] + else: + date = datetime.now().strftime('%Y-%m-%d') + + # Detect repository + repository, repo_confidence = self._detect_repository(scraped_data) + + metadata = { + 'name': model_name, + 'version': version, + 'producer': producer, + 'type': model_type, + 'architecture': architecture, + 'date': date, + 'origin': model_name.lower(), + 'huggingface': f"https://huggingface.co/{model_id}", + } + + # Add repository if found + if repository: + metadata['repository'] = repository + metadata['repository_confidence'] = repo_confidence + + return metadata + + def _format_yaml_mot_style(self, metadata: Dict, components: List[Dict]) -> str: + """Format YAML in MOT style with proper indentation and quotes. 
+ + Args: + metadata: Model metadata dictionary + components: List of component dictionaries + + Returns: + Formatted YAML string matching MOT style + """ + lines = [] + + # Framework section + lines.append("framework:") + lines.append(" name: 'Model Openness Framework'") + lines.append(" version: '1.0'") + lines.append(" date: '2024-12-15'") + + # Release section + lines.append("release:") + lines.append(f" name: {metadata['name']}") + lines.append(f" version: '{metadata['version']}'") + lines.append(f" date: '{metadata['date']}'") + lines.append(" license: { }") + lines.append(f" type: '{metadata['type']}'") + lines.append(f" architecture: '{metadata['architecture']}'") + lines.append(f" origin: {metadata['origin']}") + lines.append(f" producer: '{metadata['producer']}'") + lines.append(" contact: ''") + + # Add repository if present + if metadata.get('repository'): + lines.append(f" repository: '{metadata['repository']}'") + + # Add huggingface if present + if metadata.get('huggingface'): + lines.append(f" huggingface: '{metadata['huggingface']}'") + + # Components section + lines.append(" components:") + for comp in components: + lines.append(" -") + lines.append(f" name: '{comp['name']}'") + lines.append(f" description: \"{comp['description']}\"") + + # Format license - handle different types + license_val = comp['license'] + if isinstance(license_val, list): + # Multiple licenses - just use first one for now + license_val = license_val[0] if license_val else 'unlicensed' + + if license_val and license_val != 'unlicensed': + lines.append(f" license: '{license_val}'") + else: + lines.append(f" license: unlicensed") + + return '\n'.join(lines) + + def generate_yaml(self, scraped_data: Dict, output_path: Optional[str] = None) -> str: + """Generate MOF-compliant YAML from scraped data. 
+ + Args: + scraped_data: Dictionary containing scraped model information + output_path: Optional path to save the YAML file + + Returns: + YAML string + """ + # Extract metadata + metadata = self._extract_model_metadata(scraped_data) + + # Detect components + components = self.detect_components(scraped_data) + + # Format in MOT style + yaml_output = self._format_yaml_mot_style(metadata, components) + + # Save to file if path provided + if output_path: + with open(output_path, 'w', encoding='utf-8') as f: + f.write(yaml_output) + print(f"YAML saved to: {output_path}") + + return yaml_output + + +def main(): + """Main entry point for the scraper.""" + parser = argparse.ArgumentParser( + description='Scrape model information and generate MOF YAML files' + ) + parser.add_argument( + 'model_id', + help='HuggingFace model ID (e.g., meta-llama/Llama-3-8B)' + ) + parser.add_argument( + '--output-dir', + default='../models', + help='Output directory for YAML files (default: ../models)' + ) + parser.add_argument( + '--hf-token', + help='HuggingFace API token for accessing gated models' + ) + + args = parser.parse_args() + + # Initialize scraper + scraper = ModelScraper(hf_token=args.hf_token) + + # Scrape model data + print(f"\n{'='*60}") + print(f"Scraping model: {args.model_id}") + print(f"{'='*60}\n") + + scraped_data = scraper.scrape_huggingface_model(args.model_id) + + if not scraped_data: + print("Failed to scrape model data") + sys.exit(1) + + # Generate output filename + model_name = args.model_id.split('/')[-1] + output_path = Path(args.output_dir) / f"{model_name}.yml" + output_path.parent.mkdir(parents=True, exist_ok=True) + + # Generate YAML + print(f"\n{'='*60}") + print("Generating YAML...") + print(f"{'='*60}\n") + + yaml_output = scraper.generate_yaml(scraped_data, str(output_path)) + + print(f"\n{'='*60}") + print("DRAFT YAML GENERATED") + print(f"{'='*60}\n") + print("⚠️ IMPORTANT: This is a DRAFT that requires manual review!") + print(" - Verify all component availability") + print(" - Confirm license information") + print(" - Add missing components") + print(" - Update confidence scores") + print(f"\nOutput saved to: {output_path}") + print(f"\nNext steps:") + print(f" 1. Review and edit: {output_path}") + print(f" 2. Validate: php scripts/validate-model.php {output_path}") + print(f" 3. Submit PR to add to MOT database") + + +if __name__ == '__main__': + main() diff --git a/tools-py/requirements.txt b/tools-py/requirements.txt new file mode 100644 index 0000000..f987aff --- /dev/null +++ b/tools-py/requirements.txt @@ -0,0 +1,5 @@ +# Python dependencies for Model Openness Tool scraper +# Install with: pip install -r requirements.txt + +requests>=2.31.0 +PyYAML>=6.0.1 \ No newline at end of file