From c16eb36bc6d9fcf2ff49d77b91a787e43b9bd1bb Mon Sep 17 00:00:00 2001
From: Avinash Singh
Date: Mon, 22 Dec 2025 15:15:23 +0530
Subject: [PATCH 1/2] Build top HuggingFace models

Signed-off-by: Avinash Singh
---
 .github/workflows/build-top-models.yml | 267 ++++++++++++++++++++
 contrib/scripts/requirements.txt       |   1 +
 contrib/scripts/select-top-models.py   | 329 +++++++++++++++++++++++++
 contrib/scripts/top-model-selection.md | 131 ++++++++++
 4 files changed, 728 insertions(+)
 create mode 100644 .github/workflows/build-top-models.yml
 create mode 100644 contrib/scripts/requirements.txt
 create mode 100644 contrib/scripts/select-top-models.py
 create mode 100644 contrib/scripts/top-model-selection.md

diff --git a/.github/workflows/build-top-models.yml b/.github/workflows/build-top-models.yml
new file mode 100644
index 00000000..da50e6b2
--- /dev/null
+++ b/.github/workflows/build-top-models.yml
@@ -0,0 +1,267 @@
+name: Build Top HuggingFace Models
+
+on:
+  schedule:
+    # Run weekly on Sunday at 00:00 UTC
+    - cron: '0 0 * * 0'
+  workflow_dispatch:
+    inputs:
+      limit:
+        description: 'Number of models to build'
+        required: false
+        default: '10'
+        type: string
+      max_size:
+        description: 'Maximum model size in GB'
+        required: false
+        default: '10'
+        type: string
+      sort_by:
+        description: 'Sort criteria'
+        required: false
+        default: 'downloads'
+        type: choice
+        options:
+          - downloads
+          - likes
+          - trending
+
+permissions:
+  contents: read
+  packages: write
+
+env:
+  REGISTRY: ghcr.io
+  ORGANIZATION: ${{ github.repository_owner }}
+
+jobs:
+  select-models:
+    name: Select Top Models
+    runs-on: ubuntu-latest
+    outputs:
+      models: ${{ steps.select-models.outputs.models }}
+    steps:
+      - name: Checkout code
+        uses: actions/checkout@v4
+
+      - name: Set up Python
+        uses: actions/setup-python@v5
+        with:
+          python-version: "3.14"
+
+      - name: Install dependencies
+        run: |
+          pip install -r contrib/scripts/requirements.txt
+
+      - name: Select compatible models
+        id: select-models
+        env:
+          HF_TOKEN: ${{ secrets.HF_TOKEN }}
+        run: |
+          python contrib/scripts/select-top-models.py \
+            --limit ${{ github.event.inputs.limit || '10' }} \
+            --max-size ${{ github.event.inputs.max_size || '10' }} \
+            --sort-by ${{ github.event.inputs.sort_by || 'downloads' }} \
+            --output models.json
+
+          # Convert to single line JSON for GitHub output
+          echo "models=$(cat models.json | jq -c)" >> $GITHUB_OUTPUT
+
+          # Also display selected models for debugging
+          echo "Selected models:"
+          cat models.json | jq -r '.[] | "\(.id) (\(.format), \(.size_gb)GB)"'
+
+  build-modctl:
+    name: Build modctl
+    runs-on: ubuntu-latest
+    env:
+      PACKAGE_DIR: modctl-build-package
+    steps:
+      - name: Checkout code
+        uses: actions/checkout@v4
+        with:
+          submodules: recursive
+
+      - name: Install Go
+        uses: actions/setup-go@v5
+        with:
+          go-version-file: go.mod
+          cache-dependency-path: go.sum
+
+      - name: Create Cache Dir
+        run: |
+          mkdir -p ${{ env.PACKAGE_DIR }}
+
+      - name: Cache Package
+        id: cache-package
+        uses: actions/cache@v4
+        with:
+          path: ${{ env.PACKAGE_DIR }}
+          key: modctl-build-packages
+
+      - name: Install dependencies
+        run: |
+          sudo apt-get update
+          sudo apt-get install -y pkg-config
+          sudo DEBIAN_FRONTEND=noninteractive apt install -y build-essential \
+            cmake pkg-config libssl-dev libssh2-1-dev zlib1g-dev \
+            libhttp-parser-dev python3 wget tar git
+          mkdir -p ${{ env.PACKAGE_DIR }}
+          if [ ! 
-f "${{ env.PACKAGE_DIR }}/libgit2-v1.5.1.tar.gz" ]; then + wget https://github.com/libgit2/libgit2/archive/refs/tags/v1.5.1.tar.gz -O ${{ env.PACKAGE_DIR }}/libgit2-v1.5.1.tar.gz + fi + tar -xzf ${{ env.PACKAGE_DIR }}/libgit2-v1.5.1.tar.gz + cd libgit2-1.5.1 && mkdir build && cd build + cmake .. -DCMAKE_BUILD_TYPE=Release -DBUILD_SHARED_LIBS=OFF + make -j$(nproc) + sudo make install + sudo ldconfig + env: + LIBGIT2_SYS_USE_PKG_CONFIG: "1" + + - name: Build modctl + run: | + go build -tags "static system_libgit2 enable_libgit2" + + - name: Upload modctl + uses: actions/upload-artifact@v4 + with: + name: modctl-artifact + path: modctl + + build-and-push-models: + name: Build ${{ matrix.model.id }} + needs: [select-models, build-modctl] + runs-on: ubuntu-latest + timeout-minutes: 120 + strategy: + fail-fast: false + max-parallel: 3 # Don't overwhelm GHCR + matrix: + model: ${{ fromJson(needs.select-models.outputs.models) }} + env: + MODEL_ID: ${{ matrix.model.id }} + MODEL_FAMILY: ${{ matrix.model.family }} + MODEL_FORMAT: ${{ matrix.model.format }} + MODEL_PARAM_SIZE: ${{ matrix.model.param_size }} + MODEL_DIR: model-files + steps: + - name: Download modctl artifact + uses: actions/download-artifact@v4 + with: + name: modctl-artifact + path: modctl + + - name: Setup modctl + run: | + sudo cp modctl/modctl /bin/modctl + sudo chmod +x /bin/modctl + modctl version + modctl login -u ${{ github.actor }} \ + -p ${{ secrets.GITHUB_TOKEN }} \ + ${{ env.REGISTRY }} + + - name: Set up Python + uses: actions/setup-python@v5 + with: + python-version: "3.10" + + - name: Cache model + uses: actions/cache@v4 + id: cache-model + with: + path: ${{ env.MODEL_DIR }} + key: model-${{ env.MODEL_ID }}-${{ hashFiles('**/config.json') }} + + - name: Download HuggingFace Model + if: steps.cache-model.outputs.cache-hit != 'true' + env: + HF_TOKEN: ${{ secrets.HF_TOKEN }} + run: | + pip install 'huggingface_hub' + python << 'EOF' + from huggingface_hub import snapshot_download + import os + + model_id = os.environ['MODEL_ID'] + model_dir = os.environ['MODEL_DIR'] + + print(f"Downloading {model_id}...") + snapshot_download( + repo_id=model_id, + local_dir=model_dir, + token=os.environ.get('HF_TOKEN') + ) + print(f"Download complete: {model_dir}") + EOF + + - name: Generate Modelfile + run: | + cd ${{ env.MODEL_DIR }} + echo "Generating Modelfile for ${{ env.MODEL_ID }}" + modctl modelfile generate \ + --arch transformer \ + --family ${{ env.MODEL_FAMILY }} \ + --format ${{ env.MODEL_FORMAT }} \ + --param-size ${{ env.MODEL_PARAM_SIZE }} \ + . + + echo "Generated Modelfile:" + cat Modelfile + + - name: Build and Push Model + run: | + cd ${{ env.MODEL_DIR }} + + # Convert model ID to valid image name (lowercase, replace / with -) + IMAGE_NAME=$(echo "${{ env.MODEL_ID }}" | tr '[:upper:]' '[:lower:]' | tr '/' '-') + IMAGE_URL="${{ env.REGISTRY }}/${{ env.ORGANIZATION }}/${IMAGE_NAME}:latest" + + echo "Building and pushing to ${IMAGE_URL}" + + modctl build -f Modelfile \ + -t ${IMAGE_URL} \ + --raw --output-remote --log-level debug \ + . + + echo "Successfully pushed ${IMAGE_URL}" + + - name: Cleanup HuggingFace Model Files + run: | + echo "Cleaning up HuggingFace model files to free disk space..." 
+ du -sh ${{ env.MODEL_DIR }} || true + rm -rf ${{ env.MODEL_DIR }} + echo "Cleanup complete" + df -h + + - name: Verify Pull + run: | + # Convert model ID to valid image name + IMAGE_NAME=$(echo "${{ env.MODEL_ID }}" | tr '[:upper:]' '[:lower:]' | tr '/' '-') + IMAGE_URL="${{ env.REGISTRY }}/${{ env.ORGANIZATION }}/${IMAGE_NAME}:latest" + + mkdir -p verify-download + echo "Pulling ${IMAGE_URL} to verify" + + modctl pull ${IMAGE_URL} \ + --extract-dir verify-download \ + --log-level debug + + echo "Successfully verified pull from ${IMAGE_URL}" + + summary: + name: Build Summary + needs: [select-models, build-and-push-models] + runs-on: ubuntu-latest + if: always() + steps: + - name: Generate Summary + run: | + echo "# Build Top Models Summary" >> $GITHUB_STEP_SUMMARY + echo "" >> $GITHUB_STEP_SUMMARY + echo "## Selected Models" >> $GITHUB_STEP_SUMMARY + echo "" >> $GITHUB_STEP_SUMMARY + echo '${{ needs.select-models.outputs.models }}' | jq -r '.[] | "- **\(.id)** (\(.format), \(.param_size), \(.size_gb)GB) - \(.downloads) downloads"' >> $GITHUB_STEP_SUMMARY || true + echo "" >> $GITHUB_STEP_SUMMARY + echo "## Build Status" >> $GITHUB_STEP_SUMMARY + echo "Check individual job results above." >> $GITHUB_STEP_SUMMARY diff --git a/contrib/scripts/requirements.txt b/contrib/scripts/requirements.txt new file mode 100644 index 00000000..8ff2acb0 --- /dev/null +++ b/contrib/scripts/requirements.txt @@ -0,0 +1 @@ +huggingface_hub>=0.20.0 diff --git a/contrib/scripts/select-top-models.py b/contrib/scripts/select-top-models.py new file mode 100644 index 00000000..f6b2b409 --- /dev/null +++ b/contrib/scripts/select-top-models.py @@ -0,0 +1,329 @@ +#!/usr/bin/env python3 +""" +Select top HuggingFace models compatible with modctl. + +This script fetches popular models from HuggingFace Hub and filters them +based on modctl compatibility criteria: +1. Has config.json for auto-detection +2. Has model files in supported formats (safetensors, gguf, bin, pt) +3. Size is under a reasonable limit +4. 
Has necessary metadata for modelfile generation
+"""
+
+import json
+import re
+import sys
+import argparse
+from typing import List, Dict, Optional
+from huggingface_hub import HfApi
+
+# Try to import ModelFilter; newer huggingface_hub releases removed it
+try:
+    from huggingface_hub import ModelFilter
+except ImportError:
+    ModelFilter = None
+
+
+# Supported model file formats (based on pkg/modelfile/constants.go)
+SUPPORTED_FORMATS = [
+    "safetensors",
+    "gguf",
+    "bin",
+    "pt",
+    "pth",
+    "onnx",
+]
+
+# Model families known to work well with modctl
+KNOWN_FAMILIES = [
+    "llama",
+    "qwen",
+    "qwen2",
+    "qwen3",
+    "mistral",
+    "phi",
+    "gpt2",
+    "gpt_neo",
+    "gpt_neox",
+    "bloom",
+    "opt",
+    "falcon",
+    "mpt",
+    "stablelm",
+]
+
+
+def get_model_size_gb(model_info) -> Optional[float]:
+    """Estimate model size in GB from model info."""
+    try:
+        total_size = 0
+        if hasattr(model_info, 'siblings') and model_info.siblings:
+            for file in model_info.siblings:
+                if hasattr(file, 'size') and file.size:
+                    total_size += file.size
+        return total_size / (1024 ** 3)  # Convert to GB
+    except Exception:
+        return None
+
+
+def has_config_json(model_info) -> bool:
+    """Check if model has config.json for auto-detection."""
+    try:
+        if hasattr(model_info, 'siblings') and model_info.siblings:
+            filenames = [f.rfilename for f in model_info.siblings]
+            return "config.json" in filenames
+        return False
+    except Exception:
+        return False
+
+
+def get_model_format(model_info) -> Optional[str]:
+    """Detect model format from repository files."""
+    try:
+        if not hasattr(model_info, 'siblings') or not model_info.siblings:
+            return None
+
+        # Check for each supported format
+        for file in model_info.siblings:
+            filename = file.rfilename.lower()
+            if filename.endswith('.safetensors'):
+                return "safetensors"
+            elif filename.endswith('.gguf'):
+                return "gguf"
+            elif filename.endswith('.onnx'):
+                return "onnx"
+            elif filename.endswith('.bin') and 'pytorch_model' in filename:
+                return "bin"
+            elif filename.endswith('.pt') or filename.endswith('.pth'):
+                return "pt"
+
+        return None
+    except Exception:
+        return None
+
+
+def extract_param_size(model_id: str) -> Optional[str]:
+    """Extract parameter size from model name or metadata."""
+    # Common patterns: 7B, 8B, 13B, 0.5B, 1.1B, etc. (matched against the lowercased name)
+    patterns = [
+        r'(\d+\.?\d*[bm])',  # 7b, 0.5b, 125m (name is lowercased below, so match lowercase)
+        r'(\d+\.?\d*)b',  # digits captured without the unit suffix
+    ]
+
+    model_name = model_id.lower()
+    for pattern in patterns:
+        match = re.search(pattern, model_name)
+        if match:
+            size = match.group(1).upper()
+            if not size.endswith('B') and not size.endswith('M'):
+                size += 'B'
+            return size
+
+    return None
+
+
+def detect_family(model_info, model_id: str) -> Optional[str]:
+    """Detect model family from model info."""
+    try:
+        # Try to get from config
+        if hasattr(model_info, 'config') and model_info.config:
+            model_type = model_info.config.get('model_type')
+            if model_type and model_type in KNOWN_FAMILIES:
+                return model_type
+
+        # Fallback to tags
+        if hasattr(model_info, 'tags') and model_info.tags:
+            for tag in model_info.tags:
+                if tag in KNOWN_FAMILIES:
+                    return tag
+
+        # Last resort: parse from model name
+        model_name_lower = model_id.lower()
+        for family in KNOWN_FAMILIES:
+            if family in model_name_lower:
+                return family
+
+        return None
+    except Exception:
+        return None
+
+
+def is_compatible_model(api: HfApi, model_id: str, max_size_gb: float = 20.0) -> tuple[bool, Optional[Dict]]:
+    """
+    Check if model is compatible with modctl.
+ + Returns: + (is_compatible, model_metadata) tuple + """ + # Get all model information + try: + model_info = api.model_info(model_id, files_metadata=True) + except Exception as e: + print(f"Skipping {model_id}: Could not fetch model info: {e}", file=sys.stderr) + return False, None + + # Check for config.json + if not has_config_json(model_info): + print(f"Skipping {model_id}: No config.json", file=sys.stderr) + return False, None + + # Check format + format_type = get_model_format(model_info) + if not format_type: + print(f"Skipping {model_id}: No supported model format found", file=sys.stderr) + return False, None + + # Check size + size_gb = get_model_size_gb(model_info) + if size_gb and size_gb > max_size_gb: + print(f"Skipping {model_id}: Too large ({size_gb:.2f}GB > {max_size_gb}GB)", file=sys.stderr) + return False, None + + # Detect family + family = detect_family(model_info, model_id) + + # Extract param size + param_size = extract_param_size(model_id) + + metadata = { + "id": model_id, + "family": family or "unknown", + "arch": "transformer", # modctl auto-detects this from config.json + "format": format_type, + "param_size": param_size or "unknown", + "size_gb": round(size_gb, 2) if size_gb else None, + "downloads": model_info.downloads if hasattr(model_info, 'downloads') else 0, + "likes": model_info.likes if hasattr(model_info, 'likes') else 0, + } + + return True, metadata + + +def select_top_models( + limit: int = 10, + max_size_gb: float = 20.0, + sort_by: str = "downloads", + task: Optional[str] = "text-generation", +) -> List[Dict]: + """ + Select top models from HuggingFace Hub. + + Args: + limit: Number of models to return + max_size_gb: Maximum model size in GB + sort_by: Sort criteria (downloads, likes, trending) + task: Task filter (text-generation, image-classification, etc.) 
+
+    Returns:
+        List of model metadata dictionaries
+    """
+    api = HfApi()
+
+    print(f"Fetching top {limit} models (sort by: {sort_by}, max size: {max_size_gb}GB)...", file=sys.stderr)
+
+    # Fetch more models than needed to account for filtering
+    fetch_limit = limit * 10
+
+    # Use ModelFilter if available, otherwise pass task as filter string
+    if ModelFilter is not None:
+        model_filter = ModelFilter(
+            task=task,
+            library="transformers",
+        )
+        models = api.list_models(
+            filter=model_filter,
+            sort=sort_by,
+            direction=-1,
+            limit=fetch_limit,
+        )
+    else:
+        # Newer huggingface_hub releases removed ModelFilter; pass the task as a plain filter string
+        models = api.list_models(
+            filter=task,
+            sort=sort_by,
+            direction=-1,
+            limit=fetch_limit,
+        )
+
+    selected = []
+    checked = 0
+
+    for model in models:
+        checked += 1
+        print(f"Checking {checked}/{fetch_limit}: {model.id}...", file=sys.stderr)
+
+        is_compatible, metadata = is_compatible_model(api, model.id, max_size_gb)
+
+        if is_compatible and metadata:
+            selected.append(metadata)
+            print(f"✓ Added {model.id} ({len(selected)}/{limit})", file=sys.stderr)
+
+        if len(selected) >= limit:
+            break
+
+    print(f"\nSelected {len(selected)} compatible models", file=sys.stderr)
+    return selected
+
+
+def main():
+    parser = argparse.ArgumentParser(
+        description="Select top HuggingFace models compatible with modctl"
+    )
+    parser.add_argument(
+        "--limit",
+        type=int,
+        default=10,
+        help="Number of models to select (default: 10)",
+    )
+    parser.add_argument(
+        "--max-size",
+        type=float,
+        default=20.0,
+        help="Maximum model size in GB (default: 20.0)",
+    )
+    parser.add_argument(
+        "--sort-by",
+        choices=["downloads", "likes", "trending"],
+        default="downloads",
+        help="Sort criteria (default: downloads)",
+    )
+    parser.add_argument(
+        "--task",
+        default="text-generation",
+        help="Task filter (default: text-generation)",
+    )
+    parser.add_argument(
+        "--output",
+        help="Output file path (default: stdout)",
+    )
+
+    args = parser.parse_args()
+
+    try:
+        models = select_top_models(
+            limit=args.limit,
+            max_size_gb=args.max_size,
+            sort_by=args.sort_by,
+            task=args.task,
+        )
+
+        output = json.dumps(models, indent=2)
+
+        if args.output:
+            with open(args.output, 'w') as f:
+                f.write(output)
+            print(f"\nWrote {len(models)} models to {args.output}", file=sys.stderr)
+        else:
+            print(output)
+
+        return 0
+
+    except Exception as e:
+        print(f"Error: {e}", file=sys.stderr)
+        import traceback
+        traceback.print_exc(file=sys.stderr)
+        return 1
+
+
+if __name__ == "__main__":
+    sys.exit(main())
diff --git a/contrib/scripts/top-model-selection.md b/contrib/scripts/top-model-selection.md
new file mode 100644
index 00000000..fbf03fb7
--- /dev/null
+++ b/contrib/scripts/top-model-selection.md
@@ -0,0 +1,131 @@
+# Model Selection Scripts
+
+This directory contains scripts for selecting and filtering HuggingFace models compatible with modctl.
+
+## select-top-models.py
+
+A Python script that fetches top models from HuggingFace Hub and filters them based on modctl compatibility criteria.
+
+### Compatibility Criteria
+
+The script filters models based on:
+
+1. **Has config.json** - Required for auto-detection of model metadata
+2. **Supported formats** - Must have files in formats like:
+   - `safetensors` (preferred)
+   - `gguf`
+   - `bin` (PyTorch)
+   - `pt`, `pth` (PyTorch)
+   - `onnx`
+3. **Size limit** - Configurable maximum size (default: 20GB)
+4. **Metadata** - Attempts to extract:
+   - Model family (llama, qwen, gpt2, etc.)
+   - Parameter size (0.5B, 7B, etc.)
+   - Format type
+
+### Installation
+
+```bash
+pip install -r requirements.txt
+```
+
+### Usage
+
+Basic usage (fetch top 10 models by downloads):
+
+```bash
+python contrib/scripts/select-top-models.py
+```
+
+#### Options
+
+```bash
+python contrib/scripts/select-top-models.py \
+  --limit 10 \
+  --max-size 20.0 \
+  --sort-by downloads \
+  --task text-generation \
+  --output models.json
+```
+
+- `--limit` - Number of models to select (default: 10)
+- `--max-size` - Maximum model size in GB (default: 20.0)
+- `--sort-by` - Sort by: downloads, likes, trending (default: downloads)
+- `--task` - Task filter (default: text-generation)
+- `--output` - Output file (default: stdout)
+
+#### Examples
+
+Get top 5 small models (< 5GB):
+
+```bash
+python contrib/scriptsselect-top-models.py --limit 5 --max-size 5
+```
+
+Get most liked models:
+
+```bash
+python contrib/scripts/select-top-models.py --limit 10 --sort-by likes
+```
+
+Save to file:
+
+```bash
+python contrib/scripts/select-top-models.py --limit 20 --output top_models.json
+```
+
+### Output Format
+
+The script outputs JSON with model metadata:
+
+```json
+[
+  {
+    "id": "Qwen/Qwen3-0.6B",
+    "family": "qwen3",
+    "arch": "transformer",
+    "format": "safetensors",
+    "param_size": "0.6B",
+    "size_gb": 1.41,
+    "downloads": 7509488,
+    "likes": 867
+  }
+]
+```
+
+### Authentication
+
+Some models require HuggingFace authentication. Set the `HF_TOKEN` environment variable:
+
+```bash
+export HF_TOKEN="your_huggingface_token"
+python contrib/scripts/select-top-models.py
+```
+
+Or use `huggingface-cli`:
+
+```bash
+huggingface-cli login
+python contrib/scripts/select-top-models.py
+```
+
+## GitHub Workflow Integration
+
+The `build-top-models.yml` workflow uses this script to automatically:
+
+1. Select top models from HuggingFace
+2. Build them using modctl
+3. Push to GitHub Container Registry
+
+### Manual Trigger
+
+You can manually trigger the workflow from the GitHub Actions tab with custom parameters:
+
+- **limit**: Number of models to build (default: 10)
+- **max_size**: Maximum model size in GB (default: 10)
+- **sort_by**: Sort criteria - downloads, likes, or trending
+
+### Scheduled Runs
+
+The workflow runs automatically every Sunday at 00:00 UTC.
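+
+The cadence comes from the `on.schedule` trigger in `.github/workflows/build-top-models.yml`:
+
+```yaml
+on:
+  schedule:
+    # Run weekly on Sunday at 00:00 UTC
+    - cron: '0 0 * * 0'
+```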
+
+### Required Secrets
+
+The workflow requires these GitHub secrets:
+
+- `HF_TOKEN` - HuggingFace API token (for downloading models)
+- `GITHUB_TOKEN` - Automatically provided by GitHub Actions

From 9f4e06b4b1216cbd6309b46c3a836d558fbb29a4 Mon Sep 17 00:00:00 2001
From: Avinash Singh
Date: Mon, 22 Dec 2025 15:28:46 +0530
Subject: [PATCH 2/2] Optimise as per review

Signed-off-by: Avinash Singh
---
 contrib/scripts/select-top-models.py   | 23 +++++++++++++----------
 contrib/scripts/top-model-selection.md |  2 +-
 2 files changed, 14 insertions(+), 11 deletions(-)

diff --git a/contrib/scripts/select-top-models.py b/contrib/scripts/select-top-models.py
index f6b2b409..26fe7cf3 100644
--- a/contrib/scripts/select-top-models.py
+++ b/contrib/scripts/select-top-models.py
@@ -35,7 +35,7 @@
 ]
 
 # Model families known to work well with modctl
-KNOWN_FAMILIES = [
+KNOWN_FAMILIES = {
     "llama",
     "qwen",
     "qwen2",
@@ -50,7 +50,7 @@
     "falcon",
     "mpt",
     "stablelm",
-]
+}
 
 
 def get_model_size_gb(model_info) -> Optional[float]:
@@ -62,7 +62,8 @@ def get_model_size_gb(model_info) -> Optional[float]:
             if hasattr(file, 'size') and file.size:
                 total_size += file.size
         return total_size / (1024 ** 3)  # Convert to GB
-    except Exception:
+    except Exception as e:
+        print(f"Error: An error occurred in get_model_size_gb: {e}", file=sys.stderr)
         return None
 
 
@@ -70,11 +71,11 @@ def has_config_json(model_info) -> bool:
     """Check if model has config.json for auto-detection."""
     try:
         if hasattr(model_info, 'siblings') and model_info.siblings:
-            filenames = [f.rfilename for f in model_info.siblings]
-            return "config.json" in filenames
-        return False
-    except Exception:
+            return any(f.rfilename == "config.json" for f in model_info.siblings)
         return False
+    except Exception as e:
+        print(f"Error: An error occurred in has_config_json: {e}", file=sys.stderr)
+        return False
 
 
 def get_model_format(model_info) -> Optional[str]:
@@ -98,7 +99,8 @@ def get_model_format(model_info) -> Optional[str]:
             return "pt"
 
         return None
-    except Exception:
+    except Exception as e:
+        print(f"Error: An error occurred in get_model_format: {e}", file=sys.stderr)
         return None
 
 
@@ -144,7 +146,8 @@ def detect_family(model_info, model_id: str) -> Optional[str]:
             return family
 
         return None
-    except Exception:
+    except Exception as e:
+        print(f"Error: An error occurred in detect_family: {e}", file=sys.stderr)
         return None
 
 
@@ -319,7 +322,7 @@ def main():
         return 0
 
     except Exception as e:
-        print(f"Error: {e}", file=sys.stderr)
+        print(f"Error: An error occurred in main: {e}", file=sys.stderr)
         import traceback
         traceback.print_exc(file=sys.stderr)
         return 1
diff --git a/contrib/scripts/top-model-selection.md b/contrib/scripts/top-model-selection.md
index fbf03fb7..6c552066 100644
--- a/contrib/scripts/top-model-selection.md
+++ b/contrib/scripts/top-model-selection.md
@@ -53,7 +53,7 @@ python contrib/scripts/select-top-models.py \
 Get top 5 small models (< 5GB):
 
 ```bash
-python contrib/scriptsselect-top-models.py --limit 5 --max-size 5
+python contrib/scripts/select-top-models.py --limit 5 --max-size 5
 ```
 
 Get most liked models: