diff --git a/README.md b/README.md
index 0d7558af8..e468dad47 100644
--- a/README.md
+++ b/README.md
@@ -298,6 +298,7 @@ Our installation scripts automatically handle all dependencies:
 | Service | Purpose | Auto-installed? |
 |---------|---------|-----------------|
 | **[Ollama](https://ollama.ai/)** | Local LLM for text processing | ✅ Yes, with default model |
+| **[MLX-LM](https://github.com/ml-explore/mlx-lm)** | Fast LLM on Apple Silicon | ⚙️ Optional, via `uvx` |
 | **[Wyoming Faster Whisper](https://github.com/rhasspy/wyoming-faster-whisper)** | Speech-to-text | ✅ Yes, via `uvx` |
 | **[Wyoming Piper](https://github.com/rhasspy/wyoming-piper)** | Text-to-speech | ✅ Yes, via `uvx` |
 | **[Kokoro-FastAPI](https://github.com/remsky/Kokoro-FastAPI)** | Premium TTS (optional) | ⚙️ Can be added later |
@@ -318,10 +319,13 @@ You can also use other OpenAI-compatible local servers:
 
 | Server | Purpose | Setup Required |
 |---------|---------|----------------|
+| **[MLX-LM](https://github.com/ml-explore/mlx-lm)** | Fast LLM inference on Apple Silicon | `./scripts/run-mlx-lm.sh` or use `--openai-base-url http://localhost:10500/v1` |
 | **llama.cpp** | Local LLM inference | Use `--openai-base-url http://localhost:8080/v1` |
 | **vLLM** | High-performance LLM serving | Use `--openai-base-url` with server endpoint |
 | **Ollama** | Default local LLM | Already configured as default |
 
+> **Apple Silicon Users**: MLX-LM provides significantly faster inference than Ollama on M1/M2/M3/M4 Macs. Start it with `./scripts/run-mlx-lm.sh` and use `--llm-provider openai --openai-base-url http://localhost:10500/v1` to connect.
+
 ## Usage
 
 This package provides multiple command-line tools, each designed for a specific purpose.
diff --git a/scripts/run-mlx-lm.sh b/scripts/run-mlx-lm.sh
new file mode 100755
index 000000000..0ff27096f
--- /dev/null
+++ b/scripts/run-mlx-lm.sh
@@ -0,0 +1,44 @@
+#!/usr/bin/env bash
+echo "🧠 Starting MLX LLM Server..."
+
+# Check if running on macOS with Apple Silicon
+if [[ "$(uname)" != "Darwin" ]]; then
+    echo "❌ MLX only works on macOS with Apple Silicon."
+    exit 1
+fi
+
+if [[ "$(uname -m)" != "arm64" ]]; then
+    echo "❌ MLX requires Apple Silicon (M1/M2/M3/M4). Intel Macs are not supported."
+    exit 1
+fi
+
+# Default model - can be overridden with MLX_MODEL environment variable
+# Popular options:
+#   - mlx-community/Qwen3-4B-4bit (fast, high quality, default)
+#   - mlx-community/Qwen3-8B-4bit (larger, even better quality)
+#   - mlx-community/gpt-oss-20b-MXFP4-Q8 (20B parameter, high quality)
+MODEL="${MLX_MODEL:-mlx-community/Qwen3-4B-4bit}"
+PORT="${MLX_PORT:-10500}"
+
+echo "📦 Model: $MODEL"
+echo "🔌 Port: $PORT"
+echo ""
+echo "Usage with agent-cli:"
+echo "  agent-cli transcribe --llm --llm-provider openai --openai-base-url http://localhost:$PORT/v1 --llm-openai-model $MODEL"
+echo "  agent-cli autocorrect --llm-provider openai --openai-base-url http://localhost:$PORT/v1 --llm-openai-model $MODEL"
+echo ""
+echo "To make MLX the default, add to ~/.config/agent-cli/config.toml:"
+echo "  [defaults]"
+echo "  llm_provider = \"openai\""
+echo "  openai_base_url = \"http://localhost:$PORT/v1\""
+echo "  llm_openai_model = \"$MODEL\""
+echo ""
+
+# Run mlx-lm server using uvx
+# --host 0.0.0.0 allows connections from other machines/tools
+uvx --python 3.12 \
+    --from "mlx-lm" \
+    mlx_lm.server \
+    --model "$MODEL" \
+    --host 0.0.0.0 \
+    --port "$PORT"
diff --git a/scripts/start-all-services.sh b/scripts/start-all-services.sh
index da3748c4f..ea3bf18c5 100755
--- a/scripts/start-all-services.sh
+++ b/scripts/start-all-services.sh
@@ -10,6 +10,21 @@ fi
 
 # Get the current directory
 SCRIPTS_DIR="$(cd "$(dirname "$0")" && pwd)"
+# Determine LLM pane based on platform
+# Use MLX-LLM on macOS ARM (Apple Silicon), Ollama otherwise
+if [[ "$(uname)" == "Darwin" && "$(uname -m)" == "arm64" ]]; then
+    LLM_PANE='            pane {
+                name "MLX-LLM"
+                cwd "'"$SCRIPTS_DIR"'"
+                command "./run-mlx-lm.sh"
+            }'
+else
+    LLM_PANE='            pane {
+                name "Ollama"
+                command "ollama"
+                args "serve"
+            }'
+fi
 
 # Create .runtime directory and Zellij layout file
 mkdir -p "$SCRIPTS_DIR/.runtime"
@@ -19,11 +34,7 @@ session_name "agent-cli"
 layout {
     pane split_direction="vertical" {
         pane split_direction="horizontal" {
-            pane {
-                name "Ollama"
-                command "ollama"
-                args "serve"
-            }
+$LLM_PANE
             pane {
                 name "Help"
                 command "sh"