forked from morphik-org/morphik-core
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathinstall_docker.sh
More file actions
executable file
·443 lines (384 loc) · 19.3 KB
/
install_docker.sh
File metadata and controls
executable file
·443 lines (384 loc) · 19.3 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
#!/bin/bash
set -e
# Purpose: End-user installation script for Morphik
# This script downloads docker-compose.run.yml, creates .env file, downloads morphik.toml,
# and starts all services. Creates start-morphik.sh for easy restarts with automatic
# port detection from morphik.toml configuration.
# Usage: curl -sSL https://install.morphik.ai | bash
# --- Configuration ---
REPO_URL="https://raw.githubusercontent.com/morphik-org/morphik-core/main"
COMPOSE_FILE="docker-compose.run.yml"
DIRECT_INSTALL_URL="https://www.morphik.ai/docs/getting-started#self-host-direct-installation-advanced"
EMBEDDING_PROVIDER=""
EMBEDDING_PROVIDER_LABEL=""
# --- Helper Functions ---
print_info() {
echo -e "\033[34m[INFO]\033[0m $1"
}
print_warning() {
echo -e "\033[33m[WARNING]\033[0m $1"
}
print_success() {
echo -e "\033[32m[SUCCESS]\033[0m $1"
}
print_error() {
echo -e "\033[31m[ERROR]\033[0m $1" >&2
exit 1
}
check_command() {
if ! command -v "$1" &> /dev/null; then
print_error "'$1' is not installed. Please install it to continue."
fi
}
# --- Main Script ---
# 1. Check for prerequisites
print_info "Checking for Docker and Docker Compose..."
check_command "docker"
if ! docker compose version &> /dev/null; then
print_error "Docker Compose V2 is required. Please ensure it's installed and accessible."
fi
print_success "Prerequisites are satisfied."
# 2. Apple Silicon Warning
if [[ "$(uname -s)" == "Darwin" ]] && [[ "$(uname -m)" == "arm64" ]]; then
print_warning "You are on an Apple Silicon Mac (arm64)."
print_warning "For best performance (including GPU access), we strongly recommend the Direct Installation method."
print_warning "You can find the guide here: $DIRECT_INSTALL_URL"
read -p "Do you want to continue with the Docker installation anyway? (y/N): " choice < /dev/tty
if [[ "$choice" != "y" && "$choice" != "Y" ]]; then
echo "Installation aborted by user."
exit 0
fi
fi
# 3. Download necessary files
print_info "Downloading the Docker Compose configuration file..."
if curl -fsSL -o "$COMPOSE_FILE" "$REPO_URL/$COMPOSE_FILE"; then
print_success "Downloaded '$COMPOSE_FILE'."
else
print_error "Failed to download '$COMPOSE_FILE'. Please check your internet connection and the repository URL."
fi
# 4. Create .env and get User Input for API Key
print_info "Creating '.env' file for your secrets..."
cat > .env <<EOF
# Your OpenAI API key (optional - you can configure other providers in morphik.toml)
OPENAI_API_KEY=
# A secret key for signing JWTs. A random one is generated for you.
JWT_SECRET_KEY=your-super-secret-key-that-is-long-and-random-$(openssl rand -hex 16)
# Local URI token for secure URI generation (required for creating connection URIs)
LOCAL_URI_TOKEN=
EOF
print_info "Morphik supports 100s of models including OpenAI, Anthropic (Claude), Google Gemini, local models, and even custom models!"
read -p "Please enter your OpenAI API Key (or press Enter to skip and configure later): " openai_api_key < /dev/tty
if [[ -z "$openai_api_key" ]]; then
print_warning "No OpenAI API key provided. You can add it later to .env or configure other providers in morphik.toml"
print_info "Embeddings power ingestion, search, and querying in Morphik. Please choose an alternative provider to continue."
while true; do
echo ""
echo "Select an embeddings provider:"
echo " 1) Lemonade (download at https://lemonade-server.ai/)"
echo " 2) Ollama (download at https://ollama.com/)"
read -p "Enter 1 or 2 [2]: " embedding_choice < /dev/tty
embedding_choice=${embedding_choice:-2}
case "$embedding_choice" in
1)
EMBEDDING_PROVIDER="lemonade_embedding"
EMBEDDING_PROVIDER_LABEL="Lemonade embeddings"
break
;;
2)
EMBEDDING_PROVIDER="ollama_embedding"
EMBEDDING_PROVIDER_LABEL="Ollama embeddings"
break
;;
*)
print_warning "Please enter 1 or 2."
;;
esac
done
print_info "Embeddings will be configured to use $EMBEDDING_PROVIDER_LABEL with 768 dimensions."
else
# Use sed to safely replace the key in the .env file.
sed -i.bak "s|OPENAI_API_KEY=|OPENAI_API_KEY=$openai_api_key|" .env
rm -f .env.bak
print_success "'.env' file has been configured with your API key."
fi
# 5. Download and setup configuration FIRST (before trying to modify it)
print_info "Setting up configuration file..."
# Pull the Docker image first if needed
print_info "Pulling Docker image if not already available..."
if ! docker pull ghcr.io/morphik-org/morphik-core:latest; then
print_error "Failed to pull Docker image 'ghcr.io/morphik-org/morphik-core:latest'"
print_info "Possible reasons:"
print_info " - The image hasn't been published to GitHub Container Registry yet"
print_info " - Network/firewall is blocking access to ghcr.io"
print_info " - Docker daemon is not running properly"
print_info ""
print_info "Attempting to download configuration from repository instead..."
# Try to download morphik.docker.toml (Docker-specific config) first
if curl -fsSL -o morphik.toml "$REPO_URL/morphik.docker.toml" 2>/dev/null; then
print_success "Downloaded Docker-specific configuration from repository."
elif curl -fsSL -o morphik.toml "$REPO_URL/morphik.toml" 2>/dev/null; then
print_warning "Downloaded standard morphik.toml (may need adjustments for Docker)."
else
print_error "Could not download configuration file. Installation cannot continue."
exit 1
fi
else
print_success "Docker image is available."
print_info "Extracting default 'morphik.toml' for you to customize..."
# Method 1: Try using docker run with output capture (more reliable on Windows)
CONFIG_CONTENT=$(docker run --rm ghcr.io/morphik-org/morphik-core:latest cat /app/morphik.toml.default 2>/dev/null)
if [ -n "$CONFIG_CONTENT" ]; then
echo "$CONFIG_CONTENT" > morphik.toml
if [ -f morphik.toml ] && [ -s morphik.toml ]; then
print_success "Extracted configuration from Docker image."
else
print_warning "Failed to write configuration file."
CONFIG_EXTRACTED=false
fi
else
CONFIG_EXTRACTED=false
fi
# Method 2: If Method 1 failed, try docker cp approach
if [ "$CONFIG_EXTRACTED" = "false" ] 2>/dev/null || [ ! -f morphik.toml ]; then
print_info "Trying alternative extraction method..."
TEMP_CONTAINER=$(docker create ghcr.io/morphik-org/morphik-core:latest)
if docker cp "$TEMP_CONTAINER:/app/morphik.toml.default" morphik.toml 2>/dev/null; then
docker rm "$TEMP_CONTAINER" >/dev/null 2>&1
print_success "Extracted configuration using docker cp."
else
docker rm "$TEMP_CONTAINER" >/dev/null 2>&1
print_warning "Could not extract morphik.toml from Docker image."
fi
fi
# Method 3: If still no file, download from repository
if [ ! -f morphik.toml ] || [ ! -s morphik.toml ]; then
print_info "Downloading from repository instead..."
# Try with curl first
print_info "Attempting to download: $REPO_URL/morphik.docker.toml"
if curl -fsSL "$REPO_URL/morphik.docker.toml" -o morphik.toml; then
if [ -f morphik.toml ] && [ -s morphik.toml ]; then
print_success "Downloaded Docker-specific configuration from repository."
else
rm -f morphik.toml
print_warning "Downloaded file was empty."
fi
fi
# If still no file, try the standard morphik.toml
if [ ! -f morphik.toml ] || [ ! -s morphik.toml ]; then
print_info "Attempting to download: $REPO_URL/morphik.toml"
if curl -fsSL "$REPO_URL/morphik.toml" -o morphik.toml; then
if [ -f morphik.toml ] && [ -s morphik.toml ]; then
print_warning "Downloaded standard morphik.toml (may need Docker adjustments)."
else
rm -f morphik.toml
print_error "Could not obtain a valid configuration file."
exit 1
fi
else
print_error "Could not download configuration file from repository."
print_info "Please check your internet connection and that the repository is accessible."
exit 1
fi
fi
fi
fi
# 5.0 Configure embeddings when OpenAI key is not provided
if [[ -n "$EMBEDDING_PROVIDER" ]]; then
if [ -f morphik.toml ]; then
print_info "Configuring morphik.toml to use $EMBEDDING_PROVIDER_LABEL (768 dimensions)..."
sed -i.bak \
-e "/^\\[embedding\\]/,/^\\[/ s/^[[:space:]]*model[[:space:]]*=.*/model = \"$EMBEDDING_PROVIDER\" # Reference to registered model/" \
-e "/^\\[embedding\\]/,/^\\[/ s/^[[:space:]]*dimensions[[:space:]]*=.*/dimensions = 768/" \
morphik.toml
rm -f morphik.toml.bak
else
print_warning "morphik.toml not found. Skipping embedding configuration update."
fi
fi
if [[ -n "$EMBEDDING_PROVIDER" && "$EMBEDDING_PROVIDER" == "lemonade_embedding" ]]; then
print_warning "Ensure the Lemonade SDK is installed and running (see Lemonade installation prompt later in this script)."
fi
# 5.0.5 Now that morphik.toml exists, handle LOCAL_URI_TOKEN configuration
echo ""
print_info "🔐 Setting up authentication for your Morphik deployment:"
print_info " • If you plan to access Morphik from outside this server, setting a LOCAL_URI_TOKEN will secure your deployment"
print_info " • For local-only access, you can skip this step (dev_mode will be enabled)"
print_info " • With a LOCAL_URI_TOKEN set, you'll need to use /generate_local_uri endpoint for authorization tokens"
echo ""
read -p "Please enter a secure LOCAL_URI_TOKEN (or press Enter to skip for local-only access): " local_uri_token < /dev/tty
if [[ -z "$local_uri_token" ]]; then
print_info "No LOCAL_URI_TOKEN provided - enabling development mode (dev_mode=true) for local access"
print_info "This is suitable for local development and testing"
# Enable dev_mode in morphik.toml (now that the file exists!)
if [ -f morphik.toml ]; then
sed -i.bak 's/dev_mode = false/dev_mode = true/' morphik.toml
rm -f morphik.toml.bak
else
print_warning "morphik.toml not found, cannot set dev_mode"
fi
else
print_success "LOCAL_URI_TOKEN set - keeping production mode (dev_mode=false) with authentication enabled"
print_info "Use the /generate_local_uri endpoint with this token to create authorized connection URIs"
fi
# Only update .env if a token was provided
if [[ -n "$local_uri_token" ]]; then
# Use sed to safely replace the token in the .env file.
sed -i.bak "s|LOCAL_URI_TOKEN=|LOCAL_URI_TOKEN=$local_uri_token|" .env
rm -f .env.bak
print_success "'.env' file has been configured with your LOCAL_URI_TOKEN."
fi
# 5.1 Ask about local inference with Lemonade (Windows only)
if grep -qEi "(Microsoft|WSL)" /proc/version &> /dev/null || [ -f /proc/sys/fs/binfmt_misc/WSLInterop ]; then
echo ""
print_info "🍋 Detected WSL environment. Morphik supports local inference with Lemonade SDK (Windows only)."
print_info " Lemonade provides high-performance local LLM inference with AMD GPU/NPU optimization."
print_info " This allows you to run both embeddings and completions completely locally."
echo ""
read -p "Would you like to install Lemonade SDK for local inference? (y/N): " install_lemonade < /dev/tty
if [[ "$install_lemonade" == "y" || "$install_lemonade" == "Y" ]]; then
print_info "Downloading Lemonade installer..."
# Download and run the Lemonade installer
if curl -fsSL -o lemonade-installer.sh "$REPO_URL/lemonade-installer.sh"; then
chmod +x lemonade-installer.sh
if ./lemonade-installer.sh; then
LEMONADE_INSTALLED=true
print_success "Lemonade installation completed!"
else
print_warning "Lemonade installation failed. You can retry later by running: ./lemonade-installer.sh"
fi
else
print_warning "Failed to download Lemonade installer"
print_info "You can manually install Lemonade later with:"
print_info " curl -sSL https://raw.githubusercontent.com/morphik-org/morphik-core/main/lemonade-installer.sh | bash"
fi
fi
fi
# 5.2 Ask about GPU availability for multimodal embeddings
echo ""
print_info "🚀 Morphik achieves ultra-accurate document understanding through advanced multimodal embeddings."
print_info " These embeddings excel at processing images, PDFs, and complex layouts."
print_info " While Morphik will work without a GPU, for best results we recommend using a GPU-enabled machine."
echo ""
read -p "Do you have a GPU available for Morphik to use? (y/N): " has_gpu < /dev/tty
if [[ "$has_gpu" != "y" && "$has_gpu" != "Y" ]]; then
print_warning "Disabling multimodal embeddings and reranking since no GPU is available."
print_info "Morphik will still work great with text-based embeddings!"
print_info "You can enable multimodal embeddings and reranking later if you add GPU support."
# Disable ColPali in morphik.toml
sed -i.bak 's/enable_colpali = true/enable_colpali = false/' morphik.toml
rm -f morphik.toml.bak
# Ensure reranking is disabled in morphik.toml
sed -i.bak 's/use_reranker = .*/use_reranker = false/' morphik.toml
rm -f morphik.toml.bak
print_success "Configuration updated for CPU-only operation."
else
print_success "Excellent! Multimodal embeddings will be enabled for maximum accuracy."
print_info "Make sure your Docker setup has GPU passthrough configured if using NVIDIA GPUs."
fi
print_info "Enabling configuration mounting in '$COMPOSE_FILE'..."
# Use sed to uncomment the volume mount lines for both services
sed -i.bak 's|# - ./morphik.toml:/app/morphik.toml:ro|- ./morphik.toml:/app/morphik.toml:ro|g' "$COMPOSE_FILE"
rm -f ${COMPOSE_FILE}.bak
print_success "Configuration has been set up at 'morphik.toml'."
print_info "You can edit this file to customize models, ports, or other settings."
read -p "Press [Enter] to continue with the current configuration or edit 'morphik.toml' in another terminal first..." < /dev/tty
# Update port mapping in docker-compose.run.yml to match morphik.toml
API_PORT=$(awk '/^\[api\]/{flag=1; next} /^\[/{flag=0} flag && /^port[[:space:]]*=/ {gsub(/^port[[:space:]]*=[[:space:]]*/, ""); print; exit}' morphik.toml 2>/dev/null || echo "8000")
sed -i.bak "s|\"8000:8000\"|\"${API_PORT}:${API_PORT}\"|g" "$COMPOSE_FILE"
rm -f ${COMPOSE_FILE}.bak
# 5.5. Ask about UI installation
echo ""
print_info "Morphik includes an admin UI for easier interaction."
read -p "Would you like to install the Admin UI? (y/N): " install_ui < /dev/tty
UI_PROFILE=""
if [[ "$install_ui" == "y" || "$install_ui" == "Y" ]]; then
print_info "Extracting UI component files from Docker image..."
# Extract the UI component from the Docker image (now included in the image)
docker run --rm ghcr.io/morphik-org/morphik-core:latest \
tar -czf - -C /app ee/ui-component | tar -xzf -
if [ -d "ee/ui-component" ]; then
print_success "UI component downloaded successfully."
UI_PROFILE="--profile ui"
# Update NEXT_PUBLIC_API_URL to use the correct port
sed -i.bak "s|NEXT_PUBLIC_API_URL=http://localhost:8000|NEXT_PUBLIC_API_URL=http://localhost:${API_PORT}|g" "$COMPOSE_FILE"
rm -f ${COMPOSE_FILE}.bak
# Save UI installation flag for start-morphik.sh
echo "UI_INSTALLED=true" >> .env
else
print_warning "Failed to download UI component. Continuing without UI."
fi
fi
# 6. Start the application
print_info "Starting the Morphik stack... This may take a few minutes for the first run."
docker compose -f "$COMPOSE_FILE" $UI_PROFILE up -d
print_success "🚀 Morphik has been started!"
print_info "📝 Check the logs for status - it can take a few minutes to fully load"
print_info "🔄 The URL will show 'unavailable' until the service is ready"
# Read port from morphik.toml to display correct URL
API_PORT=$(awk '/^\[api\]/{flag=1; next} /^\[/{flag=0} flag && /^port[[:space:]]*=/ {gsub(/^port[[:space:]]*=[[:space:]]*/, ""); print; exit}' morphik.toml 2>/dev/null || echo "8000")
echo ""
print_info "🌐 API endpoints:"
print_info " Health check: http://localhost:${API_PORT}/health"
print_info " API docs: http://localhost:${API_PORT}/docs"
print_info " Main API: http://localhost:${API_PORT}"
if [[ -n "$UI_PROFILE" ]]; then
echo ""
print_info "🎨 Admin UI:"
print_info " Interface: http://localhost:3003"
print_info " Note: The UI may take a few minutes to build on first run"
fi
echo ""
print_info "📋 Management commands:"
print_info " View logs: docker compose -f $COMPOSE_FILE $UI_PROFILE logs -f"
print_info " Stop services: docker compose -f $COMPOSE_FILE $UI_PROFILE down"
print_info " Restart: ./start-morphik.sh"
# Create convenience startup script
cat > start-morphik.sh << 'EOF'
#!/bin/bash
set -e
# Purpose: Production startup script for Morphik
# Automatically updates port mapping from morphik.toml and includes UI if installed
# Color functions
print_info() {
echo -e "\033[34m[INFO]\033[0m $1"
}
print_warning() {
echo -e "\033[33m[WARNING]\033[0m $1"
}
API_PORT=$(awk '/^\[api\]/{flag=1; next} /^\[/{flag=0} flag && /^port[[:space:]]*=/ {gsub(/^port[[:space:]]*=[[:space:]]*/, ""); print; exit}' morphik.toml 2>/dev/null || echo "8000")
CURRENT_PORT=$(grep -oE '"[0-9]+:[0-9]+"' docker-compose.run.yml | head -1 | cut -d: -f1 | tr -d '"')
if [ "$CURRENT_PORT" != "$API_PORT" ]; then
echo "Updating port mapping from $CURRENT_PORT to $API_PORT..."
sed -i.bak "s|\"${CURRENT_PORT}:${CURRENT_PORT}\"|\"${API_PORT}:${API_PORT}\"|g" docker-compose.run.yml
rm -f docker-compose.run.yml.bak
fi
# Check multimodal embeddings configuration
COLPALI_ENABLED=$(awk '/^\[morphik\]/{flag=1; next} /^\[/{flag=0} flag && /^enable_colpali[[:space:]]*=/ {gsub(/^enable_colpali[[:space:]]*=[[:space:]]*/, ""); print; exit}' morphik.toml 2>/dev/null || echo "true")
if [ "$COLPALI_ENABLED" = "false" ]; then
print_warning "Multimodal embeddings are disabled. For best results with images/PDFs, enable them in morphik.toml if you have a GPU."
fi
# Check if UI is installed
UI_PROFILE=""
if [ -f ".env" ] && grep -q "UI_INSTALLED=true" .env; then
UI_PROFILE="--profile ui"
fi
docker compose -f docker-compose.run.yml $UI_PROFILE up -d
echo "🚀 Morphik is running on http://localhost:${API_PORT}"
echo " Health: http://localhost:${API_PORT}/health"
echo " Docs: http://localhost:${API_PORT}/docs"
if [ -n "$UI_PROFILE" ]; then
echo ""
echo "🎨 Admin UI: http://localhost:3003"
fi
EOF
chmod +x start-morphik.sh
# Remind about Lemonade if installed
if [ "$LEMONADE_INSTALLED" = true ]; then
echo ""
print_info "🍋 Lemonade SDK has been installed! To use local inference:"
print_info " 1. Start Lemonade Server by double-clicking start_lemonade.bat on your Desktop"
print_info " 2. Select Lemonade models in the Morphik UI settings"
print_info " 3. Enjoy fully local embeddings and completions!"
fi
echo ""
print_success "🎉 Enjoy using Morphik!"