-
Notifications
You must be signed in to change notification settings - Fork 339
Expand file tree
/
Copy pathdmr.yaml
More file actions
executable file
·27 lines (24 loc) · 924 Bytes
/
dmr.yaml
File metadata and controls
executable file
·27 lines (24 loc) · 924 Bytes
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
#!/usr/bin/env docker agent run
# Pirate-themed demo agent served by Docker Model Runner (provider: dmr).
agents:
  root:
    model: qwen
    # model: qwen_speculative
    description: "Pirate-themed AI assistant"
    instruction: "Talk like a pirate"
    commands:
      demo: "Hey tell me a story about docker containers"

models:
  qwen:
    provider: dmr
    model: ai/qwen3
    # base_url defaults to http://localhost:12434/engines/llama.cpp/v1
    # use http://model-runner.docker.internal/engines/v1 if you run docker-agent from a container

  # try this model for faster inference if you have enough memory
  qwen_speculative:
    provider: dmr
    model: ai/qwen3
    # The draft model should be a smaller, faster variant of the main model with low latency
    provider_opts:
      # quoted: the value contains a ':' and a version-like suffix — keep it a plain string
      speculative_draft_model: "ai/qwen3:0.6B-Q4_K_M"
      speculative_num_tokens: 16  # (this is the llama.cpp default if omitted)
      speculative_acceptance_rate: 0.8  # (this is the llama.cpp default if omitted)