-
Notifications
You must be signed in to change notification settings - Fork 339
Expand file tree
/
Copy pathdmr.yaml
More file actions
executable file
·27 lines (24 loc) · 924 Bytes
/
dmr.yaml
File metadata and controls
executable file
·27 lines (24 loc) · 924 Bytes
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
#!/usr/bin/env docker agent run
# Pirate-themed demo agent served by Docker Model Runner (provider: dmr).
agents:
  root:
    model: qwen
    # model: qwen_speculative
    description: "Pirate-themed AI assistant"
    instruction: "Talk like a pirate"
    commands:
      demo: "Hey tell me a story about docker containers"

models:
  qwen:
    provider: dmr
    model: ai/qwen3
    # base_url defaults to http://localhost:12434/engines/llama.cpp/v1
    # use http://model-runner.docker.internal/engines/v1 if you run docker-agent from a container

  # try this model for faster inference if you have enough memory
  qwen_speculative:
    provider: dmr
    model: ai/qwen3
    # The draft model should be a smaller, faster variant of the main model with low latency
    provider_opts:
      # quoted: the value contains a ':' and a version-like suffix — keep it a plain string
      speculative_draft_model: "ai/qwen3:0.6B-Q4_K_M"
      speculative_num_tokens: 16  # (this is the llama.cpp default if omitted)
      speculative_acceptance_rate: 0.8  # (this is the llama.cpp default if omitted)