-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathterraform.tfvars.example
More file actions
63 lines (49 loc) · 2.66 KB
/
terraform.tfvars.example
File metadata and controls
63 lines (49 loc) · 2.66 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
# =============================================================================
# Team OpenCode on Leafcloud - Configuration
# =============================================================================
# Copy this file to terraform.tfvars and customize the values.
# Terraform reads terraform.tfvars automatically on plan/apply.
# NOTE(review): keep the resulting terraform.tfvars out of version control —
# it may contain secrets (huggingface_token, vllm_api_key).
# =============================================================================
# =============================================================================
# OpenStack Authentication
# =============================================================================
# Name of the cloud entry in your ~/.config/openstack/clouds.yaml
# Default is "openstack" which is what Leafcloud uses when you download clouds.yaml
openstack_cloud = "openstack"
# Prefix for all resources (optional, defaults to your OpenStack username)
prefix = ""
# =============================================================================
# Security - IMPORTANT!
# =============================================================================
# Restrict access to your office/VPN IP range for security.
# Example: "203.0.113.0/24" or "203.0.113.42/32" for a single IP
# WARNING: the default "0.0.0.0/0" leaves the VM reachable from the entire
# internet — change this before deploying anything non-throwaway.
vm_cidr_whitelist = "0.0.0.0/0"
# =============================================================================
# GPU Instance Configuration
# =============================================================================
# Available Leafcloud GPU flavors:
# eg1.a100x1.V12-84 - 1x A100 80GB, 12 vCPUs, 84GB RAM (€1.61/hr)
# eg1.a100x2.V25-164 - 2x A100 80GB, 25 vCPUs, 164GB RAM (€3.20/hr)
# eg1.a100x4.V50-324 - 4x A100 80GB, 50 vCPUs, 324GB RAM (€6.43/hr)
# eg1.a30x1.V8-32 - 1x A30 24GB, 8 vCPUs, 32GB RAM (cheaper, smaller models)
# NOTE(review): prices were accurate when written — confirm current rates on
# the Leafcloud pricing page before relying on them.
vm_flavor_name = "eg1.a100x1.V12-84"
# Additional storage for model cache, in GB - models can be large!
# (200 GB comfortably fits the default ~30B model; size up for larger models)
vm_additional_storage = 200
# =============================================================================
# vLLM Model Configuration
# =============================================================================
# HuggingFace model to serve
# Recommended models for coding:
# Qwen/Qwen3-Coder-30B-A3B-Instruct - Best for agentic coding (default)
# Qwen/Qwen2.5-Coder-14B-Instruct - Smaller, faster option
# deepseek-ai/DeepSeek-Coder-V2-Lite-Instruct - Alternative
vllm_model = "Qwen/Qwen3-Coder-30B-A3B-Instruct"
# HuggingFace token for downloading gated models (treat as a secret).
# Get yours at: https://huggingface.co/settings/tokens
# Required for some models like Llama, optional for Qwen
huggingface_token = ""
# Custom API key for the vLLM endpoint (leave empty to auto-generate a
# secure key). Treat as a secret.
vllm_api_key = ""
# Maximum context length, in tokens (reduce if you get OOM errors)
vllm_max_model_len = 32768
# Fraction of GPU VRAM vLLM is allowed to pre-allocate (0.0-1.0).
# Lower this if other processes share the GPU; 0.9 is the common default.
vllm_gpu_memory_utilization = 0.9