# litellm.local.yml — LiteLLM proxy configuration (local development)
---
# Model deployments exposed by the proxy. Each entry's model_name is the
# public group name clients request; litellm_params describe the backing
# provider deployment. tpm/rpm are per-deployment rate limits used by the
# router. "os.environ/VAR" tells litellm to read VAR from the environment.
model_list:
  # --- OpenAI ---
  - model_name: oai-gpt-4o
    litellm_params:
      model: openai/gpt-4o-2024-05-13
      api_key: "os.environ/OPENAI_API_KEY"
      tpm: 300000
      rpm: 400
  - model_name: oai-gpt-3.5-turbo
    litellm_params:
      model: openai/gpt-3.5-turbo-0125
      api_key: "os.environ/OPENAI_API_KEY"
      tpm: 80000
      rpm: 800
  - model_name: oai-gpt-4
    litellm_params:
      model: openai/gpt-4-0613
      api_key: "os.environ/OPENAI_API_KEY"
      tpm: 20000
      rpm: 200
  - model_name: oai-gpt-4-turbo
    litellm_params:
      model: openai/gpt-4-turbo-2024-04-09
      api_key: "os.environ/OPENAI_API_KEY"
      tpm: 60000
      rpm: 400
  - model_name: oai-text-embedding-3-large
    litellm_params:
      model: openai/text-embedding-3-large
      api_key: "os.environ/OPENAI_API_KEY"
      tpm: 200000
      rpm: 500
    model_info:
      mode: embedding
      base_model: text-embedding-3-large
  - model_name: oai-text-embedding-3-small
    litellm_params:
      model: openai/text-embedding-3-small
      api_key: "os.environ/OPENAI_API_KEY"
      tpm: 200000
      rpm: 500
    model_info:
      mode: embedding
      base_model: text-embedding-3-small
  # --- Anthropic/Claude ---
  - model_name: claude-3-opus
    litellm_params:
      model: claude-3-opus-20240229
      api_key: "os.environ/ANTHROPIC_API_KEY"
      tpm: 40000
      rpm: 1000
  - model_name: claude-3-sonnet
    litellm_params:
      model: claude-3-sonnet-20240229
      api_key: "os.environ/ANTHROPIC_API_KEY"
      tpm: 80000
      rpm: 1000
  - model_name: claude-3.5-sonnet
    litellm_params:
      model: claude-3-5-sonnet-20240620
      api_key: "os.environ/ANTHROPIC_API_KEY"
      tpm: 80000
      rpm: 1000
  - model_name: claude-3-haiku
    litellm_params:
      model: claude-3-haiku-20240307
      api_key: "os.environ/ANTHROPIC_API_KEY"
      tpm: 100000
      rpm: 1000
  # --- Google/Vertex AI ---
  - model_name: gemini-1.0-pro
    litellm_params:
      model: vertex_ai/gemini-1.0-pro-002
      vertex_project: "os.environ/VERTEX_PROJECT"
      vertex_location: "os.environ/VERTEX_REGION"
  - model_name: gemini-1.5-pro
    litellm_params:
      model: vertex_ai/gemini-1.5-pro-preview-0514
      vertex_project: "os.environ/VERTEX_PROJECT"
      vertex_location: "os.environ/VERTEX_REGION"
  - model_name: textembedding-gecko
    litellm_params:
      # Quoted: "@" at the start of a plain scalar token is a YAML reserved indicator.
      model: "textembedding-gecko@003"
      vertex_project: "os.environ/VERTEX_PROJECT"
      vertex_location: "os.environ/VERTEX_REGION"
  # --- Microsoft/Azure ---
  - model_name: azure-gpt-3.5-turbo
    litellm_params:
      model: azure/gpt-35-turbo # 0301
      api_base: "os.environ/AZURE_API_BASE"
      api_key: "os.environ/AZURE_API_KEY"
      tpm: 89000
  - model_name: azure-gpt-3.5-turbo-16k
    litellm_params:
      model: azure/gpt-35-turbo-16k # 0613
      api_base: "os.environ/AZURE_API_BASE"
      api_key: "os.environ/AZURE_API_KEY"
      tpm: 1000
  - model_name: azure-gpt-4o
    litellm_params:
      model: azure/gpt-4o # pinned in Azure deployment as 2024-05-13
      api_base: "os.environ/AZURE_API_BASE"
      api_key: "os.environ/AZURE_API_KEY"
      tpm: 150000
  - model_name: azure-text-embedding-3-large
    litellm_params:
      model: azure/text-embedding-3-large
      api_base: "os.environ/AZURE_API_BASE"
      api_key: "os.environ/AZURE_API_KEY"
      tpm: 120000
      rpm: 100
    model_info:
      mode: embedding
      base_model: text-embedding-3-large
  - model_name: azure-text-embedding-3-small
    litellm_params:
      model: azure/text-embedding-3-small
      api_base: "os.environ/AZURE_API_BASE"
      api_key: "os.environ/AZURE_API_KEY"
      tpm: 120000
      rpm: 100
    model_info:
      mode: embedding
      base_model: text-embedding-3-small
  # Fake endpoints for load testing only.
  # Do not address this during competition, it is useless!
  - model_name: fake-openai-endpoint
    litellm_params:
      model: openai/fake
      api_key: fake-key
      api_base: https://exampleopenaiendpoint-production.up.railway.app/
  - model_name: fake-openai-endpoint-rate-limits
    litellm_params:
      model: openai/fake
      api_key: fake-key
      api_base: https://exampleopenaiendpoint-production.up.railway.app/
      tpm: 2000
      rpm: 2
# Global litellm SDK settings applied to every request through the proxy.
litellm_settings:
  drop_params: true  # drop params a provider doesn't support instead of erroring
  max_budget: 100  # NOTE(review): presumably USD — confirm against litellm budget docs
  budget_duration: 30d  # budget window; resets every 30 days
  num_retries: 10
  request_timeout: 700  # seconds
  max_parallel_requests: 10000
# Proxy-server settings (auth + persistence).
general_settings:
  # NOTE(review): sk-1234 is a well-known default; fine for local use only.
  master_key: sk-1234 # Leave this default value.
  database_url: "os.environ/DATABASE_URL"
# Router configuration. model_group_alias maps a requested alias to an
# existing model_name (group) from model_list above; litellm expects a
# mapping here, not a list of single-key maps.
router_settings:
  model_group_alias:
    azure-gpt-3.5-turbo: azure-gpt-3.5-turbo
    azure-gpt-3.5-turbo-16k: azure-gpt-3.5-turbo-16k
    azure-gpt-4o: azure-gpt-4o
    # Was "oai-gpt-3.5-turbo-0125", which matches no model_name in model_list;
    # the defined group is "oai-gpt-3.5-turbo".
    oai-gpt-3.5-turbo: oai-gpt-3.5-turbo
    # NOTE(review): no model_name "oai-gpt-3.5-turbo-16k" is defined in
    # model_list above — confirm whether this alias is still needed.
    oai-gpt-3.5-turbo-16k: oai-gpt-3.5-turbo-16k
    oai-gpt-4: oai-gpt-4
    oai-gpt-4-turbo: oai-gpt-4-turbo
  routing_strategy: "usage-based-routing-v2"