66import time
77from typing import List
88from openai .types .chat import ChatCompletion
9+ from openai import RateLimitError as OpenAIRateLimitError
910from azure .ai .inference import ChatCompletionsClient
1011from azure .ai .inference .models import ChatCompletions
1112from anthropic import APIStatusError as AnthropicAPIStatusError
@@ -101,7 +102,7 @@ def call_llm(
101102 stop = None
102103 ) -> ChatCompletion :
103104
104- for _ in range (retry_num ):
105+ for attempt in range (retry_num ):
105106 try :
106107 if "o1" in model :
107108 messages [0 ]['role' ] = 'user'
@@ -110,19 +111,31 @@ def call_llm(
110111 response = client .create (messages = messages , model = model )
111112 elif "gpt-5" in model :
112113 #by default running at reasoning level high
113- response = client .create (messages = messages , model = model , temperature = temperature , stop = stop , reasoning_effort = "minimal " )
114+ response = client .create (messages = messages , model = model , temperature = temperature , stop = stop , reasoning_effort = "medium " )
114115 elif "claude" in model :
115116 try :
116117 response = client .create (messages = messages , model = model , temperature = temperature , stop = stop , thinking = {"type" : "enabled" ,"budget_tokens" : 10000 })
117118 except AnthropicAPIStatusError as e :
118- if _ < retry_num - 1 : # Don't sleep on the last retry
119- time .sleep (retry_wait_time )
119+ if attempt < retry_num - 1 : # Don't sleep on the last retry
120+ wait_time = retry_wait_time * (2 ** attempt ) # Exponential backoff
121+ time .sleep (wait_time )
120122 continue
121123 else :
122124 raise # Re-raise on the last attempt
123125 else :
124126 response = client .create (messages = messages , model = model , temperature = temperature , stop = stop )
125127 break
128+ except OpenAIRateLimitError as e :
129+ if attempt < retry_num - 1 : # Don't sleep on the last retry
130+ wait_time = retry_wait_time * (2 ** attempt ) # Exponential backoff
131+ print (f"Rate limit error encountered. Retrying in { wait_time } seconds... (Attempt { attempt + 1 } /{ retry_num } )" )
132+ time .sleep (wait_time )
133+ continue
134+ else :
135+ raise # Re-raise on the last attempt
126136 except TimeoutError as e :
127- time .sleep (retry_wait_time )
137+ if attempt < retry_num - 1 :
138+ time .sleep (retry_wait_time )
139+ else :
140+ raise
128141 return response
0 commit comments