Skip to content

Commit de59024

Browse files
authored
[GCP Functions] Retry function creation on failure to improve reliability (#1437)
* [GCP Functions] Retry function creation on failure to improve reliability
1 parent e2b61b8 commit de59024

6 files changed

Lines changed: 18 additions & 22 deletions

File tree

lithops/job/job.py

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -177,8 +177,6 @@ def _create_job(
177177
"""
178178
Creates a new Job
179179
"""
180-
global FUNCTION_CACHE
181-
182180
ext_env = {} if extra_env is None else extra_env.copy()
183181
if ext_env:
184182
ext_env = utils.convert_bools_to_string(ext_env)

lithops/serverless/backends/gcp_functions/config.py

Lines changed: 5 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -29,16 +29,14 @@
2929
RUNTIME_MEMORY_MAX = 8192 # 8GB
3030
RUNTIME_MEMORY_OPTIONS = {128, 256, 512, 1024, 2048, 4096, 8192}
3131

32-
RETRIES = 5
33-
RETRY_SLEEP = 20
34-
3532
AVAILABLE_PY_RUNTIMES = {
3633
'3.7': 'python37',
3734
'3.8': 'python38',
3835
'3.9': 'python39',
3936
'3.10': 'python310',
4037
'3.11': 'python311',
41-
'3.12': 'python312'
38+
'3.12': 'python312',
39+
'3.13': 'python313'
4240
}
4341

4442
USER_RUNTIMES_PREFIX = 'lithops.user_runtimes'
@@ -49,7 +47,9 @@
4947
'max_workers': 1000,
5048
'worker_processes': 1,
5149
'invoke_pool_threads': 1000,
52-
'trigger': 'pub/sub'
50+
'trigger': 'pub/sub',
51+
'retries': 5,
52+
'retry_sleep': 10
5353
}
5454

5555
REQUIREMENTS_FILE = """
@@ -103,8 +103,5 @@ def load_config(config_data=None):
103103
if config_data['gcp_functions']['runtime_memory'] > RUNTIME_MEMORY_MAX:
104104
config_data['gcp_functions']['runtime_memory'] = RUNTIME_MEMORY_MAX
105105

106-
config_data['gcp_functions']['retries'] = RETRIES
107-
config_data['gcp_functions']['retry_sleep'] = RETRY_SLEEP
108-
109106
if 'region' not in config_data['gcp']:
110107
config_data['gcp']['region'] = config_data['gcp_functions']['region']

lithops/serverless/backends/gcp_functions/gcp_functions.py

Lines changed: 13 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -244,10 +244,19 @@ def _create_function(self, runtime_name, memory, timeout=60):
244244
'failurePolicy': {}
245245
}
246246

247-
operation = self._api_resource.projects().locations().functions().create(
248-
location=self._default_location,
249-
body=cloud_function
250-
).execute(num_retries=self.num_retries)
247+
logger.info(f'Deploying function {function_location}')
248+
for attempt in range(self.num_retries):
249+
try:
250+
operation = self._api_resource.projects().locations().functions().create(
251+
location=self._default_location,
252+
body=cloud_function
253+
).execute()
254+
break
255+
except Exception as e:
256+
if attempt < self.num_retries - 1:
257+
time.sleep(self.retry_sleep)
258+
else:
259+
raise Exception(f"Failed to create Cloud Function after {self.num_retries} attempts.") from e
251260

252261
# Wait until the function is completely deployed
253262
logger.info('Waiting for the function to be deployed')

lithops/serverless/backends/k8s/entry_point.py

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -44,8 +44,6 @@
4444

4545
@proxy.route('/get-range/<jobkey>/<total_calls>/<chunksize>', methods=['GET'])
4646
def get_range(jobkey, total_calls, chunksize):
47-
global JOB_INDEXES
48-
4947
range_start = 0 if jobkey not in JOB_INDEXES else JOB_INDEXES[jobkey]
5048
range_end = min(range_start + int(chunksize), int(total_calls))
5149
JOB_INDEXES[jobkey] = range_end

lithops/standalone/worker.py

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -146,8 +146,6 @@ def notify_task_done(job_key, call_id):
146146

147147

148148
def redis_queue_consumer(pid, work_queue_name, exec_mode, backend):
149-
global worker_threads
150-
151149
worker_threads[pid]['status'] = WorkerStatus.IDLE.value
152150

153151
logger.info(f"Redis consumer process {pid} started")
@@ -213,7 +211,6 @@ def redis_queue_consumer(pid, work_queue_name, exec_mode, backend):
213211
def run_worker():
214212
global redis_client
215213
global budget_keeper
216-
global worker_threads
217214

218215
os.makedirs(LITHOPS_TEMP_DIR, exist_ok=True)
219216

lithops/storage/storage.py

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -462,9 +462,6 @@ def get_runtime_meta(self, key):
462462
:param runtime: name of the runtime
463463
:return: runtime metadata
464464
"""
465-
466-
global RUNTIME_META_CACHE
467-
468465
path = [RUNTIMES_PREFIX, key + ".meta.json"]
469466
filename_local_path = os.path.join(CACHE_DIR, *path)
470467

0 commit comments

Comments
 (0)