@@ -1711,11 +1711,11 @@ async def _update_kernel() -> None:
17111711
17121712 await execute_with_retry (_update_kernel )
17131713
1714- async with self . agent_cache . rpc_context (
1715- agent_alloc_ctx . agent_id ,
1716- order_key = str ( scheduled_session . id ) ,
1717- ) as rpc :
1718- try :
1714+ try :
1715+ async with self . agent_cache . rpc_context (
1716+ agent_alloc_ctx . agent_id ,
1717+ order_key = str ( scheduled_session . id ),
1718+ ) as rpc :
17191719 get_image_ref = lambda k : image_infos [str (k .image_ref )].image_ref
17201720 # Issue a batched RPC call to create kernels on this agent
17211721 # created_infos = await rpc.call.create_kernels(
@@ -1783,42 +1783,44 @@ async def _update_kernel() -> None:
17831783 [binding .kernel .id for binding in items ],
17841784 agent_alloc_ctx .agent_id ,
17851785 )
1786- except (asyncio .TimeoutError , asyncio .CancelledError ):
1787- log .warning ("_create_kernels_in_one_agent(s:{}) cancelled" , scheduled_session .id )
1788- except Exception as e :
1789- # The agent has already cancelled or issued the destruction lifecycle event
1790- # for this batch of kernels.
1791- ex = e
1792- for binding in items :
1793- kernel_id = binding .kernel .id
1794-
1795- async def _update_failure () -> None :
1796- async with self .db .begin_session () as db_sess :
1797- now = datetime .now (tzutc ())
1798- query = (
1799- sa .update (KernelRow )
1800- .where (KernelRow .id == kernel_id )
1801- .values (
1802- status = KernelStatus .ERROR ,
1803- status_info = f"other-error ({ ex !r} )" ,
1804- status_changed = now ,
1805- terminated_at = now ,
1806- status_history = sql_json_merge (
1807- KernelRow .status_history ,
1808- (),
1809- {
1810- KernelStatus .ERROR .name : (
1811- now .isoformat ()
1812- ), # ["PULLING", "PREPARING"]
1813- },
1814- ),
1815- status_data = convert_to_status_data (ex , self .debug ),
1816- )
1786+ except (asyncio .TimeoutError , asyncio .CancelledError ):
1787+ log .warning ("_create_kernels_in_one_agent(s:{}) cancelled" , scheduled_session .id )
1788+ except Exception as e :
1789+ ex = e
1790+ err_info = convert_to_status_data (ex , self .debug )
1791+
1792+ # The agent has already cancelled or issued the destruction lifecycle event
1793+ # for this batch of kernels.
1794+ for binding in items :
1795+ kernel_id = binding .kernel .id
1796+
1797+ async def _update_failure () -> None :
1798+ async with self .db .begin_session () as db_sess :
1799+ now = datetime .now (tzutc ())
1800+ query = (
1801+ sa .update (KernelRow )
1802+ .where (KernelRow .id == kernel_id )
1803+ .values (
1804+ status = KernelStatus .ERROR ,
1805+ status_info = f"other-error ({ ex !r} )" ,
1806+ status_changed = now ,
1807+ terminated_at = now ,
1808+ status_history = sql_json_merge (
1809+ KernelRow .status_history ,
1810+ (),
1811+ {
1812+ KernelStatus .ERROR .name : (
1813+ now .isoformat ()
1814+ ), # ["PULLING", "PREPARING"]
1815+ },
1816+ ),
1817+ status_data = err_info ,
18171818 )
1818- await db_sess .execute (query )
1819+ )
1820+ await db_sess .execute (query )
18191821
1820- await execute_with_retry (_update_failure )
1821- raise
1822+ await execute_with_retry (_update_failure )
1823+ raise
18221824
18231825 async def create_cluster_ssh_keypair (self ) -> ClusterSSHKeyPair :
18241826 key = rsa .generate_private_key (
0 commit comments