@@ -122,6 +122,30 @@ def container_from_docker_container(src: DockerContainer) -> Container:
122122 )
123123
124124
async def _clean_scratch(
    loop: asyncio.AbstractEventLoop,
    scratch_type: str,
    scratch_root: Path,
    kernel_id: KernelId,
) -> None:
    """Remove a kernel's scratch storage on a best-effort basis.

    Two sibling paths under *scratch_root* are involved: ``<kernel_id>`` and
    ``<kernel_id>_tmp``.  The teardown strategy depends on the platform and
    on *scratch_type*:

    * Linux and ``"memory"``: each path is handed to
      ``destroy_scratch_filesystem()`` and then deleted with
      ``shutil.rmtree`` in the loop's default executor.
    * Linux and ``"hostfile"``: ``destroy_loop_filesystem()`` is called with
      *scratch_root* and *kernel_id*.
    * Anything else: only ``<kernel_id>`` is deleted with ``shutil.rmtree``.

    ``CalledProcessError`` and ``FileNotFoundError`` are deliberately
    swallowed so that cleaning up an already-removed (or never-created)
    scratch does not fail the caller.  Any other exception propagates.

    In the ``"memory"`` case the two paths are handled independently: a
    swallowed failure on one of them no longer prevents the other one from
    being cleaned up.

    :param loop: Event loop whose default executor runs the blocking
        ``shutil.rmtree`` calls.
    :param scratch_type: The configured scratch type (``"memory"``,
        ``"hostfile"``, or any other value for plain directories).
    :param scratch_root: Directory that contains the per-kernel scratch paths.
    :param kernel_id: Identifier of the kernel whose scratch is removed.
    """
    scratch_dir = scratch_root / str(kernel_id)
    tmp_dir = scratch_root / f"{kernel_id}_tmp"
    ignorable = (CalledProcessError, FileNotFoundError)
    on_linux = sys.platform.startswith("linux")

    if on_linux and scratch_type == "memory":
        # Phase 1: tear down the scratch filesystem of each path (presumably
        # an unmount -- confirm against destroy_scratch_filesystem()).
        released = []
        for path in (scratch_dir, tmp_dir):
            try:
                await destroy_scratch_filesystem(path)
            except ignorable:
                # As before, a path whose teardown failed is never passed to
                # rmtree; the difference is that we still go on to the
                # sibling path instead of abandoning the whole cleanup.
                continue
            released.append(path)
        # Phase 2: delete the directories, in the same order as phase 1.
        for path in released:
            try:
                await loop.run_in_executor(None, shutil.rmtree, path)
            except ignorable:
                pass
        return

    try:
        if on_linux and scratch_type == "hostfile":
            await destroy_loop_filesystem(scratch_root, kernel_id)
        else:
            await loop.run_in_executor(None, shutil.rmtree, scratch_dir)
    except ignorable:
        pass
147+
148+
125149def _DockerError_reduce (self ):
126150 return (
127151 type (self ),
@@ -853,6 +877,18 @@ async def start_container(
853877 if self .local_config ["debug" ]["log-kernel-config" ]:
854878 log .debug ("full container config: {!r}" , pretty (container_config ))
855879
async def _rollback_container_creation() -> None:
    """Undo the preparation done for a container that failed to start.

    Cleans the kernel's scratch storage, puts ``host_ports`` back into
    ``self.port_pool`` and frees every device allocation recorded in
    ``resource_spec.allocations``.

    The ports and device allocations are released in a ``finally`` clause so
    that an error escaping the scratch cleanup cannot leak them; such an
    error still propagates to the caller afterwards.
    """
    try:
        await _clean_scratch(
            loop,
            self.local_config["container"]["scratch-type"],
            self.local_config["container"]["scratch-root"],
            self.kernel_id,
        )
    finally:
        self.port_pool.update(host_ports)
        # Allocation maps are modified only while holding the resource lock.
        async with self.resource_lock:
            for dev_name, device_alloc in resource_spec.allocations.items():
                self.computers[dev_name].alloc_map.free(device_alloc)
891+
856892 # We are all set! Create and start the container.
857893 async with closing_async (Docker ()) as docker :
858894 container : Optional [DockerContainer ] = None
@@ -884,21 +920,7 @@ async def start_container(
884920 raise
885921 except Exception as e :
886922 # Oops, we have to restore the allocated resources!
887- scratch_type = self .local_config ["container" ]["scratch-type" ]
888- scratch_root = self .local_config ["container" ]["scratch-root" ]
889- if sys .platform .startswith ("linux" ) and scratch_type == "memory" :
890- await destroy_scratch_filesystem (self .scratch_dir )
891- await destroy_scratch_filesystem (self .tmp_dir )
892- await loop .run_in_executor (None , shutil .rmtree , self .scratch_dir )
893- await loop .run_in_executor (None , shutil .rmtree , self .tmp_dir )
894- elif sys .platform .startswith ("linux" ) and scratch_type == "hostfile" :
895- await destroy_loop_filesystem (scratch_root , self .kernel_id )
896- else :
897- await loop .run_in_executor (None , shutil .rmtree , self .scratch_dir )
898- self .port_pool .update (host_ports )
899- async with self .resource_lock :
900- for dev_name , device_alloc in resource_spec .allocations .items ():
901- self .computers [dev_name ].alloc_map .free (device_alloc )
923+ await _rollback_container_creation ()
902924 if container is not None :
903925 raise ContainerCreationError (
904926 container_id = container ._id , message = f"unknown. { repr (e )} "
@@ -1513,24 +1535,12 @@ async def log_iter():
15131535 log .warning ("container deletion timeout (k:{}, c:{})" , kernel_id , container_id )
15141536
15151537 if not restarting :
1516- scratch_type = self .local_config ["container" ]["scratch-type" ]
1517- scratch_root = self .local_config ["container" ]["scratch-root" ]
1518- scratch_dir = scratch_root / str (kernel_id )
1519- tmp_dir = scratch_root / f"{ kernel_id } _tmp"
1520- try :
1521- if sys .platform .startswith ("linux" ) and scratch_type == "memory" :
1522- await destroy_scratch_filesystem (scratch_dir )
1523- await destroy_scratch_filesystem (tmp_dir )
1524- await loop .run_in_executor (None , shutil .rmtree , scratch_dir )
1525- await loop .run_in_executor (None , shutil .rmtree , tmp_dir )
1526- elif sys .platform .startswith ("linux" ) and scratch_type == "hostfile" :
1527- await destroy_loop_filesystem (scratch_root , kernel_id )
1528- else :
1529- await loop .run_in_executor (None , shutil .rmtree , scratch_dir )
1530- except CalledProcessError :
1531- pass
1532- except FileNotFoundError :
1533- pass
1538+ await _clean_scratch (
1539+ loop ,
1540+ self .local_config ["container" ]["scratch-type" ],
1541+ self .local_config ["container" ]["scratch-root" ],
1542+ kernel_id ,
1543+ )
15341544
15351545 async def create_local_network (self , network_name : str ) -> None :
15361546 async with closing_async (Docker ()) as docker :
0 commit comments