1+ import atexit
12import multiprocessing
3+ import os
24import shutil
5+ import tempfile
36from collections .abc import Iterable , Sequence
7+ from contextlib import contextmanager
48from operator import attrgetter
59from pathlib import Path
610from typing import Optional , Union
@@ -100,6 +104,21 @@ class ExtractionConfig:
100104 dir_handlers : DirectoryHandlers = BUILTIN_DIR_HANDLERS
101105 verbose : int = 1
102106 progress_reporter : type [ProgressReporter ] = NullProgressReporter
107+ tmp_dir : Path = attrs .field (
108+ factory = lambda : Path (tempfile .mkdtemp (prefix = "unblob-tmp-" ))
109+ )
110+
def __attrs_post_init__(self):
    """Register ``_cleanup_tmp_dir`` to run when the interpreter exits."""
    # NOTE(review): this is registered for every instance, whether
    # ``tmp_dir`` came from the default factory or was passed in by the
    # caller -- confirm that removing a caller-supplied directory at exit
    # is intended.
    cleanup_hook = self._cleanup_tmp_dir
    atexit.register(cleanup_hook)
113+
def _cleanup_tmp_dir(self):
    """Delete ``tmp_dir`` and everything below it, on a best-effort basis.

    Nothing happens unless ``tmp_dir`` is a ``Path`` that currently exists.
    A failure during deletion is logged as a warning instead of being
    raised.
    """
    target = self.tmp_dir
    if not isinstance(target, Path) or not target.exists():
        return
    try:
        shutil.rmtree(target)
    except Exception as error:
        logger.warning(
            "Failed to clean up tmp_dir", tmp_dir=target, exc_info=error
        )
103122
104123 def _get_output_path (self , path : Path ) -> Path :
105124 """Return path under extract root."""
@@ -227,6 +246,24 @@ def write_json_report(report_file: Path, process_result: ProcessResult):
227246 logger .info ("JSON report written" , path = report_file )
228247
229248
@contextmanager
def tmp_dir(path):
    """Point the temp-directory environment variables at *path* for a block.

    On entry ``TMP``, ``TMPDIR``, ``TEMP`` and ``TEMPDIR`` are each set to
    ``str(path)``. On exit -- including when the body raises -- every
    variable is put back to its previous value, or removed again if it was
    not set beforehand.

    NOTE(review): this mutates the process-wide ``os.environ``; confirm no
    other thread relies on these variables while the block is active.
    """
    names = ("TMP", "TMPDIR", "TEMP", "TEMPDIR")
    # Snapshot up front so the restore step can tell "was unset" (None)
    # apart from "had a value".
    previous = {name: os.environ.get(name) for name in names}
    try:
        for name in names:
            os.environ[name] = str(path)
        yield
    finally:
        for name, old_value in previous.items():
            if old_value is not None:
                os.environ[name] = old_value
            else:
                os.environ.pop(name, None)
265+
266+
230267class Processor :
231268 def __init__ (self , config : ExtractionConfig ):
232269 self ._config = config
@@ -244,7 +281,8 @@ def __init__(self, config: ExtractionConfig):
def process_task(self, task: Task) -> TaskResult:
    """Run *task* and return the ``TaskResult`` describing it.

    The work is done inside ``tmp_dir(...)`` with the configured
    ``tmp_dir`` path. Any ``Exception`` raised along the way is handed to
    ``_process_error`` instead of propagating, so a result is always
    returned.
    """
    outcome = TaskResult(task=task)
    try:
        # Resolved inside the ``try`` so a failure here is reported through
        # ``_process_error`` like any other processing error.
        scratch_dir = self._config.tmp_dir.as_posix()
        with tmp_dir(scratch_dir):
            self._process_task(outcome, task)
    except Exception as error:
        self._process_error(outcome, error)
    return outcome
0 commit comments