Skip to content

Commit 50ff71a

Browse files
committed
refactor(processing): move tmp_dir lifecycle management to CLI
Move temporary directory creation and cleanup from ExtractionConfig to CLI to make the lifecycle explicit and prevent unintended cleanup. The implicit atexit cleanup in ExtractionConfig could unexpectedly remove user-provided directories. This change moves that responsibility to the CLI layer where it's visible and under explicit control. Changes: - ExtractionConfig.tmp_dir defaults to system temp (no auto-cleanup) - Renamed tmp_dir() context manager to task_tmp_dir() - task_tmp_dir() creates and cleans up task-specific subdirectories - CLI explicitly creates unblob temp directory and manages cleanup - Tests updated for proper sandbox isolation Result: Library code has no implicit cleanup. CLI owns the lifecycle. Users can safely provide their own tmp_dir without cleanup surprises.
1 parent fa60c91 commit 50ff71a

File tree

4 files changed

+49
-22
lines changed

4 files changed

+49
-22
lines changed

python/unblob/cli.py

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,8 @@
11
#!/usr/bin/env python3
22
import atexit
3+
import shutil
34
import sys
5+
import tempfile
46
from collections.abc import Iterable
57
from importlib.metadata import version
68
from pathlib import Path
@@ -363,6 +365,9 @@ def cli(
363365
extra_magics_to_skip = () if clear_skip_magics else DEFAULT_SKIP_MAGIC
364366
skip_magic = tuple(sorted(set(skip_magic).union(extra_magics_to_skip)))
365367

368+
# Create dedicated unblob temp directory
369+
unblob_tmp_dir = Path(tempfile.mkdtemp(prefix="unblob-", dir=tempfile.gettempdir()))
370+
366371
config = ExtractionConfig(
367372
extract_root=extract_root,
368373
force_extract=force,
@@ -382,13 +387,25 @@ def cli(
382387
progress_reporter=NullProgressReporter
383388
if verbose
384389
else RichConsoleProgressReporter,
390+
tmp_dir=unblob_tmp_dir,
385391
)
386392

387393
logger.info("Creating extraction directory", extract_root=extract_root)
388394
extract_root.mkdir(parents=True, exist_ok=True)
389395
logger.info("Start processing file", file=file)
390396
sandbox = Sandbox(config, log_path, report_file)
391397
process_results = sandbox.run(process_file, config, file, report_file)
398+
399+
# Clean up the temp directory we created
400+
try:
401+
shutil.rmtree(unblob_tmp_dir)
402+
except Exception as e:
403+
logger.warning(
404+
"Failed to clean up tmp_dir",
405+
tmp_dir=unblob_tmp_dir,
406+
exc_info=e,
407+
)
408+
392409
if verbose == 0:
393410
if skip_extraction:
394411
print_scan_report(process_results)

python/unblob/processing.py

Lines changed: 28 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,3 @@
1-
import atexit
21
import multiprocessing
32
import os
43
import shutil
@@ -104,21 +103,7 @@ class ExtractionConfig:
104103
dir_handlers: DirectoryHandlers = BUILTIN_DIR_HANDLERS
105104
verbose: int = 1
106105
progress_reporter: type[ProgressReporter] = NullProgressReporter
107-
tmp_dir: Path = attrs.field(
108-
factory=lambda: Path(tempfile.mkdtemp(prefix="unblob-tmp-"))
109-
)
110-
111-
def __attrs_post_init__(self):
112-
atexit.register(self._cleanup_tmp_dir)
113-
114-
def _cleanup_tmp_dir(self):
115-
if isinstance(self.tmp_dir, Path) and self.tmp_dir.exists():
116-
try:
117-
shutil.rmtree(self.tmp_dir)
118-
except Exception as e:
119-
logger.warning(
120-
"Failed to clean up tmp_dir", tmp_dir=self.tmp_dir, exc_info=e
121-
)
106+
tmp_dir: Path = attrs.field(factory=lambda: Path(tempfile.gettempdir()))
122107

123108
def _get_output_path(self, path: Path) -> Path:
124109
"""Return path under extract root."""
@@ -247,22 +232,44 @@ def write_json_report(report_file: Path, process_result: ProcessResult):
247232

248233

249234
@contextmanager
250-
def tmp_dir(path):
251-
"""Context manager that temporarily sets all temp directory env vars."""
235+
def task_tmp_dir(parent_tmp_dir):
236+
"""Context manager that creates a task-specific temp subdirectory.
237+
238+
Creates a subdirectory under parent_tmp_dir, sets all temp env vars to it,
239+
yields the path, then cleans up the subdirectory on exit.
240+
241+
The parent_tmp_dir itself is NOT cleaned up - caller is responsible for that.
242+
"""
252243
tmp_vars = ("TMP", "TMPDIR", "TEMP", "TEMPDIR")
253244
saved = {}
245+
246+
# Create task-specific subdirectory
247+
task_temp = Path(tempfile.mkdtemp(dir=parent_tmp_dir, prefix="unblob-task-"))
248+
254249
try:
250+
# Override env vars to point to task subdirectory
255251
for var in tmp_vars:
256252
saved[var] = os.environ.get(var)
257-
os.environ[var] = str(path)
258-
yield
253+
os.environ[var] = str(task_temp)
254+
yield task_temp
259255
finally:
256+
# Restore original env vars
260257
for var, original in saved.items():
261258
if original is None:
262259
os.environ.pop(var, None)
263260
else:
264261
os.environ[var] = original
265262

263+
# Clean up the task subdirectory (NOT the parent)
264+
try:
265+
shutil.rmtree(task_temp)
266+
except Exception as e:
267+
logger.warning(
268+
"Failed to clean up task temp dir",
269+
task_temp=task_temp,
270+
exc_info=e,
271+
)
272+
266273

267274
class Processor:
268275
def __init__(self, config: ExtractionConfig):
@@ -281,7 +288,7 @@ def __init__(self, config: ExtractionConfig):
281288
def process_task(self, task: Task) -> TaskResult:
282289
result = TaskResult(task=task)
283290
try:
284-
with tmp_dir(self._config.tmp_dir.as_posix()):
291+
with task_tmp_dir(self._config.tmp_dir):
285292
self._process_task(result, task)
286293
except Exception as exc:
287294
self._process_error(result, exc)

tests/test_cli.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -273,8 +273,8 @@ def test_archive_success(
273273
handlers=BUILTIN_HANDLERS,
274274
verbose=expected_verbosity,
275275
progress_reporter=expected_progress_reporter,
276+
tmp_dir=mock.ANY,
276277
)
277-
config.tmp_dir = mock.ANY
278278
process_file_mock.assert_called_once_with(config, in_path, None)
279279
logger_config_mock.assert_called_once_with(expected_verbosity, tmp_path, log_path)
280280

tests/test_sandbox.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,9 @@ def extraction_config(extraction_config, tmp_path):
2121
extraction_config.extract_root = tmp_path / "extract" / "root"
2222
# parent has to exist
2323
extraction_config.extract_root.parent.mkdir()
24+
# Set tmp_dir to a specific directory under tmp_path to avoid conflicts
25+
extraction_config.tmp_dir = tmp_path / "unblob_tmp"
26+
extraction_config.tmp_dir.mkdir()
2427
return extraction_config
2528

2629

0 commit comments

Comments
 (0)