13 | 13 | from typing import Union, Any, Optional, Dict, List, Tuple |
14 | 14 |
15 | 15 | from mmif import Mmif, Document, DocumentTypes, View |
| 16 | +from mmif.utils.cli.describe import generate_param_hash # pytype: disable=import-error |
16 | 17 | from clams.appmetadata import AppMetadata, real_valued_primitives, python_type, map_param_kv_delimiter |
17 | 18 |
18 | 19 | logging.basicConfig( |
19 | | - level=logging.WARNING, |
| 20 | + level=getattr(logging, os.environ.get('CLAMS_LOGLEVEL', 'WARNING').upper(), logging.WARNING), |
20 | 21 | format="%(asctime)s %(name)s %(levelname)-8s %(thread)d %(message)s", |
21 | 22 | datefmt="%Y-%m-%d %H:%M:%S") |
22 | 23 |
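For illustration, this is how the env-driven level above resolves; a minimal sketch using only the standard library (the CLAMS_LOGLEVEL name comes from this change, everything else is stdlib):

```python
import logging
import os

os.environ['CLAMS_LOGLEVEL'] = 'debug'  # lookup is case-insensitive thanks to .upper()
level = getattr(logging, os.environ.get('CLAMS_LOGLEVEL', 'WARNING').upper(), logging.WARNING)
assert level == logging.DEBUG

os.environ['CLAMS_LOGLEVEL'] = 'bogus'  # unknown names fall back to WARNING
level = getattr(logging, os.environ.get('CLAMS_LOGLEVEL', 'WARNING').upper(), logging.WARNING)
assert level == logging.WARNING
```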
@@ -47,7 +48,7 @@ class ClamsApp(ABC): |
47 | 48 | 'description': 'The JSON body of the HTTP response will be re-formatted with 2-space indentation', |
48 | 49 | }, |
49 | 50 | { |
50 | | - 'name': 'runningTime', 'type': 'boolean', 'choices': None, 'default': False, 'multivalued': False, |
| 51 | + 'name': 'runningTime', 'type': 'boolean', 'choices': None, 'default': True, 'multivalued': False, |
51 | 52 | 'description': 'The running time of the app will be recorded in the view metadata', |
52 | 53 | }, |
53 | 54 | { |
@@ -160,20 +161,19 @@ def annotate(self, mmif: Union[str, dict, Mmif], **runtime_params: List[str]) -> |
160 | 161 | hwFetch = refined.get('hwFetch', False) |
161 | 162 | runtime_recs = {} |
162 | 163 | if hwFetch: |
| 164 | + import multiprocessing |
163 | 165 | import platform, shutil, subprocess |
164 | | - runtime_recs['architecture'] = platform.machine() |
165 | | - # runtime_recs['processor'] = platform.processor() # this only works on Windows |
| 166 | + runtime_recs['cpu'] = f"{platform.machine()}, {multiprocessing.cpu_count()} cores" |
166 | 167 | runtime_recs['cuda'] = [] |
167 | 168 | # Use cuda_profiler data if available, otherwise fallback to nvidia-smi |
168 | 169 | if cuda_profiler: |
169 | | - for gpu_info, peak_memory_bytes in cuda_profiler.items(): |
170 | | - # Convert peak memory to human-readable format |
171 | | - peak_memory_mb = peak_memory_bytes / (1000 * 1000) |
172 | | - if peak_memory_mb >= 1000: |
173 | | - peak_memory_str = f"{peak_memory_mb / 1000:.2f} GiB" |
174 | | - else: |
175 | | - peak_memory_str = f"{peak_memory_mb:.1f} MiB" |
176 | | - runtime_recs['cuda'].append(f"{gpu_info}, Used {self._cuda_memory_to_str(peak_memory_bytes)}") |
| 170 | + for gpu_name, mem_info in cuda_profiler.items(): |
| 171 | + total_str = self._cuda_memory_to_str(mem_info['total']) |
| 172 | + available_str = self._cuda_memory_to_str(mem_info['available_before']) |
| 173 | + peak_str = self._cuda_memory_to_str(mem_info['peak']) |
| 174 | + runtime_recs['cuda'].append( |
| 175 | + f"{gpu_name}, {total_str} total, {available_str} available, {peak_str} peak used" |
| 176 | + ) |
177 | 177 | elif shutil.which('nvidia-smi'): |
178 | 178 | for gpu in subprocess.run(['nvidia-smi', '--query-gpu=name,memory.total', '--format=csv,noheader'], |
179 | 179 | stdout=subprocess.PIPE).stdout.decode('utf-8').strip().split('\n'): |
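To preview the view-metadata string each entry now carries, here is a stand-in sketch; the GPU name and byte counts are made up, and `_mem` only approximates `ClamsApp._cuda_memory_to_str` (not shown in this hunk):

```python
def _mem(nbytes: int) -> str:
    # rough stand-in for ClamsApp._cuda_memory_to_str
    gib = nbytes / 1024 ** 3
    return f"{gib:.2f} GiB" if gib >= 1 else f"{nbytes / 1024 ** 2:.1f} MiB"

mem_info = {'total': 25_757_220_864, 'available_before': 21_474_836_480, 'peak': 3_221_225_472}
print(f"NVIDIA GeForce RTX 4090, {_mem(mem_info['total'])} total, "
      f"{_mem(mem_info['available_before'])} available, {_mem(mem_info['peak'])} peak used")
# NVIDIA GeForce RTX 4090, 23.99 GiB total, 20.00 GiB available, 3.00 GiB peak used
```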
@@ -348,50 +348,224 @@ def _cuda_device_name_concat(name, mem): |
348 | 348 | mem = ClamsApp._cuda_memory_to_str(mem) |
349 | 349 | return f"{name}, With {mem}" |
350 | 350 |
| 351 | + def _get_profile_path(self, param_hash: str) -> pathlib.Path: |
| 352 | + """ |
| 353 | + Get the filesystem path for a memory profile file. |
| 354 | + |
| 355 | + Profile files are stored in a per-app directory under the user's cache directory. |
| 356 | + |
| 357 | + :param param_hash: Hash of parameters from :func:`mmif.utils.cli.describe.generate_param_hash` |
| 358 | + :return: Path to the profile file |
| 359 | + """ |
| 360 | + # Sanitize app identifier for filesystem use |
| 361 | + app_id = self.metadata.identifier.replace('/', '-').replace(':', '-') |
| 362 | + cache_base = pathlib.Path(os.environ.get('XDG_CACHE_HOME', pathlib.Path.home() / '.cache')) |
| 363 | + cache_dir = cache_base / 'clams' / 'memory_profiles' / app_id |
| 364 | + return cache_dir / f"memory_{param_hash}.json" |
| 365 | + |
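For a concrete picture of where profiles land, assuming a hypothetical app identifier and parameter hash, with no XDG_CACHE_HOME override:

```python
import pathlib

# both the identifier and the hash below are illustrative
app_id = 'http://apps.clams.ai/example/v1.0'.replace('/', '-').replace(':', '-')
path = pathlib.Path.home() / '.cache' / 'clams' / 'memory_profiles' / app_id / 'memory_d41d8cd9.json'
print(path)
# ~/.cache/clams/memory_profiles/http---apps.clams.ai-example-v1.0/memory_d41d8cd9.json
```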
| 366 | + @staticmethod |
| 367 | + def _get_available_vram() -> int: |
| 368 | + """ |
| 369 | + Get currently available VRAM on device 0, in bytes (GPU-wide, across all processes). |
| 370 | + |
| 371 | + Uses nvidia-smi to get the memory actually free on the GPU, not just what the current process has allocated. |
| 372 | + |
| 373 | + :return: Available VRAM in bytes, or 0 if unavailable |
| 374 | + """ |
| 375 | + try: |
| 376 | + import subprocess |
| 377 | + import shutil |
| 378 | + if shutil.which('nvidia-smi'): |
| 379 | + # Get free memory from nvidia-smi (reports GPU-wide, not per-process) |
| 380 | + result = subprocess.run( |
| 381 | + ['nvidia-smi', '--query-gpu=memory.free', '--format=csv,noheader,nounits', '-i', '0'], |
| 382 | + capture_output=True, text=True, timeout=5 |
| 383 | + ) |
| 384 | + if result.returncode == 0 and result.stdout.strip(): |
| 385 | + free_mb = float(result.stdout.strip()) |
| 386 | + return int(free_mb * 1024 * 1024) # nvidia-smi reports MiB; convert to bytes |
| 387 | + except Exception: |
| 388 | + pass |
| 389 | + |
| 390 | + # Fallback to torch (only sees current process memory) |
| 391 | + try: |
| 392 | + import torch # pytype: disable=import-error |
| 393 | + if not torch.cuda.is_available(): |
| 394 | + return 0 |
| 395 | + |
| 396 | + device = torch.cuda.current_device() |
| 397 | + total = torch.cuda.get_device_properties(device).total_memory |
| 398 | + used = max(torch.cuda.memory_allocated(device), |
| 399 | + torch.cuda.memory_reserved(device)) |
| 400 | + return total - used |
| 401 | + except Exception: |
| 402 | + return 0 |
| 403 | + |
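A short worked example of why the nvidia-smi path is preferred over the torch fallback; all numbers are hypothetical:

```python
# On a shared 24 GiB card, torch only accounts for this process's memory,
# so it overstates the headroom; nvidia-smi sees the whole device.
total = 24 * 1024 ** 3
other_process = 10 * 1024 ** 3   # invisible to this process's torch stats
this_process = 2 * 1024 ** 3
torch_estimate = total - this_process                 # ~22 GiB "free"
smi_reported = total - other_process - this_process   # ~12 GiB actually free
assert smi_reported < torch_estimate
```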
| 404 | + def _record_vram_usage(self, parameters: dict, peak_bytes: int) -> None: |
| 405 | + """ |
| 406 | + Record peak VRAM usage to a profile file. |
| 407 | + |
| 408 | + Uses an atomic write (temp file + rename) to avoid corruption from |
| 409 | + concurrent writes, and only updates the file if the new value is higher. |
| 410 | + |
| 411 | + Profile files are JSON objects containing: |
| 412 | + - peak_bytes: peak VRAM usage by the torch process, in bytes |
| 413 | + - parameters: the original parameters, for human readability |
| 414 | + |
| 415 | + :param parameters: Request parameters (used for hashing and recording) |
| 416 | + :param peak_bytes: Measured peak VRAM usage in bytes |
| 417 | + """ |
| 418 | + import json |
| 419 | + |
| 420 | + if peak_bytes <= 0: |
| 421 | + return |
| 422 | + |
| 423 | + param_hash = generate_param_hash(parameters) |
| 424 | + profile_path = self._get_profile_path(param_hash) |
| 425 | + |
| 426 | + try: |
| 427 | + profile_path.parent.mkdir(parents=True, exist_ok=True) |
| 428 | + |
| 429 | + # Check if we should update |
| 430 | + should_write = True |
| 431 | + if profile_path.exists(): |
| 432 | + try: |
| 433 | + existing_data = json.loads(profile_path.read_text()) |
| 434 | + existing = existing_data.get('peak_bytes', 0) |
| 435 | + if peak_bytes <= existing: |
| 436 | + should_write = False # Existing value is sufficient |
| 437 | + else: |
| 438 | + self.logger.debug( |
| 439 | + f"Updating peak memory for {param_hash}: " |
| 440 | + f"{existing/1024**3:.2f}GiB -> {peak_bytes/1024**3:.2f}GiB" |
| 441 | + ) |
| 442 | + except (OSError, ValueError): # json.JSONDecodeError is a ValueError; IOError aliases OSError |
| 443 | + pass # Corrupted file, overwrite |
| 444 | + |
| 445 | + if should_write: |
| 446 | + # Prepare profile data with original parameters for readability |
| 447 | + # Filter out internal keys and non-serializable values |
| 448 | + clean_params = { |
| 449 | + k: v for k, v in parameters.items() |
| 450 | + if k != self._RAW_PARAMS_KEY and not k.startswith('#') |
| 451 | + } |
| 452 | + profile_data = { |
| 453 | + 'peak_bytes': peak_bytes, |
| 454 | + 'parameters': clean_params |
| 455 | + } |
| 456 | + |
| 457 | + # Atomic write: write to temp, then rename |
| 458 | + temp_path = profile_path.with_suffix('.tmp') |
| 459 | + temp_path.write_text(json.dumps(profile_data, indent=2)) |
| 460 | + temp_path.rename(profile_path) # Atomic on POSIX |
| 461 | + |
| 462 | + self.logger.info( |
| 463 | + f"Recorded peak memory for {param_hash}: " |
| 464 | + f"{peak_bytes/1024**3:.2f}GiB" |
| 465 | + ) |
| 466 | + except Exception as e: |
| 467 | + self.logger.warning(f"Failed to record memory profile: {e}") |
| 468 | + |
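A recorded profile file then looks roughly like this (values illustrative; the hash in the filename comes from generate_param_hash over the request parameters):

```python
import json

profile_data = {
    'peak_bytes': 3221225472,  # 3.00 GiB, the highest peak observed so far
    'parameters': {'pretty': False, 'runningTime': True},
}
print(json.dumps(profile_data, indent=2))
```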
351 | 469 | @staticmethod |
352 | 470 | def _profile_cuda_memory(func): |
353 | 471 | """ |
354 | | - Decorator for profiling CUDA memory usage during _annotate execution. |
355 | | - |
| 472 | + Decorator for profiling CUDA memory usage and managing VRAM availability. |
| 473 | + |
| 474 | + This decorator: |
| 475 | + 1. Captures GPU-wide available VRAM on each device before execution |
| 476 | + 2. Records peak memory usage after execution |
| 477 | + 3. Persists the peak to a memory profile for future VRAM checks |
| 478 | + 4. Calls empty_cache() for cleanup |
| 479 | + |
356 | 480 | :param func: The function to wrap (typically _annotate) |
357 | 481 | :return: Decorated function that returns (result, cuda_profiler) |
358 | | - where cuda_profiler is dict with "<GPU_NAME>, <GPU_TOTAL_MEMORY>" keys |
359 | | - and peak memory usage values |
| 482 | + where cuda_profiler maps each GPU device name to a dict with |
| 483 | + 'total', 'available_before', and 'peak' memory values in bytes |
360 | 484 | """ |
361 | 485 | def wrapper(*args, **kwargs): |
| 486 | + # Get the ClamsApp instance: __self__ if func is a bound method, else the first positional arg (self) |
| 487 | + app_instance = getattr(func, '__self__', args[0] if args else None) |
| 488 | + |
362 | 489 | cuda_profiler = {} |
363 | 490 | torch_available = False |
364 | 491 | cuda_available = False |
365 | 492 | device_count = 0 |
366 | | - |
| 493 | + available_before = {} |
| 494 | + |
367 | 495 | try: |
368 | 496 | import torch # pytype: disable=import-error |
369 | 497 | torch_available = True |
370 | 498 | cuda_available = torch.cuda.is_available() |
371 | 499 | device_count = torch.cuda.device_count() |
372 | | - if cuda_available: |
373 | | - # Reset peak memory stats for all devices |
374 | | - torch.cuda.reset_peak_memory_stats('cuda') |
375 | 500 | except ImportError: |
376 | 501 | pass |
377 | | - |
| 502 | + |
| 503 | + # Capture available VRAM before execution and reset stats |
| 504 | + if torch_available and cuda_available: |
| 505 | + for device_id in range(device_count): |
| 506 | + device_id_str = f'cuda:{device_id}' |
| 507 | + # Get GPU-wide available memory via nvidia-smi |
| 508 | + try: |
| 509 | + import subprocess |
| 510 | + import shutil |
| 511 | + if shutil.which('nvidia-smi'): |
| 512 | + result = subprocess.run( |
| 513 | + ['nvidia-smi', '--query-gpu=memory.free', |
| 514 | + '--format=csv,noheader,nounits', '-i', str(device_id)], |
| 515 | + capture_output=True, text=True, timeout=5 |
| 516 | + ) |
| 517 | + if result.returncode == 0 and result.stdout.strip(): |
| 518 | + free_mb = float(result.stdout.strip()) |
| 519 | + available_before[device_id] = int(free_mb * 1024 * 1024) |
| 520 | + else: |
| 521 | + # Fallback to torch (process-specific) |
| 522 | + total = torch.cuda.get_device_properties(device_id_str).total_memory |
| 523 | + allocated = torch.cuda.memory_allocated(device_id_str) |
| 524 | + available_before[device_id] = total - allocated |
| 525 | + else: |
| 526 | + # Fallback to torch (process-specific) |
| 527 | + total = torch.cuda.get_device_properties(device_id_str).total_memory |
| 528 | + allocated = torch.cuda.memory_allocated(device_id_str) |
| 529 | + available_before[device_id] = total - allocated |
| 530 | + except Exception: |
| 531 | + # Fallback to torch (process-specific) |
| 532 | + total = torch.cuda.get_device_properties(device_id_str).total_memory |
| 533 | + allocated = torch.cuda.memory_allocated(device_id_str) |
| 534 | + available_before[device_id] = total - allocated |
| 535 | + for device_id in range(device_count): # reset peak stats on every device, not just the current one |
| 536 | + torch.cuda.reset_peak_memory_stats(device_id) |
| 537 | + |
378 | 538 | try: |
379 | 539 | result = func(*args, **kwargs) |
380 | | - |
| 540 | + |
| 541 | + # Record peak memory usage |
| 542 | + max_peak = 0 # highest single-device peak across all devices |
381 | 543 | if torch_available and cuda_available and device_count > 0: |
382 | 544 | for device_id in range(device_count): |
383 | | - device_id = f'cuda:{device_id}' |
384 | | - peak_memory = torch.cuda.max_memory_allocated(device_id) |
385 | | - gpu_name = torch.cuda.get_device_name(device_id) |
386 | | - gpu_total_memory = torch.cuda.get_device_properties(device_id).total_memory |
387 | | - key = ClamsApp._cuda_device_name_concat(gpu_name, gpu_total_memory) |
388 | | - cuda_profiler[key] = peak_memory |
389 | | - |
| 545 | + device_id_str = f'cuda:{device_id}' |
| 546 | + peak_memory = torch.cuda.max_memory_allocated(device_id_str) |
| 547 | + max_peak = max(max_peak, peak_memory) |
| 548 | + gpu_name = torch.cuda.get_device_name(device_id_str) |
| 549 | + gpu_total_memory = torch.cuda.get_device_properties(device_id_str).total_memory |
| 550 | + cuda_profiler[gpu_name] = { |
| 551 | + 'total': gpu_total_memory, |
| 552 | + 'available_before': available_before.get(device_id, 0), |
| 553 | + 'peak': peak_memory |
| 554 | + } |
| 555 | + |
| 556 | + # Record peak memory for future requests (if GPU app) |
| 557 | + gpu_app = ( |
| 558 | + hasattr(app_instance, 'metadata') and |
| 559 | + (getattr(app_instance.metadata, 'est_gpu_mem_min', 0) or 0) > 0 # `or 0` guards against None |
| 560 | + ) |
| 561 | + if gpu_app and max_peak > 0: |
| 562 | + app_instance._record_vram_usage(kwargs, max_peak) |
| 563 | + |
390 | 564 | return result, cuda_profiler |
391 | 565 | finally: |
392 | 566 | if torch_available and cuda_available: |
393 | 567 | torch.cuda.empty_cache() |
394 | | - |
| 568 | + |
395 | 569 | return wrapper |
396 | 570 |
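Putting it together, the tuple the wrapper returns can be consumed like this; the profiler dict below is a fabricated example of its documented shape:

```python
result, cuda_profiler = 'mmif-output', {
    'NVIDIA A100-SXM4-40GB': {
        'total': 42_949_672_960,
        'available_before': 40_000_000_000,
        'peak': 5_368_709_120,
    },
}
for gpu_name, mem_info in cuda_profiler.items():
    print(f"{gpu_name}: peak {mem_info['peak'] / 1024 ** 3:.2f} GiB, "
          f"{mem_info['available_before'] / 1024 ** 3:.2f} GiB was available")
```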
397 | 571 | @staticmethod |