-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathgpu_utils.py
More file actions
118 lines (98 loc) · 4.7 KB
/
gpu_utils.py
File metadata and controls
118 lines (98 loc) · 4.7 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
"""gpu_utils.py - GPU detection, device management, and tensor backend for
full-simulation GPU acceleration.
Detects CUDA (NVIDIA), MPS (Apple Silicon), or falls back to CPU.
Provides:
- get_device() — best available torch device
- gpu_info() — hardware details for dashboard
- TensorBackend — GPU-backed grid operations (diffusion, decay, clip)
that replace scipy/numpy with torch.nn.functional equivalents so the
entire environment step runs on GPU when available.
References:
PyTorch device management: https://pytorch.org/docs/stable/tensor_attributes.html
"""
from __future__ import annotations
import numpy as np
import torch
import torch.nn.functional as F
def get_device(force_cpu: bool = False) -> torch.device:
    """Pick the torch device to run on.

    Preference order is CUDA, then MPS, then CPU. Passing
    ``force_cpu=True`` skips all accelerator probing and yields the CPU
    device unconditionally.
    """
    if force_cpu:
        name = "cpu"
    elif torch.cuda.is_available():
        name = "cuda"
    else:
        # Older torch builds have no ``torch.backends.mps`` attribute at all,
        # so look it up defensively before asking whether it is usable.
        mps = getattr(torch.backends, "mps", None)
        name = "mps" if mps is not None and mps.is_available() else "cpu"
    return torch.device(name)
def gpu_info() -> dict:
    """Collect a summary of the accelerator hardware for the dashboard UI.

    The returned dict has the keys ``cuda_available``, ``mps_available``,
    ``device`` (string form of ``get_device()``), ``gpu_name``,
    ``gpu_memory_mb`` and ``torch_version``. The name and memory entries are
    only filled in when CUDA is available and describe CUDA device 0;
    otherwise they stay ``None``.
    """
    has_cuda = torch.cuda.is_available()
    has_mps = hasattr(torch.backends, "mps") and torch.backends.mps.is_available()

    name = None
    memory_mb = None
    if has_cuda:
        name = torch.cuda.get_device_name(0)
        # total_memory is in bytes; report decimal megabytes (bytes / 1e6).
        memory_mb = round(torch.cuda.get_device_properties(0).total_memory / 1e6)

    return {
        "cuda_available": has_cuda,
        "mps_available": has_mps,
        "device": str(get_device()),
        "gpu_name": name,
        "gpu_memory_mb": memory_mb,
        "torch_version": torch.__version__,
    }
# ─── TensorBackend — GPU-accelerated grid primitives ─────────
class TensorBackend:
    """Torch-backed primitives for 2-D simulation grids.

    Converts numpy arrays to float32 tensors on the selected device and
    offers diffusion, decay, clipping and batched gather/scatter on them.
    Diffusion is a 3×3 mean filter implemented with ``conv2d`` in place of
    scipy's ``uniform_filter``. (The original author reports a 10–50× speedup
    on 200×200 grids; that figure is not verified here.)

    Attributes:
        device: torch device chosen by ``get_device(force_cpu)``.
        is_gpu: True when ``device`` is a CUDA or MPS device.
    """

    def __init__(self, force_cpu: bool = False):
        """Select the device and build the constant diffusion kernel.

        Args:
            force_cpu: When True, use the CPU even if an accelerator exists.
        """
        self.device: torch.device = get_device(force_cpu)
        self.is_gpu: bool = self.device.type in ("cuda", "mps")
        # 3×3 mean kernel in conv2d weight layout (out_ch, in_ch, kH, kW).
        # It never changes, so it is built once and kept on-device.
        k = torch.ones(1, 1, 3, 3, dtype=torch.float32) / 9.0
        self._kernel = k.to(self.device)

    # ── numpy → tensor ──
    def from_numpy(self, arr: np.ndarray) -> torch.Tensor:
        """Return a float32 copy of *arr* as a tensor on ``self.device``.

        ``astype`` copies, so in-place tensor ops never write back into the
        caller's numpy array.
        """
        return torch.from_numpy(arr.astype(np.float32)).to(self.device)

    # ── tensor → numpy ──
    @staticmethod
    def to_numpy(t: torch.Tensor) -> np.ndarray:
        """Return *t* as a float64 numpy array on the host."""
        return t.detach().cpu().numpy().astype(np.float64)

    # ── Diffusion via conv2d (zero-flux / Neumann BC) ──
    def diffuse(self, grid: torch.Tensor, rate: float) -> torch.Tensor:
        """Blend *grid* toward its 3×3 neighbourhood mean.

        Args:
            grid: (H, W) float32 tensor on ``self.device``.
            rate: Blend factor; 0 leaves the grid unchanged and 1 replaces it
                with the neighbourhood mean.

        Returns:
            A new (H, W) tensor; *grid* itself is not modified.
        """
        g4 = grid.unsqueeze(0).unsqueeze(0)  # (1,1,H,W) as conv2d expects
        # "replicate" makes each ghost cell equal to its edge cell. For a
        # one-cell border that matches scipy's default 'reflect' mode, keeps
        # the grid total unchanged, and works when H or W is 1. torch's own
        # "reflect" mode skips the edge cell, which breaks all three.
        padded = F.pad(g4, (1, 1, 1, 1), mode="replicate")
        blurred = F.conv2d(padded, self._kernel)  # back to (1,1,H,W)
        return grid + rate * (blurred[0, 0] - grid)

    # ── Element-wise ops (in-place: each mutates and returns *grid*) ──
    def decay(self, grid: torch.Tensor, rate: float) -> torch.Tensor:
        """Multiply *grid* in place by ``1 - rate`` and return it."""
        return grid.mul_(1.0 - rate)

    def clip(self, grid: torch.Tensor, lo: float,
             hi: float | None) -> torch.Tensor:
        """Clamp *grid* in place to ``[lo, hi]`` and return it.

        When *hi* is None only the lower bound is applied.
        """
        if hi is None:
            return grid.clamp_(min=lo)
        return grid.clamp_(lo, hi)

    def add_scalar(self, grid: torch.Tensor, val: float) -> torch.Tensor:
        """Add *val* to every cell of *grid* in place and return it."""
        return grid.add_(val)

    # ── Batch gather: read grid values at (ys, xs) positions ──
    def gather(self, grid: torch.Tensor,
               ys: torch.Tensor, xs: torch.Tensor) -> torch.Tensor:
        """Index a (H,W) grid at integer coordinates → (N,) values."""
        return grid[ys.long(), xs.long()]

    # ── Batch scatter-add: add values at (ys, xs) positions ──
    def scatter_add(self, grid: torch.Tensor,
                    ys: torch.Tensor, xs: torch.Tensor,
                    vals: torch.Tensor) -> torch.Tensor:
        """Accumulate *vals* into *grid* at (ys, xs) in place and return it.

        Repeated coordinates accumulate rather than overwrite.

        NOTE: coordinates are flattened as ``y * W + x`` without a bounds
        check, so an ``x`` outside ``[0, W)`` lands in a neighbouring row.
        ``grid.view(-1)`` also requires a view-compatible (e.g. contiguous)
        grid and raises otherwise.
        """
        flat = ys.long() * grid.shape[1] + xs.long()
        grid.view(-1).scatter_add_(0, flat, vals)
        return grid

    # ── Batch scatter-sub: subtract values at (ys, xs) positions ──
    def scatter_sub(self, grid: torch.Tensor,
                    ys: torch.Tensor, xs: torch.Tensor,
                    vals: torch.Tensor) -> torch.Tensor:
        """Subtract *vals* from *grid* at (ys, xs) in place and return it.

        Implemented as ``scatter_add`` of the negated values, so the same
        caveats apply.
        """
        return self.scatter_add(grid, ys, xs, -vals)