-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathallocate.py
More file actions
executable file
·63 lines (48 loc) · 1.74 KB
/
allocate.py
File metadata and controls
executable file
·63 lines (48 loc) · 1.74 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
#!/usr/bin/env python
"""
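Wait until the requested number of GPUs is available, then run the given
command with CUDA_VISIBLE_DEVICES set to the selected GPU indices.

Usage: allocate.py NUM_GPUS COMMAND [ARGS...]

Use with `tsp` (task-spooler) for maximum synergy.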
"""
import subprocess
import argparse
import time
import logging
import sys
import os, pwd
import nvgpu
from nvgpu.list_gpus import device_statuses
logging.basicConfig(level=logging.INFO)
mem_threshold = 15
def run(cmd):
    """Log a shell command line and execute it through the shell.

    Args:
        cmd: The complete command line as a single string. It is passed to
            the shell unmodified, so leading environment assignments, pipes
            and redirections are interpreted by the shell.

    Returns:
        The ``subprocess.CompletedProcess`` of the finished command, so the
        caller can inspect ``returncode``. The call blocks until the
        command exits.
    """
    logging.info(cmd)
    # With shell=True the documented form is a single string; wrapping the
    # string in a one-element list only behaves the same on POSIX.
    return subprocess.run(cmd, shell=True)
def _allocate_gpu(num_gpus):
    """Pick up to ``num_gpus`` GPU indices the current user may run on.

    A GPU is eligible when either

    * the current user is listed among its users and its
      ``mem_used_percent`` is below ``mem_threshold``, or
    * ``device_statuses()`` reports it as ``is_available``.

    GPUs of the first kind are preferred; GPUs of the second kind fill the
    remaining slots, so a request is not left short while idle GPUs exist.

    Args:
        num_gpus: Maximum number of GPU indices to return.

    Returns:
        A list of at most ``num_gpus`` GPU indices in ascending order. An
        index is the device's position in the ``nvgpu.gpu_info()`` listing.
        The list may be shorter than ``num_gpus`` when not enough GPUs are
        eligible.
    """
    current_user = pwd.getpwuid(os.getuid()).pw_name

    shared_with_user = []
    idle = []
    # NOTE(review): assumes gpu_info() and device_statuses() list the
    # devices in the same order -- confirm against nvgpu.
    for index, (info, device) in enumerate(
            zip(nvgpu.gpu_info(), device_statuses())):
        users = device['users']
        if isinstance(users, str):
            # A comma-joined string would make `in` a substring test
            # ("bob" matching "bobby"); compare whole user names instead.
            users = [name.strip() for name in users.split(',')]
        if info['mem_used_percent'] < mem_threshold and current_user in users:
            shared_with_user.append(index)
        elif device['is_available']:
            idle.append(index)

    # Prefer GPUs the user already shares, then top up with idle ones.
    chosen = (shared_with_user + idle)[:num_gpus]
    return sorted(chosen)
if __name__ == "__main__":
    # Command line: <script> NUM_GPUS COMMAND [ARGS...]
    if len(sys.argv) < 3:
        sys.exit(f"usage: {sys.argv[0]} NUM_GPUS COMMAND [ARGS...]")
    try:
        num_gpus = int(sys.argv[1])
    except ValueError:
        sys.exit(f"NUM_GPUS must be an integer, got {sys.argv[1]!r}")
    if num_gpus < 0:
        sys.exit(f"NUM_GPUS must not be negative, got {num_gpus}")
    cmd = sys.argv[2:]

    available_gpus = _allocate_gpu(num_gpus)
    while len(available_gpus) < num_gpus:
        logging.info("Waiting for available GPUs. Checking again in 30 seconds.")
        # Sleep first, then poll: the job starts right after a successful
        # check instead of 30 seconds later on a stale result.
        time.sleep(30)
        available_gpus = _allocate_gpu(num_gpus)

    # Prefix the command with the environment assignment; the shell applies
    # it to the command that follows.
    available_gpus = ','.join(map(str, available_gpus))
    CUDA_VISIBLE_DEVICES = f'CUDA_VISIBLE_DEVICES={available_gpus}'
    cmd = ' '.join(cmd)
    cmd = f"{CUDA_VISIBLE_DEVICES} {cmd}"
    completed = run(cmd)
    # Exit with the command's status when run() reports one, so a job
    # queue sees failures; otherwise exit successfully as before.
    sys.exit(getattr(completed, "returncode", 0))