Skip to content

Commit 51b1fb8

Browse files
committed
updated bd to only deal with job objects, only delete kueue managed jobs, and unit tests
1 parent 6f386a2 commit 51b1fb8

4 files changed

Lines changed: 162 additions & 63 deletions

File tree

.gitignore

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,3 +3,4 @@ __pycache__
33
.pytest_cache
44
.coverage
55
htmlcov
6+
*egg-info

batchtools/bd.py

Lines changed: 30 additions & 38 deletions
Original file line numberDiff line numberDiff line change
@@ -1,82 +1,74 @@
1-
from typing import cast
2-
from typing_extensions import override
3-
4-
import argparse
51
import sys
6-
2+
import argparse
3+
from typing import cast
74
import openshift_client as oc
8-
9-
from .basecommand import Command
5+
from .basecommand import Command, override
106
from .basecommand import SubParserFactory
11-
from .helpers import oc_delete
12-
13-
14-
class DeleteJobsCommandArgs(argparse.Namespace):
15-
job_names: list[str] | None = None
7+
from .helpers import oc_delete, is_kueue_managed_job
168

179

1810
class DeleteJobsCommand(Command):
1911
"""
20-
Delete specified GPU jobs, or all GPU jobs if none are specified.
12+
batchtools bd [job-name [job-name ...]]
2113
22-
Description:
23-
Delete the specified jobs. If no jobs are specified, all current
24-
GPU-related jobs will be deleted.
14+
Delete specified Kueue-managed GPU jobs, or all such jobs if none are specified.
2515
26-
See also:
27-
See the repository README.md for documentation and examples.
16+
Description:
17+
Deletes only those Jobs that are both:
18+
- named like your GPU jobs (name starts with 'job-'), and
19+
- detected as Kueue-managed (via labels/Workload linkage).
2820
"""
2921

3022
name: str = "bd"
31-
help: str = "Delete specified GPU jobs, or all GPU jobs if none are specified"
23+
help: str = "Delete specified Kueue-managed GPU jobs, or all if none are specified"
3224

3325
@classmethod
3426
@override
3527
def build_parser(cls, subparsers: SubParserFactory):
3628
p = super().build_parser(subparsers)
3729
p.add_argument(
38-
"job_names", nargs="*", help="Optional list of job names to delete"
30+
"job_names",
31+
nargs="*",
32+
help="Optional list of job names to delete (must be Kueue-managed)",
3933
)
4034
return p
4135

4236
@staticmethod
4337
@override
4438
def run(args: argparse.Namespace):
45-
args = cast(DeleteJobsCommandArgs, args)
39+
args = cast(DeleteJobsCommand, args)
4640
try:
47-
jobs = oc.selector("workloads").objects()
41+
jobs = oc.selector("jobs").objects()
4842
if not jobs:
4943
print("No jobs found.")
5044
return
5145

52-
# only get gpu jobs (ASK ABOUT THIS)
5346
gpu_jobs = [
54-
job
55-
for job in jobs
56-
# XXX: this looks wrong; jobs created by br.py do not start with job-job.
57-
if job.model.metadata.name.startswith("job-job")
58-
# XXX: this looks wrong: names cannot contain a `/`.
59-
or job.model.metadata.name.startswith("workloads/job-job")
47+
job for job in jobs if job.model.metadata.name.startswith("job-")
6048
]
6149

62-
if not gpu_jobs:
63-
print("No GPU workloads to delete.")
50+
# only want to delete kueue jobs so filter for kueue jobs
51+
kueue_gpu_jobs = [job for job in gpu_jobs if is_kueue_managed_job(job)]
52+
53+
if not kueue_gpu_jobs:
54+
print("No Kueue-managed GPU jobs to delete.")
6455
return
6556

66-
# case where user provides jobs to delete
6757
if args.job_names:
68-
found = [job.model.metadata.name for job in gpu_jobs]
58+
# if jobs are specified, only delete specified jobs
59+
allowed = {job.model.metadata.name for job in kueue_gpu_jobs}
6960
for name in args.job_names:
70-
if name not in found:
71-
print(f"{name} is not a GPU job and cannot be deleted.")
61+
if name not in allowed:
62+
print(f"{name} is not a Kueue-managed GPU job; skipping.")
7263
continue
7364
oc_delete("job", name)
65+
print(f"Deleted job: {name}")
7466
else:
75-
# case where user does not provide jobs to delete, delete all
76-
print("No job names provided -> deleting all GPU workloads:\n")
77-
for job in gpu_jobs:
67+
print("No job names provided -> deleting all Kueue-managed GPU jobs:\n")
68+
for job in kueue_gpu_jobs:
7869
name = job.model.metadata.name
7970
oc_delete("job", name)
71+
print(f"Deleted job: {name}")
8072

8173
except oc.OpenShiftPythonException as e:
8274
sys.exit(f"Error occurred while deleting jobs: {e}")

batchtools/helpers.py

Lines changed: 39 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -27,3 +27,42 @@ def oc_delete(obj_type: str, obj_name: str) -> None:
2727
oc.selector(f"{obj_type}/{obj_name}").delete()
2828
except oc.OpenShiftPythonException as e:
2929
print(f"Error occurred while deleting {obj_type}/{obj_name}: {e}")
30+
31+
32+
def is_kueue_managed_job(job_obj) -> bool:
33+
"""
34+
Returns True if the Job is managed by Kueue.
35+
Checks:
36+
1) Job has label 'kueue.x-k8s.io/queue-name'
37+
2) A Workload exists that either:
38+
- has an ownerReference pointing to this Job, or
39+
- has label job-name=<job-name>
40+
"""
41+
try:
42+
md = job_obj.model.metadata
43+
labels = getattr(md, "labels", {}) or {}
44+
if "kueue.x-k8s.io/queue-name" in labels:
45+
return True
46+
47+
job_name = md.name
48+
try:
49+
workloads = oc.selector("workloads").objects()
50+
except oc.OpenShiftPythonException:
51+
workloads = []
52+
53+
for wl in workloads:
54+
wl_md = wl.model.metadata
55+
owners = getattr(wl_md, "ownerReferences", []) or []
56+
for o in owners:
57+
if (
58+
getattr(o, "kind", "") == "Job"
59+
and getattr(o, "name", "") == job_name
60+
):
61+
return True
62+
wl_labels = getattr(wl_md, "labels", {}) or {}
63+
if wl_labels.get("job-name") == job_name:
64+
return True
65+
except Exception:
66+
return False
67+
68+
return False

tests/test_bd.py

Lines changed: 92 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -9,29 +9,32 @@
99

1010

1111
@pytest.fixture
12-
def jobs():
12+
def kueue_jobs():
1313
return [
1414
DictToObject({"model": {"metadata": {"name": "job-job-1"}}}),
1515
DictToObject({"model": {"metadata": {"name": "job-job-2"}}}),
1616
]
1717

1818

1919
@pytest.fixture
20-
def ignored_jobs():
21-
return [DictToObject({"model": {"metadata": {"name": "ignored-1"}}})]
20+
def mixed_jobs():
21+
return [
22+
DictToObject({"model": {"metadata": {"name": "job-job-1"}}}),
23+
DictToObject({"model": {"metadata": {"name": "ignored-1"}}}),
24+
]
2225

2326

2427
@pytest.fixture
25-
def failed_jobs():
26-
return [DictToObject({"model": {"metadata": {"name": "job-job-1"}}})]
28+
def args():
29+
return argparse.Namespace(job_names=[])
2730

2831

2932
@contextmanager
30-
def patch_jobs_selector(job_list: list[DictToObject]):
33+
def patch_selector_with(job_list):
3134
"""
3235
Patches openshift_client.selector for BOTH:
33-
- the list call: selector("job", labels=...).objects() -> job_list
34-
- the delete calls: selector("job/<name>").delete()
36+
- list call: selector("jobs").objects() -> job_list
37+
- delete calls: selector("job/<name>").delete()
3538
"""
3639
with mock.patch("openshift_client.selector") as mock_selector:
3740
result = mock.Mock(name="selector_result")
@@ -40,43 +43,107 @@ def patch_jobs_selector(job_list: list[DictToObject]):
4043
yield mock_selector
4144

4245

43-
def test_no_jobs(args: argparse.Namespace, capsys):
44-
with patch_jobs_selector([]):
46+
def patch_kueue_managed(*names_that_are_kueue):
47+
"""
48+
Patch batchtools.bd.is_kueue_managed_job so ONLY the provided job names return True.
49+
50+
bd.py calls is_kueue_managed_job with a STRING job name, not an APIObject.
51+
This predicate accepts either a str or an object and normalizes to the name.
52+
"""
53+
54+
def _predicate(arg):
55+
if isinstance(arg, str):
56+
name = arg
57+
else:
58+
# tolerate APIObject/DictToObject
59+
name = getattr(
60+
getattr(getattr(arg, "model", None), "metadata", None), "name", None
61+
) or getattr(arg, "name", None)
62+
return name in names_that_are_kueue
63+
64+
return mock.patch("batchtools.bd.is_kueue_managed_job", side_effect=_predicate)
65+
66+
67+
def test_no_jobs_found(args, capsys):
68+
with patch_selector_with([]):
69+
DeleteJobsCommand.run(args)
70+
out = capsys.readouterr().out
71+
assert "No jobs found." in out
72+
73+
74+
def test_no_kueue_managed_gpu_jobs(args, kueue_jobs, capsys):
75+
with patch_selector_with(kueue_jobs), patch_kueue_managed():
4576
DeleteJobsCommand.run(args)
4677
out = capsys.readouterr().out
47-
assert "No jobs found" in out
78+
assert "No Kueue-managed GPU jobs to delete." in out
4879

4980

50-
def test_no_gpu_jobs(args: argparse.Namespace, ignored_jobs, capsys):
51-
with patch_jobs_selector(ignored_jobs):
81+
def test_ignores_non_gpu_named_jobs(args, mixed_jobs, capsys):
82+
with patch_selector_with(mixed_jobs), patch_kueue_managed("job-job-1"):
5283
DeleteJobsCommand.run(args)
5384
out = capsys.readouterr().out
54-
assert "No GPU workloads to delete" in out
85+
# delete only job-job-1
86+
assert "Deleting job/job-job-1" in out
87+
assert "Deleted job: job-job-1" in out
88+
assert "ignored-1" not in out
5589

5690

57-
def test_delete_obj(args: argparse.Namespace, jobs, capsys):
58-
args.job_names = []
59-
with patch_jobs_selector(jobs) as mock_selector:
91+
def test_delete_all_when_no_names_given(args, kueue_jobs, capsys):
92+
args.job_names = [] # explicit
93+
with patch_selector_with(kueue_jobs), patch_kueue_managed("job-job-1", "job-job-2"):
6094
DeleteJobsCommand.run(args)
6195
out = capsys.readouterr().out
6296

63-
for obj in jobs:
97+
assert "No job names provided -> deleting all Kueue-managed GPU jobs:" in out
98+
for obj in kueue_jobs:
6499
name = obj.model.metadata.name
65100
assert f"Deleting job/{name}" in out
101+
assert f"Deleted job: {name}" in out
66102

67-
called_with = [c.args[0] for c in mock_selector.call_args_list if c.args]
68-
for obj in jobs:
69-
assert f"job/{obj.model.metadata.name}" in called_with
70103

104+
def test_delete_only_specified_allowed(args, kueue_jobs, capsys):
105+
args.job_names = ["job-job-1", "job-job-2"]
106+
with patch_selector_with(kueue_jobs), patch_kueue_managed("job-job-1"):
107+
DeleteJobsCommand.run(args)
108+
out = capsys.readouterr().out
109+
110+
assert "Deleting job/job-job-1" in out
111+
assert "Deleted job: job-job-1" in out
112+
assert "job-job-2 is not a Kueue-managed GPU job; skipping." in out
113+
assert "Deleting job/job-job-2" not in out
114+
115+
116+
def test_only_deletes_listed_names_even_if_more_kueue(args, kueue_jobs, capsys):
117+
args.job_names = ["job-job-2"]
118+
with patch_selector_with(kueue_jobs), patch_kueue_managed("job-job-1", "job-job-2"):
119+
DeleteJobsCommand.run(args)
120+
out = capsys.readouterr().out
71121

72-
def test_delete_jobs_fail(args: argparse.Namespace, failed_jobs, capsys):
73-
args.job_names = []
74-
with patch_jobs_selector(failed_jobs) as mock_selector:
122+
assert "Deleting job/job-job-2" in out
123+
assert "Deleted job: job-job-2" in out
124+
# make sure job-job-1 is not deleted implicitly
125+
assert "Deleting job/job-job-1" not in out
126+
127+
128+
def test_delete_jobs_prints_error_when_delete_raises(args, capsys):
129+
jobs = [DictToObject({"model": {"metadata": {"name": "job-job-1"}}})]
130+
with patch_selector_with(jobs) as mock_selector, patch_kueue_managed("job-job-1"):
75131
mock_selector.return_value.delete.side_effect = OpenShiftPythonException(
76132
"test exception"
77133
)
78134

79135
DeleteJobsCommand.run(args)
80136
out = capsys.readouterr().out
81-
82137
assert "Error occurred while deleting job/job-job-1: test exception" in out
138+
139+
140+
def test_sys_exit_when_list_selector_raises(args):
141+
with mock.patch(
142+
"openshift_client.selector",
143+
side_effect=OpenShiftPythonException("not successful"),
144+
):
145+
with pytest.raises(SystemExit) as excinfo:
146+
DeleteJobsCommand.run(args)
147+
assert "Error occurred while deleting jobs: not successful" in str(
148+
excinfo.value
149+
)

0 commit comments

Comments
 (0)