Skip to content

Commit 350cdc9

Browse files
committed
Move to python package
Move the check_c_abi.py script to a python package, and add tests and type hints
1 parent e2ff16b commit 350cdc9

7 files changed

Lines changed: 653 additions & 0 deletions

File tree

ci/check_c_abi/LICENSE

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
../../LICENSE

ci/check_c_abi/VERSION

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
../../VERSION
Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
#
2+
# SPDX-FileCopyrightText: Copyright (c) 2026, NVIDIA CORPORATION.
3+
# SPDX-License-Identifier: Apache-2.0
4+
#

ci/check_c_abi/check_c_abi/abi.py

Lines changed: 339 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,339 @@
1+
#
2+
# SPDX-FileCopyrightText: Copyright (c) 2026, NVIDIA CORPORATION.
3+
# SPDX-License-Identifier: Apache-2.0
4+
#
5+
6+
import msgspec
7+
from itertools import zip_longest
8+
import pathlib
9+
from typing import Generator, Optional, Self
10+
import os
11+
12+
13+
import clang.cindex
14+
15+
# termcolor is an optional dependency: when it cannot be imported, define a
# stand-in with a compatible call shape that leaves the text untouched.
try:
    from termcolor import colored
except ImportError:

    def colored(text, *_styling, **_options):
        """Return ``text`` unchanged, ignoring every styling argument."""
        return text
21+
22+
23+
def _is_unnamed_struct(cursor: clang.cindex.Cursor) -> bool:
    """Return True if ``cursor`` is a struct declaration with an unnamed spelling.

    The test is textual: the cursor must be a STRUCT_DECL whose spelling
    starts with "struct " and contains "unnamed ".
    """
    if cursor.kind != clang.cindex.CursorKind.STRUCT_DECL:
        return False

    spelling = cursor.spelling
    return spelling.startswith("struct ") and "unnamed " in spelling
29+
30+
31+
class SymbolLocation(msgspec.Struct):
    """Source position (file, line and column) of a declaration."""

    # file containing the declaration; stored relative to ``root_path`` when
    # one was given to from_cursor, otherwise exactly as clang reported it
    filename: str
    line: int
    column: int

    @classmethod
    def from_cursor(
        cls,
        cursor: clang.cindex.Cursor,
        root_path: Optional[str | os.PathLike] = None,
    ) -> Self:
        """Create a SymbolLocation from the location of a clang cursor.

        Parameters
        ----------
        cursor
            Cursor whose ``location`` is recorded.
        root_path
            Optional directory. When given (and non-empty) the stored
            filename is made relative to it.

        Raises
        ------
        ValueError
            Propagated from ``pathlib.Path.relative_to`` when the cursor's
            file is not located under ``root_path``.
        """
        # NOTE(review): assumes cursor.location.file is not None; the visible
        # caller filters out cursors without a file before calling this.
        filename = cursor.location.file.name
        if root_path:
            filename = str(pathlib.Path(filename).relative_to(root_path))

        return cls(
            filename=filename,
            line=cursor.location.line,
            column=cursor.location.column,
        )
48+
49+
50+
class FunctionDefinition(msgspec.Struct):
    """Name, return type, parameters and source location of a function declaration."""

    name: str
    return_type: str
    # one (type spelling, parameter name) pair per PARM_DECL child, in the
    # order the cursor yields them
    parameters: list[tuple[str, str]]
    location: SymbolLocation

    @classmethod
    def from_cursor(
        cls,
        cursor: clang.cindex.Cursor,
        root_path: Optional[str | os.PathLike] = None,
    ) -> Self:
        """Create a FunctionDefinition from a FUNCTION_DECL cursor.

        Parameters
        ----------
        cursor
            Cursor of kind ``CursorKind.FUNCTION_DECL``.
        root_path
            Optional directory forwarded to ``SymbolLocation.from_cursor``.

        Raises
        ------
        ValueError
            If ``cursor`` is not a function declaration.
        """
        if cursor.kind != clang.cindex.CursorKind.FUNCTION_DECL:
            raise ValueError(
                f"FunctionDefinition.from_cursor called with cursor of kind={cursor.kind}"
            )

        parameters = [
            (child.type.spelling, child.spelling)
            for child in cursor.get_children()
            if child.kind == clang.cindex.CursorKind.PARM_DECL
        ]
        return cls(
            name=cursor.spelling,
            return_type=cursor.result_type.spelling,
            parameters=parameters,
            location=SymbolLocation.from_cursor(cursor, root_path),
        )
75+
76+
77+
class StructDefinition(msgspec.Struct):
    """Name, members and source location of a struct declaration."""

    name: str
    # one (type spelling, member name) pair per FIELD_DECL child, in the
    # order the cursor yields them
    members: list[tuple[str, str]]
    location: SymbolLocation

    @classmethod
    def from_cursor(
        cls,
        cursor: clang.cindex.Cursor,
        root_path: Optional[str | os.PathLike] = None,
    ) -> Self:
        """Create a StructDefinition from a STRUCT_DECL cursor.

        Parameters
        ----------
        cursor
            Cursor of kind ``CursorKind.STRUCT_DECL``.
        root_path
            Optional directory forwarded to ``SymbolLocation.from_cursor``.

        Raises
        ------
        ValueError
            If ``cursor`` is not a struct declaration.
        """
        if cursor.kind != clang.cindex.CursorKind.STRUCT_DECL:
            raise ValueError(
                f"StructDefinition.from_cursor called with cursor of kind={cursor.kind}"
            )

        members = [
            (child.type.spelling, child.spelling)
            for child in cursor.get_children()
            if child.kind == clang.cindex.CursorKind.FIELD_DECL
        ]
        return cls(
            name=cursor.spelling,
            members=members,
            location=SymbolLocation.from_cursor(cursor, root_path),
        )
100+
101+
102+
class EnumDefinition(msgspec.Struct):
    """Name, enumerators and source location of an enum declaration."""

    name: str
    # one (enumerator name, numeric value) pair per ENUM_CONSTANT_DECL child
    values: list[tuple[str, int]]
    location: SymbolLocation

    @classmethod
    def from_cursor(
        cls,
        cursor: clang.cindex.Cursor,
        root_path: Optional[str | os.PathLike] = None,
    ) -> Self:
        """Create an EnumDefinition from an ENUM_DECL cursor.

        Parameters
        ----------
        cursor
            Cursor of kind ``CursorKind.ENUM_DECL``.
        root_path
            Optional directory forwarded to ``SymbolLocation.from_cursor``.

        Raises
        ------
        ValueError
            If ``cursor`` is not an enum declaration.
        """
        if cursor.kind != clang.cindex.CursorKind.ENUM_DECL:
            raise ValueError(
                f"EnumDefinition.from_cursor called with cursor of kind={cursor.kind}"
            )

        values = [
            (child.spelling, child.enum_value)
            for child in cursor.get_children()
            if child.kind == clang.cindex.CursorKind.ENUM_CONSTANT_DECL
        ]
        return cls(
            name=cursor.spelling,
            values=values,
            location=SymbolLocation.from_cursor(cursor, root_path),
        )
125+
126+
127+
class Abi(msgspec.Struct):
    """The functions, structs and enums declared by a C header."""

    functions: list[FunctionDefinition]
    structs: list[StructDefinition]
    enums: list[EnumDefinition]

    @classmethod
    def from_include_path(
        cls,
        root: str | os.PathLike,
        header: str | os.PathLike,
        extra_clang_args: Optional[list[str]] = None,
    ) -> Self:
        """Parse ``header`` with clang and collect the declarations it exposes.

        ``root`` is the include directory (e.g. '/source/cuvs/c/include') and
        ``header`` is the header file to parse, resolved against ``root``.
        ``root`` is passed to clang as an ``-I`` include path, followed by
        any ``extra_clang_args``.

        Only declarations located in files under ``root`` are kept, and
        recorded locations are relative to ``root``.

        Raises ValueError if the header file does not exist.
        """
        include_root = pathlib.Path(root).resolve()
        header_path = include_root / header
        if not header_path.is_file():
            raise ValueError(f"header file '{header_path}' not found")

        clang_args = [f"-I{include_root}", *(extra_clang_args or [])]
        translation_unit = clang.cindex.Index.create().parse(
            header_path, args=clang_args
        )

        kinds = clang.cindex.CursorKind
        functions, structs, enums = [], [], []

        # Only the top level of the AST is visited. tu.cursor.walk_preorder()
        # would recurse through the whole tree, but it is slightly slower
        # (an extra 100ms or so) and for the cuvs C-ABI everything is at the
        # top level.
        for node in translation_unit.cursor.get_children():
            # skip things like cuda headers and other files that are not
            # installed in the cuvs C path
            source_file = node.location.file
            if not source_file or not pathlib.Path(
                source_file.name
            ).is_relative_to(include_root):
                continue

            if node.kind == kinds.FUNCTION_DECL:
                functions.append(
                    FunctionDefinition.from_cursor(node, root_path=include_root)
                )
            elif node.kind == kinds.STRUCT_DECL:
                # unnamed structs are skipped here; they get picked up via
                # their typedef below
                if not _is_unnamed_struct(node):
                    structs.append(
                        StructDefinition.from_cursor(node, root_path=include_root)
                    )
            elif node.kind == kinds.TYPEDEF_DECL:
                # a typedef whose only child is an unnamed struct: record the
                # struct under the typedef's name
                inner = list(node.get_children())
                if len(inner) == 1 and _is_unnamed_struct(inner[0]):
                    definition = StructDefinition.from_cursor(
                        inner[0], root_path=include_root
                    )
                    definition.name = node.spelling
                    structs.append(definition)
            elif node.kind == kinds.ENUM_DECL:
                enums.append(
                    EnumDefinition.from_cursor(node, root_path=include_root)
                )

        return cls(functions, structs, enums)
198+
199+
200+
class AbiError(msgspec.Struct):
    """Describes a single ABI breaking change"""

    # human readable description of the breaking change
    error: str
    # name of the affected function, struct or enum (if any)
    symbol: str | None = None
    # source location to report for the affected symbol (if any)
    location: SymbolLocation | None = None
206+
207+
208+
def _analyze_function_abi(
    old_abi: Abi, new_abi: Abi
) -> Generator[AbiError, None, None]:
    """Yield an AbiError for every function-level ABI break between two ABIs.

    Every function of ``old_abi`` is looked up by name in ``new_abi``. An
    error is produced when the function has been removed, when its return
    type changed, or when - comparing parameters position by position - a
    parameter was added, removed or changed type. Parameter names are not
    compared. Functions that only exist in ``new_abi`` are allowed.
    """
    # The return annotation spells out all three Generator parameters: the
    # single-parameter form is only accepted from Python 3.13 onwards and
    # the annotation is evaluated when the function is defined.
    old_functions = {f.name: f for f in old_abi.functions}
    new_functions = {f.name: f for f in new_abi.functions}

    for name, old_function in old_functions.items():
        try:
            new_function = new_functions[name]
        except KeyError:
            yield AbiError(
                "Function has been removed",
                symbol=old_function.name,
                location=old_function.location,
            )
            continue

        if old_function.return_type != new_function.return_type:
            yield AbiError(
                f"Function has return type changed from '{old_function.return_type}' to '{new_function.return_type}'",
                symbol=new_function.name,
                location=new_function.location,
            )

        # the shorter parameter list is padded with (None, None) so that
        # added / removed trailing parameters show up as a missing type
        for (old_type, old_name), (new_type, new_name) in zip_longest(
            old_function.parameters,
            new_function.parameters,
            fillvalue=(None, None),
        ):
            if old_type is None:
                yield AbiError(
                    f"Function has a new parameter '{new_type} {new_name}'",
                    symbol=new_function.name,
                    location=new_function.location,
                )

            elif new_type is None:
                yield AbiError(
                    f"Function has a deleted parameter '{old_type} {old_name}'",
                    symbol=old_function.name,
                    location=old_function.location,
                )

            elif new_type != old_type:
                yield AbiError(
                    f"Function has changed type '{old_type}' to '{new_type}' for parameter '{old_name}'",
                    symbol=new_function.name,
                    location=new_function.location,
                )
259+
260+
261+
def _analyze_struct_abi(
    old_abi: Abi, new_abi: Abi
) -> Generator[AbiError, None, None]:
    """Yield an AbiError for every struct-level ABI break between two ABIs.

    Every struct of ``old_abi`` is looked up by name in ``new_abi``. An error
    is produced when the struct has been removed, or when - comparing members
    position by position - a member was removed or the type at a position
    changed. Member names are not compared, so a pure rename (or a reorder of
    members that share a type) is not reported.

    Appending new members to the end of a struct is considered to be ok, as
    long as structs are initialized via a create factory function.
    """
    # The return annotation spells out all three Generator parameters: the
    # single-parameter form is only accepted from Python 3.13 onwards and
    # the annotation is evaluated when the function is defined.
    old_structs = {f.name: f for f in old_abi.structs}
    new_structs = {f.name: f for f in new_abi.structs}

    for name, old_struct in old_structs.items():
        try:
            new_struct = new_structs[name]
        except KeyError:
            yield AbiError(
                "Struct has been removed",
                symbol=name,
                location=old_struct.location,
            )
            # nothing to compare members against: without this the loop
            # below would use an unbound (or stale) new_struct
            continue

        # the shorter member list is padded with (None, None) so that
        # removed / appended trailing members show up as a missing type
        for (old_type, old_name), (new_type, new_name) in zip_longest(
            old_struct.members,
            new_struct.members,
            fillvalue=(None, None),
        ):
            if new_type is None:
                yield AbiError(
                    f"Struct has a deleted member '{old_type} {old_name}'",
                    symbol=name,
                    location=new_struct.location,
                )
            elif old_type is None:
                # adding an item to the end of the struct is allowed here
                pass

            elif new_type != old_type:
                yield AbiError(
                    f"Struct member has changed type '{old_type}' to '{new_type}' for member '{old_name}'",
                    symbol=name,
                    location=new_struct.location,
                )
300+
301+
302+
def _analyze_enum_abi(
    old_abi: Abi, new_abi: Abi
) -> Generator[AbiError, None, None]:
    """Yield an AbiError for every enum value that was removed or renumbered.

    Enum values are compared by name across all enums of each ABI rather
    than per enum. Values that only exist in ``new_abi`` are allowed.
    """
    # The return annotation spells out all three Generator parameters: the
    # single-parameter form is only accepted from Python 3.13 onwards and
    # the annotation is evaluated when the function is defined.

    # flatten enum values: since values inside an enum in C are in the global scope
    old_enum_values = {
        k: (v, enum) for enum in old_abi.enums for k, v in enum.values
    }
    new_enum_values = {
        k: (v, enum) for enum in new_abi.enums for k, v in enum.values
    }

    # check to see if enum values have been removed, or had their numeric values changed
    for name, (old_value, old_enum) in old_enum_values.items():
        try:
            new_value, new_enum = new_enum_values[name]
        except KeyError:
            yield AbiError(
                f"Enum value {name} has been removed",
                symbol=old_enum.name,
                location=old_enum.location,
            )
            continue

        if new_value != old_value:
            yield AbiError(
                f"Enum value {name} has been changed from {old_value} to {new_value}",
                symbol=old_enum.name,
                location=new_enum.location,
            )
329+
330+
331+
def analyze_c_abi(old_abi: Abi, new_abi: Abi) -> list[AbiError]:
    """Return every ABI breaking change found when going from ``old_abi`` to
    ``new_abi``: function errors first, then struct errors, then enum errors.
    An empty list means no breaking change was detected.
    """
    return [
        *_analyze_function_abi(old_abi, new_abi),
        *_analyze_struct_abi(old_abi, new_abi),
        *_analyze_enum_abi(old_abi, new_abi),
    ]

0 commit comments

Comments
 (0)