From 8760e9a9f6fecc6a0bfc40065e53a48d1b760156 Mon Sep 17 00:00:00 2001 From: Ben Frederickson Date: Wed, 28 Jan 2026 13:00:04 -0800 Subject: [PATCH 01/25] Add a script to check for breaking C ABI changes This adds a script that can check for changes that would break our C-ABI. t works by comparing two sets of header files: the `c/include` header files for the published ABI, as well as the `c/include` header files inside a new pull request. Each set of header files is parsed using libclang, and the differences between the old and new header files are examined for changes that would cause a breaking ABI change. Currently the following checks are made and flagged by this tool: * Functions that have been removed from the C-ABI * Functions that have extra parameters added * Functions that have parameters removed * Functions that have the type of any parameter changed * Structs that have been removed * Structs that have have members removed * Structs that have the types of members changed * Enum values that have been removed * Enum values that have their definition changed --- ci/check_c_abi.py | 304 ++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 304 insertions(+) create mode 100755 ci/check_c_abi.py diff --git a/ci/check_c_abi.py b/ci/check_c_abi.py new file mode 100755 index 0000000000..cef7a8b528 --- /dev/null +++ b/ci/check_c_abi.py @@ -0,0 +1,304 @@ +#!/usr/bin/env python3 +# +# SPDX-FileCopyrightText: Copyright (c) 2026, NVIDIA CORPORATION. +# SPDX-License-Identifier: Apache-2.0 +# + +""" +Checks for breaking changes to the C-ABI + +This scripts uses the libclang python bindings to check for breaking C-ABI changes. +It works by comparing two sets of header files: the `c/include` header files for +the published ABI, as well as the `c/include` header files inside a new pull request. +Each set of header files is parsed using libclang, and the differences between the +old and new header files are examined for changes that would cause a breaking +ABI change. + +Currently the following checks are made and flagged by this tool: + +* Functions that have been removed from the C-ABI +* Functions that have extra parameters added +* Functions that have parameters removed +* Functions that have the type of any parameter changed +* Structs that have been removed +* Structs that have have members removed +* Structs that have the types of members changed +* Enum values that have been removed +* Enum values that have their definition changed +""" + +import argparse +from dataclasses import dataclass, InitVar +import itertools +import pathlib +from typing import Optional +import sys + + +import clang.cindex + +try: + from termcolor import colored +except ImportError: + + def colored(text, *args, **kwargs): + return text + + +@dataclass +class Abi: + functions: dict + structs: dict + enums: dict + + @classmethod + def from_include_path(cls, root, header, extra_clang_args=None): + """Loads the Abi from a root path ('/source/cuvs/c/include') and a header file + ("cuvs/include/all.h") + """ + path = pathlib.Path(root).resolve() + all_header = path / header + + index = clang.cindex.Index.create() + + args = [f"-I{str(path)}"] + if extra_clang_args: + args.extend(extra_clang_args) + + tu = index.parse(all_header, args=args) + + functions, structs, enums = {}, {}, {} + + # note: we could use tu.cursor.walk_preorder() here instead to recurse through the AST + # but it is slightly slower to do so (extra 100ms or so) and for the cuvs C-ABI everything + # is at the top level + for child in tu.cursor.get_children(): + # ignore things like cuda headers and other files not installed in + # in the cuvs C path + if not child.location.file or not pathlib.Path( + child.location.file.name + ).is_relative_to(path): + continue + + # break up the AST into symbol -> node for the things we care about + if child.kind == clang.cindex.CursorKind.FUNCTION_DECL: + functions[child.spelling] = child + elif child.kind == clang.cindex.CursorKind.STRUCT_DECL: + # ignore unnamed structs (will get picked up via the typedef) + if _is_unnamed_struct(child): + continue + structs[child.spelling] = child + elif child.kind == clang.cindex.CursorKind.TYPEDEF_DECL: + # check if this is a typedef to an unnamed struct, if so use the + # typedef as the symbolname for the struct + grandchildren = list(child.get_children()) + if len(grandchildren) == 1 and _is_unnamed_struct( + grandchildren[0] + ): + structs[child.spelling] = grandchildren[0] + elif child.kind == clang.cindex.CursorKind.ENUM_DECL: + # store the enum values for checking, since we don't actually care if the + # enum itself gets renamed for the C-ABI + for value in child.get_children(): + enums[value.spelling] = value + + return cls(functions, structs, enums) + + +@dataclass +class AbiError: + """Holds information about an ABI breaking error""" + + error: str + symbol: Optional[str] = None + filename: Optional[str] = None + line: Optional[int] = None + column: Optional[int] = None + cursor: InitVar[clang.cindex.Cursor | None] = None + + def __post_init__(self, cursor): + if cursor: + # populate fields from the + if self.symbol is None: + self.symbol = cursor.spelling + if self.filename is None: + self.filename = cursor.location.file.name + if self.line is None: + self.line = cursor.location.line + if self.column is None: + self.column = cursor.location.column + + +def _is_unnamed_struct(cursor): + return ( + cursor.kind == clang.cindex.CursorKind.STRUCT_DECL + and cursor.spelling.startswith("struct ") + and "unnamed " in cursor.spelling + ) + + +def analyze_c_abi(old_path, new_path, include_file, extra_clang_args=None): + old_abi = Abi.from_include_path(old_path, include_file, extra_clang_args) + new_abi = Abi.from_include_path(new_path, include_file, extra_clang_args) + + # iterate over every function in the existing abi, and make sure that no functions + # have been removed or had function arguments removed, arguments added or the type of any + # argument changed. Note: adding new functions to the new abi is allowed + errors = [] + for name, old_function in old_abi.functions.items(): + new_function = new_abi.functions.get(name) + if new_function is None: + errors.append( + AbiError("Function has been removed", cursor=old_function) + ) + continue + + old_result_type = old_function.result_type.spelling + new_result_type = new_function.result_type.spelling + if old_result_type != new_result_type: + errors.append( + AbiError( + f"Function has return type changed from '{old_result_type}' to '{new_result_type}'", + cursor=new_function, + ) + ) + + for old_arg, new_arg in itertools.zip_longest( + old_function.get_children(), + new_function.get_children(), + fillvalue=None, + ): + if old_arg is None: + errors.append( + AbiError( + f"Function has a new argument '{new_arg.type.spelling} {new_arg.spelling}'", + cursor=new_function, + ) + ) + + elif new_arg is None: + errors.append( + AbiError( + f"Function has a deleted argument '{old_arg.type.spelling} {old_arg.spelling}'", + cursor=old_function, + ) + ) + + elif new_arg.type.spelling != old_arg.type.spelling: + errors.append( + AbiError( + f"Function has a changed argument type '{old_arg.type.spelling}' to '{new_arg.type.spelling} for argument '{old_arg.spelling}'", + cursor=new_function, + ) + ) + + # check to see if any existing structures have had items removed, reordered, renamed, or types + # changed (adding new members is considered to be ok, as long as functions are initialized via + # a create factory function) + for name, old_struct in old_abi.structs.items(): + new_struct = new_abi.structs.get(name) + if new_struct is None: + errors.append( + AbiError( + "Struct has been removed", symbol=name, cursor=old_struct + ) + ) + continue + + for old_member, new_member in itertools.zip_longest( + old_struct.get_children(), + new_struct.get_children(), + fillvalue=None, + ): + if old_member is None: + errors.append( + AbiError( + f"Struct has a new member '{new_member.type.spelling} {new_member.spelling}'", + symbol=name, + cursor=new_member, + ) + ) + + elif new_member is None: + errors.append( + AbiError( + f"Struct has a deleted member '{old_member.type.spelling} {old_member.spelling}'", + symbol=name, + cursor=old_member, + ) + ) + + elif new_member.type.spelling != old_member.type.spelling: + errors.append( + AbiError( + f"Struct member has changed type '{old_member.type.spelling}' to '{new_member.type.spelling} for member '{old_member.spelling}'", + symbol=name, + cursor=new_member, + ) + ) + + # check to see if enum values have been removed, or had their numeric values changed + for name, old_enum in old_abi.enums.items(): + new_enum = new_abi.enums.get(name) + if new_enum is None: + errors.append( + AbiError( + f"Enum value {old_enum.spelling} has been removed", + symbol=old_enum.lexical_parent.spelling, + cursor=old_enum, + ) + ) + elif new_enum.enum_value != old_enum.enum_value: + errors.append( + AbiError( + f"Enum value {old_enum.spelling} has been changed from {old_enum.enum_value} to {new_enum.enum_value}", + symbol=old_enum.lexical_parent.spelling, + cursor=new_enum, + ) + ) + + return errors + + +if __name__ == "__main__": + parser = argparse.ArgumentParser( + description="Analyze C headers for breaking ABI changes" + ) + parser.add_argument( + "old", help="Path of existing stable C headers to use a baseline" + ) + parser.add_argument( + "new", help="Path of new C headers to examine for breaking ABI changes" + ) + parser.add_argument( + "--include_file", + help="root header file to examine (default: %(default)s)", + default="cuvs/core/all.h", + ) + args = parser.parse_args() + + # TODO: better way of specifying the dlpack header source, since missing the dlpack.h + # header means that we all dlpack types get treated as 'int' which could be misleading + # when looking for differences in the ABI (like if we change a field from `DLDataType` to + # `int` without specifying the dlpack include directory, we won't know that the type has + # changed) + dlpack_header_path = ( + pathlib.Path(args.new).parent.parent + / "cpp" + / "build" + / "_deps" + / "dlpack-src" + / "include" + ) + extra_clang_args = [f"-I{str(dlpack_header_path)}"] + + errors = analyze_c_abi( + args.old, args.new, args.include_file, extra_clang_args + ) + for error in errors: + print( + f"Error: {colored(error.error, attrs=['bold'])}. Symbol {colored(error.symbol, 'red')} from {error.filename}:{error.line}" + ) + + if errors: + sys.exit(1) From 61fa28592a0f4017588a1387f9a9435753ff24b1 Mon Sep 17 00:00:00 2001 From: Ben Frederickson Date: Thu, 29 Jan 2026 16:56:59 -0800 Subject: [PATCH 02/25] Split extracting abi from analyzing abi --- ci/check_c_abi.py | 365 ++++++++++++++++++++++++++++++++-------------- 1 file changed, 257 insertions(+), 108 deletions(-) diff --git a/ci/check_c_abi.py b/ci/check_c_abi.py index cef7a8b528..67c36154b2 100755 --- a/ci/check_c_abi.py +++ b/ci/check_c_abi.py @@ -28,7 +28,9 @@ """ import argparse -from dataclasses import dataclass, InitVar +import gzip + +import msgspec import itertools import pathlib from typing import Optional @@ -45,11 +47,106 @@ def colored(text, *args, **kwargs): return text -@dataclass -class Abi: - functions: dict - structs: dict - enums: dict +def _is_unnamed_struct(cursor): + return ( + cursor.kind == clang.cindex.CursorKind.STRUCT_DECL + and cursor.spelling.startswith("struct ") + and "unnamed " in cursor.spelling + ) + + +class SymbolLocation(msgspec.Struct): + filename: str + line: int + column: int + + @classmethod + def from_cursor(cls, cursor, root_path=None): + filename = cursor.location.file.name + if root_path: + filename = str(pathlib.Path(filename).relative_to(root_path)) + return cls( + filename=filename, + line=cursor.location.line, + column=cursor.location.column, + ) + + +class FunctionDefinition(msgspec.Struct): + name: str + return_type: str + parameters: list[tuple[str, str]] + location: SymbolLocation + + @classmethod + def from_cursor(cls, cursor, root_path=None): + if cursor.kind != clang.cindex.CursorKind.FUNCTION_DECL: + raise ValueError( + "FunctionDefinition.from_cursor called with cursor of kind={cursor.kind}" + ) + + return cls( + name=cursor.spelling, + return_type=cursor.result_type.spelling, + parameters=[ + (child.type.spelling, child.spelling) + for child in cursor.get_children() + if child.kind == clang.cindex.CursorKind.PARM_DECL + ], + location=SymbolLocation.from_cursor(cursor, root_path), + ) + + +class StructDefinition(msgspec.Struct): + name: str + members: list[tuple[str, str]] + location: SymbolLocation + + @classmethod + def from_cursor(cls, cursor, root_path=None): + if cursor.kind != clang.cindex.CursorKind.STRUCT_DECL: + raise ValueError( + "StructDefinition.from_cursor called with cursor of kind={cursor.kind}" + ) + + return cls( + name=cursor.spelling, + members=[ + (child.type.spelling, child.spelling) + for child in cursor.get_children() + if child.kind == clang.cindex.CursorKind.FIELD_DECL + ], + location=SymbolLocation.from_cursor(cursor, root_path), + ) + + +class EnumDefinition(msgspec.Struct): + name: str + values: list[tuple[str, int]] + location: SymbolLocation + + @classmethod + def from_cursor(cls, cursor, root_path=None): + if cursor.kind != clang.cindex.CursorKind.ENUM_DECL: + raise ValueError( + "EnumDefinition.from_cursor called with cursor of kind={cursor.kind}" + ) + + return cls( + name=cursor.spelling, + values=[ + (child.spelling, child.enum_value) + for child in cursor.get_children() + if child.kind == clang.cindex.CursorKind.ENUM_CONSTANT_DECL + ], + location=SymbolLocation.from_cursor(cursor, root_path), + ) + + +class Abi(msgspec.Struct): + functions: list[FunctionDefinition] + structs: list[StructDefinition] + enums: list[EnumDefinition] @classmethod def from_include_path(cls, root, header, extra_clang_args=None): @@ -59,6 +156,9 @@ def from_include_path(cls, root, header, extra_clang_args=None): path = pathlib.Path(root).resolve() all_header = path / header + if not all_header.is_file(): + raise ValueError(f"header file '{all_header}' not found") + index = clang.cindex.Index.create() args = [f"-I{str(path)}"] @@ -67,7 +167,7 @@ def from_include_path(cls, root, header, extra_clang_args=None): tu = index.parse(all_header, args=args) - functions, structs, enums = {}, {}, {} + functions, structs, enums = [], [], [] # note: we could use tu.cursor.walk_preorder() here instead to recurse through the AST # but it is slightly slower to do so (extra 100ms or so) and for the cuvs C-ABI everything @@ -82,12 +182,16 @@ def from_include_path(cls, root, header, extra_clang_args=None): # break up the AST into symbol -> node for the things we care about if child.kind == clang.cindex.CursorKind.FUNCTION_DECL: - functions[child.spelling] = child + functions.append( + FunctionDefinition.from_cursor(child, root_path=path) + ) elif child.kind == clang.cindex.CursorKind.STRUCT_DECL: # ignore unnamed structs (will get picked up via the typedef) if _is_unnamed_struct(child): continue - structs[child.spelling] = child + structs.append( + StructDefinition.from_cursor(child, root_path=path) + ) elif child.kind == clang.cindex.CursorKind.TYPEDEF_DECL: # check if this is a typedef to an unnamed struct, if so use the # typedef as the symbolname for the struct @@ -95,165 +199,152 @@ def from_include_path(cls, root, header, extra_clang_args=None): if len(grandchildren) == 1 and _is_unnamed_struct( grandchildren[0] ): - structs[child.spelling] = grandchildren[0] + struct = StructDefinition.from_cursor( + grandchildren[0], root_path=path + ) + struct.name = child.spelling + structs.append(struct) elif child.kind == clang.cindex.CursorKind.ENUM_DECL: - # store the enum values for checking, since we don't actually care if the - # enum itself gets renamed for the C-ABI - for value in child.get_children(): - enums[value.spelling] = value + enums.append(EnumDefinition.from_cursor(child, root_path=path)) return cls(functions, structs, enums) -@dataclass -class AbiError: +class AbiError(msgspec.Struct): """Holds information about an ABI breaking error""" error: str symbol: Optional[str] = None - filename: Optional[str] = None - line: Optional[int] = None - column: Optional[int] = None - cursor: InitVar[clang.cindex.Cursor | None] = None - - def __post_init__(self, cursor): - if cursor: - # populate fields from the - if self.symbol is None: - self.symbol = cursor.spelling - if self.filename is None: - self.filename = cursor.location.file.name - if self.line is None: - self.line = cursor.location.line - if self.column is None: - self.column = cursor.location.column + location: Optional[SymbolLocation] = None -def _is_unnamed_struct(cursor): - return ( - cursor.kind == clang.cindex.CursorKind.STRUCT_DECL - and cursor.spelling.startswith("struct ") - and "unnamed " in cursor.spelling - ) - - -def analyze_c_abi(old_path, new_path, include_file, extra_clang_args=None): - old_abi = Abi.from_include_path(old_path, include_file, extra_clang_args) - new_abi = Abi.from_include_path(new_path, include_file, extra_clang_args) - +def analyze_c_abi(old_abi, new_abi): # iterate over every function in the existing abi, and make sure that no functions - # have been removed or had function arguments removed, arguments added or the type of any - # argument changed. Note: adding new functions to the new abi is allowed + # have been removed or had function parameters removed, parameters added or the type of any + # parameter changed. Note: adding new functions to the new abi is allowed errors = [] - for name, old_function in old_abi.functions.items(): - new_function = new_abi.functions.get(name) + old_functions = {f.name: f for f in old_abi.functions} + new_functions = {f.name: f for f in new_abi.functions} + + for name, old_function in old_functions.items(): + new_function = new_functions.get(name) if new_function is None: errors.append( - AbiError("Function has been removed", cursor=old_function) + AbiError( + "Function has been removed", + symbol=old_function.name, + location=old_function.location, + ) ) continue - old_result_type = old_function.result_type.spelling - new_result_type = new_function.result_type.spelling - if old_result_type != new_result_type: + if old_function.return_type != new_function.return_type: errors.append( AbiError( - f"Function has return type changed from '{old_result_type}' to '{new_result_type}'", - cursor=new_function, + f"Function has return type changed from '{old_function.return_type}' to '{new_function.return_type}'", + symbol=new_function.name, + location=new_function.location, ) ) - for old_arg, new_arg in itertools.zip_longest( - old_function.get_children(), - new_function.get_children(), + for old_param, new_param in itertools.zip_longest( + old_function.parameters, + new_function.parameters, fillvalue=None, ): - if old_arg is None: + if old_param is None: errors.append( AbiError( - f"Function has a new argument '{new_arg.type.spelling} {new_arg.spelling}'", - cursor=new_function, + f"Function has a new parameter '{new_param[0]} {new_param[1]}'", + symbol=new_function.name, + location=new_function.location, ) ) - elif new_arg is None: + elif new_param is None: errors.append( AbiError( - f"Function has a deleted argument '{old_arg.type.spelling} {old_arg.spelling}'", - cursor=old_function, + f"Function has a deleted parameter '{old_param[0]} {old_param[1]}'", + symbol=old_function.name, + location=old_function.location, ) ) - elif new_arg.type.spelling != old_arg.type.spelling: + elif new_param[0] != old_param[0]: errors.append( AbiError( - f"Function has a changed argument type '{old_arg.type.spelling}' to '{new_arg.type.spelling} for argument '{old_arg.spelling}'", - cursor=new_function, + f"Function has a changed parameter type '{old_param[0]}' to '{new_param[0]} for parameter '{old_param[1]}'", + symbol=new_function.name, + location=new_function.location, ) ) # check to see if any existing structures have had items removed, reordered, renamed, or types # changed (adding new members is considered to be ok, as long as functions are initialized via # a create factory function) - for name, old_struct in old_abi.structs.items(): - new_struct = new_abi.structs.get(name) + old_structs = {f.name: f for f in old_abi.structs} + new_structs = {f.name: f for f in new_abi.structs} + + for name, old_struct in old_structs.items(): + new_struct = new_structs.get(name) if new_struct is None: errors.append( AbiError( - "Struct has been removed", symbol=name, cursor=old_struct + "Struct has been removed", + symbol=name, + location=old_struct.location, ) ) continue for old_member, new_member in itertools.zip_longest( - old_struct.get_children(), - new_struct.get_children(), + old_struct.members, + new_struct.members, fillvalue=None, ): - if old_member is None: + if new_member is None: errors.append( AbiError( - f"Struct has a new member '{new_member.type.spelling} {new_member.spelling}'", + f"Struct has a deleted member '{old_member[0]} {old_member[1]}'", symbol=name, - cursor=new_member, + location=new_struct.location, ) ) - elif new_member is None: + elif new_member[0] != old_member[0]: errors.append( AbiError( - f"Struct has a deleted member '{old_member.type.spelling} {old_member.spelling}'", + f"Struct member has changed type '{old_member[0]}' to '{new_member[0]} for member '{old_member[1]}'", symbol=name, - cursor=old_member, + location=new_struct.location, ) ) - elif new_member.type.spelling != old_member.type.spelling: - errors.append( - AbiError( - f"Struct member has changed type '{old_member.type.spelling}' to '{new_member.type.spelling} for member '{old_member.spelling}'", - symbol=name, - cursor=new_member, - ) - ) + # flatten enum values: since values inside an enum in C are in the global scope + old_enum_values = { + k: (v, enum) for enum in old_abi.enums for k, v in enum.values + } + new_enum_values = { + k: (v, enum) for enum in new_abi.enums for k, v in enum.values + } # check to see if enum values have been removed, or had their numeric values changed - for name, old_enum in old_abi.enums.items(): - new_enum = new_abi.enums.get(name) + for name, (old_value, old_enum) in old_enum_values.items(): + new_value, new_enum = new_enum_values.get(name, (None, None)) if new_enum is None: errors.append( AbiError( - f"Enum value {old_enum.spelling} has been removed", - symbol=old_enum.lexical_parent.spelling, - cursor=old_enum, + f"Enum value {name} has been removed", + symbol=old_enum.name, + location=old_enum.location, ) ) - elif new_enum.enum_value != old_enum.enum_value: + elif new_value != old_value: errors.append( AbiError( - f"Enum value {old_enum.spelling} has been changed from {old_enum.enum_value} to {new_enum.enum_value}", - symbol=old_enum.lexical_parent.spelling, - cursor=new_enum, + f"Enum value {name} has been changed from {old_value} to {new_value}", + symbol=old_enum.name, + location=new_enum.location, ) ) @@ -261,21 +352,62 @@ def analyze_c_abi(old_path, new_path, include_file, extra_clang_args=None): if __name__ == "__main__": + default_c_header_path = pathlib.Path("../c/include").resolve() + parser = argparse.ArgumentParser( description="Analyze C headers for breaking ABI changes" ) - parser.add_argument( - "old", help="Path of existing stable C headers to use a baseline" + subparsers = parser.add_subparsers( + dest="command", help="Available subcommands" ) - parser.add_argument( - "new", help="Path of new C headers to examine for breaking ABI changes" + + parser_extract = subparsers.add_parser( + "extract", help="Extract the ABI from a set of header files" ) - parser.add_argument( + parser_extract.add_argument( + "--output_file", + type=str, + help="The file to output the ABI into (default: %(default)s)", + default="c_abi.json.gz", + ) + parser_extract.add_argument( + "--header_path", + help="Path of C headers to extract the ABI from (default: %(default)s)", + default=str(default_c_header_path), + ) + parser_extract.add_argument( + "--include_file", + help="root header file to examine (default: %(default)s)", + default="cuvs/core/all.h", + ) + + parser_analyze = subparsers.add_parser( + "analyze", help="Analyze a set of header files for breaking changes" + ) + parser_analyze.add_argument( + "--abi_file", + type=str, + help="The extracted ABI file to compare against (default: %(default)s)", + default="c_abi.json.gz", + ) + parser_analyze.add_argument( + "--header_path", + help="Path of C headers to analyze (default: %(default)s)", + default=str(default_c_header_path), + ) + parser_analyze.add_argument( "--include_file", help="root header file to examine (default: %(default)s)", default="cuvs/core/all.h", ) + args = parser.parse_args() + if args.command not in ("extract", "analyze"): + print(f"unknown command {args.command}") + parser.print_help() + sys.exit(1) + + header_path = pathlib.Path(args.header_path) # TODO: better way of specifying the dlpack header source, since missing the dlpack.h # header means that we all dlpack types get treated as 'int' which could be misleading @@ -283,22 +415,39 @@ def analyze_c_abi(old_path, new_path, include_file, extra_clang_args=None): # `int` without specifying the dlpack include directory, we won't know that the type has # changed) dlpack_header_path = ( - pathlib.Path(args.new).parent.parent + header_path.parent.parent / "cpp" / "build" / "_deps" / "dlpack-src" / "include" ) + if not dlpack_header_path.is_dir(): + raise ValueError(f"dlpack header {dlpack_header_path} not found") + extra_clang_args = [f"-I{str(dlpack_header_path)}"] - errors = analyze_c_abi( - args.old, args.new, args.include_file, extra_clang_args - ) - for error in errors: - print( - f"Error: {colored(error.error, attrs=['bold'])}. Symbol {colored(error.symbol, 'red')} from {error.filename}:{error.line}" + if args.command == "extract": + abi = Abi.from_include_path( + header_path, args.include_file, extra_clang_args ) + with open(args.output_file, "wb") as o: + o.write(gzip.compress(msgspec.json.encode(abi))) + print(f"wrote abi to {args.output_file}") + elif args.command == "analyze": + old_abi = msgspec.json.decode( + gzip.decompress(open(args.abi_file, "rb").read()), type=Abi + ) + new_abi = Abi.from_include_path( + header_path, args.include_file, extra_clang_args + ) + errors = analyze_c_abi(old_abi, new_abi) + for error in errors: + print( + f"Error: {colored(error.error, attrs=['bold'])}. Symbol {colored(error.symbol, 'red')} from {error.location.filename}:{error.location.line}" + ) - if errors: - sys.exit(1) + if errors: + sys.exit(1) + else: + print("no breaking abi changes detected") From 15f5e8a832cb147b8620ce20d6658d2729018d12 Mon Sep 17 00:00:00 2001 From: Michael Sarahan Date: Fri, 30 Jan 2026 20:36:02 -0600 Subject: [PATCH 03/25] Add GitHub Actions for C ABI checking - Add check-c-abi.yaml workflow for PRs - Add store-c-abi-baseline.yaml workflow for releases - Integrate ABI check into pr.yaml pipeline Co-authored-by: Cursor --- .github/workflows/check-c-abi.yaml | 137 ++++++++++++++++++++ .github/workflows/pr.yaml | 4 + .github/workflows/store-c-abi-baseline.yaml | 105 +++++++++++++++ 3 files changed, 246 insertions(+) create mode 100644 .github/workflows/check-c-abi.yaml create mode 100644 .github/workflows/store-c-abi-baseline.yaml diff --git a/.github/workflows/check-c-abi.yaml b/.github/workflows/check-c-abi.yaml new file mode 100644 index 0000000000..f8cb74179d --- /dev/null +++ b/.github/workflows/check-c-abi.yaml @@ -0,0 +1,137 @@ +name: C ABI Compatibility Check + +on: + workflow_call: + pull_request: + paths: + - 'c/include/**' + - 'ci/check_c_abi.py' + - '.github/workflows/check-c-abi.yaml' + +concurrency: + group: ${{ github.workflow }}-${{ github.ref }} + cancel-in-progress: true + +jobs: + check-c-abi: + runs-on: ubuntu-latest + steps: + - name: Checkout PR branch + uses: actions/checkout@v4 + with: + fetch-depth: 0 + + - name: Set up Python + uses: actions/setup-python@v5 + with: + python-version: '3.11' + + - name: Install system dependencies + run: | + sudo apt-get update + sudo apt-get install -y \ + libclang-dev \ + clang \ + cmake \ + ninja-build + + - name: Install Python dependencies + run: | + pip install --upgrade pip + pip install msgspec libclang termcolor + + - name: Build C++ project to get dependencies (dlpack) + run: | + mkdir -p cpp/build + cd cpp/build + cmake .. \ + -GNinja \ + -DCMAKE_BUILD_TYPE=Debug \ + -DBUILD_TESTS=OFF \ + -DBUILD_EXAMPLES=OFF + # We only need the configure step to fetch dlpack headers + # No need to actually build everything + echo "Build directory created and dependencies fetched" + + - name: Download baseline ABI from artifact store + id: download-baseline + continue-on-error: true + uses: actions/download-artifact@v4 + with: + name: c-abi-baseline + path: baseline/ + + - name: Extract baseline ABI from main branch (if artifact not found) + if: steps.download-baseline.outcome == 'failure' + run: | + echo "Baseline ABI artifact not found, extracting from main branch..." + + # Checkout main branch to a separate directory + git worktree add ../cuvs-main main + + # Build main branch to get dlpack headers + mkdir -p ../cuvs-main/cpp/build + cd ../cuvs-main/cpp/build + cmake .. \ + -GNinja \ + -DCMAKE_BUILD_TYPE=Debug \ + -DBUILD_TESTS=OFF \ + -DBUILD_EXAMPLES=OFF + cd ../../.. + + # Extract baseline ABI from main branch + mkdir -p baseline + python ci/check_c_abi.py extract \ + --header_path ../cuvs-main/c/include \ + --include_file cuvs/core/all.h \ + --output_file baseline/c_abi.json.gz + + echo "Baseline ABI extracted from main branch" + + - name: Analyze current branch for ABI breaking changes + run: | + python ci/check_c_abi.py analyze \ + --abi_file baseline/c_abi.json.gz \ + --header_path c/include \ + --include_file cuvs/core/all.h + + - name: Comment on PR with results + if: failure() + uses: actions/github-script@v7 + with: + script: | + const fs = require('fs'); + + github.rest.issues.createComment({ + issue_number: context.issue.number, + owner: context.repo.owner, + repo: context.repo.repo, + body: `## ⚠️ C ABI Breaking Changes Detected + +This PR introduces breaking changes to the C ABI. Please review the changes carefully. + +Breaking ABI changes are only allowed in major releases. If this is intentional for a major release, +the baseline ABI will need to be updated after merge. + +See the job logs for details on what specific changes were detected. + +### What are breaking ABI changes? + +Breaking ABI changes include: +- Removing functions from the public API +- Changing function signatures (parameters or return types) +- Removing struct members or changing their types +- Removing or changing enum values + +### Next steps + +1. Review the changes flagged in the CI logs +2. If these changes are unintentional, update your PR to maintain ABI compatibility +3. If these changes are required, ensure: + - This is part of a major version release + - The changes are documented in the changelog + - Migration guide is provided for users + +For more information, see the [C ABI documentation](../docs/source/c_developer_guide.md). +` + }); diff --git a/.github/workflows/pr.yaml b/.github/workflows/pr.yaml index 94abfa658b..bbaa1ed778 100644 --- a/.github/workflows/pr.yaml +++ b/.github/workflows/pr.yaml @@ -12,6 +12,7 @@ jobs: - check-nightly-ci - changed-files - checks + - check-c-abi - conda-cpp-build - conda-cpp-tests - conda-cpp-checks @@ -315,6 +316,9 @@ jobs: with: enable_check_generated_files: false ignored_pr_jobs: "telemetry-summarize" + check-c-abi: + needs: telemetry-setup + uses: ./.github/workflows/check-c-abi.yaml conda-cpp-build: needs: checks secrets: inherit diff --git a/.github/workflows/store-c-abi-baseline.yaml b/.github/workflows/store-c-abi-baseline.yaml new file mode 100644 index 0000000000..7f8c218e75 --- /dev/null +++ b/.github/workflows/store-c-abi-baseline.yaml @@ -0,0 +1,105 @@ +name: Store C ABI Baseline + +on: + release: + types: [published] + workflow_dispatch: + inputs: + version: + description: 'Version tag to store baseline for (e.g., v26.02.00)' + required: true + type: string + +jobs: + store-baseline: + runs-on: ubuntu-latest + steps: + - name: Checkout release tag + uses: actions/checkout@v4 + with: + ref: ${{ github.event.release.tag_name || inputs.version }} + + - name: Set up Python + uses: actions/setup-python@v5 + with: + python-version: '3.11' + + - name: Install system dependencies + run: | + sudo apt-get update + sudo apt-get install -y \ + libclang-dev \ + clang \ + cmake \ + ninja-build + + - name: Install Python dependencies + run: | + pip install --upgrade pip + pip install msgspec libclang + + - name: Build C++ project to get dependencies (dlpack) + run: | + mkdir -p cpp/build + cd cpp/build + cmake .. \ + -GNinja \ + -DCMAKE_BUILD_TYPE=Debug \ + -DBUILD_TESTS=OFF \ + -DBUILD_EXAMPLES=OFF + + - name: Extract C ABI baseline + run: | + mkdir -p abi-baselines + python ci/check_c_abi.py extract \ + --header_path c/include \ + --include_file cuvs/core/all.h \ + --output_file abi-baselines/c_abi_${{ github.event.release.tag_name || inputs.version }}.json.gz + + - name: Upload ABI baseline as artifact + uses: actions/upload-artifact@v4 + with: + name: c-abi-baseline-${{ github.event.release.tag_name || inputs.version }} + path: abi-baselines/c_abi_*.json.gz + retention-days: 400 # ~13 months, enough for patch releases + + - name: Commit baseline to repository (for long-term storage) + run: | + git config user.name "github-actions[bot]" + git config user.email "github-actions[bot]@users.noreply.github.com" + + # Create a baselines branch if it doesn't exist + git fetch origin baselines:baselines || git checkout -b baselines + git checkout baselines + + # Copy the baseline file + mkdir -p c-abi-baselines + cp abi-baselines/c_abi_*.json.gz c-abi-baselines/ + + # Commit and push + git add c-abi-baselines/ + git commit -m "Add C ABI baseline for ${{ github.event.release.tag_name || inputs.version }}" + git push origin baselines + env: + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + + - name: Create release comment + if: github.event_name == 'release' + uses: actions/github-script@v7 + with: + script: | + const tagName = context.payload.release.tag_name; + + github.rest.repos.createCommitComment({ + owner: context.repo.owner, + repo: context.repo.repo, + commit_sha: context.payload.release.target_commitish, + body: `✅ C ABI baseline stored for release ${tagName} + +This baseline will be used to check for breaking ABI changes in future PRs. + +The baseline is stored in: +- Artifact: \`c-abi-baseline-${tagName}\` (available for ~13 months) +- Branch: \`baselines\` (permanent storage) +` + }); From af420ea2382e6c465dc776cb81e6ba92110bcea4 Mon Sep 17 00:00:00 2001 From: Michael Sarahan Date: Fri, 30 Jan 2026 21:11:06 -0600 Subject: [PATCH 04/25] Store ABI baseline on every merge to main - Extract and store main baseline on push to main (never expires) - PRs download and compare against main baseline - Release workflow archives the main baseline with version tag - Eliminates duplicate ABI extraction work Co-authored-by: Cursor --- .github/workflows/check-c-abi.yaml | 87 +++++++++++++++++---- .github/workflows/store-c-abi-baseline.yaml | 71 +++++------------ 2 files changed, 90 insertions(+), 68 deletions(-) diff --git a/.github/workflows/check-c-abi.yaml b/.github/workflows/check-c-abi.yaml index f8cb74179d..a158fea952 100644 --- a/.github/workflows/check-c-abi.yaml +++ b/.github/workflows/check-c-abi.yaml @@ -7,13 +7,75 @@ on: - 'c/include/**' - 'ci/check_c_abi.py' - '.github/workflows/check-c-abi.yaml' + push: + branches: + - main + paths: + - 'c/include/**' + - 'ci/check_c_abi.py' concurrency: group: ${{ github.workflow }}-${{ github.ref }} cancel-in-progress: true jobs: - check-c-abi: + # Extract and store baseline ABI from main branch + update-baseline: + if: github.event_name == 'push' && github.ref == 'refs/heads/main' + runs-on: ubuntu-latest + steps: + - name: Checkout main branch + uses: actions/checkout@v4 + + - name: Set up Python + uses: actions/setup-python@v5 + with: + python-version: '3.11' + + - name: Install system dependencies + run: | + sudo apt-get update + sudo apt-get install -y \ + libclang-dev \ + clang \ + cmake \ + ninja-build + + - name: Install Python dependencies + run: | + pip install --upgrade pip + pip install msgspec libclang termcolor + + - name: Build C++ project to get dependencies (dlpack) + run: | + mkdir -p cpp/build + cd cpp/build + cmake .. \ + -GNinja \ + -DCMAKE_BUILD_TYPE=Debug \ + -DBUILD_TESTS=OFF \ + -DBUILD_EXAMPLES=OFF + echo "Build directory created and dependencies fetched" + + - name: Extract ABI from main branch + run: | + mkdir -p baseline + python ci/check_c_abi.py extract \ + --header_path c/include \ + --include_file cuvs/core/all.h \ + --output_file baseline/c_abi.json.gz + echo "ABI extracted from main branch" + + - name: Store main baseline (never expires) + uses: actions/upload-artifact@v4 + with: + name: c-abi-baseline-main + path: baseline/c_abi.json.gz + retention-days: 0 # Never expire + + # Check PRs for breaking ABI changes + check-pr: + if: github.event_name == 'pull_request' || github.event_name == 'workflow_call' runs-on: ubuntu-latest steps: - name: Checkout PR branch @@ -49,27 +111,23 @@ jobs: -DCMAKE_BUILD_TYPE=Debug \ -DBUILD_TESTS=OFF \ -DBUILD_EXAMPLES=OFF - # We only need the configure step to fetch dlpack headers - # No need to actually build everything echo "Build directory created and dependencies fetched" - - name: Download baseline ABI from artifact store + - name: Download baseline ABI from main id: download-baseline continue-on-error: true - uses: actions/download-artifact@v4 + uses: dawidd6/action-download-artifact@v3 with: - name: c-abi-baseline + name: c-abi-baseline-main + workflow: check-c-abi.yaml + branch: main path: baseline/ - - name: Extract baseline ABI from main branch (if artifact not found) + - name: Extract baseline ABI from main branch (fallback) if: steps.download-baseline.outcome == 'failure' run: | echo "Baseline ABI artifact not found, extracting from main branch..." - - # Checkout main branch to a separate directory git worktree add ../cuvs-main main - - # Build main branch to get dlpack headers mkdir -p ../cuvs-main/cpp/build cd ../cuvs-main/cpp/build cmake .. \ @@ -78,14 +136,11 @@ jobs: -DBUILD_TESTS=OFF \ -DBUILD_EXAMPLES=OFF cd ../../.. - - # Extract baseline ABI from main branch mkdir -p baseline python ci/check_c_abi.py extract \ --header_path ../cuvs-main/c/include \ --include_file cuvs/core/all.h \ --output_file baseline/c_abi.json.gz - echo "Baseline ABI extracted from main branch" - name: Analyze current branch for ABI breaking changes @@ -96,12 +151,10 @@ jobs: --include_file cuvs/core/all.h - name: Comment on PR with results - if: failure() + if: failure() && github.event_name == 'pull_request' uses: actions/github-script@v7 with: script: | - const fs = require('fs'); - github.rest.issues.createComment({ issue_number: context.issue.number, owner: context.repo.owner, diff --git a/.github/workflows/store-c-abi-baseline.yaml b/.github/workflows/store-c-abi-baseline.yaml index 7f8c218e75..f93ec49b6e 100644 --- a/.github/workflows/store-c-abi-baseline.yaml +++ b/.github/workflows/store-c-abi-baseline.yaml @@ -1,4 +1,4 @@ -name: Store C ABI Baseline +name: Archive C ABI Baseline for Release on: release: @@ -6,62 +6,31 @@ on: workflow_dispatch: inputs: version: - description: 'Version tag to store baseline for (e.g., v26.02.00)' + description: 'Version tag to archive baseline for (e.g., v26.02.00)' required: true type: string jobs: - store-baseline: + archive-baseline: runs-on: ubuntu-latest steps: - - name: Checkout release tag + - name: Checkout repository uses: actions/checkout@v4 - with: - ref: ${{ github.event.release.tag_name || inputs.version }} - - name: Set up Python - uses: actions/setup-python@v5 + - name: Download main baseline artifact + uses: dawidd6/action-download-artifact@v3 with: - python-version: '3.11' - - - name: Install system dependencies - run: | - sudo apt-get update - sudo apt-get install -y \ - libclang-dev \ - clang \ - cmake \ - ninja-build - - - name: Install Python dependencies - run: | - pip install --upgrade pip - pip install msgspec libclang - - - name: Build C++ project to get dependencies (dlpack) - run: | - mkdir -p cpp/build - cd cpp/build - cmake .. \ - -GNinja \ - -DCMAKE_BUILD_TYPE=Debug \ - -DBUILD_TESTS=OFF \ - -DBUILD_EXAMPLES=OFF - - - name: Extract C ABI baseline - run: | - mkdir -p abi-baselines - python ci/check_c_abi.py extract \ - --header_path c/include \ - --include_file cuvs/core/all.h \ - --output_file abi-baselines/c_abi_${{ github.event.release.tag_name || inputs.version }}.json.gz + name: c-abi-baseline-main + workflow: check-c-abi.yaml + branch: main + path: baseline/ - - name: Upload ABI baseline as artifact + - name: Archive baseline with release version uses: actions/upload-artifact@v4 with: name: c-abi-baseline-${{ github.event.release.tag_name || inputs.version }} - path: abi-baselines/c_abi_*.json.gz - retention-days: 400 # ~13 months, enough for patch releases + path: baseline/c_abi.json.gz + retention-days: 400 # ~13 months - name: Commit baseline to repository (for long-term storage) run: | @@ -69,16 +38,16 @@ jobs: git config user.email "github-actions[bot]@users.noreply.github.com" # Create a baselines branch if it doesn't exist - git fetch origin baselines:baselines || git checkout -b baselines - git checkout baselines + git fetch origin baselines:baselines 2>/dev/null || git checkout --orphan baselines + git checkout baselines 2>/dev/null || true - # Copy the baseline file + # Copy the baseline file with version name mkdir -p c-abi-baselines - cp abi-baselines/c_abi_*.json.gz c-abi-baselines/ + cp baseline/c_abi.json.gz c-abi-baselines/c_abi_${{ github.event.release.tag_name || inputs.version }}.json.gz # Commit and push git add c-abi-baselines/ - git commit -m "Add C ABI baseline for ${{ github.event.release.tag_name || inputs.version }}" + git commit -m "Archive C ABI baseline for ${{ github.event.release.tag_name || inputs.version }}" git push origin baselines env: GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} @@ -94,9 +63,9 @@ jobs: owner: context.repo.owner, repo: context.repo.repo, commit_sha: context.payload.release.target_commitish, - body: `✅ C ABI baseline stored for release ${tagName} + body: `✅ C ABI baseline archived for release ${tagName} -This baseline will be used to check for breaking ABI changes in future PRs. +This baseline has been archived from the main branch and will be available for historical reference. The baseline is stored in: - Artifact: \`c-abi-baseline-${tagName}\` (available for ~13 months) From 9d4ef693a1f96d3d3c8d0449ffb3cc0ac3125aa7 Mon Sep 17 00:00:00 2001 From: Michael Sarahan Date: Fri, 30 Jan 2026 21:17:52 -0600 Subject: [PATCH 05/25] Add workflow_dispatch to allow manual baseline creation Enables bootstrapping the initial c-abi-baseline-main artifact Co-authored-by: Cursor --- .github/workflows/check-c-abi.yaml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/.github/workflows/check-c-abi.yaml b/.github/workflows/check-c-abi.yaml index a158fea952..cc07bbd092 100644 --- a/.github/workflows/check-c-abi.yaml +++ b/.github/workflows/check-c-abi.yaml @@ -1,6 +1,7 @@ name: C ABI Compatibility Check on: + workflow_dispatch: # Allow manual trigger for bootstrap workflow_call: pull_request: paths: @@ -21,7 +22,7 @@ concurrency: jobs: # Extract and store baseline ABI from main branch update-baseline: - if: github.event_name == 'push' && github.ref == 'refs/heads/main' + if: github.event_name == 'workflow_dispatch' || (github.event_name == 'push' && github.ref == 'refs/heads/main') runs-on: ubuntu-latest steps: - name: Checkout main branch From 0859bdac56477ed5eef3e7cb85c5dccb4f1c3a84 Mon Sep 17 00:00:00 2001 From: Michael Sarahan Date: Fri, 30 Jan 2026 21:32:25 -0600 Subject: [PATCH 06/25] Implement commit-specific baselines with cascade fallback MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Store baselines with commit SHA for precise comparisons - Cascade: try merge-base → latest main → extract fresh - Add concurrency control to prevent race conditions - Report which baseline source was used for transparency Co-authored-by: Cursor --- .github/workflows/check-c-abi.yaml | 56 +++++++++++++++++++++++++----- 1 file changed, 48 insertions(+), 8 deletions(-) diff --git a/.github/workflows/check-c-abi.yaml b/.github/workflows/check-c-abi.yaml index cc07bbd092..6c3461a57a 100644 --- a/.github/workflows/check-c-abi.yaml +++ b/.github/workflows/check-c-abi.yaml @@ -24,6 +24,9 @@ jobs: update-baseline: if: github.event_name == 'workflow_dispatch' || (github.event_name == 'push' && github.ref == 'refs/heads/main') runs-on: ubuntu-latest + concurrency: + group: abi-baseline-update + cancel-in-progress: false # Queue updates, don't skip steps: - name: Checkout main branch uses: actions/checkout@v4 @@ -65,9 +68,16 @@ jobs: --header_path c/include \ --include_file cuvs/core/all.h \ --output_file baseline/c_abi.json.gz - echo "ABI extracted from main branch" + echo "ABI extracted from main branch (commit: ${{ github.sha }})" - - name: Store main baseline (never expires) + - name: Store commit-specific baseline + uses: actions/upload-artifact@v4 + with: + name: c-abi-baseline-${{ github.sha }} + path: baseline/c_abi.json.gz + retention-days: 90 # Keep for 3 months + + - name: Store main baseline (latest, never expires) uses: actions/upload-artifact@v4 with: name: c-abi-baseline-main @@ -114,8 +124,27 @@ jobs: -DBUILD_EXAMPLES=OFF echo "Build directory created and dependencies fetched" - - name: Download baseline ABI from main - id: download-baseline + - name: Find merge base commit + id: merge-base + run: | + git fetch origin main + MERGE_BASE=$(git merge-base HEAD origin/main) + echo "merge_base_sha=${MERGE_BASE}" >> $GITHUB_OUTPUT + echo "Merge base commit: ${MERGE_BASE}" + + - name: Try to download baseline for merge-base commit (most accurate) + id: download-merge-base + continue-on-error: true + uses: dawidd6/action-download-artifact@v3 + with: + name: c-abi-baseline-${{ steps.merge-base.outputs.merge_base_sha }} + workflow: check-c-abi.yaml + commit: ${{ steps.merge-base.outputs.merge_base_sha }} + path: baseline/ + + - name: Try to download latest main baseline (fallback 1) + id: download-main + if: steps.download-merge-base.outcome == 'failure' continue-on-error: true uses: dawidd6/action-download-artifact@v3 with: @@ -124,10 +153,11 @@ jobs: branch: main path: baseline/ - - name: Extract baseline ABI from main branch (fallback) - if: steps.download-baseline.outcome == 'failure' + - name: Extract baseline ABI from main branch (fallback 2) + if: steps.download-merge-base.outcome == 'failure' && steps.download-main.outcome == 'failure' run: | - echo "Baseline ABI artifact not found, extracting from main branch..." + echo "⚠️ No baseline artifacts found, extracting from main branch..." + echo "This is slower but ensures we always have a baseline for comparison." git worktree add ../cuvs-main main mkdir -p ../cuvs-main/cpp/build cd ../cuvs-main/cpp/build @@ -142,7 +172,17 @@ jobs: --header_path ../cuvs-main/c/include \ --include_file cuvs/core/all.h \ --output_file baseline/c_abi.json.gz - echo "Baseline ABI extracted from main branch" + echo "✓ Baseline ABI extracted from main branch" + + - name: Report baseline source + run: | + if [ "${{ steps.download-merge-base.outcome }}" == "success" ]; then + echo "✓ Using baseline from merge-base commit: ${{ steps.merge-base.outputs.merge_base_sha }}" + elif [ "${{ steps.download-main.outcome }}" == "success" ]; then + echo "✓ Using latest main baseline (merge-base baseline not yet available)" + else + echo "✓ Using freshly extracted baseline from main branch" + fi - name: Analyze current branch for ABI breaking changes run: | From c8251aa99f398c83d8fd9d8d0322c8d7263655d7 Mon Sep 17 00:00:00 2001 From: Ben Frederickson Date: Mon, 2 Feb 2026 17:26:29 -0800 Subject: [PATCH 07/25] code review fixes --- .github/workflows/check-c-abi.yaml | 58 +++++++-------- ci/check_c_abi.py | 116 +++++++++++++++++------------ 2 files changed, 99 insertions(+), 75 deletions(-) diff --git a/.github/workflows/check-c-abi.yaml b/.github/workflows/check-c-abi.yaml index 6c3461a57a..663a2048d7 100644 --- a/.github/workflows/check-c-abi.yaml +++ b/.github/workflows/check-c-abi.yaml @@ -30,12 +30,12 @@ jobs: steps: - name: Checkout main branch uses: actions/checkout@v4 - + - name: Set up Python uses: actions/setup-python@v5 with: python-version: '3.11' - + - name: Install system dependencies run: | sudo apt-get update @@ -44,12 +44,12 @@ jobs: clang \ cmake \ ninja-build - + - name: Install Python dependencies run: | pip install --upgrade pip pip install msgspec libclang termcolor - + - name: Build C++ project to get dependencies (dlpack) run: | mkdir -p cpp/build @@ -60,23 +60,23 @@ jobs: -DBUILD_TESTS=OFF \ -DBUILD_EXAMPLES=OFF echo "Build directory created and dependencies fetched" - + - name: Extract ABI from main branch run: | mkdir -p baseline python ci/check_c_abi.py extract \ - --header_path c/include \ - --include_file cuvs/core/all.h \ - --output_file baseline/c_abi.json.gz + --header-path c/include \ + --include-file cuvs/core/all.h \ + --output-file baseline/c_abi.json.gz echo "ABI extracted from main branch (commit: ${{ github.sha }})" - + - name: Store commit-specific baseline uses: actions/upload-artifact@v4 with: name: c-abi-baseline-${{ github.sha }} path: baseline/c_abi.json.gz retention-days: 90 # Keep for 3 months - + - name: Store main baseline (latest, never expires) uses: actions/upload-artifact@v4 with: @@ -93,12 +93,12 @@ jobs: uses: actions/checkout@v4 with: fetch-depth: 0 - + - name: Set up Python uses: actions/setup-python@v5 with: python-version: '3.11' - + - name: Install system dependencies run: | sudo apt-get update @@ -107,12 +107,12 @@ jobs: clang \ cmake \ ninja-build - + - name: Install Python dependencies run: | pip install --upgrade pip pip install msgspec libclang termcolor - + - name: Build C++ project to get dependencies (dlpack) run: | mkdir -p cpp/build @@ -123,7 +123,7 @@ jobs: -DBUILD_TESTS=OFF \ -DBUILD_EXAMPLES=OFF echo "Build directory created and dependencies fetched" - + - name: Find merge base commit id: merge-base run: | @@ -131,7 +131,7 @@ jobs: MERGE_BASE=$(git merge-base HEAD origin/main) echo "merge_base_sha=${MERGE_BASE}" >> $GITHUB_OUTPUT echo "Merge base commit: ${MERGE_BASE}" - + - name: Try to download baseline for merge-base commit (most accurate) id: download-merge-base continue-on-error: true @@ -141,7 +141,7 @@ jobs: workflow: check-c-abi.yaml commit: ${{ steps.merge-base.outputs.merge_base_sha }} path: baseline/ - + - name: Try to download latest main baseline (fallback 1) id: download-main if: steps.download-merge-base.outcome == 'failure' @@ -152,7 +152,7 @@ jobs: workflow: check-c-abi.yaml branch: main path: baseline/ - + - name: Extract baseline ABI from main branch (fallback 2) if: steps.download-merge-base.outcome == 'failure' && steps.download-main.outcome == 'failure' run: | @@ -169,11 +169,11 @@ jobs: cd ../../.. mkdir -p baseline python ci/check_c_abi.py extract \ - --header_path ../cuvs-main/c/include \ - --include_file cuvs/core/all.h \ - --output_file baseline/c_abi.json.gz + --header-path ../cuvs-main/c/include \ + --include-file cuvs/core/all.h \ + --output-file baseline/c_abi.json.gz echo "✓ Baseline ABI extracted from main branch" - + - name: Report baseline source run: | if [ "${{ steps.download-merge-base.outcome }}" == "success" ]; then @@ -183,14 +183,14 @@ jobs: else echo "✓ Using freshly extracted baseline from main branch" fi - + - name: Analyze current branch for ABI breaking changes run: | python ci/check_c_abi.py analyze \ - --abi_file baseline/c_abi.json.gz \ - --header_path c/include \ - --include_file cuvs/core/all.h - + --abi-file baseline/c_abi.json.gz \ + --header-path c/include \ + --include-file cuvs/core/all.h + - name: Comment on PR with results if: failure() && github.event_name == 'pull_request' uses: actions/github-script@v7 @@ -201,10 +201,10 @@ jobs: owner: context.repo.owner, repo: context.repo.repo, body: `## ⚠️ C ABI Breaking Changes Detected - + This PR introduces breaking changes to the C ABI. Please review the changes carefully. -Breaking ABI changes are only allowed in major releases. If this is intentional for a major release, +Breaking ABI changes are only allowed in major releases. If this is intentional for a major release, the baseline ABI will need to be updated after merge. See the job logs for details on what specific changes were detected. diff --git a/ci/check_c_abi.py b/ci/check_c_abi.py index 67c36154b2..d552fd3017 100755 --- a/ci/check_c_abi.py +++ b/ci/check_c_abi.py @@ -31,7 +31,7 @@ import gzip import msgspec -import itertools +from itertools import zip_longest import pathlib from typing import Optional import sys @@ -82,7 +82,7 @@ class FunctionDefinition(msgspec.Struct): def from_cursor(cls, cursor, root_path=None): if cursor.kind != clang.cindex.CursorKind.FUNCTION_DECL: raise ValueError( - "FunctionDefinition.from_cursor called with cursor of kind={cursor.kind}" + f"FunctionDefinition.from_cursor called with cursor of kind={cursor.kind}" ) return cls( @@ -106,7 +106,7 @@ class StructDefinition(msgspec.Struct): def from_cursor(cls, cursor, root_path=None): if cursor.kind != clang.cindex.CursorKind.STRUCT_DECL: raise ValueError( - "StructDefinition.from_cursor called with cursor of kind={cursor.kind}" + f"StructDefinition.from_cursor called with cursor of kind={cursor.kind}" ) return cls( @@ -129,7 +129,7 @@ class EnumDefinition(msgspec.Struct): def from_cursor(cls, cursor, root_path=None): if cursor.kind != clang.cindex.CursorKind.ENUM_DECL: raise ValueError( - "EnumDefinition.from_cursor called with cursor of kind={cursor.kind}" + f"EnumDefinition.from_cursor called with cursor of kind={cursor.kind}" ) return cls( @@ -175,12 +175,13 @@ def from_include_path(cls, root, header, extra_clang_args=None): for child in tu.cursor.get_children(): # ignore things like cuda headers and other files not installed in # in the cuvs C path - if not child.location.file or not pathlib.Path( - child.location.file.name - ).is_relative_to(path): + if not ( + child.location.file + and pathlib.Path(child.location.file.name).is_relative_to(path) + ): continue - # break up the AST into symbol -> node for the things we care about + # Store definitions for each function, struct and enum if child.kind == clang.cindex.CursorKind.FUNCTION_DECL: functions.append( FunctionDefinition.from_cursor(child, root_path=path) @@ -218,17 +219,19 @@ class AbiError(msgspec.Struct): location: Optional[SymbolLocation] = None -def analyze_c_abi(old_abi, new_abi): - # iterate over every function in the existing abi, and make sure that no functions - # have been removed or had function parameters removed, parameters added or the type of any - # parameter changed. Note: adding new functions to the new abi is allowed +def _analyze_function_abi(old_abi, new_abi): + """This iterates over every function in the existing abi, and make sure that no functions + have been removed or had function parameters removed, parameters added or the type of any + parameter changed. Note: adding new functions to the new abi is allowed + """ errors = [] old_functions = {f.name: f for f in old_abi.functions} new_functions = {f.name: f for f in new_abi.functions} for name, old_function in old_functions.items(): - new_function = new_functions.get(name) - if new_function is None: + try: + new_function = new_functions[name] + except KeyError: errors.append( AbiError( "Function has been removed", @@ -247,47 +250,54 @@ def analyze_c_abi(old_abi, new_abi): ) ) - for old_param, new_param in itertools.zip_longest( + for (old_type, old_name), (new_type, new_name) in zip_longest( old_function.parameters, new_function.parameters, - fillvalue=None, + fillvalue=(None, None), ): - if old_param is None: + if old_type is None: errors.append( AbiError( - f"Function has a new parameter '{new_param[0]} {new_param[1]}'", + f"Function has a new parameter '{new_type} {new_name}'", symbol=new_function.name, location=new_function.location, ) ) - elif new_param is None: + elif new_type is None: errors.append( AbiError( - f"Function has a deleted parameter '{old_param[0]} {old_param[1]}'", + f"Function has a deleted parameter '{old_type} {old_name}'", symbol=old_function.name, location=old_function.location, ) ) - elif new_param[0] != old_param[0]: + elif new_type != old_type: errors.append( AbiError( - f"Function has a changed parameter type '{old_param[0]}' to '{new_param[0]} for parameter '{old_param[1]}'", + f"Function has changed type '{old_type}' to '{new_type}' for parameter '{old_name}'", symbol=new_function.name, location=new_function.location, ) ) + return errors + + +def _analyze_struct_abi(old_abi, new_abi): + """Checks to see if any existing structures have had items removed, reordered, renamed, or types + changed (adding new members is considered to be ok, as long as functions are initialized via + a create factory function) + """ + errors = [] - # check to see if any existing structures have had items removed, reordered, renamed, or types - # changed (adding new members is considered to be ok, as long as functions are initialized via - # a create factory function) old_structs = {f.name: f for f in old_abi.structs} new_structs = {f.name: f for f in new_abi.structs} for name, old_struct in old_structs.items(): - new_struct = new_structs.get(name) - if new_struct is None: + try: + new_struct = new_structs[name] + except KeyError: errors.append( AbiError( "Struct has been removed", @@ -297,29 +307,36 @@ def analyze_c_abi(old_abi, new_abi): ) continue - for old_member, new_member in itertools.zip_longest( + for (old_type, old_name), (new_type, new_name) in zip_longest( old_struct.members, new_struct.members, - fillvalue=None, + fillvalue=(None, None), ): - if new_member is None: + if new_type is None: errors.append( AbiError( - f"Struct has a deleted member '{old_member[0]} {old_member[1]}'", + f"Struct has a deleted member '{old_type} {old_name}'", symbol=name, location=new_struct.location, ) ) + elif old_type is None: + # adding an item to the end of the struct is allowed here + pass - elif new_member[0] != old_member[0]: + elif new_type != old_type: errors.append( AbiError( - f"Struct member has changed type '{old_member[0]}' to '{new_member[0]} for member '{old_member[1]}'", + f"Struct member has changed type '{old_type}' to '{new_type}' for member '{old_name}'", symbol=name, location=new_struct.location, ) ) + return errors + +def _analyze_enum_abi(old_abi, new_abi): + errors = [] # flatten enum values: since values inside an enum in C are in the global scope old_enum_values = { k: (v, enum) for enum in old_abi.enums for k, v in enum.values @@ -330,8 +347,9 @@ def analyze_c_abi(old_abi, new_abi): # check to see if enum values have been removed, or had their numeric values changed for name, (old_value, old_enum) in old_enum_values.items(): - new_value, new_enum = new_enum_values.get(name, (None, None)) - if new_enum is None: + try: + new_value, new_enum = new_enum_values[name] + except KeyError: errors.append( AbiError( f"Enum value {name} has been removed", @@ -339,7 +357,9 @@ def analyze_c_abi(old_abi, new_abi): location=old_enum.location, ) ) - elif new_value != old_value: + continue + + if new_value != old_value: errors.append( AbiError( f"Enum value {name} has been changed from {old_value} to {new_value}", @@ -351,6 +371,14 @@ def analyze_c_abi(old_abi, new_abi): return errors +def analyze_c_abi(old_abi, new_abi): + errors = [] + errors.extend(_analyze_function_abi(old_abi, new_abi)) + errors.extend(_analyze_struct_abi(old_abi, new_abi)) + errors.extend(_analyze_enum_abi(old_abi, new_abi)) + return errors + + if __name__ == "__main__": default_c_header_path = pathlib.Path("../c/include").resolve() @@ -365,18 +393,18 @@ def analyze_c_abi(old_abi, new_abi): "extract", help="Extract the ABI from a set of header files" ) parser_extract.add_argument( - "--output_file", + "--output-file", type=str, help="The file to output the ABI into (default: %(default)s)", default="c_abi.json.gz", ) parser_extract.add_argument( - "--header_path", + "--header-path", help="Path of C headers to extract the ABI from (default: %(default)s)", default=str(default_c_header_path), ) parser_extract.add_argument( - "--include_file", + "--include-file", help="root header file to examine (default: %(default)s)", default="cuvs/core/all.h", ) @@ -385,27 +413,23 @@ def analyze_c_abi(old_abi, new_abi): "analyze", help="Analyze a set of header files for breaking changes" ) parser_analyze.add_argument( - "--abi_file", + "--abi-file", type=str, help="The extracted ABI file to compare against (default: %(default)s)", default="c_abi.json.gz", ) parser_analyze.add_argument( - "--header_path", + "--header-path", help="Path of C headers to analyze (default: %(default)s)", default=str(default_c_header_path), ) parser_analyze.add_argument( - "--include_file", + "--include-file", help="root header file to examine (default: %(default)s)", default="cuvs/core/all.h", ) args = parser.parse_args() - if args.command not in ("extract", "analyze"): - print(f"unknown command {args.command}") - parser.print_help() - sys.exit(1) header_path = pathlib.Path(args.header_path) From 4a83a1f48b06fe6cde54151399080ed8b281a2ee Mon Sep 17 00:00:00 2001 From: Ben Frederickson Date: Tue, 10 Feb 2026 11:25:57 -0800 Subject: [PATCH 08/25] Move to python package Move the check_c_abi.py script to a python package, and add tests and type hints --- ci/check_c_abi/LICENSE | 1 + ci/check_c_abi/VERSION | 1 + ci/check_c_abi/check_c_abi/__init__.py | 4 + ci/check_c_abi/check_c_abi/abi.py | 339 +++++++++++++++++++ ci/check_c_abi/check_c_abi/main.py | 137 ++++++++ ci/check_c_abi/check_c_abi/tests/test_abi.py | 149 ++++++++ ci/check_c_abi/pyproject.toml | 22 ++ 7 files changed, 653 insertions(+) create mode 120000 ci/check_c_abi/LICENSE create mode 120000 ci/check_c_abi/VERSION create mode 100644 ci/check_c_abi/check_c_abi/__init__.py create mode 100644 ci/check_c_abi/check_c_abi/abi.py create mode 100755 ci/check_c_abi/check_c_abi/main.py create mode 100644 ci/check_c_abi/check_c_abi/tests/test_abi.py create mode 100644 ci/check_c_abi/pyproject.toml diff --git a/ci/check_c_abi/LICENSE b/ci/check_c_abi/LICENSE new file mode 120000 index 0000000000..30cff7403d --- /dev/null +++ b/ci/check_c_abi/LICENSE @@ -0,0 +1 @@ +../../LICENSE \ No newline at end of file diff --git a/ci/check_c_abi/VERSION b/ci/check_c_abi/VERSION new file mode 120000 index 0000000000..558194c5a5 --- /dev/null +++ b/ci/check_c_abi/VERSION @@ -0,0 +1 @@ +../../VERSION \ No newline at end of file diff --git a/ci/check_c_abi/check_c_abi/__init__.py b/ci/check_c_abi/check_c_abi/__init__.py new file mode 100644 index 0000000000..b2e1c69898 --- /dev/null +++ b/ci/check_c_abi/check_c_abi/__init__.py @@ -0,0 +1,4 @@ +# +# SPDX-FileCopyrightText: Copyright (c) 2026, NVIDIA CORPORATION. +# SPDX-License-Identifier: Apache-2.0 +# diff --git a/ci/check_c_abi/check_c_abi/abi.py b/ci/check_c_abi/check_c_abi/abi.py new file mode 100644 index 0000000000..29a12bfc7b --- /dev/null +++ b/ci/check_c_abi/check_c_abi/abi.py @@ -0,0 +1,339 @@ +# +# SPDX-FileCopyrightText: Copyright (c) 2026, NVIDIA CORPORATION. +# SPDX-License-Identifier: Apache-2.0 +# + +import msgspec +from itertools import zip_longest +import pathlib +from typing import Generator, Optional, Self +import os + + +import clang.cindex + +try: + from termcolor import colored +except ImportError: + + def colored(text, *args, **kwargs): + return text + + +def _is_unnamed_struct(cursor: clang.cindex.Cursor) -> bool: + return ( + cursor.kind == clang.cindex.CursorKind.STRUCT_DECL + and cursor.spelling.startswith("struct ") + and "unnamed " in cursor.spelling + ) + + +class SymbolLocation(msgspec.Struct): + filename: str + line: int + column: int + + @classmethod + def from_cursor( + cls, cursor: clang.cindex.Cursor, root_path: str = None + ) -> Self: + filename = cursor.location.file.name + if root_path: + filename = str(pathlib.Path(filename).relative_to(root_path)) + return cls( + filename=filename, + line=cursor.location.line, + column=cursor.location.column, + ) + + +class FunctionDefinition(msgspec.Struct): + name: str + return_type: str + parameters: list[tuple[str, str]] + location: SymbolLocation + + @classmethod + def from_cursor( + cls, cursor: clang.cindex.Cursor, root_path: str = None + ) -> Self: + if cursor.kind != clang.cindex.CursorKind.FUNCTION_DECL: + raise ValueError( + f"FunctionDefinition.from_cursor called with cursor of kind={cursor.kind}" + ) + + return cls( + name=cursor.spelling, + return_type=cursor.result_type.spelling, + parameters=[ + (child.type.spelling, child.spelling) + for child in cursor.get_children() + if child.kind == clang.cindex.CursorKind.PARM_DECL + ], + location=SymbolLocation.from_cursor(cursor, root_path), + ) + + +class StructDefinition(msgspec.Struct): + name: str + members: list[tuple[str, str]] + location: SymbolLocation + + @classmethod + def from_cursor( + cls, cursor: clang.cindex.Cursor, root_path: str = None + ) -> Self: + if cursor.kind != clang.cindex.CursorKind.STRUCT_DECL: + raise ValueError( + f"StructDefinition.from_cursor called with cursor of kind={cursor.kind}" + ) + + return cls( + name=cursor.spelling, + members=[ + (child.type.spelling, child.spelling) + for child in cursor.get_children() + if child.kind == clang.cindex.CursorKind.FIELD_DECL + ], + location=SymbolLocation.from_cursor(cursor, root_path), + ) + + +class EnumDefinition(msgspec.Struct): + name: str + values: list[tuple[str, int]] + location: SymbolLocation + + @classmethod + def from_cursor( + cls, cursor: clang.cindex.Cursor, root_path: str = None + ) -> Self: + if cursor.kind != clang.cindex.CursorKind.ENUM_DECL: + raise ValueError( + f"EnumDefinition.from_cursor called with cursor of kind={cursor.kind}" + ) + + return cls( + name=cursor.spelling, + values=[ + (child.spelling, child.enum_value) + for child in cursor.get_children() + if child.kind == clang.cindex.CursorKind.ENUM_CONSTANT_DECL + ], + location=SymbolLocation.from_cursor(cursor, root_path), + ) + + +class Abi(msgspec.Struct): + functions: list[FunctionDefinition] + structs: list[StructDefinition] + enums: list[EnumDefinition] + + @classmethod + def from_include_path( + cls, + root: str | os.PathLike, + header: str | os.PathLike, + extra_clang_args: Optional[list[str]] = None, + ) -> Self: + """Loads the Abi from a root path ('/source/cuvs/c/include') and a header file + ("cuvs/include/all.h") + """ + path = pathlib.Path(root).resolve() + all_header = path / header + + if not all_header.is_file(): + raise ValueError(f"header file '{all_header}' not found") + + index = clang.cindex.Index.create() + + args = [f"-I{str(path)}"] + if extra_clang_args: + args.extend(extra_clang_args) + + tu = index.parse(all_header, args=args) + + functions, structs, enums = [], [], [] + + # note: we could use tu.cursor.walk_preorder() here instead to recurse through the AST + # but it is slightly slower to do so (extra 100ms or so) and for the cuvs C-ABI everything + # is at the top level + for child in tu.cursor.get_children(): + # ignore things like cuda headers and other files not installed in + # in the cuvs C path + if not ( + child.location.file + and pathlib.Path(child.location.file.name).is_relative_to(path) + ): + continue + + # Store definitions for each function, struct and enum + if child.kind == clang.cindex.CursorKind.FUNCTION_DECL: + functions.append( + FunctionDefinition.from_cursor(child, root_path=path) + ) + elif child.kind == clang.cindex.CursorKind.STRUCT_DECL: + # ignore unnamed structs (will get picked up via the typedef) + if _is_unnamed_struct(child): + continue + structs.append( + StructDefinition.from_cursor(child, root_path=path) + ) + elif child.kind == clang.cindex.CursorKind.TYPEDEF_DECL: + # check if this is a typedef to an unnamed struct, if so use the + # typedef as the symbolname for the struct + grandchildren = list(child.get_children()) + if len(grandchildren) == 1 and _is_unnamed_struct( + grandchildren[0] + ): + struct = StructDefinition.from_cursor( + grandchildren[0], root_path=path + ) + struct.name = child.spelling + structs.append(struct) + elif child.kind == clang.cindex.CursorKind.ENUM_DECL: + enums.append(EnumDefinition.from_cursor(child, root_path=path)) + + return cls(functions, structs, enums) + + +class AbiError(msgspec.Struct): + """Holds information about an ABI breaking error""" + + error: str + symbol: Optional[str] = None + location: Optional[SymbolLocation] = None + + +def _analyze_function_abi(old_abi: Abi, new_abi: Abi) -> Generator[AbiError]: + """This iterates over every function in the existing abi, and make sure that no functions + have been removed or had function parameters removed, parameters added or the type of any + parameter changed. Note: adding new functions to the new abi is allowed + """ + old_functions = {f.name: f for f in old_abi.functions} + new_functions = {f.name: f for f in new_abi.functions} + + for name, old_function in old_functions.items(): + try: + new_function = new_functions[name] + except KeyError: + yield AbiError( + "Function has been removed", + symbol=old_function.name, + location=old_function.location, + ) + continue + + if old_function.return_type != new_function.return_type: + yield AbiError( + f"Function has return type changed from '{old_function.return_type}' to '{new_function.return_type}'", + symbol=new_function.name, + location=new_function.location, + ) + + for (old_type, old_name), (new_type, new_name) in zip_longest( + old_function.parameters, + new_function.parameters, + fillvalue=(None, None), + ): + if old_type is None: + yield AbiError( + f"Function has a new parameter '{new_type} {new_name}'", + symbol=new_function.name, + location=new_function.location, + ) + + elif new_type is None: + yield AbiError( + f"Function has a deleted parameter '{old_type} {old_name}'", + symbol=old_function.name, + location=old_function.location, + ) + + elif new_type != old_type: + yield AbiError( + f"Function has changed type '{old_type}' to '{new_type}' for parameter '{old_name}'", + symbol=new_function.name, + location=new_function.location, + ) + + +def _analyze_struct_abi(old_abi: Abi, new_abi: Abi) -> Generator[AbiError]: + """Checks to see if any existing structures have had items removed, reordered, renamed, or types + changed (adding new members is considered to be ok, as long as functions are initialized via + a create factory function) + """ + old_structs = {f.name: f for f in old_abi.structs} + new_structs = {f.name: f for f in new_abi.structs} + + for name, old_struct in old_structs.items(): + try: + new_struct = new_structs[name] + except KeyError: + yield AbiError( + "Struct has been removed", + symbol=name, + location=old_struct.location, + ) + + for (old_type, old_name), (new_type, new_name) in zip_longest( + old_struct.members, + new_struct.members, + fillvalue=(None, None), + ): + if new_type is None: + yield AbiError( + f"Struct has a deleted member '{old_type} {old_name}'", + symbol=name, + location=new_struct.location, + ) + elif old_type is None: + # adding an item to the end of the struct is allowed here + pass + + elif new_type != old_type: + yield AbiError( + f"Struct member has changed type '{old_type}' to '{new_type}' for member '{old_name}'", + symbol=name, + location=new_struct.location, + ) + + +def _analyze_enum_abi(old_abi: Abi, new_abi: Abi) -> Generator[AbiError]: + # flatten enum values: since values inside an enum in C are in the global scope + old_enum_values = { + k: (v, enum) for enum in old_abi.enums for k, v in enum.values + } + new_enum_values = { + k: (v, enum) for enum in new_abi.enums for k, v in enum.values + } + + # check to see if enum values have been removed, or had their numeric values changed + for name, (old_value, old_enum) in old_enum_values.items(): + try: + new_value, new_enum = new_enum_values[name] + except KeyError: + yield AbiError( + f"Enum value {name} has been removed", + symbol=old_enum.name, + location=old_enum.location, + ) + continue + + if new_value != old_value: + yield AbiError( + f"Enum value {name} has been changed from {old_value} to {new_value}", + symbol=old_enum.name, + location=new_enum.location, + ) + + +def analyze_c_abi(old_abi: Abi, new_abi: Abi) -> list[AbiError]: + """Compares two Abi objects and returns a list of errors for any ABI breaking + changes between them + """ + errors = [] + errors.extend(_analyze_function_abi(old_abi, new_abi)) + errors.extend(_analyze_struct_abi(old_abi, new_abi)) + errors.extend(_analyze_enum_abi(old_abi, new_abi)) + return errors diff --git a/ci/check_c_abi/check_c_abi/main.py b/ci/check_c_abi/check_c_abi/main.py new file mode 100755 index 0000000000..f111f92bfc --- /dev/null +++ b/ci/check_c_abi/check_c_abi/main.py @@ -0,0 +1,137 @@ +#!/usr/bin/env python3 +# +# SPDX-FileCopyrightText: Copyright (c) 2026, NVIDIA CORPORATION. +# SPDX-License-Identifier: Apache-2.0 +# + +import argparse +import gzip +import msgspec +import pathlib +import sys + +from check_c_abi.abi import analyze_c_abi, Abi + +try: + from termcolor import colored +except ImportError: + + def colored(text, *args, **kwargs): + return text + + +def main_cli(): + # figure out the default header c path - by scanning recursively for the + # `cuvs/core/all.h` header + current_path = pathlib.Path(".").resolve() + while current_path: + if ( + current_path / "c" / "include" / "cuvs" / "core" / "all.h" + ).is_file(): + default_c_header_path = current_path / "c" / "include" + break + if current_path.parent == current_path: + default_c_header_path = None + break + current_path = current_path.parent + + parser = argparse.ArgumentParser( + description="Analyze C headers for breaking ABI changes" + ) + subparsers = parser.add_subparsers( + dest="command", help="Available subcommands" + ) + + parser_extract = subparsers.add_parser( + "extract", help="Extract the ABI from a set of header files" + ) + parser_extract.add_argument( + "--output-file", + type=str, + help="The file to output the ABI into (default: %(default)s)", + default="c_abi.json.gz", + ) + parser_extract.add_argument( + "--header-path", + help="Path of C headers to extract the ABI from (default: %(default)s)", + default=str(default_c_header_path), + ) + parser_extract.add_argument( + "--include-file", + help="root header file to examine (default: %(default)s)", + default="cuvs/core/all.h", + ) + + parser_analyze = subparsers.add_parser( + "analyze", help="Analyze a set of header files for breaking changes" + ) + parser_analyze.add_argument( + "--abi-file", + type=str, + help="The extracted ABI file to compare against (default: %(default)s)", + default="c_abi.json.gz", + ) + parser_analyze.add_argument( + "--header-path", + help="Path of C headers to analyze (default: %(default)s)", + default=str(default_c_header_path), + ) + parser_analyze.add_argument( + "--include-file", + help="root header file to examine (default: %(default)s)", + default="cuvs/core/all.h", + ) + + args = parser.parse_args() + if not args.command: + parser.print_help() + sys.exit(1) + + header_path = pathlib.Path(args.header_path) + + # TODO: better way of specifying the dlpack header source, since missing the dlpack.h + # header means that we all dlpack types get treated as 'int' which could be misleading + # when looking for differences in the ABI (like if we change a field from `DLDataType` to + # `int` without specifying the dlpack include directory, we won't know that the type has + # changed) + dlpack_header_path = ( + header_path.parent.parent + / "cpp" + / "build" + / "_deps" + / "dlpack-src" + / "include" + ) + if not dlpack_header_path.is_dir(): + raise ValueError(f"dlpack header {dlpack_header_path} not found") + + extra_clang_args = [f"-I{str(dlpack_header_path)}"] + + if args.command == "extract": + abi = Abi.from_include_path( + header_path, args.include_file, extra_clang_args + ) + with open(args.output_file, "wb") as o: + o.write(gzip.compress(msgspec.json.encode(abi))) + print(f"wrote abi to {args.output_file}") + elif args.command == "analyze": + old_abi = msgspec.json.decode( + gzip.decompress(open(args.abi_file, "rb").read()), type=Abi + ) + new_abi = Abi.from_include_path( + header_path, args.include_file, extra_clang_args + ) + errors = analyze_c_abi(old_abi, new_abi) + for error in errors: + print( + f"Error: {colored(error.error, attrs=['bold'])}. Symbol {colored(error.symbol, 'red')} from {error.location.filename}:{error.location.line}" + ) + + if errors: + sys.exit(1) + else: + print("no breaking abi changes detected") + + +if __name__ == "__main__": + main_cli() diff --git a/ci/check_c_abi/check_c_abi/tests/test_abi.py b/ci/check_c_abi/check_c_abi/tests/test_abi.py new file mode 100644 index 0000000000..02a3a7344d --- /dev/null +++ b/ci/check_c_abi/check_c_abi/tests/test_abi.py @@ -0,0 +1,149 @@ +# +# SPDX-FileCopyrightText: Copyright (c) 2026, NVIDIA CORPORATION. +# SPDX-License-Identifier: Apache-2.0 +# + +import pathlib +import tempfile + + +from check_c_abi.abi import Abi, analyze_c_abi + + +def abi_from_str(header_src: str) -> Abi: + with tempfile.NamedTemporaryFile( + mode="w+t", delete=True, suffix=".h" + ) as tmp: + tmp.write(header_src) + tmp.flush() + filename = pathlib.Path(tmp.name) + return Abi.from_include_path(filename.parent, filename.name) + + +def test_function(): + old_abi = abi_from_str("int func(int x, char * y); ") + errors = analyze_c_abi(old_abi, old_abi) + assert not errors + + # changing the return type should return an error + new_abi = abi_from_str("char func(int x, char * y); ") + errors = analyze_c_abi(old_abi, new_abi) + assert len(errors) == 1 + assert errors[0].symbol == "func" + + # adding parameters should return an error + new_abi = abi_from_str("int func(int x, char * y, float z); ") + errors = analyze_c_abi(old_abi, new_abi) + assert len(errors) == 1 + assert errors[0].symbol == "func" + + # removing parameters should return an error + new_abi = abi_from_str("int func(int x); ") + errors = analyze_c_abi(old_abi, new_abi) + assert len(errors) == 1 + assert errors[0].symbol == "func" + + # adding new functions is allowed + new_abi = abi_from_str(""" + int func(int x, char*y); + int func2(float x); + """) + errors = analyze_c_abi(old_abi, new_abi) + assert not errors + + +def test_struct(): + old_abi = abi_from_str(""" + struct Foo { + int a; + float b; + }; """) + + errors = analyze_c_abi(old_abi, old_abi) + assert not errors + + # removing a field should return an error + new_abi = abi_from_str(""" + struct Foo { + int a; + }; """) + errors = analyze_c_abi(old_abi, new_abi) + assert len(errors) == 1 + assert errors[0].symbol == "Foo" + + # adding a new field should not return an error + new_abi = abi_from_str(""" + struct Foo { + int a; + float b; + bool c; + }; """) + errors = analyze_c_abi(old_abi, new_abi) + assert not errors + + # Changing the type of a field should return an error + new_abi = abi_from_str(""" + struct Foo { + int a; + int b; + }; """) + errors = analyze_c_abi(old_abi, new_abi) + assert len(errors) == 1 + assert errors[0].symbol == "Foo" + + # adding new structs is allowed + new_abi = abi_from_str(""" + struct Foo { + int a; + float b; + }; + struct Foo2 { + double c; + }; + """) + errors = analyze_c_abi(old_abi, new_abi) + assert not errors + + +def test_enum(): + old_abi = abi_from_str(""" + enum Foo { + ZERO = 0, + TWO = 2, + HUNDRED = 100 + }; """) + + errors = analyze_c_abi(old_abi, old_abi) + assert not errors + + # test removing values from an enum + new_abi = abi_from_str(""" + enum Foo { + ZERO = 0, + HUNDRED = 100 + }; """) + errors = analyze_c_abi(old_abi, new_abi) + assert len(errors) == 1 + assert errors[0].symbol == "Foo" + + # adding new values to an enum is allowed + new_abi = abi_from_str(""" + enum Foo { + ZERO = 0, + TWO = 2, + THREE = 3, + HUNDRED = 100 + }; """) + errors = analyze_c_abi(old_abi, new_abi) + assert not errors + + # changing enum values isn't allowed + new_abi = abi_from_str(""" + enum Foo { + ZERO = 0, + TWO = 200, + HUNDRED = 100 + }; """) + errors = analyze_c_abi(old_abi, new_abi) + assert len(errors) == 1 + assert errors[0].symbol == "Foo" diff --git a/ci/check_c_abi/pyproject.toml b/ci/check_c_abi/pyproject.toml new file mode 100644 index 0000000000..23c94cbeed --- /dev/null +++ b/ci/check_c_abi/pyproject.toml @@ -0,0 +1,22 @@ +# SPDX-FileCopyrightText: Copyright (c) 2026, NVIDIA CORPORATION. +# SPDX-License-Identifier: Apache-2.0 + +[project] +name = "check_c_abi" +dynamic = ["version"] +description = "Tool to check ABI stability by analyzing C header files" +authors = [ + { name = "NVIDIA Corporation" }, +] +license = "Apache-2.0" +requires-python = ">=3.11" +dependencies = ["libclang", "msgspec", "termcolor"] + +[project.scripts] +check-c-abi = "check_c_abi.main:main_cli" + +[project.urls] +Homepage = "https://github.com/rapidsai/cuvs" + +[tool.setuptools.dynamic] +version = { file = "VERSION" } From 9af603bd2386b35df672f3169036632c8afd2e92 Mon Sep 17 00:00:00 2001 From: Ben Frederickson Date: Tue, 10 Feb 2026 11:30:39 -0800 Subject: [PATCH 09/25] remove old --- ci/check_c_abi.py | 477 ---------------------------------------------- 1 file changed, 477 deletions(-) delete mode 100755 ci/check_c_abi.py diff --git a/ci/check_c_abi.py b/ci/check_c_abi.py deleted file mode 100755 index d552fd3017..0000000000 --- a/ci/check_c_abi.py +++ /dev/null @@ -1,477 +0,0 @@ -#!/usr/bin/env python3 -# -# SPDX-FileCopyrightText: Copyright (c) 2026, NVIDIA CORPORATION. -# SPDX-License-Identifier: Apache-2.0 -# - -""" -Checks for breaking changes to the C-ABI - -This scripts uses the libclang python bindings to check for breaking C-ABI changes. -It works by comparing two sets of header files: the `c/include` header files for -the published ABI, as well as the `c/include` header files inside a new pull request. -Each set of header files is parsed using libclang, and the differences between the -old and new header files are examined for changes that would cause a breaking -ABI change. - -Currently the following checks are made and flagged by this tool: - -* Functions that have been removed from the C-ABI -* Functions that have extra parameters added -* Functions that have parameters removed -* Functions that have the type of any parameter changed -* Structs that have been removed -* Structs that have have members removed -* Structs that have the types of members changed -* Enum values that have been removed -* Enum values that have their definition changed -""" - -import argparse -import gzip - -import msgspec -from itertools import zip_longest -import pathlib -from typing import Optional -import sys - - -import clang.cindex - -try: - from termcolor import colored -except ImportError: - - def colored(text, *args, **kwargs): - return text - - -def _is_unnamed_struct(cursor): - return ( - cursor.kind == clang.cindex.CursorKind.STRUCT_DECL - and cursor.spelling.startswith("struct ") - and "unnamed " in cursor.spelling - ) - - -class SymbolLocation(msgspec.Struct): - filename: str - line: int - column: int - - @classmethod - def from_cursor(cls, cursor, root_path=None): - filename = cursor.location.file.name - if root_path: - filename = str(pathlib.Path(filename).relative_to(root_path)) - return cls( - filename=filename, - line=cursor.location.line, - column=cursor.location.column, - ) - - -class FunctionDefinition(msgspec.Struct): - name: str - return_type: str - parameters: list[tuple[str, str]] - location: SymbolLocation - - @classmethod - def from_cursor(cls, cursor, root_path=None): - if cursor.kind != clang.cindex.CursorKind.FUNCTION_DECL: - raise ValueError( - f"FunctionDefinition.from_cursor called with cursor of kind={cursor.kind}" - ) - - return cls( - name=cursor.spelling, - return_type=cursor.result_type.spelling, - parameters=[ - (child.type.spelling, child.spelling) - for child in cursor.get_children() - if child.kind == clang.cindex.CursorKind.PARM_DECL - ], - location=SymbolLocation.from_cursor(cursor, root_path), - ) - - -class StructDefinition(msgspec.Struct): - name: str - members: list[tuple[str, str]] - location: SymbolLocation - - @classmethod - def from_cursor(cls, cursor, root_path=None): - if cursor.kind != clang.cindex.CursorKind.STRUCT_DECL: - raise ValueError( - f"StructDefinition.from_cursor called with cursor of kind={cursor.kind}" - ) - - return cls( - name=cursor.spelling, - members=[ - (child.type.spelling, child.spelling) - for child in cursor.get_children() - if child.kind == clang.cindex.CursorKind.FIELD_DECL - ], - location=SymbolLocation.from_cursor(cursor, root_path), - ) - - -class EnumDefinition(msgspec.Struct): - name: str - values: list[tuple[str, int]] - location: SymbolLocation - - @classmethod - def from_cursor(cls, cursor, root_path=None): - if cursor.kind != clang.cindex.CursorKind.ENUM_DECL: - raise ValueError( - f"EnumDefinition.from_cursor called with cursor of kind={cursor.kind}" - ) - - return cls( - name=cursor.spelling, - values=[ - (child.spelling, child.enum_value) - for child in cursor.get_children() - if child.kind == clang.cindex.CursorKind.ENUM_CONSTANT_DECL - ], - location=SymbolLocation.from_cursor(cursor, root_path), - ) - - -class Abi(msgspec.Struct): - functions: list[FunctionDefinition] - structs: list[StructDefinition] - enums: list[EnumDefinition] - - @classmethod - def from_include_path(cls, root, header, extra_clang_args=None): - """Loads the Abi from a root path ('/source/cuvs/c/include') and a header file - ("cuvs/include/all.h") - """ - path = pathlib.Path(root).resolve() - all_header = path / header - - if not all_header.is_file(): - raise ValueError(f"header file '{all_header}' not found") - - index = clang.cindex.Index.create() - - args = [f"-I{str(path)}"] - if extra_clang_args: - args.extend(extra_clang_args) - - tu = index.parse(all_header, args=args) - - functions, structs, enums = [], [], [] - - # note: we could use tu.cursor.walk_preorder() here instead to recurse through the AST - # but it is slightly slower to do so (extra 100ms or so) and for the cuvs C-ABI everything - # is at the top level - for child in tu.cursor.get_children(): - # ignore things like cuda headers and other files not installed in - # in the cuvs C path - if not ( - child.location.file - and pathlib.Path(child.location.file.name).is_relative_to(path) - ): - continue - - # Store definitions for each function, struct and enum - if child.kind == clang.cindex.CursorKind.FUNCTION_DECL: - functions.append( - FunctionDefinition.from_cursor(child, root_path=path) - ) - elif child.kind == clang.cindex.CursorKind.STRUCT_DECL: - # ignore unnamed structs (will get picked up via the typedef) - if _is_unnamed_struct(child): - continue - structs.append( - StructDefinition.from_cursor(child, root_path=path) - ) - elif child.kind == clang.cindex.CursorKind.TYPEDEF_DECL: - # check if this is a typedef to an unnamed struct, if so use the - # typedef as the symbolname for the struct - grandchildren = list(child.get_children()) - if len(grandchildren) == 1 and _is_unnamed_struct( - grandchildren[0] - ): - struct = StructDefinition.from_cursor( - grandchildren[0], root_path=path - ) - struct.name = child.spelling - structs.append(struct) - elif child.kind == clang.cindex.CursorKind.ENUM_DECL: - enums.append(EnumDefinition.from_cursor(child, root_path=path)) - - return cls(functions, structs, enums) - - -class AbiError(msgspec.Struct): - """Holds information about an ABI breaking error""" - - error: str - symbol: Optional[str] = None - location: Optional[SymbolLocation] = None - - -def _analyze_function_abi(old_abi, new_abi): - """This iterates over every function in the existing abi, and make sure that no functions - have been removed or had function parameters removed, parameters added or the type of any - parameter changed. Note: adding new functions to the new abi is allowed - """ - errors = [] - old_functions = {f.name: f for f in old_abi.functions} - new_functions = {f.name: f for f in new_abi.functions} - - for name, old_function in old_functions.items(): - try: - new_function = new_functions[name] - except KeyError: - errors.append( - AbiError( - "Function has been removed", - symbol=old_function.name, - location=old_function.location, - ) - ) - continue - - if old_function.return_type != new_function.return_type: - errors.append( - AbiError( - f"Function has return type changed from '{old_function.return_type}' to '{new_function.return_type}'", - symbol=new_function.name, - location=new_function.location, - ) - ) - - for (old_type, old_name), (new_type, new_name) in zip_longest( - old_function.parameters, - new_function.parameters, - fillvalue=(None, None), - ): - if old_type is None: - errors.append( - AbiError( - f"Function has a new parameter '{new_type} {new_name}'", - symbol=new_function.name, - location=new_function.location, - ) - ) - - elif new_type is None: - errors.append( - AbiError( - f"Function has a deleted parameter '{old_type} {old_name}'", - symbol=old_function.name, - location=old_function.location, - ) - ) - - elif new_type != old_type: - errors.append( - AbiError( - f"Function has changed type '{old_type}' to '{new_type}' for parameter '{old_name}'", - symbol=new_function.name, - location=new_function.location, - ) - ) - return errors - - -def _analyze_struct_abi(old_abi, new_abi): - """Checks to see if any existing structures have had items removed, reordered, renamed, or types - changed (adding new members is considered to be ok, as long as functions are initialized via - a create factory function) - """ - errors = [] - - old_structs = {f.name: f for f in old_abi.structs} - new_structs = {f.name: f for f in new_abi.structs} - - for name, old_struct in old_structs.items(): - try: - new_struct = new_structs[name] - except KeyError: - errors.append( - AbiError( - "Struct has been removed", - symbol=name, - location=old_struct.location, - ) - ) - continue - - for (old_type, old_name), (new_type, new_name) in zip_longest( - old_struct.members, - new_struct.members, - fillvalue=(None, None), - ): - if new_type is None: - errors.append( - AbiError( - f"Struct has a deleted member '{old_type} {old_name}'", - symbol=name, - location=new_struct.location, - ) - ) - elif old_type is None: - # adding an item to the end of the struct is allowed here - pass - - elif new_type != old_type: - errors.append( - AbiError( - f"Struct member has changed type '{old_type}' to '{new_type}' for member '{old_name}'", - symbol=name, - location=new_struct.location, - ) - ) - return errors - - -def _analyze_enum_abi(old_abi, new_abi): - errors = [] - # flatten enum values: since values inside an enum in C are in the global scope - old_enum_values = { - k: (v, enum) for enum in old_abi.enums for k, v in enum.values - } - new_enum_values = { - k: (v, enum) for enum in new_abi.enums for k, v in enum.values - } - - # check to see if enum values have been removed, or had their numeric values changed - for name, (old_value, old_enum) in old_enum_values.items(): - try: - new_value, new_enum = new_enum_values[name] - except KeyError: - errors.append( - AbiError( - f"Enum value {name} has been removed", - symbol=old_enum.name, - location=old_enum.location, - ) - ) - continue - - if new_value != old_value: - errors.append( - AbiError( - f"Enum value {name} has been changed from {old_value} to {new_value}", - symbol=old_enum.name, - location=new_enum.location, - ) - ) - - return errors - - -def analyze_c_abi(old_abi, new_abi): - errors = [] - errors.extend(_analyze_function_abi(old_abi, new_abi)) - errors.extend(_analyze_struct_abi(old_abi, new_abi)) - errors.extend(_analyze_enum_abi(old_abi, new_abi)) - return errors - - -if __name__ == "__main__": - default_c_header_path = pathlib.Path("../c/include").resolve() - - parser = argparse.ArgumentParser( - description="Analyze C headers for breaking ABI changes" - ) - subparsers = parser.add_subparsers( - dest="command", help="Available subcommands" - ) - - parser_extract = subparsers.add_parser( - "extract", help="Extract the ABI from a set of header files" - ) - parser_extract.add_argument( - "--output-file", - type=str, - help="The file to output the ABI into (default: %(default)s)", - default="c_abi.json.gz", - ) - parser_extract.add_argument( - "--header-path", - help="Path of C headers to extract the ABI from (default: %(default)s)", - default=str(default_c_header_path), - ) - parser_extract.add_argument( - "--include-file", - help="root header file to examine (default: %(default)s)", - default="cuvs/core/all.h", - ) - - parser_analyze = subparsers.add_parser( - "analyze", help="Analyze a set of header files for breaking changes" - ) - parser_analyze.add_argument( - "--abi-file", - type=str, - help="The extracted ABI file to compare against (default: %(default)s)", - default="c_abi.json.gz", - ) - parser_analyze.add_argument( - "--header-path", - help="Path of C headers to analyze (default: %(default)s)", - default=str(default_c_header_path), - ) - parser_analyze.add_argument( - "--include-file", - help="root header file to examine (default: %(default)s)", - default="cuvs/core/all.h", - ) - - args = parser.parse_args() - - header_path = pathlib.Path(args.header_path) - - # TODO: better way of specifying the dlpack header source, since missing the dlpack.h - # header means that we all dlpack types get treated as 'int' which could be misleading - # when looking for differences in the ABI (like if we change a field from `DLDataType` to - # `int` without specifying the dlpack include directory, we won't know that the type has - # changed) - dlpack_header_path = ( - header_path.parent.parent - / "cpp" - / "build" - / "_deps" - / "dlpack-src" - / "include" - ) - if not dlpack_header_path.is_dir(): - raise ValueError(f"dlpack header {dlpack_header_path} not found") - - extra_clang_args = [f"-I{str(dlpack_header_path)}"] - - if args.command == "extract": - abi = Abi.from_include_path( - header_path, args.include_file, extra_clang_args - ) - with open(args.output_file, "wb") as o: - o.write(gzip.compress(msgspec.json.encode(abi))) - print(f"wrote abi to {args.output_file}") - elif args.command == "analyze": - old_abi = msgspec.json.decode( - gzip.decompress(open(args.abi_file, "rb").read()), type=Abi - ) - new_abi = Abi.from_include_path( - header_path, args.include_file, extra_clang_args - ) - errors = analyze_c_abi(old_abi, new_abi) - for error in errors: - print( - f"Error: {colored(error.error, attrs=['bold'])}. Symbol {colored(error.symbol, 'red')} from {error.location.filename}:{error.location.line}" - ) - - if errors: - sys.exit(1) - else: - print("no breaking abi changes detected") From 52b1773345f7a0ff5cbc0b11319562d7a782d690 Mon Sep 17 00:00:00 2001 From: Ben Frederickson Date: Tue, 10 Feb 2026 11:33:36 -0800 Subject: [PATCH 10/25] fix typehint for root_path --- ci/check_c_abi/check_c_abi/abi.py | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) diff --git a/ci/check_c_abi/check_c_abi/abi.py b/ci/check_c_abi/check_c_abi/abi.py index 29a12bfc7b..099f2d81fa 100644 --- a/ci/check_c_abi/check_c_abi/abi.py +++ b/ci/check_c_abi/check_c_abi/abi.py @@ -35,7 +35,9 @@ class SymbolLocation(msgspec.Struct): @classmethod def from_cursor( - cls, cursor: clang.cindex.Cursor, root_path: str = None + cls, + cursor: clang.cindex.Cursor, + root_path: Optional[str | os.PathLike] = None, ) -> Self: filename = cursor.location.file.name if root_path: @@ -55,7 +57,9 @@ class FunctionDefinition(msgspec.Struct): @classmethod def from_cursor( - cls, cursor: clang.cindex.Cursor, root_path: str = None + cls, + cursor: clang.cindex.Cursor, + root_path: Optional[str | os.PathLike] = None, ) -> Self: if cursor.kind != clang.cindex.CursorKind.FUNCTION_DECL: raise ValueError( @@ -81,7 +85,9 @@ class StructDefinition(msgspec.Struct): @classmethod def from_cursor( - cls, cursor: clang.cindex.Cursor, root_path: str = None + cls, + cursor: clang.cindex.Cursor, + root_path: Optional[str | os.PathLike] = None, ) -> Self: if cursor.kind != clang.cindex.CursorKind.STRUCT_DECL: raise ValueError( @@ -106,7 +112,9 @@ class EnumDefinition(msgspec.Struct): @classmethod def from_cursor( - cls, cursor: clang.cindex.Cursor, root_path: str = None + cls, + cursor: clang.cindex.Cursor, + root_path: Optional[str | os.PathLike] = None, ) -> Self: if cursor.kind != clang.cindex.CursorKind.ENUM_DECL: raise ValueError( From c68dfa3c3e02363053d1b1d8ca69700c730054f5 Mon Sep 17 00:00:00 2001 From: Ben Frederickson Date: Tue, 24 Feb 2026 16:12:14 -0800 Subject: [PATCH 11/25] update gha to use python package --- .github/workflows/check-c-abi.yaml | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/.github/workflows/check-c-abi.yaml b/.github/workflows/check-c-abi.yaml index 663a2048d7..8144519d47 100644 --- a/.github/workflows/check-c-abi.yaml +++ b/.github/workflows/check-c-abi.yaml @@ -6,14 +6,14 @@ on: pull_request: paths: - 'c/include/**' - - 'ci/check_c_abi.py' + - 'ci/check_c_abi/**' - '.github/workflows/check-c-abi.yaml' push: branches: - main paths: - 'c/include/**' - - 'ci/check_c_abi.py' + - 'ci/check_c_abi/**' concurrency: group: ${{ github.workflow }}-${{ github.ref }} @@ -48,7 +48,7 @@ jobs: - name: Install Python dependencies run: | pip install --upgrade pip - pip install msgspec libclang termcolor + pip install -e ci/check_c_abi - name: Build C++ project to get dependencies (dlpack) run: | @@ -64,7 +64,7 @@ jobs: - name: Extract ABI from main branch run: | mkdir -p baseline - python ci/check_c_abi.py extract \ + check-c-abi extract \ --header-path c/include \ --include-file cuvs/core/all.h \ --output-file baseline/c_abi.json.gz @@ -111,7 +111,7 @@ jobs: - name: Install Python dependencies run: | pip install --upgrade pip - pip install msgspec libclang termcolor + pip install ci/check_c_abi - name: Build C++ project to get dependencies (dlpack) run: | @@ -168,7 +168,7 @@ jobs: -DBUILD_EXAMPLES=OFF cd ../../.. mkdir -p baseline - python ci/check_c_abi.py extract \ + check-c-abi extract \ --header-path ../cuvs-main/c/include \ --include-file cuvs/core/all.h \ --output-file baseline/c_abi.json.gz @@ -186,7 +186,7 @@ jobs: - name: Analyze current branch for ABI breaking changes run: | - python ci/check_c_abi.py analyze \ + check-c-abi analyze \ --abi-file baseline/c_abi.json.gz \ --header-path c/include \ --include-file cuvs/core/all.h From e71af5f452b871838d579626168c3f10f57433ee Mon Sep 17 00:00:00 2001 From: Ben Frederickson Date: Tue, 24 Feb 2026 16:20:14 -0800 Subject: [PATCH 12/25] fix yaml syntax --- .github/workflows/store-c-abi-baseline.yaml | 30 ++++++++++----------- 1 file changed, 14 insertions(+), 16 deletions(-) diff --git a/.github/workflows/store-c-abi-baseline.yaml b/.github/workflows/store-c-abi-baseline.yaml index f93ec49b6e..4cb0ad11cd 100644 --- a/.github/workflows/store-c-abi-baseline.yaml +++ b/.github/workflows/store-c-abi-baseline.yaml @@ -16,7 +16,7 @@ jobs: steps: - name: Checkout repository uses: actions/checkout@v4 - + - name: Download main baseline artifact uses: dawidd6/action-download-artifact@v3 with: @@ -24,51 +24,49 @@ jobs: workflow: check-c-abi.yaml branch: main path: baseline/ - + - name: Archive baseline with release version uses: actions/upload-artifact@v4 with: name: c-abi-baseline-${{ github.event.release.tag_name || inputs.version }} path: baseline/c_abi.json.gz retention-days: 400 # ~13 months - + - name: Commit baseline to repository (for long-term storage) run: | git config user.name "github-actions[bot]" git config user.email "github-actions[bot]@users.noreply.github.com" - + # Create a baselines branch if it doesn't exist git fetch origin baselines:baselines 2>/dev/null || git checkout --orphan baselines git checkout baselines 2>/dev/null || true - + # Copy the baseline file with version name mkdir -p c-abi-baselines cp baseline/c_abi.json.gz c-abi-baselines/c_abi_${{ github.event.release.tag_name || inputs.version }}.json.gz - + # Commit and push git add c-abi-baselines/ git commit -m "Archive C ABI baseline for ${{ github.event.release.tag_name || inputs.version }}" git push origin baselines env: GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} - + - name: Create release comment if: github.event_name == 'release' uses: actions/github-script@v7 with: script: | const tagName = context.payload.release.tag_name; - + github.rest.repos.createCommitComment({ owner: context.repo.owner, repo: context.repo.repo, commit_sha: context.payload.release.target_commitish, - body: `✅ C ABI baseline archived for release ${tagName} - -This baseline has been archived from the main branch and will be available for historical reference. - -The baseline is stored in: -- Artifact: \`c-abi-baseline-${tagName}\` (available for ~13 months) -- Branch: \`baselines\` (permanent storage) -` + body: | + `✅ C ABI baseline archived for release ${tagName} + This baseline has been archived from the main branch and will be available for historical reference. + The baseline is stored in: + - Artifact: \`c-abi-baseline-${tagName}\` (available for ~13 months) + - Branch: \`baselines\` (permanent storage)` }); From 4d1e78dd3b292b5386e1c6c3754c4ba5e490de8f Mon Sep 17 00:00:00 2001 From: Ben Frederickson Date: Tue, 24 Feb 2026 16:22:07 -0800 Subject: [PATCH 13/25] fix yaml syntax --- .github/workflows/check-c-abi.yaml | 38 ++++++++++++++---------------- 1 file changed, 18 insertions(+), 20 deletions(-) diff --git a/.github/workflows/check-c-abi.yaml b/.github/workflows/check-c-abi.yaml index 8144519d47..b8eb062799 100644 --- a/.github/workflows/check-c-abi.yaml +++ b/.github/workflows/check-c-abi.yaml @@ -201,31 +201,29 @@ jobs: owner: context.repo.owner, repo: context.repo.repo, body: `## ⚠️ C ABI Breaking Changes Detected + This PR introduces breaking changes to the C ABI. Please review the changes carefully. -This PR introduces breaking changes to the C ABI. Please review the changes carefully. + Breaking ABI changes are only allowed in major releases. If this is intentional for a major release, + the baseline ABI will need to be updated after merge. -Breaking ABI changes are only allowed in major releases. If this is intentional for a major release, -the baseline ABI will need to be updated after merge. + See the job logs for details on what specific changes were detected. -See the job logs for details on what specific changes were detected. + ### What are breaking ABI changes? -### What are breaking ABI changes? + Breaking ABI changes include: + - Removing functions from the public API + - Changing function signatures (parameters or return types) + - Removing struct members or changing their types + - Removing or changing enum values -Breaking ABI changes include: -- Removing functions from the public API -- Changing function signatures (parameters or return types) -- Removing struct members or changing their types -- Removing or changing enum values + ### Next steps -### Next steps + 1. Review the changes flagged in the CI logs + 2. If these changes are unintentional, update your PR to maintain ABI compatibility + 3. If these changes are required, ensure: + - This is part of a major version release + - The changes are documented in the changelog + - Migration guide is provided for users -1. Review the changes flagged in the CI logs -2. If these changes are unintentional, update your PR to maintain ABI compatibility -3. If these changes are required, ensure: - - This is part of a major version release - - The changes are documented in the changelog - - Migration guide is provided for users - -For more information, see the [C ABI documentation](../docs/source/c_developer_guide.md). -` + For more information, see the [C ABI documentation](../docs/source/c_developer_guide.md).` }); From 12e6dc8b842266a973b340bc4ddfc55cc6aa2ec3 Mon Sep 17 00:00:00 2001 From: Ben Frederickson Date: Tue, 24 Feb 2026 16:29:16 -0800 Subject: [PATCH 14/25] update permissions add write contents permissions so that we can leave a comment on the PR --- .github/workflows/check-c-abi.yaml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.github/workflows/check-c-abi.yaml b/.github/workflows/check-c-abi.yaml index b8eb062799..2ece458100 100644 --- a/.github/workflows/check-c-abi.yaml +++ b/.github/workflows/check-c-abi.yaml @@ -88,6 +88,8 @@ jobs: check-pr: if: github.event_name == 'pull_request' || github.event_name == 'workflow_call' runs-on: ubuntu-latest + permissions: + contents: write steps: - name: Checkout PR branch uses: actions/checkout@v4 From 059d47ce7d77a0f9c79cec06d6f3dad6c895b9c4 Mon Sep 17 00:00:00 2001 From: Ben Frederickson Date: Tue, 24 Feb 2026 17:08:02 -0800 Subject: [PATCH 15/25] don't require a build just to get dlpack header file --- .github/workflows/check-c-abi.yaml | 24 ++---------------------- ci/check_c_abi/check_c_abi/main.py | 8 +++++++- 2 files changed, 9 insertions(+), 23 deletions(-) diff --git a/.github/workflows/check-c-abi.yaml b/.github/workflows/check-c-abi.yaml index 2ece458100..29dd624a64 100644 --- a/.github/workflows/check-c-abi.yaml +++ b/.github/workflows/check-c-abi.yaml @@ -48,19 +48,9 @@ jobs: - name: Install Python dependencies run: | pip install --upgrade pip + pip install dlpack pip install -e ci/check_c_abi - - name: Build C++ project to get dependencies (dlpack) - run: | - mkdir -p cpp/build - cd cpp/build - cmake .. \ - -GNinja \ - -DCMAKE_BUILD_TYPE=Debug \ - -DBUILD_TESTS=OFF \ - -DBUILD_EXAMPLES=OFF - echo "Build directory created and dependencies fetched" - - name: Extract ABI from main branch run: | mkdir -p baseline @@ -113,19 +103,9 @@ jobs: - name: Install Python dependencies run: | pip install --upgrade pip + pip install dlpack pip install ci/check_c_abi - - name: Build C++ project to get dependencies (dlpack) - run: | - mkdir -p cpp/build - cd cpp/build - cmake .. \ - -GNinja \ - -DCMAKE_BUILD_TYPE=Debug \ - -DBUILD_TESTS=OFF \ - -DBUILD_EXAMPLES=OFF - echo "Build directory created and dependencies fetched" - - name: Find merge base commit id: merge-base run: | diff --git a/ci/check_c_abi/check_c_abi/main.py b/ci/check_c_abi/check_c_abi/main.py index f111f92bfc..5f8d3ac8de 100755 --- a/ci/check_c_abi/check_c_abi/main.py +++ b/ci/check_c_abi/check_c_abi/main.py @@ -9,6 +9,8 @@ import msgspec import pathlib import sys +import sysconfig + from check_c_abi.abi import analyze_c_abi, Abi @@ -103,7 +105,11 @@ def main_cli(): / "include" ) if not dlpack_header_path.is_dir(): - raise ValueError(f"dlpack header {dlpack_header_path} not found") + # check if dlpack is installed w/ 'pip install dlpack' before giving up + python_include_path = pathlib.Path(sysconfig.get_paths()["include"]) + dlpack_header_path = python_include_path.parent + if not (dlpack_header_path / "dlpack" / "dlpack.h").is_file(): + raise ValueError(f"dlpack header {dlpack_header_path} not found") extra_clang_args = [f"-I{str(dlpack_header_path)}"] From 49b9f411749899fa8917580bb09dbf7b48f103b8 Mon Sep 17 00:00:00 2001 From: Ben Frederickson Date: Wed, 25 Feb 2026 16:20:30 -0800 Subject: [PATCH 16/25] don't require a build just to get dlpack header file --- .github/workflows/check-c-abi.yaml | 8 -------- 1 file changed, 8 deletions(-) diff --git a/.github/workflows/check-c-abi.yaml b/.github/workflows/check-c-abi.yaml index 29dd624a64..bdf54e045f 100644 --- a/.github/workflows/check-c-abi.yaml +++ b/.github/workflows/check-c-abi.yaml @@ -141,14 +141,6 @@ jobs: echo "⚠️ No baseline artifacts found, extracting from main branch..." echo "This is slower but ensures we always have a baseline for comparison." git worktree add ../cuvs-main main - mkdir -p ../cuvs-main/cpp/build - cd ../cuvs-main/cpp/build - cmake .. \ - -GNinja \ - -DCMAKE_BUILD_TYPE=Debug \ - -DBUILD_TESTS=OFF \ - -DBUILD_EXAMPLES=OFF - cd ../../.. mkdir -p baseline check-c-abi extract \ --header-path ../cuvs-main/c/include \ From 889c6795755d8a5daf3dd10a67542fe38a4e28cb Mon Sep 17 00:00:00 2001 From: Ben Frederickson Date: Wed, 25 Feb 2026 16:27:23 -0800 Subject: [PATCH 17/25] support python 3.11 w/ generator type hint --- ci/check_c_abi/check_c_abi/abi.py | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/ci/check_c_abi/check_c_abi/abi.py b/ci/check_c_abi/check_c_abi/abi.py index 099f2d81fa..e13fda7911 100644 --- a/ci/check_c_abi/check_c_abi/abi.py +++ b/ci/check_c_abi/check_c_abi/abi.py @@ -213,7 +213,9 @@ class AbiError(msgspec.Struct): location: Optional[SymbolLocation] = None -def _analyze_function_abi(old_abi: Abi, new_abi: Abi) -> Generator[AbiError]: +def _analyze_function_abi( + old_abi: Abi, new_abi: Abi +) -> Generator[AbiError, None, None]: """This iterates over every function in the existing abi, and make sure that no functions have been removed or had function parameters removed, parameters added or the type of any parameter changed. Note: adding new functions to the new abi is allowed @@ -266,7 +268,9 @@ def _analyze_function_abi(old_abi: Abi, new_abi: Abi) -> Generator[AbiError]: ) -def _analyze_struct_abi(old_abi: Abi, new_abi: Abi) -> Generator[AbiError]: +def _analyze_struct_abi( + old_abi: Abi, new_abi: Abi +) -> Generator[AbiError, None, None]: """Checks to see if any existing structures have had items removed, reordered, renamed, or types changed (adding new members is considered to be ok, as long as functions are initialized via a create factory function) @@ -307,7 +311,9 @@ def _analyze_struct_abi(old_abi: Abi, new_abi: Abi) -> Generator[AbiError]: ) -def _analyze_enum_abi(old_abi: Abi, new_abi: Abi) -> Generator[AbiError]: +def _analyze_enum_abi( + old_abi: Abi, new_abi: Abi +) -> Generator[AbiError, None, None]: # flatten enum values: since values inside an enum in C are in the global scope old_enum_values = { k: (v, enum) for enum in old_abi.enums for k, v in enum.values From 0e9f4c548d32bb502c30d6d7771ef4b90af49fe6 Mon Sep 17 00:00:00 2001 From: Ben Frederickson Date: Wed, 25 Feb 2026 16:42:03 -0800 Subject: [PATCH 18/25] Revert --- .github/workflows/check-c-abi.yaml | 34 ++++++++++++++++++++++++++---- ci/check_c_abi/check_c_abi/main.py | 8 +------ 2 files changed, 31 insertions(+), 11 deletions(-) diff --git a/.github/workflows/check-c-abi.yaml b/.github/workflows/check-c-abi.yaml index bdf54e045f..b8eb062799 100644 --- a/.github/workflows/check-c-abi.yaml +++ b/.github/workflows/check-c-abi.yaml @@ -48,9 +48,19 @@ jobs: - name: Install Python dependencies run: | pip install --upgrade pip - pip install dlpack pip install -e ci/check_c_abi + - name: Build C++ project to get dependencies (dlpack) + run: | + mkdir -p cpp/build + cd cpp/build + cmake .. \ + -GNinja \ + -DCMAKE_BUILD_TYPE=Debug \ + -DBUILD_TESTS=OFF \ + -DBUILD_EXAMPLES=OFF + echo "Build directory created and dependencies fetched" + - name: Extract ABI from main branch run: | mkdir -p baseline @@ -78,8 +88,6 @@ jobs: check-pr: if: github.event_name == 'pull_request' || github.event_name == 'workflow_call' runs-on: ubuntu-latest - permissions: - contents: write steps: - name: Checkout PR branch uses: actions/checkout@v4 @@ -103,9 +111,19 @@ jobs: - name: Install Python dependencies run: | pip install --upgrade pip - pip install dlpack pip install ci/check_c_abi + - name: Build C++ project to get dependencies (dlpack) + run: | + mkdir -p cpp/build + cd cpp/build + cmake .. \ + -GNinja \ + -DCMAKE_BUILD_TYPE=Debug \ + -DBUILD_TESTS=OFF \ + -DBUILD_EXAMPLES=OFF + echo "Build directory created and dependencies fetched" + - name: Find merge base commit id: merge-base run: | @@ -141,6 +159,14 @@ jobs: echo "⚠️ No baseline artifacts found, extracting from main branch..." echo "This is slower but ensures we always have a baseline for comparison." git worktree add ../cuvs-main main + mkdir -p ../cuvs-main/cpp/build + cd ../cuvs-main/cpp/build + cmake .. \ + -GNinja \ + -DCMAKE_BUILD_TYPE=Debug \ + -DBUILD_TESTS=OFF \ + -DBUILD_EXAMPLES=OFF + cd ../../.. mkdir -p baseline check-c-abi extract \ --header-path ../cuvs-main/c/include \ diff --git a/ci/check_c_abi/check_c_abi/main.py b/ci/check_c_abi/check_c_abi/main.py index 5f8d3ac8de..f111f92bfc 100755 --- a/ci/check_c_abi/check_c_abi/main.py +++ b/ci/check_c_abi/check_c_abi/main.py @@ -9,8 +9,6 @@ import msgspec import pathlib import sys -import sysconfig - from check_c_abi.abi import analyze_c_abi, Abi @@ -105,11 +103,7 @@ def main_cli(): / "include" ) if not dlpack_header_path.is_dir(): - # check if dlpack is installed w/ 'pip install dlpack' before giving up - python_include_path = pathlib.Path(sysconfig.get_paths()["include"]) - dlpack_header_path = python_include_path.parent - if not (dlpack_header_path / "dlpack" / "dlpack.h").is_file(): - raise ValueError(f"dlpack header {dlpack_header_path} not found") + raise ValueError(f"dlpack header {dlpack_header_path} not found") extra_clang_args = [f"-I{str(dlpack_header_path)}"] From f1c7243ddae63beaf32531210c492f44f82a9db7 Mon Sep 17 00:00:00 2001 From: Ben Frederickson Date: Wed, 25 Feb 2026 16:47:06 -0800 Subject: [PATCH 19/25] run in container to pick up nvcc --- .github/workflows/check-c-abi.yaml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.github/workflows/check-c-abi.yaml b/.github/workflows/check-c-abi.yaml index b8eb062799..2e58c3935a 100644 --- a/.github/workflows/check-c-abi.yaml +++ b/.github/workflows/check-c-abi.yaml @@ -88,6 +88,8 @@ jobs: check-pr: if: github.event_name == 'pull_request' || github.event_name == 'workflow_call' runs-on: ubuntu-latest + container: + image: rapidsai/ci-conda:26.04-cuda13.1.1-rockylinux8-py3.11 steps: - name: Checkout PR branch uses: actions/checkout@v4 From edaeaa5a15e25f1d883a804593f56600a323200a Mon Sep 17 00:00:00 2001 From: Ben Frederickson Date: Wed, 25 Feb 2026 16:48:40 -0800 Subject: [PATCH 20/25] Revert "run in container to pick up nvcc" This reverts commit 252a9f85f8f633dad51af27b208feaaefecff881. --- .github/workflows/check-c-abi.yaml | 2 -- 1 file changed, 2 deletions(-) diff --git a/.github/workflows/check-c-abi.yaml b/.github/workflows/check-c-abi.yaml index 2e58c3935a..b8eb062799 100644 --- a/.github/workflows/check-c-abi.yaml +++ b/.github/workflows/check-c-abi.yaml @@ -88,8 +88,6 @@ jobs: check-pr: if: github.event_name == 'pull_request' || github.event_name == 'workflow_call' runs-on: ubuntu-latest - container: - image: rapidsai/ci-conda:26.04-cuda13.1.1-rockylinux8-py3.11 steps: - name: Checkout PR branch uses: actions/checkout@v4 From abae375e27e8d62e685cb4eb856d404aa40d8ed5 Mon Sep 17 00:00:00 2001 From: Ben Frederickson Date: Mon, 2 Mar 2026 14:31:18 -0800 Subject: [PATCH 21/25] don't require cmake/nvcc etc just for getting dlpack header --- .github/workflows/check-c-abi.yaml | 58 ++++---------------- ci/check_c_abi/check_c_abi/main.py | 88 +++++++++++++++++------------- 2 files changed, 61 insertions(+), 85 deletions(-) diff --git a/.github/workflows/check-c-abi.yaml b/.github/workflows/check-c-abi.yaml index b8eb062799..c2418e325e 100644 --- a/.github/workflows/check-c-abi.yaml +++ b/.github/workflows/check-c-abi.yaml @@ -36,30 +36,14 @@ jobs: with: python-version: '3.11' - - name: Install system dependencies - run: | - sudo apt-get update - sudo apt-get install -y \ - libclang-dev \ - clang \ - cmake \ - ninja-build - - name: Install Python dependencies run: | pip install --upgrade pip pip install -e ci/check_c_abi - - name: Build C++ project to get dependencies (dlpack) + - name: Get dlpack dependency run: | - mkdir -p cpp/build - cd cpp/build - cmake .. \ - -GNinja \ - -DCMAKE_BUILD_TYPE=Debug \ - -DBUILD_TESTS=OFF \ - -DBUILD_EXAMPLES=OFF - echo "Build directory created and dependencies fetched" + git clone https://github.com/dmlc/dlpack - name: Extract ABI from main branch run: | @@ -67,7 +51,8 @@ jobs: check-c-abi extract \ --header-path c/include \ --include-file cuvs/core/all.h \ - --output-file baseline/c_abi.json.gz + --output-file baseline/c_abi.json.gz \ + --dlpack-include-path dlpack/include echo "ABI extracted from main branch (commit: ${{ github.sha }})" - name: Store commit-specific baseline @@ -99,30 +84,14 @@ jobs: with: python-version: '3.11' - - name: Install system dependencies - run: | - sudo apt-get update - sudo apt-get install -y \ - libclang-dev \ - clang \ - cmake \ - ninja-build - - name: Install Python dependencies run: | pip install --upgrade pip pip install ci/check_c_abi - - name: Build C++ project to get dependencies (dlpack) + - name: Get dlpack dependency run: | - mkdir -p cpp/build - cd cpp/build - cmake .. \ - -GNinja \ - -DCMAKE_BUILD_TYPE=Debug \ - -DBUILD_TESTS=OFF \ - -DBUILD_EXAMPLES=OFF - echo "Build directory created and dependencies fetched" + git clone https://github.com/dmlc/dlpack - name: Find merge base commit id: merge-base @@ -159,19 +128,13 @@ jobs: echo "⚠️ No baseline artifacts found, extracting from main branch..." echo "This is slower but ensures we always have a baseline for comparison." git worktree add ../cuvs-main main - mkdir -p ../cuvs-main/cpp/build - cd ../cuvs-main/cpp/build - cmake .. \ - -GNinja \ - -DCMAKE_BUILD_TYPE=Debug \ - -DBUILD_TESTS=OFF \ - -DBUILD_EXAMPLES=OFF - cd ../../.. mkdir -p baseline check-c-abi extract \ --header-path ../cuvs-main/c/include \ --include-file cuvs/core/all.h \ - --output-file baseline/c_abi.json.gz + --output-file baseline/c_abi.json.gz \ + --dlpack-include-path dlpack/include + echo "✓ Baseline ABI extracted from main branch" - name: Report baseline source @@ -189,7 +152,8 @@ jobs: check-c-abi analyze \ --abi-file baseline/c_abi.json.gz \ --header-path c/include \ - --include-file cuvs/core/all.h + --include-file cuvs/core/all.h \ + --dlpack-include-path dlpack/include - name: Comment on PR with results if: failure() && github.event_name == 'pull_request' diff --git a/ci/check_c_abi/check_c_abi/main.py b/ci/check_c_abi/check_c_abi/main.py index f111f92bfc..cc001b7d64 100755 --- a/ci/check_c_abi/check_c_abi/main.py +++ b/ci/check_c_abi/check_c_abi/main.py @@ -35,6 +35,23 @@ def main_cli(): break current_path = current_path.parent + parent_parser = argparse.ArgumentParser( + add_help=False, description="Common arguments for all subcommands" + ) + parent_parser.add_argument( + "--header-path", + help="Path of C headers to analyze (default: %(default)s)", + default=str(default_c_header_path), + ) + parent_parser.add_argument( + "--include-file", + help="root header file to examine (default: %(default)s)", + default="cuvs/core/all.h", + ) + parent_parser.add_argument( + "--dlpack-include-path", help="path of dlpack header include" + ) + parser = argparse.ArgumentParser( description="Analyze C headers for breaking ABI changes" ) @@ -43,7 +60,9 @@ def main_cli(): ) parser_extract = subparsers.add_parser( - "extract", help="Extract the ABI from a set of header files" + "extract", + parents=[parent_parser], + help="Extract the ABI from a set of header files", ) parser_extract.add_argument( "--output-file", @@ -51,19 +70,11 @@ def main_cli(): help="The file to output the ABI into (default: %(default)s)", default="c_abi.json.gz", ) - parser_extract.add_argument( - "--header-path", - help="Path of C headers to extract the ABI from (default: %(default)s)", - default=str(default_c_header_path), - ) - parser_extract.add_argument( - "--include-file", - help="root header file to examine (default: %(default)s)", - default="cuvs/core/all.h", - ) parser_analyze = subparsers.add_parser( - "analyze", help="Analyze a set of header files for breaking changes" + "analyze", + parents=[parent_parser], + help="Analyze a set of header files for breaking changes", ) parser_analyze.add_argument( "--abi-file", @@ -71,16 +82,6 @@ def main_cli(): help="The extracted ABI file to compare against (default: %(default)s)", default="c_abi.json.gz", ) - parser_analyze.add_argument( - "--header-path", - help="Path of C headers to analyze (default: %(default)s)", - default=str(default_c_header_path), - ) - parser_analyze.add_argument( - "--include-file", - help="root header file to examine (default: %(default)s)", - default="cuvs/core/all.h", - ) args = parser.parse_args() if not args.command: @@ -89,23 +90,34 @@ def main_cli(): header_path = pathlib.Path(args.header_path) - # TODO: better way of specifying the dlpack header source, since missing the dlpack.h - # header means that we all dlpack types get treated as 'int' which could be misleading - # when looking for differences in the ABI (like if we change a field from `DLDataType` to - # `int` without specifying the dlpack include directory, we won't know that the type has - # changed) - dlpack_header_path = ( - header_path.parent.parent - / "cpp" - / "build" - / "_deps" - / "dlpack-src" - / "include" - ) - if not dlpack_header_path.is_dir(): - raise ValueError(f"dlpack header {dlpack_header_path} not found") + if args.dlpack_include_path: + dlpack_include_path = pathlib.Path(args.dlpack_include_path).resolve() + + else: + # try getting from the cmake build directory dependencies if we + # haven't specified the include directory + dlpack_include_path = ( + header_path.parent.parent + / "cpp" + / "build" + / "_deps" + / "dlpack-src" + / "include" + ) + + if not dlpack_include_path.is_dir(): + raise ValueError( + f"dlpack header path '{dlpack_include_path}' not found" + ) + + if not (dlpack_include_path / "dlpack" / "dlpack.h").is_file(): + raise ValueError( + f"dlpack header 'dlpack/dlpack.h' not found in '{dlpack_include_path}'" + ) + + print(f"using dlpack from {dlpack_include_path}") - extra_clang_args = [f"-I{str(dlpack_header_path)}"] + extra_clang_args = [f"-I{str(dlpack_include_path)}"] if args.command == "extract": abi = Abi.from_include_path( From f086edfaeab24ed9ec5a61789835976bebac31b1 Mon Sep 17 00:00:00 2001 From: Ben Frederickson Date: Fri, 13 Mar 2026 11:18:06 -0700 Subject: [PATCH 22/25] remove on pullrequest trigger from check-c-abi.yaml --- .github/workflows/check-c-abi.yaml | 5 ----- 1 file changed, 5 deletions(-) diff --git a/.github/workflows/check-c-abi.yaml b/.github/workflows/check-c-abi.yaml index c2418e325e..6d86f8e1eb 100644 --- a/.github/workflows/check-c-abi.yaml +++ b/.github/workflows/check-c-abi.yaml @@ -3,11 +3,6 @@ name: C ABI Compatibility Check on: workflow_dispatch: # Allow manual trigger for bootstrap workflow_call: - pull_request: - paths: - - 'c/include/**' - - 'ci/check_c_abi/**' - - '.github/workflows/check-c-abi.yaml' push: branches: - main From 0c330056339a8db7b45d1c8aab6c0530ba012d20 Mon Sep 17 00:00:00 2001 From: Ben Frederickson Date: Fri, 13 Mar 2026 14:37:48 -0700 Subject: [PATCH 23/25] remove concurrency section from check-c-abi.yaml --- .github/workflows/check-c-abi.yaml | 4 ---- 1 file changed, 4 deletions(-) diff --git a/.github/workflows/check-c-abi.yaml b/.github/workflows/check-c-abi.yaml index 6d86f8e1eb..3a302098a1 100644 --- a/.github/workflows/check-c-abi.yaml +++ b/.github/workflows/check-c-abi.yaml @@ -10,10 +10,6 @@ on: - 'c/include/**' - 'ci/check_c_abi/**' -concurrency: - group: ${{ github.workflow }}-${{ github.ref }} - cancel-in-progress: true - jobs: # Extract and store baseline ABI from main branch update-baseline: From 923bf9540091c1777174ca653b240841093ef90d Mon Sep 17 00:00:00 2001 From: Ben Frederickson Date: Fri, 13 Mar 2026 15:46:51 -0700 Subject: [PATCH 24/25] debug gha --- .github/workflows/check-c-abi.yaml | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/.github/workflows/check-c-abi.yaml b/.github/workflows/check-c-abi.yaml index 3a302098a1..063ac03158 100644 --- a/.github/workflows/check-c-abi.yaml +++ b/.github/workflows/check-c-abi.yaml @@ -15,9 +15,6 @@ jobs: update-baseline: if: github.event_name == 'workflow_dispatch' || (github.event_name == 'push' && github.ref == 'refs/heads/main') runs-on: ubuntu-latest - concurrency: - group: abi-baseline-update - cancel-in-progress: false # Queue updates, don't skip steps: - name: Checkout main branch uses: actions/checkout@v4 @@ -62,9 +59,11 @@ jobs: # Check PRs for breaking ABI changes check-pr: - if: github.event_name == 'pull_request' || github.event_name == 'workflow_call' + # if: github.event_name == 'pull_request' || github.event_name == 'workflow_call' runs-on: ubuntu-latest steps: + - name: debug + run: echo "The event that triggered this workflow is ${{ github.event_name }}." - name: Checkout PR branch uses: actions/checkout@v4 with: From e7b038428db93a780b6189c4ae861ad3ee712e86 Mon Sep 17 00:00:00 2001 From: Ben Frederickson Date: Fri, 13 Mar 2026 16:04:21 -0700 Subject: [PATCH 25/25] split update baseline to its own workflow --- .github/workflows/check-c-abi.yaml | 56 -------------------- .github/workflows/update-c-abi-baseline.yaml | 56 ++++++++++++++++++++ 2 files changed, 56 insertions(+), 56 deletions(-) create mode 100644 .github/workflows/update-c-abi-baseline.yaml diff --git a/.github/workflows/check-c-abi.yaml b/.github/workflows/check-c-abi.yaml index 063ac03158..323d554d4a 100644 --- a/.github/workflows/check-c-abi.yaml +++ b/.github/workflows/check-c-abi.yaml @@ -1,69 +1,13 @@ name: C ABI Compatibility Check on: - workflow_dispatch: # Allow manual trigger for bootstrap workflow_call: - push: - branches: - - main - paths: - - 'c/include/**' - - 'ci/check_c_abi/**' jobs: - # Extract and store baseline ABI from main branch - update-baseline: - if: github.event_name == 'workflow_dispatch' || (github.event_name == 'push' && github.ref == 'refs/heads/main') - runs-on: ubuntu-latest - steps: - - name: Checkout main branch - uses: actions/checkout@v4 - - - name: Set up Python - uses: actions/setup-python@v5 - with: - python-version: '3.11' - - - name: Install Python dependencies - run: | - pip install --upgrade pip - pip install -e ci/check_c_abi - - - name: Get dlpack dependency - run: | - git clone https://github.com/dmlc/dlpack - - - name: Extract ABI from main branch - run: | - mkdir -p baseline - check-c-abi extract \ - --header-path c/include \ - --include-file cuvs/core/all.h \ - --output-file baseline/c_abi.json.gz \ - --dlpack-include-path dlpack/include - echo "ABI extracted from main branch (commit: ${{ github.sha }})" - - - name: Store commit-specific baseline - uses: actions/upload-artifact@v4 - with: - name: c-abi-baseline-${{ github.sha }} - path: baseline/c_abi.json.gz - retention-days: 90 # Keep for 3 months - - - name: Store main baseline (latest, never expires) - uses: actions/upload-artifact@v4 - with: - name: c-abi-baseline-main - path: baseline/c_abi.json.gz - retention-days: 0 # Never expire - # Check PRs for breaking ABI changes check-pr: - # if: github.event_name == 'pull_request' || github.event_name == 'workflow_call' runs-on: ubuntu-latest steps: - - name: debug - run: echo "The event that triggered this workflow is ${{ github.event_name }}." - name: Checkout PR branch uses: actions/checkout@v4 with: diff --git a/.github/workflows/update-c-abi-baseline.yaml b/.github/workflows/update-c-abi-baseline.yaml new file mode 100644 index 0000000000..2b363c55de --- /dev/null +++ b/.github/workflows/update-c-abi-baseline.yaml @@ -0,0 +1,56 @@ +name: C ABI Update Baseleine + +on: + workflow_dispatch: # Allow manual trigger for bootstrap + push: + branches: + - main + paths: + - 'c/include/**' + - 'ci/check_c_abi/**' + +jobs: + # Extract and store baseline ABI from main branch + update-baseline: + runs-on: ubuntu-latest + steps: + - name: Checkout main branch + uses: actions/checkout@v4 + + - name: Set up Python + uses: actions/setup-python@v5 + with: + python-version: '3.11' + + - name: Install Python dependencies + run: | + pip install --upgrade pip + pip install -e ci/check_c_abi + + - name: Get dlpack dependency + run: | + git clone https://github.com/dmlc/dlpack + + - name: Extract ABI from main branch + run: | + mkdir -p baseline + check-c-abi extract \ + --header-path c/include \ + --include-file cuvs/core/all.h \ + --output-file baseline/c_abi.json.gz \ + --dlpack-include-path dlpack/include + echo "ABI extracted from main branch (commit: ${{ github.sha }})" + + - name: Store commit-specific baseline + uses: actions/upload-artifact@v4 + with: + name: c-abi-baseline-${{ github.sha }} + path: baseline/c_abi.json.gz + retention-days: 90 # Keep for 3 months + + - name: Store main baseline (latest, never expires) + uses: actions/upload-artifact@v4 + with: + name: c-abi-baseline-main + path: baseline/c_abi.json.gz + retention-days: 0 # Never expire