Skip to content

Commit 2b36a2e

Browse files
committed
Add GitHub Actions CI for external contributors
1 parent ff12318 commit 2b36a2e

49 files changed

Lines changed: 2455 additions & 2438 deletions

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

.github/workflows/ci.yml

Lines changed: 81 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -16,12 +16,26 @@ concurrency:
1616
cancel-in-progress: true
1717

1818
jobs:
19+
images:
20+
name: Define Base Images
21+
runs-on: ubuntu-latest
22+
outputs:
23+
lint: ghcr.io/nvidia/cutile-python/lint:2026-01-12-aea51b7409cc
24+
docs: ghcr.io/nvidia/cutile-python/docs:2026-01-12-96c265b9029e
25+
build_py310: ghcr.io/nvidia/cutile-python/build_py_3.10_x86_64:2026-01-12-a3f084500fb0
26+
build_py311: ghcr.io/nvidia/cutile-python/build_py_3.11_x86_64:2026-01-12-d0a88a59d0fd
27+
build_py312: ghcr.io/nvidia/cutile-python/build_py_3.12_x86_64:2026-01-12-9cf7e54a5580
28+
build_py313: ghcr.io/nvidia/cutile-python/build_py_3.13_x86_64:2026-01-12-7f9db97c8ad8
29+
steps:
30+
- run: echo "Defining image tags"
31+
1932
lint:
2033
name: Lint
34+
needs: images
2135
runs-on: ubuntu-latest
2236
timeout-minutes: 10
2337
container:
24-
image: ghcr.io/nvidia/cutile-python/lint:2025-12-06-4cb7d16e4c20
38+
image: ${{ needs.images.outputs.lint }}
2539
steps:
2640
- name: Checkout repository
2741
uses: actions/checkout@v6
@@ -30,10 +44,73 @@ jobs:
3044
run: flake8
3145

3246
- name: Run cpplint
33-
run: python3 ci/cpplint.py
47+
run: python scripts/cpplint.py
3448

3549
- name: Check license headers (REUSE)
36-
run: ci/scripts/check_license.sh
50+
run: scripts/check_license.sh
3751

3852
- name: Check inline samples are up to date
39-
run: python3 test/tools/inline_samples.py --check
53+
run: python test/tools/inline_samples.py --check
54+
55+
docs:
56+
name: Build Docs
57+
needs: [images, build]
58+
runs-on: ubuntu-latest
59+
timeout-minutes: 10
60+
container:
61+
image: ${{ needs.images.outputs.docs }}
62+
steps:
63+
- name: Checkout repository
64+
uses: actions/checkout@v6
65+
66+
- name: Download wheel
67+
uses: actions/download-artifact@v4
68+
with:
69+
name: wheel-py3.12-linux-x86_64
70+
path: dist/
71+
72+
- name: Install wheel
73+
run: pip install dist/*.whl
74+
75+
- name: Build documentation
76+
run: make -C docs html
77+
78+
- name: Upload docs artifact
79+
uses: actions/upload-artifact@v4
80+
with:
81+
name: docs-html
82+
path: docs/build/html
83+
retention-days: 7
84+
85+
build:
86+
name: Build Wheel (Python ${{ matrix.python-version }})
87+
needs: images
88+
runs-on: ubuntu-latest
89+
timeout-minutes: 30
90+
strategy:
91+
matrix:
92+
include:
93+
- python-version: "3.10"
94+
image_key: build_py310
95+
- python-version: "3.11"
96+
image_key: build_py311
97+
- python-version: "3.12"
98+
image_key: build_py312
99+
- python-version: "3.13"
100+
image_key: build_py313
101+
container:
102+
image: ${{ needs.images.outputs[matrix.image_key] }}
103+
steps:
104+
- name: Checkout repository
105+
uses: actions/checkout@v6
106+
107+
- name: Build wheel
108+
run: python setup.py bdist_wheel
109+
110+
- name: Upload wheel artifact
111+
uses: actions/upload-artifact@v4
112+
with:
113+
name: wheel-py${{ matrix.python-version }}-linux-x86_64
114+
path: dist/*.whl
115+
if-no-files-found: error
116+
retention-days: 7

cext/CMakeLists.txt

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -74,7 +74,6 @@ target_link_options(_cext_shared PUBLIC ${cext_link_flags} ${nostdlib_flags} -Wl
7474
add_executable(test_stream_buffer
7575
test/test_stream_buffer.cpp
7676
cuda_loader.cpp
77-
cuda_helper.cpp
7877
memory.cpp
7978
)
8079
target_compile_options(test_stream_buffer PUBLIC ${cext_compile_flags} ${test_coverage_options})

cext/cuda_helper.cpp

Lines changed: 20 additions & 42 deletions
Original file line numberDiff line numberDiff line change
@@ -6,52 +6,40 @@
66
#include "cuda_loader.h"
77

88

9-
const char* get_cuda_error(const DriverApi* driver, CUresult res) {
9+
const char* get_cuda_error(CUresult res) {
1010
const char* str = nullptr;
11-
driver->cuGetErrorString(res, &str);
11+
g_cuGetErrorString(res, &str);
1212
return str ? str : "Unknown error";
1313
}
1414

15-
Status check_driver_version(const DriverApi* driver, int minimum_version) {
16-
int version;
17-
CUresult res = driver->cuDriverGetVersion(&version);
15+
void try_init_cuda() {
16+
ErrorGuard guard;
17+
CUresult res = g_cuInit(0);
1818
if (res != CUDA_SUCCESS) {
19-
PyErr_Format(PyExc_RuntimeError, "cuDriverGetVersion: %s", get_cuda_error(driver, res));
20-
return ErrorRaised;
21-
}
22-
if (version < minimum_version) {
23-
int major = version / 1000;
24-
int minor = (version % 1000) / 10;
25-
int required_major = minimum_version / 1000;
26-
PyErr_Format(PyExc_RuntimeError,
27-
"Minimum driver version required is %d.0, got %d.%d",
28-
required_major, major, minor);
29-
return ErrorRaised;
19+
raise(PyExc_RuntimeError, "cuInit: %s", get_cuda_error(res));
20+
SavedException exc = save_raised_exception();
21+
LOG_PYTHON_ERROR("warning", exc, "Failed to initialize CUDA");
3022
}
31-
return OK;
3223
}
3324

3425
PyObject* get_max_grid_size(PyObject *self, PyObject *args) {
3526
int device_id;
3627
if (!PyArg_ParseTuple(args, "i", &device_id))
3728
return NULL;
3829

39-
Result<const DriverApi*> driver = get_driver_api();
40-
if (!driver.is_ok()) return NULL;
41-
4230
CUdevice dev;
43-
CUresult res = (*driver)->cuDeviceGet(&dev, device_id);
31+
CUresult res = g_cuDeviceGet(&dev, device_id);
4432
if (res != CUDA_SUCCESS)
45-
return PyErr_Format(PyExc_RuntimeError, "cuDeviceGet: %s", get_cuda_error(*driver, res));
33+
return PyErr_Format(PyExc_RuntimeError, "cuDeviceGet: %s", get_cuda_error(res));
4634

4735
int max_grid_size[3];
4836
for (int i = 0; i < 3; ++i) {
49-
res = (*driver)->cuDeviceGetAttribute(&max_grid_size[i],
37+
res = g_cuDeviceGetAttribute(&max_grid_size[i],
5038
static_cast<CUdevice_attribute>(CU_DEVICE_ATTRIBUTE_MAX_GRID_DIM_X + i),
5139
dev);
5240
if (res != CUDA_SUCCESS) {
5341
return PyErr_Format(PyExc_RuntimeError,
54-
"cuDeviceGetAttribute: %s", get_cuda_error(*driver, res));
42+
"cuDeviceGetAttribute: %s", get_cuda_error(res));
5543
}
5644
}
5745
return Py_BuildValue("(iii)", max_grid_size[0], max_grid_size[1], max_grid_size[2]);
@@ -60,36 +48,26 @@ PyObject* get_max_grid_size(PyObject *self, PyObject *args) {
6048
PyObject* get_compute_capability(PyObject *self, PyObject *Py_UNUSED(ignored)) {
6149
int major, minor;
6250
CUdevice dev;
63-
64-
Result<const DriverApi*> driver_result = get_driver_api();
65-
if (!driver_result.is_ok()) return NULL;
66-
const DriverApi* d = *driver_result;
67-
68-
CUresult res = d->cuDeviceGet(&dev, 0);
51+
CUresult res = g_cuDeviceGet(&dev, 0);
6952
if (res != CUDA_SUCCESS) {
70-
return PyErr_Format(PyExc_RuntimeError, "cuDeviceGet: %s", get_cuda_error(d, res));
53+
return PyErr_Format(PyExc_RuntimeError, "cuDeviceGet: %s", get_cuda_error(res));
7154
}
72-
res = d->cuDeviceGetAttribute(&major, CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MAJOR, dev);
55+
res = g_cuDeviceGetAttribute(&major, CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MAJOR, dev);
7356
if (res != CUDA_SUCCESS) {
74-
return PyErr_Format(PyExc_RuntimeError, "cuDeviceGetAttribute: %s", get_cuda_error(d, res));
57+
return PyErr_Format(PyExc_RuntimeError, "cuDeviceGetAttribute: %s", get_cuda_error(res));
7558
}
76-
res = d->cuDeviceGetAttribute(&minor, CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MINOR, dev);
59+
res = g_cuDeviceGetAttribute(&minor, CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MINOR, dev);
7760
if (res != CUDA_SUCCESS) {
78-
return PyErr_Format(PyExc_RuntimeError, "cuDeviceGetAttribute: %s", get_cuda_error(d, res));
61+
return PyErr_Format(PyExc_RuntimeError, "cuDeviceGetAttribute: %s", get_cuda_error(res));
7962
}
8063
return Py_BuildValue("(ii)", major, minor);
8164
}
8265

8366
PyObject* get_driver_version(PyObject *self, PyObject *Py_UNUSED(ignored)) {
8467
int major, minor;
85-
86-
Result<const DriverApi*> driver_result = get_driver_api();
87-
if (!driver_result.is_ok()) return NULL;
88-
const DriverApi* d = *driver_result;
89-
90-
CUresult res = d->cuDriverGetVersion(&major);
68+
CUresult res = g_cuDriverGetVersion(&major);
9169
if (res != CUDA_SUCCESS) {
92-
return PyErr_Format(PyExc_RuntimeError, "cuDriverGetVersion: %s", get_cuda_error(d, res));
70+
return PyErr_Format(PyExc_RuntimeError, "cuDriverGetVersion: %s", get_cuda_error(res));
9371
}
9472
minor = (major % 1000) / 10;
9573
major = major / 1000;

cext/cuda_helper.h

Lines changed: 2 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -9,12 +9,8 @@
99
#include "py.h"
1010
#include <cuda.h>
1111

12-
struct DriverApi;
13-
1412
Status cuda_helper_init(PyObject* m);
1513

16-
const char* get_cuda_error(const DriverApi*, CUresult res);
17-
18-
void try_cuInit(const DriverApi*);
14+
const char* get_cuda_error(CUresult res);
1915

20-
Status check_driver_version(const DriverApi*, int minimum_version);
16+
void try_init_cuda();

cext/cuda_loader.cpp

Lines changed: 2 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -50,12 +50,11 @@ F get_proc_address(cuGetProcAddress_v2_t getter,
5050
FOREACH_CUDA_FUNCTION_TO_LOAD(DEFINE_CUDA_FUNCTION_GLOBAL)
5151

5252
#define GET_PROC_ADDRESS(name, cuda_ver) \
53-
if (!(driver_api.name = \
54-
get_proc_address<decltype(name)*>(_cuGetProcAddress, #name, cuda_ver))) \
53+
if (!(g_##name = get_proc_address<decltype(name)*>(_cuGetProcAddress, #name, cuda_ver))) \
5554
return ErrorRaised;
5655

5756

58-
static Status cuda_loader_init(DriverApi& driver_api) {
57+
Status cuda_loader_init() {
5958
PyPtr load_libcuda_mod = steal(PyImport_ImportModule("cuda.tile._load_libcuda"));
6059
if (!load_libcuda_mod) return ErrorRaised;
6160

@@ -73,22 +72,3 @@ static Status cuda_loader_init(DriverApi& driver_api) {
7372

7473
return OK;
7574
}
76-
77-
78-
static constexpr int MIN_DRIVER_VERSION = 13000;
79-
80-
Result<const DriverApi*> get_driver_api() {
81-
static bool initialized;
82-
static DriverApi instance;
83-
if (!initialized) {
84-
if (!cuda_loader_init(instance))
85-
return ErrorRaised;
86-
CUresult res = instance.cuInit(0);
87-
if (res != CUDA_SUCCESS)
88-
return raise(PyExc_RuntimeError, "cuInit: %s", get_cuda_error(&instance, res));
89-
if (!check_driver_version(&instance, MIN_DRIVER_VERSION))
90-
return ErrorRaised;
91-
initialized = true;
92-
}
93-
return &instance;
94-
}

cext/cuda_loader.h

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,8 @@
99
#include "py.h"
1010
#include <cuda.h>
1111

12+
Status cuda_loader_init();
13+
1214
#define FOREACH_CUDA_FUNCTION_TO_LOAD(X) \
1315
X(cuInit, 2000) \
1416
X(cuLibraryLoadFromFile, 12000) \
@@ -45,10 +47,8 @@
4547

4648

4749
#define DECLARE_CUDA_FUNC_EXTERN(name, _cuda_version) \
48-
decltype(::name)* name;
50+
extern decltype(name)* g_##name;
51+
52+
FOREACH_CUDA_FUNCTION_TO_LOAD(DECLARE_CUDA_FUNC_EXTERN)
4953

50-
struct DriverApi {
51-
FOREACH_CUDA_FUNCTION_TO_LOAD(DECLARE_CUDA_FUNC_EXTERN)
52-
};
5354

54-
Result<const DriverApi*> get_driver_api();

cext/module.cpp

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@
44

55
#include "py.h"
66

7+
#include "cuda_loader.h"
78
#include "tile_kernel.h"
89
#include "cuda_helper.h"
910

@@ -19,6 +20,11 @@ static PyModuleDef module_def = {
1920
};
2021

2122
PyMODINIT_FUNC PyInit__cext() {
23+
if (!cuda_loader_init())
24+
return nullptr;
25+
26+
try_init_cuda();
27+
2228
PyPtr m = steal(PyModule_Create(&module_def));
2329
if (!m) return nullptr;
2430

0 commit comments

Comments
 (0)