siamize/Makefile at main · NeuroJSON/siamize · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
# Convenience top-level Makefile for siamize. Wraps the CMake build and adds
# code-formatting / cleanup / packaging targets. CMake is still the primary
# build system (this Makefile just shells out to it).
#
# Targets:
#
#   make                  configure + build CPU CLI (Release) under build/
#   make cuda             configure + build CUDA CLI (re-fetches GPU ORT if needed)
#   make tensorrt         configure + build TensorRT CLI (re-fetches GPU ORT if needed)
#
#   make opencl           configure + build the MNN-backed CLI with the OpenCL
#                         backend enabled (NVIDIA via ICD, AMD, Intel iGPU,
#                         Mali, Adreno). First run also builds libMNN via
#                         scripts/fetch_mnn.sh. MNN_STATIC defaults to 1, which
#                         gives a self-contained binary (no libMNN.so to ship);
#                         pass MNN_STATIC=0 for a shared libMNN build.
#   make openclmex        MATLAB MEX, MNN backend (siamex.mex<a64|maca64|w64>)
#   make opencloct        Octave  MEX, MNN backend (siamex.mex)
#
#   make mex-octave       build the Octave MEX (siamex.mex)
#   make mex-matlab       build the MATLAB MEX (siamex.mex{a64,maca64,w64})
#   make mex-test         run matlab/tests/run_tests.m in Octave (30 unit tests)
#
#   make package          stage + zip the CPU CLI bundle
#   make package-cuda     stage + zip the CUDA CLI bundle (needs `make cuda` first)
#   make package-tensorrt stage + zip the TRT  CLI bundle (needs `make tensorrt`)
#   make package-mex      stage + zip the MEX bundle      (needs `make mex-*`)
#
#   make dockerimg        build the CUDA12+cuDNN9 Docker image (docker/Dockerfile)
#                         bundling siamize (ORT/CUDA) + siamize-opencl (MNN/OpenCL).
#                         Tag/CUDA via DOCKER_IMG=... DOCKER_CUDA=...
#
#   make test             run tests/run_regression.sh (needs models/ populated)
#
#   make doc              build doxygen HTML docs -> build/doc/html/index.html
#   make doc-clean        rm -rf build/doc/
#
#   make pretty           astyle on C++, black on Python
#   make pretty-cpp / pretty-py
#
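#   make clean            rm -rf build/ dist/ and the packaged siamize-*.zip /
#                         siamex*.zip bundles   (keeps third_party/)
#   make distclean        clean + rm -rf third_party/onnxruntime/
#                         third_party/mnn/ third_party/mnn-build/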
#
# Notes:
#   * The CPU and GPU ORT prebuilts share `third_party/onnxruntime/`, so
#     switching between `make` and `make cuda` re-fetches ORT only if the
#     current install doesn't have the right provider plugins.
#   * `make package*` runs `scripts/package.sh` which uses 7z; on macOS
#     you may need `brew install p7zip` if 7z isn't already present.

# Build-tree location and CMake configuration. Both use `?=`, so a value from
# the environment or the make command line wins, e.g.
# `make BUILD_DIR=out BUILD_TYPE=Debug`.
BUILD_DIR  ?= build
BUILD_TYPE ?= Release
# Staging directories of the two inference runtimes. ORT_DIR is the tree that
# ort-cpu / ort-gpu inspect and wipe; both directories are removed by
# `make distclean`.
ORT_DIR    := third_party/onnxruntime
MNN_DIR    := third_party/mnn

# Filenames used to detect which ORT prebuilt (CPU vs GPU) is currently
# installed under third_party/onnxruntime/. Two spellings of the CUDA provider
# library are probed (.so and .dll); ort-cpu / ort-gpu treat the presence of
# either one as "a GPU ORT is installed".
ORT_GPU_MARKER := $(ORT_DIR)/lib/libonnxruntime_providers_cuda.so
ORT_GPU_MARKER_DLL := $(ORT_DIR)/lib/onnxruntime_providers_cuda.dll

# MNN stage marker. fetch_mnn.sh writes either libMNN.a (MNN_STATIC=1)
# or libMNN.{so,dylib,dll}; checking the include header is the cheapest
# stable signal that the stage is populated.
# NOTE(review): no rule visible in this Makefile references MNN_MARKER --
# mnn-deps always defers to scripts/fetch_mnn.sh -- so it looks unused here.
MNN_MARKER := $(MNN_DIR)/include/MNN/Interpreter.hpp

# Default the MNN backend to a static libMNN.a so `make opencl` (and the MEX
# variants) produce a self-contained binary -- matching the released artifacts
# and sidestepping the -fvisibility=hidden symbol-export pitfall of a shared
# libMNN.so. Override with `make opencl MNN_STATIC=0` for a shared build.
# Exported so scripts/fetch_mnn.sh (invoked by mnn-deps) picks it up.
MNN_STATIC ?= 1
export MNN_STATIC

# Docker image (bundles both backends: siamize = ORT/CUDA, siamize-opencl =
# MNN/OpenCL). The tag is a calendar version vYYYY.M (year.month); bump it
# per release (e.g. v2026.6 -> v2026.9). Override the tag / CUDA base on the
# make line, e.g.
# `make dockerimg DOCKER_IMG=neurojson/siamize:v2026.6 DOCKER_CUDA=12.4.1`.
# DOCKER_CUDA is handed to the Dockerfile as the CUDA_VERSION build argument.
DOCKER_IMG  ?= siamize:v2026.6
DOCKER_CUDA ?= 12.6.3

# The targets below are command names, not files they produce, so they are
# declared phony (several of them -- build, doc, test -- would otherwise
# collide with same-named directories). Repeated .PHONY lines accumulate;
# they are grouped here by purpose.
.PHONY: all build cuda tensorrt coreml opencl
.PHONY: mex-octave mex-matlab mex-test
.PHONY: cudaoct cudamex coremloct coremlmex openclmex opencloct
.PHONY: package package-cuda package-tensorrt package-mex package-opencl package-openclmex
.PHONY: ort-cpu ort-gpu mnn-deps
.PHONY: test doc doc-clean dockerimg
.PHONY: pretty pretty-cpp pretty-py clean distclean

# ---- CLI builds -------------------------------------------------------------

# Default goal: a bare `make` builds the CPU CLI.
all: build

# Configure + build the CPU (ONNX Runtime) CLI under $(BUILD_DIR).
#
# All flavor targets share one $(BUILD_DIR), and CMake keeps -D values in
# CMakeCache.txt between configure runs. ort-cpu only wipes $(BUILD_DIR) when
# it has to refetch ORT, so without the -U flags a SIAMIZE_GPU or
# SIAMIZE_BACKEND value left behind by an earlier `make coreml` / `make opencl`
# would silently carry over into this "CPU" build. -U drops the cached entry
# (a no-op when it is absent) so the project falls back to its own default.
build: ort-cpu
	cmake -S . -B $(BUILD_DIR) -USIAMIZE_GPU -USIAMIZE_BACKEND \
	    -DCMAKE_BUILD_TYPE=$(BUILD_TYPE)
	cmake --build $(BUILD_DIR) --config $(BUILD_TYPE) --parallel

# Configure + build the CLI with the CUDA execution provider (GPU ORT).
#
# -USIAMIZE_BACKEND clears a backend choice cached in $(BUILD_DIR) by an
# earlier `make opencl`: ort-gpu leaves the build tree alone when GPU ORT is
# already staged, and CMake would otherwise keep SIAMIZE_BACKEND=mnn from
# CMakeCache.txt while this target reports a CUDA build.
cuda: ort-gpu
	cmake -S . -B $(BUILD_DIR) -USIAMIZE_BACKEND \
	    -DCMAKE_BUILD_TYPE=$(BUILD_TYPE) -DSIAMIZE_GPU=cuda
	cmake --build $(BUILD_DIR) --config $(BUILD_TYPE) --parallel
	@echo
	@echo "[make cuda] built $(BUILD_DIR)/siamize with CUDA EP."
	@echo "Runtime: see README.md > 'Optional: NVIDIA GPU build' for the"
	@echo "LD_LIBRARY_PATH one-liner that exposes CUDA/cuDNN libs to ORT."

# Configure + build the CLI with the TensorRT + CUDA execution providers
# (GPU ORT).
#
# -USIAMIZE_BACKEND clears a backend choice cached in $(BUILD_DIR) by an
# earlier `make opencl`; CMake would otherwise keep SIAMIZE_BACKEND=mnn from
# CMakeCache.txt, because ort-gpu does not wipe the build tree when GPU ORT is
# already staged.
tensorrt: ort-gpu
	cmake -S . -B $(BUILD_DIR) -USIAMIZE_BACKEND \
	    -DCMAKE_BUILD_TYPE=$(BUILD_TYPE) -DSIAMIZE_GPU=tensorrt
	cmake --build $(BUILD_DIR) --config $(BUILD_TYPE) --parallel
	@echo
	@echo "[make tensorrt] built $(BUILD_DIR)/siamize with TensorRT + CUDA EPs."

# macOS-only. CoreML EP is statically baked into ORT 1.26's macOS dylib
# (no separate provider plugin to fetch), so we use the standard CPU
# ORT bundle. -DSIAMIZE_GPU=coreml turns on the SIAMIZE_HAS_COREML
# define and links the CoreML / Foundation frameworks. Runtime selects
# CPU / GPU / Neural Engine via --coreml-units (default 'all').
# -USIAMIZE_BACKEND clears a backend choice cached in $(BUILD_DIR) by an
# earlier `make opencl`; ort-cpu keeps the build tree when CPU ORT is already
# staged, so CMake would otherwise reuse SIAMIZE_BACKEND=mnn from
# CMakeCache.txt.
coreml: ort-cpu
	cmake -S . -B $(BUILD_DIR) -USIAMIZE_BACKEND \
	    -DCMAKE_BUILD_TYPE=$(BUILD_TYPE) -DSIAMIZE_GPU=coreml
	cmake --build $(BUILD_DIR) --config $(BUILD_TYPE) --parallel
	@echo
	@echo "[make coreml] built $(BUILD_DIR)/siamize with CoreML EP."
	@echo "First run compiles the ONNX -> .mlmodelc (~10-30 s, cached)."

# ---- MNN-backed builds ------------------------------------------------------
# `opencl` is the user-facing name for the MNN backend's headline GPU
# path (also covers Vulkan / Metal at runtime via -c). Build-time, this
# is `-DSIAMIZE_BACKEND=mnn` -- MNN's OpenCL backend gets enabled at
# fetch_mnn.sh time (MNN_OPENCL=1, default). The CLI's -c flag then
# picks the actual runtime (cpu | opencl | vulkan | metal).
#
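# MNN_STATIC=1 (the default, see the MNN_STATIC assignment above) builds
# libMNN.a and produces a self-contained binary. The variable is exported, so
# overriding it on the make line (e.g. MNN_STATIC=0) propagates to
# scripts/fetch_mnn.sh.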
# -USIAMIZE_GPU clears an ORT execution-provider choice (cuda / tensorrt /
# coreml) cached in $(BUILD_DIR) by an earlier flavor build. mnn-deps never
# wipes the build tree, so CMake would otherwise carry that value over from
# CMakeCache.txt into the MNN configuration.
opencl: mnn-deps
	cmake -S . -B $(BUILD_DIR) -USIAMIZE_GPU \
	    -DCMAKE_BUILD_TYPE=$(BUILD_TYPE) -DSIAMIZE_BACKEND=mnn
	cmake --build $(BUILD_DIR) --config $(BUILD_TYPE) --parallel
	@echo
	@echo "[make opencl] built $(BUILD_DIR)/siamize with MNN backend."
	@echo "Runtime: pass -c {cpu|opencl|vulkan|metal}. Default auto picks"
	@echo "OpenCL when MNN was built with MNN_OPENCL=ON, else CPU."

# ---- MNN prebuilt management ------------------------------------------------

# Build (or skip if cached) the MNN runtime under third_party/mnn/.
#
# This rule deliberately has no Make-level marker prerequisite and always
# defers to scripts/fetch_mnn.sh: the script is idempotent (it skips instantly
# when the correct libMNN.a / .so for the current MNN_STATIC is already staged,
# unless FORCE=1) and lib-aware, so it rebuilds when switching between static
# and shared -- unlike a header-only Make marker, which can't tell the two
# apart. The explanation lives in this Make comment rather than in `@#` recipe
# lines, each of which would be handed to a shell just to be ignored.
mnn-deps:
	@scripts/fetch_mnn.sh

# ---- ORT prebuilt management ------------------------------------------------

# CPU ORT: the existing install is kept only when $(ORT_DIR) exists and holds
# neither CUDA provider marker. In every other case (nothing staged yet, or a
# GPU prebuilt is in place) $(ORT_DIR) and $(BUILD_DIR) are wiped and the CPU
# bundle is fetched again.
ort-cpu:
	@if [ ! -d $(ORT_DIR) ] || [ -e $(ORT_GPU_MARKER) ] || [ -e $(ORT_GPU_MARKER_DLL) ]; then \
	    echo "[ort] (re)installing CPU ORT prebuilt"; \
	    rm -rf $(ORT_DIR) $(BUILD_DIR); \
	    scripts/fetch_deps.sh; \
	else \
	    echo "[ort] CPU ORT already installed under $(ORT_DIR)"; \
	fi

# GPU ORT: when neither spelling of the CUDA provider plugin is present, wipe
# $(ORT_DIR) and $(BUILD_DIR) and fetch the GPU bundle (ORT_BUILD=gpu);
# otherwise the staged install is reused as-is.
ort-gpu:
	@if [ ! -e $(ORT_GPU_MARKER) ] && [ ! -e $(ORT_GPU_MARKER_DLL) ]; then \
	    echo "[ort] (re)installing GPU ORT prebuilt"; \
	    rm -rf $(ORT_DIR) $(BUILD_DIR); \
	    ORT_BUILD=gpu scripts/fetch_deps.sh; \
	else \
	    echo "[ort] GPU ORT already installed under $(ORT_DIR)"; \
	fi

# ---- MEX builds -------------------------------------------------------------

# Octave MEX on the CPU ORT bundle.
#
# -USIAMIZE_GPU / -USIAMIZE_BACKEND clear values cached in $(BUILD_DIR) by an
# earlier `make coreml` / `make opencl`; ort-cpu keeps the build tree when CPU
# ORT is already staged, so CMake would otherwise reuse them from
# CMakeCache.txt and this "plain" MEX would silently be built for that flavor.
mex-octave: ort-cpu
	cmake -S . -B $(BUILD_DIR) -USIAMIZE_GPU -USIAMIZE_BACKEND \
	    -DCMAKE_BUILD_TYPE=$(BUILD_TYPE) -DSIAMIZE_BUILD_OCTAVE_MEX=ON
	cmake --build $(BUILD_DIR) --config $(BUILD_TYPE) --parallel
	@echo "[make mex-octave] built matlab/siamex.mex"

# MATLAB MEX on the CPU ORT bundle.
#
# -USIAMIZE_GPU / -USIAMIZE_BACKEND clear values cached in $(BUILD_DIR) by an
# earlier `make coreml` / `make opencl`; ort-cpu keeps the build tree when CPU
# ORT is already staged, so CMake would otherwise reuse them from
# CMakeCache.txt and this "plain" MEX would silently be built for that flavor.
mex-matlab: ort-cpu
	cmake -S . -B $(BUILD_DIR) -USIAMIZE_GPU -USIAMIZE_BACKEND \
	    -DCMAKE_BUILD_TYPE=$(BUILD_TYPE) -DSIAMIZE_BUILD_MATLAB_MEX=ON
	cmake --build $(BUILD_DIR) --config $(BUILD_TYPE) --parallel
	@echo "[make mex-matlab] built matlab/siamex.mex<a64|maca64|w64>"

# CUDA-enabled MEX variants: same as mex-octave / mex-matlab but
# fetch the GPU-flavor ORT (libonnxruntime_providers_cuda.so) and
# pass -DSIAMIZE_GPU=cuda so sliding.cpp's CUDA EP probe is compiled
# in. At MATLAB / Octave runtime the user still has to set
# LD_LIBRARY_PATH to include CUDA + cuDNN (same recipe as the CLI;
# see README "Required shared libraries by exact filename").
# -USIAMIZE_BACKEND clears a SIAMIZE_BACKEND=mnn entry cached in $(BUILD_DIR)
# by an earlier MNN build; ort-gpu keeps the build tree when GPU ORT is already
# staged, so CMake would otherwise reuse it from CMakeCache.txt.
cudaoct: ort-gpu
	cmake -S . -B $(BUILD_DIR) -USIAMIZE_BACKEND \
	    -DCMAKE_BUILD_TYPE=$(BUILD_TYPE) \
	    -DSIAMIZE_GPU=cuda -DSIAMIZE_BUILD_OCTAVE_MEX=ON
	cmake --build $(BUILD_DIR) --config $(BUILD_TYPE) --parallel
	@echo "[make cudaoct] built matlab/siamex.mex (CUDA-enabled Octave MEX)"

# CUDA-enabled MATLAB MEX (GPU ORT).
# -USIAMIZE_BACKEND clears a SIAMIZE_BACKEND=mnn entry cached in $(BUILD_DIR)
# by an earlier MNN build; ort-gpu keeps the build tree when GPU ORT is already
# staged, so CMake would otherwise reuse it from CMakeCache.txt.
cudamex: ort-gpu
	cmake -S . -B $(BUILD_DIR) -USIAMIZE_BACKEND \
	    -DCMAKE_BUILD_TYPE=$(BUILD_TYPE) \
	    -DSIAMIZE_GPU=cuda -DSIAMIZE_BUILD_MATLAB_MEX=ON
	cmake --build $(BUILD_DIR) --config $(BUILD_TYPE) --parallel
	@echo "[make cudamex] built matlab/siamex.mex<a64|maca64|w64> (CUDA-enabled MATLAB MEX)"

# macOS-only. Like cudaoct / cudamex but builds the CoreML MEX
# (Apple Silicon CPU + GPU + ANE). Uses the standard CPU ORT bundle
# since CoreML EP is statically baked into ORT's macOS dylib.
# -USIAMIZE_BACKEND clears a SIAMIZE_BACKEND=mnn entry cached in $(BUILD_DIR)
# by an earlier MNN build; ort-cpu keeps the build tree when CPU ORT is already
# staged, so CMake would otherwise reuse it from CMakeCache.txt.
coremloct: ort-cpu
	cmake -S . -B $(BUILD_DIR) -USIAMIZE_BACKEND \
	    -DCMAKE_BUILD_TYPE=$(BUILD_TYPE) \
	    -DSIAMIZE_GPU=coreml -DSIAMIZE_BUILD_OCTAVE_MEX=ON
	cmake --build $(BUILD_DIR) --config $(BUILD_TYPE) --parallel
	@echo "[make coremloct] built matlab/siamex.mex (CoreML-enabled Octave MEX)"

# CoreML-enabled MATLAB MEX (CPU ORT bundle).
# -USIAMIZE_BACKEND clears a SIAMIZE_BACKEND=mnn entry cached in $(BUILD_DIR)
# by an earlier MNN build; ort-cpu keeps the build tree when CPU ORT is already
# staged, so CMake would otherwise reuse it from CMakeCache.txt.
coremlmex: ort-cpu
	cmake -S . -B $(BUILD_DIR) -USIAMIZE_BACKEND \
	    -DCMAKE_BUILD_TYPE=$(BUILD_TYPE) \
	    -DSIAMIZE_GPU=coreml -DSIAMIZE_BUILD_MATLAB_MEX=ON
	cmake --build $(BUILD_DIR) --config $(BUILD_TYPE) --parallel
	@echo "[make coremlmex] built matlab/siamex.mex<maca64|maci64> (CoreML-enabled MATLAB MEX)"

# MNN-backed MEX variants. siamex.mex (Octave) or siamex.mex<a64|w64|...>
# (MATLAB) gets a tiny libMNN-static link line if MNN_STATIC=1 was set
# at scripts/fetch_mnn.sh time. The MATLAB-side wrapper queries
# siamex('backend') at runtime to pick the right fold filename
# (_fp32.mnn) and the matching NeuroJSON doc=mnn_n3d URL, so callers
# don't need backend-specific changes to their .m scripts.
# -USIAMIZE_GPU clears an ORT execution-provider choice (cuda / tensorrt /
# coreml) cached in $(BUILD_DIR) by an earlier flavor build; mnn-deps never
# wipes the build tree, so CMake would otherwise reuse it from CMakeCache.txt.
opencloct: mnn-deps
	cmake -S . -B $(BUILD_DIR) -USIAMIZE_GPU \
	    -DCMAKE_BUILD_TYPE=$(BUILD_TYPE) \
	    -DSIAMIZE_BACKEND=mnn -DSIAMIZE_BUILD_OCTAVE_MEX=ON
	cmake --build $(BUILD_DIR) --config $(BUILD_TYPE) --parallel
	@echo "[make opencloct] built matlab/siamex.mex (MNN-backed Octave MEX)"

# MNN-backed MATLAB MEX.
# -USIAMIZE_GPU clears an ORT execution-provider choice (cuda / tensorrt /
# coreml) cached in $(BUILD_DIR) by an earlier flavor build; mnn-deps never
# wipes the build tree, so CMake would otherwise reuse it from CMakeCache.txt.
openclmex: mnn-deps
	cmake -S . -B $(BUILD_DIR) -USIAMIZE_GPU \
	    -DCMAKE_BUILD_TYPE=$(BUILD_TYPE) \
	    -DSIAMIZE_BACKEND=mnn -DSIAMIZE_BUILD_MATLAB_MEX=ON
	cmake --build $(BUILD_DIR) --config $(BUILD_TYPE) --parallel
	@echo "[make openclmex] built matlab/siamex.mex<a64|maca64|w64> (MNN-backed MATLAB MEX)"

# Run the MEX unit tests headlessly: octave-cli changes into matlab/tests and
# calls run_tests('--exit'). This target has no prerequisites, so it does not
# build a MEX itself.
# NOTE(review): presumably a MEX from `make mex-octave` (or a sibling target)
# must already exist, and '--exit' makes run_tests terminate Octave with a
# pass/fail status -- confirm in matlab/tests/run_tests.m.
mex-test:
	octave-cli --no-gui --eval "cd matlab/tests; run_tests('--exit')"

# ---- Packaging --------------------------------------------------------------

# Each package-* target stages files under dist/<name>/ and zips to
# <name>.zip in the repo root via scripts/package.sh.

# All package-* rules call scripts/package.sh with two arguments; judging by
# the calls, the first selects the flavor and the second is the bundle name
# (confirm in scripts/package.sh). None of them has a prerequisite, so the
# matching build target must have been run beforehand.

# CPU CLI bundle.
package:
	scripts/package.sh cpu      siamize-cpu

# CUDA CLI bundle (run `make cuda` first).
package-cuda:
	scripts/package.sh cuda     siamize-cuda

# TensorRT CLI bundle (run `make tensorrt` first).
package-tensorrt:
	scripts/package.sh tensorrt siamize-tensorrt

# MEX bundle (run one of the `make mex-*` targets first).
package-mex:
	scripts/package.sh mex      siamex

# MNN-backed CLI bundle.
package-opencl:
	scripts/package.sh opencl   siamize-opencl

# MNN-backed MEX bundle.
package-openclmex:
	scripts/package.sh openclmex siamex-opencl

# ---- Docker image -----------------------------------------------------------
# Multi-stage build of docker/Dockerfile. Context is the repo root; the
# Dockerfile + its .dockerignore live under docker/. The final CUDA 12 +
# cuDNN 9 image carries both binaries: `siamize` (ORT/CUDA, default entrypoint)
# and `siamize-opencl` (MNN/OpenCL, run via `--entrypoint siamize-opencl`).
# Needs Docker with buildx. First build compiles MNN (~15-20 min, builder-only).
# Build docker/Dockerfile with the repo root as context, tag the result
# $(DOCKER_IMG), and pass $(DOCKER_CUDA) as the CUDA_VERSION build argument.
# Afterwards print copy-pasteable run / push hints. `\$$PWD` reaches the
# shell as an escaped `$PWD`, so the hint shows the literal text "$PWD".
#
# The push hint depends on the tag: an un-namespaced tag (siamize:vX) must be
# re-tagged as <user>/... before pushing, while an already namespaced one
# (e.g. DOCKER_IMG=neurojson/siamize:vX) can be pushed as-is. Prefixing the
# latter with <user>/ would produce a bogus <user>/neurojson/siamize name.
dockerimg:
	docker build -f docker/Dockerfile --build-arg CUDA_VERSION=$(DOCKER_CUDA) -t $(DOCKER_IMG) .
	@echo
	@echo "[make dockerimg] built $(DOCKER_IMG)  (siamize=ORT/CUDA, siamize-opencl=MNN/OpenCL)"
	@echo "  run ORT/CUDA : docker run --rm --gpus all -v \"\$$PWD\":/data $(DOCKER_IMG) -i /data/in.nii.gz -o /data/out.nii.gz -M 0,1,2,3,4 -c cuda"
	@echo "  run OpenCL   : docker run --rm --gpus all -v \"\$$PWD\":/data --entrypoint siamize-opencl $(DOCKER_IMG) -i /data/in.nii.gz -o /data/out.nii.gz -M 0,1,2,3,4 -c opencl"
	@echo "  push         : $(if $(findstring /,$(DOCKER_IMG)),docker push $(DOCKER_IMG),docker tag $(DOCKER_IMG) <user>/$(DOCKER_IMG) && docker push <user>/$(DOCKER_IMG))"

# ---- Documentation ----------------------------------------------------------

# Build doxygen HTML docs from the in-source doxygen blocks. Output lands
# under build/doc/html/. Requires the `doxygen` binary on PATH
# (apt install doxygen / brew install doxygen).
# Stops with install hints (exit status 1) when doxygen is not on PATH;
# otherwise runs doxygen on the top-level Doxyfile and reports the entry page.
doc:
	@if ! command -v doxygen >/dev/null 2>&1; then \
	    echo "doxygen not found. Install it:"; \
	    echo "    Debian/Ubuntu: sudo apt install doxygen"; \
	    echo "    macOS:         brew install doxygen"; \
	    exit 1; \
	fi
	doxygen Doxyfile
	@echo
	@echo "[make doc] generated $(BUILD_DIR)/doc/html/index.html"

# Remove the generated documentation tree under $(BUILD_DIR)/doc.
# Guarded: with an empty BUILD_DIR (e.g. `make BUILD_DIR= doc-clean`) the path
# would collapse to the absolute /doc, so refuse instead of deleting it.
doc-clean:
	@[ -n "$(strip $(BUILD_DIR))" ] || { \
	    echo "[make doc-clean] BUILD_DIR is empty; refusing to run rm -rf /doc" >&2; \
	    exit 1; \
	}
	rm -rf $(BUILD_DIR)/doc

# ---- Misc -------------------------------------------------------------------

# Run the regression script. The target has no prerequisites, so nothing is
# (re)built here; per the header notes the script needs models/ populated.
test:
	tests/run_regression.sh

# Remove the build tree, the dist/ staging directory and the zip bundles
# listed below. Nothing under third_party/ is touched.
clean:
	rm -rf $(BUILD_DIR) dist/ \
	    siamize-cpu.zip siamize-cuda.zip siamize-tensorrt.zip \
	    siamize-opencl.zip siamex.zip siamex-opencl.zip

# Everything `clean` removes, plus the staged runtimes: $(ORT_DIR), $(MNN_DIR)
# and third_party/mnn-build. The next build has to fetch / rebuild them.
distclean: clean
	rm -rf $(ORT_DIR) $(MNN_DIR) third_party/mnn-build

# Umbrella target: format the C++ sources, then the Python sources.
pretty: pretty-cpp pretty-py

# astyle settings borrowed from MCX (https://github.com/fangq/mcx),
# itself derived from https://github.com/nlohmann/json.
# --suffix=none rewrites the files in place without keeping backup copies,
# and --formatted limits the report to files that were actually changed.
# The file patterns are quoted so the shell passes them through unexpanded
# and astyle does its own wildcard matching.
# NOTE(review): --add-brackets is the pre-3.0 astyle spelling; newer astyle
# releases name this option --add-braces and may reject the old one --
# confirm against the astyle version the project targets.
pretty-cpp:
	astyle \
	    --style=attach \
	    --indent=spaces=4 \
	    --indent-modifiers \
	    --indent-switches \
	    --indent-preproc-block \
	    --indent-preproc-define \
	    --indent-col1-comments \
	    --pad-oper \
	    --pad-header \
	    --align-pointer=type \
	    --align-reference=type \
	    --add-brackets \
	    --convert-tabs \
	    --close-templates \
	    --lineend=linux \
	    --preserve-date \
	    --suffix=none \
	    --formatted \
	    --break-blocks \
	    "src/*.cpp" "src/*.h"

# Python formatting via black (PEP 8 conformant, default 88-col line length).
# Reformats everything under py/ and tools/ in place.
pretty-py:
	black py/ tools/