Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
18 commits
Select commit Hold shift + click to select a range
4623fe7
Fix CoreML backend batched evaluation bug
ChinChangYang Dec 31, 2025
6a67e80
Add Core ML backend support to CMake configuration
ChinChangYang Dec 31, 2025
1b1d246
Add CoreML backend entry to .gitignore
ChinChangYang Dec 31, 2025
7e4ef7b
Fix CoreML backend MLMultiArray stride handling bug
ChinChangYang Dec 31, 2025
2c77de1
Add KataGoCoreML-swift.h to .gitignore
ChinChangYang Dec 31, 2025
927af3b
Fix CoreML backend pass policy output name mismatch
ChinChangYang Dec 31, 2025
402e8d9
Add configurable FP16/FP32 precision to CoreML backend
ChinChangYang Jan 1, 2026
65c27b1
Replace Python CoreML converter with native katagocoreml library
ChinChangYang Jan 5, 2026
e6fd901
Add hybrid CoreML + MPSGraph backend for improved throughput
ChinChangYang Jan 5, 2026
8f24d1a
Optimize MPSGraph mask operations when requireExactNNLen is true
ChinChangYang Jan 5, 2026
270651b
Remove unused maskSize variable in HybridComputeHandle
ChinChangYang Jan 5, 2026
31a36a3
Add CoreML backend build instructions to Compiling.md
ChinChangYang Jan 5, 2026
f952519
Add CoreML backend CI job to GitHub Actions workflow
ChinChangYang Jan 6, 2026
bb69d0d
Simplify CoreML model loading with dynamic batch size support
ChinChangYang Jan 20, 2026
e914062
Add FP32 GPU-only mode using MPSGraph to bypass CoreML converter
ChinChangYang Jan 20, 2026
20ab597
Use MPSGraph-only mode when batch size is too small for hybrid split
ChinChangYang Jan 21, 2026
dad0dac
Improve ThroughputTracker for selfplay workloads
ChinChangYang Jan 21, 2026
46eb2c5
Unify CoreML and Metal into single Metal backend
ChinChangYang Jan 24, 2026
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
50 changes: 50 additions & 0 deletions .github/workflows/build.yml
Original file line number Diff line number Diff line change
Expand Up @@ -97,6 +97,56 @@ jobs:
name: katago-macos-opencl
path: cpp/katago

# CI job for the macOS Metal backend: configures KataGo with -DUSE_BACKEND=METAL,
# builds it with Ninja, runs the built-in tests, and (on pushes to master only)
# uploads the resulting binary as the "katago-macos-metal" artifact.
build-macos-metal:
runs-on: macos-latest
permissions:
contents: read

steps:
- name: Checkout code
uses: actions/checkout@v4

# Homebrew packages: ninja, zlib and libzip, plus katagocoreml from the
# chinchangyang/katagocoreml-cpp tap. The Metal branch of cpp/CMakeLists.txt
# rejects any generator other than Ninja and requires the katagocoreml
# pkg-config module.
- name: Install dependencies
run: |
brew install ninja zlib libzip
brew tap chinchangyang/katagocoreml-cpp
brew install katagocoreml

# Caches the in-source CMake/Ninja state under cpp/. The key includes a hash of
# every CMakeLists.txt in the repository; restore-keys allows falling back to an
# earlier cache that shares the "<os>-cmake-metal-" prefix.
- name: Cache CMake build
uses: actions/cache@v4
with:
path: |
cpp/CMakeCache.txt
cpp/CMakeFiles
cpp/build.ninja
cpp/.ninja_deps
cpp/.ninja_log
key: ${{ runner.os }}-cmake-metal-${{ hashFiles('**/CMakeLists.txt') }}
restore-keys: |
${{ runner.os }}-cmake-metal-

# In-source configure ("cmake ." inside cpp/), which is why the cached paths
# above live directly under cpp/.
- name: Configure CMake
working-directory: cpp
run: |
cmake . -G Ninja -DUSE_BACKEND=METAL -DCMAKE_BUILD_TYPE=Release

- name: Build
working-directory: cpp
run: |
ninja

# Runs the test command of the freshly built binary.
- name: Run tests
working-directory: cpp
run: |
./katago runtests

# Only publish the binary for pushes to the master branch.
- name: Upload artifact
if: github.event_name == 'push' && github.ref == 'refs/heads/master'
uses: actions/upload-artifact@v4
with:
name: katago-macos-metal
path: cpp/katago

build-windows:
runs-on: windows-latest
permissions:
Expand Down
4 changes: 4 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -90,3 +90,7 @@ cpp/.ninja_log
cpp/build.ninja
cpp/KataGoSwift.*
cpp/include/KataGoSwift/KataGoSwift-swift.h

# For CoreML Backend
cpp/KataGoCoreML.*
cpp/include/KataGoCoreML/KataGoCoreML-swift.h
7 changes: 6 additions & 1 deletion Compiling.md
Original file line number Diff line number Diff line change
Expand Up @@ -118,8 +118,12 @@ As also mentioned in the instructions below but repeated here for visibility, if
* If using OpenCL, you will want to verify that KataGo is picking up the correct device (e.g. some systems may have both an Intel CPU OpenCL and GPU OpenCL, if KataGo appears to pick the wrong one, you can correct this by specifying `openclGpuToUse` in `configs/gtp_example.cfg`).

## MacOS
* TLDR:
* TLDR (Metal backend - recommended for most users, hybrid CPU+GPU+Neural Engine for maximum throughput):
```
# First, install the katagocoreml library via Homebrew
brew tap chinchangyang/katagocoreml-cpp
brew install katagocoreml

git clone https://github.com/lightvector/KataGo.git
cd KataGo/cpp
# If you get missing library errors, install the appropriate packages using your system package manager and try again.
Expand All @@ -132,6 +136,7 @@ As also mentioned in the instructions below but repeated here for visibility, if
* CMake with a minimum version of 3.18.2: `brew install cmake`.
* AppleClang and Swift compilers: `xcode-select --install`.
* If using the Metal backend, [Ninja](https://ninja-build.org): `brew install ninja`
* If using the Metal backend, katagocoreml library: `brew tap chinchangyang/katagocoreml-cpp && brew install katagocoreml`
* libzip: `brew install libzip`.
* If you want to do self-play training and research, probably Google perftools `brew install gperftools` for TCMalloc or some other better malloc implementation. For unknown reasons, the allocation pattern in self-play with large numbers of threads and parallel games causes a lot of memory fragmentation under glibc malloc that will eventually run your machine out of memory, but better mallocs handle it fine.
* If compiling to contribute to public distributed training runs, OpenSSL is required (`brew install openssl`).
Expand Down
23 changes: 15 additions & 8 deletions cpp/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
cmake_minimum_required(VERSION 3.18.2)
if(USE_BACKEND STREQUAL "METAL")
if(USE_BACKEND STREQUAL "METAL" OR USE_BACKEND STREQUAL "COREML")
project(katago LANGUAGES CXX Swift)
else()
project(katago)
Expand Down Expand Up @@ -32,7 +32,7 @@ endif()
set(BUILD_DISTRIBUTED 0 CACHE BOOL "Build with http support for contributing to distributed training")
set(USE_BACKEND CACHE STRING "Neural net backend")
string(TOUPPER "${USE_BACKEND}" USE_BACKEND)
set_property(CACHE USE_BACKEND PROPERTY STRINGS "" CUDA TENSORRT OPENCL EIGEN)
set_property(CACHE USE_BACKEND PROPERTY STRINGS "" CUDA TENSORRT OPENCL EIGEN METAL)

set(USE_TCMALLOC 0 CACHE BOOL "Use TCMalloc")
set(NO_GIT_REVISION 0 CACHE BOOL "Disable embedding the git revision into the compiled exe")
Expand Down Expand Up @@ -97,7 +97,7 @@ elseif(USE_BACKEND STREQUAL "TENSORRT")
message(FATAL_ERROR "Combining USE_CACHE_TENSORRT_PLAN with BUILD_DISTRIBUTED is not supported - it would consume excessive disk space and might worsen performance every time models are updated. Use only one at a time in a given build of KataGo.")
endif()
elseif(USE_BACKEND STREQUAL "METAL")
message(STATUS "-DUSE_BACKEND=METAL, using Metal backend.")
message(STATUS "-DUSE_BACKEND=METAL, using Metal backend with hybrid MPSGraph + CoreML execution.")
if(NOT "${CMAKE_GENERATOR}" STREQUAL "Ninja")
message(FATAL_ERROR "Bidirectional C++ Interop requires Ninja generator. Have ${CMAKE_GENERATOR}")
endif()
Expand All @@ -107,6 +107,8 @@ elseif(USE_BACKEND STREQUAL "METAL")
if(NOT "${CMAKE_CXX_COMPILER_ID}" STREQUAL "AppleClang")
message(FATAL_ERROR "Project requires building with AppleClang. Have ${CMAKE_CXX_COMPILER_ID}")
endif()
find_package(PkgConfig REQUIRED)
pkg_check_modules(KATAGOCOREML REQUIRED katagocoreml)
list(APPEND CMAKE_MODULE_PATH "${CMAKE_CURRENT_SOURCE_DIR}/external/macos/cmake/modules")
include(InitializeSwift)
include(AddSwift)
Expand All @@ -115,11 +117,11 @@ elseif(USE_BACKEND STREQUAL "METAL")
neuralnet/metalbackend.cpp
)
add_library(KataGoSwift STATIC
neuralnet/metalbackend.swift)
neuralnet/metalbackend.swift
neuralnet/metallayers.swift)
_swift_generate_cxx_header(
KataGoSwift
"${CMAKE_CURRENT_BINARY_DIR}/include/KataGoSwift/KataGoSwift-swift.h"
SOURCES "${CMAKE_CURRENT_SOURCE_DIR}/neuralnet/metalbackend.swift")
"${CMAKE_CURRENT_BINARY_DIR}/include/KataGoSwift/KataGoSwift-swift.h")
target_include_directories(KataGoSwift PUBLIC "${CMAKE_CURRENT_BINARY_DIR}/include")
set_target_properties(KataGoSwift PROPERTIES Swift_MODULE_NAME "KataGoSwift")
target_compile_options(KataGoSwift PUBLIC
Expand Down Expand Up @@ -399,9 +401,14 @@ elseif(USE_BACKEND STREQUAL "TENSORRT")
target_link_libraries(katago CUDA::cudart_static ${TENSORRT_LIBRARY})
elseif(USE_BACKEND STREQUAL "METAL")
target_compile_definitions(katago PRIVATE USE_METAL_BACKEND)
target_link_libraries(katago KataGoSwift)
# Compile against the katagocoreml headers reported by pkg-config. The
# KATAGOCOREML_* variables are populated by the pkg_check_modules() call made
# earlier for the METAL backend.
target_include_directories(katago PRIVATE ${KATAGOCOREML_INCLUDE_DIRS})
# Locate the katagocoreml library. The directories reported by pkg-config are
# searched first, so the library that is linked matches the headers used above
# and installs outside /usr/local (e.g. a Homebrew prefix such as /opt/homebrew)
# are found. /usr/local/lib is kept as a fallback hint for existing setups.
find_library(KATAGOCOREML_LIB katagocoreml
  HINTS ${KATAGOCOREML_LIBRARY_DIRS} /usr/local/lib
  REQUIRED)
target_link_directories(katago PRIVATE ${KATAGOCOREML_LIBRARY_DIRS})
# The keyword-less target_link_libraries() signature is kept deliberately: the
# other visible calls on the katago target use it, and CMake does not allow
# mixing the plain and keyword signatures on one target.
target_link_libraries(katago KataGoSwift ${KATAGOCOREML_LIB} ${KATAGOCOREML_LDFLAGS}
  "-framework MetalPerformanceShaders"
  "-framework MetalPerformanceShadersGraph")
if("${CMAKE_SYSTEM_PROCESSOR}" STREQUAL "x86_64")
message(WARNING "You are currently running cmake on an Intel-based processor. It is known that running KataGo in this configuration may encounter performance issues. It is recommended to switch to a cmake version designed for ARM64 architecture for optimal performance.")
message(WARNING "Metal backend may not work optimally on Intel. ARM64 architecture is recommended.")
endif()
elseif(USE_BACKEND STREQUAL "OPENCL")
target_compile_definitions(katago PRIVATE USE_OPENCL_BACKEND)
Expand Down
3 changes: 3 additions & 0 deletions cpp/command/benchmark.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -260,6 +260,9 @@ int MainCmds::benchmark(const vector<string>& args) {
#ifdef USE_METAL_BACKEND
cout << "You are currently using the Metal version of KataGo." << endl;
#endif
#ifdef USE_COREML_BACKEND
cout << "You are currently using the Core ML version of KataGo." << endl;
#endif
#ifdef USE_OPENCL_BACKEND
cout << "You are currently using the OpenCL version of KataGo." << endl;
cout << "If you have a strong GPU capable of FP16 tensor cores (e.g. RTX2080), "
Expand Down
Loading
Loading