lightvector · ChinChangYang · Dec 31, 2025 · Dec 31, 2025 · Dec 31, 2025 · Dec 31, 2025
diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml
@@ -97,6 +97,56 @@ jobs:
           name: katago-macos-opencl
           path: cpp/katago
 
+  build-macos-metal:
+    runs-on: macos-latest
+    permissions:
+      contents: read
+
+    steps:
+      - name: Checkout code
+        uses: actions/checkout@v4
+
+      - name: Install dependencies
+        run: |
+          brew install ninja zlib libzip
+          brew tap chinchangyang/katagocoreml-cpp
+          brew install katagocoreml
+
+      - name: Cache CMake build
+        uses: actions/cache@v4
+        with:
+          path: |
+            cpp/CMakeCache.txt
+            cpp/CMakeFiles
+            cpp/build.ninja
+            cpp/.ninja_deps
+            cpp/.ninja_log
+          key: ${{ runner.os }}-cmake-metal-${{ hashFiles('**/CMakeLists.txt') }}
+          restore-keys: |
+            ${{ runner.os }}-cmake-metal-
+
+      - name: Configure CMake
+        working-directory: cpp
+        run: |
+          cmake . -G Ninja -DUSE_BACKEND=METAL -DCMAKE_BUILD_TYPE=Release
+
+      - name: Build
+        working-directory: cpp
+        run: |
+          ninja
+
+      - name: Run tests
+        working-directory: cpp
+        run: |
+          ./katago runtests
+
+      - name: Upload artifact
+        if: github.event_name == 'push' && github.ref == 'refs/heads/master'
+        uses: actions/upload-artifact@v4
+        with:
+          name: katago-macos-metal
+          path: cpp/katago
+
   build-windows:
     runs-on: windows-latest
     permissions:

diff --git a/.gitignore b/.gitignore
@@ -90,3 +90,7 @@ cpp/.ninja_log
 cpp/build.ninja
 cpp/KataGoSwift.*
 cpp/include/KataGoSwift/KataGoSwift-swift.h
+
+# For CoreML Backend
+cpp/KataGoCoreML.*
+cpp/include/KataGoCoreML/KataGoCoreML-swift.h
diff --git a/Compiling.md b/Compiling.md
@@ -118,8 +118,12 @@ As also mentioned in the instructions below but repeated here for visibility, if
    * If using OpenCL, you will want to verify that KataGo is picking up the correct device (e.g. some systems may have both an Intel CPU OpenCL and GPU OpenCL, if KataGo appears to pick the wrong one, you can correct this by specifying `openclGpuToUse` in `configs/gtp_example.cfg`).
 
 ## MacOS
-   * TLDR:
+   * TLDR (Metal backend - recommended for most users, hybrid CPU+GPU+Neural Engine for maximum throughput):
      ```
+     # First, install the katagocoreml library via Homebrew
+     brew tap chinchangyang/katagocoreml-cpp
+     brew install katagocoreml
+
      git clone https://github.com/lightvector/KataGo.git
      cd KataGo/cpp
      # If you get missing library errors, install the appropriate packages using your system package manager and try again.
@@ -132,6 +136,7 @@ As also mentioned in the instructions below but repeated here for visibility, if
       * CMake with a minimum version of 3.18.2: `brew install cmake`.
       * AppleClang and Swift compilers: `xcode-select --install`.
       * If using the Metal backend, [Ninja](https://ninja-build.org): `brew install ninja`
+      * If using the Metal backend, katagocoreml library: `brew tap chinchangyang/katagocoreml-cpp && brew install katagocoreml`
       * libzip: `brew install libzip`.
       * If you want to do self-play training and research, probably Google perftools `brew install gperftools` for TCMalloc or some other better malloc implementation. For unknown reasons, the allocation pattern in self-play with large numbers of threads and parallel games causes a lot of memory fragmentation under glibc malloc that will eventually run your machine out of memory, but better mallocs handle it fine.
       * If compiling to contribute to public distributed training runs, OpenSSL is required (`brew install openssl`).

diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt
@@ -1,5 +1,5 @@
 cmake_minimum_required(VERSION 3.18.2)
-if(USE_BACKEND STREQUAL "METAL")
+if(USE_BACKEND STREQUAL "METAL" OR USE_BACKEND STREQUAL "COREML")
   project(katago LANGUAGES CXX Swift)
 else()
   project(katago)
@@ -32,7 +32,7 @@ endif()
 set(BUILD_DISTRIBUTED 0 CACHE BOOL "Build with http support for contributing to distributed training")
 set(USE_BACKEND CACHE STRING "Neural net backend")
 string(TOUPPER "${USE_BACKEND}" USE_BACKEND)
-set_property(CACHE USE_BACKEND PROPERTY STRINGS "" CUDA TENSORRT OPENCL EIGEN)
+set_property(CACHE USE_BACKEND PROPERTY STRINGS "" CUDA TENSORRT OPENCL EIGEN METAL)
 
 set(USE_TCMALLOC 0 CACHE BOOL "Use TCMalloc")
 set(NO_GIT_REVISION 0 CACHE BOOL "Disable embedding the git revision into the compiled exe")
@@ -97,7 +97,7 @@ elseif(USE_BACKEND STREQUAL "TENSORRT")
     message(FATAL_ERROR "Combining USE_CACHE_TENSORRT_PLAN with BUILD_DISTRIBUTED is not supported - it would consume excessive disk space and might worsen performance every time models are updated. Use only one at a time in a given build of KataGo.")
   endif()
 elseif(USE_BACKEND STREQUAL "METAL")
-  message(STATUS "-DUSE_BACKEND=METAL, using Metal backend.")
+  message(STATUS "-DUSE_BACKEND=METAL, using Metal backend with hybrid MPSGraph + CoreML execution.")
   if(NOT "${CMAKE_GENERATOR}" STREQUAL "Ninja")
     message(FATAL_ERROR "Bidirectional C++ Interop requires Ninja generator. Have ${CMAKE_GENERATOR}")
   endif()
@@ -107,6 +107,8 @@ elseif(USE_BACKEND STREQUAL "METAL")
   if(NOT "${CMAKE_CXX_COMPILER_ID}" STREQUAL "AppleClang")
     message(FATAL_ERROR "Project requires building with AppleClang. Have ${CMAKE_CXX_COMPILER_ID}")
   endif()
+  find_package(PkgConfig REQUIRED)
+  pkg_check_modules(KATAGOCOREML REQUIRED katagocoreml)
   list(APPEND CMAKE_MODULE_PATH "${CMAKE_CURRENT_SOURCE_DIR}/external/macos/cmake/modules")
   include(InitializeSwift)
   include(AddSwift)
@@ -115,11 +117,11 @@ elseif(USE_BACKEND STREQUAL "METAL")
     neuralnet/metalbackend.cpp
     )
   add_library(KataGoSwift STATIC
-    neuralnet/metalbackend.swift)
+    neuralnet/metalbackend.swift
+    neuralnet/metallayers.swift)
   _swift_generate_cxx_header(
     KataGoSwift
-    "${CMAKE_CURRENT_BINARY_DIR}/include/KataGoSwift/KataGoSwift-swift.h"
-    SOURCES "${CMAKE_CURRENT_SOURCE_DIR}/neuralnet/metalbackend.swift")
+    "${CMAKE_CURRENT_BINARY_DIR}/include/KataGoSwift/KataGoSwift-swift.h")
   target_include_directories(KataGoSwift PUBLIC "${CMAKE_CURRENT_BINARY_DIR}/include")
   set_target_properties(KataGoSwift PROPERTIES Swift_MODULE_NAME "KataGoSwift")
   target_compile_options(KataGoSwift PUBLIC
@@ -399,9 +401,14 @@ elseif(USE_BACKEND STREQUAL "TENSORRT")
   target_link_libraries(katago CUDA::cudart_static ${TENSORRT_LIBRARY})
 elseif(USE_BACKEND STREQUAL "METAL")
   target_compile_definitions(katago PRIVATE USE_METAL_BACKEND)
-  target_link_libraries(katago KataGoSwift)
+  target_include_directories(katago PRIVATE ${KATAGOCOREML_INCLUDE_DIRS})
+  find_library(KATAGOCOREML_LIB katagocoreml HINTS /usr/local/lib REQUIRED)
+  target_link_directories(katago PRIVATE ${KATAGOCOREML_LIBRARY_DIRS})
+  target_link_libraries(katago KataGoSwift ${KATAGOCOREML_LIB} ${KATAGOCOREML_LDFLAGS}
+    "-framework MetalPerformanceShaders"
+    "-framework MetalPerformanceShadersGraph")
   if("${CMAKE_SYSTEM_PROCESSOR}" STREQUAL "x86_64")
-    message(WARNING "You are currently running cmake on an Intel-based processor. It is known that running KataGo in this configuration may encounter performance issues. It is recommended to switch to a cmake version designed for ARM64 architecture for optimal performance.")
+    message(WARNING "Metal backend may not work optimally on Intel. ARM64 architecture is recommended.")
   endif()
 elseif(USE_BACKEND STREQUAL "OPENCL")
   target_compile_definitions(katago PRIVATE USE_OPENCL_BACKEND)

diff --git a/cpp/command/benchmark.cpp b/cpp/command/benchmark.cpp
@@ -260,6 +260,9 @@ int MainCmds::benchmark(const vector<string>& args) {
 #ifdef USE_METAL_BACKEND
   cout << "You are currently using the Metal version of KataGo." << endl;
 #endif
+#ifdef USE_COREML_BACKEND
+  cout << "You are currently using the Core ML version of KataGo." << endl;
+#endif
 #ifdef USE_OPENCL_BACKEND
   cout << "You are currently using the OpenCL version of KataGo." << endl;
   cout << "If you have a strong GPU capable of FP16 tensor cores (e.g. RTX2080), "