diff --git a/.github/workflows/05-windows-build.yml b/.github/workflows/05-windows-build.yml
index 0db11c200..fa7c25d6c 100644
--- a/.github/workflows/05-windows-build.yml
+++ b/.github/workflows/05-windows-build.yml
@@ -18,7 +18,16 @@ jobs:
       matrix:
         include:
           - platform: windows-2022
+            msvc_arch: x64
+            python_version: '3.10'
           - platform: windows-2025
+            msvc_arch: x64
+            python_version: '3.10'
+          # Windows ARM64: Python 3.10 has no official ARM64 installer;
+          # 3.11 is the first CPython release with a Windows-on-ARM build.
+          - platform: windows-11-arm
+            msvc_arch: arm64
+            python_version: '3.11'
 
     steps:
       - name: Show env info
@@ -41,14 +50,14 @@ jobs:
       - name: Set up Python
         uses: actions/setup-python@v6
         with:
-          python-version: '3.10'
+          python-version: ${{ matrix.python_version }}
           cache: 'pip'
           cache-dependency-path: 'pyproject.toml'
 
       - name: Set up MSVC environment
         uses: ilammy/msvc-dev-cmd@v1
         with:
-          arch: x64
+          arch: ${{ matrix.msvc_arch }}
 
       - name: Set up environment variables
         run: |
diff --git a/src/ailego/CMakeLists.txt b/src/ailego/CMakeLists.txt
index fdaa1b13c..9ce8691a7 100644
--- a/src/ailego/CMakeLists.txt
+++ b/src/ailego/CMakeLists.txt
@@ -91,29 +91,35 @@ if(NOT ANDROID AND AUTO_DETECT_ARCH)
         )
     endforeach()
     elseif (HOST_ARCH MATCHES "^(arm|arm64)$")
-        if(MSVC)
-            return()
-        endif()
-        set(MATH_MARCH_FLAG_NEON "-march=armv8-a")
-
-        file(GLOB_RECURSE MATH_FILES_NEON
-          ${CMAKE_CURRENT_SOURCE_DIR}/math/*_dispatch.cc
-          ${CMAKE_CURRENT_SOURCE_DIR}/math/*_dispatch.c
-          ${CMAKE_CURRENT_SOURCE_DIR}/math_batch/*_dispatch.cc
-          ${CMAKE_CURRENT_SOURCE_DIR}/math_batch/*_dispatch.c
-          ${CMAKE_CURRENT_SOURCE_DIR}/math/*_neon.cc
-          ${CMAKE_CURRENT_SOURCE_DIR}/math/*_neon.c
-          ${CMAKE_CURRENT_SOURCE_DIR}/math_batch/*_neon.cc
-          ${CMAKE_CURRENT_SOURCE_DIR}/math_batch/*_neon.c
-        )
+        if(NOT MSVC)
+            set(MATH_MARCH_FLAG_NEON "-march=armv8-a")
+
+            file(GLOB_RECURSE MATH_FILES_NEON
+              ${CMAKE_CURRENT_SOURCE_DIR}/math/*_dispatch.cc
+              ${CMAKE_CURRENT_SOURCE_DIR}/math/*_dispatch.c
+              ${CMAKE_CURRENT_SOURCE_DIR}/math_batch/*_dispatch.cc
+              ${CMAKE_CURRENT_SOURCE_DIR}/math_batch/*_dispatch.c
+              ${CMAKE_CURRENT_SOURCE_DIR}/math/*_neon.cc
+              ${CMAKE_CURRENT_SOURCE_DIR}/math/*_neon.c
+              ${CMAKE_CURRENT_SOURCE_DIR}/math_batch/*_neon.cc
+              ${CMAKE_CURRENT_SOURCE_DIR}/math_batch/*_neon.c
+            )
 
-        foreach(MATH_FILE ${MATH_FILES_NEON})
-          set_source_files_properties(
-              ${MATH_FILE}
-              PROPERTIES
-              COMPILE_FLAGS "${MATH_MARCH_FLAG_NEON}"
-          )
-        endforeach()
+            foreach(MATH_FILE ${MATH_FILES_NEON})
+              set_source_files_properties(
+                  ${MATH_FILE}
+                  PROPERTIES
+                  COMPILE_FLAGS "${MATH_MARCH_FLAG_NEON}"
+              )
+            endforeach()
+        else()
+            # MSVC on ARM64: NEON is the ARMv8 baseline and is always enabled,
+            # so no `-march` flag is required (MSVC does not accept GCC-style
+            # `-march=` anyway). The NEON math kernels still get compiled via
+            # the ALL_SRCS glob above; their `#if defined(__ARM_NEON)` guards
+            # were extended in this PR to also accept `_M_ARM64` so the bodies
+            # actually emit code under MSVC.
+        endif()
     endif()
 endif()
 
diff --git a/src/ailego/internal/cpu_features.cc b/src/ailego/internal/cpu_features.cc
index e2dd2b23a..395e6fc13 100644
--- a/src/ailego/internal/cpu_features.cc
+++ b/src/ailego/internal/cpu_features.cc
@@ -15,9 +15,10 @@
 #include "cpu_features.h"
 #include <cstddef>
 
-#if defined(_MSC_VER)
+#if defined(_MSC_VER) && (defined(_M_IX86) || defined(_M_X64))
 #include <intrin.h>
-#elif !defined(__ARM_ARCH)
+#elif !defined(_MSC_VER) && !defined(__ARM_ARCH) && \
+    !(defined(__aarch64__) || defined(_M_ARM64))
 #include <cpuid.h>
 #endif
 
@@ -34,7 +35,7 @@ namespace internal {
 
 CpuFeatures::CpuFlags CpuFeatures::flags_;
 
-#if defined(_MSC_VER)
+#if defined(_MSC_VER) && (defined(_M_IX86) || defined(_M_X64))
 CpuFeatures::CpuFlags::CpuFlags(void)
     : L1_ECX(0), L1_EDX(0), L7_EBX(0), L7_ECX(0), L7_EDX(0) {
   int l1[4] = {0, 0, 0, 0};
@@ -48,7 +49,8 @@ CpuFeatures::CpuFlags::CpuFlags(void)
   L7_ECX = l7[2];
   L7_EDX = l7[3];
 }
-#elif !defined(__ARM_ARCH)
+#elif !defined(_MSC_VER) && !defined(__ARM_ARCH) && \
+    !(defined(__aarch64__) || defined(_M_ARM64))
 CpuFeatures::CpuFlags::CpuFlags(void)
     : L1_ECX(0), L1_EDX(0), L7_EBX(0), L7_ECX(0), L7_EDX(0) {
   uint32_t eax, ebx, ecx, edx;
@@ -336,7 +338,7 @@ bool CpuFeatures::HYPERVISOR(void) {
 
 const char *CpuFeatures::Intrinsics(void) {
   return ""
-#if defined(__ARM_NEON)
+#if (defined(__ARM_NEON) || defined(_M_ARM64))
          "Neon"
 #if defined(__ARM_FEATURE_CRC32)
          "+CRC"
diff --git a/src/ailego/math/distance_matrix_accum_fp32.i b/src/ailego/math/distance_matrix_accum_fp32.i
index c186492c6..913784076 100644
--- a/src/ailego/math/distance_matrix_accum_fp32.i
+++ b/src/ailego/math/distance_matrix_accum_fp32.i
@@ -30,7 +30,7 @@
                                             _mm512_castps_si512(b)))
 #endif  // __AVX512DQ__
 
-#if defined(__ARM_NEON) && !defined(__aarch64__)
+#if (defined(__ARM_NEON) || defined(_M_ARM64)) && !(defined(__aarch64__) || defined(_M_ARM64))
 static inline float32_t vaddvq_f32(float32x4_t v) {
   float32x2_t s = vadd_f32(vget_low_f32(v), vget_high_f32(v));
   return vget_lane_f32(vpadd_f32(s, s), 0);
@@ -42,7 +42,7 @@ static inline int32_t vaddvq_s32(int32x4_t v) {
 }
 #endif  //__ARM_NEON && !__aarch64__
 
-#if defined(__aarch64__)
+#if (defined(__aarch64__) || defined(_M_ARM64))
 #define ACCUM_FP32_2X1_NEON ACCUM_FP32_2X1_NEON_A64
 #else
 #define ACCUM_FP32_2X1_NEON ACCUM_FP32_2X1_NEON_A32
diff --git a/src/ailego/math/distance_matrix_fp32.i b/src/ailego/math/distance_matrix_fp32.i
index a9ddcd075..f4e6a16ab 100644
--- a/src/ailego/math/distance_matrix_fp32.i
+++ b/src/ailego/math/distance_matrix_fp32.i
@@ -26,7 +26,7 @@
   _mm256_insertf128_ps(_mm256_castps128_ps256(b), (a), 1)
 #endif  // __AVX__
 
-#if defined(__ARM_NEON) && !defined(__aarch64__)
+#if (defined(__ARM_NEON) || defined(_M_ARM64)) && !(defined(__aarch64__) || defined(_M_ARM64))
 #define vdupq_laneq_f32(a, b) vdupq_n_f32(vgetq_lane_f32(a, b))
 #endif  // __ARM_NEON && __aarch64__
 
diff --git a/src/ailego/math/euclidean_distance_matrix_fp16_dispatch.cc b/src/ailego/math/euclidean_distance_matrix_fp16_dispatch.cc
index fb145265e..e9e205201 100644
--- a/src/ailego/math/euclidean_distance_matrix_fp16_dispatch.cc
+++ b/src/ailego/math/euclidean_distance_matrix_fp16_dispatch.cc
@@ -18,7 +18,7 @@
 namespace zvec {
 namespace ailego {
 
-#if defined(__ARM_NEON)
+#if (defined(__ARM_NEON) || defined(_M_ARM64))
 float SquaredEuclideanDistanceFp16NEON(const Float16 *lhs, const Float16 *rhs,
                                        size_t size);
 #endif
@@ -46,7 +46,7 @@ void SquaredEuclideanDistanceMatrix<Float16, 1, 1>::Compute(const ValueType *m,
                                                             const ValueType *q,
                                                             size_t dim,
                                                             float *out) {
-#if defined(__ARM_NEON)
+#if (defined(__ARM_NEON) || defined(_M_ARM64))
   *out = SquaredEuclideanDistanceFp16NEON(m, q, dim);
 #else
 #if defined(__AVX512FP16__)
diff --git a/src/ailego/math/euclidean_distance_matrix_fp16_neon.cc b/src/ailego/math/euclidean_distance_matrix_fp16_neon.cc
index 3d3bf8787..84b75eb8c 100644
--- a/src/ailego/math/euclidean_distance_matrix_fp16_neon.cc
+++ b/src/ailego/math/euclidean_distance_matrix_fp16_neon.cc
@@ -19,7 +19,7 @@
 namespace zvec {
 namespace ailego {
 
-#if defined(__ARM_NEON)
+#if (defined(__ARM_NEON) || defined(_M_ARM64))
 float SquaredEuclideanDistanceFp16NEON(const Float16 *lhs, const Float16 *rhs,
                                        size_t size) {
   float score{0.0f};
diff --git a/src/ailego/math/euclidean_distance_matrix_fp32_dispatch.cc b/src/ailego/math/euclidean_distance_matrix_fp32_dispatch.cc
index cc3044389..f0650a08e 100644
--- a/src/ailego/math/euclidean_distance_matrix_fp32_dispatch.cc
+++ b/src/ailego/math/euclidean_distance_matrix_fp32_dispatch.cc
@@ -18,7 +18,7 @@
 namespace zvec {
 namespace ailego {
 
-#if defined(__ARM_NEON)
+#if (defined(__ARM_NEON) || defined(_M_ARM64))
 void SquaredEuclideanDistanceFp32NEON(const float *lhs, const float *rhs,
                                       size_t size, float *out);
 #endif
@@ -49,7 +49,7 @@ void SquaredEuclideanDistanceMatrix<float, 1, 1>::Compute(const ValueType *m,
                                                           const ValueType *q,
                                                           size_t dim,
                                                           float *out) {
-#if defined(__ARM_NEON)
+#if (defined(__ARM_NEON) || defined(_M_ARM64))
   SquaredEuclideanDistanceFp32NEON(m, q, dim, out);
 #else
 #if defined(__AVX512F__)
diff --git a/src/ailego/math/euclidean_distance_matrix_fp32_neon.cc b/src/ailego/math/euclidean_distance_matrix_fp32_neon.cc
index aa1694e21..14ce90767 100644
--- a/src/ailego/math/euclidean_distance_matrix_fp32_neon.cc
+++ b/src/ailego/math/euclidean_distance_matrix_fp32_neon.cc
@@ -19,7 +19,7 @@
 namespace zvec {
 namespace ailego {
 
-#if defined(__ARM_NEON)
+#if (defined(__ARM_NEON) || defined(_M_ARM64))
 //! Squared Euclidean Distance
 void SquaredEuclideanDistanceFp32NEON(const float *lhs, const float *rhs,
                                       size_t size, float *out) {
diff --git a/src/ailego/math/inner_product_matrix_fp16_dispatch.cc b/src/ailego/math/inner_product_matrix_fp16_dispatch.cc
index 3c46bc32b..3b70d6f0a 100644
--- a/src/ailego/math/inner_product_matrix_fp16_dispatch.cc
+++ b/src/ailego/math/inner_product_matrix_fp16_dispatch.cc
@@ -21,7 +21,7 @@ namespace ailego {
 //--------------------------------------------------
 // Dense
 //--------------------------------------------------
-#if defined(__ARM_NEON)
+#if (defined(__ARM_NEON) || defined(_M_ARM64))
 float InnerProductFp16NEON(const Float16 *lhs, const Float16 *rhs, size_t size);
 float MinusInnerProductFp16NEON(const Float16 *lhs, const Float16 *rhs,
                                 size_t size);
@@ -56,7 +56,7 @@ float MinusInnerProductFp16Scalar(const Float16 *lhs, const Float16 *rhs,
 void InnerProductMatrix<Float16, 1, 1>::Compute(const ValueType *m,
                                                 const ValueType *q, size_t dim,
                                                 float *out) {
-#if defined(__ARM_NEON)
+#if (defined(__ARM_NEON) || defined(_M_ARM64))
   *out = InnerProductFp16NEON(m, q, dim);
 #else
 #if defined(__AVX512FP16__)
@@ -86,7 +86,7 @@ void InnerProductMatrix<Float16, 1, 1>::Compute(const ValueType *m,
 void MinusInnerProductMatrix<Float16, 1, 1>::Compute(const ValueType *m,
                                                      const ValueType *q,
                                                      size_t dim, float *out) {
-#if defined(__ARM_NEON)
+#if (defined(__ARM_NEON) || defined(_M_ARM64))
   *out = MinusInnerProductFp16NEON(m, q, dim);
 #else
 #if defined(__AVX512FP16__)
diff --git a/src/ailego/math/inner_product_matrix_fp16_neon.cc b/src/ailego/math/inner_product_matrix_fp16_neon.cc
index 3d6c0d621..29a3dccea 100644
--- a/src/ailego/math/inner_product_matrix_fp16_neon.cc
+++ b/src/ailego/math/inner_product_matrix_fp16_neon.cc
@@ -19,7 +19,7 @@
 namespace zvec {
 namespace ailego {
 
-#if defined(__ARM_NEON)
+#if (defined(__ARM_NEON) || defined(_M_ARM64))
 float InnerProductFp16NEON(const Float16 *lhs, const Float16 *rhs,
                            size_t size) {
   float score;
diff --git a/src/ailego/math/inner_product_matrix_fp32_dispatch.cc b/src/ailego/math/inner_product_matrix_fp32_dispatch.cc
index 8b289b6e6..32540296b 100644
--- a/src/ailego/math/inner_product_matrix_fp32_dispatch.cc
+++ b/src/ailego/math/inner_product_matrix_fp32_dispatch.cc
@@ -20,7 +20,7 @@ namespace ailego {
 //--------------------------------------------------
 // Dense
 //--------------------------------------------------
-#if defined(__ARM_NEON)
+#if (defined(__ARM_NEON) || defined(_M_ARM64))
 float InnerProductFp32NEON(const float *lhs, const float *rhs, size_t size);
 float MinusInnerProductFp32NEON(const float *lhs, const float *rhs,
                                 size_t size);
@@ -49,7 +49,7 @@ float MinusInnerProductFp32Scalar(const float *lhs, const float *rhs,
 //! Compute the distance between matrix and query (FP32, M=1, N=1)
 void InnerProductMatrix<float, 1, 1>::Compute(const float *m, const float *q,
                                               size_t dim, float *out) {
-#if defined(__ARM_NEON)
+#if (defined(__ARM_NEON) || defined(_M_ARM64))
   *out = InnerProductFp32NEON(m, q, dim);
 #else
 #if defined(__AVX512F__)
@@ -80,7 +80,7 @@ void InnerProductMatrix<float, 1, 1>::Compute(const float *m, const float *q,
 void MinusInnerProductMatrix<float, 1, 1>::Compute(const float *m,
                                                    const float *q, size_t dim,
                                                    float *out) {
-#if defined(__ARM_NEON)
+#if (defined(__ARM_NEON) || defined(_M_ARM64))
   *out = MinusInnerProductFp32NEON(m, q, dim);
 #else
 #if defined(__AVX512F__)
diff --git a/src/ailego/math/inner_product_matrix_fp32_neon.cc b/src/ailego/math/inner_product_matrix_fp32_neon.cc
index c457b3ea2..e21fd3abf 100644
--- a/src/ailego/math/inner_product_matrix_fp32_neon.cc
+++ b/src/ailego/math/inner_product_matrix_fp32_neon.cc
@@ -22,7 +22,7 @@ namespace ailego {
 //--------------------------------------------------
 // Dense
 //--------------------------------------------------
-#if defined(__ARM_NEON)
+#if (defined(__ARM_NEON) || defined(_M_ARM64))
 float InnerProductFp32NEON(const float *lhs, const float *rhs, size_t size) {
   const float *last = lhs + size;
   const float *last_aligned = lhs + ((size >> 3) << 3);
diff --git a/src/ailego/math/mips_euclidean_distance_matrix_fp16_dispatch.cc b/src/ailego/math/mips_euclidean_distance_matrix_fp16_dispatch.cc
index 8e40563cf..ee80f61ef 100644
--- a/src/ailego/math/mips_euclidean_distance_matrix_fp16_dispatch.cc
+++ b/src/ailego/math/mips_euclidean_distance_matrix_fp16_dispatch.cc
@@ -18,7 +18,7 @@
 namespace zvec {
 namespace ailego {
 
-#if defined(__ARM_NEON)
+#if (defined(__ARM_NEON) || defined(_M_ARM64))
 float MipsEuclideanDistanceRepeatedQuadraticInjectionFp16NEON(
     const Float16 *lhs, const Float16 *rhs, size_t size, size_t m, float e2);
 float MipsEuclideanDistanceSphericalInjectionFp16NEON(const Float16 *lhs,
@@ -51,7 +51,7 @@ float MipsEuclideanDistanceSphericalInjectionFp16Scalar(
 //! Compute the distance between matrix and query by SphericalInjection
 void MipsSquaredEuclideanDistanceMatrix<Float16, 1, 1>::Compute(
     const ValueType *p, const ValueType *q, size_t dim, float e2, float *out) {
-#if defined(__ARM_NEON)
+#if (defined(__ARM_NEON) || defined(_M_ARM64))
   *out = MipsEuclideanDistanceSphericalInjectionFp16NEON(p, q, dim, e2);
 #else
 #if defined(__AVX512F__)
@@ -75,7 +75,7 @@ void MipsSquaredEuclideanDistanceMatrix<Float16, 1, 1>::Compute(
 void MipsSquaredEuclideanDistanceMatrix<Float16, 1, 1>::Compute(
     const ValueType *p, const ValueType *q, size_t dim, size_t m, float e2,
     float *out) {
-#if defined(__ARM_NEON)
+#if (defined(__ARM_NEON) || defined(_M_ARM64))
   *out =
       MipsEuclideanDistanceRepeatedQuadraticInjectionFp16NEON(p, q, dim, m, e2);
 #else
diff --git a/src/ailego/math/mips_euclidean_distance_matrix_fp16_neon.cc b/src/ailego/math/mips_euclidean_distance_matrix_fp16_neon.cc
index b4f4c970d..3d6628e76 100644
--- a/src/ailego/math/mips_euclidean_distance_matrix_fp16_neon.cc
+++ b/src/ailego/math/mips_euclidean_distance_matrix_fp16_neon.cc
@@ -19,7 +19,8 @@
 namespace zvec {
 namespace ailego {
 
-#if defined(__ARM_NEON) && defined(__aarch64__)
+#if (defined(__ARM_NEON) || defined(_M_ARM64)) && \
+    (defined(__aarch64__) || defined(_M_ARM64))
 #if defined(__ARM_FEATURE_FP16_VECTOR_ARITHMETIC)
 //! Compute the Inner Product between p and q, and each Squared L2-Norm value
 float InnerProductAndSquaredNormFp16NEON(const Float16 *lhs, const Float16 *rhs,
diff --git a/src/ailego/math/mips_euclidean_distance_matrix_fp32_dispatch.cc b/src/ailego/math/mips_euclidean_distance_matrix_fp32_dispatch.cc
index f48626a3f..37c8a1daf 100644
--- a/src/ailego/math/mips_euclidean_distance_matrix_fp32_dispatch.cc
+++ b/src/ailego/math/mips_euclidean_distance_matrix_fp32_dispatch.cc
@@ -18,7 +18,7 @@
 namespace zvec {
 namespace ailego {
 
-#if defined(__ARM_NEON)
+#if (defined(__ARM_NEON) || defined(_M_ARM64))
 float InnerProductAndSquaredNormFp32NEON(const float *lhs, const float *rhs,
                                          size_t size, float *sql, float *sqr);
 #endif
@@ -98,7 +98,7 @@ void MipsSquaredEuclideanDistanceMatrix<float, 1, 1>::Compute(
 void MipsSquaredEuclideanDistanceMatrix<float, 1, 1>::Compute(
     const ValueType *p, const ValueType *q, size_t dim, size_t m, float e2,
     float *out) {
-#if defined(__ARM_NEON)
+#if (defined(__ARM_NEON) || defined(_M_ARM64))
   float u2{0.0f};
   float v2{0.0f};
   float sum = InnerProductAndSquaredNormFp32NEON(p, q, dim, &u2, &v2);
diff --git a/src/ailego/math/mips_euclidean_distance_matrix_fp32_neon.cc b/src/ailego/math/mips_euclidean_distance_matrix_fp32_neon.cc
index 6491f2260..e5bff681e 100644
--- a/src/ailego/math/mips_euclidean_distance_matrix_fp32_neon.cc
+++ b/src/ailego/math/mips_euclidean_distance_matrix_fp32_neon.cc
@@ -19,7 +19,7 @@
 namespace zvec {
 namespace ailego {
 
-#if defined(__ARM_NEON)
+#if (defined(__ARM_NEON) || defined(_M_ARM64))
 //! Compute the Inner Product between p and q, and each Squared L2-Norm value
 float InnerProductAndSquaredNormFp32NEON(const float *lhs, const float *rhs,
                                          size_t size, float *sql, float *sqr) {
diff --git a/src/ailego/math/norm1_matrix.h b/src/ailego/math/norm1_matrix.h
index 7e8d9cbc8..3263f4c22 100644
--- a/src/ailego/math/norm1_matrix.h
+++ b/src/ailego/math/norm1_matrix.h
@@ -116,7 +116,8 @@ struct Norm1Matrix<
   }
 };
 
-#if defined(__SSE__) || (defined(__ARM_NEON) && defined(__aarch64__))
+#if defined(__SSE__) || ((defined(__ARM_NEON) || defined(_M_ARM64)) && \
+                         (defined(__aarch64__) || defined(_M_ARM64)))
 /*! L1-Norm Matrix (FP32, M=1)
  */
 template <>
@@ -129,8 +130,9 @@ struct Norm1Matrix<float, 1> {
 };
 #endif  // __SSE__ || (__ARM_NEON && __aarch64__)
 
-#if (defined(__F16C__) && defined(__AVX__)) || \
-    (defined(__ARM_NEON) && defined(__aarch64__))
+#if (defined(__F16C__) && defined(__AVX__)) ||     \
+    ((defined(__ARM_NEON) || defined(_M_ARM64)) && \
+     (defined(__aarch64__) || defined(_M_ARM64)))
 /*! L1-Norm Matrix (FP16, M=1)
  */
 template <>
diff --git a/src/ailego/math/norm1_matrix_fp16.cc b/src/ailego/math/norm1_matrix_fp16.cc
index e75b3e0a8..1060196ad 100644
--- a/src/ailego/math/norm1_matrix_fp16.cc
+++ b/src/ailego/math/norm1_matrix_fp16.cc
@@ -67,12 +67,13 @@ static const __m512 ABS_MASK_FP32_AVX512 =
 //! Calculate sum of absolute (NEON)
 #define SA_FP16_NEON(v_m, v_sum) v_sum = vaddq_f16(vabsq_f16(v_m), v_sum);
 
-#if (defined(__F16C__) && defined(__AVX__)) || \
-    (defined(__ARM_NEON) && defined(__aarch64__))
+#if (defined(__F16C__) && defined(__AVX__)) ||     \
+    ((defined(__ARM_NEON) || defined(_M_ARM64)) && \
+     (defined(__aarch64__) || defined(_M_ARM64)))
 //! Compute the L1-norm of vectors (FP16, M=1)
 void Norm1Matrix<Float16, 1>::Compute(const ValueType *m, size_t dim,
                                       float *out) {
-#if defined(__ARM_NEON)
+#if (defined(__ARM_NEON) || defined(_M_ARM64))
   NORM_FP16_1_NEON(m, dim, out, )
 #else
 #if defined(__AVX512F__)
diff --git a/src/ailego/math/norm1_matrix_fp32.cc b/src/ailego/math/norm1_matrix_fp32.cc
index 2e7279118..b4194b2f7 100644
--- a/src/ailego/math/norm1_matrix_fp32.cc
+++ b/src/ailego/math/norm1_matrix_fp32.cc
@@ -56,11 +56,12 @@ namespace ailego {
 //! Calculate sum of absolute (NEON)
 #define SA_FP32_NEON(v_m, v_sum) v_sum = vaddq_f32(vabsq_f32(v_m), v_sum);
 
-#if defined(__SSE__) || (defined(__ARM_NEON) && defined(__aarch64__))
+#if defined(__SSE__) || ((defined(__ARM_NEON) || defined(_M_ARM64)) && \
+                         (defined(__aarch64__) || defined(_M_ARM64)))
 //! Compute the L1-norm of vectors (FP32, M=1)
 void Norm1Matrix<float, 1>::Compute(const ValueType *m, size_t dim,
                                     float *out) {
-#if defined(__ARM_NEON)
+#if (defined(__ARM_NEON) || defined(_M_ARM64))
   NORM_FP32_1_NEON(m, dim, out, )
 #else
 #if defined(__AVX512F__)
diff --git a/src/ailego/math/norm2_matrix.h b/src/ailego/math/norm2_matrix.h
index 3c905147d..530653382 100644
--- a/src/ailego/math/norm2_matrix.h
+++ b/src/ailego/math/norm2_matrix.h
@@ -371,7 +371,8 @@ struct SquaredNorm2Matrix<uint8_t, M, typename std::enable_if<M >= 2>::type> {
   }
 };
 
-#if defined(__SSE__) || (defined(__ARM_NEON) && defined(__aarch64__))
+#if defined(__SSE__) || ((defined(__ARM_NEON) || defined(_M_ARM64)) && \
+                         (defined(__aarch64__) || defined(_M_ARM64)))
 /*! L2-Norm Matrix (FP32, M=1)
  */
 template <>
@@ -395,8 +396,9 @@ struct SquaredNorm2Matrix<float, 1> {
 };
 #endif  // __SSE__ || (__ARM_NEON && __aarch64__)
 
-#if (defined(__F16C__) && defined(__AVX__)) || \
-    (defined(__ARM_NEON) && defined(__aarch64__))
+#if (defined(__F16C__) && defined(__AVX__)) ||     \
+    ((defined(__ARM_NEON) || defined(_M_ARM64)) && \
+     (defined(__aarch64__) || defined(_M_ARM64)))
 /*! L2-Norm Matrix (FP16, M=1)
  */
 template <>
diff --git a/src/ailego/math/norm2_matrix_fp16.cc b/src/ailego/math/norm2_matrix_fp16.cc
index 6bb8dd06c..4a617fdd3 100644
--- a/src/ailego/math/norm2_matrix_fp16.cc
+++ b/src/ailego/math/norm2_matrix_fp16.cc
@@ -52,12 +52,13 @@ namespace ailego {
 //! Calculate sum of squared (NEON)
 #define SS_FP16_NEON(v_m, v_sum) v_sum = vfmaq_f16(v_sum, v_m, v_m);
 
-#if (defined(__F16C__) && defined(__AVX__)) || \
-    (defined(__ARM_NEON) && defined(__aarch64__))
+#if (defined(__F16C__) && defined(__AVX__)) ||     \
+    ((defined(__ARM_NEON) || defined(_M_ARM64)) && \
+     (defined(__aarch64__) || defined(_M_ARM64)))
 //! Compute the L2-norm of vectors (FP16, M=1)
 void Norm2Matrix<Float16, 1>::Compute(const ValueType *m, size_t dim,
                                       float *out) {
-#if defined(__ARM_NEON)
+#if (defined(__ARM_NEON) || defined(_M_ARM64))
   NORM_FP16_1_NEON(m, dim, out, std::sqrt)
 #else
 #if defined(__AVX512F__)
@@ -73,7 +74,7 @@ void Norm2Matrix<Float16, 1>::Compute(const ValueType *m, size_t dim,
 //! Compute the L2-norm of vectors (FP16, M=1)
 void SquaredNorm2Matrix<Float16, 1>::Compute(const ValueType *m, size_t dim,
                                              float *out) {
-#if defined(__ARM_NEON)
+#if (defined(__ARM_NEON) || defined(_M_ARM64))
   NORM_FP16_1_NEON(m, dim, out, )
 #else
 #if defined(__AVX512F__)
diff --git a/src/ailego/math/norm2_matrix_fp32.cc b/src/ailego/math/norm2_matrix_fp32.cc
index 8cc76c1f5..b1c1ad399 100644
--- a/src/ailego/math/norm2_matrix_fp32.cc
+++ b/src/ailego/math/norm2_matrix_fp32.cc
@@ -43,11 +43,12 @@ namespace ailego {
 //! Calculate sum of squared (NEON)
 #define SS_FP32_NEON(v_m, v_sum) v_sum = vfmaq_f32(v_sum, v_m, v_m);
 
-#if defined(__SSE__) || (defined(__ARM_NEON) && defined(__aarch64__))
+#if defined(__SSE__) || ((defined(__ARM_NEON) || defined(_M_ARM64)) && \
+                         (defined(__aarch64__) || defined(_M_ARM64)))
 //! Compute the L2-norm of vectors (FP32, M=1)
 void Norm2Matrix<float, 1>::Compute(const ValueType *m, size_t dim,
                                     float *out) {
-#if defined(__ARM_NEON)
+#if (defined(__ARM_NEON) || defined(_M_ARM64))
   NORM_FP32_1_NEON(m, dim, out, std::sqrt)
 #else
 #if defined(__AVX512F__)
@@ -69,7 +70,7 @@ void Norm2Matrix<float, 1>::Compute(const ValueType *m, size_t dim,
 //! Compute the squared L2-norm of vectors (FP32, M=1)
 void SquaredNorm2Matrix<float, 1>::Compute(const ValueType *m, size_t dim,
                                            float *out) {
-#if defined(__ARM_NEON)
+#if (defined(__ARM_NEON) || defined(_M_ARM64))
   NORM_FP32_1_NEON(m, dim, out, )
 #else
 #if defined(__AVX512F__)
diff --git a/src/ailego/math/normalizer.cc b/src/ailego/math/normalizer.cc
index a31a9f350..0b33929dd 100644
--- a/src/ailego/math/normalizer.cc
+++ b/src/ailego/math/normalizer.cc
@@ -17,7 +17,8 @@
 namespace zvec {
 namespace ailego {
 
-#if (defined(__ARM_NEON) && defined(__aarch64__))
+#if ((defined(__ARM_NEON) || defined(_M_ARM64)) && \
+     (defined(__aarch64__) || defined(_M_ARM64)))
 static inline void NormalizeNEON(float *arr, size_t dim, float norm) {
   float *last = arr + dim;
   float *last_aligned = arr + ((dim >> 3) << 3);
@@ -392,10 +393,11 @@ static inline void NormalizeSSE(float *arr, size_t dim, float norm) {
 }
 #endif  // __SSE__
 
-#if defined(__SSE__) || (defined(__ARM_NEON) && defined(__aarch64__))
+#if defined(__SSE__) || ((defined(__ARM_NEON) || defined(_M_ARM64)) && \
+                         (defined(__aarch64__) || defined(_M_ARM64)))
 //! Compute the norm of vector
 void Normalizer<float>::Compute(ValueType *arr, size_t dim, float norm) {
-#if defined(__ARM_NEON)
+#if (defined(__ARM_NEON) || defined(_M_ARM64))
   NormalizeNEON(arr, dim, norm);
 #else
 #if defined(__AVX512F__)
@@ -415,11 +417,12 @@ void Normalizer<float>::Compute(ValueType *arr, size_t dim, float norm) {
 }
 #endif  // __SSE__ || (__ARM_NEON && __aarch64__)
 
-#if (defined(__F16C__) && defined(__AVX__)) || \
-    (defined(__ARM_NEON) && defined(__aarch64__))
+#if (defined(__F16C__) && defined(__AVX__)) ||     \
+    ((defined(__ARM_NEON) || defined(_M_ARM64)) && \
+     (defined(__aarch64__) || defined(_M_ARM64)))
 //! Compute the norm of vector
 void Normalizer<Float16>::Compute(ValueType *arr, size_t dim, float norm) {
-#if defined(__ARM_NEON)
+#if (defined(__ARM_NEON) || defined(_M_ARM64))
   NormalizeNEON(reinterpret_cast<float16_t *>(arr), dim, norm);
 #else
 #if defined(__AVX512F__)
diff --git a/src/ailego/math/normalizer.h b/src/ailego/math/normalizer.h
index 2c191b0e7..52300284a 100644
--- a/src/ailego/math/normalizer.h
+++ b/src/ailego/math/normalizer.h
@@ -51,7 +51,8 @@ struct Normalizer {
   }
 };
 
-#if defined(__SSE__) || (defined(__ARM_NEON) && defined(__aarch64__))
+#if defined(__SSE__) || ((defined(__ARM_NEON) || defined(_M_ARM64)) && \
+                         (defined(__aarch64__) || defined(_M_ARM64)))
 /*! Normalizer (FP32)
  */
 template <>
@@ -80,8 +81,9 @@ struct Normalizer<float> {
 };
 #endif  // __SSE__ || (__ARM_NEON && __aarch64__)
 
-#if (defined(__F16C__) && defined(__AVX__)) || \
-    (defined(__ARM_NEON) && defined(__aarch64__))
+#if (defined(__F16C__) && defined(__AVX__)) ||     \
+    ((defined(__ARM_NEON) || defined(_M_ARM64)) && \
+     (defined(__aarch64__) || defined(_M_ARM64)))
 /*! Normalizer (FP16)
  */
 template <>
diff --git a/src/ailego/utility/bitset_helper.cc b/src/ailego/utility/bitset_helper.cc
index 19be34847..47d5c6238 100644
--- a/src/ailego/utility/bitset_helper.cc
+++ b/src/ailego/utility/bitset_helper.cc
@@ -23,7 +23,7 @@
 #define bitset_popcount64 _mm_popcnt_u64
 #endif  // !__SSE4_2__
 
-#if defined(__ARM_NEON)
+#if (defined(__ARM_NEON) || defined(_M_ARM64))
 static inline void bitset_and(uint32_t *lhs, const uint32_t *rhs, size_t size) {
   uint32_t *last = lhs + size;
   uint32_t *last_aligned = lhs + ((size >> 2) << 2);
@@ -1480,7 +1480,8 @@ static inline bool bitset_test_none(const uint32_t *lhs, size_t size) {
 #endif  // AILEGO_M64
 #endif  // __AVX2__
 
-#if (defined(__ARM_NEON) && defined(__aarch64__))
+#if ((defined(__ARM_NEON) || defined(_M_ARM64)) && \
+     (defined(__aarch64__) || defined(_M_ARM64)))
 static inline size_t bitset_cardinality(const uint32_t *lhs, size_t size) {
   const uint32_t *last = lhs + size;
   const uint32_t *last_aligned = lhs + ((size >> 2) << 2);
diff --git a/src/ailego/utility/float_helper.cc b/src/ailego/utility/float_helper.cc
index 6e33fedd7..a14c49e76 100644
--- a/src/ailego/utility/float_helper.cc
+++ b/src/ailego/utility/float_helper.cc
@@ -21,7 +21,10 @@
 // #define float32(x) _cvtsh_ss(x)
 // #endif  // __F16C__ && __AVX__
 
-#if defined(__aarch64__)
+// MSVC ARM64 lacks the GCC/Clang `__fp16` extension type, so keep this
+// path gated on `__aarch64__` (predefined only by GCC/Clang on AArch64).
+// MSVC ARM64 falls through to the F16C/scalar paths below.
+#if defined(__aarch64__) && !defined(_MSC_VER)
 static inline float float32(uint16_t val) {
   __fp16 *p = reinterpret_cast<__fp16 *>(&val);
   return *p;
diff --git a/src/ailego/version.i b/src/ailego/version.i
index c1b14be2e..f8e78d443 100644
--- a/src/ailego/version.i
+++ b/src/ailego/version.i
@@ -225,7 +225,7 @@
 #define AILEGO_VERSION_OPENMP ""
 #endif
 
-#if defined(__ARM_NEON)
+#if (defined(__ARM_NEON) || defined(_M_ARM64))
 #define AILEGO_VERSION_SIMD "  Arm Neon Instruction Set\n"
 #elif defined(__AVX512FP16__)
 #define AILEGO_VERSION_SIMD "  AVX-512FP16 Instruction Set\n"
diff --git a/src/include/zvec/ailego/buffer/concurrentqueue.h b/src/include/zvec/ailego/buffer/concurrentqueue.h
index f7f3d77ed..1bff8b08e 100644
--- a/src/include/zvec/ailego/buffer/concurrentqueue.h
+++ b/src/include/zvec/ailego/buffer/concurrentqueue.h
@@ -129,8 +129,9 @@ static inline thread_id_t thread_id() {
 }
 }  // namespace details
 }  // namespace moodycamel
-#elif defined(__arm__) || defined(_M_ARM) || defined(__aarch64__) || \
-    (defined(__APPLE__) && TARGET_OS_IPHONE) || defined(__MVS__) ||  \
+#elif defined(__arm__) || defined(_M_ARM) ||                        \
+    (defined(__aarch64__) || defined(_M_ARM64)) ||                  \
+    (defined(__APPLE__) && TARGET_OS_IPHONE) || defined(__MVS__) || \
     defined(MOODYCAMEL_NO_THREAD_LOCAL)
 namespace moodycamel {
 namespace details {
@@ -293,13 +294,14 @@ inline thread_id_t thread_id() {
 // support thread_local either. Finally, iOS/ARM doesn't have support for it
 // either, and g++/ARM allows it to compile but it's unconfirmed to actually
 // work
-#if (!defined(_MSC_VER) || _MSC_VER >= 1900) &&                        \
-    (!defined(__MINGW32__) && !defined(__MINGW64__) ||                 \
-     !defined(__WINPTHREADS_VERSION)) &&                               \
-    (!defined(__GNUC__) || __GNUC__ > 4 ||                             \
-     (__GNUC__ == 4 && __GNUC_MINOR__ >= 8)) &&                        \
-    (!defined(__APPLE__) || !TARGET_OS_IPHONE) && !defined(__arm__) && \
-    !defined(_M_ARM) && !defined(__aarch64__) && !defined(__MVS__)
+#if (!defined(_MSC_VER) || _MSC_VER >= 1900) &&                         \
+    (!defined(__MINGW32__) && !defined(__MINGW64__) ||                  \
+     !defined(__WINPTHREADS_VERSION)) &&                                \
+    (!defined(__GNUC__) || __GNUC__ > 4 ||                              \
+     (__GNUC__ == 4 && __GNUC_MINOR__ >= 8)) &&                         \
+    (!defined(__APPLE__) || !TARGET_OS_IPHONE) && !defined(__arm__) &&  \
+    !defined(_M_ARM) && !(defined(__aarch64__) || defined(_M_ARM64)) && \
+    !defined(__MVS__)
 // Assume `thread_local` is fully supported in all other C++11
 // compilers/platforms
 #define MOODYCAMEL_CPP11_THREAD_LOCAL_SUPPORTED  // tentatively enabled for now;
diff --git a/src/include/zvec/ailego/internal/platform.h b/src/include/zvec/ailego/internal/platform.h
index ccd33971e..614c0a206 100644
--- a/src/include/zvec/ailego/internal/platform.h
+++ b/src/include/zvec/ailego/internal/platform.h
@@ -30,13 +30,16 @@
 
 #if defined(_MSC_VER)
 #include <intrin.h>
+#if defined(_M_ARM64)
+#include <arm_neon.h>
+#endif
 #else
 #include <strings.h>
 #include <unistd.h>
 #if defined(__x86_64__) || defined(__i386)
 #include <x86intrin.h>
 #endif
-#if defined(__ARM_NEON)
+#if (defined(__ARM_NEON) || defined(_M_ARM64))
 #include <arm_neon.h>
 #endif
 #if defined(__ARM_FEATURE_CRC32)
@@ -110,7 +113,8 @@ extern "C" {
 #endif
 
 #if defined(__GNUC__)
-#if defined(__x86_64__) || defined(__aarch64__) || defined(__ppc64__)
+#if defined(__x86_64__) || (defined(__aarch64__) || defined(_M_ARM64)) || \
+    defined(__ppc64__)
 #define AILEGO_M64
 #else
 #define AILEGO_M32
@@ -228,7 +232,7 @@ static inline int ailego_clz64(uint64_t x) {
 #define ailego_popcount ailego_popcount32
 #endif  // AILEGO_M64
 
-#if defined(__arm__) || defined(__aarch64__)
+#if defined(__arm__) || (defined(__aarch64__) || defined(_M_ARM64))
 // ARMv7 Architecture Reference Manual (for YIELD)
 // ARM Compiler toolchain Compiler Reference (for __yield() instrinsic)
 #if defined(__CC_ARM)
@@ -273,11 +277,12 @@ static inline int ailego_clz64(uint64_t x) {
 #define ailego_malloc(SIZE) ailego_aligned_malloc((SIZE), 32)
 #elif defined(__SSE__)
 #define ailego_malloc(SIZE) ailego_aligned_malloc((SIZE), 16)
-#elif defined(__ARM_NEON)
+#elif (defined(__ARM_NEON) || defined(_M_ARM64))
 #define ailego_malloc(SIZE) ailego_aligned_malloc((SIZE), 16)
 #endif
 #endif  // !ailego_malloc
-#if (defined(__SSE__) || defined(__ARM_NEON)) && !defined(ailego_free)
+#if (defined(__SSE__) || (defined(__ARM_NEON) || defined(_M_ARM64))) && \
+    !defined(ailego_free)
 #define ailego_free ailego_aligned_free
 #endif
 #endif  // !__SANITIZE_ADDRESS__
diff --git a/src/include/zvec/ailego/utility/float_helper.h b/src/include/zvec/ailego/utility/float_helper.h
index 5dc2fe69a..d32eb3404 100644
--- a/src/include/zvec/ailego/utility/float_helper.h
+++ b/src/include/zvec/ailego/utility/float_helper.h
@@ -52,7 +52,10 @@ struct FloatHelper {
   }
 };
 
-#if !defined(__aarch64__)
+// The `#else` branch below stores `Float16::value_` as `__fp16` — a GCC/Clang
+// extension type that MSVC does not provide even on ARM64. Keep the uint16_t
+// storage path for MSVC (including MSVC ARM64) so the wrapper compiles.
+#if !defined(__aarch64__) || defined(_MSC_VER)
 /*! Half-Precision Floating Point
  */
 class Float16 {
diff --git a/thirdparty/arrow/CMakeLists.txt b/thirdparty/arrow/CMakeLists.txt
index 0c4603b65..7b8b2a2b0 100644
--- a/thirdparty/arrow/CMakeLists.txt
+++ b/thirdparty/arrow/CMakeLists.txt
@@ -12,6 +12,10 @@ endif()
 if(MSVC)
         set(ARROW_WIN_PATCH ${CMAKE_CURRENT_SOURCE_DIR}/arrow.windows.patch)
         apply_patch_once("arrow_windows_crt_fix" "${ARROW_SRC_DIR}" "${ARROW_WIN_PATCH}")
+        if(CMAKE_SYSTEM_PROCESSOR MATCHES "^(ARM64|arm64|aarch64)$")
+                set(ARROW_WIN_ARM64_PATCH ${CMAKE_CURRENT_SOURCE_DIR}/arrow.windows-arm64.patch)
+                apply_patch_once("arrow_windows_arm64_fix" "${ARROW_SRC_DIR}" "${ARROW_WIN_ARM64_PATCH}")
+        endif()
 endif()
 
 include(ExternalProject)
@@ -105,6 +109,14 @@ elseif (MSVC)
         -DARROW_USE_STATIC_CRT=${ZVEC_USE_STATIC_CRT}
         "-DCMAKE_MSVC_RUNTIME_LIBRARY=${_ARROW_MSVC_RUNTIME}"
     )
+    # Arrow 21.0's xsimd-13 does not provide make_sized_batch_t for MSVC ARM64,
+    # so disable SIMD on that target. x86/x64 MSVC keeps the default SSE4.2 path.
+    if(CMAKE_SYSTEM_PROCESSOR MATCHES "^(ARM64|arm64|aarch64)$")
+        list(APPEND ARROW_EXTRA_CMAKE_ARGS
+            -DARROW_SIMD_LEVEL=NONE
+            -DARROW_RUNTIME_SIMD_LEVEL=NONE
+        )
+    endif()
         ExternalProject_Add(
                 ARROW.BUILD PREFIX arrow
                 SOURCE_DIR ${CMAKE_CURRENT_SOURCE_DIR}/apache-arrow-21.0.0
diff --git a/thirdparty/arrow/arrow.windows-arm64.patch b/thirdparty/arrow/arrow.windows-arm64.patch
new file mode 100644
index 000000000..f815a0c49
--- /dev/null
+++ b/thirdparty/arrow/arrow.windows-arm64.patch
@@ -0,0 +1,35 @@
+diff --git a/cpp/src/arrow/vendored/pcg/pcg_uint128.hpp b/cpp/src/arrow/vendored/pcg/pcg_uint128.hpp
+index 0181e69e4e..349e3b6bfa 100644
+--- a/cpp/src/arrow/vendored/pcg/pcg_uint128.hpp
++++ b/cpp/src/arrow/vendored/pcg/pcg_uint128.hpp
+@@ -67,7 +67,8 @@
+         #define PCG_LITTLE_ENDIAN 1
+     #elif __BIG_ENDIAN__ || _BIG_ENDIAN
+         #define PCG_LITTLE_ENDIAN 0
+-    #elif __x86_64 || __x86_64__ || _M_X64 || __i386 || __i386__ || _M_IX86
++    #elif __x86_64 || __x86_64__ || _M_X64 || __i386 || __i386__ || _M_IX86 \
++          || _M_ARM64 || _M_ARM || __aarch64__ || __arm__
+         #define PCG_LITTLE_ENDIAN 1
+     #elif __powerpc__ || __POWERPC__ || __ppc__ || __PPC__ \
+           || __m68k__ || __mc68000__
+@@ -733,7 +734,7 @@ uint_x4<UInt,UIntX2> operator*(const uint_x4<UInt,UIntX2>& a,
+ }
+ 
+ #if PCG_64BIT_SPECIALIZATIONS
+-#if defined(_MSC_VER)
++#if defined(_MSC_VER) && !defined(_M_ARM64) && !defined(_M_ARM)
+ #pragma intrinsic(_umul128)
+ #endif
+ 
+@@ -742,7 +743,10 @@ template <typename UInt32>
+ uint_x4<UInt32,uint64_t> operator*(const uint_x4<UInt32,uint64_t>& a,
+ 				   const uint_x4<UInt32,uint64_t>& b)
+ {
+-#if defined(_MSC_VER)
++#if defined(_MSC_VER) && (defined(_M_ARM64) || defined(_M_ARM))
++    uint64_t lo = a.d.v01 * b.d.v01;
++    uint64_t hi = __umulh(a.d.v01, b.d.v01);
++#elif defined(_MSC_VER)
+     uint64_t hi;
+     uint64_t lo = _umul128(a.d.v01, b.d.v01, &hi);
+ #else