Skip to content

Commit b6755b6

Browse files
zaibie01 authored and meta-codesync[bot] committed
Add ARM architecture support for adsim (#522)
Summary:
Pull Request resolved: #522
- Extend build options and instructions to support ARM beyond x86
- Correct package installation errors in the Ubuntu environment
- Fix failed cp operations that blocked the build process
- Upgrade FBGEMM version to 1.4 for consistency with the fbgemm benchmark

Pull Request resolved: #258

Test Plan: Tested on x86 Bergamo and aarch64 t11_GRC_ARM, and the installation works for both.

Reviewed By: excelle08
Differential Revision: D96225562
Pulled By: ahmadelyoussef
fbshipit-source-id: a0e3135763d6b2959bd38d0a1dcb49ef54966040
1 parent 030c922 commit b6755b6

5 files changed

Lines changed: 64 additions & 25 deletions

File tree

packages/adsim/buildfiles/fbgemm/cmake.txt

Lines changed: 3 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -39,6 +39,9 @@ cp libfbgemm.so* "${ADSIM_STAGING_DIR}/lib/" 2>/dev/null || true
3939
# Copy headers from source
4040
cp -r ../include/fbgemm "${ADSIM_STAGING_DIR}/include/" 2>/dev/null || true
4141

42+
# Copy asmjit headers (new dependency in FBGEMM v1.4.0)
43+
cp -r ../external/asmjit/src/asmjit "${ADSIM_STAGING_DIR}/include/"
44+
4245
echo "[BUILD] FBGEMM files copied to staging directory"
4346

4447
# Try make install but don't fail if it errors

packages/adsim/install_fbgemm.sh

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -16,10 +16,10 @@
1616
MINICONDA_PREFIX=${FBGEMM_STAGING_DIR}/miniconda
1717

1818
# Version of FBGEMM to install
19-
FBGEMM_VERSION=v1.2.0
19+
FBGEMM_VERSION=v1.4.0
2020

2121
# Version of PyTorch to install
22-
PYTORCH_VERSION=2.7.0
22+
PYTORCH_VERSION=2.8.0
2323

2424
MINICONDA_VERSION="5.1-0"
2525

packages/adsim/patches/treadmill.patch

Lines changed: 16 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -1,7 +1,7 @@
11
diff -wbBdu -ruN '--exclude=.git' '--exclude=*.rej' '--exclude=*.orig' '--exclude=gen-cpp2' '--exclude=build' '--exclude=third_party' treadmill-src/build.sh treadmill/build.sh
22
--- treadmill-src/build.sh 1969-12-31 16:00:00.000000000 -0800
33
+++ treadmill/build.sh 2025-08-04 15:44:21.731476550 -0700
4-
@@ -0,0 +1,42 @@
4+
@@ -0,0 +1,47 @@
55
+#!/bin/bash
66
+
77
+# Exit on error
@@ -15,7 +15,12 @@ diff -wbBdu -ruN '--exclude=.git' '--exclude=*.rej' '--exclude=*.orig' '--exclud
1515
+INSTALL=false
1616
+
1717
+
18-
+sudo dnf install -y numactl-devel
18+
+if command -v dnf >/dev/null 2>&1; then
19+
+ sudo dnf install -y numactl-devel
20+
+elif command -v apt-get >/dev/null 2>&1; then
21+
+ sudo apt-get update
22+
+ sudo apt-get install -y libnuma-dev numactl
23+
+fi
1924
+# Create build directory
2025
+mkdir -p build
2126
+cd build
@@ -6977,7 +6982,7 @@ diff -wbBdu -ruN '--exclude=.git' '--exclude=*.rej' '--exclude=*.orig' '--exclud
69776982
diff -wbBdu -ruN '--exclude=.git' '--exclude=*.rej' '--exclude=*.orig' '--exclude=gen-cpp2' '--exclude=build' '--exclude=third_party' treadmill-src/src/Scheduler.cpp treadmill/src/Scheduler.cpp
69786983
--- treadmill-src/src/Scheduler.cpp 1969-12-31 16:00:00.000000000 -0800
69796984
+++ treadmill/src/Scheduler.cpp 2025-08-04 15:44:21.785297704 -0700
6980-
@@ -0,0 +1,107 @@
6985+
@@ -0,0 +1,113 @@
69816986
+/*
69826987
+ * Copyright (c) 2014, Facebook, Inc.
69836988
+ * All rights reserved.
@@ -7039,8 +7044,14 @@ diff -wbBdu -ruN '--exclude=.git' '--exclude=*.rej' '--exclude=*.orig' '--exclud
70397044
+ to avoid memory order violation, which greatly improves its performance.
70407045
+ http://siyobik.info.gf/main/reference/instruction/PAUSE */
70417046
+ for (auto start = nowNs(); nowNs() - start < ns;) {
7042-
+ asm volatile("pause");
7043-
+ }
7047+
+#if defined(__x86_64__) || defined(__i386__)
7048+
+ asm volatile("pause");
7049+
+#elif defined(__aarch64__) || defined(__arm64__)
7050+
+ asm volatile("yield" ::: "memory");
7051+
+#else
7052+
+ asm volatile("" ::: "memory");
7053+
+#endif
7054+
+}
70447055
+}
70457056
+
70467057
+/**

packages/adsim/src/cpp2/server/CMakeLists.txt

Lines changed: 17 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -9,8 +9,14 @@ elseif(CMAKE_SYSTEM_PROCESSOR MATCHES "aarch64|arm64")
99
endif()
1010

1111
target_compile_definitions(folly_memcpy_obj PRIVATE FOLLY_MEMCPY_IS_MEMCPY)
12-
target_compile_options(folly_memcpy_obj PRIVATE
12+
if(CMAKE_SYSTEM_PROCESSOR MATCHES "x86_64|amd64")
13+
target_compile_options(folly_memcpy_obj PRIVATE
1314
-mavx2 -march=haswell)
15+
elseif(CMAKE_SYSTEM_PROCESSOR MATCHES "aarch64|arm64")
16+
target_compile_options(folly_memcpy_obj PRIVATE
17+
-march=armv8.5-a+sve2
18+
-mcpu=native)
19+
endif()
1420

1521
# Data objects library
1622
add_library(data_objects DataObjects.cpp)
@@ -61,13 +67,18 @@ target_link_libraries(adsim_server
6167
Atomic::Atomic
6268
${FMT_LIBRARIES}
6369
${JEMALLOC_LIB}
64-
$<LINK_LIBRARY:WHOLE_ARCHIVE,folly_memcpy_obj>
6570
)
6671

72+
# Link folly_memcpy_obj only on x86 — on ARM the custom memmove causes
73+
# R_AARCH64_CALL26 relocation overflows and is a no-op anyway
74+
# (FOLLY_MEMCPY_IS_MEMCPY is defined).
75+
if(CMAKE_SYSTEM_PROCESSOR MATCHES "x86_64|amd64")
76+
target_link_libraries(adsim_server
77+
PUBLIC $<LINK_LIBRARY:WHOLE_ARCHIVE,folly_memcpy_obj>)
78+
target_compile_options(adsim_server PRIVATE
79+
-fno-builtin-mempcpy
80+
-fno-builtin-memmove)
81+
endif()
6782

6883
# Linker and compiler options
6984
target_link_options(adsim_server PRIVATE "-Wl,--export-dynamic")
70-
target_compile_options(adsim_server PRIVATE
71-
-fno-builtin-mempcpy
72-
-fno-builtin-memmove
73-
)

packages/adsim/src/cpp2/server/dwarfs/CMakeLists.txt

Lines changed: 26 additions & 12 deletions
Original file line number | Diff line number | Diff line change
@@ -20,12 +20,19 @@ add_library(gemm GEMM.cc GEMM.h
2020
${FBGEMM_SRC_DIR}/test/QuantizationHelpers.cc
2121
)
2222

23-
target_compile_options(gemm PRIVATE
24-
${COROUTINES_FLAG}
25-
-m64
26-
-mavx2
27-
-mfma
28-
-masm=intel)
23+
if(CMAKE_SYSTEM_PROCESSOR MATCHES "x86_64|amd64")
24+
target_compile_options(gemm PRIVATE
25+
${COROUTINES_FLAG}
26+
-m64
27+
-mavx2
28+
-mfma
29+
-masm=intel)
30+
elseif(CMAKE_SYSTEM_PROCESSOR MATCHES "aarch64|arm64")
31+
target_compile_options(gemm PRIVATE
32+
${COROUTINES_FLAG}
33+
-march=armv8.5-a+sve2
34+
-mcpu=native)
35+
endif()
2936
target_link_directories(gemm
3037
PUBLIC
3138
${ADSIM_STAGING_DIR}/include
@@ -40,12 +47,19 @@ add_dependencies(gemm fbgemm)
4047

4148
add_library(embedding Embedding.cc Embedding.h)
4249

43-
target_compile_options(embedding PRIVATE
44-
${COROUTINES_FLAG}
45-
-m64
46-
-mavx2
47-
-mfma
48-
-masm=intel)
50+
if(CMAKE_SYSTEM_PROCESSOR MATCHES "x86_64|amd64")
51+
target_compile_options(gemm PRIVATE
52+
${COROUTINES_FLAG}
53+
-m64
54+
-mavx2
55+
-mfma
56+
-masm=intel)
57+
elseif(CMAKE_SYSTEM_PROCESSOR MATCHES "aarch64|arm64")
58+
target_compile_options(gemm PRIVATE
59+
${COROUTINES_FLAG}
60+
-march=armv8.5-a+sve2
61+
-mcpu=native)
62+
endif()
4963
target_link_directories(embedding
5064
PUBLIC
5165
${ADSIM_STAGING_DIR}/include

0 commit comments

Comments
 (0)