Skip to content

Commit db07496

Browse files
committed
Link statically
Pull Request resolved: #2085 Link everything into monarch statically, or dlopen it. This makes the rdma related libraries and libc++ linked statically for increased portability. This means monarch built with rdma support and with a different version of libc++ than the current system can still be used. By building rdma-core from source, we can also make sure Dennis' 64-bit patch is applied. Total size of our so is 63MB now with no uncommon deps: ``` (monarch) [zdevito@devgpu014 /data/users/zdevito/fbsource/fbcode/monarch]$ ldd ./python/monarch/_rust_bindings.so linux-vdso.so.1 (0x00007f347a684000) libpython3.11.so.1.0 => /home/zdevito/local/miniconda3/envs/monarch/lib/libpython3.11.so.1.0 (0x00007f3477200000) libgcc_s.so.1 => /home/zdevito/local/miniconda3/envs/monarch/lib/libgcc_s.so.1 (0x00007f347a664000) libm.so.6 => /lib64/libm.so.6 (0x00007f3477125000) libc.so.6 => /lib64/libc.so.6 (0x00007f3476e00000) /lib64/ld-linux-x86-64.so.2 (0x00007f347a686000) libpthread.so.0 => /lib64/libpthread.so.0 (0x00007f347a64a000) libdl.so.2 => /lib64/libdl.so.2 (0x00007f347a645000) libutil.so.1 => /lib64/libutil.so.1 (0x00007f347a640000) (monarch) [zdevito@devgpu014 /data/users/zdevito/fbsource/fbcode/monarch]$ du -h ./python/monarch/_rust_bindings.so 63M ./python/monarch/_rust_bindings.so ``` ghstack-source-id: 328253183 Differential Revision: [D88540873](https://our.internmc.facebook.com/intern/diff/D88540873/)
1 parent f58e5eb commit db07496

File tree

10 files changed

+428
-32
lines changed

10 files changed

+428
-32
lines changed

.gitignore

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -33,4 +33,3 @@ docs/_build/**
3333
docs/build/**
3434
docs/**/generated/**
3535
*/sg_execution_times.rst
36-
nccl/**

Cargo.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@ members = [
1717
"monarch_rdma",
1818
"monarch_tensor_worker",
1919
"monarch_types",
20+
"monarch_cpp_static_libs",
2021
"nccl-sys",
2122
"ndslice",
2223
"preempt_rwlock",

build_utils/src/lib.rs

Lines changed: 94 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -276,6 +276,100 @@ pub fn print_cuda_lib_error_help() {
276276
eprintln!("Or: export CUDA_LIB_DIR=/usr/lib64");
277277
}
278278

279+
/// Emit cargo directives to statically link libstdc++
280+
///
281+
/// This finds the GCC library path containing libstdc++.a and emits the
282+
/// appropriate cargo directives to link it statically. This avoids runtime
283+
/// dependency on system libstdc++.so which can cause GLIBCXX version conflicts.
284+
///
285+
/// Uses the `cc` crate to detect the C++ compiler, ensuring we use the same
286+
/// compiler that `cc::Build` and `cxx_build` would use.
287+
pub fn link_libstdcpp_static() {
288+
// Use cc crate to get the C++ compiler, same as cc::Build and cxx_build use
289+
let compiler = cc::Build::new().cpp(true).get_compiler();
290+
let gcc_lib_path = std::process::Command::new(compiler.path())
291+
.args(["-print-file-name=libstdc++.a"])
292+
.output()
293+
.ok()
294+
.and_then(|output| {
295+
if output.status.success() {
296+
String::from_utf8(output.stdout).ok().and_then(|s| {
297+
let path = PathBuf::from(s.trim());
298+
path.parent().map(|p| p.to_path_buf())
299+
})
300+
} else {
301+
None
302+
}
303+
});
304+
if let Some(gcc_lib_path) = gcc_lib_path {
305+
println!("cargo:rustc-link-search=native={}", gcc_lib_path.display());
306+
}
307+
println!("cargo:rustc-link-lib=static=stdc++");
308+
}
309+
310+
/// Configuration for rdma-core static libraries from monarch_cpp_static_libs.
///
/// Use `CppStaticLibsConfig::from_env()` to get the paths, then use the include
/// paths for bindgen/cc, and call `emit_link_directives()` to link.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct CppStaticLibsConfig {
    /// Include path, read from `DEP_MONARCH_CPP_STATIC_LIBS_RDMA_INCLUDE`.
    pub rdma_include: String,
    /// Link search path for the rdma-core static archives, read from
    /// `DEP_MONARCH_CPP_STATIC_LIBS_RDMA_LIB_DIR`.
    pub rdma_lib_dir: String,
    /// Link search path for the rdma_util helper library, read from
    /// `DEP_MONARCH_CPP_STATIC_LIBS_RDMA_UTIL_DIR`.
    pub rdma_util_dir: String,
}
319+
320+
impl CppStaticLibsConfig {
321+
/// Load configuration from DEP_* environment variables set by monarch_cpp_static_libs.
322+
///
323+
/// The monarch_cpp_static_libs crate must be listed as a build-dependency.
324+
pub fn from_env() -> Self {
325+
Self {
326+
rdma_include: std::env::var("DEP_MONARCH_CPP_STATIC_LIBS_RDMA_INCLUDE")
327+
.expect("DEP_MONARCH_CPP_STATIC_LIBS_RDMA_INCLUDE not set - add monarch_cpp_static_libs as build-dependency"),
328+
rdma_lib_dir: std::env::var("DEP_MONARCH_CPP_STATIC_LIBS_RDMA_LIB_DIR")
329+
.expect("DEP_MONARCH_CPP_STATIC_LIBS_RDMA_LIB_DIR not set - add monarch_cpp_static_libs as build-dependency"),
330+
rdma_util_dir: std::env::var("DEP_MONARCH_CPP_STATIC_LIBS_RDMA_UTIL_DIR")
331+
.expect("DEP_MONARCH_CPP_STATIC_LIBS_RDMA_UTIL_DIR not set - add monarch_cpp_static_libs as build-dependency"),
332+
}
333+
}
334+
335+
/// Emit all cargo link directives for static linking of rdma-core.
336+
///
337+
/// This emits search paths and link-lib directives for:
338+
/// - libmlx5.a
339+
/// - libibverbs.a
340+
/// - librdma_util.a
341+
pub fn emit_link_directives(&self) {
342+
// Emit link search paths
343+
println!("cargo::rustc-link-search=native={}", self.rdma_lib_dir);
344+
println!("cargo::rustc-link-search=native={}", self.rdma_util_dir);
345+
346+
// Use whole-archive for rdma-core static libraries
347+
println!("cargo::rustc-link-arg=-Wl,--whole-archive");
348+
println!("cargo::rustc-link-lib=static=mlx5");
349+
println!("cargo::rustc-link-lib=static=ibverbs");
350+
println!("cargo::rustc-link-arg=-Wl,--no-whole-archive");
351+
352+
// rdma_util helper library
353+
println!("cargo::rustc-link-lib=static=rdma_util");
354+
}
355+
}
356+
357+
/// Convenience function to set up rdma-core static linking.
358+
///
359+
/// Returns the config with include paths, and emits all link directives.
360+
/// The monarch_cpp_static_libs crate must be listed as a build-dependency.
361+
///
362+
/// Example:
363+
/// ```ignore
364+
/// let config = build_utils::setup_cpp_static_libs();
365+
/// // Use config.rdma_include for bindgen/cc
366+
/// ```
367+
pub fn setup_cpp_static_libs() -> CppStaticLibsConfig {
368+
let config = CppStaticLibsConfig::from_env();
369+
config.emit_link_directives();
370+
config
371+
}
372+
279373
#[cfg(test)]
280374
mod tests {
281375
use super::*;

monarch_cpp_static_libs/build.rs

Lines changed: 277 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,277 @@
1+
/*
2+
* Copyright (c) Meta Platforms, Inc. and affiliates.
3+
* All rights reserved.
4+
*
5+
* This source code is licensed under the BSD-style license found in the
6+
* LICENSE file in the root directory of this source tree.
7+
*/
8+
9+
//! Static rdma-core build script
10+
//!
11+
//! This build script:
12+
//! 1. Obtains rdma-core source (from MONARCH_RDMA_CORE_SRC or by cloning)
13+
//! 2. Builds rdma-core with static libraries (libibverbs.a, libmlx5.a)
14+
//! 3. Emits link directives for downstream crates
15+
16+
use std::path::Path;
17+
use std::path::PathBuf;
18+
use std::process::Command;
19+
20+
// Repository configuration
21+
// Upstream rdma-core repository, cloned when MONARCH_RDMA_CORE_SRC is not set.
const RDMA_CORE_REPO: &str = "https://github.com/linux-rdma/rdma-core";
22+
// Revision passed to `git checkout` after cloning; the build messages report
// it as a commit rather than a tag.
const RDMA_CORE_TAG: &str = "224154663a9ad5b1ad5629fb76a0c40c675fb936";
23+
24+
#[cfg(target_os = "macos")]
25+
fn main() {}
26+
27+
#[cfg(not(target_os = "macos"))]
28+
fn main() {
29+
let out_dir = PathBuf::from(std::env::var("OUT_DIR").expect("OUT_DIR not set"));
30+
let vendor_dir = out_dir.join("vendor");
31+
std::fs::create_dir_all(&vendor_dir).expect("Failed to create vendor directory");
32+
33+
let rdma_core_dir = vendor_dir.join("rdma-core");
34+
35+
// Get or clone rdma-core source
36+
get_or_clone_rdma_core(&rdma_core_dir);
37+
38+
// Build rdma-core
39+
let rdma_build_dir = build_rdma_core(&rdma_core_dir);
40+
41+
// Emit link directives
42+
emit_link_directives(&rdma_build_dir);
43+
}
44+
45+
/// Get or clone rdma-core source.
46+
///
47+
/// If MONARCH_RDMA_CORE_SRC is set, copies from that directory.
48+
/// Otherwise, clones from GitHub at the specified tag.
49+
fn get_or_clone_rdma_core(target_dir: &Path) {
50+
// Skip if already exists
51+
if target_dir.exists() {
52+
println!(
53+
"cargo:warning=rdma-core source already exists at {}",
54+
target_dir.display()
55+
);
56+
return;
57+
}
58+
59+
// Check for MONARCH_RDMA_CORE_SRC environment variable
60+
println!("cargo:rerun-if-env-changed=MONARCH_RDMA_CORE_SRC");
61+
if let Ok(src_path) = std::env::var("MONARCH_RDMA_CORE_SRC") {
62+
let src_dir = PathBuf::from(src_path);
63+
println!(
64+
"cargo:warning=Using rdma-core source from MONARCH_RDMA_CORE_SRC: {}",
65+
src_dir.display()
66+
);
67+
copy_dir(&src_dir, target_dir);
68+
} else {
69+
println!(
70+
"cargo:warning=MONARCH_RDMA_CORE_SRC not set, cloning from {} (commit {})",
71+
RDMA_CORE_REPO, RDMA_CORE_TAG
72+
);
73+
clone_rdma_core(target_dir);
74+
}
75+
}
76+
77+
/// Clone rdma-core from GitHub at the specified commit.
78+
fn clone_rdma_core(target_dir: &Path) {
79+
// First, clone the repository without checking out
80+
let status = Command::new("git")
81+
.args([
82+
"clone",
83+
"--no-checkout",
84+
RDMA_CORE_REPO,
85+
target_dir.to_str().unwrap(),
86+
])
87+
.status()
88+
.expect("Failed to execute git clone");
89+
90+
if !status.success() {
91+
panic!("Failed to clone rdma-core from {}", RDMA_CORE_REPO);
92+
}
93+
94+
// Then checkout the specific commit
95+
let status = Command::new("git")
96+
.args(["checkout", RDMA_CORE_TAG])
97+
.current_dir(target_dir)
98+
.status()
99+
.expect("Failed to execute git checkout");
100+
101+
if !status.success() {
102+
panic!("Failed to checkout rdma-core commit {}", RDMA_CORE_TAG);
103+
}
104+
105+
println!(
106+
"cargo:warning=Successfully cloned rdma-core at commit {}",
107+
RDMA_CORE_TAG
108+
);
109+
}
110+
111+
/// Recursively copy `src_dir` to `target_dir` using `cp -r`.
///
/// Does nothing (beyond a cargo warning) when `target_dir` already exists.
///
/// # Panics
///
/// Panics if `cp` cannot be executed or exits unsuccessfully.
fn copy_dir(src_dir: &Path, target_dir: &Path) {
    if target_dir.exists() {
        println!(
            "cargo:warning=Directory already exists at {}",
            target_dir.display()
        );
        return;
    }

    println!(
        "cargo:warning=Copying {} to {}",
        src_dir.display(),
        target_dir.display()
    );

    // Hand the paths over as OS strings: converting them to `&str` first
    // would panic on paths that are not valid UTF-8.
    let status = Command::new("cp")
        .arg("-r")
        .arg(src_dir)
        .arg(target_dir)
        .status()
        .expect("Failed to execute cp");

    if !status.success() {
        panic!(
            "Failed to copy from {} to {}",
            src_dir.display(),
            target_dir.display()
        );
    }
}
143+
144+
/// Return the first candidate that can be run with `--version` and exits
/// successfully, or `None` when no candidate does.
fn find_tool(candidates: &[&'static str]) -> Option<&'static str> {
    candidates.iter().copied().find(|tool| {
        Command::new(tool)
            .arg("--version")
            // The probe output is irrelevant; keep it out of the build
            // script's own stdout/stderr.
            .stdout(std::process::Stdio::null())
            .stderr(std::process::Stdio::null())
            .status()
            .map(|status| status.success())
            .unwrap_or(false)
    })
}

/// Configure and build the rdma-core static libraries in `<rdma_core_dir>/build`.
///
/// Returns the build directory. The build is skipped when all three required
/// archives (libibverbs.a, libmlx5.a, librdma_util.a) already exist there.
/// Ninja is used when `ninja-build` or `ninja` is available, `make` otherwise.
///
/// # Panics
///
/// Panics if the build directory cannot be created, if cmake or the build tool
/// cannot be executed, or if configuration or any target build fails.
fn build_rdma_core(rdma_core_dir: &Path) -> PathBuf {
    let build_dir = rdma_core_dir.join("build");

    // Build only the targets we need: libibverbs.a, libmlx5.a, and librdma_util.a
    // We don't need librdmacm which has build issues with long paths
    let targets = [
        "lib/statics/libibverbs.a",
        "lib/statics/libmlx5.a",
        "util/librdma_util.a",
    ];

    // Check if already built. Every artifact is checked so that a build which
    // failed part-way through is retried instead of being treated as complete.
    if targets.iter().all(|target| build_dir.join(target).exists()) {
        println!("cargo:warning=rdma-core already built");
        return build_dir;
    }

    std::fs::create_dir_all(&build_dir).expect("Failed to create rdma-core build directory");

    println!("cargo:warning=Building rdma-core...");

    // Detect cmake command; fall back to plain `cmake` so a missing tool is
    // reported by the configure step below.
    let cmake = find_tool(&["cmake3", "cmake"]).unwrap_or("cmake");

    // Detect ninja once, preferring `ninja-build` over `ninja`.
    let ninja = find_tool(&["ninja-build", "ninja"]);

    // CMake configuration
    // IMPORTANT: -DCMAKE_POSITION_INDEPENDENT_CODE=ON is required for static libs
    // that will be linked into a shared object (.so)
    let mut cmake_args = vec![
        "-DIN_PLACE=1",
        "-DENABLE_STATIC=1",
        "-DENABLE_RESOLVE_NEIGH=0",
        "-DNO_PYVERBS=1",
        "-DNO_MAN_PAGES=1",
        "-DCMAKE_POSITION_INDEPENDENT_CODE=ON",
        "-DCMAKE_C_FLAGS=-fPIC",
        "-DCMAKE_CXX_FLAGS=-fPIC",
    ];

    if ninja.is_some() {
        cmake_args.push("-GNinja");
    }

    cmake_args.push("..");

    let status = Command::new(cmake)
        .current_dir(&build_dir)
        .args(&cmake_args)
        .status()
        .expect("Failed to run cmake for rdma-core");

    if !status.success() {
        panic!("Failed to configure rdma-core with cmake");
    }

    // Job count for the make fallback, computed once for all targets.
    let num_jobs = std::thread::available_parallelism()
        .map(|p| p.get())
        .unwrap_or(4)
        .to_string();

    for target in targets {
        let status = match ninja {
            Some(ninja_cmd) => Command::new(ninja_cmd)
                .current_dir(&build_dir)
                .arg(target)
                .status()
                .expect("Failed to run ninja for rdma-core"),
            None => Command::new("make")
                .current_dir(&build_dir)
                .args(["-j", num_jobs.as_str(), target])
                .status()
                .expect("Failed to run make for rdma-core"),
        };

        if !status.success() {
            panic!("Failed to build rdma-core target: {}", target);
        }
    }

    println!("cargo:warning=rdma-core build complete");
    build_dir
}
245+
246+
/// Print the cargo directives that describe the rdma-core build output.
///
/// Emits, in order: the two native link search paths (`lib/statics` and
/// `util` under `rdma_build_dir`), the static link requests for mlx5,
/// ibverbs and rdma_util, the `RDMA_INCLUDE` / `RDMA_LIB_DIR` /
/// `RDMA_UTIL_DIR` metadata entries, and a rerun trigger for `build.rs`.
fn emit_link_directives(rdma_build_dir: &Path) {
    let include_dir = rdma_build_dir.join("include");
    let static_lib_dir = rdma_build_dir.join("lib/statics");
    let util_lib_dir = rdma_build_dir.join("util");

    // Search paths
    for dir in [&static_lib_dir, &util_lib_dir] {
        println!("cargo:rustc-link-search=native={}", dir.display());
    }

    // mlx5 and ibverbs sit between the whole-archive linker arguments;
    // the rdma_util helper library follows as a plain static library.
    let link_directives = [
        "cargo:rustc-link-arg=-Wl,--whole-archive",
        "cargo:rustc-link-lib=static=mlx5",
        "cargo:rustc-link-lib=static=ibverbs",
        "cargo:rustc-link-arg=-Wl,--no-whole-archive",
        "cargo:rustc-link-lib=static=rdma_util",
    ];
    for directive in link_directives {
        println!("{}", directive);
    }

    // Metadata exported to dependent crates, printed with the `cargo::`
    // (double colon) prefix.
    println!("cargo::metadata=RDMA_INCLUDE={}", include_dir.display());
    println!("cargo::metadata=RDMA_LIB_DIR={}", static_lib_dir.display());
    println!("cargo::metadata=RDMA_UTIL_DIR={}", util_lib_dir.display());

    // Re-run if build scripts change
    println!("cargo:rerun-if-changed=build.rs");
}

0 commit comments

Comments
 (0)