Skip to content

Commit b4e39ff

Browse files
committed
Remove the need to call clang for std::offload usages
1 parent 864339a commit b4e39ff

File tree

6 files changed

+145
-27
lines changed

6 files changed

+145
-27
lines changed

compiler/rustc_codegen_llvm/src/back/write.rs

Lines changed: 70 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -707,11 +707,9 @@ pub(crate) unsafe fn llvm_optimize(
707707
llvm::set_value_name(new_fn, &name);
708708
}
709709

710-
if cgcx.target_is_like_gpu && config.offload.contains(&config::Offload::Enable) {
710+
if cgcx.target_is_like_gpu && config.offload.contains(&config::Offload::Device) {
711711
let cx =
712712
SimpleCx::new(module.module_llvm.llmod(), module.module_llvm.llcx, cgcx.pointer_size);
713-
// For now we only support up to 10 kernels named kernel_0 ... kernel_9, a follow-up PR is
714-
// introducing a proper offload intrinsic to solve this limitation.
715713
for func in cx.get_functions() {
716714
let offload_kernel = "offload-kernel";
717715
if attributes::has_string_attr(func, offload_kernel) {
@@ -773,12 +771,79 @@ pub(crate) unsafe fn llvm_optimize(
773771
)
774772
};
775773

776-
if cgcx.target_is_like_gpu && config.offload.contains(&config::Offload::Enable) {
774+
if cgcx.target_is_like_gpu && config.offload.contains(&config::Offload::Device) {
775+
let device_path = cgcx.output_filenames.path(OutputType::Object);
776+
let device_dir = device_path.parent().unwrap();
777+
let device_out = device_dir.join("host.out");
778+
let device_out_c = path_to_c_string(device_out.as_path());
777779
unsafe {
778-
llvm::LLVMRustBundleImages(module.module_llvm.llmod(), module.module_llvm.tm.raw());
780+
// 1) Bundle device module into offload image host.out (device TM)
781+
let ok = llvm::LLVMRustBundleImages(
782+
module.module_llvm.llmod(),
783+
module.module_llvm.tm.raw(),
784+
device_out_c.as_ptr(),
785+
);
786+
assert!(ok, "LLVMRustBundleImages (device -> host.out) failed");
787+
if !device_out.exists() {
788+
panic!("BundleImages failed, `host.out` was not created!");
789+
}
779790
}
780791
}
781792

793+
// This assumes that we previously compiled our kernels for a gpu target, which created a
794+
// `host.out` artifact. The user is supposed to provide us with a path to this artifact; we
795+
// don't need any other artifacts from the previous run. We will embed this artifact into our
796+
// LLVM-IR host module, to create a `host.o` ObjectFile, which we will write to disk.
797+
// The last, not yet automated step uses the `clang-linker-wrapper` to process `host.o`.
798+
if !cgcx.target_is_like_gpu {
799+
if let Some(device_path) = config
800+
.offload
801+
.iter()
802+
.find_map(|o| if let config::Offload::Host(path) = o { Some(path) } else { None })
803+
{
804+
let device_pathbuf = PathBuf::from(device_path);
805+
if device_pathbuf.is_relative() {
806+
panic!("Absolute path is needed");
807+
} else if device_pathbuf
808+
.file_name()
809+
.and_then(|n| n.to_str())
810+
.is_some_and(|n| n != "host.out")
811+
{
812+
panic!("Need path to the host.out file");
813+
}
814+
assert!(device_pathbuf.exists());
815+
let host_path = cgcx.output_filenames.path(OutputType::Object);
816+
let host_dir = host_path.parent().unwrap();
817+
let out_obj = host_dir.join("host.o");
818+
let host_out_c = path_to_c_string(device_pathbuf.as_path());
819+
820+
// 2) Finalize host: lib.bc + host.out -> host.o (host TM)
821+
// We create a full clone of our LLVM host module, since we will embed the device IR
822+
// into it, and this might break caching or incremental compilation otherwise.
823+
let llmod2 = llvm::LLVMCloneModule(module.module_llvm.llmod());
824+
let ok =
825+
unsafe { llvm::LLVMRustOffloadEmbedBufferInModule(llmod2, host_out_c.as_ptr()) };
826+
assert!(ok, "LLVMRustOffloadEmbedBufferInModule failed");
827+
write_output_file(
828+
dcx,
829+
module.module_llvm.tm.raw(),
830+
config.no_builtins,
831+
llmod2,
832+
&out_obj,
833+
None,
834+
llvm::FileType::ObjectFile,
835+
&cgcx.prof,
836+
true,
837+
);
838+
if !out_obj.exists() {
839+
dbg!("{:?} does not exist!", out_obj);
840+
panic!("FinalizeOffload failed!");
841+
}
842+
// We ignore cgcx.save_temps here and always keep our `host.out` artifact.
843+
// Otherwise, recompiling the host code would fail since we deleted that device artifact
844+
// in the previous host compilation, which would be confusing at best.
845+
}
846+
}
782847
result.into_result().unwrap_or_else(|()| llvm_err(dcx, LlvmError::RunLlvmPasses))
783848
}
784849

compiler/rustc_codegen_llvm/src/intrinsic.rs

Lines changed: 1 addition & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -202,13 +202,7 @@ impl<'ll, 'tcx> IntrinsicCallBuilderMethods<'tcx> for Builder<'_, 'll, 'tcx> {
202202
return Ok(());
203203
}
204204
sym::offload => {
205-
if !tcx
206-
.sess
207-
.opts
208-
.unstable_opts
209-
.offload
210-
.contains(&rustc_session::config::Offload::Enable)
211-
{
205+
if tcx.sess.opts.unstable_opts.offload.is_empty() {
212206
let _ = tcx.dcx().emit_almost_fatal(OffloadWithoutEnable);
213207
}
214208

compiler/rustc_codegen_llvm/src/llvm/ffi.rs

Lines changed: 20 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1722,7 +1722,15 @@ mod Offload {
17221722
use super::*;
17231723
unsafe extern "C" {
17241724
/// Processes the module and writes it in an offload compatible way into a "host.out" file.
1725-
pub(crate) fn LLVMRustBundleImages<'a>(M: &'a Module, TM: &'a TargetMachine) -> bool;
1725+
pub(crate) fn LLVMRustBundleImages<'a>(
1726+
M: &'a Module,
1727+
TM: &'a TargetMachine,
1728+
host_out: *const c_char,
1729+
) -> bool;
1730+
pub(crate) unsafe fn LLVMRustOffloadEmbedBufferInModule<'a>(
1731+
_M: &'a Module,
1732+
_host_out: *const c_char,
1733+
) -> bool;
17261734
pub(crate) fn LLVMRustOffloadMapper<'a>(OldFn: &'a Value, NewFn: &'a Value);
17271735
}
17281736
}
@@ -1736,7 +1744,17 @@ mod Offload_fallback {
17361744
/// Processes the module and writes it in an offload compatible way into a "host.out" file.
17371745
/// Marked as unsafe to match the real offload wrapper which is unsafe due to FFI.
17381746
#[allow(unused_unsafe)]
1739-
pub(crate) unsafe fn LLVMRustBundleImages<'a>(_M: &'a Module, _TM: &'a TargetMachine) -> bool {
1747+
pub(crate) unsafe fn LLVMRustBundleImages<'a>(
1748+
_M: &'a Module,
1749+
_TM: &'a TargetMachine,
1750+
_host_out: *const c_char,
1751+
) -> bool {
1752+
unimplemented!("This rustc version was not built with LLVM Offload support!");
1753+
}
1754+
pub(crate) unsafe fn LLVMRustOffloadEmbedBufferInModule<'a>(
1755+
_M: &'a Module,
1756+
_host_out: *const c_char,
1757+
) -> bool {
17401758
unimplemented!("This rustc version was not built with LLVM Offload support!");
17411759
}
17421760
#[allow(unused_unsafe)]

compiler/rustc_llvm/llvm-wrapper/RustWrapper.cpp

Lines changed: 27 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -43,8 +43,10 @@
4343
// available. As such, we only try to build it in the first place, if
4444
// llvm.offload is enabled.
4545
#ifdef OFFLOAD
46+
#include "llvm/Bitcode/BitcodeReader.h"
4647
#include "llvm/Object/OffloadBinary.h"
4748
#include "llvm/Target/TargetMachine.h"
49+
#include "llvm/Transforms/Utils/ModuleUtils.h"
4850
#endif
4951

5052
// for raw `write` in the bad-alloc handler
@@ -174,12 +176,13 @@ static Error writeFile(StringRef Filename, StringRef Data) {
174176
// --image=file=device.bc,triple=amdgcn-amd-amdhsa,arch=gfx90a,kind=openmp
175177
// The input module is the rust code compiled for a gpu target like amdgpu.
176178
// Based on clang/tools/clang-offload-packager/ClangOffloadPackager.cpp
177-
extern "C" bool LLVMRustBundleImages(LLVMModuleRef M, TargetMachine &TM) {
179+
extern "C" bool LLVMRustBundleImages(LLVMModuleRef M, TargetMachine &TM,
180+
const char *HostOutPath) {
178181
std::string Storage;
179182
llvm::raw_string_ostream OS1(Storage);
180183
llvm::WriteBitcodeToFile(*unwrap(M), OS1);
181184
OS1.flush();
182-
auto MB = llvm::MemoryBuffer::getMemBufferCopy(Storage, "module.bc");
185+
auto MB = llvm::MemoryBuffer::getMemBufferCopy(Storage, "device.bc");
183186

184187
SmallVector<char, 1024> BinaryData;
185188
raw_svector_ostream OS2(BinaryData);
@@ -188,19 +191,38 @@ extern "C" bool LLVMRustBundleImages(LLVMModuleRef M, TargetMachine &TM) {
188191
ImageBinary.TheImageKind = object::IMG_Bitcode;
189192
ImageBinary.Image = std::move(MB);
190193
ImageBinary.TheOffloadKind = object::OFK_OpenMP;
191-
ImageBinary.StringData["triple"] = TM.getTargetTriple().str();
192-
ImageBinary.StringData["arch"] = TM.getTargetCPU();
194+
195+
std::string TripleStr = TM.getTargetTriple().str();
196+
llvm::StringRef CPURef = TM.getTargetCPU();
197+
ImageBinary.StringData["triple"] = TripleStr;
198+
ImageBinary.StringData["arch"] = CPURef;
193199
llvm::SmallString<0> Buffer = OffloadBinary::write(ImageBinary);
194200
if (Buffer.size() % OffloadBinary::getAlignment() != 0)
195201
// Offload binary has invalid size alignment
196202
return false;
197203
OS2 << Buffer;
198-
if (Error E = writeFile("host.out",
204+
if (Error E = writeFile(HostOutPath,
199205
StringRef(BinaryData.begin(), BinaryData.size())))
200206
return false;
201207
return true;
202208
}
203209

210+
extern "C" bool LLVMRustOffloadEmbedBufferInModule(LLVMModuleRef HostM,
211+
const char *HostOutPath) {
212+
auto MBOrErr = MemoryBuffer::getFile(HostOutPath);
213+
if (!MBOrErr) {
214+
auto E = MBOrErr.getError();
215+
auto _B = errorCodeToError(E);
216+
return false;
217+
}
218+
MemoryBufferRef Buf = (*MBOrErr)->getMemBufferRef();
219+
Module *M = unwrap(HostM);
220+
StringRef SectionName = ".llvm.offloading";
221+
Align Alignment = Align(8);
222+
llvm::embedBufferInModule(*M, Buf, SectionName, Alignment);
223+
return true;
224+
}
225+
204226
extern "C" void LLVMRustOffloadMapper(LLVMValueRef OldFn, LLVMValueRef NewFn) {
205227
llvm::Function *oldFn = llvm::unwrap<llvm::Function>(OldFn);
206228
llvm::Function *newFn = llvm::unwrap<llvm::Function>(NewFn);

compiler/rustc_session/src/config.rs

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -193,10 +193,12 @@ pub enum CoverageLevel {
193193
}
194194

195195
// The different settings that the `-Z offload` flag can have.
196-
#[derive(Clone, Copy, PartialEq, Hash, Debug)]
196+
#[derive(Clone, PartialEq, Hash, Debug)]
197197
pub enum Offload {
198-
/// Enable the llvm offload pipeline
199-
Enable,
198+
/// Entry point for `std::offload`, enables kernel compilation for a gpu device
199+
Device,
200+
/// Second step in the offload pipeline, generates the host code to call kernels.
201+
Host(String),
200202
}
201203

202204
/// The different settings that the `-Z autodiff` flag can have.
@@ -2631,9 +2633,7 @@ pub fn build_session_options(early_dcx: &mut EarlyDiagCtxt, matches: &getopts::M
26312633
)
26322634
}
26332635

2634-
if !nightly_options::is_unstable_enabled(matches)
2635-
&& unstable_opts.offload.contains(&Offload::Enable)
2636-
{
2636+
if !nightly_options::is_unstable_enabled(matches) && !unstable_opts.offload.is_empty() {
26372637
early_dcx.early_fatal(
26382638
"`-Zoffload=Enable` also requires `-Zunstable-options` \
26392639
and a nightly compiler",

compiler/rustc_session/src/options.rs

Lines changed: 21 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1459,8 +1459,27 @@ pub mod parse {
14591459
let mut v: Vec<&str> = v.split(",").collect();
14601460
v.sort_unstable();
14611461
for &val in v.iter() {
1462-
let variant = match val {
1463-
"Enable" => Offload::Enable,
1462+
// Split each entry on '=' if it has an argument
1463+
let (key, arg) = match val.split_once('=') {
1464+
Some((k, a)) => (k, Some(a)),
1465+
None => (val, None),
1466+
};
1467+
1468+
let variant = match key {
1469+
"Host" => {
1470+
if let Some(p) = arg {
1471+
Offload::Host(p.to_string())
1472+
} else {
1473+
return false;
1474+
}
1475+
}
1476+
"Device" => {
1477+
if let Some(_) = arg {
1478+
// Device does not accept a value
1479+
return false;
1480+
}
1481+
Offload::Device
1482+
}
14641483
_ => {
14651484
// FIXME(ZuseZ4): print an error saying which value is not recognized
14661485
return false;

0 commit comments

Comments
 (0)