Skip to content

Commit 2f17a17

Browse files
authored
Merge pull request #257 from influxdata/crepererum/target-triplet
feat: allow setting a target triplet
2 parents 97e72ee + 612b4a6 commit 2f17a17

File tree

10 files changed

+183
-35
lines changed

10 files changed

+183
-35
lines changed

Cargo.lock

Lines changed: 1 addition & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

Cargo.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -42,6 +42,7 @@ sqlparser = { version = "0.58.0", default-features = false, features = [
4242
"visitor"
4343
] }
4444
tar = { version = "0.4.44", default-features = false }
45+
target-lexicon = { version = "0.13", default-features = false }
4546
tempfile = { version = "3.23.0", default-features = false }
4647
tokio = { version = "1.48.0", default-features = false }
4748
uuid = { version = "1.19.0", default-features = false, features = ["v4"] }

host/Cargo.toml

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -32,8 +32,13 @@ datafusion-udf-wasm-bundle = { workspace = true, features = [
3232
] }
3333
insta.workspace = true
3434
regex.workspace = true
35+
target-lexicon.workspace = true
3536
tokio = { workspace = true, features = ["fs", "macros"] }
3637
wiremock = "0.6.5"
3738

39+
[features]
40+
# enable all architectures in cranelift/wasmtime
41+
all-arch = ["wasmtime/all-arch"]
42+
3843
[lints]
3944
workspace = true

host/src/component.rs

Lines changed: 41 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -21,17 +21,37 @@ use crate::{
2121
};
2222

2323
/// Create WASM engine.
24-
fn create_engine() -> DataFusionResult<Engine> {
25-
Engine::new(
26-
wasmtime::Config::new()
27-
.async_support(true)
28-
.epoch_interruption(true)
29-
.memory_init_cow(true)
30-
// Disable backtraces for now since debug info parsing doesn't seem to work and hence error
31-
// messages are nondeterministic.
32-
.wasm_backtrace(false),
33-
)
34-
.context("create WASM engine", None)
24+
fn create_engine(flags: &CompilationFlags) -> DataFusionResult<Engine> {
25+
// TODO: Once https://github.com/bytecodealliance/wasmtime/pull/12089 is released, make this an `Option` and treat
26+
// `None` as `Config::without_compiler`.
27+
let CompilationFlags { target } = flags;
28+
29+
let mut config = wasmtime::Config::new();
30+
config.async_support(true);
31+
config.epoch_interruption(true);
32+
config.memory_init_cow(true);
33+
// Disable backtraces for now since debug info parsing doesn't seem to work and hence error
34+
// messages are nondeterministic.
35+
config.wasm_backtrace(false);
36+
37+
if let Some(target) = &target {
38+
config
39+
.target(target)
40+
.with_context(|_| format!("cannot set target: {target}"), None)?;
41+
}
42+
43+
Engine::new(&config).context("create WASM engine", None)
44+
}
45+
46+
/// Code compilation flags.
47+
///
48+
/// This is used when [compiling a component](WasmComponentPrecompiled::compile).
49+
#[derive(Debug, Default, Clone)]
50+
pub struct CompilationFlags {
51+
/// Target (triplet).
52+
///
53+
/// Set to [`None`] to use the host configuration. Note that this may lead to unportable compiled code.
54+
pub target: Option<String>,
3555
}
3656

3757
/// Pre-compiled WASM component.
@@ -51,11 +71,14 @@ impl WasmComponentPrecompiled {
5171
///
5272
///
5373
/// [binary format]: https://webassembly.github.io/spec/core/binary/index.html
54-
pub async fn new(wasm_binary: Arc<[u8]>) -> DataFusionResult<Self> {
55-
tokio::task::spawn_blocking(move || {
56-
// Create temporary engine that we need for compilation.
57-
let engine = create_engine()?;
74+
pub async fn compile(
75+
wasm_binary: Arc<[u8]>,
76+
flags: &CompilationFlags,
77+
) -> DataFusionResult<Self> {
78+
// Create temporary engine that we need for compilation.
79+
let engine = create_engine(flags)?;
5880

81+
tokio::task::spawn_blocking(move || {
5982
let compiled_component = engine
6083
.precompile_component(&wasm_binary)
6184
.context("pre-compile component", None)?;
@@ -83,7 +106,7 @@ impl WasmComponentPrecompiled {
83106
///
84107
/// # Exposure
85108
/// It is generally safe to leak/expose the pre-compiled data to the user that provided the WASM bytecode (see
86-
/// [`new`](Self::new)). However, you must prevent the user from tampering the data, see "safety" section of
109+
/// [`compile`](Self::compile)). However, you must prevent the user from tampering with the data, see "safety" section of
87110
/// [`load`](Self::load).
88111
///
89112
/// The exposed data is opaque and we make no guarantees about the internal structure of it.
@@ -139,7 +162,7 @@ impl WasmComponentPrecompiled {
139162
};
140163

141164
// test hydration
142-
let engine = create_engine()?;
165+
let engine = create_engine(&CompilationFlags::default())?;
143166
this.hydrate(&engine)?;
144167

145168
Ok(this)
@@ -187,7 +210,7 @@ impl WasmComponentInstance {
187210
io_rt: Handle,
188211
memory_pool: &Arc<dyn MemoryPool>,
189212
) -> DataFusionResult<Self> {
190-
let engine = create_engine()?;
213+
let engine = create_engine(&CompilationFlags::default())?;
191214

192215
// set up epoch timer
193216
let mut epoch_task = JoinSet::new();

host/src/error.rs

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -43,20 +43,43 @@ pub(crate) trait WasmToDataFusionResultExt {
4343
/// [`Ok`] payload.
4444
type T;
4545

46+
/// [`Err`] payload.
47+
type E;
48+
4649
/// Add context to error.
4750
///
4851
/// The context has:
4952
/// - `msg`: a human-readable context description
5053
/// - `stderr`: stderr output of the WASM payload if available
5154
fn context(self, msg: &str, stderr: Option<&[u8]>) -> Result<Self::T, DataFusionError>;
55+
56+
/// Add context to error.
57+
///
58+
/// The context has:
59+
/// - `msg`: a closure that generates a human-readable context description based on the error
60+
/// - `stderr`: stderr output of the WASM payload if available
61+
fn with_context<F>(self, msg: F, stderr: Option<&[u8]>) -> Result<Self::T, DataFusionError>
62+
where
63+
F: for<'a> FnOnce(&'a Self::E) -> String;
5264
}
5365

5466
impl<T> WasmToDataFusionResultExt for Result<T, wasmtime::Error> {
5567
type T = T;
68+
type E = wasmtime::Error;
5669

5770
fn context(self, msg: &str, stderr: Option<&[u8]>) -> Result<Self::T, DataFusionError> {
5871
self.map_err(|err| WasmToDataFusionErrorExt::context(err, msg, stderr))
5972
}
73+
74+
fn with_context<F>(self, msg: F, stderr: Option<&[u8]>) -> Result<Self::T, DataFusionError>
75+
where
76+
F: for<'a> FnOnce(&'a Self::E) -> String,
77+
{
78+
self.map_err(|err| {
79+
let msg = msg(&err);
80+
WasmToDataFusionErrorExt::context(err, &msg, stderr)
81+
})
82+
}
6083
}
6184

6285
/// Extension trait for [`Result`] containing a [`DataFusionError`].

host/src/lib.rs

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@
44
//! [DataFusion]: https://datafusion.apache.org/
55
66
pub use crate::{
7-
component::WasmComponentPrecompiled,
7+
component::{CompilationFlags, WasmComponentPrecompiled},
88
conversion::limits::TrustedDataLimits,
99
http::{
1010
AllowCertainHttpRequests, HttpMethod, HttpRequestMatcher, HttpRequestRejected,
@@ -22,6 +22,8 @@ use datafusion_udf_wasm_bundle as _;
2222
#[cfg(test)]
2323
use regex as _;
2424
#[cfg(test)]
25+
use target_lexicon as _;
26+
#[cfg(test)]
2527
use wiremock as _;
2628

2729
mod bindings;

host/tests/integration_tests/evil/test_utils.rs

Lines changed: 9 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,9 @@ use std::sync::{Arc, LazyLock};
22

33
use datafusion_common::DataFusionError;
44
use datafusion_execution::memory_pool::GreedyMemoryPool;
5-
use datafusion_udf_wasm_host::{WasmComponentPrecompiled, WasmPermissions, WasmScalarUdf};
5+
use datafusion_udf_wasm_host::{
6+
CompilationFlags, WasmComponentPrecompiled, WasmPermissions, WasmScalarUdf,
7+
};
68
use tokio::{runtime::Runtime, sync::OnceCell};
79

810
/// Static memory limit.
@@ -17,9 +19,12 @@ static COMPONENT: OnceCell<WasmComponentPrecompiled> = OnceCell::const_new();
1719
pub(crate) async fn component() -> &'static WasmComponentPrecompiled {
1820
COMPONENT
1921
.get_or_init(async || {
20-
WasmComponentPrecompiled::new(datafusion_udf_wasm_bundle::BIN_EVIL.into())
21-
.await
22-
.unwrap()
22+
WasmComponentPrecompiled::compile(
23+
datafusion_udf_wasm_bundle::BIN_EVIL.into(),
24+
&CompilationFlags::default(),
25+
)
26+
.await
27+
.unwrap()
2328
})
2429
.await
2530
}

host/tests/integration_tests/python/test_utils.rs

Lines changed: 7 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@ use std::sync::Arc;
22

33
use datafusion_common::DataFusionError;
44
use datafusion_execution::memory_pool::GreedyMemoryPool;
5-
use datafusion_udf_wasm_host::{WasmComponentPrecompiled, WasmScalarUdf};
5+
use datafusion_udf_wasm_host::{CompilationFlags, WasmComponentPrecompiled, WasmScalarUdf};
66
use tokio::{runtime::Handle, sync::OnceCell};
77

88
/// Memory limit in bytes.
@@ -17,9 +17,12 @@ static COMPONENT: OnceCell<WasmComponentPrecompiled> = OnceCell::const_new();
1717
pub(crate) async fn python_component() -> &'static WasmComponentPrecompiled {
1818
COMPONENT
1919
.get_or_init(async || {
20-
WasmComponentPrecompiled::new(datafusion_udf_wasm_bundle::BIN_PYTHON.into())
21-
.await
22-
.unwrap()
20+
WasmComponentPrecompiled::compile(
21+
datafusion_udf_wasm_bundle::BIN_PYTHON.into(),
22+
&CompilationFlags::default(),
23+
)
24+
.await
25+
.unwrap()
2326
})
2427
.await
2528
}

host/tests/integration_tests/rust.rs

Lines changed: 86 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,8 @@ use datafusion_expr::{
1212
async_udf::AsyncScalarUDFImpl,
1313
};
1414
use datafusion_udf_wasm_host::{
15-
StaticResourceLimits, WasmComponentPrecompiled, WasmPermissions, WasmScalarUdf,
15+
CompilationFlags, StaticResourceLimits, WasmComponentPrecompiled, WasmPermissions,
16+
WasmScalarUdf,
1617
};
1718
use tokio::{runtime::Handle, sync::OnceCell};
1819

@@ -274,14 +275,95 @@ async fn test_limit_initial_n_memories() {
274275
);
275276
}
276277

278+
#[tokio::test]
279+
async fn test_match_target() {
280+
let component = WasmComponentPrecompiled::compile(
281+
datafusion_udf_wasm_bundle::BIN_EXAMPLE.into(),
282+
&CompilationFlags {
283+
target: Some(target_lexicon::HOST.to_string()),
284+
},
285+
)
286+
.await
287+
.unwrap();
288+
289+
// instantiating works
290+
WasmScalarUdf::new(
291+
&component,
292+
&Default::default(),
293+
Handle::current(),
294+
&(Arc::new(UnboundedMemoryPool::default()) as _),
295+
"".to_owned(),
296+
)
297+
.await
298+
.unwrap();
299+
300+
// and a store->load round trip also works
301+
let data = component.store().to_vec();
302+
// SAFETY: we just compiled that
303+
let res = unsafe { WasmComponentPrecompiled::load(data) };
304+
res.unwrap();
305+
}
306+
307+
#[cfg(feature = "all-arch")]
308+
#[tokio::test]
309+
async fn test_mismatch_target() {
310+
let component = WasmComponentPrecompiled::compile(
311+
datafusion_udf_wasm_bundle::BIN_EXAMPLE.into(),
312+
&CompilationFlags {
313+
// It's unlikely that someone is going to run the tests on a RISC-V 64-bit host, but if they do, we need to
314+
// make the test code smarter: on such a host, instantiation and loading would succeed instead of failing as this test expects.
315+
target: Some("riscv64gc-unknown-linux-gnu".to_owned()),
316+
},
317+
)
318+
.await
319+
.unwrap();
320+
321+
// instantiating doesn't work
322+
let err = WasmScalarUdf::new(
323+
&component,
324+
&Default::default(),
325+
Handle::current(),
326+
&(Arc::new(UnboundedMemoryPool::default()) as _),
327+
"".to_owned(),
328+
)
329+
.await
330+
.unwrap_err();
331+
332+
insta::assert_snapshot!(
333+
err,
334+
@r"
335+
create WASM component
336+
caused by
337+
External error: Module was compiled for architecture 'riscv64gc'
338+
"
339+
);
340+
341+
// and a store->load round trip also fails
342+
let data = component.store().to_vec();
343+
// SAFETY: we just compiled that
344+
let res = unsafe { WasmComponentPrecompiled::load(data) };
345+
346+
insta::assert_snapshot!(
347+
res.unwrap_err(),
348+
@r"
349+
create WASM component
350+
caused by
351+
External error: Module was compiled for architecture 'riscv64gc'
352+
"
353+
);
354+
}
355+
277356
async fn component() -> &'static WasmComponentPrecompiled {
278357
static COMPONENT: OnceCell<WasmComponentPrecompiled> = OnceCell::const_new();
279358

280359
COMPONENT
281360
.get_or_init(async || {
282-
WasmComponentPrecompiled::new(datafusion_udf_wasm_bundle::BIN_EXAMPLE.into())
283-
.await
284-
.unwrap()
361+
WasmComponentPrecompiled::compile(
362+
datafusion_udf_wasm_bundle::BIN_EXAMPLE.into(),
363+
&CompilationFlags::default(),
364+
)
365+
.await
366+
.unwrap()
285367
})
286368
.await
287369
}
Lines changed: 7 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
use datafusion_udf_wasm_host::WasmComponentPrecompiled;
1+
use datafusion_udf_wasm_host::{CompilationFlags, WasmComponentPrecompiled};
22
use tokio::sync::OnceCell;
33

44
/// Static precompiled Python WASM component for tests
@@ -8,9 +8,12 @@ static COMPONENT: OnceCell<WasmComponentPrecompiled> = OnceCell::const_new();
88
pub(crate) async fn python_component() -> &'static WasmComponentPrecompiled {
99
COMPONENT
1010
.get_or_init(async || {
11-
WasmComponentPrecompiled::new(datafusion_udf_wasm_bundle::BIN_PYTHON.into())
12-
.await
13-
.unwrap()
11+
WasmComponentPrecompiled::compile(
12+
datafusion_udf_wasm_bundle::BIN_PYTHON.into(),
13+
&CompilationFlags::default(),
14+
)
15+
.await
16+
.unwrap()
1417
})
1518
.await
1619
}

0 commit comments

Comments
 (0)