Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 1 addition & 2 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

4 changes: 4 additions & 0 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,10 @@ async-cuda = "0.6.1"
cpp = "0.5"
tracing = "0.1"

[patch.crates-io]
# TODO: upstream the "get devices" addition to async-cuda, then drop this patch
async-cuda = { git = "https://github.com/micahcc/async-cuda.git", branch = "main"}

[dev-dependencies]
tempfile = "3.4"
tokio = { version = "1", default-features = false, features = [
Expand Down
144 changes: 127 additions & 17 deletions src/ffi/sync/engine.rs
Original file line number Diff line number Diff line change
@@ -1,12 +1,12 @@
use cpp::cpp;

use async_cuda::device::DeviceId;
use async_cuda::ffi::device::Device;

use crate::error::last_error;
use crate::ffi::memory::HostBuffer;
use crate::ffi::result;
use crate::ffi::sync::runtime::Runtime;
use async_cuda::device::DeviceId;
use async_cuda::ffi::device::Device;
use async_cuda::ffi::ptr::DevicePtr;
use cpp::cpp;
use std::sync::Arc;

type Result<T> = std::result::Result<T, crate::error::Error>;

Expand All @@ -32,6 +32,40 @@ unsafe impl Send for Engine {}
/// The TensorRT API is thread-safe with regards to all operations on [`Engine`].
unsafe impl Sync for Engine {}

/// Data type of an engine I/O tensor.
///
/// Mirrors the `nvinfer1::DataType` enumeration; variants are declared in the
/// same order as the TensorRT discriminants so the FFI mapping is positional.
#[derive(Copy, Clone, Debug, PartialEq, Eq, Hash)]
pub enum DataType {
    /// 32-bit floating point format.
    Float,
    /// IEEE 16-bit floating-point format – has a 5 bit exponent and 11 bit significand.
    Half,
    /// Signed 8-bit integer representing a quantized floating-point value.
    Int8,
    /// Signed 32-bit integer format.
    Int32,
    /// 8-bit boolean. 0 = false, 1 = true, other values undefined.
    Bool,
    /// Unsigned 8-bit integer format. Cannot be used to represent quantized floating-point values.
    /// Use the IdentityLayer to convert kUINT8 network-level inputs to {kFLOAT, kHALF} prior to
    /// use with other TensorRT layers, or to convert intermediate output before kUINT8
    /// network-level outputs from {kFLOAT, kHALF} to kUINT8. kUINT8 conversions are only supported
    /// for {kFLOAT, kHALF}. kUINT8 to {kFLOAT, kHALF} conversion will convert the integer values
    /// to equivalent floating point values. {kFLOAT, kHALF} to kUINT8 conversion will convert the
    /// floating point values to integer values by truncating towards zero. This conversion has
    /// undefined behavior for floating point values outside the range [0.0F, 256.0F) after
    /// truncation. kUINT8 conversions are not supported for {kINT8, kINT32, kBOOL}.
    Uint8,
    /// Signed 8-bit floating point with 1 sign bit, 4 exponent bits, 3 mantissa bits, and exponent-bias 7.
    Fp8,
    /// Brain float – has an 8 bit exponent and 8 bit significand.
    Bf16,
    /// Signed 64-bit integer type.
    Int64,
    /// Signed 4-bit integer type.
    Int4,
    /// 4-bit floating point type 1 bit sign, 2 bit exponent, 1 bit mantissa
    Fp4,
}

impl Engine {
#[inline]
pub(crate) fn wrap(internal: *mut std::ffi::c_void, runtime: Runtime) -> Self {
Expand All @@ -58,6 +92,42 @@ impl Engine {
num_io_tensors as usize
}

/// Get the data type of the I/O tensor at `io_tensor_index`.
///
/// On TensorRT 8 and newer this resolves the tensor's name first and queries
/// by name; on older versions it falls back to the (since removed in TRT 10)
/// binding-index API.
///
/// # Panics
///
/// Panics if TensorRT reports a discriminant outside the known range, which
/// indicates an unsupported TensorRT version.
pub fn io_tensor_type(&self, io_tensor_index: usize) -> DataType {
    let internal = self.as_ptr();
    let io_tensor_index = io_tensor_index as std::os::raw::c_int;
    let data_type: i32 = cpp!(unsafe [
        internal as "const void*",
        io_tensor_index as "int"
    ] -> i32 as "DataType" {
        // Added in TRT 8
        #if NV_TENSORRT_MAJOR >= 8
        const char* name = ((const ICudaEngine*) internal)->getIOTensorName(io_tensor_index);
        if(name == nullptr) {
            // NOTE(review): an out-of-range index silently reports kFLOAT
            // here, which can mask caller bugs — confirm this is intended.
            return DataType::kFLOAT;
        }
        return ((const ICudaEngine*) internal)->getTensorDataType(name);
        #else
        // Removed in TRT 10
        return ((const ICudaEngine*) internal)->getBindingDataType(io_tensor_index);
        #endif
    });

    // Discriminants follow nvinfer1::DataType declaration order.
    match data_type {
        0 => DataType::Float,
        1 => DataType::Half,
        2 => DataType::Int8,
        3 => DataType::Int32,
        4 => DataType::Bool,
        5 => DataType::Uint8,
        6 => DataType::Fp8,
        7 => DataType::Bf16,
        8 => DataType::Int64,
        9 => DataType::Int4,
        10 => DataType::Fp4,
        _ => panic!("Unknown data type ({data_type}), you might be using an unsupported version of TensorRT")
    }
}

pub fn io_tensor_name(&self, io_tensor_index: usize) -> String {
let internal = self.as_ptr();
let io_tensor_index = io_tensor_index as std::os::raw::c_int;
Expand Down Expand Up @@ -175,8 +245,8 @@ unsafe impl<'engine> Send for ExecutionContext<'engine> {}
unsafe impl<'engine> Sync for ExecutionContext<'engine> {}

impl ExecutionContext<'static> {
pub fn from_engine(mut engine: Engine) -> Result<Self> {
let internal = unsafe { Self::new_internal(&mut engine) };
pub fn from_engine(engine: Engine) -> Result<Self> {
let internal = unsafe { Self::new_internal(&engine) };
result!(
internal,
Self {
Expand All @@ -188,10 +258,23 @@ impl ExecutionContext<'static> {
)
}

pub fn from_engine_many(mut engine: Engine, num: usize) -> Result<Vec<Self>> {
/// Create an execution context that shares ownership of `engine`.
///
/// Because the engine is kept behind an [`Arc`], callers can create several
/// contexts over the same engine; each context holds a strong reference that
/// keeps the engine alive for the context's lifetime.
pub fn from_shared_engine(engine: Arc<Engine>) -> Result<Self> {
    let device = engine.device();
    let internal = unsafe { Self::new_internal(&engine) };
    result!(
        internal,
        Self {
            internal,
            device,
            _parent: Some(engine),
            _phantom: Default::default(),
        }
    )
}

pub fn from_engine_many(engine: Engine, num: usize) -> Result<Vec<Self>> {
let mut internals = Vec::with_capacity(num);
for _ in 0..num {
internals.push(unsafe { Self::new_internal(&mut engine) });
internals.push(unsafe { Self::new_internal(&engine) });
}
let device = engine.device();
let parent = std::sync::Arc::new(engine);
Expand All @@ -213,7 +296,7 @@ impl ExecutionContext<'static> {
}

impl<'engine> ExecutionContext<'engine> {
pub fn new(engine: &'engine mut Engine) -> Result<Self> {
pub fn new(engine: &'engine Engine) -> Result<Self> {
let internal = unsafe { Self::new_internal(engine) };
result!(
internal,
Expand All @@ -226,6 +309,32 @@ impl<'engine> ExecutionContext<'engine> {
)
}

/// Bind `buffer` as the backing device memory for the I/O tensor named
/// `tensor_name`.
///
/// Intended for use together with `enqueue_prebound`, which executes
/// inference against the addresses bound here.
///
/// # Errors
///
/// Returns an error if TensorRT rejects the tensor address.
pub fn bind_tensor<T: Copy>(
    &mut self,
    tensor_name: &str,
    buffer: &mut async_cuda::ffi::memory::DeviceTensor<T>,
) -> Result<()> {
    // No `Ok(...?)` round-trip needed: `set_tensor_address` already returns
    // our `Result<()>`.
    unsafe { self.set_tensor_address::<T>(tensor_name, buffer.as_mut_internal()) }
}

/// Enqueue inference on `stream` using tensor addresses that were bound
/// beforehand (see `bind_tensor`); this allows for assorted types of inputs.
///
/// # Errors
///
/// Returns the last recorded error if TensorRT fails to enqueue the work.
pub fn enqueue_prebound(&mut self, stream: &async_cuda::ffi::stream::Stream) -> Result<()> {
    let internal = self.as_mut_ptr();
    let stream_ptr = stream.as_internal().as_ptr();
    let enqueued = cpp!(unsafe [
        internal as "void*",
        stream_ptr as "const void*"
    ] -> bool as "bool" {
        return ((IExecutionContext*) internal)->enqueueV3((cudaStream_t) stream_ptr);
    });
    match enqueued {
        true => Ok(()),
        false => Err(last_error()),
    }
}

pub fn enqueue<T: Copy>(
&mut self,
io_tensors: &mut std::collections::HashMap<
Expand All @@ -237,7 +346,7 @@ impl<'engine> ExecutionContext<'engine> {
let internal = self.as_mut_ptr();
for (tensor_name, buffer) in io_tensors {
unsafe {
self.set_tensor_address(tensor_name, buffer)?;
self.set_tensor_address::<T>(tensor_name, buffer.as_mut_internal())?;
}
}
let stream_ptr = stream.as_internal().as_ptr();
Expand Down Expand Up @@ -271,28 +380,29 @@ impl<'engine> ExecutionContext<'engine> {
self.device
}

unsafe fn new_internal(engine: &mut Engine) -> *mut std::ffi::c_void {
unsafe fn new_internal(engine: &Engine) -> *mut std::ffi::c_void {
Device::set_or_panic(engine.device());
let internal_engine = engine.as_mut_ptr();
let internal_engine = engine.as_ptr();
let internal = cpp!(unsafe [
internal_engine as "void*"
] -> *mut std::ffi::c_void as "void*" {
return (void*) ((ICudaEngine*) internal_engine)->createExecutionContext();
void* out = (void*) ((ICudaEngine*) internal_engine)->createExecutionContext();
return out;
});
internal
}

unsafe fn set_tensor_address<T: Copy>(
&mut self,
tensor_name: &str,
buffer: &mut async_cuda::ffi::memory::DeviceBuffer<T>,
buffer_ptr: &mut DevicePtr,
) -> Result<()> {
let internal = self.as_mut_ptr();
let tensor_name_cstr = std::ffi::CString::new(tensor_name).unwrap();
let tensor_name_ptr = tensor_name_cstr.as_ptr();
let buffer_ptr = buffer.as_mut_internal().as_mut_ptr();
let buffer_ptr = buffer_ptr.as_ptr();
let success = cpp!(unsafe [
internal as "const void*",
internal as "void*",
tensor_name_ptr as "const char*",
buffer_ptr as "void*"
] -> bool as "bool" {
Expand Down