diff --git a/README.md b/README.md
index a02688977d2..b351ee79a4f 100644
--- a/README.md
+++ b/README.md
@@ -1,5 +1,6 @@
+
TensorRT LLM
===========================
TensorRT LLM provides users with an easy-to-use Python API to define Large Language Models (LLMs) and supports
diff --git a/docker/Dockerfile.multi b/docker/Dockerfile.multi
index f5d7915b02e..c2a2b851338 100644
--- a/docker/Dockerfile.multi
+++ b/docker/Dockerfile.multi
@@ -76,7 +76,8 @@ COPY constraints.txt /tmp/constraints.txt
RUN pip3 install --no-cache-dir -r /tmp/constraints.txt && rm /tmp/constraints.txt
# Remove nbconvert to avoid https://github.com/advisories/GHSA-xm59-rqc7-hhvf in the base NGC PyTorch image.
-RUN pip3 uninstall -y nbconvert || true
+# Remove pillow to avoid https://github.com/advisories/GHSA-cfh3-3jmp-rvhc in the base NGC PyTorch image.
+RUN pip3 uninstall -y nbconvert pillow || true
# Install UCX, NIXL, etcd
# TODO: Combine these into the main install.sh script
diff --git a/tensorrt_llm/functional.py b/tensorrt_llm/functional.py
index f341d752206..981d3b13695 100755
--- a/tensorrt_llm/functional.py
+++ b/tensorrt_llm/functional.py
@@ -33,7 +33,7 @@
fp16_array, fp32_array, get_sm_version, int32_array,
int64_array, np_dtype_to_trt, str_dtype_to_trt,
trt_dtype_to_np, trt_dtype_to_str)
-from .network import PluginInfo, set_np_weight, set_plugin_info
+from .network import PluginInfo, get_np_weight, set_np_weight, set_plugin_info
from .plugin import TRT_LLM_PLUGIN_NAMESPACE, current_all_reduce_helper
from .quantization import QuantMode
@@ -3543,6 +3543,24 @@ def avg_pool2d(input: Tensor,
return output
+def _get_trt_weight(weight: Tensor) -> Tuple[trt.Weights, bool]:  # Resolve `weight` into (trt.Weights, is_constant) for the conv/deconv builders.
+ is_weight_constant = (weight.producer is not None
+ and weight.producer.type == trt.LayerType.CONSTANT)  # constant only when produced by a CONSTANT layer
+ if is_weight_constant:
+ ndarray = get_np_weight(default_trtnet(), weight.producer.name)  # looked up by the producing layer's name; may be None
+ if ndarray is not None:
+ trt_weight = trt.Weights(np_dtype_to_trt(ndarray.dtype),
+ ndarray.ctypes.data,
+ int(np.prod(ndarray.shape)))  # (dtype, raw data pointer, element count); NOTE(review): assumes the array is contiguous and outlives the layer - confirm
+ else:
+ weight.producer.__class__ = trt.IConstantLayer  # presumably re-types the generic layer so `.weights` is exposed - confirm
+ trt_weight = weight.producer.weights
+ else:
+ trt_weight = trt.Weights()  # non-constant: empty placeholder; callers bind the tensor via layer.set_input
+
+ return trt_weight, is_weight_constant
+
+
def conv1d(input: Tensor,
weight: Tensor,
bias: Optional[Tensor] = None,
@@ -3553,30 +3571,32 @@ def conv1d(input: Tensor,
noutput = weight.size()[0]
kernel_size = weight.size()[-2]
- is_weight_constant = (weight.producer is not None
- and weight.producer.type == trt.LayerType.CONSTANT)
- weight = weight.producer.weights if is_weight_constant else trt.Weights()
+ kernel_shape = trt.Dims([kernel_size, 1])
+
+ trt_weight, is_weight_constant = _get_trt_weight(weight)
+ weight_tensor = weight
if bias is not None:
- is_bias_constant = (bias.producer is not None
- and bias.producer.type == trt.LayerType.CONSTANT)
- bias = bias.producer.weights if is_bias_constant else trt.Weights()
+ bias_tensor = bias
+ trt_bias, is_bias_constant = _get_trt_weight(bias)
+ else:
+ bias_tensor = None
+ trt_bias = None
input_shuffled = stack([input], dim=input.ndim())
- kernel_size = trt.Dims([kernel_size, 1])
layer = default_trtnet().add_convolution_nd(input_shuffled.trt_tensor,
- noutput, kernel_size, weight,
- bias)
+ noutput, kernel_shape,
+ trt_weight, trt_bias)
layer.stride_nd = (stride, 2)
layer.padding_nd = (padding, 0)
layer.dilation_nd = (dilation, 2)
layer.num_groups = groups
if not is_weight_constant:
- layer.set_input(1, weight.trt_tensor)
- if bias is not None and not is_bias_constant:
- layer.set_input(2, bias.trt_tensor)
+ layer.set_input(1, weight_tensor.trt_tensor)
+ if bias_tensor is not None and not is_bias_constant:
+ layer.set_input(2, bias_tensor.trt_tensor)
output_2d = _create_tensor(layer.get_output(0), layer)
output_1d = squeeze(output_2d, dim=-1)
@@ -3602,18 +3622,21 @@ def conv2d(input: Tensor,
noutput = weight.size()[0]
kernel_size = (weight.size()[-2], weight.size()[-1])
+ kernel_shape = trt.Dims(list(kernel_size))
- is_weight_constant = (weight.producer is not None
- and weight.producer.type == trt.LayerType.CONSTANT)
- weight = weight.producer.weights if is_weight_constant else trt.Weights()
+ trt_weight, is_weight_constant = _get_trt_weight(weight)
+ weight_tensor = weight
if bias is not None:
- is_bias_constant = (bias.producer is not None
- and bias.producer.type == trt.LayerType.CONSTANT)
- bias = bias.producer.weights if is_bias_constant else trt.Weights()
+ bias_tensor = bias
+ trt_bias, is_bias_constant = _get_trt_weight(bias)
+ else:
+ bias_tensor = None
+ trt_bias = None
layer = default_trtnet().add_convolution_nd(input.trt_tensor, noutput,
- kernel_size, weight, bias)
+ kernel_shape, trt_weight,
+ trt_bias)
layer.stride_nd = stride
layer.padding_nd = padding
layer.dilation_nd = dilation
@@ -3625,9 +3648,9 @@ def conv2d(input: Tensor,
layer.post_padding = post_padding
if not is_weight_constant:
- layer.set_input(1, weight.trt_tensor)
- if bias is not None and not is_bias_constant:
- layer.set_input(2, bias.trt_tensor)
+ layer.set_input(1, weight_tensor.trt_tensor)
+ if bias_tensor is not None and not is_bias_constant:
+ layer.set_input(2, bias_tensor.trt_tensor)
output = _create_tensor(layer.get_output(0), layer)
@@ -3666,18 +3689,21 @@ def conv3d(input: Tensor,
noutput = weight.size()[0]
kernel_size = (weight.size()[-3], weight.size()[-2], weight.size()[-1])
+ kernel_shape = trt.Dims(list(kernel_size))
- is_weight_constant = (weight.producer is not None
- and weight.producer.type == trt.LayerType.CONSTANT)
- weight = weight.producer.weights if is_weight_constant else trt.Weights()
+ trt_weight, is_weight_constant = _get_trt_weight(weight)
+ weight_tensor = weight
if bias is not None:
- is_bias_constant = (bias.producer is not None
- and bias.producer.type == trt.LayerType.CONSTANT)
- bias = bias.producer.weights if is_bias_constant else trt.Weights()
+ bias_tensor = bias
+ trt_bias, is_bias_constant = _get_trt_weight(bias)
+ else:
+ bias_tensor = None
+ trt_bias = None
layer = default_trtnet().add_convolution_nd(input.trt_tensor, noutput,
- kernel_size, weight, bias)
+ kernel_shape, trt_weight,
+ trt_bias)
layer.stride_nd = stride
layer.padding_nd = padding
layer.dilation_nd = dilation
@@ -3685,9 +3711,9 @@ def conv3d(input: Tensor,
layer.dilation_nd = dilation
if not is_weight_constant:
- layer.set_input(1, weight.trt_tensor)
- if bias is not None and not is_bias_constant:
- layer.set_input(2, bias.trt_tensor)
+ layer.set_input(1, weight_tensor.trt_tensor)
+ if bias_tensor is not None and not is_bias_constant:
+ layer.set_input(2, bias_tensor.trt_tensor)
output = _create_tensor(layer.get_output(0), layer)
return output
@@ -3713,26 +3739,29 @@ def conv_transpose2d(input: Tensor,
noutput = weight.size()[1]
kernel_size = (weight.size()[-2], weight.size()[-1])
+ kernel_shape = trt.Dims(list(kernel_size))
- is_weight_constant = (weight.producer is not None
- and weight.producer.type == trt.LayerType.CONSTANT)
- weight = weight.producer.weights if is_weight_constant else trt.Weights()
+ trt_weight, is_weight_constant = _get_trt_weight(weight)
+ weight_tensor = weight
if bias is not None:
- is_bias_constant = (bias.producer is not None
- and bias.producer.type == trt.LayerType.CONSTANT)
- bias = bias.producer.weights if is_bias_constant else trt.Weights()
+ bias_tensor = bias
+ trt_bias, is_bias_constant = _get_trt_weight(bias)
+ else:
+ bias_tensor = None
+ trt_bias = None
layer = default_trtnet().add_deconvolution_nd(input.trt_tensor, noutput,
- kernel_size, weight, bias)
+ kernel_shape, trt_weight,
+ trt_bias)
layer.stride_nd = stride
layer.padding_nd = padding
layer.num_groups = groups
if not is_weight_constant:
- layer.set_input(1, weight.trt_tensor)
- if bias is not None and not is_bias_constant:
- layer.set_input(2, bias.trt_tensor)
+ layer.set_input(1, weight_tensor.trt_tensor)
+ if bias_tensor is not None and not is_bias_constant:
+ layer.set_input(2, bias_tensor.trt_tensor)
output = _create_tensor(layer.get_output(0), layer)
diff --git a/tensorrt_llm/parameter.py b/tensorrt_llm/parameter.py
index 7859eff9365..d740cbb0bdb 100644
--- a/tensorrt_llm/parameter.py
+++ b/tensorrt_llm/parameter.py
@@ -243,16 +243,23 @@ def set_value_or_dummy(self, v: Union[np.ndarray, torch.Tensor]):
self.value = v
- def set_name(self, name: str, network):
+ def set_name(self, name: str, network: Network):  # Record `name` on this parameter and on its weights in `network`.
self._name = name
if self.is_managed(network):
self._get_weights(network).name = name
return True
else:
- return network.trt_network.set_weights_name(
- self._get_weights(network), name)
-
- def _get_weights(self, network) -> trt.Weights | Tensor | None:
+ weights = self._get_weights(network)
+ # TensorRT bindings may return a numpy array instead of trt.Weights; wrap it before naming.
+ if isinstance(weights, np.ndarray):
+ trt_dtype = np_dtype_to_trt(
+ weights.dtype
+ ) if weights.dtype != np.object_ else self._dtype  # object-dtype arrays fall back to the parameter's own dtype
+ trt_count = int(np.prod(weights.shape))  # total number of elements
+ weights = trt.Weights(trt_dtype, weights.ctypes.data, trt_count)  # NOTE(review): wraps the raw pointer; assumes the array is contiguous and stays alive - confirm
+ return network.trt_network.set_weights_name(weights, name)
+
+ def _get_weights(self, network: Network) -> trt.Weights | Tensor | None:
tensor = network.get_parameter_tensor(self)
if self.is_managed(network):
return tensor