Skip to content
Closed

debug #12880

Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions README.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
<div align="center">


TensorRT LLM
===========================
<h4>TensorRT LLM provides users with an easy-to-use Python API to define Large Language Models (LLMs) and supports
Expand Down
3 changes: 2 additions & 1 deletion docker/Dockerfile.multi
Original file line number Diff line number Diff line change
Expand Up @@ -76,7 +76,8 @@ COPY constraints.txt /tmp/constraints.txt
RUN pip3 install --no-cache-dir -r /tmp/constraints.txt && rm /tmp/constraints.txt

# Remove nbconvert to avoid https://github.com/advisories/GHSA-xm59-rqc7-hhvf in the base NGC PyTorch image.
RUN pip3 uninstall -y nbconvert || true
# Remove pillow to avoid https://github.com/advisories/GHSA-cfh3-3jmp-rvhc in the base NGC PyTorch image.
RUN pip3 uninstall -y nbconvert pillow || true

# Install UCX, NIXL, etcd
# TODO: Combine these into the main install.sh script
Expand Down
115 changes: 72 additions & 43 deletions tensorrt_llm/functional.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@
fp16_array, fp32_array, get_sm_version, int32_array,
int64_array, np_dtype_to_trt, str_dtype_to_trt,
trt_dtype_to_np, trt_dtype_to_str)
from .network import PluginInfo, set_np_weight, set_plugin_info
from .network import PluginInfo, get_np_weight, set_np_weight, set_plugin_info
from .plugin import TRT_LLM_PLUGIN_NAMESPACE, current_all_reduce_helper
from .quantization import QuantMode

Expand Down Expand Up @@ -3543,6 +3543,24 @@ def avg_pool2d(input: Tensor,
return output


def _get_trt_weight(weight: Tensor) -> Tuple[trt.Weights, bool]:
    """Resolve a weight ``Tensor`` into a ``trt.Weights`` for layer creation.

    Returns ``(trt_weight, is_constant)``. ``is_constant`` is True when the
    tensor is produced by a TensorRT constant layer; in that case
    ``trt_weight`` carries real data. Otherwise ``trt_weight`` is an empty
    placeholder and the caller is expected to wire the weight tensor in via
    ``layer.set_input`` instead.
    """
    producer = weight.producer
    if producer is None or producer.type != trt.LayerType.CONSTANT:
        # Not backed by a constant layer: return an empty placeholder.
        return trt.Weights(), False

    ndarray = get_np_weight(default_trtnet(), producer.name)
    if ndarray is None:
        # No numpy copy registered for this layer; read the weights straight
        # off the constant layer. Reclassing to IConstantLayer makes its
        # `.weights` accessor available on the generic layer handle.
        producer.__class__ = trt.IConstantLayer
        return producer.weights, True

    # Wrap the registered numpy array without copying: pass its dtype,
    # raw data pointer, and flat element count.
    trt_weight = trt.Weights(np_dtype_to_trt(ndarray.dtype),
                             ndarray.ctypes.data,
                             int(np.prod(ndarray.shape)))
    return trt_weight, True


def conv1d(input: Tensor,
weight: Tensor,
bias: Optional[Tensor] = None,
Expand All @@ -3553,30 +3571,32 @@ def conv1d(input: Tensor,

noutput = weight.size()[0]
kernel_size = weight.size()[-2]
is_weight_constant = (weight.producer is not None
and weight.producer.type == trt.LayerType.CONSTANT)
weight = weight.producer.weights if is_weight_constant else trt.Weights()
kernel_shape = trt.Dims([kernel_size, 1])

trt_weight, is_weight_constant = _get_trt_weight(weight)
weight_tensor = weight

if bias is not None:
is_bias_constant = (bias.producer is not None
and bias.producer.type == trt.LayerType.CONSTANT)
bias = bias.producer.weights if is_bias_constant else trt.Weights()
bias_tensor = bias
trt_bias, is_bias_constant = _get_trt_weight(bias)
else:
bias_tensor = None
trt_bias = None

input_shuffled = stack([input], dim=input.ndim())
kernel_size = trt.Dims([kernel_size, 1])

layer = default_trtnet().add_convolution_nd(input_shuffled.trt_tensor,
noutput, kernel_size, weight,
bias)
noutput, kernel_shape,
trt_weight, trt_bias)
layer.stride_nd = (stride, 2)
layer.padding_nd = (padding, 0)
layer.dilation_nd = (dilation, 2)
layer.num_groups = groups

if not is_weight_constant:
layer.set_input(1, weight.trt_tensor)
if bias is not None and not is_bias_constant:
layer.set_input(2, bias.trt_tensor)
layer.set_input(1, weight_tensor.trt_tensor)
if bias_tensor is not None and not is_bias_constant:
layer.set_input(2, bias_tensor.trt_tensor)

output_2d = _create_tensor(layer.get_output(0), layer)
output_1d = squeeze(output_2d, dim=-1)
Expand All @@ -3602,18 +3622,21 @@ def conv2d(input: Tensor,

noutput = weight.size()[0]
kernel_size = (weight.size()[-2], weight.size()[-1])
kernel_shape = trt.Dims(list(kernel_size))

is_weight_constant = (weight.producer is not None
and weight.producer.type == trt.LayerType.CONSTANT)
weight = weight.producer.weights if is_weight_constant else trt.Weights()
trt_weight, is_weight_constant = _get_trt_weight(weight)
weight_tensor = weight

if bias is not None:
is_bias_constant = (bias.producer is not None
and bias.producer.type == trt.LayerType.CONSTANT)
bias = bias.producer.weights if is_bias_constant else trt.Weights()
bias_tensor = bias
trt_bias, is_bias_constant = _get_trt_weight(bias)
else:
bias_tensor = None
trt_bias = None

layer = default_trtnet().add_convolution_nd(input.trt_tensor, noutput,
kernel_size, weight, bias)
kernel_shape, trt_weight,
trt_bias)
layer.stride_nd = stride
layer.padding_nd = padding
layer.dilation_nd = dilation
Expand All @@ -3625,9 +3648,9 @@ def conv2d(input: Tensor,
layer.post_padding = post_padding

if not is_weight_constant:
layer.set_input(1, weight.trt_tensor)
if bias is not None and not is_bias_constant:
layer.set_input(2, bias.trt_tensor)
layer.set_input(1, weight_tensor.trt_tensor)
if bias_tensor is not None and not is_bias_constant:
layer.set_input(2, bias_tensor.trt_tensor)

output = _create_tensor(layer.get_output(0), layer)

Expand Down Expand Up @@ -3666,28 +3689,31 @@ def conv3d(input: Tensor,

noutput = weight.size()[0]
kernel_size = (weight.size()[-3], weight.size()[-2], weight.size()[-1])
kernel_shape = trt.Dims(list(kernel_size))

is_weight_constant = (weight.producer is not None
and weight.producer.type == trt.LayerType.CONSTANT)
weight = weight.producer.weights if is_weight_constant else trt.Weights()
trt_weight, is_weight_constant = _get_trt_weight(weight)
weight_tensor = weight

if bias is not None:
is_bias_constant = (bias.producer is not None
and bias.producer.type == trt.LayerType.CONSTANT)
bias = bias.producer.weights if is_bias_constant else trt.Weights()
bias_tensor = bias
trt_bias, is_bias_constant = _get_trt_weight(bias)
else:
bias_tensor = None
trt_bias = None

layer = default_trtnet().add_convolution_nd(input.trt_tensor, noutput,
kernel_size, weight, bias)
kernel_shape, trt_weight,
trt_bias)
layer.stride_nd = stride
layer.padding_nd = padding
layer.dilation_nd = dilation
layer.num_groups = groups
layer.dilation_nd = dilation

if not is_weight_constant:
layer.set_input(1, weight.trt_tensor)
if bias is not None and not is_bias_constant:
layer.set_input(2, bias.trt_tensor)
layer.set_input(1, weight_tensor.trt_tensor)
if bias_tensor is not None and not is_bias_constant:
layer.set_input(2, bias_tensor.trt_tensor)

output = _create_tensor(layer.get_output(0), layer)
return output
Expand All @@ -3713,26 +3739,29 @@ def conv_transpose2d(input: Tensor,

noutput = weight.size()[1]
kernel_size = (weight.size()[-2], weight.size()[-1])
kernel_shape = trt.Dims(list(kernel_size))

is_weight_constant = (weight.producer is not None
and weight.producer.type == trt.LayerType.CONSTANT)
weight = weight.producer.weights if is_weight_constant else trt.Weights()
trt_weight, is_weight_constant = _get_trt_weight(weight)
weight_tensor = weight

if bias is not None:
is_bias_constant = (bias.producer is not None
and bias.producer.type == trt.LayerType.CONSTANT)
bias = bias.producer.weights if is_bias_constant else trt.Weights()
bias_tensor = bias
trt_bias, is_bias_constant = _get_trt_weight(bias)
else:
bias_tensor = None
trt_bias = None

layer = default_trtnet().add_deconvolution_nd(input.trt_tensor, noutput,
kernel_size, weight, bias)
kernel_shape, trt_weight,
trt_bias)
layer.stride_nd = stride
layer.padding_nd = padding
layer.num_groups = groups

if not is_weight_constant:
layer.set_input(1, weight.trt_tensor)
if bias is not None and not is_bias_constant:
layer.set_input(2, bias.trt_tensor)
layer.set_input(1, weight_tensor.trt_tensor)
if bias_tensor is not None and not is_bias_constant:
layer.set_input(2, bias_tensor.trt_tensor)

output = _create_tensor(layer.get_output(0), layer)

Expand Down
17 changes: 12 additions & 5 deletions tensorrt_llm/parameter.py
Original file line number Diff line number Diff line change
Expand Up @@ -243,16 +243,23 @@ def set_value_or_dummy(self, v: Union[np.ndarray, torch.Tensor]):

self.value = v

def set_name(self, name: str, network):
def set_name(self, name: str, network: Network):
self._name = name
if self.is_managed(network):
self._get_weights(network).name = name
return True
else:
return network.trt_network.set_weights_name(
self._get_weights(network), name)

def _get_weights(self, network) -> trt.Weights | Tensor | None:
weights = self._get_weights(network)
# TensorRT bindings may return numpy array instead of trt.Weights
if isinstance(weights, np.ndarray):
trt_dtype = np_dtype_to_trt(
weights.dtype
) if weights.dtype != np.object_ else self._dtype
trt_count = int(np.prod(weights.shape))
weights = trt.Weights(trt_dtype, weights.ctypes.data, trt_count)
return network.trt_network.set_weights_name(weights, name)

def _get_weights(self, network: Network) -> trt.Weights | Tensor | None:
tensor = network.get_parameter_tensor(self)
if self.is_managed(network):
return tensor
Expand Down
Loading