Skip to content
Closed

debug #12880

Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions README.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
<div align="center">


TensorRT LLM
===========================
<h4>TensorRT LLM provides users with an easy-to-use Python API to define Large Language Models (LLMs) and supports
Expand Down
3 changes: 2 additions & 1 deletion docker/Dockerfile.multi
Original file line number Diff line number Diff line change
Expand Up @@ -76,7 +76,8 @@ COPY constraints.txt /tmp/constraints.txt
RUN pip3 install --no-cache-dir -r /tmp/constraints.txt && rm /tmp/constraints.txt

# Remove nbconvert to avoid https://github.com/advisories/GHSA-xm59-rqc7-hhvf in the base NGC PyTorch image.
RUN pip3 uninstall -y nbconvert || true
# Remove pillow to avoid https://github.com/advisories/GHSA-cfh3-3jmp-rvhc in the base NGC PyTorch image.
RUN pip3 uninstall -y nbconvert pillow || true

# Install UCX, NIXL, etcd
# TODO: Combine these into the main install.sh script
Expand Down
115 changes: 72 additions & 43 deletions tensorrt_llm/functional.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@
fp16_array, fp32_array, get_sm_version, int32_array,
int64_array, np_dtype_to_trt, str_dtype_to_trt,
trt_dtype_to_np, trt_dtype_to_str)
from .network import PluginInfo, set_np_weight, set_plugin_info
from .network import PluginInfo, get_np_weight, set_np_weight, set_plugin_info
from .plugin import TRT_LLM_PLUGIN_NAMESPACE, current_all_reduce_helper
from .quantization import QuantMode

Expand Down Expand Up @@ -3543,6 +3543,24 @@ def avg_pool2d(input: Tensor,
return output


def _get_trt_weight(weight: Tensor) -> Tuple[trt.Weights, bool]:
    """Resolve a weight ``Tensor`` into a ``trt.Weights`` for layer creation.

    Returns ``(trt_weight, is_constant)``. ``is_constant`` is True when the
    tensor is produced by a TensorRT constant layer; in that case
    ``trt_weight`` carries real data. Otherwise ``trt_weight`` is an empty
    placeholder and the caller is expected to wire the weight tensor in via
    ``layer.set_input`` instead.
    """
    producer = weight.producer
    if producer is None or producer.type != trt.LayerType.CONSTANT:
        # Not backed by a constant layer: return an empty placeholder.
        return trt.Weights(), False

    ndarray = get_np_weight(default_trtnet(), producer.name)
    if ndarray is None:
        # No numpy copy registered for this layer; read the weights straight
        # off the constant layer. Reclassing to IConstantLayer makes its
        # `.weights` accessor available on the generic layer handle.
        producer.__class__ = trt.IConstantLayer
        return producer.weights, True

    # Wrap the registered numpy array without copying: pass its dtype,
    # raw data pointer, and flat element count.
    trt_weight = trt.Weights(np_dtype_to_trt(ndarray.dtype),
                             ndarray.ctypes.data,
                             int(np.prod(ndarray.shape)))
    return trt_weight, True


def conv1d(input: Tensor,
weight: Tensor,
bias: Optional[Tensor] = None,
Expand All @@ -3553,30 +3571,32 @@ def conv1d(input: Tensor,

noutput = weight.size()[0]
kernel_size = weight.size()[-2]
is_weight_constant = (weight.producer is not None
and weight.producer.type == trt.LayerType.CONSTANT)
weight = weight.producer.weights if is_weight_constant else trt.Weights()
kernel_shape = trt.Dims([kernel_size, 1])

trt_weight, is_weight_constant = _get_trt_weight(weight)
weight_tensor = weight

if bias is not None:
is_bias_constant = (bias.producer is not None
and bias.producer.type == trt.LayerType.CONSTANT)
bias = bias.producer.weights if is_bias_constant else trt.Weights()
bias_tensor = bias
trt_bias, is_bias_constant = _get_trt_weight(bias)
else:
bias_tensor = None
trt_bias = None

input_shuffled = stack([input], dim=input.ndim())
kernel_size = trt.Dims([kernel_size, 1])

layer = default_trtnet().add_convolution_nd(input_shuffled.trt_tensor,
noutput, kernel_size, weight,
bias)
noutput, kernel_shape,
trt_weight, trt_bias)
layer.stride_nd = (stride, 2)
layer.padding_nd = (padding, 0)
layer.dilation_nd = (dilation, 2)
layer.num_groups = groups

if not is_weight_constant:
layer.set_input(1, weight.trt_tensor)
if bias is not None and not is_bias_constant:
layer.set_input(2, bias.trt_tensor)
layer.set_input(1, weight_tensor.trt_tensor)
if bias_tensor is not None and not is_bias_constant:
layer.set_input(2, bias_tensor.trt_tensor)

output_2d = _create_tensor(layer.get_output(0), layer)
output_1d = squeeze(output_2d, dim=-1)
Expand All @@ -3602,18 +3622,21 @@ def conv2d(input: Tensor,

noutput = weight.size()[0]
kernel_size = (weight.size()[-2], weight.size()[-1])
kernel_shape = trt.Dims(list(kernel_size))

is_weight_constant = (weight.producer is not None
and weight.producer.type == trt.LayerType.CONSTANT)
weight = weight.producer.weights if is_weight_constant else trt.Weights()
trt_weight, is_weight_constant = _get_trt_weight(weight)
weight_tensor = weight

if bias is not None:
is_bias_constant = (bias.producer is not None
and bias.producer.type == trt.LayerType.CONSTANT)
bias = bias.producer.weights if is_bias_constant else trt.Weights()
bias_tensor = bias
trt_bias, is_bias_constant = _get_trt_weight(bias)
else:
bias_tensor = None
trt_bias = None

layer = default_trtnet().add_convolution_nd(input.trt_tensor, noutput,
kernel_size, weight, bias)
kernel_shape, trt_weight,
trt_bias)
layer.stride_nd = stride
layer.padding_nd = padding
layer.dilation_nd = dilation
Expand All @@ -3625,9 +3648,9 @@ def conv2d(input: Tensor,
layer.post_padding = post_padding

if not is_weight_constant:
layer.set_input(1, weight.trt_tensor)
if bias is not None and not is_bias_constant:
layer.set_input(2, bias.trt_tensor)
layer.set_input(1, weight_tensor.trt_tensor)
if bias_tensor is not None and not is_bias_constant:
layer.set_input(2, bias_tensor.trt_tensor)

output = _create_tensor(layer.get_output(0), layer)

Expand Down Expand Up @@ -3666,28 +3689,31 @@ def conv3d(input: Tensor,

noutput = weight.size()[0]
kernel_size = (weight.size()[-3], weight.size()[-2], weight.size()[-1])
kernel_shape = trt.Dims(list(kernel_size))

is_weight_constant = (weight.producer is not None
and weight.producer.type == trt.LayerType.CONSTANT)
weight = weight.producer.weights if is_weight_constant else trt.Weights()
trt_weight, is_weight_constant = _get_trt_weight(weight)
weight_tensor = weight

if bias is not None:
is_bias_constant = (bias.producer is not None
and bias.producer.type == trt.LayerType.CONSTANT)
bias = bias.producer.weights if is_bias_constant else trt.Weights()
bias_tensor = bias
trt_bias, is_bias_constant = _get_trt_weight(bias)
else:
bias_tensor = None
trt_bias = None

layer = default_trtnet().add_convolution_nd(input.trt_tensor, noutput,
kernel_size, weight, bias)
kernel_shape, trt_weight,
trt_bias)
layer.stride_nd = stride
layer.padding_nd = padding
layer.dilation_nd = dilation
layer.num_groups = groups
layer.dilation_nd = dilation

if not is_weight_constant:
layer.set_input(1, weight.trt_tensor)
if bias is not None and not is_bias_constant:
layer.set_input(2, bias.trt_tensor)
layer.set_input(1, weight_tensor.trt_tensor)
if bias_tensor is not None and not is_bias_constant:
layer.set_input(2, bias_tensor.trt_tensor)

output = _create_tensor(layer.get_output(0), layer)
return output
Expand All @@ -3713,26 +3739,29 @@ def conv_transpose2d(input: Tensor,

noutput = weight.size()[1]
kernel_size = (weight.size()[-2], weight.size()[-1])
kernel_shape = trt.Dims(list(kernel_size))

is_weight_constant = (weight.producer is not None
and weight.producer.type == trt.LayerType.CONSTANT)
weight = weight.producer.weights if is_weight_constant else trt.Weights()
trt_weight, is_weight_constant = _get_trt_weight(weight)
weight_tensor = weight

if bias is not None:
is_bias_constant = (bias.producer is not None
and bias.producer.type == trt.LayerType.CONSTANT)
bias = bias.producer.weights if is_bias_constant else trt.Weights()
bias_tensor = bias
trt_bias, is_bias_constant = _get_trt_weight(bias)
else:
bias_tensor = None
trt_bias = None

layer = default_trtnet().add_deconvolution_nd(input.trt_tensor, noutput,
kernel_size, weight, bias)
kernel_shape, trt_weight,
trt_bias)
layer.stride_nd = stride
layer.padding_nd = padding
layer.num_groups = groups

if not is_weight_constant:
layer.set_input(1, weight.trt_tensor)
if bias is not None and not is_bias_constant:
layer.set_input(2, bias.trt_tensor)
layer.set_input(1, weight_tensor.trt_tensor)
if bias_tensor is not None and not is_bias_constant:
layer.set_input(2, bias_tensor.trt_tensor)

output = _create_tensor(layer.get_output(0), layer)

Expand Down
17 changes: 12 additions & 5 deletions tensorrt_llm/parameter.py
Original file line number Diff line number Diff line change
Expand Up @@ -243,16 +243,23 @@ def set_value_or_dummy(self, v: Union[np.ndarray, torch.Tensor]):

self.value = v

def set_name(self, name: str, network):
def set_name(self, name: str, network: Network):
self._name = name
if self.is_managed(network):
self._get_weights(network).name = name
return True
else:
return network.trt_network.set_weights_name(
self._get_weights(network), name)

def _get_weights(self, network) -> trt.Weights | Tensor | None:
weights = self._get_weights(network)
# TensorRT bindings may return numpy array instead of trt.Weights
if isinstance(weights, np.ndarray):
trt_dtype = np_dtype_to_trt(
weights.dtype
) if weights.dtype != np.object_ else self._dtype
trt_count = int(np.prod(weights.shape))
weights = trt.Weights(trt_dtype, weights.ctypes.data, trt_count)
return network.trt_network.set_weights_name(weights, name)

def _get_weights(self, network: Network) -> trt.Weights | Tensor | None:
tensor = network.get_parameter_tensor(self)
if self.is_managed(network):
return tensor
Expand Down
Loading