diff --git a/tensorrt_llm/_torch/autotuner.py b/tensorrt_llm/_torch/autotuner.py index 691ecb45911..c71b8583e8d 100644 --- a/tensorrt_llm/_torch/autotuner.py +++ b/tensorrt_llm/_torch/autotuner.py @@ -511,6 +511,11 @@ def _deserialize_cache_from_json( cache = {} cache_data = serializable_cache["cache_data"] + def lists_to_tuples(obj): + if isinstance(obj, list): + return tuple(lists_to_tuples(x) for x in obj) + return obj + for key_str, value in cache_data.items(): # Reconstruct the tuple key safely try: @@ -521,7 +526,7 @@ def _deserialize_cache_from_json( continue runner_id = value["runner_id"] - tactic = value["tactic"] + tactic = lists_to_tuples(value["tactic"]) min_time = value["min_time"] cache[key] = (runner_id, tactic, min_time) diff --git a/tensorrt_llm/_torch/custom_ops/torch_custom_ops.py b/tensorrt_llm/_torch/custom_ops/torch_custom_ops.py index d40c1fd5844..89607193b18 100644 --- a/tensorrt_llm/_torch/custom_ops/torch_custom_ops.py +++ b/tensorrt_llm/_torch/custom_ops/torch_custom_ops.py @@ -486,10 +486,10 @@ def __init__( self.cublaslt_runner = CublasLtFP4GemmRunner.runner_dict[instance_key] def unique_id(self): - return hash(( + return ( self.to_userbuffers, self.output_dtype, - )) + ) def get_valid_tactics(self, inputs: List[torch.Tensor], profile: OptimizationProfile, **kwargs) -> List[int]: