diff --git a/libs/partners/nvidia-trt/langchain_nvidia_trt/llms.py b/libs/partners/nvidia-trt/langchain_nvidia_trt/llms.py index 36e1e6e5ca..0ea1fca1df 100644 --- a/libs/partners/nvidia-trt/langchain_nvidia_trt/llms.py +++ b/libs/partners/nvidia-trt/langchain_nvidia_trt/llms.py @@ -199,10 +199,13 @@ class TritonTensorRTLLM(BaseLLM): result_queue = self._invoke_triton(self.model_name, inputs, outputs, stop) result_str = "" - for token in result_queue: - result_str += token - - self.client.stop_stream() + try: + for token in result_queue: + if isinstance(token, Exception): + raise token + result_str += token + finally: + self.client.stop_stream() return result_str