mirror of
https://github.com/hwchase17/langchain
synced 2024-11-18 09:25:54 +00:00
nvidia-trt[patch]: Invoke callback prior to yielding token (#18446)
## PR title
nvidia-trt[patch]: Invoke callback prior to yielding

## PR message
- Description: Invoke the `on_llm_new_token` callback prior to yielding the token in the `_stream` method.
- Issue: https://github.com/langchain-ai/langchain/issues/16913
- Dependencies: None
This commit is contained in:
parent
275877980e
commit
a63cee04ac
@@ -176,9 +176,9 @@ class TritonTensorRTLLM(BaseLLM):
         result_queue = self._invoke_triton(self.model_name, inputs, outputs, stop_words)
 
         for token in result_queue:
-            yield GenerationChunk(text=token)
             if run_manager:
                 run_manager.on_llm_new_token(token)
+            yield GenerationChunk(text=token)
 
         self.client.stop_stream()
 
Loading…
Reference in New Issue
Block a user