community[patch]: Invoke callback prior to yielding token (#17348)

**Description:** Invoke the `on_llm_new_token` callback before yielding each token in the `ChatOllama` stream methods.
**Issue:** [Callback for on_llm_new_token should be invoked before the token is yielded by the model #16913](https://github.com/langchain-ai/langchain/issues/16913)

Co-authored-by: Robby <h0rv@users.noreply.github.com>
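
The substance of the change is an ordering guarantee: `on_llm_new_token` must fire before the chunk reaches the consumer of the generator, so callback handlers observe every token no later than the caller does. A minimal sketch of the pattern, with a hypothetical `emit` callback standing in for the run manager:

```python
from typing import Callable, Iterator, List


def stream_tokens(tokens: List[str], emit: Callable[[str], None]) -> Iterator[str]:
    """Yield tokens, invoking the callback *before* each yield.

    Yielding first would let the consumer act on (or abandon) the token
    before any handler had seen it.
    """
    for token in tokens:
        emit(token)   # the callback observes the token first...
        yield token   # ...then the consumer receives it


seen: List[str] = []
for tok in stream_tokens(["Why ", "is ", "the ", "sky ", "blue?"], seen.append):
    # By the time the consumer holds `tok`, the callback has already run.
    assert seen[-1] == tok
```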

```diff
@@ -313,12 +313,12 @@ class ChatOllama(BaseChatModel, _OllamaCommon):
         try:
             for stream_resp in self._create_chat_stream(messages, stop, **kwargs):
                 if stream_resp:
                     chunk = _chat_stream_response_to_chat_generation_chunk(stream_resp)
-                    yield chunk
                     if run_manager:
                         run_manager.on_llm_new_token(
                             chunk.text,
                             verbose=self.verbose,
                         )
+                    yield chunk
         except OllamaEndpointNotFoundError:
             yield from self._legacy_stream(messages, stop, **kwargs)
```
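
How the reordering shows up in practice: a streaming handler attached to the model now emits each token before the caller's loop receives the chunk. A usage sketch, assuming a local Ollama server with a `llama2` model pulled (model name and prompt are placeholders):

```python
from langchain_community.chat_models import ChatOllama
from langchain_core.callbacks import StreamingStdOutCallbackHandler

chat = ChatOllama(model="llama2", callbacks=[StreamingStdOutCallbackHandler()])

# With this patch, the handler prints each token before the corresponding
# chunk is delivered to this loop.
for chunk in chat.stream("Why is the sky blue?"):
    pass
```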
```diff
@@ -332,12 +332,12 @@ class ChatOllama(BaseChatModel, _OllamaCommon):
         async for stream_resp in self._acreate_chat_stream(messages, stop, **kwargs):
             if stream_resp:
                 chunk = _chat_stream_response_to_chat_generation_chunk(stream_resp)
-                yield chunk
                 if run_manager:
                     await run_manager.on_llm_new_token(
                         chunk.text,
                         verbose=self.verbose,
                     )
+                yield chunk
 
     @deprecated("0.0.3", alternative="_stream")
     def _legacy_stream(
```
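
The async path gets the same reordering: the handler's `on_llm_new_token` is awaited before the chunk is yielded. A corresponding sketch using `astream` (the model name is again a placeholder):

```python
import asyncio

from langchain_community.chat_models import ChatOllama


async def main() -> None:
    chat = ChatOllama(model="llama2")
    # Any attached async handlers are awaited before each chunk arrives here.
    async for chunk in chat.astream("Why is the sky blue?"):
        print(chunk.content, end="", flush=True)


asyncio.run(main())
```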
```diff
@@ -351,9 +351,9 @@ class ChatOllama(BaseChatModel, _OllamaCommon):
         for stream_resp in self._create_generate_stream(prompt, stop, **kwargs):
             if stream_resp:
                 chunk = _stream_response_to_chat_generation_chunk(stream_resp)
-                yield chunk
                 if run_manager:
                     run_manager.on_llm_new_token(
                         chunk.text,
                         verbose=self.verbose,
                     )
+                yield chunk
```
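
A handler-side sketch of the guarantee this commit restores; `TokenRecorder` is hypothetical, not part of the library, and the model name is a placeholder:

```python
from typing import Any, List

from langchain_community.chat_models import ChatOllama
from langchain_core.callbacks import BaseCallbackHandler


class TokenRecorder(BaseCallbackHandler):
    """Records every token the moment the model reports it."""

    def __init__(self) -> None:
        self.tokens: List[str] = []

    def on_llm_new_token(self, token: str, **kwargs: Any) -> None:
        self.tokens.append(token)


recorder = TokenRecorder()
chat = ChatOllama(model="llama2", callbacks=[recorder])
for chunk in chat.stream("Count to three."):
    # After this patch the recorder is always at least as far along as the
    # consumer; before it, the most recent token could lag one step behind.
    assert recorder.tokens and recorder.tokens[-1] == chunk.content
```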