diff --git a/libs/community/langchain_community/chat_models/ollama.py b/libs/community/langchain_community/chat_models/ollama.py
index 92b5afb52b..73a194a90f 100644
--- a/libs/community/langchain_community/chat_models/ollama.py
+++ b/libs/community/langchain_community/chat_models/ollama.py
@@ -313,12 +313,12 @@ class ChatOllama(BaseChatModel, _OllamaCommon):
             for stream_resp in self._create_chat_stream(messages, stop, **kwargs):
                 if stream_resp:
                     chunk = _chat_stream_response_to_chat_generation_chunk(stream_resp)
-                    yield chunk
                     if run_manager:
                         run_manager.on_llm_new_token(
                             chunk.text,
                             verbose=self.verbose,
                         )
+                    yield chunk
         except OllamaEndpointNotFoundError:
             yield from self._legacy_stream(messages, stop, **kwargs)

@@ -332,12 +332,12 @@ class ChatOllama(BaseChatModel, _OllamaCommon):
         async for stream_resp in self._acreate_chat_stream(messages, stop, **kwargs):
             if stream_resp:
                 chunk = _chat_stream_response_to_chat_generation_chunk(stream_resp)
-                yield chunk
                 if run_manager:
                     await run_manager.on_llm_new_token(
                         chunk.text,
                         verbose=self.verbose,
                     )
+                yield chunk

     @deprecated("0.0.3", alternative="_stream")
     def _legacy_stream(
@@ -351,9 +351,9 @@ class ChatOllama(BaseChatModel, _OllamaCommon):
         for stream_resp in self._create_generate_stream(prompt, stop, **kwargs):
            if stream_resp:
                chunk = _stream_response_to_chat_generation_chunk(stream_resp)
-                yield chunk
                if run_manager:
                    run_manager.on_llm_new_token(
                        chunk.text,
                        verbose=self.verbose,
                    )
+                yield chunk
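
Why the reorder matters (a minimal sketch, not LangChain code): when `yield chunk` comes before the callback, a consumer that stops iterating early closes the generator at the yield point, so run_manager.on_llm_new_token never runs for that chunk. Invoking the callback before yielding ensures it fires for every chunk produced. The stream_with_callback helper below is hypothetical and only illustrates the generator ordering:

def stream_with_callback(tokens, on_token):
    # Hypothetical helper mirroring the patched ordering: callback first, then yield.
    for token in tokens:
        on_token(token)  # callback runs before control passes to the consumer
        yield token      # if the consumer breaks here, the callback has already fired

seen = []
for tok in stream_with_callback(["a", "b", "c"], seen.append):
    if tok == "b":
        break  # stop early, as a downstream consumer might

print(seen)  # ['a', 'b'] -- with yield-first ordering, 'b' would never be reported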