community[patch]: Invoke callback prior to yielding token (#17348)

**Description:** Invoke the `on_llm_new_token` callback before yielding each token in the `ChatOllama` stream methods.
**Issue:** [Callback for on_llm_new_token should be invoked before the token is yielded by the model #16913](https://github.com/langchain-ai/langchain/issues/16913)

Co-authored-by: Robby <h0rv@users.noreply.github.com>
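
The substance of the change is an ordering guarantee: `on_llm_new_token` must fire before the chunk reaches the consumer of the generator, so callback handlers observe every token no later than the caller does. A minimal sketch of the pattern, with a hypothetical `emit` callback standing in for the run manager:

```python
from typing import Callable, Iterator, List


def stream_tokens(tokens: List[str], emit: Callable[[str], None]) -> Iterator[str]:
    """Yield tokens, invoking the callback *before* each yield.

    Yielding first would let the consumer act on (or abandon) the token
    before any handler had seen it.
    """
    for token in tokens:
        emit(token)   # the callback observes the token first...
        yield token   # ...then the consumer receives it


seen: List[str] = []
for tok in stream_tokens(["Why ", "is ", "the ", "sky ", "blue?"], seen.append):
    # By the time the consumer holds `tok`, the callback has already run.
    assert seen[-1] == tok
```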

```diff
@@ -313,12 +313,12 @@ class ChatOllama(BaseChatModel, _OllamaCommon):
         try:
             for stream_resp in self._create_chat_stream(messages, stop, **kwargs):
                 if stream_resp:
                     chunk = _chat_stream_response_to_chat_generation_chunk(stream_resp)
-                    yield chunk
                     if run_manager:
                         run_manager.on_llm_new_token(
                             chunk.text,
                             verbose=self.verbose,
                         )
+                    yield chunk
         except OllamaEndpointNotFoundError:
             yield from self._legacy_stream(messages, stop, **kwargs)
```
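
How the reordering shows up in practice: a streaming handler attached to the model now emits each token before the caller's loop receives the chunk. A usage sketch, assuming a local Ollama server with a `llama2` model pulled (model name and prompt are placeholders):

```python
from langchain_community.chat_models import ChatOllama
from langchain_core.callbacks import StreamingStdOutCallbackHandler

chat = ChatOllama(model="llama2", callbacks=[StreamingStdOutCallbackHandler()])

# With this patch, the handler prints each token before the corresponding
# chunk is delivered to this loop.
for chunk in chat.stream("Why is the sky blue?"):
    pass
```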
```diff
@@ -332,12 +332,12 @@ class ChatOllama(BaseChatModel, _OllamaCommon):
         async for stream_resp in self._acreate_chat_stream(messages, stop, **kwargs):
             if stream_resp:
                 chunk = _chat_stream_response_to_chat_generation_chunk(stream_resp)
-                yield chunk
                 if run_manager:
                     await run_manager.on_llm_new_token(
                         chunk.text,
                         verbose=self.verbose,
                     )
+                yield chunk
 
     @deprecated("0.0.3", alternative="_stream")
     def _legacy_stream(
```
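
The async path gets the same reordering: the handler's `on_llm_new_token` is awaited before the chunk is yielded. A corresponding sketch using `astream` (the model name is again a placeholder):

```python
import asyncio

from langchain_community.chat_models import ChatOllama


async def main() -> None:
    chat = ChatOllama(model="llama2")
    # Any attached async handlers are awaited before each chunk arrives here.
    async for chunk in chat.astream("Why is the sky blue?"):
        print(chunk.content, end="", flush=True)


asyncio.run(main())
```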
```diff
@@ -351,9 +351,9 @@ class ChatOllama(BaseChatModel, _OllamaCommon):
         for stream_resp in self._create_generate_stream(prompt, stop, **kwargs):
             if stream_resp:
                 chunk = _stream_response_to_chat_generation_chunk(stream_resp)
-                yield chunk
                 if run_manager:
                     run_manager.on_llm_new_token(
                         chunk.text,
                         verbose=self.verbose,
                     )
+                yield chunk
```
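
A handler-side sketch of the guarantee this commit restores; `TokenRecorder` is hypothetical, not part of the library, and the model name is a placeholder:

```python
from typing import Any, List

from langchain_community.chat_models import ChatOllama
from langchain_core.callbacks import BaseCallbackHandler


class TokenRecorder(BaseCallbackHandler):
    """Records every token the moment the model reports it."""

    def __init__(self) -> None:
        self.tokens: List[str] = []

    def on_llm_new_token(self, token: str, **kwargs: Any) -> None:
        self.tokens.append(token)


recorder = TokenRecorder()
chat = ChatOllama(model="llama2", callbacks=[recorder])
for chunk in chat.stream("Count to three."):
    # After this patch the recorder is always at least as far along as the
    # consumer; before it, the most recent token could lag one step behind.
    assert recorder.tokens and recorder.tokens[-1] == chunk.content
```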