community[patch]: Invoke on_llm_new_token callback before yielding chunk (#24938)

**Description**: Invoke the `on_llm_new_token` callback before yielding the chunk in streaming mode
**Issue**: [#16913](https://github.com/langchain-ai/langchain/issues/16913)
Commit 2204d8cb7d (parent ff6274d32d) by Anneli Samuel, 2024-08-01 19:39:04 +03:00, committed via GitHub


@@ -186,9 +186,9 @@ class ChatMLX(BaseChatModel):
             # yield text, if any
             if text:
                 chunk = ChatGenerationChunk(message=AIMessageChunk(content=text))
-                yield chunk
                 if run_manager:
                     run_manager.on_llm_new_token(text, chunk=chunk)
+                yield chunk
             # break if stop sequence found
             if token == eos_token_id or (stop is not None and text in stop):
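The reordering above can be illustrated with a minimal standalone sketch (hypothetical names, not the actual `ChatMLX` code): a streaming generator that fires its token callback *before* handing each chunk to the consumer, so callback handlers observe every token even if the consumer stops iterating early.

```python
# Minimal sketch of callback-before-yield ordering in a streaming generator.
# `stream_tokens` and `on_new_token` are illustrative names, not LangChain APIs.
from typing import Callable, Iterator, List, Optional


def stream_tokens(
    tokens: List[str],
    on_new_token: Optional[Callable[[str], None]] = None,
) -> Iterator[str]:
    """Yield non-empty tokens, invoking the callback before each yield."""
    for text in tokens:
        if text:
            if on_new_token:
                on_new_token(text)  # callback fires first...
            yield text              # ...then the chunk reaches the consumer


seen: List[str] = []
# Consumer stops after the first chunk, but the callback still saw it
# before the yield handed control back.
stream = stream_tokens(["Hel", "lo", ""], seen.append)
first = next(stream)
```

With the old ordering (`yield` first), a consumer that breaks out of the loop could leave the callback un-invoked for the chunk it just received; invoking the callback first removes that gap.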