# langchain/libs/community/langchain_community/chat_models/ollama.py

import json
from typing import Any, Iterator, List, Optional

from langchain_core.callbacks import (
    CallbackManagerForLLMRun,
)
from langchain_core.language_models.chat_models import BaseChatModel
from langchain_core.messages import (
    AIMessage,
    AIMessageChunk,
    BaseMessage,
    ChatMessage,
    HumanMessage,
    SystemMessage,
)
from langchain_core.outputs import ChatGeneration, ChatGenerationChunk, ChatResult

from langchain_community.llms.ollama import _OllamaCommon


def _stream_response_to_chat_generation_chunk(
    stream_response: str,
) -> ChatGenerationChunk:
    """Build a chat generation chunk from one raw streamed JSON payload.

    Args:
        stream_response: A JSON document, as text, for a single streamed item.

    Returns:
        A ``ChatGenerationChunk`` whose message content is the payload's
        ``"response"`` value (empty string when the key is absent). The
        full parsed payload is attached as ``generation_info`` only when its
        ``"done"`` field is exactly ``True``; otherwise ``generation_info``
        is ``None``.
    """
    payload = json.loads(stream_response)
    # Strict identity check: truthy non-bool values of "done" do not count.
    is_final = payload.get("done") is True
    text = payload.get("response", "")
    return ChatGenerationChunk(
        message=AIMessageChunk(content=text),
        generation_info=payload if is_final else None,
    )


class ChatOllama(BaseChatModel, _OllamaCommon):
    """Ollama locally runs large language models.

    To use, follow the instructions at https://ollama.ai/.

    Chat messages are flattened into a single text prompt (see
    ``_format_messages_as_text``) and then passed to the inherited
    ``_create_stream`` / ``_stream_with_aggregation`` helpers.

    Example:
        .. code-block:: python

            from langchain_community.chat_models import ChatOllama
            ollama = ChatOllama(model="llama2")
    """

    @property
    def _llm_type(self) -> str:
        """Identifier string for this chat model type."""
        return "ollama-chat"

    @classmethod
    def is_lc_serializable(cls) -> bool:
        """Report that this model is not serializable by Langchain."""
        return False

    def _format_message_as_text(self, message: BaseMessage) -> str:
        """Render a single message as the text fragment used in the prompt.

        Args:
            message: The message to render.

        Returns:
            ``"\\n\\n<Role>: <content>"`` for a ``ChatMessage`` (role
            capitalized), ``"[INST] <content> [/INST]"`` for a
            ``HumanMessage``, the bare content for an ``AIMessage``, and
            ``"<<SYS>> <content> <</SYS>>"`` for a ``SystemMessage``.

        Raises:
            ValueError: If the message is none of the types listed above.
        """
        # The checks run in this fixed order; the first matching type wins.
        if isinstance(message, ChatMessage):
            return f"\n\n{message.role.capitalize()}: {message.content}"
        if isinstance(message, HumanMessage):
            return f"[INST] {message.content} [/INST]"
        if isinstance(message, AIMessage):
            return f"{message.content}"
        if isinstance(message, SystemMessage):
            return f"<<SYS>> {message.content} <</SYS>>"
        raise ValueError(f"Got unknown type {message}")

    def _format_messages_as_text(self, messages: List[BaseMessage]) -> str:
        """Render every message and join the fragments with newlines.

        Args:
            messages: Messages to render, in prompt order.

        Returns:
            The rendered fragments separated by a single ``"\\n"``.
        """
        rendered = map(self._format_message_as_text, messages)
        return "\n".join(rendered)

    def _generate(
        self,
        messages: List[BaseMessage],
        stop: Optional[List[str]] = None,
        run_manager: Optional[CallbackManagerForLLMRun] = None,
        **kwargs: Any,
    ) -> ChatResult:
        """Call out to Ollama's generate endpoint.

        Args:
            messages: The list of base messages to pass into the model.
            stop: Optional list of stop words to use when generating.
            run_manager: Optional callback manager, forwarded to the
                aggregation helper.
            **kwargs: Extra keyword arguments forwarded unchanged to the
                aggregation helper.

        Returns:
            Chat generations from the model: a ``ChatResult`` holding one
            ``ChatGeneration`` built from the aggregated chunk's text and
            generation info.

        Example:
            .. code-block:: python

                response = ollama([
                    HumanMessage(content="Tell me about the history of AI")
                ])
        """
        prompt_text = self._format_messages_as_text(messages)
        aggregated = super()._stream_with_aggregation(
            prompt_text,
            stop=stop,
            run_manager=run_manager,
            verbose=self.verbose,
            **kwargs,
        )
        reply = AIMessage(content=aggregated.text)
        generation = ChatGeneration(
            message=reply,
            generation_info=aggregated.generation_info,
        )
        return ChatResult(generations=[generation])

    def _stream(
        self,
        messages: List[BaseMessage],
        stop: Optional[List[str]] = None,
        run_manager: Optional[CallbackManagerForLLMRun] = None,
        **kwargs: Any,
    ) -> Iterator[ChatGenerationChunk]:
        """Stream chat generation chunks for the given messages.

        Args:
            messages: The list of base messages to pass into the model.
            stop: Optional list of stop words, passed positionally to
                ``_create_stream``.
            run_manager: Optional callback manager notified of each chunk's
                text.
            **kwargs: Extra keyword arguments forwarded to ``_create_stream``.

        Yields:
            One ``ChatGenerationChunk`` per non-empty item produced by
            ``_create_stream``.
        """
        prompt_text = self._format_messages_as_text(messages)
        for raw_item in self._create_stream(prompt_text, stop, **kwargs):
            # Falsy items from the stream are skipped entirely.
            if not raw_item:
                continue
            piece = _stream_response_to_chat_generation_chunk(raw_item)
            yield piece
            # The callback runs only once the consumer resumes the generator
            # after receiving the chunk.
            if run_manager:
                run_manager.on_llm_new_token(
                    piece.text,
                    verbose=self.verbose,
                )
community[major], core[patch], langchain[patch], experimental[patch]: Create langchain-community (#14463)

Moved the following modules to new package langchain-community in a backwards compatible fashion:

```
mv langchain/langchain/adapters community/langchain_community
mv langchain/langchain/callbacks community/langchain_community/callbacks
mv langchain/langchain/chat_loaders community/langchain_community
mv langchain/langchain/chat_models community/langchain_community
mv langchain/langchain/document_loaders community/langchain_community
mv langchain/langchain/docstore community/langchain_community
mv langchain/langchain/document_transformers community/langchain_community
mv langchain/langchain/embeddings community/langchain_community
mv langchain/langchain/graphs community/langchain_community
mv langchain/langchain/llms community/langchain_community
mv langchain/langchain/memory/chat_message_histories community/langchain_community
mv langchain/langchain/retrievers community/langchain_community
mv langchain/langchain/storage community/langchain_community
mv langchain/langchain/tools community/langchain_community
mv langchain/langchain/utilities community/langchain_community
mv langchain/langchain/vectorstores community/langchain_community
mv langchain/langchain/agents/agent_toolkits community/langchain_community
mv langchain/langchain/cache.py community/langchain_community
mv langchain/langchain/adapters community/langchain_community
mv langchain/langchain/callbacks community/langchain_community/callbacks
mv langchain/langchain/chat_loaders community/langchain_community
mv langchain/langchain/chat_models community/langchain_community
mv langchain/langchain/document_loaders community/langchain_community
mv langchain/langchain/docstore community/langchain_community
mv langchain/langchain/document_transformers community/langchain_community
mv langchain/langchain/embeddings community/langchain_community
mv langchain/langchain/graphs community/langchain_community
mv langchain/langchain/llms community/langchain_community
mv langchain/langchain/memory/chat_message_histories community/langchain_community
mv langchain/langchain/retrievers community/langchain_community
mv langchain/langchain/storage community/langchain_community
mv langchain/langchain/tools community/langchain_community
mv langchain/langchain/utilities community/langchain_community
mv langchain/langchain/vectorstores community/langchain_community
mv langchain/langchain/agents/agent_toolkits community/langchain_community
mv langchain/langchain/cache.py community/langchain_community
```

Moved the following to core

```
mv langchain/langchain/utils/json_schema.py core/langchain_core/utils
mv langchain/langchain/utils/html.py core/langchain_core/utils
mv langchain/langchain/utils/strings.py core/langchain_core/utils
cat langchain/langchain/utils/env.py >> core/langchain_core/utils/env.py
rm langchain/langchain/utils/env.py
```

See .scripts/community_split/script_integrations.sh for all changes

10 months ago
			`import json`
			`from typing import Any, Iterator, List, Optional`

			`from langchain_core.callbacks import (`
			`CallbackManagerForLLMRun,`
			`)`
			`from langchain_core.language_models.chat_models import BaseChatModel`
			`from langchain_core.messages import (`
			`AIMessage,`
			`AIMessageChunk,`
			`BaseMessage,`
			`ChatMessage,`
			`HumanMessage,`
			`SystemMessage,`
			`)`
			`from langchain_core.outputs import ChatGeneration, ChatGenerationChunk, ChatResult`

			`from langchain_community.llms.ollama import _OllamaCommon`


			`def _stream_response_to_chat_generation_chunk(`
			`stream_response: str,`
			`) -> ChatGenerationChunk:`
			`"""Convert a stream response to a generation chunk."""`
			`parsed_response = json.loads(stream_response)`
			`generation_info = parsed_response if parsed_response.get("done") is True else None`
			`return ChatGenerationChunk(`
			`message=AIMessageChunk(content=parsed_response.get("response", "")),`
			`generation_info=generation_info,`
			`)`


			`class ChatOllama(BaseChatModel, _OllamaCommon):`
			`"""Ollama locally runs large language models.`

			`To use, follow the instructions at https://ollama.ai/.`

			`Example:`
			`.. code-block:: python`

			`from langchain_community.chat_models import ChatOllama`
			`ollama = ChatOllama(model="llama2")`
			`"""`

			`@property`
			`def _llm_type(self) -> str:`
			`"""Return type of chat model."""`
			`return "ollama-chat"`

			`@classmethod`
			`def is_lc_serializable(cls) -> bool:`
			`"""Return whether this model can be serialized by Langchain."""`
			`return False`

			`def _format_message_as_text(self, message: BaseMessage) -> str:`
			`if isinstance(message, ChatMessage):`
			`message_text = f"\n\n{message.role.capitalize()}: {message.content}"`
			`elif isinstance(message, HumanMessage):`
			`message_text = f"[INST] {message.content} [/INST]"`
			`elif isinstance(message, AIMessage):`
			`message_text = f"{message.content}"`
			`elif isinstance(message, SystemMessage):`
			`message_text = f"<<SYS>> {message.content} <</SYS>>"`
			`else:`
			`raise ValueError(f"Got unknown type {message}")`
			`return message_text`

			`def _format_messages_as_text(self, messages: List[BaseMessage]) -> str:`
			`return "\n".join(`
			`[self._format_message_as_text(message) for message in messages]`
			`)`

			`def _generate(`
			`self,`
			`messages: List[BaseMessage],`
			`stop: Optional[List[str]] = None,`
			`run_manager: Optional[CallbackManagerForLLMRun] = None,`
			`**kwargs: Any,`
			`) -> ChatResult:`
			`"""Call out to Ollama's generate endpoint.`

			`Args:`
			`messages: The list of base messages to pass into the model.`
			`stop: Optional list of stop words to use when generating.`

			`Returns:`
			`Chat generations from the model`

			`Example:`
			`.. code-block:: python`

			`response = ollama([`
			`HumanMessage(content="Tell me about the history of AI")`
			`])`
			`"""`

			`prompt = self._format_messages_as_text(messages)`
			`final_chunk = super()._stream_with_aggregation(`
			`prompt, stop=stop, run_manager=run_manager, verbose=self.verbose, **kwargs`
			`)`
			`chat_generation = ChatGeneration(`
			`message=AIMessage(content=final_chunk.text),`
			`generation_info=final_chunk.generation_info,`
			`)`
			`return ChatResult(generations=[chat_generation])`

			`def _stream(`
			`self,`
			`messages: List[BaseMessage],`
			`stop: Optional[List[str]] = None,`
			`run_manager: Optional[CallbackManagerForLLMRun] = None,`
			`**kwargs: Any,`
			`) -> Iterator[ChatGenerationChunk]:`
			`prompt = self._format_messages_as_text(messages)`
			`for stream_resp in self._create_stream(prompt, stop, **kwargs):`
			`if stream_resp:`
			`chunk = _stream_response_to_chat_generation_chunk(stream_resp)`
			`yield chunk`
			`if run_manager:`
			`run_manager.on_llm_new_token(`
			`chunk.text,`
			`verbose=self.verbose,`
			`)`