make image inputs compatible with langchain_ollama (#24619)

2024-11-10 01:10:59 +00:00 · 2024-07-26 17:39:57 -07:00 · 2024-07-26 17:39:57 -07:00 · 152427eca1
commit 152427eca1
parent 0535d72927
2 changed files with 33 additions and 8 deletions
--- a/libs/partners/ollama/langchain_ollama/chat_models.py
+++ b/libs/partners/ollama/langchain_ollama/chat_models.py
@@ -346,7 +346,7 @@ class ChatOllama(BaseChatModel):
    ) -> Sequence[Message]:
        ollama_messages: List = []
        for message in messages:
-            role = ""
+            role: Literal["user", "assistant", "system", "tool"]
            tool_call_id: Optional[str] = None
            tool_calls: Optional[List[Dict[str, Any]]] = None
            if isinstance(message, HumanMessage):
@@ -383,11 +383,13 @@ class ChatOllama(BaseChatModel):
                        image_url = None
                        temp_image_url = content_part.get("image_url")
                        if isinstance(temp_image_url, str):
-                            image_url = content_part["image_url"]
-                        elif (
-                            isinstance(temp_image_url, dict) and "url" in temp_image_url
-                        ):
                            image_url = temp_image_url
+                        elif (
+                            isinstance(temp_image_url, dict)
+                            and "url" in temp_image_url
+                            and isinstance(temp_image_url["url"], str)
+                        ):
+                            image_url = temp_image_url["url"]
                        else:
                            raise ValueError(
                                "Only string image_url or dict with string 'url' "
@@ -408,15 +410,16 @@ class ChatOllama(BaseChatModel):
                            "Must either have type 'text' or type 'image_url' "
                            "with a string 'image_url' field."
                        )
-            msg = {
+            # TODO: build an ollama.Message here once its role accepts "tool" and it has a tool_call_id field  # noqa: E501
+            msg: dict = {
                "role": role,
                "content": content,
                "images": images,
            }
+            if tool_calls:
+                msg["tool_calls"] = tool_calls  # type: ignore
            if tool_call_id:
                msg["tool_call_id"] = tool_call_id
-            if tool_calls:
-                msg["tool_calls"] = tool_calls
            ollama_messages.append(msg)

        return ollama_messages
--- a/libs/partners/ollama/tests/integration_tests/test_chat_models.py
+++ b/libs/partners/ollama/tests/integration_tests/test_chat_models.py
@@ -2,6 +2,8 @@

 from typing import Type

+import pytest
+from langchain_core.language_models import BaseChatModel
 from langchain_standard_tests.integration_tests import ChatModelIntegrationTests

 from langchain_ollama.chat_models import ChatOllama
@@ -15,3 +17,23 @@ class TestChatOllama(ChatModelIntegrationTests):
    @property
    def chat_model_params(self) -> dict:
        return {"model": "llama3-groq-tool-use"}
+
+    @property
+    def supports_image_inputs(self) -> bool:
+        return True
+
+    @pytest.mark.xfail(
+        reason=(
+            "Fails with 'AssertionError'. Ollama does not support 'tool_choice' yet."
+        )
+    )
+    def test_structured_output(self, model: BaseChatModel) -> None:
+        super().test_structured_output(model)
+
+    @pytest.mark.xfail(
+        reason=(
+            "Fails with 'AssertionError'. Ollama does not support 'tool_choice' yet."
+        )
+    )
+    def test_structured_output_pydantic_2_v1(self, model: BaseChatModel) -> None:
+        super().test_structured_output_pydantic_2_v1(model)