From ecd72d26cfb5b07b3b7dcce5395080460a24b7e0 Mon Sep 17 00:00:00 2001 From: Brad Erickson <195061+13rac1@users.noreply.github.com> Date: Thu, 22 Feb 2024 08:59:55 -0800 Subject: [PATCH] community: Bugfix - correct Ollama API path to avoid HTTP 307 (#17895) Sets the correct /api/generate path, without a trailing /, to avoid an HTTP 307 redirect and the extra request it causes. Reference: https://github.com/ollama/ollama/blob/efe040f8/docs/api.md#generate-request-streaming Before: DEBUG: Starting new HTTP connection (1): localhost:11434 DEBUG: http://localhost:11434 "POST /api/generate/ HTTP/1.1" 307 0 DEBUG: http://localhost:11434 "POST /api/generate HTTP/1.1" 200 None After: DEBUG: Starting new HTTP connection (1): localhost:11434 DEBUG: http://localhost:11434 "POST /api/generate HTTP/1.1" 200 None --- libs/community/langchain_community/llms/ollama.py | 6 +++--- libs/community/tests/unit_tests/llms/test_ollama.py | 10 +++++----- 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/libs/community/langchain_community/llms/ollama.py b/libs/community/langchain_community/llms/ollama.py index c6aba99acc..963d28576f 100644 --- a/libs/community/langchain_community/llms/ollama.py +++ b/libs/community/langchain_community/llms/ollama.py @@ -65,7 +65,7 @@ class _OllamaCommon(BaseLanguageModel): CPU cores your system has (as opposed to the logical number of cores).""" num_predict: Optional[int] = None - """Maximum number of tokens to predict when generating text. + """Maximum number of tokens to predict when generating text. 
(Default: 128, -1 = infinite generation, -2 = fill context)""" repeat_last_n: Optional[int] = None @@ -159,7 +159,7 @@ class _OllamaCommon(BaseLanguageModel): yield from self._create_stream( payload=payload, stop=stop, - api_url=f"{self.base_url}/api/generate/", + api_url=f"{self.base_url}/api/generate", **kwargs, ) @@ -174,7 +174,7 @@ class _OllamaCommon(BaseLanguageModel): async for item in self._acreate_stream( payload=payload, stop=stop, - api_url=f"{self.base_url}/api/generate/", + api_url=f"{self.base_url}/api/generate", **kwargs, ): yield item diff --git a/libs/community/tests/unit_tests/llms/test_ollama.py b/libs/community/tests/unit_tests/llms/test_ollama.py index 6dcb0bd38e..3b1798fd2e 100644 --- a/libs/community/tests/unit_tests/llms/test_ollama.py +++ b/libs/community/tests/unit_tests/llms/test_ollama.py @@ -32,7 +32,7 @@ def test_pass_headers_if_provided(monkeypatch: MonkeyPatch) -> None: ) def mock_post(url, headers, json, stream, timeout): # type: ignore[no-untyped-def] - assert url == "https://ollama-hostname:8000/api/generate/" + assert url == "https://ollama-hostname:8000/api/generate" assert headers == { "Content-Type": "application/json", "Authentication": "Bearer TEST-TOKEN-VALUE", @@ -53,7 +53,7 @@ def test_handle_if_headers_not_provided(monkeypatch: MonkeyPatch) -> None: llm = Ollama(base_url="https://ollama-hostname:8000", model="foo", timeout=300) def mock_post(url, headers, json, stream, timeout): # type: ignore[no-untyped-def] - assert url == "https://ollama-hostname:8000/api/generate/" + assert url == "https://ollama-hostname:8000/api/generate" assert headers == { "Content-Type": "application/json", } @@ -73,7 +73,7 @@ def test_handle_kwargs_top_level_parameters(monkeypatch: MonkeyPatch) -> None: llm = Ollama(base_url="https://ollama-hostname:8000", model="foo", timeout=300) def mock_post(url, headers, json, stream, timeout): # type: ignore[no-untyped-def] - assert url == "https://ollama-hostname:8000/api/generate/" + assert url == 
"https://ollama-hostname:8000/api/generate" assert headers == { "Content-Type": "application/json", } @@ -119,7 +119,7 @@ def test_handle_kwargs_with_unknown_param(monkeypatch: MonkeyPatch) -> None: llm = Ollama(base_url="https://ollama-hostname:8000", model="foo", timeout=300) def mock_post(url, headers, json, stream, timeout): # type: ignore[no-untyped-def] - assert url == "https://ollama-hostname:8000/api/generate/" + assert url == "https://ollama-hostname:8000/api/generate" assert headers == { "Content-Type": "application/json", } @@ -166,7 +166,7 @@ def test_handle_kwargs_with_options(monkeypatch: MonkeyPatch) -> None: llm = Ollama(base_url="https://ollama-hostname:8000", model="foo", timeout=300) def mock_post(url, headers, json, stream, timeout): # type: ignore[no-untyped-def] - assert url == "https://ollama-hostname:8000/api/generate/" + assert url == "https://ollama-hostname:8000/api/generate" assert headers == { "Content-Type": "application/json", }