diff --git a/libs/community/langchain_community/llms/ollama.py b/libs/community/langchain_community/llms/ollama.py
index db8d661704..2d12dd1322 100644
--- a/libs/community/langchain_community/llms/ollama.py
+++ b/libs/community/langchain_community/llms/ollama.py
@@ -64,6 +64,10 @@ class _OllamaCommon(BaseLanguageModel):
     It is recommended to set this value to the number of
     physical CPU cores your system has (as opposed to the logical number of cores)."""
 
+    num_predict: Optional[int] = None
+    """Maximum number of tokens to predict when generating text.
+    (Default: 128, -1 = infinite generation, -2 = fill context)"""
+
     repeat_last_n: Optional[int] = None
     """Sets how far back for the model to look back to prevent repetition.
     (Default: 64, 0 = disabled, -1 = num_ctx)"""
@@ -126,6 +130,7 @@ class _OllamaCommon(BaseLanguageModel):
                 "num_ctx": self.num_ctx,
                 "num_gpu": self.num_gpu,
                 "num_thread": self.num_thread,
+                "num_predict": self.num_predict,
                 "repeat_last_n": self.repeat_last_n,
                 "repeat_penalty": self.repeat_penalty,
                 "temperature": self.temperature,
diff --git a/libs/community/tests/unit_tests/llms/test_ollama.py b/libs/community/tests/unit_tests/llms/test_ollama.py
index bf2229b4fc..63a323eb35 100644
--- a/libs/community/tests/unit_tests/llms/test_ollama.py
+++ b/libs/community/tests/unit_tests/llms/test_ollama.py
@@ -88,6 +88,7 @@ def test_handle_kwargs_top_level_parameters(monkeypatch: MonkeyPatch) -> None:
                 "num_ctx": None,
                 "num_gpu": None,
                 "num_thread": None,
+                "num_predict": None,
                 "repeat_last_n": None,
                 "repeat_penalty": None,
                 "stop": [],
@@ -133,6 +134,7 @@ def test_handle_kwargs_with_unknown_param(monkeypatch: MonkeyPatch) -> None:
                 "num_ctx": None,
                 "num_gpu": None,
                 "num_thread": None,
+                "num_predict": None,
                 "repeat_last_n": None,
                 "repeat_penalty": None,
                 "stop": [],