2023-12-11 21:53:30 +00:00
|
|
|
import json
|
2023-12-26 20:08:04 +00:00
|
|
|
from typing import Any, AsyncIterator, Dict, Iterator, List, Mapping, Optional
|
2023-12-11 21:53:30 +00:00
|
|
|
|
2023-12-26 20:08:04 +00:00
|
|
|
import aiohttp
|
2023-12-11 21:53:30 +00:00
|
|
|
import requests
|
2023-12-26 20:08:04 +00:00
|
|
|
from langchain_core.callbacks import (
|
|
|
|
AsyncCallbackManagerForLLMRun,
|
|
|
|
CallbackManagerForLLMRun,
|
|
|
|
)
|
2023-12-11 21:53:30 +00:00
|
|
|
from langchain_core.language_models import BaseLanguageModel
|
|
|
|
from langchain_core.language_models.llms import BaseLLM
|
|
|
|
from langchain_core.outputs import GenerationChunk, LLMResult
|
|
|
|
from langchain_core.pydantic_v1 import Extra
|
|
|
|
|
|
|
|
|
|
|
|
def _stream_response_to_generation_chunk(
|
|
|
|
stream_response: str,
|
|
|
|
) -> GenerationChunk:
|
|
|
|
"""Convert a stream response to a generation chunk."""
|
|
|
|
parsed_response = json.loads(stream_response)
|
|
|
|
generation_info = parsed_response if parsed_response.get("done") is True else None
|
|
|
|
return GenerationChunk(
|
|
|
|
text=parsed_response.get("response", ""), generation_info=generation_info
|
|
|
|
)
|
|
|
|
|
|
|
|
|
2023-12-16 00:00:55 +00:00
|
|
|
class OllamaEndpointNotFoundError(Exception):
|
|
|
|
"""Raised when the Ollama endpoint is not found."""
|
|
|
|
|
|
|
|
|
2023-12-11 21:53:30 +00:00
|
|
|
class _OllamaCommon(BaseLanguageModel):
|
|
|
|
base_url: str = "http://localhost:11434"
|
|
|
|
"""Base url the model is hosted under."""
|
|
|
|
|
|
|
|
model: str = "llama2"
|
|
|
|
"""Model name to use."""
|
|
|
|
|
|
|
|
mirostat: Optional[int] = None
|
|
|
|
"""Enable Mirostat sampling for controlling perplexity.
|
|
|
|
(default: 0, 0 = disabled, 1 = Mirostat, 2 = Mirostat 2.0)"""
|
|
|
|
|
|
|
|
mirostat_eta: Optional[float] = None
|
|
|
|
"""Influences how quickly the algorithm responds to feedback
|
|
|
|
from the generated text. A lower learning rate will result in
|
|
|
|
slower adjustments, while a higher learning rate will make
|
|
|
|
the algorithm more responsive. (Default: 0.1)"""
|
|
|
|
|
|
|
|
mirostat_tau: Optional[float] = None
|
|
|
|
"""Controls the balance between coherence and diversity
|
|
|
|
of the output. A lower value will result in more focused and
|
|
|
|
coherent text. (Default: 5.0)"""
|
|
|
|
|
|
|
|
num_ctx: Optional[int] = None
|
|
|
|
"""Sets the size of the context window used to generate the
|
|
|
|
next token. (Default: 2048) """
|
|
|
|
|
|
|
|
num_gpu: Optional[int] = None
|
|
|
|
"""The number of GPUs to use. On macOS it defaults to 1 to
|
|
|
|
enable metal support, 0 to disable."""
|
|
|
|
|
|
|
|
num_thread: Optional[int] = None
|
|
|
|
"""Sets the number of threads to use during computation.
|
|
|
|
By default, Ollama will detect this for optimal performance.
|
|
|
|
It is recommended to set this value to the number of physical
|
|
|
|
CPU cores your system has (as opposed to the logical number of cores)."""
|
|
|
|
|
2024-01-26 23:00:19 +00:00
|
|
|
num_predict: Optional[int] = None
|
2024-02-22 16:59:55 +00:00
|
|
|
"""Maximum number of tokens to predict when generating text.
|
2024-01-26 23:00:19 +00:00
|
|
|
(Default: 128, -1 = infinite generation, -2 = fill context)"""
|
|
|
|
|
2023-12-11 21:53:30 +00:00
|
|
|
repeat_last_n: Optional[int] = None
|
|
|
|
"""Sets how far back for the model to look back to prevent
|
|
|
|
repetition. (Default: 64, 0 = disabled, -1 = num_ctx)"""
|
|
|
|
|
|
|
|
repeat_penalty: Optional[float] = None
|
|
|
|
"""Sets how strongly to penalize repetitions. A higher value (e.g., 1.5)
|
|
|
|
will penalize repetitions more strongly, while a lower value (e.g., 0.9)
|
|
|
|
will be more lenient. (Default: 1.1)"""
|
|
|
|
|
|
|
|
temperature: Optional[float] = None
|
|
|
|
"""The temperature of the model. Increasing the temperature will
|
|
|
|
make the model answer more creatively. (Default: 0.8)"""
|
|
|
|
|
|
|
|
stop: Optional[List[str]] = None
|
|
|
|
"""Sets the stop tokens to use."""
|
|
|
|
|
|
|
|
tfs_z: Optional[float] = None
|
|
|
|
"""Tail free sampling is used to reduce the impact of less probable
|
|
|
|
tokens from the output. A higher value (e.g., 2.0) will reduce the
|
|
|
|
impact more, while a value of 1.0 disables this setting. (default: 1)"""
|
|
|
|
|
|
|
|
top_k: Optional[int] = None
|
|
|
|
"""Reduces the probability of generating nonsense. A higher value (e.g. 100)
|
|
|
|
will give more diverse answers, while a lower value (e.g. 10)
|
|
|
|
will be more conservative. (Default: 40)"""
|
|
|
|
|
2024-01-15 19:59:39 +00:00
|
|
|
top_p: Optional[float] = None
|
2023-12-11 21:53:30 +00:00
|
|
|
"""Works together with top-k. A higher value (e.g., 0.95) will lead
|
|
|
|
to more diverse text, while a lower value (e.g., 0.5) will
|
|
|
|
generate more focused and conservative text. (Default: 0.9)"""
|
|
|
|
|
|
|
|
system: Optional[str] = None
|
|
|
|
"""system prompt (overrides what is defined in the Modelfile)"""
|
|
|
|
|
|
|
|
template: Optional[str] = None
|
|
|
|
"""full prompt or prompt template (overrides what is defined in the Modelfile)"""
|
|
|
|
|
|
|
|
format: Optional[str] = None
|
|
|
|
"""Specify the format of the output (e.g., json)"""
|
|
|
|
|
|
|
|
timeout: Optional[int] = None
|
|
|
|
"""Timeout for the request stream"""
|
|
|
|
|
2024-01-12 05:40:35 +00:00
|
|
|
headers: Optional[dict] = None
|
|
|
|
"""Additional headers to pass to endpoint (e.g. Authorization, Referer).
|
|
|
|
This is useful when Ollama is hosted on cloud services that require
|
|
|
|
tokens for authentication.
|
|
|
|
"""
|
|
|
|
|
2023-12-11 21:53:30 +00:00
|
|
|
@property
|
|
|
|
def _default_params(self) -> Dict[str, Any]:
|
|
|
|
"""Get the default parameters for calling Ollama."""
|
|
|
|
return {
|
|
|
|
"model": self.model,
|
|
|
|
"format": self.format,
|
|
|
|
"options": {
|
|
|
|
"mirostat": self.mirostat,
|
|
|
|
"mirostat_eta": self.mirostat_eta,
|
|
|
|
"mirostat_tau": self.mirostat_tau,
|
|
|
|
"num_ctx": self.num_ctx,
|
|
|
|
"num_gpu": self.num_gpu,
|
|
|
|
"num_thread": self.num_thread,
|
2024-01-26 23:00:19 +00:00
|
|
|
"num_predict": self.num_predict,
|
2023-12-11 21:53:30 +00:00
|
|
|
"repeat_last_n": self.repeat_last_n,
|
|
|
|
"repeat_penalty": self.repeat_penalty,
|
|
|
|
"temperature": self.temperature,
|
|
|
|
"stop": self.stop,
|
|
|
|
"tfs_z": self.tfs_z,
|
|
|
|
"top_k": self.top_k,
|
|
|
|
"top_p": self.top_p,
|
|
|
|
},
|
|
|
|
"system": self.system,
|
|
|
|
"template": self.template,
|
|
|
|
}
|
|
|
|
|
|
|
|
@property
|
|
|
|
def _identifying_params(self) -> Mapping[str, Any]:
|
|
|
|
"""Get the identifying parameters."""
|
|
|
|
return {**{"model": self.model, "format": self.format}, **self._default_params}
|
|
|
|
|
2023-12-16 00:00:55 +00:00
|
|
|
def _create_generate_stream(
|
2023-12-11 21:53:30 +00:00
|
|
|
self,
|
|
|
|
prompt: str,
|
|
|
|
stop: Optional[List[str]] = None,
|
2023-12-16 00:00:55 +00:00
|
|
|
images: Optional[List[str]] = None,
|
|
|
|
**kwargs: Any,
|
|
|
|
) -> Iterator[str]:
|
|
|
|
payload = {"prompt": prompt, "images": images}
|
|
|
|
yield from self._create_stream(
|
|
|
|
payload=payload,
|
|
|
|
stop=stop,
|
2024-02-22 16:59:55 +00:00
|
|
|
api_url=f"{self.base_url}/api/generate",
|
2023-12-16 00:00:55 +00:00
|
|
|
**kwargs,
|
|
|
|
)
|
|
|
|
|
2023-12-26 20:08:04 +00:00
|
|
|
async def _acreate_generate_stream(
|
|
|
|
self,
|
|
|
|
prompt: str,
|
|
|
|
stop: Optional[List[str]] = None,
|
|
|
|
images: Optional[List[str]] = None,
|
|
|
|
**kwargs: Any,
|
|
|
|
) -> AsyncIterator[str]:
|
|
|
|
payload = {"prompt": prompt, "images": images}
|
|
|
|
async for item in self._acreate_stream(
|
|
|
|
payload=payload,
|
|
|
|
stop=stop,
|
2024-02-22 16:59:55 +00:00
|
|
|
api_url=f"{self.base_url}/api/generate",
|
2023-12-26 20:08:04 +00:00
|
|
|
**kwargs,
|
|
|
|
):
|
|
|
|
yield item
|
|
|
|
|
2023-12-16 00:00:55 +00:00
|
|
|
def _create_stream(
|
|
|
|
self,
|
|
|
|
api_url: str,
|
|
|
|
payload: Any,
|
|
|
|
stop: Optional[List[str]] = None,
|
2023-12-11 21:53:30 +00:00
|
|
|
**kwargs: Any,
|
|
|
|
) -> Iterator[str]:
|
|
|
|
if self.stop is not None and stop is not None:
|
|
|
|
raise ValueError("`stop` found in both the input and default params.")
|
|
|
|
elif self.stop is not None:
|
|
|
|
stop = self.stop
|
|
|
|
elif stop is None:
|
|
|
|
stop = []
|
|
|
|
|
|
|
|
params = self._default_params
|
|
|
|
|
2024-01-15 18:17:58 +00:00
|
|
|
for key in self._default_params:
|
|
|
|
if key in kwargs:
|
|
|
|
params[key] = kwargs[key]
|
2023-12-11 21:53:30 +00:00
|
|
|
|
|
|
|
if "options" in kwargs:
|
|
|
|
params["options"] = kwargs["options"]
|
|
|
|
else:
|
|
|
|
params["options"] = {
|
|
|
|
**params["options"],
|
|
|
|
"stop": stop,
|
2024-01-15 18:17:58 +00:00
|
|
|
**{k: v for k, v in kwargs.items() if k not in self._default_params},
|
2023-12-11 21:53:30 +00:00
|
|
|
}
|
|
|
|
|
2023-12-16 00:00:55 +00:00
|
|
|
if payload.get("messages"):
|
|
|
|
request_payload = {"messages": payload.get("messages", []), **params}
|
|
|
|
else:
|
|
|
|
request_payload = {
|
|
|
|
"prompt": payload.get("prompt"),
|
|
|
|
"images": payload.get("images", []),
|
|
|
|
**params,
|
|
|
|
}
|
|
|
|
|
2023-12-11 21:53:30 +00:00
|
|
|
response = requests.post(
|
2023-12-16 00:00:55 +00:00
|
|
|
url=api_url,
|
2024-01-12 05:40:35 +00:00
|
|
|
headers={
|
|
|
|
"Content-Type": "application/json",
|
|
|
|
**(self.headers if isinstance(self.headers, dict) else {}),
|
|
|
|
},
|
2023-12-16 00:00:55 +00:00
|
|
|
json=request_payload,
|
2023-12-11 21:53:30 +00:00
|
|
|
stream=True,
|
|
|
|
timeout=self.timeout,
|
|
|
|
)
|
|
|
|
response.encoding = "utf-8"
|
|
|
|
if response.status_code != 200:
|
2023-12-16 00:00:55 +00:00
|
|
|
if response.status_code == 404:
|
|
|
|
raise OllamaEndpointNotFoundError(
|
2023-12-26 19:07:39 +00:00
|
|
|
"Ollama call failed with status code 404. "
|
|
|
|
"Maybe your model is not found "
|
|
|
|
f"and you should pull the model with `ollama pull {self.model}`."
|
2023-12-16 00:00:55 +00:00
|
|
|
)
|
|
|
|
else:
|
community[patch]: don't try to parse json in case of errored response (#18317)
Related issue: #13896.
In case Ollama is behind a proxy, proxy error responses cannot be
viewed. You aren't even able to check response code.
For example, if your Ollama has basic access authentication and it's not
passed, `JSONDecodeError` will overwrite the truth response error.
<details>
<summary><b>Log now:</b></summary>
```
{
"name": "JSONDecodeError",
"message": "Expecting value: line 1 column 1 (char 0)",
"stack": "---------------------------------------------------------------------------
JSONDecodeError Traceback (most recent call last)
File /opt/miniforge3/envs/.gpt/lib/python3.10/site-packages/requests/models.py:971, in Response.json(self, **kwargs)
970 try:
--> 971 return complexjson.loads(self.text, **kwargs)
972 except JSONDecodeError as e:
973 # Catch JSON-related errors and raise as requests.JSONDecodeError
974 # This aliases json.JSONDecodeError and simplejson.JSONDecodeError
File /opt/miniforge3/envs/.gpt/lib/python3.10/json/__init__.py:346, in loads(s, cls, object_hook, parse_float, parse_int, parse_constant, object_pairs_hook, **kw)
343 if (cls is None and object_hook is None and
344 parse_int is None and parse_float is None and
345 parse_constant is None and object_pairs_hook is None and not kw):
--> 346 return _default_decoder.decode(s)
347 if cls is None:
File /opt/miniforge3/envs/.gpt/lib/python3.10/json/decoder.py:337, in JSONDecoder.decode(self, s, _w)
333 \"\"\"Return the Python representation of ``s`` (a ``str`` instance
334 containing a JSON document).
335
336 \"\"\"
--> 337 obj, end = self.raw_decode(s, idx=_w(s, 0).end())
338 end = _w(s, end).end()
File /opt/miniforge3/envs/.gpt/lib/python3.10/json/decoder.py:355, in JSONDecoder.raw_decode(self, s, idx)
354 except StopIteration as err:
--> 355 raise JSONDecodeError(\"Expecting value\", s, err.value) from None
356 return obj, end
JSONDecodeError: Expecting value: line 1 column 1 (char 0)
During handling of the above exception, another exception occurred:
JSONDecodeError Traceback (most recent call last)
Cell In[3], line 1
----> 1 print(translate_func().invoke('text'))
File /opt/miniforge3/envs/.gpt/lib/python3.10/site-packages/langchain_core/runnables/base.py:2053, in RunnableSequence.invoke(self, input, config)
2051 try:
2052 for i, step in enumerate(self.steps):
-> 2053 input = step.invoke(
2054 input,
2055 # mark each step as a child run
2056 patch_config(
2057 config, callbacks=run_manager.get_child(f\"seq:step:{i+1}\")
2058 ),
2059 )
2060 # finish the root run
2061 except BaseException as e:
File /opt/miniforge3/envs/.gpt/lib/python3.10/site-packages/langchain_core/language_models/chat_models.py:165, in BaseChatModel.invoke(self, input, config, stop, **kwargs)
154 def invoke(
155 self,
156 input: LanguageModelInput,
(...)
160 **kwargs: Any,
161 ) -> BaseMessage:
162 config = ensure_config(config)
163 return cast(
164 ChatGeneration,
--> 165 self.generate_prompt(
166 [self._convert_input(input)],
167 stop=stop,
168 callbacks=config.get(\"callbacks\"),
169 tags=config.get(\"tags\"),
170 metadata=config.get(\"metadata\"),
171 run_name=config.get(\"run_name\"),
172 **kwargs,
173 ).generations[0][0],
174 ).message
File /opt/miniforge3/envs/.gpt/lib/python3.10/site-packages/langchain_core/language_models/chat_models.py:543, in BaseChatModel.generate_prompt(self, prompts, stop, callbacks, **kwargs)
535 def generate_prompt(
536 self,
537 prompts: List[PromptValue],
(...)
540 **kwargs: Any,
541 ) -> LLMResult:
542 prompt_messages = [p.to_messages() for p in prompts]
--> 543 return self.generate(prompt_messages, stop=stop, callbacks=callbacks, **kwargs)
File /opt/miniforge3/envs/.gpt/lib/python3.10/site-packages/langchain_core/language_models/chat_models.py:407, in BaseChatModel.generate(self, messages, stop, callbacks, tags, metadata, run_name, **kwargs)
405 if run_managers:
406 run_managers[i].on_llm_error(e, response=LLMResult(generations=[]))
--> 407 raise e
408 flattened_outputs = [
409 LLMResult(generations=[res.generations], llm_output=res.llm_output)
410 for res in results
411 ]
412 llm_output = self._combine_llm_outputs([res.llm_output for res in results])
File /opt/miniforge3/envs/.gpt/lib/python3.10/site-packages/langchain_core/language_models/chat_models.py:397, in BaseChatModel.generate(self, messages, stop, callbacks, tags, metadata, run_name, **kwargs)
394 for i, m in enumerate(messages):
395 try:
396 results.append(
--> 397 self._generate_with_cache(
398 m,
399 stop=stop,
400 run_manager=run_managers[i] if run_managers else None,
401 **kwargs,
402 )
403 )
404 except BaseException as e:
405 if run_managers:
File /opt/miniforge3/envs/.gpt/lib/python3.10/site-packages/langchain_core/language_models/chat_models.py:576, in BaseChatModel._generate_with_cache(self, messages, stop, run_manager, **kwargs)
572 raise ValueError(
573 \"Asked to cache, but no cache found at `langchain.cache`.\"
574 )
575 if new_arg_supported:
--> 576 return self._generate(
577 messages, stop=stop, run_manager=run_manager, **kwargs
578 )
579 else:
580 return self._generate(messages, stop=stop, **kwargs)
File /opt/miniforge3/envs/.gpt/lib/python3.10/site-packages/langchain_community/chat_models/ollama.py:250, in ChatOllama._generate(self, messages, stop, run_manager, **kwargs)
226 def _generate(
227 self,
228 messages: List[BaseMessage],
(...)
231 **kwargs: Any,
232 ) -> ChatResult:
233 \"\"\"Call out to Ollama's generate endpoint.
234
235 Args:
(...)
247 ])
248 \"\"\"
--> 250 final_chunk = self._chat_stream_with_aggregation(
251 messages,
252 stop=stop,
253 run_manager=run_manager,
254 verbose=self.verbose,
255 **kwargs,
256 )
257 chat_generation = ChatGeneration(
258 message=AIMessage(content=final_chunk.text),
259 generation_info=final_chunk.generation_info,
260 )
261 return ChatResult(generations=[chat_generation])
File /opt/miniforge3/envs/.gpt/lib/python3.10/site-packages/langchain_community/chat_models/ollama.py:183, in ChatOllama._chat_stream_with_aggregation(self, messages, stop, run_manager, verbose, **kwargs)
174 def _chat_stream_with_aggregation(
175 self,
176 messages: List[BaseMessage],
(...)
180 **kwargs: Any,
181 ) -> ChatGenerationChunk:
182 final_chunk: Optional[ChatGenerationChunk] = None
--> 183 for stream_resp in self._create_chat_stream(messages, stop, **kwargs):
184 if stream_resp:
185 chunk = _chat_stream_response_to_chat_generation_chunk(stream_resp)
File /opt/miniforge3/envs/.gpt/lib/python3.10/site-packages/langchain_community/chat_models/ollama.py:156, in ChatOllama._create_chat_stream(self, messages, stop, **kwargs)
147 def _create_chat_stream(
148 self,
149 messages: List[BaseMessage],
150 stop: Optional[List[str]] = None,
151 **kwargs: Any,
152 ) -> Iterator[str]:
153 payload = {
154 \"messages\": self._convert_messages_to_ollama_messages(messages),
155 }
--> 156 yield from self._create_stream(
157 payload=payload, stop=stop, api_url=f\"{self.base_url}/api/chat/\", **kwargs
158 )
File /opt/miniforge3/envs/.gpt/lib/python3.10/site-packages/langchain_community/llms/ollama.py:234, in _OllamaCommon._create_stream(self, api_url, payload, stop, **kwargs)
228 raise OllamaEndpointNotFoundError(
229 \"Ollama call failed with status code 404. \"
230 \"Maybe your model is not found \"
231 f\"and you should pull the model with `ollama pull {self.model}`.\"
232 )
233 else:
--> 234 optional_detail = response.json().get(\"error\")
235 raise ValueError(
236 f\"Ollama call failed with status code {response.status_code}.\"
237 f\" Details: {optional_detail}\"
238 )
239 return response.iter_lines(decode_unicode=True)
File /opt/miniforge3/envs/.gpt/lib/python3.10/site-packages/requests/models.py:975, in Response.json(self, **kwargs)
971 return complexjson.loads(self.text, **kwargs)
972 except JSONDecodeError as e:
973 # Catch JSON-related errors and raise as requests.JSONDecodeError
974 # This aliases json.JSONDecodeError and simplejson.JSONDecodeError
--> 975 raise RequestsJSONDecodeError(e.msg, e.doc, e.pos)
JSONDecodeError: Expecting value: line 1 column 1 (char 0)"
}
```
</details>
<details>
<summary><b>Log after a fix:</b></summary>
```
{
"name": "ValueError",
"message": "Ollama call failed with status code 401. Details: <html>\r
<head><title>401 Authorization Required</title></head>\r
<body>\r
<center><h1>401 Authorization Required</h1></center>\r
<hr><center>nginx/1.18.0 (Ubuntu)</center>\r
</body>\r
</html>\r
",
"stack": "---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
Cell In[2], line 1
----> 1 print(translate_func().invoke('text'))
File /opt/miniforge3/envs/.gpt/lib/python3.10/site-packages/langchain_core/runnables/base.py:2053, in RunnableSequence.invoke(self, input, config)
2051 try:
2052 for i, step in enumerate(self.steps):
-> 2053 input = step.invoke(
2054 input,
2055 # mark each step as a child run
2056 patch_config(
2057 config, callbacks=run_manager.get_child(f\"seq:step:{i+1}\")
2058 ),
2059 )
2060 # finish the root run
2061 except BaseException as e:
File /opt/miniforge3/envs/.gpt/lib/python3.10/site-packages/langchain_core/language_models/chat_models.py:165, in BaseChatModel.invoke(self, input, config, stop, **kwargs)
154 def invoke(
155 self,
156 input: LanguageModelInput,
(...)
160 **kwargs: Any,
161 ) -> BaseMessage:
162 config = ensure_config(config)
163 return cast(
164 ChatGeneration,
--> 165 self.generate_prompt(
166 [self._convert_input(input)],
167 stop=stop,
168 callbacks=config.get(\"callbacks\"),
169 tags=config.get(\"tags\"),
170 metadata=config.get(\"metadata\"),
171 run_name=config.get(\"run_name\"),
172 **kwargs,
173 ).generations[0][0],
174 ).message
File /opt/miniforge3/envs/.gpt/lib/python3.10/site-packages/langchain_core/language_models/chat_models.py:543, in BaseChatModel.generate_prompt(self, prompts, stop, callbacks, **kwargs)
535 def generate_prompt(
536 self,
537 prompts: List[PromptValue],
(...)
540 **kwargs: Any,
541 ) -> LLMResult:
542 prompt_messages = [p.to_messages() for p in prompts]
--> 543 return self.generate(prompt_messages, stop=stop, callbacks=callbacks, **kwargs)
File /opt/miniforge3/envs/.gpt/lib/python3.10/site-packages/langchain_core/language_models/chat_models.py:407, in BaseChatModel.generate(self, messages, stop, callbacks, tags, metadata, run_name, **kwargs)
405 if run_managers:
406 run_managers[i].on_llm_error(e, response=LLMResult(generations=[]))
--> 407 raise e
408 flattened_outputs = [
409 LLMResult(generations=[res.generations], llm_output=res.llm_output)
410 for res in results
411 ]
412 llm_output = self._combine_llm_outputs([res.llm_output for res in results])
File /opt/miniforge3/envs/.gpt/lib/python3.10/site-packages/langchain_core/language_models/chat_models.py:397, in BaseChatModel.generate(self, messages, stop, callbacks, tags, metadata, run_name, **kwargs)
394 for i, m in enumerate(messages):
395 try:
396 results.append(
--> 397 self._generate_with_cache(
398 m,
399 stop=stop,
400 run_manager=run_managers[i] if run_managers else None,
401 **kwargs,
402 )
403 )
404 except BaseException as e:
405 if run_managers:
File /opt/miniforge3/envs/.gpt/lib/python3.10/site-packages/langchain_core/language_models/chat_models.py:576, in BaseChatModel._generate_with_cache(self, messages, stop, run_manager, **kwargs)
572 raise ValueError(
573 \"Asked to cache, but no cache found at `langchain.cache`.\"
574 )
575 if new_arg_supported:
--> 576 return self._generate(
577 messages, stop=stop, run_manager=run_manager, **kwargs
578 )
579 else:
580 return self._generate(messages, stop=stop, **kwargs)
File /opt/miniforge3/envs/.gpt/lib/python3.10/site-packages/langchain_community/chat_models/ollama.py:250, in ChatOllama._generate(self, messages, stop, run_manager, **kwargs)
226 def _generate(
227 self,
228 messages: List[BaseMessage],
(...)
231 **kwargs: Any,
232 ) -> ChatResult:
233 \"\"\"Call out to Ollama's generate endpoint.
234
235 Args:
(...)
247 ])
248 \"\"\"
--> 250 final_chunk = self._chat_stream_with_aggregation(
251 messages,
252 stop=stop,
253 run_manager=run_manager,
254 verbose=self.verbose,
255 **kwargs,
256 )
257 chat_generation = ChatGeneration(
258 message=AIMessage(content=final_chunk.text),
259 generation_info=final_chunk.generation_info,
260 )
261 return ChatResult(generations=[chat_generation])
File /storage/gpt-project/Repos/repo_nikita/gpt_lib/langchain/ollama.py:328, in ChatOllamaCustom._chat_stream_with_aggregation(self, messages, stop, run_manager, verbose, **kwargs)
319 def _chat_stream_with_aggregation(
320 self,
321 messages: List[BaseMessage],
(...)
325 **kwargs: Any,
326 ) -> ChatGenerationChunk:
327 final_chunk: Optional[ChatGenerationChunk] = None
--> 328 for stream_resp in self._create_chat_stream(messages, stop, **kwargs):
329 if stream_resp:
330 chunk = _chat_stream_response_to_chat_generation_chunk(stream_resp)
File /storage/gpt-project/Repos/repo_nikita/gpt_lib/langchain/ollama.py:301, in ChatOllamaCustom._create_chat_stream(self, messages, stop, **kwargs)
292 def _create_chat_stream(
293 self,
294 messages: List[BaseMessage],
295 stop: Optional[List[str]] = None,
296 **kwargs: Any,
297 ) -> Iterator[str]:
298 payload = {
299 \"messages\": self._convert_messages_to_ollama_messages(messages),
300 }
--> 301 yield from self._create_stream(
302 payload=payload, stop=stop, api_url=f\"{self.base_url}/api/chat\", **kwargs
303 )
File /storage/gpt-project/Repos/repo_nikita/gpt_lib/langchain/ollama.py:134, in _OllamaCommonCustom._create_stream(self, api_url, payload, stop, **kwargs)
132 else:
133 optional_detail = response.text
--> 134 raise ValueError(
135 f\"Ollama call failed with status code {response.status_code}.\"
136 f\" Details: {optional_detail}\"
137 )
138 return response.iter_lines(decode_unicode=True)
ValueError: Ollama call failed with status code 401. Details: <html>\r
<head><title>401 Authorization Required</title></head>\r
<body>\r
<center><h1>401 Authorization Required</h1></center>\r
<hr><center>nginx/1.18.0 (Ubuntu)</center>\r
</body>\r
</html>\r
"
}
```
</details>
The same is true for timeout errors or when you simply mistyped in
`base_url` arg and get response from some other service, for instance.
Real Ollama errors are still clearly readable:
```
ValueError: Ollama call failed with status code 400. Details: {"error":"invalid options: unknown_option"}
```
---------
Co-authored-by: Bagatur <baskaryan@gmail.com>
2024-03-01 20:17:29 +00:00
|
|
|
optional_detail = response.text
|
2023-12-16 00:00:55 +00:00
|
|
|
raise ValueError(
|
|
|
|
f"Ollama call failed with status code {response.status_code}."
|
|
|
|
f" Details: {optional_detail}"
|
|
|
|
)
|
2023-12-11 21:53:30 +00:00
|
|
|
return response.iter_lines(decode_unicode=True)
|
|
|
|
|
2023-12-26 20:08:04 +00:00
|
|
|
async def _acreate_stream(
|
|
|
|
self,
|
|
|
|
api_url: str,
|
|
|
|
payload: Any,
|
|
|
|
stop: Optional[List[str]] = None,
|
|
|
|
**kwargs: Any,
|
|
|
|
) -> AsyncIterator[str]:
|
|
|
|
if self.stop is not None and stop is not None:
|
|
|
|
raise ValueError("`stop` found in both the input and default params.")
|
|
|
|
elif self.stop is not None:
|
|
|
|
stop = self.stop
|
|
|
|
elif stop is None:
|
|
|
|
stop = []
|
|
|
|
|
|
|
|
params = self._default_params
|
|
|
|
|
2024-01-15 18:17:58 +00:00
|
|
|
for key in self._default_params:
|
|
|
|
if key in kwargs:
|
|
|
|
params[key] = kwargs[key]
|
2023-12-26 20:08:04 +00:00
|
|
|
|
|
|
|
if "options" in kwargs:
|
|
|
|
params["options"] = kwargs["options"]
|
|
|
|
else:
|
|
|
|
params["options"] = {
|
|
|
|
**params["options"],
|
|
|
|
"stop": stop,
|
2024-01-15 18:17:58 +00:00
|
|
|
**{k: v for k, v in kwargs.items() if k not in self._default_params},
|
2023-12-26 20:08:04 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
if payload.get("messages"):
|
|
|
|
request_payload = {"messages": payload.get("messages", []), **params}
|
|
|
|
else:
|
|
|
|
request_payload = {
|
|
|
|
"prompt": payload.get("prompt"),
|
|
|
|
"images": payload.get("images", []),
|
|
|
|
**params,
|
|
|
|
}
|
|
|
|
|
|
|
|
async with aiohttp.ClientSession() as session:
|
|
|
|
async with session.post(
|
|
|
|
url=api_url,
|
2024-01-28 00:11:32 +00:00
|
|
|
headers={
|
|
|
|
"Content-Type": "application/json",
|
|
|
|
**(self.headers if isinstance(self.headers, dict) else {}),
|
|
|
|
},
|
2023-12-26 20:08:04 +00:00
|
|
|
json=request_payload,
|
|
|
|
timeout=self.timeout,
|
|
|
|
) as response:
|
|
|
|
if response.status != 200:
|
|
|
|
if response.status == 404:
|
|
|
|
raise OllamaEndpointNotFoundError(
|
|
|
|
"Ollama call failed with status code 404."
|
|
|
|
)
|
|
|
|
else:
|
community[patch]: don't try to parse json in case of errored response (#18317)
Related issue: #13896.
In case Ollama is behind a proxy, proxy error responses cannot be
viewed. You aren't even able to check response code.
For example, if your Ollama has basic access authentication and it's not
passed, `JSONDecodeError` will overwrite the truth response error.
<details>
<summary><b>Log now:</b></summary>
```
{
"name": "JSONDecodeError",
"message": "Expecting value: line 1 column 1 (char 0)",
"stack": "---------------------------------------------------------------------------
JSONDecodeError Traceback (most recent call last)
File /opt/miniforge3/envs/.gpt/lib/python3.10/site-packages/requests/models.py:971, in Response.json(self, **kwargs)
970 try:
--> 971 return complexjson.loads(self.text, **kwargs)
972 except JSONDecodeError as e:
973 # Catch JSON-related errors and raise as requests.JSONDecodeError
974 # This aliases json.JSONDecodeError and simplejson.JSONDecodeError
File /opt/miniforge3/envs/.gpt/lib/python3.10/json/__init__.py:346, in loads(s, cls, object_hook, parse_float, parse_int, parse_constant, object_pairs_hook, **kw)
343 if (cls is None and object_hook is None and
344 parse_int is None and parse_float is None and
345 parse_constant is None and object_pairs_hook is None and not kw):
--> 346 return _default_decoder.decode(s)
347 if cls is None:
File /opt/miniforge3/envs/.gpt/lib/python3.10/json/decoder.py:337, in JSONDecoder.decode(self, s, _w)
333 \"\"\"Return the Python representation of ``s`` (a ``str`` instance
334 containing a JSON document).
335
336 \"\"\"
--> 337 obj, end = self.raw_decode(s, idx=_w(s, 0).end())
338 end = _w(s, end).end()
File /opt/miniforge3/envs/.gpt/lib/python3.10/json/decoder.py:355, in JSONDecoder.raw_decode(self, s, idx)
354 except StopIteration as err:
--> 355 raise JSONDecodeError(\"Expecting value\", s, err.value) from None
356 return obj, end
JSONDecodeError: Expecting value: line 1 column 1 (char 0)
During handling of the above exception, another exception occurred:
JSONDecodeError Traceback (most recent call last)
Cell In[3], line 1
----> 1 print(translate_func().invoke('text'))
File /opt/miniforge3/envs/.gpt/lib/python3.10/site-packages/langchain_core/runnables/base.py:2053, in RunnableSequence.invoke(self, input, config)
2051 try:
2052 for i, step in enumerate(self.steps):
-> 2053 input = step.invoke(
2054 input,
2055 # mark each step as a child run
2056 patch_config(
2057 config, callbacks=run_manager.get_child(f\"seq:step:{i+1}\")
2058 ),
2059 )
2060 # finish the root run
2061 except BaseException as e:
File /opt/miniforge3/envs/.gpt/lib/python3.10/site-packages/langchain_core/language_models/chat_models.py:165, in BaseChatModel.invoke(self, input, config, stop, **kwargs)
154 def invoke(
155 self,
156 input: LanguageModelInput,
(...)
160 **kwargs: Any,
161 ) -> BaseMessage:
162 config = ensure_config(config)
163 return cast(
164 ChatGeneration,
--> 165 self.generate_prompt(
166 [self._convert_input(input)],
167 stop=stop,
168 callbacks=config.get(\"callbacks\"),
169 tags=config.get(\"tags\"),
170 metadata=config.get(\"metadata\"),
171 run_name=config.get(\"run_name\"),
172 **kwargs,
173 ).generations[0][0],
174 ).message
File /opt/miniforge3/envs/.gpt/lib/python3.10/site-packages/langchain_core/language_models/chat_models.py:543, in BaseChatModel.generate_prompt(self, prompts, stop, callbacks, **kwargs)
535 def generate_prompt(
536 self,
537 prompts: List[PromptValue],
(...)
540 **kwargs: Any,
541 ) -> LLMResult:
542 prompt_messages = [p.to_messages() for p in prompts]
--> 543 return self.generate(prompt_messages, stop=stop, callbacks=callbacks, **kwargs)
File /opt/miniforge3/envs/.gpt/lib/python3.10/site-packages/langchain_core/language_models/chat_models.py:407, in BaseChatModel.generate(self, messages, stop, callbacks, tags, metadata, run_name, **kwargs)
405 if run_managers:
406 run_managers[i].on_llm_error(e, response=LLMResult(generations=[]))
--> 407 raise e
408 flattened_outputs = [
409 LLMResult(generations=[res.generations], llm_output=res.llm_output)
410 for res in results
411 ]
412 llm_output = self._combine_llm_outputs([res.llm_output for res in results])
File /opt/miniforge3/envs/.gpt/lib/python3.10/site-packages/langchain_core/language_models/chat_models.py:397, in BaseChatModel.generate(self, messages, stop, callbacks, tags, metadata, run_name, **kwargs)
394 for i, m in enumerate(messages):
395 try:
396 results.append(
--> 397 self._generate_with_cache(
398 m,
399 stop=stop,
400 run_manager=run_managers[i] if run_managers else None,
401 **kwargs,
402 )
403 )
404 except BaseException as e:
405 if run_managers:
File /opt/miniforge3/envs/.gpt/lib/python3.10/site-packages/langchain_core/language_models/chat_models.py:576, in BaseChatModel._generate_with_cache(self, messages, stop, run_manager, **kwargs)
572 raise ValueError(
573 \"Asked to cache, but no cache found at `langchain.cache`.\"
574 )
575 if new_arg_supported:
--> 576 return self._generate(
577 messages, stop=stop, run_manager=run_manager, **kwargs
578 )
579 else:
580 return self._generate(messages, stop=stop, **kwargs)
File /opt/miniforge3/envs/.gpt/lib/python3.10/site-packages/langchain_community/chat_models/ollama.py:250, in ChatOllama._generate(self, messages, stop, run_manager, **kwargs)
226 def _generate(
227 self,
228 messages: List[BaseMessage],
(...)
231 **kwargs: Any,
232 ) -> ChatResult:
233 \"\"\"Call out to Ollama's generate endpoint.
234
235 Args:
(...)
247 ])
248 \"\"\"
--> 250 final_chunk = self._chat_stream_with_aggregation(
251 messages,
252 stop=stop,
253 run_manager=run_manager,
254 verbose=self.verbose,
255 **kwargs,
256 )
257 chat_generation = ChatGeneration(
258 message=AIMessage(content=final_chunk.text),
259 generation_info=final_chunk.generation_info,
260 )
261 return ChatResult(generations=[chat_generation])
File /opt/miniforge3/envs/.gpt/lib/python3.10/site-packages/langchain_community/chat_models/ollama.py:183, in ChatOllama._chat_stream_with_aggregation(self, messages, stop, run_manager, verbose, **kwargs)
174 def _chat_stream_with_aggregation(
175 self,
176 messages: List[BaseMessage],
(...)
180 **kwargs: Any,
181 ) -> ChatGenerationChunk:
182 final_chunk: Optional[ChatGenerationChunk] = None
--> 183 for stream_resp in self._create_chat_stream(messages, stop, **kwargs):
184 if stream_resp:
185 chunk = _chat_stream_response_to_chat_generation_chunk(stream_resp)
File /opt/miniforge3/envs/.gpt/lib/python3.10/site-packages/langchain_community/chat_models/ollama.py:156, in ChatOllama._create_chat_stream(self, messages, stop, **kwargs)
147 def _create_chat_stream(
148 self,
149 messages: List[BaseMessage],
150 stop: Optional[List[str]] = None,
151 **kwargs: Any,
152 ) -> Iterator[str]:
153 payload = {
154 \"messages\": self._convert_messages_to_ollama_messages(messages),
155 }
--> 156 yield from self._create_stream(
157 payload=payload, stop=stop, api_url=f\"{self.base_url}/api/chat/\", **kwargs
158 )
File /opt/miniforge3/envs/.gpt/lib/python3.10/site-packages/langchain_community/llms/ollama.py:234, in _OllamaCommon._create_stream(self, api_url, payload, stop, **kwargs)
228 raise OllamaEndpointNotFoundError(
229 \"Ollama call failed with status code 404. \"
230 \"Maybe your model is not found \"
231 f\"and you should pull the model with `ollama pull {self.model}`.\"
232 )
233 else:
--> 234 optional_detail = response.json().get(\"error\")
235 raise ValueError(
236 f\"Ollama call failed with status code {response.status_code}.\"
237 f\" Details: {optional_detail}\"
238 )
239 return response.iter_lines(decode_unicode=True)
File /opt/miniforge3/envs/.gpt/lib/python3.10/site-packages/requests/models.py:975, in Response.json(self, **kwargs)
971 return complexjson.loads(self.text, **kwargs)
972 except JSONDecodeError as e:
973 # Catch JSON-related errors and raise as requests.JSONDecodeError
974 # This aliases json.JSONDecodeError and simplejson.JSONDecodeError
--> 975 raise RequestsJSONDecodeError(e.msg, e.doc, e.pos)
JSONDecodeError: Expecting value: line 1 column 1 (char 0)"
}
```
</details>
<details>
<summary><b>Log after a fix:</b></summary>
```
{
"name": "ValueError",
"message": "Ollama call failed with status code 401. Details: <html>\r
<head><title>401 Authorization Required</title></head>\r
<body>\r
<center><h1>401 Authorization Required</h1></center>\r
<hr><center>nginx/1.18.0 (Ubuntu)</center>\r
</body>\r
</html>\r
",
"stack": "---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
Cell In[2], line 1
----> 1 print(translate_func().invoke('text'))
File /opt/miniforge3/envs/.gpt/lib/python3.10/site-packages/langchain_core/runnables/base.py:2053, in RunnableSequence.invoke(self, input, config)
2051 try:
2052 for i, step in enumerate(self.steps):
-> 2053 input = step.invoke(
2054 input,
2055 # mark each step as a child run
2056 patch_config(
2057 config, callbacks=run_manager.get_child(f\"seq:step:{i+1}\")
2058 ),
2059 )
2060 # finish the root run
2061 except BaseException as e:
File /opt/miniforge3/envs/.gpt/lib/python3.10/site-packages/langchain_core/language_models/chat_models.py:165, in BaseChatModel.invoke(self, input, config, stop, **kwargs)
154 def invoke(
155 self,
156 input: LanguageModelInput,
(...)
160 **kwargs: Any,
161 ) -> BaseMessage:
162 config = ensure_config(config)
163 return cast(
164 ChatGeneration,
--> 165 self.generate_prompt(
166 [self._convert_input(input)],
167 stop=stop,
168 callbacks=config.get(\"callbacks\"),
169 tags=config.get(\"tags\"),
170 metadata=config.get(\"metadata\"),
171 run_name=config.get(\"run_name\"),
172 **kwargs,
173 ).generations[0][0],
174 ).message
File /opt/miniforge3/envs/.gpt/lib/python3.10/site-packages/langchain_core/language_models/chat_models.py:543, in BaseChatModel.generate_prompt(self, prompts, stop, callbacks, **kwargs)
535 def generate_prompt(
536 self,
537 prompts: List[PromptValue],
(...)
540 **kwargs: Any,
541 ) -> LLMResult:
542 prompt_messages = [p.to_messages() for p in prompts]
--> 543 return self.generate(prompt_messages, stop=stop, callbacks=callbacks, **kwargs)
File /opt/miniforge3/envs/.gpt/lib/python3.10/site-packages/langchain_core/language_models/chat_models.py:407, in BaseChatModel.generate(self, messages, stop, callbacks, tags, metadata, run_name, **kwargs)
405 if run_managers:
406 run_managers[i].on_llm_error(e, response=LLMResult(generations=[]))
--> 407 raise e
408 flattened_outputs = [
409 LLMResult(generations=[res.generations], llm_output=res.llm_output)
410 for res in results
411 ]
412 llm_output = self._combine_llm_outputs([res.llm_output for res in results])
File /opt/miniforge3/envs/.gpt/lib/python3.10/site-packages/langchain_core/language_models/chat_models.py:397, in BaseChatModel.generate(self, messages, stop, callbacks, tags, metadata, run_name, **kwargs)
394 for i, m in enumerate(messages):
395 try:
396 results.append(
--> 397 self._generate_with_cache(
398 m,
399 stop=stop,
400 run_manager=run_managers[i] if run_managers else None,
401 **kwargs,
402 )
403 )
404 except BaseException as e:
405 if run_managers:
File /opt/miniforge3/envs/.gpt/lib/python3.10/site-packages/langchain_core/language_models/chat_models.py:576, in BaseChatModel._generate_with_cache(self, messages, stop, run_manager, **kwargs)
572 raise ValueError(
573 \"Asked to cache, but no cache found at `langchain.cache`.\"
574 )
575 if new_arg_supported:
--> 576 return self._generate(
577 messages, stop=stop, run_manager=run_manager, **kwargs
578 )
579 else:
580 return self._generate(messages, stop=stop, **kwargs)
File /opt/miniforge3/envs/.gpt/lib/python3.10/site-packages/langchain_community/chat_models/ollama.py:250, in ChatOllama._generate(self, messages, stop, run_manager, **kwargs)
226 def _generate(
227 self,
228 messages: List[BaseMessage],
(...)
231 **kwargs: Any,
232 ) -> ChatResult:
233 \"\"\"Call out to Ollama's generate endpoint.
234
235 Args:
(...)
247 ])
248 \"\"\"
--> 250 final_chunk = self._chat_stream_with_aggregation(
251 messages,
252 stop=stop,
253 run_manager=run_manager,
254 verbose=self.verbose,
255 **kwargs,
256 )
257 chat_generation = ChatGeneration(
258 message=AIMessage(content=final_chunk.text),
259 generation_info=final_chunk.generation_info,
260 )
261 return ChatResult(generations=[chat_generation])
File /storage/gpt-project/Repos/repo_nikita/gpt_lib/langchain/ollama.py:328, in ChatOllamaCustom._chat_stream_with_aggregation(self, messages, stop, run_manager, verbose, **kwargs)
319 def _chat_stream_with_aggregation(
320 self,
321 messages: List[BaseMessage],
(...)
325 **kwargs: Any,
326 ) -> ChatGenerationChunk:
327 final_chunk: Optional[ChatGenerationChunk] = None
--> 328 for stream_resp in self._create_chat_stream(messages, stop, **kwargs):
329 if stream_resp:
330 chunk = _chat_stream_response_to_chat_generation_chunk(stream_resp)
File /storage/gpt-project/Repos/repo_nikita/gpt_lib/langchain/ollama.py:301, in ChatOllamaCustom._create_chat_stream(self, messages, stop, **kwargs)
292 def _create_chat_stream(
293 self,
294 messages: List[BaseMessage],
295 stop: Optional[List[str]] = None,
296 **kwargs: Any,
297 ) -> Iterator[str]:
298 payload = {
299 \"messages\": self._convert_messages_to_ollama_messages(messages),
300 }
--> 301 yield from self._create_stream(
302 payload=payload, stop=stop, api_url=f\"{self.base_url}/api/chat\", **kwargs
303 )
File /storage/gpt-project/Repos/repo_nikita/gpt_lib/langchain/ollama.py:134, in _OllamaCommonCustom._create_stream(self, api_url, payload, stop, **kwargs)
132 else:
133 optional_detail = response.text
--> 134 raise ValueError(
135 f\"Ollama call failed with status code {response.status_code}.\"
136 f\" Details: {optional_detail}\"
137 )
138 return response.iter_lines(decode_unicode=True)
ValueError: Ollama call failed with status code 401. Details: <html>\r
<head><title>401 Authorization Required</title></head>\r
<body>\r
<center><h1>401 Authorization Required</h1></center>\r
<hr><center>nginx/1.18.0 (Ubuntu)</center>\r
</body>\r
</html>\r
"
}
```
</details>
The same is true for timeout errors or when you simply mistyped in
`base_url` arg and get response from some other service, for instance.
Real Ollama errors are still clearly readable:
```
ValueError: Ollama call failed with status code 400. Details: {"error":"invalid options: unknown_option"}
```
---------
Co-authored-by: Bagatur <baskaryan@gmail.com>
2024-03-01 20:17:29 +00:00
|
|
|
optional_detail = response.text
|
2023-12-26 20:08:04 +00:00
|
|
|
raise ValueError(
|
|
|
|
f"Ollama call failed with status code {response.status}."
|
|
|
|
f" Details: {optional_detail}"
|
|
|
|
)
|
|
|
|
async for line in response.content:
|
|
|
|
yield line.decode("utf-8")
|
|
|
|
|
2023-12-11 21:53:30 +00:00
|
|
|
def _stream_with_aggregation(
|
|
|
|
self,
|
|
|
|
prompt: str,
|
|
|
|
stop: Optional[List[str]] = None,
|
|
|
|
run_manager: Optional[CallbackManagerForLLMRun] = None,
|
|
|
|
verbose: bool = False,
|
|
|
|
**kwargs: Any,
|
|
|
|
) -> GenerationChunk:
|
|
|
|
final_chunk: Optional[GenerationChunk] = None
|
2023-12-16 00:00:55 +00:00
|
|
|
for stream_resp in self._create_generate_stream(prompt, stop, **kwargs):
|
2023-12-11 21:53:30 +00:00
|
|
|
if stream_resp:
|
|
|
|
chunk = _stream_response_to_generation_chunk(stream_resp)
|
|
|
|
if final_chunk is None:
|
|
|
|
final_chunk = chunk
|
|
|
|
else:
|
|
|
|
final_chunk += chunk
|
|
|
|
if run_manager:
|
|
|
|
run_manager.on_llm_new_token(
|
|
|
|
chunk.text,
|
|
|
|
verbose=verbose,
|
|
|
|
)
|
|
|
|
if final_chunk is None:
|
|
|
|
raise ValueError("No data received from Ollama stream.")
|
|
|
|
|
|
|
|
return final_chunk
|
|
|
|
|
2023-12-26 20:08:04 +00:00
|
|
|
async def _astream_with_aggregation(
|
|
|
|
self,
|
|
|
|
prompt: str,
|
|
|
|
stop: Optional[List[str]] = None,
|
|
|
|
run_manager: Optional[AsyncCallbackManagerForLLMRun] = None,
|
|
|
|
verbose: bool = False,
|
|
|
|
**kwargs: Any,
|
|
|
|
) -> GenerationChunk:
|
|
|
|
final_chunk: Optional[GenerationChunk] = None
|
|
|
|
async for stream_resp in self._acreate_generate_stream(prompt, stop, **kwargs):
|
|
|
|
if stream_resp:
|
|
|
|
chunk = _stream_response_to_generation_chunk(stream_resp)
|
|
|
|
if final_chunk is None:
|
|
|
|
final_chunk = chunk
|
|
|
|
else:
|
|
|
|
final_chunk += chunk
|
|
|
|
if run_manager:
|
|
|
|
await run_manager.on_llm_new_token(
|
|
|
|
chunk.text,
|
|
|
|
verbose=verbose,
|
|
|
|
)
|
|
|
|
if final_chunk is None:
|
|
|
|
raise ValueError("No data received from Ollama stream.")
|
|
|
|
|
|
|
|
return final_chunk
|
|
|
|
|
2023-12-11 21:53:30 +00:00
|
|
|
|
|
|
|
class Ollama(BaseLLM, _OllamaCommon):
|
|
|
|
"""Ollama locally runs large language models.
|
|
|
|
|
|
|
|
To use, follow the instructions at https://ollama.ai/.
|
|
|
|
|
|
|
|
Example:
|
|
|
|
.. code-block:: python
|
|
|
|
|
|
|
|
from langchain_community.llms import Ollama
|
|
|
|
ollama = Ollama(model="llama2")
|
|
|
|
"""
|
|
|
|
|
|
|
|
class Config:
|
|
|
|
"""Configuration for this pydantic object."""
|
|
|
|
|
|
|
|
extra = Extra.forbid
|
|
|
|
|
|
|
|
@property
|
|
|
|
def _llm_type(self) -> str:
|
|
|
|
"""Return type of llm."""
|
|
|
|
return "ollama-llm"
|
|
|
|
|
2024-02-05 19:22:06 +00:00
|
|
|
def _generate( # type: ignore[override]
|
2023-12-11 21:53:30 +00:00
|
|
|
self,
|
|
|
|
prompts: List[str],
|
|
|
|
stop: Optional[List[str]] = None,
|
2023-12-16 00:00:55 +00:00
|
|
|
images: Optional[List[str]] = None,
|
2023-12-11 21:53:30 +00:00
|
|
|
run_manager: Optional[CallbackManagerForLLMRun] = None,
|
|
|
|
**kwargs: Any,
|
|
|
|
) -> LLMResult:
|
|
|
|
"""Call out to Ollama's generate endpoint.
|
|
|
|
|
|
|
|
Args:
|
|
|
|
prompt: The prompt to pass into the model.
|
|
|
|
stop: Optional list of stop words to use when generating.
|
|
|
|
|
|
|
|
Returns:
|
|
|
|
The string generated by the model.
|
|
|
|
|
|
|
|
Example:
|
|
|
|
.. code-block:: python
|
|
|
|
|
|
|
|
response = ollama("Tell me a joke.")
|
|
|
|
"""
|
|
|
|
# TODO: add caching here.
|
|
|
|
generations = []
|
|
|
|
for prompt in prompts:
|
|
|
|
final_chunk = super()._stream_with_aggregation(
|
|
|
|
prompt,
|
|
|
|
stop=stop,
|
2023-12-16 00:00:55 +00:00
|
|
|
images=images,
|
2023-12-11 21:53:30 +00:00
|
|
|
run_manager=run_manager,
|
|
|
|
verbose=self.verbose,
|
|
|
|
**kwargs,
|
|
|
|
)
|
|
|
|
generations.append([final_chunk])
|
|
|
|
return LLMResult(generations=generations)
|
|
|
|
|
2024-02-05 19:22:06 +00:00
|
|
|
async def _agenerate( # type: ignore[override]
|
2023-12-26 20:08:04 +00:00
|
|
|
self,
|
|
|
|
prompts: List[str],
|
|
|
|
stop: Optional[List[str]] = None,
|
|
|
|
images: Optional[List[str]] = None,
|
|
|
|
run_manager: Optional[CallbackManagerForLLMRun] = None,
|
|
|
|
**kwargs: Any,
|
|
|
|
) -> LLMResult:
|
|
|
|
"""Call out to Ollama's generate endpoint.
|
|
|
|
|
|
|
|
Args:
|
|
|
|
prompt: The prompt to pass into the model.
|
|
|
|
stop: Optional list of stop words to use when generating.
|
|
|
|
|
|
|
|
Returns:
|
|
|
|
The string generated by the model.
|
|
|
|
|
|
|
|
Example:
|
|
|
|
.. code-block:: python
|
|
|
|
|
|
|
|
response = ollama("Tell me a joke.")
|
|
|
|
"""
|
|
|
|
# TODO: add caching here.
|
|
|
|
generations = []
|
|
|
|
for prompt in prompts:
|
|
|
|
final_chunk = await super()._astream_with_aggregation(
|
|
|
|
prompt,
|
|
|
|
stop=stop,
|
|
|
|
images=images,
|
2024-02-05 19:22:06 +00:00
|
|
|
run_manager=run_manager, # type: ignore[arg-type]
|
2023-12-26 20:08:04 +00:00
|
|
|
verbose=self.verbose,
|
|
|
|
**kwargs,
|
|
|
|
)
|
|
|
|
generations.append([final_chunk])
|
|
|
|
return LLMResult(generations=generations)
|
|
|
|
|
2023-12-11 21:53:30 +00:00
|
|
|
def _stream(
|
|
|
|
self,
|
|
|
|
prompt: str,
|
|
|
|
stop: Optional[List[str]] = None,
|
|
|
|
run_manager: Optional[CallbackManagerForLLMRun] = None,
|
|
|
|
**kwargs: Any,
|
|
|
|
) -> Iterator[GenerationChunk]:
|
2024-01-01 22:03:53 +00:00
|
|
|
for stream_resp in self._create_generate_stream(prompt, stop, **kwargs):
|
2023-12-11 21:53:30 +00:00
|
|
|
if stream_resp:
|
|
|
|
chunk = _stream_response_to_generation_chunk(stream_resp)
|
|
|
|
yield chunk
|
|
|
|
if run_manager:
|
|
|
|
run_manager.on_llm_new_token(
|
|
|
|
chunk.text,
|
|
|
|
verbose=self.verbose,
|
|
|
|
)
|
2023-12-26 20:08:04 +00:00
|
|
|
|
|
|
|
async def _astream(
|
|
|
|
self,
|
|
|
|
prompt: str,
|
|
|
|
stop: Optional[List[str]] = None,
|
|
|
|
run_manager: Optional[AsyncCallbackManagerForLLMRun] = None,
|
|
|
|
**kwargs: Any,
|
|
|
|
) -> AsyncIterator[GenerationChunk]:
|
2024-01-17 17:42:41 +00:00
|
|
|
async for stream_resp in self._acreate_generate_stream(prompt, stop, **kwargs):
|
2023-12-26 20:08:04 +00:00
|
|
|
if stream_resp:
|
|
|
|
chunk = _stream_response_to_generation_chunk(stream_resp)
|
|
|
|
yield chunk
|
|
|
|
if run_manager:
|
|
|
|
await run_manager.on_llm_new_token(
|
|
|
|
chunk.text,
|
|
|
|
verbose=self.verbose,
|
|
|
|
)
|