Commit 0bc4a9b3fc · mirror of https://github.com/hwchase17/langchain
* **Description:** Adds a simple LLM implementation for interacting with [llamafile](https://github.com/Mozilla-Ocho/llamafile)-based models.
* **Dependencies:** N/A
* **Issue:** N/A

**Detail**

[llamafile](https://github.com/Mozilla-Ocho/llamafile) lets you run LLMs locally from a single file on most computers without installing any dependencies. To use the llamafile LLM implementation, the user needs to:

1. Download a llamafile, e.g. https://huggingface.co/jartine/TinyLlama-1.1B-Chat-v1.0-GGUF/resolve/main/TinyLlama-1.1B-Chat-v1.0.Q5_K_M.llamafile?download=true
2. Make the file executable.
3. Run the llamafile in 'server mode'. (All llamafiles come packaged with a lightweight server; by default, the server listens at `http://localhost:8080`.)

```bash
wget https://url/of/model.llamafile
chmod +x model.llamafile
./model.llamafile --server --nobrowser
```

Now, the user can invoke the LLM via the LangChain client:

```python
from langchain_community.llms.llamafile import Llamafile

llm = Llamafile()
llm.invoke("Tell me a joke.")
```
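Streaming is also supported. A minimal sketch, assuming a llamafile server is running at the default address (the `streaming=True` flag and the `stream()` call mirror the integration tests below):

```python
from langchain_community.llms.llamafile import Llamafile

# Enable streaming so tokens are printed as the server produces them.
llm = Llamafile(streaming=True)
for token in llm.stream("Tell me a joke."):
    print(token, end="", flush=True)
```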
The accompanying integration test file (47 lines, 1.2 KiB, Python):
import os
from typing import Generator

import pytest
import requests
from requests.exceptions import ConnectionError, HTTPError

from langchain_community.llms.llamafile import Llamafile

# Base URL of the llamafile server under test; override via the
# LLAMAFILE_SERVER_BASE_URL environment variable.
LLAMAFILE_SERVER_BASE_URL = os.getenv(
    "LLAMAFILE_SERVER_BASE_URL", "http://localhost:8080"
)


def _ping_llamafile_server() -> bool:
    # Return True only if a llamafile server answers at the base URL;
    # used below to skip the integration tests when no server is running.
    try:
        response = requests.get(LLAMAFILE_SERVER_BASE_URL)
        response.raise_for_status()
    except (ConnectionError, HTTPError):
        return False
    return True


@pytest.mark.skipif(
    not _ping_llamafile_server(),
    reason=f"unable to find llamafile server at {LLAMAFILE_SERVER_BASE_URL}, "
    f"please start one and re-run this test",
)
def test_llamafile_call() -> None:
    # A blocking call should return the completion as a single string.
    llm = Llamafile()
    output = llm.invoke("Say foo:")
    assert isinstance(output, str)


@pytest.mark.skipif(
    not _ping_llamafile_server(),
    reason=f"unable to find llamafile server at {LLAMAFILE_SERVER_BASE_URL}, "
    f"please start one and re-run this test",
)
def test_llamafile_streaming() -> None:
    # With streaming enabled, stream() should yield tokens one at a time.
    llm = Llamafile(streaming=True)
    generator = llm.stream("Tell me about Roman dodecahedrons.")
    assert isinstance(generator, Generator)
    for token in generator:
        assert isinstance(token, str)
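The tests locate the server via the `LLAMAFILE_SERVER_BASE_URL` environment variable, and the client can be pointed at the same address. A minimal sketch, assuming `Llamafile` exposes a `base_url` parameter matching the server address (the default is `http://localhost:8080`):

```python
import os

from langchain_community.llms.llamafile import Llamafile

# Reuse the same environment variable the tests honor.
base_url = os.getenv("LLAMAFILE_SERVER_BASE_URL", "http://localhost:8080")
llm = Llamafile(base_url=base_url)
print(llm.invoke("Say foo:"))
```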