langchain/libs/community/tests/integration_tests/llms/test_llamafile.py

import os
from typing import Generator

import pytest
import requests
from requests.exceptions import ConnectionError, HTTPError

from langchain_community.llms.llamafile import Llamafile

# Root URL of the llamafile server used by the reachability check below.
# Read from the LLAMAFILE_SERVER_BASE_URL environment variable when it is set;
# otherwise the local address given here is used.
LLAMAFILE_SERVER_BASE_URL = os.environ.get(
    "LLAMAFILE_SERVER_BASE_URL",
    "http://localhost:8080",
)


def _ping_llamafile_server(timeout: float = 5.0) -> bool:
    """Report whether a server answers at ``LLAMAFILE_SERVER_BASE_URL``.

    This helper is called from the ``skipif`` decorators, i.e. while the
    module is being imported, so it must neither hang nor raise: the request
    is bounded by ``timeout`` and every ``requests`` failure is reported as
    "server unavailable".

    Args:
        timeout: Seconds to wait for the server before giving up.

    Returns:
        ``True`` if a GET on the base URL completes without an HTTP error
        status, ``False`` otherwise.
    """
    try:
        response = requests.get(LLAMAFILE_SERVER_BASE_URL, timeout=timeout)
        response.raise_for_status()
    except requests.RequestException:
        # RequestException is the common base of ConnectionError and
        # HTTPError; it additionally covers timeouts and malformed URLs, which
        # would otherwise propagate and abort test collection.
        return False

    return True


@pytest.mark.skipif(
    not _ping_llamafile_server(),
    reason=f"unable to find llamafile server at {LLAMAFILE_SERVER_BASE_URL}, "
    f"please start one and re-run this test",
)
def test_llamafile_call() -> None:
    """Invoke the LLM once and check that the result is a string."""
    # Point the client at the same server the skip check probed; relying on
    # the client's own default would test a different host whenever the
    # LLAMAFILE_SERVER_BASE_URL environment variable is overridden.
    llm = Llamafile(base_url=LLAMAFILE_SERVER_BASE_URL)
    output = llm.invoke("Say foo:")
    assert isinstance(output, str)


@pytest.mark.skipif(
    not _ping_llamafile_server(),
    reason=f"unable to find llamafile server at {LLAMAFILE_SERVER_BASE_URL}, "
    f"please start one and re-run this test",
)
def test_llamafile_streaming() -> None:
    """Stream a completion and check that every yielded token is a string."""
    # Point the client at the same server the skip check probed; relying on
    # the client's own default would test a different host whenever the
    # LLAMAFILE_SERVER_BASE_URL environment variable is overridden.
    llm = Llamafile(base_url=LLAMAFILE_SERVER_BASE_URL, streaming=True)
    generator = llm.stream("Tell me about Roman dodecahedrons.")
    assert isinstance(generator, Generator)
    for token in generator:
        assert isinstance(token, str)
community[minor]: Adds Llamafile as an LLM (#17431)

* Description: Adds a simple LLM implementation for interacting with [llamafile](https://github.com/Mozilla-Ocho/llamafile)-based models.
* Dependencies: N/A
* Issue: N/A

Detail

[llamafile](https://github.com/Mozilla-Ocho/llamafile) lets you run LLMs locally from a single file on most computers without installing any dependencies.

To use the llamafile LLM implementation, the user needs to:

1. Download a llamafile, e.g. https://huggingface.co/jartine/TinyLlama-1.1B-Chat-v1.0-GGUF/resolve/main/TinyLlama-1.1B-Chat-v1.0.Q5_K_M.llamafile?download=true
2. Make the file executable.
3. Run the llamafile in 'server mode'. (All llamafiles come packaged with a lightweight server; by default, the server listens at `http://localhost:8080`.)

```bash
wget https://url/of/model.llamafile
chmod +x model.llamafile
./model.llamafile --server --nobrowser
```

Now, the user can invoke the LLM via the LangChain client:

```python
from langchain_community.llms.llamafile import Llamafile

llm = Llamafile()
llm.invoke("Tell me a joke.")
```

2024-02-14 19:15:24 +00:00			`import os`
			`from typing import Generator`

			`import pytest`
			`import requests`
			`from requests.exceptions import ConnectionError, HTTPError`

			`from langchain_community.llms.llamafile import Llamafile`

			`LLAMAFILE_SERVER_BASE_URL = os.getenv(`
			`"LLAMAFILE_SERVER_BASE_URL", "http://localhost:8080"`
			`)`


			`def _ping_llamafile_server() -> bool:`
			`try:`
			`response = requests.get(LLAMAFILE_SERVER_BASE_URL)`
			`response.raise_for_status()`
			`except (ConnectionError, HTTPError):`
			`return False`

			`return True`


			`@pytest.mark.skipif(`
			`not _ping_llamafile_server(),`
			`reason=f"unable to find llamafile server at {LLAMAFILE_SERVER_BASE_URL}, "`
			`f"please start one and re-run this test",`
			`)`
			`def test_llamafile_call() -> None:`
			`llm = Llamafile()`
			`output = llm.invoke("Say foo:")`
			`assert isinstance(output, str)`


			`@pytest.mark.skipif(`
			`not _ping_llamafile_server(),`
			`reason=f"unable to find llamafile server at {LLAMAFILE_SERVER_BASE_URL}, "`
			`f"please start one and re-run this test",`
			`)`
			`def test_llamafile_streaming() -> None:`
			`llm = Llamafile(streaming=True)`
			`generator = llm.stream("Tell me about Roman dodecahedrons.")`
			`assert isinstance(generator, Generator)`
			`for token in generator:`
			`assert isinstance(token, str)`