langchain/tests/integration_tests/llms/test_huggingface_pipeline.py

"""Test HuggingFace Pipeline wrapper."""

from pathlib import Path

from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline

from langchain.llms.huggingface_pipeline import HuggingFacePipeline
from langchain.llms.loading import load_llm
from tests.integration_tests.llms.utils import assert_llm_equality


def test_huggingface_pipeline_text_generation() -> None:
    """Check that a gpt2 text-generation pipeline LLM returns a string."""
    generation_kwargs = {"max_new_tokens": 10}
    text_gen_llm = HuggingFacePipeline.from_model_id(
        model_id="gpt2",
        task="text-generation",
        model_kwargs=generation_kwargs,
    )
    completion = text_gen_llm("Say foo:")
    # Only the return type is checked; the generated text itself is not asserted.
    assert isinstance(completion, str)


def test_huggingface_pipeline_text2text_generation() -> None:
    """Check that a flan-t5-small text2text-generation pipeline LLM returns a string."""
    seq2seq_model_id = "google/flan-t5-small"
    seq2seq_task = "text2text-generation"
    text2text_llm = HuggingFacePipeline.from_model_id(
        model_id=seq2seq_model_id,
        task=seq2seq_task,
    )
    completion = text2text_llm("Say foo:")
    # Only the return type is checked; the generated text itself is not asserted.
    assert isinstance(completion, str)


def test_saving_loading_llm(tmp_path: Path) -> None:
    """Save a HuggingFacePipeline LLM to YAML, reload it, and compare the two.

    Args:
        tmp_path: Directory in which the ``hf.yaml`` file is written.
    """
    yaml_path = tmp_path / "hf.yaml"
    original_llm = HuggingFacePipeline.from_model_id(
        model_id="gpt2",
        task="text-generation",
        model_kwargs={"max_new_tokens": 10},
    )
    original_llm.save(file_path=yaml_path)
    restored_llm = load_llm(yaml_path)
    # Equality is delegated to the shared integration-test helper.
    assert_llm_equality(original_llm, restored_llm)


def test_init_with_pipeline() -> None:
    """Check that an LLM wrapping a pre-built HF pipeline returns a string."""
    checkpoint = "gpt2"
    # Load tokenizer and model from the same checkpoint, tokenizer first.
    gpt2_tokenizer = AutoTokenizer.from_pretrained(checkpoint)
    gpt2_model = AutoModelForCausalLM.from_pretrained(checkpoint)
    text_gen_pipe = pipeline(
        "text-generation",
        model=gpt2_model,
        tokenizer=gpt2_tokenizer,
        max_new_tokens=10,
    )
    # Construct the wrapper directly from the pipeline rather than from a model id.
    wrapped_llm = HuggingFacePipeline(pipeline=text_gen_pipe)
    completion = wrapped_llm("Say foo:")
    assert isinstance(completion, str)
Add HuggingFacePipeline LLM (#353) https://github.com/hwchase17/langchain/issues/354 Add support for running your own HF pipeline locally. This would allow you to get a lot more dynamic with what HF features and models you support since you wouldn't be beholden to what is hosted in HF hub. You could also do stuff with HF Optimum to quantize your models and stuff to get pretty fast inference even running on a laptop. 2022-12-17 15:00:04 +00:00			`"""Test HuggingFace Pipeline wrapper."""`

			`from pathlib import Path`

			`from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline`

			`from langchain.llms.huggingface_pipeline import HuggingFacePipeline`
			`from langchain.llms.loading import load_llm`
			`from tests.integration_tests.llms.utils import assert_llm_equality`


			`def test_huggingface_pipeline_text_generation() -> None:`
			`"""Test valid call to HuggingFace text generation model."""`
			`llm = HuggingFacePipeline.from_model_id(`
			`model_id="gpt2", task="text-generation", model_kwargs={"max_new_tokens": 10}`
			`)`
			`output = llm("Say foo:")`
			`assert isinstance(output, str)`


Harrison/version 0040 (#366) 2022-12-17 15:53:22 +00:00			`def test_huggingface_pipeline_text2text_generation() -> None:`
			`"""Test valid call to HuggingFace text2text generation model."""`
			`llm = HuggingFacePipeline.from_model_id(`
			`model_id="google/flan-t5-small", task="text2text-generation"`
			`)`
			`output = llm("Say foo:")`
			`assert isinstance(output, str)`


Add HuggingFacePipeline LLM (#353) https://github.com/hwchase17/langchain/issues/354 Add support for running your own HF pipeline locally. This would allow you to get a lot more dynamic with what HF features and models you support since you wouldn't be beholden to what is hosted in HF hub. You could also do stuff with HF Optimum to quantize your models and stuff to get pretty fast inference even running on a laptop. 2022-12-17 15:00:04 +00:00			`def test_saving_loading_llm(tmp_path: Path) -> None:`
			`"""Test saving/loading an HuggingFaceHub LLM."""`
			`llm = HuggingFacePipeline.from_model_id(`
			`model_id="gpt2", task="text-generation", model_kwargs={"max_new_tokens": 10}`
			`)`
			`llm.save(file_path=tmp_path / "hf.yaml")`
			`loaded_llm = load_llm(tmp_path / "hf.yaml")`
			`assert_llm_equality(llm, loaded_llm)`


			`def test_init_with_pipeline() -> None:`
			`"""Test initialization with a HF pipeline."""`
			`model_id = "gpt2"`
			`tokenizer = AutoTokenizer.from_pretrained(model_id)`
			`model = AutoModelForCausalLM.from_pretrained(model_id)`
			`pipe = pipeline(`
			`"text-generation", model=model, tokenizer=tokenizer, max_new_tokens=10`
			`)`
			`llm = HuggingFacePipeline(pipeline=pipe)`
			`output = llm("Say foo:")`
			`assert isinstance(output, str)`