langchain/tests/unit_tests/chains/test_hyde.py

"""Test HyDE."""
from typing import List, Optional

import numpy as np

from langchain.callbacks.manager import (
    AsyncCallbackManagerForLLMRun,
    CallbackManagerForLLMRun,
)
from langchain.chains.hyde.base import HypotheticalDocumentEmbedder
from langchain.chains.hyde.prompts import PROMPT_MAP
from langchain.embeddings.base import Embeddings
from langchain.llms.base import BaseLLM
from langchain.schema import Generation, LLMResult


class FakeEmbeddings(Embeddings):
    """Fake embedding class for tests.

    No model is involved: every embedding is a vector of 10 floats drawn
    uniformly from [0, 1) using NumPy's global random state.
    """

    def embed_documents(self, texts: List[str]) -> List[List[float]]:
        """Return one random embedding per input text.

        Args:
            texts: Documents to embed. Their content is ignored; only the
                number of entries determines the size of the result.

        Returns:
            A list with ``len(texts)`` vectors of 10 floats each.
        """
        # The output length follows the input so that callers receive exactly
        # one vector per document (an empty input yields an empty list).
        return [self.embed_query(text) for text in texts]

    def embed_query(self, text: str) -> List[float]:
        """Return a single random embedding.

        Args:
            text: Query text; ignored.

        Returns:
            A list of 10 floats.
        """
        # ``tolist()`` converts NumPy scalars to plain Python floats, matching
        # the declared ``List[float]`` return type.
        return np.random.uniform(0, 1, 10).tolist()


class FakeLLM(BaseLLM):
    """Fake LLM wrapper for testing purposes.

    Every completion is the fixed text ``"foo"``; ``n`` controls how many
    completions are produced for each prompt.
    """

    # Number of generations returned per prompt.
    n: int = 1

    def _fake_result(self, prompts: List[str]) -> LLMResult:
        """Build the canned result shared by the sync and async code paths.

        Args:
            prompts: Prompts to "complete"; only their count is used.

        Returns:
            An ``LLMResult`` holding one list of ``n`` generations per prompt.
        """
        # Keep ``generations`` aligned with ``prompts``: one inner list for
        # each prompt, so multi-prompt (or empty) calls get a matching shape.
        return LLMResult(
            generations=[
                [Generation(text="foo") for _ in range(self.n)] for _ in prompts
            ]
        )

    def _generate(
        self,
        prompts: List[str],
        stop: Optional[List[str]] = None,
        run_manager: Optional[CallbackManagerForLLMRun] = None,
    ) -> LLMResult:
        """Return ``n`` fixed generations for each prompt.

        ``stop`` and ``run_manager`` are accepted for interface compatibility
        and are not used.
        """
        return self._fake_result(prompts)

    async def _agenerate(
        self,
        prompts: List[str],
        stop: Optional[List[str]] = None,
        run_manager: Optional[AsyncCallbackManagerForLLMRun] = None,
    ) -> LLMResult:
        """Async counterpart of ``_generate``; returns the same canned result.

        ``stop`` and ``run_manager`` are accepted for interface compatibility
        and are not used.
        """
        return self._fake_result(prompts)

    @property
    def _llm_type(self) -> str:
        """Return type of llm."""
        return "fake"


def test_hyde_from_llm() -> None:
    """Build a HyDE embedder for every key in PROMPT_MAP and embed a query.

    The test passes as long as construction and ``embed_query`` do not raise.
    """
    for prompt_key in PROMPT_MAP:
        llm = FakeLLM()
        base_embeddings = FakeEmbeddings()
        hyde = HypotheticalDocumentEmbedder.from_llm(llm, base_embeddings, prompt_key)
        hyde.embed_query("foo")


def test_hyde_from_llm_with_multiple_n() -> None:
    """Build a HyDE embedder for every key in PROMPT_MAP with a fake LLM using n=8.

    The test passes as long as construction and ``embed_query`` do not raise
    when the LLM yields several generations.
    """
    for prompt_key in PROMPT_MAP:
        multi_llm = FakeLLM(n=8)
        base_embeddings = FakeEmbeddings()
        hyde = HypotheticalDocumentEmbedder.from_llm(
            multi_llm, base_embeddings, prompt_key
        )
        hyde.embed_query("foo")
(WIP) add HyDE (#393) Co-authored-by: cameronccohen <cameron.c.cohen@gmail.com> Co-authored-by: Cameron Cohen <cameron.cohen@quantco.com> 2022-12-22 01:46:41 +00:00			`"""Test HyDE."""`
			`from typing import List, Optional`

			`import numpy as np`

Callbacks Refactor [base] (#3256) Co-authored-by: Nuno Campos <nuno@boringbits.io> Co-authored-by: Davis Chase <130488702+dev2049@users.noreply.github.com> Co-authored-by: Zander Chase <130414180+vowelparrot@users.noreply.github.com> Co-authored-by: Harrison Chase <hw.chase.17@gmail.com> 2023-04-30 18:14:09 +00:00			`from langchain.callbacks.manager import (`
			`AsyncCallbackManagerForLLMRun,`
			`CallbackManagerForLLMRun,`
			`)`
move hyde into chains (#728) Co-authored-by: scadEfUr <> 2023-01-25 06:23:32 +00:00			`from langchain.chains.hyde.base import HypotheticalDocumentEmbedder`
			`from langchain.chains.hyde.prompts import PROMPT_MAP`
(WIP) add HyDE (#393) Co-authored-by: cameronccohen <cameron.c.cohen@gmail.com> Co-authored-by: Cameron Cohen <cameron.cohen@quantco.com> 2022-12-22 01:46:41 +00:00			`from langchain.embeddings.base import Embeddings`
Add BaseCallbackHandler and CallbackManager (#478) Co-authored-by: Ankush Gola <9536492+agola11@users.noreply.github.com> 2023-01-04 15:54:25 +00:00			`from langchain.llms.base import BaseLLM`
			`from langchain.schema import Generation, LLMResult`
(WIP) add HyDE (#393) Co-authored-by: cameronccohen <cameron.c.cohen@gmail.com> Co-authored-by: Cameron Cohen <cameron.cohen@quantco.com> 2022-12-22 01:46:41 +00:00

			`class FakeEmbeddings(Embeddings):`
			`"""Fake embedding class for tests."""`

			`def embed_documents(self, texts: List[str]) -> List[List[float]]:`
			`"""Return random floats."""`
			`return [list(np.random.uniform(0, 1, 10)) for _ in range(10)]`

			`def embed_query(self, text: str) -> List[float]:`
			`"""Return random floats."""`
			`return list(np.random.uniform(0, 1, 10))`


Removed duplicate BaseModel dependencies (#2471) Removed duplicate BaseModel dependencies in class inheritances. Also, sorted imports by `isort`. 2023-04-06 19:45:16 +00:00			`class FakeLLM(BaseLLM):`
(WIP) add HyDE (#393) Co-authored-by: cameronccohen <cameron.c.cohen@gmail.com> Co-authored-by: Cameron Cohen <cameron.cohen@quantco.com> 2022-12-22 01:46:41 +00:00			`"""Fake LLM wrapper for testing purposes."""`

			`n: int = 1`

			`def _generate(`
Callbacks Refactor [base] (#3256) Co-authored-by: Nuno Campos <nuno@boringbits.io> Co-authored-by: Davis Chase <130488702+dev2049@users.noreply.github.com> Co-authored-by: Zander Chase <130414180+vowelparrot@users.noreply.github.com> Co-authored-by: Harrison Chase <hw.chase.17@gmail.com> 2023-04-30 18:14:09 +00:00			`self,`
			`prompts: List[str],`
			`stop: Optional[List[str]] = None,`
			`run_manager: Optional[CallbackManagerForLLMRun] = None,`
(WIP) add HyDE (#393) Co-authored-by: cameronccohen <cameron.c.cohen@gmail.com> Co-authored-by: Cameron Cohen <cameron.cohen@quantco.com> 2022-12-22 01:46:41 +00:00			`) -> LLMResult:`
			`return LLMResult(generations=[[Generation(text="foo") for _ in range(self.n)]])`

Add asyncio support for LLM (OpenAI), Chain (LLMChain, LLMMathChain), and Agent (#841) Supporting asyncio in langchain primitives allows for users to run them concurrently and creates more seamless integration with asyncio-supported frameworks (FastAPI, etc.) Summary of changes: LLM * Add `agenerate` and `_agenerate` * Implement in OpenAI by leveraging `client.Completions.acreate` Chain * Add `arun`, `acall`, `_acall` * Implement them in `LLMChain` and `LLMMathChain` for now Agent * Refactor and leverage async chain and llm methods * Add ability for `Tools` to contain async coroutine * Implement async SerpaPI `arun` Create demo notebook. Open questions: * Should all the async stuff go in separate classes? I've seen both patterns (keeping the same class and having async and sync methods vs. having class separation) 2023-02-08 05:21:57 +00:00			`async def _agenerate(`
Callbacks Refactor [base] (#3256) Co-authored-by: Nuno Campos <nuno@boringbits.io> Co-authored-by: Davis Chase <130488702+dev2049@users.noreply.github.com> Co-authored-by: Zander Chase <130414180+vowelparrot@users.noreply.github.com> Co-authored-by: Harrison Chase <hw.chase.17@gmail.com> 2023-04-30 18:14:09 +00:00			`self,`
			`prompts: List[str],`
			`stop: Optional[List[str]] = None,`
			`run_manager: Optional[AsyncCallbackManagerForLLMRun] = None,`
Add asyncio support for LLM (OpenAI), Chain (LLMChain, LLMMathChain), and Agent (#841) Supporting asyncio in langchain primitives allows for users to run them concurrently and creates more seamless integration with asyncio-supported frameworks (FastAPI, etc.) Summary of changes: LLM * Add `agenerate` and `_agenerate` * Implement in OpenAI by leveraging `client.Completions.acreate` Chain * Add `arun`, `acall`, `_acall` * Implement them in `LLMChain` and `LLMMathChain` for now Agent * Refactor and leverage async chain and llm methods * Add ability for `Tools` to contain async coroutine * Implement async SerpaPI `arun` Create demo notebook. Open questions: * Should all the async stuff go in separate classes? I've seen both patterns (keeping the same class and having async and sync methods vs. having class separation) 2023-02-08 05:21:57 +00:00			`) -> LLMResult:`
			`return LLMResult(generations=[[Generation(text="foo") for _ in range(self.n)]])`

(WIP) add HyDE (#393) Co-authored-by: cameronccohen <cameron.c.cohen@gmail.com> Co-authored-by: Cameron Cohen <cameron.cohen@quantco.com> 2022-12-22 01:46:41 +00:00			`@property`
			`def _llm_type(self) -> str:`
			`"""Return type of llm."""`
			`return "fake"`


			`def test_hyde_from_llm() -> None:`
			`"""Test loading HyDE from all prompts."""`
			`for key in PROMPT_MAP:`
			`embedding = HypotheticalDocumentEmbedder.from_llm(`
			`FakeLLM(), FakeEmbeddings(), key`
			`)`
			`embedding.embed_query("foo")`


			`def test_hyde_from_llm_with_multiple_n() -> None:`
			`"""Test loading HyDE from all prompts."""`
			`for key in PROMPT_MAP:`
			`embedding = HypotheticalDocumentEmbedder.from_llm(`
			`FakeLLM(n=8), FakeEmbeddings(), key`
			`)`
			`embedding.embed_query("foo")`