From 3610ef2830851f639a3dc241e37c018fbb0f319a Mon Sep 17 00:00:00 2001
From: Harrison Chase
Date: Tue, 7 Mar 2023 15:23:46 -0800
Subject: [PATCH] add fake embeddings class (#1503)

---
 .../modules/indexes/examples/embeddings.ipynb | 60 ++++++++++++++++++-
 langchain/embeddings/__init__.py              |  2 +
 langchain/embeddings/fake.py                  | 19 ++++++
 3 files changed, 80 insertions(+), 1 deletion(-)
 create mode 100644 langchain/embeddings/fake.py

diff --git a/docs/modules/indexes/examples/embeddings.ipynb b/docs/modules/indexes/examples/embeddings.ipynb
index bac1a35c..080441c1 100644
--- a/docs/modules/indexes/examples/embeddings.ipynb
+++ b/docs/modules/indexes/examples/embeddings.ipynb
@@ -463,6 +463,64 @@
    "source": [
     "query_result = embeddings.embed_query(text)"
    ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "f9c02c78",
+   "metadata": {},
+   "source": [
+    "## Fake Embeddings\n",
+    "\n",
+    "LangChain also provides a fake embedding class. You can use this to test your pipelines."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "id": "2ffc2e4b",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from langchain.embeddings import FakeEmbeddings"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 3,
+   "id": "80777571",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "embeddings = FakeEmbeddings(size=1352)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 5,
+   "id": "3ec9d8f0",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "query_result = embeddings.embed_query(\"foo\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 6,
+   "id": "3b9ae9e1",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "doc_results = embeddings.embed_documents([\"foo\"])"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "88d366bd",
+   "metadata": {},
+   "outputs": [],
+   "source": []
   }
  ],
  "metadata": {
@@ -481,7 +539,7 @@
    "name": "python",
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
-   "version": "3.10.9"
+   "version": "3.9.1"
   },
   "vscode": {
    "interpreter": {
diff --git a/langchain/embeddings/__init__.py b/langchain/embeddings/__init__.py
index 403616c5..261447b1 100644
--- a/langchain/embeddings/__init__.py
+++ b/langchain/embeddings/__init__.py
@@ -3,6 +3,7 @@ import logging
 from typing import Any
 
 from langchain.embeddings.cohere import CohereEmbeddings
+from langchain.embeddings.fake import FakeEmbeddings
 from langchain.embeddings.huggingface import (
     HuggingFaceEmbeddings,
     HuggingFaceInstructEmbeddings,
@@ -28,6 +29,7 @@ __all__ = [
     "SelfHostedEmbeddings",
     "SelfHostedHuggingFaceEmbeddings",
     "SelfHostedHuggingFaceInstructEmbeddings",
+    "FakeEmbeddings",
 ]
 
 
diff --git a/langchain/embeddings/fake.py b/langchain/embeddings/fake.py
new file mode 100644
index 00000000..9328f927
--- /dev/null
+++ b/langchain/embeddings/fake.py
@@ -0,0 +1,19 @@
+from typing import List
+
+import numpy as np
+from pydantic import BaseModel
+
+from langchain.embeddings.base import Embeddings
+
+
+class FakeEmbeddings(Embeddings, BaseModel):
+    size: int
+
+    def _get_embedding(self) -> List[float]:
+        return list(np.random.normal(size=self.size))
+
+    def embed_documents(self, texts: List[str]) -> List[List[float]]:
+        return [self._get_embedding() for _ in texts]
+
+    def embed_query(self, text: str) -> List[float]:
+        return self._get_embedding()
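
Usage sketch (not part of the patch above): because the new FakeEmbeddings class returns random vectors of a fixed size, it can stand in for a real embedding model when unit-testing a pipeline, which is what the notebook cell text alludes to. The sketch below assumes only the FakeEmbeddings interface introduced in langchain/embeddings/fake.py; the test name and the size=8 value are illustrative.

# Minimal pytest-style sketch; exercises only the FakeEmbeddings API introduced above.
from langchain.embeddings import FakeEmbeddings


def test_fake_embeddings_dimensions() -> None:
    # size=8 is an arbitrary illustrative dimensionality.
    embeddings = FakeEmbeddings(size=8)

    query_vector = embeddings.embed_query("foo")
    doc_vectors = embeddings.embed_documents(["foo", "bar"])

    # Vectors are random, but each has the configured length.
    assert len(query_vector) == 8
    assert len(doc_vectors) == 2
    assert all(len(vec) == 8 for vec in doc_vectors)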