diff --git a/docs/api_reference/api_reference.rst b/docs/api_reference/api_reference.rst index cdc4580b46..fea725bc9b 100644 --- a/docs/api_reference/api_reference.rst +++ b/docs/api_reference/api_reference.rst @@ -1316,7 +1316,7 @@ Classes retrievers.azure_cognitive_search.AzureCognitiveSearchRetriever retrievers.chatgpt_plugin_retriever.ChatGPTPluginRetriever retrievers.contextual_compression.ContextualCompressionRetriever - retrievers.databerry.DataberryRetriever + retrievers.chaindesk.ChaindeskRetriever retrievers.docarray.DocArrayRetriever retrievers.docarray.SearchType retrievers.document_compressors.base.BaseDocumentCompressor diff --git a/docs/docs_skeleton/docs/_static/DataberryDashboard.png b/docs/docs_skeleton/docs/_static/ChaindeskDashboard.png similarity index 100% rename from docs/docs_skeleton/docs/_static/DataberryDashboard.png rename to docs/docs_skeleton/docs/_static/ChaindeskDashboard.png diff --git a/docs/docs_skeleton/vercel.json b/docs/docs_skeleton/vercel.json index ba23634f9c..2fb14aaa69 100644 --- a/docs/docs_skeleton/vercel.json +++ b/docs/docs_skeleton/vercel.json @@ -137,8 +137,8 @@ "destination": "/docs/ecosystem/integrations/ctransformers" }, { - "source": "/en/latest/integrations/databerry.html", - "destination": "/docs/ecosystem/integrations/databerry" + "source": "/en/latest/integrations/chaindesk.html", + "destination": "/docs/ecosystem/integrations/chaindesk" }, { "source": "/en/latest/integrations/databricks/databricks.html", @@ -1329,8 +1329,8 @@ "destination": "/docs/modules/data_connection/retrievers/integrations/cohere-reranker" }, { - "source": "/en/latest/modules/indexes/retrievers/examples/databerry.html", - "destination": "/docs/modules/data_connection/retrievers/integrations/databerry" + "source": "/en/latest/modules/indexes/retrievers/examples/chaindesk.html", + "destination": "/docs/modules/data_connection/retrievers/integrations/chaindesk" }, { "source": 
"/en/latest/modules/indexes/retrievers/examples/elastic_search_bm25.html", @@ -2125,4 +2125,4 @@ "destination": "/docs/:path*" } ] -} \ No newline at end of file +} diff --git a/docs/extras/ecosystem/integrations/databerry.mdx b/docs/extras/ecosystem/integrations/chaindesk.mdx similarity index 52% rename from docs/extras/ecosystem/integrations/databerry.mdx rename to docs/extras/ecosystem/integrations/chaindesk.mdx index feec6b118e..9fcabf4b8e 100644 --- a/docs/extras/ecosystem/integrations/databerry.mdx +++ b/docs/extras/ecosystem/integrations/chaindesk.mdx @@ -1,17 +1,17 @@ -# Databerry +# Chaindesk ->[Databerry](https://databerry.ai) is an [open source](https://github.com/gmpetrov/databerry) document retrieval platform that helps to connect your personal data with Large Language Models. +>[Chaindesk](https://chaindesk.ai) is an [open source](https://github.com/gmpetrov/databerry) document retrieval platform that helps to connect your personal data with Large Language Models. ## Installation and Setup -We need to sign up for Databerry, create a datastore, add some data and get your datastore api endpoint url. -We need the [API Key](https://docs.databerry.ai/api-reference/authentication). +We need to sign up for Chaindesk, create a datastore, add some data and get your datastore api endpoint url. +We need the [API Key](https://docs.chaindesk.ai/api-reference/authentication). ## Retriever -See a [usage example](/docs/modules/data_connection/retrievers/integrations/databerry.html). +See a [usage example](/docs/modules/data_connection/retrievers/integrations/chaindesk.html). 
```python -from langchain.retrievers import DataberryRetriever +from langchain.retrievers import ChaindeskRetriever ``` diff --git a/docs/extras/modules/data_connection/retrievers/integrations/databerry.ipynb b/docs/extras/modules/data_connection/retrievers/integrations/chaindesk.ipynb similarity index 84% rename from docs/extras/modules/data_connection/retrievers/integrations/databerry.ipynb rename to docs/extras/modules/data_connection/retrievers/integrations/chaindesk.ipynb index f135615b5a..43248f827a 100644 --- a/docs/extras/modules/data_connection/retrievers/integrations/databerry.ipynb +++ b/docs/extras/modules/data_connection/retrievers/integrations/chaindesk.ipynb @@ -1,21 +1,31 @@ { "cells": [ { + "attachments": {}, "cell_type": "markdown", "id": "9fc6205b", "metadata": {}, "source": [ - "# Databerry\n", + "# Chaindesk\n", "\n", - ">[Databerry platform](https://docs.databerry.ai/introduction) brings data from anywhere (Datsources: Text, PDF, Word, PowerPpoint, Excel, Notion, Airtable, Google Sheets, etc..) into Datastores (container of multiple Datasources).\n", - "Then your Datastores can be connected to ChatGPT via Plugins or any other Large Langue Model (LLM) via the `Databerry API`.\n", + ">[Chaindesk platform](https://docs.chaindesk.ai/introduction) brings data from anywhere (Datasources: Text, PDF, Word, PowerPoint, Excel, Notion, Airtable, Google Sheets, etc.) into Datastores (container of multiple Datasources).\n", + "Then your Datastores can be connected to ChatGPT via Plugins or any other Large Language Model (LLM) via the `Chaindesk API`.\n", "\n", - "This notebook shows how to use [Databerry's](https://www.databerry.ai/) retriever.\n", + "This notebook shows how to use [Chaindesk's](https://www.chaindesk.ai/) retriever.\n", "\n", - "First, you will need to sign up for Databerry, create a datastore, add some data and get your datastore api endpoint url. You need the [API Key](https://docs.databerry.ai/api-reference/authentication)." 
+ "First, you will need to sign up for Chaindesk, create a datastore, add some data and get your datastore api endpoint url. You need the [API Key](https://docs.chaindesk.ai/api-reference/authentication)." ] }, { + "cell_type": "code", + "execution_count": null, + "id": "3697b9fd", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "attachments": {}, "cell_type": "markdown", "id": "944e172b", "metadata": {}, @@ -34,7 +44,7 @@ }, "outputs": [], "source": [ - "from langchain.retrievers import DataberryRetriever" + "from langchain.retrievers import ChaindeskRetriever" ] }, { @@ -46,9 +56,9 @@ }, "outputs": [], "source": [ - "retriever = DataberryRetriever(\n", - " datastore_url=\"https://clg1xg2h80000l708dymr0fxc.databerry.ai/query\",\n", - " # api_key=\"DATABERRY_API_KEY\", # optional if datastore is public\n", + "retriever = ChaindeskRetriever(\n", + " datastore_url=\"https://clg1xg2h80000l708dymr0fxc.chaindesk.ai/query\",\n", + " # api_key=\"CHAINDESK_API_KEY\", # optional if datastore is public\n", " # top_k=10 # optional\n", ")" ] diff --git a/langchain/retrievers/__init__.py b/langchain/retrievers/__init__.py index 137a8ae19a..87ef899fdd 100644 --- a/langchain/retrievers/__init__.py +++ b/langchain/retrievers/__init__.py @@ -1,8 +1,8 @@ from langchain.retrievers.arxiv import ArxivRetriever from langchain.retrievers.azure_cognitive_search import AzureCognitiveSearchRetriever +from langchain.retrievers.chaindesk import ChaindeskRetriever from langchain.retrievers.chatgpt_plugin_retriever import ChatGPTPluginRetriever from langchain.retrievers.contextual_compression import ContextualCompressionRetriever -from langchain.retrievers.databerry import DataberryRetriever from langchain.retrievers.docarray import DocArrayRetriever from langchain.retrievers.elastic_search_bm25 import ElasticSearchBM25Retriever from langchain.retrievers.kendra import AmazonKendraRetriever @@ -36,7 +36,7 @@ __all__ = [ "AzureCognitiveSearchRetriever", "ChatGPTPluginRetriever", 
"ContextualCompressionRetriever", - "DataberryRetriever", + "ChaindeskRetriever", "ElasticSearchBM25Retriever", "KNNRetriever", "LlamaIndexGraphRetriever", diff --git a/langchain/retrievers/chaindesk.py b/langchain/retrievers/chaindesk.py new file mode 100644 index 0000000000..f2bf654de0 --- /dev/null +++ b/langchain/retrievers/chaindesk.py @@ -0,0 +1,92 @@ +from typing import Any, List, Optional + +import aiohttp +import requests + +from langchain.callbacks.manager import ( + AsyncCallbackManagerForRetrieverRun, + CallbackManagerForRetrieverRun, +) +from langchain.schema import BaseRetriever, Document + + +class ChaindeskRetriever(BaseRetriever): + """Retriever that uses the Chaindesk API.""" + + datastore_url: str + top_k: Optional[int] + api_key: Optional[str] + + def __init__( + self, + datastore_url: str, + top_k: Optional[int] = None, + api_key: Optional[str] = None, + ): + self.datastore_url = datastore_url + self.api_key = api_key + self.top_k = top_k + + def _get_relevant_documents( + self, + query: str, + *, + run_manager: CallbackManagerForRetrieverRun, + **kwargs: Any, + ) -> List[Document]: + response = requests.post( + self.datastore_url, + json={ + "query": query, + **({"topK": self.top_k} if self.top_k is not None else {}), + }, + headers={ + "Content-Type": "application/json", + **( + {"Authorization": f"Bearer {self.api_key}"} + if self.api_key is not None + else {} + ), + }, + ) + data = response.json() + return [ + Document( + page_content=r["text"], + metadata={"source": r["source"], "score": r["score"]}, + ) + for r in data["results"] + ] + + async def _aget_relevant_documents( + self, + query: str, + *, + run_manager: AsyncCallbackManagerForRetrieverRun, + **kwargs: Any, + ) -> List[Document]: + async with aiohttp.ClientSession() as session: + async with session.request( + "POST", + self.datastore_url, + json={ + "query": query, + **({"topK": self.top_k} if self.top_k is not None else {}), + }, + headers={ + "Content-Type": 
"application/json", + **( + {"Authorization": f"Bearer {self.api_key}"} + if self.api_key is not None + else {} + ), + }, + ) as response: + data = await response.json() + return [ + Document( + page_content=r["text"], + metadata={"source": r["source"], "score": r["score"]}, + ) + for r in data["results"] + ]