Rename Databerry to Chaindesk (#7022)

---------

Co-authored-by: Bagatur <baskaryan@gmail.com>
Georges Petrov, 2023-07-07 23:28:04 +02:00 (committed by GitHub)
parent da5b0723d2
commit ec033ae277
7 changed files with 125 additions and 23 deletions

View File

@@ -1316,7 +1316,7 @@ Classes
 retrievers.azure_cognitive_search.AzureCognitiveSearchRetriever
 retrievers.chatgpt_plugin_retriever.ChatGPTPluginRetriever
 retrievers.contextual_compression.ContextualCompressionRetriever
-retrievers.databerry.DataberryRetriever
+retrievers.chaindesk.ChaindeskRetriever
 retrievers.docarray.DocArrayRetriever
 retrievers.docarray.SearchType
 retrievers.document_compressors.base.BaseDocumentCompressor

View File

Binary image file changed (before: 157 KiB, after: 157 KiB)

View File

@@ -137,8 +137,8 @@
       "destination": "/docs/ecosystem/integrations/ctransformers"
     },
     {
-      "source": "/en/latest/integrations/databerry.html",
-      "destination": "/docs/ecosystem/integrations/databerry"
+      "source": "/en/latest/integrations/chaindesk.html",
+      "destination": "/docs/ecosystem/integrations/chaindesk"
     },
     {
       "source": "/en/latest/integrations/databricks/databricks.html",
@@ -1329,8 +1329,8 @@
       "destination": "/docs/modules/data_connection/retrievers/integrations/cohere-reranker"
     },
     {
-      "source": "/en/latest/modules/indexes/retrievers/examples/databerry.html",
-      "destination": "/docs/modules/data_connection/retrievers/integrations/databerry"
+      "source": "/en/latest/modules/indexes/retrievers/examples/chaindesk.html",
+      "destination": "/docs/modules/data_connection/retrievers/integrations/chaindesk"
     },
     {
       "source": "/en/latest/modules/indexes/retrievers/examples/elastic_search_bm25.html",

View File

@@ -1,17 +1,17 @@
-# Databerry
+# Chaindesk
->[Databerry](https://databerry.ai) is an [open source](https://github.com/gmpetrov/databerry) document retrieval platform that helps to connect your personal data with Large Language Models.
+>[Chaindesk](https://chaindesk.ai) is an [open source](https://github.com/gmpetrov/databerry) document retrieval platform that helps to connect your personal data with Large Language Models.
 ## Installation and Setup
-We need to sign up for Databerry, create a datastore, add some data and get your datastore api endpoint url.
-We need the [API Key](https://docs.databerry.ai/api-reference/authentication).
+We need to sign up for Chaindesk, create a datastore, add some data and get your datastore api endpoint url.
+We need the [API Key](https://docs.chaindesk.ai/api-reference/authentication).
 ## Retriever
-See a [usage example](/docs/modules/data_connection/retrievers/integrations/databerry.html).
+See a [usage example](/docs/modules/data_connection/retrievers/integrations/chaindesk.html).
 ```python
-from langchain.retrievers import DataberryRetriever
+from langchain.retrievers import ChaindeskRetriever
 ```
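
For context, a minimal sketch of the documented usage after the rename (not part of the diff): the datastore URL below is a placeholder, and the API key is only needed for private datastores.

```python
from langchain.retrievers import ChaindeskRetriever

# Placeholder endpoint; use the query URL of your own Chaindesk datastore.
retriever = ChaindeskRetriever(
    datastore_url="https://<your-datastore-id>.chaindesk.ai/query",
    api_key="CHAINDESK_API_KEY",  # optional if the datastore is public
    top_k=5,  # optional
)

docs = retriever.get_relevant_documents("What is Chaindesk?")
for doc in docs:
    print(doc.metadata["source"], doc.page_content[:80])
```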

View File

@@ -1,21 +1,31 @@
 {
  "cells": [
   {
+   "attachments": {},
    "cell_type": "markdown",
    "id": "9fc6205b",
    "metadata": {},
    "source": [
-    "# Databerry\n",
+    "# Chaindesk\n",
     "\n",
-    ">[Databerry platform](https://docs.databerry.ai/introduction) brings data from anywhere (Datsources: Text, PDF, Word, PowerPpoint, Excel, Notion, Airtable, Google Sheets, etc..) into Datastores (container of multiple Datasources).\n",
-    "Then your Datastores can be connected to ChatGPT via Plugins or any other Large Langue Model (LLM) via the `Databerry API`.\n",
+    ">[Chaindesk platform](https://docs.chaindesk.ai/introduction) brings data from anywhere (Datsources: Text, PDF, Word, PowerPpoint, Excel, Notion, Airtable, Google Sheets, etc..) into Datastores (container of multiple Datasources).\n",
+    "Then your Datastores can be connected to ChatGPT via Plugins or any other Large Langue Model (LLM) via the `Chaindesk API`.\n",
     "\n",
-    "This notebook shows how to use [Databerry's](https://www.databerry.ai/) retriever.\n",
+    "This notebook shows how to use [Chaindesk's](https://www.chaindesk.ai/) retriever.\n",
     "\n",
-    "First, you will need to sign up for Databerry, create a datastore, add some data and get your datastore api endpoint url. You need the [API Key](https://docs.databerry.ai/api-reference/authentication)."
+    "First, you will need to sign up for Chaindesk, create a datastore, add some data and get your datastore api endpoint url. You need the [API Key](https://docs.chaindesk.ai/api-reference/authentication)."
    ]
   },
   {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "3697b9fd",
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  },
+  {
+   "attachments": {},
    "cell_type": "markdown",
    "id": "944e172b",
    "metadata": {},
@@ -34,7 +44,7 @@
    },
    "outputs": [],
    "source": [
-    "from langchain.retrievers import DataberryRetriever"
+    "from langchain.retrievers import ChaindeskRetriever"
    ]
   },
   {
@@ -46,9 +56,9 @@
    },
    "outputs": [],
    "source": [
-    "retriever = DataberryRetriever(\n",
-    "    datastore_url=\"https://clg1xg2h80000l708dymr0fxc.databerry.ai/query\",\n",
-    "    # api_key=\"DATABERRY_API_KEY\", # optional if datastore is public\n",
+    "retriever = ChaindeskRetriever(\n",
+    "    datastore_url=\"https://clg1xg2h80000l708dymr0fxc.chaindesk.ai/query\",\n",
+    "    # api_key=\"CHAINDESK_API_KEY\", # optional if datastore is public\n",
     "    # top_k=10 # optional\n",
     ")"
    ]
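
The notebook diff stops at constructing the retriever; a hedged sketch of the step it implies follows. The query string is illustrative, while `source` and `score` are the metadata keys the retriever populates (see the new module later in this commit).

```python
docs = retriever.get_relevant_documents("What is Chaindesk?")

for doc in docs:
    # Each Document carries the matched text plus the source and similarity score
    # returned by the Chaindesk datastore endpoint.
    print(doc.metadata["score"], doc.metadata["source"])
    print(doc.page_content)
```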

View File

@@ -1,8 +1,8 @@
 from langchain.retrievers.arxiv import ArxivRetriever
 from langchain.retrievers.azure_cognitive_search import AzureCognitiveSearchRetriever
+from langchain.retrievers.chaindesk import ChaindeskRetriever
 from langchain.retrievers.chatgpt_plugin_retriever import ChatGPTPluginRetriever
 from langchain.retrievers.contextual_compression import ContextualCompressionRetriever
-from langchain.retrievers.databerry import DataberryRetriever
 from langchain.retrievers.docarray import DocArrayRetriever
 from langchain.retrievers.elastic_search_bm25 import ElasticSearchBM25Retriever
 from langchain.retrievers.kendra import AmazonKendraRetriever
@@ -36,7 +36,7 @@ __all__ = [
     "AzureCognitiveSearchRetriever",
     "ChatGPTPluginRetriever",
     "ContextualCompressionRetriever",
-    "DataberryRetriever",
+    "ChaindeskRetriever",
     "ElasticSearchBM25Retriever",
     "KNNRetriever",
     "LlamaIndexGraphRetriever",

View File

@@ -0,0 +1,92 @@
from typing import Any, List, Optional

import aiohttp
import requests

from langchain.callbacks.manager import (
    AsyncCallbackManagerForRetrieverRun,
    CallbackManagerForRetrieverRun,
)
from langchain.schema import BaseRetriever, Document


class ChaindeskRetriever(BaseRetriever):
    """Retriever that uses the Chaindesk API."""

    datastore_url: str
    top_k: Optional[int]
    api_key: Optional[str]

    def __init__(
        self,
        datastore_url: str,
        top_k: Optional[int] = None,
        api_key: Optional[str] = None,
    ):
        self.datastore_url = datastore_url
        self.api_key = api_key
        self.top_k = top_k

    def _get_relevant_documents(
        self,
        query: str,
        *,
        run_manager: CallbackManagerForRetrieverRun,
        **kwargs: Any,
    ) -> List[Document]:
        response = requests.post(
            self.datastore_url,
            json={
                "query": query,
                **({"topK": self.top_k} if self.top_k is not None else {}),
            },
            headers={
                "Content-Type": "application/json",
                **(
                    {"Authorization": f"Bearer {self.api_key}"}
                    if self.api_key is not None
                    else {}
                ),
            },
        )
        data = response.json()
        return [
            Document(
                page_content=r["text"],
                metadata={"source": r["source"], "score": r["score"]},
            )
            for r in data["results"]
        ]

    async def _aget_relevant_documents(
        self,
        query: str,
        *,
        run_manager: AsyncCallbackManagerForRetrieverRun,
        **kwargs: Any,
    ) -> List[Document]:
        async with aiohttp.ClientSession() as session:
            async with session.request(
                "POST",
                self.datastore_url,
                json={
                    "query": query,
                    **({"topK": self.top_k} if self.top_k is not None else {}),
                },
                headers={
                    "Content-Type": "application/json",
                    **(
                        {"Authorization": f"Bearer {self.api_key}"}
                        if self.api_key is not None
                        else {}
                    ),
                },
            ) as response:
                data = await response.json()
                return [
                    Document(
                        page_content=r["text"],
                        metadata={"source": r["source"], "score": r["score"]},
                    )
                    for r in data["results"]
                ]
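
For reference, a short sketch of how both code paths in this new module are exercised through the public retriever interface: `get_relevant_documents` wraps the synchronous `requests` call and `aget_relevant_documents` the `aiohttp` one. The datastore URL is a placeholder, not a real endpoint.

```python
import asyncio

from langchain.retrievers import ChaindeskRetriever

retriever = ChaindeskRetriever(
    datastore_url="https://<your-datastore-id>.chaindesk.ai/query",  # placeholder
    api_key=None,  # assumes a public datastore; pass a key for private ones
    top_k=3,
)

# Synchronous path: POSTs the query to the datastore endpoint via `requests`.
print(retriever.get_relevant_documents("example query"))

# Asynchronous path: the same request issued through `aiohttp`.
print(asyncio.run(retriever.aget_relevant_documents("example query")))
```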