mirror of
https://github.com/hwchase17/langchain
synced 2024-11-06 03:20:49 +00:00
Rename Databerry to Chaindesk (#7022)
--------- Co-authored-by: Bagatur <baskaryan@gmail.com>
This commit is contained in:
parent
da5b0723d2
commit
ec033ae277
@ -1316,7 +1316,7 @@ Classes
|
||||
retrievers.azure_cognitive_search.AzureCognitiveSearchRetriever
|
||||
retrievers.chatgpt_plugin_retriever.ChatGPTPluginRetriever
|
||||
retrievers.contextual_compression.ContextualCompressionRetriever
|
||||
retrievers.databerry.DataberryRetriever
|
||||
retrievers.chaindesk.ChaindeskRetriever
|
||||
retrievers.docarray.DocArrayRetriever
|
||||
retrievers.docarray.SearchType
|
||||
retrievers.document_compressors.base.BaseDocumentCompressor
|
||||
|
Before Width: | Height: | Size: 157 KiB After Width: | Height: | Size: 157 KiB |
@ -137,8 +137,8 @@
|
||||
"destination": "/docs/ecosystem/integrations/ctransformers"
|
||||
},
|
||||
{
|
||||
"source": "/en/latest/integrations/databerry.html",
|
||||
"destination": "/docs/ecosystem/integrations/databerry"
|
||||
"source": "/en/latest/integrations/chaindesk.html",
|
||||
"destination": "/docs/ecosystem/integrations/chaindesk"
|
||||
},
|
||||
{
|
||||
"source": "/en/latest/integrations/databricks/databricks.html",
|
||||
@ -1329,8 +1329,8 @@
|
||||
"destination": "/docs/modules/data_connection/retrievers/integrations/cohere-reranker"
|
||||
},
|
||||
{
|
||||
"source": "/en/latest/modules/indexes/retrievers/examples/databerry.html",
|
||||
"destination": "/docs/modules/data_connection/retrievers/integrations/databerry"
|
||||
"source": "/en/latest/modules/indexes/retrievers/examples/chaindesk.html",
|
||||
"destination": "/docs/modules/data_connection/retrievers/integrations/chaindesk"
|
||||
},
|
||||
{
|
||||
"source": "/en/latest/modules/indexes/retrievers/examples/elastic_search_bm25.html",
|
||||
@ -2125,4 +2125,4 @@
|
||||
"destination": "/docs/:path*"
|
||||
}
|
||||
]
|
||||
}
|
||||
}
|
||||
|
@ -1,17 +1,17 @@
|
||||
# Databerry
|
||||
# Chaindesk
|
||||
|
||||
>[Databerry](https://databerry.ai) is an [open source](https://github.com/gmpetrov/databerry) document retrieval platform that helps to connect your personal data with Large Language Models.
|
||||
>[Chaindesk](https://chaindesk.ai) is an [open source](https://github.com/gmpetrov/databerry) document retrieval platform that helps to connect your personal data with Large Language Models.
|
||||
|
||||
|
||||
## Installation and Setup
|
||||
|
||||
We need to sign up for Databerry, create a datastore, add some data and get your datastore api endpoint url.
|
||||
We need the [API Key](https://docs.databerry.ai/api-reference/authentication).
|
||||
You need to sign up for Chaindesk, create a datastore, add some data, and get your datastore API endpoint URL.
|
||||
You also need the [API Key](https://docs.chaindesk.ai/api-reference/authentication).
|
||||
|
||||
## Retriever
|
||||
|
||||
See a [usage example](/docs/modules/data_connection/retrievers/integrations/databerry.html).
|
||||
See a [usage example](/docs/modules/data_connection/retrievers/integrations/chaindesk.html).
|
||||
|
||||
```python
|
||||
from langchain.retrievers import DataberryRetriever
|
||||
from langchain.retrievers import ChaindeskRetriever
|
||||
```
|
@ -1,21 +1,31 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"id": "9fc6205b",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# Databerry\n",
|
||||
"# Chaindesk\n",
|
||||
"\n",
|
||||
">[Databerry platform](https://docs.databerry.ai/introduction) brings data from anywhere (Datsources: Text, PDF, Word, PowerPpoint, Excel, Notion, Airtable, Google Sheets, etc..) into Datastores (container of multiple Datasources).\n",
|
||||
"Then your Datastores can be connected to ChatGPT via Plugins or any other Large Langue Model (LLM) via the `Databerry API`.\n",
|
||||
">[Chaindesk platform](https://docs.chaindesk.ai/introduction) brings data from anywhere (Datasources: Text, PDF, Word, PowerPoint, Excel, Notion, Airtable, Google Sheets, etc.) into Datastores (container of multiple Datasources).\n",
|
||||
"Then your Datastores can be connected to ChatGPT via Plugins or any other Large Language Model (LLM) via the `Chaindesk API`.\n",
|
||||
"\n",
|
||||
"This notebook shows how to use [Databerry's](https://www.databerry.ai/) retriever.\n",
|
||||
"This notebook shows how to use [Chaindesk's](https://www.chaindesk.ai/) retriever.\n",
|
||||
"\n",
|
||||
"First, you will need to sign up for Databerry, create a datastore, add some data and get your datastore api endpoint url. You need the [API Key](https://docs.databerry.ai/api-reference/authentication)."
|
||||
"First, you will need to sign up for Chaindesk, create a datastore, add some data and get your datastore api endpoint url. You need the [API Key](https://docs.chaindesk.ai/api-reference/authentication)."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "3697b9fd",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": []
|
||||
},
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"id": "944e172b",
|
||||
"metadata": {},
|
||||
@ -34,7 +44,7 @@
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain.retrievers import DataberryRetriever"
|
||||
"from langchain.retrievers import ChaindeskRetriever"
|
||||
]
|
||||
},
|
||||
{
|
||||
@ -46,9 +56,9 @@
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"retriever = DataberryRetriever(\n",
|
||||
" datastore_url=\"https://clg1xg2h80000l708dymr0fxc.databerry.ai/query\",\n",
|
||||
" # api_key=\"DATABERRY_API_KEY\", # optional if datastore is public\n",
|
||||
"retriever = ChaindeskRetriever(\n",
|
||||
" datastore_url=\"https://clg1xg2h80000l708dymr0fxc.chaindesk.ai/query\",\n",
|
||||
" # api_key=\"CHAINDESK_API_KEY\", # optional if datastore is public\n",
|
||||
" # top_k=10 # optional\n",
|
||||
")"
|
||||
]
|
@ -1,8 +1,8 @@
|
||||
from langchain.retrievers.arxiv import ArxivRetriever
|
||||
from langchain.retrievers.azure_cognitive_search import AzureCognitiveSearchRetriever
|
||||
from langchain.retrievers.chaindesk import ChaindeskRetriever
|
||||
from langchain.retrievers.chatgpt_plugin_retriever import ChatGPTPluginRetriever
|
||||
from langchain.retrievers.contextual_compression import ContextualCompressionRetriever
|
||||
from langchain.retrievers.databerry import DataberryRetriever
|
||||
from langchain.retrievers.docarray import DocArrayRetriever
|
||||
from langchain.retrievers.elastic_search_bm25 import ElasticSearchBM25Retriever
|
||||
from langchain.retrievers.kendra import AmazonKendraRetriever
|
||||
@ -36,7 +36,7 @@ __all__ = [
|
||||
"AzureCognitiveSearchRetriever",
|
||||
"ChatGPTPluginRetriever",
|
||||
"ContextualCompressionRetriever",
|
||||
"DataberryRetriever",
|
||||
"ChaindeskRetriever",
|
||||
"ElasticSearchBM25Retriever",
|
||||
"KNNRetriever",
|
||||
"LlamaIndexGraphRetriever",
|
||||
|
92
langchain/retrievers/chaindesk.py
Normal file
92
langchain/retrievers/chaindesk.py
Normal file
@ -0,0 +1,92 @@
|
||||
from typing import Any, List, Optional
|
||||
|
||||
import aiohttp
|
||||
import requests
|
||||
|
||||
from langchain.callbacks.manager import (
|
||||
AsyncCallbackManagerForRetrieverRun,
|
||||
CallbackManagerForRetrieverRun,
|
||||
)
|
||||
from langchain.schema import BaseRetriever, Document
|
||||
|
||||
|
||||
class ChaindeskRetriever(BaseRetriever):
    """Retriever that uses the Chaindesk API.

    The query is sent as an HTTP POST with a JSON body to ``datastore_url``.
    Each entry of the ``results`` list in the JSON response is turned into a
    ``Document`` whose ``page_content`` is the entry's ``text`` and whose
    metadata carries the entry's ``source`` and ``score``.
    """

    # URL the query is POSTed to.
    datastore_url: str
    # Sent as ``topK`` in the request body; omitted when ``None``.
    top_k: Optional[int]
    # Sent as a ``Bearer`` token in the ``Authorization`` header; omitted
    # when ``None``.
    api_key: Optional[str]

    def __init__(
        self,
        datastore_url: str,
        top_k: Optional[int] = None,
        api_key: Optional[str] = None,
    ):
        """Store the connection settings.

        Args:
            datastore_url: URL the query is POSTed to.
            top_k: Optional value forwarded as ``topK`` in the request body.
            api_key: Optional key forwarded as a ``Bearer`` token.
        """
        self.datastore_url = datastore_url
        self.api_key = api_key
        self.top_k = top_k

    def _request_payload(self, query: str) -> dict:
        """Build the JSON body shared by the sync and async requests."""
        payload: dict = {"query": query}
        # ``topK`` is only included when the caller configured a value.
        if self.top_k is not None:
            payload["topK"] = self.top_k
        return payload

    def _request_headers(self) -> dict:
        """Build the HTTP headers shared by the sync and async requests."""
        headers = {"Content-Type": "application/json"}
        # The ``Authorization`` header is only sent when an API key is set.
        if self.api_key is not None:
            headers["Authorization"] = f"Bearer {self.api_key}"
        return headers

    @staticmethod
    def _to_documents(data: Any) -> List[Document]:
        """Convert a decoded JSON response into a list of ``Document``."""
        return [
            Document(
                page_content=r["text"],
                metadata={"source": r["source"], "score": r["score"]},
            )
            for r in data["results"]
        ]

    def _get_relevant_documents(
        self,
        query: str,
        *,
        run_manager: CallbackManagerForRetrieverRun,
        **kwargs: Any,
    ) -> List[Document]:
        """Query the datastore synchronously with ``requests``.

        Args:
            query: Text sent as the ``query`` field of the request body.
            run_manager: Accepted for interface compatibility; not used here.
            **kwargs: Accepted for interface compatibility; not used here.

        Returns:
            One ``Document`` per entry in the response's ``results`` list.
        """
        response = requests.post(
            self.datastore_url,
            json=self._request_payload(query),
            headers=self._request_headers(),
        )
        return self._to_documents(response.json())

    async def _aget_relevant_documents(
        self,
        query: str,
        *,
        run_manager: AsyncCallbackManagerForRetrieverRun,
        **kwargs: Any,
    ) -> List[Document]:
        """Query the datastore asynchronously with ``aiohttp``.

        Args:
            query: Text sent as the ``query`` field of the request body.
            run_manager: Accepted for interface compatibility; not used here.
            **kwargs: Accepted for interface compatibility; not used here.

        Returns:
            One ``Document`` per entry in the response's ``results`` list.
        """
        async with aiohttp.ClientSession() as session:
            async with session.request(
                "POST",
                self.datastore_url,
                json=self._request_payload(query),
                headers=self._request_headers(),
            ) as response:
                # The body must be read while the response is still open.
                data = await response.json()
        return self._to_documents(data)
|
Loading…
Reference in New Issue
Block a user