mirror of
https://github.com/hwchase17/langchain
synced 2024-11-08 07:10:35 +00:00
Rename Databerry to Chaindesk (#7022)
--------- Co-authored-by: Bagatur <baskaryan@gmail.com>
This commit is contained in:
parent
da5b0723d2
commit
ec033ae277
@ -1316,7 +1316,7 @@ Classes
|
|||||||
retrievers.azure_cognitive_search.AzureCognitiveSearchRetriever
|
retrievers.azure_cognitive_search.AzureCognitiveSearchRetriever
|
||||||
retrievers.chatgpt_plugin_retriever.ChatGPTPluginRetriever
|
retrievers.chatgpt_plugin_retriever.ChatGPTPluginRetriever
|
||||||
retrievers.contextual_compression.ContextualCompressionRetriever
|
retrievers.contextual_compression.ContextualCompressionRetriever
|
||||||
retrievers.databerry.DataberryRetriever
|
retrievers.chaindesk.ChaindeskRetriever
|
||||||
retrievers.docarray.DocArrayRetriever
|
retrievers.docarray.DocArrayRetriever
|
||||||
retrievers.docarray.SearchType
|
retrievers.docarray.SearchType
|
||||||
retrievers.document_compressors.base.BaseDocumentCompressor
|
retrievers.document_compressors.base.BaseDocumentCompressor
|
||||||
|
Before Width: | Height: | Size: 157 KiB After Width: | Height: | Size: 157 KiB |
@ -137,8 +137,8 @@
|
|||||||
"destination": "/docs/ecosystem/integrations/ctransformers"
|
"destination": "/docs/ecosystem/integrations/ctransformers"
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"source": "/en/latest/integrations/databerry.html",
|
"source": "/en/latest/integrations/chaindesk.html",
|
||||||
"destination": "/docs/ecosystem/integrations/databerry"
|
"destination": "/docs/ecosystem/integrations/chaindesk"
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"source": "/en/latest/integrations/databricks/databricks.html",
|
"source": "/en/latest/integrations/databricks/databricks.html",
|
||||||
@ -1329,8 +1329,8 @@
|
|||||||
"destination": "/docs/modules/data_connection/retrievers/integrations/cohere-reranker"
|
"destination": "/docs/modules/data_connection/retrievers/integrations/cohere-reranker"
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"source": "/en/latest/modules/indexes/retrievers/examples/databerry.html",
|
"source": "/en/latest/modules/indexes/retrievers/examples/chaindesk.html",
|
||||||
"destination": "/docs/modules/data_connection/retrievers/integrations/databerry"
|
"destination": "/docs/modules/data_connection/retrievers/integrations/chaindesk"
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"source": "/en/latest/modules/indexes/retrievers/examples/elastic_search_bm25.html",
|
"source": "/en/latest/modules/indexes/retrievers/examples/elastic_search_bm25.html",
|
||||||
|
@ -1,17 +1,17 @@
|
|||||||
# Databerry
|
# Chaindesk
|
||||||
|
|
||||||
>[Databerry](https://databerry.ai) is an [open source](https://github.com/gmpetrov/databerry) document retrieval platform that helps to connect your personal data with Large Language Models.
|
>[Chaindesk](https://chaindesk.ai) is an [open source](https://github.com/gmpetrov/databerry) document retrieval platform that helps to connect your personal data with Large Language Models.
|
||||||
|
|
||||||
|
|
||||||
## Installation and Setup
|
## Installation and Setup
|
||||||
|
|
||||||
We need to sign up for Databerry, create a datastore, add some data and get your datastore api endpoint url.
|
You need to sign up for Chaindesk, create a datastore, add some data, and get your datastore API endpoint URL.
|
||||||
We need the [API Key](https://docs.databerry.ai/api-reference/authentication).
|
We need the [API Key](https://docs.chaindesk.ai/api-reference/authentication).
|
||||||
|
|
||||||
## Retriever
|
## Retriever
|
||||||
|
|
||||||
See a [usage example](/docs/modules/data_connection/retrievers/integrations/databerry.html).
|
See a [usage example](/docs/modules/data_connection/retrievers/integrations/chaindesk.html).
|
||||||
|
|
||||||
```python
|
```python
|
||||||
from langchain.retrievers import DataberryRetriever
|
from langchain.retrievers import ChaindeskRetriever
|
||||||
```
|
```
|
@ -1,21 +1,31 @@
|
|||||||
{
|
{
|
||||||
"cells": [
|
"cells": [
|
||||||
{
|
{
|
||||||
|
"attachments": {},
|
||||||
"cell_type": "markdown",
|
"cell_type": "markdown",
|
||||||
"id": "9fc6205b",
|
"id": "9fc6205b",
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"source": [
|
"source": [
|
||||||
"# Databerry\n",
|
"# Chaindesk\n",
|
||||||
"\n",
|
"\n",
|
||||||
">[Databerry platform](https://docs.databerry.ai/introduction) brings data from anywhere (Datsources: Text, PDF, Word, PowerPpoint, Excel, Notion, Airtable, Google Sheets, etc..) into Datastores (container of multiple Datasources).\n",
|
">[Chaindesk platform](https://docs.chaindesk.ai/introduction) brings data from anywhere (Datasources: Text, PDF, Word, PowerPoint, Excel, Notion, Airtable, Google Sheets, etc.) into Datastores (containers of multiple Datasources).\n",
|
||||||
"Then your Datastores can be connected to ChatGPT via Plugins or any other Large Langue Model (LLM) via the `Databerry API`.\n",
|
"Then your Datastores can be connected to ChatGPT via Plugins or any other Large Language Model (LLM) via the `Chaindesk API`.\n",
|
||||||
"\n",
|
"\n",
|
||||||
"This notebook shows how to use [Databerry's](https://www.databerry.ai/) retriever.\n",
|
"This notebook shows how to use [Chaindesk's](https://www.chaindesk.ai/) retriever.\n",
|
||||||
"\n",
|
"\n",
|
||||||
"First, you will need to sign up for Databerry, create a datastore, add some data and get your datastore api endpoint url. You need the [API Key](https://docs.databerry.ai/api-reference/authentication)."
|
"First, you will need to sign up for Chaindesk, create a datastore, add some data and get your datastore api endpoint url. You need the [API Key](https://docs.chaindesk.ai/api-reference/authentication)."
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"id": "3697b9fd",
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": []
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"attachments": {},
|
||||||
"cell_type": "markdown",
|
"cell_type": "markdown",
|
||||||
"id": "944e172b",
|
"id": "944e172b",
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
@ -34,7 +44,7 @@
|
|||||||
},
|
},
|
||||||
"outputs": [],
|
"outputs": [],
|
||||||
"source": [
|
"source": [
|
||||||
"from langchain.retrievers import DataberryRetriever"
|
"from langchain.retrievers import ChaindeskRetriever"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
@ -46,9 +56,9 @@
|
|||||||
},
|
},
|
||||||
"outputs": [],
|
"outputs": [],
|
||||||
"source": [
|
"source": [
|
||||||
"retriever = DataberryRetriever(\n",
|
"retriever = ChaindeskRetriever(\n",
|
||||||
" datastore_url=\"https://clg1xg2h80000l708dymr0fxc.databerry.ai/query\",\n",
|
" datastore_url=\"https://clg1xg2h80000l708dymr0fxc.chaindesk.ai/query\",\n",
|
||||||
" # api_key=\"DATABERRY_API_KEY\", # optional if datastore is public\n",
|
" # api_key=\"CHAINDESK_API_KEY\", # optional if datastore is public\n",
|
||||||
" # top_k=10 # optional\n",
|
" # top_k=10 # optional\n",
|
||||||
")"
|
")"
|
||||||
]
|
]
|
@ -1,8 +1,8 @@
|
|||||||
from langchain.retrievers.arxiv import ArxivRetriever
|
from langchain.retrievers.arxiv import ArxivRetriever
|
||||||
from langchain.retrievers.azure_cognitive_search import AzureCognitiveSearchRetriever
|
from langchain.retrievers.azure_cognitive_search import AzureCognitiveSearchRetriever
|
||||||
|
from langchain.retrievers.chaindesk import ChaindeskRetriever
|
||||||
from langchain.retrievers.chatgpt_plugin_retriever import ChatGPTPluginRetriever
|
from langchain.retrievers.chatgpt_plugin_retriever import ChatGPTPluginRetriever
|
||||||
from langchain.retrievers.contextual_compression import ContextualCompressionRetriever
|
from langchain.retrievers.contextual_compression import ContextualCompressionRetriever
|
||||||
from langchain.retrievers.databerry import DataberryRetriever
|
|
||||||
from langchain.retrievers.docarray import DocArrayRetriever
|
from langchain.retrievers.docarray import DocArrayRetriever
|
||||||
from langchain.retrievers.elastic_search_bm25 import ElasticSearchBM25Retriever
|
from langchain.retrievers.elastic_search_bm25 import ElasticSearchBM25Retriever
|
||||||
from langchain.retrievers.kendra import AmazonKendraRetriever
|
from langchain.retrievers.kendra import AmazonKendraRetriever
|
||||||
@ -36,7 +36,7 @@ __all__ = [
|
|||||||
"AzureCognitiveSearchRetriever",
|
"AzureCognitiveSearchRetriever",
|
||||||
"ChatGPTPluginRetriever",
|
"ChatGPTPluginRetriever",
|
||||||
"ContextualCompressionRetriever",
|
"ContextualCompressionRetriever",
|
||||||
"DataberryRetriever",
|
"ChaindeskRetriever",
|
||||||
"ElasticSearchBM25Retriever",
|
"ElasticSearchBM25Retriever",
|
||||||
"KNNRetriever",
|
"KNNRetriever",
|
||||||
"LlamaIndexGraphRetriever",
|
"LlamaIndexGraphRetriever",
|
||||||
|
92
langchain/retrievers/chaindesk.py
Normal file
92
langchain/retrievers/chaindesk.py
Normal file
@ -0,0 +1,92 @@
|
|||||||
|
from typing import Any, List, Optional
|
||||||
|
|
||||||
|
import aiohttp
|
||||||
|
import requests
|
||||||
|
|
||||||
|
from langchain.callbacks.manager import (
|
||||||
|
AsyncCallbackManagerForRetrieverRun,
|
||||||
|
CallbackManagerForRetrieverRun,
|
||||||
|
)
|
||||||
|
from langchain.schema import BaseRetriever, Document
|
||||||
|
|
||||||
|
|
||||||
|
class ChaindeskRetriever(BaseRetriever):
    """Retriever that uses the Chaindesk API.

    The query is sent as an HTTP POST with a JSON body to ``datastore_url``.
    Each entry of the ``"results"`` list in the JSON response becomes one
    ``Document`` whose ``page_content`` is the entry's ``"text"`` and whose
    metadata carries the entry's ``"source"`` and ``"score"``.
    """

    # Endpoint the query is POSTed to.
    datastore_url: str
    # Sent as "topK" in the request body; omitted from the body when None.
    top_k: Optional[int]
    # Sent as a "Bearer" Authorization header; header omitted when None.
    api_key: Optional[str]

    def __init__(
        self,
        datastore_url: str,
        top_k: Optional[int] = None,
        api_key: Optional[str] = None,
    ):
        """Store the connection settings.

        Args:
            datastore_url: URL the query is POSTed to.
            top_k: Optional value for the ``"topK"`` field of the request.
            api_key: Optional key sent as a Bearer token.
        """
        self.datastore_url = datastore_url
        self.api_key = api_key
        self.top_k = top_k

    def _build_payload(self, query: str) -> dict:
        """Return the JSON body shared by the sync and async requests."""
        payload = {"query": query}
        if self.top_k is not None:
            payload["topK"] = self.top_k
        return payload

    def _build_headers(self) -> dict:
        """Return the HTTP headers shared by the sync and async requests."""
        headers = {"Content-Type": "application/json"}
        if self.api_key is not None:
            headers["Authorization"] = f"Bearer {self.api_key}"
        return headers

    @staticmethod
    def _parse_results(data: Any) -> List[Document]:
        """Convert a decoded response body into a list of ``Document``."""
        return [
            Document(
                page_content=r["text"],
                metadata={"source": r["source"], "score": r["score"]},
            )
            for r in data["results"]
        ]

    def _get_relevant_documents(
        self,
        query: str,
        *,
        run_manager: CallbackManagerForRetrieverRun,
        **kwargs: Any,
    ) -> List[Document]:
        """Query the datastore synchronously.

        Args:
            query: Text sent as the ``"query"`` field of the request.
            run_manager: Callback manager for this run (not used here).
            **kwargs: Accepted for interface compatibility; not used here.

        Returns:
            One ``Document`` per entry of the response's ``"results"`` list.

        Raises:
            requests.HTTPError: If the server answers with a 4xx/5xx status.
        """
        # NOTE(review): no timeout is passed, so this call can block for as
        # long as the server keeps the connection open.
        response = requests.post(
            self.datastore_url,
            json=self._build_payload(query),
            headers=self._build_headers(),
        )
        # Fail with the HTTP error itself rather than a KeyError/decode error
        # when the body of an error response is parsed below.
        response.raise_for_status()
        return self._parse_results(response.json())

    async def _aget_relevant_documents(
        self,
        query: str,
        *,
        run_manager: AsyncCallbackManagerForRetrieverRun,
        **kwargs: Any,
    ) -> List[Document]:
        """Query the datastore asynchronously.

        Args:
            query: Text sent as the ``"query"`` field of the request.
            run_manager: Callback manager for this run (not used here).
            **kwargs: Accepted for interface compatibility; not used here.

        Returns:
            One ``Document`` per entry of the response's ``"results"`` list.

        Raises:
            aiohttp.ClientResponseError: If the server answers with a status
                of 400 or higher.
        """
        async with aiohttp.ClientSession() as session:
            async with session.request(
                "POST",
                self.datastore_url,
                json=self._build_payload(query),
                headers=self._build_headers(),
            ) as response:
                # Same rationale as the sync path: surface HTTP failures
                # directly instead of failing while parsing the error body.
                response.raise_for_status()
                data = await response.json()
        return self._parse_results(data)
|
Loading…
Reference in New Issue
Block a user