Harrison/databerry (#2688)

Co-authored-by: Georges Petrov <georgesm.petrov@gmail.com>
fix_agent_callbacks
Harrison Chase 1 year ago committed by GitHub
parent b286d0e63f
commit ad3c5dd186
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23

Binary file not shown.

After

Width:  |  Height:  |  Size: 157 KiB

@ -0,0 +1,25 @@
# Databerry
This page covers how to use [Databerry](https://databerry.ai) within LangChain.
## What is Databerry?
Databerry is an [open source](https://github.com/gmpetrov/databerry) document retrieval platform that helps to connect your personal data with Large Language Models.
![Databerry](../_static/DataberryDashboard.png)
## Quick start
Retrieving documents stored in Databerry from LangChain is very easy!
```python
from langchain.retrievers import DataberryRetriever
retriever = DataberryRetriever(
datastore_url="https://api.databerry.ai/query/clg1xg2h80000l708dymr0fxc",
# api_key="DATABERRY_API_KEY", # optional if datastore is public
# top_k=10 # optional
)
docs = retriever.get_relevant_documents("What's Databerry?")
```

@ -0,0 +1,95 @@
{
"cells": [
{
"attachments": {},
"cell_type": "markdown",
"id": "9fc6205b",
"metadata": {},
"source": [
"# Databerry\n",
"\n",
"This notebook shows how to use [Databerry's](https://www.databerry.ai/) retriever.\n",
"\n",
"First, you will need to sign up for Databerry, create a datastore, add some data, and get your datastore API endpoint URL."
]
},
{
"cell_type": "markdown",
"id": "944e172b",
"metadata": {},
"source": [
"## Query\n",
"\n",
"Now that our datastore is set up, we can create a retriever and start querying it."
]
},
{
"cell_type": "code",
"execution_count": 1,
"id": "d0e6f506",
"metadata": {},
"outputs": [],
"source": [
"from langchain.retrievers import DataberryRetriever"
]
},
{
"cell_type": "code",
"execution_count": 5,
"id": "f381f642",
"metadata": {},
"outputs": [],
"source": [
"retriever = DataberryRetriever(\n",
" datastore_url=\"https://clg1xg2h80000l708dymr0fxc.databerry.ai/query\",\n",
" # api_key=\"DATABERRY_API_KEY\", # optional if datastore is public\n",
" # top_k=10 # optional\n",
")"
]
},
{
"cell_type": "code",
"execution_count": 6,
"id": "20ae1a74",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"[Document(page_content='✨ Made with DaftpageOpen main menuPricingTemplatesLoginSearchHelpGetting StartedFeaturesAffiliate ProgramGetting StartedDaftpage is a new type of website builder that works like a doc.It makes website building easy, fun and offers tons of powerful features for free. Just type / in your page to get started!DaftpageCopyright © 2022 Daftpage, Inc.All rights reserved.ProductPricingTemplatesHelp & SupportHelp CenterGetting startedBlogCompanyAboutRoadmapTwitterAffiliate Program👾 Discord', metadata={'source': 'https:/daftpage.com/help/getting-started', 'score': 0.8697265}),\n",
" Document(page_content=\"✨ Made with DaftpageOpen main menuPricingTemplatesLoginSearchHelpGetting StartedFeaturesAffiliate ProgramHelp CenterWelcome to Daftpages help center—the one-stop shop for learning everything about building websites with Daftpage.Daftpage is the simplest way to create websites for all purposes in seconds. Without knowing how to code, and for free!Get StartedDaftpage is a new type of website builder that works like a doc.It makes website building easy, fun and offers tons of powerful features for free. Just type / in your page to get started!Start here✨ Create your first site🧱 Add blocks🚀 PublishGuides🔖 Add a custom domainFeatures🔥 Drops🎨 Drawings👻 Ghost mode💀 Skeleton modeCant find the answer you're looking for?mail us at support@daftpage.comJoin the awesome Daftpage community on: 👾 DiscordDaftpageCopyright © 2022 Daftpage, Inc.All rights reserved.ProductPricingTemplatesHelp & SupportHelp CenterGetting startedBlogCompanyAboutRoadmapTwitterAffiliate Program👾 Discord\", metadata={'source': 'https:/daftpage.com/help', 'score': 0.86570895}),\n",
" Document(page_content=\" is the simplest way to create websites for all purposes in seconds. Without knowing how to code, and for free!Get StartedDaftpage is a new type of website builder that works like a doc.It makes website building easy, fun and offers tons of powerful features for free. Just type / in your page to get started!Start here✨ Create your first site🧱 Add blocks🚀 PublishGuides🔖 Add a custom domainFeatures🔥 Drops🎨 Drawings👻 Ghost mode💀 Skeleton modeCant find the answer you're looking for?mail us at support@daftpage.comJoin the awesome Daftpage community on: 👾 DiscordDaftpageCopyright © 2022 Daftpage, Inc.All rights reserved.ProductPricingTemplatesHelp & SupportHelp CenterGetting startedBlogCompanyAboutRoadmapTwitterAffiliate Program👾 Discord\", metadata={'source': 'https:/daftpage.com/help', 'score': 0.8645384})]"
]
},
"execution_count": 6,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"retriever.get_relevant_documents(\"What is Daftpage?\")"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.1"
}
},
"nbformat": 4,
"nbformat_minor": 5
}

@ -1,4 +1,5 @@
from langchain.retrievers.chatgpt_plugin_retriever import ChatGPTPluginRetriever
from langchain.retrievers.databerry import DataberryRetriever
from langchain.retrievers.elastic_search_bm25 import ElasticSearchBM25Retriever
from langchain.retrievers.metal import MetalRetriever
from langchain.retrievers.pinecone_hybrid_search import PineconeHybridSearchRetriever
@ -14,4 +15,5 @@ __all__ = [
"ElasticSearchBM25Retriever",
"TFIDFRetriever",
"WeaviateHybridSearchRetriever",
"DataberryRetriever",
]

@ -0,0 +1,74 @@
from typing import List, Optional
import aiohttp
import requests
from langchain.schema import BaseRetriever, Document
class DataberryRetriever(BaseRetriever):
    """Retriever that queries a Databerry datastore over HTTP.

    The query is POSTed as JSON to ``datastore_url`` and every entry of the
    response's ``"results"`` list is converted into a ``Document``.
    """

    datastore_url: str
    top_k: Optional[int]
    api_key: Optional[str]

    def __init__(
        self,
        datastore_url: str,
        top_k: Optional[int] = None,
        api_key: Optional[str] = None,
    ):
        """Store the connection settings.

        Args:
            datastore_url: URL the query is POSTed to.
            top_k: Sent as ``topK`` in the request body when not ``None``.
            api_key: Sent as a bearer token in the ``Authorization`` header
                when not ``None``.
        """
        self.datastore_url = datastore_url
        self.api_key = api_key
        self.top_k = top_k

    def _build_payload(self, query: str) -> dict:
        """Return the JSON body for a query request."""
        payload = {"query": query}
        # ``topK`` is only sent when the caller configured it explicitly.
        if self.top_k is not None:
            payload["topK"] = self.top_k
        return payload

    def _build_headers(self) -> dict:
        """Return the HTTP headers for a query request."""
        headers = {"Content-Type": "application/json"}
        # The Authorization header is omitted entirely when no key is set.
        if self.api_key is not None:
            headers["Authorization"] = f"Bearer {self.api_key}"
        return headers

    @staticmethod
    def _parse_results(data: dict) -> List[Document]:
        """Convert a decoded response body into a list of documents.

        Reads ``data["results"]``; each result must provide the ``"text"``,
        ``"source"`` and ``"score"`` keys. A missing key raises ``KeyError``.
        """
        return [
            Document(
                page_content=r["text"],
                metadata={"source": r["source"], "score": r["score"]},
            )
            for r in data["results"]
        ]

    def get_relevant_documents(self, query: str) -> List[Document]:
        """Query the datastore synchronously via ``requests``.

        Args:
            query: Text sent as the ``"query"`` field of the request body.

        Returns:
            One ``Document`` per entry of the response's ``"results"`` list.
        """
        response = requests.post(
            self.datastore_url,
            json=self._build_payload(query),
            headers=self._build_headers(),
        )
        return self._parse_results(response.json())

    async def aget_relevant_documents(self, query: str) -> List[Document]:
        """Query the datastore asynchronously via ``aiohttp``.

        Sends the same payload and headers as ``get_relevant_documents`` and
        converts the response the same way.

        Args:
            query: Text sent as the ``"query"`` field of the request body.

        Returns:
            One ``Document`` per entry of the response's ``"results"`` list.
        """
        async with aiohttp.ClientSession() as session:
            async with session.request(
                "POST",
                self.datastore_url,
                json=self._build_payload(query),
                headers=self._build_headers(),
            ) as response:
                data = await response.json()
        return self._parse_results(data)

@ -12,13 +12,20 @@ class RemoteLangChainRetriever(BaseRetriever, BaseModel):
headers: Optional[dict] = None
input_key: str = "message"
response_key: str = "response"
page_content_key: str = "page_content"
metadata_key: str = "metadata"
def get_relevant_documents(self, query: str) -> List[Document]:
response = requests.post(
self.url, json={self.input_key: query}, headers=self.headers
)
result = response.json()
return [Document(**r) for r in result[self.response_key]]
return [
Document(
page_content=r[self.page_content_key], metadata=r[self.metadata_key]
)
for r in result[self.response_key]
]
async def aget_relevant_documents(self, query: str) -> List[Document]:
async with aiohttp.ClientSession() as session:
@ -26,4 +33,9 @@ class RemoteLangChainRetriever(BaseRetriever, BaseModel):
"POST", self.url, headers=self.headers, json={self.input_key: query}
) as response:
result = await response.json()
return [Document(**r) for r in result[self.response_key]]
return [
Document(
page_content=r[self.page_content_key], metadata=r[self.metadata_key]
)
for r in result[self.response_key]
]

Loading…
Cancel
Save