mirror of https://github.com/hwchase17/langchain
Harrison/databerry (#2688)
Co-authored-by: Georges Petrov <georgesm.petrov@gmail.com>pull/2694/head
parent
b286d0e63f
commit
ad3c5dd186
Binary file not shown.
After Width: | Height: | Size: 157 KiB |
@ -0,0 +1,25 @@
|
||||
# Databerry
|
||||
|
||||
This page covers how to use [Databerry](https://databerry.ai) within LangChain.
|
||||
|
||||
## What is Databerry?
|
||||
|
||||
Databerry is an [open source](https://github.com/gmpetrov/databerry) document retrieval platform that helps connect your personal data with Large Language Models.
|
||||
|
||||
![Databerry](../_static/DataberryDashboard.png)
|
||||
|
||||
## Quick start
|
||||
|
||||
Retrieving documents stored in Databerry from LangChain is very easy!
|
||||
|
||||
```python
|
||||
from langchain.retrievers import DataberryRetriever
|
||||
|
||||
retriever = DataberryRetriever(
|
||||
datastore_url="https://api.databerry.ai/query/clg1xg2h80000l708dymr0fxc",
|
||||
# api_key="DATABERRY_API_KEY", # optional if datastore is public
|
||||
# top_k=10 # optional
|
||||
)
|
||||
|
||||
docs = retriever.get_relevant_documents("What's Databerry?")
|
||||
```
|
@ -0,0 +1,74 @@
|
||||
from typing import List, Optional
|
||||
|
||||
import aiohttp
|
||||
import requests
|
||||
|
||||
from langchain.schema import BaseRetriever, Document
|
||||
|
||||
|
||||
class DataberryRetriever(BaseRetriever):
    """Retriever that queries a Databerry datastore over HTTP.

    The datastore URL is sent a POST request whose JSON body carries the
    query (and ``topK`` when configured). Each entry of the ``results``
    array in the JSON reply is converted into a ``Document``.
    """

    datastore_url: str
    top_k: Optional[int]
    api_key: Optional[str]

    def __init__(
        self,
        datastore_url: str,
        top_k: Optional[int] = None,
        api_key: Optional[str] = None,
    ):
        """Store the connection settings.

        Args:
            datastore_url: URL the query is POSTed to.
            top_k: When not ``None``, sent to the API as ``topK``.
            api_key: When not ``None``, sent as a ``Bearer`` token in the
                ``Authorization`` header.
        """
        self.datastore_url = datastore_url
        self.api_key = api_key
        self.top_k = top_k

    def _build_payload(self, query: str) -> dict:
        """Return the JSON request body for ``query``."""
        payload = {"query": query}
        # ``topK`` is omitted entirely (not sent as null) when unset.
        if self.top_k is not None:
            payload["topK"] = self.top_k
        return payload

    def _build_headers(self) -> dict:
        """Return the HTTP headers, adding authorization only if a key is set."""
        headers = {"Content-Type": "application/json"}
        if self.api_key is not None:
            headers["Authorization"] = f"Bearer {self.api_key}"
        return headers

    @staticmethod
    def _parse_results(data: dict) -> List[Document]:
        """Convert a decoded API reply into ``Document`` objects.

        Raises:
            KeyError: If ``results`` is missing, or an entry lacks
                ``text``, ``source`` or ``score``.
        """
        return [
            Document(
                page_content=r["text"],
                metadata={"source": r["source"], "score": r["score"]},
            )
            for r in data["results"]
        ]

    def get_relevant_documents(self, query: str) -> List[Document]:
        """Query the datastore synchronously and return the matches.

        Args:
            query: Text to search the datastore for.

        Returns:
            One ``Document`` per entry in the reply's ``results`` array,
            with ``source`` and ``score`` stored in its metadata.
        """
        response = requests.post(
            self.datastore_url,
            json=self._build_payload(query),
            headers=self._build_headers(),
        )
        return self._parse_results(response.json())

    async def aget_relevant_documents(self, query: str) -> List[Document]:
        """Asynchronous counterpart of ``get_relevant_documents``.

        Sends the same payload and headers through ``aiohttp`` and parses
        the reply the same way.
        """
        async with aiohttp.ClientSession() as session:
            async with session.request(
                "POST",
                self.datastore_url,
                json=self._build_payload(query),
                headers=self._build_headers(),
            ) as response:
                # The body must be read while the response is still open.
                data = await response.json()
        return self._parse_results(data)
|
Loading…
Reference in New Issue