mirror of https://github.com/hwchase17/langchain
Harrison/databerry (#2688)
Co-authored-by: Georges Petrov <georgesm.petrov@gmail.com>pull/2694/head
parent
b286d0e63f
commit
ad3c5dd186
Binary file not shown.
After Width: | Height: | Size: 157 KiB |
@ -0,0 +1,25 @@
|
|||||||
|
# Databerry
|
||||||
|
|
||||||
|
This page covers how to use [Databerry](https://databerry.ai) within LangChain.
|
||||||
|
|
||||||
|
## What is Databerry?
|
||||||
|
|
||||||
|
Databerry is an [open source](https://github.com/gmpetrov/databerry) document retrieval platform that helps you connect your personal data with Large Language Models.
|
||||||
|
|
||||||
|
![Databerry](../_static/DataberryDashboard.png)
|
||||||
|
|
||||||
|
## Quick start
|
||||||
|
|
||||||
|
Retrieving documents stored in Databerry from LangChain is very easy!
|
||||||
|
|
||||||
|
```python
|
||||||
|
from langchain.retrievers import DataberryRetriever
|
||||||
|
|
||||||
|
retriever = DataberryRetriever(
|
||||||
|
datastore_url="https://api.databerry.ai/query/clg1xg2h80000l708dymr0fxc",
|
||||||
|
# api_key="DATABERRY_API_KEY", # optional if datastore is public
|
||||||
|
# top_k=10 # optional
|
||||||
|
)
|
||||||
|
|
||||||
|
docs = retriever.get_relevant_documents("What's Databerry?")
|
||||||
|
```
|
@ -0,0 +1,74 @@
|
|||||||
|
from typing import List, Optional
|
||||||
|
|
||||||
|
import aiohttp
|
||||||
|
import requests
|
||||||
|
|
||||||
|
from langchain.schema import BaseRetriever, Document
|
||||||
|
|
||||||
|
|
||||||
|
class DataberryRetriever(BaseRetriever):
    """Retriever that fetches documents from a Databerry datastore over HTTP.

    Each query is sent as a JSON ``POST`` to ``datastore_url``. The JSON
    response is read as a mapping with a ``"results"`` list whose items
    carry ``"text"``, ``"source"`` and ``"score"`` keys; every item becomes
    one ``Document``.

    NOTE(review): neither request path sets a timeout or checks the HTTP
    status code, so a failed call surfaces as whatever the JSON decoding or
    the ``"results"`` lookup raises -- confirm this is acceptable to callers.
    """

    # Endpoint that receives the query POST requests.
    datastore_url: str
    # Sent as "topK" in the request body; omitted from the body when None.
    top_k: Optional[int]
    # Sent as a Bearer token in the Authorization header; omitted when None.
    api_key: Optional[str]

    def __init__(
        self,
        datastore_url: str,
        top_k: Optional[int] = None,
        api_key: Optional[str] = None,
    ):
        """Store the connection settings used by every query.

        Args:
            datastore_url: URL the query is POSTed to.
            top_k: Optional value forwarded as ``"topK"`` in the request body.
            api_key: Optional key forwarded as a Bearer token.
        """
        self.datastore_url = datastore_url
        self.api_key = api_key
        self.top_k = top_k

    def _build_payload(self, query: str) -> dict:
        """Return the JSON request body shared by the sync and async paths."""
        payload: dict = {"query": query}
        if self.top_k is not None:
            payload["topK"] = self.top_k
        return payload

    def _build_headers(self) -> dict:
        """Return the HTTP headers shared by the sync and async paths."""
        headers: dict = {"Content-Type": "application/json"}
        if self.api_key is not None:
            headers["Authorization"] = f"Bearer {self.api_key}"
        return headers

    @staticmethod
    def _parse_results(data: dict) -> List[Document]:
        """Convert a decoded response body into a list of ``Document``."""
        return [
            Document(
                page_content=r["text"],
                metadata={"source": r["source"], "score": r["score"]},
            )
            for r in data["results"]
        ]

    def get_relevant_documents(self, query: str) -> List[Document]:
        """Query the datastore synchronously via ``requests``.

        Args:
            query: Text sent as the ``"query"`` field of the request body.

        Returns:
            One ``Document`` per entry of the response's ``"results"`` list.
        """
        response = requests.post(
            self.datastore_url,
            json=self._build_payload(query),
            headers=self._build_headers(),
        )
        return self._parse_results(response.json())

    async def aget_relevant_documents(self, query: str) -> List[Document]:
        """Query the datastore asynchronously via ``aiohttp``.

        Sends the same body and headers as ``get_relevant_documents`` and
        parses the response the same way. A new client session is opened
        for each call.

        Args:
            query: Text sent as the ``"query"`` field of the request body.

        Returns:
            One ``Document`` per entry of the response's ``"results"`` list.
        """
        async with aiohttp.ClientSession() as session:
            async with session.request(
                "POST",
                self.datastore_url,
                json=self._build_payload(query),
                headers=self._build_headers(),
            ) as response:
                data = await response.json()
                return self._parse_results(data)
|
Loading…
Reference in New Issue