diff --git a/docs/_static/DataberryDashboard.png b/docs/_static/DataberryDashboard.png new file mode 100644 index 0000000000..090f7d0955 Binary files /dev/null and b/docs/_static/DataberryDashboard.png differ diff --git a/docs/ecosystem/databerry.md b/docs/ecosystem/databerry.md new file mode 100644 index 0000000000..6e6e8a7298 --- /dev/null +++ b/docs/ecosystem/databerry.md @@ -0,0 +1,25 @@ +# Databerry + +This page covers how to use [Databerry](https://databerry.ai) within LangChain. + +## What is Databerry? + +Databerry is an [open source](https://github.com/gmpetrov/databerry) document retrieval platform that helps to connect your personal data with Large Language Models. + +![Databerry](../_static/DataberryDashboard.png) + +## Quick start + +Retrieving documents stored in Databerry from LangChain is very easy! + +```python +from langchain.retrievers import DataberryRetriever + +retriever = DataberryRetriever( + datastore_url="https://api.databerry.ai/query/clg1xg2h80000l708dymr0fxc", + # api_key="DATABERRY_API_KEY", # optional if datastore is public + # top_k=10 # optional +) + +docs = retriever.get_relevant_documents("What's Databerry?") +``` diff --git a/docs/modules/indexes/retrievers/examples/databerry.ipynb b/docs/modules/indexes/retrievers/examples/databerry.ipynb new file mode 100644 index 0000000000..81c2af11d3 --- /dev/null +++ b/docs/modules/indexes/retrievers/examples/databerry.ipynb @@ -0,0 +1,95 @@ +{ + "cells": [ + { + "attachments": {}, + "cell_type": "markdown", + "id": "9fc6205b", + "metadata": {}, + "source": [ + "# Databerry\n", + "\n", + "This notebook shows how to use [Databerry's](https://www.databerry.ai/) retriever.\n", + "\n", + "First, you will need to sign up for Databerry, create a datastore, add some data and get your datastore API endpoint URL." + ] + }, + { + "cell_type": "markdown", + "id": "944e172b", + "metadata": {}, + "source": [ + "## Query\n", + "\n", + "Now that our index is set up, we can set up a retriever and start 
querying it." + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "id": "d0e6f506", + "metadata": {}, + "outputs": [], + "source": [ + "from langchain.retrievers import DataberryRetriever" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "id": "f381f642", + "metadata": {}, + "outputs": [], + "source": [ + "retriever = DataberryRetriever(\n", + " datastore_url=\"https://clg1xg2h80000l708dymr0fxc.databerry.ai/query\",\n", + " # api_key=\"DATABERRY_API_KEY\", # optional if datastore is public\n", + " # top_k=10 # optional\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "id": "20ae1a74", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "[Document(page_content='✨ Made with DaftpageOpen main menuPricingTemplatesLoginSearchHelpGetting StartedFeaturesAffiliate ProgramGetting StartedDaftpage is a new type of website builder that works like a doc.It makes website building easy, fun and offers tons of powerful features for free. Just type / in your page to get started!DaftpageCopyright © 2022 Daftpage, Inc.All rights reserved.ProductPricingTemplatesHelp & SupportHelp CenterGetting startedBlogCompanyAboutRoadmapTwitterAffiliate Program👾 Discord', metadata={'source': 'https:/daftpage.com/help/getting-started', 'score': 0.8697265}),\n", + " Document(page_content=\"✨ Made with DaftpageOpen main menuPricingTemplatesLoginSearchHelpGetting StartedFeaturesAffiliate ProgramHelp CenterWelcome to Daftpage’s help center—the one-stop shop for learning everything about building websites with Daftpage.Daftpage is the simplest way to create websites for all purposes in seconds. Without knowing how to code, and for free!Get StartedDaftpage is a new type of website builder that works like a doc.It makes website building easy, fun and offers tons of powerful features for free. 
Just type / in your page to get started!Start here✨ Create your first site🧱 Add blocks🚀 PublishGuides🔖 Add a custom domainFeatures🔥 Drops🎨 Drawings👻 Ghost mode💀 Skeleton modeCant find the answer you're looking for?mail us at support@daftpage.comJoin the awesome Daftpage community on: 👾 DiscordDaftpageCopyright © 2022 Daftpage, Inc.All rights reserved.ProductPricingTemplatesHelp & SupportHelp CenterGetting startedBlogCompanyAboutRoadmapTwitterAffiliate Program👾 Discord\", metadata={'source': 'https:/daftpage.com/help', 'score': 0.86570895}),\n", + " Document(page_content=\" is the simplest way to create websites for all purposes in seconds. Without knowing how to code, and for free!Get StartedDaftpage is a new type of website builder that works like a doc.It makes website building easy, fun and offers tons of powerful features for free. Just type / in your page to get started!Start here✨ Create your first site🧱 Add blocks🚀 PublishGuides🔖 Add a custom domainFeatures🔥 Drops🎨 Drawings👻 Ghost mode💀 Skeleton modeCant find the answer you're looking for?mail us at support@daftpage.comJoin the awesome Daftpage community on: 👾 DiscordDaftpageCopyright © 2022 Daftpage, Inc.All rights reserved.ProductPricingTemplatesHelp & SupportHelp CenterGetting startedBlogCompanyAboutRoadmapTwitterAffiliate Program👾 Discord\", metadata={'source': 'https:/daftpage.com/help', 'score': 0.8645384})]" + ] + }, + "execution_count": 6, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "retriever.get_relevant_documents(\"What is Daftpage?\")" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.1" + } + }, + "nbformat": 4, + "nbformat_minor": 5 
class DataberryRetriever(BaseRetriever):
    """Retriever that fetches documents from a Databerry datastore over HTTP.

    Each query is sent as a JSON ``POST`` to ``datastore_url``. Every entry of
    the response's ``"results"`` list becomes a ``Document`` whose
    ``page_content`` is the entry's ``"text"`` and whose metadata carries the
    entry's ``"source"`` and ``"score"``.
    """

    datastore_url: str
    top_k: Optional[int]
    api_key: Optional[str]

    def __init__(
        self,
        datastore_url: str,
        top_k: Optional[int] = None,
        api_key: Optional[str] = None,
    ):
        """Store the connection settings.

        Args:
            datastore_url: Endpoint that queries are POSTed to.
            top_k: Sent as ``"topK"`` in the request body when not ``None``.
            api_key: Sent as a ``Bearer`` token in the ``Authorization``
                header when not ``None``.
        """
        self.datastore_url = datastore_url
        self.api_key = api_key
        self.top_k = top_k

    def _build_payload(self, query: str) -> dict:
        """Return the JSON request body for ``query``."""
        payload: dict = {"query": query}
        # "topK" is omitted entirely (not sent as null) when unset.
        if self.top_k is not None:
            payload["topK"] = self.top_k
        return payload

    def _build_headers(self) -> dict:
        """Return the request headers, adding authorization only if configured."""
        headers = {"Content-Type": "application/json"}
        if self.api_key is not None:
            headers["Authorization"] = f"Bearer {self.api_key}"
        return headers

    @staticmethod
    def _parse_results(data: dict) -> List[Document]:
        """Convert a decoded response body into a list of ``Document`` objects."""
        return [
            Document(
                page_content=r["text"],
                metadata={"source": r["source"], "score": r["score"]},
            )
            for r in data["results"]
        ]

    def get_relevant_documents(self, query: str) -> List[Document]:
        """Query the datastore synchronously and return the matching documents.

        Raises:
            requests.HTTPError: If the server answers with a 4xx/5xx status.
        """
        response = requests.post(
            self.datastore_url,
            json=self._build_payload(query),
            headers=self._build_headers(),
        )
        # Fail with the real HTTP error rather than a confusing KeyError on
        # "results" when the server returns an error body.
        response.raise_for_status()
        return self._parse_results(response.json())

    async def aget_relevant_documents(self, query: str) -> List[Document]:
        """Query the datastore asynchronously and return the matching documents.

        Raises:
            aiohttp.ClientResponseError: If the server answers with a
                4xx/5xx status.
        """
        async with aiohttp.ClientSession() as session:
            async with session.request(
                "POST",
                self.datastore_url,
                json=self._build_payload(query),
                headers=self._build_headers(),
            ) as response:
                # Same rationale as the synchronous path: surface HTTP errors
                # before attempting to decode the body.
                response.raise_for_status()
                data = await response.json()
        return self._parse_results(data)