forked from Archives/langchain
Harrison/fauna loader (#5864)
Co-authored-by: Shadid12 <Shadid12@users.noreply.github.com>
This commit is contained in:
parent
5518f24ec3
commit
658f8bdee7
84
docs/modules/indexes/document_loaders/examples/fauna.ipynb
Normal file
84
docs/modules/indexes/document_loaders/examples/fauna.ipynb
Normal file
@ -0,0 +1,84 @@
|
|||||||
|
{
|
||||||
|
"cells": [
|
||||||
|
{
|
||||||
|
"attachments": {},
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"# Fauna\n",
|
||||||
|
"\n",
|
||||||
|
">[Fauna](https://fauna.com/) is a Document Database.\n",
|
||||||
|
"\n",
|
||||||
|
"Query `Fauna` documents"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"#!pip install fauna"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"attachments": {},
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"## Query data example"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"from langchain.document_loaders.fauna import FaunaLoader\n",
|
||||||
|
"\n",
|
||||||
|
"secret = \"<enter-valid-fauna-secret>\"\n",
|
||||||
|
"query = \"Item.all()\" # Fauna query. Assumes that the collection is called \"Item\"\n",
|
||||||
|
"field = \"text\" # The field that contains the page content. Assumes that the field is called \"text\"\n",
|
||||||
|
"\n",
|
||||||
|
"loader = FaunaLoader(query, field, secret)\n",
|
||||||
|
"docs = loader.lazy_load()\n",
|
||||||
|
"\n",
|
||||||
|
"for value in docs:\n",
|
||||||
|
" print(value)"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"attachments": {},
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"### Query with Pagination\n",
|
||||||
|
"You get an `after` value if there is more data. You can get the values after the cursor by passing the `after` string in the query. \n",
|
||||||
|
"\n",
|
||||||
|
"To learn more, follow [this link](https://fqlx-beta--fauna-docs.netlify.app/fqlx/beta/reference/schema_entities/set/static-paginate)"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"query = \"\"\"\n",
|
||||||
|
"Item.paginate(\"hs+DzoPOg ... aY1hOohozrV7A\")\n",
|
||||||
|
"Item.all()\n",
|
||||||
|
"\"\"\"\n",
|
||||||
|
"loader = FaunaLoader(query, field, secret)"
|
||||||
|
]
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"metadata": {
|
||||||
|
"language_info": {
|
||||||
|
"name": "python"
|
||||||
|
},
|
||||||
|
"orig_nbformat": 4
|
||||||
|
},
|
||||||
|
"nbformat": 4,
|
||||||
|
"nbformat_minor": 2
|
||||||
|
}
|
@ -34,6 +34,7 @@ from langchain.document_loaders.epub import UnstructuredEPubLoader
|
|||||||
from langchain.document_loaders.evernote import EverNoteLoader
|
from langchain.document_loaders.evernote import EverNoteLoader
|
||||||
from langchain.document_loaders.excel import UnstructuredExcelLoader
|
from langchain.document_loaders.excel import UnstructuredExcelLoader
|
||||||
from langchain.document_loaders.facebook_chat import FacebookChatLoader
|
from langchain.document_loaders.facebook_chat import FacebookChatLoader
|
||||||
|
from langchain.document_loaders.fauna import FaunaLoader
|
||||||
from langchain.document_loaders.figma import FigmaFileLoader
|
from langchain.document_loaders.figma import FigmaFileLoader
|
||||||
from langchain.document_loaders.gcs_directory import GCSDirectoryLoader
|
from langchain.document_loaders.gcs_directory import GCSDirectoryLoader
|
||||||
from langchain.document_loaders.gcs_file import GCSFileLoader
|
from langchain.document_loaders.gcs_file import GCSFileLoader
|
||||||
@ -155,6 +156,7 @@ __all__ = [
|
|||||||
"DocugamiLoader",
|
"DocugamiLoader",
|
||||||
"Docx2txtLoader",
|
"Docx2txtLoader",
|
||||||
"DuckDBLoader",
|
"DuckDBLoader",
|
||||||
|
"FaunaLoader",
|
||||||
"EverNoteLoader",
|
"EverNoteLoader",
|
||||||
"FacebookChatLoader",
|
"FacebookChatLoader",
|
||||||
"FigmaFileLoader",
|
"FigmaFileLoader",
|
||||||
|
63
langchain/document_loaders/fauna.py
Normal file
63
langchain/document_loaders/fauna.py
Normal file
@ -0,0 +1,63 @@
|
|||||||
|
from typing import Iterator, List, Optional, Sequence
|
||||||
|
|
||||||
|
from langchain.docstore.document import Document
|
||||||
|
from langchain.document_loaders.base import BaseLoader
|
||||||
|
|
||||||
|
|
||||||
|
class FaunaLoader(BaseLoader):
    """Load documents from FaunaDB via an FQL query.

    Attributes:
        query (str): The FQL query string to execute.
        page_content_field (str): The field that contains the content of each page.
        secret (str): The secret key for authenticating to FaunaDB.
        metadata_fields (Optional[Sequence[str]]):
            Optional list of field names to include in metadata.
    """

    def __init__(
        self,
        query: str,
        page_content_field: str,
        secret: str,
        metadata_fields: Optional[Sequence[str]] = None,
    ):
        self.query = query
        self.page_content_field = page_content_field
        self.secret = secret
        self.metadata_fields = metadata_fields

    def load(self) -> List[Document]:
        """Eagerly load all documents produced by :meth:`lazy_load`."""
        return list(self.lazy_load())

    def lazy_load(self) -> Iterator[Document]:
        """Run the FQL query and lazily yield one Document per result.

        If the result set has another page, a trailing sentinel Document is
        yielded whose metadata carries the ``after`` pagination cursor.

        Raises:
            ImportError: If the ``fauna`` python package is not installed.
        """
        try:
            from fauna import Page, fql
            from fauna.client import Client
            from fauna.encoding import QuerySuccess
        except ImportError:
            raise ImportError(
                "Could not import fauna python package. "
                "Please install it with `pip install fauna`."
            )
        # Create Fauna Client
        client = Client(secret=self.secret)
        # Run FQL Query
        response: QuerySuccess = client.query(fql(self.query))
        page: Page = response.data
        for result in page:
            if result is not None:
                document_dict = dict(result.items())
                # The configured field supplies the page content; documents
                # missing that field yield empty content (matches the
                # original fallback of "").
                page_content = document_dict.get(self.page_content_field, "")
                metadata = {"id": result.id, "ts": result.ts}
                # Fix: metadata_fields was accepted and documented but never
                # used — copy the requested fields into metadata when present.
                if self.metadata_fields is not None:
                    for field_name in self.metadata_fields:
                        if field_name in document_dict:
                            metadata[field_name] = document_dict[field_name]
                yield Document(
                    page_content=page_content,
                    metadata=metadata,
                )
        # Pagination sentinel: expose the cursor so callers can fetch the
        # next page with Item.paginate(<after>).
        if page.after is not None:
            yield Document(
                page_content="Next Page Exists",
                metadata={"after": page.after},
            )
|
41
tests/integration_tests/document_loaders/test_fauna.py
Normal file
41
tests/integration_tests/document_loaders/test_fauna.py
Normal file
@ -0,0 +1,41 @@
|
|||||||
|
import unittest
|
||||||
|
|
||||||
|
from langchain.document_loaders.fauna import FaunaLoader
|
||||||
|
|
||||||
|
try:
|
||||||
|
import fauna # noqa: F401
|
||||||
|
|
||||||
|
fauna_installed = True
|
||||||
|
except ImportError:
|
||||||
|
fauna_installed = False
|
||||||
|
|
||||||
|
|
||||||
|
@unittest.skipIf(not fauna_installed, "fauna not installed")
class TestFaunaLoader(unittest.TestCase):
    """Integration test for FaunaLoader.

    Requires a live Fauna database with an "Item" collection whose
    documents have a "text" field; fill in a valid secret before running.
    """

    def setUp(self) -> None:
        # NOTE(review): placeholder secret — must be replaced with a real
        # Fauna secret for the test to pass against a live database.
        self.fauna_secret = "<enter-valid-fauna-secret>"
        self.valid_fql_query = "Item.all()"
        self.valid_page_content_field = "text"
        self.valid_metadata_fields = ["valid_metadata_fields"]

    def test_fauna_loader(self) -> None:
        """Test Fauna loader."""
        loader = FaunaLoader(
            query=self.valid_fql_query,
            page_content_field=self.valid_page_content_field,
            secret=self.fauna_secret,
            metadata_fields=self.valid_metadata_fields,
        )
        docs = loader.load()

        # Fix: bare `assert` statements are stripped under `python -O` and
        # give no diagnostics; use unittest's assert methods instead.
        # The query is assumed to return at least one document.
        self.assertGreater(len(docs), 0)
        for doc in docs:
            # Every document should have non-empty page content and Fauna's
            # built-in "id" and "ts" metadata entries.
            self.assertNotEqual(doc.page_content, "")
            self.assertIn("id", doc.metadata)
            self.assertNotEqual(doc.metadata["id"], "")
            self.assertIn("ts", doc.metadata)
            self.assertNotEqual(doc.metadata["ts"], "")
|
Loading…
Reference in New Issue
Block a user