forked from Archives/langchain
Harrison/fauna loader (#5864)
Co-authored-by: Shadid12 <Shadid12@users.noreply.github.com>
This commit is contained in:
parent
5518f24ec3
commit
658f8bdee7
84
docs/modules/indexes/document_loaders/examples/fauna.ipynb
Normal file
84
docs/modules/indexes/document_loaders/examples/fauna.ipynb
Normal file
@ -0,0 +1,84 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# Fauna\n",
|
||||
"\n",
|
||||
">[Fauna](https://fauna.com/) is a Document Database.\n",
|
||||
"\n",
|
||||
"Query `Fauna` documents"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"#!pip install fauna"
|
||||
]
|
||||
},
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Query data example"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain.document_loaders.fauna import FaunaLoader\n",
|
||||
"\n",
|
||||
"secret = \"<enter-valid-fauna-secret>\"\n",
|
||||
"query = \"Item.all()\" # Fauna query. Assumes that the collection is called \"Item\"\n",
|
||||
"field = \"text\" # The field that contains the page content. Assumes that the field is called \"text\"\n",
|
||||
"\n",
|
||||
"loader = FaunaLoader(query, field, secret)\n",
|
||||
"docs = loader.lazy_load()\n",
|
||||
"\n",
|
||||
"for value in docs:\n",
|
||||
" print(value)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Query with Pagination\n",
|
||||
"You get an `after` value if there is more data. You can get values after the cursor by passing the `after` string in the query. \n",
|
||||
"\n",
|
||||
"To learn more, follow [this link](https://fqlx-beta--fauna-docs.netlify.app/fqlx/beta/reference/schema_entities/set/static-paginate)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"query = \"\"\"\n",
|
||||
"Item.paginate(\"hs+DzoPOg ... aY1hOohozrV7A\")\n",
|
||||
"Item.all()\n",
|
||||
"\"\"\"\n",
|
||||
"loader = FaunaLoader(query, field, secret)"
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"language_info": {
|
||||
"name": "python"
|
||||
},
|
||||
"orig_nbformat": 4
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 2
|
||||
}
|
@ -34,6 +34,7 @@ from langchain.document_loaders.epub import UnstructuredEPubLoader
|
||||
from langchain.document_loaders.evernote import EverNoteLoader
|
||||
from langchain.document_loaders.excel import UnstructuredExcelLoader
|
||||
from langchain.document_loaders.facebook_chat import FacebookChatLoader
|
||||
from langchain.document_loaders.fauna import FaunaLoader
|
||||
from langchain.document_loaders.figma import FigmaFileLoader
|
||||
from langchain.document_loaders.gcs_directory import GCSDirectoryLoader
|
||||
from langchain.document_loaders.gcs_file import GCSFileLoader
|
||||
@ -155,6 +156,7 @@ __all__ = [
|
||||
"DocugamiLoader",
|
||||
"Docx2txtLoader",
|
||||
"DuckDBLoader",
|
||||
"FaunaLoader",
|
||||
"EverNoteLoader",
|
||||
"FacebookChatLoader",
|
||||
"FigmaFileLoader",
|
||||
|
63
langchain/document_loaders/fauna.py
Normal file
63
langchain/document_loaders/fauna.py
Normal file
@ -0,0 +1,63 @@
|
||||
from typing import Iterator, List, Optional, Sequence
|
||||
|
||||
from langchain.docstore.document import Document
|
||||
from langchain.document_loaders.base import BaseLoader
|
||||
|
||||
|
||||
class FaunaLoader(BaseLoader):
    """Load documents from FaunaDB by running an FQL query.

    Attributes:
        query (str): The FQL query string to execute.
        page_content_field (str): The field that contains the content of each page.
        secret (str): The secret key for authenticating to FaunaDB.
        metadata_fields (Optional[Sequence[str]]):
            Optional list of field names to include in metadata.
    """

    def __init__(
        self,
        query: str,
        page_content_field: str,
        secret: str,
        metadata_fields: Optional[Sequence[str]] = None,
    ):
        self.query = query
        self.page_content_field = page_content_field
        self.secret = secret
        self.metadata_fields = metadata_fields

    def load(self) -> List[Document]:
        """Eagerly load all documents returned by the query."""
        return list(self.lazy_load())

    def lazy_load(self) -> Iterator[Document]:
        """Lazily yield one ``Document`` per record returned by the query.

        If more results exist beyond the current page, a final sentinel
        ``Document`` is yielded whose metadata carries the pagination
        cursor under the ``"after"`` key.

        Raises:
            ImportError: If the ``fauna`` python package is not installed.
        """
        try:
            from fauna import Page, fql
            from fauna.client import Client
            from fauna.encoding import QuerySuccess
        except ImportError:
            raise ImportError(
                "Could not import fauna python package. "
                "Please install it with `pip install fauna`."
            )
        # Create Fauna Client
        client = Client(secret=self.secret)
        # Run FQL Query
        response: QuerySuccess = client.query(fql(self.query))
        page: Page = response.data
        for result in page:
            if result is not None:
                document_dict = dict(result.items())
                # Pull the page content from the configured field; fall back
                # to an empty string when the field is absent.
                page_content = document_dict.get(self.page_content_field, "")
                metadata = {"id": result.id, "ts": result.ts}
                # Bug fix: metadata_fields was documented and stored but
                # previously never consulted — include the requested fields.
                if self.metadata_fields:
                    for field_name in self.metadata_fields:
                        if field_name in document_dict:
                            metadata[field_name] = document_dict[field_name]
                yield Document(
                    page_content=page_content,
                    metadata=metadata,
                )
        # Emit a sentinel carrying the pagination cursor when more data exists.
        if page.after is not None:
            yield Document(
                page_content="Next Page Exists",
                metadata={"after": page.after},
            )
|
41
tests/integration_tests/document_loaders/test_fauna.py
Normal file
41
tests/integration_tests/document_loaders/test_fauna.py
Normal file
@ -0,0 +1,41 @@
|
||||
import unittest
|
||||
|
||||
from langchain.document_loaders.fauna import FaunaLoader
|
||||
|
||||
try:
|
||||
import fauna # noqa: F401
|
||||
|
||||
fauna_installed = True
|
||||
except ImportError:
|
||||
fauna_installed = False
|
||||
|
||||
|
||||
@unittest.skipIf(not fauna_installed, "fauna not installed")
class TestFaunaLoader(unittest.TestCase):
    """Integration test: run an FQL query against a live Fauna database."""

    def setUp(self) -> None:
        # Placeholder credentials and query; substitute a real secret to run.
        self.fauna_secret = "<enter-valid-fauna-secret>"
        self.valid_fql_query = "Item.all()"
        self.valid_page_content_field = "text"
        self.valid_metadata_fields = ["valid_metadata_fields"]

    def test_fauna_loader(self) -> None:
        """Test Fauna loader."""
        loader = FaunaLoader(
            query=self.valid_fql_query,
            page_content_field=self.valid_page_content_field,
            secret=self.fauna_secret,
            metadata_fields=self.valid_metadata_fields,
        )
        docs = loader.load()

        # The query is expected to return at least one document.
        assert docs
        for doc in docs:
            # Every loaded document must carry non-empty content plus
            # non-empty 'id' and 'ts' metadata entries.
            assert doc.page_content != ""
            assert doc.metadata.get("id", "") != ""
            assert doc.metadata.get("ts", "") != ""
|
Loading…
Reference in New Issue
Block a user