langchain/libs/community/langchain_community/document_loaders/couchbase.py
Bagatur ed58eeb9c5
community[major], core[patch], langchain[patch], experimental[patch]: Create langchain-community (#14463)
Moved the following modules to the new package `langchain-community` in a backwards-compatible fashion:

```
mv langchain/langchain/adapters community/langchain_community
mv langchain/langchain/callbacks community/langchain_community/callbacks
mv langchain/langchain/chat_loaders community/langchain_community
mv langchain/langchain/chat_models community/langchain_community
mv langchain/langchain/document_loaders community/langchain_community
mv langchain/langchain/docstore community/langchain_community
mv langchain/langchain/document_transformers community/langchain_community
mv langchain/langchain/embeddings community/langchain_community
mv langchain/langchain/graphs community/langchain_community
mv langchain/langchain/llms community/langchain_community
mv langchain/langchain/memory/chat_message_histories community/langchain_community
mv langchain/langchain/retrievers community/langchain_community
mv langchain/langchain/storage community/langchain_community
mv langchain/langchain/tools community/langchain_community
mv langchain/langchain/utilities community/langchain_community
mv langchain/langchain/vectorstores community/langchain_community
mv langchain/langchain/agents/agent_toolkits community/langchain_community
mv langchain/langchain/cache.py community/langchain_community
mv langchain/langchain/adapters community/langchain_community
mv langchain/langchain/callbacks community/langchain_community/callbacks
mv langchain/langchain/chat_loaders community/langchain_community
mv langchain/langchain/chat_models community/langchain_community
mv langchain/langchain/document_loaders community/langchain_community
mv langchain/langchain/docstore community/langchain_community
mv langchain/langchain/document_transformers community/langchain_community
mv langchain/langchain/embeddings community/langchain_community
mv langchain/langchain/graphs community/langchain_community
mv langchain/langchain/llms community/langchain_community
mv langchain/langchain/memory/chat_message_histories community/langchain_community
mv langchain/langchain/retrievers community/langchain_community
mv langchain/langchain/storage community/langchain_community
mv langchain/langchain/tools community/langchain_community
mv langchain/langchain/utilities community/langchain_community
mv langchain/langchain/vectorstores community/langchain_community
mv langchain/langchain/agents/agent_toolkits community/langchain_community
mv langchain/langchain/cache.py community/langchain_community
```

Moved the following to core:
```
mv langchain/langchain/utils/json_schema.py core/langchain_core/utils
mv langchain/langchain/utils/html.py core/langchain_core/utils
mv langchain/langchain/utils/strings.py core/langchain_core/utils
cat langchain/langchain/utils/env.py >> core/langchain_core/utils/env.py
rm langchain/langchain/utils/env.py
```

See .scripts/community_split/script_integrations.sh for all changes
2023-12-11 13:53:30 -08:00

101 lines
3.6 KiB
Python

import logging
from typing import Iterator, List, Optional
from langchain_core.documents import Document
from langchain_community.document_loaders.base import BaseLoader
# Module-level logger, named after this module via ``__name__``.
logger = logging.getLogger(__name__)
class CouchbaseLoader(BaseLoader):
    """Load documents from `Couchbase`.

    Each document represents one row of the query result. The
    `page_content_fields` are written into the `page_content` of the document.
    The `metadata_fields` are written into the `metadata` of the document. By
    default, all columns are written into the `page_content` and none into the
    `metadata`.
    """

    def __init__(
        self,
        connection_string: str,
        db_username: str,
        db_password: str,
        query: str,
        *,
        page_content_fields: Optional[List[str]] = None,
        metadata_fields: Optional[List[str]] = None,
    ) -> None:
        """Initialize Couchbase document loader.

        Args:
            connection_string (str): The connection string to the Couchbase cluster.
            db_username (str): The username to connect to the Couchbase cluster.
            db_password (str): The password to connect to the Couchbase cluster.
            query (str): The SQL++ query to execute.
            page_content_fields (Optional[List[str]]): The columns to write into the
                `page_content` field of the document. By default, all columns are
                written.
            metadata_fields (Optional[List[str]]): The columns to write into the
                `metadata` field of the document. By default, no columns are written.

        Raises:
            ImportError: If the `couchbase` package cannot be imported.
            ValueError: If `connection_string`, `db_username` or `db_password`
                is empty.
        """
        # Imported lazily so the SDK is only required when this loader is used.
        try:
            from couchbase.auth import PasswordAuthenticator
            from couchbase.cluster import Cluster
            from couchbase.options import ClusterOptions
        except ImportError as e:
            # The trailing space matters: adjacent string literals are joined
            # verbatim, so without it the two sentences run together.
            raise ImportError(
                "Could not import couchbase package. "
                "Please install couchbase SDK with `pip install couchbase`."
            ) from e

        if not connection_string:
            raise ValueError("connection_string must be provided.")
        if not db_username:
            raise ValueError("db_username must be provided.")
        if not db_password:
            raise ValueError("db_password must be provided.")

        auth = PasswordAuthenticator(
            db_username,
            db_password,
        )

        self.cluster: Cluster = Cluster(connection_string, ClusterOptions(auth))
        self.query = query
        self.page_content_fields = page_content_fields
        self.metadata_fields = metadata_fields

    def load(self) -> List[Document]:
        """Load Couchbase data into Document objects."""
        return list(self.lazy_load())

    def lazy_load(self) -> Iterator[Document]:
        """Load Couchbase data into Document objects lazily.

        This is a generator: the readiness wait and the query are only issued
        once iteration starts, and one `Document` is yielded per result row.

        Yields:
            Document: `page_content` holds one ``"<column>: <value>"`` line per
            selected column (in the row's own column order); `metadata` holds
            the configured `metadata_fields` looked up on the row.
        """
        from datetime import timedelta

        # Ensure connection to Couchbase cluster (waits with a 5 second timeout)
        self.cluster.wait_until_ready(timedelta(seconds=5))

        # These selections do not depend on the row, so resolve them once
        # instead of on every iteration. A missing/empty metadata selection
        # means "no metadata".
        content_fields = self.page_content_fields
        metadata_fields = self.metadata_fields or []

        # Run SQL++ Query
        result = self.cluster.query(self.query)
        for row in result:
            # NOTE(review): rows are used as mappings here; a metadata field
            # absent from a row will surface as that row's lookup error.
            metadata = {field: row[field] for field in metadata_fields}

            # With no explicit content selection, every column of the row is
            # rendered; otherwise only the selected columns are kept.
            page_content = "\n".join(
                f"{k}: {v}"
                for k, v in row.items()
                if not content_fields or k in content_fields
            )

            yield Document(page_content=page_content, metadata=metadata)