mirror of
https://github.com/hwchase17/langchain
synced 2024-11-02 09:40:22 +00:00
ed58eeb9c5
Moved the following modules to new package langchain-community in a backwards compatible fashion: ``` mv langchain/langchain/adapters community/langchain_community mv langchain/langchain/callbacks community/langchain_community/callbacks mv langchain/langchain/chat_loaders community/langchain_community mv langchain/langchain/chat_models community/langchain_community mv langchain/langchain/document_loaders community/langchain_community mv langchain/langchain/docstore community/langchain_community mv langchain/langchain/document_transformers community/langchain_community mv langchain/langchain/embeddings community/langchain_community mv langchain/langchain/graphs community/langchain_community mv langchain/langchain/llms community/langchain_community mv langchain/langchain/memory/chat_message_histories community/langchain_community mv langchain/langchain/retrievers community/langchain_community mv langchain/langchain/storage community/langchain_community mv langchain/langchain/tools community/langchain_community mv langchain/langchain/utilities community/langchain_community mv langchain/langchain/vectorstores community/langchain_community mv langchain/langchain/agents/agent_toolkits community/langchain_community mv langchain/langchain/cache.py community/langchain_community mv langchain/langchain/adapters community/langchain_community mv langchain/langchain/callbacks community/langchain_community/callbacks mv langchain/langchain/chat_loaders community/langchain_community mv langchain/langchain/chat_models community/langchain_community mv langchain/langchain/document_loaders community/langchain_community mv langchain/langchain/docstore community/langchain_community mv langchain/langchain/document_transformers community/langchain_community mv langchain/langchain/embeddings community/langchain_community mv langchain/langchain/graphs community/langchain_community mv langchain/langchain/llms community/langchain_community mv langchain/langchain/memory/chat_message_histories 
community/langchain_community mv langchain/langchain/retrievers community/langchain_community mv langchain/langchain/storage community/langchain_community mv langchain/langchain/tools community/langchain_community mv langchain/langchain/utilities community/langchain_community mv langchain/langchain/vectorstores community/langchain_community mv langchain/langchain/agents/agent_toolkits community/langchain_community mv langchain/langchain/cache.py community/langchain_community ``` Moved the following to core ``` mv langchain/langchain/utils/json_schema.py core/langchain_core/utils mv langchain/langchain/utils/html.py core/langchain_core/utils mv langchain/langchain/utils/strings.py core/langchain_core/utils cat langchain/langchain/utils/env.py >> core/langchain_core/utils/env.py rm langchain/langchain/utils/env.py ``` See .scripts/community_split/script_integrations.sh for all changes
90 lines
3.1 KiB
Python
90 lines
3.1 KiB
Python
from typing import Dict, List, Optional, cast
|
|
|
|
from langchain_core.documents import Document
|
|
|
|
from langchain_community.document_loaders.base import BaseLoader
|
|
|
|
|
|
class DuckDBLoader(BaseLoader):
    """Load from `DuckDB`.

    Each document represents one row of the result. The `page_content_columns`
    are written into the `page_content` of the document. The `metadata_columns`
    are written into the `metadata` of the document. By default, all columns
    are written into the `page_content` and none into the `metadata`.
    """

    def __init__(
        self,
        query: str,
        database: str = ":memory:",
        read_only: bool = False,
        config: Optional[Dict[str, str]] = None,
        page_content_columns: Optional[List[str]] = None,
        metadata_columns: Optional[List[str]] = None,
    ) -> None:
        """Store the query and connection settings; nothing is executed here.

        Args:
            query: The query to execute.
            database: The database to connect to. Defaults to ":memory:".
            read_only: Whether to open the database in read-only mode.
              Defaults to False.
            config: A dictionary of configuration options to pass to the database.
              Optional.
            page_content_columns: The columns to write into the `page_content`
              of the document. Optional.
            metadata_columns: The columns to write into the `metadata` of the document.
              Optional.
        """
        self.query = query
        self.database = database
        self.read_only = read_only
        self.config = config or {}
        self.page_content_columns = page_content_columns
        self.metadata_columns = metadata_columns

    def load(self) -> List[Document]:
        """Run the query and turn every result row into a `Document`.

        Returns:
            One document per row. `page_content` is a newline-joined list of
            ``"<column>: <value>"`` entries for the page-content columns and
            `metadata` maps each metadata column name to its value in the row.

        Raises:
            ImportError: If the `duckdb` package is not installed.
            ValueError: If rows were returned and a requested column name is
                not among the columns of the query result.
        """
        try:
            import duckdb
        except ImportError as e:
            # Chain the original error so the root cause stays visible.
            raise ImportError(
                "Could not import duckdb python package. "
                "Please install it with `pip install duckdb`."
            ) from e

        with duckdb.connect(
            database=self.database, read_only=self.read_only, config=self.config
        ) as con:
            query_result = con.execute(self.query)
            # Everything needed is materialised while the connection is open.
            results = query_result.fetchall()
            description = cast(list, query_result.description)

        # The first entry of each description item is the column name.
        field_names = [c[0] for c in description]

        if not results:
            # No rows: nothing to build and no column names to resolve.
            return []

        if self.page_content_columns is None:
            page_content_columns = field_names
        else:
            page_content_columns = self.page_content_columns

        if self.metadata_columns is None:
            metadata_columns = []
        else:
            metadata_columns = self.metadata_columns

        # Resolve each column name to its position once instead of once per
        # row; `list.index` keeps the first-match and ValueError semantics.
        content_slots = [
            (column, field_names.index(column)) for column in page_content_columns
        ]
        metadata_slots = [
            (column, field_names.index(column)) for column in metadata_columns
        ]

        return [
            Document(
                page_content="\n".join(
                    f"{column}: {row[position]}" for column, position in content_slots
                ),
                metadata={column: row[position] for column, position in metadata_slots},
            )
            for row in results
        ]
|