|
|
@@ -25,8 +25,6 @@ logger = logging.getLogger(__name__)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
class AstraDBLoader(BaseLoader):
|
|
|
|
class AstraDBLoader(BaseLoader):
|
|
|
|
"""Load DataStax Astra DB documents."""
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def __init__(
|
|
|
|
def __init__(
|
|
|
|
self,
|
|
|
|
self,
|
|
|
|
collection_name: str,
|
|
|
|
collection_name: str,
|
|
|
@@ -42,6 +40,26 @@ class AstraDBLoader(BaseLoader):
|
|
|
|
nb_prefetched: int = 1000,
|
|
|
|
nb_prefetched: int = 1000,
|
|
|
|
extraction_function: Callable[[Dict], str] = json.dumps,
|
|
|
|
extraction_function: Callable[[Dict], str] = json.dumps,
|
|
|
|
) -> None:
|
|
|
|
) -> None:
|
|
|
|
|
|
|
|
"""Load DataStax Astra DB documents.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
Args:
|
|
|
|
|
|
|
|
collection_name: name of the Astra DB collection to use.
|
|
|
|
|
|
|
|
token: API token for Astra DB usage.
|
|
|
|
|
|
|
|
api_endpoint: full URL to the API endpoint,
|
|
|
|
|
|
|
|
such as `https://<DB-ID>-us-east1.apps.astra.datastax.com`.
|
|
|
|
|
|
|
|
astra_db_client: *alternative to token+api_endpoint*,
|
|
|
|
|
|
|
|
you can pass an already-created 'astrapy.db.AstraDB' instance.
|
|
|
|
|
|
|
|
async_astra_db_client: *alternative to token+api_endpoint*,
|
|
|
|
|
|
|
|
you can pass an already-created 'astrapy.db.AsyncAstraDB' instance.
|
|
|
|
|
|
|
|
namespace: namespace (aka keyspace) where the
|
|
|
|
|
|
|
|
collection is. Defaults to the database's "default namespace".
|
|
|
|
|
|
|
|
filter_criteria: Criteria to filter documents.
|
|
|
|
|
|
|
|
projection: Specifies the fields to return.
|
|
|
|
|
|
|
|
find_options: Additional options for the query.
|
|
|
|
|
|
|
|
nb_prefetched: Max number of documents to pre-fetch. Defaults to 1000.
|
|
|
|
|
|
|
|
extraction_function: Function applied to collection documents to create
|
|
|
|
|
|
|
|
the `page_content` of the LangChain Document. Defaults to `json.dumps`.
|
|
|
|
|
|
|
|
"""
|
|
|
|
astra_env = _AstraDBEnvironment(
|
|
|
|
astra_env = _AstraDBEnvironment(
|
|
|
|
token=token,
|
|
|
|
token=token,
|
|
|
|
api_endpoint=api_endpoint,
|
|
|
|
api_endpoint=api_endpoint,
|
|
|
@@ -59,7 +77,6 @@ class AstraDBLoader(BaseLoader):
|
|
|
|
self.extraction_function = extraction_function
|
|
|
|
self.extraction_function = extraction_function
|
|
|
|
|
|
|
|
|
|
|
|
def load(self) -> List[Document]:
|
|
|
|
def load(self) -> List[Document]:
|
|
|
|
"""Eagerly load the content."""
|
|
|
|
|
|
|
|
return list(self.lazy_load())
|
|
|
|
return list(self.lazy_load())
|
|
|
|
|
|
|
|
|
|
|
|
def lazy_load(self) -> Iterator[Document]:
|
|
|
|
def lazy_load(self) -> Iterator[Document]:
|
|
|
|