mirror of
https://github.com/hwchase17/langchain
synced 2024-11-11 19:11:02 +00:00
12d65f17ff
This PR contains 4 added functions: - max_marginal_relevance_search_by_vector - amax_marginal_relevance_search_by_vector - max_marginal_relevance_search - amax_marginal_relevance_search I'm no langchain expert, but tried do inspect other vectorstore sources like chroma, to build these functions for SurrealDB. If someone has some changes for me, please let me know. Otherwise I would be happy, if these changes are added to the repository, so that I can use the orignal repo and not my local monkey patched version. --------- Co-authored-by: Harrison Chase <hw.chase.17@gmail.com> Co-authored-by: Bagatur <baskaryan@gmail.com>
710 lines
23 KiB
Python
710 lines
23 KiB
Python
import asyncio
|
|
from typing import (
|
|
Any,
|
|
Dict,
|
|
Iterable,
|
|
List,
|
|
Optional,
|
|
Tuple,
|
|
)
|
|
|
|
import numpy as np
|
|
from langchain_core.documents import Document
|
|
from langchain_core.embeddings import Embeddings
|
|
from langchain_core.vectorstores import VectorStore
|
|
|
|
from langchain_community.vectorstores.utils import maximal_marginal_relevance
|
|
|
|
DEFAULT_K = 4 # Number of Documents to return.
|
|
|
|
|
|
class SurrealDBStore(VectorStore):
|
|
"""
|
|
SurrealDB as Vector Store.
|
|
|
|
To use, you should have the ``surrealdb`` python package installed.
|
|
|
|
Args:
|
|
embedding_function: Embedding function to use.
|
|
dburl: SurrealDB connection url
|
|
ns: surrealdb namespace for the vector store. (default: "langchain")
|
|
db: surrealdb database for the vector store. (default: "database")
|
|
collection: surrealdb collection for the vector store.
|
|
(default: "documents")
|
|
|
|
(optional) db_user and db_pass: surrealdb credentials
|
|
|
|
Example:
|
|
.. code-block:: python
|
|
|
|
from langchain_community.vectorstores.surrealdb import SurrealDBStore
|
|
from langchain_community.embeddings import HuggingFaceEmbeddings
|
|
|
|
embedding_function = HuggingFaceEmbeddings()
|
|
dburl = "ws://localhost:8000/rpc"
|
|
ns = "langchain"
|
|
db = "docstore"
|
|
collection = "documents"
|
|
db_user = "root"
|
|
db_pass = "root"
|
|
|
|
sdb = SurrealDBStore.from_texts(
|
|
texts=texts,
|
|
embedding=embedding_function,
|
|
dburl,
|
|
ns, db, collection,
|
|
db_user=db_user, db_pass=db_pass)
|
|
"""
|
|
|
|
def __init__(
|
|
self,
|
|
embedding_function: Embeddings,
|
|
**kwargs: Any,
|
|
) -> None:
|
|
try:
|
|
from surrealdb import Surreal
|
|
except ImportError as e:
|
|
raise ImportError(
|
|
"""Cannot import from surrealdb.
|
|
please install with `pip install surrealdb`."""
|
|
) from e
|
|
|
|
self.dburl = kwargs.pop("dburl", "ws://localhost:8000/rpc")
|
|
|
|
if self.dburl[0:2] == "ws":
|
|
self.sdb = Surreal(self.dburl)
|
|
else:
|
|
raise ValueError("Only websocket connections are supported at this time.")
|
|
|
|
self.ns = kwargs.pop("ns", "langchain")
|
|
self.db = kwargs.pop("db", "database")
|
|
self.collection = kwargs.pop("collection", "documents")
|
|
self.embedding_function = embedding_function
|
|
self.kwargs = kwargs
|
|
|
|
async def initialize(self) -> None:
|
|
"""
|
|
Initialize connection to surrealdb database
|
|
and authenticate if credentials are provided
|
|
"""
|
|
await self.sdb.connect()
|
|
if "db_user" in self.kwargs and "db_pass" in self.kwargs:
|
|
user = self.kwargs.get("db_user")
|
|
password = self.kwargs.get("db_pass")
|
|
await self.sdb.signin({"user": user, "pass": password})
|
|
await self.sdb.use(self.ns, self.db)
|
|
|
|
@property
|
|
def embeddings(self) -> Optional[Embeddings]:
|
|
return (
|
|
self.embedding_function
|
|
if isinstance(self.embedding_function, Embeddings)
|
|
else None
|
|
)
|
|
|
|
async def aadd_texts(
|
|
self,
|
|
texts: Iterable[str],
|
|
metadatas: Optional[List[dict]] = None,
|
|
**kwargs: Any,
|
|
) -> List[str]:
|
|
"""Add list of text along with embeddings to the vector store asynchronously
|
|
|
|
Args:
|
|
texts (Iterable[str]): collection of text to add to the database
|
|
|
|
Returns:
|
|
List of ids for the newly inserted documents
|
|
"""
|
|
embeddings = self.embedding_function.embed_documents(list(texts))
|
|
ids = []
|
|
for idx, text in enumerate(texts):
|
|
data = {"text": text, "embedding": embeddings[idx]}
|
|
if metadatas is not None and idx < len(metadatas):
|
|
data["metadata"] = metadatas[idx] # type: ignore[assignment]
|
|
else:
|
|
data["metadata"] = []
|
|
record = await self.sdb.create(
|
|
self.collection,
|
|
data,
|
|
)
|
|
ids.append(record[0]["id"])
|
|
return ids
|
|
|
|
def add_texts(
|
|
self,
|
|
texts: Iterable[str],
|
|
metadatas: Optional[List[dict]] = None,
|
|
**kwargs: Any,
|
|
) -> List[str]:
|
|
"""Add list of text along with embeddings to the vector store
|
|
|
|
Args:
|
|
texts (Iterable[str]): collection of text to add to the database
|
|
|
|
Returns:
|
|
List of ids for the newly inserted documents
|
|
"""
|
|
|
|
async def _add_texts(
|
|
texts: Iterable[str],
|
|
metadatas: Optional[List[dict]] = None,
|
|
**kwargs: Any,
|
|
) -> List[str]:
|
|
await self.initialize()
|
|
return await self.aadd_texts(texts, metadatas, **kwargs)
|
|
|
|
return asyncio.run(_add_texts(texts, metadatas, **kwargs))
|
|
|
|
async def adelete(
|
|
self,
|
|
ids: Optional[List[str]] = None,
|
|
**kwargs: Any,
|
|
) -> Optional[bool]:
|
|
"""Delete by document ID asynchronously.
|
|
|
|
Args:
|
|
ids: List of ids to delete.
|
|
**kwargs: Other keyword arguments that subclasses might use.
|
|
|
|
Returns:
|
|
Optional[bool]: True if deletion is successful,
|
|
False otherwise.
|
|
"""
|
|
|
|
if ids is None:
|
|
await self.sdb.delete(self.collection)
|
|
return True
|
|
else:
|
|
if isinstance(ids, str):
|
|
await self.sdb.delete(ids)
|
|
return True
|
|
else:
|
|
if isinstance(ids, list) and len(ids) > 0:
|
|
_ = [await self.sdb.delete(id) for id in ids]
|
|
return True
|
|
return False
|
|
|
|
def delete(
|
|
self,
|
|
ids: Optional[List[str]] = None,
|
|
**kwargs: Any,
|
|
) -> Optional[bool]:
|
|
"""Delete by document ID.
|
|
|
|
Args:
|
|
ids: List of ids to delete.
|
|
**kwargs: Other keyword arguments that subclasses might use.
|
|
|
|
Returns:
|
|
Optional[bool]: True if deletion is successful,
|
|
False otherwise.
|
|
"""
|
|
|
|
async def _delete(ids: Optional[List[str]], **kwargs: Any) -> Optional[bool]:
|
|
await self.initialize()
|
|
return await self.adelete(ids=ids, **kwargs)
|
|
|
|
return asyncio.run(_delete(ids, **kwargs))
|
|
|
|
async def _asimilarity_search_by_vector_with_score(
|
|
self,
|
|
embedding: List[float],
|
|
k: int = DEFAULT_K,
|
|
*,
|
|
filter: Optional[Dict[str, str]] = None,
|
|
**kwargs: Any,
|
|
) -> List[Tuple[Document, float, Any]]:
|
|
"""Run similarity search for query embedding asynchronously
|
|
and return documents and scores
|
|
|
|
Args:
|
|
embedding (List[float]): Query embedding.
|
|
k (int): Number of results to return. Defaults to 4.
|
|
filter (Optional[Dict[str, str]]): Filter by metadata. Defaults to None.
|
|
|
|
Returns:
|
|
List of Documents most similar along with scores
|
|
"""
|
|
args = {
|
|
"collection": self.collection,
|
|
"embedding": embedding,
|
|
"k": k,
|
|
"score_threshold": kwargs.get("score_threshold", 0),
|
|
}
|
|
|
|
# build additional filter criteria
|
|
custom_filter = ""
|
|
if filter:
|
|
for key in filter:
|
|
# check value type
|
|
if type(filter[key]) in [str, bool]:
|
|
filter_value = f"'{filter[key]}'"
|
|
else:
|
|
filter_value = f"{filter[key]}"
|
|
|
|
custom_filter += f"and metadata.{key} = {filter_value} "
|
|
|
|
query = f"""
|
|
select
|
|
id,
|
|
text,
|
|
metadata,
|
|
embedding,
|
|
vector::similarity::cosine(embedding, $embedding) as similarity
|
|
from ⟨{args["collection"]}⟩
|
|
where vector::similarity::cosine(embedding, $embedding) >= $score_threshold
|
|
{custom_filter}
|
|
order by similarity desc LIMIT $k;
|
|
"""
|
|
results = await self.sdb.query(query, args)
|
|
|
|
if len(results) == 0:
|
|
return []
|
|
|
|
result = results[0]
|
|
|
|
if result["status"] != "OK":
|
|
from surrealdb.ws import SurrealException
|
|
|
|
err = result.get("result", "Unknown Error")
|
|
raise SurrealException(err)
|
|
|
|
return [
|
|
(
|
|
Document(
|
|
page_content=doc["text"],
|
|
metadata={"id": doc["id"], **(doc.get("metadata") or {})},
|
|
),
|
|
doc["similarity"],
|
|
doc["embedding"],
|
|
)
|
|
for doc in result["result"]
|
|
]
|
|
|
|
async def asimilarity_search_with_relevance_scores(
|
|
self,
|
|
query: str,
|
|
k: int = DEFAULT_K,
|
|
*,
|
|
filter: Optional[Dict[str, str]] = None,
|
|
**kwargs: Any,
|
|
) -> List[Tuple[Document, float]]:
|
|
"""Run similarity search asynchronously and return relevance scores
|
|
|
|
Args:
|
|
query (str): Query
|
|
k (int): Number of results to return. Defaults to 4.
|
|
filter (Optional[Dict[str, str]]): Filter by metadata. Defaults to None.
|
|
|
|
Returns:
|
|
List of Documents most similar along with relevance scores
|
|
"""
|
|
query_embedding = self.embedding_function.embed_query(query)
|
|
return [
|
|
(document, similarity)
|
|
for document, similarity, _ in (
|
|
await self._asimilarity_search_by_vector_with_score(
|
|
query_embedding, k, filter=filter, **kwargs
|
|
)
|
|
)
|
|
]
|
|
|
|
def similarity_search_with_relevance_scores(
|
|
self,
|
|
query: str,
|
|
k: int = DEFAULT_K,
|
|
*,
|
|
filter: Optional[Dict[str, str]] = None,
|
|
**kwargs: Any,
|
|
) -> List[Tuple[Document, float]]:
|
|
"""Run similarity search synchronously and return relevance scores
|
|
|
|
Args:
|
|
query (str): Query
|
|
k (int): Number of results to return. Defaults to 4.
|
|
filter (Optional[Dict[str, str]]): Filter by metadata. Defaults to None.
|
|
|
|
Returns:
|
|
List of Documents most similar along with relevance scores
|
|
"""
|
|
|
|
async def _similarity_search_with_relevance_scores() -> (
|
|
List[Tuple[Document, float]]
|
|
):
|
|
await self.initialize()
|
|
return await self.asimilarity_search_with_relevance_scores(
|
|
query, k, filter=filter, **kwargs
|
|
)
|
|
|
|
return asyncio.run(_similarity_search_with_relevance_scores())
|
|
|
|
async def asimilarity_search_with_score(
|
|
self,
|
|
query: str,
|
|
k: int = DEFAULT_K,
|
|
*,
|
|
filter: Optional[Dict[str, str]] = None,
|
|
**kwargs: Any,
|
|
) -> List[Tuple[Document, float]]:
|
|
"""Run similarity search asynchronously and return distance scores
|
|
|
|
Args:
|
|
query (str): Query
|
|
k (int): Number of results to return. Defaults to 4.
|
|
filter (Optional[Dict[str, str]]): Filter by metadata. Defaults to None.
|
|
|
|
Returns:
|
|
List of Documents most similar along with relevance distance scores
|
|
"""
|
|
query_embedding = self.embedding_function.embed_query(query)
|
|
return [
|
|
(document, similarity)
|
|
for document, similarity, _ in (
|
|
await self._asimilarity_search_by_vector_with_score(
|
|
query_embedding, k, filter=filter, **kwargs
|
|
)
|
|
)
|
|
]
|
|
|
|
def similarity_search_with_score(
|
|
self,
|
|
query: str,
|
|
k: int = DEFAULT_K,
|
|
*,
|
|
filter: Optional[Dict[str, str]] = None,
|
|
**kwargs: Any,
|
|
) -> List[Tuple[Document, float]]:
|
|
"""Run similarity search synchronously and return distance scores
|
|
|
|
Args:
|
|
query (str): Query
|
|
k (int): Number of results to return. Defaults to 4.
|
|
filter (Optional[Dict[str, str]]): Filter by metadata. Defaults to None.
|
|
|
|
Returns:
|
|
List of Documents most similar along with relevance distance scores
|
|
"""
|
|
|
|
async def _similarity_search_with_score() -> List[Tuple[Document, float]]:
|
|
await self.initialize()
|
|
return await self.asimilarity_search_with_score(
|
|
query, k, filter=filter, **kwargs
|
|
)
|
|
|
|
return asyncio.run(_similarity_search_with_score())
|
|
|
|
async def asimilarity_search_by_vector(
|
|
self,
|
|
embedding: List[float],
|
|
k: int = DEFAULT_K,
|
|
*,
|
|
filter: Optional[Dict[str, str]] = None,
|
|
**kwargs: Any,
|
|
) -> List[Document]:
|
|
"""Run similarity search on query embedding asynchronously
|
|
|
|
Args:
|
|
embedding (List[float]): Query embedding
|
|
k (int): Number of results to return. Defaults to 4.
|
|
filter (Optional[Dict[str, str]]): Filter by metadata. Defaults to None.
|
|
|
|
Returns:
|
|
List of Documents most similar to the query
|
|
"""
|
|
return [
|
|
document
|
|
for document, _, _ in await self._asimilarity_search_by_vector_with_score(
|
|
embedding, k, filter=filter, **kwargs
|
|
)
|
|
]
|
|
|
|
def similarity_search_by_vector(
|
|
self,
|
|
embedding: List[float],
|
|
k: int = DEFAULT_K,
|
|
*,
|
|
filter: Optional[Dict[str, str]] = None,
|
|
**kwargs: Any,
|
|
) -> List[Document]:
|
|
"""Run similarity search on query embedding
|
|
|
|
Args:
|
|
embedding (List[float]): Query embedding
|
|
k (int): Number of results to return. Defaults to 4.
|
|
filter (Optional[Dict[str, str]]): Filter by metadata. Defaults to None.
|
|
|
|
Returns:
|
|
List of Documents most similar to the query
|
|
"""
|
|
|
|
async def _similarity_search_by_vector() -> List[Document]:
|
|
await self.initialize()
|
|
return await self.asimilarity_search_by_vector(
|
|
embedding, k, filter=filter, **kwargs
|
|
)
|
|
|
|
return asyncio.run(_similarity_search_by_vector())
|
|
|
|
async def asimilarity_search(
|
|
self,
|
|
query: str,
|
|
k: int = DEFAULT_K,
|
|
*,
|
|
filter: Optional[Dict[str, str]] = None,
|
|
**kwargs: Any,
|
|
) -> List[Document]:
|
|
"""Run similarity search on query asynchronously
|
|
|
|
Args:
|
|
query (str): Query
|
|
k (int): Number of results to return. Defaults to 4.
|
|
filter (Optional[Dict[str, str]]): Filter by metadata. Defaults to None.
|
|
|
|
Returns:
|
|
List of Documents most similar to the query
|
|
"""
|
|
query_embedding = self.embedding_function.embed_query(query)
|
|
return await self.asimilarity_search_by_vector(
|
|
query_embedding, k, filter=filter, **kwargs
|
|
)
|
|
|
|
def similarity_search(
|
|
self,
|
|
query: str,
|
|
k: int = DEFAULT_K,
|
|
*,
|
|
filter: Optional[Dict[str, str]] = None,
|
|
**kwargs: Any,
|
|
) -> List[Document]:
|
|
"""Run similarity search on query
|
|
|
|
Args:
|
|
query (str): Query
|
|
k (int): Number of results to return. Defaults to 4.
|
|
filter (Optional[Dict[str, str]]): Filter by metadata. Defaults to None.
|
|
|
|
Returns:
|
|
List of Documents most similar to the query
|
|
"""
|
|
|
|
async def _similarity_search() -> List[Document]:
|
|
await self.initialize()
|
|
return await self.asimilarity_search(query, k, filter=filter, **kwargs)
|
|
|
|
return asyncio.run(_similarity_search())
|
|
|
|
async def amax_marginal_relevance_search_by_vector(
|
|
self,
|
|
embedding: List[float],
|
|
k: int = DEFAULT_K,
|
|
fetch_k: int = 20,
|
|
lambda_mult: float = 0.5,
|
|
*,
|
|
filter: Optional[Dict[str, str]] = None,
|
|
**kwargs: Any,
|
|
) -> List[Document]:
|
|
"""Return docs selected using the maximal marginal relevance.
|
|
Maximal marginal relevance optimizes for similarity to query AND diversity
|
|
among selected documents.
|
|
|
|
Args:
|
|
embedding: Embedding to look up documents similar to.
|
|
k: Number of Documents to return. Defaults to 4.
|
|
fetch_k: Number of Documents to fetch to pass to MMR algorithm.
|
|
lambda_mult: Number between 0 and 1 that determines the degree
|
|
of diversity among the results with 0 corresponding
|
|
to maximum diversity and 1 to minimum diversity.
|
|
Defaults to 0.5.
|
|
filter (Optional[Dict[str, str]]): Filter by metadata. Defaults to None.
|
|
|
|
Returns:
|
|
List of Documents selected by maximal marginal relevance.
|
|
"""
|
|
|
|
result = await self._asimilarity_search_by_vector_with_score(
|
|
embedding, fetch_k, filter=filter, **kwargs
|
|
)
|
|
|
|
# extract only document from result
|
|
docs = [sub[0] for sub in result]
|
|
# extract only embedding from result
|
|
embeddings = [sub[-1] for sub in result]
|
|
|
|
mmr_selected = maximal_marginal_relevance(
|
|
np.array(embedding, dtype=np.float32),
|
|
embeddings,
|
|
k=k,
|
|
lambda_mult=lambda_mult,
|
|
)
|
|
|
|
return [docs[i] for i in mmr_selected]
|
|
|
|
def max_marginal_relevance_search_by_vector(
|
|
self,
|
|
embedding: List[float],
|
|
k: int = DEFAULT_K,
|
|
fetch_k: int = 20,
|
|
lambda_mult: float = 0.5,
|
|
*,
|
|
filter: Optional[Dict[str, str]] = None,
|
|
**kwargs: Any,
|
|
) -> List[Document]:
|
|
"""Return docs selected using the maximal marginal relevance.
|
|
|
|
Maximal marginal relevance optimizes for similarity to query AND diversity
|
|
among selected documents.
|
|
|
|
Args:
|
|
embedding: Embedding to look up documents similar to.
|
|
k: Number of Documents to return. Defaults to 4.
|
|
fetch_k: Number of Documents to fetch to pass to MMR algorithm.
|
|
lambda_mult: Number between 0 and 1 that determines the degree
|
|
of diversity among the results with 0 corresponding
|
|
to maximum diversity and 1 to minimum diversity.
|
|
Defaults to 0.5.
|
|
filter (Optional[Dict[str, str]]): Filter by metadata. Defaults to None.
|
|
|
|
Returns:
|
|
List of Documents selected by maximal marginal relevance.
|
|
"""
|
|
|
|
async def _max_marginal_relevance_search_by_vector() -> List[Document]:
|
|
await self.initialize()
|
|
return await self.amax_marginal_relevance_search_by_vector(
|
|
embedding, k, fetch_k, lambda_mult, filter=filter, **kwargs
|
|
)
|
|
|
|
return asyncio.run(_max_marginal_relevance_search_by_vector())
|
|
|
|
async def amax_marginal_relevance_search(
|
|
self,
|
|
query: str,
|
|
k: int = 4,
|
|
fetch_k: int = 20,
|
|
lambda_mult: float = 0.5,
|
|
*,
|
|
filter: Optional[Dict[str, str]] = None,
|
|
**kwargs: Any,
|
|
) -> List[Document]:
|
|
"""Return docs selected using the maximal marginal relevance.
|
|
|
|
Maximal marginal relevance optimizes for similarity to query AND diversity
|
|
among selected documents.
|
|
|
|
Args:
|
|
query: Text to look up documents similar to.
|
|
k: Number of Documents to return. Defaults to 4.
|
|
fetch_k: Number of Documents to fetch to pass to MMR algorithm.
|
|
lambda_mult: Number between 0 and 1 that determines the degree
|
|
of diversity among the results with 0 corresponding
|
|
to maximum diversity and 1 to minimum diversity.
|
|
Defaults to 0.5.
|
|
filter (Optional[Dict[str, str]]): Filter by metadata. Defaults to None.
|
|
|
|
Returns:
|
|
List of Documents selected by maximal marginal relevance.
|
|
"""
|
|
|
|
embedding = self.embedding_function.embed_query(query)
|
|
docs = await self.amax_marginal_relevance_search_by_vector(
|
|
embedding, k, fetch_k, lambda_mult, filter=filter, **kwargs
|
|
)
|
|
return docs
|
|
|
|
def max_marginal_relevance_search(
|
|
self,
|
|
query: str,
|
|
k: int = DEFAULT_K,
|
|
fetch_k: int = 20,
|
|
lambda_mult: float = 0.5,
|
|
*,
|
|
filter: Optional[Dict[str, str]] = None,
|
|
**kwargs: Any,
|
|
) -> List[Document]:
|
|
"""Return docs selected using the maximal marginal relevance.
|
|
Maximal marginal relevance optimizes for similarity to query AND diversity
|
|
among selected documents.
|
|
|
|
Args:
|
|
query: Text to look up documents similar to.
|
|
k: Number of Documents to return. Defaults to 4.
|
|
fetch_k: Number of Documents to fetch to pass to MMR algorithm.
|
|
lambda_mult: Number between 0 and 1 that determines the degree
|
|
of diversity among the results with 0 corresponding
|
|
to maximum diversity and 1 to minimum diversity.
|
|
Defaults to 0.5.
|
|
filter (Optional[Dict[str, str]]): Filter by metadata. Defaults to None.
|
|
|
|
Returns:
|
|
List of Documents selected by maximal marginal relevance.
|
|
"""
|
|
|
|
async def _max_marginal_relevance_search() -> List[Document]:
|
|
await self.initialize()
|
|
return await self.amax_marginal_relevance_search(
|
|
query, k, fetch_k, lambda_mult, filter=filter, **kwargs
|
|
)
|
|
|
|
return asyncio.run(_max_marginal_relevance_search())
|
|
|
|
@classmethod
|
|
async def afrom_texts(
|
|
cls,
|
|
texts: List[str],
|
|
embedding: Embeddings,
|
|
metadatas: Optional[List[dict]] = None,
|
|
**kwargs: Any,
|
|
) -> "SurrealDBStore":
|
|
"""Create SurrealDBStore from list of text asynchronously
|
|
|
|
Args:
|
|
texts (List[str]): list of text to vectorize and store
|
|
embedding (Optional[Embeddings]): Embedding function.
|
|
dburl (str): SurrealDB connection url
|
|
(default: "ws://localhost:8000/rpc")
|
|
ns (str): surrealdb namespace for the vector store.
|
|
(default: "langchain")
|
|
db (str): surrealdb database for the vector store.
|
|
(default: "database")
|
|
collection (str): surrealdb collection for the vector store.
|
|
(default: "documents")
|
|
|
|
(optional) db_user and db_pass: surrealdb credentials
|
|
|
|
Returns:
|
|
SurrealDBStore object initialized and ready for use."""
|
|
|
|
sdb = cls(embedding, **kwargs)
|
|
await sdb.initialize()
|
|
await sdb.aadd_texts(texts, metadatas, **kwargs)
|
|
return sdb
|
|
|
|
@classmethod
|
|
def from_texts(
|
|
cls,
|
|
texts: List[str],
|
|
embedding: Embeddings,
|
|
metadatas: Optional[List[dict]] = None,
|
|
**kwargs: Any,
|
|
) -> "SurrealDBStore":
|
|
"""Create SurrealDBStore from list of text
|
|
|
|
Args:
|
|
texts (List[str]): list of text to vectorize and store
|
|
embedding (Optional[Embeddings]): Embedding function.
|
|
dburl (str): SurrealDB connection url
|
|
ns (str): surrealdb namespace for the vector store.
|
|
(default: "langchain")
|
|
db (str): surrealdb database for the vector store.
|
|
(default: "database")
|
|
collection (str): surrealdb collection for the vector store.
|
|
(default: "documents")
|
|
|
|
(optional) db_user and db_pass: surrealdb credentials
|
|
|
|
Returns:
|
|
SurrealDBStore object initialized and ready for use."""
|
|
sdb = asyncio.run(cls.afrom_texts(texts, embedding, metadatas, **kwargs))
|
|
return sdb
|