mirror of
https://github.com/hwchase17/langchain
synced 2024-11-04 06:00:26 +00:00
Add from_embeddings for opensearch (#10957)
This commit is contained in:
parent
73693c18fc
commit
db05ea2b78
@ -736,7 +736,7 @@ class OpenSearchVectorSearch(VectorStore):
|
|||||||
ids: Optional[List[str]] = None,
|
ids: Optional[List[str]] = None,
|
||||||
**kwargs: Any,
|
**kwargs: Any,
|
||||||
) -> OpenSearchVectorSearch:
|
) -> OpenSearchVectorSearch:
|
||||||
"""Construct OpenSearchVectorSearch wrapper from raw documents.
|
"""Construct OpenSearchVectorSearch wrapper from raw texts.
|
||||||
|
|
||||||
Example:
|
Example:
|
||||||
.. code-block:: python
|
.. code-block:: python
|
||||||
@ -754,6 +754,74 @@ class OpenSearchVectorSearch(VectorStore):
|
|||||||
and lucene engines recommended for large datasets. Also supports brute force
|
and lucene engines recommended for large datasets. Also supports brute force
|
||||||
search through Script Scoring and Painless Scripting.
|
search through Script Scoring and Painless Scripting.
|
||||||
|
|
||||||
|
Optional Args:
|
||||||
|
vector_field: Document field embeddings are stored in. Defaults to
|
||||||
|
"vector_field".
|
||||||
|
|
||||||
|
text_field: Document field the text of the document is stored in. Defaults
|
||||||
|
to "text".
|
||||||
|
|
||||||
|
Optional Keyword Args for Approximate Search:
|
||||||
|
engine: "nmslib", "faiss", "lucene"; default: "nmslib"
|
||||||
|
|
||||||
|
space_type: "l2", "l1", "cosinesimil", "linf", "innerproduct"; default: "l2"
|
||||||
|
|
||||||
|
ef_search: Size of the dynamic list used during k-NN searches. Higher values
|
||||||
|
lead to more accurate but slower searches; default: 512
|
||||||
|
|
||||||
|
ef_construction: Size of the dynamic list used during k-NN graph creation.
|
||||||
|
Higher values lead to more accurate graph but slower indexing speed;
|
||||||
|
default: 512
|
||||||
|
|
||||||
|
m: Number of bidirectional links created for each new element. Large impact
|
||||||
|
on memory consumption. Between 2 and 100; default: 16
|
||||||
|
|
||||||
|
Keyword Args for Script Scoring or Painless Scripting:
|
||||||
|
is_appx_search: False
|
||||||
|
|
||||||
|
"""
|
||||||
|
embeddings = embedding.embed_documents(texts)
|
||||||
|
return cls.from_embeddings(
|
||||||
|
embeddings,
|
||||||
|
texts,
|
||||||
|
embedding,
|
||||||
|
metadatas=metadatas,
|
||||||
|
bulk_size=bulk_size,
|
||||||
|
ids=ids,
|
||||||
|
**kwargs,
|
||||||
|
)
|
||||||
|
|
||||||
|
@classmethod
|
||||||
|
def from_embeddings(
|
||||||
|
cls,
|
||||||
|
embeddings: List[List[float]],
|
||||||
|
texts: List[str],
|
||||||
|
embedding: Embeddings,
|
||||||
|
metadatas: Optional[List[dict]] = None,
|
||||||
|
bulk_size: int = 500,
|
||||||
|
ids: Optional[List[str]] = None,
|
||||||
|
**kwargs: Any,
|
||||||
|
) -> OpenSearchVectorSearch:
|
||||||
|
"""Construct OpenSearchVectorSearch wrapper from pre-vectorized embeddings.
|
||||||
|
|
||||||
|
Example:
|
||||||
|
.. code-block:: python
|
||||||
|
|
||||||
|
from langchain.vectorstores import OpenSearchVectorSearch
|
||||||
|
from langchain.embeddings import OpenAIEmbeddings
|
||||||
|
embedder = OpenAIEmbeddings()
|
||||||
|
texts = ["foo", "bar"]
|
||||||
|
embeddings = embedder.embed_documents(texts)
|
||||||
|
opensearch_vector_search = OpenSearchVectorSearch.from_embeddings(
|
||||||
|
embeddings,
|
||||||
|
texts,
|
||||||
|
embedder,
|
||||||
|
opensearch_url="http://localhost:9200"
|
||||||
|
)
|
||||||
|
|
||||||
|
OpenSearch by default supports Approximate Search powered by nmslib, faiss
|
||||||
|
and lucene engines recommended for large datasets. Also supports brute force
|
||||||
|
search through Script Scoring and Painless Scripting.
|
||||||
|
|
||||||
Optional Args:
|
Optional Args:
|
||||||
vector_field: Document field embeddings are stored in. Defaults to
|
vector_field: Document field embeddings are stored in. Defaults to
|
||||||
"vector_field".
|
"vector_field".
|
||||||
@ -799,7 +867,6 @@ class OpenSearchVectorSearch(VectorStore):
|
|||||||
"max_chunk_bytes",
|
"max_chunk_bytes",
|
||||||
"is_aoss",
|
"is_aoss",
|
||||||
]
|
]
|
||||||
embeddings = embedding.embed_documents(texts)
|
|
||||||
_validate_embeddings_and_bulk_size(len(embeddings), bulk_size)
|
_validate_embeddings_and_bulk_size(len(embeddings), bulk_size)
|
||||||
dim = len(embeddings[0])
|
dim = len(embeddings[0])
|
||||||
# Get the index name from either kwargs or an environment variable
|
# Get the index name from either kwargs or an environment variable
|
||||||
@ -843,8 +910,8 @@ class OpenSearchVectorSearch(VectorStore):
|
|||||||
index_name,
|
index_name,
|
||||||
embeddings,
|
embeddings,
|
||||||
texts,
|
texts,
|
||||||
metadatas=metadatas,
|
|
||||||
ids=ids,
|
ids=ids,
|
||||||
|
metadatas=metadatas,
|
||||||
vector_field=vector_field,
|
vector_field=vector_field,
|
||||||
text_field=text_field,
|
text_field=text_field,
|
||||||
mapping=mapping,
|
mapping=mapping,
|
||||||
|
Loading…
Reference in New Issue
Block a user