diff --git a/libs/langchain/langchain/vectorstores/opensearch_vector_search.py b/libs/langchain/langchain/vectorstores/opensearch_vector_search.py
index 12e1928d05..bb3508eaa1 100644
--- a/libs/langchain/langchain/vectorstores/opensearch_vector_search.py
+++ b/libs/langchain/langchain/vectorstores/opensearch_vector_search.py
@@ -736,7 +736,7 @@ class OpenSearchVectorSearch(VectorStore):
         ids: Optional[List[str]] = None,
         **kwargs: Any,
     ) -> OpenSearchVectorSearch:
-        """Construct OpenSearchVectorSearch wrapper from raw documents.
+        """Construct OpenSearchVectorSearch wrapper from raw texts.
 
         Example:
             .. code-block:: python
@@ -754,6 +754,74 @@ class OpenSearchVectorSearch(VectorStore):
         and lucene engines recommended for large datasets. Also supports brute force
         search through Script Scoring and Painless Scripting.
 
+        Optional Args:
+            vector_field: Document field embeddings are stored in. Defaults to
+            "vector_field".
+
+            text_field: Document field the text of the document is stored in. Defaults
+            to "text".
+
+        Optional Keyword Args for Approximate Search:
+            engine: "nmslib", "faiss", "lucene"; default: "nmslib"
+
+            space_type: "l2", "l1", "cosinesimil", "linf", "innerproduct"; default: "l2"
+
+            ef_search: Size of the dynamic list used during k-NN searches. Higher values
+            lead to more accurate but slower searches; default: 512
+
+            ef_construction: Size of the dynamic list used during k-NN graph creation.
+            Higher values lead to more accurate graph but slower indexing speed;
+            default: 512
+
+            m: Number of bidirectional links created for each new element. Large impact
+            on memory consumption. Between 2 and 100; default: 16
+
+        Keyword Args for Script Scoring or Painless Scripting:
+            is_appx_search: False
+
+        """
+        embeddings = embedding.embed_documents(texts)
+        return cls.from_embeddings(
+            embeddings,
+            texts,
+            embedding,
+            metadatas=metadatas,
+            bulk_size=bulk_size,
+            ids=ids,
+            **kwargs,
+        )
+
+    @classmethod
+    def from_embeddings(
+        cls,
+        embeddings: List[List[float]],
+        texts: List[str],
+        embedding: Embeddings,
+        metadatas: Optional[List[dict]] = None,
+        bulk_size: int = 500,
+        ids: Optional[List[str]] = None,
+        **kwargs: Any,
+    ) -> OpenSearchVectorSearch:
+        """Construct OpenSearchVectorSearch wrapper from pre-vectorized embeddings.
+
+        Example:
+            .. code-block:: python
+
+                from langchain.vectorstores import OpenSearchVectorSearch
+                from langchain.embeddings import OpenAIEmbeddings
+                embedder = OpenAIEmbeddings()
+                embeddings = embedder.embed_documents(["foo", "bar"])
+                opensearch_vector_search = OpenSearchVectorSearch.from_embeddings(
+                    embeddings,
+                    ["foo", "bar"],
+                    embedder,
+                    opensearch_url="http://localhost:9200"
+                )
+
+        OpenSearch by default supports Approximate Search powered by nmslib, faiss
+        and lucene engines recommended for large datasets. Also supports brute force
+        search through Script Scoring and Painless Scripting.
+
         Optional Args:
             vector_field: Document field embeddings are stored in. Defaults to
             "vector_field".
@@ -799,7 +867,6 @@ class OpenSearchVectorSearch(VectorStore):
             "max_chunk_bytes",
             "is_aoss",
         ]
-        embeddings = embedding.embed_documents(texts)
         _validate_embeddings_and_bulk_size(len(embeddings), bulk_size)
         dim = len(embeddings[0])
         # Get the index name from either from kwargs or ENV Variable
@@ -843,8 +910,8 @@ class OpenSearchVectorSearch(VectorStore):
             index_name,
             embeddings,
             texts,
-            metadatas=metadatas,
             ids=ids,
+            metadatas=metadatas,
             vector_field=vector_field,
             text_field=text_field,
             mapping=mapping,
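
For reference, a minimal sketch of how the new `from_embeddings` classmethod would be called once this patch is applied. It assumes an OpenSearch instance reachable at `http://localhost:9200` (the URL used in the docstring example) and OpenAI credentials available in the environment; the variable names `embedder` and `vectorstore` are illustrative, not part of the API.

```python
from langchain.embeddings import OpenAIEmbeddings
from langchain.vectorstores import OpenSearchVectorSearch

texts = ["foo", "bar"]
embedder = OpenAIEmbeddings()

# Embed the texts once up front; from_embeddings indexes these vectors
# directly instead of re-embedding the texts as from_texts does.
embeddings = embedder.embed_documents(texts)

vectorstore = OpenSearchVectorSearch.from_embeddings(
    embeddings,
    texts,
    embedder,  # still required so later queries can be embedded
    opensearch_url="http://localhost:9200",
)
```

With this change, `from_texts` simply embeds the texts and delegates to `from_embeddings`, so both entry points share the same validation and bulk-ingestion path.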