langchain/tests/integration_tests/vectorstores/test_vectara.py
Ofer Mendelevitch f8cf09a230
Update to Vectara integration (#5950)
This PR updates the Vectara integration (@hwchase17 ):
* Adds reuse of requests.session to improve efficiency and speed.
* Utilizes Vectara's low-level API (instead of standard API) to better
match user's specific chunking with LangChain
* Now add_texts puts all the texts into a single Vectara document so
indexing is much faster.
* Updated variable names from alpha to lambda_val (to be consistent
with Vectara docs) and added n_sentence_context so it's available to use
if needed.
* Updates to documentation and tests

---------

Co-authored-by: Harrison Chase <hw.chase.17@gmail.com>
2023-06-10 16:27:01 -07:00

37 lines
1.4 KiB
Python

from langchain.docstore.document import Document
from langchain.vectorstores.vectara import Vectara
from tests.integration_tests.vectorstores.fake_embeddings import FakeEmbeddings
def get_abbr(s: str) -> str:
    """Return the abbreviation formed by the first letter of each word in *s*.

    Words are separated by single space characters. Empty tokens, which
    appear when *s* is empty or has leading, trailing, or repeated spaces,
    are skipped so they cannot trigger an ``IndexError``.

    Args:
        s: The phrase to abbreviate, e.g. ``"large language model"``.

    Returns:
        The concatenated first characters, e.g. ``"llm"``; an empty string
        when *s* contains no words.
    """
    # Splitting on a literal " " keeps tabs/newlines inside a word, exactly as
    # before; the `if word` filter only drops the empty tokens.
    return "".join(word[0] for word in s.split(" ") if word)
def test_vectara_add_documents() -> None:
    """Build a Vectara store, add more documents, and check a similarity search."""
    # Seed the store with three texts, each tagged with its abbreviation.
    seed_texts = [
        "grounded generation",
        "retrieval augmented generation",
        "data privacy",
    ]
    seed_metadatas = [{"abbr": "gg"}, {"abbr": "rag"}, {"abbr": "dp"}]
    store: Vectara = Vectara.from_texts(
        seed_texts,
        embedding=FakeEmbeddings(),
        metadatas=seed_metadatas,
    )

    # Add a second batch as Document objects; metadata is derived via get_abbr.
    extra_docs = []
    for text in ["large language model", "information retrieval", "question answering"]:
        extra_docs.append(
            Document(page_content=text, metadata={"abbr": get_abbr(text)})
        )
    store.add_documents(extra_docs)

    # Query for one of the newly added texts and check the top two hits in order.
    hits = store.similarity_search(
        "large language model",
        k=2,
        n_sentence_context=0,
    )
    expected = [("large language model", "llm"), ("information retrieval", "ir")]
    for rank, (content, abbr) in enumerate(expected):
        assert hits[rank].page_content == content
        assert hits[rank].metadata == {"abbr": abbr}