# langchain/tests/integration_tests/vectorstores/test_vectara.py

from langchain.docstore.document import Document
from langchain.vectorstores.vectara import Vectara
from tests.integration_tests.vectorstores.fake_embeddings import FakeEmbeddings


def get_abbr(s: str) -> str:
    """Return the abbreviation built from the first letter of each word in *s*.

    Words are delimited by runs of whitespace, so leading, trailing, or
    repeated spaces never produce an empty word. An empty or whitespace-only
    string yields ``""``.

    Args:
        s: The phrase to abbreviate, e.g. ``"large language model"``.

    Returns:
        The concatenated first letters, with their case preserved, e.g.
        ``"llm"``.
    """
    # split() with no separator drops empty strings, so word[0] cannot raise
    # IndexError the way it would with split(" ") on "" or on "a  b".
    return "".join(word[0] for word in s.split())


def test_vectara_add_documents() -> None:
    """Index texts in two stages, then check what similarity search returns."""

    # Stage 1: build the store from an initial batch of texts and metadata.
    seed_texts = [
        "grounded generation",
        "retrieval augmented generation",
        "data privacy",
    ]
    seed_metadatas = [{"abbr": "gg"}, {"abbr": "rag"}, {"abbr": "dp"}]
    docsearch: Vectara = Vectara.from_texts(
        seed_texts,
        embedding=FakeEmbeddings(),
        metadatas=seed_metadatas,
    )

    # Stage 2: append more documents, each tagged with its abbreviation.
    extra_docs = []
    for text in ["large language model", "information retrieval", "question answering"]:
        extra_docs.append(
            Document(page_content=text, metadata={"abbr": get_abbr(text)})
        )
    docsearch.add_documents(extra_docs)

    # Query for one of the newly added texts and inspect the two hits returned.
    hits = docsearch.similarity_search(
        "large language model", k=2, n_sentence_context=0
    )

    best = hits[0]
    assert best.page_content == "large language model"
    assert best.metadata == {"abbr": "llm"}

    second = hits[1]
    assert second.page_content == "information retrieval"
    assert second.metadata == {"abbr": "ir"}
Vectara (#5069) # Vectara Integration This PR provides integration with Vectara. Implemented here are: * langchain/vectorstore/vectara.py * tests/integration_tests/vectorstores/test_vectara.py * langchain/retrievers/vectara_retriever.py And two IPYNB notebooks to do more testing: * docs/modules/chains/index_examples/vectara_text_generation.ipynb * docs/modules/indexes/vectorstores/examples/vectara.ipynb --------- Co-authored-by: Dev 2049 <dev.dev2049@gmail.com> 2023-05-24 08:24:58 +00:00			`from langchain.docstore.document import Document`
			`from langchain.vectorstores.vectara import Vectara`
			`from tests.integration_tests.vectorstores.fake_embeddings import FakeEmbeddings`


			`def get_abbr(s: str) -> str:`
			`words = s.split(" ") # Split the string into words`
			`first_letters = [word[0] for word in words] # Extract the first letter of each word`
			`return "".join(first_letters) # Join the first letters into a single string`


			`def test_vectara_add_documents() -> None:`
			`"""Test end to end construction and search."""`

			`# start with some initial documents`
			`texts = ["grounded generation", "retrieval augmented generation", "data privacy"]`
			`docsearch: Vectara = Vectara.from_texts(`
			`texts,`
			`embedding=FakeEmbeddings(),`
			`metadatas=[{"abbr": "gg"}, {"abbr": "rag"}, {"abbr": "dp"}],`
			`)`

			`# then add some additional documents`
			`new_texts = ["large language model", "information retrieval", "question answering"]`
			`docsearch.add_documents(`
			`[Document(page_content=t, metadata={"abbr": get_abbr(t)}) for t in new_texts]`
			`)`

			`# finally do a similarity search to see if all works okay`
Update to Vectara integration (#5950) This PR updates the Vectara integration (@hwchase17 ): * Adds reuse of requests.session to improve efficiency and speed. * Utilizes Vectara's low-level API (instead of standard API) to better match user's specific chunking with LangChain * Now add_texts puts all the texts into a single Vectara document so indexing is much faster. * updated variables names from alpha to lambda_val (to be consistent with Vectara docs) and added n_sentence_context so it's available to use if needed. * Updates to documentation and tests --------- Co-authored-by: Harrison Chase <hw.chase.17@gmail.com> 2023-06-10 23:27:01 +00:00			`output = docsearch.similarity_search(`
			`"large language model", k=2, n_sentence_context=0`
			`)`
Vectara (#5069) # Vectara Integration This PR provides integration with Vectara. Implemented here are: * langchain/vectorstore/vectara.py * tests/integration_tests/vectorstores/test_vectara.py * langchain/retrievers/vectara_retriever.py And two IPYNB notebooks to do more testing: * docs/modules/chains/index_examples/vectara_text_generation.ipynb * docs/modules/indexes/vectorstores/examples/vectara.ipynb --------- Co-authored-by: Dev 2049 <dev.dev2049@gmail.com> 2023-05-24 08:24:58 +00:00			`assert output[0].page_content == "large language model"`
			`assert output[0].metadata == {"abbr": "llm"}`
			`assert output[1].page_content == "information retrieval"`
			`assert output[1].metadata == {"abbr": "ir"}`