diff --git a/docs/extras/integrations/providers/vectara/index.mdx b/docs/extras/integrations/providers/vectara/index.mdx
index 627a234a3b..0e30735f0b 100644
--- a/docs/extras/integrations/providers/vectara/index.mdx
+++ b/docs/extras/integrations/providers/vectara/index.mdx
@@ -63,7 +63,7 @@ results = vectara.similarity_score("what is LangChain?")
 - `k`: number of results to return (defaults to 5)
 - `lambda_val`: the [lexical matching](https://docs.vectara.com/docs/api-reference/search-apis/lexical-matching) factor for hybrid search (defaults to 0.025)
 - `filter`: a [filter](https://docs.vectara.com/docs/common-use-cases/filtering-by-metadata/filter-overview) to apply to the results (default None)
-- `n_sentence_context`: number of sentences to include before/after the actual matching segment when returning results. This defaults to 0 so as to return the exact text segment that matches, but can be used with other values e.g. 2 or 3 to return adjacent text segments.
+- `n_sentence_context`: number of sentences to include before/after the actual matching segment when returning results. This defaults to 2, so each result includes two sentences of context on either side of the match; set it to 0 to return only the exact matching text segment.
 
 The results are returned as a list of relevant documents, and a relevance score of each document.
 
diff --git a/libs/langchain/langchain/vectorstores/vectara.py b/libs/langchain/langchain/vectorstores/vectara.py
index 8c7b0ac2d3..16a6e8c500 100644
--- a/libs/langchain/langchain/vectorstores/vectara.py
+++ b/libs/langchain/langchain/vectorstores/vectara.py
@@ -245,7 +245,7 @@ class Vectara(VectorStore):
         k: int = 5,
         lambda_val: float = 0.025,
         filter: Optional[str] = None,
-        n_sentence_context: int = 0,
+        n_sentence_context: int = 2,
         **kwargs: Any,
     ) -> List[Tuple[Document, float]]:
         """Return Vectara documents most similar to query, along with scores.
@@ -259,7 +259,7 @@ class Vectara(VectorStore):
                 https://docs.vectara.com/docs/search-apis/sql/filter-overview
                 for more details.
             n_sentence_context: number of sentences before/after the matching segment
-                to add
+                to add, defaults to 2
 
         Returns:
             List of Documents most similar to the query and score for each.
@@ -328,7 +328,7 @@ class Vectara(VectorStore):
         k: int = 5,
         lambda_val: float = 0.025,
         filter: Optional[str] = None,
-        n_sentence_context: int = 0,
+        n_sentence_context: int = 2,
         **kwargs: Any,
     ) -> List[Document]:
         """Return Vectara documents most similar to query, along with scores.
@@ -341,7 +341,7 @@ class Vectara(VectorStore):
                 https://docs.vectara.com/docs/search-apis/sql/filter-overview
                 for more details.
             n_sentence_context: number of sentences before/after the matching segment
-                to add
+                to add, defaults to 2
 
         Returns:
             List of Documents most similar to the query
@@ -427,7 +427,7 @@ class VectaraRetriever(VectorStoreRetriever):
             "lambda_val": 0.025,
             "k": 5,
             "filter": "",
-            "n_sentence_context": "0",
+            "n_sentence_context": "2",
         }
     )
     """Search params.
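
For reference, a minimal usage sketch of what the new default means for callers, based on the signatures in the hunks above. This is not part of the diff; it assumes credentials are supplied via the `VECTARA_CUSTOMER_ID`, `VECTARA_CORPUS_ID`, and `VECTARA_API_KEY` environment variables, and the query string is illustrative:

```python
from langchain.vectorstores import Vectara

# The Vectara client falls back to the VECTARA_* environment variables
# when no explicit credentials are passed to the constructor.
vectara = Vectara()

# With the new default (n_sentence_context=2), each returned segment
# carries two sentences of context on either side of the match.
results = vectara.similarity_search_with_score("what is LangChain?", k=5)

# Passing n_sentence_context=0 explicitly restores the previous behavior
# and returns only the exact matching text segment.
exact = vectara.similarity_search(
    "what is LangChain?",
    k=5,
    n_sentence_context=0,
)
```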
diff --git a/libs/langchain/tests/integration_tests/vectorstores/test_vectara.py b/libs/langchain/tests/integration_tests/vectorstores/test_vectara.py
index 2d725f88a3..57338e7f99 100644
--- a/libs/langchain/tests/integration_tests/vectorstores/test_vectara.py
+++ b/libs/langchain/tests/integration_tests/vectorstores/test_vectara.py
@@ -86,7 +86,6 @@ def test_vectara_from_files() -> None:
         n_sentence_context=0,
         filter="doc.test_num = 2",
     )
-    print(output)
     assert output[0].page_content == (
         "By the commonly adopted machine learning tradition "
         "(e.g., Chapter 28 in Murphy, 2012; Deng and Li, 2013), it may be natural "
@@ -94,3 +93,23 @@
         "(e.g., DNNs) and deep probabilistic generative models (e.g., DBN, Deep "
         "Boltzmann Machine (DBM))."
     )
+
+    # finally, verify that n_sentence_context=1 includes adjacent sentences
+    output = docsearch.similarity_search(
+        "By the commonly adopted machine learning tradition",
+        k=1,
+        n_sentence_context=1,
+        filter="doc.test_num = 2",
+    )
+    assert output[0].page_content == (
+        """\
+Note the use of “hybrid” in 3) above is different from that used sometimes in the literature, \
+which for example refers to the hybrid systems for speech recognition feeding the output probabilities of a neural network into an HMM \
+(Bengio et al., 1991; Bourlard and Morgan, 1993; Morgan, 2012). \
+By the commonly adopted machine learning tradition (e.g., Chapter 28 in Murphy, 2012; Deng and Li, 2013), \
+it may be natural to just classify deep learning techniques into deep discriminative models (e.g., DNNs) \
+and deep probabilistic generative models (e.g., DBN, Deep Boltzmann Machine (DBM)). \
+This classification scheme, however, misses a key insight gained in deep learning research about how generative \
+models can greatly improve the training of DNNs and other deep discriminative models via better regularization.\
+"""  # noqa: E501
+    )
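
The new default also lands in `VectaraRetriever.search_kwargs` (the `@@ -427` hunk above), so retrievers pick it up as well. A hedged sketch of opting a single retriever back into exact-segment results, reusing the `vectara` instance from the earlier example; the override dict mirrors the retriever's default shown in the diff:

```python
# Sketch only: as_retriever() on the Vectara store returns a
# VectaraRetriever; overriding search_kwargs here mirrors the default
# dict in the diff, with n_sentence_context set back to "0".
retriever = vectara.as_retriever(
    search_kwargs={
        "lambda_val": 0.025,
        "k": 5,
        "filter": "",
        "n_sentence_context": "0",
    }
)
docs = retriever.get_relevant_documents("what is LangChain?")
```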