diff --git a/docs/docs/integrations/document_transformers/voyageai-reranker.ipynb b/docs/docs/integrations/document_transformers/voyageai-reranker.ipynb index 1086941fea..27e471d753 100644 --- a/docs/docs/integrations/document_transformers/voyageai-reranker.ipynb +++ b/docs/docs/integrations/document_transformers/voyageai-reranker.ipynb @@ -87,7 +87,9 @@ "## Set up the base vector store retriever\n", "Let's start by initializing a simple vector store retriever and storing the 2023 State of the Union speech (in chunks). We can set up the retriever to retrieve a high number (20) of docs. You can use any of the following Embeddings models: ([source](https://docs.voyageai.com/docs/embeddings)):\n", "\n", - "- `voyage-large-2` (default)\n", + "- `voyage-3`\n", + "- `voyage-3-lite`\n", + "- `voyage-large-2`\n", "- `voyage-code-2`\n", "- `voyage-2`\n", "- `voyage-law-2`\n", @@ -341,6 +343,8 @@ "## Doing reranking with VoyageAIRerank\n", "Now let's wrap our base retriever with a `ContextualCompressionRetriever`. We'll use the Voyage AI reranker to rerank the returned results. You can use any of the following Reranking models: ([source](https://docs.voyageai.com/docs/reranker)):\n", "\n", + "- `rerank-2`\n", + "- `rerank-2-lite`\n", "- `rerank-1`\n", "- `rerank-lite-1`" ] diff --git a/docs/docs/integrations/text_embedding/voyageai.ipynb b/docs/docs/integrations/text_embedding/voyageai.ipynb index 908b16cfc6..1bc2391ba9 100644 --- a/docs/docs/integrations/text_embedding/voyageai.ipynb +++ b/docs/docs/integrations/text_embedding/voyageai.ipynb @@ -29,7 +29,9 @@ "source": [ "Voyage AI utilizes API keys to monitor usage and manage permissions. To obtain your key, create an account on our [homepage](https://www.voyageai.com). Then, create a VoyageEmbeddings model with your API key. 
You can use any of the following models: ([source](https://docs.voyageai.com/docs/embeddings)):\n", "\n", - "- `voyage-large-2` (default)\n", + "- `voyage-3`\n", + "- `voyage-3-lite`\n", + "- `voyage-large-2`\n", "- `voyage-code-2`\n", "- `voyage-2`\n", "- `voyage-law-2`\n", diff --git a/docs/src/theme/FeatureTables.js b/docs/src/theme/FeatureTables.js index 0b80e6f207..05f96a4b3f 100644 --- a/docs/src/theme/FeatureTables.js +++ b/docs/src/theme/FeatureTables.js @@ -364,6 +364,12 @@ const FEATURE_TABLES = { package: "langchain-databricks", apiLink: "https://python.langchain.com/api_reference/nomic/embeddings/langchain_databricks.embeddings.DatabricksEmbeddings.html" }, + { + name: "VoyageAI", + link: "voyageai", + package: "langchain-voyageai", + apiLink: "https://python.langchain.com/api_reference/voyageai/embeddings/langchain_voyageai.embeddings.VoyageAIEmbeddings.html" + }, ] }, document_retrievers: { diff --git a/libs/partners/voyageai/langchain_voyageai/embeddings.py b/libs/partners/voyageai/langchain_voyageai/embeddings.py index 594ae156c4..725dc176ca 100644 --- a/libs/partners/voyageai/langchain_voyageai/embeddings.py +++ b/libs/partners/voyageai/langchain_voyageai/embeddings.py @@ -16,6 +16,11 @@ from typing_extensions import Self logger = logging.getLogger(__name__) +DEFAULT_VOYAGE_2_BATCH_SIZE = 72 +DEFAULT_VOYAGE_3_LITE_BATCH_SIZE = 30 +DEFAULT_VOYAGE_3_BATCH_SIZE = 10 +DEFAULT_BATCH_SIZE = 7 + class VoyageAIEmbeddings(BaseModel, Embeddings): """VoyageAIEmbeddings embedding model. 
@@ -55,7 +60,19 @@ class VoyageAIEmbeddings(BaseModel, Embeddings): model = values.get("model") batch_size = values.get("batch_size") if batch_size is None: - values["batch_size"] = 72 if model in ["voyage-2", "voyage-02"] else 7 + values["batch_size"] = ( + DEFAULT_VOYAGE_2_BATCH_SIZE + if model in ["voyage-2", "voyage-02"] + else ( + DEFAULT_VOYAGE_3_LITE_BATCH_SIZE + if model == "voyage-3-lite" + else ( + DEFAULT_VOYAGE_3_BATCH_SIZE + if model == "voyage-3" + else DEFAULT_BATCH_SIZE + ) + ) + ) return values @model_validator(mode="after")