mirror of
https://github.com/hwchase17/langchain
synced 2024-11-13 19:10:52 +00:00
voyageai: new models (#26907)
Co-authored-by: fzowl <zoltan@voyageai.com> Co-authored-by: fzowl <160063452+fzowl@users.noreply.github.com>
This commit is contained in:
parent
2a0d9d05fb
commit
8bc12df2eb
@ -87,7 +87,9 @@
|
||||
"## Set up the base vector store retriever\n",
|
||||
"Let's start by initializing a simple vector store retriever and storing the 2023 State of the Union speech (in chunks). We can set up the retriever to retrieve a high number (20) of docs. You can use any of the following Embeddings models: ([source](https://docs.voyageai.com/docs/embeddings)):\n",
|
||||
"\n",
|
||||
"- `voyage-large-2` (default)\n",
|
||||
"- `voyage-3`\n",
|
||||
"- `voyage-3-lite` \n",
|
||||
"- `voyage-large-2`\n",
|
||||
"- `voyage-code-2`\n",
|
||||
"- `voyage-2`\n",
|
||||
"- `voyage-law-2`\n",
|
||||
@ -341,6 +343,8 @@
|
||||
"## Doing reranking with VoyageAIRerank\n",
|
||||
"Now let's wrap our base retriever with a `ContextualCompressionRetriever`. We'll use the Voyage AI reranker to rerank the returned results. You can use any of the following Reranking models: ([source](https://docs.voyageai.com/docs/reranker)):\n",
|
||||
"\n",
|
||||
"- `rerank-2`\n",
|
||||
"- `rerank-2-lite`\n",
|
||||
"- `rerank-1`\n",
|
||||
"- `rerank-lite-1`"
|
||||
]
|
||||
|
@ -29,7 +29,9 @@
|
||||
"source": [
|
||||
"Voyage AI utilizes API keys to monitor usage and manage permissions. To obtain your key, create an account on our [homepage](https://www.voyageai.com). Then, create a VoyageEmbeddings model with your API key. You can use any of the following models: ([source](https://docs.voyageai.com/docs/embeddings)):\n",
|
||||
"\n",
|
||||
"- `voyage-large-2` (default)\n",
|
||||
"- `voyage-3`\n",
|
||||
"- `voyage-3-lite`\n",
|
||||
"- `voyage-large-2`\n",
|
||||
"- `voyage-code-2`\n",
|
||||
"- `voyage-2`\n",
|
||||
"- `voyage-law-2`\n",
|
||||
|
@ -364,6 +364,12 @@ const FEATURE_TABLES = {
|
||||
package: "langchain-databricks",
|
||||
apiLink: "https://python.langchain.com/api_reference/nomic/embeddings/langchain_databricks.embeddings.DatabricksEmbeddings.html"
|
||||
},
|
||||
{
|
||||
name: "VoyageAI",
|
||||
link: "voyageai",
|
||||
package: "langchain-voyageai",
|
||||
apiLink: "https://python.langchain.com/api_reference/voyageai/embeddings/langchain_voyageai.embeddings.VoyageAIEmbeddings.html"
|
||||
},
|
||||
]
|
||||
},
|
||||
document_retrievers: {
|
||||
|
@ -16,6 +16,11 @@ from typing_extensions import Self
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
DEFAULT_VOYAGE_2_BATCH_SIZE = 72
|
||||
DEFAULT_VOYAGE_3_LITE_BATCH_SIZE = 30
|
||||
DEFAULT_VOYAGE_3_BATCH_SIZE = 10
|
||||
DEFAULT_BATCH_SIZE = 7
|
||||
|
||||
|
||||
class VoyageAIEmbeddings(BaseModel, Embeddings):
|
||||
"""VoyageAIEmbeddings embedding model.
|
||||
@ -55,7 +60,19 @@ class VoyageAIEmbeddings(BaseModel, Embeddings):
|
||||
model = values.get("model")
|
||||
batch_size = values.get("batch_size")
|
||||
if batch_size is None:
|
||||
values["batch_size"] = 72 if model in ["voyage-2", "voyage-02"] else 7
|
||||
values["batch_size"] = (
|
||||
DEFAULT_VOYAGE_2_BATCH_SIZE
|
||||
if model in ["voyage-2", "voyage-02"]
|
||||
else (
|
||||
DEFAULT_VOYAGE_3_LITE_BATCH_SIZE
|
||||
if model == "voyage-3-lite"
|
||||
else (
|
||||
DEFAULT_VOYAGE_3_BATCH_SIZE
|
||||
if model == "voyage-3"
|
||||
else DEFAULT_BATCH_SIZE
|
||||
)
|
||||
)
|
||||
)
|
||||
return values
|
||||
|
||||
@model_validator(mode="after")
|
||||
|
Loading…
Reference in New Issue
Block a user