From 5da9dd1195d2e2bb0ec7424cd7d3137b14314275 Mon Sep 17 00:00:00 2001 From: Erick Friis Date: Wed, 24 Apr 2024 17:38:21 -0700 Subject: [PATCH] mistral: comment batching param (#20868) Addresses #20523 --- libs/partners/mistralai/langchain_mistralai/embeddings.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/libs/partners/mistralai/langchain_mistralai/embeddings.py b/libs/partners/mistralai/langchain_mistralai/embeddings.py index 2003cd7e43..268918a9f9 100644 --- a/libs/partners/mistralai/langchain_mistralai/embeddings.py +++ b/libs/partners/mistralai/langchain_mistralai/embeddings.py @@ -18,6 +18,10 @@ from tokenizers import Tokenizer # type: ignore logger = logging.getLogger(__name__) MAX_TOKENS = 16_000 +"""A batching parameter for the Mistral API. This is NOT the maximum number of tokens +accepted by the embedding model for each document/chunk, but rather the maximum number +of tokens that can be sent in a single request to the Mistral API (across multiple +documents/chunks).""" class DummyTokenizer: