From e131156805962fd8fa2a984f0f7d996475d0c089 Mon Sep 17 00:00:00 2001 From: AeroXi <31943024+AeroXi@users.noreply.github.com> Date: Fri, 7 Apr 2023 13:32:24 +0800 Subject: [PATCH] set default embedding max token size (#2330) #991 has already implemented this convenient feature to prevent exceeding the max token limit in the embedding model. > By default, this function is deactivated so as not to change the previous behavior. If you specify something like 8191 here, it will work as desired. According to the author, this is not set by default. Until now, the max token size of OpenAIEmbeddings's default model is 8191 tokens, and no other openai model has a larger token limit. So I believe it will be better to set this as the default value; otherwise users may encounter this error and find it hard to solve. --- langchain/embeddings/openai.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/langchain/embeddings/openai.py b/langchain/embeddings/openai.py index fd57d281..6c206ebc 100644 --- a/langchain/embeddings/openai.py +++ b/langchain/embeddings/openai.py @@ -96,7 +96,7 @@ class OpenAIEmbeddings(BaseModel, Embeddings): # https://github.com/openai/openai-python/issues/132 document_model_name: str = "text-embedding-ada-002" query_model_name: str = "text-embedding-ada-002" - embedding_ctx_length: int = -1 + embedding_ctx_length: int = 8191 openai_api_key: Optional[str] = None chunk_size: int = 1000 """Maximum number of texts to embed in each batch"""