set default embedding max token size (#2330)

#991 has already implemented this convenient feature to prevent
exceeding max token limit in embedding model.

> By default, this function is deactivated so as not to change the
previous behavior. If you specify something like 8191 here, it will work
as desired.
According to the author, this is deliberately not set by default.
Until now, the default model in OpenAIEmbeddings (text-embedding-ada-002) has had a
max token size of 8191 tokens, and no other OpenAI embedding model has a larger token limit.
So I believe it would be better to set this as the default value; otherwise
users may encounter this error and find it hard to diagnose.
doc
AeroXi 1 year ago committed by GitHub
parent 0316900d2f
commit e131156805
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23

@ -96,7 +96,7 @@ class OpenAIEmbeddings(BaseModel, Embeddings):
# https://github.com/openai/openai-python/issues/132
document_model_name: str = "text-embedding-ada-002"
query_model_name: str = "text-embedding-ada-002"
embedding_ctx_length: int = -1
embedding_ctx_length: int = 8191
openai_api_key: Optional[str] = None
chunk_size: int = 1000
"""Maximum number of texts to embed in each batch"""

Loading…
Cancel
Save