Newlines should be removed only for the 1st-gen embedding models (#3853)

Only 1st-generation OpenAI embedding models are negatively impacted by
newlines.

Context:
https://github.com/openai/openai-python/issues/418#issuecomment-1525939500
fix_agent_callbacks
Rafal Wojdyla 1 year ago committed by GitHub
parent 6bd367916c
commit a5a4999fb7
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23

@@ -158,8 +158,10 @@ class OpenAIEmbeddings(BaseModel, Embeddings):
indices = []
encoding = tiktoken.model.encoding_for_model(self.model)
for i, text in enumerate(texts):
# replace newlines, which can negatively affect performance.
text = text.replace("\n", " ")
if self.model.endswith("001"):
# See: https://github.com/openai/openai-python/issues/418#issuecomment-1525939500
# replace newlines, which can negatively affect performance.
text = text.replace("\n", " ")
token = encoding.encode(
text,
allowed_special=self.allowed_special,
@@ -212,8 +214,10 @@ class OpenAIEmbeddings(BaseModel, Embeddings):
if len(text) > self.embedding_ctx_length:
return self._get_len_safe_embeddings([text], engine=engine)[0]
else:
# replace newlines, which can negatively affect performance.
text = text.replace("\n", " ")
if self.model.endswith("001"):
# See: https://github.com/openai/openai-python/issues/418#issuecomment-1525939500
# replace newlines, which can negatively affect performance.
text = text.replace("\n", " ")
return embed_with_retry(self, input=[text], engine=engine)["data"][0][
"embedding"
]

Loading…
Cancel
Save