|
|
|
@ -120,6 +120,8 @@ class OpenAIEmbeddings(BaseModel, Embeddings):
|
|
|
|
|
"""Maximum number of texts to embed in each batch"""
|
|
|
|
|
max_retries: int = 6
|
|
|
|
|
"""Maximum number of retries to make when generating."""
|
|
|
|
|
request_timeout: Optional[Union[float, Tuple[float, float]]] = None
|
|
|
|
|
"""Timeout in seconds for the OpenAI API request."""
|
|
|
|
|
|
|
|
|
|
class Config:
|
|
|
|
|
"""Configuration for this pydantic object."""
|
|
|
|
@ -207,6 +209,7 @@ class OpenAIEmbeddings(BaseModel, Embeddings):
|
|
|
|
|
self,
|
|
|
|
|
input=tokens[i : i + _chunk_size],
|
|
|
|
|
engine=self.deployment,
|
|
|
|
|
request_timeout=self.request_timeout,
|
|
|
|
|
)
|
|
|
|
|
batched_embeddings += [r["embedding"] for r in response["data"]]
|
|
|
|
|
|
|
|
|
@ -219,9 +222,12 @@ class OpenAIEmbeddings(BaseModel, Embeddings):
|
|
|
|
|
for i in range(len(texts)):
|
|
|
|
|
_result = results[i]
|
|
|
|
|
if len(_result) == 0:
|
|
|
|
|
average = embed_with_retry(self, input="", engine=self.deployment)[
|
|
|
|
|
"data"
|
|
|
|
|
][0]["embedding"]
|
|
|
|
|
average = embed_with_retry(
|
|
|
|
|
self,
|
|
|
|
|
input="",
|
|
|
|
|
engine=self.deployment,
|
|
|
|
|
request_timeout=self.request_timeout,
|
|
|
|
|
)["data"][0]["embedding"]
|
|
|
|
|
else:
|
|
|
|
|
average = np.average(
|
|
|
|
|
_result, axis=0, weights=num_tokens_in_batch[i]
|
|
|
|
@ -247,9 +253,9 @@ class OpenAIEmbeddings(BaseModel, Embeddings):
|
|
|
|
|
# See: https://github.com/openai/openai-python/issues/418#issuecomment-1525939500
|
|
|
|
|
# replace newlines, which can negatively affect performance.
|
|
|
|
|
text = text.replace("\n", " ")
|
|
|
|
|
return embed_with_retry(self, input=[text], engine=engine)["data"][0][
|
|
|
|
|
"embedding"
|
|
|
|
|
]
|
|
|
|
|
return embed_with_retry(
|
|
|
|
|
self, input=[text], engine=engine, request_timeout=self.request_timeout
|
|
|
|
|
)["data"][0]["embedding"]
|
|
|
|
|
|
|
|
|
|
def embed_documents(
|
|
|
|
|
self, texts: List[str], chunk_size: Optional[int] = 0
|
|
|
|
|