Reduce generation of temporary objects (#7950)

Just a tiny change to use `list.append(...)` and `list.extend(...)`
instead of `list += [...]` so that no unnecessary temporary lists are
created.

Since it's a tiny miscellaneous thing, I guess @baskaryan is the
maintainer to tag?

---------

Co-authored-by: Harrison Chase <hw.chase.17@gmail.com>
pull/8738/head
Mathias Panzenböck 1 year ago committed by GitHub
parent d1b95db874
commit 873a80e496
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23

@@ -338,10 +338,10 @@ class OpenAIEmbeddings(BaseModel, Embeddings):
disallowed_special=self.disallowed_special,
)
for j in range(0, len(token), self.embedding_ctx_length):
tokens += [token[j : j + self.embedding_ctx_length]]
indices += [i]
tokens.append(token[j : j + self.embedding_ctx_length])
indices.append(i)
batched_embeddings = []
batched_embeddings: List[List[float]] = []
_chunk_size = chunk_size or self.chunk_size
if self.show_progress_bar:
@@ -360,7 +360,7 @@ class OpenAIEmbeddings(BaseModel, Embeddings):
input=tokens[i : i + _chunk_size],
**self._invocation_params,
)
batched_embeddings += [r["embedding"] for r in response["data"]]
batched_embeddings.extend(r["embedding"] for r in response["data"])
results: List[List[List[float]]] = [[] for _ in range(len(texts))]
num_tokens_in_batch: List[List[int]] = [[] for _ in range(len(texts))]
@@ -419,10 +419,10 @@ class OpenAIEmbeddings(BaseModel, Embeddings):
disallowed_special=self.disallowed_special,
)
for j in range(0, len(token), self.embedding_ctx_length):
tokens += [token[j : j + self.embedding_ctx_length]]
indices += [i]
tokens.append(token[j : j + self.embedding_ctx_length])
indices.append(i)
batched_embeddings = []
batched_embeddings: List[List[float]] = []
_chunk_size = chunk_size or self.chunk_size
for i in range(0, len(tokens), _chunk_size):
response = await async_embed_with_retry(
@@ -430,7 +430,7 @@ class OpenAIEmbeddings(BaseModel, Embeddings):
input=tokens[i : i + _chunk_size],
**self._invocation_params,
)
batched_embeddings += [r["embedding"] for r in response["data"]]
batched_embeddings.extend(r["embedding"] for r in response["data"])
results: List[List[List[float]]] = [[] for _ in range(len(texts))]
num_tokens_in_batch: List[List[int]] = [[] for _ in range(len(texts))]

Loading…
Cancel
Save