Reduce generation of temporary objects (#7950)

Just a tiny change to use `list.append(...)` and `list.extend(...)`
instead of `list += [...]` so that no unnecessary temporary lists are
created.
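For illustration only (this sketch is not part of the commit, and the function names are made up), here is the difference in miniature: `lst += [x]` allocates a throwaway one-element list on every iteration before its contents are copied over, whereas `lst.append(x)` adds the element directly; likewise, `lst.extend(gen)` with a generator expression consumes items lazily instead of materializing an intermediate list first.

```python
import timeit

def plus_equals(n: int) -> list[int]:
    out: list[int] = []
    for i in range(n):
        out += [i]  # builds a one-element list each iteration, then discards it
    return out

def append(n: int) -> list[int]:
    out: list[int] = []
    for i in range(n):
        out.append(i)  # appends in place, no intermediate list
    return out

def extend_lazy(n: int) -> list[int]:
    out: list[int] = []
    # the generator expression is consumed item by item;
    # `out += [i * i for i in range(n)]` would materialize the whole list first
    out.extend(i * i for i in range(n))
    return out

if __name__ == "__main__":
    for fn in (plus_equals, append, extend_lazy):
        secs = timeit.timeit(lambda: fn(100_000), number=20)
        print(f"{fn.__name__}: {secs:.3f}s")
```

The timing difference is modest in CPython, but the allocation churn is pure overhead, which is all this change removes.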

Since it's a tiny miscellaneous change, I guess @baskaryan is the
maintainer to tag?

---------

Co-authored-by: Harrison Chase <hw.chase.17@gmail.com>
Mathias Panzenböck, 2023-08-04 06:24:08 +02:00 (committed by GitHub)
parent d1b95db874
commit 873a80e496

@@ -338,10 +338,10 @@ class OpenAIEmbeddings(BaseModel, Embeddings):
                 disallowed_special=self.disallowed_special,
             )
             for j in range(0, len(token), self.embedding_ctx_length):
-                tokens += [token[j : j + self.embedding_ctx_length]]
-                indices += [i]
+                tokens.append(token[j : j + self.embedding_ctx_length])
+                indices.append(i)
 
-        batched_embeddings = []
+        batched_embeddings: List[List[float]] = []
         _chunk_size = chunk_size or self.chunk_size
 
         if self.show_progress_bar:
@@ -360,7 +360,7 @@ class OpenAIEmbeddings(BaseModel, Embeddings):
                 input=tokens[i : i + _chunk_size],
                 **self._invocation_params,
             )
-            batched_embeddings += [r["embedding"] for r in response["data"]]
+            batched_embeddings.extend(r["embedding"] for r in response["data"])
 
         results: List[List[List[float]]] = [[] for _ in range(len(texts))]
         num_tokens_in_batch: List[List[int]] = [[] for _ in range(len(texts))]
@@ -419,10 +419,10 @@ class OpenAIEmbeddings(BaseModel, Embeddings):
                 disallowed_special=self.disallowed_special,
             )
             for j in range(0, len(token), self.embedding_ctx_length):
-                tokens += [token[j : j + self.embedding_ctx_length]]
-                indices += [i]
+                tokens.append(token[j : j + self.embedding_ctx_length])
+                indices.append(i)
 
-        batched_embeddings = []
+        batched_embeddings: List[List[float]] = []
         _chunk_size = chunk_size or self.chunk_size
         for i in range(0, len(tokens), _chunk_size):
             response = await async_embed_with_retry(
@@ -430,7 +430,7 @@ class OpenAIEmbeddings(BaseModel, Embeddings):
                 input=tokens[i : i + _chunk_size],
                 **self._invocation_params,
             )
-            batched_embeddings += [r["embedding"] for r in response["data"]]
+            batched_embeddings.extend(r["embedding"] for r in response["data"])
 
         results: List[List[List[float]]] = [[] for _ in range(len(texts))]
         num_tokens_in_batch: List[List[int]] = [[] for _ in range(len(texts))]