From 873a80e496f63b4711703852d6e636549530556d Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Mathias=20Panzenb=C3=B6ck?=
Date: Fri, 4 Aug 2023 06:24:08 +0200
Subject: [PATCH] Reduce generation of temporary objects (#7950)

Just a tiny change to use `list.append(...)` and `list.extend(...)` instead
of `list += [...]` so that no unnecessary temporary lists are created.

Since it's a tiny miscellaneous thing I guess @baskaryan is the maintainer
to tag?

---------

Co-authored-by: Harrison Chase
---
 libs/langchain/langchain/embeddings/openai.py | 16 ++++++++--------
 1 file changed, 8 insertions(+), 8 deletions(-)

diff --git a/libs/langchain/langchain/embeddings/openai.py b/libs/langchain/langchain/embeddings/openai.py
index 383c8f4649..53fb30d16c 100644
--- a/libs/langchain/langchain/embeddings/openai.py
+++ b/libs/langchain/langchain/embeddings/openai.py
@@ -338,10 +338,10 @@ class OpenAIEmbeddings(BaseModel, Embeddings):
                 disallowed_special=self.disallowed_special,
             )
             for j in range(0, len(token), self.embedding_ctx_length):
-                tokens += [token[j : j + self.embedding_ctx_length]]
-                indices += [i]
+                tokens.append(token[j : j + self.embedding_ctx_length])
+                indices.append(i)
 
-        batched_embeddings = []
+        batched_embeddings: List[List[float]] = []
         _chunk_size = chunk_size or self.chunk_size
 
         if self.show_progress_bar:
@@ -360,7 +360,7 @@ class OpenAIEmbeddings(BaseModel, Embeddings):
                 input=tokens[i : i + _chunk_size],
                 **self._invocation_params,
             )
-            batched_embeddings += [r["embedding"] for r in response["data"]]
+            batched_embeddings.extend(r["embedding"] for r in response["data"])
 
         results: List[List[List[float]]] = [[] for _ in range(len(texts))]
         num_tokens_in_batch: List[List[int]] = [[] for _ in range(len(texts))]
@@ -419,10 +419,10 @@ class OpenAIEmbeddings(BaseModel, Embeddings):
                 disallowed_special=self.disallowed_special,
             )
             for j in range(0, len(token), self.embedding_ctx_length):
-                tokens += [token[j : j + self.embedding_ctx_length]]
-                indices += [i]
+                tokens.append(token[j : j + self.embedding_ctx_length])
+                indices.append(i)
 
-        batched_embeddings = []
+        batched_embeddings: List[List[float]] = []
         _chunk_size = chunk_size or self.chunk_size
         for i in range(0, len(tokens), _chunk_size):
             response = await async_embed_with_retry(
@@ -430,7 +430,7 @@ class OpenAIEmbeddings(BaseModel, Embeddings):
                 input=tokens[i : i + _chunk_size],
                 **self._invocation_params,
            )
-            batched_embeddings += [r["embedding"] for r in response["data"]]
+            batched_embeddings.extend(r["embedding"] for r in response["data"])
 
         results: List[List[List[float]]] = [[] for _ in range(len(texts))]
         num_tokens_in_batch: List[List[int]] = [[] for _ in range(len(texts))]
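
Note (not part of the patch): a minimal standalone sketch of the difference this change exploits, using hypothetical lists rather than the real `OpenAIEmbeddings` state. `+=` with a list literal or list comprehension builds a throwaway list on the right-hand side before its items are copied in; `append`/`extend` skip that intermediate allocation, and `extend` can also consume a generator expression lazily.

```python
from typing import List

tokens: List[List[int]] = []

# `+=` wraps the chunk in a one-element temporary list just to unpack it again.
tokens += [[1, 2, 3]]

# `append` adds the chunk directly, with no intermediate list.
tokens.append([4, 5, 6])

batched: List[float] = []

# `+=` materialises the full right-hand list before extending.
batched += [x * 2.0 for x in (1, 2, 3)]

# `extend` accepts a generator expression, so no intermediate list is built.
batched.extend(x * 2.0 for x in (4, 5, 6))

assert tokens == [[1, 2, 3], [4, 5, 6]]
assert batched == [2.0, 4.0, 6.0, 8.0, 10.0, 12.0]
```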