From c50099161be9a9c5c4b04f440ed207a4fb66cc53 Mon Sep 17 00:00:00 2001 From: Eugene Yurtsev Date: Tue, 16 Apr 2024 09:40:44 -0400 Subject: [PATCH] community[patch]: Use uuid4 not uuid1 (#20487) Using UUID1 is incorrect since it's time dependent, which makes it easy to generate the exact same uuid --- .../langchain_community/vectorstores/analyticdb.py | 2 +- libs/community/langchain_community/vectorstores/atlas.py | 2 +- .../community/langchain_community/vectorstores/bageldb.py | 2 +- libs/community/langchain_community/vectorstores/dingo.py | 4 ++-- .../langchain_community/vectorstores/hologres.py | 4 ++-- .../langchain_community/vectorstores/kinetica.py | 4 ++-- .../community/langchain_community/vectorstores/lantern.py | 2 +- .../langchain_community/vectorstores/pgembedding.py | 4 ++-- .../langchain_community/vectorstores/pgvector.py | 4 ++-- .../langchain_community/vectorstores/timescalevector.py | 8 ++++---- libs/community/langchain_community/vectorstores/vdms.py | 8 ++++---- 11 files changed, 22 insertions(+), 22 deletions(-) diff --git a/libs/community/langchain_community/vectorstores/analyticdb.py b/libs/community/langchain_community/vectorstores/analyticdb.py index 767a836a06..f1eda6f2c3 100644 --- a/libs/community/langchain_community/vectorstores/analyticdb.py +++ b/libs/community/langchain_community/vectorstores/analyticdb.py @@ -157,7 +157,7 @@ class AnalyticDB(VectorStore): List of ids from adding the texts into the vectorstore. """ if ids is None: - ids = [str(uuid.uuid1()) for _ in texts] + ids = [str(uuid.uuid4()) for _ in texts] embeddings = self.embedding_function.embed_documents(list(texts)) diff --git a/libs/community/langchain_community/vectorstores/atlas.py b/libs/community/langchain_community/vectorstores/atlas.py index 88b2da8f7c..b5e6b3e789 100644 --- a/libs/community/langchain_community/vectorstores/atlas.py +++ b/libs/community/langchain_community/vectorstores/atlas.py @@ -119,7 +119,7 @@ class AtlasDB(VectorStore): texts = list(texts) if ids is None: - ids = [str(uuid.uuid1()) for _ in texts] + ids = [str(uuid.uuid4()) for _ in texts] # Embedding upload case if self._embedding_function is not None: diff --git a/libs/community/langchain_community/vectorstores/bageldb.py b/libs/community/langchain_community/vectorstores/bageldb.py index a7b9ddc47d..d6c98fbea7 100644 --- a/libs/community/langchain_community/vectorstores/bageldb.py +++ b/libs/community/langchain_community/vectorstores/bageldb.py @@ -146,7 +146,7 @@ class Bagel(VectorStore): """ # creating unique ids if None if ids is None: - ids = [str(uuid.uuid1()) for _ in texts] + ids = [str(uuid.uuid4()) for _ in texts] texts = list(texts) if self._embedding_function and embeddings is None and texts: diff --git a/libs/community/langchain_community/vectorstores/dingo.py b/libs/community/langchain_community/vectorstores/dingo.py index cf6e5b39f9..a21c308eb4 100644 --- a/libs/community/langchain_community/vectorstores/dingo.py +++ b/libs/community/langchain_community/vectorstores/dingo.py @@ -107,7 +107,7 @@ class Dingo(VectorStore): """ # Embed and create the documents - ids = ids or [str(uuid.uuid1().int)[:13] for _ in texts] + ids = ids or [str(uuid.uuid4().int)[:13] for _ in texts] metadatas_list = [] texts = list(texts) embeds = self._embedding.embed_documents(texts) @@ -347,7 +347,7 @@ class Dingo(VectorStore): # Embed and create the documents - ids = ids or [str(uuid.uuid1().int)[:13] for _ in texts] + ids = ids or [str(uuid.uuid4().int)[:13] for _ in texts] metadatas_list = [] texts = list(texts) embeds = embedding.embed_documents(texts) diff --git a/libs/community/langchain_community/vectorstores/hologres.py b/libs/community/langchain_community/vectorstores/hologres.py index b2572f40c3..84486dffd1 100644 --- a/libs/community/langchain_community/vectorstores/hologres.py +++ b/libs/community/langchain_community/vectorstores/hologres.py @@ -80,7 +80,7 @@ class Hologres(VectorStore): **kwargs: Any, ) -> Hologres: if ids is None: - ids = [str(uuid.uuid1()) for _ in texts] + ids = [str(uuid.uuid4()) for _ in texts] if not metadatas: metadatas = [{} for _ in texts] @@ -141,7 +141,7 @@ class Hologres(VectorStore): List of ids from adding the texts into the vectorstore. """ if ids is None: - ids = [str(uuid.uuid1()) for _ in texts] + ids = [str(uuid.uuid4()) for _ in texts] embeddings = self.embedding_function.embed_documents(list(texts)) diff --git a/libs/community/langchain_community/vectorstores/kinetica.py b/libs/community/langchain_community/vectorstores/kinetica.py index 24cd5dc3f5..c252568834 100644 --- a/libs/community/langchain_community/vectorstores/kinetica.py +++ b/libs/community/langchain_community/vectorstores/kinetica.py @@ -252,7 +252,7 @@ class Kinetica(VectorStore): Kinetica: An instance of Kinetica class """ if ids is None: - ids = [str(uuid.uuid1()) for _ in texts] + ids = [str(uuid.uuid4()) for _ in texts] if not metadatas: metadatas = [{} for _ in texts] @@ -330,7 +330,7 @@ class Kinetica(VectorStore): kwargs: vectorstore specific parameters """ if ids is None: - ids = [str(uuid.uuid1()) for _ in texts] + ids = [str(uuid.uuid4()) for _ in texts] if not metadatas: metadatas = [{} for _ in texts] diff --git a/libs/community/langchain_community/vectorstores/lantern.py b/libs/community/langchain_community/vectorstores/lantern.py index 643993bffb..75e4d012ae 100644 --- a/libs/community/langchain_community/vectorstores/lantern.py +++ b/libs/community/langchain_community/vectorstores/lantern.py @@ -441,7 +441,7 @@ class Lantern(VectorStore): - Useful for testing. """ if ids is None: - ids = [str(uuid.uuid1()) for _ in texts] + ids = [str(uuid.uuid4()) for _ in texts] if not metadatas: metadatas = [{} for _ in texts] diff --git a/libs/community/langchain_community/vectorstores/pgembedding.py b/libs/community/langchain_community/vectorstores/pgembedding.py index 21d24b2f9f..48f04ffef3 100644 --- a/libs/community/langchain_community/vectorstores/pgembedding.py +++ b/libs/community/langchain_community/vectorstores/pgembedding.py @@ -237,7 +237,7 @@ class PGEmbedding(VectorStore): **kwargs: Any, ) -> PGEmbedding: if ids is None: - ids = [str(uuid.uuid1()) for _ in texts] + ids = [str(uuid.uuid4()) for _ in texts] if not metadatas: metadatas = [{} for _ in texts] @@ -288,7 +288,7 @@ class PGEmbedding(VectorStore): **kwargs: Any, ) -> List[str]: if ids is None: - ids = [str(uuid.uuid1()) for _ in texts] + ids = [str(uuid.uuid4()) for _ in texts] embeddings = self.embedding_function.embed_documents(list(texts)) diff --git a/libs/community/langchain_community/vectorstores/pgvector.py b/libs/community/langchain_community/vectorstores/pgvector.py index 40fdfe6550..2ef7c85323 100644 --- a/libs/community/langchain_community/vectorstores/pgvector.py +++ b/libs/community/langchain_community/vectorstores/pgvector.py @@ -471,7 +471,7 @@ class PGVector(VectorStore): **kwargs: Any, ) -> PGVector: if ids is None: - ids = [str(uuid.uuid1()) for _ in texts] + ids = [str(uuid.uuid4()) for _ in texts] if not metadatas: metadatas = [{} for _ in texts] @@ -511,7 +511,7 @@ class PGVector(VectorStore): kwargs: vectorstore specific parameters """ if ids is None: - ids = [str(uuid.uuid1()) for _ in texts] + ids = [str(uuid.uuid4()) for _ in texts] if not metadatas: metadatas = [{} for _ in texts] diff --git a/libs/community/langchain_community/vectorstores/timescalevector.py b/libs/community/langchain_community/vectorstores/timescalevector.py index caa03086bb..5e3e3c41ad 100644 --- a/libs/community/langchain_community/vectorstores/timescalevector.py +++ b/libs/community/langchain_community/vectorstores/timescalevector.py @@ -150,7 +150,7 @@ class TimescaleVector(VectorStore): num_dimensions = len(embeddings[0]) if ids is None: - ids = [str(uuid.uuid1()) for _ in texts] + ids = [str(uuid.uuid4()) for _ in texts] if not metadatas: metadatas = [{} for _ in texts] @@ -191,7 +191,7 @@ class TimescaleVector(VectorStore): num_dimensions = len(embeddings[0]) if ids is None: - ids = [str(uuid.uuid1()) for _ in texts] + ids = [str(uuid.uuid4()) for _ in texts] if not metadatas: metadatas = [{} for _ in texts] @@ -232,7 +232,7 @@ class TimescaleVector(VectorStore): kwargs: vectorstore specific parameters """ if ids is None: - ids = [str(uuid.uuid1()) for _ in texts] + ids = [str(uuid.uuid4()) for _ in texts] if not metadatas: metadatas = [{} for _ in texts] @@ -259,7 +259,7 @@ class TimescaleVector(VectorStore): kwargs: vectorstore specific parameters """ if ids is None: - ids = [str(uuid.uuid1()) for _ in texts] + ids = [str(uuid.uuid4()) for _ in texts] if not metadatas: metadatas = [{} for _ in texts] diff --git a/libs/community/langchain_community/vectorstores/vdms.py b/libs/community/langchain_community/vectorstores/vdms.py index 52de71201b..6c3bf4183e 100644 --- a/libs/community/langchain_community/vectorstores/vdms.py +++ b/libs/community/langchain_community/vectorstores/vdms.py @@ -255,7 +255,7 @@ class VDMS(VectorStore): metadatas = metadatas if metadatas is not None else [None for _ in texts] _len_check_if_sized(texts, metadatas, "texts", "metadatas") - ids = ids if ids is not None else [str(uuid.uuid1()) for _ in texts] + ids = ids if ids is not None else [str(uuid.uuid4()) for _ in texts] _len_check_if_sized(texts, ids, "texts", "ids") all_queries: List[Any] = [] @@ -535,7 +535,7 @@ class VDMS(VectorStore): metadatas.append({"image_path": uri}) # Populate IDs - ids = ids if ids is not None else [str(uuid.uuid1()) for _ in uris] + ids = ids if ids is not None else [str(uuid.uuid4()) for _ in uris] # Set embeddings embeddings = self._embed_image(uris=uris) @@ -577,7 +577,7 @@ class VDMS(VectorStore): texts = list(texts) if ids is None: - ids = [str(uuid.uuid1()) for _ in texts] + ids = [str(uuid.uuid4()) for _ in texts] embeddings = self._embed_documents(texts) @@ -873,7 +873,7 @@ class VDMS(VectorStore): # **kwargs, ) if ids is None: - ids = [str(uuid.uuid1()) for _ in texts] + ids = [str(uuid.uuid4()) for _ in texts] vdms_collection.add_texts( texts=texts, metadatas=metadatas,