From e173e032bcceae3a7d3bb400c34d554f04be14ca Mon Sep 17 00:00:00 2001 From: mbchang Date: Mon, 22 May 2023 15:47:03 -0700 Subject: [PATCH] fix: assign current_time to datetime.now() if current_time is None (#5045) # Assign `current_time` to `datetime.now()` if it `current_time is None` in `time_weighted_retriever` Fixes #4825 As implemented, `add_documents` in `TimeWeightedVectorStoreRetriever` assigns `doc.metadata["last_accessed_at"]` and `doc.metadata["created_at"]` to `datetime.datetime.now()` if `current_time` is not in `kwargs`. ```python def add_documents(self, documents: List[Document], **kwargs: Any) -> List[str]: """Add documents to vectorstore.""" current_time = kwargs.get("current_time", datetime.datetime.now()) # Avoid mutating input documents dup_docs = [deepcopy(d) for d in documents] for i, doc in enumerate(dup_docs): if "last_accessed_at" not in doc.metadata: doc.metadata["last_accessed_at"] = current_time if "created_at" not in doc.metadata: doc.metadata["created_at"] = current_time doc.metadata["buffer_idx"] = len(self.memory_stream) + i self.memory_stream.extend(dup_docs) return self.vectorstore.add_documents(dup_docs, **kwargs) ``` However, from the way `add_documents` is being called from `GenerativeAgentMemory`, `current_time` is set as a `kwarg`, but it is given a value of `None`: ```python def add_memory( self, memory_content: str, now: Optional[datetime] = None ) -> List[str]: """Add an observation or memory to the agent's memory.""" importance_score = self._score_memory_importance(memory_content) self.aggregate_importance += importance_score document = Document( page_content=memory_content, metadata={"importance": importance_score} ) result = self.memory_retriever.add_documents([document], current_time=now) ``` The default of `now` was set in #4658 to be None. The proposed fix is the following: ```python def add_documents(self, documents: List[Document], **kwargs: Any) -> List[str]: """Add documents to vectorstore.""" current_time = kwargs.get("current_time", datetime.datetime.now()) # `current_time` may exist in kwargs, but may still have the value of None. if current_time is None: current_time = datetime.datetime.now() ``` Alternatively, we could just set the default of `now` to be `datetime.datetime.now()` everywhere instead. Thoughts @hwchase17? If we still want to keep the default to be `None`, then this PR should fix the above issue. If we want to set the default to be `datetime.datetime.now()` instead, I can update this PR with that alternative fix. EDIT: seems like from #5018 it looks like we would prefer to keep the default to be `None`, in which case this PR should fix the error. --- langchain/retrievers/time_weighted_retriever.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/langchain/retrievers/time_weighted_retriever.py b/langchain/retrievers/time_weighted_retriever.py index bd2d9cec..f715337b 100644 --- a/langchain/retrievers/time_weighted_retriever.py +++ b/langchain/retrievers/time_weighted_retriever.py @@ -109,7 +109,9 @@ class TimeWeightedVectorStoreRetriever(BaseRetriever, BaseModel): def add_documents(self, documents: List[Document], **kwargs: Any) -> List[str]: """Add documents to vectorstore.""" - current_time = kwargs.get("current_time", datetime.datetime.now()) + current_time = kwargs.get("current_time") + if current_time is None: + current_time = datetime.datetime.now() # Avoid mutating input documents dup_docs = [deepcopy(d) for d in documents] for i, doc in enumerate(dup_docs):