Harrison/relevancy score (#3907)

Co-authored-by: Ryan Grippeling <R.Grippeling@hotmail.com>
Co-authored-by: Ryan <ryan@webgrip.nl>
Co-authored-by: Zander Chase <130414180+vowelparrot@users.noreply.github.com>
Harrison Chase authored 1 year ago, committed by GitHub
parent c582f2e9e3
commit 13269fb583

.gitignore (vendored): 1 addition

@@ -1,3 +1,4 @@
.vs/
.vscode/
.idea/
# Byte-compiled / optimized / DLL files

@@ -74,9 +74,10 @@ class TimeWeightedVectorStoreRetriever(BaseRetriever, BaseModel):
)
results = {}
for fetched_doc, relevance in docs_and_scores:
buffer_idx = fetched_doc.metadata["buffer_idx"]
doc = self.memory_stream[buffer_idx]
results[buffer_idx] = (doc, relevance)
if "buffer_idx" in fetched_doc.metadata:
buffer_idx = fetched_doc.metadata["buffer_idx"]
doc = self.memory_stream[buffer_idx]
results[buffer_idx] = (doc, relevance)
return results
def get_relevant_documents(self, query: str) -> List[Document]:
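For illustration, a minimal self-contained sketch (not part of the commit; plain dicts stand in for Document metadata) of why the new "buffer_idx" guard matters: documents written to the backing vector store outside the retriever carry no "buffer_idx" key, and the old unconditional lookup raised KeyError on them.

# Standalone sketch of the guarded lookup; hypothetical data, not library code.
docs_and_scores = [
    ({"buffer_idx": 0}, 0.9),  # added through the retriever
    ({}, 0.4),                 # added directly to the vector store
]
memory_stream = ["doc added via retriever"]

results = {}
for metadata, relevance in docs_and_scores:
    if "buffer_idx" in metadata:  # the new guard skips foreign documents
        buffer_idx = metadata["buffer_idx"]
        results[buffer_idx] = (memory_stream[buffer_idx], relevance)

print(results)  # {0: ('doc added via retriever', 0.9)}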

@@ -81,6 +81,10 @@ def _redis_prefix(index_name: str) -> str:
return f"doc:{index_name}"
def _default_relevance_score(val: float) -> float:
return 1 - val
class Redis(VectorStore):
"""Wrapper around Redis vector database.
@@ -108,6 +112,9 @@ class Redis(VectorStore):
content_key: str = "content",
metadata_key: str = "metadata",
vector_key: str = "content_vector",
relevance_score_fn: Optional[
Callable[[float], float]
] = _default_relevance_score,
**kwargs: Any,
):
"""Initialize with necessary components."""
@@ -133,6 +140,7 @@ class Redis(VectorStore):
self.content_key = content_key
self.metadata_key = metadata_key
self.vector_key = vector_key
self.relevance_score_fn = relevance_score_fn
def _create_index(self, dim: int = 1536) -> None:
try:
@@ -328,6 +336,24 @@ class Redis(VectorStore):
return docs
def _similarity_search_with_relevance_scores(
self,
query: str,
k: int = 4,
**kwargs: Any,
) -> List[Tuple[Document, float]]:
"""Return docs and relevance scores, normalized on a scale from 0 to 1.
0 is dissimilar, 1 is most similar.
"""
if self.relevance_score_fn is None:
raise ValueError(
"relevance_score_fn must be provided to"
" Weaviate constructor to normalize scores"
)
docs_and_scores = self.similarity_search_with_score(query, k=k)
return [(doc, self.relevance_score_fn(score)) for doc, score in docs_and_scores]
@classmethod
def from_texts(
cls: Type[Redis],
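A hedged usage sketch of the new Redis scoring path. It assumes the public similarity_search_with_relevance_scores on the VectorStore base class delegates to the private _similarity_search_with_relevance_scores added above, and it needs a running Redis (with RediSearch) plus an OpenAI API key; the URL and index name are placeholders.

from langchain.embeddings import OpenAIEmbeddings
from langchain.vectorstores import Redis

rds = Redis.from_texts(
    ["foo", "bar", "baz"],
    OpenAIEmbeddings(),
    redis_url="redis://localhost:6379",  # placeholder URL
    index_name="example_index",          # placeholder index name
)

# Scores come back normalized by relevance_score_fn (default: 1 - distance).
for doc, score in rds.similarity_search_with_relevance_scores("foo", k=2):
    print(doc.page_content, round(score, 3))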

@@ -1,7 +1,8 @@
"""Wrapper around weaviate vector database."""
from __future__ import annotations
from typing import Any, Dict, Iterable, List, Optional, Type
import datetime
from typing import Any, Callable, Dict, Iterable, List, Optional, Tuple, Type
from uuid import uuid4
import numpy as np
@@ -58,6 +59,10 @@ def _create_weaviate_client(**kwargs: Any) -> Any:
return client
def _default_score_normalizer(val: float) -> float:
return 1 - 1 / (1 + np.exp(val))
class Weaviate(VectorStore):
"""Wrapper around Weaviate vector database.
@@ -80,6 +85,9 @@ class Weaviate(VectorStore):
text_key: str,
embedding: Optional[Embeddings] = None,
attributes: Optional[List[str]] = None,
relevance_score_fn: Optional[
Callable[[float], float]
] = _default_score_normalizer,
):
"""Initialize with Weaviate client."""
try:
@@ -98,6 +106,7 @@ class Weaviate(VectorStore):
self._embedding = embedding
self._text_key = text_key
self._query_attrs = [self._text_key]
self._relevance_score_fn = relevance_score_fn
if attributes is not None:
self._query_attrs.extend(attributes)
@@ -110,6 +119,11 @@ class Weaviate(VectorStore):
"""Upload texts with metadata (properties) to Weaviate."""
from weaviate.util import get_valid_uuid
def json_serializable(value: Any) -> Any:
if isinstance(value, datetime.datetime):
return value.isoformat()
return value
with self._client.batch as batch:
ids = []
for i, doc in enumerate(texts):
@@ -118,7 +132,7 @@ class Weaviate(VectorStore):
}
if metadatas is not None:
for key in metadatas[i].keys():
data_properties[key] = metadatas[i][key]
data_properties[key] = json_serializable(metadatas[i][key])
_id = get_valid_uuid(uuid4())
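A minimal sketch (not part of the commit) of what the json_serializable helper changes: datetime metadata values are converted to ISO-8601 strings before being written to Weaviate, since datetime objects cannot be JSON-serialized directly.

import datetime

def json_serializable(value):
    # Mirror of the helper in the diff: only datetimes are converted.
    if isinstance(value, datetime.datetime):
        return value.isoformat()
    return value

metadata = {"source": "notes.txt", "created_at": datetime.datetime(2023, 5, 1, 12, 30)}
print({k: json_serializable(v) for k, v in metadata.items()})
# {'source': 'notes.txt', 'created_at': '2023-05-01T12:30:00'}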
@@ -267,9 +281,57 @@ class Weaviate(VectorStore):
payload[idx].pop("_additional")
meta = payload[idx]
docs.append(Document(page_content=text, metadata=meta))
return docs
def similarity_search_with_score(
self, query: str, k: int = 4, **kwargs: Any
) -> List[Tuple[Document, float]]:
content: Dict[str, Any] = {"concepts": [query]}
if kwargs.get("search_distance"):
content["certainty"] = kwargs.get("search_distance")
query_obj = self._client.query.get(self._index_name, self._query_attrs)
result = (
query_obj.with_near_text(content)
.with_limit(k)
.with_additional("vector")
.do()
)
if "errors" in result:
raise ValueError(f"Error during query: {result['errors']}")
docs_and_scores = []
if self._embedding is None:
raise ValueError(
"_embedding cannot be None for similarity_search_with_score"
)
for res in result["data"]["Get"][self._index_name]:
text = res.pop(self._text_key)
score = np.dot(
res["_additional"]["vector"], self._embedding.embed_query(query)
)
docs_and_scores.append((Document(page_content=text, metadata=res), score))
return docs_and_scores
def _similarity_search_with_relevance_scores(
self,
query: str,
k: int = 4,
**kwargs: Any,
) -> List[Tuple[Document, float]]:
"""Return docs and relevance scores, normalized on a scale from 0 to 1.
0 is dissimilar, 1 is most similar.
"""
if self._relevance_score_fn is None:
raise ValueError(
"relevance_score_fn must be provided to"
" Weaviate constructor to normalize scores"
)
docs_and_scores = self.similarity_search_with_score(query, k=k)
return [
(doc, self._relevance_score_fn(score)) for doc, score in docs_and_scores
]
@classmethod
def from_texts(
cls: Type[Weaviate],
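Finally, a hedged end-to-end sketch of the Weaviate side. It assumes a running Weaviate instance, an OpenAI API key, and that the public similarity_search_with_relevance_scores on the VectorStore base class calls the private hook above; the URL, class name, and text key are placeholders, and the embedding must be supplied because the raw score is a dot product with the query embedding.

import weaviate
from langchain.embeddings import OpenAIEmbeddings
from langchain.vectorstores import Weaviate

client = weaviate.Client("http://localhost:8080")  # placeholder URL
store = Weaviate(
    client,
    index_name="Document",  # placeholder Weaviate class name
    text_key="text",        # placeholder text property
    embedding=OpenAIEmbeddings(),
)

# Raw dot-product scores from similarity_search_with_score are passed
# through _relevance_score_fn (default: logistic sigmoid) to land in (0, 1).
for doc, score in store.similarity_search_with_relevance_scores("foo", k=2):
    print(doc.page_content, round(score, 3))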
