Upgrade the version of AwaDB and add some new interfaces (#6565)

1. upgrade the version of AwaDB
2. add some new interfaces
3. fix a bug that caused errors when packing page content

@dev2049  please review, thanks!

---------

Co-authored-by: vincent <awadb.vincent@gmail.com>
multi_strategy_parser
ljeagle 11 months ago committed by GitHub
parent 937a7e93f2
commit ca24dc2d5f
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23

@ -2,6 +2,7 @@
from __future__ import annotations
import logging
import uuid
from typing import TYPE_CHECKING, Any, Iterable, List, Optional, Tuple, Type
from langchain.docstore.document import Document
@ -48,10 +49,15 @@ class AwaDB(VectorStore):
else:
self.awadb_client = awadb.Client()
self.awadb_client.Create(table_name)
self.embedding_model = embedding_model
if table_name == self._DEFAULT_TABLE_NAME:
table_name += "_"
table_name += str(uuid.uuid4()).split("-")[-1]
self.added_doc_count = 0
self.awadb_client.Create(table_name)
self.table2embeddings: dict[str, Embeddings] = {}
if embedding_model is not None:
self.table2embeddings[table_name] = embedding_model
self.using_table_name = table_name
def add_texts(
self,
@ -74,16 +80,23 @@ class AwaDB(VectorStore):
raise ValueError("AwaDB client is None!!!")
embeddings = None
if self.embedding_model is not None:
embeddings = self.embedding_model.embed_documents(list(texts))
if self.using_table_name in self.table2embeddings:
embeddings = self.table2embeddings[self.using_table_name].embed_documents(
list(texts)
)
return self.awadb_client.AddTexts(
"text", "text_embedding", texts, embeddings, metadatas, is_duplicate_texts
"embedding_text",
"text_embedding",
texts,
embeddings,
metadatas,
is_duplicate_texts,
)
def load_local(
self,
table_name: str = _DEFAULT_TABLE_NAME,
table_name: str,
**kwargs: Any,
) -> bool:
if self.awadb_client is None:
@ -102,8 +115,8 @@ class AwaDB(VectorStore):
raise ValueError("AwaDB client is None!!!")
embedding = None
if self.embedding_model is not None:
embedding = self.embedding_model.embed_query(query)
if self.using_table_name in self.table2embeddings:
embedding = self.table2embeddings[self.using_table_name].embed_query(query)
else:
from awadb import llm_embedding
@ -127,21 +140,16 @@ class AwaDB(VectorStore):
raise ValueError("AwaDB client is None!!!")
embedding = None
if self.embedding_model is not None:
embedding = self.embedding_model.embed_query(query)
if self.using_table_name in self.table2embeddings:
embedding = self.table2embeddings[self.using_table_name].embed_query(query)
else:
from awadb import llm_embedding
llm = llm_embedding.LLMEmbedding()
embedding = llm.Embedding(query)
# show_results = self.awadb_client.Search(embedding, k)
results: List[Tuple[Document, float]] = []
# if show_results.__len__() == 0:
# return results
scores: List[float] = []
retrieval_docs = self.similarity_search_by_vector(embedding, k, scores)
@ -173,8 +181,8 @@ class AwaDB(VectorStore):
raise ValueError("AwaDB client is None!!!")
embedding = None
if self.embedding_model is not None:
embedding = self.embedding_model.embed_query(query)
if self.using_table_name in self.table2embeddings:
embedding = self.table2embeddings[self.using_table_name].embed_query(query)
show_results = self.awadb_client.Search(embedding, k)
@ -234,12 +242,15 @@ class AwaDB(VectorStore):
meta_data = {}
for item_key in item_detail:
if (
item_key == "Field@0" and self.embedding_model is not None
item_key == "Field@0"
and self.using_table_name in self.table2embeddings
): # text for the document
content = item_detail[item_key]
elif self.embedding_model is None and item_key == "embedding_text":
elif item_key == "embedding_text":
content = item_detail[item_key]
elif item_key == "Field@1": # embedding field for the document
elif (
item_key == "Field@1" or item_key == "text_embedding"
): # embedding field for the document
continue
elif item_key == "score": # L2 distance
if scores is not None:
@ -250,6 +261,57 @@ class AwaDB(VectorStore):
results.append(Document(page_content=content, metadata=meta_data))
return results
def create_table(
    self,
    table_name: str,
    **kwargs: Any,
) -> bool:
    """Create a table through the AwaDB client and switch to it on success.

    Args:
        table_name (str): Name passed to the client's ``Create`` call.
        **kwargs (Any): Accepted for interface compatibility; not used here.

    Returns:
        bool: ``False`` when no client is attached; otherwise whatever the
        client's ``Create`` call returned.
    """
    client = self.awadb_client
    if client is None:
        return False

    created = client.Create(table_name)
    if not created:
        # Creation did not succeed: leave the currently tracked table as is.
        return created

    # Remember the freshly created table as the one in use.
    self.using_table_name = table_name
    return created
def use(
    self,
    table_name: str,
    **kwargs: Any,
) -> bool:
    """Switch to the given table via the AwaDB client.

    Call ``list_tables`` first if the available table names are not known.

    Args:
        table_name (str): Name passed to the client's ``Use`` call.
        **kwargs (Any): Accepted for interface compatibility; not used here.

    Returns:
        bool: ``False`` when no client is attached; otherwise whatever the
        client's ``Use`` call returned.
    """
    client = self.awadb_client
    if client is None:
        return False

    switched = client.Use(table_name)
    if not switched:
        # The client refused the switch: keep tracking the previous table.
        return switched

    self.using_table_name = table_name
    return switched
def list_tables(
    self,
    **kwargs: Any,
) -> List[str]:
    """Return the table names reported by the AwaDB client.

    Args:
        **kwargs (Any): Accepted for interface compatibility; not used here.

    Returns:
        List[str]: An empty list when no client is attached; otherwise the
        result of the client's ``ListAllTables`` call.
    """
    client = self.awadb_client
    return [] if client is None else client.ListAllTables()
def get_current_table(
    self,
    **kwargs: Any,
) -> str:
    """Return the name of the table this store is currently tracking.

    Args:
        **kwargs (Any): Accepted for interface compatibility; not used here.

    Returns:
        str: The value of ``self.using_table_name``.
    """
    current_table = self.using_table_name
    return current_table
@classmethod
def from_texts(
cls: Type[AwaDB],
@ -300,7 +362,7 @@ class AwaDB(VectorStore):
Args:
documents (List[Document]): List of documents to add to the vectorstore.
embedding (Optional[Embeddings]): Embedding function. Defaults to None.
table_name (str): Name of the collection to create.
table_name (str): Name of the table to create.
logging_and_data_dir (Optional[str]): Directory to persist the table.
client (Optional[awadb.Client]): AwaDB client

16
poetry.lock generated

@ -572,19 +572,19 @@ test = ["coverage (>=5,<6)", "pytest (>=6,<7)"]
[[package]]
name = "awadb"
version = "0.3.3"
version = "0.3.5"
description = "The AI Native database for embedding vectors"
category = "main"
optional = true
python-versions = ">=3.7"
files = [
{file = "awadb-0.3.3-cp310-cp310-manylinux1_x86_64.whl", hash = "sha256:daebc108103c8cace41dfb3235fcfdda28ea48e6cd6548b6072f7ad49b64274b"},
{file = "awadb-0.3.3-cp311-cp311-macosx_10_13_universal2.whl", hash = "sha256:2bb3ca2f943448060b1bba4395dd99e2218d7f2149507a8fdfa7a3fd4cfe97ec"},
{file = "awadb-0.3.3-cp311-cp311-manylinux1_x86_64.whl", hash = "sha256:7b99662af9f7b58e217661a70c295e40605900552bec6d8e9553d90dbf19c5c1"},
{file = "awadb-0.3.3-cp36-cp36m-manylinux1_x86_64.whl", hash = "sha256:94be44e587f28fa26b2cade0b6f4c04689f50cb0c07183db5ee50e48fe2e9ae3"},
{file = "awadb-0.3.3-cp37-cp37m-manylinux1_x86_64.whl", hash = "sha256:314929dc3a8d25c0f234a2b86c920543050f4eb298a6f68bd2c97c9fe3fb6224"},
{file = "awadb-0.3.3-cp38-cp38-manylinux1_x86_64.whl", hash = "sha256:8bfccff1c7373899153427d93d96a97ae5371e8a6f09ff4dcbd28fb9f3f63ff4"},
{file = "awadb-0.3.3-cp39-cp39-manylinux1_x86_64.whl", hash = "sha256:810021a90b873f668d8ab63e2c2747b2b2835bf0ae25f4223b6c94f06faffea4"},
{file = "awadb-0.3.5-cp310-cp310-manylinux1_x86_64.whl", hash = "sha256:9addae6f0cfd57cdb3e8915778772c51fc1c41f5cacebad1322c5bfe30c95516"},
{file = "awadb-0.3.5-cp311-cp311-macosx_10_13_universal2.whl", hash = "sha256:1b06099c4baf906829e4550f3cf0da602aba44465c89ede5889943619c0b49ce"},
{file = "awadb-0.3.5-cp311-cp311-manylinux1_x86_64.whl", hash = "sha256:dedbb62496aadb70894fa965922625ff3003397445f38d5da7f4092e17f93725"},
{file = "awadb-0.3.5-cp36-cp36m-manylinux1_x86_64.whl", hash = "sha256:0f7eafcbe5311cc7b976faca368bc666a70f22ebd7f0039b5c5f791f2909377e"},
{file = "awadb-0.3.5-cp37-cp37m-manylinux1_x86_64.whl", hash = "sha256:950ddb5c483ea1ce24550d32c79e4c40c10dfc73cafc6f6faa8b14f22271dce5"},
{file = "awadb-0.3.5-cp38-cp38-manylinux1_x86_64.whl", hash = "sha256:98b5d5a7a0d11253e23bec48295dc45356eead2338eac0d4f73a3755060992db"},
{file = "awadb-0.3.5-cp39-cp39-manylinux1_x86_64.whl", hash = "sha256:1c14d9014e7e0ccf8eaecc540e0c88893fcdd3a1438f71c110b9ec80b565dae6"},
]
[package.extras]

Loading…
Cancel
Save