From caa6caeb8abf2dcae85898574f70252b885e142c Mon Sep 17 00:00:00 2001 From: ljeagle Date: Fri, 28 Jul 2023 08:20:50 +0800 Subject: [PATCH] Upgrade the AwaDB from v0.3.7 to v0.3.9 and change the default embeddings (#8281) 1. Upgrade the AwaDB from v0.3.7 to v0.3.9 2. Change the default embedding to AwaEmbedding --------- Co-authored-by: ljeagle Co-authored-by: Harrison Chase --- .../langchain/langchain/vectorstores/awadb.py | 15 +++---- libs/langchain/poetry.lock | 41 +++++++++++-------- libs/langchain/pyproject.toml | 2 +- 3 files changed, 30 insertions(+), 28 deletions(-) diff --git a/libs/langchain/langchain/vectorstores/awadb.py b/libs/langchain/langchain/vectorstores/awadb.py index edcd73a07b..44317b9371 100644 --- a/libs/langchain/langchain/vectorstores/awadb.py +++ b/libs/langchain/langchain/vectorstores/awadb.py @@ -167,10 +167,9 @@ class AwaDB(VectorStore): if self.using_table_name in self.table2embeddings: embedding = self.table2embeddings[self.using_table_name].embed_query(query) else: - from awadb import llm_embedding + from awadb import AwaEmbedding - llm = llm_embedding.LLMEmbedding() - embedding = llm.Embedding(query) + embedding = AwaEmbedding().Embedding(query) not_include_fields: Set[str] = {"text_embedding", "_id", "score"} return self.similarity_search_by_vector( @@ -210,10 +209,9 @@ class AwaDB(VectorStore): if self.using_table_name in self.table2embeddings: embedding = self.table2embeddings[self.using_table_name].embed_query(query) else: - from awadb import llm_embedding + from awadb import AwaEmbedding - llm = llm_embedding.LLMEmbedding() - embedding = llm.Embedding(query) + embedding = AwaEmbedding().Embedding(query) results: List[Tuple[Document, float]] = [] @@ -333,10 +331,9 @@ class AwaDB(VectorStore): if self.using_table_name in self.table2embeddings: embedding = self.table2embeddings[self.using_table_name].embed_query(query) else: - from awadb import llm_embedding + from awadb import AwaEmbedding - llm = llm_embedding.LLMEmbedding() - embedding = llm.Embedding(query) + embedding = AwaEmbedding().Embedding(query) if embedding.__len__() == 0: return [] diff --git a/libs/langchain/poetry.lock b/libs/langchain/poetry.lock index 067702769d..55ae1975b6 100644 --- a/libs/langchain/poetry.lock +++ b/libs/langchain/poetry.lock @@ -1,4 +1,4 @@ -# This file is automatically @generated by Poetry 1.4.2 and should not be changed by hand. +# This file is automatically @generated by Poetry and should not be changed by hand. [[package]] name = "absl-py" @@ -613,22 +613,27 @@ cryptography = ">=3.2" [[package]] name = "awadb" -version = "0.3.7" +version = "0.3.9" description = "AI Native database for embedding vectors" category = "main" optional = true python-versions = ">=3.7" files = [ - {file = "awadb-0.3.7-cp310-cp310-macosx_13_0_arm64.whl", hash = "sha256:deec44f3687aad3ef13ba3fac3e2e4fd83c710a30194d228b3f520d2fb013542"}, - {file = "awadb-0.3.7-cp310-cp310-manylinux1_x86_64.whl", hash = "sha256:96e1e92e029f4b1000631bc06d6f845d554e4698851e515eafd35ff4f2b1994f"}, - {file = "awadb-0.3.7-cp311-cp311-macosx_13_0_arm64.whl", hash = "sha256:46d44c4e8d97aaeb73234a2b9945b393b91dfaeae98e9fc6632ffb64bbc9e995"}, - {file = "awadb-0.3.7-cp311-cp311-manylinux1_x86_64.whl", hash = "sha256:d5e379ea2f9f44687edb99c1d35719d1bed116759f800d212d9561cef99736a3"}, - {file = "awadb-0.3.7-cp36-cp36m-manylinux1_x86_64.whl", hash = "sha256:f2de28efa210035d278a55466023b44b8479254f3d5de69c944e6a3fcfb97879"}, - {file = "awadb-0.3.7-cp37-cp37m-manylinux1_x86_64.whl", hash = "sha256:7a2a3f063a133c945e12ea0ab9d9c7ab038c8255dbd867067dba0a513557124b"}, - {file = "awadb-0.3.7-cp38-cp38-macosx_13_0_arm64.whl", hash = "sha256:15a8f3349ea84bdfc9c096b3760cf00363eb1908e71728b4a1e3beecda763cd5"}, - {file = "awadb-0.3.7-cp38-cp38-manylinux1_x86_64.whl", hash = "sha256:3ac1ef7e1d1a591eb36a57ea65824e717f54fc521e4ae303d634e510817ba270"}, - {file = "awadb-0.3.7-cp39-cp39-macosx_13_0_arm64.whl", hash = "sha256:947b6576a07e90cac31d8ff709cd0e0abc9779bc71276df817b2ffe18c1fa541"}, - {file = "awadb-0.3.7-cp39-cp39-manylinux1_x86_64.whl", hash = "sha256:e4014edade8134579993639a4a3b18ff20c8449fdfc5ff511b24617109be5df7"}, + {file = "awadb-0.3.9-cp310-cp310-macosx_10_13_x86_64.whl", hash = "sha256:8a3f88f9b4426f1c588752a5af89f2daa52b4faf5fe25046f9bfcaa8d8201298"}, + {file = "awadb-0.3.9-cp310-cp310-macosx_13_0_arm64.whl", hash = "sha256:1d7fa3a75fcdd81d486a5254237234058d772cb70478c2f197ee9560a3596813"}, + {file = "awadb-0.3.9-cp310-cp310-manylinux1_x86_64.whl", hash = "sha256:8276b5ee5dddc48c2e111253231d64a12684bf10971c44dd5bc01ea737fdffdc"}, + {file = "awadb-0.3.9-cp311-cp311-macosx_10_13_x86_64.whl", hash = "sha256:9c770aa9f9f17852e8cb3e78e1d3677b2c4ffda817889cf23935b3147c94a013"}, + {file = "awadb-0.3.9-cp311-cp311-macosx_13_0_arm64.whl", hash = "sha256:0abaf08b1accd5b58296a13143eab2f55c576c00d896b692f8a2846bfbb33be9"}, + {file = "awadb-0.3.9-cp311-cp311-manylinux1_x86_64.whl", hash = "sha256:ac6d34a6688b6daf412a859ffd85b5c18a30d25d267074dcdbc176a005fc5db2"}, + {file = "awadb-0.3.9-cp36-cp36m-manylinux1_x86_64.whl", hash = "sha256:151ae39f9b74a032f86a4cde7399cb5c1bdd796e995de550ae3583ddd1e45884"}, + {file = "awadb-0.3.9-cp37-cp37m-macosx_10_13_x86_64.whl", hash = "sha256:1f62bf20c0824ad57711c4898d96b74d7f083cb129b11455f36e64850fe6a064"}, + {file = "awadb-0.3.9-cp37-cp37m-manylinux1_x86_64.whl", hash = "sha256:3b78e0a3a87ae60242dc9d07eba12f7c455838e6c9afb22fc68c75b510f27614"}, + {file = "awadb-0.3.9-cp38-cp38-macosx_10_13_x86_64.whl", hash = "sha256:51ca452327011cf97aa6a7316e39c7cda5c537c5a8cbbbd28a5f0f1394499845"}, + {file = "awadb-0.3.9-cp38-cp38-macosx_13_0_arm64.whl", hash = "sha256:5b816916b0d61aace79b0ed0cd0bdc4b35bfc5299a095ba6e2bb350d8f485860"}, + {file = "awadb-0.3.9-cp38-cp38-manylinux1_x86_64.whl", hash = "sha256:608472fcc694e298b4ff920e4e6da502d5314b6441637f608869dc6d4af627db"}, + {file = "awadb-0.3.9-cp39-cp39-macosx_10_13_x86_64.whl", hash = "sha256:50f1ab8cf92cd714ef57dd2c496b9e65003fb531295b8f99cd89b7575578d382"}, + {file = "awadb-0.3.9-cp39-cp39-macosx_13_0_arm64.whl", hash = "sha256:911b78156cb3c0e723317c622a1d3cabcaea771f464200b30709bbb4143eb8a8"}, + {file = "awadb-0.3.9-cp39-cp39-manylinux1_x86_64.whl", hash = "sha256:330440be36f18e3f02cb645465618fd2a425b3260f2f645d7065d9f93857cf11"}, ] [package.extras] @@ -11274,7 +11279,7 @@ files = [ ] [package.dependencies] -accelerate = {version = ">=0.20.2", optional = true, markers = "extra == \"accelerate\" or extra == \"torch\""} +accelerate = {version = ">=0.20.2", optional = true, markers = "extra == \"accelerate\""} filelock = "*" huggingface-hub = ">=0.14.1,<1.0" numpy = ">=1.17" @@ -12531,15 +12536,15 @@ cffi = {version = ">=1.11", markers = "platform_python_implementation == \"PyPy\ cffi = ["cffi (>=1.11)"] [extras] -all = ["O365", "aleph-alpha-client", "amadeus", "anthropic", "arxiv", "atlassian-python-api", "awadb", "azure-ai-formrecognizer", "azure-ai-vision", "azure-cognitiveservices-speech", "azure-cosmos", "azure-identity", "beautifulsoup4", "clarifai", "clickhouse-connect", "cohere", "deeplake", "docarray", "duckduckgo-search", "elasticsearch", "esprima", "faiss-cpu", "google-api-python-client", "google-auth", "google-search-results", "gptcache", "html2text", "huggingface_hub", "jina", "jinja2", "jq", "lancedb", "langkit", "lark", "libdeeplake", "lxml", "manifest-ml", "marqo", "momento", "nebula3-python", "neo4j", "networkx", "nlpcloud", "nltk", "nomic", "octoai-sdk", "openai", "openlm", "opensearch-py", "pdfminer-six", "pexpect", "pgvector", "pinecone-client", "pinecone-text", "psycopg2-binary", "pymongo", "pyowm", "pypdf", "pytesseract", "python-arango", "pyvespa", "qdrant-client", "rdflib", "redis", "requests-toolbelt", "sentence-transformers", "singlestoredb", "spacy", "steamship", "tensorflow-text", "tigrisdb", "tiktoken", "torch", "transformers", "weaviate-client", "wikipedia", "wolframalpha"] -azure = ["azure-ai-formrecognizer", "azure-ai-vision", "azure-cognitiveservices-speech", "azure-core", "azure-cosmos", "azure-identity", "azure-search-documents", "openai"] +all = ["anthropic", "clarifai", "cohere", "openai", "nlpcloud", "huggingface_hub", "jina", "manifest-ml", "elasticsearch", "opensearch-py", "google-search-results", "faiss-cpu", "sentence-transformers", "transformers", "spacy", "nltk", "wikipedia", "beautifulsoup4", "tiktoken", "torch", "jinja2", "pinecone-client", "pinecone-text", "marqo", "pymongo", "weaviate-client", "redis", "google-api-python-client", "google-auth", "wolframalpha", "qdrant-client", "tensorflow-text", "pypdf", "networkx", "nomic", "aleph-alpha-client", "deeplake", "libdeeplake", "pgvector", "psycopg2-binary", "pyowm", "pytesseract", "html2text", "atlassian-python-api", "gptcache", "duckduckgo-search", "arxiv", "azure-identity", "clickhouse-connect", "azure-cosmos", "lancedb", "langkit", "lark", "pexpect", "pyvespa", "O365", "jq", "docarray", "steamship", "pdfminer-six", "lxml", "requests-toolbelt", "neo4j", "openlm", "azure-ai-formrecognizer", "azure-ai-vision", "azure-cognitiveservices-speech", "momento", "singlestoredb", "tigrisdb", "nebula3-python", "awadb", "esprima", "octoai-sdk", "rdflib", "amadeus", "python-arango"] +azure = ["azure-identity", "azure-cosmos", "openai", "azure-core", "azure-ai-formrecognizer", "azure-ai-vision", "azure-cognitiveservices-speech", "azure-search-documents"] clarifai = ["clarifai"] cohere = ["cohere"] docarray = ["docarray"] embeddings = ["sentence-transformers"] -extended-testing = ["atlassian-python-api", "beautifulsoup4", "bibtexparser", "cassio", "chardet", "esprima", "geopandas", "gql", "html2text", "jinja2", "jq", "lxml", "mwparserfromhell", "mwxml", "openai", "openai", "pandas", "pdfminer-six", "pgvector", "psychicapi", "py-trello", "pymupdf", "pypdf", "pypdfium2", "pyspark", "rank-bm25", "rapidfuzz", "requests-toolbelt", "scikit-learn", "streamlit", "sympy", "telethon", "tqdm", "zep-python"] +extended-testing = ["beautifulsoup4", "bibtexparser", "cassio", "chardet", "esprima", "jq", "pdfminer-six", "pgvector", "pypdf", "pymupdf", "pypdfium2", "tqdm", "lxml", "atlassian-python-api", "mwparserfromhell", "mwxml", "pandas", "telethon", "psychicapi", "zep-python", "gql", "requests-toolbelt", "html2text", "py-trello", "scikit-learn", "streamlit", "pyspark", "openai", "sympy", "rapidfuzz", "openai", "rank-bm25", "geopandas", "jinja2"] javascript = ["esprima"] -llms = ["anthropic", "clarifai", "cohere", "huggingface_hub", "manifest-ml", "nlpcloud", "openai", "openllm", "openlm", "torch", "transformers"] +llms = ["anthropic", "clarifai", "cohere", "openai", "openllm", "openlm", "nlpcloud", "huggingface_hub", "manifest-ml", "torch", "transformers"] openai = ["openai", "tiktoken"] qdrant = ["qdrant-client"] text-helpers = ["chardet"] @@ -12547,4 +12552,4 @@ text-helpers = ["chardet"] [metadata] lock-version = "2.0" python-versions = ">=3.8.1,<4.0" -content-hash = "dfd8a8fc0b896d75c92b268160bdd5bc87de1f997014c0f092fbc442b5c3f900" +content-hash = "922d5837b6c26c44290cad10c90fc424075d6772daa6d4aed38c13470653882e" diff --git a/libs/langchain/pyproject.toml b/libs/langchain/pyproject.toml index 57bb94bc29..06fecf7327 100644 --- a/libs/langchain/pyproject.toml +++ b/libs/langchain/pyproject.toml @@ -110,7 +110,7 @@ tigrisdb = {version = "^1.0.0b6", optional = true} nebula3-python = {version = "^3.4.0", optional = true} mwparserfromhell = {version = "^0.6.4", optional = true} mwxml = {version = "^0.3.3", optional = true} -awadb = {version = "^0.3.3", optional = true} +awadb = {version = "^0.3.9", optional = true} azure-search-documents = {version = "11.4.0b6", optional = true} esprima = {version = "^4.0.1", optional = true} openllm = {version = ">=0.1.19", optional = true}