mirror of
https://github.com/hwchase17/langchain
synced 2024-11-10 01:10:59 +00:00
123 lines
3.8 KiB
Plaintext
123 lines
3.8 KiB
Plaintext
{
|
|
"cells": [
|
|
{
|
|
"cell_type": "markdown",
|
|
"metadata": {
|
|
"collapsed": true,
|
|
"jupyter": {
|
|
"outputs_hidden": true
|
|
}
|
|
},
|
|
"source": [
|
|
"# Tencent Cloud VectorDB\n",
|
|
"\n",
|
|
">[Tencent Cloud VectorDB](https://cloud.tencent.com/document/product/1709) is a fully managed, self-developed, enterprise-level distributed database service designed for storing, retrieving, and analyzing multi-dimensional vector data. The database supports multiple index types and similarity calculation methods. A single index can support a vector scale of up to 1 billion and can support millions of QPS and millisecond-level query latency. Tencent Cloud Vector Database can not only provide an external knowledge base for large models to improve the accuracy of large model responses but can also be widely used in AI fields such as recommendation systems, NLP services, computer vision, and intelligent customer service.\n",
|
|
"\n",
|
|
"This notebook shows how to use functionality related to Tencent Cloud VectorDB.\n",
|
|
"\n",
|
|
"To run, you should have a [Database instance](https://cloud.tencent.com/document/product/1709/95101)."
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"!pip3 install tcvectordb"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"from langchain.embeddings.fake import FakeEmbeddings\n",
|
|
"from langchain.text_splitter import CharacterTextSplitter\n",
|
|
"from langchain.vectorstores import TencentVectorDB\n",
|
|
"from langchain.vectorstores.tencentvectordb import ConnectionParams\n",
|
|
"from langchain.document_loaders import TextLoader"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"loader = TextLoader(\"../../../state_of_the_union.txt\")\n",
|
|
"documents = loader.load()\n",
|
|
"text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)\n",
|
|
"docs = text_splitter.split_documents(documents)\n",
|
|
"embeddings = FakeEmbeddings(size=128)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"conn_params = ConnectionParams(url=\"http://10.0.X.X\", \n",
|
|
" key=\"eC4bLRy2va******************************\", \n",
|
|
" username=\"root\", \n",
|
|
" timeout=20)\n",
|
|
"\n",
|
|
"vector_db = TencentVectorDB.from_documents(\n",
|
|
" docs,\n",
|
|
" embeddings,\n",
|
|
" connection_params=conn_params,\n",
|
|
" # drop_old=True,\n",
|
|
")"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"query = \"What did the president say about Ketanji Brown Jackson\"\n",
|
|
"docs = vector_db.similarity_search(query)\n",
|
|
"docs[0].page_content"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"vector_db = TencentVectorDB(embeddings, conn_params)\n",
|
|
"\n",
|
|
"vector_db.add_texts([\"Ankush went to Princeton\"])\n",
|
|
"query = \"Where did Ankush go to college?\"\n",
|
|
"docs = vector_db.max_marginal_relevance_search(query)\n",
|
|
"docs[0].page_content"
|
|
]
|
|
}
|
|
],
|
|
"metadata": {
|
|
"kernelspec": {
|
|
"display_name": "Python 3 (ipykernel)",
|
|
"language": "python",
|
|
"name": "python3"
|
|
},
|
|
"language_info": {
|
|
"codemirror_mode": {
|
|
"name": "ipython",
|
|
"version": 3
|
|
},
|
|
"file_extension": ".py",
|
|
"mimetype": "text/x-python",
|
|
"name": "python",
|
|
"nbconvert_exporter": "python",
|
|
"pygments_lexer": "ipython3",
|
|
"version": "3.9.1"
|
|
}
|
|
},
|
|
"nbformat": 4,
|
|
"nbformat_minor": 4
|
|
}
|