mirror of
https://github.com/hwchase17/langchain
synced 2024-10-29 17:07:25 +00:00
0abe996409
--------- Co-authored-by: zhanghexian1 <zhanghexian1@jd.com> Co-authored-by: Bagatur <baskaryan@gmail.com> Co-authored-by: Harrison Chase <hw.chase.17@gmail.com>
446 lines
34 KiB
Plaintext
446 lines
34 KiB
Plaintext
{
|
||
"cells": [
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 2,
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"name": "stderr",
|
||
"output_type": "stream",
|
||
"text": [
|
||
"/export/anaconda3/envs/vearch_cluster_langchain/lib/python3.10/site-packages/tqdm/auto.py:21: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n",
|
||
" from .autonotebook import tqdm as notebook_tqdm\n",
|
||
"Loading checkpoint shards: 100%|██████████| 7/7 [00:07<00:00, 1.01s/it]\n"
|
||
]
|
||
}
|
||
],
|
||
"source": [
|
||
"from langchain.document_loaders import TextLoader\n",
|
||
"from langchain.embeddings.huggingface import HuggingFaceEmbeddings\n",
|
||
"from langchain.text_splitter import RecursiveCharacterTextSplitter\n",
|
||
"from transformers import AutoModel, AutoTokenizer\n",
|
||
"from langchain.vectorstores.vearch import Vearch\n",
|
||
"\n",
|
||
"# repalce to your local model path\n",
|
||
"model_path =\"/data/zhx/zhx/langchain-ChatGLM_new/chatglm2-6b\" \n",
|
||
"\n",
|
||
"tokenizer = AutoTokenizer.from_pretrained(model_path, trust_remote_code=True)\n",
|
||
"model = AutoModel.from_pretrained(model_path, trust_remote_code=True).half().cuda(0)"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 3,
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"name": "stdout",
|
||
"output_type": "stream",
|
||
"text": [
|
||
"Human: 你好!\n",
|
||
"ChatGLM:你好👋!我是人工智能助手 ChatGLM2-6B,很高兴见到你,欢迎问我任何问题。\n",
|
||
"\n",
|
||
"Human: 你知道凌波微步吗,你知道都有谁学会了吗?\n",
|
||
"ChatGLM:凌波微步是一种步伐,最早出自《倚天屠龙记》。在电视剧《人民的名义》中,侯亮平也学会了凌波微步。\n",
|
||
"\n"
|
||
]
|
||
}
|
||
],
|
||
"source": [
|
||
"query = \"你好!\"\n",
|
||
"response, history = model.chat(tokenizer, query, history=[])\n",
|
||
"print(f\"Human: {query}\\nChatGLM:{response}\\n\")\n",
|
||
"query = \"你知道凌波微步吗,你知道都有谁学会了吗?\"\n",
|
||
"response, history = model.chat(tokenizer, query, history=history)\n",
|
||
"print(f\"Human: {query}\\nChatGLM:{response}\\n\")"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 4,
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"name": "stderr",
|
||
"output_type": "stream",
|
||
"text": [
|
||
"No sentence-transformers model found with name /data/zhx/zhx/langchain-ChatGLM_new/text2vec/text2vec-large-chinese. Creating a new one with MEAN pooling.\n"
|
||
]
|
||
}
|
||
],
|
||
"source": [
|
||
"# Add your local knowledge files\n",
|
||
"file_path = \"/data/zhx/zhx/langchain-ChatGLM_new/knowledge_base/天龙八部/lingboweibu.txt\"#Your local file path\"\n",
|
||
"loader = TextLoader(file_path,encoding=\"utf-8\")\n",
|
||
"documents = loader.load()\n",
|
||
"\n",
|
||
"# split text into sentences and embedding the sentences\n",
|
||
"text_splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=100)\n",
|
||
"texts = text_splitter.split_documents(documents)\n",
|
||
"\n",
|
||
"#replace to your model path\n",
|
||
"embedding_path = '/data/zhx/zhx/langchain-ChatGLM_new/text2vec/text2vec-large-chinese'\n",
|
||
"embeddings = HuggingFaceEmbeddings(model_name=embedding_path)\n"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 5,
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"name": "stdout",
|
||
"output_type": "stream",
|
||
"text": [
|
||
"docids ['18ce6747dca04a2c833e60e8dfd83c04', 'aafacb0e46574b378a9f433877ab06a8', '9776bccfdd8643a8b219ccee0596f370']\n",
|
||
"***************after is cluster res*****************\n",
|
||
"docids ['1841638988191686991', '-4519586577642625749', '5028230008472292907']\n"
|
||
]
|
||
}
|
||
],
|
||
"source": [
|
||
"#first add your document into vearch vectorstore\n",
|
||
"vearch_standalone = Vearch.from_documents(\n",
|
||
" texts,embeddings,path_or_url=\"/data/zhx/zhx/langchain-ChatGLM_new/knowledge_base/localdb_new_test\",table_name=\"localdb_new_test\",flag=0)\n",
|
||
"\n",
|
||
"print(\"***************after is cluster res*****************\")\n",
|
||
"\n",
|
||
"vearch_cluster = Vearch.from_documents(\n",
|
||
" texts,embeddings,path_or_url=\"http://test-vearch-langchain-router.vectorbase.svc.ht1.n.jd.local\",db_name=\"vearch_cluster_langchian\",table_name=\"tobenumone\",flag=1)\n"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 6,
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"name": "stdout",
|
||
"output_type": "stream",
|
||
"text": [
|
||
"####################第1段相关文档####################\n",
|
||
"\n",
|
||
"午饭过后,段誉又练“凌波微步”,走一步,吸一口气,走第二步时将气呼出,六十四卦走完,四肢全无麻痹之感,料想呼吸顺畅,便无害处。第二次再走时连走两步吸一口气,再走两步始行呼出。这“凌波微步”是以动功修习内功,脚步踏遍六十四卦一个周天,内息自然而然地也转了一个周天。因此他每走一遍,内力便有一分进益。\n",
|
||
"\n",
|
||
"这般练了几天,“凌波微步”已走得颇为纯熟,不须再数呼吸,纵然疾行,气息也已无所窒滞。心意既畅,跨步时渐渐想到《洛神赋》中那些与“凌波微步”有关的句子:“仿佛兮若轻云之蔽月,飘飘兮若流风之回雪”,“竦轻躯以鹤立,若将飞而未翔”,“体迅飞凫,飘忽若神”,“动无常则,若危若安。进止难期,若往若还”。\n",
|
||
"\n",
|
||
"\n",
|
||
"\n",
|
||
"百度简介\n",
|
||
"\n",
|
||
"凌波微步是「逍遥派」独门轻功身法,精妙异常。\n",
|
||
"\n",
|
||
"凌波微步乃是一门极上乘的轻功,所以列于卷轴之末,以易经八八六十四卦为基础,使用者按特定顺序踏着卦象方位行进,从第一步到最后一步正好行走一个大圈。此步法精妙异常,原是要待人练成「北冥神功」,吸人内力,自身内力已【颇为深厚】之后再练。\n",
|
||
"\n",
|
||
"####################第2段相关文档####################\n",
|
||
"\n",
|
||
"《天龙八部》第五回 微步縠纹生\n",
|
||
"\n",
|
||
"卷轴中此外诸种经脉修习之法甚多,皆是取人内力的法门,段誉虽自语宽解,总觉习之有违本性,单是贪多务得,便非好事,当下暂不理会。\n",
|
||
"\n",
|
||
"卷到卷轴末端,又见到了“凌波微步”那四字,登时便想起《洛神赋》中那些句子来:“凌波微步,罗袜生尘……转眄流精,光润玉颜。含辞未吐,气若幽兰。华容婀娜,令我忘餐。”曹子建那些千古名句,在脑海中缓缓流过:“秾纤得衷,修短合度,肩若削成,腰如约素。延颈秀项,皓质呈露。芳泽无加,铅华弗御。云髻峨峨,修眉连娟。丹唇外朗,皓齿内鲜。明眸善睐,靥辅承权。瑰姿艳逸,仪静体闲。柔情绰态,媚于语言……”这些句子用在木婉清身上,“这话倒也有理”;但如用之于神仙姊姊,只怕更为适合。想到神仙姊姊的姿容体态,“皎若太阳升朝霞,灼若芙蓉出绿波”,但觉依她吩咐行事,实为人生至乐,心想:“我先来练这‘凌波微步’,此乃逃命之妙法,非害人之手段也,练之有百利而无一害。”\n",
|
||
"\n",
|
||
"####################第3段相关文档####################\n",
|
||
"\n",
|
||
"《天龙八部》第二回 玉壁月华明\n",
|
||
"\n",
|
||
"再展帛卷,长卷上源源皆是裸女画像,或立或卧,或现前胸,或见后背。人像的面容都是一般,但或喜或愁,或含情凝眸,或轻嗔薄怒,神情各异。一共有三十六幅图像,每幅像上均有颜色细线,注明穴道部位及练功法诀。\n",
|
||
"\n",
|
||
"帛卷尽处题着“凌波微步”四字,其后绘的是无数足印,注明“妇妹”、“无妄”等等字样,尽是《易经》中的方位。段誉前几日还正全心全意地钻研《易经》,一见到这些名称,登时精神大振,便似遇到故交良友一般。只见足印密密麻麻,不知有几千百个,自一个足印至另一个足印均有绿线贯串,线上绘有箭头,最后写着一行字道:“步法神妙,保身避敌,待积内力,再取敌命。”\n",
|
||
"\n",
|
||
"段誉心道:“神仙姊姊所遗的步法,必定精妙之极,遇到强敌时脱身逃走,那就很好,‘再取敌命’也就不必了。”\n",
|
||
"卷好帛卷,对之作了两个揖,珍而重之地揣入怀中,转身对那玉像道:“神仙姊姊,你吩咐我朝午晚三次练功,段誉不敢有违。今后我对人加倍客气,别人不会来打我,我自然也不会去吸他内力。你这套‘凌波微步’我更要用心练熟,眼见不对,立刻溜之大吉,就吸不到他内力了。”至于“杀尽我逍遥派弟子”一节,却想也不敢去想。\n",
|
||
"\n",
|
||
"********ChatGLM:凌波微步是一门极上乘的轻功,源于《易经》八八六十四卦。使用者按照特定顺序踏着卦象方位行进,从第一步到最后一步正好行走一个大圈。这门轻功精妙异常,可以使人内力大为提升,但需在练成“北冥神功”后才能真正掌握。凌波微步在金庸先生的《天龙八部》中得到了充分的描写。\n",
|
||
"\n",
|
||
"***************************after is cluster res******************************\n",
|
||
"####################第1段相关文档####################\n",
|
||
"\n",
|
||
"午饭过后,段誉又练“凌波微步”,走一步,吸一口气,走第二步时将气呼出,六十四卦走完,四肢全无麻痹之感,料想呼吸顺畅,便无害处。第二次再走时连走两步吸一口气,再走两步始行呼出。这“凌波微步”是以动功修习内功,脚步踏遍六十四卦一个周天,内息自然而然地也转了一个周天。因此他每走一遍,内力便有一分进益。\n",
|
||
"\n",
|
||
"这般练了几天,“凌波微步”已走得颇为纯熟,不须再数呼吸,纵然疾行,气息也已无所窒滞。心意既畅,跨步时渐渐想到《洛神赋》中那些与“凌波微步”有关的句子:“仿佛兮若轻云之蔽月,飘飘兮若流风之回雪”,“竦轻躯以鹤立,若将飞而未翔”,“体迅飞凫,飘忽若神”,“动无常则,若危若安。进止难期,若往若还”。\n",
|
||
"\n",
|
||
"\n",
|
||
"\n",
|
||
"百度简介\n",
|
||
"\n",
|
||
"凌波微步是「逍遥派」独门轻功身法,精妙异常。\n",
|
||
"\n",
|
||
"凌波微步乃是一门极上乘的轻功,所以列于卷轴之末,以易经八八六十四卦为基础,使用者按特定顺序踏着卦象方位行进,从第一步到最后一步正好行走一个大圈。此步法精妙异常,原是要待人练成「北冥神功」,吸人内力,自身内力已【颇为深厚】之后再练。\n",
|
||
"\n",
|
||
"####################第2段相关文档####################\n",
|
||
"\n",
|
||
"《天龙八部》第五回 微步縠纹生\n",
|
||
"\n",
|
||
"卷轴中此外诸种经脉修习之法甚多,皆是取人内力的法门,段誉虽自语宽解,总觉习之有违本性,单是贪多务得,便非好事,当下暂不理会。\n",
|
||
"\n",
|
||
"卷到卷轴末端,又见到了“凌波微步”那四字,登时便想起《洛神赋》中那些句子来:“凌波微步,罗袜生尘……转眄流精,光润玉颜。含辞未吐,气若幽兰。华容婀娜,令我忘餐。”曹子建那些千古名句,在脑海中缓缓流过:“秾纤得衷,修短合度,肩若削成,腰如约素。延颈秀项,皓质呈露。芳泽无加,铅华弗御。云髻峨峨,修眉连娟。丹唇外朗,皓齿内鲜。明眸善睐,靥辅承权。瑰姿艳逸,仪静体闲。柔情绰态,媚于语言……”这些句子用在木婉清身上,“这话倒也有理”;但如用之于神仙姊姊,只怕更为适合。想到神仙姊姊的姿容体态,“皎若太阳升朝霞,灼若芙蓉出绿波”,但觉依她吩咐行事,实为人生至乐,心想:“我先来练这‘凌波微步’,此乃逃命之妙法,非害人之手段也,练之有百利而无一害。”\n",
|
||
"\n",
|
||
"####################第3段相关文档####################\n",
|
||
"\n",
|
||
"《天龙八部》第二回 玉壁月华明\n",
|
||
"\n",
|
||
"再展帛卷,长卷上源源皆是裸女画像,或立或卧,或现前胸,或见后背。人像的面容都是一般,但或喜或愁,或含情凝眸,或轻嗔薄怒,神情各异。一共有三十六幅图像,每幅像上均有颜色细线,注明穴道部位及练功法诀。\n",
|
||
"\n",
|
||
"帛卷尽处题着“凌波微步”四字,其后绘的是无数足印,注明“妇妹”、“无妄”等等字样,尽是《易经》中的方位。段誉前几日还正全心全意地钻研《易经》,一见到这些名称,登时精神大振,便似遇到故交良友一般。只见足印密密麻麻,不知有几千百个,自一个足印至另一个足印均有绿线贯串,线上绘有箭头,最后写着一行字道:“步法神妙,保身避敌,待积内力,再取敌命。”\n",
|
||
"\n",
|
||
"段誉心道:“神仙姊姊所遗的步法,必定精妙之极,遇到强敌时脱身逃走,那就很好,‘再取敌命’也就不必了。”\n",
|
||
"卷好帛卷,对之作了两个揖,珍而重之地揣入怀中,转身对那玉像道:“神仙姊姊,你吩咐我朝午晚三次练功,段誉不敢有违。今后我对人加倍客气,别人不会来打我,我自然也不会去吸他内力。你这套‘凌波微步’我更要用心练熟,眼见不对,立刻溜之大吉,就吸不到他内力了。”至于“杀尽我逍遥派弟子”一节,却想也不敢去想。\n",
|
||
"\n",
|
||
"********ChatGLM:凌波微步是一门极上乘的轻功,源于《易经》中的六十四卦。使用者按照特定顺序踏着卦象方位行进,从第一步到最后一步正好行走一个大圈。这门轻功精妙异常,可以使人内力增进,但需要谨慎练习,避免伤害他人。凌波微步在逍遥派中尤为流行,但并非所有逍遥派弟子都会凌波微步。\n",
|
||
"\n"
|
||
]
|
||
}
|
||
],
|
||
"source": [
|
||
"query = \"你知道凌波微步吗,你知道都有谁会凌波微步?\"\n",
|
||
"vearch_standalone_res=vearch_standalone.similarity_search(query, 3)\n",
|
||
"for idx,tmp in enumerate(vearch_standalone_res): \n",
|
||
" print(f\"{'#'*20}第{idx+1}段相关文档{'#'*20}\\n\\n{tmp.page_content}\\n\")\n",
|
||
"\n",
|
||
"# combine your local knowleadge and query \n",
|
||
"context = \"\".join([tmp.page_content for tmp in vearch_standalone_res])\n",
|
||
"new_query = f\"基于以下信息,尽可能准确的来回答用户的问题。背景信息:\\n {context} \\n 回答用户这个问题:{query}\\n\\n\"\n",
|
||
"response, history = model.chat(tokenizer, new_query, history=[])\n",
|
||
"print(f\"********ChatGLM:{response}\\n\")\n",
|
||
"\n",
|
||
"print(\"***************************after is cluster res******************************\")\n",
|
||
"\n",
|
||
"query_c = \"你知道凌波微步吗,你知道都有谁会凌波微步?\"\n",
|
||
"cluster_res=vearch_cluster.similarity_search(query_c, 3)\n",
|
||
"for idx,tmp in enumerate(cluster_res): \n",
|
||
" print(f\"{'#'*20}第{idx+1}段相关文档{'#'*20}\\n\\n{tmp.page_content}\\n\")\n",
|
||
"\n",
|
||
"# combine your local knowleadge and query \n",
|
||
"context_c = \"\".join([tmp.page_content for tmp in cluster_res])\n",
|
||
"new_query_c = f\"基于以下信息,尽可能准确的来回答用户的问题。背景信息:\\n {context_c} \\n 回答用户这个问题:{query_c}\\n\\n\"\n",
|
||
"response_c, history_c = model.chat(tokenizer, new_query_c, history=[])\n",
|
||
"print(f\"********ChatGLM:{response_c}\\n\")"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 7,
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"name": "stdout",
|
||
"output_type": "stream",
|
||
"text": [
|
||
"Human: 你知道vearch是什么吗?\n",
|
||
"ChatGLM:是的,我知道 Vearch。Vearch 是一种用于计算机械系统极化子的工具,它可以用于模拟和优化电路的性能。它是一个基于Matlab的电路仿真软件,可以用于设计和分析各种类型的电路,包括交流电路和直流电路。\n",
|
||
"\n",
|
||
"docids ['eee5e7468434427eb49829374c1e8220', '2776754da8fc4bb58d3e482006010716', '9223acd6d89d4c2c84ff42677ac0d47c']\n",
|
||
"*****************after is cluster res********************\n",
|
||
"docids ['-4311783201092343475', '-2899734009733762895', '1342026762029067927']\n"
|
||
]
|
||
},
|
||
{
|
||
"data": {
|
||
"text/plain": [
|
||
"['-4311783201092343475', '-2899734009733762895', '1342026762029067927']"
|
||
]
|
||
},
|
||
"execution_count": 7,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"query = \"你知道vearch是什么吗?\"\n",
|
||
"response, history = model.chat(tokenizer, query, history=history)\n",
|
||
"print(f\"Human: {query}\\nChatGLM:{response}\\n\")\n",
|
||
"\n",
|
||
"vearch_info = [\"Vearch 是一款存储大语言模型数据的向量数据库,用于存储和快速搜索模型embedding后的向量,可用于基于个人知识库的大模型应用\",\n",
|
||
" \"Vearch 支持OpenAI, Llama, ChatGLM等模型,以及LangChain库\",\n",
|
||
" \"vearch 是基于C语言,go语言开发的,并提供python接口,可以直接通过pip安装\"]\n",
|
||
"vearch_source=[{'source': '/data/zhx/zhx/langchain-ChatGLM_new/knowledge_base/tlbb/three_body.txt'},{'source': '/data/zhx/zhx/langchain-ChatGLM_new/knowledge_base/tlbb/three_body.txt'},{'source': '/data/zhx/zhx/langchain-ChatGLM_new/knowledge_base/tlbb/three_body.txt'}]\n",
|
||
"vearch_standalone.add_texts(vearch_info,vearch_source)\n",
|
||
"\n",
|
||
"print(\"*****************after is cluster res********************\")\n",
|
||
"\n",
|
||
"vearch_cluster.add_texts(vearch_info,vearch_source)\n",
|
||
"\n",
|
||
"\n"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 8,
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"name": "stdout",
|
||
"output_type": "stream",
|
||
"text": [
|
||
"####################第1段相关文档####################\n",
|
||
"\n",
|
||
"Vearch 是一款存储大语言模型数据的向量数据库,用于存储和快速搜索模型embedding后的向量,可用于基于个人知识库的大模型应用\n",
|
||
"\n",
|
||
"####################第2段相关文档####################\n",
|
||
"\n",
|
||
"Vearch 支持OpenAI, Llama, ChatGLM等模型,以及LangChain库\n",
|
||
"\n",
|
||
"####################第3段相关文档####################\n",
|
||
"\n",
|
||
"vearch 是基于C语言,go语言开发的,并提供python接口,可以直接通过pip安装\n",
|
||
"\n",
|
||
"***************ChatGLM:是的,Varch是一个向量数据库,旨在存储和快速搜索模型embedding后的向量。它支持OpenAI、ChatGLM等模型,并可直接通过pip安装。\n",
|
||
"\n",
|
||
"***************after is cluster res******************\n",
|
||
"####################第1段相关文档####################\n",
|
||
"\n",
|
||
"Vearch 是一款存储大语言模型数据的向量数据库,用于存储和快速搜索模型embedding后的向量,可用于基于个人知识库的大模型应用\n",
|
||
"\n",
|
||
"####################第2段相关文档####################\n",
|
||
"\n",
|
||
"Vearch 支持OpenAI, Llama, ChatGLM等模型,以及LangChain库\n",
|
||
"\n",
|
||
"####################第3段相关文档####################\n",
|
||
"\n",
|
||
"vearch 是基于C语言,go语言开发的,并提供python接口,可以直接通过pip安装\n",
|
||
"\n",
|
||
"***************ChatGLM:是的,Varch是一个向量数据库,旨在存储和快速搜索模型embedding后的向量。它支持OpenAI,ChatGLM等模型,并可用于基于个人知识库的大模型应用。Varch基于C语言和Go语言开发,并提供Python接口,可以通过pip安装。\n",
|
||
"\n"
|
||
]
|
||
}
|
||
],
|
||
"source": [
|
||
"query3 = \"你知道vearch是什么吗?\"\n",
|
||
"res1 = vearch_standalone.similarity_search(query3, 3)\n",
|
||
"for idx,tmp in enumerate(res1): \n",
|
||
" print(f\"{'#'*20}第{idx+1}段相关文档{'#'*20}\\n\\n{tmp.page_content}\\n\")\n",
|
||
"\n",
|
||
"context1 = \"\".join([tmp.page_content for tmp in res1])\n",
|
||
"new_query1 = f\"基于以下信息,尽可能准确的来回答用户的问题。背景信息:\\n {context1} \\n 回答用户这个问题:{query3}\\n\\n\"\n",
|
||
"response, history = model.chat(tokenizer, new_query1, history=[])\n",
|
||
"print(f\"***************ChatGLM:{response}\\n\")\n",
|
||
"\n",
|
||
"print(\"***************after is cluster res******************\")\n",
|
||
"\n",
|
||
"query3_c = \"你知道vearch是什么吗?\"\n",
|
||
"res1_c = vearch_standalone.similarity_search(query3_c, 3)\n",
|
||
"for idx,tmp in enumerate(res1_c): \n",
|
||
" print(f\"{'#'*20}第{idx+1}段相关文档{'#'*20}\\n\\n{tmp.page_content}\\n\")\n",
|
||
"\n",
|
||
"context1_C = \"\".join([tmp.page_content for tmp in res1_c])\n",
|
||
"new_query1_c = f\"基于以下信息,尽可能准确的来回答用户的问题。背景信息:\\n {context1_C} \\n 回答用户这个问题:{query3_c}\\n\\n\"\n",
|
||
"response_c, history_c = model.chat(tokenizer, new_query1_c, history=[])\n",
|
||
"\n",
|
||
"print(f\"***************ChatGLM:{response_c}\\n\")"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 9,
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"name": "stdout",
|
||
"output_type": "stream",
|
||
"text": [
|
||
"delete vearch standalone docid True\n",
|
||
"Human: 你知道vearch是什么吗?\n",
|
||
"ChatGLM:Vearch是一种用于处理向量的库,可以轻松地将向量转换为矩阵,并提供许多有用的函数和算法,以操作向量。 Vearch支持许多常见的向量操作,例如加法、减法、乘法、除法、矩阵乘法、求和、统计和归一化等。 Vearch还提供了一些高级功能,例如L2正则化、协方差矩阵、稀疏矩阵和奇异值分解等。\n",
|
||
"\n",
|
||
"delete vearch cluster docid True\n",
|
||
"Human: 你知道vearch是什么吗?\n",
|
||
"ChatGLM:Vearch是一种用于处理向量数据的函数,可以应用于多种不同的编程语言和数据结构中。\n",
|
||
"\n",
|
||
"Vearch最初是作为Java中一个名为“vearch”的包而出现的,它的目的是提供一种高效的向量数据结构。它支持向量的多态性,可以轻松地实现不同类型的向量之间的转换,同时还支持向量的压缩和反向操作等操作。\n",
|
||
"\n",
|
||
"后来,Vearch被广泛应用于其他编程语言中,如Python、Ruby、JavaScript等。在Python中,它被称为“vectorize”,在Ruby中,它被称为“Vector”。\n",
|
||
"\n",
|
||
"Vearch的主要优点是它的向量操作具有多态性,可以应用于不同类型的向量数据,同时还支持高效的向量操作和反向操作,因此可以提高程序的性能。\n",
|
||
"\n",
|
||
"after delete docid to query again: {}\n",
|
||
"get existed docid {'18ce6747dca04a2c833e60e8dfd83c04': Document(page_content='《天龙八部》第二回 玉壁月华明\\n\\n再展帛卷,长卷上源源皆是裸女画像,或立或卧,或现前胸,或见后背。人像的面容都是一般,但或喜或愁,或含情凝眸,或轻嗔薄怒,神情各异。一共有三十六幅图像,每幅像上均有颜色细线,注明穴道部位及练功法诀。\\n\\n帛卷尽处题着“凌波微步”四字,其后绘的是无数足印,注明“妇妹”、“无妄”等等字样,尽是《易经》中的方位。段誉前几日还正全心全意地钻研《易经》,一见到这些名称,登时精神大振,便似遇到故交良友一般。只见足印密密麻麻,不知有几千百个,自一个足印至另一个足印均有绿线贯串,线上绘有箭头,最后写着一行字道:“步法神妙,保身避敌,待积内力,再取敌命。”\\n\\n段誉心道:“神仙姊姊所遗的步法,必定精妙之极,遇到强敌时脱身逃走,那就很好,‘再取敌命’也就不必了。”\\n卷好帛卷,对之作了两个揖,珍而重之地揣入怀中,转身对那玉像道:“神仙姊姊,你吩咐我朝午晚三次练功,段誉不敢有违。今后我对人加倍客气,别人不会来打我,我自然也不会去吸他内力。你这套‘凌波微步’我更要用心练熟,眼见不对,立刻溜之大吉,就吸不到他内力了。”至于“杀尽我逍遥派弟子”一节,却想也不敢去想。', metadata={'source': '/data/zhx/zhx/langchain-ChatGLM_new/knowledge_base/天龙八部/lingboweibu.txt'}), 'aafacb0e46574b378a9f433877ab06a8': Document(page_content='《天龙八部》第五回 微步縠纹生\\n\\n卷轴中此外诸种经脉修习之法甚多,皆是取人内力的法门,段誉虽自语宽解,总觉习之有违本性,单是贪多务得,便非好事,当下暂不理会。\\n\\n卷到卷轴末端,又见到了“凌波微步”那四字,登时便想起《洛神赋》中那些句子来:“凌波微步,罗袜生尘……转眄流精,光润玉颜。含辞未吐,气若幽兰。华容婀娜,令我忘餐。”曹子建那些千古名句,在脑海中缓缓流过:“秾纤得衷,修短合度,肩若削成,腰如约素。延颈秀项,皓质呈露。芳泽无加,铅华弗御。云髻峨峨,修眉连娟。丹唇外朗,皓齿内鲜。明眸善睐,靥辅承权。瑰姿艳逸,仪静体闲。柔情绰态,媚于语言……”这些句子用在木婉清身上,“这话倒也有理”;但如用之于神仙姊姊,只怕更为适合。想到神仙姊姊的姿容体态,“皎若太阳升朝霞,灼若芙蓉出绿波”,但觉依她吩咐行事,实为人生至乐,心想:“我先来练这‘凌波微步’,此乃逃命之妙法,非害人之手段也,练之有百利而无一害。”', metadata={'source': '/data/zhx/zhx/langchain-ChatGLM_new/knowledge_base/天龙八部/lingboweibu.txt'}), '9776bccfdd8643a8b219ccee0596f370': Document(page_content='午饭过后,段誉又练“凌波微步”,走一步,吸一口气,走第二步时将气呼出,六十四卦走完,四肢全无麻痹之感,料想呼吸顺畅,便无害处。第二次再走时连走两步吸一口气,再走两步始行呼出。这“凌波微步”是以动功修习内功,脚步踏遍六十四卦一个周天,内息自然而然地也转了一个周天。因此他每走一遍,内力便有一分进益。\\n\\n这般练了几天,“凌波微步”已走得颇为纯熟,不须再数呼吸,纵然疾行,气息也已无所窒滞。心意既畅,跨步时渐渐想到《洛神赋》中那些与“凌波微步”有关的句子:“仿佛兮若轻云之蔽月,飘飘兮若流风之回雪”,“竦轻躯以鹤立,若将飞而未翔”,“体迅飞凫,飘忽若神”,“动无常则,若危若安。进止难期,若往若还”。\\n\\n\\n\\n百度简介\\n\\n凌波微步是「逍遥派」独门轻功身法,精妙异常。\\n\\n凌波微步乃是一门极上乘的轻功,所以列于卷轴之末,以易经八八六十四卦为基础,使用者按特定顺序踏着卦象方位行进,从第一步到最后一步正好行走一个大圈。此步法精妙异常,原是要待人练成「北冥神功」,吸人内力,自身内力已【颇为深厚】之后再练。', metadata={'source': '/data/zhx/zhx/langchain-ChatGLM_new/knowledge_base/天龙八部/lingboweibu.txt'})}\n",
|
||
"after delete docid to query again: {}\n",
|
||
"get existed docid {'1841638988191686991': Document(page_content='《天龙八部》第二回 玉壁月华明\\n\\n再展帛卷,长卷上源源皆是裸女画像,或立或卧,或现前胸,或见后背。人像的面容都是一般,但或喜或愁,或含情凝眸,或轻嗔薄怒,神情各异。一共有三十六幅图像,每幅像上均有颜色细线,注明穴道部位及练功法诀。\\n\\n帛卷尽处题着“凌波微步”四字,其后绘的是无数足印,注明“妇妹”、“无妄”等等字样,尽是《易经》中的方位。段誉前几日还正全心全意地钻研《易经》,一见到这些名称,登时精神大振,便似遇到故交良友一般。只见足印密密麻麻,不知有几千百个,自一个足印至另一个足印均有绿线贯串,线上绘有箭头,最后写着一行字道:“步法神妙,保身避敌,待积内力,再取敌命。”\\n\\n段誉心道:“神仙姊姊所遗的步法,必定精妙之极,遇到强敌时脱身逃走,那就很好,‘再取敌命’也就不必了。”\\n卷好帛卷,对之作了两个揖,珍而重之地揣入怀中,转身对那玉像道:“神仙姊姊,你吩咐我朝午晚三次练功,段誉不敢有违。今后我对人加倍客气,别人不会来打我,我自然也不会去吸他内力。你这套‘凌波微步’我更要用心练熟,眼见不对,立刻溜之大吉,就吸不到他内力了。”至于“杀尽我逍遥派弟子”一节,却想也不敢去想。', metadata={'source': '/data/zhx/zhx/langchain-ChatGLM_new/knowledge_base/天龙八部/lingboweibu.txt'}), '-4519586577642625749': Document(page_content='《天龙八部》第五回 微步縠纹生\\n\\n卷轴中此外诸种经脉修习之法甚多,皆是取人内力的法门,段誉虽自语宽解,总觉习之有违本性,单是贪多务得,便非好事,当下暂不理会。\\n\\n卷到卷轴末端,又见到了“凌波微步”那四字,登时便想起《洛神赋》中那些句子来:“凌波微步,罗袜生尘……转眄流精,光润玉颜。含辞未吐,气若幽兰。华容婀娜,令我忘餐。”曹子建那些千古名句,在脑海中缓缓流过:“秾纤得衷,修短合度,肩若削成,腰如约素。延颈秀项,皓质呈露。芳泽无加,铅华弗御。云髻峨峨,修眉连娟。丹唇外朗,皓齿内鲜。明眸善睐,靥辅承权。瑰姿艳逸,仪静体闲。柔情绰态,媚于语言……”这些句子用在木婉清身上,“这话倒也有理”;但如用之于神仙姊姊,只怕更为适合。想到神仙姊姊的姿容体态,“皎若太阳升朝霞,灼若芙蓉出绿波”,但觉依她吩咐行事,实为人生至乐,心想:“我先来练这‘凌波微步’,此乃逃命之妙法,非害人之手段也,练之有百利而无一害。”', metadata={'source': '/data/zhx/zhx/langchain-ChatGLM_new/knowledge_base/天龙八部/lingboweibu.txt'}), '5028230008472292907': Document(page_content='午饭过后,段誉又练“凌波微步”,走一步,吸一口气,走第二步时将气呼出,六十四卦走完,四肢全无麻痹之感,料想呼吸顺畅,便无害处。第二次再走时连走两步吸一口气,再走两步始行呼出。这“凌波微步”是以动功修习内功,脚步踏遍六十四卦一个周天,内息自然而然地也转了一个周天。因此他每走一遍,内力便有一分进益。\\n\\n这般练了几天,“凌波微步”已走得颇为纯熟,不须再数呼吸,纵然疾行,气息也已无所窒滞。心意既畅,跨步时渐渐想到《洛神赋》中那些与“凌波微步”有关的句子:“仿佛兮若轻云之蔽月,飘飘兮若流风之回雪”,“竦轻躯以鹤立,若将飞而未翔”,“体迅飞凫,飘忽若神”,“动无常则,若危若安。进止难期,若往若还”。\\n\\n\\n\\n百度简介\\n\\n凌波微步是「逍遥派」独门轻功身法,精妙异常。\\n\\n凌波微步乃是一门极上乘的轻功,所以列于卷轴之末,以易经八八六十四卦为基础,使用者按特定顺序踏着卦象方位行进,从第一步到最后一步正好行走一个大圈。此步法精妙异常,原是要待人练成「北冥神功」,吸人内力,自身内力已【颇为深厚】之后再练。', metadata={'source': '/data/zhx/zhx/langchain-ChatGLM_new/knowledge_base/天龙八部/lingboweibu.txt'})}\n"
|
||
]
|
||
}
|
||
],
|
||
"source": [
|
||
"##delete and get function need to maintian docids \n",
|
||
"##your docid\n",
|
||
"\n",
|
||
"res_d=vearch_standalone.delete(['eee5e7468434427eb49829374c1e8220', '2776754da8fc4bb58d3e482006010716', '9223acd6d89d4c2c84ff42677ac0d47c'])\n",
|
||
"print(\"delete vearch standalone docid\",res_d)\n",
|
||
"query = \"你知道vearch是什么吗?\"\n",
|
||
"response, history = model.chat(tokenizer, query, history=[])\n",
|
||
"print(f\"Human: {query}\\nChatGLM:{response}\\n\")\n",
|
||
"\n",
|
||
"res_cluster=vearch_cluster.delete(['-4311783201092343475', '-2899734009733762895', '1342026762029067927'])\n",
|
||
"print(\"delete vearch cluster docid\",res_cluster)\n",
|
||
"query_c = \"你知道vearch是什么吗?\"\n",
|
||
"response_c, history = model.chat(tokenizer, query_c, history=[])\n",
|
||
"print(f\"Human: {query}\\nChatGLM:{response_c}\\n\")\n",
|
||
"\n",
|
||
"\n",
|
||
"get_delet_doc=vearch_standalone.get(['eee5e7468434427eb49829374c1e8220', '2776754da8fc4bb58d3e482006010716', '9223acd6d89d4c2c84ff42677ac0d47c'])\n",
|
||
"print(\"after delete docid to query again:\",get_delet_doc)\n",
|
||
"get_id_doc=vearch_standalone.get(['18ce6747dca04a2c833e60e8dfd83c04', 'aafacb0e46574b378a9f433877ab06a8', '9776bccfdd8643a8b219ccee0596f370','9223acd6d89d4c2c84ff42677ac0d47c'])\n",
|
||
"print(\"get existed docid\",get_id_doc)\n",
|
||
"\n",
|
||
"get_delet_doc=vearch_cluster.get(['-4311783201092343475', '-2899734009733762895', '1342026762029067927'])\n",
|
||
"print(\"after delete docid to query again:\",get_delet_doc)\n",
|
||
"get_id_doc=vearch_cluster.get(['1841638988191686991', '-4519586577642625749', '5028230008472292907','1342026762029067927'])\n",
|
||
"print(\"get existed docid\",get_id_doc)\n"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": null,
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"\n"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": null,
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"\n"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": null,
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": []
|
||
}
|
||
],
|
||
"metadata": {
|
||
"kernelspec": {
|
||
"display_name": "Python 3.10.13 ('vearch_cluster_langchain')",
|
||
"language": "python",
|
||
"name": "python3"
|
||
},
|
||
"language_info": {
|
||
"codemirror_mode": {
|
||
"name": "ipython",
|
||
"version": 3
|
||
},
|
||
"file_extension": ".py",
|
||
"mimetype": "text/x-python",
|
||
"name": "python",
|
||
"nbconvert_exporter": "python",
|
||
"pygments_lexer": "ipython3",
|
||
"version": "3.10.13"
|
||
},
|
||
"orig_nbformat": 4,
|
||
"vscode": {
|
||
"interpreter": {
|
||
"hash": "f1da10a89896267ed34b497c9568817f36cc7ea79826b5cfca4d96376f5b4835"
|
||
}
|
||
}
|
||
},
|
||
"nbformat": 4,
|
||
"nbformat_minor": 2
|
||
}
|