Merge pull request #13 from sean1832/Major_Dev

Major dev 1.0.0
19-keywords-added-as-filtering-conditions 1.0.0
Zeke Zhang committed 1 year ago via GitHub
commit 39e0ee9676

@ -1,6 +1,6 @@
{
"name": "GPT-Brain",
"version": "0.1.1",
"version": "1.0.0",
"license": "MIT",
"author": "Zeke Zhang",
"homepage": "https://github.com/sean1832/GPT-Brain",

@ -9,8 +9,15 @@
*💡I am not a professional programmer and am quite new to Python, so this project may contain all kinds of bugs. If you run into one, please report it in the [Issues section](https://github.com/sean1832/GPT-Brain/issues) and I will patch it as best I can.*
### Introduction
This program leverages the power of [GPT-3](https://platform.openai.com/docs/models/gpt-3) and [3.5](https://platform.openai.com/docs/models/gpt-3-5) to summarize the content of atomic notes and to answer questions about specific note content.
It scans a designated directory, typically a vault containing multiple notes, and appends the contents of all notes into a single file.
That file then serves as the context for the user's query. The program can identify relationships between note contents and generate a refined response that summarizes the key points.
Although the program is compatible with other note-taking software that uses markdown or txt, it is primarily designed with [Obsidian](https://obsidian.md/) in mind.
### Features
- [x] Use [OpenAI GPT-3](https://platform.openai.com/docs/models/gpt-3) to generate responses.
- [x] Use [OpenAI GPT-3](https://platform.openai.com/docs/models/gpt-3) and [GPT-3.5 (ChatGPT)](https://platform.openai.com/docs/models/gpt-3-5) to generate responses.
- [x] Use [OpenAI embedding](https://platform.openai.com/docs/guides/embeddings/what-are-embeddings) for semantic comparison of the question and note content for enhanced searching.
- [x] Configurable prompts.
- [x] Customizable personal background information for more relevant answers.
@ -23,13 +30,9 @@
- [x] Basic and advanced parameter sliders for easy adjustment of the OpenAI language model configuration.
### Todo
- [x] ~~Batch script to update libraries.~~
- [x] ~~Versioning.~~
- [x] ~~Tooltips for parameters.~~
- [x] ~~Multilingual UI support.~~
- [x] ~~Multilingual search support.~~
- [ ] Provide detailed user documentation.
- [ ] Release for Windows.
- [ ] Support PDF note format.
- [ ] Support PDF OCR scanning.
- [ ] Support Word document format.
## Install
### 1. What you need

@ -9,8 +9,19 @@
*💡I am not a professional programmer and am fairly new to Python, so this project may contain bugs. If you encounter any issues, please suggest them in the [Issues section](https://github.com/sean1832/GPT-Brain/issues).*
### Introduction
This program leverages the power of [GPT-3](https://platform.openai.com/docs/models/gpt-3) and [3.5](https://platform.openai.com/docs/models/gpt-3-5) to summarize the content of atomic notes
and to answer questions related to specific notes.
The program scans a designated directory, typically a vault containing multiple notes,
and appends the contents of all notes into a single file.
This file then serves as the context for the user's query. The program can identify relationships between note contents
and generate a refined response that summarizes the key points.
Although the program is compatible with other note-taking software that uses markdown or txt,
it is primarily designed with [Obsidian](https://obsidian.md/) in mind.
### Features
- [x] Use [OpenAI GPT-3](https://platform.openai.com/docs/models/gpt-3) to generate responses.
- [x] Use [OpenAI GPT-3](https://platform.openai.com/docs/models/gpt-3) and [GPT-3.5 (ChatGPT)](https://platform.openai.com/docs/models/gpt-3-5) to generate responses.
- [x] Use [OpenAI embeddings](https://platform.openai.com/docs/guides/embeddings/what-are-embeddings) to semantically compare the question with note content for enhanced searching.
- [x] Configurable prompts.
- [x] Customizable personal background information for more accurate answers.
@ -23,13 +34,9 @@
- [x] Basic and advanced parameter sliders for OpenAI language model configuration.
### Todo
- [x] ~~Batch script to update libraries.~~
- [x] ~~Versioning.~~
- [x] ~~Tooltips for parameters.~~
- [x] ~~Multilingual UI.~~
- [x] ~~Multilingual search support.~~
- [ ] Provide detailed documentation for users.
- [ ] Release for Windows.
- [ ] Support PDF format.
- [ ] Support PDF OCR scanning.
- [ ] Support Word documents.
## Install
### 1. What you need

@ -1,3 +1,3 @@
from GPT import query
from GPT import toolkit
from GPT import gpt_tools
from GPT import model

@ -0,0 +1,94 @@
import openai
import numpy as np
import requests
import sseclient
# this function compares the similarity of two vectors.
# The higher the dot product, the more alike the vectors are
def similarity(v1, v2):
return np.dot(v1, v2)
# return the embedding vector for the given content
def embedding(content, engine='text-embedding-ada-002'):
response = openai.Embedding.create(input=content, engine=engine)
vector = response['data'][0]['embedding']
return vector
def search_chunks(query, data, count=1):
vector = embedding(query)
points = []
for item in data:
# compare search terms with brain-data
point = similarity(vector, item['vector'])
points.append({
'content': item['content'],
'point': point
})
# sort points in descending order of similarity
ordered = sorted(points, key=lambda d: d['point'], reverse=True)
return ordered[0:count]
def gpt3(prompt, model, params):
response = openai.Completion.create(
model=model,
prompt=prompt,
temperature=params.temp,
max_tokens=params.max_tokens,
top_p=params.top_p,
frequency_penalty=params.frequency_penalty,
presence_penalty=params.present_penalty
)
text = response['choices'][0]['text'].strip()
return text
def gpt35(prompt, params, system_role_content: str = 'You are a helpful assistant.'):
completions = openai.ChatCompletion.create(
model="gpt-3.5-turbo",
max_tokens=params.max_tokens,
temperature=params.temp,
top_p=params.top_p,
frequency_penalty=params.frequency_penalty,
presence_penalty=params.present_penalty,
messages=[
{"role": "system", "content": system_role_content},
{"role": "user", "content": prompt}
])
text = completions['choices'][0]['message']['content']
return text
def gpt3_stream(prompt, model, params):
response = openai.Completion.create(
model=model,
stream=True,
prompt=prompt,
temperature=params.temp,
max_tokens=params.max_tokens,
top_p=params.top_p,
frequency_penalty=params.frequency_penalty,
presence_penalty=params.present_penalty
)
return response
def gpt35_stream(prompt, params, system_role_content: str = 'You are a helpful assistant.'):
completions = openai.ChatCompletion.create(
model="gpt-3.5-turbo",
max_tokens=params.max_tokens,
temperature=params.temp,
top_p=params.top_p,
frequency_penalty=params.frequency_penalty,
presence_penalty=params.present_penalty,
stream=True,
messages=[
{"role": "system", "content": system_role_content},
{"role": "user", "content": prompt}
])
return completions
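For orientation, here is a hedged usage sketch of the new GPT.gpt_tools helpers above. The params fields mirror the attribute names used throughout this PR (temp, max_tokens, top_p, frequency_penalty, present_penalty); the API key placeholder and the brain-data path are assumptions, not the repository's actual defaults.

```python
# Minimal sketch, not part of the PR: exercising the gpt_tools helpers directly.
import json
from types import SimpleNamespace

import openai
from GPT import gpt_tools

openai.api_key = 'sk-...'  # placeholder; the app reads its key from .user\API-KEYS.txt

# assumption: a params namespace with the fields the helpers expect
params = SimpleNamespace(temp=0.7, max_tokens=500, top_p=1.0,
                         frequency_penalty=0.0, present_penalty=0.0)

# hypothetical brain data produced by the build step: [{'content': ..., 'vector': ...}, ...]
with open('.user/brain-data.json', encoding='utf-8') as f:
    data = json.load(f)

question = 'What did I write about embeddings?'
top = gpt_tools.search_chunks(question, data, count=1)[0]  # most similar note chunk

# non-streaming ChatGPT call
answer = gpt_tools.gpt35(f"Context:\n{top['content']}\n\nQuestion: {question}", params)
print(answer)

# streaming completion call: iterate the generator returned by the openai library
for chunk in gpt_tools.gpt3_stream('Summarize my notes on embeddings.', 'text-davinci-003', params):
    print(chunk['choices'][0]['text'], end='', flush=True)
```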

@ -11,12 +11,12 @@ API_KEY = util.read_file(r'.user\API-KEYS.txt').strip()
openai.api_key = API_KEY
SESSION_LANG = st.session_state['SESSION_LANGUAGE']
_ = language.set_language()
def build(chunk_size=4000):
openai.api_key = API_KEY
all_text = util.read_file(r'.user\input.txt')
# split text into smaller chunk of 4000 char each
@ -24,7 +24,7 @@ def build(chunk_size=4000):
chunk_count = len(chunks)
result = []
for idx, chunk in enumerate(chunks):
embedding = GPT.toolkit.embedding(chunk.encode(encoding='ASCII', errors='ignore').decode())
embedding = GPT.gpt_tools.embedding(chunk.encode(encoding='ASCII', errors='ignore').decode())
info = {'content': chunk, 'vector': embedding}
print(info, '\n\n\n')
@ -38,7 +38,7 @@ def build(chunk_size=4000):
def run(query, model, prompt_file, isQuestion, params, info_file=None):
if isQuestion:
data = util.read_json(INFO.BRAIN_DATA)
results = GPT.toolkit.search_chunks(query, data, params.chunk_count)
results = GPT.gpt_tools.search_chunks(query, data, params.chunk_count)
answers = []
for result in results:
my_info = util.read_file(info_file)
@ -47,7 +47,10 @@ def run(query, model, prompt_file, isQuestion, params, info_file=None):
prompt = prompt.replace('<<QS>>', query)
prompt = prompt.replace('<<MY-INFO>>', my_info)
answer = GPT.toolkit.gpt3(prompt, model, params)
if model == 'gpt-3.5-turbo':
answer = GPT.gpt_tools.gpt35(prompt, params)
else:
answer = GPT.gpt_tools.gpt3(prompt, model, params)
answers.append(answer)
all_response = '\n\n'.join(answers)
else:
@ -55,27 +58,38 @@ def run(query, model, prompt_file, isQuestion, params, info_file=None):
responses = []
for chunk in chunks:
prompt = util.read_file(prompt_file).replace('<<DATA>>', chunk)
response = GPT.toolkit.gpt3(prompt, model, params)
if model == 'gpt-3.5-turbo':
response = GPT.gpt_tools.gpt35(prompt, params)
else:
response = GPT.gpt_tools.gpt3(prompt, model, params)
responses.append(response)
all_response = '\n\n'.join(responses)
return all_response
def run_stream(query, model, prompt_file, isQuestion, params, info_file=None):
client = None
def get_stream_prompt(query, prompt_file, isQuestion, info_file=None):
openai.api_key = API_KEY
if isQuestion:
data = util.read_json(INFO.BRAIN_DATA)
results = GPT.toolkit.search_chunks(query, data, count=1)
for result in results:
if data:
result = GPT.gpt_tools.search_chunks(query, data, count=1)
my_info = util.read_file(info_file)
prompt = util.read_file(prompt_file)
prompt = prompt.replace('<<INFO>>', result['content'])
prompt = prompt.replace('<<INFO>>', result[0]['content'])
prompt = prompt.replace('<<QS>>', query)
prompt = prompt.replace('<<MY-INFO>>', my_info)
client = GPT.toolkit.gpt3_stream(API_KEY, prompt, model, params)
else:
prompt = ''
else:
chunk = textwrap.wrap(query, 10000)[0]
prompt = util.read_file(prompt_file).replace('<<DATA>>', chunk)
client = GPT.toolkit.gpt3_stream(API_KEY, prompt, model, params)
return prompt
def run_stream(query, model, prompt_file, isQuestion, params, info_file=None):
prompt = get_stream_prompt(query, prompt_file, isQuestion, info_file)
if model == 'gpt-3.5-turbo':
client = GPT.gpt_tools.gpt35_stream(prompt, params)
else:
client = GPT.gpt_tools.gpt3_stream(prompt, model, params)
return client
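Worth noting about this refactor: run_stream now returns the openai library's native streaming generator rather than an sseclient wrapper, and the chunk shape differs by model family (chat models stream delta payloads, completion models stream text). A hedged consumption sketch, assuming the app's Streamlit session is active (GPT.query reads the API key and session language at import time) and using hypothetical prompt paths:

```python
# Sketch only: consuming the generator returned by the new run_stream.
from types import SimpleNamespace

import GPT.query

params = SimpleNamespace(temp=0.7, max_tokens=500, top_p=1.0,
                         frequency_penalty=0.0, present_penalty=0.0)
model = 'gpt-3.5-turbo'

responses = GPT.query.run_stream('What did I note about embeddings?', model,
                                 prompt_file='.user/prompt/question.txt',  # hypothetical path
                                 isQuestion=True, params=params,
                                 info_file='.user/prompt/my-info.txt')     # hypothetical path
answer = ''
for response_json in responses:
    choice = response_json['choices'][0]
    if choice['finish_reason'] is not None:        # 'stop' or 'length' ends the stream
        break
    if 'gpt-3.5-turbo' in model:
        char = choice['delta'].get('content', '')  # the first chat chunk only carries the role
    else:
        char = choice['text']
    answer += char
print(answer)
```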

@ -1,71 +0,0 @@
import openai
import numpy as np
import requests
import sseclient
# this function compare similarity between two vectors.
# The higher value the dot product have, the more alike between these vectors
def similarity(v1, v2):
return np.dot(v1, v2)
# return a list of vectors
def embedding(content, engine='text-embedding-ada-002'):
response = openai.Embedding.create(input=content, engine=engine)
vector = response['data'][0]['embedding']
return vector
def search_chunks(text, data, count=1):
vector = embedding(text)
points = []
for item in data:
# compare search terms with brain-data
point = similarity(vector, item['vector'])
points.append({
'content': item['content'],
'point': point
})
# sort points base on descendant order
ordered = sorted(points, key=lambda d: d['point'], reverse=True)
return ordered[0:count]
def gpt3(prompt, model, params):
response = openai.Completion.create(
model=model,
prompt=prompt,
temperature=params.temp,
max_tokens=params.max_tokens,
top_p=params.top_p,
frequency_penalty=params.frequency_penalty,
presence_penalty=params.present_penalty
)
text = response['choices'][0]['text'].strip()
return text
def gpt3_stream(API_KEY, prompt, model, params):
url = 'https://api.openai.com/v1/completions'
headers = {
'Accept': 'text/event-stream',
'Authorization': 'Bearer ' + API_KEY
}
body = {
'model': model,
'prompt': prompt,
'max_tokens': params.max_tokens,
'temperature': params.temp,
'top_p': params.top_p,
'frequency_penalty': params.frequency_penalty,
'presence_penalty': params.present_penalty,
'stream': True,
}
req = requests.post(url, stream=True, headers=headers, json=body)
client = sseclient.SSEClient(req)
return client
# print(json.loads(event.data)['choices'][0]['text'], end='', flush=True)

@ -9,8 +9,23 @@
*💡As I am not a professional programmer and am fairly new to Python, this project may contain bugs. If you encounter any issues, please suggest them in the [Issues section](https://github.com/sean1832/GPT-Brain/issues).*
### Description
This program leverages the power of [GPT-3](https://platform.openai.com/docs/models/gpt-3) & [3.5](https://platform.openai.com/docs/models/gpt-3-5) to provide a summary of the content of atomic notes,
as well as answer questions related specifically to your notes.
The program scans a designated directory,
which is typically a vault containing multiple notes,
and appends the contents of all the notes to a single file.
This file then serves as the context for the user's query.
The program is able to identify
relationships between the contents of the notes,
and generate a refined response that summarizes the key points.
Although the program is compatible with other note-taking software that uses
markdown or txt,
it is primarily designed with [Obsidian](https://obsidian.md/) in mind.
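For readers who prefer code to prose, the retrieval flow described above boils down to roughly the following. This is an illustrative sketch only, with simplified names; the real implementation lives in `GPT/gpt_tools.py` and `GPT/query.py`, and the `<<INFO>>`, `<<QS>>` and `<<MY-INFO>>` placeholders are the ones used by this repository's prompt templates.

```python
# Illustrative sketch of the note-retrieval flow (not the repository's actual code).
import numpy as np
import openai

def embed(text):
    # same embedding engine the project uses
    return openai.Embedding.create(input=text, engine='text-embedding-ada-002')['data'][0]['embedding']

def best_chunk(question, chunks):
    # chunks: [{'content': ..., 'vector': ...}] built from the vault's notes
    q_vec = embed(question)
    return max(chunks, key=lambda c: np.dot(q_vec, c['vector']))

def build_prompt(template, question, my_info, chunk):
    # fill the prompt template's placeholders before sending it to GPT
    return (template.replace('<<INFO>>', chunk['content'])
                    .replace('<<QS>>', question)
                    .replace('<<MY-INFO>>', my_info))
```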
### Features
- [x] Use [OpenAI GPT-3](https://platform.openai.com/docs/models/gpt-3) to generate responses.
- [x] Use [OpenAI GPT-3](https://platform.openai.com/docs/models/gpt-3) and [GPT-3.5 (ChatGPT)](https://platform.openai.com/docs/models/gpt-3-5) to generate responses.
- [x] Use [OpenAI embedding](https://platform.openai.com/docs/guides/embeddings/what-are-embeddings) for semantic comparison of the question and note content for enhanced searching.
- [x] Configurable prompts.
- [x] Customizable personal background information for more accurate answers.
@ -23,13 +38,10 @@
- [x] Basic & Advanced parameter sliders for OpenAI Language model configurations.
### Todo
- [x] ~~Batch script to update library.~~
- [x] ~~Versioning.~~
- [x] ~~Tooltips for parameters.~~
- [x] ~~Multilingual support for UI.~~
- [x] ~~Multilingual search support.~~
- [ ] Provide detailed documentation for users.
- [ ] Release for Windows.
- [ ] Support PDF format.
- [ ] Support PDF OCR scanning.
- [ ] Support Word documents.
## Install
### 1. What you need

@ -64,12 +64,15 @@ with st.sidebar:
"your prompt plus `max_tokens` cannot exceed the model's context length. Most "
"models have a context length of 2048 tokens (except for the newest models, "
"which support 4096)."))
chunk_size = st.slider(_('Chunk size'), 1500, 4500,
value=util.read_json_at(INFO.BRAIN_MEMO, 'chunk_size', 4000),
help=_("The number of tokens to consider at each step. The larger this is, the more "
"context the model has to work with, but the slower generation and expensive "
"will it be."))
col1, col2 = st.columns([3, 1])
with col1:
chunk_size = st.slider(_('Chunk size'), 1500, 4500,
value=util.read_json_at(INFO.BRAIN_MEMO, 'chunk_size', 4000),
help=_("The number of tokens to consider at each step. The larger this is, the more "
"context the model has to work with, but the slower generation and expensive "
"will it be."))
with col2:
update_brain = st.button(_('Update Brain'))
with st.expander(label=_('Advanced Options')):
top_p = st.slider(_('Top_P'), 0.0, 1.0, value=util.read_json_at(INFO.BRAIN_MEMO, 'top_p', 1.0),
help=_("An alternative to sampling with temperature, called nucleus sampling, where the "
@ -90,7 +93,7 @@ with st.sidebar:
"(https://platform.openai.com/docs/api-reference/parameter-details)"))
enable_stream = st_toggle.st_toggle_switch(_('Stream (experimental)'),
default_value=util.read_json_at(INFO.BRAIN_MEMO, 'enable_stream',
False))
True))
if not enable_stream:
chunk_count = st.slider(_('Answer count'), 1, 5, value=util.read_json_at(INFO.BRAIN_MEMO, 'chunk_count', 1),
@ -136,7 +139,7 @@ with header:
# main
with body:
question = st.text_area(_('Ask Brain: '))
query = st.text_area(_('Ask Brain: '))
col1, col2 = st.columns([1, 3])
with col1:
send = st.button(_('📩Send'))
@ -144,13 +147,49 @@ with body:
if os.path.exists(CURRENT_LOG_FILE):
st_tool.download_as(_("📥download log"))
# execute brain calculation
if not question == '' and send:
st_tool.execute_brain(question,
param,
op,
models,
prompt_core,
prompt_dictionary,
_('question'),
enable_stream,
SESSION_LANG)
if update_brain:
st_tool.rebuild_brain(chunk_size)
if not query == '':
if models.question_model == 'text-davinci-003' or 'text-davinci-003' in models.other_models:
max_model_token = 4096
elif models.question_model == 'gpt-3.5-turbo' or 'gpt-3.5-turbo' in models.other_models:
max_model_token = 4096
else:
max_model_token = 2048
tokens, isTokenZero = st_tool.predict_token(query, prompt_core)
token_panel = st.empty()
if isTokenZero:
token_panel.markdown('Prompt token: `Not Available`')
else:
token_panel.markdown(f'Prompt token: `{tokens}/{max_model_token}`')
if send:
st_tool.execute_brain(query,
param,
op,
models,
prompt_core,
prompt_dictionary,
_('question'),
enable_stream)
# convert param to dictionary
param_dict = vars(param)
# write param to json
for key in param_dict:
value = param_dict[key]
util.update_json(INFO.BRAIN_MEMO, key, value)
# write operation to json
util.update_json(INFO.BRAIN_MEMO, f'operations_{SESSION_LANG}', op.operations)
# write question model to json
util.update_json(INFO.BRAIN_MEMO, 'question_model', models.question_model)
# write other models to json
for i in range(len(op.operations_no_question)):
util.update_json(INFO.BRAIN_MEMO, f'{op.operations_no_question[i]}_model', models.other_models[i])
# write stream to json
util.update_json(INFO.BRAIN_MEMO, 'enable_stream', enable_stream)

@ -33,4 +33,4 @@ if 'FILTER_ROW_COUNT' not in st.session_state:
st.session_state['FILTER_ROW_COUNT'] = util.read_json_at(BRAIN_MEMO, 'filter_row_count', default_value=1)
# models
MODELS_OPTIONS = ['text-davinci-003', 'text-curie-001', 'text-babbage-001', 'text-ada-001']
MODELS_OPTIONS = ['gpt-3.5-turbo', 'text-davinci-003', 'text-curie-001', 'text-babbage-001', 'text-ada-001']

@ -77,7 +77,7 @@ def parse_data(data, delimiter='', force=False):
return data
def read_files(file_dir, delimiter='', force=False, single_string=True, exclude_dir: list = None, supported_formats: list = None):
def read_bind_files(file_dir, delimiter='', force=False, single_string=True, exclude_dir: list = None, supported_formats: list = None):
contents = []
if exclude_dir is None:
exclude_dir = []

@ -132,13 +132,13 @@ def main():
exclude_dir = exclude_dir_official
# if advanced mode enabled
if advanced_mode:
note_datas = util.read_files(note_dir, single_string=False, exclude_dir=exclude_dir)
note_datas = util.read_bind_files(note_dir, single_string=False, exclude_dir=exclude_dir)
note_datas, filter_info = st_tools.filter_data(note_datas, add_filter_button, del_filter_button)
# note_datas, filter_key, filter_logic, filter_val = filter_data(note_datas, True)
modified_data = util.parse_data(note_datas, delimiter, force_delimiter)
else:
modified_data = util.read_files(note_dir, single_string=True, delimiter=delimiter,
force=force_delimiter, exclude_dir=exclude_dir)
modified_data = util.read_bind_files(note_dir, single_string=True, delimiter=delimiter,
force=force_delimiter, exclude_dir=exclude_dir)
# append mode
if append_mode:

@ -1,8 +1,10 @@
numpy==1.24.2
openai==0.26.5
openai==0.27.0
requests==2.28.2
sseclient==0.0.27
sseclient_py==1.7.2
streamlit==1.18.1
streamlit==1.19.0
streamlit_tags==1.2.8
streamlit_toggle_switch==1.0.2
langchain==0.0.100
tiktoken==0.3.0

@ -4,6 +4,7 @@ import json
import streamlit as st
import tkinter as tk
from tkinter import filedialog
from langchain.llms import OpenAI
import modules.utilities as util
import modules.INFO as INFO
@ -18,6 +19,16 @@ SESSION_TIME = st.session_state['SESSION_TIME']
CURRENT_LOG_FILE = f'{INFO.LOG_PATH}/log_{SESSION_TIME}.log'
def predict_token(query: str, prompt_core: GPT.model.prompt_core) -> (int, bool):
"""predict how many tokens to generate"""
llm = OpenAI()
prompt = GPT.query.get_stream_prompt(query, prompt_file=prompt_core.question,
isQuestion=True,
info_file=prompt_core.my_info)
token = llm.get_num_tokens(prompt)
return token, token == 0
def create_log():
if not os.path.exists(CURRENT_LOG_FILE):
util.write_file(f'Session {SESSION_TIME}\n\n', CURRENT_LOG_FILE)
@ -229,69 +240,99 @@ def process_response_stream(query, target_model, prompt_file: str, params: GPT.m
# check if exclude model is not target model
file_name = util.get_file_name(prompt_file)
with st.spinner(_('Thinking on ') + f"{file_name}..."):
client = GPT.query.run_stream(query,
target_model,
prompt_file,
isQuestion=False,
params=params)
responses = GPT.query.run_stream(query,
target_model,
prompt_file,
isQuestion=False,
params=params)
# displaying results
st.header(f'📃{file_name}')
response_panel = st.empty()
previous_chars = ''
for event in client.events():
if event.data != '[DONE]':
char = json.loads(event.data)['choices'][0]['text']
response = previous_chars + char
response_panel.info(f'{response}')
previous_chars += char
for response_json in responses:
choice = response_json['choices'][0]
if choice['finish_reason'] == 'stop':
break
# error handling
if choice['finish_reason'] == 'length':
st.warning("⚠️ " + _('Result cut off. max_tokens') + f' ({params.max_tokens}) ' + _('too small. Consider increasing max_tokens.'))
break
if 'gpt-3.5-turbo' in target_model:
delta = choice['delta']
if "role" in delta or delta == {}:
char = ''
else:
char = delta['content']
else:
char = choice['text']
response = previous_chars + char
response_panel.info(f'{response}')
previous_chars += char
time.sleep(1)
log(previous_chars, delimiter=f'{file_name.upper()}')
def rebuild_brain(chunk_size: int):
msg = st.warning(_('Updating Brain...'), icon="")
progress_bar = st.progress(0)
for idx, chunk_num in GPT.query.build(chunk_size):
progress_bar.progress((idx + 1) / chunk_num)
msg.success(_('Brain Updated!'), icon="👍")
time.sleep(2)
def execute_brain(q, params: GPT.model.param,
op: GPT.model.Operation,
model: GPT.model.Model,
prompt_core: GPT.model.prompt_core,
prompt_dictionary: dict,
question_prompt: str,
stream: bool,
session_language,
stream: bool
):
# log question
log(f'\n\n\n\n[{str(time.ctime())}] - QUESTION: {q}')
if mod.check_update.is_input_updated() or mod.check_update.is_param_updated(params.chunk_size, 'chunk_size'):
msg = st.warning(_('Updating Brain...'), icon="")
progress_bar = st.progress(0)
for idx, chunk_num in GPT.query.build(params.chunk_size):
progress_bar.progress((idx + 1) / chunk_num)
msg.success(_('Brain Updated!'), icon="👍")
time.sleep(2)
rebuild_brain(params.chunk_size)
# =================stream=================
if stream:
previous_chars = ''
is_question_selected = util.contains(op.operations, question_prompt)
with st.spinner(_('Thinking on Answer')):
answer_clients = GPT.query.run_stream(q, model.question_model,
prompt_file=prompt_core.question,
isQuestion=True,
params=params,
info_file=prompt_core.my_info)
responses = GPT.query.run_stream(q, model.question_model,
prompt_file=prompt_core.question,
isQuestion=True,
params=params,
info_file=prompt_core.my_info)
if is_question_selected:
# displaying results
st.header(_('💬Answer'))
answer_panel = st.empty()
for event in answer_clients.events():
if event.data != '[DONE]':
char = json.loads(event.data)['choices'][0]['text']
answer = previous_chars + char
if is_question_selected:
answer_panel.info(f'{answer}')
previous_chars += char
for response_json in responses:
choice = response_json['choices'][0]
if choice['finish_reason'] == 'stop':
break
# error handling
if choice['finish_reason'] == 'length':
st.warning("⚠️ " + _('Result cut off. max_tokens') + f' ({params.max_tokens}) ' + _('too small. Consider increasing max_tokens.'))
break
if 'gpt-3.5-turbo' in model.question_model:
delta = choice['delta']
if "role" in delta or delta == {}:
char = ''
else:
char = delta['content']
else:
char = choice['text']
answer = previous_chars + char
if is_question_selected:
answer_panel.info(f'{answer}')
previous_chars += char
time.sleep(0.1)
log(previous_chars, delimiter='ANSWER')
@ -304,42 +345,24 @@ def execute_brain(q, params: GPT.model.param,
else:
# thinking on answer
with st.spinner(_('Thinking on Answer')):
answer = GPT.query.run(q, model.question_model,
prompt_file=prompt_core.question,
isQuestion=True,
params=params,
info_file=prompt_core.my_info)
responses = GPT.query.run(q, model.question_model,
prompt_file=prompt_core.question,
isQuestion=True,
params=params,
info_file=prompt_core.my_info)
if util.contains(op.operations, question_prompt):
# displaying results
st.header(_('💬Answer'))
st.info(f'{answer}')
st.info(f'{responses}')
time.sleep(1.5)
log(answer, delimiter='ANSWER')
log(responses, delimiter='ANSWER')
# thinking on other outputs
if len(op.operations_no_question) > 0:
for i in range(len(op.operations_no_question)):
prompt_path = prompt_dictionary[op.operations_no_question[i]]
other_model = model.other_models[i]
process_response(answer, other_model, prompt_path, params)
# convert param to dictionary
param_dict = vars(params)
# write param to json
for key in param_dict:
value = param_dict[key]
util.update_json(INFO.BRAIN_MEMO, key, value)
# write operation to json
util.update_json(INFO.BRAIN_MEMO, f'operations_{session_language}', op.operations)
# write question model to json
util.update_json(INFO.BRAIN_MEMO, 'question_model', model.question_model)
# write other models to json
for i in range(len(op.operations_no_question)):
util.update_json(INFO.BRAIN_MEMO, f'{op.operations_no_question[i]}_model', model.other_models[i])
process_response(responses, other_model, prompt_path, params)
def message(msg, condition=None):

@ -1,3 +1,7 @@
@echo off
echo Activating Virtual environment!
call .\venv\Scripts\activate
echo Checking library updates...
set "REQUIREMENTS=requirements.txt"
set "LAST_MODIFIED=requirements.temp"
@ -26,4 +30,4 @@ if "%mod_date%" neq "%last_mod_date%" (
)
rem copy example prompt to user folder without overwrite
xcopy "example_prompt\*.*" ".user\prompt" /I /E /Y /D
xcopy "example_prompt\*.*" ".user\prompt" /I /E /Y /D