diff --git a/GPT/model.py b/GPT/model.py
index 72c2a8b..28eabdc 100644
--- a/GPT/model.py
+++ b/GPT/model.py
@@ -19,3 +19,9 @@ class Operation:
     def __init__(self, operations, operations_no_question):
         self.operations = operations
         self.operations_no_question = operations_no_question
+
+
+class prompt_core:
+    def __init__(self, question, my_info):
+        self.question = question
+        self.my_info = my_info
diff --git a/GPT/query.py b/GPT/query.py
index 1c93b65..480e862 100644
--- a/GPT/query.py
+++ b/GPT/query.py
@@ -5,6 +5,7 @@ import streamlit as st
 import modules.utilities as util
 import modules.language as language
 import GPT
+import modules.INFO as INFO
 
 API_KEY = util.read_file(r'.user\API-KEYS.txt').strip()
@@ -13,8 +14,10 @@ openai.api_key = API_KEY
 
 # if 'SESSION_LANGUAGE' not in st.session_state:
 #     st.session_state['SESSION_LANGUAGE'] = util.read_json_at('.user/language.json', 'SESSION_LANGUAGE', 'en_US')
+
 SESSION_LANG = st.session_state['SESSION_LANGUAGE']
-prompt_dir = f'.user/prompt/{SESSION_LANG}'
+# print('SESSION_LANG', SESSION_LANG)
+# prompt_dir = f'.user/prompt/{SESSION_LANG}'
 
 _ = language.set_language()
@@ -37,52 +40,87 @@ def build(chunk_size=4000):
     util.write_json(result, r'.user\brain-data.json')
 
 
-def run_answer(query, model, temp, max_tokens, top_p, freq_penl, pres_penl, chunk_count):
-    brain_data = util.read_json(r'.user\brain-data.json')
-    results = GPT.toolkit.search_chunks(query, brain_data, chunk_count)
-    answers = []
-    for result in results:
-        my_info = util.read_file(f'{prompt_dir}/' + _('my-info') + '.txt')
-
-        prompt = util.read_file(f'{prompt_dir}/' + _('question') + '.txt')
-        prompt = prompt.replace('<>', result['content'])
-        prompt = prompt.replace('<>', query)
-        prompt = prompt.replace('<>', my_info)
-
-        answer = GPT.toolkit.gpt3(prompt, model, temp, max_tokens, top_p, freq_penl, pres_penl)
-        answers.append(answer)
-
-    all_answers = '\n\n'.join(answers)
-    return all_answers
-
-
-def run_answer_stream(query, model, temp, max_tokens, top_p, freq_penl, pres_penl):
-    brain_data = util.read_json(r'.user\brain-data.json')
-    results = GPT.toolkit.search_chunks(query, brain_data, count=1)
-    for result in results:
-        my_info = util.read_file(f'{prompt_dir}/' + _('my-info') + '.txt')
-        prompt = util.read_file(f'{prompt_dir}/' + _('question') + '.txt')
-        prompt = prompt.replace('<>', result['content'])
-        prompt = prompt.replace('<>', query)
-        prompt = prompt.replace('<>', my_info)
-
-        answer_client = GPT.toolkit.gpt3_stream(API_KEY, prompt, model, temp, max_tokens, top_p, freq_penl, pres_penl)
-    return answer_client
-
-
-def run(query, model, prompt_file, temp, max_tokens, top_p, freq_penl, pres_penl):
-    chunks = textwrap.wrap(query, 10000)
-    responses = []
-    for chunk in chunks:
-        prompt = util.read_file(prompt_file).replace('<>', chunk)
-        response = GPT.toolkit.gpt3(prompt, model, temp, max_tokens, top_p, freq_penl, pres_penl)
-        responses.append(response)
-    all_response = '\n\n'.join(responses)
+# def run_answer(query, model, temp, max_tokens, top_p, freq_penl, pres_penl, chunk_count):
+#     brain_data = util.read_json(r'.user\brain-data.json')
+#     results = GPT.toolkit.search_chunks(query, brain_data, chunk_count)
+#     answers = []
+#     for result in results:
+#         my_info = util.read_file(f'{prompt_dir}/' + _('my-info') + '.txt')
+#
+#         prompt = util.read_file(f'{prompt_dir}/' + _('question') + '.txt')
+#         prompt = prompt.replace('<>', result['content'])
+#         prompt = prompt.replace('<>', query)
+#         prompt = prompt.replace('<>', my_info)
+#
+#         answer = GPT.toolkit.gpt3(prompt, model, temp, max_tokens, top_p, freq_penl, pres_penl)
+#         answers.append(answer)
+#
+#     all_answers = '\n\n'.join(answers)
+#     return all_answers
+
+
+# def run_answer_stream(query, model, temp, max_tokens, top_p, freq_penl, pres_penl):
+#     brain_data = util.read_json(r'.user\brain-data.json')
+#     results = GPT.toolkit.search_chunks(query, brain_data, count=1)
+#     for result in results:
+#         my_info = util.read_file(f'{prompt_dir}/' + _('my-info') + '.txt')
+#         prompt = util.read_file(f'{prompt_dir}/' + _('question') + '.txt')
+#         prompt = prompt.replace('<>', result['content'])
+#         prompt = prompt.replace('<>', query)
+#         prompt = prompt.replace('<>', my_info)
+#
+#         answer_client = GPT.toolkit.gpt3_stream(API_KEY, prompt, model, temp, max_tokens, top_p, freq_penl, pres_penl)
+#     return answer_client
+
+
+def run(query, model, prompt_file, isQuestion, params, info_file=None):
+
+    if isQuestion:
+        data = util.read_json(INFO.BRAIN_DATA)
+        results = GPT.toolkit.search_chunks(query, data, params.chunk_count)
+        answers = []
+        for result in results:
+            my_info = util.read_file(info_file)
+            prompt = util.read_file(prompt_file)
+            prompt = prompt.replace('<>', result['content'])
+            prompt = prompt.replace('<>', query)
+            prompt = prompt.replace('<>', my_info)
+
+            answer = GPT.toolkit.gpt3(prompt, model, params)
+            answers.append(answer)
+        all_response = '\n\n'.join(answers)
+    else:
+        chunks = textwrap.wrap(query, 10000)
+        responses = []
+        for chunk in chunks:
+            prompt = util.read_file(prompt_file).replace('<>', chunk)
+            response = GPT.toolkit.gpt3(prompt, model, params)
+            responses.append(response)
+        all_response = '\n\n'.join(responses)
     return all_response
 
 
-def run_stream(query, model, prompt_file, temp, max_tokens, top_p, freq_penl, pres_penl):
-    chunk = textwrap.wrap(query, 10000)[0]
-    prompt = util.read_file(prompt_file).replace('<>', chunk)
-    client = GPT.toolkit.gpt3_stream(API_KEY, prompt, model, temp, max_tokens, top_p, freq_penl, pres_penl)
+# def run_stream(query, model, prompt_file, temp, max_tokens, top_p, freq_penl, pres_penl):
+#     chunk = textwrap.wrap(query, 10000)[0]
+#     prompt = util.read_file(prompt_file).replace('<>', chunk)
+#     client = GPT.toolkit.gpt3_stream(API_KEY, prompt, model, temp, max_tokens, top_p, freq_penl, pres_penl)
+#     return client
+
+def run_stream(query, model, prompt_file, isQuestion, params, info_file=None):
+    client = None
+    if isQuestion:
+        data = util.read_json(INFO.BRAIN_DATA)
+        results = GPT.toolkit.search_chunks(query, data, count=1)
+        for result in results:
+            my_info = util.read_file(info_file)
+            prompt = util.read_file(prompt_file)
+            prompt = prompt.replace('<>', result['content'])
+            prompt = prompt.replace('<>', query)
+            prompt = prompt.replace('<>', my_info)
+            client = GPT.toolkit.gpt3_stream(API_KEY, prompt, model, params)
+
+    else:
+        chunk = textwrap.wrap(query, 10000)[0]
+        prompt = util.read_file(prompt_file).replace('<>', chunk)
+        client = GPT.toolkit.gpt3_stream(API_KEY, prompt, model, params)
     return client
diff --git a/GPT/toolkit.py b/GPT/toolkit.py
index 7375e50..e2e80a6 100644
--- a/GPT/toolkit.py
+++ b/GPT/toolkit.py
@@ -3,6 +3,7 @@ import numpy as np
 import requests
 import sseclient
 import json
+import GPT
 
 
 # this function compare similarity between two vectors.
@@ -35,22 +36,21 @@ def search_chunks(text, data, count=1):
     return ordered[0:count]
 
 
-def gpt3(prompt, model, temp, max_tokens, top_p, freq_penl, pres_penl):
+def gpt3(prompt, model, params):
     response = openai.Completion.create(
         model=model,
         prompt=prompt,
-        temperature=temp,
-        max_tokens=max_tokens,
-        top_p=top_p,
-        frequency_penalty=freq_penl,
-        presence_penalty=pres_penl
+        temperature=params.temp,
+        max_tokens=params.max_tokens,
+        top_p=params.top_p,
+        frequency_penalty=params.frequency_penalty,
+        presence_penalty=params.present_penalty
     )
     text = response['choices'][0]['text'].strip()
     return text
 
 
-def gpt3_stream(API_KEY, prompt, model, temp, max_tokens, top_p, freq_penl, pres_penl):
-
+def gpt3_stream(API_KEY, prompt, model, params):
     url = 'https://api.openai.com/v1/completions'
     headers = {
         'Accept': 'text/event-stream',
@@ -59,15 +59,15 @@ def gpt3_stream(API_KEY, prompt, model, temp, max_tokens, top_p, freq_penl, pres
     body = {
         'model': model,
         'prompt': prompt,
-        'max_tokens': max_tokens,
-        'temperature': temp,
-        'top_p': top_p,
-        'frequency_penalty': freq_penl,
-        'presence_penalty': pres_penl,
+        'max_tokens': params.max_tokens,
+        'temperature': params.temp,
+        'top_p': params.top_p,
+        'frequency_penalty': params.frequency_penalty,
+        'presence_penalty': params.present_penalty,
         'stream': True,
    }
     req = requests.post(url, stream=True, headers=headers, json=body)
     client = sseclient.SSEClient(req)
     return client
-    # print(json.loads(event.data)['choices'][0]['text'], end='', flush=True)
+    # print(json.loads(event.data)['choices'][0]['text'], end='', flush=True)
diff --git a/Seanium_Brain.py b/Seanium_Brain.py
index 6fa9718..0eb3640 100644
--- a/Seanium_Brain.py
+++ b/Seanium_Brain.py
@@ -91,7 +91,8 @@ with st.sidebar:
                                                   "context the model has to work with, but the slower generation and expensive "
                                                   "will it be."))
     enable_stream = st_toggle.st_toggle_switch(_('Stream (experimental)'),
-                                               default_value=util.read_json_at(INFO.BRAIN_MEMO, 'enable_stream', True))
+                                               default_value=util.read_json_at(INFO.BRAIN_MEMO, 'enable_stream',
+                                                                               True))
     if not enable_stream:
         chunk_count = st.slider(_('Answer count'), 1, 5, value=util.read_json_at(INFO.BRAIN_MEMO, 'chunk_count', 1),
@@ -114,6 +115,9 @@ with st.sidebar:
     models = GPT.model.Model(question_model=question_model, other_models=other_models)
 
+    prompt_core = GPT.model.prompt_core(question=f'{PROMPT_PATH}/' + _('question') + '.txt',
+                                        my_info=f'{PROMPT_PATH}/' + _('my-info') + '.txt')
+
     if st.button(_('Clear Log'), on_click=st_tool.clear_log):
         st.success(_('Log Cleared'))
@@ -143,4 +147,12 @@ with body:
         st_tool.download_as(_("📥download log"))
 # execute brain calculation
 if not question == '' and send:
-    st_tool.execute_brain(question, param, op, models, prompt_dictionary, _('question'), enable_stream, SESSION_LANG)
+    st_tool.execute_brain(question,
+                          param,
+                          op,
+                          models,
+                          prompt_core,
+                          prompt_dictionary,
+                          _('question'),
+                          enable_stream,
+                          SESSION_LANG)
diff --git a/modules/INFO.py b/modules/INFO.py
index 72821fd..cb415cf 100644
--- a/modules/INFO.py
+++ b/modules/INFO.py
@@ -10,19 +10,20 @@ st.set_page_config(
 USER_DIR = '.user'
 LOG_PATH = '.user/log'
 BRAIN_MEMO = '.user/brain-memo.json'
+BRAIN_DATA = '.user/brain-data.json'
 MANIFEST = '.core/manifest.json'
 INIT_LANGUAGE = '.user/language.json'
 
 # exclude directory
 EXCLUDE_DIR_OFFICIAL = ['__pycache__',
-                       '.git',
-                       '.idea',
-                       '.vscode',
-                       '.obsidian',
-                       '.trash',
-                       '.git',
-                       '.gitignore',
-                       '.gitattributes']
+                        '.git',
+                        '.idea',
+                        '.vscode',
+                        '.obsidian',
+                        '.trash',
+                        '.git',
+                        '.gitignore',
+                        '.gitattributes']
 
 # activate session
 if 'SESSION_LANGUAGE' not in st.session_state:
diff --git a/streamlit_toolkit/tools.py b/streamlit_toolkit/tools.py
index f3c8949..fac7c03 100644
--- a/streamlit_toolkit/tools.py
+++ b/streamlit_toolkit/tools.py
@@ -217,16 +217,15 @@ def filter_data(pages: list, add_filter_button, del_filter_button):
     return filtered_contents, filter_datas
 
 
-def process_response(query, target_model, prompt_file: str, data: GPT.model.param):
+def process_response(query, target_model, prompt_file: str, params: GPT.model.param):
     # check if exclude model is not target model
     file_name = util.get_file_name(prompt_file)
     with st.spinner(_('Thinking on ') + f"{file_name}..."):
-        results = GPT.query.run(query, target_model, prompt_file,
-                                data.temp,
-                                data.max_tokens,
-                                data.top_p,
-                                data.frequency_penalty,
-                                data.present_penalty)
+        results = GPT.query.run(query,
+                                target_model,
+                                prompt_file,
+                                isQuestion=False,
+                                params=params)
     # displaying results
     st.header(f'📃{file_name}')
     st.info(f'{results}')
@@ -234,16 +233,16 @@ def process_response(query, target_model, prompt_file: str, data: GPT.model.para
     log(results, delimiter=f'{file_name.upper()}')
 
 
-def process_response_stream(query, target_model, prompt_file: str, data: GPT.model.param):
+def process_response_stream(query, target_model, prompt_file: str, params: GPT.model.param):
     # check if exclude model is not target model
     file_name = util.get_file_name(prompt_file)
     with st.spinner(_('Thinking on ') + f"{file_name}..."):
-        client = GPT.query.run_stream(query, target_model, prompt_file,
-                                      data.temp,
-                                      data.max_tokens,
-                                      data.top_p,
-                                      data.frequency_penalty,
-                                      data.present_penalty)
+        client = GPT.query.run_stream(query,
+                                      target_model,
+                                      prompt_file,
+                                      isQuestion=False,
+                                      params=params)
+
     # displaying results
     st.header(f'📃{file_name}')
     response_panel = st.empty()
@@ -262,6 +261,7 @@ def process_response_stream(query, target_model, prompt_file: str, data: GPT.mod
 def execute_brain(q, params: GPT.model.param,
                   op: GPT.model.Operation,
                   model: GPT.model.Model,
+                  prompt_core: GPT.model.prompt_core,
                   prompt_dictionary: dict,
                   question_prompt: str,
                   stream: bool,
@@ -283,12 +283,11 @@ def execute_brain(q, params: GPT.model.param,
             previous_chars = ''
             is_question_selected = util.contains(op.operations, question_prompt)
             with st.spinner(_('Thinking on Answer')):
-                answer_clients = GPT.query.run_answer_stream(q, model.question_model,
-                                                             params.temp,
-                                                             params.max_tokens,
-                                                             params.top_p,
-                                                             params.frequency_penalty,
-                                                             params.present_penalty)
+                answer_clients = GPT.query.run_stream(q, model.question_model,
+                                                      prompt_file=prompt_core.question,
+                                                      isQuestion=True,
+                                                      params=params,
+                                                      info_file=prompt_core.my_info)
             if is_question_selected:
                 # displaying results
                 st.header(_('💬Answer'))
@@ -313,13 +312,11 @@ def execute_brain(q, params: GPT.model.param,
     else:
         # thinking on answer
         with st.spinner(_('Thinking on Answer')):
-            answer = GPT.query.run_answer(q, model.question_model,
-                                          params.temp,
-                                          params.max_tokens,
-                                          params.top_p,
-                                          params.frequency_penalty,
-                                          params.present_penalty,
-                                          chunk_count=params.chunk_count)
+            answer = GPT.query.run(q, model.question_model,
+                                   prompt_file=prompt_core.question,
+                                   isQuestion=True,
+                                   params=params,
+                                   info_file=prompt_core.my_info)
         if util.contains(op.operations, question_prompt):
             # displaying results
             st.header(_('💬Answer'))
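
Note on the calling pattern introduced by this patch: the long positional argument lists (temp, max_tokens, top_p, freq_penl, pres_penl) are replaced by a single params object, the question and non-question code paths are merged into run/run_stream behind an isQuestion flag, and the question/my-info prompt paths travel in a prompt_core holder. The sketch below shows how a caller might drive the new GPT.query.run signature; it is illustrative only. The Param dataclass, the model name, and the prompt paths are assumptions made for the example, not code from this repository (only the attribute names the diff reads off params, plus the run and prompt_core signatures, come from the patch itself).

from dataclasses import dataclass

import GPT  # the repository's package; assumes it is importable


@dataclass
class Param:
    # Illustrative stand-in for GPT.model.param: these are the attribute
    # names that GPT.query and GPT.toolkit read off `params` in this diff;
    # the real param class may be constructed differently.
    temp: float = 0.7
    max_tokens: int = 256
    top_p: float = 1.0
    frequency_penalty: float = 0.0
    present_penalty: float = 0.0
    chunk_count: int = 1


# Hypothetical caller: the prompt paths and model name are placeholders.
prompt_core = GPT.model.prompt_core(question='.user/prompt/en_US/question.txt',
                                    my_info='.user/prompt/en_US/my-info.txt')

# isQuestion=True routes through the brain-data chunk search and the question
# prompt; isQuestion=False would instead wrap the raw query with prompt_file.
answer = GPT.query.run('What did I write about embeddings?',
                       'text-davinci-003',
                       prompt_file=prompt_core.question,
                       isQuestion=True,
                       params=Param(temp=0.5, chunk_count=2),
                       info_file=prompt_core.my_info)
print(answer)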