From 559dd4700e91fa0fd7a67b23f341ba4878c86baf Mon Sep 17 00:00:00 2001
From: sean1832
Date: Sat, 4 Mar 2023 01:33:58 +1100
Subject: [PATCH] fix: no error code for stream mode

---
 GPT/gpt_tools.py           | 47 +++++++++++++----------
 GPT/query.py               |  8 +++-
 Seanium_Brain.py           |  3 +-
 modules/INFO.py            |  2 +-
 streamlit_toolkit/tools.py | 78 ++++++++++++++++++++++----------------
 5 files changed, 81 insertions(+), 57 deletions(-)

diff --git a/GPT/gpt_tools.py b/GPT/gpt_tools.py
index 79bab6b..9512f64 100644
--- a/GPT/gpt_tools.py
+++ b/GPT/gpt_tools.py
@@ -48,24 +48,31 @@ def gpt3(prompt, model, params):
     return text
 
 
-def gpt3_stream(API_KEY, prompt, model, params):
-    url = 'https://api.openai.com/v1/completions'
-    headers = {
-        'Accept': 'text/event-stream',
-        'Authorization': 'Bearer ' + API_KEY
-    }
-    body = {
-        'model': model,
-        'prompt': prompt,
-        'max_tokens': params.max_tokens,
-        'temperature': params.temp,
-        'top_p': params.top_p,
-        'frequency_penalty': params.frequency_penalty,
-        'presence_penalty': params.present_penalty,
-        'stream': True,
-    }
+def gpt3_stream(prompt, model, params):
+    response = openai.Completion.create(
+        model=model,
+        stream=True,
+        prompt=prompt,
+        temperature=params.temp,
+        max_tokens=params.max_tokens,
+        top_p=params.top_p,
+        frequency_penalty=params.frequency_penalty,
+        presence_penalty=params.present_penalty
+    )
+    return response
 
-    req = requests.post(url, stream=True, headers=headers, json=body)
-    client = sseclient.SSEClient(req)
-    return client
-    # print(json.loads(event.data)['choices'][0]['text'], end='', flush=True)
+
+def gpt35_stream(prompt, params, system_role_content: str = 'You are a helpful assistant.'):
+    completions = openai.ChatCompletion.create(
+        model="gpt-3.5-turbo",
+        max_tokens=params.max_tokens,
+        temperature=params.temp,
+        top_p=params.top_p,
+        frequency_penalty=params.frequency_penalty,
+        presence_penalty=params.present_penalty,
+        stream=True,
+        messages=[
+            {"role": "system", "content": system_role_content},
+            {"role": "user", "content": prompt}
+        ])
+    return completions
diff --git a/GPT/query.py b/GPT/query.py
index 52c4968..72e4acb 100644
--- a/GPT/query.py
+++ b/GPT/query.py
@@ -79,5 +79,11 @@ def get_stream_prompt(query, prompt_file, isQuestion, info_file=None):
 
 def run_stream(query, model, prompt_file, isQuestion, params, info_file=None):
     prompt = get_stream_prompt(query, prompt_file, isQuestion, info_file)
-    client = GPT.gpt_tools.gpt3_stream(API_KEY, prompt, model, params)
+    client = GPT.gpt_tools.gpt3_stream(prompt, model, params)
+    return client
+
+
+def run_35_Stream(query, prompt_file, isQuestion, params, info_file=None):
+    prompt = get_stream_prompt(query, prompt_file, isQuestion, info_file)
+    client = GPT.gpt_tools.gpt35_stream(prompt, params)
     return client
diff --git a/Seanium_Brain.py b/Seanium_Brain.py
index b308be9..db2109f 100644
--- a/Seanium_Brain.py
+++ b/Seanium_Brain.py
@@ -164,8 +164,7 @@ with body:
                                 prompt_core,
                                 prompt_dictionary,
                                 _('question'),
-                                enable_stream,
-                                SESSION_LANG)
+                                enable_stream)
 
             # convert param to dictionary
             param_dict = vars(param)
diff --git a/modules/INFO.py b/modules/INFO.py
index 51c1f95..6413726 100644
--- a/modules/INFO.py
+++ b/modules/INFO.py
@@ -33,4 +33,4 @@ if 'FILTER_ROW_COUNT' not in st.session_state:
     st.session_state['FILTER_ROW_COUNT'] = util.read_json_at(BRAIN_MEMO, 'filter_row_count', default_value=1)
 
 # models
-MODELS_OPTIONS = ['text-davinci-003', 'text-curie-001', 'text-babbage-001', 'text-ada-001']
+MODELS_OPTIONS = ['gpt-3.5-turbo', 'text-davinci-003', 'text-curie-001', 'text-babbage-001', 'text-ada-001']
diff --git a/streamlit_toolkit/tools.py b/streamlit_toolkit/tools.py
index 3f404a4..3b69465 100644
--- a/streamlit_toolkit/tools.py
+++ b/streamlit_toolkit/tools.py
@@ -239,22 +239,28 @@ def process_response_stream(query, target_model, prompt_file: str, params: GPT.m
     # check if exclude model is not target model
     file_name = util.get_file_name(prompt_file)
     with st.spinner(_('Thinking on ') + f"{file_name}..."):
-        client = GPT.query.run_stream(query,
-                                      target_model,
-                                      prompt_file,
-                                      isQuestion=False,
-                                      params=params)
+        responses = GPT.query.run_stream(query,
+                                         target_model,
+                                         prompt_file,
+                                         isQuestion=False,
+                                         params=params)
 
         # displaying results
         st.header(f'📃{file_name}')
         response_panel = st.empty()
         previous_chars = ''
-        for event in client.events():
-            if event.data != '[DONE]':
-                char = json.loads(event.data)['choices'][0]['text']
-                response = previous_chars + char
-                response_panel.info(f'{response}')
-                previous_chars += char
+        for response_json in responses:
+            choice = response_json['choices'][0]
+            if choice['finish_reason'] == 'stop':
+                break
+            # error handling
+            if choice['finish_reason'] == 'length':
+                st.warning("⚠️ " + _('Result cut off. max_tokens') + f' ({params.max_tokens}) ' + _('too small. Consider increasing max_tokens.'))
+                break
+            char = choice['text']
+            response = previous_chars + char
+            response_panel.info(f'{response}')
+            previous_chars += char
 
     time.sleep(1)
     log(previous_chars, delimiter=f'{file_name.upper()}')
@@ -275,8 +281,7 @@ def execute_brain(q, params: GPT.model.param,
                   prompt_core: GPT.model.prompt_core,
                   prompt_dictionary: dict,
                   question_prompt: str,
-                  stream: bool,
-                  session_language,
+                  stream: bool
                   ):
     # log question
     log(f'\n\n\n\n[{str(time.ctime())}] - QUESTION: {q}')
@@ -289,23 +294,30 @@ def execute_brain(q, params: GPT.model.param,
         previous_chars = ''
         is_question_selected = util.contains(op.operations, question_prompt)
         with st.spinner(_('Thinking on Answer')):
-            answer_clients = GPT.query.run_stream(q, model.question_model,
-                                                  prompt_file=prompt_core.question,
-                                                  isQuestion=True,
-                                                  params=params,
-                                                  info_file=prompt_core.my_info)
+            responses = GPT.query.run_stream(q, model.question_model,
+                                             prompt_file=prompt_core.question,
+                                             isQuestion=True,
+                                             params=params,
+                                             info_file=prompt_core.my_info)
 
             if is_question_selected:
                 # displaying results
                 st.header(_('💬Answer'))
                 answer_panel = st.empty()
-            for event in answer_clients.events():
-                if event.data != '[DONE]':
-                    char = json.loads(event.data)['choices'][0]['text']
-                    answer = previous_chars + char
-                    if is_question_selected:
-                        answer_panel.info(f'{answer}')
-                    previous_chars += char
+            for response_json in responses:
+                choice = response_json['choices'][0]
+                if choice['finish_reason'] == 'stop':
+                    break
+                # error handling
+                if choice['finish_reason'] == 'length':
+                    st.warning("⚠️ " + _('Result cut off. max_tokens') + f' ({params.max_tokens}) ' + _('too small. Consider increasing max_tokens.'))
+                    break
+
+                char = choice['text']
+                answer = previous_chars + char
+                if is_question_selected:
+                    answer_panel.info(f'{answer}')
+                previous_chars += char
 
     time.sleep(0.1)
     log(previous_chars, delimiter='ANSWER')
@@ -318,24 +330,24 @@ def execute_brain(q, params: GPT.model.param,
     else:
         # thinking on answer
         with st.spinner(_('Thinking on Answer')):
-            answer = GPT.query.run(q, model.question_model,
-                                   prompt_file=prompt_core.question,
-                                   isQuestion=True,
-                                   params=params,
-                                   info_file=prompt_core.my_info)
+            responses = GPT.query.run(q, model.question_model,
+                                      prompt_file=prompt_core.question,
+                                      isQuestion=True,
+                                      params=params,
+                                      info_file=prompt_core.my_info)
             if util.contains(op.operations, question_prompt):
                 # displaying results
                 st.header(_('💬Answer'))
-                st.info(f'{answer}')
+                st.info(f'{responses}')
                 time.sleep(1.5)
-                log(answer, delimiter='ANSWER')
+                log(responses, delimiter='ANSWER')
 
     # thinking on other outputs
     if len(op.operations_no_question) > 0:
         for i in range(len(op.operations_no_question)):
             prompt_path = prompt_dictionary[op.operations_no_question[i]]
             other_model = model.other_models[i]
-            process_response(answer, other_model, prompt_path, params)
+            process_response(responses, other_model, prompt_path, params)
 
 
 def message(msg, condition=None):
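
Reference sketch (not part of the diff above) of the streaming pattern this patch adopts, written as a standalone script. It assumes the pre-1.0 openai Python SDK; the API key placeholder, prompt, and parameter values are illustrative only and not taken from the repository.

import openai

openai.api_key = 'sk-...'  # placeholder; supply a real key


def stream_text(prompt, model='text-davinci-003', max_tokens=256):
    # Request a streamed completion; the SDK parses the SSE events itself,
    # so no requests/sseclient plumbing or manual '[DONE]' check is needed.
    responses = openai.Completion.create(
        model=model,
        prompt=prompt,
        max_tokens=max_tokens,
        stream=True,
    )
    collected = ''
    for chunk in responses:
        choice = chunk['choices'][0]
        if choice['finish_reason'] == 'stop':
            # normal end of stream
            break
        if choice['finish_reason'] == 'length':
            # hit the max_tokens limit; report it instead of failing silently,
            # which is the error case the patch surfaces via st.warning
            print(f'Result cut off. max_tokens ({max_tokens}) too small.')
            break
        collected += choice['text']
    return collected


if __name__ == '__main__':
    print(stream_text('Write one sentence about streaming APIs.'))

Note that gpt35_stream uses openai.ChatCompletion.create, whose streamed chunks carry token text under choices[0]['delta'] rather than choices[0]['text'], so a chat-specific consumer would read the delta content instead.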