fix: no error code for stream mode

pull/13/head
sean1832 1 year ago
parent b2c6f8f202
commit 559dd4700e

@@ -48,24 +48,31 @@ def gpt3(prompt, model, params):
     return text
 
 
-def gpt3_stream(API_KEY, prompt, model, params):
-    url = 'https://api.openai.com/v1/completions'
-    headers = {
-        'Accept': 'text/event-stream',
-        'Authorization': 'Bearer ' + API_KEY
-    }
-    body = {
-        'model': model,
-        'prompt': prompt,
-        'max_tokens': params.max_tokens,
-        'temperature': params.temp,
-        'top_p': params.top_p,
-        'frequency_penalty': params.frequency_penalty,
-        'presence_penalty': params.present_penalty,
-        'stream': True,
-    }
-    req = requests.post(url, stream=True, headers=headers, json=body)
-    client = sseclient.SSEClient(req)
-    return client
-    # print(json.loads(event.data)['choices'][0]['text'], end='', flush=True)
+def gpt3_stream(prompt, model, params):
+    response = openai.Completion.create(
+        model=model,
+        stream=True,
+        prompt=prompt,
+        temperature=params.temp,
+        max_tokens=params.max_tokens,
+        top_p=params.top_p,
+        frequency_penalty=params.frequency_penalty,
+        presence_penalty=params.present_penalty
+    )
+    return response
+
+
+def gpt35_stream(prompt, params, system_role_content: str = 'You are a helpful assistant.'):
+    completions = openai.ChatCompletion.create(
+        model="gpt-3.5-turbo",
+        max_tokens=params.max_tokens,
+        temperature=params.temp,
+        top_p=params.top_p,
+        frequency_penalty=params.frequency_penalty,
+        presence_penalty=params.present_penalty,
+        stream=True,
+        messages=[
+            {"role": "system", "content": system_role_content},
+            {"role": "user", "content": prompt}
+        ])
+    return completions
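For reference, a minimal sketch of how these streamed generators might be consumed, assuming the openai-python 0.x SDK used here: Completion chunks expose text under choices[0]['text'], while ChatCompletion chunks deliver increments under choices[0]['delta']; the consumer loops later in this commit cover the Completion case.

# Sketch only, not part of this commit (assumes openai-python 0.x).
def collect_stream(responses, is_chat=False):
    text = ''
    for chunk in responses:
        choice = chunk['choices'][0]
        if choice['finish_reason'] is not None:  # e.g. 'stop' or 'length'
            break
        # chat chunks carry a 'delta' dict; completion chunks carry 'text'
        text += choice['delta'].get('content', '') if is_chat else choice['text']
    return text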

@@ -79,5 +79,11 @@ def get_stream_prompt(query, prompt_file, isQuestion, info_file=None):
 
 def run_stream(query, model, prompt_file, isQuestion, params, info_file=None):
     prompt = get_stream_prompt(query, prompt_file, isQuestion, info_file)
-    client = GPT.gpt_tools.gpt3_stream(API_KEY, prompt, model, params)
+    client = GPT.gpt_tools.gpt3_stream(prompt, model, params)
     return client
+
+
+def run_35_Stream(query, prompt_file, isQuestion, params, info_file=None):
+    prompt = get_stream_prompt(query, prompt_file, isQuestion, info_file)
+    client = GPT.gpt_tools.gpt35_stream(prompt, params)
+    return client

@@ -164,8 +164,7 @@ with body:
                       prompt_core,
                       prompt_dictionary,
                       _('question'),
-                      enable_stream,
-                      SESSION_LANG)
+                      enable_stream)
     # convert param to dictionary
     param_dict = vars(param)

@@ -33,4 +33,4 @@ if 'FILTER_ROW_COUNT' not in st.session_state:
     st.session_state['FILTER_ROW_COUNT'] = util.read_json_at(BRAIN_MEMO, 'filter_row_count', default_value=1)
 
 # models
-MODELS_OPTIONS = ['text-davinci-003', 'text-curie-001', 'text-babbage-001', 'text-ada-001']
+MODELS_OPTIONS = ['gpt-3.5-turbo', 'text-davinci-003', 'text-curie-001', 'text-babbage-001', 'text-ada-001']
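Because 'gpt-3.5-turbo' goes through the Chat Completions endpoint rather than the Completions one, the caller presumably has to branch on the selected model before streaming; a hypothetical dispatch sketch (the branch itself is not part of this diff):

# Hypothetical dispatch, not shown in this diff.
if target_model == 'gpt-3.5-turbo':
    responses = GPT.query.run_35_Stream(query, prompt_file, isQuestion=False, params=params)
else:
    responses = GPT.query.run_stream(query, target_model, prompt_file, isQuestion=False, params=params)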

@@ -239,22 +239,28 @@ def process_response_stream(query, target_model, prompt_file: str, params: GPT.model.param):
     # check if exclude model is not target model
     file_name = util.get_file_name(prompt_file)
     with st.spinner(_('Thinking on ') + f"{file_name}..."):
-        client = GPT.query.run_stream(query,
-                                      target_model,
-                                      prompt_file,
-                                      isQuestion=False,
-                                      params=params)
+        responses = GPT.query.run_stream(query,
+                                         target_model,
+                                         prompt_file,
+                                         isQuestion=False,
+                                         params=params)
         # displaying results
         st.header(f'📃{file_name}')
         response_panel = st.empty()
         previous_chars = ''
-        for event in client.events():
-            if event.data != '[DONE]':
-                char = json.loads(event.data)['choices'][0]['text']
-                response = previous_chars + char
-                response_panel.info(f'{response}')
-                previous_chars += char
+        for response_json in responses:
+            choice = response_json['choices'][0]
+            if choice['finish_reason'] == 'stop':
+                break
+            # error handling
+            if choice['finish_reason'] == 'length':
+                st.warning("⚠️ " + _('Result cut off. max_tokens') + f' ({params.max_tokens}) ' + _('too small. Consider increasing max_tokens.'))
+                break
+            char = choice['text']
+            response = previous_chars + char
+            response_panel.info(f'{response}')
+            previous_chars += char
     time.sleep(1)
     log(previous_chars, delimiter=f'{file_name.upper()}')
@@ -275,8 +281,7 @@ def execute_brain(q, params: GPT.model.param,
                   prompt_core: GPT.model.prompt_core,
                   prompt_dictionary: dict,
                   question_prompt: str,
-                  stream: bool,
-                  session_language,
+                  stream: bool
                   ):
     # log question
     log(f'\n\n\n\n[{str(time.ctime())}] - QUESTION: {q}')
@@ -289,23 +294,30 @@ def execute_brain(q, params: GPT.model.param,
         previous_chars = ''
         is_question_selected = util.contains(op.operations, question_prompt)
         with st.spinner(_('Thinking on Answer')):
-            answer_clients = GPT.query.run_stream(q, model.question_model,
-                                                  prompt_file=prompt_core.question,
-                                                  isQuestion=True,
-                                                  params=params,
-                                                  info_file=prompt_core.my_info)
+            responses = GPT.query.run_stream(q, model.question_model,
+                                             prompt_file=prompt_core.question,
+                                             isQuestion=True,
+                                             params=params,
+                                             info_file=prompt_core.my_info)
         if is_question_selected:
             # displaying results
             st.header(_('💬Answer'))
             answer_panel = st.empty()
-        for event in answer_clients.events():
-            if event.data != '[DONE]':
-                char = json.loads(event.data)['choices'][0]['text']
-                answer = previous_chars + char
-                if is_question_selected:
-                    answer_panel.info(f'{answer}')
-                previous_chars += char
+        for response_json in responses:
+            choice = response_json['choices'][0]
+            if choice['finish_reason'] == 'stop':
+                break
+            # error handling
+            if choice['finish_reason'] == 'length':
+                st.warning("⚠️ " + _('Result cut off. max_tokens') + f' ({params.max_tokens}) ' + _('too small. Consider increasing max_tokens.'))
+                break
+            char = choice['text']
+            answer = previous_chars + char
+            if is_question_selected:
+                answer_panel.info(f'{answer}')
+            previous_chars += char
         time.sleep(0.1)
         log(previous_chars, delimiter='ANSWER')
@@ -318,24 +330,24 @@ def execute_brain(q, params: GPT.model.param,
     else:
         # thinking on answer
         with st.spinner(_('Thinking on Answer')):
-            answer = GPT.query.run(q, model.question_model,
-                                   prompt_file=prompt_core.question,
-                                   isQuestion=True,
-                                   params=params,
-                                   info_file=prompt_core.my_info)
+            responses = GPT.query.run(q, model.question_model,
+                                      prompt_file=prompt_core.question,
+                                      isQuestion=True,
+                                      params=params,
+                                      info_file=prompt_core.my_info)
             if util.contains(op.operations, question_prompt):
                 # displaying results
                 st.header(_('💬Answer'))
-                st.info(f'{answer}')
+                st.info(f'{responses}')
                 time.sleep(1.5)
-                log(answer, delimiter='ANSWER')
+                log(responses, delimiter='ANSWER')
         # thinking on other outputs
         if len(op.operations_no_question) > 0:
             for i in range(len(op.operations_no_question)):
                 prompt_path = prompt_dictionary[op.operations_no_question[i]]
                 other_model = model.other_models[i]
-                process_response(answer, other_model, prompt_path, params)
+                process_response(responses, other_model, prompt_path, params)
 
 
 def message(msg, condition=None):
