fix: no error code for stream mode

pull/13/head
sean1832 1 year ago
parent b2c6f8f202
commit 559dd4700e

@@ -48,24 +48,31 @@ def gpt3(prompt, model, params):
     return text
 
 
-def gpt3_stream(API_KEY, prompt, model, params):
-    url = 'https://api.openai.com/v1/completions'
-    headers = {
-        'Accept': 'text/event-stream',
-        'Authorization': 'Bearer ' + API_KEY
-    }
-    body = {
-        'model': model,
-        'prompt': prompt,
-        'max_tokens': params.max_tokens,
-        'temperature': params.temp,
-        'top_p': params.top_p,
-        'frequency_penalty': params.frequency_penalty,
-        'presence_penalty': params.present_penalty,
-        'stream': True,
-    }
+def gpt3_stream(prompt, model, params):
+    response = openai.Completion.create(
+        model=model,
+        stream=True,
+        prompt=prompt,
+        temperature=params.temp,
+        max_tokens=params.max_tokens,
+        top_p=params.top_p,
+        frequency_penalty=params.frequency_penalty,
+        presence_penalty=params.present_penalty
+    )
+    return response
-    req = requests.post(url, stream=True, headers=headers, json=body)
-    client = sseclient.SSEClient(req)
-    return client
-    # print(json.loads(event.data)['choices'][0]['text'], end='', flush=True)
+
+
+def gpt35_stream(prompt, params, system_role_content: str = 'You are a helpful assistant.'):
+    completions = openai.ChatCompletion.create(
+        model="gpt-3.5-turbo",
+        max_tokens=params.max_tokens,
+        temperature=params.temp,
+        top_p=params.top_p,
+        frequency_penalty=params.frequency_penalty,
+        presence_penalty=params.present_penalty,
+        stream=True,
+        messages=[
+            {"role": "system", "content": system_role_content},
+            {"role": "user", "content": prompt}
+        ])
+    return completions

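For context, a minimal sketch (not part of this commit) of how the generator returned by the new gpt3_stream can be consumed. It mirrors the finish_reason handling this commit adds further down; the helper name collect_stream is illustrative only, and it assumes the legacy openai-python 0.x SDK, where stream=True yields chunk objects shaped like {'choices': [{'text': ..., 'finish_reason': ...}]}.

def collect_stream(responses):
    # Accumulate streamed completion text, stopping on 'stop' and
    # flagging truncation when finish_reason is 'length'.
    text = ''
    for chunk in responses:
        choice = chunk['choices'][0]
        if choice['finish_reason'] == 'stop':
            break
        if choice['finish_reason'] == 'length':
            print('warning: result cut off, increase max_tokens')
            break
        text += choice['text']
    return text

# usage (hypothetical): collect_stream(gpt3_stream(prompt, 'text-davinci-003', params))
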
@@ -79,5 +79,11 @@ def get_stream_prompt(query, prompt_file, isQuestion, info_file=None):
 def run_stream(query, model, prompt_file, isQuestion, params, info_file=None):
     prompt = get_stream_prompt(query, prompt_file, isQuestion, info_file)
-    client = GPT.gpt_tools.gpt3_stream(API_KEY, prompt, model, params)
+    client = GPT.gpt_tools.gpt3_stream(prompt, model, params)
     return client
+
+
+def run_35_Stream(query, prompt_file, isQuestion, params, info_file=None):
+    prompt = get_stream_prompt(query, prompt_file, isQuestion, info_file)
+    client = GPT.gpt_tools.gpt35_stream(prompt, params)
+    return client

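Note that the chat endpoint streams incremental deltas rather than a text field, so output from run_35_Stream would be read slightly differently than the completion stream above. A hedged sketch, assuming the openai-python 0.x chat-chunk format where each chunk carries choices[0]['delta']:

def collect_chat_stream(responses):
    # Chat chunks hold partial content under 'delta'; the key can be
    # missing on role-only or final chunks, so default to ''.
    text = ''
    for chunk in responses:
        choice = chunk['choices'][0]
        if choice['finish_reason'] in ('stop', 'length'):
            break
        text += choice['delta'].get('content', '')
    return text
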
@@ -164,8 +164,7 @@ with body:
                             prompt_core,
                             prompt_dictionary,
                             _('question'),
-                            enable_stream,
-                            SESSION_LANG)
+                            enable_stream)
        # convert param to dictionary
        param_dict = vars(param)
@@ -33,4 +33,4 @@ if 'FILTER_ROW_COUNT' not in st.session_state:
     st.session_state['FILTER_ROW_COUNT'] = util.read_json_at(BRAIN_MEMO, 'filter_row_count', default_value=1)
 
 # models
-MODELS_OPTIONS = ['text-davinci-003', 'text-curie-001', 'text-babbage-001', 'text-ada-001']
+MODELS_OPTIONS = ['gpt-3.5-turbo', 'text-davinci-003', 'text-curie-001', 'text-babbage-001', 'text-ada-001']

@@ -239,22 +239,28 @@ def process_response_stream(query, target_model, prompt_file: str, params: GPT.m
     # check if exclude model is not target model
     file_name = util.get_file_name(prompt_file)
     with st.spinner(_('Thinking on ') + f"{file_name}..."):
-        client = GPT.query.run_stream(query,
-                                      target_model,
-                                      prompt_file,
-                                      isQuestion=False,
-                                      params=params)
+        responses = GPT.query.run_stream(query,
+                                         target_model,
+                                         prompt_file,
+                                         isQuestion=False,
+                                         params=params)
     # displaying results
     st.header(f'📃{file_name}')
     response_panel = st.empty()
     previous_chars = ''
-    for event in client.events():
-        if event.data != '[DONE]':
-            char = json.loads(event.data)['choices'][0]['text']
-            response = previous_chars + char
-            response_panel.info(f'{response}')
-            previous_chars += char
+    for response_json in responses:
+        choice = response_json['choices'][0]
+        if choice['finish_reason'] == 'stop':
+            break
+        # error handling
+        if choice['finish_reason'] == 'length':
+            st.warning("⚠️ " + _('Result cut off. max_tokens') + f' ({params.max_tokens}) ' + _('too small. Consider increasing max_tokens.'))
+            break
+        char = choice['text']
+        response = previous_chars + char
+        response_panel.info(f'{response}')
+        previous_chars += char
 
     time.sleep(1)
     log(previous_chars, delimiter=f'{file_name.upper()}')
 
@@ -275,8 +281,7 @@ def execute_brain(q, params: GPT.model.param,
                   prompt_core: GPT.model.prompt_core,
                   prompt_dictionary: dict,
                   question_prompt: str,
-                  stream: bool,
-                  session_language,
+                  stream: bool
                   ):
     # log question
     log(f'\n\n\n\n[{str(time.ctime())}] - QUESTION: {q}')
 
@@ -289,23 +294,30 @@ def execute_brain(q, params: GPT.model.param,
         previous_chars = ''
         is_question_selected = util.contains(op.operations, question_prompt)
         with st.spinner(_('Thinking on Answer')):
-            answer_clients = GPT.query.run_stream(q, model.question_model,
-                                                  prompt_file=prompt_core.question,
-                                                  isQuestion=True,
-                                                  params=params,
-                                                  info_file=prompt_core.my_info)
+            responses = GPT.query.run_stream(q, model.question_model,
+                                             prompt_file=prompt_core.question,
+                                             isQuestion=True,
+                                             params=params,
+                                             info_file=prompt_core.my_info)
         if is_question_selected:
             # displaying results
             st.header(_('💬Answer'))
             answer_panel = st.empty()
-        for event in answer_clients.events():
-            if event.data != '[DONE]':
-                char = json.loads(event.data)['choices'][0]['text']
-                answer = previous_chars + char
-                if is_question_selected:
-                    answer_panel.info(f'{answer}')
-                previous_chars += char
+        for response_json in responses:
+            choice = response_json['choices'][0]
+            if choice['finish_reason'] == 'stop':
+                break
+            # error handling
+            if choice['finish_reason'] == 'length':
+                st.warning("⚠️ " + _('Result cut off. max_tokens') + f' ({params.max_tokens}) ' + _('too small. Consider increasing max_tokens.'))
+                break
+            char = choice['text']
+            answer = previous_chars + char
+            if is_question_selected:
+                answer_panel.info(f'{answer}')
+            previous_chars += char
 
         time.sleep(0.1)
         log(previous_chars, delimiter='ANSWER')
 
@@ -318,24 +330,24 @@ def execute_brain(q, params: GPT.model.param,
     else:
         # thinking on answer
        with st.spinner(_('Thinking on Answer')):
-            answer = GPT.query.run(q, model.question_model,
-                                   prompt_file=prompt_core.question,
-                                   isQuestion=True,
-                                   params=params,
-                                   info_file=prompt_core.my_info)
+            responses = GPT.query.run(q, model.question_model,
+                                      prompt_file=prompt_core.question,
+                                      isQuestion=True,
+                                      params=params,
+                                      info_file=prompt_core.my_info)
            if util.contains(op.operations, question_prompt):
                # displaying results
                st.header(_('💬Answer'))
-                st.info(f'{answer}')
+                st.info(f'{responses}')
        time.sleep(1.5)
-        log(answer, delimiter='ANSWER')
+        log(responses, delimiter='ANSWER')
 
        # thinking on other outputs
        if len(op.operations_no_question) > 0:
            for i in range(len(op.operations_no_question)):
                prompt_path = prompt_dictionary[op.operations_no_question[i]]
                other_model = model.other_models[i]
-                process_response(answer, other_model, prompt_path, params)
+                process_response(responses, other_model, prompt_path, params)
 
 
 def message(msg, condition=None):
