From 559dd4700e91fa0fd7a67b23f341ba4878c86baf Mon Sep 17 00:00:00 2001
From: sean1832
Date: Sat, 4 Mar 2023 01:33:58 +1100
Subject: [PATCH] fix: no error code for stream mode

---
 GPT/gpt_tools.py           | 47 +++++++++++++----------
 GPT/query.py               |  8 +++-
 Seanium_Brain.py           |  3 +-
 modules/INFO.py            |  2 +-
 streamlit_toolkit/tools.py | 78 ++++++++++++++++++++++----------------
 5 files changed, 81 insertions(+), 57 deletions(-)

diff --git a/GPT/gpt_tools.py b/GPT/gpt_tools.py
index 79bab6b..9512f64 100644
--- a/GPT/gpt_tools.py
+++ b/GPT/gpt_tools.py
@@ -48,24 +48,31 @@ def gpt3(prompt, model, params):
     return text
 
 
-def gpt3_stream(API_KEY, prompt, model, params):
-    url = 'https://api.openai.com/v1/completions'
-    headers = {
-        'Accept': 'text/event-stream',
-        'Authorization': 'Bearer ' + API_KEY
-    }
-    body = {
-        'model': model,
-        'prompt': prompt,
-        'max_tokens': params.max_tokens,
-        'temperature': params.temp,
-        'top_p': params.top_p,
-        'frequency_penalty': params.frequency_penalty,
-        'presence_penalty': params.present_penalty,
-        'stream': True,
-    }
+def gpt3_stream(prompt, model, params):
+    response = openai.Completion.create(
+        model=model,
+        stream=True,
+        prompt=prompt,
+        temperature=params.temp,
+        max_tokens=params.max_tokens,
+        top_p=params.top_p,
+        frequency_penalty=params.frequency_penalty,
+        presence_penalty=params.present_penalty
+    )
+    return response
 
-    req = requests.post(url, stream=True, headers=headers, json=body)
-    client = sseclient.SSEClient(req)
-    return client
-    # print(json.loads(event.data)['choices'][0]['text'], end='', flush=True)
+
+def gpt35_stream(prompt, params, system_role_content: str = 'You are a helpful assistant.'):
+    completions = openai.ChatCompletion.create(
+        model="gpt-3.5-turbo",
+        max_tokens=params.max_tokens,
+        temperature=params.temp,
+        top_p=params.top_p,
+        frequency_penalty=params.frequency_penalty,
+        presence_penalty=params.present_penalty,
+        stream=True,
+        messages=[
+            {"role": "system", "content": system_role_content},
+            {"role": "user", "content": prompt}
+        ])
+    return completions
diff --git a/GPT/query.py b/GPT/query.py
index 52c4968..72e4acb 100644
--- a/GPT/query.py
+++ b/GPT/query.py
@@ -79,5 +79,11 @@ def get_stream_prompt(query, prompt_file, isQuestion, info_file=None):
 
 def run_stream(query, model, prompt_file, isQuestion, params, info_file=None):
     prompt = get_stream_prompt(query, prompt_file, isQuestion, info_file)
-    client = GPT.gpt_tools.gpt3_stream(API_KEY, prompt, model, params)
+    client = GPT.gpt_tools.gpt3_stream(prompt, model, params)
+    return client
+
+
+def run_35_Stream(query, prompt_file, isQuestion, params, info_file=None):
+    prompt = get_stream_prompt(query, prompt_file, isQuestion, info_file)
+    client = GPT.gpt_tools.gpt35_stream(prompt, params)
     return client
diff --git a/Seanium_Brain.py b/Seanium_Brain.py
index b308be9..db2109f 100644
--- a/Seanium_Brain.py
+++ b/Seanium_Brain.py
@@ -164,8 +164,7 @@ with body:
                                 prompt_core,
                                 prompt_dictionary,
                                 _('question'),
-                                enable_stream,
-                                SESSION_LANG)
+                                enable_stream)
 
             # convert param to dictionary
             param_dict = vars(param)
diff --git a/modules/INFO.py b/modules/INFO.py
index 51c1f95..6413726 100644
--- a/modules/INFO.py
+++ b/modules/INFO.py
@@ -33,4 +33,4 @@ if 'FILTER_ROW_COUNT' not in st.session_state:
     st.session_state['FILTER_ROW_COUNT'] = util.read_json_at(BRAIN_MEMO, 'filter_row_count', default_value=1)
 
 # models
-MODELS_OPTIONS = ['text-davinci-003', 'text-curie-001', 'text-babbage-001', 'text-ada-001']
+MODELS_OPTIONS = ['gpt-3.5-turbo', 'text-davinci-003', 'text-curie-001', 'text-babbage-001', 'text-ada-001']
diff --git a/streamlit_toolkit/tools.py b/streamlit_toolkit/tools.py
index 3f404a4..3b69465 100644
--- a/streamlit_toolkit/tools.py
+++ b/streamlit_toolkit/tools.py
@@ -239,22 +239,28 @@ def process_response_stream(query, target_model, prompt_file: str, params: GPT.m
     # check if exclude model is not target model
     file_name = util.get_file_name(prompt_file)
     with st.spinner(_('Thinking on ') + f"{file_name}..."):
-        client = GPT.query.run_stream(query,
-                                      target_model,
-                                      prompt_file,
-                                      isQuestion=False,
-                                      params=params)
+        responses = GPT.query.run_stream(query,
+                                         target_model,
+                                         prompt_file,
+                                         isQuestion=False,
+                                         params=params)
 
         # displaying results
         st.header(f'📃{file_name}')
         response_panel = st.empty()
         previous_chars = ''
-        for event in client.events():
-            if event.data != '[DONE]':
-                char = json.loads(event.data)['choices'][0]['text']
-                response = previous_chars + char
-                response_panel.info(f'{response}')
-                previous_chars += char
+        for response_json in responses:
+            choice = response_json['choices'][0]
+            if choice['finish_reason'] == 'stop':
+                break
+            # error handling
+            if choice['finish_reason'] == 'length':
+                st.warning("⚠️ " + _('Result cut off. max_tokens') + f' ({params.max_tokens}) ' + _('too small. Consider increasing max_tokens.'))
+                break
+            char = choice['text']
+            response = previous_chars + char
+            response_panel.info(f'{response}')
+            previous_chars += char
 
     time.sleep(1)
     log(previous_chars, delimiter=f'{file_name.upper()}')
@@ -275,8 +281,7 @@ def execute_brain(q, params: GPT.model.param,
                   prompt_core: GPT.model.prompt_core,
                   prompt_dictionary: dict,
                   question_prompt: str,
-                  stream: bool,
-                  session_language,
+                  stream: bool
                   ):
     # log question
     log(f'\n\n\n\n[{str(time.ctime())}] - QUESTION: {q}')
@@ -289,23 +294,30 @@ def execute_brain(q, params: GPT.model.param,
         previous_chars = ''
         is_question_selected = util.contains(op.operations, question_prompt)
         with st.spinner(_('Thinking on Answer')):
-            answer_clients = GPT.query.run_stream(q, model.question_model,
-                                                  prompt_file=prompt_core.question,
-                                                  isQuestion=True,
-                                                  params=params,
-                                                  info_file=prompt_core.my_info)
+            responses = GPT.query.run_stream(q, model.question_model,
+                                             prompt_file=prompt_core.question,
+                                             isQuestion=True,
+                                             params=params,
+                                             info_file=prompt_core.my_info)
 
             if is_question_selected:
                 # displaying results
                 st.header(_('💬Answer'))
                 answer_panel = st.empty()
-            for event in answer_clients.events():
-                if event.data != '[DONE]':
-                    char = json.loads(event.data)['choices'][0]['text']
-                    answer = previous_chars + char
-                    if is_question_selected:
-                        answer_panel.info(f'{answer}')
-                    previous_chars += char
+            for response_json in responses:
+                choice = response_json['choices'][0]
+                if choice['finish_reason'] == 'stop':
+                    break
+                # error handling
+                if choice['finish_reason'] == 'length':
+                    st.warning("⚠️ " + _('Result cut off. max_tokens') + f' ({params.max_tokens}) ' + _('too small. Consider increasing max_tokens.'))
+                    break
+
+                char = choice['text']
+                answer = previous_chars + char
+                if is_question_selected:
+                    answer_panel.info(f'{answer}')
+                previous_chars += char
 
     time.sleep(0.1)
     log(previous_chars, delimiter='ANSWER')
@@ -318,24 +330,24 @@ def execute_brain(q, params: GPT.model.param,
     else:
         # thinking on answer
         with st.spinner(_('Thinking on Answer')):
-            answer = GPT.query.run(q, model.question_model,
-                                   prompt_file=prompt_core.question,
-                                   isQuestion=True,
-                                   params=params,
-                                   info_file=prompt_core.my_info)
+            responses = GPT.query.run(q, model.question_model,
+                                      prompt_file=prompt_core.question,
+                                      isQuestion=True,
+                                      params=params,
+                                      info_file=prompt_core.my_info)
             if util.contains(op.operations, question_prompt):
                 # displaying results
                 st.header(_('💬Answer'))
-                st.info(f'{answer}')
+                st.info(f'{responses}')
                 time.sleep(1.5)
-                log(answer, delimiter='ANSWER')
+                log(responses, delimiter='ANSWER')
 
     # thinking on other outputs
     if len(op.operations_no_question) > 0:
         for i in range(len(op.operations_no_question)):
             prompt_path = prompt_dictionary[op.operations_no_question[i]]
             other_model = model.other_models[i]
-            process_response(answer, other_model, prompt_path, params)
+            process_response(responses, other_model, prompt_path, params)
 
 
 def message(msg, condition=None):
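
Reference sketch (not part of the diff above) of the streaming pattern this patch adopts, written as a standalone script. It assumes the pre-1.0 openai Python SDK; the API key placeholder, prompt, and parameter values are illustrative only and not taken from the repository.

import openai

openai.api_key = 'sk-...'  # placeholder; supply a real key


def stream_text(prompt, model='text-davinci-003', max_tokens=256):
    # Request a streamed completion; the SDK parses the SSE events itself,
    # so no requests/sseclient plumbing or manual '[DONE]' check is needed.
    responses = openai.Completion.create(
        model=model,
        prompt=prompt,
        max_tokens=max_tokens,
        stream=True,
    )
    collected = ''
    for chunk in responses:
        choice = chunk['choices'][0]
        if choice['finish_reason'] == 'stop':
            # normal end of stream
            break
        if choice['finish_reason'] == 'length':
            # hit the max_tokens limit; report it instead of failing silently,
            # which is the error case the patch surfaces via st.warning
            print(f'Result cut off. max_tokens ({max_tokens}) too small.')
            break
        collected += choice['text']
    return collected


if __name__ == '__main__':
    print(stream_text('Write one sentence about streaming APIs.'))

Note that gpt35_stream uses openai.ChatCompletion.create, whose streamed chunks carry token text under choices[0]['delta'] rather than choices[0]['text'], so a chat-specific consumer would read the delta content instead.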