mirror of https://github.com/sean1832/GPT-Brain
Initial commit
commit
577db13cf2
@ -0,0 +1,2 @@
|
||||
# Auto detect text files and perform LF normalization
|
||||
* text=auto
|
@ -0,0 +1,160 @@
|
||||
.user/
|
||||
|
||||
# Byte-compiled / optimized / DLL files
|
||||
__pycache__/
|
||||
*.py[cod]
|
||||
*$py.class
|
||||
|
||||
# C extensions
|
||||
*.so
|
||||
|
||||
# Distribution / packaging
|
||||
.Python
|
||||
build/
|
||||
develop-eggs/
|
||||
dist/
|
||||
downloads/
|
||||
eggs/
|
||||
.eggs/
|
||||
lib/
|
||||
lib64/
|
||||
parts/
|
||||
sdist/
|
||||
var/
|
||||
wheels/
|
||||
share/python-wheels/
|
||||
*.egg-info/
|
||||
.installed.cfg
|
||||
*.egg
|
||||
MANIFEST
|
||||
|
||||
# PyInstaller
|
||||
# Usually these files are written by a python script from a template
|
||||
# before PyInstaller builds the exe, so as to inject date/other infos into it.
|
||||
*.manifest
|
||||
*.spec
|
||||
|
||||
# Installer logs
|
||||
pip-log.txt
|
||||
pip-delete-this-directory.txt
|
||||
|
||||
# Unit test / coverage reports
|
||||
htmlcov/
|
||||
.tox/
|
||||
.nox/
|
||||
.coverage
|
||||
.coverage.*
|
||||
.cache
|
||||
nosetests.xml
|
||||
coverage.xml
|
||||
*.cover
|
||||
*.py,cover
|
||||
.hypothesis/
|
||||
.pytest_cache/
|
||||
cover/
|
||||
|
||||
# Translations
|
||||
*.mo
|
||||
*.pot
|
||||
|
||||
# Django stuff:
|
||||
*.log
|
||||
local_settings.py
|
||||
db.sqlite3
|
||||
db.sqlite3-journal
|
||||
|
||||
# Flask stuff:
|
||||
instance/
|
||||
.webassets-cache
|
||||
|
||||
# Scrapy stuff:
|
||||
.scrapy
|
||||
|
||||
# Sphinx documentation
|
||||
docs/_build/
|
||||
|
||||
# PyBuilder
|
||||
.pybuilder/
|
||||
target/
|
||||
|
||||
# Jupyter Notebook
|
||||
.ipynb_checkpoints
|
||||
|
||||
# IPython
|
||||
profile_default/
|
||||
ipython_config.py
|
||||
|
||||
# pyenv
|
||||
# For a library or package, you might want to ignore these files since the code is
|
||||
# intended to run in multiple environments; otherwise, check them in:
|
||||
# .python-version
|
||||
|
||||
# pipenv
|
||||
# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
|
||||
# However, in case of collaboration, if having platform-specific dependencies or dependencies
|
||||
# having no cross-platform support, pipenv may install dependencies that don't work, or not
|
||||
# install all needed dependencies.
|
||||
#Pipfile.lock
|
||||
|
||||
# poetry
|
||||
# Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
|
||||
# This is especially recommended for binary packages to ensure reproducibility, and is more
|
||||
# commonly ignored for libraries.
|
||||
# https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
|
||||
#poetry.lock
|
||||
|
||||
# PEP 582; used by e.g. github.com/David-OConnor/pyflow
|
||||
__pypackages__/
|
||||
|
||||
# Celery stuff
|
||||
celerybeat-schedule
|
||||
celerybeat.pid
|
||||
|
||||
# SageMath parsed files
|
||||
*.sage.py
|
||||
|
||||
# Environments
|
||||
.env
|
||||
.venv
|
||||
env/
|
||||
venv/
|
||||
ENV/
|
||||
env.bak/
|
||||
venv.bak/
|
||||
|
||||
# Spyder project settings
|
||||
.spyderproject
|
||||
.spyproject
|
||||
|
||||
# Rope project settings
|
||||
.ropeproject
|
||||
|
||||
# mkdocs documentation
|
||||
/site
|
||||
|
||||
# mypy
|
||||
.mypy_cache/
|
||||
.dmypy.json
|
||||
dmypy.json
|
||||
|
||||
# Pyre type checker
|
||||
.pyre/
|
||||
|
||||
# pytype static type analyzer
|
||||
.pytype/
|
||||
|
||||
# Cython debug symbols
|
||||
cython_debug/
|
||||
|
||||
# PyCharm
|
||||
# JetBrains specific template is maintainted in a separate JetBrains.gitignore that can
|
||||
# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
|
||||
# and can be added to the global gitignore or merged into this file. For a more nuclear
|
||||
# option (not recommended) you can uncomment the following to ignore the entire idea folder.
|
||||
.idea/
|
||||
|
||||
# vsCode
|
||||
.vscode/
|
||||
|
||||
# test folder
|
||||
.test/
|
@ -0,0 +1,21 @@
|
||||
MIT License
|
||||
|
||||
Copyright (c) 2023 sean1832
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in all
|
||||
copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
SOFTWARE.
|
@ -0,0 +1,21 @@
|
||||
# Auto Setup
|
||||
### What you need
|
||||
- Install python `3.11`
|
||||
- OpenAI API keys
|
||||
|
||||
### .bat file
|
||||
1. Run `setup.bat`
|
||||
2. Enter OpenAI API Key
|
||||
|
||||
# Manual Setup
|
||||
### Python
|
||||
1. Make sure to install python `3.11`
|
||||
1. Create venv using `python -m venv venv` under the project root directory
|
||||
2. Enter venv using `venv\Scripts\activate`
|
||||
3. Update pip by using `python -m pip install --upgrade pip`
|
||||
4. Install required libraries using `pip3 install -r requirements.txt`
|
||||
|
||||
### API Key file
|
||||
1. Create API Key file using cmd with command `if not exist .user\ (md .user\) & echo [YOUR API KEYS]> .user\API-KEYS.txt`
|
||||
|
||||
|
@ -0,0 +1,16 @@
|
||||
@echo off
|
||||
cd..
|
||||
echo Activating Virtural environment...
|
||||
call .\venv\Scripts\activate
|
||||
|
||||
echo upgrading pip...
|
||||
python -m pip install --upgrade pip
|
||||
|
||||
|
||||
echo Installing pipreqs...
|
||||
pip install pipreqs
|
||||
|
||||
echo Export to requirements.txt
|
||||
pipreqs . --force --encoding utf-8
|
||||
|
||||
pause
|
@ -0,0 +1,8 @@
|
||||
@echo off
|
||||
cd..
|
||||
echo Activating Virtural environment...
|
||||
call .\venv\Scripts\activate
|
||||
|
||||
echo building brain...
|
||||
python build-brain.py
|
||||
echo complete building brain!
|
@ -0,0 +1,73 @@
|
||||
import openai
|
||||
import numpy as np
|
||||
import textwrap
|
||||
import utilities
|
||||
|
||||
openai.api_key = utilities.open_file(r'.user\API-KEYS.txt').strip()
|
||||
BRAIN_DATA = utilities.read_json_file(r'.user\brain-data.json')
|
||||
|
||||
# this function compare similarity between two vectors.
|
||||
# The higher value the dot product have, the more alike between these vectors
|
||||
def similarity(v1, v2):
    """Dot-product similarity: the larger the result, the more alike the vectors."""
    score = np.dot(v1, v2)
    return score
|
||||
|
||||
def search_chunks(text, data, count=1):
    """Return the `count` best-matching entries of `data` for the query `text`.

    Each entry must carry 'content' and a precomputed 'vector'; results are
    dicts with the content and its similarity 'point', best match first.
    """
    query_vector = utilities.embedding(text)
    scored = [
        {'content': entry['content'],
         'point': similarity(query_vector, entry['vector'])}
        for entry in data
    ]
    # highest similarity first
    scored.sort(key=lambda d: d['point'], reverse=True)
    return scored[:count]
|
||||
|
||||
def gpt3(prompt, model='text-davinci-003'):
    """Send `prompt` to an OpenAI completion model and return the stripped text."""
    completion = openai.Completion.create(
        model=model,
        prompt=prompt,
        temperature=0.1,
        max_tokens=1000,
        top_p=1,
        frequency_penalty=0,
        presence_penalty=0,
    )
    return completion['choices'][0]['text'].strip()
|
||||
|
||||
def main():
    # Interactive loop: read a question, answer it from the brain data,
    # then summarize the combined answers. Runs until interrupted.
    while True:

        query = input('\n\nAsk brain: ')
        # find the most relevant stored chunk(s) for the question
        results = search_chunks(query, BRAIN_DATA)
        answers = []
        answers_count = 0
        for result in results:
            my_info = utilities.open_file(r'prompt\my-info.txt')

            # fill the question prompt template with context, query and user info
            prompt = utilities.open_file(r'prompt\question.txt')
            prompt = prompt.replace('<<INFO>>', result['content'])
            prompt = prompt.replace('<<QS>>', query)
            prompt = prompt.replace('<<MY-INFO>>', my_info)

            answer = gpt3(prompt, model='text-davinci-003')
            answers.append(answer)
            answers_count += 1

        all_answers = '\n\n'.join(answers)
        print('\n\n============ANSWER============\n\n', all_answers)

        # summarize the combined answers chunk-by-chunk with a cheaper model
        chunks = textwrap.wrap(all_answers, 10000)
        end = []
        for chunk in chunks:
            prompt = utilities.open_file(r'prompt\summarize.txt').replace('<<SUM>>', chunk)
            summary = gpt3(prompt, model='text-curie-001')
            end.append(summary)
        print('\n\n============SUMMRY============\n\n', '\n\n'.join(end))
if __name__ == '__main__':
    main()
|
@ -0,0 +1,25 @@
|
||||
import openai
|
||||
import textwrap
|
||||
import utilities
|
||||
|
||||
|
||||
openai.api_key = utilities.open_file(r'.user\API-KEYS.txt').strip()
|
||||
|
||||
def main():
    # Build the "brain": embed every chunk of the input notes and persist
    # the (content, vector) pairs as JSON for later similarity search.
    all_text = utilities.open_file(r'.user\input.txt')

    # split text into smaller chunk of 4000 char each
    chunks = textwrap.wrap(all_text, 4000)

    result = []

    for chunk in chunks:
        # drop non-ASCII characters before sending the chunk to the embedding API
        embedding = utilities.embedding(chunk.encode(encoding='ASCII', errors='ignore').decode())
        info = {'content':chunk, 'vector':embedding}
        print(info, '\n\n\n')
        result.append(info)

    utilities.write_json_file(result, r'.user\brain-data.json')

if __name__ == '__main__':
    main()
|
@ -0,0 +1,44 @@
|
||||
import os
|
||||
import time
|
||||
import utilities
|
||||
|
||||
file_path = r'.user\input.txt'
|
||||
temp_file = r'.user\input_last-run.temp'
|
||||
sig_file = r'.user\input_sig.temp'
|
||||
|
||||
def compare_time(t1, t2):
    """True when the two timestamp strings are identical."""
    matched = (t1 == t2)
    return matched
|
||||
|
||||
def write_sig(value):
    """Persist the update signal ('updated' / 'not updated') to the signal file.

    The parameter is renamed from `bool`, which shadowed the builtin type.
    Callers in this module pass it positionally, so behavior is unchanged.
    """
    utilities.write_file(value, sig_file)
|
||||
|
||||
def check():
    # Compare input.txt's modification time against the value cached in the
    # temp file, and write 'updated' / 'not updated' to the signal file so
    # the calling batch script knows whether to rebuild the brain.
    if os.path.exists(file_path):
        # get modification time of the file
        mod_time = os.path.getmtime(file_path)

        # convert the modification time to readable format
        read_mod_time = time.ctime(mod_time)

        if os.path.exists(temp_file):
            temp_info = utilities.open_file(temp_file)
            if compare_time(read_mod_time, temp_info):
                write_sig('not updated')
                print('File has not been updated.')
            else:
                # cache the new mtime, then signal the rebuild
                print('File has been updated.')
                utilities.write_file(read_mod_time, temp_file)
                write_sig('updated')
        else:
            # first run: no cache yet, so treat the file as not updated
            print('Temp file not exist, writing temp file...')
            # write to temp file
            utilities.write_file(read_mod_time, temp_file)
            write_sig('not updated')
    else:
        raise FileNotFoundError(f'File: {file_path} does not exist.')
|
||||
|
||||
def main():
    # Entry point: compare input.txt's mtime against the cached value.
    check()

if __name__ == '__main__':
    main()
|
@ -0,0 +1,27 @@
|
||||
@echo off
|
||||
cd..
|
||||
echo Activating Virtural environment...
|
||||
call .\venv\Scripts\activate
|
||||
|
||||
rem checking if input.txt is updated
|
||||
python console_app\check_update.py
|
||||
|
||||
setlocal enabledelayedexpansion
|
||||
set "tempFile=.user\input_sig.temp"
|
||||
|
||||
for /f "usebackq delims=" %%a in ("%tempFile%") do (
|
||||
set "tempValue=%%a"
|
||||
)
|
||||
|
||||
if "%tempValue%" == "not updated" (
|
||||
goto end
|
||||
) else (
|
||||
call batch-programs\run-build-brain.bat
|
||||
cls
|
||||
echo Brain updated!
|
||||
)
|
||||
|
||||
|
||||
:end
|
||||
echo running brain...
|
||||
python console_app\brain.py
|
@ -0,0 +1,24 @@
|
||||
import json
|
||||
import openai
|
||||
|
||||
def open_file(filepath):
    """Read a whole text file (UTF-8) and return its contents."""
    with open(filepath, 'r', encoding='utf-8') as f:
        return f.read()
|
||||
|
||||
def write_file(content, filepath):
    """Write `content` to `filepath`, replacing any existing file.

    Opens with explicit UTF-8 so non-ASCII text round-trips with open_file()
    regardless of the platform's default encoding (the original omitted the
    encoding, which fails for non-ASCII content on e.g. Windows cp1252).
    """
    with open(filepath, 'w', encoding='utf-8') as file:
        file.write(content)
|
||||
|
||||
def write_json_file(content, filepath):
    """Serialize `content` as pretty-printed (2-space indented) JSON into `filepath`."""
    with open(filepath, 'w') as handle:
        json.dump(content, handle, indent=2)
|
||||
|
||||
def read_json_file(filepath):
    """Load and return the JSON document stored at `filepath`."""
    with open(filepath, 'r') as handle:
        return json.load(handle)
|
||||
|
||||
def embedding(content, engine='text-embedding-ada-002'):
    """Embed `content` via the OpenAI API and return the vector (list of floats)."""
    response = openai.Embedding.create(input=content, engine=engine)
    return response['data'][0]['embedding']
|
@ -0,0 +1,2 @@
|
||||
My name is Zeke Zhang.
|
||||
I am an architecture academic.
|
@ -0,0 +1,3 @@
|
||||
numpy==1.24.1
|
||||
openai==0.26.4
|
||||
streamlit==1.17.0
|
@ -0,0 +1,5 @@
|
||||
@echo off
|
||||
echo Activating Virtural environment...
|
||||
call .\venv\Scripts\activate
|
||||
|
||||
streamlit run web_ui/Seanium_Brain.py
|
@ -0,0 +1,45 @@
|
||||
@echo off
|
||||
echo Creating Virtural environment folder...
|
||||
|
||||
python -m venv venv
|
||||
echo Virtural environment created successfully!
|
||||
ping 127.0.0.1 -n 2 > NUL
|
||||
|
||||
|
||||
echo Activating Virtural environment!
|
||||
call .\venv\Scripts\activate
|
||||
|
||||
echo updating pip
|
||||
python -m pip install --upgrade pip
|
||||
|
||||
|
||||
pip3 install -r requirements.txt
|
||||
ping 127.0.0.1 -n 2 > NUL
|
||||
echo Virtual requirements installed successfully!
|
||||
cls
|
||||
|
||||
echo Creating OpenAI API keys profile...
|
||||
REM if .user\ not exist, create one
|
||||
if not exist .user\ (md .user\)
|
||||
|
||||
REM Create API KEY file
|
||||
set /p API_KEYS=[Enter your API keys]:
|
||||
echo %API_KEYS%> .user\API-KEYS.txt
|
||||
echo API key written to file!
|
||||
|
||||
REM copy example prompt
|
||||
if not exist .user\prompt (md .user\prompt)
|
||||
xcopy "example_prompt\*.*" ".user\prompt" /s /i
|
||||
|
||||
REM wait 2 tick
|
||||
ping 127.0.0.1 -n 2 > NUL
|
||||
|
||||
REM create input txt file
|
||||
echo.> .user\input.txt
|
||||
echo input file created!
|
||||
|
||||
echo Setup complete! Exiting...
|
||||
|
||||
|
||||
|
||||
pause
|
@ -0,0 +1,123 @@
|
||||
import streamlit as st
|
||||
from modules import utilities as util
|
||||
import brain
|
||||
import check_update
|
||||
import time
|
||||
import os
|
||||
|
||||
|
||||
|
||||
|
||||
# activate session: remember a per-session timestamp across streamlit reruns
if 'SESSION_TIME' not in st.session_state:
    # was "%Y%m%d-%H%H%S", which repeated the hour and omitted the minutes,
    # producing colliding/non-sortable session ids within the same hour
    st.session_state['SESSION_TIME'] = time.strftime("%Y%m%d-%H%M%S")
|
||||
|
||||
st.set_page_config(
|
||||
page_title='Seanium Brain'
|
||||
)
|
||||
|
||||
model_options = ['text-davinci-003', 'text-curie-001','text-babbage-001','text-ada-001']
|
||||
header = st.container()
|
||||
body = st.container()
|
||||
LOG_PATH = '.user/log'
|
||||
SESSION_TIME = st.session_state['SESSION_TIME']
|
||||
CURRENT_LOG_FILE = f'{LOG_PATH}/log_{SESSION_TIME}.log'
|
||||
|
||||
|
||||
def create_log():
    # Create the per-session log file (with a session header) on first use,
    # then return its path.
    if not os.path.exists(CURRENT_LOG_FILE):
        util.write_file(f'Session {SESSION_TIME}\n\n', CURRENT_LOG_FILE)
    return CURRENT_LOG_FILE
|
||||
|
||||
def log(content, path=LOG_PATH, seperater_text=''):
    # Append `content` to the session log, optionally under a banner line.
    # NOTE(review): `path` is currently unused — writes always go to the
    # session log file returned by create_log().
    log_file = create_log()

    if seperater_text != '':
        seperater_text = f'\n\n=============={seperater_text}==============\n'

    util.write_file(f'\n{seperater_text + content}', log_file, 'a')
|
||||
|
||||
def clear_log():
    """Delete every log file under LOG_PATH except the current session's."""
    keep = f'log_{SESSION_TIME}.log'
    for root, _dirs, files in os.walk(LOG_PATH):
        for name in files:
            if name == keep:
                continue
            os.remove(os.path.join(root, name))
|
||||
|
||||
def save_as():
    # download log file
    # Offer the current session log as a plain-text download in the UI.
    with open(CURRENT_LOG_FILE, 'rb') as f:
        bytes = f.read()
    st.download_button(
        label="📥download log",
        data=bytes,
        file_name=f'log_{SESSION_TIME}.txt',
        mime='text/plain'
    )
|
||||
|
||||
# sidebar
|
||||
with st.sidebar:
|
||||
st.title('Settings')
|
||||
output_types = st.multiselect('Output Types',['Answer', 'Summary'],default=['Answer'])
|
||||
answer_model = st.selectbox('Answer Model', model_options)
|
||||
if util.contains(output_types, 'Summary'):
|
||||
summary_model = st.selectbox('Summary Model', model_options)
|
||||
|
||||
temp = st.slider('Temperature', 0.0, 1.0, value=0.1)
|
||||
max_tokens = st.slider('Max Tokens', 850, 2500, value=1000)
|
||||
top_p = st.slider('Top_P', 0.0, 1.0, value=1.0)
|
||||
freq_panl = st.slider('Frequency penalty', 0.0, 1.0, value=0.0)
|
||||
pres_panl = st.slider('Presence penalty', 0.0, 1.0, value=0.0)
|
||||
|
||||
chunk_size = st.slider('Chunk Size', 1500, 4500, value=4000)
|
||||
chunk_count = st.slider('Answer Count', 1, 5, value=1)
|
||||
|
||||
if st.button('Clear Log',on_click=clear_log):
|
||||
st.success('Log Cleared')
|
||||
with header:
|
||||
st.title('🧠Seanium Brain')
|
||||
st.text('This is my personal AI powered brain feeding my own Obsidian notes. Ask anything.')
|
||||
|
||||
|
||||
def execute_brain(question):
    # Full question pipeline: log the question, rebuild the brain if the
    # notes changed, answer with the selected model, optionally summarize.
    # log question
    log(f'\n\n\n\n[{str(time.ctime())}] - QUESTION: {question}')

    if check_update.isUpdated():
        # if brain-info is updated
        brain.build(chunk_size)
        st.success('Brain rebuilded!')
        time.sleep(2)

    # thinking on answer
    with st.spinner('Thinking on Answer'):
        answer = brain.run_answer(question, answer_model, temp, max_tokens, top_p, freq_panl, pres_panl, chunk_count=chunk_count)
        if util.contains(output_types, 'Answer'):
            # displaying results
            st.header('💬Answer')
            st.success(answer)
            log(answer, seperater_text='ANSWER')

    # thinking on summary
    if util.contains(output_types, 'Summary'):
        with st.spinner('Thinking on Summary'):
            time.sleep(2)
            # NOTE(review): summary_model is only assigned in the sidebar when
            # 'Summary' is among the selected output types — guarded by the
            # same util.contains check here.
            summary = brain.run_summary(answer, summary_model, temp, max_tokens, top_p, freq_panl, pres_panl)
            # displaying results
            st.header('📃Summary')
            st.success(summary)
            log(summary, seperater_text='SUMMARY')
|
||||
|
||||
# main
|
||||
with body:
|
||||
question = st.text_input('Ask Brain: ')
|
||||
col1, col2, col3, col4 = st.columns(4)
|
||||
with col1:
|
||||
send = st.button('📩Send')
|
||||
with col2:
|
||||
if os.path.exists(CURRENT_LOG_FILE):
|
||||
save_as()
|
||||
|
||||
# execute brain calculation
|
||||
if not question == '' and send:
|
||||
execute_brain(question)
|
@ -0,0 +1,54 @@
|
||||
import openai
|
||||
import textwrap
|
||||
|
||||
from modules import utilities as util
|
||||
from modules import gpt_util as gpt
|
||||
|
||||
openai.api_key = util.read_file(r'.user\API-KEYS.txt').strip()
|
||||
BRAIN_DATA = util.read_json_file(r'.user\brain-data.json')
|
||||
prompt_dir = '.user/prompt'
|
||||
|
||||
def build(chunk_size=4000):
    # Rebuild the brain data: chunk the raw notes, embed each chunk, and
    # persist the (content, vector) pairs to brain-data.json.
    all_text = util.read_file(r'.user\input.txt')

    # split text into smaller chunk of 4000 char each
    chunks = textwrap.wrap(all_text, chunk_size)

    result = []

    for chunk in chunks:
        # drop non-ASCII characters before sending the chunk to the embedding API
        embedding = gpt.embedding(chunk.encode(encoding='ASCII', errors='ignore').decode())
        info = {'content':chunk, 'vector':embedding}
        print(info, '\n\n\n')
        result.append(info)

    util.write_json_file(result, r'.user\brain-data.json')
|
||||
|
||||
def run_answer(query, model, temp, max_tokens, top_p, freq_penl, pres_penl, chunk_count):
    # Answer `query` from the brain: find the `chunk_count` most relevant
    # stored chunks, fill the question prompt template for each, and join
    # the per-chunk answers with blank lines.
    results = gpt.search_chunks(query, BRAIN_DATA, chunk_count)
    answers = []
    for result in results:
        my_info = util.read_file(f'{prompt_dir}/my-info.txt')

        # substitute context, query and user info into the template
        prompt = util.read_file(f'{prompt_dir}/question.txt')
        prompt = prompt.replace('<<INFO>>', result['content'])
        prompt = prompt.replace('<<QS>>', query)
        prompt = prompt.replace('<<MY-INFO>>', my_info)

        answer = gpt.gpt3(prompt, model, temp, max_tokens, top_p, freq_penl, pres_penl)
        answers.append(answer)

    all_answers = '\n\n'.join(answers)
    return all_answers
|
||||
|
||||
def run_summary(query, model, temp, max_tokens, top_p, freq_penl, pres_penl):
    """Summarize `query` chunk-by-chunk with the given model settings.

    The text is wrapped into 10000-char pieces, each summarized through the
    summarize prompt template, and the summaries joined with blank lines.
    """
    pieces = textwrap.wrap(query, 10000)
    summaries = []
    for piece in pieces:
        template = util.read_file(f'{prompt_dir}/summarize.txt')
        summary = gpt.gpt3(template.replace('<<SUM>>', piece), model, temp, max_tokens, top_p, freq_penl, pres_penl)
        summaries.append(summary)
    return '\n\n'.join(summaries)
|
@ -0,0 +1,34 @@
|
||||
import os
|
||||
import time
|
||||
from modules import utilities as util
|
||||
|
||||
file_path = r'.user\input.txt'
|
||||
temp_file = r'.user\input_last-run.temp'
|
||||
|
||||
def compare_time(t1, t2):
    """Whether two readable timestamps match exactly."""
    same = t1 == t2
    return same
|
||||
|
||||
def isUpdated():
    # Return True when input.txt's mtime differs from the cached value
    # (refreshing the cache in that case), False otherwise.
    # Raises FileNotFoundError when input.txt is missing.
    if os.path.exists(file_path):
        # get modification time of the file
        mod_time = os.path.getmtime(file_path)

        # convert the modification time to readable format
        read_mod_time = time.ctime(mod_time)

        if os.path.exists(temp_file):
            temp_info = util.read_file(temp_file)
            if compare_time(read_mod_time, temp_info):
                print('File has not been updated.')
                return False
            else:
                # cache the new mtime before reporting the change
                print('File has been updated.')
                util.write_file(read_mod_time, temp_file)
                return True
        else:
            # first run: no cache yet, so treat the file as not updated
            print('Temp file not exist, writing temp file...')
            # write to temp file
            util.write_file(read_mod_time, temp_file)
            return False
    else:
        raise FileNotFoundError(f'File: {file_path} does not exist.')
|
@ -0,0 +1,42 @@
|
||||
import openai
|
||||
import numpy as np
|
||||
|
||||
def similarity(v1, v2):
    """Dot-product similarity between two vectors: higher means more alike."""
    return np.dot(v1, v2)
|
||||
|
||||
def embedding(content, engine='text-embedding-ada-002'):
    """Embed `content` with the OpenAI API; return the raw vector (list of floats)."""
    resp = openai.Embedding.create(input=content, engine=engine)
    return resp['data'][0]['embedding']
|
||||
|
||||
def search_chunks(text, data, count=1):
    """Rank `data` entries against the query `text`; return the top `count`.

    Entries must hold 'content' and a precomputed 'vector'; each result is a
    {'content', 'point'} dict, ordered from most to least similar.
    """
    query_vector = embedding(text)
    scored = [
        {'content': entry['content'],
         'point': similarity(query_vector, entry['vector'])}
        for entry in data
    ]
    # best match first
    scored.sort(key=lambda item: item['point'], reverse=True)
    return scored[:count]
|
||||
|
||||
def gpt3(prompt, model, temp, max_tokens, top_p, freq_penl, pres_penl):
    """Run an OpenAI completion with explicit sampling settings; return stripped text."""
    completion = openai.Completion.create(
        model=model,
        prompt=prompt,
        temperature=temp,
        max_tokens=max_tokens,
        top_p=top_p,
        frequency_penalty=freq_penl,
        presence_penalty=pres_penl,
    )
    return completion['choices'][0]['text'].strip()
|
@ -0,0 +1,99 @@
|
||||
import json
|
||||
import os
|
||||
|
||||
def extract_string(text, delimiter, force=False):
    """Return the pieces of `text` enclosed between pairs of `delimiter`.

    Splitting on the delimiter, every odd-indexed fragment (text between the
    1st/2nd, 3rd/4th, ... occurrences) is kept and the kept pieces joined.
    If the delimiter is absent, returns `text` unchanged — or '' when
    `force` is True.

    NOTE: the file previously held two `extract_string` definitions; the
    first (two-argument) one was dead code shadowed by this one and has
    been removed. Behavior of the surviving definition is unchanged.
    """
    if delimiter not in text:
        return '' if force else text
    fragments = text.split(delimiter)
    kept = [fragments[i] for i in range(1, len(fragments), 2)]
    return ''.join(kept)
|
||||
|
||||
|
||||
def create_not_exist(path):
    """Ensure the parent directory of `path` exists, creating it if needed.

    Fixes two issues in the original: the local `dir` shadowed the builtin,
    and a bare filename (empty dirname) crashed because os.makedirs('')
    raises — such paths are now a no-op.
    """
    parent = os.path.dirname(path)
    if parent and not os.path.exists(parent):
        os.makedirs(parent)
|
||||
|
||||
def create_file_not_exist(path):
    # Create an empty file at `path` if it doesn't already exist
    # (write_file also creates any missing parent directories).
    if not os.path.exists(path):
        write_file('', path)
|
||||
|
||||
def read_file(filepath, delimiter='', force=False):
    """Read a UTF-8 text file; optionally keep only delimiter-enclosed parts.

    When `delimiter` is non-empty the content is filtered through
    extract_string(); `force` makes a missing delimiter yield ''.
    """
    with open(filepath, 'r', encoding='utf-8') as handle:
        content = handle.read()
    if delimiter != '':
        content = extract_string(content, delimiter, force)
    return content
|
||||
|
||||
|
||||
def read_files(file_dir, delimiter='', force=False):
    """Read every file under `file_dir` (recursively) into one joined string.

    Each file contributes a '[<name>]' header (extension stripped) followed
    by its — optionally delimiter-filtered — content; when `force` is set,
    files whose filtered content is empty are skipped.

    Fix: the computed `filename` was never used; the header previously
    emitted a literal placeholder instead of the actual file name.
    """
    contents = []

    # Read all files in a directory
    for root, dirs, files in os.walk(file_dir):
        for file in files:
            # extract file path
            filepath = os.path.join(root, file)
            # extract filename without extension
            filename = os.path.splitext(os.path.basename(filepath))[0]
            file_data = read_file(filepath, delimiter, force)
            if force and file_data == '':
                continue

            content = [f'[{filename}]', file_data]
            contents.append('\n\n'.join(content))

    result = '\n\n\n\n'.join(contents)
    return result
|
||||
|
||||
def write_file(content, filepath, mode='w'):
    # Write (or append, with mode='a') `content` as UTF-8,
    # creating any missing parent directories first.
    create_not_exist(filepath)
    with open(filepath, mode, encoding='utf-8') as file:
        file.write(content)
|
||||
|
||||
def create_json_not_exist(filepath, initial_value=None):
    """Create `filepath` with `initial_value` (default: {}) as JSON if missing.

    Uses a None sentinel instead of a mutable `{}` default so no single dict
    instance is shared between calls. Calls with no second argument behave
    exactly as before.
    """
    if initial_value is None:
        initial_value = {}
    if not os.path.exists(filepath):
        write_json_file(initial_value, filepath)
|
||||
|
||||
def write_json_file(content, filepath, mode='w'):
    """Dump `content` to `filepath` as 2-space-indented JSON."""
    with open(filepath, mode) as handle:
        json.dump(content, handle, indent=2)
|
||||
|
||||
|
||||
def read_json_file(filepath):
    """Parse and return the JSON document stored at `filepath`."""
    with open(filepath, 'r') as handle:
        return json.load(handle)
|
||||
|
||||
def read_json_at(filepath, key):
    """Read `key` from a JSON file, mapping the strings 'True'/'true' and
    'False'/'false' to real booleans; any other value is returned as-is."""
    value = read_json_file(filepath)[key]
    if value in ('True', 'true'):
        return True
    if value in ('False', 'false'):
        return False
    return value
|
||||
|
||||
def update_json(filepath, key, value):
    # Read-modify-write a single key in a JSON file.
    data = read_json_file(filepath)
    data[key] = value
    write_json_file(data, filepath)
|
||||
|
||||
|
||||
def contains(items, item):
    """Return True when `item` occurs in the sequence `items`.

    The first parameter is renamed from `list`, which shadowed the builtin;
    callers in this project pass it positionally, so behavior is unchanged.
    """
    return items.count(item) > 0
|
@ -0,0 +1,113 @@
|
||||
import streamlit as st
|
||||
import os
|
||||
from modules import utilities as util
|
||||
import tkinter as tk
|
||||
from tkinter import filedialog
|
||||
|
||||
st.set_page_config(
|
||||
page_title='Configs'
|
||||
)
|
||||
|
||||
body = st.container()
|
||||
|
||||
user_dir = '.user/'
|
||||
prompt_dir = f'{user_dir}prompt/'
|
||||
brain_memo = f'{user_dir}brain_memo.json'
|
||||
|
||||
|
||||
def save(content, path, page=''):
    # Render a save button; on click, write `content` to `path` and, for the
    # Brain Memory page, persist the current widget settings to brain_memo.
    # NOTE(review): relies on module-level widget values (delimiter,
    # append_mode, force_delimiter) assigned further down the script — only
    # safe because streamlit executes the whole script before clicks fire.
    save_but = st.button('💾Save')
    if save_but:
        util.write_file(content, path)
        st.success(f'✅File saved!')
        # write to json file
        if page == '💽Brain Memory':
            util.update_json(brain_memo, 'delimiter', delimiter)
            util.update_json(brain_memo, 'append_mode', append_mode)
            util.update_json(brain_memo, 'force_mode', force_delimiter)
|
||||
|
||||
def select_directory():
    # Open a native folder picker (via a hidden Tk root window) and
    # return the chosen directory path ('' if the user cancels).
    root = tk.Tk()
    root.withdraw()
    # make sure the dialog is on top of the main window
    root.attributes('-topmost', True)
    directory = filedialog.askdirectory(initialdir=os.getcwd(), title='Select Note Directory', master=root)
    return directory
|
||||
|
||||
with st.sidebar:
|
||||
st.title('Settings')
|
||||
menu = st.radio('Menu', [
|
||||
'📝Prompts',
|
||||
'💽Brain Memory',
|
||||
'🔑API Keys'
|
||||
])
|
||||
|
||||
with body:
|
||||
match menu:
|
||||
case '📝Prompts':
|
||||
st.title('📝Prompts')
|
||||
st.text('Configuration of prompts.')
|
||||
selected_file = st.selectbox('Prompt File', os.listdir(prompt_dir))
|
||||
selected_path = prompt_dir + selected_file
|
||||
mod_text = st.text_area('Prompts',value=util.read_file(selected_path), height=500)
|
||||
save(mod_text, selected_path)
|
||||
|
||||
case '💽Brain Memory':
|
||||
st.title('💽Brain Memory')
|
||||
st.text('Modify your brain knowledge base.')
|
||||
memory_data = util.read_file(f'{user_dir}input.txt')
|
||||
|
||||
note_dir = ''
|
||||
|
||||
util.create_json_not_exist(brain_memo, {'note_dir': '', 'delimiter': '', 'append_mode': 'False', 'force_mode': 'False'})
|
||||
util.create_file_not_exist(f'{user_dir}note_dir_info.txt')
|
||||
util.create_file_not_exist(f'{user_dir}brain_mem_sig.temp')
|
||||
|
||||
col1, col2 = st.columns(2)
|
||||
with col1:
|
||||
st.button('🔄Refresh')
|
||||
with col2:
|
||||
if st.button('📁Select Note Directory'):
|
||||
note_dir = select_directory()
|
||||
util.update_json(brain_memo, 'note_dir', note_dir)
|
||||
note_dir = st.text_input('Note Directory', value=util.read_json_at(brain_memo, 'note_dir'), placeholder='Select Note Directory', key='note_dir')
|
||||
|
||||
col1, col2 = st.columns(2)
|
||||
with col1:
|
||||
delimiter_memo = util.read_json_at(brain_memo, 'delimiter')
|
||||
delimiter = st.text_input('Delimiter', delimiter_memo, placeholder='e.g. +++')
|
||||
|
||||
with col2:
|
||||
append_mode = st.checkbox('Append Mode', value=util.read_json_at(brain_memo, 'append_mode'))
|
||||
|
||||
force_delimiter = st.checkbox('Force Delimiter', value=util.read_json_at(brain_memo, 'force_mode'))
|
||||
|
||||
|
||||
# if note directory is selected
|
||||
if note_dir != '':
|
||||
|
||||
note_data = util.read_files(note_dir, delimiter, force_delimiter)
|
||||
if append_mode:
|
||||
memory_data += note_data
|
||||
else:
|
||||
memory_data = note_data
|
||||
|
||||
# if st.button('📝Update Brain Memory') or util.read_json_at(brain_memo, 'save') == 'true':
|
||||
# util.update_json(brain_memo, 'save', 'true')
|
||||
# util.update_json(brain_memo, 'delimiter', delimiter)
|
||||
# note_data = util.read_files(note_dir, delimiter, force_delimiter)
|
||||
# if append_mode:
|
||||
# memory_data += note_data
|
||||
# else:
|
||||
# memory_data = note_data
|
||||
|
||||
mod_text = st.text_area('Raw Memory Inputs', value=memory_data, height=500)
|
||||
save(mod_text, f'{user_dir}input.txt', '💽Brain Memory')
|
||||
|
||||
case '🔑API Keys':
|
||||
st.title('🔑API Keys')
|
||||
st.text('Configure your OpenAI API keys.')
|
||||
mod_text = st.text_input('API Keys', value=util.read_file(f'{user_dir}API-KEYS.txt'))
|
||||
save(mod_text, f'{user_dir}API-KEYS.txt')
|
||||
|
||||
|
Loading…
Reference in New Issue