mirror of https://github.com/sean1832/GPT-Brain
Initial commit
commit
577db13cf2
@ -0,0 +1,2 @@
|
|||||||
|
# Auto detect text files and perform LF normalization
|
||||||
|
* text=auto
|
@ -0,0 +1,160 @@
|
|||||||
|
.user/
|
||||||
|
|
||||||
|
# Byte-compiled / optimized / DLL files
|
||||||
|
__pycache__/
|
||||||
|
*.py[cod]
|
||||||
|
*$py.class
|
||||||
|
|
||||||
|
# C extensions
|
||||||
|
*.so
|
||||||
|
|
||||||
|
# Distribution / packaging
|
||||||
|
.Python
|
||||||
|
build/
|
||||||
|
develop-eggs/
|
||||||
|
dist/
|
||||||
|
downloads/
|
||||||
|
eggs/
|
||||||
|
.eggs/
|
||||||
|
lib/
|
||||||
|
lib64/
|
||||||
|
parts/
|
||||||
|
sdist/
|
||||||
|
var/
|
||||||
|
wheels/
|
||||||
|
share/python-wheels/
|
||||||
|
*.egg-info/
|
||||||
|
.installed.cfg
|
||||||
|
*.egg
|
||||||
|
MANIFEST
|
||||||
|
|
||||||
|
# PyInstaller
|
||||||
|
# Usually these files are written by a python script from a template
|
||||||
|
# before PyInstaller builds the exe, so as to inject date/other infos into it.
|
||||||
|
*.manifest
|
||||||
|
*.spec
|
||||||
|
|
||||||
|
# Installer logs
|
||||||
|
pip-log.txt
|
||||||
|
pip-delete-this-directory.txt
|
||||||
|
|
||||||
|
# Unit test / coverage reports
|
||||||
|
htmlcov/
|
||||||
|
.tox/
|
||||||
|
.nox/
|
||||||
|
.coverage
|
||||||
|
.coverage.*
|
||||||
|
.cache
|
||||||
|
nosetests.xml
|
||||||
|
coverage.xml
|
||||||
|
*.cover
|
||||||
|
*.py,cover
|
||||||
|
.hypothesis/
|
||||||
|
.pytest_cache/
|
||||||
|
cover/
|
||||||
|
|
||||||
|
# Translations
|
||||||
|
*.mo
|
||||||
|
*.pot
|
||||||
|
|
||||||
|
# Django stuff:
|
||||||
|
*.log
|
||||||
|
local_settings.py
|
||||||
|
db.sqlite3
|
||||||
|
db.sqlite3-journal
|
||||||
|
|
||||||
|
# Flask stuff:
|
||||||
|
instance/
|
||||||
|
.webassets-cache
|
||||||
|
|
||||||
|
# Scrapy stuff:
|
||||||
|
.scrapy
|
||||||
|
|
||||||
|
# Sphinx documentation
|
||||||
|
docs/_build/
|
||||||
|
|
||||||
|
# PyBuilder
|
||||||
|
.pybuilder/
|
||||||
|
target/
|
||||||
|
|
||||||
|
# Jupyter Notebook
|
||||||
|
.ipynb_checkpoints
|
||||||
|
|
||||||
|
# IPython
|
||||||
|
profile_default/
|
||||||
|
ipython_config.py
|
||||||
|
|
||||||
|
# pyenv
|
||||||
|
# For a library or package, you might want to ignore these files since the code is
|
||||||
|
# intended to run in multiple environments; otherwise, check them in:
|
||||||
|
# .python-version
|
||||||
|
|
||||||
|
# pipenv
|
||||||
|
# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
|
||||||
|
# However, in case of collaboration, if having platform-specific dependencies or dependencies
|
||||||
|
# having no cross-platform support, pipenv may install dependencies that don't work, or not
|
||||||
|
# install all needed dependencies.
|
||||||
|
#Pipfile.lock
|
||||||
|
|
||||||
|
# poetry
|
||||||
|
# Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
|
||||||
|
# This is especially recommended for binary packages to ensure reproducibility, and is more
|
||||||
|
# commonly ignored for libraries.
|
||||||
|
# https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
|
||||||
|
#poetry.lock
|
||||||
|
|
||||||
|
# PEP 582; used by e.g. github.com/David-OConnor/pyflow
|
||||||
|
__pypackages__/
|
||||||
|
|
||||||
|
# Celery stuff
|
||||||
|
celerybeat-schedule
|
||||||
|
celerybeat.pid
|
||||||
|
|
||||||
|
# SageMath parsed files
|
||||||
|
*.sage.py
|
||||||
|
|
||||||
|
# Environments
|
||||||
|
.env
|
||||||
|
.venv
|
||||||
|
env/
|
||||||
|
venv/
|
||||||
|
ENV/
|
||||||
|
env.bak/
|
||||||
|
venv.bak/
|
||||||
|
|
||||||
|
# Spyder project settings
|
||||||
|
.spyderproject
|
||||||
|
.spyproject
|
||||||
|
|
||||||
|
# Rope project settings
|
||||||
|
.ropeproject
|
||||||
|
|
||||||
|
# mkdocs documentation
|
||||||
|
/site
|
||||||
|
|
||||||
|
# mypy
|
||||||
|
.mypy_cache/
|
||||||
|
.dmypy.json
|
||||||
|
dmypy.json
|
||||||
|
|
||||||
|
# Pyre type checker
|
||||||
|
.pyre/
|
||||||
|
|
||||||
|
# pytype static type analyzer
|
||||||
|
.pytype/
|
||||||
|
|
||||||
|
# Cython debug symbols
|
||||||
|
cython_debug/
|
||||||
|
|
||||||
|
# PyCharm
|
||||||
|
# JetBrains specific template is maintainted in a separate JetBrains.gitignore that can
|
||||||
|
# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
|
||||||
|
# and can be added to the global gitignore or merged into this file. For a more nuclear
|
||||||
|
# option (not recommended) you can uncomment the following to ignore the entire idea folder.
|
||||||
|
.idea/
|
||||||
|
|
||||||
|
# vsCode
|
||||||
|
.vscode/
|
||||||
|
|
||||||
|
# test folder
|
||||||
|
.test/
|
@ -0,0 +1,21 @@
|
|||||||
|
MIT License
|
||||||
|
|
||||||
|
Copyright (c) 2023 sean1832
|
||||||
|
|
||||||
|
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||||
|
of this software and associated documentation files (the "Software"), to deal
|
||||||
|
in the Software without restriction, including without limitation the rights
|
||||||
|
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||||
|
copies of the Software, and to permit persons to whom the Software is
|
||||||
|
furnished to do so, subject to the following conditions:
|
||||||
|
|
||||||
|
The above copyright notice and this permission notice shall be included in all
|
||||||
|
copies or substantial portions of the Software.
|
||||||
|
|
||||||
|
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||||
|
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||||
|
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||||
|
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||||
|
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||||
|
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||||
|
SOFTWARE.
|
@ -0,0 +1,21 @@
|
|||||||
|
# Auto Setup
|
||||||
|
### What you need
|
||||||
|
- Install python `3.11`
|
||||||
|
- OpenAI API keys
|
||||||
|
|
||||||
|
### .bat file
|
||||||
|
1. Run `setup.bat`
|
||||||
|
2. Enter OpenAI API Key
|
||||||
|
|
||||||
|
# Manual Setup
|
||||||
|
### Python
|
||||||
|
1. Make sure to install python `3.11`
|
||||||
|
1. Create venv using `python -m venv venv` under the project root directory
|
||||||
|
2. Enter venv using `venv\Scripts\activate`
|
||||||
|
3. Update pip by using `python -m pip install --upgrade pip`
|
||||||
|
4. Install required libraries using `pip3 install -r requirements.txt`
|
||||||
|
|
||||||
|
### API Key file
|
||||||
|
1. Create API Key file using cmd with command `if not exist .user\ (md .user\) & echo [YOUR API KEYS]> .user\API-KEYS.txt`
|
||||||
|
|
||||||
|
|
@ -0,0 +1,16 @@
|
|||||||
|
@echo off
|
||||||
|
cd..
|
||||||
|
echo Activating Virtural environment...
|
||||||
|
call .\venv\Scripts\activate
|
||||||
|
|
||||||
|
echo upgrading pip...
|
||||||
|
python -m pip install --upgrade pip
|
||||||
|
|
||||||
|
|
||||||
|
echo Installing pipreqs...
|
||||||
|
pip install pipreqs
|
||||||
|
|
||||||
|
echo Export to requirements.txt
|
||||||
|
pipreqs . --force --encoding utf-8
|
||||||
|
|
||||||
|
pause
|
@ -0,0 +1,8 @@
|
|||||||
|
@echo off
|
||||||
|
cd..
|
||||||
|
echo Activating Virtural environment...
|
||||||
|
call .\venv\Scripts\activate
|
||||||
|
|
||||||
|
echo building brain...
|
||||||
|
python build-brain.py
|
||||||
|
echo complete building brain!
|
@ -0,0 +1,73 @@
|
|||||||
|
import openai
|
||||||
|
import numpy as np
|
||||||
|
import textwrap
|
||||||
|
import utilities
|
||||||
|
|
||||||
|
openai.api_key = utilities.open_file(r'.user\API-KEYS.txt').strip()
|
||||||
|
BRAIN_DATA = utilities.read_json_file(r'.user\brain-data.json')
|
||||||
|
|
||||||
|
# this function compare similarity between two vectors.
|
||||||
|
# The higher value the dot product have, the more alike between these vectors
|
||||||
|
def similarity(v1, v2):
    """Score how alike two embedding vectors are.

    The dot product of two OpenAI embedding vectors: a larger value means
    the underlying texts are more similar.
    """
    return np.dot(v1, v2)
|
||||||
|
|
||||||
|
def search_chunks(text, data, count=1):
    """Rank brain-data entries against *text* and return the best *count*.

    Each returned item is a dict with the entry's 'content' and its
    similarity 'point' to the query embedding.
    """
    query_vector = utilities.embedding(text)

    # score every stored chunk against the query embedding
    scored = [
        {'content': entry['content'],
         'point': similarity(query_vector, entry['vector'])}
        for entry in data
    ]

    # highest score first
    scored.sort(key=lambda entry: entry['point'], reverse=True)
    return scored[0:count]
|
||||||
|
|
||||||
|
def gpt3(prompt, model='text-davinci-003'):
    """Run *prompt* through an OpenAI completion model; return stripped text."""
    completion = openai.Completion.create(
        model=model,
        prompt=prompt,
        temperature=0.1,      # near-deterministic answers
        max_tokens=1000,
        top_p=1,
        frequency_penalty=0,
        presence_penalty=0,
    )
    return completion['choices'][0]['text'].strip()
|
||||||
|
|
||||||
|
def main():
    """Interactive console loop: answer questions from brain data, then summarize.

    Fixes vs. the original: the unused local ``answers_count`` is removed, and
    the loop-invariant prompt files (my-info.txt, question.txt) are read once
    per question instead of once per matched chunk.
    """
    while True:
        query = input('\n\nAsk brain: ')
        results = search_chunks(query, BRAIN_DATA)

        # Loop-invariant prompt pieces: read once per question.
        my_info = utilities.open_file(r'prompt\my-info.txt')
        question_template = utilities.open_file(r'prompt\question.txt')

        answers = []
        for result in results:
            prompt = question_template.replace('<<INFO>>', result['content'])
            prompt = prompt.replace('<<QS>>', query)
            prompt = prompt.replace('<<MY-INFO>>', my_info)
            answers.append(gpt3(prompt, model='text-davinci-003'))

        all_answers = '\n\n'.join(answers)
        print('\n\n============ANSWER============\n\n', all_answers)

        # Summarize the combined answer in model-sized chunks.
        end = []
        for chunk in textwrap.wrap(all_answers, 10000):
            prompt = utilities.open_file(r'prompt\summarize.txt').replace('<<SUM>>', chunk)
            end.append(gpt3(prompt, model='text-curie-001'))
        # NOTE(review): 'SUMMRY' typo kept — it is a runtime output string.
        print('\n\n============SUMMRY============\n\n', '\n\n'.join(end))


if __name__ == '__main__':
    main()
|
@ -0,0 +1,25 @@
|
|||||||
|
import openai
|
||||||
|
import textwrap
|
||||||
|
import utilities
|
||||||
|
|
||||||
|
|
||||||
|
openai.api_key = utilities.open_file(r'.user\API-KEYS.txt').strip()
|
||||||
|
|
||||||
|
def main():
    """Embed .user\\input.txt in 4000-char chunks and persist brain-data.json."""
    all_text = utilities.open_file(r'.user\input.txt')

    # split text into smaller chunks of 4000 chars each
    result = []
    for chunk in textwrap.wrap(all_text, 4000):
        # the embeddings API is fed ASCII only; exotic characters are dropped
        ascii_chunk = chunk.encode(encoding='ASCII', errors='ignore').decode()
        info = {'content': chunk, 'vector': utilities.embedding(ascii_chunk)}
        print(info, '\n\n\n')
        result.append(info)

    utilities.write_json_file(result, r'.user\brain-data.json')


if __name__ == '__main__':
    main()
|
@ -0,0 +1,44 @@
|
|||||||
|
import os
|
||||||
|
import time
|
||||||
|
import utilities
|
||||||
|
|
||||||
|
file_path = r'.user\input.txt'
|
||||||
|
temp_file = r'.user\input_last-run.temp'
|
||||||
|
sig_file = r'.user\input_sig.temp'
|
||||||
|
|
||||||
|
def compare_time(t1, t2):
    """Return True when the two timestamp strings are identical."""
    return t1 == t2
|
||||||
|
|
||||||
|
def write_sig(bool):
    """Persist the update-signal text ('updated' / 'not updated') to the sig file.

    NOTE(review): the parameter shadows the builtin ``bool`` and actually
    carries a string; the name is kept to preserve the call interface.
    """
    utilities.write_file(bool, sig_file)
|
||||||
|
|
||||||
|
def check():
    """Compare input.txt's mtime against the cached one; write an update signal.

    Raises FileNotFoundError when the input file is missing.
    """
    if not os.path.exists(file_path):
        raise FileNotFoundError(f'File: {file_path} does not exist.')

    # get modification time of the file, in readable form — this string
    # doubles as a cheap change fingerprint
    read_mod_time = time.ctime(os.path.getmtime(file_path))

    if not os.path.exists(temp_file):
        print('Temp file not exist, writing temp file...')
        # write to temp file; first run counts as "not updated"
        utilities.write_file(read_mod_time, temp_file)
        write_sig('not updated')
        return

    temp_info = utilities.open_file(temp_file)
    if compare_time(read_mod_time, temp_info):
        write_sig('not updated')
        print('File has not been updated.')
    else:
        print('File has been updated.')
        utilities.write_file(read_mod_time, temp_file)
        write_sig('updated')
|
||||||
|
|
||||||
|
def main():
|
||||||
|
check()
|
||||||
|
|
||||||
|
if __name__ == '__main__':
|
||||||
|
main()
|
@ -0,0 +1,27 @@
|
|||||||
|
@echo off
|
||||||
|
cd..
|
||||||
|
echo Activating Virtural environment...
|
||||||
|
call .\venv\Scripts\activate
|
||||||
|
|
||||||
|
rem checking if input.txt is updated
|
||||||
|
python console_app\check_update.py
|
||||||
|
|
||||||
|
setlocal enabledelayedexpansion
|
||||||
|
set "tempFile=.user\input_sig.temp"
|
||||||
|
|
||||||
|
for /f "usebackq delims=" %%a in ("%tempFile%") do (
|
||||||
|
set "tempValue=%%a"
|
||||||
|
)
|
||||||
|
|
||||||
|
if "%tempValue%" == "not updated" (
|
||||||
|
goto end
|
||||||
|
) else (
|
||||||
|
call batch-programs\run-build-brain.bat
|
||||||
|
cls
|
||||||
|
echo Brain updated!
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
:end
|
||||||
|
echo running brain...
|
||||||
|
python console_app\brain.py
|
@ -0,0 +1,24 @@
|
|||||||
|
import json
|
||||||
|
import openai
|
||||||
|
|
||||||
|
def open_file(filepath):
    """Return the full text of *filepath*, decoded as UTF-8."""
    with open(filepath, 'r', encoding='utf-8') as handle:
        return handle.read()
|
||||||
|
|
||||||
|
def write_file(content, filepath):
    """Write *content* to *filepath*, creating or truncating it.

    Fixed: the file is now written with an explicit UTF-8 encoding so writes
    round-trip with ``open_file`` (which reads UTF-8); relying on the platform
    default encoding fails on Windows for non-ASCII content.
    """
    with open(filepath, 'w', encoding='utf-8') as file:
        file.write(content)
|
||||||
|
|
||||||
|
def write_json_file(content, filepath):
    """Serialize *content* to *filepath* as pretty-printed (indent=2) JSON."""
    with open(filepath, 'w') as sink:
        json.dump(content, sink, indent=2)
|
||||||
|
|
||||||
|
def read_json_file(filepath):
    """Parse *filepath* as JSON and return the resulting object."""
    with open(filepath, 'r') as source:
        return json.load(source)
|
||||||
|
|
||||||
|
# return a list of vectors
|
||||||
|
def embedding(content, engine='text-embedding-ada-002'):
    """Return the embedding vector (a list of floats) for *content*."""
    response = openai.Embedding.create(input=content, engine=engine)
    return response['data'][0]['embedding']
|
@ -0,0 +1,2 @@
|
|||||||
|
My name is Zeke Zhang.
|
||||||
|
I am an architecture academic.
|
@ -0,0 +1,3 @@
|
|||||||
|
numpy==1.24.1
|
||||||
|
openai==0.26.4
|
||||||
|
streamlit==1.17.0
|
@ -0,0 +1,5 @@
|
|||||||
|
@echo off
|
||||||
|
echo Activating Virtural environment...
|
||||||
|
call .\venv\Scripts\activate
|
||||||
|
|
||||||
|
streamlit run web_ui/Seanium_Brain.py
|
@ -0,0 +1,45 @@
|
|||||||
|
@echo off
|
||||||
|
echo Creating Virtural environment folder...
|
||||||
|
|
||||||
|
python -m venv venv
|
||||||
|
echo Virtural environment created successfully!
|
||||||
|
ping 127.0.0.1 -n 2 > NUL
|
||||||
|
|
||||||
|
|
||||||
|
echo Activating Virtural environment!
|
||||||
|
call .\venv\Scripts\activate
|
||||||
|
|
||||||
|
echo updating pip
|
||||||
|
python -m pip install --upgrade pip
|
||||||
|
|
||||||
|
|
||||||
|
pip3 install -r requirements.txt
|
||||||
|
ping 127.0.0.1 -n 2 > NUL
|
||||||
|
echo Virtual requirements installed successfully!
|
||||||
|
cls
|
||||||
|
|
||||||
|
echo Creating OpenAI API keys profile...
|
||||||
|
REM if .user\ not exist, create one
|
||||||
|
if not exist .user\ (md .user\)
|
||||||
|
|
||||||
|
REM Create API KEY file
|
||||||
|
set /p API_KEYS=[Enter your API keys]:
|
||||||
|
echo %API_KEYS%> .user\API-KEYS.txt
|
||||||
|
echo API key written to file!
|
||||||
|
|
||||||
|
REM copy example prompt
|
||||||
|
if not exist .user\prompt (md .user\prompt)
|
||||||
|
xcopy "example_prompt\*.*" ".user\prompt" /s /i
|
||||||
|
|
||||||
|
REM wait 2 tick
|
||||||
|
ping 127.0.0.1 -n 2 > NUL
|
||||||
|
|
||||||
|
REM create input txt file
|
||||||
|
echo.> .user\input.txt
|
||||||
|
echo input file created!
|
||||||
|
|
||||||
|
echo Setup complete! Exiting...
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
pause
|
@ -0,0 +1,123 @@
|
|||||||
|
import streamlit as st
|
||||||
|
from modules import utilities as util
|
||||||
|
import brain
|
||||||
|
import check_update
|
||||||
|
import time
|
||||||
|
import os
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
# activate session: remember when this browser session started so the
# log file name stays stable across Streamlit reruns
if 'SESSION_TIME' not in st.session_state:
    # fixed: format was "%Y%m%d-%H%H%S" — hour twice, minutes missing
    st.session_state['SESSION_TIME'] = time.strftime("%Y%m%d-%H%M%S")

st.set_page_config(
    page_title='Seanium Brain'
)

model_options = ['text-davinci-003', 'text-curie-001', 'text-babbage-001', 'text-ada-001']
header = st.container()
body = st.container()
LOG_PATH = '.user/log'
SESSION_TIME = st.session_state['SESSION_TIME']
CURRENT_LOG_FILE = f'{LOG_PATH}/log_{SESSION_TIME}.log'
|
||||||
|
|
||||||
|
|
||||||
|
def create_log():
    """Create the session log file (with a header line) if missing; return its path."""
    if not os.path.exists(CURRENT_LOG_FILE):
        util.write_file(f'Session {SESSION_TIME}\n\n', CURRENT_LOG_FILE)
    return CURRENT_LOG_FILE
|
||||||
|
|
||||||
|
def log(content, path=LOG_PATH, seperater_text=''):
    """Append *content* to the current session log, optionally under a banner.

    NOTE(review): *path* is accepted but never used — output always goes to
    the session log file returned by create_log().
    """
    log_file = create_log()
    if seperater_text != '':
        # wrap the separator label in a banner line
        seperater_text = f'\n\n=============={seperater_text}==============\n'
    util.write_file(f'\n{seperater_text + content}', log_file, 'a')
|
||||||
|
|
||||||
|
def clear_log():
    """Delete every log file except the current session's."""
    keep = f'log_{SESSION_TIME}.log'
    for root, dirs, files in os.walk(LOG_PATH):
        for file in files:
            if file != keep:
                os.remove(os.path.join(root, file))
|
||||||
|
|
||||||
|
def save_as():
    """Offer the current session log as a plain-text download."""
    with open(CURRENT_LOG_FILE, 'rb') as f:
        payload = f.read()   # renamed: old local shadowed builtin `bytes`
    st.download_button(
        label="📥download log",
        data=payload,
        file_name=f'log_{SESSION_TIME}.txt',
        mime='text/plain'
    )
|
||||||
|
|
||||||
|
# sidebar
|
||||||
|
with st.sidebar:
|
||||||
|
st.title('Settings')
|
||||||
|
output_types = st.multiselect('Output Types',['Answer', 'Summary'],default=['Answer'])
|
||||||
|
answer_model = st.selectbox('Answer Model', model_options)
|
||||||
|
if util.contains(output_types, 'Summary'):
|
||||||
|
summary_model = st.selectbox('Summary Model', model_options)
|
||||||
|
|
||||||
|
temp = st.slider('Temperature', 0.0, 1.0, value=0.1)
|
||||||
|
max_tokens = st.slider('Max Tokens', 850, 2500, value=1000)
|
||||||
|
top_p = st.slider('Top_P', 0.0, 1.0, value=1.0)
|
||||||
|
freq_panl = st.slider('Frequency penalty', 0.0, 1.0, value=0.0)
|
||||||
|
pres_panl = st.slider('Presence penalty', 0.0, 1.0, value=0.0)
|
||||||
|
|
||||||
|
chunk_size = st.slider('Chunk Size', 1500, 4500, value=4000)
|
||||||
|
chunk_count = st.slider('Answer Count', 1, 5, value=1)
|
||||||
|
|
||||||
|
if st.button('Clear Log',on_click=clear_log):
|
||||||
|
st.success('Log Cleared')
|
||||||
|
with header:
|
||||||
|
st.title('🧠Seanium Brain')
|
||||||
|
st.text('This is my personal AI powered brain feeding my own Obsidian notes. Ask anything.')
|
||||||
|
|
||||||
|
|
||||||
|
def execute_brain(question):
    """Answer *question* with the brain, logging and rendering the results."""
    # log question
    log(f'\n\n\n\n[{str(time.ctime())}] - QUESTION: {question}')

    # rebuild the brain first if the knowledge base changed
    if check_update.isUpdated():
        brain.build(chunk_size)
        st.success('Brain rebuilded!')
        time.sleep(2)

    # thinking on answer
    with st.spinner('Thinking on Answer'):
        answer = brain.run_answer(question, answer_model, temp, max_tokens, top_p, freq_panl, pres_panl, chunk_count=chunk_count)
        if util.contains(output_types, 'Answer'):
            # displaying results
            st.header('💬Answer')
            st.success(answer)
            log(answer, seperater_text='ANSWER')

    # thinking on summary
    if util.contains(output_types, 'Summary'):
        with st.spinner('Thinking on Summary'):
            time.sleep(2)
            summary = brain.run_summary(answer, summary_model, temp, max_tokens, top_p, freq_panl, pres_panl)
            # displaying results
            st.header('📃Summary')
            st.success(summary)
            log(summary, seperater_text='SUMMARY')
|
||||||
|
|
||||||
|
# main
|
||||||
|
with body:
|
||||||
|
question = st.text_input('Ask Brain: ')
|
||||||
|
col1, col2, col3, col4 = st.columns(4)
|
||||||
|
with col1:
|
||||||
|
send = st.button('📩Send')
|
||||||
|
with col2:
|
||||||
|
if os.path.exists(CURRENT_LOG_FILE):
|
||||||
|
save_as()
|
||||||
|
|
||||||
|
# execute brain calculation
|
||||||
|
if not question == '' and send:
|
||||||
|
execute_brain(question)
|
@ -0,0 +1,54 @@
|
|||||||
|
import openai
|
||||||
|
import textwrap
|
||||||
|
|
||||||
|
from modules import utilities as util
|
||||||
|
from modules import gpt_util as gpt
|
||||||
|
|
||||||
|
openai.api_key = util.read_file(r'.user\API-KEYS.txt').strip()
|
||||||
|
BRAIN_DATA = util.read_json_file(r'.user\brain-data.json')
|
||||||
|
prompt_dir = '.user/prompt'
|
||||||
|
|
||||||
|
def build(chunk_size=4000):
    """Chunk .user\\input.txt, embed each chunk, persist brain-data.json."""
    all_text = util.read_file(r'.user\input.txt')

    result = []
    # split text into smaller chunks (default 4000 chars each)
    for chunk in textwrap.wrap(all_text, chunk_size):
        # embeddings are computed from an ASCII-only copy of the chunk
        vector = gpt.embedding(chunk.encode(encoding='ASCII', errors='ignore').decode())
        info = {'content': chunk, 'vector': vector}
        print(info, '\n\n\n')
        result.append(info)

    util.write_json_file(result, r'.user\brain-data.json')
|
||||||
|
|
||||||
|
def run_answer(query, model, temp, max_tokens, top_p, freq_penl, pres_penl, chunk_count):
    """Answer *query* against the brain data; one completion per matched chunk.

    Returns all per-chunk answers joined by blank lines.
    """
    results = gpt.search_chunks(query, BRAIN_DATA, chunk_count)

    answers = []
    for result in results:
        my_info = util.read_file(f'{prompt_dir}/my-info.txt')
        template = util.read_file(f'{prompt_dir}/question.txt')
        prompt = (template
                  .replace('<<INFO>>', result['content'])
                  .replace('<<QS>>', query)
                  .replace('<<MY-INFO>>', my_info))
        answers.append(gpt.gpt3(prompt, model, temp, max_tokens, top_p, freq_penl, pres_penl))

    return '\n\n'.join(answers)
|
||||||
|
|
||||||
|
def run_summary(query, model, temp, max_tokens, top_p, freq_penl, pres_penl):
    """Summarize *query* chunk-by-chunk; return the joined partial summaries."""
    summaries = []
    for chunk in textwrap.wrap(query, 10000):
        prompt = util.read_file(f'{prompt_dir}/summarize.txt').replace('<<SUM>>', chunk)
        summaries.append(gpt.gpt3(prompt, model, temp, max_tokens, top_p, freq_penl, pres_penl))
    return '\n\n'.join(summaries)
|
@ -0,0 +1,34 @@
|
|||||||
|
import os
|
||||||
|
import time
|
||||||
|
from modules import utilities as util
|
||||||
|
|
||||||
|
file_path = r'.user\input.txt'
|
||||||
|
temp_file = r'.user\input_last-run.temp'
|
||||||
|
|
||||||
|
def compare_time(t1, t2):
    """Return True when the two timestamp strings match exactly."""
    return t1 == t2
|
||||||
|
|
||||||
|
def isUpdated():
    """Return True when input.txt changed since the last run (caching the new mtime).

    Raises FileNotFoundError when the input file is missing.
    """
    if not os.path.exists(file_path):
        raise FileNotFoundError(f'File: {file_path} does not exist.')

    # get modification time of the file in readable form — the string
    # doubles as a cheap change fingerprint
    read_mod_time = time.ctime(os.path.getmtime(file_path))

    if not os.path.exists(temp_file):
        print('Temp file not exist, writing temp file...')
        # write to temp file; first run counts as not updated
        util.write_file(read_mod_time, temp_file)
        return False

    if compare_time(read_mod_time, util.read_file(temp_file)):
        print('File has not been updated.')
        return False

    print('File has been updated.')
    util.write_file(read_mod_time, temp_file)
    return True
|
@ -0,0 +1,42 @@
|
|||||||
|
import openai
|
||||||
|
import numpy as np
|
||||||
|
|
||||||
|
# this function compare similarity between two vectors.
|
||||||
|
# The higher value the dot product have, the more alike between these vectors
|
||||||
|
def similarity(v1, v2):
    """Dot product of two embedding vectors; larger means more alike."""
    return np.dot(v1, v2)
|
||||||
|
|
||||||
|
# return a list of vectors
|
||||||
|
def embedding(content, engine='text-embedding-ada-002'):
    """Return the embedding vector (list of floats) for *content*."""
    response = openai.Embedding.create(input=content, engine=engine)
    return response['data'][0]['embedding']
|
||||||
|
|
||||||
|
def search_chunks(text, data, count=1):
    """Rank *data* entries against *text*; return the best *count* entries.

    Each result is a dict with the entry's 'content' and its similarity
    'point' to the query embedding.
    """
    query_vector = embedding(text)

    # compare the query with every stored brain-data chunk
    scored = [
        {'content': entry['content'],
         'point': similarity(query_vector, entry['vector'])}
        for entry in data
    ]

    # highest score first
    scored.sort(key=lambda entry: entry['point'], reverse=True)
    return scored[0:count]
|
||||||
|
|
||||||
|
def gpt3(prompt, model, temp, max_tokens, top_p, freq_penl, pres_penl):
    """Run a completion request with explicit sampling knobs; return stripped text."""
    response = openai.Completion.create(
        model=model,
        prompt=prompt,
        temperature=temp,
        max_tokens=max_tokens,
        top_p=top_p,
        frequency_penalty=freq_penl,
        presence_penalty=pres_penl,
    )
    return response['choices'][0]['text'].strip()
|
@ -0,0 +1,99 @@
|
|||||||
|
import json
|
||||||
|
import os
|
||||||
|
|
||||||
|
def extract_string(text, delimiter, force=False):
    """Return the text enclosed between *delimiter* pairs, concatenated.

    If *delimiter* does not occur in *text*: return '' when *force* is true,
    otherwise return *text* unchanged.

    Fixed: the module previously defined ``extract_string`` twice; the first
    (single-pair) definition was dead code silently shadowed by this one and
    has been removed.
    """
    if delimiter not in text:
        return '' if force else text
    pieces = text.split(delimiter)
    # odd-indexed pieces are the spans between delimiter pairs
    return ''.join(pieces[1::2])
|
||||||
|
|
||||||
|
|
||||||
|
def create_not_exist(path):
    """Ensure the parent directory of *path* exists, creating it if needed."""
    parent = os.path.dirname(path)   # renamed: old local shadowed builtin `dir`
    if not os.path.exists(parent):
        os.makedirs(parent)
|
||||||
|
|
||||||
|
def create_file_not_exist(path):
    """Create an empty file at *path* if one does not already exist."""
    if not os.path.exists(path):
        write_file('', path)
|
||||||
|
|
||||||
|
def read_file(filepath, delimiter='', force=False):
    """Read *filepath* as UTF-8; optionally keep only delimited sections.

    When *delimiter* is non-empty the content is filtered through
    extract_string(data, delimiter, force).
    """
    with open(filepath, 'r', encoding='utf-8') as source:
        data = source.read()
    if delimiter != '':
        data = extract_string(data, delimiter, force)
    return data
|
||||||
|
|
||||||
|
|
||||||
|
def read_files(file_dir, delimiter='', force=False):
    """Concatenate every file under *file_dir*, each prefixed with its name.

    Files are read via read_file (honoring *delimiter*/*force*); when *force*
    is set, files whose extraction is empty are skipped. Sections are joined
    by four newlines.

    Fixed: the section header previously emitted the literal '[(unknown)]'
    while the computed filename went unused; it now emits '[<filename>]'.
    """
    contents = []

    # Read all files in a directory (recursively)
    for root, dirs, files in os.walk(file_dir):
        for file in files:
            # extract file path
            filepath = os.path.join(root, file)
            # filename without extension, used as the section header
            filename = os.path.splitext(os.path.basename(filepath))[0]
            file_data = read_file(filepath, delimiter, force)
            if force and file_data == '':
                continue

            content = [f'[{filename}]', file_data]
            contents.append('\n\n'.join(content))

    return '\n\n\n\n'.join(contents)
|
||||||
|
|
||||||
|
def write_file(content, filepath, mode='w'):
    """Write *content* to *filepath* as UTF-8, creating parent dirs as needed."""
    create_not_exist(filepath)
    with open(filepath, mode, encoding='utf-8') as sink:
        sink.write(content)
|
||||||
|
|
||||||
|
def create_json_not_exist(filepath, initial_value=None):
    """Create *filepath* containing *initial_value* as JSON, if it is missing.

    Fixed: the default was the mutable literal ``{}``, shared across all
    calls; a ``None`` sentinel is used instead (backward compatible — the
    effective default is still an empty dict).
    """
    if initial_value is None:
        initial_value = {}
    if not os.path.exists(filepath):
        write_json_file(initial_value, filepath)
|
||||||
|
|
||||||
|
def write_json_file(content, filepath, mode='w'):
    """Serialize *content* to *filepath* as indented (2-space) JSON."""
    with open(filepath, mode) as sink:
        json.dump(content, sink, indent=2)
|
||||||
|
|
||||||
|
|
||||||
|
def read_json_file(filepath):
    """Parse *filepath* as JSON and return the resulting object."""
    with open(filepath, 'r') as source:
        return json.load(source)
|
||||||
|
|
||||||
|
def read_json_at(filepath, key):
    """Fetch *key* from a JSON file, coercing 'True'/'False' strings to bool.

    Values other than those string forms are returned unchanged.
    """
    value = read_json_file(filepath)[key]
    if value == 'True' or value == 'true':
        return True
    if value == 'False' or value == 'false':
        return False
    return value
|
||||||
|
|
||||||
|
def update_json(filepath, key, value):
    """Set *key* to *value* inside a JSON file, rewriting the file in place."""
    data = read_json_file(filepath)
    data[key] = value
    write_json_file(data, filepath)
|
||||||
|
|
||||||
|
|
||||||
|
def contains(list, item):
    """Return True when *item* occurs at least once in *list*.

    NOTE(review): the parameter shadows the builtin ``list``; the name is
    kept to preserve the call interface.
    """
    return list.count(item) > 0
|
@ -0,0 +1,113 @@
|
|||||||
|
import streamlit as st
|
||||||
|
import os
|
||||||
|
from modules import utilities as util
|
||||||
|
import tkinter as tk
|
||||||
|
from tkinter import filedialog
|
||||||
|
|
||||||
|
st.set_page_config(
|
||||||
|
page_title='Configs'
|
||||||
|
)
|
||||||
|
|
||||||
|
body = st.container()
|
||||||
|
|
||||||
|
user_dir = '.user/'
|
||||||
|
prompt_dir = f'{user_dir}prompt/'
|
||||||
|
brain_memo = f'{user_dir}brain_memo.json'
|
||||||
|
|
||||||
|
|
||||||
|
def save(content, path, page=''):
    """Render a save button; on click, persist *content* to *path*.

    On the Brain Memory page the memory-related settings are also written
    to the brain_memo json file.
    """
    if st.button('💾Save'):
        util.write_file(content, path)
        st.success(f'✅File saved!')
        # write to json file
        if page == '💽Brain Memory':
            util.update_json(brain_memo, 'delimiter', delimiter)
            util.update_json(brain_memo, 'append_mode', append_mode)
            util.update_json(brain_memo, 'force_mode', force_delimiter)
|
||||||
|
|
||||||
|
def select_directory():
    """Open a native folder-picker dialog and return the chosen directory."""
    root = tk.Tk()
    root.withdraw()
    # make sure the dialog is on top of the main (browser) window
    root.attributes('-topmost', True)
    return filedialog.askdirectory(initialdir=os.getcwd(), title='Select Note Directory', master=root)
|
||||||
|
|
||||||
|
with st.sidebar:
|
||||||
|
st.title('Settings')
|
||||||
|
menu = st.radio('Menu', [
|
||||||
|
'📝Prompts',
|
||||||
|
'💽Brain Memory',
|
||||||
|
'🔑API Keys'
|
||||||
|
])
|
||||||
|
|
||||||
|
with body:
|
||||||
|
match menu:
|
||||||
|
case '📝Prompts':
|
||||||
|
st.title('📝Prompts')
|
||||||
|
st.text('Configuration of prompts.')
|
||||||
|
selected_file = st.selectbox('Prompt File', os.listdir(prompt_dir))
|
||||||
|
selected_path = prompt_dir + selected_file
|
||||||
|
mod_text = st.text_area('Prompts',value=util.read_file(selected_path), height=500)
|
||||||
|
save(mod_text, selected_path)
|
||||||
|
|
||||||
|
case '💽Brain Memory':
|
||||||
|
st.title('💽Brain Memory')
|
||||||
|
st.text('Modify your brain knowledge base.')
|
||||||
|
memory_data = util.read_file(f'{user_dir}input.txt')
|
||||||
|
|
||||||
|
note_dir = ''
|
||||||
|
|
||||||
|
util.create_json_not_exist(brain_memo, {'note_dir': '', 'delimiter': '', 'append_mode': 'False', 'force_mode': 'False'})
|
||||||
|
util.create_file_not_exist(f'{user_dir}note_dir_info.txt')
|
||||||
|
util.create_file_not_exist(f'{user_dir}brain_mem_sig.temp')
|
||||||
|
|
||||||
|
col1, col2 = st.columns(2)
|
||||||
|
with col1:
|
||||||
|
st.button('🔄Refresh')
|
||||||
|
with col2:
|
||||||
|
if st.button('📁Select Note Directory'):
|
||||||
|
note_dir = select_directory()
|
||||||
|
util.update_json(brain_memo, 'note_dir', note_dir)
|
||||||
|
note_dir = st.text_input('Note Directory', value=util.read_json_at(brain_memo, 'note_dir'), placeholder='Select Note Directory', key='note_dir')
|
||||||
|
|
||||||
|
col1, col2 = st.columns(2)
|
||||||
|
with col1:
|
||||||
|
delimiter_memo = util.read_json_at(brain_memo, 'delimiter')
|
||||||
|
delimiter = st.text_input('Delimiter', delimiter_memo, placeholder='e.g. +++')
|
||||||
|
|
||||||
|
with col2:
|
||||||
|
append_mode = st.checkbox('Append Mode', value=util.read_json_at(brain_memo, 'append_mode'))
|
||||||
|
|
||||||
|
force_delimiter = st.checkbox('Force Delimiter', value=util.read_json_at(brain_memo, 'force_mode'))
|
||||||
|
|
||||||
|
|
||||||
|
# if note directory is selected
|
||||||
|
if note_dir != '':
|
||||||
|
|
||||||
|
note_data = util.read_files(note_dir, delimiter, force_delimiter)
|
||||||
|
if append_mode:
|
||||||
|
memory_data += note_data
|
||||||
|
else:
|
||||||
|
memory_data = note_data
|
||||||
|
|
||||||
|
# if st.button('📝Update Brain Memory') or util.read_json_at(brain_memo, 'save') == 'true':
|
||||||
|
# util.update_json(brain_memo, 'save', 'true')
|
||||||
|
# util.update_json(brain_memo, 'delimiter', delimiter)
|
||||||
|
# note_data = util.read_files(note_dir, delimiter, force_delimiter)
|
||||||
|
# if append_mode:
|
||||||
|
# memory_data += note_data
|
||||||
|
# else:
|
||||||
|
# memory_data = note_data
|
||||||
|
|
||||||
|
mod_text = st.text_area('Raw Memory Inputs', value=memory_data, height=500)
|
||||||
|
save(mod_text, f'{user_dir}input.txt', '💽Brain Memory')
|
||||||
|
|
||||||
|
case '🔑API Keys':
|
||||||
|
st.title('🔑API Keys')
|
||||||
|
st.text('Configure your OpenAI API keys.')
|
||||||
|
mod_text = st.text_input('API Keys', value=util.read_file(f'{user_dir}API-KEYS.txt'))
|
||||||
|
save(mod_text, f'{user_dir}API-KEYS.txt')
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue