refactor: update function names and remove unused imports

- Refactor cli.py and llm.py: rename create_retriever to create_vector_store (moved from utils.py into llm.py) and update send_question to take the vector store directly
- Remove unused imports in utils.py
Branch: pull/1/head
Author: Saryev Rustam, 1 year ago
parent 7cb2e17c57
commit 589a419391
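
In effect, callers swap utils.create_retriever for llm.create_vector_store and pass the vector store itself into send_question. A minimal before/after sketch of the call site (names taken from the diff below; root_dir, question, api_key, and model_name stand for the caller's own values):

```python
# Before: utils built the retriever for the caller
from talk_codebase.utils import create_retriever
from talk_codebase.llm import send_question

retriever = create_retriever(root_dir, api_key)
send_question(question, retriever, api_key, model_name)

# After: llm owns the FAISS store; send_question builds the retriever internally
from talk_codebase.llm import create_vector_store, send_question

vector_store = create_vector_store(root_dir, api_key)
send_question(question, vector_store, api_key, model_name)
```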

README.md
@@ -1,7 +1,7 @@
 # talk-codebase is a powerful tool for chatting with your codebase
 <p align="center">
-    <img src="https://github.com/rsaryev/talk-codebase/assets/70219513/b0cb4d00-94b6-407e-8545-92e79d442d89" width="800" alt="chat">
+    <img src="https://github.com/rsaryev/talk-codebase/assets/70219513/87a031ec-51e2-4123-abe6-91bb4d248b4d" width="800" alt="chat">
 </p>
 ## Description

pyproject.toml
@@ -1,6 +1,6 @@
 [tool.poetry]
 name = "talk-codebase"
-version = "0.1.1"
+version = "0.1.12"
 description = "talk-codebase is a powerful tool for querying and analyzing codebases."
 authors = ["Saryev Rustam <rustam1997@gmail.com>"]
 readme = "README.md"

requirements.txt
@@ -39,7 +39,6 @@ rfc3986==2.0.0
 rich==13.3.5
 six==1.16.0
 SQLAlchemy==2.0.15
-talk-codebase==0.1.0
 tenacity==8.2.2
 termcolor==2.3.0
 tiktoken==0.4.0

talk_codebase/cli.py
@@ -1,8 +1,7 @@
 import os
 import fire
 import yaml
-from talk_codebase.utils import create_retriever
-from talk_codebase.llm import send_question
+from talk_codebase.llm import create_vector_store, send_question
 
 
 def get_config():
@@ -40,7 +39,7 @@ def chat(root_dir):
         if not (api_key and model_name):
             configure()
             chat(root_dir)
-        retriever = create_retriever(root_dir, api_key)
+        vector_store = create_vector_store(root_dir, api_key)
         while True:
             question = input("👉 ")
             if not question:
@@ -48,7 +47,7 @@ def chat(root_dir):
                 continue
             if question.lower() in ('exit', 'quit'):
                 break
-            send_question(question, retriever, api_key, model_name)
+            send_question(question, vector_store, api_key, model_name)
     except KeyboardInterrupt:
         print("\n🤖 Bye!")
     except Exception as e:

talk_codebase/llm.py
@@ -1,12 +1,32 @@
 import os
+from langchain import FAISS
 from langchain.callbacks.manager import CallbackManager
 from langchain.chains import ConversationalRetrievalChain
 from langchain.chat_models import ChatOpenAI
-from talk_codebase.utils import StreamStdOut
+from langchain.embeddings import OpenAIEmbeddings
+from langchain.text_splitter import CharacterTextSplitter
+from talk_codebase.utils import StreamStdOut, load_files
 
 
+def create_vector_store(root_dir, openai_api_key):
+    docs = load_files(root_dir)
+    text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)
+    texts = text_splitter.split_documents(docs)
+    embeddings = OpenAIEmbeddings(openai_api_key=openai_api_key)
+    db = FAISS.from_documents(texts, embeddings)
+    return db
+
+
-def send_question(question, retriever, openai_api_key, model_name):
+def send_question(question, vector_store, openai_api_key, model_name):
     model = ChatOpenAI(model_name=model_name, openai_api_key=openai_api_key, streaming=True,
                        callback_manager=CallbackManager([StreamStdOut()]))
-    qa = ConversationalRetrievalChain.from_llm(model, retriever=retriever)
+    qa = ConversationalRetrievalChain.from_llm(model,
+                                               retriever=vector_store.as_retriever(search_kwargs={"k": 2}),
+                                               return_source_documents=True)
     answer = qa({"question": question, "chat_history": []})
+    print('\n' + '\n'.join([f'📄 {os.path.abspath(s.metadata["source"])}:' for s in answer["source_documents"]]))
     return answer
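
Because the chain is now built with return_source_documents=True, the dict that send_question returns carries both the answer text and the retrieved chunks. A hypothetical standalone use of the two new helpers (the directory, API key, and model name below are placeholders, not values from this commit):

```python
from talk_codebase.llm import create_vector_store, send_question

# Embed and index the codebase once, then reuse the store for every question
store = create_vector_store("./my-project", openai_api_key="sk-...")

result = send_question("Where is the CLI entry point?", store,
                       openai_api_key="sk-...", model_name="gpt-3.5-turbo")
print(result["answer"])                  # final answer text
print(len(result["source_documents"]))   # at most 2 chunks, per search_kwargs={"k": 2}
```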

talk_codebase/utils.py
@@ -1,11 +1,8 @@
 import os
 import sys
-from langchain import FAISS
 from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler
 from langchain.document_loaders import TextLoader
-from langchain.embeddings import OpenAIEmbeddings
-from langchain.text_splitter import CharacterTextSplitter
 from talk_codebase.consts import EXCLUDE_DIRS, EXCLUDE_FILES, ALLOW_FILES
@@ -40,15 +37,3 @@ def load_files(root_dir):
             print(f"Error loading file {file}: {e}")
     print(f"🤖 Loaded {len(docs)} documents")
     return docs
-
-
-def create_retriever(root_dir, openai_api_key):
-    docs = load_files(root_dir)
-    text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)
-    texts = text_splitter.split_documents(docs)
-    embeddings = OpenAIEmbeddings(openai_api_key=openai_api_key)
-    db = FAISS.from_documents(texts, embeddings)
-    retriever = db.as_retriever()
-    return retriever
