refactor: update function names and remove unused imports

- Refactor cli.py and llm.py: rename create_retriever to create_vector_store (moved from utils.py into llm.py) and update send_question to take the vector store directly
- Remove unused imports in utils.py
Branch: pull/1/head
Author: Saryev Rustam, 1 year ago
parent 7cb2e17c57
commit 589a419391
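
In effect, callers swap utils.create_retriever for llm.create_vector_store and pass the vector store itself into send_question. A minimal before/after sketch of the call site (names taken from the diff below; root_dir, question, api_key, and model_name stand for the caller's own values):

```python
# Before: utils built the retriever for the caller
from talk_codebase.utils import create_retriever
from talk_codebase.llm import send_question

retriever = create_retriever(root_dir, api_key)
send_question(question, retriever, api_key, model_name)

# After: llm owns the FAISS store; send_question builds the retriever internally
from talk_codebase.llm import create_vector_store, send_question

vector_store = create_vector_store(root_dir, api_key)
send_question(question, vector_store, api_key, model_name)
```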

README.md
@@ -1,7 +1,7 @@
 # talk-codebase is a powerful tool for chatting with your codebase
 <p align="center">
-    <img src="https://github.com/rsaryev/talk-codebase/assets/70219513/b0cb4d00-94b6-407e-8545-92e79d442d89" width="800" alt="chat">
+    <img src="https://github.com/rsaryev/talk-codebase/assets/70219513/87a031ec-51e2-4123-abe6-91bb4d248b4d" width="800" alt="chat">
 </p>
 ## Description

pyproject.toml
@@ -1,6 +1,6 @@
 [tool.poetry]
 name = "talk-codebase"
-version = "0.1.1"
+version = "0.1.12"
 description = "talk-codebase is a powerful tool for querying and analyzing codebases."
 authors = ["Saryev Rustam <rustam1997@gmail.com>"]
 readme = "README.md"

requirements.txt
@@ -39,7 +39,6 @@ rfc3986==2.0.0
 rich==13.3.5
 six==1.16.0
 SQLAlchemy==2.0.15
-talk-codebase==0.1.0
 tenacity==8.2.2
 termcolor==2.3.0
 tiktoken==0.4.0

talk_codebase/cli.py
@@ -1,8 +1,7 @@
 import os
 import fire
 import yaml
-from talk_codebase.utils import create_retriever
-from talk_codebase.llm import send_question
+from talk_codebase.llm import create_vector_store, send_question
 
 
 def get_config():
@@ -40,7 +39,7 @@ def chat(root_dir):
         if not (api_key and model_name):
             configure()
             chat(root_dir)
-        retriever = create_retriever(root_dir, api_key)
+        vector_store = create_vector_store(root_dir, api_key)
         while True:
             question = input("👉 ")
             if not question:
@@ -48,7 +47,7 @@ def chat(root_dir):
                 continue
             if question.lower() in ('exit', 'quit'):
                 break
-            send_question(question, retriever, api_key, model_name)
+            send_question(question, vector_store, api_key, model_name)
     except KeyboardInterrupt:
         print("\n🤖 Bye!")
     except Exception as e:

talk_codebase/llm.py
@@ -1,12 +1,32 @@
 import os
+from langchain import FAISS
 from langchain.callbacks.manager import CallbackManager
 from langchain.chains import ConversationalRetrievalChain
 from langchain.chat_models import ChatOpenAI
-from talk_codebase.utils import StreamStdOut
+from langchain.embeddings import OpenAIEmbeddings
+from langchain.text_splitter import CharacterTextSplitter
+from talk_codebase.utils import StreamStdOut, load_files
 
 
+def create_vector_store(root_dir, openai_api_key):
+    docs = load_files(root_dir)
+    text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)
+    texts = text_splitter.split_documents(docs)
+    embeddings = OpenAIEmbeddings(openai_api_key=openai_api_key)
+    db = FAISS.from_documents(texts, embeddings)
+    return db
+
+
-def send_question(question, retriever, openai_api_key, model_name):
+def send_question(question, vector_store, openai_api_key, model_name):
     model = ChatOpenAI(model_name=model_name, openai_api_key=openai_api_key, streaming=True,
                        callback_manager=CallbackManager([StreamStdOut()]))
-    qa = ConversationalRetrievalChain.from_llm(model, retriever=retriever)
+    qa = ConversationalRetrievalChain.from_llm(model,
+                                               retriever=vector_store.as_retriever(search_kwargs={"k": 2}),
+                                               return_source_documents=True)
     answer = qa({"question": question, "chat_history": []})
+    print('\n' + '\n'.join([f'📄 {os.path.abspath(s.metadata["source"])}:' for s in answer["source_documents"]]))
     return answer
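
Because the chain is now built with return_source_documents=True, the dict that send_question returns carries both the answer text and the retrieved chunks. A hypothetical standalone use of the two new helpers (the directory, API key, and model name below are placeholders, not values from this commit):

```python
from talk_codebase.llm import create_vector_store, send_question

# Embed and index the codebase once, then reuse the store for every question
store = create_vector_store("./my-project", openai_api_key="sk-...")

result = send_question("Where is the CLI entry point?", store,
                       openai_api_key="sk-...", model_name="gpt-3.5-turbo")
print(result["answer"])                  # final answer text
print(len(result["source_documents"]))   # at most 2 chunks, per search_kwargs={"k": 2}
```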

talk_codebase/utils.py
@@ -1,11 +1,8 @@
 import os
 import sys
-from langchain import FAISS
 from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler
 from langchain.document_loaders import TextLoader
-from langchain.embeddings import OpenAIEmbeddings
-from langchain.text_splitter import CharacterTextSplitter
 from talk_codebase.consts import EXCLUDE_DIRS, EXCLUDE_FILES, ALLOW_FILES
@@ -40,15 +37,3 @@ def load_files(root_dir):
             print(f"Error loading file {file}: {e}")
     print(f"🤖 Loaded {len(docs)} documents")
     return docs
-
-
-def create_retriever(root_dir, openai_api_key):
-    docs = load_files(root_dir)
-    text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)
-    texts = text_splitter.split_documents(docs)
-    embeddings = OpenAIEmbeddings(openai_api_key=openai_api_key)
-    db = FAISS.from_documents(texts, embeddings)
-    retriever = db.as_retriever()
-    return retriever
