@@ -8,7 +8,7 @@ from langchain.callbacks.manager import CallbackManager
from langchain . chains import ConversationalRetrievalChain
from langchain . chat_models import ChatOpenAI
from langchain . embeddings import OpenAIEmbeddings
from langchain . text_splitter import CharacterTextSplitter
from langchain . text_splitter import RecursiveCharacterTextSplitter
from talk_codebase . utils import StreamStdOut , load_files
@@ -47,7 +47,7 @@ def create_vector_store(root_dir, openai_api_key, model_name):
if len ( docs ) == 0 :
print ( " ✘ No documents found " )
exit ( 0 )
text_splitter = CharacterTextSplitter( )
text_splitter = RecursiveCharacterTextSplitter( chunk_size = 500 , chunk_overlap = 50 )
texts = text_splitter . split_documents ( docs )
cost = calculate_cost ( docs , model_name )
@@ -74,7 +74,7 @@ def send_question(question, vector_store, openai_api_key, model_name):
model = ChatOpenAI ( model_name = model_name , openai_api_key = openai_api_key , streaming = True ,
callback_manager = CallbackManager ( [ StreamStdOut ( ) ] ) )
qa = ConversationalRetrievalChain . from_llm ( model ,
retriever = vector_store . as_retriever ( search_kwargs = { " k " : 2 } ) ,
retriever = vector_store . as_retriever ( search_kwargs = { " k " : 4 } ) ,
return_source_documents = True )
answer = qa ( { " question " : question , " chat_history " : [ ] } )
print ( ' \n ' + ' \n ' . join ( [ f ' 📄 { os . path . abspath ( s . metadata [ " source " ] ) } : ' for s in answer [ " source_documents " ] ] ) )