lint: ruff

pull/915/head
Alex 1 month ago
parent 4b849d7201
commit 8d7a134cb4

@ -8,12 +8,10 @@ import traceback
from pymongo import MongoClient
from bson.objectid import ObjectId
from application.utils import count_tokens
from application.core.settings import settings
from application.vectorstore.vector_creator import VectorCreator
from application.llm.llm_creator import LLMCreator
from application.retriever.retriever_creator import RetrieverCreator
from application.error import bad_request

@ -283,10 +283,12 @@ def check_docs():
else:
file_url = urlparse(base_path + vectorstore + "index.faiss")
if file_url.scheme in ['https'] and file_url.netloc == 'raw.githubusercontent.com' and file_url.path.startswith('/arc53/DocsHUB/main/'):
if (
file_url.scheme in ['https'] and
file_url.netloc == 'raw.githubusercontent.com' and
file_url.path.startswith('/arc53/DocsHUB/main/')
):
r = requests.get(file_url.geturl())
if r.status_code != 200:
return {"status": "null"}
else:
@ -295,7 +297,6 @@ def check_docs():
with open(vectorstore + "index.faiss", "wb") as f:
f.write(r.content)
# download the store
r = requests.get(base_path + vectorstore + "index.pkl")
with open(vectorstore + "index.pkl", "wb") as f:
f.write(r.content)

@ -9,7 +9,7 @@ current_dir = os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__
class Settings(BaseSettings):
LLM_NAME: str = "docsgpt"
MODEL_NAME: Optional[str] = None # when LLM_NAME is openai, MODEL_NAME can be e.g. gpt-4-turbo-preview or gpt-3.5-turbo
MODEL_NAME: Optional[str] = None # if LLM_NAME is openai, MODEL_NAME can be gpt-4 or gpt-3.5-turbo
EMBEDDINGS_NAME: str = "huggingface_sentence-transformers/all-mpnet-base-v2"
CELERY_BROKER_URL: str = "redis://localhost:6379/0"
CELERY_RESULT_BACKEND: str = "redis://localhost:6379/1"

@ -22,7 +22,10 @@ def group_documents(documents: List[Document], min_tokens: int, max_tokens: int)
doc_len = len(tiktoken.get_encoding("cl100k_base").encode(doc.text))
# Check if current group is empty or if the document can be added based on token count and matching metadata
if current_group is None or (len(tiktoken.get_encoding("cl100k_base").encode(current_group.text)) + doc_len < max_tokens and doc_len < min_tokens and current_group.extra_info == doc.extra_info):
if (current_group is None or
(len(tiktoken.get_encoding("cl100k_base").encode(current_group.text)) + doc_len < max_tokens and
doc_len < min_tokens and
current_group.extra_info == doc.extra_info)):
if current_group is None:
current_group = doc # Use the document directly to retain its metadata
else:

@ -1,5 +1,4 @@
import os
import json
from application.retriever.base import BaseRetriever
from application.core.settings import settings
from application.vectorstore.vector_creator import VectorCreator
@ -39,9 +38,19 @@ class ClassicRAG(BaseRetriever):
if self.chunks == 0:
docs = []
else:
docsearch = VectorCreator.create_vectorstore(settings.VECTOR_STORE, self.vectorstore, settings.EMBEDDINGS_KEY)
docsearch = VectorCreator.create_vectorstore(
settings.VECTOR_STORE,
self.vectorstore,
settings.EMBEDDINGS_KEY
)
docs_temp = docsearch.search(self.question, k=self.chunks)
docs = [{"title": i.metadata['title'].split('/')[-1] if i.metadata else i.page_content, "text": i.page_content} for i in docs_temp]
docs = [
{
"title": i.metadata['title'].split('/')[-1] if i.metadata else i.page_content,
"text": i.page_content
}
for i in docs_temp
]
if settings.LLM_NAME == "llama.cpp":
docs = [docs[0]]

Loading…
Cancel
Save