From 8d7a134cb40502b0bd8474a1ed603da2ced8ac08 Mon Sep 17 00:00:00 2001
From: Alex
Date: Tue, 9 Apr 2024 17:25:08 +0100
Subject: [PATCH] lint: ruff

---
 application/api/answer/routes.py     |  2 --
 application/api/user/routes.py       |  9 +++++----
 application/core/settings.py         |  2 +-
 application/parser/token_func.py     |  5 ++++-
 application/retriever/classic_rag.py | 15 ++++++++++++---
 5 files changed, 22 insertions(+), 11 deletions(-)

diff --git a/application/api/answer/routes.py b/application/api/answer/routes.py
index 97eb36c..fa0ac4f 100644
--- a/application/api/answer/routes.py
+++ b/application/api/answer/routes.py
@@ -8,12 +8,10 @@ import traceback
 
 from pymongo import MongoClient
 from bson.objectid import ObjectId
 
-from application.utils import count_tokens
 from application.core.settings import settings
-from application.vectorstore.vector_creator import VectorCreator
 from application.llm.llm_creator import LLMCreator
 from application.retriever.retriever_creator import RetrieverCreator
 from application.error import bad_request
diff --git a/application/api/user/routes.py b/application/api/user/routes.py
index 3222832..cacfbd7 100644
--- a/application/api/user/routes.py
+++ b/application/api/user/routes.py
@@ -283,10 +283,12 @@ def check_docs():
     else:
         file_url = urlparse(base_path + vectorstore + "index.faiss")
 
-        if file_url.scheme in ['https'] and file_url.netloc == 'raw.githubusercontent.com' and file_url.path.startswith('/arc53/DocsHUB/main/'):
-
+        if (
+            file_url.scheme in ['https'] and
+            file_url.netloc == 'raw.githubusercontent.com' and
+            file_url.path.startswith('/arc53/DocsHUB/main/')
+        ):
             r = requests.get(file_url.geturl())
-
             if r.status_code != 200:
                 return {"status": "null"}
             else:
@@ -295,7 +297,6 @@ def check_docs():
                 with open(vectorstore + "index.faiss", "wb") as f:
                     f.write(r.content)
 
-                # download the store
                 r = requests.get(base_path + vectorstore + "index.pkl")
                 with open(vectorstore + "index.pkl", "wb") as f:
                     f.write(r.content)
diff --git a/application/core/settings.py b/application/core/settings.py
index d8d0eb3..26c27ed 100644
--- a/application/core/settings.py
+++ b/application/core/settings.py
@@ -9,7 +9,7 @@ current_dir = os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__
 
 class Settings(BaseSettings):
     LLM_NAME: str = "docsgpt"
-    MODEL_NAME: Optional[str] = None # when LLM_NAME is openai, MODEL_NAME can be e.g. gpt-4-turbo-preview or gpt-3.5-turbo
+    MODEL_NAME: Optional[str] = None # if LLM_NAME is openai, MODEL_NAME can be gpt-4 or gpt-3.5-turbo
     EMBEDDINGS_NAME: str = "huggingface_sentence-transformers/all-mpnet-base-v2"
     CELERY_BROKER_URL: str = "redis://localhost:6379/0"
     CELERY_RESULT_BACKEND: str = "redis://localhost:6379/1"
diff --git a/application/parser/token_func.py b/application/parser/token_func.py
index 36ae7e5..7511cde 100644
--- a/application/parser/token_func.py
+++ b/application/parser/token_func.py
@@ -22,7 +22,10 @@ def group_documents(documents: List[Document], min_tokens: int, max_tokens: int)
         doc_len = len(tiktoken.get_encoding("cl100k_base").encode(doc.text))
 
         # Check if current group is empty or if the document can be added based on token count and matching metadata
-        if current_group is None or (len(tiktoken.get_encoding("cl100k_base").encode(current_group.text)) + doc_len < max_tokens and doc_len < min_tokens and current_group.extra_info == doc.extra_info):
+        if (current_group is None or
+            (len(tiktoken.get_encoding("cl100k_base").encode(current_group.text)) + doc_len < max_tokens and
+             doc_len < min_tokens and
+             current_group.extra_info == doc.extra_info)):
             if current_group is None:
                 current_group = doc  # Use the document directly to retain its metadata
             else:
diff --git a/application/retriever/classic_rag.py b/application/retriever/classic_rag.py
index a5bf8e3..b5f1eb9 100644
--- a/application/retriever/classic_rag.py
+++ b/application/retriever/classic_rag.py
@@ -1,5 +1,4 @@
 import os
-import json
 from application.retriever.base import BaseRetriever
 from application.core.settings import settings
 from application.vectorstore.vector_creator import VectorCreator
@@ -39,9 +38,19 @@ class ClassicRAG(BaseRetriever):
         if self.chunks == 0:
             docs = []
         else:
-            docsearch = VectorCreator.create_vectorstore(settings.VECTOR_STORE, self.vectorstore, settings.EMBEDDINGS_KEY)
+            docsearch = VectorCreator.create_vectorstore(
+                settings.VECTOR_STORE,
+                self.vectorstore,
+                settings.EMBEDDINGS_KEY
+            )
             docs_temp = docsearch.search(self.question, k=self.chunks)
-            docs = [{"title": i.metadata['title'].split('/')[-1] if i.metadata else i.page_content, "text": i.page_content} for i in docs_temp]
+            docs = [
+                {
+                    "title": i.metadata['title'].split('/')[-1] if i.metadata else i.page_content,
+                    "text": i.page_content
+                }
+                for i in docs_temp
+            ]
             if settings.LLM_NAME == "llama.cpp":
                 docs = [docs[0]]