lint: ruff

1 month ago · 8d7a134cb4
parent 4b849d7201
commit 8d7a134cb4
5 changed files with 22 additions and 11 deletions
--- a/application/api/answer/routes.py
+++ b/application/api/answer/routes.py
@@ -8,12 +8,10 @@ import traceback

 from pymongo import MongoClient
 from bson.objectid import ObjectId
-from application.utils import count_tokens



 from application.core.settings import settings
-from application.vectorstore.vector_creator import VectorCreator
 from application.llm.llm_creator import LLMCreator
 from application.retriever.retriever_creator import RetrieverCreator
 from application.error import bad_request
--- a/application/api/user/routes.py
+++ b/application/api/user/routes.py
@@ -283,10 +283,12 @@ def check_docs():
    else:
        file_url = urlparse(base_path + vectorstore + "index.faiss")
        
-        if file_url.scheme in ['https'] and file_url.netloc == 'raw.githubusercontent.com' and file_url.path.startswith('/arc53/DocsHUB/main/'):
-            
+        if (
+            file_url.scheme in ['https'] and 
+            file_url.netloc == 'raw.githubusercontent.com' and 
+            file_url.path.startswith('/arc53/DocsHUB/main/')
+        ):
            r = requests.get(file_url.geturl())
-
            if r.status_code != 200:
                return {"status": "null"}
            else:
@@ -295,7 +297,6 @@ def check_docs():
                with open(vectorstore + "index.faiss", "wb") as f:
                    f.write(r.content)

-                # download the store
                r = requests.get(base_path + vectorstore + "index.pkl")
                with open(vectorstore + "index.pkl", "wb") as f:
                    f.write(r.content)
--- a/application/core/settings.py
+++ b/application/core/settings.py
@@ -9,7 +9,7 @@ current_dir = os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__

 class Settings(BaseSettings):
    LLM_NAME: str = "docsgpt"
-    MODEL_NAME: Optional[str] = None # when LLM_NAME is openai, MODEL_NAME can be e.g. gpt-4-turbo-preview or gpt-3.5-turbo
+    MODEL_NAME: Optional[str] = None # if LLM_NAME is openai, MODEL_NAME can be gpt-4 or gpt-3.5-turbo
    EMBEDDINGS_NAME: str = "huggingface_sentence-transformers/all-mpnet-base-v2"
    CELERY_BROKER_URL: str = "redis://localhost:6379/0"
    CELERY_RESULT_BACKEND: str = "redis://localhost:6379/1"
--- a/application/parser/token_func.py
+++ b/application/parser/token_func.py
@@ -22,7 +22,10 @@ def group_documents(documents: List[Document], min_tokens: int, max_tokens: int)
        doc_len = len(tiktoken.get_encoding("cl100k_base").encode(doc.text))

        # Check if current group is empty or if the document can be added based on token count and matching metadata
-        if current_group is None or (len(tiktoken.get_encoding("cl100k_base").encode(current_group.text)) + doc_len < max_tokens and doc_len < min_tokens and current_group.extra_info == doc.extra_info):
+        if (current_group is None or 
+            (len(tiktoken.get_encoding("cl100k_base").encode(current_group.text)) + doc_len < max_tokens and 
+             doc_len < min_tokens and 
+             current_group.extra_info == doc.extra_info)):
            if current_group is None:
                current_group = doc  # Use the document directly to retain its metadata
            else:
--- a/application/retriever/classic_rag.py
+++ b/application/retriever/classic_rag.py
@@ -1,5 +1,4 @@
 import os
-import json
 from application.retriever.base import BaseRetriever
 from application.core.settings import settings
 from application.vectorstore.vector_creator import VectorCreator
@@ -39,9 +38,19 @@ class ClassicRAG(BaseRetriever):
        if self.chunks == 0:
            docs = []
        else:
-            docsearch = VectorCreator.create_vectorstore(settings.VECTOR_STORE, self.vectorstore, settings.EMBEDDINGS_KEY)
+            docsearch = VectorCreator.create_vectorstore(
+                settings.VECTOR_STORE, 
+                self.vectorstore, 
+                settings.EMBEDDINGS_KEY
+            )
            docs_temp = docsearch.search(self.question, k=self.chunks)
-            docs = [{"title": i.metadata['title'].split('/')[-1] if i.metadata else i.page_content, "text": i.page_content} for i in docs_temp]
+            docs = [
+                {
+                    "title": i.metadata['title'].split('/')[-1] if i.metadata else i.page_content, 
+                    "text": i.page_content
+                } 
+                for i in docs_temp
+            ]
        if settings.LLM_NAME == "llama.cpp":
            docs = [docs[0]]