From 8d7a134cb40502b0bd8474a1ed603da2ced8ac08 Mon Sep 17 00:00:00 2001
From: Alex
Date: Tue, 9 Apr 2024 17:25:08 +0100
Subject: [PATCH] lint: ruff

---
 application/api/answer/routes.py     |  2 --
 application/api/user/routes.py       |  9 +++++----
 application/core/settings.py         |  2 +-
 application/parser/token_func.py     |  5 ++++-
 application/retriever/classic_rag.py | 15 ++++++++++++---
 5 files changed, 22 insertions(+), 11 deletions(-)

diff --git a/application/api/answer/routes.py b/application/api/answer/routes.py
index 97eb36c..fa0ac4f 100644
--- a/application/api/answer/routes.py
+++ b/application/api/answer/routes.py
@@ -8,12 +8,10 @@ import traceback
 
 from pymongo import MongoClient
 from bson.objectid import ObjectId
 
-from application.utils import count_tokens
 from application.core.settings import settings
-from application.vectorstore.vector_creator import VectorCreator
 from application.llm.llm_creator import LLMCreator
 from application.retriever.retriever_creator import RetrieverCreator
 from application.error import bad_request
diff --git a/application/api/user/routes.py b/application/api/user/routes.py
index 3222832..cacfbd7 100644
--- a/application/api/user/routes.py
+++ b/application/api/user/routes.py
@@ -283,10 +283,12 @@ def check_docs():
     else:
         file_url = urlparse(base_path + vectorstore + "index.faiss")
 
-        if file_url.scheme in ['https'] and file_url.netloc == 'raw.githubusercontent.com' and file_url.path.startswith('/arc53/DocsHUB/main/'):
-
+        if (
+            file_url.scheme in ['https'] and
+            file_url.netloc == 'raw.githubusercontent.com' and
+            file_url.path.startswith('/arc53/DocsHUB/main/')
+        ):
             r = requests.get(file_url.geturl())
-
             if r.status_code != 200:
                 return {"status": "null"}
             else:
@@ -295,7 +297,6 @@ def check_docs():
                 with open(vectorstore + "index.faiss", "wb") as f:
                     f.write(r.content)
 
-                # download the store
                 r = requests.get(base_path + vectorstore + "index.pkl")
                 with open(vectorstore + "index.pkl", "wb") as f:
                     f.write(r.content)
diff --git a/application/core/settings.py b/application/core/settings.py
index d8d0eb3..26c27ed 100644
--- a/application/core/settings.py
+++ b/application/core/settings.py
@@ -9,7 +9,7 @@ current_dir = os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__
 
 class Settings(BaseSettings):
     LLM_NAME: str = "docsgpt"
-    MODEL_NAME: Optional[str] = None # when LLM_NAME is openai, MODEL_NAME can be e.g. gpt-4-turbo-preview or gpt-3.5-turbo
+    MODEL_NAME: Optional[str] = None # if LLM_NAME is openai, MODEL_NAME can be gpt-4 or gpt-3.5-turbo
     EMBEDDINGS_NAME: str = "huggingface_sentence-transformers/all-mpnet-base-v2"
     CELERY_BROKER_URL: str = "redis://localhost:6379/0"
     CELERY_RESULT_BACKEND: str = "redis://localhost:6379/1"
diff --git a/application/parser/token_func.py b/application/parser/token_func.py
index 36ae7e5..7511cde 100644
--- a/application/parser/token_func.py
+++ b/application/parser/token_func.py
@@ -22,7 +22,10 @@ def group_documents(documents: List[Document], min_tokens: int, max_tokens: int)
         doc_len = len(tiktoken.get_encoding("cl100k_base").encode(doc.text))
 
         # Check if current group is empty or if the document can be added based on token count and matching metadata
-        if current_group is None or (len(tiktoken.get_encoding("cl100k_base").encode(current_group.text)) + doc_len < max_tokens and doc_len < min_tokens and current_group.extra_info == doc.extra_info):
+        if (current_group is None or
+            (len(tiktoken.get_encoding("cl100k_base").encode(current_group.text)) + doc_len < max_tokens and
+             doc_len < min_tokens and
+             current_group.extra_info == doc.extra_info)):
             if current_group is None:
                 current_group = doc  # Use the document directly to retain its metadata
             else:
diff --git a/application/retriever/classic_rag.py b/application/retriever/classic_rag.py
index a5bf8e3..b5f1eb9 100644
--- a/application/retriever/classic_rag.py
+++ b/application/retriever/classic_rag.py
@@ -1,5 +1,4 @@
 import os
-import json
 from application.retriever.base import BaseRetriever
 from application.core.settings import settings
 from application.vectorstore.vector_creator import VectorCreator
@@ -39,9 +38,19 @@ class ClassicRAG(BaseRetriever):
         if self.chunks == 0:
             docs = []
         else:
-            docsearch = VectorCreator.create_vectorstore(settings.VECTOR_STORE, self.vectorstore, settings.EMBEDDINGS_KEY)
+            docsearch = VectorCreator.create_vectorstore(
+                settings.VECTOR_STORE,
+                self.vectorstore,
+                settings.EMBEDDINGS_KEY
+            )
             docs_temp = docsearch.search(self.question, k=self.chunks)
-            docs = [{"title": i.metadata['title'].split('/')[-1] if i.metadata else i.page_content, "text": i.page_content} for i in docs_temp]
+            docs = [
+                {
+                    "title": i.metadata['title'].split('/')[-1] if i.metadata else i.page_content,
+                    "text": i.page_content
+                }
+                for i in docs_temp
+            ]
             if settings.LLM_NAME == "llama.cpp":
                 docs = [docs[0]]