script + cpu optimisations

8 months ago · 9a33bf2210
parent 6045cbbc62
commit 9a33bf2210
6 changed files with 12 additions and 38 deletions
--- a/application/api/answer/routes.py
+++ b/application/api/answer/routes.py
@@ -118,6 +118,8 @@ def complete_stream(question, docsearch, chat_history, api_key, conversation_id)
    docs = docsearch.search(question, k=2)
    if settings.LLM_NAME == "llama.cpp":
        docs = [docs[0]]
    # join all page_content together with a newline
    docs_together = "\n".join([doc.page_content for doc in docs])
    p_chat_combine = chat_combine_template.replace("{summaries}", docs_together)
--- a/application/core/settings.py
+++ b/application/core/settings.py
@@ -2,7 +2,7 @@ from pathlib import Path
 import os
 from pydantic import BaseSettings
-current_dir = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
+current_dir = os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
 class Settings(BaseSettings):
--- a/application/llm/llama_cpp.py
+++ b/application/llm/llama_cpp.py
@@ -1,15 +1,16 @@
 from application.llm.base import BaseLLM
 from application.core.settings import settings
 class LlamaCpp(BaseLLM):
-    def __init__(self, api_key, llm_name='/Users/pavel/Desktop/docsgpt/application/models/orca-test.bin'):
+    def __init__(self, api_key, llm_name=settings.MODEL_PATH, **kwargs):
        global llama
        try:
            from llama_cpp import Llama
        except ImportError:
            raise ImportError("Please install llama_cpp using pip install llama-cpp-python")
-        llama = Llama(model_path=llm_name)
+        llama = Llama(model_path=llm_name, n_ctx=2048)
    def gen(self, model, engine, messages, stream=False, **kwargs):
        context = messages[0]['content']
--- a/docker-compose-local.yaml
+++ b/docker-compose-local.yaml
@@ -6,42 +6,10 @@ services:
    environment:
      - VITE_API_HOST=http://localhost:7091
      - VITE_API_STREAMING=$VITE_API_STREAMING
      - VITE_EMBEDDINGS_NAME=$EMBEDDINGS_NAME
    ports:
      - "5173:5173"
  # backend:
  #   build: ./application
  #   environment:
  #     - LLM_NAME=$LLM_NAME
  #     - EMBEDDINGS_NAME=$EMBEDDINGS_NAME
  #     - CELERY_BROKER_URL=redis://redis:6379/0
  #     - CELERY_RESULT_BACKEND=redis://redis:6379/1
  #     - MONGO_URI=mongodb://mongo:27017/docsgpt
  #   ports:
  #     - "7091:7091"
  #   volumes:
  #     - ./application/indexes:/app/application/indexes
  #     - ./application/inputs:/app/application/inputs
  #     - ./application/vectors:/app/application/vectors
  #     - ./application/models:/app/application/models
  #   depends_on:
  #     - redis
  #     - mongo
  worker:
    build: ./application
    command: celery -A application.app.celery worker -l INFO
    environment:
      - LLM_NAME=$LLM_NAME
      - EMBEDDINGS_NAME=$EMBEDDINGS_NAME
      - CELERY_BROKER_URL=redis://redis:6379/0
      - CELERY_RESULT_BACKEND=redis://redis:6379/1
      - MONGO_URI=mongodb://mongo:27017/docsgpt
      - API_URL=http://backend:7091
    depends_on:
      - redis
      - mongo
  redis:
    image: redis:6-alpine
    ports:
--- a/frontend/src/Navigation.tsx
+++ b/frontend/src/Navigation.tsx
@@ -59,6 +59,7 @@ export default function Navigation({
  const navRef = useRef(null);
  const apiHost = import.meta.env.VITE_API_HOST || 'https://docsapi.arc53.com';
  const embeddingsName = import.meta.env.VITE_EMBEDDINGS_NAME || 'openai_text-embedding-ada-002';
  useEffect(() => {
    if (!conversations) {
@@ -253,7 +254,7 @@ export default function Navigation({
              <div className="absolute top-12 left-0 right-6 ml-2 mr-4 max-h-52 overflow-y-scroll bg-white shadow-lg">
                {docs ? (
                  docs.map((doc, index) => {
-                    if (doc.model === 'openai_text-embedding-ada-002') {
+                    if (doc.model === embeddingsName) {
                      return (
                        <div
                          key={index}
--- a/setup.sh
+++ b/setup.sh
@@ -34,13 +34,15 @@ download_locally() {
    source venv/bin/activate
    pip install -r application/requirements.txt
    pip install llama-cpp-python
    pip install sentence-transformers
    export FLASK_APP=application/app.py
    export FLASK_DEBUG=true
    echo "The application is now running on http://localhost:5173"
    echo "You can stop the application by running the following command:"
    echo "Ctrl + C and then"
    echo "docker-compose down"
-    flask run --host=0.0.0.0 --port=7091
+    flask run --host=0.0.0.0 --port=7091 &
    celery -A application.app.celery worker -l INFO
 }
 # Function to handle the choice to use the OpenAI API