From b47ecab1a9cd1c9987708773760867b81412f31c Mon Sep 17 00:00:00 2001
From: Pavel
Date: Sat, 30 Sep 2023 23:38:48 +0400
Subject: [PATCH 1/2] llama-cpp local

---
 application/llm/llama_cpp.py   | 35 ++++++++++++++++++++++++++++++++++
 application/llm/llm_creator.py |  4 +++-
 2 files changed, 38 insertions(+), 1 deletion(-)
 create mode 100644 application/llm/llama_cpp.py

diff --git a/application/llm/llama_cpp.py b/application/llm/llama_cpp.py
new file mode 100644
index 0000000..d54d6f1
--- /dev/null
+++ b/application/llm/llama_cpp.py
@@ -0,0 +1,35 @@
+from application.llm.base import BaseLLM
+
+class LlamaCpp(BaseLLM):
+
+    def __init__(self, api_key, llm_name='/Users/pavel/Desktop/docsgpt/application/models/orca-test.bin'):
+        global llama
+        from llama_cpp import Llama
+
+        llama = Llama(model_path=llm_name)
+
+    def gen(self, model, engine, messages, stream=False, **kwargs):
+        context = messages[0]['content']
+        user_question = messages[-1]['content']
+        prompt = f"### Instruction \n {user_question} \n ### Context \n {context} \n ### Answer \n"
+
+        result = llama(prompt, max_tokens=150, echo=False)
+
+        # import sys
+        # print(result['choices'][0]['text'].split('### Answer \n')[-1], file=sys.stderr)
+
+        return result['choices'][0]['text'].split('### Answer \n')[-1]
+
+    def gen_stream(self, model, engine, messages, stream=True, **kwargs):
+        context = messages[0]['content']
+        user_question = messages[-1]['content']
+        prompt = f"### Instruction \n {user_question} \n ### Context \n {context} \n ### Answer \n"
+
+        result = llama(prompt, max_tokens=150, echo=False, stream=stream)
+
+        # import sys
+        # print(list(result), file=sys.stderr)
+
+        for item in result:
+            for choice in item['choices']:
+                yield choice['text']
diff --git a/application/llm/llm_creator.py b/application/llm/llm_creator.py
index a7ffc0f..6a60f1b 100644
--- a/application/llm/llm_creator.py
+++ b/application/llm/llm_creator.py
@@ -1,6 +1,7 @@
 from application.llm.openai import OpenAILLM, AzureOpenAILLM
 from application.llm.sagemaker import SagemakerAPILLM
 from application.llm.huggingface import HuggingFaceLLM
+from application.llm.llama_cpp import LlamaCpp
 
 
 
@@ -9,7 +10,8 @@ class LLMCreator:
         'openai': OpenAILLM,
         'azure_openai': AzureOpenAILLM,
         'sagemaker': SagemakerAPILLM,
-        'huggingface': HuggingFaceLLM
+        'huggingface': HuggingFaceLLM,
+        'llama.cpp': LlamaCpp
     }
 
     @classmethod
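For reference, here is a minimal sketch of how the backend registered under the new 'llama.cpp' key is meant to be driven once this patch is applied. The `LlamaCpp` constructor and the `gen`/`gen_stream` signatures are taken from the diff above; the model path and the surrounding call site are placeholders, not part of the patch.

```python
# Illustrative only: nothing in this patch wires these calls up yet; the application
# selects its LLM through the LLMCreator factory map extended above.
from application.llm.llama_cpp import LlamaCpp

# api_key is accepted but unused; llm_name must point at a local llama.cpp-compatible model.
llm = LlamaCpp(api_key=None, llm_name="application/models/orca-test.bin")

messages = [
    {"role": "system", "content": "<retrieved context goes here>"},  # messages[0] -> context
    {"role": "user", "content": "How do I index my docs?"},          # messages[-1] -> question
]

# Blocking call: returns only the text after the '### Answer' marker.
answer = llm.gen(model=None, engine=None, messages=messages)

# Streaming call: yields raw text chunks from llama.cpp as they are produced.
for chunk in llm.gen_stream(model=None, engine=None, messages=messages):
    print(chunk, end="", flush=True)
```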
From 9bbf4044e0115ef71090f63bf511db231fd1da98 Mon Sep 17 00:00:00 2001
From: Alex
Date: Sun, 1 Oct 2023 17:20:47 +0100
Subject: [PATCH 2/2] script

---
 .gitignore                       |   1 +
 application/api/answer/routes.py |  14 ----
 application/core/settings.py     |   5 +-
 application/llm/llama_cpp.py     |   5 +-
 application/vectorstore/base.py  |   4 +-
 docker-compose-local.yaml        |  58 +++++++++++++++
 setup.sh                         | 118 ++++++++++++++++++++-----------
 7 files changed, 143 insertions(+), 62 deletions(-)
 create mode 100644 docker-compose-local.yaml

diff --git a/.gitignore b/.gitignore
index a896c29..053e579 100644
--- a/.gitignore
+++ b/.gitignore
@@ -171,3 +171,4 @@ application/vectors/
 
 node_modules/
 .vscode/settings.json
+models/
diff --git a/application/api/answer/routes.py b/application/api/answer/routes.py
index 86943ff..566203c 100644
--- a/application/api/answer/routes.py
+++ b/application/api/answer/routes.py
@@ -32,20 +32,6 @@ if settings.LLM_NAME == "gpt4":
 else:
     gpt_model = 'gpt-3.5-turbo'
 
-if settings.SELF_HOSTED_MODEL:
-    from langchain.llms import HuggingFacePipeline
-    from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
-
-    model_id = settings.LLM_NAME  # hf model id (Arc53/docsgpt-7b-falcon, Arc53/docsgpt-14b)
-    tokenizer = AutoTokenizer.from_pretrained(model_id)
-    model = AutoModelForCausalLM.from_pretrained(model_id)
-    pipe = pipeline(
-        "text-generation", model=model,
-        tokenizer=tokenizer, max_new_tokens=2000,
-        device_map="auto", eos_token_id=tokenizer.eos_token_id
-    )
-    hf = HuggingFacePipeline(pipeline=pipe)
-
 # load the prompts
 current_dir = os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
 with open(os.path.join(current_dir, "prompts", "combine_prompt.txt"), "r") as f:
diff --git a/application/core/settings.py b/application/core/settings.py
index 1479beb..7895aef 100644
--- a/application/core/settings.py
+++ b/application/core/settings.py
@@ -1,6 +1,8 @@
 from pathlib import Path
+import os
 
 from pydantic import BaseSettings
 
+current_dir = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
 
 class Settings(BaseSettings):
@@ -9,9 +11,8 @@ class Settings(BaseSettings):
     CELERY_BROKER_URL: str = "redis://localhost:6379/0"
     CELERY_RESULT_BACKEND: str = "redis://localhost:6379/1"
     MONGO_URI: str = "mongodb://localhost:27017/docsgpt"
-    MODEL_PATH: str = "./models/gpt4all-model.bin"
+    MODEL_PATH: str = os.path.join(current_dir, "models/docsgpt-7b-f16.gguf")
     TOKENS_MAX_HISTORY: int = 150
-    SELF_HOSTED_MODEL: bool = False
     UPLOAD_FOLDER: str = "inputs"
     API_URL: str = "http://localhost:7091"  # backend url for celery worker
 
diff --git a/application/llm/llama_cpp.py b/application/llm/llama_cpp.py
index d54d6f1..ebd713c 100644
--- a/application/llm/llama_cpp.py
+++ b/application/llm/llama_cpp.py
@@ -4,7 +4,10 @@ class LlamaCpp(BaseLLM):
 
     def __init__(self, api_key, llm_name='/Users/pavel/Desktop/docsgpt/application/models/orca-test.bin'):
         global llama
-        from llama_cpp import Llama
+        try:
+            from llama_cpp import Llama
+        except ImportError:
+            raise ImportError("Please install llama_cpp using pip install llama-cpp-python")
 
         llama = Llama(model_path=llm_name)
 
diff --git a/application/vectorstore/base.py b/application/vectorstore/base.py
index ad48174..29eac07 100644
--- a/application/vectorstore/base.py
+++ b/application/vectorstore/base.py
@@ -2,7 +2,7 @@ from abc import ABC, abstractmethod
 import os
 from langchain.embeddings import (
     OpenAIEmbeddings,
-    HuggingFaceHubEmbeddings,
+    HuggingFaceEmbeddings,
     CohereEmbeddings,
     HuggingFaceInstructEmbeddings,
 )
@@ -22,7 +22,7 @@ class BaseVectorStore(ABC):
     def _get_docsearch(self, embeddings_name, embeddings_key=None):
         embeddings_factory = {
             "openai_text-embedding-ada-002": OpenAIEmbeddings,
-            "huggingface_sentence-transformers/all-mpnet-base-v2": HuggingFaceHubEmbeddings,
+            "huggingface_sentence-transformers/all-mpnet-base-v2": HuggingFaceEmbeddings,
             "huggingface_hkunlp/instructor-large": HuggingFaceInstructEmbeddings,
             "cohere_medium": CohereEmbeddings
         }
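A note on the HuggingFaceHubEmbeddings to HuggingFaceEmbeddings swap above: the Hub variant calls the hosted Inference API, while HuggingFaceEmbeddings runs sentence-transformers locally, which fits the fully local goal of this series. Below is a rough sketch of how such a factory mapping is typically consumed; it is illustrative only, since the rest of `_get_docsearch` is outside this hunk.

```python
# Hypothetical consumer of the embeddings_factory mapping shown in the hunk above;
# the real _get_docsearch body is not part of this diff.
from langchain.embeddings import HuggingFaceEmbeddings, OpenAIEmbeddings

embeddings_factory = {
    "openai_text-embedding-ada-002": OpenAIEmbeddings,
    "huggingface_sentence-transformers/all-mpnet-base-v2": HuggingFaceEmbeddings,
}

def build_embeddings(embeddings_name, embeddings_key=None):
    cls = embeddings_factory[embeddings_name]
    if embeddings_name.startswith("openai"):
        return cls(openai_api_key=embeddings_key)  # remote service, needs an API key
    return cls()  # local sentence-transformers model, no key required

embeddings = build_embeddings("huggingface_sentence-transformers/all-mpnet-base-v2")
```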
diff --git a/docker-compose-local.yaml b/docker-compose-local.yaml
new file mode 100644
index 0000000..b24ed48
--- /dev/null
+++ b/docker-compose-local.yaml
@@ -0,0 +1,58 @@
+version: "3.9"
+
+services:
+  frontend:
+    build: ./frontend
+    environment:
+      - VITE_API_HOST=http://localhost:7091
+      - VITE_API_STREAMING=$VITE_API_STREAMING
+    ports:
+      - "5173:5173"
+
+  # backend:
+  #   build: ./application
+  #   environment:
+  #     - LLM_NAME=$LLM_NAME
+  #     - EMBEDDINGS_NAME=$EMBEDDINGS_NAME
+  #     - CELERY_BROKER_URL=redis://redis:6379/0
+  #     - CELERY_RESULT_BACKEND=redis://redis:6379/1
+  #     - MONGO_URI=mongodb://mongo:27017/docsgpt
+  #   ports:
+  #     - "7091:7091"
+  #   volumes:
+  #     - ./application/indexes:/app/application/indexes
+  #     - ./application/inputs:/app/application/inputs
+  #     - ./application/vectors:/app/application/vectors
+  #     - ./application/models:/app/application/models
+  #   depends_on:
+  #     - redis
+  #     - mongo
+
+  worker:
+    build: ./application
+    command: celery -A application.app.celery worker -l INFO
+    environment:
+      - LLM_NAME=$LLM_NAME
+      - EMBEDDINGS_NAME=$EMBEDDINGS_NAME
+      - CELERY_BROKER_URL=redis://redis:6379/0
+      - CELERY_RESULT_BACKEND=redis://redis:6379/1
+      - MONGO_URI=mongodb://mongo:27017/docsgpt
+      - API_URL=http://backend:7091
+    depends_on:
+      - redis
+      - mongo
+
+  redis:
+    image: redis:6-alpine
+    ports:
+      - 6379:6379
+
+  mongo:
+    image: mongo:6
+    ports:
+      - 27017:27017
+    volumes:
+      - mongodb_data_container:/data/db
+
+volumes:
+  mongodb_data_container:
diff --git a/setup.sh b/setup.sh
index cd5712b..281a124 100755
--- a/setup.sh
+++ b/setup.sh
@@ -1,45 +1,77 @@
 #!/bin/bash
-cd "$(dirname "$0")" || exit
-
-# Create the required directories on the host machine if they don't exist
-[ ! -d "./application/indexes" ] && mkdir -p ./application/indexes
-[ ! -d "./application/inputs" ] && mkdir -p ./application/inputs
-[ ! -d "./application/vectors" ] && mkdir -p ./application/vectors
-
-# Build frontend and backend images
-docker build -t frontend_image ./frontend
-docker build -t backend_image ./application
-
-# Run redis and mongo services
-docker run -d --name redis -p 6379:6379 redis:6-alpine
-docker run -d --name mongo -p 27017:27017 -v mongodb_data_container:/data/db mongo:6
-
-# Run backend and worker services
-docker run -d --name backend -p 7091:7091 \
-  --link redis:redis --link mongo:mongo \
-  -v $(pwd)/application/indexes:/app/indexes \
-  -v $(pwd)/application/inputs:/app/inputs \
-  -v $(pwd)/application/vectors:/app/vectors \
-  -e API_KEY=$OPENAI_API_KEY \
-  -e EMBEDDINGS_KEY=$OPENAI_API_KEY \
-  -e CELERY_BROKER_URL=redis://redis:6379/0 \
-  -e CELERY_RESULT_BACKEND=redis://redis:6379/1 \
-  -e MONGO_URI=mongodb://mongo:27017/docsgpt \
-  backend_image
-
-docker run -d --name worker \
-  --link redis:redis --link mongo:mongo \
-  -e API_KEY=$OPENAI_API_KEY \
-  -e EMBEDDINGS_KEY=$OPENAI_API_KEY \
-  -e CELERY_BROKER_URL=redis://redis:6379/0 \
-  -e CELERY_RESULT_BACKEND=redis://redis:6379/1 \
-  -e MONGO_URI=mongodb://mongo:27017/docsgpt \
-  -e API_URL=http://backend:7091 \
-  backend_image \
-  celery -A app.celery worker -l INFO
-
-# Run frontend service
-docker run -d --name frontend -p 5173:5173 \
-  -e VITE_API_HOST=http://localhost:7091 \
-  frontend_image
+# Function to prompt the user for their choice
+prompt_user() {
+    echo "Do you want to:"
+    echo "1. Download the language model locally (12GB)"
+    echo "2. Use the OpenAI API"
+    read -p "Enter your choice (1/2): " choice
+}
+
+# Function to handle the choice to download the model locally
+download_locally() {
+    echo "LLM_NAME=llama.cpp" > .env
+    echo "VITE_API_STREAMING=true" >> .env
+    echo "EMBEDDINGS_NAME=huggingface_sentence-transformers/all-mpnet-base-v2" >> .env
+    echo "The .env file has been created with LLM_NAME set to llama.cpp."
+
+    # Create the models directory if it does not exist
+    mkdir -p models
+
+    # Download docsgpt-7b-f16.gguf into the models directory if it is not already there
+    if [ ! -f models/docsgpt-7b-f16.gguf ]; then
+        echo "Downloading the model..."
+        wget -P models https://docsgpt.s3.eu-west-1.amazonaws.com/models/docsgpt-7b-f16.gguf
+        echo "Model downloaded to models directory."
+    else
+        echo "Model already exists."
+    fi
+
+    docker-compose -f docker-compose-local.yaml build && docker-compose -f docker-compose-local.yaml up -d
+    python -m venv venv
+    source venv/bin/activate
+    pip install -r application/requirements.txt
+    pip install llama-cpp-python
+    export FLASK_APP=application/app.py
+    export FLASK_DEBUG=true
+    echo "The application is now running on http://localhost:5173"
+    echo "You can stop the application by running the following command:"
+    echo "Ctrl + C and then"
+    echo "docker-compose down"
+    flask run --host=0.0.0.0 --port=7091
+}
+
+# Function to handle the choice to use the OpenAI API
+use_openai() {
+    read -p "Please enter your OpenAI API key: " api_key
+    echo "API_KEY=$api_key" > .env
+    echo "LLM_NAME=openai" >> .env
+    echo "VITE_API_STREAMING=true" >> .env
+    echo "The .env file has been created with API_KEY set to your provided key."
+
+    docker-compose build && docker-compose up -d
+
+    echo "The application will now run on http://localhost:5173"
+    echo "You can stop the application by running the following command:"
+    echo "docker-compose down"
+}
+
+# Prompt the user for their choice
+prompt_user
+
+# Handle the user's choice
+case $choice in
+    1)
+        download_locally
+        ;;
+    2)
+        use_openai
+        ;;
+    *)
+        echo "Invalid choice. Please choose either 1 or 2."
+        ;;
+esac
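Taken together, the local path now works like this: setup.sh option 1 writes LLM_NAME=llama.cpp to .env and downloads docsgpt-7b-f16.gguf, settings.MODEL_PATH defaults to that file under the application's models directory, and the LlamaCpp backend loads it. Below is a hedged sketch of that wiring; it assumes core.settings exposes a `settings` instance and that the route code passes MODEL_PATH through to the constructor, neither of which is shown in these two patches.

```python
# Illustrative glue only; the actual call site in the Flask routes is not part of this series.
from application.core.settings import settings  # assumed to expose a Settings() instance
from application.llm.llama_cpp import LlamaCpp

# settings.MODEL_PATH defaults to the docsgpt-7b-f16.gguf file under the application directory.
llm = LlamaCpp(api_key=None, llm_name=settings.MODEL_PATH)

messages = [
    {"role": "system", "content": "<retrieved documentation chunks>"},
    {"role": "user", "content": "How do I run DocsGPT fully locally?"},
]

# Stream tokens to the client as llama.cpp generates them, matching VITE_API_STREAMING=true.
for chunk in llm.gen_stream(model=None, engine=None, messages=messages):
    print(chunk, end="", flush=True)
```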