From b47ecab1a9cd1c9987708773760867b81412f31c Mon Sep 17 00:00:00 2001
From: Pavel
Date: Sat, 30 Sep 2023 23:38:48 +0400
Subject: [PATCH 1/2] llama-cpp local

---
 application/llm/llama_cpp.py   | 35 ++++++++++++++++++++++++++++++++++
 application/llm/llm_creator.py |  4 +++-
 2 files changed, 38 insertions(+), 1 deletion(-)
 create mode 100644 application/llm/llama_cpp.py

diff --git a/application/llm/llama_cpp.py b/application/llm/llama_cpp.py
new file mode 100644
index 0000000..d54d6f1
--- /dev/null
+++ b/application/llm/llama_cpp.py
@@ -0,0 +1,35 @@
+from application.llm.base import BaseLLM
+
+class LlamaCpp(BaseLLM):
+
+    def __init__(self, api_key, llm_name='/Users/pavel/Desktop/docsgpt/application/models/orca-test.bin'):
+        global llama
+        from llama_cpp import Llama
+
+        llama = Llama(model_path=llm_name)
+
+    def gen(self, model, engine, messages, stream=False, **kwargs):
+        context = messages[0]['content']
+        user_question = messages[-1]['content']
+        prompt = f"### Instruction \n {user_question} \n ### Context \n {context} \n ### Answer \n"
+
+        result = llama(prompt, max_tokens=150, echo=False)
+
+        # import sys
+        # print(result['choices'][0]['text'].split('### Answer \n')[-1], file=sys.stderr)
+
+        return result['choices'][0]['text'].split('### Answer \n')[-1]
+
+    def gen_stream(self, model, engine, messages, stream=True, **kwargs):
+        context = messages[0]['content']
+        user_question = messages[-1]['content']
+        prompt = f"### Instruction \n {user_question} \n ### Context \n {context} \n ### Answer \n"
+
+        result = llama(prompt, max_tokens=150, echo=False, stream=stream)
+
+        # import sys
+        # print(list(result), file=sys.stderr)
+
+        for item in result:
+            for choice in item['choices']:
+                yield choice['text']
diff --git a/application/llm/llm_creator.py b/application/llm/llm_creator.py
index a7ffc0f..6a60f1b 100644
--- a/application/llm/llm_creator.py
+++ b/application/llm/llm_creator.py
@@ -1,6 +1,7 @@
 from application.llm.openai import OpenAILLM, AzureOpenAILLM
 from application.llm.sagemaker import SagemakerAPILLM
 from application.llm.huggingface import HuggingFaceLLM
+from application.llm.llama_cpp import LlamaCpp
 
 
 
@@ -9,7 +10,8 @@ class LLMCreator:
         'openai': OpenAILLM,
         'azure_openai': AzureOpenAILLM,
         'sagemaker': SagemakerAPILLM,
-        'huggingface': HuggingFaceLLM
+        'huggingface': HuggingFaceLLM,
+        'llama.cpp': LlamaCpp
     }
 
     @classmethod
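For reference, here is a minimal sketch of how the backend registered under the new 'llama.cpp' key is meant to be driven once this patch is applied. The `LlamaCpp` constructor and the `gen`/`gen_stream` signatures are taken from the diff above; the model path and the surrounding call site are placeholders, not part of the patch.

```python
# Illustrative only: nothing in this patch wires these calls up yet; the application
# selects its LLM through the LLMCreator factory map extended above.
from application.llm.llama_cpp import LlamaCpp

# api_key is accepted but unused; llm_name must point at a local llama.cpp-compatible model.
llm = LlamaCpp(api_key=None, llm_name="application/models/orca-test.bin")

messages = [
    {"role": "system", "content": "<retrieved context goes here>"},  # messages[0] -> context
    {"role": "user", "content": "How do I index my docs?"},          # messages[-1] -> question
]

# Blocking call: returns only the text after the '### Answer' marker.
answer = llm.gen(model=None, engine=None, messages=messages)

# Streaming call: yields raw text chunks from llama.cpp as they are produced.
for chunk in llm.gen_stream(model=None, engine=None, messages=messages):
    print(chunk, end="", flush=True)
```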
From 9bbf4044e0115ef71090f63bf511db231fd1da98 Mon Sep 17 00:00:00 2001
From: Alex
Date: Sun, 1 Oct 2023 17:20:47 +0100
Subject: [PATCH 2/2] script

---
 .gitignore                       |   1 +
 application/api/answer/routes.py |  14 ----
 application/core/settings.py     |   5 +-
 application/llm/llama_cpp.py     |   5 +-
 application/vectorstore/base.py  |   4 +-
 docker-compose-local.yaml        |  58 +++++++++++++++
 setup.sh                         | 118 ++++++++++++++++++++-----------
 7 files changed, 143 insertions(+), 62 deletions(-)
 create mode 100644 docker-compose-local.yaml

diff --git a/.gitignore b/.gitignore
index a896c29..053e579 100644
--- a/.gitignore
+++ b/.gitignore
@@ -171,3 +171,4 @@ application/vectors/
 
 node_modules/
 .vscode/settings.json
+models/
diff --git a/application/api/answer/routes.py b/application/api/answer/routes.py
index 86943ff..566203c 100644
--- a/application/api/answer/routes.py
+++ b/application/api/answer/routes.py
@@ -32,20 +32,6 @@ if settings.LLM_NAME == "gpt4":
 else:
     gpt_model = 'gpt-3.5-turbo'
 
-if settings.SELF_HOSTED_MODEL:
-    from langchain.llms import HuggingFacePipeline
-    from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
-
-    model_id = settings.LLM_NAME  # hf model id (Arc53/docsgpt-7b-falcon, Arc53/docsgpt-14b)
-    tokenizer = AutoTokenizer.from_pretrained(model_id)
-    model = AutoModelForCausalLM.from_pretrained(model_id)
-    pipe = pipeline(
-        "text-generation", model=model,
-        tokenizer=tokenizer, max_new_tokens=2000,
-        device_map="auto", eos_token_id=tokenizer.eos_token_id
-    )
-    hf = HuggingFacePipeline(pipeline=pipe)
-
 # load the prompts
 current_dir = os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
 with open(os.path.join(current_dir, "prompts", "combine_prompt.txt"), "r") as f:
diff --git a/application/core/settings.py b/application/core/settings.py
index 1479beb..7895aef 100644
--- a/application/core/settings.py
+++ b/application/core/settings.py
@@ -1,6 +1,8 @@
 from pathlib import Path
+import os
 
 from pydantic import BaseSettings
 
+current_dir = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
 
 class Settings(BaseSettings):
@@ -9,9 +11,8 @@ class Settings(BaseSettings):
     CELERY_BROKER_URL: str = "redis://localhost:6379/0"
     CELERY_RESULT_BACKEND: str = "redis://localhost:6379/1"
     MONGO_URI: str = "mongodb://localhost:27017/docsgpt"
-    MODEL_PATH: str = "./models/gpt4all-model.bin"
+    MODEL_PATH: str = os.path.join(current_dir, "models/docsgpt-7b-f16.gguf")
     TOKENS_MAX_HISTORY: int = 150
-    SELF_HOSTED_MODEL: bool = False
     UPLOAD_FOLDER: str = "inputs"
     API_URL: str = "http://localhost:7091"  # backend url for celery worker
 
diff --git a/application/llm/llama_cpp.py b/application/llm/llama_cpp.py
index d54d6f1..ebd713c 100644
--- a/application/llm/llama_cpp.py
+++ b/application/llm/llama_cpp.py
@@ -4,7 +4,10 @@ class LlamaCpp(BaseLLM):
 
     def __init__(self, api_key, llm_name='/Users/pavel/Desktop/docsgpt/application/models/orca-test.bin'):
         global llama
-        from llama_cpp import Llama
+        try:
+            from llama_cpp import Llama
+        except ImportError:
+            raise ImportError("Please install llama_cpp using pip install llama-cpp-python")
 
         llama = Llama(model_path=llm_name)
 
diff --git a/application/vectorstore/base.py b/application/vectorstore/base.py
index ad48174..29eac07 100644
--- a/application/vectorstore/base.py
+++ b/application/vectorstore/base.py
@@ -2,7 +2,7 @@ from abc import ABC, abstractmethod
 import os
 from langchain.embeddings import (
     OpenAIEmbeddings,
-    HuggingFaceHubEmbeddings,
+    HuggingFaceEmbeddings,
     CohereEmbeddings,
     HuggingFaceInstructEmbeddings,
 )
@@ -22,7 +22,7 @@ class BaseVectorStore(ABC):
     def _get_docsearch(self, embeddings_name, embeddings_key=None):
         embeddings_factory = {
             "openai_text-embedding-ada-002": OpenAIEmbeddings,
-            "huggingface_sentence-transformers/all-mpnet-base-v2": HuggingFaceHubEmbeddings,
+            "huggingface_sentence-transformers/all-mpnet-base-v2": HuggingFaceEmbeddings,
             "huggingface_hkunlp/instructor-large": HuggingFaceInstructEmbeddings,
             "cohere_medium": CohereEmbeddings
         }
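A note on the HuggingFaceHubEmbeddings to HuggingFaceEmbeddings swap above: the Hub variant calls the hosted Inference API, while HuggingFaceEmbeddings runs sentence-transformers locally, which fits the fully local goal of this series. Below is a rough sketch of how such a factory mapping is typically consumed; it is illustrative only, since the rest of `_get_docsearch` is outside this hunk.

```python
# Hypothetical consumer of the embeddings_factory mapping shown in the hunk above;
# the real _get_docsearch body is not part of this diff.
from langchain.embeddings import HuggingFaceEmbeddings, OpenAIEmbeddings

embeddings_factory = {
    "openai_text-embedding-ada-002": OpenAIEmbeddings,
    "huggingface_sentence-transformers/all-mpnet-base-v2": HuggingFaceEmbeddings,
}

def build_embeddings(embeddings_name, embeddings_key=None):
    cls = embeddings_factory[embeddings_name]
    if embeddings_name.startswith("openai"):
        return cls(openai_api_key=embeddings_key)  # remote service, needs an API key
    return cls()  # local sentence-transformers model, no key required

embeddings = build_embeddings("huggingface_sentence-transformers/all-mpnet-base-v2")
```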
diff --git a/docker-compose-local.yaml b/docker-compose-local.yaml
new file mode 100644
index 0000000..b24ed48
--- /dev/null
+++ b/docker-compose-local.yaml
@@ -0,0 +1,58 @@
+version: "3.9"
+
+services:
+  frontend:
+    build: ./frontend
+    environment:
+      - VITE_API_HOST=http://localhost:7091
+      - VITE_API_STREAMING=$VITE_API_STREAMING
+    ports:
+      - "5173:5173"
+
+  # backend:
+  #   build: ./application
+  #   environment:
+  #     - LLM_NAME=$LLM_NAME
+  #     - EMBEDDINGS_NAME=$EMBEDDINGS_NAME
+  #     - CELERY_BROKER_URL=redis://redis:6379/0
+  #     - CELERY_RESULT_BACKEND=redis://redis:6379/1
+  #     - MONGO_URI=mongodb://mongo:27017/docsgpt
+  #   ports:
+  #     - "7091:7091"
+  #   volumes:
+  #     - ./application/indexes:/app/application/indexes
+  #     - ./application/inputs:/app/application/inputs
+  #     - ./application/vectors:/app/application/vectors
+  #     - ./application/models:/app/application/models
+  #   depends_on:
+  #     - redis
+  #     - mongo
+
+  worker:
+    build: ./application
+    command: celery -A application.app.celery worker -l INFO
+    environment:
+      - LLM_NAME=$LLM_NAME
+      - EMBEDDINGS_NAME=$EMBEDDINGS_NAME
+      - CELERY_BROKER_URL=redis://redis:6379/0
+      - CELERY_RESULT_BACKEND=redis://redis:6379/1
+      - MONGO_URI=mongodb://mongo:27017/docsgpt
+      - API_URL=http://backend:7091
+    depends_on:
+      - redis
+      - mongo
+
+  redis:
+    image: redis:6-alpine
+    ports:
+      - 6379:6379
+
+  mongo:
+    image: mongo:6
+    ports:
+      - 27017:27017
+    volumes:
+      - mongodb_data_container:/data/db
+
+volumes:
+  mongodb_data_container:
diff --git a/setup.sh b/setup.sh
index cd5712b..281a124 100755
--- a/setup.sh
+++ b/setup.sh
@@ -1,45 +1,77 @@
 #!/bin/bash
-cd "$(dirname "$0")" || exit
-
-# Create the required directories on the host machine if they don't exist
-[ ! -d "./application/indexes" ] && mkdir -p ./application/indexes
-[ ! -d "./application/inputs" ] && mkdir -p ./application/inputs
-[ ! -d "./application/vectors" ] && mkdir -p ./application/vectors
-
-# Build frontend and backend images
-docker build -t frontend_image ./frontend
-docker build -t backend_image ./application
-
-# Run redis and mongo services
-docker run -d --name redis -p 6379:6379 redis:6-alpine
-docker run -d --name mongo -p 27017:27017 -v mongodb_data_container:/data/db mongo:6
-
-# Run backend and worker services
-docker run -d --name backend -p 7091:7091 \
-  --link redis:redis --link mongo:mongo \
-  -v $(pwd)/application/indexes:/app/indexes \
-  -v $(pwd)/application/inputs:/app/inputs \
-  -v $(pwd)/application/vectors:/app/vectors \
-  -e API_KEY=$OPENAI_API_KEY \
-  -e EMBEDDINGS_KEY=$OPENAI_API_KEY \
-  -e CELERY_BROKER_URL=redis://redis:6379/0 \
-  -e CELERY_RESULT_BACKEND=redis://redis:6379/1 \
-  -e MONGO_URI=mongodb://mongo:27017/docsgpt \
-  backend_image
-
-docker run -d --name worker \
-  --link redis:redis --link mongo:mongo \
-  -e API_KEY=$OPENAI_API_KEY \
-  -e EMBEDDINGS_KEY=$OPENAI_API_KEY \
-  -e CELERY_BROKER_URL=redis://redis:6379/0 \
-  -e CELERY_RESULT_BACKEND=redis://redis:6379/1 \
-  -e MONGO_URI=mongodb://mongo:27017/docsgpt \
-  -e API_URL=http://backend:7091 \
-  backend_image \
-  celery -A app.celery worker -l INFO
-
-# Run frontend service
-docker run -d --name frontend -p 5173:5173 \
-  -e VITE_API_HOST=http://localhost:7091 \
-  frontend_image
+# Function to prompt the user for their choice
+prompt_user() {
+    echo "Do you want to:"
+    echo "1. Download the language model locally (12GB)"
+    echo "2. Use the OpenAI API"
+    read -p "Enter your choice (1/2): " choice
+}
+
+# Function to handle the choice to download the model locally
+download_locally() {
+    echo "LLM_NAME=llama.cpp" > .env
+    echo "VITE_API_STREAMING=true" >> .env
+    echo "EMBEDDINGS_NAME=huggingface_sentence-transformers/all-mpnet-base-v2" >> .env
+    echo "The .env file has been created with LLM_NAME set to llama.cpp."
+
+    # Create the models directory if it does not exist
+    mkdir -p models
+
+    # Download docsgpt-7b-f16.gguf into the models directory if it is not already there
+    if [ ! -f models/docsgpt-7b-f16.gguf ]; then
+        echo "Downloading the model..."
+        wget -P models https://docsgpt.s3.eu-west-1.amazonaws.com/models/docsgpt-7b-f16.gguf
+        echo "Model downloaded to models directory."
+    else
+        echo "Model already exists."
+    fi
+
+    docker-compose -f docker-compose-local.yaml build && docker-compose -f docker-compose-local.yaml up -d
+    python -m venv venv
+    source venv/bin/activate
+    pip install -r application/requirements.txt
+    pip install llama-cpp-python
+    export FLASK_APP=application/app.py
+    export FLASK_DEBUG=true
+    echo "The application is now running on http://localhost:5173"
+    echo "You can stop the application by running the following command:"
+    echo "Ctrl + C and then"
+    echo "docker-compose down"
+    flask run --host=0.0.0.0 --port=7091
+}
+
+# Function to handle the choice to use the OpenAI API
+use_openai() {
+    read -p "Please enter your OpenAI API key: " api_key
+    echo "API_KEY=$api_key" > .env
+    echo "LLM_NAME=openai" >> .env
+    echo "VITE_API_STREAMING=true" >> .env
+    echo "The .env file has been created with API_KEY set to your provided key."
+
+    docker-compose build && docker-compose up -d
+
+    echo "The application will now run on http://localhost:5173"
+    echo "You can stop the application by running the following command:"
+    echo "docker-compose down"
+}
+
+# Prompt the user for their choice
+prompt_user
+
+# Handle the user's choice
+case $choice in
+    1)
+        download_locally
+        ;;
+    2)
+        use_openai
+        ;;
+    *)
+        echo "Invalid choice. Please choose either 1 or 2."
+        ;;
+esac
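Taken together, the local path now works like this: setup.sh option 1 writes LLM_NAME=llama.cpp to .env and downloads docsgpt-7b-f16.gguf, settings.MODEL_PATH defaults to that file under the application's models directory, and the LlamaCpp backend loads it. Below is a hedged sketch of that wiring; it assumes core.settings exposes a `settings` instance and that the route code passes MODEL_PATH through to the constructor, neither of which is shown in these two patches.

```python
# Illustrative glue only; the actual call site in the Flask routes is not part of this series.
from application.core.settings import settings  # assumed to expose a Settings() instance
from application.llm.llama_cpp import LlamaCpp

# settings.MODEL_PATH defaults to the docsgpt-7b-f16.gguf file under the application directory.
llm = LlamaCpp(api_key=None, llm_name=settings.MODEL_PATH)

messages = [
    {"role": "system", "content": "<retrieved documentation chunks>"},
    {"role": "user", "content": "How do I run DocsGPT fully locally?"},
]

# Stream tokens to the client as llama.cpp generates them, matching VITE_API_STREAMING=true.
for chunk in llm.gen_stream(model=None, engine=None, messages=messages):
    print(chunk, end="", flush=True)
```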