Merge pull request #355 from arc53/feature/cpu-llm

llama-cpp local
Alex committed 8 months ago via GitHub
commit 6045cbbc62

.gitignore (vendored)

@@ -171,3 +171,4 @@ application/vectors/
node_modules/
.vscode/settings.json
+models/

application/api/answer/routes.py
@@ -32,20 +32,6 @@ if settings.LLM_NAME == "gpt4":
else:
    gpt_model = 'gpt-3.5-turbo'
-if settings.SELF_HOSTED_MODEL:
-    from langchain.llms import HuggingFacePipeline
-    from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
-    model_id = settings.LLM_NAME  # hf model id (Arc53/docsgpt-7b-falcon, Arc53/docsgpt-14b)
-    tokenizer = AutoTokenizer.from_pretrained(model_id)
-    model = AutoModelForCausalLM.from_pretrained(model_id)
-    pipe = pipeline(
-        "text-generation", model=model,
-        tokenizer=tokenizer, max_new_tokens=2000,
-        device_map="auto", eos_token_id=tokenizer.eos_token_id
-    )
-    hf = HuggingFacePipeline(pipeline=pipe)
# load the prompts
current_dir = os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
with open(os.path.join(current_dir, "prompts", "combine_prompt.txt"), "r") as f:

application/core/settings.py
@@ -1,6 +1,8 @@
from pathlib import Path
+import os
from pydantic import BaseSettings
+current_dir = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
class Settings(BaseSettings):
@@ -9,9 +11,8 @@ class Settings(BaseSettings):
    CELERY_BROKER_URL: str = "redis://localhost:6379/0"
    CELERY_RESULT_BACKEND: str = "redis://localhost:6379/1"
    MONGO_URI: str = "mongodb://localhost:27017/docsgpt"
-    MODEL_PATH: str = "./models/gpt4all-model.bin"
+    MODEL_PATH: str = os.path.join(current_dir, "models/docsgpt-7b-f16.gguf")
    TOKENS_MAX_HISTORY: int = 150
-    SELF_HOSTED_MODEL: bool = False
    UPLOAD_FOLDER: str = "inputs"
    VECTOR_STORE: str = "faiss"  # "faiss" or "elasticsearch"
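Since Settings is a pydantic BaseSettings class, any of these defaults can be overridden through environment variables or the .env file that setup.sh writes later in this PR. A minimal sketch (not part of the diff; the import path and the illustrative model path are assumptions):

import os
from application.core.settings import Settings

# Environment variables override the class defaults; this is how the
# .env written by setup.sh (LLM_NAME=llama.cpp, EMBEDDINGS_NAME=...) reaches the app.
os.environ["MODEL_PATH"] = "/opt/models/docsgpt-7b-f16.gguf"  # illustrative path only
settings = Settings()
print(settings.MODEL_PATH)  # -> /opt/models/docsgpt-7b-f16.gguf
print(settings.MONGO_URI)   # -> mongodb://localhost:27017/docsgpt (default)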

application/llm/llama_cpp.py (new file)
@@ -0,0 +1,38 @@
from application.llm.base import BaseLLM

class LlamaCpp(BaseLLM):

    def __init__(self, api_key, llm_name='/Users/pavel/Desktop/docsgpt/application/models/orca-test.bin'):
        global llama
        try:
            from llama_cpp import Llama
        except ImportError:
            raise ImportError("Please install llama_cpp using pip install llama-cpp-python")

        llama = Llama(model_path=llm_name)

    def gen(self, model, engine, messages, stream=False, **kwargs):
        context = messages[0]['content']
        user_question = messages[-1]['content']
        prompt = f"### Instruction \n {user_question} \n ### Context \n {context} \n ### Answer \n"

        result = llama(prompt, max_tokens=150, echo=False)
        # import sys
        # print(result['choices'][0]['text'].split('### Answer \n')[-1], file=sys.stderr)

        return result['choices'][0]['text'].split('### Answer \n')[-1]

    def gen_stream(self, model, engine, messages, stream=True, **kwargs):
        context = messages[0]['content']
        user_question = messages[-1]['content']
        prompt = f"### Instruction \n {user_question} \n ### Context \n {context} \n ### Answer \n"

        result = llama(prompt, max_tokens=150, echo=False, stream=stream)
        # import sys
        # print(list(result), file=sys.stderr)

        for item in result:
            for choice in item['choices']:
                yield choice['text']
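For reference, a minimal sketch of how this backend can be exercised on its own (not part of the diff; the model path is illustrative and assumes a GGUF file has already been downloaded, e.g. by setup.sh):

from application.llm.llama_cpp import LlamaCpp

# Illustrative path; point this at whatever GGUF model you downloaded.
llm = LlamaCpp(api_key=None, llm_name="application/models/docsgpt-7b-f16.gguf")

# gen() reads the context from the first message and the question from the last;
# the `model` and `engine` arguments are ignored by this local backend.
messages = [
    {"role": "system", "content": "DocsGPT is an open-source documentation assistant."},
    {"role": "user", "content": "What does DocsGPT do?"},
]
print(llm.gen(model=None, engine=None, messages=messages))

# Streaming variant: tokens are yielded as they are generated.
for token in llm.gen_stream(model=None, engine=None, messages=messages):
    print(token, end="", flush=True)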

application/llm/llm_creator.py
@@ -1,6 +1,7 @@
from application.llm.openai import OpenAILLM, AzureOpenAILLM
from application.llm.sagemaker import SagemakerAPILLM
from application.llm.huggingface import HuggingFaceLLM
+from application.llm.llama_cpp import LlamaCpp
@@ -9,7 +10,8 @@ class LLMCreator:
        'openai': OpenAILLM,
        'azure_openai': AzureOpenAILLM,
        'sagemaker': SagemakerAPILLM,
-        'huggingface': HuggingFaceLLM
+        'huggingface': HuggingFaceLLM,
+        'llama.cpp': LlamaCpp
    }

    @classmethod
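A sketch of how the new key is expected to be used (not part of the diff; the create_llm classmethod and its signature are not shown in this hunk, so they are assumptions based on how the factory is normally called):

from application.llm.llm_creator import LLMCreator

# Assumed factory call: LLM_NAME=llama.cpp (as written to .env by setup.sh)
# selects the LlamaCpp class registered above; api_key is unused by the local backend.
llm = LLMCreator.create_llm("llama.cpp", api_key=None)
answer = llm.gen(model=None, engine=None, messages=[
    {"role": "system", "content": "some retrieved context"},
    {"role": "user", "content": "a user question"},
])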

application/vectorstore/base.py
@@ -2,7 +2,7 @@ from abc import ABC, abstractmethod
import os
from langchain.embeddings import (
    OpenAIEmbeddings,
-    HuggingFaceHubEmbeddings,
+    HuggingFaceEmbeddings,
    CohereEmbeddings,
    HuggingFaceInstructEmbeddings,
)
@@ -22,7 +22,7 @@ class BaseVectorStore(ABC):
    def _get_embeddings(self, embeddings_name, embeddings_key=None):
        embeddings_factory = {
            "openai_text-embedding-ada-002": OpenAIEmbeddings,
-            "huggingface_sentence-transformers/all-mpnet-base-v2": HuggingFaceHubEmbeddings,
+            "huggingface_sentence-transformers/all-mpnet-base-v2": HuggingFaceEmbeddings,
            "huggingface_hkunlp/instructor-large": HuggingFaceInstructEmbeddings,
            "cohere_medium": CohereEmbeddings
        }
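The practical effect of this swap is that the sentence-transformers model now runs locally through langchain's HuggingFaceEmbeddings instead of calling the Hugging Face Hub API, so no key is needed. A minimal sketch (not part of the diff; the model name is taken from the factory key above):

from langchain.embeddings import HuggingFaceEmbeddings

# Downloads sentence-transformers/all-mpnet-base-v2 on first use and embeds locally on CPU.
embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-mpnet-base-v2")
vector = embeddings.embed_query("How do I run DocsGPT fully offline?")
print(len(vector))  # 768-dimensional embedding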

docker-compose-local.yaml (new file)
@@ -0,0 +1,58 @@
version: "3.9"

services:
  frontend:
    build: ./frontend
    environment:
      - VITE_API_HOST=http://localhost:7091
      - VITE_API_STREAMING=$VITE_API_STREAMING
    ports:
      - "5173:5173"

  # backend:
  #   build: ./application
  #   environment:
  #     - LLM_NAME=$LLM_NAME
  #     - EMBEDDINGS_NAME=$EMBEDDINGS_NAME
  #     - CELERY_BROKER_URL=redis://redis:6379/0
  #     - CELERY_RESULT_BACKEND=redis://redis:6379/1
  #     - MONGO_URI=mongodb://mongo:27017/docsgpt
  #   ports:
  #     - "7091:7091"
  #   volumes:
  #     - ./application/indexes:/app/application/indexes
  #     - ./application/inputs:/app/application/inputs
  #     - ./application/vectors:/app/application/vectors
  #     - ./application/models:/app/application/models
  #   depends_on:
  #     - redis
  #     - mongo

  worker:
    build: ./application
    command: celery -A application.app.celery worker -l INFO
    environment:
      - LLM_NAME=$LLM_NAME
      - EMBEDDINGS_NAME=$EMBEDDINGS_NAME
      - CELERY_BROKER_URL=redis://redis:6379/0
      - CELERY_RESULT_BACKEND=redis://redis:6379/1
      - MONGO_URI=mongodb://mongo:27017/docsgpt
      - API_URL=http://backend:7091
    depends_on:
      - redis
      - mongo

  redis:
    image: redis:6-alpine
    ports:
      - 6379:6379

  mongo:
    image: mongo:6
    ports:
      - 27017:27017
    volumes:
      - mongodb_data_container:/data/db

volumes:
  mongodb_data_container:

setup.sh
@@ -1,45 +1,77 @@
#!/bin/bash
cd "$(dirname "$0")" || exit
# Create the required directories on the host machine if they don't exist
[ ! -d "./application/indexes" ] && mkdir -p ./application/indexes
[ ! -d "./application/inputs" ] && mkdir -p ./application/inputs
[ ! -d "./application/vectors" ] && mkdir -p ./application/vectors
-# Build frontend and backend images
-docker build -t frontend_image ./frontend
-docker build -t backend_image ./application
-# Run redis and mongo services
-docker run -d --name redis -p 6379:6379 redis:6-alpine
-docker run -d --name mongo -p 27017:27017 -v mongodb_data_container:/data/db mongo:6
-# Run backend and worker services
-docker run -d --name backend -p 7091:7091 \
-  --link redis:redis --link mongo:mongo \
-  -v $(pwd)/application/indexes:/app/indexes \
-  -v $(pwd)/application/inputs:/app/inputs \
-  -v $(pwd)/application/vectors:/app/vectors \
-  -e API_KEY=$OPENAI_API_KEY \
-  -e EMBEDDINGS_KEY=$OPENAI_API_KEY \
-  -e CELERY_BROKER_URL=redis://redis:6379/0 \
-  -e CELERY_RESULT_BACKEND=redis://redis:6379/1 \
-  -e MONGO_URI=mongodb://mongo:27017/docsgpt \
-  backend_image
-docker run -d --name worker \
-  --link redis:redis --link mongo:mongo \
-  -e API_KEY=$OPENAI_API_KEY \
-  -e EMBEDDINGS_KEY=$OPENAI_API_KEY \
-  -e CELERY_BROKER_URL=redis://redis:6379/0 \
-  -e CELERY_RESULT_BACKEND=redis://redis:6379/1 \
-  -e MONGO_URI=mongodb://mongo:27017/docsgpt \
-  -e API_URL=http://backend:7091 \
-  backend_image \
-  celery -A app.celery worker -l INFO
-# Run frontend service
-docker run -d --name frontend -p 5173:5173 \
-  -e VITE_API_HOST=http://localhost:7091 \
-  frontend_image
+# Function to prompt the user for their choice
+prompt_user() {
+    echo "Do you want to:"
+    echo "1. Download the language model locally (12GB)"
+    echo "2. Use the OpenAI API"
+    read -p "Enter your choice (1/2): " choice
+}
+
+# Function to handle the choice to download the model locally
+download_locally() {
+    echo "LLM_NAME=llama.cpp" > .env
+    echo "VITE_API_STREAMING=true" >> .env
+    echo "EMBEDDINGS_NAME=huggingface_sentence-transformers/all-mpnet-base-v2" >> .env
+    echo "The .env file has been created with LLM_NAME set to llama.cpp."
+
+    # Create the models directory if it does not exist
+    mkdir -p models
+    # Download the model only if docsgpt-7b-f16.gguf is not already there
+    if [ ! -f models/docsgpt-7b-f16.gguf ]; then
+        echo "Downloading the model..."
+        wget -P models https://docsgpt.s3.eu-west-1.amazonaws.com/models/docsgpt-7b-f16.gguf
+        echo "Model downloaded to models directory."
+    else
+        echo "Model already exists."
+    fi
+
+    docker-compose -f docker-compose-local.yaml build && docker-compose -f docker-compose-local.yaml up -d
+
+    python -m venv venv
+    source venv/bin/activate
+    pip install -r application/requirements.txt
+    pip install llama-cpp-python
+    export FLASK_APP=application/app.py
+    export FLASK_DEBUG=true
+
+    echo "The application is now running on http://localhost:5173"
+    echo "You can stop the application by running the following command:"
+    echo "Ctrl + C and then"
+    echo "docker-compose down"
+    flask run --host=0.0.0.0 --port=7091
+}
+
+# Function to handle the choice to use the OpenAI API
+use_openai() {
+    read -p "Please enter your OpenAI API key: " api_key
+    echo "API_KEY=$api_key" > .env
+    echo "LLM_NAME=openai" >> .env
+    echo "VITE_API_STREAMING=true" >> .env
+    echo "The .env file has been created with API_KEY set to your provided key."
+
+    docker-compose build && docker-compose up -d
+
+    echo "The application will run on http://localhost:5173"
+    echo "You can stop the application by running the following command:"
+    echo "docker-compose down"
+}
+
+# Prompt the user for their choice
+prompt_user
+
+# Handle the user's choice
+case $choice in
+    1)
+        download_locally
+        ;;
+    2)
+        use_openai
+        ;;
+    *)
+        echo "Invalid choice. Please choose either 1 or 2."
+        ;;
+esac
