script + cpu optimisations

pull/362/head
Alex 8 months ago
parent 6045cbbc62
commit 9a33bf2210

@ -118,6 +118,8 @@ def complete_stream(question, docsearch, chat_history, api_key, conversation_id)
docs = docsearch.search(question, k=2) docs = docsearch.search(question, k=2)
if settings.LLM_NAME == "llama.cpp":
docs = [docs[0]]
# join all page_content together with a newline # join all page_content together with a newline
docs_together = "\n".join([doc.page_content for doc in docs]) docs_together = "\n".join([doc.page_content for doc in docs])
p_chat_combine = chat_combine_template.replace("{summaries}", docs_together) p_chat_combine = chat_combine_template.replace("{summaries}", docs_together)

@ -2,7 +2,7 @@ from pathlib import Path
import os import os
from pydantic import BaseSettings from pydantic import BaseSettings
current_dir = os.path.dirname(os.path.dirname(os.path.abspath(__file__))) current_dir = os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
class Settings(BaseSettings): class Settings(BaseSettings):

@ -1,15 +1,16 @@
from application.llm.base import BaseLLM from application.llm.base import BaseLLM
from application.core.settings import settings
class LlamaCpp(BaseLLM): class LlamaCpp(BaseLLM):
def __init__(self, api_key, llm_name='/Users/pavel/Desktop/docsgpt/application/models/orca-test.bin'): def __init__(self, api_key, llm_name=settings.MODEL_PATH, **kwargs):
global llama global llama
try: try:
from llama_cpp import Llama from llama_cpp import Llama
except ImportError: except ImportError:
raise ImportError("Please install llama_cpp using pip install llama-cpp-python") raise ImportError("Please install llama_cpp using pip install llama-cpp-python")
llama = Llama(model_path=llm_name) llama = Llama(model_path=llm_name, n_ctx=2048)
def gen(self, model, engine, messages, stream=False, **kwargs): def gen(self, model, engine, messages, stream=False, **kwargs):
context = messages[0]['content'] context = messages[0]['content']

@ -6,42 +6,10 @@ services:
environment: environment:
- VITE_API_HOST=http://localhost:7091 - VITE_API_HOST=http://localhost:7091
- VITE_API_STREAMING=$VITE_API_STREAMING - VITE_API_STREAMING=$VITE_API_STREAMING
- VITE_EMBEDDINGS_NAME=$EMBEDDINGS_NAME
ports: ports:
- "5173:5173" - "5173:5173"
# backend:
# build: ./application
# environment:
# - LLM_NAME=$LLM_NAME
# - EMBEDDINGS_NAME=$EMBEDDINGS_NAME
# - CELERY_BROKER_URL=redis://redis:6379/0
# - CELERY_RESULT_BACKEND=redis://redis:6379/1
# - MONGO_URI=mongodb://mongo:27017/docsgpt
# ports:
# - "7091:7091"
# volumes:
# - ./application/indexes:/app/application/indexes
# - ./application/inputs:/app/application/inputs
# - ./application/vectors:/app/application/vectors
# - ./application/models:/app/application/models
# depends_on:
# - redis
# - mongo
worker:
build: ./application
command: celery -A application.app.celery worker -l INFO
environment:
- LLM_NAME=$LLM_NAME
- EMBEDDINGS_NAME=$EMBEDDINGS_NAME
- CELERY_BROKER_URL=redis://redis:6379/0
- CELERY_RESULT_BACKEND=redis://redis:6379/1
- MONGO_URI=mongodb://mongo:27017/docsgpt
- API_URL=http://backend:7091
depends_on:
- redis
- mongo
redis: redis:
image: redis:6-alpine image: redis:6-alpine
ports: ports:

@ -59,6 +59,7 @@ export default function Navigation({
const navRef = useRef(null); const navRef = useRef(null);
const apiHost = import.meta.env.VITE_API_HOST || 'https://docsapi.arc53.com'; const apiHost = import.meta.env.VITE_API_HOST || 'https://docsapi.arc53.com';
const embeddingsName = import.meta.env.VITE_EMBEDDINGS_NAME || 'openai_text-embedding-ada-002';
useEffect(() => { useEffect(() => {
if (!conversations) { if (!conversations) {
@ -253,7 +254,7 @@ export default function Navigation({
<div className="absolute top-12 left-0 right-6 ml-2 mr-4 max-h-52 overflow-y-scroll bg-white shadow-lg"> <div className="absolute top-12 left-0 right-6 ml-2 mr-4 max-h-52 overflow-y-scroll bg-white shadow-lg">
{docs ? ( {docs ? (
docs.map((doc, index) => { docs.map((doc, index) => {
if (doc.model === 'openai_text-embedding-ada-002') { if (doc.model === embeddingsName) {
return ( return (
<div <div
key={index} key={index}

@ -34,13 +34,15 @@ download_locally() {
source venv/bin/activate source venv/bin/activate
pip install -r application/requirements.txt pip install -r application/requirements.txt
pip install llama-cpp-python pip install llama-cpp-python
pip install sentence-transformers
export FLASK_APP=application/app.py export FLASK_APP=application/app.py
export FLASK_DEBUG=true export FLASK_DEBUG=true
echo "The application is now running on http://localhost:5173" echo "The application is now running on http://localhost:5173"
echo "You can stop the application by running the following command:" echo "You can stop the application by running the following command:"
echo "Ctrl + C and then" echo "Ctrl + C and then"
echo "docker-compose down" echo "docker-compose down"
flask run --host=0.0.0.0 --port=7091 flask run --host=0.0.0.0 --port=7091 &
celery -A application.app.celery worker -l INFO
} }
# Function to handle the choice to use the OpenAI API # Function to handle the choice to use the OpenAI API

Loading…
Cancel
Save