Merge branch 'main' into feature/remote-loads


.gitignore

@ -172,4 +172,4 @@ application/vectors/
node_modules/
.vscode/settings.json
models/
model/
model/

@ -123,7 +123,7 @@ docker compose -f docker-compose-dev.yaml up -d
> [!Note]
> Make sure you have Python 3.10 or 3.11 installed.
1. Export required environment variables or prepare a `.env` file in the `/application` folder:
1. Export required environment variables or prepare a `.env` file in the project folder:
- Copy [.env_sample](https://github.com/arc53/DocsGPT/blob/main/application/.env_sample) and create `.env`.
(check out [`application/core/settings.py`](application/core/settings.py) if you want to see more config options.)
@ -152,11 +152,12 @@ You can use the script below, or download it manually from [here](https://d3dg10
wget https://d3dg1063dc54p9.cloudfront.net/models/embeddings/mpnet-base-v2.zip
unzip mpnet-base-v2.zip -d model
rm mpnet-base-v2.zip
```
4. Change to the `application/` subdir by the command `cd application/` and install dependencies for the backend:
4. Install dependencies for the backend:
```commandline
pip install -r requirements.txt
pip install -r application/requirements.txt
```
5. Run the app using `flask --app application/app.py run --host=0.0.0.0 --port=7091`.
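For reference, a quick pre-flight check of the steps above, sketched in Python (the paths mirror the commands in this section; nothing here is part of DocsGPT itself):
```python
import sys
from pathlib import Path

# The steps above assume Python 3.10/3.11, the mpnet-base-v2 model unzipped
# into ./model, and backend dependencies installed from application/requirements.txt.
checks = {
    "Python 3.10 or 3.11": sys.version_info[:2] in ((3, 10), (3, 11)),
    "Embedding model in ./model": Path("model").is_dir(),
    "Backend requirements file present": Path("application/requirements.txt").is_file(),
}
for name, ok in checks.items():
    print(f"[{'ok' if ok else 'missing'}] {name}")
```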

@ -2,15 +2,17 @@ FROM python:3.11-slim-bullseye as builder
# Tiktoken requires Rust toolchain, so build it in a separate stage
RUN apt-get update && apt-get install -y gcc curl
RUN apt-get install -y wget unzip
RUN wget https://d3dg1063dc54p9.cloudfront.net/models/embeddings/mpnet-base-v2.zip
RUN unzip mpnet-base-v2.zip -d model
RUN rm mpnet-base-v2.zip
RUN curl https://sh.rustup.rs -sSf | sh -s -- -y && apt-get install --reinstall libc6-dev -y
ENV PATH="/root/.cargo/bin:${PATH}"
RUN pip install --upgrade pip && pip install tiktoken==0.5.2
COPY requirements.txt .
RUN pip install -r requirements.txt
RUN apt-get install -y wget unzip
RUN wget https://d3dg1063dc54p9.cloudfront.net/models/embeddings/mpnet-base-v2.zip
RUN unzip mpnet-base-v2.zip -d model
RUN rm mpnet-base-v2.zip
FROM python:3.11-slim-bullseye

@ -39,6 +39,9 @@ class Settings(BaseSettings):
SAGEMAKER_ACCESS_KEY: Optional[str] = None # SageMaker access key
SAGEMAKER_SECRET_KEY: Optional[str] = None # SageMaker secret key
# prem ai project id
PREMAI_PROJECT_ID: Optional[str] = None
path = Path(__file__).parent.parent.absolute()
settings = Settings(_env_file=path.joinpath(".env"), _env_file_encoding="utf-8")
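The new PREMAI_PROJECT_ID field behaves like the other settings: it can come from the process environment or from the `.env` file loaded above. A minimal sketch (the project id value is hypothetical):
```python
import os

# Hypothetical project id; supply your own PremAI project id here or in .env.
os.environ["PREMAI_PROJECT_ID"] = "1234"

from application.core.settings import settings

# pydantic BaseSettings resolves the value from the environment
# (or from the .env file configured above).
print(settings.PREMAI_PROJECT_ID)
```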

@ -20,7 +20,7 @@ class DocsGPTAPILLM(BaseLLM):
"max_new_tokens": 30
}
)
response_clean = response.json()['a'].split("###")[0]
response_clean = response.json()['a'].replace("###", "")
return response_clean
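The change keeps the text that follows a `###` marker instead of discarding it; a minimal illustration of the difference:
```python
raw = "First part### second part"

# Old behaviour: everything after the first marker was dropped.
print(raw.split("###")[0])     # -> "First part"

# New behaviour: markers are stripped but the surrounding text is kept.
print(raw.replace("###", ""))  # -> "First part second part"
```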

@ -4,6 +4,7 @@ from application.llm.huggingface import HuggingFaceLLM
from application.llm.llama_cpp import LlamaCpp
from application.llm.anthropic import AnthropicLLM
from application.llm.docsgpt_provider import DocsGPTAPILLM
from application.llm.premai import PremAILLM
@ -15,7 +16,8 @@ class LLMCreator:
'huggingface': HuggingFaceLLM,
'llama.cpp': LlamaCpp,
'anthropic': AnthropicLLM,
'docsgpt': DocsGPTAPILLM
'docsgpt': DocsGPTAPILLM,
'premai': PremAILLM,
}
@classmethod
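With the `'premai'` entry registered, the factory can hand back the new provider by name. A sketch, assuming the classmethod is `create_llm` and the module path is `application.llm.llm_creator` (neither is shown in this hunk):
```python
# Assumed import path and classmethod name; only '@classmethod' is visible above.
from application.llm.llm_creator import LLMCreator

llm = LLMCreator.create_llm("premai", api_key="your-premai-api-key")
```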

@ -0,0 +1,33 @@
from application.llm.base import BaseLLM
from application.core.settings import settings
class PremAILLM(BaseLLM):
def __init__(self, api_key):
from premai import Prem
self.client = Prem(
api_key=api_key
)
self.api_key = api_key
self.project_id = settings.PREMAI_PROJECT_ID
def gen(self, model, engine, messages, stream=False, **kwargs):
response = self.client.chat.completions.create(model=model,
project_id=self.project_id,
messages=messages,
stream=stream,
**kwargs)
return response.choices[0].message["content"]
def gen_stream(self, model, engine, messages, stream=True, **kwargs):
response = self.client.chat.completions.create(model=model,
project_id=self.project_id,
messages=messages,
stream=stream,
**kwargs)
for line in response:
if line.choices[0].delta["content"] is not None:
yield line.choices[0].delta["content"]
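A quick way to exercise the new provider directly (assumes the `premai` package is installed, `PREMAI_PROJECT_ID` is configured, and a valid API key; the model name below is only an example):
```python
from application.llm.premai import PremAILLM

llm = PremAILLM(api_key="your-premai-api-key")
messages = [{"role": "user", "content": "What is DocsGPT?"}]

# Non-streaming call: returns the full answer string.
print(llm.gen(model="gpt-3.5-turbo", engine=None, messages=messages))

# Streaming call: yields the answer chunk by chunk.
for chunk in llm.gen_stream(model="gpt-3.5-turbo", engine=None, messages=messages):
    print(chunk, end="", flush=True)
```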

@ -147,12 +147,24 @@ class SimpleDirectoryReader(BaseReader):
# do standard read
with open(input_file, "r", errors=self.errors) as f:
data = f.read()
# Prepare metadata for this file
if self.file_metadata is not None:
file_metadata = self.file_metadata(str(input_file))
else:
# Provide a default empty metadata dict
file_metadata = {'title': '', 'store': ''}
# TODO: Find a case with no metadata and check if it breaks anything
if isinstance(data, List):
data_list.extend(data)
# Extend data_list with each item in the data list
data_list.extend([str(d) for d in data])
# For each item in the data list, add the file's metadata to metadata_list
metadata_list.extend([file_metadata for _ in data])
else:
# Add the single piece of data to data_list
data_list.append(str(data))
if self.file_metadata is not None:
metadata_list.append(self.file_metadata(str(input_file)))
# Add the file's metadata to metadata_list
metadata_list.append(file_metadata)
if concatenate:
return [Document("\n".join(data_list))]
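With this change every chunk gets a metadata dict, falling back to `{'title': '', 'store': ''}` when no `file_metadata` callable is supplied. A sketch of supplying one (the import path and constructor arguments are assumptions based on the usual SimpleDirectoryReader interface):
```python
from pathlib import Path

# Assumed import path for the reader this hunk modifies.
from application.parser.file.bulk import SimpleDirectoryReader

def file_metadata(path: str) -> dict:
    # Mirror the default keys used above.
    return {"title": Path(path).name, "store": "local"}

reader = SimpleDirectoryReader(input_dir="inputs", file_metadata=file_metadata)
documents = reader.load_data()
```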

@ -21,16 +21,15 @@ def group_documents(documents: List[Document], min_tokens: int, max_tokens: int)
for doc in documents:
doc_len = len(tiktoken.get_encoding("cl100k_base").encode(doc.text))
if current_group is None:
current_group = Document(text=doc.text, doc_id=doc.doc_id, embedding=doc.embedding,
extra_info=doc.extra_info)
elif len(tiktoken.get_encoding("cl100k_base").encode(
current_group.text)) + doc_len < max_tokens and doc_len < min_tokens:
current_group.text += " " + doc.text
# Check if current group is empty or if the document can be added based on token count and matching metadata
if current_group is None or (len(tiktoken.get_encoding("cl100k_base").encode(current_group.text)) + doc_len < max_tokens and doc_len < min_tokens and current_group.extra_info == doc.extra_info):
if current_group is None:
current_group = doc # Use the document directly to retain its metadata
else:
current_group.text += " " + doc.text # Append text to the current group
else:
docs.append(current_group)
current_group = Document(text=doc.text, doc_id=doc.doc_id, embedding=doc.embedding,
extra_info=doc.extra_info)
current_group = doc # Start a new group with the current document
if current_group is not None:
docs.append(current_group)
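A small check of the new grouping rule: short documents are merged only while they fit the token budget and share the same `extra_info`. A sketch (import paths are assumptions; the token limits are arbitrary):
```python
# Assumed import paths for the classes/functions shown in this hunk.
from application.parser.schema.base import Document
from application.parser.token_func import group_documents

shared = {"title": "guide.md", "store": "local"}
docs = [
    Document(text="First chunk.", doc_id="a", embedding=None, extra_info=shared),
    Document(text="Second chunk.", doc_id="b", embedding=None, extra_info=shared),
    Document(text="Other file.", doc_id="c", embedding=None,
             extra_info={"title": "other.md", "store": "local"}),
]

groups = group_documents(docs, min_tokens=50, max_tokens=2000)
# Expected: the first two documents merge; the third starts its own group.
print(len(groups))  # -> 2
```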

@ -8,10 +8,10 @@ import {
selectPrompt,
setPrompt,
selectSourceDocs,
setSourceDocs,
} from './preferences/preferenceSlice';
import { Doc } from './preferences/preferenceApi';
import { useDarkTheme } from './hooks';
import { Light } from 'react-syntax-highlighter';
type PromptProps = {
prompts: { name: string; id: string; type: string }[];
selectedPrompt: { name: string; id: string; type: string };
@ -86,13 +86,11 @@ const Setting: React.FC = () => {
fetch(`${apiHost}/api/delete_old?path=${docPath}`, {
method: 'GET',
})
.then(() => {
// remove the image element from the DOM
const imageElement = document.querySelector(
`#img-${index}`,
) as HTMLElement;
const parentElement = imageElement.parentNode as HTMLElement;
parentElement.parentNode?.removeChild(parentElement);
.then((response) => {
if(response.ok && documents){
const updatedDocuments = [...documents.slice(0, index), ...documents.slice(index + 1)];
dispatch(setSourceDocs(updatedDocuments));
}
})
.catch((error) => console.error(error));
};

@ -140,12 +140,12 @@ export default function Conversation() {
)}
{queries.length > 0 && (
<div className="mt-20 flex flex-col transition-all md:w-3/4">
<div className="mt-20 mb-9 flex flex-col transition-all md:w-3/4">
{queries.map((query, index) => {
return (
<Fragment key={index}>
<ConversationBubble
className={'last:mb-27 mb-7'}
className={'last:mb-28 mb-7'}
key={`${index}QUESTION`}
message={query.prompt}
type="QUESTION"
@ -160,7 +160,7 @@ export default function Conversation() {
{queries.length === 0 && (
<Hero className="mt-24 h-[100vh] md:mt-52"></Hero>
)}
<div className="relative bottom-0 flex w-10/12 flex-col items-end self-center bg-white dark:bg-raisin-black pt-3 md:fixed md:w-[65%]">
<div className="absolute bottom-0 flex w-11/12 md:w-[65%] flex-col items-end self-center bg-white dark:bg-raisin-black pt-4 md:fixed">
<div className="flex h-full w-full">
<div
id="inputbox"
@ -169,7 +169,7 @@ export default function Conversation() {
placeholder="Type your message here..."
contentEditable
onPaste={handlePaste}
className={`border-000000 overflow-x-hidden; max-h-24 min-h-[2.6rem] w-full overflow-y-auto whitespace-pre-wrap rounded-3xl border bg-white dark:bg-transparent dark:text-bright-gray py-2 pl-4 pr-9 text-base leading-7 opacity-100 focus:outline-none`}
className={`border-000000 overflow-x-hidden max-h-24 min-h-[2.6rem] w-full overflow-y-auto whitespace-pre-wrap rounded-3xl border bg-white dark:bg-raisin-black dark:text-bright-gray py-2 pl-4 pr-9 text-base leading-7 opacity-100 focus:outline-none`}
onKeyDown={(e) => {
if (e.key === 'Enter' && !e.shiftKey) {
e.preventDefault();
@ -200,9 +200,8 @@ export default function Conversation() {
</div>
)}
</div>
<p className="text-gray-595959 dark:text-bright-gray w-[100vw] self-center bg-transparent p-5 text-center text-xs md:w-full">
This is a chatbot that uses the GPT-3, Faiss and LangChain to answer
questions.
<p className="text-gray-595959 dark:text-bright-gray bg-white dark:bg-raisin-black w-[100vw] self-center bg-transparent p-5 text-center text-xs md:w-full">
DocsGPT uses GenAI; please review critical information using sources.
</p>
</div>
</div>

@ -0,0 +1,5 @@
# Elastic Beanstalk Files
.elasticbeanstalk/*
!.elasticbeanstalk/*.cfg.yml
!.elasticbeanstalk/*.global.yml

@ -6,6 +6,6 @@ COPY package*.json ./
RUN npm install
COPY . .
EXPOSE 7091
EXPOSE 8080
CMD [ "npm", "run", "start"]

@ -9,6 +9,7 @@
"version": "1.0.0",
"license": "ISC",
"dependencies": {
"cors": "^2.8.5",
"json-server": "^0.17.4",
"uuid": "^9.0.1"
},

@ -12,6 +12,7 @@
"author": "",
"license": "ISC",
"dependencies": {
"cors": "^2.8.5",
"json-server": "^0.17.4",
"uuid": "^9.0.1"
},

@ -225,7 +225,19 @@
"version": "0.1.0"
}
],
"conversations": [],
"conversations": [
{
"id": "65cf39c936523eea21ebe117",
"name": "Request clarification"
},
{
"id": "65cf39ba36523eea21ebe116",
"name": "Clarification request"
},
{
"id": "65cf37e97d527c332bbac933",
"name": "Greetings, assistance inquiry."
}],
"docs_check": {
"status": "loaded"
}

@ -1,7 +1,7 @@
import jsonServer from "json-server";
import routes from "./mocks/routes.json" assert { type: "json" };
import { v4 as uuid } from "uuid";
import cors from 'cors'
const server = jsonServer.create();
const router = jsonServer.router("./src/mocks/db.json");
const middlewares = jsonServer.defaults();
@ -9,7 +9,7 @@ const middlewares = jsonServer.defaults();
const localStorage = [];
server.use(middlewares);
server.use(cors({ origin: ['*'] }))
server.use(jsonServer.rewriter(routes));
server.use((req, res, next) => {
@ -49,22 +49,83 @@ router.render = (req, res) => {
} else {
res.status(404).jsonp({});
}
} else if (req.url === "/stream") {
res.status(200).jsonp({
data: "The answer is 42",
sources: [
"https://en.wikipedia.org/wiki/42_(number)",
"https://en.wikipedia.org/wiki/42_(number)",
],
conversation_id: "1234",
} else if (req.url === "/stream" && req.method === "POST") {
res.writeHead(200, {
'Content-Type': 'text/event-stream',
'Cache-Control': 'no-cache',
'Connection': 'keep-alive'
});
} else {
const message = ('Hi, How are you today?').split(' ');
let index = 0;
const interval = setInterval(() => {
if (index < message.length) {
res.write(`data: {"answer": "${message[index++]} "}\n`);
} else {
res.write(`data: {"type": "id", "id": "65cbc39d11f077b9eeb06d26"}\n`)
res.write(`data: {"type": "end"}\n`)
clearInterval(interval); // Stop the interval once the message is fully streamed
res.end(); // End the response
}
}, 500); // Send a word every 500 ms
}
else if (req.url === '/search' && req.method === 'POST') {
res.status(200).json(
[
{
"text": "\n\n/api/answer\nIt's a POST request that sends a JSON in body with 4 values. It will receive an answer for a user provided question.\n",
"title": "API-docs.md"
},
{
"text": "\n\nOur Standards\n\nExamples of behavior that contribute to a positive environment for our\ncommunity include:\n* Demonstrating empathy and kindness towards other people\n",
"title": "How-to-use-different-LLM.md"
}
]
)
}
else if (req.url === '/get_prompts' && req.method === 'GET') {
res.status(200).json([
{
"id": "default",
"name": "default",
"type": "public"
},
{
"id": "creative",
"name": "creative",
"type": "public"
},
{
"id": "strict",
"name": "strict",
"type": "public"
}
]);
}
else if (req.url.startsWith('/get_single_prompt') && req.method==='GET') {
const id = req.query.id;
console.log('hre');
if (id === 'creative')
res.status(200).json({
"content": "You are a DocsGPT, friendly and helpful AI assistant by Arc53 that provides help with documents. You give thorough answers with code examples if possible."
})
else if (id === 'strict') {
res.status(200).json({
"content": "You are an AI Assistant, DocsGPT, adept at offering document assistance. \nYour expertise lies in providing answer on top of provided context."
})
}
else {
res.status(200).json({
"content": "You are a helpful AI assistant, DocsGPT, specializing in document assistance, designed to offer detailed and informative responses."
})
}
}
else {
res.status(res.statusCode).jsonp(res.locals.data);
}
};
server.use(router);
server.listen(7091, () => {
server.listen(8080, () => {
console.log("JSON Server is running");
});
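Once the mock server is listening on port 8080, the streamed endpoint can be sanity-checked with a small consumer like this (a sketch assuming the `requests` package; the request body is illustrative, since the mock ignores it):
```python
import requests

# The mock streams Server-Sent-Event style lines word by word and then
# emits an "id" and an "end" event, so read the response incrementally.
with requests.post("http://localhost:8080/stream",
                   json={"question": "Hello"}, stream=True) as resp:
    for line in resp.iter_lines(decode_unicode=True):
        if line:
            print(line)  # e.g. data: {"answer": "Hi "}
```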
