DocsGPT/application/api/user/routes.py

import os
from flask import Blueprint, request, jsonify
import requests
import json
from pymongo import MongoClient
from bson.objectid import ObjectId
from werkzeug.utils import secure_filename
import http.client

from application.api.user.tasks import ingest

from application.core.settings import settings
from application.vectorstore.vector_creator import VectorCreator

# MongoDB client, database handle and the two collections used by the
# route handlers in this module.
mongo = MongoClient(settings.MONGO_URI)
db = mongo["docsgpt"]
conversations_collection = db["conversations"]
vectors_collection = db["vectors"]
# Flask blueprint on which every endpoint below is registered.
user = Blueprint('user', __name__)

# Result of applying dirname three times to this file's absolute path;
# upload and index paths in the handlers are joined onto it.
current_dir = os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))

@user.route("/api/delete_conversation", methods=["POST"])
def delete_conversation():
    """Remove a single conversation document from MongoDB.

    The conversation is selected by the ``id`` query-string argument, which
    is converted to an ``ObjectId`` and matched against the ``_id`` field.

    Returns:
        ``{"status": "ok"}`` after the delete call has been issued.
    """
    target_id = ObjectId(request.args.get("id"))
    conversations_collection.delete_one({"_id": target_id})
    return {"status": "ok"}

@user.route("/api/get_conversations", methods=["get"])
def get_conversations():
    """Return the id and name of every stored conversation.

    Documents are read from the conversations collection sorted by their
    ``date`` field in descending order.

    Returns:
        A JSON array of ``{"id": <str>, "name": <name>}`` objects.
    """
    cursor = conversations_collection.find().sort("date", -1)
    summaries = [
        {"id": str(doc["_id"]), "name": doc["name"]}
        for doc in cursor
    ]
    return jsonify(summaries)


@user.route("/api/get_single_conversation", methods=["get"])
def get_single_conversation():
    """Return the ``queries`` field of one conversation.

    The conversation is looked up by the ``id`` query-string argument,
    converted to an ``ObjectId``.

    Returns:
        The conversation's ``queries`` value serialized as JSON.
    """
    lookup = {"_id": ObjectId(request.args.get("id"))}
    record = conversations_collection.find_one(lookup)
    return jsonify(record["queries"])


@user.route("/api/feedback", methods=["POST"])
def api_feedback():
    """Log a feedback entry and forward it to the remote feedback endpoint.

    Expects a JSON body with ``question``, ``answer`` and ``feedback`` keys.
    The three values are printed to stdout and then POSTed as JSON to the
    feedback collection URL.

    Returns:
        ``{"status": <reason phrase>}`` where the phrase is the standard
        text for the upstream HTTP status code, or ``"ok"`` when the code
        has no entry in ``http.client.responses``.
    """
    payload = request.get_json()
    question = payload["question"]
    answer = payload["answer"]
    feedback = payload["feedback"]

    separator = "-" * 5
    print(separator)
    labelled = (
        ("Question: ", question),
        ("Answer: ", answer),
        ("Feedback: ", feedback),
    )
    for label, text in labelled:
        print(label + text)
    print(separator)

    body = json.dumps({"answer": answer, "question": question, "feedback": feedback})
    upstream = requests.post(
        url="https://86x89umx77.execute-api.eu-west-2.amazonaws.com/docsgpt-feedback",
        headers={"Content-Type": "application/json; charset=utf-8"},
        data=body,
    )
    reason = http.client.responses.get(upstream.status_code, "ok")
    return {"status": reason}


@user.route("/api/delete_old", methods=["get"])
def delete_old():
    """Delete old indexes.

    Reads the ``path`` query-string argument, removes the matching record
    from the vectors collection, and then deletes the index itself: the
    directory under ``current_dir`` when the FAISS store is configured,
    otherwise via the configured vector store's ``delete_index``.

    The first path component must be ``indexes`` or ``vectors``. Every
    following component is passed through ``secure_filename`` and the
    sanitized components are what is joined onto ``current_dir``, so
    ``..`` or other unsafe components in the request cannot steer the
    deletion outside those two directories.

    Returns:
        ``{"status": "ok"}`` on success, or ``{"status": "error"}`` when
        ``path`` is missing, does not start with an allowed root, or
        contains a component that sanitizes to nothing (e.g. ``..``).
    """
    import shutil

    path = request.args.get("path")
    if not path:
        return {"status": "error"}

    root, *rest = path.split("/")
    # check that path starts with indexes or vectors
    if root not in ["indexes", "vectors"]:
        return {"status": "error"}

    cleaned = [root]
    for part in rest:
        if not part:
            # tolerate doubled or trailing slashes
            continue
        safe = secure_filename(part)
        if not safe:
            # component was made up only of unsafe characters, e.g. ".."
            return {"status": "error"}
        cleaned.append(safe)
    path_clean = "/".join(cleaned)

    # The database record is matched on the location exactly as requested.
    vectors_collection.delete_one({"location": path})
    target = os.path.join(current_dir, path_clean)
    if settings.VECTOR_STORE == "faiss":
        try:
            shutil.rmtree(target)
        except FileNotFoundError:
            # nothing on disk to remove; the delete is best-effort
            pass
    else:
        vectorstore = VectorCreator.create_vectorstore(
            settings.VECTOR_STORE, path=target
        )
        vectorstore.delete_index()

    return {"status": "ok"}

@user.route("/api/upload", methods=["POST"])
def upload_file():
    """Store an uploaded file and queue it for ingestion.

    Expects multipart form fields ``user`` and ``name`` plus a ``file``
    part. All three names are passed through ``secure_filename``. The file
    is saved under ``current_dir/<UPLOAD_FOLDER>/<user>/<name>/`` and an
    ``ingest`` task is queued for it.

    Returns:
        ``{"status": "ok", "task_id": <id>}`` when the task was queued, or a
        dict whose ``status`` names the missing piece (``"no user"``,
        ``"no name"``, ``"no file"``, ``"no file name"``) or ``"error"``.
    """
    form = request.form
    if "user" not in form:
        return {"status": "no user"}
    user_name = secure_filename(form["user"])
    if "name" not in form:
        return {"status": "no name"}
    job_name = secure_filename(form["name"])

    # the request must carry a file part
    if "file" not in request.files:
        print("No file part")
        return {"status": "no file"}
    upload = request.files["file"]
    if upload.filename == "":
        return {"status": "no file name"}
    if not upload:
        return {"status": "error"}

    filename = secure_filename(upload.filename)
    save_dir = os.path.join(current_dir, settings.UPLOAD_FOLDER, user_name, job_name)
    # create the target directory on first use
    if not os.path.exists(save_dir):
        os.makedirs(save_dir)
    upload.save(os.path.join(save_dir, filename))

    accepted_suffixes = [".rst", ".md", ".pdf", ".txt"]
    task = ingest.delay(settings.UPLOAD_FOLDER, accepted_suffixes, job_name, filename, user_name)
    return {"status": "ok", "task_id": task.id}

@user.route("/api/task_status", methods=["GET"])
def task_status():
    """Report the state of a Celery task.

    The task is identified by the ``task_id`` query-string argument.

    Returns:
        ``{"status": <task status>, "result": <task info>}``.
    """
    requested_id = request.args.get("task_id")
    # imported here rather than at module level, as in the original code
    from application.celery import celery

    async_result = celery.AsyncResult(requested_id)
    info = async_result.info
    return {"status": async_result.status, "result": info}


@user.route("/api/combine", methods=["GET"])
def combined_json():
    """Provide json file with combined available indexes.

    The list starts with a fixed ``default`` entry, followed by one entry
    per document in the vectors collection whose ``user`` is ``"local"``.
    When the FAISS store is configured, the entries of the remote
    ``combined.json`` are appended with their ``location`` set to
    ``"remote"``.

    Returns:
        A JSON array of index descriptors with the keys name, language,
        version, description, fullName, date, docLink, model and location
        (remote entries carry whatever keys the remote file provides).
    """
    owner = "local"

    data = [
        {
            "name": "default",
            "language": "default",
            "version": "",
            "description": "default",
            "fullName": "default",
            "date": "default",
            "docLink": "default",
            "model": settings.EMBEDDINGS_NAME,
            "location": "local",
        }
    ]
    # append data from vectors_collection
    data.extend(
        {
            "name": index["name"],
            "language": index["language"],
            "version": "",
            "description": index["name"],
            "fullName": index["name"],
            "date": index["date"],
            "docLink": index["location"],
            "model": settings.EMBEDDINGS_NAME,
            "location": "local",
        }
        for index in vectors_collection.find({"user": owner})
    )
    if settings.VECTOR_STORE == "faiss":
        # remote catalogue of pre-built indexes
        data_remote = requests.get("https://d3dg1063dc54p9.cloudfront.net/combined.json").json()
        for index in data_remote:
            index["location"] = "remote"
            data.append(index)

    return jsonify(data)


@user.route("/api/docs_check", methods=["POST"])
def check_docs():
    """Check whether a vector store exists locally, downloading it if not.

    Expects a JSON body with a ``docs`` key. Stores whose first path
    component is ``local``, the ``default`` store, and stores already
    present under ``vectors/`` are reported as existing. Otherwise
    ``index.faiss`` and ``index.pkl`` are fetched from the DocsHUB
    repository and written to ``vectors/<docs>/``.

    Both files are downloaded before anything is written, so a failed
    second download no longer leaves a half-populated directory that later
    requests would report as existing.

    Returns:
        ``{"status": "exists"}``, ``{"status": "loaded"}`` after a
        successful download, or ``{"status": "null"}`` when either file is
        unavailable or ``docs`` contains a ``..`` component.
    """
    data = request.get_json()
    docs = data["docs"]
    # split docs on / and take first part
    parts = docs.split("/")
    if parts[0] == "local":
        return {"status": "exists"}
    # docs comes from the request body; never let it climb out of vectors/
    if ".." in parts:
        return {"status": "null"}

    vectorstore = "vectors/" + docs
    base_path = "https://raw.githubusercontent.com/arc53/DocsHUB/main/"
    if os.path.exists(vectorstore) or docs == "default":
        return {"status": "exists"}

    # Normalize to exactly one separator so docs works with or without a
    # trailing slash.
    remote_dir = base_path + vectorstore.rstrip("/") + "/"
    downloads = {}
    for file_name in ("index.faiss", "index.pkl"):
        r = requests.get(remote_dir + file_name)
        if r.status_code != 200:
            return {"status": "null"}
        downloads[file_name] = r.content

    os.makedirs(vectorstore, exist_ok=True)
    for file_name, content in downloads.items():
        with open(os.path.join(vectorstore, file_name), "wb") as f:
            f.write(content)

    return {"status": "loaded"}
testings 2023-09-26 09:03:22 +00:00			`import os`
			`from flask import Blueprint, request, jsonify`
			`import requests`
			`import json`
			`from pymongo import MongoClient`
			`from bson.objectid import ObjectId`
			`from werkzeug.utils import secure_filename`
			`import http.client`
working full 2023-09-27 15:25:57 +00:00
			`from application.api.user.tasks import ingest`
testings 2023-09-26 09:03:22 +00:00
			`from application.core.settings import settings`
elastic2 2023-09-29 16:17:48 +00:00			`from application.vectorstore.vector_creator import VectorCreator`

testings 2023-09-26 09:03:22 +00:00			`mongo = MongoClient(settings.MONGO_URI)`
			`db = mongo["docsgpt"]`
			`conversations_collection = db["conversations"]`
			`vectors_collection = db["vectors"]`
			`user = Blueprint('user', __name__)`

working full 2023-09-27 15:25:57 +00:00			`current_dir = os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))`

testings 2023-09-26 09:03:22 +00:00			`@user.route("/api/delete_conversation", methods=["POST"])`
			`def delete_conversation():`
			`# deletes a conversation from the database`
			`conversation_id = request.args.get("id")`
			`# write to mongodb`
			`conversations_collection.delete_one(`
			`{`
			`"_id": ObjectId(conversation_id),`
			`}`
			`)`

			`return {"status": "ok"}`

			`@user.route("/api/get_conversations", methods=["get"])`
			`def get_conversations():`
			`# provides a list of conversations`
			`conversations = conversations_collection.find().sort("date", -1)`
			`list_conversations = []`
			`for conversation in conversations:`
			`list_conversations.append({"id": str(conversation["_id"]), "name": conversation["name"]})`

			`#list_conversations = [{"id": "default", "name": "default"}, {"id": "jeff", "name": "jeff"}]`

			`return jsonify(list_conversations)`


			`@user.route("/api/get_single_conversation", methods=["get"])`
			`def get_single_conversation():`
			`# provides data for a conversation`
			`conversation_id = request.args.get("id")`
			`conversation = conversations_collection.find_one({"_id": ObjectId(conversation_id)})`
			`return jsonify(conversation['queries'])`


			`@user.route("/api/feedback", methods=["POST"])`
			`def api_feedback():`
			`data = request.get_json()`
			`question = data["question"]`
			`answer = data["answer"]`
			`feedback = data["feedback"]`

			`print("-" * 5)`
			`print("Question: " + question)`
			`print("Answer: " + answer)`
			`print("Feedback: " + feedback)`
			`print("-" * 5)`
			`response = requests.post(`
			`url="https://86x89umx77.execute-api.eu-west-2.amazonaws.com/docsgpt-feedback",`
			`headers={`
			`"Content-Type": "application/json; charset=utf-8",`
			`},`
			`data=json.dumps({"answer": answer, "question": question, "feedback": feedback}),`
			`)`
			`return {"status": http.client.responses.get(response.status_code, "ok")}`


			`@user.route("/api/delete_old", methods=["get"])`
			`def delete_old():`
			`"""Delete old indexes."""`
			`import shutil`

			`path = request.args.get("path")`
			`dirs = path.split("/")`
			`dirs_clean = []`
			`for i in range(1, len(dirs)):`
			`dirs_clean.append(secure_filename(dirs[i]))`
			`# check that path strats with indexes or vectors`
			`if dirs[0] not in ["indexes", "vectors"]:`
			`return {"status": "error"}`
			`path_clean = "/".join(dirs)`
			`vectors_collection.delete_one({"location": path})`
elastic2 2023-09-29 16:17:48 +00:00			`if settings.VECTOR_STORE == "faiss":`
			`try:`
			`shutil.rmtree(os.path.join(current_dir, path_clean))`
			`except FileNotFoundError:`
			`pass`
			`else:`
			`vetorstore = VectorCreator.create_vectorstore(`
			`settings.VECTOR_STORE, path=os.path.join(current_dir, path_clean)`
			`)`
			`vetorstore.delete_index()`

testings 2023-09-26 09:03:22 +00:00			`return {"status": "ok"}`

			`@user.route("/api/upload", methods=["POST"])`
			`def upload_file():`
			`"""Upload a file to get vectorized and indexed."""`
			`if "user" not in request.form:`
			`return {"status": "no user"}`
			`user = secure_filename(request.form["user"])`
			`if "name" not in request.form:`
			`return {"status": "no name"}`
			`job_name = secure_filename(request.form["name"])`
			`# check if the post request has the file part`
			`if "file" not in request.files:`
			`print("No file part")`
			`return {"status": "no file"}`
			`file = request.files["file"]`
			`if file.filename == "":`
			`return {"status": "no file name"}`

			`if file:`
			`filename = secure_filename(file.filename)`
			`# save dir`
working full 2023-09-27 15:25:57 +00:00			`save_dir = os.path.join(current_dir, settings.UPLOAD_FOLDER, user, job_name)`
testings 2023-09-26 09:03:22 +00:00			`# create dir if not exists`
			`if not os.path.exists(save_dir):`
			`os.makedirs(save_dir)`

			`file.save(os.path.join(save_dir, filename))`
working full 2023-09-27 15:25:57 +00:00			`task = ingest.delay(settings.UPLOAD_FOLDER, [".rst", ".md", ".pdf", ".txt"], job_name, filename, user)`
testings 2023-09-26 09:03:22 +00:00			`# task id`
			`task_id = task.id`
			`return {"status": "ok", "task_id": task_id}`
			`else:`
			`return {"status": "error"}`

			`@user.route("/api/task_status", methods=["GET"])`
			`def task_status():`
			`"""Get celery job status."""`
			`task_id = request.args.get("task_id")`
celery syncs 2023-10-01 19:05:13 +00:00			`from application.celery import celery`
			`task = celery.AsyncResult(task_id)`
testings 2023-09-26 09:03:22 +00:00			`task_meta = task.info`
			`return {"status": task.status, "result": task_meta}`


			`@user.route("/api/combine", methods=["GET"])`
			`def combined_json():`
			`user = "local"`
			`"""Provide json file with combined available indexes."""`
			`# get json from https://d3dg1063dc54p9.cloudfront.net/combined.json`

			`data = [`
			`{`
			`"name": "default",`
			`"language": "default",`
			`"version": "",`
			`"description": "default",`
			`"fullName": "default",`
			`"date": "default",`
			`"docLink": "default",`
			`"model": settings.EMBEDDINGS_NAME,`
			`"location": "local",`
			`}`
			`]`
			`# structure: name, language, version, description, fullName, date, docLink`
			`# append data from vectors_collection`
			`for index in vectors_collection.find({"user": user}):`
			`data.append(`
			`{`
			`"name": index["name"],`
			`"language": index["language"],`
			`"version": "",`
			`"description": index["name"],`
			`"fullName": index["name"],`
			`"date": index["date"],`
			`"docLink": index["location"],`
			`"model": settings.EMBEDDINGS_NAME,`
			`"location": "local",`
			`}`
			`)`
elastic2 2023-09-29 16:17:48 +00:00			`if settings.VECTOR_STORE == "faiss":`
			`data_remote = requests.get("https://d3dg1063dc54p9.cloudfront.net/combined.json").json()`
			`for index in data_remote:`
			`index["location"] = "remote"`
			`data.append(index)`
testings 2023-09-26 09:03:22 +00:00
			`return jsonify(data)`


			`@user.route("/api/docs_check", methods=["POST"])`
			`def check_docs():`
			`# check if docs exist in a vectorstore folder`
			`data = request.get_json()`
			`# split docs on / and take first part`
			`if data["docs"].split("/")[0] == "local":`
			`return {"status": "exists"}`
			`vectorstore = "vectors/" + data["docs"]`
			`base_path = "https://raw.githubusercontent.com/arc53/DocsHUB/main/"`
			`if os.path.exists(vectorstore) or data["docs"] == "default":`
			`return {"status": "exists"}`
			`else:`
			`r = requests.get(base_path + vectorstore + "index.faiss")`

			`if r.status_code != 200:`
			`return {"status": "null"}`
			`else:`
			`if not os.path.exists(vectorstore):`
			`os.makedirs(vectorstore)`
			`with open(vectorstore + "index.faiss", "wb") as f:`
			`f.write(r.content)`

			`# download the store`
			`r = requests.get(base_path + vectorstore + "index.pkl")`
			`with open(vectorstore + "index.pkl", "wb") as f:`
			`f.write(r.content)`

			`return {"status": "loaded"}`