From 605c599b5d4417e10b627c197c7ee021d9e3cb98 Mon Sep 17 00:00:00 2001 From: Alex Date: Wed, 8 Feb 2023 19:06:23 +0000 Subject: [PATCH 01/14] Create code_docs_gen.py --- scripts/code_docs_gen.py | 126 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 126 insertions(+) create mode 100644 scripts/code_docs_gen.py diff --git a/scripts/code_docs_gen.py b/scripts/code_docs_gen.py new file mode 100644 index 0000000..a2170ea --- /dev/null +++ b/scripts/code_docs_gen.py @@ -0,0 +1,126 @@ +from pathlib import Path +from langchain.text_splitter import CharacterTextSplitter +import faiss +from langchain.vectorstores import FAISS +from langchain.embeddings import OpenAIEmbeddings +from langchain.llms import OpenAI +from langchain.prompts import PromptTemplate +import pickle +import dotenv +import tiktoken +import sys +from argparse import ArgumentParser +import ast + +dotenv.load_dotenv() + + +ps = list(Path("inputs").glob("**/*.py")) +data = [] +sources = [] +for p in ps: + with open(p) as f: + data.append(f.read()) + sources.append(p) + + + +# with open('inputs/client.py', 'r') as f: +# tree = ast.parse(f.read()) + +# print(tree) + + +def get_functions_in_class(node): + functions = [] + functions_code = [] + for child in node.body: + if isinstance(child, ast.FunctionDef): + functions.append(child.name) + functions_code.append(ast.unparse(child)) + + return functions, functions_code + + +def get_classes_and_functions(source_code): + tree = ast.parse(source_code) + classes = {} + for node in tree.body: + if isinstance(node, ast.ClassDef): + class_name = node.name + function_name, function = get_functions_in_class(node) + # join function name and function code + functions = dict(zip(function_name, function)) + classes[class_name] = functions + return classes + + +structure_dict = {} +c1 = 0 +for code in data: + classes = get_classes_and_functions(ast.parse(code)) + source = str(sources[c1]) + structure_dict[source] = classes + c1 += 1 + +# save the structure dict as json 
+import json +with open('structure_dict.json', 'w') as f: + json.dump(structure_dict, f) + + +# llm = OpenAI(temperature=0) +# prompt = PromptTemplate( +# input_variables=["code"], +# template="Code: {code}, Documentation: ", +# ) +# +# print(prompt.format(code="print('hello world')")) +# print(llm(prompt.format(code="print('hello world')"))) + + +if not Path("outputs").exists(): + Path("outputs").mkdir() + +c1 = len(structure_dict) +c2 = 0 +for source, classes in structure_dict.items(): + c2 += 1 + print(f"Processing file {c2}/{c1}") + f1 = len(classes) + f2 = 0 + for class_name, functions in classes.items(): + f2 += 1 + print(f"Processing class {f2}/{f1}") + source_w = source.replace("inputs/", "") + source_w = source_w.replace(".py", ".txt") + if not Path(f"outputs/{source_w}").exists(): + with open(f"outputs/{source_w}", "w") as f: + f.write(f"Class: {class_name}") + else: + with open(f"outputs/{source_w}", "a") as f: + f.write(f"\n\nClass: {class_name}") + # append class name to the front + for function in functions: + b1 = len(functions) + b2 = 0 + print(f"Processing function {b2}/{b1}") + b2 += 1 + prompt = PromptTemplate( + input_variables=["code"], + template="Code: \n{code}, \nDocumentation: ", + ) + llm = OpenAI(temperature=0) + response = llm(prompt.format(code=functions[function])) + + if not Path(f"outputs/{source_w}").exists(): + with open(f"outputs/{source_w}", "w") as f: + f.write(f"Function: {functions[function]}, \nDocumentation: {response}") + else: + with open(f"outputs/{source_w}", "a") as f: + f.write(f"\n\nFunction: {functions[function]}, \nDocumentation: {response}") + + + + + From d0b472ad3893b41d190f6937676ef99c6e985953 Mon Sep 17 00:00:00 2001 From: Manan Date: Sun, 19 Feb 2023 01:53:16 +0530 Subject: [PATCH 02/14] Implemented html_parser: cleaning & chunk creation --- scripts/parser/file/bulk.py | 2 + scripts/parser/file/html_parser.py | 73 ++++++++++++++++++++++++++++++ 2 files changed, 75 insertions(+) create mode 100644 
scripts/parser/file/html_parser.py diff --git a/scripts/parser/file/bulk.py b/scripts/parser/file/bulk.py index 7808186..4fdea6f 100644 --- a/scripts/parser/file/bulk.py +++ b/scripts/parser/file/bulk.py @@ -7,6 +7,7 @@ from parser.file.base import BaseReader from parser.file.base_parser import BaseParser from parser.file.docs_parser import DocxParser, PDFParser from parser.file.epub_parser import EpubParser +from parser.file.html_parser import HTMLParser from parser.file.markdown_parser import MarkdownParser from parser.file.rst_parser import RstParser from parser.file.tabular_parser import PandasCSVParser @@ -19,6 +20,7 @@ DEFAULT_FILE_EXTRACTOR: Dict[str, BaseParser] = { ".epub": EpubParser(), ".md": MarkdownParser(), ".rst": RstParser(), + ".html": HTMLParser(), } diff --git a/scripts/parser/file/html_parser.py b/scripts/parser/file/html_parser.py new file mode 100644 index 0000000..c941dd7 --- /dev/null +++ b/scripts/parser/file/html_parser.py @@ -0,0 +1,73 @@ +"""HTML parser. + +Contains parser for html files. 
+ +""" +import re +from pathlib import Path +from typing import Dict, Union + +from parser.file.base_parser import BaseParser + +class HTMLParser(BaseParser): + """HTML parser.""" + + def _init_parser(self) -> Dict: + """Init parser.""" + return {} + + def parse_file(self, file: Path, errors: str = "ignore") -> str: + """Parse file.""" + try: + import unstructured + except ImportError: + raise ValueError("unstructured package is required to parse HTML files.") + from unstructured.partition.html import partition_html + from unstructured.staging.base import convert_to_isd + from unstructured.cleaners.core import clean + + with open(file, "r", encoding="utf-8") as fp: + elements = partition_html(file=fp) + isd = convert_to_isd(elements) + + # Removing non ascii charactwers from isd_el['text'] + for isd_el in isd: + isd_el['text'] = isd_el['text'].encode("ascii", "ignore").decode() + + # Removing all the \n characters from isd_el['text'] using regex and replace with single space + # Removing all the extra spaces from isd_el['text'] using regex and replace with single space + for isd_el in isd: + isd_el['text'] = re.sub(r'\n', ' ', isd_el['text'], flags=re.MULTILINE|re.DOTALL) + isd_el['text'] = re.sub(r"\s{2,}"," ", isd_el['text'], flags=re.MULTILINE|re.DOTALL) + + # more cleaning: extra_whitespaces, dashes, bullets, trailing_punctuation + for isd_el in isd: + clean(isd_el['text'], extra_whitespace=True, dashes=True, bullets=True, trailing_punctuation=True ) + + # Creating a list of all the indexes of isd_el['type'] = 'Title' + title_indexes = [i for i,isd_el in enumerate(isd) if isd_el['type'] == 'Title'] + + # Creating 'Chunks' - List of lists of strings + # each list starting with with isd_el['type'] = 'Title' and all the data till the next 'Title' + # Each Chunk can be thought of as an individual set of data, which can be sent to the model + + Chunks = list(list()) + + for i,isd_el in enumerate(isd): + if i in title_indexes: + Chunks.append([]) + 
Chunks[-1].append(isd_el['text']) + + print(Chunks) + + # writing the chunks to a file + # with open('chunks.txt', 'w') as f: + # for chunk in Chunks: + # f.write("%s \n" % chunk) + + + # # convert to isd ;Format : {'text': 'Navigation', 'type': 'Title'} + # with open(file, "r", encoding="utf-8") as fp: + # elements = partition_html(file=fp) + # isd = convert_to_isd(elements) + # print(isd) \ No newline at end of file From 4f5beaaa9e29cf14153068c02539cf9bba40a78c Mon Sep 17 00:00:00 2001 From: Manan Date: Sun, 19 Feb 2023 01:54:00 +0530 Subject: [PATCH 03/14] require package unstructued for html_parser --- application/requirements.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/application/requirements.txt b/application/requirements.txt index 9e8f73b..8531ab4 100644 --- a/application/requirements.txt +++ b/application/requirements.txt @@ -62,6 +62,7 @@ tqdm==4.64.1 transformers==4.26.0 typing-inspect==0.8.0 typing_extensions==4.4.0 +unstructured==0.4.8 urllib3==1.26.14 Werkzeug==2.2.2 XlsxWriter==3.0.8 From c92f5dba3209178333b06009480d3eb1d4f31e9d Mon Sep 17 00:00:00 2001 From: Alex Date: Mon, 20 Feb 2023 17:15:18 +0000 Subject: [PATCH 04/14] New docs gen --- scripts/docs_genv2.py | 160 ++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 160 insertions(+) create mode 100644 scripts/docs_genv2.py diff --git a/scripts/docs_genv2.py b/scripts/docs_genv2.py new file mode 100644 index 0000000..6360b91 --- /dev/null +++ b/scripts/docs_genv2.py @@ -0,0 +1,160 @@ +from pathlib import Path +from langchain.text_splitter import CharacterTextSplitter +import faiss +from langchain.vectorstores import FAISS +from langchain.embeddings import OpenAIEmbeddings +from langchain.llms import OpenAI +from langchain.prompts import PromptTemplate +import pickle +import dotenv +import tiktoken +import sys +from argparse import ArgumentParser +import ast + +dotenv.load_dotenv() + + +ps = list(Path("inputs").glob("**/*.py")) +data = [] +sources = [] +for p in ps: + with open(p) 
as f: + data.append(f.read()) + sources.append(p) + + + + + +def get_all_functions(source_code): + tree = ast.parse(source_code) + functions = {} + for node in tree.body: + if isinstance(node, ast.FunctionDef): + functions[node.name] = ast.unparse(node) + + return functions + +def get_all_functions_names(node): + functions = [] + for child in node.body: + if isinstance(child, ast.FunctionDef): + functions.append(child.name) + return functions + + + +def get_classes(source_code): + tree = ast.parse(source_code) + classes = {} + for node in tree.body: + if isinstance(node, ast.ClassDef): + classes[node.name] = get_all_functions_names(node) + return classes + +def get_functions_in_class(source_code, class_name): + tree = ast.parse(source_code) + functions = [] + for node in tree.body: + if isinstance(node, ast.ClassDef): + if node.name == class_name: + for function in node.body: + if isinstance(function, ast.FunctionDef): + functions.append(function.name) + return functions + + +functions_dict = {} +classes_dict = {} +c1 = 0 +for code in data: + functions = get_all_functions(ast.parse(code)) + source = str(sources[c1]) + functions_dict[source] = functions + classes = get_classes(code) + classes_dict[source] = classes + c1 += 1 + + + +if not Path("outputs").exists(): + Path("outputs").mkdir() + +c1 = len(functions_dict) +c2 = 0 +functions_dict = {} +for source, functions in functions_dict.items(): + c2 += 1 + print(f"Processing file {c2}/{c1}") + f1 = len(functions) + f2 = 0 + source_w = source.replace("inputs/", "") + source_w = source_w.replace(".py", ".md") + # this is how we check subfolders + if "/" in source_w: + subfolders = source_w.split("/") + subfolders = subfolders[:-1] + subfolders = "/".join(subfolders) + if not Path(f"outputs/{subfolders}").exists(): + Path(f"outputs/{subfolders}").mkdir(parents=True) + + for name, function in functions.items(): + f2 += 1 + print(f"Processing function {f2}/{f1}") + prompt = PromptTemplate( + input_variables=["code"], + 
template="Code: \n{code}, \nDocumentation: ", + ) + llm = OpenAI(temperature=0) + response = llm(prompt.format(code=function)) + + if not Path(f"outputs/{source_w}").exists(): + with open(f"outputs/{source_w}", "w") as f: + f.write(f"# Function name: {name} \n\nFunction: \n```\n{function}\n```, \nDocumentation: \n{response}") + else: + with open(f"outputs/{source_w}", "a") as f: + f.write(f"\n\n# Function name: {name} \n\nFunction: \n```\n{function}\n```, \nDocumentation: \n{response}") + + + +c1 = len(classes_dict) +c2 = 0 + +for source, classes in classes_dict.items(): + c2 += 1 + print(f"Processing file {c2}/{c1}") + f1 = len(classes) + f2 = 0 + source_w = source.replace("inputs/", "") + source_w = source_w.replace(".py", ".md") + + if "/" in source_w: + subfolders = source_w.split("/") + subfolders = subfolders[:-1] + subfolders = "/".join(subfolders) + if not Path(f"outputs/{subfolders}").exists(): + Path(f"outputs/{subfolders}").mkdir(parents=True) + + for name, function_names in classes.items(): + print(f"Processing Class {f2}/{f1}") + f2 += 1 + prompt = PromptTemplate( + input_variables=["class_name", "functions_names"], + template="Class name: {class_name} \nFunctions: {functions_names}, \nDocumentation: ", + ) + llm = OpenAI(temperature=0) + response = llm(prompt.format(class_name=name, functions_names=function_names)) + + if not Path(f"outputs/{source_w}").exists(): + with open(f"outputs/{source_w}", "w") as f: + f.write(f"# Class name: {name} \n\nFunctions: \n{function_names}, \nDocumentation: \n{response}") + else: + with open(f"outputs/{source_w}", "a") as f: + f.write(f"\n\n# Class name: {name} \n\nFunctions: \n{function_names}, \nDocumentation: \n{response}") + + + + + + From 381af7b2fe5e61d7e2febb071f2c517925c55255 Mon Sep 17 00:00:00 2001 From: Nick Date: Tue, 21 Feb 2023 19:23:14 +0300 Subject: [PATCH 05/14] Update README.md --- README.md | 3 +++ 1 file changed, 3 insertions(+) diff --git a/README.md b/README.md index ddc15e2..655c850 100644 --- 
a/README.md +++ b/README.md @@ -21,6 +21,9 @@ Say goodbye to time-consuming manual searches, and let DocsGPT +![Group 8](https://user-images.githubusercontent.com/17906039/220401693-befc2291-4087-4863-9e25-79809ff20318.png) + + ## Roadmap You can find our [Roadmap](https://github.com/orgs/arc53/projects/2) here, please don't hesitate contributing or creating issues, it helps us make DocsGPT better! From a5f1a75dc0f2a2430be333fe328eb01a6d6d04d1 Mon Sep 17 00:00:00 2001 From: Nick Date: Tue, 21 Feb 2023 19:59:36 +0300 Subject: [PATCH 06/14] Update README.md --- README.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index 655c850..aa942d5 100644 --- a/README.md +++ b/README.md @@ -13,12 +13,12 @@ Say goodbye to time-consuming manual searches, and let DocsGPT

![Group 8](https://user-images.githubusercontent.com/17906039/220401693-befc2291-4087-4863-9e25-79809ff20318.png) From 16eb503e3634dbac6422209795803972fa9e635d Mon Sep 17 00:00:00 2001 From: Manan Date: Tue, 21 Feb 2023 23:06:00 +0530 Subject: [PATCH 07/14] Added HTML Support. read, clean-up, filter return --- scripts/ingest.py | 2 +- scripts/parser/file/html_parser.py | 99 ++++++++++++++++-------------- 2 files changed, 55 insertions(+), 46 deletions(-) diff --git a/scripts/ingest.py b/scripts/ingest.py index 2194a7c..f41b86e 100644 --- a/scripts/ingest.py +++ b/scripts/ingest.py @@ -37,7 +37,7 @@ def ingest(yes: bool = typer.Option(False, "-y", "--yes", prompt=False, help="Maximum number of files to read."), formats: Optional[List[str]] = typer.Option([".rst", ".md"], help="""List of required extensions (list with .) - Currently supported: .rst, .md, .pdf, .docx, .csv, .epub"""), + Currently supported: .rst, .md, .pdf, .docx, .csv, .epub, .html"""), exclude: Optional[bool] = typer.Option(True, help="Whether to exclude hidden files (dotfiles).")): """ diff --git a/scripts/parser/file/html_parser.py b/scripts/parser/file/html_parser.py index c941dd7..12c01ae 100644 --- a/scripts/parser/file/html_parser.py +++ b/scripts/parser/file/html_parser.py @@ -16,8 +16,12 @@ class HTMLParser(BaseParser): """Init parser.""" return {} - def parse_file(self, file: Path, errors: str = "ignore") -> str: - """Parse file.""" + def parse_file(self, file: Path, errors: str = "ignore") -> Union[str, list[str]]: + """Parse file. + + Returns: + Union[str, List[str]]: a string or a List of strings. 
+ """ try: import unstructured except ImportError: @@ -26,48 +30,53 @@ class HTMLParser(BaseParser): from unstructured.staging.base import convert_to_isd from unstructured.cleaners.core import clean + # Using the unstructured library to convert the html to isd format + # isd sample : isd = [ + # {"text": "My Title", "type": "Title"}, + # {"text": "My Narrative", "type": "NarrativeText"} + # ] with open(file, "r", encoding="utf-8") as fp: elements = partition_html(file=fp) - isd = convert_to_isd(elements) - - # Removing non ascii charactwers from isd_el['text'] - for isd_el in isd: - isd_el['text'] = isd_el['text'].encode("ascii", "ignore").decode() - - # Removing all the \n characters from isd_el['text'] using regex and replace with single space - # Removing all the extra spaces from isd_el['text'] using regex and replace with single space - for isd_el in isd: - isd_el['text'] = re.sub(r'\n', ' ', isd_el['text'], flags=re.MULTILINE|re.DOTALL) - isd_el['text'] = re.sub(r"\s{2,}"," ", isd_el['text'], flags=re.MULTILINE|re.DOTALL) - - # more cleaning: extra_whitespaces, dashes, bullets, trailing_punctuation - for isd_el in isd: - clean(isd_el['text'], extra_whitespace=True, dashes=True, bullets=True, trailing_punctuation=True ) - - # Creating a list of all the indexes of isd_el['type'] = 'Title' - title_indexes = [i for i,isd_el in enumerate(isd) if isd_el['type'] == 'Title'] - - # Creating 'Chunks' - List of lists of strings - # each list starting with with isd_el['type'] = 'Title' and all the data till the next 'Title' - # Each Chunk can be thought of as an individual set of data, which can be sent to the model - - Chunks = list(list()) - - for i,isd_el in enumerate(isd): - if i in title_indexes: - Chunks.append([]) - Chunks[-1].append(isd_el['text']) - - print(Chunks) - - # writing the chunks to a file - # with open('chunks.txt', 'w') as f: - # for chunk in Chunks: - # f.write("%s \n" % chunk) - - - # # convert to isd ;Format : {'text': 'Navigation', 'type': 
'Title'} - # with open(file, "r", encoding="utf-8") as fp: - # elements = partition_html(file=fp) - # isd = convert_to_isd(elements) - # print(isd) \ No newline at end of file + isd = convert_to_isd(elements) + + # Removing non ascii charactwers from isd_el['text'] + for isd_el in isd: + isd_el['text'] = isd_el['text'].encode("ascii", "ignore").decode() + + # Removing all the \n characters from isd_el['text'] using regex and replace with single space + # Removing all the extra spaces from isd_el['text'] using regex and replace with single space + for isd_el in isd: + isd_el['text'] = re.sub(r'\n', ' ', isd_el['text'], flags=re.MULTILINE|re.DOTALL) + isd_el['text'] = re.sub(r"\s{2,}"," ", isd_el['text'], flags=re.MULTILINE|re.DOTALL) + + # more cleaning: extra_whitespaces, dashes, bullets, trailing_punctuation + for isd_el in isd: + clean(isd_el['text'], extra_whitespace=True, dashes=True, bullets=True, trailing_punctuation=True ) + + # Creating a list of all the indexes of isd_el['type'] = 'Title' + title_indexes = [i for i,isd_el in enumerate(isd) if isd_el['type'] == 'Title'] + + # Creating 'Chunks' - List of lists of strings + # each list starting with with isd_el['type'] = 'Title' and all the data till the next 'Title' + # Each Chunk can be thought of as an individual set of data, which can be sent to the model + # Where Each Title is grouped together with the data under it + + Chunks = list(list()) + final_chunks = list(list()) + + for i,isd_el in enumerate(isd): + if i in title_indexes: + Chunks.append([]) + Chunks[-1].append(isd_el['text']) + + # Removing all the chunks with sum of lenth of all the strings in the chunk < 25 #TODO: This value can be an user defined variable + for chunk in Chunks: + # sum of lenth of all the strings in the chunk + sum = 0 + sum += len(str(chunk)) + if sum < 25: + Chunks.remove(chunk) + else : + # appending all the approved chunks to final_chunks as a single string + final_chunks.append(" ".join([str(item) for item in chunk])) 
+ return final_chunks From 3a65a40c343d1a40a21f52d5d0a8be4219bb5302 Mon Sep 17 00:00:00 2001 From: Nick Date: Tue, 21 Feb 2023 21:20:47 +0300 Subject: [PATCH 08/14] Update README.md --- README.md | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index aa942d5..2fd458c 100644 --- a/README.md +++ b/README.md @@ -21,7 +21,8 @@ Say goodbye to time-consuming manual searches, and let DocsGPT -![Group 8](https://user-images.githubusercontent.com/17906039/220401693-befc2291-4087-4863-9e25-79809ff20318.png) +![Group 9](https://user-images.githubusercontent.com/17906039/220427472-2644cff4-7666-46a5-819f-fc4a521f63c7.png) + ## Roadmap From 524e0f6f01a72824ca212a6d552460065cb89468 Mon Sep 17 00:00:00 2001 From: Manan Date: Wed, 22 Feb 2023 20:20:54 +0530 Subject: [PATCH 09/14] fix | Chunk creation error when title not the first element in HTML --- scripts/parser/file/html_parser.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/parser/file/html_parser.py b/scripts/parser/file/html_parser.py index 12c01ae..53d7492 100644 --- a/scripts/parser/file/html_parser.py +++ b/scripts/parser/file/html_parser.py @@ -61,7 +61,7 @@ class HTMLParser(BaseParser): # Each Chunk can be thought of as an individual set of data, which can be sent to the model # Where Each Title is grouped together with the data under it - Chunks = list(list()) + Chunks = [[]] final_chunks = list(list()) for i,isd_el in enumerate(isd): From 0fb28e5213389661c25152735318ee9c2520ae0c Mon Sep 17 00:00:00 2001 From: Pavel Date: Wed, 22 Feb 2023 21:19:13 +0400 Subject: [PATCH 10/14] Calc + structure --- scripts/docs_genv2.py | 160 ------------------------------------- scripts/ingest.py | 29 ++++++- scripts/outputs/test.md | 167 +++++++++++++++++++++++++++++++++++++++ scripts/parser/py2doc.py | 155 ++++++++++++++++++++++++++++++++++++ 4 files changed, 350 insertions(+), 161 deletions(-) delete mode 100644 scripts/docs_genv2.py create mode 100644 
scripts/outputs/test.md create mode 100644 scripts/parser/py2doc.py diff --git a/scripts/docs_genv2.py b/scripts/docs_genv2.py deleted file mode 100644 index 6360b91..0000000 --- a/scripts/docs_genv2.py +++ /dev/null @@ -1,160 +0,0 @@ -from pathlib import Path -from langchain.text_splitter import CharacterTextSplitter -import faiss -from langchain.vectorstores import FAISS -from langchain.embeddings import OpenAIEmbeddings -from langchain.llms import OpenAI -from langchain.prompts import PromptTemplate -import pickle -import dotenv -import tiktoken -import sys -from argparse import ArgumentParser -import ast - -dotenv.load_dotenv() - - -ps = list(Path("inputs").glob("**/*.py")) -data = [] -sources = [] -for p in ps: - with open(p) as f: - data.append(f.read()) - sources.append(p) - - - - - -def get_all_functions(source_code): - tree = ast.parse(source_code) - functions = {} - for node in tree.body: - if isinstance(node, ast.FunctionDef): - functions[node.name] = ast.unparse(node) - - return functions - -def get_all_functions_names(node): - functions = [] - for child in node.body: - if isinstance(child, ast.FunctionDef): - functions.append(child.name) - return functions - - - -def get_classes(source_code): - tree = ast.parse(source_code) - classes = {} - for node in tree.body: - if isinstance(node, ast.ClassDef): - classes[node.name] = get_all_functions_names(node) - return classes - -def get_functions_in_class(source_code, class_name): - tree = ast.parse(source_code) - functions = [] - for node in tree.body: - if isinstance(node, ast.ClassDef): - if node.name == class_name: - for function in node.body: - if isinstance(function, ast.FunctionDef): - functions.append(function.name) - return functions - - -functions_dict = {} -classes_dict = {} -c1 = 0 -for code in data: - functions = get_all_functions(ast.parse(code)) - source = str(sources[c1]) - functions_dict[source] = functions - classes = get_classes(code) - classes_dict[source] = classes - c1 += 1 - - - -if not 
Path("outputs").exists(): - Path("outputs").mkdir() - -c1 = len(functions_dict) -c2 = 0 -functions_dict = {} -for source, functions in functions_dict.items(): - c2 += 1 - print(f"Processing file {c2}/{c1}") - f1 = len(functions) - f2 = 0 - source_w = source.replace("inputs/", "") - source_w = source_w.replace(".py", ".md") - # this is how we check subfolders - if "/" in source_w: - subfolders = source_w.split("/") - subfolders = subfolders[:-1] - subfolders = "/".join(subfolders) - if not Path(f"outputs/{subfolders}").exists(): - Path(f"outputs/{subfolders}").mkdir(parents=True) - - for name, function in functions.items(): - f2 += 1 - print(f"Processing function {f2}/{f1}") - prompt = PromptTemplate( - input_variables=["code"], - template="Code: \n{code}, \nDocumentation: ", - ) - llm = OpenAI(temperature=0) - response = llm(prompt.format(code=function)) - - if not Path(f"outputs/{source_w}").exists(): - with open(f"outputs/{source_w}", "w") as f: - f.write(f"# Function name: {name} \n\nFunction: \n```\n{function}\n```, \nDocumentation: \n{response}") - else: - with open(f"outputs/{source_w}", "a") as f: - f.write(f"\n\n# Function name: {name} \n\nFunction: \n```\n{function}\n```, \nDocumentation: \n{response}") - - - -c1 = len(classes_dict) -c2 = 0 - -for source, classes in classes_dict.items(): - c2 += 1 - print(f"Processing file {c2}/{c1}") - f1 = len(classes) - f2 = 0 - source_w = source.replace("inputs/", "") - source_w = source_w.replace(".py", ".md") - - if "/" in source_w: - subfolders = source_w.split("/") - subfolders = subfolders[:-1] - subfolders = "/".join(subfolders) - if not Path(f"outputs/{subfolders}").exists(): - Path(f"outputs/{subfolders}").mkdir(parents=True) - - for name, function_names in classes.items(): - print(f"Processing Class {f2}/{f1}") - f2 += 1 - prompt = PromptTemplate( - input_variables=["class_name", "functions_names"], - template="Class name: {class_name} \nFunctions: {functions_names}, \nDocumentation: ", - ) - llm = 
OpenAI(temperature=0) - response = llm(prompt.format(class_name=name, functions_names=function_names)) - - if not Path(f"outputs/{source_w}").exists(): - with open(f"outputs/{source_w}", "w") as f: - f.write(f"# Class name: {name} \n\nFunctions: \n{function_names}, \nDocumentation: \n{response}") - else: - with open(f"outputs/{source_w}", "a") as f: - f.write(f"\n\n# Class name: {name} \n\nFunctions: \n{function_names}, \nDocumentation: \n{response}") - - - - - - diff --git a/scripts/ingest.py b/scripts/ingest.py index 2194a7c..4fca0e7 100644 --- a/scripts/ingest.py +++ b/scripts/ingest.py @@ -1,10 +1,12 @@ -from collections import defaultdict import os import sys import nltk import dotenv import typer +import ast +from collections import defaultdict +from pathlib import Path from typing import List, Optional from langchain.text_splitter import RecursiveCharacterTextSplitter @@ -12,6 +14,7 @@ from langchain.text_splitter import RecursiveCharacterTextSplitter from parser.file.bulk import SimpleDirectoryReader from parser.schema.base import Document from parser.open_ai_func import call_openai_api, get_user_permission +from parser.py2doc import get_classes, get_functions, transform_to_docs dotenv.load_dotenv() @@ -78,5 +81,29 @@ def ingest(yes: bool = typer.Option(False, "-y", "--yes", prompt=False, for directory, folder_name in zip(dir, folder_names): process_one_docs(directory, folder_name) + +@app.command() +def convert(): + ps = list(Path("inputs").glob("**/*.py")) + data = [] + sources = [] + for p in ps: + with open(p) as f: + data.append(f.read()) + sources.append(p) + + functions_dict = {} + classes_dict = {} + c1 = 0 + for code in data: + functions = get_functions(ast.parse(code)) + source = str(sources[c1]) + functions_dict[source] = functions + classes = get_classes(code) + classes_dict[source] = classes + c1 += 1 + + transform_to_docs(functions_dict, classes_dict) + if __name__ == "__main__": app() diff --git a/scripts/outputs/test.md 
b/scripts/outputs/test.md new file mode 100644 index 0000000..7c1c419 --- /dev/null +++ b/scripts/outputs/test.md @@ -0,0 +1,167 @@ +# Function name: get_functions_in_class + +Function: +``` +def get_functions_in_class(source_code, class_name): + tree = ast.parse(source_code) + functions = [] + for node in tree.body: + if isinstance(node, ast.ClassDef): + if node.name == class_name: + for function in node.body: + if isinstance(function, ast.FunctionDef): + functions.append(function.name) + return functions +```, +Documentation: + + +get_functions_in_class(source_code, class_name) + +Inputs: +source_code (str): The source code of the program. +class_name (str): The name of the class. + +Outputs: +functions (list): A list of the functions in the class. + +Description: +This function takes in a source code and a class name and returns a list of the functions in the class. It uses the ast module to parse the source code and find the class definition. It then iterates through the body of the class and checks if each node is a function definition. If it is, it adds the name of the function to the list of functions. + +# Function name: process_functions + +Function: +``` +def process_functions(functions_dict): + c1 = len(functions_dict) + c2 = 0 + for (source, functions) in functions_dict.items(): + c2 += 1 + print(f'Processing file {c2}/{c1}') + f1 = len(functions) + f2 = 0 + source_w = source.replace('inputs/', '') + source_w = source_w.replace('.py', '.md') + create_subfolder(source_w) + for (name, function) in functions.items(): + f2 += 1 + print(f'Processing function {f2}/{f1}') + response = generate_response(function) + write_output_file(source_w, name, function, response) +```, +Documentation: + + +This function takes in a dictionary of functions and processes them. It takes the source file and the functions from the dictionary and creates a subfolder for the source file. It then generates a response for each function and writes the output file. 
The output file contains the function, the response, and the source file. + +# Function name: get_functions_in_class + +Function: +``` +def get_functions_in_class(source_code, class_name): + tree = ast.parse(source_code) + functions = [] + for node in tree.body: + if isinstance(node, ast.ClassDef): + if node.name == class_name: + for function in node.body: + if isinstance(function, ast.FunctionDef): + functions.append(function.name) + return functions +```, +Documentation: + + +get_functions_in_class(source_code, class_name) + +Inputs: +source_code (str): The source code of the program. +class_name (str): The name of the class. + +Outputs: +functions (list): A list of the functions in the class. + +Description: +This function takes in a source code and a class name and returns a list of the functions in the class. It uses the ast module to parse the source code and find the class definition. It then iterates through the body of the class and checks if each node is a function definition. If it is, it adds the name of the function to the list of functions. + +# Function name: process_functions + +Function: +``` +def process_functions(functions_dict): + c1 = len(functions_dict) + c2 = 0 + for (source, functions) in functions_dict.items(): + c2 += 1 + print(f'Processing file {c2}/{c1}') + f1 = len(functions) + f2 = 0 + source_w = source.replace('inputs/', '') + source_w = source_w.replace('.py', '.md') + create_subfolder(source_w) + for (name, function) in functions.items(): + f2 += 1 + print(f'Processing function {f2}/{f1}') + response = generate_response(function) + write_output_file(source_w, name, function, response) +```, +Documentation: + + +This function takes in a dictionary of functions and processes them. It takes the source file and the functions from the dictionary and creates a subfolder for the source file. It then generates a response for each function and writes the output file for each function. 
+ +# Function name: get_functions_in_class + +Function: +``` +def get_functions_in_class(source_code, class_name): + tree = ast.parse(source_code) + functions = [] + for node in tree.body: + if isinstance(node, ast.ClassDef): + if node.name == class_name: + for function in node.body: + if isinstance(function, ast.FunctionDef): + functions.append(function.name) + return functions +```, +Documentation: + + +get_functions_in_class(source_code, class_name) + +Inputs: +source_code (str): The source code of the program. +class_name (str): The name of the class. + +Outputs: +functions (list): A list of the functions in the class. + +Description: +This function takes in a source code and a class name and returns a list of the functions in the class. It uses the ast module to parse the source code and find the class definition. It then iterates through the body of the class and checks if each node is a function definition. If it is, it adds the name of the function to the list of functions. + +# Function name: process_functions + +Function: +``` +def process_functions(functions_dict): + c1 = len(functions_dict) + c2 = 0 + for (source, functions) in functions_dict.items(): + c2 += 1 + print(f'Processing file {c2}/{c1}') + f1 = len(functions) + f2 = 0 + source_w = source.replace('inputs/', '') + source_w = source_w.replace('.py', '.md') + create_subfolder(source_w) + for (name, function) in functions.items(): + f2 += 1 + print(f'Processing function {f2}/{f1}') + response = generate_response(function) + write_output_file(source_w, name, function, response) +```, +Documentation: + + +This function takes in a dictionary of functions and processes them. It takes the source file and the functions from the dictionary and creates a subfolder for the source file. It then generates a response for each function and writes the output file for each function. 
\ No newline at end of file diff --git a/scripts/parser/py2doc.py b/scripts/parser/py2doc.py new file mode 100644 index 0000000..b5a37c2 --- /dev/null +++ b/scripts/parser/py2doc.py @@ -0,0 +1,155 @@ +from pathlib import Path +from langchain.llms import OpenAI +from langchain.prompts import PromptTemplate +import dotenv +import ast +import typer +import tiktoken + +dotenv.load_dotenv() + +def get_functions(source_code): + tree = ast.parse(source_code) + functions = {} + for node in tree.body: + if isinstance(node, ast.FunctionDef): + functions[node.name] = ast.unparse(node) + + return functions + +def get_functions_names(node): + functions = [] + for child in node.body: + if isinstance(child, ast.FunctionDef): + functions.append(child.name) + return functions + + + +def get_classes(source_code): + tree = ast.parse(source_code) + classes = {} + for node in tree.body: + if isinstance(node, ast.ClassDef): + classes[node.name] = get_functions_names(node) + return classes + +def get_functions_in_class(source_code, class_name): + tree = ast.parse(source_code) + functions = [] + for node in tree.body: + if isinstance(node, ast.ClassDef): + if node.name == class_name: + for function in node.body: + if isinstance(function, ast.FunctionDef): + functions.append(function.name) + return functions + + +def parse_functions(functions_dict): + c1 = len(functions_dict) + c2 = 0 + for source, functions in functions_dict.items(): + c2 += 1 + print(f"Processing file {c2}/{c1}") + f1 = len(functions) + f2 = 0 + source_w = source.replace("inputs/", "") + source_w = source_w.replace(".py", ".md") + # this is how we check subfolders + if "/" in source_w: + subfolders = source_w.split("/") + subfolders = subfolders[:-1] + subfolders = "/".join(subfolders) + if not Path(f"outputs/{subfolders}").exists(): + Path(f"outputs/{subfolders}").mkdir(parents=True) + + for name, function in functions.items(): + f2 += 1 + print(f"Processing function {f2}/{f1}") + prompt = PromptTemplate( + 
input_variables=["code"], + template="Code: \n{code}, \nDocumentation: ", + ) + llm = OpenAI(temperature=0) + response = llm(prompt.format(code=function)) + + if not Path(f"outputs/{source_w}").exists(): + with open(f"outputs/{source_w}", "w") as f: + f.write(f"# Function name: {name} \n\nFunction: \n```\n{function}\n```, \nDocumentation: \n{response}") + else: + with open(f"outputs/{source_w}", "a") as f: + f.write(f"\n\n# Function name: {name} \n\nFunction: \n```\n{function}\n```, \nDocumentation: \n{response}") + + +def parse_classes(classes_dict): + c1 = len(classes_dict) + c2 = 0 + for source, classes in classes_dict.items(): + c2 += 1 + print(f"Processing file {c2}/{c1}") + f1 = len(classes) + f2 = 0 + source_w = source.replace("inputs/", "") + source_w = source_w.replace(".py", ".md") + + if "/" in source_w: + subfolders = source_w.split("/") + subfolders = subfolders[:-1] + subfolders = "/".join(subfolders) + if not Path(f"outputs/{subfolders}").exists(): + Path(f"outputs/{subfolders}").mkdir(parents=True) + + for name, function_names in classes.items(): + print(f"Processing Class {f2}/{f1}") + f2 += 1 + prompt = PromptTemplate( + input_variables=["class_name", "functions_names"], + template="Class name: {class_name} \nFunctions: {functions_names}, \nDocumentation: ", + ) + llm = OpenAI(temperature=0) + response = llm(prompt.format(class_name=name, functions_names=function_names)) + + if not Path(f"outputs/{source_w}").exists(): + with open(f"outputs/{source_w}", "w") as f: + f.write(f"# Class name: {name} \n\nFunctions: \n{function_names}, \nDocumentation: \n{response}") + else: + with open(f"outputs/{source_w}", "a") as f: + f.write(f"\n\n# Class name: {name} \n\nFunctions: \n{function_names}, \nDocumentation: \n{response}") + + +#User permission +def transform_to_docs(functions_dict, classes_dict): +# Function to ask user permission to call the OpenAI api and spend their OpenAI funds. 
+ # Here we convert dicts to a string and calculate the number of OpenAI tokens the string represents. + docs_content = "" + for key, value in functions_dict.items(): + docs_content += str(key) + str(value) + for key, value in classes_dict.items(): + docs_content += str(key) + str(value) + + encoding = tiktoken.get_encoding("cl100k_base") + num_tokens = len(encoding.encode(docs_content)) + total_price = ((num_tokens / 1000) * 0.02) + + # Here we print the number of tokens and the approx user cost with some visually appealing formatting. + print(f"Number of Tokens = {format(num_tokens, ',d')}") + print(f"Approx Cost = ${format(total_price, ',.2f')}") + #Here we check for user permission before calling the API. + user_input = input("Price Okay? (Y/N) \n").lower() + if user_input == "y": + if not Path("outputs").exists(): + Path("outputs").mkdir() + parse_functions(functions_dict) + print("Functions done!") + parse_classes(classes_dict) + print("All done!") + elif user_input == "": + if not Path("outputs").exists(): + Path("outputs").mkdir() + parse_functions(functions_dict) + print("Functions done!") + parse_classes(classes_dict) + print("All done!") + else: + print("The API was not called. 
No money was spent.") \ No newline at end of file From fabe4d53d6f9c30f1d01f57f9862b8726ff1e9a5 Mon Sep 17 00:00:00 2001 From: UnknownDev Date: Thu, 23 Feb 2023 11:29:52 +0000 Subject: [PATCH 11/14] Fix the server 500 error and show error message to client --- .gitignore | 2 +- application/app.py | 119 +++++++++++++++-------------- application/error.py | 13 ++++ application/static/src/chat.js | 126 ++++++++++++++++++------------- application/templates/index.html | 27 ++++++- 5 files changed, 174 insertions(+), 113 deletions(-) create mode 100644 application/error.py diff --git a/.gitignore b/.gitignore index 0003c21..5af7552 100644 --- a/.gitignore +++ b/.gitignore @@ -108,7 +108,7 @@ venv/ ENV/ env.bak/ venv.bak/ - +.flaskenv # Spyder project settings .spyderproject .spyproject diff --git a/application/app.py b/application/app.py index b6360ae..33a1e2e 100644 --- a/application/app.py +++ b/application/app.py @@ -9,7 +9,7 @@ from langchain import OpenAI, VectorDBQA, HuggingFaceHub, Cohere from langchain.chains.question_answering import load_qa_chain from langchain.embeddings import OpenAIEmbeddings, HuggingFaceHubEmbeddings, CohereEmbeddings, HuggingFaceInstructEmbeddings from langchain.prompts import PromptTemplate - +from error import bad_request # os.environ["LANGCHAIN_HANDLER"] = "langchain" if os.getenv("LLM_NAME") is not None: @@ -74,6 +74,8 @@ def api_answer(): data = request.get_json() question = data["question"] history = data["history"] + print('-'*5) + print(data["embeddings_key"]) if not api_key_set: api_key = data["api_key"] else: @@ -83,62 +85,69 @@ else: embeddings_key = os.getenv("EMBEDDINGS_KEY") + # use try and except to check for exception + try: - # check if the vectorstore is set - if "active_docs" in data: - vectorstore = "vectors/" + data["active_docs"] - if 
data['active_docs'] == "default": + vectorstore = "" + else: vectorstore = "" - else: - vectorstore = "" - - # loading the index and the store and the prompt template - # Note if you have used other embeddings than OpenAI, you need to change the embeddings - if embeddings_choice == "openai_text-embedding-ada-002": - docsearch = FAISS.load_local(vectorstore, OpenAIEmbeddings(openai_api_key=embeddings_key)) - elif embeddings_choice == "huggingface_sentence-transformers/all-mpnet-base-v2": - docsearch = FAISS.load_local(vectorstore, HuggingFaceHubEmbeddings()) - elif embeddings_choice == "huggingface_hkunlp/instructor-large": - docsearch = FAISS.load_local(vectorstore, HuggingFaceInstructEmbeddings()) - elif embeddings_choice == "cohere_medium": - docsearch = FAISS.load_local(vectorstore, CohereEmbeddings(cohere_api_key=embeddings_key)) - - # create a prompt template - if history: - history = json.loads(history) - template_temp = template_hist.replace("{historyquestion}", history[0]).replace("{historyanswer}", history[1]) - c_prompt = PromptTemplate(input_variables=["summaries", "question"], template=template_temp, template_format="jinja2") - else: - c_prompt = PromptTemplate(input_variables=["summaries", "question"], template=template, template_format="jinja2") - - if llm_choice == "openai": - llm = OpenAI(openai_api_key=api_key, temperature=0) - elif llm_choice == "manifest": - llm = ManifestWrapper(client=manifest, llm_kwargs={"temperature": 0.001, "max_tokens": 2048}) - elif llm_choice == "huggingface": - llm = HuggingFaceHub(repo_id="bigscience/bloom", huggingfacehub_api_token=api_key) - elif llm_choice == "cohere": - llm = Cohere(model="command-xlarge-nightly", cohere_api_key=api_key) - - qa_chain = load_qa_chain(llm=llm, chain_type="map_reduce", - combine_prompt=c_prompt) - - chain = VectorDBQA(combine_documents_chain=qa_chain, vectorstore=docsearch, k=4) - - # fetch the answer - result = chain({"query": question}) - print(result) - - # some formatting for the 
frontend - result['answer'] = result['result'] - result['answer'] = result['answer'].replace("\\n", "
") - result['answer'] = result['answer'].replace("SOURCES:", "") - # mock result - # result = { - # "answer": "The answer is 42", - # "sources": ["https://en.wikipedia.org/wiki/42_(number)", "https://en.wikipedia.org/wiki/42_(number)"] - # } - return result + + # loading the index and the store and the prompt template + # Note if you have used other embeddings than OpenAI, you need to change the embeddings + if embeddings_choice == "openai_text-embedding-ada-002": + docsearch = FAISS.load_local(vectorstore, OpenAIEmbeddings(openai_api_key=embeddings_key)) + elif embeddings_choice == "huggingface_sentence-transformers/all-mpnet-base-v2": + docsearch = FAISS.load_local(vectorstore, HuggingFaceHubEmbeddings()) + elif embeddings_choice == "huggingface_hkunlp/instructor-large": + docsearch = FAISS.load_local(vectorstore, HuggingFaceInstructEmbeddings()) + elif embeddings_choice == "cohere_medium": + docsearch = FAISS.load_local(vectorstore, CohereEmbeddings(cohere_api_key=embeddings_key)) + + # create a prompt template + if history: + history = json.loads(history) + print(history) + template_temp = template_hist.replace("{historyquestion}", history[0]).replace("{historyanswer}", history[1]) + c_prompt = PromptTemplate(input_variables=["summaries", "question"], template=template_temp, template_format="jinja2") + else: + c_prompt = PromptTemplate(input_variables=["summaries", "question"], template=template, template_format="jinja2") + + if llm_choice == "openai": + llm = OpenAI(openai_api_key=api_key, temperature=0) + elif llm_choice == "manifest": + llm = ManifestWrapper(client=manifest, llm_kwargs={"temperature": 0.001, "max_tokens": 2048}) + elif llm_choice == "huggingface": + llm = HuggingFaceHub(repo_id="bigscience/bloom", huggingfacehub_api_token=api_key) + elif llm_choice == "cohere": + llm = Cohere(model="command-xlarge-nightly", cohere_api_key=api_key) + + qa_chain = load_qa_chain(llm=llm, chain_type="map_reduce", + combine_prompt=c_prompt) + + chain = 
VectorDBQA(combine_documents_chain=qa_chain, vectorstore=docsearch, k=4) + + + # fetch the answer + result = chain({"query": question}) + print(result) + + # some formatting for the frontend + result['answer'] = result['result'] + result['answer'] = result['answer'].replace("\\n", "
") + result['answer'] = result['answer'].replace("SOURCES:", "") + # mock result + # result = { + # "answer": "The answer is 42", + # "sources": ["https://en.wikipedia.org/wiki/42_(number)", "https://en.wikipedia.org/wiki/42_(number)"] + # } + return result + except Exception as e: + print(str(e)) + return bad_request(500,str(e)) @app.route("/api/docs_check", methods=["POST"]) diff --git a/application/error.py b/application/error.py new file mode 100644 index 0000000..b73856b --- /dev/null +++ b/application/error.py @@ -0,0 +1,13 @@ +from flask import jsonify +from werkzeug.http import HTTP_STATUS_CODES + +def response_error(code_status,message=None): + payload = {'error':HTTP_STATUS_CODES.get(code_status,"something get weong")} + if message: + payload['message'] = message + response = jsonify(payload) + response.status_code = code_status + return response + +def bad_request(status_code=400,message=''): + return response_error(code_status=status_code,message=message) \ No newline at end of file diff --git a/application/static/src/chat.js b/application/static/src/chat.js index 553d7f3..3c997ad 100644 --- a/application/static/src/chat.js +++ b/application/static/src/chat.js @@ -1,55 +1,73 @@ -var el = document.getElementById('message-form'); -if (el) { - el.addEventListener("submit", function (event) { - console.log("submitting") - event.preventDefault() - var message = document.getElementById("message-input").value; - msg_html = '

' - msg_html += message - msg_html += '

' - document.getElementById("messages").innerHTML += msg_html; - let chatWindow = document.getElementById("messages-container"); - chatWindow.scrollTop = chatWindow.scrollHeight; - document.getElementById("message-input").value = ""; - document.getElementById("button-submit").innerHTML = ' Thinking...'; - document.getElementById("button-submit").disabled = true; - if (localStorage.getItem('activeDocs') == null) { - localStorage.setItem('activeDocs', 'default') - } - - fetch('/api/answer', { - method: 'POST', - headers: { - 'Content-Type': 'application/json', - }, - - body: JSON.stringify({question: message, - api_key: localStorage.getItem('apiKey'), - embeddings_key: localStorage.getItem('apiKey'), - history: localStorage.getItem('chatHistory'), - active_docs: localStorage.getItem('activeDocs')}), +var form = document.getElementById('message-form'); +var errorModal = document.getElementById('error-alert') +document.getElementById('close').addEventListener('click',()=>{ + errorModal.classList.toggle('hidden') +}) + + +function submitForm(event){ + event.preventDefault() + var message = document.getElementById("message-input").value; + console.log(message.length) + if(message.length === 0){ + return + } + msg_html = '

' + msg_html += message + msg_html += '

' + document.getElementById("messages").innerHTML += msg_html; + let chatWindow = document.getElementById("messages-container"); + chatWindow.scrollTop = chatWindow.scrollHeight; + document.getElementById("message-input").value = ""; + document.getElementById("button-submit").innerHTML = ' Thinking...'; + document.getElementById("button-submit").disabled = true; + if (localStorage.getItem('activeDocs') == null) { + localStorage.setItem('activeDocs', 'default') + } + + + fetch('/api/answer', { + method: 'POST', + headers: { + 'Content-Type': 'application/json', + }, + + body: JSON.stringify({question: message, + api_key: localStorage.getItem('apiKey'), + embeddings_key: localStorage.getItem('apiKey'), + history: localStorage.getItem('chatHistory'), + active_docs: localStorage.getItem('activeDocs')}), + }).then((response)=> response.json()) + .then(data => { + console.log('Success:', data); + if(data.error){ + document.getElementById('text-error').textContent = `Error : ${JSON.stringify(data.message)}` + errorModal.classList.toggle('hidden') + } + if(data.answer){ + msg_html = '
' + msg_html += data.answer + msg_html += '
' + document.getElementById("messages").innerHTML += msg_html; + let chatWindow = document.getElementById("messages-container"); + chatWindow.scrollTop = chatWindow.scrollHeight; + } + document.getElementById("button-submit").innerHTML = 'Send'; + document.getElementById("button-submit").disabled = false; + let chatHistory = [message, data.answer || '']; + localStorage.setItem('chatHistory', JSON.stringify(chatHistory)); + + + + }) - .then(response => response.json()) - .then(data => { - console.log('Success:', data); - msg_html = '
' - msg_html += data.answer - msg_html += '
' - document.getElementById("messages").innerHTML += msg_html; - let chatWindow = document.getElementById("messages-container"); - chatWindow.scrollTop = chatWindow.scrollHeight; - document.getElementById("button-submit").innerHTML = 'Send'; - document.getElementById("button-submit").disabled = false; - let chatHistory = [message, data.answer]; - localStorage.setItem('chatHistory', JSON.stringify(chatHistory)); - }) - .catch((error) => { - console.error('Error:', error); - console.log(error); - document.getElementById("button-submit").innerHTML = 'Send'; - document.getElementById("button-submit").disabled = false; - }); - - - }); -} \ No newline at end of file + .catch((error) => { + console.error('Error:', error); + // console.log(error); + // document.getElementById("button-submit").innerHTML = 'Send'; + // document.getElementById("button-submit").disabled = false; + + }); +} + +window.addEventListener('submit',submitForm) diff --git a/application/templates/index.html b/application/templates/index.html index e6dad78..4f8e471 100644 --- a/application/templates/index.html +++ b/application/templates/index.html @@ -16,7 +16,7 @@ - +
@@ -28,6 +28,17 @@ {% endif %}
+ + + + + +
@@ -59,6 +70,8 @@ This will return a new DataFrame with all the columns from both tables, and only
+ +
@@ -77,11 +90,16 @@ This will return a new DataFrame with all the columns from both tables, and only
- + +
+ + + {% if not api_key_set %} -